# Imports below assume the TOM library (tom_lib) package layout;
# FrenchLemmatizer is the library's French lemmatizing preprocessor (assumed available in scope).
from tom_lib.structure.corpus import Corpus
from tom_lib.nlp.topic_model import NonNegativeMatrixFactorization
from tom_lib.visualization.visualization import Visualization

print('Load documents from CSV')
corpus = Corpus(source_file_path='input/egc.csv',
                language='french',             # language used for stop words
                vectorization='tfidf',         # 'tf' (term frequency) or 'tfidf' (term frequency-inverse document frequency)
                max_relative_frequency=0.8,    # ignore words whose relative frequency is greater than this threshold
                min_absolute_frequency=4,      # ignore words whose absolute frequency is lower than this threshold
                preprocessor=FrenchLemmatizer())  # pre-process documents
print('corpus size:', corpus.size)
print('vocabulary size:', len(corpus.vocabulary))
print('Vector representation of document 0:\n', corpus.vector_for_document(0))

# Instantiate a topic model
topic_model = NonNegativeMatrixFactorization(corpus)

# Estimate the optimal number of topics
viz = Visualization(topic_model)
viz.plot_greene_metric(min_num_topics=10, max_num_topics=30, tao=10, step=1, top_n_words=10)
viz.plot_arun_metric(min_num_topics=5, max_num_topics=30, iterations=10)
viz.plot_consensus_metric(min_num_topics=5, max_num_topics=30, iterations=10)

# Infer topics
print('Inferring topics...')
topic_model.infer_topics(num_topics=15)

# Save model on disk
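# The comment above mentions saving the model to disk but no code follows; the lines below are a
# minimal sketch, assuming plain pickle serialization and a hypothetical output path rather than
# any persistence helper provided by the library itself.
import pickle

with open('output/topic_model.pickle', 'wb') as model_file:
    pickle.dump(topic_model, model_file)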
import tensorflow as tf

# `activation` (providing the leaky-ReLU helper activation.lrelu) and `Visualization`
# are project-local modules imported elsewhere in this project.


class Cnn:

    def __init__(self, config):
        self.config = config
        self.visualization = Visualization(self.config)

    def inference(self, x, mode_name):
        # Summaries are only emitted outside of validation runs.
        histogram_summary = mode_name != self.config.MODE.VALIDATION
        kernel_image_summary = mode_name != self.config.MODE.VALIDATION
        activation_image_summary = mode_name != self.config.MODE.VALIDATION

        with tf.name_scope('inputs'):
            x = tf.reshape(x, [
                -1,
                self.config.IMAGE_SIZE.HEIGHT,
                self.config.IMAGE_SIZE.WIDTH,
                self.config.IMAGE_SIZE.CHANNELS
            ])
            tf.summary.image("/inputs", x, max_outputs=4)

        with tf.variable_scope('convolution1'):
            convolution_1 = self.conv_layer(
                input_tensor=x,
                depth_in=self.config.IMAGE_SIZE.CHANNELS,
                depth_out=64,
                mode_name=mode_name,
                histogram_summary=histogram_summary,
                kernel_image_summary=kernel_image_summary,
                activation_image_summary=activation_image_summary)

        with tf.variable_scope('max_pooling1'):
            max_pooling_1 = tf.layers.max_pooling2d(inputs=convolution_1,
                                                    pool_size=[2, 2],
                                                    strides=2)

        with tf.variable_scope('convolution2'):
            convolution_2 = self.conv_layer(
                input_tensor=max_pooling_1,
                depth_in=64,
                depth_out=128,
                mode_name=mode_name,
                histogram_summary=histogram_summary,
                kernel_image_summary=False,
                activation_image_summary=activation_image_summary)

        with tf.variable_scope('max_pooling2'):
            max_pooling_2 = tf.layers.max_pooling2d(inputs=convolution_2,
                                                    pool_size=[2, 2],
                                                    strides=2)

        with tf.variable_scope('convolution3'):
            convolution_3 = self.conv_layer(
                input_tensor=max_pooling_2,
                depth_in=128,
                depth_out=256,
                mode_name=mode_name,
                histogram_summary=histogram_summary,
                kernel_image_summary=False,
                activation_image_summary=activation_image_summary)

        with tf.variable_scope('max_pooling3'):
            max_pooling_3 = tf.layers.max_pooling2d(inputs=convolution_3,
                                                    pool_size=[2, 2],
                                                    strides=2)

        with tf.variable_scope('convolution4'):
            convolution_4 = self.conv_layer(
                input_tensor=max_pooling_3,
                depth_in=256,
                depth_out=512,
                mode_name=mode_name,
                histogram_summary=histogram_summary,
                kernel_image_summary=False,
                activation_image_summary=activation_image_summary)

        with tf.variable_scope('max_pooling4'):
            max_pooling_4 = tf.layers.max_pooling2d(inputs=convolution_4,
                                                    pool_size=[2, 2],
                                                    strides=2)

        with tf.variable_scope('dense1'):
            # Flatten the last pooling layer before the fully connected layers.
            flattened = tf.reshape(max_pooling_4, [
                max_pooling_4.shape[0].value,
                max_pooling_4.shape[1].value * max_pooling_4.shape[2].value * max_pooling_4.shape[3].value
            ])
            dense_1 = tf.layers.dense(inputs=flattened,
                                      units=1024,
                                      activation=activation.lrelu)
            if mode_name == self.config.MODE.TRAINING:
                tf.summary.histogram('dense1', dense_1)

        with tf.variable_scope('logits'):
            logits = tf.layers.dense(inputs=dense_1,
                                     units=self.config.NUM_CLASSES,
                                     activation=activation.lrelu)
            if mode_name == self.config.MODE.TRAINING:
                tf.summary.histogram('logits', logits)
                tf.summary.histogram('softmax', tf.nn.softmax(logits=logits))

        return logits

    def _conv2d(self, x, weights, strides):
        return tf.nn.conv2d(x, weights, strides=strides, padding='SAME')

    def conv_layer(self, mode_name, input_tensor, depth_in, depth_out,
                   kernel_height=3, kernel_width=3, strides=(1, 1, 1, 1),
                   activation_fn=activation.lrelu, histogram_summary=False,
                   kernel_image_summary=False, activation_image_summary=False):
        weights = tf.get_variable(
            "weights", [kernel_height, kernel_width, depth_in, depth_out],
            initializer=tf.truncated_normal_initializer(stddev=0.01))
        biases = tf.get_variable("biases", [depth_out],
                                 initializer=tf.constant_initializer(0.01))
        convolutions = self._conv2d(input_tensor, weights, strides=strides)
        activations = activation_fn(convolutions + biases, self.config.LEAKY_RELU_ALPHA)
        if histogram_summary:
            tf.summary.histogram(mode_name + '_weights', weights)
            tf.summary.histogram(mode_name + '_activations', activations)
        if kernel_image_summary:
            weights_image_grid = self.visualization.kernels_image_grid(kernel=weights)
            tf.summary.image(mode_name + '/features', weights_image_grid, max_outputs=1)
        if activation_image_summary:
            activation_image = self.visualization.activation_image(activations=activations)
            tf.summary.image("/activated", activation_image)
        return activations
def __init__(self, config):
    self.config = config
    self.visualization = Visualization(self.config)
# Imports below assume the TOM library (tom_lib) package layout.
from tom_lib.structure.corpus import Corpus
from tom_lib.nlp.topic_model import LatentDirichletAllocation
from tom_lib.visualization.visualization import Visualization

print('Load documents from CSV')
corpus = Corpus(source_file_path='input/egc.csv',
                language='french',           # determines the stop words
                vectorization='tf',          # 'tf' (term frequency) or 'tfidf' (term frequency-inverse document frequency)
                max_relative_frequency=0.8,  # ignore words whose relative frequency is greater than this threshold
                min_absolute_frequency=4,    # ignore words whose absolute frequency is lower than this threshold
                preprocessor=None)           # determines how documents are pre-processed (e.g. stemming, lemmatization)
print('corpus size:', corpus.size)
print('vocabulary size:', len(corpus.vocabulary))
print('Vector representation of document 2:\n', corpus.vector_for_document(2))

# Instantiate a topic model
topic_model = LatentDirichletAllocation(corpus=corpus)

# Estimate the optimal number of topics using the metric proposed by Greene et al.
# (the Arun et al. metric is commented out below)
viz = Visualization(topic_model)
viz.plot_greene_metric(min_num_topics=10, max_num_topics=12, tao=10, step=1,
                       top_n_words=10, file_path='output/greene.png')
# viz.plot_arun_metric(10, 30, 5, '/Users/adrien/Desktop/arun.png')

# Infer topics
topic_model.infer_topics(num_topics=20)

# Print results
print('\nTopics:')
topic_model.print_topics(num_words=10)
print('\nDocument 2:', topic_model.corpus.full_content(2))
print('\nTopic distribution for document 2:', topic_model.topic_distribution_for_document(2))
print('\nMost likely topic for document 2:', topic_model.most_likely_topic_for_document(2))
print('\nTopics frequency:', topic_model.topics_frequency())
print('\nTopic 2 frequency:', topic_model.topic_frequency(2))
import sys
import time
from datetime import datetime

import numpy as np
import tensorflow as tf

# Configuration, DataSet, Clstmnn, Evaluation, Visualization and Sessions are
# project-local modules imported elsewhere in this project.


def test(session_name=None, is_visualize=False):
    if session_name is None:
        session_name = input("Session name: ")

    config = Configuration()               # general settings
    data_sets = DataSet(config)            # data set retrieval
    model = Clstmnn(config)                # model builder
    evaluation = Evaluation(config)
    visualization = Visualization(config)

    with tf.Graph().as_default():
        data_set = data_sets.get_data_sets(config.TESTING_BATCH_SIZE)

        print('Building model...')
        predictions_testing = model.inference(x=data_set.testing_set.x,
                                              mode_name=config.MODE.TESTING,
                                              reuse_lstm=None)
        mse = evaluation.loss(predictions=predictions_testing,
                              labels=data_set.testing_set.y,
                              mode_name=config.MODE.TESTING)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

        print('Starting session...')
        with tf.Session() as sess:
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(
                config.OUTPUT_PATH + session_name + '_tested', sess.graph)
            sessions_helper = Sessions(config=config,
                                       session=sess,
                                       saver=saver,
                                       session_name=session_name,
                                       summary_writer=summary_writer,
                                       coordinator=coord,
                                       threads=threads)
            sessions_helper.restore()
            print()

            summary = None
            start_time = time.time()
            mses = []
            actual_labels = []
            predicted_labels = []
            for epoch in range(config.TESTING_EPOCHS):
                for step in range(int(data_set.testing_set.size / config.TESTING_BATCH_SIZE)):
                    summary = sess.run(merged)
                    sys.stdout.write('\r>> Examples tested: {}/{}'.format(
                        step, int(data_set.testing_set.size / config.TESTING_BATCH_SIZE)))
                    sys.stdout.flush()
                    example_image, actual_label, predicted_label, mse_result = sess.run([
                        data_set.testing_set.x,
                        data_set.testing_set.y,
                        predictions_testing,
                        mse
                    ])
                    mses.append(mse_result)
                    actual_labels.append(actual_label)
                    predicted_labels.append(predicted_label)
                    if is_visualize:
                        visualization.show_example(predicted_label, actual_label,
                                                   example_image, mse_result)

            summary_writer.add_summary(summary, 1)
            print()
            print('testing completed in %s' % (time.time() - start_time))
            print('%s: MSE @ 1 = %.9f' % (datetime.now(), np.array(mses).mean()))
            visualization.display_on_map(actual_labels, predicted_labels,
                                         session_name, np.array(mses).mean())
            sessions_helper.end()
import sys
import time
from datetime import datetime

import numpy as np
import tensorflow as tf

# Configuration, DataSet, Cnn, Evaluation, Visualization and Sessions are
# project-local modules imported elsewhere in this project.


def test(session_name=None, is_visualize=False):
    if session_name is None:
        session_name = input("Session name: ")

    config = Configuration()               # general settings
    data_sets = DataSet(config)            # data set retrieval
    model = Cnn(config)                    # model builder
    evaluation = Evaluation(config)
    visualization = Visualization(config)

    with tf.Graph().as_default():
        data_set = data_sets.get_data_sets(config.TESTING_BATCH_SIZE)

        print('Building model...')
        predictions_testing = model.inference(x=data_set.testing_set.x,
                                              mode_name=config.MODE.TESTING)
        is_correct = evaluation.correct_number(predictions_testing,
                                               data_set.testing_set.y)
        predictions_testing = tf.argmax(predictions_testing, 1)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

        print('Starting session...')
        with tf.Session() as sess:
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            summary_writer = tf.summary.FileWriter(
                config.OUTPUT_PATH + session_name + '_tested', sess.graph)
            sessions_helper = Sessions(config=config,
                                       session=sess,
                                       saver=saver,
                                       session_name=session_name,
                                       summary_writer=summary_writer,
                                       coordinator=coord,
                                       threads=threads)
            sessions_helper.restore()
            print()

            true_count = 0
            summary = None
            start_time = time.time()
            labels = []
            predictions = []
            for epoch in range(config.TESTING_EPOCHS):
                for step in range(int(data_set.testing_set.size / config.TESTING_BATCH_SIZE)):
                    summary = sess.run(merged)
                    sys.stdout.write('\r>> Examples tested: {}/{}'.format(
                        step, int(data_set.testing_set.size / config.TESTING_BATCH_SIZE)))
                    sys.stdout.flush()
                    example_image, actual_label, predicted_label, is_correct_result = sess.run([
                        data_set.testing_set.x,
                        data_set.testing_set.y,
                        predictions_testing,
                        is_correct
                    ])
                    true_count += np.sum(is_correct_result)
                    labels.append(actual_label)
                    predictions.append(predicted_label)
                    if is_visualize:
                        visualization.show_example(predicted_label, actual_label,
                                                   example_image, is_correct_result)

            summary_writer.add_summary(summary, 1)

            np_labels = np.array(labels)
            np_predictions = np.array(predictions)
            conf_matrix = tf.confusion_matrix(labels=tf.squeeze(np_labels),
                                              predictions=tf.squeeze(np_predictions),
                                              num_classes=config.NUM_CLASSES)
            print()
            c_m = sess.run(conf_matrix)
            print(c_m)

            precision = true_count / data_set.testing_set.size
            print()
            print('testing completed in %s' % (time.time() - start_time))
            print('%s: accuracy @ 1 = %.3f' % (datetime.now(), precision * 100))
            sessions_helper.end()
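# Hypothetical entry point sketching how the test routine above would typically be invoked;
# the session name is whichever name the corresponding training run was saved under.
if __name__ == '__main__':
    test(session_name=None, is_visualize=False)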
import os
import time

from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
# from werkzeug import secure_filename  # old import path, replaced by werkzeug.utils below
from werkzeug.utils import secure_filename

from visualization.visualization import Visualization
from forecast_model.prophet import ProphetModel
from forecast_model.arima import ArimaModel
from anomaly.model import anomaly

ano = anomaly()
ar = ArimaModel()
draw = Visualization()

UPLOAD_FOLDER = '/data'

app = Flask(__name__)
app.secret_key = "secret key"
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # limit request bodies to 16 MiB
CORS(app)


@app.route('/')
def homepage():
    return render_template('index.html')


ALLOWED_EXTENSIONS = set(['csv'])
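# The snippet above imports secure_filename and defines ALLOWED_EXTENSIONS but stops before any
# upload route; the sketch below shows how these pieces are commonly wired together in Flask.
# The route path, form field name and use of UPLOAD_FOLDER are assumptions, not this project's API.
def allowed_file(filename):
    # only accept files whose extension is whitelisted in ALLOWED_EXTENSIONS
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/upload', methods=['POST'])
def upload_file():
    uploaded = request.files.get('file')
    if uploaded is None or uploaded.filename == '' or not allowed_file(uploaded.filename):
        return jsonify({'error': 'a .csv file is required'}), 400
    filename = secure_filename(uploaded.filename)
    uploaded.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
    return jsonify({'filename': filename}), 200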
def visualize():
    """Visualize the processed dataset and output the plots into the result/visualization folder."""
    Visualization().run()