def evaluate(experiment_name, step=''):
    """Evaluate a saved checkpoint on the test set and log the metrics.

    The graph and variables of the selected checkpoint are restored, the
    ``prediction:0`` tensor is evaluated for every test batch, and a
    classification report followed by a confusion matrix is logged.

    Args:
        experiment_name: Name of the sub-directory of ``checkpoint`` that
            holds the checkpoints of the experiment.
        step: Step of the checkpoint to load (file prefix ``step-<step>``).
            The default ``''`` selects the latest checkpoint.

    Raises:
        IOError: If ``step`` is ``''`` and the checkpoint directory contains
            no checkpoint.
    """
    logging.info('*' * 50)
    logging.info('RUNNING EVALUATION FOR MODEL: %s', experiment_name)

    checkpoint_dir = os.path.join(CURRENT_DIR, '..', 'checkpoint',
                                  experiment_name)
    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if interesting_checkpoint is None:
            # Fail early with a clear message instead of letting the graph
            # import complain about a file literally named 'None.meta'.
            raise IOError(
                'No checkpoint found in {}'.format(checkpoint_dir))
    else:
        interesting_checkpoint = os.path.join(checkpoint_dir,
                                              'step-{}'.format(step))

    dataset_manager = DatasetManager()
    dataset_manager.boot()

    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s',
                     interesting_checkpoint)
        saver = tf.train.import_meta_graph(
            '{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s',
                     interesting_checkpoint)

        # Entering the session directly (rather than via ``as_default()``)
        # makes it the default session AND closes it when the block exits.
        session_config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s',
                         interesting_checkpoint)

            # The tensor handles are the same for every batch: fetch once.
            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')

            list_predictions = []
            list_labels = []
            for docs, labels in dataset_manager.get_test_by_batch(
                    batch_size=FLAGS.BATCH_SIZE):
                prediction = sess.run(tf_predictions,
                                      feed_dict={tf_input: docs})
                list_predictions.extend(prediction)
                list_labels.extend(labels)
                logging.debug('-- Prediction length: %s/%s',
                              len(list_predictions),
                              dataset_manager.test_y.shape[0])

            logging.info('-- Report for model: %s', experiment_name)
            logging.info(
                classification_report(y_true=list_labels,
                                      y_pred=list_predictions,
                                      digits=4))
            logging.info(
                confusion_matrix(y_true=list_labels,
                                 y_pred=list_predictions))
def predict(list_docs, experiment_name, step='', batch_size=64):
    """Predict a label for every document using a saved checkpoint.

    The documents are pre-processed, converted to vectors, equalized to
    ``model_v6.SENTENCE_LENGTH_MAX`` and fed to the restored ``prediction:0``
    tensor in batches of ``batch_size``.

    Args:
        list_docs: Raw documents to classify.
        experiment_name: Name of the sub-directory of ``checkpoint`` that
            holds the checkpoints of the experiment.
        step: Step of the checkpoint to load (file prefix ``step-<step>``).
            The default ``''`` selects the latest checkpoint.
        batch_size: Number of documents fed to the model per session run.

    Returns:
        A list with one entry per prediction, each raw prediction mapped
        through ``dataset_manager.LABEL_UNMAPPING``.

    Raises:
        IOError: If ``step`` is ``''`` and the checkpoint directory contains
            no checkpoint.
    """
    logging.info('*' * 50)
    logging.info('RUNNING PREDICT FOR MODEL: %s', experiment_name)

    checkpoint_dir = os.path.join(CURRENT_DIR, '..', 'checkpoint',
                                  experiment_name)
    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if interesting_checkpoint is None:
            # Fail early with a clear message instead of letting the graph
            # import complain about a file literally named 'None.meta'.
            raise IOError(
                'No checkpoint found in {}'.format(checkpoint_dir))
    else:
        interesting_checkpoint = os.path.join(checkpoint_dir,
                                              'step-{}'.format(step))

    dataset_manager = DatasetManager()
    dataset_manager.boot()

    list_preprocessed_sentences = preprocessor.preprocess(list_docs)
    list_vecs = dataset_manager.text2vec.doc_to_vec(
        list_preprocessed_sentences)
    # Diagnostic output goes through logging like the rest of the module
    # instead of being printed unconditionally to stdout.
    logging.debug('-- Recovered documents: %s',
                  dataset_manager.text2vec.vec_to_doc(list_vecs))
    list_vecs = dataset_manager.equalize_vector_length_to_np(
        list_vectors=list_vecs, max_length=model_v6.SENTENCE_LENGTH_MAX)

    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s',
                     interesting_checkpoint)
        saver = tf.train.import_meta_graph(
            '{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s',
                     interesting_checkpoint)

        # Entering the session directly (rather than via ``as_default()``)
        # makes it the default session AND closes it when the block exits.
        session_config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s',
                         interesting_checkpoint)

            # The tensor handles are the same for every batch: fetch once.
            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')

            list_predictions = []
            num_docs = len(list_vecs)
            # Ceiling division: the number of non-empty batches. Iterating
            # ``len // batch_size + 1`` times would feed an empty trailing
            # batch whenever the input size is a multiple of ``batch_size``.
            num_steps = (num_docs + batch_size - 1) // batch_size
            logging.info('There will be %s steps', num_steps)

            for start in range(0, num_docs, batch_size):
                prediction = sess.run(
                    tf_predictions,
                    feed_dict={
                        tf_input: list_vecs[start:start + batch_size]
                    })
                list_predictions.extend(
                    dataset_manager.LABEL_UNMAPPING[p] for p in prediction)

    return list_predictions
def predict(list_sentences,
            output_file,
            experiment_name,
            step='',
            list_labels=None):
    """Predict labels for sentences and save the result as a CSV file.

    The sentences are pre-processed, converted to vectors, equalized to
    ``model_v1.SENTENCE_LENGTH_MAX`` and fed to the restored ``prediction:0``
    tensor in a single session run. When labels are given, a classification
    report is logged and the labels are added to the CSV.

    Args:
        list_sentences: Raw sentences to classify.
        output_file: Destination passed to ``DataFrame.to_csv``.
        experiment_name: Name of the sub-directory of ``checkpoint`` that
            holds the checkpoints of the experiment.
        step: Step of the checkpoint to load (file prefix ``step-<step>``).
            The default ``''`` selects the latest checkpoint.
        list_labels: Optional ground-truth labels for the sentences.
            ``None`` (the default) or an empty sequence means "no labels".

    Raises:
        IOError: If ``step`` is ``''`` and the checkpoint directory contains
            no checkpoint.
    """
    checkpoint_dir = os.path.join(CURRENT_DIR, '..', 'checkpoint',
                                  experiment_name)
    if step == '':
        interesting_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
        if interesting_checkpoint is None:
            # Fail early with a clear message instead of letting the graph
            # import complain about a file literally named 'None.meta'.
            raise IOError(
                'No checkpoint found in {}'.format(checkpoint_dir))
    else:
        interesting_checkpoint = os.path.join(checkpoint_dir,
                                              'step-{}'.format(step))

    dataset_manager = DatasetManager()
    dataset_manager.boot()

    list_preprocessed_sentences = preprocessor.preprocess(list_sentences)
    list_vecs = dataset_manager.text2vec.doc_to_vec(
        list_preprocessed_sentences)
    list_vecs = dataset_manager.equalize_vector_length_to_np(
        list_vectors=list_vecs, max_length=model_v1.SENTENCE_LENGTH_MAX)

    # ``None`` replaces the former mutable ``[]`` default; the converter
    # still receives an empty list when no labels were supplied.
    list_labels = dataset_manager.convert_labels_to_np(
        [] if list_labels is None else list_labels)
    # ``len`` is used because the converted labels may be an array, whose
    # truth value is not a reliable emptiness check.
    has_labels = len(list_labels) != 0

    with tf.Graph().as_default() as gr:
        logging.info('-- Restoring graph for model: %s',
                     interesting_checkpoint)
        saver = tf.train.import_meta_graph(
            '{}.meta'.format(interesting_checkpoint))
        logging.info('-- Restored graph for model named: %s',
                     interesting_checkpoint)

        # Entering the session directly (rather than via ``as_default()``)
        # makes it the default session AND closes it when the block exits.
        session_config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            saver.restore(sess=sess, save_path=interesting_checkpoint)
            logging.info('-- Restored variables for model named: %s',
                         interesting_checkpoint)

            tf_input = gr.get_tensor_by_name('input/tf_input:0')
            tf_predictions = gr.get_tensor_by_name('prediction:0')
            prediction = sess.run(tf_predictions,
                                  feed_dict={tf_input: list_vecs})

    if has_labels:
        logging.info('-- Report for model: %s', experiment_name)
        logging.info(
            classification_report(y_true=list_labels, y_pred=prediction))

    result_dict = {
        'sentence': list_sentences,
        'pre-processed': list_preprocessed_sentences,
        'pre-processed_recover':
            dataset_manager.text2vec.vec_to_doc(list_vecs),
        'predict': prediction,
    }
    if has_labels:
        result_dict['label'] = list_labels

    pd.DataFrame(result_dict).to_csv(output_file, index=False)
    logging.debug('Saved result at %s', output_file)
def run(experiment_name):
    """Tune the L1 scale of a population of models and dump the history.

    A population of models sharing the same input placeholders is trained
    side by side. At every population step the last ``WORST_THRES`` models of
    the current ranking run their copy op against the top-ranked model, every
    model after the first ``BEST_THRES`` runs its L1-scale perturb op, all
    models are trained for ``ITERATIONS`` batches, and the population is
    re-ranked by accuracy on one further batch. The per-model and best-model
    histories of accuracy and L1 scale are written as JSON to the file
    ``temp`` in the current working directory.

    Args:
        experiment_name: Not used by this function; kept for interface
            compatibility.
    """
    BEST_THRES = 3
    WORST_THRES = 3
    POPULATION_STEPS = 500
    ITERATIONS = 100
    POPULATION_SIZE = 10

    accuracy_hist = np.zeros((POPULATION_SIZE, POPULATION_STEPS))
    l1_scale_hist = np.zeros((POPULATION_SIZE, POPULATION_STEPS))
    best_accuracy_hist = np.zeros((POPULATION_STEPS, ))
    best_l1_scale_hist = np.zeros((POPULATION_STEPS, ))

    with tf.Graph().as_default():
        with tf.variable_scope('input'):
            tf_input = tf.placeholder(
                dtype=tf.int32,
                shape=[
                    None, model_population_based_tunning.SENTENCE_LENGTH_MAX
                ],
                name='tf_input')
            tf_labels = tf.placeholder(dtype=tf.int32,
                                       shape=[None],
                                       name='tf_labels')

        # The population size must match the first dimension of the history
        # arrays, which are indexed by ``model_id`` below.
        models = [
            create_model(
                i,
                is_included_regularization=FLAGS.IS_INCLUDED_REGULARIZATION)
            for i in range(POPULATION_SIZE)
        ]

        # It will help us with creation of different scope_name for each
        # model
        for index, model in enumerate(models):
            with tf.variable_scope(str(index)):
                model.boot(tf_input, tf_labels)

        logging.info('Graph size: %s', utils.count_trainable_variables())

        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.GPU)
        session_config = tf.ConfigProto(
            gpu_options=gpu_options,
            allow_soft_placement=True,
            log_device_placement=FLAGS.LOG_DEVICE_PLACEMENT)
        # Entering the session directly (rather than via ``as_default()``)
        # makes it the default session AND closes it when the block exits.
        with tf.Session(config=session_config) as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            dataset_manager = DatasetManager()
            dataset_manager.boot()
            dataset_generator = dataset_manager.get_batch(
                batch_size=FLAGS.BATCH_SIZE,
                number_epochs=10 * FLAGS.NUMBER_EPOCHS)

            for i in range(POPULATION_STEPS):
                # Copy best
                sess.run([
                    m.get_copy_from_op(models[0])
                    for m in models[-WORST_THRES:]
                ])
                # Perturb others
                sess.run(
                    [m.l1_scale_perturb_op for m in models[BEST_THRES:]])

                # Training
                for _ in range(ITERATIONS):
                    docs, labels = next(dataset_generator)
                    sess.run([m.tf_optimizer for m in models],
                             feed_dict={
                                 tf_input: docs,
                                 tf_labels: labels
                             })

                # Evaluate on one further batch from the same generator.
                docs, labels = next(dataset_generator)
                l1_scales = sess.run({m: m.l1_scale for m in models})
                accuracies = sess.run({m: m.tf_acc
                                       for m in models},
                                      feed_dict={
                                          tf_input: docs,
                                          tf_labels: labels
                                      })
                # Best model first; the next step copies from models[0].
                models.sort(key=lambda m: accuracies[m], reverse=True)

                # Logging
                best_accuracy_hist[i] = accuracies[models[0]]
                best_l1_scale_hist[i] = l1_scales[models[0]]
                for m in models:
                    l1_scale_hist[m.model_id, i] = l1_scales[m]
                    accuracy_hist[m.model_id, i] = accuracies[m]

    # ``json`` cannot serialize NumPy arrays; convert them to nested lists so
    # the dump does not raise ``TypeError`` after the whole run has finished.
    with open('temp', 'w') as output_f:
        json.dump(
            {
                'accuracy_hist': accuracy_hist.tolist(),
                'l1_scale_hist': l1_scale_hist.tolist(),
                'best_accuracy_hist': best_accuracy_hist.tolist(),
                'best_l1_scale_hist': best_l1_scale_hist.tolist()
            }, output_f)
def run(experiment_name):
    """Train the model, writing summaries and periodic checkpoints.

    Builds the graph from ``model``, then iterates over the training batches.
    Every 10 global steps a summary of the current training batch and of a
    shuffled test sample of ``FLAGS.TEST_SIZE`` is written; every 200 global
    steps a checkpoint named ``step-<global_step>`` is saved under the
    ``checkpoint/<experiment_name>`` directory.

    Args:
        experiment_name: Name used for the summary directories
            (``train_<name>``, ``test_<name>``) and the checkpoint directory.
    """
    summary_interval_step = 10
    checkpoint_interval_step = 200
    summary_dir = os.path.join(CURRENT_DIR, '..', 'summary')
    path_to_save = os.path.join(CURRENT_DIR, '..', 'checkpoint',
                                experiment_name)

    with tf.Graph().as_default() as gr:
        with tf.variable_scope('input'):
            tf_input = tf.placeholder(
                dtype=tf.int32,
                shape=[None, model.SENTENCE_LENGTH_MAX],
                name='tf_input')
            tf_labels = tf.placeholder(dtype=tf.int32,
                                       shape=[None],
                                       name='tf_labels')

        tf_logits = model.inference(tf_input)
        tf_loss = model.loss(tf_logits, tf_labels)
        tf_optimizer, tf_global_step = model.optimize(tf_loss)
        model.measure_acc(tf_logits, tf_labels)

        tf_all_summary = tf.summary.merge_all()
        tf_train_writer = tf.summary.FileWriter(
            logdir=os.path.join(summary_dir, 'train_' + experiment_name),
            graph=gr)
        tf_test_writer = tf.summary.FileWriter(
            logdir=os.path.join(summary_dir, 'test_' + experiment_name),
            graph=gr)
        tf_embedding_writer = tf.summary.FileWriter(logdir=path_to_save)
        writers = (tf_train_writer, tf_test_writer, tf_embedding_writer)

        try:
            # Visual word embedding
            config = projector.ProjectorConfig()
            embedding = config.embeddings.add()
            # Reference model_v6.py
            embedding.tensor_name = 'embedding/word_embeddings'
            embedding.metadata_path = os.path.join(
                CURRENT_DIR, 'data', DatasetManager.VOCAB_FILE)
            projector.visualize_embeddings(tf_embedding_writer, config)

            saver = tf.train.Saver(max_to_keep=5,
                                   keep_checkpoint_every_n_hours=0.03)

            logging.info('Graph size: %s',
                         utils.count_trainable_variables())

            gpu_options = tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.GPU)
            session_config = tf.ConfigProto(
                gpu_options=gpu_options,
                allow_soft_placement=True,
                log_device_placement=FLAGS.LOG_DEVICE_PLACEMENT)
            # Entering the session directly (rather than via
            # ``as_default()``) makes it the default session AND closes it
            # when the block exits.
            with tf.Session(config=session_config) as sess:
                sess.run(tf.global_variables_initializer())
                sess.run(tf.local_variables_initializer())

                dataset_manager = DatasetManager()
                dataset_manager.boot()

                for docs, labels in dataset_manager.get_batch(
                        batch_size=FLAGS.BATCH_SIZE,
                        number_epochs=FLAGS.NUMBER_EPOCHS):
                    train_feed = {tf_input: docs, tf_labels: labels}
                    _, global_step = sess.run(
                        [tf_optimizer, tf_global_step],
                        feed_dict=train_feed)

                    if global_step % summary_interval_step == 0:
                        logging.debug('Global step: %s', global_step)
                        train_summary_data = sess.run(
                            tf_all_summary, feed_dict=train_feed)
                        tf_train_writer.add_summary(
                            train_summary_data, global_step=global_step)

                        docs_test, labels_test = (
                            dataset_manager.get_test_set(
                                FLAGS.TEST_SIZE, is_shuffled=True))
                        test_summary_data = sess.run(
                            tf_all_summary,
                            feed_dict={
                                tf_input: docs_test,
                                tf_labels: labels_test
                            })
                        tf_test_writer.add_summary(
                            test_summary_data, global_step=global_step)

                    if global_step % checkpoint_interval_step == 0:
                        if not os.path.exists(path_to_save):
                            os.makedirs(path_to_save)
                        saved_file = saver.save(
                            sess,
                            save_path=os.path.join(path_to_save, 'step'),
                            global_step=global_step,
                            write_meta_graph=True)
                        logging.debug('Saving model at %s', saved_file)
        finally:
            # Closing flushes buffered events to disk; without it the most
            # recent summaries may be lost when the process exits.
            for writer in writers:
                writer.close()