def eval_model():
    """Evaluate the latest checkpoint in ``data_conf.TRAIN_DIR`` sample by sample.

    Builds a one-shot input pipeline over every record file matching
    ``data_conf.EVAL_RECORD_PATH + '/*'`` (batch size 1), restores the most
    recent checkpoint and runs the model until the dataset is exhausted.

    For every sample the loss, labels, probabilities and accuracy are printed
    and handed to ``util.print_predictions`` /
    ``util.print_sentence_attentions``. For up to
    ``data_conf.PLOT_SAMPLES_NUM`` samples whose plot id differs from the
    previously plotted one, one attention heat-map per answer is produced via
    ``util.plot_attention``.

    When the dataset is exhausted, the mean loss/accuracy are written as a
    summary to ``data_conf.TRAIN_DIR`` and, in ``"val"`` mode, appended to
    ``val_accuracy.txt`` together with the per-question-type statistics.
    """
    if not tf.io.gfile.exists(data_conf.EVAL_DIR):
        tf.io.gfile.makedirs(data_conf.EVAL_DIR)

    util.save_config_values(data_conf, data_conf.EVAL_DIR + "/data_")
    util.save_config_values(model_conf, data_conf.EVAL_DIR + "/model_")

    filepath = data_conf.EVAL_RECORD_PATH + '/*'
    filenames = glob.glob(filepath)

    global_step = tf.compat.v1.train.get_or_create_global_step()
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(get_single_sample)
    batch_size = 1
    dataset = dataset.padded_batch(
        batch_size,
        padded_shapes=([None], [ANSWER_COUNT, None], [None], (), [None, None], ()))
    iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
    next_q, next_a, next_l, next_plot_ids, next_plots, next_q_types = iterator.get_next()

    logits, word_atts, sent_atts, _ = predict_batch([next_q, next_a, next_plots], training=False)
    next_q_types = tf.reshape(next_q_types, ())

    probabs = model.compute_probabilities(logits=logits)
    loss_example = model.compute_batch_mean_loss(logits, next_l, model_conf.LOSS_FUNC)
    accuracy_example = tf.reduce_mean(
        input_tensor=model.compute_accuracies(logits=logits, labels=next_l, dim=1))

    # All variables are restored from the checkpoint; set_embeddings_op is run
    # afterwards (presumably to load the embedding vectors for the current
    # vocabulary -- confirm against its definition).
    saver = tf.compat.v1.train.Saver()
    summary_writer = tf.compat.v1.summary.FileWriter(data_conf.TRAIN_DIR)

    step = 0
    total_acc = 0.0
    total_loss = 0.0
    p_counts = 0
    last_p = ''
    type_counts = np.zeros(6, dtype=np.int32)
    type_accs = np.zeros(6)

    with tf.compat.v1.Session() as sess:
        init_op = tf.group(tf.compat.v1.global_variables_initializer(),
                           tf.compat.v1.local_variables_initializer())
        sess.run(init_op)

        ckpt = tf.train.get_checkpoint_state(data_conf.TRAIN_DIR)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            # NOTE(review): evaluation continues with freshly initialized
            # weights in this case -- confirm that this is intended.
            print('No checkpoint file found')

        coord = tf.train.Coordinator()
        threads = tf.compat.v1.train.start_queue_runners(sess=sess, coord=coord)
        sess.run(set_embeddings_op, feed_dict={place: vectors})

        try:
            while not coord.should_stop():
                (loss_val, acc_val, probs_val, gs_val, q_type_val, q_val, labels_val, p_val,
                 a_val, p_id_val, atts_val, sent_atts_val) = sess.run(
                     [loss_example, accuracy_example, probabs, global_step, next_q_types,
                      next_q, next_l, next_plots, next_a, next_plot_ids, word_atts, sent_atts])

                total_loss += loss_val
                total_acc += acc_val
                # Per-type bookkeeping, indexed like the sibling evaluation
                # routines (type id shifted by one; presumably so that an id
                # of -1 lands in slot 0 -- confirm).
                type_accs[q_type_val + 1] += acc_val
                type_counts[q_type_val + 1] += 1

                predicted_probabilities = probs_val[0]
                sentence_attentions = sent_atts_val[0]
                labels = labels_val[0]
                gold = np.argmax(labels)

                p_id = str(p_id_val[0].decode("utf-8"))

                # Write attention heat-maps for a limited number of samples,
                # skipping samples whose plot id equals the last plotted one.
                if p_id != last_p and p_counts < data_conf.PLOT_SAMPLES_NUM:
                    path = data_conf.EVAL_DIR + "/plots/" + p_id + "_" + str(step) + "/"
                    question_words = [vocab[index] for index in q_val[0]]
                    q_s = "".join(word + ' ' for word in question_words)
                    filename = "".join(word + '_' for word in question_words) + "?"

                    for i, a_att in enumerate(atts_val[0]):
                        answer_words = [vocab[index] for index in a_val[0][i]]
                        # NOTE(review): filename keeps accumulating the words
                        # of every answer processed so far, so each answer
                        # gets a distinct but increasingly long name --
                        # confirm this is intended.
                        filename += "".join(word + "_" for word in answer_words)

                        qa_s = q_s + "? (acc: " + str(acc_val) + ")\n "
                        qa_s += "".join(word + ' ' for word in answer_words)
                        qa_s += (" (label: " + str(int(labels[i])) + " - prediction: "
                                 + ("%.2f" % (predicted_probabilities[i] * 100)) + "%)")

                        sentence_count = len(a_att)
                        y_labels = [("%.2f" % (sent_atts_val[0][i][j] * 100)) + "%"
                                    for j in range(sentence_count)]
                        a_sents = [[vocab[index] for index in p_val[0][j]]
                                   for j in range(sentence_count)]

                        util.plot_attention(np.array(a_att), np.array(a_sents), qa_s,
                                            y_labels, path, filename)
                    last_p = p_id
                    p_counts += 1

                print("Sample loss: " + str(loss_val))
                print("Sample labels: " + str(labels))
                print("Sample probabilities: " + str(predicted_probabilities))
                print("Sample acc: " + str(acc_val))

                util.print_predictions(data_conf.EVAL_DIR, step, gold, predicted_probabilities,
                                       data_conf.MODE)
                util.print_sentence_attentions(data_conf.EVAL_DIR, step, sentence_attentions)

                step += 1

                print("Total acc: " + str(total_acc / step))
                print("Local_step: " + str(step * batch_size))
                print("Global_step: " + str(gs_val))
                print("===========================================")

        except tf.errors.OutOfRangeError:
            # The one-shot iterator is exhausted: report the aggregated results.
            if step == 0:
                # Nothing was evaluated, so there are no means to report (and
                # gs_val was never fetched).
                print("No evaluation samples found in " + filepath)
            else:
                mean_loss = total_loss / step
                mean_acc = total_acc / step

                summary = tf.compat.v1.Summary()
                summary.value.add(tag='validation_loss', simple_value=mean_loss)
                summary.value.add(tag='validation_accuracy', simple_value=mean_acc)
                summary_writer.add_summary(summary, gs_val)

                keys = util.get_question_keys()
                if data_conf.MODE == "val":
                    # Types that never occurred keep a nan accuracy, without
                    # triggering a division-by-zero warning.
                    type_ratios = np.divide(type_accs, type_counts,
                                            out=np.full_like(type_accs, np.nan),
                                            where=type_counts > 0)
                    with open(data_conf.EVAL_DIR + "/val_accuracy.txt", "a") as out_file:
                        out_file.write("global step: " + str(gs_val) + " - total accuracy: "
                                       + str(mean_acc) + "- total loss: " + str(mean_loss) + "\n")
                        out_file.write("Types (name / count / correct / accuracy):\n")
                        for entry in zip(keys, type_counts, type_accs, type_ratios):
                            out_file.write(str(entry) + "\n")
                        out_file.write(
                            "==================================================================="
                            + "\n")
                    util.save_eval_score("global step: " + str(gs_val) + " - acc : "
                                         + str(mean_acc) + " - total loss: " + str(mean_loss)
                                         + " - " + data_conf.TRAIN_DIR + "_" + str(gs_val))
        finally:
            coord.request_stop()
            coord.join(threads)
            # Flush pending summaries to disk and release the event file.
            summary_writer.close()
def eval_model(model_type, attack_level, num_modified_words, percentage_attacked_samples):
    """Evaluate a model under a white-box adversarial attack on the plot text.

    The model is first run on the unmodified plot to obtain word and sentence
    attentions. These are passed, together with the plot and the labels, to
    ``remove_plot_sentence`` (``attack_level == "sentence"``) or
    ``modify_plot_sentence`` (``attack_level == "word"``) through a
    ``py_func``; the model is then run again on the plot returned by that
    function, and all reported metrics refer to this second run.

    Args:
        model_type: ``"lstm"`` selects ``movieqa.conf_lstm`` as the global
            ``model_conf``; any other value selects ``movieqa.conf_cnn``.
            Also forwarded to ``predict_batch``.
        attack_level: ``"sentence"`` or ``"word"``.
        num_modified_words: forwarded to ``modify_plot_sentence`` for the
            word-level attack; also printed and written to the report.
        percentage_attacked_samples: forwarded to ``modify_plot_sentence``
            for the word-level attack.

    Raises:
        ValueError: if ``attack_level`` is neither ``"sentence"`` nor
            ``"word"``.
    """
    # Fail fast: otherwise the attacked plot tensor would never be defined and
    # the graph construction below would die with an UnboundLocalError.
    if attack_level not in ("sentence", "word"):
        raise ValueError(
            "attack_level must be 'sentence' or 'word', got %r" % (attack_level,))

    print("evaluate")
    print("%s white-box adversarial attack modifies %d words of %d%% of the instances: " % (
        attack_level, num_modified_words, percentage_attacked_samples))

    global model_conf
    if model_type == "lstm":
        import movieqa.conf_lstm as model_conf
    else:
        import movieqa.conf_cnn as model_conf

    if not tf.io.gfile.exists(data_conf.EVAL_DIR):
        tf.io.gfile.makedirs(data_conf.EVAL_DIR)

    util.save_config_values(data_conf, data_conf.TRAIN_DIR + "/data")
    util.save_config_values(model_conf, data_conf.TRAIN_DIR + "/model")

    filepath = data_conf.EVAL_RECORD_PATH + '/*'
    filenames = glob.glob(filepath)
    print("Evaluating adversarial attack on %s" % filenames)

    # Core tf.data / tf.compat.v1 entry points, matching the tf.compat.v1
    # calls used throughout the rest of this function.
    global_step = tf.compat.v1.train.get_or_create_global_step()
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(get_single_sample)
    batch_size = 1
    dataset = dataset.padded_batch(
        batch_size,
        padded_shapes=([None], [ANSWER_COUNT, None], [None], (), [None, None], ()))
    iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
    next_q, next_a, next_l, next_plot_ids, next_plots, next_q_types = iterator.get_next()

    # First pass on the original plot: only the attentions are needed, they
    # are the input of the attack function.
    _, w_atts, s_atts, _ = predict_batch(model_type, [next_q, next_a, next_plots],
                                         training=False)

    if attack_level == "sentence":
        m_p = tf.compat.v1.py_func(remove_plot_sentence,
                                   [next_plots, s_atts, next_l], [tf.int64])[0]
    else:  # attack_level == "word"
        m_p = tf.compat.v1.py_func(modify_plot_sentence,
                                   [next_plots, w_atts, s_atts, next_l, num_modified_words,
                                    percentage_attacked_samples], [tf.int64])[0]

    # Second pass on the attacked plot; everything reported below uses it.
    logits, _, sent_atts, _ = predict_batch(model_type, [next_q, next_a, m_p], training=False)

    next_q_types = tf.reshape(next_q_types, ())
    probabs = model.compute_probabilities(logits=logits)
    loss_example = model.compute_batch_mean_loss(logits, next_l, model_conf.LOSS_FUNC)
    accuracy_example = tf.reduce_mean(
        input_tensor=model.compute_accuracies(logits=logits, labels=next_l, dim=1))

    # Variables matching "embeddings" are excluded from the restore;
    # set_embeddings_op is run after restoring (presumably to load the
    # embedding vectors -- confirm against its definition).
    to_restore = tf.contrib.slim.get_variables_to_restore(exclude=["embeddings"])
    saver = tf.compat.v1.train.Saver(to_restore)
    summary_writer = tf.compat.v1.summary.FileWriter(data_conf.TRAIN_DIR)

    step = 0
    total_acc = 0.0
    total_prec = 0.0
    total_rank = 0.0
    total_loss = 0.0
    type_counts = np.zeros(6, dtype=np.int32)
    type_accs = np.zeros(6)

    with tf.compat.v1.Session() as sess:
        init_op = tf.group(tf.compat.v1.global_variables_initializer(),
                           tf.compat.v1.local_variables_initializer())
        sess.run(init_op)

        ckpt = tf.train.get_checkpoint_state(data_conf.TRAIN_DIR)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            # NOTE(review): evaluation continues with freshly initialized
            # weights in this case -- confirm that this is intended.
            print('No checkpoint file found')

        sess.run(set_embeddings_op, feed_dict={place: vectors})

        coord = tf.train.Coordinator()
        threads = tf.compat.v1.train.start_queue_runners(sess=sess, coord=coord)

        try:
            while not coord.should_stop():
                loss_val, acc_val, probs_val, gs_val, q_type_val, sent_atts_val, labels_val = \
                    sess.run([loss_example, accuracy_example, probabs, global_step,
                              next_q_types, sent_atts, next_l])

                # Type id shifted by one (presumably so that an id of -1 lands
                # in slot 0 -- confirm).
                type_accs[q_type_val + 1] += acc_val
                type_counts[q_type_val + 1] += 1

                predicted_probabilities = probs_val[0]
                sentence_attentions = sent_atts_val[0]
                labels = labels_val[0]
                gold = np.argmax(labels)

                total_loss += loss_val
                total_acc += acc_val

                m_ap = util.example_precision(predicted_probabilities, labels, 5)
                rank = util.example_rank(predicted_probabilities, labels, 5)
                total_prec += m_ap
                total_rank += rank

                print("Sample loss: " + str(loss_val))
                print("Sample acc: " + str(acc_val))
                print("Sample prec: " + str(m_ap))
                print("Sample rank: " + str(rank))

                util.print_predictions(data_conf.EVAL_DIR, step, gold, predicted_probabilities,
                                       data_conf.MODE)
                util.print_sentence_attentions(data_conf.EVAL_DIR, step, sentence_attentions)

                step += 1

                print("Total acc: " + str(total_acc / step))
                print("Total prec: " + str(total_prec / step))
                print("Total rank: " + str(total_rank / step))
                print("Local_step: " + str(step * batch_size))
                print("Global_step: " + str(gs_val))
                if attack_level == "word":
                    print("%d modified word(s)" % num_modified_words)
                print("===========================================")

        except tf.errors.OutOfRangeError:
            # The one-shot iterator is exhausted: report the aggregated results.
            if step == 0:
                # Nothing was evaluated, so there are no means to report (and
                # gs_val was never fetched).
                print("No evaluation samples found in " + filepath)
            else:
                mean_loss = total_loss / step
                mean_acc = total_acc / step

                summary = tf.compat.v1.Summary()
                summary.value.add(tag='validation_loss', simple_value=mean_loss)
                summary.value.add(tag='validation_accuracy', simple_value=mean_acc)
                summary_writer.add_summary(summary, gs_val)

                keys = util.get_question_keys()
                # Types that never occurred keep a nan accuracy, without
                # triggering a division-by-zero warning.
                type_ratios = np.divide(type_accs, type_counts,
                                        out=np.full_like(type_accs, np.nan),
                                        where=type_counts > 0)
                with open(data_conf.EVAL_DIR + "/accuracy.txt", "a") as out_file:
                    # The number of modified words gets its own label so it is
                    # not glued onto the digits of the loss value.
                    out_file.write("global step: " + str(gs_val) + " - total accuracy: "
                                   + str(mean_acc) + "- total loss: " + str(mean_loss)
                                   + " - modified words: " + str(num_modified_words) + "\n")
                    out_file.write("Types (name / count / correct / accuracy):\n")
                    for entry in zip(keys, type_counts, type_accs, type_ratios):
                        out_file.write(str(entry) + "\n")
                    out_file.write(
                        "==================================================================="
                        + "\n")
                util.save_eval_score("global step: " + str(gs_val) + " - acc : " + str(mean_acc)
                                     + " - total loss: " + str(mean_loss) + " - "
                                     + data_conf.TRAIN_DIR + "_" + str(gs_val))
        finally:
            coord.request_stop()
            coord.join(threads)
            # Flush pending summaries to disk and release the event file.
            summary_writer.close()
def eval_model():
    """Evaluate the latest checkpoint in ``data_conf.TRAIN_DIR`` sample by sample.

    Builds a one-shot input pipeline over every record file matching
    ``data_conf.EVAL_RECORD_PATH + '/*'`` (batch size 1), restores the most
    recent checkpoint and runs the model until the dataset is exhausted.
    For every sample the loss and accuracy are printed and the prediction is
    handed to ``util.print_predictions``.

    When the dataset is exhausted, the mean loss/accuracy are written as a
    summary to ``data_conf.TRAIN_DIR`` and, in ``"val"`` mode, appended to
    ``val_accuracy.txt`` together with the per-question-type statistics.
    """
    if not tf.gfile.Exists(data_conf.EVAL_DIR):
        tf.gfile.MakeDirs(data_conf.EVAL_DIR)

    util.save_config_values(data_conf, data_conf.EVAL_DIR + "/data")
    util.save_config_values(model_conf, data_conf.EVAL_DIR + "/model")

    filepath = data_conf.EVAL_RECORD_PATH + '/*'
    filenames = glob.glob(filepath)
    print("Evaluate model on %s" % str(filenames))

    global_step = tf.contrib.framework.get_or_create_global_step()
    dataset = tf.contrib.data.TFRecordDataset(filenames)
    dataset = dataset.map(get_single_sample)
    batch_size = 1
    dataset = dataset.padded_batch(
        batch_size,
        padded_shapes=([None], [ANSWER_COUNT, None], [None], (), [None], ()))
    iterator = dataset.make_one_shot_iterator()
    next_q, next_a, next_l, next_plot_ids, next_plots, next_q_types = iterator.get_next()

    logits = predict_batch([next_q, next_a, next_plots], training=False)
    next_q_types = tf.reshape(next_q_types, ())

    probabs = model.compute_probabilities(logits=logits)
    loss_example = model.compute_batch_mean_loss(logits, next_l, model_conf.LOSS_FUNC)
    accuracy_example = tf.reduce_mean(
        model.compute_accuracies(logits=logits, labels=next_l, dim=1))

    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(data_conf.TRAIN_DIR)

    step = 0
    total_acc = 0.0
    total_loss = 0.0
    type_counts = np.zeros(6, dtype=np.int32)
    type_accs = np.zeros(6)

    with tf.Session() as sess:
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        ckpt = tf.train.get_checkpoint_state(data_conf.TRAIN_DIR)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            # NOTE(review): evaluation continues with freshly initialized
            # weights in this case -- confirm that this is intended.
            print('No checkpoint file found')

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            while not coord.should_stop():
                loss_val, acc_val, probs_val, gs_val, q_type_val, labels_val = sess.run([
                    loss_example, accuracy_example, probabs, global_step, next_q_types, next_l
                ])

                predicted_probabilities = probs_val[0]
                gold = np.argmax(labels_val[0])

                # Type id shifted by one (presumably so that an id of -1 lands
                # in slot 0 -- confirm).
                type_accs[q_type_val + 1] += acc_val
                type_counts[q_type_val + 1] += 1

                total_loss += loss_val
                total_acc += acc_val

                print("Sample loss: " + str(loss_val))
                print("Sample acc: " + str(acc_val))

                util.print_predictions(data_conf.EVAL_DIR, step, gold, predicted_probabilities,
                                       data_conf.MODE)

                step += 1

                print("Total acc: " + str(total_acc / step))
                print("Local_step: " + str(step * batch_size))
                print("Global_step: " + str(gs_val))
                print("===========================================")

        except tf.errors.OutOfRangeError:
            # The one-shot iterator is exhausted: report the aggregated results.
            if step == 0:
                # Nothing was evaluated, so there are no means to report (and
                # gs_val was never fetched).
                print("No evaluation samples found in " + filepath)
            else:
                mean_loss = total_loss / step
                mean_acc = total_acc / step

                summary = tf.Summary()
                summary.value.add(tag='validation_loss', simple_value=mean_loss)
                summary.value.add(tag='validation_accuracy', simple_value=mean_acc)
                summary_writer.add_summary(summary, gs_val)

                keys = util.get_question_keys()
                if data_conf.MODE == "val":
                    # Types that never occurred keep a nan accuracy, without
                    # triggering a division-by-zero warning.
                    type_ratios = np.divide(type_accs, type_counts,
                                            out=np.full_like(type_accs, np.nan),
                                            where=type_counts > 0)
                    with open(data_conf.EVAL_DIR + "/val_accuracy.txt", "a") as out_file:
                        out_file.write("global step: " + str(gs_val) + " - total accuracy: "
                                       + str(mean_acc) + "- total loss: " + str(mean_loss) + "\n")
                        out_file.write("Types (name / count / correct / accuracy):\n")
                        for entry in zip(keys, type_counts, type_accs, type_ratios):
                            out_file.write(str(entry) + "\n")
                        out_file.write(
                            "==================================================================="
                            + "\n")
                    util.save_eval_score("global step: " + str(gs_val) + " - acc : "
                                         + str(mean_acc) + " - total loss: " + str(mean_loss)
                                         + " - " + data_conf.TRAIN_DIR + "_" + str(gs_val))
        finally:
            coord.request_stop()
            coord.join(threads)
            # Flush pending summaries to disk and release the event file.
            summary_writer.close()