Esempio n. 1
0
def eval_model():
    """Evaluate the newest checkpoint on the evaluation records and log results.

    Every file under ``data_conf.EVAL_RECORD_PATH`` is read as a TFRecord
    dataset with batch size 1. The latest checkpoint found in
    ``data_conf.TRAIN_DIR`` is restored (a notice is printed if there is
    none), the embedding ``vectors`` are fed through ``set_embeddings_op`` and
    one sample is evaluated per step until the dataset is exhausted.

    For each sample the loss, labels, probabilities and accuracy are printed,
    predictions and sentence attentions are passed to ``util`` for output, and
    an attention heat-map is plotted for at most
    ``data_conf.PLOT_SAMPLES_NUM`` samples (a sample is skipped when its plot
    id equals the id of the previously plotted sample).

    Once the dataset is exhausted, the mean loss and accuracy are written as
    ``validation_loss`` / ``validation_accuracy`` summaries to
    ``data_conf.TRAIN_DIR``. In ``"val"`` mode a report with per-question-type
    statistics is appended to ``<EVAL_DIR>/val_accuracy.txt``. If not a single
    sample could be read, only a notice is printed.
    """
    if not tf.io.gfile.exists(data_conf.EVAL_DIR):
        tf.io.gfile.makedirs(data_conf.EVAL_DIR)

    util.save_config_values(data_conf, data_conf.EVAL_DIR + "/data_")
    util.save_config_values(model_conf, data_conf.EVAL_DIR + "/model_")

    filenames = glob.glob(data_conf.EVAL_RECORD_PATH + '/*')

    global_step = tf.compat.v1.train.get_or_create_global_step()
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(get_single_sample)
    batch_size = 1

    # Component order matches the unpacking of iterator.get_next() below:
    # question, answers, labels, plot id, plot, question type.
    dataset = dataset.padded_batch(batch_size,
                                   padded_shapes=([None], [ANSWER_COUNT, None],
                                                  [None], (), [None, None],
                                                  ()))

    iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
    (next_q, next_a, next_l, next_plot_ids, next_plots,
     next_q_types) = iterator.get_next()

    logits, word_atts, sent_atts, _ = predict_batch(
        [next_q, next_a, next_plots], training=False)

    # Batch size is 1, so the question type collapses to a scalar.
    next_q_types = tf.reshape(next_q_types, ())

    probabs = model.compute_probabilities(logits=logits)
    loss_example = model.compute_batch_mean_loss(logits, next_l,
                                                 model_conf.LOSS_FUNC)
    accuracy_example = tf.reduce_mean(input_tensor=model.compute_accuracies(
        logits=logits, labels=next_l, dim=1))

    saver = tf.compat.v1.train.Saver()
    summary_writer = tf.compat.v1.summary.FileWriter(data_conf.TRAIN_DIR)

    step = 0
    total_acc = 0.0
    total_loss = 0.0
    p_counts = 0
    last_p = ''
    type_counts = np.zeros(6, dtype=np.int32)
    type_accs = np.zeros(6)
    with tf.compat.v1.Session() as sess:
        init_op = tf.group(tf.compat.v1.global_variables_initializer(),
                           tf.compat.v1.local_variables_initializer())
        sess.run(init_op)
        ckpt = tf.train.get_checkpoint_state(data_conf.TRAIN_DIR)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found')
        coord = tf.train.Coordinator()
        threads = tf.compat.v1.train.start_queue_runners(sess=sess,
                                                         coord=coord)
        sess.run(set_embeddings_op, feed_dict={place: vectors})
        try:
            # The loop ends when the one-shot iterator raises OutOfRangeError.
            while not coord.should_stop():
                (loss_val, acc_val, probs_val, gs_val, q_type_val, q_val,
                 labels_val, p_val, a_val, p_id_val, atts_val,
                 sent_atts_val) = sess.run([
                     loss_example, accuracy_example, probabs, global_step,
                     next_q_types, next_q, next_l, next_plots, next_a,
                     next_plot_ids, word_atts, sent_atts
                 ])

                total_loss += loss_val
                total_acc += acc_val

                # Per-type statistics feed the report written at the end.
                # NOTE(review): the +1 offset presumably maps a type id of -1
                # to slot 0 - confirm against the record format.
                type_accs[q_type_val + 1] += acc_val
                type_counts[q_type_val + 1] += 1

                predicted_probabilities = probs_val[0]
                sentence_attentions = sent_atts_val[0]
                labels = labels_val[0]
                gold = np.argmax(labels)

                question_words = [vocab[index] for index in q_val[0]]
                q_s = ''.join(word + ' ' for word in question_words)
                filename = ''.join(word + '_' for word in question_words)
                filename += "?"

                p_id = str(p_id_val[0].decode("utf-8"))
                path = (data_conf.EVAL_DIR + "/plots/" + p_id + "_" +
                        str(step) + "/")

                # write attention heat-map
                if p_id != last_p and p_counts < data_conf.PLOT_SAMPLES_NUM:
                    for i, a_att in enumerate(atts_val[0]):
                        answer_words = [vocab[index] for index in a_val[0][i]]
                        prediction = "%.2f" % (predicted_probabilities[i] *
                                               100)
                        qa_s = (q_s + "? (acc: " + str(acc_val) + ")\n " +
                                ''.join(word + ' ' for word in answer_words) +
                                " (label: " + str(int(labels[i])) +
                                " - prediction: " + prediction + "%)")
                        # The file name keeps growing across the answers of
                        # one sample, exactly as it did before.
                        filename += ''.join(word + '_'
                                            for word in answer_words)

                        sentence_count = len(a_att)
                        y_labels = [
                            "%.2f" % (sentence_attentions[i][j] * 100) + "%"
                            for j in range(sentence_count)
                        ]
                        a_sents = [[vocab[index] for index in p_val[0][j]]
                                   for j in range(sentence_count)]
                        util.plot_attention(np.array(a_att), np.array(a_sents),
                                            qa_s, y_labels, path, filename)
                    last_p = p_id
                    p_counts += 1

                print("Sample loss: " + str(loss_val))
                print("Sample labels: " + str(labels))
                print("Sample probabilities: " + str(predicted_probabilities))
                print("Sample acc: " + str(acc_val))

                util.print_predictions(data_conf.EVAL_DIR, step, gold,
                                       predicted_probabilities, data_conf.MODE)
                util.print_sentence_attentions(data_conf.EVAL_DIR, step,
                                               sentence_attentions)

                step += 1

                print("Total acc: " + str(total_acc / step))
                print("Local_step: " + str(step * batch_size))
                print("Global_step: " + str(gs_val))
                print("===========================================")
        except tf.errors.OutOfRangeError:
            if step == 0:
                # Nothing was evaluated: there is no global step value and no
                # mean to report, so skip the summary instead of crashing.
                print("No evaluation samples found in " +
                      str(data_conf.EVAL_RECORD_PATH))
            else:
                mean_loss = total_loss / step
                mean_acc = total_acc / step
                summary = tf.compat.v1.Summary()
                summary.value.add(tag='validation_loss',
                                  simple_value=mean_loss)
                summary.value.add(tag='validation_accuracy',
                                  simple_value=mean_acc)
                summary_writer.add_summary(summary, gs_val)
                keys = util.get_question_keys()
                if data_conf.MODE == "val":
                    # Types without samples stay NaN in the report; only the
                    # NumPy warning for 0/0 is suppressed.
                    with np.errstate(divide='ignore', invalid='ignore'):
                        type_means = type_accs / type_counts
                    with open(data_conf.EVAL_DIR + "/val_accuracy.txt",
                              "a") as report:
                        report.write("global step: " + str(gs_val) +
                                     " - total accuracy: " + str(mean_acc) +
                                     "- total loss: " + str(mean_loss) + "\n")
                        report.write(
                            "Types (name / count / correct / accuracy):\n")
                        for entry in zip(keys, type_counts, type_accs,
                                         type_means):
                            report.write(str(entry) + "\n")
                        report.write(
                            "==================================================================="
                            + "\n")
                        util.save_eval_score("global step: " + str(gs_val) +
                                             " - acc : " + str(mean_acc) +
                                             " - total loss: " +
                                             str(mean_loss) + " - " +
                                             data_conf.TRAIN_DIR + "_" +
                                             str(gs_val))
        finally:
            coord.request_stop()
            # Closing flushes the pending summary event to disk.
            summary_writer.close()
        coord.join(threads)
Esempio n. 2
0
def eval_model(model_type, attack_level, num_modified_words, percentage_attacked_samples):
    """Evaluate the newest checkpoint under a white-box adversarial plot attack.

    The model is first run on the unmodified plot to obtain word and sentence
    attentions. These are handed to ``remove_plot_sentence`` (sentence-level
    attack) or ``modify_plot_sentence`` (word-level attack) to build a modified
    plot, and the model is run a second time on that modified plot. Loss,
    accuracy, precision and rank are computed from the second run.

    Per sample the metrics are printed and the predictions and sentence
    attentions are passed to ``util`` for output. Once the dataset is
    exhausted, mean loss and accuracy are written as summaries to
    ``data_conf.TRAIN_DIR`` and a report with per-question-type statistics is
    appended to ``<EVAL_DIR>/accuracy.txt``. If no sample could be read, only
    a notice is printed.

    Args:
        model_type: ``"lstm"`` selects ``movieqa.conf_lstm`` as the global
            ``model_conf``; any other value selects ``movieqa.conf_cnn``. The
            value is also forwarded to ``predict_batch``.
        attack_level: ``"sentence"`` or ``"word"``.
        num_modified_words: forwarded to ``modify_plot_sentence`` for the
            word-level attack and recorded in the report.
        percentage_attacked_samples: forwarded to ``modify_plot_sentence`` for
            the word-level attack.

    Raises:
        ValueError: if ``attack_level`` is neither ``"sentence"`` nor
            ``"word"``.
    """
    # Fail before any directory or config file is touched; previously an
    # unknown level only surfaced later as an unbound local variable.
    if attack_level not in ("sentence", "word"):
        raise ValueError(
            "attack_level must be 'sentence' or 'word', got %r" % (attack_level,))

    print("evaluate")
    print("%s white-box adversarial attack modifies %d words of %d%% of the instances: " % (
        attack_level, num_modified_words, percentage_attacked_samples))

    global model_conf
    if model_type == "lstm":
        import movieqa.conf_lstm as model_conf
    else:
        import movieqa.conf_cnn as model_conf

    if not tf.io.gfile.exists(data_conf.EVAL_DIR):
        tf.io.gfile.makedirs(data_conf.EVAL_DIR)

    util.save_config_values(data_conf, data_conf.TRAIN_DIR + "/data")
    util.save_config_values(model_conf, data_conf.TRAIN_DIR + "/model")

    filenames = glob.glob(data_conf.EVAL_RECORD_PATH + '/*')
    print("Evaluating adversarial attack on %s" % filenames)

    # tf.contrib is not available alongside the tf.compat.v1 API used in the
    # rest of this function, so only core / compat.v1 symbols are used here.
    global_step = tf.compat.v1.train.get_or_create_global_step()
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(get_single_sample)
    batch_size = 1

    # Component order matches the unpacking of iterator.get_next() below:
    # question, answers, labels, plot id, plot, question type.
    dataset = dataset.padded_batch(batch_size, padded_shapes=(
        [None], [ANSWER_COUNT, None], [None], (), [None, None], ()))

    iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)

    next_q, next_a, next_l, next_plot_ids, next_plots, next_q_types = iterator.get_next()

    # First pass: attentions on the original plot steer the attack.
    _, w_atts, s_atts, _ = predict_batch(model_type, [next_q, next_a, next_plots], training=False)

    if attack_level == "sentence":
        m_p = tf.compat.v1.py_func(remove_plot_sentence, [next_plots, s_atts, next_l], [tf.int64])[0]
    else:
        m_p = tf.compat.v1.py_func(
            modify_plot_sentence,
            [next_plots, w_atts, s_atts, next_l, num_modified_words, percentage_attacked_samples],
            [tf.int64])[0]

    # Second pass: the metrics are computed on the attacked plot.
    logits, _, sent_atts, _ = predict_batch(model_type, [next_q, next_a, m_p], training=False)

    # Batch size is 1, so the question type collapses to a scalar.
    next_q_types = tf.reshape(next_q_types, ())

    probabs = model.compute_probabilities(logits=logits)
    loss_example = model.compute_batch_mean_loss(logits, next_l, model_conf.LOSS_FUNC)
    accuracy_example = tf.reduce_mean(input_tensor=model.compute_accuracies(logits=logits, labels=next_l, dim=1))

    # Restore everything except variables whose name starts with "embeddings";
    # the embeddings are loaded through set_embeddings_op below instead.
    to_restore = [variable for variable in tf.compat.v1.global_variables()
                  if not variable.name.startswith("embeddings")]
    saver = tf.compat.v1.train.Saver(to_restore)
    summary_writer = tf.compat.v1.summary.FileWriter(data_conf.TRAIN_DIR)

    step = 0
    total_acc = 0.0
    total_prec = 0.0
    total_rank = 0.0
    total_loss = 0.0
    type_counts = np.zeros(6, dtype=np.int32)
    type_accs = np.zeros(6)
    with tf.compat.v1.Session() as sess:
        init_op = tf.group(tf.compat.v1.global_variables_initializer(), tf.compat.v1.local_variables_initializer())
        sess.run(init_op)
        ckpt = tf.train.get_checkpoint_state(data_conf.TRAIN_DIR)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found')
        sess.run(set_embeddings_op, feed_dict={place: vectors})
        coord = tf.train.Coordinator()
        threads = tf.compat.v1.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # The loop ends when the one-shot iterator raises OutOfRangeError.
            while not coord.should_stop():
                loss_val, acc_val, probs_val, gs_val, q_type_val, sent_atts_val, labels_val = sess.run(
                    [loss_example, accuracy_example, probabs, global_step, next_q_types, sent_atts, next_l])

                # NOTE(review): the +1 offset presumably maps a type id of -1
                # to slot 0 - confirm against the record format.
                type_accs[q_type_val + 1] += acc_val
                type_counts[q_type_val + 1] += 1

                predicted_probabilities = probs_val[0]
                sentence_attentions = sent_atts_val[0]
                labels = labels_val[0]
                gold = np.argmax(labels)

                total_loss += loss_val
                total_acc += acc_val

                m_ap = util.example_precision(predicted_probabilities, labels, 5)
                rank = util.example_rank(predicted_probabilities, labels, 5)
                total_prec += m_ap
                total_rank += rank

                print("Sample loss: " + str(loss_val))
                print("Sample acc: " + str(acc_val))
                print("Sample prec: " + str(m_ap))
                print("Sample rank: " + str(rank))

                util.print_predictions(data_conf.EVAL_DIR, step, gold, predicted_probabilities, data_conf.MODE)
                util.print_sentence_attentions(data_conf.EVAL_DIR, step, sentence_attentions)

                step += 1

                print("Total acc: " + str(total_acc / step))
                print("Total prec: " + str(total_prec / step))
                print("Total rank: " + str(total_rank / step))
                print("Local_step: " + str(step * batch_size))
                print("Global_step: " + str(gs_val))
                if attack_level == "word":
                    print("%d modified word(s)" % num_modified_words)
                print("===========================================")
        except tf.errors.OutOfRangeError:
            if step == 0:
                # Nothing was evaluated: there is no global step value and no
                # mean to report, so skip the summary instead of crashing.
                print("No evaluation samples found in " + str(data_conf.EVAL_RECORD_PATH))
            else:
                mean_loss = total_loss / step
                mean_acc = total_acc / step
                summary = tf.compat.v1.Summary()
                summary.value.add(tag='validation_loss', simple_value=mean_loss)
                summary.value.add(tag='validation_accuracy', simple_value=mean_acc)
                summary_writer.add_summary(summary, gs_val)
                keys = util.get_question_keys()
                # Types without samples stay NaN in the report; only the NumPy
                # warning for 0/0 is suppressed.
                with np.errstate(divide='ignore', invalid='ignore'):
                    type_means = type_accs / type_counts
                with open(data_conf.EVAL_DIR + "/accuracy.txt", "a") as report:
                    # The word count gets its own labelled field; it used to
                    # be glued directly onto the digits of the loss value.
                    report.write("global step: " + str(gs_val) + " - total accuracy: " + str(mean_acc) +
                                 "- total loss: " + str(mean_loss) +
                                 " - modified words: " + str(num_modified_words) + "\n")
                    report.write("Types (name / count / correct / accuracy):\n")
                    for entry in zip(keys, type_counts, type_accs, type_means):
                        report.write(str(entry) + "\n")
                    report.write("===================================================================" + "\n")
                    util.save_eval_score(
                        "global step: " + str(gs_val) + " - acc : " + str(mean_acc) +
                        " - total loss: " + str(mean_loss) + " - " + data_conf.TRAIN_DIR + "_" + str(gs_val))
        finally:
            coord.request_stop()
            # Closing flushes the pending summary event to disk.
            summary_writer.close()
        coord.join(threads)
Esempio n. 3
0
def eval_model():
    """Evaluate the newest checkpoint on the evaluation records and log results.

    Every file under ``data_conf.EVAL_RECORD_PATH`` is read as a TFRecord
    dataset with batch size 1. The latest checkpoint found in
    ``data_conf.TRAIN_DIR`` is restored (a notice is printed if there is
    none) and one sample is evaluated per step until the dataset is
    exhausted.

    Per sample the loss and accuracy are printed and the predictions are
    passed to ``util.print_predictions``. Once the dataset is exhausted, mean
    loss and accuracy are written as ``validation_loss`` /
    ``validation_accuracy`` summaries to ``data_conf.TRAIN_DIR``. In ``"val"``
    mode a report with per-question-type statistics is appended to
    ``<EVAL_DIR>/val_accuracy.txt``. If not a single sample could be read,
    only a notice is printed.
    """
    if not tf.gfile.Exists(data_conf.EVAL_DIR):
        tf.gfile.MakeDirs(data_conf.EVAL_DIR)

    util.save_config_values(data_conf, data_conf.EVAL_DIR + "/data")
    util.save_config_values(model_conf, data_conf.EVAL_DIR + "/model")

    filenames = glob.glob(data_conf.EVAL_RECORD_PATH + '/*')

    print("Evaluate model on %s" % str(filenames))

    global_step = tf.contrib.framework.get_or_create_global_step()
    dataset = tf.contrib.data.TFRecordDataset(filenames)
    dataset = dataset.map(get_single_sample)
    batch_size = 1

    # Component order matches the unpacking of iterator.get_next() below:
    # question, answers, labels, plot id, plot, question type.
    dataset = dataset.padded_batch(batch_size,
                                   padded_shapes=([None], [ANSWER_COUNT, None],
                                                  [None], (), [None], ()))

    iterator = dataset.make_one_shot_iterator()
    (next_q, next_a, next_l, next_plot_ids, next_plots,
     next_q_types) = iterator.get_next()

    logits = predict_batch([next_q, next_a, next_plots], training=False)

    # Batch size is 1, so the question type collapses to a scalar.
    next_q_types = tf.reshape(next_q_types, ())

    probabs = model.compute_probabilities(logits=logits)
    loss_example = model.compute_batch_mean_loss(logits, next_l,
                                                 model_conf.LOSS_FUNC)
    accuracy_example = tf.reduce_mean(
        model.compute_accuracies(logits=logits, labels=next_l, dim=1))

    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(data_conf.TRAIN_DIR)

    step = 0
    total_acc = 0.0
    total_loss = 0.0
    type_counts = np.zeros(6, dtype=np.int32)
    type_accs = np.zeros(6)
    with tf.Session() as sess:
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        ckpt = tf.train.get_checkpoint_state(data_conf.TRAIN_DIR)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # The loop ends when the one-shot iterator raises OutOfRangeError.
            while not coord.should_stop():
                (loss_val, acc_val, probs_val, gs_val, q_type_val,
                 labels_val) = sess.run([
                     loss_example, accuracy_example, probabs, global_step,
                     next_q_types, next_l
                 ])

                predicted_probabilities = probs_val[0]
                gold = np.argmax(labels_val[0])

                # NOTE(review): the +1 offset presumably maps a type id of -1
                # to slot 0 - confirm against the record format.
                type_accs[q_type_val + 1] += acc_val
                type_counts[q_type_val + 1] += 1

                total_loss += loss_val
                total_acc += acc_val

                print("Sample loss: " + str(loss_val))
                print("Sample acc: " + str(acc_val))

                util.print_predictions(data_conf.EVAL_DIR, step, gold,
                                       predicted_probabilities, data_conf.MODE)

                step += 1

                print("Total acc: " + str(total_acc / step))
                print("Local_step: " + str(step * batch_size))
                print("Global_step: " + str(gs_val))
                print("===========================================")

        except tf.errors.OutOfRangeError:
            if step == 0:
                # Nothing was evaluated: there is no global step value and no
                # mean to report, so skip the summary instead of crashing.
                print("No evaluation samples found in " +
                      str(data_conf.EVAL_RECORD_PATH))
            else:
                mean_loss = total_loss / step
                mean_acc = total_acc / step
                summary = tf.Summary()
                summary.value.add(tag='validation_loss',
                                  simple_value=mean_loss)
                summary.value.add(tag='validation_accuracy',
                                  simple_value=mean_acc)
                summary_writer.add_summary(summary, gs_val)
                keys = util.get_question_keys()

                if data_conf.MODE == "val":
                    # Types without samples stay NaN in the report; only the
                    # NumPy warning for 0/0 is suppressed.
                    with np.errstate(divide='ignore', invalid='ignore'):
                        type_means = type_accs / type_counts
                    with open(data_conf.EVAL_DIR + "/val_accuracy.txt",
                              "a") as report:
                        report.write("global step: " + str(gs_val) +
                                     " - total accuracy: " + str(mean_acc) +
                                     "- total loss: " + str(mean_loss) + "\n")
                        report.write(
                            "Types (name / count / correct / accuracy):\n")
                        for entry in zip(keys, type_counts, type_accs,
                                         type_means):
                            report.write(str(entry) + "\n")
                        report.write(
                            "==================================================================="
                            + "\n")
                        util.save_eval_score("global step: " + str(gs_val) +
                                             " - acc : " + str(mean_acc) +
                                             " - total loss: " +
                                             str(mean_loss) + " - " +
                                             data_conf.TRAIN_DIR + "_" +
                                             str(gs_val))
        finally:
            coord.request_stop()
            # Closing flushes the pending summary event to disk.
            summary_writer.close()
        coord.join(threads)