# Example #1
def evaluate(sess,
             dataset,
             model,
             step,
             max_dev_itr=100,
             verbose=True,
             mode='val'):
    """Evaluate a similarity model on `dataset` and persist the results.

    Args:
        sess: Active TensorFlow session.
        dataset: Dataset exposing ``next_batch()``, ``vocab_i2w`` and
            ``epochs_completed``.
        model: Model exposing ``evaluate_step()``, ``args`` and the
            ``val_results_dir`` / ``test_results_dir`` attributes.
        step: Global training step; used to name the samples file.
        max_dev_itr: Max number of batches in 'val' mode. In 'test'/'train'
            mode evaluation runs a full epoch regardless.
        verbose: If True, print the averaged metrics.
        mode: One of 'val', 'test' or 'train'.

    Returns:
        ``(avg_loss, avg_pco, result_set)`` where ``result_set`` is
        ``(sentences_1, sentences_2, predicted_sims, ground_truth_sims)``.
    """
    results_dir = model.val_results_dir if mode == 'val'\
                                        else model.test_results_dir
    samples_path = os.path.join(results_dir,
                                '{}_samples_{}.txt'.format(mode, step))
    history_path = os.path.join(results_dir, '{}_history.txt'.format(mode))

    avg_val_loss, avg_val_pco = 0.0, 0.0
    print("Running Evaluation {}:".format(mode))
    tflearn.is_training(False, session=sess)

    # Reset the local variables TF creates for the streaming Pearson
    # correlation and MSE metrics so this evaluation starts fresh.
    sess.run(tf.local_variables_initializer())
    all_dev_x1, all_dev_x2, all_dev_sims, all_dev_gt = [], [], [], []
    dev_itr = 0
    while (dev_itr < max_dev_itr and max_dev_itr != 0) \
                                    or mode in ['test', 'train']:
        val_batch = dataset.next_batch(FLAGS.batch_size, pad=0)

        sents_batch = datasets.merge_sentences(
            val_batch, 2 * model.args["sequence_length"] + 1, FLAGS.batch_size)

        val_loss, val_pco, val_mse, val_sim = \
            model.evaluate_step(sess, sents_batch, val_batch.sim)
        # NOTE(review): the streaming MSE (not val_loss) is what gets
        # accumulated and reported as "loss"; val_loss itself is unused.
        avg_val_loss += val_mse
        avg_val_pco += val_pco[0]
        all_dev_x1 += id2seq(val_batch.s1, dataset.vocab_i2w)
        all_dev_x2 += id2seq(val_batch.s2, dataset.vocab_i2w)
        all_dev_sims += val_sim.tolist()
        all_dev_gt += val_batch.sim
        dev_itr += 1

        if mode == 'test' and dataset.epochs_completed == 1: break
        if mode == 'train' and dataset.epochs_completed == 1: break

    result_set = (all_dev_x1, all_dev_x2, all_dev_sims, all_dev_gt)
    # Guard against ZeroDivisionError when the loop ran zero iterations
    # (e.g. max_dev_itr == 0 in 'val' mode); averages stay 0.0 then.
    n_itr = max(dev_itr, 1)
    avg_loss = avg_val_loss / n_itr
    avg_pco = avg_val_pco / n_itr
    if verbose:
        print("{}:\t Loss: {}\tPco{}".format(mode, avg_loss, avg_pco))

    # Samples are overwritten per step; the history file is append-only.
    with open(samples_path, 'w') as sf, open(history_path, 'a') as hf:
        for x1, x2, sim, gt in zip(all_dev_x1, all_dev_x2, all_dev_sims,
                                   all_dev_gt):
            sf.write('{}\t{}\t{}\t{}\n'.format(x1, x2, sim, gt))
        hf.write('STEP:{}\tTIME:{}\tPCO:{}\tMSE\t{}\n'.format(
            step,
            datetime.datetime.now().isoformat(), avg_pco, avg_loss))
    tflearn.is_training(True, session=sess)
    return avg_loss, avg_pco, result_set
# Example #2
def evaluate(sess,
             dataset,
             model,
             step,
             max_dev_itr=100,
             verbose=True,
             mode='val'):
    """Evaluate a sequence-labelling (NER) model and persist the results.

    Args:
        sess: Active TensorFlow session.
        dataset: Dataset exposing ``next_batch()``, ``vocab_w2i``/``vocab_i2w``
            (indexed collections; slot 0 = words, slot 2 = NER tags —
            presumably, verify against the dataset class) and
            ``epochs_completed``.
        model: Model exposing ``evaluate_step()``, ``args`` and the
            ``val_results_dir`` / ``test_results_dir`` attributes.
        step: Global training step; used to name the samples file.
        max_dev_itr: Max number of batches in 'val' mode. In 'test'/'train'
            mode evaluation runs a full epoch regardless.
        verbose: If True, print the averaged loss.
        mode: One of 'val', 'test' or 'train'.

    Returns:
        ``(avg_loss, avg_acc, result_set)`` where ``result_set`` is
        ``(sentences, predicted_tags, ground_truth_tags)``.
    """
    results_dir = model.val_results_dir if mode == 'val' \
        else model.test_results_dir
    samples_path = os.path.join(results_dir,
                                '{}_samples_{}.txt'.format(mode, step))
    history_path = os.path.join(results_dir, '{}_history.txt'.format(mode))

    avg_val_loss, avg_acc = 0.0, 0.0
    print("Running Evaluation {}:".format(mode))
    tflearn.is_training(False, session=sess)

    # Reset the local variables initialized by TF for streaming metrics.
    # This call was missing here although the comment announcing it was
    # present; both sibling evaluate() variants perform it.
    sess.run(tf.local_variables_initializer())
    all_dev_text, all_dev_pred, all_dev_gt = [], [], []
    dev_itr = 0
    while (dev_itr < max_dev_itr and max_dev_itr != 0) \
            or mode in ['test', 'train']:
        val_batch = dataset.next_batch(FLAGS.batch_size,
                                       pad=model.args["sequence_length"],
                                       one_hot=False,
                                       raw=False)
        # One-hot encode the NER targets over the tag vocabulary.
        cat_targets = [
            to_categorical(n, len(dataset.vocab_w2i[2])) for n in val_batch.ner
        ]
        loss, pred, acc = model.evaluate_step(sess, val_batch.sentences,
                                              val_batch.ner, cat_targets)
        avg_val_loss += loss
        avg_acc += acc
        all_dev_text += id2seq(val_batch.sentences, dataset.vocab_i2w[0])
        all_dev_pred += onehot2seq(pred, dataset.vocab_i2w[2])
        all_dev_gt += onehot2seq(cat_targets, dataset.vocab_i2w[2])
        dev_itr += 1

        if mode == 'test' and dataset.epochs_completed == 1: break
        if mode == 'train' and dataset.epochs_completed == 1: break

    result_set = (all_dev_text, all_dev_pred, all_dev_gt)
    # Guard against ZeroDivisionError when the loop ran zero iterations
    # (e.g. max_dev_itr == 0 in 'val' mode); averages stay 0.0 then.
    n_itr = max(dev_itr, 1)
    avg_loss = avg_val_loss / n_itr
    avg_acc = avg_acc / n_itr
    if verbose:
        # BUGFIX: avg_acc was passed to format() without a placeholder and
        # silently dropped; print it alongside the loss.
        print("{}:\t Loss: {}\tAcc: {}".format(mode, avg_loss, avg_acc))

    # Samples are overwritten per step; the history file is append-only.
    with open(samples_path, 'w') as sf, open(history_path, 'a') as hf:
        for x1, pred, gt in zip(all_dev_text, all_dev_pred, all_dev_gt):
            sf.write('{}\t{}\t{}\n'.format(x1, pred, gt))
        hf.write('STEP:{}\tTIME:{}\tacc:{}\tLoss\t{}\n'.format(
            step,
            datetime.datetime.now().isoformat(), avg_acc, avg_loss))
    tflearn.is_training(True, session=sess)
    return avg_loss, avg_acc, result_set
def evaluate(sess,
             dataset,
             model,
             step,
             max_dev_itr=100,
             verbose=True,
             mode='val'):
    """Evaluate a rating-classification model and persist the results.

    Args:
        sess: Active TensorFlow session.
        dataset: Dataset exposing ``next_batch()``, ``vocab_i2w`` and
            ``epochs_completed``.
        model: Model exposing ``evaluate_step()``, ``args`` and the
            ``val_results_dir`` / ``test_results_dir`` attributes.
        step: Global training step; used to name the samples file.
        max_dev_itr: Max number of batches in 'val' mode. In 'test'/'train'
            mode evaluation runs a full epoch regardless.
        verbose: If True, print the averaged metrics.
        mode: One of 'val', 'test' or 'train'.

    Returns:
        ``(avg_loss, avg_accuracy, result_set)`` where ``result_set`` is
        ``(sentences, predicted_ratings, ground_truth_ratings)``.
    """
    results_dir = model.val_results_dir if mode == 'val'\
                                        else model.test_results_dir
    samples_path = os.path.join(results_dir,
                                '{}_samples_{}.txt'.format(mode, step))
    history_path = os.path.join(results_dir, '{}_history.txt'.format(mode))

    avg_val_loss, sum_accuracy = 0.0, 0.0
    print("Running Evaluation {}:".format(mode))
    tflearn.is_training(False, session=sess)

    # Reset the local variables initialized by TF for streaming metrics
    # so this evaluation starts fresh.
    sess.run(tf.local_variables_initializer())
    all_dev_sentence, all_dev_score, all_dev_gt = [], [], []
    dev_itr = 0
    while (dev_itr < max_dev_itr and max_dev_itr != 0) \
                                    or mode in ['test', 'train']:
        val_batch = dataset.next_batch(FLAGS.batch_size,
                                       one_hot=True,
                                       pad=model.args["sequence_length"])
        val_loss, val_accuracy, val_correct_preds, val_ratings = \
            model.evaluate_step(sess, val_batch.text, val_batch.ratings)
        avg_val_loss += val_loss
        # Accuracy is re-derived from per-example correctness counts rather
        # than from the (unused) batch-level val_accuracy.
        sum_accuracy += np.sum(val_correct_preds)
        all_dev_sentence += id2seq(val_batch.text, dataset.vocab_i2w)
        all_dev_score += val_ratings.tolist()
        all_dev_gt += val_batch.ratings.tolist()
        dev_itr += 1

        if mode == 'test' and dataset.epochs_completed == 1: break
        if mode == 'train' and dataset.epochs_completed == 1: break

    result_set = (all_dev_sentence, all_dev_score, all_dev_gt)
    # Guard against ZeroDivisionError when the loop ran zero iterations
    # (e.g. max_dev_itr == 0 in 'val' mode); averages stay 0.0 then.
    n_itr = max(dev_itr, 1)
    avg_loss = avg_val_loss / n_itr
    avg_accuracy = sum_accuracy / (n_itr * FLAGS.batch_size)
    if verbose:
        print("{}:\t Loss: {}\tAccuracy: {}".format(mode, avg_loss,
                                                    avg_accuracy))

    # Samples are overwritten per step; the history file is append-only.
    with open(samples_path, 'w') as sf, open(history_path, 'a') as hf:
        for sentence, score, gt in zip(all_dev_sentence, all_dev_score,
                                       all_dev_gt):
            sf.write('{}\t{}\t{}\n'.format(sentence, score, gt))
        # BUGFIX: avg_loss was passed to format() with no placeholder and
        # silently dropped; log it like the sibling evaluate() variants do.
        hf.write('STEP:{}\tTIME:{}\tACCURACY:{}\tLoss\t{}\n'.format(
            step,
            datetime.datetime.now().isoformat(), avg_accuracy, avg_loss))
    tflearn.is_training(True, session=sess)
    return avg_loss, avg_accuracy, result_set