def evaluate(sess, dataset, model, step, max_dev_itr=100, verbose=True,
             mode='val'):
    """Evaluate a sentence-pair similarity model and log the results.

    Batches are drawn from ``dataset``, merged with
    ``datasets.merge_sentences`` and scored with ``model.evaluate_step``.
    Per-sample predictions are written to ``<mode>_samples_<step>.txt``
    (overwritten) and one summary record is appended to
    ``<mode>_history.txt``. Both files live in ``model.val_results_dir``
    when ``mode == 'val'`` and in ``model.test_results_dir`` for every
    other mode.

    Args:
        sess: Session handed to ``tflearn.is_training``, ``sess.run`` and
            ``model.evaluate_step``.
        dataset: Batch source exposing ``next_batch``, ``epochs_completed``
            and ``vocab_i2w``.
        model: Model exposing ``evaluate_step``, ``args`` and the result
            directory attributes.
        step: Step identifier used in the samples file name and in the
            history record.
        max_dev_itr: Maximum number of batches to evaluate. Ignored when
            ``mode`` is ``'test'`` or ``'train'``.
        verbose: When true, print the averaged metrics.
        mode: ``'val'``, ``'test'`` or ``'train'``. In ``'test'`` and
            ``'train'`` the loop runs until ``dataset.epochs_completed``
            equals 1.

    Returns:
        ``(avg_loss, avg_pco, result_set)``: the mean per-batch MSE, the
        mean of the per-batch ``val_pco[0]`` values, and a tuple
        ``(sentences_1, sentences_2, predicted_sims, ground_truth_sims)``.
        Both averages are NaN when no batch was evaluated.
    """
    results_dir = (model.val_results_dir if mode == 'val'
                   else model.test_results_dir)
    samples_path = os.path.join(
        results_dir, '{}_samples_{}.txt'.format(mode, step))
    history_path = os.path.join(results_dir, '{}_history.txt'.format(mode))

    # 'test' and 'train' ignore max_dev_itr and stop on epoch completion.
    full_epoch = mode in ('test', 'train')

    total_mse, total_pco = 0.0, 0.0
    all_dev_x1, all_dev_x2, all_dev_sims, all_dev_gt = [], [], [], []
    dev_itr = 0

    print("Running Evaluation {}:".format(mode))
    tflearn.is_training(False, session=sess)
    try:
        # This is needed to reset the local variables initialized by
        # TF for calculating streaming Pearson Correlation and MSE
        sess.run(tf.local_variables_initializer())

        # dev_itr starts at 0 and only grows, so `dev_itr < max_dev_itr`
        # already covers the max_dev_itr == 0 case.
        while full_epoch or dev_itr < max_dev_itr:
            val_batch = dataset.next_batch(FLAGS.batch_size, pad=0)
            sents_batch = datasets.merge_sentences(
                val_batch, 2 * model.args["sequence_length"] + 1,
                FLAGS.batch_size)
            # The first returned value (the loss) is not used; the MSE is
            # what gets accumulated and reported as the loss.
            _, val_pco, val_mse, val_sim = model.evaluate_step(
                sess, sents_batch, val_batch.sim)
            total_mse += val_mse
            total_pco += val_pco[0]
            all_dev_x1 += id2seq(val_batch.s1, dataset.vocab_i2w)
            all_dev_x2 += id2seq(val_batch.s2, dataset.vocab_i2w)
            all_dev_sims += val_sim.tolist()
            all_dev_gt += val_batch.sim
            dev_itr += 1

            if full_epoch and dataset.epochs_completed == 1:
                break

        result_set = (all_dev_x1, all_dev_x2, all_dev_sims, all_dev_gt)
        # Avoid ZeroDivisionError when the loop never ran
        # (e.g. max_dev_itr == 0 in 'val' mode).
        no_data = float('nan')
        avg_loss = total_mse / dev_itr if dev_itr else no_data
        avg_pco = total_pco / dev_itr if dev_itr else no_data

        if verbose:
            print("{}:\t Loss: {}\tPco{}".format(mode, avg_loss, avg_pco))

        with open(samples_path, 'w') as sf, open(history_path, 'a') as hf:
            for x1, x2, sim, gt in zip(all_dev_x1, all_dev_x2,
                                       all_dev_sims, all_dev_gt):
                sf.write('{}\t{}\t{}\t{}\n'.format(x1, x2, sim, gt))
            hf.write('STEP:{}\tTIME:{}\tPCO:{}\tMSE\t{}\n'.format(
                step, datetime.datetime.now().isoformat(),
                avg_pco, avg_loss))
    finally:
        # Restore training mode even if evaluation failed part-way.
        tflearn.is_training(True, session=sess)

    return avg_loss, avg_pco, result_set
def evaluate(sess, dataset, model, step, max_dev_itr=100, verbose=True,
             mode='val'):
    """Evaluate a sequence-tagging model and log the results.

    Batches are drawn from ``dataset`` and scored with
    ``model.evaluate_step``. Per-sentence predictions are written to
    ``<mode>_samples_<step>.txt`` (overwritten) and one summary record is
    appended to ``<mode>_history.txt``. Both files live in
    ``model.val_results_dir`` when ``mode == 'val'`` and in
    ``model.test_results_dir`` for every other mode.

    Args:
        sess: Session handed to ``tflearn.is_training`` and
            ``model.evaluate_step``.
        dataset: Batch source exposing ``next_batch``, ``epochs_completed``,
            ``vocab_w2i`` and ``vocab_i2w``.
        model: Model exposing ``evaluate_step``, ``args`` and the result
            directory attributes.
        step: Step identifier used in the samples file name and in the
            history record.
        max_dev_itr: Maximum number of batches to evaluate. Ignored when
            ``mode`` is ``'test'`` or ``'train'``.
        verbose: When true, print the averaged loss and accuracy.
        mode: ``'val'``, ``'test'`` or ``'train'``. In ``'test'`` and
            ``'train'`` the loop runs until ``dataset.epochs_completed``
            equals 1.

    Returns:
        ``(avg_loss, avg_acc, result_set)``: the mean per-batch loss, the
        mean per-batch accuracy, and a tuple
        ``(sentences, predicted_tags, ground_truth_tags)``. Both averages
        are NaN when no batch was evaluated.
    """
    results_dir = (model.val_results_dir if mode == 'val'
                   else model.test_results_dir)
    samples_path = os.path.join(
        results_dir, '{}_samples_{}.txt'.format(mode, step))
    history_path = os.path.join(results_dir, '{}_history.txt'.format(mode))

    # 'test' and 'train' ignore max_dev_itr and stop on epoch completion.
    full_epoch = mode in ('test', 'train')

    total_loss, total_acc = 0.0, 0.0
    all_dev_text, all_dev_pred, all_dev_gt = [], [], []
    dev_itr = 0

    print("Running Evaluation {}:".format(mode))
    tflearn.is_training(False, session=sess)
    try:
        # NOTE(review): the original carried a comment about resetting TF
        # local variables for streaming metrics, but no initializer is run
        # in this function -- confirm none is needed.

        # dev_itr starts at 0 and only grows, so `dev_itr < max_dev_itr`
        # already covers the max_dev_itr == 0 case.
        while full_epoch or dev_itr < max_dev_itr:
            val_batch = dataset.next_batch(
                FLAGS.batch_size, pad=model.args["sequence_length"],
                one_hot=False, raw=False)
            cat_targets = [
                to_categorical(n, len(dataset.vocab_w2i[2]))
                for n in val_batch.ner
            ]
            loss, pred, acc = model.evaluate_step(
                sess, val_batch.sentences, val_batch.ner, cat_targets)
            total_loss += loss
            total_acc += acc
            all_dev_text += id2seq(val_batch.sentences, dataset.vocab_i2w[0])
            all_dev_pred += onehot2seq(pred, dataset.vocab_i2w[2])
            all_dev_gt += onehot2seq(cat_targets, dataset.vocab_i2w[2])
            dev_itr += 1

            if full_epoch and dataset.epochs_completed == 1:
                break

        result_set = (all_dev_text, all_dev_pred, all_dev_gt)
        # Avoid ZeroDivisionError when the loop never ran
        # (e.g. max_dev_itr == 0 in 'val' mode).
        no_data = float('nan')
        avg_loss = total_loss / dev_itr if dev_itr else no_data
        avg_acc = total_acc / dev_itr if dev_itr else no_data

        if verbose:
            # The original format string had no placeholder for the
            # accuracy, so it was silently dropped from the output.
            print("{}:\t Loss: {}\tAccuracy: {}".format(
                mode, avg_loss, avg_acc))

        with open(samples_path, 'w') as sf, open(history_path, 'a') as hf:
            for text, pred, gt in zip(all_dev_text, all_dev_pred,
                                      all_dev_gt):
                sf.write('{}\t{}\t{}\n'.format(text, pred, gt))
            hf.write('STEP:{}\tTIME:{}\tacc:{}\tLoss\t{}\n'.format(
                step, datetime.datetime.now().isoformat(),
                avg_acc, avg_loss))
    finally:
        # Restore training mode even if evaluation failed part-way.
        tflearn.is_training(True, session=sess)

    return avg_loss, avg_acc, result_set
def evaluate(sess, dataset, model, step, max_dev_itr=100, verbose=True,
             mode='val'):
    """Evaluate a rating-classification model and log the results.

    Batches are drawn from ``dataset`` and scored with
    ``model.evaluate_step``. Per-sentence predictions are written to
    ``<mode>_samples_<step>.txt`` (overwritten) and one summary record is
    appended to ``<mode>_history.txt``. Both files live in
    ``model.val_results_dir`` when ``mode == 'val'`` and in
    ``model.test_results_dir`` for every other mode.

    Args:
        sess: Session handed to ``tflearn.is_training``, ``sess.run`` and
            ``model.evaluate_step``.
        dataset: Batch source exposing ``next_batch``, ``epochs_completed``
            and ``vocab_i2w``.
        model: Model exposing ``evaluate_step``, ``args`` and the result
            directory attributes.
        step: Step identifier used in the samples file name and in the
            history record.
        max_dev_itr: Maximum number of batches to evaluate. Ignored when
            ``mode`` is ``'test'`` or ``'train'``.
        verbose: When true, print the averaged loss and accuracy.
        mode: ``'val'``, ``'test'`` or ``'train'``. In ``'test'`` and
            ``'train'`` the loop runs until ``dataset.epochs_completed``
            equals 1.

    Returns:
        ``(avg_loss, avg_accuracy, result_set)``: the mean per-batch loss,
        the number of correct predictions divided by the number of
        evaluated samples, and a tuple
        ``(sentences, predicted_ratings, ground_truth_ratings)``. Both
        metrics are NaN when no batch was evaluated.
    """
    results_dir = (model.val_results_dir if mode == 'val'
                   else model.test_results_dir)
    samples_path = os.path.join(
        results_dir, '{}_samples_{}.txt'.format(mode, step))
    history_path = os.path.join(results_dir, '{}_history.txt'.format(mode))

    # 'test' and 'train' ignore max_dev_itr and stop on epoch completion.
    full_epoch = mode in ('test', 'train')

    total_loss, num_correct = 0.0, 0.0
    all_dev_sentence, all_dev_score, all_dev_gt = [], [], []
    dev_itr = 0

    print("Running Evaluation {}:".format(mode))
    tflearn.is_training(False, session=sess)
    try:
        # Re-initialise TF local variables before evaluating (presumably
        # the accumulators behind streaming metrics -- confirm).
        sess.run(tf.local_variables_initializer())

        # dev_itr starts at 0 and only grows, so `dev_itr < max_dev_itr`
        # already covers the max_dev_itr == 0 case.
        while full_epoch or dev_itr < max_dev_itr:
            val_batch = dataset.next_batch(
                FLAGS.batch_size, one_hot=True,
                pad=model.args["sequence_length"])
            # The per-batch accuracy is not used; accuracy is recomputed
            # from the correct-prediction counts below.
            val_loss, _, val_correct_preds, val_ratings = \
                model.evaluate_step(sess, val_batch.text, val_batch.ratings)
            total_loss += val_loss
            num_correct += np.sum(val_correct_preds)
            all_dev_sentence += id2seq(val_batch.text, dataset.vocab_i2w)
            all_dev_score += val_ratings.tolist()
            all_dev_gt += val_batch.ratings.tolist()
            dev_itr += 1

            if full_epoch and dataset.epochs_completed == 1:
                break

        result_set = (all_dev_sentence, all_dev_score, all_dev_gt)
        # Divide by the samples actually seen rather than
        # dev_itr * FLAGS.batch_size, so a short batch does not deflate
        # accuracy. all_dev_gt holds one ground-truth entry per sample.
        num_samples = len(all_dev_gt)
        # Avoid ZeroDivisionError when the loop never ran
        # (e.g. max_dev_itr == 0 in 'val' mode).
        no_data = float('nan')
        avg_loss = total_loss / dev_itr if dev_itr else no_data
        avg_accuracy = num_correct / num_samples if num_samples else no_data

        if verbose:
            print("{}:\t Loss: {}\tAccuracy: {}".format(
                mode, avg_loss, avg_accuracy))

        with open(samples_path, 'w') as sf, open(history_path, 'a') as hf:
            for sentence, score, gt in zip(all_dev_sentence, all_dev_score,
                                           all_dev_gt):
                sf.write('{}\t{}\t{}\n'.format(sentence, score, gt))
            # The original format string had no placeholder for avg_loss,
            # so the loss was silently dropped from the history record.
            hf.write('STEP:{}\tTIME:{}\tACCURACY:{}\tLOSS:{}\n'.format(
                step, datetime.datetime.now().isoformat(),
                avg_accuracy, avg_loss))
    finally:
        # Restore training mode even if evaluation failed part-way.
        tflearn.is_training(True, session=sess)

    return avg_loss, avg_accuracy, result_set