# Held-out evaluation script (visible part): loads the parameter file, an
# Evaluator from the model checkpoint and the vocabulary indexer, then opens
# two timestamped output files and starts iterating over four batches.
from framework.utils.data.text_indexer import TextIndexer
from word_classifier.data import ClassifierData
from framework.evaluator import Evaluator
import framework.utils.common as utils
from time import time
import numpy as np
import os

params = utils.load_param_file('params.py')

# The vocabulary, the checkpoint and both output files all live under the
# configured output location, so look it up once.
output_location = utils.get_dict_value(params, 'output_location')
vocab_file = os.path.join(output_location, 'vocab.pkl')
ckpt = os.path.join(
    output_location,
    utils.get_dict_value(params, 'model_name') + '.ckpt')

e = Evaluator.load2(ckpt)
i = TextIndexer.from_file(vocab_file)
test_data = ClassifierData.get_monolingual_test(params=params)
model_results = []

# One pair of files per run, keyed by the integer Unix timestamp.
timestr = str(int(time()))
f = open(os.path.join(output_location, 'heldout_%s.txt' % timestr), 'w')
ferr = open(os.path.join(output_location, 'heldout_%s_err.txt' % timestr), 'w')
# NOTE(review): neither handle is closed in the code visible here -- confirm
# that the remainder of the script closes them.

# Header row of the tab-separated results file.
f.write('Exec Time\tModel Score\tGround Truth\tSentence\n')

for batch_no in range(4):
    print("WORKING ON BATCH %s" % batch_no)
def eval(params, save_accuracy_file=True, batch_size=5000, num_batches=20, topn=1, verbose=True,
         eval_data_file='/mnt/work/NeuralRewriting/eval/small_eval_data.json',
         eval_output_file='/mnt/work/NeuralRewriting/eval/small_eval_data_out.json'):
    """Score every synonym option of every evaluation query and save the result.

    For each query in ``eval_data_file`` the query word is replaced by the
    blank token ``___`` in the original sentence, the sentence is prefixed
    with ``<s>`` and lower-cased, and the context window around the blank is
    run through the model.  Each option then receives a ``'clmtV1'`` entry:
    the natural log of the model output at the synonym's position in the
    keyword list, or ``-1000`` when the synonym is not a known keyword.  The
    annotated data is written to ``eval_output_file``; the number and the
    list of unknown synonyms are printed at the end.

    Args:
        params: parameter dictionary; ``num_words_before``,
            ``num_words_after``, ``output_location`` and ``model_name`` are
            read from it.
        save_accuracy_file, batch_size, num_batches, topn, verbose: accepted
            for interface compatibility; not used by this function.
        eval_data_file: JSON file holding a list of queries, each with the
            keys ``query_word``, ``orig_sent`` and ``options`` (a list of
            dicts that carry a ``synonym`` key).
        eval_output_file: path the annotated JSON is written to.

    Returns:
        None.
    """
    num_before = utils.get_dict_value(params, "num_words_before")
    num_after = utils.get_dict_value(params, "num_words_after")
    output_location = utils.get_dict_value(params, 'output_location')
    ckpt = os.path.join(output_location, utils.get_dict_value(params, 'model_name') + '.ckpt')
    vocab_file = os.path.join(output_location, 'vocab.pkl')
    keywords_file = os.path.join(output_location, 'keywords.pkl')

    e = Evaluator.load2(ckpt)
    i = TextIndexer.from_file(vocab_file)

    # input data
    with open(eval_data_file) as f:
        data = json.load(f)
    with open(keywords_file, 'rb') as f:
        k = pickle.load(f)

    # Position of the first occurrence of every keyword (the same answer
    # list.index would give), built once instead of scanning the keyword
    # list twice per option.
    keyword_index = {}
    for position, keyword in enumerate(k):
        keyword_index.setdefault(keyword, position)

    unk_list = []
    for q in data:
        # NOTE(review): str.replace blanks out *every* occurrence of the query
        # word, including occurrences inside longer words -- confirm intended.
        blanked = q['orig_sent'].replace(q['query_word'], "___")
        test_sentence = ("<s> " + blanked).lower()
        split_sentence = list(split_sentence_for_eval(test_sentence.split(), ["___"], num_before, num_after))
        # Only the first window around a blank is scored.
        _, sentence, _, _ = i.index_wordlist(split_sentence[0][0])
        r = e.eval({'sentence': [sentence]}, {'sm_decision'})
        # presumably the softmax distribution over keywords -- TODO confirm
        sm = r[0][0]
        for o in q['options']:
            synonym = o['synonym']
            position = keyword_index.get(synonym)
            if position is None:
                # Unknown synonym: fixed floor score, remembered for the report.
                score = -1000
                unk_list.append(synonym)
            else:
                score = math.log(sm[position])
            o['clmtV1'] = score
            print(score)

    # save output
    with open(eval_output_file, 'w') as f:
        json.dump(data, f)
    print(len(unk_list))
    print(unk_list)
return result params = utils.load_param_file(paramsfile) vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl') ckpt = os.path.join(utils.get_dict_value(params, 'output_location'), utils.get_dict_value(params, 'model_name') + '.ckpt') gdfile = os.path.join(utils.get_dict_value(params, 'output_location'), "release", utils.get_dict_value(params, 'model_name') + '.graphdef') #e = Evaluator.load2(ckpt) e = Evaluator.load_graphdef(gdfile) i = TextIndexer.from_file(vocab_file) sentence = "Linda owns a catering business in New Orleans She enjoys cooking for special events such as weddings , parties , and holidays " sentence = "Driving home from school , Brett vowed to protect the fragile ecosystem all " \ "the while the tires of his Cadillac Escalade flattened the toads hopping on the wet streets" sentence = sys.argv[1] tokens = sentence.lower().split() mi = generate_model_input_sentences(tokens, params) imi = [] for s in mi: a, indexed, b, c = i.index_wordlist(s) print(indexed) imi.append(indexed) # print(a) # print(b)
poly, _ = polynomial(x, 2, name='p') poly, _ = rename_nodes(poly, ['ybar']) return poly def train_iteration_done(trainer, iteration, loss_value, done, run_results): # debug dump of the variables as we train if iteration % 100 == 0: a0 = [v for v in tf.all_variables() if v.name == 'p/a0:0'] a1 = [v for v in tf.all_variables() if v.name == 'p/a1:0'] a2 = [v for v in tf.all_variables() if v.name == 'p/a2:0'] logging.info([trainer._training_data.current_epoch(), iteration, loss_value, a0[0].eval(), a1[0].eval(), a2[0].eval()]) # # example of how to exit on condition: # if (loss_value < 1): # done = True return done logging.basicConfig(level=logging.INFO) # training code: generate some fake data, create a trainer with the data, run the trainer, and save the model fake_data = training_data.generate_fake_1d_training_data(['x', 'y0']) trainer = Trainer(inference=inference, model_output_location=OUTPUT_DIR, name='quadratic', training_data=fake_data, train_iteration_done=train_iteration_done,batch_size=16) trainer.run(num_epochs=5000,restore_latest_ckpt=False, save_network=False) trainer.save(output_dir=OUTPUT_DIR,pb_filename=PB_FILENAME,ckpt_filename=CKPT_FILENAME) # test evaluation code e = Evaluator.load(model_dir=OUTPUT_DIR,pb_filename=PB_FILENAME,ckpt_filename=CKPT_FILENAME) logging.info(e.eval({'x': 10}, 'ybar'))
def eval(params, save_accuracy_file=True, batch_size=5000, num_batches=20, topn=1, verbose=True,
         dump_sizes_only=True):
    """Dump the model's variable sizes and, optionally, rank keywords for a sample sentence.

    The model checkpoint and the vocabulary are always loaded and
    ``e.dump_variable_sizes()`` is always called.  With the default
    ``dump_sizes_only=True`` the interpreter then exits with status 0, which
    is what this function has always done.  With ``dump_sizes_only=False``
    the built-in sample sentence is split into context windows around the
    configured keywords; for every window the model output is printed as the
    arg-max decision followed by the 11 highest-scoring candidates.

    Args:
        params: parameter dictionary; ``num_words_before``,
            ``num_words_after``, ``output_location``, ``model_name``,
            ``keywords`` and ``id_to_keyword`` are read from it.
        save_accuracy_file, batch_size, num_batches, topn, verbose: accepted
            for interface compatibility; not used by this function.
        dump_sizes_only: when true (default), exit right after dumping the
            variable sizes.

    Raises:
        SystemExit: with code 0 when ``dump_sizes_only`` is true.
    """
    num_before = utils.get_dict_value(params, "num_words_before")
    num_after = utils.get_dict_value(params, "num_words_after")
    output_location = utils.get_dict_value(params, 'output_location')
    ckpt = os.path.join(output_location, utils.get_dict_value(params, 'model_name') + '.ckpt')
    keywords = params['keywords']
    rkeywords = params['id_to_keyword']
    vocab_file = os.path.join(output_location, 'vocab.pkl')

    e = Evaluator.load2(ckpt)
    i = TextIndexer.from_file(vocab_file)
    e.dump_variable_sizes()
    if dump_sizes_only:
        # Same effect as the former unconditional exit(0), without relying on
        # the site-provided interactive helper.
        raise SystemExit(0)

    # Other sentences that were tried here:
    #   "it is better to die happy then to live miserably"
    #   "to big to fail my ass !"
    #   "<S> there are too fishes in the same bucket ."
    #   "i would except it , but i don 't think it 's true"
    test_sentence = "<S> its raining men . hallelujah !"

    print("HELLO")
    print(keywords)
    split_sentence = list(split_sentence_for_eval(test_sentence.split(), keywords, num_before, num_after))
    print(split_sentence)

    # NOTE(review): the source's indentation was lost, so the two final prints
    # are placed after the loop as the most plausible reading.  The elapsed
    # time of the last window is reported (0.0 when there was no window).
    elapsed = 0.0
    for window in split_sentence:
        print(window[0])
        _, sentence, _, _ = i.index_wordlist(window[0])
        bef = time()
        r = e.eval({'sentence': [sentence]}, {'sm_decision'})
        aft = time()
        elapsed = aft - bef

        sm = r[0][0]
        am = np.argmax(sm)
        # Output position 0 means "no change"; position n maps to keyword n - 1.
        if am == 0:
            print("DO NOTHING")
        else:
            print(rkeywords[am - 1])

        # Pair every score with its label ('' for the "no change" slot) and
        # show the 11 best, highest score first.
        labels = [''] + rkeywords
        ranked = sorted(zip(sm, labels), reverse=True)
        for x, y in ranked[:11]:
            print("%0.4f %s" % (x, y))

    print(test_sentence)
    print("EVAL TIME = %s" % elapsed)
# Script: load an Evaluator from a fixed checkpoint plus the vocabulary
# indexer, dump the model's variable sizes, and fetch one large batch of
# monolingual test data.
from framework.evaluator import Evaluator
from classifier_data import ClassifierData
import framework.utils.common as utils
from framework.trainer import Trainer
import framework.subgraph.losses as losses
import logging
import sys
import os
from framework.utils.data.text_indexer import TextIndexer
import tensorflow as tf
import numpy as np
from time import time

# Checkpoint and vocabulary are read from the current working directory.
e = Evaluator.load2("outputv0.ckpt")
i = TextIndexer.from_file('vocab.pkl')
e.dump_variable_sizes()

# Parameters handed to the data loader below.
params = {
    'num_words_before': 5,
    'num_words_after': 5,
    'embedding_size': 300,
    'vocab_size': 100000,
    'embedding_device': None,
    'batch_size': 128,
    'num_classes': 2,
    'mini_batches_between_checkpoint': 100,
    'monolingual_dir': '/mnt/work/1-billion-word-language-modeling-benchmark'
}

test_data = ClassifierData.get_monolingual_test(params=params)
# The explicit size here is independent of params['batch_size'].
batch = test_data.next_batch(batch_size=50000)