if not utils.get_dict_value(params, 'ignore_negative_data', False):
    # reserve an extra class for negative (no-keyword) examples unless they are ignored
    params['num_classes'] = len(params['keywords']) + 1
else:
    params['num_classes'] = len(params['keywords'])

indexer = TextIndexer.from_txt_file(utils.get_dict_value(params, 'vocab_file'))
indexer.add_token('<pad>')
indexer.add_token('unk')

os.makedirs(utils.get_dict_value(params, 'output_location'), exist_ok=True)
indexer.save_vocab_as_pkl(
    os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl'))
shutil.copyfile(
    param_file,
    os.path.join(utils.get_dict_value(params, 'output_location'), param_file))
params['vocab_size'] = indexer.vocab_size()

training_data = ClassifierData.get_monolingual_training(
    base_dir=params['monolingual_dir'], indexer=indexer, params=params)


def on_checkpoint_saved(trainer, params, save_path):
    msg = 'saved checkpoint: ' + save_path
    print(msg)


def train_iteration_done(trainer, epoch, index, iteration_count, loss_value,
                         training_done, run_results, params):
    if iteration_count == 1:
        trainer._out_file = open(
            os.path.join(utils.get_dict_value(params, 'output_location'),
                         'training_log.txt'), 'w')
    msg = ("%s, %s" % (time(), loss_value))
indexer = TextIndexer.from_txt_file(
    utils.get_dict_value(params, 'vocab_file'),
    max_size=utils.get_dict_value(params, 'max_vocab_size', -1))
indexer.add_token('<pad>')
indexer.add_token('unk')
# the output indexer carries an extra '<blank>' token on top of the input vocabulary
output_indexer = copy.deepcopy(indexer)
output_indexer.add_token('<blank>')

os.makedirs(utils.get_dict_value(params, 'output_location'), exist_ok=True)
indexer.save_vocab_as_pkl(
    os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl'))
files_to_copy = [param_file]
for file in files_to_copy:
    shutil.copyfile(
        file, os.path.join(utils.get_dict_value(params, 'output_location'), file))
params['vocab_size'] = indexer.vocab_size()

if 'training_data_dir' in params:
    training_data = ClassifierData.get_training_data(
        base_dir=params['training_data_dir'],
        indexer=indexer,
        params=params,
        gen_data_fcn=data.gen_data)
else:
    training_data = ClassifierData.get_monolingual_training(
        base_dir=params['monolingual_dir'],
        indexer=indexer,
        params=params,
        gen_data_fcn=data.gen_data)

live_replacement_count_filename = os.path.join(
    utils.get_dict_value(params, 'output_location'), 'live_replacement_count.txt')
saved_replacement_count_filename = os.path.join(
    utils.get_dict_value(params, 'output_location'), 'saved_replacement_count.txt')


def on_checkpoint_saved(trainer, params, save_path):
    msg = 'saved checkpoint: ' + save_path
    print(msg)
    save_y_count(trainer, saved_replacement_count_filename)


def save_y_count(trainer, filename='replacement_counts.txt'):
    with open(filename, 'w') as f:
import framework.utils.common as utils
from time import time
import numpy as np
import os

params = utils.load_param_file('output/determinerV3/params.py')
vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')
e = Evaluator.load2(ckpt)
i = TextIndexer.from_file(vocab_file)
test_data = ClassifierData.get_monolingual_test(params=params)
model_results = []
timestr = str(int(time()))
f = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s.txt' % timestr), 'w')
fe = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s_err.txt' % timestr), 'w')
fip = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s_err2.txt' % timestr), 'w')
f.write('Exec Time\tModel Pick\tModel Score\tGround Truth\tSentence\n')
fe.write('Exec Time\tModel Pick\tModel Score\tGround Truth\tSentence\n')
no_right = [0, 0, 0, 0]
params['num_classes'] = len(params['keywords']) + 1
release_cmd = 'python3 ../tools/release_model.py %s' % sys.argv[1]
shell_call(release_cmd)
vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl')
release_dir = os.path.join(utils.get_dict_value(params, 'output_location'),
                           params['model_name'])
graphdef_file = os.path.join(release_dir, params['model_name'] + '.graphdef')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')
e = Evaluator.load_graphdef(graphdef_file)
e.dump_variable_sizes()
i = TextIndexer.from_file(vocab_file)
test_data = ClassifierData.get_data_from_dirs(
    ['/mnt/work/training_data/statmt.tokenized/valid'], params=params)
#test_data = ClassifierData.get_data(params=params)
model_results = []
timestr = str(int(time()))
f = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s.txt' % timestr), 'w')
fe = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s_err.txt' % timestr), 'w')
fip = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s_err2.txt' % timestr), 'w')
fscores = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
import sys

params = utils.load_param_file(sys.argv[1])
vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')
#e = Evaluator.load_graphdef('commaV10.graphdef')
e = Evaluator.load2(ckpt)
#e.dump_variable_sizes()
i = TextIndexer.from_file(vocab_file)
test_data = ClassifierData.get_data(params=params, type='valid')
model_results = []
timestr = str(int(time()))
f = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s.txt' % timestr), 'w')
f.write('Exec Time\tModel Score\tGround Truth\tSentence\n')

for batch_no in range(10):
    print("WORKING ON BATCH %s" % batch_no)
    batch = test_data.next_batch(batch_size=10000)
    for sentence, ground_truth in zip(batch['sentence'], batch['y']):
        _, indexed, _, _ = i.index_wordlist(sentence)
        before_time = time()
        r = e.eval({'sentence': [indexed]}, {'sm_decision'})
        after_time = time()
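# --- Illustrative sketch (not part of the original script) --------------------
# The loop above times one forward pass per sentence and requests the softmax
# output 'sm_decision'; the original continuation of the loop is not shown in
# this excerpt. Assuming the evaluator returns one softmax row per input
# sentence, a small helper like the following (hypothetical name, assumed
# output layout) could turn that row into the model score logged under the
# 'Model Score' column of the header written to `f`.
import numpy as np

def pick_from_softmax(sm_row):
    """Return (class_id, score) for one softmax row over the model's classes."""
    sm_row = np.asarray(sm_row)
    class_id = int(np.argmax(sm_row))
    return class_id, float(sm_row[class_id])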
    'its', ['it', "'s"],
    'lead', 'led',
    'lose', 'loose',
    'precede', 'proceed',
    'passed', 'past',
    'principal', 'principle',
    'sell', 'sale',
    'site', 'sight',
    'stationary', 'stationery',
    'unk', 'a', 'an', 'the'
]
param_file = 'params.py'
params = utils.load_param_file(param_file)
params['num_classes'] = len(params['keywords']) + 1
d = ClassifierData.get_monolingual_training(
    base_dir=params['monolingual_dir'], params=params, gen_data_fcn=gen_data)
d.next_batch(10)
"""
        tok1 = tuple([x.lower() for x in tokens[toki:toki+2]])
        tok2 = tuple([x.lower() for x in tokens[toki:toki+3]])
        if tok2 in keywords:
            ki = keywords[tok2]
            results.append(
                (tokens[(toki-num_before):toki] + tokens[(toki+3):(toki+num_after+3)],
                 ki + class_offset))
        elif tok1 in keywords:
            ki = keywords[tok1]
            results.append(
                (tokens[(toki-num_before):toki] + tokens[(toki+2):(toki+num_after+2)],
                 ki + class_offset))
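# --- Illustrative sketch (not part of the original file) ----------------------
# The quoted block above looks up 2- and 3-token windows, lowercased and turned
# into tuples, in a `keywords` dict and records the matched class index. That
# dict is not shown in this excerpt; one plausible way to build it from the
# keyword list above (an assumption, including the variable names) is to key it
# by the lowercased token tuple of each entry, so multi-word items such as
# ['it', "'s"] can match a multi-token window:
keywords_list = ['its', ['it', "'s"], 'lead', 'led', 'a', 'an', 'the']
keywords = {}
for ki, kw in enumerate(keywords_list):
    tokens_for_kw = kw if isinstance(kw, list) else [kw]
    keywords[tuple(w.lower() for w in tokens_for_kw)] = ki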
vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl')
release_dir = os.path.join(utils.get_dict_value(params, 'output_location'),
                           params['model_name'])
graphdef_file = os.path.join(release_dir, params['model_name'] + '.graphdef')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')
e = Evaluator.load_graphdef(graphdef_file)
e.dump_variable_sizes()
i = TextIndexer.from_file(vocab_file)
#test_data = ClassifierData.get_data_from_dirs(['/mnt/work/training_data/enron.tokenized/valid'], params=params)
#test_data = ClassifierData.get_data_from_dirs(['/mnt/work/training_data/enron.test.tokenized'], params=params)
test_data = ClassifierData(file_list=[
    '/mnt/work/training_data/oxoml-enron-sentsentences.test.v2.tokenized.txt'
], params=params)
model_results = []
timestr = str(int(time()))

# per-class counters plus a num_classes x num_classes error matrix
num_classes = params['num_classes']
no_right = [0] * num_classes
no_total = [0] * num_classes
no_total_model = [0] * num_classes
error_scenario = []
for x in range(num_classes):
    error_scenario += [[0] * num_classes]
topn = 1
last = 0
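# --- Illustrative sketch (not part of the original script) --------------------
# The evaluation loop that fills these counters is not included in this excerpt.
# Assuming no_right / no_total hold per-class correct and total counts after
# that loop, a summary could be printed like this (a sketch under that
# assumption, not the original reporting code):
def report_per_class_accuracy(no_right, no_total, num_classes):
    for c in range(num_classes):
        total = no_total[c]
        acc = (no_right[c] / total) if total else 0.0
        print('class %d: %d / %d correct (%.4f)' % (c, no_right[c], total, acc))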
params['num_classes'] = len(params['keywords']) + 1
indexer = TextIndexer.from_txt_file(utils.get_dict_value(params, 'vocab_file'))
indexer.add_token('<pad>')
indexer.add_token('unk')
print("VOCAB SIZE=%s" % indexer.vocab_size())

os.makedirs(utils.get_dict_value(params, 'output_location'), exist_ok=True)
indexer.save_vocab_as_pkl(
    os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl'))
shutil.copyfile(
    param_file,
    os.path.join(utils.get_dict_value(params, 'output_location'), param_file))
params['vocab_size'] = indexer.vocab_size()

training_data = ClassifierData.get_monolingual_training(
    base_dir=params['monolingual_dir'],
    indexer=indexer,
    params=params,
    gen_data_from_file_fcn=_gen_data_from_file,
    gen_data_fcn=_gen_data)


def on_checkpoint_saved(trainer, params, save_path):
    msg = 'saved checkpoint: ' + save_path
    print(msg)


#print(training_data.next_batch(10))
trainer = Trainer(
    inference=model.inference,
    batch_size=utils.get_dict_value(params, 'batch_size', 128),
    loss=losses.softmax_xentropy,
    model_output_location=utils.get_dict_value(params, 'output_location'),
indexer.add_token('<pad>')
if utils.get_dict_value(params, 'all_lowercase', False):
    indexer.add_token('<s>')
else:
    # note: both branches currently add the same '<s>' token
    indexer.add_token('<s>')
indexer.add_token('unk')

os.makedirs(utils.get_dict_value(params, 'output_location'), exist_ok=True)
indexer.save_vocab_as_pkl(
    os.path.join(utils.get_dict_value(params, 'output_location'), 'vocab.pkl'))
shutil.copyfile(
    param_file,
    os.path.join(utils.get_dict_value(params, 'output_location'), param_file))
params['vocab_size'] = indexer.vocab_size()
print("VOCAB SIZE: %s" % params['vocab_size'])

training_data = ClassifierData.get_data(params, type='train', indexer=indexer)
#if 'training_data_dir' in params:
#    training_data = ClassifierData.get_training_data(base_dir=params['training_data_dir'], indexer=indexer, params=params)
#else:
#    training_data = ClassifierData.get_monolingual_training(base_dir=params['monolingual_dir'],
#                                                            indexer=indexer,
#                                                            params=params)


def on_checkpoint_saved(trainer, params, save_path):
    msg = 'saved checkpoint: ' + save_path
    print(msg)


def train_iteration_done(trainer, epoch, index, iteration_count, loss_value,
                         training_done, run_results, params):