Example 1
	def load(self, model_dir):
		self._model_dir = model_dir
		self._paramsfile = os.path.join(self._model_dir, 'params.py')
		self._params = utils.load_param_file(self._paramsfile)
		ckpt = os.path.join(utils.get_dict_value(self._params,'output_location'),
												utils.get_dict_value(self._params, 'model_name') + '.ckpt')
		self._e = Evaluator.load2(ckpt)
Example 2
def main(argv):
    try:
        argv = FLAGS(argv)  # parse flags
    except gflags.FlagsError as e:
        print('%s\nUsage: %s ARGS\n%s' % (e, sys.argv[0], FLAGS))
        sys.exit(1)

    params = utils.load_param_file(FLAGS.paramsfile)
    model_dirname = os.path.dirname(FLAGS.paramsfile)
    src_dir = os.path.abspath(os.path.join(model_dirname, '..'))
    target_dir = FLAGS.target_dir
    model_name = params['model_name']
    release_dir_name = FLAGS.release_dir_name
    if release_dir_name == "":
        release_dir_name = model_name
    release_num, dir_name, target_model_dir = copy2repo(
        src_dir, target_dir, model_name, release_dir_name)
    if release_num == -1:
        print("source dir %s doesn't exist" % dir_name)
    elif release_num == -2:
        print("release source dir %s doesn't exist" % dir_name)
    elif release_num == -3:
        print("target dir %s doesn't exist" % dir_name)
    else:
        add_release_num_to_json(
            os.path.join(target_model_dir, release_dir_name, 'params.json'),
            release_num)
        add_release_num_to_json(
            os.path.join(target_model_dir,
                         '%s.%s' % (release_dir_name, release_num),
                         'params.json'), release_num)
        print("SUCCESSFULLY COPY MODEL %s.%s TO REPO" %
              (model_name, release_num))
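
The add_release_num_to_json helper called above is not shown in this snippet. Below is a minimal sketch of what such a helper might do, assuming it only records the release number in the released params.json; the key name 'release_num' and the lack of any extra bookkeeping are assumptions, not the project's actual implementation:

import json

def add_release_num_to_json(json_path, release_num):
    # Load the released params.json, tag it with the release number, write it back.
    with open(json_path, 'r') as fi:
        data = json.load(fi)
    data['release_num'] = release_num  # hypothetical key name
    with open(json_path, 'w') as fo:
        json.dump(data, fo)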
Example 3
def main(argv):
    try:
        argv = FLAGS(argv)  # parse flags
    except gflags.FlagsError as e:
        print('%s\nUsage: %s ARGS\n%s' % (e, sys.argv[0], FLAGS))
        sys.exit(1)
    print(FLAGS.paramsfile)
    params = utils.load_param_file(FLAGS.paramsfile)
    eval(params,  # this eval is an evaluation routine defined elsewhere, not Python's builtin
         save_accuracy_file=False,
         batch_size=1,
         num_batches=200,
         topn=5)
Example 4
	def load(self, model_dir):
		self._model_dir = model_dir
		self._paramsfile = os.path.join(self._model_dir, 'params.py')
		self._params = utils.load_param_file(self._paramsfile)
		self._num_before = utils.get_dict_value(self._params, "num_words_before")
		self._num_after = utils.get_dict_value(self._params, "num_words_after")
		ckpt = os.path.join(utils.get_dict_value(self._params,'output_location'),
												utils.get_dict_value(self._params, 'model_name') + '.ckpt')
		vocab_file = os.path.join(utils.get_dict_value(self._params, 'output_location'), 'vocab.pkl')
		self._e = Evaluator.load2(ckpt)
		self._i = TextIndexer.from_file(vocab_file)
		self._keywords = self._params['keywords']
		self._id_to_word = self._params['id_to_keyword']
Example 5
def params2json(params_file, json_filename):
	params = utils.load_param_file(params_file)
	with open(json_filename, 'w') as fo:
		ignore_list = []
		for f in params:
			if not isinstance(params[f], (list, dict, int, str, float)):
				ignore_list.append(f)
		for f in ignore_list:
			print("IGNORING %s" % f)
			del params[f]
		json.dump(params, fo)
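
A possible invocation of params2json, reusing a params path that appears in a later snippet; the JSON output path is hypothetical:

# Dump the JSON-serializable part of a params.py next to it.
params2json('output/tellmeV12/params.py', 'output/tellmeV12/params.json')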
Example 6
def main(argv):
    params_files_list = [
        'output/tellmeV12/params.py', 'output/tellmeV15/params.py',
        'output/tellmeV17/params.py', 'output/tellmeV18/params.py',
        'output/tellmeV19/params.py'
    ]

    params = []
    data = []
    for paramsfile in params_files_list:
        params.append(utils.load_param_file(paramsfile))
        data.append(load_results(params[-1]))

    fig, ax = plt.subplots()

    for cdata in data:
        ax.plot([(x * 8192) / 1000000 for x in cdata[1]], cdata[8])

    ax.set(xlabel='Million Records Seen',
           ylabel='Accuracy @ 1',
           title=','.join([x['model_name'] for x in params]))
    ax.grid()

    max_value = .82  #np.max(data[8])
    #plt.ylim((.75,math.ceil(max_value*10)/10))
    plt.ylim((.75, .81))
    #plt.ylim((.75,1))
    fig.savefig("accuracy_compare.png")
    plt.show(block=False)

    fig, ax = plt.subplots()
    for cdata in data:
        ax.plot([(x * 8192) / 1000000 for x in cdata[1]], cdata[3])

    ax.set(xlabel='Million Records Seen',
           ylabel='Loss',
           title=','.join([x['model_name'] for x in params]))
    ax.grid()

    min_value = 0  #np.min(data[3])
    plt.ylim((math.floor(min_value * 10) / 10, 1))
    #plt.ylim((.75,1))
    fig.savefig("loss_compare.png")
    plt.show(block=False)

    input("Press enter to exit...")
Example 7
	def load(self, model_dir):
		self._model_dir = model_dir
		self._paramsfile = os.path.join(self._model_dir, 'params.py')
		self._params = utils.load_param_file(self._paramsfile)
		self._num_before = utils.get_dict_value(self._params, "num_words_before")
		self._num_after = utils.get_dict_value(self._params, "num_words_after")
		ckpt = os.path.join(utils.get_dict_value(self._params,'output_location'),
												utils.get_dict_value(self._params, 'model_name') + '.ckpt')
		vocab_file = os.path.join(utils.get_dict_value(self._params, 'output_location'), 'vocab.pkl')
		self._e = Evaluator.load2(ckpt)
		self._i = TextIndexer.from_file(vocab_file)
		with open(os.path.join(
				utils.get_dict_value(self._params, 'output_location'),
				'keywords.pkl'), 'rb') as f:
			keywords = pickle.load(f)
		self._params['keywords'] = keywords
		self._keywords = self._params['keywords']
		self._keyword_map, self._keyword_list = gen_keywords(self._params)
Example 8
def main(argv):
    try:
        argv = FLAGS(argv)  # parse flags
    except gflags.FlagsError as e:
        print('%s\nUsage: %s ARGS\n%s' % (e, sys.argv[0], FLAGS))
        sys.exit(1)
    print(FLAGS.paramsfile)
    params = utils.load_param_file(FLAGS.paramsfile)
    data = load_results(params)
    fig, ax = plt.subplots()
    ax.plot([(x * 8192) / 1000000 for x in data[1]], data[8])

    ax.set(xlabel='Million Records Seen',
           ylabel='Accuracy @ 1',
           title=params['model_name'])
    ax.grid()

    max_value = np.max(data[8])
    #	plt.ylim((.75,math.ceil(max_value*10)/10))
    #plt.ylim((.75,1))
    fig.savefig(
        os.path.join(utils.get_dict_value(params, 'output_location'),
                     "accuracy.png"))
    plt.show(block=False)

    fig, ax = plt.subplots()
    ax.plot([(x * 8192) / 1000000 for x in data[1]], data[3])

    ax.set(xlabel='Million Records Seen',
           ylabel='Loss',
           title=params['model_name'])
    ax.grid()

    min_value = np.min(data[3])
    #	plt.ylim((math.floor(min_value*10)/10,1))
    #plt.ylim((.75,1))
    fig.savefig(
        os.path.join(utils.get_dict_value(params, 'output_location'),
                     "loss.png"))
    plt.show(block=False)

    input("Press enter to exit...")
Example 9
from framework.utils.data.text_indexer import TextIndexer
from word_classifier.data import ClassifierData
import framework.subgraph.losses as losses
import framework.utils.common as utils
import data
from framework.trainer import Trainer, _default_train_iteration_done
from time import time
import pickle
import model
import os
import shutil
import copy
import numpy as np

param_file = 'params.py'
params = utils.load_param_file(param_file)
params['num_classes'] = len(params['keywords'])+1
indexer = TextIndexer.from_txt_file(
	utils.get_dict_value(params, 'vocab_file'),
	max_size=utils.get_dict_value(params, 'max_vocab_size', -1))
indexer.add_token('<pad>')
indexer.add_token('unk')
output_indexer = copy.deepcopy(indexer)
output_indexer.add_token('<blank>')
os.makedirs(utils.get_dict_value(params,'output_location'), exist_ok=True)
indexer.save_vocab_as_pkl(os.path.join(utils.get_dict_value(params,'output_location'), 'vocab.pkl'))

files_to_copy = [param_file]
for file in files_to_copy:
	shutil.copyfile(file, os.path.join(utils.get_dict_value(params, 'output_location'), file))

params['vocab_size'] = indexer.vocab_size()
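
None of these snippets show a params.py file itself. The sketch below is purely illustrative: it lists the keys the surrounding snippets read, with placeholder values, and assumes load_param_file exposes the file's top-level names as a dict, which these snippets do not confirm:

# Hypothetical params.py -- keys inferred from the surrounding snippets, values are placeholders.
model_name = 'demo_model'
output_location = 'output/demo_model'
vocab_file = '/path/to/vocab.txt'   # plain-text vocabulary, one token per line (assumed)
max_vocab_size = 100000
num_words_before = 10
num_words_after = 10
keywords = [',']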
Example 10
from framework.utils.data.text_indexer import TextIndexer
from framework.evaluator import Evaluator
import framework.utils.common as utils
import os
import numpy as np
run_server = False

params = utils.load_param_file('params.py')

vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'),
                          'vocab.pkl')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')

sentences = ['The apple , which is rotten is not edible']

e = Evaluator.load2(ckpt)
i = TextIndexer.from_file(vocab_file)

num_before = utils.get_dict_value(params, "num_words_before")
num_after = utils.get_dict_value(params, "num_words_after")
pad_tok = utils.get_dict_value(params, "pad_tok", '<pad>')

sentence = "In simple terms , high precision means that an algorithm " \
   "returned substantially more relevant results than irrelevant ones , while" \
   " high recall means that an algorithm returned most of the relevant results ."

sentence = "<S> In simple terms , high precision means that algorithm " \
   "returned substantially more relevant results than irrelevant ones , while" \
   " high recall means that algorithm returned most of relevant results ."
#sentence = "<S> Precision can be seen as measure of exactness or quality , "\
Example 11
    locations = []
    probs = []
    for candidate_idx, candidate in enumerate(candidates):
        _, indexed, _, _ = i.index_wordlist(candidate)
        r = e.eval({'sentence': [indexed]}, {'sm_decision'})
        print('%s %s' % (candidate[int(len(candidate) / 2)], r[0][0][1]))
        #		print(candidate)
        #		print(r)
        if r[0][0][1] > thres:
            locations.append(candidate_idx)
            probs.append(r[0][0][1])
    print(probs)
    return locations


params = utils.load_param_file('params.10_10.py')

vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'),
                          'vocab.pkl')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')

sentences = [
    'To evaluate if the Transformer can generalize to other tasks we performed experiments on English constituency parsing',
    'We also trained it in a semi-supervised setting , using the larger high-confidence and BerkleyParser corpora from with approximately 17M sentences',
    'We used a vocabulary of 16K tokens for the WSJ only setting and a vocabulary of 32K tokens for the semi-supervised setting',
    'Our results in Table 4 show that despite the lack of task-specific tuning our model performs surprisingly well , yielding '
    'better results than all previously reported models with the exception of the Recurrrent Neural Network Grammar',
    'In this work we presented the Transformer , the first sequence transduction model based entirely on'
    ' attention replacing the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention',
    'We are excited about the future of attention-based models and plan to apply them to other tasks',
Example 12
from framework.utils.data.text_indexer import TextIndexer
from word_classifier.data import ClassifierData
from framework.evaluator import Evaluator
from shell_command import shell_call
import framework.utils.common as utils
from time import time
import numpy as np
import os
import sys

params = utils.load_param_file(sys.argv[1])

params['num_classes'] = len(params['keywords']) + 1
release_cmd = 'python3 ../tools/release_model.py %s' % sys.argv[1]
shell_call(release_cmd)
vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'),
                          'vocab.pkl')
release_dir = os.path.join(utils.get_dict_value(params, 'output_location'),
                           params['model_name'])
graphdef_file = os.path.join(release_dir, params['model_name'] + '.graphdef')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')

e = Evaluator.load_graphdef(graphdef_file)
e.dump_variable_sizes()
i = TextIndexer.from_file(vocab_file)

test_data = ClassifierData.get_data_from_dirs(
    ['/mnt/work/training_data/statmt.tokenized/valid'], params=params)
#test_data = ClassifierData.get_data(params=params)
model_results = []
Example 13
from framework.utils.data.text_indexer import TextIndexer
from word_classifier.data import ClassifierData
from framework.evaluator import Evaluator
import framework.utils.common as utils
from time import time
import numpy as np
import os

params = utils.load_param_file('output/determinerV3/params.py')

vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'),
                          'vocab.pkl')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')

e = Evaluator.load2(ckpt)
i = TextIndexer.from_file(vocab_file)

test_data = ClassifierData.get_monolingual_test(params=params)
model_results = []

timestr = str(int(time()))
f = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s.txt' % timestr), 'w')
fe = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s_err.txt' % timestr), 'w')
fip = open(
    os.path.join(utils.get_dict_value(params, 'output_location'),
                 'heldout_%s_err2.txt' % timestr), 'w')
Example 14
import os
import numpy as np
import time
import pickle
import math
import sys
from tokenex.tokenizer import Tokenizer
import framework.utils.common as utils  # assumed: the same utils module used in the other snippets
run_server = False
rtime = time.time()

prelim = False
infile = None
out_file = None

paramsfile = sys.argv[1]
params = utils.load_param_file(paramsfile)  # e.g. 'output/rnnlmV8/params.py'
dir_path = os.path.dirname(paramsfile)

if not prelim:
    infile = '../we_test/devtest_sent_list.pkl'
    out_file = '../we_test/devtest_sent_prob.pkl'
    #infile = '../we_test/valid_sent_list.pkl'
    #out_file= '../we_test/valid_sent_prob.pkl'
    cmd = 'tar -czvf ' + str(rtime) + '.tgz ' + dir_path
    print(cmd)
    os.system(cmd)
vocab_file = os.path.join(utils.get_dict_value(params, 'output_location'),
                          'vocab.pkl')
ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')
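
The tar backup in this snippet goes through os.system with string concatenation. A small alternative sketch using subprocess, which avoids quoting problems if dir_path contains spaces; behaviour is otherwise equivalent to the command printed above:

import subprocess

subprocess.run(['tar', '-czvf', str(rtime) + '.tgz', dir_path], check=True)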
Example 15
from framework.evaluator import Evaluator
import framework.utils.common as utils
from tellme.data import TellmeData
import numpy as np
from time import time
import gflags
import os
import sys

params = utils.load_param_file('./output/tellmeV8/params.py')

ckpt = os.path.join(utils.get_dict_value(params, 'output_location'),
                    utils.get_dict_value(params, 'model_name') + '.ckpt')
accuracy_file = os.path.join(utils.get_dict_value(params, 'output_location'),
                             'accuracy.txt')
e = Evaluator.load2(ckpt)
e.dump_graph()
e.save_graph_as_pbtxt('tellmev8.pbtxt')
Example 16
            params=params,
            gen_data_from_file_fcn=gen_data_from_file_fcn,
            gen_data_fcn=gen_data_fcn)


#result = gen_data_from_file('/mnt/work/1-billion-word-language-modeling-benchmark/training-monolingual.tokenized.shuffled/news.en-00001-of-00100')
#for x in result:
#	print(x)
if __name__ == "__main__":
    from framework.utils.data.text_indexer import TextIndexer
    #	x = "We went to the store , and I bought some fruits".split()
    #	print(x)
    #	y = gen_data(x, [','])
    #	for yy in y:
    #		print(yy)
    params = utils.load_param_file('../determiner.ccnn/params.py')
    indexer = TextIndexer.from_txt_file(
        utils.get_dict_value(params, 'vocab_file'),
        max_size=utils.get_dict_value(params, 'max_vocab_size', -1))
    indexer.add_token('<pad>')
    indexer.add_token('unk')
    indexer = None
    d = ClassifierData.get_data('/mnt/work/data_gen_test',
                                indexer=indexer,
                                params=params)
    a = d.next_batch(batch_size=20)
    for x, y in zip(a['y'], a['sentence']):
        print("%s:%s" % (y, x))
#	print(training_data.next_batch(batch_size=16))
#	x = ['fraud', 'or', 'wrongdoing', 'have', 'contributed', 'to', 'the', 'current', 'problems', ';', 'authorities', 'need', 'to', ',', 'and', 'are', 'prosecuting', 'them', '.', '<pad>']
#	a,b = merge_tokens_for_text(x)