if __name__ == "__main__": parser = argparse.ArgumentParser( description= "Benchmark CNN on a bipartite ranking task (answer selection)") parser.add_argument("-N", help="GloVe dim", type=int, default=300) parser.add_argument("--wang", help="whether to run on Wang inst. of YodaQA dataset", type=int, default=0) parser.add_argument("--params", help="additional training parameters", type=str, default='') args = parser.parse_args() glove = emb.GloVe(N=args.N) if args.wang == 1: s0, s1, y, vocab, gr = load_set('data/anssel/wang/train-all.csv', glove) s0t, s1t, yt, _, grt = load_set('data/anssel/wang/dev.csv', glove, vocab) else: s0, s1, y, vocab, gr = load_set( 'data/anssel/yodaqa/curatedv1-training.csv', glove) s0t, s1t, yt, _, grt = load_set('data/anssel/yodaqa/curatedv1-val.csv', glove, vocab) kwargs = eval('dict(' + args.params + ')') model = prep_model(glove, vocab, oact='linear', **kwargs) model.compile( loss={'score': ranknet},
# Two fragments: (1) the tail of a train-and-evaluate function whose `def`
# line is outside this view -- it evaluates predictions on the validation
# split, dumps per-question diagnostics for both splits, and returns the
# trained model; (2) the hypev training `__main__` block, which resolves the
# model module by name, derives a run id from a config hash, optionally loads
# GloVe embeddings (when conf['embdim'] is set), loads train/val sets sharing
# one vocabulary, pickles the vocabulary, and kicks off training.
# NOTE(review): the vocabulary pickle is written via a bare
# `open('vocab.txt', 'wb')` that is never explicitly closed -- relies on
# CPython refcounting to flush; a `with` block would be safer.
# NOTE(review): the file is named vocab.txt but holds binary pickle data --
# presumably a historical naming quirk; confirm before renaming.
ev.eval_hypev(prediction_t, s0t, grt['score'], 'Val') dump_questions(s0, s1, gr['score'], prediction, 'Train') dump_questions(s0t, s1t, grt['score'], prediction_t, 'Val') return model if __name__ == "__main__": modelname, trainf, valf = sys.argv[1:4] # modelname, trainf, valf = 'avg', 'data/hypev/argus/argus_train.csv', 'data/hypev/argus/argus_test.csv' params = sys.argv[4:] module = importlib.import_module('.' + modelname, 'models') conf, ps, h = config(module.config, params) runid = '%s-%x' % (modelname, h) print('RunID: %s (%s)' % (runid, ps)) if conf['embdim'] is not None: print('GloVe') glove = emb.GloVe(N=conf['embdim']) else: glove = None print('Dataset') s0, s1, y, vocab, gr = load_set(trainf) s0t, s1t, yt, _, grt = load_set(valf, vocab) pickle.dump(vocab, open('vocab.txt', 'wb')) train_and_eval(runid, module.prep_model, conf, glove, vocab, gr, s0, grt, s0t)
# Serving-script tail (the argv parsing for modelname/taskname/s1f/vocabf/
# weightsf is outside this view): loads candidate-answer texts, resolves the
# model and task modules dynamically, merges model+task configuration with
# command-line overrides, optionally loads GloVe embeddings onto the task,
# restores the vocabulary and trained weights, then starts a web service.
# NOTE(review): skip_oneclass is forced off because load_samples() returns
# one-class data, per the original inline comment.
# NOTE(review): this chunk is truncated -- the `app.run(...)` call is
# unfinished; `app` is presumably a Flask app defined elsewhere in the file.
params = sys.argv[6:] load_s1texts(s1f) model_module = importlib.import_module('.' + modelname, 'models') task_module = importlib.import_module('.' + taskname, 'tasks') task = task_module.task() conf, ps, h = config(model_module.config, task.config, params) task.set_conf(conf) print(ps) # TODO we should be able to get away with actually *not* loading # this at all! if conf['embdim'] is not None: print('GloVe') task.emb = emb.GloVe(N=conf['embdim']) else: task.emb = None print('Dataset') task.load_vocab(vocabf) print('Model') task.c['skip_oneclass'] = False # load_samples() returns oneclass model = task.build_model(model_module.prep_model) print(weightsf) model.load_weights(weightsf) print("Running...") app.run(port=conf.get('port', 5051),
# Preprocessing driver: (re)build the shared vocabulary and dump pickled
# feature tuples for the train/validation/test splits of a sentence-pair
# dataset.
#
# Usage: [--revocab] TRAINF VALF TESTF DUMPTRAINF DUMPVALF DUMPTESTF VOCABF
#   --revocab  rebuild the vocabulary from TRAINF instead of loading VOCABF
#
# Fixes over the original: file handles are managed with `with` blocks
# (the original leaked bare open() handles), and the three identical
# preprocess-and-dump stanzas are collapsed into one loop.
args = sys.argv[1:]
if args[0] == '--revocab':
    revocab = True
    args = args[1:]
else:
    revocab = False
trainf, valf, testf, dumptrainf, dumpvalf, dumptestf, vocabf = args

if revocab:
    # Build a fresh vocabulary from the training sentences; words seen
    # fewer than 2 times are dropped.
    vocab = Vocabulary(sentence_gen([trainf]), count_thres=2)
    print('%d words' % (len(vocab.word_idx)))
    with open(vocabf, "wb") as f:
        pickle.dump(vocab, f)
else:
    with open(vocabf, "rb") as f:
        vocab = pickle.load(f)
    print('%d words' % (len(vocab.word_idx)))

glove = emb.GloVe(N=300)  # XXX: hardcoded GloVe dimension

# Each split gets identical treatment: load, featurize, pickle.
for split, inf, outf in (('train', trainf, dumptrainf),
                         ('validation', valf, dumpvalf),
                         ('test', testf, dumptestf)):
    print('Preprocessing %s file' % (split,))
    si0, si1, sj0, sj1, f0_, f1_, labels = load_set(inf, vocab, glove)
    with open(outf, "wb") as f:
        pickle.dump((si0, si1, sj0, sj1, f0_, f1_, labels), f)
# Hyperparameter random-search driver for answer-selection models: resolves
# the model module by name, loads train/val sets via anssel_train (sharing
# one vocabulary), loads 300-dim GloVe embeddings, and samples configurations
# (dropout rates, l2 regularization, CNN activation/init, per-width channel
# dims, projection flag) logged to <modelname>_rlog.txt.
# NOTE(review): repeated entries in the candidate lists (e.g. 'tanh' twice)
# presumably weight the sampling distribution -- confirm against
# RandomSearch's semantics before deduplicating.
# NOTE(review): this chunk is truncated -- the RandomSearch(...) call is
# unfinished and continues in the original file.
import pysts.kerasts.blocks as B from pysts.kerasts.callbacks import AnsSelCB from pysts.kerasts.objectives import ranknet from pysts.hyperparam import RandomSearch import anssel_train if __name__ == "__main__": modelname, trainf, valf = sys.argv[1:4] module = importlib.import_module('.' + modelname, 'models') s0, s1, y, vocab, gr = anssel_train.load_set(trainf) s0t, s1t, yt, _, grt = anssel_train.load_set(valf, vocab) glove = emb.GloVe(300) # XXX hardcoded N rs = RandomSearch(modelname + '_rlog.txt', dropout=[1 / 2, 2 / 3, 3 / 4], inp_e_dropout=[1 / 2, 3 / 4, 4 / 5], l2reg=[1e-4, 1e-3, 1e-2], cnnact=['tanh', 'tanh', 'relu'], cnninit=['glorot_uniform', 'glorot_uniform', 'normal'], cdim={ 1: [0, 0, 1 / 2, 1, 2], 2: [0, 0, 1 / 2, 1, 2, 0], 3: [0, 0, 1 / 2, 1, 2, 0], 4: [0, 0, 1 / 2, 1, 2] }, project=[True, True, False],
# Argus feature classes plus module-level setup for an STS neural model.
# ElasticScore wraps a source's elastic-search score as a Feature.  The
# module-level code then loads an 'rnn' model config, 50-dim GloVe
# embeddings, a pickled vocabulary, builds the Keras model and restores its
# weights -- heavy side effects at import time.  STS_NN scores a
# question/sentence pair with that model (class truncated here).
# NOTE(review): this chunk uses Python 2 `print` statements while other
# chunks in this project use print() -- it will not parse under Python 3.
# NOTE(review): the vocabulary pickle is opened in text mode without 'rb'
# and never explicitly closed -- Python-2-era idioms; confirm before porting.
class ElasticScore(Feature): def __init__(self, answer, i): Feature.set_type(self, rel) Feature.set_name(self, 'Elastic score') Feature.set_value(self, answer.sources[i].elastic) import pickle import importlib import pysts.embedding as emb from argus.keras_build import config, build_model, load_sent module = importlib.import_module('.' + 'rnn', 'models') conf, ps, h = config(module.config, []) print 'loading sts model, glove' glove = emb.GloVe(N=50) vocab = pickle.load(open('sources/vocab.txt')) print 'glove loaded' model = build_model(glove, vocab, module.prep_model, conf) model.load_weights('sources/models/keras_model.h5') print 'sts model loaded' class STS_NN(Feature): """ Keras models from brmson/dataset-sts """ def __init__(self, answer, i): Feature.set_type(self, clas + rel) Feature.set_name(self, 'STS_NN') gr = load_sent(answer.q.text, answer.sources[i].sentence, vocab)