Example #1
# NOTE: imports reconstructed for completeness; load_set(), prep_model() and
# dump_questions() are assumed to be defined earlier in the same source file.
import argparse

import pysts.embedding as emb
import pysts.eval as ev
from pysts.kerasts.objectives import ranknet

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=
        "Benchmark CNN on a bipartite ranking task (answer selection)")
    parser.add_argument("-N", help="GloVe dim", type=int, default=300)
    parser.add_argument("--wang",
                        help="whether to run on Wang inst. of YodaQA dataset",
                        type=int,
                        default=0)
    parser.add_argument("--params",
                        help="additional training parameters",
                        type=str,
                        default='')
    args = parser.parse_args()

    glove = emb.GloVe(N=args.N)
    if args.wang == 1:
        s0, s1, y, vocab, gr = load_set('data/anssel/wang/train-all.csv',
                                        glove)
        s0t, s1t, yt, _, grt = load_set('data/anssel/wang/dev.csv', glove,
                                        vocab)
    else:
        s0, s1, y, vocab, gr = load_set(
            'data/anssel/yodaqa/curatedv1-training.csv', glove)
        s0t, s1t, yt, _, grt = load_set('data/anssel/yodaqa/curatedv1-val.csv',
                                        glove, vocab)

    kwargs = eval('dict(' + args.params + ')')
    model = prep_model(glove, vocab, oact='linear', **kwargs)
    model.compile(loss={'score': ranknet}, optimizer='adam')  # optimizer assumed; the call was truncated

    # NOTE: the training and prediction lines are missing from this snippet;
    # the fit/predict calls below are a minimal reconstruction.
    model.fit(gr, validation_data=grt)
    prediction = model.predict(gr)['score'][:, 0]
    prediction_t = model.predict(grt)['score'][:, 0]

    ev.eval_hypev(prediction_t, s0t, grt['score'], 'Val')
    dump_questions(s0, s1, gr['score'], prediction, 'Train')
    dump_questions(s0t, s1t, grt['score'], prediction_t, 'Val')
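
The --params string is expanded into prep_model() keyword arguments by evaluating it inside dict(...); a minimal sketch of that mechanism, with a hypothetical flag value:

# Hypothetical --params value, expanded exactly as in the script above.
args_params = "dropout=1/2, l2reg=1e-4"
kwargs = eval('dict(' + args_params + ')')
assert kwargs == {'dropout': 0.5, 'l2reg': 0.0001}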


Example #2
# NOTE: imports reconstructed; config(), load_set() and train_and_eval() are
# assumed to be defined earlier in the same source file.
import importlib
import pickle
import sys

import pysts.embedding as emb

if __name__ == "__main__":
    modelname, trainf, valf = sys.argv[1:4]
    # modelname, trainf, valf = 'avg', 'data/hypev/argus/argus_train.csv', 'data/hypev/argus/argus_test.csv'
    params = sys.argv[4:]

    module = importlib.import_module('.' + modelname, 'models')
    conf, ps, h = config(module.config, params)

    runid = '%s-%x' % (modelname, h)
    print('RunID: %s  (%s)' % (runid, ps))

    if conf['embdim'] is not None:
        print('GloVe')
        glove = emb.GloVe(N=conf['embdim'])
    else:
        glove = None

    print('Dataset')
    s0, s1, y, vocab, gr = load_set(trainf)
    s0t, s1t, yt, _, grt = load_set(valf, vocab)
    pickle.dump(vocab, open('vocab.txt', 'wb'))

    train_and_eval(runid, module.prep_model, conf, glove, vocab, gr, s0, grt,
                   s0t)
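
The run ID above combines the model name with a hex hash of the effective hyperparameter set, so logs from different configurations do not collide; for instance (the hash value is made up):

runid = '%s-%x' % ('rnn', 0x3f2a91)
print(runid)  # rnn-3f2a91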
Example #3
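# NOTE: this snippet starts mid-__main__; presumably modelname, taskname,
# vocabf, weightsf and s1f were unpacked from sys.argv[1:6] just above, and
# app below is a Flask application defined in the truncated part.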
    params = sys.argv[6:]

    load_s1texts(s1f)

    model_module = importlib.import_module('.' + modelname, 'models')
    task_module = importlib.import_module('.' + taskname, 'tasks')
    task = task_module.task()
    conf, ps, h = config(model_module.config, task.config, params)
    task.set_conf(conf)
    print(ps)

    # TODO we should be able to get away with actually *not* loading
    # this at all!
    if conf['embdim'] is not None:
        print('GloVe')
        task.emb = emb.GloVe(N=conf['embdim'])
    else:
        task.emb = None

    print('Dataset')
    task.load_vocab(vocabf)

    print('Model')
    task.c['skip_oneclass'] = False  # load_samples() returns oneclass
    model = task.build_model(model_module.prep_model)

    print(weightsf)
    model.load_weights(weightsf)

    print("Running...")
    app.run(port=conf.get('port', 5051))  # further app.run() arguments truncated in the original
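
Once running, the service could presumably be queried over HTTP; a hedged client sketch in which the /score route and the JSON fields are hypothetical, not taken from the snippet:

import requests

# Hypothetical endpoint and payload; adjust to the routes the app defines.
r = requests.post('http://localhost:5051/score',
                  json={'qtext': 'who wrote Hamlet?',
                        'atext': 'Hamlet was written by Shakespeare.'})
print(r.json())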

Example #4
    args = sys.argv[1:]
    if args[0] == '--revocab':
        revocab = True
        args = args[1:]
    else:
        revocab = False

    trainf, valf, testf, dumptrainf, dumpvalf, dumptestf, vocabf = args

    if revocab:
        vocab = Vocabulary(sentence_gen([trainf]), count_thres=2)
        print('%d words' % (len(vocab.word_idx)))
        pickle.dump(vocab, open(vocabf, "wb"))
    else:
        vocab = pickle.load(open(vocabf, "rb"))
        print('%d words' % (len(vocab.word_idx)))

    glove = emb.GloVe(N=300)  # XXX: hardcoded

    print('Preprocessing train file')
    si0, si1, sj0, sj1, f0_, f1_, labels = load_set(trainf, vocab, glove)
    pickle.dump((si0, si1, sj0, sj1, f0_, f1_, labels), open(dumptrainf, "wb"))

    print('Preprocessing validation file')
    si0, si1, sj0, sj1, f0_, f1_, labels = load_set(valf, vocab, glove)
    pickle.dump((si0, si1, sj0, sj1, f0_, f1_, labels), open(dumpvalf, "wb"))

    print('Preprocessing test file')
    si0, si1, sj0, sj1, f0_, f1_, labels = load_set(testf, vocab, glove)
    pickle.dump((si0, si1, sj0, sj1, f0_, f1_, labels), open(dumptestf, "wb"))
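
Each dump written above can be loaded back later; a minimal sketch, assuming a hypothetical file name in place of the dumptrainf argument:

import pickle

with open('dump-train.pickle', 'rb') as f:  # hypothetical dumptrainf value
    si0, si1, sj0, sj1, f0_, f1_, labels = pickle.load(f)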
Example #5
import importlib
import sys

import pysts.embedding as emb
import pysts.kerasts.blocks as B
from pysts.kerasts.callbacks import AnsSelCB
from pysts.kerasts.objectives import ranknet
from pysts.hyperparam import RandomSearch

import anssel_train

if __name__ == "__main__":
    modelname, trainf, valf = sys.argv[1:4]

    module = importlib.import_module('.' + modelname, 'models')

    s0, s1, y, vocab, gr = anssel_train.load_set(trainf)
    s0t, s1t, yt, _, grt = anssel_train.load_set(valf, vocab)

    glove = emb.GloVe(300)  # XXX hardcoded N

    rs = RandomSearch(modelname + '_rlog.txt',
                      dropout=[1 / 2, 2 / 3, 3 / 4],
                      inp_e_dropout=[1 / 2, 3 / 4, 4 / 5],
                      l2reg=[1e-4, 1e-3, 1e-2],
                      cnnact=['tanh', 'tanh', 'relu'],
                      cnninit=['glorot_uniform', 'glorot_uniform', 'normal'],
                      cdim={
                          1: [0, 0, 1 / 2, 1, 2],
                          2: [0, 0, 1 / 2, 1, 2, 0],
                          3: [0, 0, 1 / 2, 1, 2, 0],
                          4: [0, 0, 1 / 2, 1, 2, 0],
                          5: [0, 0, 1 / 2, 1, 2]
                      },
                      project=[True, True, False])  # remaining search dimensions truncated in the original snippet
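
Repeated values in the lists above presumably weight the random sampling: each hyperparameter is drawn uniformly from its list, so duplicates raise a value's odds. A toy illustration:

import random

cnnact = ['tanh', 'tanh', 'relu']
print(random.choice(cnnact))  # 'tanh' with probability 2/3, 'relu' with 1/3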
Example #6
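# NOTE: Feature and the rel/clas constants are presumably defined in the
# surrounding argus features module; those imports are truncated here.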
class ElasticScore(Feature):
    def __init__(self, answer, i):
        Feature.set_type(self, rel)
        Feature.set_name(self, 'Elastic score')
        Feature.set_value(self, answer.sources[i].elastic)


import pickle
import importlib
import pysts.embedding as emb
from argus.keras_build import config, build_model, load_sent

module = importlib.import_module('.' + 'rnn', 'models')
conf, ps, h = config(module.config, [])
print('loading sts model, glove')
glove = emb.GloVe(N=50)
vocab = pickle.load(open('sources/vocab.txt', 'rb'))
print('glove loaded')
model = build_model(glove, vocab, module.prep_model, conf)
model.load_weights('sources/models/keras_model.h5')
print('sts model loaded')


class STS_NN(Feature):
    """
    Keras models from brmson/dataset-sts
    """
    def __init__(self, answer, i):
        Feature.set_type(self, clas + rel)
        Feature.set_name(self, 'STS_NN')
        gr = load_sent(answer.q.text, answer.sources[i].sentence, vocab)
        # The original snippet ends here; an assumed completion would score
        # the sentence pair with the loaded model and store the value:
        Feature.set_value(self, float(model.predict(gr)['score'][:, 0][0]))
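
A hedged sketch of how such Feature subclasses might be instantiated, one per evidence source of an answer (the answer object's API is assumed from the surrounding argus codebase):

# Hypothetical helper, not part of the original code.
def sts_features(answer):
    return [STS_NN(answer, i) for i in range(len(answer.sources))]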