Example #1
def train_and_eval(runid, module_prep_model, c, glove, vocab, gr, s0, grt, s0t, s0pad=s0pad, s1pad=s1pad, do_eval=True):
    print('Model')
    model = build_model(glove, vocab, module_prep_model, c, s0pad=s0pad, s1pad=s1pad)

    print('Training')
    if c.get('balance_class', False):
        one_ratio = float(np.sum(gr['score'] == 1)) / len(gr['score'])  # float() guards against Python 2 integer division
        class_weight = {'score': {0: one_ratio, 1: 0.5}}
    else:
        class_weight = {}
    # XXX: samples_per_epoch is in brmson/keras fork, TODO fit_generator()?
    model.fit(gr, validation_data=grt,
              callbacks=[AnsSelCB(s0t, grt),
                         ModelCheckpoint('weights-'+runid+'-bestval.h5', save_best_only=True, monitor='mrr', mode='max'),
                         EarlyStopping(monitor='mrr', mode='max', patience=4)],
              class_weight=class_weight,
              batch_size=c['batch_size'], nb_epoch=c['nb_epoch'], samples_per_epoch=int(len(s0)*c['epoch_fract']))
    model.save_weights('weights-'+runid+'-final.h5', overwrite=True)
    if c['ptscorer'] is None:
        model.save_weights('weights-'+runid+'-bestval.h5', overwrite=True)
    model.load_weights('weights-'+runid+'-bestval.h5')

    if do_eval:
        print('Predict&Eval (best epoch)')
        ev.eval_anssel(model.predict(gr)['score'][:,0], s0, gr['score'], 'Train')
        ev.eval_anssel(model.predict(grt)['score'][:,0], s0t, grt['score'], 'Val')
    return model
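The `balance_class` branch above counters label imbalance by reweighting the loss: negatives get a weight equal to the positive ratio, positives a flat 0.5. A quick back-of-envelope check of what that does (a standalone sketch with a hypothetical 1:9 label split, not code from the repo):

import numpy as np

score = np.array([1] * 10 + [0] * 90)                 # hypothetical 10% positive split
one_ratio = float(np.sum(score == 1)) / len(score)    # 0.10
class_weight = {0: one_ratio, 1: 0.5}                 # each positive now counts 5x a negative

# effective loss mass per class under these weights:
print(np.sum(score == 0) * class_weight[0])           # 90 * 0.10 = 9.0
print(np.sum(score == 1) * class_weight[1])           # 10 * 0.50 = 5.0

Note that this shrinks the 9:1 skew to 9:5 rather than balancing it exactly.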
Example #2
def train_and_eval(runid, module_prep_model, c, glove, vocab, gr, s0, grt, s0t):
    print('Model')
    model = build_model(glove, vocab, module_prep_model, c)

    print('Training')
    # XXX: samples_per_epoch is in brmson/keras fork, TODO fit_generator()?
    model.fit(gr, validation_data=grt,
              callbacks=[AnsSelCB(s0t, grt),
                         ModelCheckpoint('weights-'+runid+'-bestval.h5', save_best_only=True, monitor='mrr', mode='max')],
              batch_size=160, nb_epoch=16, samples_per_epoch=5000)
    model.save_weights('weights-'+runid+'-final.h5', overwrite=True)

    ev.eval_anssel(model.predict(gr)['score'][:,0], s0, gr['score'], 'Train')
    ev.eval_anssel(model.predict(grt)['score'][:,0], s0t, grt['score'], 'Val')
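The XXX comment flags that `samples_per_epoch` on `fit()` exists only in the brmson/keras fork. On stock Keras 1 the same per-epoch subsampling could go through `fit_generator()`; a minimal sketch, assuming the `gr` values are numpy arrays indexable in parallel:

import numpy as np

def batch_gen(gr, batch_size):
    # Yield random mini-batches from the gr feature dict forever,
    # as Keras 1 generators are expected to do.
    n = len(gr['score'])
    while True:
        idx = np.random.randint(0, n, batch_size)
        yield {k: np.asarray(v)[idx] for k, v in gr.items()}

# model.fit_generator(batch_gen(gr, 160), samples_per_epoch=5000, nb_epoch=16,
#                     validation_data=grt, callbacks=[AnsSelCB(s0t, grt)])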
Example #3
    def eval(self, model):
        res = []
        for gr, fname in [(self.gr, self.trainf), (self.grv, self.valf),
                          (self.grt, self.testf)]:
            if gr is None:
                res.append(None)
                continue

            # In case of prescoring pruning, we want to predict only
            # on the prescoring subset, but evaluate on the complete
            # dataset, actually!  Therefore, we then unprune again.
            # TODO: Cache the pruning
            gr_p = self.prescoring_apply(gr)
            ypred = self.predict(model, gr_p)
            gr, ypred = graph_input_unprune(
                gr, gr_p, ypred,
                0. if self.c['loss'] == 'binary_crossentropy' else float(-1e15))

            res.append(
                ev.eval_anssel(ypred,
                               gr['si0'] + gr['sj0'],
                               gr['si1'] + gr['sj1'],
                               gr['score'],
                               fname,
                               MAP=True))
        return tuple(res)
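`graph_input_unprune` restores the full dataset so the metrics stay comparable across configurations: pairs that prescoring dropped get a sentinel score (0 for probability outputs, -1e15 for unbounded ranking scores) so they sort to the bottom of each ranking. The core idea, as a standalone sketch (`kept_idx` is hypothetical; the real helper also restores the `gr` dict itself):

import numpy as np

def unprune_scores(n_full, kept_idx, ypred_pruned, sentinel):
    # Scatter predictions on the pruned subset back to full-dataset length;
    # everything prescoring removed ranks last via the sentinel.
    ypred = np.full(n_full, sentinel)
    ypred[kept_idx] = ypred_pruned
    return ypred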
Example #4
def eval(self, model):
    res = []
    for gr, fname in [(self.gr, self.trainf), (self.grv, self.valf), (self.grt, self.testf)]:
        if gr is None:
            res.append(None)
            continue
        ypred = model.predict(gr)['score'][:,0]
        res.append(ev.eval_anssel(ypred, gr['si0'], gr['si1'], gr['score'], fname, MAP=True))
    return tuple(res)
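The MRR that `ev.eval_anssel` reports (and that `AnsSelCB` monitors per epoch in the training examples) groups candidate answers by question, ranks them by predicted score, and averages the reciprocal rank of the first correct answer. A back-of-envelope reimplementation, assuming `s0` holds per-sample question token lists and ignoring pysts.eval's exact tie handling:

from collections import defaultdict
import numpy as np

def mrr(s0, y, ypred):
    per_q = defaultdict(list)
    for q, label, score in zip(s0, y, ypred):
        per_q[tuple(q)].append((score, label))
    rrs = []
    for cands in per_q.values():
        cands.sort(key=lambda c: -c[0])          # highest score first
        ranks = [i for i, (_, label) in enumerate(cands, 1) if label == 1]
        if ranks:                                # questions with no correct answer are skipped
            rrs.append(1.0 / ranks[0])
    return np.mean(rrs)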
Example #5
def train_and_eval(runid, module_prep_model, c, glove, vocab, gr, s0, grt,
                   s0t):
    print('Model')
    model = build_model(glove, vocab, module_prep_model, c)

    print('Training')
    # XXX: samples_per_epoch is in brmson/keras fork, TODO fit_generator()?
    model.fit(gr,
              validation_data=grt,
              callbacks=[
                  AnsSelCB(s0t, grt),
                  ModelCheckpoint('weights-' + runid + '-bestval.h5',
                                  save_best_only=True,
                                  monitor='mrr',
                                  mode='max')
              ],
              batch_size=160,
              nb_epoch=16,
              samples_per_epoch=5000)
    model.save_weights('weights-' + runid + '-final.h5', overwrite=True)

    ev.eval_anssel(model.predict(gr)['score'][:, 0], s0, gr['score'], 'Train')
    ev.eval_anssel(model.predict(grt)['score'][:, 0], s0t, grt['score'], 'Val')
Example #6
def transfer_eval(runid, weightsf, module_prep_model, c, glove, vocab, gr, grv):
    print('Model')
    model = anssel_train.build_model(glove, vocab, module_prep_model, c, s0pad=s0pad, s1pad=s1pad, optimizer=c['opt'], fix_layers=c['fix_layers'])
    print('Model (weights)')
    model.load_weights(weightsf)
    ev.eval_anssel(model.predict(grv)['score'][:,0], grv['si0'], grv['score'], 'anssel Val (bef. train)')

    print('Training')
    if c.get('balance_class', False):
        one_ratio = float(np.sum(gr['score'] == 1)) / len(gr['score'])  # float() guards against Python 2 integer division
        class_weight = {'score': {0: one_ratio, 1: 0.5}}
    else:
        class_weight = {}
    model.fit(gr, validation_data=grv,
              callbacks=[AnsSelCB(s0v, grv),  # s0v (the validation s0 list) is expected as a module-level global here
                         ModelCheckpoint('weights-'+runid+'-bestval.h5', save_best_only=True, monitor='mrr', mode='max'),
                         EarlyStopping(monitor='mrr', mode='max', patience=4)],
              class_weight=class_weight,
              batch_size=c['batch_size'], nb_epoch=c['nb_epoch'], samples_per_epoch=int(len(gr['score'])*c['epoch_fract']))
    model.save_weights('weights-'+runid+'-final.h5', overwrite=True)

    print('Predict&Eval (best epoch)')
    model.load_weights('weights-'+runid+'-bestval.h5')
    ev.eval_anssel(model.predict(grv)['score'][:,0], grv['si0'], grv['score'], 'anssel Val')
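The `fix_layers` argument passed to `build_model` is presumably how transfer learning freezes the pretrained layers so only the remainder is fine-tuned on anssel data. A hypothetical sketch of such a helper for a Keras 1 Graph model (the repo's actual mechanism may differ):

def freeze_layers(model, layer_names):
    # Mark the named nodes non-trainable; Keras 1 only picks this up
    # at compile time, so the model must be recompiled afterwards.
    for name in layer_names:
        model.nodes[name].trainable = False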
Example #7
def eval(self, model):
    res = []
    for gr, fname in [(self.gr, self.trainf), (self.grv, self.valf),
                      (self.grt, self.testf)]:
        if gr is None:
            res.append(None)
            continue
        ypred = model.predict(gr)['score'][:, 0]
        res.append(
            ev.eval_anssel(ypred,
                           gr['si0'],
                           gr['si1'],
                           gr['score'],
                           fname,
                           MAP=True))
    return tuple(res)
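With `MAP=True`, `eval_anssel` also reports mean average precision, which unlike MRR credits every correct answer rather than just the first. A standalone sketch of the metric, grouping by question as before and again ignoring tie handling:

from collections import defaultdict
import numpy as np

def mean_average_precision(s0, y, ypred):
    per_q = defaultdict(list)
    for q, label, score in zip(s0, y, ypred):
        per_q[tuple(q)].append((score, label))
    aps = []
    for cands in per_q.values():
        cands.sort(key=lambda c: -c[0])
        hits, precs = 0, []
        for k, (_, label) in enumerate(cands, 1):
            if label == 1:
                hits += 1
                precs.append(float(hits) / k)    # precision@k at each correct answer
        if precs:
            aps.append(np.mean(precs))
    return np.mean(aps)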
Example #8
    def eval(self, model):
        res = []
        for gr, fname in [(self.gr, self.trainf), (self.grv, self.valf), (self.grt, self.testf)]:
            if gr is None:
                res.append(None)
                continue

            # In case of prescoring pruning, we want to predict only
            # on the prescoring subset, but evaluate on the complete
            # dataset, actually!  Therefore, we then unprune again.
            # TODO: Cache the pruning
            gr_p = self.prescoring_apply(gr)
            ypred = model.predict(gr_p)['score'][:,0]
            gr, ypred = graph_input_unprune(gr, gr_p, ypred, 0. if self.c['loss'] == 'binary_crossentropy' else float(-1e15))

            res.append(ev.eval_anssel(ypred, gr['si0'], gr['si1'], gr['score'], fname, MAP=True))
        return tuple(res)
Example #9
    modelname, weightsfile, trainf, valf, trec_qrels_file, trec_top_file = sys.argv[1:7]
    params = sys.argv[7:]

    module = importlib.import_module('.'+modelname, 'models')
    conf, ps, h = anssel_train.config(module.config, params)

    print('GloVe')
    glove = emb.GloVe(N=conf['embdim'])

    print('Dataset')
    s0, s1, y, vocab, gr = anssel_train.load_set(trainf)
    s0t, s1t, yt, _, grt = anssel_train.load_set(valf, vocab)

    print('Model')
    model = anssel_train.build_model(glove, vocab, module.prep_model, conf)

    print('Weights')
    model.load_weights(weightsfile)

    print('Prediction')
    ypred = model.predict(gr)['score'][:,0]
    ypredt = model.predict(grt)['score'][:,0]

    ev.eval_anssel(ypred, s0, y, trainf)
    ev.eval_anssel(ypredt, s0t, yt, valf)

    with open(trec_qrels_file, 'wt') as f:
        save_trec_qrels(f, s0t, s1t, yt)
    with open(trec_top_file, 'wt') as f:
        save_trec_top(f, s0t, s1t, ypredt, modelname)
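`save_trec_qrels` and `save_trec_top` presumably emit the standard TREC formats that the trec_eval tool consumes. A sketch of writers for those formats, assuming integer question/answer ids have already been assigned to the sentence pairs:

from collections import defaultdict

def write_qrels(f, qids, aids, y):
    # TREC qrels line: <qid> 0 <docid> <relevance>
    for qid, aid, rel in zip(qids, aids, y):
        f.write('%d 0 %d %d\n' % (qid, aid, int(rel)))

def write_top(f, qids, aids, ypred, runid):
    # TREC run line: <qid> Q0 <docid> <rank> <score> <runid>, ranked per question
    per_q = defaultdict(list)
    for qid, aid, score in zip(qids, aids, ypred):
        per_q[qid].append((score, aid))
    for qid in sorted(per_q):
        for rank, (score, aid) in enumerate(sorted(per_q[qid], reverse=True), 1):
            f.write('%d Q0 %d %d %f %s\n' % (qid, aid, rank, score, runid))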
Example #10
                                  'anssel-yodaqa/curatedv1-training.csv',
                                  balance=(args.balance == 1))
        Xtest, ytest = load_set(glove,
                                'anssel-yodaqa/curatedv1-val.csv',
                                subsample0=1)

    model = prep_model(glove)
    model.compile(loss={'score': 'binary_crossentropy'}, optimizer='adam')
    model.fit({
        'e0': Xtrain[0],
        'e1': Xtrain[1],
        'score': ytrain
    },
              batch_size=20,
              nb_epoch=2000,
              validation_data={
                  'e0': Xtest[0],
                  'e1': Xtest[1],
                  'score': ytest
              })
    ev.eval_anssel(
        model.predict({
            'e0': Xtrain[0],
            'e1': Xtrain[1]
        })['score'][:, 0], Xtrain[0], ytrain, 'Train')
    ev.eval_anssel(
        model.predict({
            'e0': Xtest[0],
            'e1': Xtest[1]
        })['score'][:, 0], Xtest[0], ytest, 'Test')
Example #11
    parser.add_argument("-N", help="GloVe dim", type=int, default=50)  # for our naive method, 300**2 would be too much
    parser.add_argument("--balance", help="whether to manually balance the dataset", type=int, default=1)
    parser.add_argument("--wang", help="whether to run on Wang inst. of YodaQA dataset", type=int, default=0)
    args = parser.parse_args()

    glove = emb.GloVe(N=args.N)
    if args.wang == 1:
        Xtrain, ytrain = load_set(glove, 'anssel-wang/train-all.csv', balance=(args.balance == 1))
        Xtest, ytest = load_set(glove, 'anssel-wang/test.csv', subsample0=1)
    else:
        Xtrain, ytrain = load_set(glove, 'anssel-yodaqa/curatedv1-training.csv', balance=(args.balance == 1))
        Xtest, ytest = load_set(glove, 'anssel-yodaqa/curatedv1-val.csv', subsample0=1)

    logreg = linear_model.LogisticRegression(C=0.01, verbose=1, n_jobs=7)
    logreg.fit(logreg_M(*Xtrain), ytrain)
    ev.eval_anssel(logreg.predict_proba(logreg_M(*Xtrain))[:, 1], Xtrain[0], ytrain, 'Train')
    ev.eval_anssel(logreg.predict_proba(logreg_M(*Xtest))[:, 1], Xtest[0], ytest, 'Test')


"""
Performance tuning on anssel-yodaqa:
    * Completely unbalanced, C=1
        Train Accuracy: 0.899176 (y=0 0.983992, y=1 0.334139)
        Train MRR: 0.626233  (on training set, y=0 is subsampled!)
        Test Accuracy: 0.926688 (y=0 0.965770, y=1 0.095908)
        Test MRR: 0.218704
    * sklearn balancing (class_weight='auto'), C=1
        Train Accuracy: 0.816569 (y=0 0.812480, y=1 0.843807)
        Train MRR: 0.620643  (on training set, y=0 is subsampled!)
        Test Accuracy: 0.714450 (y=0 0.727787, y=1 0.430946)
        Test MRR: 0.235821
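`logreg_M` is not shown in this snippet. A plausible sketch of such a pair featurizer for feeding two bag-of-GloVe sentence embeddings into logistic regression (hypothetical; the repo's actual features may differ) concatenates the vectors with their elementwise interactions:

import numpy as np

def pair_features(e0, e1):
    # Common sentence-pair features: both embeddings, their elementwise
    # product, and their absolute difference.
    e0, e1 = np.asarray(e0), np.asarray(e1)
    return np.hstack([e0, e1, e0 * e1, np.abs(e0 - e1)])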
Example #12
        Xtrain, ytrain = load_set(glove,
                                  'anssel-wang/train-all.csv',
                                  balance=(args.balance == 1))
        Xtest, ytest = load_set(glove, 'anssel-wang/test.csv', subsample0=1)
    else:
        Xtrain, ytrain = load_set(glove,
                                  'anssel-yodaqa/curatedv1-training.csv',
                                  balance=(args.balance == 1))
        Xtest, ytest = load_set(glove,
                                'anssel-yodaqa/curatedv1-val.csv',
                                subsample0=1)

    logreg = linear_model.LogisticRegression(C=0.01, verbose=1, n_jobs=7)
    logreg.fit(logreg_M(*Xtrain), ytrain)
    ev.eval_anssel(
        logreg.predict_proba(logreg_M(*Xtrain))[:, 1], Xtrain[0], ytrain,
        'Train')
    ev.eval_anssel(
        logreg.predict_proba(logreg_M(*Xtest))[:, 1], Xtest[0], ytest, 'Test')
"""
Performance tuning on anssel-yodaqa:
    * Completely unbalanced, C=1
        Train Accuracy: 0.899176 (y=0 0.983992, y=1 0.334139)
        Train MRR: 0.626233  (on training set, y=0 is subsampled!)
        Test Accuracy: 0.926688 (y=0 0.965770, y=1 0.095908)
        Test MRR: 0.218704
    * sklearn balancing (class_weight='auto'), C=1
        Train Accuracy: 0.816569 (y=0 0.812480, y=1 0.843807)
        Train MRR: 0.620643  (on training set, y=0 is subsampled!)
        Test Accuracy: 0.714450 (y=0 0.727787, y=1 0.430946)
        Test MRR: 0.235821
Example #13
    if conf['embdim'] is not None:
        print('GloVe')
        glove = emb.GloVe(N=conf['embdim'])
    else:
        glove = None

    print('Dataset')
    s0, s1, y, vocab, gr = anssel_train.load_set(trainf)
    s0t, s1t, yt, _, grt = anssel_train.load_set(valf, vocab)

    print('Model')
    model = anssel_train.build_model(glove, vocab, module.prep_model, conf)

    print('Weights')
    model.load_weights(weightsfile)

    print('Prediction')
    ypred = model.predict(gr)['score'][:,0]
    ypredt = model.predict(grt)['score'][:,0]

    ev.eval_anssel(ypred, s0, y, trainf)
    ev.eval_anssel(ypredt, s0t, yt, valf)

    with open(trec_qrels_file, 'wt') as f:
        save_trec_qrels(f, s0t, s1t, yt)
    with open(trec_top_file, 'wt') as f:
        save_trec_top(f, s0t, s1t, ypredt, modelname)
    mapt = trec_eval_get(trec_qrels_file, trec_top_file, 'map')
    print('%s MAP: %f' % (valf, mapt))
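`trec_eval_get` presumably shells out to the trec_eval binary. A sketch of that call, assuming trec_eval is on PATH and prints aggregate rows of the form "<measure> all <value>":

import subprocess

def trec_eval_metric(qrels_file, top_file, metric):
    out = subprocess.check_output(['trec_eval', '-m', metric, qrels_file, top_file])
    for line in out.decode().split('\n'):
        fields = line.split()
        # aggregate rows look like: map   all   0.6543
        if len(fields) == 3 and fields[0] == metric and fields[1] == 'all':
            return float(fields[2])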
Example #14
    e0, e1, y = load_set(train_filename, st, args.cache_dir)
    e0t, e1t, yt = load_set(val_filename, st, args.cache_dir)

    model = prep_model(N)

    model.compile(loss={'score': 'binary_crossentropy'},
                  optimizer=Adam(lr=0.001))
    hist = model.fit({
        'e0': e0,
        'e1': e1,
        'score': y
    },
                     batch_size=20,
                     nb_epoch=2000,
                     validation_data={
                         'e0': e0t,
                         'e1': e1t,
                         'score': yt
                     })

    ev.eval_anssel(
        model.predict({
            'e0': e0,
            'e1': e1
        })['score'][:, 0], e0, e1, y, 'Train')  # train labels are y, not yt
    ev.eval_anssel(
        model.predict({
            'e0': e0t,
            'e1': e1t
        })['score'][:, 0], e0t, e1t, yt, 'Test')
Example #15
    parser.add_argument("--wang", help="whether to run on Wang inst. of YodaQA dataset", type=int, default=0)

    parser.add_argument("--cache_dir", help="directory where to save/load cached datasets", type=str, default="")

    # possible: /storage/ostrava1/home/nadvorj1/skip-thoughts/
    parser.add_argument("--skipthoughts_datadir", help="directory with precomputed Skip_thoughts embeddings (containing bi_skip.npz...)", type=str, default="")
    args = parser.parse_args()

    if args.wang == 1:
        train_filename = "data/anssel/wang/train-all.csv"
        val_filename = "data/anssel/wang/test.csv"
    else:
        train_filename = "data/anssel/yodaqa/curatedv2-training.csv"
        val_filename = "data/anssel/yodaqa/curatedv2-val.csv"

    st = emb.SkipThought(datadir=args.skipthoughts_datadir, uni_bi="combined")
    N = st.N

    e0, e1, y = load_set(train_filename, st, args.cache_dir)
    e0t, e1t, yt = load_set(val_filename, st, args.cache_dir)

    model = prep_model(N)

    model.compile(loss={'score': 'binary_crossentropy'}, optimizer=Adam(lr=0.001))
    hist = model.fit({'e0': e0, 'e1': e1, 'score': y},
                     batch_size=20, nb_epoch=2000,
                     validation_data={'e0': e0t, 'e1': e1t, 'score': yt})

    ev.eval_anssel(model.predict({'e0': e0, 'e1': e1})['score'][:, 0], e0, y, 'Train')  # train labels are y, not yt
    ev.eval_anssel(model.predict({'e0': e0t, 'e1': e1t})['score'][:, 0], e0t, yt, 'Test')
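Skip-thought encoding is slow, which is why `load_set` takes a `cache_dir`. A hypothetical sketch of the caching idea, keyed on the dataset filename (the repo's actual cache layout may differ):

import hashlib
import os
import numpy as np

def cached_load(cache_dir, filename, embed_fn):
    # Reuse embedding matrices from a previous run when available.
    path = os.path.join(cache_dir, hashlib.md5(filename.encode()).hexdigest() + '.npz')
    if cache_dir and os.path.exists(path):
        d = np.load(path)
        return d['e0'], d['e1'], d['y']
    e0, e1, y = embed_fn(filename)
    if cache_dir:
        np.savez(path, e0=e0, e1=e1, y=y)
    return e0, e1, y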
Example #16
    mrr = []
    mrrv = []
    mrrt = []
    mapt = []
    for i in range(niter):
        runid = '%s-%x-%02d' % (modelname, h, i)
        print('RunID: %s  (%s)' % (runid, ps))

        model = anssel_train.train_and_eval(runid, module.prep_model, conf, glove, vocab, gr, s0, grv, s0v, do_eval=False)

        print('Predict&Eval (best val epoch)')
        ypred = model.predict(gr)['score'][:,0]
        ypredv = model.predict(grv)['score'][:,0]
        ypredt = model.predict(grt)['score'][:,0]

        mrr.append(ev.eval_anssel(ypred, s0, y, trainf))
        mrrv.append(ev.eval_anssel(ypredv, s0v, yv, valf))
        mrrt.append(ev.eval_anssel(ypredt, s0t, yt, testf))
        mapt.append(ev_map(s0t, s1t, yt, ypredt, testf))

        rdata = {'ps': ps, 'ypred': (ypred, ypredv, ypredt), 'mrr': (mrr, mrrv, mrrt), 'map': (None, None, mapt)}
        pickle.dump(rdata, open('%s-res.pickle' % (runid,), 'wb'), protocol=2)

    brr = stat(niter, trainf, 'MRR', mrr)
    brrv = stat(niter, valf, 'MRR', mrrv)
    bapt = stat(niter, testf, 'MAP', mapt)
    brrt = stat(niter, testf, 'MRR', mrrt)

    # README table format:
    print(                  '| % -24s | %.6f    | %.6f | %.6f | %.6f | %s' % (modelname, np.mean(mrr), np.mean(mrrv), np.mean(mapt), np.mean(mrrt),
                                                                              '(defaults)' if not params else ' '.join(['``%s``' % (p,) for p in params])))
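`stat` aggregates the per-run metrics before the README table row is printed; its role is plausibly a mean with a spread estimate over the niter runs (a guess, the actual implementation lives in the repo):

import numpy as np

def stat(niter, fname, metric, values):
    # Report mean and sample std over repeated training runs,
    # since single-run MRR/MAP is noisy.
    v = np.asarray(values)
    print('%s %s: %f +- %f (%d runs)' % (fname, metric, v.mean(), v.std(ddof=1), niter))
    return v.mean()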
Example #17
                   layer=Activation(oact))
    model.add_output(name='score', input='scoreS')
    return model


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Benchmark CNN on a bipartite ranking task (answer selection)")
    parser.add_argument("-N", help="GloVe dim", type=int, default=300)
    parser.add_argument("--wang", help="whether to run on Wang inst. of YodaQA dataset", type=int, default=0)
    parser.add_argument("--params", help="additional training parameters", type=str, default='')
    args = parser.parse_args()

    glove = emb.GloVe(N=args.N)
    if args.wang == 1:
        s0, s1, y, vocab, gr = load_set('data/anssel/wang/train-all.csv')
        s0t, s1t, yt, _, grt = load_set('data/anssel/wang/dev.csv', vocab)
    else:
        s0, s1, y, vocab, gr = load_set('data/anssel/yodaqa/curatedv1-training.csv')
        s0t, s1t, yt, _, grt = load_set('data/anssel/yodaqa/curatedv1-val.csv', vocab)

    kwargs = eval('dict(' + args.params + ')')  # parses CLI params like "l2reg=1e-4"; trusted input only
    model = prep_model(glove, vocab, oact='linear', **kwargs)
    model.compile(loss={'score': ranknet}, optimizer='adam')  # for 'binary_crossentropy', drop the custom oact
    model.fit(gr, validation_data=grt,
              callbacks=[AnsSelCB(s0t, grt),
                         ModelCheckpoint('weights-cnn-bestval.h5', save_best_only=True, monitor='mrr', mode='max')],
              batch_size=160, nb_epoch=8)
    model.save_weights('weights-cnn-final.h5', overwrite=True)
    ev.eval_anssel(model.predict(gr)['score'][:,0], s0, y, 'Train')
    ev.eval_anssel(model.predict(grt)['score'][:,0], s0t, yt, 'Val')
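The `ranknet` objective paired with `oact='linear'` trains unbounded ranking scores rather than probabilities, which is why the comment says to drop the custom oact for binary_crossentropy. The classic RankNet pairwise loss it is named after, as a numpy sketch (pysts' in-graph Keras objective is necessarily structured differently):

import numpy as np

def ranknet_pair_loss(s_pos, s_neg):
    # Pairwise cross-entropy: large when a correct answer scores
    # below an incorrect one for the same question.
    return np.log1p(np.exp(-(s_pos - s_neg)))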
Example #18
                   layer=Dense(1, W_regularizer=l2(l2reg)))
    model.add_node(name='outS', input='out',
                   layer=Activation('sigmoid'))

    model.add_output(name='score', input='outS')
    return model


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Benchmark kst1503 on binary classification / point ranking task (anssel-yodaqa)")
    parser.add_argument("-N", help="GloVe dim", type=int, default=300)
    parser.add_argument("--balance", help="whether to manually balance the dataset", type=int, default=1)
    parser.add_argument("--wang", help="whether to run on Wang inst. of YodaQA dataset", type=int, default=0)
    args = parser.parse_args()

    glove = emb.GloVe(N=args.N)
    if args.wang == 1:
        Xtrain, ytrain = load_set(glove, 'data/anssel/wang/train-all.csv', balance=(args.balance == 1))
        Xtest, ytest = load_set(glove, 'data/anssel/wang/test.csv', subsample0=1)
    else:
        Xtrain, ytrain = load_set(glove, 'data/anssel/yodaqa/curatedv1-training.csv', balance=(args.balance == 1))
        Xtest, ytest = load_set(glove, 'data/anssel/yodaqa/curatedv1-val.csv', subsample0=1)

    model = prep_model(glove)
    model.compile(loss={'score': 'binary_crossentropy'}, optimizer='adam')
    model.fit({'e0': Xtrain[0], 'e1': Xtrain[1], 'score': ytrain},
              batch_size=20, nb_epoch=2000,
              validation_data={'e0': Xtest[0], 'e1': Xtest[1], 'score': ytest})
    ev.eval_anssel(model.predict({'e0': Xtrain[0], 'e1': Xtrain[1]})['score'][:, 0], Xtrain[0], ytrain, 'Train')
    ev.eval_anssel(model.predict({'e0': Xtest[0], 'e1': Xtest[1]})['score'][:, 0], Xtest[0], ytest, 'Test')
Example #19
    if args.wang == 1:
        s0, s1, y, vocab, gr = load_set('data/anssel/wang/train-all.csv',
                                        glove)
        s0t, s1t, yt, _, grt = load_set('data/anssel/wang/dev.csv', glove,
                                        vocab)
    else:
        s0, s1, y, vocab, gr = load_set(
            'data/anssel/yodaqa/curatedv1-training.csv', glove)
        s0t, s1t, yt, _, grt = load_set('data/anssel/yodaqa/curatedv1-val.csv',
                                        glove, vocab)

    kwargs = eval('dict(' + args.params + ')')
    model = prep_model(glove, vocab, oact='linear', **kwargs)
    model.compile(
        loss={'score': ranknet},
        optimizer='adam')  # for 'binary_crossentropy', drop the custom oact
    model.fit(gr,
              validation_data=grt,
              callbacks=[
                  AnsSelCB(s0t, grt),
                  ModelCheckpoint('weights-cnn-bestval.h5',
                                  save_best_only=True,
                                  monitor='mrr',
                                  mode='max')
              ],
              batch_size=160,
              nb_epoch=8)
    model.save_weights('weights-cnn-final.h5', overwrite=True)
    ev.eval_anssel(model.predict(gr)['score'][:, 0], s0, s1, y, 'Train')
    ev.eval_anssel(model.predict(grt)['score'][:, 0], s0t, s1t, yt, 'Val')
Example #20
    parser = argparse.ArgumentParser(description="Benchmark CNN on a bipartite ranking task (answer selection)")
    parser.add_argument("-N", help="GloVe dim", type=int, default=300)
    parser.add_argument("--wang", help="whether to run on Wang inst. of YodaQA dataset", type=int, default=0)
    parser.add_argument("--params", help="additional training parameters", type=str, default="")
    args = parser.parse_args()

    glove = emb.GloVe(N=args.N)
    if args.wang == 1:
        s0, s1, y, vocab, gr = load_set("data/anssel/wang/train-all.csv")
        s0t, s1t, yt, _, grt = load_set("data/anssel/wang/dev.csv", vocab)
    else:
        s0, s1, y, vocab, gr = load_set("data/anssel/yodaqa/curatedv1-training.csv")
        s0t, s1t, yt, _, grt = load_set("data/anssel/yodaqa/curatedv1-val.csv", vocab)

    kwargs = eval("dict(" + args.params + ")")
    model = prep_model(glove, vocab, oact="linear", **kwargs)
    model.compile(loss={"score": ranknet}, optimizer="adam")  # for 'binary_crossentropy', drop the custom oact
    model.fit(
        gr,
        validation_data=grt,
        callbacks=[
            AnsSelCB(s0t, grt),
            ModelCheckpoint("weights-cnn-bestval.h5", save_best_only=True, monitor="mrr", mode="max"),
        ],
        batch_size=160,
        nb_epoch=8,
    )
    model.save_weights("weights-cnn-final.h5", overwrite=True)
    ev.eval_anssel(model.predict(gr)["score"][:, 0], s0, s1, y, "Train")
    ev.eval_anssel(model.predict(grt)["score"][:, 0], s0t, s1t, yt, "Val")