Ejemplo n.º 1
0
def cmd_predict_v(dataset='coco',
                  datapath='.',
                  model_path='.',
                  model_name='model.pkl.gz',
                  batch_size=128,
                  output_v='predict_v.npy',
                  output_r='predict_r.npy'):
    """Run the stored model over the validation sentences of `dataset` and
    save both the visual (v) and hidden-state (r) predictions as .npy
    files under `model_path`.

    dataset:     name passed to the data provider (e.g. 'coco')
    datapath:    root directory of the dataset
    model_path:  directory holding the pickled model; outputs go here too
    model_name:  filename of the pickled model bundle
    batch_size:  number of sentences per prediction batch
    output_v/r:  filenames for the saved prediction matrices
    """
    M = load(model_path, model_name=model_name)
    model = M['model']
    batcher = M['batcher']
    mapper = M['batcher'].mapper
    predict_v = predictor_v(model)
    predict_r = predictor_r(model)
    prov = dp.getDataProvider(dataset, root=datapath)
    sents = list(prov.iterSentences(split='val'))
    # Tokenize each sentence and map tokens to the integer codes the
    # model was trained on.
    inputs = list(
        mapper.transform(
            [tokens(sent, tokenizer=batcher.tokenizer) for sent in sents]))
    # Fix: the original Python-2 `print len(...)` statement is a
    # SyntaxError on Python 3; print() works on both versions.
    print(len(model.network.params()))
    preds_v = numpy.vstack([
        predict_v(batcher.batch_inp(batch))
        for batch in grouper(inputs, batch_size)
    ])
    numpy.save(os.path.join(model_path, output_v), preds_v)
    preds_r = numpy.vstack([
        predict_r(batcher.batch_inp(batch))
        for batch in grouper(inputs, batch_size)
    ])
    numpy.save(os.path.join(model_path, output_r), preds_r)
Ejemplo n.º 2
0
def cmd_predict_r(model_path='.',
                  batch_size=128,
                  split='train',
                  output_premise='predict_premise_r.npy',
                  output_hypo='predict_hypo_r.npy',
                  output_labels='entailment_labels.npy'):
    """Predict hidden-state (r) representations for the premises and
    hypotheses of the SNLI `split` and save them, along with the
    entailment labels, as split-prefixed .npy files under `model_path`.

    model_path:  directory holding the pickled artifacts; outputs go here
    batch_size:  number of sentences per prediction batch
    split:       SNLI split name ('train', 'dev', ...)
    output_*:    filenames (prefixed with `split` + '_') for the outputs
    """
    def load(f):
        # Fix: close the gzip handle deterministically instead of
        # leaking it to the garbage collector.
        with gzip.open(os.path.join(model_path, f)) as fh:
            return pickle.load(fh)

    model_name = 'model.pkl.gz'
    # `scaler` is loaded alongside the other artifacts but not used here.
    batcher, scaler, model = map(
        load, ['batcher.pkl.gz', 'scaler.pkl.gz', model_name])
    mapper = batcher.mapper
    predict_r = predictor_r(model)
    sents_premise, sents_hypo, labels = zip(*parse_snli(split=split))
    inputs_premise = list(mapper.transform(sents_premise))
    inputs_hypo = list(mapper.transform(sents_hypo))
    preds_premise_r = numpy.vstack([
        predict_r(batcher.batch_inp(batch))
        for batch in grouper(inputs_premise, batch_size)
    ])
    numpy.save(os.path.join(model_path, split + '_' + output_premise),
               preds_premise_r)
    preds_hypo_r = numpy.vstack([
        predict_r(batcher.batch_inp(batch))
        for batch in grouper(inputs_hypo, batch_size)
    ])
    numpy.save(os.path.join(model_path, split + '_' + output_hypo),
               preds_hypo_r)
    numpy.save(os.path.join(model_path, split + '_' + output_labels), labels)
Ejemplo n.º 3
0
def cmd_predict_r(model_path='.',
                  batch_size=128,
                  split='train',
                  output_premise='predict_premise_r.npy',
                  output_hypo='predict_hypo_r.npy',
                  output_labels='entailment_labels.npy'):
    """Save hidden-state (r) predictions for SNLI premise and hypothesis
    sentences of `split`, plus the entailment labels, as split-prefixed
    .npy files under `model_path`.
    """
    def _load_artifact(name):
        # Training artifacts are stored as gzipped pickles next to the model.
        return pickle.load(gzip.open(os.path.join(model_path, name)))

    batcher, scaler, model = [_load_artifact(name)
                              for name in ('batcher.pkl.gz',
                                           'scaler.pkl.gz',
                                           'model.pkl.gz')]
    predict_r = predictor_r(model)
    sents_premise, sents_hypo, labels = zip(*parse_snli(split=split))
    inputs_premise = list(batcher.mapper.transform(sents_premise))
    inputs_hypo = list(batcher.mapper.transform(sents_hypo))

    def _predict_all(coded):
        # Stack per-batch predictions into one matrix.
        chunks = [predict_r(batcher.batch_inp(group))
                  for group in grouper(coded, batch_size)]
        return numpy.vstack(chunks)

    def _save(suffix, data):
        numpy.save(os.path.join(model_path, split + '_' + suffix), data)

    _save(output_premise, _predict_all(inputs_premise))
    _save(output_hypo, _predict_all(inputs_hypo))
    _save(output_labels, labels)
Ejemplo n.º 4
0
def cmd_predict_v(dataset='coco',
                  datapath='.',
                  model_path='.',
                  model_name='model.pkl.gz',
                  batch_size=128,
                  output_v='predict_v.npy',
                  output_r='predict_r.npy'):
    """Compute visual (v) and hidden-state (r) predictions for the
    validation split of `dataset` and write them to `output_v` /
    `output_r` under `model_path`.
    """
    M = load(model_path, model_name=model_name)
    model = M['model']
    batcher = M['batcher']
    mapper = M['batcher'].mapper
    predict_v = predictor_v(model)
    predict_r = predictor_r(model)
    prov = dp.getDataProvider(dataset, root=datapath)
    sents = list(prov.iterSentences(split='val'))
    # Tokenize and integer-code every validation sentence up front.
    inputs = list(mapper.transform([tokens(sent, tokenizer=batcher.tokenizer)
                                    for sent in sents]))
    # Fix: `print len(...)` is Python-2-only syntax; print() is valid on
    # both Python 2 and Python 3.
    print(len(model.network.params()))
    preds_v = numpy.vstack([predict_v(batcher.batch_inp(batch))
                            for batch in grouper(inputs, batch_size)])
    numpy.save(os.path.join(model_path, output_v), preds_v)
    preds_r = numpy.vstack([predict_r(batcher.batch_inp(batch))
                            for batch in grouper(inputs, batch_size)])
    numpy.save(os.path.join(model_path, output_r), preds_r)