Ejemplo n.º 1
0
def adverse_generate2(gen_model, ad_model, cmodel, train, word_index, glove, threshold = 0.95, batch_size = 64, ci = False):
    """Sample adversarial hypotheses in minibatches and keep the good ones.

    For each full minibatch of `train`, generates hypotheses with `gen_model`,
    scores them with the adversarial model `ad_model` and the classifier
    `cmodel`, and keeps the examples whose combined score exceeds `threshold`.

    Returns a list of tuples
    (classifier_prob, adversarial_prob, premise, generated_hypothesis, label),
    stopping early once more than 200 results have been collected.
    """
    mb = load_data.get_minibatches_idx(len(train), batch_size, shuffle=True)
    p = Progbar(len(train))
    results = []
    for batch_num, train_index in mb:
        # The generator needs fixed-size batches; skip the final short one.
        if len(train_index) != batch_size:
            continue
        orig_batch = [train[k] for k in train_index]
        class_indices = [load_data.LABEL_LIST.index(train[k][2]) for k in train_index]
        # np.random.randint's upper bound is exclusive; the +1 preserves the
        # inclusive range of the deprecated np.random.random_integers call.
        probs = generation.generation_predict_embed(gen_model, word_index.index, orig_batch,
                     np.random.randint(0, len(train) + 1, len(orig_batch)), class_indices = class_indices)
        gen_batch = generation.get_classes(probs)
        ad_preds = ad_model.predict_on_batch(gen_batch)[0].flatten()

        # Build classifier input: premise ++ "--" ++ generated hypothesis.
        X = []
        for j in range(len(orig_batch)):
            concat = orig_batch[j][0] + ["--"] + word_index.get_seq(gen_batch[j])
            X.append(load_data.load_word_vecs(concat, glove))
        X = np.array(X)
        X_padded = load_data.pad_sequences(X, dim = len(X[0][0]))
        # Probability the classifier assigns to the *intended* label of each example.
        cpreds = cmodel.predict_on_batch(X_padded)[0][np.arange(len(X_padded)), class_indices]

        pred_seq = [word_index.print_seq(gen) for gen in gen_batch]
        premises = [" ".join(ex[0]) for ex in orig_batch]
        classes = np.array(load_data.LABEL_LIST)[class_indices]
        zipped = zip(cpreds, ad_preds, premises, pred_seq, classes)
        # Filter once (the original computed this list twice).
        accepted = [el for el in zipped if el[0] * el[1] > threshold]
        results += accepted
        p.add(len(train_index), [('added', float(len(accepted)))])
        if len(results) > 200:
            # batch_num is the outer minibatch index; the original shadowed it
            # with the inner loop variable, printing a meaningless count.
            print (batch_num + 1) * batch_size
            return results
    return results
Ejemplo n.º 2
0
def predict_example(premise, hypothesis, model, glove):
    concat = premise.split() + ["--"] + hypothesis.split()
    for word in concat:
        if word not in glove:
	    print word, 'not in glove'
	    return
    vec = load_data.load_word_vecs(concat, glove)
    return model.predict_on_batch(np.expand_dims(vec, axis=0))
Ejemplo n.º 3
0
def predict_example(premise, hypothesis, model, glove):
    concat = premise.split() + ["--"] + hypothesis.split()
    for word in concat:
        if word not in glove:
            print word, 'not in glove'
            return
    vec = load_data.load_word_vecs(concat, glove)
    return model.predict_on_batch(np.expand_dims(vec, axis=0))
Ejemplo n.º 4
0
import paraphrase
import numpy as np

import itertools
import os

 

if __name__ == "__main__":
    # Load the SNLI splits and the pre-trained GloVe vectors.
    train, dev, test = load_data.load_all_snli_datasets('data/snli_1.0/')
    glove = load_data.import_glove('data/snli_vectors.txt')

    # Warm the embedding cache for every premise+hypothesis token in
    # train and dev, plus the end-of-sentence marker.
    for ex in itertools.chain(train, dev):
        load_data.load_word_vecs(ex[0] + ex[1], glove)
    load_data.load_word_vec('EOS', glove)
    wi = load_data.WordIndex(glove)

def grid_experiments(train, dev, glove, embed_size = 300, hidden_size = 100):
    """Grid-search learning rate, dropout and regularization.

    Trains one model per (lr, dropout, reg) combination and saves each under
    models/<name>, where <name> encodes the hyperparameters with the decimal
    points stripped (e.g. lr0001_drop01_reg0003).

    The original body was tab-indented under a space-indented header
    (a TabError on Python 3); indentation is normalized here.
    """
    lr_vec = [0.001, 0.0003, 0.0001]
    dropout_vec = [0.0, 0.1, 0.2]
    reg_vec = [0.0, 0.001, 0.0003, 0.0001]

    for lr, dropout, reg in itertools.product(lr_vec, dropout_vec, reg_vec):
        filename = ('lr' + str(lr).replace('.', '') +
                    '_drop' + str(dropout).replace('.', '') +
                    '_reg' + str(reg).replace('.', ''))
        # Single-argument parenthesized print emits identical text on
        # Python 2 and 3 (the original used a py2 print statement).
        print('Model ' + filename)
        model = models.init_model(embed_size, hidden_size, lr, dropout, reg)
        models.train_model(train, dev, glove, model, 'models/' + filename)