import load_data


def subset_missing_glove_word(test_set):
    # Collect the indices of examples containing at least one word that is
    # missing from the bare 300-dimensional GloVe vector file.
    glove_bare = load_data.import_glove('data/snli_vectors_300.txt')
    result = []
    for i, ex in enumerate(test_set):
        # ex[0] and ex[1] are the premise and hypothesis token lists.
        if not all(word in glove_bare for word in (ex[0] + ex[1])):
            result.append(i)
    return result
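# A minimal usage sketch, assuming `test` holds (premise, hypothesis, ...)
# token-list tuples as produced by load_data.load_all_snli_datasets (that
# tuple layout is an assumption here, not confirmed by this file):
#
#     train, dev, test = load_data.load_all_snli_datasets('data/snli_1.0/')
#     missing = set(subset_missing_glove_word(test))
#     covered_test = [ex for i, ex in enumerate(test) if i not in missing]
#     print len(covered_test), 'of', len(test), 'examples fully covered by GloVe'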
import load_data
import models
import misc
import paraphrase
import numpy as np
import itertools
import os


def grid_experiments(train, dev, glove, embed_size=300, hidden_size=100):
    # Grid search over learning rate, dropout, and L2 regularization strength.
    lr_vec = [0.001, 0.0003, 0.0001]
    dropout_vec = [0.0, 0.1, 0.2]
    reg_vec = [0.0, 0.001, 0.0003, 0.0001]
    for params in itertools.product(lr_vec, dropout_vec, reg_vec):
        # Encode the hyper-parameter setting in the model filename,
        # e.g. 'lr0001_drop00_reg00'.
        filename = ('lr' + str(params[0]).replace('.', '') +
                    '_drop' + str(params[1]).replace('.', '') +
                    '_reg' + str(params[2]).replace('.', ''))
        print 'Model', filename


if __name__ == "__main__":
    train, dev, test = load_data.load_all_snli_datasets('data/snli_1.0/')
    glove = load_data.import_glove('data/snli_vectors.txt')
    # Cache a vector for every token in train/dev, plus an explicit
    # end-of-sentence marker, then build the word index over the vocabulary.
    for ex in train + dev:
        load_data.load_word_vecs(ex[0] + ex[1], glove)
    load_data.load_word_vec('EOS', glove)
    wi = load_data.WordIndex(glove)
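# Standalone sketch of what the grid above enumerates (only itertools needed):
# 3 learning rates x 3 dropout rates x 4 regularizers = 36 configurations.
#
#     import itertools
#     settings = list(itertools.product([0.001, 0.0003, 0.0001],
#                                       [0.0, 0.1, 0.2],
#                                       [0.0, 0.001, 0.0003, 0.0001]))
#     assert len(settings) == 36
#     lr, drop, reg = settings[0]
#     # str(0.001).replace('.', '') == '0001', so the first model is:
#     assert 'lr' + str(lr).replace('.', '') == 'lr0001'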
# Fragment continuing load_ppdb_data; `pickle`, `load_data`, and the GLOVE
# path constant are bound earlier in this module.
        # Keep a paraphrase pair only when the source word has a GloVe
        # vector; index the pair in both directions.
        if p in glove:
            add_pair(result, p, h)
        if h in glove:
            add_pair(result, h, p)
        count += 1
        if count % 100000 == 0:
            print count  # progress indicator over the PPDB entries
    return result


def dump_parap(filename, data):
    # Persist the paraphrase dictionary with pickle.
    with open(filename, 'wb') as f:
        pickle.dump(data, f)


def load_parap(filename):
    with open(filename, 'rb') as f:
        return pickle.load(f)


def add_pair(dct, p, h):
    # Map each word to the set of its paraphrases.
    if p not in dct:
        dct[p] = set()
    dct[p].add(h)


if __name__ == "__main__":
    glove = load_data.import_glove(GLOVE)
    rep = load_ppdb_data(glove)
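# Hypothetical round trip for the pickle helpers above; the filename
# 'ppdb_pairs.pkl' is illustrative only:
#
#     pairs = {}
#     add_pair(pairs, 'quick', 'fast')
#     add_pair(pairs, 'quick', 'rapid')
#     dump_parap('ppdb_pairs.pkl', pairs)
#     assert load_parap('ppdb_pairs.pkl') == {'quick': set(['fast', 'rapid'])}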