def train(train_loc, dev_loc, shape, settings):
    train_texts1, train_texts2, train_labels = read_snli(train_loc)
    dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)

    print("Loading spaCy")
    nlp = spacy.load("en_vectors_web_lg")
    assert nlp.path is not None
    print("Processing texts...")
    train_X = create_dataset(nlp, train_texts1, train_texts2, 100, shape[0])
    dev_X = create_dataset(nlp, dev_texts1, dev_texts2, 100, shape[0])

    print("Compiling network")
    model = build_model(get_embeddings(nlp.vocab), shape, settings)

    print(settings)
    model.fit(
        train_X,
        train_labels,
        validation_data=(dev_X, dev_labels),
        epochs=settings["nr_epoch"],
        batch_size=settings["batch_size"],
    )
    if not (nlp.path / "similarity").exists():
        (nlp.path / "similarity").mkdir()
    print("Saving to", nlp.path / "similarity")
    weights = model.get_weights()
    # Remove the embedding matrix; we can reconstruct it from the vocab on load.
    del weights[1]
    with (nlp.path / "similarity" / "model").open("wb") as file_:
        pickle.dump(weights, file_)
    with (nlp.path / "similarity" / "config.json").open("w") as file_:
        file_.write(model.to_json())
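# Hypothetical invocation of the train() function above, shown only as a sketch:
# the SNLI paths and the exact contents of `shape` and `settings` are assumptions,
# except that train() itself reads only settings["nr_epoch"] and settings["batch_size"].
if __name__ == "__main__":
    shape = (64, 200, 3)  # assumed layout: (max_length, nr_hidden, nr_class)
    settings = {
        "lr": 0.001,
        "dropout": 0.2,
        "batch_size": 128,
        "nr_epoch": 10,
    }
    train(
        "snli_1.0/snli_1.0_train.jsonl",
        "snli_1.0/snli_1.0_dev.jsonl",
        shape,
        settings,
    )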
def train(train_loc, dev_loc, shape, settings):
    train_texts1, train_texts2, train_labels = read_snli(train_loc)
    dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)

    print("Loading spaCy")
    nlp = spacy.load('en')
    assert nlp.path is not None
    print("Compiling network")
    model = build_model(get_embeddings(nlp.vocab), shape, settings)
    print("Processing texts...")
    Xs = []
    for texts in (train_texts1, train_texts2, dev_texts1, dev_texts2):
        Xs.append(
            get_word_ids(
                list(nlp.pipe(texts, n_threads=20, batch_size=20000)),
                max_length=shape[0],
                rnn_encode=settings['gru_encode'],
                tree_truncate=settings['tree_truncate']))
    train_X1, train_X2, dev_X1, dev_X2 = Xs
    print(settings)
    model.fit(
        [train_X1, train_X2],
        train_labels,
        validation_data=([dev_X1, dev_X2], dev_labels),
        nb_epoch=settings['nr_epoch'],
        batch_size=settings['batch_size'])
    if not (nlp.path / 'similarity').exists():
        (nlp.path / 'similarity').mkdir()
    print("Saving to", nlp.path / 'similarity')
    weights = model.get_weights()
    # Drop the embedding matrix (weights[0]); it is rebuilt from the vocab on load.
    with (nlp.path / 'similarity' / 'model').open('wb') as file_:
        pickle.dump(weights[1:], file_)
    # model.to_json() returns a str, so the config must be opened in text mode.
    with (nlp.path / 'similarity' / 'config.json').open('w') as file_:
        file_.write(model.to_json())
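# The helper get_word_ids() used above is not shown in this file. The sketch
# below is an assumption about what it roughly does (pad/truncate each doc to
# max_length and map tokens to vector-table ranks); the real helper also
# honours the rnn_encode and tree_truncate flags, which this sketch only accepts.
import numpy as np

def get_word_ids_sketch(docs, max_length=100, nr_unk=100,
                        rnn_encode=False, tree_truncate=False):
    ids = np.zeros((len(docs), max_length), dtype='int32')
    for i, doc in enumerate(docs):
        for j, token in enumerate(doc):
            if j >= max_length:
                break
            if token.has_vector:
                # Known word: offset its vector-table rank past the ids
                # reserved for padding and unknown words.
                ids[i, j] = token.rank + nr_unk + 1
            else:
                # Unknown word: hash into one of the nr_unk reserved ids.
                ids[i, j] = token.rank % nr_unk + 1
    return ids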
def load_model(path, nlp):
    # `path` is expected to end with a path separator, e.g. '.../similarity/'.
    with open(path + 'config.json') as file_:
        model = model_from_json(file_.read())
    with open(path + 'model', 'rb') as file_:
        weights = pickle.load(file_)
    # The embedding matrix was dropped before saving; rebuild it from the vocab.
    embeddings = get_embeddings(nlp.vocab)
    model.set_weights([embeddings] + weights)
    return model
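# Hypothetical round trip with load_model(): the directory layout matches what
# the train() functions above write (a 'similarity' directory containing
# config.json plus pickled weights). The model name and path are assumptions.
import spacy

nlp = spacy.load('en')
model = load_model(str(nlp.path / 'similarity') + '/', nlp)  # path must end in '/'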
def train(model_dir, train_loc, dev_loc, shape, settings):
    # Skeleton: loading train_texts/train_labels and dev_texts/dev_labels
    # from train_loc/dev_loc is elided here.
    print("Loading spaCy")
    nlp = spacy.load('en', tagger=False, parser=False, entity=False, matcher=False)
    print("Compiling network")
    model = build_model(get_embeddings(nlp.vocab), shape, settings)
    print("Processing texts...")
    train_X = get_features(list(nlp.pipe(train_texts)))
    dev_X = get_features(list(nlp.pipe(dev_texts)))
    model.fit(
        train_X,
        train_labels,
        validation_data=(dev_X, dev_labels),
        nb_epoch=settings['nr_epoch'],
        batch_size=settings['batch_size'])
def train(model_dir, train_loc, dev_loc, shape, settings):
    train_texts1, train_texts2, train_labels = read_snli(train_loc)
    dev_texts1, dev_texts2, dev_labels = read_snli(dev_loc)

    print("Loading spaCy")
    nlp = spacy.load('en')
    print("Compiling network")
    model = build_model(get_embeddings(nlp.vocab), shape, settings)
    print("Processing texts...")
    Xs = []
    for texts in (train_texts1, train_texts2, dev_texts1, dev_texts2):
        Xs.append(
            get_word_ids(
                list(nlp.pipe(texts, n_threads=20, batch_size=20000)),
                max_length=shape[0],
                rnn_encode=settings['gru_encode'],
                tree_truncate=settings['tree_truncate']))
    train_X1, train_X2, dev_X1, dev_X2 = Xs
    print(settings)
    model.fit(
        [train_X1, train_X2],
        train_labels,
        validation_data=([dev_X1, dev_X2], dev_labels),
        nb_epoch=settings['nr_epoch'],
        batch_size=settings['batch_size'])
def attention_foldrun(X, X2, y, name, Xte=None, Xte2=None, start_fold=0):
    # Relies on module-level `nlp`, `shape` and `settings` being defined.
    skf = StratifiedKFold(n_splits=10, random_state=111, shuffle=True)
    if isinstance(X, pd.DataFrame):
        X = X.values
    if isinstance(y, pd.DataFrame):
        y = y.is_duplicate.values
    if isinstance(y, pd.Series):
        y = y.values
    print('Running Decomposable Attention model with parameters:', settings)

    losses = []
    train_splits = []
    val_splits = []
    # Materialise all ten splits up front so that start_fold can index into them.
    for tr_index, val_index in skf.split(X, y):
        train_splits.append(tr_index)
        val_splits.append(val_index)

    for i in range(start_fold, start_fold + 2):
        X_trq1, X_valq1 = X[train_splits[i]], X[val_splits[i]]
        X_trq2, X_valq2 = X2[train_splits[i]], X2[val_splits[i]]
        y_tr, y_val = y[train_splits[i]], y[val_splits[i]]
        y_tr = to_categorical(y_tr)
        y_val = to_categorical(y_val)
        t = time.time()
        print('Start training on fold: {}'.format(i))
        callbacks = [
            ModelCheckpoint('checks/decomposable_{}_10SKF_fold{}.h5'.format(name, i),
                            monitor='val_loss', verbose=0, save_best_only=True),
            EarlyStopping(monitor='val_loss', patience=4, verbose=1)]
        model = build_model(get_embeddings(nlp.vocab), shape, settings)
        model.fit([X_trq1, X_trq2], y_tr,
                  validation_data=([X_valq1, X_valq2], y_val),
                  nb_epoch=settings['nr_epoch'],
                  batch_size=settings['batch_size'],
                  callbacks=callbacks)
        val_pred = model.predict([X_valq1, X_valq2], batch_size=64)
        score = log_loss(y_val, val_pred)
        losses.append(score)

        print('Predicting training set.')
        val_pred = pd.DataFrame(val_pred, index=val_splits[i])
        val_pred.columns = ['attention_feat1', 'attention_feat2']
        val_pred.to_pickle('OOF_preds/train_attentionpreds_fold{}.pkl'.format(i))
        print(val_pred.head())

        if Xte is not None:
            print('Predicting test set.')
            test_preds = model.predict([Xte, Xte2], batch_size=64)
            test_preds = pd.DataFrame(test_preds)
            test_preds.columns = ['attention_feat1', 'attention_feat2']
            test_preds.to_pickle('OOF_preds/test_attentionpreds_fold{}.pkl'.format(i))
            del test_preds
            gc.collect()

        print('Final score for fold {}:'.format(i), score, '\n',
              'Time it took to train and predict on fold:', time.time() - t, '\n')
        del X_trq1, X_valq1, X_trq2, X_valq2, y_tr, y_val, val_pred
        gc.collect()

    print('Mean logloss across the folds run (10-fold SKF):',
          np.array(losses).mean(axis=0))
    return
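# Minimal, self-contained illustration (with dummy data) of the fold bookkeeping
# used by attention_foldrun() above: all ten stratified splits are materialised
# first, then individual folds are selected by index via start_fold.
import numpy as np
from sklearn.model_selection import StratifiedKFold

X_dummy = np.random.rand(100, 5)
y_dummy = np.random.randint(0, 2, size=100)
skf = StratifiedKFold(n_splits=10, random_state=111, shuffle=True)
splits = list(skf.split(X_dummy, y_dummy))
train_idx, val_idx = splits[0]  # the fold a start_fold of 0 would select
print(len(train_idx), len(val_idx))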
def train(train_loc, dev_loc, shape, settings):
    (train_texts1, train_texts2, train_labels,
     train_styling_arrays_1, train_styling_arrays_2,
     train_TWPs_1, train_TWPs_2) = read_snli(train_loc)
    (dev_texts1, dev_texts2, dev_labels,
     dev_styling_arrays_1, dev_styling_arrays_2,
     dev_TWPs_1, dev_TWPs_2) = read_snli(dev_loc)

    print("Loading spaCy")
    nlp = en_core_web_sm.load()  # en_vectors_web_lg.load(vocab=nlp.vocab)
    path = '/home/ankesh/div_merging_models/alpha1/'
    print("Compiling network")
    model = build_model(get_embeddings(nlp.vocab), shape, settings)
    print("Processing texts...")
    # (Optional) truncate the train/dev arrays here for quick debugging runs.
    Xs = []
    for texts, styling_array, TWP in (
            (train_texts1, train_styling_arrays_1, train_TWPs_1),
            (train_texts2, train_styling_arrays_2, train_TWPs_2),
            (dev_texts1, dev_styling_arrays_1, dev_TWPs_1),
            (dev_texts2, dev_styling_arrays_2, dev_TWPs_2)):
        Xs.append(
            get_word_ids(
                list(nlp.pipe(texts, n_threads=20, batch_size=20000)),
                styling_array,
                TWP,
                max_length=shape[0],
                rnn_encode=settings['gru_encode'],
                tree_truncate=settings['tree_truncate']))
    train_X1, train_X2, dev_X1, dev_X2 = Xs
    print(settings)
    model.fit(
        [train_X1, train_X2],
        train_labels,
        validation_data=([dev_X1, dev_X2], dev_labels),
        nb_epoch=settings['nr_epoch'],
        batch_size=settings['batch_size'])

    print("Saving to", path + 'similarity')
    weights = model.get_weights()
    # Drop the embedding matrix (weights[0]); it is rebuilt from the vocab on load.
    with open(path + 'similarity/' + 'model', 'wb') as file_:
        pickle.dump(weights[1:], file_)
    # model.to_json() returns a str, so the config must be opened in text mode.
    with open(path + 'similarity/' + 'config.json', 'w') as file_:
        file_.write(model.to_json())