def train_lstm(model_filename, weights_filename, l1_d=128, l2_d=128, b_s=128, bi=False):
    """Train a two-layer LSTM (optionally bidirectional) and persist it to disk.

    Args:
        model_filename: Path the model architecture is saved to.
        weights_filename: Path the model weights are saved to.
        l1_d: Dimensionality of the first LSTM layer.
        l2_d: Dimensionality of the second LSTM layer.
        b_s: Training batch size.
        bi: When True, train the bidirectional variant (``_bi_lstm``)
            instead of the plain one (``_lstm``).

    Returns:
        model: The LSTM model fit on the training data.
    """
    data_model = DataModel()
    texts, labels = data_model.get_train_data()
    word_index, data = utils.get_word_index(texts)
    x_train, y_train, x_val, y_val, _, _ = utils.get_train_val_test_data(
        data, labels)

    # Build the embedding matrix from pre-trained GloVe vectors.
    glove_embeddings = utils.get_glove_embeddings()
    embedding_matrix = utils.get_embedding_matrix(glove_embeddings, word_index)

    # Both builders share the same signature; pick one based on `bi`.
    build = _bi_lstm if bi else _lstm
    model = build(embedding_matrix, x_train, y_train, x_val, y_val, 3,
                  word_index, l1_d, l2_d, b_s)

    save_model(model, model_filename, weights_filename)
    return model
def accuracy_test(model_filename, weights_filename):
    """Evaluate a saved model on the held-out tail (last 20%) of the data.

    Loads the model and weights from disk, runs ``evaluate`` on the final
    20% of the dataset returned by ``DataModel.get_train_data``, and prints
    the resulting loss and accuracy.

    Args:
        model_filename: Path the model architecture is loaded from.
        weights_filename: Path the model weights are loaded from.
    """
    dm = DataModel()
    x_train, y_train = dm.get_train_data()

    # Hold out the last 20% as the test split.
    train_samples = int(0.8 * len(x_train))
    x_test = x_train[train_samples:]
    y_test = y_train[train_samples:]

    # NOTE(review): the word index here is rebuilt from the test texts only,
    # so token ids may not match those used at training time — confirm that
    # utils.get_word_index uses a shared/persisted vocabulary.
    _, data = utils.get_word_index(x_test)
    labels = to_categorical(np.asarray(y_test))

    md = utils.load_model(model_filename, weights_filename)
    # evaluate() returns [loss, accuracy] for a model compiled with an
    # accuracy metric.
    result = md.evaluate(data, labels)
    print('\nTest loss:', result[0])
    print('Test accuracy:', result[1])
def generate_w2c_word_embeddings(output_filename='embeddings.pickle'):
    """Generate word2vec (skip-gram) embeddings and pickle them to disk.

    Trains a skip-gram Word2Vec model on the training corpus and dumps the
    two weight matrices plus the per-epoch loss history as a pickled list
    ``[W1, W2, loss_vs_epoch]``.

    Args:
        output_filename: Path of the pickle file to write. Defaults to
            ``'embeddings.pickle'`` for backward compatibility.
    """
    dm = DataModel()
    train_x, _ = dm.get_train_data()
    w2c = Word2Vec(method="skipgram", corpus=list(train_x), window_size=5,
                   n_hidden=128, n_epochs=3, learning_rate=0.08)
    W1, W2, loss_vs_epoch = w2c.run()
    with open(output_filename, 'wb') as handle:
        pickle.dump([W1, W2, loss_vs_epoch], handle)