Example 1
def train_lstm(model_filename, weights_filename, l1_d=128, l2_d=128, b_s=128, bi=False):
    """
    Trains a two-layer LSTM and saves the model and weights to the files specified.

    Args:
        model_filename: The filename to save the model
        weights_filename: The filename to save the weights
        l1_d: Layer one dimensions
        l2_d: Layer two dimensions
        b_s: Batch size
        bi: If True, train a bidirectional LSTM instead of a unidirectional one

    Returns:
        model: The LSTM model fit on the training data
    """
    dm = DataModel()
    texts, labels = dm.get_train_data()
    word_index, data = utils.get_word_index(texts)
    x_train, y_train, x_val, y_val, _, _ = utils.get_train_val_test_data(
        data, labels)
    word_embeddings = utils.get_glove_embeddings()
    embedding_matrix = utils.get_embedding_matrix(word_embeddings, word_index)
    if bi:
        model = _bi_lstm(embedding_matrix, x_train, y_train, x_val,
                         y_val, 3, word_index, l1_d, l2_d, b_s)
    else:
        model = _lstm(embedding_matrix, x_train, y_train, x_val,
                      y_val, 3, word_index, l1_d, l2_d, b_s)
    save_model(model, model_filename, weights_filename)
    return model
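
A minimal usage sketch. The two filenames are hypothetical (the function passes them straight to save_model, so any writable paths work); the keyword values simply restate the defaults:

# Hypothetical filenames; adjust to your project layout.
model = train_lstm('lstm_model.json', 'lstm_weights.h5',
                   l1_d=128, l2_d=128, b_s=128, bi=True)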
Example 2
def accuracy_test(model_filename, weights_filename):
    """
    Evaluates a saved model on the last 20% of the training data
    and prints the test loss and accuracy.

    Args:
        model_filename: The filename of the saved model
        weights_filename: The filename of the saved weights
    """
    dm = DataModel()
    x_train, y_train = dm.get_train_data()
    train_samples = int(0.8 * len(x_train))
    x_test = x_train[train_samples:]
    y_test = y_train[train_samples:]

    _, data = utils.get_word_index(x_test)
    labels = to_categorical(np.asarray(y_test))
    md = utils.load_model(model_filename, weights_filename)
    result = md.evaluate(data, labels)
    print('\nTest loss:', result[0])
    print('Test accuracy:', result[1])
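
The save_model and utils.load_model helpers are not shown in these examples. A minimal sketch of the standard Keras two-file pattern they most likely follow (the function names match the calls above, but the bodies and the compile settings are assumptions):

from keras.models import model_from_json

def save_model(model, model_filename, weights_filename):
    # Assumed implementation: serialize the architecture as JSON
    # and the weights as HDF5, matching the two-file convention above.
    with open(model_filename, 'w') as f:
        f.write(model.to_json())
    model.save_weights(weights_filename)

def load_model(model_filename, weights_filename):
    # Assumed implementation: rebuild the architecture, restore the
    # weights, then compile so evaluate() can report loss and accuracy.
    # The loss and optimizer here are assumptions, not the project's
    # confirmed settings.
    with open(model_filename) as f:
        model = model_from_json(f.read())
    model.load_weights(weights_filename)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam', metrics=['accuracy'])
    return model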
Example 3
def generate_w2c_word_embeddings():
    """
    Generates word2vec embeddings with a skip-gram model trained on the
    training texts, and pickles the learned weights.
    """
    dm = DataModel()
    train_x, _ = dm.get_train_data()

    w2c = Word2Vec(method="skipgram",
                   corpus=list(train_x),
                   window_size=5,
                   n_hidden=128,
                   n_epochs=3,
                   learning_rate=0.08)

    # W1 and W2 are the learned weight matrices; loss_vs_epoch tracks
    # the training loss per epoch.
    W1, W2, loss_vs_epoch = w2c.run()

    pkl_dump = [W1, W2, loss_vs_epoch]
    with open('embeddings.pickle', 'wb') as handle:
        pickle.dump(pkl_dump, handle)
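
A minimal sketch of reading the pickled embeddings back, assuming only the [W1, W2, loss_vs_epoch] list layout written above:

import pickle

# Unpack the list written by generate_w2c_word_embeddings().
with open('embeddings.pickle', 'rb') as handle:
    W1, W2, loss_vs_epoch = pickle.load(handle)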