Example no. 1
def sentiment_analysis(load_model, label_type, embs_convert_type,
                       label_type_folder, target_data_folder, save_folder):
    """Train (or load) an LSTM on the source data and return its predictions on the target data."""
    check_type(label_type,
               types_list=['tonality', 'toxicity'],
               type_name='label')
    check_type(embs_convert_type,
               types_list=['mean', 'length_64'],
               type_name='embeddings convert')

    x_target, y_target = load_target_data(label_type=label_type,
                                          convert_type=embs_convert_type,
                                          data_folder=target_data_folder)

    if not load_model:
        x_source, x_source_test, y_source, y_source_test = load_source_data(
            label_type=label_type,
            label_data_folder=label_type_folder,
            convert_type=embs_convert_type)
        model = train_lstm(x_source=x_source,
                           y_source=y_source,
                           label_type=label_type,
                           convert_type=embs_convert_type,
                           save_folder=save_folder,
                           epochs=5)
        predict(model=model, x=x_source_test, y=y_source_test, title='Source')
    else:
        model = load_lstm(label_type=label_type,
                          convert_type=embs_convert_type,
                          folder=save_folder)

    y_pred = predict(model=model, x=x_target, y=y_target, title='Target')

    return y_pred
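
A minimal sketch of how this function might be called; the folder paths below are placeholders, and only the label/embedding type values come from the checks above.

y_pred = sentiment_analysis(load_model=False,
                            label_type='toxicity',
                            embs_convert_type='mean',
                            label_type_folder='data/labels',    # placeholder path
                            target_data_folder='data/target',   # placeholder path
                            save_folder='saved_models')         # placeholder path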
Example no. 2
def main():
    model_filename = 'models/lstm_model_3.json'
    weights_filename = 'models/lstm_model_3.h5'
    kaggle_filename = 'output/kaggle_lstm_256.csv'
    lstm.train_lstm(model_filename,
                    weights_filename,
                    l1_d=256,
                    l2_d=256,
                    b_s=128)
    run_lstm_on_test_data(model_filename, weights_filename, kaggle_filename)

    model_filename = 'models/bi_lstm_model_128.json'
    weights_filename = 'models/bi_lstm_model_128.h5'
    kaggle_filename = 'output/bi_kaggle_lstm_128.csv'
    lstm.train_lstm(model_filename,
                    weights_filename,
                    l1_d=128,
                    l2_d=128,
                    b_s=128,
                    bi=True)
    run_lstm_on_test_data(model_filename, weights_filename, kaggle_filename)

    model_filename = 'models/bi_lstm_model_256.json'
    weights_filename = 'models/bi_lstm_model_256.h5'
    kaggle_filename = 'output/bi_kaggle_lstm_256.csv'
    lstm.train_lstm(model_filename,
                    weights_filename,
                    l1_d=256,
                    l2_d=256,
                    b_s=128,
                    bi=True)
    run_lstm_on_test_data(model_filename, weights_filename, kaggle_filename)
Example no. 3
def test_lstm():
    lstm.train_lstm(max_epochs=1, test_size=1000, saveto='')
Example no. 4
vocab = len(dic)
print("Vocab=%d" %vocab)

X_train_batch = drop_tail(X_train_batch, seq_len)
X_val_batch = drop_tail(X_val_batch, seq_len)
print(X_train_batch.shape)
print(X_val_batch.shape)



model = lstm.setup_rnn_model(mx.cpu(),
                             num_lstm_layer=num_lstm_layer,
                             seq_len=seq_len,
                             num_hidden=num_hidden,
                             num_embed=num_embed,
                             num_label=vocab,
                             batch_size=batch_size,
                             input_size=vocab,
                             initializer=mx.initializer.Uniform(0.1),
                             dropout=0.5)
# max_grad_norm=5.0 | update_period=1 | wd=0 | learning_rate=0.1 | num_round=25
lstm.train_lstm(model, X_train_batch, X_val_batch,
                num_round=num_round,
                half_life=2,
                max_grad_norm=max_grad_norm,
                update_period=update_period,
                learning_rate=learning_rate,
                wd=wd)
#               momentum=momentum)
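
This snippet assumes its hyperparameters are defined earlier in the script; below is a minimal sketch with illustrative values, where the last five follow the comment above and the rest are assumptions.

# Illustrative values only: the last five come from the comment above,
# the network sizes are assumptions.
num_lstm_layer = 2
seq_len = 35
num_hidden = 200
num_embed = 200
batch_size = 32
max_grad_norm = 5.0
update_period = 1
wd = 0
learning_rate = 0.1
num_round = 25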

Example no. 5
# whether do group-wise concat
concat_decode = False
use_loss = True
model = lstm.setup_rnn_model(mx.gpu(),
                             group2ctx=group2ctx,
                             concat_decode=concat_decode,
                             use_loss=use_loss,
                             num_lstm_layer=num_lstm_layer,
                             seq_len=X_train_batch.default_bucket_key,
                             num_hidden=num_hidden,
                             num_embed=num_embed,
                             num_label=vocab,
                             batch_size=batch_size,
                             input_size=vocab,
                             initializer=mx.initializer.Uniform(0.1),
                             dropout=0.5,
                             buckets=buckets)

lstm.train_lstm(model,
                X_train_batch,
                X_val_batch,
                num_round=num_round,
                concat_decode=concat_decode,
                use_loss=use_loss,
                half_life=2,
                max_grad_norm=max_grad_norm,
                update_period=update_period,
                learning_rate=learning_rate,
                batch_size=batch_size,
                wd=wd)
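
The group2ctx argument used above maps symbol groups to devices; here is a minimal sketch of one way to build it, mirroring Example no. 9 below (the number of GPUs and the 'embed' entry are assumptions).

ngpu = 2  # assumed number of available GPUs
group2ctx = {'embed': mx.gpu(0),          # assumed placement of the embedding weights
             'decode': mx.gpu(ngpu - 1)}
for i in range(num_lstm_layer):
    group2ctx['layer%d' % i] = mx.gpu(i * ngpu // num_lstm_layer)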
Example no. 6
X_val, _ = load_data("./data/ptb.valid.txt", dic)
X_train_batch = replicate_data(X_train, batch_size)
X_val_batch = replicate_data(X_val, batch_size)
vocab = len(dic)
print("Vocab=%d" % vocab)

X_train_batch = drop_tail(X_train_batch, seq_len)
X_val_batch = drop_tail(X_val_batch, seq_len)

model = lstm.setup_rnn_model(mx.gpu(),
                             num_lstm_layer=num_lstm_layer,
                             seq_len=seq_len,
                             num_hidden=num_hidden,
                             num_embed=num_embed,
                             num_label=vocab,
                             batch_size=batch_size,
                             input_size=vocab,
                             initializer=mx.initializer.Uniform(0.1),
                             dropout=0.5)

lstm.train_lstm(model,
                X_train_batch,
                X_val_batch,
                num_round=num_round,
                half_life=2,
                max_grad_norm=max_grad_norm,
                update_period=update_period,
                learning_rate=learning_rate,
                wd=wd,
                momentum=momentum)
Example no. 7
def test_lstm():
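    # Smoke test: one epoch on a small subset, with model saving disabled (saveto='').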
    lstm.train_lstm(max_epochs=1, test_size=1000, saveto='')
Example no. 8
LIN_valid.save_LSTM_input(mat_indexes_valid, valid_indexes_file)
X_val = LIN_valid.load_LSTM_input(valid_indexes_file)

model = lstm.setup_rnn_model(mx.gpu(),
                             num_lstm_layer=num_lstm_layer,
                             seq_len=sentence_len,
                             num_hidden=num_hidden,
                             num_embed=num_embed,
                             num_label=max_vocab,
                             batch_size=batch_size,
                             input_size=max_vocab,
                             initializer=mx.initializer.Uniform(0.1),
                             dropout=0.5)

for i in xrange(nb_split):
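    # Each pass loads one shard of the training data, continues training the
    # same model on it, then frees the shard before the next iteration.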
    input_small_file = small_file_prefix + '.' + str(i)
    mat_indexes_file = mat_indexes_file_prefix + '.' + str(i)
    LIN = LSTMinput(input_small_file, sentence_len)
    X_train = LIN.load_LSTM_input(mat_indexes_file)
    lstm.train_lstm(model,
                    X_train,
                    X_val,
                    num_round=num_round,
                    half_life=2,
                    max_grad_norm=max_grad_norm,
                    update_period=update_period,
                    learning_rate=learning_rate,
                    wd=wd)
    del X_train
    del LIN
Example no. 9
             'decode': mx.gpu(ngpu - 1)}

for i in range(num_lstm_layer):
    group2ctx['layer%d' % i] = mx.gpu(i * ngpu // num_lstm_layer)

# whether do group-wise concat
concat_decode = False
use_loss = True
model = lstm.setup_rnn_model(mx.gpu(), group2ctx=group2ctx,
                             concat_decode=concat_decode,
                             use_loss=use_loss,
                             num_lstm_layer=num_lstm_layer,
                             seq_len=X_train_batch.default_bucket_key,
                             num_hidden=num_hidden,
                             num_embed=num_embed,
                             num_label=vocab,
                             batch_size=batch_size,
                             input_size=vocab,
                             initializer=mx.initializer.Uniform(0.1),
                             dropout=0.5,
                             buckets=buckets)

lstm.train_lstm(model, X_train_batch, X_val_batch,
                num_round=num_round,
                concat_decode=concat_decode,
                use_loss=use_loss,
                half_life=2,
                max_grad_norm=max_grad_norm,
                update_period=update_period,
                learning_rate=learning_rate,
                batch_size=batch_size,
                wd=wd)
Example no. 10
# import data: read the stock CSV into a DataFrame
df = pd.read_csv('dataset/dataset_1.csv')
# take columns 3-10 (zero-based indexes 2..9)
data = df.iloc[:, 2:10].values

# set parameters
input_size = 7   # number of input features
output_size = 1  # number of output values
lr = 0.001       # learning rate
time_step = 20   # LSTM time-step length

choice = 1
if choice == 0:
    res1, acc1, mae1 = lstm.train_lstm(data, input_size, output_size, lr, time_step, choice=0)
    res2, acc2, mae2 = bp.train_bp(data, input_size, output_size, lr, choice=0)
    # plot acc and mae
    plt.figure()
    plt.title('acc')
    plt.xlabel('Number of iterations')
    plt.ylabel('value')
    plt.plot(list(range(len(acc1))), acc1, color='green')
    plt.plot(list(range(len(acc2))), acc2, color='red')
    plt.show()

    plt.figure()
    plt.title('mae')
    plt.xlabel('Number of iterations')
    plt.ylabel('value')
    plt.plot(list(range(len(mae1))), mae1, color='green')
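    # Sketch only: presumably the mae comparison finishes like the acc plot
    # above; these two lines are an assumption, not part of the original.
    plt.plot(list(range(len(mae2))), mae2, color='red')
    plt.show()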
Example no. 11
X_train, dic = load_data("./data/ptb.train.txt")
X_val, _ = load_data("./data/ptb.valid.txt", dic)
X_train_batch = replicate_data(X_train, batch_size)
X_val_batch = replicate_data(X_val, batch_size)
vocab = len(dic)
print("Vocab=%d" %vocab)

X_train_batch = drop_tail(X_train_batch, seq_len)
X_val_batch = drop_tail(X_val_batch, seq_len)


model = lstm.setup_rnn_model(mx.gpu(),
                             num_lstm_layer=num_lstm_layer,
                             seq_len=seq_len,
                             num_hidden=num_hidden,
                             num_embed=num_embed,
                             num_label=vocab,
                             batch_size=batch_size,
                             input_size=vocab,
                             initializer=mx.initializer.Uniform(0.1))

lstm.train_lstm(model, X_train_batch, X_val_batch,
                num_round=num_round,
                half_life=2,
                update_period=update_period,
                learning_rate=learning_rate,
                wd=wd,
                momentum=momentum,
                clip_gradient=clip_gradient)