Example #1
vocab = len(dic)
print("Vocab=%d" % vocab)

X_train_batch = drop_tail(X_train_batch, seq_len)
X_val_batch = drop_tail(X_val_batch, seq_len)
print(X_train_batch.shape)
print(X_val_batch.shape)

model = lstm.setup_rnn_model(mx.cpu(),
                             num_lstm_layer=num_lstm_layer,
                             seq_len=seq_len,
                             num_hidden=num_hidden,
                             num_embed=num_embed,
                             num_label=vocab,
                             batch_size=batch_size,
                             input_size=vocab,
                             initializer=mx.initializer.Uniform(0.1),
                             dropout=0.5)
# max_grad_norm=5.0 | update_period=1 | wd=0 | learning_rate=0.1 | num_round=25
lstm.train_lstm(model, X_train_batch, X_val_batch,
                num_round=num_round,
                half_life=2,
                max_grad_norm=max_grad_norm,
                update_period=update_period,
                learning_rate=learning_rate,
                wd=wd)
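Example #1 is a fragment: it assumes `dic`, `X_train_batch`, and `X_val_batch` already exist, along with the usual hyperparameters. Below is a minimal sketch of the preamble it presupposes, mirroring the loading code in Example #4 further down; the optimizer settings come from the inline comment above, while the import path and network sizes are illustrative assumptions.

import mxnet as mx
import lstm
# load_data, replicate_data, and drop_tail are the helpers used throughout
# these examples; the module name here is a placeholder assumption.
from lstm_ptb import load_data, replicate_data, drop_tail

num_lstm_layer = 2     # assumed network size
num_hidden = 200       # assumed network size
num_embed = 200        # assumed network size
seq_len = 32           # assumed unroll length
batch_size = 20        # assumed batch size
num_round = 25         # from the comment above
learning_rate = 0.1    # from the comment above
wd = 0.                # from the comment above
max_grad_norm = 5.0    # from the comment above
update_period = 1      # from the comment above

X_train, dic = load_data("./data/ptb.train.txt")
X_val, _ = load_data("./data/ptb.valid.txt", dic)
X_train_batch = replicate_data(X_train, batch_size)
X_val_batch = replicate_data(X_val, batch_size)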

Example #2
# A simple two GPU placement plan
group2ctx = {'embed': mx.gpu(0), 'decode': mx.gpu(ngpu - 1)}

for i in range(num_lstm_layer):
    group2ctx['layer%d' % i] = mx.gpu(i * ngpu // num_lstm_layer)
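# To make the plan concrete: with ngpu = 2 and num_lstm_layer = 4
# (illustrative values, not fixed by this snippet), the loop above assigns
#   layer0 -> gpu(0), layer1 -> gpu(0), layer2 -> gpu(1), layer3 -> gpu(1)
# so the embedding stays on gpu(0), the decoder on gpu(1), and the LSTM
# layers split evenly across the two devices.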

# whether to do group-wise concat
concat_decode = False
use_loss = True
model = lstm.setup_rnn_model(mx.gpu(),
                             group2ctx=group2ctx,
                             concat_decode=concat_decode,
                             use_loss=use_loss,
                             num_lstm_layer=num_lstm_layer,
                             seq_len=X_train_batch.default_bucket_key,
                             num_hidden=num_hidden,
                             num_embed=num_embed,
                             num_label=vocab,
                             batch_size=batch_size,
                             input_size=vocab,
                             initializer=mx.initializer.Uniform(0.1),
                             dropout=0.5,
                             buckets=buckets)
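# Note on bucketing (an assumption about the bucketed setup used here):
# `buckets` groups sequences by length so each bucket gets its own unrolled
# graph, and `X_train_batch.default_bucket_key` is the key of the largest
# bucket, which is why it stands in for `seq_len` above.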

lstm.train_lstm(model,
                X_train_batch,
                X_val_batch,
                num_round=num_round,
                concat_decode=concat_decode,
                use_loss=use_loss,
                half_life=2,
                max_grad_norm=max_grad_norm,
                update_period=update_period,
                learning_rate=learning_rate,
                wd=wd)

Example #3
# A simple two GPU placement plan
group2ctx = {'embed': mx.gpu(0),
             'decode': mx.gpu(ngpu - 1)}

for i in range(num_lstm_layer):
    group2ctx['layer%d' % i] = mx.gpu(i * ngpu // num_lstm_layer)

# whether to do group-wise concat
concat_decode = False
use_loss = True
model = lstm.setup_rnn_model(mx.gpu(), group2ctx=group2ctx,
                             concat_decode=concat_decode,
                             use_loss=use_loss,
                             num_lstm_layer=num_lstm_layer,
                             seq_len=X_train_batch.default_bucket_key,
                             num_hidden=num_hidden,
                             num_embed=num_embed,
                             num_label=vocab,
                             batch_size=batch_size,
                             input_size=vocab,
                             initializer=mx.initializer.Uniform(0.1),
                             dropout=0.5,
                             buckets=buckets)

lstm.train_lstm(model, X_train_batch, X_val_batch,
                num_round=num_round,
                concat_decode=concat_decode,
                use_loss=use_loss,
                half_life=2,
                max_grad_norm=max_grad_norm,
                update_period=update_period,
                learning_rate=learning_rate,
                batch_size=batch_size,
                wd=wd)

Example #4
X_train, dic = load_data("./data/ptb.train.txt")
X_val, _ = load_data("./data/ptb.valid.txt", dic)
X_train_batch = replicate_data(X_train, batch_size)
X_val_batch = replicate_data(X_val, batch_size)
vocab = len(dic)
print("Vocab=%d" % vocab)

X_train_batch = drop_tail(X_train_batch, seq_len)
X_val_batch = drop_tail(X_val_batch, seq_len)

model = lstm.setup_rnn_model(mx.gpu(),
                             num_lstm_layer=num_lstm_layer,
                             seq_len=seq_len,
                             num_hidden=num_hidden,
                             num_embed=num_embed,
                             num_label=vocab,
                             batch_size=batch_size,
                             input_size=vocab,
                             initializer=mx.initializer.Uniform(0.1),
                             dropout=0.5)

lstm.train_lstm(model,
                X_train_batch,
                X_val_batch,
                num_round=num_round,
                half_life=2,
                max_grad_norm=max_grad_norm,
                update_period=update_period,
                learning_rate=learning_rate,
                wd=wd,
                momentum=momentum)

Example #5
ngpu = 1
# A simple two GPU placement plan
group2ctx = {'embed': mx.gpu(0),
             'decode': mx.gpu(ngpu - 1)}

for i in range(num_lstm_layer):
    group2ctx['layer%d' % i] = mx.gpu(i * ngpu // num_lstm_layer)
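# With ngpu = 1, every entry in group2ctx resolves to gpu(0): the decoder
# lands on mx.gpu(ngpu - 1) = mx.gpu(0), and i * 1 // num_lstm_layer is 0
# for every i < num_lstm_layer, so the "two GPU" plan above degenerates to
# a single device.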

# whether to do group-wise concat
concat_decode = True

model = lstm.setup_rnn_model(mx.gpu(), group2ctx=group2ctx,
                             concat_decode=concat_decode,
                             num_lstm_layer=num_lstm_layer,
                             seq_len=seq_len,
                             num_hidden=num_hidden,
                             num_embed=num_embed,
                             num_label=vocab,
                             batch_size=batch_size,
                             input_size=vocab,
                             initializer=mx.initializer.Uniform(0.1),
                             dropout=0.5)

lstm.train_lstm(model, X_train_batch, X_val_batch,
                num_round=num_round,
                concat_decode=concat_decode,
                half_life=2,
                max_grad_norm=max_grad_norm,
                update_period=update_period,
                learning_rate=learning_rate,
                wd=wd)