dic = default_build_vocab("./data/ptb.train.txt")
vocab = len(dic)

# static buckets
buckets = [8, 16, 24, 32, 60]

init_c = [('l%d_init_c' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_h = [('l%d_init_h' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_states = init_c + init_h

X_train_batch = BucketSentenceIter("./data/ptb.train.txt", dic, buckets, batch_size,
                                   init_states, model_parallel=True)
X_val_batch = BucketSentenceIter("./data/ptb.valid.txt", dic, buckets, batch_size,
                                 init_states, model_parallel=True)

ngpu = 2
# A simple two-GPU placement plan: embedding on the first GPU, decoding on the last,
# and the LSTM layers spread evenly in between.
group2ctx = {'embed': mx.gpu(0),
             'decode': mx.gpu(ngpu - 1)}
for i in range(num_lstm_layer):
    group2ctx['layer%d' % i] = mx.gpu(i * ngpu // num_lstm_layer)
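# A minimal sketch (not part of the original script) of how a ctx_group plan such as
# group2ctx is typically consumed: symbols are tagged with a ctx_group attribute as
# they are built, and the dict is handed to simple_bind so each group lands on its
# assigned device. The tiny embed/decode symbol below and its shapes are assumptions
# for illustration only.
with mx.AttrScope(ctx_group='embed'):
    data = mx.sym.Variable('data')
    embed = mx.sym.Embedding(data=data, input_dim=vocab, output_dim=num_embed,
                             name='embed')
with mx.AttrScope(ctx_group='decode'):
    pred = mx.sym.FullyConnected(data=embed, num_hidden=vocab, name='pred')

exe = pred.simple_bind(ctx=mx.gpu(0), group2ctx=group2ctx,
                       data=(batch_size, buckets[0]))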
vocab = default_build_vocab(os.path.join(data_dir, 'ptb.train.txt'))

init_h = [mx.io.DataDesc('LSTM_state', (num_lstm_layer, batch_size, num_hidden),
                         layout='TNC')]
init_c = [mx.io.DataDesc('LSTM_state_cell', (num_lstm_layer, batch_size, num_hidden),
                         layout='TNC')]
init_states = init_c + init_h

data_train = BucketSentenceIter(os.path.join(data_dir, 'ptb.train.txt'), vocab,
                                buckets, batch_size, init_states, time_major=True)
data_val = BucketSentenceIter(os.path.join(data_dir, 'ptb.valid.txt'), vocab,
                              buckets, batch_size, init_states, time_major=True)

def sym_gen(seq_len):
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('softmax_label')
    embed = mx.sym.Embedding(data=data, input_dim=len(vocab), output_dim=num_embed,
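# The sym_gen body above is truncated. A hedged sketch of how a time-major (TNC)
# sym_gen is usually finished with the fused cuDNN RNN cell; the cell prefix,
# reshape steps, and output names here are assumptions, not the original code.
def sym_gen_sketch(seq_len):
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('softmax_label')
    embed = mx.sym.Embedding(data=data, input_dim=len(vocab),
                             output_dim=num_embed, name='embed')
    # FusedRNNCell consumes time-major input and carries both h and c states.
    lstm = mx.rnn.FusedRNNCell(num_hidden, num_layers=num_lstm_layer,
                               mode='lstm', prefix='LSTM_')
    outputs, _ = lstm.unroll(seq_len, inputs=embed, layout='TNC',
                             merge_outputs=True)
    pred = mx.sym.Reshape(outputs, shape=(-1, num_hidden))
    pred = mx.sym.FullyConnected(data=pred, num_hidden=len(vocab), name='pred')
    label = mx.sym.Reshape(label, shape=(-1,))
    return mx.sym.SoftmaxOutput(data=pred, label=label, name='softmax')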
def sym_gen(seq_len):
    return lstm_unroll(num_lstm_layer, seq_len, len(vocab),
                       num_hidden=num_hidden, num_embed=num_embed,
                       num_label=len(vocab))

init_c = [('l%d_init_c' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_h = [('l%d_init_h' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_states = init_c + init_h

data_train = BucketSentenceIter("./data/ptb.train.txt", vocab, buckets,
                                batch_size, init_states)
data_val = BucketSentenceIter("./data/ptb.valid.txt", vocab, buckets,
                              batch_size, init_states)

if dummy_data:
    data_train = DummyIter(data_train)
    data_val = DummyIter(data_val)

if len(buckets) == 1:
    # only 1 bucket, disable bucketing
    symbol = sym_gen(buckets[0])
else:
    symbol = sym_gen

model = mx.model.FeedForward(ctx=contexts, symbol=symbol,
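# The FeedForward constructor above is cut off. A sketch (assumed, mirroring the
# hyperparameters used in the surrounding snippets) of how the constructor and the
# training call usually continue; Perplexity is the numpy metric defined elsewhere.
model = mx.model.FeedForward(ctx=contexts,
                             symbol=symbol,
                             num_epoch=num_epoch,
                             learning_rate=learning_rate,
                             momentum=momentum,
                             wd=0.00001,
                             initializer=mx.init.Xavier(factor_type="in",
                                                        magnitude=2.34))
model.fit(X=data_train,
          eval_data=data_val,
          eval_metric=mx.metric.np(Perplexity),
          batch_end_callback=mx.callback.Speedometer(batch_size, 50))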
num_hidden = 2500
num_label = 1500
num_lstm_layer = 2
num_epoch = 326
print(batch_size, buckets, num_hidden, num_lstm_layer, num_epoch)

img_data, wave_data = default_build_vocab("./data/data/2.mp4", "./data/data/2.mp3")

init_c = [('l%d_init_c' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_h = [('l%d_init_h' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_states = init_c + init_h

data_train = BucketSentenceIter(img_data, wave_data, buckets, batch_size,
                                init_states, num_label)

# Resume from the epoch-`num_epoch` checkpoint under 'model/lip' and keep training
# up to 500 epochs on the second GPU.
model = mx.model.FeedForward.load('model/lip', num_epoch,
                                  ctx=mx.context.gpu(1),
                                  num_epoch=500,
                                  learning_rate=0.5)

import logging
head = '%(asctime)-15s %(message)s'
logging.basicConfig(level=logging.DEBUG, format=head)

model.fit(X=data_train,
          eval_data=None,
          eval_metric=mx.metric.np(Perplexity),
          batch_end_callback=mx.callback.Speedometer(batch_size, 50),
          epoch_end_callback=mx.callback.do_checkpoint('model/lip'))
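# `Perplexity` is referenced above but not defined in this excerpt. A minimal sketch
# of the numpy metric commonly wrapped with mx.metric.np in these scripts (assumed
# here): accumulate the negative log-likelihood of each true token and exponentiate.
import numpy as np

def Perplexity(label, pred):
    label = label.T.reshape((-1,))
    loss = 0.
    for i in range(pred.shape[0]):
        loss += -np.log(max(1e-10, pred[i][int(label[i])]))
    return np.exp(loss / label.size)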
momentum = 0.0

# dummy data is used to test speed without IO
dummy_data = False

contexts = [mx.context.gpu(i) for i in range(1)]

vocab = default_build_vocab(os.path.join(data_dir, "ptb.train.txt"))

init_c = [('l%d_init_c' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_h = [('l%d_init_h' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_states = init_c + init_h

data_train = BucketSentenceIter(os.path.join(data_dir, "ptb.train.txt"), vocab,
                                buckets, batch_size, init_states)
data_val = BucketSentenceIter(os.path.join(data_dir, "ptb.valid.txt"), vocab,
                              buckets, batch_size, init_states)

if dummy_data:
    data_train = DummyIter(data_train)
    data_val = DummyIter(data_val)

state_names = [x[0] for x in init_states]

def sym_gen(seq_len):
    sym = lstm_unroll(num_lstm_layer, seq_len, len(vocab),
                      num_hidden=num_hidden, num_embed=num_embed,
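# The sym_gen above is truncated; in the Module-API variant of this script it
# typically returns (symbol, data_names, label_names) and feeds a BucketingModule.
# A hedged sketch under that assumption; names and optimizer settings mirror the
# surrounding snippets rather than the original file.
def sym_gen_module(seq_len):
    sym = lstm_unroll(num_lstm_layer, seq_len, len(vocab),
                      num_hidden=num_hidden, num_embed=num_embed,
                      num_label=len(vocab))
    data_names = ['data'] + state_names
    label_names = ['softmax_label']
    return sym, data_names, label_names

mod = mx.mod.BucketingModule(sym_gen=sym_gen_module,
                             default_bucket_key=data_train.default_bucket_key,
                             context=contexts)
mod.fit(data_train,
        eval_data=data_val,
        eval_metric=mx.metric.np(Perplexity),
        batch_end_callback=mx.callback.Speedometer(batch_size, 50),
        initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
        optimizer='sgd',
        optimizer_params={'learning_rate': learning_rate, 'momentum': momentum},
        num_epoch=num_epoch)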
num_lstm_layer = 2
num_epoch = 2
learning_rate = 0.01
momentum = 0.0

contexts = [mx.context.gpu(i) for i in range(4)]

vocab = default_build_vocab(
    os.path.join(data_dir, 'sherlockholmes.train.txt'))

init_h = [('LSTM_init_h', (batch_size, num_lstm_layer, num_hidden))]
init_c = [('LSTM_init_c', (batch_size, num_lstm_layer, num_hidden))]
init_states = init_c + init_h

data_train = BucketSentenceIter(
    os.path.join(data_dir, 'sherlockholmes.train.txt'),
    vocab, buckets, batch_size, init_states)
data_val = BucketSentenceIter(
    os.path.join(data_dir, 'sherlockholmes.valid.txt'),
    vocab, buckets, batch_size, init_states)

def sym_gen(seq_len):
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('softmax_label')
    embed = mx.sym.Embedding(data=data, input_dim=len(vocab),
                             output_dim=num_embed, name='embed')

    # TODO(tofix)
    # The inputs and labels from IO are all in batch-major.
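    # (Sketch continuation, not the original body.) The TODO above usually goes on to
    # note that the RNN operator expects time-major input, so the batch-major
    # embedding is swapped to (seq_len, batch, num_embed) before the fused LSTM; the
    # parameter and state variable names below are assumptions.
    embed_tm = mx.sym.SwapAxis(embed, dim1=0, dim2=1)
    rnn = mx.sym.RNN(data=embed_tm,
                     parameters=mx.sym.Variable('LSTM_parameters'),
                     state=mx.sym.Variable('LSTM_init_h'),
                     state_cell=mx.sym.Variable('LSTM_init_c'),
                     state_size=num_hidden, num_layers=num_lstm_layer,
                     mode='lstm', name='LSTM')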
dummy_data = False

# N is the number of GPUs to run on
contexts = [mx.context.gpu(i) for i in range(N)]

vocab = default_build_vocab("./data/sherlockholmes.train.txt")

def sym_gen(seq_len):
    return lstm_unroll(num_lstm_layer, seq_len, len(vocab),
                       num_hidden=num_hidden, num_embed=num_embed,
                       num_label=len(vocab))

init_c = [('l%d_init_c' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_h = [('l%d_init_h' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_states = init_c + init_h

data_train = BucketSentenceIter("./data/sherlockholmes.train.txt", vocab,
                                buckets, batch_size, init_states)
data_val = BucketSentenceIter("./data/sherlockholmes.valid.txt", vocab,
                              buckets, batch_size, init_states)

if dummy_data:
    data_train = DummyIter(data_train)
    data_val = DummyIter(data_val)

if len(buckets) == 1:
    # only 1 bucket, disable bucketing
    symbol = sym_gen(buckets[0])
else:
    symbol = sym_gen

model = mx.model.FeedForward(ctx=contexts, symbol=symbol,
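# When contexts spans several GPUs (or machines), FeedForward.fit takes a kvstore to
# synchronize gradients. The constructor above is truncated, so this fit call is a
# hedged sketch; 'device' is an assumed single-machine choice, with 'dist_sync' being
# the usual multi-machine alternative.
model.fit(X=data_train,
          eval_data=data_val,
          kvstore='device',
          eval_metric=mx.metric.np(Perplexity),
          batch_end_callback=mx.callback.Speedometer(batch_size, 50))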
                   num_label=len(vocab) + 1, dropout=0.2)

# initialize states for LSTM
init_c = [('l%d_init_c' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_h = [('l%d_init_h' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_states = init_c + init_h

print(vocab)

# we can build an iterator for text
data_train = BucketSentenceIter("./obama.txt", vocab, buckets, batch_size,
                                init_states, seperate_char='\n',
                                text2id=text2id, read_content=read_content)

# the network symbol
symbol = sym_gen(buckets[0])

# Train an LSTM network as simply as a feedforward network
model = mx.model.FeedForward(ctx=devs,
                             symbol=symbol,
                             num_epoch=num_epoch,
                             learning_rate=learning_rate,
                             momentum=momentum,
                             wd=0.0001,
                             initializer=mx.init.Xavier(factor_type="in",
                                                        magnitude=2.34))

# Fit it
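# `read_content` and `text2id` are passed to the iterator above but not defined in
# this excerpt. A minimal sketch of the char-level helpers they are usually bound to
# (assumed here): read the raw text, then map each character through the vocab dict.
def read_content(path):
    with open(path) as ins:
        return ins.read()

def text2id(sentence, the_vocab):
    words = list(sentence)
    return [the_vocab[w] for w in words if len(w) > 0]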
dummy_data = False

contexts = [mx.context.gpu(i) for i in range(1)]

vocab = default_build_vocab(args.data_path + "/ptb.train.txt")

def sym_gen(seq_len):
    # 10000 is the PTB vocabulary size, used for both the input and label dimensions
    return lstm_unroll(num_lstm_layer, seq_len, 10000,
                       num_hidden=num_hidden, num_embed=num_embed,
                       num_label=10000)

init_c = [('l%d_init_c' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_h = [('l%d_init_h' % l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
init_states = init_c + init_h

data_train = BucketSentenceIter(args.data_path + "/ptb.train.txt", vocab, buckets,
                                batch_size, args.num_batch, init_states)

if dummy_data:
    data_train = DummyIter(data_train)

if len(buckets) == 1:
    # only 1 bucket, disable bucketing
    symbol = sym_gen(buckets[0])
else:
    symbol = sym_gen

model = mx.model.FeedForward(ctx=contexts, symbol=symbol,
                             num_epoch=num_epoch,
                             learning_rate=learning_rate,
                             momentum=momentum,
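# `args` above comes from a command-line parser that is not shown in this excerpt.
# A hypothetical minimal parser providing the two fields used here (option names and
# defaults are assumptions):
import argparse

parser = argparse.ArgumentParser(description='LSTM bucketing benchmark on PTB')
parser.add_argument('--data-path', dest='data_path', type=str, default='./data',
                    help='directory containing ptb.train.txt')
parser.add_argument('--num-batch', dest='num_batch', type=int, default=100,
                    help='number of batches the iterator yields per epoch')
args = parser.parse_args()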