Example #1
dic = default_build_vocab("./data/ptb.train.txt")
vocab = len(dic)

# static buckets
buckets = [8, 16, 24, 32, 60]

init_c = [('l%d_init_c' % l, (batch_size, num_hidden))
          for l in range(num_lstm_layer)]
init_h = [('l%d_init_h' % l, (batch_size, num_hidden))
          for l in range(num_lstm_layer)]
init_states = init_c + init_h

X_train_batch = BucketSentenceIter("./data/ptb.train.txt",
                                   dic,
                                   buckets,
                                   batch_size,
                                   init_states,
                                   model_parallel=True)
X_val_batch = BucketSentenceIter("./data/ptb.valid.txt",
                                 dic,
                                 buckets,
                                 batch_size,
                                 init_states,
                                 model_parallel=True)

ngpu = 2
# A simple two-GPU placement plan
group2ctx = {'embed': mx.gpu(0), 'decode': mx.gpu(ngpu - 1)}

for i in range(num_lstm_layer):
    group2ctx['layer%d' % i] = mx.gpu(i * ngpu // num_lstm_layer)
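Only the group-to-device map is built above; nothing is actually placed until the network is constructed inside matching mx.AttrScope(ctx_group=...) blocks and the symbol is bound with that map. A minimal sketch of how such a plan is consumed, assuming the names from Example #1 (the two toy layers stand in for the real unrolled LSTM):

import mxnet as mx

# Build each piece inside an AttrScope whose ctx_group matches a key of group2ctx.
with mx.AttrScope(ctx_group='embed'):
    data = mx.sym.Variable('data')
    embed = mx.sym.Embedding(data=data, input_dim=vocab,
                             output_dim=num_hidden, name='embed')
with mx.AttrScope(ctx_group='decode'):
    net = mx.sym.FullyConnected(data=embed, num_hidden=vocab, name='decode')

# Bind with the placement plan: each group lands on its mapped GPU,
# and anything ungrouped falls back to the default context.
exe = net.simple_bind(ctx=mx.gpu(0), group2ctx=group2ctx,
                      data=(batch_size, buckets[-1]))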
Example #2
    vocab = default_build_vocab(os.path.join(data_dir, 'ptb.train.txt'))

    init_h = [
        mx.io.DataDesc('LSTM_state', (num_lstm_layer, batch_size, num_hidden),
                       layout='TNC')
    ]
    init_c = [
        mx.io.DataDesc('LSTM_state_cell',
                       (num_lstm_layer, batch_size, num_hidden),
                       layout='TNC')
    ]
    init_states = init_c + init_h

    data_train = BucketSentenceIter(os.path.join(data_dir, 'ptb.train.txt'),
                                    vocab,
                                    buckets,
                                    batch_size,
                                    init_states,
                                    time_major=True)
    data_val = BucketSentenceIter(os.path.join(data_dir, 'ptb.valid.txt'),
                                  vocab,
                                  buckets,
                                  batch_size,
                                  init_states,
                                  time_major=True)

    def sym_gen(seq_len):
        data = mx.sym.Variable('data')
        label = mx.sym.Variable('softmax_label')
        embed = mx.sym.Embedding(data=data,
                                 input_dim=len(vocab),
                                 output_dim=num_embed,
                                 name='embed')
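Unlike the per-layer ('l%d_init_c', (batch_size, num_hidden)) tuples used in the other examples, Example #2 describes the fused-RNN states with mx.io.DataDesc and layout='TNC', so the first axis is the layer axis and the second the batch axis. A tiny illustration of what such a descriptor carries (the shape values are only examples):

import mxnet as mx

desc = mx.io.DataDesc('LSTM_state', (2, 32, 200), layout='TNC')
print(desc.name, desc.shape, desc.layout)   # -> LSTM_state (2, 32, 200) TNC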
Example #3
    def sym_gen(seq_len):
        return lstm_unroll(num_lstm_layer,
                           seq_len,
                           len(vocab),
                           num_hidden=num_hidden,
                           num_embed=num_embed,
                           num_label=len(vocab))

    init_c = [('l%d_init_c' % l, (batch_size, num_hidden))
              for l in range(num_lstm_layer)]
    init_h = [('l%d_init_h' % l, (batch_size, num_hidden))
              for l in range(num_lstm_layer)]
    init_states = init_c + init_h

    data_train = BucketSentenceIter("./data/ptb.train.txt", vocab, buckets,
                                    batch_size, init_states)
    data_val = BucketSentenceIter("./data/ptb.valid.txt", vocab, buckets,
                                  batch_size, init_states)

    if dummy_data:
        data_train = DummyIter(data_train)
        data_val = DummyIter(data_val)

    if len(buckets) == 1:
        # only 1 bucket, disable bucketing
        symbol = sym_gen(buckets[0])
    else:
        symbol = sym_gen

    model = mx.model.FeedForward(ctx=contexts,
                                 symbol=symbol,
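Example #3 breaks off inside the FeedForward constructor. Going by the other snippets in this listing (the optimizer settings of Example #8 and the fit pattern of Example #4), it would typically continue roughly as below; the weight decay, initializer, and callback period are indicative values only, and Perplexity is the metric sketched after Example #4:

    model = mx.model.FeedForward(ctx=contexts,
                                 symbol=symbol,
                                 num_epoch=num_epoch,
                                 learning_rate=learning_rate,
                                 momentum=momentum,
                                 wd=0.0001,
                                 initializer=mx.init.Xavier(factor_type="in",
                                                            magnitude=2.34))

    # train on the bucketed iterators, reporting perplexity
    model.fit(X=data_train,
              eval_data=data_val,
              eval_metric=mx.metric.np(Perplexity),
              batch_end_callback=mx.callback.Speedometer(batch_size, 50))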
Example #4
    num_hidden = 2500
    num_label = 1500
    num_lstm_layer = 2
    num_epoch = 326

    print(batch_size, buckets, num_hidden, num_lstm_layer, num_epoch)

    img_data, wave_data = default_build_vocab("./data/data/2.mp4",
                                              "./data/data/2.mp3")

    init_c = [('l%d_init_c' % l, (batch_size, num_hidden))
              for l in range(num_lstm_layer)]
    init_h = [('l%d_init_h' % l, (batch_size, num_hidden))
              for l in range(num_lstm_layer)]
    init_states = init_c + init_h
    data_train = BucketSentenceIter(img_data, wave_data, buckets, batch_size,
                                    init_states, num_label)
    model = mx.model.FeedForward.load('model/lip',
                                      num_epoch,
                                      ctx=mx.context.gpu(1),
                                      num_epoch=500,
                                      learning_rate=0.5)
    import logging
    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=head)
    model.fit(X=data_train,
              eval_data=None,
              eval_metric=mx.metric.np(Perplexity),
              batch_end_callback=mx.callback.Speedometer(batch_size, 50),
              epoch_end_callback=mx.callback.do_checkpoint('model/lip'))
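Several of these examples pass mx.metric.np(Perplexity) as the evaluation metric without showing its definition. In the MXNet RNN examples this is a plain NumPy function over a (label, pred) batch; a typical definition looks like the following sketch (whether the label needs transposing depends on the layout the iterator produces):

import numpy as np

def Perplexity(label, pred):
    # flatten the labels so they line up with the flattened softmax output
    label = label.T.reshape((-1,))
    loss = 0.
    for i in range(pred.shape[0]):
        loss += -np.log(max(1e-10, pred[i][int(label[i])]))
    return np.exp(loss / label.size)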
Example #5
    momentum = 0.0

    # dummy data is used to test speed without IO
    dummy_data = False

    contexts = [mx.context.gpu(i) for i in range(1)]

    vocab = default_build_vocab(os.path.join(data_dir, "ptb.train.txt"))

    init_c = [('l%d_init_c' % l, (batch_size, num_hidden))
              for l in range(num_lstm_layer)]
    init_h = [('l%d_init_h' % l, (batch_size, num_hidden))
              for l in range(num_lstm_layer)]
    init_states = init_c + init_h

    data_train = BucketSentenceIter(os.path.join(data_dir, "ptb.train.txt"),
                                    vocab, buckets, batch_size, init_states)
    data_val = BucketSentenceIter(os.path.join(data_dir, "ptb.valid.txt"),
                                  vocab, buckets, batch_size, init_states)

    if dummy_data:
        data_train = DummyIter(data_train)
        data_val = DummyIter(data_val)

    state_names = [x[0] for x in init_states]

    def sym_gen(seq_len):
        sym = lstm_unroll(num_lstm_layer,
                          seq_len,
                          len(vocab),
                          num_hidden=num_hidden,
                          num_embed=num_embed,
                          num_label=len(vocab))
Example #6
    num_lstm_layer = 2

    num_epoch = 2
    learning_rate = 0.01
    momentum = 0.0

    contexts = [mx.context.gpu(i) for i in range(4)]
    vocab = default_build_vocab(
        os.path.join(data_dir, 'sherlockholmes.train.txt'))

    init_h = [('LSTM_init_h', (batch_size, num_lstm_layer, num_hidden))]
    init_c = [('LSTM_init_c', (batch_size, num_lstm_layer, num_hidden))]
    init_states = init_c + init_h

    data_train = BucketSentenceIter(
        os.path.join(data_dir, 'sherlockholmes.train.txt'), vocab, buckets,
        batch_size, init_states)
    data_val = BucketSentenceIter(
        os.path.join(data_dir, 'sherlockholmes.valid.txt'), vocab, buckets,
        batch_size, init_states)

    def sym_gen(seq_len):
        data = mx.sym.Variable('data')
        label = mx.sym.Variable('softmax_label')
        embed = mx.sym.Embedding(data=data,
                                 input_dim=len(vocab),
                                 output_dim=num_embed,
                                 name='embed')

        # TODO(tofix)
        # The inputs and labels from IO are all in batch-major.
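        # Sketch: BucketSentenceIter yields batch-major (batch, seq_len) arrays, so the
        # embedding output is (batch, seq_len, num_embed), while a fused RNN cell expects
        # time-major input. One straightforward fix is to swap the first two axes:
        embed_tm = mx.sym.SwapAxis(embed, dim1=0, dim2=1)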
Example #7
    dummy_data = False

    contexts = [mx.context.gpu(i) for i in range(N)]

    vocab = default_build_vocab("./data/sherlockholmes.train.txt")

    def sym_gen(seq_len):
        return lstm_unroll(num_lstm_layer, seq_len, len(vocab),
                           num_hidden=num_hidden, num_embed=num_embed,
                           num_label=len(vocab))

    init_c = [('l%d_init_c'%l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
    init_h = [('l%d_init_h'%l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
    init_states = init_c + init_h

    data_train = BucketSentenceIter("./data/sherlockholmes.train.txt", vocab,
                                    buckets, batch_size, init_states)
    data_val = BucketSentenceIter("./data/sherlockholmes.valid.txt", vocab,
                                  buckets, batch_size, init_states)

    if dummy_data:
        data_train = DummyIter(data_train)
        data_val = DummyIter(data_val)

    if len(buckets) == 1:
        # only 1 bucket, disable bucketing
        symbol = sym_gen(buckets[0])
    else:
        symbol = sym_gen

    model = mx.model.FeedForward(ctx=contexts,
                                 symbol=symbol,
Example #8
def sym_gen(seq_len):
    return lstm_unroll(num_lstm_layer, seq_len, len(vocab),
                       num_hidden=num_hidden, num_embed=num_embed,
                       num_label=len(vocab) + 1, dropout=0.2)


# initialize states for LSTM
init_c = [('l%d_init_c' % l, (batch_size, num_hidden))
          for l in range(num_lstm_layer)]
init_h = [('l%d_init_h' % l, (batch_size, num_hidden))
          for l in range(num_lstm_layer)]
init_states = init_c + init_h
print(vocab)
# we can build an iterator for text
data_train = BucketSentenceIter("./obama.txt",
                                vocab,
                                buckets,
                                batch_size,
                                init_states,
                                seperate_char='\n',
                                text2id=text2id,
                                read_content=read_content)
# the network symbol
symbol = sym_gen(buckets[0])
# Train an LSTM network just as simply as a feedforward network
model = mx.model.FeedForward(ctx=devs,
                             symbol=symbol,
                             num_epoch=num_epoch,
                             learning_rate=learning_rate,
                             momentum=momentum,
                             wd=0.0001,
                             initializer=mx.init.Xavier(factor_type="in",
                                                        magnitude=2.34))
# Fit it
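# A typical fit call for this setup, following the pattern of Example #4 (the
# checkpoint prefix "obama" and the logging period are assumptions, not taken
# from the original script):
model.fit(X=data_train,
          eval_metric=mx.metric.np(Perplexity),
          batch_end_callback=mx.callback.Speedometer(batch_size, 50),
          epoch_end_callback=mx.callback.do_checkpoint("obama"))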
Example #9
    dummy_data = False

    contexts = [mx.context.gpu(i) for i in range(1)]

    vocab = default_build_vocab(args.data_path+"/ptb.train.txt")

    def sym_gen(seq_len):
        return lstm_unroll(num_lstm_layer, seq_len, 10000,
                           num_hidden=num_hidden, num_embed=num_embed,
                           num_label=10000)

    init_c = [('l%d_init_c'%l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
    init_h = [('l%d_init_h'%l, (batch_size, num_hidden)) for l in range(num_lstm_layer)]
    init_states = init_c + init_h

    data_train = BucketSentenceIter(args.data_path+"/ptb.train.txt", vocab,
                                    buckets, batch_size, args.num_batch, init_states)

    if dummy_data:
        data_train = DummyIter(data_train)

    if len(buckets) == 1:
        # only 1 bucket, disable bucketing
        symbol = sym_gen(buckets[0])
    else:
        symbol = sym_gen
        
    model = mx.model.FeedForward(ctx=contexts,
                                 symbol=symbol,
                                 num_epoch=num_epoch,
                                 learning_rate=learning_rate,
                                 momentum=momentum,