Example #1
def forward(net, hyper, sentence_batches):
    # Training-time forward pass: unrolls an LSTM language model over one padded
    # sentence batch and returns the mean softmax loss over all time steps.
    # Assumes `import numpy as np` and the Apollo Caffe `layers` module are in scope.
    batch = next(sentence_batches)
    #sentence_batch = np.array(pad_batch([x['body'] for x in batch], hyper))
    sentence_batch = np.array(pad_batch(batch, hyper))
    length = min(sentence_batch.shape[1], 100)  # unroll for at most 100 time steps
    assert length > 0

    filler = layers.Filler(type='uniform', max=hyper['init_range'],
        min=(-hyper['init_range']))
    net.forward_layer(layers.NumpyData(name='lstm_seed',
        data=np.zeros((hyper['batch_size'], hyper['mem_cells'], 1, 1))))
    net.forward_layer(layers.NumpyData(name='label',
        data=np.zeros((hyper['batch_size'] * length, 1, 1, 1))))
    loss = []
    for step in range(length):
        # One input blob per time step; the model is trained to predict the
        # symbol at position `step` from the symbol at position `step - 1`.
        net.forward_layer(layers.DummyData(name=('word%d' % step),
            shape=[hyper['batch_size'], 1, 1, 1]))
        if step == 0:
            # First step: zero hidden/memory seed and a zero "start" input.
            prev_hidden = 'lstm_seed'
            prev_mem = 'lstm_seed'
            word = np.zeros(sentence_batch[:, 0].shape)
        else:
            prev_hidden = 'lstm%d_hidden' % (step - 1)
            prev_mem = 'lstm%d_mem' % (step - 1)
            word = sentence_batch[:, step - 1]
        net.tops['word%d' % step].data[:,0,0,0] = word
        # Embed the input symbol and run one LSTM step; param_names are shared,
        # so every time step reuses the same embedding and LSTM weights.
        net.forward_layer(layers.Wordvec(name=('wordvec%d' % step),
            bottoms=['word%d' % step],
            dimension=hyper['mem_cells'], vocab_size=hyper['vocab_size'],
            param_names=['wordvec_param'], weight_filler=filler))
        net.forward_layer(layers.Concat(name='lstm_concat%d' % step,
            bottoms=[prev_hidden, 'wordvec%d' % step]))
        net.forward_layer(layers.Lstm(name='lstm%d' % step,
            bottoms=['lstm_concat%d' % step, prev_mem],
            param_names=['lstm_input_value', 'lstm_input_gate',
                'lstm_forget_gate', 'lstm_output_gate'],
            tops=['lstm%d_hidden' % step, 'lstm%d_mem' % step],
            num_cells=hyper['mem_cells'], weight_filler=filler))
        net.forward_layer(layers.Dropout(name='dropout%d' % step,
            bottoms=['lstm%d_hidden' % step], dropout_ratio=0.16))
        
        # The target at this step is the current symbol; padded positions are
        # skipped via ignore_label below.
        label = np.reshape(sentence_batch[:, step], (hyper['batch_size'], 1, 1, 1))
        net.forward_layer(layers.NumpyData(name='label%d' % step,
            data=label))
        net.forward_layer(layers.InnerProduct(name='ip%d' % step, bottoms=['dropout%d' % step],
            param_names=['softmax_ip_weights', 'softmax_ip_bias'],
            num_output=hyper['vocab_size'], weight_filler=filler))
        loss.append(net.forward_layer(layers.SoftmaxWithLoss(name='softmax_loss%d' % step,
            ignore_label=hyper['zero_symbol'], bottoms=['ip%d' % step, 'label%d' % step])))

    return np.mean(loss)
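
The forward pass above only builds the unrolled graph and returns the loss; a separate driver loop must still backpropagate and update the shared weights. Below is a minimal, hypothetical sketch assuming the ApolloNet API from the Apollo Caffe fork (apollocaffe); the backward()/update() calls, the learning rate, and the iteration count are assumptions, not taken from this example.

import apollocaffe

net = apollocaffe.ApolloNet()
for it in range(10000):  # placeholder iteration count
    loss = forward(net, hyper, sentence_batches)  # builds the unrolled graph for one batch
    net.backward()        # backprop through every forward_layer call (assumed API)
    net.update(lr=0.1)    # assumed signature; the real script may pass momentum, clipping, etc.
    net.reset_forward()   # clear tops so the per-step layer names can be reused
    if it % 100 == 0:
        print 'iteration %d, loss %f' % (it, loss)
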
Example #2
def eval_forward(net, hyper):
    # Sampling pass: reuses the trained layers (shared via param_names) to
    # generate hyper['length'] symbols one step at a time, feeding each sampled
    # output back in as the next input. Assumes module-level `vocab`, `ivocab`,
    # and `softmax_choice` helpers.
    output_words = []
    filler = layers.Filler(type='uniform', max=hyper['init_range'],
        min=(-hyper['init_range']))
    net.forward_layer(layers.NumpyData(name='lstm_hidden_prev',
        data=np.zeros((1, hyper['mem_cells'], 1, 1))))
    net.forward_layer(layers.NumpyData(name='lstm_mem_prev',
        data=np.zeros((1, hyper['mem_cells'], 1, 1))))
    length = hyper['length']
    for step in range(length):
        net.forward_layer(layers.NumpyData(name=('word'),
            data=np.zeros((1, 1, 1, 1))))
        prev_hidden = 'lstm_hidden_prev'
        prev_mem = 'lstm_mem_prev'
        word = np.zeros((1, 1, 1, 1))
        if step == 0:
            # Seed generation with a space; later steps sample the next symbol
            # from the previous step's softmax distribution.
            #output = ord('.')
            output = vocab[' ']
        else:
            output = softmax_choice(net.tops['softmax'].data)
        output_words.append(output)
        net.tops['word'].data[0,0,0,0] = output
        net.forward_layer(layers.Wordvec(name=('wordvec'),
            bottoms=['word'],
            dimension=hyper['mem_cells'], vocab_size=hyper['vocab_size'],
            param_names=['wordvec_param'], weight_filler=filler))
        net.forward_layer(layers.Concat(name='lstm_concat',
            bottoms=[prev_hidden, 'wordvec']))
        net.forward_layer(layers.Lstm(name='lstm',
            bottoms=['lstm_concat', prev_mem],
            param_names=['lstm_input_value', 'lstm_input_gate',
                'lstm_forget_gate', 'lstm_output_gate'],
            tops=['lstm_hidden_next', 'lstm_mem_next'],
            num_cells=hyper['mem_cells'], weight_filler=filler))
        net.forward_layer(layers.Dropout(name='dropout',
            bottoms=['lstm_hidden_next'], dropout_ratio=0.16))

        net.forward_layer(layers.InnerProduct(name='ip', bottoms=['dropout'],
            param_names=['softmax_ip_weights', 'softmax_ip_bias'],
            num_output=hyper['vocab_size'], weight_filler=filler))
        # Scale the logits by the inverse temperature before the softmax, copy
        # the LSTM state forward for the next step, and clear the graph so the
        # same layer names can be reused.
        net.tops['ip'].data[:] *= hyper['i_temperature']
        net.forward_layer(layers.Softmax(name='softmax',
            ignore_label=hyper['zero_symbol'], bottoms=['ip']))
        net.tops['lstm_hidden_prev'].data_tensor.copy_from(net.tops['lstm_hidden_next'].data_tensor)
        net.tops['lstm_mem_prev'].data_tensor.copy_from(net.tops['lstm_mem_next'].data_tensor)
        net.reset_forward()
    print ''.join([ivocab[x].encode('utf8') for x in output_words])
    return 0.
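
eval_forward reads only a handful of fields from hyper and relies on module-level vocab/ivocab mappings and a softmax_choice sampler. The sketch below lists the fields it expects, with hypothetical placeholder values; the real values must match the training configuration.

# Hypothetical configuration showing every hyper field eval_forward() reads.
# The numbers are placeholders, not the values used by the original example.
hyper = {
    'mem_cells': 250,      # LSTM state size (must match the trained weights)
    'init_range': 0.1,     # uniform weight-filler range
    'vocab_size': 256,     # size of the output softmax
    'length': 200,         # number of symbols to sample
    'zero_symbol': 0,      # label id ignored by the softmax
    'i_temperature': 1.5,  # inverse temperature applied to the logits
}
# vocab maps symbols to integer ids; ivocab is the inverse mapping used when
# printing the sampled sequence.
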
Example #3
def forward(net, sentence_batches):
    # Sequence-to-sequence forward pass: an encoder LSTM reads the source batch,
    # its state at each sentence's true length seeds a decoder LSTM over the
    # target batch, and the mean loss is returned. Assumes `np`, the Apollo Caffe
    # `layers` module, and a module-level `hyper` dict are in scope.
    source_batch, target_batch = next(sentence_batches)

    filler = layers.Filler(type='uniform',
                           max=hyper['init_range'],
                           min=(-hyper['init_range']))
    net.forward_layer(
        layers.NumpyData(name='source_lstm_seed',
                         data=np.zeros(
                             (hyper['batch_size'], hyper['mem_cells'], 1, 1))))
    hidden_bottoms = ['source_lstm_seed']
    mem_bottoms = ['source_lstm_seed']
    # True (unpadded) length of each source sentence, capped at hyper['max_len'];
    # used by CapSequence below to pick each example's final encoder state.
    lengths = [
        min(len([1 for token in x
                 if token != hyper['pad_symbol']]), hyper['max_len'])
        for x in source_batch
    ]
    for step in range(source_batch.shape[1]):
        net.forward_layer(
            layers.DummyData(name=('source_word%d' % step),
                             shape=[hyper['batch_size'], 1, 1, 1]))
        if step == 0:
            prev_hidden = 'source_lstm_seed'
            prev_mem = 'source_lstm_seed'
        else:
            prev_hidden = 'source_lstm%d_hidden' % (step - 1)
            prev_mem = 'source_lstm%d_mem' % (step - 1)
        next_hidden = 'source_lstm%d_hidden' % (step)
        next_mem = 'source_lstm%d_mem' % (step)
        hidden_bottoms.append(next_hidden)
        mem_bottoms.append(next_mem)
        word = source_batch[:, step]
        net.tops['source_word%d' % step].data[:, 0, 0, 0] = word
        wordvec = layers.Wordvec(name=('source_wordvec%d' % step),
                                 bottoms=['source_word%d' % step],
                                 dimension=hyper['mem_cells'],
                                 vocab_size=hyper['vocab_size'],
                                 param_names=['source_wordvec_param'],
                                 weight_filler=filler)
        concat = layers.Concat(
            name='source_lstm_concat%d' % step,
            bottoms=[prev_hidden, 'source_wordvec%d' % step])
        lstm = layers.Lstm(
            name='source_lstm%d' % step,
            bottoms=['source_lstm_concat%d' % step, prev_mem],
            param_names=[
                'source_lstm_input_value', 'source_lstm_input_gate',
                'source_lstm_forget_gate', 'source_lstm_output_gate'
            ],
            tops=['source_lstm%d_hidden' % step,
                  'source_lstm%d_mem' % step],
            num_cells=hyper['mem_cells'],
            weight_filler=filler)
        net.forward_layer(wordvec)
        net.forward_layer(concat)
        net.forward_layer(lstm)

    # Select, for every example, the encoder hidden/memory state at its true
    # final time step; these seed the decoder below.
    net.forward_layer(
        layers.CapSequence(name='hidden_seed',
                           sequence_lengths=lengths,
                           bottoms=hidden_bottoms))
    net.forward_layer(
        layers.CapSequence(name='mem_seed',
                           sequence_lengths=lengths,
                           bottoms=mem_bottoms))

    loss = []
    # Decoder: unroll over the target batch, feeding the previous target symbol
    # at each step and starting from the CapSequence seeds.
    for step in range(target_batch.shape[1]):
        if step == 0:
            prev_hidden = 'hidden_seed'
            prev_mem = 'mem_seed'
            word = np.zeros(target_batch[:, 0].shape)
        else:
            prev_hidden = 'lstm%d_hidden' % (step - 1)
            prev_mem = 'lstm%d_mem' % (step - 1)
            word = target_batch[:, step - 1]
        word = layers.NumpyData(name=('word%d' % step),
                                data=np.reshape(
                                    word, (hyper['batch_size'], 1, 1, 1)))
        wordvec = layers.Wordvec(name=('wordvec%d' % step),
                                 bottoms=['word%d' % step],
                                 dimension=hyper['mem_cells'],
                                 vocab_size=hyper['vocab_size'],
                                 param_names=['source_wordvec_param'],
                                 weight_filler=filler)
        concat = layers.Concat(name='lstm_concat%d' % step,
                               bottoms=[prev_hidden,
                                        'wordvec%d' % step])
        lstm = layers.Lstm(name='lstm%d' % step,
                           bottoms=['lstm_concat%d' % step, prev_mem],
                           param_names=[
                               'lstm_input_value', 'lstm_input_gate',
                               'lstm_forget_gate', 'lstm_output_gate'
                           ],
                           tops=['lstm%d_hidden' % step,
                                 'lstm%d_mem' % step],
                           num_cells=hyper['mem_cells'],
                           weight_filler=filler)
        dropout = layers.Dropout(name='dropout%d' % step,
                                 bottoms=['lstm%d_hidden' % step],
                                 dropout_ratio=0.16)

        net.forward_layer(word)
        net.forward_layer(wordvec)
        net.forward_layer(concat)
        net.forward_layer(lstm)
        net.forward_layer(dropout)

        net.forward_layer(
            layers.NumpyData(name='label%d' % step,
                             data=np.reshape(target_batch[:, step],
                                             (hyper['batch_size'], 1, 1, 1))))
        net.forward_layer(
            layers.InnerProduct(name='ip%d' % step,
                                bottoms=['dropout%d' % step],
                                param_names=['ip_weight', 'ip_bias'],
                                num_output=hyper['vocab_size'],
                                weight_filler=filler))
        loss.append(
            net.forward_layer(
                layers.SoftmaxWithLoss(
                    name='softmax_loss%d' % step,
                    ignore_label=hyper['pad_symbol'],
                    bottoms=['ip%d' % step, 'label%d' % step])))
        # Expose the per-step probabilities as a top; the non-loss Softmax's
        # return value is not appended to `loss`, so it no longer dilutes the
        # reported mean.
        net.forward_layer(
            layers.Softmax(name='softmax%d' % step,
                           ignore_label=hyper['pad_symbol'],
                           bottoms=['ip%d' % step]))
    return np.mean(loss)
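
This variant consumes an iterator that yields (source_batch, target_batch) pairs of 2-D integer arrays, right-padded with hyper['pad_symbol'] so that the per-example lengths can be recovered. The original padding helper is not shown; a hypothetical generator with the same shape contract is sketched below.

import numpy as np

def make_batches(pairs, hyper):
    # Hypothetical batch generator matching what forward() expects: each yielded
    # item is a (source_batch, target_batch) pair of int arrays shaped
    # (batch_size, max_len), padded on the right with hyper['pad_symbol'].
    def pad(seqs):
        out = np.full((hyper['batch_size'], hyper['max_len']),
                      hyper['pad_symbol'], dtype=np.int64)
        for i, seq in enumerate(seqs):
            seq = seq[:hyper['max_len']]
            out[i, :len(seq)] = seq
        return out
    while True:
        for start in range(0, len(pairs) - hyper['batch_size'] + 1,
                           hyper['batch_size']):
            chunk = pairs[start:start + hyper['batch_size']]
            yield (pad([s for s, _ in chunk]), pad([t for _, t in chunk]))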