Beispiel #1
0
def generate_lstm(net, step, lstm_params, lstm_out, dropout_ratio):
    """Run one LSTM timestep on ``net`` and apply dropout to its hidden output.

    The blob ``"lstm_input"`` is concatenated with the previous hidden
    state, the result is fed to an ``LstmUnit`` together with the previous
    memory blob, and ``Dropout`` is applied to the new hidden state.

    Args:
        net: Network object exposing ``f(layer)``.
        step: Timestep index, used to name this step's layers and blobs
            (``concat<step>``, ``lstm<step>``, ``lstm_hidden<step>``,
            ``lstm_mem<step>``, ``dropout<step>``).
        lstm_params: Indexable; item 0 is the number of LSTM cells and
            item 1 is the weight filler.
        lstm_out: Indexable; item 0 is the name of the previous hidden
            blob and item 1 is the name of the previous memory blob.
        dropout_ratio: Ratio handed to the ``Dropout`` layer.
    """

    hidden_bottom, mem_bottom = lstm_out[0], lstm_out[1]
    num_cells, filler = lstm_params[0], lstm_params[1]
    concat_name = "concat%d" % step
    hidden_top = "lstm_hidden%d" % step

    net.f(Concat(concat_name, bottoms=["lstm_input", hidden_bottom]))

    def build_lstm(**extra):
        # Single place that spells out the LSTM arguments, so the two
        # construction attempts below cannot drift apart.
        return LstmUnit("lstm%d" % step, num_cells,
                        weight_filler=filler,
                        param_names=["input_value", "input_gate",
                                     "forget_gate", "output_gate"],
                        bottoms=[concat_name, mem_bottom],
                        tops=[hidden_top, "lstm_mem%d" % step],
                        **extra)

    try:
        lstm_unit = build_lstm(tie_output_forget=True)
    except Exception:
        # Old version of Apollocaffe sets tie_output_forget=True by default,
        # so retry without the keyword.  ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        lstm_unit = build_lstm()
    net.f(lstm_unit)
    net.f(Dropout("dropout%d" % step, dropout_ratio, bottoms=[hidden_top]))
Beispiel #2
0
def forward(net, sentence_batches):
    """Unroll the LSTM over the next sentence batch and add per-step losses.

    The next item of ``sentence_batches`` is padded with ``pad_batch`` and at
    most its first 100 columns are processed.  At step ``t`` the network is
    fed column ``t - 1`` of the padded batch (zeros at step 0) and column
    ``t`` is used as the label of a ``SoftmaxWithLoss`` layer that ignores
    ``zero_symbol``.

    Relies on the module-level names ``batch_size``, ``dimension``,
    ``vocab_size`` and ``zero_symbol``.

    Args:
        net: Network object exposing ``clear_forward()`` and ``f(layer)``.
        sentence_batches: Iterator yielding batches accepted by ``pad_batch``.
    """
    net.clear_forward()
    padded = pad_batch(next(sentence_batches))
    num_steps = min(padded.shape[1], 100)
    assert num_steps > 0

    # One zero blob doubles as the initial hidden state and initial memory.
    net.f(NumpyData('lstm_seed', np.zeros((batch_size, dimension))))

    for t in range(num_steps):
        if t:
            hidden_in = 'lstm%d_hidden' % (t - 1)
            mem_in = 'lstm%d_mem' % (t - 1)
            word = padded[:, t - 1]
        else:
            hidden_in = mem_in = 'lstm_seed'
            word = np.zeros(padded[:, 0].shape)

        word_blob = 'word%d' % t
        wordvec_blob = 'wordvec%d' % t
        concat_blob = 'lstm_concat%d' % t
        hidden_out = 'lstm%d_hidden' % t
        dropout_blob = 'dropout%d' % t
        ip_blob = 'ip%d' % t
        label_blob = 'label%d' % t

        # Embed the input word and join it with the previous hidden state.
        net.f(NumpyData(word_blob, word))
        net.f(Wordvec(wordvec_blob, dimension, vocab_size,
                      bottoms=[word_blob],
                      param_names=['wordvec_param']))
        net.f(Concat(concat_blob, bottoms=[hidden_in, wordvec_blob]))

        # Parameter names are identical at every step, so weights are shared.
        net.f(LstmUnit('lstm%d' % t,
                       bottoms=[concat_blob, mem_in],
                       param_names=['lstm_input_value', 'lstm_input_gate',
                                    'lstm_forget_gate', 'lstm_output_gate'],
                       tops=[hidden_out, 'lstm%d_mem' % t],
                       num_cells=dimension))
        net.f(Dropout(dropout_blob, 0.16, bottoms=[hidden_out]))

        # Score the vocabulary and compare against the current column.
        net.f(NumpyData(label_blob, padded[:, t]))
        net.f(InnerProduct(ip_blob, vocab_size,
                           bottoms=[dropout_blob],
                           param_names=['softmax_ip_weights',
                                        'softmax_ip_bias']))
        net.f(SoftmaxWithLoss('softmax_loss%d' % t,
                              ignore_label=zero_symbol,
                              bottoms=[ip_blob, label_blob]))
Beispiel #3
0
def eval_forward(net, length=150):
    """Generate a sequence of symbols from the network and print it.

    The network is run one step at a time with batch size 1.  The first
    symbol is ``ord('.')``; every later symbol is obtained by passing the
    previous step's ``softmax`` blob to ``softmax_choice``.  After each step
    the new hidden and memory states are copied back into the input blobs so
    the same layer names can be reused for the next step.

    Relies on the module-level names ``dimension``, ``vocab_size`` and
    ``i_temperature``.

    Args:
        net: Network object exposing ``clear_forward()``, ``f(layer)`` and
            ``blobs``.
        length: Number of symbols to generate (defaults to 150, the value
            that used to be hard-coded).
    """
    net.clear_forward()
    output_words = []
    net.f(NumpyData('lstm_hidden_prev', np.zeros((1, dimension))))
    net.f(NumpyData('lstm_mem_prev', np.zeros((1, dimension))))

    prev_hidden = 'lstm_hidden_prev'
    prev_mem = 'lstm_mem_prev'
    for step in range(length):
        net.clear_forward()
        net.f(NumpyData('word', [0]))
        if step == 0:
            output = ord('.')
        else:
            # Reads the distribution produced by the previous iteration.
            output = softmax_choice(net.blobs['softmax'].data)
        output_words.append(output)
        net.blobs['word'].data[0] = output

        net.f(
            Wordvec('wordvec',
                    dimension,
                    vocab_size,
                    bottoms=['word'],
                    param_names=['wordvec_param']))
        net.f(Concat('lstm_concat', bottoms=[prev_hidden, 'wordvec']))
        net.f(
            LstmUnit('lstm',
                     dimension,
                     bottoms=['lstm_concat', prev_mem],
                     param_names=[
                         'lstm_input_value', 'lstm_input_gate',
                         'lstm_forget_gate', 'lstm_output_gate'
                     ],
                     tops=['lstm_hidden_next', 'lstm_mem_next']))
        net.f(Dropout('dropout', 0.16, bottoms=['lstm_hidden_next']))

        net.f(
            InnerProduct('ip',
                         vocab_size,
                         bottoms=['dropout'],
                         param_names=['softmax_ip_weights',
                                      'softmax_ip_bias']))
        # Scale the scores in place before the softmax.
        net.blobs['ip'].data[:] *= i_temperature
        net.f(Softmax('softmax', bottoms=['ip']))

        # Carry the recurrent state over to the next iteration.
        net.blobs['lstm_hidden_prev'].data_tensor.copy_from(
            net.blobs['lstm_hidden_next'].data_tensor)
        net.blobs['lstm_mem_prev'].data_tensor.copy_from(
            net.blobs['lstm_mem_next'].data_tensor)

    # Single-argument call form prints identically on Python 2 and Python 3.
    print(''.join(chr(x) for x in output_words))
Beispiel #4
0
def evaluate_forward(net, net_config):
    """Run the LSTM for 20 steps on a constant input and collect predictions.

    Every step feeds the constant 0.5 into the network, reads the single
    ``ip`` output as the prediction, and copies the new hidden/memory state
    back into the input blobs.

    Args:
        net: Network object exposing ``clear_forward()``, ``f(layer)`` and
            ``blobs``.
        net_config: Mapping providing ``"mem_cells"`` and ``"init_range"``.

    Returns:
        A ``(targets, predictions, residuals)`` tuple: ``targets`` is the
        cumulative sum of the constant input (NumPy array), ``predictions``
        is the list of per-step float outputs, and ``residuals`` is the list
        of ``prediction - target`` differences.
    """
    net.clear_forward()
    num_steps = 20
    increment = 0.5

    state_shape = (1, net_config["mem_cells"])
    net.f(NumpyData("prev_hidden", np.zeros(state_shape)))
    net.f(NumpyData("prev_mem", np.zeros(state_shape)))
    filler = Filler("uniform", net_config["init_range"])

    predictions = []
    for _ in range(num_steps):
        # Blobs are overwritten in place to save memory; that breaks
        # backprop and triggers warnings, which clear_forward suppresses.
        net.clear_forward()

        net.f(NumpyData("value", data=np.array(increment).reshape((1, 1))))
        net.f(Concat("lstm_concat", bottoms=["prev_hidden", "value"]))
        lstm = LstmUnit("lstm",
                        net_config["mem_cells"],
                        bottoms=["lstm_concat", "prev_mem"],
                        param_names=["input_value", "input_gate",
                                     "forget_gate", "output_gate"],
                        weight_filler=filler,
                        tops=["next_hidden", "next_mem"])
        net.f(lstm)
        net.f(InnerProduct("ip", 1, bottoms=["next_hidden"]))

        step_output = net.blobs["ip"].data.flatten()[0]
        predictions.append(float(step_output))

        # Feed this step's state back in as the next step's input state.
        for dst, src in (("prev_hidden", "next_hidden"),
                         ("prev_mem", "next_mem")):
            net.blobs[dst].data_tensor.copy_from(net.blobs[src].data_tensor)

    # The expected running sum grows by ``increment`` at every step.
    targets = np.cumsum([increment] * len(predictions))
    residuals = [pred - tgt for pred, tgt in zip(predictions, targets)]
    return targets, predictions, residuals
Beispiel #5
0
def generate_lstm(net, step, num_cells, hidden_bottom, mem_bottom, filler,
                  dropout_ratio):
    """Run one LSTM timestep on ``net`` followed by dropout.

    Concatenates the ``"lstm_input"`` blob with ``hidden_bottom``, feeds the
    result and ``mem_bottom`` through an ``LstmUnit`` with ``num_cells``
    cells initialised by ``filler``, then applies ``Dropout`` with
    ``dropout_ratio`` to the new hidden state.  All layers and blobs created
    here are suffixed with ``step``.
    """

    concat_name = "concat%d" % step
    hidden_top = "lstm_hidden%d" % step
    mem_top = "lstm_mem%d" % step

    concat_layer = Concat(concat_name, bottoms=["lstm_input", hidden_bottom])
    net.f(concat_layer)

    lstm_layer = LstmUnit(
        "lstm%d" % step,
        num_cells,
        weight_filler=filler,
        param_names=["input_value", "input_gate",
                     "forget_gate", "output_gate"],
        bottoms=[concat_name, mem_bottom],
        tops=[hidden_top, mem_top])
    net.f(lstm_layer)

    dropout_layer = Dropout("dropout%d" % step, dropout_ratio,
                            bottoms=[hidden_top])
    net.f(dropout_layer)
Beispiel #6
0
def evaluate_forward(net, net_config, feat, scene_feat):
    """Run the LSTM over a feature sequence and return six probabilities.

    The sequence fed to the network is ``scene_feat`` followed by every row
    of ``feat``.  After the last step, six two-way ``InnerProduct`` +
    ``Softmax`` heads are applied to the final hidden state.

    Args:
        net: Network object exposing ``clear_forward()``, ``f(layer)`` and
            ``blobs``.
        net_config: Mapping providing ``"mem_cells"`` and ``"init_range"``.
        feat: 2-D array; each row is reshaped to ``(1, feat.shape[1])`` and
            fed as one timestep.
        scene_feat: Array reshaped to ``(1, feat.shape[1])`` and fed as the
            first timestep.

    Returns:
        A list of six floats: element 1 of each flattened ``prob<i>`` blob.
    """
    num_heads = 6

    net.clear_forward()
    feat_dim = feat.shape[1]

    net.f(NumpyData("prev_hidden", np.zeros((1, net_config["mem_cells"]))))
    net.f(NumpyData("prev_mem", np.zeros((1, net_config["mem_cells"]))))
    filler = Filler("uniform", net_config["init_range"])

    # One extra step for the scene feature that precedes the sequence.
    length = feat.shape[0] + 1
    for step in range(length):
        net.clear_forward()
        if step == 0:
            value = scene_feat.reshape(1, feat_dim)
        else:
            value = feat[step - 1, :].reshape(1, feat_dim)
        net.f(NumpyData("value", data=value))
        prev_hidden = "prev_hidden"
        prev_mem = "prev_mem"
        net.f(Concat("lstm_concat", bottoms=[prev_hidden, "value"]))
        net.f(LstmUnit("lstm", net_config["mem_cells"],
                       bottoms=["lstm_concat", prev_mem],
                       param_names=[
                           "input_value", "input_gate",
                           "forget_gate", "output_gate"],
                       weight_filler=filler,
                       tops=["next_hidden", "next_mem"]))
        # NOTE(review): the "ip" output is never read in this function; the
        # layer is kept so the set of layers in the net is unchanged.
        net.f(InnerProduct("ip", 1, bottoms=["next_hidden"]))
        # Carry the recurrent state over to the next step.
        net.blobs["prev_hidden"].data_tensor.copy_from(
            net.blobs["next_hidden"].data_tensor)
        net.blobs["prev_mem"].data_tensor.copy_from(
            net.blobs["next_mem"].data_tensor)

    # ``range`` (not ``xrange``) matches the loop above and also exists on
    # Python 3.
    for i in range(num_heads):
        net.f(InnerProduct("ip%d" % i, 2, bottoms=["next_hidden"]))
        net.f(Softmax("prob%d" % i, bottoms=["ip%d" % i]))

    predictions = [float(net.blobs["prob%d" % i].data.flatten()[1])
                   for i in range(num_heads)]

    return predictions
Beispiel #7
0
def lstm_layers(net, step, filler, net_config):
    """Build (without running) the two layers of LSTM timestep ``step``.

    Step 0 takes both its hidden and memory input from ``"lstm_seed"``;
    later steps take them from the previous step's ``lstm_hidden``/
    ``lstm_mem`` blobs.  ``net`` is accepted but not used here.

    Returns:
        ``[concat_layer, lstm_layer]``: a ``Concat`` joining the previous
        hidden state with ``value<step>``, and an ``LstmUnit`` with
        ``net_config["mem_cells"]`` cells initialised by ``filler``.
    """
    if step == 0:
        hidden_in = mem_in = "lstm_seed"
    else:
        hidden_in = "lstm_hidden%d" % (step - 1)
        mem_in = "lstm_mem%d" % (step - 1)

    concat_name = "lstm_concat%d" % step

    # Join the previous hidden output with this step's input value.
    concat_layer = Concat(concat_name,
                          bottoms=[hidden_in, "value%d" % step])

    # Advance the LSTM by one step.
    lstm_layer = LstmUnit(
        "lstm%d" % step,
        net_config["mem_cells"],
        bottoms=[concat_name, mem_in],
        param_names=["input_value", "input_gate",
                     "forget_gate", "output_gate"],
        tops=["lstm_hidden%d" % step, "lstm_mem%d" % step],
        weight_filler=filler)

    return [concat_layer, lstm_layer]
Beispiel #8
0
def forward(net, input_data, net_config, deploy=False):
    """Build and run the unrolled, stacked LSTM network on one batch.

    (The original author describes this as the ReInspect network.)

    Steps, in order: load the two input blobs, slice labels and word vectors
    into one blob per timestep, flatten each word-vector slice to 2-D, run
    ``lstm_num_stacks`` LSTM layers per timestep with dropout after each,
    concatenate the top-layer dropout outputs over time, and apply a shared
    ``InnerProduct`` followed by ``Softmax`` (deploy) or ``SoftmaxWithLoss``.

    Args:
        net: Network object exposing ``clear_forward()`` and ``f(layer)``.
        input_data: Mapping providing ``"wordvec_layer"`` and
            ``"target_words"``.
        net_config: Mapping providing ``max_len``, ``init_range``,
            ``batch_size``, ``lstm_num_cells``, ``lstm_num_stacks``,
            ``dropout_ratio``, ``vocab_size`` and ``zero_symbol``.
        deploy: When true, emit ``word_probs`` instead of ``word_loss``.
    """

    net.clear_forward()

    # Original author's shape notes: 128*38*100*1 and 128*100*1*1.
    net.f(NumpyData("wordvec_layer",
                    data=np.array(input_data["wordvec_layer"])))
    net.f(NumpyData("target_words",
                    data=np.array(input_data["target_words"])))

    max_len = net_config['max_len']

    # Cutting at every index 1..max_len-1 yields one slice per timestep.
    label_tops = ['label%d' % t for t in range(max_len)]
    label_cuts = list(range(1, max_len))
    net.f(Slice("label_slice_layer",
                slice_dim=1,
                bottoms=["target_words"],
                tops=label_tops,
                slice_point=label_cuts))

    wordvec_tops = ['target_wordvec%d_4d' % t for t in range(max_len)]
    wordvec_cuts = list(range(1, max_len))
    net.f(Slice("wordvec_slice_layer",
                slice_dim=2,
                bottoms=['wordvec_layer'],
                tops=wordvec_tops,
                slice_point=wordvec_cuts))

    # Flatten every 4-D slice to 2-D (original note: 128*38*1*1 -> 128*38).
    # Same effect as a Reshape layer with shape [0, -1].
    reshape_spec = """
            name: "target_wordvec%d"
            type: "Reshape"
            bottom: "target_wordvec%d_4d"
            top: "target_wordvec%d"
            reshape_param {
              shape {
                dim: 0  # copy the dimension from below
                dim: -1
              }
            }
            """
    for t in range(max_len):
        net.f(reshape_spec % (t, t, t))

    filler = Filler("uniform", net_config["init_range"])
    for t in range(max_len):
        if t == 0:
            # Zero blobs standing in for the state before the first step.
            net.f(NumpyData("dummy_layer",
                            np.zeros((net_config["batch_size"],
                                      net_config["lstm_num_cells"]))))
            net.f(NumpyData("dummy_mem_cell",
                            np.zeros((net_config["batch_size"],
                                      net_config["lstm_num_cells"]))))

        for layer in range(net_config['lstm_num_stacks']):
            # Bottom layer reads the word vector; upper layers read the
            # dropout output of the layer below at the same timestep.
            below = ('target_wordvec%d' % t if layer == 0
                     else 'dropout%d_%d' % (layer - 1, t))
            hidden_in = ("dummy_layer" if t == 0
                         else 'lstm%d_hidden%d' % (layer, t - 1))
            concat_name = 'concat%d_layer%d' % (layer, t)
            net.f(Concat(concat_name, bottoms=[below, hidden_in]))

            # Names depend only on the layer index, so weights are shared
            # across timesteps.
            shared_params = ['lstm%d_param_%d' % (layer, k)
                             for k in range(4)]
            mem_in = ('dummy_mem_cell' if t == 0
                      else 'lstm%d_mem_cell%d' % (layer, t - 1))
            hidden_out = 'lstm%d_hidden%d' % (layer, t)
            net.f(LstmUnit('lstm%d_layer%d' % (layer, t),
                           net_config["lstm_num_cells"],
                           weight_filler=filler,
                           param_names=shared_params,
                           bottoms=[concat_name, mem_in],
                           tops=[hidden_out,
                                 'lstm%d_mem_cell%d' % (layer, t)]))

            net.f(Dropout('dropout%d_%d' % (layer, t),
                          net_config["dropout_ratio"],
                          bottoms=[hidden_out]))

    # Stack the top layer's outputs for all timesteps along dimension 0.
    top_outputs = [
        'dropout%d_%d' % (net_config['lstm_num_stacks'] - 1, t)
        for t in range(max_len)
    ]
    net.f(Concat('hidden_concat', bottoms=top_outputs, concat_dim=0))

    net.f(InnerProduct("inner_product",
                       net_config['vocab_size'],
                       bottoms=["hidden_concat"],
                       weight_filler=filler))

    # Labels are stacked the same way so they line up with the scores.
    all_labels = ['label%d' % t for t in range(max_len)]
    net.f(Concat('label_concat', bottoms=all_labels, concat_dim=0))

    if not deploy:
        net.f(SoftmaxWithLoss("word_loss",
                              bottoms=["inner_product", "label_concat"],
                              ignore_label=net_config['zero_symbol']))
    else:
        net.f(Softmax("word_probs", bottoms=["inner_product"]))
Beispiel #9
0
def forward(net, input_data, net_config, phase='train', deploy=False):
    """Build and run the unrolled, stacked LSTM network step by step.

    (The original author describes this as the ReInspect network.)

    Unlike a fully pre-sliced unroll, the input of step ``i + 1`` is created
    after step ``i`` has run.  In ``phase == 'test'`` the rows of that input
    whose index range is active (from ``ws_i`` until a whitespace symbol has
    been predicted) are replaced by a one-hot vector of the network's own
    arg-max prediction; otherwise the provided word vectors are used as is.

    Args:
        net: Network object exposing ``clear_forward()``, ``f(layer)`` and
            ``blobs``.
        input_data: Mapping providing ``"ws_i"``, ``"wordvec_layer"`` and
            ``"target_words"``.
        net_config: Mapping providing ``max_len``, ``batch_size``,
            ``init_range``, ``lstm_num_cells``, ``lstm_num_stacks``,
            ``dropout_ratio``, ``vocab_size``, ``whitespace_symbol`` and
            ``zero_symbol``.
        phase: ``'test'`` enables feeding predictions back as inputs.
        deploy: When true, additionally emit a ``word_probs`` softmax.
    """

    net.clear_forward()

    start_idx = input_data["ws_i"]
    # Per-sample step at which prediction feedback stops; shrinks once a
    # whitespace symbol is predicted.
    stop_idx = [net_config['max_len']] * net_config['batch_size']
    wordvecs = input_data["wordvec_layer"]  # original note: 128*38*100*1
    # Original note: 128*100*1*1.
    net.f(NumpyData("target_words",
                    data=np.array(input_data["target_words"])))

    max_len = net_config['max_len']

    # Cutting at every index 1..max_len-1 yields one label blob per step.
    label_tops = ['label%d' % t for t in range(max_len)]
    label_cuts = list(range(1, max_len))
    net.f(Slice("label_slice_layer",
                slice_dim=1,
                bottoms=["target_words"],
                tops=label_tops,
                slice_point=label_cuts))

    # First input is always taken from the data (original note: start
    # symbol, 128*38).
    net.f(NumpyData("target_wordvec%d" % 0, data=wordvecs[:, :, 0, 0]))

    filler = Filler("uniform", net_config["init_range"])
    for t in range(max_len):
        if t == 0:
            # Zero blobs standing in for the state before the first step.
            net.f(NumpyData("dummy_layer",
                            np.zeros((net_config["batch_size"],
                                      net_config["lstm_num_cells"]))))
            net.f(NumpyData("dummy_mem_cell",
                            np.zeros((net_config["batch_size"],
                                      net_config["lstm_num_cells"]))))

        for layer in range(net_config['lstm_num_stacks']):
            # Bottom layer reads the word vector; upper layers read the
            # dropout output of the layer below at the same timestep.
            below = ('target_wordvec%d' % t if layer == 0
                     else 'dropout%d_%d' % (layer - 1, t))
            hidden_in = ("dummy_layer" if t == 0
                         else 'lstm%d_hidden%d' % (layer, t - 1))
            concat_name = 'concat%d_layer%d' % (layer, t)
            net.f(Concat(concat_name, bottoms=[below, hidden_in]))

            shared_params = ['lstm%d_param_%d' % (layer, k)
                             for k in range(4)]
            mem_in = ('dummy_mem_cell' if t == 0
                      else 'lstm%d_mem_cell%d' % (layer, t - 1))
            hidden_out = 'lstm%d_hidden%d' % (layer, t)
            net.f(LstmUnit('lstm%d_layer%d' % (layer, t),
                           net_config["lstm_num_cells"],
                           weight_filler=filler,
                           param_names=shared_params,
                           bottoms=[concat_name, mem_in],
                           tops=[hidden_out,
                                 'lstm%d_mem_cell%d' % (layer, t)]))

            net.f(Dropout('dropout%d_%d' % (layer, t),
                          net_config["dropout_ratio"],
                          bottoms=[hidden_out]))

        ip_name = "ip%d" % t
        top_dropout = 'dropout%d_%d' % (net_config['lstm_num_stacks'] - 1, t)
        net.f(InnerProduct(ip_name,
                           net_config['vocab_size'],
                           bottoms=[top_dropout],
                           weight_filler=filler))

        if t >= max_len - 1:
            # Last step: there is no following input to prepare.
            continue

        # Copy of the provided next-step input (original note: 128*38).
        next_input = np.array(wordvecs[:, :, t + 1, 0])
        if phase == 'test':
            probs_name = "word_probs%d" % t
            net.f(Softmax(probs_name, bottoms=[ip_name]))
            probs = net.blobs[probs_name].data
            for row in range(net_config['batch_size']):
                if not (start_idx[row] <= t < stop_idx[row]):
                    continue
                one_hot = [0] * net_config["vocab_size"]
                best = np.argmax(probs[row, :])
                if best == net_config['whitespace_symbol']:
                    # Whitespace predicted: stop feeding back after this.
                    stop_idx[row] = t + 1
                one_hot[best] = 1
                next_input[row, :] = one_hot
        net.f(NumpyData("target_wordvec%d" % (t + 1), data=next_input))

    # Stack per-step scores and labels along dimension 0 so they line up.
    all_scores = ["ip%d" % t for t in range(max_len)]
    net.f(Concat('ip_concat', bottoms=all_scores, concat_dim=0))

    all_labels = ['label%d' % t for t in range(max_len)]
    net.f(Concat('label_concat', bottoms=all_labels, concat_dim=0))

    if deploy:
        net.f(Softmax("word_probs", bottoms=["ip_concat"]))

    # The loss layer is added regardless of ``deploy``.
    net.f(SoftmaxWithLoss("word_loss",
                          bottoms=["ip_concat", "label_concat"],
                          ignore_label=net_config['zero_symbol']))