Code example #1
def addition_rnn_neural_network(inputs, labels):
    print('Build model...')

    # input_shape=(None, num_feature).
    _, hidden, _ = basic_lstm(inputs, None, None, HIDDEN_SIZE)
    expand_hidden = fluid.layers.expand(hidden[0],
                                        expand_times=[1, DIGITS + 1])
    outputs = fluid.layers.reshape(expand_hidden,
                                   shape=[BATCH_SIZE, DIGITS + 1, HIDDEN_SIZE])

    for _ in range(LAYERS):

        # outputs, _, _ = fluid.layers.lstm(outputs, init_h, init_c, MAXLEN, HIDDEN_SIZE, num_layers=1)
        outputs, _, _ = basic_lstm(outputs, None, None, HIDDEN_SIZE)

    probs = fluid.layers.fc(input=outputs,
                            size=len(chars),
                            act='softmax',
                            num_flatten_dims=2)

    loss = fluid.layers.cross_entropy(input=probs,
                                      label=labels,
                                      soft_label=True)
    avg_loss = fluid.layers.mean(loss)
    preds = fluid.layers.reshape(probs,
                                 shape=[BATCH_SIZE * (DIGITS + 1),
                                        len(chars)])
    labs = fluid.layers.reshape(fluid.layers.argmax(labels, axis=-1),
                                shape=[BATCH_SIZE * (DIGITS + 1), 1])
    accuracy = fluid.layers.accuracy(preds, labs)
    return avg_loss, accuracy
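In these snippets, basic_lstm comes from paddle.fluid.contrib.layers. A minimal, hedged driver sketch for the network above (the data-layer names are illustrative, and MAXLEN, DIGITS, chars and BATCH_SIZE are assumed to be defined elsewhere in the script):

import paddle.fluid as fluid
from paddle.fluid.contrib.layers import basic_lstm

# Illustrative data layers matching the shapes the network expects:
# one-hot encoded input sequences and one-hot encoded target digits.
inputs = fluid.data(name='inputs', shape=[None, MAXLEN, len(chars)], dtype='float32')
labels = fluid.data(name='labels', shape=[None, DIGITS + 1, len(chars)], dtype='float32')
avg_loss, accuracy = addition_rnn_neural_network(inputs, labels)
fluid.optimizer.AdamOptimizer(learning_rate=0.001).minimize(avg_loss)
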
Code example #2
def babirnn_neural_network(sentence, question, answer):

    encoded_sentence_emb = fluid.layers.embedding(
        input=sentence, size=[vocab_size, EMBED_HIDDEN_SIZE], is_sparse=True)
    _, encoded_sentence, _ = basic_lstm(encoded_sentence_emb, None, None,
                                        SENT_HIDDEN_SIZE)
    encoded_question_emb = fluid.layers.embedding(
        input=question, size=[vocab_size, EMBED_HIDDEN_SIZE], is_sparse=True)
    _, encoded_question, _ = basic_lstm(encoded_question_emb, None, None,
                                        QUERY_HIDDEN_SIZE)

    merged = fluid.layers.concat(
        input=[encoded_sentence[0], encoded_question[0]], axis=-1)
    preds = fluid.layers.fc(input=merged, size=vocab_size, act='softmax')

    # loss
    loss = fluid.layers.cross_entropy(input=preds,
                                      label=answer,
                                      soft_label=True)
    avg_loss = fluid.layers.mean(loss)
    label = fluid.layers.reshape(fluid.layers.argmax(answer, axis=-1),
                                 shape=[-1, 1])
    accuracy = fluid.layers.accuracy(input=preds, label=label)

    return preds, avg_loss, accuracy
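A note on the encoded_sentence[0] and encoded_question[0] indexing here (and on hidden[0] in code example #1): basic_lstm returns a (rnn_out, last_hidden, last_cell) triple whose last states are stacked along a leading per-layer axis, so for these single-layer calls index 0 simply selects the final state of the only layer as a [batch_size, hidden_size] tensor that can go straight into concat and fc.
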
Code example #3
def hierarchical_rnn_neural_network(img, label):

    img = (img + 1) / 2  # [-1, 1] --> [0, 1]
    encoded_rows, _, _ = basic_lstm(img, None, None, row_hidden)

    _, encoded_columns, _ = basic_lstm(encoded_rows, None, None, col_hidden)
    prediction = fluid.layers.fc(encoded_columns[0],
                                 num_classes,
                                 act='softmax')
    loss = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_loss = fluid.layers.mean(loss)
    accuracy = fluid.layers.accuracy(input=prediction, label=label)

    return avg_loss, accuracy
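A minimal driver sketch for the hierarchical (row-then-column) model above, assuming MNIST-style 28x28 inputs scaled to [-1, 1] and that row_hidden, col_hidden and num_classes are defined elsewhere:

img = fluid.data(name='img', shape=[None, 28, 28], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
avg_loss, accuracy = hierarchical_rnn_neural_network(img, label)
fluid.optimizer.AdamOptimizer(learning_rate=0.001).minimize(avg_loss)
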
Code example #4
    def test_name(self):
        batch_size = 20
        input_size = 128
        hidden_size = 256
        num_layers = 2
        dropout = 0.5
        bidirectional = True
        batch_first = False

        with new_program_scope():
            input = layers.data(name="input",
                                shape=[-1, batch_size, input_size],
                                dtype='float32')
            pre_hidden = layers.data(name="pre_hidden",
                                     shape=[-1, hidden_size],
                                     dtype='float32')
            pre_cell = layers.data(name="pre_cell",
                                   shape=[-1, hidden_size],
                                   dtype='float32')
            sequence_length = layers.data(name="sequence_length",
                                          shape=[-1],
                                          dtype='int32')

            rnn_out, last_hidden, last_cell = basic_lstm(
                input, pre_hidden, pre_cell, hidden_size,
                num_layers=num_layers,
                sequence_length=sequence_length,
                dropout_prob=dropout,
                bidirectional=bidirectional,
                param_attr=fluid.ParamAttr(name="test1"),
                bias_attr=fluid.ParamAttr(name="test1"),
                batch_first=batch_first)

            var_list = fluid.io.get_program_parameter(
                fluid.default_main_program())

            for var in var_list:
                self.assertTrue(var.name in self.name_set)
Code example #5
    def _build_encoder(self):
        self.enc_output, enc_last_hidden, enc_last_cell = basic_lstm(
            self.src_emb, None, None, self.hidden_size,
            num_layers=self.num_layers,
            batch_first=self.batch_first,
            dropout_prob=self.dropout,
            param_attr=ParamAttr(
                initializer=fluid.initializer.UniformInitializer(
                    low=-self.init_scale, high=self.init_scale)),
            bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)),
            sequence_length=self.src_sequence_length)

        return self.enc_output, enc_last_hidden, enc_last_cell
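Note that enc_last_hidden and enc_last_cell come back stacked per layer (shape [num_layers, batch_size, hidden_size]), which is the layout the decoder snippets below (code examples #9 and #10) pass back into basic_lstm as the initial hidden and cell states.
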
Code example #6
def build_model(is_training):
    
    input_text = fluid.layers.data(name = "text", shape = [-1, max_len, 1], dtype = "int64")
    input_re_text = fluid.layers.data(name = "re_text", shape = [-1, max_len, 1], dtype = "int64")

    input_text_len = fluid.layers.data(name = "text_len", shape = [-1], dtype = "int32")
    if is_training:
        input_label = fluid.layers.data(name = "label", shape = [-1, 1], dtype = "int64")

    input_text_emb = fluid.layers.embedding(input = input_text, size = [vocab_size, embedding_dims], param_attr = ParamAttr(name = "shared_emb"))
    input_re_text_emb = fluid.layers.embedding(input = input_re_text, size = [vocab_size, embedding_dims], param_attr = ParamAttr(name = "shared_emb"))

    _, _, input_text_lstm = basic_lstm(input_text_emb, None, None, lstm_hidden_size, num_layers = 1, sequence_length = input_text_len)

    _, _, input_re_text_lstm = basic_lstm(input_re_text_emb, None, None, lstm_hidden_size, num_layers = 1, sequence_length = input_text_len)

    input_text_lstm = fluid.layers.transpose(input_text_lstm, perm = [1, 0, 2])
    input_re_text_lstm = fluid.layers.transpose(input_re_text_lstm, perm = [1, 0, 2])

    input_text_lstm = fluid.layers.reshape(input_text_lstm, shape = [-1, lstm_hidden_size])
    input_re_text_lstm = fluid.layers.reshape(input_re_text_lstm, shape = [-1, lstm_hidden_size])

    input_text_hidden = fluid.layers.concat([input_text_lstm, input_re_text_lstm], axis = -1)

    input_text_hidden = fluid.layers.dropout(input_text_hidden, 0.5, is_test = not is_training)

    input_text_hidden = fluid.layers.fc(input_text_hidden, size = 2, act = "softmax")

    if is_training:
        loss = fluid.layers.cross_entropy(input_text_hidden, input_label)
        loss = fluid.layers.reduce_mean(loss)

        optimizer = fluid.optimizer.AdamOptimizer(learning_rate = 0.01)
        optimizer.minimize(loss)

        return loss

    else:

        return input_text_hidden
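A hedged sketch of running the training graph built above for one step; batch_text, batch_re_text, batch_len and batch_label are placeholder numpy arrays shaped to match the data layers:

loss = build_model(is_training=True)
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
loss_val, = exe.run(fluid.default_main_program(),
                    feed={"text": batch_text, "re_text": batch_re_text,
                          "text_len": batch_len, "label": batch_label},
                    fetch_list=[loss])
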
Code example #7
def lstm_text_generation_neural_network(sentences, next_chars=None):
    print('Build model...')

    _, hidden, _ = basic_lstm(sentences, None, None, hidden_size=128)
    preds = fluid.layers.fc(input=hidden[0], size=len(chars), act='softmax')

    # loss
    loss = fluid.layers.cross_entropy(input=preds,
                                      label=next_chars,
                                      soft_label=True)
    avg_loss = fluid.layers.mean(loss)
    label = fluid.layers.reshape(fluid.layers.argmax(next_chars, axis=-1),
                                 shape=[-1, 1])
    accuracy = fluid.layers.accuracy(input=preds, label=label)

    return preds, avg_loss, accuracy
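The usual companion to this text-generation network is a temperature-controlled sampling step applied to a fetched softmax row; a hedged numpy sketch, where preds_row is a 1-D probability vector taken from preds:

import numpy as np

def sample(preds_row, temperature=1.0):
    # Reweight the softmax output by temperature, renormalize, and draw one character index.
    preds_row = np.asarray(preds_row, dtype='float64')
    preds_row = np.log(preds_row + 1e-8) / temperature
    probs = np.exp(preds_row) / np.sum(np.exp(preds_row))
    return int(np.argmax(np.random.multinomial(1, probs, 1)))
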
Code example #8
 def _build_rnn_graph(self, inputs, init_hidden, init_cell,
                      sequence_length_ph):
     rnn_out, last_hidden, last_cell = basic_lstm(
         input=inputs,
         init_hidden=init_hidden,
         init_cell=init_cell,
         hidden_size=self.n_hidden_,
         num_layers=self.num_layers_,
         batch_first=True,
         dropout_prob=self.dropout_prob_,
         sequence_length=sequence_length_ph,
         param_attr=ParamAttr(
             initializer=fluid.initializer.UniformInitializer(
                 low=-self.init_scale_, high=self.init_scale_)),
         bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)),
         forget_bias=0.0)
     return rnn_out, last_hidden, last_cell
Code example #9
    def _build_decoder(self, enc_last_hidden, enc_last_cell, mode='train'):
        softmax_weight = layers.create_parameter(
            [self.hidden_size, self.tar_vocab_size],
            dtype="float32",
            name="softmax_weight",
            default_initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale, high=self.init_scale))
        if mode == 'train':

            #fluid.layers.Print(self.tar_emb)
            #fluid.layers.Print(enc_last_hidden)
            #fluid.layers.Print(enc_last_cell)
            dec_output, dec_last_hidden, dec_last_cell = basic_lstm(
                self.tar_emb, enc_last_hidden, enc_last_cell, self.hidden_size,
                num_layers=self.num_layers,
                batch_first=self.batch_first,
                dropout_prob=self.dropout,
                param_attr=ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-self.init_scale, high=self.init_scale)),
                bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)))

            dec_output = layers.matmul(dec_output, softmax_weight)

            return dec_output
        else:
            print("mode not supprt", mode)
Code example #10
    def _build_decoder(self,
                       enc_last_hidden,
                       enc_last_cell,
                       mode='train',
                       beam_size=10):
        softmax_weight = layers.create_parameter(
            [self.hidden_size, self.tar_vocab_size],
            dtype="float32",
            name="softmax_weight",
            default_initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale, high=self.init_scale))
        if mode == 'train':
            dec_output, dec_last_hidden, dec_last_cell = basic_lstm(
                self.tar_emb, enc_last_hidden, enc_last_cell, self.hidden_size,
                num_layers=self.num_layers,
                batch_first=self.batch_first,
                dropout_prob=self.dropout,
                param_attr=ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-self.init_scale, high=self.init_scale)),
                bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)))

            dec_output = layers.matmul(dec_output, softmax_weight)

            return dec_output
        elif mode == 'beam_search' or mode == 'greedy_search':
            dec_unit_list = []
            name = 'basic_lstm'
            for i in range(self.num_layers):
                new_name = name + "_layers_" + str(i)
                dec_unit_list.append(
                    BasicLSTMUnit(new_name, self.hidden_size, dtype='float32'))

            def decoder_step(current_in, pre_hidden_array, pre_cell_array):
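                # Run the stacked BasicLSTMUnit cells for a single decode step;
                # each layer's new hidden state becomes the next layer's input.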
                new_hidden_array = []
                new_cell_array = []

                step_in = current_in
                for i in range(self.num_layers):
                    pre_hidden = pre_hidden_array[i]
                    pre_cell = pre_cell_array[i]

                    new_hidden, new_cell = dec_unit_list[i](step_in,
                                                            pre_hidden,
                                                            pre_cell)

                    new_hidden_array.append(new_hidden)
                    new_cell_array.append(new_cell)

                    step_in = new_hidden

                return step_in, new_hidden_array, new_cell_array

            if mode == 'beam_search':
                max_src_seq_len = layers.shape(self.src)[1]
                max_length = max_src_seq_len * 2
                #max_length = layers.fill_constant( [1], dtype='int32', value = 10)
                pre_ids = layers.fill_constant([1, 1], dtype='int64', value=1)
                full_ids = layers.fill_constant([1, 1], dtype='int64', value=1)

                score = layers.fill_constant([1], dtype='float32', value=0.0)

                #eos_ids = layers.fill_constant( [1, 1], dtype='int64', value=2)

                pre_hidden_array = []
                pre_cell_array = []
                pre_feed = layers.fill_constant([beam_size, self.hidden_size],
                                                dtype='float32',
                                                value=0)
                for i in range(self.num_layers):
                    pre_hidden_array.append(
                        layers.expand(enc_last_hidden[i], [beam_size, 1]))
                    pre_cell_array.append(
                        layers.expand(enc_last_cell[i], [beam_size, 1]))

                eos_ids = layers.fill_constant([beam_size],
                                               dtype='int64',
                                               value=2)
                init_score = np.zeros((beam_size)).astype('float32')
                init_score[1:] = -INF
                pre_score = layers.assign(init_score)
                #pre_score = layers.fill_constant( [1,], dtype='float32', value= 0.0)
                tokens = layers.fill_constant([beam_size, 1],
                                              dtype='int64',
                                              value=1)

                enc_memory = layers.expand(self.enc_output, [beam_size, 1, 1])

                pre_tokens = layers.fill_constant([beam_size, 1],
                                                  dtype='int64',
                                                  value=1)

                finished_seq = layers.fill_constant([beam_size, 1],
                                                    dtype='int64',
                                                    value=0)
                finished_scores = layers.fill_constant([beam_size],
                                                       dtype='float32',
                                                       value=-INF)
                finished_flag = layers.fill_constant([beam_size],
                                                     dtype='float32',
                                                     value=0.0)

                step_idx = layers.fill_constant(shape=[1],
                                                dtype='int32',
                                                value=0)
                cond = layers.less_than(x=step_idx,
                                        y=max_length)  # default force_cpu=True

                parent_idx = layers.fill_constant([1], dtype='int32', value=0)
                while_op = layers.While(cond)
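                # Beam-search loop: grow_top_k expands each alive hypothesis into beam_size
                # candidates, grow_alive keeps the best unfinished ones, grow_finished folds
                # newly finished hypotheses into the finished set, and is_finished checks
                # whether decoding can stop early.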

                def compute_topk_scores_and_seq(sequences,
                                                scores,
                                                scores_to_gather,
                                                flags,
                                                beam_size,
                                                select_beam=None,
                                                generate_id=None):
                    scores = layers.reshape(scores, shape=[1, -1])
                    _, topk_indexs = layers.topk(scores, k=beam_size)

                    topk_indexs = layers.reshape(topk_indexs, shape=[-1])

                    # gather result

                    top_seq = layers.gather(sequences, topk_indexs)
                    topk_flags = layers.gather(flags, topk_indexs)
                    topk_gather_scores = layers.gather(scores_to_gather,
                                                       topk_indexs)

                    if select_beam:
                        topk_beam = layers.gather(select_beam, topk_indexs)
                    else:
                        topk_beam = select_beam

                    if generate_id:
                        topk_id = layers.gather(generate_id, topk_indexs)
                    else:
                        topk_id = generate_id
                    return top_seq, topk_gather_scores, topk_flags, topk_beam, topk_id

                def grow_alive(curr_seq, curr_scores, curr_log_probs,
                               curr_finished, select_beam, generate_id):
                    curr_scores += curr_finished * -INF
                    return compute_topk_scores_and_seq(curr_seq,
                                                       curr_scores,
                                                       curr_log_probs,
                                                       curr_finished,
                                                       beam_size,
                                                       select_beam,
                                                       generate_id=generate_id)

                def grow_finished(finished_seq, finished_scores, finished_flag,
                                  curr_seq, curr_scores, curr_finished):
                    finished_seq = layers.concat([
                        finished_seq,
                        layers.fill_constant(
                            [beam_size, 1], dtype='int64', value=1)
                    ],
                                                 axis=1)
                    curr_scores += (1.0 - curr_finished) * -INF
                    #layers.Print( curr_scores, message="curr scores")
                    curr_finished_seq = layers.concat([finished_seq, curr_seq],
                                                      axis=0)
                    curr_finished_scores = layers.concat(
                        [finished_scores, curr_scores], axis=0)
                    curr_finished_flags = layers.concat(
                        [finished_flag, curr_finished], axis=0)

                    return compute_topk_scores_and_seq(curr_finished_seq,
                                                       curr_finished_scores,
                                                       curr_finished_scores,
                                                       curr_finished_flags,
                                                       beam_size)

                def is_finished(alive_log_prob, finished_scores,
                                finished_in_finished):

                    max_out_len = 200
                    max_length_penalty = layers.pow(
                        layers.fill_constant([1],
                                             dtype='float32',
                                             value=((5.0 + max_out_len) /
                                                    6.0)), alpha)

                    lower_bound_alive_score = layers.slice(
                        alive_log_prob, starts=[0], ends=[1],
                        axes=[0]) / max_length_penalty

                    lowest_score_of_fininshed_in_finished = finished_scores * finished_in_finished
                    lowest_score_of_fininshed_in_finished += (
                        1.0 - finished_in_finished) * -INF
                    lowest_score_of_fininshed_in_finished = layers.reduce_min(
                        lowest_score_of_fininshed_in_finished)

                    met = layers.less_than(
                        lower_bound_alive_score,
                        lowest_score_of_fininshed_in_finished)
                    met = layers.cast(met, 'float32')
                    bound_is_met = layers.reduce_sum(met)

                    finished_eos_num = layers.reduce_sum(finished_in_finished)

                    finish_cond = layers.less_than(
                        finished_eos_num,
                        layers.fill_constant([1],
                                             dtype='float32',
                                             value=beam_size))

                    return finish_cond

                def grow_top_k(step_idx, alive_seq, alive_log_prob,
                               parant_idx):
                    pre_ids = alive_seq

                    dec_step_emb = layers.embedding(
                        input=pre_ids,
                        size=[self.tar_vocab_size, self.hidden_size],
                        dtype='float32',
                        is_sparse=False,
                        param_attr=fluid.ParamAttr(
                            name='target_embedding',
                            initializer=fluid.initializer.UniformInitializer(
                                low=-self.init_scale, high=self.init_scale)))

                    dec_att_out, new_hidden_array, new_cell_array = decoder_step(
                        dec_step_emb, pre_hidden_array, pre_cell_array)

                    projection = layers.matmul(dec_att_out, softmax_weight)

                    logits = layers.softmax(projection)
                    current_log = layers.elementwise_add(x=layers.log(logits),
                                                         y=alive_log_prob,
                                                         axis=0)
                    base_1 = layers.cast(step_idx, 'float32') + 6.0
                    base_1 /= 6.0
                    length_penalty = layers.pow(base_1, alpha)

                    len_pen = layers.pow(
                        ((5. + layers.cast(step_idx + 1, 'float32')) / 6.),
                        alpha)

                    current_log = layers.reshape(current_log, shape=[1, -1])

                    current_log = current_log / length_penalty
                    topk_scores, topk_indices = layers.topk(input=current_log,
                                                            k=beam_size)

                    topk_scores = layers.reshape(topk_scores, shape=[-1])

                    topk_log_probs = topk_scores * length_penalty

                    generate_id = layers.reshape(
                        topk_indices, shape=[-1]) % self.tar_vocab_size

                    selected_beam = layers.reshape(
                        topk_indices, shape=[-1]) // self.tar_vocab_size

                    topk_finished = layers.equal(generate_id, eos_ids)

                    topk_finished = layers.cast(topk_finished, 'float32')

                    generate_id = layers.reshape(generate_id, shape=[-1, 1])

                    pre_tokens_list = layers.gather(tokens, selected_beam)

                    full_tokens_list = layers.concat(
                        [pre_tokens_list, generate_id], axis=1)


                    return full_tokens_list, topk_log_probs, topk_scores, topk_finished, selected_beam, generate_id, \
                            dec_att_out, new_hidden_array, new_cell_array

                with while_op.block():
                    topk_seq, topk_log_probs, topk_scores, topk_finished, topk_beam, topk_generate_id, attention_out, new_hidden_array, new_cell_array = \
                        grow_top_k(  step_idx, pre_tokens, pre_score, parent_idx)
                    alive_seq, alive_log_prob, _, alive_beam, alive_id = grow_alive(
                        topk_seq, topk_scores, topk_log_probs, topk_finished,
                        topk_beam, topk_generate_id)

                    finished_seq_2, finished_scores_2, finished_flags_2, _, _ = grow_finished(
                        finished_seq, finished_scores, finished_flag, topk_seq,
                        topk_scores, topk_finished)

                    finished_cond = is_finished(alive_log_prob,
                                                finished_scores_2,
                                                finished_flags_2)

                    layers.increment(x=step_idx, value=1.0, in_place=True)

                    layers.assign(alive_beam, parent_idx)
                    layers.assign(alive_id, pre_tokens)
                    layers.assign(alive_log_prob, pre_score)
                    layers.assign(alive_seq, tokens)
                    layers.assign(finished_seq_2, finished_seq)
                    layers.assign(finished_scores_2, finished_scores)
                    layers.assign(finished_flags_2, finished_flag)

                    # update init_hidden, init_cell, input_feed
                    new_feed = layers.gather(attention_out, parent_idx)
                    layers.assign(new_feed, pre_feed)
                    for i in range(self.num_layers):
                        new_hidden_var = layers.gather(new_hidden_array[i],
                                                       parent_idx)
                        layers.assign(new_hidden_var, pre_hidden_array[i])
                        new_cell_var = layers.gather(new_cell_array[i],
                                                     parent_idx)
                        layers.assign(new_cell_var, pre_cell_array[i])

                    length_cond = layers.less_than(x=step_idx, y=max_length)
                    layers.logical_and(x=length_cond,
                                       y=finished_cond,
                                       out=cond)

                tokens_with_eos = tokens

                all_seq = layers.concat([tokens_with_eos, finished_seq],
                                        axis=0)
                all_score = layers.concat([pre_score, finished_scores], axis=0)
                _, topk_index = layers.topk(all_score, k=beam_size)
                topk_index = layers.reshape(topk_index, shape=[-1])
                final_seq = layers.gather(all_seq, topk_index)
                final_score = layers.gather(all_score, topk_index)

                return final_seq
            elif mode == 'greedy_search':
                max_src_seq_len = layers.shape(self.src)[1]
                max_length = max_src_seq_len * 2
                #max_length = layers.fill_constant( [1], dtype='int32', value = 10)
                pre_ids = layers.fill_constant([1, 1], dtype='int64', value=1)
                full_ids = layers.fill_constant([1, 1], dtype='int64', value=1)

                score = layers.fill_constant([1], dtype='float32', value=0.0)

                eos_ids = layers.fill_constant([1, 1], dtype='int64', value=2)

                pre_hidden_array = []
                pre_cell_array = []
                pre_feed = layers.fill_constant([1, self.hidden_size],
                                                dtype='float32',
                                                value=0)
                for i in range(self.num_layers):
                    pre_hidden_array.append(enc_last_hidden[i])
                    pre_cell_array.append(enc_last_cell[i])
                    #pre_hidden_array.append( layers.fill_constant( [1, hidden_size], dtype='float32', value=0)  )
                    #pre_cell_array.append( layers.fill_constant( [1, hidden_size], dtype='float32', value=0) )

                step_idx = layers.fill_constant(shape=[1],
                                                dtype='int32',
                                                value=0)
                cond = layers.less_than(x=step_idx,
                                        y=max_length)  # default force_cpu=True
                while_op = layers.While(cond)

                with while_op.block():

                    dec_step_emb = layers.embedding(
                        input=pre_ids,
                        size=[self.tar_vocab_size, self.hidden_size],
                        dtype='float32',
                        is_sparse=False,
                        param_attr=fluid.ParamAttr(
                            name='target_embedding',
                            initializer=fluid.initializer.UniformInitializer(
                                low=-self.init_scale, high=self.init_scale)))

                    dec_att_out, new_hidden_array, new_cell_array = decoder_step(
                        dec_step_emb, pre_hidden_array, pre_cell_array)

                    projection = layers.matmul(dec_att_out, softmax_weight)

                    logits = layers.softmax(projection)
                    logits = layers.log(logits)

                    current_log = layers.elementwise_add(logits, score, axis=0)

                    topk_score, topk_indices = layers.topk(input=current_log,
                                                           k=1)

                    new_ids = layers.concat([full_ids, topk_indices])
                    layers.assign(new_ids, full_ids)
                    #layers.Print( full_ids, message="ful ids")
                    layers.assign(topk_score, score)
                    layers.assign(topk_indices, pre_ids)
                    layers.assign(dec_att_out, pre_feed)
                    for i in range(self.num_layers):
                        layers.assign(new_hidden_array[i], pre_hidden_array[i])
                        layers.assign(new_cell_array[i], pre_cell_array[i])

                    layers.increment(x=step_idx, value=1.0, in_place=True)

                    eos_met = layers.not_equal(topk_indices, eos_ids)
                    length_cond = layers.less_than(x=step_idx, y=max_length)
                    layers.logical_and(x=length_cond, y=eos_met, out=cond)

                return full_ids

            raise Exception("error")
        else:
            print("mode not supprt", mode)
Code example #11
def build_model(is_training):

    input_text = fluid.layers.data(name="text",
                                   shape=[-1, max_len, 1],
                                   dtype="int64")
    input_text_len = fluid.layers.data(name="text_len",
                                       shape=[-1],
                                       dtype="int32")

    if is_training:
        input_label = fluid.layers.data(name="label",
                                        shape=[-1, 1],
                                        dtype="int64")

    input_text_emb = fluid.layers.embedding(
        input=input_text,
        size=[vocab_size, embedding_dims],
        param_attr=ParamAttr(name="shared_emb"))

    input_text_emb = fluid.layers.transpose(input_text_emb, perm=[0, 2, 1])
    input_text_emb = fluid.layers.reshape(
        input_text_emb, shape=[-1, embedding_dims, max_len, 1])

    input_text_conv = fluid.layers.conv2d(input=input_text_emb,
                                          num_filters=filters,
                                          filter_size=(kernel_size, 1),
                                          stride=(conv_stride, 1))
    input_text_conv = fluid.layers.relu(input_text_conv)

    input_text_conv = fluid.layers.pool2d(input_text_conv,
                                          pool_size=(pool_size, 1),
                                          pool_stride=(pool_stride, 1))

    input_text_conv = fluid.layers.squeeze(input_text_conv, axes=[3])

    _, _, input_text_lstm = basic_lstm(input_text_conv,
                                       None,
                                       None,
                                       lstm_hidden_size,
                                       num_layers=1,
                                       sequence_length=input_text_len)

    input_text_lstm = fluid.layers.transpose(input_text_lstm, perm=[1, 0, 2])

    input_text_lstm = fluid.layers.reshape(input_text_lstm,
                                           shape=[-1, lstm_hidden_size])

    input_text_hidden = fluid.layers.fc(input_text_lstm, size=2, act="softmax")

    if is_training:
        loss = fluid.layers.cross_entropy(input_text_hidden, input_label)
        loss = fluid.layers.reduce_mean(loss)

        optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.01)
        optimizer.minimize(loss)

        return loss

    else:

        return input_text_hidden
Code example #12
File: test_rnn_cell_api.py Project: iducn/Paddle
    def test_run(self):
        inputs_basic_lstm = fluid.data(
            name='inputs_basic_lstm',
            shape=[None, None, self.input_size],
            dtype='float32')
        sequence_length = fluid.data(
            name="sequence_length", shape=[None], dtype='int64')

        inputs_dynamic_rnn = layers.transpose(inputs_basic_lstm, perm=[1, 0, 2])
        cell = LSTMCell(self.hidden_size, name="LSTMCell_for_rnn")
        output, final_state = dynamic_rnn(
            cell=cell,
            inputs=inputs_dynamic_rnn,
            sequence_length=sequence_length,
            is_reverse=False)
        output_new = layers.transpose(output, perm=[1, 0, 2])

        rnn_out, last_hidden, last_cell = basic_lstm(
            inputs_basic_lstm, None, None, self.hidden_size, num_layers=1,
            batch_first=False, bidirectional=False,
            sequence_length=sequence_length, forget_bias=1.0)

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = Executor(place)
        exe.run(framework.default_startup_program())

        inputs_basic_lstm_np = np.random.uniform(
            -0.1, 0.1,
            (self.seq_len, self.batch_size, self.input_size)).astype('float32')
        sequence_length_np = np.ones(
            self.batch_size, dtype='int64') * self.seq_len

        inputs_np = np.random.uniform(
            -0.1, 0.1, (self.batch_size, self.input_size)).astype('float32')
        pre_hidden_np = np.random.uniform(
            -0.1, 0.1, (self.batch_size, self.hidden_size)).astype('float32')
        pre_cell_np = np.random.uniform(
            -0.1, 0.1, (self.batch_size, self.hidden_size)).astype('float32')

        param_names = [[
            "LSTMCell_for_rnn/BasicLSTMUnit_0.w_0",
            "basic_lstm_layers_0/BasicLSTMUnit_0.w_0"
        ], [
            "LSTMCell_for_rnn/BasicLSTMUnit_0.b_0",
            "basic_lstm_layers_0/BasicLSTMUnit_0.b_0"
        ]]

        for names in param_names:
            param = np.array(fluid.global_scope().find_var(names[0]).get_tensor(
            ))
            param = np.random.uniform(
                -0.1, 0.1, size=param.shape).astype('float32')
            fluid.global_scope().find_var(names[0]).get_tensor().set(param,
                                                                     place)
            fluid.global_scope().find_var(names[1]).get_tensor().set(param,
                                                                     place)

        out = exe.run(feed={
            'inputs_basic_lstm': inputs_basic_lstm_np,
            'sequence_length': sequence_length_np,
            'inputs': inputs_np,
            'pre_hidden': pre_hidden_np,
            'pre_cell': pre_cell_np
        },
                      fetch_list=[output_new, rnn_out])

        self.assertTrue(np.allclose(out[0], out[1], rtol=1e-4))
Code example #13
input_encoder_m = layers.embedding(input = input_sequence, size = [vocab_size, 64])
input_encoder_m = layers.dropout(input_encoder_m, 0.3)

input_encoder_c = layers.embedding(input = input_sequence, size = [vocab_size, query_maxlen])
input_encoder_c = layers.dropout(input_encoder_c, 0.3)

question_encoder = layers.embedding(input = input_sequence, size = [vocab_size, 64])
question_encoder = layers.dropout(question_encoder, 0.3)

match = layers.elementwise_mul(input_encoder_m, question_encoder)
response = layers.softmax(match, axis = -1)

answer = layers.concat([response, question_encoder], axis = -1)

_, _, answer = basic_lstm(answer, None, None, 32)
answer = layers.transpose(answer, perm = (1, 0, 2))
answer = layers.reshape(answer, shape = [-1, 32])

answer = layers.dropout(answer, 0.3)
answer = layers.fc(answer, size = vocab_size, act = "softmax")

loss = layers.cross_entropy(answer, true_answer)
loss = layers.reduce_mean(loss)

optimizer = fluid.optimizer.AdamOptimizer(learning_rate = 0.01)
optimizer.minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())

exe.run(fluid.default_startup_program())
Code example #14
def lm_model(hidden_size,
             vocab_size,
             num_layers=2,
             num_steps=20,
             init_scale=0.1,
             dropout=None,
             rnn_model='static',
             use_dataloader=False):
    def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None):
        weight_1_arr = []
        weight_2_arr = []
        bias_arr = []
        hidden_array = []
        cell_array = []
        mask_array = []
        for i in range(num_layers):
            weight_1 = layers.create_parameter(
                [hidden_size * 2, hidden_size * 4],
                dtype="float32",
                name="fc_weight1_" + str(i),
                default_initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale))
            weight_1_arr.append(weight_1)
            bias_1 = layers.create_parameter(
                [hidden_size * 4],
                dtype="float32",
                name="fc_bias1_" + str(i),
                default_initializer=fluid.initializer.Constant(0.0))
            bias_arr.append(bias_1)

            pre_hidden = layers.slice(
                init_hidden, axes=[0], starts=[i], ends=[i + 1])
            pre_cell = layers.slice(
                init_cell, axes=[0], starts=[i], ends=[i + 1])
            pre_hidden = layers.reshape(pre_hidden, shape=[-1, hidden_size])
            pre_cell = layers.reshape(pre_cell, shape=[-1, hidden_size])
            hidden_array.append(pre_hidden)
            cell_array.append(pre_cell)

        input_embedding = layers.transpose(input_embedding, perm=[1, 0, 2])
        rnn = PaddingRNN()

        with rnn.step():
            input = rnn.step_input(input_embedding)
            for k in range(num_layers):
                pre_hidden = rnn.memory(init=hidden_array[k])
                pre_cell = rnn.memory(init=cell_array[k])
                weight_1 = weight_1_arr[k]
                bias = bias_arr[k]

                nn = layers.concat([input, pre_hidden], 1)
                gate_input = layers.matmul(x=nn, y=weight_1)

                gate_input = layers.elementwise_add(gate_input, bias)
                i = layers.slice(
                    gate_input, axes=[1], starts=[0], ends=[hidden_size])
                j = layers.slice(
                    gate_input,
                    axes=[1],
                    starts=[hidden_size],
                    ends=[hidden_size * 2])
                f = layers.slice(
                    gate_input,
                    axes=[1],
                    starts=[hidden_size * 2],
                    ends=[hidden_size * 3])
                o = layers.slice(
                    gate_input,
                    axes=[1],
                    starts=[hidden_size * 3],
                    ends=[hidden_size * 4])

                c = pre_cell * layers.sigmoid(f) + layers.sigmoid(
                    i) * layers.tanh(j)
                m = layers.tanh(c) * layers.sigmoid(o)

                rnn.update_memory(pre_hidden, m)
                rnn.update_memory(pre_cell, c)

                rnn.step_output(m)
                rnn.step_output(c)

                input = m

                if dropout != None and dropout > 0.0:
                    input = layers.dropout(
                        input,
                        dropout_prob=dropout,
                        dropout_implementation='upscale_in_train')

            rnn.step_output(input)
        rnnout = rnn()

        last_hidden_array = []
        last_cell_array = []
        real_res = rnnout[-1]
        for i in range(num_layers):
            m = rnnout[i * 2]
            c = rnnout[i * 2 + 1]
            m.stop_gradient = True
            c.stop_gradient = True
            last_h = layers.slice(
                m, axes=[0], starts=[num_steps - 1], ends=[num_steps])
            last_hidden_array.append(last_h)
            last_c = layers.slice(
                c, axes=[0], starts=[num_steps - 1], ends=[num_steps])
            last_cell_array.append(last_c)
        real_res = layers.transpose(x=real_res, perm=[1, 0, 2])
        last_hidden = layers.concat(last_hidden_array, 0)
        last_cell = layers.concat(last_cell_array, 0)

        return real_res, last_hidden, last_cell

    def encoder_static(input_embedding, len=3, init_hidden=None,
                       init_cell=None):

        weight_1_arr = []
        weight_2_arr = []
        bias_arr = []
        hidden_array = []
        cell_array = []
        mask_array = []
        for i in range(num_layers):
            weight_1 = layers.create_parameter(
                [hidden_size * 2, hidden_size * 4],
                dtype="float32",
                name="fc_weight1_" + str(i),
                default_initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale))
            weight_1_arr.append(weight_1)
            bias_1 = layers.create_parameter(
                [hidden_size * 4],
                dtype="float32",
                name="fc_bias1_" + str(i),
                default_initializer=fluid.initializer.Constant(0.0))
            bias_arr.append(bias_1)

            pre_hidden = layers.slice(
                init_hidden, axes=[0], starts=[i], ends=[i + 1])
            pre_cell = layers.slice(
                init_cell, axes=[0], starts=[i], ends=[i + 1])
            pre_hidden = layers.reshape(
                pre_hidden, shape=[-1, hidden_size], inplace=True)
            pre_cell = layers.reshape(
                pre_cell, shape=[-1, hidden_size], inplace=True)
            hidden_array.append(pre_hidden)
            cell_array.append(pre_cell)

        res = []
        sliced_inputs = layers.split(
            input_embedding, num_or_sections=len, dim=1)

        for index in range(len):
            input = sliced_inputs[index]
            input = layers.reshape(input, shape=[-1, hidden_size], inplace=True)
            for k in range(num_layers):
                pre_hidden = hidden_array[k]
                pre_cell = cell_array[k]
                weight_1 = weight_1_arr[k]
                bias = bias_arr[k]
                nn = layers.concat([input, pre_hidden], 1)
                gate_input = layers.matmul(x=nn, y=weight_1)

                gate_input = layers.elementwise_add(gate_input, bias)
                i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)

                c = pre_cell * layers.sigmoid(f) + layers.sigmoid(
                    i) * layers.tanh(j)
                m = layers.tanh(c) * layers.sigmoid(o)

                hidden_array[k] = m
                cell_array[k] = c
                input = m

                if dropout != None and dropout > 0.0:
                    input = layers.dropout(
                        input,
                        dropout_prob=dropout,
                        dropout_implementation='upscale_in_train')

            res.append(input)

        last_hidden = layers.concat(hidden_array, 1)
        last_hidden = layers.reshape(
            last_hidden, shape=[-1, num_layers, hidden_size], inplace=True)
        last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2])

        last_cell = layers.concat(cell_array, 1)
        last_cell = layers.reshape(
            last_cell, shape=[-1, num_layers, hidden_size])
        last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2])

        real_res = layers.concat(res, 0)
        real_res = layers.reshape(
            real_res, shape=[len, -1, hidden_size], inplace=True)
        real_res = layers.transpose(x=real_res, perm=[1, 0, 2])

        return real_res, last_hidden, last_cell

    x = fluid.data(name="x", shape=[None, num_steps, 1], dtype='int64')
    y = fluid.data(name="y", shape=[None, 1], dtype='int64')

    if use_dataloader:
        dataloader = fluid.io.DataLoader.from_generator(
            feed_list=[x, y],
            capacity=16,
            iterable=False,
            use_double_buffer=True)

    init_hidden = fluid.data(
        name="init_hidden",
        shape=[None, num_layers, hidden_size],
        dtype='float32')
    init_cell = fluid.data(
        name="init_cell",
        shape=[None, num_layers, hidden_size],
        dtype='float32')

    init_hidden = layers.transpose(init_hidden, perm=[1, 0, 2])
    init_cell = layers.transpose(init_cell, perm=[1, 0, 2])

    init_hidden_reshape = layers.reshape(
        init_hidden, shape=[num_layers, -1, hidden_size])
    init_cell_reshape = layers.reshape(
        init_cell, shape=[num_layers, -1, hidden_size])

    x_emb = layers.embedding(
        input=x,
        size=[vocab_size, hidden_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(
            name='embedding_para',
            initializer=fluid.initializer.UniformInitializer(
                low=-init_scale, high=init_scale)))

    x_emb = layers.reshape(
        x_emb, shape=[-1, num_steps, hidden_size], inplace=True)
    if dropout != None and dropout > 0.0:
        x_emb = layers.dropout(
            x_emb,
            dropout_prob=dropout,
            dropout_implementation='upscale_in_train')

    if rnn_model == "padding":
        rnn_out, last_hidden, last_cell = padding_rnn(
            x_emb,
            len=num_steps,
            init_hidden=init_hidden_reshape,
            init_cell=init_cell_reshape)
    elif rnn_model == "static":
        rnn_out, last_hidden, last_cell = encoder_static(
            x_emb,
            len=num_steps,
            init_hidden=init_hidden_reshape,
            init_cell=init_cell_reshape)
    elif rnn_model == "cudnn":
        x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
        rnn_out, last_hidden, last_cell = layers.lstm(
            x_emb,
            init_hidden_reshape,
            init_cell_reshape,
            num_steps,
            hidden_size,
            num_layers,
            is_bidirec=False,
            default_initializer=fluid.initializer.UniformInitializer(
                low=-init_scale, high=init_scale))
        rnn_out = layers.transpose(rnn_out, perm=[1, 0, 2])
    elif rnn_model == "basic_lstm":
        rnn_out, last_hidden, last_cell = basic_lstm(
            x_emb, init_hidden, init_cell, hidden_size,
            num_layers=num_layers, batch_first=True, dropout_prob=dropout,
            param_attr=ParamAttr(
                initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale)),
            bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)),
            forget_bias=0.0)
    else:
        print("type not support")
        return

    rnn_out = layers.reshape(
        rnn_out, shape=[-1, num_steps, hidden_size], inplace=True)

    softmax_weight = layers.create_parameter(
        [hidden_size, vocab_size],
        dtype="float32",
        name="softmax_weight",
        default_initializer=fluid.initializer.UniformInitializer(
            low=-init_scale, high=init_scale))
    softmax_bias = layers.create_parameter(
        [vocab_size],
        dtype="float32",
        name='softmax_bias',
        default_initializer=fluid.initializer.UniformInitializer(
            low=-init_scale, high=init_scale))

    projection = layers.matmul(rnn_out, softmax_weight)
    projection = layers.elementwise_add(projection, softmax_bias)
    projection = layers.reshape(
        projection, shape=[-1, vocab_size], inplace=True)

    loss = layers.softmax_with_cross_entropy(
        logits=projection, label=y, soft_label=False)

    loss = layers.reshape(loss, shape=[-1, num_steps], inplace=True)
    loss = layers.reduce_mean(loss, dim=[0])
    loss = layers.reduce_sum(loss)

    loss.persistable = True

    # This will feed last_hidden, last_cell to init_hidden, init_cell, which
    # can be used directly in next batch. This can avoid the fetching of
    # last_hidden and last_cell and feeding of init_hidden and init_cell in
    # each training step.
    last_hidden = layers.transpose(last_hidden, perm=[1, 0, 2])
    last_cell = layers.transpose(last_cell, perm=[1, 0, 2])
    feeding_list = ['x', 'y', 'init_hidden', 'init_cell']
    if use_dataloader:
        return loss, last_hidden, last_cell, feeding_list, dataloader
    else:
        return loss, last_hidden, last_cell, feeding_list
Code example #15
    def test_run(self):
        x = layers.data(name='x',
                        shape=[-1, self.batch_size, self.hidden_size],
                        dtype='float32')
        sequence_length = layers.data(name="sequence_length",
                                      shape=[-1],
                                      dtype='float32')

        rnn_out, last_hidden, last_cell = basic_lstm(
            x, None, None, self.hidden_size, num_layers=self.num_layers,
            batch_first=self.batch_first, bidirectional=self.is_bidirect,
            sequence_length=sequence_length, forget_bias=self.forget_bias)

        last_hidden.persistable = True
        rnn_out.persistable = True

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = Executor(place)
        exe.run(framework.default_startup_program())

        param_list = fluid.default_main_program().block(0).all_parameters()

        # process weight and bias
        gate_weight = []
        gate_bias = []

        for i in range(self.num_layers):
            gate_w_name = "basic_lstm_layers_" + str(
                i) + "/BasicLSTMUnit_0.w_0"
            gate_b_name = "basic_lstm_layers_" + str(
                i) + "/BasicLSTMUnit_0.b_0"

            gate_w = np.array(
                fluid.global_scope().find_var(gate_w_name).get_tensor())
            gate_w = np.random.uniform(-0.1, 0.1,
                                       size=gate_w.shape).astype('float32')
            fluid.global_scope().find_var(gate_w_name).get_tensor().set(
                gate_w, place)

            gate_b = np.array(
                fluid.global_scope().find_var(gate_b_name).get_tensor())
            gate_b = np.random.uniform(-0.1, 0.1,
                                       size=gate_b.shape).astype('float32')
            fluid.global_scope().find_var(gate_b_name).get_tensor().set(
                gate_b, place)

            gate_weight.append(gate_w)
            gate_bias.append(gate_b)

        if self.is_bidirect:
            for i in range(self.num_layers):
                gate_w_name = "basic_lstm_reverse_layers_" + str(
                    i) + "/BasicLSTMUnit_0.w_0"
                gate_b_name = "basic_lstm_reverse_layers_" + str(
                    i) + "/BasicLSTMUnit_0.b_0"

                gate_w = np.array(
                    fluid.global_scope().find_var(gate_w_name).get_tensor())
                gate_w = np.random.uniform(-0.1, 0.1,
                                           size=gate_w.shape).astype('float32')
                fluid.global_scope().find_var(gate_w_name).get_tensor().set(
                    gate_w, place)

                gate_b = np.array(
                    fluid.global_scope().find_var(gate_b_name).get_tensor())
                gate_b = np.random.uniform(-0.1, 0.1,
                                           size=gate_b.shape).astype('float32')
                fluid.global_scope().find_var(gate_b_name).get_tensor().set(
                    gate_b, place)

                gate_weight.append(gate_w)
                gate_bias.append(gate_b)

        step_input_np = np.random.uniform(-0.1, 0.1,
                                          (self.seq_len, self.batch_size,
                                           self.hidden_size)).astype('float32')
        sequence_length_np = np.random.randint(
            self.seq_len // 2, self.seq_len,
            size=(self.batch_size)).astype('int64')

        out = exe.run(feed={
            'x': step_input_np,
            'sequence_length': sequence_length_np
        },
                      fetch_list=[rnn_out, last_hidden, last_cell])

        api_rnn_out = out[0]
        api_last_hidden = out[1]
        api_last_cell = out[2]

        np_out = lstm_np(step_input_np,
                         None,
                         None,
                         self.hidden_size,
                         gate_weight,
                         gate_bias,
                         num_layers=self.num_layers,
                         batch_first=self.batch_first,
                         is_bidirect=self.is_bidirect,
                         sequence_length=sequence_length_np)

        self.assertTrue(np.allclose(api_rnn_out, np_out[0], rtol=1e-4, atol=0))
        self.assertTrue(
            np.allclose(api_last_hidden, np_out[1], rtol=1e-4, atol=0))
        self.assertTrue(
            np.allclose(api_last_cell, np_out[2], rtol=1e-4, atol=0))
Code example #16
File: train.py Project: ziyuli/paddle_in_example
x = layers.data(name="x", shape=[-1, tsteps, 1], dtype="float32")
y = layers.data(name="y", shape=[-1, 1], dtype="float32")

lstm1_init_h = layers.data(name="lstm1_h",
                           shape=[1, batch_size, 50],
                           dtype="float32",
                           append_batch_size=False)
lstm1_init_c = layers.data(name="lstm1_c",
                           shape=[1, batch_size, 50],
                           dtype="float32",
                           append_batch_size=False)

lstm1, lstm1_h, lstm1_c = basic_lstm(x,
                                     lstm1_init_h,
                                     lstm1_init_c,
                                     50,
                                     num_layers=1)
_, lstm2_h, lstm2_c = basic_lstm(lstm1, lstm1_h, lstm1_c, 50, num_layers=1)
lstm2_c_batch_first = layers.transpose(lstm2_c, [1, 0, 2])
pred = layers.fc(lstm2_c_batch_first, 1)
loss = layers.reduce_mean(layers.square(pred - y))

test_program = fluid.default_main_program().clone(for_test=True)

optimizer = fluid.optimizer.RMSPropOptimizer(learning_rate=0.001)
optimizer.minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
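A hedged sketch of one training step for the graph above; batch_x and batch_y are placeholder numpy arrays shaped [batch_size, tsteps, 1] and [batch_size, 1], and the stateful LSTM initial states are fed as zeros (or as states fetched from the previous batch):

import numpy as np

init_h = np.zeros((1, batch_size, 50), dtype='float32')
init_c = np.zeros((1, batch_size, 50), dtype='float32')
loss_val, = exe.run(fluid.default_main_program(),
                    feed={"x": batch_x, "y": batch_y,
                          "lstm1_h": init_h, "lstm1_c": init_c},
                    fetch_list=[loss])
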
Code example #17
File: lm_model.py Project: guoshengCS/rnn-benchmark
def lm_model(hidden_size,
             vocab_size,
             batch_size,
             num_layers=2,
             num_steps=20,
             init_scale=0.1,
             dropout=None,
             rnn_model='static',
             use_dataloader=False):
    if rnn_model == 'lod':
        x = fluid.data(name="x", shape=[None, 1], dtype='int64', lod_level=1)
        y = fluid.data(name="y", shape=[None, 1], dtype='int64', lod_level=1)

        if use_dataloader:
            dataloader = fluid.io.DataLoader.from_generator(
                feed_list=[x, y],
                capacity=16,
                iterable=False,
                use_double_buffer=True)

        init_hidden = fluid.data(name="init_hidden",
                                 shape=[None, num_layers, hidden_size],
                                 dtype='float32')
        init_cell = fluid.data(name="init_cell",
                               shape=[None, num_layers, hidden_size],
                               dtype='float32')

        init_cell.persistable = True
        init_hidden.persistable = True

        x_emb = layers.embedding(
            input=x,
            size=[vocab_size, hidden_size],
            dtype='float32',
            is_sparse=False,
            param_attr=fluid.ParamAttr(
                name='embedding_para',
                initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale)))

        if dropout != None and dropout > 0.0:
            x_emb = layers.dropout(x_emb,
                                   dropout_prob=dropout,
                                   dropout_implementation='upscale_in_train')

        lstm_input = x_emb
        last_hidden_array = []
        last_cell_array = []
        for i in range(num_layers):
            lstm_input = fluid.layers.fc(input=lstm_input,
                                         size=hidden_size * 4,
                                         bias_attr=False)
            hidden, cell = fluid.layers.dynamic_lstm(
                input=lstm_input,
                size=hidden_size * 4,
                h_0=init_hidden[:, i, :],
                c_0=init_cell[:, i, :],
                use_peepholes=False,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-init_scale, high=init_scale)))
            last_hidden = layers.sequence_pool(hidden, pool_type='last')
            last_cell = layers.sequence_pool(cell, pool_type='last')
            last_hidden_array.append(last_hidden)
            last_cell_array.append(last_cell)

            lstm_input = hidden
            if dropout != None and dropout > 0.0:
                lstm_input = layers.dropout(
                    lstm_input,
                    dropout_prob=dropout,
                    dropout_implementation='upscale_in_train')

        last_hidden = layers.stack(last_hidden_array, 1)
        last_cell = layers.stack(last_cell_array, 1)

        softmax_weight = layers.create_parameter(
            [hidden_size, vocab_size],
            dtype="float32",
            name="softmax_weight",
            default_initializer=fluid.initializer.UniformInitializer(
                low=-init_scale, high=init_scale))
        softmax_bias = layers.create_parameter(
            [vocab_size],
            dtype="float32",
            name='softmax_bias',
            default_initializer=fluid.initializer.UniformInitializer(
                low=-init_scale, high=init_scale))

        projection = layers.matmul(hidden, softmax_weight)
        projection = layers.elementwise_add(projection, softmax_bias, axis=-1)

        loss = layers.softmax_with_cross_entropy(logits=projection,
                                                 label=y,
                                                 soft_label=False)
        loss = layers.sequence_pool(loss, pool_type='sum')
        loss = layers.reduce_mean(loss)

        feeding_list = ['x', 'y', 'init_hidden', 'init_cell']
        if use_dataloader:
            return loss, last_hidden, last_cell, feeding_list, dataloader
        else:
            return loss, last_hidden, last_cell, feeding_list

    def seq2seq_api_rnn(input_embedding,
                        len=3,
                        init_hiddens=None,
                        init_cells=None):
        class EncoderCell(layers.RNNCell):
            def __init__(self,
                         num_layers,
                         hidden_size,
                         dropout_prob=0.,
                         forget_bias=0.):
                self.num_layers = num_layers
                self.hidden_size = hidden_size
                self.dropout_prob = dropout_prob
                self.lstm_cells = []
                for i in range(num_layers):
                    self.lstm_cells.append(
                        layers.LSTMCell(
                            hidden_size,
                            forget_bias=forget_bias,
                            param_attr=fluid.ParamAttr(
                                initializer=fluid.initializer.
                                UniformInitializer(low=-init_scale,
                                                   high=init_scale))))

            def call(self, step_input, states):
                new_states = []
                for i in range(self.num_layers):
                    out, new_state = self.lstm_cells[i](step_input, states[i])
                    step_input = layers.dropout(
                        out,
                        self.dropout_prob,
                        dropout_implementation='upscale_in_train'
                    ) if self.dropout_prob > 0 else out
                    new_states.append(new_state)
                return step_input, new_states

        cell = EncoderCell(num_layers, hidden_size,
                           dropout if dropout is not None else 0.)
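        # Split the stacked [num_layers, batch, hidden] initial states into
        # per-layer (hidden, cell) pairs, as layers.rnn expects one state per
        # EncoderCell layer.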
        output, new_states = layers.rnn(
            cell,
            inputs=input_embedding,
            initial_states=[[hidden, cell] for hidden, cell in zip([
                layers.reshape(init_hidden, shape=[-1, hidden_size])
                for init_hidden in layers.split(
                    init_hiddens, num_or_sections=num_layers, dim=0)
            ], [
                layers.reshape(init_cell, shape=[-1, hidden_size])
                for init_cell in layers.split(
                    init_cells, num_or_sections=num_layers, dim=0)
            ])],
            time_major=False)
        last_hidden = layers.stack([hidden for hidden, _ in new_states], 0)
        last_cell = layers.stack([cell for _, cell in new_states], 0)
        return output, last_hidden, last_cell

    def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None):
        weight_1_arr = []
        weight_2_arr = []
        bias_arr = []
        hidden_array = []
        cell_array = []
        mask_array = []
        for i in range(num_layers):
            weight_1 = layers.create_parameter(
                [hidden_size * 2, hidden_size * 4],
                dtype="float32",
                name="fc_weight1_" + str(i),
                default_initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale))
            weight_1_arr.append(weight_1)
            bias_1 = layers.create_parameter(
                [hidden_size * 4],
                dtype="float32",
                name="fc_bias1_" + str(i),
                default_initializer=fluid.initializer.Constant(0.0))
            bias_arr.append(bias_1)

            pre_hidden = layers.slice(init_hidden,
                                      axes=[0],
                                      starts=[i],
                                      ends=[i + 1])
            pre_cell = layers.slice(init_cell,
                                    axes=[0],
                                    starts=[i],
                                    ends=[i + 1])
            pre_hidden = layers.reshape(pre_hidden, shape=[-1, hidden_size])
            pre_cell = layers.reshape(pre_cell, shape=[-1, hidden_size])
            hidden_array.append(pre_hidden)
            cell_array.append(pre_cell)

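        # Switch to time-major layout [num_steps, batch, hidden] so that
        # rnn.step_input() below yields one time step per loop iteration.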
        input_embedding = layers.transpose(input_embedding, perm=[1, 0, 2])
        rnn = PaddingRNN()

        with rnn.step():
            input = rnn.step_input(input_embedding)
            for k in range(num_layers):
                pre_hidden = rnn.memory(init=hidden_array[k])
                pre_cell = rnn.memory(init=cell_array[k])
                weight_1 = weight_1_arr[k]
                bias = bias_arr[k]

                nn = layers.concat([input, pre_hidden], 1)
                gate_input = layers.matmul(x=nn, y=weight_1)

                gate_input = layers.elementwise_add(gate_input, bias)
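                # gate_input packs the four LSTM gates along the hidden axis:
                # input (i), cell candidate (j), forget (f) and output (o).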
                i = layers.slice(gate_input,
                                 axes=[1],
                                 starts=[0],
                                 ends=[hidden_size])
                j = layers.slice(gate_input,
                                 axes=[1],
                                 starts=[hidden_size],
                                 ends=[hidden_size * 2])
                f = layers.slice(gate_input,
                                 axes=[1],
                                 starts=[hidden_size * 2],
                                 ends=[hidden_size * 3])
                o = layers.slice(gate_input,
                                 axes=[1],
                                 starts=[hidden_size * 3],
                                 ends=[hidden_size * 4])

                c = pre_cell * layers.sigmoid(f) + layers.sigmoid(
                    i) * layers.tanh(j)
                m = layers.tanh(c) * layers.sigmoid(o)

                rnn.update_memory(pre_hidden, m)
                rnn.update_memory(pre_cell, c)

                rnn.step_output(m)
                rnn.step_output(c)

                input = m

                if dropout != None and dropout > 0.0:
                    input = layers.dropout(
                        input,
                        dropout_prob=dropout,
                        dropout_implementation='upscale_in_train')

            rnn.step_output(input)
        rnnout = rnn()

        last_hidden_array = []
        last_cell_array = []
        real_res = rnnout[-1]
        for i in range(num_layers):
            m = rnnout[i * 2]
            c = rnnout[i * 2 + 1]
            m.stop_gradient = True
            c.stop_gradient = True
            last_h = layers.slice(m,
                                  axes=[0],
                                  starts=[num_steps - 1],
                                  ends=[num_steps])
            last_hidden_array.append(last_h)
            last_c = layers.slice(c,
                                  axes=[0],
                                  starts=[num_steps - 1],
                                  ends=[num_steps])
            last_cell_array.append(last_c)
        real_res = layers.transpose(x=real_res, perm=[1, 0, 2])
        last_hidden = layers.concat(last_hidden_array, 0)
        last_cell = layers.concat(last_cell_array, 0)

        return real_res, last_hidden, last_cell

    def encoder_static(input_embedding,
                       len=3,
                       init_hidden=None,
                       init_cell=None):

        weight_1_arr = []
        weight_2_arr = []
        bias_arr = []
        hidden_array = []
        cell_array = []
        mask_array = []
        for i in range(num_layers):
            weight_1 = layers.create_parameter(
                [hidden_size * 2, hidden_size * 4],
                dtype="float32",
                name="fc_weight1_" + str(i),
                default_initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale))
            weight_1_arr.append(weight_1)
            bias_1 = layers.create_parameter(
                [hidden_size * 4],
                dtype="float32",
                name="fc_bias1_" + str(i),
                default_initializer=fluid.initializer.Constant(0.0))
            bias_arr.append(bias_1)

            pre_hidden = layers.slice(init_hidden,
                                      axes=[0],
                                      starts=[i],
                                      ends=[i + 1])
            pre_cell = layers.slice(init_cell,
                                    axes=[0],
                                    starts=[i],
                                    ends=[i + 1])
            pre_hidden = layers.reshape(pre_hidden,
                                        shape=[-1, hidden_size],
                                        inplace=True)
            pre_cell = layers.reshape(pre_cell,
                                      shape=[-1, hidden_size],
                                      inplace=True)
            hidden_array.append(pre_hidden)
            cell_array.append(pre_cell)

        res = []
        sliced_inputs = layers.split(input_embedding,
                                     num_or_sections=len,
                                     dim=1)

        for index in range(len):
            input = sliced_inputs[index]
            input = layers.reshape(input,
                                   shape=[-1, hidden_size],
                                   inplace=True)
            for k in range(num_layers):
                pre_hidden = hidden_array[k]
                pre_cell = cell_array[k]
                weight_1 = weight_1_arr[k]
                bias = bias_arr[k]

                nn = layers.concat([input, pre_hidden], 1)
                gate_input = layers.matmul(x=nn, y=weight_1)

                gate_input = layers.elementwise_add(gate_input, bias)
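                # gate_input packs the four LSTM gates along the hidden axis:
                # input (i), cell candidate (j), forget (f) and output (o).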
                i, j, f, o = layers.split(gate_input,
                                          num_or_sections=4,
                                          dim=-1)

                try:
                    from paddle.fluid.contrib.layers import fused_elemwise_activation
                    # fluid.contrib.layers.fused_elemwise_activation can do a fused
                    # operation, like:
                    # 1) x + sigmoid(y); x + tanh(y)
                    # 2) tanh(x + y)
                    # At present the set of unary operations supported by this
                    # fused op is limited; we will extend it to support more
                    # unary operations and do this kind of fusion automatically
                    # in a future version of paddle.fluid.
                    # layers.sigmoid(i) * layers.tanh(j)
                    tmp0 = fused_elemwise_activation(
                        x=layers.tanh(j),
                        y=i,
                        functor_list=['elementwise_mul', 'sigmoid'],
                        save_intermediate_out=False)
                    # pre_cell * layers.sigmoid(f)
                    tmp1 = fused_elemwise_activation(
                        x=pre_cell,
                        y=f,
                        functor_list=['elementwise_mul', 'sigmoid'],
                        save_intermediate_out=False)
                    c = tmp0 + tmp1
                    # layers.tanh(c) * layers.sigmoid(o)
                    m = fused_elemwise_activation(
                        x=layers.tanh(c),
                        y=o,
                        functor_list=['elementwise_mul', 'sigmoid'],
                        save_intermediate_out=False)
                except ImportError:
                    c = pre_cell * layers.sigmoid(f) + layers.sigmoid(
                        i) * layers.tanh(j)
                    m = layers.tanh(c) * layers.sigmoid(o)

                hidden_array[k] = m
                cell_array[k] = c
                input = m

                if dropout != None and dropout > 0.0:
                    input = layers.dropout(
                        input,
                        dropout_prob=dropout,
                        dropout_implementation='upscale_in_train')

            res.append(input)

        last_hidden = layers.concat(hidden_array, 1)
        last_hidden = layers.reshape(last_hidden,
                                     shape=[-1, num_layers, hidden_size],
                                     inplace=True)
        last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2])

        last_cell = layers.concat(cell_array, 1)
        last_cell = layers.reshape(last_cell,
                                   shape=[-1, num_layers, hidden_size])
        last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2])

        real_res = layers.concat(res, 0)
        real_res = layers.reshape(real_res,
                                  shape=[len, -1, hidden_size],
                                  inplace=True)
        real_res = layers.transpose(x=real_res, perm=[1, 0, 2])

        return real_res, last_hidden, last_cell

    batch_size_each = batch_size // fluid.core.get_cuda_device_count()
    x = fluid.data(
        # name="x", shape=[batch_size_each, num_steps, 1], dtype='int64')
        name="x",
        shape=[None, num_steps, 1],
        dtype='int64')
    y = fluid.data(
        # name="y", shape=[batch_size_each * num_steps, 1], dtype='int64')
        name="y",
        shape=[None, 1],
        dtype='int64')

    if use_dataloader:
        dataloader = fluid.io.DataLoader.from_generator(feed_list=[x, y],
                                                        capacity=16,
                                                        iterable=False,
                                                        use_double_buffer=True)

    init_hidden = fluid.data(
        name="init_hidden",
        # shape=[num_layers, batch_size_each, hidden_size],
        shape=[num_layers, None, hidden_size],
        dtype='float32')
    init_cell = fluid.data(
        name="init_cell",
        # shape=[num_layers, batch_size_each, hidden_size],
        shape=[num_layers, None, hidden_size],
        dtype='float32')

    init_cell.persistable = True
    init_hidden.persistable = True

    init_hidden_reshape = layers.reshape(init_hidden,
                                         shape=[num_layers, -1, hidden_size])
    init_cell_reshape = layers.reshape(init_cell,
                                       shape=[num_layers, -1, hidden_size])

    x_emb = layers.embedding(
        input=x,
        size=[vocab_size, hidden_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(
            name='embedding_para',
            initializer=fluid.initializer.UniformInitializer(low=-init_scale,
                                                             high=init_scale)))

    x_emb = layers.reshape(x_emb,
                           shape=[-1, num_steps, hidden_size],
                           inplace=True)
    if dropout != None and dropout > 0.0:
        x_emb = layers.dropout(x_emb,
                               dropout_prob=dropout,
                               dropout_implementation='upscale_in_train')

    if rnn_model == "padding":
        rnn_out, last_hidden, last_cell = padding_rnn(
            x_emb,
            len=num_steps,
            init_hidden=init_hidden_reshape,
            init_cell=init_cell_reshape)
    elif rnn_model == "static":
        rnn_out, last_hidden, last_cell = encoder_static(
            x_emb,
            len=num_steps,
            init_hidden=init_hidden_reshape,
            init_cell=init_cell_reshape)
    elif rnn_model == "cudnn":
        x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
        rnn_out, last_hidden, last_cell = layers.lstm(
            x_emb,
            init_hidden_reshape,
            init_cell_reshape,
            num_steps,
            hidden_size,
            num_layers,
            is_bidirec=False,
            default_initializer=fluid.initializer.UniformInitializer(
                low=-init_scale, high=init_scale))
        rnn_out = layers.transpose(rnn_out, perm=[1, 0, 2])
    elif rnn_model == "basic_lstm":
        rnn_out, last_hidden, last_cell = basic_lstm(
            x_emb,
            init_hidden,
            init_cell,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout_prob=dropout,
            param_attr=ParamAttr(
                initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale)),
            bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)),
            forget_bias=0.0)
    elif rnn_model == "seq2seq_api":
        rnn_out, last_hidden, last_cell = seq2seq_api_rnn(
            x_emb,
            len=num_steps,
            init_hiddens=init_hidden_reshape,
            init_cells=init_cell_reshape)
    else:
        print("type not support")
        return

    rnn_out = layers.reshape(rnn_out,
                             shape=[-1, num_steps, hidden_size],
                             inplace=True)

    softmax_weight = layers.create_parameter(
        [hidden_size, vocab_size],
        dtype="float32",
        name="softmax_weight",
        default_initializer=fluid.initializer.UniformInitializer(
            low=-init_scale, high=init_scale))
    softmax_bias = layers.create_parameter(
        [vocab_size],
        dtype="float32",
        name='softmax_bias',
        default_initializer=fluid.initializer.UniformInitializer(
            low=-init_scale, high=init_scale))

    projection = layers.matmul(rnn_out, softmax_weight)
    projection = layers.elementwise_add(projection, softmax_bias)
    projection = layers.reshape(projection,
                                shape=[-1, vocab_size],
                                inplace=True)

    loss = layers.softmax_with_cross_entropy(logits=projection,
                                             label=y,
                                             soft_label=False)

    loss = layers.reshape(loss, shape=[-1, num_steps], inplace=True)
    loss = layers.reduce_mean(loss, dim=[0])
    loss = layers.reduce_sum(loss)

    loss.persistable = True
    last_cell.persistable = True
    last_hidden.persistable = True

    # This will feed last_hidden, last_cell to init_hidden, init_cell, which
    # can be used directly in next batch. This can avoid the fetching of
    # last_hidden and last_cell and feeding of init_hidden and init_cell in
    # each training step.
    layers.assign(input=last_cell, output=init_cell)
    layers.assign(input=last_hidden, output=init_hidden)

    feeding_list = ['x', 'y', 'init_hidden', 'init_cell']
    if use_dataloader:
        return loss, last_hidden, last_cell, feeding_list, dataloader
    else:
        return loss, last_hidden, last_cell, feeding_list
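
A hedged sketch of wiring lm_model into a program and running a single step with the default 'static' rnn_model. All hyper-parameters below are made up, the place is assumed to be a GPU because the function divides the batch size by fluid.core.get_cuda_device_count(), and the feed keys follow feeding_list above.

import numpy as np
import paddle.fluid as fluid

# Assumed hyper-parameters, for illustration only.
hidden_size, vocab_size, batch_size = 200, 10000, 20
num_layers, num_steps = 2, 20

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    loss, last_hidden, last_cell, feeding_list = lm_model(
        hidden_size, vocab_size, batch_size,
        num_layers=num_layers, num_steps=num_steps, rnn_model='static')
    fluid.optimizer.SGD(learning_rate=1.0).minimize(loss)

exe = fluid.Executor(fluid.CUDAPlace(0))  # assumed: at least one visible GPU
exe.run(startup_prog)

feed = {
    'x': np.random.randint(0, vocab_size,
                           (batch_size, num_steps, 1)).astype('int64'),
    'y': np.random.randint(0, vocab_size,
                           (batch_size * num_steps, 1)).astype('int64'),
    'init_hidden': np.zeros((num_layers, batch_size, hidden_size), 'float32'),
    'init_cell': np.zeros((num_layers, batch_size, hidden_size), 'float32'),
}
loss_val, = exe.run(main_prog, feed=feed, fetch_list=[loss])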
Code Example #18
def lm_model(hidden_size,
             vocab_size,
             batch_size,
             num_layers=2,
             num_steps=20,
             init_scale=0.1,
             dropout=None,
             rnn_model='static',
             use_py_reader=False):
    def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None):
        weight_1_arr = []
        weight_2_arr = []
        bias_arr = []
        hidden_array = []
        cell_array = []
        mask_array = []
        for i in range(num_layers):
            weight_1 = layers.create_parameter(
                [hidden_size * 2, hidden_size * 4],
                dtype="float32",
                name="fc_weight1_" + str(i),
                default_initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale))
            weight_1_arr.append(weight_1)
            bias_1 = layers.create_parameter(
                [hidden_size * 4],
                dtype="float32",
                name="fc_bias1_" + str(i),
                default_initializer=fluid.initializer.Constant(0.0))
            bias_arr.append(bias_1)

            pre_hidden = layers.slice(
                init_hidden, axes=[0], starts=[i], ends=[i + 1])
            pre_cell = layers.slice(
                init_cell, axes=[0], starts=[i], ends=[i + 1])
            pre_hidden = layers.reshape(pre_hidden, shape=[-1, hidden_size])
            pre_cell = layers.reshape(pre_cell, shape=[-1, hidden_size])
            hidden_array.append(pre_hidden)
            cell_array.append(pre_cell)

        input_embedding = layers.transpose(input_embedding, perm=[1, 0, 2])
        rnn = PaddingRNN()

        with rnn.step():
            input = rnn.step_input(input_embedding)
            for k in range(num_layers):
                pre_hidden = rnn.memory(init=hidden_array[k])
                pre_cell = rnn.memory(init=cell_array[k])
                weight_1 = weight_1_arr[k]
                bias = bias_arr[k]

                nn = layers.concat([input, pre_hidden], 1)
                gate_input = layers.matmul(x=nn, y=weight_1)

                gate_input = layers.elementwise_add(gate_input, bias)
                i = layers.slice(
                    gate_input, axes=[1], starts=[0], ends=[hidden_size])
                j = layers.slice(
                    gate_input,
                    axes=[1],
                    starts=[hidden_size],
                    ends=[hidden_size * 2])
                f = layers.slice(
                    gate_input,
                    axes=[1],
                    starts=[hidden_size * 2],
                    ends=[hidden_size * 3])
                o = layers.slice(
                    gate_input,
                    axes=[1],
                    starts=[hidden_size * 3],
                    ends=[hidden_size * 4])

                c = pre_cell * layers.sigmoid(f) + layers.sigmoid(
                    i) * layers.tanh(j)
                m = layers.tanh(c) * layers.sigmoid(o)

                rnn.update_memory(pre_hidden, m)
                rnn.update_memory(pre_cell, c)

                rnn.step_output(m)
                rnn.step_output(c)

                input = m

                if dropout != None and dropout > 0.0:
                    input = layers.dropout(
                        input,
                        dropout_prob=dropout,
                        dropout_implementation='upscale_in_train')

            rnn.step_output(input)
        rnnout = rnn()

        last_hidden_array = []
        last_cell_array = []
        real_res = rnnout[-1]
        for i in range(num_layers):
            m = rnnout[i * 2]
            c = rnnout[i * 2 + 1]
            m.stop_gradient = True
            c.stop_gradient = True
            last_h = layers.slice(
                m, axes=[0], starts=[num_steps - 1], ends=[num_steps])
            last_hidden_array.append(last_h)
            last_c = layers.slice(
                c, axes=[0], starts=[num_steps - 1], ends=[num_steps])
            last_cell_array.append(last_c)
        real_res = layers.transpose(x=real_res, perm=[1, 0, 2])
        last_hidden = layers.concat(last_hidden_array, 0)
        last_cell = layers.concat(last_cell_array, 0)

        return real_res, last_hidden, last_cell

    def encoder_static(input_embedding, len=3, init_hidden=None,
                       init_cell=None):

        weight_1_arr = []
        weight_2_arr = []
        bias_arr = []
        hidden_array = []
        cell_array = []
        mask_array = []
        for i in range(num_layers):
            weight_1 = layers.create_parameter(
                [hidden_size * 2, hidden_size * 4],
                dtype="float32",
                name="fc_weight1_" + str(i),
                default_initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale))
            weight_1_arr.append(weight_1)
            bias_1 = layers.create_parameter(
                [hidden_size * 4],
                dtype="float32",
                name="fc_bias1_" + str(i),
                default_initializer=fluid.initializer.Constant(0.0))
            bias_arr.append(bias_1)

            pre_hidden = layers.slice(
                init_hidden, axes=[0], starts=[i], ends=[i + 1])
            pre_cell = layers.slice(
                init_cell, axes=[0], starts=[i], ends=[i + 1])
            pre_hidden = layers.reshape(
                pre_hidden, shape=[-1, hidden_size], inplace=True)
            pre_cell = layers.reshape(
                pre_cell, shape=[-1, hidden_size], inplace=True)
            hidden_array.append(pre_hidden)
            cell_array.append(pre_cell)

        res = []
        sliced_inputs = layers.split(
            input_embedding, num_or_sections=len, dim=1)

        for index in range(len):
            input = sliced_inputs[index]
            input = layers.reshape(input, shape=[-1, hidden_size], inplace=True)
            for k in range(num_layers):
                pre_hidden = hidden_array[k]
                pre_cell = cell_array[k]
                weight_1 = weight_1_arr[k]
                bias = bias_arr[k]

                nn = layers.concat([input, pre_hidden], 1)
                gate_input = layers.matmul(x=nn, y=weight_1)

                gate_input = layers.elementwise_add(gate_input, bias)
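                # gate_input packs the four LSTM gates along the hidden axis:
                # input (i), cell candidate (j), forget (f) and output (o).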
                i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)

                try:
                    from paddle.fluid.contrib.layers import fused_elemwise_activation
                    # fluid.contrib.layers.fused_elemwise_activation can do a fused
                    # operation, like:
                    # 1) x + sigmoid(y); x + tanh(y)
                    # 2) tanh(x + y)
                    # At present the set of unary operations supported by this
                    # fused op is limited; we will extend it to support more
                    # unary operations and do this kind of fusion automatically
                    # in a future version of paddle.fluid.
                    # layers.sigmoid(i) * layers.tanh(j)
                    tmp0 = fused_elemwise_activation(
                        x=layers.tanh(j),
                        y=i,
                        functor_list=['elementwise_mul', 'sigmoid'],
                        save_intermediate_out=False)
                    # pre_cell * layers.sigmoid(f)
                    tmp1 = fused_elemwise_activation(
                        x=pre_cell,
                        y=f,
                        functor_list=['elementwise_mul', 'sigmoid'],
                        save_intermediate_out=False)
                    c = tmp0 + tmp1
                    # layers.tanh(c) * layers.sigmoid(o)
                    m = fused_elemwise_activation(
                        x=layers.tanh(c),
                        y=o,
                        functor_list=['elementwise_mul', 'sigmoid'],
                        save_intermediate_out=False)
                except ImportError:
                    c = pre_cell * layers.sigmoid(f) + layers.sigmoid(
                        i) * layers.tanh(j)
                    m = layers.tanh(c) * layers.sigmoid(o)

                hidden_array[k] = m
                cell_array[k] = c
                input = m

                if dropout != None and dropout > 0.0:
                    input = layers.dropout(
                        input,
                        dropout_prob=dropout,
                        dropout_implementation='upscale_in_train')

            res.append(input)

        last_hidden = layers.concat(hidden_array, 1)
        last_hidden = layers.reshape(
            last_hidden, shape=[-1, num_layers, hidden_size], inplace=True)
        last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2])

        last_cell = layers.concat(cell_array, 1)
        last_cell = layers.reshape(
            last_cell, shape=[-1, num_layers, hidden_size])
        last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2])

        real_res = layers.concat(res, 0)
        real_res = layers.reshape(
            real_res, shape=[len, -1, hidden_size], inplace=True)
        real_res = layers.transpose(x=real_res, perm=[1, 0, 2])

        return real_res, last_hidden, last_cell

    batch_size_each = batch_size
    if use_py_reader:
        feed_shapes = [[batch_size_each, num_steps, 1],
                       [batch_size_each * num_steps, 1]]
        py_reader = fluid.layers.py_reader(
            capacity=16, shapes=feed_shapes, dtypes=['int64', 'int64'])
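        # read_file pops one (x, y) batch from the py_reader queue, which is
        # filled asynchronously by the decorated Python data source.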
        x, y = fluid.layers.read_file(py_reader)
    else:
        x = layers.data(
            name="x",
            shape=[batch_size_each, num_steps, 1],
            dtype='int64',
            append_batch_size=False)
        y = layers.data(
            name="y",
            shape=[batch_size_each * num_steps, 1],
            dtype='int64',
            append_batch_size=False)

    init_hidden = layers.data(
        name="init_hidden",
        shape=[num_layers, batch_size_each, hidden_size],
        dtype='float32',
        append_batch_size=False)
    init_cell = layers.data(
        name="init_cell",
        shape=[num_layers, batch_size_each, hidden_size],
        dtype='float32',
        append_batch_size=False)

    init_cell.persistable = True
    init_hidden.persistable = True

    init_hidden_reshape = layers.reshape(
        init_hidden, shape=[num_layers, -1, hidden_size])
    init_cell_reshape = layers.reshape(
        init_cell, shape=[num_layers, -1, hidden_size])

    x_emb = layers.embedding(
        input=x,
        size=[vocab_size, hidden_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(
            name='embedding_para',
            initializer=fluid.initializer.UniformInitializer(
                low=-init_scale, high=init_scale)))

    x_emb = layers.reshape(
        x_emb, shape=[-1, num_steps, hidden_size], inplace=True)
    if dropout != None and dropout > 0.0:
        x_emb = layers.dropout(
            x_emb,
            dropout_prob=dropout,
            dropout_implementation='upscale_in_train')

    if rnn_model == "padding":
        rnn_out, last_hidden, last_cell = padding_rnn(
            x_emb,
            len=num_steps,
            init_hidden=init_hidden_reshape,
            init_cell=init_cell_reshape)
    elif rnn_model == "static":
        rnn_out, last_hidden, last_cell = encoder_static(
            x_emb,
            len=num_steps,
            init_hidden=init_hidden_reshape,
            init_cell=init_cell_reshape)
    elif rnn_model == "cudnn":
        x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
        rnn_out, last_hidden, last_cell = layers.lstm(
            x_emb,
            init_hidden_reshape,
            init_cell_reshape,
            num_steps,
            hidden_size,
            num_layers,
            is_bidirec=False,
            default_initializer=fluid.initializer.UniformInitializer(
                low=-init_scale, high=init_scale))
        rnn_out = layers.transpose(rnn_out, perm=[1, 0, 2])
    elif rnn_model == "basic_lstm":
        rnn_out, last_hidden, last_cell = basic_lstm(
            x_emb,
            init_hidden,
            init_cell,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout_prob=dropout,
            param_attr=ParamAttr(
                initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale)),
            bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)),
            forget_bias=0.0)
    else:
        print("type not support")
        return

    rnn_out = layers.reshape(
        rnn_out, shape=[-1, num_steps, hidden_size], inplace=True)

    softmax_weight = layers.create_parameter(
        [hidden_size, vocab_size],
        dtype="float32",
        name="softmax_weight",
        default_initializer=fluid.initializer.UniformInitializer(
            low=-init_scale, high=init_scale))
    softmax_bias = layers.create_parameter(
        [vocab_size],
        dtype="float32",
        name='softmax_bias',
        default_initializer=fluid.initializer.UniformInitializer(
            low=-init_scale, high=init_scale))

    projection = layers.matmul(rnn_out, softmax_weight)
    projection = layers.elementwise_add(projection, softmax_bias)
    projection = layers.reshape(
        projection, shape=[-1, vocab_size], inplace=True)

    loss = layers.softmax_with_cross_entropy(
        logits=projection, label=y, soft_label=False)

    loss = layers.reshape(loss, shape=[-1, num_steps], inplace=True)
    loss = layers.reduce_mean(loss, dim=[0])
    loss = layers.reduce_sum(loss)

    loss.persistable = True
    last_cell.persistable = True
    last_hidden.persistable = True

    # This will feed last_hidden, last_cell to init_hidden, init_cell, which
    # can be used directly in next batch. This can avoid the fetching of
    # last_hidden and last_cell and feeding of init_hidden and init_cell in
    # each training step.
    layers.assign(input=last_cell, output=init_cell)
    layers.assign(input=last_hidden, output=init_hidden)

    feeding_list = ['x', 'y', 'init_hidden', 'init_cell']
    if use_py_reader:
        return loss, last_hidden, last_cell, feeding_list, py_reader
    else:
        return loss, last_hidden, last_cell, feeding_list
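
When use_py_reader=True, x and y come from the py_reader queue instead of a feed dict, so the training loop starts the reader, runs until fluid.core.EOFException, and resets it. Below is a hedged sketch only: the hyper-parameters and the data provider are hypothetical, and decorate_tensor_provider is assumed to accept a callable yielding batched numpy tensors shaped like feed_shapes above.

import numpy as np
import paddle.fluid as fluid

# Assumed hyper-parameters, for illustration only.
hidden_size, vocab_size, batch_size, num_layers, num_steps = 200, 10000, 20, 2, 20

def random_batches():
    # Hypothetical provider yielding a few batches shaped like feed_shapes.
    for _ in range(10):
        x = np.random.randint(0, vocab_size,
                              (batch_size, num_steps, 1)).astype('int64')
        y = np.random.randint(0, vocab_size,
                              (batch_size * num_steps, 1)).astype('int64')
        yield x, y

loss, last_hidden, last_cell, feeding_list, py_reader = lm_model(
    hidden_size, vocab_size, batch_size, num_layers=num_layers,
    num_steps=num_steps, use_py_reader=True)
py_reader.decorate_tensor_provider(random_batches)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

init_h = np.zeros((num_layers, batch_size, hidden_size), 'float32')
init_c = np.zeros((num_layers, batch_size, hidden_size), 'float32')

# Forward pass only in this sketch; no optimizer is attached.
py_reader.start()
try:
    while True:
        loss_val, = exe.run(feed={'init_hidden': init_h, 'init_cell': init_c},
                            fetch_list=[loss])
except fluid.core.EOFException:
    py_reader.reset()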
Code Example #19
            batch_label.append([sample[1]])

            if len(batch_img) >= batch_size:
                yield np.array(batch_img).astype("float32"), np.array(batch_label).astype("int64")
                batch_img = []
                batch_label = []

    if batch_img:
        yield np.array(batch_img).astype("float32"), np.array(batch_label).astype("int64")


# define network
data = fluid.layers.data(name="img", shape=[-1, 28, 28], dtype='float32')
label = fluid.layers.data(name="label", shape=[-1, 1], dtype='int64')
sequence_length = fluid.layers.data(name="sequence_length", shape=[-1], dtype='int32')
# two stacked LSTMs over the 28 image rows; classify from the second LSTM's outputs
output_row, _, _ = basic_lstm(data, None, None, 128, sequence_length=sequence_length)
output_col, _, _ = basic_lstm(output_row, None, None, 128, sequence_length=sequence_length)
predict = fluid.layers.fc(input=output_col, size=num_classes, act="softmax")
cost = fluid.layers.cross_entropy(input=predict, label=label)
loss = fluid.layers.reduce_mean(cost)
acc = fluid.layers.accuracy(input=predict, label=label)


# set up the train and test programs
test_program = fluid.default_main_program().clone(for_test=True)

# define the optimizer
optimizer = fluid.optimizer.RMSPropOptimizer(learning_rate=0.001, rho=0.9)
optimizer.minimize(loss)
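
A hedged training-loop sketch for the network above. It assumes the truncated batch generator is exposed as a callable batch_generator(batch_size) yielding (images, labels) exactly as in the yield statements above, and that every 28x28 image contributes a full 28-row sequence, so sequence_length is constant per sample.

import numpy as np

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

for epoch in range(5):
    # batch_generator is a hypothetical name for the reader defined above.
    for batch_img, batch_label in batch_generator(batch_size):
        seq_len = np.full((batch_img.shape[0],), 28, dtype='int32')
        loss_val, acc_val = exe.run(
            fluid.default_main_program(),
            feed={"img": batch_img,
                  "label": batch_label,
                  "sequence_length": seq_len},
            fetch_list=[loss, acc])
    print("epoch %d  loss=%.4f  acc=%.4f" %
          (epoch, float(loss_val), float(acc_val)))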