Exemple #1
0
    def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
        """
        Attend over `to_apply` and return its attention-weighted sum.

        Args:
            direct: Prefix of every parameter name created here; the
                    suffixes '.bp', '.wp', '.wr', '.w' and '.b' are appended.
            cur_token: Layer projected into the query; its size is also the
                       size of the query projection.
            prev: Second layer projected into the query and summed with the
                  projection of `cur_token`.
            to_apply: Layer that is weighted and then sum-pooled.
            to_apply_proj: Layer added to the expanded query before the tanh.

        Returns:
            The sum-pooled, attention-scaled `to_apply`.
        """
        # Parameter attributes for the query projection.
        query_bias = Attr.Param(direct + '.bp', initial_std=0.)
        token_weight = Attr.Param(direct + '.wp')
        prev_weight = Attr.Param(direct + '.wr')

        # Linear mix of the current token and the previous output.
        with layer.mixed(size=cur_token.size,
                         bias_attr=query_bias,
                         act=Act.Linear()) as query:
            query += layer.full_matrix_projection(input=cur_token,
                                                  param_attr=token_weight)
            query += layer.full_matrix_projection(input=prev,
                                                  param_attr=prev_weight)

        # Expand the query against `to_apply` and combine it with the
        # projection supplied by the caller.
        query_expanded = layer.expand(input=query, expand_as=to_apply)
        hidden = layer.addto(input=[query_expanded, to_apply_proj],
                             act=Act.Tanh(),
                             bias_attr=False)

        # One score per position, normalised with a sequence softmax.
        score_weight = Attr.Param(direct + '.w')
        score_bias = Attr.Param(direct + '.b', initial_std=0.)
        weights = layer.fc(input=hidden,
                           param_attr=score_weight,
                           bias_attr=score_bias,
                           act=Act.SequenceSoftmax(),
                           size=1)

        weighted = layer.scaling(input=to_apply, weight=weights)
        return layer.pooling(input=weighted,
                             pooling_type=paddle.pooling.Sum())
Exemple #2
0
    def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
        """
        Compute an attention-weighted sum of `to_apply`.

        Args:
            direct: Name prefix for the parameters created in this method
                    ('.bp', '.wp', '.wr', '.w' and '.b' are appended to it).
            cur_token: First layer fed to the query projection; the
                       projection has the same size as this layer.
            prev: Second layer fed to the query projection.
            to_apply: Layer the attention weights are applied to.
            to_apply_proj: Layer summed with the expanded query before tanh.

        Returns:
            `to_apply` scaled by the attention weights and sum-pooled.
        """
        # Step 1: linear projection of (cur_token, prev) with a shared bias.
        with layer.mixed(
                size=cur_token.size,
                bias_attr=Attr.Param(direct + '.bp', initial_std=0.),
                act=Act.Linear()) as mixed_query:
            mixed_query += layer.full_matrix_projection(
                input=cur_token, param_attr=Attr.Param(direct + '.wp'))
            mixed_query += layer.full_matrix_projection(
                input=prev, param_attr=Attr.Param(direct + '.wr'))

        # Step 2: expand the query against `to_apply`, add the caller's
        # projection, and squash with tanh.
        tanh_sum = layer.addto(
            input=[layer.expand(input=mixed_query, expand_as=to_apply),
                   to_apply_proj],
            act=Act.Tanh(),
            bias_attr=False)

        # Step 3: a size-1 fully connected layer with sequence softmax
        # yields the attention weights.
        attention = layer.fc(
            input=tanh_sum,
            param_attr=Attr.Param(direct + '.w'),
            bias_attr=Attr.Param(direct + '.b', initial_std=0.),
            act=Act.SequenceSoftmax(),
            size=1)

        # Step 4: weight `to_apply` and sum-pool the result.
        return layer.pooling(
            input=layer.scaling(input=to_apply, weight=attention),
            pooling_type=paddle.pooling.Sum())
Exemple #3
0
    def test_projection(self):
        """
        Build mixed layers from several projection types and print the
        parsed network of most of them.

        Each projection is wired into a mixed layer twice: once through the
        `input=` argument and once through the `with ... as` / `+=` form.
        The context-projection layers are built but not printed.
        """
        dense_in = layer.data(name='data2', type=data_type.dense_vector(784))
        word_ids = layer.data(
            name='word2', type=data_type.integer_value_sequence(10000))
        fc0 = layer.fc(input=dense_in, size=100, act=activation.Sigmoid())
        fc1 = layer.fc(input=dense_in, size=200, act=activation.Sigmoid())

        # Two full-matrix projections combined in one mixed layer.
        full_pair = [
            layer.full_matrix_projection(input=fc0),
            layer.full_matrix_projection(input=fc1),
        ]
        mixed0 = layer.mixed(size=256, input=full_pair)
        with layer.mixed(size=200) as mixed1:
            mixed1 += layer.full_matrix_projection(input=fc0)
            mixed1 += layer.identity_projection(input=fc1)

        # Table (embedding) projection of the word ids.
        table_proj = layer.table_projection(input=word_ids)
        emb0 = layer.mixed(size=512, input=table_proj)
        with layer.mixed(size=512) as emb1:
            emb1 += table_proj

        # Scaling projection.
        scale_proj = layer.scaling_projection(input=fc0)
        scale0 = layer.mixed(size=100, input=scale_proj)
        with layer.mixed(size=100) as scale1:
            scale1 += scale_proj

        # Dot-mul projection.
        dotmul_proj = layer.dotmul_projection(input=fc0)
        dotmul0 = layer.mixed(size=100, input=dotmul_proj)
        with layer.mixed(size=100) as dotmul1:
            dotmul1 += dotmul_proj

        # Context projection (constructed only; not printed below).
        context_proj = layer.context_projection(input=fc0, context_len=5)
        context0 = layer.mixed(size=500, input=context_proj)
        with layer.mixed(size=500) as context1:
            context1 += context_proj

        # Convolution projection; the mixed layers carry a bias.
        conv_proj = layer.conv_projection(
            input=dense_in,
            filter_size=1,
            num_channels=1,
            num_filters=128,
            stride=1,
            padding=0)
        conv0 = layer.mixed(input=conv_proj, bias_attr=True)
        with layer.mixed(bias_attr=True) as conv1:
            conv1 += conv_proj

        # Parse and print one network at a time, in the original order.
        printed = (mixed0, mixed1, emb0, emb1, scale0, scale1,
                   dotmul0, dotmul1, conv0, conv1)
        for net in printed:
            print(layer.parse_network(net))
Exemple #4
0
    def test_projection(self):
        """
        Build mixed layers from several projection types and print the
        parsed network of most of them.

        Each projection is wired into a mixed layer twice: once through the
        `input=` argument and once through the `with ... as` / `+=` form.
        The context-projection layers are built but not printed.
        """
        # `data` rather than `input`, to avoid shadowing the builtin.
        data = layer.data(name='data', type=data_type.dense_vector(784))
        word = layer.data(
            name='word', type=data_type.integer_value_sequence(10000))
        fc0 = layer.fc(input=data, size=100, act=activation.Sigmoid())
        fc1 = layer.fc(input=data, size=200, act=activation.Sigmoid())
        mixed0 = layer.mixed(
            size=256,
            input=[
                layer.full_matrix_projection(input=fc0),
                layer.full_matrix_projection(input=fc1)
            ])
        with layer.mixed(size=200) as mixed1:
            mixed1 += layer.full_matrix_projection(input=fc0)
            mixed1 += layer.identity_projection(input=fc1)

        table = layer.table_projection(input=word)
        emb0 = layer.mixed(size=512, input=table)
        with layer.mixed(size=512) as emb1:
            emb1 += table

        scale = layer.scaling_projection(input=fc0)
        scale0 = layer.mixed(size=100, input=scale)
        with layer.mixed(size=100) as scale1:
            scale1 += scale

        dotmul = layer.dotmul_projection(input=fc0)
        dotmul0 = layer.mixed(size=100, input=dotmul)
        with layer.mixed(size=100) as dotmul1:
            dotmul1 += dotmul

        # A context projection is input_size * context_len wide, so the
        # mixed layers holding it must be 100 * 5 = 500, not 100.
        context = layer.context_projection(input=fc0, context_len=5)
        context0 = layer.mixed(size=500, input=context)
        with layer.mixed(size=500) as context1:
            context1 += context

        conv = layer.conv_projection(
            input=data,
            filter_size=1,
            num_channels=1,
            num_filters=128,
            stride=1,
            padding=0)
        conv0 = layer.mixed(input=conv, bias_attr=True)
        with layer.mixed(bias_attr=True) as conv1:
            conv1 += conv

        # Parenthesised single-argument print behaves the same as the
        # Python 2 print statement used by the rest of this code.
        print(layer.parse_network(mixed0))
        print(layer.parse_network(mixed1))
        print(layer.parse_network(emb0))
        print(layer.parse_network(emb1))
        print(layer.parse_network(scale0))
        print(layer.parse_network(scale1))
        print(layer.parse_network(dotmul0))
        print(layer.parse_network(dotmul1))
        print(layer.parse_network(conv0))
        print(layer.parse_network(conv1))
Exemple #5
0
    def _step(self, name, h_q_all, q_proj, h_p_cur):
        """
        Match-LSTM step: the operations performed in a single time step.

        Args:
            name: Step name. A name containing 'left' selects the 'left'
                  parameter set, any other name selects 'right'. With the
                  suffix '_out_' it also names the memory and the LSTM unit.
            h_q_all: Question encodings.
            q_proj: Layer forwarded to `self._attention` together with
                    `h_q_all`.
            h_p_cur: Current hidden of paragraph encodings: h_i.
                     This is the `REAL` input of the group, like
                     x_t in normal rnn.

        Returns:
            The $h^{r}_{i}$ in the paper.
        """
        direct = 'left' if 'left' in name else 'right'
        out_name = name + '_out_'
        hidden_size = h_q_all.size

        # Memory that refers to the layer named `out_name`, i.e. the LSTM
        # unit created at the end of this method.
        h_r_prev = paddle.layer.memory(name=out_name,
                                       size=hidden_size,
                                       boot_layer=None)

        # Attend over the question, then fuse with the current paragraph
        # encoding.
        q_expr = self._attention(direct, h_p_cur, h_r_prev, h_q_all, q_proj)
        z_cur = self.fusion_layer(h_p_cur, q_expr)

        # Project the fused vector to four times the hidden size before
        # handing it to the LSTM unit.
        input_weight = Attr.Param('match_input_%s.w0' % direct)
        with layer.mixed(size=hidden_size * 4,
                         act=Act.Tanh(),
                         bias_attr=False) as match_input:
            match_input += layer.full_matrix_projection(
                input=z_cur, param_attr=input_weight)

        lstm_weight = Attr.Param('step_lstm_%s.w' % direct)
        proj_bias = Attr.Param('step_lstm_mixed_%s.bias' % direct,
                               initial_std=0.)
        lstm_bias = Attr.Param('step_lstm_%s.bias' % direct,
                               initial_std=0.)
        return paddle.networks.lstmemory_unit(
            name=out_name,
            input=match_input,
            size=hidden_size,
            out_memory=h_r_prev,
            param_attr=lstm_weight,
            input_proj_bias_attr=proj_bias,
            lstm_bias_attr=lstm_bias)
Exemple #6
0
    def _step(self, name, h_q_all, q_proj, h_p_cur):
        """
        Run one time step of the Match-LSTM.

        Args:
            name: Name of this step. If it contains 'left' the 'left'
                  parameters are used, otherwise the 'right' ones;
                  `name + '_out_'` names both the memory and the LSTM unit.
            h_q_all: Question encodings.
            q_proj: Passed through to `self._attention` alongside `h_q_all`.
            h_p_cur: Current hidden of paragraph encodings: h_i.
                     This is the `REAL` input of the group, like
                     x_t in normal rnn.

        Returns:
            The $h^{r}_{i}$ in the paper.
        """
        if 'left' in name:
            direct = 'left'
        else:
            direct = 'right'

        # Memory named after the LSTM unit created at the end of this
        # method.
        previous_out = paddle.layer.memory(
            name=name + '_out_', size=h_q_all.size, boot_layer=None)

        attended_q = self._attention(
            direct, h_p_cur, previous_out, h_q_all, q_proj)
        fused = self.fusion_layer(h_p_cur, attended_q)

        # The mixed layer is four times the hidden size and has no bias.
        projection_attr = Attr.Param('match_input_%s.w0' % direct)
        with layer.mixed(
                size=h_q_all.size * 4, act=Act.Tanh(),
                bias_attr=False) as lstm_input:
            lstm_input += layer.full_matrix_projection(
                input=fused, param_attr=projection_attr)

        # Both biases of the LSTM unit are initialised with zero std.
        return paddle.networks.lstmemory_unit(
            name=name + '_out_',
            out_memory=previous_out,
            param_attr=Attr.Param('step_lstm_%s.w' % direct),
            input_proj_bias_attr=Attr.Param(
                'step_lstm_mixed_%s.bias' % direct, initial_std=0.),
            lstm_bias_attr=Attr.Param(
                'step_lstm_%s.bias' % direct, initial_std=0.),
            input=lstm_input,
            size=h_q_all.size)
Exemple #7
0
    def _step(self, name, h_q_all, q_proj, h_p_cur, qe_comm, ee_comm):
        """
        Match-LSTM step with two extra feature embeddings: the operations
        performed in a single time step.

        Args:
            name: Step name. A name containing 'left' selects the 'left'
                  parameter set, any other name selects 'right'. With the
                  suffix '_out_' it also names the memory and the LSTM unit.
            h_q_all: Question encodings.
            q_proj: Layer forwarded to `self._attention` together with
                    `h_q_all`.
            h_p_cur: Current hidden of paragraph encodings: h_i.
                     This is the `REAL` input of the group, like
                     x_t in normal rnn.
            qe_comm: Input of the embedding whose table is named
                     '_cw_embedding.w0'.
            ee_comm: Input of the embedding whose table is named
                     '_eecom_embedding.w0'.

        Returns:
            The $h^{r}_{i}$ in the paper.
        """
        conf = mLSTM_crf_config.TrainingConfig()
        direct = 'left' if 'left' in name else 'right'
        out_name = name + '_out_'
        hidden_size = h_q_all.size

        # Output of the previous time step.
        h_r_prev = paddle.layer.memory(name=out_name,
                                       size=hidden_size,
                                       boot_layer=None)

        # Attend over the question encodings using the current paragraph
        # encoding and the previous output, then fuse the result with the
        # current paragraph encoding.
        q_expr = self._attention(direct, h_p_cur, h_r_prev, h_q_all, q_proj)
        z_cur = self.fusion_layer(h_p_cur, q_expr)

        # Feature embeddings; both share the same size, initial std and
        # L2 rate and differ only in their input and table name.
        comm_initial_std = 1 / math.sqrt(64.0)

        def feature_embedding(ids, table_name):
            # Embed one feature input with the shared settings.
            return paddle.layer.embedding(
                input=ids,
                size=conf.com_vec_dim,
                param_attr=paddle.attr.ParamAttr(
                    name=table_name,
                    initial_std=comm_initial_std,
                    l2_rate=conf.default_l2_rate))

        qe_comm_emb = feature_embedding(qe_comm, "_cw_embedding.w0")
        ee_comm_emb = feature_embedding(ee_comm, "_eecom_embedding.w0")

        # Map the combined inputs to the dimension expected by the LSTM
        # unit. Projections are added in this exact order.
        projected = (
            (z_cur, 'match_input_z_%s.w0'),
            (qe_comm_emb, 'match_input_qe_%s.w0'),
            (ee_comm_emb, 'match_input_ee_%s.w0'),
        )
        with layer.mixed(size=hidden_size * 4,
                         act=Act.Tanh(),
                         bias_attr=False) as match_input:
            for source, weight_pattern in projected:
                match_input += layer.full_matrix_projection(
                    input=source,
                    param_attr=Attr.Param(weight_pattern % direct))

        lstm_weight = Attr.Param('step_lstm_%s.w' % direct)
        proj_bias = Attr.Param('step_lstm_mixed_%s.bias' % direct,
                               initial_std=0.)
        lstm_bias = Attr.Param('step_lstm_%s.bias' % direct,
                               initial_std=0.)
        return paddle.networks.lstmemory_unit(
            name=out_name,
            input=match_input,
            size=hidden_size,
            out_memory=h_r_prev,
            param_attr=lstm_weight,
            input_proj_bias_attr=proj_bias,
            lstm_bias_attr=lstm_bias)