Example #1
    def _attention(self, direct, cur_token, prev, to_apply, to_apply_proj):
        with layer.mixed(size=cur_token.size,
                         bias_attr=Attr.Param(direct + '.bp', initial_std=0.),
                         act=Act.Linear()) as proj:
            proj += layer.full_matrix_projection(input=cur_token,
                                                 param_attr=Attr.Param(direct +
                                                                       '.wp'))
            proj += layer.full_matrix_projection(input=prev,
                                                 param_attr=Attr.Param(direct +
                                                                       '.wr'))

        expanded = layer.expand(input=proj, expand_as=to_apply)
        att_context = layer.addto(input=[expanded, to_apply_proj],
                                  act=Act.Tanh(),
                                  bias_attr=False)

        att_weights = layer.fc(input=att_context,
                               param_attr=Attr.Param(direct + '.w'),
                               bias_attr=Attr.Param(direct + '.b',
                                                    initial_std=0.),
                               act=Act.SequenceSoftmax(),
                               size=1)
        scaled = layer.scaling(input=to_apply, weight=att_weights)
        applied = layer.pooling(input=scaled,
                                pooling_type=paddle.pooling.Sum())
        return applied
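In equation form, the block above computes additive attention over `to_apply` (the question time steps $q_j$), where `to_apply_proj` is the caller-supplied projection $U q_j$; roughly

$e_j = w^\top \tanh\big(W_p\, h_p + W_r\, h^{r}_{prev} + U q_j + b_p\big) + b, \qquad \alpha = \operatorname{softmax}(e), \qquad \mathrm{applied} = \sum_j \alpha_j\, q_j,$

with $W_p$, $W_r$, $b_p$ the parameters of the mixed projection (`.wp`, `.wr`, `.bp`) and $w$, $b$ those of the size-1 scoring `fc` layer (`.w`, `.b`).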
Example #2
    def network(self):
        """
        Implements the whole network of Match-LSTM.

        Returns:
            A tuple of LayerOutput objects containing the start and end
            probability distributions respectively.
        """
        self.check_and_create_data()
        self.create_shared_params()
        q_enc = self.get_enc(self.q_ids, type='q')
        p_encs = []
        p_matches = []
        for p in self.p_ids:
            p_encs.append(self.get_enc(p, type='p'))

        q_proj_left = layer.fc(size=self.emb_dim * 2,
                               bias_attr=False,
                               param_attr=Attr.Param(self.name + '_left_' +
                                                     '.wq'),
                               input=q_enc)
        q_proj_right = layer.fc(size=self.emb_dim * 2,
                                bias_attr=False,
                                param_attr=Attr.Param(self.name + '_right_' +
                                                      '.wq'),
                                input=q_enc)
        for i, p in enumerate(p_encs):
            left_out = self.recurrent_group(
                self.name + '_left_' + str(i),
                [layer.StaticInput(q_enc),
                 layer.StaticInput(q_proj_left), p],
                reverse=False)
            right_out = self.recurrent_group(
                self.name + '_right_' + str(i),
                [layer.StaticInput(q_enc),
                 layer.StaticInput(q_proj_right), p],
                reverse=True)
            match_seq = layer.concat(input=[left_out, right_out])
            match_seq_dropped = self.drop_out(match_seq, drop_rate=0.5)
            bi_match_seq = paddle.networks.bidirectional_lstm(
                input=match_seq_dropped,
                size=match_seq.size,
                fwd_mat_param_attr=Attr.Param('pn_f_enc_mat.w'),
                fwd_bias_param_attr=Attr.Param('pn_f_enc.bias',
                                               initial_std=0.),
                fwd_inner_param_attr=Attr.Param('pn_f_enc_inn.w'),
                bwd_mat_param_attr=Attr.Param('pn_b_enc_mat.w'),
                bwd_bias_param_attr=Attr.Param('pn_b_enc.bias',
                                               initial_std=0.),
                bwd_inner_param_attr=Attr.Param('pn_b_enc_inn.w'),
                return_seq=True)
            p_matches.append(bi_match_seq)

        all_docs = reduce(lambda x, y: layer.seq_concat(a=x, b=y), p_matches)
        all_docs_dropped = self.drop_out(all_docs, drop_rate=0.5)
        start = self.decode('start', all_docs_dropped)
        end = self.decode('end', all_docs_dropped)
        return start, end
Example #3
    def _step_basic(self, h_cur, u):
        expanded_h = layer.expand(input=h_cur, expand_as=u)
        hu = layer.concat(input=[expanded_h, u])
        with layer.mixed(bias_attr=False) as dot_hu:
            dot_hu += layer.dotmul_operator(a=expanded_h, b=u)
        cat_all = layer.concat(input=[hu, dot_hu])
        s = layer.fc(size=1,
                     bias_attr=False,
                     param_attr=Attr.Param(self.name + '.ws'),
                     input=cat_all)
        return s
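Spelled out, the score computed for each question position $j$ is the concatenation-based similarity

$s_j = w_s^\top \,[\,h;\ u_j;\ h \circ u_j\,],$

where $h$ is the expanded current hidden state, $u_j$ the question encoding at position $j$, $\circ$ element-wise multiplication, and $w_s$ the `.ws` parameter of the final size-1 `fc` layer.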
Example #4
    def _step(self, name, h_q_all, q_proj, h_p_cur):
        """
        Match-LSTM step. This function performs operations done in one
        time step.

        Args:
            h_p_cur: The current hidden state of the paragraph encoding, h_i.
                     This is the real sequence input of the group, like
                     x_t in a plain RNN.
            h_q_all: The question encodings.

        Returns:
            The $h^{r}_{i}$ in the paper.
        """
        direct = 'left' if 'left' in name else 'right'

        h_r_prev = paddle.layer.memory(name=name + '_out_',
                                       size=h_q_all.size,
                                       boot_layer=None)
        q_expr = self._attention(direct, h_p_cur, h_r_prev, h_q_all, q_proj)
        z_cur = self.fusion_layer(h_p_cur, q_expr)

        with layer.mixed(size=h_q_all.size * 4,
                         act=Act.Tanh(),
                         bias_attr=False) as match_input:
            match_input += layer.full_matrix_projection(
                input=z_cur,
                param_attr=Attr.Param('match_input_%s.w0' % direct))

        step_out = paddle.networks.lstmemory_unit(
            name=name + '_out_',
            out_memory=h_r_prev,
            param_attr=Attr.Param('step_lstm_%s.w' % direct),
            input_proj_bias_attr=Attr.Param('step_lstm_mixed_%s.bias' % direct,
                                            initial_std=0.),
            lstm_bias_attr=Attr.Param('step_lstm_%s.bias' % direct,
                                      initial_std=0.),
            input=match_input,
            size=h_q_all.size)
        return step_out
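Summarising the step: with $H^q$ the question encodings, $h^p_i$ the current paragraph hidden state and $h^{r}_{i-1}$ the memory of the previous step, the layers above compute roughly

$z_i = \mathrm{fusion}\big(h^p_i,\ \mathrm{att}(h^p_i,\ h^{r}_{i-1},\ H^q)\big), \qquad h^{r}_i = \mathrm{LSTM}\big(\tanh(W_m z_i),\ h^{r}_{i-1}\big),$

where $W_m$ is the `match_input_*.w0` projection and the recurrent state is carried by the `memory`/`out_memory` pair named `name + '_out_'`.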
Example #5
    def network(self):
        """
        Implements the details of the model.
        """
        self.check_and_create_data()
        self.create_shared_params()
        q_enc = self.get_enc(self.q_ids, type='q')
        a_enc = self.get_enc(self.a_ids, type='q')

        q_proj_left = layer.fc(size=self.emb_dim * 2,
                               bias_attr=False,
                               param_attr=Attr.Param(self.name + '_left.wq'),
                               input=q_enc)
        q_proj_right = layer.fc(size=self.emb_dim * 2,
                                bias_attr=False,
                                param_attr=Attr.Param(self.name + '_right.wq'),
                                input=q_enc)
        left_match = self.recurrent_group(
            self.name + '_left',
            [layer.StaticInput(q_enc),
             layer.StaticInput(q_proj_left), a_enc],
            reverse=False)
        right_match = self.recurrent_group(
            self.name + '_right',
            [layer.StaticInput(q_enc),
             layer.StaticInput(q_proj_right), a_enc],
            reverse=True)
        match_seq = layer.concat(input=[left_match, right_match])
        with layer.mixed(size=match_seq.size,
                         act=Act.Identity(),
                         layer_attr=Attr.ExtraLayerAttribute(drop_rate=0.2),
                         bias_attr=False) as dropped:
            dropped += layer.identity_projection(match_seq)
        match_result = layer.pooling(input=dropped,
                                     pooling_type=paddle.pooling.Max())
        cls = layer.fc(input=match_result,
                       act=Act.Softmax(),
                       size=self.label_dim)
        return cls
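A minimal sketch (hypothetical `label_dim` binding and data-layer name, assuming `import paddle.v2 as paddle`; not part of the original model) of how the softmax output returned by `network()` could be attached to a training cost:

    # Hypothetical wiring: a label data layer and a classification cost
    # for the `cls` output returned above.
    label = paddle.layer.data(name='label',
                              type=paddle.data_type.integer_value(label_dim))
    cost = paddle.layer.classification_cost(input=cls, label=label)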
Example #6
    def network(self, question, evidence, qe_comm, ee_comm, conf):
        """
        Implements the whole network of Match-LSTM.

        Returns:
            A LayerOutput object containing the bidirectional match
            sequence with dropout applied.
        """

        q_enc = self.get_enc(question, conf, type='q')
        p_enc = self.get_enc(evidence, conf, type='q')

        q_proj_left = layer.fc(size=conf.word_vec_dim * 2,
                               bias_attr=False,
                               param_attr=Attr.Param(self.name + '_left_' +
                                                     '.wq'),
                               input=q_enc)
        q_proj_right = layer.fc(size=conf.word_vec_dim * 2,
                                bias_attr=False,
                                param_attr=Attr.Param(self.name + '_right_' +
                                                      '.wq'),
                                input=q_enc)
        # StaticInput defines a read-only memory: inputs passed through StaticInput are
        # not split into time steps by recurrent_group, so every unrolled time step can
        # reference them in full; they may be a non-sequence or a single-level sequence.
        left_out = self.recurrent_group(self.name + '_left', [
            layer.StaticInput(q_enc),
            layer.StaticInput(q_proj_left), p_enc, qe_comm, ee_comm
        ],
                                        reverse=False)
        right_out = self.recurrent_group(self.name + '_right_', [
            layer.StaticInput(q_enc),
            layer.StaticInput(q_proj_right), p_enc, qe_comm, ee_comm
        ],
                                         reverse=True)
        match_seq = layer.concat(input=[left_out, right_out])
        return self.drop_out(match_seq, drop_rate=0.5)
Example #7
    def _get_enc(self, input, type='q'):
        embs = self.get_embs(input)
        enc = networks.bidirectional_lstm(
            input=embs,
            size=self.emb_dim,
            fwd_mat_param_attr=Attr.Param(self.name + '_f_enc_mat.w' + type),
            fwd_bias_param_attr=Attr.Param(self.name + '_f_enc.bias' + type,
                                           initial_std=0.),
            fwd_inner_param_attr=Attr.Param(self.name + '_f_enc_inn.w' + type),
            bwd_mat_param_attr=Attr.Param(self.name + '_b_enc_mat.w' + type),
            bwd_bias_param_attr=Attr.Param(self.name + '_b_enc.bias' + type,
                                           initial_std=0.),
            bwd_inner_param_attr=Attr.Param(self.name + '_b_enc_inn.w' + type),
            return_seq=True)
        enc_dropped = self.drop_out(enc, drop_rate=0.25)
        return enc_dropped
Example #8
    def get_enc(self, input, type='q'):
        """
        Encodes the input by feeding it into a bidirectional LSTM and
        concatenating the forward and backward representations of each
        time step.
        """
        embs = self.get_embs(input)
        enc = paddle.networks.bidirectional_lstm(
            input=embs,
            size=self.emb_dim,
            fwd_mat_param_attr=Attr.Param('f_enc_mat.w' + type),
            fwd_bias_param_attr=Attr.Param('f_enc.bias' + type,
                                           initial_std=0.),
            fwd_inner_param_attr=Attr.Param('f_enc_inn.w' + type),
            bwd_mat_param_attr=Attr.Param('b_enc_mat.w' + type),
            bwd_bias_param_attr=Attr.Param('b_enc.bias' + type,
                                           initial_std=0.),
            bwd_inner_param_attr=Attr.Param('b_enc_inn.w' + type),
            return_seq=True)
        enc_dropped = self.drop_out(enc, drop_rate=0.5)
        return enc_dropped
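Because `return_seq=True` makes `bidirectional_lstm` return the concatenated forward and backward states at every time step, the encoder output is twice as wide as `emb_dim`, which is why callers in the other examples project it with `size=self.emb_dim * 2`. A minimal sketch of that shape relationship (hypothetical `model` and `q_ids` names):

    # Each time step of the encoder output holds [fwd_state; bwd_state],
    # so its per-step width is 2 * emb_dim.
    q_enc = model.get_enc(q_ids, type='q')
    assert q_enc.size == 2 * model.emb_dim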
Example #9
    def network(self):
        """
        Implements the whole network.

        Returns:
            A tuple of LayerOutput objects containing the start and end
            probability distributions respectively.
        """
        self.check_and_create_data()
        self.create_shared_params()
        u = self._get_enc(self.q_ids, type='q')
        m1s = []
        m2s = []
        for p in self.p_ids:
            h = self._get_enc(p, type='q')
            g = self._attention_flow(h, u)
            m1 = networks.bidirectional_lstm(
                fwd_mat_param_attr=Attr.Param('_f_m1_mat.w'),
                fwd_bias_param_attr=Attr.Param('_f_m1.bias', initial_std=0.),
                fwd_inner_param_attr=Attr.Param('_f_m1_inn.w'),
                bwd_mat_param_attr=Attr.Param('_b_m1_mat.w'),
                bwd_bias_param_attr=Attr.Param('_b_m1.bias', initial_std=0.),
                bwd_inner_param_attr=Attr.Param('_b_m1_inn.w'),
                input=g,
                size=self.emb_dim,
                return_seq=True)
            m1_dropped = self.drop_out(m1, drop_rate=0.)
            cat_g_m1 = layer.concat(input=[g, m1_dropped])

            m2 = networks.bidirectional_lstm(
                fwd_mat_param_attr=Attr.Param('_f_m2_mat.w'),
                fwd_bias_param_attr=Attr.Param('_f_m2.bias', initial_std=0.),
                fwd_inner_param_attr=Attr.Param('_f_m2_inn.w'),
                bwd_mat_param_attr=Attr.Param('_b_m2_mat.w'),
                bwd_bias_param_attr=Attr.Param('_b_m2.bias', initial_std=0.),
                bwd_inner_param_attr=Attr.Param('_b_m2_inn.w'),
                input=m1,
                size=self.emb_dim,
                return_seq=True)
            m2_dropped = self.drop_out(m2, drop_rate=0.)
            cat_g_m2 = layer.concat(input=[g, m2_dropped])
            m1s.append(cat_g_m1)
            m2s.append(cat_g_m2)

        all_m1 = reduce(lambda x, y: layer.seq_concat(a=x, b=y), m1s)
        all_m2 = reduce(lambda x, y: layer.seq_concat(a=x, b=y), m2s)

        start = self.decode('start', all_m1)
        end = self.decode('end', all_m2)
        return start, end
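In BiDAF terms, the loop above builds the two modeling layers and the pointer inputs per paragraph, roughly

$M^{(1)} = \mathrm{BiLSTM}(G), \qquad M^{(2)} = \mathrm{BiLSTM}(M^{(1)}), \qquad p^{start} = \mathrm{decode}([G;\ M^{(1)}]), \qquad p^{end} = \mathrm{decode}([G;\ M^{(2)}]),$

with $G$ the attention-flow output and the per-paragraph sequences joined by `seq_concat` before decoding.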
Example #10
import unittest

import paddle.v2.activation as activation
import paddle.v2.attr as attr
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.pooling as pooling
import paddle.v2.networks as networks

pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
label = layer.data(name='label', type=data_type.integer_value(10))
weight = layer.data(name='weight', type=data_type.dense_vector(1))
combine_weight = layer.data(name='weight_combine',
                            type=data_type.dense_vector(10))
score = layer.data(name='score', type=data_type.dense_vector(1))

hidden = layer.fc(input=pixel,
                  size=100,
                  act=activation.Sigmoid(),
                  param_attr=attr.Param(name='hidden'))
inference = layer.fc(input=hidden, size=10, act=activation.Softmax())
conv = layer.img_conv(input=pixel,
                      filter_size=1,
                      filter_size_y=1,
                      num_channels=8,
                      num_filters=16,
                      act=activation.Linear())


class ImageLayerTest(unittest.TestCase):
    def test_conv_layer(self):
        conv_shift = layer.conv_shift(a=pixel, b=score)
        print layer.parse_network(conv, conv_shift)

    def test_pooling_layer(self):
Example #11
    def _step(self, name, h_q_all, q_proj, h_p_cur, qe_comm, ee_comm):
        """
        Match-LSTM step. This function performs operations done in one
        time step.

        Args:
            h_p_cur: The current hidden state of the paragraph encoding, h_i.
                     This is the real sequence input of the group, like
                     x_t in a plain RNN.
            h_q_all: The question encodings.

        Returns:
            The $h^{r}_{i}$ in the paper.
        """
        conf = mLSTM_crf_config.TrainingConfig()
        direct = 'left' if 'left' in name else 'right'

        # Fetch the output of the previous time step
        h_r_prev = paddle.layer.memory(name=name + '_out_',
                                       size=h_q_all.size,
                                       boot_layer=None)
        # h_p_cur :: current hidden state of the paragraph encoding
        # h_q_all :: question encodings
        # q_proj  :: q_proj_(left or right)
        q_expr = self._attention(direct, h_p_cur, h_r_prev, h_q_all, q_proj)
        z_cur = self.fusion_layer(h_p_cur, q_expr)

        # feature embeddings
        comm_initial_std = 1 / math.sqrt(64.0)
        qe_comm_emb = paddle.layer.embedding(input=qe_comm,
                                             size=conf.com_vec_dim,
                                             param_attr=paddle.attr.ParamAttr(
                                                 name="_cw_embedding.w0",
                                                 initial_std=comm_initial_std,
                                                 l2_rate=conf.default_l2_rate))

        ee_comm_emb = paddle.layer.embedding(input=ee_comm,
                                             size=conf.com_vec_dim,
                                             param_attr=paddle.attr.ParamAttr(
                                                 name="_eecom_embedding.w0",
                                                 initial_std=comm_initial_std,
                                                 l2_rate=conf.default_l2_rate))

        # layer.mixed :: project the combined inputs to the dimension expected as the LSTM input
        with layer.mixed(size=h_q_all.size * 4,
                         act=Act.Tanh(),
                         bias_attr=False) as match_input:
            match_input += layer.full_matrix_projection(
                input=z_cur,
                param_attr=Attr.Param('match_input_z_%s.w0' % direct))
            match_input += layer.full_matrix_projection(
                input=qe_comm_emb,
                param_attr=Attr.Param('match_input_qe_%s.w0' % direct))
            match_input += layer.full_matrix_projection(
                input=ee_comm_emb,
                param_attr=Attr.Param('match_input_ee_%s.w0' % direct))

        step_out = paddle.networks.lstmemory_unit(
            name=name + '_out_',
            out_memory=h_r_prev,
            param_attr=Attr.Param('step_lstm_%s.w' % direct),
            input_proj_bias_attr=Attr.Param('step_lstm_mixed_%s.bias' % direct,
                                            initial_std=0.),
            lstm_bias_attr=Attr.Param('step_lstm_%s.bias' % direct,
                                      initial_std=0.),
            input=match_input,
            size=h_q_all.size)
        return step_out
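Compared with the step in Example #4, the only difference is that the LSTM input also mixes in the two co-occurrence feature embeddings, roughly

$\mathrm{match\_input}_i = \tanh\big(W_z z_i + W_{qe}\, e^{qe}_i + W_{ee}\, e^{ee}_i\big),$

with the three projections parameterised by `match_input_z_*.w0`, `match_input_qe_*.w0` and `match_input_ee_*.w0`.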