Example #1
    def get_loss(self, start_prob, end_prob, start_label, end_label):
        """
        Compute the loss: $l_{\theta} = -\log\left(P(start) \cdot P(end|start)\right)$

        Returns:
            A LayerOutput object containing the loss.
        """
        # Chain the start and end distributions (and their one-hot labels)
        # into single sequences so one pipeline scores both.
        probs = layer.seq_concat(a=start_prob, b=end_prob)
        labels = layer.seq_concat(a=start_label, b=end_label)

        # Element-wise log of the probabilities.
        log_probs = layer.mixed(
                    size=probs.size,
                    act=Act.Log(),
                    bias_attr=False,
                    input=layer.identity_projection(probs))

        # Negate: slope_intercept computes slope * x + intercept.
        neg_log_probs = layer.slope_intercept(
                        input=log_probs,
                        slope=-1,
                        intercept=0)

        # An element-wise product with the one-hot labels keeps only the
        # log-probabilities of the gold positions.
        loss = layer.mixed(
               size=1,
               input=layer.dotmul_operator(a=neg_log_probs, b=labels))

        # Sum over the sequence to obtain the scalar cost.
        sum_val = layer.pooling(input=loss,
                                pooling_type=paddle.pooling.Sum())
        cost = layer.sum_cost(input=sum_val)
        return cost
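
For intuition, a minimal NumPy sketch of the same computation (an illustration, not part of the repo): concatenate the two distributions, negate the logs, and dot with the one-hot labels, which picks out -log P(start) - log P(end|start).

import numpy as np

def span_loss(start_prob, end_prob, start_label, end_label):
    # Mirrors the layer pipeline: seq_concat -> Log -> slope_intercept(-1)
    # -> dotmul with one-hot labels -> Sum pooling.
    probs = np.concatenate([start_prob, end_prob])
    labels = np.concatenate([start_label, end_label])
    return np.sum(-np.log(probs) * labels)

# Gold start at index 1, gold end at index 2:
start_p = np.array([0.1, 0.7, 0.2])
end_p = np.array([0.1, 0.1, 0.8])
start_y = np.array([0., 1., 0.])
end_y = np.array([0., 0., 1.])
print(span_loss(start_p, end_p, start_y, end_y))  # -log(0.7) - log(0.8)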
Example #2
    def network(self):
        """
        Implements the whole network.

        Returns:
            A tuple of LayerOutput objects containing the start and end
            probability distributions respectively.
        """
        self.check_and_create_data()
        self.create_shared_params()
        u = self._get_enc(self.q_ids, type='q')  # question encoding
        m1s = []
        m2s = []
        for p in self.p_ids:
            # Paragraphs are encoded with the same settings as the question.
            h = self._get_enc(p, type='q')
            g = self._attention_flow(h, u)  # BiDAF attention flow
            # Reusing the same Attr.Param names on every iteration ties the
            # modeling-LSTM weights across all paragraphs.
            m1 = networks.bidirectional_lstm(
                fwd_mat_param_attr=Attr.Param('_f_m1_mat.w'),
                fwd_bias_param_attr=Attr.Param('_f_m1.bias', initial_std=0.),
                fwd_inner_param_attr=Attr.Param('_f_m1_inn.w'),
                bwd_mat_param_attr=Attr.Param('_b_m1_mat.w'),
                bwd_bias_param_attr=Attr.Param('_b_m1.bias', initial_std=0.),
                bwd_inner_param_attr=Attr.Param('_b_m1_inn.w'),
                input=g,
                size=self.emb_dim,
                return_seq=True)
            m1_dropped = self.drop_out(m1, drop_rate=0.)
            cat_g_m1 = layer.concat(input=[g, m1_dropped])

            m2 = networks.bidirectional_lstm(
                fwd_mat_param_attr=Attr.Param('_f_m2_mat.w'),
                fwd_bias_param_attr=Attr.Param('_f_m2.bias', initial_std=0.),
                fwd_inner_param_attr=Attr.Param('_f_m2_inn.w'),
                bwd_mat_param_attr=Attr.Param('_b_m2_mat.w'),
                bwd_bias_param_attr=Attr.Param('_b_m2.bias', initial_std=0.),
                bwd_inner_param_attr=Attr.Param('_b_m2_inn.w'),
                input=m1,
                size=self.emb_dim,
                return_seq=True)
            m2_dropped = self.drop_out(m2, drop_rate=0.)
            cat_g_m2 = layer.concat(input=[g, m2_dropped])
            m1s.append(cat_g_m1)
            m2s.append(cat_g_m2)

        all_m1 = reduce(lambda x, y: layer.seq_concat(a=x, b=y), m1s)
        all_m2 = reduce(lambda x, y: layer.seq_concat(a=x, b=y), m2s)

        start = self.decode('start', all_m1)
        end = self.decode('end', all_m2)
        return start, end
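
The reduce/seq_concat fold above chains the per-paragraph sequences end to end. A plain-Python analogue (lists stand in for LayerOutput sequences; note that on Python 3 reduce must be imported from functools):

from functools import reduce

# seq_concat appends one sequence after another, so the fold yields
# paragraph 0's steps, then paragraph 1's, and so on.
m1s = [['p0_t0', 'p0_t1', 'p0_t2'], ['p1_t0', 'p1_t1']]
all_m1 = reduce(lambda x, y: x + y, m1s)
print(all_m1)  # ['p0_t0', 'p0_t1', 'p0_t2', 'p1_t0', 'p1_t1']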
Example #3
 def check_and_create_data(self):
     """
     Checks that the input data is valid and creates the data layers
     according to the input fields.
     """
     if self.is_infer:
         expected = ['q_ids', 'p_ids', 'para_length',
                     '[start_label, end_label, ...]']
         if len(self.inputs) < 2 * self.doc_num + 1:
             raise ValueError(r'''Input schema: expected vs given:
                      {} vs {}'''.format(expected, self.inputs))
     else:
         expected = ['q_ids', 'p_ids', 'para_length',
                     'start_label', 'end_label', '...']
         if len(self.inputs) < 4 * self.doc_num + 1:
             raise ValueError(r'''Input schema: expected vs given:
                      {} vs {}'''.format(expected, self.inputs))
         self.start_labels = []
         for i in range(1 + 2 * self.doc_num, 1 + 3 * self.doc_num):
             self.start_labels.append(
                     layer.data(name=self.inputs[i],
                         type=data_type.dense_vector_sequence(1)))
         self.start_label = reduce(
                 lambda x, y: layer.seq_concat(a=x, b=y),
                 self.start_labels)
         self.end_labels = []
         for i in range(1 + 3 * self.doc_num, 1 + 4 * self.doc_num):
             self.end_labels.append(
                     layer.data(name=self.inputs[i],
                         type=data_type.dense_vector_sequence(1)))
         self.end_label = reduce(
                 lambda x, y: layer.seq_concat(a=x, b=y),
                 self.end_labels)
     self.q_ids = layer.data(
             name=self.inputs[0],
             type=data_type.integer_value_sequence(self.vocab_size))
     self.p_ids = []
     for i in range(1, 1 + self.doc_num):
         self.p_ids.append(
                 layer.data(name=self.inputs[i],
                     type=data_type.integer_value_sequence(self.vocab_size)))
     self.para_lens = []
     for i in range(1 + self.doc_num, 1 + 2 * self.doc_num):
         self.para_lens.append(
                 layer.data(name=self.inputs[i],
                     type=data_type.dense_vector_sequence(1)))
     self.para_len = reduce(lambda x, y: layer.seq_concat(a=x, b=y),
             self.para_lens)
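
The index arithmetic above assumes one flat list of input names, laid out field by field. A hedged sketch for doc_num = 2 (the names are illustrative, not the repo's):

doc_num = 2
inputs = ['q_ids',                      # index 0
          'p_ids_0', 'p_ids_1',         # 1 .. doc_num
          'para_len_0', 'para_len_1',   # 1 + doc_num .. 2 * doc_num
          'start_0', 'start_1',         # 1 + 2 * doc_num .. 3 * doc_num
          'end_0', 'end_1']             # 1 + 3 * doc_num .. 4 * doc_num
assert len(inputs) >= 4 * doc_num + 1   # the training-mode check above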
Example #4
    def network(self):
        """
        Implements the whole network of Match-LSTM.

        Returns:
            A tuple of LayerOutput objects containing the start and end
            probability distributions respectively.
        """
        self.check_and_create_data()
        self.create_shared_params()
        q_enc = self.get_enc(self.q_ids, type='q')
        p_encs = []
        p_matches = []
        for p in self.p_ids:
            p_encs.append(self.get_enc(p, type='p'))

        # Project the question once up front; each projection is fed to the
        # recurrent groups below as a StaticInput, so it is not recomputed
        # at every step.
        q_proj_left = layer.fc(size=self.emb_dim * 2,
                               bias_attr=False,
                               param_attr=Attr.Param(
                                   self.name + '_left_' + '.wq'),
                               input=q_enc)
        q_proj_right = layer.fc(size=self.emb_dim * 2,
                                bias_attr=False,
                                param_attr=Attr.Param(
                                    self.name + '_right_' + '.wq'),
                                input=q_enc)
        for i, p in enumerate(p_encs):
            left_out = self.recurrent_group(
                       self.name + '_left_' + str(i),
                       [layer.StaticInput(q_enc),
                           layer.StaticInput(q_proj_left), p],
                       reverse=False)
            right_out = self.recurrent_group(
                        self.name + '_right_' + str(i),
                        [layer.StaticInput(q_enc),
                            layer.StaticInput(q_proj_right), p],
                        reverse=True)
            match_seq = layer.concat(input=[left_out, right_out])
            match_seq_dropped = self.drop_out(match_seq, drop_rate=0.5)
            bi_match_seq = paddle.networks.bidirectional_lstm(
                    input=match_seq_dropped,
                    size=match_seq.size,
                    fwd_mat_param_attr=Attr.Param('pn_f_enc_mat.w'),
                    fwd_bias_param_attr=Attr.Param('pn_f_enc.bias',
                        initial_std=0.),
                    fwd_inner_param_attr=Attr.Param('pn_f_enc_inn.w'),
                    bwd_mat_param_attr=Attr.Param('pn_b_enc_mat.w'),
                    bwd_bias_param_attr=Attr.Param('pn_b_enc.bias',
                        initial_std=0.),
                    bwd_inner_param_attr=Attr.Param('pn_b_enc_inn.w'),
                    return_seq=True)
            p_matches.append(bi_match_seq)

        all_docs = reduce(lambda x, y: layer.seq_concat(a=x, b=y),
                    p_matches)
        all_docs_dropped = self.drop_out(all_docs, drop_rate=0.5)
        start = self.decode('start', all_docs_dropped)
        end = self.decode('end', all_docs_dropped)
        return start, end
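
Why q_proj_left/q_proj_right are computed outside the loop: inside recurrent_group they arrive as StaticInput, i.e. the question projection is done once rather than at every decoding step. A hedged NumPy sketch of one attention step (shapes and the exact attention form are assumptions, not this repo's recurrent_group internals):

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

q_enc = np.random.randn(5, 8)     # question: 5 steps, dim 8
W_q = np.random.randn(8, 8)
q_proj = q_enc.dot(W_q)           # computed once, like the StaticInput

p_t = np.random.randn(8)          # one paragraph step inside the group
scores = q_proj.dot(p_t)          # (5,) attention logits over the question
ctx = softmax(scores).dot(q_enc)  # question context for this step
print(ctx.shape)                  # (8,)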
Example #5
 def test_aggregate_layer(self):
     pool = layer.pooling(input=pixel,
                          pooling_type=pooling.Avg(),
                          agg_level=layer.AggregateLevel.EACH_SEQUENCE)
     last_seq = layer.last_seq(input=pixel)
     first_seq = layer.first_seq(input=pixel)
     concat = layer.concat(input=[last_seq, first_seq])
     seq_concat = layer.seq_concat(a=last_seq, b=first_seq)
     print(layer.parse_network(pool, last_seq, first_seq, concat,
                               seq_concat))
Example #6
 def test_aggregate_layer(self):
     pool = layer.pooling(
         input=pixel,
         pooling_type=pooling.Avg(),
         agg_level=layer.AggregateLevel.TO_SEQUENCE)
     last_seq = layer.last_seq(input=pixel)
     first_seq = layer.first_seq(input=pixel)
     concat = layer.concat(input=[last_seq, first_seq])
     seq_concat = layer.seq_concat(a=last_seq, b=first_seq)
     print(layer.parse_network(
         [pool, last_seq, first_seq, concat, seq_concat]))
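
The two tests exercise the same layers; they differ in the aggregation-level name (EACH_SEQUENCE is the earlier spelling of what the v2 API later calls TO_SEQUENCE) and in how parse_network is invoked. For intuition, a NumPy sketch (an illustration, not Paddle code) of what sequence-level average pooling computes on a nested sequence:

import numpy as np

# Pooling runs inside each sub-sequence, yielding one vector per
# sub-sequence instead of one per batch entry.
nested = [np.random.randn(3, 4), np.random.randn(2, 4)]  # two sub-sequences
pooled = [sub.mean(axis=0) for sub in nested]
print(len(pooled), pooled[0].shape)  # 2 (4,)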
Example #7
    def infer(self):
        """
        The inferring interface.

        Returns:
            start_end: A sequence of concatenated start and end probabilities.
            para_len: A sequence of the lengths of every paragraph, which is
                      used to parse the inference output.
        """
        start, end = self.network()
        start_end = layer.seq_concat(name='start_end', a=start, b=end)
        return start_end, self.para_len
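
A hedged sketch of consuming these outputs downstream (the helper is illustrative, not part of the repo): given the seq_concat calls above, the flat start_end sequence holds every paragraph's start probabilities followed by every paragraph's end probabilities, and para_len says where to cut.

def split_start_end(start_end, para_lens):
    # start_end: flat list of floats; para_lens: per-paragraph lengths.
    total = sum(para_lens)
    starts, ends = start_end[:total], start_end[total:]
    spans, offset = [], 0
    for n in para_lens:
        spans.append((starts[offset:offset + n], ends[offset:offset + n]))
        offset += n
    return spans  # one (start_probs, end_probs) pair per paragraph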