Code example #1
def _apply_rule(condition, inputs, gmr_mask, grammar, name=None):
    """apply_rule.

    Args:
        condition (TYPE): NULL
        inputs (Variable): shape = [batch_size, max_len, hidden_size]. infer 阶段 max_len 恒为1
        gmr_mask (TYPE): NULL
        grammar (TYPE): NULL

    Returns: TODO

    Raises: NULL
    """
    fc_name = None
    if name is not None:
        fc_name = name + '_apply_rule_fc'

    condition = layers.cast(condition, dtype='float32')
    gmr_output = layers.fc(inputs,
                           size=grammar.grammar_size,
                           **nn_utils.param_attr(fc_name,
                                                 INIT_SCALE,
                                                 need_bias=True))
    # additive mask: currently-invalid grammar rules get -INF
    gmr_output_masked = layers.elementwise_add(gmr_output, gmr_mask)

    # despite the name, this block is filled with -INF so that the
    # table/column/value region of the action space is never selected here
    zeros = layers.fill_constant_batch_size_like(
        gmr_output_masked,
        shape=[-1, grammar.MAX_TABLE + grammar.MAX_COLUMN + grammar.MAX_VALUE],
        dtype='float32',
        value=-INF)
    final_output = tensor.concat([gmr_output_masked, zeros], axis=-1)
    true_final_output = layers.elementwise_mul(final_output, condition, axis=0)
    return true_final_output
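The "-INF block + concat" move above is the core of this snippet: rule logits keep their scores while the appended table/column/value region is pinned to -INF, so a downstream softmax over the full action space puts no mass there. A minimal, self-contained sketch of the same idea (assuming PaddlePaddle 1.x static graph; sizes and variable names are illustrative, not from the original project):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import layers

INF = 1e9
main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    gmr_logits = fluid.data(name='gmr', shape=[-1, 3], dtype='float32')
    # batch-sized -INF filler for the non-grammar part of the action space
    blocked = layers.fill_constant_batch_size_like(
        gmr_logits, shape=[-1, 2], dtype='float32', value=-INF)
    full = layers.concat([gmr_logits, blocked], axis=-1)
    probs = layers.softmax(full)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
p, = exe.run(main_prog,
             feed={'gmr': np.zeros([2, 3], dtype='float32')},
             fetch_list=[probs])
print(p)  # the last two columns get (near-)zero probability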
Code example #2
    def call(self, step_input, cell_state, attn_k, attn_v, padding_mask):
        """one step call

        Args:
            step_input (Variable): [batch_size, hidden_size]
            cell_state (tuple): (Variable, Variable)

        Returns: tuple
            same as input: (Variable, (Variable, Variable))

        Raises: NULL
        """
        # input feeding: concatenate the previous attentional output
        # (step_feed) with the current input before running the RNN cell
        step_feed, step_state = cell_state
        step_input = layers.concat([step_input, step_feed], 1)
        step_out, new_state = self.rnn_cell(step_input, step_state)

        decode_attn = models.Attention('dot_prod', name=self._name + '_attn')
        attn_out = decode_attn.forward(step_out,
                                       attn_k,
                                       attn_v,
                                       padding_mask=padding_mask)
        output = layers.fc(layers.concat([step_out, attn_out], axis=-1),
                           size=self._hidden_size,
                           num_flatten_dims=1,
                           act='tanh',
                           name=self._name + '_out_fc',
                           **nn_utils.param_attr(self._name + '_out_fc',
                                                 self._init_scale,
                                                 need_bias=False))
        if self._dropout > 0.:
            output = layers.dropout(x=output,
                                    dropout_prob=self._dropout,
                                    dropout_implementation="upscale_in_train")

        return output, [output, new_state]
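A cell with this (input, state) -> (output, new_state) interface can be unrolled over a sequence with fluid.layers.rnn; the train branch of code example #5 below does exactly that, forwarding extra attention kwargs to each step. A minimal runnable sketch, with the built-in GRUCell standing in for the project's RNNDecodeCell (PaddlePaddle 1.x static graph assumed; all names are illustrative):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import layers

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    x = fluid.data(name='x', shape=[-1, 5, 8], dtype='float32')  # [batch, seq, emb]
    cell = layers.GRUCell(hidden_size=8)
    init = cell.get_initial_states(batch_ref=x, shape=[8])
    # layers.rnn calls cell.call(step_input, states) once per time step
    out, _ = layers.rnn(cell=cell, inputs=x, initial_states=init)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
res, = exe.run(main_prog,
               feed={'x': np.random.rand(2, 5, 8).astype('float32')},
               fetch_list=[out])
print(res.shape)  # (2, 5, 8)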
Code example #3
    def _table_to_lf_input(ori_encoding):
        """Project ori_encoding to the size of the logical-form embedding.

        Nested helper: self is captured from the enclosing method's scope.
        """
        output = layers.fc(input=ori_encoding,
                           size=self.lf_emb_size,
                           num_flatten_dims=2,
                           **nn_utils.param_attr('fc_table2lf_input',
                                                 self.init_scale,
                                                 need_bias=False))
        return output
Code example #4
    def _feature_embedder(self, one_hot_fea, name):
        """feature embedder

        Args:
            one_hot_fea (Variable): shape=[batch_size, feature_dim], dtype=float32
            name (str): layers name

        Returns: TODO

        Raises: NULL
        """
        output = layers.fc(input=one_hot_fea,
                           size=self.hidden_size,
                           num_flatten_dims=2,
                           **nn_utils.param_attr(name,
                                                 self.init_scale,
                                                 need_bias=True))
        return output
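The num_flatten_dims argument used throughout these snippets decides how layers.fc treats multi-dimensional input: the first num_flatten_dims dimensions are kept, and everything after them is flattened and projected. A tiny runnable check (PaddlePaddle 1.x assumed; shapes are illustrative):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import layers

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    x = fluid.data(name='x', shape=[-1, 7, 16], dtype='float32')
    # num_flatten_dims=2 keeps the first two dims and projects the last:
    # [batch, 7, 16] -> [batch, 7, 32]
    y = layers.fc(x, size=32, num_flatten_dims=2)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
out, = exe.run(main_prog,
               feed={'x': np.zeros([2, 7, 16], dtype='float32')},
               fetch_list=[y])
print(out.shape)  # (2, 7, 32)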
Code example #5
    def _decoder(self, enc_output, enc_state, mode="train", beam_size=1):
        """decoder

        Args:
            enc_output (TYPE): NULL
            enc_state (TYPE): NULL
            mode (string): running mode: train|infer. default is "train"
            beam_size (int): default is 1

        Returns: TODO

        Raises: NULL

        """
        output_layer = functools.partial(gmr_models.grammar_output,
                                         name='decoder_output')
        decode_cell = models.RNNDecodeCell(self.hidden_size,
                                           dropout=self.dropout,
                                           init_scale=self.init_scale)
        dec_vocab = gmr_models.DecoderDynamicVocab(
            self.tname_encoding, self.tname_item_lens, self.cname_encoding,
            self.cname_item_lens, self.value_encoding, self.value_item_lens,
            self.column2table_mask)
        dec_attn_key = layers.fc(self.question_encoding,
                                 size=self.hidden_size,
                                 num_flatten_dims=2,
                                 **nn_utils.param_attr('dec_attn_key',
                                                       self.init_scale,
                                                       need_bias=True))

        init_state0 = layers.fc(enc_state[0],
                                size=self.hidden_size,
                                num_flatten_dims=1,
                                act='tanh',
                                **nn_utils.param_attr('dec_init_state0_fc',
                                                      self.init_scale,
                                                      need_bias=True))
        init_state1 = layers.fc(enc_state[1],
                                size=self.hidden_size,
                                num_flatten_dims=1,
                                act='tanh',
                                **nn_utils.param_attr('dec_init_state1_fc',
                                                      self.init_scale,
                                                      need_bias=True))

        #dec_init_zero = layers.zeros_like(init_state1)
        init_state = [
            decode_cell.get_initial_states(batch_ref=self.question_encoding,
                                           shape=[self.hidden_size]),
            [init_state0, init_state1],
        ]
        dec_cell_params = {
            "attn_k": dec_attn_key,
            "attn_v": self.question_encoding,
            "padding_mask": self.question_mask - 1.0
        }
        if mode == "train":
            ## decoder-side vocabulary embeddings ##
            self.train_label_emb = self._lf_embedder(self.train_label,
                                                     self.label_lens)
            dec_output, dec_state = fluid.layers.rnn(
                cell=decode_cell,
                inputs=self.train_label_emb,
                initial_states=init_state,
                sequence_length=None,
                **dec_cell_params)

            outputs, _ = output_layer(dec_output, self.infer_actions,
                                      self.infer_gmr_mask,
                                      self.valid_table_mask, dec_vocab,
                                      self.grammar)
            return layers.elementwise_mul(outputs, self.label_mask, axis=0)
        elif mode == "infer":
            gmr_infer_decoder = gmr_models.GrammarInferDecoder(
                decode_cell,
                beam_size=self.beam_size,
                grammar=self.grammar,
                fn_embedding=self._lf_embedder,
                fn_output=output_layer)

            outputs, _ = gmr_models.decode_with_grammar(
                gmr_infer_decoder,
                inits=init_state,
                decode_vocab=dec_vocab,
                max_step_num=self.max_infer_step,
                **dec_cell_params)
            return outputs
        else:
            raise ValueError("unsupported running mode: %s" % (mode))
Code example #6
    def _table_encoder(self,
                       inputs,
                       input_lens,
                       name_lens,
                       name_pos,
                       name_tok_len,
                       inputs_fea,
                       name,
                       question_encoding=None,
                       q_padding_mask=None):
        """table encoder.

        Args:
            inputs (TYPE): NULL
            input_lens (TYPE): NULL
            name_lens (TYPE): NULL
            name_pos (TYPE): NULL
            name_tok_len (TYPE): NULL
            inputs_fea (TYPE): NULL
            name (str/list): NULL
            question_encoding(Variable): NULL
            q_padding_mask(Variable): NULL

        Returns: TODO

        Raises: NULL

        """
        if isinstance(name, (tuple, list)):
            assert len(name) == 3, "name tuple's length must be 3"
            enc_name, attn_name, fc_name = name
        else:  # name is a str
            enc_name = name + '_rnn'
            attn_name = name + '_attn'
            fc_name = name + '_out_fc'

        if self.table_enc_type == 'birnn':
            encoder = models.Sequence2DEncoder(self.table_enc_type,
                                               dropout=self.dropout,
                                               init_scale=self.init_scale,
                                               name=enc_name,
                                               num_layers=self.encoder_layers,
                                               hidden_size=self.hidden_size // 2,
                                               bidirectional=True)
        elif self.table_enc_type == 'simple_sum':
            encoder = models.Sequence2DEncoder(self.table_enc_type,
                                               dropout=self.dropout,
                                               init_scale=self.init_scale,
                                               name=name)
        else:
            raise ValueError("unsupported table encoder type: %s" %
                             (self.table_enc_type))

        enc_output, _ = encoder.forward(inputs, input_lens, name_lens,
                                        name_pos, name_tok_len)
        if self.table_attention is not None and question_encoding is not None:
            attn = models.Attention(score_type=self.table_attention,
                                    name=attn_name)
            ctx = attn.forward(enc_output,
                               question_encoding,
                               padding_mask=q_padding_mask)
            #enc_output_attn = layers.elementwise_add(enc_output, ctx)
            enc_output = layers.concat([enc_output, ctx], axis=-1)
        if inputs_fea is not None:
            #enc_output = layers.elementwise_add(enc_output, inputs_fea)
            enc_output = layers.concat([enc_output, inputs_fea], axis=-1)

        final_output = layers.fc(enc_output,
                                 size=self.hidden_size,
                                 num_flatten_dims=2,
                                 **nn_utils.param_attr(fc_name,
                                                       self.init_scale,
                                                       need_bias=True))

        return final_output, None
Code example #7
    def _ernie_encoder(self, slots_dict):
        """use ernie to encode question, tables/columns/values

        Args:
            slots_dict (TYPE): NULL

        Returns: TODO

        Raises: NULL
        """
        batch_instance = slots_dict["question_tokens"][C.RECORD_ID]
        input_qtc_src = batch_instance[DName.QTC_IDS]
        input_qtc_pos = batch_instance[DName.QTC_POS_IDS]
        input_qtc_sent = batch_instance[DName.QTC_SENTENCE_IDS]
        input_qtc_mask = batch_instance[DName.QTC_MASK_IDS]
        input_qtc_task = batch_instance[DName.QTC_TASK_IDS]
        input_qv_src = batch_instance[DName.QV_IDS]
        input_qv_pos = batch_instance[DName.QV_POS_IDS]
        input_qv_sent = batch_instance[DName.QV_SENTENCE_IDS]
        input_qv_mask = batch_instance[DName.QV_MASK_IDS]
        input_qv_task = batch_instance[DName.QV_TASK_IDS]
        input_q_pos = batch_instance[DName.Q_POS]
        input_t_pos = batch_instance[DName.T_POS]
        input_c_pos = batch_instance[DName.C_POS]
        input_v_pos = batch_instance[DName.V_POS]
        q_span_lens = batch_instance[DName.Q_LEN]
        self.tname_item_lens = batch_instance[DName.T_LEN]
        self.cname_item_lens = batch_instance[DName.C_LEN]
        self.value_item_lens = batch_instance[DName.V_LEN]
        q_span_tok_lens = batch_instance[DName.Q_SPAN_LEN]
        tname_token_lens = batch_instance[DName.T_TOKS_LEN]
        cname_token_lens = batch_instance[DName.C_TOKS_LEN]
        value_token_lens = batch_instance[DName.V_TOKS_LEN]
        self.all_inputs_name += [
            input_qtc_src.name, input_qtc_pos.name, input_qtc_sent.name,
            input_qtc_mask.name, input_qtc_task.name, input_qv_src.name,
            input_qv_pos.name, input_qv_sent.name, input_qv_mask.name,
            input_qv_task.name, input_q_pos.name, input_t_pos.name,
            input_c_pos.name, input_v_pos.name, q_span_lens.name,
            q_span_tok_lens.name, self.tname_item_lens.name,
            self.cname_item_lens.name, self.value_item_lens.name,
            tname_token_lens.name, cname_token_lens.name, value_token_lens.name
        ]

        config_path = self.encoder_params.get("config_path")
        use_fp16 = self.encoder_params.get("use_fp16", False)
        ernie_config = ErnieConfig(config_path)
        ernie_qtc = ErnieModel(src_ids=input_qtc_src,
                               position_ids=input_qtc_pos,
                               sentence_ids=input_qtc_sent,
                               task_ids=input_qtc_task,
                               input_mask=input_qtc_mask,
                               config=ernie_config,
                               use_fp16=use_fp16)
        qtc_enc_output = ernie_qtc.get_sequence_output()
        qtc_enc_output = layers.fc(qtc_enc_output,
                                   size=self.hidden_size,
                                   num_flatten_dims=2,
                                   **nn_utils.param_attr('ernie_output',
                                                         self.init_scale,
                                                         need_bias=True))
        ernie_qv = ErnieModel(src_ids=input_qv_src,
                              position_ids=input_qv_pos,
                              sentence_ids=input_qv_sent,
                              task_ids=input_qv_task,
                              input_mask=input_qv_mask,
                              config=ernie_config,
                              use_fp16=use_fp16)
        qv_enc_output = ernie_qv.get_sequence_output()
        # note: 'ernie_output' repeats the parameter name of the qtc
        # projection above; if param_attr assigns explicit parameter names,
        # the two projections share weights
        qv_enc_output = layers.fc(qv_enc_output,
                                  size=self.hidden_size,
                                  num_flatten_dims=2,
                                  **nn_utils.param_attr('ernie_output',
                                                        self.init_scale,
                                                        need_bias=True))

        output_state = layers.dropout(
            x=ernie_qtc.get_pooled_output() + ernie_qv.get_pooled_output(),
            dropout_prob=self.dropout,
            dropout_implementation="upscale_in_train")
        output_state = layers.fc(output_state,
                                 size=self.hidden_size,
                                 num_flatten_dims=1,
                                 **nn_utils.param_attr('all_state',
                                                       self.init_scale,
                                                       need_bias=True))
        #question_enc = nn_utils.batch_gather(qtc_enc_output, input_q_pos)
        question_enc, _ = self._table_encoder(qtc_enc_output,
                                              None,
                                              q_span_lens,
                                              input_q_pos,
                                              q_span_tok_lens,
                                              self.question_fea_emb,
                                              name='question_enc')
        max_q_span_len = input_q_pos.shape[1]
        self.question_mask = layers.sequence_mask(q_span_lens,
                                                  maxlen=max_q_span_len,
                                                  dtype='float32')

        q_padding_mask = self.question_mask - 1.0
        table_enc, _ = self._table_encoder(qtc_enc_output,
                                           None,
                                           self.tname_item_lens,
                                           input_t_pos,
                                           tname_token_lens,
                                           self.table_fea_emb,
                                           name=('table_enc', 'table_enc_attn',
                                                 'tab_enc_out'),
                                           question_encoding=question_enc,
                                           q_padding_mask=q_padding_mask)
        column_enc, _ = self._table_encoder(qtc_enc_output,
                                            None,
                                            self.cname_item_lens,
                                            input_c_pos,
                                            cname_token_lens,
                                            self.column_fea_emb,
                                            name=('table_enc',
                                                  'table_enc_attn',
                                                  'col_enc_out'),
                                            question_encoding=question_enc,
                                            q_padding_mask=q_padding_mask)
        value_enc, _ = self._table_encoder(qv_enc_output,
                                           None,
                                           self.value_item_lens,
                                           input_v_pos,
                                           value_token_lens,
                                           self.value_fea_emb,
                                           name=('table_enc', 'table_enc_attn',
                                                 'val_enc_out'),
                                           question_encoding=question_enc,
                                           q_padding_mask=q_padding_mask)

        return [[output_state, output_state], question_enc, table_enc,
                column_enc, value_enc]
Code example #8
    def forward(self, q, v, mask=None):
        """forward

        Args:
            q (Variable): shape = [batch_size, seq_len1, hidden_size] or [batch_size, hidden_size].
                          dtype = float32
            v (Variable): shape = [batch_size, seq_len2, hidden_size]. dtype = float32
            mask (Variable): shape = [batch_size, seq_len2]. dtype = v.dtype. Default is None

        Returns: Variable
            shape = [batch_size, seq_len2], dtype = float32.

        Raises:
            RuntimeError: while giving unsupported score_type.
        """
        input_dim = len(q.shape)
        if input_dim == 2:
            q = layers.unsqueeze(q, [1])

        if self._score_type == 'dot_prod':
            ptr_score = layers.matmul(q, v, transpose_y=True)
        elif self._score_type == 'affine':
            q_tmp = layers.fc(q,
                              size=v.shape[2],
                              num_flatten_dims=2,
                              **nn_utils.param_attr(self._name,
                                                    self._init_scale,
                                                    need_bias=True))
            ptr_score = layers.matmul(q_tmp, v, transpose_y=True)
        elif self._score_type == 'std':
            if self._hidden_size <= 0:
                raise ValueError("hidden_size should be greater than 0")
            q_tmp = layers.fc(q,
                              size=self._hidden_size,
                              num_flatten_dims=2,
                              **nn_utils.param_attr(self._name + '_q',
                                                    self._init_scale,
                                                    need_bias=True))
            v_tmp = layers.fc(v,
                              size=self._hidden_size,
                              num_flatten_dims=2,
                              **nn_utils.param_attr(self._name + '_k',
                                                    self._init_scale,
                                                    need_bias=True))

            # shape = [batch_size, seq_len1, seq_len2, hidden_size]
            q_tmp_expand = layers.expand(layers.unsqueeze(q_tmp, [2]),
                                         [1, 1, v_tmp.shape[1], 1])
            # shape = [batch_size, 1, seq_len2, hidden_size]
            v_tmp_expand = layers.unsqueeze(v_tmp, [1])
            ptr_score = layers.fc(layers.elementwise_add(q_tmp_expand,
                                                         v_tmp_expand,
                                                         act='tanh'),
                                  size=1,
                                  num_flatten_dims=3,
                                  **nn_utils.param_attr(self._name + '_w',
                                                        self._init_scale,
                                                        need_bias=True))
            ptr_score = layers.squeeze(ptr_score, [3])
        else:
            raise RuntimeError(
                'Supported score types: dot_prod/affine/std. but got %s' %
                (self._score_type))

        if mask is not None:
            # transpose to [seq_len1, batch_size, seq_len2] so the
            # [batch_size, seq_len2] mask broadcasts over the trailing dims;
            # (mask - 1.0) * INF adds -INF at padding positions
            score_for_mask = layers.transpose(ptr_score, [1, 0, 2])
            ptr_score_masked = layers.elementwise_add(score_for_mask,
                                                      (mask - 1.0) * INF,
                                                      axis=-1)
            ptr_score = layers.transpose(ptr_score_masked, [1, 0, 2])

        if input_dim == 2:
            ptr_score = layers.squeeze(ptr_score, [1])
        return ptr_score
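The transpose/add/transpose sequence in the masking step is a broadcasting workaround: elementwise_add with axis=-1 aligns the 2-D mask against the trailing dimensions of its first operand, so the 3-D score is temporarily reshuffled to put batch and seq_len2 last. Here is the trick in isolation as a minimal runnable sketch (PaddlePaddle 1.x assumed; names are illustrative):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import layers

INF = 1e9
main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    score = fluid.data(name='score', shape=[-1, 3, 4], dtype='float32')
    mask = fluid.data(name='mask', shape=[-1, 4], dtype='float32')  # 1=valid, 0=pad
    score_t = layers.transpose(score, [1, 0, 2])                    # [l1, b, l2]
    masked_t = layers.elementwise_add(score_t, (mask - 1.0) * INF, axis=-1)
    masked = layers.transpose(masked_t, [1, 0, 2])                  # back to [b, l1, l2]

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
m, = exe.run(main_prog,
             feed={'score': np.zeros([2, 3, 4], dtype='float32'),
                   'mask': np.array([[1, 1, 1, 0]] * 2, dtype='float32')},
             fetch_list=[masked])
print(m[0, 0])  # the last position is ~-1e9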
Code example #9
    def forward(self, q, k, v=None, padding_mask=None, num_heads=1):
        """forward

        Args:
            q (Variable): shape = [batch_size, seq_len1, hidden_size_q]
            k (Variable): shape = [batch_size, seq_len2, hidden_size_k]
            v (Variable): shape = [batch_size, seq_len2, hidden_size_v]
            mask (Variable): lens of k and v. Default is None
            num_heads (int): currently only support 1. Default is 1

        Returns: TODO

        Raises: NULL
        """
        q_shape = q.shape
        if len(q_shape) == 2:
            q = layers.unsqueeze(q, [1])

        if v is None:
            v = k
        if self._score_type == 'dot_prod':
            # [batch_size, q_lens, k_lens]
            attn_score = layers.matmul(q, k, transpose_y=True)
        elif self._score_type == 'affine':
            k_tmp = layers.fc(k,
                              size=q.shape[2],
                              num_flatten_dims=2,
                              **nn_utils.param_attr(self._name,
                                                    self._init_scale,
                                                    need_bias=True))
            attn_score = layers.matmul(q, k_tmp, transpose_y=True)
        elif self._score_type == 'std':
            if self._hidden_size <= 0:
                raise ValueError("hidden_size should be greater than 0")
            q_tmp = layers.fc(q,
                              size=self._hidden_size,
                              num_flatten_dims=2,
                              **nn_utils.param_attr(self._name + '_q',
                                                    self._init_scale,
                                                    need_bias=True))
            k_tmp = layers.fc(k,
                              size=self._hidden_size,
                              num_flatten_dims=2,
                              **nn_utils.param_attr(self._name + '_k',
                                                    self._init_scale,
                                                    need_bias=True))

            # shape = [batch_size, seq_len1, seq_len2, hidden_size]
            q_tmp_expand = layers.expand(layers.unsqueeze(q_tmp, [2]),
                                         [1, 1, v.shape[1], 1])
            # shape = [batch_size, 1, seq_len2, hidden_size]
            k_tmp_expand = layers.unsqueeze(k_tmp, [1])
            attn_score = layers.fc(layers.elementwise_add(q_tmp_expand,
                                                          k_tmp_expand,
                                                          act='tanh'),
                                   size=1,
                                   num_flatten_dims=3,
                                   **nn_utils.param_attr(self._name + '_w',
                                                         self._init_scale,
                                                         need_bias=True))
            attn_score = layers.squeeze(attn_score, [3])
        else:
            raise RuntimeError(
                'Supported score types: dot_prod/affine/std. but got %s' %
                (self._score_type))

        if padding_mask is not None:
            # same broadcasting trick as in code example #8: reshuffle to
            # [seq_len1, batch_size, seq_len2], add -INF at padded positions
            # (padding_mask is 0/-1), then restore the layout
            attn_for_mask = layers.transpose(attn_score, [1, 0, 2])
            attn_score_masked = layers.elementwise_add(attn_for_mask,
                                                       padding_mask * INF,
                                                       axis=-1)
            attn_score = layers.transpose(attn_score_masked, [1, 0, 2])

        weight = layers.softmax(attn_score)
        attn = layers.matmul(weight, v)
        if len(q_shape) == 2:
            attn = layers.squeeze(attn, [1])

        return attn
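For reference, the dot_prod scoring path above condenses to a few lines of numpy. This is an illustrative sketch, not project code; it follows the same 0/-1 padding_mask convention:

import numpy as np

def dot_prod_attention(q, k, v, padding_mask=None, inf=1e9):
    """q: [b, lq, h]; k, v: [b, lk, h]; padding_mask: [b, lk], 0=valid, -1=pad."""
    score = q @ k.transpose(0, 2, 1)                     # [b, lq, lk]
    if padding_mask is not None:
        score = score + padding_mask[:, None, :] * inf   # -inf at padding
    weight = np.exp(score - score.max(-1, keepdims=True))
    weight /= weight.sum(-1, keepdims=True)              # softmax over lk
    return weight @ v                                    # [b, lq, h]

ctx = dot_prod_attention(np.ones([2, 3, 8]), np.ones([2, 5, 8]), np.ones([2, 5, 8]))
print(ctx.shape)  # (2, 3, 8)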