Example #1
    def build(self, inputs, scope_name=""):
        mask_pos = inputs["reader"]["mask_pos"]
        if self._is_training:
            mask_label = inputs["reader"]["mask_label"]
            # clamp mask positions so the gather below never indexes past the
            # flattened [batch_size * seq_len] encoder output
            max_position = inputs["reader"]["batchsize_x_seqlen"] - 1
            mask_pos = fluid.layers.elementwise_min(mask_pos, max_position)
            mask_pos.stop_gradient = True

        word_emb = inputs["backbone"]["embedding_table"]
        enc_out = inputs["backbone"]["encoder_outputs"]

        emb_size = word_emb.shape[-1]

        _param_initializer = fluid.initializer.TruncatedNormal(
            scale=self._initializer_range)

        reshaped_emb_out = fluid.layers.reshape(x=enc_out,
                                                shape=[-1, emb_size])

        # extract masked tokens' feature
        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)

        # transform: fc
        mask_trans_feat = fluid.layers.fc(
            input=mask_feat,
            size=emb_size,
            act=self._hidden_act,
            param_attr=fluid.ParamAttr(name=scope_name +
                                       'mask_lm_trans_fc.w_0',
                                       initializer=_param_initializer),
            bias_attr=fluid.ParamAttr(name=scope_name +
                                      'mask_lm_trans_fc.b_0'))
        # transform: layer norm
        mask_trans_feat = pre_process_layer(mask_trans_feat,
                                            'n',
                                            name=scope_name + 'mask_lm_trans')

        mask_lm_out_bias_attr = fluid.ParamAttr(
            name=scope_name + "mask_lm_out_fc.b_0",
            initializer=fluid.initializer.Constant(value=0.0))

        # project back to the vocabulary: reuse (tie) the word embedding table
        # as the output weight, then add a per-token output bias
        fc_out = fluid.layers.matmul(x=mask_trans_feat,
                                     y=word_emb,
                                     transpose_y=True)
        fc_out += fluid.layers.create_parameter(shape=[self._vocab_size],
                                                dtype='float32',
                                                attr=mask_lm_out_bias_attr,
                                                is_bias=True)

        if self._is_training:
            # softmax + cross-entropy over the masked positions only
            probs = fluid.layers.softmax(fc_out)
            mask_lm_loss = fluid.layers.cross_entropy(input=probs,
                                                      label=mask_label)
            loss = fluid.layers.mean(mask_lm_loss)
            return {'loss': loss}
        else:
            return {'logits': fc_out}
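
For orientation, here is a minimal NumPy sketch of what this masked-LM head computes, stripped of the fluid graph: gather the encoder features at the masked positions, apply the transform FC, and obtain vocabulary logits by multiplying against the transposed word-embedding table (weight tying). The shapes, the tanh activation, the omission of the layer norm, and the softmax/cross-entropy at the end are illustrative assumptions, not part of the library API.

import numpy as np

batch, seqlen, emb_size, vocab = 2, 8, 16, 100
rng = np.random.default_rng(0)

enc_out = rng.normal(size=(batch, seqlen, emb_size))   # stand-in for encoder_outputs
word_emb = rng.normal(size=(vocab, emb_size))          # stand-in for embedding_table
mask_pos = np.array([3, 11])                           # indices into the flattened [batch*seqlen] axis
mask_label = np.array([7, 42])                         # gold token ids at the masked positions

# gather masked-token features from the flattened encoder output
mask_feat = enc_out.reshape(-1, emb_size)[mask_pos]    # [n_mask, emb_size]

# transform FC (random weights stand in for mask_lm_trans_fc.*; layer norm omitted)
w_trans = rng.normal(size=(emb_size, emb_size))
mask_trans_feat = np.tanh(mask_feat @ w_trans)         # activation assumed to be tanh here

# vocabulary logits via the tied (transposed) embedding table plus an output bias
out_bias = np.zeros(vocab)
fc_out = mask_trans_feat @ word_emb.T + out_bias       # [n_mask, vocab]

# softmax + cross-entropy against the masked labels
probs = np.exp(fc_out - fc_out.max(axis=-1, keepdims=True))
probs /= probs.sum(axis=-1, keepdims=True)
loss = -np.log(probs[np.arange(len(mask_label)), mask_label]).mean()
print(fc_out.shape, float(loss))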
Example #2
    def build(self, inputs, scope_name=""):

        src_ids = inputs['token_ids']
        pos_ids = inputs['position_ids']
        sent_ids = inputs['segment_ids']
        input_mask = inputs['input_mask']
        task_ids = inputs['task_ids']

        input_buffer = {}
        output_buffer = {}
        input_buffer['base'] = [
            src_ids, pos_ids, sent_ids, input_mask, task_ids
        ]
        output_buffer['base'] = {}

        if self._learning_strategy == 'pairwise' and self._phase == 'train':
            src_ids = inputs['token_ids_neg']
            pos_ids = inputs['position_ids_neg']
            sent_ids = inputs['segment_ids_neg']
            input_mask = inputs['input_mask_neg']
            task_ids = inputs['task_ids_neg']
            input_buffer['neg'] = [
                src_ids, pos_ids, sent_ids, input_mask, task_ids
            ]
            output_buffer['neg'] = {}

        for key, (src_ids, pos_ids, sent_ids, input_mask,
                  task_ids) in input_buffer.items():
            # padding id in vocabulary must be set to 0
            emb_out = fluid.embedding(
                input=src_ids,
                size=[self._voc_size, self._emb_size],
                dtype=self._emb_dtype,
                param_attr=fluid.ParamAttr(
                    name=scope_name + self._word_emb_name,
                    initializer=self._param_initializer),
                is_sparse=False)

            # fetch the shared word-embedding parameter from the main program so
            # downstream heads (e.g. the masked-LM head) can tie weights to it
            embedding_table = fluid.default_main_program().global_block().var(
                scope_name + self._word_emb_name)

            position_emb_out = fluid.embedding(
                input=pos_ids,
                size=[self._max_position_seq_len, self._emb_size],
                dtype=self._emb_dtype,
                param_attr=fluid.ParamAttr(
                    name=scope_name + self._pos_emb_name,
                    initializer=self._param_initializer))

            sent_emb_out = fluid.embedding(
                sent_ids,
                size=[self._sent_types, self._emb_size],
                dtype=self._emb_dtype,
                param_attr=fluid.ParamAttr(
                    name=scope_name + self._sent_emb_name,
                    initializer=self._param_initializer))

            emb_out = emb_out + position_emb_out
            emb_out = emb_out + sent_emb_out

            task_emb_out = fluid.embedding(
                task_ids,
                size=[self._task_types, self._emb_size],
                dtype=self._emb_dtype,
                param_attr=fluid.ParamAttr(
                    name=scope_name + self._task_emb_name,
                    initializer=self._param_initializer))

            emb_out = emb_out + task_emb_out

            emb_out = pre_process_layer(emb_out,
                                        'nd',
                                        self._prepostprocess_dropout,
                                        name=scope_name + 'pre_encoder')

            # build the additive attention bias: the outer product of the 0/1
            # input mask marks query/key pairs where both tokens are real
            self_attn_mask = fluid.layers.matmul(x=input_mask,
                                                 y=input_mask,
                                                 transpose_y=True)

            # (x - 1) * 10000: real/real pairs get bias 0, pairs touching
            # padding get -10000, which softmax turns into ~zero attention
            self_attn_mask = fluid.layers.scale(x=self_attn_mask,
                                                scale=10000.0,
                                                bias=-1.0,
                                                bias_after_scale=False)
            n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] *
                                                       self._n_head,
                                                       axis=1)
            n_head_self_attn_mask.stop_gradient = True

            enc_out = encoder(
                enc_input=emb_out,
                attn_bias=n_head_self_attn_mask,
                n_layer=self._n_layer,
                n_head=self._n_head,
                d_key=self._emb_size // self._n_head,
                d_value=self._emb_size // self._n_head,
                d_model=self._emb_size,
                d_inner_hid=self._emb_size * 4,
                prepostprocess_dropout=self._prepostprocess_dropout,
                attention_dropout=self._attention_dropout,
                relu_dropout=0,
                hidden_act=self._hidden_act,
                preprocess_cmd="",
                postprocess_cmd="dan",
                param_initializer=self._param_initializer,
                name=scope_name + 'encoder')

            # pool the first token's ([CLS]) representation as the sentence feature
            next_sent_feat = fluid.layers.slice(input=enc_out,
                                                axes=[1],
                                                starts=[0],
                                                ends=[1])
            next_sent_feat = fluid.layers.reshape(
                next_sent_feat, [-1, next_sent_feat.shape[-1]])
            next_sent_feat = fluid.layers.fc(
                input=next_sent_feat,
                size=self._emb_size,
                act="tanh",
                param_attr=fluid.ParamAttr(
                    name=scope_name + "pooled_fc.w_0",
                    initializer=self._param_initializer),
                bias_attr=scope_name + "pooled_fc.b_0")

            output_buffer[key]['word_embedding'] = emb_out
            output_buffer[key]['encoder_outputs'] = enc_out
            output_buffer[key]['sentence_embedding'] = next_sent_feat
            output_buffer[key]['sentence_pair_embedding'] = next_sent_feat

        ret = {}
        ret['embedding_table'] = embedding_table
        ret['word_embedding'] = output_buffer['base']['word_embedding']
        ret['encoder_outputs'] = output_buffer['base']['encoder_outputs']
        ret['sentence_embedding'] = output_buffer['base']['sentence_embedding']
        ret['sentence_pair_embedding'] = output_buffer['base'][
            'sentence_pair_embedding']

        if self._learning_strategy == 'pairwise' and self._phase == 'train':
            ret['word_embedding_neg'] = output_buffer['neg']['word_embedding']
            ret['encoder_outputs_neg'] = output_buffer['neg'][
                'encoder_outputs']
            ret['sentence_embedding_neg'] = output_buffer['neg'][
                'sentence_embedding']
            ret['sentence_pair_embedding_neg'] = output_buffer['neg'][
                'sentence_pair_embedding']

        return ret
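
A detail in this backbone that is easy to gloss over is how `input_mask` becomes the `attn_bias` fed to the encoder: the outer product of the 0/1 mask marks query/key pairs where both tokens are real, and the scale/bias step turns every other pair into a -10000 additive bias, which softmax drives to (near) zero attention. Below is a small NumPy sketch of just that step, with made-up batch, sequence, and head sizes; it mirrors the fluid calls above but is not the library code itself.

import numpy as np

batch, seqlen, n_head = 2, 5, 4
# 1.0 for real tokens, 0.0 for padding; shape [batch, seqlen, 1] as in the fluid code
input_mask = np.array([[1, 1, 1, 0, 0],
                       [1, 1, 1, 1, 1]], dtype=np.float32).reshape(batch, seqlen, 1)

# outer product per example: 1 where both query and key are real tokens
self_attn_mask = input_mask @ input_mask.transpose(0, 2, 1)   # [batch, seqlen, seqlen]

# scale=10000, bias=-1, bias_after_scale=False  ->  (x - 1) * 10000
# real/real pairs become 0, any pair touching padding becomes -10000
self_attn_mask = (self_attn_mask - 1.0) * 10000.0

# replicate the bias across attention heads: [batch, n_head, seqlen, seqlen]
n_head_self_attn_mask = np.stack([self_attn_mask] * n_head, axis=1)
print(n_head_self_attn_mask.shape)
print(self_attn_mask[0])   # rows/cols 3-4 (padding) carry the -10000 bias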