Example #1
    def _build_model(self, src_ids, position_ids, sentence_ids,
                     self_attn_mask):
        # padding id in vocabulary must be set to 0
        emb_out = fluid.layers.embedding(
            input=src_ids,
            size=[self._voc_size, self._emb_size],
            dtype=self._dtype,
            param_attr=fluid.ParamAttr(name=self._word_emb_name,
                                       initializer=self._param_initializer),
            is_sparse=False)
        position_emb_out = fluid.layers.embedding(
            input=position_ids,
            size=[self._max_position_seq_len, self._emb_size],
            dtype=self._dtype,
            param_attr=fluid.ParamAttr(name=self._pos_emb_name,
                                       initializer=self._param_initializer))

        sent_emb_out = fluid.layers.embedding(
            sentence_ids,
            size=[self._sent_types, self._emb_size],
            dtype=self._dtype,
            param_attr=fluid.ParamAttr(name=self._sent_emb_name,
                                       initializer=self._param_initializer))

        # sum token, position and sentence (segment) embeddings
        emb_out = emb_out + position_emb_out
        emb_out = emb_out + sent_emb_out

        # 'nd': layer normalization followed by dropout on the summed embeddings
        emb_out = pre_process_layer(emb_out,
                                    'nd',
                                    self._prepostprocess_dropout,
                                    name='pre_encoder')

        if self._dtype == "float16":
            self_attn_mask = fluid.layers.cast(x=self_attn_mask,
                                               dtype=self._dtype)

        # replicate the attention mask once per head; it is a constant, so block gradients
        n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] *
                                                   self._n_head,
                                                   axis=1)
        n_head_self_attn_mask.stop_gradient = True

        self._enc_out = encoder(
            enc_input=emb_out,
            attn_bias=n_head_self_attn_mask,
            n_layer=self._n_layer,
            n_head=self._n_head,
            d_key=self._emb_size // self._n_head,
            d_value=self._emb_size // self._n_head,
            d_model=self._emb_size,
            d_inner_hid=self._emb_size * 4,
            prepostprocess_dropout=self._prepostprocess_dropout,
            attention_dropout=self._attention_dropout,
            relu_dropout=0,
            hidden_act=self._hidden_act,
            preprocess_cmd="",
            postprocess_cmd="dan",
            param_initializer=self._param_initializer,
            name='encoder')
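
For context, a minimal sketch (not part of the original snippet) of how the self_attn_mask argument fed into _build_model is typically derived from a padding mask, following the same matmul + scale pattern used in Examples #3 and #4 below; the placeholder name and sequence length are assumptions, and the code assumes a static-graph fluid program.

import paddle
import paddle.fluid as fluid

paddle.enable_static()  # needed on Paddle 2.x; Paddle 1.x builds static graphs by default

# assumed placeholder: 1.0 for real tokens, 0.0 for padding, shape [batch, seq_len, 1]
input_mask = fluid.layers.data(name='input_mask', shape=[128, 1], dtype='float32')

# pairwise mask: entry (i, j) is 1.0 only when positions i and j both hold real tokens
self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True)

# additive attention bias: 10000 * (x - 1) gives 0 for token-token pairs and -10000
# where either position is padding, so padded keys vanish after the softmax
self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0,
                                    bias_after_scale=False)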
Example #2
    def encode(self,
               emb_ids=None,
               input_mask=None,
               image_input=None,
               emb_obj_ids=None,
               gather_idx=None):
        """unimo encoder"""
        # decide which modalities are present: v(isual regions), o(bject), l(anguage text)
        if emb_ids is not None and image_input is not None and emb_obj_ids is not None:
            input_type = 'vol'
        elif emb_ids is not None and image_input is not None:
            input_type = 'vl'
        elif emb_ids is not None:
            input_type = 'l'
        elif image_input is not None and emb_obj_ids is not None:
            input_type = 'vo'
        else:
            raise ValueError('input feature error')

        emb_feature, n_head_self_attn_mask, _v_seq_len, _o_seq_len = self._gen_input(
            emb_ids=emb_ids,
            input_mask=input_mask,
            image_input=image_input,
            emb_obj_ids=emb_obj_ids,
            input_type=input_type)
        enc_out = encoder(enc_input=emb_feature,
                          attn_bias=n_head_self_attn_mask,
                          n_layer=self._n_layer,
                          n_head=self._n_head,
                          d_key=self._emb_size // self._n_head,
                          d_value=self._emb_size // self._n_head,
                          d_model=self._emb_size,
                          d_inner_hid=self._emb_size * 4,
                          prepostprocess_dropout=self._prepostprocess_dropout,
                          attention_dropout=self._attention_dropout,
                          relu_dropout=0,
                          hidden_act=self._hidden_act,
                          preprocess_cmd="",
                          postprocess_cmd="dan",
                          param_initializer=self._param_initializer,
                          name='encoder',
                          caches=self.caches,
                          gather_idx=gather_idx)

        if input_type == 'vol':
            assert _v_seq_len is not None and _o_seq_len is not None, "the input is invalid"
            _vol_seq_len = layers.shape(enc_out)[1]
            # split the encoder output back into visual / object / language segments
            # along the sequence axis (they were concatenated in that order)
            enc_v_out = fluid.layers.slice(input=enc_out,
                                           axes=[1],
                                           starts=[0],
                                           ends=[_v_seq_len])
            # object segment (computed here but not part of this branch's return value)
            enc_o_out = fluid.layers.slice(input=enc_out,
                                           axes=[1],
                                           starts=[_v_seq_len],
                                           ends=[_v_seq_len + _o_seq_len])
            enc_l_out = fluid.layers.slice(input=enc_out,
                                           axes=[1],
                                           starts=[_v_seq_len + _o_seq_len],
                                           ends=[_vol_seq_len])
            enc_vol_out = enc_out
            return enc_vol_out, enc_v_out, enc_l_out
        elif input_type == 'vl':
            assert _v_seq_len is not None and _o_seq_len is None, "the input is invalid"
            _vl_seq_len = layers.shape(enc_out)[1]
            enc_v_out = fluid.layers.slice(input=enc_out,
                                           axes=[1],
                                           starts=[0],
                                           ends=[_v_seq_len])
            enc_l_out = fluid.layers.slice(input=enc_out,
                                           axes=[1],
                                           starts=[_v_seq_len],
                                           ends=[_vl_seq_len])
            enc_vl_out = enc_out
            return enc_vl_out, enc_v_out, enc_l_out
        elif input_type == 'vo':
            assert _v_seq_len is not None and _o_seq_len is not None, "the input is invalid"
            enc_v_out = fluid.layers.slice(input=enc_out,
                                           axes=[1],
                                           starts=[0],
                                           ends=[_v_seq_len])
            return enc_v_out
        elif input_type == 'l':
            assert _v_seq_len is None and _o_seq_len is None, "the input is invalid"
            enc_l_out = enc_out
            return enc_l_out
        else:
            raise ValueError("The input type is invalid")
Example #3
    def _build_model(self, src_ids, position_ids, sentence_ids, task_ids,
                     input_mask):
        # padding id in vocabulary must be set to 0
        emb_out = fluid.layers.embedding(
            input=src_ids,
            size=[self._voc_size, self._emb_size],
            dtype=self._emb_dtype,
            param_attr=fluid.ParamAttr(
                name=self._word_emb_name,
                initializer=self._param_initializer,
                regularizer=fluid.regularizer.L2Decay(1e-3)),
            is_sparse=False)

        position_emb_out = fluid.layers.embedding(
            input=position_ids,
            size=[self._max_position_seq_len, self._emb_size],
            dtype=self._emb_dtype,
            param_attr=fluid.ParamAttr(name=self._pos_emb_name,
                                       initializer=self._param_initializer))

        sent_emb_out = fluid.layers.embedding(
            sentence_ids,
            size=[self._sent_types, self._emb_size],
            dtype=self._emb_dtype,
            param_attr=fluid.ParamAttr(name=self._sent_emb_name,
                                       initializer=self._param_initializer))

        emb_out = emb_out + position_emb_out
        emb_out = emb_out + sent_emb_out

        if self._use_task_id:
            task_emb_out = fluid.layers.embedding(
                task_ids,
                size=[self._task_types, self._emb_size],
                dtype=self._emb_dtype,
                param_attr=fluid.ParamAttr(
                    name=self._task_emb_name,
                    initializer=self._param_initializer))

            emb_out = emb_out + task_emb_out

        emb_out = pre_process_layer(emb_out,
                                    'nd',
                                    self._prepostprocess_dropout,
                                    name='pre_encoder')

        if self._dtype is "float16":
            emb_out = fluid.layers.cast(x=emb_out, dtype=self._dtype)
            input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype)
        # pairwise padding mask: 1.0 only where both positions hold real tokens
        self_attn_mask = fluid.layers.matmul(x=input_mask,
                                             y=input_mask,
                                             transpose_y=True)

        # additive attention bias: 10000 * (x - 1) -> 0 for valid pairs, -10000 for padding
        self_attn_mask = fluid.layers.scale(x=self_attn_mask,
                                            scale=10000.0,
                                            bias=-1.0,
                                            bias_after_scale=False)
        # replicate across attention heads; the mask is constant, so block gradients
        n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] *
                                                   self._n_head,
                                                   axis=1)
        n_head_self_attn_mask.stop_gradient = True

        self._enc_out = encoder(
            enc_input=emb_out,
            attn_bias=n_head_self_attn_mask,
            n_layer=self._n_layer,
            n_head=self._n_head,
            d_key=self._emb_size // self._n_head,
            d_value=self._emb_size // self._n_head,
            d_model=self._emb_size,
            d_inner_hid=self._emb_size * 4,
            prepostprocess_dropout=self._prepostprocess_dropout,
            attention_dropout=self._attention_dropout,
            relu_dropout=0,
            hidden_act=self._hidden_act,
            preprocess_cmd="",
            postprocess_cmd="dan",
            param_initializer=self._param_initializer,
            name='encoder')
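
A quick NumPy check (illustrative only, names are not from the example) of what the scale op above computes: with scale=10000.0, bias=-1.0 and bias_after_scale=False, the result is 10000 * (x - 1).

import numpy as np

# one batch, three positions, the last one is padding
input_mask = np.array([[[1.], [1.], [0.]]], dtype='float32')
pair_mask = np.matmul(input_mask, input_mask.transpose(0, 2, 1))
attn_bias = 10000.0 * (pair_mask - 1.0)
# rows/columns that touch the padded position are -10000, everything else is 0
print(attn_bias[0])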
Example #4
    def _build_model(self, emb=None):
        # padding id in vocabulary must be set to 0
        if emb is None:
            if self.topo is None or self.topo.mp.size == 1:
                # no tensor model parallelism: plain word-embedding lookup
                emb_out = fluid.layers.embedding(
                    input=self.src_ids,
                    size=[self._voc_size, self._emb_size],
                    dtype=self._emb_dtype,
                    param_attr=fluid.ParamAttr(
                        name=self._word_emb_name,
                        initializer=self._param_initializer),
                    is_sparse=False)
            else:
                # tensor model parallelism: give this rank's shard of the word-embedding
                # table a rank-specific name and look tokens up in the sharded table
                self._word_emb_name = self._word_emb_name + '_' + str(
                    self.topo.mp.rank)
                src_ids = fluid.layers.squeeze(self.src_ids, [-1])
                emb_out = paddle.distributed.split(
                    src_ids,
                    size=(self._voc_size, self._emb_size),
                    operation='embedding',
                    weight_attr=fluid.ParamAttr(
                        name=self._word_emb_name,
                        initializer=self._param_initializer),
                    num_partitions=self.topo.mp.size)
        else:
            # embeddings supplied externally: gather rows for src_ids from the frozen
            # table, then re-enable gradients on the gathered output
            emb.stop_gradient = True
            emb_out = fluid.layers.gather_nd(emb, self.src_ids)
            emb_out.stop_gradient = False

        self.position_emb_out = fluid.layers.embedding(
            input=self.position_ids,
            size=[self._max_position_seq_len, self._emb_size],
            dtype=self._emb_dtype,
            param_attr=fluid.ParamAttr(name=self._pos_emb_name,
                                       initializer=self._param_initializer))

        self.sent_emb_out = fluid.layers.embedding(
            self.sentence_ids,
            size=[self._sent_types, self._emb_size],
            dtype=self._emb_dtype,
            param_attr=fluid.ParamAttr(name=self._sent_emb_name,
                                       initializer=self._param_initializer))
        """
        self.task_emb_out = fluid.layers.embedding(
            self.task_ids,
            size=[self._task_types, self._emb_size],
            dtype=self._emb_dtype,
            param_attr=fluid.ParamAttr(
                name=self._task_emb_name, initializer=self._param_initializer))
        """
        sum_emb = emb_out + self.position_emb_out
        sum_emb = sum_emb + self.sent_emb_out
        # NOTE: task embeddings are deliberately not added here
        # emb_out = emb_out + task_emb_out

        # pre_encoder_cmd: 'n' (layer norm only) for ALBERT, 'nd' (layer norm + dropout) for BERT
        sum_emb = pre_process_layer(sum_emb,
                                    self.config['pre_encoder_cmd'],
                                    self._prepostprocess_dropout,
                                    name='pre_encoder',
                                    epsilon=self.config['epsilon'])

        if self.config['emb_mapping_in']:
            # optional projection from embedding size to hidden size
            # (ALBERT-style factorized embedding parameterization)
            sum_emb = fluid.layers.fc(input=sum_emb,
                                      num_flatten_dims=2,
                                      size=self._hidden_size,
                                      param_attr=fluid.ParamAttr(
                                          name='emb_hidden_mapping',
                                          initializer=self._param_initializer),
                                      bias_attr='emb_hidden_mapping_bias')

        self_attn_mask = fluid.layers.matmul(x=self.input_mask,
                                             y=self.input_mask,
                                             transpose_y=True)

        self_attn_mask = fluid.layers.scale(x=self_attn_mask,
                                            scale=10000.0,
                                            bias=-1.0,
                                            bias_after_scale=False)
        n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] *
                                                   self._n_head,
                                                   axis=1)
        n_head_self_attn_mask.stop_gradient = True

        self._enc_out, self._checkpoints = encoder(
            enc_input=sum_emb,
            attn_bias=n_head_self_attn_mask,
            n_layer=self._n_layer,
            n_head=self._n_head,
            d_key=self._hidden_size // self._n_head,
            d_value=self._hidden_size // self._n_head,
            d_model=self._hidden_size,
            d_inner_hid=self._hidden_size * 4,
            prepostprocess_dropout=self._prepostprocess_dropout,
            attention_dropout=self._attention_dropout,
            relu_dropout=0,
            hidden_act=self._hidden_act,
            preprocess_cmd=self.config['preprocess_cmd'],
            postprocess_cmd=self.config['postprocess_cmd'],
            param_initializer=self._param_initializer,
            name='encoder',
            param_share=self._param_share,
            epsilon=self.config['epsilon'],
            n_layer_per_block=self.config['n_layer_per_block'],
            topo=self.topo,
            preln=self.preln)
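
To make the config keys consumed by this variant concrete, here is an illustrative (hypothetical) set of values for a BERT-like setup; the keys come from the example above, while every value and annotation is an assumption, and an ALBERT-like model would instead use 'n' for pre_encoder_cmd together with parameter sharing across layers.

# hypothetical config values; only the key names are taken from the example
config = {
    'pre_encoder_cmd': 'nd',    # layer norm + dropout on the summed embeddings
    'preprocess_cmd': '',       # nothing before each sub-layer
    'postprocess_cmd': 'dan',   # dropout, residual add, layer norm after each sub-layer
    'epsilon': 1e-12,           # layer-norm epsilon (assumed)
    'emb_mapping_in': False,    # set True when emb_size != hidden_size
    'n_layer_per_block': 1,     # >1 when consecutive layers share parameters (assumed meaning)
}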