Code example #1
class BertModelLayer(Layer):
    """
    bert
    """
    def __init__(self, config, return_pooled_out=True, use_fp16=False):
        super(BertModelLayer, self).__init__()

        self._emb_size = config['hidden_size']
        self._n_layer = config['num_hidden_layers']
        self._n_head = config['num_attention_heads']
        self._voc_size = config['vocab_size']
        self._max_position_seq_len = config['max_position_embeddings']
        self._sent_types = config['type_vocab_size']
        self._hidden_act = config['hidden_act']
        self._prepostprocess_dropout = config['hidden_dropout_prob']
        self._attention_dropout = config['attention_probs_dropout_prob']
        self.return_pooled_out = return_pooled_out

        self._word_emb_name = "word_embedding"
        self._pos_emb_name = "pos_embedding"
        self._sent_emb_name = "sent_embedding"
        self._dtype = "float16" if use_fp16 else "float32"

        self._param_initializer = fluid.initializer.TruncatedNormal(
            scale=config['initializer_range'])

        self._src_emb = Embedding(size=[self._voc_size, self._emb_size],
                                  param_attr=fluid.ParamAttr(
                                      name=self._word_emb_name,
                                      initializer=self._param_initializer),
                                  dtype=self._dtype)

        self._pos_emb = Embedding(
            size=[self._max_position_seq_len, self._emb_size],
            param_attr=fluid.ParamAttr(name=self._pos_emb_name,
                                       initializer=self._param_initializer),
            dtype=self._dtype)

        self._sent_emb = Embedding(size=[self._sent_types, self._emb_size],
                                   param_attr=fluid.ParamAttr(
                                       name=self._sent_emb_name,
                                       initializer=self._param_initializer),
                                   dtype=self._dtype)

        self.pooled_fc = Linear(input_dim=self._emb_size,
                                output_dim=self._emb_size,
                                param_attr=fluid.ParamAttr(
                                    name="pooled_fc.w_0",
                                    initializer=self._param_initializer),
                                bias_attr="pooled_fc.b_0",
                                act="tanh")

        self.pre_process_layer = PrePostProcessLayer(
            "nd", self._emb_size, self._prepostprocess_dropout, "")

        self._encoder = EncoderLayer(
            hidden_act=self._hidden_act,
            n_layer=self._n_layer,
            n_head=self._n_head,
            d_key=self._emb_size // self._n_head,
            d_value=self._emb_size // self._n_head,
            d_model=self._emb_size,
            d_inner_hid=self._emb_size * 4,
            prepostprocess_dropout=self._prepostprocess_dropout,
            attention_dropout=self._attention_dropout,
            relu_dropout=0,
            preprocess_cmd="",
            postprocess_cmd="dan",
            param_initializer=self._param_initializer)

    def emb_names(self):
        return self._src_emb.parameters() + self._pos_emb.parameters(
        ) + self._sent_emb.parameters()

    def forward(self, src_ids, position_ids, sentence_ids, input_mask):
        """
        forward
        """
        src_emb = self._src_emb(src_ids)
        pos_emb = self._pos_emb(position_ids)
        sent_emb = self._sent_emb(sentence_ids)

        emb_out = src_emb + pos_emb
        emb_out = emb_out + sent_emb

        emb_out = self.pre_process_layer(emb_out)

        # Build an additive attention mask from the padding mask: the matmul
        # gives 1.0 for pairs of real tokens and 0.0 wherever padding is
        # involved; the scale op then maps real pairs to 0.0 and padded pairs
        # to -10000.0 so they are suppressed by the softmax inside attention.
        self_attn_mask = fluid.layers.matmul(x=input_mask,
                                             y=input_mask,
                                             transpose_y=True)
        self_attn_mask = fluid.layers.scale(x=self_attn_mask,
                                            scale=10000.0,
                                            bias=-1.0,
                                            bias_after_scale=False)
        n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] *
                                                   self._n_head,
                                                   axis=1)
        n_head_self_attn_mask.stop_gradient = True

        enc_outputs = self._encoder(emb_out, n_head_self_attn_mask)

        if not self.return_pooled_out:
            return enc_outputs
        next_sent_feats = []
        for enc_output in enc_outputs:
            next_sent_feat = fluid.layers.slice(input=enc_output,
                                                axes=[1],
                                                starts=[0],
                                                ends=[1])
            next_sent_feat = self.pooled_fc(next_sent_feat)
            next_sent_feat = fluid.layers.reshape(next_sent_feat,
                                                  shape=[-1, self._emb_size])
            next_sent_feats.append(next_sent_feat)

        return enc_outputs, next_sent_feats
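A minimal usage sketch for the layer above, assuming the class (and its PrePostProcessLayer / EncoderLayer dependencies) is defined or importable as in the project. The config keys match what __init__ reads; the BERT-base values and the input shapes ([batch, seq_len] int64 ids plus a [batch, seq_len, 1] float padding mask) are illustrative assumptions, not taken from the project:

import numpy as np
import paddle.fluid as fluid

# Illustrative BERT-base style config with the keys read by __init__.
config = {
    'hidden_size': 768,
    'num_hidden_layers': 12,
    'num_attention_heads': 12,
    'vocab_size': 30522,
    'max_position_embeddings': 512,
    'type_vocab_size': 2,
    'hidden_act': 'gelu',
    'hidden_dropout_prob': 0.1,
    'attention_probs_dropout_prob': 0.1,
    'initializer_range': 0.02,
}

with fluid.dygraph.guard():
    model = BertModelLayer(config, return_pooled_out=True)

    batch_size, seq_len = 2, 16
    src_ids = fluid.dygraph.to_variable(
        np.random.randint(1, config['vocab_size'],
                          [batch_size, seq_len]).astype('int64'))
    position_ids = fluid.dygraph.to_variable(
        np.tile(np.arange(seq_len, dtype='int64'), [batch_size, 1]))
    sentence_ids = fluid.dygraph.to_variable(
        np.zeros([batch_size, seq_len], dtype='int64'))
    # 1.0 for real tokens, 0.0 for padding.
    input_mask = fluid.dygraph.to_variable(
        np.ones([batch_size, seq_len, 1], dtype='float32'))

    enc_outputs, next_sent_feats = model(src_ids, position_ids,
                                         sentence_ids, input_mask)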
Code example #2
File: bert.py  Project: feng-zhen/PaddleSlim
class BertModelLayer(Layer):
    def __init__(self,
                 emb_size=128,
                 hidden_size=768,
                 n_layer=12,
                 voc_size=30522,
                 max_position_seq_len=512,
                 sent_types=2,
                 return_pooled_out=True,
                 initializer_range=1.0,
                 conv_type="conv_bn",
                 search_layer=False,
                 use_fp16=False,
                 use_fixed_gumbel=False,
                 gumbel_alphas=None):
        super(BertModelLayer, self).__init__()

        self._emb_size = emb_size
        self._hidden_size = hidden_size
        self._n_layer = n_layer
        self._voc_size = voc_size
        self._max_position_seq_len = max_position_seq_len
        self._sent_types = sent_types
        self.return_pooled_out = return_pooled_out

        self.use_fixed_gumbel = use_fixed_gumbel

        self._word_emb_name = "s_word_embedding"
        self._pos_emb_name = "s_pos_embedding"
        self._sent_emb_name = "s_sent_embedding"
        self._dtype = "float16" if use_fp16 else "float32"

        self._conv_type = conv_type
        self._search_layer = search_layer
        self._param_initializer = fluid.initializer.TruncatedNormal(
            scale=initializer_range)

        self._src_emb = Embedding(size=[self._voc_size, self._emb_size],
                                  param_attr=fluid.ParamAttr(
                                      name=self._word_emb_name,
                                      initializer=self._param_initializer),
                                  dtype=self._dtype)

        self._pos_emb = Embedding(
            size=[self._max_position_seq_len, self._emb_size],
            param_attr=fluid.ParamAttr(name=self._pos_emb_name,
                                       initializer=self._param_initializer),
            dtype=self._dtype)

        self._sent_emb = Embedding(size=[self._sent_types, self._emb_size],
                                   param_attr=fluid.ParamAttr(
                                       name=self._sent_emb_name,
                                       initializer=self._param_initializer),
                                   dtype=self._dtype)

        self._emb_fac = Linear(
            input_dim=self._emb_size,
            output_dim=self._hidden_size,
            param_attr=fluid.ParamAttr(name="s_emb_factorization"))

        self._encoder = EncoderLayer(n_layer=self._n_layer,
                                     hidden_size=self._hidden_size,
                                     search_layer=self._search_layer,
                                     use_fixed_gumbel=self.use_fixed_gumbel,
                                     gumbel_alphas=gumbel_alphas)

    def emb_names(self):
        return self._src_emb.parameters() + self._pos_emb.parameters(
        ) + self._sent_emb.parameters()

    def max_flops(self):
        return self._encoder.max_flops

    def max_model_size(self):
        return self._encoder.max_model_size

    def arch_parameters(self):
        return [self._encoder.alphas]  #, self._encoder.k]

    def forward(self,
                src_ids,
                position_ids,
                sentence_ids,
                flops=[],
                model_size=[]):
        """
        forward
        """
        ids = np.squeeze(src_ids.numpy())
        sids = np.squeeze(sentence_ids.numpy())
        batchsize = ids.shape[0]

        ids_0 = ids[((sids == 0) & (ids != 0))]
        seqlen_0 = ((sids == 0) & (ids != 0)).astype(np.int64).sum(1)
        y_0 = np.concatenate([np.arange(s) for s in seqlen_0])
        x_0 = np.concatenate(
            [np.ones([s], dtype=np.int64) * i for i, s in enumerate(seqlen_0)])
        ids0 = np.zeros([batchsize, seqlen_0.max()], dtype=np.int64)
        ids0[(x_0, y_0)] = ids_0

        ids_1 = ids[(sids == 1) & (ids != 0)]
        seqlen_1 = ((sids == 1) & (ids != 0)).astype(np.int64).sum(1)
        y_1 = np.concatenate([np.arange(s) for s in seqlen_1])
        x_1 = np.concatenate(
            [np.ones([s], dtype=np.int64) * i for i, s in enumerate(seqlen_1)])
        ids1 = np.zeros([batchsize, seqlen_1.max()], dtype=np.int64)
        ids1[(x_1, y_1)] = ids_1

        msl = max(seqlen_0.max(), seqlen_1.max())
        ids0 = np.pad(ids0, [[0, 0], [0, msl - seqlen_0.max()]],
                      mode='constant')
        ids1 = np.pad(ids1, [[0, 0], [0, msl - seqlen_1.max()]],
                      mode='constant')

        ids0 = fluid.dygraph.to_variable(ids0)
        ids1 = fluid.dygraph.to_variable(ids1)

        src_emb_0 = self._src_emb(ids0)
        src_emb_1 = self._src_emb(ids1)
        emb_out_0 = self._emb_fac(src_emb_0)
        emb_out_1 = self._emb_fac(src_emb_1)
        # (bs, seq_len, 768)

        # `emb_out` was undefined in the original snippet; passing both
        # segment embeddings, as code example #3 does, is an assumption.
        enc_outputs = self._encoder(emb_out_0,
                                    emb_out_1,
                                    flops=flops,
                                    model_size=model_size)
        return enc_outputs
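The distinctive part of this variant's forward() is the host-side re-packing of the mixed [A ; B] token stream into two per-segment matrices. A numpy-only toy sketch of that logic for segment A (the ids below are invented for illustration; 0 is treated as padding, as in the code above):

import numpy as np

ids  = np.array([[101,  7,  8, 102, 21, 22, 23, 102, 0, 0]], dtype=np.int64)
sids = np.array([[  0,  0,  0,   0,  1,  1,  1,   1, 0, 0]], dtype=np.int64)

mask0 = (sids == 0) & (ids != 0)            # real tokens of segment A
seqlen_0 = mask0.astype(np.int64).sum(1)    # per-row segment-A length
y_0 = np.concatenate([np.arange(s) for s in seqlen_0])
x_0 = np.concatenate(
    [np.ones([s], dtype=np.int64) * i for i, s in enumerate(seqlen_0)])
ids0 = np.zeros([ids.shape[0], seqlen_0.max()], dtype=np.int64)
ids0[(x_0, y_0)] = ids[mask0]               # left-aligned, zero-padded

print(ids0)   # [[101   7   8 102]]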
Code example #3
File: bert.py  Project: will-jl944/PaddleSlim
class BertModelLayer(Layer):
    def __init__(self,
                 num_labels,
                 emb_size=128,
                 hidden_size=768,
                 n_layer=12,
                 voc_size=30522,
                 max_position_seq_len=512,
                 sent_types=2,
                 return_pooled_out=True,
                 initializer_range=1.0,
                 conv_type="conv_bn",
                 search_layer=False,
                 use_fp16=False,
                 use_fixed_gumbel=False,
                 gumbel_alphas=None):
        super(BertModelLayer, self).__init__()

        self._emb_size = emb_size
        self._hidden_size = hidden_size
        self._n_layer = n_layer
        self._voc_size = voc_size
        self._max_position_seq_len = max_position_seq_len
        self._sent_types = sent_types
        self.return_pooled_out = return_pooled_out

        self.use_fixed_gumbel = use_fixed_gumbel

        self._word_emb_name = "s_word_embedding"
        self._pos_emb_name = "s_pos_embedding"
        self._sent_emb_name = "s_sent_embedding"
        self._dtype = "float16" if use_fp16 else "float32"

        self._conv_type = conv_type
        self._search_layer = search_layer
        self._param_initializer = fluid.initializer.TruncatedNormal(
            scale=initializer_range)

        self._src_emb = Embedding(size=[self._voc_size, self._emb_size],
                                  param_attr=fluid.ParamAttr(
                                      name=self._word_emb_name,
                                      initializer=self._param_initializer),
                                  dtype=self._dtype)

        self._pos_emb = Embedding(
            size=[self._max_position_seq_len, self._emb_size],
            param_attr=fluid.ParamAttr(name=self._pos_emb_name,
                                       initializer=self._param_initializer),
            dtype=self._dtype)

        self._sent_emb = Embedding(size=[self._sent_types, self._emb_size],
                                   param_attr=fluid.ParamAttr(
                                       name=self._sent_emb_name,
                                       initializer=self._param_initializer),
                                   dtype=self._dtype)

        self._emb_fac = Linear(
            input_dim=self._emb_size,
            output_dim=self._hidden_size,
            param_attr=fluid.ParamAttr(name="s_emb_factorization"))

        self._encoder = EncoderLayer(num_labels=num_labels,
                                     n_layer=self._n_layer,
                                     hidden_size=self._hidden_size,
                                     search_layer=self._search_layer,
                                     use_fixed_gumbel=self.use_fixed_gumbel,
                                     gumbel_alphas=gumbel_alphas)

    def emb_names(self):
        return self._src_emb.parameters() + self._pos_emb.parameters(
        ) + self._sent_emb.parameters()

    def max_flops(self):
        return self._encoder.max_flops

    def max_model_size(self):
        return self._encoder.max_model_size

    def arch_parameters(self):
        return [self._encoder.alphas]  #, self._encoder.k]

    def forward(self, data_ids, epoch):
        """
        forward
        """
        ids0 = data_ids[5]
        ids1 = data_ids[6]

        src_emb_0 = self._src_emb(ids0)
        src_emb_1 = self._src_emb(ids1)
        emb_out_0 = self._emb_fac(src_emb_0)
        emb_out_1 = self._emb_fac(src_emb_1)
        # (bs, seq_len, hidden_size)

        enc_outputs = self._encoder(emb_out_0, emb_out_1, epoch)

        return enc_outputs
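Unlike code example #2, this variant expects the reader to have already split and padded the two segments: forward() only reads slots 5 and 6 of data_ids. A minimal call sketch under that assumption (the placeholder slots and the reader's field order are illustrative, not the project's actual feed format, and the project's EncoderLayer is assumed to be importable):

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    # Remaining constructor arguments keep their defaults.
    model = BertModelLayer(num_labels=2)

    ids0 = fluid.dygraph.to_variable(
        np.random.randint(1, 30522, [4, 64]).astype('int64'))
    ids1 = fluid.dygraph.to_variable(
        np.random.randint(1, 30522, [4, 64]).astype('int64'))
    # forward() only reads indices 5 and 6; the Nones stand in for whatever
    # else the project's reader yields (labels, masks, position ids, ...).
    data_ids = [None, None, None, None, None, ids0, ids1]
    enc_outputs = model(data_ids, epoch=0)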