Example #1
    def __init__(self,
                 config,
                 return_pooled_out=True,
                 weight_sharing=False,
                 use_fp16=False):
        super(PretrainModelLayer, self).__init__()
        self.config = config
        self._voc_size = config['vocab_size']
        self._emb_size = config['hidden_size']
        self._hidden_act = config['hidden_act']
        self._prepostprocess_dropout = config['hidden_dropout_prob']

        self._word_emb_name = "word_embedding"
        self._param_initializer = fluid.initializer.TruncatedNormal(
            scale=config['initializer_range'])
        self._weight_sharing = weight_sharing
        self.use_fp16 = use_fp16
        self._dtype = "float16" if use_fp16 else "float32"

        self.bert_layer = BertModelLayer(config=self.config,
                                         return_pooled_out=return_pooled_out,
                                         use_fp16=self.use_fp16)

        self.pre_process_layer = PrePostProcessLayer(
            "n", self._emb_size, self._prepostprocess_dropout)

        self.pooled_fc = Linear(input_dim=self._emb_size,
                                output_dim=self._emb_size,
                                param_attr=fluid.ParamAttr(
                                    name="mask_lm_trans_fc.w_0",
                                    initializer=self._param_initializer),
                                bias_attr="mask_lm_trans_fc.b_0",
                                act="tanh")

        self.mask_lm_out_bias_attr = fluid.ParamAttr(
            name="mask_lm_out_fc.b_0",
            initializer=fluid.initializer.Constant(value=0.0))

        # Without weight sharing, a full FC projects hidden states to the
        # vocabulary; with sharing, the output projection reuses the
        # word-embedding weights, so only a vocabulary-sized bias is created.
        if not self._weight_sharing:
            self.out_fc = Linear(input_dim=self._emb_size,
                                 output_dim=self._voc_size,
                                 param_attr=fluid.ParamAttr(
                                     name="mask_lm_out_fc.w_0",
                                     initializer=self._param_initializer),
                                 bias_attr=self.mask_lm_out_bias_attr)
        else:
            self.fc_create_params = self.create_parameter(
                shape=[self._voc_size],
                dtype=self._dtype,
                attr=self.mask_lm_out_bias_attr,
                is_bias=True)

        self.next_sent_fc = Linear(input_dim=self._emb_size,
                                   output_dim=2,
                                   param_attr=fluid.ParamAttr(
                                       name="next_sent_fc.w_0",
                                       initializer=self._param_initializer),
                                   bias_attr="next_sent_fc.b_0")
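The constructor above builds the masked-LM and next-sentence heads on top of a BertModelLayer backbone. Below is a minimal instantiation sketch, not part of the source: the config values are hypothetical BERT-base settings, and it assumes PaddlePaddle 1.x dygraph (paddle.fluid) together with the PretrainModelLayer class this constructor belongs to.

import paddle.fluid as fluid

# Hypothetical BERT-base style config; keys match the lookups in the constructor.
bert_config = {
    'vocab_size': 30522,
    'hidden_size': 768,
    'num_hidden_layers': 12,
    'num_attention_heads': 12,
    'max_position_embeddings': 512,
    'type_vocab_size': 2,
    'hidden_act': 'gelu',
    'hidden_dropout_prob': 0.1,
    'attention_probs_dropout_prob': 0.1,
    'initializer_range': 0.02,
}

with fluid.dygraph.guard():
    pretrain_model = PretrainModelLayer(bert_config,
                                        return_pooled_out=True,
                                        weight_sharing=False,
                                        use_fp16=False)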
Example #2
    def __init__(self,
                 hidden_act,
                 n_head,
                 d_key,
                 d_value,
                 d_model,
                 d_inner_hid,
                 prepostprocess_dropout,
                 attention_dropout,
                 relu_dropout,
                 preprocess_cmd="n",
                 postprocess_cmd="da",
                 param_initializer=None,
                 name=""):

        super(EncoderSubLayer, self).__init__()
        self.name = name
        self._preprocess_cmd = preprocess_cmd
        self._postprocess_cmd = postprocess_cmd
        self._prepostprocess_dropout = prepostprocess_dropout
        self._preprocess_layer = PrePostProcessLayer(self._preprocess_cmd,
                                                     d_model,
                                                     prepostprocess_dropout)
        self._multihead_attention_layer = MultiHeadAttention(
            d_key, d_value, d_model, n_head, attention_dropout,
            param_initializer)
        self._postprocess_layer = PrePostProcessLayer(
            self._postprocess_cmd, d_model, self._prepostprocess_dropout)
        self._preprocess_layer2 = PrePostProcessLayer(
            self._preprocess_cmd, d_model, self._prepostprocess_dropout)
        self._positionwise_feed_forward = PositionwiseFeedForwardLayer(
            hidden_act,
            d_inner_hid,
            d_model,
            relu_dropout,
            param_initializer,
            name=name + "_ffn")
        self._postprocess_layer2 = PrePostProcessLayer(
            self._postprocess_cmd, d_model, self._prepostprocess_dropout)
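This constructor assembles one attention-plus-feed-forward block, each wrapped in its own pre- and post-processing (normalization/dropout/residual) layers. A minimal sketch of instantiating a single sublayer follows; the widths and dropout rates are illustrative BERT-base values, not from the source, and the helper classes referenced above (MultiHeadAttention, PositionwiseFeedForwardLayer, PrePostProcessLayer) are assumed to be available.

import paddle.fluid as fluid

with fluid.dygraph.guard():
    # Illustrative BERT-base sizes: 12 heads of width 64 over a 768-dim model.
    sublayer = EncoderSubLayer(hidden_act='gelu',
                               n_head=12,
                               d_key=64,
                               d_value=64,
                               d_model=768,
                               d_inner_hid=3072,
                               prepostprocess_dropout=0.1,
                               attention_dropout=0.1,
                               relu_dropout=0.0,
                               preprocess_cmd="n",
                               postprocess_cmd="da",
                               param_initializer=fluid.initializer.TruncatedNormal(
                                   scale=0.02),
                               name='encoder_layer_0')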
Example #3
    def __init__(self,
                 hidden_act,
                 n_layer,
                 n_head,
                 d_key,
                 d_value,
                 d_model,
                 d_inner_hid,
                 prepostprocess_dropout,
                 attention_dropout,
                 relu_dropout,
                 preprocess_cmd="n",
                 postprocess_cmd="da",
                 param_initializer=None,
                 name=""):

        super(EncoderLayer, self).__init__()
        self._preprocess_cmd = preprocess_cmd
        self._encoder_sublayers = list()
        self._prepostprocess_dropout = prepostprocess_dropout
        self._n_layer = n_layer
        self._hidden_act = hidden_act
        self._preprocess_layer = PrePostProcessLayer(
            self._preprocess_cmd, d_model, self._prepostprocess_dropout)

        for i in range(n_layer):
            self._encoder_sublayers.append(
                self.add_sublayer(
                    'esl_%d' % i,
                    EncoderSubLayer(hidden_act,
                                    n_head,
                                    d_key,
                                    d_value,
                                    d_model,
                                    d_inner_hid,
                                    prepostprocess_dropout,
                                    attention_dropout,
                                    relu_dropout,
                                    preprocess_cmd,
                                    postprocess_cmd,
                                    param_initializer,
                                    name=name + '_layer_' + str(i))))
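The loop registers each EncoderSubLayer via add_sublayer so its parameters are tracked by the dygraph framework. Below is a sketch of constructing the full stack; the values again mirror BERT-base and are assumptions, not part of the source.

import paddle.fluid as fluid

with fluid.dygraph.guard():
    encoder = EncoderLayer(hidden_act='gelu',
                           n_layer=12,
                           n_head=12,
                           d_key=64,
                           d_value=64,
                           d_model=768,
                           d_inner_hid=3072,
                           prepostprocess_dropout=0.1,
                           attention_dropout=0.1,
                           relu_dropout=0.0,
                           preprocess_cmd="",
                           postprocess_cmd="dan",
                           param_initializer=fluid.initializer.TruncatedNormal(
                               scale=0.02),
                           name='encoder')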
Example #4
    def __init__(self, config, return_pooled_out=True, use_fp16=False):
        super(BertModelLayer, self).__init__()

        self._emb_size = config['hidden_size']
        self._n_layer = config['num_hidden_layers']
        self._n_head = config['num_attention_heads']
        self._voc_size = config['vocab_size']
        self._max_position_seq_len = config['max_position_embeddings']
        self._sent_types = config['type_vocab_size']
        self._hidden_act = config['hidden_act']
        self._prepostprocess_dropout = config['hidden_dropout_prob']
        self._attention_dropout = config['attention_probs_dropout_prob']
        self.return_pooled_out = return_pooled_out

        self._word_emb_name = "word_embedding"
        self._pos_emb_name = "pos_embedding"
        self._sent_emb_name = "sent_embedding"
        self._dtype = "float16" if use_fp16 else "float32"

        self._param_initializer = fluid.initializer.TruncatedNormal(
            scale=config['initializer_range'])

        self._src_emb = Embedding(size=[self._voc_size, self._emb_size],
                                  param_attr=fluid.ParamAttr(
                                      name=self._word_emb_name,
                                      initializer=self._param_initializer),
                                  dtype=self._dtype)

        self._pos_emb = Embedding(
            size=[self._max_position_seq_len, self._emb_size],
            param_attr=fluid.ParamAttr(name=self._pos_emb_name,
                                       initializer=self._param_initializer),
            dtype=self._dtype)

        self._sent_emb = Embedding(size=[self._sent_types, self._emb_size],
                                   param_attr=fluid.ParamAttr(
                                       name=self._sent_emb_name,
                                       initializer=self._param_initializer),
                                   dtype=self._dtype)

        self.pooled_fc = Linear(input_dim=self._emb_size,
                                output_dim=self._emb_size,
                                param_attr=fluid.ParamAttr(
                                    name="pooled_fc.w_0",
                                    initializer=self._param_initializer),
                                bias_attr="pooled_fc.b_0",
                                act="tanh")

        self.pre_process_layer = PrePostProcessLayer(
            "nd", self._emb_size, self._prepostprocess_dropout)

        # Transformer encoder stack; per-head key/value width is
        # hidden_size // n_head and the feed-forward inner size is
        # 4 * hidden_size, as in standard BERT.
        self._encoder = EncoderLayer(
            hidden_act=self._hidden_act,
            n_layer=self._n_layer,
            n_head=self._n_head,
            d_key=self._emb_size // self._n_head,
            d_value=self._emb_size // self._n_head,
            d_model=self._emb_size,
            d_inner_hid=self._emb_size * 4,
            prepostprocess_dropout=self._prepostprocess_dropout,
            attention_dropout=self._attention_dropout,
            relu_dropout=0,
            preprocess_cmd="",
            postprocess_cmd="dan",
            param_initializer=self._param_initializer)
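Instantiating the backbone on its own follows the same pattern. The sketch below reuses the hypothetical BERT-base config from the earlier example and is not part of the source; forward-pass inputs (token, position and sentence ids plus an attention mask) are omitted because the forward method is not shown here.

import paddle.fluid as fluid

# Hypothetical BERT-base style config (same keys the constructor reads).
bert_config = {
    'vocab_size': 30522,
    'hidden_size': 768,
    'num_hidden_layers': 12,
    'num_attention_heads': 12,
    'max_position_embeddings': 512,
    'type_vocab_size': 2,
    'hidden_act': 'gelu',
    'hidden_dropout_prob': 0.1,
    'attention_probs_dropout_prob': 0.1,
    'initializer_range': 0.02,
}

with fluid.dygraph.guard():
    bert = BertModelLayer(bert_config, return_pooled_out=True, use_fp16=False)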