Example 1
    def __init__(self,
                 vocab_size,
                 emb_dim=512,
                 hidden_size=512,
                 n_layers=8,
                 n_heads=8,
                 padding_idx=0,
                 dropout_rate=0.1):
        """
        __init__
        """
        super(TransformerEncoderModel, self).__init__()
        self.padding_idx = padding_idx
        self.token_embedding = nn.Embedding(vocab_size,
                                            emb_dim,
                                            padding_idx=padding_idx)
        max_pos_len = 3000
        self.pos_embedding = nn.Embedding(max_pos_len,
                                          emb_dim,
                                          padding_idx=padding_idx)

        self.dropout = nn.Dropout(p=dropout_rate)
        self.transformer_encoder_layer = nn.TransformerEncoderLayer(
            emb_dim,
            n_heads,
            dim_feedforward=hidden_size * 4,
            dropout=0.1,
            activation='gelu',
            attn_dropout=0.1,
            act_dropout=0)
        self.transformer_encoder = nn.TransformerEncoder(
            self.transformer_encoder_layer, n_layers)
        self.layer_norm = nn.LayerNorm(hidden_size)
        self.apply(self.init_weights)
Example 2
 def __init__(self,
              vocab_size,
              hidden_size=768,
              num_hidden_layers=12,
              num_attention_heads=12,
              intermediate_size=3072,
              hidden_act="gelu",
              hidden_dropout_prob=0.1,
              attention_probs_dropout_prob=0.1,
              max_position_embeddings=512,
              type_vocab_size=16,
              initializer_range=0.02,
              pad_token_id=0):
     super(BertModel, self).__init__()
     self.pad_token_id = pad_token_id
     self.initializer_range = initializer_range
     self.embeddings = BertEmbeddings(vocab_size, hidden_size, hidden_dropout_prob, max_position_embeddings,
                                      type_vocab_size)
     encoder_layer = nn.TransformerEncoderLayer(
         hidden_size,
         num_attention_heads,
         intermediate_size,
         dropout=hidden_dropout_prob,
         activation=hidden_act,
         attn_dropout=attention_probs_dropout_prob,
         act_dropout=0)
     self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)
     self.pooler = BertPooler(hidden_size)
     self.apply(self.init_weights)
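
A minimal sketch of how the pieces assembled in a constructor like this are typically wired together at run time, assuming the standard paddle.nn API and the usual paddlenlp masking convention (padded positions get a large negative additive bias); sizes are illustrative:

import paddle
import paddle.nn as nn

vocab_size, hidden_size, pad_token_id = 1000, 768, 0
embedding = nn.Embedding(vocab_size, hidden_size, padding_idx=pad_token_id)
encoder_layer = nn.TransformerEncoderLayer(
    hidden_size, 12, 3072, dropout=0.1, activation="gelu",
    attn_dropout=0.1, act_dropout=0)
encoder = nn.TransformerEncoder(encoder_layer, 12)

input_ids = paddle.randint(low=1, high=vocab_size, shape=[2, 16])
# Additive attention mask: 0.0 for real tokens, -1e4 for padding positions.
attention_mask = paddle.unsqueeze(
    (input_ids == pad_token_id).astype("float32") * -1e4, axis=[1, 2])
sequence_output = encoder(embedding(input_ids), src_mask=attention_mask)
print(sequence_output.shape)  # [2, 16, 768]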
Example 3
 def __init__(self,
              vocab_size: int = 300,
              max_len: int = 128,
              emb_size: int = 768,
              n_layers: int = 12,
              n_heads: int = 8,
              dropout: float = 0.1,
              pad_idx: int = 0):
     super().__init__()
     self.input_emb = InputEmbedding(max_len=max_len,
                                     vocab_size=vocab_size,
                                     emb_size=emb_size,
                                     pad_idx=pad_idx,
                                     dropout=dropout)
     self.transformers = nn.LayerList([
         nn.TransformerEncoderLayer(d_model=emb_size,
                                    nhead=n_heads,
                                    dim_feedforward=4 * emb_size,
                                    normalize_before=True)
         for _ in range(n_layers)
     ])
     self.max_len = max_len
     self.n_heads = n_heads
     self.pad_idx = pad_idx
     self.dim_output = emb_size
     self.vocab_size = vocab_size
Example 4
 def __init__(self,
              vocab_size,
              hidden_size=768,
              num_hidden_layers=12,
              num_attention_heads=12,
              hidden_act="gelu",
              hidden_dropout_prob=0.1,
              attention_probs_dropout_prob=0.1,
              max_position_embeddings=514,
              initializer_range=0.02,
              pad_token_id=1):
     super(ErnieMModel, self).__init__()
     self.pad_token_id = pad_token_id
     self.initializer_range = initializer_range
     self.embeddings = ErnieMEmbeddings(vocab_size, hidden_size,
                                        hidden_dropout_prob,
                                        max_position_embeddings)
     encoder_layer = nn.TransformerEncoderLayer(
         hidden_size,
         num_attention_heads,
         dim_feedforward=4 * hidden_size,
         dropout=hidden_dropout_prob,
         activation=hidden_act,
         attn_dropout=attention_probs_dropout_prob,
         act_dropout=0,
         normalize_before=False)
     self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)
     self.pooler = ErnieMPooler(hidden_size)
     self.apply(self.init_weights)
Example 5
 def __init__(self,
              vocab_size,
              hidden_size=768,
              num_hidden_layers=12,
              num_attention_heads=12,
              intermediate_size=3072,
              hidden_act="gelu",
              hidden_dropout_prob=0.1,
              attention_probs_dropout_prob=0.1,
              max_position_embeddings=512,
              type_vocab_size=2,
              initializer_range=0.02,
              pad_token_id=0):
     super(ErnieModel, self).__init__()
     self.pad_token_id = pad_token_id
     self.initializer_range = initializer_range
     weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal(
         mean=0.0, std=self.initializer_range))
     self.embeddings = ErnieEmbeddings(
         vocab_size, hidden_size, hidden_dropout_prob,
         max_position_embeddings, type_vocab_size, pad_token_id, weight_attr)
     encoder_layer = nn.TransformerEncoderLayer(
         hidden_size,
         num_attention_heads,
         intermediate_size,
         dropout=hidden_dropout_prob,
         activation=hidden_act,
         attn_dropout=attention_probs_dropout_prob,
         act_dropout=0,
            weight_attr=weight_attr)
     self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)
     self.pooler = ErniePooler(hidden_size, weight_attr)
     self.apply(self.init_weights)
Example 6
    def __init__(self, vocab_size, embedding_size, hidden_size,
                 num_hidden_layers, num_attention_heads, intermediate_size,
                 hidden_act, hidden_dropout_prob, attention_probs_dropout_prob,
                 max_position_embeddings, type_vocab_size, initializer_range,
                 pad_token_id):
        super(ElectraModel, self).__init__()
        self.pad_token_id = pad_token_id
        self.initializer_range = initializer_range
        self.embeddings = ElectraEmbeddings(vocab_size, embedding_size,
                                            hidden_dropout_prob,
                                            max_position_embeddings,
                                            type_vocab_size)

        if embedding_size != hidden_size:
            self.embeddings_project = nn.Linear(embedding_size, hidden_size)

        encoder_layer = nn.TransformerEncoderLayer(
            hidden_size,
            num_attention_heads,
            intermediate_size,
            dropout=hidden_dropout_prob,
            activation=hidden_act,
            attn_dropout=attention_probs_dropout_prob,
            act_dropout=0)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)

        self.init_weights()
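
When embedding_size differs from hidden_size, the embeddings_project layer above maps token embeddings up to the encoder width before the first encoder layer. A minimal sketch with illustrative sizes, assuming the standard paddle.nn API:

import paddle
import paddle.nn as nn

embedding_size, hidden_size = 128, 768
embeddings = nn.Embedding(30522, embedding_size)
embeddings_project = nn.Linear(embedding_size, hidden_size)

ids = paddle.randint(low=0, high=30522, shape=[2, 16])
x = embeddings(ids)            # [2, 16, 128]
x = embeddings_project(x)      # [2, 16, 768], then fed to the encoder
print(x.shape)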
Example 7
 def __init__(self):
     super(TestModel, self).__init__()
     encoder_layer = nn.TransformerEncoderLayer(312,
                                                12,
                                                1024,
                                                dropout=0.1,
                                                activation='gelu',
                                                attn_dropout=0.1,
                                                act_dropout=0)
     self.encoder = nn.TransformerEncoder(encoder_layer, 3)
     self.fc = nn.Linear(312, 3)
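
TestModel's forward is not shown here; a plausible minimal completion (an assumption, not the original code) would pool the encoder output before applying the linear head:

import paddle
import paddle.nn as nn

class TestModel(nn.Layer):
    def __init__(self):
        super(TestModel, self).__init__()
        encoder_layer = nn.TransformerEncoderLayer(312, 12, 1024,
                                                   dropout=0.1,
                                                   activation='gelu',
                                                   attn_dropout=0.1,
                                                   act_dropout=0)
        self.encoder = nn.TransformerEncoder(encoder_layer, 3)
        self.fc = nn.Linear(312, 3)

    def forward(self, src, src_mask=None):
        # Hypothetical head: mean-pool over the sequence, then classify.
        hidden = self.encoder(src, src_mask=src_mask)
        return self.fc(hidden.mean(axis=1))

model = TestModel()
logits = model(paddle.rand([4, 20, 312]))
print(logits.shape)  # [4, 3]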
Example 8
 def __init__(self,
              vocab_size,
              vocab_file,
              hidden_size=768,
              num_hidden_layers=12,
              num_attention_heads=12,
              intermediate_size=3072,
              hidden_act="gelu",
              hidden_dropout_prob=0.1,
              attention_probs_dropout_prob=0.1,
              max_position_embeddings=512,
              type_vocab_size=2,
              initializer_range=0.02,
              pad_token_id=0,
              do_lower_case=True,
              is_split_into_words=False,
              max_seq_len=128,
              pad_to_max_seq_len=False):
     super(PPMiniLMModel, self).__init__()
     if not os.path.isfile(vocab_file):
         raise ValueError(
             "Can't find a vocabulary file at path '{}'. To load the "
             "vocabulary from a pretrained model please use "
             "`model = PPMiniLMModel.from_pretrained(PRETRAINED_MODEL_NAME)`"
             .format(vocab_file))
     self.vocab = self.load_vocabulary(vocab_file)
     self.do_lower_case = do_lower_case
     self.max_seq_len = max_seq_len
     self.is_split_into_words = is_split_into_words
     self.pad_token_id = pad_token_id
     self.pad_to_max_seq_len = pad_to_max_seq_len
     self.initializer_range = initializer_range
     weight_attr = paddle.ParamAttr(
         initializer=nn.initializer.TruncatedNormal(
             mean=0.0, std=self.initializer_range))
     self.embeddings = PPMiniLMEmbeddings(vocab_size, hidden_size,
                                          hidden_dropout_prob,
                                          max_position_embeddings,
                                          type_vocab_size, pad_token_id,
                                          weight_attr)
     encoder_layer = nn.TransformerEncoderLayer(
         hidden_size,
         num_attention_heads,
         intermediate_size,
         dropout=hidden_dropout_prob,
         activation=hidden_act,
         attn_dropout=attention_probs_dropout_prob,
         act_dropout=0,
         weight_attr=weight_attr,
         normalize_before=False)
     self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)
     self.pooler = PPMiniLMPooler(hidden_size, weight_attr)
     self.apply(self.init_weights)
Example 9
    def __init__(self,
                 d_model=512,
                 nhead=8,
                 num_encoder_layers=6,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu",
                 attn_dropout=None,
                 act_dropout=None,
                 normalize_before=False,
                 weight_attr=None,
                 bias_attr=None):
        """TransformerEncoder"""
        super(TransformerEncoder, self).__init__()

        if isinstance(bias_attr, (list, tuple)):
            if len(bias_attr) == 1:
                encoder_bias_attr = [bias_attr[0]] * 2
            elif len(bias_attr) == 2:
                encoder_bias_attr = bias_attr
            elif len(bias_attr) == 3:
                encoder_bias_attr = [bias_attr[0], bias_attr[-1]]
            else:
                assert False, (
                    "length of bias_attr should be 1 or 2 or 3 when it is a list/tuple"
                )
        else:
            encoder_bias_attr = bias_attr

        if isinstance(weight_attr, (list, tuple)):
            if len(weight_attr) == 1:
                encoder_weight_attr = [weight_attr[0]] * 2
            elif len(weight_attr) == 2:
                encoder_weight_attr = weight_attr
            elif len(weight_attr) == 3:
                encoder_weight_attr = [weight_attr[0], weight_attr[-1]]
            else:
                assert False, (
                    "length of weight_attr should be 1 or 2 or 3 when it is a list/tuple"
                )
        else:
            encoder_weight_attr = weight_attr

        encoder_layer = nn.TransformerEncoderLayer(
            d_model, nhead, dim_feedforward, dropout, activation, attn_dropout,
            act_dropout, normalize_before, encoder_weight_attr,
            encoder_bias_attr)
        encoder_norm = nn.LayerNorm(d_model)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers,
                                             encoder_norm)

        self.d_model = d_model
        self.nhead = nhead
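
For reference, when paddle's nn.TransformerEncoderLayer receives a two-element weight_attr/bias_attr list, the first entry applies to the self-attention sublayer and the second to the feed-forward sublayer; the length-1/2/3 handling above only normalizes user input to that form. A minimal sketch of the resulting call, assuming the standard paddle.nn API:

import paddle
import paddle.nn as nn

# First entry -> self-attention projections, second entry -> FFN linears.
encoder_layer = nn.TransformerEncoderLayer(
    d_model=512, nhead=8, dim_feedforward=2048,
    dropout=0.1, activation="relu", normalize_before=True,
    bias_attr=[False, True])  # no bias in attention, bias in the FFN
encoder = nn.TransformerEncoder(encoder_layer, num_layers=6,
                                norm=nn.LayerNorm(512))
out = encoder(paddle.rand([2, 10, 512]))
print(out.shape)  # [2, 10, 512]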
Example 10
    def __init__(self,
                 vocab_size,
                 embedding_size=128,
                 hidden_size=768,
                 num_hidden_layers=12,
                 num_attention_heads=12,
                 intermediate_size=3072,
                 hidden_dropout_prob=0.1,
                 attention_probs_dropout_prob=0.1,
                 max_position_embeddings=512,
                 type_vocab_size=16,
                 initializer_range=0.02,
                 pad_token_id=0,
                 use_content_summary=True,
                 content_summary_index=1,
                 cls_num=2):
        super(ErnieCtmModel, self).__init__()

        self.pad_token_id = pad_token_id
        self.content_summary_index = content_summary_index
        self.initializer_range = initializer_range
        self.embeddings = ErnieCtmEmbeddings(
            vocab_size,
            embedding_size,
            hidden_dropout_prob=hidden_dropout_prob,
            max_position_embeddings=max_position_embeddings,
            type_vocab_size=type_vocab_size,
            padding_idx=pad_token_id,
            cls_num=cls_num)
        self.embedding_hidden_mapping_in = nn.Linear(embedding_size,
                                                     hidden_size)
        encoder_layer = nn.TransformerEncoderLayer(
            hidden_size,
            num_attention_heads,
            intermediate_size,
            dropout=hidden_dropout_prob,
            activation="gelu",
            attn_dropout=attention_probs_dropout_prob,
            act_dropout=0)
        encoder_layer.activation = nn.GELU(approximate=True)

        self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)
        self.pooler = ErnieCtmPooler(hidden_size)

        self.use_content_summary = use_content_summary
        if use_content_summary:
            self.feature_fuse = nn.Linear(hidden_size * 2, intermediate_size)
            self.feature_output = nn.Linear(intermediate_size, hidden_size)

        self.apply(self.init_weights)
Example 11
    def __init__(
        self,
        vocab_size,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act='relu',
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        normalize_before=False,
        max_position_embeddings=513,
        type_vocab_size=4,
        initializer_range=0.02,
        unk_token_id=17963,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=3,
        mask_token_id=3,
    ):
        super(UNIMOModel, self).__init__()
        self.unk_token_id = unk_token_id
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.mask_token_id = mask_token_id
        self.initializer_range = initializer_range

        self.embeddings = UNIMOEmbeddings(vocab_size, hidden_size,
                                          hidden_dropout_prob,
                                          max_position_embeddings,
                                          type_vocab_size)
        encoder_layer = nn.TransformerEncoderLayer(
            hidden_size,
            num_attention_heads,
            intermediate_size,
            dropout=hidden_dropout_prob,
            activation=hidden_act,
            attn_dropout=attention_probs_dropout_prob,
            act_dropout=0,
            normalize_before=normalize_before)

        self.encoder_norm = nn.LayerNorm(hidden_size)
        self.dropout = nn.Dropout(hidden_dropout_prob)
        self.encoder = nn.TransformerEncoder(
            encoder_layer,
            num_hidden_layers,
        )

        self.apply(self.init_weights)
Example 12
 def __init__(
     self,
     vocab_size=23236,
     hidden_size=768,
     num_hidden_layers=12,
     num_attention_heads=12,
     intermediate_size=3072,
     hidden_act="gelu",
     hidden_dropout_prob=0.1,
     attention_probs_dropout_prob=0.1,
     max_position_embeddings=512,
     type_vocab_size=2,
     initializer_range=0.02,
     pad_token_id=0,
     pool_act="tanh",
     layer_norm_eps=1e-12,
     glyph_embedding_dim=1728,
     pinyin_map_len=32,
 ):
     super(ChineseBertModel, self).__init__()
     self.pad_token_id = pad_token_id
     self.layer_norm_eps = layer_norm_eps
     self.initializer_range = initializer_range
     self.embeddings = FusionBertEmbeddings(
         vocab_size,
         hidden_size,
         pad_token_id,
         type_vocab_size,
         max_position_embeddings,
         pinyin_map_len,
         glyph_embedding_dim,
         layer_norm_eps,
         hidden_dropout_prob,
     )
     encoder_layer = nn.TransformerEncoderLayer(
         hidden_size,
         num_attention_heads,
         intermediate_size,
         dropout=hidden_dropout_prob,
         activation=hidden_act,
         attn_dropout=attention_probs_dropout_prob,
         act_dropout=0,
     )
     self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)
     self.pooler = BertPooler(hidden_size, pool_act)
     self.apply(self.init_weights)
Example 13
    def __init__(self,
                 vocab_size,
                 embed_tokens=None,
                 pad_token_id=0,
                 d_model=1280,
                 num_encoder_layers=2,
                 encoder_attention_heads=32,
                 encoder_ffn_dim=5120,
                 dropout=0.1,
                 activation_function='gelu',
                 attention_dropout=0.0,
                 activation_dropout=0.0,
                 max_position_embeddings=128,
                 init_std=0.02,
                 scale_embedding=True,
                 normalize_before=True):
        super().__init__()
        self.init_std = init_std
        self.pad_token_id = pad_token_id
        if embed_tokens is not None:
            self.embed_tokens = embed_tokens
        else:
            self.embed_tokens = nn.Embedding(num_embeddings=vocab_size,
                                             embedding_dim=d_model,
                                             padding_idx=pad_token_id)
        self.embed_scale = math.sqrt(d_model) if scale_embedding else 1.0
        self.encoder_embed_positions = BlenderbotLearnedPositionalEmbedding(
            num_embeddings=max_position_embeddings, embedding_dim=d_model)

        self.encoder_dropout = nn.Dropout(dropout)
        self.encoder_layernorm = nn.LayerNorm(normalized_shape=d_model)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=encoder_attention_heads,
            dim_feedforward=encoder_ffn_dim,
            dropout=dropout,
            activation=activation_function,
            attn_dropout=attention_dropout,
            act_dropout=activation_dropout,
            normalize_before=normalize_before)
        self.encoder = nn.TransformerEncoder(encoder_layer=encoder_layer,
                                             num_layers=num_encoder_layers)

        self.apply(self.init_weights)
Example 14
    def __init__(self,
                 vocab_size,
                 hidden_size=768,
                 num_hidden_layers=12,
                 num_attention_heads=12,
                 intermediate_size=3072,
                 hidden_act="gelu",
                 hidden_dropout_prob=0.1,
                 attention_probs_dropout_prob=0.1,
                 normalize_before=True,
                 max_position_embeddings=512,
                 type_vocab_size=2,
                 initializer_range=0.02,
                 unk_token_id=0,
                 pad_token_id=0,
                 bos_token_id=1,
                 eos_token_id=2,
                 mask_token_id=30000,
                 role_type_size=None):
        super(UnifiedTransformerModel, self).__init__()
        self.unk_token_id = unk_token_id
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.mask_token_id = mask_token_id
        self.initializer_range = initializer_range

        self.embeddings = UnifiedTransformerEmbeddings(
            vocab_size, hidden_size, hidden_dropout_prob,
            max_position_embeddings, type_vocab_size, role_type_size)
        encoder_layer = nn.TransformerEncoderLayer(
            hidden_size,
            num_attention_heads,
            intermediate_size,
            dropout=hidden_dropout_prob,
            activation=hidden_act,
            attn_dropout=attention_probs_dropout_prob,
            act_dropout=0,
            normalize_before=normalize_before)
        encoder_norm = nn.LayerNorm(hidden_size)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers,
                                             encoder_norm)
        self.apply(self.init_weights)
Example 15
    def __init__(self,
                 vocab_size,
                 hidden_size=768,
                 num_hidden_layers=12,
                 num_attention_heads=12,
                 intermediate_size=3072,
                 hidden_act="gelu",
                 hidden_dropout_prob=0.1,
                 attention_probs_dropout_prob=0.1,
                 max_position_embeddings=512,
                 type_vocab_size=16,
                 initializer_range=0.02,
                 pad_token_id=0,
                 fit_size=768):
        super(TinyBertModel, self).__init__()
        self.pad_token_id = pad_token_id
        self.initializer_range = initializer_range
        self.embeddings = BertEmbeddings(vocab_size, hidden_size,
                                         hidden_dropout_prob,
                                         max_position_embeddings,
                                         type_vocab_size)

        encoder_layer = nn.TransformerEncoderLayer(
            hidden_size,
            num_attention_heads,
            intermediate_size,
            dropout=hidden_dropout_prob,
            activation=hidden_act,
            attn_dropout=attention_probs_dropout_prob,
            act_dropout=0)

        self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)
        self.pooler = BertPooler(hidden_size)
        # fit_dense(s) project the student's hidden states to the teacher's hidden size.
        # `fit_denses` is used in the v2 model, and `fit_dense` is used in the other pretraining models.
        self.fit_denses = nn.LayerList([
            nn.Linear(hidden_size, fit_size)
            for i in range(num_hidden_layers + 1)
        ])
        self.fit_dense = nn.Linear(hidden_size, fit_size)
        self.apply(self.init_weights)
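
A minimal sketch of how a fit_dense projection is typically used during distillation: map a student hidden state to the teacher width and penalize the gap. Sizes, the MSE objective, and the single-layer setup are illustrative assumptions:

import paddle
import paddle.nn as nn

hidden_size, fit_size, batch, seq_len = 312, 768, 2, 16
fit_dense = nn.Linear(hidden_size, fit_size)
mse = nn.MSELoss()

student_hidden = paddle.rand([batch, seq_len, hidden_size])  # one student layer
teacher_hidden = paddle.rand([batch, seq_len, fit_size])     # matching teacher layer
hidden_loss = mse(fit_dense(student_hidden), teacher_hidden)
print(float(hidden_loss))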
Example 16
    def __init__(
        self,
        vocab_size,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        layer_norm_eps=1e-12,
        max_position_embeddings=512,
        max_2d_position_embeddings=1024,
        type_vocab_size=16,
        initializer_range=0.02,
        pad_token_id=0,
        pool_act="tanh",
    ):
        super(LayoutLMModel, self).__init__()
        self.num_hidden_layers = num_hidden_layers
        self.pad_token_id = pad_token_id
        self.initializer_range = initializer_range
        self.embeddings = LayoutLMEmbeddings(vocab_size, hidden_size,
                                             hidden_dropout_prob,
                                             max_position_embeddings,
                                             max_2d_position_embeddings,
                                             layer_norm_eps, pad_token_id,
                                             type_vocab_size)

        encoder_layer = nn.TransformerEncoderLayer(
            hidden_size,
            num_attention_heads,
            intermediate_size,
            dropout=hidden_dropout_prob,
            activation=hidden_act,
            attn_dropout=attention_probs_dropout_prob,
            act_dropout=0)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_hidden_layers)
        self.pooler = LayoutLMPooler(hidden_size, pool_act)
        self.apply(self.init_weights)
Example 17
    def __init__(self,
                 embed_tokens,
                 vocab_size,
                 pad_token_id=1,
                 d_model=768,
                 num_encoder_layers=6,
                 encoder_attention_heads=12,
                 encoder_ffn_dim=3072,
                 dropout=0.1,
                 activation_function='gelu',
                 attention_dropout=0.1,
                 activation_dropout=0.1,
                 max_position_embeddings=1024,
                 init_std=0.02):
        super().__init__()
        self.d_model = d_model
        self.init_std = init_std
        self.pad_token_id = pad_token_id
        if embed_tokens is not None:
            self.embed_tokens = embed_tokens
        else:
            self.embed_tokens = nn.Embedding(vocab_size, d_model, pad_token_id)

        self.encoder_embed_positions = MBartLearnedPositionalEmbedding(
            max_position_embeddings, d_model, pad_token_id)

        self.encoder_dropout = nn.Dropout(dropout)
        self.encoder_layernorm_embedding = nn.LayerNorm(d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=encoder_attention_heads,
            dim_feedforward=encoder_ffn_dim,
            dropout=dropout,
            activation=activation_function,
            attn_dropout=attention_dropout,
            act_dropout=activation_dropout,
            normalize_before=True)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers,
                                             nn.LayerNorm(d_model))
        self.apply(self.init_weights)
Example 18
    def __init__(self, vocab_size, type_size, max_position_seq_len, num_layers,
                 n_head, hidden_size, attn_dropout, act_dropout):
        super(NSP, self).__init__()

        self.n_head = n_head
        self.hidden_size = hidden_size

        self.word_embedding_layer = nn.Embedding(vocab_size, hidden_size)
        self.sent_embedding_layer = nn.Embedding(type_size, hidden_size)
        self.pos_embedding_layer = nn.Embedding(max_position_seq_len,
                                                hidden_size)

        encoder_layer = nn.TransformerEncoderLayer(
            hidden_size, n_head, hidden_size * 4, dropout=act_dropout,
            activation='gelu', attn_dropout=attn_dropout,
            act_dropout=act_dropout, normalize_before=True)
        encoder_norm = nn.LayerNorm(hidden_size)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers,
                                             encoder_norm)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 2)

        self.dropout_layer = nn.Dropout(act_dropout)
        self.tanh_layer = nn.Tanh()
        self.softmax = nn.Softmax()
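
The three embedding tables above are typically summed to form the encoder input; a minimal, self-contained sketch of that step (shapes and sizes are illustrative):

import paddle
import paddle.nn as nn

vocab_size, type_size, max_pos, hidden_size = 30000, 2, 512, 256
word_emb = nn.Embedding(vocab_size, hidden_size)
sent_emb = nn.Embedding(type_size, hidden_size)
pos_emb = nn.Embedding(max_pos, hidden_size)

token_ids = paddle.randint(low=0, high=vocab_size, shape=[2, 16])
type_ids = paddle.zeros([2, 16], dtype='int64')
pos_ids = paddle.arange(16).unsqueeze(0).expand([2, 16])
x = word_emb(token_ids) + sent_emb(type_ids) + pos_emb(pos_ids)
print(x.shape)  # [2, 16, 256]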
Example 19
    def __init__(self,
                 src_vocab_size,
                 trg_vocab_size,
                 max_length=256,
                 n_layer=6,
                 n_head=8,
                 d_model=512,
                 d_inner_hid=2048,
                 dropout=0.1,
                 weight_sharing=False,
                 bos_id=0,
                 eos_id=1,
                 waitk=-1):
        super(SimultaneousTransformer, self).__init__()
        self.trg_vocab_size = trg_vocab_size
        self.emb_dim = d_model
        self.bos_id = bos_id
        self.eos_id = eos_id
        self.dropout = dropout
        self.waitk = waitk
        self.n_layer = n_layer
        self.n_head = n_head
        self.d_model = d_model

        self.src_word_embedding = WordEmbedding(
            vocab_size=src_vocab_size, emb_dim=d_model, bos_id=self.bos_id)
        self.src_pos_embedding = PositionalEmbedding(
            emb_dim=d_model, max_length=max_length+1)
        if weight_sharing:
            assert src_vocab_size == trg_vocab_size, (
                "Vocabularies in source and target should be same for weight sharing."
            )
            self.trg_word_embedding = self.src_word_embedding
            self.trg_pos_embedding = self.src_pos_embedding
        else:
            self.trg_word_embedding = WordEmbedding(
                vocab_size=trg_vocab_size, emb_dim=d_model, bos_id=self.bos_id)
            self.trg_pos_embedding = PositionalEmbedding(
                emb_dim=d_model, max_length=max_length+1)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_head,
            dim_feedforward=d_inner_hid,
            dropout=dropout,
            activation='relu',
            normalize_before=True,
            bias_attr=[False, True])
        encoder_norm = nn.LayerNorm(d_model)
        self.encoder = nn.TransformerEncoder(
            encoder_layer=encoder_layer, num_layers=n_layer, norm=encoder_norm)

        decoder_layer = DecoderLayer(
            d_model=d_model,
            nhead=n_head,
            dim_feedforward=d_inner_hid,
            dropout=dropout,
            activation='relu',
            normalize_before=True,
            bias_attr=[False, False, True])
        decoder_norm = nn.LayerNorm(d_model)
        self.decoder = Decoder(
            decoder_layer=decoder_layer, num_layers=n_layer, norm=decoder_norm)

        if weight_sharing:
            self.linear = lambda x: paddle.matmul(
                x=x, y=self.trg_word_embedding.word_embedding.weight, transpose_y=True)
        else:
            self.linear = nn.Linear(
                in_features=d_model,
                out_features=trg_vocab_size,
                bias_attr=False)
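
With weight_sharing enabled, the output projection above reuses the target word-embedding matrix instead of a separate nn.Linear. A standalone sketch of that tying, assuming a plain nn.Embedding:

import paddle
import paddle.nn as nn

d_model, vocab = 512, 10000
emb = nn.Embedding(vocab, d_model)
hidden = paddle.rand([2, 7, d_model])  # decoder output
logits = paddle.matmul(hidden, emb.weight, transpose_y=True)
print(logits.shape)  # [2, 7, 10000]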
Example 20
    def __init__(
            self,
            user_size,
            adgroup_size,
            pid_size,
            cms_segid_size,
            cms_group_size,
            final_gender_size,
            age_level_size,
            pvalue_level_size,
            shopping_level_size,
            occupation_size,
            new_user_class_level_size,
            campaign_size,
            customer_size,
            cate_size,
            brand_size,  # all parameters above are sparse feature sizes
            sparse_embed_size=4,
            att_embedding_size=8,
            sess_count=5,
            sess_max_length=10,
            l2_reg_embedding=1e-6):
        super().__init__()

        # feature size
        self.user_size = user_size
        self.adgroup_size = adgroup_size
        self.pid_size = pid_size
        self.cms_segid_size = cms_segid_size
        self.cms_group_size = cms_group_size
        self.final_gender_size = final_gender_size
        self.age_level_size = age_level_size
        self.pvalue_level_size = pvalue_level_size
        self.shopping_level_size = shopping_level_size
        self.occupation_size = occupation_size
        self.new_user_class_level_size = new_user_class_level_size
        self.campaign_size = campaign_size
        self.customer_size = customer_size
        self.cate_size = cate_size
        self.brand_size = brand_size

        # sparse embed size
        self.sparse_embed_size = sparse_embed_size

        # transform attention embed size
        self.att_embedding_size = att_embedding_size

        # hyper-parameters
        self.sess_count = sess_count
        self.sess_max_length = sess_max_length

        # sparse embedding layer
        self.userid_embeddings_var = paddle.nn.Embedding(
            self.user_size,
            self.sparse_embed_size,
            sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.adgroup_embeddings_var = paddle.nn.Embedding(
            self.adgroup_size,
            self.sparse_embed_size,
            sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.pid_embeddings_var = paddle.nn.Embedding(
            self.pid_size,
            self.sparse_embed_size,
            #sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.cmsid_embeddings_var = paddle.nn.Embedding(
            self.cms_segid_size,
            self.sparse_embed_size,
            #sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.cmsgroup_embeddings_var = paddle.nn.Embedding(
            self.cms_group_size,
            self.sparse_embed_size,
            #sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.gender_embeddings_var = paddle.nn.Embedding(
            self.final_gender_size,
            self.sparse_embed_size,
            #sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.age_embeddings_var = paddle.nn.Embedding(
            self.age_level_size,
            self.sparse_embed_size,
            #sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.pvalue_embeddings_var = paddle.nn.Embedding(
            self.pvalue_level_size,
            self.sparse_embed_size,
            #sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.shopping_embeddings_var = paddle.nn.Embedding(
            self.shopping_level_size,
            self.sparse_embed_size,
            #sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.occupation_embeddings_var = paddle.nn.Embedding(
            self.occupation_size,
            self.sparse_embed_size,
            #sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.new_user_class_level_embeddings_var = paddle.nn.Embedding(
            self.new_user_class_level_size,
            self.sparse_embed_size,
            #sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.campaign_embeddings_var = paddle.nn.Embedding(
            self.campaign_size,
            self.sparse_embed_size,
            sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.customer_embeddings_var = paddle.nn.Embedding(
            self.customer_size,
            self.sparse_embed_size,
            sparse=True,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.cate_embeddings_var = paddle.nn.Embedding(
            self.cate_size,
            self.sparse_embed_size,
            sparse=True,
            padding_idx=0,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        self.brand_embeddings_var = paddle.nn.Embedding(
            self.brand_size,
            self.sparse_embed_size,
            sparse=True,
            padding_idx=0,
            weight_attr=paddle.ParamAttr(
                regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
                initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

        # sess interest extractor layer
        self.position_encoding = PositionalEncoder(2 * self.sparse_embed_size)
        self.transform = nn.TransformerEncoderLayer(
            d_model=self.att_embedding_size,
            nhead=8,
            dim_feedforward=64,
            weight_attr=self._get_weight_attr(),
            bias_attr=False,
            dropout=0.0)

        # sess interest interacting layer
        self.bilstm = nn.LSTM(2 * self.sparse_embed_size,
                              2 * self.sparse_embed_size,
                              num_layers=2,
                              direction='bidirectional')

        # sess interest activating layer
        self.transform_actpool = AttentionSequencePoolingLayer(
            weight_normalization=True, name='transform')
        self.lstm_actpool = AttentionSequencePoolingLayer(
            weight_normalization=True, name='lstm')

        # MLP module
        self.mlp = MLP(mlp_hidden_units=[77, 200, 80])
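
_get_weight_attr is referenced above but not defined in this snippet; a plausible placeholder (purely an assumption, mirroring the ParamAttr pattern used for the embeddings) could look like:

import paddle
import paddle.nn as nn

def _get_weight_attr(l2_reg=1e-6, std=0.0001):
    # Hypothetical stand-in for the undefined self._get_weight_attr():
    # same regularizer/initializer style as the embedding ParamAttrs above.
    return paddle.ParamAttr(
        regularizer=paddle.regularizer.L2Decay(l2_reg),
        initializer=nn.initializer.TruncatedNormal(mean=0.0, std=std))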