Example #1
    def __init__(self,
                 num_layers,
                 num_heads,
                 embed_dim,
                 ff_dim,
                 dropout=0.,
                 norm_in=True):
        super(Decoder, self).__init__()
        self.self_atts = nn.ModuleList([])
        self.enc_dec_atts = nn.ModuleList([])
        self.pos_ffs = nn.ModuleList([])
        self.lnorms = nn.ModuleList([])
        for i in range(num_layers):
            self.self_atts.append(
                Attention(embed_dim, num_heads, dropout=dropout))
            self.enc_dec_atts.append(
                Attention(embed_dim, num_heads, dropout=dropout))
            self.pos_ffs.append(
                PositionWiseFeedForward(embed_dim, ff_dim, dropout=dropout))
            self.lnorms.append(
                nn.ModuleList(
                    [nn.LayerNorm(embed_dim, eps=1e-6) for _ in range(3)]))

        self.last_lnorm = nn.LayerNorm(embed_dim,
                                       eps=1e-6) if norm_in else None
        self.dropout = dropout
        self.num_layers = num_layers
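A minimal instantiation sketch for this constructor, assuming the surrounding Decoder class and the Attention and PositionWiseFeedForward modules it references are defined in the same codebase; the hyperparameter values below are illustrative, not taken from the original code:

    # Hypothetical configuration for a 6-layer decoder
    decoder = Decoder(num_layers=6,
                      num_heads=8,
                      embed_dim=512,
                      ff_dim=2048,
                      dropout=0.1,
                      norm_in=True)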
Example #2
    def encoder(self, inputs):
        if K.dtype(inputs) != 'int32':
            inputs = K.cast(inputs, 'int32')

        masks = K.equal(inputs, 0)
        # Embeddings
        embeddings = K.gather(self.embeddings, inputs)
        embeddings *= self._model_dim**0.5  # Scale
        # Position Encodings
        position_encodings = PositionEncoding(self._model_dim)(embeddings)
        # Embeddings + position encodings
        encodings = embeddings + position_encodings
        # Dropout
        encodings = K.dropout(encodings, self._dropout_rate)

        for i in range(self._encoder_stack):
            # Multi-head-Attention
            attention = MultiHeadAttention(self._n_heads,
                                           self._model_dim // self._n_heads)
            attention_input = [encodings, encodings, encodings, masks]
            attention_out = attention(attention_input)
            # Add & Norm
            attention_out += encodings
            attention_out = LayerNormalization()(attention_out)
            # Feed-Forward
            ff = PositionWiseFeedForward(self._model_dim,
                                         self._feed_forward_size)
            ff_out = ff(attention_out)
            # Add & Norm
            ff_out += attention_out
            encodings = LayerNormalization()(ff_out)

        return encodings, masks
Example #3
    def build(self, input_shape):
        self.embeddings = self.add_weight(shape=(self._vocab_size,
                                                 self._model_dim),
                                          initializer='glorot_uniform',
                                          trainable=True,
                                          name="embeddings")
        self.EncoderPositionEncoding = PositionEncoding(self._model_dim)
        self.EncoderMultiHeadAttetions = [
            MultiHeadAttention(self._n_heads, self._model_dim // self._n_heads)
            for _ in range(self._encoder_stack)
        ]
        self.EncoderLayerNorms0 = [
            LayerNormalization() for _ in range(self._encoder_stack)
        ]
        self.EncoderPositionWiseFeedForwards = [
            PositionWiseFeedForward(self._model_dim, self._feed_forward_size)
            for _ in range(self._encoder_stack)
        ]
        self.EncoderLayerNorms1 = [
            LayerNormalization() for _ in range(self._encoder_stack)
        ]
        self.DecoderPositionEncoding = PositionEncoding(self._model_dim)
        self.DecoderMultiHeadAttetions0 = [
            MultiHeadAttention(self._n_heads,
                               self._model_dim // self._n_heads,
                               future=True) for _ in range(self._decoder_stack)
        ]
        self.DecoderLayerNorms0 = [
            LayerNormalization() for _ in range(self._decoder_stack)
        ]
        self.DecoderMultiHeadAttetions1 = [
            MultiHeadAttention(self._n_heads, self._model_dim // self._n_heads)
            for _ in range(self._decoder_stack)
        ]
        self.DecoderLayerNorms1 = [
            LayerNormalization() for _ in range(self._decoder_stack)
        ]
        self.DecoderPositionWiseFeedForwards = [
            PositionWiseFeedForward(self._model_dim, self._feed_forward_size)
            for _ in range(self._decoder_stack)
        ]
        self.DecoderLayerNorms2 = [
            LayerNormalization() for _ in range(self._decoder_stack)
        ]
        super(Transformer, self).build(input_shape)
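This build method pre-creates one sub-layer list per stack position. A sketch, assuming these attributes are meant to be reused in the forward pass, of how the encoder loop could index them instead of constructing fresh layers on every call (as the standalone encoder example above does), given encodings and masks prepared as in that example:

    for i in range(self._encoder_stack):
        # Self-attention over the current encodings, reusing the built layer
        attention_out = self.EncoderMultiHeadAttetions[i](
            [encodings, encodings, encodings, masks])
        attention_out += encodings                                   # Add
        attention_out = self.EncoderLayerNorms0[i](attention_out)    # Norm
        # Position-wise feed-forward, again reusing the built layer
        ff_out = self.EncoderPositionWiseFeedForwards[i](attention_out)
        ff_out += attention_out                                      # Add
        encodings = self.EncoderLayerNorms1[i](ff_out)               # Norm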
Example #4
    def decoder(self, inputs):
        decoder_inputs, encoder_encodings, encoder_masks = inputs
        if K.dtype(decoder_inputs) != 'int32':
            decoder_inputs = K.cast(decoder_inputs, 'int32')

        decoder_masks = K.equal(decoder_inputs, 0)
        # Embeddings
        embeddings = K.gather(self.embeddings, decoder_inputs)
        embeddings *= self._model_dim**0.5  # Scale
        # Position Encodings
        position_encodings = PositionEncoding(self._model_dim)(embeddings)
        # Embeddings + position encodings
        encodings = embeddings + position_encodings
        # Dropout
        encodings = K.dropout(encodings, self._dropout_rate)

        for i in range(self._decoder_stack):
            # Masked-Multi-head-Attention
            masked_attention = MultiHeadAttention(self._n_heads,
                                                  self._model_dim //
                                                  self._n_heads,
                                                  future=True)
            masked_attention_input = [
                encodings, encodings, encodings, decoder_masks
            ]
            masked_attention_out = masked_attention(masked_attention_input)
            # Add & Norm
            masked_attention_out += encodings
            masked_attention_out = LayerNormalization()(masked_attention_out)

            # Multi-head-Attention
            attention = MultiHeadAttention(self._n_heads,
                                           self._model_dim // self._n_heads)
            attention_input = [
                masked_attention_out, encoder_encodings, encoder_encodings,
                encoder_masks
            ]
            attention_out = attention(attention_input)
            # Add & Norm
            attention_out += masked_attention_out
            attention_out = LayerNormalization()(attention_out)

            # Feed-Forward
            ff = PositionWiseFeedForward(self._model_dim,
                                         self._feed_forward_size)
            ff_out = ff(attention_out)
            # Add & Norm
            ff_out += attention_out
            encodings = LayerNormalization()(ff_out)

        # The pre-softmax projection shares its weights with the embedding matrix
        linear_projection = K.dot(encodings, K.transpose(self.embeddings))
        outputs = K.softmax(linear_projection)
        return outputs
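A sketch of how the encoder and decoder methods above might be wired together in the model's forward pass, assuming both live on the same Keras model class shown in the build example; the call signature here is an assumption, not part of the original listing:

    def call(self, inputs):
        encoder_inputs, decoder_inputs = inputs
        # Encode the source sequence and keep its padding mask
        encoder_encodings, encoder_masks = self.encoder(encoder_inputs)
        # Decode while attending over the encoder outputs
        outputs = self.decoder(
            [decoder_inputs, encoder_encodings, encoder_masks])
        return outputs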
Example #5
def get_age_model(DATA):

    feed_forward_size = 2048
    max_seq_len = 150
    model_dim = 256 + 256 + 64 + 32 + 8 + 16

    input_creative_id = Input(shape=(max_seq_len, ), name='creative_id')
    x1 = Embedding(
        input_dim=NUM_creative_id + 1,
        output_dim=256,
        weights=[DATA['creative_id_emb']],
        trainable=args.not_train_embedding,
        #    trainable=False,
        input_length=150,
        mask_zero=True)(input_creative_id)
    # encodings = PositionEncoding(model_dim)(x1)
    # encodings = Add()([embeddings, encodings])

    input_ad_id = Input(shape=(max_seq_len, ), name='ad_id')
    x2 = Embedding(
        input_dim=NUM_ad_id + 1,
        output_dim=256,
        weights=[DATA['ad_id_emb']],
        trainable=args.not_train_embedding,
        #    trainable=False,
        input_length=150,
        mask_zero=True)(input_ad_id)

    input_product_id = Input(shape=(max_seq_len, ), name='product_id')
    x3 = Embedding(
        input_dim=NUM_product_id + 1,
        output_dim=32,
        weights=[DATA['product_id_emb']],
        trainable=args.not_train_embedding,
        #    trainable=False,
        input_length=150,
        mask_zero=True)(input_product_id)

    input_advertiser_id = Input(shape=(max_seq_len, ), name='advertiser_id')
    x4 = Embedding(
        input_dim=NUM_advertiser_id + 1,
        output_dim=64,
        weights=[DATA['advertiser_id_emb']],
        trainable=args.not_train_embedding,
        #    trainable=False,
        input_length=150,
        mask_zero=True)(input_advertiser_id)

    input_industry = Input(shape=(max_seq_len, ), name='industry')
    x5 = Embedding(
        input_dim=NUM_industry + 1,
        output_dim=16,
        weights=[DATA['industry_emb']],
        trainable=True,
        #    trainable=False,
        input_length=150,
        mask_zero=True)(input_industry)

    input_product_category = Input(shape=(max_seq_len, ),
                                   name='product_category')
    x6 = Embedding(
        input_dim=NUM_product_category + 1,
        output_dim=8,
        weights=[DATA['product_category_emb']],
        trainable=True,
        #    trainable=False,
        input_length=150,
        mask_zero=True)(input_product_category)

    # (bs, 150, 632): 632 = 256 + 256 + 32 + 64 + 16 + 8
    encodings = layers.Concatenate(axis=2)([x1, x2, x3, x4, x5, x6])
    # (bs, 150)
    masks = tf.equal(input_creative_id, 0)

    # (bs, 150, 632): 8 heads x 79 dims per head
    attention_out = MultiHeadAttention(
        8, 79)([encodings, encodings, encodings, masks])

    # Add & Norm
    attention_out += encodings
    attention_out = LayerNormalization()(attention_out)
    # Feed-Forward
    ff = PositionWiseFeedForward(model_dim, feed_forward_size)
    ff_out = ff(attention_out)
    # Add & Norm
    # ff_out and attention_out are both (bs, 150, 632), so the residual add is shape-compatible
    ff_out += attention_out
    encodings = LayerNormalization()(ff_out)
    encodings = GlobalMaxPooling1D()(encodings)
    encodings = Dropout(0.2)(encodings)

    # output_gender = Dense(2, activation='softmax', name='gender')(encodings)
    output_age = Dense(10, activation='softmax', name='age')(encodings)

    model = Model(inputs=[
        input_creative_id, input_ad_id, input_product_id, input_advertiser_id,
        input_industry, input_product_category
    ],
                  outputs=[output_age])

    model.compile(
        optimizer=optimizers.Adam(2.5e-4),
        loss={
            # 'gender': losses.CategoricalCrossentropy(from_logits=False),
            'age': losses.CategoricalCrossentropy(from_logits=False)
        },
        # loss_weights=[0.4, 0.6],
        metrics=['accuracy'])
    return model
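A hedged usage sketch for this builder, assuming DATA already holds the six pretrained embedding matrices referenced above, X_train is a dict keyed by the Input names with arrays of shape (n_samples, 150), and y_age is one-hot encoded over 10 classes; batch size and epoch count are illustrative:

    model = get_age_model(DATA)
    model.summary()
    model.fit(
        {
            'creative_id': X_train['creative_id'],
            'ad_id': X_train['ad_id'],
            'product_id': X_train['product_id'],
            'advertiser_id': X_train['advertiser_id'],
            'industry': X_train['industry'],
            'product_category': X_train['product_category'],
        },
        {'age': y_age},  # one-hot targets, 10 classes
        batch_size=256,
        epochs=5,
        validation_split=0.1)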