Example no. 1
    def test_fit_self(self):
        input_layer = keras.layers.Input(
            shape=(2, 3),
            name='Input',
        )
        att_layer = MultiHeadAttention(
            head_num=3,
            name='Multi-Head-1',
        )(input_layer)
        dense_layer = keras.layers.Dense(units=3, name='Dense-1')(att_layer)
        att_layer = MultiHeadAttention(
            head_num=3,
            name='Multi-Head-2',
        )(dense_layer)
        output_layer = keras.layers.Dense(units=3, name='Dense-2')(att_layer)
        model = keras.models.Model(inputs=input_layer, outputs=output_layer)
        model.compile(
            optimizer='adam',
            loss='mse',
            metrics={},
        )
        model.summary()

        def _generator(batch_size=32):
            while True:
                inputs = np.random.random((batch_size, 2, 3))
                outputs = np.asarray([[[0.0, -0.1, 0.2]] * 2] * batch_size)
                yield inputs, outputs

        model.fit_generator(
            generator=_generator(),
            steps_per_epoch=1000,
            epochs=10,
            validation_data=_generator(),
            validation_steps=100,
            callbacks=[
                keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
            ],
        )
        model_path = os.path.join(tempfile.gettempdir(),
                                  'test_save_load_%f.h5' % np.random.random())
        model.save(model_path)
        model = keras.models.load_model(model_path,
                                        custom_objects={
                                            'MultiHeadAttention':
                                            MultiHeadAttention,
                                        })
        for inputs, _ in _generator(batch_size=3):
            predicts = model.predict(inputs)
            expect = np.asarray([[[0.0, -0.1, 0.2]] * 2] * 3)
            actual = np.round(predicts, decimals=1)
            self.assertTrue(np.allclose(expect, actual), (expect, actual))
            break
def local_context_learning(input_length, input_dim, output_dim, hidden_dim,
                           filters_num, kernel_val, learning_rate, drop_rate):
    basic_input = Input(shape=(input_length, input_dim))
    label_input = Input(shape=(1, ))
    weighted_input = adding_weight(input_length,
                                   input_dim)([basic_input, label_input])
    rnn_output = GRU(units=hidden_dim, return_sequences=True)(weighted_input)
    rnn_att = SeqSelfAttention(attention_activation='sigmoid')(rnn_output)
    cnn_output = Conv1D(filters=filters_num,
                        kernel_size=kernel_val,
                        padding="same")(weighted_input)
    cnn_output_reformat = Dense(hidden_dim)(cnn_output)
    cnn_att = SeqSelfAttention(
        attention_activation='sigmoid')(cnn_output_reformat)
    new_value = Concatenate(axis=1)([rnn_att, cnn_att])
    new_keys = Lambda(lambda x: ones_like(x))(new_value)
    new_result = MultiHeadAttention(head_num=2)(
        [weighted_input, new_keys, new_value])
    result = Flatten()(new_result)
    result_fix = Dropout(rate=drop_rate)(result)
    output = Dense(output_dim)(result_fix)
    fixed_output = Activation(activation='sigmoid')(output)
    model = Model([basic_input, label_input], fixed_output)
    ada = adam(lr=learning_rate)
    model.compile(optimizer=ada, loss='categorical_crossentropy')
    return model
def build_model(input_shape):
    emotive_params = 266
    n_feature_maps = 64

    main_input = keras.layers.Input(input_shape)
    input_layer = keras.layers.Input((input_shape[1], input_shape[2]))

    lstm_layer = keras.layers.LSTM(emotive_params,
                                   return_sequences=True)(input_layer)

    att_layer = MultiHeadAttention(
        head_num=14,
        name='Multi-Head',
    )(lstm_layer)

    lstm_layer = keras.layers.LSTM(emotive_params,
                                   return_sequences=True)(att_layer)
    gap_layerX = keras.layers.pooling.GlobalAveragePooling1D()(lstm_layer)

    # See architecture
    cnn_model = keras.layers.TimeDistributed(
        keras.models.Model(inputs=input_layer, outputs=gap_layerX))(main_input)
    lstm_layer = keras.layers.LSTM(n_feature_maps,
                                   return_sequences=True)(cnn_model)

    gap_layerX = keras.layers.pooling.GlobalAveragePooling1D()(lstm_layer)

    output_layer = keras.layers.Dense(n_feature_maps,
                                      activation='relu')(gap_layerX)
    output_layer = keras.layers.Dense(2, activation='softmax')(output_layer)

    model = keras.models.Model(inputs=main_input, outputs=output_layer)

    model.summary()
    return model
Example no. 4
    def __init__(self, length, word_num):
        self.input_length = length
        self.batch_size = 512 * 10
        self.word_num = word_num

        self.input_dimension = 128
        self.lstm_dim = 64
        self.middle_num = 2
        self.output_dimension = 64

        #self.input_embedding=keras.layers.Embedding(input_dim=self.word_num,output_dim=self.input_dimension,weights=[weights])
        self.input_embedding = keras.layers.Embedding(
            input_dim=self.word_num, output_dim=self.input_dimension)
        self.bilstm = keras.layers.Bidirectional(
            keras.layers.LSTM(self.lstm_dim // 2, return_sequences=True))
        self.middle_layer = []
        for i in range(0, self.middle_num):
            self.middle_layer.append(
                MultiHeadAttention(head_num=4,
                                   name="middle_layer_{}".format(i)))
        #self.outter_layer=keras.layers.Dense(self.output_dimension, activation="tanh", input_dim=self.input_length*self.lstm_dim, use_bias= True,name="ouput_layer")
        self.outter_layer = keras.layers.Dense(self.output_dimension,
                                               input_dim=self.input_length *
                                               self.lstm_dim,
                                               use_bias=True,
                                               name="ouput_layer")
Example no. 5
def build_model(input_shape):
    input_layer = keras.layers.Input(input_shape)

    conv1 = keras.layers.Conv1D(filters=128, kernel_size=8,
                                padding='same')(input_layer)
    conv1 = keras.layers.normalization.BatchNormalization()(conv1)
    conv1 = keras.layers.Activation(activation='relu')(conv1)

    conv2 = keras.layers.Conv1D(filters=256, kernel_size=5,
                                padding='same')(conv1)
    conv2 = keras.layers.normalization.BatchNormalization()(conv2)
    conv2 = keras.layers.Activation('relu')(conv2)

    conv3 = keras.layers.Conv1D(128, kernel_size=3, padding='same')(conv2)
    conv3 = keras.layers.normalization.BatchNormalization()(conv3)
    conv3 = keras.layers.Activation('relu')(conv3)

    # Attention

    lstm_layer = keras.layers.LSTM(64, return_sequences=True)(conv3)
    att_layer = MultiHeadAttention(head_num=8)(lstm_layer)
    lstm_layer = keras.layers.LSTM(64, return_sequences=True)(att_layer)

    gap_layer = keras.layers.pooling.GlobalAveragePooling1D()(lstm_layer)

    output_layer = keras.layers.Dense(2, activation='softmax')(gap_layer)

    model = keras.models.Model(inputs=input_layer, outputs=output_layer)
    model.summary()

    return model
 def test_compare_brute(self):
     for case in range(10):
         batch_size = np.random.randint(1, 10)
         token_num = np.random.randint(2, 10)
         head_num = np.random.randint(1, 5)
         feature_dim = np.random.randint(1, 5) * head_num
         seq_len = np.random.randint(1, 10)
         weights = []
         for i in range(8):
             if i % 2 == 0:
                 weights.append(np.random.random(
                     (feature_dim, feature_dim)))
             else:
                 weights.append(np.random.random((feature_dim, )))
         input_q_layer = keras.layers.Input(shape=(None, ))
         input_kv_layer = keras.layers.Input(shape=(None, ))
         embed_q_layer = keras.layers.Embedding(
             input_dim=token_num,
             output_dim=feature_dim,
             mask_zero=True,
         )(input_q_layer)
         embed_kv_layer = keras.layers.Embedding(
             input_dim=token_num,
             output_dim=feature_dim,
             mask_zero=True,
         )(input_kv_layer)
         att_layer = MultiHeadAttention(
             head_num=head_num,
             weights=weights,
             name='Multi-Head-1',
         )([embed_q_layer, embed_kv_layer, embed_kv_layer])
         att_brute_layer = MultiHeadAttentionBrute(
             head_num=head_num,
             weights=weights,
             name='Multi-Head-2',
         )([embed_q_layer, embed_kv_layer, embed_kv_layer])
         model = keras.models.Model(inputs=[input_q_layer, input_kv_layer],
                                    outputs=[att_layer, att_brute_layer])
         model.compile(optimizer='adam', loss='mse', metrics={})
         if case == 0:
             model.summary(line_length=120)
         data_q = np.random.randint(low=0,
                                    high=token_num,
                                    size=(batch_size, seq_len))
         data_kv = np.random.randint(low=0,
                                     high=token_num,
                                     size=(batch_size, seq_len))
         for i in range(batch_size):
             if np.sum(data_q[i]) == 0:
                 data_q[i][np.random.randint(
                     low=0,
                     high=seq_len)] = np.random.randint(low=1,
                                                        high=token_num)
             if np.sum(data_kv[i]) == 0:
                 data_kv[i][np.random.randint(
                     low=0,
                     high=seq_len)] = np.random.randint(low=1,
                                                        high=token_num)
         predicts = model.predict([data_q, data_kv])
         self.assertTrue(np.allclose(predicts[0], predicts[1]), predicts)
def getModel(voc_size, embedding_matrix):
    input_layer = Input(name='Input',
                        shape=(MAX_WORDS_TEXT, ),
                        dtype="float32")

    embedding_layer = Embedding(voc_size,
                                WORD_EMBEDDINGS_SIZE,
                                weights=[embedding_matrix],
                                input_length=MAX_WORDS_TEXT,
                                trainable=True)(input_layer)

    rnn_layer = Bidirectional(MultiplicativeLSTM(
        WORD_EMBEDDINGS_SIZE,
        return_sequences=True,
        dropout=0.2,
        recurrent_dropout=0.2,
        activation='pentanh',
        recurrent_activation='pentanh'),
                              merge_mode='concat')(embedding_layer)

    attention_layer = MultiHeadAttention(head_num=4)(rnn_layer)
    flatten = Flatten()(attention_layer)
    final = Dense(14, activation='sigmoid')(flatten)

    mdl = Model(inputs=input_layer, outputs=final)
    mdl.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy', auc_roc])

    print(mdl.summary())
    return mdl
 def _attention_builder(x):
     return MultiHeadAttention(
         head_num=head_num,
         activation=activation,
         history_only=history_only,
         trainable=trainable,
         name=name,
     )(x)
Example no. 9
    def test_fit_zeros(self):
        def _leaky_relu(x):
            return keras.activations.relu(x, alpha=0.01)

        input_layer = keras.layers.Input(
            shape=(2, 3),
            name='Input',
        )
        norm_layer = LayerNormalization(
            name='Layer-Normalization-1',
            trainable=False,
        )(input_layer)
        att_layer = MultiHeadAttention(
            head_num=3, activation=_leaky_relu,
            name='Multi-Head-Attentions')(norm_layer)
        dense_layer = keras.layers.Dense(units=3, name='Dense-1')(att_layer)
        norm_layer = LayerNormalization(
            name='Layer-Normalization-2',
            trainable=False,
        )(dense_layer)
        dense_layer = keras.layers.Dense(units=3, name='Dense-2')(norm_layer)
        model = keras.models.Model(
            inputs=input_layer,
            outputs=dense_layer,
        )
        model.compile(
            optimizer=keras.optimizers.Adam(lr=1e-3),
            loss='mse',
            metrics={},
        )
        model.summary()

        def _generator_zeros(batch_size=32):
            while True:
                batch_inputs = np.zeros((batch_size, 2, 3))
                batch_outputs = np.asarray([[[0.0, -0.1, 0.2]] * 2] *
                                           batch_size)
                yield batch_inputs, batch_outputs

        model.fit_generator(
            generator=_generator_zeros(),
            steps_per_epoch=100,
            epochs=100,
            validation_data=_generator_zeros(),
            validation_steps=100,
            callbacks=[
                keras.callbacks.EarlyStopping(monitor='val_loss',
                                              patience=5,
                                              min_delta=1e-4)
            ],
        )
        for inputs, _ in _generator_zeros(batch_size=3):
            predicts = model.predict(inputs)
            expect = np.round(np.asarray([[[0.0, -0.1, 0.2]] * 2] * 3),
                              decimals=1)
            actual = np.round(predicts, decimals=1)
            self.assertTrue(np.allclose(expect, actual), (expect, actual))
            break
Example no. 10
def character_network(lstm_layers: int, lstm_units: int, char_lstm_units: int,
                      value_first: int, value_second: int,
                      label_embedding_dim: int, max_length, max_word_length,
                      pos_tag: bool, character: bool, attention,
                      custom_layer: bool) -> Model:
    word_input = Input(shape=(max_length, 200), name='word_input')
    input_list = [word_input]
    lstm_list = [word_input]
    if pos_tag:
        pos_input = Input(shape=(max_length, 20), name='pos_input')
        input_list += [pos_input]
        lstm_list += [pos_input]
    if character:
        char_input = Input(shape=(max_length, max_word_length), name='char_input')
        char_embedding = TimeDistributed(Embedding(input_dim=number_of_charachters, output_dim=25))(char_input)
        char_lstm = TimeDistributed(Bidirectional(LSTM(char_lstm_units, return_sequences=False)))(char_embedding)
        lstm_list = input_list + [char_lstm]
        input_list += [char_input]
    if len(input_list) == 1:
        lstm_input = word_input
    else:
        lstm_input = Concatenate()(lstm_list)
    word_lstm = Bidirectional(LSTM(lstm_units, return_sequences=True,
                                       dropout=0.2, recurrent_dropout=0.2))(lstm_input)
    for i in range(lstm_layers - 1):
        word_lstm = Bidirectional(LSTM(lstm_units, return_sequences=True,
                                       dropout=0.2, recurrent_dropout=0.2))(word_lstm)
    if attention:
        attention = MultiHeadAttention(head_num=8)(word_lstm)
        dense_first = TimeDistributed(Dense(value_first, activation=None))(attention)
    else:
        dense_first = TimeDistributed(Dense(value_first, activation=None))(word_lstm)
    crf_layer = ChainCRF(name='first_crf')
    first_output = crf_layer(dense_first)
    argmax = Lambda(lambda x: K.argmax(x))(first_output)
    label_embedding = Embedding(input_dim=value_first+1, output_dim=label_embedding_dim, trainable=True)(argmax)
    # print(entity_aware_matrix.shape)
    final_input = Concatenate(axis=2)([word_lstm, label_embedding])
    # print(second_input.shape)
    if custom_layer:
        entity_aware = EntityAwareDecodingLayer()([word_lstm, argmax, label_embedding])
        entity_aware_matrix = MyRepeatVector(max_length, 2*lstm_units+label_embedding_dim)(entity_aware)
        final_input = Concatenate(axis=2)([final_input, entity_aware_matrix])
    # print(final_input.shape)
    dense_second = TimeDistributed(Dense(value_second, activation=None))(final_input)
    second_crf = ChainCRF(name='second_crf')
    second_output = second_crf(dense_second)
    model = Model(inputs=input_list, outputs=[first_output, second_output])
    algorithm = Adam(lr=0.0001, decay=0, beta_1=0.9, beta_2=0.999)
    losses = {
        "first_crf": crf_layer.loss,
        "second_crf": second_crf.loss,
    }
    model.compile(loss=losses,
                  optimizer=algorithm,
                  metrics=['accuracy'])
    model.summary()
    return model
Example no. 11
 def test_invalid_head_num(self):
     with self.assertRaises(IndexError):
         input_layer = keras.layers.Input(
             shape=(2, 3),
             name='Input',
         )
         MultiHeadAttention(
             head_num=2,
             name='Multi-Head',
         )(input_layer)
def build_model(emb_cid, emb_advid):

    inp1 = layers.Input(shape=(max_len, ))
    inp2 = layers.Input(shape=(max_len, ))

    emb1 = layers.Embedding(input_dim=emb_cid.shape[0],
                            output_dim=emb_cid.shape[1],
                            input_length=max_len,
                            weights=[emb_cid],
                            trainable=False)(inp1)
    emb2 = layers.Embedding(input_dim=emb_advid.shape[0],
                            output_dim=emb_advid.shape[1],
                            input_length=max_len,
                            weights=[emb_advid],
                            trainable=False)(inp2)

    sdrop = layers.SpatialDropout1D(rate=0.2)

    emb1 = sdrop(emb1)
    emb2 = sdrop(emb2)

    content = layers.Concatenate()([emb1, emb2])

    mha = MultiHeadAttention(head_num=16)(content)
    mha = layers.Dropout(0.01)(mha)
    mha = layers.Add()([content, mha])
    mha = LayerNormalization()(mha)
    mha = layers.Dropout(0.01)(mha)
    mha_ff = FeedForward(256)(mha)
    mha_out = layers.Add()([mha, mha_ff])
    mha_out = LayerNormalization()(mha_out)

    lstm = layers.Bidirectional(layers.LSTM(128,
                                            return_sequences=True))(mha_out)

    avg_pool = layers.GlobalAveragePooling1D()(lstm)
    max_pool = layers.GlobalMaxPool1D()(lstm)

    x = layers.Concatenate()([avg_pool, max_pool])

    x = layers.Dense(128, activation='relu')(x)
    x = layers.BatchNormalization()(x)

    x = layers.Dense(64, activation='relu')(x)
    x = layers.BatchNormalization()(x)

    x = layers.Dropout(0.1)(x)

    out = layers.Dense(10, activation='softmax')(x)
    model = keras.Model(inputs=[inp1, inp2], outputs=out)
    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.Adam(1e-3),
                  metrics=['accuracy'])

    return model
Example no. 13
 def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
     super(TransformerBlock, self).__init__()
     self.ffn = Sequential([
         Dense(ff_dim, activation="relu"),
         Dense(embed_dim),
     ])
     self.layernorm1 = LayerNormalization()
     self.layernorm2 = LayerNormalization()
     self.dropout1 = Dropout(rate)
     self.dropout2 = Dropout(rate)
     self.att = MultiHeadAttention(head_num=num_heads, name='att_layer')
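
The snippet above only defines the constructor; a call method along these lines would typically complete the block. This is a sketch (assuming self.att performs self-attention on a single input, as keras-multi-head's MultiHeadAttention does when given one tensor), not the original author's code:

    def call(self, inputs, training=None):
        # Self-attention, then residual connection and normalization.
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        # Position-wise feed-forward, then residual connection and normalization.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)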
Example no. 14
 def test_mask_single(self):
     input_layer = keras.layers.Input(shape=(None, ))
     embed_layer = keras.layers.Embedding(input_dim=3,
                                          output_dim=4,
                                          mask_zero=True)(input_layer)
     att_layer = MultiHeadAttention(
         head_num=2,
         name='Multi-Head-2',
     )(embed_layer)
     mask_layer = GetMask()(att_layer)
     model = keras.models.Model(inputs=input_layer, outputs=mask_layer)
     model.compile(optimizer='adam', loss='mse', metrics={})
     predicts = model.predict(np.asarray([[1, 2, 1, 2, 0, 0]])).tolist()
     self.assertEqual([1.0] * 4 + [0.0] * 2, predicts[0], predicts[0])
def create_muti_head_self_attention_model(maxlen=20000,
                                          embedding=256,
                                          salayer=256):
    S_inputs = Input(shape=(maxlen, ), dtype='int32')
    embeddings = Embedding(maxlen, embedding)(S_inputs)
    O_seq = MultiHeadAttention(
        head_num=3,
        name='Multi-Head',
    )(embeddings)
    O_seq = Flatten()(O_seq)
    outputs = Dense(9, activation='softmax')(O_seq)
    model = Model(inputs=S_inputs, outputs=outputs)

    print(model.summary())
    return model
Example no. 16
    def create_model(self, params, index, logger):

        input1 = keras.Input(
            batch_shape=(params[index]["batch_size"],
                         params[index]["max_code_length"] + 2,
                         params[index]['dataset'].len_encoding),
            name='input_1')

        input2 = keras.Input(
            batch_shape=(params[index]["batch_size"],
                         params[index]["max_code_length"] + 2,
                         params[index]['dataset'].len_encoding),
            name='input_2')

        embedding = Dense(32, name='embedding1')

        dense1 = embedding(input1)
        dense2 = embedding(input2)

        lstm = Bidirectional(LSTM(512, name='lstm', return_sequences=True))

        if params[index]["attention"]:
            attention = MultiHeadAttention(head_num=2,
                                           name="multi_head_attention")

        lstm1 = lstm(dense1)
        lstm2 = lstm(dense2)

        if params[index]["attention"]:
            lstm1 = Flatten()(attention(lstm1))
            lstm2 = Flatten()(attention(lstm2))

        output_embedding = Dense(512, name="output_embedding")

        output_embedding1 = output_embedding(lstm1)
        output_embedding2 = output_embedding(lstm2)

        distance = Lambda(
            euclidean_distance,
            output_shape=eucl_dist_output_shape,
            name='distance')([output_embedding1, output_embedding2])

        return keras.Model(inputs=[input1, input2],
                           outputs=distance,
                           name=self.name + "-" + str(index))
Example no. 17
def encoder(seq_len, m_features, d_model, n_heads, dff, rate=0.1, encoder=None):
	"""Basic Attention Encoder. It can be concatenated with a previous encoder by passing it as argument."""
	if encoder is None:
		in_seq = keras.layers.Input(shape=(seq_len, m_features))
		norm_0 = LayerNormalization()(in_seq)
	else:
		in_seq = encoder.output
		norm_0 = in_seq
	linear = keras.layers.Dense(units=d_model)(norm_0)
	pos = TrigPosEmbedding(mode=TrigPosEmbedding.MODE_ADD)(linear)
	mha = MultiHeadAttention(head_num=n_heads)(pos)
	mha_drop = keras.layers.Dropout(rate=rate)(mha)
	add_1  = keras.layers.Add()([pos, mha_drop])
	norm_1 = LayerNormalization()(add_1)
	ff = FeedForward(dff)(norm_1)
	ff_drop = keras.layers.Dropout(rate=rate)(ff)
	add_2 = keras.layers.Add()([ff_drop, norm_1])
	out = LayerNormalization()(add_2)
	return keras.Model(in_seq, out) if encoder is None else keras.Model(encoder.input, out)
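
A hypothetical chaining example with made-up sizes, matching the docstring above (assuming the same imports as the snippet): the second call receives the first encoder via the encoder argument and yields a single model from the original input to the second block's output.

enc_1 = encoder(seq_len=64, m_features=8, d_model=128, n_heads=4, dff=256)
enc_2 = encoder(seq_len=64, m_features=8, d_model=128, n_heads=4, dff=256,
                encoder=enc_1)
enc_2.summary()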
def build_model(input_shape):

    main_input = keras.layers.Input(input_shape)
    input_layer = keras.layers.Input((input_shape[1], input_shape[2]))

    conv1 = keras.layers.Conv1D(filters=128, kernel_size=8,
                                padding='same')(input_layer)
    conv1 = keras.layers.normalization.BatchNormalization()(conv1)
    conv1 = keras.layers.Activation(activation='relu')(conv1)

    conv2 = keras.layers.Conv1D(filters=256, kernel_size=5,
                                padding='same')(conv1)
    conv2 = keras.layers.normalization.BatchNormalization()(conv2)
    conv2 = keras.layers.Activation('relu')(conv2)

    conv3 = keras.layers.Conv1D(128, kernel_size=3, padding='same')(conv2)
    conv3 = keras.layers.normalization.BatchNormalization()(conv3)
    conv3 = keras.layers.Activation('relu')(conv3)

    gap_layer = keras.layers.pooling.GlobalAveragePooling1D()(conv3)

    cnn_model = keras.layers.TimeDistributed(
        keras.models.Model(inputs=input_layer, outputs=gap_layer))(main_input)

    lstm_layer = keras.layers.LSTM(64, return_sequences=True)(cnn_model)
    print(lstm_layer)

    att_layer = MultiHeadAttention(head_num=8)(lstm_layer)
    print(att_layer)

    lstm_layer = keras.layers.LSTM(64, return_sequences=True)(att_layer)
    print(lstm_layer)

    gap_layerX = keras.layers.pooling.GlobalAveragePooling1D()(lstm_layer)

    output_layer = keras.layers.Dense(64, activation='relu')(gap_layerX)
    output_layer = keras.layers.Dense(2, activation='softmax')(output_layer)

    model = keras.models.Model(inputs=main_input, outputs=output_layer)

    model.summary()
    return model
Example no. 19
def loadBertModel():
    keras.utils.generic_utils.get_custom_objects().update({'pentanh': Pentanh()})
    input = Input((500, 3072))
    rnn_layer = Bidirectional(MultiplicativeLSTM(256,
                                                 return_sequences=True, dropout=0.2,
                                                 recurrent_dropout=0.2,
                                                 activation='pentanh',
                                                 recurrent_activation='pentanh'),
                              merge_mode='concat')(input)

    attention_layer = MultiHeadAttention(head_num=4)(rnn_layer)
    removeMask = Flatten()(attention_layer)
    final = Dense(14, activation='sigmoid')(removeMask)

    model_complete = Model(inputs=input, outputs=final)
    model_complete.compile(optimizer='adam',
                           loss='binary_crossentropy',
                           metrics=['accuracy', auc_roc])

    print(model_complete.summary())
    return model_complete
def local_context_learning(input_length, input_dim, output_dim, hidden_dim,
                           filters_num, kernel_val, learning_rate, drop_rate):
    basic_input = Input(shape=(input_length, input_dim))
    label_input = Input(shape=(1, ))
    weighted_input = adding_weight()(basic_input)

    def true_process():
        return weighted_input

    def false_process():
        return basic_input

    actual_input = Lambda(lambda x: tf.cond(x > tf.constant(value=0.5),
                                            true_fn=true_process,
                                            false_fn=false_process))(
                                                label_input)
    rnn_output = GRU(hidden_dim, return_sequences=True)(actual_input)
    rnn_att = SeqSelfAttention(attention_activation='sigmoid')(rnn_output)
    cnn_output = Conv1D(filters=filters_num,
                        kernel_size=kernel_val,
                        padding="same")(actual_input)
    cnn_output_reformat = Dense(hidden_dim)(cnn_output)
    cnn_att = SeqSelfAttention(
        attention_activation='sigmoid')(cnn_output_reformat)
    fixed_rnn_output = adding_attention(rnn_output, rnn_att, input_length,
                                        hidden_dim)
    fixed_cnn_output = adding_attention(cnn_output_reformat, cnn_att,
                                        input_length, hidden_dim)
    new_value = Concatenate(axis=1)([fixed_rnn_output, fixed_cnn_output])
    new_keys = Lambda(lambda x: ones_like(x))(new_value)
    new_result = MultiHeadAttention(head_num=2)(
        [actual_input, new_keys, new_value])
    result = Flatten()(new_result)
    result_fix = Dropout(rate=drop_rate)(result)
    output = Dense(output_dim)(result_fix)
    fixed_output = Activation(activation='sigmoid')(output)
    model = Model([basic_input, label_input], fixed_output)
    ada = adam(lr=learning_rate)
    model.compile(optimizer=ada, loss='categorical_crossentropy')
    return model
Example no. 21
def block(attention_input, head_num: int, feed_forward_units: int,
          dropout_rate: float) -> Tensor:

    attention_x = MultiHeadAttention(
        head_num=head_num,
        activation=None,
        use_bias=False,
        history_only=True,
        trainable=True,
    )(attention_input)
    attention_x = Dropout(dropout_rate)(attention_x)
    attention_x = Add()([attention_input, attention_x])
    feed_forward_input = LayerNormalization(trainable=True)(attention_x)

    feed_forward_x = FeedForward(units=feed_forward_units,
                                 activation='relu',
                                 trainable=True)(feed_forward_input)
    feed_forward_x = Dropout(dropout_rate)(feed_forward_x)
    feed_forward_x = Add()([feed_forward_input, feed_forward_x])
    block_output = LayerNormalization(trainable=True)(feed_forward_x)

    return block_output
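
A minimal stacking sketch for the block above, with hypothetical shapes and hyper-parameters (assuming keras is imported alongside the layers used in the snippet):

inputs = keras.layers.Input(shape=(128, 256))   # (sequence length, model dimension)
x = inputs
for _ in range(4):                              # four identical masked self-attention blocks
    x = block(x, head_num=8, feed_forward_units=1024, dropout_rate=0.1)
stacked = keras.models.Model(inputs=inputs, outputs=x)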
Example no. 22
def build_model(input_shape):
    emotive_params = 266
    n_feature_maps = 64

    input_layer = keras.layers.Input(input_shape)
    lstm_layer = keras.layers.LSTM(emotive_params, return_sequences=True)(input_layer)

    att_layer = MultiHeadAttention(
        head_num=14,
        name='Multi-Head',
    )(lstm_layer)

    lstm_layer = keras.layers.LSTM(n_feature_maps, return_sequences=True)(att_layer)
    gap_layerX = keras.layers.pooling.GlobalAveragePooling1D()(lstm_layer)

    output_layer = keras.layers.Dense(n_feature_maps, activation='relu')(gap_layerX)
    output_layer = keras.layers.Dense(2, activation='softmax')(output_layer)

    model = keras.models.Model(inputs=input_layer, outputs=output_layer)

    model.summary()
    return model
 def test_sample(self):
     input_layer = keras.layers.Input(
         shape=(512, ),
         name='Input',
     )
     embed_layer = keras.layers.Embedding(
         input_dim=12,
         output_dim=768,
         mask_zero=True,
         name='Embedding',
     )(input_layer)
     output_layer = MultiHeadAttention(
         head_num=12,
         name='Multi-Head',
     )(embed_layer)
     model = keras.models.Model(inputs=input_layer, outputs=output_layer)
     model.compile(
         optimizer='adam',
         loss='mse',
         metrics={},
     )
     model.summary()
Example no. 24
def build_albert(token_num,
                 pos_num=512,
                 seq_len=512,
                 embed_dim=128,
                 hidden_dim=768,
                 transformer_num=12,
                 head_num=12,
                 feed_forward_dim=3072,
                 dropout_rate=0.1,
                 attention_activation=None,
                 feed_forward_activation='gelu',
                 training=True,
                 trainable=None,
                 output_layers=None):
    """Get ALBERT model.
    See: https://arxiv.org/pdf/1909.11942.pdf
    :param token_num: Number of tokens.
    :param pos_num: Maximum position.
    :param seq_len: Maximum length of the input sequence or None.
    :param embed_dim: Dimensions of embeddings.
    :param hidden_dim: Dimensions of hidden layers.
    :param transformer_num: Number of transformers.
    :param head_num: Number of heads in multi-head attention
                    in each transformer.
    :param feed_forward_dim: Dimension of the feed forward layer
                             in each transformer.
    :param dropout_rate: Dropout rate.
    :param attention_activation: Activation for attention layers.
    :param feed_forward_activation: Activation for feed-forward layers.
    :param training: A built model with MLM and NSP outputs will be returned
                     if it is `True`, otherwise the input layers and the last
                     feature extraction layer will be returned.
    :param trainable: Whether the model is trainable.
    :param output_layers: A list of indices of output layers.
    """
    if attention_activation == 'gelu':
        attention_activation = gelu
    if feed_forward_activation == 'gelu':
        feed_forward_activation = gelu
    if trainable is None:
        trainable = training

    def _trainable(_layer):
        if isinstance(trainable, (list, tuple, set)):
            for prefix in trainable:
                if _layer.name.startswith(prefix):
                    return True
            return False
        return trainable

    # Build inputs
    input_token = keras.layers.Input(shape=(seq_len, ), name='Input-Token')
    input_segment = keras.layers.Input(shape=(seq_len, ), name='Input-Segment')
    inputs = [input_token, input_segment]

    # Build embeddings
    embed_token, embed_weights, embed_projection = AdaptiveEmbedding(
        input_dim=token_num,
        output_dim=hidden_dim,
        embed_dim=embed_dim,
        mask_zero=True,
        trainable=trainable,
        return_embeddings=True,
        return_projections=True,
        name='Embed-Token',
    )(input_token)
    embed_segment = keras.layers.Embedding(
        input_dim=2,
        output_dim=hidden_dim,
        trainable=trainable,
        name='Embed-Segment',
    )(input_segment)
    embed_layer = keras.layers.Add(name='Embed-Token-Segment')(
        [embed_token, embed_segment])
    embed_layer = PositionEmbedding(
        input_dim=pos_num,
        output_dim=hidden_dim,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        name='Embedding-Position',
    )(embed_layer)

    if dropout_rate > 0.0:
        dropout_layer = keras.layers.Dropout(
            rate=dropout_rate,
            name='Embedding-Dropout',
        )(embed_layer)
    else:
        dropout_layer = embed_layer
    embed_layer = LayerNormalization(
        trainable=trainable,
        name='Embedding-Norm',
    )(dropout_layer)

    # Build shared transformer
    attention_layer = MultiHeadAttention(
        head_num=head_num,
        activation=attention_activation,
        name='Attention',
    )
    attention_normal = LayerNormalization(name='Attention-Normal')
    feed_forward_layer = FeedForward(units=feed_forward_dim,
                                     activation=feed_forward_activation,
                                     name='Feed-Forward')
    feed_forward_normal = LayerNormalization(name='Feed-Forward-Normal')

    transformed = embed_layer
    transformed_layers = []
    for i in range(transformer_num):
        attention_input = transformed
        transformed = attention_layer(transformed)
        if dropout_rate > 0.0:
            transformed = keras.layers.Dropout(
                rate=dropout_rate,
                name='Attention-Dropout-{}'.format(i + 1),
            )(transformed)
        transformed = keras.layers.Add(
            name='Attention-Add-{}'.format(i + 1), )(
                [attention_input, transformed])
        transformed = attention_normal(transformed)

        feed_forward_input = transformed
        transformed = feed_forward_layer(transformed)
        if dropout_rate > 0.0:
            transformed = keras.layers.Dropout(
                rate=dropout_rate,
                name='Feed-Forward-Dropout-{}'.format(i + 1),
            )(transformed)
        transformed = keras.layers.Add(
            name='Feed-Forward-Add-{}'.format(i + 1), )(
                [feed_forward_input, transformed])
        transformed = feed_forward_normal(transformed)
        transformed_layers.append(transformed)

    if training:
        # Build tasks
        mlm_dense_layer = keras.layers.Dense(
            units=hidden_dim,
            activation=feed_forward_activation,
            name='MLM-Dense',
        )(transformed)
        mlm_norm_layer = LayerNormalization(name='MLM-Norm')(mlm_dense_layer)
        mlm_pred_layer = AdaptiveSoftmax(
            input_dim=hidden_dim,
            output_dim=token_num,
            embed_dim=embed_dim,
            bind_embeddings=True,
            bind_projections=True,
            name='MLM-Sim',
        )([mlm_norm_layer, embed_weights, embed_projection])
        masked_layer = Masked(name='MLM')([mlm_pred_layer, inputs[-1]])
        extract_layer = Extract(index=0, name='Extract')(transformed)
        nsp_dense_layer = keras.layers.Dense(
            units=hidden_dim,
            activation='tanh',
            name='SOP-Dense',
        )(extract_layer)
        nsp_pred_layer = keras.layers.Dense(
            units=2,
            activation='softmax',
            name='SOP',
        )(nsp_dense_layer)
        model = keras.models.Model(inputs=inputs,
                                   outputs=[masked_layer, nsp_pred_layer])
        for layer in model.layers:
            layer.trainable = _trainable(layer)
        return model
    if output_layers is not None:
        if isinstance(output_layers, list):
            output_layers = [
                transformed_layers[index] for index in output_layers
            ]
            output = keras.layers.Concatenate(name='Output', )(output_layers)
        else:
            output = transformed_layers[output_layers]
        model = keras.models.Model(inputs=inputs, outputs=output)
        return model
    model = keras.models.Model(inputs=inputs, outputs=transformed)
    for layer in model.layers:
        layer.trainable = _trainable(layer)
    return inputs, transformed
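
A minimal usage sketch with made-up sizes, following the docstring above: training=True returns a model with the MLM and SOP heads, while training=False (with output_layers=None) returns the input layers and the last feature tensor instead.

# Pre-training graph with MLM and SOP outputs (hypothetical vocabulary size).
pretrain_model = build_albert(token_num=30000, training=True)
pretrain_model.summary()

# Feature-extraction mode: wrap the returned inputs and features in a model.
inputs, features = build_albert(token_num=30000, training=False)
feature_model = keras.models.Model(inputs=inputs, outputs=features)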
Example no. 25
def build_model(emb_cid, emb_advid, emb_aid):

    inp1 = layers.Input(shape=(max_len, ))
    inp2 = layers.Input(shape=(max_len, ))
    inp3 = layers.Input(shape=(max_len, ))

    emb1 = layers.Embedding(input_dim=emb_cid.shape[0],
                            output_dim=emb_cid.shape[1],
                            input_length=max_len,
                            weights=[emb_cid],
                            trainable=False)(inp1)
    emb2 = layers.Embedding(input_dim=emb_advid.shape[0],
                            output_dim=emb_advid.shape[1],
                            input_length=max_len,
                            weights=[emb_advid],
                            trainable=False)(inp2)
    emb3 = layers.Embedding(input_dim=emb_aid.shape[0],
                            output_dim=emb_aid.shape[1],
                            input_length=max_len,
                            weights=[emb_aid],
                            trainable=False)(inp3)

    sdrop = layers.SpatialDropout1D(rate=0.2)

    emb1 = sdrop(emb1)
    emb2 = sdrop(emb2)
    emb3 = sdrop(emb3)

    id_c = emb1
    id_adv_ad = layers.Concatenate()([emb2, emb3])

    mha1 = MultiHeadAttention(head_num=16)(id_adv_ad)
    mha1 = layers.Dropout(0.01)(mha1)
    mha1 = layers.Add()([id_adv_ad, mha1])
    mha1 = LayerNormalization()(mha1)
    mha1 = layers.Dropout(0.01)(mha1)
    mha1_ff = FeedForward(128)(mha1)
    mha1_out = layers.Add()([mha1, mha1_ff])
    mha1_out = LayerNormalization()(mha1_out)
    id_adv_ad_lstm = layers.Bidirectional(
        layers.LSTM(200, return_sequences=True))(mha1_out)
    id_adv_ad_max_pool = layers.GlobalMaxPool1D()(id_adv_ad_lstm)

    mha2 = MultiHeadAttention(head_num=16)(id_c)
    mha2 = layers.Dropout(0.01)(mha2)
    mha2 = layers.Add()([id_c, mha2])
    mha2 = LayerNormalization()(mha2)
    mha2 = layers.Dropout(0.01)(mha2)
    mha2_ff = FeedForward(128)(mha2)
    mha2_out = layers.Add()([mha2, mha2_ff])
    mha2_out = LayerNormalization()(mha2_out)
    id_c_lstm = layers.Bidirectional(layers.LSTM(
        200, return_sequences=True))(mha2_out)
    id_c_max_pool = layers.GlobalMaxPool1D()(id_c_lstm)

    x = layers.Add()([id_c_max_pool, id_adv_ad_max_pool])

    x = layers.Dense(256, activation='relu')(x)
    x = layers.BatchNormalization()(x)

    x = layers.Dense(64, activation='relu')(x)
    x = layers.BatchNormalization()(x)

    x = layers.Dropout(0.15)(x)

    out = layers.Dense(10, activation='softmax')(x)
    model = keras.Model(inputs=[inp1, inp2, inp3], outputs=out)
    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.Adam(1e-3),
                  metrics=['accuracy'])

    return model
    def test_fit(self):
        input_layer = keras.layers.Input(
            shape=(1, 3),
            name='Input',
        )
        att_layer = MultiHeadAttention(
            head_num=3,
            activation=self._leaky_relu,
            name='Multi-Head-Attention-1')(input_layer)
        normal_layer = LayerNormalization(
            name='Layer-Normalization-1', )(att_layer)
        feed_forward_layer = FeedForward(
            units=12,
            activation=self._leaky_relu,
            name='FeedForward',
        )(normal_layer)
        normal_layer = LayerNormalization(
            name='Layer-Normalization-2', )(feed_forward_layer)
        output_layer = keras.layers.Add(name='Add')(
            [input_layer, normal_layer])
        model = keras.models.Model(
            inputs=input_layer,
            outputs=output_layer,
        )
        model.compile(
            optimizer='adam',
            loss='mse',
            metrics={},
        )

        def _generator(batch_size=32):
            while True:
                batch_inputs = np.random.random((batch_size, 1, 3))
                batch_outputs = batch_inputs + 0.2
                yield batch_inputs, batch_outputs

        model.fit_generator(
            generator=_generator(),
            steps_per_epoch=1000,
            epochs=10,
            validation_data=_generator(),
            validation_steps=100,
            callbacks=[
                keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
            ],
        )
        model_path = os.path.join(
            tempfile.gettempdir(),
            'keras_feed_forward_%f.h5' % np.random.random())
        model.save(model_path)
        model = keras.models.load_model(
            model_path,
            custom_objects={
                '_leaky_relu': self._leaky_relu,
                'MultiHeadAttention': MultiHeadAttention,
                'LayerNormalization': LayerNormalization,
                'FeedForward': FeedForward,
            },
        )
        for inputs, _ in _generator(batch_size=3):
            predicts = model.predict(inputs)
            expect = inputs + 0.2
            for i in range(3):
                for j in range(3):
                    self.assertTrue(
                        np.abs(expect[i, 0, j] - predicts[i, 0, j]) < 0.1,
                        (expect, predicts))
            break
Example no. 27
train_x = wv.Zero_padding(train_x, Maxseq_length)
test_x = wv.Zero_padding(test_x, Maxseq_length)

#train_y = to_categorical(train_y)
#test_y = to_categorical(test_y)
model = Sequential()

model.add(Conv1D(32,
                 3,
                 padding='valid',
                 activation='custom_gelu',
                 strides=1))
model.add(MaxPooling1D(pool_size=2))

model.add(MultiHeadAttention(
    head_num=32,
    name='Multi-Head-Attention',
))
model.add(layers.LSTM(32, return_sequences=True))
model.add(layers.Flatten())
model.add(Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))
model.build()
model.summary()
model.compile(optimizer=Adam(lr=0.001),
              loss=keras.losses.binary_crossentropy,
              metrics=['accuracy'])
model.fit(train_x, train_y, batch_size=64, epochs=60)
scores = model.evaluate(test_x, test_y, verbose=2)

print("Accuracy: %.2f%%" % (scores[1]*100))
"""
text = layers.Conv2D(64,(3,300),padding='valid',strides = 1)(news_input)
text1 = layers.MaxPooling2D(pool_size = (1,1))(text)
def build_model(input_shape):
    n_feature_maps = 64

    main_input = keras.layers.Input(input_shape)
    input_layer = keras.layers.Input((input_shape[1], input_shape[2]))

    # input_layer = PreProcessingLayer(266)(input_layer)
    # print(input_layer)

    # BLOCK 1
    conv_x = keras.layers.Conv1D(filters=n_feature_maps,
                                 kernel_size=8,
                                 padding='same')(input_layer)
    conv_x = keras.layers.normalization.BatchNormalization()(conv_x)
    conv_x = keras.layers.Activation('relu')(conv_x)

    conv_y = keras.layers.Conv1D(filters=n_feature_maps,
                                 kernel_size=5,
                                 padding='same')(conv_x)
    conv_y = keras.layers.normalization.BatchNormalization()(conv_y)
    conv_y = keras.layers.Activation('relu')(conv_y)

    conv_z = keras.layers.Conv1D(filters=n_feature_maps,
                                 kernel_size=3,
                                 padding='same')(conv_y)
    conv_z = keras.layers.normalization.BatchNormalization()(conv_z)

    # expand channels for the sum
    shortcut_y = keras.layers.Conv1D(filters=n_feature_maps,
                                     kernel_size=1,
                                     padding='same')(input_layer)
    shortcut_y = keras.layers.normalization.BatchNormalization()(shortcut_y)

    output_block_1 = keras.layers.add([shortcut_y, conv_z])
    output_block_1 = keras.layers.Activation('relu')(output_block_1)

    # BLOCK 2
    conv_x = keras.layers.Conv1D(filters=n_feature_maps * 2,
                                 kernel_size=8,
                                 padding='same')(output_block_1)
    conv_x = keras.layers.normalization.BatchNormalization()(conv_x)
    conv_x = keras.layers.Activation('relu')(conv_x)

    conv_y = keras.layers.Conv1D(filters=n_feature_maps * 2,
                                 kernel_size=5,
                                 padding='same')(conv_x)
    conv_y = keras.layers.normalization.BatchNormalization()(conv_y)
    conv_y = keras.layers.Activation('relu')(conv_y)

    conv_z = keras.layers.Conv1D(filters=n_feature_maps * 2,
                                 kernel_size=3,
                                 padding='same')(conv_y)
    conv_z = keras.layers.normalization.BatchNormalization()(conv_z)

    # expand channels for the sum
    shortcut_y = keras.layers.Conv1D(filters=n_feature_maps * 2,
                                     kernel_size=1,
                                     padding='same')(output_block_1)
    shortcut_y = keras.layers.normalization.BatchNormalization()(shortcut_y)

    output_block_2 = keras.layers.add([shortcut_y, conv_z])
    output_block_2 = keras.layers.Activation('relu')(output_block_2)

    # BLOCK 3

    conv_x = keras.layers.Conv1D(filters=n_feature_maps * 2,
                                 kernel_size=8,
                                 padding='same')(output_block_2)
    conv_x = keras.layers.normalization.BatchNormalization()(conv_x)
    conv_x = keras.layers.Activation('relu')(conv_x)

    conv_y = keras.layers.Conv1D(filters=n_feature_maps * 2,
                                 kernel_size=5,
                                 padding='same')(conv_x)
    conv_y = keras.layers.normalization.BatchNormalization()(conv_y)
    conv_y = keras.layers.Activation('relu')(conv_y)

    conv_z = keras.layers.Conv1D(filters=n_feature_maps * 2,
                                 kernel_size=3,
                                 padding='same')(conv_y)
    conv_z = keras.layers.normalization.BatchNormalization()(conv_z)

    # no need to expand channels because they are equal
    shortcut_y = keras.layers.normalization.BatchNormalization()(
        output_block_2)

    output_block_3 = keras.layers.add([shortcut_y, conv_z])
    output_block_3 = keras.layers.Activation('relu')(output_block_3)
    print(output_block_3)

    # FINAL
    gap_layer = keras.layers.GlobalAveragePooling1D()(output_block_3)
    print(gap_layer)

    cnn_model = keras.layers.TimeDistributed(
        keras.models.Model(inputs=input_layer, outputs=gap_layer))(main_input)
    print(cnn_model)

    pos_enc = PreProcessingLayer()(cnn_model)
    print(pos_enc)

    # lstm_layer = keras.layers.Bidirectional(keras.layers.LSTM(n_feature_maps, return_sequences=True))(cnn_model)
    lstm_layer = keras.layers.LSTM(64, return_sequences=True)(pos_enc)
    att_layer = MultiHeadAttention(head_num=16)(lstm_layer)
    lstm_layer = keras.layers.LSTM(64, return_sequences=True)(att_layer)

    gap_layerX = keras.layers.pooling.GlobalAveragePooling1D()(lstm_layer)

    output_layer = keras.layers.Dense(64, activation='relu')(gap_layerX)
    output_layer = keras.layers.Dense(2, activation='softmax')(output_layer)

    model = keras.models.Model(inputs=main_input, outputs=output_layer)

    model.summary()
    return model
Example no. 29
 def get_model(self,
               params,
               a=False,
               b=False,
               c=False,
               d=False,
               e=False,
               f=False,
               g=False,
               dropout=0.5):
     hash_input = layers.Input(shape=(params['max_words'], ), dtype='int32')
     x = layers.Embedding(params['hash_mole'],
                          params['embed_size'],
                          input_length=params['max_words'],
                          name=self.embedding_name)(hash_input)
     x = layers.Dropout(dropout / 3)(x)
     if a:
         # did not train
         # needs positional embedding?
         x = MultiHeadAttention(4)(x)
         x = layers.Dropout(dropout / 3)(x)
     if b:
         x = layers.Bidirectional(
             self.get_lstm(params['units'] // 2, return_sequences=True))(x)
         x = layers.Dropout(dropout)(x)
         x = layers.TimeDistributed(MultiHeadAttention(4))(x)
         #x = layers.Flatten()(x)
         #x = layers.Dropout(dropout)(x)
         #x = layers.Dense(params['embed_size'])(x)
         x = layers.Dropout(dropout / 3)(x)
     #if c:
     x = layers.Bidirectional(
         self.get_lstm(params['units'] // 2,
                       return_sequences=False,
                       name=self.bidirectional_name))(x)
     x = layers.Dropout(dropout)(x)
     x = layers.RepeatVector(params['num_sylls'])(x)
     x = layers.Dropout(dropout)(x)
     if d:
         x = PositionEmbedding(input_dim=params['embed_size'],
                               output_dim=params['num_sylls'] * 4,
                               mode=PositionEmbedding.MODE_CONCAT)(x)
         x = layers.Dropout(dropout)(x)
         x = MultiHeadAttention(4)(x)
         x = layers.Dropout(dropout)(x)
     x = self.get_lstm(params['units'],
                       return_sequences=True,
                       name=self.cu_dnnlstm_name)(x)
     if e:
         x = PositionEmbedding(input_dim=params['units'],
                               output_dim=params['units'],
                               mode=PositionEmbedding.MODE_ADD)(x)
         x = layers.Dropout(dropout)(x)
         #x = layers.Dense(params['units'])(x)
         #x = layers.Dropout(dropout)(x)
     if f:
         # this was somewhat effective
         x = MultiHeadAttention(2)(x)
         x = layers.Dropout(dropout)(x)
     if g:
         x = layers.Dense(params['units'],
                          kernel_initializer='identity',
                          name='dense_identity',
                          activation='relu')(x)
         x = layers.Dropout(dropout)(x)
     output_layer = layers.Dense(params['max_features'],
                                 activation='softmax',
                                 name=self.dense_name)(x)
     model = Model(inputs=[hash_input], outputs=[output_layer])
     return model
Example no. 30
def build_model(emb_cid, emb_advid, emb_aid):

    inp1 = layers.Input(shape=(max_len, ))
    inp2 = layers.Input(shape=(max_len, ))
    inp3 = layers.Input(shape=(max_len, ))

    emb1 = layers.Embedding(input_dim=emb_cid.shape[0],
                            output_dim=emb_cid.shape[1],
                            input_length=max_len,
                            weights=[emb_cid],
                            trainable=False)(inp1)
    emb2 = layers.Embedding(input_dim=emb_advid.shape[0],
                            output_dim=emb_advid.shape[1],
                            input_length=max_len,
                            weights=[emb_advid],
                            trainable=False)(inp2)
    emb3 = layers.Embedding(input_dim=emb_aid.shape[0],
                            output_dim=emb_aid.shape[1],
                            input_length=max_len,
                            weights=[emb_aid],
                            trainable=False)(inp3)

    sdrop = layers.SpatialDropout1D(rate=0.2)

    emb1 = sdrop(emb1)
    emb2 = sdrop(emb2)
    emb3 = sdrop(emb3)

    content = layers.Concatenate()([emb1, emb2, emb3])

    mha1 = MultiHeadAttention(head_num=32)(content)
    mha1 = layers.Dropout(0.01)(mha1)
    mha1 = layers.Add()([content, mha1])
    mha1 = LayerNormalization()(mha1)
    mha1 = layers.Dropout(0.01)(mha1)
    mha1_ff = FeedForward(256)(mha1)
    mha1_out = layers.Add()([mha1, mha1_ff])
    mha1_out = LayerNormalization()(mha1_out)

    mha2 = MultiHeadAttention(head_num=32)(mha1_out)
    mha2 = layers.Dropout(0.01)(mha2)
    mha2 = layers.Add()([mha1_out, mha2])
    mha2 = LayerNormalization()(mha2)
    mha2 = layers.Dropout(0.01)(mha2)
    mha2_ff = FeedForward(256)(mha2)
    mha2_out = layers.Add()([mha2, mha2_ff])
    mha2_out = LayerNormalization()(mha2_out)

    mha3 = MultiHeadAttention(head_num=32)(mha2_out)
    mha3 = layers.Dropout(0.01)(mha3)
    mha3 = layers.Add()([mha2_out, mha3])
    mha3 = LayerNormalization()(mha3)
    mha3 = layers.Dropout(0.01)(mha3)
    mha3_ff = FeedForward(256)(mha3)
    mha3_out = layers.Add()([mha3, mha3_ff])
    mha3_out = LayerNormalization()(mha3_out)

    avg_pool = layers.GlobalAveragePooling1D()(mha3_out)
    max_pool = layers.GlobalMaxPool1D()(mha3_out)

    x = layers.Concatenate()([avg_pool, max_pool])

    x = layers.Dense(256)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Dropout(0.2)(x)

    x = layers.Dense(128)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Dropout(0.2)(x)

    x = layers.Dense(64)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Dropout(0.2)(x)

    out = layers.Dense(1, activation='sigmoid')(x)
    model = keras.Model(inputs=[inp1, inp2, inp3], outputs=out)
    model.compile(loss='binary_crossentropy',
                  optimizer=keras.optimizers.Adam(2e-4),
                  metrics=['accuracy'])

    return model