Example #1
def AttentionModel3(n_answer, embedding, mask_zero=True, trainable=True):
    context_shape = (MAX_CONTEXT_LENGTH, )
    question_shape = (MAX_QUESTION_LENGTH, )

    in_context = Input(shape=context_shape, dtype='int32')
    in_question = Input(shape=question_shape, dtype='int32')

    embedding_layer = PretrainedEmbedding(embeddings=embedding,
                                          mask_zero=mask_zero,
                                          rate=DROPOUT_RATE)
    context, context_mask = embedding_layer(
        in_context), embedding_layer.compute_mask(in_context)
    question, question_mask = embedding_layer(
        in_question), embedding_layer.compute_mask(in_question)

    context_fw = GRU(units=CONTEXTUAL_UNITS,
                     return_sequences=True,
                     return_state=False)(context, mask=context_mask)
    context_bw = GRU(units=CONTEXTUAL_UNITS,
                     return_sequences=True,
                     return_state=False,
                     go_backwards=True)(context, mask=context_mask)
    context_bw = tf.reverse(context_bw, axis=[1])

    question_output, question_state = GRU(units=CONTEXTUAL_UNITS,
                                          return_sequences=True,
                                          return_state=True)(
                                              question, mask=question_mask)

    # fw, bw = context_output[0], context_output[1]

    attention_fw = Attention(use_scale=True)([context_fw, question_output])
    attention_bw = Attention(use_scale=True)([context_bw, question_output])

    attention = Weighted_Sum()([attention_fw, attention_bw])

    context_weighted = GRU(units=CONTEXTUAL_UNITS,
                           return_sequences=True,
                           return_state=False)(context, mask=context_mask)

    c = tf.expand_dims(tf.reduce_sum(tf.multiply(context_weighted, attention),
                                     axis=-2),
                       axis=-1)
    # pred = Output_Layer(units=n_answer)([c, tf.expand_dims(question_state, axis=-1)])
    pred = Dense(units=n_answer, activation='softmax')(tf.reduce_sum(c,
                                                                     axis=-1))

    model = Model([in_context, in_question], pred)
    return model
Example #2
 def __init__(self, d_model=256, rate=0.1, **kwargs):
     super(SimpleLayerAttention, self).__init__(**kwargs)
     self.qw = Dense(d_model)
     self.kw = Dense(d_model)
     self.vw = Dense(d_model)
     self.attn = Attention()
     self.dropout = Dropout(rate)
Example #3
def test_attention():
    """
    Luong-style attention. query (batch, Tq, dim), key (batch, Tv, dim) and value (batch, Tv, dim) are the inputs.
    The following computations are performed:
    1. query-key dot-product, output shape [batch, Tq, Tv]
    2. softmax of the above
    3. MatMul between 2. and value, output shape [batch, Tq, dim]
    """
    Tq = 10
    Tv = 10
    dim = 16
    q_shape = (Tq, dim)
    k_shape = (Tv, dim)
    v_shape = (Tv, dim)
    q = Input(q_shape)
    k = Input(k_shape)
    v = Input(v_shape)
    x = Attention()([q, k, v])
    model = Model([q, k, v], x)
    flops = get_flops(model, batch_size=1)
    assert (
        flops
        == 2 * Tq * Tv * dim  # No.1 (dot-product (MatMul))
        + 5 * Tq * Tv  # No.2 (softmax)
        + 2 * Tv * Tq * dim  # No.3 (MatMul)
    )
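A minimal sketch (assuming TensorFlow 2.x) of the three steps the docstring above counts FLOPs for, written with plain tensorflow ops rather than the Attention layer; the shapes mirror the test:

import tensorflow as tf

batch, Tq, Tv, dim = 1, 10, 10, 16
q = tf.random.normal([batch, Tq, dim])
k = tf.random.normal([batch, Tv, dim])
v = tf.random.normal([batch, Tv, dim])

scores = tf.matmul(q, k, transpose_b=True)   # 1. query-key dot-product, shape (batch, Tq, Tv)
weights = tf.nn.softmax(scores, axis=-1)     # 2. softmax over the Tv axis
output = tf.matmul(weights, v)               # 3. weighted sum of the values, shape (batch, Tq, dim)
print(output.shape)                          # (1, 10, 16)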
Example #4
 def __init__(self):
     super(MultiheadAttentiveNoVGG, self).__init__()
     self.attention = Attention()
     self.conv_max = Conv2D(64, 3, activation='relu')
     self.dropout_max = Dropout(0.5)
     self.conv_avg = Conv2D(64, 3, activation='relu')
     self.dropout_avg = Dropout(0.5)
     self.pool_max = MaxPooling2D(pool_size=(4, 4),
                                  strides=None,
                                  padding='valid',
                                  data_format=None)
     self.pool_avg = MaxPooling2D(pool_size=(4, 4),
                                  strides=None,
                                  padding='valid',
                                  data_format=None)
     self.conv_max_2 = Conv2D(32, 3, activation='relu')
     self.conv_avg_2 = Conv2D(32, 3, activation='relu')
     self.d_max = Dense(64, activation='relu')
     self.d_avg = Dense(64, activation='relu')
     self.flatten = Flatten()
     self.bn = BatchNormalization()
     self.d1_2 = Dense(64, activation='sigmoid')
     self.d2 = Dense(5, activation='softmax')
     self.dropout_max = Dropout(0.5)
     self.dropout_avg = Dropout(0.5)
     self.dropout = Dropout(0.5)
Example #5
    def __init__(self, config
                        ):
        super(CNNLSTMATTN, self).__init__()
        self.n_outputs = config.label_width
        self.filters = config.filters
        self.kernel_size = config.kernel_size
        self.activation = config.activation
        self.lstm_units = config.lstm_units

        self.conv1d1 = Conv1D(filters = self.filters, 
                            kernel_size = self.kernel_size, 
                            activation = self.activation)
        self.conv1d2 = Conv1D(filters = self.filters, 
                            kernel_size = self.kernel_size, 
                            activation = self.activation)
        self.mp1d = MaxPooling1D(pool_size = 2)
        self.flatten = Flatten()
        # self.lstm_in = LSTM(units = self.units, activation = self.activation)
        self.rv = RepeatVector(self.n_outputs)
        # output, forward_h, backward_h, forward_c, backward_c
        self.lstm_out = Bidirectional(LSTM(units = self.lstm_units, return_sequences = True, return_state = True))
        # self.td1 = TimeDistributed(Dense(10, activation = self.activation ))
        self.attention = Attention()
        self.concat = Concatenate()
        self.td2 = Dense(self.n_outputs) # self.n_outputs
Example #6
 def head(inp):
     q = Dense(d_model)(inp)
     k = Dense(d_model)(inp)
     v = Dense(d_model)(inp)
     # causal=True adds a mask that prevents information flow from future to past
     attention = Attention(use_scale=True, causal=True)([q, v, k])
     return attention
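A minimal sketch (assuming a TF 2.x Keras version where Attention still accepts causal=True, as in the snippet above) that checks the causal mask blocks information flow from future to past: perturbing only the last timestep leaves every earlier output unchanged.

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Attention

x = np.random.rand(1, 5, 8).astype("float32")       # (batch, time, features)
x_perturbed = x.copy()
x_perturbed[0, -1, :] += 1.0                         # change only the last timestep

attn = Attention(use_scale=True, causal=True)
out_a = attn([tf.constant(x), tf.constant(x)])       # self-attention: [query, value]
out_b = attn([tf.constant(x_perturbed), tf.constant(x_perturbed)])

# Positions 0..T-2 attend only to themselves and the past, so they are unaffected.
print(np.allclose(out_a[:, :-1], out_b[:, :-1]))     # True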
Example #7
def build_model2():
    d = 0.2

    model_input = Input(shape=(time_steps, input_dim))
    x = Bidirectional(LSTM(128, return_sequences=True))(model_input)
    x = Dropout(d)(x)
    #x = Bidirectional(LSTM(64, return_sequences=False))(x)
    #x = Dropout(d)(x)
    a = Attention()([x, x])
    out1 = GlobalMaxPooling1D()(x)
    out2 = GlobalMaxPooling1D()(a)
    merge = Concatenate()([out1, out2])
    x = Dense(16, activation='relu')(merge)
    x = Dense(1, activation='sigmoid')(x)

    model = Model(model_input, x)

    lossfn = tf.keras.losses.BinaryCrossentropy(
        from_logits=False,
        label_smoothing=0.0,
        axis=-1,
        reduction="auto",
        name="binary_crossentropy",
    )
    # binary classification
    model.compile(optimizer='rmsprop', loss=lossfn, metrics=['accuracy'])
    return model
Example #8
def Attention_CNN_Bi_LSTM_AE(n_steps, n_features, activation):
    en_input = Input(shape=[n_steps, n_features])
    e = Conv1D(32, kernel_size=1, padding="SAME",
               activation=activation)(en_input)
    e = MaxPool1D(pool_size=2)(e)
    e = Conv1D(64, kernel_size=3, padding="SAME", activation=activation)(e)
    e = MaxPool1D(pool_size=2)(e)
    e = Conv1D(128, kernel_size=5, padding="SAME", activation=activation)(e)
    e = MaxPool1D(pool_size=2)(e)
    e = Bidirectional(LSTM(64, recurrent_dropout=0.1, dropout=0.1))(e)
    e = Attention(use_scale=True)([e, e])
    en_output = Dense(get_output_dim(n_steps * n_features),
                      kernel_initializer='lecun_normal',
                      activation='selu')(e)
    encoder = keras.models.Model(inputs=[en_input], outputs=[en_output])

    decoder = keras.models.Sequential([
        RepeatVector(n_steps,
                     input_shape=[get_output_dim(n_steps * n_features)]),
        LSTM(256, return_sequences=True),
        keras.layers.Reshape([n_steps, 256, 1]),
        Conv2DTranspose(filters=16, kernel_size=3, activation=activation),
        Conv2DTranspose(filters=1, kernel_size=3, activation=activation),
        keras.layers.Flatten(),
        Dense(n_steps * n_features),
        keras.layers.Reshape([n_steps, n_features])
    ])

    return encoder, decoder
Example #9
    def train(self):

        encoder_inputs = Input(shape=(320, 1024))

        encoder_BidirectionalLSTM = Bidirectional(
            LSTM(128, return_sequences=True))
        encoder_out = encoder_BidirectionalLSTM(encoder_inputs)

        decoder_LSTM = LSTM(256, return_sequences=True)
        decoder_out = decoder_LSTM(encoder_out)

        attn_layer = Attention(use_scale=True)
        attn_out = attn_layer([encoder_out, decoder_out])

        dense = TimeDistributed(Dense(1, activation='softmax'))
        decoder_pred = dense(attn_out)

        model = Model(inputs=encoder_inputs, outputs=decoder_pred)
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        model.summary()

        self.model = model

        t = trange(self.config.n_epochs, desc='Epoch', ncols=90)
        for epoch_i in t:

            model.fit_generator(generator=self.train_loader)

            ckpt_path = self.config.save_dir + '/epoch-{}.ckpt'.format(epoch_i)
            tqdm.write("Save parameters at {}".format(ckpt_path))
            model.save_weights(ckpt_path)
            self.evaluate(epoch_i)
Example #10
 def __init__(self):
     super(MultiheadResNet, self).__init__()
     self.vgg = VGG16()
     self.attention = Attention()
     self.conv_max = Conv2D(64, 3, activation='relu')
     self.dropout_max = Dropout(0.5)
     self.conv_avg = Conv2D(64, 3, activation='relu')
     self.dropout_avg = Dropout(0.5)
     self.pool_max = MaxPooling2D(pool_size=(2, 2),
                                  strides=None,
                                  padding='valid',
                                  data_format=None)
     self.pool_avg = MaxPooling2D(pool_size=(2, 2),
                                  strides=None,
                                  padding='valid',
                                  data_format=None)
     self.conv_max_2 = Conv2D(32, 3, activation='relu')
     self.conv_avg_2 = Conv2D(32, 3, activation='relu')
     self.d_max = Dense(128, activation='relu')
     self.d_avg = Dense(128, activation='relu')
     self.flatten = Flatten()
     self.bn = BatchNormalization()
     self.d1_2 = Dense(56, activation='sigmoid')
     self.d2 = Dense(6, activation='softmax')
     self.dropout_vgg = Dropout(0.5)
     self.dropout_max = Dropout(0.5)
     self.dropout_avg = Dropout(0.5)
     self.dropout = Dropout(0.5)
     self.res_block = ResnetBlock(1, [1, 2, 3])
     self.res_block2 = ResnetBlock(1, [1, 2, 3])
     self.res_block3 = ResnetBlock(1, [1, 2, 3])
     self.res_block4 = ResnetBlock(1, [1, 2, 3])
Example #11
    def define_model(self, length, vocab_size, num_outcome_classes):
        '''
        Defines and compiles a convolutional neural network model

        Parameters
        ----------
        length: int
            Max length of the text strings
        vocab_size: int
            Vocabulary size of the text documents
        num_outcome_classes: int
            Number of outcome classes for the softmax output layer
        '''

        input = Input(shape=(length, ))
        embed = Embedding(vocab_size, 200)(input)
        gru = Bidirectional(GRU(128, return_sequences=True))(embed)
        drop1 = SpatialDropout1D(0.2)(gru)
        conv = Conv1D(filters=256, kernel_size=4, activation='relu')(drop1)
        pool = MaxPooling1D(pool_size=2)(conv)
        att = Attention()([gru, pool])
        flat = Flatten()(att)
        drop2 = Dropout(.2)(flat)
        dense1 = Dense(64, activation='relu')(drop2)
        output = Dense(num_outcome_classes, activation='softmax')(dense1)

        model = Model(inputs=input, outputs=output)
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['categorical_accuracy',
                               metrics.AUC()])

        self.model = model
Example #12
    def cnn_att_model(self, input_shape, output_shape, embedding_matrix):

        if not self.args.lr:
            self.args.lr = 0.001

        # Define model
        sequence_input = Input(shape=(input_shape, ), dtype='int32')

        embedding_layer = Embedding(input_dim=embedding_matrix.shape[0],
                                    output_dim=embedding_matrix.shape[1],
                                    weights=[embedding_matrix],
                                    input_length=input_shape,
                                    trainable=True)(sequence_input)

        H = Conv1D(self.args.units,
                   self.args.kernel_size,
                   activation=self.args.activation,
                   padding='same')(embedding_layer)
        H = BatchNormalization()(H)

        U = self.TrainableMatrix(output_shape, self.args.units)([])

        att = Attention(use_scale=True)([U, H])

        preds = self.Hadamard()(att)

        model = Model(sequence_input, preds)
        model.compile(loss='binary_crossentropy', optimizer=Adam(self.args.lr))

        return model
Example #13
    def __init__(self,
                 embed_dim=512,
                 num_heads=8,
                 dropout_rate=0.1,
                 causal=False):
        super(MultiHeadAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.dropout_rate = dropout_rate
        self.causal = causal

        assert embed_dim % num_heads == 0
        head_dim = embed_dim // num_heads

        self.w_query = tf.keras.layers.Dense(embed_dim)
        self.w_value = tf.keras.layers.Dense(embed_dim)
        self.w_key = tf.keras.layers.Dense(embed_dim)
        self.attention = Attention(causal=causal, dropout=dropout_rate)
        self.w_projection = tf.keras.layers.Dense(embed_dim)

        self.reshape_split = tf.keras.layers.Reshape((-1, num_heads, head_dim))
        self.permute = tf.keras.layers.Permute((2, 1, 3))

        self.reshape_split_mask = tf.keras.layers.Reshape((-1, 1))
        self.permute_mask = tf.keras.layers.Permute((2, 1))

        self.reshape_merge = tf.keras.layers.Reshape((-1, embed_dim))
Example #14
def AttentionModel(n_answer,
                    embedding,
                    mask_zero=True,
                    trainable=True):
    context_shape = (MAX_CONTEXT_LENGTH,)
    question_shape = (MAX_QUESTION_LENGTH,)

    in_context = Input(shape=context_shape, dtype='int32')
    in_question = Input(shape=question_shape, dtype='int32')

    embedding_layer = PretrainedEmbedding(embeddings=embedding, mask_zero=mask_zero, rate=DROPOUT_RATE)
    context, context_mask = embedding_layer(in_context), embedding_layer.compute_mask(in_context)
    question, question_mask = embedding_layer(in_question), embedding_layer.compute_mask(in_question)

    context_output, h = GRU(units=CONTEXTUAL_UNITS,
                               return_sequences=True,
                               return_state=True)(context, mask=context_mask)

    question_output = GRU(units=CONTEXTUAL_UNITS,
                          return_sequences=True,
                          return_state=False)(question, mask=question_mask)

    context_out = Bidirectional(GRU(units=CONTEXTUAL_UNITS,
                                   return_sequences=True,
                                   return_state=False),
                                merge_mode=None)(context_output,
                                               mask=context_mask,
                                               initial_state=[h, h])

    fw, bw = context_out[0], context_out[1]

    attention_fw = Attention(use_scale=True)([
        question_output, fw
    ])
    attention_bw = Attention(use_scale=True)([
        question_output, bw
    ])

    attention = tf.concat([attention_fw, attention_bw], axis=-1)

    answer = Dense(units=1, activation='relu')(attention)
    answer = tf.squeeze(answer, axis=-1)
    answer = Dense(units=n_answer, activation='softmax')(answer)

    model = Model([in_context, in_question], answer)
    return model
Example #15
def self_layer_unit(query_emb, value_emb):

    query_value_attention_seq = Attention()([query_emb, value_emb])

    self_att_layer = tf.keras.layers.Concatenate()(
        [query_emb, query_value_attention_seq])
    self_att_layer = tf.keras.layers.LayerNormalization()(self_att_layer)
    return self_att_layer
Example #16
def create_drbc_model(node_feature_dim=3,
                      aux_feature_dim=4,
                      rnn_repetitions=5,
                      aggregation: str = 'max',
                      combine='gru'):
    """
    :param node_feature_dim: number of initial node features, [Dc, 1, 1]
    :param aux_feature_dim: number of extra node features in the decoder's hidden layer, [Dc, CI1, CI2, 1]
    :param rnn_repetitions: number of iterations performed inside DrBCRNN
    :param aggregation: how to aggregate the sequence produced by DrBCRNN {min, max, sum, mean, lstm}
    :param combine: how states are combined in each DrBCRNN iteration {structure2vec, graphsage, gru}
    :return: DrBC tf.keras model
    """
    input_node_features = Input(shape=(node_feature_dim, ),
                                name='node_features')
    input_aux_features = Input(shape=(aux_feature_dim, ), name='aux_features')
    input_n2n = Input(shape=(None, ), sparse=True, name='n2n_sum')

    node_features = Dense(units=128)(input_node_features)
    node_features = LeakyReLU()(node_features)
    node_features = Lambda(lambda x: tf.math.l2_normalize(x, axis=-1),
                           name='normalize_node_features')(node_features)

    n2n_features = DrBCRNN(units=128,
                           repetitions=rnn_repetitions,
                           combine=combine,
                           return_sequences=aggregation is not None)(
                               [input_n2n, node_features])
    if aggregation == 'max':
        n2n_features = Lambda(lambda x: tf.reduce_max(x, axis=-1),
                              name='aggregate')(n2n_features)
    elif aggregation == 'min':
        n2n_features = Lambda(lambda x: tf.reduce_min(x, axis=-1),
                              name='aggregate')(n2n_features)
    elif aggregation == 'sum':
        n2n_features = Lambda(lambda x: tf.reduce_sum(x, axis=-1),
                              name='aggregate')(n2n_features)
    elif aggregation == 'mean':
        n2n_features = Lambda(lambda x: tf.reduce_mean(x, axis=-1),
                              name='aggregate')(n2n_features)
    elif aggregation == 'lstm':
        n2n_features = LSTM(units=128, return_sequences=True)(n2n_features)
        n2n_features = Attention()([n2n_features, n2n_features, n2n_features])
        n2n_features = Lambda(lambda x: tf.reduce_sum(x, axis=-1),
                              name='aggregate')(n2n_features)
    n2n_features = Lambda(lambda x: tf.math.l2_normalize(x, axis=-1),
                          name='normalize_n2n')(n2n_features)

    all_features = Concatenate(axis=-1)([n2n_features, input_aux_features])
    top = Dense(64)(all_features)
    top = LeakyReLU()(top)
    out = Dense(1)(top)

    return Model(inputs=[input_node_features, input_aux_features, input_n2n],
                 outputs=out,
                 name='DrBC')
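A hypothetical usage sketch: DrBCRNN is a custom layer defined elsewhere in the same repository, so this only illustrates how the factory above is meant to be called, with the 'lstm' aggregation choice routing through the LSTM + self-Attention branch.

# Assumes create_drbc_model and DrBCRNN are importable from the surrounding project.
model = create_drbc_model(node_feature_dim=3,
                          aux_feature_dim=4,
                          rnn_repetitions=5,
                          aggregation='lstm',   # LSTM + self-Attention aggregation path
                          combine='gru')
model.summary()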
Example #17
    def model(inputlen,
              vocabulary,
              vector_dim,
              embedding_dim=128,
              lstm_unit=100,
              num_class=0,
              drop_rate=0.5,
              l2=0.01,
              train=False,
              init_emb_enable=False,
              init_emb=None,
              attention_enable=False):

        input = Input(shape=(inputlen, ))
        if train == True:
            label = Input(shape=(num_class, ))

        if init_emb_enable == False:
            embeddings_init = 'uniform'
        else:
            embeddings_init = keras.initializers.constant(init_emb)

        embedding = Embedding(input_dim=vocabulary,
                              output_dim=embedding_dim,
                              embeddings_initializer=embeddings_init,
                              input_length=inputlen)(input)
        reshape = Reshape((inputlen, embedding_dim))(embedding)

        if attention_enable == True:
            lstm = Bidirectional(LSTM(lstm_unit,
                                      return_sequences=True))(reshape)
            attention_output = Attention()([lstm, lstm])
            pool_output1 = GlobalMaxPooling1D()(lstm)
            pool_output2 = GlobalMaxPooling1D()(attention_output)
            lstm = Concatenate()([pool_output1, pool_output2])
        else:
            lstm = Bidirectional(LSTM(lstm_unit,
                                      return_sequences=False))(reshape)

        if drop_rate != 0:
            dropout = Dropout(drop_rate)(lstm)
        else:
            dropout = lstm
        dense = Dense(vector_dim,
                      activity_regularizer=regularizers.l2(l2))(dropout)

        bn = BatchNormalization()(dense)

        if train == True:
            output = ArcFace(n_classes=num_class)([bn, label])
            model = Model([input, label], output)
        else:
            model = Model(input, bn)

        return model
Example #18
 def __init__(self, hidden_units, vocab_size, embedding_dim):
     super(Decoder, self).__init__()
     self.embedding = Embedding(vocab_size, embedding_dim,
                                mask_zero=False)  # Embedding Layer
     self.decoder_lstm = LSTM(hidden_units,
                              return_sequences=True,
                              return_state=True,
                              name="decode_lstm")  # Decode LSTM Layer
     self.attention = Attention()  # Attention Layer
     self.concatenate = Concatenate(
         axis=-1, name='concat_layer')  # Concatenate Layer
Example #19
    def __init__(self,num_heads,dim):
        super().__init__()

        self.num_heads = num_heads
        self.dim = self.d_w = dim

        self.w_q = Dense(num_heads*dim,use_bias=False)
        self.w_k = Dense(num_heads*dim,use_bias=False)
        self.w_v = Dense(num_heads*dim,use_bias=False)
        self.w_o = Dense(num_heads*dim,use_bias=False)

        self.attn = [Attention() for _ in range(num_heads)]  # a single shared instance would suffice, since Attention() has no per-head weights
Example #20
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_size):
        super(MyDecoderWithAttention, self).__init__()
        self.batch_size = batch_size
        self.dec_units = dec_units
        self.embedding = Embedding(vocab_size, embedding_dim)
        self.gru = GRU(self.dec_units,
                       return_sequences=True,
                       return_state=True)
        self.fc = Dense(vocab_size)

        # __NEW__: attention, increase dropout to reduce the effect of the weights
        self.attention = Attention()
Example #21
def create_CNN_LSTM():
    input = Input(shape=(224, 224, 1))
    x = Conv2D(filters=64, kernel_size=2, strides=1, padding="same")(input)
    x = MaxPooling2D()(x)
    x = Conv2D(filters=64, kernel_size=3, strides=1, padding="same")(x)
    x = MaxPooling2D()(x)
    # Reshape the (56, 56, 64) feature map into a sequence of 56*56 feature vectors for the LSTM
    x = Reshape((56 * 56, 64))(x)
    x = Bidirectional(LSTM(units=128, dropout=0.3, recurrent_dropout=0.2,
                           return_sequences=True))(x)
    # Attention expects a [query, value] list; use self-attention over the LSTM outputs
    output = Attention(dropout=0.3)([x, x])

    model = Model(inputs=input, outputs=output)
    return model
Example #22
 def transformer_regression(units, x):
     query = Dense(8)(x)
     value = Dense(8)(x)
     key = Dense(8)(x)
     query, value, key = [
         tf.expand_dims(x, axis=1) for x in [query, value, key]
     ]
     x = Attention()([query, value, key])
     x = LayerNormalization()(x)
     x = GlobalAveragePooling1D(data_format='channels_last')(x)
     x = Dense(units)(x)
     return x
Example #23
    def __init__(self, num_unit, num_window, vocab_size, head_num):
        super(Attention_Model, self).__init__()
        self.num_unit = num_unit # dimension of LSTM output
        self.num_window = num_window # window size
        self.vocab_size = vocab_size # vocabulary size
        self.head_num = head_num # number of attention heads

        self.embedding = Embedding(input_dim=vocab_size, output_dim=num_unit, input_length=num_window)
        self.att_layer = [Attention() for _ in range(head_num)] # multiple attention layers
        self.flatten = Flatten()
        self.dense_1 = Dense(150, activation='relu')
        self.dense_2 = Dense(3, activation='softmax')
Example #24
    def __init__(self, summary=False, loadfrom=False):
        '''
        Define an encoder-decoder model
        '''

        if loadfrom:
            self.model = load_model(loadfrom,
                                    custom_objects={'Attention': Attention})

        else:
            encoder_input = Input(shape=(None, 29))
            decoder_input = Input(shape=(None, 43))

            # bLSTM

            encoder = Bidirectional(
                LSTM(256, return_sequences=True,
                     return_state=False))(encoder_input)

            # define the encoder

            encoder, state_fh, state_fc, state_bh, state_bc = Bidirectional(
                LSTM(256, return_sequences=True, return_state=True),
                merge_mode='sum')(encoder)
            states = [state_bh, state_bc]

            # define the decoder

            decoder, state_h, state_c = LSTM(256,
                                             return_sequences=True,
                                             return_state=True)(
                                                 decoder_input,
                                                 initial_state=states)

            # calculate Luong attention based on the encoder and decoder outputs

            attention = Attention()([decoder, encoder])

            inner = Concatenate()([attention, decoder])

            # output layer is a one-hot array with softmax activation

            output = TimeDistributed(Dense(43, activation='softmax'))(inner)

            # define the model

            self.model = Model(inputs=[encoder_input, decoder_input],
                               outputs=output)

        if summary:
            self.model.summary()
Example #25
 def build_model(self):
     input = Input((self.max_len,))
     query_embedding = Embedding(self.max_features, self.embedding_dims, input_length=self.max_len)(input)
     value_embedding = Embedding(self.max_features, self.embedding_dims, input_length=self.max_len)(input)
     # Note: only the last (query_conv, value_conv) pair from this loop feeds the attention below
     for kernel_size in [3, 3, 3]:
         query_conv = Conv1D(128, kernel_size, activation='relu')(query_embedding)
         value_conv = Conv1D(128, kernel_size, activation='relu')(value_embedding)
     # Attention's first positional argument is use_scale, so passing max_len (truthy) enables the trainable scale
     query_value_attention = Attention(self.max_len)([query_conv, value_conv])
     query_encoding = GlobalMaxPool1D()(query_conv)
     query_value_attention = GlobalMaxPool1D()(query_value_attention)
     concate = Concatenate(axis=-1)([query_encoding, query_value_attention])
     output = Dense(self.class_num, activation=self.activation)(concate)
     model = Model(inputs=input, outputs=output)
     return model
Example #26
def Attention_Bi_LSTM_AE(n_steps, n_features, activation):
    en_input = Input(shape=[n_steps, n_features])
    e = Bidirectional(
        LSTM(256,
             recurrent_dropout=0.1,
             dropout=0.1,
             return_sequences=True,
             input_shape=[n_steps, n_features]))(en_input)
    e = Bidirectional(LSTM(128, recurrent_dropout=0.1, dropout=0.1))(e)
    e = Attention(use_scale=True)([e, e])
    en_output = Dense(get_output_dim(n_steps * n_features),
                      kernel_initializer='lecun_normal',
                      activation='selu')(e)
    encoder = keras.models.Model(inputs=[en_input], outputs=[en_output])

    de_input = Input(shape=[get_output_dim(n_steps * n_features)])
    d = RepeatVector(n_steps)(de_input)
    d = LSTM(128, return_sequences=True, recurrent_dropout=0.1, dropout=0.1)(d)
    d = LSTM(256, return_sequences=True, recurrent_dropout=0.1, dropout=0.1)(d)
    d = Attention(use_scale=True, causal=True)([d, d])
    de_output = TimeDistributed(Dense(n_features))(d)
    decoder = keras.models.Model(inputs=[de_input], outputs=[de_output])

    return encoder, decoder
Example #27
def test_attention():
    n_features = 3
    embed_size = 8
    seq_size = 3
    batch_size = 2

    inputs = tx.Constant(np.random.random([batch_size, seq_size]),
                         n_units=seq_size,
                         dtype=tf.int32)
    emb = tx.Lookup(inputs,
                    seq_size=seq_size,
                    embedding_shape=[n_features, embed_size])
    seq = emb()

    # keras attention doesn't have multiple heads
    attention = Attention(use_scale=False)

    res = attention([seq, seq, seq])

    attention2 = tx.MHAttention(emb, emb, emb, n_units=embed_size, n_heads=1)
    assert len(attention2.variables) == 3

    attention2.wq = tx.Linear(emb,
                              n_units=None,
                              weights=tf.linalg.eye(embed_size, embed_size),
                              add_bias=False)
    attention2.wk = tx.Linear(emb,
                              n_units=None,
                              weights=tf.linalg.eye(embed_size, embed_size),
                              add_bias=False)
    attention2.wv = tx.Linear(emb,
                              n_units=None,
                              weights=tf.linalg.eye(embed_size, embed_size),
                              add_bias=False)

    assert tx.tensor_equal(attention2.wq(seq), seq)

    res2 = attention2()

    g = tx.Graph.build(inputs=emb, outputs=attention2)
    g = g.as_function(ord_inputs=emb, ord_outputs=attention2)

    res3 = g(seq)

    assert tx.tensor_equal(res, res2)
    assert tx.tensor_equal(res, res3)
Example #28
        def att_hiddenfeat(ipt, iptname):
            cf = TimeDistributed(Flatten())(ipt)

            spec_btn = TimeDistributed(Dense(512, activation='relu'))(cf)
            spec_gru = Bidirectional(
                GRU(256, return_sequences=True, activation='sigmoid'))(cf)

            spec_att = Attention()([spec_btn, spec_gru])
            spec_att = Activation('sigmoid')(spec_att)

            specsman_query_value_attention = GlobalAveragePooling1D()(spec_att)
            specsman_query_coding = GlobalAveragePooling1D()(spec_btn)
            specsman_h = multiply(
                [specsman_query_value_attention, specsman_query_coding])
            specsman_h = LeakyReLU(alpha=0.2,
                                   name=iptname + '_specsman_h')(specsman_h)

            return specsman_h
Example #29
def AttentionLeNet5(input_shape, num_classes=None, attention=None):
    """AttentionLeNet-5 network built with Keras
    Inputs:
        input_shape: shape of a single element of the batched data, e.g., (32, 32, 1) or (28, 28, 1)
        num_classes: number of output classes (unused here; the classifier top is hard-coded to Dense(10))
        attention: attention type, one of ["official", "senet"]; the default None raises a ValueError below.
    """
    input_ = Input(shape=input_shape)

    model_head = Sequential()
    model_head.add(input_)
    model_head.add(
        Conv2D(filters=6,
               kernel_size=(5, 5),
               padding="valid",
               activation="relu"))
    model_head.add(MaxPool2D(strides=2))
    model_head.add(
        Conv2D(filters=16,
               kernel_size=(5, 5),
               padding="valid",
               activation="relu"))
    model_head.add(MaxPool2D(strides=2))

    model_top = Sequential()
    model_top.add(Flatten())
    model_top.add(Dense(120, activation="relu"))
    model_top.add(Dense(84, activation="relu"))
    model_top.add(Dense(10, activation='softmax'))

    # Add attention between the convolutional head and the classifier top
    if attention not in available_attention:
        raise ValueError(
            f"""attention argument must be in ["official", "senet"]""")
    if attention == "official":
        # Keras Attention expects a [query, value] list; self-attend over the conv features
        x_attention = Attention()([model_head.output, model_head.output])
    elif attention == "senet":
        x_attention = senet_attention(model_head.output)

    x_attention = model_top(x_attention)
    model = Model(inputs=input_, outputs=x_attention)

    # model.build()

    return model
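A hypothetical usage sketch: available_attention and senet_attention come from the surrounding module, so this only shows how the builder above is intended to be called.

model = AttentionLeNet5(input_shape=(28, 28, 1), attention="senet")
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])
model.summary()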
Example #30
 def __init__(self):
     super(MultiheadAttentiveBiLSTMNoVGG, self).__init__()
     # self.vgg = VGG16()
     self.attention = Attention()
     self.conv_max = Conv2D(64, 3, activation='relu')
     self.dropout_max = Dropout(0.5)
     self.conv_avg = Conv2D(64, 3, activation='relu')
     self.dropout_avg = Dropout(0.5)
     self.pool_max = MaxPooling2D(pool_size=(2, 2),
                                  strides=None,
                                  padding='valid',
                                  data_format=None)
     self.pool_avg = MaxPooling2D(pool_size=(2, 2),
                                  strides=None,
                                  padding='valid',
                                  data_format=None)
     self.conv_max_2 = Conv2D(32, 3, activation='relu')
     self.conv_avg_2 = Conv2D(32, 3, activation='relu')
     self.d_max = Dense(128, activation='relu')
     self.d_avg = Dense(128, activation='relu')
     self.flatten = Flatten()
     self.bn = BatchNormalization()
     self.d1_2 = Dense(56, activation='sigmoid')
     self.d2 = Dense(6, activation='softmax')
     # self.dropout_vgg = Dropout(0.5)
     self.dropout_max = Dropout(0.5)
     self.dropout_avg = Dropout(0.5)
     self.dropout = Dropout(0.5)
     self.lstm_max_fw = LSTM(128, return_sequences=True)
     self.lstm_max_bw = LSTM(128,
                             return_sequences=True,
                             go_backwards=True,
                             activation='relu')
     self.bilstm_max = Bidirectional(self.lstm_max_fw,
                                     backward_layer=self.lstm_max_bw)
     self.lstm_avg_fw = LSTM(128, return_sequences=True)
     self.lstm_avg_bw = LSTM(128,
                             return_sequences=True,
                             go_backwards=True,
                             activation='relu')
     self.bilstm_avg = Bidirectional(self.lstm_avg_fw,
                                     backward_layer=self.lstm_avg_bw)
     self.dropout4 = Dropout(0.3)