Example #1
def test_additive_attention():
    """
    Bahdanau-style attention. query (batch, Tq, dim), key (batch, Tv, dim) and value (batch, Tv, dim) are inputs.
    following computations is processed.
    1. reshape query as shape [batch, Tq, 1, dim] and value as shape [batch, 1, Tv, dim]
    2. broadcasting multiply between additive of above as output shape [batch, Tq, Tv, dim]
    3. reduce_sum above with dim axis as output shape [batch, Tq, Tv]
    4. softmax of above
    5. MatMul between 4. and value as output shape [batch, Tq, dim]
    """
    Tq = 10
    Tv = 10
    dim = 16
    q_shape = (Tq, dim)
    k_shape = (Tv, dim)
    v_shape = (Tv, dim)
    q = Input(q_shape)
    k = Input(k_shape)
    v = Input(v_shape)
    x = AdditiveAttention()([q, k, v])
    model = Model([q, k, v], x)
    flops = get_flops(model, batch_size=1)
    assert (
        flops
        == Tq * Tv * dim  # No.2 (multiply by scale)
        + Tq * Tv * dim  # No.2 (broadcast add)
        + Tq * Tv * (dim - 1)  # No.3 (reduce_sum)
        + 5 * Tq * Tv  # No.4 (softmax)
        + 2 * Tv * Tq * dim  # No.5 (MatMul)
    )
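
For reference, here is a minimal sketch (not part of the original test) of the computation the docstring describes, written with raw TensorFlow ops; the tanh and the learned scale weight come from the Keras implementation of AdditiveAttention:

import tensorflow as tf

batch, Tq, Tv, dim = 1, 10, 10, 16
q = tf.random.normal((batch, Tq, dim))
k = tf.random.normal((batch, Tv, dim))
v = tf.random.normal((batch, Tv, dim))
scale = tf.ones((dim,))  # stands in for the layer's learned scale weight

# Steps 1-2: reshape for broadcasting, add, and scale.
q_r = tf.reshape(q, (batch, Tq, 1, dim))
k_r = tf.reshape(k, (batch, 1, Tv, dim))
scaled = scale * tf.tanh(q_r + k_r)            # (batch, Tq, Tv, dim)
# Step 3: collapse the feature axis into attention scores.
scores = tf.reduce_sum(scaled, axis=-1)        # (batch, Tq, Tv)
# Step 4: attention weights.
weights = tf.nn.softmax(scores, axis=-1)
# Step 5: weighted sum of the values.
out = tf.matmul(weights, v)                    # (batch, Tq, dim)
print(out.shape)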
Example #2
def build_decoder(params):
    # decoder layers
    de_inputs = Input(shape=(params.de_max_len, ), name='de_inputs')
    de_init_state_h = Input(shape=(params.hidden_units, ),
                            name='de_init_state_h')
    de_init_state_c = Input(shape=(params.hidden_units, ),
                            name='de_init_state_c')
    de_en_outputs = Input(shape=(params.en_max_len, params.hidden_units),
                          name='de_en_inputs')

    # decoder forward
    de_embedding_layer = Embedding(params.de_vocab_size,
                                   params.embedding_dim,
                                   mask_zero=True)
    de_lstm_layer = LSTM(params.hidden_units, return_sequences=True)
    attention_layer = AdditiveAttention()
    con_layer = Concatenate()
    fc_layer = Dense(params.de_vocab_size, activation='softmax')

    # forward
    de_embedding = de_embedding_layer(de_inputs)
    de_lstm_outputs = de_lstm_layer(
        de_embedding, initial_state=[de_init_state_h, de_init_state_c])
    attention_vec = attention_layer([de_lstm_outputs, de_en_outputs])
    fc_inputs = con_layer([attention_vec, de_lstm_outputs])
    fc_outputs = fc_layer(fc_inputs)

    # decoder definition
    decoder = Model(
        inputs=[de_inputs, de_init_state_h, de_init_state_c, de_en_outputs],
        outputs=fc_outputs,
        name='decoder')
    decoder.summary()
    return decoder
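
A hypothetical usage sketch: `params` only needs the attributes the function reads, so any simple namespace works (the sizes below are illustrative, not from the original code):

from types import SimpleNamespace

params = SimpleNamespace(de_max_len=20, en_max_len=30, hidden_units=128,
                         de_vocab_size=5000, embedding_dim=64)
decoder = build_decoder(params)  # prints the summary and returns the Model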
Example #3
    def attention_lstm(self):
        input_x = Input(shape=self.input_shape, name='input')
        X = input_x

        for i in range(self.lstm_blocks):
            query = Dense(10, name='query_' + str(i))(X)
            key = Dense(10, name='key_' + str(i))(X)
            attention_weights = AdditiveAttention(use_scale=False, name='attention_' + str(i))([query, X, key])
            attention_weights = Dense(1, activation='softmax', name='attention_weights_' + str(i))(attention_weights)
            context = Multiply(name='context_' + str(i))([attention_weights, X])
            X = LSTM(self.n_units, return_sequences=True,
                     recurrent_dropout=self.recurrent_dropout,
                     kernel_regularizer=l1_l2(self.lstm_l1, self.lstm_l2),
                     activity_regularizer=l1_l2(self.lstm_l1, self.lstm_l2),
                     name='lstm_' + str(i))(context)
            if self.dropout_rate > 0:
                X = Dropout(self.dropout_rate, name='dropout_' + str(i))(X)

        X = LSTM(self.n_units, return_sequences=False,
                 recurrent_dropout=self.recurrent_dropout,
                 kernel_regularizer=l1_l2(self.lstm_l1, self.lstm_l2),
                 activity_regularizer=l1_l2(self.lstm_l1, self.lstm_l2),
                 name='lstm_last')(X)
        if self.dropout_rate > 0:
            X = Dropout(self.dropout_rate, name='dropout_last')(X)
        X = Dense(self.n_outputs, activation=self.activation, name='output')(X)

        return Model(inputs=input_x, outputs=X, name='attention_lstm')
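
The `[query, X, key]` call above uses the three-tensor input convention of AdditiveAttention: `[query, value]` or `[query, value, key]`, where the output takes the query's time steps and the value's feature dimension. A small shape check (my illustration, not from the original code):

import tensorflow as tf

q = tf.random.normal((2, 5, 10))   # query: (batch, Tq, dim)
v = tf.random.normal((2, 8, 16))   # value: (batch, Tv, dim_v)
k = tf.random.normal((2, 8, 10))   # key: same dim as query, same steps as value
out = tf.keras.layers.AdditiveAttention(use_scale=False)([q, v, k])
print(out.shape)  # (2, 5, 16): Tq time steps, value features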
Example #4
def build_decoder(params):
    # decoder layers
    de_inputs = Input(shape=(1, ))
    de_init_state_h = Input(shape=(params.hidden_units, ))
    de_init_state_c = Input(shape=(params.hidden_units, ))
    de_en_outputs = Input(shape=(params.en_max_len, params.hidden_units))

    # decoder forward
    de_embedding_layer = Embedding(params.de_vocab_size,
                                   params.embedding_dim,
                                   mask_zero=True)
    de_lstm_layer = LSTM(params.hidden_units,
                         return_sequences=True,
                         return_state=True)
    attention_layer = AdditiveAttention()
    add_layer = Add()
    fc_layer = Dense(params.de_vocab_size, activation='softmax')

    # forward
    de_embedding = de_embedding_layer(de_inputs)
    de_lstm_outputs, de_output_state_h, de_output_state_c = de_lstm_layer(
        de_embedding, initial_state=[de_init_state_h, de_init_state_c])
    attention_vec = attention_layer([de_lstm_outputs, de_en_outputs])
    fc_inputs = add_layer([attention_vec, de_lstm_outputs])
    fc_outputs = fc_layer(fc_inputs)

    # decoder definition
    decoder = Model(
        inputs=[de_inputs, de_init_state_h, de_init_state_c, de_en_outputs],
        outputs=[fc_outputs, de_output_state_h, de_output_state_c])
    return decoder
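
Since this decoder consumes one token per call and returns its LSTM states, it is typically driven by a step-by-step decoding loop. A hedged sketch (the start/end token ids and step limit are assumptions):

import numpy as np

def greedy_decode(decoder, en_outputs, state_h, state_c,
                  start_id=1, end_id=2, max_steps=50):
    token = np.array([[start_id]])
    decoded = []
    for _ in range(max_steps):
        # Feed one token plus the running states; collect the new states.
        probs, state_h, state_c = decoder.predict(
            [token, state_h, state_c, en_outputs], verbose=0)
        next_id = int(np.argmax(probs[0, -1]))
        if next_id == end_id:
            break
        decoded.append(next_id)
        token = np.array([[next_id]])
    return decoded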
Example #5
    def get_document_model(self, section_model, question_model):
        document_input = Input(shape=(None, None, None), name="document_input")
        document_encoded = TimeDistributed(section_model)(document_input)
        cnn_1d = Conv1D(128, 4, padding="same", activation="relu", strides=1)(document_encoded)
        attention = AdditiveAttention()([cnn_1d, question_model])
        output = GlobalAveragePooling1D()(attention)
        model = Model(document_input, output)
        return model
Example #6
def CRNN_Attention(img_width,
                   img_height,
                   img_channels,
                   len_characters,
                   trainable=True,
                   cnn_backbone_name='resnet_attention',
                   rnn_backbone_name=None):
    """Instantiate a CRNN architecture with attention mechanism.

    Parameters:
        img_width:
            int, the width of image.
        img_height:
            int, the height of image.
        img_channels:
            int, the channels of image.
        len_characters:
            int, the number of distinct characters; the output layer has
            len_characters + 1 units (the extra unit is the CTC blank).
        trainable:
            bool, default=True
            If True, build the training model (with the CTC loss attached);
            if False, build the inference model.
        cnn_backbone_name:
            str, the name of convolution part of CRNN model.
        rnn_backbone_name:
            str, the name of recurrent part of CRNN model.

    Returns:
        A Keras model instance.
    """
    input_image = Input(shape=(img_width, img_height, img_channels), name='Input-Image', dtype='float32')
    input_label = Input(shape=(None,), name='Input-Label', dtype='float32')

    # CNN backbone
    cnn_backbone = get_cnn_backbone(cnn_backbone_name)
    cnn_layer = cnn_backbone(input_image)

    # RNN backbone
    rnn_backbone = get_rnn_backbone(rnn_backbone_name)
    rnn_layer = rnn_backbone(cnn_layer)

    # Add attention
    attention_layer = AdditiveAttention(name='Attention')([cnn_layer, rnn_layer])
    concatenate_layer = Concatenate(name='Concatenate')([cnn_layer, attention_layer])

    # Fully-connected output layer.
    dense_layer = Dense(units=len_characters + 1,
                        activation='softmax',
                        name='Output-Dense')(concatenate_layer)
    ctc_layer = CTCLayer(name='ctc_loss')(input_label, dense_layer)

    if trainable is True:
        model = Model(inputs=[input_image, input_label], outputs=ctc_layer, name='ocr_model_train')
    else:
        model = Model(inputs=input_image, outputs=dense_layer, name='ocr_model_inference')

    return model
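
`CTCLayer` is not defined in this snippet. A common implementation (along the lines of the Keras OCR examples) computes the CTC loss inside the layer via `add_loss` and passes the predictions through unchanged:

import tensorflow as tf
from tensorflow import keras

class CTCLayer(keras.layers.Layer):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        # Each sample uses its full label/prediction length.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        # Register the CTC loss and return the predictions unchanged.
        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)
        return y_pred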
Example #7
    def get_text_model(self, embedding, use_attention=False, question_model=None):
        text_input = Input(shape=(None,), name="text_input")
        text_embedding = embedding(text_input)
        output = Conv1D(128, 4, padding="same", activation="relu", strides=1)(text_embedding)

        if use_attention:
            attention = AdditiveAttention()([output, question_model])
            output = GlobalAveragePooling1D()(attention)

        model = Model(text_input, output)
        return model
Example #8
    def __init__(self, num_hidden, embedding=False, embedding_len=50):
        """
        :param num_hidden:
        :param embedding:
        :param embedding_len:
        """
        super(SequenceAttentionLayer, self).__init__()

        self.num_hidden = num_hidden
        self.if_embedding = embedding
        self.embedding_len = embedding_len
        if embedding:
            self.embedding_len = embedding_len
            self.embed = Embedding(
                input_dim=len(word_vectors),
                output_dim=self.embedding_len,
                weights=[word_vectors],
                mask_zero=True,
                trainable=False
            )
            self.embed_mask = Masking(mask_value=0)

        self.lstm = LSTM(
            self.num_hidden,
            activation="tanh",
            return_sequences=True
        )
        # pass the type of attention
        self.attention_layer = AdditiveAttention(use_scale=True)

        self.attention_layer.supports_masking = True
Example #9
    def attention_lstm_residual(self):
        input_x = Input(shape=self.input_shape, name='input')
        X = input_x

        for i in range(self.lstm_blocks):
            query = Dense(10, name='query_' + str(i))(X)
            key = Dense(10, name='key_' + str(i))(X)
            attention_weights = AdditiveAttention(use_scale=False, name='attention_' + str(i))([query, X, key])
            attention_weights = Dense(1, activation='softmax', name='attention_weights_' + str(i))(attention_weights)
            context = Multiply(name='context_' + str(i))([attention_weights, X])
            X = LSTM(self.n_units, return_sequences=True,
                     recurrent_dropout=self.recurrent_dropout,
                     kernel_regularizer=l1_l2(self.lstm_l1, self.lstm_l2),
                     activity_regularizer=l1_l2(self.lstm_l1, self.lstm_l2),
                     name='lstm_' + str(i))(context)
            if self.dropout_rate > 0:
                X = Dropout(self.dropout_rate, name='dropout_' + str(i))(X)

        X = LSTM(self.n_units, return_sequences=False,
                 recurrent_dropout=self.recurrent_dropout,
                 kernel_regularizer=l1_l2(self.lstm_l1, self.lstm_l2),
                 activity_regularizer=l1_l2(self.lstm_l1, self.lstm_l2),
                 name='lstm_last')(X)
        if self.dropout_rate > 0:
            X = Dropout(self.dropout_rate, name='dropout_last')(X)

        crop_input = Cropping1D(cropping=(0, self.input_shape[0] - 1), name='crop_input')(input_x)
        if self.dropout_rate > 0:
            crop_input = Dropout(self.dropout_rate, name='dropout_crop_input')(crop_input)
        flatten_crop = Flatten(name='flatten_crop_input')(crop_input)
        query_input = Dense(10, name='query_input')(flatten_crop)
        key_input = Dense(10, name='key_input')(flatten_crop)
        attention_weights_input = AdditiveAttention(use_scale=False, name='attention_input')([query_input, flatten_crop, key_input])
        attention_weights_input = Dense(1, activation='softmax', name='attention_weights_input')(attention_weights_input)
        context_input = Multiply(name='context_input')([attention_weights_input, flatten_crop])
        concat = Concatenate(name='concat_output')([X, context_input])
        X = Dense(self.n_outputs, activation=self.activation, name='output')(concat)

        return Model(inputs=input_x, outputs=X, name='attention_lstm_residual')
Example #10
    def get_section_model(self,
                          sentence_model,
                          question_output=None,
                          question_input=None):
        section_input = Input(shape=(section_max_size, sentence_max_size),
                              name="section_input")
        section_encoded = TimeDistributed(sentence_model)(
            [section_input, question_input])
        section_encoded = Conv1D(128,
                                 4,
                                 padding="same",
                                 activation="relu",
                                 strides=1)(section_encoded)
        attention = AdditiveAttention()([section_encoded, question_output])
        output = GlobalAveragePooling1D()(attention)
        model = Model(section_input, output)
        return model
Example #11
    def __init__(self, vocab_size, embedding_dim, hidden_units, de_max_len,
                 en_max_len):
        super(Decoder, self).__init__()
        # parameters initialization
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_units = hidden_units
        self.de_max_len = de_max_len
        self.en_max_len = en_max_len
        # layers initialization
        self.embedding = Embedding(self.vocab_size,
                                   self.embedding_dim,
                                   input_length=self.de_max_len)
        self.lstm = LSTM(self.hidden_units,
                         return_sequences=True,
                         return_state=True,
                         input_shape=(self.de_max_len, self.embedding_dim))
        self.attention = AdditiveAttention()
        self.fc = Dense(self.vocab_size, activation='softmax')
        self.add_layer = Add()
        self.encoder_outputs_layer = Input(shape=(self.en_max_len,
                                                  embedding_dim))
        self.init_states_layer = Input(shape=(2, hidden_units))
Example #12
    def get_model(self):
        input = Input(shape=(self.max_len, ))
        embedding = Embedding(self.max_features,
                              self.embedding_dims,
                              input_length=self.max_len,
                              trainable=True)(input)
        embedding = SpatialDropout1D(self.dropout_rate)(embedding)
        lstm_forward = LSTM(128, return_sequences=True)(embedding)
        lstm_backward = LSTM(128, return_sequences=True,
                             go_backwards=True)(embedding)
        x = Concatenate()([lstm_forward, embedding, lstm_backward])

        attn = AdditiveAttention()([x, x])

        x = [GlobalAveragePooling1D()(x)] + \
            [GlobalMaxPooling1D()(x)] + \
            [GlobalAveragePooling1D()(attn)] + \
            [GlobalMaxPooling1D()(attn)]

        x = Concatenate()(x)
        output = Dense(self.class_num, activation=self.last_activation)(x)
        model = Model(inputs=input, outputs=output)
        return model
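
Note that `AdditiveAttention()([x, x])` above is self-attention: the query and the value are the same tensor (the key defaults to the value), so the output has the same shape as `x`. A quick check (my illustration):

import tensorflow as tf

x = tf.random.normal((2, 7, 16))
y = tf.keras.layers.AdditiveAttention()([x, x])
print(y.shape)  # (2, 7, 16): same shape as the query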
Example #13
    def _create_model(self):
        '''Creates the GRU architecture described in the paper
        '''
        input_shape = (self.window_size, 1)
        inputs = Input(input_shape)
        cnn_1 = Conv1D(16, 4, activation="relu", padding="same",
                       strides=1)(inputs)
        att = AdditiveAttention(causal=True)([cnn_1, cnn_1])
        b1 = Bidirectional(GRU(64, return_sequences=False, stateful=False),
                           merge_mode='concat')(att)
        x = Dense(64, activation='relu')(b1)
        outputs = Dense(1, activation='linear')(x)
        model = Model(inputs=inputs, outputs=outputs)

        model.compile(loss='mse', optimizer='adam')
        print(model.summary())
        plot_model(model,
                   to_file='model.png',
                   show_shapes=True,
                   show_layer_names=False)

        return model
Example #14
    def attention_lstm_dropout_input(self):
        dropout_input = Input(shape=(self.seq_len, self.droput_input_cols), name='dropout_input')
        remain_input = Input(shape=(self.seq_len, self.remain_input_cols), name='remain_input')

        dropout_x = Dropout(self.dropout_rate)(dropout_input)

        X = Concatenate(axis=-1)([remain_input, dropout_x])

        for i in range(self.lstm_blocks):
            query = Dense(10)(X)
            key = Dense(10)(X)
            context = AdditiveAttention()([query, X, key])
            #context = one_step_attention(a)
            X = LSTM(self.n_units, return_sequences=True, recurrent_dropout=self.recurrent_dropout)(context)
            if self.dropout_rate > 0:
                X = Dropout(self.dropout_rate)(X)

        X = LSTM(self.n_units, return_sequences=False, recurrent_dropout=self.recurrent_dropout)(X)
        if self.dropout_rate > 0:
            X = Dropout(self.dropout_rate)(X)
        X = Dense(self.n_outputs)(X)
        X = Activation(self.activation, name='output')(X)

        return Model(inputs=[dropout_input, remain_input], outputs=X)
Example #15
    if fr_timesteps:
        decoder_inputs = Input(shape=(dec_timesteps - 1, dec_vsize), name='decoder_inputs')
    else:
        decoder_inputs = Input(shape=(None, dec_vsize), name='decoder_inputs')

    # Encoder GRU
    encoder_gru = GRU(hidden_size, return_sequences=True, return_state=True, name='encoder_gru')
    encoder_out, encoder_state = encoder_gru(encoder_inputs)

    # Set up the decoder GRU, using `encoder_states` as initial state.
    decoder_gru = GRU(hidden_size, return_sequences=True, return_state=True, name='decoder_gru')
    decoder_out, decoder_state = decoder_gru(decoder_inputs, initial_state=encoder_state)

    # Attention layer
    # attn_layer = AttentionLayer(name='attention_layer')
    attn_layer = AdditiveAttention(name="attention_layer")

    ## The inputs to AdditiveAttention are [query, value];
    ## it returns a tensor with the same shape as the query.
    ## This is different from the AttentionLayer developed by Thushan.
    # attn_out, attn_states = attn_layer([encoder_out, decoder_out])

    attn_out, attn_states = attn_layer([decoder_out, encoder_out],
                                       return_attention_scores=True)

    # Concat attention input and decoder GRU output
    decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_out, attn_out])

    # Dense layer
    dense = Dense(dec_vsize, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)
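
The fragment above starts mid-function, so `encoder_inputs`, `hidden_size`, `dec_timesteps` and `dec_vsize` are defined elsewhere. For orientation, a self-contained sketch of the same wiring with illustrative sizes (my reconstruction, not the original function):

import tensorflow as tf
from tensorflow.keras.layers import (Input, GRU, AdditiveAttention,
                                     Concatenate, Dense, TimeDistributed)
from tensorflow.keras.models import Model

hidden_size, enc_vsize, dec_vsize = 64, 100, 120

encoder_inputs = Input(shape=(None, enc_vsize), name='encoder_inputs')
decoder_inputs = Input(shape=(None, dec_vsize), name='decoder_inputs')

# Encoder GRU feeds its final state into the decoder GRU.
encoder_out, encoder_state = GRU(hidden_size, return_sequences=True,
                                 return_state=True)(encoder_inputs)
decoder_out, _ = GRU(hidden_size, return_sequences=True,
                     return_state=True)(decoder_inputs,
                                        initial_state=encoder_state)

# [query, value] = [decoder_out, encoder_out]; scores are also returned.
attn_out, attn_scores = AdditiveAttention()(
    [decoder_out, encoder_out], return_attention_scores=True)

concat = Concatenate(axis=-1)([decoder_out, attn_out])
decoder_pred = TimeDistributed(Dense(dec_vsize, activation='softmax'))(concat)

model = Model([encoder_inputs, decoder_inputs], decoder_pred)
model.summary()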
Example #16
class SequenceAttentionLayer(tf.keras.layers.Layer):
    """Sequence model(LSTM) with attention sum of hidden states"""

    def __init__(self, num_hidden, embedding=False, embedding_len=50):
        """
        :param num_hidden:
        :param embedding:
        :param embedding_len:
        """
        super(SequenceAttentionLayer, self).__init__()

        self.num_hidden = num_hidden
        self.if_embedding = embedding
        self.embedding_len = embedding_len
        if embedding:
            self.embedding_len = embedding_len
            self.embed = Embedding(
                input_dim=len(word_vectors),
                output_dim=self.embedding_len,
                weights=[word_vectors],
                mask_zero=True,
                trainable=False
            )
            self.embed_mask = Masking(mask_value=0)

        self.lstm = LSTM(
            self.num_hidden,
            activation="tanh",
            return_sequences=True
        )
        # pass the type of attention
        self.attention_layer = AdditiveAttention(use_scale=True)

        self.attention_layer.supports_masking = True

    def get_config(self):
        config = super(SequenceAttentionLayer, self).get_config()
        config['num_hidden'] = self.num_hidden
        config["embedding"] = self.if_embedding
        config["embedding_len"] = self.embedding_len
        return config

    def compute_mask(self, inputs, mask=None):
        # Compute a sentence-level mask: a sentence is masked out when all of
        # its token ids are zero (padding).
        if not self.if_embedding:
            return None
        # inputs = self.embed(inputs)
        embed_mask = tf.math.not_equal(inputs, 0)
        return tf.math.not_equal(
            tf.reduce_sum(tf.cast(embed_mask, tf.int32), axis=-1), 0
        )

    def call(self, inputs, mask=None):
        # TODO include mask as input and make sure it keeps flowing
        # if embedding
        if self.if_embedding:
            inputs = self.embed(inputs)
            inputs = self.embed_mask(inputs)
            # putting every sentence in a single axis
            inputs_mask = inputs._keras_mask
            inputs = tf.reshape(
                inputs, shape=(-1, maxlen, self.embedding_len)
            )
            mask = tf.reshape(
                inputs_mask,
                shape=(-1, maxlen)
            )

        lstm_out = self.lstm(inputs, mask=mask)
        lstm_mask = lstm_out._keras_mask

        h = self.attention_layer(
            [lstm_out, lstm_out],
            mask=[lstm_mask, lstm_mask]
        )
        out = tf.reduce_mean(h, axis=-2)
        if self.if_embedding:
            # reshaping back to (batch_size, max_sentences, num_hidden)
            out = tf.reshape(
                out,
                shape=(-1, max_sentences, self.num_hidden)
            )
        return out
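
A hedged usage sketch for the layer above: `word_vectors`, `maxlen` and `max_sentences` are module-level globals the layer reads, so the values below are illustrative assumptions:

import numpy as np
import tensorflow as tf

word_vectors = np.random.rand(1000, 50).astype("float32")  # vocab x embed dim
maxlen, max_sentences = 30, 8

layer = SequenceAttentionLayer(num_hidden=64, embedding=True, embedding_len=50)
# (batch, max_sentences, maxlen) integer token ids, with 0 as padding
tokens = tf.random.uniform((2, max_sentences, maxlen),
                           minval=0, maxval=1000, dtype=tf.int32)
out = layer(tokens)
print(out.shape)  # (2, max_sentences, 64)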
Example #17
    def get_model(self):

        print("Vocabulary Size:", vectorizer.get_vocabulary_size())

        overview_input = Input(shape=(None, None),
                               dtype='int64',
                               name="OverviewInput")
        plot_input = Input(shape=(None, None), dtype='int64', name="PlotInput")
        subtitles_input = Input(shape=(None, None),
                                dtype='int64',
                                name="SubtitlesInput")
        sentence_input = Input(shape=(None, ),
                               dtype='int64',
                               name="SentenceInput")

        embedded_sentence = Embedding(vectorizer.get_vocabulary_size(),
                                      300,
                                      trainable=True,
                                      name="Embedding")(sentence_input)
        spatial_dropout_sentence = SpatialDropout1D(
            0.20, name="SpatialDropoutSentence")(embedded_sentence)
        cnn_sentence = Conv1D(64,
                              4,
                              padding="same",
                              activation="relu",
                              strides=1,
                              name="Conv1DSentence")(spatial_dropout_sentence)
        max_pool_sentence = MaxPooling1D(
            pool_size=3, name="MaxPooling1DSentence")(cnn_sentence)
        sentence_encoding = Bidirectional(LSTM(500))(max_pool_sentence)
        sentence_model = Model(sentence_input, sentence_encoding)

        segment_time_distributed = TimeDistributed(
            sentence_model, name="TimeDistributedSegment")
        segment_cnn = Conv1D(172,
                             2,
                             padding="same",
                             activation="relu",
                             name="SegmentConv1D")
        segment_max_pool = MaxPooling1D(pool_size=3, name="SegmentMaxPool1D")

        segment_cnn_2 = Conv1D(172,
                               5,
                               padding="same",
                               activation="relu",
                               name="Segment2Conv1D")
        segment_max_pool_2 = MaxPooling1D(pool_size=3,
                                          name="Segment2MaxPool1D")

        overview_time_distributed = segment_time_distributed(overview_input)
        overview_cnn = segment_cnn(overview_time_distributed)
        overview_maxpool = segment_max_pool(overview_cnn)

        plot_time_distributed = segment_time_distributed(plot_input)
        plot_cnn = segment_cnn(plot_time_distributed)
        plot_maxpool = segment_max_pool(plot_cnn)

        subtitles_timedistributed = segment_time_distributed(subtitles_input)
        subtitles_cnn = segment_cnn_2(subtitles_timedistributed)
        subtitles_maxpool = segment_max_pool_2(subtitles_cnn)

        overview_dropout = SpatialDropout1D(0.40)(overview_maxpool)
        overview_pre_attention_output = Dense(
            172, name="OverviewPreAttnOutput")(overview_dropout)

        plot_dropout = SpatialDropout1D(0.40)(plot_maxpool)
        plot_pre_attention_output = Dense(
            172, name="PlotPreAttnOutput")(plot_dropout)

        subtitles_dropout = SpatialDropout1D(
            0.40, name="SubtitlesDropout")(subtitles_maxpool)
        subtitles_pre_attention_output = Dense(
            172, name="SubtitlesPreAttnOutput")(subtitles_dropout)

        attention_overview = AdditiveAttention(name="OverviewAttention")(
            [overview_pre_attention_output, overview_maxpool])
        attention_plot = AdditiveAttention(name="PlotAttention")(
            [plot_pre_attention_output, plot_maxpool])
        attention_subtitles = AdditiveAttention(name="SubtitlesAttention")(
            [subtitles_pre_attention_output, subtitles_maxpool])

        overview_output = GlobalAveragePooling1D(
            name="GlobalAvgPoolOverview")(attention_overview)
        plot_output = GlobalAveragePooling1D(
            name="GlobalAvgPoolPlot")(attention_plot)
        subtitles_output = GlobalAveragePooling1D(
            name="GlobalAvgPoolSubtitles")(attention_subtitles)

        concat_output = Concatenate(axis=-1, name="OutputConcatenate")(
            [overview_output, plot_output, subtitles_output])
        dropout_output = Dropout(0.40)(concat_output)
        output = Dense(172, activation="sigmoid", name="Output")(dropout_output)

        model = Model([overview_input, plot_input, subtitles_input], output)

        model.compile(loss='binary_crossentropy',
                      optimizer='adamax',
                      metrics=self.METRICS)

        print(sentence_model.summary())
        print(model.summary())
        self.sentence_model = sentence_model
        self.model = model
        if self.load_weights:
            self.sentence_model.load_weights("data/weights/sentence_model.h5")
            self.model.load_weights("data/weights/model.h5")
            self.vectorizer.load("data/weights/vectorizer.dat")
        return sentence_model, model
Example #18
def LSTM_model_veracity(x_train_embeddings,
                        x_train_metafeatures,
                        y_train,
                        x_test_embeddings,
                        x_test_metafeatures,
                        params,
                        eval=False,
                        use_embeddings=True,
                        use_metafeatures=True,
                        Early_Stopping=True,
                        log_path=""):
    # Parameter search
    log_dir = log_path + datetime.datetime.now().strftime("%d%m%Y-%H%M%S")

    num_lstm_units = int(params['num_lstm_units'])
    num_lstm_layers = int(params['num_lstm_layers'])
    num_dense_layers = int(params['num_dense_layers'])
    num_dense_units = int(params['num_dense_units'])
    num_epochs = params['num_epochs']
    learn_rate = params['learn_rate']
    mb_size = params['mb_size']
    l2reg = params['l2reg']
    dropout = params['dropout']
    attention = params['attention']

    # Defining input shapes
    if use_embeddings:
        emb_shape = x_train_embeddings[0].shape

    if use_metafeatures:
        metafeatures_shape = x_train_metafeatures[0].shape

    # Creating the two inputs
    if use_embeddings:
        emb_input = Input(shape=emb_shape, name='Embeddings')

    if use_metafeatures:
        metafeatures_input = Input(shape=metafeatures_shape,
                                   name='Metafeatures')

    # Adding masks to account for zero paddings
    if use_embeddings:
        emb_mask = Masking(mask_value=0)(emb_input)
    if use_metafeatures:
        metafeatures_mask = Masking(mask_value=0)(metafeatures_input)

    # Adding attention and LSTM layers with varying layers and units using parameter search
    if attention == 1:
        for nl in range(num_lstm_layers):
            if use_embeddings:
                emb_LSTM_query = Bidirectional(
                    LSTM(num_lstm_units,
                         dropout=dropout,
                         recurrent_dropout=0.2,
                         return_sequences=True))(emb_mask)

                emb_LSTM_value = Bidirectional(
                    LSTM(num_lstm_units,
                         dropout=dropout,
                         recurrent_dropout=0.2,
                         return_sequences=True))(emb_mask)
            if use_metafeatures:
                metafeatures_LSTM_query = Bidirectional(
                    LSTM(num_lstm_units,
                         dropout=dropout,
                         recurrent_dropout=0.2,
                         return_sequences=True))(metafeatures_mask)

                metafeatures_LSTM_value = Bidirectional(
                    LSTM(num_lstm_units,
                         dropout=dropout,
                         recurrent_dropout=0.2,
                         return_sequences=True))(metafeatures_mask)
        if use_embeddings:
            emb_LSTM = AdditiveAttention(name='Attention_Embeddings')(
                [emb_LSTM_query, emb_LSTM_value])
        if use_metafeatures:
            metafeatures_LSTM = AdditiveAttention(
                name='Attention_Metafeatures')(
                    [metafeatures_LSTM_query, metafeatures_LSTM_value])
    else:
        if use_embeddings:
            emb_LSTM = Bidirectional(
                LSTM(num_lstm_units,
                     dropout=dropout,
                     recurrent_dropout=dropout,
                     return_sequences=True))(emb_mask)
        if use_metafeatures:
            metafeatures_LSTM = Bidirectional(
                LSTM(num_lstm_units,
                     dropout=dropout,
                     recurrent_dropout=dropout,
                     return_sequences=True))(metafeatures_mask)

    if use_embeddings and use_metafeatures:
        # Concatenating the two inputs
        model = Concatenate()([emb_LSTM, metafeatures_LSTM])
    elif use_metafeatures:
        model = metafeatures_LSTM

    # Adding attention and another LSTM to the concatenated layers
    if attention == 1:
        model_query = Bidirectional(
            LSTM(num_lstm_units,
                 dropout=dropout,
                 recurrent_dropout=0.2,
                 return_sequences=False))(model)
        model_value = Bidirectional(
            LSTM(num_lstm_units,
                 dropout=dropout,
                 recurrent_dropout=0.2,
                 return_sequences=False))(model)
        model = AdditiveAttention(name='Attention_Model')(
            [model_query, model_value])

    else:
        model = Bidirectional(
            LSTM(num_lstm_units,
                 dropout=dropout,
                 recurrent_dropout=dropout,
                 return_sequences=False))(model)

    # Adding dense layer with varying layers and units using parameter search
    for nl in range(num_dense_layers):
        model = Dense(num_dense_units)(model)
        model = LeakyReLU()(model)

    # Adding dropout to the model
    model = Dropout(dropout)(model)

    # Adding softmax dense layer with varying l2 regularizers using parameter search
    output = Dense(3,
                   activation='softmax',
                   activity_regularizer=regularizers.l2(l2reg),
                   name='labels')(model)

    # Model output
    if use_embeddings and use_metafeatures:
        model = Model(inputs=[emb_input, metafeatures_input], outputs=output)
    elif use_metafeatures:
        model = Model(inputs=metafeatures_input, outputs=output)
    #model = Model(inputs=emb_input, outputs=output)
    # Plotting the model
    #plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

    # Adding Adam optimizer with varying learning rate using parameter search
    adam = optimizers.Adam(lr=learn_rate,
                           beta_1=0.9,
                           beta_2=0.999,
                           epsilon=1e-08,
                           decay=0.0)

    # Compiling model
    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    callback_list = []
    #TensorBoard
    tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
    callback_list.append(tensorboard_callback)

    #Early_Stopping
    if Early_Stopping:
        earlystop_callback = EarlyStopping(monitor='val_accuracy',
                                           min_delta=0.0001,
                                           patience=5)
        callback_list.append(earlystop_callback)

    #plot_model(model, "model.png")
    if Early_Stopping:
        # Fitting the model with varying batch sizes and epochs using parameter search
        if use_embeddings and use_metafeatures:
            model.fit(
                {
                    'Embeddings': x_train_embeddings,
                    'Metafeatures': x_train_metafeatures
                },
                y_train,
                batch_size=mb_size,
                epochs=num_epochs,
                shuffle=True,
                class_weight=None,
                verbose=1,
                callbacks=callback_list,
                validation_split=.1)
        elif use_metafeatures:
            model.fit(x_train_metafeatures,
                      y_train,
                      batch_size=mb_size,
                      epochs=num_epochs,
                      shuffle=True,
                      class_weight=None,
                      verbose=1,
                      callbacks=callback_list,
                      validation_split=.1)
    else:
        # Fitting the model with varying batch sizes and epochs using parameter search
        if use_embeddings and use_metafeatures:
            model.fit(
                {
                    'Embeddings': x_train_embeddings,
                    'Metafeatures': x_train_metafeatures
                },
                y_train,
                batch_size=mb_size,
                epochs=num_epochs,
                shuffle=True,
                class_weight=None,
                verbose=1,
                callbacks=callback_list)
        elif use_metafeatures:
            model.fit(x_train_metafeatures,
                      y_train,
                      batch_size=mb_size,
                      epochs=num_epochs,
                      shuffle=True,
                      class_weight=None,
                      verbose=1,
                      callbacks=callback_list)

    # Evaluation time
    if eval:

        model.save('output\\model_veracity.h5')
        json_string = model.to_json()
        with open('output\\model_architecture_veracity.json', 'w') as fout:
            json.dump(json_string, fout)
        model.save_weights('output\\model_veracity_weights.h5')

    # Getting confidence of the model
    if use_embeddings and use_metafeatures:
        pred_probabilities = model.predict(
            [x_test_embeddings, x_test_metafeatures],
            batch_size=mb_size,
            verbose=0)
        confidence = np.max(pred_probabilities, axis=1)

        # Getting predictions of the model
        y_prob = model.predict([x_test_embeddings, x_test_metafeatures],
                               batch_size=mb_size)
        Y_pred = y_prob.argmax(axis=-1)
    elif use_metafeatures:
        pred_probabilities = model.predict(x_test_metafeatures,
                                           batch_size=mb_size,
                                           verbose=0)
        confidence = np.max(pred_probabilities, axis=1)

        # Getting predictions of the model
        y_prob = model.predict(x_test_metafeatures, batch_size=mb_size)
        Y_pred = y_prob.argmax(axis=-1)

    return Y_pred, confidence
Example #19
    def compile(self, learning_rate=None, initial_step=0):
        """
        Build models (train, encoder and decoder)

        Architecture based on Bahdanau and Transformer model approach.
            Reference:
                Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio
                Neural Machine Translation by Jointly Learning to Align and Translate, 2014
                arXiv, URL: https://arxiv.org/abs/1409.0473

        Architecture based on Luong and Transformer model approach.
            Reference:
                Minh-Thang Luong and Hieu Pham and Christopher D. Manning
                Effective Approaches to Attention-based Neural Machine Translation, 2015
                arXiv, URL: https://arxiv.org/abs/1508.04025

        More References:
            Thushan Ganegedara
            Attention in Deep Networks with Keras
            Medium: https://towardsdatascience.com/light-on-math-ml-attention-with-keras-dc8dbc1fad39
            Github: https://github.com/thushv89/attention_keras

            Trung Tran
            Neural Machine Translation With Attention Mechanism
            Machine Talk: https://machinetalk.org/2019/03/29/neural-machine-translation-with-attention-mechanism/
            Github: https://github.com/ChunML/NLP/tree/master/machine_translation
        """

        # Encoder and Decoder Inputs
        encoder_inputs = Input(shape=(None, self.tokenizer.vocab_size),
                               name="encoder_inputs")
        decoder_inputs = Input(shape=(None, self.tokenizer.vocab_size),
                               name="decoder_inputs")

        # Encoder bgru
        encoder_bgru = Bidirectional(GRU(self.units,
                                         return_sequences=True,
                                         return_state=True,
                                         dropout=self.dropout),
                                     name="encoder_bgru")

        # Bidirectional(GRU) with return_state returns the sequence output plus
        # the forward and backward states (named state_h/state_c here).
        encoder_out, state_h, state_c = encoder_bgru(encoder_inputs)

        # Set up the decoder GRU, using `encoder_states` as initial state.
        decoder_gru = GRU(self.units * 2,
                          return_sequences=True,
                          return_state=True,
                          dropout=self.dropout,
                          name="decoder_gru")

        decoder_out, _ = decoder_gru(
            decoder_inputs,
            initial_state=Concatenate(axis=-1)([state_h, state_c]))

        # Attention layer
        if self.mode == "bahdanau":
            attn_layer = AdditiveAttention(use_scale=False,
                                           name="attention_layer")
        else:
            attn_layer = Attention(use_scale=False, name="attention_layer")

        attn_out = attn_layer([decoder_out, encoder_out])

        # Normalization layer
        norm_layer = LayerNormalization(name="normalization")
        decoder_concat_input = norm_layer(
            Concatenate(axis=-1)([decoder_out, attn_out]))

        # Dense layer
        dense = Dense(self.tokenizer.vocab_size,
                      activation="softmax",
                      name="softmax_layer")
        dense_time_distributed = TimeDistributed(dense,
                                                 name="time_distributed_layer")

        decoder_pred = dense_time_distributed(decoder_concat_input)
        """ Train model """
        if learning_rate is None:
            learning_rate = CustomSchedule(d_model=self.tokenizer.vocab_size,
                                           initial_step=initial_step)
            self.learning_schedule = True
        else:
            self.learning_schedule = False

        optimizer = Adam(learning_rate=learning_rate,
                         clipnorm=1.0,
                         clipvalue=0.5,
                         epsilon=1e-8)

        self.model = Model(inputs=[encoder_inputs, decoder_inputs],
                           outputs=decoder_pred,
                           name="seq2seq")
        self.model.compile(optimizer=optimizer,
                           loss=self.loss_func,
                           metrics=["accuracy"])
        """ Inference model """
        """ Encoder (Inference) model """
        self.encoder = Model(inputs=encoder_inputs,
                             outputs=[encoder_out, state_h, state_c])
        """ Decoder (Inference) model """
        # Decoder Inputs (states)
        encoder_inf_states = Input(shape=(self.tokenizer.maxlen,
                                          self.units * 2),
                                   name="encoder_inf_states")
        decoder_init_states = Input(shape=(self.units * 2,),
                                    name="decoder_init")
        decoder_inf_inputs = Input(shape=(1, self.tokenizer.vocab_size),
                                   name="decoder_inf_inputs")

        # Decoder GRU
        decoder_inf_out, decoder_inf_states = decoder_gru(
            decoder_inf_inputs, initial_state=decoder_init_states)

        # Attention layer
        attn_inf_out = attn_layer([decoder_inf_out, encoder_inf_states])

        # Normalization layer
        decoder_inf_concat = norm_layer(
            Concatenate(axis=-1)([decoder_inf_out, attn_inf_out]))

        # Dense layer
        decoder_inf_pred = dense_time_distributed(decoder_inf_concat)

        # Decoder model
        self.decoder = Model(inputs=[
            encoder_inf_states, decoder_init_states, decoder_inf_inputs
        ],
                             outputs=[decoder_inf_pred, decoder_inf_states])
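
`CustomSchedule` is referenced above but not defined in this snippet. A plausible sketch, assuming it follows the Transformer warm-up schedule from the TensorFlow tutorials with an added `initial_step` offset for resumed training:

import tensorflow as tf

class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, initial_step=0, warmup_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.initial_step = initial_step
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        # lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
        step = tf.cast(step, tf.float32) + self.initial_step
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        return tf.math.rsqrt(self.d_model) * tf.minimum(arg1, arg2)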