Example #1
0
    def baseline_preconv_attetion(self, config):
        """Build and compile the conv-then-recurrent attention baseline.

        Layer stack: identity-initialised embedding -> spatial dropout ->
        strided Conv1D -> two stacked bidirectional CuDNN GRUs -> Attention ->
        Dense(64, relu) -> Dense(2, softmax).

        Args:
            config: settings object; this method reads ``strmaxlen``,
                ``max_features``, ``prob_dropout``, ``filter_size``,
                ``kernel_size``, ``strides``, ``cell_size_l1`` and
                ``cell_size_l2`` from it.

        Returns:
            The compiled Keras ``Model`` (categorical cross-entropy, Adam).
        """
        inp = Input(shape=(config.strmaxlen, ), name='input')
        # Square embedding matrix initialised to identity, i.e. it starts out
        # as a one-hot encoding and is then fine-tuned.
        emb = Embedding(config.max_features,
                        config.max_features,
                        embeddings_initializer='identity',
                        trainable=True)(inp)
        emb = SpatialDropout1D(config.prob_dropout)(emb)
        x = Conv1D(config.filter_size,
                   kernel_size=config.kernel_size,
                   strides=config.strides,
                   padding="valid",
                   kernel_initializer="he_uniform")(emb)
        x = Bidirectional(CuDNNGRU(config.cell_size_l1,
                                   return_sequences=True))(x)
        x = Bidirectional(CuDNNGRU(config.cell_size_l2,
                                   return_sequences=True))(x)
        # `/` is true division on Python 3, so cast to int exactly as the
        # other builders in this file do before handing the size to Attention.
        # NOTE(review): `strmaxlen / 2 - 1` presumably is the sequence length
        # left after the conv for strides == 2 -- confirm against config.
        x = Attention(int(config.strmaxlen / 2 - 1))(x)
        x = Dense(64, activation='relu')(x)
        outp = Dense(2, activation='softmax')(x)

        model = Model(inputs=inp, outputs=outp)
        model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(lr=0.001, decay=0.0001),
                      metrics=['categorical_crossentropy', 'accuracy'])
        return model
Example #2
0
    def __init__(self,
                 num_classes,
                 encoder,
                 att_type='additive',
                 img_size=(512, 512)):
        """Assemble the feature extractors, class encoder, attention and decoder.

        Args:
            num_classes: input width of the linear class encoder; also stored
                on the instance.
            encoder: backbone module from which ``layer1`` and ``layer4``
                activations are tapped.
            att_type: forwarded to ``Attention`` as its second argument.
            img_size: forwarded to ``Decoder`` as its third argument.
        """
        super().__init__()

        use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda' if use_cuda else 'cpu')

        # Two taps on the same backbone: an early one and the final stage.
        low_level_tap = IntermediateLayerGetter(encoder, {"layer1": "layer1"})
        self.low_feat = low_level_tap.to(self.device)
        high_level_tap = IntermediateLayerGetter(encoder, {"layer4": "out"})
        self.encoder = high_level_tap.to(self.device)

        # Channel widths below are the ones noted for resnet18.
        encoder_dim, low_level_dim = 512, 64
        self.num_classes = num_classes

        self.class_encoder = nn.Linear(num_classes, 512)
        self.attention_enc = Attention(encoder_dim, att_type)
        self.decoder = Decoder(2,
                               encoder_dim,
                               img_size,
                               low_level_dim=low_level_dim,
                               rates=[1, 6, 12, 18])
Example #3
0
    def dense_attention(self, config):
        """Build and compile the densely pooled BiGRU/Conv attention classifier.

        Two stacked bidirectional CuDNN GRUs are followed by a strided Conv1D.
        Global average/max pooling of every stage plus attention over the
        second GRU and over the conv output are concatenated, passed through
        dropout and classified with a 2-way softmax.

        Args:
            config: settings object; this method reads ``strmaxlen``,
                ``max_features``, ``prob_dropout``, ``prob_dropout2``,
                ``cell_size_l1``, ``cell_size_l2``, ``filter_size``,
                ``kernel_size`` and ``strides`` from it.

        Returns:
            The compiled Keras ``Model`` (categorical cross-entropy, Adam).
        """
        inp = Input(shape=(config.strmaxlen, ), name='input')
        emb = Embedding(config.max_features,
                        config.max_features,
                        embeddings_initializer='identity',
                        trainable=True)(inp)
        emb1 = SpatialDropout1D(config.prob_dropout)(emb)

        # Feed the dropped-out embedding into the recurrent stack. The dropout
        # output used to be computed and then ignored (the GRU read `emb`),
        # unlike the other builders in this file.
        l1_G = Bidirectional(
            CuDNNGRU(config.cell_size_l1, return_sequences=True))(emb1)
        l2_GG = Bidirectional(
            CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_G)
        l3_GGC = Conv1D(config.filter_size,
                        kernel_size=config.kernel_size,
                        strides=config.strides,
                        padding="valid",
                        kernel_initializer="he_uniform")(l2_GG)
        attention_GGA = Attention(config.strmaxlen)(l2_GG)

        # Global pooling summaries of every stage.
        avg_pool_G = GlobalAveragePooling1D()(l1_G)
        max_pool_G = GlobalMaxPooling1D()(l1_G)
        avg_pool_GG = GlobalAveragePooling1D()(l2_GG)
        max_pool_GG = GlobalMaxPooling1D()(l2_GG)
        avg_pool_GGC = GlobalAveragePooling1D()(l3_GGC)
        max_pool_GGC = GlobalMaxPooling1D()(l3_GGC)
        # NOTE(review): `strmaxlen / 2 - 1` presumably is the post-conv
        # sequence length for strides == 2 -- confirm against config.strides.
        attention_GGCA = Attention(int(config.strmaxlen / 2 - 1))(l3_GGC)

        conc_GGC = concatenate([
            avg_pool_G, max_pool_G, avg_pool_GG, max_pool_GG, avg_pool_GGC,
            max_pool_GGC, attention_GGA, attention_GGCA
        ])

        outp = Dropout(config.prob_dropout2)(conc_GGC)
        outp = Dense(2, activation='softmax')(outp)

        model = Model(inputs=inp, outputs=outp)
        model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(lr=0.001, decay=0.0005),
                      metrics=['categorical_crossentropy', 'accuracy'])

        return model
    def __init__(self,
                 rnn="lstm",
                 input_length=5000,
                 embedding_size=128,
                 hidden_size=768,
                 num_layers=1,
                 num_classes=10,
                 vocab_size=209,
                 drop_prob=0.3):
        """
        :param rnn: recurrent cell to use, either "lstm" or "gru"
        :param input_length: length of input subsequence (passed to the
            attention layer)
        :param embedding_size: dim of the embedding for each element in input
        :param hidden_size: number of features in the hidden state of the
            recurrent layer
        :param num_layers: number of stacked recurrent layers
        :param num_classes: number of classes for classification task
        :param vocab_size: size of vocabulary for embedding layer
        :param drop_prob: dropout probability, used for the standalone
            dropout module and between stacked recurrent layers
        :raises ValueError: if ``rnn`` is neither "lstm" nor "gru"
        """

        super(DeepSRGM, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        rnn_types = {"lstm": nn.LSTM, "gru": nn.GRU}
        if rnn not in rnn_types:
            # Previously an unknown value silently left `self.rnn` undefined
            # and only failed later, on first use.
            raise ValueError(
                "rnn must be 'lstm' or 'gru', got {!r}".format(rnn))

        # PyTorch applies recurrent dropout only *between* stacked layers; a
        # non-zero value with a single layer has no effect and only triggers
        # a warning, so pass 0 in that case.
        inter_layer_dropout = drop_prob if num_layers > 1 else 0.0
        self.rnn = rnn_types[rnn](embedding_size,
                                  hidden_size,
                                  num_layers,
                                  dropout=inter_layer_dropout,
                                  batch_first=True)

        self.embeddings = nn.Embedding(vocab_size, embedding_size)
        self.attention_layer = Attention(hidden_size, input_length)

        self.fc1 = nn.Linear(hidden_size, 384)
        self.fc2 = nn.Linear(384, num_classes)

        # self.batchNorm1d = nn.BatchNorm1d(input_length)
        self.dropout = nn.Dropout(drop_prob)
        self.relu = nn.ReLU()
def getModel():
    """Build, compile, summarise and plot the document classifier.

    The function takes no arguments; it reads the module-level settings
    ``sequenceLength``, ``kTokenizer``, ``wordVectorLength``, ``mask_zero``,
    ``attention``, ``denseUnits``, ``names`` and ``mask``.

    Returns:
        A ``(model, attention_layer_model)`` tuple. ``attention_layer_model``
        exposes the output of the layer named ``'attention_vector_layer'``
        when ``attention == 'yes'`` and is ``None`` otherwise.

    Raises:
        ValueError: if ``attention`` is neither ``'yes'`` nor ``'no'``.
    """
    listOfWords = keras.layers.Input((sequenceLength, ), dtype="int32")
    embed = keras.layers.Embedding(input_dim=len(kTokenizer.word_index) + 1,
                                   output_dim=wordVectorLength,
                                   input_length=sequenceLength,
                                   trainable=True,
                                   mask_zero=mask_zero)(listOfWords)
    if attention == 'yes':
        if mask_zero:
            print(
                'Mask Zero:', mask_zero,
                ' : Using the custom Attention Layer from Christos Baziotis')
            # The attention weights are not used directly here; they are
            # reached later through the named layer.
            vectorsForPrediction, _ = Attention(
                return_attention=True, name='attention_vector_layer')(embed)
        else:
            print(
                'Mask Zero:', mask_zero,
                ' : Using the function described here with repeat & permute blocks...'
            )
            vectorsForPrediction = applyAttention(embed)
    elif attention == 'no':
        # Sum the word vectors over the sequence axis to get one vector per
        # document, then project it with a dense layer.
        countDocVector = keras.layers.Lambda(
            lambda x: keras.backend.sum(x, axis=1),
            output_shape=lambda s: (s[0], s[2]))(embed)
        vectorsForPrediction = keras.layers.Dense(
            units=denseUnits, activation='relu')(countDocVector)
    else:
        # Fail with a clear message instead of a NameError further down.
        raise ValueError(
            "attention must be 'yes' or 'no', got {!r}".format(attention))

    predictions = keras.layers.Dense(len(names),
                                     activation='sigmoid',
                                     use_bias=False)(vectorsForPrediction)
    model = keras.models.Model(inputs=listOfWords, outputs=predictions)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['categorical_accuracy'])
    # summary() prints by itself and returns None; wrapping it in print()
    # only added a stray "None" line.
    model.summary()
    plot_model(model,
               show_shapes=True,
               to_file='results/' + attention + '-' + mask + '.png')
    if attention == 'yes':
        # NOTE(review): when mask_zero is falsy this relies on
        # applyAttention() creating a layer named 'attention_vector_layer'
        # -- confirm, otherwise get_layer() will raise.
        attention_layer_model = keras.models.Model(
            inputs=model.input,
            outputs=model.get_layer('attention_vector_layer').output)
    else:
        attention_layer_model = None
    return model, attention_layer_model
Example #6
0
    def __init__(self, embedding_matrix, EMBEDDING_DIM, MAX_SEQUENCE_LENGTH,
                 output_dim):
        """Build, compile, summarise and plot the three-turn classifier.

        The model takes three token-id sequences (``turn1``..``turn3``) and
        three 4-wide ``*_woe`` feature vectors, and outputs a softmax over
        ``output_dim`` classes. The compiled model is stored on
        ``self.model``; its summary is printed and its graph is written to
        ``condv1.png``.

        Args:
            embedding_matrix: pre-trained embedding weights; ``shape[0]`` is
                used as the vocabulary size and the matrix as a constant,
                non-trainable initialiser.
            EMBEDDING_DIM: output width of the embedding layer.
            MAX_SEQUENCE_LENGTH: length of every turn input; also passed to
                the attention layer.
            output_dim: number of output classes.
        """

        def _dense_block(tensor, units, drop_rate=None):
            """Dense -> PReLU -> (optional Dropout) -> BatchNormalization."""
            out = keras.layers.Dense(units)(tensor)
            out = keras.layers.PReLU()(out)
            if drop_rate is not None:
                out = keras.layers.Dropout(drop_rate)(out)
            return keras.layers.BatchNormalization()(out)

        with keras.backend.name_scope('embedding'):
            # load pre-trained word embeddings into an Embedding layer
            embedding_layer = keras.layers.Embedding(
                input_dim=embedding_matrix.shape[0],
                output_dim=EMBEDDING_DIM,
                embeddings_initializer=keras.initializers.Constant(
                    embedding_matrix),
                input_length=MAX_SEQUENCE_LENGTH,
                trainable=False,
                name='Embedding')
        cond_GRU = keras.layers.GRU(128, return_sequences=True)

        with keras.backend.name_scope('seq_input'):
            # All three turns share the same frozen embedding layer.
            input_turn1 = keras.layers.Input(shape=(MAX_SEQUENCE_LENGTH, ),
                                             name='turn1')
            embedded_turn1 = embedding_layer(input_turn1)

            input_turn2 = keras.layers.Input(shape=(MAX_SEQUENCE_LENGTH, ),
                                             name='turn2')
            embedded_turn2 = embedding_layer(input_turn2)

            input_turn3 = keras.layers.Input(shape=(MAX_SEQUENCE_LENGTH, ),
                                             name='turn3')
            embedded_turn3 = embedding_layer(input_turn3)

        # NOTE: only the turn-3 branch (ts3) reaches the attention layer
        # below; the turn-1/turn-2 branches are built but, with the concat
        # step disabled, their outputs are not consumed downstream.
        with keras.backend.name_scope('turn1_proc'):
            ts1 = keras.layers.Bidirectional(
                keras.layers.GRU(128, return_sequences=True,
                                 dropout=0.2))(embedded_turn1)
            ts1 = keras.layers.Dropout(0.3)(ts1)
            ts1, ts1_h = keras.layers.GRU(128,
                                          return_state=True,
                                          dropout=0.2)(ts1)

        with keras.backend.name_scope('turn2_proc'):
            # Turn 2 is conditioned on turn 1 via the GRU's initial state.
            ts2 = cond_GRU(embedded_turn2, initial_state=[ts1_h])
            ts2 = keras.layers.Dropout(0.3)(ts2)
            ts2, ts2_h = keras.layers.GRU(128,
                                          return_state=True,
                                          dropout=0.2)(ts2)

        with keras.backend.name_scope('turn3_proc'):
            ts3 = keras.layers.Bidirectional(
                keras.layers.GRU(128, return_sequences=True,
                                 dropout=0.2))(embedded_turn3)
            ts3 = keras.layers.Dropout(0.3)(ts3)
            ts3 = keras.layers.Bidirectional(
                keras.layers.GRU(256,
                                 return_sequences=True,
                                 return_state=False,
                                 dropout=0.2))(ts3)

        with keras.backend.name_scope('attn'):
            a = Attention(MAX_SEQUENCE_LENGTH)(ts3)

        with keras.backend.name_scope('common'):
            # Stack of 128-wide blocks with additive skip connections,
            # followed by a projection down to output_dim.
            x = _dense_block(a, 128, 0.5)

            x1 = _dense_block(x, 128, 0.5)
            x1 = keras.layers.Add()([x, x1])

            x2 = _dense_block(x1, 128, 0.25)
            x2 = keras.layers.Add()([x1, x2])

            x3 = _dense_block(x2, 128, 0.5)
            x3 = keras.layers.Add()([x2, x3])

            x4 = _dense_block(x3, output_dim)

        with keras.backend.name_scope('woe_input'):
            t1_woe = keras.layers.Input(shape=(4, ), name='t1_woe')
            t2_woe = keras.layers.Input(shape=(4, ), name='t2_woe')
            t3_woe = keras.layers.Input(shape=(4, ), name='t3_woe')

        with keras.backend.name_scope('concat2'):
            merged = keras.layers.Concatenate()([x4, t1_woe, t2_woe, t3_woe])

        with keras.backend.name_scope('process_woe'):
            # Same block pattern, 32 wide, over text + woe features.
            x = _dense_block(merged, 32, 0.5)

            x1 = _dense_block(x, 32, 0.5)
            x1 = keras.layers.Add()([x, x1])

            x2 = _dense_block(x1, 32, 0.5)
            x2 = keras.layers.Add()([x1, x2])

        preds = keras.layers.Dense(output_dim, activation='softmax')(x2)

        model = keras.models.Model(
            [input_turn1, input_turn2, input_turn3, t1_woe, t2_woe, t3_woe],
            preds)

        opt = keras.optimizers.Adam(0.001)

        model.compile(loss='categorical_crossentropy',
                      optimizer=opt,
                      metrics=['acc'])
        self.model = model

        # Summarize the model. summary() prints by itself and returns None,
        # so it must not be wrapped in print().
        self.model.summary()
        from keras.utils import plot_model
        plot_model(model, to_file='condv1.png', show_shapes=True)
    # Collect element 1 of every tuple in data_tuples as the target list.
    y = [d[1] for d in data_tuples]

    # Carve X / y into three contiguous slices of the same ordering:
    #   [0, split)         -> test
    #   [split, 2 * split) -> validation
    #   [2 * split, end)   -> train
    split = int(SPLIT_FRACTION * len(data_tuples))
    X_train, y_train = X[split*2:], y[split*2:]
    X_val, y_val = X[split:split*2], y[split:split*2]
    X_test, y_test = X[:split], y[:split]

    print("X_train shape:", X_train.shape)
    print("X_val shape:", X_val.shape)
    print("X_test shape:", X_test.shape)

    # Embedding -> bidirectional LSTM (full sequence) -> attention ->
    # Dense(50, relu) -> dropout -> single sigmoid unit.
    print('Build model...')
    model = Sequential()
    model.add(Embedding(vocab_size, 128, mask_zero=False))
    model.add(Bidirectional(LSTM(128, dropout=0.5, recurrent_dropout=0.5, return_sequences=True)))
    model.add(Attention(direction="bidirectional"))
    model.add(Dense(50, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )

    # Write a diagram of the architecture into RESULT_DIR and print a summary.
    plot_model(model, to_file=RESULT_DIR+'/shake-model.png')
    model.summary()

    # Resume from previously saved weights when the weights file exists.
    if os.path.isfile(WEIGHTS_FILE):
        model.load_weights(WEIGHTS_FILE)
Example #8
0
    def __init__(self):
        """Set hyper-parameters, load the training data and obtain the model.

        The model is loaded from ``ram_saved_model.h5`` when that file exists;
        otherwise it is built from scratch and compiled. Either way it ends up
        on ``self.model``.
        """
        # ---- hyper-parameters -------------------------------------------
        self.HOPS = 5
        self.DATASET = 'twitter'  # 'restaurant', 'laptop'
        self.POLARITIES_DIM = 3
        self.EMBEDDING_DIM = 200
        self.LEARNING_RATE = 0.01
        self.LSTM_PARAMS = dict(
            units=200,
            activation='tanh',
            recurrent_activation='sigmoid',
            kernel_initializer=initializers.RandomUniform(minval=-0.003,
                                                          maxval=0.003),
            recurrent_initializer=initializers.RandomUniform(minval=-0.003,
                                                             maxval=0.003),
            bias_initializer=initializers.RandomUniform(minval=-0.003,
                                                        maxval=0.003),
            kernel_regularizer=regularizers.l2(0.001),
            recurrent_regularizer=regularizers.l2(0.001),
            bias_regularizer=regularizers.l2(0.001),
            dropout=0,
            recurrent_dropout=0,
        )
        self.MAX_SEQUENCE_LENGTH = 40
        self.MAX_ASPECT_LENGTH = 2
        self.ITERATION = 500
        self.BATCH_SIZE = 200

        # ---- training data ----------------------------------------------
        (self.texts_raw_indices, self.texts_left_indices,
         self.aspects_indices, self.texts_right_indices,
         self.polarities_matrix, self.embedding_matrix,
         self.tokenizer) = read_dataset(
             type=self.DATASET,
             mode='train',
             embedding_dim=self.EMBEDDING_DIM,
             max_seq_len=self.MAX_SEQUENCE_LENGTH,
             max_aspect_len=self.MAX_ASPECT_LENGTH)

        # ---- model: reuse a saved one if present -------------------------
        if os.path.exists('ram_saved_model.h5'):
            print('loading saved model...')
            self.model = load_model('ram_saved_model.h5')
            return

        print('Build model...')
        sentence_len = self.MAX_SEQUENCE_LENGTH * 2 + self.MAX_ASPECT_LENGTH
        inputs_sentence = Input(shape=(sentence_len, ),
                                name='inputs_sentence')
        inputs_aspect = Input(shape=(self.MAX_ASPECT_LENGTH, ),
                              name='inputs_aspect')

        # Both embeddings are frozen copies of the pre-trained matrix.
        sentence_emb = Embedding(
            input_dim=len(self.tokenizer.word_index) + 1,
            output_dim=self.EMBEDDING_DIM,
            input_length=sentence_len,
            weights=[self.embedding_matrix],
            trainable=False,
            name='sentence_embedding')(inputs_sentence)
        aspect_emb = Embedding(
            input_dim=len(self.tokenizer.word_index) + 1,
            output_dim=self.EMBEDDING_DIM,
            input_length=self.MAX_ASPECT_LENGTH,
            weights=[self.embedding_matrix],
            trainable=False,
            name='aspect_embedding')(inputs_aspect)

        memory = Bidirectional(
            LSTM(return_sequences=True, **self.LSTM_PARAMS),
            name='memory')(sentence_emb)
        aspect_states = Bidirectional(
            LSTM(return_sequences=True, **self.LSTM_PARAMS),
            name='aspect')(aspect_emb)

        # Start from the time-averaged aspect states, then refine the vector
        # by applying one shared attention layer over the memory HOPS times.
        hop_state = Lambda(lambda xin: K.mean(xin, axis=1),
                           name='aspect_mean')(aspect_states)
        shared_attention = Attention(name='shared_attention')
        for _ in range(self.HOPS):
            hop_state = shared_attention((memory, hop_state))

        logits = Dense(self.POLARITIES_DIM)(hop_state)
        predictions = Activation('softmax')(logits)

        network = Model(inputs=[inputs_sentence, inputs_aspect],
                        outputs=predictions)
        network.summary()
        network.compile(loss='categorical_crossentropy',
                        optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                        metrics=['acc'])
        # plot_model(model, to_file='model.png')
        self.model = network
    def __init__(self):
        """Set hyper-parameters, load the training data and obtain the model.

        The model is loaded from ``ram_saved_model.h5`` when that file exists;
        otherwise a multi-hop attention network over a BiLSTM memory is built
        and compiled. Either way it ends up on ``self.model``.
        """
        self.HOPS = 3
        self.SCORE_FUNCTION = 'mlp'  # scaled_dot_product / mlp (concat) / bi_linear (general dot)
        self.DATASET = 'twitter'  # 'twitter', 'restaurant', 'laptop'
        self.POLARITIES_DIM = 3
        self.EMBEDDING_DIM = 300
        self.LEARNING_RATE = 0.001
        self.INITIALIZER = initializers.RandomUniform(minval=-0.05, maxval=0.05)
        self.REGULARIZER = regularizers.l2(0.001)
        self.LSTM_PARAMS = {
            'units': 200,
            'activation': 'tanh',
            'recurrent_activation': 'sigmoid',
            'kernel_initializer': self.INITIALIZER,
            'recurrent_initializer': self.INITIALIZER,
            'bias_initializer': self.INITIALIZER,
            'kernel_regularizer': self.REGULARIZER,
            'recurrent_regularizer': self.REGULARIZER,
            'bias_regularizer': self.REGULARIZER,
            'dropout': 0,
            'recurrent_dropout': 0,
        }
        self.MAX_SEQUENCE_LENGTH = 80
        self.MAX_ASPECT_LENGTH = 10
        self.BATCH_SIZE = 32
        self.EPOCHS = 5

        (self.texts_raw_indices, self.texts_raw_without_aspects_indices,
         self.texts_left_indices, self.texts_left_with_aspects_indices,
         self.aspects_indices, self.texts_right_indices,
         self.texts_right_with_aspects_indices,
         self.polarities_matrix,
         self.embedding_matrix,
         self.tokenizer) = read_dataset(
             type=self.DATASET,
             mode='train',
             embedding_dim=self.EMBEDDING_DIM,
             max_seq_len=self.MAX_SEQUENCE_LENGTH,
             max_aspect_len=self.MAX_ASPECT_LENGTH)

        if os.path.exists('ram_saved_model.h5'):
            print('loading saved model...')
            self.model = load_model('ram_saved_model.h5')
        else:
            print('Build model...')
            inputs_sentence = Input(shape=(self.MAX_SEQUENCE_LENGTH,),
                                    name='inputs_sentence')
            inputs_aspect = Input(shape=(self.MAX_ASPECT_LENGTH,),
                                  name='inputs_aspect')
            # Per-sample number of non-zero aspect ids, shaped (batch, 1) so
            # it broadcasts against the summed aspect states below. Without
            # `axis` the count was taken over the whole batch, which made the
            # "mean" depend on batch size and on the other samples.
            nonzero_count = Lambda(
                lambda xin: K.expand_dims(
                    tf.count_nonzero(xin, axis=1, dtype=tf.float32),
                    axis=-1))(inputs_aspect)
            sentence = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                                 output_dim=self.EMBEDDING_DIM,
                                 input_length=self.MAX_SEQUENCE_LENGTH,
                                 mask_zero=True,
                                 weights=[self.embedding_matrix],
                                 trainable=False,
                                 name='sentence_embedding')(inputs_sentence)
            aspect = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                               output_dim=self.EMBEDDING_DIM,
                               input_length=self.MAX_ASPECT_LENGTH,
                               mask_zero=True,
                               weights=[self.embedding_matrix],
                               trainable=False,
                               name='aspect_embedding')(inputs_aspect)
            memory = Bidirectional(
                LSTM(return_sequences=True, **self.LSTM_PARAMS),
                name='memory')(sentence)
            aspect = Bidirectional(
                LSTM(return_sequences=True, **self.LSTM_PARAMS),
                name='aspect')(aspect)
            # Mean of the aspect states: sum over time / per-sample count.
            x = Lambda(lambda xin: K.sum(xin[0], axis=1) / xin[1],
                       name='aspect_mean')([aspect, nonzero_count])
            shared_attention = Attention(score_function=self.SCORE_FUNCTION,
                                         initializer=self.INITIALIZER,
                                         regularizer=self.REGULARIZER,
                                         name='shared_attention')
            # The same attention layer (shared weights) is applied HOPS times.
            for _ in range(self.HOPS):
                x = shared_attention((memory, x))
            x = Flatten()(x)
            x = Dense(self.POLARITIES_DIM)(x)
            predictions = Activation('softmax')(x)
            model = Model(inputs=[inputs_sentence, inputs_aspect],
                          outputs=predictions)
            model.summary()
            model.compile(loss='categorical_crossentropy',
                          optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                          metrics=['acc', f1])
            # plot_model(model, to_file='model.png')
            self.model = model
Example #10
0
 def test_attention(self):
     # Applying Attention to a (5, 10) input must give a (None, 10) tensor
     # that still carries Keras graph metadata.
     layer = Attention(attention_weight_vector_dim=5)
     inputs = Input(shape=(5, 10))
     outputs = layer(inputs)

     expected_shape = (None, 10)
     self.assertEqual(shape(outputs), expected_shape, "y")

     has_history = hasattr(outputs, '_keras_history')
     self.assertEqual(has_history, True, "y")
Example #11
0
    def get_model222(self, config):
        """Build and compile the four-headed LSTM/GRU ensemble model.

        A shared identity-initialised embedding feeds two branches, one whose
        first recurrent layer is a bidirectional CuDNN LSTM and one whose
        first layer is a bidirectional CuDNN GRU. Each branch splits into an
        LSTM and a GRU second layer, each followed by a stride-2 Conv1D, and
        yields two concatenated feature vectors (pooling + attention). Four
        dense heads and their average form the five model outputs.

        Args:
            config: settings object; this method reads ``strmaxlen``,
                ``max_features``, ``prob_dropout``, ``prob_dropout2``,
                ``cell_size_l1``, ``cell_size_l2``, ``filter_size`` and
                ``kernel_size`` from it.

        Returns:
            The compiled Keras ``Model`` with outputs
            ``[out_LL, out_LG, out_GL, out_GG, out_avg]``.
        """
        seq_len = config.strmaxlen
        conv_len = int(config.strmaxlen / 2 - 1)

        inp = Input(shape=(config.strmaxlen, ), name='input')
        emb = Embedding(config.max_features,
                        config.max_features,
                        embeddings_initializer='identity',
                        trainable=True)(inp)

        def bi_rnn(cell_cls, units, tensor):
            # Bidirectional wrapper around a sequence-returning CuDNN cell.
            return Bidirectional(cell_cls(units, return_sequences=True))(tensor)

        def strided_conv(tensor):
            return Conv1D(config.filter_size,
                          kernel_size=config.kernel_size,
                          strides=2,
                          padding="valid",
                          kernel_initializer="he_uniform")(tensor)

        def avg_and_max(tensor):
            pooled_avg = GlobalAveragePooling1D()(tensor)
            pooled_max = GlobalMaxPooling1D()(tensor)
            return pooled_avg, pooled_max

        def build_branch(first_cell_cls):
            # One branch: dropout -> level-1 RNN -> {LSTM, GRU} level-2 ->
            # conv, then pooled/attended features per level-2 flavour.
            # Layers are instantiated in a fixed order so that Keras
            # auto-generated layer names stay stable.
            dropped = SpatialDropout1D(config.prob_dropout)(emb)
            level1 = bi_rnn(first_cell_cls, config.cell_size_l1, dropped)

            level2_lstm = bi_rnn(CuDNNLSTM, config.cell_size_l2, level1)
            level2_gru = bi_rnn(CuDNNGRU, config.cell_size_l2, level1)

            conv_lstm = strided_conv(level2_lstm)
            conv_gru = strided_conv(level2_gru)

            avg1, max1 = avg_and_max(level1)
            avg2_lstm, max2_lstm = avg_and_max(level2_lstm)
            avg2_gru, max2_gru = avg_and_max(level2_gru)

            att2_lstm = Attention(seq_len)(level2_lstm)
            att2_gru = Attention(seq_len)(level2_gru)

            avg3_lstm, max3_lstm = avg_and_max(conv_lstm)
            avg3_gru, max3_gru = avg_and_max(conv_gru)

            att3_lstm = Attention(conv_len)(conv_lstm)
            att3_gru = Attention(conv_len)(conv_gru)

            feats_lstm = concatenate([
                avg1, max1, avg2_lstm, max2_lstm, avg3_lstm, max3_lstm,
                att2_lstm, att3_lstm
            ])
            feats_gru = concatenate([
                avg1, max1, avg2_gru, max2_gru, avg3_gru, max3_gru,
                att2_gru, att3_gru
            ])
            return feats_lstm, feats_gru

        # ---- branch whose first recurrent layer is an LSTM ---------------
        conc_LLC, conc_LGC = build_branch(CuDNNLSTM)
        out_LL = Dropout(config.prob_dropout2)(conc_LLC)
        out_LG = Dropout(config.prob_dropout2)(conc_LGC)
        # NOTE(review): the four heads differ in width/activation (2-way
        # softmax, 2 linear, 1 linear, 1 linear) -- confirm this is intended.
        out_LL = Dense(2, activation='softmax')(out_LL)
        out_LG = Dense(2)(out_LG)

        # ---- branch whose first recurrent layer is a GRU -----------------
        conc_GLC, conc_GGC = build_branch(CuDNNGRU)
        out_GL = Dropout(config.prob_dropout2)(conc_GLC)
        out_GG = Dropout(config.prob_dropout2)(conc_GGC)
        out_GL = Dense(1)(out_GL)
        out_GG = Dense(1)(out_GG)

        out_avg = average([out_LL, out_LG, out_GL, out_GG])

        model_avg = Model(inputs=inp,
                          outputs=[out_LL, out_LG, out_GL, out_GG, out_avg])

        # The averaged output is down-weighted relative to the four heads.
        model_avg.compile(loss='mean_squared_error',
                          optimizer='adam',
                          loss_weights=[1., 1., 1., 1., 0.1],
                          metrics=['mean_squared_error', 'accuracy'])

        return model_avg
Example #12
0
    def __init__(self, embedding_dim=100, batch_size=64, n_hidden=100, learning_rate=0.01, n_class=3, max_sentence_len=40, l2_reg_val=0.003):
        """Configure, build and compile the multi-task aspect-sentiment model.

        Left and right context sequences are encoded by two LSTMs, joined
        along the time axis and attended over with the mean aspect state.
        Three task heads are built on the attended vector; each head's output
        is concatenated with the ``input_dataset`` value. Weights are loaded
        from ``self.MODEL_TO_LOAD`` when that file exists. The compiled model
        is stored on ``self.model``.

        Args:
            embedding_dim: embedding width requested from ``read_dataset``.
            batch_size: stored as ``self.BATCH_SIZE``.
            n_hidden: LSTM units and width of each head's hidden layer.
            learning_rate: Adam learning rate.
            n_class: width of each head's final dense layer.
            max_sentence_len: length of the left/right context inputs.
            l2_reg_val: L2 regularisation factor.
        """
        ############################
        self.DATASET = ['restaurant', 'laptop']
        self.TASK_INDICES = [1002, 1003, 1005]  ##1001-twitter, 1002-restaurant, 1003-laptop, 1004-others, 1005-general
        self.LOSS_WEIGHTS = {1002: 0.5, 1003: 0.5, 1005: 0.5}
        self.MODEL_TO_LOAD = './models/mtl_absa_att_sh_saved_model.h5'
        ###########################
        self.SCORE_FUNCTION = 'mlp'
        self.EMBEDDING_DIM = embedding_dim
        self.BATCH_SIZE = batch_size
        self.N_HIDDEN = n_hidden
        self.LEARNING_RATE = learning_rate
        self.N_CLASS = n_class
        self.MAX_SENTENCE_LENGTH = max_sentence_len
        self.EPOCHS = 4
        self.L2_REG_VAL = l2_reg_val
        self.MAX_ASPECT_LENGTH = 5
        self.INITIALIZER = initializers.RandomUniform(minval=-0.003, maxval=0.003)
        self.REGULARIZER = regularizers.l2(self.L2_REG_VAL)

        self.LSTM_PARAMS = {
            'units': self.N_HIDDEN,
            'activation': 'tanh',
            'recurrent_activation': 'hard_sigmoid',
            'dropout': 0,
            'recurrent_dropout': 0
        }

        self.DENSE_PARAMS = {
            'kernel_initializer': self.INITIALIZER,
            'bias_initializer': self.INITIALIZER,
            'kernel_regularizer': self.REGULARIZER,
            'bias_regularizer': self.REGULARIZER,
            'dtype': 'float32'
        }

        (self.texts_raw_indices, self.texts_raw_without_aspects_indices,
         self.texts_left_indices, self.texts_left_with_aspects_indices,
         self.aspects_indices, self.texts_right_indices,
         self.texts_right_with_aspects_indices, self.dataset_index,
         self.polarities_matrix, self.polarities,
         self.embedding_matrix,
         self.tokenizer) = read_dataset(
             types=self.DATASET,
             mode='train',
             embedding_dim=self.EMBEDDING_DIM,
             max_seq_len=self.MAX_SENTENCE_LENGTH,
             max_aspect_len=self.MAX_ASPECT_LENGTH)

        print('Build model...')
        inputs_l = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int64')
        inputs_r = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int64')
        inputs_aspect = Input(shape=(self.MAX_ASPECT_LENGTH,), dtype='int64',
                              name='inputs_aspect')
        # Per-sample number of non-zero aspect ids, shaped (batch, 1) so it
        # broadcasts against the summed aspect states below. Without `axis`
        # the count was taken over the whole batch, which made the "mean"
        # depend on batch size and on the other samples.
        nonzero_count = Lambda(
            lambda xin: K.expand_dims(
                tf.count_nonzero(xin, axis=1, dtype='float32'),
                axis=-1))(inputs_aspect)
        input_dataset = Input(shape=(1,), dtype='float32')

        # One frozen embedding layer shared by the left and right contexts;
        # the aspect gets its own frozen copy of the same matrix.
        Embedding_Layer = Embedding(input_dim=len(self.embedding_matrix),
                                    output_dim=self.EMBEDDING_DIM,
                                    input_length=self.MAX_SENTENCE_LENGTH,
                                    mask_zero=True,
                                    weights=[self.embedding_matrix],
                                    trainable=False)
        aspect = Embedding(input_dim=len(self.embedding_matrix),
                           output_dim=self.EMBEDDING_DIM,
                           input_length=self.MAX_ASPECT_LENGTH,
                           mask_zero=True,
                           weights=[self.embedding_matrix],
                           trainable=False,
                           name='aspect_embedding')(inputs_aspect)

        x_l = Embedding_Layer(inputs_l)
        x_r = Embedding_Layer(inputs_r)

        x_aspect = Bidirectional(
            LSTM(name='aspect', return_sequences=True, **self.LSTM_PARAMS),
            merge_mode='sum')(aspect)
        # Mean of the aspect states: sum over time / per-sample count.
        x_aspect = Lambda(lambda xin: K.sum(xin[0], axis=1) / xin[1],
                          name='aspect_mean')([x_aspect, nonzero_count])

        x_l = LSTM(name='sentence_left', return_sequences=True,
                   **self.LSTM_PARAMS)(x_l)
        x_r = LSTM(go_backwards=True, name='sentence_right',
                   return_sequences=True, **self.LSTM_PARAMS)(x_r)

        shared_attention = Attention(score_function=self.SCORE_FUNCTION,
                                     initializer=self.INITIALIZER,
                                     regularizer=self.REGULARIZER,
                                     name='shared_attention')

        # Join both context encodings along the time axis and attend over
        # them with the aspect vector, then drop axis 1 of the result.
        x = Concatenate(name='last_shared', axis=1)([x_l, x_r])
        x = shared_attention((x, x_aspect))
        x = Lambda(lambda attended: K.squeeze(attended, 1))(x)

        # twitter task layers
        tw_x = Dense(self.N_HIDDEN, name='t1_dense_10', **self.DENSE_PARAMS)(x)
        twitter_x = Dense(self.N_CLASS, name='t1_dense_3', **self.DENSE_PARAMS)(tw_x)
        twitter_x = Concatenate(name="twitter_output")([twitter_x, input_dataset])

        # rest task layers
        rest_x = Dense(self.N_HIDDEN, name='t2_dense_10', **self.DENSE_PARAMS)(x)
        rest_x = Dense(self.N_CLASS, name='t2_dense_3', **self.DENSE_PARAMS)(rest_x)
        rest_x = Concatenate(name="rest_output")([rest_x, input_dataset])

        # general task layers
        general_x = Dense(self.N_HIDDEN, name='t3_dense_10', **self.DENSE_PARAMS)(x)
        general_x = Dense(self.N_CLASS, name='t3_dense_3', **self.DENSE_PARAMS)(general_x)
        general_x = Concatenate(name="general_output")([general_x, input_dataset])

        model = Model(inputs=[inputs_l, inputs_r, input_dataset, inputs_aspect],
                      outputs=[twitter_x, rest_x, general_x])
        model.summary()

        if os.path.exists(self.MODEL_TO_LOAD):
            print('loading saved model...')
            model.load_weights(self.MODEL_TO_LOAD)

        self.model = model

        # Each named output gets its own loss built from the shared weights
        # table and that head's task index.
        self.model.compile(
            loss={'twitter_output': multitask_loss(self.LOSS_WEIGHTS, self.TASK_INDICES[0]),
                  'rest_output': multitask_loss(self.LOSS_WEIGHTS, self.TASK_INDICES[1]),
                  'general_output': multitask_loss(self.LOSS_WEIGHTS, self.TASK_INDICES[2])},
            optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
            metrics=[multitask_accuracy, f1])