def baseline_preconv_attention(self, config):
    inp = Input(shape=(config.strmaxlen,), name='input')
    emb = Embedding(config.max_features,
                    config.max_features,
                    embeddings_initializer='identity',
                    trainable=True)(inp)
    emb = SpatialDropout1D(config.prob_dropout)(emb)

    # Convolutional front-end before the bidirectional GRU stack.
    x = Conv1D(config.filter_size,
               kernel_size=config.kernel_size,
               strides=config.strides,
               padding="valid",
               kernel_initializer="he_uniform")(emb)
    x = Bidirectional(CuDNNGRU(config.cell_size_l1, return_sequences=True))(x)
    x = Bidirectional(CuDNNGRU(config.cell_size_l2, return_sequences=True))(x)
    x = Attention(int(config.strmaxlen / 2 - 1))(x)
    x = Dense(64, activation='relu')(x)
    outp = Dense(2, activation='softmax')(x)

    model = Model(inputs=inp, outputs=outp)
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=0.001, decay=0.0001),
                  metrics=['categorical_crossentropy', 'accuracy'])
    return model
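# The Keras snippets in this section call a custom layer as Attention(step_dim)(seq),
# collapsing a (batch, timesteps, features) tensor to (batch, features). The class
# below is a minimal illustrative sketch of such a layer (the name SimpleAttention
# and all details are assumptions, not the exact implementation these models import).
from keras import backend as K
from keras.layers import Layer


class SimpleAttention(Layer):
    """Feed-forward attention: score each timestep, softmax over time, weighted sum."""

    def __init__(self, step_dim, **kwargs):
        self.step_dim = step_dim   # kept only for interface parity with Attention(step_dim)
        super(SimpleAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        # One scoring weight per feature dimension.
        self.W = self.add_weight(name='att_weight',
                                 shape=(input_shape[-1], 1),
                                 initializer='glorot_uniform',
                                 trainable=True)
        super(SimpleAttention, self).build(input_shape)

    def call(self, x):
        scores = K.tanh(K.squeeze(K.dot(x, self.W), axis=-1))       # (batch, timesteps)
        weights = K.exp(scores)
        weights /= K.sum(weights, axis=1, keepdims=True) + K.epsilon()
        return K.sum(x * K.expand_dims(weights), axis=1)            # (batch, features)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])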
def __init__(self, num_classes, encoder, att_type='additive', img_size=(512, 512)):
    super().__init__()
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.low_feat = IntermediateLayerGetter(encoder, {"layer1": "layer1"}).to(self.device)
    self.encoder = IntermediateLayerGetter(encoder, {"layer4": "out"}).to(self.device)

    # For resnet18
    encoder_dim = 512
    low_level_dim = 64

    self.num_classes = num_classes
    self.class_encoder = nn.Linear(num_classes, 512)
    self.attention_enc = Attention(encoder_dim, att_type)
    self.decoder = Decoder(2, encoder_dim, img_size,
                           low_level_dim=low_level_dim,
                           rates=[1, 6, 12, 18])
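# A self-contained check of the dimensions hard-coded above ("For resnet18"):
# torchvision's IntermediateLayerGetter returns an OrderedDict of feature maps,
# and for resnet18 "layer1" carries 64 channels (low_level_dim) while "layer4"
# carries 512 channels (encoder_dim). Variable names here are illustrative.
import torch
import torchvision
from torchvision.models._utils import IntermediateLayerGetter

resnet = torchvision.models.resnet18()                      # randomly initialised backbone
low_getter = IntermediateLayerGetter(resnet, {"layer1": "layer1"})
high_getter = IntermediateLayerGetter(resnet, {"layer4": "out"})

dummy = torch.randn(1, 3, 512, 512)                         # matches img_size=(512, 512)
print(low_getter(dummy)["layer1"].shape)                    # torch.Size([1, 64, 128, 128])
print(high_getter(dummy)["out"].shape)                      # torch.Size([1, 512, 16, 16])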
def dense_attention(self, config):
    inp = Input(shape=(config.strmaxlen,), name='input')
    emb = Embedding(config.max_features,
                    config.max_features,
                    embeddings_initializer='identity',
                    trainable=True)(inp)
    emb1 = SpatialDropout1D(config.prob_dropout)(emb)

    ####
    # Feed the dropout-regularised embedding into the GRU stack.
    l1_G = Bidirectional(CuDNNGRU(config.cell_size_l1, return_sequences=True))(emb1)
    l2_GG = Bidirectional(CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_G)
    l3_GGC = Conv1D(config.filter_size,
                    kernel_size=config.kernel_size,
                    strides=config.strides,
                    padding="valid",
                    kernel_initializer="he_uniform")(l2_GG)

    attention_GGA = Attention(config.strmaxlen)(l2_GG)
    avg_pool_G = GlobalAveragePooling1D()(l1_G)
    max_pool_G = GlobalMaxPooling1D()(l1_G)
    avg_pool_GG = GlobalAveragePooling1D()(l2_GG)
    max_pool_GG = GlobalMaxPooling1D()(l2_GG)
    avg_pool_GGC = GlobalAveragePooling1D()(l3_GGC)
    max_pool_GGC = GlobalMaxPooling1D()(l3_GGC)
    attention_GGCA = Attention(int(config.strmaxlen / 2 - 1))(l3_GGC)

    conc_GGC = concatenate([
        avg_pool_G, max_pool_G, avg_pool_GG, max_pool_GG, avg_pool_GGC,
        max_pool_GGC, attention_GGA, attention_GGCA
    ])

    outp = Dropout(config.prob_dropout2)(conc_GGC)
    outp = Dense(2, activation='softmax')(outp)

    # ==================================================================================================
    model = Model(inputs=inp, outputs=outp)
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=0.001, decay=0.0005),
                  metrics=['categorical_crossentropy', 'accuracy'])
    return model
def __init__(self, rnn="lstm", input_length=5000, embedding_size=128, hidden_size=768,
             num_layers=1, num_classes=10, vocab_size=209, drop_prob=0.3):
    """
    :param rnn: indicates whether to use an LSTM or a GRU
    :param input_length: length of input subsequence
    :param embedding_size: dim of the embedding for each element in input
    :param hidden_size: number of features in the hidden state of LSTM
    :param num_layers: number of LSTM layers
    :param num_classes: number of classes for classification task
    :param vocab_size: size of vocabulary for embedding layer
    :param drop_prob: dropout probability
    """
    super(DeepSRGM, self).__init__()
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    if rnn == "lstm":
        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers,
                           dropout=drop_prob, batch_first=True)
    elif rnn == "gru":
        self.rnn = nn.GRU(embedding_size, hidden_size, num_layers,
                          dropout=drop_prob, batch_first=True)
    self.embeddings = nn.Embedding(vocab_size, embedding_size)
    self.attention_layer = Attention(hidden_size, input_length)
    self.fc1 = nn.Linear(hidden_size, 384)
    self.fc2 = nn.Linear(384, num_classes)
    # self.batchNorm1d = nn.BatchNorm1d(input_length)
    self.dropout = nn.Dropout(drop_prob)
    self.relu = nn.ReLU()
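# The constructor above only wires the layers; a plausible forward pass, chaining
# embedding -> RNN -> attention -> classifier, is sketched below as an assumption
# (the call signature and return value of the project's Attention module may differ).
def forward(self, x):
    # x: (batch, input_length) integer-encoded subsequences
    embeds = self.embeddings(x)               # (batch, input_length, embedding_size)
    rnn_out, _ = self.rnn(embeds)             # (batch, input_length, hidden_size)
    attended = self.attention_layer(rnn_out)  # weighted summary of the hidden states
    out = self.dropout(self.relu(self.fc1(attended)))
    return self.fc2(out)                      # (batch, num_classes) logits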
def getModel():
    listOfWords = keras.layers.Input((sequenceLength,), dtype="int32")
    embed = keras.layers.Embedding(input_dim=len(kTokenizer.word_index) + 1,
                                   output_dim=wordVectorLength,
                                   input_length=sequenceLength,
                                   trainable=True,
                                   mask_zero=mask_zero)(listOfWords)
    if attention == 'yes':
        if mask_zero:
            print('Mask Zero:', mask_zero,
                  ' : Using the custom Attention Layer from Christos Baziotis')
            vectorsForPrediction, attention_vectors = Attention(
                return_attention=True, name='attention_vector_layer')(embed)
        else:
            print('Mask Zero:', mask_zero,
                  ' : Using the function described here with repeat & permute blocks...')
            vectorsForPrediction = applyAttention(embed)
    elif attention == 'no':
        countDocVector = keras.layers.Lambda(
            lambda x: keras.backend.sum(x, axis=1),
            output_shape=lambda s: (s[0], s[2]))(embed)
        vectorsForPrediction = keras.layers.Dense(units=denseUnits,
                                                  activation='relu')(countDocVector)
    predictions = keras.layers.Dense(len(names), activation='sigmoid',
                                     use_bias=False)(vectorsForPrediction)
    model = keras.models.Model(inputs=listOfWords, outputs=predictions)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['categorical_accuracy'])
    print(model.summary())
    plot_model(model, show_shapes=True,
               to_file='results/' + attention + '-' + mask + '.png')
    if attention == 'yes':
        attention_layer_model = keras.models.Model(
            inputs=model.input,
            outputs=model.get_layer('attention_vector_layer').output)
    else:
        attention_layer_model = None
    return model, attention_layer_model
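# applyAttention is called above but not defined in this snippet. A common
# "repeat & permute" formulation is sketched here as an assumption (not
# necessarily the original): score each timestep with a Dense(1), softmax over
# time, and take the probability-weighted sum of the embeddings. It reuses the
# module-level wordVectorLength from the snippet above.
def applyAttention(embed):
    # embed: (batch, sequenceLength, wordVectorLength)
    scores = keras.layers.Dense(1, activation='tanh')(embed)        # (batch, seq, 1)
    scores = keras.layers.Flatten()(scores)                         # (batch, seq)
    weights = keras.layers.Activation('softmax')(scores)            # attention over timesteps
    weights = keras.layers.RepeatVector(wordVectorLength)(weights)  # (batch, dim, seq)
    weights = keras.layers.Permute([2, 1])(weights)                 # (batch, seq, dim)
    weighted = keras.layers.Multiply()([embed, weights])
    return keras.layers.Lambda(lambda t: keras.backend.sum(t, axis=1),
                               output_shape=lambda s: (s[0], s[2]))(weighted)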
def __init__(self, embedding_matrix, EMBEDDING_DIM, MAX_SEQUENCE_LENGTH, output_dim):
    with keras.backend.name_scope('embedding'):
        # load pre-trained word embeddings into an Embedding layer
        embedding_layer = keras.layers.Embedding(
            input_dim=embedding_matrix.shape[0],
            output_dim=EMBEDDING_DIM,
            embeddings_initializer=keras.initializers.Constant(embedding_matrix),
            input_length=MAX_SEQUENCE_LENGTH,
            trainable=False,
            name='Embedding')
        cond_GRU = keras.layers.GRU(128, return_sequences=True)

    with keras.backend.name_scope('seq_input'):
        input_turn1 = keras.layers.Input(shape=(MAX_SEQUENCE_LENGTH,), name='turn1')
        embedded_turn1 = embedding_layer(input_turn1)
        input_turn2 = keras.layers.Input(shape=(MAX_SEQUENCE_LENGTH,), name='turn2')
        embedded_turn2 = embedding_layer(input_turn2)
        input_turn3 = keras.layers.Input(shape=(MAX_SEQUENCE_LENGTH,), name='turn3')
        embedded_turn3 = embedding_layer(input_turn3)

    # keras.layers.GRU(128
    #                  , kernel_regularizer=keras.regularizers.l1_l2(l1=0.01, l2=0.01))
    with keras.backend.name_scope('turn1_proc'):
        ts1 = keras.layers.Bidirectional(
            keras.layers.GRU(128, return_sequences=True, dropout=0.2))(embedded_turn1)
        ts1 = keras.layers.Dropout(0.3)(ts1)
        ts1, ts1_h = keras.layers.GRU(128, return_state=True, dropout=0.2)(ts1)

    with keras.backend.name_scope('turn2_proc'):
        ts2 = cond_GRU(embedded_turn2, initial_state=[ts1_h])
        ts2 = keras.layers.Dropout(0.3)(ts2)
        ts2, ts2_h = keras.layers.GRU(128, return_state=True, dropout=0.2)(ts2)

    with keras.backend.name_scope('turn3_proc'):
        ts3 = keras.layers.Bidirectional(
            keras.layers.GRU(128, return_sequences=True, dropout=0.2))(embedded_turn3)
        ts3 = keras.layers.Dropout(0.3)(ts3)
        ts3 = keras.layers.Bidirectional(
            keras.layers.GRU(256, return_sequences=True, return_state=False,
                             dropout=0.2))(ts3)

    # with keras.backend.name_scope('concat1'):
    #     merged = keras.layers.Concatenate()([ts1, ts2, ts3])

    with keras.backend.name_scope('attn'):
        a = Attention(MAX_SEQUENCE_LENGTH)(ts3)

    with keras.backend.name_scope('common'):
        x = keras.layers.Dense(128)(a)
        x = keras.layers.PReLU()(x)
        x = keras.layers.Dropout(0.5)(x)
        x = keras.layers.BatchNormalization()(x)

        x1 = keras.layers.Dense(128)(x)
        x1 = keras.layers.PReLU()(x1)
        x1 = keras.layers.Dropout(0.5)(x1)
        x1 = keras.layers.BatchNormalization()(x1)
        x1 = keras.layers.Add()([x, x1])

        x2 = keras.layers.Dense(128)(x1)
        x2 = keras.layers.PReLU()(x2)
        x2 = keras.layers.Dropout(0.25)(x2)
        x2 = keras.layers.BatchNormalization()(x2)
        x2 = keras.layers.Add()([x1, x2])

        x3 = keras.layers.Dense(128)(x2)
        x3 = keras.layers.PReLU()(x3)
        x3 = keras.layers.Dropout(0.5)(x3)
        x3 = keras.layers.BatchNormalization()(x3)
        x3 = keras.layers.Add()([x2, x3])

        x4 = keras.layers.Dense(output_dim)(x3)
        x4 = keras.layers.PReLU()(x4)
        x4 = keras.layers.BatchNormalization()(x4)

    with keras.backend.name_scope('woe_input'):
        t1_woe = keras.layers.Input(shape=(4,), name='t1_woe')
        t2_woe = keras.layers.Input(shape=(4,), name='t2_woe')
        t3_woe = keras.layers.Input(shape=(4,), name='t3_woe')

    with keras.backend.name_scope('concat2'):
        merged = keras.layers.Concatenate()([x4, t1_woe, t2_woe, t3_woe])

    with keras.backend.name_scope('process_woe'):
        x = keras.layers.Dense(32)(merged)
        x = keras.layers.PReLU()(x)
        x = keras.layers.Dropout(0.5)(x)
        x = keras.layers.BatchNormalization()(x)

        x1 = keras.layers.Dense(32)(x)
        x1 = keras.layers.PReLU()(x1)
        x1 = keras.layers.Dropout(0.5)(x1)
        x1 = keras.layers.BatchNormalization()(x1)
        x1 = keras.layers.Add()([x, x1])

        x2 = keras.layers.Dense(32)(x1)
        x2 = keras.layers.PReLU()(x2)
        x2 = keras.layers.Dropout(0.5)(x2)
        x2 = keras.layers.BatchNormalization()(x2)
        x2 = keras.layers.Add()([x1, x2])

    preds = keras.layers.Dense(output_dim, activation='softmax')(x2)
    model = keras.models.Model(
        [input_turn1, input_turn2, input_turn3, t1_woe, t2_woe, t3_woe], preds)
    opt = keras.optimizers.Adam(0.001)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['acc'])
    self.model = model

    # Summarize the model
    print(self.model.summary())
    from keras.utils import plot_model
    plot_model(model, to_file='condv1.png', show_shapes=True)
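# A hedged sketch of training the six-input model built above, feeding each
# named Input by key; the array names (turn1_ids, ..., woe1, labels) are
# placeholders for whatever the surrounding preprocessing produces.
self.model.fit({'turn1': turn1_ids, 'turn2': turn2_ids, 'turn3': turn3_ids,
                't1_woe': woe1, 't2_woe': woe2, 't3_woe': woe3},
               labels,                      # one-hot targets, shape (n_samples, output_dim)
               batch_size=64,
               epochs=10,
               validation_split=0.1)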
y = [d[1] for d in data_tuples]

split = int(SPLIT_FRACTION * len(data_tuples))
X_train, y_train = X[split * 2:], y[split * 2:]
X_val, y_val = X[split:split * 2], y[split:split * 2]
X_test, y_test = X[:split], y[:split]

print("X_train shape:", X_train.shape)
print("X_val shape:", X_val.shape)
print("X_test shape:", X_test.shape)

print('Build model...')
model = Sequential()
model.add(Embedding(vocab_size, 128, mask_zero=False))
model.add(Bidirectional(LSTM(128, dropout=0.5, recurrent_dropout=0.5,
                             return_sequences=True)))
model.add(Attention(direction="bidirectional"))
model.add(Dense(50, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

plot_model(model, to_file=RESULT_DIR + '/shake-model.png')
model.summary()

if os.path.isfile(WEIGHTS_FILE):
    model.load_weights(WEIGHTS_FILE)
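# A hedged continuation of the script above: train with early stopping and
# checkpointing so WEIGHTS_FILE always holds the best weights seen so far.
# Batch size and epoch count are illustrative, not taken from the original.
import numpy as np
from keras.callbacks import EarlyStopping, ModelCheckpoint

callbacks = [
    EarlyStopping(monitor='val_loss', patience=3),
    ModelCheckpoint(WEIGHTS_FILE, monitor='val_loss',
                    save_best_only=True, save_weights_only=True),
]
model.fit(X_train, np.asarray(y_train),
          batch_size=64,
          epochs=10,
          validation_data=(X_val, np.asarray(y_val)),
          callbacks=callbacks)
score, acc = model.evaluate(X_test, np.asarray(y_test), batch_size=64)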
def __init__(self):
    self.HOPS = 5
    self.DATASET = 'twitter'  # 'restaurant', 'laptop'
    self.POLARITIES_DIM = 3
    self.EMBEDDING_DIM = 200
    self.LEARNING_RATE = 0.01
    self.LSTM_PARAMS = {
        'units': 200,
        'activation': 'tanh',
        'recurrent_activation': 'sigmoid',
        'kernel_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'recurrent_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'bias_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'kernel_regularizer': regularizers.l2(0.001),
        'recurrent_regularizer': regularizers.l2(0.001),
        'bias_regularizer': regularizers.l2(0.001),
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.MAX_SEQUENCE_LENGTH = 40
    self.MAX_ASPECT_LENGTH = 2
    self.ITERATION = 500
    self.BATCH_SIZE = 200

    self.texts_raw_indices, self.texts_left_indices, self.aspects_indices, self.texts_right_indices, \
        self.polarities_matrix, \
        self.embedding_matrix, \
        self.tokenizer = \
        read_dataset(type=self.DATASET,
                     mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SEQUENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    if os.path.exists('ram_saved_model.h5'):
        print('loading saved model...')
        self.model = load_model('ram_saved_model.h5')
    else:
        print('Build model...')
        inputs_sentence = Input(shape=(self.MAX_SEQUENCE_LENGTH * 2 + self.MAX_ASPECT_LENGTH,),
                                name='inputs_sentence')
        inputs_aspect = Input(shape=(self.MAX_ASPECT_LENGTH,), name='inputs_aspect')
        sentence = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                             output_dim=self.EMBEDDING_DIM,
                             input_length=self.MAX_SEQUENCE_LENGTH * 2 + self.MAX_ASPECT_LENGTH,
                             weights=[self.embedding_matrix],
                             trainable=False,
                             name='sentence_embedding')(inputs_sentence)
        aspect = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                           output_dim=self.EMBEDDING_DIM,
                           input_length=self.MAX_ASPECT_LENGTH,
                           weights=[self.embedding_matrix],
                           trainable=False,
                           name='aspect_embedding')(inputs_aspect)
        memory = Bidirectional(LSTM(**self.LSTM_PARAMS, return_sequences=True),
                               name='memory')(sentence)
        aspect = Bidirectional(LSTM(**self.LSTM_PARAMS, return_sequences=True),
                               name='aspect')(aspect)
        x = Lambda(lambda xin: K.mean(xin, axis=1), name='aspect_mean')(aspect)
        SharedAttention = Attention(name='shared_attention')
        for i in range(self.HOPS):
            x = SharedAttention((memory, x))
        x = Dense(self.POLARITIES_DIM)(x)
        predictions = Activation('softmax')(x)
        model = Model(inputs=[inputs_sentence, inputs_aspect], outputs=predictions)
        model.summary()
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                      metrics=['acc'])
        # plot_model(model, to_file='model.png')
        self.model = model
def __init__(self):
    self.HOPS = 3
    self.SCORE_FUNCTION = 'mlp'  # scaled_dot_product / mlp (concat) / bi_linear (general dot)
    self.DATASET = 'twitter'  # 'twitter', 'restaurant', 'laptop'
    self.POLARITIES_DIM = 3
    self.EMBEDDING_DIM = 300
    self.LEARNING_RATE = 0.001
    self.INITIALIZER = initializers.RandomUniform(minval=-0.05, maxval=0.05)
    self.REGULARIZER = regularizers.l2(0.001)
    self.LSTM_PARAMS = {
        'units': 200,
        'activation': 'tanh',
        'recurrent_activation': 'sigmoid',
        'kernel_initializer': self.INITIALIZER,
        'recurrent_initializer': self.INITIALIZER,
        'bias_initializer': self.INITIALIZER,
        'kernel_regularizer': self.REGULARIZER,
        'recurrent_regularizer': self.REGULARIZER,
        'bias_regularizer': self.REGULARIZER,
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.MAX_SEQUENCE_LENGTH = 80
    self.MAX_ASPECT_LENGTH = 10
    self.BATCH_SIZE = 32
    self.EPOCHS = 5

    self.texts_raw_indices, self.texts_raw_without_aspects_indices, self.texts_left_indices, self.texts_left_with_aspects_indices, \
        self.aspects_indices, self.texts_right_indices, self.texts_right_with_aspects_indices, \
        self.polarities_matrix, \
        self.embedding_matrix, \
        self.tokenizer = \
        read_dataset(type=self.DATASET,
                     mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SEQUENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    if os.path.exists('ram_saved_model.h5'):
        print('loading saved model...')
        self.model = load_model('ram_saved_model.h5')
    else:
        print('Build model...')
        inputs_sentence = Input(shape=(self.MAX_SEQUENCE_LENGTH,), name='inputs_sentence')
        inputs_aspect = Input(shape=(self.MAX_ASPECT_LENGTH,), name='inputs_aspect')
        nonzero_count = Lambda(lambda xin: tf.count_nonzero(xin, dtype=tf.float32))(inputs_aspect)
        sentence = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                             output_dim=self.EMBEDDING_DIM,
                             input_length=self.MAX_SEQUENCE_LENGTH,
                             mask_zero=True,
                             weights=[self.embedding_matrix],
                             trainable=False,
                             name='sentence_embedding')(inputs_sentence)
        aspect = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                           output_dim=self.EMBEDDING_DIM,
                           input_length=self.MAX_ASPECT_LENGTH,
                           mask_zero=True,
                           weights=[self.embedding_matrix],
                           trainable=False,
                           name='aspect_embedding')(inputs_aspect)
        memory = Bidirectional(LSTM(return_sequences=True, **self.LSTM_PARAMS),
                               name='memory')(sentence)
        aspect = Bidirectional(LSTM(return_sequences=True, **self.LSTM_PARAMS),
                               name='aspect')(aspect)
        x = Lambda(lambda xin: K.sum(xin[0], axis=1) / xin[1],
                   name='aspect_mean')([aspect, nonzero_count])
        shared_attention = Attention(score_function=self.SCORE_FUNCTION,
                                     initializer=self.INITIALIZER,
                                     regularizer=self.REGULARIZER,
                                     name='shared_attention')
        for i in range(self.HOPS):
            x = shared_attention((memory, x))
        x = Flatten()(x)
        x = Dense(self.POLARITIES_DIM)(x)
        predictions = Activation('softmax')(x)
        model = Model(inputs=[inputs_sentence, inputs_aspect], outputs=predictions)
        model.summary()
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                      metrics=['acc', f1])
        # plot_model(model, to_file='model.png')
        self.model = model
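# A hedged sketch of training and persisting the model built above, reusing the
# arrays read in __init__; which index arrays actually feed inputs_sentence and
# inputs_aspect is an assumption here, as is the method name.
def train(self):
    self.model.fit([self.texts_raw_indices, self.aspects_indices],
                   self.polarities_matrix,
                   batch_size=self.BATCH_SIZE,
                   epochs=self.EPOCHS)
    self.model.save('ram_saved_model.h5')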
def test_attention(self):
    attention = Attention(attention_weight_vector_dim=5)
    x = Input(shape=(5, 10))
    y = attention(x)
    self.assertEqual(shape(y), (None, 10), "y")
    self.assertEqual(hasattr(y, '_keras_history'), True, "y")
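# The assertion above relies on a helper `shape` defined elsewhere in the test
# suite; a minimal stand-in (an assumption, not the original helper) that reads
# a Keras tensor's static shape as a tuple:
from keras import backend as K

def shape(tensor):
    return K.int_shape(tensor)   # e.g. (None, 10) for the attended output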
def get_model222(self, config):
    inp = Input(shape=(config.strmaxlen,), name='input')
    # inp = Input(shape=(config.max_features, ), name='input')
    emb = Embedding(config.max_features,
                    config.max_features,
                    embeddings_initializer='identity',
                    trainable=True)(inp)
    # emb1 = Embedding(config.max_features, config.embed_size, trainable = True)(inp)
    emb1 = SpatialDropout1D(config.prob_dropout)(emb)

    #### LSTM-first branch
    l1_L = Bidirectional(CuDNNLSTM(config.cell_size_l1, return_sequences=True))(emb1)
    l2_LL = Bidirectional(CuDNNLSTM(config.cell_size_l2, return_sequences=True))(l1_L)
    l2_LG = Bidirectional(CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_L)
    l3_LLC = Conv1D(config.filter_size,
                    kernel_size=config.kernel_size,
                    strides=2,
                    padding="valid",
                    kernel_initializer="he_uniform")(l2_LL)
    l3_LGC = Conv1D(config.filter_size,
                    kernel_size=config.kernel_size,
                    strides=2,
                    padding="valid",
                    kernel_initializer="he_uniform")(l2_LG)

    avg_pool_L = GlobalAveragePooling1D()(l1_L)
    max_pool_L = GlobalMaxPooling1D()(l1_L)
    avg_pool_LL = GlobalAveragePooling1D()(l2_LL)
    max_pool_LL = GlobalMaxPooling1D()(l2_LL)
    avg_pool_LG = GlobalAveragePooling1D()(l2_LG)
    max_pool_LG = GlobalMaxPooling1D()(l2_LG)
    attention_LLA = Attention(config.strmaxlen)(l2_LL)
    attention_LGA = Attention(config.strmaxlen)(l2_LG)
    avg_pool_LLC = GlobalAveragePooling1D()(l3_LLC)
    max_pool_LLC = GlobalMaxPooling1D()(l3_LLC)
    avg_pool_LGC = GlobalAveragePooling1D()(l3_LGC)
    max_pool_LGC = GlobalMaxPooling1D()(l3_LGC)
    attention_LLCA = Attention(int(config.strmaxlen / 2 - 1))(l3_LLC)
    attention_LGCA = Attention(int(config.strmaxlen / 2 - 1))(l3_LGC)

    conc_LLC = concatenate([
        avg_pool_L, max_pool_L, avg_pool_LL, max_pool_LL, avg_pool_LLC,
        max_pool_LLC, attention_LLA, attention_LLCA
    ])
    conc_LGC = concatenate([
        avg_pool_L, max_pool_L, avg_pool_LG, max_pool_LG, avg_pool_LGC,
        max_pool_LGC, attention_LGA, attention_LGCA
    ])

    out_LL = Dropout(config.prob_dropout2)(conc_LLC)
    out_LG = Dropout(config.prob_dropout2)(conc_LGC)
    # Single-unit regression heads so all four outputs share a shape, match the
    # MSE loss below, and can be averaged together.
    out_LL = Dense(1)(out_LL)
    out_LG = Dense(1)(out_LG)

    #### GRU-first branch
    # emb2 = Embedding(config.max_features, config.max_features,embeddings_initializer='identity', trainable = True)(inp)
    # emb1 = Embedding(config.max_features, config.embed_size, trainable = True)(inp)
    emb2 = SpatialDropout1D(config.prob_dropout)(emb)

    l1_G = Bidirectional(CuDNNGRU(config.cell_size_l1, return_sequences=True))(emb2)
    l2_GL = Bidirectional(CuDNNLSTM(config.cell_size_l2, return_sequences=True))(l1_G)
    l2_GG = Bidirectional(CuDNNGRU(config.cell_size_l2, return_sequences=True))(l1_G)
    l3_GLC = Conv1D(config.filter_size,
                    kernel_size=config.kernel_size,
                    strides=2,
                    padding="valid",
                    kernel_initializer="he_uniform")(l2_GL)
    l3_GGC = Conv1D(config.filter_size,
                    kernel_size=config.kernel_size,
                    strides=2,
                    padding="valid",
                    kernel_initializer="he_uniform")(l2_GG)

    avg_pool_G = GlobalAveragePooling1D()(l1_G)
    max_pool_G = GlobalMaxPooling1D()(l1_G)
    avg_pool_GL = GlobalAveragePooling1D()(l2_GL)
    max_pool_GL = GlobalMaxPooling1D()(l2_GL)
    avg_pool_GG = GlobalAveragePooling1D()(l2_GG)
    max_pool_GG = GlobalMaxPooling1D()(l2_GG)
    attention_GLA = Attention(config.strmaxlen)(l2_GL)
    attention_GGA = Attention(config.strmaxlen)(l2_GG)
    avg_pool_GLC = GlobalAveragePooling1D()(l3_GLC)
    max_pool_GLC = GlobalMaxPooling1D()(l3_GLC)
    avg_pool_GGC = GlobalAveragePooling1D()(l3_GGC)
    max_pool_GGC = GlobalMaxPooling1D()(l3_GGC)
    attention_GLCA = Attention(int(config.strmaxlen / 2 - 1))(l3_GLC)
    attention_GGCA = Attention(int(config.strmaxlen / 2 - 1))(l3_GGC)

    conc_GLC = concatenate([
        avg_pool_G, max_pool_G, avg_pool_GL, max_pool_GL, avg_pool_GLC,
        max_pool_GLC, attention_GLA, attention_GLCA
    ])
    conc_GGC = concatenate([
        avg_pool_G, max_pool_G, avg_pool_GG, max_pool_GG, avg_pool_GGC,
        max_pool_GGC, attention_GGA, attention_GGCA
    ])

    out_GL = Dropout(config.prob_dropout2)(conc_GLC)
    out_GG = Dropout(config.prob_dropout2)(conc_GGC)
    out_GL = Dense(1)(out_GL)
    out_GG = Dense(1)(out_GG)

    out_avg = average([out_LL, out_LG, out_GL, out_GG])

    # ==================================================================================================
    model_avg = Model(inputs=inp, outputs=[out_LL, out_LG, out_GL, out_GG, out_avg])

    # inp_pre = Input(shape=(config.strmaxlen, ), name='input_pre')
    # inp_post = Input(shape=(config.strmaxlen, ), name='input_post')
    # model_pre = model_avg(inp_pre)
    # model_post = model_avg(inp_post)
    # stack_layer = concatenate([model_pre, model_post])
    # ens_out = Dense(1, use_bias=False)(stack_layer)
    # reg_model = Model(inputs=[inp_pre, inp_post], outputs=ens_out)

    model_avg.compile(loss='mean_squared_error',
                      optimizer='adam',
                      loss_weights=[1., 1., 1., 1., 0.1],
                      metrics=['mean_squared_error', 'accuracy'])
    return model_avg
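# A hedged sketch of fitting the five-output model returned by get_model222:
# every head receives the same regression target, mirroring the five loss
# weights passed to compile. `builder`, `config`, `x_train`, and `y_train` are
# placeholders for the surrounding training script.
model = builder.get_model222(config)
y_targets = [y_train] * 5   # out_LL, out_LG, out_GL, out_GG, out_avg
model.fit(x_train, y_targets, batch_size=64, epochs=10, validation_split=0.1)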
def __init__(self, embedding_dim=100, batch_size=64, n_hidden=100, learning_rate=0.01,
             n_class=3, max_sentence_len=40, l2_reg_val=0.003):
    ############################
    self.DATASET = ['restaurant', 'laptop']
    self.TASK_INDICES = [1002, 1003, 1005]  # 1001-twitter, 1002-restaurant, 1003-laptop, 1004-others, 1005-general
    self.LOSS_WEIGHTS = {1002: 0.5, 1003: 0.5, 1005: 0.5}
    self.MODEL_TO_LOAD = './models/mtl_absa_att_sh_saved_model.h5'
    ###########################
    self.SCORE_FUNCTION = 'mlp'
    self.EMBEDDING_DIM = embedding_dim
    self.BATCH_SIZE = batch_size
    self.N_HIDDEN = n_hidden
    self.LEARNING_RATE = learning_rate
    self.N_CLASS = n_class
    self.MAX_SENTENCE_LENGTH = max_sentence_len
    self.EPOCHS = 4
    self.L2_REG_VAL = l2_reg_val
    self.MAX_ASPECT_LENGTH = 5
    self.INITIALIZER = initializers.RandomUniform(minval=-0.003, maxval=0.003)
    self.REGULARIZER = regularizers.l2(self.L2_REG_VAL)
    self.LSTM_PARAMS = {
        'units': self.N_HIDDEN,
        'activation': 'tanh',
        'recurrent_activation': 'hard_sigmoid',
        'dropout': 0,
        'recurrent_dropout': 0
    }
    self.DENSE_PARAMS = {
        'kernel_initializer': self.INITIALIZER,
        'bias_initializer': self.INITIALIZER,
        'kernel_regularizer': self.REGULARIZER,
        'bias_regularizer': self.REGULARIZER,
        'dtype': 'float32'
    }

    self.texts_raw_indices, self.texts_raw_without_aspects_indices, self.texts_left_indices, self.texts_left_with_aspects_indices, \
        self.aspects_indices, self.texts_right_indices, self.texts_right_with_aspects_indices, self.dataset_index, \
        self.polarities_matrix, self.polarities, \
        self.embedding_matrix, \
        self.tokenizer = \
        read_dataset(types=self.DATASET,
                     mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SENTENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    print('Build model...')
    inputs_l = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int64')
    inputs_r = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int64')
    inputs_aspect = Input(shape=(self.MAX_ASPECT_LENGTH,), dtype='int64', name='inputs_aspect')
    nonzero_count = Lambda(lambda xin: tf.count_nonzero(xin, dtype='float32'))(inputs_aspect)
    input_dataset = Input(shape=(1,), dtype='float32')

    Embedding_Layer = Embedding(input_dim=len(self.embedding_matrix),
                                output_dim=self.EMBEDDING_DIM,
                                input_length=self.MAX_SENTENCE_LENGTH,
                                mask_zero=True,
                                weights=[self.embedding_matrix],
                                trainable=False)
    aspect = Embedding(input_dim=len(self.embedding_matrix),
                       output_dim=self.EMBEDDING_DIM,
                       input_length=self.MAX_ASPECT_LENGTH,
                       mask_zero=True,
                       weights=[self.embedding_matrix],
                       trainable=False,
                       name='aspect_embedding')(inputs_aspect)
    x_l = Embedding_Layer(inputs_l)
    x_r = Embedding_Layer(inputs_r)

    x_aspect = Bidirectional(LSTM(name='aspect', return_sequences=True, **self.LSTM_PARAMS),
                             merge_mode='sum')(aspect)
    x_aspect = Lambda(lambda xin: K.sum(xin[0], axis=1) / xin[1],
                      name='aspect_mean')([x_aspect, nonzero_count])
    x_l = LSTM(name='sentence_left', return_sequences=True, **self.LSTM_PARAMS)(x_l)
    x_r = LSTM(go_backwards=True, name='sentence_right', return_sequences=True,
               **self.LSTM_PARAMS)(x_r)

    shared_attention = Attention(score_function=self.SCORE_FUNCTION,
                                 initializer=self.INITIALIZER,
                                 regularizer=self.REGULARIZER,
                                 name='shared_attention')
    x = Concatenate(name='last_shared', axis=1)([x_l, x_r])
    x = shared_attention((x, x_aspect))
    x = Lambda(lambda x: K.squeeze(x, 1))(x)

    # twitter task layers
    tw_x = Dense(self.N_HIDDEN, name='t1_dense_10', **self.DENSE_PARAMS)(x)
    twitter_x = Dense(self.N_CLASS, name='t1_dense_3', **self.DENSE_PARAMS)(tw_x)
    twitter_x = Concatenate(name="twitter_output")([twitter_x, input_dataset])

    # rest task layers
    rest_x = Dense(self.N_HIDDEN, name='t2_dense_10', **self.DENSE_PARAMS)(x)
    rest_x = Dense(self.N_CLASS, name='t2_dense_3', **self.DENSE_PARAMS)(rest_x)
    rest_x = Concatenate(name="rest_output")([rest_x, input_dataset])

    # general task layers
    general_x = Dense(self.N_HIDDEN, name='t3_dense_10', **self.DENSE_PARAMS)(x)
    general_x = Dense(self.N_CLASS, name='t3_dense_3', **self.DENSE_PARAMS)(general_x)
    general_x = Concatenate(name="general_output")([general_x, input_dataset])

    model = Model(inputs=[inputs_l, inputs_r, input_dataset, inputs_aspect],
                  outputs=[twitter_x, rest_x, general_x])
    model.summary()

    if os.path.exists(self.MODEL_TO_LOAD):
        print('loading saved model...')
        model.load_weights(self.MODEL_TO_LOAD)

    self.model = model
    self.model.compile(loss={'twitter_output': multitask_loss(self.LOSS_WEIGHTS, self.TASK_INDICES[0]),
                             'rest_output': multitask_loss(self.LOSS_WEIGHTS, self.TASK_INDICES[1]),
                             'general_output': multitask_loss(self.LOSS_WEIGHTS, self.TASK_INDICES[2])},
                       optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                       metrics=[multitask_accuracy, f1])
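# A hedged sketch of a training step for the multitask model compiled above; the
# method name and the choice of arrays fed to each head are assumptions (the
# custom multitask_loss may expect targets augmented with the dataset index, so
# the real project likely prepares y differently).
def train(self):
    inputs = [self.texts_left_with_aspects_indices,
              self.texts_right_with_aspects_indices,
              self.dataset_index,
              self.aspects_indices]
    targets = {'twitter_output': self.polarities_matrix,
               'rest_output': self.polarities_matrix,
               'general_output': self.polarities_matrix}
    self.model.fit(inputs, targets,
                   batch_size=self.BATCH_SIZE,
                   epochs=self.EPOCHS)
    self.model.save_weights(self.MODEL_TO_LOAD)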