def rank_attention_lstm_model(batch_size, nb_epoch, hidden_dim):
    sequence = Input(shape=(maxlen,), dtype='int32')
    embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
                         mask_zero=True, weights=[W], trainable=False)(sequence)
    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
    #                      weights=[W], trainable=False)(sequence)
    embedded = Dropout(0.25)(embedded)

    enc = Bidirectional(GRU(hidden_dim // 2, recurrent_dropout=0.25, return_sequences=True))(embedded)
    att = AttentionM()(enc)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(6, activation='softmax')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc', f1])
    model.fit(X_train, y_train, validation_data=(X_dev, y_dev),
              batch_size=batch_size, epochs=nb_epoch, verbose=2)
    # model.save("weights_rank_attention" + num + ".hdf5")
    y_pred = model.predict(X_test, batch_size=batch_size)
    return y_pred
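# The compile calls in this file pass a custom `f1` metric that is defined elsewhere
# in the project. The snippet below is only a minimal, batch-wise sketch of such a
# metric, computed with the Keras backend on one-hot targets; the project's actual
# implementation may differ.
from keras import backend as K

def f1(y_true, y_pred):
    """Approximate batch-wise F1 score for one-hot encoded targets (illustrative sketch)."""
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))
    true_pos = K.sum(K.round(K.clip(y_true * y_pred_pos, 0, 1)))
    pred_pos = K.sum(y_pred_pos)
    possible_pos = K.sum(K.round(K.clip(y_true, 0, 1)))
    precision = true_pos / (pred_pos + K.epsilon())
    recall = true_pos / (possible_pos + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())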
def attention_model(DROPOUT=0.25):
    sequence = Input(shape=(maxlen,), dtype='int32')
    embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
                         mask_zero=True, weights=[W], trainable=False)(sequence)
    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
    #                      weights=[W], trainable=False)(sequence)
    embedded = Dropout(0.25)(embedded)

    enc = Bidirectional(GRU(hidden_dim, dropout=DROPOUT, return_sequences=True))(embedded)
    enc = Bidirectional(GRU(hidden_dim, dropout=DROPOUT, return_sequences=True))(enc)
    att = AttentionM()(enc)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(3, activation='softmax')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)
    rmsprop = optimizers.rmsprop(lr=0.001)
    model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['acc', f1])
    model.summary()
    return model
def buildModel(embeddingMatrix):
    """Constructs the architecture of the model.

    Input:
        embeddingMatrix : The embedding matrix to be loaded in the embedding layer.
    Output:
        model : A stacked bidirectional GRU model with an attention layer.
    """
    sequence = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embeddingLayer = Embedding(embeddingMatrix.shape[0], EMBEDDING_DIM,
                               weights=[embeddingMatrix],
                               input_length=MAX_SEQUENCE_LENGTH,
                               trainable=False)(sequence)

    enc = Bidirectional(GRU(LSTM_DIM, dropout=DROPOUT, return_sequences=True))(embeddingLayer)
    enc = Bidirectional(GRU(LSTM_DIM, dropout=DROPOUT, return_sequences=True))(enc)
    att = AttentionM()(enc)

    fc1 = Dense(128, activation="relu")(att)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(4, activation='sigmoid')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)
    # rmsprop = optimizers.rmsprop(lr=LEARNING_RATE)  # unused: the model is compiled with Adam below
    model.compile(loss='categorical_crossentropy', optimizer=optimizers.adam(), metrics=['acc'])
    return model
def attention_lstm_model(r_dropout):
    sequence = Input(shape=(maxlen,), dtype='int32')
    embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
                         mask_zero=True, weights=[W], trainable=False)(sequence)
    embedded = Dropout(0.25)(embedded)

    enc = Bidirectional(GRU(hidden_dim // 2, recurrent_dropout=r_dropout,
                            return_sequences=True))(embedded)
    att = AttentionM()(enc)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(6, activation='softmax')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)
    # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True)
    # early_stopping = EarlyStopping(monitor="val_loss", patience=14, verbose=1)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc', f1])
    model.summary()
    return model
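# Every model in this file relies on a custom AttentionM layer imported from elsewhere
# in the project. Purely as an illustration (not the authors' implementation), a minimal
# "attention over timesteps" layer could look like the sketch below: it learns one score
# per timestep, softmaxes the scores, and returns the weighted sum of the timestep
# vectors. Masking support is included because several models use mask_zero=True.
from keras import backend as K
from keras.layers import Layer  # keras.engine.topology.Layer on older 2.x releases

class AttentionMSketch(Layer):
    def __init__(self, **kwargs):
        self.supports_masking = True
        super(AttentionMSketch, self).__init__(**kwargs)

    def build(self, input_shape):
        # input_shape: (batch, timesteps, features)
        self.W = self.add_weight(name='att_W', shape=(input_shape[-1], 1),
                                 initializer='glorot_uniform', trainable=True)
        super(AttentionMSketch, self).build(input_shape)

    def call(self, x, mask=None):
        scores = K.squeeze(K.dot(x, self.W), axis=-1)          # (batch, timesteps)
        if mask is not None:
            scores -= (1.0 - K.cast(mask, K.floatx())) * 1e9   # suppress padded timesteps
        weights = K.softmax(scores)                            # attention weights over time
        return K.sum(x * K.expand_dims(weights, axis=-1), axis=1)  # (batch, features)

    def compute_mask(self, inputs, mask=None):
        return None  # the sequence dimension is pooled away

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])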
def attention_elmo(batch_size=128, nb_epoch=35, hidden_dim=80):
    sequence = Input(shape=(maxlen2,), dtype='int32')
    embedded = Embedding(input_dim=W2.shape[0], output_dim=W2.shape[1], input_length=maxlen2,
                         mask_zero=True, weights=[W2], trainable=False)(sequence)
    embedded = Dropout(0.25)(embedded)

    enc = Bidirectional(GRU(hidden_dim, dropout=0.35, return_sequences=True))(embedded)
    enc = Bidirectional(GRU(hidden_dim, dropout=0.35, return_sequences=True))(enc)
    att = AttentionM()(enc)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(3, activation='softmax')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)
    rmsprop = optimizers.rmsprop(lr=0.001)
    model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['acc', f1])

    class_weight = {0: 1, 1: 2, 2: 6}
    train_num, test_num = X_train2.shape[0], X_test2.shape[0]
    # train_num, test_num = X_train2.shape[0], X_dev2.shape[0]
    num1 = y_train2.shape[1]
    second_level_train_set = np.zeros((train_num, num1))
    second_level_test_set = np.zeros((test_num, num1))
    test_nfolds_sets = []

    # kf = KFold(n_splits=2)
    kf = KFold(n_splits=5)
    for i, (train_index, test_index) in enumerate(kf.split(X_train2)):
        x_tra, y_tra = X_train2[train_index], y_train2[train_index]
        x_tst, y_tst = X_train2[test_index], y_train2[test_index]
        model.fit(x_tra, y_tra, validation_data=(x_tst, y_tst), batch_size=batch_size,
                  epochs=nb_epoch, verbose=2, class_weight=class_weight)
        second_level_train_set[test_index] = model.predict(x_tst, batch_size=batch_size)
        test_nfolds_sets.append(model.predict(X_test2))
        # test_nfolds_sets.append(model.predict(X_dev2))

    for item in test_nfolds_sets:
        second_level_test_set += item
    second_level_test_set = second_level_test_set / 5

    return second_level_train_set, second_level_test_set
def bi_gru_attention(left_pickle, right_pickle, dropout=0.36, hidden_dim=160):
    print("this is bi_gru_attention model")
    left_maxlen, left_max_features, left_num_features, left_W, left_X_train, left_y_train, \
        left_X_dev, left_y_dev, left_test, y_test = get_feature(left_pickle)
    right_maxlen, right_max_features, right_num_features, right_W, right_X_train, right_y_train, \
        right_X_dev, right_y_dev, right_test, y_test = get_feature(right_pickle)

    left_sequence = Input(shape=(left_maxlen,), dtype='int32')
    left_embedded = Embedding(input_dim=left_max_features, output_dim=left_num_features,
                              input_length=left_maxlen, weights=[left_W],
                              trainable=False)(left_sequence)
    left_enc = Bidirectional(GRU(hidden_dim, recurrent_dropout=dropout,
                                 return_sequences=True))(left_embedded)
    left_att = AttentionM()(left_enc)
    print(np.shape(left_enc))

    right_sequence = Input(shape=(right_maxlen,), dtype='int32')
    right_embedded = Embedding(input_dim=right_max_features, output_dim=right_num_features,
                               input_length=right_maxlen, weights=[right_W],
                               trainable=False)(right_sequence)
    right_enc = Bidirectional(GRU(hidden_dim, recurrent_dropout=dropout,
                                  return_sequences=True))(right_embedded)
    right_att = AttentionM()(right_enc)
    print(np.shape(right_enc))

    comb = Concatenate()([left_att, right_att])
    output = Dense(6, activation='softmax')(comb)

    model = Model(inputs=[left_sequence, right_sequence], outputs=output)
    return model, left_X_train, left_y_train, left_X_dev, left_y_dev, left_test, \
        right_X_train, right_y_train, right_X_dev, right_y_dev, right_test, y_test
def gru_and_attention(maxlen, max_features, num_features, W, dropout=0.0):
    sequence = Input(shape=(maxlen,), dtype='int32')
    embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
                         mask_zero=True, weights=[W], trainable=False)(sequence)
    embedded = Dropout(dropout)(embedded)

    gru = Bidirectional(GRU(hidden_dim, recurrent_dropout=dropout,
                            return_sequences=True))(embedded)
    att = AttentionM()(gru)
    output = Dense(6, activation='softmax')(att)

    model = Model(inputs=sequence, outputs=output)
    return model
def attentionModel(embeddingMatrix, embedding_dim, hidden_dim, name):
    sequence = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embeddingLayer = Embedding(embeddingMatrix.shape[0], embedding_dim,
                               weights=[embeddingMatrix],
                               input_length=MAX_SEQUENCE_LENGTH,
                               trainable=False)(sequence)

    enc = Bidirectional(GRU(hidden_dim, dropout=DROPOUT, return_sequences=True))(embeddingLayer)
    enc = Bidirectional(GRU(hidden_dim, dropout=DROPOUT, return_sequences=True))(enc)
    att = AttentionM()(enc)

    fc1 = Dense(128, activation="relu")(att)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(4, activation='sigmoid')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)
    rmsprop = optimizers.rmsprop(lr=LEARNING_RATE)
    model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['acc'])
    return model, name
def gru(self):
    model = Sequential()
    model.add(Embedding(
        input_dim=self.max_features,
        output_dim=self.num_features,
        input_length=self.maxlen,
        # mask_zero=True,
        weights=[self.weights],
        trainable=False
    ))
    model.add(Dropout(0.5))
    model.add(GRU(self.hidden_dims // 2, recurrent_dropout=0.25, return_sequences=True))
    model.add(AttentionM())
    model.add(Dropout(0.25))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='mse', optimizer='adam')
    return model
def attention_bi_lstm_model():
    batch_size = 256
    nb_epoch = 40
    hidden_dim = 120

    sequence = Input(shape=(maxlen,), dtype='int32')
    embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
                         mask_zero=True, weights=[W], trainable=False)(sequence)
    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
    #                      weights=[W], trainable=False)(sequence)
    embedded = Dropout(0.25)(embedded)

    # bi-lstm
    enc = Bidirectional(LSTM(hidden_dim // 2, recurrent_dropout=0.25,
                             return_sequences=True))(embedded)
    # gru
    # enc = Bidirectional(GRU(hidden_dim // 2, recurrent_dropout=0.2, return_sequences=True))(embedded)
    att = AttentionM()(enc)
    # print(enc.shape)
    # print(att.shape)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(2, activation='softmax')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)
    class_weight = {0: 1, 1: 7}
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc', f1])
    model.summary()
    return model
def type_attention_lstm_model(batch_size, nb_epoch, hidden_dim):
    sequence = Input(shape=(maxlen,), dtype='int32')
    embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
                         mask_zero=True, weights=[W], trainable=False)(sequence)
    embedded = Dropout(0.25)(embedded)

    # gru
    enc = Bidirectional(GRU(hidden_dim // 2, recurrent_dropout=0.2,
                            return_sequences=True))(embedded)
    att = AttentionM()(enc)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(4, activation='softmax')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)
    # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True)
    # early_stopping = EarlyStopping(monitor="val_loss", patience=8, verbose=1)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc', f1])
    model.fit(X_train, y_train, validation_data=(X_dev, y_dev), batch_size=batch_size,
              epochs=nb_epoch, verbose=2)
    y_pred = model.predict(X_dev, batch_size=batch_size)
    return y_pred
def attentionModel(embeddingMatrix):
    sequence = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embeddingLayer = Embedding(embeddingMatrix.shape[0], embeddingMatrix.shape[1],
                               weights=[embeddingMatrix],
                               input_length=MAX_SEQUENCE_LENGTH,
                               mask_zero=emb_mask_zero,
                               trainable=emb_trainable)(sequence)

    enc = Bidirectional(GRU(LSTM_DIM, dropout=DROPOUT, return_sequences=True))(embeddingLayer)
    enc = Bidirectional(GRU(LSTM_DIM, dropout=DROPOUT, return_sequences=True))(enc)
    att = AttentionM()(enc)

    fc1 = Dense(128, activation="relu")(att)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(NUM_CLASSES, activation='softmax')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['acc'])
    return model
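# Hypothetical smoke test for a builder such as attentionModel above, fitting on random
# data just to confirm the graph compiles end to end. The sizes below are placeholders,
# and MAX_SEQUENCE_LENGTH / NUM_CLASSES are assumed to be the same module-level constants
# the builder already relies on; real inputs come from the project's preprocessing.
import numpy as np

vocab_size, emb_dim = 5000, 300
dummy_matrix = np.random.rand(vocab_size, emb_dim).astype('float32')
dummy_X = np.random.randint(1, vocab_size, size=(64, MAX_SEQUENCE_LENGTH))
dummy_y = np.eye(NUM_CLASSES)[np.random.randint(0, NUM_CLASSES, size=64)]

smoke_model = attentionModel(dummy_matrix)
smoke_model.fit(dummy_X, dummy_y, batch_size=16, epochs=1, verbose=2)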
# this is the placeholder tensor for the input sequence
sequence = keras.layers.Input(shape=(maxlen,), dtype='int32')
embedded = keras.layers.Embedding(input_dim=max_features, output_dim=num_features,
                                  input_length=maxlen, mask_zero=True, weights=[W],
                                  trainable=False)(sequence)
# embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
#                      weights=[W], trainable=False)(sequence)
embedded = keras.layers.Dropout(0.25)(embedded)

# bi-lstm
# enc = Bidirectional(LSTM(hidden_dim // 2, recurrent_dropout=0.25, return_sequences=True))(embedded)
# gru
enc = keras.layers.Bidirectional(keras.layers.GRU(hidden_dim // 2, recurrent_dropout=0.25,
                                                  return_sequences=True))(embedded)
att = AttentionM()(enc)
# print(enc.shape)
# print(att.shape)

fc1_dropout = keras.layers.Dropout(0.25)(att)
fc1 = keras.layers.Dense(50, activation="relu")(fc1_dropout)
fc2_dropout = keras.layers.Dropout(0.25)(fc1)
output = keras.layers.Dense(2, activation='softmax')(fc2_dropout)

model = keras.Model(inputs=sequence, outputs=output)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
history = model.fit(X_train, y_train, validation_data=(X_dev, y_dev),
                    batch_size=batch_size, epochs=nb_epoch)
y_pred = model.predict(X_test, batch_size=batch_size)
def attention_lstm_model(batch_size, nb_epoch, hidden_dim, num):
    sequence = Input(shape=(maxlen,), dtype='int32')
    embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
                         mask_zero=True, weights=[W], trainable=False)(sequence)
    embedded = Dropout(0.25)(embedded)

    # bi-lstm
    enc = Bidirectional(LSTM(hidden_dim // 2, recurrent_dropout=0.25,
                             return_sequences=True))(embedded)
    # gru
    # enc = Bidirectional(GRU(hidden_dim // 2, recurrent_dropout=0.2, return_sequences=True))(embedded)
    att = AttentionM()(enc)
    # print(enc.shape)
    # print(att.shape)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(2, activation='softmax')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)
    class_weight = {0: 1, 1: 7}
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc', f1])

    train_num, test_num = X_train.shape[0], X_dev.shape[0]
    num1 = y_train.shape[1]
    second_level_train_set = np.zeros((train_num, num1))  # (10556,)
    second_level_test_set = np.zeros((test_num, num1))    # (2684,)
    test_nfolds_sets = []

    kf = KFold(n_splits=5)
    for i, (train_index, test_index) in enumerate(kf.split(X_train)):
        x_tra, y_tra = X_train[train_index], y_train[train_index]
        x_tst, y_tst = X_train[test_index], y_train[test_index]
        # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True)
        early_stopping = EarlyStopping(monitor='val_acc', patience=10, verbose=1)
        model.fit(x_tra, y_tra, validation_data=(x_tst, y_tst), batch_size=batch_size,
                  epochs=nb_epoch, verbose=2, class_weight=class_weight,
                  callbacks=[early_stopping])
        second_level_train_set[test_index] = model.predict(x_tst, batch_size=batch_size)
        test_nfolds_sets.append(model.predict(X_dev))

    for item in test_nfolds_sets:
        second_level_test_set += item
    second_level_test_set = second_level_test_set / 5

    model.save("weights_BB_attention_lstm" + num + ".hdf5")
    y_pred = second_level_test_set
    return y_pred
def RNN(X_train, y_train, args):
    """
    Purpose -> Define and train the proposed LSTM network
    Input   -> Data, labels and model hyperparameters
    Output  -> Trained LSTM network
    """
    # Sets the model hyperparameters
    # Embedding hyperparameters
    max_features = args[0]
    maxlen = args[1]
    embedding_size = args[2]
    # Convolution hyperparameters
    filter_length = args[3]
    nb_filter = args[4]
    pool_length = args[5]
    # LSTM hyperparameters
    lstm_output_size = args[6]
    # Training hyperparameters
    batch_size = args[7]
    nb_epoch = args[8]
    numclasses = args[9]
    test_size = args[10]

    # Format conversion for y_train for compatibility with Keras
    y_train = np_utils.to_categorical(y_train, numclasses)
    print(y_train)

    # Train & validation data splitting
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train,
                                                          test_size=test_size,
                                                          random_state=42)

    # Build the model
    # Model architecture is:
    # Input -> Embedding -> Conv1D + MaxPool1D -> LSTM -> LSTM -> FC-1 -> Softmax loss
    print('Build model...')
    start = time()

    log_dir = datetime.now().strftime('model_%Y%m%d_%H%M')
    os.mkdir(log_dir)
    es = EarlyStopping(monitor='val_loss', patience=20)
    mc = ModelCheckpoint(os.path.join(log_dir, 'CIFAR10-EP{epoch:02d}-ACC{val_acc:.4f}.h5'),
                         monitor='val_loss', save_best_only=True)
    tb = TensorBoard(log_dir=log_dir, histogram_freq=0)

    sequence = Input(shape=(maxlen,), dtype='int32')
    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
    #                      mask_zero=True, weights=[W], trainable=False)(sequence)
    embedded = Embedding(input_dim=max_features, output_dim=embedding_size,
                         input_length=maxlen, trainable=False)(sequence)
    embedded = Dropout(0.25)(embedded)

    convolution = Convolution1D(filters=nb_filter, kernel_size=filter_length,
                                padding='valid', activation='relu', strides=1)(embedded)
    maxpooling = MaxPooling1D(pool_size=pool_length)(convolution)

    lstm = LSTM(lstm_output_size, dropout=0.2, recurrent_dropout=0.2,
                return_sequences=True)(maxpooling)
    lstm1 = LSTM(lstm_output_size, dropout=0.2, recurrent_dropout=0.2,
                 return_sequences=False)(lstm)

    enc = Bidirectional(GRU(lstm_output_size // 2, recurrent_dropout=0.25,
                            return_sequences=True))(maxpooling)
    att = AttentionM()(enc)

    x = keras.layers.Concatenate(axis=1)([lstm1, att])
    fc1 = Dense(128, activation="relu")(x)
    fc2 = Dense(64, activation="relu")(fc1)
    fc3 = Dense(32, activation="relu")(fc2)
    fc4 = Dense(16, activation="relu")(fc3)
    fc4_dropout = Dropout(0.25)(fc4)
    output = Dense(3, activation='softmax')(fc4_dropout)
    model = Model(inputs=sequence, outputs=output)

    '''model = Sequential()
    model.add(Embedding(max_features, embedding_size, input_length=maxlen))
    model.add(Convolution1D(nb_filter=nb_filter, filter_length=filter_length,
                            border_mode='valid', activation='relu', subsample_length=1))
    model.add(MaxPooling1D(pool_length=pool_length))
    model.add(SpatialDropout1D(0.1))
    model.add(Bidirectional(CuDNNGRU(64, return_sequences=True)))
    model.add(Bidirectional(CuDNNGRU(64, return_sequences=True)))
    Routings = 5
    Num_capsule = 10
    Dim_capsule = 32
    model.add(Capsule(num_capsule=Num_capsule, dim_capsule=Dim_capsule,
                      routings=Routings, share_weights=True))
    model.add(Flatten())
    model.add(LSTM(lstm_output_size, dropout_W=0.2, dropout_U=0.2, return_sequences=True))
    model.add(LSTM(lstm_output_size, dropout_W=0.2, dropout_U=0.2, return_sequences=True))
    model.add(Bidirectional(LSTM(lstm_output_size//2, recurrent_dropout=0.25, return_sequences=False)))
    # model.add(AttentionM())
    model.add(Dropout(0.25))
    model.add(Dense(numclasses, activation='softmax'))'''

    # Optimizer is Adamax along with categorical crossentropy loss
    model.compile(loss='categorical_crossentropy', optimizer='adamax', metrics=['accuracy'])
    print(model.summary())

    history = LossHistory()
    print('Train...')
    # Trains the model with shuffling after every epoch and validates on the held-out split
    model.fit(X_train, y_train, batch_size=batch_size, shuffle=True, epochs=nb_epoch,
              validation_data=(X_valid, y_valid), callbacks=[history, es, mc, tb])
    history.loss_plot('epoch')
    return model
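# The RNN trainer above passes a LossHistory callback and later calls
# history.loss_plot('epoch'); that class is not defined in this file. The snippet below
# is only a minimal sketch of what such a callback might look like, not the authors' code:
# it records the training (and per-epoch validation) loss and plots the curves.
import matplotlib.pyplot as plt
from keras.callbacks import Callback

class LossHistory(Callback):
    def on_train_begin(self, logs=None):
        self.losses = {'batch': [], 'epoch': []}
        self.val_loss = {'epoch': []}

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}
        self.losses['batch'].append(logs.get('loss'))

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.losses['epoch'].append(logs.get('loss'))
        self.val_loss['epoch'].append(logs.get('val_loss'))

    def loss_plot(self, loss_type):
        # loss_type is 'batch' or 'epoch'; validation loss is only available per epoch
        iters = range(len(self.losses[loss_type]))
        plt.plot(iters, self.losses[loss_type], label='train loss')
        if loss_type == 'epoch':
            plt.plot(iters, self.val_loss['epoch'], label='val loss')
        plt.xlabel(loss_type)
        plt.ylabel('loss')
        plt.legend()
        plt.show()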
def attention_lstm_model():
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)
    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info(r"running %s" % ''.join(sys.argv))

    logging.info('loading data...')
    pickle_file = os.path.join('pickle', 'type_train_val_test2.pickle3')
    revs, W, word_idx_map, vocab, maxlen = pickle.load(open(pickle_file, 'rb'))
    logging.info('data loaded!')

    X_train, X_test, X_dev, y_train, y_dev = make_idx_data(revs, word_idx_map, maxlen=maxlen)

    n_train_sample = X_train.shape[0]
    logging.info("n_train_sample [n_train_sample]: %d" % n_train_sample)
    n_test_sample = X_test.shape[0]
    logging.info("n_test_sample [n_test_sample]: %d" % n_test_sample)
    len_sentence = X_train.shape[1]  # 200
    logging.info("len_sentence [len_sentence]: %d" % len_sentence)
    max_features = W.shape[0]
    logging.info("num of word vector [max_features]: %d" % max_features)
    num_features = W.shape[1]  # 400
    logging.info("dimension of word vector [num_features]: %d" % num_features)

    sequence = Input(shape=(maxlen,), dtype='int32')
    embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
                         mask_zero=True, weights=[W], trainable=False)(sequence)
    # embedded = Embedding(input_dim=max_features, output_dim=num_features, input_length=maxlen,
    #                      weights=[W], trainable=False)(sequence)
    embedded = Dropout(0.25)(embedded)

    enc = Bidirectional(GRU(hidden_dim // 2, recurrent_dropout=0.25,
                            return_sequences=True))(embedded)
    att = AttentionM()(enc)

    fc1_dropout = Dropout(0.25)(att)
    fc1 = Dense(50, activation="relu")(fc1_dropout)
    fc2_dropout = Dropout(0.25)(fc1)
    output = Dense(4, activation='softmax')(fc2_dropout)

    model = Model(inputs=sequence, outputs=output)
    # checkpointer = ModelCheckpoint(filepath="weights.hdf5", monitor='val_acc', verbose=1, save_best_only=True)
    # early_stopping = EarlyStopping(monitor="val_loss", patience=10, verbose=1)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc', f1])
    # model.fit(X_train, y_train, validation_data=[X_dev, y_dev], batch_size=batch_size,
    #           epochs=nb_epoch, verbose=2, callbacks=[checkpointer, early_stopping])
    # y_pred = model.predict(X_dev, batch_size=batch_size)
    return model
def Attention_Model(config):
    image_input = Input(shape=(224, 224, 3))
    x = keras.layers.BatchNormalization()(image_input)
    x = Conv2D(64, (4, 4), activation="relu",
               kernel_regularizer=keras.regularizers.l2(1e-3))(x)
    x = MaxPooling2D()(x)
    x = keras.layers.BatchNormalization()(x)
    x = Conv2D(32, (3, 3), activation="relu")(x)
    x = Dropout(0.2)(x)
    x = MaxPooling2D()(x)
    x = Conv2D(32, (2, 2), activation="relu",
               kernel_regularizer=keras.regularizers.l2(1e-3))(x)
    x = MaxPooling2D()(x)
    x = Conv2D(32, (2, 2), activation="relu")(x)
    x = MaxPooling2D()(x)
    x = Conv2D(32, (2, 2), activation='relu')(x)
    x = MaxPooling2D()(x)
    x = keras.layers.Conv2D(10, (2, 2), activation='relu',
                            kernel_regularizer=keras.regularizers.l2(config.ker_reg_1))(x)
    fla = Flatten()(x)
    image_concat = BatchNormalization()(fla)

    # image_input = Input(shape=(2048,))
    # x = keras.layers.BatchNormalization()(image_input)
    # x = Dropout(0.5)(x)
    # image_concat = Dense(300, activation="relu", kernel_regularizer=keras.regularizers.l2(args.ker_reg_1))(x)
    # image_concat = BatchNormalization()(image_concat)

    text_input = keras.Input(shape=(config.token_number, config.token_feature_vector,))
    # text_input = keras.layers.Masking(mask_value=0.0,
    #                                   input_shape=(config.token_number, config.token_feature_vector,))
    # text_bit = BatchNormalization(axis=-1)(text_input)
    enc = Bidirectional(LSTM(300, dropout=config.rnn_dro_1, return_sequences=True))(text_input)
    enc = Bidirectional(LSTM(300, dropout=config.rnn_dro_1, return_sequences=True))(enc)
    enc = LSTM(160, dropout=config.rnn_dro_2, return_sequences=True,
               kernel_regularizer=keras.regularizers.l2(config.ker_reg_2))(enc)
    att = AttentionM()(enc)
    att = BatchNormalization()(att)

    concat = keras.layers.concatenate([image_concat, att], axis=1)
    concat_dropout_1 = BatchNormalization()(concat)
    concat_dropout_1 = Dropout(config.concat_dropout_1)(concat_dropout_1)
    concat_Dense_1 = Dense(config.concat_1, activation="relu",
                           kernel_regularizer=keras.regularizers.l1(config.ker_reg_3))(concat_dropout_1)
    concat_dropout_2 = Dropout(config.concat_dropout_2)(concat_Dense_1)
    dense2 = Dense(64, activation="relu", name="Dense_2")(concat_dropout_2)
    concat_dropout_3 = Dropout(config.concat_dropout_3)(dense2)
    dense3 = Dense(3, activation='softmax')(concat_dropout_3)

    model = keras.Model([image_input, text_input], dense3)
    model.summary()
    model.compile(optimizer=keras.optimizers.adam(),
                  loss=keras.losses.categorical_crossentropy,
                  metrics=['acc'])
    return model
def interActive_bilstm_attention(left_pickle, right_pickle, hidden_dim, dropout_rate, capsule_dim):
    Routings = 3  # changed
    Num_capsule = 6
    Dim_capsule = capsule_dim

    left_maxlen, left_max_features, left_num_features, left_W, left_X_train, left_y_train, \
        left_X_dev, left_y_dev, left_test, y_test = get_feature(left_pickle)
    right_maxlen, right_max_features, right_num_features, right_W, right_X_train, right_y_train, \
        right_X_dev, right_y_dev, right_test, y_test = get_feature(right_pickle)

    left_sequence = Input(shape=(left_maxlen,), dtype='int32')
    left_embedded = Embedding(input_dim=left_max_features, output_dim=left_num_features,
                              input_length=left_maxlen, weights=[left_W],
                              trainable=False)(left_sequence)
    left_embedded = Dropout(dropout_rate)(left_embedded)
    # bi-lstm
    left_embedded = Bidirectional(GRU(hidden_dim, recurrent_dropout=dropout_rate,
                                      return_sequences=True))(left_embedded)
    left_enc = Bidirectional(GRU(hidden_dim, recurrent_dropout=dropout_rate,
                                 return_sequences=True))(left_embedded)
    # left_capsule = Flatten()(left_capsule)

    right_sequence = Input(shape=(right_maxlen,), dtype='int32')
    right_embedded = Embedding(input_dim=right_max_features, output_dim=right_num_features,
                               input_length=right_maxlen, weights=[right_W],
                               trainable=False)(right_sequence)
    right_embedded = Dropout(dropout_rate)(right_embedded)
    right_embedded = Bidirectional(GRU(hidden_dim, recurrent_dropout=dropout_rate,
                                       return_sequences=True))(right_embedded)
    right_enc = Bidirectional(GRU(hidden_dim, recurrent_dropout=dropout_rate,
                                  return_sequences=True))(right_embedded)
    # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule)
    # right_capsule = Flatten()(right_capsule)

    # comboVec = Concatenate(axis=1)([left_enc, right_enc])
    interActivateVec = interActivate(hidden_dims=hidden_dim)([left_enc, right_enc])
    print("input_size", interActivateVec)

    tanh_inter_left = Tanh()(interActivateVec)
    inter_trans = TransMatrix()(interActivateVec)
    tanh_inter_right = Tanh()(inter_trans)

    scaledPool_inter_left = MaxPooling1D(pool_size=165)(tanh_inter_left)
    scaledPool_inter_left = Reshape((165,))(scaledPool_inter_left)
    print("scaledPool_inter_left ", scaledPool_inter_left)
    scaledPool_inter_right = MaxPooling1D(pool_size=165)(tanh_inter_right)
    scaledPool_inter_right = Reshape((165,))(scaledPool_inter_right)
    print("scaledPool_inter_right ", scaledPool_inter_right)

    softmax_inter_left = Softmax()(scaledPool_inter_left)
    softmax_inter_right = Softmax()(scaledPool_inter_right)

    softmax_inter_left = Dot(axes=1)([left_enc, softmax_inter_left])
    print("softmax_inter_left", softmax_inter_left, left_enc)
    softmax_inter_right = Dot(axes=1)([right_enc, softmax_inter_right])
    print("softmax_inter_right", softmax_inter_right, right_enc)

    comboVec = Concatenate(axis=1)([softmax_inter_left, softmax_inter_right])
    comboVec = Reshape((-1, 2 * hidden_dim))(comboVec)
    comboVec_dropout = Dropout(dropout_rate)(comboVec)
    # print("comboVect: ", comboVec)

    combo_gru = Bidirectional(LSTM(hidden_dim, dropout=dropout_rate,
                                   return_sequences=True))(comboVec_dropout)
    combo_gru_att = AttentionM()(combo_gru)
    # combo_gru = Flatten(combo_gru)

    '''
    output1 = Dense(128, activation="relu")(comboVec)
    output1 = Dropout(0.34)(output1)
    output2 = Dense(64, activation="relu")(output1)
    output2 = Dropout(0.25)(output2)
    output3 = Dense(32, activation="relu")(output2)
    output3 = Dropout(0.12)(output3)
    '''
    # my2dCapsule = Capsule(routings=Routings, num_capsule=Num_capsule, dim_capsule=Dim_capsule,
    #                       kernel_size=input_kernel_size)(comboVec_dropout)
    # my2dCapsule_dropout = Dropout(dropout_rate)(comboVec_dropout)
    print("capsule output: ", combo_gru_att)
    # comboVec_dropout = Flatten()(comboVec_dropout)
    # bilstm_capsule = Bidirectional(LSTM(hidden_dim, recurrent_dropout=0.34, return_sequences=True))(my2dCapsule)
    # bilstm_capsule = Bidirectional(LSTM(hidden_dim, recurrent_dropout=0.34, return_sequences=True))(bilstm_capsule)
    # attentioned_capsule = AttentionM()(bilstm_capsule)
    # output_capsule = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(my2dCapsule_dropout)
    # my2dCapsule = Flatten()(my2dCapsule)

    output = Dense(6, activation="softmax")(combo_gru_att)
    print("output: ", output)

    model = Model(inputs=[left_sequence, right_sequence], outputs=output)
    return model, left_X_train, left_y_train, left_X_dev, left_y_dev, left_test, \
        right_X_train, right_y_train, right_X_dev, right_y_dev, right_test, y_test
def Attention_model(config):
    K.set_image_data_format('channels_last')

    image_input = Input(shape=(224, 224, 3), dtype='float32')
    # x = Conv2D(64, (5, 5), activation='relu')(image_input)
    x = BatchNormalization(axis=1)(image_input)  # the Conv2D above is commented out, so normalise the raw input
    x = MaxPooling2D()(x)
    x = Conv2D(32, (5, 5), activation='relu',
               kernel_regularizer=keras.regularizers.l2(1e-3))(x)
    x = Dropout(0.2)(x)
    x = MaxPooling2D()(x)
    x = Conv2D(32, (4, 4), activation='relu')(x)
    x = MaxPooling2D()(x)
    x = BatchNormalization(axis=1)(x)
    x = Conv2D(16, (3, 3), activation='relu')(x)
    x = MaxPooling2D()(x)
    x = Conv2D(6, (3, 3), activation='relu',
               kernel_regularizer=keras.regularizers.l2(args.ker_reg_1))(x)
    fla = Flatten()(x)
    image_concat = BatchNormalization()(fla)

    # image_input = Input(shape=(2048,))
    # x = keras.layers.BatchNormalization()(image_input)
    # x = Dropout(0.5)(x)
    # image_concat = Dense(300, activation="relu", kernel_regularizer=keras.regularizers.l2(args.ker_reg_1))(x)
    # image_concat = BatchNormalization()(image_concat)

    text_input = keras.Input(shape=(config.token_number, config.token_feature_vector))
    text_bit = BatchNormalization(axis=-1)(text_input)
    enc = Bidirectional(LSTM(300, dropout=args.rnn_dro_1, return_sequences=True))(text_bit)
    enc = Bidirectional(LSTM(300, dropout=args.rnn_dro_1, return_sequences=True))(enc)  # stack on the previous BiLSTM rather than on text_bit again
    enc = LSTM(300, dropout=args.rnn_dro_2, return_sequences=True,
               kernel_regularizer=keras.regularizers.l2(args.ker_reg_2))(enc)
    att = AttentionM()(enc)
    att = BatchNormalization()(att)

    concat = keras.layers.concatenate([image_concat, att], axis=1)
    concat_dropout_1 = BatchNormalization()(concat)
    concat_dropout_1 = Dropout(args.concat_dropout_1)(concat_dropout_1)
    concat_Dense_1 = Dense(args.concat_1, activation="relu",
                           kernel_regularizer=keras.regularizers.l1(args.ker_reg_3))(concat_dropout_1)
    concat_dropout_2 = Dropout(args.concat_dropout_2)(concat_Dense_1)
    batch_2 = keras.layers.BatchNormalization()(concat_dropout_2)

    dense2 = Dense(32, activation="relu")(batch_2)
    dense3 = Dense(32, activation="relu")(batch_2)
    dense4 = Dense(32, activation="relu")(batch_2)
    dense5 = Dense(64, activation="relu")(batch_2)

    Humour_Dense = Dense(1, activation='sigmoid', name='Humour')(dense2)
    Sarcasm_Dense = Dense(1, activation='sigmoid', name='Sarcasm')(dense3)
    Offensive_Dense = Dense(1, activation='sigmoid', name='Offensive')(dense4)
    motivational_Dense = Dense(1, activation='sigmoid', name='motivational')(dense5)

    model = keras.Model([image_input, text_input],
                        [Humour_Dense, Sarcasm_Dense, Offensive_Dense, motivational_Dense])
    model.summary()
    model.compile(optimizer=keras.optimizers.adam(),
                  loss=keras.losses.binary_crossentropy,
                  metrics=['acc'])
    return model