def load_data(self):
    """Load dialogue-level features, labels, masks and ids for `self.modality`.

    NOTE(review): this top-level function duplicates bc_LSTM.load_data and is
    never referenced as a free function — consider deleting it.

    Expects `self` to provide `classification_mode` ("Emotion"/"Sentiment") and
    `modality` ("text" | "audio" | "bimodal"). Populates train/val/test
    features, labels, sample-weight masks and dialogue ids, plus
    `sequence_length` (dialogue length) and `classes` (label count) taken from
    the loaded arrays.
    """
    print('Loading data')
    self.data = Dataloader(mode=self.classification_mode)

    # Dispatch on modality; anything else is a fatal configuration error.
    if self.modality == "text":
        self.data.load_text_data()
    elif self.modality == "audio":
        self.data.load_audio_data()
    elif self.modality == "bimodal":
        self.data.load_bimodal_data()
    else:
        # Fix: the original bare exit() terminated with status 0 and no
        # diagnostic; exit with a message (and hence a non-zero status).
        exit("load_data: unknown modality '{}'".format(self.modality))

    self.train_x = self.data.train_dialogue_features
    self.val_x = self.data.val_dialogue_features
    self.test_x = self.data.test_dialogue_features

    self.train_y = self.data.train_dialogue_label
    self.val_y = self.data.val_dialogue_label
    self.test_y = self.data.test_dialogue_label

    # Masks mark real utterances (1) vs padding (0); used as sample weights.
    self.train_mask = self.data.train_mask
    self.val_mask = self.data.val_mask
    self.test_mask = self.data.test_mask

    self.train_id = self.data.train_dialogue_ids.keys()
    self.val_id = self.data.val_dialogue_ids.keys()
    self.test_id = self.data.test_dialogue_ids.keys()

    # Derived dimensions: (batch, seq_len, ...) features, (batch, seq_len, classes) labels.
    self.sequence_length = self.train_x.shape[1]
    self.classes = self.train_y.shape[2]
class bc_LSTM:
    """Bidirectional contextual LSTM (bc-LSTM) baseline for utterance-level
    emotion/sentiment classification over dialogues.

    Supports three modalities ("text", "audio", "bimodal"). Workflow:
    `load_data()` -> `train_model()` (which calls `test_model()`), saving the
    best weights to `self.PATH` and pickling intermediate "utter"-layer
    embeddings to `self.OUTPUT_PATH`.
    """

    def __init__(self):
        # Hard-coded experiment configuration; edit here to switch task/modality.
        self.classification_mode = "Emotion"
        self.modality = "audio"
        # Fix: build paths from self.modality instead of a hard-coded "audio"
        # literal, so switching modality no longer clobbers the audio files.
        # (Identical output for the current configuration.)
        self.PATH = "./data/models/{}_weights_{}.hdf5".format(
            self.modality, self.classification_mode.lower())
        self.OUTPUT_PATH = "./data/pickles/{}_{}.pkl".format(
            self.modality, self.classification_mode.lower())
        print("Model initiated for {} classification".format(
            self.classification_mode))

    def load_data(self):
        """Load train/val/test features, labels, masks and dialogue ids for
        the configured modality, and record sequence length / class count."""
        print('Loading data')
        self.data = Dataloader(mode=self.classification_mode)

        if self.modality == "text":
            self.data.load_text_data()
        elif self.modality == "audio":
            self.data.load_audio_data()
        elif self.modality == "bimodal":
            self.data.load_bimodal_data()
        else:
            # Fix: bare exit() terminated with status 0 and no diagnostic.
            exit("load_data: unknown modality '{}'".format(self.modality))

        self.train_x = self.data.train_dialogue_features
        self.val_x = self.data.val_dialogue_features
        self.test_x = self.data.test_dialogue_features

        self.train_y = self.data.train_dialogue_label
        self.val_y = self.data.val_dialogue_label
        self.test_y = self.data.test_dialogue_label

        # Masks mark real utterances (1) vs padding (0); used as sample weights.
        self.train_mask = self.data.train_mask
        self.val_mask = self.data.val_mask
        self.test_mask = self.data.test_mask

        self.train_id = self.data.train_dialogue_ids.keys()
        self.val_id = self.data.val_dialogue_ids.keys()
        self.test_id = self.data.test_dialogue_ids.keys()

        # (batch, seq_len, feat) features, (batch, seq_len, classes) labels.
        self.sequence_length = self.train_x.shape[1]
        self.classes = self.train_y.shape[2]

    def calc_test_result(self, pred_label, test_label, test_mask):
        """Print confusion matrix, per-class report and weighted F-score.

        Only positions where test_mask == 1 (real, non-padded utterances)
        contribute; labels are taken as the argmax over the class axis.

        :param pred_label: (batch, seq_len, classes) predicted distributions.
        :param test_label: (batch, seq_len, classes) one-hot ground truth.
        :param test_mask:  (batch, seq_len) 0/1 padding mask.
        """
        true_label = []
        predicted_label = []

        for i in range(pred_label.shape[0]):
            for j in range(pred_label.shape[1]):
                if test_mask[i, j] == 1:
                    true_label.append(np.argmax(test_label[i, j]))
                    predicted_label.append(np.argmax(pred_label[i, j]))

        print("Confusion Matrix :")
        print(confusion_matrix(true_label, predicted_label))
        print("Classification Report :")
        print(classification_report(true_label, predicted_label, digits=4))
        print('Weighted FScore: \n ',
              precision_recall_fscore_support(true_label, predicted_label,
                                              average='weighted'))

    def get_audio_model(self):
        """Build the audio model: masking + 2 stacked BiLSTMs + per-timestep
        softmax. Also sets audio-specific training hyperparameters."""
        # Modality specific hyperparameters
        self.epochs = 100
        self.batch_size = 50

        # Modality specific parameters
        self.embedding_dim = self.train_x.shape[2]

        print("Creating Model...")

        inputs = Input(shape=(self.sequence_length, self.embedding_dim),
                       dtype='float32')
        # Mask zero-padded utterance positions so the LSTMs skip them.
        masked = Masking(mask_value=0)(inputs)
        lstm = Bidirectional(
            LSTM(300, activation='tanh', return_sequences=True,
                 dropout=0.4))(masked)
        # "utter" layer name is relied on by test_model() for embeddings.
        lstm = Bidirectional(
            LSTM(300, activation='tanh', return_sequences=True, dropout=0.4),
            name="utter")(lstm)
        output = TimeDistributed(
            Dense(self.classes, activation='softmax'))(lstm)

        model = Model(inputs, output)
        return model

    def get_text_model(self):
        """Build the text model: per-utterance TextCNN (3/4/5-gram Conv2D +
        max-pool) feeding 2 stacked BiLSTMs over the dialogue, then a
        per-timestep softmax. Also sets text-specific hyperparameters."""
        # Modality specific hyperparameters
        self.epochs = 100
        self.batch_size = 50

        # Modality specific parameters
        self.embedding_dim = self.data.W.shape[1]  # For text model
        self.vocabulary_size = self.data.W.shape[0]
        self.filter_sizes = [3, 4, 5]
        self.num_filters = 512

        print("Creating Model...")

        sentence_length = self.train_x.shape[2]

        # Initializing sentence representation layers (shared across timesteps)
        embedding = Embedding(input_dim=self.vocabulary_size,
                              output_dim=self.embedding_dim,
                              weights=[self.data.W],
                              input_length=sentence_length,
                              trainable=False)
        conv_0 = Conv2D(self.num_filters,
                        kernel_size=(self.filter_sizes[0], self.embedding_dim),
                        padding='valid', kernel_initializer='normal',
                        activation='relu')
        conv_1 = Conv2D(self.num_filters,
                        kernel_size=(self.filter_sizes[1], self.embedding_dim),
                        padding='valid', kernel_initializer='normal',
                        activation='relu')
        conv_2 = Conv2D(self.num_filters,
                        kernel_size=(self.filter_sizes[2], self.embedding_dim),
                        padding='valid', kernel_initializer='normal',
                        activation='relu')
        maxpool_0 = MaxPool2D(
            pool_size=(sentence_length - self.filter_sizes[0] + 1, 1),
            strides=(1, 1), padding='valid')
        maxpool_1 = MaxPool2D(
            pool_size=(sentence_length - self.filter_sizes[1] + 1, 1),
            strides=(1, 1), padding='valid')
        maxpool_2 = MaxPool2D(
            pool_size=(sentence_length - self.filter_sizes[2] + 1, 1),
            strides=(1, 1), padding='valid')
        dense_func = Dense(100, activation='tanh', name="dense")
        dense_final = Dense(units=self.classes, activation='softmax')
        reshape_func = Reshape((sentence_length, self.embedding_dim, 1))

        def slicer(x, index):
            # Select utterance `index` from the dialogue axis.
            return x[:, K.constant(index, dtype='int32'), :]

        def slicer_output_shape(input_shape):
            shape = list(input_shape)
            assert len(shape) == 3  # batch, seq_len, sent_len
            new_shape = (shape[0], shape[2])
            return new_shape

        def reshaper(x):
            # Add a channels axis for Conv2D: (batch, words, dim) -> (..., 1).
            return K.expand_dims(x, axis=3)

        def flattener(x):
            x = K.reshape(x, [-1, x.shape[1] * x.shape[3]])
            return x

        def flattener_output_shape(input_shape):
            shape = list(input_shape)
            new_shape = (shape[0], 3 * shape[3])
            return new_shape

        inputs = Input(shape=(self.sequence_length, sentence_length),
                       dtype='int32')

        # Encode each utterance with the shared TextCNN, one timestep at a time.
        cnn_output = []
        for ind in range(self.sequence_length):
            local_input = Lambda(slicer,
                                 output_shape=slicer_output_shape,
                                 arguments={"index": ind
                                            })(inputs)  # Batch, word_indices

            # cnn-sent
            emb_output = embedding(local_input)
            reshape = Lambda(reshaper)(emb_output)
            concatenated_tensor = Concatenate(axis=1)([
                maxpool_0(conv_0(reshape)),
                maxpool_1(conv_1(reshape)),
                maxpool_2(conv_2(reshape))
            ])
            flatten = Lambda(
                flattener,
                output_shape=flattener_output_shape,
            )(concatenated_tensor)
            dense_output = dense_func(flatten)
            dropout = Dropout(0.5)(dense_output)

            cnn_output.append(dropout)

        def stack(x):
            # Re-assemble per-utterance vectors into (batch, seq_len, 100).
            return K.stack(x, axis=1)

        cnn_outputs = Lambda(stack)(cnn_output)

        masked = Masking(mask_value=0)(cnn_outputs)
        lstm = Bidirectional(
            LSTM(300, activation='relu', return_sequences=True,
                 dropout=0.3))(masked)
        # "utter" layer name is relied on by test_model() for embeddings.
        lstm = Bidirectional(
            LSTM(300, activation='relu', return_sequences=True, dropout=0.3),
            name="utter")(lstm)
        output = TimeDistributed(
            Dense(self.classes, activation='softmax'))(lstm)

        model = Model(inputs, output)
        return model

    def get_bimodal_model(self):
        """Build the bimodal model: masking + a single BiLSTM + per-timestep
        softmax over fused (text+audio) features."""
        # Modality specific hyperparameters
        self.epochs = 100
        self.batch_size = 10

        # Modality specific parameters
        self.embedding_dim = self.train_x.shape[2]

        print("Creating Model...")

        inputs = Input(shape=(self.sequence_length, self.embedding_dim),
                       dtype='float32')
        masked = Masking(mask_value=0)(inputs)
        # "utter" layer name is relied on by test_model() for embeddings.
        lstm = Bidirectional(
            LSTM(300, activation='tanh', return_sequences=True, dropout=0.4),
            name="utter")(masked)
        output = TimeDistributed(
            Dense(self.classes, activation='softmax'))(lstm)

        model = Model(inputs, output)
        return model

    def train_model(self):
        """Train the modality-specific model with checkpointing and early
        stopping (masked, per-timestep loss), then evaluate via test_model()."""
        checkpoint = ModelCheckpoint(self.PATH, monitor='val_loss', verbose=1,
                                     save_best_only=True, mode='auto')

        if self.modality == "audio":
            model = self.get_audio_model()
            model.compile(optimizer='adadelta',
                          loss='categorical_crossentropy',
                          sample_weight_mode='temporal')
        elif self.modality == "text":
            model = self.get_text_model()
            model.compile(optimizer='adadelta',
                          loss='categorical_crossentropy',
                          sample_weight_mode='temporal')
        elif self.modality == "bimodal":
            model = self.get_bimodal_model()
            model.compile(optimizer='adam',
                          loss='categorical_crossentropy',
                          sample_weight_mode='temporal')
        else:
            # Fix: without this branch an invalid modality raised a confusing
            # NameError on `model` below.
            exit("train_model: unknown modality '{}'".format(self.modality))

        early_stopping = EarlyStopping(monitor='val_loss', patience=10)
        model.fit(self.train_x, self.train_y,
                  epochs=self.epochs,
                  batch_size=self.batch_size,
                  sample_weight=self.train_mask,
                  shuffle=True,
                  callbacks=[early_stopping, checkpoint],
                  validation_data=(self.val_x, self.val_y, self.val_mask))

        self.test_model()

    def test_model(self):
        """Reload the best checkpoint, pickle the "utter"-layer embeddings for
        train/val/test dialogues to OUTPUT_PATH, and print test metrics."""
        model = load_model(self.PATH)
        # Fix: Keras Model takes `inputs`/`outputs`; the old `input=`/`output=`
        # keywords are invalid in modern Keras and raise a TypeError.
        intermediate_layer_model = Model(
            inputs=model.input, outputs=model.get_layer("utter").output)

        intermediate_output_train = intermediate_layer_model.predict(
            self.train_x)
        intermediate_output_val = intermediate_layer_model.predict(self.val_x)
        intermediate_output_test = intermediate_layer_model.predict(
            self.test_x)

        train_emb, val_emb, test_emb = {}, {}, {}
        for idx, ID in enumerate(self.train_id):
            train_emb[ID] = intermediate_output_train[idx]
        for idx, ID in enumerate(self.val_id):
            val_emb[ID] = intermediate_output_val[idx]
        for idx, ID in enumerate(self.test_id):
            test_emb[ID] = intermediate_output_test[idx]

        # Fix: use a context manager so the pickle file handle is closed
        # (the original left open(...) dangling).
        with open(self.OUTPUT_PATH, "wb") as handle:
            pickle.dump([train_emb, val_emb, test_emb], handle)

        self.calc_test_result(model.predict(self.test_x), self.test_y,
                              self.test_mask)