Exemple #1
0
    def load_data(self, ):
        """Load the dataset for ``self.modality`` and cache its splits on self.

        Builds a ``Dataloader`` for ``self.classification_mode``, asks it to
        load the features of the configured modality, then copies the
        train/val/test features, labels, masks and dialogue ids onto this
        object. ``self.sequence_length`` is taken from axis 1 of the training
        features and ``self.classes`` from axis 2 of the training labels.

        Raises:
            SystemExit: if ``self.modality`` is not "text", "audio" or
                "bimodal". The message names the offending value, so the
                process ends with an error instead of silently.
        """
        # Name of the Dataloader method to call for each supported modality.
        loader_names = {
            "text": "load_text_data",
            "audio": "load_audio_data",
            "bimodal": "load_bimodal_data",
        }

        # Validate before doing any loading work, and say why we are stopping.
        if self.modality not in loader_names:
            raise SystemExit(
                "Unknown modality {!r}; expected one of: {}".format(
                    self.modality, ", ".join(sorted(loader_names))))

        print('Loading data')
        self.data = Dataloader(mode=self.classification_mode)
        getattr(self.data, loader_names[self.modality])()

        self.train_x = self.data.train_dialogue_features
        self.val_x = self.data.val_dialogue_features
        self.test_x = self.data.test_dialogue_features

        self.train_y = self.data.train_dialogue_label
        self.val_y = self.data.val_dialogue_label
        self.test_y = self.data.test_dialogue_label

        self.train_mask = self.data.train_mask
        self.val_mask = self.data.val_mask
        self.test_mask = self.data.test_mask

        self.train_id = self.data.train_dialogue_ids.keys()
        self.val_id = self.data.val_dialogue_ids.keys()
        self.test_id = self.data.test_dialogue_ids.keys()

        self.sequence_length = self.train_x.shape[1]

        self.classes = self.train_y.shape[2]
Exemple #2
0
class bc_LSTM:
    """Bidirectional-LSTM sequence classifier for one input modality.

    Typical use is ``load_data()`` followed by ``train_model()``, which
    trains the network for ``self.modality``, checkpoints the best weights to
    ``self.PATH`` and then calls ``test_model()`` to dump the activations of
    the layer named "utter" to ``self.OUTPUT_PATH`` and print test metrics.
    """

    # Dataloader method to call for each supported modality.
    _LOADERS = {
        "text": "load_text_data",
        "audio": "load_audio_data",
        "bimodal": "load_bimodal_data",
    }

    def __init__(self, classification_mode="Emotion", modality="audio"):
        """Record the configuration and derive the checkpoint/output paths.

        Args:
            classification_mode: label set name; forwarded to ``Dataloader``
                by ``load_data`` and lower-cased into the file names.
            modality: one of "text", "audio" or "bimodal". It is checked when
                ``load_data`` / ``train_model`` run, not here.
        """
        self.classification_mode = classification_mode
        self.modality = modality

        mode_tag = self.classification_mode.lower()
        self.PATH = "./data/models/{}_weights_{}.hdf5".format(
            self.modality, mode_tag)
        self.OUTPUT_PATH = "./data/pickles/{}_{}.pkl".format(
            self.modality, mode_tag)
        print("Model initiated for {} classification".format(
            self.classification_mode))

    def load_data(self, ):
        """Load the dataset for ``self.modality`` and cache its splits on self.

        Copies the train/val/test features, labels, masks and dialogue ids
        from a fresh ``Dataloader`` onto this object.
        ``self.sequence_length`` is axis 1 of the training features and
        ``self.classes`` is axis 2 of the training labels.

        Raises:
            SystemExit: if ``self.modality`` is not a supported modality. The
                message names the offending value.
        """
        # Validate before doing any loading work, and say why we are stopping.
        if self.modality not in self._LOADERS:
            raise SystemExit(
                "Unknown modality {!r}; expected one of: {}".format(
                    self.modality, ", ".join(sorted(self._LOADERS))))

        print('Loading data')
        self.data = Dataloader(mode=self.classification_mode)
        getattr(self.data, self._LOADERS[self.modality])()

        self.train_x = self.data.train_dialogue_features
        self.val_x = self.data.val_dialogue_features
        self.test_x = self.data.test_dialogue_features

        self.train_y = self.data.train_dialogue_label
        self.val_y = self.data.val_dialogue_label
        self.test_y = self.data.test_dialogue_label

        self.train_mask = self.data.train_mask
        self.val_mask = self.data.val_mask
        self.test_mask = self.data.test_mask

        self.train_id = self.data.train_dialogue_ids.keys()
        self.val_id = self.data.val_dialogue_ids.keys()
        self.test_id = self.data.test_dialogue_ids.keys()

        self.sequence_length = self.train_x.shape[1]

        self.classes = self.train_y.shape[2]

    def calc_test_result(self, pred_label, test_label, test_mask):
        """Print evaluation metrics over the positions where the mask is 1.

        Args:
            pred_label: per-position class scores; the class index is the
                argmax over the last axis.
            test_label: reference scores/one-hot labels, same layout.
            test_mask: array over the first two axes; only entries equal to
                1 are evaluated.

        Prints the confusion matrix, the classification report and the
        output of ``precision_recall_fscore_support`` with weighted
        averaging.
        """
        # NOTE(review): assumes the mask covers exactly the first two axes of
        # pred_label/test_label -- confirm against Dataloader.
        keep = np.asarray(test_mask) == 1

        # Boolean indexing walks the arrays in row-major order, i.e. the same
        # order an explicit (i, j) double loop would visit them.
        true_label = np.argmax(test_label, axis=-1)[keep]
        predicted_label = np.argmax(pred_label, axis=-1)[keep]

        print("Confusion Matrix :")
        print(confusion_matrix(true_label, predicted_label))
        print("Classification Report :")
        print(classification_report(true_label, predicted_label, digits=4))
        print(
            'Weighted FScore: \n ',
            precision_recall_fscore_support(true_label,
                                            predicted_label,
                                            average='weighted'))

    def get_audio_model(self):
        """Build the audio network: two stacked BiLSTMs and a softmax head.

        Also sets ``self.epochs``, ``self.batch_size`` and
        ``self.embedding_dim`` (axis 2 of the training features). The second
        recurrent layer is named "utter" so ``test_model`` can read its
        output. Requires ``load_data`` to have been called.

        Returns:
            The uncompiled Keras ``Model``.
        """
        # Modality specific hyperparameters
        self.epochs = 100
        self.batch_size = 50

        # Modality specific parameters
        self.embedding_dim = self.train_x.shape[2]

        print("Creating Model...")

        inputs = Input(shape=(self.sequence_length, self.embedding_dim),
                       dtype='float32')
        # Time steps whose features are all zero are skipped downstream.
        masked = Masking(mask_value=0)(inputs)
        lstm = Bidirectional(
            LSTM(300, activation='tanh', return_sequences=True,
                 dropout=0.4))(masked)
        lstm = Bidirectional(LSTM(300,
                                  activation='tanh',
                                  return_sequences=True,
                                  dropout=0.4),
                             name="utter")(lstm)
        output = TimeDistributed(Dense(self.classes,
                                       activation='softmax'))(lstm)

        model = Model(inputs, output)
        return model

    def get_text_model(self):
        """Build the text network: a shared CNN per step feeding two BiLSTMs.

        Every step along axis 1 of the integer input is sliced out, embedded
        with the frozen matrix ``self.data.W``, passed through one
        convolution + max-pool branch per entry of ``self.filter_sizes`` and
        projected by a shared dense layer. The per-step vectors are stacked
        back into a sequence for the recurrent layers. The second recurrent
        layer is named "utter" so ``test_model`` can read its output.

        Also sets ``self.epochs``, ``self.batch_size``,
        ``self.embedding_dim``, ``self.vocabulary_size``,
        ``self.filter_sizes`` and ``self.num_filters``. Requires
        ``load_data`` to have been called.

        Returns:
            The uncompiled Keras ``Model``.
        """
        # Modality specific hyperparameters
        self.epochs = 100
        self.batch_size = 50

        # Modality specific parameters
        self.embedding_dim = self.data.W.shape[1]

        # For text model
        self.vocabulary_size = self.data.W.shape[0]
        self.filter_sizes = [3, 4, 5]
        self.num_filters = 512

        print("Creating Model...")

        sentence_length = self.train_x.shape[2]
        num_branches = len(self.filter_sizes)

        # Initializing sentence representation layers; all of them are
        # created once and shared by every step of the sequence.
        embedding = Embedding(input_dim=self.vocabulary_size,
                              output_dim=self.embedding_dim,
                              weights=[self.data.W],
                              input_length=sentence_length,
                              trainable=False)
        # Each kernel spans the full embedding width, so a filter slides
        # along the token axis only.
        convs = [
            Conv2D(self.num_filters,
                   kernel_size=(size, self.embedding_dim),
                   padding='valid',
                   kernel_initializer='normal',
                   activation='relu') for size in self.filter_sizes
        ]
        # Pool over every position the matching convolution produces.
        pools = [
            MaxPool2D(pool_size=(sentence_length - size + 1, 1),
                      strides=(1, 1),
                      padding='valid') for size in self.filter_sizes
        ]
        dense_func = Dense(100, activation='tanh', name="dense")

        def slicer(x, index):
            return x[:, K.constant(index, dtype='int32'), :]

        def slicer_output_shape(input_shape):
            shape = list(input_shape)
            assert len(shape) == 3  # batch, seq_len, sent_len
            new_shape = (shape[0], shape[2])
            return new_shape

        def reshaper(x):
            # Append a trailing channel axis for the 2-D convolutions.
            return K.expand_dims(x, axis=3)

        def flattener(x):
            x = K.reshape(x, [-1, x.shape[1] * x.shape[3]])
            return x

        def flattener_output_shape(input_shape):
            shape = list(input_shape)
            # One pooled row per convolution branch was concatenated on
            # axis 1.
            new_shape = (shape[0], num_branches * shape[3])
            return new_shape

        inputs = Input(shape=(self.sequence_length, sentence_length),
                       dtype='int32')
        cnn_output = []
        for ind in range(self.sequence_length):

            local_input = Lambda(slicer,
                                 output_shape=slicer_output_shape,
                                 arguments={"index": ind
                                            })(inputs)  # Batch, word_indices

            # cnn-sent
            emb_output = embedding(local_input)
            reshape = Lambda(reshaper)(emb_output)
            concatenated_tensor = Concatenate(axis=1)(
                [pool(conv(reshape)) for conv, pool in zip(convs, pools)])
            flatten = Lambda(
                flattener,
                output_shape=flattener_output_shape,
            )(concatenated_tensor)
            dense_output = dense_func(flatten)
            dropout = Dropout(0.5)(dense_output)
            cnn_output.append(dropout)

        def stack(x):
            return K.stack(x, axis=1)

        cnn_outputs = Lambda(stack)(cnn_output)

        masked = Masking(mask_value=0)(cnn_outputs)
        lstm = Bidirectional(
            LSTM(300, activation='relu', return_sequences=True,
                 dropout=0.3))(masked)
        lstm = Bidirectional(LSTM(300,
                                  activation='relu',
                                  return_sequences=True,
                                  dropout=0.3),
                             name="utter")(lstm)
        output = TimeDistributed(Dense(self.classes,
                                       activation='softmax'))(lstm)

        model = Model(inputs, output)
        return model

    def get_bimodal_model(self):
        """Build the bimodal network: one BiLSTM and a softmax head.

        Also sets ``self.epochs``, ``self.batch_size`` and
        ``self.embedding_dim`` (axis 2 of the training features). The
        recurrent layer is named "utter" so ``test_model`` can read its
        output. Requires ``load_data`` to have been called.

        Returns:
            The uncompiled Keras ``Model``.
        """
        # Modality specific hyperparameters
        self.epochs = 100
        self.batch_size = 10

        # Modality specific parameters
        self.embedding_dim = self.train_x.shape[2]

        print("Creating Model...")

        inputs = Input(shape=(self.sequence_length, self.embedding_dim),
                       dtype='float32')
        masked = Masking(mask_value=0)(inputs)
        lstm = Bidirectional(LSTM(300,
                                  activation='tanh',
                                  return_sequences=True,
                                  dropout=0.4),
                             name="utter")(masked)
        output = TimeDistributed(Dense(self.classes,
                                       activation='softmax'))(lstm)

        model = Model(inputs, output)
        return model

    def train_model(self):
        """Train the network for ``self.modality``, then run ``test_model``.

        The model with the lowest validation loss is written to
        ``self.PATH``; training stops early after 10 epochs without
        improvement in validation loss. The masks are passed as per-step
        sample weights.

        Raises:
            ValueError: if ``self.modality`` is not a supported modality.
        """
        # Model factory and optimizer for each modality.
        recipes = {
            "audio": (self.get_audio_model, 'adadelta'),
            "text": (self.get_text_model, 'adadelta'),
            "bimodal": (self.get_bimodal_model, 'adam'),
        }
        if self.modality not in recipes:
            raise ValueError(
                "Unknown modality {!r}; expected one of: {}".format(
                    self.modality, ", ".join(sorted(recipes))))
        build_model, optimizer = recipes[self.modality]

        checkpoint = ModelCheckpoint(self.PATH,
                                     monitor='val_loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='auto')

        # The builder also sets self.epochs / self.batch_size used by fit().
        model = build_model()
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      sample_weight_mode='temporal')

        early_stopping = EarlyStopping(monitor='val_loss', patience=10)
        model.fit(self.train_x,
                  self.train_y,
                  epochs=self.epochs,
                  batch_size=self.batch_size,
                  sample_weight=self.train_mask,
                  shuffle=True,
                  callbacks=[early_stopping, checkpoint],
                  validation_data=(self.val_x, self.val_y, self.val_mask))

        self.test_model()

    @staticmethod
    def _embeddings_by_id(ids, outputs):
        """Map each id to the row of ``outputs`` at the same position."""
        return {ID: outputs[idx] for idx, ID in enumerate(ids)}

    def test_model(self):
        """Export "utter"-layer activations and print test-set metrics.

        Loads the checkpoint stored at ``self.PATH``, runs the sub-model
        that ends at the layer named "utter" on the train, validation and
        test features, and pickles ``[train_emb, val_emb, test_emb]`` (dicts
        keyed by the ids cached by ``load_data``) to ``self.OUTPUT_PATH``.
        Finally prints the metrics of the full model on the test split.
        """
        model = load_model(self.PATH)
        # Keras 2 spells these keywords in the plural (inputs/outputs).
        intermediate_layer_model = Model(
            inputs=model.input, outputs=model.get_layer("utter").output)

        intermediate_output_train = intermediate_layer_model.predict(
            self.train_x)
        intermediate_output_val = intermediate_layer_model.predict(self.val_x)
        intermediate_output_test = intermediate_layer_model.predict(
            self.test_x)

        train_emb = self._embeddings_by_id(self.train_id,
                                           intermediate_output_train)
        val_emb = self._embeddings_by_id(self.val_id,
                                         intermediate_output_val)
        test_emb = self._embeddings_by_id(self.test_id,
                                          intermediate_output_test)

        # Context manager so the file is flushed and closed deterministically.
        with open(self.OUTPUT_PATH, "wb") as handle:
            pickle.dump([train_emb, val_emb, test_emb], handle)

        self.calc_test_result(model.predict(self.test_x), self.test_y,
                              self.test_mask)