def main():
    configs = ["IS09", "IS10", "IS11", "IS12", "ComParE", "GeMAPS", "eGeMAPS"]
    database = "IEMOCAP"
    emotionsTest = ["Neutral", "Happiness", "Sadness", "Anger"]
    for config in configs:
        print("config: " + config)
        features = pd.read_csv("features/{}_{}.csv".format(database, config))
        features.set_index(["speaker", "emotion", "actType", "name"],
                           inplace=True)
        cm = np.zeros((len(emotionsTest), len(emotionsTest)), dtype=int)
        nSpeaker = len(features.index.unique('speaker'))
        # validation speaker index used when each speaker is held out for test
        sps_val = [0, 0, 2, 2, 4, 4, 6, 6, 8, 8]
        for sp in range(nSpeaker):
            print("Testing speaker {}".format(sp))
            (x_train_scaled, y_train,
             x_val_scaled, y_val,
             x_test_scaled, y_test) = preprocess(sp, sps_val[sp],
                                                 features, emotionsTest)
            modelfile = "models/SVM/{}_{}.sav".format(config, sp)
            with open(modelfile, "rb") as f:
                svm = pickle.load(f)
            y_p = svm.predict(x_test_scaled)
            # fix the label order so cm stays 4x4 even if a class is absent
            cm += confusion_matrix(y_test, y_p,
                                   labels=list(range(len(emotionsTest))))
        cmp = cm / np.reshape(np.sum(cm, 1), (len(emotionsTest), 1))
        imageName = "results/SVM/Test_{}.png".format(config)
        wa, ua = np.around(waua(cm), decimals=4)
        title = "wa={}, ua={}".format(wa, ua)
        plot_wauacm(title, cmp, emotionsTest, imageName)
        print("wa: " + str(wa) + ", ua: " + str(ua))
        print(cmp)
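
The waua helper used above is not defined in this listing. A minimal sketch of what it plausibly computes, assuming wa is the overall (weighted) accuracy and ua the mean of the per-class recalls, as is conventional for IEMOCAP results:

import numpy as np

def waua(cm):
    # Hypothetical sketch (not the original implementation): wa/ua from a
    # square confusion matrix with true labels on rows, predictions on columns.
    cm = np.asarray(cm, dtype=float)
    wa = np.trace(cm) / cm.sum()                # overall accuracy
    ua = np.mean(np.diag(cm) / cm.sum(axis=1))  # mean per-class recall
    return wa, ua
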
def main():
    if socket.getfqdn(socket.gethostname()) == "d8":
        os.environ["CUDA_VISIBLE_DEVICES"] = "3"
    batch_size = 128
    configs = ["IS09", "IS10", "IS11", "IS12", "ComParE", "GeMAPS", "eGeMAPS"]
    # IEMOCAP, improvised data only, four classes: N/H/S/A
    emotionsTest = ["Neutral", "Happiness", "Sadness", "Anger"]
    database = "IEMOCAP"
    for config in configs:
        # Load features
        features = pd.read_csv("features/{}_{}.csv".format(database, config))
        features.set_index(["speaker", "emotion", "actType", "name"],
                           inplace=True)
        nSpeaker = len(features.index.unique('speaker'))
        cmV = np.zeros((len(emotionsTest), len(emotionsTest)), dtype=int)
        cmT = np.zeros((len(emotionsTest), len(emotionsTest)), dtype=int)
        for sp in range(nSpeaker):
            # the previous speaker index serves as the validation speaker
            # (note that sp - 1 is -1 when sp == 0)
            (x_train_scaled, y_train_C,
             x_val_scaled, y_val_C,
             x_test_scaled, y_test_C) = preprocess(sp, sp - 1,
                                                   features,
                                                   emotionsTest)
            num_classes = len(emotionsTest)
            model = load_model("models/DNN/{}_{}.h5".format(config, sp))
            y_val_p = model.predict(x_val_scaled,
                                    batch_size=batch_size,
                                    verbose=1)
            y_val_p_C = y_val_p.argmax(axis=1)
            cmV += confusion_matrix(y_val_C,
                                    y_val_p_C,
                                    labels=list(range(num_classes)))
            y_test_p = model.predict(x_test_scaled,
                                     batch_size=batch_size,
                                     verbose=1)
            cmT += confusion_matrix(y_test_C,
                                    y_test_p.argmax(axis=1),
                                    labels=list(range(num_classes)))
            del model
            K.clear_session()
            print("speaker: " + str(sp))
            print(cmV)
            print(cmT)
        waV, uaV = np.around(waua(cmV), decimals=4)
        waT, uaT = np.around(waua(cmT), decimals=4)
        cmpV = cmV / np.reshape(np.sum(cmV, 1), (len(emotionsTest), 1))
        cmpT = cmT / np.reshape(np.sum(cmT, 1), (len(emotionsTest), 1))
        imageName = "results/DNN/Test_{}_V.png".format(config)
        title = "wa={}, ua={}".format(waV, uaV)
        plot_wauacm(title, cmpV, emotionsTest, imageName)
        imageName = "results/DNN/Test_{}_T.png".format(config)
        title = "wa={}, ua={}".format(waT, uaT)
        plot_wauacm(title, cmpT, emotionsTest, imageName)
        print("waV: " + str(waV) + ", uaV: " + str(uaV))
        print(cmpV)
        print("waT: " + str(waT) + ", uaT: " + str(uaT))
        print(cmpT)
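
plot_wauacm is likewise assumed from the surrounding module. A minimal sketch, assuming it draws the row-normalized confusion matrix as an annotated heatmap and writes it to imageName (the optional format string matches the ".4f" call in the Inception example below):

import matplotlib
matplotlib.use("Agg")  # the scripts run headless on a GPU server
import matplotlib.pyplot as plt

def plot_wauacm(title, cmp, emotions, imageName, fmt=".2f"):
    # Hypothetical sketch of the plotting helper used throughout this listing.
    n = len(emotions)
    fig, ax = plt.subplots()
    ax.imshow(cmp, cmap="Blues", vmin=0, vmax=1)
    ax.set_xticks(range(n))
    ax.set_yticks(range(n))
    ax.set_xticklabels(emotions, rotation=45)
    ax.set_yticklabels(emotions)
    for i in range(n):
        for j in range(n):
            ax.text(j, i, format(cmp[i, j], fmt), ha="center", va="center")
    ax.set(title=title, xlabel="Predicted", ylabel="True")
    fig.tight_layout()
    fig.savefig(imageName)
    plt.close(fig)
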
Example #3
def main():
    dataPath = "../../../Database/IEMOCAP_full_release/"
    # hyperparams
    tMax = 300
    fMax = 400
    batch_size = 64 * 2
    # IEMOCAP
    database = "IEMOCAP"
    emotionsTest = ["Neutral", "Happiness", "Sadness", "Anger"]
    num_classes = len(emotionsTest)
    actTypeToUse = ["impro"]
    emoTest = "".join([emo[0] for emo in actTypeToUse+emotionsTest])
    dataname = "{}_{}".format(database, emoTest)
    dataDf = utils.load_IEMOCAP(dataPath, actTypeToUse, emotionsTest)
    # confusion matrix
    cmV = np.zeros((num_classes, num_classes), dtype=int)
    cmT = np.zeros((num_classes, num_classes), dtype=int)
    for sp_test in range(10):
        print("test speaker:", sp_test)
        (x_train, y_train, y_train_raw,
         x_val, y_val, y_val_raw,
         x_test, y_test, y_test_raw) = utils.Preprocess(sp_test, emotionsTest,
                                                        dataDf, tMax, fMax)
        model = load_model("models/CNN/{}.h5".format(sp_test))
        y_val_p = model.predict(x_val,
                                batch_size=batch_size,
                                verbose=1)
        y_val_p_C = utils.y_raw_transform(y_val_p, y_val_raw)
        # fix the label order so the matrices keep a constant shape
        cmV += confusion_matrix(y_val_raw[:, 0], y_val_p_C,
                                labels=list(range(num_classes)))
        y_test_p = model.predict(x_test,
                                 batch_size=batch_size,
                                 verbose=1)
        y_test_p_C = utils.y_raw_transform(y_test_p, y_test_raw)
        cmT += confusion_matrix(y_test_raw[:, 0], y_test_p_C,
                                labels=list(range(num_classes)))
        print("speaker: " + str(sp_test))
        print(cmV)
        print(cmT)
        del model
        keras.backend.clear_session()
    # Evaluation
    waV, uaV = np.around(utils.waua(cmV), decimals=4)
    waT, uaT = np.around(utils.waua(cmT), decimals=4)
    cmpV = cmV / np.reshape(np.sum(cmV, 1), (num_classes, 1))
    cmpT = cmT / np.reshape(np.sum(cmT, 1), (num_classes, 1))
    imageName = "results/CNN/Test_{}_CNN_V.png".format(dataname)
    title = "wa={}, ua={}".format(waV, uaV)
    utils.plot_wauacm(title, cmpV, emotionsTest, imageName)
    imageName = "results/CNN/Test_{}_CNN_T.png".format(dataname)
    title = "wa={}, ua={}".format(waT, uaT)
    utils.plot_wauacm(title, cmpT, emotionsTest, imageName)
    print("waV: " + str(waV) + ", uaV: " + str(uaV))
    print(cmpV)
    print("waT: " + str(waT) + ", uaT: " + str(uaT))
    print(cmpT)
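
utils.y_raw_transform maps segment-level posteriors back to utterance level before scoring; its exact contract is not visible here. A sketch of the underlying technique, assuming each row of y_raw describes one segment, with column 0 holding the true label and column 1 an utterance id (both assumptions):

import numpy as np

def aggregate_segments(y_p, y_raw):
    # Hypothetical helper: average the posteriors of all segments belonging
    # to the same utterance, then argmax for the utterance-level prediction.
    utt_labels, utt_preds = [], []
    for utt in np.unique(y_raw[:, 1]):
        mask = y_raw[:, 1] == utt
        utt_labels.append(y_raw[mask, 0][0])
        utt_preds.append(y_p[mask].mean(axis=0).argmax())
    return np.array(utt_labels), np.array(utt_preds)
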
def main():
    if not os.path.exists("results"):
        os.mkdir("results")
    if not os.path.exists("models"):
        os.mkdir("models")
    specPath = "spectrograms/"
    if socket.getfqdn(socket.gethostname()) == "d8":
        os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
        batch_size = 64 * 2
    else:
        batch_size = 64
    # parameters
    t_uplim = 200  # 2 s
    f_uplim = 400  # 4 kHz
    t_shift_test = 40  # 0.4 s

    # load the DataFrame of IEMOCAP
    dataDf = pd.read_csv(specPath + "IEMOCAP.csv", index_col=0)
    speakers = list(dataDf.speaker.unique())
    emotionsTest = list(dataDf.emotion.unique())
    num_classes = len(emotionsTest)

    # cross-validation
    cmTest = np.zeros((num_classes, num_classes))
    # %%
    for sp_test in range(len(speakers)):
        speakers_train = speakers.copy()
        speaker_test = speakers_train.pop(sp_test)
        print("Test speaker: {}".format(speaker_test))

        # preprocessing
        # test set
        (x_test, y_test, y_test_raw) = utils.prepro(specPath,
                                                    dataDf,
                                                    speaker_test,
                                                    emotionsTest,
                                                    "test",
                                                    t_shift_test,
                                                    f_uplim,
                                                    t_uplim)
        # reset the Keras session before loading this fold's model
        K.clear_session()
        # %% test
        model = load_model("models/Inception_{}.h5".format(sp_test))
        y_test_p = model.predict(x_test,
                                 batch_size=batch_size,
                                 verbose=1)
        cmTest += utils.y_raw_evaluate(y_test_p, y_test_raw)
        del model

    # %% print results
    waTest, uaTest = np.around(utils.waua(cmTest), decimals=4)
    # test set
    print("Results on test set: wa={}, ua={}".format(waTest, uaTest))
    print(cmTest)
    title = "wa={}, ua={}".format(waTest, uaTest)
    imageName = "results/Inception_Test_T.png"
    cmTestP = cmTest / np.reshape(np.sum(cmTest, 1), (num_classes, 1))
    utils.plot_wauacm(title, cmTestP, emotionsTest, imageName, ".4f")
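
utils.y_raw_evaluate appears to fold the same aggregation and the confusion-matrix step into one call. In terms of the hypothetical aggregate_segments sketched earlier:

from sklearn.metrics import confusion_matrix

def y_raw_evaluate(y_p, y_raw, num_classes=4):
    # Hypothetical: aggregate to utterance level, then score.
    labels, preds = aggregate_segments(y_p, y_raw)
    return confusion_matrix(labels, preds, labels=list(range(num_classes)))
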
Example #5
def SVM_Train(config, emotionsTest, features, params):
    C, gamma = params
    cm = np.zeros((len(emotionsTest), len(emotionsTest)), dtype=int)
    nSpeaker = len(features.index.unique('speaker'))
    # validation speaker index used when each speaker is held out for test
    sps_val = [0, 0, 2, 2, 4, 4, 6, 6, 8, 8]
    for sp in range(nSpeaker):
        print("Training with test speaker {}".format(sp))
        (x_train_scaled, y_train, x_val_scaled, y_val, x_test_scaled,
         y_test) = preprocess(sp, sps_val[sp], features, emotionsTest)
        svm = SVC(class_weight="balanced", C=C, gamma=gamma)
        svm.fit(x_train_scaled, y_train)
        y_p = svm.predict(x_test_scaled)
        # fix the label order so cm stays 4x4 even if a class is absent
        cm += confusion_matrix(y_test, y_p,
                               labels=list(range(len(emotionsTest))))
        modelfile = "models/SVM/{}_{}.sav".format(config, sp)
        with open(modelfile, "wb") as f:
            pickle.dump(svm, f)
    cmp = cm / np.reshape(np.sum(cm, 1), (len(emotionsTest), 1))
    imageName = "results/SVM/Train_{}.png".format(config)
    wa, ua = np.around(waua(cm), decimals=4)
    title = "C={}, gamma={}, wa={}, ua={}".format(C, gamma, wa, ua)
    plot_wauacm(title, cmp, emotionsTest, imageName)
    print("wa: " + str(wa) + ", ua: " + str(ua))
    print(cmp)
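
SVM_Train trains and evaluates a single (C, gamma) setting across all speakers. A typical driver would sweep a small grid; the values below are illustrative, not the ones actually used:

import itertools

for C, gamma in itertools.product([0.1, 1, 10], [1e-4, 1e-3, 1e-2]):
    SVM_Train(config, emotionsTest, features, (C, gamma))

Note that each call overwrites models/SVM/{config}_{sp}.sav, so only the last setting's models survive such a sweep unless the filename is extended with the hyperparameters.
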
Example #6
def main():
    dataPath = "../../../Database/IEMOCAP_full_release/"
    if not os.path.exists("results"):
        os.mkdir("results")
    if not os.path.exists("results/CNN"):
        os.mkdir("results/CNN")
    if not os.path.exists("models"):
        os.mkdir("models")
    if not os.path.exists("models/CNN"):
        os.mkdir("models/CNN")
    if socket.getfqdn(socket.gethostname()) == "d8":
        os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
    # hyperparams
    dr = 0.3
    lr = 0.006
    tMax = 300
    fMax = 400
    input_shape = (tMax, fMax, 1)
    batch_size = 64 * 2
    # IEMOCAP
    database = "IEMOCAP"
    emotionsTest = ["Neutral", "Happiness", "Sadness", "Anger"]
    num_classes = len(emotionsTest)
    actTypeToUse = ["impro"]
    emoTest = "".join([emo[0] for emo in actTypeToUse + emotionsTest])
    dataname = "{}_{}".format(database, emoTest)
    dataDf = utils.load_IEMOCAP(dataPath, actTypeToUse, emotionsTest)
    # confusion matrix
    cmV = np.zeros((num_classes, num_classes), dtype=int)
    cmT = np.zeros((num_classes, num_classes), dtype=int)
    for sp_test in range(10):
        print("test speaker:", sp_test)
        (x_train, y_train, y_train_raw, x_val, y_val, y_val_raw, x_test,
         y_test, y_test_raw) = utils.Preprocess(sp_test, emotionsTest, dataDf,
                                                tMax, fMax)
        # set training parameters
        mc_cb = keras.callbacks.ModelCheckpoint('BestModel_CNN.h5',
                                                monitor='val_acc',
                                                verbose=1,
                                                save_best_only=True,
                                                save_weights_only=True)
        mc_es = keras.callbacks.EarlyStopping(monitor="acc",
                                              patience=10,
                                              verbose=1)
        # class weight
        cw = class_weight.compute_class_weight(class_weight='balanced',
                                               classes=np.unique(y_train_raw[:, 0]),
                                               y=y_train_raw[:, 0])
        cw2 = dict(enumerate(cw))
        model = cnn_model(num_classes=num_classes,
                          input_shape=input_shape,
                          dr=dr,
                          lr=lr)
        # first stage: a short run without checkpointing or early stopping
        epochs = 16
        model.fit(x_train,
                  y_train,
                  class_weight=cw2,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(x_val, y_val))
        # second stage: continue with checkpointing and early stopping
        epochs = 50
        model.fit(x_train,
                  y_train,
                  class_weight=cw2,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=1,
                  callbacks=[mc_cb, mc_es],
                  validation_data=(x_val, y_val))
        del mc_cb
        model.load_weights("BestModel_CNN.h5")
        y_val_p = model.predict(x_val, batch_size=batch_size, verbose=1)
        y_val_p_C = utils.y_raw_transform(y_val_p, y_val_raw)
        cmV += confusion_matrix(y_val_raw[:, 0], y_val_p_C,
                                labels=list(range(num_classes)))
        y_test_p = model.predict(x_test, batch_size=batch_size, verbose=1)
        y_test_p_C = utils.y_raw_transform(y_test_p, y_test_raw)
        cmT += confusion_matrix(y_test_raw[:, 0], y_test_p_C,
                                labels=list(range(num_classes)))
        print("speaker: " + str(sp_test))
        print(cmV)
        print(cmT)
        model.save("models/CNN/{}.h5".format(sp_test))
        del model
        keras.backend.clear_session()
    # Evaluation
    waV, uaV = np.around(utils.waua(cmV), decimals=4)
    waT, uaT = np.around(utils.waua(cmT), decimals=4)
    cmpV = cmV / np.reshape(np.sum(cmV, 1), (num_classes, 1))
    cmpT = cmT / np.reshape(np.sum(cmT, 1), (num_classes, 1))
    imageName = "results/CNN/Train_{}_CNN_V.png".format(dataname)
    title = "wa={}, ua={}".format(waV, uaV)
    utils.plot_wauacm(title, cmpV, emotionsTest, imageName)
    imageName = "results/CNN/Train_{}_CNN_T.png".format(dataname)
    title = "wa={}, ua={}".format(waT, uaT)
    utils.plot_wauacm(title, cmpT, emotionsTest, imageName)
    print("waV: " + str(waV) + ", uaV: " + str(uaV))
    print(cmpV)
    print("waT: " + str(waT) + ", uaT: " + str(uaT))
    print(cmpT)
def DNN_Train(features, emotionsTest, config):
    dr = 0.4
    lr = 0.0002
    batch_size = 128
    epochs = 100

    nSpeaker = len(features.index.unique('speaker'))
    cmV = np.zeros((len(emotionsTest), len(emotionsTest)), dtype=int)
    cmT = np.zeros((len(emotionsTest), len(emotionsTest)), dtype=int)
    for sp in range(nSpeaker):
        # the previous speaker index serves as the validation speaker
        # (note that sp - 1 is -1 when sp == 0)
        (x_train_scaled, y_train_C, x_val_scaled, y_val_C, x_test_scaled,
         y_test_C) = preprocess(sp, sp - 1, features, emotionsTest)
        num_classes = len(emotionsTest)
        y_train = keras.utils.to_categorical(y_train_C, num_classes)
        # y_test = keras.utils.to_categorical(y_test_C, num_classes)
        y_val = keras.utils.to_categorical(y_val_C, num_classes)
        # DNN model
        input_shape = (features.shape[1], )
        mc_cb = keras.callbacks.ModelCheckpoint('BestModel_DNN.h5',
                                                monitor='val_acc',
                                                verbose=1,
                                                save_best_only=True,
                                                save_weights_only=True)
        es_cb = keras.callbacks.EarlyStopping(monitor="acc",
                                              patience=10,
                                              verbose=1)
        # class weight
        cw = class_weight.compute_class_weight(class_weight='balanced',
                                               classes=np.unique(y_train_C),
                                               y=y_train_C)
        cw2 = dict(enumerate(cw))
        # TF1-style housekeeping: build each fold's model in a fresh graph
        tf.reset_default_graph()
        graph = tf.Graph()
        with graph.as_default() as g:
            with tf.Session(graph=g):
                model = make_model(num_classes=num_classes,
                                   input_shape=input_shape,
                                   dr=dr,
                                   lr=lr)
                # if socket.getfqdn(socket.gethostname()) == "d8":
                #     model = multi_gpu_model(model, gpus=4)
                model.fit(x_train_scaled,
                          y_train,
                          class_weight=cw2,
                          batch_size=batch_size,
                          epochs=epochs,
                          verbose=1,
                          callbacks=[mc_cb, es_cb],
                          validation_data=(x_val_scaled, y_val))
                # evaluation
                del mc_cb
                model.load_weights('BestModel_DNN.h5')
                y_val_p = model.predict(x_val_scaled,
                                        batch_size=batch_size,
                                        verbose=1)
                y_val_p_C = y_val_p.argmax(axis=1)
                cmV += confusion_matrix(y_val_C,
                                        y_val_p_C,
                                        labels=list(range(num_classes)))
                y_test_p = model.predict(x_test_scaled,
                                         batch_size=batch_size,
                                         verbose=1)
                cmT += confusion_matrix(y_test_C,
                                        y_test_p.argmax(axis=1),
                                        labels=list(range(num_classes)))
                print("speaker: " + str(sp))
                print(cmV)
                print(cmT)
                model.save("models/DNN/{}_{}.h5".format(config, sp))
    waV, uaV = np.around(waua(cmV), decimals=4)
    waT, uaT = np.around(waua(cmT), decimals=4)
    cmpV = cmV / np.reshape(np.sum(cmV, 1), (len(emotionsTest), 1))
    cmpT = cmT / np.reshape(np.sum(cmT, 1), (len(emotionsTest), 1))
    imageName = "results/DNN/Train_{}_V.png".format(config)
    title = "wa={}, ua={}".format(waV, uaV)
    plot_wauacm(title, cmpV, emotionsTest, imageName)
    imageName = "results/DNN/Train_{}_T.png".format(config)
    title = "wa={}, ua={}".format(waT, uaT)
    plot_wauacm(title, cmpT, emotionsTest, imageName)
    print("waV: " + str(waV) + ", uaV: " + str(uaV))
    print(cmpV)
    print("waT: " + str(waT) + ", uaT: " + str(uaT))
    print(cmpT)
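
make_model is imported from elsewhere; only its signature is visible here. A minimal compatible sketch, assuming dr is a dropout rate and lr the optimizer learning rate (the layer sizes are illustrative, not the original ones):

import keras
from keras.layers import Dense, Dropout

def make_model(num_classes, input_shape, dr, lr):
    # Hypothetical architecture matching the call site above.
    model = keras.models.Sequential([
        Dense(1024, activation="relu", input_shape=input_shape),
        Dropout(dr),
        Dense(512, activation="relu"),
        Dropout(dr),
        Dense(num_classes, activation="softmax"),
    ])
    model.compile(optimizer=keras.optimizers.Adam(lr=lr),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
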
def main():
    if not os.path.exists("results"):
        os.mkdir("results")
    if not os.path.exists("models"):
        os.mkdir("models")
    specPath = "spectrograms/"
    if socket.getfqdn(socket.gethostname()) == "d8":
        os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
        batch_size = 64 * 2
    else:
        batch_size = 64
    # parameters
    t_uplim = 200  # 2 s
    f_uplim = 400  # 4 kHz
    t_shift_train = 100  # 1 s
    t_shift_test = 40  # 0.4 s

    # load the DataFrame of IEMOCAP
    dataDf = pd.read_csv(specPath + "IEMOCAP.csv", index_col=0)
    speakers = list(dataDf.speaker.unique())
    emotionsTest = list(dataDf.emotion.unique())
    num_classes = len(emotionsTest)

    # cross-validation
    cmTest = np.zeros((num_classes, num_classes))
    # %%
    for sp_test in range(len(speakers)):
        speakers_train = speakers.copy()
        speaker_test = speakers_train.pop(sp_test)
        print("Test speaker: {}".format(speaker_test))
        # preprocessing
        # train set
        (x_train, y_train,
         y_train_raw) = utils.prepro(specPath, dataDf, speakers_train,
                                     emotionsTest, "train", t_shift_train,
                                     f_uplim, t_uplim)
        # test set
        (x_test, y_test, y_test_raw) = utils.prepro(specPath, dataDf,
                                                    speaker_test, emotionsTest,
                                                    "test", t_shift_test,
                                                    f_uplim, t_uplim)
        # class weight
        cw = class_weight.compute_class_weight(class_weight='balanced',
                                               classes=np.unique(y_train_raw[:, 0]),
                                               y=y_train_raw[:, 0])
        cw2 = dict(enumerate(cw))
        K.clear_session()
        model = cnn_model(f_uplim, t_uplim, num_classes)
        lrs_cb = keras.callbacks.LearningRateScheduler(lrschedule, verbose=1)
        epochs = 50
        model.compile(optimizer=SGD(lr=0.05,
                                    momentum=0.9,
                                    decay=0.0001,
                                    nesterov=True),
                      loss="categorical_crossentropy",
                      metrics=["accuracy"])
        model.fit(x_train,
                  y_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  class_weight=cw2,
                  callbacks=[lrs_cb],
                  validation_data=(x_test, y_test))
        # %% test
        y_test_p = model.predict(x_test, batch_size=batch_size, verbose=1)
        cmTest += utils.y_raw_evaluate(y_test_p, y_test_raw)
        model.save("models/Inception_{}.h5".format(sp_test))
        del model

    # %% print results
    waTest, uaTest = np.around(utils.waua(cmTest), decimals=4)
    # test set
    print("Results on test set: wa={}, ua={}".format(waTest, uaTest))
    print(cmTest)
    title = "wa={}, ua={}".format(waTest, uaTest)
    imageName = "results/Inception_Train_T.png"
    cmTestP = cmTest / np.reshape(np.sum(cmTest, 1), (num_classes, 1))
    utils.plot_wauacm(title, cmTestP, emotionsTest, imageName, ".4f")
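
lrschedule is not defined in this listing. A plausible step-decay schedule to pair with the SGD setup above; the decay epochs and factors are assumptions:

def lrschedule(epoch, lr=None):
    # Hypothetical step decay for keras.callbacks.LearningRateScheduler.
    # The lr argument is ignored: some Keras versions call schedule(epoch),
    # newer ones schedule(epoch, current_lr).
    base_lr = 0.05
    if epoch >= 40:
        return base_lr * 0.01
    if epoch >= 20:
        return base_lr * 0.1
    return base_lr
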