    def __init__(self,
                 name,
                 input_dim_A,
                 input_dim_V1,
                 input_dim_V2,
                 input_dim_V3,
                 input_dim_V4,
                 noisy=True,
                 sparse=False):
        # para name: name of multimodal DDAE
        # the parent autoencoder sees the concatenation of the audio stream
        # and all four visual streams
        AutoEncoder.__init__(self,
                             name,
                             input_dim_A + input_dim_V1 + input_dim_V2 +
                             input_dim_V3 + input_dim_V4,
                             noisy=noisy,
                             sparse=sparse)
        self.load_basic()
        self.name = '%s_hidden%.2f_batch%d_epoch%d_noise%s' % (
            name, self.hidden_ratio, self.batch_size, self.epochs, self.noise)
        self.dimension_A = input_dim_A
        self.dimension_V = input_dim_V1 + input_dim_V2 + input_dim_V3 + input_dim_V4
        self.dimension_V1 = input_dim_V1
        self.dimension_V2 = input_dim_V2
        self.dimension_V3 = input_dim_V3
        self.dimension_V4 = input_dim_V4
        self.decoder_A = None
        self.decoder_V1 = None
        self.decoder_V2 = None
        self.decoder_V3 = None
        self.decoder_V4 = None
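The per-modality dimensions stored above suggest that the decoder is later split into one reconstruction head per input stream. Below is a minimal, hypothetical sketch of such wiring with the Keras functional API; the 128-unit bottleneck, layer names, and the dimensions (taken from the test call in Example #2) are illustrative assumptions, not the repository's actual build_model.

# hypothetical sketch only -- not the repository's build_model
from tensorflow.keras.layers import Input, Dense, concatenate
from tensorflow.keras.models import Model

dim_A, dim_V1, dim_V2, dim_V3, dim_V4 = 118, 136, 6, 6, 35
in_A, in_V1 = Input(shape=(dim_A,)), Input(shape=(dim_V1,))
in_V2, in_V3 = Input(shape=(dim_V2,)), Input(shape=(dim_V3,))
in_V4 = Input(shape=(dim_V4,))

# shared bottleneck over the concatenated modalities
encoded = Dense(128, activation='relu')(
    concatenate([in_A, in_V1, in_V2, in_V3, in_V4]))

# one decoder head per modality, each reconstructing its own input
heads = [Dense(d, activation='sigmoid')(encoded)
         for d in (dim_A, dim_V1, dim_V2, dim_V3, dim_V4)]

model = Model([in_A, in_V1, in_V2, in_V3, in_V4], heads)
model.compile(optimizer='adam', loss='mse')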
Example #2
    def test_multimodal_autoencoder(self):
        ae = AutoEncoder('test', 118)
        ae.build_model()
        bae = AutoEncoderBimodal('test', 120, 118)
        bae.build_model()
        mae = AutoEncoderMultimodal('test', 118, 136, 6, 6, 35)
        mae.build_model()
Example #3
    def test_autoencoder(self):
        X_train_V, X_dev_V, X_test_V, y_train_V, inst_train_V, y_dev_V, inst_dev_V = load_bags_of_words(
            'BoVW', verbose=True)

        ae_bovw = AutoEncoder('BoVW', X_train_V.shape[1])
        ae_bovw.build_model()

        ae_bovw.train_model(pd.concat([X_train_V, X_dev_V]), X_test_V)
        ae_bovw.encode(X_train_V, X_dev_V)
        encoded_train, encoded_dev = ae_bovw.load_presentation()
Example #4
    def FV_mRMR(self):
        print("\nrunning mRMR algorithm for feature selection")
        ae = AutoEncoder('fv_gmm', 0)

        with smart_open(os.path.join(ae.save_dir, 'model_list.txt'),
                        'rb',
                        encoding='utf-8') as model_path:
            for line_no, line in enumerate(model_path):
                line = str(line).replace('\n', '')
                print(line_no, '\t', line[65:])

                if os.path.isfile(
                        os.path.join(
                            line, 'fisher_vector_train_%d.npy' %
                            self.kernel)) and os.path.isfile(
                                os.path.join(
                                    line,
                                    'fisher_vector_dev_%d.npy' % self.kernel)):
                    X_train = np.load(
                        os.path.join(
                            line, 'fisher_vector_train_%d.npy' % self.kernel))
                    X_dev = np.load(
                        os.path.join(line,
                                     'fisher_vector_dev_%d.npy' % self.kernel))
                    y_train = np.load(os.path.join(line, 'label_train.npy'))
                    y_dev = np.load(os.path.join(line, 'label_dev.npy'))
                    X_train = np.reshape(X_train,
                                         (-1, np.prod(X_train.shape[1:])))
                    X_dev = np.reshape(X_dev, (-1, np.prod(X_dev.shape[1:])))
                    X_train = np.nan_to_num(X_train)
                    X_dev = np.nan_to_num(X_dev)

                    df = pd.DataFrame(np.vstack((X_train, X_dev)))
                    df.columns = [
                        'feature_%d' % i for i in range(len(X_train[0]))
                    ]
                    df.insert(0, 'label', np.hstack((y_train, y_dev)).T)
                    print(df.head())

                    feature_list = pymrmr.mRMR(df, 'MIQ', 50)
                    np.save(os.path.join(line, 'feature_list'), feature_list)

                    X_train_df = pd.DataFrame(X_train)
                    X_train_df.columns = [
                        'feature_%d' % i for i in range(len(X_train[0]))
                    ]
                    X_train = X_train_df.loc[:, feature_list]

                    X_dev_df = pd.DataFrame(X_dev)
                    X_dev_df.columns = [
                        'feature_%d' % i for i in range(len(X_dev[0]))
                    ]
                    X_dev = X_dev_df.loc[:, feature_list]

                    print(X_train.head())
                    print(X_dev.head())

                    np.save(os.path.join(line, 'X_train_mrmr'), X_train)
                    np.save(os.path.join(line, 'X_dev_mrmr'), X_dev)
                    print("\nfeature selection done and data saved.")
Example #5
    def __init__(self,
                 name,
                 input_dim_A,
                 input_dim_V,
                 noisy=True,
                 sparse=False):
        # para name: name of bimodal DDAE
        AutoEncoder.__init__(self,
                             name,
                             input_dim_A + input_dim_V,
                             noisy=noisy,
                             sparse=sparse)
        self.load_basic()
        self.name = '%s_hidden%.2f_batch%d_epoch%d_noise%s' % (
            name, self.hidden_ratio, self.batch_size, self.epochs, self.noise)
        self.dimension_A = input_dim_A
        self.dimension_V = input_dim_V
        self.decoder_A = None
        self.decoder_V = None
Example #6
    def FV_RF(self):
        print("\nrunning Random Forest on Fisher Vectors")
        ae = AutoEncoder('fv_gmm', 0)

        with smart_open(os.path.join(ae.save_dir, 'model_list.txt'),
                        'rb',
                        encoding='utf-8') as model_path:
            for line_no, line in enumerate(model_path):
                line = str(line).replace('\n', '')
                print(line_no, '\t', line[65:])
                feature_name = line[65:] + '_%d' % self.kernel

                if os.path.isfile(
                        os.path.join(
                            line, 'X_train_tree_%d.npy' %
                            self.kernel)) and os.path.isfile(
                                os.path.join(
                                    line, 'X_dev_tree_%d.npy' % self.kernel)):
                    X_train = np.load(
                        os.path.join(line,
                                     'X_train_tree_%d.npy' % self.kernel))
                    X_dev = np.load(
                        os.path.join(line, 'X_dev_tree_%d.npy' % self.kernel))
                    y_train = np.load(os.path.join(line, 'label_train.npy'))
                    y_dev = np.load(os.path.join(line, 'label_dev.npy'))

                    print(X_train.shape, X_dev.shape)

                    random_forest = RandomForest(feature_name,
                                                 X_train,
                                                 y_train,
                                                 X_dev,
                                                 y_dev,
                                                 test=False)
                    random_forest.run()
                    y_pred_train, y_pred_dev = random_forest.evaluate()
                    get_UAR(y_pred_train,
                            y_train,
                            np.array([]),
                            'RF',
                            feature_name,
                            'single',
                            train_set=True,
                            test=False)
                    get_UAR(y_pred_dev,
                            y_dev,
                            np.array([]),
                            'RF',
                            feature_name,
                            'single',
                            test=False)
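The get_UAR calls above report unweighted average recall on the training and development predictions. For reference, UAR is simply the mean of per-class recalls; a minimal standalone computation with scikit-learn is shown below. The repository's get_UAR takes additional bookkeeping arguments (model and feature names, a fusion mode, train/test flags) that this sketch does not model.

# minimal UAR (unweighted average recall) computation for reference;
# macro-averaged recall is the mean of per-class recalls
from sklearn.metrics import recall_score

def uar(y_true, y_pred):
    return recall_score(y_true, y_pred, average='macro')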
Example #7
    def DYNAMICS(self):
        print(
            "\nrunning computation of dynamics of latent representation learned by DDAEs"
        )
        ae = AutoEncoder('dynamics', 0)

        with smart_open(os.path.join(ae.save_dir, 'model_list.txt'),
                        'rb',
                        encoding='utf-8') as model_path:
            for line_no, line in enumerate(model_path):
                line = str(line).replace('\n', '')
                print(line_no, '\t', line)

                if os.path.isfile(
                        os.path.join(
                            line,
                            'encoded_train_dynamics.npy')) and os.path.isfile(
                                os.path.join(line,
                                             'encoded_dev_dynamics.npy')):
                    continue

                X_train = np.load(os.path.join(line, 'encoded_train.npy'))
                X_dev = np.load(os.path.join(line, 'encoded_dev.npy'))
                X_train_frame = get_dynamics(X_train)
                X_dev_frame = get_dynamics(X_dev)

                assert X_train_frame.shape[0] == X_train.shape[0] - 2
                assert X_dev_frame.shape[0] == X_dev.shape[0] - 2

                print("Shape of training data", X_train.shape)
                print("Shape of development data", X_dev.shape)
                print("Shape of training data with dynamics",
                      X_train_frame.shape)
                print("Shape of development data with dynamics",
                      X_dev_frame.shape)

                np.save(os.path.join(line, 'encoded_train_dynamics'),
                        X_train_frame)
                np.save(os.path.join(line, 'encoded_dev_dynamics'),
                        X_dev_frame)

                print("\ncomputing dynamics done\n")
                del X_train, X_train_frame, X_dev, X_dev_frame
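The assertions above imply that get_dynamics shortens each sequence by two frames, which is consistent with appending first- and second-order frame differences to the latent features. A hypothetical sketch of such a helper follows; this is an assumption about what get_dynamics computes, not its actual implementation.

import numpy as np

def get_dynamics_sketch(X):
    # first- and second-order differences along the time axis
    delta = np.diff(X, n=1, axis=0)    # shape (N-1, D)
    delta2 = np.diff(X, n=2, axis=0)   # shape (N-2, D)
    # keep only the N-2 frames that have full context and stack
    # static features with their delta and delta-delta
    return np.hstack([X[2:], delta[1:], delta2])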
Example #8
    def FV_tree(self):
        print("\nrunning Random Forest algorithm for feature selection")
        ae = AutoEncoder('fv_gmm', 0)

        with smart_open(os.path.join(ae.save_dir, 'model_list.txt'),
                        'rb',
                        encoding='utf-8') as model_path:
            for line_no, line in enumerate(model_path):
                line = str(line).replace('\n', '')
                line = line[:-2]
                print(line_no, '\t', line[19:])

                if os.path.isfile(
                        os.path.join(
                            line, 'X_train_tree_%d.npy' %
                            self.kernel)) and os.path.isfile(
                                os.path.join(
                                    line, 'X_dev_tree_%d.npy' % self.kernel)):
                    preprocess_metadata_tensorboard(line, self.kernel)
                    continue

                if os.path.isfile(
                        os.path.join(
                            line, 'fisher_vector_train_%d.npy' %
                            self.kernel)) and os.path.isfile(
                                os.path.join(
                                    line,
                                    'fisher_vector_dev_%d.npy' % self.kernel)):
                    X_train = np.load(
                        os.path.join(
                            line, 'fisher_vector_train_%d.npy' % self.kernel))
                    X_dev = np.load(
                        os.path.join(line,
                                     'fisher_vector_dev_%d.npy' % self.kernel))
                    y_train = np.load(os.path.join(line, 'label_train.npy'))
                    y_dev = np.load(os.path.join(line, 'label_dev.npy'))
                    X_train = np.reshape(X_train,
                                         (-1, np.prod(X_train.shape[1:])))
                    X_dev = np.reshape(X_dev, (-1, np.prod(X_dev.shape[1:])))
                    X_train = np.nan_to_num(X_train)
                    X_dev = np.nan_to_num(X_dev)

                    from sklearn.ensemble import RandomForestClassifier

                    if not os.path.isfile(
                            os.path.join(line,
                                         'feature_list_%d.npy' % self.kernel)):
                        model = RandomForestClassifier(n_estimators=800,
                                                       criterion='entropy')

                        df = pd.DataFrame(np.vstack((X_train, X_dev)))
                        feature_names = [
                            'feature_%d' % i for i in range(len(X_train[0]))
                        ]
                        df.columns = feature_names
                        y = np.hstack((y_train, y_dev))
                        print(df.head())

                        model.fit(df, y)
                        importances = model.feature_importances_
                        print("\nfeature importance ranking")
                        indices = np.argsort(importances)[::-1]
                        for f in range(100):
                            print(
                                "%d. feature %d %s (%f)" %
                                (f + 1, indices[f], feature_names[indices[f]],
                                 importances[indices[f]]))

                        indices = indices[:100]
                        np.save(
                            os.path.join(line,
                                         'feature_list_%d' % self.kernel),
                            indices)
                    else:
                        indices = np.load(
                            os.path.join(line,
                                         'feature_list_%d.npy' % self.kernel))

                    X_train_df = pd.DataFrame(X_train)
                    X_train_df.columns = [
                        'feature_%d' % i for i in range(len(X_train[0]))
                    ]
                    X_train_tree = X_train_df.iloc[:, indices]

                    X_dev_df = pd.DataFrame(X_dev)
                    X_dev_df.columns = [
                        'feature_%d' % i for i in range(len(X_dev[0]))
                    ]
                    X_dev_tree = X_dev_df.iloc[:, indices]

                    print(X_train_tree.shape, X_dev_tree.shape)

                    np.save(
                        os.path.join(line, 'X_train_tree_%d' % self.kernel),
                        X_train_tree)
                    np.save(os.path.join(line, 'X_dev_tree_%d' % self.kernel),
                            X_dev_tree)
                    print("\nfeature selection done and data saved.")
Example #9
    def FV_GMM(self):
        print(
            "\nrunning Fisher Encoder using GMM on learnt representations with dynamics"
        )

        y_train_frame, inst_train, y_dev_frame, inst_dev = load_aligned_features(
            no_data=True, verbose=True)
        y_train_frame, y_dev_frame = y_train_frame[2:, :], y_dev_frame[2:, :]
        inst_train, inst_dev = inst_train[2:, :], inst_dev[2:, :]

        ae = AutoEncoder('fv_gmm', 0)

        with smart_open(os.path.join(ae.save_dir, 'model_list.txt'),
                        'rb',
                        encoding='utf-8') as model_path:
            for line_no, line in enumerate(model_path):
                line = str(line).replace('\n', '')
                print(line_no, '\t', line[65:])

                if os.path.isfile(
                        os.path.join(
                            line, 'fisher_vector_train_%d.npy' %
                            self.kernel)) and os.path.isfile(
                                os.path.join(
                                    line,
                                    'fisher_vector_dev_%d.npy' % self.kernel)):
                    preprocess_metadata_tensorboard(line, self.kernel)
                    continue

                X_train_frame = np.load(
                    os.path.join(line, 'encoded_train_dynamics.npy'))
                X_dev_frame = np.load(
                    os.path.join(line, 'encoded_dev_dynamics.npy'))

                X_train_session, y_train_session = frame2session(X_train_frame,
                                                                 y_train_frame,
                                                                 inst_train,
                                                                 verbose=True)
                X_dev_session, y_dev_session = frame2session(X_dev_frame,
                                                             y_dev_frame,
                                                             inst_dev,
                                                             verbose=True)

                fv_train, fv_dev = [], []
                score = []

                for X_train in X_train_session:
                    fv_gmm = FisherVectorGMM(n_kernels=self.kernel)
                    fv_gmm.fit(X_train)
                    fv_train.append(fv_gmm.predict(X_train, normalized=False))
                    score.append(fv_gmm.score(X_train))

                for X_dev in X_dev_session:
                    fv_gmm = FisherVectorGMM(n_kernels=self.kernel)
                    fv_gmm.fit(X_dev)
                    fv_dev.append(fv_gmm.predict(X_dev, normalized=False))
                    score.append(fv_gmm.score(X_dev))

                print("\nscores for all the FV kernel", score)
                fv_gmm.data_dir = line

                fv_gmm.save_vector(fv_train, 'train')
                fv_gmm.save_vector(fv_dev, 'dev')
                fv_gmm.save_vector(y_train_session, 'train', label=True)
                fv_gmm.save_vector(y_dev_session, 'dev', label=True)
                print("\nFV encoding for %s, done" % line[65:])
Example #10
    def DDAE_single(self):
        print(
            "\nrunning DDAE on unimodal features (facial landmarks and MFCC/eGeMAPS)"
        )
        print("\nchoose a modality\n0.facial landmarks\n1.MFCC\n2.eGeMAPS")
        choice = int(input("choose a modality "))
        if choice == 0:
            _, _, _, X_train_V, X_dev_V, X_test_V, _, _, _, _ = load_aligned_features(
                verbose=True)
            ae = AutoEncoder('unimodal_landmark', 136)
            ae.build_model()
            ae.train_model(pd.concat([X_train_V, X_dev_V]), X_test_V)
            ae.encode(X_train_V, X_dev_V)
        elif choice == 1:
            X_train_A, X_dev_A, X_test_A, _, _, _, _, _, _, _ = load_aligned_features(
                verbose=True)
            ae = AutoEncoder('unimodal_mfcc', X_train_A.shape[1], visual=False)
            ae.build_model()
            ae.train_model(pd.concat([X_train_A, X_dev_A]), X_test_A)
            ae.encode(X_train_A, X_dev_A)
        elif choice == 2:
            X_train_A, X_dev_A, X_test_A, _, _, _, _, _, _, _ = load_aligned_features(
                eGeMAPS=True, verbose=True)
            ae = AutoEncoder('unimodal_egemaps', X_train_A.shape[1])
            ae.build_model()
            ae.train_model(pd.concat([X_train_A, X_dev_A]), X_test_A)
            ae.encode(X_train_A, X_dev_A)