Example #1
    def __init__(self,
                 model_name=None,
                 feature_name=None,
                 gender=False,
                 feature_tables=None):
        """Train model Controller, dispatch the training tasks;
        Input:
            model_name: certain model depend on papers
            feature_name: support for a group of absolute features
            feature_tables: support for different feature table, which make it
                            is possible for us to combine different modality
                            features freely. But note that the train controller
                            is not responsible for processing the feature table,
                            it should be completed by a certain model.
            gender: if the model should consider the gender difference

        Output:
            Result and realted information will be printed by each estimator in logs'
        """
        super().__init__()
        self.model_name = model_name
        self.feature_name = feature_name
        self.feature_tables = feature_tables
        self.gender = gender
        self.sql_handler = SqlHandler()
        self._set_feature()
Example #2
def data_set():
    df_train = pd.read_csv(config.data_dir + global_values.TRAIN_SET_NAME,
                           header=0)
    df_dev = pd.read_csv(config.data_dir + global_values.DEL_SET_NAME,
                         header=0)

    logger.debug(df_dev.head())
    sql_handler = SqlHandler()
    sql_handler.execute(f'drop table {config.tbl_develop_set}')
    sql_handler.execute(f'drop table {config.tbl_training_set}')

    sql_handler.df_to_db(df_train, config.tbl_training_set)
    sql_handler.df_to_db(df_dev, config.tbl_develop_set)
Example #3
def get_data_multi_modality(tables, gender=False):
    """gather data from different tables in every modality
        and generate train set and dev dev set of them.
    """
    sql_handler = SqlHandler()
    audio_df, video_df, text_df = [], [], []
    for tb in tables:
        if tb in AUDIO_TABLE:
            audio_df.append(sql_handler.get_df(tb))  # pull this table's features from the database into a list
        elif tb in VIDEO_TABLE:
            video_df.append(sql_handler.get_df(tb))
        elif tb in TEXT_TABLE:
            text_df.append(sql_handler.get_df(tb))
        else:
            pass

    audio_merge_df = merge_dfs_by_id(audio_df)
    video_merge_df = merge_dfs_by_id(video_df)
    text_merge_df = merge_dfs_by_id(text_df)

    train = sql_handler.get_df(config.tbl_train_ros_set)
    dev = sql_handler.get_df(config.tbl_develop_set)

    if not gender:  # gender-agnostic: merge the full train/dev sets
        data_dct = {
            'audio_train': merge_df_by_id(train, audio_merge_df),
            'audio_dev': merge_df_by_id(dev, audio_merge_df),
            'vedio_train': merge_df_by_id(train, video_merge_df),
            'vedio_dev': merge_df_by_id(dev, video_merge_df),
            'text_train': merge_df_by_id(train, text_merge_df),
            'text_dev': merge_df_by_id(dev, text_merge_df)
        }
    else:  # split train/dev by gender before merging
        train_male = train[train['Gender'] == 1]
        train_female = train[train['Gender'] == 0]
        dev_male = dev[dev['Gender'] == 1]
        dev_female = dev[dev['Gender'] == 0]

        data_dct = {
            'male': {
                'audio_train': merge_df_by_id(train_male, audio_merge_df),
                'audio_dev': merge_df_by_id(dev_male, audio_merge_df),
                'vedio_train': merge_df_by_id(train_male, video_merge_df),
                'vedio_dev': merge_df_by_id(dev_male, video_merge_df),
                'text_train': merge_df_by_id(train_male, text_merge_df),
                'text_dev': merge_df_by_id(dev_male, text_merge_df)
            },
            'female': {
                'audio_train': merge_df_by_id(train_female, audio_merge_df),
                'audio_dev': merge_df_by_id(dev_female, audio_merge_df),
                'vedio_train': merge_df_by_id(train_female, video_merge_df),
                'vedio_dev': merge_df_by_id(dev_female, video_merge_df),
                'text_train': merge_df_by_id(train_female, text_merge_df),
                'text_dev': merge_df_by_id(dev_female, text_merge_df)
            }
        }

    return data_dct
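
# merge_dfs_by_id / merge_df_by_id are not shown in these snippets. A minimal
# sketch of what they are assumed to do (an assumption, not the original
# implementation): inner-join DataFrames on a shared 'ID' column.
from functools import reduce
import pandas as pd

def merge_df_by_id(left, right):
    # assumed behaviour: keep only participants present in both frames
    return pd.merge(left, right, on='ID', how='inner')

def merge_dfs_by_id(dfs):
    # assumed behaviour: fold a list of per-table frames into a single frame;
    # an empty list yields an empty frame with just the 'ID' column
    if not dfs:
        return pd.DataFrame(columns=['ID'])
    return reduce(merge_df_by_id, dfs)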
Example #4
def gen_fea():
    sql_handler = SqlHandler()

    audio_text_value = list()
    with ThreadPoolExecutor(max_workers=30) as executor:  # launch extraction tasks in parallel
        task = [executor.submit(gen_sigle_fea, fold) for fold in PREFIX]
        for future in as_completed(task):
            try:
                fea_item = future.result()  # features aggregated from one participant folder, e.g. 300_P
                audio_text_value.append(fea_item)
            except Exception:  # skip folders whose extraction failed
                continue

    COVAREP_COLUMNS.remove('VUV')
    audio_fea = list()
    audio_fea.append('ID')
    COVAREP_COLUMNS.extend(FORMANT_COLUMNS)
    for a_fea, s_fea in itertools.product(COVAREP_COLUMNS,
                                          stats_fea.columns):  # Cartesian product, equivalent to nested for loops
        audio_fea.append(a_fea + '_' + s_fea)
    audio_text_fea = audio_fea + TEXT_COLUMNS

    assert len(audio_text_value[0]) == len(audio_text_fea)

    audio_text_df = pd.DataFrame(audio_text_value, columns=audio_text_fea)

    sql_handler.execute(f'drop table if exists {config.tbl_exp1_fea};'
                        )  # the selected features differ between runs, so drop the old table before re-writing
    sql_handler.df_to_db(audio_text_df, config.tbl_exp1_fea)
    logger.info('audio feature exp1 has been stored!')
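
# For clarity, the Cartesian product above yields one column name per
# (acoustic feature, statistic) pair; a tiny illustration with made-up names:
import itertools

acoustic = ['F0', 'NAQ']   # hypothetical COVAREP-style feature names
stats = ['mean', 'std']    # hypothetical statistic names
print(['ID'] + [f'{a}_{s}' for a, s in itertools.product(acoustic, stats)])
# -> ['ID', 'F0_mean', 'F0_std', 'NAQ_mean', 'NAQ_std']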
Example #5
def get_data_by_id(feature_table, gender=False):
    sql_handler = SqlHandler()
    feature = sql_handler.get_df(feature_table)
    feature['ID'] = feature['ID'].apply(pd.to_numeric)  # cast IDs to numeric
    train = sql_handler.get_df(config.tbl_train_ros_set)
    dev = sql_handler.get_df(config.tbl_develop_set)
    test = sql_handler.get_df(config.tbl_test_set)

    if not gender:
        train_set = merge_df_by_id(train, feature)  # merge the feature table with the PHQ label set via merge_df_by_id()
        dev_set = merge_df_by_id(dev, feature)
        test_set = merge_df_by_id(test, feature)
        return train_set, dev_set, test_set
    else:  # split by gender before merging
        train_male = train[train['Gender'] == 1]
        train_female = train[train['Gender'] == 0]
        dev_male = dev[dev['Gender'] == 1]
        dev_female = dev[dev['Gender'] == 0]
        test_male = test[test['Gender'] == 1]
        test_female = test[test['Gender'] == 0]

        train_male = merge_df_by_id(train_male, feature)
        train_female = merge_df_by_id(train_female, feature)
        dev_male = merge_df_by_id(dev_male, feature)
        dev_female = merge_df_by_id(dev_female, feature)
        test_male = merge_df_by_id(test_male, feature)
        test_female = merge_df_by_id(test_female, feature)
        return train_male, dev_male, test_male, train_female, dev_female, test_female
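
# A minimal usage sketch (not part of the original source); the table name is
# taken from the snippets above, the variable names are placeholders.
if __name__ == '__main__':
    # gender-agnostic: three merged sets
    train_set, dev_set, test_set = get_data_by_id(config.tbl_exp1_fea)

    # gender-aware: six sets, male splits first, then female
    (train_m, dev_m, test_m,
     train_f, dev_f, test_f) = get_data_by_id(config.tbl_exp1_fea, gender=True)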
Example #6
def get_data_by_id(feature_table, gender=False):

    sql_handler = SqlHandler()
    feature = sql_handler.get_df(feature_table)
    feature['ID'] = feature['ID'].apply(pd.to_numeric)
    train = sql_handler.get_df(config.tbl_training_set)
    dev = sql_handler.get_df(config.tbl_develop_set)
    if not gender:
        train_set = merge_df_by_id(train, feature)
        dev_set = merge_df_by_id(dev, feature)
        return train_set, dev_set
    else:
        train_male = train[train['Gender'] == 1]
        train_female = train[train['Gender'] == 0]
        dev_male = dev[dev['Gender'] == 1]
        dev_female = dev[dev['Gender'] == 0]

        train_male = merge_df_by_id(train_male, feature)
        train_female = merge_df_by_id(train_female, feature)
        dev_male = merge_df_by_id(dev_male, feature)
        dev_female = merge_df_by_id(dev_female, feature)
        return train_male, dev_male, train_female, dev_female
Example #7
def hog_pca():
    sql_handler = SqlHandler()
    pca = PCA(n_components=0.999)
    hog = pd.read_csv(config.data_dir + FACE_HOG)
    hog_pca_values = pca.fit_transform(hog)
    # assumes PCA(n_components=0.999) keeps 184 components for this HOG data
    hog_pca_names = ['hog_pca_' + str(i) for i in range(184)]
    hog_pca = pd.DataFrame(hog_pca_values, columns=hog_pca_names)
    ids = [float(p[:-1]) for p in PREFIX]  # '300_' -> 300.0
    col_name = hog_pca.columns.tolist()
    col_name.insert(0, 'ID')
    hog_pca = hog_pca.reindex(columns=col_name, fill_value=1)
    hog_pca['ID'] = ids

    sql_handler.execute(f'drop table if exists {config.tbl_exp3_hog_fea};')  # the selected features differ between runs, so drop the old table before re-writing
    sql_handler.df_to_db(hog_pca, config.tbl_exp3_hog_fea)
    logger.info('hog feature exp3 has been stored!')
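
# With n_components=0.999 the number of retained components depends on the
# data, so the 184 above is an assumption about this particular HOG matrix.
# A small variant that derives the names from the fitted PCA output instead
# (a sketch, not the original code):
def hog_pca_column_names(hog_pca_values):
    # hog_pca_values is the array returned by pca.fit_transform(hog)
    return ['hog_pca_' + str(i) for i in range(hog_pca_values.shape[1])]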
Example #8
def extract_audio(sample, prefix, opensmile_options, outputoption,
                  feature_type):
    """Dispatch extraction tasks
    sample: phq-id like 310
    prefix: phq file prefix like 310_
    feature_type: mfcc or egemaps
    """
    infilename = f"{config.sample_dir}/{prefix}P/{prefix}{SUFFIX['wav']}"
    outfilename = f'{sample}_{feature_type}.csv'
    opensmile_call = (f'{config.opensmile_exe} {opensmile_options}'
                      f' -inputfile {infilename} {outputoption} {outfilename}'
                      f' -instname {sample} -output ?')
    os.system(opensmile_call)
    if not os.path.exists(outfilename):
        return sample, feature_type
    df = pd.read_csv(outfilename, sep=';')
    db_handler = SqlHandler()
    if feature_type == 'mfcc':
        db_handler.df_to_db(df, config.tbl_mfcc, if_exists='append')
    elif feature_type == 'egemaps':
        db_handler.df_to_db(df, config.tbl_egemaps, if_exists='append')
    os.remove(outfilename)
    return sample, feature_type
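
# A usage sketch (not from the original source) showing how extract_audio
# could be dispatched in parallel, mirroring the ThreadPoolExecutor pattern
# used elsewhere in these snippets; PREFIX entries like '300_' and the logger
# are assumed to be available as in the other examples.
from concurrent.futures import ThreadPoolExecutor, as_completed

def extract_all(opensmile_options, outputoption, feature_type='mfcc'):
    with ThreadPoolExecutor(max_workers=8) as executor:
        tasks = [executor.submit(extract_audio, prefix[:-1], prefix,
                                 opensmile_options, outputoption, feature_type)
                 for prefix in PREFIX]
        for future in as_completed(tasks):
            sample, ftype = future.result()
            logger.info(f'{ftype} extraction finished for sample {sample}')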
Example #9
class Train(Process):
    def __init__(self,
                 model_name=None,
                 feature_name=None,
                 gender=False,
                 feature_tables=None):
        """Train model Controller, dispatch the training tasks;
        Input:
            model_name: certain model depend on papers
            feature_name: support for a group of absolute features
            feature_tables: support for different feature table, which make it
                            is possible for us to combine different modality
                            features freely. But note that the train controller
                            is not responsible for processing the feature table,
                            it should be completed by a certain model.
            gender: if the model should consider the gender difference

        Output:
            Result and realted information will be printed by each estimator in logs'
        """
        super().__init__()
        self.model_name = model_name
        self.feature_name = feature_name
        self.feature_tables = feature_tables
        self.gender = gender
        self.sql_handler = SqlHandler()
        self._set_feature()

    def _set_feature(self):
        if self.feature_name is not None:
            # a single-feature setup was chosen
            if self.feature_name == FEATURE_EXP_2:
                # for exp2 the data comes back as pandas DataFrames by default
                self.data = get_data_by_id(config.tbl_exp2_audio_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp2_audio_fea)
                self.feature_list.remove('ID')
            else:
                print('not finished yet')
        elif self.feature_tables is not None:
            # a multi-modality model is being used
            self.data = get_data_multi_modality(self.feature_tables,
                                                self.gender)
            self.audio_fea, self.video_fea, self.text_fea = \
                        self.sql_handler.get_cloumns_from_table(self.feature_tables)
        else:
            print('You must choose a set of features to train!!!')

    def _train_eval(self, train, dev, model):
        model = model(train, dev, features=self.feature_list)
        model.train()
        return model.eval()

    def run(self):
        if self.model_name == MODEL_RF:
            from core.predictor.RF.rf_predict import RfPredictor
            if self.feature_name is not None:
                if not self.gender:
                    train, dev = self.data
                    score = self._train_eval(train, dev, RfPredictor)
                    logger.info(
                        f'Evaluation Scores {self.model_name} with {self.feature_name}: {score}'
                    )

                else:
                    train_m, dev_m, train_f, dev_f = self.data
                    score = self._train_eval(train_m, dev_m, RfPredictor)
                    logger.info(
                        f'Evaluation Scores Male {self.model_name} with {self.feature_name}: {score}'
                    )

                    score = self._train_eval(train_f, dev_f, RfPredictor)
                    logger.info(
                        f'Evaluation Scores Female {self.model_name} with {self.feature_name}: {score}'
                    )
            else:
                if not self.gender:
                    # multi_modality
                    from core.predictor.RF.rf_predict import MultiModalRandomForest
                    mmrf = MultiModalRandomForest(self.data)

        else:
            print('not finished yet!')
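
# _train_eval above assumes any predictor follows a small contract: it is
# constructed from (train, dev, features=...), then .train() is called, then
# .eval() returns a score. A minimal stand-in illustrating that contract
# (a sketch only, not the real RfPredictor from core.predictor.RF.rf_predict;
# the 'PHQ8_Score' label column is an assumption):
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

class DummyPredictor:
    def __init__(self, train, dev, features=None, label='PHQ8_Score'):
        self.train_set, self.dev_set = train, dev
        self.features, self.label = features, label
        self.model = RandomForestRegressor(n_estimators=100)

    def train(self):
        self.model.fit(self.train_set[self.features], self.train_set[self.label])

    def eval(self):
        pred = self.model.predict(self.dev_set[self.features])
        return np.sqrt(mean_squared_error(self.dev_set[self.label], pred))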
Example #10
def createCodebook(feature_name):
    """
    COVAREP, FORMANT, FAUs ,***p.CSV->stack dataframe -> create codebook ->extract all files' features
    java -jar openXBOW.jar -i examples/example2/llds.csPv  -o bow.csv -a 1 -log -c kmeans++ -size 100 -B codebook -writeName -writeTimeStamp    

    """
    sqlhandler = SqlHandler()
    train = sqlhandler.get_df(config.tbl_training_set)
    dev = sqlhandler.get_df(config.tbl_develop_set)
    trainID = train['Participant_ID'].values
    devID = dev['Participant_ID'].values
    trainDev = np.hstack([trainID, devID])
    folds = np.random.choice(trainDev, 20,
                             replace=False)  # for video 50, for audio 20

    window_size = 4
    hop_size = 1
    openxbow = 'java -jar E:/openXBOW/openXBOW.jar '
    openxbow_options = '-writeName -writeTimeStamp -t ' + str(
        window_size) + ' ' + str(hop_size)
    codebook_out = 'E:/openXBOW/codebooks/'
    openxbow_options_codebook = f'-size 100 -a 1 -log -B {codebook_out}{feature_name}_codebook '

    if feature_name == 'faus':
        for fold in folds:
            path = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['au']
            feature = np.loadtxt(path, delimiter=',', skiprows=1)
            success = feature[:, 3] == 1
            feature = feature[success, 1:18]
            feature = np.delete(feature, [1, 2], axis=1)
            save_features(codebook_out + 'fausTrainDevRandom.csv',
                          feature,
                          append=True,
                          instname=str(fold))
        os.system(openxbow+ f'-standardizeInput -i {codebook_out}fausTrainDevRandom.csv '+openxbow_options_codebook+\
            openxbow_options+ ' -c kmeans++'+ f' -o {codebook_out}temp.csv')
    elif feature_name == 'gaze_pose':
        for fold in folds:
            path1 = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['gaze']
            path2 = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['pose']
            gaze_data = pd.read_csv(path1)
            pose_data = pd.read_csv(path2)
            if fold in [367, 396, 432]:
                temp = np.all(pose_data.values != ' -1.#IND',
                              axis=1)  # folds 367, 396, 432 contain missing/invalid pose values (' -1.#IND')
                data = pd.merge(
                    gaze_data,
                    pose_data)  #key = frame timestamps confidence success
                data = data[temp]
                data.iloc[:, -6:] = data.iloc[:, -6:].applymap(
                    lambda x: float(x[1:]))
            else:
                data = pd.merge(gaze_data, pose_data)
            success = data[' success'] == 1
            data = data.values[:, 1:]
            data = np.delete(data, [1, 2], axis=1)
            data = data[success]
            save_features(codebook_out + 'gazePoseTrainDevRandom.csv',
                          data,
                          append=True,
                          instname=str(fold))
        os.system(openxbow+ f'-standardizeInput -i {codebook_out}gazePoseTrainDevRandom.csv '+openxbow_options_codebook+\
            openxbow_options+ ' -c kmeans++'+ f' -o {codebook_out}temp.csv')

    elif feature_name == 'covarep':
        for fold in folds:
            path = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['covarep']
            data = np.loadtxt(path, delimiter=',')
            timestamp = np.arange(0, data.shape[0]).reshape(data.shape[0], 1)
            timestamp = timestamp / 100
            data = np.hstack([timestamp, data])
            data = data[data[:, 2] == 1]
            data = np.delete(data, 2, axis=1)
            data[np.isnan(data)] = 0
            data[np.isinf(data)] = 0
            save_features(codebook_out + 'covarepTrainDevRandom.csv',
                          data,
                          append=True,
                          instname=str(fold))
        os.system(openxbow+ f'-standardizeInput -i {codebook_out}covarepTrainDevRandom.csv '+openxbow_options_codebook+\
            openxbow_options+ ' -c kmeans++'+ f' -o {codebook_out}temp.csv')
    else:
        for fold in folds:
            path = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['formant']
            data = np.loadtxt(path, delimiter=',')
            timestamp = np.arange(0, data.shape[0]).reshape(data.shape[0], 1)
            timestamp = timestamp / 100
            data = np.hstack([timestamp, data])
            data[np.isnan(data)] = 0
            data[np.isinf(data)] = 0
            save_features(codebook_out + 'formantTrainDevRandom.csv',
                          data,
                          append=True,
                          instname=str(fold))
        os.system(openxbow+ f'-standardizeInput -i {codebook_out}formantTrainDevRandom.csv '+openxbow_options_codebook+\
            openxbow_options+ ' -c kmeans++'+ f' -o {codebook_out}temp.csv')
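
# save_features is not shown in these snippets. A sketch of its assumed
# behaviour: append each row of a 2-D array to a CSV, with the instance name
# as the first column, roughly the layout expected by openXBOW together with
# -writeName/-writeTimeStamp.
import csv

def save_features(path, data, append=True, instname=''):
    mode = 'a' if append else 'w'
    with open(path, mode, newline='') as f:
        writer = csv.writer(f, delimiter=';')
        for row in data:
            writer.writerow([instname] + list(row))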
Example #11
def gen_fea():
    sql_handler = SqlHandler()
    audio_feas, text_feas, vedio_feas = gen_sigle_fea(PREFIX[0])
    # reading the HOG features should be done at model-training time
    # data is extracted into three separate tables

    with ThreadPoolExecutor(max_workers=30) as executor:  # launch extraction tasks in parallel
        task = [executor.submit(gen_sigle_fea, fold) for fold in PREFIX[1:]]
        for future in as_completed(task):
            try:
                audio_value, text_value, vedio_value = future.result(
                )  # features aggregated from one participant folder, e.g. 300_P
                audio_feas = np.concatenate((audio_feas, audio_value))
                vedio_feas = np.concatenate((vedio_feas, vedio_value))
                text_feas = np.concatenate((text_feas, text_value))
            except Exception:  # skip folders whose extraction failed
                continue

    COVAREP_COLUMNS.remove('VUV')
    audio_fea_name = ['ID']
    text_fea_name = ['ID']
    vedio_fea_name = ['ID']

    audio_fea_name.extend(COVAREP_COLUMNS + FORMANT_COLUMNS)
    text_fea_name.extend(TEXT_COLUMNS)
    vedio_fea_name.extend(STABLE_POINTS)

    assert len(audio_feas[0]) == len(audio_fea_name) and len(text_feas[0]) == len(text_fea_name) \
        and len(vedio_feas[0]) == len(vedio_fea_name)
    audio_df = pd.DataFrame(audio_feas, columns=audio_fea_name)
    vedio_df = pd.DataFrame(vedio_feas, columns=vedio_fea_name)
    text_df = pd.DataFrame(text_feas, columns=text_fea_name)

    hog_pca()

    sql_handler.execute(f'drop table if exists {config.tbl_exp3_audio_fea};'
                        )  # the selected features differ between runs, so drop the old table before re-writing
    sql_handler.df_to_db(audio_df, config.tbl_exp3_audio_fea)
    logger.info('audio feature exp3 has been stored!')

    sql_handler.execute(f'drop table if exists {config.tbl_exp3_vedio_fea};'
                        )  # the selected features differ between runs, so drop the old table before re-writing
    sql_handler.df_to_db(vedio_df, config.tbl_exp3_vedio_fea)
    logger.info('vedio feature exp3 has been stored!')

    sql_handler.execute(f'drop table if exists {config.tbl_exp3_text_fea};'
                        )  # the selected features differ between runs, so drop the old table before re-writing
    sql_handler.df_to_db(text_df, config.tbl_exp3_text_fea)
    logger.info('text feature exp3 has been stored!')
Example #12
import os

import numpy as np
import torch
from tensorflow import keras
from torchvision import models

from global_values import *
import config

vgg19 = models.vgg.vgg19(pretrained=False)
vgg19.load_state_dict(torch.load(config.pretrained_model_dir + 'vgg19.pth'))
vggfc = VggFc7(vgg19)

alexnet = models.alexnet(pretrained=False)
alexnet.load_state_dict(torch.load(config.pretrained_model_dir +
                                   'alexnet.pth'))
alexnetfc = alexnetFc7(alexnet)

logger = get_logger()
sqlhandler = SqlHandler()


# extract the features for one participant folder, e.g. 300_P
def gen_sigle_fea(fold):
    # audio_fea = Audio_features()
    # video_fea = Video_features()
    # #text_fea = Text_features()
    path = f"{config.data_dir}/{fold}P/{fold}{SUFFIX['covarep']}"
    # covarep =  audio_fea.covarep_fea(path)
    bow(path, feature_name='covarep')
    path = f'E:/database/COVAREP_BOW/{fold}covarep_bow.csv'
    covarep_bow = np.loadtxt(path, delimiter=';')
    covarep_bow = covarep_bow[:, 1:]
    os.remove(path)  # clean up the temporary bow csv (portable alternative to shelling out to rm)
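
# bow() is not shown in these snippets. Assuming it applies a codebook created
# by createCodebook() via openXBOW's -b option, a rough sketch; the paths,
# window/hop sizes and output directory are placeholders.
def bow(path, feature_name, window_size=4, hop_size=1):
    fold = os.path.basename(path)[:4]  # e.g. '300_COVAREP.csv' -> '300_'
    codebook = f'E:/openXBOW/codebooks/{feature_name}_codebook'
    out = f'E:/database/COVAREP_BOW/{fold}{feature_name}_bow.csv'
    os.system('java -jar E:/openXBOW/openXBOW.jar '
              f'-i {path} -o {out} -b {codebook} '
              f'-writeName -writeTimeStamp -t {window_size} {hop_size}')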
Example #13
class Train(Process):
    def __init__(self,
                 model_name=None,
                 feature_name=None,
                 gender=False,
                 feature_tables=None):
        """Train model Controller, dispatch the training tasks;
        Input:
            model_name: certain model depend on papers
            feature_name: support for a group of absolute features
            feature_tables: support for different feature table, which make it
                            is possible for us to combine different modality
                            features freely. But note that the train controller
                            is not responsible for processing the feature table,
                            it should be completed by a certain model.
            gender: if the model should consider the gender difference

        Output:
            Result and realted information will be printed by each estimator in logs'
        """
        super().__init__()  # initialise the parent Process class
        self.model_name = model_name
        self.feature_name = feature_name
        self.feature_tables = feature_tables
        self.gender = gender
        self.sql_handler = SqlHandler()
        self._set_feature()  # load data and feature lists for the chosen configuration

    def _set_feature(self):
        if self.feature_name is not None:
            # a single-feature setup was chosen
            if self.feature_name == FEATURE_EXP_2:
                # for exp2 the data comes back as pandas DataFrames by default
                self.data = get_data_by_id(config.tbl_exp2_audio_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp2_audio_fea)
                self.feature_list.remove('ID')
            elif self.feature_name == FEATURE_EXP_1:
                self.data = get_data_by_id(config.tbl_exp1_fea, self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp1_fea)
                self.feature_list.remove('ID')
#---------------------baseline----------------------------------
            elif self.feature_name == FEATURE_EXP_3_VEDIO:
                self.data = get_data_by_id(config.tbl_exp3_vedio_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp3_vedio_fea)
                self.feature_list.remove('ID')

            elif self.feature_name == FEATURE_EXP_3_TEXT:
                self.data = get_data_by_id(config.tbl_exp3_text_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp3_text_fea)
                self.feature_list.remove('ID')

            elif self.feature_name == FEATURE_EXP_3_AUDIO:
                self.data = get_data_by_id(config.tbl_exp3_audio_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp3_audio_fea)
                self.feature_list.remove('ID')

            elif self.feature_name == FEATURE_EXP_3_HOGPCA:
                self.data = get_data_by_id(config.tbl_exp3_hog_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp3_hog_fea)
                self.feature_list.remove('ID')
#-----------------baseline---------------------------------------------

#-----------------finalmodel------------------------------------
            elif self.feature_name == FEATURE_FINAL_COVAREP:
                path = 'E:/rnn_models/data/covarep/'
                self.data = get_npdata_by_id(path, self.gender)

            elif self.feature_name == FEATURE_FINAL_FORMANT:
                path = 'E:/rnn_models/data/formant/'
                self.data = get_npdata_by_id(path, self.gender)

            elif self.feature_name == FEATURE_FINAL_FAUs:
                path = 'E:/rnn_models/data/faus/'
                self.data = get_npdata_by_id(path, self.gender)

            elif self.feature_name == FEATURE_FINAL_GAZE_POSE:
                path = 'E:/rnn_models/data/gaze_pose/'
                self.data = get_npdata_by_id(path, self.gender)

            elif self.feature_name == FEATURE_FINAL_TEXT:
                self.data = get_data_by_id(config.tbl_exp3_text_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp3_text_fea)
                self.feature_list.remove('ID')

            elif self.feature_name == FEATURE_FINAL_VGG:
                path = 'E:/rnn_models/data/ds_vgg/'
                self.data = get_npdata_by_id(path, self.gender)

            elif self.feature_name == FEATURE_FINAL_ALEXNET:
                path = 'E:/rnn_models/data/ds_alexnet/'
                self.data = get_npdata_by_id(path, self.gender)

            elif self.feature_name == FEATURE_FINAL_GAZE_POSE_BOW:
                path = 'E:/rnn_models/data/gaze_pose_bow/'
                self.data = get_npdata_by_id(path, self.gender)

            elif self.feature_name == FEATURE_FINAL_FAUs_BOW:
                path = 'E:/rnn_models/data/faus_bow/'
                self.data = get_npdata_by_id(path, self.gender)

            elif self.feature_name == FEATURE_FINAL_COVAREP_BOW:
                path = 'E:/rnn_models/data/covarep_bow/'
                self.data = get_npdata_by_id(path, self.gender)

            elif self.feature_name == FEATURE_FINAL_FORMANT_BOW:
                path = 'E:/rnn_models/data/formant_bow/'
                self.data = get_npdata_by_id(path, self.gender)

            elif self.feature_name == FEATURE_FINAL_FUSION:
                path = 'E:/data/'
                if self.gender:
                    m_dev = pd.read_csv(path + 'm_pre_dev_scores.csv')
                    m_dev = m_dev.values
                    m_dev_label = m_dev[:, 0].reshape(m_dev.shape[0], 1)
                    m_dev_features = m_dev[:, 1:]

                    m_test = pd.read_csv(path + 'm_pre_test_scores.csv')
                    m_test = m_test.values
                    m_test_label = m_test[:, 0].reshape(m_test.shape[0], 1)
                    m_test_features = m_test[:, 1:]

                    f_dev = pd.read_csv(path + 'f_pre_dev_scores.csv')
                    f_dev = f_dev.values
                    f_dev_label = f_dev[:, 0].reshape(f_dev.shape[0], 1)
                    f_dev_features = f_dev[:, 1:]

                    f_test = pd.read_csv(path + 'f_pre_test_scores.csv')
                    f_test = f_test.values
                    f_test_label = f_test[:, 0].reshape(f_test.shape[0], 1)
                    f_test_features = f_test[:, 1:]

                    self.data =  m_dev_features,m_dev_label,m_test_features,m_test_label,\
                                f_dev_features,f_dev_label,f_test_features,f_test_label
                else:
                    dev = pd.read_csv(path + 'pre_dev_scores.csv')
                    dev = dev.values
                    dev_label = dev[:, 0].reshape(dev.shape[0], 1)
                    dev_features = dev[:, 1:]

                    test = pd.read_csv(path + 'pre_test_scores.csv')
                    test = test.values
                    test_label = test[:, 0].reshape(test.shape[0], 1)
                    test_features = test[:, 1:]

                    self.data = dev_features, dev_label, test_features, test_label

#------------------finalmodel-----------------------------------------------
            else:
                print('not finished yet')
        elif self.feature_tables is not None:
            # a multi-modality model is being used
            # only one audio feature table is computed for now
            # feature_tables must match the table names in the database, otherwise an error is raised
            self.data = get_data_multi_modality(self.feature_tables,
                                                self.gender)
            self.audio_fea, self.vedio_fea, self.text_fea = \
                        self.sql_handler.get_cloumns_from_table(self.feature_tables)
            self.audio_fea.remove('ID')
            self.vedio_fea.remove('ID')
            self.text_fea.remove('ID')
            self.feature_list = {
                'audio': self.audio_fea,
                'vedio': self.vedio_fea,
                'text': self.text_fea
            }
        else:
            print('You must choose a set of features to train!!!')

    def _train_eval(self, train, dev, test, model, feature):
        model = model(train, dev, test,
                      features=feature)  # note: the RF and RNN predictors differ by a test argument; keep the call sites in sync
        model.train()
        return model.eval()

    def run(self):
        if self.model_name == MODEL_RF:
            from core.predictor.randomForest.rf_predict import RfPredictor
            if self.feature_name is not None:
                if not self.gender:
                    train, dev, test = self.data
                    # _train_eval must accept the extra test argument when running this path
                    score = self._train_eval(train, dev, test, RfPredictor,
                                             self.feature_list)
                    logger.info(
                        f'Evaluation Scores {self.model_name} with {self.feature_name}: {score}'
                    )
                else:
                    train_m, dev_m, test_m, train_f, dev_f, test_f = self.data
                    score = self._train_eval(train_m, dev_m, test_m,
                                             RfPredictor, self.feature_list)
                    logger.info(
                        f'Evaluation Scores Male {self.model_name} with {self.feature_name}: {score}'
                    )

                    score = self._train_eval(train_f, dev_f, test_f,
                                             RfPredictor, self.feature_list)
                    logger.info(
                        f'Evaluation Scores Female {self.model_name} with {self.feature_name}: {score}'
                    )
            else:
                from core.predictor.randomForest.rf_predict import MultiModalRandomForest
                if not self.gender:
                    # multi_modality
                    mmrf = MultiModalRandomForest(self.data, self.feature_list)
                    score = mmrf.eval()
                    logger.info(
                        f'Evaluation Scores {self.model_name} with {self.feature_tables}: {score}'
                    )
                else:
                    data_male = self.data['male']
                    mmrf = MultiModalRandomForest(data_male, self.feature_list)
                    score = mmrf.eval()
                    logger.info(
                        f'Evaluation Scores Male {self.model_name} with {self.feature_tables}: {score}'
                    )

                    data_female = self.data['female']
                    mmrf = MultiModalRandomForest(data_female,
                                                  self.feature_list)
                    score = mmrf.eval()
                    logger.info(
                        f'Evaluation Scores Female {self.model_name} with {self.feature_tables}: {score}'
                    )

        elif self.model_name == MODEL_RNN:
            if self.feature_name is not None:
                from core.predictor.rnn.RNN import RnnPredictor
                if self.gender:
                    m_train_X, m_train_Y, m_dev_X, m_dev_Y, m_test_X, m_test_Y, f_train_X, f_train_Y, f_dev_X, f_dev_Y, f_test_X, f_test_Y = self.data
                    #m_train,m_dev,m_test = (m_train_X,m_train_Y),(m_dev_X,m_dev_Y),(m_test_X,m_test_Y)
                    f_train, f_dev, f_test = (f_train_X,
                                              f_train_Y), (f_dev_X,
                                                           f_dev_Y), (f_test_X,
                                                                      f_test_Y)

                    #score = self._train_eval(m_train,m_dev,m_test,RnnPredictor,'m_'+self.feature_name)
                    #logger.info(f'Evalutaion Scores male {self.model_name} with {self.feature_name}: {score}')

                    score = self._train_eval(f_train, f_dev, f_test,
                                             RnnPredictor,
                                             'f_' + self.feature_name)
                    logger.info(
                        f'Evaluation Scores female {self.model_name} with {self.feature_name}: {score}'
                    )

                else:
                    train_X, train_Y, dev_X, dev_Y, test_X, test_Y = self.data
                    train, dev, test = (train_X, train_Y), (dev_X,
                                                            dev_Y), (test_X,
                                                                     test_Y)
                    score = self._train_eval(train, dev, test, RnnPredictor,
                                             self.feature_name)
                    logger.info(
                        f'Evaluation Scores {self.model_name} with {self.feature_name}: {score}'
                    )
        elif self.model_name == MODEL_LINEAR:
            from sklearn import linear_model
            from sklearn.metrics import mean_squared_error
            reg = linear_model.Ridge(alpha=10)
            if self.gender:
                pass
            else:
                dev_features, dev_label, test_features, test_label = self.data
                reg.fit(dev_features, dev_label)
                dev_pre = reg.predict(dev_features)
                test_pre = reg.predict(test_features)

                dev_rmse = np.sqrt(mean_squared_error(dev_label, dev_pre))
                test_rmse = np.sqrt(mean_squared_error(test_label, test_pre))
                logger.info(f"dev_rmse: {dev_rmse}; test_rmse: {test_rmse}")
        else:
            print('not finished yet!')
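
# A minimal usage sketch (not from the original source): the controller is a
# multiprocessing.Process subclass, so it is started and joined; MODEL_RF and
# FEATURE_EXP_1 appear in the snippets above and are assumed to come from
# global_values.
if __name__ == '__main__':
    job = Train(model_name=MODEL_RF, feature_name=FEATURE_EXP_1, gender=False)
    job.start()
    job.join()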
Example #14
    def to_db(self, data_frame, table):
        sql_handler = SqlHandler()
        sql_handler.df_to_db(data_frame, table)
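
# SqlHandler itself is not shown in these snippets. A minimal sketch of the
# interface the code above relies on (get_df, df_to_db, execute,
# get_cloumns_from_table), assumed to be a thin wrapper around SQLAlchemy and
# pandas; the connection URL and the class name are placeholders.
import pandas as pd
from sqlalchemy import create_engine, text

class SqlHandlerSketch:
    def __init__(self, url='sqlite:///features.db'):
        self.engine = create_engine(url)

    def get_df(self, table):
        return pd.read_sql_table(table, self.engine)

    def df_to_db(self, df, table, if_exists='replace'):
        df.to_sql(table, self.engine, index=False, if_exists=if_exists)

    def execute(self, sql):
        with self.engine.begin() as conn:
            conn.execute(text(sql))

    def get_cloumns_from_table(self, table):
        # keeps the (misspelled) method name used throughout the snippets
        return pd.read_sql_table(table, self.engine).columns.tolist()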