def __init__(self, model_name=None, feature_name=None, gender=False, feature_tables=None):
    """Training controller: dispatches the training tasks.

    Args:
        model_name: identifier of the model to train (paper-specific).
        feature_name: name of a predefined group of features.
        feature_tables: feature tables to combine freely across modalities.
            This controller does not process the tables itself; the
            concrete model is responsible for that.
        gender: whether the model should account for gender differences.

    Output: results and related information are printed to the logs by
    each estimator.
    """
    super().__init__()
    # Plain configuration state; order of these assignments is arbitrary.
    self.gender = gender
    self.model_name = model_name
    self.feature_name = feature_name
    self.feature_tables = feature_tables
    # DB access and feature resolution must happen after the fields above.
    self.sql_handler = SqlHandler()
    self._set_feature()
def data_set():
    """Load the raw train/dev CSV splits and (re)store them in the database.

    Reads the train and dev CSVs from the data directory, then replaces the
    corresponding database tables with their contents.
    """
    df_train = pd.read_csv(config.data_dir + global_values.TRAIN_SET_NAME, header=0)
    # NOTE(review): DEL_SET_NAME presumably names the dev-split CSV
    # (cf. tbl_develop_set below) — confirm the constant is not a typo
    # for DEV_SET_NAME in global_values.
    df_dev = pd.read_csv(config.data_dir + global_values.DEL_SET_NAME, header=0)
    logger.debug(df_dev.head())
    sql_handler = SqlHandler()
    # 'if exists' keeps the very first run from failing before the tables
    # exist, and matches every other drop statement in this module.
    sql_handler.execute(f'drop table if exists {config.tbl_develop_set}')
    sql_handler.execute(f'drop table if exists {config.tbl_training_set}')
    sql_handler.df_to_db(df_train, config.tbl_training_set)
    sql_handler.df_to_db(df_dev, config.tbl_develop_set)
def get_data_multi_modality(tables, gender=False):
    """Gather features from per-modality tables and build train/dev splits.

    Every table name in *tables* is routed to its modality bucket
    (audio / video / text), the tables of one modality are merged on ID,
    and the merged features are joined with the train and dev label sets.
    When *gender* is True the result is additionally partitioned by the
    'Gender' column (1 = male, 0 = female).
    """
    sql_handler = SqlHandler()

    # Route each requested table to its modality; unknown tables are ignored.
    audio_frames, video_frames, text_frames = [], [], []
    for tb in tables:
        if tb in AUDIO_TABLE:
            audio_frames.append(sql_handler.get_df(tb))  # pull features from DB
        elif tb in VIDEO_TABLE:
            video_frames.append(sql_handler.get_df(tb))
        elif tb in TEXT_TABLE:
            text_frames.append(sql_handler.get_df(tb))

    # Dict key 'vedio' (sic) is preserved — downstream consumers use it.
    merged = {
        'audio': merge_dfs_by_id(audio_frames),
        'vedio': merge_dfs_by_id(video_frames),
        'text': merge_dfs_by_id(text_frames),
    }

    train = sql_handler.get_df(config.tbl_train_ros_set)
    dev = sql_handler.get_df(config.tbl_develop_set)

    def build_split(train_part, dev_part):
        # Join the label frames with each modality's merged feature frame.
        dct = {}
        for modality, fea in merged.items():
            dct[f'{modality}_train'] = merge_df_by_id(train_part, fea)
            dct[f'{modality}_dev'] = merge_df_by_id(dev_part, fea)
        return dct

    if not gender:
        # Gender-agnostic: one flat split dictionary.
        return build_split(train, dev)

    # Gender-aware: partition both label sets before joining.
    return {
        'male': build_split(train[train['Gender'] == 1], dev[dev['Gender'] == 1]),
        'female': build_split(train[train['Gender'] == 0], dev[dev['Gender'] == 0]),
    }
def gen_fea():
    """Extract per-sample audio+text features in parallel and store them.

    Runs gen_sigle_fea for every sample folder on a thread pool, assembles
    the results into a DataFrame whose columns are the Cartesian product of
    the acoustic LLD names and the statistic names (plus the text feature
    names), then writes it to the exp1 feature table.
    """
    sql_handler = SqlHandler()
    audio_text_value = []
    with ThreadPoolExecutor(max_workers=30) as executor:  # run extraction tasks in parallel
        tasks = [executor.submit(gen_sigle_fea, fold) for fold in PREFIX]
        for future in as_completed(tasks):
            try:
                # one result = all features for one sample folder, e.g. 300_P
                audio_text_value.append(future.result())
            except Exception:
                # Best-effort: a failed sample is skipped, but log it instead
                # of the previous bare `except:` that hid every error
                # (including KeyboardInterrupt).
                logger.exception('feature extraction failed for one sample')
                continue
    # Build the column-name list locally instead of mutating the
    # module-level COVAREP_COLUMNS (the original remove()/extend() made this
    # function non-reentrant: a second call corrupted the shared constant).
    lld_columns = [c for c in COVAREP_COLUMNS if c != 'VUV'] + list(FORMANT_COLUMNS)
    audio_fea = ['ID']
    # Cartesian product == the equivalent nested for-loops.
    for a_fea, s_fea in itertools.product(lld_columns, stats_fea.columns):
        audio_fea.append(a_fea + '_' + s_fea)
    audio_text_fea = audio_fea + TEXT_COLUMNS
    assert len(audio_text_value[0]) == len(audio_text_fea)
    audio_text_df = pd.DataFrame(audio_text_value, columns=audio_text_fea)
    # The chosen features can differ between runs, so drop the old table first.
    sql_handler.execute(f'drop table if exists {config.tbl_exp1_fea};')
    sql_handler.df_to_db(audio_text_df, config.tbl_exp1_fea)
    logger.info('audio feature exp1 has been stored!')
def get_data_by_id(feature_table, gender=False):
    """Join a feature table with the train/dev/test label sets on ID.

    Returns (train, dev, test) when *gender* is False, otherwise the six
    gender-partitioned splits:
    (train_male, dev_male, test_male, train_female, dev_female, test_female).
    """
    sql_handler = SqlHandler()
    feature = sql_handler.get_df(feature_table)
    # IDs come back from the DB as text; coerce them to numbers for the join.
    feature['ID'] = feature['ID'].apply(pd.to_numeric)
    train = sql_handler.get_df(config.tbl_train_ros_set)
    dev = sql_handler.get_df(config.tbl_develop_set)
    test = sql_handler.get_df(config.tbl_test_set)

    if not gender:
        # Merge the feature frame with each PHQ label frame.
        return (merge_df_by_id(train, feature),
                merge_df_by_id(dev, feature),
                merge_df_by_id(test, feature))

    # Gender-aware variant: Gender == 1 -> male, 0 -> female.
    splits = []
    for gender_value in (1, 0):  # male splits first, then female
        for labels in (train, dev, test):
            part = labels[labels['Gender'] == gender_value]
            splits.append(merge_df_by_id(part, feature))
    return tuple(splits)
def get_data_by_id(feature_table, gender=False):
    """Join a feature table with the train/dev label sets on ID.

    Returns (train, dev) when *gender* is False, otherwise
    (train_male, dev_male, train_female, dev_female).
    """
    sql_handler = SqlHandler()
    feature = sql_handler.get_df(feature_table)
    # IDs come back from the DB as text; coerce them to numbers for the join.
    feature['ID'] = feature['ID'].apply(pd.to_numeric)
    train = sql_handler.get_df(config.tbl_training_set)
    dev = sql_handler.get_df(config.tbl_develop_set)
    if not gender:
        train_set = merge_df_by_id(train, feature)
        dev_set = merge_df_by_id(dev, feature)
        return train_set, dev_set
    else:
        train_male = train[train['Gender'] == 1]
        train_female = train[train['Gender'] == 0]
        # BUG FIX: the dev splits were previously sliced from `train`
        # (dev_male = train[train['Gender'] == 1]), so the model was
        # "evaluated" on training rows. Slice them from `dev` instead.
        dev_male = dev[dev['Gender'] == 1]
        dev_female = dev[dev['Gender'] == 0]
        train_male = merge_df_by_id(train_male, feature)
        train_female = merge_df_by_id(train_female, feature)
        dev_male = merge_df_by_id(dev_male, feature)
        dev_female = merge_df_by_id(dev_female, feature)
        return train_male, dev_male, train_female, dev_female
def hog_pca():
    """Reduce the HOG face features with PCA and store them in the DB.

    PCA keeps enough components to explain 99.9% of the variance, so the
    number of output columns is data-dependent; the column names are derived
    from the transformed array's actual shape (the original hard-coded 184
    names and would crash whenever PCA kept a different component count).
    """
    sql_handler = SqlHandler()
    pca = PCA(n_components=0.999)  # keep 99.9% of explained variance
    hog = pd.read_csv(config.data_dir + FACE_HOG)
    hog_pca_values = pca.fit_transform(hog)
    # Name one column per component actually retained by PCA.
    n_components = hog_pca_values.shape[1]
    hog_pca_names = ['hog_pca_' + str(i) for i in range(n_components)]
    hog_pca = pd.DataFrame(hog_pca_values, columns=hog_pca_names)
    # PREFIX entries look like '300_'; strip the trailing '_' to get the ID.
    # (renamed from `id`, which shadowed the builtin)
    sample_ids = [float(p[:-1]) for p in PREFIX]
    col_name = hog_pca.columns.tolist()
    col_name.insert(0, 'ID')
    hog_pca = hog_pca.reindex(columns=col_name, fill_value=1)
    hog_pca['ID'] = sample_ids
    # The chosen features can differ between runs, so drop the old table first.
    sql_handler.execute(f'drop table if exists {config.tbl_exp3_hog_fea};')
    sql_handler.df_to_db(hog_pca, config.tbl_exp3_hog_fea)
    logger.info('hog feature exp3 has been stored!')
def extract_audio(sample, prefix, opensmile_options, outputoption, feature_type):
    """Run one openSMILE extraction task and store the result in the DB.

    sample: phq id, e.g. 310
    prefix: phq file prefix, e.g. '310_'
    feature_type: 'mfcc' or 'egemaps'

    Returns (sample, feature_type) so the dispatcher can track completion.
    """
    infilename = f"{config.sample_dir}/{prefix}P/{prefix}{SUFFIX['wav']}"
    outfilename = f'{sample}_{feature_type}.csv'
    # Assemble the openSMILE command line (same text as the original concat).
    opensmile_call = (
        f'{config.opensmile_exe} {opensmile_options} -inputfile {infilename} '
        f'{outputoption} {outfilename} -instname {sample} -output ?'
    )
    os.system(opensmile_call)

    if not os.path.exists(outfilename):
        # openSMILE produced no output for this sample; report and move on.
        return sample, feature_type

    df = pd.read_csv(outfilename, sep=';')
    db_handler = SqlHandler()
    if feature_type == 'mfcc':
        db_handler.df_to_db(df, config.tbl_mfcc, if_exists='append')
    elif feature_type == 'egemaps':
        db_handler.df_to_db(df, config.tbl_egemaps, if_exists='append')
    os.remove(outfilename)  # the CSV is only a transfer buffer into the DB
    return sample, feature_type
class Train(Process):
    # NOTE(review): this looks like an earlier revision of the Train
    # controller that also appears later in this file; in `run` the
    # multi-modal model is constructed but never trained/evaluated.

    def __init__(self, model_name=None, feature_name=None, gender=False, feature_tables=None):
        """Train model Controller, dispatch the training tasks.

        Input:
            model_name: certain model depend on papers
            feature_name: support for a group of absolute features
            feature_tables: support for different feature tables, which makes
                it possible to combine different modality features freely.
                Note that the train controller is not responsible for
                processing the feature table; that is completed by a certain
                model.
            gender: if the model should consider the gender difference
        Output: result and related information will be printed by each
            estimator in logs.
        """
        super().__init__()
        self.model_name = model_name
        self.feature_name = feature_name
        self.feature_tables = feature_tables
        self.gender = gender
        self.sql_handler = SqlHandler()
        # Resolve the requested feature configuration immediately.
        self._set_feature()

    def _set_feature(self):
        # Populate self.data / self.feature_list from either a named feature
        # group (feature_name) or an explicit list of tables (feature_tables).
        if self.feature_name is not None:
            # you are using features from a certain predefined group
            if self.feature_name == FEATURE_EXP_2:
                # exp2 data arrives as pandas DataFrames by default
                self.data = get_data_by_id(config.tbl_exp2_audio_fea, self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp2_audio_fea)
                # 'ID' is the join key, not a feature
                self.feature_list.remove('ID')
            else:
                print('not finished yet')
        elif self.feature_tables is not None:
            # Now you are using a multi-modality model!
            self.data = get_data_multi_modality(self.feature_tables, self.gender)
            # NOTE(review): get_cloumns_from_table is given the whole table
            # list and unpacked into three — presumably it returns one column
            # list per modality here; confirm against SqlHandler.
            self.audio_fea, self.video_fea, self.text_fea = \
                self.sql_handler.get_cloumns_from_table(self.feature_tables)
        else:
            print('You must choose a set of features to train!!!')

    def _train_eval(self, train, dev, model):
        # Instantiate, fit, and score one predictor on the given splits.
        model = model(train, dev, features=self.feature_list)
        model.train()
        return model.eval()

    def run(self):
        # Process entry point: dispatch on model_name / feature configuration.
        if self.model_name == MODEL_RF:
            from core.predictor.RF.rf_predict import RfPredictor
            if self.feature_name is not None:
                if not self.gender:
                    train, dev = self.data
                    score = self._train_eval(train, dev, RfPredictor)
                    logger.info(
                        f'Evalutaion Scores {self.model_name} with {self.feature_name}: {score}'
                    )
                else:
                    # Gender-aware: train/evaluate one model per gender.
                    train_m, dev_m, train_f, dev_f = self.data
                    score = self._train_eval(train_m, dev_m, RfPredictor)
                    logger.info(
                        f'Evalutaion Scores Male {self.model_name} with {self.feature_name}: {score}'
                    )
                    score = self._train_eval(train_f, dev_f, RfPredictor)
                    logger.info(
                        f'Evalutaion Scores Female {self.model_name} with {self.feature_name}: {score}'
                    )
            else:
                if not self.gender:  # multi_modality
                    from core.predictor.RF.rf_predict import MultiModalRandomForest
                    # NOTE(review): mmrf is constructed but never evaluated or
                    # logged — this branch appears unfinished.
                    mmrf = MultiModalRandomForest(self.data)
                else:
                    print('not finish yet!')
def createCodebook(feature_name):
    """Create an openXBOW bag-of-words codebook for one feature family.

    Pipeline: COVAREP / FORMANT / FAUs / gaze+pose CSVs -> stacked feature
    matrix -> openXBOW codebook -> (later) extract all files' features.
    Example openXBOW call:
    java -jar openXBOW.jar -i llds.csv -o bow.csv -a 1 -log -c kmeans++
         -size 100 -B codebook -writeName -writeTimeStamp
    """
    sqlhandler = SqlHandler()
    train = sqlhandler.get_df(config.tbl_training_set)
    dev = sqlhandler.get_df(config.tbl_develop_set)
    trainID = train['Participant_ID'].values
    devID = dev['Participant_ID'].values
    trainDev = np.hstack([trainID, devID])
    # Random subsample of participants used to fit the codebook.
    folds = np.random.choice(trainDev, 20, replace=False)  # for video 50, for audio 20
    window_size = 4
    hop_size = 1
    openxbow = 'java -jar E:/openXBOW/openXBOW.jar '
    openxbow_options = '-writeName -writeTimeStamp -t ' + str(
        window_size) + ' ' + str(hop_size)
    codebook_out = 'E:/openXBOW/codebooks/'
    openxbow_options_codebook = f'-size 100 -a 1 -log -B {codebook_out}{feature_name}_codebook '

    if feature_name == 'faus':
        for fold in folds:
            path = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['au']
            feature = np.loadtxt(path, delimiter=',', skiprows=1)
            # keep only frames where tracking succeeded (success flag == 1)
            success = feature[:, 3] == 1
            feature = feature[success, 1:18]
            feature = np.delete(feature, [1, 2], axis=1)  # drop confidence/success cols
            save_features(codebook_out + 'fausTrainDevRandom.csv',
                          feature, append=True, instname=str(fold))
        # Fit the codebook once over all accumulated folds.
        os.system(openxbow + f'-standardizeInput -i {codebook_out}fausTrainDevRandom.csv ' +
                  openxbow_options_codebook + openxbow_options +
                  ' -c kmeans++' + f' -o {codebook_out}temp.csv')
    elif feature_name == 'gaze_pose':
        for fold in folds:
            path1 = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['gaze']
            path2 = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['pose']
            gaze_data = pd.read_csv(path1)
            pose_data = pd.read_csv(path2)
            if fold in [367, 396, 432]:
                # Folds 367/396/432 have missing frames and ' -1.#IND'
                # sentinel values in the pose data; drop those rows and
                # repair the last six columns (strip the leading char,
                # then parse as float).
                temp = np.all(pose_data.values != ' -1.#IND', axis=1)
                # merge key = frame / timestamp / confidence / success
                data = pd.merge(gaze_data, pose_data)
                data = data[temp]
                data.iloc[:, -6:] = data.iloc[:, -6:].applymap(
                    lambda x: float(x[1:]))
            else:
                data = pd.merge(gaze_data, pose_data)
            success = data[' success'] == 1
            data = data.values[:, 1:]
            data = np.delete(data, [1, 2], axis=1)
            data = data[success]
            save_features(codebook_out + 'gazePoseTrainDevRandom.csv',
                          data, append=True, instname=str(fold))
        # BUG FIX: openXBOW previously read 'gazeposeTrainDevRandom.csv'
        # while the features were written to 'gazePoseTrainDevRandom.csv';
        # that only worked on a case-insensitive filesystem. Use one name.
        os.system(openxbow + f'-standardizeInput -i {codebook_out}gazePoseTrainDevRandom.csv ' +
                  openxbow_options_codebook + openxbow_options +
                  ' -c kmeans++' + f' -o {codebook_out}temp.csv')
    elif feature_name == 'covarep':
        for fold in folds:
            path = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['covarep']
            data = np.loadtxt(path, delimiter=',')
            # COVAREP is sampled at 100 Hz; synthesize a timestamp column.
            timestamp = np.arange(0, data.shape[0]).reshape(data.shape[0], 1)
            timestamp = timestamp / 100
            data = np.hstack([timestamp, data])
            data = data[data[:, 2] == 1]       # keep voiced frames (VUV == 1)
            data = np.delete(data, 2, axis=1)  # drop the VUV flag column
            data[np.isnan(data)] = 0
            data[np.isinf(data)] = 0
            save_features(codebook_out + 'covarepTrainDevRandom.csv',
                          data, append=True, instname=str(fold))
        os.system(openxbow + f'-standardizeInput -i {codebook_out}covarepTrainDevRandom.csv ' +
                  openxbow_options_codebook + openxbow_options +
                  ' -c kmeans++' + f' -o {codebook_out}temp.csv')
    else:
        # default: FORMANT features
        for fold in folds:
            path = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['formant']
            data = np.loadtxt(path, delimiter=',')
            timestamp = np.arange(0, data.shape[0]).reshape(data.shape[0], 1)
            timestamp = timestamp / 100
            data = np.hstack([timestamp, data])
            data[np.isnan(data)] = 0
            data[np.isinf(data)] = 0
            save_features(codebook_out + 'formantTrainDevRandom.csv',
                          data, append=True, instname=str(fold))
        os.system(openxbow + f'-standardizeInput -i {codebook_out}formantTrainDevRandom.csv ' +
                  openxbow_options_codebook + openxbow_options +
                  ' -c kmeans++' + f' -o {codebook_out}temp.csv')
def gen_fea():
    """Extract audio/text/video features for all samples and store them.

    The first sample is processed synchronously to seed the three feature
    matrices; the remaining samples run on a thread pool and are
    concatenated onto them.  The three matrices are then written to the
    exp3 audio / video / text tables (plus the HOG-PCA table).
    """
    sql_handler = SqlHandler()
    # Seed the three matrices with the first sample folder.
    audio_feas, text_feas, vedio_feas = gen_sigle_fea(PREFIX[0])
    # (HOG features are loaded separately — arguably that belongs at
    # model-training time.)  Data is split across three tables.
    with ThreadPoolExecutor(max_workers=30) as executor:  # run tasks in parallel
        task = [executor.submit(gen_sigle_fea, fold) for fold in PREFIX[1:]]
        for future in as_completed(task):
            try:
                # one result = all features of one sample folder, e.g. 300_P
                audio_value, text_value, vedio_value = future.result()
                audio_feas = np.concatenate((audio_feas, audio_value))
                vedio_feas = np.concatenate((vedio_feas, vedio_value))
                text_feas = np.concatenate((text_feas, text_value))
            except Exception:
                # Best-effort: skip a failed sample, but log it instead of
                # the previous bare `except:` that hid every error.
                logger.exception('feature extraction failed for one sample')
                continue
    # Build the name lists locally instead of mutating the module-level
    # COVAREP_COLUMNS (the original remove('VUV') made this function
    # non-reentrant: a second call would corrupt the shared constant).
    covarep_cols = [c for c in COVAREP_COLUMNS if c != 'VUV']
    audio_fea_name = ['ID'] + covarep_cols + list(FORMANT_COLUMNS)
    text_fea_name = ['ID'] + list(TEXT_COLUMNS)
    vedio_fea_name = ['ID'] + list(STABLE_POINTS)
    assert len(audio_feas[0]) == len(audio_fea_name) and len(text_feas[0]) == len(text_fea_name) \
        and len(vedio_feas[0]) == len(vedio_fea_name)
    audio_df = pd.DataFrame(audio_feas, columns=audio_fea_name)
    vedio_df = pd.DataFrame(vedio_feas, columns=vedio_fea_name)
    text_df = pd.DataFrame(text_feas, columns=text_fea_name)
    hog_pca()
    # The chosen features can differ between runs, so drop each old table
    # before storing the new one.
    sql_handler.execute(f'drop table if exists {config.tbl_exp3_audio_fea};')
    sql_handler.df_to_db(audio_df, config.tbl_exp3_audio_fea)
    logger.info('audio feature exp3 has been stored!')
    sql_handler.execute(f'drop table if exists {config.tbl_exp3_vedio_fea};')
    sql_handler.df_to_db(vedio_df, config.tbl_exp3_vedio_fea)
    logger.info('vedio feature exp3 has been stored!')
    sql_handler.execute(f'drop table if exists {config.tbl_exp3_text_fea};')
    sql_handler.df_to_db(text_df, config.tbl_exp3_text_fea)
    logger.info('text feature exp3 has been stored!')
from torchvision import models
from tensorflow import keras
from global_values import *
import config

# Module-level setup: load pretrained CNN backbones once and expose their
# fc7-style feature extractors.  NOTE(review): `torch`, `VggFc7`,
# `alexnetFc7`, `get_logger` and `SqlHandler` are not imported here —
# presumably they come in via `from global_values import *` or an import
# outside this view; confirm.
vgg19 = models.vgg.vgg19(pretrained=False)
vgg19.load_state_dict(torch.load(config.pretrained_model_dir + 'vgg19.pth'))
vggfc = VggFc7(vgg19)
alexnet = models.alexnet(pretrained=False)
alexnet.load_state_dict(torch.load(config.pretrained_model_dir + 'alexnet.pth'))
alexnetfc = alexnetFc7(alexnet)
logger = get_logger()
sqlhandler = SqlHandler()


# Extract the features of one sample folder, e.g. 300_P.
def gen_sigle_fea(fold):
    # NOTE(review): this definition appears truncated here — it computes the
    # COVAREP bag-of-words and stops without returning anything; the rest of
    # the body is outside this view.
    # audio_fea = Audio_features()
    # video_fea = Video_features()
    # #text_fea = Text_features()
    path = f"{config.data_dir}/{fold}P/{fold}{SUFFIX['covarep']}"
    # covarep = audio_fea.covarep_fea(path)
    bow(path, feature_name='covarep')
    path = f'E:/database/COVAREP_BOW/{fold}covarep_bow.csv'
    covarep_bow = np.loadtxt(path, delimiter=';')
    covarep_bow = covarep_bow[:, 1:]  # drop the instance-name column
    os.system('rm ' + path)  # the BOW csv is only a temp transfer file
class Train(Process):
    def __init__(self, model_name=None, feature_name=None, gender=False, feature_tables=None):
        """Train model Controller, dispatch the training tasks.

        Input:
            model_name: certain model depend on papers
            feature_name: support for a group of absolute features
            feature_tables: support for different feature tables, which makes
                it possible to combine different modality features freely.
                Note that the train controller is not responsible for
                processing the feature table; that is completed by a certain
                model.
            gender: if the model should consider the gender difference
        Output: result and related information will be printed by each
            estimator in logs.
        """
        super().__init__()  # initialize the Process parent class
        self.model_name = model_name
        self.feature_name = feature_name
        self.feature_tables = feature_tables
        self.gender = gender
        self.sql_handler = SqlHandler()
        self._set_feature()  # resolve the requested features immediately

    def _set_feature(self):
        # Populate self.data / self.feature_list from either a named feature
        # group (feature_name) or an explicit list of tables (feature_tables).
        if self.feature_name is not None:
            # you are using features from a certain predefined group
            if self.feature_name == FEATURE_EXP_2:
                # exp2 data arrives as pandas DataFrames by default
                self.data = get_data_by_id(config.tbl_exp2_audio_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp2_audio_fea)
                self.feature_list.remove('ID')  # 'ID' is the join key, not a feature
            elif self.feature_name == FEATURE_EXP_1:
                self.data = get_data_by_id(config.tbl_exp1_fea, self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp1_fea)
                self.feature_list.remove('ID')
            #---------------------baseline----------------------------------
            elif self.feature_name == FEATURE_EXP_3_VEDIO:  #
                self.data = get_data_by_id(config.tbl_exp3_vedio_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp3_vedio_fea)
                self.feature_list.remove('ID')
            elif self.feature_name == FEATURE_EXP_3_TEXT:  #
                self.data = get_data_by_id(config.tbl_exp3_text_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp3_text_fea)
                self.feature_list.remove('ID')
            elif self.feature_name == FEATURE_EXP_3_AUDIO:  #
                self.data = get_data_by_id(config.tbl_exp3_audio_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp3_audio_fea)
                self.feature_list.remove('ID')
            elif self.feature_name == FEATURE_EXP_3_HOGPCA:  #
                self.data = get_data_by_id(config.tbl_exp3_hog_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp3_hog_fea)
                self.feature_list.remove('ID')
            #-----------------baseline---------------------------------------------
            #-----------------finalmodel------------------------------------
            # Final-model features are pre-extracted numpy sequences on disk.
            elif self.feature_name == FEATURE_FINAL_COVAREP:
                path = 'E:/rnn_models/data/covarep/'
                self.data = get_npdata_by_id(path, self.gender)
            elif self.feature_name == FEATURE_FINAL_FORMANT:
                path = 'E:/rnn_models/data/formant/'
                self.data = get_npdata_by_id(path, self.gender)
            elif self.feature_name == FEATURE_FINAL_FAUs:
                path = 'E:/rnn_models/data/faus/'
                self.data = get_npdata_by_id(path, self.gender)
            elif self.feature_name == FEATURE_FINAL_GAZE_POSE:
                path = 'E:/rnn_models/data/gaze_pose/'
                self.data = get_npdata_by_id(path, self.gender)
            elif self.feature_name == FEATURE_FINAL_TEXT:
                # text features still come from the DB table, like exp3
                self.data = get_data_by_id(config.tbl_exp3_text_fea,
                                           self.gender)
                self.feature_list = self.sql_handler.get_cloumns_from_table(
                    config.tbl_exp3_text_fea)
                self.feature_list.remove('ID')
            elif self.feature_name == FEATURE_FINAL_VGG:
                path = 'E:/rnn_models/data/ds_vgg/'
                self.data = get_npdata_by_id(path, self.gender)
            elif self.feature_name == FEATURE_FINAL_ALEXNET:
                path = 'E:/rnn_models/data/ds_alexnet/'
                self.data = get_npdata_by_id(path, self.gender)
            elif self.feature_name == FEATURE_FINAL_GAZE_POSE_BOW:
                path = 'E:/rnn_models/data/gaze_pose_bow/'
                self.data = get_npdata_by_id(path, self.gender)
            elif self.feature_name == FEATURE_FINAL_FAUs_BOW:
                path = 'E:/rnn_models/data/faus_bow/'
                self.data = get_npdata_by_id(path, self.gender)
            elif self.feature_name == FEATURE_FINAL_COVAREP_BOW:
                path = 'E:/rnn_models/data/covarep_bow/'
                self.data = get_npdata_by_id(path, self.gender)
            elif self.feature_name == FEATURE_FINAL_FORMANT_BOW:
                path = 'E:/rnn_models/data/formant_bow/'
                self.data = get_npdata_by_id(path, self.gender)
            elif self.feature_name == FEATURE_FINAL_FUSION:
                # Late fusion: load per-model predicted scores from CSV;
                # column 0 is the label, the rest are each model's scores.
                path = 'E:/data/'
                if self.gender:
                    m_dev = pd.read_csv(path + 'm_pre_dev_scores.csv')
                    m_dev = m_dev.values
                    m_dev_label = m_dev[:, 0].reshape(m_dev.shape[0], 1)
                    m_dev_features = m_dev[:, 1:]
                    m_test = pd.read_csv(path + 'm_pre_test_scores.csv')
                    m_test = m_test.values
                    m_test_label = m_test[:, 0].reshape(m_test.shape[0], 1)
                    m_test_features = m_test[:, 1:]
                    f_dev = pd.read_csv(path + 'f_pre_dev_scores.csv')
                    f_dev = f_dev.values
                    f_dev_label = f_dev[:, 0].reshape(f_dev.shape[0], 1)
                    f_dev_features = f_dev[:, 1:]
                    f_test = pd.read_csv(path + 'f_pre_test_scores.csv')
                    f_test = f_test.values
                    f_test_label = f_test[:, 0].reshape(f_test.shape[0], 1)
                    f_test_features = f_test[:, 1:]
                    self.data = m_dev_features,m_dev_label,m_test_features,m_test_label,\
                        f_dev_features,f_dev_label,f_test_features,f_test_label
                else:
                    dev = pd.read_csv(path + 'pre_dev_scores.csv')
                    dev = dev.values
                    dev_label = dev[:, 0].reshape(dev.shape[0], 1)
                    dev_features = dev[:, 1:]
                    test = pd.read_csv(path + 'pre_test_scores.csv')
                    test = test.values
                    test_label = test[:, 0].reshape(test.shape[0], 1)
                    test_features = test[:, 1:]
                    self.data = dev_features, dev_label, test_features, test_label
            #------------------finalmodel-----------------------------------------------
            else:
                print('not finished yet')
        elif self.feature_tables is not None:
            # Now you are using a multi-modality model!
            # Only one audio feature set is computed for now.
            # feature_tables must match the table names in the database,
            # otherwise this will raise.
            self.data = get_data_multi_modality(self.feature_tables,
                                                self.gender)
            self.audio_fea, self.vedio_fea, self.text_fea = \
                self.sql_handler.get_cloumns_from_table(self.feature_tables)
            self.audio_fea.remove('ID')
            self.vedio_fea.remove('ID')
            self.text_fea.remove('ID')
            self.feature_list = {
                'audio': self.audio_fea,
                'vedio': self.vedio_fea,
                'text': self.text_fea
            }
        else:
            print('You must choose a set of features to train!!!')

    def _train_eval(self, train, dev, test, model, feature):
        # Instantiate, fit, and score one predictor on the given splits.
        # NOTE: the RF and RNN predictor signatures differ by the `test`
        # parameter — remember to adjust when switching predictors.
        model = model(train, dev, test, features=feature)
        model.train()
        return model.eval()

    def run(self):
        # Process entry point: dispatch on model_name / feature configuration.
        if self.model_name == MODEL_RF:
            from core.predictor.randomForest.rf_predict import RfPredictor
            if self.feature_name is not None:
                if not self.gender:
                    # when running, _train_eval needs the extra `test` argument
                    train, dev, test = self.data
                    score = self._train_eval(train, dev, test, RfPredictor,
                                             self.feature_list)
                    logger.info(
                        f'Evalutaion Scores {self.model_name} with {self.feature_name}: {score}'
                    )
                else:
                    # Gender-aware: one model per gender.
                    train_m, dev_m, test_m, train_f, dev_f, test_f = self.data
                    score = self._train_eval(train_m, dev_m, test_m,
                                             RfPredictor, self.feature_list)
                    logger.info(
                        f'Evalutaion Scores Male {self.model_name} with {self.feature_name}: {score}'
                    )
                    score = self._train_eval(train_f, dev_f, test_f,
                                             RfPredictor, self.feature_list)
                    logger.info(
                        f'Evalutaion Scores Female {self.model_name} with {self.feature_name}: {score}'
                    )
            else:
                from core.predictor.randomForest.rf_predict import MultiModalRandomForest
                if not self.gender:  # multi_modality
                    mmrf = MultiModalRandomForest(self.data, self.feature_list)
                    score = mmrf.eval()
                    logger.info(
                        f'Evalutaion Scores {self.model_name} with {self.feature_tables}: {score}'
                    )
                else:
                    data_male = self.data['male']
                    mmrf = MultiModalRandomForest(data_male, self.feature_list)
                    score = mmrf.eval()
                    logger.info(
                        f'Evalutaion Scores Male {self.model_name} with {self.feature_tables}: {score}'
                    )
                    data_female = self.data['female']
                    mmrf = MultiModalRandomForest(data_female, self.feature_list)
                    score = mmrf.eval()
                    logger.info(
                        f'Evalutaion Scores Female {self.model_name} with {self.feature_tables}: {score}'
                    )
        elif self.model_name == MODEL_RNN:
            if self.feature_name is not None:
                from core.predictor.rnn.RNN import RnnPredictor
                if self.gender:
                    # NOTE(review): this expects 12 values, so gendered RNN
                    # data must come from get_npdata_by_id, not the 8-value
                    # FUSION layout; the male branch is currently disabled.
                    m_train_X, m_train_Y, m_dev_X, m_dev_Y, m_test_X, m_test_Y, f_train_X, f_train_Y, f_dev_X, f_dev_Y, f_test_X, f_test_Y = self.data
                    #m_train,m_dev,m_test = (m_train_X,m_train_Y),(m_dev_X,m_dev_Y),(m_test_X,m_test_Y)
                    f_train, f_dev, f_test = (f_train_X, f_train_Y), (f_dev_X, f_dev_Y), (f_test_X, f_test_Y)
                    #score = self._train_eval(m_train,m_dev,m_test,RnnPredictor,'m_'+self.feature_name)
                    #logger.info(f'Evalutaion Scores male {self.model_name} with {self.feature_name}: {score}')
                    score = self._train_eval(f_train, f_dev, f_test,
                                             RnnPredictor,
                                             'f_' + self.feature_name)
                    logger.info(
                        f'Evalutaion Scores female {self.model_name} with {self.feature_name}: {score}'
                    )
                else:
                    train_X, train_Y, dev_X, dev_Y, test_X, test_Y = self.data
                    train, dev, test = (train_X, train_Y), (dev_X, dev_Y), (test_X, test_Y)
                    score = self._train_eval(train, dev, test, RnnPredictor,
                                             self.feature_name)
                    logger.info(
                        f'Evalutaion Scores {self.model_name} with {self.feature_name}: {score}'
                    )
        elif self.model_name == MODEL_LINEAR:
            # Late-fusion ridge regression over per-model predicted scores.
            from sklearn import linear_model
            from sklearn.metrics import mean_squared_error
            reg = linear_model.Ridge(alpha=10)
            if self.gender:
                pass
            else:
                dev_features, dev_label, test_features, test_label = self.data
                # NOTE(review): the ridge model is fit on the dev scores and
                # dev_rmse is therefore a training error — confirm intended.
                reg.fit(dev_features, dev_label)
                dev_pre = reg.predict(dev_features)
                test_pre = reg.predict(test_features)
                dev_rmse = np.sqrt(mean_squared_error(dev_label, dev_pre))
                test_rmse = np.sqrt(mean_squared_error(test_label, test_pre))
                logger.info(f"dev_rmse: {dev_rmse}; test_rmse: {test_rmse}")
        else:
            print('not finish yet!')
def to_db(self, data_frame, table):
    """Persist *data_frame* into the given database *table*."""
    handler = SqlHandler()
    handler.df_to_db(data_frame, table)