Esempio n. 1
0
def gen_fea():
    """Build the exp1 audio/text feature table and store it in the database.

    ``gen_sigle_fea`` is run for every entry of ``PREFIX`` on a thread pool;
    each successful call contributes one row. Column names are ``ID``, then
    every ``<audio column>_<statistic>`` combination, then ``TEXT_COLUMNS``.
    The target table ``config.tbl_exp1_fea`` is dropped and rewritten.

    Unlike the previous version, the shared ``COVAREP_COLUMNS`` list is left
    untouched, so the function can safely be called more than once.

    Raises:
        RuntimeError: if no entry of ``PREFIX`` produced a feature row.
        AssertionError: if a row's length differs from the number of columns.
    """
    sql_handler = SqlHandler()

    audio_text_value = []
    with ThreadPoolExecutor(max_workers=30) as executor:  # launch the tasks in parallel
        tasks = {executor.submit(gen_sigle_fea, fold): fold for fold in PREFIX}
        for future in as_completed(tasks):
            try:
                # Features of all the data under one folder, e.g. 300_P.
                fea_item = future.result()
            except Exception:
                # Best effort: a single failing folder must not abort the run,
                # but the failure has to be visible in the log.
                logger.exception(
                    f'feature extraction failed for {tasks[future]}; skipped')
                continue
            audio_text_value.append(fea_item)

    if not audio_text_value:
        raise RuntimeError('no features were generated for any sample')

    # Work on a copy: mutating the module-level list would break a second call.
    audio_columns = list(COVAREP_COLUMNS)
    audio_columns.remove('VUV')
    audio_columns.extend(FORMANT_COLUMNS)

    # Cartesian product, equivalent to two nested for-loops.
    audio_fea = ['ID']
    audio_fea.extend(
        a_fea + '_' + s_fea
        for a_fea, s_fea in itertools.product(audio_columns, stats_fea.columns))
    audio_text_fea = audio_fea + TEXT_COLUMNS

    assert len(audio_text_value[0]) == len(audio_text_fea)

    audio_text_df = pd.DataFrame(audio_text_value, columns=audio_text_fea)

    # The selected features differ from run to run, so the old table has to be
    # dropped before the new one is stored.
    sql_handler.execute(f'drop table if exists {config.tbl_exp1_fea};')
    sql_handler.df_to_db(audio_text_df, config.tbl_exp1_fea)
    logger.info('audio feature exp1 has been stored!')
Esempio n. 2
0
def data_set():
    """Load the training and development CSV files into the database.

    Both files are read from ``config.data_dir`` with the first row as header.
    The tables ``config.tbl_training_set`` and ``config.tbl_develop_set`` are
    dropped (if they exist) and then rewritten from the loaded frames.
    """
    df_train = pd.read_csv(config.data_dir + global_values.TRAIN_SET_NAME,
                           header=0)
    df_dev = pd.read_csv(config.data_dir + global_values.DEL_SET_NAME,
                         header=0)

    logger.debug(df_dev.head())
    sql_handler = SqlHandler()
    # "if exists" keeps the first run (tables not created yet) from failing.
    sql_handler.execute(f'drop table if exists {config.tbl_develop_set}')
    sql_handler.execute(f'drop table if exists {config.tbl_training_set}')

    sql_handler.df_to_db(df_train, config.tbl_training_set)
    sql_handler.df_to_db(df_dev, config.tbl_develop_set)
Esempio n. 3
0
def hog_pca():
    """Reduce the HOG face features with PCA and store them for exp3.

    Reads ``config.data_dir + FACE_HOG``, fits a PCA with
    ``n_components=0.999`` on it, prepends an ``ID`` column derived from
    ``PREFIX`` (each prefix without its last character, as a float) and
    rewrites ``config.tbl_exp3_hog_fea``.

    Raises:
        ValueError: if the number of entries in ``PREFIX`` differs from the
            number of rows in the HOG file.
    """
    sql_handler = SqlHandler()
    pca = PCA(n_components=0.999)
    hog = pd.read_csv(config.data_dir + FACE_HOG)
    hog_pca_values = pca.fit_transform(hog)

    # The number of components is not known up front (presumably scikit-learn
    # picks it from the 0.999 variance target -- confirm), so name the columns
    # from the actual output width instead of a hard-coded count.
    n_components = hog_pca_values.shape[1]
    hog_pca_names = [f'hog_pca_{i}' for i in range(n_components)]
    hog_pca_df = pd.DataFrame(hog_pca_values, columns=hog_pca_names)

    # One ID per row, put in front of the component columns.
    sample_ids = [float(prefix[:-1]) for prefix in PREFIX]
    hog_pca_df.insert(0, 'ID', sample_ids)

    # The selected features differ from run to run, so the old table has to be
    # dropped before the new one is stored.
    sql_handler.execute(f'drop table if exists {config.tbl_exp3_hog_fea};')
    sql_handler.df_to_db(hog_pca_df, config.tbl_exp3_hog_fea)
    logger.info('hog feature exp3 has been stored!')
Esempio n. 4
0
def extract_audio(sample, prefix, opensmile_options, outputoption,
                  feature_type):
    """Run openSMILE on one sample's wav file and append the result to the DB.

    Args:
        sample: phq-id like 310
        prefix: phq file prefix like 310_
        opensmile_options: option string placed right after the executable
        outputoption: output flag placed right before the output file name
        feature_type: mfcc or egemaps; any other value stores nothing

    Returns:
        The tuple ``(sample, feature_type)``, whether or not openSMILE
        produced an output file.
    """
    infilename = f"{config.sample_dir}/{prefix}P/{prefix}{SUFFIX['wav']}"
    outfilename = f'{sample}_{feature_type}.csv'
    # NOTE(review): the command is a shell string built from the arguments;
    # this is only safe while every argument comes from trusted configuration.
    opensmile_call = (f'{config.opensmile_exe} {opensmile_options}'
                      f' -inputfile {infilename} {outputoption} {outfilename}'
                      f' -instname {sample} -output ?')
    os.system(opensmile_call)
    if not os.path.exists(outfilename):
        # openSMILE wrote no file, so there is nothing to store.
        return sample, feature_type

    try:
        df = pd.read_csv(outfilename, sep=';')
        db_handler = SqlHandler()
        if feature_type == 'mfcc':
            db_handler.df_to_db(df, config.tbl_mfcc, if_exists='append')
        elif feature_type == 'egemaps':
            db_handler.df_to_db(df, config.tbl_egemaps, if_exists='append')
    finally:
        # Always delete the temporary CSV, even when reading or storing fails;
        # a stale file could otherwise be picked up by a later run.
        os.remove(outfilename)
    return sample, feature_type
Esempio n. 5
0
def gen_fea():
    """Build the exp3 audio, video and text feature tables and store them.

    ``gen_sigle_fea`` is called directly for ``PREFIX[0]`` and on a thread
    pool for the remaining entries; the three arrays returned by each call are
    concatenated onto the running audio/text/video arrays. ``hog_pca()`` is
    then invoked, and the three ``config.tbl_exp3_*_fea`` tables are dropped
    and rewritten.

    Unlike the previous version, the shared ``COVAREP_COLUMNS`` list is left
    untouched, so the function can safely be called more than once.

    Raises:
        AssertionError: if the width of a feature array differs from the
            number of its column names.
    """
    sql_handler = SqlHandler()
    audio_feas, text_feas, vedio_feas = gen_sigle_fea(PREFIX[0])
    # Reading the hog features should be done where the model is trained.
    # The data is extracted into three separate tables.

    with ThreadPoolExecutor(max_workers=30) as executor:  # launch the tasks in parallel
        tasks = {
            executor.submit(gen_sigle_fea, fold): fold
            for fold in PREFIX[1:]
        }
        for future in as_completed(tasks):
            try:
                # Features of all the data under one folder, e.g. 300_P.
                audio_value, text_value, vedio_value = future.result()
                audio_feas = np.concatenate((audio_feas, audio_value))
                vedio_feas = np.concatenate((vedio_feas, vedio_value))
                text_feas = np.concatenate((text_feas, text_value))
            except Exception:
                # Best effort: a single failing folder must not abort the run,
                # but the failure has to be visible in the log.
                logger.exception(
                    f'feature extraction failed for {tasks[future]}; skipped')
                continue

    # Work on a copy: mutating the module-level list would break a second call.
    covarep_columns = list(COVAREP_COLUMNS)
    covarep_columns.remove('VUV')

    audio_fea_name = ['ID'] + covarep_columns + FORMANT_COLUMNS
    text_fea_name = ['ID', *TEXT_COLUMNS]
    vedio_fea_name = ['ID', *STABLE_POINTS]

    assert len(audio_feas[0]) == len(audio_fea_name), 'audio column mismatch'
    assert len(text_feas[0]) == len(text_fea_name), 'text column mismatch'
    assert len(vedio_feas[0]) == len(vedio_fea_name), 'vedio column mismatch'
    audio_df = pd.DataFrame(audio_feas, columns=audio_fea_name)
    vedio_df = pd.DataFrame(vedio_feas, columns=vedio_fea_name)
    text_df = pd.DataFrame(text_feas, columns=text_fea_name)

    hog_pca()

    # The selected features differ from run to run, so each old table has to
    # be dropped before the new one is stored.
    sql_handler.execute(f'drop table if exists {config.tbl_exp3_audio_fea};')
    sql_handler.df_to_db(audio_df, config.tbl_exp3_audio_fea)
    logger.info('audio feature exp3 has been stored!')

    sql_handler.execute(f'drop table if exists {config.tbl_exp3_vedio_fea};')
    sql_handler.df_to_db(vedio_df, config.tbl_exp3_vedio_fea)
    logger.info('vedio feature exp3 has been stored!')

    sql_handler.execute(f'drop table if exists {config.tbl_exp3_text_fea};')
    sql_handler.df_to_db(text_df, config.tbl_exp3_text_fea)
    logger.info('text feature exp3 has been stored!')
Esempio n. 6
0
 def to_db(self, data_frame, table):
     """Write ``data_frame`` to ``table`` through a newly created SqlHandler."""
     SqlHandler().df_to_db(data_frame, table)