def gen_empty_ratings(folder):
    """Create placeholder ratings for every ``.wav`` file of a session folder.

    The base name of ``folder`` is split on ``'_'``: the last piece is used
    as ``vp`` and the second-to-last piece as the session id. The ``.wav``
    files directly inside ``folder`` are reordered with
    ``snodgrass_audio_chronological_sort_indices`` and mapped to their
    order numbers with ``snodgrass_audio2order``.

    Args:
        folder: Path of the session folder to scan.

    Returns:
        A list with one ``SnodgrassWordRating`` per ``.wav`` file, in the
        sorted file order. Each entry carries the looked-up word, the order
        number, three NaN values, two empty strings, ``vp``, the session id
        and the path of the audio file.

    Raises:
        RuntimeError: If the folder does not hold exactly 233 ``.wav`` files
            (the message is also printed before raising).
    """
    name_parts = os.path.basename(folder).split('_')
    vp, session_id = name_parts[-1], name_parts[-2]

    recordings = np.array(glob.glob(os.path.join(folder, '*.wav')))
    sort_indices = snodgrass_audio_chronological_sort_indices(recordings)
    recordings = recordings[sort_indices]
    orders = snodgrass_audio2order(recordings)
    snodgrass_words = load_snodgrass_words()

    # Only complete sessions are supported; anything else is reported and rejected.
    n_recordings = len(recordings)
    if n_recordings != 233:
        msg = 'Only {0} files in session folder {1} -- non-full sessions currently not supported for csv export'.format(
            n_recordings, folder)
        print(msg)
        raise RuntimeError(msg)

    # The word list is indexed with ``order - 1``, i.e. order numbers are
    # presumably 1-based -- confirm against snodgrass_audio2order.
    placeholder_ratings: List[SnodgrassWordRating] = [
        SnodgrassWordRating(
            snodgrass_words[order - 1], order,
            float('nan'), float('nan'), float('nan'),
            '', '', vp, session_id, wav_file)
        for wav_file, order in zip(recordings, orders)
    ]
    return placeholder_ratings
def get_emu_word_counts(exclude_snodgrass=True, cache_path='counts_per_db.pckl'):
    """Return word counts per emuDB, computing and caching them when needed.

    When ``cache_path`` already exists, its content is loaded with
    ``load_pickled`` and returned as is. Otherwise every directory below
    ``raw_data_dir`` whose name ends with ``'emuDB'`` (except
    ``'BROTHERS_emuDB'``) is passed to ``emu2word_counts`` and the results
    are pickled to ``cache_path``.

    NOTE(review): an existing cache file is returned regardless of
    ``exclude_snodgrass`` -- the flag only matters when the cache is built.

    Args:
        exclude_snodgrass: If true, the words from ``load_snodgrass_words``
            are handed to ``exclude_words`` as ``words_to_exclude``;
            otherwise ``None`` is handed over.
        cache_path: Location of the pickle file used as cache.

    Returns:
        The freshly built dict mapping database name to the result of
        ``emu2word_counts``, or whatever ``load_pickled`` yields for an
        existing cache file.
    """
    snodgrass_words = load_snodgrass_words() if exclude_snodgrass else None

    if os.path.exists(cache_path):
        return load_pickled(cache_path)

    def count_words_in(db_name):
        # Each database lives in <raw_data_dir>/<db> and has a companion
        # <raw_data_dir>/<db>.rds file.
        return emu2word_counts(
            db_name,
            os.path.join(raw_data_dir, db_name),
            os.path.join(raw_data_dir, '{0}.rds'.format(db_name)),
            partial(exclude_words, words_to_exclude=snodgrass_words),
            verbose=True)

    # Lazily yield the emuDB directories, mirroring a lazy filter().
    emu_dbs = (
        entry for entry in os.listdir(raw_data_dir)
        if os.path.isdir(os.path.join(raw_data_dir, entry)) and entry.endswith('emuDB')
    )
    skip_dbs = ['BROTHERS_emuDB']

    counts_per_db = {
        emu: count_words_in(emu) for emu in emu_dbs if emu not in skip_dbs
    }

    with open(cache_path, 'wb') as cache_file:
        pickle.dump(counts_per_db, cache_file)
    return counts_per_db
def get_swc_word_counts(exclude_snodgrass=True):
    """Load the pickled aligned words and filter them with ``exclude_words``.

    If ``aligned_words_file`` does not exist yet, it is first produced by
    calling ``collect_aligned_words(verbose=False)``.

    Args:
        exclude_snodgrass: If true, the words from ``load_snodgrass_words``
            are passed to ``exclude_words`` as ``words_to_exclude``;
            otherwise ``None`` is passed.

    Returns:
        The result of ``filter_words_dict`` applied to the unpickled dict.
    """
    aligned_words_missing = not os.path.exists(aligned_words_file)
    if aligned_words_missing:
        collect_aligned_words(verbose=False)

    with open(aligned_words_file, 'rb') as pickled_words:
        words_dict = pickle.load(pickled_words)

    if exclude_snodgrass:
        snodgrass_words = load_snodgrass_words()
    else:
        snodgrass_words = None

    word_filter = partial(exclude_words, words_to_exclude=snodgrass_words)
    return filter_words_dict(words_dict, word_filter=word_filter)
def __main():
    """Analyze the aligned words and collect features for the Snodgrass words.

    Steps, in order:
      1. Create ``aligned_words_file`` via ``collect_aligned_words`` if it
         does not exist, then unpickle it.
      2. Run ``analyze_aligned_words`` on the full dict against the subset
         selected by ``select_words`` with the Snodgrass word list.
      3. Run ``analyze_aligned_words`` on the full dict against the result
         of ``filter_words_dict`` called without a word filter.
      4. Call ``collect_swc_features`` for the Snodgrass words.
    """
    aligned_words_missing = not os.path.exists(aligned_words_file)
    if aligned_words_missing:
        collect_aligned_words(verbose=True)

    with open(aligned_words_file, 'rb') as pickled_words:
        words_dict = pickle.load(pickled_words)

    snodgrass_words = load_snodgrass_words()

    keep_snodgrass = partial(select_words, words_to_keep=snodgrass_words)
    snodgrass_words_dict = filter_words_dict(words_dict, word_filter=keep_snodgrass)
    analyze_aligned_words(words_dict, snodgrass_words_dict)

    default_filtered_dict = filter_words_dict(words_dict)
    analyze_aligned_words(words_dict, default_filtered_dict)

    collect_swc_features(
        'snodgrass_words_cleaned_v3', 'snodgrass_words', snodgrass_words)
seq_rds_path, feature_func, partial(select_words, words_to_keep=words), verbose=True) emus = filter( lambda x: os.path.isdir(os.path.join(raw_data_dir, x)) and x.endswith( 'emuDB'), os.listdir(raw_data_dir)) skip_dbs = ['BROTHERS_emuDB'] for emu in emus: if emu not in skip_dbs: _process_emu_db(emu, processed_sub_dir, collection_name, word_list) # _process_emu_db('PD1_emuDB') if __name__ == '__main__': snodgrass_words = load_snodgrass_words() collect_emu_features('snodgrass_words_cleaned_v3', 'snodgrass_words', snodgrass_words) # collect_emu_features('snodgrass_words_cleaned_v3_mfcc', 'snodgrass_words', snodgrass_words, feature_func=audio2mfcc) # collect_emu_features('snodgrass_words_cleaned_v3_reverse', 'snodgrass_words', snodgrass_words, # feature_func=audio2lmfe_reverse) # collect_emu_features('snodgrass_words_cleaned_v3_pncc_mn_off_dct_off', 'snodgrass_words', snodgrass_words, # feature_func=partial(audio2pncc, mean_norm=False, do_dct=False)) # collect_emu_features('test_ahm', 'test_ahm', ['<ähm>'], feature_func=None, debug=True) # with open(os.path.join(res_dir, 'new_words.txt'), 'r') as f: # new_words = [line.rstrip('\n') for line in f] # collect_emu_features('new_words', 'new_words', new_words)