Esempio n. 1
0
def gen_empty_ratings(folder):
    """Create placeholder ratings for every recording in a session folder.

    The last two underscore-separated parts of the folder name are used as
    the session id and the VP identifier (``..._<session_id>_<vp>``).

    :param folder: path to the session folder containing the ``*.wav`` files;
        a trailing path separator is tolerated
    :return: list with one ``SnodgrassWordRating`` per wav file, in the order
        given by ``snodgrass_audio_chronological_sort_indices``, with NaN /
        empty-string placeholders in the rating fields
    :raises RuntimeError: if the folder does not contain exactly 233 wav files
    :raises IndexError: if the folder name contains no underscore
    """
    # normpath strips a trailing separator; without it basename() returns ''
    # for 'path/to/session_x_y/' and the session id lookup below fails.
    split = os.path.basename(os.path.normpath(folder)).split('_')
    vp = split[-1]
    session_id = split[-2]
    wav_files = np.array(glob.glob(os.path.join(folder, '*.wav')))
    wav_files = wav_files[snodgrass_audio_chronological_sort_indices(
        wav_files)]
    orders = snodgrass_audio2order(wav_files)
    snodgrass_words = load_snodgrass_words()

    if len(wav_files) != 233:
        msg = 'Only {0} files in session folder {1} -- non-full sessions currently not supported for csv export'.format(
            len(wav_files), folder)
        print(msg)
        raise RuntimeError(msg)

    fake_ratings: List[SnodgrassWordRating] = []
    for wav_file, order in zip(wav_files, orders):
        # `order` is used as a 1-based index into the word list.
        word = snodgrass_words[order - 1]
        fake_ratings.append(
            SnodgrassWordRating(word, order, float('nan'), float('nan'),
                                float('nan'), '', '', vp, session_id,
                                wav_file))

    return fake_ratings
Esempio n. 2
0
def get_emu_word_counts(exclude_snodgrass=True,
                        cache_path='counts_per_db.pckl'):
    """Return word counts per emuDB directory, using a pickle cache.

    If ``cache_path`` exists, its content is returned via ``load_pickled``
    without recomputing anything. Otherwise every directory in
    ``raw_data_dir`` whose name ends in ``emuDB`` (except the skipped ones)
    is counted with ``emu2word_counts``, and the resulting dict is pickled to
    ``cache_path`` before being returned.

    NOTE: the cache is identified by ``cache_path`` only; the value of
    ``exclude_snodgrass`` is not part of it.

    :param exclude_snodgrass: when true, the Snodgrass words are passed to
        ``exclude_words`` as the words to exclude; otherwise ``None`` is passed
    :param cache_path: location of the pickle cache file
    :return: dict mapping emuDB directory name to its word counts
    """
    def emu2word_counts_except(db, words):
        # Count the words of a single database, filtering with `exclude_words`.
        return emu2word_counts(
            db,
            os.path.join(raw_data_dir, db),
            os.path.join(raw_data_dir, '{0}.rds'.format(db)),
            partial(exclude_words, words_to_exclude=words),
            verbose=True)

    excluded_words = load_snodgrass_words() if exclude_snodgrass else None

    # Cache hit: nothing to compute.
    if os.path.exists(cache_path):
        return load_pickled(cache_path)

    ignored_dbs = ['BROTHERS_emuDB']
    counts_per_db = {}
    for entry in os.listdir(raw_data_dir):
        is_emu_db = (os.path.isdir(os.path.join(raw_data_dir, entry))
                     and entry.endswith('emuDB'))
        if not is_emu_db or entry in ignored_dbs:
            continue
        counts_per_db[entry] = emu2word_counts_except(entry, excluded_words)

    with open(cache_path, 'wb') as cache_file:
        pickle.dump(counts_per_db, cache_file)
    return counts_per_db
Esempio n. 3
0
def get_swc_word_counts(exclude_snodgrass=True):
    """Load the pickled aligned-words dict and filter it with `exclude_words`.

    The aligned-words file is generated first via ``collect_aligned_words``
    if it does not exist yet.

    :param exclude_snodgrass: when true, the Snodgrass words are passed to
        ``exclude_words`` as the words to exclude; otherwise ``None`` is passed
    :return: the result of ``filter_words_dict`` on the loaded dict
    """
    aligned_words_missing = not os.path.exists(aligned_words_file)
    if aligned_words_missing:
        collect_aligned_words(verbose=False)

    with open(aligned_words_file, 'rb') as pickled:
        words_dict = pickle.load(pickled)

    if exclude_snodgrass:
        excluded_words = load_snodgrass_words()
    else:
        excluded_words = None

    word_filter = partial(exclude_words, words_to_exclude=excluded_words)
    return filter_words_dict(words_dict, word_filter=word_filter)
Esempio n. 4
0
def __main():
    """Analyze the aligned words and collect features for the Snodgrass words.

    Steps, in order:
      1. make sure the aligned-words pickle exists (generate it if not),
      2. analyze all words against the Snodgrass-only subset,
      3. analyze all words against the default-filtered dict,
      4. collect the features for the Snodgrass words.
    """
    if not os.path.exists(aligned_words_file):
        collect_aligned_words(verbose=True)

    with open(aligned_words_file, 'rb') as pickled:
        all_words = pickle.load(pickled)

    snodgrass_words = load_snodgrass_words()

    # Subset restricted to the Snodgrass words.
    keep_snodgrass = partial(select_words, words_to_keep=snodgrass_words)
    snodgrass_only = filter_words_dict(all_words, word_filter=keep_snodgrass)
    analyze_aligned_words(all_words, snodgrass_only)

    # Same analysis against the dict filtered with the default word filter.
    default_filtered = filter_words_dict(all_words)
    analyze_aligned_words(all_words, default_filtered)

    collect_swc_features('snodgrass_words_cleaned_v3',
                         'snodgrass_words',
                         snodgrass_words)
Esempio n. 5
0
                         seq_rds_path,
                         feature_func,
                         partial(select_words, words_to_keep=words),
                         verbose=True)

    emus = filter(
        lambda x: os.path.isdir(os.path.join(raw_data_dir, x)) and x.endswith(
            'emuDB'), os.listdir(raw_data_dir))
    skip_dbs = ['BROTHERS_emuDB']
    for emu in emus:
        if emu not in skip_dbs:
            _process_emu_db(emu, processed_sub_dir, collection_name, word_list)
    # _process_emu_db('PD1_emuDB')


if __name__ == '__main__':
    # Default run: collect the features for the Snodgrass word list.
    collect_emu_features('snodgrass_words_cleaned_v3',
                         'snodgrass_words',
                         load_snodgrass_words())

    # Alternative invocations, kept for reference (currently disabled):
    #
    # snodgrass_words = load_snodgrass_words()
    # collect_emu_features('snodgrass_words_cleaned_v3_mfcc', 'snodgrass_words', snodgrass_words, feature_func=audio2mfcc)
    # collect_emu_features('snodgrass_words_cleaned_v3_reverse', 'snodgrass_words', snodgrass_words,
    #                      feature_func=audio2lmfe_reverse)
    # collect_emu_features('snodgrass_words_cleaned_v3_pncc_mn_off_dct_off', 'snodgrass_words', snodgrass_words,
    #                      feature_func=partial(audio2pncc, mean_norm=False, do_dct=False))
    #
    # collect_emu_features('test_ahm', 'test_ahm', ['<ähm>'], feature_func=None, debug=True)
    #
    # with open(os.path.join(res_dir, 'new_words.txt'), 'r') as f:
    #     new_words = [line.rstrip('\n') for line in f]
    # collect_emu_features('new_words', 'new_words', new_words)