Example #1
0
def do_fishers_pretrained_ubm():
    """Run ``compute_fishers_pretr_ubm_2`` for each cluster count and delta order.

    For every value in ``list_n_clusters`` and every delta order in
    (0, 1, 2), the matching ``.dubm`` files under the UBM directory and the
    matching feature files of each set are looked up with
    ``util.traverse_dir_2`` and handed to
    ``extract_fishers.compute_fishers_pretr_ubm_2``.

    Relies on ``work_dir``, ``recipe``, ``ubm_folder_name`` and
    ``list_n_clusters`` from the enclosing scope. Returns nothing.
    """
    feature_dir = work_dir + 'data/{}/'.format(recipe)
    out_dir = work_dir + 'data/'  # handed to the extractor as its output location
    ubm_dir = work_dir + 'data/UBMs/' + ubm_folder_name + '/ivec_models/'  # searched for .dubm files

    list_sets = ['dementia_new8k']
    for n_clusters in list_n_clusters:
        for deltas in [0, 1, 2]:
            # Layout: [n_features/dimension, delta order, cepstral type].
            feats_info = [20, deltas, 'mfcc']
            for folder_name in list_sets:
                set_dir = feature_dir + folder_name
                print("\nReading dir:", set_dir)

                # UBM files for this cluster count / feature configuration.
                ubm_pattern = '*_{}g_{}{}-{}del_{}.dubm'.format(
                    n_clusters, feats_info[0], feats_info[2], feats_info[1],
                    ubm_folder_name)
                list_ubm_files = util.traverse_dir_2(ubm_dir, ubm_pattern)

                # Frame-level feature files of the current set.
                mfcc_pattern = '*{}_{}_{}del.{}'.format(
                    feats_info[0], folder_name, feats_info[1], feats_info[2])
                list_mfcc_files = util.traverse_dir_2(set_dir, mfcc_pattern)

                extract_fishers.compute_fishers_pretr_ubm_2(
                    list_mfcc_files=list_mfcc_files,
                    out_dir=out_dir,
                    feats_info=feats_info,
                    list_files_ubm=list_ubm_files,
                    recipe=recipe,
                    folder_name=folder_name,
                )
Example #2
0
def do_ivecs():
    """Call ``extract_ivecs.compute_ivecs`` for every set in ``list_sets``.

    Only delta order 2 is processed. The UBM input is a one-element list
    holding the feature file located directly under the UBM directory; the
    per-set feature files are found with ``util.traverse_dir_2``.

    Relies on ``work_dir``, ``recipe``, ``ubm_folder_name``, ``list_sets``,
    ``list_n_clusters`` and ``out_dir`` from the enclosing scope.
    Returns nothing.
    """
    print("=======i-vector extraction phase========")
    mfccs_dir = work_dir + 'data/{}/'.format(recipe)
    ubm_dir = work_dir + 'data/UBMs/{}'.format(ubm_folder_name)

    for deltas in [2]:
        # Layout: [n_features, delta order, feature type].
        feats_info = [20, deltas, 'mfcc']

        # Kept as a list so that several files could be supplied for UBM training.
        ubm_file = ubm_dir + '/mfcc_{}_{}_{}del.mfcc'.format(
            feats_info[0], ubm_folder_name, deltas)
        list_files_ubm = [ubm_file]

        for folder_name in list_sets:
            set_dir = mfccs_dir + folder_name
            print("\nReading dir:", set_dir)
            mfcc_pattern = '*{}_{}_{}del.{}'.format(
                feats_info[0], folder_name, feats_info[1], feats_info[2])
            list_mfcc_files = util.traverse_dir_2(set_dir, mfcc_pattern)
            extract_ivecs.compute_ivecs(
                list_n_gauss=list_n_clusters,
                list_mfcc_files=list_mfcc_files,
                out_dir=out_dir,
                list_files_ubm=list_files_ubm,
                recipe=recipe,
                mfcc_info=feats_info,
                folder_name=folder_name,
            )
Example #3
0
def do_fishers():
    """Call ``extract_fishers.compute_fishers`` for each delta order and set.

    For delta orders 0, 1 and 2, the UBM input is a one-element list with
    the ``demencia94ABC/wav16k_split_long`` feature file of that delta
    order, and the per-set feature files are found with
    ``util.traverse_dir_2``.

    Relies on ``work_dir``, ``recipe``, ``list_sets``, ``list_n_clusters``
    and ``out_dir`` from the enclosing scope. Returns nothing.
    """
    print("=======fisher-vector extraction phase========")
    feature_dir = work_dir + '/data/{}/'.format(recipe)

    for deltas in [0, 1, 2]:
        # Layout: [n_features/dimension, delta order, cepstral type].
        feats_info = [20, deltas, 'mfcc']
        obs = ''  # not referenced anywhere below in this function

        ubm_file = work_dir + (
            'data/demencia94ABC/wav16k_split_long/mfcc_demencia94ABC_20_wav16k_split_long_{}del.mfcc'
            .format(deltas))
        list_files_ubm = [ubm_file]

        for folder_name in list_sets:
            mfcc_pattern = '*{}_{}_{}del.{}'.format(
                feats_info[0], folder_name, feats_info[1], feats_info[2])
            list_mfcc_files = util.traverse_dir_2(feature_dir + folder_name,
                                                  mfcc_pattern)
            print(list_mfcc_files)
            extract_fishers.compute_fishers(
                list_n_clusters,
                list_mfcc_files,
                out_dir,
                feats_info=feats_info,
                list_files_ubm=list_files_ubm,
                recipe=recipe,
                folder_name=folder_name,
            )
Example #4
0
def do_dimension_reduction():
    """Train a PCA per delta order and save a reduced copy of each feature file.

    For each delta order in (0, 1, 2):

    1. ``pca_trainer`` is called on the ``mask/train`` feature file with
       ``n_components=0.97``.
    2. Every feature file of every set in ``list_sets`` (found with
       ``util.traverse_dir_2``) is passed through ``pca_transformer``.
    3. The result is stored with ``util.save_pickle`` inside the set
       directory, under the input file's own name with ``<obs>_pca``
       inserted before the extension.

    Relies on ``work_dir``, ``recipe`` and ``list_sets`` from the enclosing
    scope. Returns nothing.
    """
    import os  # local import: only needed to build the output file names

    print("=======dimension reduction phase========")
    feature_dir = work_dir + '/data/{}/'.format(recipe)

    for delta in [0, 1, 2]:
        # Layout: [n_features/dimension, delta order, cepstral type].
        feats_info = [40, delta, 'mfcc']
        # Suffix describing the feature configuration, e.g. '_hires'.
        obs = '_hires'

        list_files_ubm = [
            work_dir + '/data/mask/train/{}_mask_{}_train_{}del{}.{}'.format(
                feats_info[2], feats_info[0], delta, obs, feats_info[2])
        ]
        # The PCA is fitted on the training-set file only.
        pca = pca_trainer(list_files_ubm[0], n_components=0.97)

        for folder_name in list_sets:
            set_dir = feature_dir + folder_name
            print("\nReading dir:", set_dir)
            list_mfcc_file = util.traverse_dir_2(
                set_dir,
                '*{}_{}_{}del.{}'.format(feats_info[0], folder_name,
                                         feats_info[1], feats_info[2]))
            for item in list_mfcc_file:  # transform each matching file
                reduced_data = pca_transformer(pca, item)
                # The previous target path glued a literal '*' pattern onto
                # the folder name without a separator, so every input was
                # written to the same oddly named file. Derive a distinct
                # name from each input and place it in the set directory.
                # NOTE(review): assumes traverse_dir_2 yields path strings;
                # files found in sub-directories end up flattened here.
                stem, ext = os.path.splitext(os.path.basename(item))
                out_path = os.path.join(set_dir, stem + obs + '_pca' + ext)
                util.save_pickle(out_path, reduced_data)
Example #5
0
def do_fishers():
    """Call ``extract_fishers.compute_fishers`` for every set in ``list_sets``.

    Only delta order 0 is processed. The UBM input is a one-element list
    with the recipe's ``train`` feature file; the per-set feature files are
    found with ``util.traverse_dir_2``.

    Relies on ``work_dir``, ``recipe``, ``list_sets``, ``list_n_clusters``
    and ``out_dir`` from the enclosing scope. Returns nothing.
    """
    print("=======fisher-vector extraction phase========")
    feature_dir = work_dir + '/data/{}/'.format(recipe)

    for delta in [0]:
        # Layout: [n_features/dimension, delta order, cepstral type].
        feats_info = [23, delta, 'mfcc']
        # Feature-config suffix such as '_hires'; not referenced below.
        obs = ''

        # File name format: "featureType_recipeName_nMFCCs_train_nDeltas.mfcc"
        train_file = work_dir + '/data/{}/train/{}_{}_{}_train_{}del.{}'.format(
            recipe, feats_info[2], recipe, feats_info[0], delta,
            feats_info[2])
        list_files_ubm = [train_file]

        for folder_name in list_sets:
            set_dir = feature_dir + folder_name
            print("\nReading dir:", set_dir)
            mfcc_pattern = '*{}_{}_{}del.{}'.format(
                feats_info[0], folder_name, feats_info[1], feats_info[2])
            list_mfcc_files = util.traverse_dir_2(set_dir, mfcc_pattern)
            print(list_mfcc_files)
            extract_fishers.compute_fishers(
                list_n_clusters,
                list_mfcc_files,
                out_dir,
                feats_info=feats_info,
                list_files_ubm=list_files_ubm,
                recipe=recipe,
                folder_name=folder_name,
            )
Example #6
0
def do_ivecs():
    """Call ``extract_ivecs.compute_ivecs`` for every set in ``list_sets``.

    Only delta order 2 is processed. The UBM input is a one-element list
    with the ``demencia94ABC/wav16k_split_long`` feature file of that delta
    order; the per-set feature files are found with
    ``util.traverse_dir_2``.

    Relies on ``work_dir``, ``recipe``, ``list_sets``, ``list_n_clusters``
    and ``out_dir`` from the enclosing scope. Returns nothing.
    """
    print("=======i-vector extraction phase========")
    mfccs_dir = work_dir + 'data/{}/'.format(recipe)

    for deltas in [2]:
        # Layout: [n_features, delta order, feature type].
        feats_info = [20, deltas, 'mfcc']

        ubm_file = work_dir + (
            'data/demencia94ABC/wav16k_split_long/mfcc_demencia94ABC_20_wav16k_split_long_{}del.mfcc'
            .format(deltas))
        list_files_ubm = [ubm_file]

        for folder_name in list_sets:
            set_dir = mfccs_dir + folder_name
            print("\nReading dir:", set_dir)
            mfcc_pattern = '*{}_{}_{}del.{}'.format(
                feats_info[0], folder_name, feats_info[1], feats_info[2])
            list_mfcc_files = util.traverse_dir_2(set_dir, mfcc_pattern)
            extract_ivecs.compute_ivecs(
                list_n_gauss=list_n_clusters,
                list_mfcc_files=list_mfcc_files,
                out_dir=out_dir,
                list_files_ubm=list_files_ubm,
                recipe=recipe,
                mfcc_info=feats_info,
                folder_name=folder_name,
            )
Example #7
0
def do_fishers_pretrained_ubm():
    """Run ``compute_fishers_pretr_ubm`` for each set and each ``.mdl`` UBM.

    The UBM files are collected once with ``util.traverse_dir``; then, for
    both Erlangen sets and for every UBM file, the set's ``*2del.mfcc``
    files are looked up with ``util.traverse_dir_2`` and handed to
    ``extract_fishers.compute_fishers_pretr_ubm`` together with that UBM.

    Relies on ``work_dir`` and ``recipe`` from the enclosing scope.
    Returns nothing.
    """
    mfccs_dir = work_dir + 'data/{}/'.format(recipe)
    out_dir = work_dir + 'data/'  # handed to the extractor as its output location
    ubm_dir = work_dir + 'data/' + recipe + '/UBMs/'  # searched for the UBM files
    # Original note: files may be .mdl or .dubm, the latter being more reliable.
    list_ubm_files = util.traverse_dir(ubm_dir, '.mdl')

    mfcc_n_deltas = 2  # delta order encoded in the feature file names
    mfcc_pattern = '*{}del.mfcc'.format(mfcc_n_deltas)

    list_sets = ['monologue_erlangen', 'readtext_erlangen']

    for folder_name in list_sets:
        set_dir = mfccs_dir + folder_name
        print("\nReading dir:", set_dir)
        for ubm in list_ubm_files:
            # The feature files are looked up again for every UBM.
            list_mfcc_files = util.traverse_dir_2(set_dir, mfcc_pattern)
            extract_fishers.compute_fishers_pretr_ubm(
                list_mfcc_files=list_mfcc_files,
                out_dir=out_dir,
                file_ubm=ubm,
                recipe=recipe,
                folder_name=folder_name,
            )
Example #8
0

# Directory layout, all rooted at work_dir.
feature_dir = work_dir + 'data/{}/'.format(recipe)
out_dir = work_dir + 'data/'  # handed to extract_ivecs as the output location
ubm_dir = work_dir + 'data/UBMs/' + ubm_folder_name + '/ivec_models/'  # searched for .fubm models

for g in list_n_clusters:
    # Both lists use the layout [n_features/dimension, delta order, feature type].
    feats_info_ubm = [20, 0, 'mfcc']  # selects the UBM file name
    feats_info_flevel = [20, 0, 'fbank']  # selects the frame-level feature files
    for folder_name in list_sets:
        # UBM files with the .fubm extension for the current cluster count.
        list_ubm_files = util.traverse_dir_2(
            ubm_dir,
            '*_{}g_{}{}-{}del_{}.fubm'.format(
                g,
                feats_info_ubm[0],
                feats_info_ubm[2],
                feats_info_ubm[1],
                ubm_folder_name,
            ),
        )
        # Frame-level feature files of the current set.
        list_mfcc_files = util.traverse_dir_2(
            feature_dir + folder_name + '/',
            '*{}_{}_{}del.{}'.format(
                feats_info_flevel[0],
                folder_name,
                feats_info_flevel[1],
                feats_info_flevel[2],
            ),
        )
        extract_ivecs.extract_ivecs(
            list_mfcc_files=list_mfcc_files,
            g=g,
            list_fubms=list_ubm_files,
            mfcc_info=feats_info_ubm,
            recipe=recipe,
            out_dir=out_dir,
            folder_name=folder_name,
        )


# Absolute path to one specific .fubm file; not used by the loop above.
fubmfile = '/media/jose/hk-data/PycharmProjects/the_speech/data/UBMs/wav16k_split_long/ivec_models/fubm_mdl_256g_20mfcc-0del_wav16k_split_long.fubm'