예제 #1
0
def main():
    """Compute MFCCs plus order-1 and order-2 deltas for the UBM wavs and pickle them.

    Lists the wav files of the BEA-diktafon directory, hands them to
    ``compute_mfccs_psf``, derives the first and second deltas through
    ``psf_deltas_proc.compute_deltas``, concatenates the three feature sets
    with ``psf_deltas_proc.concatenate_list_of_deltas2`` and stores the result
    via ``util.save_pickle``.  Every path is hard-coded.
    """
    project_root = '/home/jose/PycharmProjects/the_speech'
    windows_audio_root = 'C:/Users/Win10/Documents/'  # assigned but never read in this function

    # --- Inputs (relative to the current working directory) ---
    ubm_wav_dir = '../audio/wav-bea-diktafon'
    anon_wav_dir = '../audio/wav_anon_75_225'
    ubm_wavs = util.read_files_from_dir(ubm_wav_dir)  # BEA files
    # Augmented dementia files: the directory is listed, but the result is
    # not consumed anywhere below.
    augmented_dementia_wavs = util.read_files_from_dir(anon_wav_dir)

    # --- Output ---
    n_coeffs = 20
    tag = '2del_aug-ubm'
    ubm_features_path = project_root + '/data/hc/alzheimer/mfccs_ubm_dem_{}_{}'.format(n_coeffs, tag)

    # --- MFCCs and deltas for the UBM (BEA diktafon) set ---
    static_feats = compute_mfccs_psf(ubm_wav_dir, ubm_wavs)
    # Order 1 is evaluated before order 2, both from the static features.
    first_order, second_order = (
        psf_deltas_proc.compute_deltas(static_feats, order) for order in (1, 2)
    )
    stacked = psf_deltas_proc.concatenate_list_of_deltas2(
        static_feats, first_order, second_order)
    util.save_pickle(ubm_features_path, stacked)
예제 #2
0
def compute_fishers_pretr_ubm_2(list_mfcc_files, out_dir, list_files_ubm,
                                recipe, folder_name, feats_info):
    """Encode frame-level features as Fisher vectors using pre-trained diagonal GMMs.

    For every UBM file the GMM parameters are fetched with
    ``get_diag_gmm_params``; each feature file is then loaded, one Fisher
    vector is computed per entry, and the resulting list is pickled.

    :param list_mfcc_files: paths of the feature files (loaded with ``np.load``).
    :param out_dir: output root; it is concatenated as-is with the sub-paths,
        so it presumably has to end with a path separator.
    :param list_files_ubm: paths of the diagonal-GMM (UBM) files.
    :param recipe: sub-directory of ``out_dir`` that receives the outputs.
    :param folder_name: data-set sub-directory; also used as file-name suffix.
    :param feats_info: indexable; items 0, 2 and 1 (in that order) are
        interpolated into the output file name.
    :return: None
    """
    for ubm_path in list_files_ubm:
        print("File for UBM:", os.path.basename(ubm_path))

        # NOTE(review): the grand-parent directory name is always taken from
        # the FIRST UBM file, not from the one being processed - confirm.
        first_ubm_grandparent = os.path.dirname(os.path.dirname(list_files_ubm[0]))
        ubm_group = os.path.basename(first_ubm_grandparent)
        gmm_dir = out_dir + 'UBMs/' + ubm_group + '/GMM_fishers/'
        variances, means, weights, g = get_diag_gmm_params(file_diag=ubm_path,
                                                           out_dir=gmm_dir)

        print("Fisher-vecs will be extracted using {} number of Gaussians!".format(g))

        for feats_path in list_mfcc_files:
            # One entry per wav is expected in each feature file.
            per_wav_feats = np.load(feats_path, allow_pickle=True)
            fishers = [
                vlf.fisher.fisher(frames.transpose(),
                                  means.transpose(),
                                  variances.transpose(),
                                  weights,
                                  improved=True)
                for frames in per_wav_feats
            ]

            # NOTE(review): the name depends only on feats_info, g and
            # folder_name, so two UBM files with the same g write to the
            # same path.
            fisher_rel = '/fisher/fisher-{}{}-{}del-{}g-{}.fisher'.format(
                feats_info[0], feats_info[2], feats_info[1], g, folder_name)
            target = out_dir + recipe + '/' + folder_name + fisher_rel
            util.save_pickle(target, fishers)  # stored as pickle
예제 #3
0
def compute_fishers_pretr_ubm(list_mfcc_files, out_dir, file_ubm, recipe,
                              folder_name):
    """Encode frame-level features as Fisher vectors using one pre-trained diagonal GMM.

    The GMM parameters are read once through ``get_diag_gmm_params``.  Each
    feature file is loaded with ``np.load``, one Fisher vector is computed per
    entry, and the list is pickled under
    ``out_dir + recipe + '/' + folder_name + '/fisher/'``.

    :param list_mfcc_files: paths of the feature files to encode.
    :param out_dir: output root; also passed to ``get_diag_gmm_params``.  It is
        concatenated as-is, so it presumably has to end with a path separator.
    :param file_ubm: path of the diagonal-GMM (UBM) file.
    :param recipe: sub-directory of ``out_dir`` that receives the outputs.
    :param folder_name: data-set sub-directory; also used as file-name suffix.
    :return: None
    """
    print("File for UBM:", file_ubm)
    # ``variances`` instead of ``vars`` so the builtin is not shadowed.
    variances, means, weights, g = get_diag_gmm_params(file_diag=file_ubm,
                                                       out_dir=out_dir)

    print("Fisher-vecs will be extracted using {} number of Gaussians!".format(
        g))
    for file_name in list_mfcc_files:
        # One entry per wav is expected in each feature file.
        list_feat = np.load(file_name, allow_pickle=True)

        # Only the first 60 columns of the means/variances are used.
        # NOTE(review): 60 is hard-coded - presumably the feature dimension
        # of the inputs; confirm against the feature extraction settings.
        list_fishers = [
            vlf.fisher.fisher(feat.transpose(),
                              means[:, :60].transpose(),
                              variances[:, :60].transpose(),
                              weights,
                              improved=True)
            for feat in list_feat
        ]

        # Output file (fishers).  ``regex`` is a module-level pattern; its
        # first two matches are used as feature count and number of deltas.
        # NOTE(review): it is applied to the whole path, so digits in a
        # directory name could be picked up - verify against the pattern.
        info_num_feats = regex.findall(file_name)
        obs = '{}del'.format(int(info_num_feats[1]))
        file_fishers = out_dir + recipe + '/' + folder_name + '/fisher/fisher-{}-{}-{}g-{}.fisher'.format(
            int(info_num_feats[0]), obs, g, folder_name)
        util.save_pickle(file_fishers, list_fishers)  # save as pickle

        # An empty feature file yields no Fisher vectors; indexing element 0
        # unconditionally would raise IndexError right after the save.
        first_shape = list_fishers[0].shape if list_fishers else None
        print("{} fishers saved to:".format(len(list_fishers)), file_fishers,
              "with (1st ele.) shape:", first_shape, "\n")
예제 #4
0
def do_dimension_reduction():
    """Fit a PCA per delta setting on the training features and pickle the reduced sets.

    Reads the module-level names ``work_dir``, ``recipe`` and ``list_sets``.
    For each delta order (0, 1, 2) a PCA is trained with ``pca_trainer`` on
    the training feature file (``n_components=0.97``), then every matching
    feature file of every set in ``list_sets`` is transformed with
    ``pca_transformer`` and saved.

    Each output is written into ``feature_dir + folder_name`` and named after
    its input file: ``<input stem><obs>_pca.<cepstral type>``.  Previously the
    output path reused the search pattern, so it contained a literal ``*``,
    lacked a separator after ``folder_name`` and was identical for every
    matched file, which made the files overwrite each other.

    :return: None
    """
    import os  # local import: only needed for the output-path handling below

    print("=======dimension reduction phase========")
    feature_dir = work_dir + '/data/{}/'.format(recipe)

    # Descriptive parameters of the frame-level features.
    n_feats = 40
    cepstral_type = 'mfcc'  # choose between mfcc or plp
    obs = '_hires'  # feature-config tag, e.g. '_hires' for 'hires' params

    for delta in [0, 1, 2]:
        # The PCA is trained on the training set only.
        train_file = work_dir + '/data/mask/train/{}_mask_{}_train_{}del{}.{}'.format(
            cepstral_type, n_feats, delta, obs, cepstral_type)
        pca = pca_trainer(train_file, n_components=0.97)

        for folder_name in list_sets:
            set_dir = feature_dir + folder_name
            print("\nReading dir:", set_dir)
            pattern = '*{}_{}_{}del.{}'.format(n_feats, folder_name, delta,
                                               cepstral_type)
            # assumes traverse_dir_2 returns file paths - TODO confirm
            for item in util.traverse_dir_2(set_dir, pattern):
                reduced_data = pca_transformer(pca, item)
                # Use the real input name in place of the wildcard so every
                # input gets its own, valid output file.
                stem = os.path.splitext(os.path.basename(item))[0]
                out_file = os.path.join(
                    set_dir, '{}{}_pca.{}'.format(stem, obs, cepstral_type))
                util.save_pickle(out_file, reduced_data)
예제 #5
0
def train_cnn_encoder(constants):
    """
    Run the pretrained CNN encoder over the train and validation archives
    and pickle the embeddings together with the image file names.

    Timing notes from the original author (feature extraction is slow on CPU):

    * Takes 16 minutes on GPU.
    * 25x slower (InceptionV3) on CPU and takes 7 hours.
    * 10x slower (MobileNet) on CPU and takes 3 hours.

    :param constants: mapping; only ``constants['img_size']`` is read and it
        is used for both dimensions of the model input shape.
    :return: None
    """
    side = constants['img_size']
    model_input_shape = (side, side)

    # Reset the backend session before loading the pretrained model.
    K.clear_session()
    encoder, preprocess_for_model = cnn_encoder_builder()

    # (zip archive, embeddings pickle, file-names pickle) - train first, then
    # validation.
    # NOTE(review): the validation embeddings file is 'val_img_embed' while
    # the training one is 'train_img_embeds'; kept exactly as it was.
    jobs = (
        ('image_captioning/train2014.zip',
         'image_captioning/train_img_embeds.pickle',
         'image_captioning/train_img_fns.pickle'),
        ('image_captioning/val2014.zip',
         'image_captioning/val_img_embed.pickle',
         'image_captioning/val_img_fns.pickle'),
    )
    for archive, embeds_file, names_file in jobs:
        embeds, names = apply_model(DATA_DIR + archive,
                                    encoder,
                                    preprocess_for_model,
                                    input_shape=model_input_shape)
        save_pickle(embeds, DATA_DIR + embeds_file)
        save_pickle(names, DATA_DIR + names_file)
예제 #6
0
def compute_flevel_feats(list_wavs,
                         out_dir,
                         obs,
                         num_feats,
                         num_deltas,
                         recipe,
                         folder_name,
                         cepstral_type="mfcc",
                         raw_energy=True,
                         num_mel_bins=23,
                         low_freq=20,
                         high_freq=0):
    """Extract frame-level cepstral features for a list of wavs and pickle them.

    Every wav is passed to ``cepstral_bkaldi``; the per-wav results are
    collected in one list and saved with ``util.save_pickle`` to
    ``out_dir + recipe + '/' + folder_name + '/flevel/'``.

    :param list_wavs: wav files to process.
    :param out_dir: output root; concatenated as-is with ``recipe``, so it
        presumably has to end with a path separator.
    :param obs: free-form tag appended to the output file name.
    :param num_feats: forwarded to ``cepstral_bkaldi``; part of the file name.
    :param num_deltas: forwarded to ``cepstral_bkaldi``; part of the file name.
    :param recipe: sub-directory of ``out_dir``; part of the file name.
    :param folder_name: data-set sub-directory; part of the file name.
    :param cepstral_type: forwarded to ``cepstral_bkaldi``; used as file-name
        prefix and extension.
    :param raw_energy: forwarded to ``cepstral_bkaldi``.
    :param num_mel_bins: forwarded to ``cepstral_bkaldi``.
    :param low_freq: forwarded to ``cepstral_bkaldi``.
    :param high_freq: forwarded to ``cepstral_bkaldi``.
    :return: None
    """
    print("Extracting {} for {} wavs in: {}".format(cepstral_type,
                                                    len(list_wavs),
                                                    folder_name))
    # Output details
    observation = '{}del{}'.format(num_deltas, obs)

    # makedirs creates the recipe / set / flevel levels in one call and, with
    # exist_ok, tolerates directories that already exist.  The previous
    # check-then-mkdir sequence was racy and never created 'flevel'.
    flevel_dir = out_dir + recipe + '/' + folder_name + '/flevel'
    os.makedirs(flevel_dir, exist_ok=True)

    # ---Calculating and saving the features---
    list_mfccs = [
        cepstral_bkaldi(wav,
                        num_feats,
                        num_deltas,
                        cepstral_type=cepstral_type,
                        raw_energy=raw_energy,
                        num_mel_bins=num_mel_bins,
                        low_freq=low_freq,
                        high_freq=high_freq)
        for wav in list_wavs
    ]
    file_mfccs = flevel_dir + '/{}_{}_{}_{}_{}.{}'.format(
        cepstral_type, recipe, num_feats, folder_name, observation,
        cepstral_type)
    print("Extracted {} {} from {} utterances".format(len(list_mfccs),
                                                      cepstral_type,
                                                      len(list_wavs)))
    util.save_pickle(file_mfccs, list_mfccs)
예제 #7
0
    # Scale the filter-bank features, reduce them with PCA and pickle the
    # reduced BEA (UBM) set.  All paths are hard-coded Windows locations.
    # Input files
    file_fbanks = 'C:/Users/Win10/PycharmProjects/the_speech/data/fbanks/fbanks_dem_40'
    file_fbanks_bea = 'C:/Users/Win10/PycharmProjects/the_speech/data/fbanks/fbanks_ubm_dem_40'

    list_fbanks = util.read_pickle(file_fbanks)
    # The BEA pickle content is stacked row-wise into a single array.
    fbanks_bea = np.vstack(util.read_pickle(file_fbanks_bea))

    # Output files
    file_pca_fbanks = 'C:/Users/Win10/PycharmProjects/the_speech/data/fbanks/fbanks_dem_40_PCA'
    file_pca_bea = 'C:/Users/Win10/PycharmProjects/the_speech/data/fbanks/fbanks_ubm_dem_40_PCA_SP'

    # Scaling and selecting the best number of components.
    # scale_min_max, best_components and run_pca are defined outside this
    # excerpt; presumably min-max scaling, a component-count search and a PCA
    # projection - confirm against their definitions.
    bea_scaled, list_dem_scaled = scale_min_max(fbanks_bea, list_fbanks)
    c = best_components(bea_scaled)
    # Reducing dimensions with PCA, using the component count chosen above
    fbanks_reduced, bea_reduced = run_pca(bea_scaled, list_dem_scaled, c)

    # Computing deltas (disabled)
    # list_fbanks_deltas = compute_deltas(list_fbanks, 1)
    #bea_deltas = python_speech_features.base.delta(feat=fbanks_bea, N=1)
    # 2nd deltas (disabled)
    #list_fbanks_deltas2 = compute_deltas(list_fbanks, 2)
    # bea_deltas2 = python_speech_features.base.delta(feat=fbanks_bea, N=2)
    # Concatenating deltas (disabled)
    #fbanks_deltas_conc = concatenate_list_of_deltas(list_fbanks, list_fbanks_deltas)
    #bea_deltas_conc = np.concatenate((fbanks_bea, bea_deltas))
    # NOTE(review): all delta code above is commented out, so this message is
    # printed although no deltas are computed or concatenated.
    print('deltas concatenated!')

    # Saving data
    # NOTE(review): only the reduced BEA set is saved; fbanks_reduced is
    # computed but its save call is disabled, so file_pca_fbanks is unused.
    # util.save_pickle(file_pca_fbanks, fbanks_reduced)
    util.save_pickle(file_pca_bea, bea_reduced)
예제 #8
0
    # Load the original dementia recordings with load_audio_file and pickle
    # the result.  audio_dir, work_dir, dir_wav_ubm and input_length are
    # defined outside this excerpt.
    dir_anon_75 = audio_dir + '/audio/wav_anon_75_225/'

    # NOTE(review): the three listings below are only referenced by the
    # commented-out code at the end of this excerpt.
    audio_list_original_dem = util.just_original_75(
    )  # Reading original dementia files
    audio_list_ubm = util.read_files_from_dir(dir_wav_ubm)  # Reading BEA files
    audio_list_augmented = util.read_files_from_dir(
        dir_anon_75)  # Reading augmented files

    # Output files
    # NOTE(review): both mel-spec paths are only referenced by the
    # commented-out code at the end of this excerpt.
    observation = ''
    n_mels = '256'  # a string; only interpolated into the file names here
    file_melspec_dem = work_dir + '/data/melspecs/melspec_dem_{}{}'.format(
        n_mels, observation)
    #file_melspec_ubm = work_dir + '/data/melspecs/melspec_ubm_dem_{}{}'.format(n_mels, observation)
    file_melspec_augmented = work_dir + '/data/melspecs/melspec_ubm_dem_{}_aug{}'.format(
        n_mels, observation)

    # just_original_75() is called again here instead of reusing
    # audio_list_original_dem.
    lista = load_audio_file(dir_anon_75, util.just_original_75(), input_length)
    util.save_pickle(work_dir + '/data/melnmffc_dem', lista)

    # ---Calculating and saving Mel-Specs--- (all disabled)
    # for original audios
    #specs_dem = compute_mspect_librosa(dir_anon_75, util.just_original_75())
    #util.pickle_dump_big(specs_dem, file_melspec_dem)
    # for augmented audios
    #specs_augmented = compute_mspect_librosa(dir_anon_75, audio_list_augmented)
    #util.pickle_dump_big(specs_augmented, file_melspec_augmented)
    # for BEA-diktafon (UBM) audios
    #specs_ubm = compute_mspect_librosa(dir_wav_ubm, audio_list_ubm)
    #util.save_pickle(file_melspec_ubm, specs_ubm)