def generate_npz(index, wav_files, dataset_path, preproc_data_path):
    """Extract the features of one wav file and store them in an npz file.

    The output path is derived from ``wav_files[index]`` by substituting
    ``dataset_path`` with ``preproc_data_path`` and ``'WAV'`` with ``'npz'``.
    When that file already exists the call is a no-op, so an interrupted
    pre-processing run can be resumed without redoing finished files.

    Args:
        index: Position in ``wav_files`` of the file to process.
        wav_files: Indexable collection of wav file paths.
        dataset_path: Part of the wav path to be substituted.
        preproc_data_path: Text that replaces ``dataset_path`` in the
            output path.

    Returns:
        None.
    """
    wav_path = wav_files[index]
    # NOTE: str.replace substitutes every occurrence of 'WAV' in the path,
    # not only the extension; kept as-is so output locations do not change.
    f_name = wav_path.replace(dataset_path, preproc_data_path).replace('WAV', 'npz')

    # Check before extracting: the features are not needed when the output
    # is already on disk.
    if os.path.isfile(f_name):
        return

    # Create the destination directory if needed; exist_ok tolerates another
    # worker creating it at the same time.
    out_dir = os.path.dirname(f_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    mfccs, phns = get_mfccs_and_phones(wav_path)
    savez(f_name, mfccs=mfccs, phns=phns)
Ejemplo n.º 2
0
def generate_npz(wav_files, dataset_path, preproc_data_path):
    """Extract features from every wav file and store each as an npz file.

    For each path in ``wav_files`` the output path is obtained by
    substituting ``dataset_path`` with ``preproc_data_path`` and ``'WAV'``
    with ``'npz'``. Missing output directories are created. Existing output
    files are written again.

    Args:
        wav_files: Collection of wav file paths.
        dataset_path: Part of each wav path to be substituted.
        preproc_data_path: Text that replaces ``dataset_path`` in the
            output path.

    Returns:
        None.
    """
    print("Extracting and saving features from wav files...")
    for wav_path in tqdm(wav_files):
        mfccs, phns = get_mfccs_and_phones(wav_path)
        # NOTE: str.replace substitutes every occurrence of 'WAV' in the
        # path, not only the extension.
        f_name = wav_path.replace(dataset_path,
                                  preproc_data_path).replace('WAV', 'npz')
        out_dir = os.path.dirname(f_name)
        # An empty dirname means "current directory"; os.makedirs('') would
        # raise, so only create when there is a directory component.
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        savez(f_name, mfccs=mfccs, phns=phns)
Ejemplo n.º 3
0
def generate_npz(wav_files):
    """Extract features from every wav file and save them next to it.

    Each output path is the wav path with ``'WAV'`` replaced by ``'npz'``.
    Progress is printed roughly every tenth of the way through.

    Args:
        wav_files: Sequence of wav file paths.

    Returns:
        None.
    """
    n_total = len(wav_files)
    # With fewer than 10 files n_total // 10 is 0, which made the modulo
    # below raise ZeroDivisionError; report on every file in that case.
    report_every = max(1, n_total // 10)

    for i, wav_path in enumerate(wav_files):
        if i % report_every == 0:
            # True share of files handled so far; identical to the previous
            # output whenever n_total is a multiple of 10.
            print(i * 100 / n_total, "% is converted")

        mfccs, phns = get_mfccs_and_phones(wav_path)
        # NOTE: str.replace substitutes every occurrence of 'WAV' in the
        # path, not only the extension.
        f_name = wav_path.replace('WAV', 'npz')
        savez(f_name, mfccs=mfccs, phns=phns)
Ejemplo n.º 4
0
from data_load import get_mfccs_and_phones

import numpy as np
from hparam import hparam as hp
if __name__ == '__main__':
    # Manual check: run the feature extractor on one wav file, print the
    # shapes of both results and save the MFCCs to disk.
    # NOTE(review): the hparam case is spelled "TINIT2" while the dataset
    # directory below is "TIMIT2" -- confirm the spelling is intentional.
    hp.set_hparam_yaml("TINIT2")

    sample_wav = "/home/cocoonmola/datasets/TIMIT2/TRAIN/DR3/FCMG0/SA1.WAV"
    mfccs, phns = get_mfccs_and_phones(sample_wav)

    for extracted in (mfccs, phns):
        print(extracted.shape)

    # Only the MFCCs are written out; the phones are just inspected above.
    np.save("./mfccs", mfccs)
Ejemplo n.º 5
0
# Module-level placeholders. train_phns is overwritten by the training-set
# loop in the __main__ block below.
# NOTE(review): test_mfccs / test_phns are presumably filled the same way by
# the test-set loop -- confirm against the rest of the file.
train_phns = None
test_mfccs = None
test_phns = None

if __name__ == "__main__":
    dataset_path = "/home/john/datasets"
    preproc_data_path = "./"
    s = datetime.datetime.now()

    train_wav_files = glob.glob(os.path.join(dataset_path, TIMIT_TRAIN_WAV))
    test_wav_files = glob.glob(os.path.join(dataset_path, TIMIT_TEST_WAV))

    print('Starting pre-processing train dataset...')
    print("Extracting and saving features from wav files...")
    for index in tqdm(range(len(train_wav_files))):
        mfcc, phn = get_mfccs_and_phones(train_wav_files[index])
        if index == 0:
            train_mfccs = mfcc
            train_phns = phn
        else:
            train_mfccs = np.concatenate((train_mfccs, mfcc))
            train_phns = np.concatenate((train_phns, phn))


    print('Pre-processing of train dataset has finished!')

    print('Starting pre-processing test dataset...')
    print("Extracting and saving features from wav files...")
    for index in tqdm(range(len(test_wav_files))):
        mfcc, phn = get_mfccs_and_phones(test_wav_files[index])
        if index == 0: