def split_train_validation_data():
    """Split traindata.npz into train/validation sets by speaker (90/10),
    stratified by gender so both splits keep roughly the same gender mix.

    Side effects: writes 'train_split_data.npy' and 'vali_split_data.npy'.
    Relies on path2info(filename) -> (gender, speaker_id, ...) from the
    project's lab3 tools.
    """
    traindata = np.load("traindata.npz", allow_pickle=True)['arr_0']

    male_list = []
    female_list = []
    record_id = []
    # Collect the unique speaker ids, bucketed by gender.
    for item in traindata:
        gender, id_num, _, _ = path2info(item['filename'])
        if id_num not in record_id:
            record_id.append(id_num)
            if gender == 'man':
                male_list.append(id_num)
            else:
                female_list.append(id_num)
    print(len(female_list), "and ", len(male_list))

    train_size = int(len(record_id) * 0.9)
    # BUGFIX: the male ratio must be taken over ALL unique speakers, not
    # over train_size. The original len(male_list) / train_size made
    # male_train_size == len(male_list), i.e. every male speaker went to
    # the training split and validation contained only female speakers.
    male_ratio = len(male_list) / len(record_id)
    male_train_size = int(train_size * male_ratio)
    print(male_train_size, ' ', train_size)

    # Sample the speaker ids (without replacement) for the training split.
    male_train_ids = np.random.choice(male_list, male_train_size, replace=False)
    female_train_ids = np.random.choice(female_list, train_size - male_train_size, replace=False)
    train_ids = np.concatenate([male_train_ids, female_train_ids])

    # Route every utterance to train or validation by its speaker id.
    train_data = []
    vali_data = []
    for item in traindata:
        _, id_num, _, _ = path2info(item['filename'])
        if id_num in train_ids:
            train_data.append(item)
        else:
            vali_data.append(item)

    print('num of male speaker for training:', len(male_train_ids))
    print('num of female speaker for training:', len(female_train_ids))
    print('num of total train data:', len(train_data))
    print('num of total val data:', len(vali_data))
    np.save('train_split_data.npy', train_data)
    np.save('vali_split_data.npy', vali_data)
def feature_extraction_and_force_alignment(filepath, nstates, phoneHMMs):
    """Extract features from one .wav file and force-align it to HMM states.

    Args:
        filepath: path to a .wav file whose transcription is encoded in the
            filename (read back via path2info).
        nstates: dict mapping phone name -> number of HMM states.
        phoneHMMs: dict of per-phone HMM parameters used by forcedAlignment.

    Returns:
        (lmfcc_result, mspec_result, targets) where targets is a list of
        state-name strings (e.g. 'ah_0'), one per feature frame.
    """
    # NOTE(review): samplingrate is not passed to mfcc/mspec here, unlike the
    # batch extraction script which forwards it — confirm the default matches.
    samples, samplingrate = loadAudio(filepath)
    wordTrans = list(path2info(filepath)[2])
    phoneTrans = words2phones(wordTrans, prondict)
    # One state name per (phone, state-index) pair, in utterance order.
    stateTrans = [phone + '_' + str(stateid)
                  for phone in phoneTrans
                  for stateid in range(nstates[phone])]
    lmfcc_result = mfcc(samples)
    mspec_result = mspec(samples)
    # (removed a dead `targets = []` that was immediately overwritten below)
    _, viterbi_path = forcedAlignment(lmfcc_result, phoneHMMs, phoneTrans)
    # Map each frame's Viterbi state index back to its state name.
    targets = [stateTrans[idx] for idx in viterbi_path.astype(np.int16)]
    return lmfcc_result, mspec_result, targets
# E.g. # of states of ah = 3; ah -> ['ah_0', 'ah_1', 'ah_2']
stateList = ['%s_%d' % (ph, i)
             for ph in phoneHMMs.keys()
             for i in range(nstates[ph])]
# PERF: precompute name -> index once; the original called
# stateList.index(t) per target, which is O(len(stateList)) per frame.
state_index = {s: i for i, s in enumerate(stateList)}
# --------------------------------------------------------------
# Walk the corpus, extract lmfcc/mspec features and force-aligned state
# targets for every .wav file, then dump everything to one .npz archive.
data = []
for root, dirs, files in walk(folder_to_extract):
    for f in tqdm(files):
        if not f.endswith('.wav'):
            continue
        # do our work
        filename = os.path.join(root, f)
        sample, srate = loadAudio(filename)
        mspec_x = mspec(sample, samplingrate=srate)
        lmfcc_x = mfcc(sample, samplingrate=srate)
        wordTrans = list(path2info(filename)[2])
        phoneTrans = words2phones(wordTrans, prondict)
        targets = forcedAlignment(lmfcc_x, phoneHMMs, phoneTrans)
        # convert the targets from str to int
        idx_targets = [state_index[t] for t in targets]
        data.append({
            'filename': filename,
            'lmfcc': lmfcc_x,
            'mspec': mspec_x,
            'targets': idx_targets
        })
kwargs = {data_type: data}
np.savez(dump_file_name, **kwargs)
# Sanity check: each row of the word HMM's transition matrix should sum
# to 1, so the grand total over N rows must equal N.
sumTot = 0.0
N = len(wordTest['transmat'])
for row_idx in range(N):
    sumTot += np.sum(wordTest['transmat'][row_idx])
# the following two numbers should be equal
print(sumTot)
print(N)
#print(wordTest['startprob'])
#print(wordTest['transmat'])

###########
# Walk one utterance through the alignment pipeline and print each stage.
filename = 'tidigits/disc_4.1.1/tidigits/train/man/nw/z43a.wav'
samples, samplingrate = lab3_tools.loadAudio(filename)
lmfcc = lab1.mfcc(samples)

wordTrans = list(lab3_tools.path2info(filename)[2])
print(wordTrans)  # should be ['z', '4', '3']

phoneTrans = words2phones(wordTrans, prondict, addShortPause=True)
# should be ['sil', 'z', 'iy', 'r', 'ow', 'f', 'ao', 'r', 'th', 'r', 'iy', 'sil']
print(phoneTrans)

# Expand every phone into its per-state names, e.g. 'z' -> 'z_0', 'z_1', ...
stateTrans = [phone + '_' + str(stateid)
              for phone in phoneTrans
              for stateid in range(nstates[phone])]
print(stateTrans)

wordHMMs = concatAnyHMM(phoneHMMs, phoneTrans)
import json

import numpy as np
from tqdm import tqdm

from lab3_tools import path2info

if __name__ == "__main__":
    # Load the validation speaker list; produced by the script:
    # work/get_val_spk_json.py
    with open('data/val_spk.json', 'r') as f:
        data = json.load(f)
    # NOTE(review): key is misspelled, but it must match whatever
    # get_val_spk_json.py writes — fix on both sides at once, not here.
    val_spk = data['valdation_spker']

    # Load the train data.
    # BUGFIX: the archive holds dicts (an object-dtype array), so
    # allow_pickle=True is required; NumPy's default (False) raises a
    # ValueError when the array is accessed. Matches the other loaders
    # in this project, which already pass allow_pickle=True.
    traindata = np.load('data/traindata.npz', allow_pickle=True)['traindata']

    # Route each utterance: speakers in val_spk go to validation,
    # everyone else to train. Keys look like 'man-ab'.
    train = list()
    validation = list()
    for d in tqdm(traindata):
        gender, speakerID, _, _ = path2info(d['filename'])
        spk_gen_id = '{}-{}'.format(gender, speakerID)
        if spk_gen_id in val_spk:
            validation.append(d)
        else:
            train.append(d)

    # ===================================
    print("The size of training set : ", len(train))
    print("The size of validation set : ", len(validation))
    kwargs = {'validation': validation, 'train': train}
    np.savez('data/train_val_data.npz', **kwargs)