コード例 #1
0
def split_train_validation_data():
    """Split the training utterances into train/validation sets by speaker.

    Loads the records stored under ``arr_0`` in ``traindata.npz``, groups the
    distinct speaker ids by gender, and randomly assigns roughly 90% of the
    speakers to the training set while keeping the male/female proportion of
    the whole speaker population. Every utterance follows its speaker, so no
    speaker appears in both sets.

    The resulting record lists are written to ``train_split_data.npy`` and
    ``vali_split_data.npy`` in the current working directory.

    Raises:
        ValueError: if the archive contains no records.
    """
    # allow_pickle is needed because the records are dicts (object array).
    # Only load archives produced by this project: unpickling untrusted
    # files can execute arbitrary code.
    traindata = np.load("traindata.npz", allow_pickle=True)['arr_0']

    male_list = []
    female_list = []
    seen_ids = set()
    # Speaker id of every utterance, in record order, so path2info is only
    # called once per record.
    utterance_ids = []
    # NOTE(review): speakers are keyed by id_num alone; confirm that ids are
    # unique across genders, otherwise the first gender seen wins.
    for record in traindata:
        gender, id_num, _, _ = path2info(record['filename'])
        utterance_ids.append(id_num)
        if id_num in seen_ids:
            continue
        seen_ids.add(id_num)
        if gender == 'man':
            male_list.append(id_num)
        else:
            female_list.append(id_num)
    print(len(female_list), "and ", len(male_list))

    num_speakers = len(seen_ids)
    if num_speakers == 0:
        raise ValueError("traindata.npz contains no records to split")

    train_size = int(num_speakers * 0.9)
    # The ratio must be taken over ALL speakers. Dividing by train_size (as
    # the previous version did) made male_train_size ~= len(male_list), which
    # put every male speaker into the training set.
    male_ratio = len(male_list) / num_speakers
    male_train_size = min(int(train_size * male_ratio), len(male_list))
    female_train_size = train_size - male_train_size
    print(male_train_size, ' ', train_size)

    male_train_ids = np.random.choice(
        male_list, male_train_size, replace=False)
    female_train_ids = np.random.choice(
        female_list, female_train_size, replace=False)
    # A set of plain Python values gives O(1) membership tests below.
    train_ids = set(male_train_ids.tolist()) | set(female_train_ids.tolist())

    train_data = []
    vali_data = []
    for record, id_num in zip(traindata, utterance_ids):
        if id_num in train_ids:
            train_data.append(record)
        else:
            vali_data.append(record)

    print('num of male speaker for training:', len(male_train_ids))
    print('num of female speaker for training:', len(female_train_ids))
    print('num of total train data:', len(train_data))
    print('num of total val data:', len(vali_data))
    np.save('train_split_data.npy', train_data)
    np.save('vali_split_data.npy', vali_data)
コード例 #2
0
def feature_extraction_and_force_alignment(filepath, nstates, phoneHMMs):
    """Extract features from one .wav file and force-align it to HMM states.

    Args:
        filepath: path of the .wav file; also passed to ``path2info`` to
            obtain the word-level transcription.
        nstates: mapping from phone name to its number of HMM states.
        phoneHMMs: phone models handed to ``forcedAlignment``.

    Returns:
        A tuple ``(lmfcc_result, mspec_result, targets)`` where ``targets``
        holds one ``'<phone>_<state index>'`` name per entry of the Viterbi
        path returned by ``forcedAlignment``.

    The pronunciation dictionary ``prondict`` is read from the enclosing
    scope.
    """
    # The sampling rate returned by loadAudio is not used here.
    samples, _rate = loadAudio(filepath)

    # Word transcription -> phone sequence -> flat list of state names.
    words = list(path2info(filepath)[2])
    phones = words2phones(words, prondict)
    state_names = []
    for phone in phones:
        for state_idx in range(nstates[phone]):
            state_names.append(phone + '_' + str(state_idx))

    lmfcc_result = mfcc(samples)
    mspec_result = mspec(samples)

    # The Viterbi path is cast to integers and used to index the state names.
    _score, state_path = forcedAlignment(lmfcc_result, phoneHMMs, phones)
    state_indices = state_path.astype(np.int16)
    targets = [state_names[k] for k in state_indices]

    return lmfcc_result, mspec_result, targets
コード例 #3
0
    # Build the flat list of state names '<phone>_<index>' for every phone in
    # phoneHMMs. A state's position in this list is used further down as its
    # integer target id.
    # E.g. number of states of ah = 3; ah -> ['ah_0', 'ah_1', 'ah_2']
    stateList = list()
    for ph in phoneHMMs.keys():
        for i in range(nstates[ph]):
            stateList.append('%s_%d' % (ph, i))
    # --------------------------------------------------------------
    # Visit every file under folder_to_extract and build one record per
    # .wav file (presumably `walk` is os.walk -- confirm against the imports).
    data = list()
    for root, dirs, files in walk(folder_to_extract):
        for f in tqdm(files):
            # Skip anything that is not a .wav file.
            if not f.endswith('.wav'):
                continue
            # Load the audio and compute both feature types with the file's
            # own sampling rate.
            filename = os.path.join(root, f)
            sample, srate = loadAudio(filename)
            mspec_x = mspec(sample, samplingrate=srate)
            lmfcc_x = mfcc(sample, samplingrate=srate)
            # Word transcription (taken from the file path) -> phone sequence.
            wordTrans = list(path2info(filename)[2])
            phoneTrans = words2phones(wordTrans, prondict)
            # The result is iterated below as a sequence of state names.
            targets = forcedAlignment(lmfcc_x, phoneHMMs, phoneTrans)
            # Convert the targets from state-name strings to integer indices
            # into stateList (list.index raises ValueError for an unknown name).
            idx_targets = [stateList.index(t) for t in targets]
            data.append({
                'filename': filename,
                'lmfcc': lmfcc_x,
                'mspec': mspec_x,
                'targets': idx_targets
            })

    # Save all records in a single .npz archive under the key named by
    # data_type.
    kwargs = {data_type: data}
    np.savez(dump_file_name, **kwargs)
コード例 #4
0
# Sanity check on the transition matrix of wordTest: print the sum of all its
# entries, then its number of rows. Per the original author's note, the two
# printed numbers should be equal.
sumTot = 0.0
N = len(wordTest['transmat'])
for i, row in enumerate(wordTest['transmat']):
    sumTot += np.sum(row)
print(sumTot)
print(N)

# Debug helpers, disabled:
#print(wordTest['startprob'])
#print(wordTest['transmat'])

###########
# Walk one training utterance through the transcription pipeline.
filename = 'tidigits/disc_4.1.1/tidigits/train/man/nw/z43a.wav'
samples, samplingrate = lab3_tools.loadAudio(filename)
lmfcc = lab1.mfcc(samples)

# Word-level transcription taken from the file path.
# Expected (author's note): ['z', '4', '3']
wordTrans = list(lab3_tools.path2info(filename)[2])
print(wordTrans)

# Phone-level transcription.
# Expected (author's note):
# ['sil', 'z', 'iy', 'r', 'ow', 'f', 'ao', 'r', 'th', 'r', 'iy', 'sil']
phoneTrans = words2phones(wordTrans, prondict, addShortPause=True)
print(phoneTrans)

# One '<phone>_<state index>' name per HMM state of every phone, in order.
stateTrans = [
    '_'.join((phone, str(stateid)))
    for phone in phoneTrans
    for stateid in range(nstates[phone])
]
print(stateTrans)

# Combined model for the whole utterance, built from the phone models.
wordHMMs = concatAnyHMM(phoneHMMs, phoneTrans)
コード例 #5
0
ファイル: split_data.py プロジェクト: chris4540/DT2119
import numpy as np
import json
from lab3_tools import path2info
from tqdm import tqdm

if __name__ == "__main__":
    # Split the training records into train/validation sets according to a
    # precomputed list of validation speakers, then store both sets in a
    # single .npz archive.

    # Load the validation speaker list; it comes from the result of the
    # script work/get_val_spk_json.py. The key spelling 'valdation_spker'
    # is read as-is from that file and must not be "corrected" here.
    with open('data/val_spk.json', 'r', encoding='utf-8') as f:
        data = json.load(f)
    # A set gives O(1) membership tests in the loop below.
    val_spk = set(data['valdation_spker'])

    # Load the train data. The records are dicts, i.e. an object array, so
    # allow_pickle=True is required (NumPy >= 1.16.3 refuses to load object
    # arrays otherwise). Only load archives produced by this project:
    # unpickling untrusted files can execute arbitrary code.
    traindata = np.load('data/traindata.npz', allow_pickle=True)['traindata']

    train = list()
    validation = list()
    for d in tqdm(traindata):
        sound_fname = d['filename']
        gender, speakerID, _, _ = path2info(sound_fname)
        # Speakers are identified by '<gender>-<speakerID>'.
        spk_gen_id = '{}-{}'.format(gender, speakerID)

        if spk_gen_id in val_spk:
            validation.append(d)
        else:
            train.append(d)
    # ===================================
    print("The size of training set : ", len(train))
    print("The size of validation set : ", len(validation))
    kwargs = {'validation': validation, 'train': train}
    np.savez('data/train_val_data.npz', **kwargs)