def process_data(wav_files, phn_files):
    """Build batched acoustic features and phoneme targets from paired files.

    For every index ``i``, ``wav_files[i]`` is read with ``wav.read`` and its
    MFCC and log filter-bank features are combined by ``join_features``;
    ``phn_files[i]`` is parsed as a space-delimited file whose third field is
    the phoneme label.

    Args:
        wav_files: Sequence of paths to the audio files.
        phn_files: Sequence of paths to the label files, index-aligned with
            ``wav_files``.

    Returns:
        Whatever ``lists_batches(inputs, targets)`` returns, where ``inputs``
        holds one feature object per file and ``targets`` one list of label
        ids per file.

    Raises:
        IndexError: If ``phn_files`` is shorter than ``wav_files``, or a
            non-empty label row has fewer than three fields.
        KeyError: If a mapped label is not a key of ``phoneme_set_39``.
    """
    inputs = []
    targets = []
    for i, wav_path in enumerate(tqdm(wav_files)):
        # extract mfcc features from wav
        rate, sig = wav.read(wav_path)
        mfcc_feat = mfcc(sig, rate)
        fbank_feat = logfbank(sig, rate)
        # time_stamp x n_features
        acoustic_features = join_features(mfcc_feat, fbank_feat)

        # extract label from phn
        phn_labels = []
        # Text mode: csv.reader needs str rows on Python 3 (binary mode
        # raises "iterator should return strings, not bytes").
        with open(phn_files[i]) as csvfile:
            for row in csv.reader(csvfile, delimiter=' '):
                if not row:
                    # Blank line (e.g. trailing newline) carries no label.
                    continue
                label = row[2]
                if label == 'q':
                    continue
                # Labels missing from phoneme_48_39 are kept unchanged.
                # NOTE(review): the "- 1" presumably turns 1-based ids from
                # phoneme_set_39 into 0-based targets - confirm.
                phn_labels.append(
                    phoneme_set_39[phoneme_48_39.get(label, label)] - 1)

        inputs.append(acoustic_features)
        targets.append(phn_labels)
    return lists_batches(inputs, targets)
def process_raw_phn(phn_file):
    """Read the phoneme labels from a space-delimited label file.

    The third field of every row is taken as the label. Rows labelled
    ``'q'`` are dropped; every other label is looked up in ``phoneme_48_39``
    and kept unchanged when it has no entry there.

    Args:
        phn_file: Path to the label file.

    Returns:
        List of label strings in file order.

    Raises:
        IndexError: If a non-empty row has fewer than three fields.
    """
    phn_labels = []
    # Text mode: csv.reader needs str rows on Python 3 (binary mode raises
    # "iterator should return strings, not bytes").
    with open(phn_file) as csvfile:
        for row in csv.reader(csvfile, delimiter=' '):
            if not row:
                # Blank line (e.g. trailing newline) carries no label.
                continue
            label = row[2]
            if label == 'q':
                continue
            phn_labels.append(phoneme_48_39.get(label, label))
    return phn_labels