예제 #1
0
def process_data(wav_files, phn_files):
    """Build per-utterance acoustic features and phoneme targets, then batch.

    The WAV file at index ``i`` of ``wav_files`` is paired with the label
    file at index ``i`` of ``phn_files``.

    Args:
        wav_files: Sequence of WAV file paths.
        phn_files: Sequence of space-delimited label file paths, indexed in
            parallel with ``wav_files``. The phoneme symbol is read from the
            third field of each row.

    Returns:
        The result of ``lists_batches(inputs, targets)``, where ``inputs``
        holds one joined MFCC/filterbank feature array per utterance and
        ``targets`` holds one list of phoneme indices per utterance.

    Raises:
        IndexError: If ``phn_files`` is shorter than ``wav_files``, or a
            non-blank label row has fewer than three fields.
    """
    inputs = []
    targets = []
    for i, wav_path in enumerate(tqdm(wav_files)):
        # Extract MFCC and log filterbank features from the waveform.
        rate, sig = wav.read(wav_path)
        mfcc_feat = mfcc(sig, rate)
        fbank_feat = logfbank(sig, rate)
        # Per the original note: time_stamp x n_features.
        acoustic_features = join_features(mfcc_feat, fbank_feat)

        # Extract the label sequence from the phoneme file.
        # csv.reader needs a text-mode file on Python 3; newline='' lets the
        # csv module do its own newline handling.
        phn_labels = []
        with open(phn_files[i], newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=' '):
                if not row:
                    # Blank lines yield an empty row; nothing to label.
                    continue
                symbol = row[2]
                if symbol == 'q':
                    # 'q' entries are dropped from the target sequence.
                    continue
                # Fold the symbol through phoneme_48_39 (falling back to the
                # symbol itself), then shift its phoneme_set_39 value down by
                # one (presumably converting 1-based ids to 0-based; confirm
                # against the phoneme_set_39 definition).
                folded = phoneme_48_39.get(symbol, symbol)
                phn_labels.append(phoneme_set_39[folded] - 1)

        inputs.append(acoustic_features)
        targets.append(phn_labels)

    return lists_batches(inputs, targets)
예제 #2
0
def process_raw_phn(phn_file):
    """Read a phoneme label file and return its folded phoneme symbols.

    Each non-blank row of the space-delimited file supplies a phoneme symbol
    in its third field. Rows whose symbol is ``'q'`` are skipped. Every other
    symbol is mapped through ``phoneme_48_39``, falling back to the symbol
    itself when it has no entry there.

    Args:
        phn_file: Path of the label file to read.

    Returns:
        A list of phoneme symbols in file order.

    Raises:
        IndexError: If a non-blank row has fewer than three fields.
    """
    phn_labels = []
    # csv.reader needs a text-mode file on Python 3; newline='' lets the csv
    # module do its own newline handling.
    with open(phn_file, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=' '):
            if not row:
                # Blank lines yield an empty row; nothing to label.
                continue
            symbol = row[2]
            if symbol == 'q':
                continue
            phn_labels.append(phoneme_48_39.get(symbol, symbol))
    return phn_labels