def convert(catalog, dat_file, n_quant, sample_rate=16000):
    """Convert all input data and save a single pickled dat file.

    Every sound file listed in ``catalog`` is loaded with librosa at
    ``sample_rate``, passed through ``util.mu_encode_np`` and appended to
    one flat array.  A ``SpokenSample`` record is kept for each file with
    the half-open range ``[wav_b, wav_e)`` it occupies in that array.

    Args:
        catalog: sized, re-iterable collection of ``(voice_id, snd_path)``
            pairs.  It is iterated twice and ``len()`` is taken of it, so a
            one-shot generator will not work.
        dat_file: path of the output file; opened in ``'wb'`` mode.
        n_quant: number of quantization levels handed to
            ``util.mu_encode_np``.  It also selects the storage dtype:
            ``uint8`` up to 2**8, ``int16`` up to 2**15, otherwise ``int32``.
        sample_rate: target sampling rate passed to ``librosa.load``.

    Side effects:
        Pickles a dict with keys ``'samples'``, ``'snd_dtype'`` and
        ``'snd_data'`` into ``dat_file`` and reports progress on stderr
        every 100 files.
    """
    import librosa

    # Smallest integer type able to hold every quantization level.
    if n_quant <= 2**8:
        snd_dtype = np.uint8
    elif n_quant <= 2**15:
        snd_dtype = np.int16
    else:
        snd_dtype = np.int32

    # Number speakers in first-seen order.  Iterating a set of ids would make
    # the numbering depend on hash order, which is not stable between runs
    # for string ids.
    speaker_id_map = {}
    for voice_id, _ in catalog:
        speaker_id_map.setdefault(voice_id, len(speaker_id_map))

    n_files = len(catalog)
    snd_data = np.empty((0), dtype=snd_dtype)
    samples = []

    for voice_id, snd_path in catalog:
        # `sr` must be given by keyword: it is keyword-only in recent librosa.
        snd, _ = librosa.load(snd_path, sr=sample_rate)
        snd_mu = util.mu_encode_np(snd, n_quant).astype(snd_dtype)

        # Grow the flat buffer in place and copy the new samples to its tail.
        wav_b = len(snd_data)
        wav_e = wav_b + len(snd_mu)
        snd_data.resize(wav_e)
        snd_data[wav_b:wav_e] = snd_mu

        samples.append(
            SpokenSample(
                voice_index=speaker_id_map[voice_id],
                wav_b=wav_b,
                wav_e=wav_e,
                file_path=snd_path))

        if len(samples) % 100 == 0:
            # `file=` belongs to print(), not to str.format().
            print('Converted {} files of {}.'.format(len(samples), n_files),
                  file=stderr)
            stderr.flush()

    with open(dat_file, 'wb') as dat_fh:
        state = {
            'samples': samples,
            'snd_dtype': snd_dtype,
            'snd_data': snd_data
        }
        pickle.dump(state, dat_fh)
def convert(catalog, pfx, n_quant, sample_rate=16000, win_sz=400, hop_sz=160,
            n_mels=80, n_mfcc=13):
    """Convert all input data into raw sound/mel files plus a pickled index.

    Three files are written, named from the common prefix ``pfx``:

    * ``pfx + '.dat'`` -- the encoded samples of every file
      (``util.mu_encode_np`` output cast to the chosen dtype), written back
      to back as raw bytes.
    * ``pfx + '.mel'`` -- the output of ``mfcc.ProcessWav(...).func`` for
      every file, transposed, flattened and written back to back as raw
      bytes.
    * ``pfx + '.ind'`` -- a pickled dict holding the global settings and
      totals (``window_size``, ``hop_size``, ``n_snd_elem``, ``n_mel_elem``,
      ``n_mel_chan``, ``snd_dtype``, ``n_quant``).  The four per-file lists
      (``voice_id``, ``n_snd_elem``, ``n_mel_elem``, ``snd_path``) are then
      merged in with ``dict.update``, so the final ``n_snd_elem`` and
      ``n_mel_elem`` entries are the per-file lists, not the totals.

    Args:
        catalog: sized iterable of ``(voice_id, snd_path)`` pairs
            (``len()`` is taken of it for progress reporting).
        pfx: path prefix of the three output files.
        n_quant: number of quantization levels handed to
            ``util.mu_encode_np``.  It also selects the storage dtype:
            ``uint8`` up to 2**8, ``int16`` up to 2**15, otherwise ``int32``.
        sample_rate: target sampling rate passed to ``librosa.load``.
        win_sz, hop_sz, n_mels, n_mfcc: forwarded unchanged to
            ``mfcc.ProcessWav``.

    Side effects:
        Writes the three files above and reports progress on stderr every
        100 files.
    """
    # Imported here so the function does not rely on a module-level import.
    import librosa

    mfcc_proc = mfcc.ProcessWav(sample_rate, win_sz, hop_sz, n_mels, n_mfcc)

    # Smallest integer type able to hold every quantization level.
    if n_quant <= 2**8:
        snd_dtype = np.uint8
    elif n_quant <= 2**15:
        snd_dtype = np.int16
    else:
        snd_dtype = np.int32

    snd_file = pfx + '.dat'
    ind_file = pfx + '.ind'
    mel_file = pfx + '.mel'

    # Per-file bookkeeping; one entry per catalog item, in catalog order.
    ind = {'voice_id': [], 'n_snd_elem': [], 'n_mel_elem': [], 'snd_path': []}
    n_files = len(catalog)
    n_snd_elem = 0
    n_mel_elem = 0
    n_mel_chan = None

    with open(snd_file, 'wb') as snd_fh, open(mel_file, 'wb') as mel_fh:
        for voice_id, snd_path in catalog:
            # `sr` must be given by keyword: it is keyword-only in recent
            # librosa.
            snd, _ = librosa.load(snd_path, sr=sample_rate)
            snd_mu = util.mu_encode_np(snd, n_quant).astype(snd_dtype)

            # mel: C, T (n_mels, n_timesteps)
            # reshape to T, C and flatten
            mel = mfcc_proc.func(snd)
            if n_mel_chan is None:
                n_mel_chan = mel.shape[0]
            mel = mel.transpose((1, 0)).flatten()

            # NOTE(review): the dtype of `mel` is not recorded in the index;
            # readers of the .mel file must know it by other means.
            snd_fh.write(snd_mu.data)
            mel_fh.write(mel.data)

            ind['voice_id'].append(voice_id)
            ind['n_snd_elem'].append(snd.size)
            ind['n_mel_elem'].append(mel.size)
            ind['snd_path'].append(snd_path)
            n_snd_elem += snd.size
            n_mel_elem += mel.size

            n_done = len(ind['voice_id'])
            if n_done % 100 == 0:
                # `file=` belongs to print(), not to str.format().
                print('Converted {} files of {}.'.format(n_done, n_files),
                      file=stderr)
                stderr.flush()

    with open(ind_file, 'wb') as ind_fh:
        index = {
            'window_size': win_sz,
            'hop_size': hop_sz,
            'n_snd_elem': n_snd_elem,
            'n_mel_elem': n_mel_elem,
            'n_mel_chan': n_mel_chan,
            'snd_dtype': snd_dtype,
            'n_quant': n_quant
        }
        # As in the original, the per-file lists overwrite the scalar totals
        # stored under 'n_snd_elem' and 'n_mel_elem'.
        index.update(ind)
        pickle.dump(index, ind_fh)