def fetch_blizzard_unify_spec(data_path, sz=8000, timestep=79, frame_size=200,
                              overlap=100, batch_size=100,
                              file_name="blizzard_unify_spec.h5"):
    hdf5_path = os.path.join(data_path, file_name)
    if not os.path.exists(hdf5_path):
        data_matches = []
        for root, dirnames, filenames in os.walk(data_path):
            for filename in fnmatch.filter(filenames, 'data_*.npy'):
                data_matches.append(os.path.join(root, filename))
        # sort in numeric order on the index in 'data_<n>.npy' (split off the
        # extension so indices above 9 also sort correctly)
        data_matches = sorted(
            data_matches,
            key=lambda x: int(x.split("/")[-1].split("_")[-1].split(".")[0]))
        # set up the PyTables file with blosc compression
        compression_filter = tables.Filters(complevel=5, complib='blosc')
        hdf5_file = tables.openFile(hdf5_path, mode='w')
        data = hdf5_file.createEArray(hdf5_file.root, 'data',
                                      tables.Int16Atom(),
                                      shape=(0, timestep, frame_size),
                                      filters=compression_filter)
        for n, f in enumerate(data_matches):
            print("Reading file %s" % f)
            with open(f) as fp:
                # Array of arrays, ragged
                d = np.load(fp)
            # concatenate every line into one long 1-D signal, keeping only
            # the first channel of multi-channel entries
            large_d = d[0]
            for i in xrange(1, len(d)):
                print("Processing line %i of %i" % (i + 1, len(d)))
                di = d[i]
                if len(di.shape) > 1:
                    di = di[:, 0]
                large_d = np.concatenate([large_d, di])
            # split the signal into batch_size parallel streams, then cut
            # sz-sample windows for truncated BPTT
            chunk_size = len(large_d) // batch_size
            seg_d = segment_axis(large_d, chunk_size, 0)
            num_batch = (seg_d.shape[-1] - 1) // sz
            for i in range(num_batch):
                batch = seg_d[:, i * sz:(i + 1) * sz]
                batch = np.array([segment_axis(x, frame_size, overlap,
                                               end='pad')
                                  for x in batch])
                batch = apply_window(batch)
                batch = apply_fft(batch)
                batch = log_magnitude(batch)
                batch = apply_ifft(batch)
                for j in range(batch_size):
                    data.append(batch[j][None])
        hdf5_file.close()
    hdf5_file = tables.openFile(hdf5_path, mode='r')
    X = hdf5_file.root.data
    return X
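# Usage sketch (an assumption, not from the original source): the fetch_*
# functions build their HDF5 cache on the first call and afterwards just
# reopen it read-only. '/path/to/blizzard/' is a placeholder for a directory
# of 'data_*.npy' dumps.
X = fetch_blizzard_unify_spec('/path/to/blizzard/', sz=8000, timestep=79,
                              frame_size=200, overlap=100, batch_size=100)
print(X.shape)  # (n_examples, timestep, frame_size), backed by PyTables
x0 = X[0]       # EArray rows are read lazily from disk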
def load(self, data_path):
    data, tags = fetch_onomatopoeia(data_path)
    # hardcode split for now
    random_state = np.random.RandomState(1999)
    indices = np.arange(len(data))
    random_state.shuffle(indices)
    if self.name == "train":
        idx = int(.9 * float(len(data)))
        assert idx != len(data)
        data = data[indices[:idx]]
    elif self.name == "valid":
        idx = int(.1 * float(len(data)))
        assert idx != 0
        data = data[indices[-idx:]]
    else:
        raise ValueError("name = %s is not supported!" % self.name)
    # truncate overly long sequences at self.thresh samples
    raw_X = []
    for x in data:
        if len(x) < self.thresh:
            raw_X.append(np.asarray(x, dtype=theano.config.floatX))
        else:
            raw_X.append(np.asarray(x[:self.thresh],
                                    dtype=theano.config.floatX))
    raw_X = np.array(raw_X)
    pre_X, self.X_mean, self.X_std = self.global_normalize(raw_X,
                                                           self.X_mean,
                                                           self.X_std)
    X = np.array([segment_axis(x, self.frame_size, 0) for x in pre_X])
    return [X]
def apply_fft(self, batch):
    # window each frame, then take the real FFT along the last axis
    batch = np.array([self.numpy_rfft(self.window *
                                      segment_axis(x, self.frame_size,
                                                   self.overlap, end="pad"))
                      for x in batch])
    return batch
def apply_window(self, batch):
    batch = np.array([self.window * segment_axis(x, self.frame_size,
                                                 self.overlap, end='pad')
                      for x in batch])
    return batch
def load(self, data_path):
    data = np.load(data_path)
    if self.data_mode == 'words':
        if self.name == 'train':
            raw_data = data['train_words']
        elif self.name == 'valid':
            raw_data = data['valid_words']
        elif self.name == 'test':
            raw_data = data['test_words']
        self._max_labels = data['n_words']
    elif self.data_mode == 'chars':
        if self.name == 'train':
            raw_data = data['train_chars']
        elif self.name == 'valid':
            raw_data = data['valid_chars']
        elif self.name == 'test':
            raw_data = data['test_chars']
    # fold the token stream into batch_size parallel streams
    chunk_size = len(raw_data) // self.batch_size
    raw_data = segment_axis(raw_data, chunk_size, 0)
    X = []
    y = []
    # non-overlapping context_len windows; y is X shifted one step ahead
    for i in range((raw_data.shape[1] - 1) // self.context_len):
        X.extend(raw_data[:, :-1][:, i * self.context_len:
                                  (i + 1) * self.context_len, np.newaxis])
        y.extend(raw_data[:, 1:][:, i * self.context_len:
                                 (i + 1) * self.context_len, np.newaxis])
    X = np.asarray(X)
    y = np.asarray(y)
    return [X, y]
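# A self-contained sketch (not from the original source) of the windowing
# above: the token stream is folded into batch_size parallel rows, and y is
# X shifted one step ahead, cut into context_len-sized windows.
import numpy as np
raw = np.arange(20)                       # stand-in for a token stream
batch_size, context_len = 2, 3
chunk_size = len(raw) // batch_size
rows = raw[:batch_size * chunk_size].reshape(batch_size, chunk_size)
X, y = [], []
for i in range((rows.shape[1] - 1) // context_len):
    X.extend(rows[:, :-1][:, i * context_len:(i + 1) * context_len,
                          np.newaxis])
    y.extend(rows[:, 1:][:, i * context_len:(i + 1) * context_len,
                         np.newaxis])
X, y = np.asarray(X), np.asarray(y)
assert (y == X + 1).all()                 # targets are next-step tokens here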
def slices(self, start, end):
    batch = np.array(self.data[self.idx[start:end]],
                     dtype=theano.config.floatX)
    batch -= self.X_mean
    batch /= self.X_std
    batch = np.asarray([segment_axis(x, self.inpsz, 0) for x in batch])
    # (batch, time, frame) -> (time, batch, frame)
    batch = batch.transpose(1, 0, 2)
    return totuple(batch)
def slices(self, start, end):
    batch = np.array(self.data[start:end], dtype=theano.config.floatX)
    batch -= self.X_mean
    batch /= self.X_std
    batch = np.asarray([segment_axis(x, self.frame_size, 0) for x in batch])
    # (batch, time, frame) -> (time, batch, frame)
    batch = batch.transpose(1, 0, 2)
    return totuple(batch)
def slices(self, start, end):
    batch = np.array(self.data[start:end], dtype=theano.config.floatX)
    if self.use_spec:
        batch = self._use_spec(batch)
        batch = self._log_magnitude(batch)
        batch = self._concatenate(batch)
    else:
        batch -= self.X_mean
        batch /= self.X_std
        if self.use_window:
            batch = self._use_window(batch)
        else:
            batch = np.asarray([segment_axis(x, self.frame_size, 0)
                                for x in batch])
    # (batch, time, frame) -> (time, batch, frame)
    batch = batch.transpose(1, 0, 2)
    return totuple(batch)
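# A self-contained sketch (an assumption, not from the original source) of
# the layout slices() produces on the non-spectral path: each sequence is
# framed, then the batch is transposed so time leads, as recurrent models
# usually expect.
import numpy as np
from cle.cle.utils import segment_axis
batch = np.random.randn(5, 800)            # 5 sequences of 800 samples
framed = np.asarray([segment_axis(s, 200, 0) for s in batch])
print(framed.shape)                        # (5, 4, 200): batch, time, frame
print(framed.transpose(1, 0, 2).shape)     # (4, 5, 200): time, batch, frame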
def load(self, data_path):
    if self.name == 'train':
        data_path = data_path + 'sf_train_segmented_0.npy'
    elif self.name == 'valid':
        data_path = data_path + 'sf_valid_segmented_0.npy'
    data = np.load(data_path)
    raw_X = []
    for x in data:
        if len(x) < 50000:
            raw_X.append(np.asarray(x, dtype=theano.config.floatX))
        else:
            # split overly long sequences in half
            half_len = len(x) // 2
            raw_X.append(np.asarray(x[:half_len],
                                    dtype=theano.config.floatX))
            raw_X.append(np.asarray(x[half_len:],
                                    dtype=theano.config.floatX))
    raw_X = np.array(raw_X)
    if self.shuffle:
        idx = np.random.permutation(len(raw_X))
        raw_X = raw_X[idx]
    pre_X = self.apply_preprocessing(raw_X)
    if self.multi_source:
        X = [np.array([segment_axis(x, self.frame_size, 0) for x in source])
             for source in pre_X]
    else:
        X = [np.array([segment_axis(x, self.frame_size, 0) for x in pre_X])]
    return X
def fetch_accent_tbptt(data_path, sz=8000, batch_size=100,
                       file_name="accent_tbptt.h5"):
    hdf5_path = os.path.join(data_path, file_name)
    if not os.path.exists(hdf5_path):
        data_matches = []
        for root, dirnames, filenames in os.walk(data_path):
            for filename in fnmatch.filter(filenames, '*.wav'):
                if '._' not in filename:
                    data_matches.append(os.path.join(root, filename))
        # Just group same languages, numbering will be in *alpha* not numeric
        # order within each language
        data_matches = sorted(data_matches)
        compression_filter = tables.Filters(complevel=5, complib='blosc')
        hdf5_file = tables.openFile(hdf5_path, mode='w')
        data = hdf5_file.createEArray(hdf5_file.root, 'data',
                                      tables.Int16Atom(),
                                      shape=(0, sz),
                                      filters=compression_filter)
        large_d = None
        for n, f in enumerate(data_matches):
            print("Processing file %i of %i" % (n + 1, len(data_matches)))
            try:
                sr, d = wavfile.read(f)
                if len(d.shape) > 1:
                    # keep only the first channel
                    d = d[:, 0]
                if large_d is None:
                    large_d = d
                else:
                    large_d = np.concatenate([large_d, d])
            except ValueError:
                print("Not a proper wave file.")
        # split into batch_size parallel streams of sz-sample chunks
        chunk_size = len(large_d) // batch_size
        seg_d = segment_axis(large_d, chunk_size, 0)
        num_batch = (seg_d.shape[-1] - 1) // sz
        for i in range(num_batch):
            this_batch = seg_d[:, i * sz:(i + 1) * sz]
            for j in range(batch_size):
                data.append(this_batch[j][None])
        hdf5_file.close()
    hdf5_file = tables.openFile(hdf5_path, mode='r')
    X = hdf5_file.root.data
    return X
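# Usage sketch (hypothetical path, not from the original source). Rows are
# sz-sample chunks laid out for truncated BPTT: row k of minibatch i
# continues row k of minibatch i - 1.
X = fetch_accent_tbptt('/path/to/accent_wavs/', sz=8000, batch_size=100)
print(X.shape)              # (num_batch * batch_size, 8000), int16
minibatch_0 = X[0:100]      # rows i*100:(i+1)*100 form minibatch i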
def slices(self, start, end):
    batch = np.array(self.data[start:end], dtype=theano.config.floatX)
    if self.use_spec:
        batch = self.apply_fft(batch)
        batch = self.log_magnitude(batch)
        batch = self.concatenate(batch)
    else:
        batch -= self.X_mean
        batch /= self.X_std
        if self.use_window:
            batch = self.apply_window(batch)
        else:
            batch = np.asarray([segment_axis(x, self.frame_size, 0)
                                for x in batch])
    batch = batch.transpose(1, 0, 2)
    return totuple(batch)
def slices(self, start, end):
    batches = [mat[start:end] for mat in self.data]
    if self.use_spec:
        batches[0] = self._use_spec(batches[0])
        batches[0] = self._log_magnitude(batches[0])
        batches[0] = self._concatenate(batches[0])
    else:
        batches[0] -= self.X_mean
        batches[0] /= self.X_std
        if self.use_window:
            batches[0] = self._use_window(batches[0])
        else:
            batches[0] = np.asarray([segment_axis(x, self.frame_size, 0)
                                     for x in batches[0]])
    mask = self.create_mask(batches[0].swapaxes(0, 1))
    if self.load_spk_info:
        # grab the speaker labels before the zero-padded list replaces them
        spk = batches[-1]
        batches = [self.zero_pad(batch) for batch in batches[:-1]]
        return totuple(batches + [spk, mask])
    else:
        batches = [self.zero_pad(batch) for batch in batches]
        return totuple(batches + [mask])
def fetch_timit(data_path, shuffle=0, frame_size=200, this_set="train",
                use_n_gram=1, file_name='_timit.h5'):
    file_name = this_set + file_name
    hdf5_path = os.path.join(data_path, file_name)
    if not os.path.exists(hdf5_path):
        raw_name = data_path + this_set + '_x_raw.npy'
        pho_name = data_path + this_set + '_x_phonemes.npy'
        raw_data = np.load(raw_name)
        pho_data = np.load(pho_name)
        if shuffle:
            idx = np.random.permutation(len(raw_data))
            raw_data = raw_data[idx]
            pho_data = pho_data[idx]
        # number of distinct phoneme labels
        len_pho = np.array([np.unique(x).max() for x in pho_data]).max() + 1
        pho_data = np.array([segment_axis(y, frame_size, 0)
                             for y in pho_data])
        if use_n_gram:
            pho_data = assign_n_gram_per_frame(pho_data, len_pho)
        else:
            pho_data = assign_phoneme_per_frame(pho_data, len_pho)
        # setup tables
        compression_filter = tables.Filters(complevel=5, complib='blosc')
        hdf5_file = tables.openFile(hdf5_path, mode='w')
        raw = hdf5_file.createVLArray(hdf5_file.root, 'raw',
                                      tables.Int16Atom(shape=()),
                                      filters=compression_filter)
        pho = hdf5_file.createVLArray(hdf5_file.root, 'pho',
                                      tables.Int16Atom(shape=()),
                                      filters=compression_filter)
        for x, y in zip(raw_data, pho_data):
            raw.append(x)
            pho.append(y.flatten())
        hdf5_file.close()
    hdf5_file = tables.openFile(hdf5_path, mode='r')
    X = hdf5_file.root.raw
    y = hdf5_file.root.pho
    return X, y
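# Usage sketch (hypothetical path; assumes the '<set>_x_raw.npy' and
# '<set>_x_phonemes.npy' dumps named above live under data_path).
X, y = fetch_timit('/path/to/timit/', shuffle=0, frame_size=200,
                   this_set='train', use_n_gram=1)
x0, y0 = X[0], y[0]  # VLArray rows are ragged and read lazily from disk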
def _segment_axis(data):
    # Defined inside so that frame_size is available from the enclosing scope
    x = tuple([numpy.array([segment_axis(v, frame_size, 0) for v in var])
               for var in data])
    return x
def fetch_blizzard_tbptt(data_path, sz=8000, batch_size=100,
                         file_name="blizzard_tbptt.h5"):
    hdf5_path = os.path.join(data_path, file_name)
    if not os.path.exists(hdf5_path):
        data_matches = []
        for root, dir_names, file_names in os.walk(data_path):
            for filename in fnmatch.filter(file_names, 'data_*.npy'):
                data_matches.append(os.path.join(root, filename))
        # sort in numeric order on the index in 'data_<n>.npy' (split off the
        # extension so indices above 9 also sort correctly)
        data_matches = sorted(
            data_matches,
            key=lambda x: int(x.split("/")[-1].split("_")[-1].split(".")[0]))
        # setup tables
        compression_filter = tables.Filters(complevel=5, complib='blosc')
        hdf5_file = tables.openFile(hdf5_path, mode='w')
        data = hdf5_file.createEArray(hdf5_file.root, 'data',
                                      tables.Int16Atom(),
                                      shape=(0, sz),
                                      filters=compression_filter)
        for n, f in enumerate(data_matches):
            print("Reading file %s" % f)
            with open(f) as fp:
                # Array of arrays, ragged
                d = np.load(fp)
            # concatenate everything into one long 1-D signal, first channel
            # only for multi-channel entries
            large_d = d[0]
            for i in xrange(1, len(d)):
                print("Processing line %i of %i" % (i + 1, len(d)))
                di = d[i]
                if len(di.shape) > 1:
                    di = di[:, 0]
                large_d = np.concatenate([large_d, di])
            # split into batch_size parallel streams of sz-sample chunks
            chunk_size = len(large_d) // batch_size
            seg_d = segment_axis(large_d, chunk_size, 0)
            num_batch = (seg_d.shape[-1] - 1) // sz
            for i in range(num_batch):
                batch = seg_d[:, i * sz:(i + 1) * sz]
                for j in range(batch_size):
                    data.append(batch[j][None])
        hdf5_file.close()
    hdf5_file = tables.openFile(hdf5_path, mode='r')
    return hdf5_file.root.data
def _segment_axis(data):
    x = numpy.array([segment_axis(v, frame_size, 0) for v in data[0]])
    return (x,)
def load(self, data_path):
    if self.name not in ['train', 'valid', 'test']:
        raise ValueError(self.name + " is not a recognized value. " +
                         "Valid values are ['train', 'valid', 'test'].")
    speaker_info_list_path = os.path.join(data_path, 'spkrinfo.npy')
    #phoneme_list_path = os.path.join(data_path, 'reduced_phonemes.pkl')
    #word_list_path = os.path.join(data_path, 'words.pkl')
    #speaker_features_list_path = os.path.join(data_path,
    #                                          'spkr_feature_names.pkl')
    speaker_id_list_path = os.path.join(data_path, 'speakers_ids.pkl')
    raw_path = os.path.join(data_path, self.name + '_x_raw.npy')
    phoneme_path = os.path.join(data_path, self.name + '_x_phonemes.npy')
    #phone_path = os.path.join(data_path, self.name + '_x_phones.npy')
    #word_path = os.path.join(data_path, self.name + '_x_words.npy')
    speaker_path = os.path.join(data_path, self.name + '_spkr.npy')
    raw = np.load(raw_path)
    raw_X = []
    for x in raw:
        raw_X.append(np.asarray(x, dtype=theano.config.floatX))
    raw_X = np.array(raw_X)
    if self.shuffle:
        idx = np.random.permutation(len(raw_X))
        raw_X = raw_X[idx]
    else:
        idx = np.arange(len(raw_X))
    if not self.use_spec:
        pre_X, self.X_mean, self.X_std =\
            self.global_normalize(raw_X, self.X_mean, self.X_std)
    if self.use_window:
        if self.use_spec:
            X = self._use_spec(raw_X)
            X = self._log_magnitude(X)
            X = self._concatenate(X)
        else:
            X = self._use_window(pre_X)
    else:
        X = np.asarray([segment_axis(x, self.frame_size, 0) for x in pre_X])
    if self.load_spk_info:
        spk = np.load(speaker_path)
        spk = spk[idx]
        # one-hot over the 630 TIMIT speakers
        S = np.zeros((len(spk), 630))
        for i, s in enumerate(spk):
            S[i, s] = 1
    if self.load_phonetic_label:
        #pho = np.load(phone_path)
        pho = np.load(phoneme_path)
        self.len_pho = np.array([np.unique(x).max() for x in pho]).max() + 1
        unseg_Y = []
        for y in pho:
            unseg_Y.append(np.asarray(y, dtype=theano.config.floatX))
        unseg_Y = np.array(unseg_Y)
        unseg_Y = unseg_Y[idx]
        unseg_Y = np.array([segment_axis(y, self.frame_size, 0)
                            for y in unseg_Y])
        if self.use_n_gram:
            Y = self.assign_n_gram_per_frame(unseg_Y)
        else:
            Y = self.assign_phoneme_per_frame(unseg_Y)
    if self.load_spk_info and self.load_phonetic_label:
        return [X, Y, S]
    elif self.load_spk_info:
        return [X, S]
    elif self.load_phonetic_label:
        return [X, Y]
    else:
        return [X]
def load(self, data_path):
    dataset = 'audio.tar.gz'
    datafile = os.path.join(data_path, dataset)
    if not os.path.isfile(datafile):
        try:
            # Python 2: urllib.urlretrieve exists
            import urllib
            urllib.urlretrieve('http://google.com')
            url =\
                'https://dl.dropboxusercontent.com/u/15378192/audio.tar.gz'
        except AttributeError:
            # Python 3
            import urllib.request as urllib
            url =\
                'https://dl.dropboxusercontent.com/u/15378192/audio.tar.gz'
        print("Downloading data from %s" % url)
        urllib.urlretrieve(url, datafile)
    if not os.path.exists(os.path.join(data_path, "audio")):
        tar = tarfile.open(datafile)
        os.chdir(data_path)
        tar.extractall()
        tar.close()
    h5_file_path = os.path.join(data_path, "saved_fruit.h5")
    if not os.path.exists(h5_file_path):
        data_path = os.path.join(data_path, "audio")
        audio_matches = []
        for root, dirnames, filenames in os.walk(data_path):
            for filename in fnmatch.filter(filenames, '*.wav'):
                audio_matches.append(os.path.join(root, filename))
        random.seed(1999)
        random.shuffle(audio_matches)
        # http://mail.scipy.org/pipermail/numpy-discussion/2011-March/055219.html
        h5_file = tables.openFile(h5_file_path, mode='w')
        data_x = h5_file.createVLArray(h5_file.root, 'data_x',
                                       tables.Float32Atom(shape=()),
                                       filters=tables.Filters(1))
        data_y = h5_file.createVLArray(h5_file.root, 'data_y',
                                       tables.Int32Atom(shape=()),
                                       filters=tables.Filters(1))
        for wav_path in audio_matches:
            # Convert chars to int classes
            word = wav_path.split(os.sep)[-1][:6]
            chars = [ord(c) - 97 for c in word]
            data_y.append(np.array(chars, dtype='int32'))
            fs, d = wavfile.read(wav_path)
            data_x.append(d.astype(theano.config.floatX))
        h5_file.close()
    h5_file = tables.openFile(h5_file_path, mode='r')
    raw_X = np.array([np.asarray(x) for x in h5_file.root.data_x])
    cls = np.array([''.join([chr(y + 97) for y in Y])
                    for Y in h5_file.root.data_y])
    if self.name != 'all':
        fruit_list = []
        if len(self.name) > 1:
            # self.name may hold several fruit names here; match any of them
            for i, fruit_name in enumerate(cls):
                for name in self.name:
                    if name in fruit_name:
                        fruit_list.append(i)
        else:
            for i, fruit_name in enumerate(cls):
                if self.name in fruit_name:
                    fruit_list.append(i)
    else:
        fruit_list = tolist(np.arange(len(raw_X)))
    raw_X = raw_X[fruit_list]
    if self.prep == 'normalize':
        pre_X, self.X_mean, self.X_std = self.global_normalize(raw_X)
    elif self.prep == 'standardize':
        pre_X, self.X_max, self.X_min = self.standardize(raw_X)
    X = np.array([segment_axis(x, self.frame_size, 0) for x in pre_X])
    return [X]
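# A self-contained check (not from the original source) of the label
# encoding used above: lowercase letters map to 0..25 via ord(c) - 97, and
# the class string is recovered with chr(c + 97).
word = 'orange'
chars = [ord(c) - 97 for c in word]
assert ''.join(chr(c + 97) for c in chars) == word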
def _use_spec(self, batch):
    # window each frame, then take the real FFT
    batch = np.asarray([self.numpy_rfft(self.window *
                                        segment_axis(x, self.frame_size,
                                                     self.overlap,
                                                     end='pad'))
                        for x in batch])
    return batch
def fetch_blizzard_tbptt(data_path, sz=8000, batch_size=100,
                         file_name="blizzard_tbptt.h5"):
    hdf5_path = os.path.join(data_path, file_name)
    print("looking for ", hdf5_path)
    if not os.path.exists(hdf5_path):
        data_matches = []
        for root, dir_names, file_names in os.walk(data_path):
            for filename in fnmatch.filter(file_names, '*.npy'):
                data_matches.append(os.path.join(root, filename))
        # numeric sorting is disabled in this variant
        '''
        data_matches = sorted(data_matches, key=lambda x: int(
            x.split("/")[-1].split("_")[-1][0]))
        '''
        # setup tables
        compression_filter = tables.Filters(complevel=5, complib='blosc')
        hdf5_file = tables.openFile(hdf5_path, mode='w')
        data = hdf5_file.createEArray(hdf5_file.root, 'data',
                                      tables.Int16Atom(),
                                      shape=(0, sz),
                                      filters=compression_filter)
        for n, f in enumerate(data_matches):
            print("Reading file %s" % f)
            with open(f) as fp:
                # each .npy already holds one long 1-D signal in this variant
                large_d = np.load(fp)
            '''
            d = np.load(fp)
            large_d = d[0]
            for i in xrange(1, len(d)):
                print("Processing line %i of %i" % (i + 1, len(d)))
                di = d[i]
                if len(di.shape) > 1:
                    di = di[:, 0]
                large_d = np.concatenate([large_d, di])
            '''
            chunk_size = len(large_d) // batch_size
            seg_d = segment_axis(large_d, chunk_size, 0)
            num_batch = (seg_d.shape[-1] - 1) // sz
            for i in range(num_batch):
                batch = seg_d[:, i * sz:(i + 1) * sz]
                for j in range(batch_size):
                    data.append(batch[j][None])
        hdf5_file.close()
    hdf5_file = tables.openFile(hdf5_path, mode='r')
    return hdf5_file.root.data
import numpy as np
import pysptk as SPTK
from scipy.io import wavfile

from cle.cle.utils import segment_axis

fs, x = wavfile.read('test.wav')
assert fs == 16000
x = 1. * x  # convert int16 samples to float64

frame_length = 1024
hopsize = 80
noverlap = frame_length - hopsize

# frame the signal: (frame_length, n_frames) after the transpose
frames = segment_axis(x, frame_length, noverlap).astype('float64').T
# apply a Blackman window to every frame (broadcast over the frame axis)
frames = frames * SPTK.blackman(frame_length).reshape((frame_length, 1))
frames = frames.T  # back to (n_frames, frame_length)

# mel-generalized cepstral analysis, one row of coefficients per frame
order = 20
alpha = 0.41
stage = 4
gamma = -1.0 / stage
mgc = np.apply_along_axis(SPTK.mgcep, 1, frames, order, alpha, gamma)
# log spectra (frame_length // 2 + 1 bins), then mirror to a full FFT frame
mgc_sp = np.apply_along_axis(SPTK.mgc2sp, 1, mgc, alpha, gamma,
                             frame_length).real
mgc_sp_test = np.hstack([mgc_sp, mgc_sp[:, ::-1][:, 1:-1]])
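# Shape check (an assumption that matches the segment_axis call above under
# its usual trailing-sample 'cut' default): with a 1024-sample window and
# noverlap = 944 the hop is 80 samples, so roughly
# (len(x) - noverlap) // hopsize frames are produced.
n_frames = (len(x) - noverlap) // hopsize
print(frames.shape == (n_frames, frame_length))  # expected True
print(mgc.shape)          # (n_frames, order + 1) mel-generalized cepstra
print(mgc_sp.shape)       # (n_frames, frame_length // 2 + 1) log-magnitude bins
print(mgc_sp_test.shape)  # (n_frames, frame_length) after mirroring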