def open_stream(which_sets=('train',), port=5557, num_examples=None):
    dataset = Blizzard(which_sets=which_sets)

    if num_examples is None:
        num_examples = dataset.num_examples

    data_stream = DataStream.default_stream(
        dataset,
        iteration_scheme=SequentialScheme(num_examples, batch_size))

    # Standardize, build the (upsampled, residual) pair, frame the signals,
    # transpose to time-major and serve the stream over the network.
    data_stream = ScaleAndShift(data_stream,
                                scale=1 / data_std,
                                shift=-data_mean / data_std)
    data_stream = Mapping(data_stream, _downsample_and_upsample,
                          add_sources=('upsampled',))
    data_stream = Mapping(data_stream, _equalize_size)
    data_stream = Mapping(data_stream, _get_residual,
                          add_sources=('residual',))
    data_stream = FilterSources(data_stream,
                                sources=('upsampled', 'residual'))
    data_stream = Mapping(data_stream, _segment_axis)
    data_stream = Mapping(data_stream, _transpose)
    data_stream = ForceFloatX(data_stream)
    start_server(data_stream, port=port)
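# A minimal client-side sketch, assuming the server above was started with
# fuel.server.start_server on the same machine. The source names match the
# FilterSources call in open_stream; everything else here is an assumption.
from fuel.streams import ServerDataStream

client_stream = ServerDataStream(('upsampled', 'residual'),
                                 produces_examples=False,
                                 port=5557)
for upsampled, residual in client_stream.get_epoch_iterator():
    pass  # feed each (upsampled, residual) batch to the training loop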
def define_stream(which_sets=('train',), initial_scale=1, scale=0.5,
                  batch_size=64, seq_length=64, frame_size=128,
                  tbptt_flag=True, num_examples=None):
    def _segment_axis(data):
        # Defined inside so that frame_size is available.
        x = tuple([numpy.array([segment_axis(x, frame_size, 0)
                                for x in var]) for var in data])
        return x

    scale = float(scale)

    dataset = Blizzard(which_sets=which_sets)

    if num_examples is None:
        # Round down to a whole number of batches.
        num_examples = batch_size * (dataset.num_examples // batch_size)

    data_stream = DataStream.default_stream(
        dataset,
        iteration_scheme=SequentialScheme(num_examples, batch_size))

    data_stream = ScaleAndShift(data_stream,
                                scale=1 / data_std,
                                shift=-data_mean / float(data_std))

    # Resample to the working rate, then create a low-resolution version of
    # the signal by downsampling and re-upsampling the 'upsampled' copy.
    data_stream = Resample(data_stream, scale=initial_scale)
    data_stream = Mapping(data_stream, _copy, add_sources=('upsampled',))
    data_stream = Resample(data_stream, scale=scale,
                           which_sources=('upsampled',))
    data_stream = Resample(data_stream, scale=1 / scale,
                           which_sources=('upsampled',))

    data_stream = Mapping(data_stream, _equalize_size)
    data_stream = Mapping(data_stream, _get_residual,
                          add_sources=('residual',))
    data_stream = FilterSources(data_stream,
                                sources=('upsampled', 'residual'))
    data_stream = Mapping(data_stream, _segment_axis)
    data_stream = Mapping(data_stream, _transpose)
    return data_stream
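# Hypothetical usage: build the stream and pull one batch to sanity-check
# shapes. After _segment_axis and _transpose the batches should be
# time-major, roughly (time, batch, frame_size); treat that as an
# assumption, not a guarantee.
stream = define_stream(which_sets=('valid',), batch_size=8)
upsampled, residual = next(stream.get_epoch_iterator())
print(upsampled.shape, residual.shape)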
# Prepare dataset
#################

def _transpose(data):
    return tuple(array.swapaxes(0, 1) for array in data)

data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'sp_standardize.npz')
data_stats = numpy.load(data_dir)
sp_mean = data_stats['sp_mean']
sp_std = data_stats['sp_std']

dataset = Blizzard(which_sets=('train',), filename="sp_blizzard.hdf5")
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(
        batch_size * (dataset.num_examples // batch_size), batch_size))
data_stream = FilterSources(data_stream, ('sp',))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / sp_std,
                            shift=-sp_mean / sp_std,
                            which_sources=('sp',))
data_stream = Mapping(data_stream, _transpose)
data_stream = SegmentSequence(data_stream, seq_size, add_flag=True)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream

num_valid_examples = 4 * 64
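# A sketch of the matching validation stream, assuming it mirrors the
# training pipeline above; the 'valid' split name and the reuse of seq_size
# and batch_size are assumptions.
dataset = Blizzard(which_sets=('valid',), filename="sp_blizzard.hdf5")
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(num_valid_examples, batch_size))
data_stream = FilterSources(data_stream, ('sp',))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / sp_std,
                            shift=-sp_mean / sp_std,
                            which_sources=('sp',))
data_stream = Mapping(data_stream, _transpose)
data_stream = SegmentSequence(data_stream, seq_size, add_flag=True)
data_stream = ForceFloatX(data_stream)
valid_stream = data_stream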
from play.datasets.blizzard import Blizzard

import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot

from fuel.schemes import SequentialScheme
from fuel.streams import DataStream

dataset = Blizzard(which_sets=('test',), filename="mgc_blizzard.hdf5")
batch_size = 1

data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(dataset.num_examples, batch_size))

f0, mgc = next(data_stream.get_epoch_iterator())

pyplot.plot(f0[0][:500])
pyplot.savefig('plot_f0.png')
pyplot.close()

pyplot.plot(mgc[0, :500, 3])
pyplot.savefig('plot_mgc.png')
pyplot.close()

pyplot.hist(f0[0, f0[0] > 0])
pyplot.savefig('plot_hist_cond.png')
pyplot.close()

pyplot.hist(mgc.reshape(-1))
def _get_residual(data):
    # Residual: original signal minus its upsampled approximation
    # (the sources arrive in that order).
    ds = numpy.array([x[0] - x[1] for x in zip(*data)])
    return (ds,)

data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'blizzard_standardize.npz')
data_stats = numpy.load(data_dir)
data_mean = data_stats['data_mean']
data_std = data_stats['data_std']

which_sets = ('test',)
dataset = Blizzard(which_sets=which_sets)
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(dataset.num_examples, batch_size))

# Collect n_iter batches of raw audio into a single array.
epoch_iterator = data_stream.get_epoch_iterator()
raw_audio = next(epoch_iterator)[0]
for i in range(n_iter - 1):
    x_tr = next(epoch_iterator)[0]
    raw_audio = numpy.hstack([raw_audio, x_tr])

job_id = sys.argv[1]
save_dir = os.environ['RESULTS_DIR']
exp_path = os.path.join(save_dir, 'blizzard/', job_id + "/")
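# Hypothetical follow-up: save the collected audio for listening. The 16 kHz
# sampling rate, the int16 sample format, and the output filename are
# assumptions about the dataset, not something established above.
from scipy.io import wavfile

wavfile.write(os.path.join(exp_path, 'raw_audio.wav'), 16000,
              numpy.hstack(raw_audio).astype('int16'))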
def _transpose(data):
    return tuple(array.swapaxes(0, 1) for array in data)

def _segment_axis(data):
    x = numpy.array([segment_axis(x, frame_size, 0) for x in data[0]])
    return (x,)

data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'blizzard_standardize.npz')
data_stats = numpy.load(data_dir)
data_mean = data_stats['data_mean']
data_std = data_stats['data_std']

dataset = Blizzard(which_sets=('train',))
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(dataset.num_examples, batch_size))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / data_std,
                            shift=-data_mean / data_std)
data_stream = Mapping(data_stream, _segment_axis)
data_stream = Mapping(data_stream, _transpose)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream

num_valid_examples = 4 * 64 * 5
dataset = Blizzard(which_sets=('valid',))
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(num_valid_examples, batch_size))
def _zero_for_unvoiced(data):
    # Zero out the unvoiced f0 values by multiplying with the voiced mask.
    # The source order (f0, ..., voiced) is hardcoded.
    return tuple([data[0] * data[3], data[1], data[2], data[3]])

data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'sp_standardize.npz')
data_stats = numpy.load(data_dir)
sp_mean = data_stats['sp_mean']
sp_std = data_stats['sp_std']
f0_mean = data_stats['f0_mean']
f0_std = data_stats['f0_std']

dataset = Blizzard(which_sets=('test',),
                   filename="sp_blizzard_80h_phon.hdf5")
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(
        batch_size * (dataset.num_examples // batch_size), batch_size))
epoch_iterator = data_stream.get_epoch_iterator()

# data_stream = Mapping(data_stream, _is_nonzero, add_sources=('voiced',))
# data_stream = ScaleAndShift(data_stream,
#                             scale=1 / sp_std,
#                             shift=-sp_mean / sp_std,
#                             which_sources=('sp',))
# data_stream = ScaleAndShift(data_stream,
#                             scale=1 / f0_std,
mgc = np.apply_along_axis(SPTK.mgcep, 1, frames, order, alpha, gamma)
mgc_sp = np.apply_along_axis(SPTK.mgc2sp, 1, mgc, alpha, gamma,
                             frame_length).real
# Mirror the half-spectrum to recover the full FFT-length spectrum.
mgc_sp_test = np.hstack([mgc_sp, mgc_sp[:, ::-1][:, 1:-1]])
mgc_sp_test = mgc_sp_test.copy(order='C')

# Check against the original data that the processing was correct.
import h5py
import fuel

from play.datasets.blizzard import Blizzard
from fuel.schemes import SequentialScheme
from fuel.streams import DataStream

dataset_mgc = Blizzard(which_sets=('train', 'valid', 'test'),
                       filename="mgc_blizzard.hdf5")
dataset = Blizzard(which_sets=('train', 'valid', 'test'))
batch_size = 2

data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(dataset.num_examples, batch_size))
data_stream_mgc = DataStream.default_stream(
    dataset_mgc,
    iteration_scheme=SequentialScheme(dataset_mgc.num_examples, batch_size))

raw = next(data_stream.get_epoch_iterator())[0]
f0, mgc = next(data_stream_mgc.get_epoch_iterator())
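# Hypothetical visual check: the MGC-derived spectrogram should resemble the
# short-time spectrum of the raw signal. The plot filename is made up.
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot

pyplot.imshow(mgc_sp.T, origin='lower', aspect='auto')
pyplot.savefig('plot_mgc_sp.png')
pyplot.close()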