def test_transformer():
    from collections import OrderedDict

    import numpy

    from fuel.transformers import ScaleAndShift
    from fuel.datasets import IndexableDataset
    from fuel.streams import DataStream
    from fuel.schemes import ShuffledScheme

    seed = 1234
    rng = numpy.random.RandomState(seed)
    features = rng.randint(256, size=(8, 2, 2))
    targets = rng.randint(4, size=(8, 1))

    dataset = IndexableDataset(
        indexables=OrderedDict([('features', features),
                                ('targets', targets)]),
        axis_labels=OrderedDict([('features', ('batch', 'height', 'width')),
                                 ('targets', ('batch', 'index'))]))
    scheme = ShuffledScheme(examples=dataset.num_examples, batch_size=2)
    data_stream = DataStream(dataset=dataset, iteration_scheme=scheme)

    # Standardize the features: scale by 1/std, then shift by -mean/std.
    scale = 1.0 / features.std()
    shift = -scale * features.mean()
    standardized_stream = ScaleAndShift(data_stream=data_stream,
                                        scale=scale, shift=shift,
                                        which_sources=('features',))

    for batch in standardized_stream.get_epoch_iterator():
        print(batch)
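As a side note (not part of the snippet above): ScaleAndShift produces scale * data + shift on the selected sources, which is why these examples use scale = 1/std and shift = -mean/std to standardize. A minimal numpy-only sketch of that convention:

import numpy

# Toy data standing in for a 'features' batch.
x = numpy.array([1.0, 2.0, 3.0, 4.0])

# Same convention as the snippet above: scale is applied before shift.
scale = 1.0 / x.std()
shift = -scale * x.mean()
standardized = scale * x + shift

print(standardized.mean(), standardized.std())  # approximately 0.0 and 1.0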
def test_scale_and_shift():
    stream = DataStream(
        IterableDataset({'features': [1, 2, 3], 'targets': [0, 1, 0]}))
    wrapper = ScaleAndShift(stream, 2, -1, which_sources=('targets',))
    assert list(wrapper.get_epoch_iterator()) == [(1, -1), (2, 1), (3, -1)]
def create_data(data, size, batch_size, _port):
    if data == "train":
        cats = DogsVsCats(('train',), subset=slice(0, 20000))
        port = _port + 2
    elif data == "valid":
        cats = DogsVsCats(('train',), subset=slice(20000, 25000))
        port = _port + 3
    print('port', port)
    stream = DataStream.default_stream(
        cats, iteration_scheme=ShuffledScheme(cats.num_examples, batch_size))
    stream_downscale = MinimumImageDimensions(
        stream, size, which_sources=('image_features',))
    stream_rotate = FlipAsYouCan(stream_downscale)
    stream_max = ScikitResize(stream_rotate, image_size,
                              which_sources=('image_features',))
    stream_scale = ScaleAndShift(stream_max, 1. / 255, 0,
                                 which_sources=('image_features',))
    stream_data = Cast(stream_scale, dtype='float32',
                       which_sources=('image_features',))
    start_server(stream_data, port=port)
def create_data(data):
    stream = DataStream(data,
                        iteration_scheme=ShuffledScheme(data.num_examples,
                                                        batch_size))

    # Data Augmentation
    stream = MinimumImageDimensions(stream, image_size,
                                    which_sources=('image_features',))
    stream = MaximumImageDimensions(stream, image_size,
                                    which_sources=('image_features',))
    stream = RandomHorizontalSwap(stream, which_sources=('image_features',))
    stream = Random2DRotation(stream, which_sources=('image_features',))
    # stream = ScikitResize(stream, image_size,
    #                       which_sources=('image_features',))

    # Data Preprocessing

    # Data Transformation
    stream = ScaleAndShift(stream, 1. / 255, 0,
                           which_sources=('image_features',))
    stream = Cast(stream, dtype='float32', which_sources=('image_features',))
    return stream
def open_stream(which_sets=('train',), port=5557, num_examples=None):
    dataset = Blizzard(which_sets=which_sets)
    if num_examples is None:
        num_examples = dataset.num_examples

    data_stream = DataStream.default_stream(
        dataset,
        iteration_scheme=SequentialScheme(num_examples, batch_size))
    data_stream = ScaleAndShift(data_stream,
                                scale=1 / data_std,
                                shift=-data_mean / data_std)
    data_stream = Mapping(data_stream, _downsample_and_upsample,
                          add_sources=('upsampled',))
    data_stream = Mapping(data_stream, _equalize_size)
    data_stream = Mapping(data_stream, _get_residual,
                          add_sources=('residual',))
    data_stream = FilterSources(data_stream,
                                sources=('upsampled', 'residual',))
    data_stream = Mapping(data_stream, _segment_axis)
    data_stream = Mapping(data_stream, _transpose)
    data_stream = ForceFloatX(data_stream)
    start_server(data_stream, port=port)
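The mapping helpers chained above (_downsample_and_upsample, _equalize_size, _get_residual, _segment_axis, _transpose) are defined elsewhere in the original scripts and are not shown in these snippets. As a rough sketch of the interface they have to satisfy (the body below is an assumption for illustration, not the original code): Mapping calls the function with each batch as a tuple of arrays and expects a tuple back.

import numpy


def _transpose(data):
    # Hypothetical body: swap the batch and time axes of every source in the
    # batch tuple; the real _transpose in these scripts may differ.
    return tuple(numpy.swapaxes(source, 0, 1) for source in data)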
def batch_iterator(dataset, batchsize, shuffle=False):
    if shuffle:
        train_scheme = ShuffledScheme(examples=dataset.num_examples,
                                      batch_size=batchsize)
    else:
        train_scheme = SequentialScheme(examples=dataset.num_examples,
                                        batch_size=batchsize)
    stream = DataStream.default_stream(dataset=dataset,
                                       iteration_scheme=train_scheme)
    stream_scale = ScaleAndShift(stream, 1. / 256.0, 0,
                                 which_sources=('features',))
    stream_data = Cast(stream_scale, dtype=theano.config.floatX,
                       which_sources=('features',))
    return stream_data.get_epoch_iterator()
def apply_transformers(data_stream):
    data_stream_ = Flatten(data_stream,
                           which_sources=['features_1', 'features_2'])
    data_stream_ = ScaleAndShift(data_stream_,
                                 which_sources=['features_1', 'features_2'],
                                 scale=2.0, shift=-1.0)
    return data_stream_
def make_gen(batch_size, examples=4):
    file_path_f = file_path
    names_select = i_names
    train_set = H5PYDataset(file_path_f, which_sets=('train', 'test'))
    scheme = SequentialScheme(examples=examples, batch_size=batch_size)
    data_stream_train = DataStream(dataset=train_set, iteration_scheme=scheme)
    stand_stream_train = ScaleAndShift(data_stream=data_stream_train,
                                       scale=scale, shift=shift,
                                       which_sources=(names_select[-1],))
    return stand_stream_train, train_set, data_stream_train
class TestScaleAndShift(object):
    def setUp(self):
        dataset = IterableDataset(
            OrderedDict([('features', [1, 2, 3]), ('targets', [0, 1, 0])]),
            axis_labels={'features': ('batch',), 'targets': ('batch',)})
        self.stream = DataStream(dataset)
        self.wrapper = ScaleAndShift(
            self.stream, 2, -1, which_sources=('targets',))

    def test_scale_and_shift(self):
        assert_equal(list(self.wrapper.get_epoch_iterator()),
                     [(1, -1), (2, 1), (3, -1)])

    def test_axis_labels_are_passed_through(self):
        assert_equal(self.wrapper.axis_labels, self.stream.axis_labels)
def define_stream(which_sets=('train',),
                  initial_scale=1, scale=0.5,
                  batch_size=64, seq_length=64, frame_size=128,
                  tbptt_flag=True, num_examples=None):
    def _segment_axis(data):
        # Defined inside so that frame_size is available
        x = tuple([numpy.array([segment_axis(x, frame_size, 0)
                                for x in var]) for var in data])
        return x

    scale = float(scale)

    dataset = Blizzard(which_sets=which_sets)

    if num_examples is None:
        num_examples = batch_size * (dataset.num_examples // batch_size)

    data_stream = DataStream.default_stream(
        dataset,
        iteration_scheme=SequentialScheme(num_examples, batch_size))

    data_stream = ScaleAndShift(data_stream,
                                scale=1 / data_std,
                                shift=-data_mean / float(data_std))

    # Original sampling rate
    data_stream = Resample(data_stream, scale=initial_scale)
    data_stream = Mapping(data_stream, _copy, add_sources=('upsampled',))
    data_stream = Resample(data_stream, scale=scale,
                           which_sources=('upsampled',))
    data_stream = Resample(data_stream, scale=1 / scale,
                           which_sources=('upsampled',))

    # data_stream = Mapping(data_stream, _downsample_and_upsample,
    #                       add_sources=('upsampled',))
    data_stream = Mapping(data_stream, _equalize_size)
    data_stream = Mapping(data_stream, _get_residual,
                          add_sources=('residual',))
    data_stream = FilterSources(data_stream,
                                sources=('upsampled', 'residual',))
    data_stream = Mapping(data_stream, _segment_axis)
    data_stream = Mapping(data_stream, _transpose)
    return data_stream
def create_data(data):
    stream = DataStream.default_stream(
        data,
        iteration_scheme=ShuffledScheme(data.num_examples, batch_size))
    stream_downscale = MinimumImageDimensions(
        stream, image_size, which_sources=('image_features',))
    # stream_rotate = Random2DRotation(stream_downscale,
    #                                  which_sources=('image_features',))
    stream_max = ScikitResize(stream_downscale, image_size,
                              which_sources=('image_features',))
    stream_scale = ScaleAndShift(stream_max, 1. / 255, 0,
                                 which_sources=('image_features',))
    stream_cast = Cast(stream_scale, dtype='float32',
                       which_sources=('image_features',))
    # stream_flat = Flatten(stream_scale, which_sources=('image_features',))
    return stream_cast
def monk_music_stream(which_sets=('train',), batch_size=64, seq_size=128,
                      frame_size=160, num_examples=None,
                      which_sources=('features',)):
    """Generate the stream for the monk_music dataset.

    It doesn't compute incremental windows and instead simply separates
    the dataset into sequences.
    """
    dataset = MonkMusic(which_sets=which_sets, filename="dataset.hdf5",
                        load_in_memory=True)

    large_batch_size = batch_size * frame_size * seq_size
    if not num_examples:
        num_examples = large_batch_size * (dataset.num_examples //
                                           large_batch_size)

    # If there are memory problems revert to SequentialScheme
    data_stream = DataStream.default_stream(
        dataset,
        iteration_scheme=SequentialScheme(num_examples, large_batch_size))

    data_stream = ScaleAndShift(data_stream,
                                scale=1. / data_stats["std"],
                                shift=-data_stats["mean"] / data_stats["std"])

    data_stream = Mapping(
        data_stream,
        lambda data: _get_subsequences(data, batch_size, seq_size, frame_size))

    data_stream = ForceFloatX(data_stream)

    return data_stream
def make_gen(Nchunks=True, classif=False, train=True):
    '''
    Nchunks == True : 10 chunks in the generator
    Nchunks == False : 1 chunk in the generator
    Makes the distinction between classification/regression
    Makes the distinction between test/train
    '''
    file_path_f = file_path_R
    shift_f = shift_R
    scale_f = scale_R
    if classif:
        file_path_f = file_path_C
        shift_f = shift_C
        scale_f = scale_C

    if Nchunks:
        batch_size = 13
    else:
        batch_size = 130
    t_scheme = SequentialScheme(examples=130, batch_size=batch_size)
    t_source = 'train'

    if not train:
        if Nchunks:
            batch_size = 2
        else:
            batch_size = 20
        t_source = 'test'
        t_scheme = SequentialScheme(examples=20, batch_size=batch_size)

    t_set = H5PYDataset(file_path_f, which_sets=[t_source])
    data_stream_t = DataStream(dataset=t_set, iteration_scheme=t_scheme)
    stand_stream_t = ScaleAndShift(data_stream=data_stream_t,
                                   scale=scale_f, shift=shift_f,
                                   which_sources=t_source)
    return stand_stream_t, t_set, data_stream_t
def preprocessing(data_stream):
    return ForceFloatX(
        ScaleAndShift(data_stream, 1 / 255.0, 0.0,
                      which_sources=('features',)),
        which_sources=('features',))
save_dir = os.path.join(save_dir, 'blizzard/', job_id + "/", "samples/")
main_loop = load(os.path.join(exp_path, exp_file))

rate = 16000
for i, sample in enumerate(raw_audio[:n_samples]):
    pyplot.plot(sample)
    pyplot.savefig(save_dir + "original_%i.png" % i)
    pyplot.close()
    wavfile.write(save_dir + "original_{}.wav".format(i), rate, sample)

data_stream = ScaleAndShift(data_stream,
                            scale=1 / data_std,
                            shift=-data_mean / data_std)
data_stream = Mapping(data_stream, _downsample_and_upsample,
                      add_sources=('upsampled',))

epoch_iterator = data_stream.get_epoch_iterator()
raw_audio_std, upsampled_audio = next(epoch_iterator)
for i in range(n_iter - 1):
    x_tr, y_tr = next(epoch_iterator)
    raw_audio_std = numpy.hstack([raw_audio_std, x_tr])
    upsampled_audio = numpy.hstack([upsampled_audio, y_tr])

for i, (original_, upsampled_) in enumerate(
        list(zip(raw_audio_std, upsampled_audio))[:n_samples]):
exp_file = file_
save_dir = os.path.join(save_dir, "blizzard/", job_id + "/", "samples/")
main_loop = load(os.path.join(exp_path, exp_file))

rate = 16000
for i, sample in enumerate(raw_audio[:n_samples]):
    pyplot.plot(sample)
    pyplot.savefig(save_dir + "original_%i.png" % i)
    pyplot.close()
    wavfile.write(save_dir + "original_{}.wav".format(i), rate, sample)

data_stream = ScaleAndShift(data_stream,
                            scale=1 / data_std,
                            shift=-data_mean / data_std)
data_stream = Mapping(data_stream, _downsample_and_upsample,
                      add_sources=("upsampled",))

epoch_iterator = data_stream.get_epoch_iterator()
raw_audio_std, upsampled_audio = next(epoch_iterator)
for i in range(n_iter - 1):
    x_tr, y_tr = next(epoch_iterator)
    raw_audio_std = numpy.hstack([raw_audio_std, x_tr])
    upsampled_audio = numpy.hstack([upsampled_audio, y_tr])

for i, (original_, upsampled_) in enumerate(
        list(zip(raw_audio_std, upsampled_audio))[:n_samples]):
    f, (ax1, ax2) = pyplot.subplots(2, sharex=True, sharey=True)
    ax1.plot(original_)
def test_scale_and_shift():
    stream = DataStream(
        IterableDataset(
            OrderedDict([('features', [1, 2, 3]), ('targets', [0, 1, 0])])))
    wrapper = ScaleAndShift(stream, 2, -1, which_sources=('targets',))
    assert list(wrapper.get_epoch_iterator()) == [(1, -1), (2, 1), (3, -1)]
data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'sp_standardize.npz')
data_stats = numpy.load(data_dir)
sp_mean = data_stats['sp_mean']
sp_std = data_stats['sp_std']

dataset = Blizzard(which_sets=('train',), filename="sp_blizzard.hdf5")
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(
        batch_size * (dataset.num_examples // batch_size), batch_size))
data_stream = FilterSources(data_stream, ('sp',))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / sp_std,
                            shift=-sp_mean / sp_std,
                            which_sources=('sp',))
data_stream = Mapping(data_stream, _transpose)
data_stream = SegmentSequence(data_stream, seq_size, add_flag=True)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream

num_valid_examples = 4 * 64
dataset = Blizzard(which_sets=('valid',), filename="sp_blizzard.hdf5")
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(num_valid_examples, batch_size))
data_stream = FilterSources(data_stream, ('sp',))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / sp_std,
                            shift=-sp_mean / sp_std,
        batch_size=args.batch_size)))
test_stream = Flatten(DataStream.default_stream(
    dataset_test,
    iteration_scheme=ShuffledScheme(
        examples=dataset_test.num_examples,
        batch_size=args.batch_size)))

shp = next(train_stream.get_epoch_iterator())[0].shape

# make the training data 0 mean and variance 1
# TODO compute mean and variance on full dataset, not minibatch
Xbatch = next(train_stream.get_epoch_iterator())[0]
scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
shft = -np.mean(Xbatch * scl)
# scale is applied before shift
train_stream = ScaleAndShift(train_stream, scl, shft)
test_stream = ScaleAndShift(test_stream, scl, shft)

baseline_uniform_noise = 1. / 255.  # appropriate for MNIST and CIFAR10 Fuel datasets, which are scaled [0,1]
uniform_noise = baseline_uniform_noise / scl

## initialize the model
dpm = model.DiffusionModel(spatial_width, n_colors,
                           uniform_noise=uniform_noise, **model_args)
dpm.initialize()

## set up optimization
features = T.matrix('features', dtype=theano.config.floatX)
cost = dpm.cost(features)
blocks_model = blocks.model.Model(cost)
cg_nodropout = ComputationGraph(cost)
if args.dropout_rate > 0:
    # DEBUG this triggers an error on my machine
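The TODO above notes that scl and shft are estimated from a single minibatch. A possible way to address it is sketched below; it assumes raw_train_stream names the unscaled training stream before the ScaleAndShift wrapper is applied (a hypothetical name, not code from the original project), and accumulates the statistics over one full epoch.

import numpy as np

# Accumulate count, sum and sum of squares over the unscaled training stream,
# then derive the mean/std used for ScaleAndShift.
count, total, total_sq = 0, 0.0, 0.0
for batch in raw_train_stream.get_epoch_iterator():
    X = np.asarray(batch[0], dtype='float64')
    count += X.size
    total += X.sum()
    total_sq += (X ** 2).sum()

mean = total / count
std = np.sqrt(total_sq / count - mean ** 2)
scl = 1. / std
shft = -mean * scl  # scale is applied before shift, as above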
## Load cifar10 stream
batch_size = 32
num_train_example = slice_train.stop - slice_train.start
num_valid_example = slice_valid.stop - slice_valid.start
num_test_example = slice_test.stop - slice_test.start

train_dataset = CIFAR10(('train',), subset=slice_train)
train_stream = DataStream.default_stream(
    train_dataset,
    iteration_scheme=SequentialScheme(train_dataset.num_examples, batch_size))
train_stream = OneHotEncode10(train_stream, which_sources=('targets',))
train_stream = RandomHorizontalFlip(train_stream, which_sources=('features',))
train_stream = MinimumImageDimensions(train_stream, (224, 224),
                                      which_sources=('features',))
train_stream = ScaleAndShift(train_stream, 1., 0, which_sources=('features',))
train_stream = Cast(train_stream, 'floatX', which_sources=('features',))

valid_dataset = CIFAR10(('train',), subset=slice_valid)
valid_stream = DataStream.default_stream(
    valid_dataset,
    iteration_scheme=SequentialScheme(valid_dataset.num_examples, batch_size))
valid_stream = OneHotEncode10(valid_stream, which_sources=('targets',))
valid_stream = MinimumImageDimensions(valid_stream, (224, 224),
                                      which_sources=('features',))
valid_stream = ScaleAndShift(valid_stream, 1., 0, which_sources=('features',))
valid_stream = Cast(valid_stream, 'floatX', which_sources=('features',))

test_dataset = CIFAR10(('train',), subset=slice_test)
test_stream = DataStream.default_stream(test_dataset,
# get the test stream
from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.streams import DataStream, ServerDataStream
from fuel.schemes import ShuffledScheme, SequentialExampleScheme
from fuel.transformers.image import (RandomFixedSizeCrop,
                                     MinimumImageDimensions,
                                     MaximumImageDimensions,
                                     Random2DRotation)
from fuel.transformers import Flatten, Cast, ScaleAndShift

size = (128, 128)
cats = DogsVsCats(('test',))
stream = DataStream.default_stream(
    cats, iteration_scheme=SequentialExampleScheme(cats.num_examples))
stream_upscale = MaximumImageDimensions(stream, size,
                                        which_sources=('image_features',))
stream_scale = ScaleAndShift(stream_upscale, 1. / 255, 0,
                             which_sources=('image_features',))
stream_data = Cast(stream_scale, dtype='float32',
                   which_sources=('image_features',))

# Load the parameters of the model
params = load_parameter_values('convnet_parameters.pkl')
mo = Model(predict)
mo.set_parameter_values(params)

# Create the forward propagation function
fprop = function(mo.inputs, mo.outputs[0], allow_input_downcast=True)

tab = []
i = 1

# Get the prediction for each example of the test set
for data in stream_data.get_epoch_iterator():
    iteration_scheme=SequentialScheme(train.num_examples, batch_size))

# upscaled_stream = MinimumImageDimensions(stream, (100, 100),
#                                          which_sources=('image_features',))
downscaled_stream = DownscaleMinDimension(stream, 100,
                                          which_sources=('image_features',))

# Our images are of different sizes, so we'll use a Fuel transformer
# to take random crops of size (32 x 32) from each image
cropped_stream = RandomFixedSizeCrop(downscaled_stream, (100, 100),
                                     which_sources=('image_features',))
rotated_stream = Random2DRotation(cropped_stream, math.pi / 6,
                                  which_sources=('image_features',))
flipped_stream = RandomHorizontalFlip(rotated_stream,
                                      which_sources=('image_features',))

# We'll use a simple MLP, so we need to flatten the images
# from (channel, width, height) to simply (features,)
float_stream = ScaleAndShift(flipped_stream, 1. / 255, 0,
                             which_sources=('image_features',))
float32_stream = Cast(float_stream, numpy.float32,
                      which_sources=('image_features',))

start_server(float32_stream, port=port)
    return (x,)


data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'blizzard_standardize.npz')
data_stats = numpy.load(data_dir)
data_mean = data_stats['data_mean']
data_std = data_stats['data_std']

dataset = Blizzard(which_sets=('train',))
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(dataset.num_examples, batch_size))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / data_std,
                            shift=-data_mean / data_std)
data_stream = Mapping(data_stream, _segment_axis)
data_stream = Mapping(data_stream, _transpose)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream

num_valid_examples = 4 * 64 * 5
dataset = Blizzard(which_sets=('valid',))
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(num_valid_examples, 10 * batch_size))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / data_std,
                            shift=-data_mean / data_std)
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=ShuffledScheme(dataset.num_examples, batch_size))

x_tr = next(data_stream.get_epoch_iterator())
# ipdb.set_trace()

# Standardize data
all_data = numpy.array([])
for batch in data_stream.get_epoch_iterator():
    for element in batch[0]:
        all_data = numpy.hstack([all_data, element])
mean_data = all_data.mean()
std_data = all_data.std()

data_stream = ScaleAndShift(data_stream,
                            scale=1 / std_data,
                            shift=-mean_data / std_data)
data_stream = Mapping(data_stream, _segment_axis)
data_stream = Padding(data_stream)
data_stream = Mapping(data_stream, _transpose)
data_stream = ForceFloatX(data_stream)

#################
# Model
#################

activations_x = [Rectifier()] * depth_x

dims_x = [frame_size] + [hidden_size_mlp_x] * (depth_x - 1) + \
         [hidden_size_recurrent]

activations_theta = [Rectifier()] * depth_theta
split_dict = {
    'train': {
        'images': (0, train_feature.shape[0]),
        'targets': (0, train_target.shape[0])
    }
}
f.attrs['split'] = H5PYDataset.create_split_array(split_dict)
f.flush()
f.close()

train_set = H5PYDataset('../../data/dataset.hdf5', which_sets=('train',))
# data_stream = DataStream(dataset=train_set, iteration_scheme=scheme)
# state = train_set.open()
scheme = ShuffledScheme(examples=train_set.num_examples, batch_size=4)
data_stream = DataStream(dataset=train_set, iteration_scheme=scheme)
for data in data_stream.get_epoch_iterator():
    print(data[0], data[1])

standardized_stream = ScaleAndShift(data_stream=data_stream,
                                    scale=255, shift=0,
                                    which_sources=('features',))
for data in standardized_stream.get_epoch_iterator():
    print(data[0], data[1])

# train_set.close(state)
data_stats = numpy.load(data_dir)
mgc_mean = data_stats['mgc_mean']
mgc_std = data_stats['mgc_std']
f0_mean = data_stats['f0_mean']
f0_std = data_stats['f0_std']

dataset = Blizzard(which_sets=('train',), filename="mgc_blizzard.hdf5")
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(dataset.num_examples, batch_size))
data_stream = Mapping(data_stream, _transform_to_sp)
data_stream = ScaleAndShift(data_stream,
                            scale=1 / f0_std,
                            shift=-f0_mean / f0_std,
                            which_sources=('f0',))
# data_stream = ScaleAndShift(data_stream,
#                             scale=1 / mgc_std,
#                             shift=-mgc_mean / mgc_std,
#                             which_sources=('mgc',))
data_stream = Mapping(data_stream, _transpose)
data_stream = SegmentSequence(data_stream, seq_size, add_flag=True)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream

num_valid_examples = 4 * 64
dataset = Blizzard(which_sets=('valid',), filename="mgc_blizzard.hdf5")
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=ShuffledScheme(dataset.num_examples, batch_size))

x_tr = next(data_stream.get_epoch_iterator())

# Standardize data
all_data = numpy.array([])
for batch in data_stream.get_epoch_iterator():
    for element in batch[0]:
        all_data = numpy.hstack([all_data, element])
mean_data = all_data.mean()
std_data = all_data.std()

data_stream = ScaleAndShift(data_stream,
                            scale=1 / std_data,
                            shift=-mean_data / std_data)
data_stream = Mapping(data_stream, _downsample_and_upsample,
                      add_sources=('upsampled',))
data_stream = Mapping(data_stream, _equalize_size)
data_stream = Mapping(data_stream, _get_residual,
                      add_sources=('residual',))
data_stream = FilterSources(data_stream,
                            sources=('upsampled', 'residual',))
data_stream = Mapping(data_stream, _segment_axis)
data_stream = Padding(data_stream)
data_stream = FilterSources(data_stream,
                            sources=('upsampled', 'residual',
                                     'residual_mask'))
data_stream = Mapping(data_stream, _transpose)
data_stream = dataset.get_example_stream()
it = data_stream.get_epoch_iterator()
sequence = next(it)
length = len(sequence[0])
temp = numpy.random.choice(length, 100000)
temp = [float(l[0]) for l in sequence[0][temp]]
temp = numpy.array(temp)
data_stats["mean"] = temp.mean()
data_stats["std"] = temp.std()
numpy.save(data_stats_file, data_stats)

data_stream = Batch(data_stream=data_stream,
                    iteration_scheme=ConstantScheme(batch_size))
data_stream = ScaleAndShift(data_stream,
                            scale=1. / data_stats["std"],
                            shift=-data_stats["mean"] / data_stats["std"])
# data_stream = Mapping(data_stream, _transpose)
# data_stream = SegmentSequence(data_stream, 16*seq_size, add_flag=True)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream

"""
dataset = MonkMusic(which_sets=('valid',), filename="XqaJ2Ol5cC4.hdf5",
                    load_in_memory=True)

data_stream = dataset.get_example_stream()
data_stream = Batch(data_stream=data_stream,
                    iteration_scheme=ConstantScheme(batch_size))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / data_stats["std"],
                            shift=-data_stats["mean"] / data_stats["std"])
# data_stream = SegmentSequence(data_stream, 16*seq_size, add_flag=True)