def open_stream(which_sets=('train',), port=5557, num_examples=None):
    """Build the Blizzard residual/upsampled data stream and serve it.

    Parameters
    ----------
    which_sets : tuple of str
        Dataset splits to load (default: the training split).
    port : int
        TCP port for the fuel server.
    num_examples : int or None
        Number of examples to iterate over; defaults to the full dataset.

    Relies on module-level ``batch_size``, ``data_std`` and ``data_mean``.
    """
    dataset = Blizzard(which_sets=which_sets)
    # `is None` rather than `== None`: identity test is the correct idiom.
    if num_examples is None:
        num_examples = dataset.num_examples
    data_stream = DataStream.default_stream(
        dataset,
        iteration_scheme=SequentialScheme(num_examples, batch_size))
    # Normalize with the precomputed global statistics.
    data_stream = ScaleAndShift(data_stream,
                                scale=1 / data_std,
                                shift=-data_mean / data_std)
    data_stream = Mapping(data_stream, _downsample_and_upsample,
                          add_sources=('upsampled',))
    data_stream = Mapping(data_stream, _equalize_size)
    data_stream = Mapping(data_stream, _get_residual,
                          add_sources=('residual',))
    # Keep only the two sources the model consumes.
    data_stream = FilterSources(data_stream,
                                sources=('upsampled', 'residual',))
    data_stream = Mapping(data_stream, _segment_axis)
    data_stream = Mapping(data_stream, _transpose)
    data_stream = ForceFloatX(data_stream)
    start_server(data_stream, port=port)
def create_stream_and_start_server(dataset, split, batch_size, port, hwm,
                                   wrapping_streams, kwargs):
    """Create a stream for one split, apply wrappers, and serve it.

    Parameters
    ----------
    dataset : object
        Dataset handle forwarded to ``create_stream``.
    split : str
        Split name; passed to ``create_stream`` as ``split=(split,)``.
    batch_size : int
        Batch size forwarded to ``create_stream``.
    port, hwm : int
        Fuel server port and high-water mark.
    wrapping_streams : iterable of callables
        Applied to the stream in order, innermost first.
    kwargs : dict
        Extra keyword arguments for ``create_stream``.
    """
    # Copy before adding 'split': the original `kwargs.update(...)`
    # mutated the caller's dict, leaking state between calls.
    kwargs = dict(kwargs)
    kwargs['split'] = (split,)
    stream = create_stream(dataset, batch_size, **kwargs)
    for wrap in wrapping_streams:
        stream = wrap(stream)
    start_server(stream, port=port, hwm=hwm)
def create_data(data, size, batch_size, _port):
    """Build the DogsVsCats augmentation pipeline for one split and serve it.

    Parameters
    ----------
    data : str
        Either "train" (first 20000 examples) or "valid" (last 5000).
    size : tuple
        Minimum image dimensions enforced before augmentation.
    batch_size : int
        Batch size for the shuffled iteration scheme.
    _port : int
        Base port; +2 for train, +3 for valid.

    Relies on module-level ``image_size`` for the final resize.
    """
    if data == "train":
        cats = DogsVsCats(('train',), subset=slice(0, 20000))
        port = _port + 2
    elif data == "valid":
        cats = DogsVsCats(('train',), subset=slice(20000, 25000))
        port = _port + 3
    else:
        # The original fell through here, leaving `cats`/`port` unbound
        # and crashing later with a confusing NameError; fail fast.
        raise ValueError("data must be 'train' or 'valid', got %r" % (data,))
    # Pre-format into one string so this prints identically under
    # Python 2's print statement and Python 3's print function.
    print('port %d' % port)
    stream = DataStream.default_stream(
        cats, iteration_scheme=ShuffledScheme(cats.num_examples, batch_size))
    stream_downscale = MinimumImageDimensions(
        stream, size, which_sources=('image_features',))
    stream_rotate = FlipAsYouCan(stream_downscale,)
    stream_max = ScikitResize(stream_rotate, image_size,
                              which_sources=('image_features',))
    stream_scale = ScaleAndShift(stream_max, 1. / 255, 0,
                                 which_sources=('image_features',))
    stream_data = Cast(stream_scale, dtype='float32',
                       which_sources=('image_features',))
    start_server(stream_data, port=port)
def open_stream(which_sets=('train',), port=5557, num_examples=None):
    """Serve the Blizzard residual/upsampled pipeline on `port`.

    Uses module-level ``batch_size``, ``data_std`` and ``data_mean``;
    `num_examples` defaults to the full dataset.
    """
    dataset = Blizzard(which_sets=which_sets)
    # Identity test, not `== None`.
    if num_examples is None:
        num_examples = dataset.num_examples
    data_stream = DataStream.default_stream(
        dataset,
        iteration_scheme=SequentialScheme(num_examples, batch_size))
    # Standardize with the precomputed corpus statistics.
    data_stream = ScaleAndShift(data_stream,
                                scale=1 / data_std,
                                shift=-data_mean / data_std)
    data_stream = Mapping(data_stream, _downsample_and_upsample,
                          add_sources=('upsampled',))
    data_stream = Mapping(data_stream, _equalize_size)
    data_stream = Mapping(data_stream, _get_residual,
                          add_sources=('residual',))
    data_stream = FilterSources(data_stream,
                                sources=('upsampled', 'residual',))
    data_stream = Mapping(data_stream, _segment_axis)
    data_stream = Mapping(data_stream, _transpose)
    data_stream = ForceFloatX(data_stream)
    start_server(data_stream, port=port)
def main():
    """Load a pickled stream from a path argument and serve it.

    Command line: STREAM_PATH PORT HWM.  Ctrl-C shuts the server down
    gracefully with a log message.
    """
    parser = argparse.ArgumentParser("Starts fuel server")
    parser.add_argument("stream", help="The path to the pickled stream")
    parser.add_argument("port", type=int, help="The port to use")
    parser.add_argument("hwm", type=int, default=10, help="HWM")
    args = parser.parse_args()
    # Open in binary mode (pickles are binary data) and close the file
    # deterministically; the original `open(...)` leaked the handle.
    with open(args.stream, 'rb') as pickled_file:
        stream = cPickle.load(pickled_file)
    try:
        start_server(stream, args.port, hwm=args.hwm)
    except KeyboardInterrupt:
        logger.info("Thank you for using Fuel server, bye-bye!")
def open_stream(which_sets=('train',), initial_scale=1, scale=0.5,
                batch_size=64, seq_length=64, frame_size=128, port=5557,
                tbptt_flag=True, num_examples=None):
    """Build the configured data stream and serve it on `port`.

    All stream construction is delegated to ``define_stream``; this
    wrapper only starts the fuel server.
    """
    stream = define_stream(which_sets, initial_scale, scale, batch_size,
                           seq_length, frame_size, tbptt_flag, num_examples)
    start_server(stream, port=port)
def create_data(data, size, batch_size):
    """Serve an augmented DogsVsCats split (train on 5550, valid on 5551).

    Parameters
    ----------
    data : str
        Either "train" (first 20000 examples) or "valid" (last 5000).
    size : tuple
        Target dimensions for the min/max resize transformers.
    batch_size : int
        Batch size for the shuffled iteration scheme.
    """
    if data == "train":
        cats = DogsVsCats(('train',), subset=slice(0, 20000))
        port = 5550
    elif data == "valid":
        cats = DogsVsCats(('train',), subset=slice(20000, 25000))
        port = 5551
    else:
        # Fail fast: the original left `cats`/`port` unbound for any
        # other value and crashed later with a NameError.
        raise ValueError("data must be 'train' or 'valid', got %r" % (data,))
    stream = DataStream.default_stream(
        cats, iteration_scheme=ShuffledScheme(cats.num_examples, batch_size))
    stream_downscale = MinimumImageDimensions(
        stream, size, which_sources=('image_features',))
    stream_upscale = MaximumImageDimensions(
        stream_downscale, size, which_sources=('image_features',))
    stream_rotate = Random2DRotation(
        stream_upscale, which_sources=('image_features',))
    stream_scale = ScaleAndShift(stream_rotate, 1./255, 0,
                                 which_sources=('image_features',))
    stream_data = Cast(stream_scale, dtype='float32',
                       which_sources=('image_features',))
    start_server(stream_data, port=port)
def launch_data_server(dataset, port, config):
    """Serve `dataset` over a fuel server on `port`, shuffled each epoch.

    Batch size and high-water mark are read from `config`.  The stream is
    always closed on exit, including after a Ctrl-C.
    """
    batch_size = config.hyper_parameters.batch_size
    scheme = ShuffledScheme(dataset.num_examples, batch_size)
    data_stream = DataStream(dataset=dataset, iteration_scheme=scheme)
    try:
        start_server(data_stream, port=port, hwm=config.data_server.hwm)
    except KeyboardInterrupt as interrupt:
        print(interrupt)
    finally:
        data_stream.close()
def create_data(data, size, batch_size, _port):
    """Serve an augmented DogsVsCats split over a fuel server.

    Parameters
    ----------
    data : str
        Either "train" (first 20000 examples) or "valid" (last 5000).
    size : tuple
        Minimum image dimensions enforced before augmentation.
    batch_size : int
        Batch size for the shuffled iteration scheme.
    _port : int
        Base port; +2 for train, +3 for valid.

    Relies on module-level ``image_size`` for the final resize.
    """
    if data == "train":
        cats = DogsVsCats(('train',), subset=slice(0, 20000))
        port = _port + 2
    elif data == "valid":
        cats = DogsVsCats(('train',), subset=slice(20000, 25000))
        port = _port + 3
    else:
        # Previously any other value fell through with `cats`/`port`
        # unbound, producing a NameError far from the cause.
        raise ValueError("data must be 'train' or 'valid', got %r" % (data,))
    # One formatted string: prints the same under Python 2 and 3.
    print('port %d' % port)
    stream = DataStream.default_stream(
        cats, iteration_scheme=ShuffledScheme(cats.num_examples, batch_size))
    stream_downscale = MinimumImageDimensions(
        stream, size, which_sources=('image_features',))
    stream_rotate = FlipAsYouCan(stream_downscale,)
    stream_max = ScikitResize(stream_rotate, image_size,
                              which_sources=('image_features',))
    stream_scale = ScaleAndShift(stream_max, 1./255, 0,
                                 which_sources=('image_features',))
    stream_data = Cast(stream_scale, dtype='float32',
                       which_sources=('image_features',))
    start_server(stream_data, port=port)
def create_data(data, size, batch_size):
    """Serve an augmented DogsVsCats split (train on 5560, valid on 5561).

    NOTE(review): the `size` parameter is unused -- the resize
    transformers read the module-level ``image_size`` instead; confirm
    which is intended.
    """
    if data == "train":
        dataset = DogsVsCats(('train',), subset=slice(0, 22500))
        port = 5560
    elif data == "valid":
        dataset = DogsVsCats(('train',), subset=slice(22500, 25000))
        port = 5561
    else:
        # Fail fast: the original left `port` and the dataset unbound
        # for any other value, raising a NameError later.
        raise ValueError("data must be 'train' or 'valid', got %r" % (data,))
    # Distinct name for the dataset; the original reused `stream` for
    # both the dataset and every transformer stage.
    stream = DataStream(dataset,
                        iteration_scheme=ShuffledScheme(dataset.num_examples,
                                                        batch_size))
    stream = MinimumImageDimensions(stream, image_size,
                                    which_sources=('image_features',))
    stream = MaximumImageDimensions(stream, image_size,
                                    which_sources=('image_features',))
    stream = RandomHorizontalSwap(stream, which_sources=('image_features',))
    stream = Random2DRotation(stream, which_sources=('image_features',))
    stream = ScaleAndShift(stream, 1./255, 0,
                           which_sources=('image_features',))
    stream = Cast(stream, dtype='float32', which_sources=('image_features',))
    start_server(stream, port=port)
def create_data(dataset, size, batch_size):
    """Serve an augmented DogsVsCats split over a fuel server.

    Ports: train -> 5556, valid -> 5557, test -> 5558.

    Parameters
    ----------
    dataset : str
        One of "train", "valid", "test".
    size : tuple
        Target dimensions for the min/max resize transformers.
    batch_size : int
        Batch size for the shuffled iteration scheme.
    """
    if dataset == "train":
        data = DogsVsCats(('train',), subset=slice(0, 20000))
        port = 5556
    elif dataset == "valid":
        data = DogsVsCats(('train',), subset=slice(20000, 20200))
        port = 5557
    elif dataset == "test":
        data = DogsVsCats(('test',), subset=slice(0, 12500))
        port = 5558
    else:
        # Fail fast rather than hitting a NameError on `data`/`port` below.
        raise ValueError("dataset must be 'train', 'valid' or 'test', "
                         "got %r" % (dataset,))
    stream = DataStream(data,
                        iteration_scheme=ShuffledScheme(data.num_examples,
                                                        batch_size))
    stream_upscaled = MinimumImageDimensions(
        stream, size, which_sources=('image_features',))
    stream_downscaled = MaximumImageDimensions(
        stream_upscaled, size, which_sources=('image_features',))
    stream_rotate = Random2DRotation(
        stream_downscaled, which_sources=('image_features',))
    stream_scale = ScaleAndShift(stream_rotate, 1.0/255, 0,
                                 which_sources=('image_features',))
    stream_data = Cast(stream_scale, dtype='float32',
                       which_sources=('image_features',))
    start_server(stream_data, port=port)
# NOTE(review): this chunk begins mid-expression -- the
# `DataStream.default_stream(` opening and the definitions of `train`,
# `stream`, `batch_size` and `port` are outside this view.
    iteration_scheme=SequentialScheme(
        train.num_examples, batch_size))
# upscaled_stream = MinimumImageDimensions(stream, (100, 100), which_sources=('image_features',))
# Scale images so the shorter side is 100 px.
downscaled_stream = DownscaleMinDimension(stream, 100,
                                          which_sources=('image_features',))
# Our images are of different sizes, so we'll use a Fuel transformer
# to take a random 100x100 crop from each image.
cropped_stream = RandomFixedSizeCrop(downscaled_stream, (100, 100),
                                     which_sources=('image_features',))
# Random rotation of up to 30 degrees, then a random horizontal flip.
rotated_stream = Random2DRotation(cropped_stream, math.pi / 6,
                                  which_sources=('image_features',))
flipped_stream = RandomHorizontalFlip(rotated_stream,
                                      which_sources=('image_features',))
# Rescale pixel values to [0, 1] and cast down to float32.
float_stream = ScaleAndShift(flipped_stream, 1. / 255, 0,
                             which_sources=('image_features',))
float32_stream = Cast(float_stream, numpy.float32,
                      which_sources=('image_features',))
start_server(float32_stream, port=port)
# Serve the 'submit' split of data.hdf5, standardized, on the default
# fuel port.
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme
from fuel.datasets.hdf5 import H5PYDataset
from fuel.server import start_server

from config import basepath, minibatch_size
from transformers.custom_transformers import Standardize

submit_set = H5PYDataset(
    basepath + 'data.hdf5',
    which_sets=('submit',),
    sources=['features', 'image_name'],
    load_in_memory=False)

stream = DataStream.default_stream(
    submit_set,
    iteration_scheme=SequentialScheme(submit_set.num_examples,
                                      minibatch_size))

print('I provide sources ', submit_set.sources)
print('Number of examples', submit_set.num_examples)

standardized_stream = Standardize(stream, 255)
start_server(standardized_stream)
# NOTE(review): fragment -- the enclosing resize helper's `def` line is
# outside this chunk; `width`, `height`, `multiplier`, `im` and `dt` are
# its locals.
    width = int(math.ceil(width * multiplier))
    height = int(math.ceil(height * multiplier))
    im = numpy.array(im.resize((width, height))).astype(dt)
    # If necessary, undo the axis swap from earlier.
    if im.ndim == 3:
        example = im.transpose(2, 0, 1)
    else:
        example = im
    return example


def add_transformers(stream, random_crop=False):
    """Append scaling transformers to `stream`.

    Optionally takes a random 128x128 crop, then rescales pixel values by
    1/255 and forces floatX.  Returns the wrapped stream.
    """
    # Now the dataset has images with good minimum size
    # stream = ForceMinimumDimension(stream, 128,
    #                                which_sources=['image_features'])
    if random_crop:
        stream = RandomFixedSizeCrop(stream, (128, 128),
                                     which_sources=['image_features'])
    stream = ScaleAndShift(stream, 1 / 255.0, 0,
                           which_sources=['image_features'])
    stream = ForceFloatX(stream)
    return stream


if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    # argv: [1] number of training examples, [2] batch size, [3] port.
    train = DogsVsCats(("train",),
                       subset=slice(None, int(sys.argv[1]), None))
    train_str = DataStream(
        train,
        iteration_scheme=ShuffledScheme(train.num_examples,
                                        int(sys.argv[2])))
    train_str = add_transformers(train_str, random_crop=True)
    start_server(train_str, port=int(sys.argv[3]))
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme
from fuel.datasets.hdf5 import H5PYDataset
from fuel.server import start_server
from functions.custom_transformers import RandomDownscale, RandomFixedSizeCrop, RandomRotate, Normalize, Cast
import math

# First 20k examples of the training split, held in memory.
train_set = H5PYDataset('../data/data_1.hdf5',
                        which_sets=('train',),
                        subset=slice(0, 20000),
                        load_in_memory=True)

# Source indices within each batch tuple.
index_images = 0
index_labels = 1

stream = DataStream.default_stream(
    train_set,
    iteration_scheme=ShuffledScheme(train_set.num_examples, 125))

# Augmentation: rotation only (downscale and crop stages are currently
# disabled).
stream = RandomRotate(stream, 20)
stream = Normalize(stream)
stream = Cast(stream, 'floatX')

start_server(stream, hwm=10)
from fuel.schemes import SequentialScheme, ShuffledScheme
from fuel.datasets.hdf5 import H5PYDataset
from fuel.server import start_server
from fuel.transformers import Flatten, ScaleAndShift  # , Cast
# from fuel.transformers.image import Random2DRotation
from fuel.transformers.video import RescaleMinDimension
from custom_transformers_sunnybrook import RandomDownscale, RandomRotate, Cast, RandomLimit, Normalize, RandomFixedSizeCrop
import numpy
import math

# Validation slice (studies 40-44) of the Sunnybrook heart dataset,
# served on port 5558.
train_set = H5PYDataset(
    'data_sunnybrook/sunnybrook_heart.hdf5',
    which_sets=('train',),
    subset=slice(40, 45),
    load_in_memory=True,
)

stream = DataStream.default_stream(
    train_set,
    iteration_scheme=ShuffledScheme(train_set.num_examples, 5)
)

resized_stream = RandomDownscale(stream, 70)
# NOTE(review): the original also built
#   rotated_stream = RandomRotate(resized_stream, math.pi / 10)
# but never used it -- the crop below reads `resized_stream`.  The dead
# assignment is removed; if rotation was intended, crop `rotated_stream`
# instead (compare the training script, which does use it).
cropped_stream = RandomFixedSizeCrop(resized_stream, (64, 64))
limit_stream = RandomLimit(cropped_stream, 12)
float_stream = Normalize(limit_stream)
float32_stream = Cast(float_stream, 'floatX')

start_server(float32_stream, port=5558, hwm=10)
from fuel.schemes import SequentialScheme, ShuffledScheme
from fuel.datasets.hdf5 import H5PYDataset
from fuel.server import start_server
from fuel.transformers import Flatten, ScaleAndShift  # , Cast
# from fuel.transformers.image import Random2DRotation
from fuel.transformers.video import RescaleMinDimension
from custom_transformers_sunnybrook import RandomDownscale, RandomRotate, Cast, RandomLimit, Normalize, RandomFixedSizeCrop
import numpy
import math

# Training slice (first 40 studies) of the Sunnybrook heart dataset.
train_set = H5PYDataset(
    'data_sunnybrook/sunnybrook_heart.hdf5',
    which_sets=('train',),
    subset=slice(0, 40),
    load_in_memory=True,
)

stream = DataStream.default_stream(
    train_set,
    iteration_scheme=ShuffledScheme(train_set.num_examples, 5))

# Augmentation chain: downscale -> rotate -> 64x64 crop -> frame limit
# -> normalize -> cast to floatX.
stream = RandomDownscale(stream, 70)
stream = RandomRotate(stream, math.pi / 10)
stream = RandomFixedSizeCrop(stream, (64, 64))
stream = RandomLimit(stream, 12)
stream = Normalize(stream)
stream = Cast(stream, 'floatX')

start_server(stream, hwm=10)
print(" Dataset size:", dataset_statsinfo.st_size)

# Exactly one of the three mode flags must be set.
nopts = 0
for flag in [options.do_learn, options.do_valid, options.do_test]:
    # Count truthy flags; the original compared `== True`, which is
    # unidiomatic and fragile against non-bool truthy values.
    if flag:
        nopts += 1
if nopts != 1:
    print("\nMust specify one of learn, valid, or test:\n\n")
    print(__doc__)
    sys.exit(1)

learn_dset, valid_dset, test_dset = load_dataset(options.dataset,
                                                 options.load_in_memory)

data_stream = None
if options.do_test:
    # Fixed copy-paste labels: each branch now names its own split.
    _, data_stream = make_scheme_and_stream(test_dset, options.batch_size,
                                            "Processing test data:")
elif options.do_valid:
    _, data_stream = make_scheme_and_stream(valid_dset, options.batch_size,
                                            "Processing validation data:")
elif options.do_learn:
    _, data_stream = make_scheme_and_stream(learn_dset, options.batch_size,
                                            "Processing training data:")

if data_stream is not None:
    start_server(data_stream, port=options.port, hwm=options.hwm)
else:
    print("Failure to create a data stream!")
port = 5557

# Load the training set.
train = DogsVsCats(('train',), subset=sub)

# DataStream.default_stream converts the 8-bit images to floats in
# [0, 1] and yields sequential batches of `batch_size` examples.
stream = DataStream.default_stream(
    train,
    iteration_scheme=SequentialScheme(train.num_examples, batch_size))

# Scale so the shorter image side is 100 px, then take a random 100x100
# crop, a random rotation of up to 30 degrees, and a random horizontal
# flip.
downscaled_stream = DownscaleMinDimension(
    stream, 100, which_sources=('image_features',))
cropped_stream = RandomFixedSizeCrop(
    downscaled_stream, (100, 100), which_sources=('image_features',))
rotated_stream = Random2DRotation(
    cropped_stream, math.pi / 6, which_sources=('image_features',))
flipped_stream = RandomHorizontalFlip(
    rotated_stream, which_sources=('image_features',))

# Rescale pixel values to [0, 1] and cast down to float32.
float_stream = ScaleAndShift(
    flipped_stream, 1. / 255, 0, which_sources=('image_features',))
float32_stream = Cast(
    float_stream, numpy.float32, which_sources=('image_features',))

start_server(float32_stream, port=port)
parser = argparse.ArgumentParser()
parser.add_argument('--mode')
args = parser.parse_args()

# Map the requested mode to an HDF5 file; the default and 'jpeg' share
# one file.
if args.mode is None:
    path = 'data.hdf5'
elif args.mode == 'jpeg':
    path = 'data.hdf5'
elif args.mode == 'tiff':
    path = 'data_tiff.hdf5'
else:
    # The original raised a bare SyntaxError, which misrepresents the
    # failure: this is a bad argument value, not a parse error.
    raise ValueError('unsupported --mode: %r' % (args.mode,))

valid_set = H5PYDataset(
    basepath + path,
    which_sets=('train',),
    subset=slice(32383, 40479),  # 40479 20% of training set
    sources=['features', 'labels'],
    load_in_memory=False)

stream = DataStream.default_stream(
    valid_set,
    iteration_scheme=SequentialScheme(valid_set.num_examples,
                                      minibatch_size))

print('I provide sources ', valid_set.sources)
print('Number of examples', valid_set.num_examples)

standardized_stream = Standardize(stream, 255.)
start_server(standardized_stream, port=5558)
def start(self):
    """Serve ``self.image_reader`` as a fuel stream on ``self.port``."""
    start_server(self.image_reader, self.port)
def start_fuel_server(stream, port, hwm):
    """Thin wrapper: serve `stream` on `port` with high-water mark `hwm`."""
    start_server(stream, port=port, hwm=hwm)
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme
from fuel.datasets.hdf5 import H5PYDataset
from fuel.server import start_server
from fuel.transformers import Flatten, ScaleAndShift  # , Cast
# from fuel.transformers.image import Random2DRotation
from fuel.transformers.video import RescaleMinDimension
from custom_transformers_sunnybrook import RandomDownscale, RandomRotate, Cast, RandomLimit, Normalize, RandomFixedSizeCrop
import numpy
import math

# Validation slice (studies 40-44) of the Sunnybrook heart dataset,
# served on port 5558.
train_set = H5PYDataset(
    'data_sunnybrook/sunnybrook_heart.hdf5',
    which_sets=('train',),
    subset=slice(40, 45),
    load_in_memory=True,
)

stream = DataStream.default_stream(
    train_set,
    iteration_scheme=ShuffledScheme(train_set.num_examples, 5))

resized_stream = RandomDownscale(stream, 70)
# NOTE(review): the original also built
#   rotated_stream = RandomRotate(resized_stream, math.pi / 10)
# but never used it -- the crop below reads `resized_stream`.  The dead
# assignment is removed; if rotation was intended, crop `rotated_stream`
# instead (compare the training script, which does use it).
cropped_stream = RandomFixedSizeCrop(resized_stream, (64, 64))
limit_stream = RandomLimit(cropped_stream, 12)
float_stream = Normalize(limit_stream)
float32_stream = Cast(float_stream, 'floatX')

start_server(float32_stream, port=5558, hwm=10)
nopts += 1 if nopts != 1: print("\nMust specify one of learn, valid, or test:\n\n") print(__doc__) sys.exit(1) learn_dset, valid_dset, test_dset = load_dataset(options.dataset, options.load_in_memory) data_stream = None if options.do_test: _, data_stream = make_scheme_and_stream(test_dset, options.batch_size, "Processing training data:") elif options.do_valid: _, data_stream = make_scheme_and_stream(valid_dset, options.batch_size, "Processing training data:") elif options.do_learn: _, data_stream = make_scheme_and_stream(learn_dset, options.batch_size, "Processing training data:") if data_stream is not None: start_server(data_stream, port=options.port, hwm=options.hwm) else: print("Failure to create a data stream!")
# Validation cases 451-490 of the Kaggle heart dataset, served on 5558.
valid_set = H5PYDataset(
    './data_kaggle/kaggle_heart.hdf5',
    which_sets=('train',),
    subset=slice(451, 491),
    load_in_memory=True,
)

# Source indices within each batch tuple.
index_cases = 0
index_position = 1
index_mult = 2
index_sax = 3
index_images = 4
index_targets = 5

stream = DataStream.default_stream(
    valid_set,
    iteration_scheme=ShuffledScheme(valid_set.num_examples, 10))

# Mask -> reorder features -> random 64x64 crop -> normalize ->
# zero-pad -> cast to floatX.
masked_stream = ApplyMask(stream)
order_stream = OrderFeatures(masked_stream)
cropped_stream = RandomFixedSizeCrop(order_stream, (64, 64))
float_stream = Normalize(cropped_stream)
padded_stream = ZeroPadding(float_stream)
casted_stream = Cast(padded_stream, 'floatX')

start_server(casted_stream, port=5558, hwm=10)
# NOTE(review): fragment -- the enclosing function's `def` line (and any
# branch guarding the rsync copy, e.g. on `args.no_copy`) is outside
# this view.
    # Copy the dataset from NFS to the local temp dir, then point
    # `args.data_path` at the local copy.
    rsync = Rsync(args.tmpdir)
    rsync.sync(args.data_path)
    args.data_path = os.path.join(args.tmpdir,
                                  os.path.basename(args.data_path))
    return fuel_utils.get_datastream(path=args.data_path,
                                     which_set=args.dataset,
                                     batch_size=args.batch_size)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--data-path', help='data path',
                        default='/u/songinch/song/data/speech/wsj_fbank123.h5')
    parser.add_argument('--dataset', help='dataset for training',
                        default='train_si84_rand')
    parser.add_argument('--batch-size', default=1, help='batch size',
                        type=int)
    parser.add_argument(
        '--tmpdir',
        help='directory name in the /Tmp directory to save data locally',
        default='/Tmp/songinch/data/speech')
    parser.add_argument('--port', help='port number', default=5557,
                        type=int)
    parser.add_argument('--no-copy',
                        help='do not copy data from NFS to the local machine',
                        action='store_true')
    args = parser.parse_args()
    start_server(create_data_stream(args), port=args.port)
upscale_stream = MaximumImageDimensions( data_stream = downscale_stream, maximum_shape = image_size, which_sources=('image_features',) ) # swap_stream = RandomHorizontalSwap( # data_stream = upscale_stream, # which_sources=('image_features',) # ) #rotated_stream = Random2DRotation( # data_stream = upscale_stream, # which_sources=('image_features',) #) scaled_stream = ScaleAndShift( data_stream = upscale_stream, scale = 1./255, shift = 0, which_sources = ('image_features',) ) data_stream = Cast( data_stream = scaled_stream, dtype = 'float32', which_sources = ('image_features',) ) start_server(data_stream, port=port)
# number_train = 494 (counting valid set)
# Training cases 0-449 of the Kaggle heart dataset; the remainder is
# reserved for validation.
train_set = H5PYDataset(
    './data_kaggle/kaggle_heart.hdf5',
    which_sets=('train',),
    subset=slice(0, 450),
    load_in_memory=True)

# Source indices within each batch tuple.
index_cases = 0
index_position = 1
index_mult = 2
index_sax = 3
index_images = 4
index_targets = 5

stream = DataStream.default_stream(
    train_set,
    iteration_scheme=ShuffledScheme(train_set.num_examples, 10))

masked_stream = ApplyMask(stream)
order_stream = OrderFeatures(masked_stream)
cropped_stream = RandomFixedSizeCrop(order_stream, (64, 64))
# NOTE(review): the original also built
#   rotated_stream = RandomRotate(cropped_stream, math.pi / 10)
# but never used it -- Normalize below reads `cropped_stream`.  The dead
# assignment is removed; if rotation augmentation was intended,
# normalize `rotated_stream` instead.
float_stream = Normalize(cropped_stream)
padded_stream = ZeroPadding(float_stream)
casted_stream = Cast(padded_stream, 'floatX')

start_server(casted_stream, hwm=10)
    # NOTE(review): method fragment -- the enclosing class (presumably the
    # `Bottleneck` transformer used below) is outside this view.
    def get_data(self, request=None):
        """Return the next batch from the wrapped stream after sleeping
        ``self.slowdown`` seconds, simulating a slow producer."""
        if request is not None:
            raise ValueError
        time.sleep(self.slowdown)
        return next(self.child_epoch_iterator)


def create_data_stream(slowdown=0):
    """Creates a bottlenecked data stream of dummy data.

    Parameters
    ----------
    slowdown : float
        Time (in seconds) to wait each time data is requested.

    Returns
    -------
    data_stream : fuel.streams.AbstractDataStream
        Bottlenecked data stream.
    """
    # 1000 dummy examples of 128 zeros each, shuffled in batches of 100.
    dataset = IndexableDataset({'features': [[0] * 128] * 1000})
    iteration_scheme = ShuffledScheme(examples=1000, batch_size=100)
    data_stream = Bottleneck(
        data_stream=DataStream.default_stream(
            dataset=dataset, iteration_scheme=iteration_scheme),
        slowdown=slowdown)
    return data_stream


if __name__ == "__main__":
    start_server(create_data_stream(0.005))
# Training cases 0-449 of the Kaggle heart dataset; the remainder is
# reserved for validation.
train_set = H5PYDataset(
    './data_kaggle/kaggle_heart.hdf5',
    which_sets=('train',),
    subset=slice(0, 450),
    load_in_memory=True
)

# Source indices within each batch tuple.
index_cases = 0
index_position = 1
index_mult = 2
index_sax = 3
index_images = 4
index_targets = 5

stream = DataStream.default_stream(
    train_set,
    iteration_scheme=ShuffledScheme(train_set.num_examples, 10)
)

masked_stream = ApplyMask(stream)
order_stream = OrderFeatures(masked_stream)
cropped_stream = RandomFixedSizeCrop(order_stream, (64, 64))
# NOTE(review): the original also built
#   rotated_stream = RandomRotate(cropped_stream, math.pi / 10)
# but never used it -- Normalize below reads `cropped_stream`.  The dead
# assignment is removed; if rotation augmentation was intended,
# normalize `rotated_stream` instead.
float_stream = Normalize(cropped_stream)
padded_stream = ZeroPadding(float_stream)
casted_stream = Cast(padded_stream, 'floatX')

start_server(casted_stream, hwm=10)