def __init__(self, config, *args, **kwargs):
    """Build the wrapped ``DataLoader`` and check it holds at least one batch.

    Arguments:
        config: dataloader configuration, handed unchanged to ``DataLoader``.
        *args, **kwargs: accepted for signature compatibility; not used here.

    Raises:
        ValueError: if the loader reports fewer examples than its batch size.
    """
    self.config = config
    loader = DataLoader(config)
    self._dataloader = loader
    self.ndata = loader.ndata
    if self.ndata < loader.batch_size:
        raise ValueError('Number of examples is smaller than the batch size')
def test_loader_reset():
    """A loader yields the same number of batches before and after reset()."""
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    manifest = random_manifest(10)
    loader = DataLoader(generic_config(manifest.name, batch_size))

    first_pass = list(loader)
    assert len(first_pass) == math.ceil(10. / batch_size)

    loader.reset()
    second_pass = list(loader)
    assert len(second_pass) == math.ceil(10. / batch_size)
def test_loader_reset():
    """A CPU-backed loader yields 5 batches both before and after reset()."""
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    manifest = random_manifest(10)
    loader = DataLoader(generic_config(manifest.name), gen_backend('cpu'))

    first_pass = list(loader)
    assert len(first_pass) == 5

    loader.reset()
    second_pass = list(loader)
    assert len(second_pass) == 5
def test_loader_exception_next():
    """The second next() call on this loader must raise LoaderRuntimeError."""
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    manifest = random_manifest(10, 2)
    loader = DataLoader(generic_config(manifest.name),
                        gen_backend(backend='cpu'))

    # The first batch is expected to load without error.
    loader.next()

    with pytest.raises(LoaderRuntimeError):
        loader.next()
def test_loader_reset():
    """A CPU-backed loader yields 5 batches both before and after reset()."""
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    manifest = random_manifest(10)
    loader = DataLoader(generic_config(manifest.name),
                        gen_backend(backend='cpu'))

    first_pass = list(loader)
    assert len(first_pass) == 5

    loader.reset()
    second_pass = list(loader)
    assert len(second_pass) == 5
def test_loader_exception_next(num_of_batches_to_process, net_definition_file,
                               manifest_filename, manifest_root):
    """Read a JSON config, point it at the given manifest and draw batches.

    Arguments:
        num_of_batches_to_process (int): how many batches to pull and draw.
        net_definition_file: context manager yielding a readable file that
            contains the JSON loader configuration.
        manifest_filename: value stored under ``config['manifest_filename']``.
        manifest_root: value stored under ``config['manifest_root']``.
    """
    with net_definition_file as cfg_file:
        config = json.loads(cfg_file.read())

    config['manifest_filename'] = manifest_filename
    config['manifest_root'] = manifest_root
    # The cache entry is removed before the loader is built; a config file
    # without this key raises KeyError here.
    del config['cache_directory']

    dl = DataLoader(config)
    for _ in range(num_of_batches_to_process):
        draw_images(dl.next())
def test_loader_exception_next(num_of_batches_to_process, net_definition_file,
                               manifest_filename, manifest_root):
    """Read a JSON config, point it at the given manifest and draw batches.

    Arguments:
        num_of_batches_to_process (int): how many batches to pull and draw.
        net_definition_file: context manager yielding a readable file that
            contains the JSON loader configuration.
        manifest_filename: value stored under ``config['manifest_filename']``.
        manifest_root: value stored under ``config['manifest_root']``.
    """
    with net_definition_file as cfg_file:
        config = json.loads(cfg_file.read())

    config['manifest_filename'] = manifest_filename
    config['manifest_root'] = manifest_root
    # The cache entry is removed before the loader is built; a config file
    # without this key raises KeyError here.
    del config['cache_directory']

    dl = DataLoader(config)
    for _ in range(num_of_batches_to_process):
        draw_images(dl.next())
def make_aeon_loaders(work_dir, batch_size, backend, random_seed=0):
    """Create the training and validation aeon loaders for CIFAR-10.

    Arguments:
        work_dir: directory handed to ``ingest_cifar10``.
        batch_size: batch size passed to ``common_config`` for both loaders.
        backend: backend object passed as second argument to ``DataLoader``.
        random_seed (int): currently unused; the options that would consume
            it are commented out below.

    Returns:
        tuple: ``(train_loader, valid_loader)``.
    """
    train_manifest, valid_manifest = ingest_cifar10(work_dir)

    train_config = common_config(train_manifest, batch_size)
    # train_config['shuffle_manifest'] = True
    # train_config['shuffle_every_epoch'] = True
    # train_config['random_seed'] = random_seed
    # train_config['image']['center'] = False
    # train_config['image']['flip_enable'] = True
    valid_config = common_config(valid_manifest, batch_size)

    return (DataLoader(train_config, backend),
            DataLoader(valid_config, backend))
def test_loader_broken_image_next():
    """A broken image must surface a decode error via next() and iteration."""
    manifest = random_manifest(9, broken_image_index=8)
    config = generic_config(manifest.name, batch_size)

    # Explicit next() calls.
    loader = DataLoader(config)
    with pytest.raises(Exception) as ex:
        for _ in range(5):
            loader.next()
    assert 'Decoding image failed due to invalid data in the image file' in str(ex)

    # Same check through the iterator protocol on a fresh loader.
    second_loader = DataLoader(config)
    with pytest.raises(Exception) as ex:
        for _batch in second_loader:
            pass
    assert 'Decoding image failed due to invalid data in the image file' in str(ex)
def build_dataloader(config, be, frcn_rois_per_img):
    """
    Assemble the Faster-RCNN data pipeline on top of the aeon loader.

    The base loader is wrapped, in order, by:
    1. ``TypeCast``: casts buffer 0 (the image) to float32.
    2. ``BGRMeanSubtract``: subtracts ``util.FRCN_PIXEL_MEANS`` from buffer 0.
    3. ``ObjectLocalization``: repacks the buffers for the Faster-RCNN model.

    Arguments:
        config (dict): dataloader configuration
        be (backend): compute backend
        frcn_rois_per_img (int): Number of ROIs to use for training the RCNN
                                 portion of the model; forwarded to
                                 ``ObjectLocalization``.

    Returns:
        dataloader object.
    """
    base_loader = DataLoader(config, be)
    float_loader = TypeCast(base_loader, index=0, dtype=np.float32)
    centered_loader = BGRMeanSubtract(float_loader, index=0,
                                      pixel_mean=util.FRCN_PIXEL_MEANS)
    return ObjectLocalization(centered_loader,
                              frcn_rois_per_img=frcn_rois_per_img)
def main():
    """Attach to an existing remote aeon session and iterate over its batches.

    Connection parameters come from ``parse_input()``. The session is left
    open on exit (``close_session`` is False).
    """
    address, port, session_id, rdma_address, rdma_port = parse_input()

    cfg = {
        'remote': {
            'address': address,
            'port': int(port),
            'session_id': session_id,
            'close_session': False
        }
    }

    # Add RDMA parameters if they are set
    if rdma_address:
        cfg['remote']['rdma_address'] = rdma_address
        cfg['remote']['rdma_port'] = int(rdma_port)

    # Create new aeon DataLoader object
    loader = DataLoader(config=cfg)
    print("data size: {0}".format(len(loader)))

    # Retrieve shapes
    shapes = loader.axes_info
    print("shapes: {0}".format(shapes))

    # Iterate through all available batches
    batch_counter = 1
    for batch in loader:
        # Format the message before printing: calling .format() on the
        # result of print() fails on Python 3, where print() returns None.
        print("Batch {0} ready.".format(batch_counter))
        batch_counter += 1
        time.sleep(1)
def test_loader_invalid_manifest():
    """Building a loader from an invalid manifest must raise a clear error."""
    import os  # local import: only needed to release the mkstemp descriptor

    # mkstemp() returns an already-open OS-level descriptor together with the
    # path; close it explicitly so the test does not leak a file descriptor.
    fd, filename = tempfile.mkstemp()
    os.close(fd)

    config = generic_config(invalid_image(filename), batch_size)
    with pytest.raises(Exception) as ex:
        DataLoader(config)
    assert 'must be string, but is null' in str(ex)
def test_loader_broken_image():
    """Constructing a loader over a broken image must report a decode error."""
    manifest = random_manifest(2, broken_image_index=1)
    loader_config = generic_config(manifest.name, batch_size)

    with pytest.raises(Exception) as ex:
        DataLoader(loader_config)

    captured = str(ex)
    assert 'Decoding image failed due to invalid data in the image file' in captured
def __init__(self, config, *args, **kwargs):
    """Store ``config`` and build the wrapped ``DataLoader`` from it.

    Arguments:
        config: dataloader configuration. If it has an ``"etl"`` entry that
            is a tuple, that entry is replaced in place by a list.
        *args, **kwargs: accepted for signature compatibility; not used here.
    """
    # TODO: Remove this workaround once tuples are accepted
    etl_is_tuple = "etl" in config and isinstance(config["etl"], tuple)
    if etl_is_tuple:
        config["etl"] = list(config["etl"])

    self.config = config
    self._dataloader = DataLoader(config)
def test_loader_exception_iter():
    """Iterating this loader must yield exactly 4 batches."""
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    manifest = random_manifest(10, 2)
    loader = DataLoader(generic_config(manifest.name),
                        gen_backend(backend='cpu'))

    batches = list(loader)
    assert len(batches) == 4
def test_parse_json_dict_tuple_pass():
    """A config whose ``etl`` entry is a tuple must be accepted by the loader."""
    here = os.path.dirname(os.path.realpath(__file__))
    test_dir = here + '/test_data/'

    image_etl = {'type': 'image', 'width': 32, 'height': 32}
    label_etl = {'type': 'label', 'binary': False}
    config = {
        'batch_size': 16,
        'manifest_root': test_dir,
        'manifest_filename': test_dir + 'manifest.tsv',
        # Deliberately a tuple rather than a list.
        'etl': (image_etl, label_etl),
    }

    dl = DataLoader(config)

    assert dl.config["etl"][0]["type"] == 'image'
    assert dl.config["etl"][1]["type"] == 'label'
def test_loader_invalid_config_type():
    """An unknown ``type`` in the config must make loader construction fail."""
    manifest = random_manifest(10)
    bad_config = generic_config(manifest.name)
    bad_config['type'] = 'invalid type name'

    with pytest.raises(Exception):
        DataLoader(bad_config, gen_backend(backend='cpu'))
def test_dataloader_axes_info():
    """Check batch sizes and the ``axes_info`` reported for image and label."""
    manifest_root = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'test_data')
    manifest_file = os.path.join(manifest_root, 'manifest.tsv')

    image_etl = {
        'type': 'image',
        'channel_major': False,
        'width': 28,
        'height': 28,
        'channels': 1
    }
    label_etl = {
        'type': 'label',
        'binary': False
    }
    cfg = {
        'manifest_filename': manifest_file,
        'manifest_root': manifest_root,
        'batch_size': 20,
        'block_size': 40,
        'cache_directory': "",
        'etl': [image_etl, label_etl]
    }

    d1 = DataLoader(config=cfg)
    shapes = d1.axes_info

    # Every batch carries one entry per ETL element, each of batch_size length.
    for batch in d1:
        assert len(batch) == len(cfg['etl'])
        image, label = batch[0], batch[1]
        assert len(image[1]) == cfg['batch_size']
        assert len(label[1]) == cfg['batch_size']

    image = shapes[0]
    assert image[0] == 'image'

    # For images, order in json doesn't matter.
    # Order of axes is defined by Aeon for the given axis type.
    # For images, there could be two orders: CHW (channel_major=True) or
    # HWC (channel_major=False). For testing purposes, channel_major is set to
    # False here, so we can check that the axes are sorted.
    image_axes = image[1]
    for position, axis_name in enumerate(('height', 'width', 'channels')):
        assert image_axes[position][0] == axis_name
        assert image_axes[position][1] == cfg['etl'][0][axis_name]

    label = shapes[1]
    assert label[0] == 'label'
def test_loader_invalid_config_type():
    """An unsupported ETL ``type`` must raise RuntimeError mentioning it."""
    manifest = random_manifest(10)
    bad_config = generic_config(manifest.name, batch_size)
    first_etl = bad_config["etl"][0]
    first_etl["type"] = 'invalid type name'

    with pytest.raises(RuntimeError) as ex:
        DataLoader(bad_config)

    assert 'unsupported' in str(ex)
def test_loader_missing_config_field():
    """Omitting ``height`` from the first ETL entry must raise RuntimeError."""
    manifest = random_manifest(10)
    incomplete_config = generic_config(manifest.name, batch_size)
    first_etl = incomplete_config['etl'][0]
    del first_etl["height"]

    with pytest.raises(RuntimeError) as ex:
        DataLoader(incomplete_config)

    assert 'height' in str(ex)
def test_loader():
    """Batch count must equal ceil(n / batch_size) for n in 1..9."""
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    for record_count in range(1, 10):
        manifest = random_manifest(record_count)
        loader = DataLoader(generic_config(manifest.name, batch_size))

        batches = list(loader)
        assert len(batches) == math.ceil(float(record_count) / batch_size)
def test_loader_missing_config_field():
    """Dropping the ``image`` section must make loader construction fail."""
    manifest = random_manifest(10)
    incomplete_config = generic_config(manifest.name)
    del incomplete_config['image']

    with pytest.raises(Exception) as ex:
        DataLoader(incomplete_config, gen_backend(backend='cpu'))

    assert 'image' in str(ex)
def test_loader_json_parser_pass():
    """Feed every ``./json/pass*.json`` file to the loader wrapped in a dict.

    Each file is valid JSON; once nested under a ``"config"`` key it is
    expected to be rejected by ``DataLoader`` with a 'Required Argument'
    RuntimeError.
    """
    for path in glob.glob("./json/pass*.json"):
        with open(path) as json_file:
            raw_json = json_file.read()

        # config must be a dict so make sure it is a dict
        wrapped_json = '{"config": %s}' % raw_json
        config = json.loads(wrapped_json)

        with pytest.raises(RuntimeError) as ex:
            DataLoader(config)
        assert 'Required Argument' in str(ex)
class AeonDataLoader(object):
    """Iterator wrapper around an aeon ``DataLoader`` that yields dict batches.

    Each batch from the wrapped loader (an iterable of ``(name, buffer)``
    pairs) is converted to a ``{name: buffer}`` dict, with the ``'label'``
    buffer flattened when present.
    """

    def __init__(self, config, *args, **kwargs):
        """Build the wrapped loader and check it holds at least one batch.

        Arguments:
            config: dataloader configuration, handed unchanged to ``DataLoader``.
            *args, **kwargs: accepted for signature compatibility; not used.

        Raises:
            ValueError: if the loader reports fewer examples than its batch size.
        """
        self.config = config
        self._dataloader = DataLoader(config)
        if self.ndata < self._dataloader.batch_size:
            raise ValueError(
                'Number of examples is smaller than the batch size')

    @property
    def ndata(self):
        """Number of examples reported by the wrapped loader.

        Exposed as a read-only property so ``loader.ndata`` keeps returning
        the count. Previously a same-named method (which returned nothing)
        was shadowed by an instance attribute assigned in ``__init__``.
        """
        return self._dataloader.ndata

    def __next__(self):
        """Return the next batch as a dict keyed by buffer name."""
        bufs = next(self._dataloader)
        bufs_dict = {key: val for key, val in bufs}
        if 'label' in bufs_dict:
            bufs_dict['label'] = bufs_dict['label'].flatten()
        return bufs_dict

    def __iter__(self):
        """Return ``self``; the object is its own iterator."""
        return self

    def make_placeholders(self, include_iteration=False):
        """Create one ngraph placeholder per buffer described by ``axes_info``.

        Arguments:
            include_iteration (bool): when True, also add a scalar
                ``'iteration'`` placeholder.

        Returns:
            dict: placeholder name -> ``ng.placeholder``.
        """
        placeholders = {}
        batch_axis = ng.make_axis(self._dataloader.batch_size, name="N")
        for placeholder_name, axis_info in self._dataloader.axes_info:
            p_axes = ng.make_axes([batch_axis])
            # The label placeholder only gets the batch axis (labels are
            # flattened in __next__), so its own axes are skipped entirely.
            if placeholder_name != 'label':
                for nm, sz in axis_info:
                    if nm in NAME_MAP:
                        nm = NAME_MAP[nm]
                    p_axes += ng.make_axis(name=nm, length=sz)
            placeholders[placeholder_name] = ng.placeholder(p_axes)
        if include_iteration:
            placeholders['iteration'] = ng.placeholder(axes=())
        return placeholders

    def reset(self):
        """Reset the wrapped loader."""
        self._dataloader.reset()
def test_loader_exception_next():
    """The second next() call on this loader must raise LoaderRuntimeError."""
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    manifest = random_manifest(10, 2)
    loader = DataLoader(generic_config(manifest.name),
                        gen_backend(backend='cpu'))

    # The first batch is expected to load without error.
    loader.next()

    with pytest.raises(LoaderRuntimeError):
        loader.next()
def main():
    """Open a remote aeon session for a manifest and print every batch.

    Connection parameters and the manifest path come from ``parse_input()``.
    """
    address, port, manifest, rdma_address, rdma_port = parse_input()

    remote_cfg = {
        'address': address,
        'port': int(port)
    }
    # Add RDMA parameters if they are set
    if rdma_address:
        remote_cfg['rdma_address'] = rdma_address
        remote_cfg['rdma_port'] = int(rdma_port)

    cfg = {
        'manifest_filename': manifest,
        'manifest_root': os.path.dirname(manifest),
        'batch_size': 4,
        'cache_directory': "",  # don't create cache
        'etl': [{
            'type': 'image',
            'width': 28,
            'height': 28,
            'channels': 1
        }, {
            'type': 'label',
            'binary': False
        }],
        'remote': remote_cfg
    }

    # Create new aeon DataLoader object
    loader = DataLoader(config=cfg)
    print("data size: {0}".format(len(loader)))

    # Retrieve shapes
    print("shapes: {0}".format(loader.axes_info))

    # Iterate through all available batches
    for batch in loader:
        for entry in (batch[0], batch[1]):
            print("{0} data: {1}".format(entry[0], entry[1]))
def test_loader_exception_iter():
    """Iterating the bundled manifest must yield ceil(120 / batch_size) batches.

    The test runs from inside ``test_data``. The working directory is
    restored and the manifest closed even when an assertion or the loader
    fails, so a failure here cannot leak state into other tests.
    """
    cwd = os.getcwd()
    dir_path = os.path.dirname(os.path.realpath(__file__))
    os.chdir(dir_path + '/test_data')
    try:
        # NOTE: manifest needs to stay in scope until DataLoader has read it.
        with open("manifest.tsv") as manifest:
            config = generic_config(manifest.name, batch_size)
            dl = DataLoader(config)
            num_of_manifest_entries = 120.
            assert len(list(iter(dl))) == math.ceil(
                num_of_manifest_entries / batch_size)
    finally:
        os.chdir(cwd)
def test_loader_exception_next():
    """After 60 batches, a further next() must raise StopIteration.

    The test runs from inside ``test_data``. The working directory is
    restored and the manifest closed even when an assertion or the loader
    fails, so a failure here cannot leak state into other tests.
    """
    cwd = os.getcwd()
    dir_path = os.path.dirname(os.path.realpath(__file__))
    os.chdir(dir_path + '/test_data')
    try:
        # NOTE: manifest needs to stay in scope until DataLoader has read it.
        with open("manifest.tsv") as manifest:
            config = generic_config(manifest.name, batch_size)
            dl = DataLoader(config)
            num_of_batches_in_manifest = 60
            for _ in range(num_of_batches_in_manifest):
                next(dl)
            with pytest.raises(StopIteration):
                next(dl)
    finally:
        os.chdir(cwd)
def test_loader_json_parser_fail():
    """Feed every parseable ``./json/fail*.json`` file to the loader.

    Files that are not valid JSON are skipped. The rest are nested under a
    ``"config"`` key and must be rejected by ``DataLoader`` with a
    'Required Argument' RuntimeError.
    """
    for path in glob.glob("./json/fail*.json"):
        with open(path) as json_file:
            raw_json = json_file.read()

        # Skip files the json module itself cannot parse.
        try:
            json.loads(raw_json)
        except ValueError:
            continue

        wrapped_json = '{"config": %s}' % raw_json
        config = json.loads(wrapped_json)

        with pytest.raises(RuntimeError) as ex:
            DataLoader(config)
        assert 'Required Argument' in str(ex)
def main():
    """Create a remote aeon session, print its ID and wait for the user.

    Connection parameters and the manifest path come from ``parse_input()``.
    The session is configured with ``close_session`` True; the function
    blocks on stdin before returning.
    """
    address, port, manifest, rdma_address, rdma_port = parse_input()

    cache_root = ""  # don't create cache
    batch_size = 4

    cfg = {
        'manifest_filename': manifest,
        'manifest_root': os.path.dirname(manifest),
        'batch_size': batch_size,
        'cache_directory': cache_root,
        # because of INFINITE setting, there is always batch to fetch
        'iteration_mode': 'INFINITE',
        'etl': [{
            'type': 'image',
            'width': 28,
            'height': 28,
            'channels': 1
        }, {
            'type': 'label',
            'binary': False
        }],
        'remote': {
            'address': address,
            'port': int(port),
            'close_session': True
        }
    }

    # Add RDMA parameters if they are set
    if rdma_address:
        cfg['remote']['rdma_address'] = rdma_address
        cfg['remote']['rdma_port'] = int(rdma_port)

    # Create new aeon DataLoader object
    loader = DataLoader(config=cfg)

    # Retrieve newly created session ID
    session_id = loader.session_id
    # Format the message before printing: calling .format() on the result of
    # print() fails on Python 3, where print() returns None.
    print("New sesion ID: {0}".format(session_id))

    print("Press button to close session and exit...")
    sys.stdin.readline()
def test_anchor_target_layer(backend_default, fargs):
    """Run ``reference_test`` on every batch of a PASCAL VOC subset.

    Arguments:
        backend_default: backend object passed to ``DataLoader``.
        fargs (tuple): ``(height, width)`` of the input images.

    Requires the PASCAL_MANIFEST_PATH and PASCAL_MANIFEST_ROOT environment
    variables to be set.
    """
    (height, width) = fargs

    # Use .get() so a missing variable reaches the assertion and its message
    # instead of raising a bare KeyError from os.environ[...].
    manifest_path = os.environ.get('PASCAL_MANIFEST_PATH')
    assert manifest_path is not None, "Please set the PASCAL_MANIFEST_PATH variable."

    manifest_root = os.environ.get('PASCAL_MANIFEST_ROOT')
    assert manifest_root is not None, "Please set the PASCAL_MANIFEST_ROOT variable."

    config = PASCALVOC(manifest_path, manifest_root, cache_dir='',
                       height=height, width=width, inference=False)
    config['subset_fraction'] = 0.1

    dl = DataLoader(config, backend_default)
    dl = TypeCast(dl, index=0, dtype=np.float32)
    train_set = ObjectLocalization(dl, frcn_rois_per_img=128)

    for X, Y in train_set:
        reference_test(train_set, X, Y)
def build_dataloader(config, manifest_root, batch_size, subset_pct=100,
                     PIXEL_MEANS=np.array([104, 117, 123])):
    """
    Assemble the Faster-RCNN data pipeline on top of the aeon loader.

    The base loader is adapted with ``DataLoaderAdapter`` and then wrapped,
    in order, by:
    1. ``TypeCast``: casts buffer 5 (the image) to float32.
    2. ``BGRMeanSubtract``: subtracts ``PIXEL_MEANS`` from buffer 5.
    3. ``ObjectLocalization``: repacks the buffers for the Faster-RCNN model.

    Note that ``config`` is modified in place: ``manifest_root``,
    ``batch_size`` and ``subset_fraction`` are written into it.

    Arguments:
        config (dict): dataloader configuration; must provide
                       ``config['etl'][0]['class_names']``.
        manifest_root: stored under ``config["manifest_root"]``.
        batch_size: stored under ``config["batch_size"]``.
        subset_pct: percentage of the data to use; stored as the fraction
                    ``subset_pct / 100.0`` under ``config["subset_fraction"]``.
        PIXEL_MEANS: per-channel means passed to ``BGRMeanSubtract``.

    Returns:
        dataloader object.
    """
    # assert config['minibatch_size'] == be.bsz,
    # 'Dataloader config\'s minibatch size not matching backend bsz'
    config["manifest_root"] = manifest_root
    config["batch_size"] = batch_size
    config["subset_fraction"] = float(subset_pct / 100.0)

    adapted = DataLoaderAdapter(DataLoader(config))
    as_float = TypeCast(adapted, index=5, dtype=np.float32)  # cast image to float
    centered = BGRMeanSubtract(as_float, index=5,
                               pixel_mean=PIXEL_MEANS)  # subtract means

    localized = ObjectLocalization(centered)
    localized.set_classes(config['etl'][0]['class_names'])
    localized.shape = localized.shapes()[5]
    return localized
def __init__(self, config, *args, **kwargs):
    """Store ``config`` and build the wrapped loader from its JSON form.

    Arguments:
        config: JSON-serializable dataloader configuration; it is passed to
            ``DataLoader`` as a JSON string, not as the object itself.
        *args, **kwargs: accepted for signature compatibility; not used here.
    """
    self.config = config
    serialized_config = json.dumps(config)
    self._dataloader = DataLoader(serialized_config)