def __init__(self, config, *args, **kwargs):
    """Wrap an aeon ``DataLoader`` built from ``config``.

    Arguments:
        config: configuration object handed unchanged to ``DataLoader``.
        *args, **kwargs: accepted but not used here.

    Raises:
        ValueError: if the loader reports fewer examples than its batch size.
    """
    self.config = config
    loader = DataLoader(config)
    self._dataloader = loader
    self.ndata = loader.ndata
    # A dataset smaller than one batch is rejected up front.
    if self.ndata < loader.batch_size:
        raise ValueError('Number of examples is smaller than the batch size')
def test_loader_reset():
    """Iterate a DataLoader, reset it, and check the second pass is as long.

    Both passes must yield ``ceil(10 / batch_size)`` batches.
    """
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    manifest = random_manifest(10)
    loader = DataLoader(generic_config(manifest.name, batch_size))

    first_pass_count = len(list(iter(loader)))
    assert first_pass_count == math.ceil(10. / batch_size)

    loader.reset()

    second_pass_count = len(list(iter(loader)))
    assert second_pass_count == math.ceil(10. / batch_size)
Beispiel #3
0
def test_loader_reset():
    """Iterate a CPU-backed DataLoader, reset it, and iterate again.

    Each pass over the manifest from ``random_manifest(10)`` must yield
    5 batches.
    """
    expected_batches = 5
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    manifest = random_manifest(10)
    loader = DataLoader(generic_config(manifest.name), gen_backend('cpu'))

    first_pass_count = len(list(iter(loader)))
    assert first_pass_count == expected_batches

    loader.reset()

    second_pass_count = len(list(iter(loader)))
    assert second_pass_count == expected_batches
Beispiel #4
0
def test_loader_exception_next():
    """Check that the second ``next()`` call raises ``LoaderRuntimeError``.

    The first ``next()`` call is made outside the ``pytest.raises`` block,
    so it is required to succeed.
    """
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    manifest = random_manifest(10, 2)
    loader = DataLoader(generic_config(manifest.name),
                        gen_backend(backend='cpu'))

    loader.next()
    with pytest.raises(LoaderRuntimeError):
        loader.next()
Beispiel #5
0
def test_loader_reset():
    """Verify that ``reset()`` allows a full second pass over the data.

    A CPU-backed DataLoader over ``random_manifest(10)`` must yield 5
    batches both before and after the reset.
    """
    batches_per_pass = 5
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    manifest = random_manifest(10)
    config = generic_config(manifest.name)
    backend = gen_backend(backend='cpu')
    loader = DataLoader(config, backend)

    assert len(list(iter(loader))) == batches_per_pass
    loader.reset()
    assert len(list(iter(loader))) == batches_per_pass
Beispiel #6
0
def test_loader_exception_next(num_of_batches_to_process, net_definition_file,
                               manifest_filename, manifest_root):
    """Load a net definition, point it at a manifest, and draw its batches.

    Arguments:
        num_of_batches_to_process (int): how many batches to fetch and draw.
        net_definition_file: context manager yielding a readable object
            whose contents are a JSON DataLoader configuration.
        manifest_filename: value stored under ``'manifest_filename'``.
        manifest_root: value stored under ``'manifest_root'``.

    Raises:
        KeyError: if the configuration has no ``'cache_directory'`` entry.
    """
    with net_definition_file as cfg_file:
        config = json.loads(cfg_file.read())
        config['manifest_filename'] = manifest_filename
        config['manifest_root'] = manifest_root
        # The cache setting from the definition file is always discarded.
        del config['cache_directory']

    loader = DataLoader(config)
    for _ in range(num_of_batches_to_process):
        draw_images(loader.next())
Beispiel #7
0
def test_loader_exception_next(num_of_batches_to_process, net_definition_file,
                               manifest_filename, manifest_root):
    """Fetch and draw a number of batches from a JSON-configured DataLoader.

    The JSON configuration read from ``net_definition_file`` is updated with
    the given manifest location, and its ``'cache_directory'`` entry is
    removed (a missing entry raises ``KeyError``) before the loader is built.

    Arguments:
        num_of_batches_to_process (int): number of ``next()`` calls to make.
        net_definition_file: context manager yielding a readable object.
        manifest_filename: value stored under ``'manifest_filename'``.
        manifest_root: value stored under ``'manifest_root'``.
    """
    with net_definition_file as cfg_file:
        raw_definition = cfg_file.read()
        config = json.loads(raw_definition)
        config['manifest_filename'] = manifest_filename
        config['manifest_root'] = manifest_root
        del config['cache_directory']

    data_loader = DataLoader(config)
    remaining = num_of_batches_to_process
    while remaining > 0:
        draw_images(data_loader.next())
        remaining -= 1
Beispiel #8
0
def make_aeon_loaders(work_dir, batch_size, backend, random_seed=0):
    """Build the training and validation aeon loaders for CIFAR-10.

    Arguments:
        work_dir: passed to ``ingest_cifar10``, which returns the train and
            validation manifests.
        batch_size: passed to ``common_config`` for both loaders.
        backend: passed as the second argument to each ``DataLoader``.
        random_seed: currently unused; only the disabled training options
            listed below refer to it.

    Returns:
        tuple: ``(train_loader, valid_loader)``.
    """
    train_manifest, valid_manifest = ingest_cifar10(work_dir)

    # Both configurations are built before either loader is created.
    configs = [common_config(manifest_file, batch_size)
               for manifest_file in (train_manifest, valid_manifest)]

    # Disabled training-only options, kept for reference:
    # train_config['shuffle_manifest'] = True
    # train_config['shuffle_every_epoch'] = True
    # train_config['random_seed'] = random_seed
    # train_config['image']['center'] = False
    # train_config['image']['flip_enable'] = True

    return tuple([DataLoader(loader_config, backend)
                  for loader_config in configs])
Beispiel #9
0
def test_loader_broken_image_next():
    """Check that a broken image surfaces as a decoding error while fetching.

    The error must be raised both when batches are pulled with ``next()``
    and when the loader is iterated directly.
    """
    expected_message = (
        'Decoding image failed due to invalid data in the image file')
    manifest = random_manifest(9, broken_image_index=8)
    config = generic_config(manifest.name, batch_size)
    dl = DataLoader(config)

    with pytest.raises(Exception) as ex:
        for _ in range(5):
            dl.next()
    # Inspect the raised exception itself: the string form of pytest's
    # ExceptionInfo wrapper is not guaranteed to contain the message.
    assert expected_message in str(ex.value)

    dl2 = DataLoader(config)
    with pytest.raises(Exception) as ex:
        for _ in dl2:
            pass
    assert expected_message in str(ex.value)
Beispiel #10
0
def build_dataloader(config, be, frcn_rois_per_img):
    """
    Create the Faster-RCNN data pipeline on top of the aeon loader.

    The base loader is wrapped, in order, by three operations:
    1. The image data (buffer index 0) is cast to float32.
    2. The BGR pixel means (``util.FRCN_PIXEL_MEANS``, the pre-defined means
       from training the VGG network) are subtracted from the image.
    3. The data is repacked for the Faster-RCNN model. The model has several
       nested branches, so the buffers are repacked into nested tuples that
       match the branch leafs; buffers for training the RCNN portion of the
       model are also allocated and provisioned to the model.

    Arguments:
        config (dict): dataloader configuration
        be (backend): compute backend
        frcn_rois_per_img (int): Number of ROIs to use for training the RCNN
            portion of the model. This is used to create the target buffers
            for RCNN.

    Returns:
        dataloader object.
    """
    image_index = 0

    base_loader = DataLoader(config, be)
    float_loader = TypeCast(base_loader, index=image_index, dtype=np.float32)
    centered_loader = BGRMeanSubtract(float_loader, index=image_index,
                                      pixel_mean=util.FRCN_PIXEL_MEANS)
    return ObjectLocalization(centered_loader,
                              frcn_rois_per_img=frcn_rois_per_img)
def main():
    """Attach to an existing remote aeon session and iterate its batches.

    Connection parameters come from ``parse_input()``. The loader is created
    with ``close_session`` set to False. The data size and axes info are
    printed, then one line per batch, with a one-second pause after each.
    """
    address, port, session_id, rdma_address, rdma_port = parse_input()

    cfg = {
        'remote': {
            'address': address,
            'port': int(port),
            'session_id': session_id,
            'close_session': False
        }
    }

    # Add RDMA parameters if they are set
    if rdma_address:
        cfg['remote']['rdma_address'] = rdma_address
        cfg['remote']['rdma_port'] = int(rdma_port)

    # Create new aeon DataLoader object
    loader = DataLoader(config=cfg)
    print("data size: {0}".format(len(loader)))

    # Retrieve shapes
    shapes = loader.axes_info
    print("shapes: {0}".format(shapes))

    # Iterate through all available batches
    batch_counter = 1
    for batch in loader:
        # Format the string before printing: calling .format() on the return
        # value of print() fails on Python 3, where print() returns None.
        print("Batch {0} ready.".format(batch_counter))
        batch_counter += 1
        time.sleep(1)
def test_loader_invalid_manifest():
    """Check that a manifest built by ``invalid_image`` is rejected.

    The temporary file created for the test is closed immediately and
    removed afterwards, so neither the descriptor nor the file is leaked.
    """
    import os

    # mkstemp returns an OPEN descriptor together with the path; close it
    # right away because only the path is used.
    fd, filename = tempfile.mkstemp()
    os.close(fd)
    try:
        config = generic_config(invalid_image(filename), batch_size)

        with pytest.raises(Exception) as ex:
            DataLoader(config)
        # Inspect the raised exception itself: the string form of pytest's
        # ExceptionInfo wrapper is not guaranteed to contain the message.
        assert 'must be string, but is null' in str(ex.value)
    finally:
        if os.path.exists(filename):
            os.remove(filename)
Beispiel #13
0
def test_loader_broken_image():
    """Check that constructing a loader over a broken image fails to decode."""
    manifest = random_manifest(2, broken_image_index=1)
    config = generic_config(manifest.name, batch_size)

    with pytest.raises(Exception) as ex:
        DataLoader(config)
    # Inspect the raised exception itself: the string form of pytest's
    # ExceptionInfo wrapper is not guaranteed to contain the message.
    assert ('Decoding image failed due to invalid data in the image file'
            in str(ex.value))
Beispiel #14
0
    def __init__(self, config, *args, **kwargs):
        """Store ``config`` and create the wrapped ``DataLoader`` from it.

        A tuple stored under ``config["etl"]`` is replaced, in the caller's
        ``config`` object itself, by an equivalent list before the loader is
        created.

        Arguments:
            config: loader configuration supporting ``in`` and item access.
            *args, **kwargs: accepted but not used here.
        """
        # TODO: Remove this workaround once tuples are accepted
        if "etl" in config:
            etl_entries = config["etl"]
            if isinstance(etl_entries, tuple):
                config["etl"] = list(etl_entries)

        self.config = config
        self._dataloader = DataLoader(config)
Beispiel #15
0
def test_loader_exception_iter():
    """Check that iterating the loader yields exactly 4 batches.

    The manifest comes from ``random_manifest(10, 2)`` and the loader uses
    the CPU backend.
    """
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    manifest = random_manifest(10, 2)
    loader = DataLoader(generic_config(manifest.name),
                        gen_backend(backend='cpu'))

    batch_count = len(list(iter(loader)))
    assert batch_count == 4
Beispiel #16
0
def test_parse_json_dict_tuple_pass():
    """Check that a config whose ``'etl'`` entry is a tuple is accepted.

    After construction, the loader's ``config["etl"]`` must still expose the
    image entry first and the label entry second.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__)) + '/test_data/'

    image_etl = {'type': 'image', 'width': 32, 'height': 32}
    label_etl = {'type': 'label', 'binary': False}
    config = {
        'batch_size': 16,
        'manifest_root': test_dir,
        'manifest_filename': test_dir + 'manifest.tsv',
        # Deliberately a tuple rather than a list.
        'etl': (image_etl, label_etl),
    }

    dl = DataLoader(config)
    etl_types = (dl.config["etl"][0]["type"], dl.config["etl"][1]["type"])
    assert etl_types == ('image', 'label')
Beispiel #17
0
def test_loader_invalid_config_type():
    """Check that an unknown top-level ``'type'`` makes construction fail."""
    manifest = random_manifest(10)
    config = generic_config(manifest.name)
    config['type'] = 'invalid type name'

    # Any exception is accepted; the message is not checked here.
    with pytest.raises(Exception):
        DataLoader(config, gen_backend(backend='cpu'))
def test_dataloader_axes_info():
    """Check batch layout and ``axes_info`` for an image + label loader.

    Every batch must hold one entry per configured ETL stage, each carrying
    ``batch_size`` items, and ``axes_info`` must list the image axes as
    height, width, channels, followed by the label entry.
    """
    test_data_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'test_data')

    cfg = {
        'manifest_filename': os.path.join(test_data_dir, 'manifest.tsv'),
        'manifest_root': test_data_dir,
        'batch_size': 20,
        'block_size': 40,
        'cache_directory': "",
        'etl': [
            {
                'type': 'image',
                'channel_major': False,
                'width': 28,
                'height': 28,
                'channels': 1,
            },
            {
                'type': 'label',
                'binary': False,
            },
        ],
    }

    d1 = DataLoader(config=cfg)
    shapes = d1.axes_info

    for batch in d1:
        assert len(batch) == len(cfg['etl'])
        image_entry, label_entry = batch[0], batch[1]

        assert len(image_entry[1]) == cfg['batch_size']
        assert len(label_entry[1]) == cfg['batch_size']

    image = shapes[0]
    assert image[0] == 'image'

    # For images, the order of the keys in the JSON config does not matter:
    # Aeon defines the axis order for a given axis type. Images can come in
    # two orders, CHW (channel_major=True) or HWC (channel_major=False).
    # This test sets channel_major to False, so the axes are expected in
    # HWC order.
    for position, axis_name in enumerate(('height', 'width', 'channels')):
        assert image[1][position][0] == axis_name
        assert image[1][position][1] == cfg['etl'][0][axis_name]

    label = shapes[1]
    assert label[0] == 'label'
def test_loader_invalid_config_type():
    """Check that an unknown ETL ``type`` is reported as unsupported."""
    manifest = random_manifest(10)
    config = generic_config(manifest.name, batch_size)

    config["etl"][0]["type"] = 'invalid type name'

    with pytest.raises(RuntimeError) as ex:
        DataLoader(config)
    # Inspect the raised exception itself: the string form of pytest's
    # ExceptionInfo wrapper is not guaranteed to contain the message.
    assert 'unsupported' in str(ex.value)
def test_loader_missing_config_field():
    """Check that a missing ``height`` in the first ETL entry is reported."""
    manifest = random_manifest(10)
    config = generic_config(manifest.name, batch_size)

    del config['etl'][0]["height"]

    with pytest.raises(RuntimeError) as ex:
        DataLoader(config)
    # Inspect the raised exception itself: the string form of pytest's
    # ExceptionInfo wrapper is not guaranteed to contain the message.
    assert 'height' in str(ex.value)
def test_loader():
    """Check the batch count for manifests built with sizes 1 through 9.

    For each size ``n`` the loader must yield ``ceil(n / batch_size)``
    batches.
    """
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    for entry_count in range(1, 10):
        manifest = random_manifest(entry_count)
        loader = DataLoader(generic_config(manifest.name, batch_size))

        batch_count = len(list(iter(loader)))
        assert batch_count == math.ceil(float(entry_count) / batch_size)
Beispiel #22
0
def test_loader_missing_config_field():
    """Check that removing the ``'image'`` config section is reported."""
    manifest = random_manifest(10)
    config = generic_config(manifest.name)

    del config['image']

    with pytest.raises(Exception) as ex:
        DataLoader(config, gen_backend(backend='cpu'))

    # Inspect the raised exception itself: the string form of pytest's
    # ExceptionInfo wrapper is not guaranteed to contain the message.
    assert 'image' in str(ex.value)
def test_loader_json_parser_pass():
    """Feed every ``./json/pass*.json`` file to the loader wrapped in a dict.

    Each file's contents are nested under a ``"config"`` key. Construction
    is then expected to fail with a 'Required Argument' error.
    """
    for path in glob.glob("./json/pass*.json"):
        with open(path) as json_file:
            json_string = json_file.read()

        # config must be a dict so make sure it is a dict
        config = json.loads('{"config": %s}' % json_string)

        with pytest.raises(RuntimeError) as ex:
            DataLoader(config)
        # Inspect the raised exception itself: the string form of pytest's
        # ExceptionInfo wrapper is not guaranteed to contain the message.
        assert 'Required Argument' in str(ex.value)
class AeonDataLoader(object):
    """Iterator wrapper around an aeon ``DataLoader`` that yields dict batches.

    Each batch from the wrapped loader is converted to a ``{name: buffer}``
    dict, and the buffer named ``'label'`` (when present) is flattened.
    """

    def __init__(self, config, *args, **kwargs):
        """Create the wrapped loader from ``config``.

        Arguments:
            config: configuration handed unchanged to ``DataLoader``.
            *args, **kwargs: accepted but not used here.

        Raises:
            ValueError: if the loader reports fewer examples than its batch
                size.
        """
        self.config = config
        self._dataloader = DataLoader(config)
        if self.ndata < self._dataloader.batch_size:
            raise ValueError(
                'Number of examples is smaller than the batch size')

    @property
    def ndata(self):
        """Number of examples reported by the wrapped loader.

        This used to be a method without a ``return`` that was also shadowed
        by an instance attribute of the same name. A read-only property
        keeps ``loader.ndata`` returning the count.
        """
        return self._dataloader.ndata

    def __next__(self):
        """Return the next batch as a dict keyed by buffer name."""
        bufs = next(self._dataloader)
        bufs_dict = {key: val for key, val in bufs}
        if 'label' in bufs_dict:
            bufs_dict['label'] = bufs_dict['label'].flatten()
        return bufs_dict

    def __iter__(self):
        """Return ``self``; the object is its own iterator."""
        return self

    def make_placeholders(self, include_iteration=False):
        """Create one ngraph placeholder per entry of the loader's axes info.

        Every placeholder starts with a batch axis named ``"N"``. Entries
        other than ``'label'`` get one further axis per ``(name, size)``
        pair, with names translated through ``NAME_MAP`` when listed there.
        The ``'label'`` placeholder has the batch axis only.

        Arguments:
            include_iteration (bool): when true, also add an axis-less
                ``'iteration'`` placeholder.

        Returns:
            dict: placeholder name -> ngraph placeholder.
        """
        placeholders = {}
        batch_axis = ng.make_axis(self._dataloader.batch_size, name="N")
        for placeholder_name, axis_info in self._dataloader.axes_info:
            p_axes = ng.make_axes([batch_axis])
            if placeholder_name != 'label':
                for nm, sz in axis_info:
                    if nm in NAME_MAP:
                        nm = NAME_MAP[nm]
                    p_axes += ng.make_axis(name=nm, length=sz)
            placeholders[placeholder_name] = ng.placeholder(p_axes)
        if include_iteration:
            placeholders['iteration'] = ng.placeholder(axes=())
        return placeholders

    def reset(self):
        """Reset the wrapped loader."""
        self._dataloader.reset()
Beispiel #25
0
def test_loader_exception_next():
    """Check that ``next()`` raises ``LoaderRuntimeError`` on its second call.

    The first call happens outside the ``pytest.raises`` block and must
    therefore succeed.
    """
    # NOTE: manifest needs to stay in scope until DataLoader has read it.
    manifest = random_manifest(10, 2)
    config = generic_config(manifest.name)
    cpu_backend = gen_backend(backend='cpu')
    loader = DataLoader(config, cpu_backend)

    loader.next()
    with pytest.raises(LoaderRuntimeError):
        loader.next()
Beispiel #26
0
def main():
    """Create a remote aeon loader for a manifest and print every batch.

    Connection and manifest parameters come from ``parse_input()``. The data
    size and axes info are printed first, followed by the name and data of
    the image and label entries of each batch.
    """
    address, port, manifest, rdma_address, rdma_port = parse_input()

    etl_stages = [
        {'type': 'image', 'width': 28, 'height': 28, 'channels': 1},
        {'type': 'label', 'binary': False},
    ]
    cfg = {
        'manifest_filename': manifest,
        'manifest_root': os.path.dirname(manifest),
        'batch_size': 4,
        'cache_directory': "",  # don't create cache
        'etl': etl_stages,
        'remote': {'address': address, 'port': int(port)},
    }

    # Add RDMA parameters if they are set
    remote_cfg = cfg['remote']
    if rdma_address:
        remote_cfg['rdma_address'] = rdma_address
        remote_cfg['rdma_port'] = int(rdma_port)

    # Create new aeon DataLoader object
    loader = DataLoader(config=cfg)
    print("data size: {0}".format(len(loader)))

    # Retrieve shapes
    print("shapes: {0}".format(loader.axes_info))

    # Iterate through all available batches
    for batch in loader:
        for entry in (batch[0], batch[1]):
            print("{0} data: {1}".format(entry[0], entry[1]))
Beispiel #27
0
def test_loader_exception_iter():
    """Check the batch count when iterating the bundled ``manifest.tsv``.

    The test runs from inside the ``test_data`` directory next to this file
    and expects ``ceil(120 / batch_size)`` batches. The original working
    directory is restored and the manifest closed even if the test fails.
    """
    cwd = os.getcwd()
    dir_path = os.path.dirname(os.path.realpath(__file__))
    os.chdir(dir_path + '/test_data')
    try:
        # NOTE: manifest needs to stay in scope until DataLoader has read it.
        with open("manifest.tsv") as manifest:
            config = generic_config(manifest.name, batch_size)
            dl = DataLoader(config)

            num_of_manifest_entries = 120.
            assert len(list(iter(dl))) == math.ceil(
                num_of_manifest_entries / batch_size)
    finally:
        # Always undo the chdir so a failure here cannot affect later tests.
        os.chdir(cwd)
def test_loader_exception_next():
    """Check that ``next()`` raises ``StopIteration`` after the last batch.

    The test runs from inside the ``test_data`` directory next to this file,
    fetches 60 batches and expects the next fetch to stop. The original
    working directory is restored and the manifest closed even if the test
    fails.
    """
    cwd = os.getcwd()
    dir_path = os.path.dirname(os.path.realpath(__file__))
    os.chdir(dir_path + '/test_data')
    try:
        # NOTE: manifest needs to stay in scope until DataLoader has read it.
        with open("manifest.tsv") as manifest:
            config = generic_config(manifest.name, batch_size)
            dl = DataLoader(config)
            # NOTE(review): 60 presumably equals 120 entries / batch_size of
            # 2 -- confirm against the module-level batch_size.
            num_of_batches_in_manifest = 60
            for _ in range(num_of_batches_in_manifest):
                next(dl)
            with pytest.raises(StopIteration):
                next(dl)
    finally:
        # Always undo the chdir so a failure here cannot affect later tests.
        os.chdir(cwd)
def test_loader_json_parser_fail():
    """Feed every parseable ``./json/fail*.json`` file to the loader.

    Files that are not valid JSON are skipped. The remaining ones are nested
    under a ``"config"`` key, and construction is expected to fail with a
    'Required Argument' error.
    """
    for path in glob.glob("./json/fail*.json"):
        with open(path) as json_file:
            json_string = json_file.read()

        # Only well-formed JSON can be wrapped and handed to the loader.
        try:
            json.loads(json_string)
        except ValueError:
            continue

        config = json.loads('{"config": %s}' % json_string)
        with pytest.raises(RuntimeError) as ex:
            DataLoader(config)
        # Inspect the raised exception itself: the string form of pytest's
        # ExceptionInfo wrapper is not guaranteed to contain the message.
        assert 'Required Argument' in str(ex.value)
Beispiel #30
0
def main():
    """Open a new remote aeon session and keep it alive until a key press.

    Connection and manifest parameters come from ``parse_input()``. The
    loader is configured with ``close_session`` set to True. The new session
    ID is printed, then the function blocks on a line read from stdin.
    """
    address, port, manifest, rdma_address, rdma_port = parse_input()
    cache_root = ""  # don't create cache
    batch_size = 4

    cfg = {
        'manifest_filename':
        manifest,
        'manifest_root':
        os.path.dirname(manifest),
        'batch_size':
        batch_size,
        'cache_directory':
        cache_root,
        'iteration_mode':
        'INFINITE',  # because of INFINITE setting, there is always batch to fetch
        'etl': [{
            'type': 'image',
            'width': 28,
            'height': 28,
            'channels': 1
        }, {
            'type': 'label',
            'binary': False
        }],
        'remote': {
            'address': address,
            'port': int(port),
            'close_session': True
        }
    }

    # Add RDMA parameters if they are set
    if rdma_address:
        cfg['remote']['rdma_address'] = rdma_address
        cfg['remote']['rdma_port'] = int(rdma_port)

    # Create new aeon DataLoader object
    loader = DataLoader(config=cfg)

    # Retrieve newly created session ID
    session_id = loader.session_id

    # Format the string before printing: calling .format() on the return
    # value of print() fails on Python 3, where print() returns None.
    print("New sesion ID: {0}".format(session_id))
    print("Press button to close session and exit...")
    sys.stdin.readline()
def test_anchor_target_layer(backend_default, fargs):
    """Run ``reference_test`` on every batch of a PASCAL VOC training loader.

    Arguments:
        backend_default: backend passed to ``DataLoader``.
        fargs (tuple): ``(height, width)`` used for the dataset config.

    The manifest location is read from the ``PASCAL_MANIFEST_PATH`` and
    ``PASCAL_MANIFEST_ROOT`` environment variables; the test fails with an
    explanatory message when either is unset.
    """
    (height, width) = fargs

    # Use .get() so an unset variable reaches the assertion message below
    # instead of raising a bare KeyError first.
    manifest_path = os.environ.get('PASCAL_MANIFEST_PATH')
    assert manifest_path is not None, "Please set the PASCAL_MANIFEST_PATH variable."

    manifest_root = os.environ.get('PASCAL_MANIFEST_ROOT')
    assert manifest_root is not None, "Please set the PASCAL_MANIFEST_ROOT variable."

    config = PASCALVOC(manifest_path, manifest_root, cache_dir='',
                       height=height, width=width, inference=False)
    config['subset_fraction'] = 0.1

    dl = DataLoader(config, backend_default)
    dl = TypeCast(dl, index=0, dtype=np.float32)
    train_set = ObjectLocalization(dl, frcn_rois_per_img=128)

    for X, Y in train_set:
        reference_test(train_set, X, Y)
Beispiel #32
0
def build_dataloader(config,
                     manifest_root,
                     batch_size,
                     subset_pct=100,
                     PIXEL_MEANS=np.array([104, 117, 123])):
    """
    Create the Faster-RCNN data pipeline on top of the aeon loader.

    ``config`` is updated in place with the manifest root, batch size and
    subset fraction. The base loader is then wrapped, in order, by:
    1. A cast of the image data (buffer index 5) to float32.
    2. Subtraction of the BGR pixel means from the image.
    3. Repacking of the data for the Faster-RCNN model. The model has several
       nested branches, so the buffers are repacked into nested tuples that
       match the branch leafs; buffers for training the RCNN portion of the
       model are also allocated and provisioned to the model.

    Arguments:
        config (dict): dataloader configuration; modified in place.
        manifest_root: value stored under ``config["manifest_root"]``.
        batch_size: value stored under ``config["batch_size"]``.
        subset_pct: percentage of the data to use; stored as a fraction
            under ``config["subset_fraction"]``.
        PIXEL_MEANS: per-channel means subtracted from the image.

    Returns:
        dataloader object.
    """
    # assert config['minibatch_size'] == be.bsz,
    # 'Dataloader config\'s minibatch size not matching backend bsz'
    image_index = 5

    config["manifest_root"] = manifest_root
    config["batch_size"] = batch_size
    config["subset_fraction"] = float(subset_pct / 100.0)

    adapted_loader = DataLoaderAdapter(DataLoader(config))
    float_loader = TypeCast(adapted_loader, index=image_index,
                            dtype=np.float32)
    centered_loader = BGRMeanSubtract(float_loader, index=image_index,
                                      pixel_mean=PIXEL_MEANS)

    localizer = ObjectLocalization(centered_loader)
    localizer.set_classes(config['etl'][0]['class_names'])
    localizer.shape = localizer.shapes()[image_index]
    return localizer
Beispiel #33
0
 def __init__(self, config, *args, **kwargs):
     # Keep the original config object; the wrapped DataLoader is given its
     # JSON-serialised form rather than the object itself.
     self.config = config
     serialized_config = json.dumps(config)
     self._dataloader = DataLoader(serialized_config)