Example #1
def test_d(size, n_splits):
    """Test checks for behaviour of D expression in `set_dataset` action.

    size
        size of the dataset.
    n_splits
        the number of cv folds.
    """
    dataset = Dataset(size)
    dataset.cv_split(n_splits=n_splits)

    pipeline = (Pipeline()
                .init_variable('indices', default=[])
                .update(V('indices', mode='a'), B('indices')[0])
               ) << dataset.CV(C('fold')).train

    result = list(range(size))

    for fold in range(n_splits):
        pipeline.set_config({'fold': fold})
        start = fold * (size // n_splits)
        end = (fold + 1) * (size // n_splits)

        for _ in range(2):
            pipeline.reset('vars')
            pipeline.run(1)

            assert pipeline.v('indices') == result[:start] + result[end:]
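A rough sketch of the shared setup these snippets assume (the imports are an assumption; recent batchflow versions expose Dataset, Pipeline and the named expressions at the package top level):

import numpy as np
import pytest
from batchflow import Dataset, DatasetIndex, Pipeline, Batch, B, C, V, I, P

# B reads a batch component, C reads the pipeline config, V reads/writes a
# pipeline variable, I reports iteration counters, and P spreads values
# across parallel calls within a batch.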
Example #2
def batch():
    """ Prepare batch and load same DATA to comp1 and comp2 components.
    """
    dataset = Dataset(BATCH_SIZE, MyBatch)
    batch = (dataset.next_batch(BATCH_SIZE).load(src=DATA, dst='comp1').load(
        src=DATA, dst='comp2'))
    return batch
Example #3
def get_batch(data,
              pipeline,
              index=DATASET_SIZE,
              batch_class=Batch,
              skip=2,
              dst=False):
    """
    Parameters
    ----------
    data
        data to use
    pipeline : bool or Pipeline
        whether to get a batch from a dataset or a pipeline

    index : DatasetIndex or int
        index to build the dataset from (or its size)

    batch_class : type
        class of the batches to generate

    skip : int
        how many batches to skip

    dst : bool or list of str
        preload data when False, otherwise load it into the given components
    """

    if dst is False:
        dataset = Dataset(index, preloaded=data, batch_class=batch_class)
    else:
        dataset = Dataset(index, batch_class=batch_class)

    template_pipeline = (
        Pipeline().init_variable('dummy').update(
            V('dummy'), B.data)  # touch batch data to fire preloading
    )

    if isinstance(pipeline, Pipeline):
        template_pipeline = pipeline + template_pipeline

    source = (
        dataset >> template_pipeline) if pipeline is not False else dataset

    # skip the first `skip` batches, then take one more
    for _ in range(skip + 1):
        batch = source.next_batch(10)

    if dst is not False:
        batch = batch.load(src=data, dst=dst)

    return batch
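Hypothetical usage of the helper above (DATA and DATASET_SIZE are constants defined elsewhere in the test module):

preloaded = get_batch(DATA, pipeline=False)              # data preloaded into the dataset
loaded = get_batch(DATA, pipeline=False, dst=['comp1'])  # data loaded via batch.load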
Example #4
    def test_from_dataset_new_batch_class(self, dataset):
        new_index = DatasetIndex(25)
        new_ds = Dataset.from_dataset(dataset, new_index, batch_class=ImagesBatch)
        assert isinstance(new_ds, dataset.__class__)
        assert new_ds.batch_class == ImagesBatch
Example #5
    def _model_setup(data_format):
        if data_format == 'channels_last':
            image_shape = (100, 100, 2)
        elif data_format == 'channels_first':
            image_shape = (2, 100, 100)
        else:
            raise ValueError(f"Unknown data_format: {data_format}")

        size = 50
        batch_shape = (size, ) + image_shape
        images_array = np.random.random(batch_shape)
        labels_array = np.random.choice(10, size=size)
        data = images_array, labels_array
        dataset = Dataset(index=size, batch_class=ImagesBatch, preloaded=data)

        model_config = {
            'inputs': {
                'images': {'shape': image_shape},
                'labels': {'classes': 10},
            },
            'initial_block/inputs': 'images'
        }
        return dataset, model_config
Example #6
def test_general_get(named_expr):
    pipeline = (Dataset(10).pipeline({'option': 0})
                .init_variable('var')
                .do_nothing(named_expr)
                .run(2, lazy=True))

    try:
        _ = pipeline.next_batch()
    except KeyError:
        pytest.fail("Name does not exist")
Example #7
def test_inbatch_parallel_p(named_expr):
    """ Check if P() is evalauted properly """
    pipeline = (Dataset(10, MyBatch).pipeline(
        dict(mean=0., std=1.,
             option=ARRAY_INIT)).add_namespace(np).init_variable(
                 'var', ARRAY_INIT).update(B.images, ARRAY_INIT).update(
                     B.masks,
                     ARRAY_INIT).ip_test(param=P(named_expr)).run(BATCH_SIZE,
                                                                  lazy=True))

    _ = pipeline.next_batch()

    assert True
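MyBatch, ARRAY_INIT and ip_test are defined elsewhere in the test suite. A hypothetical sketch of such a batch class, assuming batchflow's action and inbatch_parallel decorators:

from batchflow import Batch, action, inbatch_parallel

class MyBatch(Batch):
    components = 'images', 'masks'

    @action
    @inbatch_parallel(init='indices')
    def ip_test(self, ix, param=None):
        # with param=P(named_expr), every parallel call over the batch
        # items receives its own element of the evaluated expression
        _ = ix, param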
Example #8
def test_apply_parallel_p(p_type, named_expr, src):
    """ Check if P() is evalauted properly """
    pipeline = (Dataset(10, MyBatch).pipeline(
        dict(mean=0., std=1.,
             option=ARRAY_INIT)).add_namespace(np).init_variable(
                 'var', ARRAY_INIT).update(B.images, ARRAY_INIT).update(
                     B.masks, ARRAY_INIT).ap_test(
                         src=src, param=p_type(named_expr)).run(BATCH_SIZE,
                                                                lazy=True))

    b = pipeline.next_batch()

    if isinstance(src, str):
        assert True
    else:
        assert (b.images == b.masks).all()
Example #9
def test_rebatch(batch_size, rebatch_size):
    """ checks that rebatch produces batches of expected lengths (and doesn't crash)"""
    data = np.vstack([np.array([i, i]) for i in range(DATASET_SIZE)])
    data = (data, )
    dataset = Dataset(index=DATASET_SIZE, batch_class=MyBatch, preloaded=data)

    # workaround for pipeline variables getting lost after rebatch
    batch_lengths = {'before': [], 'after': []}

    def get_batch_len(batch, dump):
        batch_lengths[dump].append(batch.size)

    p = (Pipeline()
         .call(get_batch_len, 'before')
         .rebatch(rebatch_size)
         .call(get_batch_len, 'after')) << dataset

    p.run(batch_size=batch_size, n_epochs=1, bar=True)

    check_batch_lengths(batch_lengths['before'], batch_size)
    check_batch_lengths(batch_lengths['after'], rebatch_size)
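check_batch_lengths is defined elsewhere in the test module; a plausible, hypothetical implementation matching the docstring checks that every batch has the requested length, except possibly a shorter final one:

def check_batch_lengths(lengths, expected):
    assert all(length == expected for length in lengths[:-1])
    assert 0 < lengths[-1] <= expected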
Example #10
    def _model_setup(data_format, image_shape=100):
        dataset_size = 50
        num_classes = 10

        if data_format == 'channels_last':
            image_shape = (image_shape, image_shape, 2)
        else:
            image_shape = (2, image_shape, image_shape)

        batch_shape = (dataset_size, *image_shape)
        images_array = np.random.random(batch_shape)
        labels_array = np.random.choice(num_classes, size=dataset_size)
        data = images_array, labels_array
        dataset = Dataset(index=dataset_size,
                          batch_class=ImagesBatch,
                          preloaded=data)

        model_config = {
            'inputs/images/shape': image_shape,
            'inputs/labels/classes': num_classes,
            'initial_block/inputs': 'images'
        }
        return dataset, model_config
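The flat '/'-separated keys above describe the same nested structure that Example #5 spells out explicitly; batchflow configs treat 'inputs/images/shape' as a path into nested dicts, so the two spellings are interchangeable:

model_config = {
    'inputs': {'images': {'shape': image_shape},
               'labels': {'classes': num_classes}},
    'initial_block': {'inputs': 'images'},
}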
Example #11
def test_i(name, expectation, limit_name, limit_value, result):
    """ Check for behaviour of I under different pipeline configurations.

    name
        Name of I, defines its output.
    expectation
        The test is expected to raise an error when the name requires calculating the total
        number of iterations (e.g. for 'm') and this number is not defined in the pipeline
        (limit_value is None).
    limit_name
        'n_epochs' or 'n_iters'
    limit_value
        Total number of epochs or iterations to run.
    result
        Expected output of I. If None, I is expected to raise an error.
    """
    kwargs = {'batch_size': 2, limit_name: limit_value, 'lazy': True}

    pipeline = (Dataset(10).pipeline()
                .init_variable('var', -1)
                .update(V('var', mode='w'), I(name))
                .run(**kwargs))

    with expectation:
        _ = pipeline.next_batch()

    assert pipeline.get_variable('var') == result
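For reference, a minimal sketch of how I is typically used on its own (assuming, as the docstring above implies, that 'c' names the current iteration and 'm' the maximum, which is only computable once n_epochs or n_iters is set):

pipeline = (Dataset(10).pipeline()
            .init_variable('iter', -1)
            .update(V('iter', mode='w'), I('c'))
            .run(batch_size=2, n_epochs=1, lazy=True))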
Example #12
def dataset():
    index = DatasetIndex(100)
    return Dataset(index, Batch)
Example #13
    def test_build_index(self):
        new_index = Dataset.build_index(25)
        assert isinstance(new_index, DatasetIndex)