Example #1
def test_d(size, n_splits):
    """Test checks for behaviour of D expression in `set_dataset` action.

    size
        size of the dataset.
    n_splits
        the number of cv folds.
    """
    dataset = Dataset(size)
    dataset.cv_split(n_splits=n_splits)

    pipeline = (Pipeline()
                .init_variable('indices', default=[])
                .update(V('indices', mode='a'), B('indices')[0])
               ) << dataset.CV(C('fold')).train

    result = list(range(size))

    for fold in range(n_splits):
        pipeline.set_config({'fold': fold})
        start = fold * (size // n_splits)
        end = (fold + 1) * (size // n_splits)

        for _ in range(2):
            pipeline.reset('vars')
            pipeline.run(1)

            assert pipeline.v('indices') == result[:start] + result[end:]
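A rough sketch of the shared setup these snippets assume (the imports are an assumption; recent batchflow versions expose Dataset, Pipeline and the named expressions at the package top level):

import numpy as np
import pytest
from batchflow import Dataset, DatasetIndex, Pipeline, Batch, B, C, V, I, P

# B reads a batch component, C reads the pipeline config, V reads/writes a
# pipeline variable, I reports iteration counters, and P spreads values
# across parallel calls within a batch.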
Example #2
def batch():
    """ Prepare batch and load same DATA to comp1 and comp2 components.
    """
    dataset = Dataset(BATCH_SIZE, MyBatch)
    batch = (dataset.next_batch(BATCH_SIZE).load(src=DATA, dst='comp1').load(
        src=DATA, dst='comp2'))
    return batch
Example #3
def get_batch(data,
              pipeline,
              index=DATASET_SIZE,
              batch_class=Batch,
              skip=2,
              dst=False):
    """
    Parameters
    ----------
    data
        data to use
    pipeline : bool or Pipeline
        whether to get a batch from a dataset or a pipeline

    index : DatasetIndex or int
        index to build the dataset from (or its size)

    batch_class : type
        class of the batches to generate

    skip : int
        how many batches to skip

    dst : bool or list of str
        preload data when False, otherwise load it into the given components
    """

    if dst is False:
        dataset = Dataset(index, preloaded=data, batch_class=batch_class)
    else:
        dataset = Dataset(index, batch_class=batch_class)

    template_pipeline = (
        Pipeline().init_variable('dummy').update(
            V('dummy'), B.data)  # touch batch data to fire preloading
    )

    if isinstance(pipeline, Pipeline):
        template_pipeline = pipeline + template_pipeline

    source = (
        dataset >> template_pipeline) if pipeline is not False else dataset

    # skip the first `skip` batches, then take one more
    for _ in range(skip + 1):
        batch = source.next_batch(10)

    if dst is not False:
        batch = batch.load(src=data, dst=dst)

    return batch
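Hypothetical usage of the helper above (DATA and DATASET_SIZE are constants defined elsewhere in the test module):

preloaded = get_batch(DATA, pipeline=False)              # data preloaded into the dataset
loaded = get_batch(DATA, pipeline=False, dst=['comp1'])  # data loaded via batch.load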
Example #4
    def test_from_dataset_new_batch_class(self, dataset):
        new_index = DatasetIndex(25)
        new_ds = Dataset.from_dataset(dataset, new_index, batch_class=ImagesBatch)
        assert isinstance(new_ds, dataset.__class__)
        assert new_ds.batch_class == ImagesBatch
Example #5
    def _model_setup(data_format):
        if data_format == 'channels_last':
            image_shape = (100, 100, 2)
        elif data_format == 'channels_first':
            image_shape = (2, 100, 100)
        else:
            raise ValueError(f"Unknown data_format: {data_format}")

        size = 50
        batch_shape = (size, ) + image_shape
        images_array = np.random.random(batch_shape)
        labels_array = np.random.choice(10, size=size)
        data = images_array, labels_array
        dataset = Dataset(index=size, batch_class=ImagesBatch, preloaded=data)

        model_config = {
            'inputs': {
                'images': {'shape': image_shape},
                'labels': {'classes': 10},
            },
            'initial_block/inputs': 'images'
        }
        return dataset, model_config
Example #6
def test_general_get(named_expr):
    pipeline = (Dataset(10).pipeline({'option': 0})
                .init_variable('var')
                .do_nothing(named_expr)
                .run(2, lazy=True))

    try:
        _ = pipeline.next_batch()
    except KeyError:
        pytest.fail("Name does not exist")
Example #7
def test_inbatch_parallel_p(named_expr):
    """ Check if P() is evalauted properly """
    pipeline = (Dataset(10, MyBatch).pipeline(
        dict(mean=0., std=1.,
             option=ARRAY_INIT)).add_namespace(np).init_variable(
                 'var', ARRAY_INIT).update(B.images, ARRAY_INIT).update(
                     B.masks,
                     ARRAY_INIT).ip_test(param=P(named_expr)).run(BATCH_SIZE,
                                                                  lazy=True))

    _ = pipeline.next_batch()

    assert True
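MyBatch, ARRAY_INIT and ip_test are defined elsewhere in the test suite. A hypothetical sketch of such a batch class, assuming batchflow's action and inbatch_parallel decorators:

from batchflow import Batch, action, inbatch_parallel

class MyBatch(Batch):
    components = 'images', 'masks'

    @action
    @inbatch_parallel(init='indices')
    def ip_test(self, ix, param=None):
        # with param=P(named_expr), every parallel call over the batch
        # items receives its own element of the evaluated expression
        _ = ix, param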
Example #8
def test_apply_parallel_p(p_type, named_expr, src):
    """ Check if P() is evalauted properly """
    pipeline = (Dataset(10, MyBatch).pipeline(
        dict(mean=0., std=1.,
             option=ARRAY_INIT)).add_namespace(np).init_variable(
                 'var', ARRAY_INIT).update(B.images, ARRAY_INIT).update(
                     B.masks, ARRAY_INIT).ap_test(
                         src=src, param=p_type(named_expr)).run(BATCH_SIZE,
                                                                lazy=True))

    b = pipeline.next_batch()

    if isinstance(src, str):
        assert True
    else:
        assert (b.images == b.masks).all()
Example #9
def test_rebatch(batch_size, rebatch_size):
    """ checks that rebatch produces batches of expected lengths (and doesn't crash)"""
    data = np.vstack([np.array([i, i]) for i in range(DATASET_SIZE)])
    data = (data, )
    dataset = Dataset(index=DATASET_SIZE, batch_class=MyBatch, preloaded=data)

    # workaround for pipeline variables getting lost after rebatch
    batch_lengths = {'before': [], 'after': []}

    def get_batch_len(batch, dump):
        batch_lengths[dump].append(batch.size)

    p = (Pipeline()
         .call(get_batch_len, 'before')
         .rebatch(rebatch_size)
         .call(get_batch_len, 'after')) << dataset

    p.run(batch_size=batch_size, n_epochs=1, bar=True)

    check_batch_lengths(batch_lengths['before'], batch_size)
    check_batch_lengths(batch_lengths['after'], rebatch_size)
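check_batch_lengths is defined elsewhere in the test module; a plausible, hypothetical implementation matching the docstring checks that every batch has the requested length, except possibly a shorter final one:

def check_batch_lengths(lengths, expected):
    assert all(length == expected for length in lengths[:-1])
    assert 0 < lengths[-1] <= expected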
Example #10
    def _model_setup(data_format, image_shape=100):
        dataset_size = 50
        num_classes = 10

        if data_format == 'channels_last':
            image_shape = (image_shape, image_shape, 2)
        else:
            image_shape = (2, image_shape, image_shape)

        batch_shape = (dataset_size, *image_shape)
        images_array = np.random.random(batch_shape)
        labels_array = np.random.choice(num_classes, size=dataset_size)
        data = images_array, labels_array
        dataset = Dataset(index=dataset_size,
                          batch_class=ImagesBatch,
                          preloaded=data)

        model_config = {
            'inputs/images/shape': image_shape,
            'inputs/labels/classes': num_classes,
            'initial_block/inputs': 'images'
        }
        return dataset, model_config
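The flat '/'-separated keys above describe the same nested structure that Example #5 spells out explicitly; batchflow configs treat 'inputs/images/shape' as a path into nested dicts, so the two spellings are interchangeable:

model_config = {
    'inputs': {'images': {'shape': image_shape},
               'labels': {'classes': num_classes}},
    'initial_block': {'inputs': 'images'},
}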
Example #11
def test_i(name, expectation, limit_name, limit_value, result):
    """ Check for behaviour of I under different pipeline configurations.

    name
        Name of I, defines its output.
    expectation
        The test is expected to raise an error when the name requires calculating the total
        number of iterations (e.g. for 'm') and this number is not defined in the pipeline
        (limit_value is None).
    limit_name
        'n_epochs' or 'n_iters'
    limit_value
        Total number of epochs or iterations to run.
    result
        Expected output of I. If None, I is expected to raise an error.
    """
    kwargs = {'batch_size': 2, limit_name: limit_value, 'lazy': True}

    pipeline = (Dataset(10).pipeline()
                .init_variable('var', -1)
                .update(V('var', mode='w'), I(name))
                .run(**kwargs))

    with expectation:
        _ = pipeline.next_batch()

    assert pipeline.get_variable('var') == result
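For reference, a minimal sketch of how I is typically used on its own (assuming, as the docstring above implies, that 'c' names the current iteration and 'm' the maximum, which is only computable once n_epochs or n_iters is set):

pipeline = (Dataset(10).pipeline()
            .init_variable('iter', -1)
            .update(V('iter', mode='w'), I('c'))
            .run(batch_size=2, n_epochs=1, lazy=True))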
Example #12
def dataset():
    index = DatasetIndex(100)
    return Dataset(index, Batch)
Example #13
    def test_build_index(self):
        new_index = Dataset.build_index(25)
        assert isinstance(new_index, DatasetIndex)