Example #1
def make_patch_3d_strat_iter(ids, load_x, load_y, *, batch_size, x_patch_sizes, y_patch_size, nonzero_fraction,
                             buffer_size):
    check_y_patch_size(y_patch_size)

    x_patch_sizes = np.array(x_patch_sizes)
    y_patch_size = np.array(y_patch_size)

    def _extract_patches(o):
        if len(o['cancer']) > 0 and np.random.uniform() < nonzero_fraction:
            center_idx = random.choice(o['cancer'])
        else:
            center_idx = get_random_center_idx(o['y'], y_patch_size, spatial_dims=spatial_dims)

        xs = extract_patches(o['x'], patch_sizes=x_patch_sizes, center_idx=center_idx, padding_values=o['padding'],
                             spatial_dims=spatial_dims)

        y, = extract_patches(o['y'], patch_sizes=[y_patch_size], center_idx=center_idx, padding_values=0,
                             spatial_dims=spatial_dims)
        return (*xs, y)

    return pdp.Pipeline(make_source_random(ids),
                        make_block_load_x_y(load_x, load_y, buffer_size=len(ids)),
                        make_block_find_padding(len(ids)),
                        make_block_find_cancer(y_patch_size, buffer_size=len(ids)),
                        pdp.One2One(_extract_patches, buffer_size=batch_size),
                        *make_batch_blocks(batch_size, buffer_size=buffer_size))
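
The stratification in `_extract_patches` boils down to a biased coin flip: with probability `nonzero_fraction` the patch is centered on one of the precomputed cancer voxels (when any exist), otherwise a random valid center is used. A minimal sketch of that sampling rule, with `cancer_centers` and `random_center` as hypothetical stand-ins for `o['cancer']` and the result of `get_random_center_idx(...)`:

import random
import numpy as np

def pick_center(cancer_centers, random_center, nonzero_fraction):
    # Biased choice: prefer a known positive center with probability
    # `nonzero_fraction`, otherwise fall back to the random one.
    if len(cancer_centers) > 0 and np.random.uniform() < nonzero_fraction:
        return random.choice(cancer_centers)
    return random_center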
Example #2
def make_block_find_padding(buffer_size):
    @cache_function
    def add_padding(o):
        o['padding'] = np.min(o['x'], axis=spatial_dims, keepdims=True)
        return o

    return pdp.One2One(add_padding, buffer_size=buffer_size)
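
`add_padding` records the per-channel minimum of the image and keeps the reduced spatial axes, so the result broadcasts against the original array and can later serve as the padding value during patch extraction. A hedged numpy illustration, assuming `spatial_dims = (1, 2, 3)` for a channels-first 3D volume:

import numpy as np

spatial_dims = (1, 2, 3)            # assumed layout: (channels, z, y, x)
x = np.random.rand(2, 4, 4, 4)      # toy volume with 2 channels

padding = np.min(x, axis=spatial_dims, keepdims=True)
print(padding.shape)                # (2, 1, 1, 1): one padding value per channel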
Example #3
def load_combine(ids: Sequence,
                 load_x: callable,
                 load_y: callable,
                 batch_size: int,
                 *,
                 shuffle: bool = False):
    """
    A simple batch iterator that loads the data and packs it into batches.

    Parameters
    ----------
    ids: Sequence
    load_x: callable(id)
    load_y: callable(id)
    batch_size: int
    shuffle: bool, optional
        whether to shuffle the ids before yielding batches.

    Yields
    ------
    batches of size `batch_size`

    """
    return pdp.Pipeline(
        pdp.Source(load_by_ids(load_x, load_y, ids=ids, shuffle=shuffle),
                   buffer_size=30),
        pdp.Many2One(chunk_size=batch_size, buffer_size=2),
        pdp.One2One(pdp.combine_batches, buffer_size=3))
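
A hedged usage sketch for `load_combine`: it only needs the ids plus two loader callables, and returns a `pdp.Pipeline` that yields batches of size `batch_size`. The loaders below are hypothetical placeholders; real ones would read images and masks from disk.

import numpy as np

ids = ['scan_0', 'scan_1', 'scan_2', 'scan_3']

def load_x(identifier):
    return np.zeros((1, 8, 8, 8))   # placeholder image

def load_y(identifier):
    return np.zeros((8, 8, 8))      # placeholder mask

pipeline = load_combine(ids, load_x, load_y, batch_size=2, shuffle=True)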
Example #4
def make_block_find_cancer(y_patch_size, *, buffer_size):
    @cache_function
    def add_cancer(o):
        o['cancer'] = find_cancer(o['y'], y_patch_size)
        return o

    return pdp.One2One(add_cancer, buffer_size=buffer_size)
Example #5
    def __init__(self,
                 source: Iterable,
                 *transformers: Callable,
                 batch_size: int,
                 batches_per_epoch: int = None,
                 buffer_size: int = 3,
                 combiner: Callable = combine_to_arrays):

        if batches_per_epoch <= 0:
            raise ValueError(
                f'Expected a positive amount of batches per epoch, but got {batches_per_epoch}'
            )
        import pdp

        # backward compatibility with pdp==0.2.1
        if hasattr(pdp.interface, 'ComponentDescription'):
            source_class = transformer_class = pdp.interface.ComponentDescription
        else:
            source_class = pdp.Source
            transformer_class = pdp.interface.TransformerDescription

        def wrap(o):
            if not isinstance(o, transformer_class):
                o = pdp.One2One(o, buffer_size=buffer_size)
            return o

        if not isinstance(source, source_class):
            source = pdp.Source(source, buffer_size=buffer_size)

        self.batches_per_epoch = batches_per_epoch
        self.pipeline = pdp.Pipeline(
            source, *map(wrap, transformers),
            pdp.Many2One(chunk_size=batch_size, buffer_size=3),
            pdp.One2One(combiner, buffer_size=buffer_size))
Example #6
def simple_iterator(ids, load_x, load_y, batch_size, *, shuffle=False):
    def simple():
        for x, y in load_by_ids(load_x, load_y, ids=ids, shuffle=shuffle):
            yield x, y

    return pdp.Pipeline(pdp.Source(simple(), buffer_size=5),
                        pdp.Many2One(chunk_size=batch_size, buffer_size=2),
                        pdp.One2One(pdp.combine_batches, buffer_size=3))
Example #7
def make_block_load_x_y(load_x, load_y, *, buffer_size):
    @cache_function
    def add_x_y(o):
        o['x'] = load_x(o['id'])
        o['y'] = load_y(o['id'])
        return o

    return pdp.One2One(add_x_y, buffer_size=buffer_size)
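
Items flowing through these pipelines are plain dicts keyed by `'id'`; each `make_block_*` factory returns a `pdp.One2One` stage that enriches the dict with one more field (`'x'` and `'y'` here, `'padding'` in Example #2, `'cancer'` in Example #4). A dependency-free sketch of the same enrichment step, with toy loaders as assumptions:

def add_x_y(o, load_x, load_y):
    # Same convention as above: read the id, attach the loaded data.
    o['x'] = load_x(o['id'])
    o['y'] = load_y(o['id'])
    return o

item = add_x_y({'id': 'scan_0'}, lambda i: [[0.0]], lambda i: [0])
# item == {'id': 'scan_0', 'x': [[0.0]], 'y': [0]}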
Example #8
def make_patch_3d_iter(ids, load_x, load_y, *, batch_size, x_patch_sizes, y_patch_size, buffer_size):
    check_y_patch_size(y_patch_size)

    x_patch_sizes = np.array(x_patch_sizes)
    y_patch_size = np.array(y_patch_size)


    def _extract_patches(o):
        center_idx = get_random_center_idx(o['y'], y_patch_size, spatial_dims=spatial_dims)

        xs = extract_patches(o['x'], patch_sizes=x_patch_sizes, center_idx=center_idx, padding_values=o['padding'],
                             spatial_dims=spatial_dims)
        y, = extract_patches(o['y'], patch_sizes=[y_patch_size], center_idx=center_idx, padding_values=0,
                             spatial_dims=spatial_dims)

        return (*xs, y)

    return pdp.Pipeline(make_source_random(ids),
                        make_block_load_x_y(load_x, load_y, buffer_size=len(ids)),
                        make_block_find_padding(buffer_size=len(ids)),
                        pdp.One2One(_extract_patches, buffer_size=batch_size),
                        *make_batch_blocks(batch_size, buffer_size=buffer_size))
Example #9
    def __init__(self, source: Iterable, *transformers: Callable,
                 batch_size: Union[int, Callable], batches_per_epoch: int,
                 buffer_size: int = 3, combiner: Callable = combine_to_arrays):
        import pdp
        from pdp.interface import ComponentDescription

        if batches_per_epoch <= 0:
            raise ValueError(f'Expected a positive amount of batches per epoch, but got {batches_per_epoch}')

        def wrap(o):
            if not isinstance(o, ComponentDescription):
                o = pdp.One2One(o, buffer_size=buffer_size)
            return o

        if not isinstance(source, ComponentDescription):
            source = pdp.Source(source, buffer_size=buffer_size)

        self.batches_per_epoch = batches_per_epoch
        self.pipeline = pdp.Pipeline(
            source, *map(wrap, transformers),
            self._make_combiner(batch_size),
            pdp.One2One(combiner, buffer_size=buffer_size)
        )
Example #10
def make_batch_blocks(batch_size, buffer_size):
    return (pdp.Many2One(chunk_size=batch_size, buffer_size=3),
            pdp.One2One(pdp.combine_batches, buffer_size=buffer_size))
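
`make_batch_blocks` returns a pair of stages rather than a single one, which is why Examples #1 and #8 unpack it with `*` at the end of their `pdp.Pipeline(...)` calls. A minimal sketch of that composition, with a toy source and a toy transform standing in for real blocks:

import pdp

def transform(item):                            # toy per-item transform
    return item * 2

pipeline = pdp.Pipeline(
    pdp.Source(range(100), buffer_size=5),      # toy source of items
    pdp.One2One(transform, buffer_size=8),
    *make_batch_blocks(batch_size=8, buffer_size=3),
)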
Example #11
def wrap(o):
    if not isinstance(o, ComponentDescription):
        o = pdp.One2One(o, buffer_size=buffer_size)
    return o
Example #12
def wrap(o):
    if not isinstance(o, transformer_class):
        o = pdp.One2One(o, buffer_size=buffer_size)
    return o
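
Examples #11 and #12 are the same helper seen under two pdp versions: the first checks against `pdp.interface.ComponentDescription` directly, the second against whichever `transformer_class` the backward-compatibility shim in Example #5 selected. Either way, callers may pass plain callables as transformers and have them promoted to `pdp.One2One` stages. A hedged sketch of that promotion, assuming a pdp version that exposes `ComponentDescription` (as imported in Example #9):

import pdp
from pdp.interface import ComponentDescription

buffer_size = 3

def wrap(o):
    if not isinstance(o, ComponentDescription):
        o = pdp.One2One(o, buffer_size=buffer_size)
    return o

def double(item):
    return item * 2

stage = wrap(double)   # a plain callable is promoted to a One2One stage
# An object that already is a pdp component would be passed through unchanged.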