def make_patch_3d_strat_iter(ids, load_x, load_y, *, batch_size, x_patch_sizes, y_patch_size, nonzero_fraction,
                             buffer_size):
    """
    Build a pipeline that yields batches of patches with stratified center sampling.

    For every loaded object the patch center is either picked from the
    precomputed ``'cancer'`` indices or drawn by ``get_random_center_idx``.

    Parameters
    ----------
    ids
        passed to ``make_source_random``; ``len(ids)`` is also used as the buffer size
        of the loading, padding and cancer-search blocks.
    load_x, load_y
        callables forwarded to ``make_block_load_x_y``.
    batch_size
        forwarded to ``make_batch_blocks``; also the buffer size of the patch extraction block.
    x_patch_sizes
        patch sizes used for the patches extracted from ``'x'``.
    y_patch_size
        patch size used for the single patch extracted from ``'y'``.
    nonzero_fraction
        a ``'cancer'`` index is used as the center when ``np.random.uniform()`` falls below
        this value and at least one such index exists.
    buffer_size
        forwarded to ``make_batch_blocks``.

    Returns
    -------
    pdp.Pipeline
    """
    check_y_patch_size(y_patch_size)
    x_patch_sizes, y_patch_size = np.array(x_patch_sizes), np.array(y_patch_size)

    def _sample_center(entry):
        # the random draw happens only if there is something to choose from
        if len(entry['cancer']) > 0 and np.random.uniform() < nonzero_fraction:
            return random.choice(entry['cancer'])
        return get_random_center_idx(entry['y'], y_patch_size, spatial_dims=spatial_dims)

    def _extract_patches(entry):
        center = _sample_center(entry)
        # inputs are padded with the per-object padding values, the target - with zeros
        inputs = extract_patches(
            entry['x'], patch_sizes=x_patch_sizes, center_idx=center,
            padding_values=entry['padding'], spatial_dims=spatial_dims,
        )
        [target] = extract_patches(
            entry['y'], patch_sizes=[y_patch_size], center_idx=center,
            padding_values=0, spatial_dims=spatial_dims,
        )
        return (*inputs, target)

    n_ids = len(ids)
    stages = [
        make_source_random(ids),
        make_block_load_x_y(load_x, load_y, buffer_size=n_ids),
        make_block_find_padding(n_ids),
        make_block_find_cancer(y_patch_size, buffer_size=n_ids),
        pdp.One2One(_extract_patches, buffer_size=batch_size),
    ]
    stages.extend(make_batch_blocks(batch_size, buffer_size=buffer_size))
    return pdp.Pipeline(*stages)
def make_block_find_padding(buffer_size):
    """
    Create a ``pdp.One2One`` block that adds a ``'padding'`` entry to each object.

    The entry is the minimum of ``o['x']`` over ``spatial_dims`` (dimensions are kept).
    The function is wrapped with ``cache_function`` before being placed into the block.
    """

    def add_padding(o):
        padding = np.min(o['x'], axis=spatial_dims, keepdims=True)
        o['padding'] = padding
        return o

    cached = cache_function(add_padding)
    return pdp.One2One(cached, buffer_size=buffer_size)
def load_combine(ids: Sequence, load_x: callable, load_y: callable, batch_size: int, *, shuffle: bool = False):
    """
    Build a simple pipeline that loads the data and packs it into batches.

    Parameters
    ----------
    ids: Sequence
        forwarded to ``load_by_ids``.
    load_x: callable(id)
    load_y: callable(id)
    batch_size: int
        the number of loaded entries grouped into a single batch.
    shuffle: bool, optional
        forwarded to ``load_by_ids``.

    Returns
    -------
    pdp.Pipeline
        a pipeline made of a source, a chunking block and a ``pdp.combine_batches`` block.
    """
    loaded = load_by_ids(load_x, load_y, ids=ids, shuffle=shuffle)
    source = pdp.Source(loaded, buffer_size=30)
    chunker = pdp.Many2One(chunk_size=batch_size, buffer_size=2)
    combiner = pdp.One2One(pdp.combine_batches, buffer_size=3)
    return pdp.Pipeline(source, chunker, combiner)
def make_block_find_cancer(y_patch_size, *, buffer_size):
    """
    Create a ``pdp.One2One`` block that adds a ``'cancer'`` entry to each object.

    The entry is the result of ``find_cancer(o['y'], y_patch_size)``.
    The function is wrapped with ``cache_function`` before being placed into the block.
    """

    def add_cancer(o):
        found = find_cancer(o['y'], y_patch_size)
        o['cancer'] = found
        return o

    cached = cache_function(add_cancer)
    return pdp.One2One(cached, buffer_size=buffer_size)
def __init__(self, source: Iterable, *transformers: Callable, batch_size: int, batches_per_epoch: int = None,
             buffer_size: int = 3, combiner: Callable = combine_to_arrays):
    """
    Assemble a ``pdp.Pipeline`` from ``source`` and ``transformers`` that ends with batching blocks.

    Parameters
    ----------
    source: Iterable
        wrapped into ``pdp.Source`` unless it is already a pdp source/component description.
    transformers: Callable
        each one is wrapped into ``pdp.One2One`` unless it is already a pdp transformer/component description.
    batch_size: int
        the chunk size of the ``pdp.Many2One`` block.
    batches_per_epoch: int, optional
        stored in ``self.batches_per_epoch``. Must be positive if given.
        NOTE(review): None is stored as is - confirm that the code reading this attribute supports it.
    buffer_size: int
        the buffer size of every block created here, except for the ``pdp.Many2One`` block, which uses 3.
    combiner: Callable
        applied to each chunk produced by the ``pdp.Many2One`` block.

    Raises
    ------
    ValueError
        if ``batches_per_epoch`` is given and is not positive.
    """
    # the default is None, and `None <= 0` raises a TypeError in Python 3,
    # so the value is validated only when it was actually provided
    if batches_per_epoch is not None and batches_per_epoch <= 0:
        raise ValueError(
            f'Expected a positive amount of batches per epoch, but got {batches_per_epoch}'
        )

    import pdp

    # backward compatibility with pdp==0.2.1
    if hasattr(pdp.interface, 'ComponentDescription'):
        source_class = transformer_class = pdp.interface.ComponentDescription
    else:
        source_class = pdp.Source
        transformer_class = pdp.interface.TransformerDescription

    def wrap(o):
        # plain callables become One2One blocks, ready-made blocks are used as is
        if not isinstance(o, transformer_class):
            o = pdp.One2One(o, buffer_size=buffer_size)
        return o

    if not isinstance(source, source_class):
        source = pdp.Source(source, buffer_size=buffer_size)

    self.batches_per_epoch = batches_per_epoch
    self.pipeline = pdp.Pipeline(
        source, *map(wrap, transformers),
        pdp.Many2One(chunk_size=batch_size, buffer_size=3),
        pdp.One2One(combiner, buffer_size=buffer_size),
    )
def simple_iterator(ids, load_x, load_y, batch_size, *, shuffle=False):
    """
    Build a pipeline that loads ``(x, y)`` pairs via ``load_by_ids`` and groups them into batches.

    Returns a ``pdp.Pipeline`` made of a source, a chunking block (``chunk_size=batch_size``)
    and a ``pdp.combine_batches`` block.
    """

    def simple():
        # every item is unpacked into exactly two values and re-packed into a tuple
        yield from ((x, y) for x, y in load_by_ids(load_x, load_y, ids=ids, shuffle=shuffle))

    source = pdp.Source(simple(), buffer_size=5)
    chunker = pdp.Many2One(chunk_size=batch_size, buffer_size=2)
    combiner = pdp.One2One(pdp.combine_batches, buffer_size=3)
    return pdp.Pipeline(source, chunker, combiner)
def make_block_load_x_y(load_x, load_y, *, buffer_size):
    """
    Create a ``pdp.One2One`` block that adds ``'x'`` and ``'y'`` entries to each object.

    The entries are ``load_x(o['id'])`` and ``load_y(o['id'])`` respectively.
    The function is wrapped with ``cache_function`` before being placed into the block.
    """

    def add_x_y(o):
        # 'x' is stored before `load_y` is called
        for key, loader in (('x', load_x), ('y', load_y)):
            o[key] = loader(o['id'])
        return o

    cached = cache_function(add_x_y)
    return pdp.One2One(cached, buffer_size=buffer_size)
def make_patch_3d_iter(ids, load_x, load_y, *, batch_size, x_patch_sizes, y_patch_size, buffer_size):
    """
    Build a pipeline that yields batches of patches taken around random centers.

    Parameters
    ----------
    ids
        passed to ``make_source_random``; ``len(ids)`` is also used as the buffer size
        of the loading and padding blocks.
    load_x, load_y
        callables forwarded to ``make_block_load_x_y``.
    batch_size
        forwarded to ``make_batch_blocks``; also the buffer size of the patch extraction block.
    x_patch_sizes
        patch sizes used for the patches extracted from ``'x'``.
    y_patch_size
        patch size used for the single patch extracted from ``'y'``
        and for choosing the center with ``get_random_center_idx``.
    buffer_size
        forwarded to ``make_batch_blocks``.

    Returns
    -------
    pdp.Pipeline
    """
    check_y_patch_size(y_patch_size)
    x_patch_sizes = np.array(x_patch_sizes)
    y_patch_size = np.array(y_patch_size)

    def _extract_patches(o):
        center_idx = get_random_center_idx(o['y'], y_patch_size, spatial_dims=spatial_dims)
        # both extractions share the center and the spatial dimensions
        common = dict(center_idx=center_idx, spatial_dims=spatial_dims)

        xs = extract_patches(o['x'], patch_sizes=x_patch_sizes, padding_values=o['padding'], **common)
        (y,) = extract_patches(o['y'], patch_sizes=[y_patch_size], padding_values=0, **common)
        return (*xs, y)

    total = len(ids)
    source = make_source_random(ids)
    loader = make_block_load_x_y(load_x, load_y, buffer_size=total)
    padder = make_block_find_padding(buffer_size=total)
    extractor = pdp.One2One(_extract_patches, buffer_size=batch_size)
    batchers = make_batch_blocks(batch_size, buffer_size=buffer_size)
    return pdp.Pipeline(source, loader, padder, extractor, *batchers)
def __init__(self, source: Iterable, *transformers: Callable, batch_size: Union[int, Callable],
             batches_per_epoch: int, buffer_size: int = 3, combiner: Callable = combine_to_arrays):
    """
    Assemble a ``pdp.Pipeline`` from ``source`` and ``transformers`` that ends with batching blocks.

    Parameters
    ----------
    source: Iterable
        wrapped into ``pdp.Source`` unless it is already a ``ComponentDescription``.
    transformers: Callable
        each one is wrapped into ``pdp.One2One`` unless it is already a ``ComponentDescription``.
    batch_size: int, Callable
        passed to ``self._make_combiner``, which provides the block placed before ``combiner``.
    batches_per_epoch: int
        a positive number, stored in ``self.batches_per_epoch``.
    buffer_size: int
        the buffer size of the blocks created here.
    combiner: Callable
        placed into the final ``pdp.One2One`` block.

    Raises
    ------
    ValueError
        if ``batches_per_epoch`` is not positive.
    """
    import pdp
    from pdp.interface import ComponentDescription

    if batches_per_epoch <= 0:
        raise ValueError(f'Expected a positive amount of batches per epoch, but got {batches_per_epoch}')

    def as_component(entity, block_type):
        # ready-made pdp components are used as is
        if isinstance(entity, ComponentDescription):
            return entity
        return block_type(entity, buffer_size=buffer_size)

    source = as_component(source, pdp.Source)
    self.batches_per_epoch = batches_per_epoch

    blocks = [source]
    blocks.extend(as_component(transformer, pdp.One2One) for transformer in transformers)
    blocks.append(self._make_combiner(batch_size))
    blocks.append(pdp.One2One(combiner, buffer_size=buffer_size))
    self.pipeline = pdp.Pipeline(*blocks)
def make_batch_blocks(batch_size, buffer_size):
    """
    Create the pair of blocks that turn a stream of entries into batches.

    Returns a tuple of a ``pdp.Many2One`` block (``chunk_size=batch_size``, buffer size 3)
    and a ``pdp.One2One`` block that applies ``pdp.combine_batches`` (buffer size ``buffer_size``).
    """
    chunker = pdp.Many2One(chunk_size=batch_size, buffer_size=3)
    combiner = pdp.One2One(pdp.combine_batches, buffer_size=buffer_size)
    return chunker, combiner
def wrap(o):
    """
    Return ``o`` as is if it is a ``ComponentDescription``, otherwise put it into a ``pdp.One2One`` block.

    ``pdp``, ``ComponentDescription`` and ``buffer_size`` are not defined here
    and must be provided by the surrounding scope.
    """
    if isinstance(o, ComponentDescription):
        return o
    return pdp.One2One(o, buffer_size=buffer_size)
def wrap(o):
    """
    Return ``o`` as is if it is an instance of ``transformer_class``, otherwise put it into a ``pdp.One2One`` block.

    ``pdp``, ``transformer_class`` and ``buffer_size`` are not defined here
    and must be provided by the surrounding scope.
    """
    already_block = isinstance(o, transformer_class)
    return o if already_block else pdp.One2One(o, buffer_size=buffer_size)