def load_combine(ids: Sequence, load_x: callable, load_y: callable, batch_size: int, *, shuffle: bool = False):
    """
    Build a `pdp.Pipeline` that loads data by id and packs it into batches.

    The pipeline has three stages:
    a source over ``load_by_ids(load_x, load_y, ids=ids, shuffle=shuffle)``,
    a `pdp.Many2One` stage with ``chunk_size=batch_size``,
    and a `pdp.One2One` stage applying `pdp.combine_batches`.

    Parameters
    ----------
    ids: Sequence
        the identifiers forwarded to `load_by_ids`.
    load_x: callable(id)
    load_y: callable(id)
    batch_size: int
        the chunk size of the `pdp.Many2One` stage.
    shuffle: bool, optional
        forwarded to `load_by_ids`; whether to shuffle the ids before yielding batches.

    Returns
    -------
    pipeline: pdp.Pipeline
        the assembled pipeline, intended to produce batches of size `batch_size`.
    """
    # the stages are created in the same order in which they are chained
    samples = pdp.Source(load_by_ids(load_x, load_y, ids=ids, shuffle=shuffle), buffer_size=30)
    chunker = pdp.Many2One(chunk_size=batch_size, buffer_size=2)
    combiner = pdp.One2One(pdp.combine_batches, buffer_size=3)
    return pdp.Pipeline(samples, chunker, combiner)
def __init__(self, source: Iterable, *transformers: Callable, batch_size: int, batches_per_epoch: int = None,
             buffer_size: int = 3, combiner: Callable = combine_to_arrays):
    """
    Assemble a `pdp.Pipeline` from a source, optional transformers, a batching stage and a combiner.

    Parameters
    ----------
    source: Iterable
        the source of the pipeline. Wrapped in `pdp.Source` unless it already is a pdp source description.
    transformers: Callable
        applied after the source, in the given order. Each one is wrapped in `pdp.One2One`
        unless it already is a pdp transformer description.
    batch_size: int
        the chunk size of the `pdp.Many2One` stage.
    batches_per_epoch: int, optional
        stored as ``self.batches_per_epoch``. If given, must be positive.
        ``None`` (the default) is stored as is; NOTE(review): how ``None`` is interpreted
        depends on the rest of the class, which is not visible here - confirm.
    buffer_size: int
        the buffer size used for the source, the wrapped transformers and the combiner stage.
    combiner: Callable
        applied to each chunk produced by the `pdp.Many2One` stage.

    Raises
    ------
    ValueError
        if `batches_per_epoch` is given and is not positive.
    """
    # `None` is the declared default and `None <= 0` raises a TypeError in Python 3,
    # so the range check must only run when a value was actually passed
    if batches_per_epoch is not None and batches_per_epoch <= 0:
        raise ValueError(f'Expected a positive amount of batches per epoch, but got {batches_per_epoch}')

    import pdp

    # backward compatibility with pdp==0.2.1
    if hasattr(pdp.interface, 'ComponentDescription'):
        source_class = transformer_class = pdp.interface.ComponentDescription
    else:
        source_class = pdp.Source
        transformer_class = pdp.interface.TransformerDescription

    def wrap(o):
        # plain callables become one-to-one stages, ready-made components are used as is
        if not isinstance(o, transformer_class):
            o = pdp.One2One(o, buffer_size=buffer_size)
        return o

    if not isinstance(source, source_class):
        source = pdp.Source(source, buffer_size=buffer_size)

    self.batches_per_epoch = batches_per_epoch
    self.pipeline = pdp.Pipeline(
        source, *map(wrap, transformers),
        # NOTE(review): this stage uses a fixed buffer size rather than `buffer_size` - kept as in the original
        pdp.Many2One(chunk_size=batch_size, buffer_size=3),
        pdp.One2One(combiner, buffer_size=buffer_size))
def simple_iterator(ids, load_x, load_y, batch_size, *, shuffle=False):
    """
    Build a `pdp.Pipeline` that chunks the pairs produced by `load_by_ids` into batches.

    Parameters
    ----------
    ids
        forwarded to `load_by_ids`.
    load_x
        forwarded to `load_by_ids`.
    load_y
        forwarded to `load_by_ids`.
    batch_size
        the chunk size of the `pdp.Many2One` stage.
    shuffle: bool, optional
        forwarded to `load_by_ids`.

    Returns
    -------
    pipeline: pdp.Pipeline
        a source over the loaded pairs, followed by a `pdp.Many2One` stage
        and a `pdp.One2One` stage applying `pdp.combine_batches`.
    """

    def pairs():
        # each item must unpack into exactly two values; it is re-emitted as a tuple
        for sample in load_by_ids(load_x, load_y, ids=ids, shuffle=shuffle):
            x, y = sample
            yield x, y

    loader = pdp.Source(pairs(), buffer_size=5)
    chunker = pdp.Many2One(chunk_size=batch_size, buffer_size=2)
    combiner = pdp.One2One(pdp.combine_batches, buffer_size=3)
    return pdp.Pipeline(loader, chunker, combiner)
def __init__(self, source: Iterable, *transformers: Callable, batch_size: Union[int, Callable], batches_per_epoch: int,
             buffer_size: int = 3, combiner: Callable = combine_to_arrays):
    """
    Assemble a `pdp.Pipeline` from a source, optional transformers, a batching stage and a combiner.

    Parameters
    ----------
    source: Iterable
        the source of the pipeline. Wrapped in `pdp.Source` unless it already is a `ComponentDescription`.
    transformers: Callable
        applied after the source, in the given order. Each one is wrapped in `pdp.One2One`
        unless it already is a `ComponentDescription`.
    batch_size: int, Callable
        passed to ``self._make_combiner``, whose result is used as the batching stage.
    batches_per_epoch: int
        must be positive. Stored as ``self.batches_per_epoch``.
    buffer_size: int
        the buffer size used for the source, the wrapped transformers and the combiner stage.
    combiner: Callable
        wrapped in `pdp.One2One` and used as the final stage.

    Raises
    ------
    ValueError
        if `batches_per_epoch` is not positive.
    """
    import pdp
    from pdp.interface import ComponentDescription

    if batches_per_epoch <= 0:
        raise ValueError(f'Expected a positive amount of batches per epoch, but got {batches_per_epoch}')

    def as_component(obj):
        # ready-made pdp components are used as is
        if isinstance(obj, ComponentDescription):
            return obj
        return pdp.One2One(obj, buffer_size=buffer_size)

    if not isinstance(source, ComponentDescription):
        source = pdp.Source(source, buffer_size=buffer_size)

    self.batches_per_epoch = batches_per_epoch

    # stages are created in the order in which they are chained
    stages = [source]
    stages.extend(as_component(transformer) for transformer in transformers)
    stages.append(self._make_combiner(batch_size))
    stages.append(pdp.One2One(combiner, buffer_size=buffer_size))
    self.pipeline = pdp.Pipeline(*stages)
def make_source_random(ids):
    """
    Create a `pdp.Source` over an endless stream of ``{'id': ...}`` dicts,
    where each id is drawn from `ids` with `random.choice`.
    """

    def draw():
        return {'id': random.choice(ids)}

    # a dict never equals the `None` sentinel, so this iterator is never exhausted
    endless = iter(draw, None)
    return pdp.Source(endless, buffer_size=3)
def make_source_sequence(ids):
    """
    Create a `pdp.Source` over a list of ``{'id': ...}`` dicts, one per entry of `ids`, in the same order.
    """
    records = []
    for identifier in ids:
        records.append({'id': identifier})
    return pdp.Source(records, buffer_size=3)