Example #1
def load_combine(ids: Sequence,
                 load_x: callable,
                 load_y: callable,
                 batch_size: int,
                 *,
                 shuffle: bool = False):
    """
    A simple batch iterator that loads the data and packs it into batches.

    Parameters
    ----------
    ids: Sequence
        identifiers of the objects to load.
    load_x: callable(id)
        returns the input (x) corresponding to a given identifier.
    load_y: callable(id)
        returns the target (y) corresponding to a given identifier.
    batch_size: int
        the number of objects packed into each yielded batch.
    shuffle: bool, optional
        whether to shuffle the ids before yielding batches.

    Yields
    ------
    batches of size `batch_size`

    """
    return pdp.Pipeline(
        # yield (x, y) pairs one by one
        pdp.Source(load_by_ids(load_x, load_y, ids=ids, shuffle=shuffle),
                   buffer_size=30),
        # group consecutive pairs into chunks of `batch_size`
        pdp.Many2One(chunk_size=batch_size, buffer_size=2),
        # combine each chunk into a single batch
        pdp.One2One(pdp.combine_batches, buffer_size=3))
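
A minimal usage sketch of `load_combine`. The in-memory `images`/`labels` dictionaries are hypothetical stand-ins for real loaders, and the consumption pattern (using the pipeline as a context manager and iterating over it) is an assumption based on typical pdp usage and may differ between versions:

import numpy as np

# hypothetical in-memory data standing in for real loaders
images = {'a': np.zeros((4, 4)), 'b': np.ones((4, 4)), 'c': np.full((4, 4), 2.)}
labels = {'a': 0, 'b': 1, 'c': 1}

batch_iter = load_combine(
    ids=list(images),
    load_x=images.__getitem__,   # callable(id) -> x
    load_y=labels.__getitem__,   # callable(id) -> y
    batch_size=2,
    shuffle=True,
)

with batch_iter:                  # assumed: the pipeline is a context manager
    for xs, ys in batch_iter:     # assumed: each item is an (x_batch, y_batch) pair
        print(xs.shape, ys.shape)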
Example #2
    def __init__(self,
                 source: Iterable,
                 *transformers: Callable,
                 batch_size: int,
                 batches_per_epoch: int = None,
                 buffer_size: int = 3,
                 combiner: Callable = combine_to_arrays):

        if batches_per_epoch is not None and batches_per_epoch <= 0:
            raise ValueError(
                f'Expected a positive number of batches per epoch, but got {batches_per_epoch}'
            )
        import pdp

        # backward compatibility with pdp==0.2.1
        if hasattr(pdp.interface, 'ComponentDescription'):
            source_class = transformer_class = pdp.interface.ComponentDescription
        else:
            source_class = pdp.Source
            transformer_class = pdp.interface.TransformerDescription

        def wrap(o):
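            # plain callables are wrapped into One2One transformers;
            # existing pdp components are passed through unchanged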
            if not isinstance(o, transformer_class):
                o = pdp.One2One(o, buffer_size=buffer_size)
            return o

        if not isinstance(source, source_class):
            source = pdp.Source(source, buffer_size=buffer_size)

        self.batches_per_epoch = batches_per_epoch
        self.pipeline = pdp.Pipeline(
            source, *map(wrap, transformers),
            pdp.Many2One(chunk_size=batch_size, buffer_size=3),
            pdp.One2One(combiner, buffer_size=buffer_size))
Example #3
def simple_iterator(ids, load_x, load_y, batch_size, *, shuffle=False):
    def simple():
        for x, y in load_by_ids(load_x, load_y, ids=ids, shuffle=shuffle):
            yield x, y

    return pdp.Pipeline(pdp.Source(simple(), buffer_size=5),
                        pdp.Many2One(chunk_size=batch_size, buffer_size=2),
                        pdp.One2One(pdp.combine_batches, buffer_size=3))
Example #4
    def __init__(self, source: Iterable, *transformers: Callable,
                 batch_size: Union[int, Callable], batches_per_epoch: int,
                 buffer_size: int = 3, combiner: Callable = combine_to_arrays):
        import pdp
        from pdp.interface import ComponentDescription

        if batches_per_epoch <= 0:
            raise ValueError(f'Expected a positive number of batches per epoch, but got {batches_per_epoch}')

        def wrap(o):
            if not isinstance(o, ComponentDescription):
                o = pdp.One2One(o, buffer_size=buffer_size)
            return o

        if not isinstance(source, ComponentDescription):
            source = pdp.Source(source, buffer_size=buffer_size)

        self.batches_per_epoch = batches_per_epoch
        self.pipeline = pdp.Pipeline(
            source, *map(wrap, transformers),
            self._make_combiner(batch_size),
            pdp.One2One(combiner, buffer_size=buffer_size)
        )
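
The `_make_combiner` helper referenced above is not part of the snippet. Purely as a hypothetical sketch (not the library's actual implementation), a fixed integer `batch_size` could be mapped onto `pdp.Many2One` exactly as in the earlier examples, while a callable `batch_size` would need dedicated handling that is not shown here:

    def _make_combiner(self, batch_size):
        # hypothetical sketch only: reuse pdp.Many2One for a fixed batch size
        if callable(batch_size):
            raise NotImplementedError('dynamic batch sizes are outside the scope of this sketch')
        return pdp.Many2One(chunk_size=batch_size, buffer_size=3)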
Example #5
def make_source_random(ids):
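    # iter(callable, sentinel) keeps calling the lambda until it returns the sentinel (None);
    # since that never happens, this yields an endless stream of randomly chosen ids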
    return pdp.Source(iter(lambda: {'id': random.choice(ids)}, None), buffer_size=3)
Example #6
def make_source_sequence(ids):
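    # a finite source: yields one {'id': ...} dict per id, in the given order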
    return pdp.Source([{'id': i} for i in ids], buffer_size=3)