Example #1
0
def test_iterview_random2():
    """A randomized view seeded with 2601 must order samples differently
    from a randomized view that uses the default seed."""
    loader = LiveLoader('.cache', SourceTest())
    default_seed_view = IteratorView(loader,
                                     'train',
                                     randomize=True,
                                     fetch_size=1)
    custom_seed_view = IteratorView(loader,
                                    'train',
                                    randomize=True,
                                    random_seed=2601,
                                    fetch_size=1)

    # Consume the explicitly seeded view first, then the default one, and
    # compare only the x component (index 0) of the first ten samples.
    custom_xs = [pair[0] for pair in itertools.islice(custom_seed_view, 10)]
    default_xs = [pair[0] for pair in itertools.islice(default_seed_view, 10)]
    assert custom_xs != default_xs
Example #2
0
    def load(self,
             split: str = 'train',
             view: str = 'list',
             layout: str = 'tuples',
             batch_size: int = 64,
             fetch_size: Optional[int] = None,
             stream: bool = True,
             infinite: bool = False,
             with_meta: bool = False,
             randomize: bool = False,
             max_samples: Optional[int] = None,
             transform_x: Callable[[Any], Any] = lambda x: x,
             transform_y: Callable[[Any], Any] = lambda y: y):
        """
        Load the samples of a split, shaped according to ``view`` and ``layout``.

        :param split: One of ("train", "val", "test"). Will return the split data as configured
                      via the env or via input. Defaults to "train".

        :param view: A view determines the **class** which will hold the sample data.
                     Possible values are:

                    - "list" (default): reads all data into memory and return it as **python list**
                      or optionally as numpy array (see the layout option).

                    - "lazy-list": returns a list that reads the data on demand.

                    - "batch": return a **generator**, splitting the data into batches of batch_size.
                      The returned object supports getting the length (number of batches) via the
                      len() function.

                    - "iter": return the data as an **iterator** object.

        :param layout: Determines how x, y and (optionally meta) is returned. It is applied by
                       the "list" view and forwarded to the "batch" view; the "lazy-list" and
                       "iter" views do not receive it.
                       Can be one of:

                       - "tuples" (default): the data will be returned as (x,y) pairs.

                         [(x1,y1), (x2,y2), ...]

                       - "lists": the data will be returned as lists [xs], [ys]

                         ([x1, x2, x3], [y1, y2, y3])

                       - "arrays": the data will be returned as numpy arrays [xs], [ys]

                         array([[1, 2, 3],
                                [4, 5, 6]])

        :param batch_size: Sample batch size. Applies to "batch" view only. Defaults to 64.

        :param fetch_size: Fetch samples in groups of fetch_size. None (or 0) falls back to a
                           fetch size of 8. Not used by the "list" view.

        :param stream: NOTE(review): accepted for API compatibility, but this method neither
                       reads it nor forwards it to any view -- confirm whether it should be
                       wired through or deprecated.

        :param infinite: Applies to "batch" and "iter" views. If set to **True**, the returned
                         object will be an infinite generator object.

            NOTE for KERAS users: This setting is useful when used in with the
            model.fit_generator() of the keras framework. Since len() will return the number of
            steps per epoch, the steps_per_epoch of fit_generator() can be left unspecified.

        :param with_meta: If True, will return meta in addition to x and y.

        :param randomize: If True, the data will be returned in random order. The "list" view
                          shuffles with a generator seeded by ``self.random_seed``, so the
                          order is reproducible for a given seed.

        :param max_samples: The maximum amount of samples to return.
                      Default is None (return all samples).

        :param transform_x: a function that takes x as an argument and returns a transformed
                            version. Defaults to the identity function (no transformation).

        :param transform_y: a function that takes y as an argument and returns a transformed
                            version. Defaults to the identity function (no transformation).

        :return: For "list": a list of tuples, or a tuple of lists / numpy arrays depending on
                 ``layout``. For the other views: a ``ListView``, ``BatchView`` or
                 ``IteratorView`` instance.

        :raises AssertionError: if ``view`` or ``layout`` is not one of the supported values.
        """

        assert view in ('list', 'lazy-list', 'batch', 'iter'), \
            'unsupported view: {!r}'.format(view)
        assert layout in ('tuples', 'lists', 'arrays'), \
            'unsupported layout: {!r}'.format(layout)
        fetch_size = fetch_size or 8

        if view == 'list':
            res = []
            self.loader.begin_read_samples()
            try:
                num_samples = self.loader.num_samples(split)
                if max_samples is not None:
                    num_samples = min(num_samples, max_samples)

                for sample in self.loader.read_samples(split, 0, num_samples):
                    x, y, m = sample.x, sample.y, sample.meta
                    x, y = transform_x(x), transform_y(y)
                    res.append((x, y, m) if with_meta else (x, y))
            finally:
                # Always close the read session, even when reading or one of
                # the transforms raises; otherwise the loader is left open.
                self.loader.end_read_samples()

            if randomize:
                random.Random(self.random_seed).shuffle(res)

            if layout in ('lists', 'arrays'):
                # Transpose [(x, y[, m]), ...] into ([xs], [ys][, [ms]]).
                res = tuple(map(list, zip(*res)))
                if not res:
                    # zip(*[]) yields nothing, so rebuild the empty columns.
                    res = ([], [], []) if with_meta else ([], [])

            if layout == 'arrays':
                # Only x and y become arrays; meta (if present) stays a list.
                xs, ys, *meta = res
                res = tuple([np.array(xs), np.array(ys)] + meta)
            return res

        if view == 'lazy-list':
            return ListView(self.loader,
                            split,
                            with_meta,
                            randomize,
                            self.random_seed,
                            fetch_size,
                            max_samples,
                            transform_x,
                            transform_y)

        if view == 'batch':
            return BatchView(self.loader,
                             split,
                             batch_size,
                             randomize,
                             self.random_seed,
                             fetch_size,
                             infinite,
                             with_meta,
                             layout,
                             max_samples,
                             transform_x,
                             transform_y)

        if view == 'iter':
            return IteratorView(self.loader,
                                split,
                                randomize,
                                self.random_seed,
                                fetch_size,
                                infinite,
                                with_meta,
                                max_samples,
                                transform_x,
                                transform_y)

        # Unreachable while the assertions above are active.
        return None
Example #3
0
def test_iterview_transform_y():
    """transform_y must replace the second element of a yielded sample."""
    def constant_label(_):
        return 'transformed_y'

    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', transform_y=constant_label)

    first_sample = next(view)
    assert first_sample[1] == 'transformed_y'
Example #4
0
def test_iterview_test():
    """Iterating the 'test' split yields x values 0..19 in order."""
    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'test')

    xs = [sample[0] for sample in view]
    expected = list(range(20))
    assert xs == expected
Example #5
0
def test_iterview_transform():
    """transform_x must be applied to the x value of every 'train' sample."""
    def shift_by_ten(x):
        return x + 10

    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', transform_x=shift_by_ten)

    xs = [sample[0] for sample in view]
    assert xs == list(range(10, 110))
Example #6
0
def test_iterview_meta():
    """With with_meta=True each sample is an (x, y, meta) triple."""
    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', with_meta=True)

    # The first three samples are expected to be (i, i + 5, {'meta': i}).
    for i in range(3):
        assert next(view) == (i, i + 5, {'meta': i})
Example #7
0
def test_iterview_random_fetch_size():
    """With randomize=True and fetch_size=10 (default seed), the first ten
    x values are expected to be 70..79."""
    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', randomize=True, fetch_size=10)

    first_ten = [sample[0] for sample in itertools.islice(view, 10)]
    assert first_ten == list(range(70, 80))
Example #8
0
def test_iterview_random():
    """With randomize=True and fetch_size=1 (default seed), the first ten
    x values must match a fixed, known order."""
    expected = [92, 1, 43, 61, 35, 73, 48, 18, 98, 36]

    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', randomize=True, fetch_size=1)

    first_ten = [sample[0] for sample in itertools.islice(view, 10)]
    assert first_ten == expected
Example #9
0
def test_iterview_infinite():
    """An infinite view restarts from the beginning after the last sample."""
    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', infinite=True)

    # 150 samples: one full pass over x = 0..99, then the first 50 again.
    xs = [sample[0] for sample in itertools.islice(view, 150)]
    expected = list(range(100)) + list(range(50))
    assert xs == expected
Example #10
0
def test_iterview_max_samples():
    """max_samples=10 limits iteration to the first ten samples."""
    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', max_samples=10)

    xs = [sample[0] for sample in view]
    assert xs == list(range(10))
Example #11
0
    def load(
            self,  # pylint: disable=R0913
            split: str = 'train',
            view: str = 'list',
            layout: str = 'tuples',
            batch_size: int = 64,
            fetch_size: Optional[int] = None,
            infinite: bool = False,
            with_meta: bool = False,
            randomize: bool = False,
            transform_x: Callable[[Any], Any] = lambda x: x,
            transform_y: Callable[[Any], Any] = lambda y: y):
        """
        Load a split and hand it back in the requested view.

        :param split: Which split to load: "train", "val" or "test".

        :param view: Selects the object that is returned:

                    - "list" (default): delegates to ``_load_list``,
                      which is documented to read everything into
                      memory and return a python list (or numpy
                      arrays when layout is 'arrays').

                    - "batch": a ``BatchView``; described as a
                      generator over batches of batch_size that also
                      supports len() (number of batches).

                    - "iter": an ``IteratorView``, i.e. the data as an
                      **iterator** object.

        :param layout: How x, y and (optionally) meta are arranged.
                       Forwarded to the "list" and "batch" views; the
                       "iter" view does not receive it.
                       One of:

                       - "tuples": (x,y) pairs

                         [(x1,y1), (x2,y2), ...]

                       - "lists": separate lists [xs], [ys]

                         ([x1, x2, x3], [y1, y2, y3])

                       - "arrays": numpy arrays [xs], [ys]

                         array([[1, 2, 3],
                                [4, 5, 6]])

        :param batch_size: Batch size; only forwarded to the "batch"
                           view. Defaults to 64.

        :param fetch_size: Fetch size forwarded to the "batch" and
                           "iter" views. None (or any falsy value) is
                           replaced by 8.

        :param infinite: Forwarded to the "batch" and "iter" views. If
                         True, the returned object is meant to be an
                         infinite generator.

            NOTE for KERAS users: useful together with
            model.fit_generator(). Since len() returns the number of
            steps per epoch, steps_per_epoch can be omitted when
            calling fit_generator().

        :param with_meta: If True, meta is returned in addition to x
                          and y.

        :param randomize: If True, the data is returned in random
                          order; ``self.random_seed`` is passed along
                          as the seed.

        :param transform_x: function applied to x; returns the
                            transformed value. Defaults to the
                            identity (no transformation).

        :param transform_y: function applied to y; returns the
                            transformed value. Defaults to the
                            identity (no transformation)."""

        assert view in ('list', 'batch', 'iter')
        assert layout in ('tuples', 'lists', 'arrays')
        fetch_size = fetch_size or 8

        # Keyword arguments every view receives.
        shared = dict(loader=self.loader,
                      split=split,
                      with_meta=with_meta,
                      randomize=randomize,
                      random_seed=self.random_seed,
                      transform_x=transform_x,
                      transform_y=transform_y)

        if view == 'list':
            return _load_list(layout=layout, **shared)

        # The streaming views additionally take fetch_size and infinite.
        shared.update(fetch_size=fetch_size, infinite=infinite)

        if view == 'batch':
            return BatchView(layout=layout, batch_size=batch_size, **shared)

        if view == 'iter':
            return IteratorView(**shared)

        # never runs in this code, make the linter happy
        assert False
        return None