def _preview(data, output_dir, split, num_samples):
    """Write preview output for samples of the requested split(s).

    Args:
        data: object exposing ``output``, ``cache_dir``, ``input`` and ``ops``.
        output_dir: handed unchanged to the output's preview hooks.
        split: a single split name, or ``'all'`` to walk every entry of
            ``SPLITS``.
        num_samples: reading stops once this many samples have been written;
            ``None`` disables the limit. The limit is checked before each
            read, so the last read may write more samples than requested.

    Returns:
        ``False`` when the output reports that it does not support previews
        (nothing is written in that case), ``True`` otherwise.
    """
    output = data.output
    if not output.supports_preview():
        return False

    output.begin_preview(output_dir)
    try:
        loader = LiveLoader(data.cache_dir, data.input, data.ops, output=output)
        # NOTE(review): presumably disables sample transformation so the
        # preview shows untransformed data -- confirm against LiveLoader.
        loader.transform = False
        loader.begin_read_samples()
        try:
            total = 0
            for split_ in SPLITS:
                if split not in ('all', split_):
                    continue
                # NOTE(review): looks like the number of samples produced per
                # index for this split -- confirm against read_samples.
                mul = int(loader.multipliers[split_])
                for index in range(loader.num_samples(split_)):
                    if num_samples is not None and total >= num_samples:
                        break
                    for sample in loader.read_samples(split_, index, mul):
                        output.write_preview(output_dir, split_, sample)
                        total += 1
        finally:
            # Always close the read session, even if reading or writing a
            # sample raised.
            loader.end_read_samples()
    finally:
        # Always pair begin_preview with end_preview.
        output.end_preview(output_dir)
    return True
def _get_loader(self, cache_input, cache_output):
    """Build the loader chain for the requested input/output caching modes.

    ``cache_input`` and ``cache_output`` each select ``'disk'``
    (FileCachedLoader) or ``'mem'`` (MemoryCachedLoader); any other value
    means no caching at that stage. Without output caching the result is a
    LiveLoader.
    """
    # Input stage: optionally wrap the raw input in a cached loader.
    if cache_input == 'disk':
        input_cache_class = FileCachedLoader
    elif cache_input == 'mem':
        input_cache_class = MemoryCachedLoader
    else:
        input_cache_class = None

    if input_cache_class is None:
        # No input caching: use the input object directly.
        source = self.input
    else:
        source = input_cache_class(self.cache_dir, self.input)
        source.progress_callback = self._progress_callback

    # Output stage: without output caching, samples are loaded live.
    if cache_output not in ('disk', 'mem'):
        return LiveLoader(self.cache_dir, source, self.ops, self.output)

    if cache_output == 'disk':
        output_cache_class = FileCachedLoader
    else:
        output_cache_class = MemoryCachedLoader
    cached = output_cache_class(self.cache_dir, source, self.ops, self.output)
    cached.progress_callback = self._progress_callback
    return cached
def test_live_loader_with_ops_meta(tmpdir):
    """Run the shared ``_test_loader_meta`` checks on a LiveLoader with one op.

    SourceTest serves as both input and output; the only operation is an
    AppendStringOperation.
    """
    cache_dir = _prepare_dir(tmpdir)
    source = SourceTest({'samples-dir': str(tmpdir)})
    live = LiveLoader(
        cache_dir,
        source,
        ops=[AppendStringOperation()],
        output=source,
    )
    _test_loader_meta(live)
def test_disk_loader_ops_num_samples(tmpdir):
    """Run ``_test_loader_num_samples`` on a LiveLoader fed by a FileCachedLoader.

    The file-cached loader wraps SourceTest; the live loader on top applies an
    AppendStringOperation and uses SourceTest as output.
    """
    cache_dir = _prepare_dir(tmpdir)
    source = SourceTest({'samples-dir': str(tmpdir)})
    disk_cached = FileCachedLoader(cache_dir, source)
    live = LiveLoader(
        cache_dir,
        disk_cached,
        ops=[AppendStringOperation()],
        output=source,
    )
    _test_loader_num_samples(live)
def test_live_loader_with_rng_no_reset(tmpdir):
    """Run ``_test_loader_with_rng_no_reset`` on a plain LiveLoader.

    The source is configured with 'test-split' and 'val-split' both set to 2.
    """
    cache_dir = _prepare_dir(tmpdir)
    config = {
        'samples-dir': str(tmpdir),
        'test-split': 2,
        'val-split': 2,
    }
    live = LiveLoader(cache_dir, SourceTest(config))
    _test_loader_with_rng_no_reset(live)
def test_listview_random2():
    """A ListView with an explicit random_seed must order samples differently
    from one created without a seed."""
    loader = LiveLoader('.cache', SourceTest())
    default_view = ListView(loader, 'train', randomize=True, fetch_size=1)
    seeded_view = ListView(
        loader, 'train', randomize=True, random_seed=2601, fetch_size=1)

    # The seeded view is read first; keep that order in case reads share
    # loader state.
    seeded_head = [item[0] for item in seeded_view[:10]]
    default_head = [item[0] for item in default_view[:10]]
    assert seeded_head != default_head
def test_iterview_random2():
    """An IteratorView with an explicit random_seed must order samples
    differently from one created without a seed."""
    loader = LiveLoader('.cache', SourceTest())
    default_view = IteratorView(loader, 'train', randomize=True, fetch_size=1)
    seeded_view = IteratorView(
        loader, 'train', randomize=True, random_seed=2601, fetch_size=1)

    # The seeded view is consumed first; keep that order in case reads share
    # loader state.
    seeded_head = [item[0] for item in itertools.islice(seeded_view, 10)]
    default_head = [item[0] for item in itertools.islice(default_view, 10)]
    assert seeded_head != default_head
def test_live_loader_with_ops(tmpdir):
    """Run ``_test_loader_read_samples_transformed`` on a LiveLoader with one op.

    SourceTest (test/val splits of 2) is both input and output; the only
    operation is an AppendStringOperation.
    """
    cache_dir = _prepare_dir(tmpdir)
    config = {
        'samples-dir': str(tmpdir),
        'test-split': 2,
        'val-split': 2,
    }
    source = SourceTest(config)
    live = LiveLoader(
        cache_dir,
        source,
        ops=[AppendStringOperation()],
        output=source,
    )
    _test_loader_read_samples_transformed(live)
def _get_loader(self, cache_input, cache_output):
    """Build the loader chain for the requested input/output caching modes.

    ``'disk'`` selects FileCachedLoader and ``'mem'`` selects
    MemoryCachedLoader at either stage; any other value means no caching
    there. Cached loaders receive ``self._progress_callback``; without output
    caching a LiveLoader is returned.
    """
    # Input stage.
    if cache_input == 'disk':
        input_cache_class = FileCachedLoader
    elif cache_input == 'mem':
        input_cache_class = MemoryCachedLoader
    else:
        input_cache_class = None

    if input_cache_class is None:
        source = self.input
    else:
        source = input_cache_class(
            self.cache_dir,
            self.input,
            progress_callback=self._progress_callback,
        )

    # Output stage.
    if cache_output == 'disk':
        output_cache_class = FileCachedLoader
    elif cache_output == 'mem':
        output_cache_class = MemoryCachedLoader
    else:
        return LiveLoader(self.cache_dir, source, self.ops, self.output)

    return output_cache_class(
        self.cache_dir,
        source,
        self.ops,
        self.output,
        progress_callback=self._progress_callback,
    )
def test_live_loader_with_multiplier_ops_between(tmpdir):
    """Run ``_test_loader_read_samples_x2_between`` on a LiveLoader whose ops
    are AugmentOperation(variants=2) followed by AppendStringOperation."""
    cache_dir = _prepare_dir(tmpdir)
    config = {
        'samples-dir': str(tmpdir),
        'test-split': 2,
        'val-split': 2,
    }
    source = SourceTest(config)
    pipeline = [AugmentOperation(variants=2), AppendStringOperation()]
    live = LiveLoader(cache_dir, source, ops=pipeline, output=source)
    _test_loader_read_samples_x2_between(live)
def test_mem_loader_with_multiplier_ops(tmpdir):
    """Run ``_test_loader_read_samples_x2`` on a LiveLoader fed by a
    MemoryCachedLoader, with AugmentOperation(variants=2) followed by
    AppendStringOperation as ops."""
    cache_dir = _prepare_dir(tmpdir)
    config = {
        'samples-dir': str(tmpdir),
        'test-split': 2,
        'val-split': 2,
    }
    source = SourceTest(config)
    mem_cached = MemoryCachedLoader(cache_dir, source)
    pipeline = [AugmentOperation(variants=2), AppendStringOperation()]
    live = LiveLoader(cache_dir, mem_cached, ops=pipeline, output=source)
    _test_loader_read_samples_x2(live)
def test_listview_neg():
    """Index -1 on the 'train' ListView returns the sample whose first
    element is 99."""
    view = ListView(LiveLoader('.cache', SourceTest()), 'train')
    last_item = view[-1]
    assert last_item[0] == 99
def test_listview_slice4():
    """Slicing the 'val' ListView with [1:3] yields the items at indices 1 and 2."""
    view = ListView(LiveLoader('.cache', SourceTest()), 'val')
    first_elements = [item[0] for item in view[1:3]]
    assert first_elements == [1, 2]
def test_listview_max_samples():
    """With max_samples=10 the 'train' ListView yields only items 0..9."""
    view = ListView(LiveLoader('.cache', SourceTest()), 'train', max_samples=10)
    first_elements = [item[0] for item in view]
    assert first_elements == list(range(10))
def test_listview_random():
    """A randomized ListView created without an explicit seed yields a fixed,
    known order for its first ten samples."""
    expected_head = [92, 1, 43, 61, 35, 73, 48, 18, 98, 36]
    loader = LiveLoader('.cache', SourceTest())
    view = ListView(loader, 'train', randomize=True, fetch_size=1)
    actual_head = [item[0] for item in view[:10]]
    assert actual_head == expected_head
def test_listview_transform_y():
    """The transform_y callable's result replaces the second element of an item."""

    def replace_y(_):
        return 'transformed_y'

    loader = LiveLoader('.cache', SourceTest())
    view = ListView(loader, 'train', transform_y=replace_y)
    first_item = view[0]
    assert first_item[1] == 'transformed_y'
def test_live_loader_num_samples(tmpdir):
    """Run ``_test_loader_num_samples`` on a plain LiveLoader over SourceTest."""
    cache_dir = _prepare_dir(tmpdir)
    config = {'samples-dir': str(tmpdir)}
    live = LiveLoader(cache_dir, SourceTest(config))
    _test_loader_num_samples(live)
def test_iterview_meta():
    """With with_meta=True each item from the IteratorView is an (x, y, meta) triple."""
    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', with_meta=True)
    # For the first three items: x == i, y == i + 5, meta == {'meta': i}.
    for i in range(3):
        assert next(view) == (i, i + 5, {'meta': i})
def test_iterview_transform():
    """transform_x is applied to the first element of every item."""

    def add_ten(x):
        return x + 10

    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', transform_x=add_ten)
    first_elements = [item[0] for item in view]
    assert first_elements == list(range(10, 110))
def test_listview_random_fetch_size():
    """With randomize=True and fetch_size=10 the first ten samples are the
    contiguous run 70..79."""
    loader = LiveLoader('.cache', SourceTest())
    view = ListView(loader, 'train', randomize=True, fetch_size=10)
    actual_head = [item[0] for item in view[:10]]
    assert actual_head == list(range(70, 80))
def test_iterview_random():
    """A randomized IteratorView created without an explicit seed yields a
    fixed, known order for its first ten samples."""
    expected_head = [92, 1, 43, 61, 35, 73, 48, 18, 98, 36]
    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', randomize=True, fetch_size=1)
    actual_head = [item[0] for item in itertools.islice(view, 10)]
    assert actual_head == expected_head
def test_iterview_infinite():
    """With infinite=True the IteratorView starts over after the 100 'train'
    samples instead of stopping."""
    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', infinite=True)
    taken = [item[0] for item in itertools.islice(view, 150)]
    # One full pass (0..99) followed by the first half of a second pass.
    assert taken == [*range(100), *range(50)]
def test_listview_val():
    """Iterating the 'val' ListView yields items 0..9."""
    view = ListView(LiveLoader('.cache', SourceTest()), 'val')
    first_elements = [item[0] for item in view]
    assert first_elements == list(range(10))
def test_iterview_transform_y():
    """The transform_y callable's result replaces the second element of an item."""

    def replace_y(_):
        return 'transformed_y'

    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', transform_y=replace_y)
    first_item = next(view)
    assert first_item[1] == 'transformed_y'
def test_iterview_max_samples():
    """With max_samples=10 the 'train' IteratorView yields only items 0..9."""
    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', max_samples=10)
    first_elements = [item[0] for item in view]
    assert first_elements == list(range(10))
def test_iterview_test():
    """Iterating the 'test' IteratorView yields items 0..19."""
    view = IteratorView(LiveLoader('.cache', SourceTest()), 'test')
    first_elements = [item[0] for item in view]
    assert first_elements == list(range(20))
def test_iterview_random_fetch_size():
    """With randomize=True and fetch_size=10 the first ten samples are the
    contiguous run 70..79."""
    loader = LiveLoader('.cache', SourceTest())
    view = IteratorView(loader, 'train', randomize=True, fetch_size=10)
    actual_head = [item[0] for item in itertools.islice(view, 10)]
    assert actual_head == list(range(70, 80))
def test_listview_meta():
    """With with_meta=True each ListView item is an (x, y, meta) triple."""
    loader = LiveLoader('.cache', SourceTest())
    view = ListView(loader, 'train', with_meta=True)
    # For the first three items: x == i, y == i + 5, meta == {'meta': i}.
    for i in range(3):
        assert view[i] == (i, i + 5, {'meta': i})