def test_prefetch_timing(method):
    """Prefetching with 2 workers should run well under the serial time."""
    def slow_identity(x):
        # ~20ms of simulated work with a little jitter
        sleep(.02 + 0.01 * (random.random() - .5))
        return x

    # sequential full iteration
    arr = list(range(100))
    fetched = prefetch(smap(slow_identity, arr), nworkers=2, max_cached=20,
                       method=method, timeout=1)
    start = time()
    out = list(fetched)
    duration = time() - start
    assert out == arr
    print("test_prefetch_timing({}):1 {}".format(method, duration))
    assert duration < 1.3

    # strided access with anticipation of the next requested index
    arr = list(range(200))
    fetched = prefetch(smap(slow_identity, arr), nworkers=2, max_cached=20,
                       method=method, timeout=1, anticipate=lambda i: i + 2)
    start = time()
    out = [fetched[i] for i in range(0, len(fetched), 2)]
    duration = time() - start
    assert out == arr[::2]
    print("test_prefetch_timing({}):2 {}".format(method, duration))
    assert duration < 1.3
def test_prefetch(method):
    """Exercise prefetch under random access, worker sleeps and anticipation."""
    def slow_identity(x):
        sleep(0.005 * (1 + random.random()))
        return x

    # random.seed as start_hook for process workers (presumably to give each
    # worker its own RNG state -- confirm against prefetch docs)
    start_hook = random.seed if method == "process" else None

    arr = list(range(300))
    y = prefetch(smap(slow_identity, arr), nworkers=4, max_buffered=10,
                 method=method, timeout=1, start_hook=start_hook)

    # check if workers are properly restarted when asleep
    i = 0
    n_wakeups = 3
    for _ in range(500):
        if n_wakeups > 0 and random.random() < 0.005:
            sleep(1.1)  # will let worker go to sleep
            n_wakeups -= 1
        assert y[i] == arr[i]
        if random.random() < 0.05:
            i = random.randrange(0, len(arr))
        else:
            i = (i + 1) % len(arr)

    # helps with coverage
    y.async_seq._finalize(y.async_seq)

    # overly large buffer
    arr = list(range(10))
    y = prefetch(smap(slow_identity, arr), nworkers=4, max_buffered=50,
                 method=method, timeout=1)
    assert list(y) == arr

    # anticipate method
    arr = list(range(200))
    y = prefetch(smap(slow_identity, arr), nworkers=2, max_buffered=20,
                 method=method, timeout=1, anticipate=lambda i: i + 2)
    assert [y[i] for i in range(0, len(y), 2)] == arr[::2]
def test_prefetch_errors(method):
    """Worker errors must surface as PrefetchException with the original cause.

    Fix: the second error check used a manual try/except/else with
    `a = y[3]; del a` bookkeeping; replaced with idiomatic
    `pytest.raises(...) as exc_info`, which also fails if nothing raises.
    """
    class CustomError(Exception):
        pass

    def f1(x):
        if x is None:
            raise CustomError()
        return x

    arr1 = [1, 2, 3, None]
    y = prefetch(smap(f1, arr1), nworkers=2, max_cached=2, method=method)
    for i in range(3):
        assert y[i] == arr1[i]
    with pytest.raises(PrefetchException):
        y[3]

    # helps with coverage
    y._finalize(y)

    def f2(x):
        if x is None:
            raise ValueError("blablabla")
        return x

    arr2 = smap(f2, arr1)
    y = prefetch(arr2, nworkers=2, max_cached=2, method=method)
    for i in range(3):
        assert y[i] == arr1[i]
    with pytest.raises(PrefetchException) as exc_info:
        y[3]
    # the worker-side error is chained as the cause of the wrapper exception
    assert isinstance(exc_info.value.__cause__, ValueError)

    # the sequence remains usable after an error
    assert y[0] == 1
    assert y[1] == 2

    # helps with coverage
    y._finalize(y)
def test_prefetch_errors(method, evaluation, picklable_err):
    """Check which exception type reaches the caller for each error mode."""
    class CustomError(Exception):
        pass

    def failing(x):
        # the trailing None triggers a picklable or unpicklable error
        if x is None:
            raise ValueError("blablabla") if picklable_err else CustomError()
        return x

    arr1 = [1, 2, 3, None]
    y = prefetch(smap(failing, arr1), nworkers=2, max_buffered=2, method=method)
    seterr(evaluation)

    # process workers cannot ship unpicklable errors back; "wrap" mode always
    # wraps -- both cases yield EvaluationError
    if (method == "process" and not picklable_err) or evaluation == "wrap":
        expected_error = EvaluationError
    elif picklable_err:
        expected_error = ValueError
    else:
        expected_error = CustomError

    for i in range(3):
        assert y[i] == arr1[i]
    with pytest.raises(expected_error):
        y[3]
def test_prefetch(method):
    """Randomly access a slow prefetched sequence, letting workers time out.

    Fixes: the original `if method == "process": start_hook = None else:
    start_hook = None` assigned None on both branches (dead conditional) and
    carried commented-out slicing code; both removed.
    """
    def f1(x):
        sleep(0.005 * (1 + random.random()))
        return x

    start_hook = None  # no per-worker initialization needed here

    arr = list(range(300))
    y = smap(f1, arr)
    y = prefetch(y, nworkers=4, max_cached=10, method=method, timeout=1,
                 start_hook=start_hook)

    i = 0
    n_wakeups = 3
    for _ in range(500):
        if n_wakeups > 0 and random.random() < 0.005:
            sleep(1.1)  # will let worker go to sleep
            n_wakeups -= 1
        assert y[i] == arr[i]
        if random.random() < 0.05:
            i = random.randrange(0, len(arr))
        else:
            i = (i + 1) % len(arr)

    # helps with coverage
    y._finalize(y)
def make_sequence(self):
    """Build a sequence that looks like a dataloader when iterated over."""
    # ordering: batch_sampler takes precedence, then sampler, then shuffle
    if self.batch_sampler:
        batch_indices = list(self.batch_sampler)
        seq = seqtools.smap(lambda bi: [self.dataset[i] for i in bi],
                            batch_indices)
    elif self.sampler:
        seq = seqtools.gather(self.dataset, list(self.sampler))
    elif self.shuffle:
        seq = seqtools.gather(self.dataset,
                              np.random.permutation(len(self.dataset)))
    else:
        seq = self.dataset

    # batching / collation
    if self.batch_sampler:
        # items are already grouped by the batch sampler, only collate
        seq = seqtools.smap(self.collate_fn, seq)
    elif self.batch_size is not None:
        seq = seqtools.batch(seq, k=self.batch_size,
                             drop_last=self.drop_last,
                             collate_fn=self.collate_fn)

    # background prefetching in worker processes
    if self.num_workers > 0:
        seq = seqtools.prefetch(
            seq,
            max_buffered=self.num_workers * self.prefetch_factor,
            nworkers=self.num_workers,
            method='process',
            start_hook=self.worker_init_fn,
            shm_size=self.shm_size)

    # convert into tensors
    seq = seqtools.smap(into_tensors, seq)

    # pin memory in a single background thread
    if self.pin_memory:
        seq = seqtools.smap(pin_tensors_memory, seq)
        seq = seqtools.prefetch(seq, nworkers=1, method='thread',
                                max_buffered=1)

    return seq
def target():
    """Read every element of a prefetched sequence (worker-process body).

    NOTE(review): relies on `f1`, `method` and `init_fn` from the enclosing
    scope -- confirm they are defined before this runs.
    """
    arr = np.random.rand(1000, 10)
    y = smap(f1, arr)
    y = prefetch(y, method=method, max_buffered=40, nworkers=4,
                 start_hook=init_fn)
    for i in range(1000):  # was range(0, 1000) with an unused local binding
        y[i]  # value intentionally discarded; only fetching matters
def test_prefetch_timings(prefetch_kwargs):
    """Run prefetch through several access patterns and verify the values."""
    def slow_identity(x):
        sleep(0.005 * (1 + random.random()))
        return x

    # nominal full iteration
    arr = np.random.rand(100, 10)
    y = prefetch(smap(slow_identity, arr), nworkers=4, max_buffered=10,
                 start_hook=random.seed, **prefetch_kwargs)
    # copy needed to release buffers when shm_size>0
    values = [item.copy() for item in y]
    assert_array_equal(np.stack(values), arr)

    # overly large buffer
    arr = np.random.rand(10, 10)
    y = prefetch(smap(slow_identity, arr), nworkers=4, max_buffered=50,
                 **prefetch_kwargs)
    values = [item.copy() for item in y]
    assert_array_equal(np.stack(values), arr)

    # multiple restarts
    arr = np.random.rand(100, 10)
    y = prefetch(smap(slow_identity, arr), nworkers=4, max_buffered=10,
                 **prefetch_kwargs)
    for _ in range(10):
        stop = np.random.randint(0, 99)
        for i in range(stop):
            assert_array_equal(y[i], arr[i])

    # starvation
    arr = np.random.rand(100, 10)
    y = prefetch(arr, nworkers=2, max_buffered=10, **prefetch_kwargs)
    y[0]
    sleep(2)
    for i in range(1, 100):
        assert_array_equal(y[i], arr[i])
def test_prefetch_errors(error_mode, prefetch_kwargs, picklable_err):
    """Errors must reach the caller with exactly the expected type.

    Fix: the original `try: y[3] / except Exception as e: assert type(e) ==
    error_t` had no `else: assert False`, so the test silently PASSED when no
    exception was raised at all. `pytest.raises` fails in that case; the
    strict `type(...)` comparison is preserved.
    """
    class CustomError(Exception):
        pass

    def f1(x):
        if x is None:
            raise ValueError("blablabla") if picklable_err else CustomError()
        return x

    arr1 = [np.random.rand(10), np.random.rand(10), np.random.rand(10), None]
    y = prefetch(smap(f1, arr1), nworkers=2, max_buffered=4, **prefetch_kwargs)
    seterr(error_mode)

    # non-thread workers cannot ship unpicklable errors; "wrap" always wraps
    if (prefetch_kwargs['method'] != "thread" and not picklable_err) \
            or error_mode == "wrap":
        error_t = EvaluationError
    else:
        error_t = ValueError if picklable_err else CustomError

    for i in range(3):
        assert_array_equal(y[i], arr1[i])
    with pytest.raises(Exception) as exc_info:
        y[3]
    assert type(exc_info.value) == error_t  # strict: no subclasses accepted

    if (prefetch_kwargs['method'] == "process") and error_mode == "passthrough":
        class CustomObject:  # unpicklable object
            pass

        arr1 = [np.random.rand(10), CustomObject(), np.random.rand(10)]
        y = prefetch(arr1, nworkers=2, max_buffered=4, **prefetch_kwargs)
        with pytest.raises(ValueError):
            y[1]
def test_prefetch_timing(method):
    """Consuming 400 prefetched ~20ms items with 2 workers must beat 4.5s."""
    def slow_identity(x):
        sleep(.02 + 0.01 * (random.random() - .5))
        return x

    arr = list(range(420))
    y = prefetch(smap(slow_identity, arr), nworkers=2, max_buffered=20,
                 method=method, timeout=1)

    # consume first items to eliminate worker startup time
    for i in range(20):
        y[i]

    start = time()
    for i in range(20, 420):
        y[i]
    duration = time() - start

    print("test_prefetch_timing({}) {:.2f}s".format(method, duration))
    assert duration < 4.5
def test_prefetch_throughput(prefetch_kwargs):  # pragma: no cover
    """Consuming 400 prefetched ~20ms items with 2 workers must beat 4.5s.

    Fix: the timing print mislabeled this test as "test_prefetch_timing".
    """
    def f1(x):
        sleep(.02 + 0.01 * (random.random() - .5))
        return x

    arr = np.random.rand(420, 10)
    y = smap(f1, arr)
    y = prefetch(y, nworkers=2, max_buffered=40, **prefetch_kwargs)

    for i in range(20):
        y[i]  # consume first items to eliminate worker startup time

    t1 = time()
    for i in range(20, 420):
        y[i]
    t2 = time()
    duration = t2 - t1
    print("test_prefetch_throughput: {:.2f}s".format(duration))
    assert duration < 4.5
def try_prefetch(seq, cores, method, buffered):
    """Build a prefetched view of seq, or return None if the settings are rejected."""
    try:
        print(f"building {cores}-{method},fetch{buffered}")
        result = sq.prefetch(seq, cores, method, buffered)
    except ValueError:
        # invalid worker/method/buffer combination
        return None
    return result
pass # busy waiting return x preprocessed_samples = seqtools.smap(preprocess, all_samples) minibatches = seqtools.batch(preprocessed_samples, 64, collate_fn=list) t1 = time.time() for batch in minibatches: pass t2 = time.time() print("sequential read took {:.1f}\"".format(t2 - t1)) t1 = time.time() for batch in seqtools.prefetch(minibatches, max_cached=100, method="thread", nworkers=2): pass t2 = time.time() print("threaded read took {:.1f}\"".format(t2 - t1)) t1 = time.time() for batch in seqtools.prefetch(minibatches, max_cached=100, method="process", nworkers=2): pass t2 = time.time() print("multiprocessing read took {:.1f}\"".format(t2 - t1))
def test_prefetch_random_objects(prefetch_kwargs):
    """Prefetched values must compare equal to the source objects.

    Fix: the loop variable previously shadowed the prefetched sequence `y`
    (`for x, y in zip(seq, y)`); variables renamed for clarity.
    """
    seq = [build_random_object() for _ in range(1000)]
    fetched = prefetch(seq, 2, **prefetch_kwargs)
    for expected, actual in zip(seq, fetched):
        compare_random_objects(expected, actual)
def test_prefetch_crash(method):
    """Killed workers raise RuntimeError; orphaned workers exit on their own."""
    if platform.python_implementation() == "PyPy":
        pytest.skip("broken with pypy")

    # scenario 1: a worker dies while the parent keeps reading
    with tempfile.TemporaryDirectory() as d:
        def init_fn():
            # each worker registers its pid as a file and dies on SIGUSR1
            signal.signal(signal.SIGUSR1, lambda *_: sys.exit(-1))
            with open('{}/{}'.format(d, os.getpid()), "w"):
                pass

        def f1(x):
            sleep(.02 + 0.01 * (random.random() - .5))
            return x

        arr = np.random.rand(1000, 10)
        y = prefetch(smap(f1, arr), method=method, max_buffered=40,
                     nworkers=4, start_hook=init_fn)

        sleep(0.1)
        # busy-wait until at least one worker has registered, then kill it
        workers = os.listdir(d)
        while not workers:
            workers = os.listdir(d)
        os.kill(int(workers[0]), signal.SIGUSR1)

        with pytest.raises(RuntimeError):
            for i in range(1000):
                y[i]

    # scenario 2: the parent dies, workers must notice and terminate
    with tempfile.TemporaryDirectory() as d:
        def init_fn():
            signal.signal(signal.SIGUSR1, lambda *_: sys.exit(-1))
            with open('{}/{}'.format(d, os.getpid()), "w"):
                pass

        def target():
            arr = np.random.rand(1000, 10)
            y = prefetch(smap(f1, arr), method=method, max_buffered=40,
                         nworkers=4, start_hook=init_fn)
            for i in range(1000):
                y[i]

        p = Process(target=target)
        p.start()
        while len(os.listdir(d)) < 4:
            sleep(0.05)  # wait for all 4 workers to register
        os.kill(p.pid, signal.SIGUSR1)

        sleep(2)  # wait for workers to time out
        for pid in map(int, os.listdir(d)):
            assert not check_pid(pid)