Example No. 1
def test_prefetch_timing(method):
    def f1(x):
        sleep(.02 + 0.01 * (random.random() - .5))
        return x

    arr = list(range(100))
    y = smap(f1, arr)
    y = prefetch(y, nworkers=2, max_cached=20, method=method, timeout=1)

    t1 = time()
    z = list(y)
    t2 = time()

    assert z == arr
    duration = t2 - t1
    print("test_prefetch_timing({}):1 {}".format(method, duration))
    assert duration < 1.3

    arr = list(range(200))
    y = smap(f1, arr)
    y = prefetch(y,
                 nworkers=2,
                 max_cached=20,
                 method=method,
                 timeout=1,
                 anticipate=lambda i: i + 2)

    t1 = time()
    z = [y[i] for i in range(0, len(y), 2)]
    t2 = time()

    assert z == arr[::2]
    duration = t2 - t1
    print("test_prefetch_timing({}):2 {}".format(method, duration))
    assert duration < 1.3
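
The method argument here (and in the other tests on this page) is supplied by pytest parametrization rather than being defined in the snippet. A sketch of how such a test is typically parametrized, given as an assumption about the surrounding test module:

import pytest

@pytest.mark.parametrize("method", ["thread", "process"])
def test_prefetch_timing(method):
    ...  # body as shown above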
Example No. 2
def test_prefetch(method):
    def f1(x):
        sleep(0.005 * (1 + random.random()))
        return x

    if method == "process":
        start_hook = random.seed
    else:
        start_hook = None

    arr = list(range(300))
    y = smap(f1, arr)
    y = prefetch(y,
                 nworkers=4,
                 max_buffered=10,
                 method=method,
                 timeout=1,
                 start_hook=start_hook)

    # check that workers are properly restarted after they go to sleep
    i = 0
    n_wakeups = 3
    for _ in range(500):
        if n_wakeups > 0 and random.random() < 0.005:
            sleep(1.1)  # will let worker go to sleep
            n_wakeups -= 1
        value = y[i]
        assert value == arr[i]
        if random.random() < 0.05:
            i = random.randrange(0, len(arr))
        else:
            i = (i + 1) % len(arr)

    # helps with coverage
    y.async_seq._finalize(y.async_seq)

    # overly large buffer
    arr = list(range(10))
    y = smap(f1, arr)
    y = prefetch(y, nworkers=4, max_buffered=50, method=method, timeout=1)
    assert list(y) == arr

    # anticipate method
    arr = list(range(200))
    y = smap(f1, arr)
    y = prefetch(y,
                 nworkers=2,
                 max_buffered=20,
                 method=method,
                 timeout=1,
                 anticipate=lambda i: i + 2)

    z = [y[i] for i in range(0, len(y), 2)]

    assert z == arr[::2]
Example No. 3
def test_prefetch_errors(method):
    class CustomError(Exception):
        pass

    def f1(x):
        if x is None:
            raise CustomError()
        else:
            return x

    arr1 = [1, 2, 3, None]
    arr2 = smap(f1, arr1)
    y = prefetch(arr2, nworkers=2, max_cached=2, method=method)

    for i in range(3):
        assert y[i] == arr1[i]
    with pytest.raises(PrefetchException):
        a = y[3]
        del a

    def f2(x):
        if x is None:
            raise ValueError("blablabla")
        else:
            return x

    # helps with coverage
    y._finalize(y)

    arr2 = smap(f2, arr1)
    y = prefetch(arr2, nworkers=2, max_cached=2, method=method)

    for i in range(3):
        assert y[i] == arr1[i]
    try:
        a = y[3]
        del a
    except Exception as e:
        assert isinstance(e, PrefetchException)
        assert isinstance(e.__cause__, ValueError)
    else:
        assert False

    assert y[0] == 1
    assert y[1] == 2

    # helps with coverage
    y._finalize(y)
Example No. 4
def test_prefetch_errors(method, evaluation, picklable_err):
    class CustomError(Exception):
        pass

    def f1(x):
        if x is None:
            raise ValueError("blablabla") if picklable_err else CustomError()
        else:
            return x

    arr1 = [1, 2, 3, None]
    arr2 = smap(f1, arr1)
    y = prefetch(arr2, nworkers=2, max_buffered=2, method=method)

    seterr(evaluation)
    if (method == "process" and not picklable_err) or evaluation == "wrap":
        error_t = EvaluationError
    else:
        error_t = ValueError if picklable_err else CustomError

    for i in range(3):
        assert y[i] == arr1[i]
    with pytest.raises(error_t):
        a = y[3]
        del a
Example No. 5
def test_prefetch(method):
    def f1(x):
        sleep(0.005 * (1 + random.random()))
        return x

    if method == "process":
        start_hook = None
    else:
        start_hook = None

    arr = list(range(300))
    y = smap(f1, arr)
    y = prefetch(y,
                 nworkers=4,
                 max_cached=10,
                 method=method,
                 timeout=1,
                 start_hook=start_hook)
    # arr = arr[3:-1:2]
    # y = y[3:-1:2]

    i = 0
    n_wakeups = 3
    for _ in range(500):
        if n_wakeups > 0 and random.random() < 0.005:
            sleep(1.1)  # will let worker go to sleep
            n_wakeups -= 1
        assert y[i] == arr[i]
        if random.random() < 0.05:
            i = random.randrange(0, len(arr))
        else:
            i = (i + 1) % len(arr)

    # helps with coverage
    y._finalize(y)
Example No. 6
    def make_sequence(self):
        """Build a sequence that looks like a dataloader when iterated over."""
        # shuffling
        if self.batch_sampler:
            batch_indices = list(self.batch_sampler)
            out = seqtools.smap(lambda bi: [self.dataset[i] for i in bi],
                                batch_indices)
        elif self.sampler:
            shuffle_indices = list(self.sampler)
            out = seqtools.gather(self.dataset, shuffle_indices)
        elif self.shuffle:
            shuffle_indices = np.random.permutation(len(self.dataset))
            out = seqtools.gather(self.dataset, shuffle_indices)
        else:
            out = self.dataset

        # batch
        if not self.batch_sampler and self.batch_size is not None:
            out = seqtools.batch(out,
                                 k=self.batch_size,
                                 drop_last=self.drop_last,
                                 collate_fn=self.collate_fn)
        elif self.batch_sampler:
            out = seqtools.smap(self.collate_fn, out)

        # prefetch
        if self.num_workers > 0:
            out = seqtools.prefetch(out,
                                    max_buffered=self.num_workers *
                                    self.prefetch_factor,
                                    nworkers=self.num_workers,
                                    method='process',
                                    start_hook=self.worker_init_fn,
                                    shm_size=self.shm_size)

        # convert into tensors
        out = seqtools.smap(into_tensors, out)

        # pin memory
        if self.pin_memory:
            out = seqtools.smap(pin_tensors_memory, out)
            out = seqtools.prefetch(out,
                                    nworkers=1,
                                    method='thread',
                                    max_buffered=1)

        return out
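
make_sequence chains shuffling, batching, multi-process prefetching, tensor conversion, and memory pinning into one lazy pipeline. For readability, here is a condensed sketch of the same staging applied to a plain in-memory dataset; the toy dataset, batch size, worker count, and the choice of the thread backend are illustrative assumptions, and only seqtools calls already used above (gather, batch, prefetch) are relied on:

import numpy as np
import seqtools

dataset = [np.random.rand(3, 32, 32) for _ in range(256)]  # toy stand-in for self.dataset

# shuffle -> batch -> prefetch, mirroring the order used in make_sequence
shuffled = seqtools.gather(dataset, np.random.permutation(len(dataset)))
batched = seqtools.batch(shuffled, k=16, drop_last=True, collate_fn=np.stack)
loader = seqtools.prefetch(batched, nworkers=2, method='thread', max_buffered=4)

for minibatch in loader:
    pass  # one full pass over the shuffled, batched dataset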
Example No. 7
        def target():
            arr = np.random.rand(1000, 10)
            y = smap(f1, arr)
            y = prefetch(y,
                         method=method,
                         max_buffered=40,
                         nworkers=4,
                         start_hook=init_fn)

            for i in range(0, 1000):
                a = y[i]
Example No. 8
def test_prefetch_timings(prefetch_kwargs):
    def f1(x):
        sleep(0.005 * (1 + random.random()))
        return x

    start_hook = random.seed

    arr = np.random.rand(100, 10)
    y = smap(f1, arr)
    y = prefetch(y,
                 nworkers=4,
                 max_buffered=10,
                 start_hook=start_hook,
                 **prefetch_kwargs)
    y = [y_.copy()
         for y_ in y]  # copy needed to release buffers when shm_size>0
    assert_array_equal(np.stack(y), arr)

    # overly large buffer
    arr = np.random.rand(10, 10)
    y = smap(f1, arr)
    y = prefetch(y, nworkers=4, max_buffered=50, **prefetch_kwargs)
    y = [y_.copy() for y_ in y]
    assert_array_equal(np.stack(y), arr)

    # multiple restarts
    arr = np.random.rand(100, 10)
    y = smap(f1, arr)
    y = prefetch(y, nworkers=4, max_buffered=10, **prefetch_kwargs)
    for _ in range(10):
        n = np.random.randint(0, 99)
        for i in range(n):
            assert_array_equal(y[i], arr[i])

    # starvation
    arr = np.random.rand(100, 10)
    y = prefetch(arr, nworkers=2, max_buffered=10, **prefetch_kwargs)
    y[0]
    sleep(2)
    for i in range(1, 100):
        assert_array_equal(y[i], arr[i])
Example No. 9
def test_prefetch_errors(error_mode, prefetch_kwargs, picklable_err):
    class CustomError(Exception):
        pass

    def f1(x):
        if x is None:
            raise ValueError("blablabla") if picklable_err else CustomError()
        else:
            return x

    arr1 = [np.random.rand(10), np.random.rand(10), np.random.rand(10), None]
    arr2 = smap(f1, arr1)
    y = prefetch(arr2, nworkers=2, max_buffered=4, **prefetch_kwargs)

    seterr(error_mode)
    if (prefetch_kwargs['method'] != "thread"
            and not picklable_err) or error_mode == "wrap":
        error_t = EvaluationError
    else:
        error_t = ValueError if picklable_err else CustomError

    for i in range(3):
        assert_array_equal(y[i], arr1[i])
    try:
        a = y[3]
        del a
    except Exception as e:
        assert type(e) == error_t
    else:
        assert False, "expected an exception of type {}".format(error_t.__name__)

    if (prefetch_kwargs['method']
            == "process") and error_mode == "passthrough":

        class CustomObject:  # unpicklable object
            pass

        arr1 = [np.random.rand(10), CustomObject(), np.random.rand(10)]
        y = prefetch(arr1, nworkers=2, max_buffered=4, **prefetch_kwargs)
        with pytest.raises(ValueError):
            y[1]
Example No. 10
def test_prefetch_timing(method):
    def f1(x):
        sleep(.02 + 0.01 * (random.random() - .5))
        return x

    arr = list(range(420))
    y = smap(f1, arr)
    y = prefetch(y, nworkers=2, max_buffered=20, method=method, timeout=1)

    for i in range(20):
        y[i]  # consume first items to eliminate worker startup time
    t1 = time()
    for i in range(20, 420):
        y[i]
    t2 = time()

    duration = t2 - t1
    print("test_prefetch_timing({}) {:.2f}s".format(method, duration))

    assert duration < 4.5
Example No. 11
def test_prefetch_throughput(prefetch_kwargs):  # pragma: no cover
    def f1(x):
        sleep(.02 + 0.01 * (random.random() - .5))
        return x

    arr = np.random.rand(420, 10)
    y = smap(f1, arr)
    y = prefetch(y, nworkers=2, max_buffered=40, **prefetch_kwargs)

    for i in range(20):
        y[i]  # consume first items to eliminate worker startup time

    t1 = time()
    for i in range(20, 420):
        y[i]
    t2 = time()

    duration = t2 - t1
    print("test_prefetch_timing: {:.2f}s".format(duration))

    assert duration < 4.5
Example No. 12
def try_prefetch(seq, cores, method, buffered):
    try:
        print(f"building {cores}-{method},fetch{buffered}")
        return sq.prefetch(seq, cores, method, buffered)
    except ValueError:
        return None
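
A minimal usage sketch for the helper above; the input sequence and parameter values are illustrative, and sq is assumed to be seqtools imported under that alias, as in the helper itself:

import seqtools as sq

samples = list(range(1000))               # illustrative input sequence
slow = sq.smap(lambda x: x * x, samples)  # stand-in for an expensive per-item transform

loaded = try_prefetch(slow, cores=4, method="thread", buffered=16)
if loaded is None:  # prefetch() rejected the arguments
    loaded = slow   # fall back to plain lazy evaluation

print(sum(loaded))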
Example No. 13
import time

import seqtools


def preprocess(x):
    # stand-in for an expensive, CPU-bound preprocessing step; the excerpt starts
    # mid-function, so the busy-wait duration below is an illustrative guess
    t0 = time.time()
    while time.time() - t0 < 0.01:
        pass  # busy waiting
    return x


preprocessed_samples = seqtools.smap(preprocess, all_samples)
minibatches = seqtools.batch(preprocessed_samples, 64, collate_fn=list)

t1 = time.time()
for batch in minibatches:
    pass
t2 = time.time()
print("sequential read took {:.1f}\"".format(t2 - t1))

t1 = time.time()
for batch in seqtools.prefetch(minibatches,
                               max_cached=100,
                               method="thread",
                               nworkers=2):
    pass
t2 = time.time()
print("threaded read took {:.1f}\"".format(t2 - t1))

t1 = time.time()
for batch in seqtools.prefetch(minibatches,
                               max_cached=100,
                               method="process",
                               nworkers=2):
    pass
t2 = time.time()
print("multiprocessing read took {:.1f}\"".format(t2 - t1))
Example No. 14
def test_prefetch_random_objects(prefetch_kwargs):
    seq = [build_random_object() for _ in range(1000)]
    y = prefetch(seq, 2, **prefetch_kwargs)
    for expected, value in zip(seq, y):
        compare_random_objects(expected, value)
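
build_random_object and compare_random_objects are not included in the excerpt. A minimal sketch of what they might look like, offered purely as an assumption so the test can be read end to end:

import random

import numpy as np


def build_random_object():
    # hypothetical helper: nested structure mixing arrays and plain Python values
    return {
        "id": random.randint(0, 10**6),
        "data": np.random.rand(random.randint(1, 5), 8),
        "tags": [random.choice("abc") for _ in range(3)],
    }


def compare_random_objects(a, b):
    # hypothetical helper: field-by-field equality check of two such objects
    assert a["id"] == b["id"]
    assert a["tags"] == b["tags"]
    np.testing.assert_array_equal(a["data"], b["data"])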
Example No. 15
def test_prefetch_crash(method):
    if platform.python_implementation() == "PyPy":
        pytest.skip("broken with pypy")

    # worker dies
    with tempfile.TemporaryDirectory() as d:

        def init_fn():
            signal.signal(signal.SIGUSR1, lambda *_: sys.exit(-1))
            with open('{}/{}'.format(d, os.getpid()), "w"):
                pass

        def f1(x):
            sleep(.02 + 0.01 * (random.random() - .5))
            return x

        arr = np.random.rand(1000, 10)
        y = smap(f1, arr)
        y = prefetch(y,
                     method=method,
                     max_buffered=40,
                     nworkers=4,
                     start_hook=init_fn)

        sleep(0.1)

        while True:
            if len(os.listdir(d)) > 0:
                os.kill(int(os.listdir(d)[0]), signal.SIGUSR1)
                break

        with pytest.raises(RuntimeError):
            for i in range(0, 1000):
                a = y[i]

    # parent dies
    with tempfile.TemporaryDirectory() as d:

        def init_fn():
            signal.signal(signal.SIGUSR1, lambda *_: sys.exit(-1))
            with open('{}/{}'.format(d, os.getpid()), "w"):
                pass

        def target():
            arr = np.random.rand(1000, 10)
            y = smap(f1, arr)
            y = prefetch(y,
                         method=method,
                         max_buffered=40,
                         nworkers=4,
                         start_hook=init_fn)

            for i in range(0, 1000):
                a = y[i]

        p = Process(target=target)
        p.start()

        while len(os.listdir(d)) < 4:
            sleep(0.05)

        os.kill(p.pid, signal.SIGUSR1)
        sleep(2)  # wait for workers to time out

        for pid in map(int, os.listdir(d)):
            assert not check_pid(pid)
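
check_pid is not shown in the excerpt; a common implementation, given here as an assumption, probes process existence with signal 0:

import os


def check_pid(pid):
    # hypothetical helper: returns True if a process with this pid still exists
    try:
        os.kill(pid, 0)  # signal 0 performs error checking only, no signal is sent
    except OSError:
        return False
    return True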