예제 #1
0
def test_nanny_process_failure(loop):
    """Check that the nanny's worker process comes back after it exits.

    A ``sys.exit`` call is submitted to the worker; the test then waits for
    ``n.process`` to stop being alive, to become alive again, and for
    ``n.worker_address`` to show up in ``c.ncores``.

    Parameters
    ----------
    loop
        Object whose ``run_sync`` drives the test coroutine (presumably a
        Tornado IOLoop fixture -- confirm against the test setup).
    """
    c = Center('127.0.0.1', 8036)
    n = Nanny('127.0.0.1', 8037, 8038, '127.0.0.1', 8036, ncores=2)
    c.listen(c.port)

    @gen.coroutine
    def f():
        nn = rpc(ip=n.ip, port=n.port)  # NOTE(review): never used below
        yield n._start()

        # Everything after start-up runs under try/finally so that a failed
        # assertion still closes the nanny and stops the center instead of
        # leaving them bound to their fixed ports.
        try:
            ww = rpc(ip=n.ip, port=n.worker_port)
            yield ww.update_data(data={'x': 1, 'y': 2})
            # Presumably the worker exits while serving this call, so a
            # closed stream is tolerated here.
            with ignoring(StreamClosedError):
                yield ww.compute(function=sys.exit, args=(0,), key='z')

            start = time()
            while n.process.is_alive():  # wait while process dies
                yield gen.sleep(0.01)
                assert time() - start < 2

            start = time()
            while not n.process.is_alive():  # wait while process comes back
                yield gen.sleep(0.01)
                assert time() - start < 2

            start = time()
            while n.worker_address not in c.ncores:  # wait for re-registration
                yield gen.sleep(0.01)
                assert time() - start < 2
        finally:
            yield n._close()
            c.stop()

    loop.run_sync(f)
예제 #2
0
def test_fast_kill(loop):
    """Restart an executor while ``sleep`` tasks are pending.

    Asserts that ``_restart`` returns in under five seconds, that every
    mapped future ends up with status ``'cancelled'``, and that a new task
    submitted afterwards still yields the right result.

    ``loop`` is passed to the Executor and used to run the coroutine.
    """
    from distributed import Nanny, rpc
    center = Center('127.0.0.1')
    center.listen(0)
    nanny_a = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    nanny_b = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    executor = Executor((center.ip, center.port), start=False, loop=loop)

    @gen.coroutine
    def scenario():
        yield nanny_a._start()
        yield nanny_b._start()

        # Poll until the center lists at least two entries in ``ncores``.
        while len(center.ncores) < 2:
            yield gen.sleep(0.01)
        yield executor._start()

        pending = executor.map(sleep, range(10))

        try:
            began = time()
            yield executor._restart()
            assert time() - began < 5

            assert all(fut.status == 'cancelled' for fut in pending)

            # The executor must remain usable after the restart.
            fresh = executor.submit(inc, 1)
            value = yield fresh._result()
            assert value == 2
        finally:
            yield nanny_a._close()
            yield nanny_b._close()
            yield executor._shutdown(fast=True)
            center.stop()

    loop.run_sync(scenario)
예제 #3
0
    def g():
        """Build a center plus two workers, run ``f`` on them, then tear down.

        ``f`` and ``b_ip`` come from the enclosing scope, which is not
        visible in this excerpt.
        """
        center = Center('127.0.0.1')
        center.listen(0)
        worker_a = Worker(center.ip, center.port, ncores=2, ip='127.0.0.1')
        yield worker_a._start()
        worker_b = Worker(center.ip, center.port, ncores=1, ip=b_ip)
        yield worker_b._start()

        began = time()
        try:
            # Give up if the center does not list two entries within 5 s.
            while len(center.ncores) < 2:
                yield gen.sleep(0.01)
                if time() - began > 5:
                    raise Exception("Cluster creation timeout")

            yield f(center, worker_a, worker_b)
        except Exception as exc:
            # Log the failure, then let it propagate unchanged.
            logger.exception(exc)
            raise
        finally:
            logger.debug("Closing out test cluster")
            for worker in (worker_a, worker_b):
                with ignoring(TimeoutError, StreamClosedError, OSError):
                    yield worker._close()
                # Remove the worker's local directory if it is still there.
                if os.path.exists(worker.local_dir):
                    shutil.rmtree(worker.local_dir)
            center.stop()
예제 #4
0
def test_fast_kill(loop):
    """Restart an executor with pending ``sleep`` tasks (fixed-port variant).

    The center listens on port 8006 and the nannies are built with ports
    8007-8010.  The test asserts that ``_restart`` takes under five
    seconds, that all mapped futures become ``'cancelled'``, and that a
    task submitted afterwards returns its result.
    """
    from distributed import Nanny, rpc
    center = Center('127.0.0.1', 8006)
    nanny_a = Nanny('127.0.0.1', 8007, 8008, '127.0.0.1', 8006, ncores=2)
    nanny_b = Nanny('127.0.0.1', 8009, 8010, '127.0.0.1', 8006, ncores=2)
    executor = Executor((center.ip, center.port), start=False, loop=loop)
    center.listen(center.port)

    @gen.coroutine
    def scenario():
        yield nanny_a._start()
        yield nanny_b._start()

        # Poll until the center lists at least two entries in ``ncores``.
        while len(center.ncores) < 2:
            yield gen.sleep(0.01)
        yield executor._start()

        pending = executor.map(sleep, range(10))

        try:
            began = time()
            yield executor._restart()
            assert time() - began < 5

            assert all(fut.status == 'cancelled' for fut in pending)

            # The executor must remain usable after the restart.
            fresh = executor.submit(inc, 1)
            value = yield fresh._result()
            assert value == 2
        finally:
            yield nanny_a._close()
            yield nanny_b._close()
            yield executor._shutdown(fast=True)
            center.stop()

    loop.run_sync(scenario)
예제 #5
0
def test_monitor_resources(loop):
    """Check the nanny's resource reporting, both direct and streamed.

    ``n.resource_collect()`` must return a mapping containing
    ``cpu_percent`` and ``memory_percent`` and a ``datetime`` under
    ``'timestamp'``.  A ``monitor_resources`` request written to a raw
    stream must then produce dict messages with the same two keys.

    Parameters
    ----------
    loop
        Object whose ``run_sync`` drives the test coroutine.
    """
    c = Center('127.0.0.1', 8026)
    n = Nanny('127.0.0.1', 8027, 8028, '127.0.0.1', 8026, ncores=2)
    c.listen(c.port)

    @gen.coroutine
    def f():
        nn = rpc(ip=n.ip, port=n.port)  # NOTE(review): never used below
        yield n._start()

        # Run the checks under try/finally so a failed assertion still closes
        # the stream, the nanny and the center (all on fixed ports).
        stream = None
        try:
            assert n.process.is_alive()
            d = n.resource_collect()
            assert {'cpu_percent', 'memory_percent'}.issubset(d)

            assert isinstance(d['timestamp'], datetime)

            stream = yield connect(ip=n.ip, port=n.port)
            yield write(stream, {'op': 'monitor_resources', 'interval': 0.01})

            # Read three consecutive reports from the stream.
            for i in range(3):
                msg = yield read(stream)
                assert isinstance(msg, dict)
                assert {'cpu_percent', 'memory_percent'}.issubset(msg)
        finally:
            if stream is not None:
                stream.close()
            yield n._close()
            c.stop()

    loop.run_sync(f)
예제 #6
0
def run_center(port):
    """Start a Center on 127.0.0.1 at ``port`` and run the current IOLoop.

    The "tornado" logger is raised to CRITICAL before the center is
    created.  The call blocks inside ``IOLoop.current().start()``.
    """
    from distributed import Center
    from tornado.ioloop import IOLoop
    import logging

    tornado_logger = logging.getLogger("tornado")
    tornado_logger.setLevel(logging.CRITICAL)

    server = Center('127.0.0.1', port)
    server.listen(port)

    IOLoop.current().start()
    # Never reached. TODO: clean shutdown of IOLoop
    IOLoop.current().close()
예제 #7
0
def run_center(port):
    """Serve a Center bound to 127.0.0.1 on ``port`` using the current IOLoop.

    Tornado's logger is set to CRITICAL first; the function then blocks in
    the loop's ``start()`` call.
    """
    from distributed import Center
    from tornado.ioloop import IOLoop
    import logging

    logging.getLogger("tornado").setLevel(logging.CRITICAL)

    local_center = Center('127.0.0.1', port)
    local_center.listen(port)

    IOLoop.current().start()
    # Never reached. TODO: clean shutdown of IOLoop
    IOLoop.current().close()
예제 #8
0
def main(host, port):
    """Run a Center on ``host`` and block in the current IOLoop.

    Parameters
    ----------
    host
        Address handed to ``Center``; when ``None`` the result of
        ``get_ip()`` is used instead.
    port
        Port the center listens on (logged with ``%d``).
    """
    host = get_ip() if host is None else host

    logger.info("Start center at %s:%d", host, port)
    server = Center(host)
    server.listen(port)

    # Blocks until the loop is stopped; afterwards the loop is closed and
    # the end of the run is logged.
    IOLoop.current().start()
    IOLoop.current().close()
    logger.info("\nEnd center at %s:%d", host, port)
예제 #9
0
def main(host, port):
    """Start a Center and run the current IOLoop until it stops.

    Parameters
    ----------
    host
        Address passed to ``Center``.  ``None`` is replaced by ``get_ip()``.
    port
        Port passed to ``Center.listen`` and included in the log messages.
    """
    if host is None:
        resolved_host = get_ip()
    else:
        resolved_host = host

    logger.info("Start center at %s:%d", resolved_host, port)
    the_center = Center(resolved_host)
    the_center.listen(port)

    IOLoop.current().start()
    IOLoop.current().close()
    logger.info("\nEnd center at %s:%d", resolved_host, port)
예제 #10
0
def run_center(port):
    """Run a Center on 127.0.0.1 at ``port`` inside a freshly created IOLoop.

    The global IOLoop instance is cleared and a new loop is made current
    before the center is created; the call then blocks in ``start()``.
    """
    from distributed import Center
    from tornado.ioloop import IOLoop, PeriodicCallback
    import logging

    IOLoop.clear_instance()
    io_loop = IOLoop()
    io_loop.make_current()

    # No-op callback every 500 ms.  NOTE(review): presumably there to wake
    # the loop periodically -- confirm the intent.
    PeriodicCallback(lambda: None, 500).start()

    logging.getLogger("tornado").setLevel(logging.CRITICAL)
    server = Center('127.0.0.1', port)
    server.listen(port)
    io_loop.start()
예제 #11
0
def test_errors_dont_block():
    """Check that failing tasks do not stop other tasks from finishing.

    Two ``inc`` tasks and two ``throws`` tasks are submitted; the test polls
    (at most 1000 times, 0.01 s apart) until both ``inc`` futures report
    ``'finished'`` and then gathers their results.
    """
    c = Center('127.0.0.1', 8017)
    w = Worker('127.0.0.2', 8018, c.ip, c.port, ncores=1)
    e = Executor((c.ip, c.port), start=False)

    @gen.coroutine
    def f():
        c.listen(c.port)
        yield w._start()

        # Run the checks under try/finally so a failed assertion still closes
        # the worker and stops the center instead of leaving the fixed ports
        # occupied.
        # NOTE(review): the executor itself is never shut down here.
        try:
            IOLoop.current().spawn_callback(e._go)

            L = [
                e.submit(inc, 1),
                e.submit(throws, 1),
                e.submit(inc, 2),
                e.submit(throws, 2)
            ]

            i = 0
            while not (L[0].status == L[2].status == 'finished'):
                i += 1
                # Bounded polling: fail rather than spin forever.
                assert i < 1000
                yield gen.sleep(0.01)
            result = yield e._gather([L[0], L[2]])
            assert result == [2, 3]
        finally:
            yield w._close()
            c.stop()

    IOLoop.current().run_sync(f)
예제 #12
0
def test_restart(loop):
    """Exercise ``Executor._restart`` with two nannies on fixed ports.

    After two chained ``inc`` tasks finish, the scheduler's ``who_has`` must
    match the center's.  After the restart both must be empty, the scheduler
    must have two ``stacks`` and two ``processing`` entries, and both futures
    must be cancelled.
    """
    from distributed import Nanny, rpc
    center = Center('127.0.0.1', 8006)
    nanny_a = Nanny('127.0.0.1', 8007, 8008, '127.0.0.1', 8006, ncores=2)
    nanny_b = Nanny('127.0.0.1', 8009, 8010, '127.0.0.1', 8006, ncores=2)
    center.listen(center.port)

    @gen.coroutine
    def scenario():
        yield nanny_a._start()
        yield nanny_b._start()

        executor = Executor((center.ip, center.port), start=False, loop=loop)
        yield executor._start()
        assert executor.scheduler.ncores == {nanny_a.worker_address: 2,
                                             nanny_b.worker_address: 2}

        first = executor.submit(inc, 1)
        second = executor.submit(inc, first)
        yield second._result()

        center_rpc = rpc(ip=center.ip, port=center.port)
        held = yield center_rpc.who_has()
        try:
            assert executor.scheduler.who_has == held
            assert set(executor.scheduler.who_has) == {first.key, second.key}

            # ``_restart`` is expected to hand back the executor itself.
            restarted = yield executor._restart()
            assert restarted is executor

            assert len(executor.scheduler.stacks) == 2
            assert len(executor.scheduler.processing) == 2

            held = yield center_rpc.who_has()
            assert not held
            assert not executor.scheduler.who_has

            assert first.cancelled()
            assert second.cancelled()

        finally:
            yield nanny_a._close()
            yield nanny_b._close()
            yield executor._shutdown(fast=True)
            center.stop()

    loop.run_sync(scenario)
예제 #13
0
    def g():
        """Start a center and two workers on fixed ports, then run ``f``.

        ``f`` comes from the enclosing scope, which is not visible in this
        excerpt.  The workers and the center are shut down afterwards even
        if ``f`` raises.
        """
        center = Center('127.0.0.1', 8017)
        center.listen(center.port)
        worker_a = Worker('127.0.0.1', 8018, center.ip, center.port, ncores=2)
        yield worker_a._start()
        worker_b = Worker('127.0.0.1', 8019, center.ip, center.port, ncores=1)
        yield worker_b._start()

        # Poll until the center lists at least two entries in ``ncores``.
        while len(center.ncores) < 2:
            yield gen.sleep(0.01)

        try:
            yield f(center, worker_a, worker_b)
        finally:
            # NOTE(review): ``ignoring()`` is called without any exception
            # types -- confirm what, if anything, it suppresses.
            with ignoring():
                yield worker_a._close()
            with ignoring():
                yield worker_b._close()
            center.stop()
예제 #14
0
def test_restart():
    """Exercise ``Executor._restart`` against two nannies on ephemeral ports.

    Generator-style test; presumably driven by a coroutine test decorator
    that is not visible in this excerpt.
    """
    from distributed import Nanny, rpc
    center = Center('127.0.0.1')
    center.listen(0)
    nanny_a = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    nanny_b = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')

    # Both nannies are started in a single yield.
    yield [nanny_a._start(), nanny_b._start()]

    executor = Executor((center.ip, center.port), start=False,
                        loop=IOLoop.current())
    yield executor._start()

    assert executor.scheduler.ncores == {nanny_a.worker_address: 2,
                                         nanny_b.worker_address: 2}

    first = executor.submit(inc, 1)
    second = executor.submit(inc, first)
    yield second._result()

    center_rpc = rpc(ip=center.ip, port=center.port)
    held = yield center_rpc.who_has()
    try:
        assert executor.scheduler.who_has == held
        assert set(executor.scheduler.who_has) == {first.key, second.key}

        # ``_restart`` is expected to hand back the executor itself.
        restarted = yield executor._restart()
        assert restarted is executor

        assert len(executor.scheduler.stacks) == 2
        assert len(executor.scheduler.processing) == 2

        held = yield center_rpc.who_has()
        assert not held
        assert not executor.scheduler.who_has

        assert first.cancelled()
        assert second.cancelled()

    finally:
        yield nanny_a._close()
        yield nanny_b._close()
        yield executor._shutdown(fast=True)
        center.stop()
예제 #15
0
def run_center(q):
    """Run a Center on 127.0.0.1 in a fresh IOLoop and report its port.

    ``listen(0)`` is retried until it succeeds; the resulting
    ``center.port`` is then put on ``q`` and the loop is started, which
    blocks.
    """
    from distributed import Center
    from tornado.ioloop import IOLoop, PeriodicCallback
    import logging

    IOLoop.clear_instance()
    io_loop = IOLoop()
    io_loop.make_current()
    PeriodicCallback(lambda: None, 500).start()
    logging.getLogger("tornado").setLevel(logging.CRITICAL)
    server = Center('127.0.0.1')

    # Keep trying to bind; every failure is logged with its traceback.
    listening = False
    while not listening:
        try:
            server.listen(0)
            listening = True
        except Exception:
            logging.info("Could not start center on port.  Retrying",
                         exc_info=True)

    q.put(server.port)
    io_loop.start()
예제 #16
0
def test_sync_interactively():
    """Scatter values through the center and collect them back via ``sync``.

    The center and both workers are created with ``start=True,
    block=False``.  After scattering, the merged worker data must map each
    returned key to its value, and collecting must return the original list.
    """
    center = Center('127.0.0.1', 8017, start=True, block=False)
    worker_a = Worker('127.0.0.1', 8018, center.ip, center.port, ncores=1,
                      start=True, block=False)
    worker_b = Worker('127.0.0.1', 8019, center.ip, center.port, ncores=1,
                      start=True, block=False)

    try:
        # Poll until the center lists at least two entries in ``ncores``.
        while len(center.ncores) < 2:
            sleep(0.01)
        values = [1, 2, 3, 4, 5, 6, 7, 8]
        remote = sync(scatter_to_center(center.ip, center.port, values))
        assert merge(worker_a.data, worker_b.data) == {
            item.key: value for item, value in zip(remote, values)
        }

        collected = sync(collect_from_center(center.ip, center.port, remote))
        assert collected == values
    finally:
        worker_a.close()
        worker_b.close()
        center.close()
예제 #17
0
def test_monitor_resources():
    """Check that a Scheduler collects resource logs from two nannies.

    Skipped when ``psutil`` is unavailable.  Generator-style test;
    presumably driven by a coroutine test decorator not visible here.
    """
    pytest.importorskip('psutil')
    center = Center('127.0.0.1')
    center.listen(0)
    nanny_a = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    nanny_b = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    sched = Scheduler((center.ip, center.port), resource_interval=0.01,
                      resource_log_size=3)

    yield nanny_a._start()
    yield nanny_b._start()
    yield sched.sync_center()
    finished = sched.start()

    try:
        assert sched.ncores == {('127.0.0.1', nanny_a.worker_port): 2,
                                ('127.0.0.1', nanny_b.worker_port): 2}
        assert sched.nannies == {(nanny.ip, nanny.worker_port): nanny.port
                                 for nanny in [nanny_a, nanny_b]}

        # Wait until every log holds at least three entries.
        while any(len(log) < 3 for log in sched.resource_logs.values()):
            yield gen.sleep(0.01)

        yield gen.sleep(0.1)

        # After more time has passed the logs must still hold exactly three.
        assert set(sched.resource_logs) == {nanny_a.address, nanny_b.address}
        assert all(len(log) == 3 for log in sched.resource_logs.values())

        report = sched.diagnostic_resources(n=2)
        assert set(report) == {nanny_a.worker_address, nanny_b.worker_address}
        assert set(report[nanny_a.worker_address]).issubset(
            {'cpu', 'memory', 'time'})
        assert all(len(series) == 2
                   for series in report[nanny_a.worker_address].values())

        sched.put({'op': 'close'})
        yield finished
    finally:
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield nanny_a._close(timeout=0.5)
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield nanny_b._close(timeout=0.5)
        center.stop()
예제 #18
0
    def g():
        """Create a center and two workers, hand them to ``f``, then clean up.

        ``f`` and ``b_ip`` are taken from the enclosing scope, which this
        excerpt does not show.
        """
        hub = Center('127.0.0.1')
        hub.listen(0)
        first_worker = Worker(hub.ip, hub.port, ncores=2, ip='127.0.0.1')
        yield first_worker._start()
        second_worker = Worker(hub.ip, hub.port, ncores=1, ip=b_ip)
        yield second_worker._start()

        t0 = time()
        try:
            # Abort if two entries have not appeared in ``ncores`` after 5 s.
            while len(hub.ncores) < 2:
                yield gen.sleep(0.01)
                if time() - t0 > 5:
                    raise Exception("Cluster creation timeout")

            yield f(hub, first_worker, second_worker)
        except Exception as err:
            # Record the failure and re-raise it untouched.
            logger.exception(err)
            raise
        finally:
            logger.debug("Closing out test cluster")
            for member in (first_worker, second_worker):
                with ignoring(TimeoutError, StreamClosedError, OSError):
                    yield member._close()
                # Delete the worker's local directory when it still exists.
                if os.path.exists(member.local_dir):
                    shutil.rmtree(member.local_dir)
            hub.stop()
예제 #19
0
def test_monitor_resources():
    """Check the nanny's resource reporting, both direct and streamed.

    Skipped when ``psutil`` is unavailable.  ``n.resource_collect()`` must
    contain ``cpu_percent`` and ``memory_percent`` plus a ``datetime`` under
    ``'timestamp'``; a ``monitor_resources`` request on a raw stream must
    then yield dict messages with the same two keys.

    Generator-style test; presumably driven by a coroutine test decorator
    not visible in this excerpt.
    """
    pytest.importorskip('psutil')
    c = Center(ip='127.0.0.1')
    c.listen(0)
    n = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')

    yield n._start()
    nn = rpc(ip=n.ip, port=n.port)  # NOTE(review): never used below

    # Run the checks under try/finally so a failed assertion still closes the
    # stream, the nanny and the center instead of leaking them.
    stream = None
    try:
        assert n.process.is_alive()
        d = n.resource_collect()
        assert {'cpu_percent', 'memory_percent'}.issubset(d)

        assert isinstance(d['timestamp'], datetime)

        stream = yield connect(ip=n.ip, port=n.port)
        yield write(stream, {'op': 'monitor_resources', 'interval': 0.01})

        # Read three consecutive reports from the stream.
        for i in range(3):
            msg = yield read(stream)
            assert isinstance(msg, dict)
            assert {'cpu_percent', 'memory_percent'}.issubset(msg)
    finally:
        if stream is not None:
            stream.close()
        yield n._close()
        c.stop()
예제 #20
0
def test_nanny():
    """Walk a Nanny through kill / kill / instantiate / terminate via rpc.

    After each step the test checks ``n.process`` and whether
    ``n.worker_address`` is present in the center's ``ncores`` and
    ``worker_services``.

    Generator-style test; presumably driven by a coroutine test decorator
    not visible in this excerpt.
    """
    c = Center('127.0.0.1')
    c.listen(0)
    n = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')

    yield n._start(0)
    nn = rpc(ip=n.ip, port=n.port)

    # Run the checks under try/finally so a failed assertion still reaches
    # the clean-up below.
    try:
        assert n.process.is_alive()
        assert c.ncores[n.worker_address] == 2
        assert c.worker_services[n.worker_address]['nanny'] > 1024

        yield nn.kill()
        assert n.worker_address not in c.ncores
        assert n.worker_address not in c.worker_services
        assert not n.process

        # A second kill must be harmless.
        yield nn.kill()
        assert n.worker_address not in c.ncores
        assert n.worker_address not in c.worker_services
        assert not n.process

        yield nn.instantiate()
        assert n.process.is_alive()
        assert c.ncores[n.worker_address] == 2
        assert c.worker_services[n.worker_address]['nanny'] > 1024

        yield nn.terminate()
        assert not n.process
    finally:
        # Safety net: previously this sat after ``assert not n.process`` and
        # could never run; here it terminates a process left over by a
        # failed check.
        if n.process:
            n.process.terminate()

        yield n._close()
        c.stop()
예제 #21
0
def test_scatter_delete():
    """Scatter data via the center, delete one item, then scatter to workers.

    Uses the ``loop`` name from the surrounding module scope (not visible in
    this excerpt).  The center, both workers and the scenario coroutine are
    run together with ``asyncio.gather``.
    """
    center = Center('127.0.0.1', 8017, loop=loop)
    worker_a = Worker('127.0.0.1', 8018, center.ip, center.port, loop=loop,
                      ncores=1)
    worker_b = Worker('127.0.0.1', 8019, center.ip, center.port, loop=loop,
                      ncores=1)

    @asyncio.coroutine
    def scenario():
        # Poll until the center lists at least two entries in ``ncores``.
        while len(center.ncores) < 2:
            yield from asyncio.sleep(0.01, loop=loop)
        remote = yield from scatter_to_center(center.ip, center.port,
                                              [1, 2, 3], loop=loop)

        assert merge(worker_a.data, worker_b.data) == {
            item.key: value for item, value in zip(remote, [1, 2, 3])
        }

        assert set(center.who_has) == {item.key for item in remote}
        assert all(len(holders) == 1 for holders in center.who_has.values())

        assert [item.get() for item in remote] == [1, 2, 3]

        yield from remote[0]._delete()

        # The deleted item must be gone from the workers and the center.
        assert merge(worker_a.data, worker_b.data) == {
            item.key: value for item, value in zip(remote[1:], [2, 3])
        }

        assert remote[0].key not in center.who_has

        remote = yield from scatter_to_workers(
            center.ip, center.port,
            [worker_a.address, worker_b.address], [4, 5, 6], loop=loop)

        combined = merge(worker_a.data, worker_b.data)

        for item, value in zip(remote, [4, 5, 6]):
            assert combined[item.key] == value

        yield from worker_a._close()
        yield from worker_b._close()
        yield from center._close()

    loop.run_until_complete(
        asyncio.gather(center.go(), worker_a.go(), worker_b.go(), scenario()))
예제 #22
0
def test_multiple_executors_restart(loop):
    """Restarting one executor must cancel futures of both executors.

    ``e2`` is built on ``e1.scheduler``.  Each executor submits one ``inc``
    task; after ``e1._restart()`` both futures must report ``cancelled()``.

    Parameters
    ----------
    loop
        Passed to both Executors and used to run the test coroutine.
    """
    from distributed import Nanny, rpc
    c = Center('127.0.0.1')
    c.listen(0)
    a = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')
    b = Nanny(c.ip, c.port, ncores=2, ip='127.0.0.1')

    @gen.coroutine
    def f():
        yield a._start()
        yield b._start()
        # Poll until the center lists at least two entries in ``ncores``.
        while len(c.ncores) < 2:
            yield gen.sleep(0.01)

        # Bind both names before ``try`` so the ``finally`` clause cannot
        # raise UnboundLocalError (and hide the real failure) when creating
        # or starting an executor fails.
        e1 = e2 = None
        try:
            e1 = Executor((c.ip, c.port), start=False, loop=loop)
            yield e1._start()
            e2 = Executor(e1.scheduler, start=False, loop=loop)
            yield e2._start()

            x = e1.submit(inc, 1)
            y = e2.submit(inc, 2)
            xx = yield x._result()
            yy = yield y._result()
            assert xx == 2
            assert yy == 3

            yield e1._restart()

            assert x.cancelled()
            assert y.cancelled()
        finally:
            yield a._close()
            yield b._close()
            # Shut down only the executors that were actually created.
            if e1 is not None:
                yield e1._shutdown(fast=True)
            if e2 is not None:
                yield e2._shutdown(fast=True)
            c.stop()

    loop.run_sync(f)
예제 #23
0
def test_multiple_executors_restart(loop):
    """Restarting one executor must cancel futures of both (fixed ports).

    ``e2`` is built with ``scheduler=e1.scheduler``.  Each executor submits
    one ``inc`` task; after ``e1._restart()`` both futures must report
    ``cancelled()``.

    Parameters
    ----------
    loop
        Passed to both Executors and used to run the test coroutine.
    """
    from distributed import Nanny, rpc
    c = Center('127.0.0.1', 8006)
    a = Nanny('127.0.0.1', 8007, 8008, '127.0.0.1', 8006, ncores=2)
    b = Nanny('127.0.0.1', 8009, 8010, '127.0.0.1', 8006, ncores=2)
    c.listen(c.port)

    @gen.coroutine
    def f():
        yield a._start()
        yield b._start()
        # Poll until the center lists at least two entries in ``ncores``.
        while len(c.ncores) < 2:
            yield gen.sleep(0.01)

        # Bind both names before ``try`` so the ``finally`` clause cannot
        # raise UnboundLocalError (and hide the real failure) when creating
        # or starting an executor fails.
        e1 = e2 = None
        try:
            e1 = Executor((c.ip, c.port), start=False, loop=loop)
            yield e1._start()
            e2 = Executor(scheduler=e1.scheduler, start=False, loop=loop)
            yield e2._start()

            x = e1.submit(inc, 1)
            y = e2.submit(inc, 2)
            xx = yield x._result()
            yy = yield y._result()
            assert xx == 2
            assert yy == 3

            yield e1._restart()

            assert x.cancelled()
            assert y.cancelled()
        finally:
            yield a._close()
            yield b._close()
            # Shut down only the executors that were actually created.
            if e1 is not None:
                yield e1._shutdown(fast=True)
            if e2 is not None:
                yield e2._shutdown(fast=True)
            c.stop()

    loop.run_sync(f)
예제 #24
0
def test_nanny(loop):
    """Walk a Nanny through kill / kill / instantiate / terminate via rpc.

    After each step the test checks ``n.process`` and whether
    ``n.worker_address`` is present in the center's ``ncores`` and
    ``nannies`` mappings.

    Parameters
    ----------
    loop
        Object whose ``run_sync`` drives the test coroutine.
    """
    c = Center('127.0.0.1', 8026)
    n = Nanny('127.0.0.1', 8027, 8028, '127.0.0.1', 8026, ncores=2)
    c.listen(c.port)

    @gen.coroutine
    def f():
        nn = rpc(ip=n.ip, port=n.port)
        yield n._start()

        # Run the checks under try/finally so a failed assertion still
        # reaches the clean-up and frees the fixed ports.
        try:
            assert n.process.is_alive()
            assert c.ncores[n.worker_address] == 2
            assert c.nannies[n.worker_address] > 8000

            yield nn.kill()
            assert n.worker_address not in c.ncores
            assert n.worker_address not in c.nannies
            assert not n.process

            # A second kill must be harmless.
            yield nn.kill()
            assert n.worker_address not in c.ncores
            assert n.worker_address not in c.nannies
            assert not n.process

            yield nn.instantiate()
            assert n.process.is_alive()
            assert c.ncores[n.worker_address] == 2
            assert c.nannies[n.worker_address] > 8000

            yield nn.terminate()
            assert not n.process
        finally:
            # Safety net: previously this sat after ``assert not n.process``
            # and could never run; here it terminates a process left over by
            # a failed check.
            if n.process:
                n.process.terminate()

            yield n._close()
            c.stop()

    loop.run_sync(f)
예제 #25
0
    def g():
        """Start a center and two workers on fixed addresses, then run ``f``.

        ``f`` comes from the enclosing scope, which is not visible in this
        excerpt.  Raises ``Exception("Cluster creation timeout")`` when the
        center has not listed two entries in ``ncores`` after five seconds.
        """
        center = Center("127.0.0.1", 8017)
        center.listen(center.port)
        worker_a = Worker("127.0.0.2", 8018, center.ip, center.port, ncores=2)
        yield worker_a._start()
        worker_b = Worker("127.0.0.3", 8019, center.ip, center.port, ncores=1)
        yield worker_b._start()

        began = time()
        try:
            while len(center.ncores) < 2:
                yield gen.sleep(0.01)
                if time() - began > 5:
                    raise Exception("Cluster creation timeout")

            yield f(center, worker_a, worker_b)
        finally:
            logger.debug("Closing out test cluster")
            # NOTE(review): ``ignoring()`` is called without any exception
            # types -- confirm what, if anything, it suppresses.
            with ignoring():
                yield worker_a._close()
            with ignoring():
                yield worker_b._close()
            center.stop()
예제 #26
0
def test_monitor_resources(loop):
    """Check that a Scheduler collects resource logs from two nannies.

    Fixed-port variant; the coroutine is run with a ten second timeout.
    """
    center = Center('127.0.0.1', 8026)
    nanny_a = Nanny('127.0.0.1', 8027, 8028, '127.0.0.1', 8026, ncores=2)
    nanny_b = Nanny('127.0.0.1', 8029, 8030, '127.0.0.1', 8026, ncores=2)
    center.listen(center.port)
    sched = Scheduler((center.ip, center.port), resource_interval=0.01,
                      resource_log_size=3)

    @gen.coroutine
    def scenario():
        yield nanny_a._start()
        yield nanny_b._start()
        yield sched._sync_center()
        finished = sched.start()

        try:
            assert sched.ncores == {('127.0.0.1', nanny_a.worker_port): 2,
                                    ('127.0.0.1', nanny_b.worker_port): 2}
            assert sched.nannies == {(nanny.ip, nanny.worker_port): nanny.port
                                     for nanny in [nanny_a, nanny_b]}

            # Wait until every log holds at least three entries.
            while any(len(log) < 3 for log in sched.resource_logs.values()):
                yield gen.sleep(0.01)

            yield gen.sleep(0.1)

            # After more time has passed the logs must still hold exactly
            # three entries each.
            assert set(sched.resource_logs) == {(nanny_a.ip, nanny_a.port),
                                                (nanny_b.ip, nanny_b.port)}
            assert all(len(log) == 3 for log in sched.resource_logs.values())

            sched.put({'op': 'close'})
            yield finished
        finally:
            yield nanny_a._close()
            yield nanny_b._close()
            center.stop()

    loop.run_sync(scenario, timeout=10)
예제 #27
0
    def g():
        """Start a center and two workers on fixed addresses, then run ``f``.

        ``f`` comes from the enclosing scope, which is not visible in this
        excerpt.  Each worker's ``local_dir`` is removed during teardown if
        it exists.
        """
        center = Center('127.0.0.1', 8017)
        center.listen(center.port)
        worker_a = Worker('127.0.0.2', 8018, center.ip, center.port, ncores=2)
        yield worker_a._start()
        worker_b = Worker('127.0.0.3', 8019, center.ip, center.port, ncores=1)
        yield worker_b._start()

        began = time()
        try:
            # Abort if two entries have not appeared in ``ncores`` after 5 s.
            while len(center.ncores) < 2:
                yield gen.sleep(0.01)
                if time() - began > 5:
                    raise Exception("Cluster creation timeout")

            yield f(center, worker_a, worker_b)
        finally:
            logger.debug("Closing out test cluster")
            for worker in (worker_a, worker_b):
                # NOTE(review): ``ignoring()`` is called without any
                # exception types -- confirm what, if anything, it suppresses.
                with ignoring():
                    yield worker._close()
                if os.path.exists(worker.local_dir):
                    shutil.rmtree(worker.local_dir)
            center.stop()
예제 #28
0
def test_errors_dont_block():
    """Check that failing tasks do not stop other tasks from finishing.

    Two ``inc`` tasks and two ``throws`` tasks are submitted; the test polls
    for up to five seconds until both ``inc`` futures report ``'finished'``
    and then gathers their results.

    Generator-style test; presumably driven by a coroutine test decorator
    not visible in this excerpt.
    """
    c = Center('127.0.0.1')
    c.listen(0)
    w = Worker(c.ip, c.port, ncores=1, ip='127.0.0.1')
    e = Executor((c.ip, c.port), start=False, loop=IOLoop.current())

    yield w._start()
    yield e._start()

    # Run the checks under try/finally so a failed assertion still closes the
    # worker and stops the center instead of leaking them into later tests.
    # NOTE(review): the executor itself is never shut down here.
    try:
        L = [e.submit(inc, 1),
             e.submit(throws, 1),
             e.submit(inc, 2),
             e.submit(throws, 2)]

        start = time()
        while not (L[0].status == L[2].status == 'finished'):
            assert time() < start + 5
            yield gen.sleep(0.01)

        result = yield e._gather([L[0], L[2]])
        assert result == [2, 3]
    finally:
        yield w._close()
        c.stop()
예제 #29
0
    def g():
        """Bring up a center with two workers on fixed ports and run ``f``.

        ``f`` is taken from the enclosing scope, which this excerpt does not
        show.  Teardown runs in ``finally`` whether or not ``f`` raises.
        """
        hub = Center('127.0.0.1', 8017)
        hub.listen(hub.port)
        first_worker = Worker('127.0.0.1', 8018, hub.ip, hub.port, ncores=2)
        yield first_worker._start()
        second_worker = Worker('127.0.0.1', 8019, hub.ip, hub.port, ncores=1)
        yield second_worker._start()

        # Poll until the center lists at least two entries in ``ncores``.
        while len(hub.ncores) < 2:
            yield gen.sleep(0.01)

        try:
            yield f(hub, first_worker, second_worker)
        finally:
            # NOTE(review): ``ignoring()`` is called without any exception
            # types -- confirm what, if anything, it suppresses.
            with ignoring():
                yield first_worker._close()
            with ignoring():
                yield second_worker._close()
            hub.stop()
예제 #30
0
def test_restart():
    """Verify executor state before and after ``_restart``.

    Before: the scheduler's ``who_has`` equals the center's and holds both
    task keys.  After: both are empty, the scheduler has two ``stacks`` and
    two ``processing`` entries, and both futures are cancelled.

    Generator-style test; presumably driven by a coroutine test decorator
    not visible in this excerpt.
    """
    from distributed import Nanny, rpc
    hub = Center('127.0.0.1')
    hub.listen(0)
    left = Nanny(hub.ip, hub.port, ncores=2, ip='127.0.0.1')
    right = Nanny(hub.ip, hub.port, ncores=2, ip='127.0.0.1')

    # Both nannies are started in a single yield.
    yield [left._start(), right._start()]

    client = Executor((hub.ip, hub.port), start=False, loop=IOLoop.current())
    yield client._start()

    assert client.scheduler.ncores == {left.worker_address: 2,
                                       right.worker_address: 2}

    base = client.submit(inc, 1)
    dependent = client.submit(inc, base)
    yield dependent._result()

    hub_rpc = rpc(ip=hub.ip, port=hub.port)
    locations = yield hub_rpc.who_has()
    try:
        assert client.scheduler.who_has == locations
        assert set(client.scheduler.who_has) == {base.key, dependent.key}

        # ``_restart`` is expected to hand back the executor itself.
        returned = yield client._restart()
        assert returned is client

        assert len(client.scheduler.stacks) == 2
        assert len(client.scheduler.processing) == 2

        locations = yield hub_rpc.who_has()
        assert not locations
        assert not client.scheduler.who_has

        assert base.cancelled()
        assert dependent.cancelled()

    finally:
        yield left._close()
        yield right._close()
        yield client._shutdown(fast=True)
        hub.stop()
예제 #31
0
def test_monitor_resources():
    """Verify scheduler-side resource logging for two nannies.

    Skipped when ``psutil`` cannot be imported.  Generator-style test;
    presumably driven by a coroutine test decorator not visible here.
    """
    pytest.importorskip('psutil')
    hub = Center('127.0.0.1')
    hub.listen(0)
    left = Nanny(hub.ip, hub.port, ncores=2, ip='127.0.0.1')
    right = Nanny(hub.ip, hub.port, ncores=2, ip='127.0.0.1')
    scheduler = Scheduler((hub.ip, hub.port), resource_interval=0.01,
                          resource_log_size=3)

    yield left._start()
    yield right._start()
    yield scheduler.sync_center()
    finished = scheduler.start()

    try:
        assert scheduler.ncores == {('127.0.0.1', left.worker_port): 2,
                                    ('127.0.0.1', right.worker_port): 2}
        assert scheduler.nannies == {
            (nanny.ip, nanny.worker_port): nanny.port
            for nanny in [left, right]
        }

        # Wait until every log holds at least three entries.
        while any(len(log) < 3 for log in scheduler.resource_logs.values()):
            yield gen.sleep(0.01)

        yield gen.sleep(0.1)

        # After more time has passed the logs must still hold exactly three.
        assert set(scheduler.resource_logs) == {left.address, right.address}
        assert all(len(log) == 3 for log in scheduler.resource_logs.values())

        summary = scheduler.diagnostic_resources(n=2)
        assert set(summary) == {left.worker_address, right.worker_address}
        assert set(summary[left.worker_address]).issubset(
            {'cpu', 'memory', 'time'})
        assert all(len(series) == 2
                   for series in summary[left.worker_address].values())

        scheduler.put({'op': 'close'})
        yield finished
    finally:
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield left._close(timeout=0.5)
        with ignoring(TimeoutError, StreamClosedError, OSError):
            yield right._close(timeout=0.5)
        hub.stop()
예제 #32
0
def test_nanny_process_failure():
    """Check process recovery and worker-directory handling after a crash.

    A serialized ``sys.exit`` call is sent to the worker.  The test waits for
    the process to die, to come back, and to reappear in ``c.ncores`` with a
    non-``None`` ``worker_dir``.  After closing the nanny, neither the first
    nor the second worker directory may exist.

    Generator-style test; presumably driven by a coroutine test decorator
    not visible in this excerpt.
    """
    center = Center('127.0.0.1')
    center.listen(0)
    nanny = Nanny(center.ip, center.port, ncores=2, ip='127.0.0.1')
    yield nanny._start()
    nanny_rpc = rpc(ip=nanny.ip, port=nanny.port)
    original_dir = nanny.worker_dir

    assert os.path.exists(original_dir)

    worker_rpc = rpc(ip=nanny.ip, port=nanny.worker_port)
    yield worker_rpc.update_data(data=valmap(dumps, {'x': 1, 'y': 2}))
    # A closed stream is tolerated while the exit call is in flight.
    with ignoring(StreamClosedError):
        yield worker_rpc.compute(function=dumps(sys.exit),
                                 args=dumps((0,)),
                                 key='z')

    began = time()
    while nanny.process.is_alive():  # wait while process dies
        yield gen.sleep(0.01)
        assert time() - began < 2

    began = time()
    while not nanny.process.is_alive():  # wait while process comes back
        yield gen.sleep(0.01)
        assert time() - began < 2

    # Wait until the worker is registered again and has a directory.
    began = time()
    while not (nanny.worker_address in center.ncores
               and nanny.worker_dir is not None):
        yield gen.sleep(0.01)
        assert time() - began < 2

    replacement_dir = nanny.worker_dir

    yield nanny._close()
    assert not os.path.exists(replacement_dir)
    assert not os.path.exists(original_dir)
    assert original_dir != nanny.worker_dir
    nanny_rpc.close_streams()
    center.stop()
예제 #33
0
def test_errors_dont_block():
    """Check that failing tasks do not stop other tasks from finishing.

    Two ``inc`` tasks and two ``throws`` tasks are submitted; the test polls
    for up to five seconds until both ``inc`` futures report ``'finished'``
    and then gathers their results.

    Generator-style test; presumably driven by a coroutine test decorator
    not visible in this excerpt.
    """
    c = Center('127.0.0.1')
    c.listen(0)
    w = Worker(c.ip, c.port, ncores=1, ip='127.0.0.1')
    e = Executor((c.ip, c.port), start=False, loop=IOLoop.current())

    yield w._start()
    yield e._start()

    # Run the checks under try/finally so a failed assertion still closes the
    # worker and stops the center instead of leaking them into later tests.
    # NOTE(review): the executor itself is never shut down here.
    try:
        L = [e.submit(inc, 1),
             e.submit(throws, 1),
             e.submit(inc, 2),
             e.submit(throws, 2)]

        start = time()
        while not (L[0].status == L[2].status == 'finished'):
            assert time() < start + 5
            yield gen.sleep(0.01)

        result = yield e._gather([L[0], L[2]])
        assert result == [2, 3]
    finally:
        yield w._close()
        c.stop()