Example 1
def _test_workspace_concurrency(tmpdir, timeout, max_procs):
    """
    WorkSpace concurrency test.  We merely check that no exception or
    deadlock happens.
    """
    base_dir = str(tmpdir)

    err_q = mp_context.Queue()
    purged_q = mp_context.Queue()
    stop_evt = mp_context.Event()
    ws = WorkSpace(base_dir)
    # Make sure purging only happens in the child processes
    ws._purge_leftovers = lambda: None

    # Run a bunch of child processes that will try to purge concurrently
    NPROCS = 2 if sys.platform == "win32" else max_procs
    processes = [
        mp_context.Process(
            target=_workspace_concurrency, args=(base_dir, purged_q, err_q, stop_evt)
        )
        for _ in range(NPROCS)
    ]
    for p in processes:
        p.start()

    n_created = 0
    n_purged = 0
    try:
        t1 = time()
        while time() - t1 < timeout:
            # Add a bunch of locks, and simulate forgetting them.
            # The concurrent processes should try to purge them.
            for _ in range(50):
                d = ws.new_work_dir(prefix="workspace-concurrency-")
                d._finalizer.detach()
                n_created += 1
            sleep(1e-2)
    finally:
        stop_evt.set()
        for p in processes:
            p.join()

    # Any errors?
    try:
        err = err_q.get_nowait()
    except Empty:
        pass
    else:
        raise err

    try:
        while True:
            n_purged += purged_q.get_nowait()
    except Empty:
        pass
    # We attempted to purge most directories at some point
    assert n_purged >= 0.5 * n_created > 0
    return n_created, n_purged
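
The child-side _workspace_concurrency helper spawned above is defined elsewhere in the test module and is not shown here. A minimal sketch consistent with how the queues and the stop event are used (an assumption, not the verbatim helper):

def _workspace_concurrency(base_dir, purged_q, err_q, stop_evt):
    # Child process body: purge leftover work dirs until told to stop,
    # reporting the purge count and forwarding any exception to the parent.
    ws = WorkSpace(base_dir)
    n_purged = 0
    try:
        while not stop_evt.is_set():
            # _purge_leftovers() is assumed to return the purged directories
            n_purged += len(ws._purge_leftovers())
    except Exception as e:
        err_q.put(e)
    purged_q.put(n_purged)
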
Example 2
def _test_workspace_concurrency(tmpdir, timeout, max_procs):
    base_dir = str(tmpdir)

    err_q = mp_context.Queue()
    purged_q = mp_context.Queue()
    stop_evt = mp_context.Event()
    ws = WorkSpace(base_dir)
    # Make sure purging only happens in the child processes
    ws._purge_leftovers = lambda: None

    NPROCS = 2 if sys.platform == 'win32' else max_procs
    processes = [
        mp_context.Process(target=_workspace_concurrency,
                           args=(base_dir, purged_q, err_q, stop_evt))
        for _ in range(NPROCS)
    ]
    for p in processes:
        p.start()

    n_created = 0
    n_purged = 0
    try:
        t1 = time()
        while time() - t1 < timeout:
            # Add a bunch of locks, and simulate forgetting them
            for _ in range(50):
                d = ws.new_work_dir(prefix='workspace-concurrency-')
                d._finalizer.detach()
                n_created += 1
            sleep(1e-2)
    finally:
        stop_evt.set()
        for p in processes:
            p.join()

    # Any errors?
    try:
        err = err_q.get_nowait()
    except Empty:
        pass
    else:
        raise err

    try:
        while True:
            n_purged += purged_q.get_nowait()
    except Empty:
        pass
    return n_created, n_purged
Example 3
    async def start(self):
        if self.status == "running":
            return self.status
        if self.status == "starting":
            await self.running.wait()
            return self.status

        self.init_result_q = init_q = mp_context.Queue()
        self.child_info_stop_q = mp_context.Queue()
        self.parent_info_q = mp_context.Queue()

        self.process = AsyncProcess(
            target=self._run,
            name="Dask CUDA Scheduler process",
            kwargs=dict(
                proc_cls=self.proc_cls,
                kwargs=self.kwargs,
                silence_logs=False,
                init_result_q=self.init_result_q,
                child_info_stop_q=self.child_info_stop_q,
                parent_info_q=self.parent_info_q,
                env=self.env,
            ),
        )
        # self.process.daemon = dask.config.get("distributed.worker.daemon", default=True)
        self.process.set_exit_callback(self._on_exit)
        self.running = Event()
        self.stopped = Event()
        self.status = "starting"
        try:
            await self.process.start()
        except OSError:
            logger.exception("Failed to start CUDA Scheduler process",
                             exc_info=True)
            self.process.terminate()
            return

        msg = await self._wait_until_started()
        if not msg:
            return self.status
        self.address = msg["address"]
        assert self.address
        self.status = "running"
        self.running.set()

        init_q.close()

        await super().start()
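
The start() coroutine above awaits a private _wait_until_started() helper that is not part of this snippet. A minimal sketch of what it might do, assuming the child posts either an exception or a dict carrying at least {"address": ...} on init_result_q (requires `import asyncio` and `from queue import Empty`):

    async def _wait_until_started(self):
        # Sketch only: poll the init queue without blocking the event loop
        while True:
            try:
                msg = self.init_result_q.get_nowait()
            except Empty:
                await asyncio.sleep(0.1)
                continue
            if isinstance(msg, Exception):
                raise msg
            return msg  # expected to carry at least {"address": ...}
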
Example 4
async def test_exit_callback():
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()
    evt = Event()

    # FIXME: this breaks if changed to async def...
    @gen.coroutine
    def on_stop(_proc):
        assert _proc is proc
        yield gen.moment
        evt.set()

    # Normal process exit
    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    await proc.start()
    await asyncio.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    to_child.put(None)
    await evt.wait(timedelta(seconds=3))
    assert evt.is_set()
    assert not proc.is_alive()

    # Process terminated
    proc = AsyncProcess(target=wait)
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    await proc.start()
    await asyncio.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    await proc.terminate()
    await evt.wait(timedelta(seconds=3))
    assert evt.is_set()
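
The feed and wait targets come from the surrounding test module and are not shown. Minimal definitions consistent with how the test uses them (assumed, not verbatim; `from time import sleep` is required):

def feed(in_q, out_q):
    # Echo one object from the parent, then let the child exit normally
    obj = in_q.get(timeout=5)
    out_q.put(obj)

def wait():
    # Spin until terminated from outside
    while True:
        sleep(0.01)
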
Example 5
def test_child_main_thread():
    """
    The main thread in the child should be called "MainThread".
    """
    q = mp_context.Queue()
    proc = AsyncProcess(target=threads_info, args=(q,))
    yield proc.start()
    yield proc.join()
    n_threads = q.get()
    main_name = q.get()
    assert n_threads == 1
    assert main_name == "MainThread"
Example 6
async def test_child_main_thread():
    """
    The main thread in the child should be called "MainThread".
    """
    q = mp_context.Queue()
    proc = AsyncProcess(target=threads_info, args=(q,))
    await proc.start()
    await proc.join()
    n_threads = q.get()
    main_name = q.get()
    assert n_threads <= 3
    assert main_name == "MainThread"
    q.close()
    q._reader.close()
    q._writer.close()
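
Both variants target a threads_info helper defined elsewhere in the test module. A sketch consistent with the assertions (an assumption, not the verbatim helper):

import threading

def threads_info(q):
    # Report the live thread count and the main thread's name to the parent
    q.put(len(threading.enumerate()))
    q.put(threading.current_thread().name)
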
Example 7
def test_exitcode():
    q = mp_context.Queue()

    proc = AsyncProcess(target=exit, kwargs={'q': q})
    proc.daemon = True
    assert not proc.is_alive()
    assert proc.exitcode is None

    yield proc.start()
    assert proc.is_alive()
    assert proc.exitcode is None

    q.put(5)
    yield proc.join(timeout=3.0)
    assert not proc.is_alive()
    assert proc.exitcode == 5
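
The exit target shadows the builtin and is defined in the test module. A plausible one-liner matching the assertions above (assumed; requires `import sys`):

def exit(q):
    # Terminate the child with whatever exit code the parent sends
    sys.exit(q.get())
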
Example 8
def test_simple():
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()

    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    assert not proc.is_alive()
    assert proc.pid is None
    assert proc.exitcode is None
    assert not proc.daemon
    proc.daemon = True
    assert proc.daemon

    wr1 = weakref.ref(proc)
    wr2 = weakref.ref(proc._process)

    # join() before start()
    with pytest.raises(AssertionError):
        yield proc.join()

    yield proc.start()
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    t1 = time()
    yield proc.join(timeout=0.02)
    dt = time() - t1
    assert 0.2 >= dt >= 0.01
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    # setting daemon attribute after start()
    with pytest.raises(AssertionError):
        proc.daemon = False

    to_child.put(5)
    assert from_child.get() == 5

    # child should be stopping now
    t1 = time()
    yield proc.join(timeout=10)
    dt = time() - t1
    assert dt <= 1.0
    assert not proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode == 0

    # join() again
    t1 = time()
    yield proc.join()
    dt = time() - t1
    assert dt <= 0.6

    del proc
    gc.collect()
    if wr1() is not None:
        # Help diagnosing
        from types import FrameType
        p = wr1()
        if p is not None:
            rc = sys.getrefcount(p)
            refs = gc.get_referrers(p)
            del p
            print("refs to proc:", rc, refs)
            frames = [r for r in refs if isinstance(r, FrameType)]
            for i, f in enumerate(frames):
                print("frames #%d:" % i, f.f_code.co_name,
                      f.f_code.co_filename, sorted(f.f_locals))
        pytest.fail("AsyncProcess should have been destroyed")
    t1 = time()
    while wr2() is not None:
        yield gen.sleep(0.01)
        gc.collect()
        dt = time() - t1
        assert dt < 2.0
Example 9
    async def start(self) -> Status:
        """
        Ensure the worker process is started.
        """
        enable_proctitle_on_children()
        if self.status == Status.running:
            return self.status
        if self.status == Status.starting:
            await self.running.wait()
            return self.status

        self.init_result_q = init_q = mp_context.Queue()
        self.child_stop_q = mp_context.Queue()
        uid = uuid.uuid4().hex

        self.process = AsyncProcess(
            target=self._run,
            name="Dask Worker process (from Nanny)",
            kwargs=dict(
                worker_kwargs=self.worker_kwargs,
                worker_start_args=self.worker_start_args,
                silence_logs=self.silence_logs,
                init_result_q=self.init_result_q,
                child_stop_q=self.child_stop_q,
                uid=uid,
                Worker=self.Worker,
                env=self.env,
                config=self.config,
            ),
        )
        self.process.daemon = dask.config.get("distributed.worker.daemon",
                                              default=True)
        self.process.set_exit_callback(self._on_exit)
        self.running = asyncio.Event()
        self.stopped = asyncio.Event()
        self.status = Status.starting

        try:
            await self.process.start()
        except OSError:
            logger.exception("Nanny failed to start process", exc_info=True)
            self.process.terminate()
            self.status = Status.failed
            return self.status
        try:
            msg = await self._wait_until_connected(uid)
        except Exception:
            self.status = Status.failed
            self.process.terminate()
            raise
        if not msg:
            return self.status
        self.worker_address = msg["address"]
        self.worker_dir = msg["dir"]
        assert self.worker_address
        self.status = Status.running
        self.running.set()

        init_q.close()

        return self.status
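
The _wait_until_connected(uid) helper awaited above is not shown in this snippet. A hedged sketch of its role, filtering out messages left over from earlier start attempts by uid (the body is an assumption; requires `import asyncio` and `from queue import Empty`):

    async def _wait_until_connected(self, uid):
        # Sketch only: poll the init queue without blocking the event loop
        while True:
            if self.status is not Status.starting:
                return None
            try:
                msg = self.init_result_q.get_nowait()
            except Empty:
                await asyncio.sleep(0.1)
                continue
            if msg.get("uid") != uid:
                continue  # stale message from a previous start attempt
            if "exception" in msg:
                raise msg["exception"]
            return msg
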
Example 10
def test_workspace_concurrency(tmpdir):
    """WorkSpace concurrency test. We merely check that no exception or
    deadlock happens.
    """
    base_dir = str(tmpdir)

    err_q = mp_context.Queue()
    purged_q = mp_context.Queue()
    stop_evt = mp_context.Event()
    ws = WorkSpace(base_dir)
    # Make sure purging only happens in the child processes
    ws._purge_leftovers = lambda: None

    # Windows (or at least Windows GitHub CI) has been observed to be exceptionally
    # slow. Don't stress it too much.
    max_procs = 2 if WINDOWS else 16

    # Run a bunch of child processes that will try to purge concurrently
    barrier = mp_context.Barrier(parties=max_procs + 1)
    processes = [
        mp_context.Process(
            target=_workspace_concurrency,
            args=(base_dir, purged_q, err_q, stop_evt, barrier),
        )
        for _ in range(max_procs)
    ]
    for p in processes:
        p.start()
    barrier.wait()
    n_created = 0
    n_purged = 0
    t1 = time()
    try:
        # On Linux, you will typically end up with n_created > 10,000.
        # On Windows, it can take 60 seconds to create 50 locks!
        while time() - t1 < 10:
            # Add a bunch of locks and simulate forgetting them.
            # The concurrent processes should try to purge them.
            for _ in range(100):
                d = ws.new_work_dir(prefix="workspace-concurrency-")
                d._finalizer.detach()
                n_created += 1

    finally:
        stop_evt.set()
        for p in processes:
            p.join()

    # Any errors?
    try:
        err = err_q.get_nowait()
    except queue.Empty:
        pass
    else:
        raise err

    try:
        while True:
            n_purged += purged_q.get_nowait()
    except queue.Empty:
        pass

    # We attempted to purge most directories at some point
    assert n_purged >= 0.5 * n_created > 0
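
As in Example 1, the _workspace_concurrency child helper lives elsewhere in the test module. A sketch of the barrier-synchronized variant implied by the call signature (assumed, not verbatim):

def _workspace_concurrency(base_dir, purged_q, err_q, stop_evt, barrier):
    ws = WorkSpace(base_dir)
    n_purged = 0
    barrier.wait()  # line up with the parent so all purgers start together
    try:
        while not stop_evt.is_set():
            # _purge_leftovers() is assumed to return the purged directories
            n_purged += len(ws._purge_leftovers())
    except Exception as e:
        err_q.put(e)
    purged_q.put(n_purged)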