Example #1
0
    def _start_worker(self, death_timeout=60, **kwargs):
        if self.status and self.status.startswith('clos'):
            warnings.warn("Tried to start a worker while status=='%s'" %
                          self.status)
            return

        if self.processes:
            W = Nanny
            kwargs['quiet'] = True
        else:
            W = Worker

        w = W(self.scheduler.address,
              loop=self.loop,
              death_timeout=death_timeout,
              silence_logs=self.silence_logs,
              **kwargs)
        yield w._start()

        self.workers.append(w)

        while w.status != 'closed' and w.worker_address not in self.scheduler.workers:
            yield gen.sleep(0.01)

        if w.status == 'closed' and self.scheduler.status == 'running':
            self.workers.remove(w)
            raise gen.TimeoutError("Worker failed to start")

        raise gen.Return(w)
 def on_timeout():
     """Log the kernel_info_reply timeout, clean up, and fail the future.

     The future is only failed when it has not completed yet.
     """
     self.log.warning(
         "Timeout waiting for kernel_info_reply: %s", kernel_id)
     finish()
     if future.done():
         return
     timeout_error = gen.TimeoutError("Timeout waiting for restart")
     future.set_exception(timeout_error)
Example #3
0
    def test_handle_layer_error(self):
        """Check which layer errors make ``_handle_layer_error`` close the source stream.

        Each case pairs an error with whether ``src_stream.close`` is expected
        to have been called exactly once (True) or not at all (False).
        """
        context = LayerContext(mode="socks",
                               src_stream=self.src_stream,
                               port=443,
                               scheme="h2")

        cases = [
            (gen.TimeoutError("timeout"), True),
            (DestNotConnectedError("stream closed"), False),
            (DestStreamClosedError("stream closed"), True),
            (SrcStreamClosedError("stream closed"), False),
            (iostream.StreamClosedError("stream closed"), True),
        ]

        for index, (error, closes_source) in enumerate(cases):
            # The mock is reset between cases only, never before the first.
            if index:
                context.src_stream.reset_mock()

            layer_manager._handle_layer_error(error, context)

            if closes_source:
                context.src_stream.close.assert_called_once_with()
            else:
                context.src_stream.close.assert_not_called()
Example #4
0
    def _start_worker(self, port=0, processes=None, death_timeout=60, **kwargs):
        if processes is not None:
            raise ValueError("overriding `processes` for individual workers "
                             "in a LocalCluster is not supported anymore")
        if port:
            raise ValueError("overriding `port` for individual workers "
                             "in a LocalCluster is not supported anymore")
        if self.processes:
            W = Nanny
            kwargs['quiet'] = True
        else:
            W = Worker

        w = W(self.scheduler.address, loop=self.loop,
              death_timeout=death_timeout,
              silence_logs=self.silence_logs, **kwargs)
        yield w._start()

        self.workers.append(w)

        while w.status != 'closed' and w.worker_address not in self.scheduler.worker_info:
            yield gen.sleep(0.01)

        if w.status == 'closed':
            self.workers.remove(w)
            raise gen.TimeoutError("Worker failed to start")

        raise gen.Return(w)
Example #5
0
    def _start_worker(self, death_timeout=60, **kwargs):
        if self.status and self.status.startswith("clos"):
            warnings.warn("Tried to start a worker while status=='%s'" %
                          self.status)
            return

        if self.processes:
            kwargs["quiet"] = True

        w = yield self.worker_class(self.scheduler.address,
                                    loop=self.loop,
                                    death_timeout=death_timeout,
                                    silence_logs=self.silence_logs,
                                    **kwargs)

        self.workers.append(w)

        while w.status != "closed" and w.worker_address not in self.scheduler.workers:
            yield gen.sleep(0.01)

        if w.status == "closed" and self.scheduler.status == "running":
            self.workers.remove(w)
            raise gen.TimeoutError("Worker failed to start")

        raise gen.Return(w)
Example #6
0
 def get(self, stream=None, name=None, client=None, timeout=None):
     """Wait for variable ``name`` to exist and return its record.

     ``stream`` and ``client`` are not used here.  When ``timeout`` is given
     and the remaining time drops below zero, ``gen.TimeoutError`` is raised.

     For a record of type ``'Future'`` a fresh token is generated, added to
     ``self.waiting[key, name]``, and merged into the returned record along
     with the task state (``'lost'`` if the key is unknown) and, for erred
     tasks, the blamed exception and traceback.  The record is returned
     through ``gen.Return``.
     """
     began = time()
     while name not in self.variables:
         left = None if timeout is None else timeout - (time() - began)
         if left and left < 0:
             raise gen.TimeoutError()
         yield self.started.wait(timeout=left)

     record = self.variables[name]
     if record['type'] != 'Future':
         raise gen.Return(record)

     key = record['value']
     token = uuid.uuid4().hex
     try:
         state = self.scheduler.task_state[key]
     except KeyError:
         state = 'lost'

     msg = {'token': token, 'state': state}
     if state == 'erred':
         blamed = self.scheduler.exceptions_blame[key]
         msg['exception'] = self.scheduler.exceptions[blamed]
         msg['traceback'] = self.scheduler.tracebacks[blamed]

     merged = merge(record, msg)
     self.waiting[key, name].add(token)
     raise gen.Return(merged)
Example #7
0
 async def wait_for(future, timeout=None):
     """Await ``future`` for at most ``timeout`` seconds.

     On any ``Exception`` the enclosing object is closed (``timeout=1``) and a
     ``gen.TimeoutError`` naming its type and the timeout is raised instead.
     """
     try:
         await asyncio.wait_for(future, timeout=timeout)
     except Exception:
         await self.close(timeout=1)
         message = "{} failed to start in {} seconds".format(
             type(self).__name__, timeout)
         raise gen.TimeoutError(message)
Example #8
0
	def _start_worker(self, port=0, processes=None, death_timeout=60, **kwargs):
		
		"""
								dask-worker --help
						Usage: dask-worker [OPTIONS] SCHEDULER
						
						Options:
						  X--worker-port INTEGER  Serving worker port, defaults to randomly assigned
						  --http-port INTEGER    Serving http port, defaults to randomly assigned
						  X--nanny-port INTEGER   Serving nanny port, defaults to randomly assigned
						  X--port INTEGER         Deprecated, see --nanny-port
						  --host TEXT            Serving host. Defaults to an ip address that can
						                         hopefully be visible from the scheduler network.
						  --nthreads INTEGER     Number of threads per process. Defaults to number of
						                         cores
						  X--nprocs INTEGER       Number of worker processes.  Defaults to one.
						  --name TEXT            Alias
						  --memory-limit TEXT     Number of bytes before spilling data to disk
						  --no-nanny
						  X--help                 Show this message and exit."""

		# NOTE(review): unfinished draft.  As written this method does not
		# parse: `name =` below has no right-hand side and
		# `jobRunning = False:` ends with a stray colon.  `w` is also used
		# further down without ever being assigned in the visible code.


		# TODO: change this to bsub a job and then grab its configuration?
		# Loop while the job name is not in the jobs list as running
		# and the worker name is not in the scheduler's worker names.
		
		# Stop until the job is running.

		# Hash the clock time to build the name.

		name = 

		jobRunning = False:
		while jobRunning == False:
			# bjobs

			# Is the worker running?

			# Grab the IP address of the process.




		# Poll until the worker's address shows up in the scheduler's worker_info.
		while w.worker_address not in self.scheduler.worker_info:
			yield gen.sleep(0.01)

		# Store job, worker address to dictionary.
		self.workers.append(w)

		# A worker that ended up closed is dropped again and reported as a timeout.
		if w.status == 'closed':
			self.workers.remove(w)
			raise gen.TimeoutError("Worker failed to start")


		raise gen.Return(w)
Example #9
0
 async def _wait_for_workers(self):
     while {
             str(d["name"])
             for d in (
                 await self.scheduler_comm.identity())["workers"].values()
     } != set(map(str, self.workers)):
         if (any(w.status == "closed" for w in self.workers.values())
                 and self.scheduler.status == "running"):
             raise gen.TimeoutError("Worker unexpectedly closed")
         await asyncio.sleep(0.1)
Example #10
0
 async def _wait_for_workers(self):
     # TODO: this function needs to query scheduler and worker state
     # remotely without assuming that they are local
     while {
             d["name"]
             for d in self.scheduler.identity()["workers"].values()
     } != set(self.workers):
         if (any(w.status == "closed" for w in self.workers.values())
                 and self.scheduler.status == "running"):
             raise gen.TimeoutError("Worker unexpectedly closed")
         await asyncio.sleep(0.1)
Example #11
0
def sync(loop, func, *args, callback_timeout=None, **kwargs):
    """
    Run coroutine in loop running in separate thread.

    ``func(*args, **kwargs)`` is scheduled on ``loop`` and the calling thread
    blocks until it finishes.  Its result is returned; an exception raised by
    it is re-raised here with its original traceback.

    With ``callback_timeout`` (seconds) the call is wrapped in
    ``gen.with_timeout`` and the wait is bounded; ``gen.TimeoutError`` is
    raised if the wait expires.  ``RuntimeError`` is raised when the loop is
    closed, and reported when called from the loop's own thread.
    """
    # Tornado's PollIOLoop doesn't raise when using closed, do it ourselves
    if PollIOLoop:
        poll_loop_closing = (isinstance(loop, PollIOLoop)
                             and getattr(loop, "_closing", False))
        if poll_loop_closing or (hasattr(loop, "asyncio_loop")
                                 and loop.asyncio_loop._closed):
            raise RuntimeError("IOLoop is closed")
    try:
        if loop.asyncio_loop.is_closed():  # tornado 6
            raise RuntimeError("IOLoop is closed")
    except AttributeError:
        pass

    finished = threading.Event()
    caller_tid = threading.get_ident()
    # One-element lists let the nested coroutine hand values back.
    outcome = [None]
    failure = [False]

    @gen.coroutine
    def run_on_loop():
        try:
            if caller_tid == threading.get_ident():
                raise RuntimeError("sync() called from thread of running loop")
            yield gen.moment
            thread_state.asynchronous = True
            pending = func(*args, **kwargs)
            if callback_timeout is not None:
                pending = gen.with_timeout(
                    timedelta(seconds=callback_timeout), pending)
            outcome[0] = yield pending
        except Exception:
            failure[0] = sys.exc_info()
        finally:
            thread_state.asynchronous = False
            finished.set()

    loop.add_callback(run_on_loop)

    if callback_timeout is None:
        # Wait in 10 second slices until the coroutine signals completion.
        while not finished.is_set():
            finished.wait(10)
    elif not finished.wait(callback_timeout):
        raise gen.TimeoutError("timed out after %s s." %
                               (callback_timeout, ))

    if failure[0]:
        _, raised, trace = failure[0]
        raise raised.with_traceback(trace)
    return outcome[0]
    def _get(self, timeout=None):
        """Wait for ``self.buffer`` to be non-empty and pop its leftmost item.

        ``timeout`` is in seconds.  While the buffer is empty the method waits
        on ``self.condition`` with the time remaining; once that remaining
        time is negative ``gen.TimeoutError`` is raised.  The popped item is
        returned through ``gen.Return``.
        """
        limit = None if timeout is None else datetime.timedelta(seconds=timeout)
        began = datetime.datetime.now()

        while not self.buffer:
            remaining = None
            if limit is not None:
                remaining = limit - (datetime.datetime.now() - began)
                if remaining.total_seconds() < 0:
                    raise gen.TimeoutError()
            yield self.condition.wait(timeout=remaining)

        raise gen.Return(self.buffer.popleft())
Example #13
0
def sync(loop, func, *args, **kwargs):
    """
    Run coroutine in loop running in separate thread.

    ``func(*args, **kwargs)`` is scheduled on ``loop`` and the calling thread
    blocks until it finishes.  Its result is returned; an exception raised by
    it is logged and re-raised here.

    The keyword ``callback_timeout`` (seconds) is removed from ``kwargs``
    before the call; when given, the call is wrapped in ``gen.with_timeout``
    and ``gen.TimeoutError`` is raised if the wait expires.  ``RuntimeError``
    is raised when the loop is closed.
    """
    # Tornado's PollIOLoop doesn't raise when using closed, do it ourselves
    poll_loop_closing = (isinstance(loop, PollIOLoop)
                         and getattr(loop, '_closing', False))
    if poll_loop_closing or (hasattr(loop, 'asyncio_loop')
                             and loop.asyncio_loop._closed):
        raise RuntimeError("IOLoop is closed")

    timeout = kwargs.pop('callback_timeout', None)

    def make_coro():
        pending = gen.maybe_future(func(*args, **kwargs))
        if timeout is not None:
            return gen.with_timeout(timedelta(seconds=timeout), pending)
        return pending

    finished = threading.Event()
    caller_tid = get_thread_identity()
    # One-element lists let the nested coroutine hand values back.
    outcome = [None]
    failure = [False]

    @gen.coroutine
    def run_on_loop():
        try:
            if caller_tid == get_thread_identity():
                raise RuntimeError("sync() called from thread of running loop")
            yield gen.moment
            thread_state.asynchronous = True
            outcome[0] = yield make_coro()
        except Exception as exc:
            logger.exception(exc)
            failure[0] = sys.exc_info()
        finally:
            thread_state.asynchronous = False
            finished.set()

    loop.add_callback(run_on_loop)

    if timeout is None:
        # Wait in 10 second slices until the coroutine signals completion.
        while not finished.is_set():
            finished.wait(10)
    elif not finished.wait(timeout):
        raise gen.TimeoutError("timed out after %s s." % (timeout, ))

    if failure[0]:
        six.reraise(*failure[0])
    return outcome[0]
Example #14
0
 def _wait_until_started(self):
     delay = 0.05
     while True:
         if self.status != 'starting':
             return
         try:
             msg = self.init_result_q.get_nowait()
             if msg != 'started':
                 logger.warn("Nanny got unexpected message %s. "
                             "Starting worker again", msg)
                 raise gen.TimeoutError()
             return
         except Empty:
             yield gen.sleep(delay)
             continue
Example #15
0
def test_app_poll(disp, mocker):
    """Run ``disp.process_loop`` once per entry of ``running_apps``.

    The loop sentry, input queue, and node service are replaced with mocks
    driven by ``running_apps``; afterwards ``disp.workers_diff`` must have
    been called once per entry.
    """
    # One pass per running_apps entry, one more for the reset state
    # message, then stop.
    stop_side_effect = [True] * len(running_apps) + [True, False]

    mocker.patch.object(
        burlak.LoopSentry, 'should_run', side_effect=stop_side_effect)

    timeouts = [make_future(gen.TimeoutError()) for _ in running_apps]
    disp.input_queue.get = mocker.Mock(side_effect=timeouts)

    channels = [make_mock_channel_with(d.keys()) for d in running_apps]
    disp.node_service.list = mocker.Mock(side_effect=channels)

    def slaves_count(app):
        for apps in running_apps:
            for candidate in apps:
                if candidate == app:
                    return apps[candidate]

    def info_mock(app, flags=None):
        # Every iteration writes the same key, so this holds at most one entry.
        slaves = {app: 'dummy_info' for _ in xrange(slaves_count(app))}
        return make_mock_channel_with({'pool': {'slaves': slaves}})

    def check_workers_mismatch(state, workers_count):
        return any(d == workers_count for d in running_apps)

    disp.node_service.info = mocker.Mock(side_effect=info_mock)
    disp.workers_diff = mocker.Mock(side_effect=check_workers_mismatch)

    control_filter = ControlFilter.from_dict(
        dict(apply_control=True, white_list=[]))

    yield disp.filter_queue.put(burlak.ControlFilterMessage(control_filter))
    yield disp.process_loop()

    assert disp.workers_diff.call_count == len(running_apps)

    # NOTE(review): `called_with` is not a Mock assertion method; on a Mock
    # it is presumably an auto-created child whose call result is truthy, so
    # this assert cannot fail.  `assert_any_call` may have been intended -
    # confirm the expected arguments before changing it.
    for d in running_apps:
        assert disp.workers_diff.called_with(dict(), d)
Example #16
0
 def get(self, stream=None, name=None, client=None, timeout=None):
     """Wait for variable ``name`` to exist and return its record.

     ``stream`` and ``client`` are not used here.  When ``timeout`` is given
     and the remaining time drops below zero, ``gen.TimeoutError`` is raised.

     For a record of type ``'Future'`` a fresh token is stored in the record
     itself under ``'token'`` and added to ``self.waiting[value, name]``.
     The record is returned through ``gen.Return``.
     """
     began = time()
     while name not in self.variables:
         left = None if timeout is None else timeout - (time() - began)
         if left and left < 0:
             raise gen.TimeoutError()
         yield self.started.wait(timeout=left)

     record = self.variables[name]
     if record['type'] != 'Future':
         raise gen.Return(record)

     token = uuid.uuid4().hex
     # The stored record is updated in place, not copied.
     record['token'] = token
     self.waiting[record['value'], name].add(token)
     raise gen.Return(record)
Example #17
0
 async def get(self, stream=None, name=None, client=None, timeout=None):
     """Wait for variable ``name`` to exist and return its record.

     ``stream`` and ``client`` are not used here.  When ``timeout`` is given
     and the remaining time drops below zero, ``gen.TimeoutError`` is raised.

     For a record of type ``"Future"`` a fresh token is generated, added to
     ``self.waiting[key, name]``, and merged into the returned record along
     with the task state (``"lost"`` if the scheduler has no such task) and,
     for erred tasks, the blamed exception and traceback.
     """
     began = time()
     while name not in self.variables:
         left = None if timeout is None else timeout - (time() - began)
         if left and left < 0:
             raise gen.TimeoutError()
         await self.started.wait(timeout=left)

     record = self.variables[name]
     if record["type"] != "Future":
         return record

     key = record["value"]
     token = uuid.uuid4().hex
     ts = self.scheduler.tasks.get(key)
     if ts is None:
         state = "lost"
     else:
         state = ts.state

     msg = {"token": token, "state": state}
     if state == "erred":
         msg["exception"] = ts.exception_blame.exception
         msg["traceback"] = ts.exception_blame.traceback

     merged = merge(record, msg)
     self.waiting[key, name].add(token)
     return merged
Example #18
0
 def cmd_timeout(self, ft, cmd):
     """Fail ``ft`` with a ``gen.TimeoutError`` whose message names ``cmd``."""
     message = 'Timeout in waiting response of command: {}'.format(str(cmd))
     ft.set_exception(gen.TimeoutError(message))
Example #19
0
 def _timeout_callback(self, fut):
     """Remove the handler for ``self._fileno`` and fail ``fut`` with a timeout."""
     self._ioloop.remove_handler(self._fileno)
     timeout_error = gen.TimeoutError()
     fut.set_exception(timeout_error)
 def on_timeout():
     """Fail the pending future with ``gen.TimeoutError`` unless it already finished."""
     # The future may have completed just before the timeout fired.  Setting
     # an exception on a finished future either raises or overwrites its
     # outcome, depending on the future implementation, so skip it then.
     if not future.done():
         future.set_exception(gen.TimeoutError())
Example #21
0
 def on_timeout():
     """Fail the waiter with ``gen.TimeoutError`` if still pending, then garbage-collect."""
     # The waiter may have completed just before the timeout fired.  Setting
     # an exception on a finished future either raises or overwrites its
     # outcome, depending on the future implementation, so skip it then.
     if not waiter.done():
         waiter.set_exception(gen.TimeoutError())
     self._garbage_collect()
Example #22
0
 def on_timeout() -> None:
     """Fail the waiter with ``gen.TimeoutError`` if still pending, then garbage-collect."""
     still_pending = not waiter.done()
     if still_pending:
         waiter.set_exception(gen.TimeoutError())
     self._garbage_collect()
Example #23
0
 def __error_callback(self, future):
     """Fail ``future`` with ``gen.TimeoutError("Timeout")``."""
     timeout_error = gen.TimeoutError("Timeout")
     future.set_exception(timeout_error)
Example #24
0
 def on_timeout():
     """Fail the pending future with ``gen.TimeoutError`` unless it is already done."""
     if future.done():
         return
     future.set_exception(gen.TimeoutError())
Example #25
0
    def _kill(self, comm=None, timeout=10):
        """ Kill the local worker process

        Blocks until both the process is down and the scheduler is properly
        informed

        Parameters
        ----------
        comm:
            Not used by this method.
        timeout: number
            Seconds allowed for each stage: waiting for the worker address,
            unregistering from the scheduler, and waiting for the process to
            exit.  The terminate request sent to the worker gets
            ``min(1, timeout)`` seconds.

        Returns ``'OK'`` (through ``gen.Return``) when there is no process,
        otherwise nothing.  Raises ``gen.TimeoutError`` if the worker address
        does not become available in time.
        """
        timeout_time = time() + timeout

        while not self.worker_address:
            yield gen.sleep(0.1)
            if time() > timeout_time:
                raise gen.TimeoutError()

        if self.process is None:
            raise gen.Return('OK')

        # Clear should_watch for the duration of the kill; the previous
        # value is restored at the end.
        should_watch, self.should_watch = self.should_watch, False

        if isalive(self.process):
            try:
                # Ask worker to close
                with rpc(self.worker_address) as worker:
                    yield gen.with_timeout(
                        timedelta(seconds=min(1, timeout)),
                        worker.terminate(report=False),
                    )

            except gen.TimeoutError:
                logger.info("Worker non-responsive.  Terminating.")
            except CommClosedError:
                pass
            except BaseException as e:
                # Only log while the loop is running and not shutting down.
                if (not self.loop._closing and self.loop._running
                        and not self.loop._stopped):
                    logger.exception(e)

            allowed_errors = (gen.TimeoutError, CommClosedError,
                              EnvironmentError, RPCClosed)
            try:
                # Tell scheduler that worker is gone
                result = yield gen.with_timeout(
                    timedelta(seconds=timeout),
                    self.scheduler.unregister(address=self.worker_address),
                    quiet_exceptions=allowed_errors)
                if result not in ('OK', 'already-removed'):
                    logger.critical(
                        "Unable to unregister with scheduler %s. "
                        "Nanny: %s, Worker: %s", result, self.address,
                        self.worker_address)
                else:
                    logger.info("Unregister worker %r from scheduler",
                                self.worker_address)
            except allowed_errors as e:
                # Maybe the scheduler is gone, or it is unresponsive
                # (Logger.warn is a deprecated alias; use Logger.warning.)
                logger.warning("Nanny %r failed to unregister worker %r: %s",
                               self.address, self.worker_address, e)
            except Exception as e:
                logger.exception(e)

        if self.process:
            with ignoring(OSError):
                self.process.terminate()
            # NOTE(review): `join` and the `sleep` loop below look like
            # blocking calls inside a coroutine - confirm this is intended.
            join(self.process, timeout)
            processes_to_close.discard(self.process)

            start = time()
            while isalive(self.process) and time() < start + timeout:
                sleep(0.01)

            self.process = None
            self.cleanup()
            logger.info("Nanny %r kills worker process %r", self.address,
                        self.worker_address)

        self.should_watch = should_watch