Example #1
async def progress_stream(address, interval):
    """Open a TCP connection to scheduler, receive progress messages

    The messages coming back are dicts containing counts of key groups::

        {'inc': {'all': 5, 'memory': 2, 'erred': 0, 'released': 1},
         'dec': {'all': 1, 'memory': 0, 'erred': 0, 'released': 0}}

    Parameters
    ----------
    address: address of scheduler
    interval: time between batches, in seconds

    Examples
    --------
    >>> stream = await progress_stream('127.0.0.1:8786', 0.100)  # doctest: +SKIP
    >>> print(await read(stream))  # doctest: +SKIP
    """
    address = coerce_to_address(address)
    comm = await connect(address)
    await comm.write({
        "op": "feed",
        "setup": dumps_function(AllProgress),
        "function": dumps_function(counts),
        "interval": interval,
        "teardown": dumps_function(_remove_all_progress_plugin),
    })
    return comm
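The comm returned here is a feed: the scheduler pushes one counts dict per interval until the connection closes. A minimal consumer sketch, assuming the usual distributed ``Comm.read()``/``Comm.close()`` coroutines and a scheduler reachable at the hypothetical address below:

import asyncio
from distributed.comm import CommClosedError

async def watch_progress(address="127.0.0.1:8786", interval=0.100):
    comm = await progress_stream(address, interval)
    try:
        while True:
            # each message looks like {'inc': {'all': 5, 'memory': 2, ...}, 'dec': {...}}
            counts = await comm.read()
            print(counts)
    except CommClosedError:
        pass  # scheduler shut down or the feed was torn down
    finally:
        await comm.close()

# asyncio.run(watch_progress())  # run inside an asyncio event loop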
Example #2
@gen.coroutine  # tornado coroutine; this yield-based style predates async/await
def processing():
    with log_errors():
        # the imported ``processing`` diagnostic (not this coroutine) is what gets
        # serialized and run periodically on the scheduler
        from distributed.diagnostics.scheduler import processing
        addr = coerce_to_address((options['host'], options['tcp-port']))
        comm = yield connect(addr)
        yield comm.write({'op': 'feed',
                          'function': dumps(processing),
                          'interval': 0.200})
        while True:
            try:
                msg = yield comm.read()
            except CommClosedError:
                break
            else:
                messages['processing'] = msg
Example #3
def test_gather_then_submit_after_failed_workers(loop):
    with cluster(nworkers=4) as (s, [w, x, y, z]):
        with Client(('127.0.0.1', s['port']), loop=loop) as c:
            L = c.map(inc, range(20))
            wait(L)
            w['proc'].terminate()
            total = c.submit(sum, L)
            wait([total])

            addr = c.who_has()[total.key][0]
            _, port = coerce_to_address(addr, out=tuple)
            for d in [x, y, z]:
                if d['port'] == port:
                    d['proc'].terminate()
                    break
            else:
                assert 0, "Could not find worker"

            result = c.gather([total])
            assert result == [sum(map(inc, range(20)))]
Example #5
async def eventstream(address, interval):
    """Open a TCP connection to scheduler, receive batched task messages

    The messages coming back are lists of dicts.  Each dict is of the following
    form::

        {'key': 'mykey', 'worker': 'host:port', 'status': status,
         'compute_start': time(), 'compute_stop': time(),
         'transfer_start': time(), 'transfer_stop': time(),
         'disk_load_start': time(), 'disk_load_stop': time(),
         'other': 'junk'}

    Where ``status`` is either 'OK' or 'error'

    Parameters
    ----------
    address: address of scheduler
    interval: time between batches, in seconds

    Examples
    --------
    >>> stream = await eventstream('127.0.0.1:8786', 0.100)  # doctest: +SKIP
    >>> print(await read(stream))  # doctest: +SKIP
    [{'key': 'x', 'status': 'OK', 'worker': '192.168.0.1:54684', ...},
     {'key': 'y', 'status': 'error', 'worker': '192.168.0.1:54684', ...}]
    """
    address = coerce_to_address(address)
    comm = await connect(address)
    await comm.write({
        "op": "feed",
        "setup": dumps_function(EventStream),
        "function": dumps_function(swap_buffer),
        "interval": interval,
        "teardown": dumps_function(teardown),
    })
    return comm
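As with progress_stream, the returned comm yields one batch per interval, here a list of per-task dicts. A small sketch that tallies task statuses per batch, assuming the same Comm API as above:

from collections import Counter
from distributed.comm import CommClosedError

async def count_statuses(address="127.0.0.1:8786", interval=0.100):
    comm = await eventstream(address, interval)
    totals = Counter()
    try:
        while True:
            batch = await comm.read()              # list of task-message dicts
            totals.update(msg["status"] for msg in batch)
            print(dict(totals))                    # e.g. {'OK': 40, 'error': 2}
    except CommClosedError:
        return totals
    finally:
        await comm.close()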
Example #6
def test_coerce_to_address():
    for arg in ["127.0.0.1:8786", ("127.0.0.1", 8786), ("127.0.0.1", "8786")]:
        assert coerce_to_address(arg) == "tcp://127.0.0.1:8786"
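Examples #6 and #7 exercise the same helper against different releases: both normalize bytes, strings, and (host, port) tuples into a single address string, but newer versions also prepend the default transport scheme. A hypothetical, simplified re-implementation that captures just the behavior this test asserts (the real function also handles IPv6, separately supplied ports, and configurable schemes):

def coerce_to_address_sketch(arg, default_scheme="tcp"):
    # hypothetical helper for illustration only, not the library's implementation
    if isinstance(arg, bytes):
        arg = arg.decode()
    if isinstance(arg, tuple):          # ('127.0.0.1', 8786) or ('127.0.0.1', '8786')
        host, port = arg
        arg = f"{host}:{port}"
    if "://" not in arg:                # newer releases add the scheme prefix
        arg = f"{default_scheme}://{arg}"
    return arg

assert coerce_to_address_sketch(("127.0.0.1", 8786)) == "tcp://127.0.0.1:8786"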
Example #7
def test_coerce_to_address():
    for arg in [b'127.0.0.1:8786',
                '127.0.0.1:8786',
                ('127.0.0.1', 8786),
                ('127.0.0.1', '8786')]:
        assert coerce_to_address(arg) == '127.0.0.1:8786'
Example #9
    def __init__(
        self,
        scheduler_ip=None,
        scheduler_port=None,
        scheduler_file=None,
        worker_port=0,
        nthreads=None,
        loop=None,
        local_dir=None,
        local_directory=None,
        services=None,
        name=None,
        memory_limit="auto",
        reconnect=True,
        validate=False,
        quiet=False,
        resources=None,
        silence_logs=None,
        death_timeout=None,
        preload=None,
        preload_argv=None,
        preload_nanny=None,
        preload_nanny_argv=None,
        security=None,
        contact_address=None,
        listen_address=None,
        worker_class=None,
        env=None,
        interface=None,
        host=None,
        port=None,
        protocol=None,
        config=None,
        **worker_kwargs,
    ):
        self._setup_logging(logger)
        self.loop = loop or IOLoop.current()

        if isinstance(security, dict):
            security = Security(**security)
        self.security = security or Security()
        assert isinstance(self.security, Security)
        self.connection_args = self.security.get_connection_args("worker")

        if local_dir is not None:
            warnings.warn("The local_dir keyword has moved to local_directory")
            local_directory = local_dir

        if local_directory is None:
            local_directory = dask.config.get(
                "temporary-directory") or os.getcwd()
            self._original_local_dir = local_directory
            local_directory = os.path.join(local_directory,
                                           "dask-worker-space")
        else:
            self._original_local_dir = local_directory

        self.local_directory = local_directory
        if not os.path.exists(self.local_directory):
            os.makedirs(self.local_directory, exist_ok=True)

        self.preload = preload
        if self.preload is None:
            self.preload = dask.config.get("distributed.worker.preload")
        self.preload_argv = preload_argv
        if self.preload_argv is None:
            self.preload_argv = dask.config.get(
                "distributed.worker.preload-argv")

        if preload_nanny is None:
            preload_nanny = dask.config.get("distributed.nanny.preload")
        if preload_nanny_argv is None:
            preload_nanny_argv = dask.config.get(
                "distributed.nanny.preload-argv")

        self.preloads = preloading.process_preloads(
            self,
            preload_nanny,
            preload_nanny_argv,
            file_dir=self.local_directory)

        if scheduler_file:
            cfg = json_load_robust(scheduler_file)
            self.scheduler_addr = cfg["address"]
        elif scheduler_ip is None and dask.config.get("scheduler-address"):
            self.scheduler_addr = dask.config.get("scheduler-address")
        elif scheduler_port is None:
            self.scheduler_addr = coerce_to_address(scheduler_ip)
        else:
            self.scheduler_addr = coerce_to_address(
                (scheduler_ip, scheduler_port))

        if protocol is None:
            protocol_address = self.scheduler_addr.split("://")
            if len(protocol_address) == 2:
                protocol = protocol_address[0]

        self._given_worker_port = worker_port
        self.nthreads = nthreads or CPU_COUNT
        self.reconnect = reconnect
        self.validate = validate
        self.resources = resources
        self.death_timeout = parse_timedelta(death_timeout)

        self.Worker = Worker if worker_class is None else worker_class
        config_environ = dask.config.get("distributed.nanny.environ", {})
        if not isinstance(config_environ, dict):
            raise TypeError(
                "distributed.nanny.environ configuration must be of type dict. "
                f"Instead got {type(config_environ)}")
        self.env = config_environ.copy()
        for k in self.env:
            if k in os.environ:
                self.env[k] = os.environ[k]
        if env:
            self.env.update(env)
        self.env = {k: str(v) for k, v in self.env.items()}
        self.config = config or dask.config.config
        worker_kwargs.update({
            "port": worker_port,
            "interface": interface,
            "protocol": protocol,
            "host": host,
        })
        self.worker_kwargs = worker_kwargs

        self.contact_address = contact_address

        self.services = services
        self.name = name
        self.quiet = quiet
        self.auto_restart = True

        if silence_logs:
            silence_logging(level=silence_logs)
        self.silence_logs = silence_logs

        handlers = {
            "instantiate": self.instantiate,
            "kill": self.kill,
            "restart": self.restart,
            # cannot call it 'close' on the rpc side for naming conflict
            "get_logs": self.get_logs,
            "terminate": self.close,
            "close_gracefully": self.close_gracefully,
            "run": self.run,
            "plugin_add": self.plugin_add,
            "plugin_remove": self.plugin_remove,
        }

        self.plugins: dict[str, NannyPlugin] = {}

        super().__init__(handlers=handlers,
                         io_loop=self.loop,
                         connection_args=self.connection_args)

        self.scheduler = self.rpc(self.scheduler_addr)
        self.memory_manager = NannyMemoryManager(self,
                                                 memory_limit=memory_limit)

        if (not host and not interface
                and not self.scheduler_addr.startswith("inproc://")):
            host = get_ip(get_address_host(self.scheduler.address))

        self._start_port = port
        self._start_host = host
        self._interface = interface
        self._protocol = protocol

        self._listen_address = listen_address
        Nanny._instances.add(self)
        self.status = Status.init
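Most of this constructor is configuration plumbing; the scheduler address ultimately comes from a scheduler file, the dask config, or coerce_to_address on the given ip/port. A minimal usage sketch, assuming an asyncio context and that Scheduler and Nanny can be used as async context managers, as in recent distributed releases:

import asyncio
from distributed import Scheduler, Nanny

async def main():
    async with Scheduler(port=0) as scheduler:
        # the full address string is accepted via the first argument and
        # normalized internally with coerce_to_address
        async with Nanny(scheduler.address, nthreads=1) as nanny:
            print("nanny listening at", nanny.address)

# asyncio.run(main())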
Example #10
def get_scheduler(scheduler):
    if scheduler is None:
        return default_client().scheduler.address
    return coerce_to_address(scheduler)
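Usage follows directly from the body: with None it falls back to the default client's scheduler address, otherwise the argument is normalized. A short sketch, assuming a Client has already been created in the session (a Client registers itself as the default client):

from distributed import Client

client = Client()                                   # local cluster, becomes the default client
assert get_scheduler(None) == client.scheduler.address
print(get_scheduler(("127.0.0.1", 8786)))           # normalized by coerce_to_address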