Example #1
    def __init__(self,
                 device_memory_limit=None,
                 memory_limit=None,
                 local_dir="dask-worker-space"):
        path = os.path.join(local_dir, "storage")

        self.host_func = dict()
        self.disk_func = Func(partial(serialize_bytes, on_error="raise"),
                              deserialize_bytes, File(path))
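        # NOTE: `weight` used below is not defined in this excerpt; in the
        # originating project it is a module-level sizeof-based function
        # (compare later excerpts that pass weight=lambda k, v: safe_sizeof(v)
        # explicitly).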
        self.host_buffer = Buffer(self.host_func,
                                  self.disk_func,
                                  memory_limit,
                                  weight=weight)

        self.device_func = dict()
        self.device_host_func = Func(_serialize_if_device,
                                     _deserialize_if_device, self.host_buffer)
        self.device_buffer = Buffer(self.device_func,
                                    self.device_host_func,
                                    device_memory_limit,
                                    weight=weight)

        self.device = self.device_buffer.fast.d
        self.host = self.host_buffer.fast.d
        self.disk = self.host_buffer.slow.d

        # For Worker compatibility only, where `fast` is host memory buffer
        self.fast = self.host_buffer.fast
Example #2
    def __init__(
        self,
        device_memory_limit=None,
        memory_limit=None,
        local_directory="dask-worker-space",
    ):
        path = os.path.join(local_directory, "storage")

        self.host_func = dict()
        self.disk_func = Func(serialize_bytelist, deserialize_bytes,
                              File(path))
        self.host_buffer = Buffer(self.host_func,
                                  self.disk_func,
                                  memory_limit,
                                  weight=weight)

        self.device_keys = set()
        self.device_func = dict()
        self.device_host_func = Func(device_to_host, host_to_device,
                                     self.host_buffer)
        self.device_buffer = Buffer(self.device_func,
                                    self.device_host_func,
                                    device_memory_limit,
                                    weight=weight)

        self.device = self.device_buffer.fast.d
        self.host = self.host_buffer.fast.d
        self.disk = self.host_buffer.slow.d

        # For Worker compatibility only, where `fast` is host memory buffer
        self.fast = self.host_buffer.fast
Example #3
def test_simple():
    a = dict()
    b = dict()
    buff = Buffer(a, b, n=10, weight=lambda k, v: v)

    buff["x"] = 1
    buff["y"] = 2

    assert buff["x"] == 1
    assert buff["y"] == 2
    assert a == {"x": 1, "y": 2}
    assert buff.fast.total_weight == 3

    buff["z"] = 8
    assert a == {"y": 2, "z": 8}
    assert b == {"x": 1}

    assert buff["x"] == 1
    assert a == {"x": 1, "z": 8}
    assert b == {"y": 2}

    assert "x" in buff
    assert "y" in buff
    assert "missing" not in buff

    buff["y"] = 1
    assert a == {"x": 1, "y": 1, "z": 8}
    assert buff.fast.total_weight == 10
    assert b == {}

    del buff["z"]
    assert a == {"x": 1, "y": 1}
    assert buff.fast.total_weight == 2
    assert b == {}

    del buff["y"]
    assert a == {"x": 1}
    assert buff.fast.total_weight == 1
    assert b == {}

    assert "y" not in buff

    buff["a"] = 5
    assert set(buff) == set(buff.keys()) == {"a", "x"}

    fast_keys = set(buff.fast)
    slow_keys = set(buff.slow)
    assert not (fast_keys & slow_keys)
    assert fast_keys | slow_keys == set(buff)

    # Overweight element stays in slow mapping
    buff["b"] = 1000
    assert "b" in buff.slow
    assert set(buff.fast) == fast_keys
    assert set(buff.slow) == {"b"} | slow_keys
    assert "b" in buff
    assert buff["b"] == 1000
Example #4
def test_simple():
    a = dict()
    b = dict()
    buff = Buffer(a, b, n=10, weight=lambda k, v: v)

    buff['x'] = 1
    buff['y'] = 2

    assert buff['x'] == 1
    assert buff['y'] == 2
    assert a == {'x': 1, 'y': 2}
    assert buff.fast.total_weight == 3

    buff['z'] = 8
    assert a == {'y': 2, 'z': 8}
    assert b == {'x': 1}

    assert buff['x'] == 1
    assert a == {'x': 1, 'z': 8}
    assert b == {'y': 2}

    assert 'x' in buff
    assert 'y' in buff
    assert 'missing' not in buff

    buff['y'] = 1
    assert a == {'x': 1, 'y': 1, 'z': 8}
    assert buff.fast.total_weight == 10
    assert b == {}

    del buff['z']
    assert a == {'x': 1, 'y': 1}
    assert buff.fast.total_weight == 2
    assert b == {}

    del buff['y']
    assert a == {'x': 1}
    assert buff.fast.total_weight == 1
    assert b == {}

    assert 'y' not in buff

    buff['a'] = 5
    assert set(buff) == set(buff.keys()) == {'a', 'x'}

    fast_keys = set(buff.fast)
    slow_keys = set(buff.slow)
    assert not (fast_keys & slow_keys)
    assert fast_keys | slow_keys == set(buff)

    # Overweight element stays in slow mapping
    buff['b'] = 1000
    assert 'b' in buff.slow
    assert set(buff.fast) == fast_keys
    assert set(buff.slow) == {'b'} | slow_keys
    assert 'b' in buff
    assert buff['b'] == 1000
Example #5
    def __init__(
        self,
        device_memory_limit=None,
        memory_limit=None,
        local_directory=None,
        log_spilling=False,
    ):
        self.disk_func_path = os.path.join(
            local_directory or dask.config.get("temporary-directory") or os.getcwd(),
            "dask-worker-space",
            "storage",
        )
        os.makedirs(self.disk_func_path, exist_ok=True)

        self.host_func = dict()
        self.disk_func = Func(
            functools.partial(serialize_bytelist, on_error="raise"),
            deserialize_bytes,
            File(self.disk_func_path),
        )

        host_buffer_kwargs = {}
        device_buffer_kwargs = {}
        buffer_class = Buffer
        if log_spilling is True:
            buffer_class = LoggedBuffer
            host_buffer_kwargs = {"fast_name": "Host", "slow_name": "Disk"}
            device_buffer_kwargs = {"fast_name": "Device", "slow_name": "Host"}

        if memory_limit == 0:
            self.host_buffer = self.host_func
        else:
            self.host_buffer = buffer_class(
                self.host_func,
                self.disk_func,
                memory_limit,
                weight=lambda k, v: safe_sizeof(v),
                **host_buffer_kwargs,
            )

        self.device_keys = set()
        self.device_func = dict()
        self.device_host_func = Func(device_to_host, host_to_device, self.host_buffer)
        self.device_buffer = Buffer(
            self.device_func,
            self.device_host_func,
            device_memory_limit,
            weight=lambda k, v: safe_sizeof(v),
            **device_buffer_kwargs,
        )

        self.device = self.device_buffer.fast.d
        self.host = self.host_buffer if memory_limit == 0 else self.host_buffer.fast.d
        self.disk = None if memory_limit == 0 else self.host_buffer.slow.d

        # For Worker compatibility only, where `fast` is host memory buffer
        self.fast = self.host_buffer if memory_limit == 0 else self.host_buffer.fast
Example #6
def test_simple():
    a = dict()
    b = dict()
    buff = Buffer(a, b, n=10, weight=lambda k, v: v)

    buff['x'] = 1
    buff['y'] = 2

    assert buff['x'] == 1
    assert buff['y'] == 2
    assert a == {'x': 1, 'y': 2}
    assert buff.fast.total_weight == 3

    buff['z'] = 8
    assert a == {'y': 2, 'z': 8}
    assert b == {'x': 1}

    assert buff['x'] == 1
    assert a == {'x': 1, 'z': 8}
    assert b == {'y': 2}

    assert 'x' in buff
    assert 'y' in buff
    assert 'missing' not in buff

    buff['y'] = 1
    assert a == {'x': 1, 'y': 1, 'z': 8}
    assert buff.fast.total_weight == 10
    assert b == {}

    del buff['z']
    assert a == {'x': 1, 'y': 1}
    assert buff.fast.total_weight == 2
    assert b == {}

    del buff['y']
    assert a == {'x': 1}
    assert buff.fast.total_weight == 1
    assert b == {}

    assert 'y' not in buff

    buff['a'] = 5
    assert set(buff) == set(buff.keys()) == {'a', 'x'}

    fast_keys = set(buff.fast)
    slow_keys = set(buff.slow)
    assert not (fast_keys & slow_keys)
    assert fast_keys | slow_keys == set(buff)

    # Overweight element stays in slow mapping
    buff['b'] = 1000
    assert 'b' in buff.slow
    assert set(buff.fast) == fast_keys
    assert set(buff.slow) == {'b'} | slow_keys
    assert 'b' in buff
    assert buff['b'] == 1000
Example #7
    def __init__(
        self,
        device_memory_limit=None,
        memory_limit=None,
        local_directory=None,
        jit_unspill=False,
    ):
        if local_directory is None:
            local_directory = dask.config.get(
                "temporary-directory") or os.getcwd()

        if not os.path.exists(local_directory):
            os.makedirs(local_directory, exist_ok=True)
        local_directory = os.path.join(local_directory, "dask-worker-space")

        self.disk_func_path = os.path.join(local_directory, "storage")

        self.host_func = dict()
        self.disk_func = Func(
            functools.partial(serialize_bytelist, on_error="raise"),
            deserialize_bytes,
            File(self.disk_func_path),
        )
        if memory_limit == 0:
            self.host_buffer = self.host_func
        else:
            self.host_buffer = Buffer(self.host_func,
                                      self.disk_func,
                                      memory_limit,
                                      weight=weight)

        self.device_keys = set()
        self.device_func = dict()
        if jit_unspill:
            self.device_host_func = Func(pxy_obj_device_to_host,
                                         pxy_obj_host_to_device,
                                         self.host_buffer)
        else:
            self.device_host_func = Func(device_to_host, host_to_device,
                                         self.host_buffer)
        self.device_buffer = Buffer(self.device_func,
                                    self.device_host_func,
                                    device_memory_limit,
                                    weight=weight)

        self.device = self.device_buffer.fast.d
        self.host = self.host_buffer if memory_limit == 0 else self.host_buffer.fast.d
        self.disk = None if memory_limit == 0 else self.host_buffer.slow.d

        # For Worker compatibility only, where `fast` is host memory buffer
        self.fast = self.host_buffer if memory_limit == 0 else self.host_buffer.fast
Example #8
def test_mapping():
    """
    Test mapping interface for Buffer().
    """
    a = {}
    b = {}
    buff = Buffer(a, b, n=2)
    utils_test.check_mapping(buff)
    utils_test.check_closing(buff)
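A hedged sketch of the mapping behaviour such a check exercises: zict's Buffer implements the full MutableMapping protocol on top of its two stores. The key names and the limit below are illustrative only:

def test_mapping_sketch():
    from zict import Buffer

    fast, slow = {}, {}
    buff = Buffer(fast, slow, n=2)  # default weight counts 1 per item
    buff.update({"a": 1, "b": 2, "c": 3})  # the third insert spills to `slow`
    assert len(buff) == 3
    assert sorted(buff) == ["a", "b", "c"]
    assert buff.pop("a") == 1 and "a" not in buff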
Example #9
def test_simple():
    a = dict()
    b = dict()
    buff = Buffer(a, b, n=10, weight=lambda k, v: v)

    buff['x'] = 1
    buff['y'] = 2

    assert buff['x'] == 1
    assert buff['y'] == 2
    assert a == {'x': 1, 'y': 2}
    assert buff.fast.total_weight == 3

    buff['z'] = 8
    assert a == {'y': 2, 'z': 8}
    assert b == {'x': 1}

    assert buff['x'] == 1
    assert a == {'x': 1, 'z': 8}
    assert b == {'y': 2}

    assert 'x' in buff
    assert 'y' in buff
    assert 'missing' not in buff

    del buff['z']
    assert a == {'x': 1}
    assert b == {'y': 2}

    del buff['y']
    assert a == {'x': 1}
    assert b == {}

    assert 'y' not in buff

    buff['a'] = 5
    assert set(buff) == set(buff.keys()) == {'a', 'x'}

    fast_keys = set(buff.fast)
    buff['b'] = 1000
    assert 'b' in buff.slow
    assert set(buff.fast) == fast_keys
Example #10
    def __init__(
        self,
        device_memory_limit=None,
        memory_limit=None,
        local_directory=None,
    ):
        if local_directory is None:
            local_directory = dask.config.get(
                "temporary-directory") or os.getcwd()
            os.makedirs(local_directory, exist_ok=True)
            local_directory = os.path.join(local_directory,
                                           "dask-worker-space")

        self.disk_func_path = os.path.join(local_directory, "storage")

        self.host_func = dict()
        self.disk_func = Func(serialize_bytelist, deserialize_bytes,
                              File(self.disk_func_path))
        self.host_buffer = Buffer(self.host_func,
                                  self.disk_func,
                                  memory_limit,
                                  weight=weight)

        self.device_keys = set()
        self.device_func = dict()
        self.device_host_func = Func(device_to_host, host_to_device,
                                     self.host_buffer)
        self.device_buffer = Buffer(self.device_func,
                                    self.device_host_func,
                                    device_memory_limit,
                                    weight=weight)

        self.device = self.device_buffer.fast.d
        self.host = self.host_buffer.fast.d
        self.disk = self.host_buffer.slow.d

        # For Worker compatibility only, where `fast` is host memory buffer
        self.fast = self.host_buffer.fast
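Most of these excerpts lean on zict's Func wrapper to serialize values on their way into the File store. A standalone sketch of that pattern, with pickle standing in for the projects' own serializers and a throwaway temporary directory:

import os
import pickle
import tempfile

from zict import File, Func

path = os.path.join(tempfile.mkdtemp(), "storage")
store = Func(pickle.dumps, pickle.loads, File(path))  # dump, load, mapping
store["key"] = {"some": "value"}  # pickled, then written to a file on disk
assert store["key"] == {"some": "value"}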
Example #11
    def setup(self, worker):
        self.cache = Buffer(
            fast={},
            slow=Func(
                dump=blosc.pack_array,
                load=blosc.unpack_array,
                d=Buffer(
                    fast={},
                    slow=LRU(
                        n=self._maxdisk,
                        d=File(os.path.join(worker.local_directory, 'cache')),
                        weight=lambda k, v: len(v),
                    ),
                    n=self._maxcompressed,
                    weight=lambda k, v: len(v),
                ),
            ),
            n=self._maxmem,
            weight=lambda k, v: v.nbytes,
        )
        self.lock = Lock()
        self.hits = 0
        self.misses = 0
Example #12
def test_callbacks():
    f2s = []

    def f2s_cb(k, v):
        f2s.append(k)

    s2f = []

    def s2f_cb(k, v):
        s2f.append(k)

    a = dict()
    b = dict()
    buff = Buffer(
        a,
        b,
        n=10,
        weight=lambda k, v: v,
        fast_to_slow_callbacks=f2s_cb,
        slow_to_fast_callbacks=s2f_cb,
    )

    buff["x"] = 1
    buff["y"] = 2

    assert buff["x"] == 1
    assert buff["y"] == 2
    assert not f2s
    assert not s2f

    buff["z"] = 8

    assert f2s == ["x"]
    assert s2f == []
    buff["z"]

    assert f2s == ["x"]
    assert s2f == []

    buff["x"]
    assert f2s == ["x", "y"]
    assert s2f == ["x"]
Example #13
def test_setitem_avoid_fast_slow_duplicate():

    a = dict()
    b = dict()
    buff = Buffer(a, b, n=10, weight=lambda k, v: v)
    for first, second in [(1, 12), (12, 1)]:
        buff["a"] = first
        assert buff["a"] == first
        buff["a"] = second
        assert buff["a"] == second

        fast_keys = set(buff.fast)
        slow_keys = set(buff.slow)
        assert not (fast_keys & slow_keys)
        assert fast_keys | slow_keys == set(buff)

        del buff["a"]
        assert "a" not in buff
        assert "a" not in a
        assert "a" not in b
Example #14
class Worker(Server):
    """ Worker Node

    Workers perform two functions:

    1.  **Serve data** from a local dictionary
    2.  **Perform computation** on that data and on data from peers

    Additionally, workers keep the scheduler informed of their data and use it
    to gather data from other workers when necessary to perform a
    computation.

    You can start a worker with the ``dask-worker`` command line application::

        $ dask-worker scheduler-ip:port

    **State**

    * **data:** ``{key: object}``:
        Dictionary mapping keys to actual values
    * **active:** ``{key}``:
        Set of keys currently under computation
    * **ncores:** ``int``:
        Number of cores used by this worker process
    * **executor:** ``concurrent.futures.ThreadPoolExecutor``:
        Executor used to perform computation
    * **local_dir:** ``path``:
        Path on local machine to store temporary files
    * **scheduler:** ``rpc``:
        Location of scheduler.  See ``.ip/.port`` attributes.
    * **name:** ``string``:
        Alias
    * **services:** ``{str: Server}``:
        Auxiliary web servers running on this worker
    * **service_ports:** ``{str: port}``:
        Ports on which the auxiliary web servers are listening

    Examples
    --------

    Create schedulers and workers in Python:

    >>> from distributed import Scheduler, Worker
    >>> c = Scheduler('192.168.0.100', 8786)  # doctest: +SKIP
    >>> w = Worker(c.ip, c.port)  # doctest: +SKIP
    >>> yield w._start(port=8786)  # doctest: +SKIP

    Or use the command line::

        $ dask-scheduler
        Start scheduler at 127.0.0.1:8786

        $ dask-worker 127.0.0.1:8786
        Start worker at:               127.0.0.1:8787
        Registered with scheduler at:  127.0.0.1:8786

    See Also
    --------
    distributed.scheduler.Scheduler:
    """

    def __init__(self, scheduler_ip, scheduler_port, ip=None, ncores=None,
                 loop=None, local_dir=None, services=None, service_ports=None,
                 name=None, heartbeat_interval=1000, memory_limit=None, **kwargs):
        self.ip = ip or get_ip()
        self._port = 0
        self.ncores = ncores or _ncores
        self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
        if not os.path.exists(self.local_dir):
            os.mkdir(self.local_dir)
        if memory_limit:
            try:
                from zict import Buffer, File, Func
            except ImportError:
                raise ImportError("Please `pip install zict` for spill-to-disk workers")
            path = os.path.join(self.local_dir, 'storage')
            storage = Func(dumps_to_disk, loads_from_disk, File(path))
            self.data = Buffer({}, storage, int(float(memory_limit)), weight)
        else:
            self.data = dict()
        self.loop = loop or IOLoop.current()
        self.status = None
        self.executor = ThreadPoolExecutor(self.ncores)
        self.scheduler = rpc(ip=scheduler_ip, port=scheduler_port)
        self.active = set()
        self.name = name
        self.heartbeat_interval = heartbeat_interval
        self.heartbeat_active = False
        self.execution_state = {'scheduler': self.scheduler.address,
                                'ioloop': self.loop,
                                'worker': self}
        self._last_disk_io = None
        self._last_net_io = None
        self._ipython_kernel = None

        if self.local_dir not in sys.path:
            sys.path.insert(0, self.local_dir)

        self.services = {}
        self.service_ports = service_ports or {}
        for k, v in (services or {}).items():
            if isinstance(k, tuple):
                k, port = k
            else:
                port = 0

            self.services[k] = v(self, io_loop=self.loop)
            self.services[k].listen(port)
            self.service_ports[k] = self.services[k].port

        handlers = {'compute': self.compute,
                    'gather': self.gather,
                    'compute-stream': self.compute_stream,
                    'run': self.run,
                    'get_data': self.get_data,
                    'update_data': self.update_data,
                    'delete_data': self.delete_data,
                    'terminate': self.terminate,
                    'ping': pingpong,
                    'health': self.host_health,
                    'upload_file': self.upload_file,
                    'start_ipython': self.start_ipython,
                    'keys': self.keys,
                }

        super(Worker, self).__init__(handlers, io_loop=self.loop, **kwargs)

        self.heartbeat_callback = PeriodicCallback(self.heartbeat,
                                                   self.heartbeat_interval,
                                                   io_loop=self.loop)
        self.loop.add_callback(self.heartbeat_callback.start)

    @gen.coroutine
    def heartbeat(self):
        if not self.heartbeat_active:
            self.heartbeat_active = True
            logger.debug("Heartbeat: %s" % self.address)
            try:
                yield self.scheduler.register(address=self.address, name=self.name,
                                        ncores=self.ncores,
                                        now=time(),
                                        info=self.process_health(),
                                        host_info=self.host_health(),
                                        services=self.service_ports,
                                        **self.process_health())
            finally:
                self.heartbeat_active = False
        else:
            logger.debug("Heartbeat skipped: channel busy")

    @gen.coroutine
    def _start(self, port=0):
        self.listen(port)
        self.name = self.name or self.address
        for k, v in self.services.items():
            v.listen(0)
            self.service_ports[k] = v.port

        logger.info('      Start worker at: %20s:%d', self.ip, self.port)
        for k, v in self.service_ports.items():
            logger.info('  %16s at: %20s:%d' % (k, self.ip, v))
        logger.info('Waiting to connect to: %20s:%d',
                    self.scheduler.ip, self.scheduler.port)
        while True:
            try:
                resp = yield self.scheduler.register(
                        ncores=self.ncores, address=(self.ip, self.port),
                        keys=list(self.data),
                        name=self.name, nbytes=valmap(sizeof, self.data),
                        now=time(),
                        host_info=self.host_health(),
                        services=self.service_ports,
                        **self.process_health())
                break
            except (OSError, StreamClosedError):
                logger.debug("Unable to register with scheduler.  Waiting")
                yield gen.sleep(0.5)
        if resp != 'OK':
            raise ValueError(resp)
        logger.info('        Registered to: %20s:%d',
                    self.scheduler.ip, self.scheduler.port)
        self.status = 'running'

    def start(self, port=0):
        self.loop.add_callback(self._start, port)

    def identity(self, stream):
        return {'type': type(self).__name__, 'id': self.id,
                'scheduler': (self.scheduler.ip, self.scheduler.port)}

    @gen.coroutine
    def _close(self, report=True, timeout=10):
        self.heartbeat_callback.stop()
        with ignoring(RPCClosed, StreamClosedError):
            if report:
                yield gen.with_timeout(timedelta(seconds=timeout),
                        self.scheduler.unregister(address=(self.ip, self.port)),
                        io_loop=self.loop)
        self.scheduler.close_rpc()
        self.stop()
        self.executor.shutdown()
        if os.path.exists(self.local_dir):
            shutil.rmtree(self.local_dir)

        for k, v in self.services.items():
            v.stop()
        self.status = 'closed'
        self.stop()

    @gen.coroutine
    def terminate(self, stream, report=True):
        yield self._close(report=report)
        raise Return('OK')

    @property
    def address(self):
        return '%s:%d' % (self.ip, self.port)

    @property
    def address_tuple(self):
        return (self.ip, self.port)

    @gen.coroutine
    def gather(self, stream=None, who_has=None):
        who_has = {k: [coerce_to_address(addr) for addr in v]
                    for k, v in who_has.items()
                    if k not in self.data}
        try:
            result = yield gather_from_workers(who_has)
        except KeyError as e:
            logger.warn("Could not find data", e)
            raise Return({'status': 'missing-data',
                          'keys': e.args})
        else:
            self.data.update(result)
            raise Return({'status': 'OK'})

    def deserialize(self, function=None, args=None, kwargs=None, task=None):
        """ Deserialize task inputs and regularize to func, args, kwargs """
        if task is not None:
            task = loads(task)
        if function is not None:
            function = loads(function)
        if args:
            args = loads(args)
        if kwargs:
            kwargs = loads(kwargs)

        if task is not None:
            assert not function and not args and not kwargs
            function = execute_task
            args = (task,)

        return function, args or (), kwargs or {}

    @gen.coroutine
    def gather_many(self, msgs):
        """ Gather the data for many compute messages at once

        Returns
        -------
        good: the input messages for which we have data
        bad: a dict of task keys for which we could not find data
        data: The scope in which to run tasks
        len(remote): the number of new keys we've gathered
        """
        diagnostics = {}
        who_has = merge(msg['who_has'] for msg in msgs if 'who_has' in msg)

        start = time()
        local = {k: self.data[k] for k in who_has if k in self.data}
        stop = time()
        if stop - start > 0.005:
            diagnostics['disk_load_start'] = start
            diagnostics['disk_load_stop'] = stop

        who_has = {k: v for k, v in who_has.items() if k not in local}
        start = time()
        remote, bad_data = yield gather_from_workers(who_has,
                permissive=True)
        if remote:
            self.data.update(remote)
            yield self.scheduler.add_keys(address=self.address, keys=list(remote))
        stop = time()

        if remote:
            diagnostics['transfer_start'] = start
            diagnostics['transfer_stop'] = stop

        data = merge(local, remote)

        if bad_data:
            missing = {msg['key']: {k for k in msg['who_has'] if k in bad_data}
                        for msg in msgs if 'who_has' in msg}
            bad = {k: v for k, v in missing.items() if v}
            good = [msg for msg in msgs if not missing.get(msg['key'])]
        else:
            good, bad = msgs, {}
        raise Return([good, bad, data, len(remote), diagnostics])

    @gen.coroutine
    def _ready_task(self, function=None, key=None, args=(), kwargs={},
                    task=None, who_has=None):
        who_has = who_has or {}
        diagnostics = {}
        start = time()
        data = {k: self.data[k] for k in who_has if k in self.data}
        stop = time()

        if stop - start > 0.005:
            diagnostics['disk_load_start'] = start
            diagnostics['disk_load_stop'] = stop

        who_has = {k: set(map(coerce_to_address, v))
                   for k, v in who_has.items()
                   if k not in self.data}
        if who_has:
            try:
                logger.info("gather %d keys from peers", len(who_has))
                diagnostics['transfer_start'] = time()
                other = yield gather_from_workers(who_has)
                diagnostics['transfer_stop'] = time()
                self.data.update(other)
                yield self.scheduler.add_keys(address=self.address,
                                              keys=list(other))
                data.update(other)
            except KeyError as e:
                logger.warn("Could not find data for %s", key)
                raise Return({'status': 'missing-data',
                              'keys': e.args,
                              'key': key})

        try:
            start = default_timer()
            function, args, kwargs = self.deserialize(function, args, kwargs,
                    task)
            diagnostics['deserialization'] = default_timer() - start
        except Exception as e:
            logger.warn("Could not deserialize task", exc_info=True)
            emsg = error_message(e)
            emsg['key'] = key
            raise Return(emsg)

        # Fill args with data
        args2 = pack_data(args, data)
        kwargs2 = pack_data(kwargs, data)

        raise Return({'status': 'OK',
                      'function': function,
                      'args': args2,
                      'kwargs': kwargs2,
                      'diagnostics': diagnostics,
                      'key': key})

    @gen.coroutine
    def executor_submit(self, key, function, *args, **kwargs):
        """ Safely run function in thread pool executor

        We've run into issues running concurrent.futures Futures within
        tornado.  Apparently it's advantageous to use timeouts and periodic
        callbacks to ensure things run smoothly.  This can get tricky, so we
        pull it off into a separate method.
        """
        job_counter[0] += 1
        # logger.info("%s:%d Starts job %d, %s", self.ip, self.port, i, key)
        future = self.executor.submit(function, *args, **kwargs)
        pc = PeriodicCallback(lambda: logger.debug("future state: %s - %s",
            key, future._state), 1000, io_loop=self.loop)
        pc.start()
        try:
            yield future
        finally:
            pc.stop()

        result = future.result()

        # logger.info("Finish job %d, %s", i, key)
        raise gen.Return(result)

    @gen.coroutine
    def compute_stream(self, stream):
        with log_errors():
            logger.debug("Open compute stream")
            bstream = BatchedSend(interval=2, loop=self.loop)
            bstream.start(stream)

            closed = False
            last = gen.sleep(0)
            while not closed:
                try:
                    msgs = yield read(stream)
                except StreamClosedError:
                    break
                if not isinstance(msgs, list):
                    msgs = [msgs]

                batch = []
                for msg in msgs:
                    op = msg.pop('op', None)
                    if op == 'close':
                        closed = True
                        break
                    elif op == 'compute-task':
                        batch.append(msg)
                        logger.debug("%s asked to compute %s", self.address,
                                     msg['key'])
                    else:
                        logger.warning("Unknown operation %s, %s", op, msg)
                # self.loop.add_callback(self.compute_many, bstream, msgs)
                last = self.compute_many(bstream, msgs)

            try:
                yield last  # TODO: there might be more than one lingering
            except (RPCClosed, RuntimeError):
                pass

            yield bstream.close()
            logger.info("Close compute stream")

    @gen.coroutine
    def compute_many(self, bstream, msgs, report=False):
        good, bad, data, num_transferred, diagnostics = yield self.gather_many(msgs)

        for msg in msgs:
            msg.pop('who_has', None)

        if bad:
            logger.warn("Could not find data for %s", sorted(bad))
        for k, v in bad.items():
            bstream.send({'status': 'missing-data',
                          'key': k,
                          'keys': list(v)})

        if good:
            futures = [self.compute_one(data, report=report, **msg)
                                     for msg in good]
            wait_iterator = gen.WaitIterator(*futures)
            result = yield wait_iterator.next()
            if diagnostics:
                result.update(diagnostics)
            bstream.send(result)
            while not wait_iterator.done():
                msg = yield wait_iterator.next()
                bstream.send(msg)

    @gen.coroutine
    def compute_one(self, data, key=None, function=None, args=None, kwargs=None,
                    report=False, task=None):
        logger.debug("Compute one on %s", key)
        self.active.add(key)
        diagnostics = dict()
        try:
            start = default_timer()
            function, args, kwargs = self.deserialize(function, args, kwargs,
                    task)
            diagnostics['deserialization'] = default_timer() - start
        except Exception as e:
            logger.warn("Could not deserialize task", exc_info=True)
            emsg = error_message(e)
            emsg['key'] = key
            raise Return(emsg)

        # Fill args with data
        args2 = pack_data(args, data)
        kwargs2 = pack_data(kwargs, data)

        # Log and compute in separate thread
        result = yield self.executor_submit(key, apply_function, function,
                                            args2, kwargs2,
                                            self.execution_state, key)

        result['key'] = key
        result.update(diagnostics)

        if result['status'] == 'OK':
            self.data[key] = result.pop('result')
            if report:
                response = yield self.scheduler.add_keys(keys=[key],
                                        address=(self.ip, self.port))
                if not response == 'OK':
                    logger.warn('Could not report results to scheduler: %s',
                                str(response))
        else:
            logger.warn(" Compute Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000],
                convert_args_to_str(args, max_len=1000),
                convert_kwargs_to_str(kwargs, max_len=1000), exc_info=True)

        logger.debug("Send compute response to scheduler: %s, %s", key,
                     result)
        try:
            self.active.remove(key)
        except KeyError:
            pass
        raise Return(result)

    @gen.coroutine
    def compute(self, stream=None, function=None, key=None, args=(), kwargs={},
            task=None, who_has=None, report=True):
        """ Execute function """
        self.active.add(key)

        # Ready function for computation
        msg = yield self._ready_task(function=function, key=key, args=args,
            kwargs=kwargs, task=task, who_has=who_has)
        if msg['status'] != 'OK':
            try:
                self.active.remove(key)
            except KeyError:
                pass
            raise Return(msg)
        else:
            function = msg['function']
            args = msg['args']
            kwargs = msg['kwargs']

        # Log and compute in separate thread
        result = yield self.executor_submit(key, apply_function, function,
                                            args, kwargs, self.execution_state,
                                            key)

        result['key'] = key
        result.update(msg['diagnostics'])

        if result['status'] == 'OK':
            self.data[key] = result.pop('result')
            if report:
                response = yield self.scheduler.add_keys(address=(self.ip, self.port),
                                                         keys=[key])
                if not response == 'OK':
                    logger.warn('Could not report results to scheduler: %s',
                                str(response))
        else:
            logger.warn(" Compute Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000],
                convert_args_to_str(args, max_len=1000),
                convert_kwargs_to_str(kwargs, max_len=1000), exc_info=True)

        logger.debug("Send compute response to scheduler: %s, %s", key,
            get_msg_safe_str(msg))
        try:
            self.active.remove(key)
        except KeyError:
            pass
        raise Return(result)

    def run(self, stream, function=None, args=(), kwargs={}):
        return run(self, stream, function=function, args=args, kwargs=kwargs)

    @gen.coroutine
    def update_data(self, stream=None, data=None, report=True, deserialize=True):
        if deserialize:
            data = valmap(loads, data)
        self.data.update(data)
        if report:
            response = yield self.scheduler.add_keys(
                                address=(self.ip, self.port),
                                keys=list(data))
            assert response == 'OK'
        info = {'nbytes': {k: sizeof(v) for k, v in data.items()},
                'status': 'OK'}
        raise Return(info)

    @gen.coroutine
    def delete_data(self, stream, keys=None, report=True):
        if keys:
            for key in keys:
                if key in self.data:
                    del self.data[key]
            logger.info("Deleted %d keys", len(keys))
            if report:
                logger.debug("Reporting loss of keys to scheduler")
                yield self.scheduler.remove_keys(address=self.address,
                                              keys=list(keys))
        raise Return('OK')

    def get_data(self, stream, keys=None):
        return {k: dumps(self.data[k]) for k in keys if k in self.data}

    def start_ipython(self, stream):
        """Start an IPython kernel

        Returns Jupyter connection info dictionary.
        """
        from ._ipython_utils import start_ipython
        if self._ipython_kernel is None:
            self._ipython_kernel = start_ipython(
                ip=self.ip,
                ns={'worker': self},
                log=logger,
            )
        return self._ipython_kernel.get_connection_info()

    def upload_file(self, stream, filename=None, data=None, load=True):
        out_filename = os.path.join(self.local_dir, filename)
        if isinstance(data, unicode):
            data = data.encode()
        with open(out_filename, 'wb') as f:
            f.write(data)
            f.flush()

        if load:
            try:
                name, ext = os.path.splitext(filename)
                if ext in ('.py', '.pyc'):
                    logger.info("Reload module %s from .py file", name)
                    name = name.split('-')[0]
                    reload(import_module(name))
                if ext == '.egg':
                    sys.path.append(out_filename)
                    pkgs = pkg_resources.find_distributions(out_filename)
                    for pkg in pkgs:
                        logger.info("Load module %s from egg", pkg.project_name)
                        reload(import_module(pkg.project_name))
                    if not pkgs:
                        logger.warning("Found no packages in egg file")
            except Exception as e:
                logger.exception(e)
                return {'status': 'error', 'exception': dumps(e)}
        return {'status': 'OK', 'nbytes': len(data)}

    def process_health(self, stream=None):
        d = {'active': len(self.active),
             'stored': len(self.data)}
        return d

    def host_health(self, stream=None):
        """ Information about worker """
        d = {'time': time()}
        try:
            import psutil
            mem = psutil.virtual_memory()
            d.update({'cpu': psutil.cpu_percent(),
                      'memory': mem.total,
                      'memory_percent': mem.percent})

            net_io = psutil.net_io_counters()
            if self._last_net_io:
                d['network-send'] = net_io.bytes_sent - self._last_net_io.bytes_sent
                d['network-recv'] = net_io.bytes_recv - self._last_net_io.bytes_recv
            else:
                d['network-send'] = 0
                d['network-recv'] = 0
            self._last_net_io = net_io

            try:
                disk_io = psutil.disk_io_counters()
            except RuntimeError:
                # This happens when there is no physical disk in the worker
                pass
            else:
                if self._last_disk_io:
                    d['disk-read'] = disk_io.read_bytes - self._last_disk_io.read_bytes
                    d['disk-write'] = disk_io.write_bytes - self._last_disk_io.write_bytes
                else:
                    d['disk-read'] = 0
                    d['disk-write'] = 0
                self._last_disk_io = disk_io

        except ImportError:
            pass
        return d

    def keys(self, stream=None):
        return list(self.data)
Example #15
    def __init__(self, scheduler_ip, scheduler_port, ip=None, ncores=None,
                 loop=None, local_dir=None, services=None, service_ports=None,
                 name=None, heartbeat_interval=1000, memory_limit=None, **kwargs):
        self.ip = ip or get_ip()
        self._port = 0
        self.ncores = ncores or _ncores
        self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
        if not os.path.exists(self.local_dir):
            os.mkdir(self.local_dir)
        if memory_limit:
            try:
                from zict import Buffer, File, Func
            except ImportError:
                raise ImportError("Please `pip install zict` for spill-to-disk workers")
            path = os.path.join(self.local_dir, 'storage')
            storage = Func(dumps_to_disk, loads_from_disk, File(path))
            self.data = Buffer({}, storage, int(float(memory_limit)), weight)
        else:
            self.data = dict()
        self.loop = loop or IOLoop.current()
        self.status = None
        self.executor = ThreadPoolExecutor(self.ncores)
        self.scheduler = rpc(ip=scheduler_ip, port=scheduler_port)
        self.active = set()
        self.name = name
        self.heartbeat_interval = heartbeat_interval
        self.heartbeat_active = False
        self.execution_state = {'scheduler': self.scheduler.address,
                                'ioloop': self.loop,
                                'worker': self}
        self._last_disk_io = None
        self._last_net_io = None
        self._ipython_kernel = None

        if self.local_dir not in sys.path:
            sys.path.insert(0, self.local_dir)

        self.services = {}
        self.service_ports = service_ports or {}
        for k, v in (services or {}).items():
            if isinstance(k, tuple):
                k, port = k
            else:
                port = 0

            self.services[k] = v(self, io_loop=self.loop)
            self.services[k].listen(port)
            self.service_ports[k] = self.services[k].port

        handlers = {'compute': self.compute,
                    'gather': self.gather,
                    'compute-stream': self.compute_stream,
                    'run': self.run,
                    'get_data': self.get_data,
                    'update_data': self.update_data,
                    'delete_data': self.delete_data,
                    'terminate': self.terminate,
                    'ping': pingpong,
                    'health': self.host_health,
                    'upload_file': self.upload_file,
                    'start_ipython': self.start_ipython,
                    'keys': self.keys,
                }

        super(Worker, self).__init__(handlers, io_loop=self.loop, **kwargs)

        self.heartbeat_callback = PeriodicCallback(self.heartbeat,
                                                   self.heartbeat_interval,
                                                   io_loop=self.loop)
        self.loop.add_callback(self.heartbeat_callback.start)
Example #16
class DeviceHostFile(ZictBase):
    """Manages serialization/deserialization of objects.

    Three LRU cache levels are controlled: device, host, and disk.
    Each level serializes objects once its limit has been reached and
    passes them on to the next level. Similarly, each cache may
    deserialize an object and store it back in the appropriate cache,
    depending on the type of object being deserialized.

    Parameters
    ----------
    device_memory_limit: int
        Number of bytes of CUDA device memory for device LRU cache,
        spills to host cache once filled.
    memory_limit: int
        Number of bytes of host memory for host LRU cache, spills to
        disk once filled. Setting this to 0 means unlimited host memory,
        which implies no spilling to disk.
    local_directory: path
        Path where serialized objects are stored on disk
    log_spilling: bool
        If True, all spilling operations will be logged directly to
        distributed.worker with an INFO loglevel. This will eventually be
        replaced by a Dask configuration flag.
    """
    def __init__(
        self,
        device_memory_limit=None,
        memory_limit=None,
        local_directory=None,
        log_spilling=False,
    ):
        if local_directory is None:
            local_directory = dask.config.get(
                "temporary-directory") or os.getcwd()

        if local_directory and not os.path.exists(local_directory):
            os.makedirs(local_directory, exist_ok=True)
        local_directory = os.path.join(local_directory, "dask-worker-space")

        self.disk_func_path = os.path.join(local_directory, "storage")

        self.host_func = dict()
        self.disk_func = Func(
            functools.partial(serialize_bytelist, on_error="raise"),
            deserialize_bytes,
            File(self.disk_func_path),
        )

        host_buffer_kwargs = {}
        device_buffer_kwargs = {}
        buffer_class = Buffer
        if log_spilling is True:
            buffer_class = LoggedBuffer
            host_buffer_kwargs = {"fast_name": "Host", "slow_name": "Disk"}
            device_buffer_kwargs = {"fast_name": "Device", "slow_name": "Host"}

        if memory_limit == 0:
            self.host_buffer = self.host_func
        else:
            self.host_buffer = buffer_class(
                self.host_func,
                self.disk_func,
                memory_limit,
                weight=lambda k, v: safe_sizeof(v),
                **host_buffer_kwargs,
            )

        self.device_keys = set()
        self.device_func = dict()
        self.device_host_func = Func(device_to_host, host_to_device,
                                     self.host_buffer)
        self.device_buffer = Buffer(
            self.device_func,
            self.device_host_func,
            device_memory_limit,
            weight=lambda k, v: safe_sizeof(v),
            **device_buffer_kwargs,
        )

        self.device = self.device_buffer.fast.d
        self.host = self.host_buffer if memory_limit == 0 else self.host_buffer.fast.d
        self.disk = None if memory_limit == 0 else self.host_buffer.slow.d

        # For Worker compatibility only, where `fast` is host memory buffer
        self.fast = self.host_buffer if memory_limit == 0 else self.host_buffer.fast

    def __setitem__(self, key, value):
        if key in self.device_buffer:
            # Make sure we register the removal of an existing key
            del self[key]

        if is_device_object(value):
            self.device_keys.add(key)
            self.device_buffer[key] = value
        else:
            self.host_buffer[key] = value

    def __getitem__(self, key):
        if key in self.device_keys:
            return self.device_buffer[key]
        elif key in self.host_buffer:
            return self.host_buffer[key]
        else:
            raise KeyError(key)

    def __len__(self):
        return len(self.device_buffer)

    def __iter__(self):
        return iter(self.device_buffer)

    def __delitem__(self, key):
        self.device_keys.discard(key)
        del self.device_buffer[key]

    def set_address(self, addr):
        if isinstance(self.host_buffer, LoggedBuffer):
            self.host_buffer.set_address(addr)
        self.device_buffer.set_address(addr)

    def get_total_spilling_time(self):
        ret = {}
        if isinstance(self.device_buffer, LoggedBuffer):
            ret = {**ret, **self.device_buffer.get_total_spilling_time()}
        if isinstance(self.host_buffer, LoggedBuffer):
            ret = {**ret, **self.host_buffer.get_total_spilling_time()}
        return ret
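The three-tier layering this class implements can be reproduced with zict alone by nesting one Buffer inside another, since a Buffer is itself a mapping. A minimal sketch with plain dicts standing in for the device, host, and disk stores; the limits and weights are illustrative:

from zict import Buffer

device, host, disk = {}, {}, {}
host_disk = Buffer(host, disk, n=100, weight=lambda k, v: v)
tiered = Buffer(device, host_disk, n=10, weight=lambda k, v: v)

tiered["a"] = 8  # fits within the device tier
tiered["b"] = 8  # total weight 16 > 10, so "a" is evicted into host_disk
assert "a" in host and "b" in device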
Example #17
    def __init__(self, scheduler_ip, scheduler_port, ip=None, ncores=None,
                 loop=None, local_dir=None, services=None, service_ports=None,
                 name=None, heartbeat_interval=5000, memory_limit=TOTAL_MEMORY,
                 **kwargs):
        self.ip = ip or get_ip()
        self._port = 0
        self.ncores = ncores or _ncores
        self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
        if not os.path.exists(self.local_dir):
            os.mkdir(self.local_dir)
        self.memory_limit = memory_limit
        if memory_limit:
            try:
                from zict import Buffer, File, Func
            except ImportError:
                raise ImportError("Please `pip install zict` for spill-to-disk workers")
            path = os.path.join(self.local_dir, 'storage')
            storage = Func(dumps_to_disk, loads_from_disk, File(path))
            self.data = Buffer({}, storage, int(float(memory_limit)), weight)
        else:
            self.data = dict()
        self.loop = loop or IOLoop.current()
        self.status = None
        self.executor = ThreadPoolExecutor(self.ncores)
        self.scheduler = rpc(ip=scheduler_ip, port=scheduler_port)
        self.active = set()
        self.name = name
        self.heartbeat_interval = heartbeat_interval
        self.heartbeat_active = False
        self.execution_state = {'scheduler': self.scheduler.address,
                                'ioloop': self.loop,
                                'worker': self}
        self._last_disk_io = None
        self._last_net_io = None
        self._ipython_kernel = None

        if self.local_dir not in sys.path:
            sys.path.insert(0, self.local_dir)

        self.services = {}
        self.service_ports = service_ports or {}
        for k, v in (services or {}).items():
            if isinstance(k, tuple):
                k, port = k
            else:
                port = 0

            self.services[k] = v(self, io_loop=self.loop)
            self.services[k].listen(port)
            self.service_ports[k] = self.services[k].port

        handlers = {'compute': self.compute,
                    'gather': self.gather,
                    'compute-stream': self.compute_stream,
                    'run': self.run,
                    'get_data': self.get_data,
                    'update_data': self.update_data,
                    'delete_data': self.delete_data,
                    'terminate': self.terminate,
                    'ping': pingpong,
                    'health': self.host_health,
                    'upload_file': self.upload_file,
                    'start_ipython': self.start_ipython,
                    'keys': self.keys,
                }

        super(Worker, self).__init__(handlers, io_loop=self.loop, **kwargs)

        self.heartbeat_callback = PeriodicCallback(self.heartbeat,
                                                   self.heartbeat_interval,
                                                   io_loop=self.loop)
        self.loop.add_callback(self.heartbeat_callback.start)
Example #18
class Worker(Server):
    """ Worker Node

    Workers perform two functions:

    1.  **Serve data** from a local dictionary
    2.  **Perform computation** on that data and on data from peers

    Additionally, workers keep the scheduler informed of their data and use it
    to gather data from other workers when necessary to perform a
    computation.

    You can start a worker with the ``dask-worker`` command line application::

        $ dask-worker scheduler-ip:port

    **State**

    * **data:** ``{key: object}``:
        Dictionary mapping keys to actual values
    * **active:** ``{key}``:
        Set of keys currently under computation
    * **ncores:** ``int``:
        Number of cores used by this worker process
    * **executor:** ``concurrent.futures.ThreadPoolExecutor``:
        Executor used to perform computation
    * **local_dir:** ``path``:
        Path on local machine to store temporary files
    * **scheduler:** ``rpc``:
        Location of scheduler.  See ``.ip/.port`` attributes.
    * **name:** ``string``:
        Alias
    * **services:** ``{str: Server}``:
        Auxiliary web servers running on this worker
    * **service_ports:** ``{str: port}``:
        Ports on which the auxiliary web servers are listening

    Examples
    --------

    Create schedulers and workers in Python:

    >>> from distributed import Scheduler, Worker
    >>> c = Scheduler('192.168.0.100', 8786)  # doctest: +SKIP
    >>> w = Worker(c.ip, c.port)  # doctest: +SKIP
    >>> yield w._start(port=8786)  # doctest: +SKIP

    Or use the command line::

        $ dask-scheduler
        Start scheduler at 127.0.0.1:8786

        $ dask-worker 127.0.0.1:8786
        Start worker at:               127.0.0.1:8787
        Registered with scheduler at:  127.0.0.1:8786

    See Also
    --------
    distributed.scheduler.Scheduler:
    """

    def __init__(self, scheduler_ip, scheduler_port, ip=None, ncores=None,
                 loop=None, local_dir=None, services=None, service_ports=None,
                 name=None, heartbeat_interval=5000, memory_limit=TOTAL_MEMORY,
                 **kwargs):
        self.ip = ip or get_ip()
        self._port = 0
        self.ncores = ncores or _ncores
        self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
        if not os.path.exists(self.local_dir):
            os.mkdir(self.local_dir)
        self.memory_limit = memory_limit
        if memory_limit:
            try:
                from zict import Buffer, File, Func
            except ImportError:
                raise ImportError("Please `pip install zict` for spill-to-disk workers")
            path = os.path.join(self.local_dir, 'storage')
            storage = Func(dumps_to_disk, loads_from_disk, File(path))
            self.data = Buffer({}, storage, int(float(memory_limit)), weight)
        else:
            self.data = dict()
        self.loop = loop or IOLoop.current()
        self.status = None
        self.executor = ThreadPoolExecutor(self.ncores)
        self.scheduler = rpc(ip=scheduler_ip, port=scheduler_port)
        self.active = set()
        self.name = name
        self.heartbeat_interval = heartbeat_interval
        self.heartbeat_active = False
        self.execution_state = {'scheduler': self.scheduler.address,
                                'ioloop': self.loop,
                                'worker': self}
        self._last_disk_io = None
        self._last_net_io = None
        self._ipython_kernel = None

        if self.local_dir not in sys.path:
            sys.path.insert(0, self.local_dir)

        self.services = {}
        self.service_ports = service_ports or {}
        for k, v in (services or {}).items():
            if isinstance(k, tuple):
                k, port = k
            else:
                port = 0

            self.services[k] = v(self, io_loop=self.loop)
            self.services[k].listen(port)
            self.service_ports[k] = self.services[k].port

        handlers = {'compute': self.compute,
                    'gather': self.gather,
                    'compute-stream': self.compute_stream,
                    'run': self.run,
                    'get_data': self.get_data,
                    'update_data': self.update_data,
                    'delete_data': self.delete_data,
                    'terminate': self.terminate,
                    'ping': pingpong,
                    'health': self.host_health,
                    'upload_file': self.upload_file,
                    'start_ipython': self.start_ipython,
                    'keys': self.keys,
                }

        super(Worker, self).__init__(handlers, io_loop=self.loop, **kwargs)

        self.heartbeat_callback = PeriodicCallback(self.heartbeat,
                                                   self.heartbeat_interval,
                                                   io_loop=self.loop)
        self.loop.add_callback(self.heartbeat_callback.start)

    @property
    def worker_address(self):
        """ For API compatibility with Nanny """
        return self.address

    @gen.coroutine
    def heartbeat(self):
        if not self.heartbeat_active:
            self.heartbeat_active = True
            logger.debug("Heartbeat: %s" % self.address)
            try:
                yield self.scheduler.register(address=self.address, name=self.name,
                                        ncores=self.ncores,
                                        now=time(),
                                        host_info=self.host_health(),
                                        services=self.service_ports,
                                        memory_limit=self.memory_limit,
                                        **self.process_health())
            finally:
                self.heartbeat_active = False
        else:
            logger.debug("Heartbeat skipped: channel busy")

    @gen.coroutine
    def _start(self, port=0):
        self.listen(port)
        self.name = self.name or self.address
        for k, v in self.services.items():
            v.listen(0)
            self.service_ports[k] = v.port

        logger.info('      Start worker at: %20s:%d', self.ip, self.port)
        for k, v in self.service_ports.items():
            logger.info('  %16s at: %20s:%d', k, self.ip, v)
        logger.info('Waiting to connect to: %20s:%d',
                    self.scheduler.ip, self.scheduler.port)
        while True:
            try:
                resp = yield self.scheduler.register(
                        ncores=self.ncores, address=(self.ip, self.port),
                        keys=list(self.data),
                        name=self.name, nbytes=valmap(sizeof, self.data),
                        now=time(),
                        host_info=self.host_health(),
                        services=self.service_ports,
                        memory_limit=self.memory_limit,
                        **self.process_health())
                break
            except (OSError, StreamClosedError):
                logger.debug("Unable to register with scheduler.  Waiting")
                yield gen.sleep(0.5)
        if resp != 'OK':
            raise ValueError(resp)
        logger.info('        Registered to: %20s:%d',
                    self.scheduler.ip, self.scheduler.port)
        self.status = 'running'

    def start(self, port=0):
        self.loop.add_callback(self._start, port)

    def identity(self, stream):
        return {'type': type(self).__name__, 'id': self.id,
                'scheduler': (self.scheduler.ip, self.scheduler.port),
                'ncores': self.ncores,
                'memory_limit': self.memory_limit}

    @gen.coroutine
    def _close(self, report=True, timeout=10):
        self.heartbeat_callback.stop()
        with ignoring(RPCClosed, StreamClosedError):
            if report:
                yield gen.with_timeout(timedelta(seconds=timeout),
                        self.scheduler.unregister(address=(self.ip, self.port)),
                        io_loop=self.loop)
        self.scheduler.close_rpc()
        self.stop()
        self.executor.shutdown()
        if os.path.exists(self.local_dir):
            shutil.rmtree(self.local_dir)

        for v in self.services.values():
            v.stop()
        self.status = 'closed'

    @gen.coroutine
    def terminate(self, stream, report=True):
        yield self._close(report=report)
        raise Return('OK')

    @property
    def address(self):
        return '%s:%d' % (self.ip, self.port)

    @property
    def address_tuple(self):
        return (self.ip, self.port)

    @gen.coroutine
    def gather(self, stream=None, who_has=None):
        who_has = {k: [coerce_to_address(addr) for addr in v]
                    for k, v in who_has.items()
                    if k not in self.data}
        try:
            result = yield gather_from_workers(who_has)
        except KeyError as e:
            logger.warn("Could not find data", e)
            raise Return({'status': 'missing-data',
                          'keys': e.args})
        else:
            self.data.update(result)
            raise Return({'status': 'OK'})

    def deserialize(self, function=None, args=None, kwargs=None, task=None):
        """ Deserialize task inputs and regularize to func, args, kwargs """
        if task is not None:
            task = loads(task)
        if function is not None:
            function = loads(function)
        if args:
            args = loads(args)
        if kwargs:
            kwargs = loads(kwargs)

        if task is not None:
            assert not function and not args and not kwargs
            function = execute_task
            args = (task,)

        return function, args or (), kwargs or {}
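
    @staticmethod
    def _deserialize_sketch():
        # Illustrative sketch (editor's addition, not part of the original
        # source).  Assuming the `dumps`/`loads` pair used by this worker is
        # pickle-compatible, plain pickle can stand in to show the two wire
        # formats that deserialize() normalizes.
        import pickle

        # Convention 1: function and arguments serialized separately
        func = pickle.loads(pickle.dumps(max))
        args = pickle.loads(pickle.dumps((1, 2)))
        assert func(*args) == 2

        # Convention 2: a single dask-style task tuple (func, *args); for a
        # flat tuple like this, execute_task amounts to task[0](*task[1:])
        task = pickle.loads(pickle.dumps((max, 1, 2)))
        assert task[0](*task[1:]) == 2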

    @gen.coroutine
    def gather_many(self, msgs):
        """ Gather the data for many compute messages at once

        Returns
        -------
        good: the input messages for which we have data
        bad: a dict of task keys for which we could not find data
        data: the scope of gathered values in which to run tasks
        len(remote): the number of new keys we've gathered
        diagnostics: timings for any disk loads and peer transfers
        """
        diagnostics = {}
        who_has = merge(msg['who_has'] for msg in msgs if 'who_has' in msg)

        start = time()
        local = {k: self.data[k] for k in who_has if k in self.data}
        stop = time()
        if stop - start > 0.005:
            diagnostics['disk_load_start'] = start
            diagnostics['disk_load_stop'] = stop

        who_has = {k: v for k, v in who_has.items() if k not in local}
        start = time()
        remote, bad_data = yield gather_from_workers(who_has,
                                                     permissive=True)
        if remote:
            self.data.update(remote)
            yield self.scheduler.add_keys(address=self.address, keys=list(remote))
        stop = time()

        if remote:
            diagnostics['transfer_start'] = start
            diagnostics['transfer_stop'] = stop

        data = merge(local, remote)

        if bad_data:
            missing = {msg['key']: {k for k in msg['who_has'] if k in bad_data}
                       for msg in msgs if 'who_has' in msg}
            bad = {k: v for k, v in missing.items() if v}
            good = [msg for msg in msgs if not missing.get(msg['key'])]
        else:
            good, bad = msgs, {}
        raise Return([good, bad, data, len(remote), diagnostics])
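
    @staticmethod
    def _merge_who_has_sketch():
        # Illustrative sketch (editor's addition, not part of the original
        # source): toolz.merge, as used in gather_many above, folds the
        # per-message who_has dicts into a single key -> [addresses]
        # mapping; the worker addresses here are made up.
        from toolz import merge

        msgs = [{'who_has': {'x': ['10.0.0.1:8788']}},
                {'who_has': {'y': ['10.0.0.2:8788']}},
                {'key': 'no-dependencies'}]
        who_has = merge(msg['who_has'] for msg in msgs if 'who_has' in msg)
        assert who_has == {'x': ['10.0.0.1:8788'], 'y': ['10.0.0.2:8788']}
        return who_has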

    @gen.coroutine
    def _ready_task(self, function=None, key=None, args=(), kwargs={},
                    task=None, who_has=None):
        who_has = who_has or {}
        diagnostics = {}
        start = time()
        data = {k: self.data[k] for k in who_has if k in self.data}
        stop = time()

        if stop - start > 0.005:
            diagnostics['disk_load_start'] = start
            diagnostics['disk_load_stop'] = stop

        who_has = {k: set(map(coerce_to_address, v))
                   for k, v in who_has.items()
                   if k not in self.data}
        if who_has:
            try:
                logger.info("gather %d keys from peers", len(who_has))
                diagnostics['transfer_start'] = time()
                other = yield gather_from_workers(who_has)
                diagnostics['transfer_stop'] = time()
                self.data.update(other)
                yield self.scheduler.add_keys(address=self.address,
                                              keys=list(other))
                data.update(other)
            except KeyError as e:
                logger.warn("Could not find data for %s", key)
                raise Return({'status': 'missing-data',
                              'keys': e.args,
                              'key': key})

        try:
            start = default_timer()
            function, args, kwargs = self.deserialize(function, args, kwargs,
                    task)
            diagnostics['deserialization'] = default_timer() - start
        except Exception as e:
            logger.warn("Could not deserialize task", exc_info=True)
            emsg = error_message(e)
            emsg['key'] = key
            raise Return(emsg)

        # Fill args with data
        args2 = pack_data(args, data)
        kwargs2 = pack_data(kwargs, data)

        raise Return({'status': 'OK',
                      'function': function,
                      'args': args2,
                      'kwargs': kwargs2,
                      'diagnostics': diagnostics,
                      'key': key})
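
    @staticmethod
    def _pack_data_sketch():
        # Illustrative sketch (editor's addition, not part of the original
        # source): a simplified reimplementation of what pack_data is used
        # for above -- swapping task-key placeholders in args/kwargs for
        # the gathered values.  The real pack_data also handles tuple keys
        # and other collection types.
        def pack(o, data):
            if isinstance(o, (list, tuple)):
                return type(o)(pack(x, data) for x in o)
            if isinstance(o, dict):
                return {k: pack(v, data) for k, v in o.items()}
            return data.get(o, o) if isinstance(o, str) else o

        assert pack(('x', 'y', 10), {'x': 1}) == (1, 'y', 10)
        assert pack({'left': 'x'}, {'x': 1}) == {'left': 1}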

    @gen.coroutine
    def executor_submit(self, key, function, *args, **kwargs):
        """ Safely run function in thread pool executor

        We've run into issues running concurrent.futures futures within
        tornado.  Apparently it's advantageous to use timeouts and periodic
        callbacks to ensure things run smoothly.  This can get tricky, so we
        pull it off into a separate method.
        """
        job_counter[0] += 1
        # logger.info("%s:%d Starts job %d, %s", self.ip, self.port, i, key)
        future = self.executor.submit(function, *args, **kwargs)
        pc = PeriodicCallback(lambda: logger.debug("future state: %s - %s",
                                                   key, future._state),
                              1000, io_loop=self.loop)
        pc.start()
        try:
            yield future
        finally:
            pc.stop()

        result = future.result()

        # logger.info("Finish job %d, %s", i, key)
        raise gen.Return(result)
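
    @staticmethod
    def _executor_submit_sketch():
        # Illustrative sketch (editor's addition, not part of the original
        # source): the same pattern as executor_submit in miniature -- run
        # a blocking function on a ThreadPoolExecutor, await it from a
        # Tornado coroutine, and keep a PeriodicCallback watchdog running
        # alongside it.
        from concurrent.futures import ThreadPoolExecutor
        from tornado import gen
        from tornado.ioloop import IOLoop, PeriodicCallback

        executor = ThreadPoolExecutor(1)

        @gen.coroutine
        def run():
            future = executor.submit(sum, [1, 2, 3])
            pc = PeriodicCallback(lambda: logger.debug("still running"), 1000)
            pc.start()
            try:
                yield future        # Tornado adapts the concurrent future
            finally:
                pc.stop()
            raise gen.Return(future.result())

        return IOLoop.current().run_sync(run)  # -> 6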

    @gen.coroutine
    def compute_stream(self, stream):
        with log_errors():
            logger.debug("Open compute stream")
            bstream = BatchedSend(interval=2, loop=self.loop)
            bstream.start(stream)

            closed = False
            last = gen.sleep(0)
            while not closed:
                try:
                    msgs = yield read(stream)
                except StreamClosedError:
                    break
                if not isinstance(msgs, list):
                    msgs = [msgs]

                batch = []
                for msg in msgs:
                    op = msg.pop('op', None)
                    if op == 'close':
                        closed = True
                        break
                    elif op == 'compute-task':
                        batch.append(msg)
                        logger.debug("%s asked to compute %s", self.address,
                                     msg['key'])
                    else:
                        logger.warning("Unknown operation %s, %s", op, msg)
                # self.loop.add_callback(self.compute_many, bstream, batch)
                last = self.compute_many(bstream, batch)

            try:
                yield last  # TODO: there might be more than one lingering
            except (RPCClosed, RuntimeError):
                pass

            yield bstream.close()
            logger.info("Close compute stream")

    @gen.coroutine
    def compute_many(self, bstream, msgs, report=False):
        good, bad, data, num_transferred, diagnostics = yield self.gather_many(msgs)

        for msg in msgs:
            msg.pop('who_has', None)

        if bad:
            logger.warn("Could not find data for %s", sorted(bad))
        for k, v in bad.items():
            bstream.send({'status': 'missing-data',
                          'key': k,
                          'keys': list(v)})

        if good:
            futures = [self.compute_one(data, report=report, **msg)
                                     for msg in good]
            wait_iterator = gen.WaitIterator(*futures)
            result = yield wait_iterator.next()
            if diagnostics:
                result.update(diagnostics)
            bstream.send(result)
            while not wait_iterator.done():
                msg = yield wait_iterator.next()
                bstream.send(msg)
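
    @staticmethod
    def _wait_iterator_sketch():
        # Illustrative sketch (editor's addition, not part of the original
        # source): gen.WaitIterator, as used in compute_many above, yields
        # results in completion order rather than submission order, so fast
        # tasks report back without waiting on slow ones.
        from tornado import gen
        from tornado.ioloop import IOLoop

        @gen.coroutine
        def delayed(value, delay):
            yield gen.sleep(delay)
            raise gen.Return(value)

        @gen.coroutine
        def run():
            wait_iterator = gen.WaitIterator(delayed('slow', 0.2),
                                             delayed('fast', 0.1))
            results = []
            while not wait_iterator.done():
                result = yield wait_iterator.next()
                results.append(result)
            raise gen.Return(results)

        return IOLoop.current().run_sync(run)  # -> ['fast', 'slow']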

    @gen.coroutine
    def compute_one(self, data, key=None, function=None, args=None, kwargs=None,
                    report=False, task=None):
        logger.debug("Compute one on %s", key)
        self.active.add(key)
        diagnostics = dict()
        try:
            start = default_timer()
            function, args, kwargs = self.deserialize(function, args, kwargs,
                    task)
            diagnostics['deserialization'] = default_timer() - start
        except Exception as e:
            logger.warn("Could not deserialize task", exc_info=True)
            emsg = error_message(e)
            emsg['key'] = key
            raise Return(emsg)

        # Fill args with data
        args2 = pack_data(args, data)
        kwargs2 = pack_data(kwargs, data)

        # Log and compute in separate thread
        result = yield self.executor_submit(key, apply_function, function,
                                            args2, kwargs2,
                                            self.execution_state, key)

        result['key'] = key
        result.update(diagnostics)

        if result['status'] == 'OK':
            self.data[key] = result.pop('result')
            if report:
                response = yield self.scheduler.add_keys(keys=[key],
                                        address=(self.ip, self.port))
                if response != 'OK':
                    logger.warn('Could not report results to scheduler: %s',
                                str(response))
        else:
            logger.warn(" Compute Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000],
                convert_args_to_str(args, max_len=1000),
                convert_kwargs_to_str(kwargs, max_len=1000), exc_info=True)

        logger.debug("Send compute response to scheduler: %s, %s", key,
                     result)
        try:
            self.active.remove(key)
        except KeyError:
            pass
        raise Return(result)

    @gen.coroutine
    def compute(self, stream=None, function=None, key=None, args=(), kwargs={},
            task=None, who_has=None, report=True):
        """ Execute function """
        self.active.add(key)

        # Ready function for computation
        msg = yield self._ready_task(function=function, key=key, args=args,
            kwargs=kwargs, task=task, who_has=who_has)
        if msg['status'] != 'OK':
            try:
                self.active.remove(key)
            except KeyError:
                pass
            raise Return(msg)
        else:
            function = msg['function']
            args = msg['args']
            kwargs = msg['kwargs']

        # Log and compute in separate thread
        result = yield self.executor_submit(key, apply_function, function,
                                            args, kwargs, self.execution_state,
                                            key)

        result['key'] = key
        result.update(msg['diagnostics'])

        if result['status'] == 'OK':
            self.data[key] = result.pop('result')
            if report:
                response = yield self.scheduler.add_keys(address=(self.ip, self.port),
                                                         keys=[key])
                if response != 'OK':
                    logger.warn('Could not report results to scheduler: %s',
                                str(response))
        else:
            logger.warn(" Compute Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000],
                convert_args_to_str(args, max_len=1000),
                convert_kwargs_to_str(kwargs, max_len=1000), exc_info=True)

        logger.debug("Send compute response to scheduler: %s, %s", key,
            get_msg_safe_str(msg))
        try:
            self.active.remove(key)
        except KeyError:
            pass
        raise Return(result)

    def run(self, stream, function=None, args=(), kwargs={}):
        return run(self, stream, function=function, args=args, kwargs=kwargs)

    @gen.coroutine
    def update_data(self, stream=None, data=None, report=True, deserialize=True):
        if deserialize:
            data = valmap(loads, data)
        self.data.update(data)
        if report:
            response = yield self.scheduler.add_keys(
                                address=(self.ip, self.port),
                                keys=list(data))
            assert response == 'OK'
        info = {'nbytes': {k: sizeof(v) for k, v in data.items()},
                'status': 'OK'}
        raise Return(info)

    @gen.coroutine
    def delete_data(self, stream, keys=None, report=True):
        if keys:
            for key in keys:
                if key in self.data:
                    del self.data[key]
            logger.info("Deleted %d keys", len(keys))
            if report:
                logger.debug("Reporting loss of keys to scheduler")
                yield self.scheduler.remove_keys(address=self.address,
                                                 keys=list(keys))
        raise Return('OK')

    def get_data(self, stream, keys=None):
        return {k: dumps(self.data[k]) for k in keys if k in self.data}

    def start_ipython(self, stream):
        """Start an IPython kernel

        Returns Jupyter connection info dictionary.
        """
        from ._ipython_utils import start_ipython
        if self._ipython_kernel is None:
            self._ipython_kernel = start_ipython(
                ip=self.ip,
                ns={'worker': self},
                log=logger,
            )
        return self._ipython_kernel.get_connection_info()

    def upload_file(self, stream, filename=None, data=None, load=True):
        out_filename = os.path.join(self.local_dir, filename)
        if isinstance(data, unicode):
            data = data.encode()
        with open(out_filename, 'wb') as f:
            f.write(data)
            f.flush()

        if load:
            try:
                name, ext = os.path.splitext(filename)
                if ext in ('.py', '.pyc'):
                    logger.info("Reload module %s from .py file", name)
                    name = name.split('-')[0]
                    reload(import_module(name))
                if ext == '.egg':
                    sys.path.append(out_filename)
                    pkgs = pkg_resources.find_distributions(out_filename)
                    for pkg in pkgs:
                        logger.info("Load module %s from egg", pkg.project_name)
                        reload(import_module(pkg.project_name))
                    if not pkgs:
                        logger.warning("Found no packages in egg file")
            except Exception as e:
                logger.exception(e)
                return {'status': 'error', 'exception': dumps(e)}
        return {'status': 'OK', 'nbytes': len(data)}

    def process_health(self, stream=None):
        d = {'active': len(self.active),
             'stored': len(self.data)}
        return d

    def host_health(self, stream=None):
        """ Information about worker """
        d = {'time': time()}
        try:
            import psutil
            mem = psutil.virtual_memory()
            d.update({'cpu': psutil.cpu_percent(),
                      'memory': mem.total,
                      'memory_percent': mem.percent})

            net_io = psutil.net_io_counters()
            if self._last_net_io:
                d['network-send'] = net_io.bytes_sent - self._last_net_io.bytes_sent
                d['network-recv'] = net_io.bytes_recv - self._last_net_io.bytes_recv
            else:
                d['network-send'] = 0
                d['network-recv'] = 0
            self._last_net_io = net_io

            try:
                disk_io = psutil.disk_io_counters()
            except RuntimeError:
                # This happens when there is no physical disk in worker
                pass
            else:
                if self._last_disk_io:
                    d['disk-read'] = disk_io.read_bytes - self._last_disk_io.read_bytes
                    d['disk-write'] = disk_io.write_bytes - self._last_disk_io.write_bytes
                else:
                    d['disk-read'] = 0
                    d['disk-write'] = 0
                self._last_disk_io = disk_io

        except ImportError:
            pass
        return d
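
    @staticmethod
    def _io_counter_delta_sketch():
        # Illustrative sketch (editor's addition, not part of the original
        # source): psutil's I/O counters are cumulative since boot, which is
        # why host_health above reports the difference between successive
        # snapshots rather than the raw values.
        import psutil

        last = psutil.net_io_counters()
        # ... some network traffic happens here ...
        now = psutil.net_io_counters()
        return {'network-send': now.bytes_sent - last.bytes_sent,
                'network-recv': now.bytes_recv - last.bytes_recv}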

    def keys(self, stream=None):
        return list(self.data)