def __init__(
        self,
        device_memory_limit=None,
        memory_limit=None,
        local_directory=None,
        log_spilling=False,
    ):
        """Set up the device -> host -> disk spilling hierarchy.

        Parameters
        ----------
        device_memory_limit:
            Byte limit of device memory before spilling device->host;
            passed straight to the device-tier buffer.
        memory_limit:
            Byte limit of host memory before spilling host->disk; ``0``
            disables disk spilling entirely (the host tier is a bare dict).
        local_directory:
            Base directory for on-disk storage; falls back to the dask
            ``temporary-directory`` config value, then the CWD.
        log_spilling:
            When exactly ``True``, use ``LoggedBuffer`` for both tiers so
            each spill/unspill event is logged.
        """
        # Resolve where spilled-to-disk values live and make sure it exists.
        storage_root = (
            local_directory
            or dask.config.get("temporary-directory")
            or os.getcwd()
        )
        self.disk_func_path = os.path.join(storage_root, "dask-worker-space", "storage")
        os.makedirs(self.disk_func_path, exist_ok=True)

        # Plain dict holds host-memory values; Func handles (de)serialization
        # on the way to/from the on-disk File store.
        self.host_func = dict()
        self.disk_func = Func(
            functools.partial(serialize_bytelist, on_error="raise"),
            deserialize_bytes,
            File(self.disk_func_path),
        )

        # Identity check on purpose: only the literal ``True`` enables logging.
        if log_spilling is True:
            buffer_class = LoggedBuffer
            host_buffer_kwargs = {"fast_name": "Host", "slow_name": "Disk"}
            device_buffer_kwargs = {"fast_name": "Device", "slow_name": "Host"}
        else:
            buffer_class = Buffer
            host_buffer_kwargs = {}
            device_buffer_kwargs = {}

        # memory_limit == 0 means "never spill host memory to disk": the host
        # tier degenerates to the bare dict.
        if memory_limit == 0:
            self.host_buffer = self.host_func
        else:
            self.host_buffer = buffer_class(
                self.host_func,
                self.disk_func,
                memory_limit,
                weight=lambda k, v: safe_sizeof(v),
                **host_buffer_kwargs,
            )

        self.device_keys = set()
        self.device_func = dict()
        # Device tier spills into the host tier via device/host converters.
        self.device_host_func = Func(device_to_host, host_to_device, self.host_buffer)
        # BUGFIX: was hard-coded ``Buffer`` — with log_spilling=True that passed
        # LoggedBuffer-only kwargs (fast_name/slow_name) to plain zict.Buffer,
        # raising TypeError. Use the selected buffer_class instead.
        self.device_buffer = buffer_class(
            self.device_func,
            self.device_host_func,
            device_memory_limit,
            weight=lambda k, v: safe_sizeof(v),
            **device_buffer_kwargs,
        )

        # Convenience aliases for the raw mapping backing each tier.
        self.device = self.device_buffer.fast.d
        self.host = self.host_buffer if memory_limit == 0 else self.host_buffer.fast.d
        self.disk = None if memory_limit == 0 else self.host_buffer.slow.d

        # For Worker compatibility only, where `fast` is host memory buffer
        self.fast = self.host_buffer if memory_limit == 0 else self.host_buffer.fast
# --- Example #2 (scraped separator; "0" was a vote-count artifact) ---
    def __setitem__(self, key: str, value: Any) -> None:
        """Serialize *value* and store it on disk under *key*, tracking weights.

        Raises
        ------
        PickleError
            If serialization fails; wraps the original exception so that
            SpillBuffer can recognize and unwrap it.
        MaxSpillExceeded
            If writing would push the on-disk total past ``max_weight``.
        OSError
            Propagated from the underlying File store (caught by SpillBuffer).
        """
        try:
            # FIXME https://github.com/python/mypy/issues/708
            frames = self.dump(value)  # type: ignore
        except Exception as e:
            # zict.LRU ensures that the key remains in fast if we raise.
            # Wrap the exception so that it's recognizable by SpillBuffer,
            # which will then unwrap it.
            raise PickleError(key, e)

        nbytes = sum(map(len, frames))

        if has_zict_210:
            # Thanks to Buffer.__setitem__, we never update existing
            # keys in slow, but always delete them and reinsert them.
            assert key not in self.d
            assert key not in self.weight_by_key
        else:
            self.d.pop(key, None)
            self.total_weight -= self.weight_by_key.pop(key, SpilledSize(0, 0))

        if (
            self.max_weight is not False
            and self.total_weight.disk + nbytes > self.max_weight
        ):
            # Stop callbacks and ensure that the key ends up in SpillBuffer.fast
            # To be caught by SpillBuffer.__setitem__
            raise MaxSpillExceeded(key)

        # Store to disk through File.
        # This may raise OSError, which is caught by SpillBuffer above.
        self.d[key] = frames

        weight = SpilledSize(safe_sizeof(value), nbytes)
        self.weight_by_key[key] = weight
        self.total_weight += weight
# --- Example #3 (scraped separator; "0" was a vote-count artifact) ---
def test_safe_sizeof_raises():
    """A failing ``__sizeof__`` falls back to the 1 MB default and logs it."""

    class BadlySized:
        def __sizeof__(self):
            raise ValueError("bar")

    obj = BadlySized()

    with captured_logger(logging.getLogger("distributed.sizeof")) as logs:
        assert safe_sizeof(obj) == 1e6

    assert "Sizeof calculation failed.  Defaulting to 1MB" in logs.getvalue()
# --- Example #4 (scraped separator; "0" was a vote-count artifact) ---
def test_safe_sizeof_logs_on_failure():
    """safe_sizeof logs its fallback size, both default and user-supplied."""

    class BadlySized:
        def __sizeof__(self):
            raise ValueError("bar")

    obj = BadlySized()

    # Built-in fallback is 1e6 bytes, reported as ~0.95 MiB.
    with captured_logger(logging.getLogger("distributed.sizeof")) as logs:
        assert safe_sizeof(obj) == 1e6

    assert "Sizeof calculation failed. Defaulting to 0.95 MiB" in logs.getvalue()

    # A custom `default_size` is honored and reported in the log message.
    with captured_logger(logging.getLogger("distributed.sizeof")) as logs:
        custom_default = 2 * (1024 ** 2)  # 2 MiB
        assert safe_sizeof(obj, default_size=custom_default) == custom_default

    assert "Defaulting to 2.00 MiB" in logs.getvalue()
# --- Example #5 (scraped separator; "0" was a vote-count artifact) ---
    def slow_to_fast(self, key):
        """Un-spill *key* via the parent class, timing and logging the move."""
        t0 = time.time()
        value = super().slow_to_fast(key)
        elapsed = time.time() - t0
        # Accumulate total un-spill time for reporting.
        self.total_time_slow_to_fast += elapsed

        self.logger.info(
            self.msg_template
            % (self.addr, key, safe_sizeof(value), self.slow_name, self.fast_name, elapsed)
        )

        return value
# --- Example #6 (scraped separator; "0" was a vote-count artifact) ---
    def fast_to_slow(self, key, value):
        """Spill *key*/*value* via the parent class, timing and logging the move."""
        t0 = time.time()
        result = super().fast_to_slow(key, value)
        elapsed = time.time() - t0
        # Accumulate total spill time for reporting.
        self.total_time_fast_to_slow += elapsed

        self.logger.info(
            self.msg_template
            % (self.addr, key, safe_sizeof(value), self.fast_name, self.slow_name, elapsed)
        )

        return result
# --- Example #7 (scraped separator; "0" was a vote-count artifact) ---
def test_safe_sizeof(obj):
    """For well-behaved objects, safe_sizeof agrees with the plain sizeof."""
    expected = sizeof(obj)
    assert safe_sizeof(obj) == expected
# --- Example #8 (scraped separator; "0" was a vote-count artifact) ---
def _in_memory_weight(key: str, value: Any) -> int:
    """Return the in-memory size in bytes of *value*; *key* is ignored."""
    return safe_sizeof(value)