def __init__(
    self,
    device_memory_limit=None,
    memory_limit=None,
    local_directory=None,
    log_spilling=False,
):
    self.disk_func_path = os.path.join(
        local_directory or dask.config.get("temporary-directory") or os.getcwd(),
        "dask-worker-space",
        "storage",
    )
    os.makedirs(self.disk_func_path, exist_ok=True)

    self.host_func = dict()
    self.disk_func = Func(
        functools.partial(serialize_bytelist, on_error="raise"),
        deserialize_bytes,
        File(self.disk_func_path),
    )

    host_buffer_kwargs = {}
    device_buffer_kwargs = {}
    buffer_class = Buffer
    if log_spilling is True:
        buffer_class = LoggedBuffer
        host_buffer_kwargs = {"fast_name": "Host", "slow_name": "Disk"}
        device_buffer_kwargs = {"fast_name": "Device", "slow_name": "Host"}

    if memory_limit == 0:
        self.host_buffer = self.host_func
    else:
        self.host_buffer = buffer_class(
            self.host_func,
            self.disk_func,
            memory_limit,
            weight=lambda k, v: safe_sizeof(v),
            **host_buffer_kwargs,
        )

    self.device_keys = set()
    self.device_func = dict()
    self.device_host_func = Func(device_to_host, host_to_device, self.host_buffer)
    self.device_buffer = Buffer(
        self.device_func,
        self.device_host_func,
        device_memory_limit,
        weight=lambda k, v: safe_sizeof(v),
        **device_buffer_kwargs,
    )

    self.device = self.device_buffer.fast.d
    self.host = self.host_buffer if memory_limit == 0 else self.host_buffer.fast.d
    self.disk = None if memory_limit == 0 else self.host_buffer.slow.d

    # For Worker compatibility only, where `fast` is host memory buffer
    self.fast = self.host_buffer if memory_limit == 0 else self.host_buffer.fast
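# ---------------------------------------------------------------------------
# A minimal, self-contained sketch (not part of the original code) of the zict
# layering that the __init__ above builds: a Buffer keeps values in a "fast"
# dict until a weight limit is exceeded, then evicts them through a Func that
# serializes into a "slow" store. Plain pickle and a dict stand in here for
# serialize_bytelist/File; it assumes the `zict` package is installed.
import pickle

from zict import Buffer, Func


def _demo_spill_layering():
    fast = {}
    slow = Func(pickle.dumps, pickle.loads, {})
    buf = Buffer(fast, slow, n=100, weight=lambda k, v: len(v))

    buf["a"] = b"x" * 80  # fits within the 100-byte "fast" budget
    buf["b"] = b"y" * 80  # exceeds the budget, so the LRU key spills to slow

    assert buf["a"] == b"x" * 80  # reading "a" deserializes it back from slow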
def __setitem__(self, key: str, value: Any) -> None:
    try:
        # FIXME https://github.com/python/mypy/issues/708
        pickled = self.dump(value)  # type: ignore
    except Exception as e:
        # zict.LRU ensures that the key remains in fast if we raise.
        # Wrap the exception so that it's recognizable by SpillBuffer,
        # which will then unwrap it.
        raise PickleError(key, e)

    pickled_size = sum(len(frame) for frame in pickled)

    if has_zict_210:
        # Thanks to Buffer.__setitem__, we never update existing
        # keys in slow, but always delete them and reinsert them.
        assert key not in self.d
        assert key not in self.weight_by_key
    else:
        self.d.pop(key, None)
        self.total_weight -= self.weight_by_key.pop(key, SpilledSize(0, 0))

    if (
        self.max_weight is not False
        and self.total_weight.disk + pickled_size > self.max_weight
    ):
        # Stop callbacks and ensure that the key ends up in SpillBuffer.fast
        # To be caught by SpillBuffer.__setitem__
        raise MaxSpillExceeded(key)

    # Store to disk through File.
    # This may raise OSError, which is caught by SpillBuffer above.
    self.d[key] = pickled

    weight = SpilledSize(safe_sizeof(value), pickled_size)
    self.weight_by_key[key] = weight
    self.total_weight += weight
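# A hedged sketch (inferred from its use in __setitem__ above, not the
# authoritative definition) of the SpilledSize helper: a named tuple of
# (in-memory bytes, on-disk bytes) supporting elementwise + and - so that
# total_weight can be updated per key.
from typing import NamedTuple


class SpilledSizeSketch(NamedTuple):
    memory: int
    disk: int

    def __add__(self, other: "SpilledSizeSketch") -> "SpilledSizeSketch":
        return SpilledSizeSketch(self.memory + other.memory, self.disk + other.disk)

    def __sub__(self, other: "SpilledSizeSketch") -> "SpilledSizeSketch":
        return SpilledSizeSketch(self.memory - other.memory, self.disk - other.disk)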
def test_safe_sizeof_raises():
    class BadlySized:
        def __sizeof__(self):
            raise ValueError("bar")

    foo = BadlySized()

    with captured_logger(logging.getLogger("distributed.sizeof")) as logs:
        assert safe_sizeof(foo) == 1e6

    assert "Sizeof calculation failed. Defaulting to 1MB" in logs.getvalue()
def test_safe_sizeof_logs_on_failure():
    class BadlySized:
        def __sizeof__(self):
            raise ValueError("bar")

    foo = BadlySized()

    # Defaults to 0.95 MiB
    with captured_logger(logging.getLogger("distributed.sizeof")) as logs:
        assert safe_sizeof(foo) == 1e6

    assert "Sizeof calculation failed. Defaulting to 0.95 MiB" in logs.getvalue()

    # Can provide custom `default_size`
    with captured_logger(logging.getLogger("distributed.sizeof")) as logs:
        default_size = 2 * (1024 ** 2)  # 2 MiB
        assert safe_sizeof(foo, default_size=default_size) == default_size

    assert "Defaulting to 2.00 MiB" in logs.getvalue()
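# A minimal sketch of the behaviour the two tests above exercise (the real
# distributed.sizeof.safe_sizeof may differ in details): wrap sizeof() in a
# try/except and, on failure, log a warning and fall back to default_size
# (1e6 bytes, i.e. ~0.95 MiB, unless overridden).
import logging

from dask.sizeof import sizeof
from dask.utils import format_bytes

_logger = logging.getLogger("distributed.sizeof")


def safe_sizeof_sketch(obj, default_size: float = 1e6) -> int:
    try:
        return sizeof(obj)
    except Exception:
        _logger.warning(
            "Sizeof calculation failed. Defaulting to %s",
            format_bytes(int(default_size)),
            exc_info=True,
        )
        return int(default_size)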
def slow_to_fast(self, key):
    start = time.time()
    ret = super().slow_to_fast(key)
    total = time.time() - start
    self.total_time_slow_to_fast += total

    self.logger.info(
        self.msg_template
        % (self.addr, key, safe_sizeof(ret), self.slow_name, self.fast_name, total)
    )

    return ret
def fast_to_slow(self, key, value):
    start = time.time()
    ret = super().fast_to_slow(key, value)
    total = time.time() - start
    self.total_time_fast_to_slow += total

    self.logger.info(
        self.msg_template
        % (
            self.addr,
            key,
            safe_sizeof(value),
            self.fast_name,
            self.slow_name,
            total,
        )
    )

    return ret
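# A hypothetical sketch of the state the slow_to_fast/fast_to_slow overrides
# above rely on; the attribute names are taken from their use, but the
# __init__ signature and template wording are assumptions, not the real
# LoggedBuffer.
import logging


class _LoggedBufferStateSketch:
    def __init__(self, addr="<worker>", fast_name="fast", slow_name="slow"):
        self.addr = addr
        self.fast_name = fast_name
        self.slow_name = slow_name
        self.total_time_fast_to_slow = 0.0
        self.total_time_slow_to_fast = 0.0
        self.logger = logging.getLogger("distributed.worker")
        # printf-style template with six fields, matching the logging calls above
        self.msg_template = (
            "Worker at <%s>: spilled key %s (%s bytes) from %s to %s in %s seconds"
        )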
def test_safe_sizeof(obj):
    assert safe_sizeof(obj) == sizeof(obj)
def _in_memory_weight(key: str, value: Any) -> int:
    return safe_sizeof(value)