def test_tokenize_object():
    o = object()
    # Defaults to non-deterministic tokenization
    assert normalize_token(o) != normalize_token(o)

    with dask.config.set({"tokenize.ensure-deterministic": True}):
        with pytest.raises(RuntimeError, match="cannot be deterministically hashed"):
            normalize_token(o)
def tokenize_calibration(self):
    '''Inherit the tokenization from Calibration, then extend it for the
    RQConv case: tokenize the additional arguments and append them to the
    args list. The result looks something like
    ('list', [1, 1, 1], 'list', [1, 3, 4], ...); it is messy but hashable.
    '''
    # inherit dispatch from the Calibration object
    # calib_norm = normalize_token.dispatch(Calibration)
    # args = calib_norm(self)
    args = tokenize_calibration_base(self)

    # finally, tokenize the remaining (RQConv-specific) attributes,
    # rounded to 3 digits
    newargs = list()
    newargs.append(roundbydigits(self.det_orient, 3))
    newargs.append(roundbydigits(self.det_tilt, 3))
    newargs.append(roundbydigits(self.det_phi, 3))
    newargs.append(roundbydigits(self.incident_angle, 3))
    newargs.append(roundbydigits(self.sample_normal, 3))
    newargs.append(roundbydigits(self.rot_matrix, 3))
    newargs = normalize_token(newargs)

    args = (args, newargs)
    return args
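# Hedged sketch of how a tokenizer like tokenize_calibration above is usually
# hooked into dask: normalize_token is a Dispatch, so per-type handlers are
# added with normalize_token.register. The `Point` class below is hypothetical
# and exists only to make the example self-contained; only
# normalize_token.register and dask.base.tokenize are real dask API.
from dask.base import normalize_token, tokenize


class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y


@normalize_token.register(Point)
def tokenize_point(p):
    # Content-based token: equal coordinates give equal tokens.
    return normalize_token((type(p).__name__, p.x, p.y))


assert tokenize(Point(1, 2)) == tokenize(Point(1, 2))
assert tokenize(Point(1, 2)) != tokenize(Point(1, 3))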
def test_tokenize_partial_func_args_kwargs_consistent():
    f = tz.partial(f3, f2, c=f1)
    res = normalize_token(f)
    sol = (b'cdask.tests.test_base\nf3\np0\n.',
           (b'cdask.tests.test_base\nf2\np0\n.',),
           (('c', b'cdask.tests.test_base\nf1\np0\n.'),))
    assert res == sol
def tokenize_calibration_base(self):
    # Allows intelligent caching: all computations of data and submethods
    # need to specify the pure=True flag.
    args = [self.wavelength_A, self.distance_m]
    args.append(self.pixel_size_um)

    if self.width is not None:
        args.append(self.width)
    if self.height is not None:
        args.append(self.height)

    # round these to 3 digits
    if self.x0 is not None:
        args.append(roundbydigits(self.x0, 3))
    if self.y0 is not None:
        args.append(roundbydigits(self.y0, 3))
    if self.angle_map_data is not None:
        args.append(roundbydigits(self.angle_map_data, 3))
    if self.q_map_data is not None:
        args.append(roundbydigits(self.q_map_data, 3))
    if self.qr_map_data is not None:
        args.append(roundbydigits(self.qr_map_data, 3))
    if self.qx_map_data is not None:
        args.append(roundbydigits(self.qx_map_data, 3))
    if self.qy_map_data is not None:
        args.append(roundbydigits(self.qy_map_data, 3))
    if self.qz_map_data is not None:
        args.append(roundbydigits(self.qz_map_data, 3))

    return normalize_token(args)
def __dask_tokenize__(self):
    "Produce a deterministic, content-based hash for dask."
    from dask.base import normalize_token

    return normalize_token(
        (type(self), self.coords, self.data, self.shape, self.fill_value)
    )
def __dask_tokenize__(self):
    import uuid

    from dask.base import normalize_token

    # Lazily attach a UUID so the token is stable for this instance but is
    # never shared between otherwise-identical instances.
    if not hasattr(self, "_unique_id"):
        self._unique_id = str(uuid.uuid4())

    return normalize_token(
        (type(self), self.lattice, self.field_type, self._unique_id)
    )
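# Hedged sketch contrasting the two __dask_tokenize__ styles above: a
# content-based token (as in the snippet before this one) lets equal objects
# share a token, while a uuid-based token is stable for a single instance but
# never shared across instances. `Field` is a hypothetical stand-in class used
# only to make the example self-contained.
import uuid

from dask.base import normalize_token, tokenize


class Field:
    def __dask_tokenize__(self):
        if not hasattr(self, "_unique_id"):
            self._unique_id = str(uuid.uuid4())
        return normalize_token((type(self).__name__, self._unique_id))


f, g = Field(), Field()
assert tokenize(f) == tokenize(f)  # stable for the same instance
assert tokenize(f) != tokenize(g)  # distinct instances never collide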
def test_tokenize_partial_func_args_kwargs_consistent():
    f = partial(f3, f2, c=f1)
    res = normalize_token(f)
    sol = (
        b"cdask.tests.test_base\nf3\np0\n.",
        (b"cdask.tests.test_base\nf2\np0\n.",),
        (("c", b"cdask.tests.test_base\nf1\np0\n."),),
    )
    assert res == sol
def test_normalize_base():
    for i in [
        1,
        1.1,
        "1",
        slice(1, 2, 3),
        datetime.date(2021, 6, 25),
        pathlib.PurePath("/this/that"),
    ]:
        assert normalize_token(i) is i
def test_tokenize_partial_func_args_kwargs_consistent():
    f = partial(f3, f2, c=f1)
    res = normalize_token(f)
    sol = (
        b"\x80\x04\x95\x1f\x00\x00\x00\x00\x00\x00\x00\x8c\x14dask.tests.test_base\x94\x8c\x02f3\x94\x93\x94.",
        (
            b"\x80\x04\x95\x1f\x00\x00\x00\x00\x00\x00\x00\x8c\x14dask.tests.test_base\x94\x8c\x02f2\x94\x93\x94.",
        ),
        (
            (
                "c",
                b"\x80\x04\x95\x1f\x00\x00\x00\x00\x00\x00\x00\x8c\x14dask.tests.test_base\x94\x8c\x02f1\x94\x93\x94.",
            ),
        ),
    )
    assert res == sol
def test_use_cloudpickle_to_tokenize_functions_in__main__():
    from textwrap import dedent

    defn = dedent("""
    def inc():
        return x
    """)

    __main__ = sys.modules["__main__"]
    exec(compile(defn, "<test>", "exec"), __main__.__dict__)
    f = __main__.inc
    t = normalize_token(f)
    assert b"cloudpickle" in t
def test_use_cloudpickle_to_tokenize_functions_in__main__():
    from textwrap import dedent

    defn = dedent("""
    def inc():
        return x
    """)

    __main__ = sys.modules['__main__']
    exec(compile(defn, '<test>', 'exec'), __main__.__dict__)
    f = __main__.inc
    t = normalize_token(f)
    assert b'cloudpickle' in t
def test_use_cloudpickle_to_tokenize_functions_in__main__():
    import sys
    from textwrap import dedent

    defn = dedent("""
    def inc():
        return x
    """)

    __main__ = sys.modules['__main__']
    exec(compile(defn, '<test>', 'exec'), __main__.__dict__)
    f = __main__.inc
    t = normalize_token(f)
    assert b'__main__' not in t
def test_normalize_function():
    def f1(a, b, c=1):
        pass

    cf1 = curry(f1)

    def f2(a, b=1, c=2):
        pass

    def f3(a):
        pass

    assert normalize_function(f2) == str(f2)
    f = lambda a: a
    assert normalize_function(f) == str(f)
    comp = compose(partial(f2, b=2), f3)
    assert normalize_function(comp) == ((str(f2), (), (('b', 2),)), str(f3))
    assert normalize_function(cf1) == (str(f1), (), ())
    assert normalize_function(cf1(2, c=2)) == (str(f1), (2,), (('c', 2),))
    assert normalize_token(cf1) == normalize_function(cf1)
def normalize_estimator(est):
    """Normalize an estimator.

    Note: Since scikit-learn requires duck-typing, but not sub-typing
    from ``BaseEstimator``, we sometimes need to call this function
    directly.
    """
    base = [type(est).__name__, normalize_token(est.get_params())]
    # fitted attributes: https://github.com/dask/dask-ml/issues/658
    attrs = [x for x in dir(est) if x.endswith("_") and not x.startswith("_")]
    exclude = {"cv_results_", "model_history_", "history_", "refit_time_"}
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        for attr in attrs:
            if attr in exclude:
                continue
            try:
                val = getattr(est, attr)
            except (sklearn.exceptions.NotFittedError, AttributeError):
                continue
            base.append(val)
    return tuple(base)
def __dask_tokenize__(self):
    from dask.base import normalize_token

    return normalize_token((type(self), self._value))
def __dask_tokenize__(self):
    from dask.base import normalize_token

    return normalize_token(
        (type(self), self.dims, self.dofs, self.properties)
    )
def tokenize_foo(self):
    return normalize_token((self.a, self.b))
def test_normalize_base():
    for i in [1, 1.1, '1', slice(1, 2, 3)]:
        assert normalize_token(i) is i
def test_tokenize():
    a = (1, 2, 3)
    b = {'a': 1, 'b': 2, 'c': 3}
    assert tokenize(a) == '4889c6ccd7099fc2fd19f4be468fcfa0'
    assert tokenize(a, b) == tokenize(normalize_token(a), normalize_token(b))
def test_normalize():
    assert normalize_token((1, 2, 3)) == (1, 2, 3)
    assert normalize_token('a') == 'a'
    assert normalize_token({'a': 1, 'b': 2, 'c': 3}) == \
        (('a', 1), ('b', 2), ('c', 3))
def normalize_gridder_config_wrapper(gc):
    return normalize_token((gc.nx, gc.ny, gc.csx, gc.csy, gc.eps))
def normalize_BaseEstimator(est):
    return type(est).__name__, normalize_token(vars(est))
def test_normalize():
    assert normalize_token((1, 2, 3)) == (1, 2, 3)
    assert normalize_token("a") == "a"
    assert normalize_token({"a": 1, "b": 2, "c": 3}) == (("a", 1), ("b", 2), ("c", 3))
def tokenize_sdoc(sdoc):
    return normalize_token((sdoc['args'], sdoc['kwargs']))
def normalize_estimator(est):
    """Normalize an estimator.

    Note: Since scikit-learn requires duck-typing, but not sub-typing
    from ``BaseEstimator``, we sometimes need to call this function
    directly.
    """
    return type(est).__name__, normalize_token(est.get_params())
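# Hedged sketch of how normalize_estimator above can be registered so that
# dask.base.tokenize sees an estimator's hyper-parameters rather than its
# object identity. Registering on sklearn's BaseEstimator and the use of
# LogisticRegression are illustrative choices, not the library's required setup.
from dask.base import normalize_token, tokenize
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression


@normalize_token.register(BaseEstimator)
def normalize_estimator_sketch(est):
    return type(est).__name__, normalize_token(est.get_params())


# Identical hyper-parameters tokenize identically; changing one changes the token.
assert tokenize(LogisticRegression(C=1.0)) == tokenize(LogisticRegression(C=1.0))
assert tokenize(LogisticRegression(C=1.0)) != tokenize(LogisticRegression(C=2.0))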
def test_tokenize():
    a = (1, 2, 3)
    b = {"a": 1, "b": 2, "c": 3}
    assert tokenize(a) == "4889c6ccd7099fc2fd19f4be468fcfa0"
    assert tokenize(a, b) == tokenize(normalize_token(a), normalize_token(b))
def test_matrix(mats, sol):
    dsk = dict((('test', i), m) for i, m in enumerate(mats))
    mat = dm.Matrix(dsk, 'test', 3)
    assert normalize_token(mat) == mat.name

    res = mat.compute()
    assert eq(res, sol)
def test_normalize_base():
    for i in [1, long(1), 1.1, "1", slice(1, 2, 3)]:
        assert normalize_token(i) is i
def test_normalize_base():
    for i in [1, 1.1, "1", slice(1, 2, 3), datetime.date(2021, 6, 25)]:
        assert normalize_token(i) is i
def normalize_masked_array(x):
    data = normalize_token(x.data)
    mask = normalize_token(x.mask)
    fill_value = normalize_token(x.fill_value)
    return (data, mask, fill_value)
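# Hedged sketch: registering the masked-array normalizer above for
# numpy.ma.MaskedArray so that masked arrays with equal data, mask and
# fill_value share a token. The registration target is an assumption here;
# recent dask versions already ship their own handling for masked arrays.
import numpy as np

from dask.base import normalize_token, tokenize


@normalize_token.register(np.ma.MaskedArray)
def normalize_masked_array_sketch(x):
    data = normalize_token(x.data)
    mask = normalize_token(x.mask)
    fill_value = normalize_token(x.fill_value)
    return (data, mask, fill_value)


a = np.ma.masked_array([1, 2, 3], mask=[0, 1, 0], fill_value=-1)
b = np.ma.masked_array([1, 2, 3], mask=[0, 1, 0], fill_value=-1)
assert tokenize(a) == tokenize(b)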