def test_tokenize_method():
    class Foo(object):
        def __init__(self, x):
            self.x = x

        def __dask_tokenize__(self):
            return self.x

    a, b = Foo(1), Foo(2)
    assert tokenize(a) == tokenize(a)
    assert tokenize(a) != tokenize(b)

    # dispatch takes precedence
    before = tokenize(a)
    normalize_token.register(Foo, lambda self: self.x + 1)
    after = tokenize(a)
    assert before != after
def test_tokenize_method():
    class Foo:
        def __init__(self, x):
            self.x = x

        def __dask_tokenize__(self):
            return self.x

    a, b = Foo(1), Foo(2)
    assert tokenize(a) == tokenize(a)
    assert tokenize(a) != tokenize(b)

    # dispatch takes precedence
    before = tokenize(a)
    normalize_token.register(Foo, lambda self: self.x + 1)
    after = tokenize(a)
    assert before != after
def test_tokenize_method():
    class Foo:
        def __init__(self, x):
            self.x = x

        def __dask_tokenize__(self):
            return self.x

    a, b = Foo(1), Foo(2)
    assert tokenize(a) == tokenize(a)
    assert tokenize(a) != tokenize(b)

    for ensure in [True, False]:
        with dask.config.set({"tokenize.ensure-deterministic": ensure}):
            assert tokenize(a) == tokenize(a)

    # dispatch takes precedence
    before = tokenize(a)
    normalize_token.register(Foo, lambda self: self.x + 1)
    after = tokenize(a)
    assert before != after
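# A minimal sketch (not part of the tests above; "Point" is an illustrative
# class) of the two hooks those tests exercise: the __dask_tokenize__ protocol
# and an explicit normalize_token registration, which takes precedence over it.
from dask.base import normalize_token, tokenize


class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

    def __dask_tokenize__(self):
        # Return something dask already knows how to normalize deterministically.
        return (type(self).__name__, self.x, self.y)


assert tokenize(Point(1, 2)) == tokenize(Point(1, 2))
assert tokenize(Point(1, 2)) != tokenize(Point(1, 3))


@normalize_token.register(Point)
def _normalize_point(p):
    # Once registered, this dispatch entry is used instead of __dask_tokenize__,
    # so the token for an existing Point changes.
    return ('point', p.x, p.y)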
    if all(dfs[i].divisions[-1] < dfs[i + 1].divisions[0]
           for i in range(len(dfs) - 1)):
        divisions = []
        for df in dfs[:-1]:  # remove last to concatenate with next
            divisions += df.divisions[:-1]
        divisions += dfs[-1].divisions
        return stack_partitions(dfs, divisions)
    elif interleave_partitions:
        return concat_indexed_dataframes(dfs)
    else:
        divisions = [None] * (sum([df.npartitions for df in dfs]) + 1)
        return stack_partitions(dfs, divisions)


normalize_token.register(_Frame, lambda a: a._name)


def query(df, expr, callenv):
    boolmask = gd.queryutils.query_execute(df, expr, callenv)

    selected = gd.Series(boolmask)
    newdf = gd.DataFrame()
    for col in df.columns:
        newseries = df[col][selected]
        newdf[col] = newseries
    return newdf


class DataFrame(_Frame):
    _partition_type = gd.DataFrame
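# Illustrative sketch (not from the module above) of the pattern behind
# normalize_token.register(_Frame, lambda a: a._name): a dask collection's name
# already identifies its graph, so it serves as a cheap, deterministic token.
import dask.array as da
from dask.base import tokenize

x = da.ones((4, 4), chunks=2)
assert tokenize(x) == tokenize(x)      # same collection, same token
assert tokenize(x) != tokenize(x + 1)  # different graph/name, different token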
        purposes.
        """
        cf = 'CF'[self.flags.fnc]
        data_state = super(np.ma.MaskedArray, self).__reduce__()[2]
        return data_state + (np.ma.getmaskarray(self).tostring(cf),
                             self._fill_value)
else:
    from numpy.ma import MaskedArray  # noqa


# A singleton to indicate a missing parameter
MISSING = type('MissingParameter', (object,),
               {'__slots__': (),
                '__reduce__': lambda self: 'MISSING',
                '__doc__': "A singleton to indicate a missing parameter"})()

normalize_token.register(type(MISSING), lambda x: 'MISSING')

# A singleton to indicate a failed estimator fit
FIT_FAILURE = type('FitFailure', (object,),
                   {'__slots__': (),
                    '__reduce__': lambda self: 'FIT_FAILURE',
                    '__doc__': "A singleton to indicate fit failure"})()


def warn_fit_failure(error_score, e):
    warnings.warn("Classifier fit failed. The score on this train-test"
                  " partition for these parameters will be set to %f. "
                  "Details: \n%r" % (error_score, e), FitFailedWarning)
            )

else:
    from numpy.ma import MaskedArray  # noqa


# A singleton to indicate a missing parameter
MISSING = type(
    "MissingParameter",
    (object,),
    {
        "__slots__": (),
        "__reduce__": lambda self: "MISSING",
        "__doc__": "A singleton to indicate a missing parameter",
    },
)()

normalize_token.register(type(MISSING), lambda x: "MISSING")

# A singleton to indicate a failed estimator fit
FIT_FAILURE = type(
    "FitFailure",
    (object,),
    {
        "__slots__": (),
        "__reduce__": lambda self: "FIT_FAILURE",
        "__doc__": "A singleton to indicate fit failure",
    },
)()


def warn_fit_failure(error_score, e):
from .utils import copy_estimator

try:
    from sklearn.utils.fixes import MaskedArray
except:  # pragma: no cover
    from numpy.ma import MaskedArray

# A singleton to indicate a missing parameter
MISSING = type(
    'MissingParameter', (object, ), {
        '__slots__': (),
        '__reduce__': lambda self: 'MISSING',
        '__doc__': "A singleton to indicate a missing parameter"
    })()

normalize_token.register(type(MISSING), lambda x: 'MISSING')

# A singleton to indicate a failed estimator fit
FIT_FAILURE = type(
    'FitFailure', (object, ), {
        '__slots__': (),
        '__reduce__': lambda self: 'FIT_FAILURE',
        '__doc__': "A singleton to indicate fit failure"
    })()


def warn_fit_failure(error_score, e):
    warnings.warn(
        "Classifier fit failed. The score on this train-test"
        " partition for these parameters will be set to %f. "
        "Details: \n%r" % (error_score, e), FitFailedWarning)
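# A minimal sketch (the sentinel name is illustrative) of the singleton pattern
# used in the three variants above: registering the sentinel's type with a
# constant token keeps any graph that embeds it deterministic.
from dask.base import normalize_token, tokenize

SENTINEL = type('Sentinel', (object,), {'__slots__': ()})()
normalize_token.register(type(SENTINEL), lambda x: 'SENTINEL')

assert tokenize(SENTINEL) == tokenize(SENTINEL)
assert tokenize((SENTINEL, 1)) != tokenize((SENTINEL, 2))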
        values = [values]
    name = 'matrix-from-delayed-' + tokenize(*values)
    dsk = merge(v.dask for v in values)
    dsk.update(((name, i), v.key) for i, v in enumerate(values))
    return Matrix(dsk, name, len(values), dtype, shape)


def from_series(s):
    name = 'matrix-from-series-' + tokenize(s)
    dsk = dict(((name, i), (np.asarray, k))
               for i, k in enumerate(s._keys()))
    dsk.update(s.dask)
    return Matrix(dsk, name, s.npartitions, s.dtype, (None, ))


def from_array(arr):
    name = 'matrix-from-array-' + tokenize(arr)
    if arr.ndim == 2:
        if len(arr.chunks[1]) != 1:
            arr = arr.rechunk((arr.chunks[0], arr.shape[1]))
        keys = list(concat(arr._keys()))
    elif arr.ndim == 1:
        keys = arr._keys()
    else:
        raise ValueError("array must be 1 or 2 dimensional")
    dsk = dict(((name, i), k) for i, k in enumerate(keys))
    dsk.update(arr.dask)
    return Matrix(dsk, name, len(keys), arr.dtype, arr.shape)


normalize_token.register(Matrix, lambda mat: mat.name)
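# Illustrative sketch of why the constructors above derive task-name prefixes
# from tokenize(): equal inputs hash to equal names, so rebuilding the same
# Matrix reproduces the same keys and dask can reuse the work.
import numpy as np
from dask.base import tokenize

arr = np.arange(6).reshape(2, 3)
name_a = 'matrix-from-array-' + tokenize(arr)
name_b = 'matrix-from-array-' + tokenize(np.arange(6).reshape(2, 3))
assert name_a == name_b  # identical data, identical deterministic name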
    # Aggregate
    for j in range(split_out):
        b = '{0}-agg-{1}'.format(token or funcname(aggregate), token_key)
        conc = (sp.SparseFrame.vstack,
                [(a, depth, i, j) for i in range(k)])
        if aggregate_kwargs:
            dsk[(b, j)] = (apply, aggregate, [conc], aggregate_kwargs)
        else:
            dsk[(b, j)] = (aggregate, conc)

    if meta is no_default:
        meta_chunk = _emulate(chunk, *args, **chunk_kwargs)
        meta = _emulate(aggregate, sp.SparseFrame.vstack([meta_chunk]),
                        **aggregate_kwargs)

    for arg in args:
        if isinstance(arg, SparseFrame):
            dsk.update(arg.dask)

    divisions = [None] * (split_out + 1)

    return SparseFrame(dsk, b, meta, divisions)


@get_parallel_type.register(SparseFrame)
def get_parallel_type_distributed(o):
    return get_parallel_type(o._meta)


normalize_token.register((SparseFrame, ), lambda a: a._name)
if data["type"] in data_client_ids: # Generic data - manually re-add client_id as it gets lost in the streaming join data.update( {"client_id": ClientId(data_client_ids[data["type"]])}) data["data"] = [ GenericData(data_type=DataType(data["type"]), data=d) for d in data["data"] ] _load_engine_data(engine=engine, data=data) engine.run_streaming(run_config_id=run_config_id) engine.end_streaming() # Register tokenization methods with dask for cls in Instrument.__subclasses__(): normalize_token.register(cls, func=cls.to_dict) @normalize_token.register(object) def nautilus_tokenize(o: object): return cloudpickle.dumps(o, protocol=pickle.DEFAULT_PROTOCOL) @normalize_token.register(ImportableStrategyConfig) def tokenize_strategy_config(config: ImportableStrategyConfig): return config.dict() @normalize_token.register(BacktestRunConfig) def tokenize_backtest_run_config(config: BacktestRunConfig): return config.__dict__