Example #1
File: test_base.py Project: fortizc/dask
def test_tokenize_method():
    class Foo(object):
        def __init__(self, x):
            self.x = x

        def __dask_tokenize__(self):
            return self.x

    a, b = Foo(1), Foo(2)
    assert tokenize(a) == tokenize(a)
    assert tokenize(a) != tokenize(b)

    # dispatch takes precedence
    before = tokenize(a)
    normalize_token.register(Foo, lambda self: self.x + 1)
    after = tokenize(a)
    assert before != after
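
The test above assumes dask's tokenization utilities are already in scope. As a rough, self-contained sketch (assuming a dask version where both names are importable from dask.base; Point and its attributes are made up for illustration), the two hooks it exercises look like this:

from dask.base import normalize_token, tokenize


class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __dask_tokenize__(self):
        # Return something cheap and deterministic that identifies the object.
        return (type(self).__name__, self.x, self.y)


# The protocol method alone already gives stable, value-based tokens.
assert tokenize(Point(1, 2)) == tokenize(Point(1, 2))
assert tokenize(Point(1, 2)) != tokenize(Point(1, 3))

# An explicit dispatch registration overrides __dask_tokenize__,
# which is the "dispatch takes precedence" behaviour the test checks.
normalize_token.register(Point, lambda p: ("Point", p.x))
assert tokenize(Point(1, 2)) == tokenize(Point(1, 99))  # y is now ignored
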
Example #2
def test_tokenize_method():
    class Foo:
        def __init__(self, x):
            self.x = x

        def __dask_tokenize__(self):
            return self.x

    a, b = Foo(1), Foo(2)
    assert tokenize(a) == tokenize(a)
    assert tokenize(a) != tokenize(b)

    # dispatch takes precedence
    before = tokenize(a)
    normalize_token.register(Foo, lambda self: self.x + 1)
    after = tokenize(a)
    assert before != after
Example #3
File: test_base.py Project: jakirkham/dask
def test_tokenize_method():
    class Foo:
        def __init__(self, x):
            self.x = x

        def __dask_tokenize__(self):
            return self.x

    a, b = Foo(1), Foo(2)
    assert tokenize(a) == tokenize(a)
    assert tokenize(a) != tokenize(b)

    for ensure in [True, False]:
        with dask.config.set({"tokenize.ensure-deterministic": ensure}):
            assert tokenize(a) == tokenize(a)

    # dispatch takes precedence
    before = tokenize(a)
    normalize_token.register(Foo, lambda self: self.x + 1)
    after = tokenize(a)
    assert before != after
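
Example #3 also toggles the tokenize.ensure-deterministic option. As a hedged sketch (the exact exception type and fallback behaviour vary across dask versions; Opaque is a made-up class), enabling it makes tokenize refuse objects it cannot hash deterministically instead of silently falling back to a random token:

import dask
from dask.base import tokenize


class Opaque:
    # No __dask_tokenize__, no normalize_token registration, and pickling
    # fails, so dask has no deterministic way to hash instances.
    def __reduce__(self):
        raise TypeError("not picklable")


with dask.config.set({"tokenize.ensure-deterministic": True}):
    try:
        tokenize(Opaque())
    except Exception as exc:  # exception type depends on the dask version
        print(f"refused non-deterministic token: {exc!r}")
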
Example #4
File: core.py Project: tym1062/dask-cudf
        if all(dfs[i].divisions[-1] < dfs[i + 1].divisions[0]
                for i in range(len(dfs) - 1)):
            divisions = []
            for df in dfs[:-1]:
                # remove last to concatenate with next
                divisions += df.divisions[:-1]
            divisions += dfs[-1].divisions
            return stack_partitions(dfs, divisions)
    elif interleave_partitions:
        return concat_indexed_dataframes(dfs)
    else:
        divisions = [None] * (sum([df.npartitions for df in dfs]) + 1)
        return stack_partitions(dfs, divisions)


normalize_token.register(_Frame, lambda a: a._name)


def query(df, expr, callenv):
    boolmask = gd.queryutils.query_execute(df, expr, callenv)

    selected = gd.Series(boolmask)
    newdf = gd.DataFrame()
    for col in df.columns:
        newseries = df[col][selected]
        newdf[col] = newseries
    return newdf


class DataFrame(_Frame):
    _partition_type = gd.DataFrame
Example #5
File: methods.py Project: dask/dask-learn
            purposes.

            """
            cf = 'CF'[self.flags.fnc]
            data_state = super(np.ma.MaskedArray, self).__reduce__()[2]
            return data_state + (np.ma.getmaskarray(self).tostring(cf),
                                 self._fill_value)
else:
    from numpy.ma import MaskedArray    # noqa

# A singleton to indicate a missing parameter
MISSING = type('MissingParameter', (object,),
               {'__slots__': (),
                '__reduce__': lambda self: 'MISSING',
                '__doc__': "A singleton to indicate a missing parameter"})()
normalize_token.register(type(MISSING), lambda x: 'MISSING')


# A singleton to indicate a failed estimator fit
FIT_FAILURE = type('FitFailure', (object,),
                   {'__slots__': (),
                    '__reduce__': lambda self: 'FIT_FAILURE',
                    '__doc__': "A singleton to indicate fit failure"})()


def warn_fit_failure(error_score, e):
    warnings.warn("Classifier fit failed. The score on this train-test"
                  " partition for these parameters will be set to %f. "
                  "Details: \n%r" % (error_score, e), FitFailedWarning)

Example #6
File: methods.py Project: mmccarty/dask-ml
            )

else:
    from numpy.ma import MaskedArray  # noqa

# A singleton to indicate a missing parameter
MISSING = type(
    "MissingParameter",
    (object,),
    {
        "__slots__": (),
        "__reduce__": lambda self: "MISSING",
        "__doc__": "A singleton to indicate a missing parameter",
    },
)()
normalize_token.register(type(MISSING), lambda x: "MISSING")


# A singleton to indicate a failed estimator fit
FIT_FAILURE = type(
    "FitFailure",
    (object,),
    {
        "__slots__": (),
        "__reduce__": lambda self: "FIT_FAILURE",
        "__doc__": "A singleton to indicate fit failure",
    },
)()


def warn_fit_failure(error_score, e):
Example #7
from .utils import copy_estimator

try:
    from sklearn.utils.fixes import MaskedArray
except:  # pragma: no cover
    from numpy.ma import MaskedArray

# A singleton to indicate a missing parameter
MISSING = type(
    'MissingParameter', (object, ), {
        '__slots__': (),
        '__reduce__': lambda self: 'MISSING',
        '__doc__': "A singleton to indicate a missing parameter"
    })()
normalize_token.register(type(MISSING), lambda x: 'MISSING')

# A singleton to indicate a failed estimator fit
FIT_FAILURE = type(
    'FitFailure', (object, ), {
        '__slots__': (),
        '__reduce__': lambda self: 'FIT_FAILURE',
        '__doc__': "A singleton to indicate fit failure"
    })()


def warn_fit_failure(error_score, e):
    warnings.warn(
        "Classifier fit failed. The score on this train-test"
        " partition for these parameters will be set to %f. "
        "Details: \n%r" % (error_score, e), FitFailedWarning)
Example #8
        values = [values]
    name = 'matrix-from-delayed-' + tokenize(*values)
    dsk = merge(v.dask for v in values)
    dsk.update(((name, i), v.key) for i, v in enumerate(values))
    return Matrix(dsk, name, len(values), dtype, shape)


def from_series(s):
    name = 'matrix-from-series-' + tokenize(s)
    dsk = dict(((name, i), (np.asarray, k)) for i, k in enumerate(s._keys()))
    dsk.update(s.dask)
    return Matrix(dsk, name, s.npartitions, s.dtype, (None, ))


def from_array(arr):
    name = 'matrix-from-array-' + tokenize(arr)
    if arr.ndim == 2:
        if len(arr.chunks[1]) != 1:
            arr = arr.rechunk((arr.chunks[0], arr.shape[1]))
        keys = list(concat(arr._keys()))
    elif arr.ndim == 1:
        keys = arr._keys()
    else:
        raise ValueError("array must be 1 or 2 dimensional")
    dsk = dict(((name, i), k) for i, k in enumerate(keys))
    dsk.update(arr.dask)
    return Matrix(dsk, name, len(keys), arr.dtype, arr.shape)


normalize_token.register(Matrix, lambda mat: mat.name)
Example #9
    # Aggregate
    for j in range(split_out):
        b = '{0}-agg-{1}'.format(token or funcname(aggregate), token_key)
        conc = (sp.SparseFrame.vstack, [(a, depth, i, j) for i in range(k)])
        if aggregate_kwargs:
            dsk[(b, j)] = (apply, aggregate, [conc], aggregate_kwargs)
        else:
            dsk[(b, j)] = (aggregate, conc)

    if meta is no_default:
        meta_chunk = _emulate(chunk, *args, **chunk_kwargs)
        meta = _emulate(aggregate, sp.SparseFrame.vstack([meta_chunk]),
                        **aggregate_kwargs)

    for arg in args:
        if isinstance(arg, SparseFrame):
            dsk.update(arg.dask)

    divisions = [None] * (split_out + 1)

    return SparseFrame(dsk, b, meta, divisions)


@get_parallel_type.register(SparseFrame)
def get_parallel_type_distributed(o):
    return get_parallel_type(o._meta)


normalize_token.register((SparseFrame, ), lambda a: a._name)
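
Examples #4, #8, and #9 all register a lazy collection class so that its token is just the collection's unique name rather than a hash of its whole task graph, which keeps tokenize cheap; as Example #9 shows, register also accepts a tuple of types. A minimal sketch of the same idea with a made-up wrapper class (LazyResult is not from any project above):

from dask.base import normalize_token, tokenize


class LazyResult:
    """Toy stand-in for a dask collection: a task graph plus a unique name."""

    def __init__(self, dsk, name):
        self.dask = dsk
        self.name = name


# The name already encodes the graph's identity, so hash only the name.
normalize_token.register(LazyResult, lambda lr: lr.name)

a = LazyResult({("x-123", 0): 1}, name="x-123")
b = LazyResult({("x-123", 0): 1}, name="x-123")
assert tokenize(a) == tokenize(b)
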
Example #10
            if data["type"] in data_client_ids:
                # Generic data - manually re-add client_id as it gets lost in the streaming join
                data.update(
                    {"client_id": ClientId(data_client_ids[data["type"]])})
                data["data"] = [
                    GenericData(data_type=DataType(data["type"]), data=d)
                    for d in data["data"]
                ]
            _load_engine_data(engine=engine, data=data)
        engine.run_streaming(run_config_id=run_config_id)
    engine.end_streaming()


# Register tokenization methods with dask
for cls in Instrument.__subclasses__():
    normalize_token.register(cls, func=cls.to_dict)


@normalize_token.register(object)
def nautilus_tokenize(o: object):
    return cloudpickle.dumps(o, protocol=pickle.DEFAULT_PROTOCOL)


@normalize_token.register(ImportableStrategyConfig)
def tokenize_strategy_config(config: ImportableStrategyConfig):
    return config.dict()


@normalize_token.register(BacktestRunConfig)
def tokenize_backtest_run_config(config: BacktestRunConfig):
    return config.__dict__
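
Example #10 uses the decorator form of normalize_token.register, and its registration for plain object is effectively a catch-all: dask's dispatch falls back along a type's MRO, and object ends every MRO, so the cloudpickle-based normalizer replaces the default fallback for every otherwise unregistered type. A small sketch of the decorator idiom with a hypothetical config class (RunConfig is not from the code above):

from dask.base import normalize_token, tokenize


class RunConfig:
    def __init__(self, venue, start, end):
        self.venue = venue
        self.start = start
        self.end = end


@normalize_token.register(RunConfig)
def tokenize_run_config(config: RunConfig):
    # Hash the plain attribute dict, mirroring tokenize_backtest_run_config above.
    return config.__dict__


assert tokenize(RunConfig("XNAS", 0, 1)) == tokenize(RunConfig("XNAS", 0, 1))
assert tokenize(RunConfig("XNAS", 0, 1)) != tokenize(RunConfig("XNAS", 0, 2))
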