Example #1
def compute_up(expr, data, **kwargs):
    leaf = expr._leaves()[0]
    chunk = symbol(
        'chunk',
        DataShape(*(tuple(map(first, data.chunks)) + (leaf.dshape.measure, ))))
    (chunk, chunk_expr), (agg, agg_expr) = split(expr._child,
                                                 expr,
                                                 chunk=chunk)

    inds = tuple(range(ndim(leaf)))
    dtype = expr.dshape.measure.to_numpy_dtype()
    tmp = atop(
        curry(compute_it, chunk_expr, [chunk], **kwargs),
        inds,
        data,
        inds,
        dtype=dtype,
    )

    return atop(
        compose(
            curry(compute_it, agg_expr, [agg], **kwargs),
            curry(_concatenate2, axes=expr.axis),
        ),
        tuple(i for i in inds if i not in expr.axis),
        tmp,
        inds,
        dtype=dtype,
    )
Example #2
def test_introspect_curry_py3():
    if not PY3:
        return
    f = toolz.curry(make_func(''))
    assert num_required_args(f) == 0
    assert is_arity(0, f)
    assert has_varargs(f) is False
    assert has_keywords(f) is False

    f = toolz.curry(make_func('x'))
    assert num_required_args(f) == 0
    assert is_arity(0, f) is False
    assert is_arity(1, f) is False
    assert has_varargs(f) is False
    assert has_keywords(f)  # A side-effect of being curried

    f = toolz.curry(make_func('x, y, z=0'))
    assert num_required_args(f) == 0
    assert is_arity(0, f) is False
    assert is_arity(1, f) is False
    assert is_arity(2, f) is False
    assert is_arity(3, f) is False
    assert has_varargs(f) is False
    assert has_keywords(f)

    f = toolz.curry(make_func('*args, **kwargs'))
    assert num_required_args(f) == 0
    assert has_varargs(f)
    assert has_keywords(f)
Example #3
def test_introspect_curry_py3():
    if not PY3:
        return
    f = toolz.curry(make_func(''))
    assert num_required_args(f) == 0
    assert is_arity(0, f)
    assert has_varargs(f) is False
    assert has_keywords(f) is False

    f = toolz.curry(make_func('x'))
    assert num_required_args(f) == 0
    assert is_arity(0, f) is False
    assert is_arity(1, f) is False
    assert has_varargs(f) is False
    assert has_keywords(f)  # A side-effect of being curried

    f = toolz.curry(make_func('x, y, z=0'))
    assert num_required_args(f) == 0
    assert is_arity(0, f) is False
    assert is_arity(1, f) is False
    assert is_arity(2, f) is False
    assert is_arity(3, f) is False
    assert has_varargs(f) is False
    assert has_keywords(f)

    f = toolz.curry(make_func('*args, **kwargs'))
    assert num_required_args(f) == 0
    assert has_varargs(f)
    assert has_keywords(f)
Example #4
def load_objects_from_storage(metadata_filename: str,
                              class_factory: GenericObject,
                              root_directory: Path) -> dict:
    return excepting_pipe(list_files(root_directory, metadata_filename),
                          curry(map)(load_json),
                          curry(map)(class_factory.from_json),
                          curry(map)(lambda _: (_.id, _)), dict)
Example #5
def compute_up(expr, data, **kwargs):
    leaf = expr._leaves()[0]
    chunk = symbol('chunk', DataShape(*(tuple(map(first, data.chunks)) +
                                        (leaf.dshape.measure,))))
    (chunk, chunk_expr), (agg, agg_expr) = split(expr._child, expr,
                                                 chunk=chunk)

    inds = tuple(range(ndim(leaf)))
    dtype = expr.dshape.measure.to_numpy_dtype()
    tmp = atop(
        curry(compute_it, chunk_expr, [chunk], **kwargs),
        inds,
        data,
        inds,
        dtype=dtype,
    )

    return atop(
        compose(
            curry(compute_it, agg_expr, [agg], **kwargs),
            curry(_concatenate2, axes=expr.axis),
        ),
        tuple(i for i in inds if i not in expr.axis),
        tmp,
        inds,
        dtype=dtype,
    )
Example #6
    def fold(self, binop, combine=None, initial=no_default, split_every=None):
        """ Parallelizable reduction

        Fold is like the builtin function ``reduce`` except that it works in
        parallel.  Fold takes two binary operator functions, one to reduce each
        partition of our dataset and another to combine results between
        partitions

        1.  ``binop``: Binary operator to reduce within each partition
        2.  ``combine``:  Binary operator to combine results from binop

        Sequentially this would look like the following:

        >>> intermediates = [reduce(binop, part) for part in partitions]  # doctest: +SKIP
        >>> final = reduce(combine, intermediates)  # doctest: +SKIP

        If only one function is given then it is used for both functions
        ``binop`` and ``combine`` as in the following example to compute the
        sum:

        >>> def add(x, y):
        ...     return x + y

        >>> b = from_sequence(range(5))
        >>> b.fold(add).compute()  # doctest: +SKIP
        10

        In full form we provide both binary operators as well as their default
        arguments

        >>> b.fold(binop=add, combine=add, initial=0).compute()  # doctest: +SKIP
        10

        More complex binary operators are also doable

        >>> def add_to_set(acc, x):
        ...     ''' Add new element x to set acc '''
        ...     return acc | set([x])
        >>> b.fold(add_to_set, set.union, initial=set()).compute()  # doctest: +SKIP
        {0, 1, 2, 3, 4}

        See Also
        --------

        Bag.foldby
        """
        token = tokenize(self, binop, combine, initial)
        combine = combine or binop
        a = 'foldbinop-{0}-{1}'.format(funcname(binop), token)
        b = 'foldcombine-{0}-{1}'.format(funcname(combine), token)
        initial = quote(initial)
        if initial is not no_default:
            return self.reduction(curry(_reduce, binop, initial=initial),
                                  curry(_reduce, combine),
                                  split_every=split_every)
        else:
            from toolz.curried import reduce
            return self.reduction(reduce(binop), reduce(combine),
                                  split_every=split_every)
Example #7
def make_fold_vectorize(complexity=3, nbits=15, fold=None, boundaries=None):
    """Curry parameters in vectorizer."""
    vec = Vectorizer(complexity=complexity, nbits=nbits)
    vectorize = curry(lambda vec, graphs: vec.transform(graphs))(vec)

    cwindow_reweight = curry(_window_reweight)(boundaries)
    fold_vectorize = compose(vectorize, map(cwindow_reweight), fold)
    return fold_vectorize
Example #8
    def fold(self, binop, combine=None, initial=no_default, split_every=None):
        """ Parallelizable reduction

        Fold is like the builtin function ``reduce`` except that it works in
        parallel.  Fold takes two binary operator functions, one to reduce each
        partition of our dataset and another to combine results between
        partitions

        1.  ``binop``: Binary operator to reduce within each partition
        2.  ``combine``:  Binary operator to combine results from binop

        Sequentially this would look like the following:

        >>> intermediates = [reduce(binop, part) for part in partitions]  # doctest: +SKIP
        >>> final = reduce(combine, intermediates)  # doctest: +SKIP

        If only one function is given then it is used for both functions
        ``binop`` and ``combine`` as in the following example to compute the
        sum:

        >>> def add(x, y):
        ...     return x + y

        >>> b = from_sequence(range(5))
        >>> b.fold(add).compute()  # doctest: +SKIP
        10

        In full form we provide both binary operators as well as their default
        arguments

        >>> b.fold(binop=add, combine=add, initial=0).compute()  # doctest: +SKIP
        10

        More complex binary operators are also doable

        >>> def add_to_set(acc, x):
        ...     ''' Add new element x to set acc '''
        ...     return acc | set([x])
        >>> b.fold(add_to_set, set.union, initial=set()).compute()  # doctest: +SKIP
        {0, 1, 2, 3, 4}

        See Also
        --------

        Bag.foldby
        """
        combine = combine or binop
        initial = quote(initial)
        if initial is not no_default:
            return self.reduction(curry(_reduce, binop, initial=initial),
                                  curry(_reduce, combine),
                                  split_every=split_every)
        else:
            from toolz.curried import reduce
            return self.reduction(reduce(binop),
                                  reduce(combine),
                                  split_every=split_every)
Example #9
def getgraphs(aid):
    if aid == 'bursi':
        return list(gspan.gspan_to_eden("bursi.pos.gspan")), list(
            gspan.gspan_to_eden("bursi.neg.gspan"))
    download_active = curry(download)(active=True, stepsize=50)
    download_inactive = curry(download)(active=False, stepsize=50)
    active = pipe(aid, download_active, sdf_to_nx, list)
    inactive = pipe(aid, download_inactive, sdf_to_nx, list)
    return active, inactive
Example #10
    def test_urlzsource(self):
        lines4 = []
        with URLZSource('http://www.google.com/robots.txt').open() as f:
            take_and_rstrip = compose(curry(map, lambda l: rstrip(l, '\n')),
                                      curry(take, 4))
            lines4 = list(take_and_rstrip(f))

        print(str(lines4))
        self.assertGreaterEqual(len(lines4), 1,
                                "Failed to get more than 0 lines")
Example #11
def collect_ast():
    type_spec: dict = collections.defaultdict(set)

    def is_python(path: str):
        return path.endswith('.py') or Path(path).is_dir()

    all_python_files = linq.Flow(
        Path(Redy.__file__).parent().collect(is_python)).concat(
            Path(flask.__file__).parent().collect(is_python))._

    for each in all_python_files:
        with each.open('r', encoding='utf8') as file:
            try:
                ast_of_src_code = ast.parse(file.read())
                service = CollectASTTypeStub(type_spec)
                feature(service).just_apply_ast_transformation(ast_of_src_code)
            except SyntaxError:
                # other py version
                pass

    def snd(tp):
        return tp[1]

    stub_code = \
    (linq.Flow(type_spec)
         .map(lambda class_name, fields:
                linq.Flow(fields)
                    .group_by(lambda fst, snd: fst)
                    .map(lambda field_name, pairs:
                            '{}: {}'.format(
                            field_name,
                            compose(
                                str,
                                curry(reduce)(lambda a, b: a.union(b)),
                                curry(map)(snd))(pairs)))
                    .then(
                        compose(
                            'class {}(AST):\n'.format(class_name).__add__,
                            lambda _: textwrap.indent(_, " " * 4),
                            lambda any_code: any_code if any_code else 'pass',
                            '\n'.join))
                    ._)
         .then(
            compose(
                'import typing, abc\nNoneType = None\n'.__add__,
                'class AST(abc.ABC):\n    def __init__(self, *args, lineno: int=None, colno: int=None, **kwargs): pass\n'.__add__,
                '\n'.join
                )))._

    with Path('./').into('ast.pyi').open('w', encoding='utf8') as stub:
        stub.write(stub_code)

    with Path(ast.__file__).parent().into('ast.pyi').open(
            'w', encoding='utf8') as stub:
        stub.write(stub_code)
Example #12
 def list_snapshots_filtered(self, model_id: UUID, git_commit_id: str,
                             query: dict) -> Mapping[UUID, ModelSnapshot]:
     return excepting_pipe(
         self._snapshots.values(),
         curry(filter)(lambda v: v.model.id == ensure_uuid(model_id)),
         curry(filter)(lambda v: v.model_git_commit == git_commit_id),
         curry(sorted,
               key=lambda snap: getattr(snap, query['sortby']),
               reverse=query['order']),
         curry(partition_all)(query['limit'] if query['limit'] > 0 else len(
             self._snapshots.values())), list,
         curry(get, default=[])(query['offset']))
Example #13
 def list_results(self, query: dict) -> Iterator[Result]:
     """
     List all results
     :param query:
     :return:
     """
     return excepting_pipe(
         self._results.values(),
         curry(sorted,
               key=lambda x: getattr(x, query['sortby']),
               reverse=query['order']),
         curry(partition_all)(query['limit'] if query['limit'] > 0 else len(
             self._results.values())), list,
         curry(get, default=[])(query['offset']))
Example #14
def test_normalize_function():
    def f1(a, b, c=1):
        pass
    def f2(a, b=1, c=2):
        pass
    def f3(a):
        pass

    assert normalize_function(f2)

    f = lambda a: a
    assert normalize_function(f)

    assert (normalize_function(partial(f2, b=2)) ==
            normalize_function(partial(f2, b=2)))

    assert (normalize_function(partial(f2, b=2)) !=
            normalize_function(partial(f2, b=3)))

    assert (normalize_function(partial(f1, b=2)) !=
            normalize_function(partial(f2, b=2)))

    assert (normalize_function(compose(f2, f3)) ==
            normalize_function(compose(f2, f3)))

    assert (normalize_function(compose(f2, f3)) !=
            normalize_function(compose(f2, f1)))

    assert normalize_function(curry(f2)) == normalize_function(curry(f2))
    assert normalize_function(curry(f2)) != normalize_function(curry(f1))
    assert (normalize_function(curry(f2, b=1)) ==
            normalize_function(curry(f2, b=1)))
    assert (normalize_function(curry(f2, b=1)) !=
            normalize_function(curry(f2, b=2)))
Example #15
def graph_embed(data,
                target=None,
                confidence=1,
                n_iter=20,
                sample_fraction=.7,
                sample_p=None,
                feature_fraction=1,
                feature_p=None,
                alpha=0,
                gamma=1,
                beta=30):
    """Provide 2D embedding of high dimensional data."""
    # set parameters in all functions
    if sample_p is not None:
        sample_p = sample_p / np.sum(sample_p)
        sample_p = sample_p + 0.01
        sample_p = sample_p / np.sum(sample_p)
    if feature_p is not None:
        feature_p = feature_p / np.sum(feature_p)
        feature_p = feature_p + 0.01
        feature_p = feature_p / np.sum(feature_p)
    _sample = curry(sample)(sample_fraction=sample_fraction,
                            sample_p=sample_p,
                            feature_fraction=feature_fraction,
                            feature_p=feature_p)
    _step_func = curry(step_func)(alpha=alpha, gamma=gamma, beta=beta)
    _add_edge_length = curry(add_edge_length)(func=_step_func)
    _make_knn_graph = curry(make_knn_graph)(size=len(data),
                                            target=target,
                                            confidence=confidence)

    def make_graph(make_tree):
        count_dict = defaultdict(lambda: defaultdict(int))
        lengths_dict = defaultdict(lambda: defaultdict(list))
        for i in range(n_iter):
            sample_data, sample_ids, sample_feature_ids = _sample(data)
            sample_id_inv_map = make_id_map(sample_ids)
            distance_mtx = euclidean_distances(sample_data)
            rank_mtx = compute_ranks(distance_mtx)
            tree = make_tree(distance_mtx, sample_ids)
            _add_edge_length(tree, rank_mtx, sample_id_inv_map)
            update(count_dict, lengths_dict, tree)
        graph = _make_knn_graph(count_dict, lengths_dict)
        return graph

    mst_graph = make_graph(make_mst_tree)
    qks_graph = make_graph(make_qks_tree)
    graph = nx.compose(mst_graph, qks_graph)
    return graph
Example #16
 def list_snapshots(self, query: dict) -> Iterator[ModelSnapshot]:
     """
     List all snapshots
     :param query:
     :return:
     """
     # TODO: Not completely pure pipeline
     return excepting_pipe(
         self._snapshots.values(),
         curry(sorted,
               key=lambda snap: getattr(snap, query['sortby']),
               reverse=query['order']),
         curry(partition_all)(query['limit'] if query['limit'] > 0 else len(
             self._snapshots.values())), list,
         curry(get, default=[])(query['offset']))
Example #17
def test_EqualityHashKey_callable_key():
    # Common simple hash key functions.
    EqualityHashLen = curry(EqualityHashKey, len)
    EqualityHashType = curry(EqualityHashKey, type)
    EqualityHashId = curry(EqualityHashKey, id)
    EqualityHashFirst = curry(EqualityHashKey, first)
    data1 = [[], [1], (), (1, ), {}, {1: 2}]
    data2 = [[1, 2], (1, 2), (1, 3), [1, 3], [2, 1], {1: 2}]
    assert list(unique(data1 * 3, key=EqualityHashLen)) == data1
    assert list(unique(data2 * 3, key=EqualityHashLen)) == data2
    assert list(unique(data1 * 3, key=EqualityHashType)) == data1
    assert list(unique(data2 * 3, key=EqualityHashType)) == data2
    assert list(unique(data1 * 3, key=EqualityHashId)) == data1
    assert list(unique(data2 * 3, key=EqualityHashId)) == data2
    assert list(unique(data2 * 3, key=EqualityHashFirst)) == data2
Example #18
def test_EqualityHashKey_callable_key():
    # Common simple hash key functions.
    EqualityHashLen = curry(EqualityHashKey, len)
    EqualityHashType = curry(EqualityHashKey, type)
    EqualityHashId = curry(EqualityHashKey, id)
    EqualityHashFirst = curry(EqualityHashKey, first)
    data1 = [[], [1], (), (1,), {}, {1: 2}]
    data2 = [[1, 2], (1, 2), (1, 3), [1, 3], [2, 1], {1: 2}]
    assert list(unique(data1*3, key=EqualityHashLen)) == data1
    assert list(unique(data2*3, key=EqualityHashLen)) == data2
    assert list(unique(data1*3, key=EqualityHashType)) == data1
    assert list(unique(data2*3, key=EqualityHashType)) == data2
    assert list(unique(data1*3, key=EqualityHashId)) == data1
    assert list(unique(data2*3, key=EqualityHashId)) == data2
    assert list(unique(data2*3, key=EqualityHashFirst)) == data2
Example #19
def test_check_curry_1():
    # Ensure the decorator works when curried
    _func_sig = check(REQUIRE, data=[str, str])(func_sig)
    fc = toolz.curry(_func_sig,
                     **{REQUIRE: {'data': ['nir', 'red']},
                        OUTPUT: {'data': ['ndvi']}})
    fc({})
Example #20
def internal_tessssst_oneclass():
    # python -c "import score as s; s.internal_tessssst_oneclass()"
    # lets get sum data
    from toolz import curry, pipe
    from eden_chem.io.pubchem import download
    from eden_chem.io.rdkitutils import sdf_to_nx
    download_active = curry(download)(active=True)
    download_inactive = curry(download)(active=False)

    def get_pos_graphs(assay_id):
        return pipe(assay_id, download_active, sdf_to_nx, list)

    assay_id = '624249'
    gr = get_pos_graphs(assay_id)
    est = OneClassEstimator().fit(gr)
    print(est.decision_function(gr))
Example #21
 def versions(self, dataset_id: UUID,
              query: dict) -> Iterator[DatasetVersion]:
     """
     Lists all versions of a given dataset
     :param dataset_id:
     :param query:
     :return:
     """
     # TODO: Not completely pure pipeline
     return excepting_pipe(
         self._versions.values(),
         curry(filter)(lambda v: v.parent_id == ensure_uuid(dataset_id)),
         curry(sorted,
               key=lambda ds: getattr(ds, query['sortby']),
               reverse=query['order']), partition_versions(query), list,
         curry(get, default=[])(query['offset']))
Example #22
def reduction(x, chunk, aggregate, axis=None, keepdims=None, dtype=None):
    """ General version of reductions

    >>> reduction(my_array, np.sum, np.sum, axis=0, keepdims=False)  # doctest: +SKIP
    """
    if axis is None:
        axis = tuple(range(x.ndim))
    if isinstance(axis, int):
        axis = (axis,)

    chunk2 = partial(chunk, axis=axis, keepdims=True)
    aggregate2 = partial(aggregate, axis=axis, keepdims=keepdims)

    inds = tuple(range(x.ndim))
    tmp = atop(chunk2, next(names), inds, x, inds)

    inds2 = tuple(i for i in inds if i not in axis)

    result = atop(compose(aggregate2, curry(_concatenate2, axes=axis)),
                  next(names), inds2, tmp, inds, dtype=dtype)

    if keepdims:
        dsk = result.dask.copy()
        for k in flatten(result._keys()):
            k2 = (k[0],) + insert_many(k[1:], axis, 0)
            dsk[k2] = dsk.pop(k)
        blockdims = insert_many(result.blockdims, axis, [1])
        return Array(dsk, result.name, blockdims=blockdims, dtype=dtype)
    else:
        return result
Example #23
def elemwise_array(expr, *data, **kwargs):
    leaves = expr._inputs
    expr_inds = tuple(range(ndim(expr)))[::-1]
    return atop(curry(compute_it, expr, leaves, **kwargs),
                expr_inds,
                *concat((dat, tuple(range(ndim(dat))[::-1])) for dat in data),
                dtype=expr.dshape.measure.to_numpy_dtype())
Example #24
 def get(self):
     # type: () -> PluginType
     """Return the currently active plugin."""
     if self._options:
         return curry(self._active, **self._options)
     else:
         return self._active
Example #25
def test_EqualityHashKey_default_key():
    EqualityHashDefault = curry(EqualityHashKey, None)
    L1 = [1]
    L2 = [2]
    data1 = [L1, L1, L2, [], [], [1], [2], {}, ()]
    set1 = set(map(EqualityHashDefault, data1))
    set2 = set(map(EqualityHashDefault, [[], [1], [2], {}, ()]))
    assert set1 == set2
    assert len(set1) == 5

    # Test that ``EqualityHashDefault(item)`` is distinct from ``item``
    T0 = ()
    T1 = (1,)
    data2 = list(map(EqualityHashDefault, [T0, T0, T1, T1, (), (1,)]))
    data2.extend([T0, T1, (), (1,)])
    set3 = set(data2)
    assert set3 == set([(), (1,), EqualityHashDefault(()),
                        EqualityHashDefault((1,))])
    assert len(set3) == 4
    assert EqualityHashDefault(()) in set3
    assert EqualityHashDefault((1,)) in set3

    # Miscellaneous
    E1 = EqualityHashDefault(L1)
    E2 = EqualityHashDefault(L2)
    assert str(E1) == '=[1]='
    assert repr(E1) == '=[1]='
    assert E1 != E2
    assert not (E1 == E2)
    assert E1 == EqualityHashDefault(L1)
    assert not (E1 != EqualityHashDefault(L1))
    assert E1 != L1
    assert not (E1 == L1)
Example #26
def main(haunted_place_file, cache_file, output_file):

    load_dotenv(find_dotenv())

    google_api_key = os.environ.get("GOOGLE_MAPS_API_KEY")

    geo_cache = set()
    if cache_file:
        geo_cache = load_geo_cache(csv.reader(cache_file))

    haunted_places = pd.read_csv(haunted_place_file)

    null_state = haunted_places.state.isnull()
    null_city = haunted_places.city.isnull()

    haunted_place_locations = haunted_places.loc[
        ~null_state & ~null_city,
        ['state','city']
    ].drop_duplicates()

    create_geo_request = curry(_create_geo_request)(google_api_key)

    writer = csv.writer(output_file)

    for _,row in haunted_place_locations.iterrows():
        # Skip if it's already in the cache.
        if (row["state"], row["city"]) in geo_cache: continue

        # Otherwise write the row.
        writer.writerow([
            row["state"],
            row["city"],
            create_geo_request(row["state"], row["city"])
        ])
Example #27
def reduction(x, chunk, aggregate, axis=None, keepdims=None, dtype=None):
    """ General version of reductions

    >>> reduction(my_array, np.sum, np.sum, axis=0, keepdims=False)  # doctest: +SKIP
    """
    if axis is None:
        axis = tuple(range(x.ndim))
    if isinstance(axis, int):
        axis = (axis, )

    chunk2 = partial(chunk, axis=axis, keepdims=True)
    aggregate2 = partial(aggregate, axis=axis, keepdims=keepdims)

    inds = tuple(range(x.ndim))
    tmp = atop(chunk2, next(names), inds, x, inds)

    inds2 = tuple(i for i in inds if i not in axis)

    result = atop(compose(aggregate2, curry(_concatenate2, axes=axis)),
                  next(names),
                  inds2,
                  tmp,
                  inds,
                  dtype=dtype)

    if keepdims:
        dsk = result.dask.copy()
        for k in flatten(result._keys()):
            k2 = (k[0], ) + insert_many(k[1:], axis, 0)
            dsk[k2] = dsk.pop(k)
        blockdims = insert_many(result.blockdims, axis, [1])
        return Array(dsk, result.name, blockdims=blockdims, dtype=dtype)
    else:
        return result
Example #28
def wrap_func_size_as_kwarg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' in kwargs and 'size' not in kwargs:
        kwargs['size'] = kwargs.pop('shape')
    if 'size' not in kwargs:
        args, size = args[:-1], args[-1]
    else:
        size = kwargs.pop('size')

    if not isinstance(size, (tuple, list)):
        size = (size,)

    blockshape = kwargs.pop('blockshape', None)
    blockdims = kwargs.pop('blockdims', None)
    name = kwargs.pop('name', None)
    if not blockdims and blockshape:
        blockdims = blockdims_from_blockshape(size, blockshape)

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in blockdims])
    sizes = product(*blockdims)
    if not kwargs:
        vals = ((func,) + args + (size,) for size in sizes)
    else:
        vals = ((curry(func, *args, size=size, **kwargs),) for size in sizes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, shape=size, blockdims=blockdims)
Example #29
 def draw_target(self):
     '''Usage: aimmat.draw_target()(center)'''
     def draw(center, img):
         center = (int(center[0]), int(center[1]))
         cv2.circle(img, center, 5, (50, 200, 200), -1)
         return img
     return curry(draw)
Example #30
def load_all_users():
    ''' Returns a pd.DataFrame with the information of all the users'''
    map = tlz.curry(map)
    dataset = tlz.pipe(users, map(parse_exp03_filename), map(user_pipe),
                       accumulate_users)
    dataset.insert(0, 'user', sorted(users * 3))
    return dataset
Example #31
def compute_down(expr, data, chunksize=2**20, map=map, **kwargs):
    leaf = expr._leaves()[0]

    # If the bottom expression is a projection or field then want to do
    # compute_up first
    children = set(e for e in expr._traverse()
                   if isinstance(e, Expr)
                   and any(i is expr._leaves()[0] for i in e._inputs))
    if len(children) == 1 and isinstance(first(children), (Field, Projection)):
        raise NotImplementedError()


    chunk = symbol('chunk', chunksize * leaf.schema)
    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr, chunk=chunk)

    data_parts = partitions(data, chunksize=(chunksize,))

    parts = list(map(curry(compute_chunk, data, chunk, chunk_expr),
                           data_parts))

    if isinstance(parts[0], np.ndarray):
        intermediate = np.concatenate(parts)
    elif isinstance(parts[0], pd.DataFrame):
        intermediate = pd.concat(parts)
    elif isinstance(parts[0], Iterable):
        intermediate = list(concat(parts))
    else:
        raise TypeError(
            "Don't know how to concatenate objects of type %s" % type(parts[0]))

    return compute(agg_expr, {agg: intermediate})
Example #32
 def map(self, func):
     name = next(names)
     if takes_multiple_arguments(func):
         func = curry(apply, func)
     dsk = dict(((name, i), (list, (map, func, (self.name, i))))
                     for i in range(self.npartitions))
     return Bag(merge(self.dask, dsk), name, self.npartitions)
Example #33
def wrap_func_shape_as_first_arg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' not in kwargs:
        shape, args = args[0], args[1:]
    else:
        shape = kwargs.pop('shape')

    if not isinstance(shape, (tuple, list)):
        shape = (shape,)

    chunks = kwargs.pop('chunks', None)
    chunks = normalize_chunks(chunks, shape)
    name = kwargs.pop('name', None)

    dtype = kwargs.pop('dtype', None)
    if dtype is None:
        dtype = func(shape, *args, **kwargs).dtype

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in chunks])
    shapes = product(*chunks)
    func = curry(func, dtype=dtype, **kwargs)
    vals = ((func,) + (s,) + args for s in shapes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, chunks, dtype=dtype)
Example #34
def wrap_func_size_as_kwarg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' in kwargs and 'size' not in kwargs:
        kwargs['size'] = kwargs.pop('shape')
    if 'size' not in kwargs:
        args, size = args[:-1], args[-1]
    else:
        size = kwargs.pop('size')

    if not isinstance(size, (tuple, list)):
        size = (size, )

    blockshape = kwargs.pop('blockshape', None)
    blockdims = kwargs.pop('blockdims', None)
    name = kwargs.pop('name', None)
    if not blockdims and blockshape:
        blockdims = blockdims_from_blockshape(size, blockshape)

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in blockdims])
    sizes = product(*blockdims)
    if not kwargs:
        vals = ((func, ) + args + (size, ) for size in sizes)
    else:
        vals = ((curry(func, *args, size=size, **kwargs), ) for size in sizes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, shape=size, blockdims=blockdims)
Example #35
def use_with(function, transformers):
    """Accepts a function fn and a list of transformer functions and returns a
    new curried function. When the new function is invoked, it calls the
    function fn with parameters consisting of the result of calling each
    supplied handler on successive arguments to the new function.
    If more arguments are passed to the returned function than transformer
    functions, those arguments are passed directly to fn as additional
    parameters. If you expect additional arguments that don't need to be
    transformed, although you can ignore them, it's best to pass an identity
    function so that the new function reports the correct arity"""
    try:
        args = inspect.getfullargspec(function).args
    except TypeError:
        args = ["argument" + str(i) for i, x in enumerate(transformers)]

    F = {function.__name__: function}

    run = []
    for i, t in enumerate(transformers):
        F[t.__name__] = t
        try:
            args[i]
        except IndexError:
            args.append("argument" + str(i))

        run.append(t.__name__ + "(" + args[i] + ")")

    f = ("lambda " + ", ".join(args[:len(transformers)]) + ": " +
         function.__name__ + "(" + ",".join(run) + ")")
    return curry(eval(f, F))
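
The docstring above describes use_with only in prose, so here is a minimal usage sketch. It assumes the use_with definition above together with its imports (inspect, toolz's curry); the helpers strlen, times_ten, add and the name padded_add are hypothetical, introduced only for illustration.

def strlen(s):
    # transformer for the first positional argument
    return len(s)

def times_ten(n):
    # transformer for the second positional argument
    return n * 10

def add(a, b):
    return a + b

# Each argument is routed through its transformer before add sees it:
# padded_add(x, y) == add(strlen(x), times_ten(y))
padded_add = use_with(add, [strlen, times_ten])
assert padded_add("abc", 2) == 23   # 3 + 20
assert padded_add("abc")(2) == 23   # the result is curried, so partial application also works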
Example #36
def compute_down(expr, data, chunksize=None, map=map, **kwargs):
    leaf = expr._leaves()[0]

    if chunksize is None:
        chunksize = get_chunksize(data)

    # If the bottom expression is a projection or field then want to do
    # compute_up first
    children = set(e for e in expr._traverse()
                   if isinstance(e, Expr) and any(i is expr._leaves()[0]
                                                  for i in e._inputs))
    if len(children) == 1 and isinstance(first(children), (Field, Projection)):
        raise MDNotImplementedError()

    chunk = symbol('chunk', chunksize * leaf.schema)
    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr, chunk=chunk)

    data_parts = partitions(data, chunksize=(chunksize, ))

    parts = list(map(curry(compute_chunk, data, chunk, chunk_expr),
                     data_parts))

    if isinstance(parts[0], np.ndarray):
        intermediate = np.concatenate(parts)
    elif isinstance(parts[0], pd.DataFrame):
        intermediate = pd.concat(parts)
    elif isinstance(parts[0], Iterable):
        intermediate = list(concat(parts))
    else:
        raise TypeError("Don't know how to concatenate objects of type %r" %
                        type(parts[0]).__name__)

    return compute(agg_expr, {agg: intermediate})
Example #37
 def __init__(self, filename, *, label_only=False, **pdr_kwargs):
     # default values for attributes hopefully set by PDR
     self.LABEL = {}
     self.pointers = []
     if not label_only:
         super().__init__(filename, **pdr_kwargs)
     else:
         # this might be a messy option, but implementing it now for
         # convenience. the label_only flag allows this class to be used
         # as _just_ a label reader / converter
         setattr(self, "filename", filename)
         # Try PDS3 options
         setattr(self, "LABEL", pdr.read_label(filename))
         setattr(
             self,
             "pointers",
             [k for k in self.LABEL.keys() if k[0] == "^"],
         )
         _ = [
             setattr(
                 self,
                 pointer[1:] if pointer.startswith("^") else pointer,
                 pdr.WORKING_POINTER_TO_FUNCTION_MAP[pointer](filename,
                                                              self.LABEL),
             ) for pointer in self.pointers
         ]
     setattr(self, "convert_label", curry(_convert_label)(self))
Example #38
def curry_pipeline_task(func, spec):
    return curry(
        func, **{
            REQUIRE: spec[REQUIRE],
            OUTPUT: spec[OUTPUT],
            'config': spec.get('config', {})
        })
Example #39
def load_all_users():
    ''' Returns a pd.DataFrame with the information of all the users'''
    map = tlz.curry(map)
    dataset = tlz.pipe(users, map(parse_exp03_filename), map(user_pipe),
                       accumulate_users)
    dataset.insert(0, 'user', sorted(users * 3))
    return dataset
Example #40
def create_geocoder(city_file):
    """ Creates a geocoder function for cities that takes a city name and region
        and returns the latitude and longitude.
    """
    reader = DictReader(city_file)

    # Create a blank hash to load.
    # (state_iso,city_name) => (lat, lon, records)
    # state/city collisions are resolved by whichever one has the most records.
    # Not 100% that's the right call but it's a start.
    geocoder_hash = {}

    for row in reader:
        row_key = (
            row["country_iso_code"].lower(),
            row["city_name"].lower(),
        )

        if (row_key not in geocoder_hash) or (int(row["num_blocks"]) >
                                              geocoder_hash[row_key][2]):
            geocoder_hash[row_key] = (
                float(row["latitude"]),
                float(row["longitude"]),
                int(row["num_blocks"]),
            )
            print(row_key)
            print(geocoder_hash[row_key])

    # Bind the geocoder hash to the geocoder template.
    return curry(_geocoder_template)(geocoder_hash)
Example #41
def wrap_func_size_as_kwarg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' in kwargs and 'size' not in kwargs:
        kwargs['size'] = kwargs.pop('shape')
    if 'size' not in kwargs:
        args, size = args[:-1], args[-1]
    else:
        size = kwargs.pop('size')

    if not isinstance(size, (tuple, list)):
        size = (size,)

    chunks = kwargs.pop('chunks', None)
    chunks = normalize_chunks(chunks, size)
    name = kwargs.pop('name', None)

    dtype = kwargs.pop('dtype', None)
    if dtype is None:
        kw = kwargs.copy()
        kw['size'] = (0,)
        dtype = func(*args, **kw).dtype

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in chunks])
    sizes = product(*chunks)
    if not kwargs:
        vals = ((func,) + args + (size,) for size in sizes)
    else:
        vals = ((curry(func, *args, size=size, **kwargs),) for size in sizes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, chunks, dtype=dtype)
Example #42
def test_EqualityHashKey_default_key():
    EqualityHashDefault = curry(EqualityHashKey, None)
    L1 = [1]
    L2 = [2]
    data1 = [L1, L1, L2, [], [], [1], [2], {}, ()]
    set1 = set(map(EqualityHashDefault, data1))
    set2 = set(map(EqualityHashDefault, [[], [1], [2], {}, ()]))
    assert set1 == set2
    assert len(set1) == 5

    # Test that ``EqualityHashDefault(item)`` is distinct from ``item``
    T0 = ()
    T1 = (1, )
    data2 = list(map(EqualityHashDefault, [T0, T0, T1, T1, (), (1, )]))
    data2.extend([T0, T1, (), (1, )])
    set3 = set(data2)
    assert set3 == set([(), (1, ),
                        EqualityHashDefault(()),
                        EqualityHashDefault((1, ))])
    assert len(set3) == 4
    assert EqualityHashDefault(()) in set3
    assert EqualityHashDefault((1, )) in set3

    # Miscellaneous
    E1 = EqualityHashDefault(L1)
    E2 = EqualityHashDefault(L2)
    assert str(E1) == '=[1]='
    assert repr(E1) == '=[1]='
    assert E1 != E2
    assert not (E1 == E2)
    assert E1 == EqualityHashDefault(L1)
    assert not (E1 != EqualityHashDefault(L1))
    assert E1 != L1
    assert not (E1 == L1)
Example #43
 def get(self):
     # type: () -> PluginType
     """Return the currently active plugin."""
     if self._options:
         return curry(self._active, **self._options)
     else:
         return self._active
Example #44
def wrap_func_shape_as_first_arg(func, *args, **kwargs):
    """
    Transform np.random function into blocked version
    """
    if 'shape' not in kwargs:
        shape, args = args[0], args[1:]
    else:
        shape = kwargs.pop('shape')

    dtype = kwargs.pop('dtype', None)

    if not isinstance(shape, (tuple, list)):
        shape = (shape, )

    blockshape = kwargs.pop('blockshape', None)
    blockdims = kwargs.pop('blockdims', None)

    name = kwargs.pop('name', None)
    if not blockdims and blockshape:
        blockdims = blockdims_from_blockshape(shape, blockshape)

    if dtype is None:
        dtype = func(shape, *args, **kwargs).dtype

    name = name or next(names)

    keys = product([name], *[range(len(bd)) for bd in blockdims])
    shapes = product(*blockdims)
    func = curry(func, dtype=dtype, **kwargs)
    vals = ((func, ) + (s, ) + args for s in shapes)

    dsk = dict(zip(keys, vals))
    return Array(dsk, name, shape=shape, blockdims=blockdims, dtype=dtype)
Example #45
def nd2_reader(path):
    """Take a path or list of paths and return a list of LayerData tuples.

    Readers are expected to return data as a list of tuples, where each tuple
    is (data, [add_kwargs, [layer_type]]), "add_kwargs" and "layer_type" are
    both optional.

    Parameters
    ----------
    path : str or list of str
        Path to file, or list of paths.

    Returns
    -------
    layer_data : list of tuples
        A list of LayerData tuples where each tuple in the list contains
        (data, metadata, layer_type), where data is a numpy array, metadata is
        a dict of keyword arguments for the corresponding viewer.add_* method
        in napari, and layer_type is a lower-case string naming the type of layer.
        Both "meta", and "layer_type" are optional. napari will default to
        layer_type=="image" if not provided
    """
    with ND2Reader(path) as nd2_data:
        channels = nd2_data.metadata['channels']
        n_timepoints = nd2_data.sizes['t']
        z_depth = nd2_data.sizes['z']
        frame_shape = (z_depth, *nd2_data.frame_shape)
        frame_dtype = nd2_data._dtype
        nd2vol = tz.curry(get_nd2reader_nd2_vol)
        layer_list = get_layer_list(channels, nd2vol, path, frame_shape,
                                    frame_dtype, n_timepoints)

    return layer_list
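
For orientation, a hedged sketch of what one element of the returned layer_data list typically looks like under the napari LayerData convention described in the docstring; the array shape and the add_kwargs values below are made up for illustration.

import numpy as np

# (data, add_kwargs, layer_type): data is the image array (here a dummy t, z, y, x stack),
# add_kwargs feed the corresponding viewer.add_* call, layer_type names the layer kind.
example_layer = (
    np.zeros((4, 10, 512, 512), dtype=np.uint16),
    {"name": "DAPI", "blending": "additive"},
    "image",
)
layer_data = [example_layer]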
Example #46
def elemwise_array(expr, *data, **kwargs):
    leaves = expr._inputs
    expr_inds = tuple(range(ndim(expr)))[::-1]
    return atop(
        curry(compute_it, expr, leaves, **kwargs),
        expr_inds,
        *concat((dat, tuple(range(ndim(dat))[::-1])) for dat in data)
    )
Example #47
    def rowterator(sel, chunksize=chunksize):
        with sel.bind.connect() as conn:
            result = conn.execute(sel)
            yield result.keys()

            for rows in iter_except(curry(result.fetchmany, size=chunksize),
                                    sa.exc.ResourceClosedError):
                yield rows
Example #48
def pickle_apply_async(apply_async, func, args=(), kwds={},
                       func_loads=None, func_dumps=None):
    dumps = func_dumps or _globals.get('func_dumps') or _dumps
    sfunc = dumps(func)
    sargs = dumps(args)
    skwds = dumps(kwds)
    return apply_async(curry(apply_func, loads=func_loads),
                       args=[sfunc, sargs, skwds])
Example #49
def test_EqualityHashKey_index_key():
    d1 = {'firstname': 'Alice', 'age': 21, 'data': {}}
    d2 = {'firstname': 'Alice', 'age': 34, 'data': {}}
    d3a = {'firstname': 'Bob', 'age': 56, 'data': {}}
    d3b = {'firstname': 'Bob', 'age': 56, 'data': {}}
    EqualityHashFirstname = curry(EqualityHashKey, 'firstname')
    assert list(unique(3*[d1, d2, d3a, d3b],
                       key=EqualityHashFirstname)) == [d1, d2, d3a]
    EqualityHashFirstnameAge = curry(EqualityHashKey, ['firstname', 'age'])
    assert list(unique(3*[d1, d2, d3a, d3b],
                       key=EqualityHashFirstnameAge)) == [d1, d2, d3a]
    list1 = [0] * 10
    list2 = [0] * 100
    list3a = [1] * 10
    list3b = [1] * 10
    EqualityHash0 = curry(EqualityHashKey, 0)
    assert list(unique(3*[list1, list2, list3a, list3b],
                       key=EqualityHash0)) == [list1, list2, list3a]
Example #50
 def rowiterator(sel, chunksize=chunksize):
     with getbind(sel, bind).connect() as conn:
         result = conn.execute(sel)
         for rows in iter_except(curry(result.fetchmany, size=chunksize),
                                 sa.exc.ResourceClosedError):
             if rows:
                 yield rows
             else:
                 return
Example #51
def elemwise_array(expr, *data, **kwargs):
    leaves = expr._inputs
    expr_inds = tuple(range(ndim(expr)))[::-1]
    return atop(
        curry(compute_it, expr, leaves, **kwargs),
        expr_inds,
        *concat((dat, tuple(range(ndim(dat))[::-1])) for dat in data),
        dtype=expr.dshape.measure.to_numpy_dtype()
    )
Example #52
def append_text_to_s3(s3, data, multipart=False, part_size=5 << 20, **kwargs):
    if multipart:
        with start_multipart_upload_operation(s3) as multipart_upload:
            with open(data.path, 'rb') as f:
                for part_number, part in enumerate(iter(curry(f.read, part_size), b''), start=1):
                    multipart_upload.upload_part_from_file(BytesIO(part), part_num=part_number)
        return s3

    s3.object.set_contents_from_filename(data.path)
    return s3
Example #53
 def __call__(cls, *args, **kwargs):
     """This is where the currying magic occurs. The __call__ in a metaclass
     is analogous to __new__ in a regular class.
     """
     @wraps(cls, updated=[])
     def currier(*a, **k):
         return super(Curried, cls).__call__(*a, **k)
     # there's odd behavior when composed with other
     # metaclasses if done as one function call...
     return curry(currier)(*args, **kwargs)
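
A small, hypothetical usage sketch of such a currying metaclass; Curried is the metaclass the __call__ above belongs to, Point is an invented class, and the exact point at which instantiation happens depends on how toolz.curry introspects the wrapped constructor.

class Point(metaclass=Curried):
    def __init__(self, x, y):
        self.x = x
        self.y = y

# Supplying only x returns a curried constructor; supplying y then builds the instance.
make_x1 = Point(1)
p = make_x1(2)
# p.x == 1, p.y == 2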
Example #54
 def topk(self, k, key=None):
     a = next(names)
     b = next(names)
     if key:
         topk = curry(heapq.nlargest, key=key)
     else:
         topk = heapq.nlargest
     dsk = dict(((a, i), (list, (topk, k, (self.name, i))))
                     for i in range(self.npartitions))
     dsk2 = {(b, 0): (list, (topk, k, (concat, list(dsk.keys()))))}
     return Bag(merge(self.dask, dsk, dsk2), b, 1)
Example #55
    def distinct(self):
        """ Distinct elements of collection

        Unordered without repeats.

        >>> b = from_sequence(['Alice', 'Bob', 'Alice'])
        >>> sorted(b.distinct())
        ['Alice', 'Bob']
        """
        return self.reduction(set, curry(apply, set.union), out_type=Bag,
                              name='distinct')
Example #56
def test_introspect_curry_valid_py3(check_valid=is_valid_args, incomplete=False):
    if not PY3:
        return
    orig_check_valid = check_valid
    check_valid = lambda _func, *args, **kwargs: orig_check_valid(_func, args, kwargs)

    f = toolz.curry(make_func('x, y, z=0'))
    assert check_valid(f)
    assert check_valid(f, 1)
    assert check_valid(f, 1, 2)
    assert check_valid(f, 1, 2, 3)
    assert check_valid(f, 1, 2, 3, 4) is False
    assert check_valid(f, invalid_keyword=True) is False
    assert check_valid(f(1))
    assert check_valid(f(1), 2)
    assert check_valid(f(1), 2, 3)
    assert check_valid(f(1), 2, 3, 4) is False
    assert check_valid(f(1), x=2) is False
    assert check_valid(f(1), y=2)
    assert check_valid(f(x=1), 2) is False
    assert check_valid(f(x=1), y=2)
    assert check_valid(f(y=2), 1)
    assert check_valid(f(y=2), 1, z=3)
    assert check_valid(f(y=2), 1, 3) is False

    f = toolz.curry(make_func('x, y, z=0'), 1, x=1)
    assert check_valid(f) is False
    assert check_valid(f, z=3) is False

    f = toolz.curry(make_func('x, y, *args, z'))
    assert check_valid(f)
    assert check_valid(f, 0)
    assert check_valid(f(1), 0)
    assert check_valid(f(1, 2), 0)
    assert check_valid(f(1, 2, 3), 0)
    assert check_valid(f(1, 2, 3, 4), 0)
    assert check_valid(f(1, 2, 3, 4), z=4)
    assert check_valid(f(x=1))
    assert check_valid(f(x=1), 1) is False
    assert check_valid(f(x=1), y=2)
Example #57
def wrap(wrap_func, func, **kwargs):
    f = curry(wrap_func, func, **kwargs)
    f.__doc__ = """
    Blocked variant of %(name)s

    Follows the signature of %(name)s exactly except that it also requires a
    keyword argument chunks=(...)

    Original signature follows below.
    """ % {'name': func.__name__} + func.__doc__

    f.__name__ = 'blocked_' + func.__name__
    return f
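
To show how wrap is meant to be combined with the wrap_func_* helpers from the other examples, a hedged wiring sketch; np.ones and the chunks keyword follow the dask-style API shown in Example #33, and the name ones is an assumption, not taken from the snippet.

import numpy as np

# Partially apply the blocked-construction logic to np.ones; the returned callable keeps
# np.ones' docstring (with the blocked-variant note prepended) under the name 'blocked_ones'.
ones = wrap(wrap_func_shape_as_first_arg, np.ones)
blocked = ones((10, 10), chunks=(5, 5))   # builds a chunked array instead of one big block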
Example #58
File: core.py Project: esc/dask
    def map(self, func):
        """ Map a function across all elements in collection

        >>> import dask.bag as db
        >>> b = db.from_sequence(range(5))
        >>> list(b.map(lambda x: x * 10))  # doctest: +SKIP
        [0, 10, 20, 30, 40]
        """
        name = next(names)
        if takes_multiple_arguments(func):
            func = curry(apply, func)
        dsk = dict(((name, i), (list, (map, func, (self.name, i))))
                        for i in range(self.npartitions))
        return Bag(merge(self.dask, dsk), name, self.npartitions)
Example #59
def compute_down(expr, data, map=map, **kwargs):
    leaf = expr._leaves()[0]

    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr)

    parts = list(map(curry(compute_chunk, chunk, chunk_expr), data))

    if isinstance(parts[0], np.ndarray):
        intermediate = np.concatenate(parts)
    elif isinstance(parts[0], pd.DataFrame):
        intermediate = pd.concat(parts)
    elif isinstance(parts[0], (Iterable, Iterator)):
        intermediate = concat(parts)

    return compute(agg_expr, {agg: intermediate})
Example #60
def pickle_apply_async(apply_async, func, args=(),
                       func_loads=None, func_dumps=None):
    # XXX: To deal with deserialization errors of tasks, this version of
    # apply_async doesn't actually match that of `pool.apply_async`. It's
    # customized to fit the signature of `dask.async.execute_task`, which is
    # the only function ever actually passed as `func`. This is a bit of a
    # hack, but it works pretty well. If the signature of `execute_task`
    # changes, then this will need to be changed as well.
    dumps = func_dumps or _globals.get('func_dumps') or _dumps
    key, task, data, queue, get_id, raise_on_exception = args
    sfunc = dumps(func)
    may_fail = dumps((task, data))
    wont_fail = dumps((key, queue, get_id, raise_on_exception))
    return apply_async(curry(apply_func, loads=func_loads),
                       args=[sfunc, may_fail, wont_fail])