Example #1
File: dask.py Project: blaze/blaze
def elemwise_array(expr, *data, **kwargs):
    leaves = expr._inputs
    # One index label per output dimension, reversed so that trailing
    # axes share labels across operands (NumPy-style broadcasting).
    expr_inds = tuple(range(ndim(expr)))[::-1]
    # Apply the curried blaze kernel blockwise over the aligned inputs.
    return atop(curry(compute_it, expr, leaves, **kwargs),
                expr_inds,
                *concat((dat, tuple(range(ndim(dat))[::-1])) for dat in data),
                dtype=expr.dshape.measure.to_numpy_dtype())
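These snippets assume blaze's dask backend imports (atop from dask.array, curry and concat from toolz, compute_it and ndim from blaze). A minimal standalone sketch of the same calling convention on plain dask arrays (not blaze code; atop was renamed blockwise in newer dask):

import numpy as np
import dask.array as da

x = da.ones((4, 6), chunks=(2, 3))
y = da.full((4, 6), 2.0, chunks=(2, 3))

# The output and both inputs share the index labels (1, 0), so np.add
# is applied to each aligned pair of blocks.
z = da.blockwise(np.add, (1, 0), x, (1, 0), y, (1, 0), dtype=x.dtype)
assert (z.compute() == 3.0).all()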
Example #2
def compute_broadcast(expr, *data, **kwargs):
    leaves = expr._inputs
    expr_inds = tuple(range(ndim(expr)))[::-1]
    # Compile the scalar expression into a numba ufunc and map it
    # blockwise; next(names) supplies the output array name that the
    # older dask atop signature expected.
    func = get_numba_ufunc(expr)
    return atop(func,
                next(names), expr_inds,
                *concat((dat, tuple(range(ndim(dat))[::-1])) for dat in data))
Example #3
def elemwise_array(expr, *data, **kwargs):
    leaves = expr._inputs
    expr_inds = tuple(range(ndim(expr)))[::-1]
    # Same elemwise pattern as Example #1, but without passing an
    # explicit output dtype to atop.
    return atop(
        curry(compute_it, expr, leaves, **kwargs),
        expr_inds,
        *concat((dat, tuple(range(ndim(dat))[::-1])) for dat in data)
    )
Example #4
def compute_broadcast(expr, *data, **kwargs):
    expr_inds = tuple(range(ndim(expr)))[::-1]
    func = get_numba_ufunc(expr)
    return atop(func,
                expr_inds,
                *concat(
                    (dat, tuple(range(ndim(dat))[::-1])) for dat in data),
                # Take the output dtype from the last input array.
                dtype=data[-1].dtype)
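Examples #2 and #4 swap the curried blaze kernel for a compiled numba ufunc. A rough standalone sketch of that idea (the kernel is made up, not blaze's get_numba_ufunc), taking the output dtype from an input as Example #4 does:

import numba
import dask.array as da

@numba.vectorize(['float64(float64, float64)'], nopython=True)
def kernel(a, b):
    return a * 2.0 + b

x = da.ones((4, 4), chunks=2)
y = da.ones((4, 4), chunks=2)
z = da.blockwise(kernel, (1, 0), x, (1, 0), y, (1, 0), dtype=x.dtype)
assert (z.compute() == 3.0).all()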
Example #5
File: dask.py Project: blaze/blaze
def compute_up(expr, data, **kwargs):
    leaf = expr._leaves()[0]
    # A symbol standing in for one block: the array's measure, shaped
    # like the first chunk along each axis.
    chunk = symbol('chunk', DataShape(*(tuple(map(first, data.chunks)) +
                                        (leaf.dshape.measure,))))
    # Factor the reduction into a per-chunk expression and an aggregate
    # expression over the per-chunk results.
    (chunk, chunk_expr), (agg, agg_expr) = split(expr._child, expr,
                                                 chunk=chunk)

    inds = tuple(range(ndim(leaf)))
    dtype = expr.dshape.measure.to_numpy_dtype()
    # Pass 1: evaluate the chunk expression on every block.
    tmp = atop(
        curry(compute_it, chunk_expr, [chunk], **kwargs),
        inds,
        data,
        inds,
        dtype=dtype,
    )

    # Pass 2: concatenate the per-block results along the reduced axes,
    # then evaluate the aggregate expression; the reduced axes drop out
    # of the output index.
    return atop(
        compose(
            curry(compute_it, agg_expr, [agg], **kwargs),
            curry(_concatenate2, axes=expr.axis),
        ),
        tuple(i for i in inds if i not in expr.axis),
        tmp,
        inds,
        dtype=dtype,
    )
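The split above is the generic chunk/aggregate decomposition for reductions. A minimal standalone sketch of the same two-pass shape with plain dask (the keepdims and adjust_chunks details are mine, not blaze's):

import numpy as np
import dask.array as da

x = da.arange(12, chunks=4)

# Pass 1: reduce each block, keeping the axis so blocks stay aligned.
partial = da.blockwise(
    lambda b: b.sum(keepdims=True), (0,),
    x, (0,),
    adjust_chunks={0: 1},  # each block collapses to length 1
    dtype=x.dtype,
)

# Pass 2: axis 0 is contracted out of the output index; concatenate=True
# hands np.sum the concatenated per-block partials.
total = da.blockwise(
    np.sum, (),
    partial, (0,),
    concatenate=True,
    dtype=x.dtype,
)
assert total.compute() == 66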
Example #6
def from_blaze(expr,
               deltas='auto',
               loader=None,
               resources=None,
               no_deltas_rule=_valid_no_deltas_rules[0]):
    """Create a Pipeline API object from a blaze expression.

    Parameters
    ----------
    expr : Expr
        The blaze expression to use.
    deltas : Expr or 'auto', optional
        The expression to use for the point in time adjustments.
        If the string 'auto' is passed, a deltas expr will be looked up
        by stepping up the expression tree and looking for another field
        with the name of ``expr`` + '_deltas'. If None is passed, no deltas
        will be used.
    loader : BlazeLoader, optional
        The blaze loader to attach this pipeline dataset to. If None is passed,
        the global blaze loader is used.
    resources : dict or any, optional
        The data to execute the blaze expressions against. This is used as the
        scope for ``bz.compute``.
    no_deltas_rule : {'warn', 'raise', 'ignore'}
        What should happen if ``deltas='auto'`` but no deltas can be found.
        'warn' says to issue a warning and continue with no deltas.
        'raise' says to raise an exception if no deltas can be found.
        'ignore' says to take no action and proceed with no deltas.

    Returns
    -------
    pipeline_api_obj : DataSet or BoundColumn
        Either a new dataset or bound column based on the shape of the expr
        passed in. If a table-shaped expression is passed, this will return
        a ``DataSet`` that represents the whole table. If an array-like
        (single-column) expression is passed, a ``BoundColumn`` on the
        dataset constructed from its parent table is returned.
    """
    deltas = _get_deltas(expr, deltas, no_deltas_rule)
    if deltas is not None:
        invalid_nodes = tuple(filter(is_invalid_deltas_node, expr._subterms()))
        if invalid_nodes:
            raise TypeError(
                'expression with deltas may only contain (%s) nodes,'
                " found: %s" % (
                    ', '.join(map(getname, valid_deltas_node_types)),
                    ', '.join(set(map(compose(getname, type), invalid_nodes))),
                ),
            )

    # Check if this is a single column out of a dataset.
    if bz.ndim(expr) != 1:
        raise TypeError(
            'expression was not tabular or array-like,'
            ' %s dimensions: %d' % (
                'too many' if bz.ndim(expr) > 1 else 'not enough',
                bz.ndim(expr),
            ),
        )

    single_column = None
    if isscalar(expr.dshape.measure):
        # This is a single column. Record which column we are to return
        # but create the entire dataset.
        single_column = rename = expr._name
        field_hit = False
        if not isinstance(expr, traversable_nodes):
            raise TypeError(
                "expression '%s' was array-like but not a simple field of"
                " some larger table" % str(expr),
            )
        while isinstance(expr, traversable_nodes):
            if isinstance(expr, bz.expr.Field):
                if not field_hit:
                    field_hit = True
                else:
                    break
            rename = expr._name
            expr = expr._child
        dataset_expr = expr.relabel({rename: single_column})
    else:
        dataset_expr = expr

    measure = dataset_expr.dshape.measure
    if not isrecord(measure) or AD_FIELD_NAME not in measure.names:
        raise TypeError(
            "The dataset must be a collection of records with at least an"
            " '{ad}' field. Fields provided: '{fields}'\nhint: maybe you need"
            " to use `relabel` to change your field names".format(
                ad=AD_FIELD_NAME,
                fields=measure,
            ),
        )
    _check_datetime_field(AD_FIELD_NAME, measure)
    dataset_expr, deltas = _ensure_timestamp_field(dataset_expr, deltas)

    if deltas is not None and (sorted(deltas.dshape.measure.fields) !=
                               sorted(measure.fields)):
        raise TypeError(
            'baseline measure != deltas measure:\n%s != %s' % (
                measure,
                deltas.dshape.measure,
            ),
        )

    # Ensure that we have a data resource to execute the query against.
    _check_resources('dataset_expr', dataset_expr, resources)
    _check_resources('deltas', deltas, resources)

    # Create or retrieve the Pipeline API dataset.
    ds = new_dataset(dataset_expr, deltas)
    # Register our new dataset with the loader.
    (loader if loader is not None else global_loader)[ds] = ExprData(
        dataset_expr,
        deltas,
        resources,
    )
    if single_column is not None:
        # We were passed a single column, extract and return it.
        return getattr(ds, single_column)
    return ds
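A hypothetical invocation of from_blaze (the frame, field names, and import path are illustrative, not from the source): bind a dataframe carrying the required asof_date and timestamp fields to a blaze expression, then pull out a DataSet or a single BoundColumn.

import blaze as bz
import pandas as pd
from zipline.pipeline.loaders.blaze import from_blaze  # path may differ by zipline version

df = pd.DataFrame({
    'sid': [1, 1],
    'asof_date': pd.to_datetime(['2014-01-01', '2014-01-02']),
    'timestamp': pd.to_datetime(['2014-01-01', '2014-01-02']),
    'value': [1.0, 2.0],
})
expr = bz.data(df, name='prices')

ds = from_blaze(expr, deltas=None)               # table-shaped expr -> DataSet
value_col = from_blaze(expr.value, deltas=None)  # single field -> BoundColumn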
Example #7
def elemwise_array(expr, *data, **kwargs):
    leaves = expr._inputs
    expr_inds = tuple(range(ndim(expr)))[::-1]
    # Same pattern as Example #1; next(names) supplies the output array
    # name that the older dask atop signature expected.
    return atop(curry(compute_it, expr, leaves, **kwargs), next(names),
                expr_inds,
                *concat((dat, tuple(range(ndim(dat))[::-1])) for dat in data))