Ejemplo n.º 1
0
def execute_with_scope(expr, scope, context=None, **kwargs):
    """Evaluate `expr` against the data supplied in `scope`.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
        Expression to evaluate.
    scope : collections.Mapping
        Maps :class:`~ibis.expr.operations.Node` subclass instances to
        concrete data such as a pandas DataFrame.
    context : Optional[ibis.pandas.aggcontext.AggregationContext]
        Aggregation context; ``agg_ctx.Summarize()`` is used when omitted.
    **kwargs
        Forwarded unchanged to ``pre_execute``, ``post_execute`` and
        ``execute_until_in_scope``.

    Returns
    -------
    result : scalar, pd.Series, pd.DataFrame
    """
    op = expr.op()
    clients = list(find_backends(expr))

    if context is None:
        context = agg_ctx.Summarize()

    # Let every client see the expression through pre_execute before anything
    # is computed and before leaf nodes are associated with data, so that a
    # client can contribute its own data for each leaf.
    intercept = functools.partial(
        pre_execute, op, scope=scope, context=context, **kwargs
    )
    client_scopes = [intercept(client) for client in clients]
    merged_scope = toolz.merge(scope, *client_scopes)

    # XXX: post_execute is deliberately bound to the caller's `scope` rather
    # than `merged_scope`, so it sees the scope of execute_with_scope and not
    # the scope of execute_until_in_scope.
    post_executor = functools.partial(
        post_execute,
        scope=scope,
        context=context,
        clients=clients,
        **kwargs
    )

    return execute_until_in_scope(
        expr,
        merged_scope,
        context=context,
        clients=clients,
        post_execute_=post_executor,
        **kwargs
    )
Ejemplo n.º 2
0
    def __init__(self, path=None, create=False):
        """Build the client on a ``'sqlite://'`` SQLAlchemy engine.

        Parameters
        ----------
        path : optional
            Stored as ``self.name``. When it is not None it is attached
            under the database name ``'default'`` via ``self.attach``.
        create : optional
            Forwarded to ``self.attach`` as its ``create`` keyword.
        """
        engine = sa.create_engine('sqlite://')
        super(SQLiteClient, self).__init__(engine)

        self.name = path
        self.database_name = 'default'

        if path is not None:
            self.attach(self.database_name, path, create=create)

        # Registered scalar functions go first, registered aggregates second.
        for udf in _SQLITE_UDF_REGISTRY:
            register_udf = functools.partial(_register_function, udf)
            self.con.run_callable(register_udf)

        for udaf in _SQLITE_UDAF_REGISTRY:
            register_udaf = functools.partial(_register_aggregate, udaf)
            self.con.run_callable(register_udaf)
Ejemplo n.º 3
0
def execute_count_distinct_series_groupby_mask(
    op, data, mask, aggcontext=None, **kwargs
):
    """Aggregate `data` with a mask-filtered ``pd.Series.nunique``.

    ``_filtered_reduction`` is pre-bound to ``mask.obj`` and
    ``pd.Series.nunique`` and the resulting callable is handed to
    ``aggcontext.agg`` together with `data`. `op` and `kwargs` are unused.
    """
    aggregate = aggcontext.agg
    filtered_nunique = functools.partial(
        _filtered_reduction, mask.obj, pd.Series.nunique
    )
    return aggregate(data, filtered_nunique)
Ejemplo n.º 4
0
def execute_until_in_scope(
    expr, scope, context=None, clients=None, post_execute_=None, **kwargs
):
    """Keep executing `expr` until its op is a key of `scope`.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
    scope : Mapping
    context : Optional[AggregationContext]
        Must not be None.
    clients : List[ibis.client.Client]
        Must not be None; each client is passed to ``pre_execute``.
    post_execute_ : callable
        Must not be None; forwarded to ``execute_bottom_up``.
    kwargs : Mapping
        Forwarded to ``execute_bottom_up``, ``pre_execute`` and the
        recursive call.

    Returns
    -------
    The value stored in the scope under ``expr.op()``.
    """
    # invariants: callers always provide these three
    assert context is not None, 'context is None'
    assert clients is not None, 'clients is None'
    assert post_execute_ is not None, 'post_execute_ is None'

    op = expr.op()

    if op not in scope:
        # Execute bottom-up, then give each client a chance to add to the
        # result via pre_execute (bound to the incoming `scope`), and retry
        # with the merged scope.
        computed_scope = execute_bottom_up(
            expr,
            scope,
            context=context,
            post_execute_=post_execute_,
            **kwargs
        )
        intercept = functools.partial(
            pre_execute, op, scope=scope, **kwargs
        )
        client_scopes = [intercept(client) for client in clients]
        next_scope = toolz.merge(computed_scope, *client_scopes)
        return execute_until_in_scope(
            expr,
            next_scope,
            context=context,
            clients=clients,
            post_execute_=post_execute_,
            **kwargs
        )

    # base case: the op has been computed already (or is a leaf data node)
    return scope[op]
Ejemplo n.º 5
0
def execute_reduction_series_gb_mask(
    op, data, mask, aggcontext=None, **kwargs
):
    """Aggregate `data` with a mask-filtered reduction named after `op`.

    The reduction is a method call whose name is the lower-cased class name
    of `op`. ``_filtered_reduction`` is pre-bound to ``mask.obj`` and that
    method caller, and the result is handed to ``aggcontext.agg`` together
    with `data`. `kwargs` is unused.
    """
    reduction_name = type(op).__name__.lower()
    call_reduction = operator.methodcaller(reduction_name)

    aggregate = aggcontext.agg
    filtered_reduction = functools.partial(
        _filtered_reduction, mask.obj, call_reduction
    )
    return aggregate(data, filtered_reduction)
Ejemplo n.º 6
0
def execute_series_natural_log(op, data, **kwargs):
    """Return the natural logarithm of `data`.

    Data with object dtype is handled element by element: ``execute_node``
    (pre-bound to `op` and `kwargs`) is applied to each value. Any other
    dtype goes straight to ``np.log``.
    """
    has_object_dtype = data.dtype == np.dtype(np.object_)
    if not has_object_dtype:
        return np.log(data)

    per_element = functools.partial(execute_node, op, **kwargs)
    return data.apply(per_element)
Ejemplo n.º 7
0
def vectorize_object(op, arg, *args, **kwargs):
    """Apply ``execute_node`` across `arg` and `args` via ``np.vectorize``.

    ``execute_node`` is pre-bound to `op` and `kwargs`, wrapped with
    ``np.vectorize`` and called with `arg` and `args`. The output is wrapped
    in a ``pd.Series`` that reuses the index and name of `arg`.
    """
    node_executor = functools.partial(execute_node, op, **kwargs)
    vectorized = np.vectorize(node_executor)
    values = vectorized(arg, *args)
    return pd.Series(values, index=arg.index, name=arg.name)
Ejemplo n.º 8
0
def call_numpy_ufunc(func, op, data, **kwargs):
    """Apply `func` to `data`, with a per-element path for object dtype.

    Data with object dtype is handled element by element: ``execute_node``
    (pre-bound to `op` and `kwargs`) is applied to each value and `func` is
    not called. Any other dtype is passed to `func` directly.
    """
    has_object_dtype = data.dtype == np.dtype(np.object_)
    if not has_object_dtype:
        return func(data)

    per_element = functools.partial(execute_node, op, **kwargs)
    return data.apply(per_element)
Ejemplo n.º 9
0
Archivo: core.py Proyecto: zdog234/ibis
def execute_bottom_up(expr,
                      scope,
                      aggcontext=None,
                      post_execute_=None,
                      clients=None,
                      **kwargs):
    """Compute `expr` by first computing its inputs, recursively.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
    scope : Mapping[ibis.expr.operations.Node, object]
    aggcontext : Optional[ibis.pandas.aggcontext.AggregationContext]
    post_execute_ : callable
        Must not be None. Called as ``post_execute_(op, result)`` on the
        output of ``execute_node``; its return value is what gets stored.
    clients : optional
        Forwarded to the recursive calls and to ``execute_node``.
    kwargs : Dict[str, object]

    Returns
    -------
    result : Mapping[
        ibis.expr.operations.Node,
        Union[pandas.Series, pandas.DataFrame, scalar_types]
    ]
        A mapping from node to the computed result of that Node

    Raises
    ------
    com.UnboundExpressionError
        If the op has no computable inputs.
    """
    assert post_execute_ is not None, 'post_execute_ is None'
    op = expr.op()

    # Already computed: hand the scope back untouched. It is then passed back
    # into execute_bottom_up, which terminates.
    if op in scope:
        return scope

    # Literals are special-cased to skip the cost of dispatching execute_node.
    if isinstance(op, ops.Literal):
        literal_value = execute_literal(op,
                                        op.value,
                                        expr.type(),
                                        aggcontext=aggcontext,
                                        **kwargs)
        return {op: literal_value}

    # Keep only the inputs we can compute on: expressions, None and scalar
    # types are computable, whereas ``list``s are not.
    computable_args = [
        candidate for candidate in op.inputs
        if is_computable_arg(op, candidate)
    ]

    # One scope per computable argument: recurse into expressions, and map
    # every other argument to itself.
    scopes = []
    for arg in computable_args:
        if hasattr(arg, 'op'):
            arg_scope = execute_bottom_up(arg,
                                          scope,
                                          aggcontext=aggcontext,
                                          post_execute_=post_execute_,
                                          clients=clients,
                                          **kwargs)
        else:
            arg_scope = {arg: arg}
        scopes.append(arg_scope)

    # no data could be found for this expression
    if not scopes:
        raise com.UnboundExpressionError(
            'Unable to find data for expression:\n{}'.format(repr(expr)))

    # there should be exactly one dictionary per computable argument
    assert len(computable_args) == len(scopes)

    new_scope = toolz.merge(scopes)

    # Collect the computed value of every argument, in order, and feed them
    # to this node's execute_node implementation.
    data = []
    for arg in computable_args:
        if hasattr(arg, 'op'):
            data.append(new_scope[arg.op()])
        else:
            data.append(arg)

    result = execute_node(op,
                          *data,
                          scope=scope,
                          aggcontext=aggcontext,
                          clients=clients,
                          **kwargs)
    return {op: post_execute_(op, result)}
Ejemplo n.º 10
0
 def array_type(self):
     """Return ``self.column`` with ``dtype`` pre-bound to this object."""
     column_factory = functools.partial(self.column, dtype=self)
     return column_factory
Ejemplo n.º 11
0
 def scalar_type(self):
     """Return ``self.scalar`` with ``dtype`` pre-bound to this object."""
     scalar_factory = functools.partial(self.scalar, dtype=self)
     return scalar_factory
Ejemplo n.º 12
0
def execute_with_scope(expr, scope, context=None, **kwargs):
    """Evaluate `expr` against the data supplied in `scope`.

    Parameters
    ----------
    expr : ir.Expr
        Expression to evaluate.
    scope : dict
        Maps :class:`~ibis.expr.types.Node` subclass instances to concrete
        data such as a pandas DataFrame.
    context : optional
        Passed to ``execute_first``, ``execute`` and ``execute_node``;
        ``agg_ctx.Summarize()`` is used when omitted.
    **kwargs
        Forwarded to ``pre_execute``, ``execute_first``, ``execute`` and
        ``execute_node``.

    Returns
    -------
    result : scalar, pd.Series, pd.DataFrame
    """
    op = expr.op()

    # Let every backend found in the expression see it through pre_execute
    # before anything is computed and before leaf nodes are associated with
    # data, so that clients can provide their own scope.
    intercept = functools.partial(pre_execute, op, scope=scope, **kwargs)
    backend_scopes = [intercept(backend) for backend in find_backends(expr)]
    scope = toolz.merge(scope, *backend_scopes)

    # base case: the op has been computed already (or is a leaf data node)
    if op in scope:
        return scope[op]

    if context is None:
        context = agg_ctx.Summarize()

    # Try to look up the data of every root table; a missing one means the
    # execute_first shortcut below does not apply.
    try:
        leaf_data = [scope[table] for table in op.root_tables()]
    except KeyError:
        leaf_data = None

    if leaf_data is not None:
        try:
            # special case: a definition of execute_first matches the
            # current operation and its data leaves
            return execute_first(
                op, *leaf_data, scope=scope, context=context, **kwargs
            )
        except NotImplementedError:
            # no such definition; fall back to the generic path
            pass

    # Recursively compute the op's arguments: only valid input types are
    # kept, and only those that are expressions get executed.
    computed_args = []
    for arg in op.args:
        if not isinstance(arg, _VALID_INPUT_TYPES):
            continue
        if hasattr(arg, 'op'):
            arg = execute(arg, scope, context=context, **kwargs)
        computed_args.append(arg)

    # compute the op itself from its computed arguments
    return execute_node(
        op, *computed_args,
        scope=scope,
        context=context,
        **kwargs
    )
Ejemplo n.º 13
0
 def output_type(self):
     """Return ``Foo`` with ``dtype`` pre-bound to ``dt.int64``."""
     constructor = Foo
     return functools.partial(constructor, dtype=dt.int64)
Ejemplo n.º 14
0
def execute_series_unary_op(op, data, **kwargs):
    """Apply the numpy function named after `op`'s class to `data`.

    The function is looked up on ``np`` by the lower-cased class name of
    `op`, and this lookup happens before the dtype is inspected. Data with
    object dtype is then handled element by element through ``execute_node``
    (pre-bound to `op` and `kwargs`); any other dtype is passed to the numpy
    function directly.
    """
    function_name = type(op).__name__.lower()
    numpy_function = getattr(np, function_name)

    has_object_dtype = data.dtype == np.dtype(np.object_)
    if not has_object_dtype:
        return numpy_function(data)

    per_element = functools.partial(execute_node, op, **kwargs)
    return data.apply(per_element)
Ejemplo n.º 15
0
def execute_series_unary_op_negate(op, data, **kwargs):
    """Return the numerical negation of `data`.

    Data with object dtype is handled element by element: ``execute_node``
    (pre-bound to `op` and `kwargs`) is applied to each value. Any other
    dtype goes straight to ``np.negative``.
    """
    has_object_dtype = data.dtype == np.dtype(np.object_)
    if not has_object_dtype:
        return np.negative(data)

    per_element = functools.partial(execute_node, op, **kwargs)
    return data.apply(per_element)