def compute_projection_scalar_expr(
    expr,
    parent,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext] = None,
    **kwargs,
):
    name = expr._name
    assert name is not None, 'Scalar selection name is None'

    op = expr.op()
    parent_table_op = parent.table.op()

    data_columns = frozenset(data.columns)
    scope = scope.merge_scopes(
        Scope(
            {
                t: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_table_op, t, data_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for t in op.root_tables()
    )
    scalar = execute(expr, scope=scope, **kwargs)
    return data.assign(**{name: scalar})[name]

def csv_pre_execute_selection(
    op: ops.Node,
    client: CSVClient,
    scope: Scope,
    timecontext: Optional[TimeContext] = None,
    **kwargs,
):
    tables = filter(
        lambda t: scope.get_value(t, timecontext) is None,
        physical_tables(op.table.op()),
    )

    new_scope = Scope()
    for table in tables:
        path = client.dictionary[table.name]
        usecols = None

        if op.selections:
            header = _read_csv(path, schema=table.schema, header=0, nrows=1)
            usecols = [
                getattr(s.op(), 'name', None) or s.get_name()
                for s in op.selections
            ]

            # we cannot read only the requested columns if some of them are
            # missing from the file's header, so fall back to reading all
            if len(pd.Index(usecols) & header.columns) != len(usecols):
                usecols = None

        result = _read_csv(path, table.schema, usecols=usecols, header=0)
        new_scope = new_scope.merge_scope(Scope({table: result}, timecontext))

    return new_scope

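# A hedged illustration of the ``usecols`` pruning above, with hypothetical
# column names: if the selection asks for ['a', 'b'] but the CSV header only
# contains 'a', the intersection is shorter than the request, so the code
# falls back to reading every column.
#
#   requested = pd.Index(['a', 'b'])
#   header_cols = pd.Index(['a'])
#   len(requested & header_cols) != len(requested)  # True -> usecols = None
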
def compile(self, expr, timecontext=None, params=None, *args, **kwargs):
    """Compile an ibis expression to a PySpark DataFrame object."""
    if timecontext is not None:
        session_timezone = self._session.conf.get(
            'spark.sql.session.timeZone'
        )
        # Since Spark uses the session timezone for tz-naive timestamps,
        # we localize the tz-naive context here to match that behavior
        timecontext = localize_context(
            canonicalize_context(timecontext), session_timezone
        )

    # Insert params into the scope
    if params is None:
        scope = Scope()
    else:
        scope = Scope(
            {param.op(): raw_value for param, raw_value in params.items()},
            timecontext,
        )
    return self.translator.translate(
        expr, scope=scope, timecontext=timecontext
    )

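# A hedged sketch of the timecontext handling above (hypothetical values): a
# timecontext is a (begin, end) pair of timestamps or time strings. With a
# session timezone of, say, 'UTC', a tz-naive context becomes tz-aware so it
# compares correctly against Spark's session-local timestamps.
#
#   timecontext = (pd.Timestamp('20200101'), pd.Timestamp('20200201'))
#   # canonicalize_context validates/coerces the bounds; localize_context
#   # then attaches the session timezone, yielding tz-aware timestamps
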
def compute_sort_key(
    key: ops.SortKey,
    data: dd.DataFrame,
    timecontext: Optional[TimeContext] = None,
    scope: Optional[Scope] = None,
    **kwargs,
):
    """Compute a sort key.

    Note: we use this function instead of the one in
    ``pandas.execution.util`` so that the dask ``execute`` method is used.

    This function borrows the logic from the pandas backend. ``by`` can be a
    string or an expression. If ``by.get_name()`` raises an exception, we
    must ``execute`` the expression and sort by the newly derived column.
    """
    by = key.to_expr()
    name = ibis.util.guid()
    try:
        if isinstance(by, str):
            return name, data[by]
        return name, data[by.get_name()]
    except com.ExpressionError:
        if scope is None:
            scope = Scope()
        scope = scope.merge_scopes(
            Scope({t: data}, timecontext) for t in by.op().root_tables()
        )
        new_column = execute(by, scope=scope, **kwargs)
        new_column.name = name
        return name, new_column

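# A hedged usage sketch (hypothetical table and key): a sort key over an
# unnamed derived expression such as ``t.a + t.b`` has no column name, so
# ``by.get_name()`` raises ``com.ExpressionError`` and the expression is
# executed into a fresh column under a generated guid:
#
#   name, col = compute_sort_key(sort_key, ddf, timecontext=None)
#   ddf = ddf.assign(**{name: col})  # the caller can then sort on ``name``
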
def compute_projection_scalar_expr(
    expr,
    parent,
    data,
    scope: Optional[Scope] = None,
    timecontext: Optional[TimeContext] = None,
    **kwargs,
):
    name = expr._name
    assert name is not None, 'Scalar selection name is None'

    op = expr.op()
    parent_table_op = parent.table.op()

    if scope is None:
        scope = Scope()

    data_columns = frozenset(data.columns)
    scope = scope.merge_scopes(
        Scope(
            {
                t: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_table_op, t, data_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for t in op.root_tables()
    )
    scalar = execute(expr, scope=scope, **kwargs)
    # broadcast the scalar over ``data``'s index, then convert back to a
    # dask Series with the same partitioning as the input
    result = pandas.Series([scalar], name=name).repeat(len(data.index))
    result.index = data.index
    return dd.from_pandas(result, npartitions=data.npartitions)

def main_execute(
    expr,
    params=None,
    scope=None,
    timecontext: Optional[TimeContext] = None,
    aggcontext=None,
    **kwargs,
):
    """Execute an expression against data that are bound to it. If no data
    are bound, raise an exception.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
        The expression to execute
    params : Mapping[ibis.expr.types.Expr, object]
        The data that an unbound parameter in `expr` maps to
    scope : Mapping[ibis.expr.operations.Node, object]
        Additional scope, mapping ibis operations to data
    timecontext : Optional[TimeContext]
        timecontext needed for execution
    aggcontext : Optional[ibis.backends.pandas.aggcontext.AggregationContext]
        An object indicating how to compute aggregations. For example, a
        rolling mean needs to be computed differently than the mean of a
        column.
    kwargs : Dict[str, object]
        Additional arguments that can potentially be used by individual node
        execution

    Returns
    -------
    result : Union[
        pandas.Series, pandas.DataFrame,
        ibis.backends.pandas.core.simple_types
    ]

    Raises
    ------
    ValueError
        * If no data are bound to the input expression
    """
    if scope is None:
        scope = Scope()

    if timecontext is not None:
        # convert timecontext to datetime type, if time strings are provided
        timecontext = canonicalize_context(timecontext)

    if params is None:
        params = {}

    # TODO: make expressions hashable so that we can get rid of these .op()
    # calls everywhere
    params = {k.op() if hasattr(k, 'op') else k: v for k, v in params.items()}
    scope = scope.merge_scope(Scope(params, timecontext))
    return execute_with_scope(
        expr,
        scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        **kwargs,
    )

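# A hedged usage sketch (hypothetical table ``t`` bound to a pandas backend):
# ``params`` keys may be expressions; they are normalized to operation nodes
# via ``.op()`` before being merged into the scope.
#
#   p = ibis.param('int64')
#   expr = t.mutate(bumped=t.value + p)
#   result = main_execute(expr, params={p: 1})
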
def compute_projection_column_expr(
    expr,
    parent,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext],
    **kwargs,
):
    result_name = getattr(expr, '_name', None)
    op = expr.op()
    parent_table_op = parent.table.op()

    if isinstance(op, ops.TableColumn):
        # slightly faster path for simple column selection
        name = op.name

        if name in data:
            return data[name].rename(result_name or name)

        if not isinstance(parent_table_op, ops.Join):
            raise KeyError(name)

        (root_table,) = op.root_tables()
        left_root, right_root = ops.distinct_roots(
            parent_table_op.left, parent_table_op.right
        )
        suffixes = {
            left_root: constants.LEFT_JOIN_SUFFIX,
            right_root: constants.RIGHT_JOIN_SUFFIX,
        }
        return data.loc[:, name + suffixes[root_table]].rename(
            result_name or name
        )

    data_columns = frozenset(data.columns)

    scope = scope.merge_scopes(
        Scope(
            {
                t: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_table_op, t, data_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for t in op.root_tables()
    )

    result = execute(expr, scope=scope, timecontext=timecontext, **kwargs)
    assert result_name is not None, 'Column selection name is None'
    if np.isscalar(result):
        # broadcast the scalar to a dask Series aligned with the input index
        series = dd.from_array(np.repeat(result, len(data.index)))
        series.name = result_name
        series.index = data.index
        return series
    return result.rename(result_name)

def _compute_predicates(
    table_op,
    predicates,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext],
    **kwargs,
):
    """Compute the predicates for a table operation.

    Parameters
    ----------
    table_op : TableNode
    predicates : List[ir.ColumnExpr]
    data : pd.DataFrame
    scope : Scope
    timecontext : Optional[TimeContext]
    kwargs : dict

    Returns
    -------
    computed_predicate : pd.Series[bool]

    Notes
    -----
    This handles the cases where the predicates are computed columns, in
    addition to the simple case of named columns coming directly from the
    input table.
    """
    for predicate in predicates:
        # Map each root table of the predicate to the data so that we compute
        # predicates on the result instead of any left or right tables if the
        # Selection is on a Join. Project data to only include columns from
        # the root table.
        root_tables = predicate.op().root_tables()

        # handle suffixes
        data_columns = frozenset(data.columns)

        additional_scope = Scope()
        for root_table in root_tables:
            mapping = remap_overlapping_column_names(
                table_op, root_table, data_columns
            )
            if mapping is not None:
                new_data = data.loc[:, mapping.keys()].rename(columns=mapping)
            else:
                new_data = data
            additional_scope = additional_scope.merge_scope(
                Scope({root_table: new_data}, timecontext)
            )

        scope = scope.merge_scope(additional_scope)
        yield execute(predicate, scope=scope, **kwargs)

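# A hedged illustration of the suffix handling above (hypothetical columns):
# after a join, overlapping columns are disambiguated with suffixes, e.g.
# 'value_x' / 'value_y'. ``remap_overlapping_column_names`` then yields a
# mapping such as {'value_x': 'value'} for the left root table, so the
# predicate is evaluated against the original, unsuffixed column names:
#
#   mapping = {'value_x': 'value'}
#   new_data = data.loc[:, mapping.keys()].rename(columns=mapping)
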
def compute_projection_column_expr(
    expr,
    parent,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext],
    **kwargs,
):
    result_name = getattr(expr, '_name', None)
    op = expr.op()
    parent_table_op = parent.table.op()

    if isinstance(op, ops.TableColumn):
        # slightly faster path for simple column selection
        name = op.name
        assert isinstance(name, str)

        if name in data:
            return data[name].rename(result_name or name)

        if not isinstance(parent_table_op, ops.Join):
            raise KeyError(name)

        suffix = util.get_join_suffix_for_op(op, parent_table_op)
        return data.loc[:, name + suffix].rename(result_name or name)

    data_columns = frozenset(data.columns)

    scope = scope.merge_scopes(
        Scope(
            {
                t: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_table_op, t, data_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for t in op.root_tables()
    )

    result = coerce_to_output(
        execute(expr, scope=scope, timecontext=timecontext, **kwargs),
        expr,
        data.index,
    )
    assert result_name is not None, 'Column selection name is None'
    return result

def compute_sort_key(key, data, timecontext, scope=None, **kwargs):
    by = key.to_expr()
    try:
        if isinstance(by, str):
            return by, None
        return by.get_name(), None
    except com.ExpressionError:
        if scope is None:
            scope = Scope()
        scope = scope.merge_scopes(
            Scope({t: data}, timecontext) for t in by.op().root_tables()
        )
        new_column = execute(by, scope=scope, **kwargs)
        name = ibis.util.guid()
        new_column.name = name
        return name, new_column

def test_bad_call_to_adjust_context():
    op = "not_a_node"
    context = (pd.Timestamp('20170101'), pd.Timestamp('20170103'))
    scope = Scope()
    with pytest.raises(
        com.IbisError,
        match=r".*Unsupported input type for adjust context.*",
    ):
        adjust_context(op, scope, context)

def compile(self, expr, timecontext=None, params=None, *args, **kwargs):
    """Compile an ibis expression to a PySpark DataFrame object."""
    if timecontext is not None:
        timecontext = canonicalize_context(timecontext)

    # Insert params into the scope
    if params is None:
        scope = Scope()
    else:
        scope = Scope(
            {param.op(): raw_value for param, raw_value in params.items()},
            timecontext,
        )
    return self.translator.translate(
        expr, scope=scope, timecontext=timecontext
    )

def execute_grouped_window_op(
    op,
    data,
    window,
    scope,
    timecontext,
    aggcontext,
    clients,
    **kwargs,
):
    # extract the parent
    (root,) = op.root_tables()
    root_expr = root.to_expr()

    root_data = execute(
        root_expr,
        scope=scope,
        timecontext=timecontext,
        clients=clients,
        aggcontext=aggcontext,
        **kwargs,
    )

    group_by = window._group_by
    grouping_keys = [
        key_op.name for key_op in map(operator.methodcaller('op'), group_by)
    ]

    grouped_root_data = root_data.groupby(grouping_keys)
    scope = scope.merge_scopes(
        [
            Scope({t: grouped_root_data}, timecontext)
            for t in op.expr.op().root_tables()
        ],
        overwrite=True,
    )

    result = execute_with_scope(
        expr=op.expr,
        scope=scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )
    # If the grouped operation we performed is not an analytic UDF we have to
    # realign the output to the input.
    if not isinstance(op.expr._arg, ops.AnalyticVectorizedUDF):
        result = dd.merge(
            root_data[result.index.name].to_frame(),
            result.to_frame(),
            left_on=result.index.name,
            right_index=True,
        )[result.name]
        result.divisions = root_data.divisions

    return result

def test_scope_look_up():
    # test that scope looks up items properly
    scope = Scope()
    one_day = ibis.interval(days=1).op()
    one_hour = ibis.interval(hours=1).op()
    scope = scope.merge_scope(Scope({one_day: 1}, None))
    assert scope.get_value(one_hour) is None
    assert scope.get_value(one_day) is not None

def execute_until_in_scope(
    expr,
    scope: Scope,
    timecontext: Optional[TimeContext] = None,
    aggcontext=None,
    clients=None,
    post_execute_=None,
    **kwargs,
) -> Scope:
    """Execute until our op is in `scope`.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
    scope : Scope
    timecontext : Optional[TimeContext]
    aggcontext : Optional[AggregationContext]
    clients : List[ibis.client.Client]
    kwargs : Mapping
    """
    # these should never be None
    assert aggcontext is not None, 'aggcontext is None'
    assert clients is not None, 'clients is None'
    assert post_execute_ is not None, 'post_execute_ is None'

    # base case: our op has been computed (or is a leaf data node), so
    # return the corresponding value
    op = expr.op()
    if scope.get_value(op, timecontext) is not None:
        return scope

    if isinstance(op, ops.Literal):
        # special case literals to avoid the overhead of dispatching
        # execute_node
        return Scope(
            {
                op: execute_literal(
                    op, op.value, expr.type(), aggcontext=aggcontext, **kwargs
                )
            },
            timecontext,
        )

    # figure out what arguments we're able to compute on based on the
    # expression's inputs. things like expressions, None, and scalar types
    # are computable whereas ``list``s are not
    computable_args = [arg for arg in op.inputs if is_computable_input(arg)]

    # arg_timecontexts is a list of time contexts with the same length as
    # computable_args; these contexts are passed to each arg
    if timecontext:
        arg_timecontexts = compute_time_context(
            op,
            num_args=len(computable_args),
            timecontext=timecontext,
            clients=clients,
        )
    else:
        arg_timecontexts = [None] * len(computable_args)

    pre_executed_scope = pre_execute(
        op,
        *clients,
        scope=scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        **kwargs,
    )

    new_scope = scope.merge_scope(pre_executed_scope)

    # Short circuit: if pre_execute puts op in scope, then we don't need to
    # execute its computable_args
    if new_scope.get_value(op, timecontext) is not None:
        return new_scope

    # recursively compute each node's arguments until we've changed type.
    # compute_time_context should return a list with the same length as
    # computable_args; the two lists are zipped together for further
    # execution
    if len(arg_timecontexts) != len(computable_args):
        raise com.IbisError(
            'arg_timecontexts differ in length from computable_args '
            f'for type:\n{type(op).__name__}.'
        )

    scopes = [
        execute_until_in_scope(
            arg,
            new_scope,
            timecontext=timecontext,
            aggcontext=aggcontext,
            post_execute_=post_execute_,
            clients=clients,
            **kwargs,
        )
        if hasattr(arg, 'op')
        else Scope({arg: arg}, timecontext)
        for (arg, timecontext) in zip(computable_args, arg_timecontexts)
    ]

    # if we're unable to find data then raise an exception
    if not scopes and computable_args:
        raise com.UnboundExpressionError(
            'Unable to find data for expression:\n{}'.format(repr(expr))
        )

    # there should be exactly one dictionary per computable argument
    assert len(computable_args) == len(scopes)

    new_scope = new_scope.merge_scopes(scopes)
    # pass our computed arguments to this node's execute_node implementation
    data = [
        new_scope.get_value(arg.op(), timecontext)
        if hasattr(arg, 'op')
        else arg
        for arg in computable_args
    ]

    result = execute_node(
        op,
        *data,
        scope=scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )
    computed = post_execute_(op, result, timecontext=timecontext)
    return Scope({op: computed}, timecontext)

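# A hedged illustration of the literal short-circuit above (hypothetical
# wiring; ``agg_ctx.Summarize()`` and a pass-through ``post_execute_``
# stand in for what real call sites supply):
#
#   lit = ibis.literal(2)
#   s = execute_until_in_scope(
#       lit,
#       Scope(),
#       aggcontext=agg_ctx.Summarize(),
#       clients=[],
#       post_execute_=lambda op, result, **kw: result,
#   )
#   assert s.get_value(lit.op(), None) == 2
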
def execute_with_scope(
    expr,
    scope: Scope,
    timecontext: Optional[TimeContext] = None,
    aggcontext=None,
    clients=None,
    **kwargs,
):
    """Execute an expression `expr`, with data provided in `scope`.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
        The expression to execute.
    scope : Scope
        A Scope class, with dictionary mapping
        :class:`~ibis.expr.operations.Node` subclass instances to concrete
        data such as a pandas DataFrame.
    timecontext : Optional[TimeContext]
        A tuple of (begin, end) that is passed from parent Node to children;
        see [timecontext.py](ibis/backends/pandas/execution/timecontext.py)
        for detailed usage of this time context.
    aggcontext : Optional[ibis.backends.pandas.aggcontext.AggregationContext]

    Returns
    -------
    result : scalar, pd.Series, pd.DataFrame
    """
    op = expr.op()

    # Call pre_execute, to allow clients to intercept the expression before
    # computing anything *and* before associating leaf nodes with data. This
    # allows clients to provide their own data for each leaf.
    if clients is None:
        clients = list(find_backends(expr))

    if aggcontext is None:
        aggcontext = agg_ctx.Summarize()

    pre_executed_scope = pre_execute(
        op,
        *clients,
        scope=scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        **kwargs,
    )
    new_scope = scope.merge_scope(pre_executed_scope)
    result = execute_until_in_scope(
        expr,
        new_scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        clients=clients,
        # XXX: we *explicitly* pass in scope and not new_scope here so that
        # post_execute sees the scope of execute_with_scope, not the scope of
        # execute_until_in_scope
        post_execute_=functools.partial(
            post_execute,
            scope=scope,
            timecontext=timecontext,
            aggcontext=aggcontext,
            clients=clients,
            **kwargs,
        ),
        **kwargs,
    ).get_value(op, timecontext)
    return result

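# A hedged usage sketch (hypothetical table and data): bind a DataFrame to an
# unbound table's op directly through the scope, then execute a column
# projection against it:
#
#   df = pd.DataFrame({'a': [1, 2, 3]})
#   t = ibis.table([('a', 'int64')], name='t')
#   result = execute_with_scope(t.a, Scope({t.op(): df}, None))
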
def test_pre_execute(op, client, **kwargs):
    called[0] += 1
    return Scope()

def execute_window_op(
    op,
    data,
    window,
    scope: Optional[Scope] = None,
    timecontext: Optional[TimeContext] = None,
    aggcontext=None,
    clients=None,
    **kwargs,
):
    operand = op.expr
    # pre execute "manually" here because otherwise we wouldn't pick up
    # relevant scope changes from the child operand since we're managing
    # execution of that by hand
    operand_op = operand.op()

    if scope is None:
        scope = Scope()

    adjusted_timecontext = None
    if timecontext:
        arg_timecontexts = compute_time_context(
            op, timecontext=timecontext, clients=clients
        )
        # timecontext is the original time context required by the parent
        # node of this WindowOp, while adjusted_timecontext is the adjusted
        # context of this Window; since we are doing a manual execution here,
        # use adjusted_timecontext in later execution phases
        adjusted_timecontext = arg_timecontexts[0]

    pre_executed_scope = pre_execute(
        operand_op,
        *clients,
        scope=scope,
        timecontext=adjusted_timecontext,
        aggcontext=aggcontext,
        **kwargs,
    )
    scope = scope.merge_scope(pre_executed_scope)
    (root,) = op.root_tables()
    root_expr = root.to_expr()

    data = execute(
        root_expr,
        scope=scope,
        timecontext=adjusted_timecontext,
        clients=clients,
        aggcontext=aggcontext,
        **kwargs,
    )
    following = window.following
    order_by = window._order_by

    if (
        order_by
        and following != 0
        and not isinstance(operand_op, ops.ShiftBase)
    ):
        raise com.OperationNotDefinedError(
            'Window functions affected by following with order_by are not '
            'implemented'
        )

    group_by = window._group_by
    grouping_keys = [
        key_op.name
        if isinstance(key_op, ops.TableColumn)
        else execute(
            key,
            scope=scope,
            clients=clients,
            timecontext=adjusted_timecontext,
            aggcontext=aggcontext,
            **kwargs,
        )
        for key, key_op in zip(
            group_by, map(operator.methodcaller('op'), group_by)
        )
    ]

    if not order_by:
        ordering_keys = []

    if group_by:
        if order_by:
            (
                sorted_df,
                grouping_keys,
                ordering_keys,
            ) = util.compute_sorted_frame(
                data,
                order_by,
                group_by=group_by,
                timecontext=adjusted_timecontext,
                **kwargs,
            )
            source = sorted_df.groupby(grouping_keys, sort=True)
            post_process = _post_process_group_by_order_by
        else:
            source = data.groupby(grouping_keys, sort=False)
            post_process = _post_process_group_by
    else:
        if order_by:
            source, grouping_keys, ordering_keys = util.compute_sorted_frame(
                data, order_by, timecontext=adjusted_timecontext, **kwargs
            )
            post_process = _post_process_order_by
        else:
            source = data
            post_process = _post_process_empty

    # Here the groupby object should be added to the corresponding node in
    # scope for execution; data will be overwritten by a groupby object, so
    # we force an update regardless of time context
    new_scope = scope.merge_scopes(
        [
            Scope({t: source}, adjusted_timecontext)
            for t in operand.op().root_tables()
        ],
        overwrite=True,
    )

    aggcontext = get_aggcontext(
        window,
        scope=scope,
        operand=operand,
        parent=source,
        group_by=grouping_keys,
        order_by=ordering_keys,
        **kwargs,
    )
    result = execute(
        operand,
        scope=new_scope,
        timecontext=adjusted_timecontext,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )
    series = post_process(
        result,
        data,
        ordering_keys,
        grouping_keys,
        adjusted_timecontext,
    )
    assert len(data) == len(
        series
    ), 'input data source and computed column do not have the same length'

    # trim data to the original time context
    series = trim_with_timecontext(series, timecontext)
    return series

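# A hedged sketch of the kind of expression routed here (hypothetical table
# ``t`` with columns ``g``, ``ts``, and ``value``): a grouped, ordered,
# trailing window over which a reduction is applied.
#
#   w = ibis.trailing_window(3, group_by=t.g, order_by=t.ts)
#   expr = t.value.mean().over(w)
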
def pre_execute_timestamp_now(op, *args, **kwargs):
    timecontext = kwargs.get('timecontext', None)
    return Scope({op: pd.Timestamp('now')}, timecontext)

def pre_execute_default(node, *clients, **kwargs):
    return Scope()

def execute_aggregation_dataframe(
    op,
    data,
    scope=None,
    timecontext: Optional[TimeContext] = None,
    **kwargs,
):
    assert op.metrics, 'no metrics found during aggregation execution'

    if op.sort_keys:
        raise NotImplementedError(
            'sorting on aggregations not yet implemented'
        )

    predicates = op.predicates
    if predicates:
        predicate = functools.reduce(
            operator.and_,
            (
                execute(p, scope=scope, timecontext=timecontext, **kwargs)
                for p in predicates
            ),
        )
        data = data.loc[predicate]

    columns = {}

    if op.by:
        grouping_key_pairs = list(
            zip(op.by, map(operator.methodcaller('op'), op.by))
        )
        grouping_keys = [
            by_op.name
            if isinstance(by_op, ops.TableColumn)
            else execute(
                by, scope=scope, timecontext=timecontext, **kwargs
            ).rename(by.get_name())
            for by, by_op in grouping_key_pairs
        ]
        columns.update(
            (by_op.name, by.get_name())
            for by, by_op in grouping_key_pairs
            if hasattr(by_op, 'name')
        )
        source = data.groupby(grouping_keys)
    else:
        source = data

    scope = scope.merge_scope(Scope({op.table.op(): source}, timecontext))

    pieces = [
        coerce_to_output(
            execute(metric, scope=scope, timecontext=timecontext, **kwargs),
            metric,
        )
        for metric in op.metrics
    ]

    result = pd.concat(pieces, axis=1)

    # If grouping, need a reset to get the grouping key back as a column
    if op.by:
        result = result.reset_index()

    result.columns = [columns.get(c, c) for c in result.columns]

    if op.having:
        # .having(...) is only accessible on groupby, so this should never
        # raise
        if not op.by:
            raise ValueError(
                'Filtering out aggregation values is not allowed without at '
                'least one grouping key'
            )

        # TODO(phillipc): Don't recompute identical subexpressions
        predicate = functools.reduce(
            operator.and_,
            (
                execute(
                    having, scope=scope, timecontext=timecontext, **kwargs
                )
                for having in op.having
            ),
        )
        assert len(predicate) == len(
            result
        ), 'length of predicate does not match length of DataFrame'
        result = result.loc[predicate.values]

    return result

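# A hedged sketch of an expression this node handles (hypothetical table
# ``t`` with columns ``key`` and ``value``): a grouped aggregation with a
# predicate and a having clause.
#
#   expr = (
#       t[t.value > 0]
#       .group_by(t.key)
#       .having(t.count() > 1)
#       .aggregate(total=t.value.sum())
#   )
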
def pre_execute_test(op, *clients, scope=None, **kwargs):
    return Scope({op: 4}, None)