Example #1
0
    def to_aggregation(self, metric_name=None, parent_table=None,
                       backup_metric_name=None):
        """
        Rewrite this TopK operation as a sorted, limited table aggregation.

        Parameters
        ----------
        metric_name : optional
          When given, the ``by`` metric is renamed to this
        parent_table : optional
          Table to aggregate on when ``arg`` and ``by`` do not share a base
          table
        backup_metric_name : optional
          Name given to the metric when ``metric_name`` is None and the
          metric's name equals the name of ``arg``

        Returns
        -------
        The aggregation, sorted on the metric and limited to ``k`` rows

        Raises
        ------
        com.IbisError
          If the base tables differ and no ``parent_table`` is supplied
        """
        topk = self.op()
        source = ir.find_base_table(topk.arg)

        metric = topk.by
        if isinstance(metric, ir.Expr):
            metric_source = ir.find_base_table(topk.by)
        else:
            # A non-expression ``by`` is called with the base table of ``arg``
            metric = metric(source)
            metric_source = source

        if metric_name is not None:
            metric = metric.name(metric_name)
        elif metric.get_name() == topk.arg.get_name():
            # Avoid the metric carrying the same name as the grouping key
            metric = metric.name(backup_metric_name)

        if source.equals(metric_source):
            aggregated_on = source
        elif parent_table is not None:
            aggregated_on = parent_table
        else:
            raise com.IbisError('Cross-table TopK; must provide a parent '
                                'joined table')

        agg = aggregated_on.aggregate(metric, by=[topk.arg])
        return agg.sort_by([(metric.get_name(), False)]).limit(topk.k)
Example #2
0
    def to_aggregation(self,
                       metric_name=None,
                       parent_table=None,
                       backup_metric_name=None):
        """
        Express the TopK operation as an aggregate / sort / limit chain.

        The grouping key is ``op.arg`` and the metric is ``op.by``. The
        metric is renamed to ``metric_name`` when that is given; otherwise it
        is renamed to ``backup_metric_name`` only if its name equals the
        key's name.

        The aggregation runs on the key's base table when the metric shares
        that table, else on ``parent_table``. A ``com.IbisError`` is raised
        when the tables differ and ``parent_table`` is None.
        """
        op = self.op()
        key = op.arg
        key_table = ir.find_base_table(key)

        ranking = op.by
        if isinstance(ranking, ir.Expr):
            ranking_table = ir.find_base_table(ranking)
        else:
            # ``by`` is not an expression: call it with the key's base table
            ranking, ranking_table = ranking(key_table), key_table

        if metric_name is not None:
            ranking = ranking.name(metric_name)
        elif ranking.get_name() == key.get_name():
            ranking = ranking.name(backup_metric_name)

        same_table = key_table.equals(ranking_table)
        if not same_table and parent_table is None:
            raise com.IbisError('Cross-table TopK; must provide a parent '
                                'joined table')

        target = key_table if same_table else parent_table
        agg = target.aggregate(ranking, by=[key])
        return agg.sort_by([(ranking.get_name(), False)]).limit(op.k)
Example #3
0
def _reduction_to_aggregation(expr, default_name='tmp'):
    """
    Wrap a reduction expression in an aggregation on its base table.

    Parameters
    ----------
    expr : expression
      Reduction to aggregate
    default_name : string, default 'tmp'
      Name applied to ``expr`` when ``expr.get_name()`` fails

    Returns
    -------
    (aggregation, name) : tuple
      The aggregated table expression and the name of its single metric
    """
    table = ir.find_base_table(expr)

    try:
        name = expr.get_name()
    except Exception:
        # Only ordinary errors mean "unnamed"; a bare ``except`` would also
        # swallow KeyboardInterrupt and SystemExit.
        name = default_name
        named_expr = expr.name(default_name)
    else:
        named_expr = expr

    return table.aggregate([named_expr]), name
Example #4
0
def _reduction_to_aggregation(expr, default_name='tmp'):
    """
    Turn a reduction into an aggregation over the base table of ``expr``.

    If ``expr`` has no retrievable name (``get_name`` raises), it is named
    ``default_name`` first.

    Returns a 2-tuple: the aggregation and the name used for the metric.
    """
    table = ir.find_base_table(expr)

    # Catch ``Exception`` rather than using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit still propagate to the caller.
    try:
        name = expr.get_name()
    except Exception:
        name = default_name
        named_expr = expr.name(name)
    else:
        named_expr = expr

    return table.aggregate([named_expr]), name
Example #5
0
    def __init__(self, expr, window):
        """
        Set up the op from an analytic expression and a window.

        The window is bound to the base table of ``expr`` when one is found,
        and is then propagated down into ``expr`` before both are handed to
        ``ValueOp.__init__``.

        Raises ``com.IbisInputError`` when ``is_analytic(expr)`` is false.
        """
        from ibis.expr.window import propagate_down_window

        if not is_analytic(expr):
            raise com.IbisInputError(
                "Expression does not contain a valid window operation"
            )

        base_table = ir.find_base_table(expr)
        if base_table is None:
            bound_window = window
        else:
            bound_window = window.bind(base_table)

        windowed_expr = propagate_down_window(expr, bound_window)
        ValueOp.__init__(self, windowed_expr, bound_window)
Example #6
0
def window(win, *, from_base_table_of, this):
    """
    Validate a window argument and bind it to a base table when possible.

    Parameters
    ----------
    win : Window
      The window to validate
    from_base_table_of : str
      Attribute name on ``this`` whose base table the window is bound to
    this : object
      Object carrying the attribute named by ``from_base_table_of``

    Returns
    -------
    The window, bound to the base table if one was found

    Raises
    ------
    com.IbisTypeError
      If ``win`` is not a ``Window``
    com.IbisInputError
      If ``max_lookback`` is set and the window is not ordered by exactly
      one expression whose first operand is of timestamp type
    """
    from ibis.expr.window import Window

    if not isinstance(win, Window):
        raise com.IbisTypeError(
            "`win` argument should be of type `ibis.expr.window.Window`; "
            f"got type {type(win).__name__}")

    source_expr = getattr(this, from_base_table_of)
    base_table = ir.find_base_table(source_expr)
    if base_table is not None:
        win = win.bind(base_table)

    # Nothing further to validate unless a max lookback was requested
    if win.max_lookback is None:
        return win

    message = ("'max lookback' windows must be ordered "
               "by a timestamp column")
    order_keys = win._order_by
    if len(order_keys) != 1:
        raise com.IbisInputError(message)

    ordered_on = order_keys[0].op().args[0]
    if not isinstance(ordered_on.type(), dt.Timestamp):
        raise com.IbisInputError(message)

    return win
Example #7
0
def value_counts(arg, metric_name='count'):
    """
    Group the base table of ``arg`` by ``arg`` and aggregate a row count

    Parameters
    ----------
    arg : value expression
      Expression to group by; it is named 'unnamed' if ``get_name`` raises
      an ExpressionError
    metric_name : string, default 'count'
      Name given to the count metric

    Returns
    -------
    counts : TableExpr
      Aggregated table
    """
    source = ir.find_base_table(arg)
    row_count = source.count().name(metric_name)

    try:
        arg.get_name()
    except _com.ExpressionError:
        group_key = arg.name('unnamed')
    else:
        group_key = arg

    grouped = source.group_by(group_key)
    return grouped.aggregate(row_count)
Example #8
0
def value_counts(arg, metric_name='count'):
    """
    Build a frequency table for a value expression

    Parameters
    ----------
    arg : value expression
      Grouping key for the aggregation
    metric_name : string, default 'count'
      Name applied to the count of the base table

    Returns
    -------
    counts : TableExpr
      Aggregated table
    """
    table = ir.find_base_table(arg)
    counts = table.count()
    named_counts = counts.name(metric_name)

    key = arg
    try:
        key.get_name()
    except _com.ExpressionError:
        # The key has no name of its own: give it a placeholder one
        key = key.name('unnamed')

    return table.group_by(key).aggregate(named_counts)
Example #9
0
def _adapt_expr(expr):
    """
    Adapt an expression into something executable plus a result handler.

    Non-table expressions are rewritten into a well-formed expression and
    paired with a callable that pulls the desired value (a scalar or a
    column) back out of the results.

    Returns a 2-tuple ``(expression, handler)``.

    Raises ``NotImplementedError`` for scalar expressions that reference a
    table without being reductions, and for expression lists that mix
    reductions with non-reductions. Raises ``com.TranslationError`` for
    unsupported expression types.
    """
    def as_is(x):
        return x

    def _is_scalar_reduction(node):
        return isinstance(node, ir.ScalarExpr) and ops.is_reduction(node)

    def _pick_scalar(field):
        def scalar_handler(results):
            return results[field][0]
        return scalar_handler

    def _pick_column(name):
        def column_handler(results):
            return results[name]
        return column_handler

    if isinstance(expr, ir.TableExpr):
        return expr, as_is

    if isinstance(expr, ir.ScalarExpr):
        if _is_scalar_reduction(expr):
            aggregated, field = _reduction_to_aggregation(
                expr, default_name='tmp')
            return aggregated, _pick_scalar(field)

        if ir.find_base_table(expr) is not None:
            raise NotImplementedError(expr._repr())

        # expr with no table refs
        return expr.name('tmp'), _pick_scalar('tmp')

    if isinstance(expr, ir.AnalyticExpr):
        return expr.to_aggregation(), as_is

    if isinstance(expr, ir.ExprList):
        exprs = expr.exprs()
        reduction_flags = [_is_scalar_reduction(x) for x in exprs]

        if all(reduction_flags):
            table = ir.find_base_table(exprs[0])
            return table.aggregate(exprs), as_is
        if not any(reduction_flags):
            return expr, as_is
        # A mix of reductions and non-reductions is unsupported
        raise NotImplementedError(expr._repr())

    if isinstance(expr, ir.ArrayExpr):
        op = expr.op()

        if isinstance(op, ops.TableColumn):
            return op.table, _pick_column(op.name)

        # Something more complicated.
        base_table = L.find_source_table(expr)

        if isinstance(op, ops.DistinctArray):
            inner = op.arg
            try:
                name = inner.get_name()
            except Exception:
                name = 'tmp'

            projected = base_table.projection([inner.name(name)])
            return projected.distinct(), _pick_column(name)

        return (base_table.projection([expr.name('tmp')]),
                _pick_column('tmp'))

    raise com.TranslationError('Do not know how to execute: {0}'
                               .format(type(expr)))
Example #10
0
def _notall_expand(expr):
    """
    Expand the op behind ``expr`` into ``arg.sum() < table.count()``.

    ``arg`` is the first argument of the op and ``table`` is its base table.
    """
    op = expr.op()
    arg = op.args[0]
    table = ir.find_base_table(arg)
    return arg.sum() < table.count()
Example #11
0
def _adapt_expr(expr):
    # Non-table expressions are adapted to a well-formed expression that can
    # be executed, together with a handler that reshapes the results to the
    # desired arity (a single scalar or a single column).
    #
    # The typical inputs are scalars or arrays produced by reductions
    # (simple reductions, or distinct, say). The work is split into one inner
    # helper per expression kind; the dispatch is at the bottom.
    def as_is(x):
        return x

    def _scalar_reduce(x):
        return isinstance(x, ir.ScalarExpr) and ops.is_reduction(x)

    def _get_scalar(field):
        def scalar_handler(results):
            return results[field][0]

        return scalar_handler

    def _get_column(name):
        def column_handler(results):
            return results[name]

        return column_handler

    def _adapt_scalar(scalar):
        # Reductions become aggregations; table-free scalars are just named
        if _scalar_reduce(scalar):
            table_expr, name = _reduction_to_aggregation(scalar,
                                                         default_name='tmp')
            return table_expr, _get_scalar(name)

        if ir.find_base_table(scalar) is None:
            # expr with no table refs
            return scalar.name('tmp'), _get_scalar('tmp')

        raise NotImplementedError(scalar._repr())

    def _adapt_list(expr_list):
        # All reductions -> one aggregation; no reductions -> pass through
        members = expr_list.exprs()
        flags = [_scalar_reduce(member) for member in members]

        if all(flags):
            table = ir.find_base_table(members[0])
            return table.aggregate(members), as_is
        if any(flags):
            raise NotImplementedError(expr_list._repr())
        return expr_list, as_is

    def _adapt_array(array):
        # Columns are fetched through a projection of their source table
        op = array.op()

        if isinstance(op, ops.TableColumn):
            return op.table[[op.name]], _get_column(op.name)

        # Something more complicated.
        base_table = L.find_source_table(array)

        if not isinstance(op, ops.DistinctArray):
            return (base_table.projection([array.name('tmp')]),
                    _get_column('tmp'))

        distinct_arg = op.arg
        try:
            name = distinct_arg.get_name()
        except Exception:
            name = 'tmp'

        projection = base_table.projection([distinct_arg.name(name)])
        return projection.distinct(), _get_column(name)

    if isinstance(expr, ir.TableExpr):
        return expr, as_is
    if isinstance(expr, ir.ScalarExpr):
        return _adapt_scalar(expr)
    if isinstance(expr, ir.AnalyticExpr):
        return expr.to_aggregation(), as_is
    if isinstance(expr, ir.ExprList):
        return _adapt_list(expr)
    if isinstance(expr, ir.ArrayExpr):
        return _adapt_array(expr)

    raise com.TranslationError('Do not know how to execute: {0}'.format(
        type(expr)))
Example #12
0
def _notall_expand(expr):
    """Return ``sum(operand) < count(base table)`` for the op's first arg."""
    operand = expr.op().args[0]
    base_table = ir.find_base_table(operand)
    operand_sum = operand.sum()
    row_count = base_table.count()
    return operand_sum < row_count
Example #13
0
def _reduction_to_aggregation(expr, agg_name='tmp'):
    """
    Aggregate the base table of ``expr`` over ``expr`` named ``agg_name``.
    """
    base_table = ir.find_base_table(expr)
    named_reduction = expr.name(agg_name)
    return base_table.aggregate([named_reduction])
Example #14
0
    def _adapt_expr(expr):
        """
        Adapt an expression into an executable form plus a result handler.

        Table expressions pass through unchanged. Other expressions are
        rewritten and paired with a callable that extracts the scalar or
        column from the results.

        Returns a 2-tuple ``(expression, handler)``.

        Raises ``NotImplementedError`` for a non-reduction scalar that has a
        base table, and ``com.TranslationError`` for unsupported expression
        types.
        """
        def _scalar_from(field):
            def scalar_handler(results):
                return results[field][0]

            return scalar_handler

        def _column_from(name):
            def column_handler(results):
                return results[name]

            return column_handler

        if isinstance(expr, ir.TableExpr):
            return expr, toolz.identity

        if isinstance(expr, ir.ScalarExpr):
            if L.is_scalar_reduction(expr):
                aggregated, field = L.reduction_to_aggregation(
                    expr, default_name='tmp'
                )
                return aggregated, _scalar_from(field)

            if ir.find_base_table(expr) is not None:
                raise NotImplementedError(repr(expr))

            # exprs with no table refs
            # TODO(phillipc): remove ScalarParameter hack
            if isinstance(expr.op(), ops.ScalarParameter):
                name = expr.get_name()
                assert (
                    name is not None
                ), f'scalar parameter {expr} has no name'
                return expr, _scalar_from(name)

            return expr.name('tmp'), _scalar_from('tmp')

        if isinstance(expr, ir.AnalyticExpr):
            return expr.to_aggregation(), toolz.identity

        if isinstance(expr, ir.ColumnExpr):
            op = expr.op()

            if isinstance(op, ops.TableColumn):
                return op.table[[op.name]], _column_from(op.name)

            # Something more complicated.
            base_table = L.find_source_table(expr)

            if not isinstance(op, ops.DistinctColumn):
                projected = base_table.projection([expr.name('tmp')])
                return projected, _column_from('tmp')

            distinct_arg = op.arg
            try:
                name = distinct_arg.get_name()
            except Exception:
                name = 'tmp'

            projected = base_table.projection([distinct_arg.name(name)])
            return projected.distinct(), _column_from(name)

        raise com.TranslationError(
            f'Do not know how to execute: {type(expr)}'
        )
Example #15
0
def _reduction_to_aggregation(expr, agg_name='tmp'):
    """Name ``expr`` as ``agg_name`` and aggregate its base table over it."""
    source = ir.find_base_table(expr)
    metrics = [expr.name(agg_name)]
    return source.aggregate(metrics)