def to_aggregation(
    self, metric_name=None, parent_table=None, backup_metric_name=None
):
    """
    Convert the TopK operation to a table aggregation

    Parameters
    ----------
    metric_name : optional
      When given, the ranking expression is renamed to this value.
    parent_table : optional
      Table to aggregate when the argument and the ranking expression do
      not resolve to equal base tables.
    backup_metric_name : optional
      Name applied to the ranking expression when ``metric_name`` is not
      given and the ranking expression's name equals the argument's name.

    Returns
    -------
    The aggregation of the ranking expression grouped by the operation's
    argument, passed through ``sort_by`` on the ranking expression's name
    (with ``False`` as the second element of the sort key) and then
    ``limit(op.k)``.

    Raises
    ------
    com.IbisError
      If the two base tables differ and no ``parent_table`` was supplied.
    """
    topk = self.op()
    group_key = topk.arg
    arg_table = ir.find_base_table(group_key)

    ranking = topk.by
    if isinstance(ranking, ir.Expr):
        ranking_table = ir.find_base_table(ranking)
    else:
        # Not an expression yet: it is called with the argument's base table
        # to produce one, so both sides share that table.
        ranking = ranking(arg_table)
        ranking_table = arg_table

    if metric_name is not None:
        ranking = ranking.name(metric_name)
    elif ranking.get_name() == group_key.get_name():
        # Same name as the grouping key: rename with the backup name.
        ranking = ranking.name(backup_metric_name)

    if arg_table.equals(ranking_table):
        source = arg_table
    elif parent_table is not None:
        source = parent_table
    else:
        raise com.IbisError('Cross-table TopK; must provide a parent '
                            'joined table')

    aggregated = source.aggregate(ranking, by=[group_key])
    return aggregated.sort_by([(ranking.get_name(), False)]).limit(topk.k)
def _reduction_to_aggregation(expr, default_name='tmp'):
    """
    Wrap a reduction expression in an aggregation over its base table.

    Parameters
    ----------
    expr
      Expression to aggregate; its base table is looked up with
      ``ir.find_base_table``.
    default_name : string, default 'tmp'
      Name given to ``expr`` when ``expr.get_name()`` raises.

    Returns
    -------
    (aggregation, name) : tuple
      ``table.aggregate([named_expr])`` and the name under which the
      expression appears in it.
    """
    table = ir.find_base_table(expr)

    # Only the name lookup is guarded. ``Exception`` (rather than a bare
    # ``except``) keeps KeyboardInterrupt/SystemExit propagating while still
    # treating any failure of ``get_name`` as "this expression is unnamed".
    try:
        name = expr.get_name()
    except Exception:
        name = default_name
        named_expr = expr.name(default_name)
    else:
        named_expr = expr

    return table.aggregate([named_expr]), name
def __init__(self, expr, window):
    """
    Initialise the operation from an analytic expression and a window.

    The window is bound to the expression's base table when one is found,
    then pushed down into the expression with ``propagate_down_window``
    before both are handed to ``ValueOp.__init__``.

    Raises
    ------
    com.IbisInputError
      If ``is_analytic(expr)`` is false.
    """
    from ibis.expr.window import propagate_down_window

    if not is_analytic(expr):
        raise com.IbisInputError(
            "Expression does not contain a valid window operation"
        )

    base_table = ir.find_base_table(expr)
    bound_window = window if base_table is None else window.bind(base_table)
    windowed_expr = propagate_down_window(expr, bound_window)

    ValueOp.__init__(self, windowed_expr, bound_window)
def window(win, *, from_base_table_of, this):
    """
    Validate a window argument and bind it to a base table.

    Parameters
    ----------
    win
      Must be an ``ibis.expr.window.Window`` instance.
    from_base_table_of : string
      Attribute name on ``this`` whose value is used to look up the base
      table.
    this
      Object carrying the attribute named by ``from_base_table_of``.

    Returns
    -------
    The window, bound to the base table when one was found.

    Raises
    ------
    com.IbisTypeError
      If ``win`` is not a ``Window``.
    com.IbisInputError
      If ``max_lookback`` is set and the window does not have exactly one
      ordering key, or that key's underlying argument is not of type
      ``dt.Timestamp``.
    """
    from ibis.expr.window import Window

    if not isinstance(win, Window):
        raise com.IbisTypeError(
            "`win` argument should be of type `ibis.expr.window.Window`; "
            f"got type {type(win).__name__}"
        )

    anchor = getattr(this, from_base_table_of)
    base_table = ir.find_base_table(anchor)
    if base_table is not None:
        win = win.bind(base_table)

    # Without a max lookback there is nothing more to validate.
    if win.max_lookback is None:
        return win

    error_msg = ("'max lookback' windows must be ordered "
                 "by a timestamp column")
    ordering = win._order_by
    if len(ordering) != 1:
        raise com.IbisInputError(error_msg)

    # The type check is made on the first argument of the ordering
    # key's operation.
    order_var = ordering[0].op().args[0]
    if not isinstance(order_var.type(), dt.Timestamp):
        raise com.IbisInputError(error_msg)

    return win
def value_counts(arg, metric_name='count'):
    """
    Compute a frequency table for this value expression

    Parameters
    ----------
    arg : value expression
      Expression to group by. If ``arg.get_name()`` raises
      ``ExpressionError`` it is grouped under the name 'unnamed'.
    metric_name : string, default 'count'
      Name given to the count metric

    Returns
    -------
    counts : TableExpr
      Aggregated table
    """
    source = ir.find_base_table(arg)
    count_metric = source.count().name(metric_name)

    try:
        arg.get_name()
    except _com.ExpressionError:
        group_key = arg.name('unnamed')
    else:
        group_key = arg

    grouped = source.group_by(group_key)
    return grouped.aggregate(count_metric)
def _adapt_expr(expr):
    """
    Adapt an expression to something table-shaped plus a result handler.

    Non-table expressions are turned into a well-formed table expression,
    paired with a callable that adapts the results to the desired arity
    (array-like or scalar). The canonical case is scalar values or arrays
    produced by reductions (simple reductions, or distinct, say).

    Returns
    -------
    (expression, handler) : tuple
      The expression to run and a one-argument callable applied to its
      results.

    Raises
    ------
    NotImplementedError
      For a non-reduction scalar that has a base table, or an expression
      list that mixes scalar reductions with other expressions.
    com.TranslationError
      For an expression kind not handled here.
    """
    def as_is(x):
        return x

    def _scalar_reduce(x):
        return isinstance(x, ir.ScalarExpr) and ops.is_reduction(x)

    def _get_scalar(field):
        def scalar_handler(results):
            return results[field][0]
        return scalar_handler

    def _get_column(name):
        def column_handler(results):
            return results[name]
        return column_handler

    if isinstance(expr, ir.TableExpr):
        return expr, as_is

    if isinstance(expr, ir.ScalarExpr):
        if _scalar_reduce(expr):
            table_expr, name = _reduction_to_aggregation(
                expr, default_name='tmp')
            return table_expr, _get_scalar(name)
        if ir.find_base_table(expr) is not None:
            raise NotImplementedError(expr._repr())
        # expr with no table refs
        return expr.name('tmp'), _get_scalar('tmp')

    if isinstance(expr, ir.AnalyticExpr):
        return expr.to_aggregation(), as_is

    if isinstance(expr, ir.ExprList):
        exprs = expr.exprs()
        reduction_flags = [_scalar_reduce(x) for x in exprs]
        if all(reduction_flags):
            # Every member is a scalar reduction: aggregate them together
            # over the base table of the first one.
            return ir.find_base_table(exprs[0]).aggregate(exprs), as_is
        if not any(reduction_flags):
            return expr, as_is
        raise NotImplementedError(expr._repr())

    if not isinstance(expr, ir.ArrayExpr):
        raise com.TranslationError('Do not know how to execute: {0}'
                                   .format(type(expr)))

    op = expr.op()
    if isinstance(op, ops.TableColumn):
        return op.table, _get_column(op.name)

    # Something more complicated.
    base_table = L.find_source_table(expr)
    if not isinstance(op, ops.DistinctArray):
        projected = base_table.projection([expr.name('tmp')])
        return projected, _get_column('tmp')

    # Distinct: project the underlying argument, then apply distinct().
    expr = op.arg
    try:
        name = op.arg.get_name()
    except Exception:
        name = 'tmp'
    projected = base_table.projection([expr.name(name)])
    return projected.distinct(), _get_column(name)
def _notall_expand(expr):
    """
    Expand the operation as ``arg.sum() < table.count()``.

    ``arg`` is the first argument of ``expr``'s operation and ``table`` is
    the base table found for that argument.
    """
    operand = expr.op().args[0]
    base_table = ir.find_base_table(operand)

    operand_total = operand.sum()
    row_count = base_table.count()
    return operand_total < row_count
def _adapt_expr(expr):
    """
    Pair an expression with a table-shaped equivalent and a result adapter.

    Non-table expressions need to be adapted to some well-formed table
    expression, along with a way to adapt the results to the desired arity
    (whether array-like or scalar, for example). The canonical case is
    scalar values or arrays produced by some reductions (simple reductions,
    or distinct, say).

    Returns
    -------
    (expression, handler) : tuple
      The expression to run and a one-argument callable applied to its
      results.

    Raises
    ------
    NotImplementedError
      For a non-reduction scalar that has a base table, or an expression
      list that mixes scalar reductions with other expressions.
    com.TranslationError
      For an expression kind not handled here.
    """
    def as_is(x):
        return x

    if isinstance(expr, ir.TableExpr):
        return expr, as_is

    def _scalar_reduce(x):
        return isinstance(x, ir.ScalarExpr) and ops.is_reduction(x)

    def _get_scalar(field):
        def scalar_handler(results):
            return results[field][0]
        return scalar_handler

    def _get_column(name):
        def column_handler(results):
            return results[name]
        return column_handler

    if isinstance(expr, ir.ScalarExpr):
        if _scalar_reduce(expr):
            aggregated, agg_name = _reduction_to_aggregation(
                expr, default_name='tmp')
            return aggregated, _get_scalar(agg_name)

        base_table = ir.find_base_table(expr)
        if base_table is not None:
            raise NotImplementedError(expr._repr())
        # expr with no table refs
        return expr.name('tmp'), _get_scalar('tmp')

    if isinstance(expr, ir.AnalyticExpr):
        return expr.to_aggregation(), as_is

    if isinstance(expr, ir.ExprList):
        members = expr.exprs()
        is_reduction = [_scalar_reduce(member) for member in members]
        if all(is_reduction):
            table = ir.find_base_table(members[0])
            return table.aggregate(members), as_is
        if any(is_reduction):
            # A mix of reductions and non-reductions is not supported.
            raise NotImplementedError(expr._repr())
        return expr, as_is

    if isinstance(expr, ir.ArrayExpr):
        op = expr.op()

        if isinstance(op, ops.TableColumn):
            # Select just this column from its table.
            return op.table[[op.name]], _get_column(op.name)

        # Something more complicated.
        base_table = L.find_source_table(expr)
        if isinstance(op, ops.DistinctArray):
            expr = op.arg
            try:
                column_name = op.arg.get_name()
            except Exception:
                column_name = 'tmp'
            projection = base_table.projection([expr.name(column_name)])
            return projection.distinct(), _get_column(column_name)

        projection = base_table.projection([expr.name('tmp')])
        return projection, _get_column('tmp')

    raise com.TranslationError('Do not know how to execute: {0}'.format(
        type(expr)))
def _reduction_to_aggregation(expr, agg_name='tmp'):
    """
    Aggregate ``expr``, renamed to ``agg_name``, over its base table.

    Parameters
    ----------
    expr
      Expression to aggregate; its base table is looked up with
      ``ir.find_base_table``.
    agg_name : string, default 'tmp'
      Name given to the expression inside the aggregation.

    Returns
    -------
    The result of ``table.aggregate`` on the single renamed expression.
    """
    base_table = ir.find_base_table(expr)
    named_reduction = expr.name(agg_name)
    return base_table.aggregate([named_reduction])
def _adapt_expr(expr):
    """
    Pair an expression with a table-shaped equivalent and a result adapter.

    Non-table expressions need to be adapted to some well-formed table
    expression, along with a way to adapt the results to the desired arity
    (whether array-like or scalar, for example). The canonical case is
    scalar values or arrays produced by some reductions (simple reductions,
    or distinct, say).

    Returns
    -------
    (expression, handler) : tuple
      The expression to run and a one-argument callable applied to its
      results.

    Raises
    ------
    NotImplementedError
      For a scalar that is not a scalar reduction but has a base table.
    com.TranslationError
      For an expression kind not handled here.
    """
    if isinstance(expr, ir.TableExpr):
        return expr, toolz.identity

    def _get_scalar(field):
        def scalar_handler(results):
            return results[field][0]

        return scalar_handler

    def _get_column(name):
        def column_handler(results):
            return results[name]

        return column_handler

    if isinstance(expr, ir.ScalarExpr):
        if L.is_scalar_reduction(expr):
            aggregated, agg_name = L.reduction_to_aggregation(
                expr, default_name='tmp'
            )
            return aggregated, _get_scalar(agg_name)

        if ir.find_base_table(expr) is not None:
            raise NotImplementedError(repr(expr))

        # exprs with no table refs
        # TODO(phillipc): remove ScalarParameter hack
        if not isinstance(expr.op(), ops.ScalarParameter):
            return expr.name('tmp'), _get_scalar('tmp')

        name = expr.get_name()
        assert name is not None, f'scalar parameter {expr} has no name'
        return expr, _get_scalar(name)

    if isinstance(expr, ir.AnalyticExpr):
        return expr.to_aggregation(), toolz.identity

    if not isinstance(expr, ir.ColumnExpr):
        raise com.TranslationError(
            f'Do not know how to execute: {type(expr)}'
        )

    op = expr.op()
    if isinstance(op, ops.TableColumn):
        # Select just this column from its table.
        return op.table[[op.name]], _get_column(op.name)

    # Something more complicated.
    base_table = L.find_source_table(expr)
    if not isinstance(op, ops.DistinctColumn):
        projection = base_table.projection([expr.name('tmp')])
        return projection, _get_column('tmp')

    # Distinct: project the underlying argument, then apply distinct().
    expr = op.arg
    try:
        column_name = op.arg.get_name()
    except Exception:
        column_name = 'tmp'
    projection = base_table.projection([expr.name(column_name)])
    return projection.distinct(), _get_column(column_name)