def __init__(
    self,
    group_by=None,
    order_by=None,
    preceding=None,
    following=None,
    how='rows',
):
    """Set up the window's grouping keys, ordering keys and frame bounds.

    Parameters
    ----------
    group_by : expression or list of expressions, optional
        Grouping keys; ``None`` is treated as an empty list.
    order_by : expression or list of expressions, optional
        Ordering keys; ``None`` is treated as an empty list.  An
        ``ir.Expr`` that is not already an ``ir.SortExpr`` is wrapped with
        ``ops.SortKey``; anything else is stored as given.
    preceding, following
        Frame bounds, stored after conversion by ``_list_to_tuple``.
    how : str, default 'rows'
        Stored unchanged on ``self.how``.
    """
    grouping = [] if group_by is None else group_by
    ordering = [] if order_by is None else order_by

    self._group_by = util.promote_list(grouping)

    sort_keys = []
    for key in util.promote_list(ordering):
        if isinstance(key, ir.Expr) and not isinstance(key, ir.SortExpr):
            key = ops.SortKey(key).to_expr()
        sort_keys.append(key)
    self._order_by = sort_keys

    self.preceding = _list_to_tuple(preceding)
    self.following = _list_to_tuple(following)
    self.how = how

    # Validation runs last, once every frame attribute is in place.
    self._validate_frame()
def filter(table, predicates):
    """
    Select rows from table based on boolean expressions

    Parameters
    ----------
    predicates : boolean array expressions, or list thereof
      A single expression is first split with ``unwrap_ands``.

    Returns
    -------
    filtered_expr : TableExpr
    """
    if isinstance(predicates, Expr):
        predicates = _L.unwrap_ands(predicates)

    bound = [ir.bind_expr(table, p) for p in util.promote_list(predicates)]

    # Analytic expressions are swapped for their filter form.
    resolved = [
        p.to_filter() if isinstance(p, ir.AnalyticExpr) else p for p in bound
    ]
    return TableExpr(_L.apply_filter(table, resolved))
def _is_valid(self, exprs):
    """Return True if ``_assert_valid`` accepts ``exprs``, False otherwise.

    Only ``com.RelationError`` is translated into ``False``; any other
    exception propagates to the caller.
    """
    try:
        self._assert_valid(util.promote_list(exprs))
    except com.RelationError:
        return False
    return True
def __init__(self, left, right, by, predicates, **kwargs):
    """Clean the ``by`` keys, then delegate to the parent initializer.

    ``by`` is passed through ``util.promote_list`` and
    ``_clean_join_predicates`` before being forwarded; all other arguments
    are forwarded untouched.
    """
    by_keys = util.promote_list(by)
    cleaned_by = _clean_join_predicates(left, right, by_keys)
    super().__init__(
        left=left,
        right=right,
        by=cleaned_by,
        predicates=predicates,
        **kwargs,
    )
def mutate(self, exprs=None, **kwds):
    """
    Returns a table projection with analytic / window functions
    applied. Any arguments can be functions.

    Parameters
    ----------
    exprs : list, default None
      Expressions to project; the list passed in is not modified.
    kwds : key=value pairs
      Values are resolved through ``self.table._resolve`` and added, named
      after their keyword, in sorted keyword order.

    Examples
    --------
    >>> expr = (table
    ...         .group_by('foo')
    ...         .order_by(ibis.desc('bar'))
    ...         .mutate(qux=lambda x: x.baz.lag(),
    ...                 qux2=table.baz.lead()))  # doctest: +SKIP

    Returns
    -------
    mutated : TableExpr
    """
    # promote_list may hand back the very list the caller passed in; copy it
    # so the appends below never leak into the caller's object.
    exprs = [] if exprs is None else list(util.promote_list(exprs))

    kwd_names = list(kwds.keys())
    kwd_values = self.table._resolve(list(kwds.values()))

    for name, value in sorted(zip(kwd_names, kwd_values)):
        exprs.append(value.name(name))

    return self.projection([self.table] + exprs)
def drop(self, fields: str | Sequence[str]) -> Table:
    """Remove fields from a table.

    Parameters
    ----------
    fields
        Fields to drop.  A falsy value (e.g. an empty list) makes this a
        no-op.

    Returns
    -------
    Table
        Expression without `fields`

    Raises
    ------
    KeyError
        If any requested field is not in the table's schema.
    """
    if not fields:
        # no-op if nothing to be dropped
        return self

    requested = util.promote_list(fields)
    schema = self.schema()
    to_drop = frozenset(requested)

    missing_fields = to_drop.difference(schema)
    if missing_fields:
        raise KeyError(f'Fields not in table: {missing_fields!s}')

    kept = [name for name in schema if name not in to_drop]
    return self[kept]
def __init__(self, left, right, predicates, **kwargs):
    """Run the operands through ``_make_distinct_join_predicates``, then
    delegate to the parent initializer.

    ``predicates`` is first passed through ``util.promote_list``; the
    helper's returned ``(left, right, predicates)`` triple replaces the
    incoming arguments.
    """
    distinct = _make_distinct_join_predicates(
        left, right, util.promote_list(predicates)
    )
    left, right, predicates = distinct
    super().__init__(
        left=left, right=right, predicates=predicates, **kwargs
    )
def sort_by(
    self,
    sort_exprs: str
    | ir.Column
    | ir.SortKey
    | tuple[str | ir.Column, bool]
    | Sequence[tuple[str | ir.Column, bool]],
) -> Table:
    """Sort table by `sort_exprs`

    Parameters
    ----------
    sort_exprs
        Sort specifications.  A single tuple is treated as one
        specification; ``None`` means no sort keys.

    Examples
    --------
    >>> import ibis
    >>> t = ibis.table([('a', 'int64'), ('b', 'string')])
    >>> ab_sorted = t.sort_by([('a', True), ('b', False)])

    Returns
    -------
    Table
        Sorted table
    """
    if sort_exprs is None:
        keys = []
    elif isinstance(sort_exprs, tuple):
        # A lone tuple is a single sort spec, not a sequence of specs.
        keys = [sort_exprs]
    else:
        keys = util.promote_list(sort_exprs)

    sorted_op = self.op().sort_by(self, keys)
    return sorted_op.to_expr()
def order_by(
    self, expr: ir.ValueExpr | Iterable[ir.ValueExpr]
) -> GroupedTableExpr:
    """Sort a grouped table expression by `expr`.

    Notes
    -----
    This API call is ignored in aggregations.

    Parameters
    ----------
    expr
        Expressions to order the results by; appended after any ordering
        already present on this object.

    Returns
    -------
    GroupedTableExpr
        A sorted grouped GroupedTableExpr
    """
    extended_order = self._order_by + util.promote_list(expr)
    return GroupedTableExpr(
        self.table,
        self.by,
        having=self._having,
        order_by=extended_order,
        window=self._window,
    )
def __init__(self, table_expr, sort_keys):
    """Store the table and its sort keys, then initialize the node.

    Each entry of ``sort_keys`` (after ``util.promote_list``) is converted
    with ``to_sort_key`` against the stored table.
    """
    self.table = table_expr

    converted = []
    for key in util.promote_list(sort_keys):
        converted.append(to_sort_key(self.table, key))
    self.keys = converted

    TableNode.__init__(self, [self.table, self.keys])
def ilike(
    self,
    patterns: str | StringValue | Iterable[str | StringValue],
) -> ir.BooleanValue:
    """Match `patterns` against `self`, case-insensitive.

    This function is modeled after SQL's `ILIKE` directive. Use `%` as a
    multiple-character wildcard or `_` as a single-character wildcard.

    Use `re_search` or `rlike` for regular expression-based matching.

    Parameters
    ----------
    patterns
        If `patterns` is a list, then if any pattern matches the input then
        the corresponding row in the output is `True`.

    Returns
    -------
    BooleanValue
        Column indicating matches
    """
    import ibis.expr.operations as ops

    def _match(pattern):
        return ops.StringSQLILike(self, pattern).to_expr()

    # OR together one ILIKE expression per pattern.
    return functools.reduce(
        operator.or_, map(_match, util.promote_list(patterns))
    )
def mutate(
    self,
    exprs: ir.Value | Sequence[ir.Value] | None = None,
    **kwds: ir.Value,
):
    """Return a table projection with window functions applied.

    Any arguments can be functions.

    Parameters
    ----------
    exprs
        List of expressions; the list passed in is not modified.
    kwds
        Expressions; each is passed through ``self.table._ensure_expr``
        and named after its keyword.

    Examples
    --------
    >>> import ibis
    >>> t = ibis.table([
    ...     ('foo', 'string'),
    ...     ('bar', 'string'),
    ...     ('baz', 'double'),
    ... ], name='t')
    >>> t
    UnboundTable[t]
      foo string
      bar string
      baz float64
    >>> expr = (t.group_by('foo')
    ...          .order_by(ibis.desc('bar'))
    ...          .mutate(qux=lambda x: x.baz.lag(),
    ...                  qux2=t.baz.lead()))
    >>> print(expr)
    r0 := UnboundTable[t]
      foo string
      bar string
      baz float64
    Selection[r0]
      selections:
        r0
        qux:  Window(Lag(r0.baz), window=Window(group_by=[r0.foo], order_by=[desc|r0.bar], how='rows'))
        qux2: Window(Lead(r0.baz), window=Window(group_by=[r0.foo], order_by=[desc|r0.bar], how='rows'))

    Returns
    -------
    Table
        A table expression with window functions applied
    """  # noqa: E501
    # promote_list may hand back the very list the caller passed in; copy it
    # so the appends below never leak into the caller's object.
    exprs = [] if exprs is None else list(util.promote_list(exprs))

    for name, expr in kwds.items():
        expr = self.table._ensure_expr(expr)
        exprs.append(expr.name(name))

    return self.projection([self.table, *exprs])
def _resolve(self, exprs):
    """Return a new list with ``_ensure_expr`` applied to each of ``exprs``.

    ``exprs`` is passed through ``util.promote_list`` first.
    """
    # Stash this helper method here for now
    return [self._ensure_expr(item) for item in util.promote_list(exprs)]
def __init__(self, group_by=None, order_by=None,
             preceding=None, following=None):
    """Set up the window's grouping keys, ordering keys and frame bounds.

    ``None`` for ``group_by`` / ``order_by`` is treated as an empty list.
    Every ``ir.Expr`` in ``order_by`` is wrapped in ``ops.SortKey``; other
    values are stored as given.  ``preceding`` and ``following`` are stored
    after conversion by ``_list_to_tuple``.
    """
    grouping = [] if group_by is None else group_by
    ordering = [] if order_by is None else order_by

    self._group_by = util.promote_list(grouping)
    self._order_by = util.promote_list(ordering)

    sort_keys = []
    for key in self._order_by:
        if isinstance(key, ir.Expr):
            key = ops.SortKey(key)
        sort_keys.append(key)
    self._order_by = sort_keys

    self.preceding = _list_to_tuple(preceding)
    self.following = _list_to_tuple(following)

    self._validate_frame()
def _get_window(self):
    """Build the window used for windowed projections on this grouping.

    Without a stored window, the grouping keys and ordering of this object
    are used with no frame bounds.  With a stored window, its keys come
    first, followed by this object's, and its frame bounds are reused.
    """
    base = self._window
    if base is None:
        groups, sorts = self.by, self._order_by
        preceding = following = None
    else:
        groups = base.group_by + self.by
        sorts = base.order_by + self._order_by
        preceding, following = base.preceding, base.following

    ensure = self.table._ensure_expr
    group_exprs = [ensure(g) for g in util.promote_list(groups)]
    order_exprs = [ensure(s) for s in util.promote_list(sorts)]

    return _window.window(
        preceding=preceding,
        following=following,
        group_by=group_exprs,
        order_by=order_exprs,
    )
def __init__(
    self, table, by, having=None, order_by=None, window=None, **expressions
):
    """Record the table, its grouping keys and optional modifiers.

    Parameters
    ----------
    table
        Table being grouped.
    by
        Grouping keys; ``None`` means none.
    having, order_by
        Stored as given, with falsy values replaced by an empty list.
    window
        Stored as given.
    **expressions
        Extra grouping keys, resolved with ``_get_group_by_key`` and named
        after their keyword; appended in sorted keyword order.
    """
    self.table = table

    positional_keys = util.promote_list([] if by is None else by)
    named_keys = [
        _get_group_by_key(table, value).name(name)
        for name, value in sorted(expressions.items(), key=toolz.first)
    ]
    self.by = positional_keys + named_keys

    self._order_by = order_by if order_by else []
    self._having = having if having else []
    self._window = window
def __init__(
    self, table, by, having=None, order_by=None, window=None, **expressions
):
    """Record the table, its grouping keys and optional modifiers.

    Parameters
    ----------
    table
        Table being grouped.
    by
        Grouping keys; ``None`` means none.
    having, order_by
        Stored as given, with falsy values replaced by an empty list.
    window
        Stored as given.
    **expressions
        Extra grouping keys.  String values are looked up on ``table``;
        each key is named after its keyword and appended in sorted keyword
        order.
    """
    self.table = table

    positional_keys = util.promote_list([] if by is None else by)

    named_keys = []
    for name, value in sorted(expressions.items(), key=toolz.first):
        if isinstance(value, six.string_types):
            value = table[value]
        named_keys.append(value.name(name))

    self.by = positional_keys + named_keys
    self._order_by = order_by if order_by else []
    self._having = having if having else []
    self._window = window
def mutate(table, exprs=None, **kwds):
    """
    Convenience function for table projections involving adding columns

    Parameters
    ----------
    exprs : list, default None
      List of named expressions to add as columns; the list passed in is
      not modified.
    kwds : keywords for new columns
      Callables (per ``util.is_function``) are called with ``table``; other
      values go through ``as_value_expr``.  Added in sorted keyword order.

    Examples
    --------
    expr = table.mutate(qux=table.foo + table.bar, baz=5)

    Returns
    -------
    mutated : TableExpr
    """
    # promote_list may hand back the very list the caller passed in; copy it
    # so the appends below never leak into the caller's object.
    exprs = [] if exprs is None else list(util.promote_list(exprs))

    for name, value in sorted(kwds.items()):
        if util.is_function(value):
            value = value(table)
        else:
            value = as_value_expr(value)
        exprs.append(value.name(name))

    if not any(expr.get_name() in table for expr in exprs):
        # Purely additive: keep the whole table and tack on the new columns.
        return table.projection([table] + exprs)

    # At least one expression reuses an existing column name: substitute it
    # at the original column's position, then append the genuinely new ones.
    by_name = {expr.get_name(): expr for expr in exprs}
    used = set()
    proj_exprs = []
    for column in table.columns:
        if column in by_name:
            proj_exprs.append(by_name[column])
            used.add(column)
        else:
            proj_exprs.append(column)

    proj_exprs.extend(expr for expr in exprs if expr.get_name() not in used)
    return table.projection(proj_exprs)
def _resolve_predicates(table, predicates):
    """Bind ``predicates`` to ``table`` and return them as a list.

    A single expression is first split with ``unwrap_ands``.  After
    binding, analytic expressions are replaced by their ``to_filter()``
    form.
    """
    if isinstance(predicates, Expr):
        predicates = _L.unwrap_ands(predicates)

    bound = [ir.bind_expr(table, p) for p in util.promote_list(predicates)]
    return [
        p.to_filter() if isinstance(p, ir.AnalyticExpr) else p for p in bound
    ]
def __init__(
    self,
    group_by=None,
    order_by=None,
    preceding=None,
    following=None,
    max_lookback=None,
    how='rows',
):
    """Set up the window's grouping keys, ordering keys and frame bounds.

    Parameters
    ----------
    group_by, order_by
        Grouping / ordering keys; ``None`` is treated as an empty list.
        An ``ir.Expr`` ordering key that is not already an ``ir.SortExpr``
        is wrapped with ``ops.SortKey``.
    preceding
        Either a frame bound, or a ``RowsWithMaxLookback`` carrying both
        the bound and the lookback row count.
    following
        Frame bound, stored after ``_sequence_to_tuple`` conversion.
    max_lookback
        Stored on ``self.max_lookback`` unless ``preceding`` is a
        ``RowsWithMaxLookback``, in which case it is ignored.
    how : str, default 'rows'
        Stored unchanged on ``self.how``.
    """
    import ibis.expr.operations as ops

    grouping = [] if group_by is None else group_by
    ordering = [] if order_by is None else order_by

    self._group_by = util.promote_list(grouping)

    sort_keys = []
    for key in util.promote_list(ordering):
        if isinstance(key, ir.Expr) and not isinstance(key, ir.SortExpr):
            key = ops.SortKey(key).to_expr()
        sort_keys.append(key)
    self._order_by = sort_keys

    if not isinstance(preceding, RowsWithMaxLookback):
        self.preceding = _sequence_to_tuple(preceding)
        self.max_lookback = max_lookback
    else:
        # the offset interval is used as the 'preceding' value of a window
        # while 'rows' is used to adjust the window created using offset
        self.preceding = preceding.max_lookback
        self.max_lookback = preceding.rows

    self.following = _sequence_to_tuple(following)
    self.how = how
    self._validate_frame()
def __init__(
    self,
    group_by=None,
    order_by=None,
    preceding=None,
    following=None,
    max_lookback=None,
    how='rows',
):
    """Set up the window's grouping keys, ordering keys and frame bounds.

    Grouping and ordering keys are stored as tuples after being run through
    ``toolz.unique``, keyed on each value's ``_key`` attribute when it has
    one and on the value itself otherwise.

    Parameters
    ----------
    group_by, order_by
        Grouping / ordering keys; ``None`` is treated as an empty list.
        An ``ir.Expr`` ordering key that is not already an ``ir.SortExpr``
        is wrapped with ``ops.SortKey``.
    preceding
        Either a frame bound, or a ``RowsWithMaxLookback`` carrying both
        the bound and the lookback row count.
    following
        Frame bound, stored after ``_sequence_to_tuple`` conversion.
    max_lookback
        Stored on ``self.max_lookback`` unless ``preceding`` is a
        ``RowsWithMaxLookback``, in which case it is ignored.
    how : str, default 'rows'
        Stored unchanged on ``self.how``.
    """
    import ibis.expr.operations as ops

    def _dedup_key(value):
        return getattr(value, "_key", value)

    groups = promote_list([] if group_by is None else group_by)
    self._group_by = tuple(toolz.unique(groups, key=_dedup_key))

    sort_keys = []
    for key in promote_list([] if order_by is None else order_by):
        already_sort_expr = isinstance(key, ir.SortExpr)
        if isinstance(key, ir.Expr) and not already_sort_expr:
            key = ops.SortKey(key).to_expr()
        sort_keys.append(key)
    self._order_by = tuple(toolz.unique(sort_keys, key=_dedup_key))

    if not isinstance(preceding, RowsWithMaxLookback):
        self.preceding = _sequence_to_tuple(preceding)
        self.max_lookback = max_lookback
    else:
        # the offset interval is used as the 'preceding' value of a window
        # while 'rows' is used to adjust the window created using offset
        self.preceding = preceding.max_lookback
        self.max_lookback = preceding.rows

    self.following = _sequence_to_tuple(following)
    self.how = how
    self._validate_frame()
def __init__(self, group_by=None, order_by=None,
             preceding=None, following=None):
    """Set up the window's grouping keys, ordering keys and frame bounds.

    ``None`` for ``group_by`` / ``order_by`` is treated as an empty list.
    An ``ir.Expr`` ordering key that is not already an ``ir.SortExpr`` is
    wrapped with ``ops.SortKey``; other values are stored as given.
    ``preceding`` and ``following`` are stored after conversion by
    ``_list_to_tuple``.
    """
    grouping = [] if group_by is None else group_by
    ordering = [] if order_by is None else order_by

    self._group_by = util.promote_list(grouping)

    sort_keys = []
    for key in util.promote_list(ordering):
        if isinstance(key, ir.Expr) and not isinstance(key, ir.SortExpr):
            key = ops.SortKey(key).to_expr()
        sort_keys.append(key)
    self._order_by = sort_keys

    self.preceding = _list_to_tuple(preceding)
    self.following = _list_to_tuple(following)

    self._validate_frame()
def _rewrite_exprs(self, what):
    """Flatten ``what`` and apply ``substitute_parents`` to every item.

    Any ``ir.ExprList`` in ``what`` is expanded into its member
    expressions; everything else is taken as a single expression.
    """
    from ibis.expr.analysis import substitute_parents

    flattened = []
    for item in util.promote_list(what):
        if not isinstance(item, ir.ExprList):
            flattened.append(item)
            continue
        flattened.extend(item.exprs())

    return [
        substitute_parents(expr, past_projection=False) for expr in flattened
    ]
def projection(self, exprs):
    """Project new columns out of the grouped table.

    Each expression is passed through ``self.table._ensure_expr`` and then
    ``L.windowize_function`` with the window from ``_get_window()``.

    See Also
    --------
    ibis.expr.groupby.GroupedTable.mutate
    """
    window = self._get_window()
    windowed_exprs = [
        L.windowize_function(self.table._ensure_expr(expr), w=window)
        for expr in util.promote_list(exprs)
    ]
    return self.table.projection(windowed_exprs)
def _maybe_convert_sort_keys(tables, exprs):
    """Try to convert each of ``exprs`` to a sort key on one of ``tables``.

    For every key the tables are tried in turn and the first successful
    ``_to_sort_key`` conversion wins.  String and ``DeferredSortKey`` keys
    try the tables in reverse order, all others in the given order.  A key
    that cannot be converted against any table is returned unchanged.
    """
    exprs = util.promote_list(exprs)
    keys = exprs[:]

    for position, key in enumerate(exprs):
        reverse = isinstance(key, (str, DeferredSortKey))
        candidates = tables[::-1] if reverse else tables[::1]
        for table in candidates:
            try:
                converted = _to_sort_key(key, table=table)
            except Exception:
                # Best effort: this table does not fit, try the next one.
                continue
            keys[position] = converted
            break

    return keys
def _rewrite_exprs(self, what):
    """Flatten ``what`` and apply ``substitute_parents`` to every item.

    Members of any ``ir.ExprList`` are expanded in place; other items are
    kept as single expressions.
    """
    from ibis.expr.analysis import substitute_parents

    pending = []
    for entry in util.promote_list(what):
        members = entry.exprs() if isinstance(entry, ir.ExprList) else [entry]
        pending.extend(members)

    rewritten = []
    for expr in pending:
        rewritten.append(substitute_parents(expr, past_projection=False))
    return rewritten
def aggregate(
    self,
    metrics: Sequence[ir.Scalar] | None = None,
    by: Sequence[ir.Value] | None = None,
    having: Sequence[ir.BooleanValue] | None = None,
    **kwargs: ir.Value,
) -> Table:
    """Aggregate a table with a given set of reductions grouping by `by`.

    Parameters
    ----------
    metrics
        Aggregate expressions; the list passed in is not modified.
    by
        Grouping expressions
    having
        Post-aggregation filters
    kwargs
        Named aggregate expressions; each is passed through
        ``_ensure_expr`` and named after its keyword.

    Returns
    -------
    Table
        An aggregate table expression
    """
    # promote_list may hand back the very list the caller passed in; copy it
    # so the extend below never leaks into the caller's object.
    metrics = [] if metrics is None else list(util.promote_list(metrics))
    metrics.extend(
        self._ensure_expr(expr).name(name) for name, expr in kwargs.items()
    )

    op = self.op().aggregate(
        self,
        metrics,
        by=util.promote_list(by if by is not None else []),
        having=util.promote_list(having if having is not None else []),
    )
    return op.to_expr()
def _pushdown_exprs(self, exprs):
    """Substitute ``self.parent`` with ``self.op.table`` in each expression.

    Returns
    -------
    valid : bool
        Result of ``shares_all_roots`` for the substituted expressions
        against ``self.op.table``; ``True`` when there are none.
    subbed_exprs : list
        The substituted expressions.
    """
    from ibis.expr.analysis import shares_all_roots, sub_for

    subbed_exprs = [
        sub_for(
            self.op.table._ensure_expr(expr),
            [(self.parent, self.op.table)],
        )
        for expr in util.promote_list(exprs)
    ]

    if not subbed_exprs:
        return True, subbed_exprs
    return shares_all_roots(subbed_exprs, self.op.table), subbed_exprs
def __init__(self, table, by, having=None, order_by=None, window=None,
             **expressions):
    """Record the table, its grouping keys and optional modifiers.

    ``by`` may be ``None`` (no positional keys).  Keyword ``expressions``
    become additional grouping keys: string values are looked up on
    ``table``, each key is named after its keyword, and they are appended
    in sorted keyword order.  Falsy ``having`` / ``order_by`` become empty
    lists.
    """
    self.table = table

    keys = util.promote_list([] if by is None else by)

    def _named_key(name, value):
        column = table[value] if isinstance(value, six.string_types) else value
        return column.name(name)

    ordered_items = sorted(expressions.items(), key=toolz.first)
    self.by = keys + [_named_key(name, value) for name, value in ordered_items]

    self._order_by = order_by if order_by else []
    self._having = having if having else []
    self._window = window
def having(self, expr):
    """
    Add a post-aggregation result filter (like the having argument in
    `aggregate`), for composability with the group_by API

    Parameters
    ----------
    expr : expression or list of expressions
      Appended after any filters already present on this object.

    Returns
    -------
    grouped : GroupedTableExpr
    """
    combined_having = self._having + util.promote_list(expr)
    return GroupedTableExpr(
        self.table,
        self.by,
        having=combined_having,
        order_by=self._order_by,
        window=self._window,
    )
def test_dropna_table(backend, alltypes, how, subset):
    """Check that ``dropna`` on the backend matches pandas ``dropna``."""
    with_nan = alltypes.mutate(na_col=np.nan)
    with_none = with_nan.mutate(none_col=None)
    table = with_none.mutate(none_col=with_none['none_col'].cast('float64'))
    table_pandas = table.execute()

    result = table.dropna(subset, how).execute().reset_index(drop=True)

    # A falsy subset means "consider every column".
    if subset:
        subset = util.promote_list(subset)
    else:
        subset = table_pandas.columns
    expected = table_pandas.dropna(how=how, subset=subset).reset_index(
        drop=True
    )

    # check_dtype is False here because there are dtype diffs between
    # Pyspark and Pandas on Java 8 - the 'bool_col' of an empty DataFrame
    # is type object in Pyspark, and type bool in Pandas. This diff does
    # not exist in Java 11.
    backend.assert_frame_equal(result, expected, check_dtype=False)
def order_by(self, expr):
    """
    Expressions to use for ordering data for a window function
    computation. Ignored in aggregations.

    Parameters
    ----------
    expr : value expression or list of value expressions
      Appended after any ordering already present on this object.

    Returns
    -------
    grouped : GroupedTableExpr
    """
    extended_order = self._order_by + util.promote_list(expr)
    return GroupedTableExpr(
        self.table,
        self.by,
        having=self._having,
        order_by=extended_order,
        window=self._window,
    )
def fmt_fields(
    op: ops.TableNode,
    fields: Mapping[str, Callable[[Any, Aliases], str]],
    *,
    aliases: Aliases,
) -> str:
    # Collects formatted text for selected attributes of `op`.
    #
    # For each (field, formatter) pair in `fields`, the attribute
    # `getattr(op, field)` is passed through util.promote_list, None entries
    # are dropped, and every remaining entry is rendered with
    # `formatter(expr, aliases=aliases)`. Fields with no entries left
    # contribute nothing.
    #
    # NOTE(review): the body visible here ends without a `return`, so as
    # written the function returns None despite the `-> str` annotation.
    # The tail of the function appears to be missing from this view;
    # confirm against the full file before relying on the return value.
    parts = []
    for field, formatter in fields.items():
        if exprs := [
            expr
            for expr in util.promote_list(getattr(op, field))
            if expr is not None
        ]:
            field_fmt = [formatter(expr, aliases=aliases) for expr in exprs]
            # Header line "<field>:" followed by the indented rendered entries.
            parts.append(f"{field}:")
            parts.append(util.indent("\n".join(field_fmt), spaces=2))
def having(self, expr: ir.BooleanScalar) -> GroupedTable:
    """Add a post-aggregation result filter `expr`.

    Parameters
    ----------
    expr
        An expression that filters based on an aggregate value.  It is
        appended after any filters already present on this object.

    Returns
    -------
    GroupedTable
        A grouped table expression
    """
    combined_having = self._having + util.promote_list(expr)
    grouped_cls = self.__class__
    return grouped_cls(
        self.table,
        self.by,
        having=combined_having,
        order_by=self._order_by,
        window=self._window,
    )
def mutate(self, exprs=None, **kwds):
    """
    Returns a table projection with analytic / window functions applied

    Parameters
    ----------
    exprs : list, default None
      Expressions to project; the list passed in is not modified.
    kwds : key=value pairs
      Each value is named after its keyword and added to the projection.

    Examples
    --------
    expr = (table
            .group_by('foo')
            .order_by(ibis.desc('bar'))
            .mutate(qux=table.baz.lag()))

    Returns
    -------
    mutated : TableExpr
    """
    # promote_list may hand back the very list the caller passed in; copy it
    # so the appends below never leak into the caller's object.
    exprs = [] if exprs is None else list(util.promote_list(exprs))

    exprs.extend(value.name(name) for name, value in kwds.items())

    return self.projection([self.table] + exprs)
def mutate(self, exprs=None, **kwds):
    """
    Returns a table projection with analytic / window functions
    applied. Any arguments can be functions.

    Parameters
    ----------
    exprs : list, default None
      Expressions to project; the list passed in is not modified.
    kwds : key=value pairs
      Values are resolved through ``self.table._resolve`` and added, named
      after their keyword, in sorted keyword order.

    Examples
    --------
    >>> import ibis
    >>> t = ibis.table([
    ...     ('foo', 'string'),
    ...     ('bar', 'string'),
    ...     ('baz', 'double'),
    ... ], name='t')
    >>> t
    UnboundTable[table]
      name: t
      schema:
        foo : string
        bar : string
        baz : float64
    >>> expr = (t.group_by('foo')
    ...          .order_by(ibis.desc('bar'))
    ...          .mutate(qux=lambda x: x.baz.lag(),
    ...                  qux2=t.baz.lead()))
    >>> print(expr)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    ref_0
    UnboundTable[table]
      name: t
      schema:
        foo : string
        bar : string
        baz : float64
    Selection[table]
      table:
        Table: ref_0
      selections:
        Table: ref_0
        qux = WindowOp[float64*]
          qux = Lag[float64*]
            baz = Column[float64*] 'baz' from table
              ref_0
            offset:
              None
            default:
              None
          <ibis.expr.window.Window object at 0x...>
        qux2 = WindowOp[float64*]
          qux2 = Lead[float64*]
            baz = Column[float64*] 'baz' from table
              ref_0
            offset:
              None
            default:
              None
          <ibis.expr.window.Window object at 0x...>

    Returns
    -------
    mutated : TableExpr
    """
    # promote_list may hand back the very list the caller passed in; copy it
    # so the appends below never leak into the caller's object.
    exprs = [] if exprs is None else list(util.promote_list(exprs))

    kwd_names = list(kwds.keys())
    kwd_values = self.table._resolve(list(kwds.values()))

    for name, value in sorted(zip(kwd_names, kwd_values)):
        exprs.append(value.name(name))

    return self.projection([self.table] + exprs)
def group_by(self, expr):
    """Return ``_replace`` of this object with ``expr`` added to its groups.

    ``expr`` is passed through ``util.promote_list`` and appended after the
    existing grouping keys.
    """
    additional = util.promote_list(expr)
    return self._replace(group_by=self._group_by + additional)
def order_by(self, expr):
    """Return ``_replace`` of this object with ``expr`` added to its sorts.

    ``expr`` is passed through ``util.promote_list`` and appended after the
    existing ordering keys.
    """
    additional = util.promote_list(expr)
    return self._replace(order_by=self._order_by + additional)
def _resolve_exprs(table, exprs):
    """Pass ``exprs`` through ``util.promote_list`` and ``table._resolve``."""
    return table._resolve(util.promote_list(exprs))
def _is_valid(self, exprs):
    """Return True if ``_assert_valid`` accepts ``exprs``, False otherwise.

    Any ``Exception`` raised during validation is reported as ``False``.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught,
    so an interrupt or interpreter shutdown is never mistaken for a failed
    validation.
    """
    try:
        self._assert_valid(util.promote_list(exprs))
    except Exception:
        return False
    return True
def __init__(self, types, fail_message, **arg_kwds):
    """Store the accepted types and failure message, then initialize the
    ``Argument`` base with the remaining keyword arguments.

    ``types`` is passed through ``util.promote_list`` before being stored.
    """
    accepted_types = util.promote_list(types)
    self.types = accepted_types
    self.fail_message = fail_message
    Argument.__init__(self, **arg_kwds)
def __init__(self, table_expr, sort_keys):
    """Store the table and its sort keys, then initialize the node.

    ``sort_keys`` is passed through ``util.promote_list`` and each entry is
    converted with ``to_sort_key`` against the stored table.
    """
    self.table = table_expr
    requested = util.promote_list(sort_keys)
    self.keys = list(to_sort_key(self.table, key) for key in requested)
    TableNode.__init__(self, [self.table, self.keys])