class _UnresolvedSubquery(Value, _Negatable):
    """An exists subquery whose outer leaf table is unknown.

    Notes
    -----
    Consider the following ibis expressions

    >>> t = ibis.table(dict(a="string"))
    >>> s = ibis.table(dict(a="string"))
    >>> cond = (t.a == s.a).any()

    Without knowing the table to use as the outer query there are two ways to
    turn this expression into a SQL `EXISTS` predicate depending on which of
    `t` or `s` is filtered on.

    Filtering from `t`:

    ```sql
    SELECT *
    FROM t
    WHERE EXISTS (SELECT 1 WHERE t.a = s.a)
    ```

    Filtering from `s`:

    ```sql
    SELECT *
    FROM s
    WHERE EXISTS (SELECT 1 WHERE t.a = s.a)
    ```

    Notably the subquery `(SELECT 1 WHERE t.a = s.a)` cannot stand on its own.

    The purpose of `_UnresolvedSubquery` is to capture enough information about
    an exists predicate such that it can be resolved when predicates are
    resolved against the outer leaf table when `Selection`s are constructed.
    """

    # Tables referenced by the predicate; which one is the outer table is not
    # yet known (see the class docstring).
    tables = rlz.tuple_of(rlz.table)
    # Boolean expressions that make up the body of the exists predicate.
    predicates = rlz.tuple_of(rlz.boolean)
    # The node always evaluates to a boolean column.
    output_dtype = dt.boolean
    output_shape = rlz.Shape.COLUMNAR

    @abc.abstractmethod
    def _resolve(
        self, table: ir.Table
    ) -> type[ExistsSubquery] | type[NotExistsSubquery]:  # pragma: no cover
        """Resolve this predicate against `table`; subclasses must implement.

        NOTE(review): the annotation says the *class* `ExistsSubquery` or
        `NotExistsSubquery` is returned rather than an instance -- confirm
        against the concrete implementations.
        """
        ...
def test_valid_list_of_extra():
    """Check `rlz.tuple_of` output and nested `rlz.list_of` coercion."""
    # A tuple validated element-wise by `identity` compares equal to the
    # same tuple.
    tuple_validator = rlz.tuple_of(identity)
    assert tuple_validator((3, 2)) == (3, 2)

    # Nested lists of strings: the inner 'a' must come back as something
    # that `.equals` an ibis string literal.
    nested_validator = rlz.list_of(rlz.list_of(rlz.string))
    validated = nested_validator([[], ['a']])
    assert validated[1][0].equals(ibis.literal('a'))
class Bucket(BucketLike):
    """Bucketing operation over a column using explicit bucket edges.

    Fields (declaration order is preserved on purpose):

    arg : column expression of any type
    buckets : tuple of scalar bucket edges; at least one is required
    closed : 'left' or 'right', defaults to 'left'
    close_extreme : bool, defaults to True
    include_under : bool, defaults to False
    include_over : bool, defaults to False
    """

    arg = rlz.column(rlz.any)
    buckets = rlz.tuple_of(rlz.scalar(rlz.any))
    closed = rlz.optional(rlz.isin({'left', 'right'}), default='left')
    close_extreme = rlz.optional(rlz.instance_of(bool), default=True)
    include_under = rlz.optional(rlz.instance_of(bool), default=False)
    include_over = rlz.optional(rlz.instance_of(bool), default=False)

    def __init__(self, buckets, include_under, include_over, **kwargs):
        """Validate the edge count before delegating to the base initializer.

        Raises
        ------
        ValueError
            If `buckets` is empty, or if exactly one edge is given without
            both `include_under` and `include_over` being truthy.
        """
        edge_count = len(buckets)
        if edge_count == 0:
            raise ValueError('Must be at least one bucket edge')
        # With a single edge the only buckets available are the open-ended
        # "under" and "over" ones, so both must be enabled.
        if edge_count == 1 and not (include_under and include_over):
            raise ValueError(
                'If one bucket edge provided, must have '
                'include_under=True and include_over=True'
            )
        super().__init__(
            buckets=buckets,
            include_under=include_under,
            include_over=include_over,
            **kwargs,
        )

    @property
    def nbuckets(self):
        """Number of buckets: interior intervals plus enabled open-ended ones."""
        interior = len(self.buckets) - 1
        return interior + self.include_over + self.include_under
class VectorizedUDF(Value):
    """Value node describing a call to a Python function on column arguments.

    Holds the function object, the column expressions passed to it, and the
    declared input and return datatypes.
    """

    # Must be a plain Python function or lambda.
    func = rlz.instance_of((FunctionType, LambdaType))
    # Column expressions (of any type) supplied as the function's arguments.
    func_args = rlz.tuple_of(rlz.column(rlz.any))
    # TODO(kszucs): should rename these arguments to
    # input_dtypes and return_dtype
    input_type = rlz.tuple_of(rlz.datatype)
    return_type = rlz.datatype

    @property
    def inputs(self):
        """Return the function arguments (alias of `func_args`)."""
        return self.func_args

    @property
    def output_dtype(self):
        """Return the declared `return_type` as this node's output dtype."""
        return self.return_type

    def root_tables(self):
        """Return `distinct_roots` computed over all function arguments."""
        return distinct_roots(*self.func_args)
class NotExistsSubquery(Value, _Negatable):
    """Boolean column node holding a foreign table and boolean predicates.

    `negate` yields an `ExistsSubquery` built from the same arguments.
    """

    foreign_table = rlz.table
    predicates = rlz.tuple_of(rlz.boolean)
    # The node always evaluates to a boolean column.
    output_dtype = dt.boolean
    output_shape = rlz.Shape.COLUMNAR

    def negate(self) -> ExistsSubquery:
        """Return an `ExistsSubquery` constructed from this node's args."""
        return ExistsSubquery(*self.args)
class DropNa(TableNode, sch.HasSchema):
    """Drop null values in the table."""

    # Input table.
    table = rlz.table
    # Must be 'any' or 'all'.
    # NOTE(review): presumably "drop a row if any / all of the considered
    # columns are null" -- confirm against the backends.
    how = rlz.isin({'any', 'all'})
    # Optional columns of `table` to consider; defaults to an empty tuple.
    subset = rlz.optional(rlz.tuple_of(rlz.column_from("table")), default=())

    @property
    def schema(self):
        """Return the schema of the input table unchanged."""
        return self.table.schema()
class ValueList(Value): """Data structure for a list of value expressions""" # NOTE: this proxies the Value behaviour to the underlying values values = rlz.tuple_of(rlz.any) output_type = ir.ValueList output_dtype = rlz.dtype_like("values") output_shape = rlz.shape_like("values") def root_tables(self): return distinct_roots(*self.values)
class CategoryLabel(Value):
    """String-valued node attaching labels to a category expression.

    Fields (declaration order is preserved on purpose):

    arg : category expression
    labels : tuple of str, one per category of `arg`
    nulls : optional str
    """

    arg = rlz.category
    labels = rlz.tuple_of(rlz.instance_of(str))
    nulls = rlz.optional(rlz.instance_of(str))

    output_dtype = dt.string
    output_shape = rlz.shape_like("arg")

    def __init__(self, arg, labels, **kwargs):
        """Check the label count against the category cardinality.

        Raises
        ------
        ValueError
            If `len(labels)` differs from `arg.type().cardinality`.
        """
        expected = arg.type().cardinality
        if len(labels) != expected:
            raise ValueError(
                'Number of labels must match number of '
                f'categories: {expected}'
            )
        super().__init__(arg=arg, labels=labels, **kwargs)
class Aggregation(TableNode, sch.HasSchema):
    """Table node for a grouped aggregation over `table`.

    metrics : per-group scalar aggregates
    by : group expressions
    having : post-aggregation predicate
    predicates : boolean expressions; each must share a root with `table`
    sort_keys : ordering keys; discarded when `by` is empty

    TODO: not putting this in the aggregate operation yet
    where : pre-aggregation predicate
    """

    table = rlz.table
    metrics = rlz.optional(
        rlz.tuple_of(
            rlz.one_of(
                (
                    rlz.function_of(
                        "table",
                        output_rule=rlz.one_of(
                            (rlz.reduction, rlz.scalar(rlz.any))
                        ),
                    ),
                    rlz.reduction,
                    rlz.scalar(rlz.any),
                    rlz.tuple_of(rlz.scalar(rlz.any)),
                )
            ),
            flatten=True,
        ),
        default=(),
    )
    by = rlz.optional(
        rlz.tuple_of(
            rlz.one_of(
                (
                    rlz.function_of("table"),
                    rlz.column_from("table"),
                    rlz.column(rlz.any),
                )
            )
        ),
        default=(),
    )
    having = rlz.optional(
        rlz.tuple_of(
            rlz.one_of(
                (
                    rlz.function_of(
                        "table", output_rule=rlz.scalar(rlz.boolean)
                    ),
                    rlz.scalar(rlz.boolean),
                )
            ),
        ),
        default=(),
    )
    predicates = rlz.optional(rlz.tuple_of(rlz.boolean), default=())
    sort_keys = rlz.optional(
        rlz.tuple_of(
            rlz.one_of(
                (
                    rlz.column_from("table"),
                    rlz.function_of("table"),
                    rlz.sort_key(from_="table"),
                    rlz.pair(
                        rlz.one_of(
                            (
                                rlz.column_from("table"),
                                rlz.function_of("table"),
                                rlz.any,
                            )
                        ),
                        rlz.map_to(
                            {
                                True: True,
                                False: False,
                                "desc": False,
                                "descending": False,
                                "asc": True,
                                "ascending": True,
                                1: True,
                                0: False,
                            }
                        ),
                    ),
                )
            )
        ),
        default=(),
    )

    def __init__(self, table, metrics, by, having, predicates, sort_keys):
        """Check root-table consistency, then initialize the node.

        Raises
        ------
        com.RelationError
            If the metrics, group keys, having clauses or sort keys do not
            all originate from `table`.
        """
        from ibis.expr.analysis import shares_all_roots, shares_some_roots

        # Every non-scalar reference has to come from the input table.
        if not shares_all_roots(metrics + by + having + sort_keys, table):
            raise com.RelationError(
                "Selection expressions don't fully originate from "
                "dependencies of the table expression."
            )

        # Invariant: Aggregation and AggregateSelection both require a valid
        # Selection, so each predicate already shares a root with the table.
        assert all(
            shares_some_roots(predicate, table) for predicate in predicates
        )

        # Sort keys are dropped when there are no group keys.
        if not by:
            sort_keys = ()

        super().__init__(
            table=table,
            metrics=metrics,
            by=by,
            having=having,
            predicates=predicates,
            sort_keys=sort_keys,
        )
        # Touch the schema so overlapping column names are detected now.
        # NOTE(review): this check is an `assert`, so it is skipped entirely
        # when Python runs with -O.
        assert self.schema

    def blocks(self):
        """Always True for an aggregation."""
        return True

    @util.deprecated(instead="instantiate Aggregation directly", version="4.0.0")
    def substitute_table(self, table_expr):  # pragma: no cover
        """Return a new Aggregation over `table_expr`.

        Only metrics, group keys and having clauses are carried over.
        """
        return Aggregation(
            table_expr, self.metrics, by=self.by, having=self.having
        )

    @cached_property
    def schema(self):
        """Schema made of the group keys followed by the metrics.

        A destructured struct expression contributes one column per struct
        field instead of a single column.
        """
        names = []
        types = []
        for node in self.by + self.metrics:
            if not isinstance(node, ir.DestructValue):
                names.append(node.get_name())
                types.append(node.type())
                continue
            # Destructure the struct and emit one column per field.
            struct_type = node.type()
            for field_name in struct_type.names:
                names.append(field_name)
                types.append(struct_type[field_name])
        return sch.Schema(names, types)

    def sort_by(self, expr, sort_exprs):
        """Return a node equivalent to this aggregation sorted by `sort_exprs`.

        When every resolved key originates from the input table the keys are
        appended to this aggregation's own sort keys; otherwise the sort is
        expressed as a new `Selection` over `expr`.
        """
        from ibis.expr.analysis import shares_all_roots

        keys = _maybe_convert_sort_keys([self.table, expr], sort_exprs)
        if not shares_all_roots(keys, self.table):
            return Selection(expr, [], sort_keys=keys)
        return Aggregation(
            self.table,
            self.metrics,
            by=self.by,
            having=self.having,
            predicates=self.predicates,
            sort_keys=self.sort_keys + tuple(keys),
        )
class Selection(TableNode, sch.HasSchema):
    """Table node combining projections, filters and sort keys over `table`.

    table : input table
    selections : tables and/or value expressions to project; when empty the
        schema of `table` is used unchanged
    predicates : boolean filter expressions; each must share a root with
        `table`
    sort_keys : ordering keys
    """

    table = rlz.table
    selections = rlz.optional(
        rlz.tuple_of(
            rlz.one_of(
                (
                    rlz.table,
                    rlz.column_from("table"),
                    rlz.function_of("table"),
                    rlz.any,
                )
            )
        ),
        default=(),
    )
    predicates = rlz.optional(rlz.tuple_of(rlz.boolean), default=())
    sort_keys = rlz.optional(
        rlz.tuple_of(
            rlz.one_of(
                (
                    rlz.column_from("table"),
                    rlz.function_of("table"),
                    rlz.sort_key(from_="table"),
                    rlz.pair(
                        rlz.one_of(
                            (
                                rlz.column_from("table"),
                                rlz.function_of("table"),
                                rlz.any,
                            )
                        ),
                        # Maps the second element of a (key, order) pair to a
                        # bool. 1/0 are the same dict keys as True/False, so
                        # those two entries are redundant but harmless.
                        rlz.map_to(
                            {
                                True: True,
                                False: False,
                                "desc": False,
                                "descending": False,
                                "asc": True,
                                "ascending": True,
                                1: True,
                                0: False,
                            }
                        ),
                    ),
                )
            )
        ),
        default=(),
    )

    def __init__(self, table, selections, predicates, sort_keys, **kwargs):
        """Check root-table consistency, then initialize the node.

        Raises
        ------
        com.RelationError
            If the selections or sort keys do not all originate from
            `table`, or if any predicate shares no root with `table`.
        """
        from ibis.expr.analysis import shares_all_roots, shares_some_roots

        if not shares_all_roots(selections + sort_keys, table):
            raise com.RelationError(
                "Selection expressions don't fully originate from "
                "dependencies of the table expression."
            )

        for predicate in predicates:
            if not shares_some_roots(predicate, table):
                raise com.RelationError(
                    "Predicate doesn't share any roots with table"
                )

        super().__init__(
            table=table,
            selections=selections,
            predicates=predicates,
            sort_keys=sort_keys,
            **kwargs,
        )

        # Validate no overlapping columns in schema
        assert self.schema

    @cached_property
    def _projection(self):
        """Same node type with only `table` and `selections` set."""
        return self.__class__(table=self.table, selections=self.selections)

    @cached_property
    def schema(self):
        """Schema of the projected columns.

        Falls back to the input table's schema when there are no selections.
        Destructured struct columns contribute one column per struct field,
        and projected tables contribute all of their columns.
        """
        # Resolve schema and initialize
        if not self.selections:
            return self.table.schema()

        types = []
        names = []

        for projection in self.selections:
            if isinstance(projection, ir.DestructColumn):
                # If this is a destruct, then we destructure
                # the result and assign to multiple columns
                struct_type = projection.type()
                for name in struct_type.names:
                    names.append(name)
                    types.append(struct_type[name])
            elif isinstance(projection, ir.Value):
                names.append(projection.get_name())
                types.append(projection.type())
            elif isinstance(projection, ir.Table):
                schema = projection.schema()
                names.extend(schema.names)
                types.extend(schema.types)

        return sch.Schema(names, types)

    def blocks(self):
        """True when this node has explicit selections."""
        return bool(self.selections)

    @util.deprecated(instead="instantiate Selection directly", version="4.0.0")
    def substitute_table(self, table_expr):  # pragma: no cover
        """Return a new Selection of the same `selections` over `table_expr`."""
        return Selection(table_expr, self.selections)

    def root_tables(self):
        """A selection is its own root table."""
        return [self]

    @util.deprecated(instead="", version="4.0.0")
    def can_add_filters(self, wrapped_expr, predicates):  # pragma: no cover
        """Deprecated no-op kept for backwards compatibility."""
        pass

    @util.deprecated(instead="", version="4.0.0")
    def empty_or_equal(self, other) -> bool:  # pragma: no cover
        """Return True if each compared field is empty on a side or equal.

        `selections`, `sort_keys` and `predicates` are compared in turn. A
        field is acceptable when it is empty on either side, or when both
        sides have the same length and every pair of items `.equals` each
        other.
        """
        for field in "selections", "sort_keys", "predicates":
            selfs = getattr(self, field)
            others = getattr(other, field)
            if not selfs or not others:
                continue
            # The element-wise comparison must be reduced with `all`: a bare
            # generator object is always truthy, which previously made this
            # method return True unconditionally. `zip` stops at the shorter
            # input, so lengths are compared explicitly as well.
            if len(selfs) != len(others) or not all(
                a.equals(b) for a, b in zip(selfs, others)
            ):
                return False
        return True

    @util.deprecated(instead="", version="4.0.0")
    def compatible_with(self, other):  # pragma: no cover
        """Return True if `other` is equal or differs only by empty fields."""
        # self and other are equivalent except for predicates, selections, or
        # sort keys any of which is allowed to be empty. If both are not empty
        # then they must be equal
        if self.equals(other):
            return True

        if not isinstance(other, type(self)):
            return False

        return self.table.equals(other.table) and self.empty_or_equal(other)

    def aggregate(self, this, metrics, by=None, having=None):
        """Build an aggregation on top of `this`.

        With explicit selections the aggregation is applied directly;
        otherwise construction is delegated to `AggregateSelection`.
        """
        if self.selections:
            return Aggregation(this, metrics, by=by, having=having)

        helper = AggregateSelection(this, metrics, by, having)
        return helper.get_result()

    def sort_by(self, expr, sort_exprs):
        """Return a selection equivalent to this one sorted by `sort_exprs`.

        When this node has no selections and every resolved key originates
        from the input table, the keys are appended to this selection's own
        sort keys; otherwise the sort is expressed as a new `Selection` over
        `expr`.
        """
        from ibis.expr.analysis import shares_all_roots

        resolved_keys = _maybe_convert_sort_keys(
            [self.table, expr], sort_exprs
        )
        if not self.blocks() and shares_all_roots(resolved_keys, self.table):
            return Selection(
                self.table,
                self.selections,
                predicates=self.predicates,
                sort_keys=self.sort_keys + tuple(resolved_keys),
            )

        return Selection(expr, [], sort_keys=resolved_keys)