class ShiftBase(Analytic):
    """Base class for window shift operations (presumably lead/lag —
    subclasses are not visible here)."""

    # Column whose values are shifted.
    arg = rlz.column(rlz.any)
    # How far to shift: a row count, or an interval for time-based shifts.
    offset = rlz.optional(rlz.one_of((rlz.integer, rlz.interval)))
    # Fill value for positions shifted past the column's bounds.
    default = rlz.optional(rlz.any)
    # Result type mirrors the shifted column's type.
    output_dtype = rlz.dtype_like("arg")
class BaseConvert(Value):
    """Convert an integer or string between numeric bases."""

    # Value to convert; digits beyond base 10 require a string.
    arg = rlz.one_of([rlz.integer, rlz.string])
    from_base = rlz.integer
    to_base = rlz.integer
    # The converted value is always rendered as a string.
    output_dtype = dt.string
    output_shape = rlz.shape_like("args")
class Literal(Value):
    """A constant value embedded in an expression tree."""

    # Accept any concrete Python value of a supported type, or anything
    # the computable-input rule recognizes.
    value = rlz.one_of((
        rlz.instance_of((
            BaseGeometry,
            bytes,
            datetime.date,
            datetime.datetime,
            datetime.time,
            datetime.timedelta,
            enum.Enum,
            float,
            frozenset,
            int,
            frozendict,
            np.generic,
            np.ndarray,
            pd.Timedelta,
            pd.Timestamp,
            str,
            tuple,
            type(None),
            uuid.UUID,
            decimal.Decimal,
        )),
        rlz.is_computable_input,
    ))
    dtype = rlz.datatype
    # Literals are always scalar; the output dtype is simply the
    # validated ``dtype`` attribute.
    output_shape = rlz.Shape.SCALAR
    output_dtype = property(attrgetter("dtype"))

    def root_tables(self):
        # A literal depends on no tables.
        return []
class BaseConvert(ValueOp):
    """Convert between numeric bases.

    NOTE(review): this uses the legacy ``input_type``/``rules`` API, unlike
    the ``rlz``-based ``BaseConvert(Value)`` definition elsewhere — these
    presumably come from different file versions; confirm which is live.
    """

    input_type = [
        rules.one_of([integer, string]),
        integer(name='from_base'),
        integer(name='to_base')
    ]
    # Result is a string, shaped like the flattened arguments.
    output_type = rules.shape_like_flatargs('string')
class MapValueForKey(Value):
    """Retrieve the value stored under ``key`` in a map value."""

    arg = rlz.mapping
    # Map keys here are restricted to strings or integers.
    key = rlz.one_of([rlz.string, rlz.integer])
    output_shape = rlz.shape_like("args")

    @immutable_property
    def output_dtype(self):
        # The result type is the map's declared value type.
        return self.arg.type().value_type
class Contains(Value):
    """Membership test of ``value`` against ``options`` (an IN predicate)."""

    value = rlz.any
    # The searched collection may be a literal value list, a set, a
    # column, or an array expression.
    options = rlz.one_of([
        rlz.value_list_of(rlz.any),
        rlz.set_,
        rlz.column(rlz.any),
        rlz.array_of(rlz.any),
    ])
    output_dtype = dt.boolean
    output_shape = rlz.shape_like("args")
class TopK(Node):
    """Select the top ``k`` values of ``arg``, ranked by ``by``."""

    arg = rlz.column(rlz.any)
    k = rlz.non_negative_integer
    by = rlz.one_of((rlz.function_of(rlz.base_table_of("arg")), rlz.any))
    output_type = ir.TopK

    def blocks(self):  # pragma: no cover
        return True

    def root_tables(self):  # pragma: no cover
        # Collect only the expression-valued arguments and take the
        # distinct roots across all of them.
        exprs = [a for a in self.flat_args() if isinstance(a, ir.Expr)]
        return distinct_roots(*exprs)
class MapValueOrDefaultForKey(Value):
    """Retrieve the value stored under ``key`` in a map, falling back to
    ``default`` when the key is absent.

    Raises
    ------
    com.IbisTypeError
        If the default's type is not the same kind as the map's value type.
    """

    arg = rlz.mapping
    # Map keys here are restricted to strings or integers.
    key = rlz.one_of([rlz.string, rlz.integer])
    default = rlz.any
    output_shape = rlz.shape_like("args")

    # Cached, matching MapValueForKey.output_dtype: the type check and
    # precedence computation are deterministic for an immutable node, so
    # there is no reason to re-run them on every access (the original
    # used a plain @property).
    @immutable_property
    def output_dtype(self):
        value_type = self.arg.type().value_type
        default_type = self.default.type()
        if not dt.same_kind(default_type, value_type):
            raise com.IbisTypeError(
                "Default value\n{}\nof type {} cannot be cast to map's value "
                "type {}".format(self.default, default_type, value_type))
        # Result type is the more general of the two compatible types.
        return dt.highest_precedence((default_type, value_type))
def __rsub__(
    self,
    other: datetime.date
    | DateValue
    | datetime.timedelta
    | pd.Timedelta
    | IntervalValue,
) -> IntervalValue | DateValue | NotImplemented:
    """Subtract a date or an interval from a date."""
    import ibis.expr.operations as ops
    import ibis.expr.rules as rlz

    other = rlz.one_of([rlz.date, rlz.interval], other)
    # date - date yields an interval (DateDiff); otherwise defer to
    # DateSub, which validates the operands itself.
    op = ops.DateDiff if isinstance(other, DateValue) else ops.DateSub
    return _binop(op, other, self)
class FillNa(TableNode, sch.HasSchema):
    """Fill null values in the table."""

    table = rlz.table
    # Either a single scalar replacement, or a mapping of column name to
    # per-column replacement value.
    replacements = rlz.one_of((
        rlz.numeric,
        rlz.string,
        rlz.instance_of(collections.abc.Mapping),
    ))

    def __init__(self, table, replacements, **kwargs):
        # Freeze mapping replacements so the node remains immutable and
        # hashable.
        if isinstance(replacements, collections.abc.Mapping):
            replacements = util.frozendict(replacements)
        super().__init__(table=table, replacements=replacements, **kwargs)

    @property
    def schema(self):
        # Filling nulls never changes the table's schema.
        return self.table.schema()
class Count(Filterable, Reduction):
    """Count rows of a table or non-null values of a column."""

    arg = rlz.one_of((rlz.column(rlz.any), rlz.table))
    output_dtype = dt.int64
class BetweenTime(Between):
    """Between predicate specialized to time-of-day bounds."""

    arg = rlz.one_of([rlz.timestamp, rlz.time])
    # Bounds may be time values or strings (presumably time-of-day
    # strings parsed downstream — not visible here).
    lower_bound = rlz.one_of([rlz.time, rlz.string])
    upper_bound = rlz.one_of([rlz.time, rlz.string])
class DayOfWeekNode(Node):
    """Parent node of the day-of-week accessor expressions."""

    arg = rlz.one_of([rlz.date, rlz.timestamp])
    output_type = ir.DayOfWeek
class DayOfWeekName(Unary):
    """Day-of-week name (string) of a date or timestamp."""

    arg = rlz.one_of([rlz.date, rlz.timestamp])
    output_dtype = dt.string
class DayOfWeekIndex(Unary):
    """Day-of-week index (small integer) of a date or timestamp."""

    arg = rlz.one_of([rlz.date, rlz.timestamp])
    output_dtype = dt.int16
class ExtractTimeField(ExtractTemporalField):
    """Extract a time component (hour/minute/etc. — defined by subclasses)
    from a time or timestamp value."""

    arg = rlz.one_of([rlz.time, rlz.timestamp])
class Selection(TableNode, sch.HasSchema):
    """Projection/filter/sort over a table expression.

    ``selections`` pick output columns (or whole tables), ``predicates``
    filter rows, and ``sort_keys`` order the result.
    """

    table = rlz.table
    selections = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.table,
                rlz.column_from("table"),
                rlz.function_of("table"),
                rlz.any,
            ))),
        default=(),
    )
    predicates = rlz.optional(rlz.tuple_of(rlz.boolean), default=())
    sort_keys = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.column_from("table"),
                rlz.function_of("table"),
                rlz.sort_key(from_="table"),
                # A (key, order) pair; the mapping normalizes the many
                # accepted "ascending" spellings to a boolean.
                rlz.pair(
                    rlz.one_of((
                        rlz.column_from("table"),
                        rlz.function_of("table"),
                        rlz.any,
                    )),
                    rlz.map_to({
                        True: True,
                        False: False,
                        "desc": False,
                        "descending": False,
                        "asc": True,
                        "ascending": True,
                        1: True,
                        0: False,
                    }),
                ),
            ))),
        default=(),
    )

    def __init__(self, table, selections, predicates, sort_keys, **kwargs):
        from ibis.expr.analysis import shares_all_roots, shares_some_roots

        # Selections and sort keys must be derived entirely from the
        # input table; predicates only need to share some roots with it.
        if not shares_all_roots(selections + sort_keys, table):
            raise com.RelationError(
                "Selection expressions don't fully originate from "
                "dependencies of the table expression.")
        for predicate in predicates:
            if not shares_some_roots(predicate, table):
                raise com.RelationError(
                    "Predicate doesn't share any roots with table")
        super().__init__(
            table=table,
            selections=selections,
            predicates=predicates,
            sort_keys=sort_keys,
            **kwargs,
        )
        # Validate no overlapping columns in schema
        assert self.schema

    @cached_property
    def _projection(self):
        # The same selection without predicates or sort keys.
        return self.__class__(table=self.table, selections=self.selections)

    @cached_property
    def schema(self):
        # With no explicit selections the schema passes through unchanged.
        if not self.selections:
            return self.table.schema()
        types = []
        names = []
        for projection in self.selections:
            if isinstance(projection, ir.DestructColumn):
                # If this is a destruct, then we destructure
                # the result and assign to multiple columns
                struct_type = projection.type()
                for name in struct_type.names:
                    names.append(name)
                    types.append(struct_type[name])
            elif isinstance(projection, ir.Value):
                names.append(projection.get_name())
                types.append(projection.type())
            elif isinstance(projection, ir.Table):
                schema = projection.schema()
                names.extend(schema.names)
                types.extend(schema.types)
        return sch.Schema(names, types)

    def blocks(self):
        # A bare filter/sort (no selections) does not block root lookup.
        return bool(self.selections)

    @util.deprecated(instead="instantiate Selection directly", version="4.0.0")
    def substitute_table(self, table_expr):  # pragma: no cover
        return Selection(table_expr, self.selections)

    def root_tables(self):
        return [self]

    @util.deprecated(instead="", version="4.0.0")
    def can_add_filters(self, wrapped_expr, predicates):  # pragma: no cover
        pass

    @util.deprecated(instead="", version="4.0.0")
    def empty_or_equal(self, other) -> bool:  # pragma: no cover
        for field in "selections", "sort_keys", "predicates":
            selfs = getattr(self, field)
            others = getattr(other, field)
            # BUG FIX: the original assigned a bare generator expression,
            # which is always truthy, so this method always returned True.
            # Wrap the element-wise comparison in all().
            valid = (
                not selfs
                or not others
                or all(a.equals(b) for a, b in zip(selfs, others))
            )
            if not valid:
                return False
        return True

    @util.deprecated(instead="", version="4.0.0")
    def compatible_with(self, other):  # pragma: no cover
        # self and other are equivalent except for predicates, selections, or
        # sort keys any of which is allowed to be empty. If both are not empty
        # then they must be equal
        if self.equals(other):
            return True
        if not isinstance(other, type(self)):
            return False
        return self.table.equals(other.table) and self.empty_or_equal(other)

    def aggregate(self, this, metrics, by=None, having=None):
        if len(self.selections) > 0:
            return Aggregation(this, metrics, by=by, having=having)
        else:
            helper = AggregateSelection(this, metrics, by, having)
            return helper.get_result()

    def sort_by(self, expr, sort_exprs):
        from ibis.expr.analysis import shares_all_roots

        resolved_keys = _maybe_convert_sort_keys([self.table, expr],
                                                 sort_exprs)
        # A non-blocking selection whose new keys still derive from the
        # same table can absorb the sort instead of nesting a new node.
        if not self.blocks():
            if shares_all_roots(resolved_keys, self.table):
                return Selection(
                    self.table,
                    self.selections,
                    predicates=self.predicates,
                    sort_keys=self.sort_keys + tuple(resolved_keys),
                )
        return Selection(expr, [], sort_keys=resolved_keys)
class HashBytes(Value):
    """Compute a hash digest of a string or binary value, returning the
    raw digest bytes."""

    # BUG FIX: the original wrapped the rules in a set literal; set
    # iteration order is not guaranteed, and one_of tries rules in order,
    # so validation/coercion precedence was nondeterministic. A tuple
    # preserves the intended string-before-binary order.
    arg = rlz.one_of((rlz.value(dt.string), rlz.value(dt.binary)))
    # Digest algorithm (a set is fine here: isin is a membership test).
    how = rlz.isin({'md5', 'sha1', 'sha256', 'sha512'})
    output_dtype = dt.binary
    output_shape = rlz.shape_like("arg")
class HashBytes(ValueOp):
    """Hash-digest op using the legacy ``Arg`` API.

    NOTE(review): the 'farm_fingerprint' algorithm suggests this is a
    backend-specific (BigQuery-style) variant — confirm against the
    enclosing module.
    """

    arg = Arg(rlz.one_of([rlz.value(dt.string), rlz.value(dt.binary)]))
    how = Arg(rlz.isin({"sha256", "farm_fingerprint"}))
    # Result shaped like the input, with binary dtype.
    output_type = rlz.shape_like("arg", "binary")
class Aggregation(TableNode, sch.HasSchema):
    """
    metrics : per-group scalar aggregates
    by : group expressions
    having : post-aggregation predicate
    TODO: not putting this in the aggregate operation yet
    where : pre-aggregation predicate
    """

    table = rlz.table
    # Metrics must reduce to scalars; nested tuples are flattened.
    metrics = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.function_of(
                    "table",
                    output_rule=rlz.one_of(
                        (rlz.reduction, rlz.scalar(rlz.any))),
                ),
                rlz.reduction,
                rlz.scalar(rlz.any),
                rlz.tuple_of(rlz.scalar(rlz.any)),
            )),
            flatten=True,
        ),
        default=(),
    )
    by = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.function_of("table"),
                rlz.column_from("table"),
                rlz.column(rlz.any),
            ))),
        default=(),
    )
    # Post-aggregation filters must evaluate to boolean scalars.
    having = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.function_of("table", output_rule=rlz.scalar(rlz.boolean)),
                rlz.scalar(rlz.boolean),
            )),
        ),
        default=(),
    )
    predicates = rlz.optional(rlz.tuple_of(rlz.boolean), default=())
    sort_keys = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.column_from("table"),
                rlz.function_of("table"),
                rlz.sort_key(from_="table"),
                # A (key, order) pair; the mapping normalizes accepted
                # "ascending" spellings to a boolean.
                rlz.pair(
                    rlz.one_of((
                        rlz.column_from("table"),
                        rlz.function_of("table"),
                        rlz.any,
                    )),
                    rlz.map_to({
                        True: True,
                        False: False,
                        "desc": False,
                        "descending": False,
                        "asc": True,
                        "ascending": True,
                        1: True,
                        0: False,
                    }),
                ),
            ))),
        default=(),
    )

    def __init__(self, table, metrics, by, having, predicates, sort_keys):
        from ibis.expr.analysis import shares_all_roots, shares_some_roots

        # All non-scalar refs originate from the input table
        if not shares_all_roots(metrics + by + having + sort_keys, table):
            raise com.RelationError(
                "Selection expressions don't fully originate from "
                "dependencies of the table expression.")
        # invariant due to Aggregation and AggregateSelection requiring a
        # valid Selection
        assert all(
            shares_some_roots(predicate, table) for predicate in predicates)
        # Sorting an ungrouped (single-row) aggregation is meaningless.
        if not by:
            sort_keys = tuple()
        super().__init__(
            table=table,
            metrics=metrics,
            by=by,
            having=having,
            predicates=predicates,
            sort_keys=sort_keys,
        )
        # Validate schema has no overlapping columns
        assert self.schema

    def blocks(self):
        return True

    @util.deprecated(instead="instantiate Aggregation directly",
                     version="4.0.0")
    def substitute_table(self, table_expr):  # pragma: no cover
        return Aggregation(table_expr, self.metrics, by=self.by,
                           having=self.having)

    @cached_property
    def schema(self):
        names = []
        types = []
        for e in self.by + self.metrics:
            if isinstance(e, ir.DestructValue):
                # If this is a destruct, then we destructure
                # the result and assign to multiple columns
                struct_type = e.type()
                for name in struct_type.names:
                    names.append(name)
                    types.append(struct_type[name])
            else:
                names.append(e.get_name())
                types.append(e.type())
        return sch.Schema(names, types)

    def sort_by(self, expr, sort_exprs):
        from ibis.expr.analysis import shares_all_roots

        resolved_keys = _maybe_convert_sort_keys([self.table, expr],
                                                 sort_exprs)
        # Absorb the sort into this node when the keys derive from the
        # same table; otherwise wrap the result in a new Selection.
        if shares_all_roots(resolved_keys, self.table):
            return Aggregation(
                self.table,
                self.metrics,
                by=self.by,
                having=self.having,
                predicates=self.predicates,
                sort_keys=self.sort_keys + tuple(resolved_keys),
            )
        return Selection(expr, [], sort_keys=resolved_keys)
class Negate(Unary):
    """Arithmetic negation of a numeric or interval value."""

    arg = rlz.one_of((rlz.numeric, rlz.interval))
    # Negation preserves the operand's type.
    output_dtype = rlz.dtype_like("arg")