class Bucket(BucketLike):
    """Discretize a column into buckets delimited by explicit edges.

    Raises
    ------
    ValueError
        If no bucket edges are given, or if exactly one edge is given
        without both open-ended buckets enabled (a single edge alone
        cannot delimit any bucket).
    """

    arg = rlz.column(rlz.any)
    # Explicit, ordered bucket edges; each edge is a scalar expression.
    buckets = rlz.tuple_of(rlz.scalar(rlz.any))
    # Which side of each bucket is closed (inclusive).
    closed = rlz.optional(rlz.isin({'left', 'right'}), default='left')
    close_extreme = rlz.optional(rlz.instance_of(bool), default=True)
    # Optional open-ended buckets below the first / above the last edge.
    include_under = rlz.optional(rlz.instance_of(bool), default=False)
    include_over = rlz.optional(rlz.instance_of(bool), default=False)

    def __init__(self, buckets, include_under, include_over, **kwargs):
        # Validate the edge list before delegating to the base constructor.
        if not buckets:
            raise ValueError('Must be at least one bucket edge')
        elif len(buckets) == 1:
            # A single edge only makes sense with both open-ended buckets.
            if not include_under or not include_over:
                raise ValueError(
                    'If one bucket edge provided, must have '
                    'include_under=True and include_over=True'
                )
        super().__init__(
            buckets=buckets,
            include_under=include_under,
            include_over=include_over,
            **kwargs,
        )

    @property
    def nbuckets(self):
        # n edges delimit n - 1 interior buckets; bool flags add one
        # open-ended bucket each (True coerces to 1 in the sum).
        return len(self.buckets) - 1 + self.include_over + self.include_under
# Example op using the legacy list-based `rules` typing API.
class MyOp(ops.ValueOp):
    # One optional boolean scalar argument named 'value', defaulting to False.
    input_type = [
        rules.scalar(
            value_type=dt.boolean, optional=True, default=False, name='value'
        )
    ]
    # The output type mirrors the type of argument 0.
    output_type = rules.type_of_arg(0)
class Histogram(BucketLike):
    """Bucket a numeric column into a histogram.

    Exactly one of ``nbins`` (bucket count) or ``binwidth`` (bucket size)
    must be supplied; providing both or neither is an error.
    """

    arg = rlz.numeric
    nbins = rlz.optional(rlz.instance_of(int))
    binwidth = rlz.optional(rlz.scalar(rlz.numeric))
    base = rlz.optional(rlz.scalar(rlz.numeric))
    closed = rlz.optional(rlz.isin({'left', 'right'}), default='left')
    aux_hash = rlz.optional(rlz.instance_of(str))

    def __init__(self, nbins, binwidth, **kwargs):
        # Guard clauses: exactly one of the two sizing parameters is required.
        if nbins is None and binwidth is None:
            raise ValueError('Must indicate nbins or binwidth')
        if nbins is not None and binwidth is not None:
            raise ValueError('nbins and binwidth are mutually exclusive')
        super().__init__(nbins=nbins, binwidth=binwidth, **kwargs)

    @property
    def output_dtype(self):
        # always undefined cardinality (for now)
        return dt.category
@pytest.mark.parametrize(
    ('units', 'value', 'expected'),
    [
        ({'Y'}, ibis.interval(hours=1), IbisTypeError),
        ({'Y', 'M', 'D'}, ibis.interval(hours=1), IbisTypeError),
        ({'Q', 'W', 'D'}, ibis.interval(seconds=1), IbisTypeError),
    ],
)
def test_invalid_interval(units, value, expected):
    # An interval whose unit is outside the allowed `units` set is rejected.
    with pytest.raises(expected):
        rlz.interval(value, units=units)


@pytest.mark.parametrize(
    ('validator', 'value', 'expected'),
    [
        (rlz.column(rlz.any), table.int_col, table.int_col),
        (rlz.column(rlz.string), table.string_col, table.string_col),
        (rlz.scalar(rlz.integer), ibis.literal(3), ibis.literal(3)),
        # A raw Python scalar is promoted to a literal by the scalar rule.
        (rlz.scalar(rlz.any), 'caracal', ibis.literal('caracal')),
    ],
)
def test_valid_column_or_scalar(validator, value, expected):
    # Valid inputs pass through the validator unchanged (modulo promotion).
    result = validator(value)
    assert result.equals(expected)


@pytest.mark.parametrize(
    ('validator', 'value', 'expected'),
    [
        # Wrong element type for a column rule.
        (rlz.column(rlz.integer), table.double_col, IbisTypeError),
        # Scalars are rejected by column rules regardless of type.
        (rlz.column(rlz.any), ibis.literal(3), IbisTypeError),
        (rlz.column(rlz.integer), ibis.literal(3), IbisTypeError),
    ],
)
def test_invalid_column_or_scalar(validator, value, expected):
    with pytest.raises(expected):
        validator(value)
# Example op using the legacy list-based `rules` typing API.
class MyOp(ops.ValueOp):
    # One required numeric scalar argument.
    input_type = [rules.scalar(value_type=rules.number)]
    # Result is always a double, independent of the input's numeric type.
    output_type = rules.double
class NTile(Analytic):
    # Column being ranked into buckets.
    arg = rlz.column(rlz.any)
    # Number of buckets, as an integer scalar expression.
    buckets = rlz.scalar(rlz.integer)
    # Bucket index is always a 64-bit integer.
    output_dtype = dt.int64
class EndsWith(Value):
    # String expression to test.
    arg = rlz.string
    # Suffix to look for; must be a scalar string.
    end = rlz.scalar(rlz.string)
    output_dtype = dt.boolean
    # Result is columnar iff `arg` is columnar.
    output_shape = rlz.shape_like("arg")
class StartsWith(Value):
    # String expression to test.
    arg = rlz.string
    # Prefix to look for; must be a scalar string.
    start = rlz.scalar(rlz.string)
    output_dtype = dt.boolean
    # Result is columnar iff `arg` is columnar.
    output_shape = rlz.shape_like("arg")
({'Y'}, ibis.interval(hours=1), IbisTypeError), ({'Y', 'M', 'D'}, ibis.interval(hours=1), IbisTypeError), ({'Q', 'W', 'D'}, ibis.interval(seconds=1), IbisTypeError), ], ) def test_invalid_interval(units, value, expected): with pytest.raises(expected): rlz.interval(value, units=units) @pytest.mark.parametrize( ('validator', 'value', 'expected'), [ (rlz.column(rlz.any), table.int_col, table.int_col), (rlz.column(rlz.string), table.string_col, table.string_col), (rlz.scalar(rlz.integer), ibis.literal(3), ibis.literal(3)), (rlz.scalar(rlz.any), 'caracal', ibis.literal('caracal')), ], ) def test_valid_column_or_scalar(validator, value, expected): result = validator(value) assert result.equals(expected) @pytest.mark.parametrize( ('validator', 'value', 'expected'), [ (rlz.column(rlz.integer), table.double_col, IbisTypeError), (rlz.column(rlz.any), ibis.literal(3), IbisTypeError), (rlz.column(rlz.integer), ibis.literal(3), IbisTypeError), ],
class Aggregation(TableNode, sch.HasSchema):
    """
    metrics : per-group scalar aggregates
    by : group expressions
    having : post-aggregation predicate
    TODO: not putting this in the aggregate operation yet
    where : pre-aggregation predicate
    """

    # Input table all other expressions must be rooted in.
    table = rlz.table
    # Aggregate expressions: deferred functions of the table, reductions,
    # scalars, or (flattened) tuples thereof.
    metrics = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.function_of(
                    "table",
                    output_rule=rlz.one_of(
                        (rlz.reduction, rlz.scalar(rlz.any))),
                ),
                rlz.reduction,
                rlz.scalar(rlz.any),
                rlz.tuple_of(rlz.scalar(rlz.any)),
            )),
            flatten=True,
        ),
        default=(),
    )
    # Grouping keys: deferred functions, column references, or columns.
    by = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.function_of("table"),
                rlz.column_from("table"),
                rlz.column(rlz.any),
            ))),
        default=(),
    )
    # Post-aggregation boolean predicates.
    having = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.function_of("table", output_rule=rlz.scalar(rlz.boolean)),
                rlz.scalar(rlz.boolean),
            )),
        ),
        default=(),
    )
    # Pre-aggregation boolean filters.
    predicates = rlz.optional(rlz.tuple_of(rlz.boolean), default=())
    # Sort keys: a key expression, or a (key, ascending-flag) pair where the
    # flag accepts several truthy/falsy spellings mapped to True/False.
    sort_keys = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.column_from("table"),
                rlz.function_of("table"),
                rlz.sort_key(from_="table"),
                rlz.pair(
                    rlz.one_of((
                        rlz.column_from("table"),
                        rlz.function_of("table"),
                        rlz.any,
                    )),
                    rlz.map_to({
                        True: True,
                        False: False,
                        "desc": False,
                        "descending": False,
                        "asc": True,
                        "ascending": True,
                        1: True,
                        0: False,
                    }),
                ),
            ))),
        default=(),
    )

    def __init__(self, table, metrics, by, having, predicates, sort_keys):
        # Imported lazily; presumably to avoid a circular import — confirm.
        from ibis.expr.analysis import shares_all_roots, shares_some_roots

        # All non-scalar refs originate from the input table
        if not shares_all_roots(metrics + by + having + sort_keys, table):
            raise com.RelationError(
                "Selection expressions don't fully originate from "
                "dependencies of the table expression.")

        # invariant due to Aggregation and AggregateSelection requiring a valid
        # Selection
        assert all(
            shares_some_roots(predicate, table) for predicate in predicates)

        # Sorting without grouping keys is meaningless here, so drop the keys.
        if not by:
            sort_keys = tuple()

        super().__init__(
            table=table,
            metrics=metrics,
            by=by,
            having=having,
            predicates=predicates,
            sort_keys=sort_keys,
        )
        # Validate schema has no overlapping columns
        assert self.schema

    def blocks(self):
        # Aggregation is a blocking operation (result has a new schema).
        return True

    @util.deprecated(instead="instantiate Aggregation directly",
                     version="4.0.0")
    def substitute_table(self, table_expr):  # pragma: no cover
        # Rebuild this aggregation over a different table expression.
        # NOTE(review): predicates and sort_keys are NOT carried over here —
        # looks intentional for the deprecated path, but confirm.
        return Aggregation(table_expr, self.metrics, by=self.by,
                           having=self.having)

    @cached_property
    def schema(self):
        # Output schema: grouping keys first, then metrics.
        names = []
        types = []

        for e in self.by + self.metrics:
            if isinstance(e, ir.DestructValue):
                # If this is a destruct, then we destructure
                # the result and assign to multiple columns
                struct_type = e.type()
                for name in struct_type.names:
                    names.append(name)
                    types.append(struct_type[name])
            else:
                names.append(e.get_name())
                types.append(e.type())

        return sch.Schema(names, types)

    def sort_by(self, expr, sort_exprs):
        # Imported lazily; presumably to avoid a circular import — confirm.
        from ibis.expr.analysis import shares_all_roots

        resolved_keys = _maybe_convert_sort_keys([self.table, expr],
                                                 sort_exprs)
        if shares_all_roots(resolved_keys, self.table):
            # Keys resolve against our own table: fold the sort into this
            # aggregation rather than wrapping it in a Selection.
            return Aggregation(
                self.table,
                self.metrics,
                by=self.by,
                having=self.having,
                predicates=self.predicates,
                sort_keys=self.sort_keys + tuple(resolved_keys),
            )

        # Otherwise sort the aggregated result via an outer Selection.
        return Selection(expr, [], sort_keys=resolved_keys)