def Argument(validator, default=EMPTY):
    """Argument constructor

    Normalizes ``validator`` into a validator object and, when a default is
    supplied, wraps it in ``Optional``.

    Parameters
    ----------
    validator : Union[Callable[[arg], coerced], Type, Tuple[Type]]
        Function which handles validation and/or coercion of the given
        argument. ``Validator`` instances are used as-is, types and tuples
        of types are turned into ``rlz.instance_of`` checks, and any other
        callable is wrapped in ``ValidatorFunction``.
    default : Union[Any, Callable[[], str]]
        In case of missing (None) value for validation this will be used.
        Note, that default value (except for None) must also pass the inner
        validator.
        If callable is passed, it will be executed just before the inner,
        and its return value will be treated as default.

    Returns
    -------
    The normalized validator when ``default`` is ``EMPTY``; otherwise
    ``Optional(validator, default=default)``.

    Raises
    ------
    TypeError
        If ``validator`` is not a ``Validator``, a type, a tuple, or a
        callable.
    """
    if isinstance(validator, Validator):
        # Already a validator object: nothing to convert.
        pass
    elif isinstance(validator, type):
        validator = rlz.instance_of(validator)
    elif isinstance(validator, tuple):
        # Kept as an assert so that the exception type seen by callers is
        # unchanged (note that asserts are skipped under ``python -O``).
        assert util.all_of(validator, type)
        validator = rlz.instance_of(validator)
    elif callable(validator):
        validator = ValidatorFunction(validator)
    else:
        raise TypeError('Argument validator must be a callable, type or '
                        'tuple of types, given: {}'.format(validator))

    if default is EMPTY:
        return validator
    else:
        return Optional(validator, default=default)
class Bucket(BucketLike):
    """Bucketing operation described by an explicit sequence of edges.

    The constructor checks the number of edges before delegating to the
    parent constructor.
    """

    arg = rlz.column(rlz.any)
    buckets = rlz.tuple_of(rlz.scalar(rlz.any))
    closed = rlz.optional(rlz.isin({'left', 'right'}), default='left')
    close_extreme = rlz.optional(rlz.instance_of(bool), default=True)
    include_under = rlz.optional(rlz.instance_of(bool), default=False)
    include_over = rlz.optional(rlz.instance_of(bool), default=False)

    def __init__(self, buckets, include_under, include_over, **kwargs):
        """Validate the bucket edges, then initialize the parent.

        Raises
        ------
        ValueError
            If ``buckets`` is empty, or if it holds a single edge without
            both ``include_under`` and ``include_over`` being truthy.
        """
        edge_count = len(buckets)
        if edge_count == 0:
            raise ValueError('Must be at least one bucket edge')
        # A single edge only defines buckets when both open-ended sides
        # are requested.
        if edge_count == 1 and not (include_under and include_over):
            raise ValueError(
                'If one bucket edge provided, must have '
                'include_under=True and include_over=True'
            )
        super().__init__(
            buckets=buckets,
            include_under=include_under,
            include_over=include_over,
            **kwargs,
        )

    @property
    def nbuckets(self):
        """Number of buckets: edges minus one, plus each enabled overflow side."""
        between_edges = len(self.buckets) - 1
        return between_edges + self.include_over + self.include_under
def __init__(self, validator, default=_undefined, show=True):
    """Argument constructor

    Parameters
    ----------
    validator : Union[Callable[[arg], coerced], Type, Tuple[Type]]
        Function which handles validation and/or coercion of the given
        argument. A type or a tuple of types is converted into an
        ``rlz.instance_of`` check; any other callable is stored unchanged.
    default : Union[Any, Callable[[], str]]
        In case of missing (None) value for validation this will be used.
        Note, that default value (except for None) must also pass the inner
        validator.
        If callable is passed, it will be executed just before the inner,
        and its return value will be treated as default.
    show : bool
        Whether to show this argument in an :class:`~ibis.expr.types.Expr`
        that contains it.

    Raises
    ------
    TypeError
        If ``validator`` is neither a type, a tuple, nor a callable.
    """
    self.default = default
    self.show = show

    # Each accepted validator form is handled by its own guard clause.
    if isinstance(validator, type):
        self.validator = rlz.instance_of(validator)
        return

    if isinstance(validator, tuple):
        assert util.all_of(validator, type)
        self.validator = rlz.instance_of(validator)
        return

    if callable(validator):
        self.validator = validator
        return

    raise TypeError(
        'Argument validator must be a callable, type or '
        'tuple of types, given: {}'.format(validator)
    )
class DatabaseTable(PhysicalTable):
    """Physical table described by a name, a schema and a source."""

    name = rlz.instance_of(str)
    schema = rlz.instance_of(sch.Schema)
    source = rlz.client

    def change_name(self, new_name):
        """Return a new node of the same class that uses ``new_name``.

        The second positional argument and the source of this node are
        passed through unchanged.
        """
        node_class = type(self)
        # presumably the schema, given the field order above -- verify
        second_arg = self.args[1]
        return node_class(new_name, second_arg, self.source)
def __init__(self, validator, default=_undefined, show=True):
    """Argument constructor

    Parameters
    ----------
    validator : Union[Callable[[arg], coerced], Type, Tuple[Type]]
        Function which handles validation and/or coercion of the given
        argument. Types and tuples of types become ``rlz.instance_of``
        checks, other callables are kept as they are.
    default : Union[Any, Callable[[], str]]
        In case of missing (None) value for validation this will be used.
        Note, that default value (except for None) must also pass the inner
        validator.
        If callable is passed, it will be executed just before the inner,
        and its return value will be treated as default.
    show : bool
        Whether to show this argument in an :class:`~ibis.expr.types.Expr`
        that contains it.

    Raises
    ------
    TypeError
        If ``validator`` is neither a type, a tuple, nor a callable.
    """
    self.default = default
    self.show = show

    # Resolve the validator into a local first, then store it once.
    if isinstance(validator, type):
        resolved = rlz.instance_of(validator)
    elif isinstance(validator, tuple):
        assert util.all_of(validator, type)
        resolved = rlz.instance_of(validator)
    elif callable(validator):
        resolved = validator
    else:
        raise TypeError(
            'Argument validator must be a callable, type or '
            'tuple of types, given: {}'.format(validator)
        )

    self.validator = resolved
class SQLQueryResult(TableNode, sch.HasSchema):
    """A table sourced from the result set of a select query"""

    # The query text is carried as a plain string.
    query = rlz.instance_of(str)
    schema = rlz.instance_of(sch.Schema)
    source = rlz.client

    def blocks(self):
        """Return ``True`` unconditionally."""
        return True
class UnboundTable(PhysicalTable):
    """Physical table described by a schema and an optional name."""

    schema = rlz.instance_of(sch.Schema)
    # `genname` is supplied as the default when no name is given.
    name = rlz.optional(rlz.instance_of(str), default=genname)

    def has_resolved_name(self):
        """Return ``True`` unconditionally."""
        return True

    def resolve_name(self):
        """Return the ``name`` field."""
        return self.name
class SQLStringView(PhysicalTable):
    """A view created from a SQL string."""

    child = rlz.table
    name = rlz.instance_of(str)
    query = rlz.instance_of(str)

    @cached_property
    def schema(self):
        """Schema obtained by asking the child's backend about ``query``."""
        return self.child._find_backend()._get_schema_using_query(self.query)
class CategoryLabel(Value):
    """Value operation pairing a category argument with string labels."""

    arg = rlz.category
    labels = rlz.tuple_of(rlz.instance_of(str))
    nulls = rlz.optional(rlz.instance_of(str))

    output_dtype = dt.string
    output_shape = rlz.shape_like("arg")

    def __init__(self, arg, labels, **kwargs):
        """Check that there is one label per category, then initialize.

        Raises
        ------
        ValueError
            If ``len(labels)`` differs from the cardinality of ``arg``'s
            type.
        """
        expected_count = arg.type().cardinality
        if len(labels) != expected_count:
            message = (
                'Number of labels must match number of '
                f'categories: {expected_count}'
            )
            raise ValueError(message)
        super().__init__(arg=arg, labels=labels, **kwargs)
class Literal(Value):
    """Scalar value operation holding a Python-level value and its dtype."""

    # `value` is accepted either as an instance of one of the listed
    # Python/NumPy/pandas types or by `rlz.is_computable_input`.
    value = rlz.one_of((
        rlz.instance_of((
            BaseGeometry,
            bytes,
            datetime.date,
            datetime.datetime,
            datetime.time,
            datetime.timedelta,
            enum.Enum,
            float,
            frozenset,
            int,
            frozendict,
            np.generic,
            np.ndarray,
            pd.Timedelta,
            pd.Timestamp,
            str,
            tuple,
            type(None),
            uuid.UUID,
            decimal.Decimal,
        )),
        rlz.is_computable_input,
    ))
    dtype = rlz.datatype

    output_shape = rlz.Shape.SCALAR
    # The output dtype is read straight from the `dtype` field.
    output_dtype = property(attrgetter("dtype"))

    def root_tables(self):
        """Return an empty list."""
        return []
class View(PhysicalTable):
    """A view created from an expression."""

    child = rlz.table
    name = rlz.instance_of(str)

    @property
    def schema(self):
        """Schema of the child table expression."""
        return self.child.schema()
class Limit(TableNode):
    """Table node holding a table together with integer ``n`` and ``offset``."""

    table = rlz.table
    n = rlz.instance_of(int)
    offset = rlz.instance_of(int)

    def blocks(self):
        """Return ``True`` unconditionally."""
        return True

    @property
    def schema(self):
        """Schema of the underlying table expression."""
        return self.table.schema()

    @util.deprecated(version="4.0", instead="")
    def has_schema(self):
        """Deprecated: forward to ``has_schema()`` of the table's operation."""
        return self.table.op().has_schema()

    def root_tables(self):
        """Return a one-element list containing this node itself."""
        return [self]
class Histogram(BucketLike):
    """Histogram bucketing specified by either ``nbins`` or ``binwidth``."""

    arg = rlz.numeric
    nbins = rlz.optional(rlz.instance_of(int))
    binwidth = rlz.optional(rlz.scalar(rlz.numeric))
    base = rlz.optional(rlz.scalar(rlz.numeric))
    closed = rlz.optional(rlz.isin({'left', 'right'}), default='left')
    aux_hash = rlz.optional(rlz.instance_of(str))

    def __init__(self, nbins, binwidth, **kwargs):
        """Require exactly one of ``nbins`` and ``binwidth``.

        Raises
        ------
        ValueError
            If both are ``None`` or both are given.
        """
        has_nbins = nbins is not None
        has_binwidth = binwidth is not None

        if not (has_nbins or has_binwidth):
            raise ValueError('Must indicate nbins or binwidth')
        if has_nbins and has_binwidth:
            raise ValueError('nbins and binwidth are mutually exclusive')

        super().__init__(nbins=nbins, binwidth=binwidth, **kwargs)

    @property
    def output_dtype(self):
        """Return ``dt.category``."""
        # always undefined cardinality (for now)
        return dt.category
class Alias(Value):
    """Value operation attaching a name to an argument."""

    arg = rlz.any
    name = rlz.instance_of((str, UnnamedMarker))

    # Shape and dtype are both derived from the `arg` field.
    output_shape = rlz.shape_like("arg")
    output_dtype = rlz.dtype_like("arg")

    def has_resolved_name(self):
        """Return ``True`` unconditionally."""
        return True

    def resolve_name(self):
        """Return the ``name`` field."""
        return self.name
class AlchemyTable(ops.DatabaseTable):
    """Database table node that also carries a ``sqla_table`` object."""

    sqla_table = rlz.instance_of(object)
    name = rlz.optional(rlz.instance_of(str), default=None)
    schema = rlz.optional(rlz.instance_of(sch.Schema), default=None)

    def __init__(self, source, sqla_table, name, schema):
        """Fill in a missing name or schema from ``sqla_table``.

        When ``name`` is ``None`` it is taken from ``sqla_table.name``;
        when ``schema`` is ``None`` it is obtained with ``sch.infer``.
        """
        table_name = sqla_table.name if name is None else name
        table_schema = (
            sch.infer(sqla_table, schema=schema) if schema is None else schema
        )
        super().__init__(
            name=table_name,
            schema=table_schema,
            sqla_table=sqla_table,
            source=source,
        )

    # TODO(kszucs): remove this
    def __equals__(self, other: AlchemyTable) -> bool:
        """Compare by name, source and schema only."""
        # override the default implementation to not compare
        # sqla_table instances
        return (
            self.name == other.name
            and self.source == other.source
            and self.schema.equals(other.schema)
        )
class StructField(Value):
    """Value operation selecting one named field out of a struct argument."""

    arg = rlz.struct
    field = rlz.instance_of(str)

    output_shape = rlz.shape_like("arg")

    @immutable_property
    def output_dtype(self):
        """Dtype found under ``field`` in the type of ``arg``."""
        return self.arg.type()[self.field]

    def resolve_name(self):
        """Return the ``field`` name."""
        return self.field

    def has_resolved_name(self):
        """Return ``True`` unconditionally."""
        return True
class VectorizedUDF(Value):
    """Value operation wrapping a Python function and its column arguments."""

    func = rlz.instance_of((FunctionType, LambdaType))
    func_args = rlz.tuple_of(rlz.column(rlz.any))
    # TODO(kszucs): should rename these arguments to
    # input_dtypes and return_dtype
    input_type = rlz.tuple_of(rlz.datatype)
    return_type = rlz.datatype

    @property
    def inputs(self):
        """Return ``func_args``."""
        return self.func_args

    @property
    def output_dtype(self):
        """Return ``return_type``."""
        return self.return_type

    def root_tables(self):
        """Return ``distinct_roots`` applied to the function arguments."""
        return distinct_roots(*self.func_args)
class ScalarParameter(Value):
    """Scalar value operation identified by a dtype and an integer counter."""

    # Class-level source of counter values shared by all instances.
    _counter = itertools.count()

    dtype = rlz.datatype
    # When no counter is given, the next value of `_counter` is used.
    counter = rlz.optional(rlz.instance_of(int),
                           default=lambda: next(ScalarParameter._counter))

    output_shape = rlz.Shape.SCALAR
    # The output dtype is read straight from the `dtype` field.
    output_dtype = property(attrgetter("dtype"))

    def resolve_name(self):
        """Return ``'param_<counter>'``."""
        return f'param_{self.counter:d}'

    def __hash__(self):
        """Hash the ``(dtype, counter)`` pair."""
        return hash((self.dtype, self.counter))

    @property
    def inputs(self):
        """Return an empty tuple."""
        return ()

    def root_tables(self):
        """Return an empty list."""
        return []
class FillNa(TableNode, sch.HasSchema):
    """Fill null values in the table."""

    table = rlz.table
    replacements = rlz.one_of((
        rlz.numeric,
        rlz.string,
        rlz.instance_of(collections.abc.Mapping),
    ))

    def __init__(self, table, replacements, **kwargs):
        """Convert mapping replacements to ``util.frozendict``, then initialize.

        Non-mapping ``replacements`` are forwarded unchanged.
        """
        if isinstance(replacements, collections.abc.Mapping):
            replacements = util.frozendict(replacements)
        super().__init__(table=table, replacements=replacements, **kwargs)

    @property
    def schema(self):
        """Schema of the underlying table expression."""
        return self.table.schema()
class TableColumn(Value):
    """Selects a column from a `Table`."""

    table = rlz.table
    name = rlz.instance_of((str, int))

    output_shape = rlz.Shape.COLUMNAR

    def __init__(self, table, name):
        """Resolve ``name`` against the table schema and initialize.

        An integer ``name`` is translated with the schema's
        ``name_at_position``.

        Raises
        ------
        com.IbisTypeError
            If the resulting name is not contained in the schema.
        """
        table_schema = table.schema()
        column = name
        if isinstance(column, int):
            column = table_schema.name_at_position(column)

        if column not in table_schema:
            raise com.IbisTypeError(
                f"value {column!r} is not a field in {table.columns}"
            )

        super().__init__(table=table, name=column)

    @util.deprecated(version="4.0.0", instead="Use `table` property instead")
    def parent(self):  # pragma: no cover
        """Deprecated accessor returning ``table``."""
        return self.table

    def resolve_name(self):
        """Return the ``name`` field."""
        return self.name

    def has_resolved_name(self):
        """Return ``True`` unconditionally."""
        return True

    def root_tables(self):
        """Return the root tables reported by the table's operation."""
        table_op = self.table.op()
        return table_op.root_tables()

    @property
    def output_dtype(self):
        """Entry stored under ``name`` in the table schema."""
        return self.table.schema()[self.name]
class NullLiteral(Literal, Singleton):
    """Typeless NULL literal"""

    # NOTE(review): the inner rule here is the bare `NoneType` class rather
    # than an `rlz.instance_of(...)` rule -- confirm this is intended.
    value = rlz.optional(type(None))
    # `dt.null` is used when no dtype is supplied.
    dtype = rlz.optional(rlz.instance_of(dt.Null), default=dt.null)
class Op(Annotable):
    # Optional list argument; `default` is read from the enclosing scope,
    # which is not visible in this snippet.
    arg = rlz.optional(rlz.instance_of(list), default=default)
(rlz.array_of(rlz.array_of(rlz.string)), [1, 2]), (rlz.array_of(rlz.string), [1, 2.0]), (rlz.array_of(rlz.array_of(rlz.integer)), [2, 2.0]), ], ) def test_array_of_invalid_input(rule, input): with pytest.raises(IbisTypeError): rule(input) @pytest.mark.parametrize( ('validator', 'input'), [ (rlz.array_of(rlz.integer), [1, 2, 3]), (rlz.value_list_of(rlz.integer), (3, 2)), (rlz.instance_of(int), 32), ], ) def test_optional(validator, input): expected = validator(input) if isinstance(expected, ibis.Expr): assert rlz.optional(validator)(input).equals(expected) else: assert rlz.optional(validator)(input) == expected assert rlz.optional(validator)(None) is None def test_base_table_of_failure_mode(): class BrokenUseOfBaseTableOf(ops.Node): arg = rlz.any foo = rlz.function_of(rlz.base_table_of("arg"))
def test_valid_instance_of(klass, value, expected):
    """``rlz.instance_of(klass, value)`` must compare equal to ``expected``."""
    result = rlz.instance_of(klass, value)
    assert result == expected
def test_valid_instance_of(klass, value, expected):
    """``rlz.instance_of(klass, value)`` must compare equal to ``expected``."""
    result = rlz.instance_of(klass, value)
    assert result == expected
class StringSQLLike(FuzzySearch):
    """Fuzzy search with a string argument, a pattern and an optional escape."""

    arg = rlz.string
    pattern = rlz.string
    # Optional plain Python string; no explicit default is declared here.
    escape = rlz.optional(rlz.instance_of(str))
class SummaryFilter(Value):
    """Boolean, columnar value operation built from an ``ir.TopK`` expression."""

    expr = rlz.instance_of(ir.TopK)

    output_dtype = dt.boolean
    output_shape = rlz.Shape.COLUMNAR
class MyOperation(types.Node):
    # List-style declaration holding a single rule that requires an
    # `IntegerValue` instance.
    input_type = [rules.instance_of(types.IntegerValue)]
class MyOperation(ops.Node):
    # Single field `arg`, required to be an `ir.IntegerValue` instance.
    arg = rlz.instance_of(ir.IntegerValue)
def test_invalid_instance_of(klass, value, expected):
    """``rlz.instance_of(klass, value)`` must raise ``expected``."""
    with pytest.raises(expected):
        outcome = rlz.instance_of(klass, value)
        assert outcome
def test_invalid_instance_of(klass, value, expected):
    """``rlz.instance_of(klass, value)`` must raise ``expected``."""
    with pytest.raises(expected):
        outcome = rlz.instance_of(klass, value)
        assert outcome
class Union(SetOp):
    """Set operation with an optional boolean ``distinct`` flag."""

    # Defaults to False when not supplied.
    distinct = rlz.optional(rlz.instance_of(bool), default=False)