def column_from(name, column, *, this):
    """Resolve ``column`` against the table stored under ``name`` in ``this``.

    Columns may be passed as a string, an integer, or a column expression.
    A string or integer is used to index the table directly. A column
    expression must be named, and must compare equal (via ``equals``) to the
    table's column of the same name.

    Parameters
    ----------
    name
        Key under which the table is looked up in ``this``.
    column : str, int or ir.Column
        The column to resolve.
    this
        Mapping-like object that contains the table under ``name``.

    Returns
    -------
    The result of ``table[column]`` for ``str``/``int`` input, otherwise the
    column expression that was passed in.

    Raises
    ------
    com.IbisTypeError
        If the table is missing, the column has an unsupported type, the
        column expression has no name, the lookup fails, or the column
        expression does not equal the table's column.
    """
    if name not in this:
        raise com.IbisTypeError(f"Could not get table {name} from {this}")
    table = this[name]

    if isinstance(column, (str, int)):
        return table[column]

    if not isinstance(column, ir.Column):
        raise com.IbisTypeError(
            "value must be an int or str or Column, got "
            f"{type(column).__name__}"
        )

    if not column.has_name():
        raise com.IbisTypeError(f"Passed column {column} has no name")

    maybe_column = column.get_name()
    try:
        is_same_column = column.equals(table[maybe_column])
    except com.IbisError as err:
        raise com.IbisTypeError(
            f"Cannot get column {maybe_column} from {type(table)}"
        ) from err

    # The mismatch error is raised outside the ``try`` on purpose: the handler
    # above catches ``com.IbisError``, so raising inside it would let the
    # handler intercept this error and replace it with the misleading
    # "Cannot get column" message (assuming IbisTypeError derives from
    # IbisError, as the naming suggests -- TODO confirm).
    if not is_same_column:
        raise com.IbisTypeError(
            f"Passed column is not a column in {type(table)}")
    return column
def _to_sort_key(key, *, table=None):
    """Normalize a sort-key specification into a sort expression.

    ``key`` may be a ``DeferredSortKey``, an ``ir.SortExpr``, a
    ``(key, sort_order)`` tuple/list, an expression, or any other object that
    ``table._ensure_expr`` can turn into an expression.

    Parameters
    ----------
    key
        The sort-key specification.
    table
        Table used to resolve deferred or non-expression keys. Required in
        those two cases; otherwise it may be ``None``.

    Returns
    -------
    An existing ``ir.SortExpr`` unchanged, or the result of
    ``SortKey(key, ascending=...).to_expr()``.

    Raises
    ------
    com.IbisTypeError
        If ``table`` is needed to resolve ``key`` but is ``None``.
    """
    # Deferred keys can only be resolved against a concrete table.
    if isinstance(key, DeferredSortKey):
        if table is None:
            raise com.IbisTypeError(
                "cannot resolve DeferredSortKey with table=None")
        key = key.resolve(table)

    # Already a sort expression: nothing to do.
    if isinstance(key, ir.SortExpr):
        return key

    # A 2-element tuple/list carries an explicit sort order; anything else
    # defaults to ascending.
    if isinstance(key, (tuple, list)):
        key, sort_order = key
    else:
        sort_order = True

    if not isinstance(key, ir.Expr):
        if table is None:
            raise com.IbisTypeError("cannot resolve key with table=None")
        key = table._ensure_expr(key)
        # NOTE(review): nesting reconstructed from a flattened source; this
        # check is assumed to belong to the non-expression branch -- confirm.
        if isinstance(key, (ir.SortExpr, DeferredSortKey)):
            return _to_sort_key(key, table=table)

    if isinstance(sort_order, str):
        if sort_order.lower() in ('desc', 'descending'):
            sort_order = False
        # NOTE(review): nesting reconstructed from a flattened source; this
        # ``elif`` is assumed to pair with the inner ``if`` (so any other
        # string goes through ``bool``) -- confirm against the original file.
        elif not isinstance(sort_order, bool):
            sort_order = bool(sort_order)

    return SortKey(key, ascending=sort_order).to_expr()
def table(arg, *, schema=None, **kwargs):
    """Validate that ``arg`` is a table, optionally with required columns.

    Parameters
    ----------
    arg
        The argument being validated.
    schema : Union[sch.Schema, List[Tuple[str, dt.DataType]], None]
        Column rules the table has to satisfy. Accepts anything that
        ``sch.schema`` accepts. Only column-subset validation is supported:
        the table's schema is compared against the rules with ``>=``, so the
        table may have extra columns that the rules do not mention.
    **kwargs
        Accepted for signature compatibility; not used here.

    Returns
    -------
    ir.Table
        ``arg``, unchanged.

    Raises
    ------
    com.IbisTypeError
        If ``arg`` is not an ``ir.Table``, or if ``schema`` is given and the
        table's schema does not satisfy it.
    """
    if not isinstance(arg, ir.Table):
        raise com.IbisTypeError(
            f'Argument is not a table; got type {type(arg).__name__}')

    # No rules means every table is acceptable.
    satisfies_rules = schema is None or arg.schema() >= sch.schema(schema)
    if not satisfies_rules:
        raise com.IbisTypeError(
            f'Argument is not a table with column subset of {schema}')
    return arg
def array(values, type=None):
    """Create an array expression.

    If the input expressions are all column expressions, then the output will
    be an ``ArrayColumn``. The input columns will be concatenated row-wise to
    produce each array in the output array column. Each array will have
    length n, where n is the number of input columns. All input columns
    should be of the same datatype.

    If the input expressions are Python literals, then the output will be a
    single ``ArrayScalar`` of length n, where n is the number of input
    values. This is equivalent to ``ibis.literal(values)``.

    Parameters
    ----------
    values : list
        A list of Ibis column expressions, or a list of Python literals.
    type : ibis type or string, optional
        An instance of :class:`ibis.expr.datatypes.DataType` or a string
        indicating the ibis type of the values. Only forwarded when the
        values are Python literals.

    Returns
    -------
    array_value : ArrayValue
        An array column (if the inputs are column expressions), or an array
        scalar (if the inputs are Python literals).

    Raises
    ------
    com.IbisTypeError
        If column expressions and non-column values are mixed, or if no array
        scalar can be built from the literal values.

    Examples
    --------
    Creating an array column from column expressions:

    >>> import ibis
    >>> t = ibis.table([('a', 'int64'), ('b', 'int64')], name='t')
    >>> result = ibis.array([t.a, t.b])

    Creating an array scalar from Python literals:

    >>> import ibis
    >>> result = ibis.array([1.0, 2.0, 3.0])
    """
    import ibis.expr.operations as ops

    # Materialize once: the values are scanned up to twice and then handed
    # on, which would silently exhaust a one-shot iterable (e.g. a generator)
    # before it reaches the constructor below.
    values = list(values)

    # NOTE: ``all`` is true for an empty sequence, so empty input takes the
    # column branch, exactly as before.
    if all(isinstance(value, ColumnExpr) for value in values):
        return ops.ArrayColumn(values).to_expr()

    if any(isinstance(value, ColumnExpr) for value in values):
        raise com.IbisTypeError(
            'To create an array column using `array`, all input values must '
            'be column expressions.'
        )

    try:
        return literal(values, type=type)
    except com.IbisTypeError as e:
        raise com.IbisTypeError(
            'Could not create an array scalar from the values provided '
            'to `array`. Ensure that all input values have the same '
            'Python type, or can be casted to a single Python type.'
        ) from e
def list_of(inner, arg, min_length=0):
    """Validate every element of a sequence with ``inner``.

    Parameters
    ----------
    inner : callable
        Validator applied to each element.
    arg
        A non-string ``collections.abc.Sequence`` or an ``ir.ListExpr``.
    min_length : int, default 0
        Minimum number of elements ``arg`` must contain.

    Returns
    -------
    The result of ``ir.sequence`` applied to the list of validated elements.

    Raises
    ------
    com.IbisTypeError
        If ``arg`` is a string, is not a sequence, or is too short.
    """
    is_sequence = isinstance(arg, (collections.abc.Sequence, ir.ListExpr))
    # Strings are sequences too, but are never accepted as a list of values.
    if not is_sequence or isinstance(arg, str):
        raise com.IbisTypeError('Argument must be a sequence')

    if len(arg) < min_length:
        raise com.IbisTypeError(
            f'Arg must have at least {min_length} number of elements')

    validated = [inner(element) for element in arg]
    return ir.sequence(validated)
def container_of(inner, arg, *, type, min_length=0, flatten=False, **kwargs):
    """Validate every item of an iterable and collect them into ``type``.

    Parameters
    ----------
    inner : callable
        Validator applied to each item; it also receives ``**kwargs``.
    arg
        The iterable to validate, as judged by ``util.is_iterable``.
    type : callable
        Container constructor; called with a generator of validated items.
    min_length : int, default 0
        Minimum ``len(arg)``, checked before any flattening.
    flatten : bool, default False
        If true, items are taken from ``util.flatten_iterable(arg)``.
    **kwargs
        Passed through to ``inner``.

    Returns
    -------
    ``type(...)`` built from the validated items.

    Raises
    ------
    com.IbisTypeError
        If ``arg`` is not iterable or has fewer than ``min_length`` elements.
    """
    if not util.is_iterable(arg):
        raise com.IbisTypeError('Argument must be a sequence')

    if len(arg) < min_length:
        raise com.IbisTypeError(
            f'Arg must have at least {min_length} number of elements')

    items = util.flatten_iterable(arg) if flatten else arg
    return type(inner(item, **kwargs) for item in items)
def cast(source, target):
    """Implicitly cast a literal expression to the scalar type of ``target``.

    Currently only Literal to *Scalar implicit casts are allowed.

    Parameters
    ----------
    source
        Expression to cast; its operation must be an ``ops.Literal``.
    target
        Expression whose ``type().scalar_type()`` builds the result.

    Returns
    -------
    The scalar expression class of ``target``'s type, called with the
    operation of ``source``.

    Raises
    ------
    com.IbisTypeError
        If ``castable(source, target)`` is false, or the operation behind
        ``source`` is not a literal.
    """
    import ibis.expr.operations as ops  # TODO: don't use ops here

    if not castable(source, target):
        raise com.IbisTypeError('Source is not castable to target type!')

    # Currently this prevents column -> scalar implicit castings even though
    # the datatypes are matching.
    source_op = source.op()
    if isinstance(source_op, ops.Literal):
        scalar_type = target.type().scalar_type()
        return scalar_type(source_op)

    raise com.IbisTypeError('Only able to implicitly cast literals!')
def instance_of(klass, arg):
    """Require that a value has a particular Python type.

    Parameters
    ----------
    klass : type or tuple of types
        Anything accepted as the second argument of ``isinstance``.
    arg
        The value to check.

    Returns
    -------
    ``arg``, unchanged.

    Raises
    ------
    com.IbisTypeError
        If ``arg`` is not an instance of ``klass``.
    """
    if isinstance(arg, klass):
        return arg

    raise com.IbisTypeError(
        f'Given argument with type {type(arg)} is not an instance of {klass}')
def interval(arg, units=None):
    """Validate an interval value, optionally restricting its unit.

    Parameters
    ----------
    arg
        Validated with ``value(dt.Interval, arg)``.
    units : container, optional
        Allowed units. ``None`` (the default) allows every unit.

    Returns
    -------
    The value returned by ``value(dt.Interval, arg)``.

    Raises
    ------
    com.IbisTypeError
        If ``units`` is given and the unit of the interval is not in it.
    """
    arg = value(dt.Interval, arg)
    unit = arg.type().unit

    if units is None or unit in units:
        return arg

    raise com.IbisTypeError(
        f'Interval unit `{unit}` is not among the allowed ones {units}')
def from_string(value: str) -> DataType:
    """Parse the textual description of a datatype.

    Parameters
    ----------
    value : str
        The string handed to ``TypeParser``.

    Returns
    -------
    DataType
        Whatever ``TypeParser(value).parse()`` returns.

    Raises
    ------
    com.IbisTypeError
        If parsing raises ``SyntaxError``. The original ``SyntaxError`` is
        attached as ``__cause__``.
    """
    try:
        return TypeParser(value).parse()
    except SyntaxError as err:
        # Chain explicitly so the traceback presents the parser error as the
        # direct cause rather than as an error raised while handling another.
        raise com.IbisTypeError(
            '{!r} cannot be parsed as a datatype'.format(value)
        ) from err
def non_negative_integer(arg, **kwargs):
    """Validate that ``arg`` is an ``int`` greater than or equal to zero.

    Parameters
    ----------
    arg
        The value to check.
    **kwargs
        Accepted for signature compatibility; not used here.

    Returns
    -------
    int
        ``arg``, unchanged.

    Raises
    ------
    com.IbisTypeError
        If ``arg`` is not an ``int``.
    ValueError
        If ``arg`` is negative.
    """
    if isinstance(arg, int):
        if arg < 0:
            raise ValueError(
                "got negative value for non-negative integer rule")
        return arg

    raise com.IbisTypeError(
        f"positive integer must be int type, got {type(arg).__name__}")
def reduction(argument, **kwargs):
    """Validate that ``argument`` is a reduction.

    Parameters
    ----------
    argument
        The expression to check with ``ibis.expr.analysis.is_reduction``.
    **kwargs
        Accepted for signature compatibility; not used here.

    Returns
    -------
    ``argument``, unchanged.

    Raises
    ------
    com.IbisTypeError
        If ``is_reduction(argument)`` is false.
    """
    from ibis.expr.analysis import is_reduction

    if is_reduction(argument):
        return argument

    raise com.IbisTypeError("`argument` must be a reduction")
def cast(source: DataType | str, target: DataType | str, **kwargs) -> DataType:
    """Attempt to implicitly cast from source dtype to target dtype.

    Parameters
    ----------
    source : DataType or str
        Normalized with ``dtype`` before the check.
    target : DataType or str
        Normalized with ``dtype`` before the check.
    **kwargs
        Forwarded to ``castable``.

    Returns
    -------
    DataType
        The normalized target datatype.

    Raises
    ------
    com.IbisTypeError
        If ``castable`` reports that the cast is not possible.
    """
    source_dtype = dtype(source)
    target_dtype = dtype(target)

    if castable(source_dtype, target_dtype, **kwargs):
        return target_dtype

    raise com.IbisTypeError(
        f'Datatype {source_dtype} cannot be implicitly '
        f'casted to {target_dtype}')
def one_of(inners, arg, **kwargs):
    """Return the result of the first inner validator that passes.

    At least one of the inner validators must pass. A validator counts as
    failing when it raises ``com.IbisTypeError`` or ``ValueError``; any other
    exception propagates.

    Parameters
    ----------
    inners : iterable of callables
        Validators, tried in order.
    arg
        The value to validate.
    **kwargs
        Forwarded to every validator.

    Raises
    ------
    com.IbisTypeError
        If every validator fails.
    """
    for validator in inners:
        try:
            return validator(arg, **kwargs)
        except (com.IbisTypeError, ValueError):
            # This rule rejected the value; try the next one.
            continue

    rule_names = ', '.join(map(repr, inners))
    raise com.IbisTypeError(
        f"argument passes none of the following rules: {rule_names}")
def instance_of(klasses, arg, **kwargs):
    """Require that a value has a particular Python type.

    Parameters
    ----------
    klasses : type or tuple of types
        Anything accepted as the second argument of ``isinstance``.
    arg
        The value to check.
    **kwargs
        Accepted for signature compatibility; not used here.

    Returns
    -------
    ``arg``, unchanged.

    Raises
    ------
    com.IbisTypeError
        If ``arg`` is not an instance of ``klasses``.
    """
    if isinstance(arg, klasses):
        return arg

    actual_type = type(arg)
    raise com.IbisTypeError(
        f'Given argument with type {actual_type} '
        f'is not an instance of {klasses}'
    )
def __init__(self, left, right):
    """Initialize a binary operation whose operands must share one type.

    Parameters
    ----------
    left, right
        Operands; ``left.type()`` and ``right.type()`` must be equal.

    Raises
    ------
    com.IbisTypeError
        If the two operand types differ.
    """
    left_dtype = left.type()
    right_dtype = right.type()

    if left_dtype != right_dtype:
        operation_name = type(self).__name__
        raise com.IbisTypeError(
            f'Array types must match exactly in a {operation_name} operation. '
            f'Left type {left_dtype} != Right type {right_dtype}')

    super().__init__(left=left, right=right)
def higher_precedence(left: DataType, right: DataType) -> DataType:
    """Return whichever of two datatypes the other can be upcast to.

    ``right`` is preferred: it is returned when ``left`` is castable to it
    with ``upcast=True``; otherwise ``left`` is returned when ``right`` is
    castable to ``left``.

    Raises
    ------
    com.IbisTypeError
        If neither type is castable to the other.
    """
    # Same order of checks as a plain if/elif: (left -> right) first.
    for lower, higher in ((left, right), (right, left)):
        if castable(lower, higher, upcast=True):
            return higher

    raise com.IbisTypeError(
        f'Cannot compute precedence for {left} and {right} types')
def array_of(inner, arg):
    """Validate an array value whose elements satisfy ``inner``.

    Parameters
    ----------
    inner : callable
        Validator applied to the first element (``val[0]``); the type of its
        result becomes the required element type.
    arg
        An expression, or a Python value that is wrapped with ``ir.literal``.

    Returns
    -------
    The result of ``value(dt.Array(<element type>), <expression>)``.

    Raises
    ------
    com.IbisTypeError
        If the type of the expression is not a ``dt.Array``.
    """
    if isinstance(arg, ir.Expr):
        val = arg
    else:
        val = ir.literal(arg)

    argtype = val.type()
    if not isinstance(argtype, dt.Array):
        raise com.IbisTypeError(
            f'Argument must be an array, got expression {val} which is of '
            f'type {argtype}')

    element_type = inner(val[0]).type()
    return value(dt.Array(element_type), val)
def named_literal(value, **kwargs):
    """Validate that ``value`` is a named scalar literal expression.

    Parameters
    ----------
    value
        Must be an ``ir.Scalar`` whose operation is an ``ops.Literal`` and
        for which ``has_name()`` is true.
    **kwargs
        Accepted for signature compatibility; not used here.

    Returns
    -------
    ``value``, unchanged.

    Raises
    ------
    com.IbisTypeError
        If any of the three requirements above is not met.
    """
    import ibis.expr.operations as ops

    if not isinstance(value, ir.Scalar):
        raise com.IbisTypeError(
            "`value` must be a scalar expression; "
            f"got value of type {type(value).__name__}")

    op = value.op()
    if not isinstance(op, ops.Literal):
        raise com.IbisTypeError(
            "`value` must map to an ibis literal; "
            f"got expr with op {type(op).__name__}")

    # The literal must carry a name.
    if value.has_name():
        return value

    raise com.IbisTypeError("`value` literal is not named")
def base_table_of(name, *, this):
    """Return the first base table of the argument stored under ``name``.

    Parameters
    ----------
    name
        Key of the argument in ``this``.
    this
        Mapping-like object holding the argument.

    Returns
    -------
    The result of ``find_first_base_table(this[name])``.

    Raises
    ------
    com.IbisTypeError
        If ``find_first_base_table`` returns ``None``.
    """
    from ibis.expr.analysis import find_first_base_table

    arg = this[name]
    base = find_first_base_table(arg)

    if base is not None:
        return base

    raise com.IbisTypeError(f"`{arg}` doesn't have a base table")
def member_of(obj, arg):
    """Look up ``arg`` as an attribute of ``obj``.

    Parameters
    ----------
    obj
        Object (typically an enum class) whose attribute is requested.
    arg : str or enum.Enum
        Attribute name. An enum member is replaced by its ``name`` after
        ``enum.unique(obj)`` has been called on ``obj``.

    Returns
    -------
    ``getattr(obj, <name>)``.

    Raises
    ------
    com.IbisTypeError
        If ``obj`` has no attribute with that name.
    """
    attribute = arg
    if isinstance(arg, enum.Enum):
        enum.unique(obj)  # check that enum has unique values
        attribute = arg.name

    if hasattr(obj, attribute):
        return getattr(obj, attribute)

    raise com.IbisTypeError(
        f'Value with type {type(attribute)} is not a member of {obj}')
def one_of(inners, arg):
    """Return the result of the first inner validator that passes.

    At least one of the inner validators must pass. A validator counts as
    failing when it raises ``com.IbisTypeError`` or ``ValueError``; any other
    exception propagates.

    Parameters
    ----------
    inners : iterable of callables
        Validators, tried in order.
    arg
        The value to validate.

    Raises
    ------
    com.IbisTypeError
        If every validator fails.
    """
    for validator in inners:
        try:
            return validator(arg)
        except (com.IbisTypeError, ValueError):
            # This rule rejected the value; try the next one.
            continue

    rules_formatted = ', '.join(repr(rule) for rule in inners)
    raise com.IbisTypeError(
        f'Arg passes neither of the following rules: {rules_formatted}')
def array_of(inner, arg, **kwargs):
    """Validate an array value whose elements satisfy ``inner``.

    Parameters
    ----------
    inner : callable
        Validator applied to the first element (``val[0]``); the type of its
        result becomes the required element type.
    arg
        An expression, or a Python value that is wrapped with ``ir.literal``.
    **kwargs
        Forwarded to both ``inner`` and ``value``.

    Returns
    -------
    The result of ``value(dt.Array(<element type>), <expression>, **kwargs)``.

    Raises
    ------
    com.IbisTypeError
        If the type of the expression is not a ``dt.Array``.
    """
    if isinstance(arg, ir.Expr):
        val = arg
    else:
        val = ir.literal(arg)

    argtype = val.type()
    if not isinstance(argtype, dt.Array):
        raise com.IbisTypeError(
            'Argument must be an array, '
            f'got expression which is of type {argtype}')

    element_validated = inner(val[0], **kwargs)
    array_dtype = dt.Array(element_validated.type())
    return value(array_dtype, val, **kwargs)
def validate_func_and_types(self, func):
    """Reject pandas UDF definitions that Spark cannot run.

    Performs two checks and then delegates to the parent class
    implementation.

    Parameters
    ----------
    func
        The user function; passed on to the parent implementation.

    Raises
    ------
    com.IbisTypeError
        If ``self.spark_output_type`` is a ``pt.MapType`` or
        ``pt.StructType``.
    com.UnsupportedArgumentError
        If ``self.input_type`` is empty.
    """
    # The messages are built from adjacent string literals. A backslash
    # continuation inside a literal would embed the source indentation of
    # the following line in the text shown to the user.
    if isinstance(self.spark_output_type, (pt.MapType, pt.StructType)):
        raise com.IbisTypeError(
            'Spark does not support MapType or StructType output for '
            'Pandas UDFs')

    if not self.input_type:
        raise com.UnsupportedArgumentError(
            'Spark does not support 0-arg pandas UDFs. Instead, create '
            'a 1-arg pandas UDF and ignore the arg in your function')

    super().validate_func_and_types(func)
def is_computable_input(value, **kwargs):  # pragma: no cover
    """Validate that ``value`` is a computable input for the pandas backend.

    Parameters
    ----------
    value
        Checked with ``ibis.backends.pandas.core.is_computable_input``.
    **kwargs
        Accepted for signature compatibility; not used here.

    Returns
    -------
    ``value``, unchanged.

    Raises
    ------
    com.IbisTypeError
        If the backend check returns a false value.
    """
    # Imported under an alias: this validator has the same name as the
    # backend predicate it delegates to.
    from ibis.backends.pandas.core import (
        is_computable_input as _backend_check,
    )

    if _backend_check(value):
        return value

    raise com.IbisTypeError(
        f"object {value} is not a computable input; "
        "did you register the type with "
        "ibis.backends.pandas.core.is_computable_input?")
def output_dtype(self):
    """Compute the output datatype from the map's value type and the default.

    Returns
    -------
    The result of ``dt.highest_precedence`` over the type of ``self.default``
    and the ``value_type`` of ``self.arg``'s type.

    Raises
    ------
    com.IbisTypeError
        If ``dt.same_kind`` is false for the two types.
    """
    value_type = self.arg.type().value_type
    default_type = self.default.type()

    if dt.same_kind(default_type, value_type):
        return dt.highest_precedence((default_type, value_type))

    raise com.IbisTypeError(
        f"Default value\n{self.default}\nof type {default_type} cannot be "
        f"cast to map's value type {value_type}")
def __init__(self, table, name):
    """Initialize a reference to one field of ``table``.

    Parameters
    ----------
    table
        Object providing ``schema()`` and ``columns``.
    name : str or int
        Field name. An ``int`` is translated to a name with
        ``schema.name_at_position``.

    Raises
    ------
    com.IbisTypeError
        If the resulting name is not in the table's schema.
    """
    schema = table.schema()
    field = schema.name_at_position(name) if isinstance(name, int) else name

    if field not in schema:
        raise com.IbisTypeError(
            f"value {field!r} is not a field in {table.columns}")

    super().__init__(table=table, name=field)
def validate_func_and_types(self, func):
    """Check the user function and the declared types of a UDF.

    Parameters
    ----------
    func
        The user function; must be callable.

    Raises
    ------
    TypeError
        If ``func`` is not callable.
    com.IbisTypeError
        If ``self.output_type.nullable`` is false.
    """
    if not callable(func):
        raise TypeError(f'func must be callable, got {func}')

    # Validate that the input_type argument and the function signature match.
    valid_function_signature(self.input_type, func)

    if self.output_type.nullable:
        return

    raise com.IbisTypeError(
        'Spark does not support non-nullable output types')
def member_of(obj, arg, **kwargs):
    """Look up ``arg`` as an attribute of ``obj``.

    Parameters
    ----------
    obj
        Object (typically an enum class) whose attribute is requested.
    arg : str, enum.Enum or ir.EnumValue
        Attribute name. An ``ir.EnumValue`` is first replaced by
        ``arg.op().value``; an enum member is then replaced by its ``name``
        after ``enum.unique(obj)`` has been called on ``obj``.
    **kwargs
        Accepted for signature compatibility; not used here.

    Returns
    -------
    ``getattr(obj, <name>)``.

    Raises
    ------
    com.IbisTypeError
        If ``obj`` has no attribute with that name.
    """
    attribute = arg
    if isinstance(attribute, ir.EnumValue):
        # Unwrap the expression to the value held by its operation.
        attribute = attribute.op().value

    if isinstance(attribute, enum.Enum):
        enum.unique(obj)  # check that enum has unique values
        attribute = attribute.name

    if hasattr(obj, attribute):
        return getattr(obj, attribute)

    raise com.IbisTypeError(
        f'Value with type {type(attribute)} is not a member of {obj}')
def value(dtype, arg):
    """Validate that the given argument is a Value with a particular datatype.

    Parameters
    ----------
    dtype : DataType subclass or DataType instance
    arg : python literal or an ibis expression
        If a python literal is given the validator tries to coerce it to an
        ibis literal.

    Returns
    -------
    arg : AnyValue
        An ibis value expression with the specified datatype

    Raises
    ------
    com.IbisTypeError
        If ``arg`` is not a value expression, or its datatype is neither an
        instance of ``dtype`` nor implicitly castable to it.
    """
    if not isinstance(arg, ir.Expr):
        # Coerce python literal to ibis literal.
        arg = ir.literal(arg)

    if not isinstance(arg, ir.AnyValue):
        raise com.IbisTypeError(
            f'Given argument with type {type(arg)} is not a value '
            'expression'
        )

    # Retrieve the literal value (if any) for the implicit cast check.
    literal_value = getattr(arg.op(), 'value', None)
    arg_type = arg.type()

    # A dtype class has been specified, like dt.Interval or dt.Decimal.
    if isinstance(dtype, type) and isinstance(arg_type, dtype):
        return arg

    # A dtype instance or string has been specified and arg's dtype is
    # implicitly castable to it, like dt.int8 is castable to dt.int64.
    if dt.castable(arg_type, dt.dtype(dtype), value=literal_value):
        return arg

    raise com.IbisTypeError(
        f'Given argument with datatype {arg_type} is not '
        f'subtype of {dtype} nor implicitly castable to '
        'it'
    )