def list_of(inner, arg, min_length=0):
    """Validate every element of a sequence argument with ``inner``.

    Parameters
    ----------
    inner : callable
        Validator applied to each element of ``arg``.
    arg : sequence or ir.ListExpr
        The value to validate. Strings are rejected even though they are
        sequences.
    min_length : int, default 0
        Minimum number of elements ``arg`` must contain.

    Returns
    -------
    The result of ``ir.sequence`` applied to the list of validated elements.

    Raises
    ------
    com.IbisTypeError
        If ``arg`` is a string, is neither a sequence nor an
        ``ir.ListExpr``, or holds fewer than ``min_length`` elements.
    """
    # ``collections.Sequence`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Fall back for interpreters lacking that module.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if isinstance(arg, str) or not isinstance(arg, (Sequence, ir.ListExpr)):
        raise com.IbisTypeError('Argument must be a sequence')

    if len(arg) < min_length:
        raise com.IbisTypeError(
            'Arg must have at least {} number of elements'.format(min_length))

    return ir.sequence(list(map(inner, arg)))
def pandas_col_to_ibis_type(col):
    """Map the dtype of a pandas column to an ibis type.

    Parameters
    ----------
    col : pandas.Series
        Column whose ``dtype`` (and, for categoricals, ``cat.categories``)
        is inspected.

    Returns
    -------
    str or dt.Category
        The ibis type name as a string, or a ``dt.Category`` sized by the
        number of categories for categorical columns.

    Raises
    ------
    com.IbisTypeError
        For datetime64 columns that are not nanosecond based, for uint64
        columns, and for any dtype not handled below.
    """
    import numpy as np
    # The dtype predicates are no longer available from
    # ``pandas.core.common``; ``pandas.api.types`` is the public home.
    import pandas.api.types as pdtypes

    dty = col.dtype

    # datetime types
    if pdtypes.is_datetime64_dtype(dty):
        if pdtypes.is_datetime64_ns_dtype(dty):
            return 'timestamp'
        raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                "datetime64-like but does "
                                "not use nanosecond units".format(
                                    col.name, dty))
    if pdtypes.is_timedelta64_dtype(dty):
        print("Warning: encoding a timedelta64 as an int64")
        return 'int64'

    # ``is_categorical_dtype`` is deprecated in recent pandas; an instance
    # check against CategoricalDtype is equivalent for a dtype object.
    if isinstance(dty, pdtypes.CategoricalDtype):
        # Imported here because this is the only branch that needs it.
        import ibis.expr.datatypes as dt
        return dt.Category(len(col.cat.categories))

    if pdtypes.is_bool_dtype(dty):
        return 'boolean'

    # simple numerical types; unsigned ints are widened to the next signed
    # size
    numeric_names = (
        (np.int8, 'int8'),
        (np.int16, 'int16'),
        (np.int32, 'int32'),
        (np.int64, 'int64'),
        (np.float32, 'float'),
        (np.float64, 'double'),
        (np.uint8, 'int16'),
        (np.uint16, 'int32'),
        (np.uint32, 'int64'),
    )
    for numpy_type, ibis_name in numeric_names:
        if issubclass(dty.type, numpy_type):
            return ibis_name

    if issubclass(dty.type, np.uint64):
        raise com.IbisTypeError("Column {0} is an unsigned int64".format(
            col.name))

    if pdtypes.is_object_dtype(dty):
        # TODO: overly broad?
        return 'string'

    raise com.IbisTypeError("Column {0} is dtype {1}".format(col.name, dty))
def pandas_col_to_ibis_type(col):
    """Infer the ibis datatype corresponding to a pandas column's dtype.

    Parameters
    ----------
    col : pandas column
        Its ``dtype``, ``name`` and (for categoricals) ``cat.categories``
        are read.

    Returns
    -------
    An ibis datatype taken from ``dt``; object columns are delegated to
    ``_infer_object_dtype``.

    Raises
    ------
    com.IbisTypeError
        For non-nanosecond datetime64 columns, uint64 columns, and any
        dtype that none of the checks below recognise.
    """
    import numpy as np

    col_dtype = col.dtype

    # datetime types: timezone-aware first, then naive nanosecond stamps
    if pdcom.is_datetime64tz_dtype(col_dtype):
        return dt.Timestamp(str(col_dtype.tz))

    if pdcom.is_datetime64_dtype(col_dtype):
        if not pdcom.is_datetime64_ns_dtype(col_dtype):
            raise com.IbisTypeError("Column {0} has dtype {1}, which is "
                                    "datetime64-like but does "
                                    "not use nanosecond units".format(
                                        col.name, col_dtype))
        return dt.timestamp

    if pdcom.is_timedelta64_dtype(col_dtype):
        print("Warning: encoding a timedelta64 as an int64")
        return dt.int64

    if pdcom.is_categorical_dtype(col_dtype):
        return dt.Category(len(col.cat.categories))

    if pdcom.is_bool_dtype(col_dtype):
        return dt.boolean

    # simple numerical types, checked in this order; the ibis attribute is
    # looked up only for the entry that matches
    numeric_lookup = (
        (np.int8, 'int8'),
        (np.int16, 'int16'),
        (np.int32, 'int32'),
        (np.int64, 'int64'),
        (np.float32, 'float'),
        (np.float64, 'double'),
        (np.uint8, 'int16'),
        (np.uint16, 'int32'),
        (np.uint32, 'int64'),
    )
    for numpy_type, ibis_attr in numeric_lookup:
        if issubclass(col_dtype.type, numpy_type):
            return getattr(dt, ibis_attr)

    if issubclass(col_dtype.type, np.uint64):
        raise com.IbisTypeError("Column {} is an unsigned int64".format(
            col.name))

    if pdcom.is_object_dtype(col_dtype):
        return _infer_object_dtype(col)

    raise com.IbisTypeError(
        "Column {0} is dtype {1}".format(col.name, col_dtype))
def cast(source, target):
    """Implicitly cast a literal expression to the scalar type of ``target``.

    Currently only Literal to *Scalar implicit casts are allowed.

    Raises
    ------
    com.IbisTypeError
        If ``castable(source, target)`` is false, or if the operation
        behind ``source`` is not a ``Literal``.
    """
    if not castable(source, target):
        raise com.IbisTypeError('Source is not castable to target type!')

    # Column -> scalar implicit casts are rejected here even when the
    # datatypes match; only literals go through.
    node = source.op()
    if isinstance(node, Literal):
        scalar_cls = target.type().scalar_type()
        return scalar_cls(node)

    raise com.IbisTypeError('Only able to implicitly cast literals!')
def interval(arg, units=None):
    """Validate an interval argument, optionally restricting its unit.

    Parameters
    ----------
    arg :
        Passed through ``value(dt.Interval, arg)`` first.
    units : container, optional
        When given, the unit of the validated interval must be in it.

    Returns
    -------
    The validated argument as returned by ``value``.

    Raises
    ------
    com.IbisTypeError
        If ``units`` is given and the interval's unit is not among them.
    """
    validated = value(dt.Interval, arg)
    unit = validated.type().unit

    if units is None or unit in units:
        return validated

    template = 'Interval unit `{}` is not among the allowed ones {}'
    raise com.IbisTypeError(template.format(unit, units))
def instance_of(klass, arg):
    """Return ``arg`` if it is an instance of ``klass``.

    Raises
    ------
    com.IbisTypeError
        If ``isinstance(arg, klass)`` is false.
    """
    if isinstance(arg, klass):
        return arg

    message = 'Given argument with type {} is not an instance of {}'.format(
        type(arg), klass)
    raise com.IbisTypeError(message)
def from_string(value):
    """Parse ``value`` into a datatype using ``TypeParser``.

    Raises
    ------
    com.IbisTypeError
        If constructing or running the parser raises ``SyntaxError``.
    """
    try:
        parsed = TypeParser(value).parse()
    except SyntaxError:
        message = '{!r} cannot be parsed as a datatype'.format(value)
        raise com.IbisTypeError(message)
    else:
        return parsed
def table(schema, arg):
    """A table argument.

    Parameters
    ----------
    schema : Union[sch.Schema, List[Tuple[str, dt.DataType]]]
        A validator for the table's columns. Only column subset validators
        are currently supported. Accepts any arguments that `sch.schema`
        accepts. See the example for usage.
    arg : The validatable argument.

    Returns
    -------
    arg : ir.TableExpr
        The argument unchanged, when ``arg.schema() >= sch.schema(schema)``.

    Raises
    ------
    com.IbisTypeError
        If ``arg`` is not an ``ir.TableExpr``, or if its schema does not
        satisfy the comparison against ``schema``.

    Examples
    --------
    The following op will accept an argument named ``'table'``. Note that
    the ``schema`` argument specifies rules for columns that are required
    to be in the table: ``time``, ``group`` and ``value1``. These must match
    the types specified in the column rules. Column ``value2`` is optional,
    but if present it must be of the specified type. The table may have
    extra columns not specified in the schema.
    """
    # Raise the validator error type instead of using ``assert``: asserts
    # are stripped under ``python -O`` and an AssertionError is not an
    # IbisTypeError, which is what the other validators raise.
    if not isinstance(arg, ir.TableExpr):
        raise com.IbisTypeError(
            'Argument is not a table; got type {}'.format(
                type(arg).__name__))

    if arg.schema() >= sch.schema(schema):
        return arg

    raise com.IbisTypeError(
        'Argument is not a table with column subset of {}'.format(schema))
def cast(source, target, **kwargs):
    """Attempt an implicit cast from the source dtype to the target dtype.

    Both arguments are normalised with ``dtype`` first; ``kwargs`` are
    forwarded to ``castable``.

    Returns
    -------
    The normalised target dtype.

    Raises
    ------
    com.IbisTypeError
        If ``castable`` reports that the cast is not allowed.
    """
    source_type = dtype(source)
    target_type = dtype(target)

    if castable(source_type, target_type, **kwargs):
        return target_type

    raise com.IbisTypeError(
        'Datatype {} cannot be implicitly casted to {}'.format(
            source_type, target_type))
def higher_precedence(left, right):
    """Return whichever of the two types the other can be upcast to.

    ``left -> right`` is tried first, then ``right -> left``.

    Raises
    ------
    com.IbisTypeError
        If neither direction is castable with ``upcast=True``.
    """
    for lower, higher in ((left, right), (right, left)):
        if castable(lower, higher, upcast=True):
            return higher

    raise com.IbisTypeError(
        'Cannot compute precedence for {} and {} types'.format(left, right))
def array_of(inner, arg):
    """Validate an array argument whose elements satisfy ``inner``.

    Non-expression inputs are wrapped with ``ir.literal`` first. The element
    type is taken from ``inner`` applied to the first element, and the whole
    expression is then validated with ``value`` against
    ``dt.Array(<that element type>)``.

    Raises
    ------
    com.IbisTypeError
        If the expression's type is not a ``dt.Array``.
    """
    if isinstance(arg, ir.Expr):
        expr = arg
    else:
        expr = ir.literal(arg)

    if isinstance(expr.type(), dt.Array):
        first_validated = inner(expr[0])
        return value(dt.Array(first_validated.type()), expr)

    raise com.IbisTypeError(
        'Argument must be an array, got expression {} which is of type '
        '{}'.format(expr, expr.type()))
def list_of(inner, arg, min_length=0):
    """Validate each element of ``arg`` with ``inner``.

    Anything that is not a tuple, list or ``ir.ListExpr`` is treated as a
    single element and wrapped in a one-item list.

    Returns
    -------
    The result of ``ir.sequence`` applied to the list of validated elements.

    Raises
    ------
    com.IbisTypeError
        If there are fewer than ``min_length`` elements.
    """
    if isinstance(arg, (tuple, list, ir.ListExpr)):
        items = arg
    else:
        items = [arg]

    if len(items) < min_length:
        raise com.IbisTypeError(
            'Arg must have at least {} number of elements'.format(min_length))

    validated = [inner(item) for item in items]
    return ir.sequence(validated)
def member_of(obj, arg):
    """Return the attribute of ``obj`` named by ``arg``.

    When ``arg`` is an ``enum.Enum`` member, ``obj`` is first passed to
    ``enum.unique`` and the member's ``name`` is used for the lookup.

    Raises
    ------
    com.IbisTypeError
        If ``obj`` has no attribute with that name.
    """
    if isinstance(arg, enum.Enum):
        enum.unique(obj)  # check that enum has unique values
        key = arg.name
    else:
        key = arg

    if hasattr(obj, key):
        return getattr(obj, key)

    raise com.IbisTypeError(
        'Value with type {} is not a member of {}'.format(type(key), obj))
def __init__(self, name, table_expr):
    """Initialise the node for field ``name`` of ``table_expr``.

    ``Node.__init__`` runs first with ``[name, table_expr]``; the field name
    is then checked against the table's schema.

    Raises
    ------
    com.IbisTypeError
        If ``name`` is not in ``table_expr.schema()``.
    """
    Node.__init__(self, [name, table_expr])

    known_fields = table_expr.schema()
    if name not in known_fields:
        message = "'{0}' is not a field in {1}".format(
            name, table_expr.columns)
        raise com.IbisTypeError(message)

    self.name = name
    self.table = table_expr
def cast(source: Union[DataType, str],
         target: Union[DataType, str],
         **kwargs) -> DataType:
    """Attempt an implicit cast from the source dtype to the target dtype.

    Both arguments are normalised with ``dtype``; ``kwargs`` are forwarded
    to ``castable``.

    Returns
    -------
    The normalised target dtype.

    Raises
    ------
    com.IbisTypeError
        If ``castable`` reports that the cast is not allowed.
    """
    source_type = dtype(source)
    target_type = dtype(target)

    if castable(source_type, target_type, **kwargs):
        return target_type

    raise com.IbisTypeError(
        'Datatype {} cannot be implicitly casted to {}'.format(
            source_type, target_type))
def one_of(inners, arg):
    """Return the result of the first inner validator that accepts ``arg``.

    Validators are tried in order; a ``com.IbisTypeError`` or ``ValueError``
    from one of them moves on to the next.

    Raises
    ------
    com.IbisTypeError
        If every validator in ``inners`` fails.
    """
    for validator in inners:
        try:
            return validator(arg)
        except (com.IbisTypeError, ValueError):
            continue

    described = ', '.join(repr(validator) for validator in inners)
    raise com.IbisTypeError(
        'Arg passes neither of the following rules: {}'.format(described))
def value(dtype, arg):
    """Validate that ``arg`` is a value expression with a given datatype.

    Parameters
    ----------
    dtype : DataType subclass or DataType instance
    arg : python literal or an ibis expression
        A python literal is first coerced to an ibis literal.

    Returns
    -------
    arg : AnyValue
        An ibis value expression with the specified datatype

    Raises
    ------
    com.IbisTypeError
        If the (possibly coerced) argument is not an ``ir.AnyValue``, or if
        its datatype is neither an instance of ``dtype`` nor implicitly
        castable to it.
    """
    # Anything that is not already an expression becomes an ibis literal.
    expr = arg if isinstance(arg, ir.Expr) else ir.literal(arg)

    if not isinstance(expr, ir.AnyValue):
        raise com.IbisTypeError(
            'Given argument with type {} is not a value '
            'expression'.format(type(expr))
        )

    # The op's ``value`` attribute, when present, feeds the implicit cast
    # check below.
    literal_value = getattr(expr.op(), 'value', None)

    # A dtype class such as dt.Interval or dt.Decimal was given.
    if isinstance(dtype, type) and isinstance(expr.type(), dtype):
        return expr

    # A dtype instance or string was given and the argument's dtype is
    # implicitly castable to it, e.g. dt.int8 is castable to dt.int64.
    if dt.castable(expr.type(), dt.dtype(dtype), value=literal_value):
        return expr

    raise com.IbisTypeError(
        'Given argument with datatype {} is not '
        'subtype of {} nor implicitly castable to '
        'it'.format(expr.type(), dtype)
    )
def __init__(self, name, table_expr):
    """Initialise a column reference into ``table_expr``.

    ``name`` may be an integer, in which case it is resolved to a field
    name with ``schema.name_at_position`` before the parent initialiser
    runs.

    Raises
    ------
    com.IbisTypeError
        If the resolved name is not in the table's schema.
    """
    schema = table_expr.schema()

    # Integer positions are translated to the field name at that position.
    is_position = isinstance(name, six.integer_types)
    column = schema.name_at_position(name) if is_position else name

    super(TableColumn, self).__init__(column, table_expr)

    if column not in schema:
        message = "'{0}' is not a field in {1}".format(
            column, table_expr.columns)
        raise com.IbisTypeError(message)

    self.name = column
    self.table = table_expr
def default(value, **kwargs) -> DataType:
    """Reject ``value``: always raises ``com.IbisTypeError``.

    ``kwargs`` are accepted and ignored.
    """
    message = 'Value {!r} is not a valid datatype'.format(value)
    raise com.IbisTypeError(message)
def compute_window_spec(dtype, obj):
    """Always raise ``com.IbisTypeError`` for the given ``dtype``/``obj``."""
    template = "Unknown dtype type {} and object {} for compute_window_spec"
    raise com.IbisTypeError(template.format(dtype, obj))