def test_interval_unvalid_unit(unit):
    """An invalid unit is rejected both when parsed and when constructed."""
    type_string = "interval('{}')".format(unit)
    with pytest.raises(ValueError):
        dt.validate_type(type_string)
    with pytest.raises(ValueError):
        dt.Interval(dt.int32, unit)
def test_interval(unit):
    """Interval type strings parse to the equivalent ``dt.Interval``.

    Bug fix: the original built ``==`` comparisons but discarded their
    results, so the test could never fail; each comparison now carries
    an ``assert``.
    """
    # No explicit value type defaults to int32.
    definition = "interval('{}')".format(unit)
    assert dt.Interval(unit, dt.int32) == dt.validate_type(definition)

    definition = "interval<uint16>('{}')".format(unit)
    assert dt.Interval(unit, dt.uint16) == dt.validate_type(definition)

    definition = "interval<int64>('{}')".format(unit)
    assert dt.Interval(unit, dt.int64) == dt.validate_type(definition)
def __init__(self, hdfs_file, input_type, output_type, name=None):
    """Describe a UDF backed by a compiled library in HDFS.

    Parameters
    ----------
    hdfs_file : str
        Path to the library; must end in ``.so`` or ``.ll``.
    input_type : sequence
        Input type specs, each accepted by ``validate_type``.
    output_type
        Output type spec, accepted by ``validate_type``.
    name : str, optional
        Explicit function name; when omitted, a deterministic name is
        derived by hashing the symbol plus the input type names.

    Raises
    ------
    ValueError
        If ``hdfs_file`` does not have a ``.so`` or ``.ll`` suffix.
    """
    file_suffix = hdfs_file[-3:]
    if not (file_suffix == '.so' or file_suffix == '.ll'):
        raise ValueError('Invalid file type. Must be .so or .ll ')
    self.hdfs_file = hdfs_file
    inputs = [validate_type(x) for x in input_type]
    output = validate_type(output_type)
    new_name = name
    if not name:
        string = self.so_symbol
        for in_type in inputs:
            string += in_type.name()
        # Bug fix: hashlib.sha1 requires bytes; hashing a str raises
        # TypeError under Python 3.
        new_name = sha1(string.encode('utf-8')).hexdigest()
    UDFInfo.__init__(self, inputs, output, new_name)
def param(type, name=None):
    """Create a parameter of a particular type to be defined just before
    execution.

    Parameters
    ----------
    type : dt.DataType
        The type of the unbound parameter, e.g., double, int64, date, etc.
    name : str, optional
        The name of the parameter

    Returns
    -------
    ScalarExpr

    Examples
    --------
    >>> import ibis
    >>> import ibis.expr.datatypes as dt
    >>> start = ibis.param(dt.date)
    >>> end = ibis.param(dt.date)
    >>> schema = [('timestamp_col', 'timestamp'), ('value', 'double')]
    >>> t = ibis.table(schema)
    >>> predicates = [t.timestamp_col >= start, t.timestamp_col <= end]
    >>> expr = t.filter(predicates).value.sum()
    """
    if name is None:
        name = _parameter_name()
    op = ScalarParameter(dt.validate_type(type), name=name)
    return op.to_expr().name(name)
def test_struct():
    """A nested array<struct<...>> spec round-trips through validate_type."""
    orders = """array<struct<
        oid: int64,
        status: string,
        totalprice: decimal(12, 2),
        order_date: string,
        items: array<struct<
            iid: int64,
            name: string,
            price: decimal(12, 2),
            discount_perc: decimal(12, 2),
            shipdate: string
        >>
    >>"""

    item_struct = dt.Struct.from_tuples([
        ('iid', dt.int64),
        ('name', dt.string),
        ('price', dt.Decimal(12, 2)),
        ('discount_perc', dt.Decimal(12, 2)),
        ('shipdate', dt.string),
    ])
    expected = dt.Array(dt.Struct.from_tuples([
        ('oid', dt.int64),
        ('status', dt.string),
        ('totalprice', dt.Decimal(12, 2)),
        ('order_date', dt.string),
        ('items', dt.Array(item_struct)),
    ]))
    assert dt.validate_type(orders) == expected
def test_struct():
    """A nested array<struct<...>> spec round-trips through validate_type."""
    orders = """array<struct<
        oid: int64,
        status: string,
        totalprice: decimal(12, 2),
        order_date: string,
        items: array<struct<
            iid: int64,
            name: string,
            price: decimal(12, 2),
            discount_perc: decimal(12, 2),
            shipdate: string
        >>
    >>"""

    inner = dt.Array(dt.Struct.from_tuples([
        ('iid', dt.int64),
        ('name', dt.string),
        ('price', dt.Decimal(12, 2)),
        ('discount_perc', dt.Decimal(12, 2)),
        ('shipdate', dt.string),
    ]))
    outer_fields = [
        ('oid', dt.int64),
        ('status', dt.string),
        ('totalprice', dt.Decimal(12, 2)),
        ('order_date', dt.string),
        ('items', inner),
    ]
    assert dt.validate_type(orders) == dt.Array(
        dt.Struct.from_tuples(outer_fields)
    )
def test_literal_complex_types(value, expected_type, expected_class):
    """Literals of complex values infer the right type and expression class."""
    expr = ibis.literal(value)
    assert expr.type().equals(dt.validate_type(expected_type))
    assert isinstance(expr, expected_class)
    # The underlying op is a Literal holding the exact value object.
    op = expr.op()
    assert isinstance(op, ops.Literal)
    assert op.value is value
def pandas_dtypes_to_ibis_schema(df, schema):
    """Build an ibis schema for ``df``, preferring explicit ``schema`` entries.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose dtypes are inspected.
    schema : mapping
        Explicit ``{column_name: type spec}`` overrides.

    Returns
    -------
    ibis.Schema

    Raises
    ------
    TypeError
        If a column name is not a string, or an ``object`` column's
        contents cannot be inferred unambiguously.
    """
    dtypes = df.dtypes

    pairs = []
    # Fix: ``Series.iteritems`` is deprecated (removed in pandas 2.0);
    # ``items`` has identical behavior and has been available throughout.
    for column_name, dtype in dtypes.items():
        if not isinstance(column_name, six.string_types):
            raise TypeError(
                'Column names must be strings to use the pandas backend')

        if column_name in schema:
            # Caller-supplied override wins over inference.
            ibis_type = dt.validate_type(schema[column_name])
        elif dtype == np.object_:
            # object columns need value-level inference; NaNs are dropped
            # so they don't poison the inferred dtype.
            inferred_dtype = infer_dtype(df[column_name].dropna())
            if inferred_dtype == 'mixed':
                raise TypeError(
                    'Unable to infer type of column {0!r}. Try instantiating '
                    'your table from the client with client.table('
                    "'my_table', schema={{{0!r}: <explicit type>}})".format(
                        column_name))
            ibis_type = _INFERRED_DTYPE_TO_IBIS_TYPE[inferred_dtype]
        elif hasattr(dtype, 'tz'):
            # tz-aware datetime dtype maps to a timezone-carrying Timestamp.
            ibis_type = dt.Timestamp(str(dtype.tz))
        else:
            dtype_string = str(dtype)
            ibis_type = _DTYPE_TO_IBIS_TYPE.get(dtype_string, dtype_string)

        pairs.append((column_name, ibis_type))
    return ibis.schema(pairs)
def test_udf_primitive_output_types(self):
    """UDFs over each primitive type yield correctly-shaped expressions."""
    cases = [
        ('boolean', True, self.b),
        ('int8', 1, self.i8),
        ('int16', 1, self.i16),
        ('int32', 1, self.i32),
        ('int64', 1, self.i64),
        ('float', 1.0, self.f),
        ('double', 1.0, self.d),
        ('string', '1', self.s),
        ('timestamp', ibis.timestamp('1961-04-10'), self.t),
    ]
    for type_name, scalar_value, column_value in cases:
        func = self._register_udf([type_name], type_name, 'test')
        ibis_type = dt.validate_type(type_name)

        # A scalar argument produces the scalar expression class.
        scalar_expr = func(scalar_value)
        expected_scalar = ibis_type.scalar_type()(scalar_expr.op())
        assert type(scalar_expr) == type(expected_scalar)  # noqa: E721

        # A column argument produces the column expression class.
        column_expr = func(column_value)
        expected_column = ibis_type.column_type()(column_expr.op())
        assert type(column_expr) == type(expected_column)  # noqa: E721
def _to_input_sig(inputs):
    """Coerce ``inputs`` into a ``rules.TypeSignature``.

    An existing signature passes through untouched; otherwise each entry
    is validated and wrapped as a value-typed rule.
    """
    if isinstance(inputs, rules.TypeSignature):
        return inputs
    validated = (validate_type(spec) for spec in inputs)
    return rules.TypeSignature([rules.value_typed_as(t) for t in validated])
def __init__(self, hdfs_file, input_type, output_type, name=None):
    """Describe a UDF backed by a compiled library in HDFS.

    Parameters
    ----------
    hdfs_file : str
        Path to the library; must end in ``.so`` or ``.ll``.
    input_type : sequence
        Input type specs, each accepted by ``validate_type``.
    output_type
        Output type spec, accepted by ``validate_type``.
    name : str, optional
        Explicit function name; when omitted, a deterministic name is
        derived by hashing the symbol plus the input type names.

    Raises
    ------
    ValueError
        If ``hdfs_file`` does not have a ``.so`` or ``.ll`` suffix.
    """
    file_suffix = hdfs_file[-3:]
    if not(file_suffix == '.so' or file_suffix == '.ll'):
        raise ValueError('Invalid file type. Must be .so or .ll ')
    self.hdfs_file = hdfs_file
    inputs = [validate_type(x) for x in input_type]
    output = validate_type(output_type)
    new_name = name
    if not name:
        string = self.so_symbol
        for in_type in inputs:
            string += in_type.name()
        # Bug fix: hashlib.sha1 requires bytes; hashing a str raises
        # TypeError under Python 3.
        new_name = sha1(string.encode('utf-8')).hexdigest()
    UDFInfo.__init__(self, inputs, output, new_name)
def _ibis_string_to_impala(tval):
    """Map an ibis type string to its Impala SQL spelling.

    Known names are looked up directly; anything else is validated and
    rendered via ``repr``. Returns ``None`` implicitly when validation
    yields a falsy result.
    """
    from ibis.backends.base_sql import sql_type_names

    try:
        return sql_type_names[tval]
    except KeyError:
        validated = dt.validate_type(tval)
        if validated:
            return repr(validated)
def _ibis_string_to_impala(tval):
    """Map an ibis type string to its Impala SQL spelling.

    Known names are looked up directly; anything else is validated and
    rendered via ``repr``. Returns ``None`` implicitly when validation
    yields a falsy result.
    """
    from ibis.impala.compiler import _sql_type_names

    try:
        return _sql_type_names[tval]
    except KeyError:
        validated = dt.validate_type(tval)
        if validated:
            return repr(validated)
def _validate(self, args, i):
    """Validate ``args[i]`` as an ibis type, updating it in place.

    String specs are lowercased before parsing; the validated type is
    written back into ``args`` and returned.
    """
    # NOTE(review): reconstructed from collapsed formatting — assumes the
    # validate call sits outside the string-only branch; confirm against
    # the original layout.
    arg = args[i]
    if isinstance(arg, py_string):
        # Type names are case-insensitive.
        arg = arg.lower()
    args[i] = arg = dt.validate_type(arg)
    return arg
def _impala_signature(sig):
    """Render a UDF signature as a comma-separated Impala type list."""
    if not isinstance(sig, rules.TypeSignature):
        # Plain iterable of type specs: validate then render each one.
        rendered = [_type_to_sql_string(validate_type(spec)) for spec in sig]
        return ', '.join(rendered)
    if isinstance(sig, rules.VarArgs):
        # Variadic signatures render as a single repeated type.
        return '{0}...'.format(_arg_to_string(sig.arg_type))
    return ', '.join(_arg_to_string(arg) for arg in sig.types)
def _arg_to_string(arg):
    """Render a single argument rule or type spec as SQL type text.

    Raises NotImplementedError for multi-type rules and unrecognized
    argument kinds.
    """
    if isinstance(arg, rules.ValueTyped):
        candidates = arg.types
        if len(candidates) > 1:
            # Union argument types have no single SQL rendering.
            raise NotImplementedError
        return _type_to_sql_string(candidates[0])
    if isinstance(arg, py_string):
        return _type_to_sql_string(validate_type(arg))
    raise NotImplementedError
def test_udf_primitive_output_types(ty, value, column, table):
    """A registered UDF yields scalar/column classes matching its type."""
    func = _register_udf([ty], ty, 'test')
    ibis_type = dt.validate_type(ty)

    scalar_result = func(value)
    assert type(scalar_result) == ibis_type.scalar

    column_result = func(table[column])
    assert type(column_result) == ibis_type.column
def test_uda_primitive_output_types(ty, value):
    """A registered UDA yields the scalar class on repeated invocation."""
    func = _register_uda([ty], ty, 'test')
    scalar_class = dt.validate_type(ty).scalar
    # Invoke twice to confirm the result class is stable across calls.
    for _ in range(2):
        assert isinstance(func(value), scalar_class)
def test_uda_primitive_output_types(ty, value, table):
    """A registered UDA yields scalar expressions of the declared type."""
    func = _register_uda([ty], ty, 'test')
    ibis_type = dt.validate_type(ty)

    expr1 = func(value)
    expr2 = func(value)

    # The expected class comes from wrapping each expression's op in the
    # type's scalar constructor.
    assert isinstance(expr1, type(ibis_type.scalar_type()(expr1.op())))
    assert isinstance(expr2, type(ibis_type.scalar_type()(expr2.op())))
def test_udf_primitive_output_types(ty, value, column, table):
    """UDF results take the scalar/column class implied by the type."""
    func = _register_udf([ty], ty, 'test')
    ibis_type = dt.validate_type(ty)

    scalar_expr = func(value)
    expected_scalar = ibis_type.scalar_type()(scalar_expr.op())
    assert type(scalar_expr) == type(expected_scalar)  # noqa: E721

    column_expr = func(table[column])
    expected_column = ibis_type.column_type()(column_expr.op())
    assert type(column_expr) == type(expected_column)  # noqa: E721
def int_literal_class(value, allow_overflow=False):
    """Return the smallest ibis integer type that can hold ``value``.

    Values outside the int64 range raise OverflowError unless
    ``allow_overflow`` is set, in which case int64 is returned anyway.
    """
    if -128 <= value <= 127:
        name = 'int8'
    elif -32768 <= value <= 32767:
        name = 'int16'
    elif -2147483648 <= value <= 2147483647:
        name = 'int32'
    else:
        int64_min, int64_max = -9223372036854775808, 9223372036854775807
        if not (int64_min <= value <= int64_max) and not allow_overflow:
            raise OverflowError(value)
        name = 'int64'
    return dt.validate_type(name)
def literal(value, type=None):
    """Create a scalar expression from a Python value.

    Parameters
    ----------
    value : some Python basic type
        A Python value
    type : ibis type or string, optional
        An instance of :class:`ibis.expr.datatypes.DataType` or a string
        indicating the ibis type of `value`. This parameter should only be
        used in cases where ibis's type inference isn't sufficient for
        discovering the type of `value`.

    Returns
    -------
    literal_value : Literal
        An expression representing a literal value

    Examples
    --------
    >>> import ibis
    >>> x = ibis.literal(42)
    >>> x.type()
    int8
    >>> y = ibis.literal(42, type='double')
    >>> y.type()
    double
    >>> ibis.literal('foobar', type='int64')  # doctest: +ELLIPSIS
    Traceback (most recent call last):
        ...
    TypeError: Value 'foobar' cannot be safely coerced to int64
    """
    # Already-wrapped literal expressions pass through unchanged.
    if hasattr(value, 'op') and isinstance(value.op(), Literal):
        return value

    dtype = infer_literal_type(value) if type is None else dt.validate_type(type)

    if not dtype.valid_literal(value):
        raise TypeError('Value {!r} cannot be safely coerced to {}'.format(
            value, dtype))

    # Null-like values become a typed null via cast.
    if value is None or value is _NULL or value is null:
        return null().cast(dtype)
    return Literal(value, type=dtype).to_expr()
def test_uda_primitive_output_types(self):
    """UDAs over each primitive type produce scalar expressions."""
    cases = [
        ('boolean', True, self.b),
        ('int8', 1, self.i8),
        ('int16', 1, self.i16),
        ('int32', 1, self.i32),
        ('int64', 1, self.i64),
        ('float', 1.0, self.f),
        ('double', 1.0, self.d),
        ('string', '1', self.s),
        ('timestamp', ibis.timestamp('1961-04-10'), self.t),
    ]
    for type_name, scalar_value, _column_value in cases:
        func = self._register_uda([type_name], type_name, 'test')
        scalar_class = validate_type(type_name).scalar_type()
        # Call twice: the result class must be stable across invocations.
        assert isinstance(func(scalar_value), scalar_class)
        assert isinstance(func(scalar_value), scalar_class)
def parse_type(t):
    """Translate an Impala type string into an ibis type string.

    Parameters
    ----------
    t : str
        Impala type name (case-insensitive).

    Returns
    -------
    str
        The ibis type string.

    Raises
    ------
    ValueError
        If a decimal spec fails validation.
    Exception
        For any other unrecognized type name.
    """
    t = t.lower()
    if t in _impala_to_ibis_type:
        return _impala_to_ibis_type[t]
    if 'varchar' in t or 'char' in t:
        # Impala char/varchar all map to ibis string.
        return 'string'
    if 'decimal' in t:
        result = validate_type(t)
        if result:
            return t
        # Bug fix: original did ``return ValueError(t)``, handing the
        # caller an exception instance instead of signaling the error.
        raise ValueError(t)
    raise Exception(t)
def test_uda_primitive_output_types(self):
    """UDAs over each primitive type produce scalar expressions."""
    cases = [
        ('boolean', True, self.b),
        ('int8', 1, self.i8),
        ('int16', 1, self.i16),
        ('int32', 1, self.i32),
        ('int64', 1, self.i64),
        ('float', 1.0, self.f),
        ('double', 1.0, self.d),
        ('string', '1', self.s),
        ('timestamp', ibis.timestamp('1961-04-10'), self.t),
    ]
    for type_name, scalar_value, _column_value in cases:
        func = self._register_uda([type_name], type_name, 'test')
        scalar_class = validate_type(type_name).scalar_type()
        # Invoke twice to confirm a stable result class.
        assert isinstance(func(scalar_value), scalar_class)
        assert isinstance(func(scalar_value), scalar_class)
def test_struct():
    """A nested array<struct<...>> spec round-trips through validate_type."""
    orders = """array<struct<
        oid: int64,
        status: string,
        totalprice: decimal(12, 2),
        order_date: string,
        items: array<struct<
            iid: int64,
            name: string,
            price: decimal(12, 2),
            discount_perc: decimal(12, 2),
            shipdate: string
        >>
    >>"""

    line_items = dt.Struct.from_tuples(
        [
            ("iid", dt.int64),
            ("name", dt.string),
            ("price", dt.Decimal(12, 2)),
            ("discount_perc", dt.Decimal(12, 2)),
            ("shipdate", dt.string),
        ]
    )
    expected = dt.Array(
        dt.Struct.from_tuples(
            [
                ("oid", dt.int64),
                ("status", dt.string),
                ("totalprice", dt.Decimal(12, 2)),
                ("order_date", dt.string),
                ("items", dt.Array(line_items)),
            ]
        )
    )
    assert dt.validate_type(orders) == expected
def _to_type(x):
    """Convert an Impala type name into a validated ibis type."""
    # Impala names are case-insensitive; normalize before mapping.
    ibis_type = udf._impala_type_to_ibis(x.lower())
    return validate_type(ibis_type)
def test_nested_array():
    """Array type specs may nest inside one another."""
    expected = dt.Array(dt.Array(dt.string))
    assert dt.validate_type("array<array<string>>") == expected
def test_literal_with_explicit_type(value, expected_type):
    """An explicit ``type=`` argument overrides literal type inference."""
    typed = ibis.literal(value, type=expected_type)
    assert typed.type().equals(dt.validate_type(expected_type))
def f(self):
    """Resolve the output type and return its array expression class.

    ``rule`` (from the enclosing scope) is either a concrete DataType or
    a callable producing a type spec for this node.
    """
    resolved = rule if isinstance(rule, dt.DataType) else dt.validate_type(rule(self))
    return resolved.array_type()
def shape_like(arg, out_type):
    """Pick the scalar or array class of ``out_type`` to match ``arg``."""
    resolved = dt.validate_type(out_type)
    if isinstance(arg, ir.ScalarExpr):
        return resolved.scalar_type()
    return resolved.array_type()
def _type_signature(self, inputs, output):
    """Build (input, output) signature rules; output shape follows args."""
    input_type = _to_input_sig(inputs)
    output_type = rules.shape_like_flatargs(validate_type(output))
    return input_type, output_type
def test_char_varchar(spec):
    """char/varchar specs all collapse to the string type."""
    parsed = dt.validate_type(spec)
    assert parsed == dt.string
def f(self):
    """Resolve the rule's type spec and return its scalar expression class."""
    return dt.validate_type(rule(self)).scalar_type()
def _type_signature(self, inputs, output):
    """Build (input, output) signature rules; output is always scalar."""
    input_type = _to_input_sig(inputs)
    output_type = rules.scalar_output(validate_type(output))
    return input_type, output_type
def test_primitive(spec):
    """Primitive type strings resolve to the registered primitive type."""
    parsed = dt.validate_type(spec)
    assert parsed == dt._primitive_types[spec]
def test_char_varchar_invalid(spec):
    """Malformed char/varchar specs fail to parse with SyntaxError."""
    with pytest.raises(SyntaxError):
        dt.validate_type(spec)
def test_decimal_failure():
    """An unterminated decimal spec fails to parse with SyntaxError."""
    with pytest.raises(SyntaxError):
        dt.validate_type("decimal(")
def _impala_signature(types):
    """Render each type spec as its Impala SQL type string."""
    # Imported locally to avoid a module-level circular import.
    from ibis.expr.datatypes import validate_type

    return [_type_to_sql_string(validate_type(spec)) for spec in types]
def _largest_int(int_types):
    """Return the ibis int type wide enough to hold all of ``int_types``."""
    widest_bytes = max(t._nbytes for t in int_types)
    return dt.validate_type('int%d' % (8 * widest_bytes))
def shape_like_args(args, out_type):
    """Pick scalar or array class of ``out_type`` based on ``args``.

    Any column-shaped argument promotes the result to the array class.
    """
    resolved = dt.validate_type(out_type)
    if util.any_of(args, ir.ColumnExpr):
        return resolved.array_type()
    return resolved.scalar_type()
def shape_like_args(args, out_type):
    """Pick scalar or array class of ``out_type`` based on ``args``.

    Any array-shaped argument promotes the result to the array class.
    """
    resolved = dt.validate_type(out_type)
    if util.any_of(args, ir.ArrayExpr):
        return resolved.array_type()
    return resolved.scalar_type()
def f(self):
    """Resolve the rule's type spec and return its array expression class."""
    return dt.validate_type(rule(self)).array_type()
def _operation_type_conversion(inputs, output):
    """Translate UDF input/output specs into operation argument rules."""
    validated_inputs = [validate_type(spec) for spec in inputs]
    in_values = [
        rules.value_typed_as(_convert_types(t)) for t in validated_inputs
    ]
    out_value = rules.shape_like_flatargs(validate_type(output))
    return (in_values, out_value)