def make_array_type(nullable, value_type=value_type):
    """Build a ``dt.Array`` of ``value_type`` with the requested nullability.

    ``value_type`` defaults to whatever the name ``value_type`` is bound to
    in the enclosing scope at the moment this function is defined.
    """
    array_type = dt.Array(value_type, nullable=nullable)
    return array_type
def test_array_with_string_value_type():
    """String element specs passed to ``dt.Array`` equal explicit datatypes."""
    # A plain type name used as the element type.
    from_name = dt.Array('int32')
    assert from_name == dt.Array(dt.int32)

    # A nested spec string wrapped in two further array layers.
    from_spec = dt.Array(dt.Array('array<map<string, double>>'))
    innermost = dt.Map(dt.string, dt.double)
    explicit = dt.Array(dt.Array(dt.Array(innermost)))
    assert from_spec == explicit
class MyOp(ops.ValueOp):
    """Value operation declaring a single array-of-double argument."""

    # Argument rule: a value validated against array<double>.
    value = rlz.value(dt.Array(dt.double))

    # Output type rule built from the argument named 'value'.
    output_type = rlz.typeof('value')
def test_complex_datatype_builtins(benchmark, func):
    """Benchmark ``func`` applied to a nested array-of-struct datatype."""
    fields = {
        'a': dt.Array(dt.string),
        'b': dt.Map(dt.string, dt.Array(dt.int64)),
    }
    struct_type = dt.Struct.from_dict(fields)
    datatype = dt.Array(struct_type)
    benchmark(func, datatype)
def times_two(x, scope=None):
    """Return ``x`` multiplied by ``2.0``; ``scope`` is accepted but unused."""
    doubled = x * 2.0
    return doubled


@udf.analytic(input_type=['double'], output_type='double')
def zscore(series):
    """Subtract the series mean, then divide by the series std."""
    centered = series - series.mean()
    return centered / series.std()


@udf.elementwise([], dt.int64)
def a_single_number(**kwargs):
    """Return the constant ``1`` whatever keyword arguments are passed."""
    return 1


@udf.reduction(
    input_type=[dt.double, dt.Array(dt.double)],
    output_type=dt.Array(dt.double),
)
def quantiles(series, quantiles):
    """Return ``series.quantile(quantiles)`` converted to a list."""
    computed = series.quantile(quantiles)
    return list(computed)


def test_udf(t, df):
    """``my_string_length(t.a)`` executes to twice the string lengths."""
    expr = my_string_length(t.a)
    assert isinstance(expr, ir.ColumnExpr)

    actual = expr.execute()
    string_lengths = df.a.str.len()
    expected = string_lengths.mul(2)
    tm.assert_series_equal(actual, expected)
@pytest.fixture(scope='module') def df3(): return pd.DataFrame( { 'key': list('ac'), 'other_value': [4.0, 6.0], 'key2': list('ae'), 'key3': list('fe'), } ) t_schema = { 'decimal': dt.Decimal(4, 3), 'array_of_float64': dt.Array(dt.double), 'array_of_int64': dt.Array(dt.int64), 'array_of_strings': dt.Array(dt.string), 'map_of_strings_integers': dt.Map(dt.string, dt.int64), 'map_of_integers_strings': dt.Map(dt.int64, dt.string), 'map_of_complex_values': dt.Map(dt.string, dt.Array(dt.int64)), } @pytest.fixture(scope='module') def t(client): return client.table('df', schema=t_schema) @pytest.fixture(scope='module') def lahman(batting_df, awards_players_df):
} return my_len(s); """; SELECT (my_len_0('abcd') + my_len_0('abcd')) + my_len_1('abcd') AS `tmp`''' assert sql == expected @pytest.mark.parametrize( ('argument_type', 'return_type'), [ param(dt.int64, dt.float64, marks=pytest.mark.xfail(raises=TypeError)), param(dt.float64, dt.int64, marks=pytest.mark.xfail(raises=TypeError)), # complex argument type, valid return type param( dt.Array(dt.int64), dt.float64, marks=pytest.mark.xfail(raises=TypeError), ), # valid argument type, complex invalid return type param( dt.float64, dt.Array(dt.int64), marks=pytest.mark.xfail(raises=TypeError), ), # both invalid param( dt.Array(dt.Array(dt.int64)), dt.int64, marks=pytest.mark.xfail(raises=TypeError), ),
def test_array():
    """The upper-case spec ``ARRAY<DOUBLE>`` parses to an array of doubles."""
    parsed = dt.dtype('ARRAY<DOUBLE>')
    assert parsed == dt.Array(dt.double)
def test_nested_array():
    """A doubly nested array spec parses to nested ``dt.Array`` types."""
    parsed = dt.dtype('array<array<string>>')
    inner = dt.Array(dt.string)
    assert parsed == dt.Array(inner)
def test_nested_map():
    """``dt.validate_type`` parses a map whose values are arrays of maps."""
    validated = dt.validate_type('map<int64, array<map<string, int8>>>')
    inner_map = dt.Map(dt.string, dt.int8)
    assert validated == dt.Map(dt.int64, dt.Array(inner_map))
# numpy types (np.int8(5), dt.int8), (np.int16(-1), dt.int16), (np.int32(2), dt.int32), (np.int64(-5), dt.int64), (np.uint8(5), dt.uint8), (np.uint16(50), dt.uint16), (np.uint32(500), dt.uint32), (np.uint64(5000), dt.uint64), (np.float32(5.5), dt.float32), (np.float32(5.5), dt.float), (np.float64(5.55), dt.float64), (np.float64(5.55), dt.double), (np.bool_(True), dt.boolean), (np.bool_(False), dt.boolean), (np.arange(5, dtype='int32'), dt.Array(dt.int32)), # pandas types (pd.Timestamp('2015-01-01 12:00:00', tz='US/Eastern'), dt.Timestamp('US/Eastern')), # parametric types (list('abc'), dt.Array(dt.string)), ([1, 2, 3], dt.Array(dt.int8)), ([1, 128], dt.Array(dt.int16)), ([1, 128, 32768], dt.Array(dt.int32)), ([1, 128, 32768, 2147483648], dt.Array(dt.int64)), ({ 'a': 1, 'b': 2, 'c': 3
def test_whole_schema():
    """A table declared from string type specs has the matching typed schema."""
    # String specs handed to ``ibis.table``.
    address_spec = ('struct<city: string, street: string, '
                    'street_number: int32, zip: int16>')
    orders_spec = """array<struct< oid: int64, status: string, totalprice: decimal(12, 2), order_date: string, items: array<struct< iid: int64, name: string, price: decimal(12, 2), discount_perc: decimal(12, 2), shipdate: string >> >>"""
    web_visits_spec = ('map<string, struct<user_agent: string, '
                       'client_ip: string, visit_date: string, '
                       'duration_ms: int32>>')
    support_calls_spec = ('array<struct<agent_id: int64, '
                          'call_date: string, duration_ms: int64, '
                          'issue_resolved: boolean, '
                          'agent_comment: string>>')

    customers = ibis.table(
        [
            ('cid', 'int64'),
            ('mktsegment', 'string'),
            ('address', address_spec),
            ('phone_numbers', 'array<string>'),
            ('orders', orders_spec),
            ('web_visits', web_visits_spec),
            ('support_calls', support_calls_spec),
        ],
        name='customers',
    )

    # The same structure spelled out with explicit datatype objects.
    address_type = dt.Struct.from_tuples([
        ('city', dt.string),
        ('street', dt.string),
        ('street_number', dt.int32),
        ('zip', dt.int16),
    ])
    item_type = dt.Struct.from_tuples([
        ('iid', dt.int64),
        ('name', dt.string),
        ('price', dt.Decimal(12, 2)),
        ('discount_perc', dt.Decimal(12, 2)),
        ('shipdate', dt.string),
    ])
    order_type = dt.Struct.from_tuples([
        ('oid', dt.int64),
        ('status', dt.string),
        ('totalprice', dt.Decimal(12, 2)),
        ('order_date', dt.string),
        ('items', dt.Array(item_type)),
    ])
    visit_type = dt.Struct.from_tuples([
        ('user_agent', dt.string),
        ('client_ip', dt.string),
        ('visit_date', dt.string),
        ('duration_ms', dt.int32),
    ])
    call_type = dt.Struct.from_tuples([
        ('agent_id', dt.int64),
        ('call_date', dt.string),
        ('duration_ms', dt.int64),
        ('issue_resolved', dt.boolean),
        ('agent_comment', dt.string),
    ])

    expected = ibis.Schema.from_tuples(
        [
            ('cid', dt.int64),
            ('mktsegment', dt.string),
            ('address', address_type),
            ('phone_numbers', dt.Array(dt.string)),
            ('orders', dt.Array(order_type)),
            ('web_visits', dt.Map(dt.string, visit_type)),
            ('support_calls', dt.Array(call_type)),
        ],
    )

    assert customers.schema() == expected
class StringSplit(Value):
    """Value node with string ``arg`` and ``delimiter``; yields array<string>."""

    # Both inputs are validated with the string rule.
    arg = rlz.string
    delimiter = rlz.string

    # Output shape rule references ``arg``; output dtype is array<string>.
    output_shape = rlz.shape_like("arg")
    output_dtype = dt.Array(dt.string)
@pytest.mark.parametrize( ('datatype', 'expected'), [ (dt.float32, 'FLOAT64'), (dt.float64, 'FLOAT64'), (dt.uint8, 'INT64'), (dt.uint16, 'INT64'), (dt.uint32, 'INT64'), (dt.int8, 'INT64'), (dt.int16, 'INT64'), (dt.int32, 'INT64'), (dt.int64, 'INT64'), (dt.string, 'STRING'), (dt.Array(dt.int64), 'ARRAY<INT64>'), (dt.Array(dt.string), 'ARRAY<STRING>'), ( dt.Struct.from_tuples([('a', dt.int64), ('b', dt.string), ('c', dt.Array(dt.string))]), 'STRUCT<a INT64, b STRING, c ARRAY<STRING>>', ), (dt.date, 'DATE'), (dt.timestamp, 'TIMESTAMP'), param( dt.Timestamp(timezone='US/Eastern'), 'TIMESTAMP', marks=pytest.mark.xfail(raises=TypeError, reason='Not supported in BigQuery'), ), ('array<struct<a: string>>', 'ARRAY<STRUCT<a STRING>>'),
def infer_array(value):
    """Return a ``dt.Array`` whose element type comes from ``value.dtype.name``."""
    # TODO(kszucs): infer series
    element_type = dt.dtype(value.dtype.name)
    return dt.Array(element_type)
def test_nested_map():
    """``dt.dtype`` parses a map whose values are arrays of maps."""
    inner_map = dt.Map(dt.string, dt.int8)
    expected = dt.Map(dt.int64, dt.Array(inner_map))

    parsed = dt.dtype('map<int64, array<map<string, int8>>>')
    assert parsed == expected
return x + 1.0 @udf.elementwise([dt.double], dt.double) def times_two(x): return x * 2.0 @udf.analytic(input_type=['double'], output_type='double') def zscore(series): return (series - series.mean()) / series.std() @udf.reduction( input_type=[dt.double], output_type=dt.Array(dt.double), ) def quantiles(series, *, quantiles): return np.array(series.quantile(quantiles)) def test_udf(t, df): expr = my_string_length(t.a) assert isinstance(expr, ir.ColumnExpr) result = expr.execute() expected = df.a.str.len().mul(2) tm.assert_series_equal(result, expected)
kwargs = {kind: (begin, end)} with pytest.raises(com.IbisInputError): ibis.window(**kwargs) @pytest.mark.parametrize( ('left', 'right', 'expected'), [ (ibis.literal(1), ibis.literal(1.0), dt.float64), (ibis.literal('a'), ibis.literal('b'), dt.string), (ibis.literal(1.0), ibis.literal(1), dt.float64), (ibis.literal(1), ibis.literal(1), dt.int8), (ibis.literal(1), ibis.literal(1000), dt.int16), (ibis.literal(2**16), ibis.literal(2**17), dt.int32), (ibis.literal(2**50), ibis.literal(1000), dt.int64), (ibis.literal([1, 2]), ibis.literal([1, 2]), dt.Array(dt.int8)), (ibis.literal(['a']), ibis.literal([]), dt.Array(dt.string)), (ibis.literal([]), ibis.literal(['a']), dt.Array(dt.string)), (ibis.literal([]), ibis.literal([]), dt.Array(dt.null)), ], ) def test_nullif_type(left, right, expected): assert left.nullif(right).type() == expected @pytest.mark.parametrize(('left', 'right'), [(ibis.literal(1), ibis.literal('a'))]) def test_nullif_fail(left, right): with pytest.raises(com.IbisTypeError): left.nullif(right) with pytest.raises(com.IbisTypeError):
def test_array_schema(array_types):
    """Columns ``x``, ``y`` and ``z`` are arrays of int64, string and double."""
    expected_value_types = (
        ('x', dt.int64),
        ('y', dt.string),
        ('z', dt.double),
    )
    for column, value_type in expected_value_types:
        assert getattr(array_types, column).type() == dt.Array(value_type)
class FooNode(ops.ValueOp):
    """Value operation over one array-of-int64 argument; output is ``Foo``."""

    # Single argument, validated against array<int64>.
    value = Arg(rlz.value(dt.Array(dt.int64)))

    def output_type(self):
        """Return ``Foo``."""
        return Foo
@pytest.mark.parametrize( "dtypes", [ pytest.param( [ obj for _, obj in inspect.getmembers( dt, lambda obj: isinstance(obj, dt.DataType), ) ], id="singletons", ), pytest.param( dt.Array( dt.Struct.from_dict( dict( a=dt.Array(dt.string), b=dt.Map(dt.string, dt.Array(dt.int64)), ))), id="complex", ), ], ) def test_eq_datatypes(benchmark, dtypes): def eq(a, b): assert a == b benchmark(eq, dtypes, copy.deepcopy(dtypes)) def multiple_joins(table, num_joins): for _ in range(num_joins):
def test_map_keys_output_type():
    """Keys of a string-keyed map literal are typed as array<string>."""
    mapping = ibis.literal({'a': 1, 'b': 2})
    keys_type = mapping.keys().type()
    assert keys_type == dt.Array(dt.string)
def test_array_schema(array_types, column, value_type):
    """``array_types[column]`` is typed as an array of ``value_type``."""
    column_type = array_types[column].type()
    assert column_type == dt.Array(value_type)
def test_map_values_output_type():
    """Values of a small-int map literal are typed as array<int8>."""
    mapping = ibis.literal({'a': 1, 'b': 2})
    values_type = mapping.values().type()
    assert values_type == dt.Array(dt.int8)
def test_columns_types_with_additional_argument(con): sql_types = ["toFixedString('foo', 8) AS fixedstring_col"] if parse_version(con.version).base_version >= '1.1.54337': sql_types.append( "toDateTime('2018-07-02 00:00:00', 'UTC') AS datetime_col") sql = 'SELECT {}'.format(', '.join(sql_types)) df = con.sql(sql).execute() assert df.fixedstring_col.dtype.name == 'object' if parse_version(con.version).base_version >= '1.1.54337': assert df.datetime_col.dtype.name == 'datetime64[ns]' @pytest.mark.parametrize( ('ch_type', 'ibis_type'), [ ('Array(Int8)', dt.Array(dt.Int8(nullable=False))), ('Array(Int16)', dt.Array(dt.Int16(nullable=False))), ('Array(Int32)', dt.Array(dt.Int32(nullable=False))), ('Array(Int64)', dt.Array(dt.Int64(nullable=False))), ('Array(UInt8)', dt.Array(dt.UInt8(nullable=False))), ('Array(UInt16)', dt.Array(dt.UInt16(nullable=False))), ('Array(UInt32)', dt.Array(dt.UInt32(nullable=False))), ('Array(UInt64)', dt.Array(dt.UInt64(nullable=False))), ('Array(Float32)', dt.Array(dt.Float32(nullable=False))), ('Array(Float64)', dt.Array(dt.Float64(nullable=False))), ('Array(String)', dt.Array(dt.String(nullable=False))), ('Array(FixedString(32))', dt.Array(dt.String(nullable=False))), ('Array(Date)', dt.Array(dt.Date(nullable=False))), ('Array(DateTime)', dt.Array(dt.Timestamp(nullable=False))), ('Array(DateTime64)', dt.Array(dt.Timestamp(nullable=False))), ('Array(Nothing)', dt.Array(dt.Null(nullable=False))),
def test_literal_array():
    """An empty list literal is an array value with null element type."""
    empty = []
    expr = api.literal(empty)
    assert isinstance(expr, ir.ArrayValue)

    literal_type = expr.type()
    assert literal_type.equals(dt.Array(dt.null))
def test_map_with_string_value_type():
    """String key/value specs passed to ``dt.Map`` equal explicit datatypes."""
    # Scalar value type given by name.
    scalar_valued = dt.Map('int32', 'double')
    assert scalar_valued == dt.Map(dt.int32, dt.double)

    # Parametric value type given as a spec string.
    array_valued = dt.Map('int32', 'array<double>')
    assert array_valued == dt.Map(dt.int32, dt.Array(dt.double))
def __init__(self, value_type, *args, **kwargs):
    """Initialise the parent with ``dt.Array(value_type)`` as first argument.

    Any further positional and keyword arguments are forwarded unchanged.
    """
    array_type = dt.Array(value_type)
    super(ArrayValueTyped, self).__init__(array_type, *args, **kwargs)
import pytest import pytz from multipledispatch.conflict import ambiguities import ibis import ibis.expr.datatypes as dt def test_validate_type(): assert dt.validate_type is dt.dtype @pytest.mark.parametrize( ('spec', 'expected'), [ ('ARRAY<DOUBLE>', dt.Array(dt.double)), ('array<array<string>>', dt.Array(dt.Array(dt.string))), ('map<string, double>', dt.Map(dt.string, dt.double)), ( 'map<int64, array<map<string, int8>>>', dt.Map(dt.int64, dt.Array(dt.Map(dt.string, dt.int8))), ), ('set<uint8>', dt.Set(dt.uint8)), ([dt.uint8], dt.Array(dt.uint8)), ([dt.float32, dt.float64], dt.Array(dt.float64)), ({dt.string}, dt.Set(dt.string)), ('point', dt.point), ('point;4326', dt.point), ('point;4326:geometry', dt.point), ('point;4326:geography', dt.point), ('linestring', dt.linestring),
@pytest.mark.parametrize( ("datatype", "expected"), [ (dt.float32, "FLOAT64"), (dt.float64, "FLOAT64"), (dt.uint8, "INT64"), (dt.uint16, "INT64"), (dt.uint32, "INT64"), (dt.int8, "INT64"), (dt.int16, "INT64"), (dt.int32, "INT64"), (dt.int64, "INT64"), (dt.string, "STRING"), (dt.Array(dt.int64), "ARRAY<INT64>"), (dt.Array(dt.string), "ARRAY<STRING>"), (dt.date, "DATE"), (dt.timestamp, "TIMESTAMP"), param( dt.Timestamp(timezone="US/Eastern"), "TIMESTAMP", ), ], ) def test_simple(datatype, expected): context = TypeTranslationContext() assert ibis_type_to_cloud_spanner_type(datatype, context) == expected @pytest.mark.parametrize("datatype", [dt.uint64, dt.Decimal(8, 3)])