Beispiel #1
0
 def make_array_type(nullable, value_type=value_type):
     """Build an array datatype over ``value_type`` with the given nullability.

     ``value_type`` defaults to the value bound to that name in the
     enclosing scope at the time this function is defined.
     """
     array_type = dt.Array(value_type, nullable=nullable)
     return array_type
Beispiel #2
0
def test_array_with_string_value_type():
    """``dt.Array`` built from a string spec equals one built from datatypes."""
    from_name = dt.Array('int32')
    assert from_name == dt.Array(dt.int32)

    # The string spec itself contributes the innermost array level.
    parsed_nested = dt.Array(dt.Array('array<map<string, double>>'))
    explicit_nested = dt.Array(
        dt.Array(dt.Array(dt.Map(dt.string, dt.double))))
    assert parsed_nested == explicit_nested
Beispiel #3
0
 class MyOp(ops.ValueOp):
     """Value operation declaring a single ``value`` argument."""
     # Argument rule: rlz.value parametrized with an array-of-double datatype.
     value = rlz.value(dt.Array(dt.double))
     # Output type rule refers to the ``value`` argument by name.
     output_type = rlz.typeof('value')
Beispiel #4
0
def test_complex_datatype_builtins(benchmark, func):
    """Benchmark ``func`` called on a nested array-of-struct datatype."""
    struct_fields = {
        'a': dt.Array(dt.string),
        'b': dt.Map(dt.string, dt.Array(dt.int64)),
    }
    datatype = dt.Array(dt.Struct.from_dict(struct_fields))
    benchmark(func, datatype)
Beispiel #5
0
def times_two(x, scope=None):
    """Return ``x`` multiplied by ``2.0``.

    ``scope`` is accepted but not used by the body.
    """
    doubled = x * 2.0
    return doubled


@udf.analytic(input_type=['double'], output_type='double')
def zscore(series):
    """Return ``series`` centered on its mean and divided by its std."""
    centered = series - series.mean()
    return centered / series.std()


@udf.elementwise([], dt.int64)
def a_single_number(**kwargs):
    """Return the constant ``1``; any keyword arguments are ignored."""
    constant = 1
    return constant


@udf.reduction(
    input_type=[dt.double, dt.Array(dt.double)],
    output_type=dt.Array(dt.double),
)
def quantiles(series, quantiles):
    """Return ``series.quantile(quantiles)`` materialized as a list."""
    computed = series.quantile(quantiles)
    return list(computed)


def test_udf(t, df):
    """``my_string_length(t.a)`` executes to twice the string length of ``df.a``."""
    expr = my_string_length(t.a)
    assert isinstance(expr, ir.ColumnExpr)

    tm.assert_series_equal(expr.execute(), df.a.str.len().mul(2))
Beispiel #6
0
@pytest.fixture(scope='module')
def df3():
    """Module-scoped two-row frame with three key columns and a float column."""
    columns = {
        'key': ['a', 'c'],
        'other_value': [4.0, 6.0],
        'key2': ['a', 'e'],
        'key3': ['f', 'e'],
    }
    return pd.DataFrame(columns)


# Column name -> ibis datatype mapping; passed as ``schema`` when the ``t``
# fixture calls ``client.table('df', ...)``.
t_schema = {
    'decimal': dt.Decimal(4, 3),
    'array_of_float64': dt.Array(dt.double),
    'array_of_int64': dt.Array(dt.int64),
    'array_of_strings': dt.Array(dt.string),
    'map_of_strings_integers': dt.Map(dt.string, dt.int64),
    'map_of_integers_strings': dt.Map(dt.int64, dt.string),
    'map_of_complex_values': dt.Map(dt.string, dt.Array(dt.int64)),
}


@pytest.fixture(scope='module')
def t(client):
    """Module-scoped table ``'df'`` obtained from ``client`` with ``t_schema``."""
    table = client.table('df', schema=t_schema)
    return table


@pytest.fixture(scope='module')
def lahman(batting_df, awards_players_df):
Beispiel #7
0
}
return my_len(s);
""";

SELECT (my_len_0('abcd') + my_len_0('abcd')) + my_len_1('abcd') AS `tmp`'''
    assert sql == expected


@pytest.mark.parametrize(
    ('argument_type', 'return_type'),
    [
        param(dt.int64, dt.float64, marks=pytest.mark.xfail(raises=TypeError)),
        param(dt.float64, dt.int64, marks=pytest.mark.xfail(raises=TypeError)),
        # complex argument type, valid return type
        param(
            dt.Array(dt.int64),
            dt.float64,
            marks=pytest.mark.xfail(raises=TypeError),
        ),
        # valid argument type, complex invalid return type
        param(
            dt.float64,
            dt.Array(dt.int64),
            marks=pytest.mark.xfail(raises=TypeError),
        ),
        # both invalid
        param(
            dt.Array(dt.Array(dt.int64)),
            dt.int64,
            marks=pytest.mark.xfail(raises=TypeError),
        ),
def test_array():
    """The ``ARRAY<DOUBLE>`` spec parses to an array of doubles."""
    parsed = dt.dtype('ARRAY<DOUBLE>')
    assert parsed == dt.Array(dt.double)
def test_nested_array():
    """A nested array spec parses to an array of arrays of strings."""
    parsed = dt.dtype('array<array<string>>')
    inner = dt.Array(dt.string)
    assert parsed == dt.Array(inner)
Beispiel #10
0
def test_nested_map():
    """``validate_type`` parses a map whose values are arrays of maps."""
    parsed = dt.validate_type('map<int64, array<map<string, int8>>>')
    inner_map = dt.Map(dt.string, dt.int8)
    assert parsed == dt.Map(dt.int64, dt.Array(inner_map))
        # numpy types
        (np.int8(5), dt.int8),
        (np.int16(-1), dt.int16),
        (np.int32(2), dt.int32),
        (np.int64(-5), dt.int64),
        (np.uint8(5), dt.uint8),
        (np.uint16(50), dt.uint16),
        (np.uint32(500), dt.uint32),
        (np.uint64(5000), dt.uint64),
        (np.float32(5.5), dt.float32),
        (np.float32(5.5), dt.float),
        (np.float64(5.55), dt.float64),
        (np.float64(5.55), dt.double),
        (np.bool_(True), dt.boolean),
        (np.bool_(False), dt.boolean),
        (np.arange(5, dtype='int32'), dt.Array(dt.int32)),

        # pandas types
        (pd.Timestamp('2015-01-01 12:00:00',
                      tz='US/Eastern'), dt.Timestamp('US/Eastern')),

        # parametric types
        (list('abc'), dt.Array(dt.string)),
        ([1, 2, 3], dt.Array(dt.int8)),
        ([1, 128], dt.Array(dt.int16)),
        ([1, 128, 32768], dt.Array(dt.int32)),
        ([1, 128, 32768, 2147483648], dt.Array(dt.int64)),
        ({
            'a': 1,
            'b': 2,
            'c': 3
Beispiel #12
0
def test_whole_schema():
    """Check that string type specs in ``ibis.table`` parse to nested datatypes.

    The table is declared with string specs (including a multi-line
    ``array<struct<...>>`` spec) and its schema is compared against the
    same schema spelled out with explicit ``dt`` constructors.
    """
    # Table declared entirely through string type specifications.
    customers = ibis.table(
        [('cid', 'int64'), ('mktsegment', 'string'),
         ('address', ('struct<city: string, street: string, '
                      'street_number: int32, zip: int16>')),
         ('phone_numbers', 'array<string>'),
         ('orders', """array<struct<
                                oid: int64,
                                status: string,
                                totalprice: decimal(12, 2),
                                order_date: string,
                                items: array<struct<
                                    iid: int64,
                                    name: string,
                                    price: decimal(12, 2),
                                    discount_perc: decimal(12, 2),
                                    shipdate: string
                                >>
                            >>"""),
         ('web_visits', ('map<string, struct<user_agent: string, '
                         'client_ip: string, visit_date: string, '
                         'duration_ms: int32>>')),
         ('support_calls', ('array<struct<agent_id: int64, '
                            'call_date: string, duration_ms: int64, '
                            'issue_resolved: boolean, '
                            'agent_comment: string>>'))],
        name='customers',
    )
    # The same schema written with explicit datatype objects, column by column.
    expected = ibis.Schema.from_tuples([
        ('cid', dt.int64),
        ('mktsegment', dt.string),
        (
            'address',
            dt.Struct.from_tuples([('city', dt.string), ('street', dt.string),
                                   ('street_number', dt.int32),
                                   ('zip', dt.int16)]),
        ),
        ('phone_numbers', dt.Array(dt.string)),
        ('orders',
         dt.Array(
             dt.Struct.from_tuples([('oid', dt.int64), ('status', dt.string),
                                    ('totalprice', dt.Decimal(12, 2)),
                                    ('order_date', dt.string),
                                    ('items',
                                     dt.Array(
                                         dt.Struct.from_tuples([
                                             ('iid', dt.int64),
                                             ('name', dt.string),
                                             ('price', dt.Decimal(12, 2)),
                                             ('discount_perc', dt.Decimal(
                                                 12, 2)),
                                             ('shipdate', dt.string),
                                         ])))]))),
        ('web_visits',
         dt.Map(
             dt.string,
             dt.Struct.from_tuples([
                 ('user_agent', dt.string),
                 ('client_ip', dt.string),
                 ('visit_date', dt.string),
                 ('duration_ms', dt.int32),
             ]))),
        ('support_calls',
         dt.Array(
             dt.Struct.from_tuples([('agent_id', dt.int64),
                                    ('call_date', dt.string),
                                    ('duration_ms', dt.int64),
                                    ('issue_resolved', dt.boolean),
                                    ('agent_comment', dt.string)]))),
    ], )
    assert customers.schema() == expected
Beispiel #13
0
class StringSplit(Value):
    """Operation node with a string ``arg`` and a string ``delimiter``.

    The output dtype is declared as an array of strings; the output shape
    is declared to follow ``arg``.
    """
    # Both inputs use the ``rlz.string`` rule.
    arg = rlz.string
    delimiter = rlz.string

    # Shape is tied to the ``arg`` input by name.
    output_shape = rlz.shape_like("arg")
    # Fixed result datatype: array<string>.
    output_dtype = dt.Array(dt.string)
Beispiel #14
0

@pytest.mark.parametrize(
    ('datatype', 'expected'),
    [
        (dt.float32, 'FLOAT64'),
        (dt.float64, 'FLOAT64'),
        (dt.uint8, 'INT64'),
        (dt.uint16, 'INT64'),
        (dt.uint32, 'INT64'),
        (dt.int8, 'INT64'),
        (dt.int16, 'INT64'),
        (dt.int32, 'INT64'),
        (dt.int64, 'INT64'),
        (dt.string, 'STRING'),
        (dt.Array(dt.int64), 'ARRAY<INT64>'),
        (dt.Array(dt.string), 'ARRAY<STRING>'),
        (
            dt.Struct.from_tuples([('a', dt.int64), ('b', dt.string),
                                   ('c', dt.Array(dt.string))]),
            'STRUCT<a INT64, b STRING, c ARRAY<STRING>>',
        ),
        (dt.date, 'DATE'),
        (dt.timestamp, 'TIMESTAMP'),
        param(
            dt.Timestamp(timezone='US/Eastern'),
            'TIMESTAMP',
            marks=pytest.mark.xfail(raises=TypeError,
                                    reason='Not supported in BigQuery'),
        ),
        ('array<struct<a: string>>', 'ARRAY<STRUCT<a STRING>>'),
Beispiel #15
0
def infer_array(value):
    """Return an array datatype whose element type is ``value.dtype.name``."""
    # TODO(kszucs): infer series
    element_type = dt.dtype(value.dtype.name)
    return dt.Array(element_type)
def test_nested_map():
    """``dt.dtype`` parses a map whose values are arrays of maps."""
    inner_map = dt.Map(dt.string, dt.int8)
    expected = dt.Map(dt.int64, dt.Array(inner_map))
    parsed = dt.dtype('map<int64, array<map<string, int8>>>')
    assert parsed == expected
Beispiel #17
0
    return x + 1.0


@udf.elementwise([dt.double], dt.double)
def times_two(x):
    """Return ``x`` multiplied by ``2.0``."""
    doubled = x * 2.0
    return doubled


@udf.analytic(input_type=['double'], output_type='double')
def zscore(series):
    """Return the deviation of ``series`` from its mean, scaled by its std."""
    deviation = series - series.mean()
    return deviation / series.std()


@udf.reduction(
    input_type=[dt.double],
    output_type=dt.Array(dt.double),
)
def quantiles(series, *, quantiles):
    """Return ``series.quantile(quantiles)`` as a NumPy array.

    ``quantiles`` is keyword-only.
    """
    computed = series.quantile(quantiles)
    return np.array(computed)


def test_udf(t, df):
    """Executing ``my_string_length(t.a)`` gives ``df.a.str.len() * 2``."""
    length_expr = my_string_length(t.a)
    assert isinstance(length_expr, ir.ColumnExpr)

    actual = length_expr.execute()
    doubled_lengths = df.a.str.len().mul(2)
    tm.assert_series_equal(actual, doubled_lengths)

Beispiel #18
0
    kwargs = {kind: (begin, end)}
    with pytest.raises(com.IbisInputError):
        ibis.window(**kwargs)


@pytest.mark.parametrize(
    ('left', 'right', 'expected'),
    [
        (ibis.literal(1), ibis.literal(1.0), dt.float64),
        (ibis.literal('a'), ibis.literal('b'), dt.string),
        (ibis.literal(1.0), ibis.literal(1), dt.float64),
        (ibis.literal(1), ibis.literal(1), dt.int8),
        (ibis.literal(1), ibis.literal(1000), dt.int16),
        (ibis.literal(2**16), ibis.literal(2**17), dt.int32),
        (ibis.literal(2**50), ibis.literal(1000), dt.int64),
        (ibis.literal([1, 2]), ibis.literal([1, 2]), dt.Array(dt.int8)),
        (ibis.literal(['a']), ibis.literal([]), dt.Array(dt.string)),
        (ibis.literal([]), ibis.literal(['a']), dt.Array(dt.string)),
        (ibis.literal([]), ibis.literal([]), dt.Array(dt.null)),
    ],
)
def test_nullif_type(left, right, expected):
    """``left.nullif(right)`` has the ``expected`` result datatype."""
    result_type = left.nullif(right).type()
    assert result_type == expected


@pytest.mark.parametrize(('left', 'right'),
                         [(ibis.literal(1), ibis.literal('a'))])
def test_nullif_fail(left, right):
    with pytest.raises(com.IbisTypeError):
        left.nullif(right)
    with pytest.raises(com.IbisTypeError):
Beispiel #19
0
def test_array_schema(array_types):
    """Columns ``x``, ``y``, ``z`` are arrays of int64, string and double."""
    x_type = array_types.x.type()
    assert x_type == dt.Array(dt.int64)

    y_type = array_types.y.type()
    assert y_type == dt.Array(dt.string)

    z_type = array_types.z.type()
    assert z_type == dt.Array(dt.double)
Beispiel #20
0
    class FooNode(ops.ValueOp):
        """Value operation with one ``value`` argument; ``output_type`` returns ``Foo``."""
        # Argument declared via Arg with an rlz.value rule over array<int64>.
        value = Arg(rlz.value(dt.Array(dt.int64)))

        def output_type(self):
            # ``Foo`` is resolved from the enclosing scope.
            return Foo
Beispiel #21
0
@pytest.mark.parametrize(
    "dtypes",
    [
        pytest.param(
            [
                member
                for _name, member in inspect.getmembers(
                    dt,
                    lambda candidate: isinstance(candidate, dt.DataType),
                )
            ],
            id="singletons",
        ),
        pytest.param(
            dt.Array(
                dt.Struct.from_dict(
                    {
                        'a': dt.Array(dt.string),
                        'b': dt.Map(dt.string, dt.Array(dt.int64)),
                    }
                )
            ),
            id="complex",
        ),
    ],
)
def test_eq_datatypes(benchmark, dtypes):
    """Benchmark comparing ``dtypes`` for equality with a deep copy of itself."""

    def eq(a, b):
        assert a == b

    duplicate = copy.deepcopy(dtypes)
    benchmark(eq, dtypes, duplicate)


def multiple_joins(table, num_joins):
    for _ in range(num_joins):
Beispiel #22
0
def test_map_keys_output_type():
    """``keys()`` on a string-keyed map literal is typed as array<string>."""
    mapping = ibis.literal({'a': 1, 'b': 2})
    keys_type = mapping.keys().type()
    assert keys_type == dt.Array(dt.string)
Beispiel #23
0
def test_array_schema(array_types, column, value_type):
    """``array_types[column]`` is typed as an array of ``value_type``."""
    column_type = array_types[column].type()
    assert column_type == dt.Array(value_type)
Beispiel #24
0
def test_map_values_output_type():
    """``values()`` on a map literal of small ints is typed as array<int8>."""
    mapping = ibis.literal({'a': 1, 'b': 2})
    values_type = mapping.values().type()
    assert values_type == dt.Array(dt.int8)
Beispiel #25
0
def test_columns_types_with_additional_argument(con):
    """Check result dtypes for columns built by functions taking extra arguments.

    A ``toFixedString`` column is always selected and must come back with
    dtype name ``'object'``.  A ``toDateTime(..., 'UTC')`` column is added,
    and checked for dtype name ``'datetime64[ns]'``, only when the
    connection's version is at least 1.1.54337.

    Parameters
    ----------
    con
        Connection fixture exposing ``version`` and ``sql(...).execute()``.
    """
    # Compare parsed version objects, not ``base_version`` strings: string
    # comparison is lexicographic and misorders e.g. '1.1.100000' against
    # '1.1.54337'.  Re-parsing ``base_version`` keeps the original intent of
    # ignoring pre/post/dev/local suffixes.
    server_version = parse_version(parse_version(con.version).base_version)
    include_datetime = server_version >= parse_version('1.1.54337')

    sql_types = ["toFixedString('foo', 8) AS fixedstring_col"]
    if include_datetime:
        sql_types.append(
            "toDateTime('2018-07-02 00:00:00', 'UTC') AS datetime_col")
    sql = 'SELECT {}'.format(', '.join(sql_types))
    df = con.sql(sql).execute()
    assert df.fixedstring_col.dtype.name == 'object'
    if include_datetime:
        assert df.datetime_col.dtype.name == 'datetime64[ns]'


@pytest.mark.parametrize(
    ('ch_type', 'ibis_type'),
    [
        ('Array(Int8)', dt.Array(dt.Int8(nullable=False))),
        ('Array(Int16)', dt.Array(dt.Int16(nullable=False))),
        ('Array(Int32)', dt.Array(dt.Int32(nullable=False))),
        ('Array(Int64)', dt.Array(dt.Int64(nullable=False))),
        ('Array(UInt8)', dt.Array(dt.UInt8(nullable=False))),
        ('Array(UInt16)', dt.Array(dt.UInt16(nullable=False))),
        ('Array(UInt32)', dt.Array(dt.UInt32(nullable=False))),
        ('Array(UInt64)', dt.Array(dt.UInt64(nullable=False))),
        ('Array(Float32)', dt.Array(dt.Float32(nullable=False))),
        ('Array(Float64)', dt.Array(dt.Float64(nullable=False))),
        ('Array(String)', dt.Array(dt.String(nullable=False))),
        ('Array(FixedString(32))', dt.Array(dt.String(nullable=False))),
        ('Array(Date)', dt.Array(dt.Date(nullable=False))),
        ('Array(DateTime)', dt.Array(dt.Timestamp(nullable=False))),
        ('Array(DateTime64)', dt.Array(dt.Timestamp(nullable=False))),
        ('Array(Nothing)', dt.Array(dt.Null(nullable=False))),
Beispiel #26
0
def test_literal_array():
    """An empty list literal is an array value typed as array<null>."""
    expr = api.literal([])
    assert isinstance(expr, ir.ArrayValue)

    literal_type = expr.type()
    assert literal_type.equals(dt.Array(dt.null))
Beispiel #27
0
def test_map_with_string_value_type():
    """``dt.Map`` built from string specs equals one built from datatypes."""
    simple_from_names = dt.Map('int32', 'double')
    assert simple_from_names == dt.Map(dt.int32, dt.double)

    nested_from_names = dt.Map('int32', 'array<double>')
    assert nested_from_names == dt.Map(dt.int32, dt.Array(dt.double))
Beispiel #28
0
 def __init__(self, value_type, *args, **kwargs):
     """Initialize the parent with an array datatype over ``value_type``.

     Remaining positional and keyword arguments are forwarded unchanged.
     """
     array_type = dt.Array(value_type)
     super(ArrayValueTyped, self).__init__(array_type, *args, **kwargs)
Beispiel #29
0
import pytest
import pytz
from multipledispatch.conflict import ambiguities

import ibis
import ibis.expr.datatypes as dt


def test_validate_type():
    """``dt.validate_type`` is the very same object as ``dt.dtype``."""
    alias = dt.validate_type
    assert alias is dt.dtype


@pytest.mark.parametrize(
    ('spec', 'expected'),
    [
        ('ARRAY<DOUBLE>', dt.Array(dt.double)),
        ('array<array<string>>', dt.Array(dt.Array(dt.string))),
        ('map<string, double>', dt.Map(dt.string, dt.double)),
        (
            'map<int64, array<map<string, int8>>>',
            dt.Map(dt.int64, dt.Array(dt.Map(dt.string, dt.int8))),
        ),
        ('set<uint8>', dt.Set(dt.uint8)),
        ([dt.uint8], dt.Array(dt.uint8)),
        ([dt.float32, dt.float64], dt.Array(dt.float64)),
        ({dt.string}, dt.Set(dt.string)),
        ('point', dt.point),
        ('point;4326', dt.point),
        ('point;4326:geometry', dt.point),
        ('point;4326:geography', dt.point),
        ('linestring', dt.linestring),

@pytest.mark.parametrize(
    ("datatype", "expected"),
    [
        (dt.float32, "FLOAT64"),
        (dt.float64, "FLOAT64"),
        (dt.uint8, "INT64"),
        (dt.uint16, "INT64"),
        (dt.uint32, "INT64"),
        (dt.int8, "INT64"),
        (dt.int16, "INT64"),
        (dt.int32, "INT64"),
        (dt.int64, "INT64"),
        (dt.string, "STRING"),
        (dt.Array(dt.int64), "ARRAY<INT64>"),
        (dt.Array(dt.string), "ARRAY<STRING>"),
        (dt.date, "DATE"),
        (dt.timestamp, "TIMESTAMP"),
        param(
            dt.Timestamp(timezone="US/Eastern"),
            "TIMESTAMP",
        ),
    ],
)
def test_simple(datatype, expected):
    """``ibis_type_to_cloud_spanner_type`` maps ``datatype`` to ``expected``."""
    context = TypeTranslationContext()
    translated = ibis_type_to_cloud_spanner_type(datatype, context)
    assert translated == expected


@pytest.mark.parametrize("datatype", [dt.uint64, dt.Decimal(8, 3)])