Ejemplo n.º 1
0
Archivo: types.py Proyecto: cuulee/ibis
    def _can_cast_implicit(self, typename):
        """Return whether this value may be implicitly cast to ``typename``.

        Non-``dt.Map`` targets are always rejected.  For map targets the cast
        is accepted when the parent class accepts it, or when this value's
        own type equals ``Map(null, null)`` or ``Map(any, any)``.
        """
        if not isinstance(typename, dt.Map):
            return False

        own_type = self.type()

        # The parent rule is consulted first and wins whenever it is truthy.
        parent_verdict = super(MapValue, self)._can_cast_implicit(typename)
        if parent_verdict:
            return parent_verdict

        # Otherwise accept only the two untyped map types.
        is_null_map = own_type.equals(dt.Map(dt.null, dt.null))
        if is_null_map:
            return is_null_map
        return own_type.equals(dt.Map(dt.any, dt.any))
Ejemplo n.º 2
0
def test_struct_with_string_types():
    """Struct fields given as type strings equal fields given as datatypes."""
    from_strings = dt.Struct.from_tuples([
        ('a', 'map<double, string>'),
        ('b', 'array<map<string, array<int32>>>'),
        ('c', 'array<string>'),
        ('d', 'int8'),
    ])

    # The same four fields, spelled out with datatype objects.
    nested_b = dt.Array(dt.Map(dt.string, dt.Array(dt.int32)))
    from_objects = dt.Struct.from_tuples([
        ('a', dt.Map(dt.double, dt.string)),
        ('b', nested_b),
        ('c', dt.Array(dt.string)),
        ('d', dt.int8),
    ])

    assert from_strings == from_objects
Ejemplo n.º 3
0
def higher_precedence(left, right):
    """Return the datatype of higher precedence between ``left`` and ``right``.

    Scalar types are ranked through ``_SCALAR_TYPE_PRECEDENCE``, looked up by
    lower-cased type name; on a tie ``left`` is returned.  Arrays, maps and
    structs are resolved recursively on their element types.

    Parameters
    ----------
    left, right
        Datatype objects exposing a ``name`` attribute.

    Returns
    -------
    The higher-precedence scalar type, or a new ``dt.Array`` / ``dt.Map`` /
    ``dt.Struct`` built from the higher-precedence element types.

    Raises
    ------
    TypeError
        If two structs have different field names, or if no precedence rule
        applies to the pair (including a container paired with a
        different kind of type).
    """
    left_name = left.name.lower()
    right_name = right.name.lower()

    if (left_name in _SCALAR_TYPE_PRECEDENCE
            and right_name in _SCALAR_TYPE_PRECEDENCE):
        left_prec = _SCALAR_TYPE_PRECEDENCE[left_name]
        right_prec = _SCALAR_TYPE_PRECEDENCE[right_name]
        _, highest_type = max(((left_prec, left), (right_prec, right)),
                              key=first)
        return highest_type

    # Both operands must be the same kind of container before their element
    # types are compared.  Checking only ``left`` let a mismatched ``right``
    # fail with an AttributeError on ``value_type`` / ``key_type`` / ``names``
    # instead of the TypeError raised at the end of this function.
    if isinstance(left, dt.Array) and isinstance(right, dt.Array):
        return dt.Array(higher_precedence(left.value_type, right.value_type))

    if isinstance(left, dt.Map) and isinstance(right, dt.Map):
        return dt.Map(higher_precedence(left.key_type, right.key_type),
                      higher_precedence(left.value_type, right.value_type))

    if isinstance(left, dt.Struct) and isinstance(right, dt.Struct):
        if left.names != right.names:
            raise TypeError('Struct names are not equal')
        return dt.Struct(left.names,
                         list(map(higher_precedence, left.types, right.types)))
    raise TypeError('Cannot compute precedence for {} and {} types'.format(
        left, right))
Ejemplo n.º 4
0
def from_pyarrow_map(arrow_type: pa.MapType,
                     nullable: bool = True) -> dt.DataType:
    """Build a ``dt.Map`` from a pyarrow map type.

    The key and item types of ``arrow_type`` are each converted with
    ``dt.dtype``; ``nullable`` is forwarded to the resulting map type.
    """
    key_dtype = dt.dtype(arrow_type.key_type)
    item_dtype = dt.dtype(arrow_type.item_type)
    return dt.Map(key_dtype, item_dtype, nullable=nullable)
Ejemplo n.º 5
0
 def map():
     """Generator describing the ``map ( <type> , <type> )`` syntax.

     Yields the pieces in order and builds a non-nullable ``dt.Map`` from the
     two values sent back for the ``ty`` steps.
     NOTE(review): presumably driven by a parser-combinator decorator that
     sits outside this view -- confirm.
     """
     yield dt.spaceless_string("map")
     yield dt.LPAREN
     parsed_key = yield ty
     yield dt.COMMA
     parsed_value = yield ty
     yield dt.RPAREN
     return dt.Map(parsed_key, parsed_value, nullable=False)
Ejemplo n.º 6
0
def test_complex_datatype_parse(benchmark):
    """Check, then benchmark, parsing of a nested array/struct/map type."""
    type_str = "array<struct<a: array<string>, b: map<string, array<int64>>>>"

    struct_fields = {
        'a': dt.Array(dt.string),
        'b': dt.Map(dt.string, dt.Array(dt.int64)),
    }
    expected = dt.Array(dt.Struct.from_dict(struct_fields))

    parsed = dt.parse(type_str)
    assert parsed == expected

    benchmark(dt.parse, type_str)
Ejemplo n.º 7
0
def infer_literal_type(value):
    """Infer the ibis datatype of a Python literal.

    Checks run in a fixed order, so ``bool`` is tested before the integer
    types, ``datetime.datetime`` before ``datetime.date``, and
    ``collections.OrderedDict`` (struct) before plain ``dict`` (map).

    Raises
    ------
    TypeError
        For an empty ``OrderedDict``.
    com.InputTypeError
        When no rule matches ``value``.
    """
    import ibis.expr.rules as rules

    if value is None or value is null:
        return dt.null
    if isinstance(value, bool):
        return dt.boolean
    if isinstance(value, compat.integer_types):
        return rules.int_literal_class(value)
    if isinstance(value, float):
        return dt.double
    if isinstance(value, six.string_types):
        return dt.string
    if isinstance(value, datetime.timedelta):
        return dt.interval
    if isinstance(value, datetime.datetime):
        return dt.timestamp
    if isinstance(value, datetime.date):
        return dt.date
    if isinstance(value, datetime.time):
        return dt.time

    if isinstance(value, list):
        if value:
            element_exprs = [literal(item) for item in value]
            return dt.Array(rules.highest_precedence_type(element_exprs))
        # An empty list carries no element information.
        return dt.Array(dt.null)

    if isinstance(value, collections.OrderedDict):
        if not value:
            raise TypeError('Empty struct type not supported')
        field_names = list(value.keys())
        field_types = [literal(element).type() for element in value.values()]
        return dt.Struct(field_names, field_types)

    if isinstance(value, dict):
        if value:
            key_exprs = [literal(key) for key in value.keys()]
            key_type = rules.highest_precedence_type(key_exprs)
            value_exprs = [literal(item) for item in value.values()]
            value_type = rules.highest_precedence_type(value_exprs)
            return dt.Map(key_type, value_type)
        # An empty dict carries no key/value information.
        return dt.Map(dt.null, dt.null)

    raise com.InputTypeError(value)
Ejemplo n.º 8
0
import ibis
import ibis.expr.datatypes as dt
from ibis.common.exceptions import IbisTypeError


def test_validate_type():
    """``dt.validate_type`` must be the very same object as ``dt.dtype``."""
    alias = dt.validate_type
    assert alias is dt.dtype


@pytest.mark.parametrize(
    ('spec', 'expected'),
    [
        ('ARRAY<DOUBLE>', dt.Array(dt.double)),
        ('array<array<string>>', dt.Array(dt.Array(dt.string))),
        ('map<string, double>', dt.Map(dt.string, dt.double)),
        (
            'map<int64, array<map<string, int8>>>',
            dt.Map(dt.int64, dt.Array(dt.Map(dt.string, dt.int8))),
        ),
        ('set<uint8>', dt.Set(dt.uint8)),
        ([dt.uint8], dt.Array(dt.uint8)),
        ([dt.float32, dt.float64], dt.Array(dt.float64)),
        ({dt.string}, dt.Set(dt.string)),
        ('point', dt.point),
        ('point;4326', dt.point),
        ('point;4326:geometry', dt.point),
        ('point;4326:geography', dt.point),
        ('linestring', dt.linestring),
        ('linestring;4326', dt.linestring),
        ('linestring;4326:geometry', dt.linestring),
Ejemplo n.º 9
0
import ibis
import ibis.expr.datatypes as dt
from ibis.common.exceptions import IbisTypeError


def test_validate_type():
    """``dt.validate_type`` must be the very same object as ``dt.dtype``."""
    alias = dt.validate_type
    assert alias is dt.dtype


@pytest.mark.parametrize(
    ('spec', 'expected'),
    [
        ('ARRAY<DOUBLE>', dt.Array(dt.double)),
        ('array<array<string>>', dt.Array(dt.Array(dt.string))),
        ('map<string, double>', dt.Map(dt.string, dt.double)),
        (
            'map<int64, array<map<string, int8>>>',
            dt.Map(dt.int64, dt.Array(dt.Map(dt.string, dt.int8))),
        ),
        ('set<uint8>', dt.Set(dt.uint8)),
        ([dt.uint8], dt.Array(dt.uint8)),
        ([dt.float32, dt.float64], dt.Array(dt.float64)),
        ({dt.string}, dt.Set(dt.string)),
        ('point', dt.point),
        ('point;4326', dt.point),
        ('point;4326:geometry', dt.point),
        ('point;4326:geography', dt.point),
        ('linestring', dt.linestring),
        ('linestring;4326', dt.linestring),
        ('linestring;4326:geometry', dt.linestring),
Ejemplo n.º 10
0
    [
        pytest.param(dt.int8, 26, ibis.literal(26)),
        pytest.param(dt.int16, 26, ibis.literal(26)),
        pytest.param(dt.int32, 26, ibis.literal(26)),
        pytest.param(dt.int64, 26, ibis.literal(26)),
        pytest.param(dt.uint8, 26, ibis.literal(26)),
        pytest.param(dt.uint16, 26, ibis.literal(26)),
        pytest.param(dt.uint32, 26, ibis.literal(26)),
        pytest.param(dt.uint64, 26, ibis.literal(26)),
        pytest.param(dt.float32, 26, ibis.literal(26)),
        pytest.param(dt.float64, 26.4, ibis.literal(26.4)),
        pytest.param(dt.double, 26.3, ibis.literal(26.3)),
        pytest.param(dt.string, 'bar', ibis.literal('bar')),
        pytest.param(dt.Array(dt.float), [3.4, 5.6], ibis.literal([3.4, 5.6])),
        pytest.param(
            dt.Map(dt.string, dt.Array(dt.boolean)),
            {
                'a': [True, False],
                'b': [True]
            },
            ibis.literal({
                'a': [True, False],
                'b': [True]
            }),
            id='map_literal',
        ),
    ],
)
def test_valid_value(dtype, value, expected):
    """``rlz.value(dtype, value)`` yields an expression equal to ``expected``."""
    validated = rlz.value(dtype, value)
    assert validated.equals(expected)
Ejemplo n.º 11
0
def test_nested_map():
    """A map-of-array-of-map type string validates to the nested datatype."""
    validated = dt.validate_type('map<int64, array<map<string, int8>>>')
    inner_map = dt.Map(dt.string, dt.int8)
    assert validated == dt.Map(dt.int64, dt.Array(inner_map))
Ejemplo n.º 12
0
def test_nested_map():
    """``dt.dtype`` turns a nested map type string into the nested datatype."""
    inner_map = dt.Map(dt.string, dt.int8)
    expected = dt.Map(dt.int64, dt.Array(inner_map))
    parsed = dt.dtype('map<int64, array<map<string, int8>>>')
    assert parsed == expected
Ejemplo n.º 13
0
    C=dt.Timestamp('UTC'),
    D=dt.Timestamp('UTC'),
    E=dt.int8,
    F=dt.int8,
    G=dt.uint64,
    H=dt.uint32,
    I=dt.uint16,
    J=dt.uint8,
    K=dt.uuid,
    L=dt.string,
    M=dt.string,
    N=dt.string,
    O=dt.string,
    P=dt.string,
    Q=dt.Array(dt.int32),
    R=dt.Map(dt.string, dt.int64),
    S=dt.Struct.from_dict(
        dict(
            a=dt.int32,
            b=dt.string,
            c=dt.Array(dt.Map(dt.string, dt.Array(dt.float64))),
        )
    ),
)


@pytest.mark.parametrize(
    ("column", "type"),
    [
        param(colname, type, id=type.lower())
        for colname, type in [
Ejemplo n.º 14
0
def spark_map_dtype_to_ibis_dtype(spark_type_obj, nullable=True):
    """Convert a Spark map type object into a ``dt.Map``.

    The key type is converted with ``dt.dtype`` as-is; the value type is
    converted with its nullability taken from ``valueContainsNull``.
    ``nullable`` applies to the map type itself.
    """
    values_nullable = spark_type_obj.valueContainsNull
    return dt.Map(
        dt.dtype(spark_type_obj.keyType),
        dt.dtype(spark_type_obj.valueType, nullable=values_nullable),
        nullable=nullable,
    )
Ejemplo n.º 15
0
def test_complex_datatype_builtins(benchmark, func):
    """Benchmark ``func`` applied to a nested array/struct/map datatype."""
    struct_fields = {
        'a': dt.Array(dt.string),
        'b': dt.Map(dt.string, dt.Array(dt.int64)),
    }
    datatype = dt.Array(dt.Struct.from_dict(struct_fields))
    benchmark(func, datatype)
Ejemplo n.º 16
0
    [
        pytest.param(
            [
                obj for _, obj in inspect.getmembers(
                    dt,
                    lambda obj: isinstance(obj, dt.DataType),
                )
            ],
            id="singletons",
        ),
        pytest.param(
            dt.Array(
                dt.Struct.from_dict(
                    dict(
                        a=dt.Array(dt.string),
                        b=dt.Map(dt.string, dt.Array(dt.int64)),
                    ))),
            id="complex",
        ),
    ],
)
def test_eq_datatypes(benchmark, dtypes):
    """Benchmark comparing ``dtypes`` for equality against a deep copy."""
    def check_equal(lhs, rhs):
        assert lhs == rhs

    # The copy is made once, outside the benchmarked callable.
    dtypes_copy = copy.deepcopy(dtypes)
    benchmark(check_equal, dtypes, dtypes_copy)


def multiple_joins(table, num_joins):
    for _ in range(num_joins):
        table = table.mutate(dummy=ibis.literal(""))
Ejemplo n.º 17
0
 def _validate(self, args, i):
     """Validate argument ``i``, casting untyped maps to the declared type.

     The parent class validates first.  If the result has type
     ``Map(any, any)`` it is cast to the single type held in ``self.types``;
     otherwise it is returned unchanged.
     """
     arg = super(MapValueTyped, self)._validate(args, i)
     (declared_type,) = self.types
     if not arg.type().equals(dt.Map(dt.any, dt.any)):
         return arg
     return arg.cast(declared_type)
Ejemplo n.º 18
0
 def __init__(self, key_type, value_type, *args, **kwargs):
     """Initialise the parent with ``dt.Map(key_type, value_type)``.

     Any further positional and keyword arguments are forwarded unchanged.
     """
     map_type = dt.Map(key_type, value_type)
     super(MapValueTyped, self).__init__(map_type, *args, **kwargs)
Ejemplo n.º 19
0
def test_scalar_param_map(backend, con):
    """Indexing a map-typed scalar parameter returns the bound dict's entry."""
    mapping = {'a': 'ghi', 'b': 'def', 'c': 'abc'}
    map_param = ibis.param(dt.Map(dt.string, dt.string))
    lookup = map_param['b']
    result = con.execute(lookup, params={map_param: mapping})
    assert result == mapping['b']
Ejemplo n.º 20
0
def test_array_with_string_value_type():
    """``dt.Array`` accepts its value type as a type string."""
    assert dt.Array('int32') == dt.Array(dt.int32)

    # A nested type string inside two explicit Array wrappers.
    from_string = dt.Array(dt.Array('array<map<string, double>>'))
    innermost = dt.Array(dt.Map(dt.string, dt.double))
    assert from_string == dt.Array(dt.Array(innermost))
Ejemplo n.º 21
0
def test_map_with_string_value_type():
    """``dt.Map`` accepts its key and value types as type strings."""
    scalar_map = dt.Map('int32', 'double')
    assert scalar_map == dt.Map(dt.int32, dt.double)

    array_valued_map = dt.Map('int32', 'array<double>')
    assert array_valued_map == dt.Map(dt.int32, dt.Array(dt.double))
Ejemplo n.º 22
0
    ('dtype', 'value', 'expected'),
    [
        pytest.param(dt.int8, 26, ibis.literal(26)),
        pytest.param(dt.int16, 26, ibis.literal(26)),
        pytest.param(dt.int32, 26, ibis.literal(26)),
        pytest.param(dt.int64, 26, ibis.literal(26)),
        pytest.param(dt.uint8, 26, ibis.literal(26)),
        pytest.param(dt.uint16, 26, ibis.literal(26)),
        pytest.param(dt.uint32, 26, ibis.literal(26)),
        pytest.param(dt.uint64, 26, ibis.literal(26)),
        pytest.param(dt.float32, 26, ibis.literal(26)),
        pytest.param(dt.float64, 26.4, ibis.literal(26.4)),
        pytest.param(dt.double, 26.3, ibis.literal(26.3)),
        pytest.param(dt.string, 'bar', ibis.literal('bar')),
        pytest.param(dt.Array(dt.float), [3.4, 5.6], ibis.literal([3.4, 5.6])),
        pytest.param(dt.Map(dt.string, dt.Array(dt.boolean)), {
            'a': [True, False],
            'b': [True]
        },
                     ibis.literal({
                         'a': [True, False],
                         'b': [True]
                     }),
                     id='map_literal'),
    ],
)
def test_valid_value(dtype, value, expected):
    """``rlz.value(dtype, value)`` yields an expression equal to ``expected``."""
    validated = rlz.value(dtype, value)
    assert validated.equals(expected)

Ejemplo n.º 23
0
def df3(npartitions):
    """Return a two-row dask dataframe split into ``npartitions`` partitions.

    Columns: ``key``, ``other_value``, ``key2`` and ``key3``.
    """
    columns = {
        'key': list('ac'),
        'other_value': [4.0, 6.0],
        'key2': list('ae'),
        'key3': list('fe'),
    }
    return dd.from_pandas(pd.DataFrame(columns), npartitions=npartitions)


# Column name -> ibis datatype for the decimal, array and map columns of the
# ``df`` table; passed as ``schema=`` to ``client.table`` in the ``t`` fixture.
t_schema = {
    'decimal': dt.Decimal(4, 3),
    'array_of_float64': dt.Array(dt.double),
    'array_of_int64': dt.Array(dt.int64),
    'array_of_strings': dt.Array(dt.string),
    'map_of_strings_integers': dt.Map(dt.string, dt.int64),
    'map_of_integers_strings': dt.Map(dt.int64, dt.string),
    'map_of_complex_values': dt.Map(dt.string, dt.Array(dt.int64)),
}


@pytest.fixture(scope='module')
def t(client):
    """Module-scoped fixture: the ``df`` table opened with ``t_schema``."""
    table = client.table('df', schema=t_schema)
    return table


@pytest.fixture(scope='module')
def lahman(batting_df, awards_players_df):
    return connect({
        'batting': batting_df,
        'awards_players': awards_players_df
Ejemplo n.º 24
0
def test_map_get_with_compatible_value_bigger():
    """A default wider than the map's int8 values makes ``get`` return int16."""
    value = ibis.literal({'A': 1, 'B': 2})
    expr = value.get('C', 3000)

    map_type = value.type()
    assert map_type == dt.Map(dt.string, dt.int8)

    result_type = expr.type()
    assert result_type == dt.int16
Ejemplo n.º 25
0
# Validators for single datatypes, each built by passing one datatype to
# ``value``.
# NOTE(review): presumably ``value`` returns a partially-applied rule when
# called with only a datatype -- confirm against the ``validator`` decorator.
decimal = value(dt.Decimal)
floating = value(dt.float64)
date = value(dt.date)
time = value(dt.time)
timestamp = value(dt.Timestamp)
category = value(dt.category)
# Combination of the timestamp, date and time validators.
temporal = one_of([timestamp, date, time])

# ``soft_numeric`` lists ``boolean`` in addition to the ``strict_numeric``
# members; ``numeric`` is an alias for ``soft_numeric``.
strict_numeric = one_of([integer, floating, decimal])
soft_numeric = one_of([integer, floating, decimal, boolean])
numeric = soft_numeric

# Container types.  ``mapping`` is built from a ``Map(any, any)`` instance,
# while the others are built from the datatype classes.
set_ = value(dt.Set)
array = value(dt.Array)
struct = value(dt.Struct)
mapping = value(dt.Map(dt.any, dt.any))

# Geospatial types.
geospatial = value(dt.GeoSpatial)
point = value(dt.Point)
linestring = value(dt.LineString)
polygon = value(dt.Polygon)
multilinestring = value(dt.MultiLineString)
multipoint = value(dt.MultiPoint)
multipolygon = value(dt.MultiPolygon)


@validator
def interval(arg, units=None):
    arg = value(dt.Interval, arg)
    unit = arg.type().unit
    if units is not None and unit not in units:
Ejemplo n.º 26
0
        # pandas types
        (pd.Timestamp('2015-01-01 12:00:00',
                      tz='US/Eastern'), dt.Timestamp('US/Eastern')),

        # parametric types
        (list('abc'), dt.Array(dt.string)),
        ([1, 2, 3], dt.Array(dt.int8)),
        ([1, 128], dt.Array(dt.int16)),
        ([1, 128, 32768], dt.Array(dt.int32)),
        ([1, 128, 32768, 2147483648], dt.Array(dt.int64)),
        ({
            'a': 1,
            'b': 2,
            'c': 3
        }, dt.Map(dt.string, dt.int8)),
        ({
            1: 2,
            3: 4,
            5: 6
        }, dt.Map(dt.int8, dt.int8)),
        ({
            'a': [1.0, 2.0],
            'b': [],
            'c': [3.0]
        }, dt.Map(dt.string, dt.Array(dt.double))),
        (OrderedDict([('a', 1), ('b', list('abc')),
                      ('c', OrderedDict([('foo', [1.0, 2.0])]))]),
         dt.Struct.from_tuples(
             [('a', dt.int8), ('b', dt.Array(dt.string)),
              ('c', dt.Struct.from_tuples([('foo', dt.Array(dt.double))]))]))
Ejemplo n.º 27
0
def test_map_get_with_compatible_value_smaller():
    """A default narrower than the map's int16 values keeps ``get`` at int16."""
    value = ibis.literal({'A': 1000, 'B': 2000})
    expr = value.get('C', 3)

    map_type = value.type()
    assert map_type == dt.Map(dt.string, dt.int16)

    result_type = expr.type()
    assert result_type == dt.int16
Ejemplo n.º 28
0
def test_whole_schema():
    """A table declared with type strings has the equivalent datatype schema.

    Covers scalar columns plus nested struct, array-of-struct and
    map-of-struct columns.
    """
    columns = [
        ('cid', 'int64'),
        ('mktsegment', 'string'),
        ('address', ('struct<city: string, street: string, '
                     'street_number: int32, zip: int16>')),
        ('phone_numbers', 'array<string>'),
        ('orders', """array<struct<
                                oid: int64,
                                status: string,
                                totalprice: decimal(12, 2),
                                order_date: string,
                                items: array<struct<
                                    iid: int64,
                                    name: string,
                                    price: decimal(12, 2),
                                    discount_perc: decimal(12, 2),
                                    shipdate: string
                                >>
                            >>"""),
        ('web_visits', ('map<string, struct<user_agent: string, '
                        'client_ip: string, visit_date: string, '
                        'duration_ms: int32>>')),
        ('support_calls', ('array<struct<agent_id: int64, '
                           'call_date: string, duration_ms: int64, '
                           'issue_resolved: boolean, '
                           'agent_comment: string>>')),
    ]
    customers = ibis.table(columns, name='customers')

    # Expected datatypes, built bottom-up from the innermost structs.
    address_type = dt.Struct.from_tuples([
        ('city', dt.string),
        ('street', dt.string),
        ('street_number', dt.int32),
        ('zip', dt.int16),
    ])
    item_type = dt.Struct.from_tuples([
        ('iid', dt.int64),
        ('name', dt.string),
        ('price', dt.Decimal(12, 2)),
        ('discount_perc', dt.Decimal(12, 2)),
        ('shipdate', dt.string),
    ])
    order_type = dt.Struct.from_tuples([
        ('oid', dt.int64),
        ('status', dt.string),
        ('totalprice', dt.Decimal(12, 2)),
        ('order_date', dt.string),
        ('items', dt.Array(item_type)),
    ])
    visit_type = dt.Struct.from_tuples([
        ('user_agent', dt.string),
        ('client_ip', dt.string),
        ('visit_date', dt.string),
        ('duration_ms', dt.int32),
    ])
    call_type = dt.Struct.from_tuples([
        ('agent_id', dt.int64),
        ('call_date', dt.string),
        ('duration_ms', dt.int64),
        ('issue_resolved', dt.boolean),
        ('agent_comment', dt.string),
    ])

    expected = ibis.Schema.from_tuples([
        ('cid', dt.int64),
        ('mktsegment', dt.string),
        ('address', address_type),
        ('phone_numbers', dt.Array(dt.string)),
        ('orders', dt.Array(order_type)),
        ('web_visits', dt.Map(dt.string, visit_type)),
        ('support_calls', dt.Array(call_type)),
    ])
    assert customers.schema() == expected
Ejemplo n.º 29
0
def test_map_get_with_null_on_not_nullable(null_value):
    """``get`` with a null default raises on a map of non-nullable values."""
    non_nullable_int16 = dt.Int16(nullable=False)
    map_type = dt.Map(dt.string, non_nullable_int16)

    literal_map = ibis.literal({'A': 1000, 'B': 2000})
    value = literal_map.cast(map_type)
    assert value.type() == map_type

    with pytest.raises(IbisTypeError):
        assert value.get('C', null_value)
Ejemplo n.º 30
0
def test_map():
    """A ``map<string, double>`` type string validates to the map datatype."""
    validated = dt.validate_type('map<string, double>')
    assert validated == dt.Map(dt.string, dt.double)