Beispiel #1
0
def test_interval_unvalid_unit(unit):
    """Check that an unsupported interval unit is rejected with ValueError.

    Both the string type parser (``dt.validate_type``) and the direct
    ``dt.Interval`` constructor are exercised with the same ``unit``.
    """
    definition = "interval('{}')".format(unit)

    with pytest.raises(ValueError):
        dt.validate_type(definition)

    # Pass the unit first, mirroring the ``dt.Interval(unit, value_type)``
    # call order used by ``test_interval``. With the arguments swapped the
    # constructor presumably rejects ``dt.int32`` as a unit no matter what
    # ``unit`` holds, so the parametrized value was never really checked.
    with pytest.raises(ValueError):
        dt.Interval(unit, dt.int32)
Beispiel #2
0
def test_interval(unit):
    """Check that interval type strings parse to the matching ``dt.Interval``.

    Three spellings are covered: the bare form (compared against an
    ``int32``-valued interval) and the forms with an explicit ``uint16`` or
    ``int64`` value type.
    """
    # The comparisons must be asserted; as bare expressions their results
    # were discarded and the test could never fail.
    definition = "interval('{}')".format(unit)
    assert dt.Interval(unit, dt.int32) == dt.validate_type(definition)

    definition = "interval<uint16>('{}')".format(unit)
    assert dt.Interval(unit, dt.uint16) == dt.validate_type(definition)

    definition = "interval<int64>('{}')".format(unit)
    assert dt.Interval(unit, dt.int64) == dt.validate_type(definition)
Beispiel #3
0
    def __init__(self, hdfs_file, input_type, output_type, name=None):
        """Validate the library path and types, then initialise ``UDFInfo``.

        Parameters
        ----------
        hdfs_file : str
            Path of the compiled function library; must end in ``.so`` or
            ``.ll``.
        input_type : iterable
            Type specifications of the arguments; each one is passed through
            ``validate_type``.
        output_type
            Type specification of the result, passed through
            ``validate_type``.
        name : str, optional
            Name handed to ``UDFInfo``. When falsy, the SHA-1 hex digest of
            ``self.so_symbol`` followed by the input type names is used.

        Raises
        ------
        ValueError
            If ``hdfs_file`` does not end in ``.so`` or ``.ll``.
        """
        if not hdfs_file.endswith(('.so', '.ll')):
            raise ValueError('Invalid file type. Must be .so or .ll ')
        self.hdfs_file = hdfs_file
        inputs = [validate_type(x) for x in input_type]
        output = validate_type(output_type)
        new_name = name
        if not name:
            # ``so_symbol`` is not assigned in this method; it has to be set
            # on the instance before this initialiser runs.
            signature = self.so_symbol + ''.join(
                in_type.name() for in_type in inputs
            )
            # Hash functions only accept bytes on Python 3, so encode text
            # first (assumes ``sha1`` is ``hashlib.sha1``).
            if not isinstance(signature, bytes):
                signature = signature.encode('utf-8')
            new_name = sha1(signature).hexdigest()

        UDFInfo.__init__(self, inputs, output, new_name)
Beispiel #4
0
def param(type, name=None):
    """Build an unbound scalar parameter whose value is supplied later.

    Parameters
    ----------
    type : dt.DataType
        Type of the parameter (e.g. double, int64, date); it is passed
        through ``dt.validate_type``.
    name : str, optional
        Name of the parameter. When ``None`` a name is obtained from
        ``_parameter_name()``.

    Returns
    -------
    ScalarExpr
        The parameter expression, named with the parameter's name.

    Examples
    --------
    >>> import ibis
    >>> import ibis.expr.datatypes as dt
    >>> start = ibis.param(dt.date)
    >>> end = ibis.param(dt.date)
    >>> schema = [('timestamp_col', 'timestamp'), ('value', 'double')]
    >>> t = ibis.table(schema)
    >>> predicates = [t.timestamp_col >= start, t.timestamp_col <= end]
    >>> expr = t.filter(predicates).value.sum()
    """
    param_name = _parameter_name() if name is None else name
    param_type = dt.validate_type(type)
    parameter = ScalarParameter(param_type, name=param_name)
    return parameter.to_expr().name(param_name)
Beispiel #5
0
def test_struct():
    """Parse a nested array-of-struct spec and compare with a hand-built type."""
    orders = """array<struct<
                    oid: int64,
                    status: string,
                    totalprice: decimal(12, 2),
                    order_date: string,
                    items: array<struct<
                        iid: int64,
                        name: string,
                        price: decimal(12, 2),
                        discount_perc: decimal(12, 2),
                        shipdate: string
                    >>
                >>"""

    # Inner struct describing one element of the ``items`` array.
    item_struct = dt.Struct.from_tuples([
        ('iid', dt.int64),
        ('name', dt.string),
        ('price', dt.Decimal(12, 2)),
        ('discount_perc', dt.Decimal(12, 2)),
        ('shipdate', dt.string),
    ])
    # Outer struct describing one order.
    order_struct = dt.Struct.from_tuples([
        ('oid', dt.int64),
        ('status', dt.string),
        ('totalprice', dt.Decimal(12, 2)),
        ('order_date', dt.string),
        ('items', dt.Array(item_struct)),
    ])
    expected = dt.Array(order_struct)

    assert dt.validate_type(orders) == expected
Beispiel #6
0
def test_struct():
    """Verify that a nested ``array<struct<...>>`` string parses correctly."""
    orders = """array<struct<
                    oid: int64,
                    status: string,
                    totalprice: decimal(12, 2),
                    order_date: string,
                    items: array<struct<
                        iid: int64,
                        name: string,
                        price: decimal(12, 2),
                        discount_perc: decimal(12, 2),
                        shipdate: string
                    >>
                >>"""

    # Field list of the struct stored in the nested ``items`` array.
    item_fields = [
        ('iid', dt.int64),
        ('name', dt.string),
        ('price', dt.Decimal(12, 2)),
        ('discount_perc', dt.Decimal(12, 2)),
        ('shipdate', dt.string),
    ]
    items_type = dt.Array(dt.Struct.from_tuples(item_fields))

    # Field list of the top-level order struct.
    order_fields = [
        ('oid', dt.int64),
        ('status', dt.string),
        ('totalprice', dt.Decimal(12, 2)),
        ('order_date', dt.string),
        ('items', items_type),
    ]
    expected = dt.Array(dt.Struct.from_tuples(order_fields))

    assert dt.validate_type(orders) == expected
Beispiel #7
0
def test_literal_complex_types(value, expected_type, expected_class):
    """Check type, expression class and wrapped value of ``ibis.literal``."""
    literal_expr = ibis.literal(value)

    # The literal's type must equal the parsed ``expected_type``.
    actual_type = literal_expr.type()
    wanted_type = dt.validate_type(expected_type)
    assert actual_type.equals(wanted_type)

    assert isinstance(literal_expr, expected_class)
    assert isinstance(literal_expr.op(), ops.Literal)
    # The operation must hold the very same object that was passed in.
    assert literal_expr.op().value is value
Beispiel #8
0
def pandas_dtypes_to_ibis_schema(df, schema):
    """Derive an ibis schema from the column dtypes of ``df``.

    For every column the type is chosen from the first rule that applies:

    1. an explicit entry in ``schema`` (validated with ``dt.validate_type``);
    2. for ``object`` columns, ``infer_dtype`` over the non-null values,
       mapped through ``_INFERRED_DTYPE_TO_IBIS_TYPE``;
    3. for dtypes with a ``tz`` attribute, ``dt.Timestamp`` with that zone;
    4. otherwise the dtype's string form looked up in
       ``_DTYPE_TO_IBIS_TYPE``, falling back to the string itself.

    Parameters
    ----------
    df : DataFrame
        Object exposing ``dtypes`` and column access by name.
    schema : mapping
        Explicit type overrides keyed by column name.

    Returns
    -------
    The result of ``ibis.schema`` applied to the ``(name, type)`` pairs.

    Raises
    ------
    TypeError
        If a column name is not a string, or if the values of an ``object``
        column are inferred as ``'mixed'``.
    """
    pairs = []

    # ``items()`` is used instead of ``iteritems()``: the latter was
    # deprecated and then removed from pandas, while ``items()`` is available
    # on both old and new releases.
    for column_name, dtype in df.dtypes.items():
        if not isinstance(column_name, six.string_types):
            raise TypeError(
                'Column names must be strings to use the pandas backend')

        if column_name in schema:
            ibis_type = dt.validate_type(schema[column_name])
        elif dtype == np.object_:
            # Nulls are dropped first so they do not influence inference.
            inferred_dtype = infer_dtype(df[column_name].dropna())

            if inferred_dtype == 'mixed':
                raise TypeError(
                    'Unable to infer type of column {0!r}. Try instantiating '
                    'your table from the client with client.table('
                    "'my_table', schema={{{0!r}: <explicit type>}})".format(
                        column_name))
            ibis_type = _INFERRED_DTYPE_TO_IBIS_TYPE[inferred_dtype]
        elif hasattr(dtype, 'tz'):
            ibis_type = dt.Timestamp(str(dtype.tz))
        else:
            dtype_string = str(dtype)
            ibis_type = _DTYPE_TO_IBIS_TYPE.get(dtype_string, dtype_string)

        pairs.append((column_name, ibis_type))
    return ibis.schema(pairs)
Beispiel #9
0
    def test_udf_primitive_output_types(self):
        """Check the expression class a registered UDF returns per type.

        For every primitive type the UDF is called once with a scalar value
        and once with the matching fixture attribute; the result class must
        equal the class built from the type's scalar/column constructor.
        """
        cases = [
            ('boolean', True, self.b),
            ('int8', 1, self.i8),
            ('int16', 1, self.i16),
            ('int32', 1, self.i32),
            ('int64', 1, self.i64),
            ('float', 1.0, self.f),
            ('double', 1.0, self.d),
            ('string', '1', self.s),
            ('timestamp', ibis.timestamp('1961-04-10'), self.t),
        ]
        for type_name, scalar_value, column_value in cases:
            func = self._register_udf([type_name], type_name, 'test')
            ibis_type = dt.validate_type(type_name)

            scalar_result = func(scalar_value)
            scalar_cls = type(ibis_type.scalar_type()(scalar_result.op()))
            assert type(scalar_result) == scalar_cls  # noqa: E721

            column_result = func(column_value)
            column_cls = type(ibis_type.column_type()(column_result.op()))
            assert type(column_result) == column_cls  # noqa: E721
Beispiel #10
0
    def test_udf_primitive_output_types(self):
        """Verify scalar and column result classes of UDFs on primitive types."""
        cases = [
            ('boolean', True, self.b),
            ('int8', 1, self.i8),
            ('int16', 1, self.i16),
            ('int32', 1, self.i32),
            ('int64', 1, self.i64),
            ('float', 1.0, self.f),
            ('double', 1.0, self.d),
            ('string', '1', self.s),
            ('timestamp', ibis.timestamp('1961-04-10'), self.t),
        ]
        for type_name, scalar_arg, column_arg in cases:
            func = self._register_udf([type_name], type_name, 'test')
            ibis_type = dt.validate_type(type_name)

            # Scalar input: result class must match the scalar constructor's.
            from_scalar = func(scalar_arg)
            wanted = type(ibis_type.scalar_type()(from_scalar.op()))
            assert type(from_scalar) == wanted  # noqa: E721

            # Fixture input: result class must match the column constructor's.
            from_column = func(column_arg)
            wanted = type(ibis_type.column_type()(from_column.op()))
            assert type(from_column) == wanted  # noqa: E721
Beispiel #11
0
def _to_input_sig(inputs):
    """Return ``inputs`` as a ``rules.TypeSignature``.

    An existing ``TypeSignature`` is returned untouched. Otherwise every
    element is validated with ``validate_type`` and wrapped with
    ``rules.value_typed_as`` before the signature is built.
    """
    if isinstance(inputs, rules.TypeSignature):
        return inputs

    validated = [validate_type(spec) for spec in inputs]
    arg_rules = []
    for ibis_type in validated:
        arg_rules.append(rules.value_typed_as(ibis_type))
    return rules.TypeSignature(arg_rules)
Beispiel #12
0
def test_literal_complex_types(value, expected_type, expected_class):
    """Ensure ``ibis.literal`` yields the expected type, class and value."""
    result = ibis.literal(value)
    result_type = result.type()
    parsed_expected = dt.validate_type(expected_type)

    assert result_type.equals(parsed_expected)
    assert isinstance(result, expected_class)
    assert isinstance(result.op(), ops.Literal)
    # Identity, not equality: the literal must keep the original object.
    assert result.op().value is value
Beispiel #13
0
def _to_input_sig(inputs):
    """Coerce ``inputs`` into a ``rules.TypeSignature``.

    A value that already is a ``TypeSignature`` passes through unchanged;
    any other iterable has each element validated with ``validate_type`` and
    turned into a ``rules.value_typed_as`` rule.
    """
    if not isinstance(inputs, rules.TypeSignature):
        checked_types = [validate_type(item) for item in inputs]
        typed_rules = [rules.value_typed_as(t) for t in checked_types]
        return rules.TypeSignature(typed_rules)
    return inputs
Beispiel #14
0
    def __init__(self, hdfs_file, input_type,
                 output_type, name=None):
        """Check the library file and type specs, then set up ``UDFInfo``.

        Parameters
        ----------
        hdfs_file : str
            Path of the compiled function library; must end in ``.so`` or
            ``.ll``.
        input_type : iterable
            Argument type specifications, each validated with
            ``validate_type``.
        output_type
            Result type specification, validated with ``validate_type``.
        name : str, optional
            Name passed to ``UDFInfo``. When falsy, a name is generated as
            the SHA-1 hex digest of ``self.so_symbol`` plus the input type
            names.

        Raises
        ------
        ValueError
            If ``hdfs_file`` does not end in ``.so`` or ``.ll``.
        """
        if not hdfs_file.endswith(('.so', '.ll')):
            raise ValueError('Invalid file type. Must be .so or .ll ')
        self.hdfs_file = hdfs_file
        inputs = [validate_type(x) for x in input_type]
        output = validate_type(output_type)
        new_name = name
        if not name:
            # ``so_symbol`` is read but never assigned here, so it must
            # already exist on the instance when this runs.
            type_names = ''.join(in_type.name() for in_type in inputs)
            signature = self.so_symbol + type_names
            # Text must be encoded before hashing on Python 3 (assumes
            # ``sha1`` is ``hashlib.sha1``); bytes are hashed as they are.
            if not isinstance(signature, bytes):
                signature = signature.encode('utf-8')
            new_name = sha1(signature).hexdigest()

        UDFInfo.__init__(self, inputs, output, new_name)
Beispiel #15
0
def _ibis_string_to_impala(tval):
    """Translate an ibis type string into its SQL type name.

    Known names come straight from ``sql_type_names``. Anything else is
    parsed with ``dt.validate_type`` and the ``repr`` of a truthy result is
    returned; a falsy result yields ``None``.
    """
    from ibis.backends.base_sql import sql_type_names

    if tval not in sql_type_names:
        parsed = dt.validate_type(tval)
        return repr(parsed) if parsed else None
    return sql_type_names[tval]
Beispiel #16
0
def _ibis_string_to_impala(tval):
    """Map an ibis type string to the corresponding Impala type string.

    A direct hit in ``_sql_type_names`` wins. Otherwise the string is parsed
    with ``dt.validate_type``; a truthy result is returned as its ``repr``
    and a falsy one gives ``None``.
    """
    from ibis.impala.compiler import _sql_type_names

    if tval not in _sql_type_names:
        parsed_type = dt.validate_type(tval)
        if not parsed_type:
            return None
        return repr(parsed_type)
    return _sql_type_names[tval]
Beispiel #17
0
    def _validate(self, args, i):
        """Replace ``args[i]`` with its validated data type and return it.

        String specifications are lower-cased before being passed to
        ``dt.validate_type``.
        """
        spec = args[i]
        if isinstance(spec, py_string):
            spec = spec.lower()

        validated = dt.validate_type(spec)
        args[i] = validated  # store the result back in place
        return validated
Beispiel #18
0
def _ibis_string_to_impala(tval):
    """Convert an ibis type string to an Impala type string.

    Names present in ``_sql_type_names`` are looked up directly; other
    strings go through ``dt.validate_type`` and the ``repr`` of a truthy
    result is returned. ``None`` is returned when that result is falsy.
    """
    from ibis.impala.compiler import _sql_type_names

    known = tval in _sql_type_names
    if known:
        return _sql_type_names[tval]

    validated = dt.validate_type(tval)
    return repr(validated) if validated else None
Beispiel #19
0
    def _validate(self, args, i):
        """Validate the type spec at ``args[i]`` and write the result back.

        A string spec is lower-cased first. The validated type replaces the
        entry in ``args`` and is also returned.
        """
        candidate = args[i]
        candidate = (
            candidate.lower() if isinstance(candidate, py_string)
            else candidate
        )
        args[i] = dt.validate_type(candidate)
        return args[i]
Beispiel #20
0
def _impala_signature(sig):
    """Render a signature as a comma-separated string of SQL type names.

    ``sig`` is either a ``rules.TypeSignature`` (a ``rules.VarArgs`` renders
    as its single argument type followed by ``...``) or an iterable of type
    specifications that are validated before rendering.
    """
    if not isinstance(sig, rules.TypeSignature):
        validated = [validate_type(spec) for spec in sig]
        return ', '.join([_type_to_sql_string(t) for t in validated])

    if isinstance(sig, rules.VarArgs):
        return '{0}...'.format(_arg_to_string(sig.arg_type))

    return ', '.join([_arg_to_string(arg) for arg in sig.types])
Beispiel #21
0
def _arg_to_string(arg):
    """Return the SQL type string for a single signature argument.

    Accepts a ``rules.ValueTyped`` rule carrying at most one type, or a type
    string. Anything else raises ``NotImplementedError``, as does a
    ``ValueTyped`` rule with more than one type.
    """
    if isinstance(arg, rules.ValueTyped):
        candidates = arg.types
        if len(candidates) > 1:
            raise NotImplementedError
        return _type_to_sql_string(candidates[0])

    if isinstance(arg, py_string):
        return _type_to_sql_string(validate_type(arg))

    raise NotImplementedError
Beispiel #22
0
def _arg_to_string(arg):
    """Convert one argument rule or type string to a SQL type string.

    Raises ``NotImplementedError`` for a ``rules.ValueTyped`` holding more
    than one type and for any argument that is neither ``ValueTyped`` nor a
    string.
    """
    is_rule = isinstance(arg, rules.ValueTyped)
    if not is_rule and not isinstance(arg, py_string):
        raise NotImplementedError

    if is_rule:
        rule_types = arg.types
        if len(rule_types) > 1:
            raise NotImplementedError
        ibis_type = rule_types[0]
    else:
        ibis_type = validate_type(arg)
    return _type_to_sql_string(ibis_type)
Beispiel #23
0
def test_udf_primitive_output_types(ty, value, column, table):
    """Check the result class of a UDF for scalar and table-column input."""
    udf_func = _register_udf([ty], ty, 'test')
    ibis_type = dt.validate_type(ty)

    scalar_result = udf_func(value)
    assert type(scalar_result) == ibis_type.scalar

    column_result = udf_func(table[column])
    assert type(column_result) == ibis_type.column
Beispiel #24
0
def test_uda_primitive_output_types(ty, value):
    """Check that a registered UDA returns the type's scalar expression.

    The function is invoked twice with the same value and each result is
    checked in turn.
    """
    uda_func = _register_uda([ty], ty, 'test')
    ibis_type = dt.validate_type(ty)

    for _ in range(2):
        result = uda_func(value)
        assert isinstance(result, ibis_type.scalar)
Beispiel #25
0
def _impala_signature(sig):
    """Format a function signature as comma-separated SQL type names.

    A ``rules.VarArgs`` signature becomes ``"<type>..."``; any other
    ``rules.TypeSignature`` is rendered argument by argument. A plain
    iterable of type specifications is validated and rendered directly.
    """
    if isinstance(sig, rules.VarArgs):
        # VarArgs is only special-cased when it is also a TypeSignature.
        if isinstance(sig, rules.TypeSignature):
            return '{0}...'.format(_arg_to_string(sig.arg_type))

    if isinstance(sig, rules.TypeSignature):
        parts = [_arg_to_string(arg) for arg in sig.types]
    else:
        parts = [_type_to_sql_string(validate_type(x)) for x in sig]
    return ', '.join(parts)
Beispiel #26
0
def test_uda_primitive_output_types(ty, value, table):
    """Check that two calls of a registered UDA both give scalar results.

    The expected class for each result is built by applying the type's
    scalar constructor to that result's operation.
    """
    uda_func = _register_uda([ty], ty, 'test')
    ibis_type = dt.validate_type(ty)

    results = [uda_func(value), uda_func(value)]
    expected_classes = [
        type(ibis_type.scalar_type()(result.op())) for result in results
    ]
    for result, expected_cls in zip(results, expected_classes):
        assert isinstance(result, expected_cls)
Beispiel #27
0
def test_udf_primitive_output_types(ty, value, column, table):
    """Compare UDF result classes against the type's own constructors.

    The UDF is applied to a scalar value and to ``table[column]``; each
    result's class must equal the class produced by the scalar/column
    constructor for the same operation.
    """
    udf_func = _register_udf([ty], ty, 'test')
    ibis_type = dt.validate_type(ty)

    scalar_result = udf_func(value)
    scalar_cls = type(ibis_type.scalar_type()(scalar_result.op()))
    assert type(scalar_result) == scalar_cls  # noqa: E721

    column_result = udf_func(table[column])
    column_cls = type(ibis_type.column_type()(column_result.op()))
    assert type(column_result) == column_cls  # noqa: E721
Beispiel #28
0
def int_literal_class(value, allow_overflow=False):
    """Return the narrowest signed integer type able to hold ``value``.

    Parameters
    ----------
    value : int
        The integer to classify.
    allow_overflow : bool, optional
        When true, values outside the 64-bit range are still reported as
        ``int64`` instead of raising.

    Raises
    ------
    OverflowError
        If ``value`` does not fit in 64 bits and ``allow_overflow`` is false.
    """
    # (type name, inclusive lower bound, inclusive upper bound)
    narrow_ranges = (
        ('int8', -128, 127),
        ('int16', -32768, 32767),
        ('int32', -2147483648, 2147483647),
    )
    for type_name, lower, upper in narrow_ranges:
        if lower <= value <= upper:
            return dt.validate_type(type_name)

    beyond_int64 = (
        value < -9223372036854775808 or value > 9223372036854775807
    )
    if beyond_int64 and not allow_overflow:
        raise OverflowError(value)
    return dt.validate_type('int64')
Beispiel #29
0
def int_literal_class(value, allow_overflow=False):
    """Pick the smallest signed integer type that can represent ``value``.

    Candidates are tried from ``int8`` up to ``int32``; everything else is
    reported as ``int64``. A value outside the 64-bit range raises
    ``OverflowError`` unless ``allow_overflow`` is true.
    """
    if -128 <= value <= 127:
        return dt.validate_type('int8')
    if -32768 <= value <= 32767:
        return dt.validate_type('int16')
    if -2147483648 <= value <= 2147483647:
        return dt.validate_type('int32')

    too_small = value < -9223372036854775808
    if (too_small or value > 9223372036854775807) and not allow_overflow:
        raise OverflowError(value)
    return dt.validate_type('int64')
Beispiel #30
0
def literal(value, type=None):
    """Wrap a Python value in a scalar literal expression.

    Parameters
    ----------
    value : some Python basic type
        The value to wrap. A value that already is an expression whose
        operation is a ``Literal`` is returned unchanged.
    type : ibis type or string, optional
        An instance of :class:`ibis.expr.datatypes.DataType` or a string
        naming the ibis type of `value`. Only needed when ibis's type
        inference isn't sufficient for discovering the type of `value`.

    Returns
    -------
    literal_value : Literal
        An expression representing a literal value

    Raises
    ------
    TypeError
        If the resolved type reports that `value` is not a valid literal.

    Examples
    --------
    >>> import ibis
    >>> x = ibis.literal(42)
    >>> x.type()
    int8
    >>> y = ibis.literal(42, type='double')
    >>> y.type()
    double
    >>> ibis.literal('foobar', type='int64')  # doctest: +ELLIPSIS
    Traceback (most recent call last):
      ...
    TypeError: Value 'foobar' cannot be safely coerced to int64
    """
    if hasattr(value, 'op') and isinstance(value.op(), Literal):
        return value

    # Infer the type unless the caller supplied one explicitly.
    dtype = (
        infer_literal_type(value) if type is None
        else dt.validate_type(type)
    )

    if not dtype.valid_literal(value):
        message = 'Value {!r} cannot be safely coerced to {}'.format(
            value, dtype)
        raise TypeError(message)

    # Null-like values become a typed null instead of a Literal node.
    if value is None or value is _NULL or value is null:
        return null().cast(dtype)
    return Literal(value, type=dtype).to_expr()
Beispiel #31
0
    def test_uda_primitive_output_types(self):
        """Check that UDAs on primitive types return scalar expressions.

        Each UDA is called twice with the same scalar value; the third tuple
        element of every case is not used by this test.
        """
        cases = [
            ('boolean', True, self.b),
            ('int8', 1, self.i8),
            ('int16', 1, self.i16),
            ('int32', 1, self.i32),
            ('int64', 1, self.i64),
            ('float', 1.0, self.f),
            ('double', 1.0, self.d),
            ('string', '1', self.s),
            ('timestamp', ibis.timestamp('1961-04-10'), self.t),
        ]
        for type_name, scalar_value, _unused in cases:
            uda_func = self._register_uda([type_name], type_name, 'test')
            ibis_type = validate_type(type_name)

            results = (uda_func(scalar_value), uda_func(scalar_value))
            for result in results:
                assert isinstance(result, ibis_type.scalar_type())
Beispiel #32
0
def parse_type(t):
    """Translate an Impala type name into an ibis type string.

    Parameters
    ----------
    t : str
        Impala type name; it is lower-cased before any lookup.

    Returns
    -------
    The entry of ``_impala_to_ibis_type`` for known names, ``'string'`` for
    any name containing ``char``, or the lower-cased name itself for a
    ``decimal`` specification accepted by ``validate_type``.

    Raises
    ------
    ValueError
        If a ``decimal`` specification yields a falsy validation result.
    Exception
        If the name matches none of the supported forms.
    """
    t = t.lower()
    if t in _impala_to_ibis_type:
        return _impala_to_ibis_type[t]

    # 'char' is a substring of 'varchar', so one test covers both.
    if 'char' in t:
        return 'string'

    if 'decimal' in t:
        if validate_type(t):
            return t
        # Raise the error; returning the exception object would hand
        # callers a truthy value that looks like a successful result.
        raise ValueError(t)

    raise Exception(t)
Beispiel #33
0
def parse_type(t):
    """Map an Impala type name to the matching ibis type string.

    The name is lower-cased first. Known names are looked up in
    ``_impala_to_ibis_type``; names containing ``char`` map to ``'string'``;
    ``decimal`` specifications are validated and returned lower-cased.

    Parameters
    ----------
    t : str
        Impala type name.

    Raises
    ------
    ValueError
        If validating a ``decimal`` specification gives a falsy result.
    Exception
        If the name is not recognised at all.
    """
    t = t.lower()
    if t in _impala_to_ibis_type:
        return _impala_to_ibis_type[t]
    # Testing for 'char' alone is enough: it also matches 'varchar'.
    elif 'char' in t:
        return 'string'
    elif 'decimal' in t:
        if not validate_type(t):
            # The exception has to be raised, not returned, so that a bad
            # specification cannot be mistaken for a valid type.
            raise ValueError(t)
        return t
    else:
        raise Exception(t)
Beispiel #34
0
    def test_uda_primitive_output_types(self):
        """Verify that UDAs over primitive types yield scalar expressions."""
        cases = [
            ('boolean', True, self.b),
            ('int8', 1, self.i8),
            ('int16', 1, self.i16),
            ('int32', 1, self.i32),
            ('int64', 1, self.i64),
            ('float', 1.0, self.f),
            ('double', 1.0, self.d),
            ('string', '1', self.s),
            ('timestamp', ibis.timestamp('1961-04-10'), self.t),
        ]
        # Only the type name and scalar value are used; the fixture column
        # in each case is ignored.
        for type_name, scalar_arg, _column in cases:
            uda_func = self._register_uda([type_name], type_name, 'test')
            ibis_type = validate_type(type_name)

            first = uda_func(scalar_arg)
            second = uda_func(scalar_arg)
            for result in (first, second):
                assert isinstance(result, ibis_type.scalar_type())
Beispiel #35
0
def test_struct():
    """Check parsing of an array of structs that nests another such array."""
    orders = """array<struct<
                    oid: int64,
                    status: string,
                    totalprice: decimal(12, 2),
                    order_date: string,
                    items: array<struct<
                        iid: int64,
                        name: string,
                        price: decimal(12, 2),
                        discount_perc: decimal(12, 2),
                        shipdate: string
                    >>
                >>"""

    # Element type of the nested ``items`` array.
    item_type = dt.Struct.from_tuples(
        [
            ("iid", dt.int64),
            ("name", dt.string),
            ("price", dt.Decimal(12, 2)),
            ("discount_perc", dt.Decimal(12, 2)),
            ("shipdate", dt.string),
        ]
    )
    # Element type of the outer array.
    order_type = dt.Struct.from_tuples(
        [
            ("oid", dt.int64),
            ("status", dt.string),
            ("totalprice", dt.Decimal(12, 2)),
            ("order_date", dt.string),
            ("items", dt.Array(item_type)),
        ]
    )
    expected = dt.Array(order_type)

    assert dt.validate_type(orders) == expected
Beispiel #36
0
 def _to_type(x):
     """Validate the ibis type that corresponds to the Impala type name ``x``.

     The name is lower-cased before being translated with
     ``udf._impala_type_to_ibis``.
     """
     lowered = x.lower()
     return validate_type(udf._impala_type_to_ibis(lowered))
Beispiel #37
0
def test_nested_array():
    """Check that an array-of-array-of-string spec parses to nested Arrays."""
    parsed = dt.validate_type("array<array<string>>")
    expected = dt.Array(dt.Array(dt.string))
    assert parsed == expected
Beispiel #38
0
def test_literal_with_explicit_type(value, expected_type):
    """Check that a literal built with an explicit type reports that type."""
    typed_literal = ibis.literal(value, type=expected_type)
    actual_type = typed_literal.type()
    assert actual_type.equals(dt.validate_type(expected_type))
Beispiel #39
0
 def f(self):
     """Return the array type of the data type described by ``rule``.

     ``rule`` comes from the enclosing scope: a ``dt.DataType`` is used as
     is, anything else is called with ``self`` and its result validated.
     """
     resolved = (
         rule if isinstance(rule, dt.DataType)
         else dt.validate_type(rule(self))
     )
     return resolved.array_type()
Beispiel #40
0
def shape_like(arg, out_type):
    """Return the scalar or array type of ``out_type`` to match ``arg``.

    ``out_type`` is validated first. A ``ir.ScalarExpr`` argument selects the
    scalar type; any other argument selects the array type.
    """
    validated = dt.validate_type(out_type)
    if not isinstance(arg, ir.ScalarExpr):
        return validated.array_type()
    return validated.scalar_type()
Beispiel #41
0
 def _type_signature(self, inputs, output):
     """Build the ``(input signature, output rule)`` pair for a function.

     Inputs are converted with ``_to_input_sig``; the validated output type
     is wrapped with ``rules.shape_like_flatargs``.
     """
     input_sig = _to_input_sig(inputs)
     validated_output = validate_type(output)
     return input_sig, rules.shape_like_flatargs(validated_output)
Beispiel #42
0
def test_char_varchar(spec):
    """Check that the given char/varchar spec validates to ``dt.string``."""
    parsed = dt.validate_type(spec)
    assert parsed == dt.string
Beispiel #43
0
 def f(self):
     """Return the scalar type of the data type produced by ``rule(self)``.

     ``rule`` comes from the enclosing scope; its result is validated with
     ``dt.validate_type``.
     """
     type_spec = rule(self)
     return dt.validate_type(type_spec).scalar_type()
Beispiel #44
0
def test_literal_with_explicit_type(value, expected_type):
    """Ensure ``ibis.literal`` honours an explicitly requested type."""
    literal_expr = ibis.literal(value, type=expected_type)
    reported = literal_expr.type()
    wanted = dt.validate_type(expected_type)
    assert reported.equals(wanted)
Beispiel #45
0
 def _type_signature(self, inputs, output):
     """Build the ``(input signature, output rule)`` pair for a function.

     Inputs are converted with ``_to_input_sig``; the validated output type
     is wrapped with ``rules.scalar_output``.
     """
     input_sig = _to_input_sig(inputs)
     validated_output = validate_type(output)
     return input_sig, rules.scalar_output(validated_output)
Beispiel #46
0
 def _type_signature(self, inputs, output):
     """Return the input signature and the shape-following output rule.

     ``inputs`` goes through ``_to_input_sig``; ``output`` is validated and
     passed to ``rules.shape_like_flatargs``.
     """
     signature = _to_input_sig(inputs)
     output_rule = rules.shape_like_flatargs(validate_type(output))
     return signature, output_rule
Beispiel #47
0
def test_primitive(spec):
    """Check that a primitive type name validates to its registered type."""
    parsed = dt.validate_type(spec)
    registered = dt._primitive_types[spec]
    assert parsed == registered
Beispiel #48
0
 def f(self):
     """Resolve ``rule`` to a data type and return its array type.

     ``rule`` is taken from the enclosing scope. If it is not already a
     ``dt.DataType`` it is called with ``self`` and the result validated.
     """
     if not isinstance(rule, dt.DataType):
         return dt.validate_type(rule(self)).array_type()
     return rule.array_type()
Beispiel #49
0
 def _type_signature(self, inputs, output):
     """Return the input signature and the scalar output rule.

     ``inputs`` goes through ``_to_input_sig``; ``output`` is validated and
     passed to ``rules.scalar_output``.
     """
     signature = _to_input_sig(inputs)
     output_rule = rules.scalar_output(validate_type(output))
     return signature, output_rule
Beispiel #50
0
def test_char_varchar_invalid(spec):
    """Check that a malformed char/varchar spec raises ``SyntaxError``."""
    expect_syntax_error = pytest.raises(SyntaxError)
    with expect_syntax_error:
        dt.validate_type(spec)
Beispiel #51
0
def test_decimal_failure():
    """Check that an unterminated decimal spec raises ``SyntaxError``."""
    truncated_spec = "decimal("
    with pytest.raises(SyntaxError):
        dt.validate_type(truncated_spec)
Beispiel #52
0
def _impala_signature(types):
    """Return the SQL type string of every type specification in ``types``.

    Each specification is validated with ``validate_type`` before being
    converted with ``_type_to_sql_string``.
    """
    from ibis.expr.datatypes import validate_type

    sql_names = []
    for spec in types:
        sql_names.append(_type_to_sql_string(validate_type(spec)))
    return sql_names
Beispiel #53
0
def _largest_int(int_types):
    """Return the signed integer type as wide as the widest of ``int_types``.

    The width is taken from each type's ``_nbytes`` attribute.
    """
    widest_bytes = max(int_type._nbytes for int_type in int_types)
    bit_width = 8 * widest_bytes
    return dt.validate_type('int%d' % bit_width)
Beispiel #54
0
def _largest_int(int_types):
    """Pick the ``int<N>`` type matching the largest ``_nbytes`` given.

    ``N`` is eight times the maximum ``_nbytes`` found in ``int_types``.
    """
    byte_sizes = (candidate._nbytes for candidate in int_types)
    type_name = 'int%d' % (8 * max(byte_sizes))
    return dt.validate_type(type_name)
Beispiel #55
0
def shape_like_args(args, out_type):
    """Return the array or scalar type of ``out_type`` to match ``args``.

    ``out_type`` is validated first. The array type is returned when
    ``util.any_of(args, ir.ColumnExpr)`` is true, the scalar type otherwise.
    """
    validated = dt.validate_type(out_type)
    if not util.any_of(args, ir.ColumnExpr):
        return validated.scalar_type()
    return validated.array_type()
Beispiel #56
0
def shape_like(arg, out_type):
    """Pick the expression type of ``out_type`` that mirrors ``arg``.

    After validating ``out_type``, its scalar type is returned for a
    ``ir.ScalarExpr`` argument and its array type for anything else.
    """
    result_type = dt.validate_type(out_type)
    arg_is_scalar = isinstance(arg, ir.ScalarExpr)
    if arg_is_scalar:
        return result_type.scalar_type()
    return result_type.array_type()
Beispiel #57
0
def shape_like_args(args, out_type):
    """Return the array or scalar type of ``out_type`` to match ``args``.

    ``out_type`` is validated first. The array type is returned when
    ``util.any_of(args, ir.ArrayExpr)`` is true, the scalar type otherwise.
    """
    validated = dt.validate_type(out_type)
    if not util.any_of(args, ir.ArrayExpr):
        return validated.scalar_type()
    return validated.array_type()
Beispiel #58
0
 def f(self):
     """Return the array type of the data type produced by ``rule(self)``.

     ``rule`` comes from the enclosing scope; its result is validated with
     ``dt.validate_type``.
     """
     type_spec = rule(self)
     return dt.validate_type(type_spec).array_type()
Beispiel #59
0
def _operation_type_conversion(inputs, output):
    """Build the input rules and output rule for an operation.

    Every input specification is validated, converted with
    ``_convert_types`` and wrapped with ``rules.value_typed_as``. The
    validated output type is wrapped with ``rules.shape_like_flatargs``.

    Returns
    -------
    tuple
        ``(input rules, output rule)``.
    """
    validated_inputs = [validate_type(spec) for spec in inputs]
    input_rules = []
    for input_type in validated_inputs:
        converted = _convert_types(input_type)
        input_rules.append(rules.value_typed_as(converted))

    output_rule = rules.shape_like_flatargs(validate_type(output))
    return (input_rules, output_rule)
Beispiel #60
0
 def f(self):
     """Validate the result of ``rule(self)`` and return its array type.

     ``rule`` is taken from the enclosing scope.
     """
     validated = dt.validate_type(rule(self))
     array_cls = validated.array_type()
     return array_cls