Example #1
0
def test_decimal_sum_type(lineitem):
    """Check the expression type produced by summing a decimal column.

    The sum of ``l_extendedprice`` must be a decimal scalar whose type has
    precision 38 and the same scale as the source column.
    """
    price = lineitem.l_extendedprice
    total = price.sum()
    assert isinstance(total, ir.DecimalScalar)

    expected_type = dt.Decimal(38, price.type().scale)
    assert total.type() == expected_type
Example #2
0
def test_type_metadata(lineitem):
    """Check that ``l_extendedprice`` is a ``decimal(12, 2)`` column."""
    price = lineitem.l_extendedprice
    assert isinstance(price, ir.DecimalColumn)

    column_type = price.type()
    assert column_type == dt.Decimal(12, 2)
Example #3
0
def test_cast_scalar_to_decimal():
    """Check that casting a literal to ``decimal(15,5)`` yields that type."""
    literal = api.literal('1.2345')
    as_decimal = literal.cast('decimal(15,5)')

    assert isinstance(as_decimal, ir.DecimalScalar)
    assert as_decimal.type() == dt.Decimal(15, 5)
Example #4
0
class MapDDataType:
    """MapD backend data type and its translation to/from Ibis data types.

    Attributes
    ----------
    typename : str
        One of the keys of ``dtypes``.
    nullable : bool
    """

    __slots__ = 'typename', 'nullable'

    # using impala.client._HS2_TTypeId_to_dtype as reference
    # https://www.mapd.com/docs/latest/mapd-core-guide/fixed-encoding/
    # MapD type name -> Ibis data type. The keys are also the only type
    # names accepted by ``__init__``.
    dtypes = {
        'BIGINT': dt.int64,
        'BOOL': dt.Boolean,
        'DATE': dt.date,
        'DECIMAL': dt.Decimal(18, 9),
        'DOUBLE': dt.double,
        'FLOAT': dt.float32,
        'INT': dt.int32,
        'LINESTRING': dt.linestring,
        'MULTIPOLYGON': dt.multipolygon,
        'NULL': dt.Null,
        'NUMERIC': dt.Decimal(18, 9),
        'POINT': dt.point,
        'POLYGON': dt.polygon,
        'SMALLINT': dt.int16,
        'STR': dt.string,
        'TIME': dt.time,
        'TIMESTAMP': dt.timestamp,
        'TINYINT': dt.int8,
    }

    # Reverse lookup: Ibis data type -> MapD type name.
    # NOTE(review): 'DECIMAL' and 'NUMERIC' share the same value in
    # ``dtypes``; presumably the later entry ('NUMERIC') is the one kept
    # here -- confirm.
    ibis_dtypes = {v: k for k, v in dtypes.items()}

    # MapD type name (including aliases such as INTEGER, TEXT, VARCHAR) ->
    # Ibis type name as a string.
    # NOTE(review): 'NUMERIC' maps to 'float64' here but to
    # dt.Decimal(18, 9) in ``dtypes`` -- confirm which one is intended.
    _mapd_to_ibis_dtypes = {
        'BIGINT': 'int64',
        'BOOLEAN': 'Boolean',
        'BOOL': 'Boolean',
        'CHAR': 'string',
        'DATE': 'date',
        'DECIMAL': 'decimal',
        'DOUBLE': 'double',
        'INT': 'int32',
        'INTEGER': 'int32',
        'FLOAT': 'float32',
        'NUMERIC': 'float64',
        'REAL': 'float32',
        'SMALLINT': 'int16',
        'STR': 'string',
        'TEXT': 'string',
        'TIME': 'time',
        'TIMESTAMP': 'timestamp',
        'TINYINT': 'int8',
        'VARCHAR': 'string',
        'POINT': 'point',
        'LINESTRING': 'linestring',
        'POLYGON': 'polygon',
        'MULTIPOLYGON': 'multipolygon',
    }

    def __init__(self, typename, nullable=True):
        """Create a data type from one of the names listed in ``dtypes``.

        Parameters
        ----------
        typename : str
        nullable : bool, default True

        Raises
        ------
        com.UnsupportedBackendType
            if ``typename`` is not a key of ``dtypes``.
        """
        if typename not in self.dtypes:
            raise com.UnsupportedBackendType(typename)
        self.typename = typename
        self.nullable = nullable

    def __str__(self):
        """Return the type name, wrapped in ``Nullable(...)`` if nullable."""
        if self.nullable:
            return 'Nullable({})'.format(self.typename)
        else:
            return self.typename

    def __repr__(self):
        """Return the backend name followed by the ``str`` form."""
        return '<MapD {}>'.format(str(self))

    @classmethod
    def parse(cls, spec):
        """Return a MapDDataType for the given type specification.

        Parameters
        ----------
        spec : str
            Either a bare type name or ``Nullable(<type name>)``.

        Returns
        -------
        MapDDataType
            A bare type name is built with the constructor's default
            ``nullable`` value.
        """
        if spec.startswith('Nullable'):
            # drop the 9-character 'Nullable(' prefix and the closing ')'
            return cls(spec[9:-1], nullable=True)
        else:
            return cls(spec)

    def to_ibis(self):
        """Return the Ibis data type mapped to this type name.

        The entry found in ``dtypes`` is called with
        ``nullable=self.nullable``.

        Returns
        -------
        ibis.expr.datatypes.DataType
        """
        return self.dtypes[self.typename](nullable=self.nullable)

    @classmethod
    def from_ibis(cls, dtype, nullable=None):
        """Return the MapDDataType that corresponds to an Ibis data type.

        Parameters
        ----------
        dtype : ibis.expr.datatypes.DataType
        nullable : bool, optional
            When None, ``dtype.nullable`` is used.

        Returns
        -------
        MapDDataType

        Raises
        ------
        NotImplementedError
            if neither ``type(dtype)`` nor ``dtype`` is in ``ibis_dtypes``.
        """
        # ``ibis_dtypes`` is keyed by both classes and instances, so try the
        # class of ``dtype`` first and then ``dtype`` itself.
        dtype_ = type(dtype)
        if dtype_ in cls.ibis_dtypes:
            typename = cls.ibis_dtypes[dtype_]
        elif dtype in cls.ibis_dtypes:
            typename = cls.ibis_dtypes[dtype]
        else:
            raise NotImplementedError('{} dtype not implemented'.format(dtype))

        if nullable is None:
            nullable = dtype.nullable
        return cls(typename, nullable=nullable)
Example #5
0

@pytest.fixture(scope='module')
def df3():
    """Return a two-row frame with three string key columns and one float."""
    columns = {
        'key': ['a', 'c'],
        'other_value': [4.0, 6.0],
        'key2': ['a', 'e'],
        'key3': ['f', 'e'],
    }
    return pd.DataFrame(columns)


# Column name -> Ibis data type: one decimal column plus array and map
# columns with several element/key/value types. Passed as ``schema`` when
# the ``t`` fixture opens the 'df' table.
t_schema = {
    'decimal': dt.Decimal(4, 3),
    'array_of_float64': dt.Array(dt.double),
    'array_of_int64': dt.Array(dt.int64),
    'array_of_strings': dt.Array(dt.string),
    'map_of_strings_integers': dt.Map(dt.string, dt.int64),
    'map_of_integers_strings': dt.Map(dt.int64, dt.string),
    'map_of_complex_values': dt.Map(dt.string, dt.Array(dt.int64)),
}


@pytest.fixture(scope='module')
def t(client):
    """Return the ``'df'`` table from ``client``, typed with ``t_schema``."""
    table = client.table('df', schema=t_schema)
    return table


@pytest.fixture(scope='module')
Example #6
0
def test_numeric_table_schema(numeric_table):
    """Check that ``numeric_col`` is reported as ``decimal(38, 9)``."""
    actual = numeric_table.schema()
    expected = ibis.schema(
        [
            ('string_col', dt.string),
            ('numeric_col', dt.Decimal(38, 9)),
        ]
    )
    assert actual == expected
Example #7
0
 def type(self):
     """Return the decimal type for this object's precision and scale."""
     precision, scale = self._precision, self._scale
     return dt.Decimal(precision, scale)
Example #8
0
from .datatypes import ibis_type_to_bigquery_type

# NOTE(review): presumably the pseudo-column name used for natively
# partitioned tables -- confirm against the code that reads this constant.
NATIVE_PARTITION_COL = '_PARTITIONTIME'

# Type-name string -> Ibis data type. Both 'DATETIME' and 'TIMESTAMP' map
# to ``dt.timestamp``; 'NUMERIC' maps to a decimal with precision 38 and
# scale 9.
_DTYPE_TO_IBIS_TYPE = {
    'INT64': dt.int64,
    'FLOAT64': dt.double,
    'BOOL': dt.boolean,
    'STRING': dt.string,
    'DATE': dt.date,
    # FIXME: enforce no tz info
    'DATETIME': dt.timestamp,
    'TIME': dt.time,
    'TIMESTAMP': dt.timestamp,
    'BYTES': dt.binary,
    'NUMERIC': dt.Decimal(38, 9),
}

# Legacy type name -> the equivalent key of ``_DTYPE_TO_IBIS_TYPE``.
_LEGACY_TO_STANDARD = {
    'INTEGER': 'INT64',
    'FLOAT': 'FLOAT64',
    'BOOLEAN': 'BOOL',
}

# ``str.format`` template with a single positional placeholder.
_USER_AGENT_DEFAULT_TEMPLATE = 'ibis/{}'


def _create_client_info(application_name):
    user_agent = []

    if application_name:
Example #9
0
def sa_numeric(_, satype, nullable=True):
    """Build an Ibis decimal type from ``satype``'s precision and scale."""
    precision, scale = satype.precision, satype.scale
    return dt.Decimal(precision, scale, nullable=nullable)
Example #10
0
)
def test_infer_dtype(value, expected_dtype):
    """Check type inference for ``value`` and for a typed literal of it."""
    inferred = dt.infer(value)
    assert inferred == expected_dtype

    # test literal creation
    literal = ibis.literal(value, type=expected_dtype)
    assert literal.type() == expected_dtype


@pytest.mark.parametrize(
    ('source', 'target'),
    [
        (dt.any, dt.string),
        (dt.null, dt.date),
        (dt.null, dt.any),
        (dt.int8, dt.int64),
        (dt.int8, dt.Decimal(12, 2)),
        (dt.int32, dt.int32),
        (dt.int32, dt.int64),
        (dt.uint32, dt.uint64),
        (dt.uint32, dt.Decimal(12, 2)),
        (dt.uint32, dt.float32),
        (dt.uint32, dt.float64),
        (dt.Interval('s', dt.int16), dt.Interval('s', dt.int32)),
    ],
)
def test_implicit_castable(source, target):
    """Check that ``dt.castable`` accepts each ``source``/``target`` pair."""
    is_castable = dt.castable(source, target)
    assert is_castable


@pytest.mark.parametrize(
    ('source', 'target'),
Example #11
0
def spark_decimal_dtype_to_ibis_dtype(spark_dtype_obj, nullable=True):
    """Build an Ibis decimal type from the object's precision and scale."""
    return dt.Decimal(
        spark_dtype_obj.precision,
        spark_dtype_obj.scale,
        nullable=nullable,
    )
Example #12
0
class OmniSciDBDataType:
    """Describe an OmniSciDB data type and translate it to/from Ibis."""

    __slots__ = 'typename', 'nullable'

    # using impala.client._HS2_TTypeId_to_dtype as reference
    # NOTE: any updates here should be reflected to
    #       omniscidb.operations._sql_type_names
    # OmniSciDB type name -> Ibis data type; the keys are the only names
    # accepted by ``__init__``.
    dtypes = {
        'BIGINT': dt.int64,
        'BOOL': dt.Boolean,
        'DATE': dt.date,
        'DECIMAL': dt.Decimal(18, 9),
        'DOUBLE': dt.double,
        'FLOAT': dt.float32,
        'INT': dt.int32,
        'LINESTRING': dt.linestring,
        'MULTIPOLYGON': dt.multipolygon,
        'NULL': dt.Null,
        'NUMERIC': dt.Decimal(18, 9),
        'POINT': dt.point,
        'POLYGON': dt.polygon,
        'SMALLINT': dt.int16,
        'STR': dt.string,
        'TIME': dt.time,
        'TIMESTAMP': dt.timestamp,
        'TINYINT': dt.int8,
    }

    # Reverse lookup: Ibis data type -> OmniSciDB type name.
    ibis_dtypes = {ibis_type: name for name, ibis_type in dtypes.items()}

    # NOTE: any updates here should be reflected to
    #       omniscidb.operations._sql_type_names
    # NOTE(review): 'NUMERIC' maps to 'float64' here but to
    # dt.Decimal(18, 9) in ``dtypes`` -- confirm which one is intended.
    _omniscidb_to_ibis_dtypes = {
        'BIGINT': 'int64',
        'BOOLEAN': 'Boolean',
        'BOOL': 'Boolean',
        'CHAR': 'string',
        'DATE': 'date',
        'DECIMAL': 'decimal',
        'DOUBLE': 'double',
        'INT': 'int32',
        'INTEGER': 'int32',
        'FLOAT': 'float32',
        'NUMERIC': 'float64',
        'REAL': 'float32',
        'SMALLINT': 'int16',
        'STR': 'string',
        'TEXT': 'string',
        'TIME': 'time',
        'TIMESTAMP': 'timestamp',
        'TINYINT': 'int8',
        'VARCHAR': 'string',
        'POINT': 'point',
        'LINESTRING': 'linestring',
        'POLYGON': 'polygon',
        'MULTIPOLYGON': 'multipolygon',
    }

    def __init__(self, typename, nullable=True):
        """Store the type name and nullability after validating the name.

        Raises
        ------
        com.UnsupportedBackendType
            if ``typename`` is not a key of ``dtypes``.
        """
        if typename in self.dtypes:
            self.typename = typename
            self.nullable = nullable
        else:
            raise com.UnsupportedBackendType(typename)

    def __str__(self):
        """Return the type name, wrapped in ``Nullable(...)`` if nullable."""
        if not self.nullable:
            return self.typename
        return 'Nullable({})'.format(self.typename)

    def __repr__(self):
        """Return the backend name followed by the ``str`` form."""
        text = str(self)
        return '<OmniSciDB {}>'.format(text)

    @classmethod
    def parse(cls, spec: str):
        """Build an OmniSciDBDataType from a type specification string.

        Parameters
        ----------
        spec : string
            Either a bare type name or ``Nullable(<type name>)``.

        Returns
        -------
        OmniSciDBDataType
        """
        if not spec.startswith('Nullable'):
            return cls(spec)
        # drop the 9-character 'Nullable(' prefix and the closing ')'
        inner_name = spec[9:-1]
        return cls(inner_name, nullable=True)

    def to_ibis(self):
        """
        Translate this OmniSciDB type into the matching Ibis data type.

        Returns
        -------
        ibis.expr.datatypes.DataType
        """
        ibis_type = self.dtypes[self.typename]
        return ibis_type(nullable=self.nullable)

    @classmethod
    def from_ibis(cls, dtype, nullable=None):
        """
        Translate an Ibis data type into the matching OmniSciDBDataType.

        Parameters
        ----------
        dtype : ibis.expr.datatypes.DataType
        nullable : bool
            When None, ``dtype.nullable`` is used.

        Returns
        -------
        OmniSciDBDataType

        Raises
        ------
        NotImplementedError
            if the given data type was not implemented.
        """
        # ``ibis_dtypes`` is keyed by both classes and instances: try the
        # class of ``dtype`` first, then ``dtype`` itself.
        for candidate in (type(dtype), dtype):
            if candidate in cls.ibis_dtypes:
                typename = cls.ibis_dtypes[candidate]
                break
        else:
            raise NotImplementedError('{} dtype not implemented'.format(dtype))

        if nullable is None:
            nullable = dtype.nullable
        return cls(typename, nullable=nullable)
Example #13
0
        (
            dt.Struct.from_tuples([('a', dt.int64), ('b', dt.string),
                                   ('c', dt.Array(dt.string))]),
            'STRUCT<a INT64, b STRING, c ARRAY<STRING>>',
        ),
        (dt.date, 'DATE'),
        (dt.timestamp, 'TIMESTAMP'),
        param(
            dt.Timestamp(timezone='US/Eastern'),
            'TIMESTAMP',
            marks=pytest.mark.xfail(raises=TypeError,
                                    reason='Not supported in BigQuery'),
        ),
        ('array<struct<a: string>>', 'ARRAY<STRUCT<a STRING>>'),
        param(
            dt.Decimal(38, 9),
            'NUMERIC',
            marks=pytest.mark.xfail(raises=TypeError,
                                    reason='Not supported in BigQuery'),
        ),
    ],
)
def test_simple(datatype, expected):
    """Check the BigQuery type name produced for ``datatype``."""
    translated = ibis_type_to_bigquery_type(datatype, TypeTranslationContext())
    assert translated == expected


@pytest.mark.parametrize('datatype', [dt.uint64, dt.Decimal(8, 3)])
def test_simple_failure_mode(datatype):
    """Check that translating ``datatype`` raises ``TypeError``."""
    expect_type_error = pytest.raises(TypeError)
    with expect_type_error:
        ibis_type_to_bigquery_type(datatype)
Example #14
0
EXPECTED_SCHEMA = dict(
    a=dt.int64,
    b=dt.int64,
    c=dt.int64,
    d=dt.boolean,
    e=dt.boolean,
    f=dt.boolean,
    g=dt.binary,
    h=dt.binary,
    i=dt.binary,
    j=dt.binary,
    k=dt.date,
    l=dt.float64,
    m=dt.float64,
    n=dt.Decimal(18, 3),
    o=dt.Decimal(18, 3),
    p=dt.Decimal(10, 3),
    q=dt.int32,
    r=dt.int32,
    s=dt.int32,
    t=dt.int32,
    u=dt.interval,
    v=dt.float32,
    w=dt.float32,
    x=dt.float32,
    y=dt.int16,
    z=dt.int16,
    A=dt.int16,
    B=dt.time,
    C=dt.Timestamp('UTC'),
Example #15
0
def test_invalid_precision_scale_combo(precision, scale):
    """Check that this precision/scale pair is rejected with ValueError."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        dt.Decimal(precision, scale)
Example #16
0
def sa_mysql_numeric(_, satype, nullable=True):
    """Build an Ibis decimal type from ``satype``'s precision and scale.

    A falsy precision falls back to 10 and a falsy scale falls back to 0.
    """
    # https://dev.mysql.com/doc/refman/8.0/en/fixed-point-types.html
    precision = satype.precision or 10
    scale = satype.scale or 0
    return dt.Decimal(precision, scale, nullable=nullable)
Example #17
0
def test_invalid_precision_scale_type(precision, scale):
    """Check that these precision/scale values are rejected with TypeError."""
    expect_type_error = pytest.raises(TypeError)
    with expect_type_error:
        dt.Decimal(precision, scale)
Example #18
0
def test_whole_schema():
    """Check that nested type strings give the same schema as type objects.

    A table is declared with string type specifications (structs, arrays,
    maps and decimals nested in each other) and its schema is compared with
    one assembled from ``dt`` objects.
    """
    # Column types written as type strings.
    address_spec = ('struct<city: string, street: string, '
                    'street_number: int32, zip: int16>')
    orders_spec = """array<struct<
                                oid: int64,
                                status: string,
                                totalprice: decimal(12, 2),
                                order_date: string,
                                items: array<struct<
                                    iid: int64,
                                    name: string,
                                    price: decimal(12, 2),
                                    discount_perc: decimal(12, 2),
                                    shipdate: string
                                >>
                            >>"""
    web_visits_spec = ('map<string, struct<user_agent: string, '
                       'client_ip: string, visit_date: string, '
                       'duration_ms: int32>>')
    support_calls_spec = ('array<struct<agent_id: int64, '
                          'call_date: string, duration_ms: int64, '
                          'issue_resolved: boolean, '
                          'agent_comment: string>>')

    customers = ibis.table(
        [
            ('cid', 'int64'),
            ('mktsegment', 'string'),
            ('address', address_spec),
            ('phone_numbers', 'array<string>'),
            ('orders', orders_spec),
            ('web_visits', web_visits_spec),
            ('support_calls', support_calls_spec),
        ],
        name='customers',
    )

    # The same column types assembled from datatype objects.
    address_type = dt.Struct.from_tuples([
        ('city', dt.string),
        ('street', dt.string),
        ('street_number', dt.int32),
        ('zip', dt.int16),
    ])
    item_type = dt.Struct.from_tuples([
        ('iid', dt.int64),
        ('name', dt.string),
        ('price', dt.Decimal(12, 2)),
        ('discount_perc', dt.Decimal(12, 2)),
        ('shipdate', dt.string),
    ])
    order_type = dt.Struct.from_tuples([
        ('oid', dt.int64),
        ('status', dt.string),
        ('totalprice', dt.Decimal(12, 2)),
        ('order_date', dt.string),
        ('items', dt.Array(item_type)),
    ])
    visit_type = dt.Struct.from_tuples([
        ('user_agent', dt.string),
        ('client_ip', dt.string),
        ('visit_date', dt.string),
        ('duration_ms', dt.int32),
    ])
    call_type = dt.Struct.from_tuples([
        ('agent_id', dt.int64),
        ('call_date', dt.string),
        ('duration_ms', dt.int64),
        ('issue_resolved', dt.boolean),
        ('agent_comment', dt.string),
    ])

    expected = ibis.Schema.from_tuples([
        ('cid', dt.int64),
        ('mktsegment', dt.string),
        ('address', address_type),
        ('phone_numbers', dt.Array(dt.string)),
        ('orders', dt.Array(order_type)),
        ('web_visits', dt.Map(dt.string, visit_type)),
        ('support_calls', dt.Array(call_type)),
    ])
    assert customers.schema() == expected
Example #19
0
    assert t.plain_datetimes_ny.type().equals(dt.Timestamp('America/New_York'))
    assert t.plain_datetimes_utc.type().equals(dt.Timestamp('UTC'))


@pytest.mark.parametrize(
    'column',
    ['plain_datetimes_naive', 'plain_datetimes_ny', 'plain_datetimes_utc'],
)
def test_cast_date(t, df, column):
    """Check a cast to ``'date'`` against ``df[column].dt.normalize()``."""
    compiled = t[column].cast('date').compile()
    normalized = df[column].dt.normalize()
    tm.assert_series_equal(compiled.compute(), normalized.compute())


@pytest.mark.parametrize('type', [dt.Decimal(9, 0), dt.Decimal(12, 3)])
def test_cast_to_decimal(t, df, type):
    expr = t.float64_as_strings.cast(type)
    result = expr.compile()
    context = decimal.Context(prec=type.precision)
    expected = df.float64_as_strings.apply(
        lambda x: context.create_decimal(x).quantize(
            decimal.Decimal('{}.{}'.format('0' * (type.precision - type.scale),
                                           '0' * type.scale))),
        meta=("float64_as_strings", "object"),
    )
    tm.assert_series_equal(result.compute(), expected.compute())
    assert all(
        abs(element.as_tuple().exponent) == type.scale
        for element in result.compute().values)
    assert all(1 <= len(element.as_tuple().digits) <= type.precision
Example #20
0
            ("line", dt.linestring),
            ("real", dt.float32),
            ("double precision", dt.float64),
            ("macaddr", dt.macaddr),
            ("macaddr8", dt.macaddr),
            ("inet", dt.inet),
            ("character", dt.string),
            ("character varying", dt.string),
            ("date", dt.date),
            ("time", dt.time),
            ("time without time zone", dt.time),
            ("timestamp without time zone", dt.timestamp),
            ("timestamp with time zone", dt.Timestamp("UTC")),
            ("interval", dt.interval),
            ("numeric", dt.decimal),
            ("numeric(3, 2)", dt.Decimal(3, 2)),
            ("uuid", dt.uuid),
            ("jsonb", dt.jsonb),
            ("geometry", dt.geometry),
            ("geography", dt.geography),
        ]
    ],
)
def test_get_schema_from_query(con, pg_type, expected_type):
    """Check the schema inferred from a query over a temporary table.

    The table has a column ``x`` of ``pg_type`` and a column ``y`` that is
    an array of ``pg_type``.
    """
    raw_name = ibis.util.guid()
    preparer = con.con.dialect.identifier_preparer
    name = preparer.quote_identifier(raw_name)
    con.raw_sql(f"CREATE TEMPORARY TABLE {name} (x {pg_type}, y {pg_type}[])")

    expected_schema = ibis.schema(
        {'x': expected_type, 'y': dt.Array(expected_type)}
    )
    result_schema = con._get_schema_using_query(f"SELECT x, y FROM {name}")
    assert result_schema == expected_schema