예제 #1
0
def test_nullable_column_propagated():
    """Check nullability on table columns and on sums of those columns.

    Each column must report the ``nullable`` flag of the type it was
    declared with, and a sum of two columns must be nullable unless both
    operands are non-nullable.
    """
    column_specs = [
        ('a', dt.Int32(nullable=True)),
        ('b', dt.Int32(nullable=False)),
        ('c', dt.String(nullable=False)),
        ('d', dt.float64),  # nullable by default
        ('f', dt.Float64(nullable=False)),
    ]
    t = ibis.table(column_specs)

    # Declared nullability must be visible on each column, checked in order.
    declared = (
        ('a', True),
        ('b', False),
        ('c', False),
        ('d', True),
        ('f', False),
    )
    for column_name, is_nullable in declared:
        assert getattr(t, column_name).type().nullable is is_nullable

    # nullable + nullable stays nullable.
    assert (t.a + t.d).type().nullable is True

    # One nullable operand is enough to make the sum nullable.
    assert (t.b + t.d).type().nullable is True

    # Only two non-nullable operands give a non-nullable sum.
    assert (t.b + t.f).type().nullable is False
예제 #2
0
def sa_mysql_double(_, satype, nullable=True):
    """Return a ``dt.Float64`` carrying the requested nullability.

    The first two arguments are accepted but not read by the body
    (presumably a dialect and the SQLAlchemy type being converted --
    confirm against the registration site).
    """
    # TODO: handle asdecimal=True
    float64_type = dt.Float64(nullable=nullable)
    return float64_type
예제 #3
0
def sa_double(_, satype, nullable=True):
    """Build a ``dt.Float64`` whose ``nullable`` flag mirrors the argument.

    Neither ``_`` nor ``satype`` is used by the body; only ``nullable``
    influences the result.
    """
    converted = dt.Float64(nullable=nullable)
    return converted
예제 #4
0
def mysql_double(satype, nullable=True):
    """Produce a ``dt.Float64`` with the given nullability.

    ``satype`` is accepted but ignored by the body.
    """
    result_type = dt.Float64(nullable=nullable)
    return result_type
예제 #5
0
def parse(text: str) -> DataType:
    """Parse a textual type description into an ibis ``DataType``.

    A parser-combinator grammar is assembled from the nested parsers below
    and then run against ``text``.  Every type the grammar constructs directly
    is created with ``nullable=False``; the only path that yields a nullable
    type is the ``nullable(...)`` wrapper, which re-creates the wrapped type
    with ``nullable=True``.

    NOTE(review): ``p`` looks like the ``parsy`` library and the accepted
    spellings (``nullable``, ``fixedstring``, ``nested``, ``tuple``) look like
    ClickHouse type names -- confirm against the module's imports and callers.

    Parameters
    ----------
    text
        The type string to parse.

    Returns
    -------
    DataType
        Whatever ``ty.parse(text)`` produces for the combined grammar.
    """

    # datetime64[(tz)] / datetime[(tz)]: an optional parenthesised string
    # following the keyword is passed on as the timestamp's timezone.
    # ``parened_string`` is defined further down; the name is only looked up
    # when the generator body actually runs.
    @p.generate
    def datetime():
        yield dt.spaceless_string("datetime64", "datetime")
        timezone = yield parened_string.optional()
        return dt.Timestamp(timezone=timezone, nullable=False)

    # Scalar types.  Assuming ``|`` tries alternatives left to right (as in
    # parsy), the order matters wherever one spelling is a prefix of another
    # (e.g. "datetime" before "date", "float32" before "float", "int" last
    # among the signed integers) -- keep this in mind when adding entries.
    primitive = (
        datetime
        | dt.spaceless_string("null", "nothing").result(dt.null)
        | dt.spaceless_string("bigint", "int64").result(
            dt.Int64(nullable=False))
        | dt.spaceless_string("double", "float64").result(
            dt.Float64(nullable=False))
        | dt.spaceless_string("float32", "float").result(
            dt.Float32(nullable=False))
        | dt.spaceless_string("smallint", "int16", "int2").result(
            dt.Int16(nullable=False))
        | dt.spaceless_string("date32", "date").result(dt.Date(nullable=False))
        | dt.spaceless_string("time").result(dt.Time(nullable=False))
        # Note: "boolean"/"bool" are mapped to Int8 here, not to a boolean type.
        | dt.spaceless_string("tinyint", "int8", "int1", "boolean",
                              "bool").result(dt.Int8(nullable=False))
        | dt.spaceless_string("integer", "int32", "int4", "int").result(
            dt.Int32(nullable=False))
        | dt.spaceless_string("uint64").result(dt.UInt64(nullable=False))
        | dt.spaceless_string("uint32").result(dt.UInt32(nullable=False))
        | dt.spaceless_string("uint16").result(dt.UInt16(nullable=False))
        | dt.spaceless_string("uint8").result(dt.UInt8(nullable=False))
        | dt.spaceless_string("uuid").result(dt.UUID(nullable=False))
        # All text and blob spellings collapse to a single String type.
        | dt.spaceless_string(
            "longtext",
            "mediumtext",
            "tinytext",
            "text",
            "longblob",
            "mediumblob",
            "tinyblob",
            "blob",
            "varchar",
            "char",
            "string",
        ).result(dt.String(nullable=False)))

    # "(" RAW_STRING ")" -> the raw string between the parentheses.
    @p.generate
    def parened_string():
        yield dt.LPAREN
        s = yield dt.RAW_STRING
        yield dt.RPAREN
        return s

    # nullable(<type>): parse the inner type, then call the resulting type
    # object with ``nullable=True``.
    # NOTE(review): relies on DataType instances being callable and returning
    # a copy with the overridden argument -- confirm against DataType.__call__.
    @p.generate
    def nullable():
        yield dt.spaceless_string("nullable")
        yield dt.LPAREN
        parsed_ty = yield ty
        yield dt.RPAREN
        return parsed_ty(nullable=True)

    # fixedstring(N): the length is consumed but discarded; the result is a
    # plain String.
    @p.generate
    def fixed_string():
        yield dt.spaceless_string("fixedstring")
        yield dt.LPAREN
        yield dt.NUMBER
        yield dt.RPAREN
        return dt.String(nullable=False)

    # decimal(precision, scale) / numeric(precision, scale).
    @p.generate
    def decimal():
        yield dt.spaceless_string("decimal", "numeric")
        precision, scale = yield dt.LPAREN.then(
            p.seq(dt.PRECISION.skip(dt.COMMA), dt.SCALE)).skip(dt.RPAREN)
        return dt.Decimal(precision, scale, nullable=False)

    # "(" <type> ")" -> the parsed inner type.
    @p.generate
    def paren_type():
        yield dt.LPAREN
        value_type = yield ty
        yield dt.RPAREN
        return value_type

    # array(<type>), recursing through ``ty`` for the element type.
    @p.generate
    def array():
        yield dt.spaceless_string("array")
        value_type = yield paren_type
        return dt.Array(value_type, nullable=False)

    # map(<key type>, <value type>).
    # NOTE: this local name shadows the builtin ``map`` inside ``parse``.
    @p.generate
    def map():
        yield dt.spaceless_string("map")
        yield dt.LPAREN
        key_type = yield ty
        yield dt.COMMA
        value_type = yield ty
        yield dt.RPAREN
        return dt.Map(key_type, value_type, nullable=False)

    # Separator required between a field name and its type.
    at_least_one_space = p.regex(r"\s+")

    # nested(name type, ...): every field must be named, and each field's
    # type is wrapped in an Array before the Struct is built.
    @p.generate
    def nested():
        yield dt.spaceless_string("nested")
        yield dt.LPAREN

        # The lambda's ``ty`` parameter shadows the outer ``ty`` parser; inside
        # the lambda it is the already-parsed field type.
        field_names_types = yield (p.seq(
            dt.SPACES.then(dt.FIELD.skip(at_least_one_space)),
            ty).combine(lambda field, ty:
                        (field, dt.Array(ty, nullable=False))).sep_by(
                            dt.COMMA))
        yield dt.RPAREN
        return dt.Struct.from_tuples(field_names_types, nullable=False)

    # tuple([name] type, ...): field names are optional here.
    @p.generate
    def struct():
        yield dt.spaceless_string("tuple")
        yield dt.LPAREN
        field_names_types = yield (p.seq(
            dt.SPACES.then(dt.FIELD.skip(at_least_one_space).optional()),
            ty,
        ).combine(lambda field, ty: (field, ty)).sep_by(dt.COMMA))
        yield dt.RPAREN
        # Unnamed fields receive positional names f0, f1, ... based on their
        # index in the tuple.
        return dt.Struct.from_tuples(
            [(field_name if field_name is not None else f"f{i:d}", typ)
             for i, (field_name, typ) in enumerate(field_names_types)],
            nullable=False,
        )

    # Top-level grammar; the nested parsers above refer back to ``ty`` by
    # closure, which is what makes the grammar recursive.
    ty = (nullable
          | nested
          | primitive
          | fixed_string
          | decimal
          | array
          | map
          | struct)
    return ty.parse(text)
예제 #6
0
        assert df.datetime_col.dtype.name == 'datetime64[ns]'


@pytest.mark.parametrize(
    ('ch_type', 'ibis_type'),
    [
        ('Array(Int8)', dt.Array(dt.Int8(nullable=False))),
        ('Array(Int16)', dt.Array(dt.Int16(nullable=False))),
        ('Array(Int32)', dt.Array(dt.Int32(nullable=False))),
        ('Array(Int64)', dt.Array(dt.Int64(nullable=False))),
        ('Array(UInt8)', dt.Array(dt.UInt8(nullable=False))),
        ('Array(UInt16)', dt.Array(dt.UInt16(nullable=False))),
        ('Array(UInt32)', dt.Array(dt.UInt32(nullable=False))),
        ('Array(UInt64)', dt.Array(dt.UInt64(nullable=False))),
        ('Array(Float32)', dt.Array(dt.Float32(nullable=False))),
        ('Array(Float64)', dt.Array(dt.Float64(nullable=False))),
        ('Array(String)', dt.Array(dt.String(nullable=False))),
        ('Array(FixedString(32))', dt.Array(dt.String(nullable=False))),
        ('Array(Date)', dt.Array(dt.Date(nullable=False))),
        ('Array(DateTime)', dt.Array(dt.Timestamp(nullable=False))),
        ('Array(DateTime64)', dt.Array(dt.Timestamp(nullable=False))),
        ('Array(Nothing)', dt.Array(dt.Null(nullable=False))),
        ('Array(Null)', dt.Array(dt.Null(nullable=False))),
        ('Array(Array(Int8))', dt.Array(dt.Array(dt.Int8(nullable=False)))),
        (
            'Array(Array(Array(Int8)))',
            dt.Array(dt.Array(dt.Array(dt.Int8(nullable=False)))),
        ),
        (
            'Array(Array(Array(Array(Int8))))',
            dt.Array(dt.Array(dt.Array(dt.Array(dt.Int8(nullable=False))))),
예제 #7
0
# Skip the tests in this module when the optional ``clickhouse_driver``
# package cannot be imported, instead of failing at import time.
pytest.importorskip("clickhouse_driver")


@pytest.mark.parametrize(
    ('to_type', 'expected'),
    [
        param(
            'int8',
            'CAST(`double_col` AS Nullable(Int8))',
            id="int8",
        ),
        param(
            'int16',
            'CAST(`double_col` AS Nullable(Int16))',
            id="int16",
        ),
        param(
            'float32',
            'CAST(`double_col` AS Nullable(Float32))',
            id="float32",
        ),
        param('float', '`double_col`', id="float"),
        # alltypes.double_col is non-nullable
        param(
            dt.Float64(nullable=False),
            'CAST(`double_col` AS Float64)',
            id="float64",
        ),
    ],
)
def test_cast_double_col(alltypes, translate, to_type, expected):
    """Casting ``double_col`` to ``to_type`` must translate to ``expected``."""
    assert translate(alltypes.double_col.cast(to_type)) == expected


@pytest.mark.parametrize(
    ('to_type', 'expected'),
    [
        ('int8', 'CAST(`string_col` AS Nullable(Int8))'),
        ('int16', 'CAST(`string_col` AS Nullable(Int16))'),
예제 #8
0
def test_insert_with_more_columns(temporary_alltypes, df):
    """Inserting rows that carry an extra column must raise AssertionError.

    The first ten rows of ``df`` are copied and given an additional
    ``non_existing_column`` (presumably absent from the target table --
    confirm against the fixture) before being inserted.
    """
    rows_with_extra = df[:10].copy()
    rows_with_extra['non_existing_column'] = 'raise on me'

    with pytest.raises(AssertionError):
        temporary_alltypes.insert(rows_with_extra)


@pytest.mark.parametrize(
    ("query", "expected_schema"),
    [
        (
            "SELECT 1 as a, 2 + dummy as b",
            ibis.schema({
                "a": dt.UInt8(nullable=False),
                "b": dt.UInt16(nullable=False),
            }),
        ),
        (
            "SELECT string_col, sum(double_col) as b FROM functional_alltypes GROUP BY string_col",  # noqa: E501
            ibis.schema({
                "string_col": dt.String(nullable=True),
                "b": dt.Float64(nullable=True),
            }),
        ),
    ],
)
def test_get_schema_using_query(con, query, expected_schema):
    """The schema inferred for ``query`` must equal ``expected_schema``."""
    assert con._get_schema_using_query(query) == expected_schema
예제 #9
0
 [
     ('Array(Int8)', dt.Array(dt.Int8(nullable=False), nullable=False)),
     ('Array(Int16)', dt.Array(dt.Int16(nullable=False), nullable=False)),
     ('Array(Int32)', dt.Array(dt.Int32(nullable=False), nullable=False)),
     ('Array(Int64)', dt.Array(dt.Int64(nullable=False), nullable=False)),
     ('Array(UInt8)', dt.Array(dt.UInt8(nullable=False), nullable=False)),
     ('Array(UInt16)', dt.Array(dt.UInt16(nullable=False), nullable=False)),
     ('Array(UInt32)', dt.Array(dt.UInt32(nullable=False), nullable=False)),
     ('Array(UInt64)', dt.Array(dt.UInt64(nullable=False), nullable=False)),
     (
         'Array(Float32)',
         dt.Array(dt.Float32(nullable=False), nullable=False),
     ),
     (
         'Array(Float64)',
         dt.Array(dt.Float64(nullable=False), nullable=False),
     ),
     ('Array(String)', dt.Array(dt.String(nullable=False), nullable=False)),
     (
         'Array(FixedString(32))',
         dt.Array(dt.String(nullable=False), nullable=False),
     ),
     ('Array(Date)', dt.Array(dt.Date(nullable=False), nullable=False)),
     (
         'Array(DateTime)',
         dt.Array(dt.Timestamp(nullable=False), nullable=False),
     ),
     (
         'Array(DateTime64)',
         dt.Array(dt.Timestamp(nullable=False), nullable=False),
     ),