sql = 'SELECT {}'.format(', '.join(sql_types)) df = con.sql(sql).execute() assert df.fixedstring_col.dtype.name == 'object' if parse_version(con.version).base_version >= '1.1.54337': assert df.datetime_col.dtype.name == 'datetime64[ns]' @pytest.mark.parametrize( ('ch_type', 'ibis_type'), [ ('Array(Int8)', dt.Array(dt.Int8(nullable=False))), ('Array(Int16)', dt.Array(dt.Int16(nullable=False))), ('Array(Int32)', dt.Array(dt.Int32(nullable=False))), ('Array(Int64)', dt.Array(dt.Int64(nullable=False))), ('Array(UInt8)', dt.Array(dt.UInt8(nullable=False))), ('Array(UInt16)', dt.Array(dt.UInt16(nullable=False))), ('Array(UInt32)', dt.Array(dt.UInt32(nullable=False))), ('Array(UInt64)', dt.Array(dt.UInt64(nullable=False))), ('Array(Float32)', dt.Array(dt.Float32(nullable=False))), ('Array(Float64)', dt.Array(dt.Float64(nullable=False))), ('Array(String)', dt.Array(dt.String(nullable=False))), ('Array(FixedString(32))', dt.Array(dt.String(nullable=False))), ('Array(Date)', dt.Array(dt.Date(nullable=False))), ('Array(DateTime)', dt.Array(dt.Timestamp(nullable=False))), ('Array(DateTime64)', dt.Array(dt.Timestamp(nullable=False))), ('Array(Nothing)', dt.Array(dt.Null(nullable=False))), ('Array(Null)', dt.Array(dt.Null(nullable=False))), ('Array(Array(Int8))', dt.Array(dt.Array(dt.Int8(nullable=False)))), ( 'Array(Array(Array(Int8)))', dt.Array(dt.Array(dt.Array(dt.Int8(nullable=False)))),
def parse(text: str) -> DataType:
    """Turn a textual type specification into a ``DataType``.

    The grammar is assembled from parser combinators on every call.  It
    recognises scalar type keywords (several aliases each), an optional
    parenthesised timezone string after ``datetime``/``datetime64``,
    ``nullable(T)``, ``fixedstring(N)``, ``decimal(P, S)``/``numeric(P, S)``,
    ``array(T)``, ``map(K, V)``, ``nested(name T, ...)`` and
    ``tuple([name] T, ...)``.

    Every type built here is constructed with ``nullable=False``; the only
    exceptions are the ``null``/``nothing`` keyword, which yields ``dt.null``,
    and anything wrapped in ``nullable(...)``, which is re-created with
    ``nullable=True``.

    NOTE(review): keyword matching is delegated to ``dt.spaceless_string``;
    presumably it tolerates surrounding whitespace and letter case -- confirm
    against its definition.

    Parameters
    ----------
    text
        The type specification to parse.

    Returns
    -------
    DataType
        The type described by ``text``.  Errors raised by the underlying
        ``parse`` call on malformed input are not caught here.
    """
    # Separator required between a field name and its type.
    at_least_one_space = p.regex(r"\s+")

    @p.generate
    def parened_string():
        # "(" <raw string> ")"  ->  the string
        yield dt.LPAREN
        quoted = yield dt.RAW_STRING
        yield dt.RPAREN
        return quoted

    @p.generate
    def datetime():
        yield dt.spaceless_string("datetime64", "datetime")
        tz = yield parened_string.optional()
        return dt.Timestamp(timezone=tz, nullable=False)

    # Keyword aliases and the type each group resolves to.  The sequence is
    # significant: it is folded left-to-right into one alternation, in the
    # same order the alternatives were originally chained.
    keyword_table = (
        (("null", "nothing"), dt.null),
        (("bigint", "int64"), dt.Int64(nullable=False)),
        (("double", "float64"), dt.Float64(nullable=False)),
        (("float32", "float"), dt.Float32(nullable=False)),
        (("smallint", "int16", "int2"), dt.Int16(nullable=False)),
        (("date32", "date"), dt.Date(nullable=False)),
        (("time",), dt.Time(nullable=False)),
        # Boolean spellings resolve to an 8-bit integer.
        (
            ("tinyint", "int8", "int1", "boolean", "bool"),
            dt.Int8(nullable=False),
        ),
        (("integer", "int32", "int4", "int"), dt.Int32(nullable=False)),
        (("uint64",), dt.UInt64(nullable=False)),
        (("uint32",), dt.UInt32(nullable=False)),
        (("uint16",), dt.UInt16(nullable=False)),
        (("uint8",), dt.UInt8(nullable=False)),
        (("uuid",), dt.UUID(nullable=False)),
        (
            (
                "longtext",
                "mediumtext",
                "tinytext",
                "text",
                "longblob",
                "mediumblob",
                "tinyblob",
                "blob",
                "varchar",
                "char",
                "string",
            ),
            dt.String(nullable=False),
        ),
    )

    primitive = datetime
    for aliases, resolved in keyword_table:
        primitive = primitive | dt.spaceless_string(*aliases).result(resolved)

    @p.generate
    def nullable():
        # nullable(T): parse T, then rebuild it as nullable.
        yield dt.spaceless_string("nullable")
        yield dt.LPAREN
        wrapped = yield ty
        yield dt.RPAREN
        return wrapped(nullable=True)

    @p.generate
    def fixed_string():
        # fixedstring(N): the length is consumed but not kept.
        yield dt.spaceless_string("fixedstring")
        yield dt.LPAREN
        yield dt.NUMBER
        yield dt.RPAREN
        return dt.String(nullable=False)

    @p.generate
    def decimal():
        yield dt.spaceless_string("decimal", "numeric")
        precision_and_scale = p.seq(dt.PRECISION.skip(dt.COMMA), dt.SCALE)
        precision, scale = yield dt.LPAREN.then(precision_and_scale).skip(
            dt.RPAREN
        )
        return dt.Decimal(precision, scale, nullable=False)

    @p.generate
    def paren_type():
        # "(" T ")"  ->  T
        yield dt.LPAREN
        inner = yield ty
        yield dt.RPAREN
        return inner

    @p.generate
    def array():
        yield dt.spaceless_string("array")
        element = yield paren_type
        return dt.Array(element, nullable=False)

    @p.generate
    def map():
        yield dt.spaceless_string("map")
        yield dt.LPAREN
        key = yield ty
        yield dt.COMMA
        value = yield ty
        yield dt.RPAREN
        return dt.Map(key, value, nullable=False)

    @p.generate
    def nested():
        # nested(name T, ...): a struct whose fields are each array<T>.
        yield dt.spaceless_string("nested")
        yield dt.LPAREN
        required_name = dt.SPACES.then(dt.FIELD.skip(at_least_one_space))
        column = p.seq(required_name, ty).combine(
            lambda name, typ: (name, dt.Array(typ, nullable=False))
        )
        columns = yield column.sep_by(dt.COMMA)
        yield dt.RPAREN
        return dt.Struct.from_tuples(columns, nullable=False)

    @p.generate
    def struct():
        # tuple([name] T, ...): field names are optional.
        yield dt.spaceless_string("tuple")
        yield dt.LPAREN
        optional_name = dt.SPACES.then(
            dt.FIELD.skip(at_least_one_space).optional()
        )
        member = p.seq(optional_name, ty).combine(
            lambda name, typ: (name, typ)
        )
        members = yield member.sep_by(dt.COMMA)
        yield dt.RPAREN

        # Members without a name get a positional one: f0, f1, ...
        named_members = []
        for position, (name, typ) in enumerate(members):
            if name is None:
                name = f"f{position:d}"
            named_members.append((name, typ))
        return dt.Struct.from_tuples(named_members, nullable=False)

    # Top-level alternation; the generator bodies above look this name up
    # lazily, which is what makes the grammar recursive.
    ty = (
        nullable
        | nested
        | primitive
        | fixed_string
        | decimal
        | array
        | map
        | struct
    )
    return ty.parse(text)
def test_insert_with_more_columns(temporary_alltypes, df):
    """Inserting rows that carry a column the table lacks must be rejected."""
    oversized = df[:10].copy()
    oversized['non_existing_column'] = 'raise on me'

    with pytest.raises(AssertionError):
        temporary_alltypes.insert(oversized)


@pytest.mark.parametrize(
    ("query", "expected_schema"),
    [
        (
            "SELECT 1 as a, 2 + dummy as b",
            ibis.schema(
                {
                    "a": dt.UInt8(nullable=False),
                    "b": dt.UInt16(nullable=False),
                }
            ),
        ),
        (
            "SELECT string_col, sum(double_col) as b FROM functional_alltypes GROUP BY string_col",  # noqa: E501
            ibis.schema(
                {
                    "string_col": dt.String(nullable=True),
                    "b": dt.Float64(nullable=True),
                }
            ),
        ),
    ],
)
def test_get_schema_using_query(con, query, expected_schema):
    """The schema inferred for a raw query must equal the expected schema."""
    inferred = con._get_schema_using_query(query)
    assert inferred == expected_schema