def map():
    """Generator-style parser for ``map`` followed by a key/value type pair.

    Yields, in order: the ``map`` keyword parser, ``dt.LPAREN``, the key
    type parser (``ty``), ``dt.COMMA``, the value type parser (``ty``) and
    ``dt.RPAREN``.  The values sent back for the two ``ty`` yields become
    the key and value types of the result.

    NOTE(review): written in the yield-a-parser style that parsy's
    ``generate`` consumes; no decorator is visible on this definition.

    Returns:
        ``dt.Map(key, value, nullable=False)``.
    """
    yield dt.spaceless_string("map")
    yield dt.LPAREN
    key = yield ty
    yield dt.COMMA
    value = yield ty
    yield dt.RPAREN
    return dt.Map(key, value, nullable=False)
def map():
    """Generator-style parser for ``map`` with angle-delimited key/value types.

    Yields, in order: the ``map`` keyword parser, ``LANGLE``, the key type
    parser, ``COMMA``, the value type parser and ``RANGLE``.  The key is
    parsed with ``primitive`` only, while the value may be any ``ty``.

    NOTE(review): written in the yield-a-parser style that parsy's
    ``generate`` consumes; no decorator is visible on this definition.

    Returns:
        ``Map(key, value)``.
    """
    yield spaceless_string("map")
    yield LANGLE
    key = yield primitive  # keys are restricted to primitive types
    yield COMMA
    value = yield ty
    yield RANGLE
    return Map(key, value)
def struct():
    """Generator-style parser for ``struct`` with a parenthesised field list.

    After the ``struct`` keyword and ``LPAREN``, a ``COMMA``-separated
    sequence of ``field``/``ty`` pairs is parsed, followed by ``RPAREN``.
    Each pair is converted to a ``(name, type)`` tuple.

    NOTE(review): written in the yield-a-parser style that parsy's
    ``generate`` consumes; no decorator is visible on this definition.

    Returns:
        ``Struct.from_tuples(...)`` built from the parsed pairs.
    """
    yield spaceless_string("struct")
    yield LPAREN
    # One "<field> <type>" entry, normalised from parsy's list to a tuple.
    entry = p.seq(field, ty).combine(lambda name, typ: (name, typ))
    pairs = yield entry.sep_by(COMMA)
    yield RPAREN
    return Struct.from_tuples(pairs)
def nested():
    """Generator-style parser for ``nested`` with a parenthesised column list.

    Each entry consists of a field name (``dt.FIELD`` preceded by
    ``dt.SPACES`` and followed by at least one whitespace character) and a
    type.  Every parsed type is wrapped in a non-nullable ``dt.Array``.

    NOTE(review): written in the yield-a-parser style that parsy's
    ``generate`` consumes; no decorator is visible on this definition.

    Returns:
        A non-nullable ``dt.Struct`` built with ``from_tuples`` from the
        ``(name, array-of-type)`` pairs.
    """
    yield dt.spaceless_string("nested")
    yield dt.LPAREN
    column_name = dt.SPACES.then(dt.FIELD.skip(at_least_one_space))
    column = p.seq(column_name, ty).combine(
        lambda name, typ: (name, dt.Array(typ, nullable=False))
    )
    columns = yield column.sep_by(dt.COMMA)
    yield dt.RPAREN
    return dt.Struct.from_tuples(columns, nullable=False)
def decimal():
    """Generator-style parser for ``decimal``/``numeric`` with optional parameters.

    The keyword may be followed by a parenthesised ``PRECISION, SCALE``
    pair.  When that part is absent, ``default_decimal_parameters`` (a name
    taken from the enclosing scope) supplies the arguments instead.

    NOTE(review): written in the yield-a-parser style that parsy's
    ``generate`` consumes; no decorator is visible on this definition.

    Returns:
        ``Decimal(*params)`` where ``params`` is the explicit pair or the
        defaults.
    """
    yield spaceless_string("decimal", "numeric")
    pair = p.seq(PRECISION.skip(COMMA), SCALE).combine(
        lambda precision, scale: (precision, scale)
    )
    explicit = yield LPAREN.then(pair).skip(RPAREN).optional()
    # A falsy result (no parenthesised part) selects the defaults.
    params = explicit if explicit else default_decimal_parameters
    return Decimal(*params)
def struct():
    """Generator-style parser for ``tuple`` with optionally named elements.

    Each comma-separated element is an optional field name followed by a
    type.  Elements without a name are named after their zero-based
    position: ``f0``, ``f1``, and so on.

    NOTE(review): written in the yield-a-parser style that parsy's
    ``generate`` consumes; no decorator is visible on this definition.

    Returns:
        A non-nullable ``dt.Struct`` built with ``from_tuples``.
    """
    yield dt.spaceless_string("tuple")
    yield dt.LPAREN
    maybe_name = dt.SPACES.then(dt.FIELD.skip(at_least_one_space).optional())
    element = p.seq(maybe_name, ty).combine(lambda name, typ: (name, typ))
    elements = yield element.sep_by(dt.COMMA)
    yield dt.RPAREN

    named_elements = []
    for position, (name, typ) in enumerate(elements):
        if name is None:
            # Unnamed element: synthesise a positional name.
            name = f"f{position:d}"
        named_elements.append((name, typ))
    return dt.Struct.from_tuples(named_elements, nullable=False)
def parse(text: str, default_decimal_parameters=(18, 3)) -> DataType:
    """Turn a DuckDB type string into the corresponding ibis data type.

    A grammar is assembled from primitive keyword parsers plus parsers for
    decimals, ``list<...>`` arrays, ``type[]`` arrays, ``map<...>`` and
    ``struct(...)``, and then applied to ``text``.

    Args:
        text: The type string to parse.
        default_decimal_parameters: ``(precision, scale)`` used for a
            ``decimal``/``numeric`` type that carries no explicit
            parenthesised parameters.

    Returns:
        The result of ``ty.parse(text)``.  Any error raised by that call
        propagates to the caller unchanged.
    """
    # Keyword aliases and the data type each group produces.  The groups are
    # combined with ``|`` in exactly this order, and alternatives are tried
    # left to right, so the order must not be changed (for example,
    # "timestamp" has to be attempted before "time").
    primitive_table = (
        (("interval",), Interval()),
        (("bigint", "int8", "long"), int64),
        (("boolean", "bool", "logical"), boolean),
        (("blob", "bytea", "binary", "varbinary"), binary),
        (("double", "float8"), float64),
        (("real", "float4", "float"), float32),
        (("smallint", "int2", "short"), int16),
        (("timestamp", "datetime"), Timestamp(timezone="UTC")),
        (("date",), date),
        (("time",), time),
        (("tinyint", "int1"), int8),
        (("integer", "int4", "int", "signed"), int32),
        (("ubigint",), uint64),
        (("usmallint",), uint16),
        (("uinteger",), uint32),
        (("utinyint",), uint8),
        (("uuid",), uuid),
        (("varchar", "char", "bpchar", "text", "string"), string),
    )
    primitive = None
    for aliases, dtype in primitive_table:
        candidate = spaceless_string(*aliases).result(dtype)
        primitive = candidate if primitive is None else primitive | candidate

    @p.generate
    def decimal():
        # Keyword, then an optional "(precision, scale)" part.
        yield spaceless_string("decimal", "numeric")
        pair = p.seq(PRECISION.skip(COMMA), SCALE).combine(
            lambda precision, scale: (precision, scale)
        )
        explicit = yield LPAREN.then(pair).skip(RPAREN).optional()
        params = explicit if explicit else default_decimal_parameters
        return Decimal(*params)

    @p.generate
    def angle_type():
        # A single type wrapped in LANGLE ... RANGLE.
        yield LANGLE
        inner = yield ty
        yield RANGLE
        return inner

    @p.generate
    def list_array():
        # "list" followed by an angle-delimited element type.
        yield spaceless_string("list")
        element = yield angle_type
        return Array(element)

    @p.generate
    def pg_array():
        # A non-array type followed by LBRACKET RBRACKET.
        element = yield non_pg_array_type
        yield LBRACKET
        yield RBRACKET
        return Array(element)

    @p.generate
    def map():
        # "map", then LANGLE key COMMA value RANGLE; keys are primitive only.
        yield spaceless_string("map")
        yield LANGLE
        key = yield primitive
        yield COMMA
        value = yield ty
        yield RANGLE
        return Map(key, value)

    field = spaceless(FIELD)

    @p.generate
    def struct():
        # "struct", then a parenthesised, comma-separated list of
        # field/type pairs.
        yield spaceless_string("struct")
        yield LPAREN
        entry = p.seq(field, ty).combine(lambda name, typ: (name, typ))
        pairs = yield entry.sep_by(COMMA)
        yield RPAREN
        return Struct.from_tuples(pairs)

    non_pg_array_type = primitive | decimal | list_array | map | struct
    # The bracket-suffixed array form is attempted first, then the bare type.
    ty = pg_array | non_pg_array_type
    return ty.parse(text)
def list_array():
    """Generator-style parser for ``list`` followed by an element type.

    Yields the ``list`` keyword parser and then ``angle_type``; the value
    sent back for ``angle_type`` is used as the element type.

    NOTE(review): written in the yield-a-parser style that parsy's
    ``generate`` consumes; no decorator is visible on this definition.

    Returns:
        ``Array(element)``.
    """
    yield spaceless_string("list")
    element = yield angle_type
    return Array(element)
def array():
    """Generator-style parser for ``array`` followed by an element type.

    Yields the ``array`` keyword parser and then ``paren_type``; the value
    sent back for ``paren_type`` is used as the element type.

    NOTE(review): written in the yield-a-parser style that parsy's
    ``generate`` consumes; no decorator is visible on this definition.

    Returns:
        ``dt.Array(element, nullable=False)``.
    """
    yield dt.spaceless_string("array")
    element = yield paren_type
    return dt.Array(element, nullable=False)
def decimal():
    """Generator-style parser for ``decimal``/``numeric`` with required parameters.

    After the keyword, a parenthesised ``dt.PRECISION``, ``dt.COMMA``,
    ``dt.SCALE`` sequence is parsed; unlike an optional form, the
    parenthesised part is always expected here.

    NOTE(review): written in the yield-a-parser style that parsy's
    ``generate`` consumes; no decorator is visible on this definition.

    Returns:
        ``dt.Decimal(precision, scale, nullable=False)``.
    """
    yield dt.spaceless_string("decimal", "numeric")
    parameters = p.seq(dt.PRECISION.skip(dt.COMMA), dt.SCALE)
    precision, scale = yield dt.LPAREN.then(parameters).skip(dt.RPAREN)
    return dt.Decimal(precision, scale, nullable=False)
def fixed_string():
    """Generator-style parser for ``fixedstring`` with a parenthesised number.

    The ``dt.NUMBER`` between the parentheses is parsed but its value is
    discarded; the result does not depend on it.

    NOTE(review): written in the yield-a-parser style that parsy's
    ``generate`` consumes; no decorator is visible on this definition.

    Returns:
        ``dt.String(nullable=False)``.
    """
    yield dt.spaceless_string("fixedstring")
    yield dt.LPAREN
    yield dt.NUMBER  # parsed and ignored
    yield dt.RPAREN
    return dt.String(nullable=False)
def nullable():
    """Generator-style parser for ``nullable`` wrapping a parenthesised type.

    The wrapped type is parsed with ``ty`` and then called with
    ``nullable=True``; whatever that call returns is the result.

    NOTE(review): written in the yield-a-parser style that parsy's
    ``generate`` consumes; no decorator is visible on this definition.

    Returns:
        ``inner(nullable=True)`` for the parsed inner type.
    """
    yield dt.spaceless_string("nullable")
    yield dt.LPAREN
    inner = yield ty
    yield dt.RPAREN
    return inner(nullable=True)
def datetime():
    """Generator-style parser for ``datetime64``/``datetime`` with an optional argument.

    After the keyword, ``parened_string.optional()`` is parsed; its result
    (``None`` when the part is absent) is passed on as the timezone.

    NOTE(review): written in the yield-a-parser style that parsy's
    ``generate`` consumes; no decorator is visible on this definition.

    Returns:
        ``dt.Timestamp(timezone=tz, nullable=False)``.
    """
    yield dt.spaceless_string("datetime64", "datetime")
    tz = yield parened_string.optional()
    return dt.Timestamp(timezone=tz, nullable=False)
def parse(text: str) -> DataType:
    """Turn a type string into the corresponding ibis data type.

    A grammar is assembled from a datetime parser, primitive keyword
    parsers, and parsers for ``nullable(...)``, ``nested(...)``,
    ``fixedstring(...)``, ``decimal(...)``, ``array(...)``, ``map(...)``
    and ``tuple(...)``, and then applied to ``text``.  Every type is
    constructed with ``nullable=False`` except ``dt.null`` and anything
    wrapped in ``nullable(...)``.

    Args:
        text: The type string to parse.

    Returns:
        The result of ``ty.parse(text)``.  Any error raised by that call
        propagates to the caller unchanged.
    """

    @p.generate
    def datetime():
        # Keyword, then an optional parenthesised string used as timezone.
        yield dt.spaceless_string("datetime64", "datetime")
        tz = yield parened_string.optional()
        return dt.Timestamp(timezone=tz, nullable=False)

    # Keyword aliases and the data type each group produces.  The groups are
    # appended to the ``datetime`` parser with ``|`` in exactly this order,
    # and alternatives are tried left to right, so the order must not be
    # changed (for example, "int16" has to be attempted before "int1").
    primitive_table = (
        (("null", "nothing"), dt.null),
        (("bigint", "int64"), dt.Int64(nullable=False)),
        (("double", "float64"), dt.Float64(nullable=False)),
        (("float32", "float"), dt.Float32(nullable=False)),
        (("smallint", "int16", "int2"), dt.Int16(nullable=False)),
        (("date32", "date"), dt.Date(nullable=False)),
        (("time",), dt.Time(nullable=False)),
        (
            ("tinyint", "int8", "int1", "boolean", "bool"),
            dt.Int8(nullable=False),
        ),
        (("integer", "int32", "int4", "int"), dt.Int32(nullable=False)),
        (("uint64",), dt.UInt64(nullable=False)),
        (("uint32",), dt.UInt32(nullable=False)),
        (("uint16",), dt.UInt16(nullable=False)),
        (("uint8",), dt.UInt8(nullable=False)),
        (("uuid",), dt.UUID(nullable=False)),
        (
            (
                "longtext",
                "mediumtext",
                "tinytext",
                "text",
                "longblob",
                "mediumblob",
                "tinyblob",
                "blob",
                "varchar",
                "char",
                "string",
            ),
            dt.String(nullable=False),
        ),
    )
    primitive = datetime
    for aliases, dtype in primitive_table:
        primitive = primitive | dt.spaceless_string(*aliases).result(dtype)

    @p.generate
    def parened_string():
        # A raw string wrapped in parentheses; returns the string.
        yield dt.LPAREN
        raw = yield dt.RAW_STRING
        yield dt.RPAREN
        return raw

    @p.generate
    def nullable():
        # "nullable(<type>)": re-create the inner type with nullable=True.
        yield dt.spaceless_string("nullable")
        yield dt.LPAREN
        inner = yield ty
        yield dt.RPAREN
        return inner(nullable=True)

    @p.generate
    def fixed_string():
        # "fixedstring(<number>)": the number is parsed and ignored.
        yield dt.spaceless_string("fixedstring")
        yield dt.LPAREN
        yield dt.NUMBER
        yield dt.RPAREN
        return dt.String(nullable=False)

    @p.generate
    def decimal():
        # "decimal(<precision>, <scale>)"; the parameters are required.
        yield dt.spaceless_string("decimal", "numeric")
        parameters = p.seq(dt.PRECISION.skip(dt.COMMA), dt.SCALE)
        precision, scale = yield dt.LPAREN.then(parameters).skip(dt.RPAREN)
        return dt.Decimal(precision, scale, nullable=False)

    @p.generate
    def paren_type():
        # A single type wrapped in parentheses.
        yield dt.LPAREN
        inner = yield ty
        yield dt.RPAREN
        return inner

    @p.generate
    def array():
        # "array(<type>)".
        yield dt.spaceless_string("array")
        element = yield paren_type
        return dt.Array(element, nullable=False)

    @p.generate
    def map():
        # "map(<key type>, <value type>)".
        yield dt.spaceless_string("map")
        yield dt.LPAREN
        key = yield ty
        yield dt.COMMA
        value = yield ty
        yield dt.RPAREN
        return dt.Map(key, value, nullable=False)

    at_least_one_space = p.regex(r"\s+")

    @p.generate
    def nested():
        # "nested(<name> <type>, ...)": each type is wrapped in an Array.
        yield dt.spaceless_string("nested")
        yield dt.LPAREN
        column_name = dt.SPACES.then(dt.FIELD.skip(at_least_one_space))
        column = p.seq(column_name, ty).combine(
            lambda name, typ: (name, dt.Array(typ, nullable=False))
        )
        columns = yield column.sep_by(dt.COMMA)
        yield dt.RPAREN
        return dt.Struct.from_tuples(columns, nullable=False)

    @p.generate
    def struct():
        # "tuple([<name> ]<type>, ...)": unnamed elements become f0, f1, ...
        yield dt.spaceless_string("tuple")
        yield dt.LPAREN
        maybe_name = dt.SPACES.then(
            dt.FIELD.skip(at_least_one_space).optional()
        )
        element = p.seq(maybe_name, ty).combine(lambda name, typ: (name, typ))
        elements = yield element.sep_by(dt.COMMA)
        yield dt.RPAREN

        named_elements = []
        for position, (name, typ) in enumerate(elements):
            if name is None:
                name = f"f{position:d}"
            named_elements.append((name, typ))
        return dt.Struct.from_tuples(named_elements, nullable=False)

    ty = (
        nullable
        | nested
        | primitive
        | fixed_string
        | decimal
        | array
        | map
        | struct
    )
    return ty.parse(text)