Ejemplo n.º 1
0
 def map():
     """Generator-style parser for a ``map(<key>, <value>)`` type string.

     Each ``yield`` hands a parser to the driver (presumably parsy's
     ``generate``; the decorator is not visible in this excerpt) and
     receives that parser's result back.

     Returns a ``dt.Map`` of the two parsed types with ``nullable=False``.
     """
     # The keyword and the punctuation produce nothing we need to keep.
     yield dt.spaceless_string("map")
     yield dt.LPAREN
     parsed_key = yield ty
     yield dt.COMMA
     parsed_value = yield ty
     yield dt.RPAREN
     return dt.Map(parsed_key, parsed_value, nullable=False)
Ejemplo n.º 2
0
 def map():
     """Generator-style parser for a ``map<key, value>`` type string.

     The key is parsed with ``primitive`` while the value is parsed with the
     full ``ty`` parser, so only the value may be a compound type.

     Returns ``Map(key, value)`` built from the two parsed types.
     """
     yield spaceless_string("map")
     yield LANGLE
     parsed_key = yield primitive
     yield COMMA
     parsed_value = yield ty
     yield RANGLE
     return Map(parsed_key, parsed_value)
Ejemplo n.º 3
0
 def struct():
     """Generator-style parser for ``struct(<name> <type>, ...)``.

     Returns the result of ``Struct.from_tuples`` applied to the parsed
     ``(name, type)`` pairs, in the order they appear.
     """
     yield spaceless_string("struct")
     yield LPAREN
     # A single member: field name followed by its type, kept as a tuple.
     member = p.seq(field, ty).combine(lambda name, typ: (name, typ))
     members = yield member.sep_by(COMMA)
     yield RPAREN
     return Struct.from_tuples(members)
Ejemplo n.º 4
0
    def nested():
        """Generator-style parser for ``nested(<name> <type>, ...)``.

        Every column type is wrapped in a non-nullable ``dt.Array`` before
        the columns are collected into a non-nullable ``dt.Struct``.
        """
        yield dt.spaceless_string("nested")
        yield dt.LPAREN

        # Column name: optional leading spaces, the field, then at least one
        # space separating it from the type.
        column_name = dt.SPACES.then(dt.FIELD.skip(at_least_one_space))
        column = p.seq(column_name, ty).combine(
            lambda name, typ: (name, dt.Array(typ, nullable=False))
        )
        columns = yield column.sep_by(dt.COMMA)
        yield dt.RPAREN
        return dt.Struct.from_tuples(columns, nullable=False)
Ejemplo n.º 5
0
 def decimal():
     """Generator-style parser for ``decimal``/``numeric`` type strings.

     The ``(precision, scale)`` argument list is optional; when it is
     absent, ``default_decimal_parameters`` from the enclosing scope is used.

     Returns ``Decimal(precision, scale)``.
     """
     yield spaceless_string("decimal", "numeric")
     params = p.seq(PRECISION.skip(COMMA), SCALE).combine(
         lambda precision, scale: (precision, scale)
     )
     parsed = yield LPAREN.then(params).skip(RPAREN).optional()
     if not parsed:
         # No argument list was given: fall back to the defaults.
         parsed = default_decimal_parameters
     return Decimal(*parsed)
Ejemplo n.º 6
0
 def struct():
     """Generator-style parser for ``tuple(...)`` type strings.

     Each element is an optional field name followed by a type.  Elements
     without a name are named ``f<i>``, where ``i`` is the element's
     zero-based position.

     Returns a non-nullable ``dt.Struct`` built from the elements.
     """
     yield dt.spaceless_string("tuple")
     yield dt.LPAREN
     optional_name = dt.SPACES.then(
         dt.FIELD.skip(at_least_one_space).optional()
     )
     element = p.seq(optional_name, ty).combine(lambda name, typ: (name, typ))
     elements = yield element.sep_by(dt.COMMA)
     yield dt.RPAREN

     pairs = []
     for position, (name, typ) in enumerate(elements):
         if name is None:
             # Unnamed element: synthesize a positional name.
             name = f"f{position:d}"
         pairs.append((name, typ))
     return dt.Struct.from_tuples(pairs, nullable=False)
Ejemplo n.º 7
0
def parse(text: str, default_decimal_parameters=(18, 3)) -> DataType:
    """Parse a DuckDB type into an ibis data type.

    Parameters
    ----------
    text
        Type string to parse.
    default_decimal_parameters
        ``(precision, scale)`` pair used for ``decimal``/``numeric`` when the
        type string carries no explicit ``(precision, scale)`` arguments.

    Returns
    -------
    DataType
        Whatever the assembled ``ty`` parser produces for ``text``.
    """
    # NOTE: ``|`` tries alternatives in order, so keep longer names ahead of
    # names that are their prefixes (e.g. ``timestamp``/``datetime`` before
    # ``time``/``date``, ``interval`` before the ``int`` spellings).
    primitive = (
        spaceless_string("interval").result(Interval())
        | spaceless_string("bigint", "int8", "long").result(int64)
        | spaceless_string("boolean", "bool", "logical").result(boolean)
        | spaceless_string(
            "blob",
            "bytea",
            "binary",
            "varbinary",
        ).result(binary)
        | spaceless_string("double", "float8").result(float64)
        | spaceless_string("real", "float4", "float").result(float32)
        | spaceless_string("smallint", "int2", "short").result(int16)
        # NOTE(review): plain timestamps are mapped to a UTC-aware type;
        # confirm this is intended.
        | spaceless_string("timestamp", "datetime").result(
            Timestamp(timezone="UTC")
        )
        | spaceless_string("date").result(date)
        | spaceless_string("time").result(time)
        | spaceless_string("tinyint", "int1").result(int8)
        | spaceless_string("integer", "int4", "int", "signed").result(int32)
        | spaceless_string("ubigint").result(uint64)
        | spaceless_string("usmallint").result(uint16)
        | spaceless_string("uinteger").result(uint32)
        | spaceless_string("utinyint").result(uint8)
        | spaceless_string("uuid").result(uuid)
        | spaceless_string(
            "varchar",
            "char",
            "bpchar",
            "text",
            "string",
        ).result(string)
    )

    @p.generate
    def decimal():
        # ``decimal`` / ``numeric`` with an optional ``(precision, scale)``.
        yield spaceless_string("decimal", "numeric")
        prec_scale = (
            yield LPAREN.then(
                p.seq(PRECISION.skip(COMMA), SCALE).combine(
                    lambda prec, scale: (prec, scale)
                )
            )
            .skip(RPAREN)
            .optional()
        ) or default_decimal_parameters
        return Decimal(*prec_scale)

    @p.generate
    def angle_type():
        # ``<type>``: a full type wrapped in angle brackets.
        yield LANGLE
        value_type = yield ty
        yield RANGLE
        return value_type

    @p.generate
    def list_array():
        # ``list<type>``
        yield spaceless_string("list")
        value_type = yield angle_type
        return Array(value_type)

    @p.generate
    def pg_array():
        # ``type[]``, ``type[][]``, ...: each trailing bracket pair adds one
        # level of nesting.  Accepting more than one pair lets nested list
        # types parse instead of failing on the second ``[]``.
        value_type = yield non_pg_array_type
        bracket_pairs = yield LBRACKET.then(RBRACKET).at_least(1)
        for _ in bracket_pairs:
            value_type = Array(value_type)
        return value_type

    @p.generate
    def map():
        # ``map<key, value>``: the key must be a primitive, the value may be
        # any type.
        yield spaceless_string("map")
        yield LANGLE
        key_type = yield primitive
        yield COMMA
        value_type = yield ty
        yield RANGLE
        return Map(key_type, value_type)

    field = spaceless(FIELD)

    @p.generate
    def struct():
        # ``struct(name type, ...)``
        yield spaceless_string("struct")
        yield LPAREN
        field_names_types = yield (
            p.seq(field, ty)
            .combine(lambda field, ty: (field, ty))
            .sep_by(COMMA)
        )
        yield RPAREN
        return Struct.from_tuples(field_names_types)

    # ``pg_array`` is built on the bracket-free alternatives only, and is
    # tried first so that trailing brackets are consumed when present.
    non_pg_array_type = primitive | decimal | list_array | map | struct
    ty = pg_array | non_pg_array_type
    return ty.parse(text)
Ejemplo n.º 8
0
 def list_array():
     """Generator-style parser for ``list<...>`` type strings.

     The element type comes from ``angle_type``, defined outside this
     excerpt.  Returns ``Array(element_type)``.
     """
     yield spaceless_string("list")
     element_type = yield angle_type
     return Array(element_type)
Ejemplo n.º 9
0
 def array():
     """Generator-style parser for ``array(...)`` type strings.

     The element type comes from ``paren_type``, defined outside this
     excerpt.  Returns a ``dt.Array`` of it with ``nullable=False``.
     """
     yield dt.spaceless_string("array")
     element_type = yield paren_type
     return dt.Array(element_type, nullable=False)
Ejemplo n.º 10
0
 def decimal():
     """Generator-style parser for ``decimal(p, s)`` / ``numeric(p, s)``.

     The parenthesized ``precision, scale`` pair is mandatory here.
     Returns ``dt.Decimal(precision, scale)`` with ``nullable=False``.
     """
     yield dt.spaceless_string("decimal", "numeric")
     params = p.seq(dt.PRECISION.skip(dt.COMMA), dt.SCALE)
     prec, scl = yield dt.LPAREN.then(params).skip(dt.RPAREN)
     return dt.Decimal(prec, scl, nullable=False)
Ejemplo n.º 11
0
 def fixed_string():
     """Generator-style parser for ``fixedstring(<number>)`` type strings.

     Returns ``dt.String`` with ``nullable=False``.
     """
     yield dt.spaceless_string("fixedstring")
     # The numeric argument must be present but its value is not kept.
     yield dt.LPAREN.then(dt.NUMBER).skip(dt.RPAREN)
     return dt.String(nullable=False)
Ejemplo n.º 12
0
 def nullable():
     """Generator-style parser for ``nullable(<type>)`` type strings.

     Returns the result of calling the parsed inner type with
     ``nullable=True``.
     """
     yield dt.spaceless_string("nullable")
     yield dt.LPAREN
     inner_type = yield ty
     yield dt.RPAREN
     return inner_type(nullable=True)
Ejemplo n.º 13
0
 def datetime():
     """Generator-style parser for ``datetime`` / ``datetime64`` type strings.

     The argument list is optional and may be ``('tz')``, ``(precision)`` or
     ``(precision, 'tz')``.  Previously only ``('tz')`` was accepted, so a
     type string carrying a precision failed to parse.

     Returns ``dt.Timestamp`` with the parsed timezone (``None`` when no
     timezone is given) and ``nullable=False``.
     """
     yield dt.spaceless_string("datetime64", "datetime")
     # The precision is parsed and dropped: only the timezone is passed on
     # to ``dt.Timestamp``.
     with_precision = dt.PRECISION.then(
         dt.COMMA.then(dt.RAW_STRING).optional()
     )
     args = dt.LPAREN.then(with_precision | dt.RAW_STRING).skip(dt.RPAREN)
     timezone = yield args.optional()
     return dt.Timestamp(timezone=timezone, nullable=False)
Ejemplo n.º 14
0
def parse(text: str) -> DataType:
    """Parse a type string into an ibis data type.

    The accepted spellings (``nullable(...)``, ``fixedstring(n)``,
    ``nested(...)``, ``tuple(...)``, ``datetime64(...)``) appear to follow
    ClickHouse's type syntax.

    Parameters
    ----------
    text
        Type string to parse.

    Returns
    -------
    DataType
        Whatever the assembled ``ty`` parser produces for ``text``.  Types
        are built with ``nullable=False`` unless wrapped in
        ``nullable(...)``; ``null``/``nothing`` map to ``dt.null``.
    """
    @p.generate
    def datetime():
        yield dt.spaceless_string("datetime64", "datetime")
        # Accepted argument lists: ``('tz')``, ``(precision)`` and
        # ``(precision, 'tz')``.  Accepting the precision lets
        # ``datetime64(3)`` / ``datetime64(3, 'UTC')`` parse; it is dropped
        # because only the timezone is passed on to ``dt.Timestamp``.
        with_precision = dt.PRECISION.then(
            dt.COMMA.then(dt.RAW_STRING).optional())
        args = dt.LPAREN.then(with_precision | dt.RAW_STRING).skip(dt.RPAREN)
        timezone = yield args.optional()
        return dt.Timestamp(timezone=timezone, nullable=False)

    # NOTE: ``|`` tries alternatives in order; ``datetime`` is listed first
    # so it is attempted before the shorter ``date``/``time`` spellings.
    primitive = (
        datetime
        | dt.spaceless_string("null", "nothing").result(dt.null)
        | dt.spaceless_string("bigint", "int64").result(
            dt.Int64(nullable=False))
        | dt.spaceless_string("double", "float64").result(
            dt.Float64(nullable=False))
        | dt.spaceless_string("float32", "float").result(
            dt.Float32(nullable=False))
        | dt.spaceless_string("smallint", "int16", "int2").result(
            dt.Int16(nullable=False))
        | dt.spaceless_string("date32", "date").result(dt.Date(nullable=False))
        | dt.spaceless_string("time").result(dt.Time(nullable=False))
        # Boolean spellings are mapped to Int8 alongside the 8-bit ints.
        | dt.spaceless_string("tinyint", "int8", "int1", "boolean",
                              "bool").result(dt.Int8(nullable=False))
        | dt.spaceless_string("integer", "int32", "int4", "int").result(
            dt.Int32(nullable=False))
        | dt.spaceless_string("uint64").result(dt.UInt64(nullable=False))
        | dt.spaceless_string("uint32").result(dt.UInt32(nullable=False))
        | dt.spaceless_string("uint16").result(dt.UInt16(nullable=False))
        | dt.spaceless_string("uint8").result(dt.UInt8(nullable=False))
        | dt.spaceless_string("uuid").result(dt.UUID(nullable=False))
        | dt.spaceless_string(
            "longtext",
            "mediumtext",
            "tinytext",
            "text",
            "longblob",
            "mediumblob",
            "tinyblob",
            "blob",
            "varchar",
            "char",
            "string",
        ).result(dt.String(nullable=False)))

    @p.generate
    def nullable():
        # ``nullable(type)``: re-create the inner type with nullable=True.
        yield dt.spaceless_string("nullable")
        yield dt.LPAREN
        parsed_ty = yield ty
        yield dt.RPAREN
        return parsed_ty(nullable=True)

    @p.generate
    def fixed_string():
        # ``fixedstring(n)``: the length is parsed but not kept.
        yield dt.spaceless_string("fixedstring")
        yield dt.LPAREN
        yield dt.NUMBER
        yield dt.RPAREN
        return dt.String(nullable=False)

    @p.generate
    def decimal():
        # ``decimal(precision, scale)``: both arguments are required.
        yield dt.spaceless_string("decimal", "numeric")
        precision, scale = yield dt.LPAREN.then(
            p.seq(dt.PRECISION.skip(dt.COMMA), dt.SCALE)).skip(dt.RPAREN)
        return dt.Decimal(precision, scale, nullable=False)

    @p.generate
    def paren_type():
        # ``(type)``: a full type wrapped in parentheses.
        yield dt.LPAREN
        value_type = yield ty
        yield dt.RPAREN
        return value_type

    @p.generate
    def array():
        # ``array(type)``
        yield dt.spaceless_string("array")
        value_type = yield paren_type
        return dt.Array(value_type, nullable=False)

    @p.generate
    def map():
        # ``map(key, value)``: both sides may be any type.
        yield dt.spaceless_string("map")
        yield dt.LPAREN
        key_type = yield ty
        yield dt.COMMA
        value_type = yield ty
        yield dt.RPAREN
        return dt.Map(key_type, value_type, nullable=False)

    at_least_one_space = p.regex(r"\s+")

    @p.generate
    def nested():
        # ``nested(name type, ...)``: every column type is wrapped in an
        # array before being collected into a struct.
        yield dt.spaceless_string("nested")
        yield dt.LPAREN

        field_names_types = yield (p.seq(
            dt.SPACES.then(dt.FIELD.skip(at_least_one_space)),
            ty).combine(lambda field, ty:
                        (field, dt.Array(ty, nullable=False))).sep_by(
                            dt.COMMA))
        yield dt.RPAREN
        return dt.Struct.from_tuples(field_names_types, nullable=False)

    @p.generate
    def struct():
        # ``tuple([name] type, ...)``: unnamed elements get the positional
        # name ``f<i>``.
        yield dt.spaceless_string("tuple")
        yield dt.LPAREN
        field_names_types = yield (p.seq(
            dt.SPACES.then(dt.FIELD.skip(at_least_one_space).optional()),
            ty,
        ).combine(lambda field, ty: (field, ty)).sep_by(dt.COMMA))
        yield dt.RPAREN
        return dt.Struct.from_tuples(
            [(field_name if field_name is not None else f"f{i:d}", typ)
             for i, (field_name, typ) in enumerate(field_names_types)],
            nullable=False,
        )

    # ``nullable`` and ``nested`` come before ``primitive`` so that they are
    # attempted before the ``null``/``nothing`` primitive.
    ty = (nullable
          | nested
          | primitive
          | fixed_string
          | decimal
          | array
          | map
          | struct)
    return ty.parse(text)