class DataSet(struct.Schema):
    """Schema serving purely as a test fixture.

    Which particular fields it declares carries no meaning for the tests.
    """

    feature: struct.Field = struct.Field(kind.Integer())
    label: struct.Field = struct.Field(kind.Float())
class Year(series.Univariate):
    """Year component taken out of a date/time operand."""

    # The single operand is stored at position 0 of the underlying tuple-like instance.
    value: series.Operable = property(operator.itemgetter(0))
    kind: kindmod.Any = kindmod.Integer()

    def __new__(cls, value: series.Operable):
        """Validate the operand and create the instance.

        Args:
            value: Operand the year is extracted from.

        Returns:
            New instance created by the parent constructor.
        """
        operand = series.Operable.ensure_is(value)
        # The operand kind gets validated through ``kindmod.Date.ensure`` before the instance is built.
        kindmod.Date.ensure(operand.kind)
        return super().__new__(cls, value)
def test_colliding(self, schema: typing.Type['struct.Schema']):
    """Check the handling of field names colliding with the parent schema.

    Declaring ``birthday`` on a subclass is expected to fail with ``error.Syntax`` while declaring
    ``school`` with a different kind is expected to override it on the subclass only.
    """
    with pytest.raises(error.Syntax):

        class Colliding(schema):
            """Schema declaring a colliding field name."""

            birthday = struct.Field(kind.Integer())

        # Touch the class so it does not appear unused.
        _ = Colliding

    class Override(schema):
        """Schema redefining the kind of an inherited field."""

        school = struct.Field(kind.String())

    # The subclass exposes the new kind...
    assert Override.school.kind == kind.String()
    # ...while the parent schema keeps the original one.
    assert schema.school.kind == kind.Integer()
class Demo(struct.Schema):
    """Demonstration schema made of two integer fields."""

    Label = struct.Field(kind.Integer())
    Age = struct.Field(kind.Integer())
class Series(parsmod.Frame.Series[str, str]):
    """Series DSL parser producing SQL code."""

    class Expression:
        """Expression generator/formatter.

        Couples a ``str.format`` template with an optional mapper that may translate or rearrange the
        raw arguments before they get formatted into the template.
        """

        # Arguments fully matching this pattern are embedded as they are; anything else gets wrapped in
        # parentheses. It accepts a single token free of whitespace and the ``-+*/%`` characters
        # (optionally in a call-like ``prefix(...)`` form) or a TIMESTAMP/DATE literal.
        ASSOCIATIVE = re.compile(
            r"\s*(?:(\S*\()?\s*[^-+*/%\s]+\s*(?(1).*\))|TIMESTAMP *'.+'|DATE *'.+')\s*"
        )

        def __init__(self, template: str, mapper: typing.Optional[typing.Callable[
                ..., typing.Sequence]] = None):
            """Set up the generator.

            Args:
                template: Format string with positional ``{}`` placeholders.
                mapper: Optional callable receiving the raw arguments and returning the sequence of
                        values to be formatted.
            """
            self._template: str = template
            self._mapper: typing.Optional[typing.Callable[
                ..., typing.Sequence]] = mapper

        def __call__(self, *args: typing.Any) -> str:
            """Actual expression generator.

            Args:
                *args: Expression arguments.

            Returns:
                Generated expression value.
            """

            def clean(arg: str) -> str:
                """Add parentheses if necessary.

                Args:
                    arg: Argument to be cleaned.

                Returns:
                    Clean argument.
                """
                # The bare wildcard is exempt: being an operator character it is rejected by the
                # pattern, which would render the argument-less count as ``count((*))``.
                if arg == '*' or self.ASSOCIATIVE.fullmatch(arg):
                    return arg
                return f'({arg})'

            if self._mapper:
                args = self._mapper(*args)
            return self._template.format(*(clean(a) for a in args))

    # SQL type names used when casting to the given kind.
    KIND: typing.Mapping[kindmod.Any, str] = {
        kindmod.Boolean(): 'BOOLEAN',
        kindmod.Integer(): 'BIGINT',
        kindmod.Float(): 'DOUBLE',
        kindmod.Decimal(): 'DECIMAL',
        kindmod.String(): 'VARCHAR',
        kindmod.Date(): 'DATE',
        kindmod.Timestamp(): 'TIMESTAMP',
    }

    # Generators for the individual supported operators and functions.
    EXPRESSION: typing.Mapping[
        typing.Type[series.Expression], typing.Callable[..., str]] = {
        function.Addition: Expression('{} + {}'),
        function.Subtraction: Expression('{} - {}'),
        function.Multiplication: Expression('{} * {}'),
        function.Division: Expression('{} / {}'),
        function.Modulus: Expression('{} % {}'),
        function.LessThan: Expression('{} < {}'),
        function.LessEqual: Expression('{} <= {}'),
        function.GreaterThan: Expression('{} > {}'),
        function.GreaterEqual: Expression('{} >= {}'),
        function.Equal: Expression('{} = {}'),
        function.NotEqual: Expression('{} != {}'),
        function.IsNull: Expression('{} IS NULL'),
        function.NotNull: Expression('{} IS NOT NULL'),
        function.And: Expression('{} AND {}'),
        function.Or: Expression('{} OR {}'),
        function.Not: Expression('NOT {}'),
        # The target kind gets translated to its SQL type name at call time.
        function.Cast: Expression('cast({} AS {})', lambda _, k: [_, Frame.Series.KIND[k]]),
        function.Avg: Expression('avg({})'),
        # Counting without an explicit column falls back to the wildcard.
        function.Count: Expression('count({})', lambda c=None: [c if c is not None else '*']),
        function.Min: Expression('min({})'),
        function.Max: Expression('max({})'),
        function.Sum: Expression('sum({})'),
        function.Year: Expression('year({})'),
        function.Abs: Expression('abs({})'),
        function.Ceil: Expression('ceil({})'),
        function.Floor: Expression('floor({})'),
    }

    # ``strftime`` formats used for rendering the date and timestamp literals.
    DATE = '%Y-%m-%d'
    TIMESTAMP = '%Y-%m-%d %H:%M:%S'

    def __init__(
        self,
        sources: typing.Mapping[frame.Source, str],
        columns: typing.Optional[typing.Mapping[series.Column, str]] = None,
    ):
        """Set up the parser.

        Args:
            sources: Mapping of the sources to their SQL representation.
            columns: Optional explicit mapping of the columns to their SQL representation (an empty
                     mapping is used if not provided).
        """
        super().__init__(sources, columns or {})

    def resolve_column(self, column: series.Column) -> str:
        """Resolver falling back to a field name in case of no explicit mapping.

        Args:
            column: Column to be resolved.

        Returns:
            Resolved column.

        Raises:
            error.Mapping: If the parent resolver fails and the column is not a ``series.Element``.
        """
        try:
            return super().resolve_column(column)
        except error.Mapping:
            if isinstance(column, series.Element):
                return column.name
            raise

    def generate_element(self, source: str, element: str) -> str:  # pylint: disable=no-self-use
        """Generate a field code.

        Args:
            source: Field source value.
            element: Field symbol.

        Returns:
            Field representation.
        """
        return f'{source}.{element}'

    def generate_alias(self, column: str, alias: str) -> str:  # pylint: disable=no-self-use
        """Generate column alias code.

        Args:
            column: Column value.
            alias: Alias to be used for given column.

        Returns:
            Aliased column.
        """
        return f'{column} AS {alias}'

    def generate_literal(self, value: typing.Any, kind: kindmod.Any) -> str:
        """Generate a literal value.

        Args:
            value: Literal value instance.
            kind: Literal value type.

        Returns:
            Literal.

        Raises:
            error.Unsupported: If the kind is none of the string, numeric, timestamp, date or array.
        """
        if isinstance(kind, kindmod.String):
            # Embedded single quotes need doubling otherwise they would terminate the SQL literal.
            escaped = str(value).replace("'", "''")
            return f"'{escaped}'"
        if isinstance(kind, kindmod.Numeric):
            return f'{value}'
        # NOTE(review): timestamp is tested ahead of date - presumably because Timestamp specializes
        # Date; the order is kept intentionally.
        if isinstance(kind, kindmod.Timestamp):
            return f"TIMESTAMP '{value.strftime(self.TIMESTAMP)}'"
        if isinstance(kind, kindmod.Date):
            return f"DATE '{value.strftime(self.DATE)}'"
        if isinstance(kind, kindmod.Array):
            # Each item is rendered recursively as a literal of the array element kind.
            return f"ARRAY[{', '.join(self.generate_literal(v, kind.element) for v in value)}]"
        raise error.Unsupported(f'Unsupported literal kind: {kind}')

    def generate_expression(self, expression: typing.Type[series.Expression],
                            arguments: typing.Sequence[typing.Any]) -> str:
        """Expression of given arguments.

        Args:
            expression: Operator or function implementing the expression.
            arguments: Expression arguments.

        Returns:
            Expression.

        Raises:
            error.Unsupported: If there is no generator registered for the expression.
        """
        try:
            # The guard spans the generator call too, so a KeyError raised while generating (i.e. a
            # cast to a kind missing in KIND) is reported as an unsupported expression as well.
            return self.EXPRESSION[expression](*arguments)
        except KeyError as err:
            raise error.Unsupported(
                f'Unsupported expression: {expression}') from err

    def generate_reference(self, name: str) -> str:  # pylint: disable=no-self-use
        """Generate a source reference (alias) application.

        Args:
            name: Reference name (alias).

        Returns:
            Reference application.
        """
        return name
class School(struct.Schema):
    """Schema of the school table."""

    sid = struct.Field(kind.Integer(), 'id')
    name = struct.Field(kind.String())
class Student(person):
    """Table extending the ``person`` base with the level, score and school fields."""

    level = struct.Field(kind.Integer())
    score = struct.Field(kind.Float())
    school = struct.Field(kind.Integer())
class Child(Base):
    """Schema derived from ``Base``.

    Introduces the "last" field and redefines the "fixme" field using a string kind and the
    explicit name 'new'.
    """

    last = struct.Field(kind.Integer())
    fixme = struct.Field(kind.String(), name='new')
class Base(struct.Schema):
    """Parent schema declaring the "first" field and the "fixme" field explicitly named 'old'."""

    first = struct.Field(kind.Integer())
    fixme = struct.Field(kind.Float(), name='old')
def key() -> kindmod.Any:
    """Key fixture.

    Returns:
        Integer kind instance.
    """
    integer = kindmod.Integer()
    return integer
class Floor(series.Arithmetic, series.Univariate):
    """Round the value down to the nearest integer."""

    # The result is always of the integer kind.
    kind: kindmod.Integer = kindmod.Integer()
class Ceil(series.Arithmetic, series.Univariate):
    """Round the value up to the nearest integer."""

    # The result is always of the integer kind.
    kind: kindmod.Integer = kindmod.Integer()
class Count(series.Aggregate, series.Univariate):
    """Aggregate giving the number of the input rows."""

    # The result is always of the integer kind.
    kind: kindmod.Integer = kindmod.Integer()
def kind() -> typing.Type[kindmod.Any]:
    """Kind fixture.

    Returns:
        Argument-less factory producing a struct kind with the ``foo`` (integer), ``bar`` (string)
        and ``baz`` (boolean) members.
    """
    return lambda: kindmod.Struct(
        foo=kindmod.Integer(),
        bar=kindmod.String(),
        baz=kindmod.Boolean(),
    )
class Human(struct.Schema):
    """Schema representing a human by the name and age."""

    name = struct.Field(kind.String())
    age = struct.Field(kind.Integer())
class Colliding(schema):
    """Schema declaring a field name colliding with the parent ``schema``."""

    birthday = struct.Field(kind.Integer())
def element() -> kindmod.Any:
    """Element fixture.

    Returns:
        Integer kind instance.
    """
    integer = kindmod.Integer()
    return integer