Esempio n. 1
0
class ShiftBase(Analytic):
    """Field declarations for an analytic operation with an offset and default.

    Declares a column argument together with an optional ``offset`` and an
    optional ``default``; the output dtype is declared to follow ``arg``.
    """

    # Column of any type.
    arg = rlz.column(rlz.any)

    # Optional; when supplied it must satisfy either the integer rule or the
    # interval rule.
    offset = rlz.optional(rlz.one_of((rlz.integer, rlz.interval)))
    # Optional value of any type.
    # NOTE(review): presumably the fill value used where the shifted row does
    # not exist -- confirm against the subclasses.
    default = rlz.optional(rlz.any)

    output_dtype = rlz.dtype_like("arg")
Esempio n. 2
0
class Bucket(BucketLike):
    """Bucketing of a column using an explicit tuple of bucket edges.

    Construction fails with ``ValueError`` when no edge is given, or when a
    single edge is given without enabling both ``include_under`` and
    ``include_over``.
    """

    arg = rlz.column(rlz.any)
    buckets = rlz.tuple_of(rlz.scalar(rlz.any))
    closed = rlz.optional(rlz.isin({'left', 'right'}), default='left')
    close_extreme = rlz.optional(rlz.instance_of(bool), default=True)
    include_under = rlz.optional(rlz.instance_of(bool), default=False)
    include_over = rlz.optional(rlz.instance_of(bool), default=False)

    def __init__(self, buckets, include_under, include_over, **kwargs):
        """Validate the number of edges, then forward everything to the base.

        Raises
        ------
        ValueError
            If ``buckets`` is empty, or holds exactly one edge while either
            ``include_under`` or ``include_over`` is falsy.
        """
        edge_count = len(buckets)
        if edge_count == 0:
            raise ValueError('Must be at least one bucket edge')

        # A single edge only splits the data into "under" and "over", so both
        # open-ended buckets have to be enabled.
        both_open_ended = include_under and include_over
        if edge_count == 1 and not both_open_ended:
            raise ValueError('If one bucket edge provided, must have '
                             'include_under=True and include_over=True')

        super().__init__(
            buckets=buckets,
            include_under=include_under,
            include_over=include_over,
            **kwargs,
        )

    @property
    def nbuckets(self):
        """Number of buckets: the intervals between edges plus one for each
        enabled open-ended bucket."""
        interior = len(self.buckets) - 1
        return interior + self.include_over + self.include_under
Esempio n. 3
0
def test_optional(validator, input):
    """Check that ``rlz.optional`` defers to the wrapped validator and maps
    ``None`` to ``None``."""
    expected = validator(input)
    actual = rlz.optional(validator)(input)

    # Expressions are compared with ``.equals``; everything else with ``==``.
    matches = (actual.equals(expected)
               if isinstance(expected, ibis.Expr) else actual == expected)
    assert matches

    assert rlz.optional(validator)(None) is None
Esempio n. 4
0
class Clip(Value):
    """Value operation with a numeric argument and optional numeric bounds."""

    arg = rlz.strict_numeric
    # Both bounds are optional, so either one may be left unset.
    lower = rlz.optional(rlz.strict_numeric)
    upper = rlz.optional(rlz.strict_numeric)

    output_dtype = rlz.dtype_like("arg")
    # NOTE(review): this refers to "args" (plural) while the dtype rule refers
    # to "arg" -- presumably the shape is derived from all arguments; confirm.
    output_shape = rlz.shape_like("args")
Esempio n. 5
0
class StringFind(Value):
    """String operation taking a substring and optional integer bounds.

    The declared output dtype is ``int64`` and the output shape follows
    ``arg``.
    """

    arg = rlz.string
    substr = rlz.string
    # Optional integer bounds.
    # NOTE(review): presumably they delimit the searched range -- confirm.
    start = rlz.optional(rlz.integer)
    end = rlz.optional(rlz.integer)

    output_shape = rlz.shape_like("arg")
    output_dtype = dt.int64
Esempio n. 6
0
class AsOfJoin(Join):
    """Join variant carrying extra ``by`` predicates and an optional
    interval ``tolerance``."""

    # TODO(kszucs): convert to proper predicate rules
    by = rlz.optional(lambda x, this: x, default=())
    tolerance = rlz.optional(rlz.interval)

    def __init__(self, left, right, by, predicates, **kwargs):
        """Clean the ``by`` predicates, then defer to ``Join.__init__``."""
        by_list = util.promote_list(by)
        cleaned_by = _clean_join_predicates(left, right, by_list)
        super().__init__(
            left=left,
            right=right,
            by=cleaned_by,
            predicates=predicates,
            **kwargs,
        )
Esempio n. 7
0
class LPad(Value):
    """String operation taking an integer length and an optional pad string.

    The declared output is a string with the same shape as ``arg``.
    """

    arg = rlz.string
    length = rlz.integer
    # Optional string.
    # NOTE(review): the fallback used when ``pad`` is omitted is not visible
    # here -- confirm.
    pad = rlz.optional(rlz.string)

    output_shape = rlz.shape_like("arg")
    output_dtype = dt.string
Esempio n. 8
0
class ArraySlice(Value):
    """Array operation with a required ``start`` and an optional ``stop``.

    Both the output dtype and the output shape are declared to follow
    ``arg``.
    """

    arg = rlz.array
    start = rlz.integer
    # Optional integer.
    # NOTE(review): presumably an omitted ``stop`` means "to the end" --
    # confirm.
    stop = rlz.optional(rlz.integer)

    output_dtype = rlz.dtype_like("arg")
    output_shape = rlz.shape_like("arg")
Esempio n. 9
0
class Join(TableNode):
    """Table node combining a ``left`` and a ``right`` table under optional
    join predicates."""

    left = rlz.table
    right = rlz.table
    # TODO(kszucs): convert to proper predicate rules
    predicates = rlz.optional(lambda x, this: x, default=())

    def __init__(self, left, right, predicates, **kwargs):
        """Normalise the inputs via ``_make_distinct_join_predicates`` and
        pass the results to the base initialiser."""
        predicate_list = util.promote_list(predicates)
        distinct = _make_distinct_join_predicates(left, right, predicate_list)
        left, right, predicates = distinct
        super().__init__(
            left=left,
            right=right,
            predicates=predicates,
            **kwargs,
        )

    @property
    def schema(self):
        """Schema of the left table with the right table's schema appended."""
        # For joins retaining both table schemas, merge them together here
        left_schema = self.left.schema()
        return left_schema.append(self.right.schema())

    @util.deprecated(version="4.0", instead="")
    def has_schema(self):
        """Return True when the two tables share no column name."""
        return set(self.left.columns).isdisjoint(self.right.columns)

    def root_tables(self):
        """Return both operand ops when each is a Join or Selection,
        otherwise the distinct roots of the two tables."""
        operand_ops = [self.left.op(), self.right.op()]
        if util.all_of(operand_ops, (Join, Selection)):
            # Unraveling is not possible
            return operand_ops
        return distinct_roots(self.left, self.right)
Esempio n. 10
0
class Substring(Value):
    """String operation with a required ``start`` and an optional ``length``.

    The declared output is a string with the same shape as ``arg``.
    """

    arg = rlz.string
    start = rlz.integer
    # Optional integer.
    # NOTE(review): presumably an omitted ``length`` means "to the end of the
    # string" -- confirm.
    length = rlz.optional(rlz.integer)

    output_dtype = dt.string
    output_shape = rlz.shape_like('arg')
Esempio n. 11
0
class StringToTimestamp(Value):
    """String operation taking a format string and an optional timezone.

    The declared output dtype is a timestamp with timezone ``'UTC'``.
    """

    arg = rlz.string
    format_str = rlz.string
    timezone = rlz.optional(rlz.string)

    output_shape = rlz.shape_like("arg")
    # The output dtype is fixed to UTC; the ``timezone`` field above is not
    # consulted when declaring it.
    output_dtype = dt.Timestamp(timezone='UTC')
Esempio n. 12
0
class UnboundTable(PhysicalTable):
    """Physical table described by a schema and a name."""

    schema = rlz.instance_of(sch.Schema)
    # Optional string name; ``genname`` is supplied as the default.
    # NOTE(review): presumably ``genname`` is a callable that generates a
    # fresh name when none is given -- it is defined outside this excerpt.
    name = rlz.optional(rlz.instance_of(str), default=genname)

    def has_resolved_name(self):
        """Always report that this table has a name."""
        return True

    def resolve_name(self):
        """Return the table's ``name`` field."""
        return self.name
Esempio n. 13
0
class DropNa(TableNode, sch.HasSchema):
    """Drop null values in the table.

    The resulting schema is the schema of the input table.
    """

    table = rlz.table
    # Must be one of the two literal strings 'any' or 'all'.
    how = rlz.isin({'any', 'all'})
    # Optional tuple of columns taken from ``table``; defaults to an empty
    # tuple.
    subset = rlz.optional(rlz.tuple_of(rlz.column_from("table")), default=())

    @property
    def schema(self):
        """Return the schema of the underlying table unchanged."""
        return self.table.schema()
Esempio n. 14
0
class TimestampFromYMDHMS(Value):
    """Operation with six integer date/time components and an optional
    timezone string; the declared output dtype is ``dt.timestamp``."""

    year = rlz.integer
    month = rlz.integer
    day = rlz.integer
    hours = rlz.integer
    minutes = rlz.integer
    seconds = rlz.integer
    timezone = rlz.optional(rlz.string)

    output_dtype = dt.timestamp
    # NOTE(review): "args" presumably makes the shape depend on all of the
    # arguments rather than on a single field -- confirm.
    output_shape = rlz.shape_like("args")
Esempio n. 15
0
class Histogram(BucketLike):
    """Bucketing of a numeric argument specified by either a bin count or a
    bin width, but never both."""

    arg = rlz.numeric
    nbins = rlz.optional(rlz.instance_of(int))
    binwidth = rlz.optional(rlz.scalar(rlz.numeric))
    base = rlz.optional(rlz.scalar(rlz.numeric))
    closed = rlz.optional(rlz.isin({'left', 'right'}), default='left')
    aux_hash = rlz.optional(rlz.instance_of(str))

    def __init__(self, nbins, binwidth, **kwargs):
        """Require exactly one of ``nbins`` and ``binwidth``.

        Raises
        ------
        ValueError
            If both are ``None`` or both are given.
        """
        has_nbins = nbins is not None
        has_binwidth = binwidth is not None

        if not (has_nbins or has_binwidth):
            raise ValueError('Must indicate nbins or binwidth')
        if has_nbins and has_binwidth:
            raise ValueError('nbins and binwidth are mutually exclusive')

        super().__init__(nbins=nbins, binwidth=binwidth, **kwargs)

    @property
    def output_dtype(self):
        """Output dtype of the histogram, always ``dt.category``."""
        # always undefined cardinality (for now)
        return dt.category
Esempio n. 16
0
class AlchemyTable(ops.DatabaseTable):
    """Database table backed by a ``sqla_table`` object, from which the name
    and schema are derived when they are not given explicitly."""

    sqla_table = rlz.instance_of(object)
    name = rlz.optional(rlz.instance_of(str), default=None)
    schema = rlz.optional(rlz.instance_of(sch.Schema), default=None)

    def __init__(self, source, sqla_table, name, schema):
        """Fill in a missing name or schema from ``sqla_table`` and defer to
        the base initialiser."""
        resolved_name = sqla_table.name if name is None else name
        resolved_schema = (sch.infer(sqla_table, schema=None)
                           if schema is None else schema)
        super().__init__(
            name=resolved_name,
            schema=resolved_schema,
            sqla_table=sqla_table,
            source=source,
        )

    # TODO(kszucs): remove this
    def __equals__(self, other: AlchemyTable) -> bool:
        """Compare by name, source and schema only."""
        # override the default implementation to not compare
        # sqla_table instances
        same_origin = self.name == other.name and self.source == other.source
        return same_origin and self.schema.equals(other.schema)
Esempio n. 17
0
class Round(Value):
    """Rounding of a numeric argument to an optional number of digits."""

    arg = rlz.numeric
    digits = rlz.optional(rlz.numeric)

    output_shape = rlz.shape_like("arg")

    @property
    def output_dtype(self):
        """Decimal input keeps its type; otherwise ``int64`` when ``digits``
        is ``None`` and ``double`` when it is given."""
        arg_type = self.arg.type()
        if isinstance(arg_type, dt.Decimal):
            return arg_type
        return dt.int64 if self.digits is None else dt.double
Esempio n. 18
0
class CategoryLabel(Value):
    """Assigns string labels to a category argument, one per category."""

    arg = rlz.category
    labels = rlz.tuple_of(rlz.instance_of(str))
    nulls = rlz.optional(rlz.instance_of(str))

    output_dtype = dt.string
    output_shape = rlz.shape_like("arg")

    def __init__(self, arg, labels, **kwargs):
        """Check that there is exactly one label per category.

        Raises
        ------
        ValueError
            If ``len(labels)`` differs from the cardinality of ``arg``'s type.
        """
        cardinality = arg.type().cardinality
        label_count_matches = len(labels) == cardinality
        if not label_count_matches:
            raise ValueError('Number of labels must match number of '
                             f'categories: {cardinality}')
        super().__init__(arg=arg, labels=labels, **kwargs)
Esempio n. 19
0
class ParseURL(Value):
    """String operation selecting one named part of a URL argument.

    The declared output is a string with the same shape as ``arg``.
    """

    arg = rlz.string
    # Must be one of the upper-case part names listed below.
    extract = rlz.isin({
        'PROTOCOL',
        'HOST',
        'PATH',
        'REF',
        'AUTHORITY',
        'FILE',
        'USERINFO',
        'QUERY',
    })
    # Optional string.
    # NOTE(review): presumably only meaningful together with 'QUERY' --
    # confirm.
    key = rlz.optional(rlz.string)

    output_shape = rlz.shape_like("arg")
    output_dtype = dt.string
Esempio n. 20
0
class SortKey(Node):
    """Node pairing a column expression with a sort direction."""

    expr = rlz.column(rlz.any)
    # Optional direction flag, defaulting to True (ascending).
    # In Python, 1 and 0 compare and hash equal to True and False, so this
    # dict literal ends up with only two entries; the integer keys are
    # redundant with the boolean ones.
    ascending = rlz.optional(
        rlz.map_to({
            True: True,
            False: False,
            1: True,
            0: False,
        }, ),
        default=True,
    )

    output_type = ir.SortExpr

    def root_tables(self):
        """Return the root tables of the wrapped expression's operation."""
        return self.expr.op().root_tables()

    def resolve_name(self):
        """Return the name of the wrapped expression."""
        return self.expr.get_name()
Esempio n. 21
0
class ScalarParameter(Value):
    """Scalar value identified by a data type and an integer counter."""

    # Class-level counter shared by all instances; source of default ids.
    _counter = itertools.count()

    dtype = rlz.datatype
    # Optional integer id. The default is a callable that draws the next
    # value from the shared class-level counter.
    # NOTE(review): presumably ``rlz.optional`` invokes callable defaults on
    # each instantiation -- confirm.
    counter = rlz.optional(rlz.instance_of(int),
                           default=lambda: next(ScalarParameter._counter))

    output_shape = rlz.Shape.SCALAR
    # Read-only property that returns the ``dtype`` attribute.
    output_dtype = property(attrgetter("dtype"))

    def resolve_name(self):
        """Return ``'param_<counter>'`` with the counter formatted as an
        integer."""
        return f'param_{self.counter:d}'

    def __hash__(self):
        """Hash on the ``(dtype, counter)`` pair."""
        return hash((self.dtype, self.counter))

    @property
    def inputs(self):
        """Always an empty tuple."""
        return ()

    def root_tables(self):
        """Always an empty list."""
        return []
Esempio n. 22
0
class Arbitrary(Filterable, Reduction):
    """Reduction over a column of any type; the output dtype follows
    ``arg``."""

    arg = rlz.column(rlz.any)
    # Optional; when given, must be one of 'first', 'last' or 'heavy'.
    how = rlz.optional(rlz.isin({'first', 'last', 'heavy'}))
    output_dtype = rlz.dtype_like('arg')
Esempio n. 23
0
class Filterable(Value):
    """Value operation that accepts an optional boolean ``where`` field."""

    # NOTE(review): presumably a filter applied before the operation is
    # evaluated -- confirm against the classes that mix this in.
    where = rlz.optional(rlz.boolean)
Esempio n. 24
0
class StringSQLLike(FuzzySearch):
    """Fuzzy search of a string argument against a string pattern, with an
    optional escape string."""

    arg = rlz.string
    pattern = rlz.string
    # Optional; must be a plain Python ``str`` when given.
    escape = rlz.optional(rlz.instance_of(str))
Esempio n. 25
0
class NullLiteral(Literal, Singleton):
    """Typeless NULL literal"""

    # ``type(None)`` (i.e. NoneType) is passed where other fields pass a
    # validation rule.
    # NOTE(review): presumably this means only ``None`` is ever accepted as
    # the value -- confirm how ``rlz.optional`` calls its argument.
    value = rlz.optional(type(None))
    # Optional ``dt.Null`` instance; defaults to ``dt.null``.
    dtype = rlz.optional(rlz.instance_of(dt.Null), default=dt.null)
Esempio n. 26
0
 class Op(Annotable):
     # Optional list argument. ``default`` is a name taken from the enclosing
     # scope, which is not visible in this excerpt.
     arg = rlz.optional(rlz.instance_of(list), default=default)
Esempio n. 27
0
class Selection(TableNode, sch.HasSchema):
    """Table node holding selections, predicates and sort keys over a table.

    All three collections are optional and default to empty tuples. When
    ``selections`` is empty the schema is the schema of ``table``.
    """

    table = rlz.table
    selections = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.table,
                rlz.column_from("table"),
                rlz.function_of("table"),
                rlz.any,
            ))),
        default=(),
    )
    predicates = rlz.optional(rlz.tuple_of(rlz.boolean), default=())
    sort_keys = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.column_from("table"),
                rlz.function_of("table"),
                rlz.sort_key(from_="table"),
                rlz.pair(
                    rlz.one_of((
                        rlz.column_from("table"),
                        rlz.function_of("table"),
                        rlz.any,
                    )),
                    rlz.map_to({
                        True: True,
                        False: False,
                        "desc": False,
                        "descending": False,
                        "asc": True,
                        "ascending": True,
                        1: True,
                        0: False,
                    }),
                ),
            ))),
        default=(),
    )

    def __init__(self, table, selections, predicates, sort_keys, **kwargs):
        """Check that the inputs relate to ``table`` and build the node.

        Raises
        ------
        com.RelationError
            If the selections and sort keys do not share all roots with
            ``table``, or if any predicate shares no root with ``table``.
        """
        from ibis.expr.analysis import shares_all_roots, shares_some_roots

        if not shares_all_roots(selections + sort_keys, table):
            raise com.RelationError(
                "Selection expressions don't fully originate from "
                "dependencies of the table expression.")

        for predicate in predicates:
            if not shares_some_roots(predicate, table):
                raise com.RelationError(
                    "Predicate doesn't share any roots with table")

        super().__init__(
            table=table,
            selections=selections,
            predicates=predicates,
            sort_keys=sort_keys,
            **kwargs,
        )

        # Validate no overlapping columns in schema
        # (accessing the cached property forces the schema to be built now).
        assert self.schema

    @cached_property
    def _projection(self):
        """A node of the same class keeping only ``table`` and
        ``selections``."""
        return self.__class__(table=self.table, selections=self.selections)

    @cached_property
    def schema(self):
        """Schema assembled from the selections.

        With no selections, the table's schema is returned. Otherwise each
        destruct column contributes one column per struct field, each value
        expression contributes one column, and each table expression
        contributes all of its columns. Entries of any other kind contribute
        nothing.
        """
        # Resolve schema and initialize
        if not self.selections:
            return self.table.schema()

        types = []
        names = []

        for projection in self.selections:
            if isinstance(projection, ir.DestructColumn):
                # If this is a destruct, then we destructure
                # the result and assign to multiple columns
                struct_type = projection.type()
                for name in struct_type.names:
                    names.append(name)
                    types.append(struct_type[name])
            elif isinstance(projection, ir.Value):
                names.append(projection.get_name())
                types.append(projection.type())
            elif isinstance(projection, ir.Table):
                schema = projection.schema()
                names.extend(schema.names)
                types.extend(schema.types)

        return sch.Schema(names, types)

    def blocks(self):
        """Return True when the node has at least one selection."""
        return bool(self.selections)

    @util.deprecated(instead="instantiate Selection directly", version="4.0.0")
    def substitute_table(self, table_expr):  # pragma: no cover
        """Return a Selection over ``table_expr`` with the same selections."""
        return Selection(table_expr, self.selections)

    def root_tables(self):
        """Return a one-element list containing this node."""
        return [self]

    @util.deprecated(instead="", version="4.0.0")
    def can_add_filters(self, wrapped_expr, predicates):  # pragma: no cover
        """Deprecated no-op; returns ``None``."""
        pass

    @util.deprecated(instead="", version="4.0.0")
    def empty_or_equal(self, other) -> bool:  # pragma: no cover
        """Return True when every field is empty on one side or equal.

        For each of ``selections``, ``sort_keys`` and ``predicates``, the
        two nodes are compatible if either side is empty, or if both sides
        have the same length and their elements are pairwise equal.
        """
        for field in "selections", "sort_keys", "predicates":
            selfs = getattr(self, field)
            others = getattr(other, field)

            # An empty side is compatible with anything.
            if not selfs or not others:
                continue

            # A bare generator expression is always truthy, so the pairwise
            # comparison has to be consumed with all(). zip() stops at the
            # shorter input, hence the explicit length check.
            if len(selfs) != len(others):
                return False
            if not all(a.equals(b) for a, b in zip(selfs, others)):
                return False
        return True

    @util.deprecated(instead="", version="4.0.0")
    def compatible_with(self, other):  # pragma: no cover
        """Return True when ``other`` equals this node, or is of the same
        type over an equal table with compatible fields."""
        # self and other are equivalent except for predicates, selections, or
        # sort keys any of which is allowed to be empty. If both are not empty
        # then they must be equal
        if self.equals(other):
            return True

        if not isinstance(other, type(self)):
            return False

        return self.table.equals(other.table) and self.empty_or_equal(other)

    def aggregate(self, this, metrics, by=None, having=None):
        """Build an aggregation on top of ``this``.

        With selections present an ``Aggregation`` is returned directly;
        otherwise the result of an ``AggregateSelection`` helper is returned.
        """
        if len(self.selections) > 0:
            return Aggregation(this, metrics, by=by, having=having)
        else:
            helper = AggregateSelection(this, metrics, by, having)
            return helper.get_result()

    def sort_by(self, expr, sort_exprs):
        """Return a Selection with the given sort expressions applied.

        When this node has no selections and the resolved keys share all
        roots with ``table``, the keys are appended to this node's own sort
        keys. Otherwise a new Selection over ``expr`` is returned.
        """
        from ibis.expr.analysis import shares_all_roots

        resolved_keys = _maybe_convert_sort_keys([self.table, expr],
                                                 sort_exprs)
        if not self.blocks():
            if shares_all_roots(resolved_keys, self.table):
                return Selection(
                    self.table,
                    self.selections,
                    predicates=self.predicates,
                    sort_keys=self.sort_keys + tuple(resolved_keys),
                )

        return Selection(expr, [], sort_keys=resolved_keys)
Esempio n. 28
0
class Log(Logarithm):
    """Logarithm of a numeric argument with an optional numeric base."""

    arg = rlz.strict_numeric
    # NOTE(review): the base used when this is omitted is not visible here --
    # confirm.
    base = rlz.optional(rlz.strict_numeric)
Esempio n. 29
0
class Aggregation(TableNode, sch.HasSchema):
    """
    Table node describing a grouped aggregation over ``table``.

    metrics : per-group scalar aggregates
    by : group expressions
    having : post-aggregation predicate
    predicates : boolean expressions that must share roots with the table
    sort_keys : sort keys; dropped when there are no group expressions

    TODO: not putting this in the aggregate operation yet
    where : pre-aggregation predicate
    """

    table = rlz.table
    metrics = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.function_of(
                    "table",
                    output_rule=rlz.one_of(
                        (rlz.reduction, rlz.scalar(rlz.any))),
                ),
                rlz.reduction,
                rlz.scalar(rlz.any),
                rlz.tuple_of(rlz.scalar(rlz.any)),
            )),
            flatten=True,
        ),
        default=(),
    )
    by = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.function_of("table"),
                rlz.column_from("table"),
                rlz.column(rlz.any),
            ))),
        default=(),
    )
    having = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.function_of("table", output_rule=rlz.scalar(rlz.boolean)),
                rlz.scalar(rlz.boolean),
            )), ),
        default=(),
    )
    predicates = rlz.optional(rlz.tuple_of(rlz.boolean), default=())
    sort_keys = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.column_from("table"),
                rlz.function_of("table"),
                rlz.sort_key(from_="table"),
                rlz.pair(
                    rlz.one_of((
                        rlz.column_from("table"),
                        rlz.function_of("table"),
                        rlz.any,
                    )),
                    rlz.map_to({
                        True: True,
                        False: False,
                        "desc": False,
                        "descending": False,
                        "asc": True,
                        "ascending": True,
                        1: True,
                        0: False,
                    }),
                ),
            ))),
        default=(),
    )

    def __init__(self, table, metrics, by, having, predicates, sort_keys):
        """Check that all inputs relate to ``table`` and build the node.

        Raises
        ------
        com.RelationError
            If metrics, group expressions, having clauses and sort keys do
            not share all roots with ``table``.
        """
        from ibis.expr.analysis import shares_all_roots, shares_some_roots

        # All non-scalar refs originate from the input table
        referenced = metrics + by + having + sort_keys
        if not shares_all_roots(referenced, table):
            raise com.RelationError(
                "Selection expressions don't fully originate from "
                "dependencies of the table expression.")

        # invariant due to Aggregation and AggregateSelection requiring a valid
        # Selection
        assert all(shares_some_roots(pred, table) for pred in predicates)

        # Sort keys are kept only when there is at least one group expression.
        effective_sort_keys = sort_keys if by else ()

        super().__init__(
            table=table,
            metrics=metrics,
            by=by,
            having=having,
            predicates=predicates,
            sort_keys=effective_sort_keys,
        )
        # Validate schema has no overlapping columns
        assert self.schema

    def blocks(self):
        """Always True for an aggregation."""
        return True

    @util.deprecated(instead="instantiate Aggregation directly",
                     version="4.0.0")
    def substitute_table(self, table_expr):  # pragma: no cover
        """Return an Aggregation over ``table_expr`` with the same metrics,
        group expressions and having clauses."""
        return Aggregation(
            table_expr,
            self.metrics,
            by=self.by,
            having=self.having,
        )

    @cached_property
    def schema(self):
        """Schema made of the group expressions followed by the metrics.

        A destruct value contributes one column per struct field; every other
        expression contributes a single column.
        """
        columns = []

        for expr in self.by + self.metrics:
            if isinstance(expr, ir.DestructValue):
                # If this is a destruct, then we destructure
                # the result and assign to multiple columns
                struct_type = expr.type()
                columns.extend(
                    (field, struct_type[field])
                    for field in struct_type.names)
            else:
                columns.append((expr.get_name(), expr.type()))

        names = [name for name, _ in columns]
        types = [dtype for _, dtype in columns]
        return sch.Schema(names, types)

    def sort_by(self, expr, sort_exprs):
        """Return a node with the given sort expressions applied.

        When the resolved keys share all roots with ``table`` they are
        appended to this aggregation's sort keys; otherwise a Selection over
        ``expr`` carrying the keys is returned.
        """
        from ibis.expr.analysis import shares_all_roots

        resolved_keys = _maybe_convert_sort_keys([self.table, expr],
                                                 sort_exprs)
        if not shares_all_roots(resolved_keys, self.table):
            return Selection(expr, [], sort_keys=resolved_keys)

        return Aggregation(
            self.table,
            self.metrics,
            by=self.by,
            having=self.having,
            predicates=self.predicates,
            sort_keys=self.sort_keys + tuple(resolved_keys),
        )
Esempio n. 30
0
 class Log(ops.Node):
     # Here the rules are obtained by calling ``rlz.double()``.
     # NOTE(review): other examples reference rules such as ``rlz.string``
     # without calling them -- this snippet presumably targets a different
     # version of the rules API; confirm.
     arg = rlz.double()
     base = rlz.optional(rlz.double())