Ejemplo n.º 1
0
class ShiftBase(Analytic):
    """Analytic operation over a column with an optional offset and default.

    The attributes below are ``rlz`` validation rules; how they are bound to
    constructor arguments is decided by the ``Analytic`` base class, which is
    not visible in this snippet.
    """

    # Input column; any element type is accepted.
    arg = rlz.column(rlz.any)

    # Optional: must satisfy either the integer rule or the interval rule.
    offset = rlz.optional(rlz.one_of((rlz.integer, rlz.interval)))
    # Optional value of any type.
    # NOTE(review): presumably the fill value used where the shift has no
    # source row -- confirm against the backend compilers.
    default = rlz.optional(rlz.any)

    # The result dtype is derived from ``arg``.
    output_dtype = rlz.dtype_like("arg")
Ejemplo n.º 2
0
class BaseConvert(Value):
    """Value operation taking an input plus a source and a target base.

    NOTE(review): judging by the names this converts a number between numeric
    bases; the conversion itself is not implemented in this class.
    """

    # Input: must satisfy either the integer rule or the string rule.
    arg = rlz.one_of([rlz.integer, rlz.string])
    from_base = rlz.integer
    to_base = rlz.integer

    # The result is always declared as a string.
    output_dtype = dt.string
    # Shape rule keyed on "args" (presumably all arguments rather than only
    # ``arg`` -- confirm in ``rlz.shape_like``).
    output_shape = rlz.shape_like("args")
Ejemplo n.º 3
0
class Literal(Value):
    """Value node holding a constant together with its declared datatype."""

    # Accepted payloads: an instance of one of the types listed below, or
    # anything that ``rlz.is_computable_input`` accepts.
    value = rlz.one_of((
        rlz.instance_of((
            BaseGeometry,
            bytes,
            datetime.date,
            datetime.datetime,
            datetime.time,
            datetime.timedelta,
            enum.Enum,
            float,
            frozenset,
            int,
            frozendict,
            np.generic,
            np.ndarray,
            pd.Timedelta,
            pd.Timestamp,
            str,
            tuple,
            type(None),  # lets ``None`` itself be a literal value
            uuid.UUID,
            decimal.Decimal,
        )),
        rlz.is_computable_input,
    ))
    # Datatype of the literal, validated by ``rlz.datatype``.
    dtype = rlz.datatype

    # A literal is always scalar-shaped.
    output_shape = rlz.Shape.SCALAR
    # ``output_dtype`` reads back the ``dtype`` field of the instance.
    output_dtype = property(attrgetter("dtype"))

    def root_tables(self):
        """Return an empty list: this node reports no root tables."""
        return []
Ejemplo n.º 4
0
class BaseConvert(ValueOp):
    """Value operation taking an input plus a source and a target base.

    Uses the list-style ``input_type`` / ``output_type`` declaration.
    """

    # Positional inputs, in order: the value (integer or string), then the
    # integer ``from_base`` and the integer ``to_base``.
    input_type = [
        rules.one_of([integer, string]),
        integer(name='from_base'),
        integer(name='to_base')
    ]
    # String output.
    # NOTE(review): the shape is presumably derived from the flattened
    # arguments -- confirm in ``rules.shape_like_flatargs``.
    output_type = rules.shape_like_flatargs('string')
Ejemplo n.º 5
0
class MapValueForKey(Value):
    """Value operation taking a map expression and a key.

    The result dtype is the value type of the map's datatype.
    """

    # The map argument, followed by a string or integer key.
    arg = rlz.mapping
    key = rlz.one_of([rlz.string, rlz.integer])

    output_shape = rlz.shape_like("args")

    @immutable_property
    def output_dtype(self):
        """Return the ``value_type`` of the map argument's datatype."""
        map_dtype = self.arg.type()
        return map_dtype.value_type
Ejemplo n.º 6
0
class Contains(Value):
    """Boolean-valued operation over a value and a collection of options."""

    # The value to look for; any type is accepted.
    value = rlz.any
    # The collection to search: the first of these rules that validates wins.
    options = rlz.one_of([
        rlz.value_list_of(rlz.any),
        rlz.set_,
        rlz.column(rlz.any),
        rlz.array_of(rlz.any),
    ])

    # The result is always boolean.
    output_dtype = dt.boolean
    # Shape rule keyed on "args" (presumably all arguments -- confirm in
    # ``rlz.shape_like``).
    output_shape = rlz.shape_like("args")
Ejemplo n.º 7
0
class TopK(Node):
    """Node over a column, a non-negative integer ``k`` and a ``by`` argument.

    The resulting expression type is ``ir.TopK``.
    """

    arg = rlz.column(rlz.any)
    k = rlz.non_negative_integer
    # Either a function of the base table of ``arg`` or any value.
    by = rlz.one_of((rlz.function_of(rlz.base_table_of("arg")), rlz.any))
    output_type = ir.TopK

    def blocks(self):  # pragma: no cover
        """Always report ``True``."""
        return True

    def root_tables(self):  # pragma: no cover
        """Return the distinct roots of every expression among the flat args."""
        expr_args = [
            candidate
            for candidate in self.flat_args()
            if isinstance(candidate, ir.Expr)
        ]
        return distinct_roots(*expr_args)
Ejemplo n.º 8
0
class MapValueOrDefaultForKey(Value):
    """Value operation taking a map expression, a key and a default value."""

    # The map argument, a string or integer key, and a default of any type.
    arg = rlz.mapping
    key = rlz.one_of([rlz.string, rlz.integer])
    default = rlz.any

    output_shape = rlz.shape_like("args")

    @property
    def output_dtype(self):
        """Return the highest-precedence dtype of the default and map value.

        Raises
        ------
        com.IbisTypeError
            If the default's type and the map's value type are not of the
            same kind.
        """
        map_value_dtype = self.arg.type().value_type
        default_dtype = self.default.type()

        if dt.same_kind(default_dtype, map_value_dtype):
            return dt.highest_precedence((default_dtype, map_value_dtype))

        message = (
            "Default value\n{}\nof type {} cannot be cast to map's value "
            "type {}"
        ).format(self.default, default_dtype, map_value_dtype)
        raise com.IbisTypeError(message)
Ejemplo n.º 9
0
    def __rsub__(
        self,
        other: datetime.date
        | DateValue
        | datetime.timedelta
        | pd.Timedelta
        | IntervalValue,
    ) -> IntervalValue | DateValue | NotImplemented:
        """Handle reflected subtraction, with ``other`` as the left operand.

        ``other`` is first validated as either a date or an interval. A date
        operand selects ``ops.DateDiff``; anything else selects
        ``ops.DateSub`` and is left for that operation to validate.
        """
        import ibis.expr.operations as ops
        import ibis.expr.rules as rlz

        left = rlz.one_of([rlz.date, rlz.interval], other)

        # For a non-date operand, let the operation itself validate.
        op = ops.DateDiff if isinstance(left, DateValue) else ops.DateSub

        return _binop(op, left, self)
Ejemplo n.º 10
0
class FillNa(TableNode, sch.HasSchema):
    """Fill null values in the table."""

    table = rlz.table
    # A single numeric or string replacement, or a mapping of replacements.
    replacements = rlz.one_of((
        rlz.numeric,
        rlz.string,
        rlz.instance_of(collections.abc.Mapping),
    ))

    def __init__(self, table, replacements, **kwargs):
        """Build the node, wrapping mapping replacements in a frozendict.

        Non-mapping replacements are passed through unchanged.
        """
        if isinstance(replacements, collections.abc.Mapping):
            replacements = util.frozendict(replacements)

        super().__init__(table=table, replacements=replacements, **kwargs)

    @property
    def schema(self):
        """Return the schema of the underlying table, unchanged."""
        return self.table.schema()
Ejemplo n.º 11
0
class Count(Filterable, Reduction):
    """Reduction over a column or a whole table with an ``int64`` result."""

    # Accepts either a column of any type or a table.
    arg = rlz.one_of((rlz.column(rlz.any), rlz.table))
    output_dtype = dt.int64
Ejemplo n.º 12
0
class BetweenTime(Between):
    """``Between`` variant over a timestamp or time with time-like bounds."""

    # The tested value: a timestamp or a time.
    arg = rlz.one_of([rlz.timestamp, rlz.time])
    # Each bound is either a time or a string.
    lower_bound = rlz.one_of([rlz.time, rlz.string])
    upper_bound = rlz.one_of([rlz.time, rlz.string])
Ejemplo n.º 13
0
class DayOfWeekNode(Node):
    """Node over a date or timestamp; expression type is ``ir.DayOfWeek``."""

    # Accepts either a date or a timestamp.
    arg = rlz.one_of([rlz.date, rlz.timestamp])
    output_type = ir.DayOfWeek
Ejemplo n.º 14
0
class DayOfWeekName(Unary):
    """Unary operation over a date or timestamp with a string result."""

    # Accepts either a date or a timestamp.
    arg = rlz.one_of([rlz.date, rlz.timestamp])
    output_dtype = dt.string
Ejemplo n.º 15
0
class DayOfWeekIndex(Unary):
    """Unary operation over a date or timestamp with an ``int16`` result.

    NOTE(review): which weekday maps to which index is not defined here.
    """

    # Accepts either a date or a timestamp.
    arg = rlz.one_of([rlz.date, rlz.timestamp])
    output_dtype = dt.int16
Ejemplo n.º 16
0
class ExtractTimeField(ExtractTemporalField):
    """``ExtractTemporalField`` variant restricted to time or timestamp input."""

    # Accepts either a time or a timestamp.
    arg = rlz.one_of([rlz.time, rlz.timestamp])
Ejemplo n.º 17
0
class Selection(TableNode, sch.HasSchema):
    """Table node combining column selections, predicates and sort keys.

    ``selections``, ``predicates`` and ``sort_keys`` are all optional and
    default to empty tuples. With no selections the node exposes the schema
    of ``table`` unchanged.
    """

    table = rlz.table
    # Each selection is a table, a column from ``table``, a function of
    # ``table``, or any other value.
    selections = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.table,
                rlz.column_from("table"),
                rlz.function_of("table"),
                rlz.any,
            ))),
        default=(),
    )
    predicates = rlz.optional(rlz.tuple_of(rlz.boolean), default=())
    # Each sort key is a column, a function of ``table``, a ready-made sort
    # key, or a ``(key, direction)`` pair. The direction is normalised to a
    # boolean by the mapping below (``True`` for the ascending spellings).
    sort_keys = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.column_from("table"),
                rlz.function_of("table"),
                rlz.sort_key(from_="table"),
                rlz.pair(
                    rlz.one_of((
                        rlz.column_from("table"),
                        rlz.function_of("table"),
                        rlz.any,
                    )),
                    rlz.map_to({
                        True: True,
                        False: False,
                        "desc": False,
                        "descending": False,
                        "asc": True,
                        "ascending": True,
                        1: True,
                        0: False,
                    }),
                ),
            ))),
        default=(),
    )

    def __init__(self, table, selections, predicates, sort_keys, **kwargs):
        """Check root-table relationships, then build the node.

        Raises
        ------
        com.RelationError
            If the selections and sort keys do not share all roots with
            ``table``, or if any predicate shares no root with ``table``.
        """
        from ibis.expr.analysis import shares_all_roots, shares_some_roots

        if not shares_all_roots(selections + sort_keys, table):
            raise com.RelationError(
                "Selection expressions don't fully originate from "
                "dependencies of the table expression.")

        for predicate in predicates:
            if not shares_some_roots(predicate, table):
                raise com.RelationError(
                    "Predicate doesn't share any roots with table")

        super().__init__(
            table=table,
            selections=selections,
            predicates=predicates,
            sort_keys=sort_keys,
            **kwargs,
        )

        # Validate no overlapping columns in schema
        # NOTE(review): this relies on an ``assert``, so the eager schema
        # computation is skipped when Python runs with ``-O``.
        assert self.schema

    @cached_property
    def _projection(self):
        """Return a node of the same class with only table and selections."""
        return self.__class__(table=self.table, selections=self.selections)

    @cached_property
    def schema(self):
        """Return the output schema built from the selections.

        With no selections this is the schema of ``table``. Otherwise the
        names and types are collected from each selection in order.
        """
        # Resolve schema and initialize
        if not self.selections:
            return self.table.schema()

        types = []
        names = []

        for projection in self.selections:
            if isinstance(projection, ir.DestructColumn):
                # If this is a destruct, then we destructure
                # the result and assign to multiple columns
                struct_type = projection.type()
                for name in struct_type.names:
                    names.append(name)
                    types.append(struct_type[name])
            elif isinstance(projection, ir.Value):
                names.append(projection.get_name())
                types.append(projection.type())
            elif isinstance(projection, ir.Table):
                # A whole table contributes all of its columns.
                schema = projection.schema()
                names.extend(schema.names)
                types.extend(schema.types)

        return sch.Schema(names, types)

    def blocks(self):
        """Return ``True`` when the node has at least one selection."""
        return bool(self.selections)

    @util.deprecated(instead="instantiate Selection directly", version="4.0.0")
    def substitute_table(self, table_expr):  # pragma: no cover
        """Return a Selection of the same selections over ``table_expr``."""
        return Selection(table_expr, self.selections)

    def root_tables(self):
        """Return this node as its own single root table."""
        return [self]

    @util.deprecated(instead="", version="4.0.0")
    def can_add_filters(self, wrapped_expr, predicates):  # pragma: no cover
        """Do nothing; kept only as a deprecated stub."""
        pass

    @util.deprecated(instead="", version="4.0.0")
    def empty_or_equal(self, other) -> bool:  # pragma: no cover
        """Check selections, sort keys and predicates against ``other``.

        A field is compatible when it is empty on either side, or when both
        sides have the same length and every pair of elements compares equal
        via ``equals``. Returns ``True`` only if all three fields are
        compatible.
        """
        for field in "selections", "sort_keys", "predicates":
            selfs = getattr(self, field)
            others = getattr(other, field)
            if not selfs or not others:
                continue
            # ``zip`` stops at the shorter input, so compare lengths first.
            if len(selfs) != len(others):
                return False
            # ``all`` is required: a bare generator expression is always
            # truthy, which made this check pass unconditionally.
            if not all(a.equals(b) for a, b in zip(selfs, others)):
                return False
        return True

    @util.deprecated(instead="", version="4.0.0")
    def compatible_with(self, other):  # pragma: no cover
        """Return whether ``other`` matches up to empty fields."""
        # self and other are equivalent except for predicates, selections, or
        # sort keys any of which is allowed to be empty. If both are not empty
        # then they must be equal
        if self.equals(other):
            return True

        if not isinstance(other, type(self)):
            return False

        return self.table.equals(other.table) and self.empty_or_equal(other)

    def aggregate(self, this, metrics, by=None, having=None):
        """Aggregate ``this`` by the given metrics, groups and having clause.

        With selections present an ``Aggregation`` over ``this`` is returned
        directly; otherwise the result comes from ``AggregateSelection``.
        """
        if self.selections:
            return Aggregation(this, metrics, by=by, having=having)
        else:
            helper = AggregateSelection(this, metrics, by, having)
            return helper.get_result()

    def sort_by(self, expr, sort_exprs):
        """Return a Selection sorted by ``sort_exprs``.

        When this node has no selections and every resolved key shares all
        roots with ``self.table``, the keys are appended to the existing
        sort keys of a rebuilt node. Otherwise a new Selection over ``expr``
        carrying only the new keys is returned.
        """
        from ibis.expr.analysis import shares_all_roots

        resolved_keys = _maybe_convert_sort_keys([self.table, expr],
                                                 sort_exprs)
        if not self.blocks():
            if shares_all_roots(resolved_keys, self.table):
                return Selection(
                    self.table,
                    self.selections,
                    predicates=self.predicates,
                    sort_keys=self.sort_keys + tuple(resolved_keys),
                )

        return Selection(expr, [], sort_keys=resolved_keys)
Ejemplo n.º 18
0
class HashBytes(Value):
    """Value operation over a string or binary input and an algorithm name.

    The result is binary, with the same shape as ``arg``.
    """

    # A tuple rather than a set, so the alternatives are iterated in a
    # fixed, declared order (string first, then binary). Set iteration
    # order is not defined.
    arg = rlz.one_of((rlz.value(dt.string), rlz.value(dt.binary)))
    # Accepted algorithm names; a set is fine here because it is only used
    # for a membership test.
    how = rlz.isin({'md5', 'sha1', 'sha256', 'sha512'})

    output_dtype = dt.binary
    output_shape = rlz.shape_like("arg")
Ejemplo n.º 19
0
class HashBytes(ValueOp):
    """Value operation over a string or binary input and an algorithm name."""

    # Input: a string value or a binary value.
    arg = Arg(rlz.one_of([rlz.value(dt.string), rlz.value(dt.binary)]))
    # Accepted algorithm names.
    how = Arg(rlz.isin({"sha256", "farm_fingerprint"}))
    # Binary output with the same shape as ``arg``.
    output_type = rlz.shape_like("arg", "binary")
Ejemplo n.º 20
0
class Aggregation(TableNode, sch.HasSchema):
    """Table node describing a grouped aggregation.

    metrics : per-group scalar aggregates
    by : group expressions
    having : post-aggregation predicate

    TODO: not putting this in the aggregate operation yet
    where : pre-aggregation predicate
    """

    table = rlz.table
    # Each metric is a function of ``table`` producing a reduction or scalar,
    # a reduction, a scalar, or a tuple of scalars; ``flatten=True`` is
    # passed so nested tuples end up in one flat tuple of metrics.
    metrics = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.function_of(
                    "table",
                    output_rule=rlz.one_of(
                        (rlz.reduction, rlz.scalar(rlz.any))),
                ),
                rlz.reduction,
                rlz.scalar(rlz.any),
                rlz.tuple_of(rlz.scalar(rlz.any)),
            )),
            flatten=True,
        ),
        default=(),
    )
    # Grouping expressions: functions of ``table``, columns from ``table``,
    # or any other column.
    by = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.function_of("table"),
                rlz.column_from("table"),
                rlz.column(rlz.any),
            ))),
        default=(),
    )
    # Post-aggregation filters: scalar booleans, or functions of ``table``
    # that produce one.
    having = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.function_of("table", output_rule=rlz.scalar(rlz.boolean)),
                rlz.scalar(rlz.boolean),
            )), ),
        default=(),
    )
    predicates = rlz.optional(rlz.tuple_of(rlz.boolean), default=())
    # Each sort key is a column, a function of ``table``, a ready-made sort
    # key, or a ``(key, direction)`` pair. The direction is normalised to a
    # boolean by the mapping below (``True`` for the ascending spellings).
    sort_keys = rlz.optional(
        rlz.tuple_of(
            rlz.one_of((
                rlz.column_from("table"),
                rlz.function_of("table"),
                rlz.sort_key(from_="table"),
                rlz.pair(
                    rlz.one_of((
                        rlz.column_from("table"),
                        rlz.function_of("table"),
                        rlz.any,
                    )),
                    rlz.map_to({
                        True: True,
                        False: False,
                        "desc": False,
                        "descending": False,
                        "asc": True,
                        "ascending": True,
                        1: True,
                        0: False,
                    }),
                ),
            ))),
        default=(),
    )

    def __init__(self, table, metrics, by, having, predicates, sort_keys):
        """Check root-table relationships, then build the node.

        Sort keys are discarded when there are no grouping expressions.

        Raises
        ------
        com.RelationError
            If the metrics, groups, having clauses and sort keys do not share
            all roots with ``table``.
        """
        from ibis.expr.analysis import shares_all_roots, shares_some_roots

        # All non-scalar refs originate from the input table
        if not shares_all_roots(metrics + by + having + sort_keys, table):
            raise com.RelationError(
                "Selection expressions don't fully originate from "
                "dependencies of the table expression.")

        # invariant due to Aggregation and AggregateSelection requiring a valid
        # Selection
        assert all(
            shares_some_roots(predicate, table) for predicate in predicates)

        # Without grouping expressions the sort keys are dropped.
        if not by:
            sort_keys = tuple()

        super().__init__(
            table=table,
            metrics=metrics,
            by=by,
            having=having,
            predicates=predicates,
            sort_keys=sort_keys,
        )
        # Validate schema has no overlapping columns
        # NOTE(review): this relies on an ``assert``, so the eager schema
        # computation is skipped when Python runs with ``-O``.
        assert self.schema

    def blocks(self):
        """Always report ``True``."""
        return True

    @util.deprecated(instead="instantiate Aggregation directly",
                     version="4.0.0")
    def substitute_table(self, table_expr):  # pragma: no cover
        """Return an Aggregation over ``table_expr``.

        Only metrics, groups and having clauses are carried over.
        """
        return Aggregation(table_expr,
                           self.metrics,
                           by=self.by,
                           having=self.having)

    @cached_property
    def schema(self):
        """Return the output schema: grouping columns first, then metrics."""
        names = []
        types = []

        for e in self.by + self.metrics:
            if isinstance(e, ir.DestructValue):
                # If this is a destruct, then we destructure
                # the result and assign to multiple columns
                struct_type = e.type()
                for name in struct_type.names:
                    names.append(name)
                    types.append(struct_type[name])
            else:
                names.append(e.get_name())
                types.append(e.type())

        return sch.Schema(names, types)

    def sort_by(self, expr, sort_exprs):
        """Return a node sorted by ``sort_exprs``.

        When every resolved key shares all roots with ``self.table``, the
        keys are appended to the existing sort keys of a rebuilt Aggregation.
        Otherwise a Selection over ``expr`` carrying only the new keys is
        returned.
        """
        from ibis.expr.analysis import shares_all_roots

        resolved_keys = _maybe_convert_sort_keys([self.table, expr],
                                                 sort_exprs)
        if shares_all_roots(resolved_keys, self.table):
            return Aggregation(
                self.table,
                self.metrics,
                by=self.by,
                having=self.having,
                predicates=self.predicates,
                sort_keys=self.sort_keys + tuple(resolved_keys),
            )

        return Selection(expr, [], sort_keys=resolved_keys)
Ejemplo n.º 21
0
class Negate(Unary):
    """Unary operation over a numeric or interval input."""

    # Accepts either a numeric value or an interval.
    arg = rlz.one_of((rlz.numeric, rlz.interval))

    # The result dtype is derived from ``arg``.
    output_dtype = rlz.dtype_like("arg")