Exemple #1
0
class Bucket(BucketLike):
    """Bucketing operation driven by an explicit tuple of bucket edges.

    Construction fails with ``ValueError`` when no edge is supplied, or when
    exactly one edge is supplied without both ``include_under`` and
    ``include_over`` enabled.
    """

    # Column expression of any type.
    arg = rlz.column(rlz.any)
    # Tuple of scalar bucket edges.
    buckets = rlz.tuple_of(rlz.scalar(rlz.any))
    # Either 'left' or 'right'; 'left' when omitted.
    closed = rlz.optional(rlz.isin({'left', 'right'}), default='left')
    # Boolean flags with their defaults.
    close_extreme = rlz.optional(rlz.instance_of(bool), default=True)
    include_under = rlz.optional(rlz.instance_of(bool), default=False)
    include_over = rlz.optional(rlz.instance_of(bool), default=False)

    def __init__(self, buckets, include_under, include_over, **kwargs):
        """Check the edge count, then delegate construction to the parent.

        Raises
        ------
        ValueError
            If ``buckets`` is empty, or holds a single edge while either
            ``include_under`` or ``include_over`` is falsy.
        """
        edge_count = len(buckets)

        if edge_count == 0:
            raise ValueError('Must be at least one bucket edge')

        # A single edge only splits the data when both open-ended buckets
        # are requested.
        if edge_count == 1 and not (include_under and include_over):
            raise ValueError('If one bucket edge provided, must have '
                             'include_under=True and include_over=True')

        super().__init__(
            buckets=buckets,
            include_under=include_under,
            include_over=include_over,
            **kwargs,
        )

    @property
    def nbuckets(self):
        """Bucket count: edges minus one, plus one per enabled open end."""
        between_edges = len(self.buckets) - 1
        return between_edges + self.include_over + self.include_under
Exemple #2
0
    class MyOp(ops.ValueOp):
        """Value operation with one optional boolean scalar input named 'value'."""

        # Single input rule: a scalar whose value type is dt.boolean, flagged
        # optional with a default of False.
        input_type = [
            rules.scalar(value_type=dt.boolean,
                         optional=True,
                         default=False,
                         name='value')
        ]
        # NOTE(review): presumably the output takes the type of argument 0 --
        # confirm against rules.type_of_arg.
        output_type = rules.type_of_arg(0)
Exemple #3
0
class Histogram(BucketLike):
    """Histogram bucketing operation.

    Exactly one of ``nbins`` and ``binwidth`` must be given; supplying
    neither or both raises ``ValueError`` at construction time.
    """

    arg = rlz.numeric
    # Optional integer number of bins.
    nbins = rlz.optional(rlz.instance_of(int))
    # Optional numeric scalars.
    binwidth = rlz.optional(rlz.scalar(rlz.numeric))
    base = rlz.optional(rlz.scalar(rlz.numeric))
    # Either 'left' or 'right'; 'left' when omitted.
    closed = rlz.optional(rlz.isin({'left', 'right'}), default='left')
    aux_hash = rlz.optional(rlz.instance_of(str))

    def __init__(self, nbins, binwidth, **kwargs):
        """Enforce the nbins/binwidth exclusivity, then build the node.

        Raises
        ------
        ValueError
            If both ``nbins`` and ``binwidth`` are ``None``, or if both are
            provided.
        """
        nbins_missing = nbins is None
        binwidth_missing = binwidth is None

        if nbins_missing and binwidth_missing:
            raise ValueError('Must indicate nbins or binwidth')

        if not (nbins_missing or binwidth_missing):
            raise ValueError('nbins and binwidth are mutually exclusive')

        super().__init__(nbins=nbins, binwidth=binwidth, **kwargs)

    @property
    def output_dtype(self):
        """Return ``dt.category``."""
        # The cardinality is always left undefined (for now).
        return dt.category
Exemple #4
0
@pytest.mark.parametrize(
    ('units', 'value', 'expected'),
    [
        ({'Y'}, ibis.interval(hours=1), IbisTypeError),
        ({'Y', 'M', 'D'}, ibis.interval(hours=1), IbisTypeError),
        ({'Q', 'W', 'D'}, ibis.interval(seconds=1), IbisTypeError),
    ],
)
def test_invalid_interval(units, value, expected):
    """``rlz.interval`` must raise ``expected`` for ``value`` under ``units``."""
    with pytest.raises(expected):
        rlz.interval(value, units=units)


@pytest.mark.parametrize(
    ('validator', 'value', 'expected'),
    [
        (rlz.column(rlz.any), table.int_col, table.int_col),
        (rlz.column(rlz.string), table.string_col, table.string_col),
        (rlz.scalar(rlz.integer), ibis.literal(3), ibis.literal(3)),
        (rlz.scalar(rlz.any), 'caracal', ibis.literal('caracal')),
    ],
)
def test_valid_column_or_scalar(validator, value, expected):
    """The validator's output must compare equal (via ``equals``) to ``expected``."""
    assert validator(value).equals(expected)


@pytest.mark.parametrize(
    ('validator', 'value', 'expected'),
    [
        (rlz.column(rlz.integer), table.double_col, IbisTypeError),
        (rlz.column(rlz.any), ibis.literal(3), IbisTypeError),
        (rlz.column(rlz.integer), ibis.literal(3), IbisTypeError),
    ],
)
def test_invalid_column_or_scalar(validator, value, expected):
    """Applying ``validator`` to ``value`` must raise ``expected``."""
    with pytest.raises(expected):
        validator(value)
Exemple #5
0
    class MyOp(ops.ValueOp):
        """Value operation with one scalar input checked by ``rules.number``."""

        # Single input rule: a scalar whose value type is rules.number.
        input_type = [rules.scalar(value_type=rules.number)]
        # Output rule is rules.double.
        output_type = rules.double
Exemple #6
0
class NTile(Analytic):
    """Analytic operation taking a column and a scalar integer ``buckets``."""

    # Column expression of any type.
    arg = rlz.column(rlz.any)
    # Scalar integer.
    # NOTE(review): presumably the number of tiles to split rows into -- confirm.
    buckets = rlz.scalar(rlz.integer)
    # Result dtype is fixed to int64.
    output_dtype = dt.int64
Exemple #7
0
class EndsWith(Value):
    """Boolean-valued operation over a string ``arg`` and a scalar string ``end``.

    NOTE(review): by its name this presumably tests whether ``arg`` ends with
    ``end``; the evaluation logic is not defined in this class.
    """

    # String expression.
    arg = rlz.string
    # Scalar string.
    end = rlz.scalar(rlz.string)
    # Result dtype is boolean.
    output_dtype = dt.boolean
    # Output shape is derived from the "arg" argument.
    output_shape = rlz.shape_like("arg")
Exemple #8
0
class StartsWith(Value):
    """Boolean-valued operation over a string ``arg`` and a scalar string ``start``.

    NOTE(review): by its name this presumably tests whether ``arg`` starts
    with ``start``; the evaluation logic is not defined in this class.
    """

    # String expression.
    arg = rlz.string
    # Scalar string.
    start = rlz.scalar(rlz.string)
    # Result dtype is boolean.
    output_dtype = dt.boolean
    # Output shape is derived from the "arg" argument.
    output_shape = rlz.shape_like("arg")
Exemple #9
0
        ({'Y'}, ibis.interval(hours=1), IbisTypeError),
        ({'Y', 'M', 'D'}, ibis.interval(hours=1), IbisTypeError),
        ({'Q', 'W', 'D'}, ibis.interval(seconds=1), IbisTypeError),
    ],
)
def test_invalid_interval(units, value, expected):
    """``rlz.interval`` must raise ``expected`` for ``value`` under ``units``."""
    with pytest.raises(expected):
        rlz.interval(value, units=units)


@pytest.mark.parametrize(
    'validator, value, expected',
    [(rlz.column(rlz.any), table.int_col, table.int_col),
     (rlz.column(rlz.string), table.string_col, table.string_col),
     (rlz.scalar(rlz.integer), ibis.literal(3), ibis.literal(3)),
     (rlz.scalar(rlz.any), 'caracal', ibis.literal('caracal'))],
)
def test_valid_column_or_scalar(validator, value, expected):
    """The validator's output must compare equal (via ``equals``) to ``expected``."""
    assert validator(value).equals(expected)


@pytest.mark.parametrize(
    ('validator', 'value', 'expected'),
    [
        (rlz.column(rlz.integer), table.double_col, IbisTypeError),
        (rlz.column(rlz.any), ibis.literal(3), IbisTypeError),
        (rlz.column(rlz.integer), ibis.literal(3), IbisTypeError),
    ],
Exemple #10
0
class Aggregation(TableNode, sch.HasSchema):
    """Table node describing an aggregation over ``table``.

    metrics : per-group scalar aggregates
    by : group expressions
    having : post-aggregation predicate
    predicates : boolean expressions, each expected to share a root with
        ``table``
    sort_keys : ordering keys; dropped when ``by`` is empty

    TODO: not putting this in the aggregate operation yet
    where : pre-aggregation predicate
    """

    table = rlz.table
    metrics = rlz.optional(
        rlz.tuple_of(
            rlz.one_of(
                (
                    rlz.function_of(
                        "table",
                        output_rule=rlz.one_of(
                            (rlz.reduction, rlz.scalar(rlz.any))
                        ),
                    ),
                    rlz.reduction,
                    rlz.scalar(rlz.any),
                    rlz.tuple_of(rlz.scalar(rlz.any)),
                )
            ),
            flatten=True,
        ),
        default=(),
    )
    by = rlz.optional(
        rlz.tuple_of(
            rlz.one_of(
                (
                    rlz.function_of("table"),
                    rlz.column_from("table"),
                    rlz.column(rlz.any),
                )
            )
        ),
        default=(),
    )
    having = rlz.optional(
        rlz.tuple_of(
            rlz.one_of(
                (
                    rlz.function_of(
                        "table", output_rule=rlz.scalar(rlz.boolean)
                    ),
                    rlz.scalar(rlz.boolean),
                )
            ),
        ),
        default=(),
    )
    predicates = rlz.optional(rlz.tuple_of(rlz.boolean), default=())
    sort_keys = rlz.optional(
        rlz.tuple_of(
            rlz.one_of(
                (
                    rlz.column_from("table"),
                    rlz.function_of("table"),
                    rlz.sort_key(from_="table"),
                    # (key, direction) pair; the direction token is mapped
                    # onto a boolean by the table below.
                    rlz.pair(
                        rlz.one_of(
                            (
                                rlz.column_from("table"),
                                rlz.function_of("table"),
                                rlz.any,
                            )
                        ),
                        rlz.map_to(
                            {
                                True: True,
                                False: False,
                                "desc": False,
                                "descending": False,
                                "asc": True,
                                "ascending": True,
                                1: True,
                                0: False,
                            }
                        ),
                    ),
                )
            )
        ),
        default=(),
    )

    def __init__(self, table, metrics, by, having, predicates, sort_keys):
        """Check root sharing against ``table`` and build the node.

        Raises
        ------
        com.RelationError
            If the combined metrics, group keys, having clauses and sort
            keys do not all share roots with ``table``.
        """
        from ibis.expr.analysis import shares_all_roots, shares_some_roots

        # Every non-scalar reference has to come from the input table.
        referenced = metrics + by + having + sort_keys
        if not shares_all_roots(referenced, table):
            raise com.RelationError(
                "Selection expressions don't fully originate from "
                "dependencies of the table expression."
            )

        # Invariant: Aggregation and AggregateSelection require a valid
        # Selection, so each predicate must share some root with the table.
        assert all(
            shares_some_roots(predicate, table) for predicate in predicates
        )

        # Sort keys are only kept when there are group keys.
        effective_sort_keys = sort_keys if by else ()

        super().__init__(
            table=table,
            metrics=metrics,
            by=by,
            having=having,
            predicates=predicates,
            sort_keys=effective_sort_keys,
        )

        # Access the cached schema right away; per the original note this
        # validates that the schema has no overlapping columns.
        assert self.schema

    def blocks(self):
        """Always return ``True``."""
        return True

    @util.deprecated(
        instead="instantiate Aggregation directly",
        version="4.0.0",
    )
    def substitute_table(self, table_expr):  # pragma: no cover
        """Build an Aggregation on ``table_expr`` with this node's metrics,
        ``by`` and ``having``; predicates and sort keys are not passed on."""
        return Aggregation(
            table_expr,
            self.metrics,
            by=self.by,
            having=self.having,
        )

    @cached_property
    def schema(self):
        """Schema assembled from the ``by`` expressions, then the metrics."""
        columns = []

        for expr in self.by + self.metrics:
            if isinstance(expr, ir.DestructValue):
                # A destruct expression contributes one output column per
                # field of its struct type.
                struct_type = expr.type()
                columns.extend(
                    (field, struct_type[field])
                    for field in struct_type.names
                )
            else:
                columns.append((expr.get_name(), expr.type()))

        names = [name for name, _ in columns]
        types = [dtype for _, dtype in columns]
        return sch.Schema(names, types)

    def sort_by(self, expr, sort_exprs):
        """Return a node that applies ``sort_exprs``.

        If the resolved keys share all roots with ``self.table``, the result
        is a new Aggregation with the keys appended to the existing sort
        keys. Otherwise it is a Selection over ``expr`` carrying the keys.
        """
        from ibis.expr.analysis import shares_all_roots

        resolved_keys = _maybe_convert_sort_keys(
            [self.table, expr], sort_exprs
        )

        if not shares_all_roots(resolved_keys, self.table):
            return Selection(expr, [], sort_keys=resolved_keys)

        combined_keys = self.sort_keys + tuple(resolved_keys)
        return Aggregation(
            self.table,
            self.metrics,
            by=self.by,
            having=self.having,
            predicates=self.predicates,
            sort_keys=combined_keys,
        )