Beispiel #1
0
    def _windowize(x, w):
        """Walk ``x`` and apply window ``w`` to the result where applicable.

        If ``x`` is already a ``WindowOp`` expression, only its first
        argument is walked and the expression is rebuilt (keeping its own
        window argument and name) when the walk returned a different
        object.  Otherwise ``x`` itself is passed to ``_walk``.

        The walked expression is then returned as follows:

        * analytic op, or op with a truthy ``_reduction`` attribute:
          ``.over(w)``, using a fresh ``window()`` when ``w`` is None;
        * ``WindowOp``: ``.over(w)`` when ``w`` is not None, else unchanged;
        * anything else: unchanged.
        """
        if isinstance(x.op(), ops.WindowOp):
            inner, inner_window = x.op().args
            new_inner = _walk(inner, w)

            # Identity check: reuse ``x`` when the walk changed nothing.
            if new_inner is inner:
                result = x
            else:
                result = x._factory(ops.WindowOp(new_inner, inner_window),
                                    name=x._name)
        else:
            result = _walk(x, w)

        node = result.op()
        needs_window = (isinstance(node, ops.AnalyticOp) or
                        getattr(node, '_reduction', False))
        if needs_window:
            effective = window() if w is None else w
            return result.over(effective)

        if isinstance(node, ops.WindowOp) and w is not None:
            return result.over(w)

        return result
Beispiel #2
0
    def _windowize(x, w):
        """Walk ``x`` and apply window ``w`` to the result where applicable.

        If ``x`` is already an ``ops.Window`` expression, only its first
        argument is walked; when the walk returned a different object a new
        ``ops.Window`` is built from it and the original window argument,
        and the resulting expression is given ``x``'s name.  Otherwise ``x``
        itself is passed to ``_walk``.

        The walked expression is then returned as follows:

        * ``ops.Analytic`` or ``ops.Reduction``: ``.over(w)``, using a fresh
          ``window()`` when ``w`` is None;
        * ``ops.Window``: ``.over(w.combine(op.window))`` when ``w`` is not
          None, else unchanged;
        * anything else: unchanged.
        """
        if isinstance(x.op(), ops.Window):
            inner, inner_window = x.op().args
            new_inner = _walk(inner, w)

            # Identity check: reuse ``x`` when the walk changed nothing.
            if new_inner is inner:
                result = x
            else:
                rebuilt = ops.Window(new_inner, inner_window)
                result = rebuilt.to_expr().name(x.get_name())
        else:
            result = _walk(x, w)

        node = result.op()
        if isinstance(node, (ops.Analytic, ops.Reduction)):
            effective = window() if w is None else w
            return result.over(effective)

        if isinstance(node, ops.Window) and w is not None:
            # Merge the outer window with the one already on the node.
            return result.over(w.combine(node.window))

        return result
Beispiel #3
0
    def _windowize(x, w):
        """Walk ``x``, apply window ``w`` where applicable, and check it.

        If ``x`` is already a ``WindowOp`` expression, only its first
        argument is walked and the expression is rebuilt (keeping its own
        window argument and name) when the walk returned a different
        object.  Otherwise ``x`` itself is passed to ``_walk``.

        The walked expression is then returned as follows:

        * analytic op, or op with a truthy ``_reduction`` attribute:
          ``_check_window(expr.over(w))``, using a fresh ``window()`` when
          ``w`` is None;
        * ``WindowOp``: ``_check_window`` of ``expr.over(w)`` when ``w`` is
          not None, else ``_check_window(expr)``;
        * anything else: returned unchanged, without ``_check_window``.
        """
        if isinstance(x.op(), ops.WindowOp):
            inner, inner_window = x.op().args
            new_inner = _walk(inner, w)

            # Identity check: reuse ``x`` when the walk changed nothing.
            if new_inner is inner:
                result = x
            else:
                result = x._factory(ops.WindowOp(new_inner, inner_window),
                                    name=x._name)
        else:
            result = _walk(x, w)

        node = result.op()
        is_analytic = isinstance(node, ops.AnalyticOp)
        if is_analytic or getattr(node, '_reduction', False):
            effective = window() if w is None else w
            return _check_window(result.over(effective))

        if not isinstance(node, ops.WindowOp):
            return result

        if w is None:
            return _check_window(result)
        return _check_window(result.over(w))
Beispiel #4
0
def test_analytic_udf_destruct(backend, alltypes, udf):
    """Destructured ``udf`` output over a window matches manual demeaning.

    The window is grouped by ``year`` with ``preceding`` and ``following``
    both None.  The expected frame subtracts the windowed mean from
    ``double_col`` and ``int_col`` respectively.
    """
    win = window(preceding=None, following=None, group_by='year')

    struct_expr = udf(alltypes['double_col'], alltypes['int_col']).over(win)
    result = alltypes.mutate(struct_expr.destructure()).execute()

    demean = alltypes['double_col'] - alltypes['double_col'].mean().over(win)
    demean_weight = alltypes['int_col'] - alltypes['int_col'].mean().over(win)
    expected = alltypes.mutate(
        demean=demean,
        demean_weight=demean_weight,
    ).execute()

    backend.assert_frame_equal(result, expected)
Beispiel #5
0
    def _get_window(self):
        """Build the window combining ``self._window`` with this grouping.

        Without a stored window, the grouping is ``self.by``, the ordering
        is ``self._order_by`` and both frame bounds are None.  With one, its
        ``group_by``/``order_by`` are placed before ``self.by`` /
        ``self._order_by`` and its ``preceding``/``following`` are reused.
        Every ordering entry is converted with ``ops.to_sort_key`` against
        ``self.table`` before the window is created.
        """
        base = self._window
        if base is not None:
            groups = base.group_by + self.by
            ordering = base.order_by + self._order_by
            preceding = base.preceding
            following = base.following
        else:
            groups = self.by
            ordering = self._order_by
            preceding = following = None

        sort_keys = [ops.to_sort_key(self.table, key) for key in ordering]

        return _window.window(
            preceding=preceding,
            following=following,
            group_by=groups,
            order_by=sort_keys,
        )
Beispiel #6
0
    def _get_window(self):
        """Return a window built from ``self._window`` plus this grouping.

        When ``self._window`` is None the window uses ``self.by`` and
        ``self._order_by`` with no ``preceding``/``following`` bounds.
        Otherwise the stored window's grouping and ordering come first,
        followed by ``self.by`` and ``self._order_by``, and its bounds are
        carried over.  Ordering entries go through ``ops.to_sort_key`` with
        ``self.table``.
        """
        stored = self._window
        if stored is not None:
            group_keys = stored.group_by + self.by
            order_keys = stored.order_by + self._order_by
            preceding, following = stored.preceding, stored.following
        else:
            group_keys, order_keys = self.by, self._order_by
            preceding = following = None

        table = self.table
        resolved_order = [ops.to_sort_key(table, key) for key in order_keys]

        return _window.window(
            preceding=preceding,
            following=following,
            group_by=group_keys,
            order_by=resolved_order,
        )
Beispiel #7
0
def test_analytic_udf_destruct_no_groupby(backend, alltypes):
    """Destructured demean UDF over an ungrouped window matches demeaning.

    The window has ``preceding`` and ``following`` both None and no
    ``group_by``.  The UDF comes from ``create_demean_struct_udf`` with a
    formatter that returns its two inputs as a tuple.
    """
    win = window(preceding=None, following=None)

    demean_struct_udf = create_demean_struct_udf(
        result_formatter=lambda v1, v2: (v1, v2))

    struct_expr = demean_struct_udf(
        alltypes['double_col'], alltypes['int_col']).over(win)
    result = alltypes.mutate(struct_expr.destructure()).execute()

    demean = alltypes['double_col'] - alltypes['double_col'].mean().over(win)
    demean_weight = alltypes['int_col'] - alltypes['int_col'].mean().over(win)
    expected = alltypes.mutate(
        demean=demean,
        demean_weight=demean_weight,
    ).execute()

    backend.assert_frame_equal(result, expected)
Beispiel #8
0
def test_reduction_udf_destruct_window(backend, alltypes):
    """Destructured ``mean_struct`` over a window matches built-in means.

    The window is grouped by ``year``, ordered by ``timestamp_col``, with
    ``preceding=ibis.interval(hours=2)`` and ``following=0``.
    """
    window_spec = {
        'preceding': ibis.interval(hours=2),
        'following': 0,
        'group_by': 'year',
        'order_by': 'timestamp_col',
    }
    win = window(**window_spec)

    struct_expr = mean_struct(
        alltypes['double_col'], alltypes['int_col']).over(win)
    result = alltypes.mutate(struct_expr.destructure()).execute()

    mean = alltypes['double_col'].mean().over(win)
    mean_weight = alltypes['int_col'].mean().over(win)
    expected = alltypes.mutate(mean=mean, mean_weight=mean_weight).execute()

    backend.assert_frame_equal(result, expected)
Beispiel #9
0
    def _get_window(self):
        """Build the window combining ``self._window`` with this grouping.

        Without a stored window, the grouping is ``self.by``, the ordering
        is ``self._order_by`` and both frame bounds are None.  With one, its
        ``group_by``/``order_by`` are placed before ``self.by`` /
        ``self._order_by`` and its ``preceding``/``following`` are reused.
        Grouping and ordering are each passed through
        ``util.promote_list`` and every entry is mapped with
        ``self.table._ensure_expr``.
        """
        base = self._window
        if base is not None:
            groups = base.group_by + self.by
            ordering = base.order_by + self._order_by
            preceding = base.preceding
            following = base.following
        else:
            groups = self.by
            ordering = self._order_by
            preceding = following = None

        group_exprs = [
            self.table._ensure_expr(key)
            for key in util.promote_list(groups)
        ]
        order_exprs = [
            self.table._ensure_expr(key)
            for key in util.promote_list(ordering)
        ]

        return _window.window(
            preceding=preceding,
            following=following,
            group_by=group_exprs,
            order_by=order_exprs,
        )
Beispiel #10
0
    def _get_window(self):
        """Build the window combining ``self._window`` with this grouping.

        Without a stored window, the grouping is ``self.by``, the ordering
        is ``self._order_by`` and both frame bounds are None.  With one, its
        ``group_by``/``order_by`` are placed before ``self.by`` /
        ``self._order_by`` and its ``preceding``/``following`` are reused.
        Ordering entries are converted with ``ops.sortkeys._to_sort_key``
        and the grouping with ``_resolve_exprs``, both against
        ``self.table``.
        """
        base = self._window
        if base is not None:
            raw_groups = base.group_by + self.by
            raw_sorts = base.order_by + self._order_by
            preceding = base.preceding
            following = base.following
        else:
            raw_groups = self.by
            raw_sorts = self._order_by
            preceding = following = None

        # Sort keys are resolved before the grouping, as in the original.
        sort_keys = [
            ops.sortkeys._to_sort_key(key, table=self.table)
            for key in raw_sorts
        ]
        group_exprs = _resolve_exprs(self.table, raw_groups)

        window_kwargs = {
            'preceding': preceding,
            'following': following,
            'group_by': group_exprs,
            'order_by': sort_keys,
        }
        return _window.window(**window_kwargs)
Beispiel #11
0
def test_analytic_udf_destruct_overwrite(backend, alltypes):
    """Destructuring may overwrite an existing column (``double_col``).

    The window is grouped by ``year`` with ``preceding`` and ``following``
    both None.  The expected frame replaces ``double_col`` with its
    demeaned values and adds ``demean_weight`` from ``int_col``.
    """
    win = window(preceding=None, following=None, group_by='year')

    struct_expr = overwrite_struct_analytic(
        alltypes['double_col'], alltypes['int_col']).over(win)
    result = alltypes.mutate(struct_expr.destructure()).execute()

    demeaned_double = (
        alltypes['double_col'] - alltypes['double_col'].mean().over(win))
    demean_weight = alltypes['int_col'] - alltypes['int_col'].mean().over(win)
    expected = alltypes.mutate(
        double_col=demeaned_double,
        demean_weight=demean_weight,
    ).execute()

    # TODO issue #2649
    # Known limitation in how DestructColumn is treated in assignments:
    # the ordering of op.selections may not exactly match the desired
    # column ordering.  New columns should all appear at the end, but
    # currently they are materialized directly after the overwritten
    # columns, hence the order-insensitive comparison below.
    backend.assert_frame_equal(result, expected, check_like=True)
def test_reduction_udf_destruct_window(udf_backend, udf_alltypes):
    """Destructured mean-struct UDF over a window matches built-in means.

    The window is grouped by ``year``, ordered by ``timestamp_col``, with
    ``preceding=ibis.interval(hours=2)`` and ``following=0``.  The UDF
    comes from ``create_mean_struct_udf`` with a formatter that returns its
    two inputs as a tuple.
    """
    window_spec = {
        'preceding': ibis.interval(hours=2),
        'following': 0,
        'group_by': 'year',
        'order_by': 'timestamp_col',
    }
    win = window(**window_spec)
    mean_struct_udf = create_mean_struct_udf(
        result_formatter=lambda v1, v2: (v1, v2))

    struct_expr = mean_struct_udf(
        udf_alltypes['double_col'], udf_alltypes['int_col']).over(win)
    result = udf_alltypes.mutate(struct_expr.destructure()).execute()

    mean = udf_alltypes['double_col'].mean().over(win)
    mean_weight = udf_alltypes['int_col'].mean().over(win)
    expected = udf_alltypes.mutate(
        mean=mean,
        mean_weight=mean_weight,
    ).execute()

    udf_backend.assert_frame_equal(result, expected)