Exemple #1
0
def _filter_selection(expr, predicates):
    """Filter a Selection expression, fusing into it whenever possible.

    Three strategies are tried in order:

    1. When the selection's op does not block, every predicate that has no
       reduction is rewritten with ``sub_for`` so that it refers to the
       child table instead of ``expr``. If the child table accepts the
       rewritten predicates, a single Selection over the child table is
       returned, carrying the existing predicates followed by the new ones.
    2. Otherwise, when ``_can_pushdown`` allows it, the predicates are passed
       through ``substitute_parents`` and appended to the selection's own
       predicates, keeping its selections and sort keys.
    3. Failing both, a new Selection holding only ``predicates`` is stacked
       on top of ``expr``.

    The result is always returned via ``to_expr()``.
    """
    selection = expr.op()

    if not selection.blocks():
        # Potential fusion opportunity. Expressing the predicates in terms
        # of the child table prevents the broken ref issue (see #59).
        rewritten = []
        for pred in predicates:
            if has_reduction(pred):
                # Predicates containing a reduction are kept untouched.
                rewritten.append(pred)
            else:
                rewritten.append(sub_for(pred, [(expr, selection.table)]))
        rewritten = tuple(rewritten)

        if selection.table._is_valid(rewritten):
            return ops.Selection(
                selection.table,
                [],
                predicates=selection.predicates + rewritten,
                sort_keys=selection.sort_keys,
            ).to_expr()

    # A filter frequently references the projection it is applied to. If any
    # predicate has the parent expression among its roots, pushdown is not
    # possible and the filter has to sit on top of the projection.
    if not _can_pushdown(selection, predicates):
        stacked = ops.Selection(expr, selections=[], predicates=predicates)
        return stacked.to_expr()

    # The predicates originate below the projection: rewrite them against
    # the parent tables and merge them into the existing selection.
    lifted = tuple(substitute_parents(pred) for pred in predicates)
    merged = ops.Selection(
        selection.table,
        selections=selection.selections,
        predicates=selection.predicates + lifted,
        sort_keys=selection.sort_keys,
    )
    return merged.to_expr()
Exemple #2
0
def apply_filter(expr, predicates):
    """Filter ``expr`` by ``predicates``, pushing the filter down if easy.

    Pushdown is attempted only in the cases where it is simple and safe,
    which yields cleaner SQL and fewer referential errors for users:

    * Selection: delegated to ``_filter_selection``.
    * Aggregation: predicates are rewritten against the aggregated table
      and, when that table accepts them, merged into a new Aggregation.
    * Join: the join is materialized before being filtered.

    Anything that was not fused ends up wrapped in a plain Selection, which
    is returned as an ``ir.TableExpr``.
    """
    node = expr.op()

    if isinstance(node, ops.Selection):
        return _filter_selection(expr, predicates)

    if isinstance(node, ops.Aggregation):
        # Potential fusion opportunity.
        rewritten = []
        for pred in predicates:
            if has_reduction(pred):
                # GH1344: we can't sub in things with correlated subqueries.
                rewritten.append(pred)
            else:
                rewritten.append(sub_for(pred, [(expr, node.table)]))

        if node.table._is_valid(rewritten):
            fused = ops.Aggregation(
                node.table,
                node.metrics,
                by=node.by,
                having=node.having,
                predicates=node.predicates + rewritten,
                sort_keys=node.sort_keys,
            )
            return ir.TableExpr(fused)
    elif isinstance(node, ops.Join):
        expr = expr.materialize()

    # No fusion happened: filter on top of the (possibly materialized) input.
    return ir.TableExpr(ops.Selection(expr, [], predicates))
Exemple #3
0
    def _lift_Selection(self, expr, block=None):
        if block is None:
            block = self.block_projection

        op = expr.op()

        if block and op.blocks():
            # GH #549: dig no further
            return expr
        else:
            lifted_table, unch = self._lift_arg(op.table, block=True)

        lifted_selections, unch_sel = self._lift_arg(op.selections, block=True)
        unchanged = unch and unch_sel

        lifted_predicates, unch_sel = self._lift_arg(op.predicates, block=True)
        unchanged = unch and unch_sel

        lifted_sort_keys, unch_sel = self._lift_arg(op.sort_keys, block=True)
        unchanged = unch and unch_sel

        if not unchanged:
            lifted_projection = ops.Selection(lifted_table, lifted_selections,
                                              lifted_predicates,
                                              lifted_sort_keys)
            result = ir.TableExpr(lifted_projection)
        else:
            result = expr

        return result
Exemple #4
0
def apply_filter(expr, predicates):
    """Filter ``expr`` by ``predicates``, fusing with its op when feasible.

    Predicate pushdown is attempted only where it is easy and safe, giving
    cleaner SQL and fewer referential errors for users. Selections are
    handed to ``_filter_selection``; aggregations absorb the predicates when
    the aggregated table accepts them; joins are materialized first. Every
    other outcome is a plain Selection over ``expr`` wrapped in an
    ``ir.TableExpr``.
    """
    operation = expr.op()

    if isinstance(operation, ops.Selection):
        return _filter_selection(expr, predicates)

    if isinstance(operation, ops.Aggregation):
        # Potential fusion opportunity: express each predicate in terms of
        # the table being aggregated rather than the aggregation itself.
        on_source = [
            sub_for(pred, [(expr, operation.table)]) for pred in predicates
        ]

        if operation.table._is_valid(on_source):
            combined = operation.predicates + on_source
            fused = ops.Aggregation(
                operation.table,
                operation.agg_exprs,
                by=operation.by,
                having=operation.having,
                predicates=combined,
                sort_keys=operation.sort_keys,
            )
            return ir.TableExpr(fused)
    elif isinstance(operation, ops.Join):
        expr = expr.materialize()

    fallback = ops.Selection(expr, [], predicates)
    return ir.TableExpr(fallback)
Exemple #5
0
    def get_result(self):
        """Return the Selection op for this projection, fused if possible.

        Fusion via ``self._check_fusion`` is attempted only when the parent
        has exactly one root and that root is a Selection. If fusion is not
        attempted or yields ``None``, a fresh Selection of
        ``self.clean_exprs`` over ``self.parent`` is returned instead.
        """
        parent_roots = self.parent_roots
        single_selection = (
            len(parent_roots) == 1
            and isinstance(parent_roots[0], ops.Selection)
        )

        fused = self._check_fusion(parent_roots[0]) if single_selection else None
        if fused is None:
            return ops.Selection(self.parent, self.clean_exprs)
        return fused
Exemple #6
0
    def get_result(self):
        """Return the Selection op for this projection, fused if possible.

        Fusion via ``self.try_fusion`` is attempted only when the parent has
        exactly one root and that root is a Selection. When fusion is not
        attempted or returns ``None``, a new Selection of
        ``self.clean_exprs`` over ``self.parent`` is returned.
        """
        roots = self.parent_roots

        # Index into ``roots`` only after the length check: reading
        # ``roots[0]`` up front raised IndexError for an empty root list
        # instead of falling through to the plain Selection below.
        if len(roots) == 1:
            first_root = roots[0]
            if isinstance(first_root, ops.Selection):
                fused_op = self.try_fusion(first_root)
                if fused_op is not None:
                    return fused_op

        return ops.Selection(self.parent, self.clean_exprs)
Exemple #7
0
    def _check_fusion(self, root):
        """Try to merge this projection into the Selection ``root``.

        The input expressions are resolved against ``root.table`` and each
        resolved value is classified:

        * a table expression standing for a ``*`` projection contributes the
          selections of ``root`` (or ``root.table`` when ``root`` has none);
        * a value whose parent-substituted form validates against
          ``root.table`` contributes that substituted form;
        * a value that validates only as-is is kept unchanged;
        * a value that validates in neither form aborts fusion.

        Returns a new Selection over ``root.table`` with the collected
        expressions and the predicates and sort keys of ``root``, or
        ``None`` when nothing resolved or fusion is not possible.
        """
        table = root.table
        table_roots = table._root_tables()
        checker = ExprValidator([table])
        collected = []
        fusable = False

        resolved = _maybe_resolve_exprs(table, self.input_exprs)
        if not resolved:
            return None

        for value in resolved:
            lifted = substitute_parents(value)

            # Detect a "*" projection: a table expression that is either
            # compatible with the parent op or shares the single root table.
            is_star = False
            if isinstance(value, ir.TableExpr):
                is_star = self.parent.op().compatible_with(value.op()) or (
                    len(table_roots) == 1
                    and value._root_tables()[0] is table_roots[0]
                )

            if is_star:
                fusable = True

                star_seen = False
                for sel in root.selections:
                    if sel.equals(table):
                        # Reuse the root table so "*" is not added twice.
                        collected.append(table)
                        star_seen = True
                    else:
                        collected.append(sel)

                # The root was a bare filter, i.e. an implicit "select *".
                if not star_seen and len(root.selections) == 0:
                    collected.insert(0, table)
            elif checker.validate(lifted):
                fusable = True
                collected.append(lifted)
            elif checker.validate(value):
                collected.append(value)
            else:
                # Neither form is valid against the root table: give up.
                return None

        if not fusable:
            return None

        return ops.Selection(
            table,
            collected,
            predicates=root.predicates,
            sort_keys=root.sort_keys,
        )
Exemple #8
0
def apply_filter(expr, predicates):
    """Filter ``expr`` by ``predicates``, fusing with its op when feasible.

    Predicate pushdown is attempted only where it is easy and safe, giving
    cleaner SQL and fewer referential errors for users. Selections are
    handled by ``_filter_selection``. Aggregations absorb the predicates
    when the aggregated table reports them as valid. Everything else is
    wrapped in a plain Selection and returned as an ``ir.TableExpr``.
    """
    node = expr.op()

    if isinstance(node, ops.Selection):
        return _filter_selection(expr, predicates)

    if isinstance(node, ops.Aggregation):
        # Potential fusion opportunity.

        def _rewrite(pred):
            # GH1344: we can't sub in things with correlated subqueries.
            if has_reduction(pred):
                return pred
            # Originally op.table was substituted in for expr, but that is
            # too aggressive in the presence of filters that occur after
            # aggregations.
            #
            # See https://github.com/ibis-project/ibis/pull/3341 for details
            return sub_for(pred, [(node.table, expr)])

        rewritten = [_rewrite(pred) for pred in predicates]

        if node.table._is_valid(rewritten):
            fused = ops.Aggregation(
                node.table,
                node.metrics,
                by=node.by,
                having=node.having,
                predicates=node.predicates + rewritten,
                sort_keys=node.sort_keys,
            )
            return ir.TableExpr(fused)

    return ir.TableExpr(ops.Selection(expr, [], predicates))
Exemple #9
0
    def try_fusion(self, root):
        """Try to merge this projection into the Selection op ``root``.

        ``root`` must be the op of ``self.parent``. Unless the table under
        ``root`` is a join, the input expressions are re-resolved against
        that table; fusion is abandoned if any resolved expression differs
        from its counterpart in ``self.clean_exprs``. Each candidate then
        either expands to the selections of ``root`` (a ``*`` projection),
        is kept because it shares all roots with the root table, or makes
        fusion fail.

        Returns a Selection over the root table with the fused expressions
        and the predicates and sort keys of ``root``, or ``None`` when
        fusion is not possible.
        """
        assert self.parent.op() == root

        base = root.table
        base_roots = base.op().root_tables()
        merged = []
        cleaned = self.clean_exprs

        if isinstance(base.op(), ops.Join):
            # Joins cannot be used to resolve expressions, but columns from
            # a projection off of a join may still be fusable, so fall back
            # to the projection's own cleaned expressions.
            candidates = cleaned
        else:
            try:
                candidates = [
                    base._ensure_expr(item)
                    for item in util.promote_list(self.input_exprs)
                ]
            except (AttributeError, IbisTypeError):
                candidates = cleaned
            else:
                # Only fuse when every re-resolved expression is exactly
                # equivalent to its cleaned counterpart.
                if not all(
                    fresh.equals(known)
                    for fresh, known in zip(candidates, cleaned)
                ):
                    return None

        prior_selections = root.selections
        parent_op = self.parent.op()
        for candidate in candidates:
            # A "*" projection: a table that equals the parent op or shares
            # the single root table.
            is_star = isinstance(candidate, ir.Table) and (
                parent_op.equals(candidate.op())
                or (
                    len(base_roots) == 1
                    and candidate.op().root_tables()[0] is base_roots[0]
                )
            )

            if is_star:
                star_seen = False
                for sel in prior_selections:
                    if sel.equals(base):
                        # Reuse the root table so "*" is not added twice.
                        merged.append(base)
                        star_seen = True
                    else:
                        merged.append(sel)

                # The root was a bare filter, i.e. an implicit "select *".
                if not star_seen and not prior_selections:
                    merged.insert(0, base)
            elif shares_all_roots(candidate, base):
                merged.append(candidate)
            else:
                return None

        return ops.Selection(
            base,
            merged,
            predicates=root.predicates,
            sort_keys=root.sort_keys,
        )
Exemple #10
0
    def try_fusion(self, root):
        """Try to merge this projection into the Selection ``root``.

        Candidate expressions come from resolving ``self.input_exprs``
        against the table under ``root``, or from ``self.clean_exprs`` when
        that table is a join. Each candidate either expands to the
        selections of ``root`` (a ``*`` projection), is taken in its
        parent-substituted form when that validates against the root table,
        is kept as-is when only the original validates, or aborts fusion.

        Returns a Selection over the root table with the fused expressions
        and the predicates and sort keys of ``root``, or ``None`` when there
        are no candidates or fusion is not possible.
        """
        base = root.table
        base_roots = base._root_tables()
        checker = ExprValidator([base])
        merged = []
        fusable = False

        if isinstance(base.op(), ops.Join):
            # Joins cannot be used to resolve expressions, but columns from
            # a projection off of a join may still be fusable, so use the
            # projection's cleaned expressions as the fusion candidates.
            candidates = self.clean_exprs
        else:
            candidates = _maybe_resolve_exprs(base, self.input_exprs)

        if not candidates:
            return None

        prior_selections = root.selections
        parent_op = self.parent.op()
        for candidate in candidates:
            lifted = substitute_parents(candidate)

            # A "*" projection: a table expression compatible with the
            # parent op, or one sharing the single root table.
            is_star = isinstance(candidate, ir.TableExpr) and (
                parent_op.compatible_with(candidate.op())
                or (
                    len(base_roots) == 1
                    and candidate._root_tables()[0] is base_roots[0]
                )
            )

            if is_star:
                fusable = True
                star_seen = False
                for sel in prior_selections:
                    if sel.equals(base):
                        # Reuse the root table so "*" is not added twice.
                        merged.append(base)
                        star_seen = True
                    else:
                        merged.append(sel)

                # The root was a bare filter, i.e. an implicit "select *".
                if not star_seen and not prior_selections:
                    merged.insert(0, base)
            elif checker.validate(lifted):
                fusable = True
                merged.append(lifted)
            elif checker.validate(candidate):
                merged.append(candidate)
            else:
                # Neither form is valid against the root table: give up.
                return None

        if not fusable:
            return None

        return ops.Selection(
            base,
            merged,
            predicates=root.predicates,
            sort_keys=root.sort_keys,
        )