Exemple #1
0
def apply_filter(expr, predicates):
    """Filter ``expr`` by ``predicates``, fusing with the node below if possible.

    Predicate pushdown is attempted only in the cases where it can be done
    easily and safely, to make both cleaner SQL and fewer referential errors
    for users.

    Parameters
    ----------
    expr : table expression to filter
    predicates : list of predicate expressions

    Returns
    -------
    ir.TableExpr
        Either a fused node (selection or aggregation carrying the extra
        predicates) or a new selection wrapped around ``expr``.
    """
    node = expr.op()

    if isinstance(node, ops.Selection):
        return _filter_selection(expr, predicates)

    if isinstance(node, ops.Aggregation):
        # Potential fusion opportunity: each predicate is passed through
        # sub_for with the (expr, source table) pair before validation.
        source = node.table
        rewritten = [sub_for(pred, [(expr, source)]) for pred in predicates]

        if source._is_valid(rewritten):
            fused = ops.Aggregation(
                source,
                node.agg_exprs,
                by=node.by,
                having=node.having,
                predicates=node.predicates + rewritten,
                sort_keys=node.sort_keys,
            )
            return ir.TableExpr(fused)
        # Not valid against the source table: fall through to the plain
        # selection below.
    elif isinstance(node, ops.Join):
        expr = expr.materialize()

    return ir.TableExpr(ops.Selection(expr, [], predicates))
Exemple #2
0
def apply_filter(expr, predicates):
    """Filter ``expr`` by ``predicates``, fusing with the node below if possible.

    Predicate pushdown is attempted only in the cases where it can be done
    easily and safely, to make both cleaner SQL and fewer referential errors
    for users.

    Parameters
    ----------
    expr : table expression to filter
    predicates : list of predicate expressions

    Returns
    -------
    ir.TableExpr
    """
    node = expr.op()

    if isinstance(node, ops.Selection):
        return _filter_selection(expr, predicates)

    if isinstance(node, ops.Aggregation):
        # Potential fusion opportunity
        rewritten = []
        for pred in predicates:
            if has_reduction(pred):
                # GH1344: We can't sub in things with correlated subqueries,
                # so predicates containing a reduction are kept as given.
                rewritten.append(pred)
            else:
                rewritten.append(sub_for(pred, [(expr, node.table)]))

        if node.table._is_valid(rewritten):
            fused = ops.Aggregation(
                node.table,
                node.metrics,
                by=node.by,
                having=node.having,
                predicates=node.predicates + rewritten,
                sort_keys=node.sort_keys,
            )
            return ir.TableExpr(fused)
    elif isinstance(node, ops.Join):
        expr = expr.materialize()

    # No fusion happened: wrap the (possibly materialized) expression.
    wrapped = ops.Selection(expr, [], predicates)
    return ir.TableExpr(wrapped)
Exemple #3
0
    def _lift_Selection(self, expr, block=None):
        """Lift the components of a Selection, rebuilding it if any changed.

        Parameters
        ----------
        expr : table expression whose op is a Selection
        block : bool, optional
            Falls back to ``self.block_projection`` when None.

        Returns
        -------
        ``expr`` itself when lifting is blocked or nothing changed, otherwise
        a new ``ir.TableExpr`` wrapping a Selection built from the lifted
        table, selections, predicates and sort keys.
        """
        if block is None:
            block = self.block_projection

        op = expr.op()

        if block and op.blocks():
            # GH #549: dig no further
            return expr

        lifted_table, unch_table = self._lift_arg(op.table, block=True)
        lifted_selections, unch_sel = self._lift_arg(op.selections, block=True)
        lifted_predicates, unch_pred = self._lift_arg(op.predicates, block=True)
        lifted_sort_keys, unch_sort = self._lift_arg(op.sort_keys, block=True)

        # All four flags must be combined. Overwriting a single "unchanged"
        # variable after each call would let a later unchanged component mask
        # an earlier change and discard the lifted selections/predicates.
        if unch_table and unch_sel and unch_pred and unch_sort:
            return expr

        lifted_projection = ops.Selection(lifted_table, lifted_selections,
                                          lifted_predicates,
                                          lifted_sort_keys)
        return ir.TableExpr(lifted_projection)
Exemple #4
0
    def _lift_Aggregation(self, expr, block=None):
        """Lift the components of an Aggregation, rebuilding it if any changed.

        Parameters
        ----------
        expr : table expression whose op is an Aggregation
        block : bool, optional
            Falls back to ``self.block_projection`` when None.

        Returns
        -------
        ``expr`` itself when nothing changed, otherwise a new
        ``ir.TableExpr`` wrapping the rebuilt Aggregation.
        """
        if block is None:
            block = self.block_projection

        node = expr.op()
        source = node.table

        # as exposed in #544, do not lift the table inside (which may be
        # filtered or otherwise altered in some way) if blocking
        lifted_table = source if block else self.lift(source, block=True)

        lifted_aggs, same_aggs = self._lift_arg(node.metrics, block=True)
        lifted_by, same_by = self._lift_arg(node.by, block=True)
        lifted_having, same_having = self._lift_arg(node.having, block=True)

        if all((lifted_table is source, same_aggs, same_by, same_having)):
            return expr

        # NOTE(review): only table, metrics, by and having are forwarded to
        # the rebuilt node; confirm nothing else on the op needs carrying over.
        rebuilt = ops.Aggregation(lifted_table,
                                  lifted_aggs,
                                  by=lifted_by,
                                  having=lifted_having)
        return ir.TableExpr(rebuilt)
    def _inject_table(self, name, schema):
        """Return a table expression for ``name``, registering it if needed.

        An entry already present in ``self.meta.tables`` is reused; otherwise
        one is created via ``alch.table_from_schema`` from ``schema``.
        """
        known_tables = self.meta.tables
        if name in known_tables:
            sa_table = known_tables[name]
        else:
            sa_table = alch.table_from_schema(name, self.meta, schema)

        return ir.TableExpr(alch.AlchemyTable(sa_table, self))
Exemple #6
0
def _filter_selection(expr, predicates):
    """Apply ``predicates`` to an expression whose op is a Selection.

    Three outcomes are possible, tried in this order:

    1. The selection does not block and the rewritten predicates are valid
       against its child table: fuse into a selection on the child table.
    2. ``_can_pushdown`` allows it: fuse the predicates into the existing
       selection, keeping its projected expressions.
    3. Otherwise: wrap ``expr`` in a new filtering selection.

    Returns
    -------
    ir.TableExpr
    """
    # If any of the filter predicates have the parent expression among
    # their roots, then pushdown (at least of that predicate) is not
    # possible.
    #
    # It's not unusual for the filter to reference the projection itself.
    # If a predicate can be pushed down, in this case we must rewrite
    # replacing the table refs with the roots internal to the projection we
    # are referencing.
    #
    # Assuming that the fields referenced by the filter predicate originate
    # below the projection, we need to rewrite the predicate referencing
    # the parent tables in the join being projected.
    selection = expr.op()
    child = selection.table

    if not selection.blocks():
        # Potential fusion opportunity. The predicates may need to be
        # rewritten in terms of the child table. This prevents the broken
        # ref issue (described in more detail in #59)
        rewritten = [sub_for(pred, [(expr, child)]) for pred in predicates]

        if child._is_valid(rewritten):
            fused = ops.Selection(
                child,
                [],
                predicates=selection.predicates + rewritten,
                sort_keys=selection.sort_keys,
            )
            return ir.TableExpr(fused)

    if _can_pushdown(selection, predicates):
        pushed = [substitute_parents(pred) for pred in predicates]
        result = ops.Selection(
            child,
            proj_exprs=selection.selections,
            predicates=selection.predicates + pushed,
            sort_keys=selection.sort_keys,
        )
    else:
        result = ops.Selection(expr, proj_exprs=[], predicates=predicates)

    return ir.TableExpr(result)
Exemple #7
0
def apply_filter(expr, predicates):
    """Filter ``expr`` by ``predicates``, fusing with the node below if possible.

    Predicate pushdown is attempted only in the cases where it can be done
    easily and safely, to make both cleaner SQL and fewer referential errors
    for users.

    Parameters
    ----------
    expr : table expression to filter
    predicates : list of predicate expressions

    Returns
    -------
    ir.TableExpr
    """
    node = expr.op()

    if isinstance(node, ops.Selection):
        return _filter_selection(expr, predicates)

    if isinstance(node, ops.Aggregation):
        # Potential fusion opportunity
        rewritten = []
        for pred in predicates:
            if has_reduction(pred):
                # GH1344: We can't sub in things with correlated subqueries
                rewritten.append(pred)
                continue
            # Originally this line tried substituting op.table in for expr,
            # but that is too aggressive in the presence of filters that
            # occur after aggregations.
            #
            # See https://github.com/ibis-project/ibis/pull/3341 for details
            rewritten.append(sub_for(pred, [(node.table, expr)]))

        if node.table._is_valid(rewritten):
            fused = ops.Aggregation(
                node.table,
                node.metrics,
                by=node.by,
                having=node.having,
                predicates=node.predicates + rewritten,
                sort_keys=node.sort_keys,
            )
            return ir.TableExpr(fused)

    # Either not fusable or not a node kind handled above.
    return ir.TableExpr(ops.Selection(expr, [], predicates))
Exemple #8
0
    def _wrap_new_table(self, qualified_name, persist):
        """Build a table expression for a new table and record its row count.

        Parameters
        ----------
        qualified_name : name of the table, as used in the generated SQL
        persist : bool
            When true the expression comes from ``self.table``; otherwise an
            ``ImpalaTemporaryTable`` node is created from the table's schema.

        Returns
        -------
        The table expression. As a side effect, a row count is executed and
        an ``alter table ... set tblproperties`` statement is issued.
        """
        if persist:
            t = self.table(qualified_name)
        else:
            temp_node = ImpalaTemporaryTable(
                qualified_name, self._get_table_schema(qualified_name), self
            )
            t = ir.TableExpr(temp_node)

        # Compute number of rows in table for better default query planning
        row_count = t.count().execute()
        stats_template = (
            "alter table {0} set tblproperties('numRows'='{1}', "
            "'STATS_GENERATED_VIA_STATS_TASK' = 'true')"
        )
        self._execute(stats_template.format(qualified_name, row_count))

        return t
Exemple #9
0
    def sql(self, query):
        """
        Convert a SQL query to an Ibis table expression

        Parameters
        ----------
        query : SQL text; it is interpolated as a subquery to obtain the
            schema and passed unchanged to the resulting node

        Returns
        -------
        table : TableExpr
        """
        # The schema is discovered by wrapping the query in an outer SELECT
        # with LIMIT 0; the query text itself is not modified.
        schema_probe = 'SELECT * FROM ({}) t0 LIMIT 0'.format(query)
        result_schema = self._get_schema_using_query(schema_probe)

        return ir.TableExpr(ops.SQLQueryResult(query, result_schema, self))
Exemple #10
0
    def table(self, name, database=None):
        """
        Create a table expression that references a particular table in the
        database

        Parameters
        ----------
        name : string
        database : string, optional

        Returns
        -------
        table : TableExpr
        """
        # Resolve the full name first; the schema lookup and the node both
        # use the qualified form.
        full_name = self._fully_qualified_name(name, database)
        table_node = ops.DatabaseTable(
            full_name, self._get_table_schema(full_name), self
        )
        return ir.TableExpr(table_node)
Exemple #11
0
    def _lift_Projection(self, expr, block=None):
        """Lift the table and selections of a Projection.

        Parameters
        ----------
        expr : table expression whose op is a Projection
        block : bool, optional
            Falls back to ``self.block_projection`` when None. When truthy
            the projection's table is left as is.

        Returns
        -------
        ``expr`` itself when nothing changed, otherwise a new
        ``ir.TableExpr`` wrapping the rebuilt Projection.
        """
        if block is None:
            block = self.block_projection

        node = expr.op()

        if block:
            lifted_table, same_table = node.table, True
        else:
            lifted_table, same_table = self._lift_arg(node.table, block=True)

        lifted_selections, same_selections = self._lift_arg(
            node.selections, block=True
        )

        if same_table and same_selections:
            return expr

        return ir.TableExpr(ops.Projection(lifted_table, lifted_selections))
Exemple #12
0
    def _lift_Join(self, expr, block=None):
        """Lift both sides of a join and re-substitute its predicates.

        Returns ``expr`` itself when neither side nor any predicate changed
        (compared by identity); otherwise a new ``ir.TableExpr`` wrapping a
        join of the same op type built from the lifted parts.
        """
        join = expr.op()

        new_left = self.lift(join.left, block=block)
        new_right = self.lift(join.right, block=block)

        # Fix predicates: keep each original next to its substituted form so
        # the identity comparison below needs no second pass over the op.
        pred_pairs = [
            (pred, self._sub(pred, block=True)) for pred in join.predicates
        ]
        new_preds = [subbed for _, subbed in pred_pairs]

        sides_same = new_left is join.left and new_right is join.right
        preds_same = all(subbed is pred for pred, subbed in pred_pairs)

        if sides_same and preds_same:
            return expr

        return ir.TableExpr(type(join)(new_left, new_right, new_preds))
Exemple #13
0
    def _lift_Aggregation(self, expr, block=None):
        """Lift the components of an Aggregation, rebuilding it if any changed.

        Parameters
        ----------
        expr : table expression whose op is an Aggregation
        block : bool, optional
            Falls back to ``self.block_projection`` when None. When truthy
            the aggregation's source table is not lifted.

        Returns
        -------
        ``expr`` itself when nothing changed, otherwise a new
        ``ir.TableExpr`` wrapping the rebuilt Aggregation.
        """
        if block is None:
            block = self.block_projection

        op = expr.op()

        # Honour the blocking flag: the table inside may be filtered or
        # otherwise altered in some way, so it must not be lifted when
        # blocking. (The flag used to be resolved and then ignored.)
        if block:
            lifted_table = op.table
        else:
            lifted_table = self.lift(op.table, block=True)

        unch = lifted_table is op.table

        lifted_aggs, unch1 = self._lift_arg(op.agg_exprs, block=True)
        lifted_by, unch2 = self._lift_arg(op.by, block=True)
        lifted_having, unch3 = self._lift_arg(op.having, block=True)

        if unch and unch1 and unch2 and unch3:
            return expr

        lifted_op = ops.Aggregation(lifted_table,
                                    lifted_aggs,
                                    by=lifted_by,
                                    having=lifted_having)
        return ir.TableExpr(lifted_op)