コード例 #1
0
ファイル: rules.py プロジェクト: EvgenyKarataev/raco
    def fire(self, expr):
        # don't allow swap-created join to be swapped
        if isinstance(expr, algebra.Join) and not hasattr(expr, '__swapped__'):
            assert type(expr) is algebra.Join
            # An apply will undo the effect of the swap on the scheme,
            # so above operators won't be affected
            left_sch = expr.left.scheme()
            right_sch = expr.right.scheme()
            leftlen = len(left_sch)
            rightlen = len(right_sch)
            assert leftlen + rightlen == len(expr.scheme())
            emitters_left = [(left_sch.getName(i),
                              UnnamedAttributeRef(rightlen + i))
                             for i in range(leftlen)]
            emitters_right = [(right_sch.getName(i), UnnamedAttributeRef(i))
                              for i in range(rightlen)]
            emitters = emitters_left + emitters_right

            # reindex the expression
            index_map = dict([(oldpos, attr[1].position)
                              for (oldpos, attr) in enumerate(emitters)])

            expression.reindex_expr(expr.condition, index_map)

            newjoin = algebra.Join(expr.condition, expr.right, expr.left)
            newjoin.__swapped__ = True

            return algebra.Apply(emitters=emitters, input=newjoin)
        else:
            return expr
コード例 #2
0
ファイル: myrialang.py プロジェクト: TheYorkWei/raco
    def fire(self, op):
        if isinstance(op, algebra.GroupBy):
            child = op.input
            child_scheme = child.scheme()
            grp_list = [to_unnamed_recursive(g, child_scheme)
                        for g in op.grouping_list]
            agg_list = [to_unnamed_recursive(a, child_scheme)
                        for a in op.aggregate_list]
            agg = [accessed_columns(a) for a in agg_list]
            pos = [g.position for g in grp_list]
            accessed = sorted(set(itertools.chain(*(agg + [pos]))))
            if not accessed:
                # Bug #207: COUNTALL() does not access any columns. So if the
                # query is just a COUNT(*), we would generate an empty Apply.
                # If this happens, just keep the first column of the input.
                accessed = [0]
            if len(accessed) != len(child_scheme):
                emitters = [(None, UnnamedAttributeRef(i)) for i in accessed]
                new_apply = algebra.Apply(emitters, child)
                index_map = {a: i for (i, a) in enumerate(accessed)}
                for agg_expr in itertools.chain(grp_list, agg_list):
                    expression.reindex_expr(agg_expr, index_map)
                op.grouping_list = grp_list
                op.aggregate_list = agg_list
                op.input = new_apply
                return op
        elif isinstance(op, algebra.ProjectingJoin):
            l_scheme = op.left.scheme()
            r_scheme = op.right.scheme()
            in_scheme = l_scheme + r_scheme
            condition = to_unnamed_recursive(op.condition, in_scheme)
            column_list = [to_unnamed_recursive(c, in_scheme)
                           for c in op.output_columns]

            accessed = (accessed_columns(condition)
                        | set(c.position for c in op.output_columns))
            if len(accessed) == len(in_scheme):
                return op

            accessed = sorted(accessed)
            left = [a for a in accessed if a < len(l_scheme)]
            if len(left) < len(l_scheme):
                emits = [(None, UnnamedAttributeRef(a)) for a in left]
                apply = algebra.Apply(emits, op.left)
                op.left = apply
            right = [a - len(l_scheme) for a in accessed
                     if a >= len(l_scheme)]
            if len(right) < len(r_scheme):
                emits = [(None, UnnamedAttributeRef(a)) for a in right]
                apply = algebra.Apply(emits, op.right)
                op.right = apply
            index_map = {a: i for (i, a) in enumerate(accessed)}
            expression.reindex_expr(condition, index_map)
            [expression.reindex_expr(c, index_map) for c in column_list]
            op.condition = condition
            op.output_columns = column_list
            return op

        return op
コード例 #3
0
ファイル: myrialang.py プロジェクト: TheYorkWei/raco
    def fire(self, op):
        if not isinstance(op, algebra.Apply):
            return op

        child = op.input

        if isinstance(child, algebra.Apply):
            in_scheme = child.scheme()
            child_in_scheme = child.input.scheme()
            names, emits = zip(*op.emitters)
            emits = [to_unnamed_recursive(e, in_scheme)
                     for e in emits]
            child_emits = [to_unnamed_recursive(e[1], child_in_scheme)
                           for e in child.emitters]

            def convert(n):
                if isinstance(n, expression.UnnamedAttributeRef):
                    n = child_emits[n.position]
                else:
                    n.apply(convert)
                return n

            emits = [convert(copy.deepcopy(e)) for e in emits]

            new_apply = algebra.Apply(emitters=zip(names, emits),
                                      input=child.input)
            return self.fire(new_apply)

        elif isinstance(child, algebra.ProjectingJoin):
            in_scheme = child.scheme()
            names, emits = zip(*op.emitters)
            emits = [to_unnamed_recursive(e, in_scheme)
                     for e in emits]
            accessed = sorted(set(itertools.chain(*(accessed_columns(e)
                                                    for e in emits))))
            index_map = {a: i for (i, a) in enumerate(accessed)}
            child.output_columns = [child.output_columns[i] for i in accessed]
            for e in emits:
                expression.reindex_expr(e, index_map)
            # TODO(dhalperi) we may not need the Apply if all it did was rename
            # and/or select certain columns. Figure out these cases and omit
            # the Apply
            return algebra.Apply(emitters=zip(names, emits),
                                 input=child)

        return op
コード例 #4
0
    def fire(self, expr):
        # don't allow swap-created join to be swapped
        if (isinstance(expr, algebra.Join) or
            isinstance(expr, algebra.CrossProduct)) \
                and not hasattr(expr, '__swapped__'):

            assert (
                isinstance(
                    expr,
                    algebra.Join)) or (
                isinstance(
                    expr,
                    algebra.CrossProduct))

            # An apply will undo the effect of the swap on the scheme,
            # so above operators won't be affected
            left_sch = expr.left.scheme()
            right_sch = expr.right.scheme()
            leftlen = len(left_sch)
            rightlen = len(right_sch)
            assert leftlen + rightlen == len(expr.scheme())
            emitters_left = [(left_sch.getName(i),
                              UnnamedAttributeRef(rightlen + i))
                             for i in range(leftlen)]
            emitters_right = [(right_sch.getName(i), UnnamedAttributeRef(i))
                              for i in range(rightlen)]
            emitters = emitters_left + emitters_right

            if isinstance(expr, algebra.Join):
                # reindex the expression
                index_map = dict([(oldpos, attr[1].position)
                                  for (oldpos, attr) in enumerate(emitters)])

                expression.reindex_expr(expr.condition, index_map)

                newjoin = algebra.Join(expr.condition, expr.right, expr.left)
            else:
                newjoin = algebra.CrossProduct(expr.right, expr.left)

            newjoin.__swapped__ = True

            return algebra.Apply(emitters=emitters, input=newjoin)
        else:
            return expr
コード例 #5
0
ファイル: rules.py プロジェクト: anukat2015/raco
    def fire(self, op):
        if not isinstance(op, algebra.Apply):
            return op

        child = op.input

        if isinstance(child, algebra.Apply):

            emits = op.get_unnamed_emit_exprs()
            child_emits = child.get_unnamed_emit_exprs()

            def convert(n):
                if isinstance(n, expression.UnnamedAttributeRef):
                    return child_emits[n.position]
                n.apply(convert)
                return n
            emits = [convert(e) for e in emits]

            new_apply = algebra.Apply(emitters=zip(op.get_names(), emits),
                                      input=child.input)
            return self.fire(new_apply)

        elif isinstance(child, algebra.ProjectingJoin):
            emits = op.get_unnamed_emit_exprs()

            # If this apply is only AttributeRefs and the columns already
            # have the correct names, we can push it into the ProjectingJoin
            if (all(isinstance(e, expression.AttributeRef) for e in emits)
                    and len(set(emits)) == len(emits)):
                new_cols = [child.output_columns[e.position] for e in emits]
                # We need to ensure that left columns come before right cols
                left_sch = child.left.scheme()
                right_sch = child.right.scheme()
                combined = left_sch + right_sch
                left_len = len(left_sch)
                new_cols = [expression.to_unnamed_recursive(e, combined)
                            for e in new_cols]
                side = [e.position >= left_len for e in new_cols]
                if sorted(side) == side:
                    # Left columns do come before right cols
                    new_pj = algebra.ProjectingJoin(
                        condition=child.condition, left=child.left,
                        right=child.right, output_columns=new_cols)
                    if new_pj.scheme() == op.scheme():
                        return new_pj

            accessed = sorted(set(itertools.chain(*(accessed_columns(e)
                                                    for e in emits))))
            index_map = {a: i for (i, a) in enumerate(accessed)}
            child.output_columns = [child.output_columns[i] for i in accessed]
            for e in emits:
                expression.reindex_expr(e, index_map)

            return algebra.Apply(emitters=zip(op.get_names(), emits),
                                 input=child)

        elif isinstance(child, algebra.GroupBy):
            emits = op.get_unnamed_emit_exprs()
            assert all(is_simple_agg_expr(agg) for agg in child.aggregate_list)

            accessed = sorted(set(itertools.chain(*(accessed_columns(e)
                                                    for e in emits))))
            num_grps = len(child.grouping_list)
            accessed_aggs = [i for i in accessed if i >= num_grps]
            if len(accessed_aggs) == len(child.aggregate_list):
                return op

            unused_map = {i: j + num_grps for j, i in enumerate(accessed_aggs)}

            # copy the groupby operator so we can modify it
            newgb = child.__class__()
            newgb.copy(child)

            # remove aggregates that are projected out
            newgb.aggregate_list = [newgb.aggregate_list[i - num_grps]
                                    for i in accessed_aggs]
            for e in emits:
                expression.reindex_expr(e, unused_map)

            return algebra.Apply(emitters=zip(op.get_names(), emits),
                                 input=newgb)

        return op
コード例 #6
0
ファイル: rules.py プロジェクト: anukat2015/raco
    def descend_tree(op, cond):
        """Recursively push a selection condition down a tree of operators.

        :param op: The root of an operator tree
        :type op: raco.algebra.Operator
        :param cond: The selection condition
        :type cond: raco.expression.expression

        :return: A (possibly modified) operator.
        """

        if isinstance(op, algebra.Select):
            # Keep pushing; selects are commutative
            op.input = PushSelects.descend_tree(op.input, cond)
            return op
        elif isinstance(op, algebra.CompositeBinaryOperator):
            # Joins and cross-products; consider conversion to an equijoin
            # Expressions containing random do not commute across joins
            has_random = any(isinstance(e, RANDOM) for e in cond.walk())
            if not has_random:
                left_len = len(op.left.scheme())
                accessed = accessed_columns(cond)
                in_left = [col < left_len for col in accessed]
                if all(in_left):
                    # Push the select into the left sub-tree.
                    op.left = PushSelects.descend_tree(op.left, cond)
                    return op
                elif not any(in_left):
                    # Push into right subtree; rebase column indexes
                    expression.rebase_expr(cond, left_len)
                    op.right = PushSelects.descend_tree(op.right, cond)
                    return op
                else:
                    # Selection includes both children; attempt to create an
                    # equijoin condition
                    cols = PushSelects.is_column_equality_comparison(cond)
                    if cols:
                        return op.add_equijoin_condition(cols[0], cols[1])
        elif isinstance(op, algebra.Apply):
            # Convert accessed to a list from a set to ensure consistent order
            accessed = list(accessed_columns(cond))
            accessed_emits = [op.emitters[i][1] for i in accessed]
            if all(isinstance(e, expression.AttributeRef)
                   for e in accessed_emits):
                unnamed_emits = expression.ensure_unnamed(
                    accessed_emits, op.input)
                # This condition only touches columns that are copied verbatim
                # from the child, so we can push it.
                index_map = {a: e.position
                             for (a, e) in zip(accessed, unnamed_emits)}
                expression.reindex_expr(cond, index_map)
                op.input = PushSelects.descend_tree(op.input, cond)
                return op
        elif isinstance(op, algebra.GroupBy):
            # Convert accessed to a list from a set to ensure consistent order
            accessed = list(accessed_columns(cond))
            if all((a < len(op.grouping_list)) for a in accessed):
                accessed_grps = [op.grouping_list[a] for a in accessed]
                # This condition only touches columns that are copied verbatim
                # from the child (grouping keys), so we can push it.
                assert all(isinstance(e, expression.AttributeRef)
                           for e in op.grouping_list)
                unnamed_grps = expression.ensure_unnamed(accessed_grps,
                                                         op.input)
                index_map = {a: e.position
                             for (a, e) in zip(accessed, unnamed_grps)}
                expression.reindex_expr(cond, index_map)
                op.input = PushSelects.descend_tree(op.input, cond)
                return op

        # Can't push any more: instantiate the selection
        new_op = algebra.Select(cond, op)
        new_op.has_been_pushed = True
        return new_op
コード例 #7
0
    def fire(self, op):
        if isinstance(op, algebra.GroupBy):
            child = op.input
            child_scheme = child.scheme()
            grp_list = op.get_unnamed_grouping_list()
            agg_list = op.get_unnamed_aggregate_list()

            up_names = [name for name, ex in op.updaters]
            up_list = op.get_unnamed_update_exprs()

            agg = [accessed_columns(a) for a in agg_list]
            up = [accessed_columns(a) for a in up_list]
            pos = [{g.position} for g in grp_list]

            accessed = sorted(set(itertools.chain(*(up + agg + pos))))
            if not accessed:
                # Bug #207: COUNTALL() does not access any columns. So if the
                # query is just a COUNT(*), we would generate an empty Apply.
                # If this happens, just keep the first column of the input.
                accessed = [0]
            if len(accessed) != len(child_scheme):
                emitters = [(None, UnnamedAttributeRef(i)) for i in accessed]
                new_apply = algebra.Apply(emitters, child)
                index_map = {a: i for (i, a) in enumerate(accessed)}
                for agg_expr in itertools.chain(grp_list, agg_list, up_list):
                    expression.reindex_expr(agg_expr, index_map)
                op.grouping_list = grp_list
                op.aggregate_list = agg_list
                op.updaters = [(name, ex) for name, ex in
                               zip(up_names, up_list)]
                op.input = new_apply
                return op
        elif isinstance(op, algebra.ProjectingJoin):
            l_scheme = op.left.scheme()
            r_scheme = op.right.scheme()
            in_scheme = l_scheme + r_scheme
            condition = to_unnamed_recursive(op.condition, in_scheme)
            column_list = [to_unnamed_recursive(c, in_scheme)
                           for c in op.output_columns]

            accessed = (accessed_columns(condition) |
                        set(c.position for c in op.output_columns))
            if len(accessed) == len(in_scheme):
                return op

            accessed = sorted(accessed)
            left = [a for a in accessed if a < len(l_scheme)]
            if len(left) < len(l_scheme):
                emits = [(None, UnnamedAttributeRef(a)) for a in left]
                apply = algebra.Apply(emits, op.left)
                op.left = apply
            right = [a - len(l_scheme) for a in accessed
                     if a >= len(l_scheme)]
            if len(right) < len(r_scheme):
                emits = [(None, UnnamedAttributeRef(a)) for a in right]
                apply = algebra.Apply(emits, op.right)
                op.right = apply
            index_map = {a: i for (i, a) in enumerate(accessed)}
            expression.reindex_expr(condition, index_map)
            [expression.reindex_expr(c, index_map) for c in column_list]
            op.condition = condition
            op.output_columns = column_list
            return op

        return op
コード例 #8
0
    def fire(self, op):
        if not isinstance(op, algebra.Apply):
            return op

        child = op.input

        if isinstance(child, algebra.Apply):

            emits = op.get_unnamed_emit_exprs()
            child_emits = child.get_unnamed_emit_exprs()

            def convert(n):
                if isinstance(n, expression.UnnamedAttributeRef):
                    return child_emits[n.position]
                n.apply(convert)
                return n
            emits = [convert(e) for e in emits]

            new_apply = algebra.Apply(emitters=zip(op.get_names(), emits),
                                      input=child.input)
            return self.fire(new_apply)

        elif isinstance(child, algebra.ProjectingJoin):
            emits = op.get_unnamed_emit_exprs()

            # If this apply is only AttributeRefs and the columns already
            # have the correct names, we can push it into the ProjectingJoin
            if (all(isinstance(e, expression.AttributeRef) for e in emits) and
                    len(set(emits)) == len(emits)):
                new_cols = [child.output_columns[e.position] for e in emits]
                # We need to ensure that left columns come before right cols
                left_sch = child.left.scheme()
                right_sch = child.right.scheme()
                combined = left_sch + right_sch
                left_len = len(left_sch)
                new_cols = [expression.to_unnamed_recursive(e, combined)
                            for e in new_cols]
                side = [e.position >= left_len for e in new_cols]
                if sorted(side) == side:
                    # Left columns do come before right cols
                    new_pj = algebra.ProjectingJoin(
                        condition=child.condition, left=child.left,
                        right=child.right, output_columns=new_cols)
                    if new_pj.scheme() == op.scheme():
                        return new_pj

            accessed = sorted(set(itertools.chain(*(accessed_columns(e)
                                                    for e in emits))))
            index_map = {a: i for (i, a) in enumerate(accessed)}
            child.output_columns = [child.output_columns[i] for i in accessed]
            for e in emits:
                expression.reindex_expr(e, index_map)

            return algebra.Apply(emitters=zip(op.get_names(), emits),
                                 input=child)

        elif isinstance(child, algebra.GroupBy):
            emits = op.get_unnamed_emit_exprs()
            assert all(is_simple_agg_expr(agg) for agg in child.aggregate_list)

            accessed = sorted(set(itertools.chain(*(accessed_columns(e)
                                                    for e in emits))))
            num_grps = len(child.grouping_list)
            accessed_aggs = [i for i in accessed if i >= num_grps]
            if len(accessed_aggs) == len(child.aggregate_list):
                return op

            unused_map = {i: j + num_grps for j, i in enumerate(accessed_aggs)}

            # copy the groupby operator so we can modify it
            newgb = child.__class__()
            newgb.copy(child)

            # remove aggregates that are projected out
            newgb.aggregate_list = [newgb.aggregate_list[i - num_grps]
                                    for i in accessed_aggs]
            for e in emits:
                expression.reindex_expr(e, unused_map)

            return algebra.Apply(emitters=zip(op.get_names(), emits),
                                 input=newgb)

        return op
コード例 #9
0
    def descend_tree(op, cond):
        """Recursively push a selection condition down a tree of operators.

        :param op: The root of an operator tree
        :type op: raco.algebra.Operator
        :param cond: The selection condition
        :type cond: raco.expression.expression

        :return: A (possibly modified) operator.
        """

        if isinstance(op, algebra.Select):
            # Keep pushing; selects are commutative
            op.input = PushSelects.descend_tree(op.input, cond)
            return op
        elif isinstance(op, algebra.CompositeBinaryOperator):
            # Joins and cross-products; consider conversion to an equijoin
            # Expressions containing random do not commute across joins
            has_random = any(isinstance(e, RANDOM) for e in cond.walk())
            if not has_random:
                left_len = len(op.left.scheme())
                accessed = accessed_columns(cond)
                in_left = [col < left_len for col in accessed]
                if all(in_left):
                    # Push the select into the left sub-tree.
                    op.left = PushSelects.descend_tree(op.left, cond)
                    return op
                elif not any(in_left):
                    # Push into right subtree; rebase column indexes
                    expression.rebase_expr(cond, left_len)
                    op.right = PushSelects.descend_tree(op.right, cond)
                    return op
                else:
                    # Selection includes both children; attempt to create an
                    # equijoin condition
                    cols = PushSelects.is_column_equality_comparison(cond)
                    if cols:
                        return op.add_equijoin_condition(cols[0], cols[1])
        elif isinstance(op, algebra.Apply):
            # Convert accessed to a list from a set to ensure consistent order
            accessed = list(accessed_columns(cond))
            accessed_emits = [op.emitters[i][1] for i in accessed]
            if all(isinstance(e, expression.AttributeRef)
                   for e in accessed_emits):
                unnamed_emits = expression.ensure_unnamed(
                    accessed_emits, op.input)
                # This condition only touches columns that are copied verbatim
                # from the child, so we can push it.
                index_map = {a: e.position
                             for (a, e) in zip(accessed, unnamed_emits)}
                expression.reindex_expr(cond, index_map)
                op.input = PushSelects.descend_tree(op.input, cond)
                return op
        elif isinstance(op, algebra.GroupBy):
            # Convert accessed to a list from a set to ensure consistent order
            accessed = list(accessed_columns(cond))
            if all((a < len(op.grouping_list)) for a in accessed):
                accessed_grps = [op.grouping_list[a] for a in accessed]
                # This condition only touches columns that are copied verbatim
                # from the child (grouping keys), so we can push it.
                assert all(isinstance(e, expression.AttributeRef)
                           for e in op.grouping_list)
                unnamed_grps = expression.ensure_unnamed(accessed_grps,
                                                         op.input)
                index_map = {a: e.position
                             for (a, e) in zip(accessed, unnamed_grps)}
                expression.reindex_expr(cond, index_map)
                op.input = PushSelects.descend_tree(op.input, cond)
                return op

        # Can't push any more: instantiate the selection
        new_op = algebra.Select(cond, op)
        new_op.has_been_pushed = True
        return new_op