Esempio n. 1
0
    def compileme(self, inputid):
        """Build the plan dictionary describing this aggregate operator.

        :param inputid: Identifier of the child operator; stored under
            ``"argChild"``.
        :return: A dict holding the child id, the positions of the aggregated
            columns, the aggregate operators, the operator type, and (when
            there is at least one grouping column) the grouping position(s).
        """
        scheme = self.input.scheme()
        grouping_refs = [expression.toUnnamed(grp, scheme)
                         for grp in self.grouping_list]

        def aggregated_column(agg):
            """Return the positional reference an aggregate reads from."""
            if isinstance(agg, expression.COUNTALL):
                # XXX Wrong in the presence of nulls
                return UnnamedAttributeRef(0)
            return expression.toUnnamed(agg.input, scheme)

        aggregated = [aggregated_column(agg) for agg in self.aggregate_list]
        operators = [[MyriaGroupBy.agg_mapping(agg)]
                     for agg in self.aggregate_list]

        plan = {
            "argChild": inputid,
            "argAggFields": [ref.position for ref in aggregated],
            "argAggOperators": operators,
        }

        # The operator type depends on how many grouping columns there are
        group_count = len(self.grouping_list)
        if group_count == 0:
            plan["opType"] = "Aggregate"
            return plan

        if group_count == 1:
            plan["opType"] = "SingleGroupByAggregate"
            plan["argGroupField"] = grouping_refs[0].position
            return plan

        plan["opType"] = "MultiGroupByAggregate"
        plan["argGroupFields"] = [ref.position for ref in grouping_refs]
        return plan
Esempio n. 2
0
    def _convert_attribute_ref(self, cols, expr, input_scheme):
        """Return the entry of ``cols`` that an attribute reference points at.

        A named reference is first converted to a positional one against
        ``input_scheme``; the resulting position is used to index ``cols``.

        :raises NotImplementedError: if ``expr`` is not a positional
            reference once the conversion step has run.
        """
        if isinstance(expr, expression.NamedAttributeRef):
            expr = expression.toUnnamed(expr, input_scheme)

        # Tested separately from the conversion above because a named
        # reference has just been turned into a positional one.
        if not isinstance(expr, expression.UnnamedAttributeRef):
            raise NotImplementedError(
                "expression {} to sql".format(type(expr)))

        return cols[expr.position]
Esempio n. 3
0
    def check_no_shuffle(op):
        """Tell whether the group-by ``op`` can run without a shuffle.

        Returns ``False`` when there are no grouping columns; otherwise
        returns the result of ``check_partition_equality`` for the input and
        the grouping columns.
        """
        # Express the grouping columns as positions in the child's scheme
        input_scheme = op.input.scheme()
        grouping_refs = [expression.toUnnamed(grp, input_scheme)
                         for grp in op.grouping_list]

        if not grouping_refs:
            return False
        return check_partition_equality(op.input, grouping_refs)
Esempio n. 4
0
    def _convert_attribute_ref(self, cols, expr, input_scheme):
        """Map an attribute reference onto the matching element of ``cols``.

        Named references are resolved to positions using ``input_scheme``
        before the lookup.

        :raises NotImplementedError: for anything that is not a positional
            reference after that resolution.
        """
        # Normalise named references to positional ones up front
        ref = (expression.toUnnamed(expr, input_scheme)
               if isinstance(expr, expression.NamedAttributeRef)
               else expr)

        if isinstance(ref, expression.UnnamedAttributeRef):
            return cols[ref.position]

        raise NotImplementedError("expression {} to sql".format(type(ref)))
Esempio n. 5
0
    def check_no_shuffle(op):
        """Report whether the shuffle before this group-by can be skipped.

        With no grouping columns the answer is ``False``; otherwise it is
        whatever ``check_partition_equality`` returns for the input and the
        grouping columns.
        """
        child = op.input
        child_scheme = child.scheme()

        # Positional references to the grouping columns in the child scheme
        positional = []
        for grp in op.grouping_list:
            positional.append(expression.toUnnamed(grp, child_scheme))

        if len(positional) == 0:
            return False
        return check_partition_equality(op.input, positional)
Esempio n. 6
0
    def do_transfer(op):
        """Insert a network transfer below a group-by operator.

        ``op.input`` is replaced in place: by a Shuffle on the grouping
        columns when there are any, otherwise by a Collect.
        """
        # Express the grouping columns as positions in the child's scheme
        input_scheme = op.input.scheme()
        grouping_refs = [expression.toUnnamed(grp, input_scheme)
                         for grp in op.grouping_list]

        if grouping_refs:
            # Partition the tuples on the grouping columns
            op.input = algebra.Shuffle(op.input, grouping_refs)
        else:
            # No grouping columns: gather all tuples in one place
            op.input = algebra.Collect(op.input)
Esempio n. 7
0
    def do_transfer(op):
        """Put a network transfer in front of a group-by operation.

        Mutates ``op.input``: a Collect is used when the group-by has no
        grouping columns, a Shuffle on those columns otherwise.
        """
        child_scheme = op.input.scheme()

        # Grouping columns as positional references into the child scheme
        positional = []
        for grp in op.grouping_list:
            positional.append(expression.toUnnamed(grp, child_scheme))

        if not positional:
            # Every tuple has to end up in one place
            op.input = algebra.Collect(op.input)
            return

        # Tuples have to be redistributed on the grouping columns
        op.input = algebra.Shuffle(op.input, positional)
Esempio n. 8
0
    def fire(self, op):
        """Replace an Apply that changes nothing with its child.

        The child is returned only when ``op`` is an Apply, every emitter is
        a plain attribute reference, the Apply's scheme equals its child's
        scheme, and emitter ``i`` refers to child column ``i``. In every
        other case ``op`` is returned untouched.
        """
        if not isinstance(op, algebra.Apply):
            return op

        # A computed emitter means the Apply is doing real work
        if any(not isinstance(e[1], expression.AttributeRef)
               for e in op.emitters):
            return op

        child = op.input
        child_scheme = child.scheme()

        # A differing scheme (names included) means the Apply does something
        if child_scheme != op.scheme():
            return op

        refs = [expression.toUnnamed(e[1], child_scheme)
                for e in op.emitters]

        # Any column that is moved keeps the Apply alive
        for index, ref in enumerate(refs):
            if ref.position != index:
                return op

        # Same scheme, same columns, same order: the Apply is a no-op
        return child
Esempio n. 9
0
    def fire(self, op):
        """Eliminate an Apply whose output is identical to its input.

        Returns ``op.input`` when ``op`` is an Apply made only of attribute
        references, with the same scheme as its input and with every column
        kept at its original position; returns ``op`` otherwise.
        """
        if not isinstance(op, algebra.Apply):
            return op

        # Bail out as soon as one emitter is more than a column copy
        for emitter in op.emitters:
            if not isinstance(emitter[1], expression.AttributeRef):
                return op

        source = op.input
        source_scheme = source.scheme()

        if source_scheme != op.scheme():
            # The schemes differ, so this Apply has an effect
            return op

        positional = [expression.toUnnamed(emitter[1], source_scheme)
                      for emitter in op.emitters]

        # With identical schemes (names included), the Apply is redundant
        # exactly when each output column is the input column at that index.
        in_order = all(ref.position == slot
                       for (slot, ref) in enumerate(positional))
        return source if in_order else op
Esempio n. 10
0
 def resolve(self, attrref):
     """Resolve an attribute reference against this scheme.

     :return: A ``(name, type)`` pair for the referenced attribute.
     """
     ref = expression.toUnnamed(attrref, self)
     name = self.getName(ref.position)
     attr_type = self.getType(ref.position)
     return name, attr_type
Esempio n. 11
0
 def resolve(self, attrref):
     """Look up the name and type that ``attrref`` denotes in this scheme.

     The reference is first converted to a positional one against this
     scheme; the name and type at that position are returned as a tuple.
     """
     positional = expression.toUnnamed(attrref, self)
     return (self.getName(positional.position),
             self.getType(positional.position))
Esempio n. 12
0
    def descend_tree(op, cond):
        """Recursively push a selection condition down a tree of operators.

        The condition is pushed through Select, join/cross-product, Apply and
        GroupBy operators for as long as it only refers to columns that are
        available further down. Where it cannot be pushed any further, a new
        Select is created on top of the current operator.

        Both arguments may be modified in place: children of ``op`` are
        reassigned, and the column indexes inside ``cond`` are rewritten when
        the condition moves below an operator that renumbers columns.

        :param op: The root of an operator tree
        :type op: raco.algebra.Operator
        :param cond: The selection condition
        :type cond: raco.expression.expression

        :return: A (possibly modified) operator.
        """

        if isinstance(op, algebra.Select):
            # Keep pushing; selects are commutative
            op.input = PushSelects.descend_tree(op.input, cond)
            return op
        elif isinstance(op, algebra.CompositeBinaryOperator):
            # Joins and cross-products; consider conversion to an equijoin
            left_len = len(op.left.scheme())
            accessed = accessed_columns(cond)
            # One flag per accessed column: True when the column index falls
            # within the left child's scheme.
            in_left = [col < left_len for col in accessed]
            if all(in_left):
                # Push the select into the left sub-tree.
                op.left = PushSelects.descend_tree(op.left, cond)
                return op
            elif not any(in_left):
                # Push into right subtree; rebase column indexes.
                # The return value is discarded, so this relies on
                # rebase_expr updating cond in place.
                expression.rebase_expr(cond, left_len)
                op.right = PushSelects.descend_tree(op.right, cond)
                return op
            else:
                # Selection includes both children; attempt to create an
                # equijoin condition
                cols = is_column_equality_comparison(cond)
                if cols:
                    return op.add_equijoin_condition(cols[0], cols[1])
                # Otherwise fall through to the Select created at the end.
        elif isinstance(op, algebra.Apply):
            # Convert accessed to a list from a set to ensure consistent order
            accessed = list(accessed_columns(cond))
            # Emit expressions (second element of each emitter) that produce
            # the columns the condition reads.
            accessed_emits = [op.emitters[i][1] for i in accessed]
            if all(isinstance(e, expression.AttributeRef)
                   for e in accessed_emits):
                unnamed_emits = [expression.toUnnamed(e, op.input.scheme())
                                 for e in accessed_emits]
                # This condition only touches columns that are copied verbatim
                # from the child, so we can push it.
                # Map each output column index to the child column it copies.
                index_map = {a: e.position
                             for (a, e) in zip(accessed, unnamed_emits)}
                # Return value discarded: relies on reindex_expr updating
                # cond in place.
                expression.reindex_expr(cond, index_map)
                op.input = PushSelects.descend_tree(op.input, cond)
                return op
            # Otherwise fall through to the Select created at the end.
        elif isinstance(op, algebra.GroupBy):
            # Convert accessed to a list from a set to ensure consistent order
            accessed = list(accessed_columns(cond))
            # Only indexes below len(grouping_list) are treated as grouping
            # keys; a condition touching any other column is not pushed.
            if all((a < len(op.grouping_list)) for a in accessed):
                accessed_grps = [op.grouping_list[a] for a in accessed]
                # This condition only touches columns that are copied verbatim
                # from the child (grouping keys), so we can push it.
                assert all(isinstance(e, expression.AttributeRef)
                           for e in op.grouping_list)
                unnamed_grps = [expression.toUnnamed(e, op.input.scheme())
                                for e in accessed_grps]
                # Map each grouping-key index to its position in the child.
                index_map = {a: e.position
                             for (a, e) in zip(accessed, unnamed_grps)}
                # Return value discarded: relies on reindex_expr updating
                # cond in place.
                expression.reindex_expr(cond, index_map)
                op.input = PushSelects.descend_tree(op.input, cond)
                return op

        # Can't push any more: instantiate the selection
        new_op = algebra.Select(cond, op)
        # Flag the new Select as already pushed.
        # NOTE(review): presumably read elsewhere to avoid pushing it
        # again - confirm against the caller.
        new_op.has_been_pushed = True
        return new_op