def compileme(self, inputid):
    """Build the dict that describes this group-by/aggregate operator.

    :param inputid: value stored under ``argChild`` to identify the input
    :return: a dict with the aggregate columns and operators; ``opType``
        is ``Aggregate``, ``SingleGroupByAggregate`` or
        ``MultiGroupByAggregate`` for zero, one or several grouping
        columns respectively
    """
    input_scheme = self.input.scheme()

    # Grouping columns, resolved to positional references.
    group_refs = [expression.toUnnamed(grp, input_scheme)
                  for grp in self.grouping_list]

    def aggregated_column(agg):
        """Return the positional reference the aggregate reads from."""
        if isinstance(agg, expression.COUNTALL):
            # XXX Wrong in the presence of nulls
            return UnnamedAttributeRef(0)
        return expression.toUnnamed(agg.input, input_scheme)

    agg_refs = [aggregated_column(agg) for agg in self.aggregate_list]

    # Each aggregate contributes a one-element list of operators.
    agg_ops = [[MyriaGroupBy.agg_mapping(agg)]
               for agg in self.aggregate_list]

    plan = {
        "argChild": inputid,
        "argAggFields": [ref.position for ref in agg_refs],
        "argAggOperators": agg_ops,
    }

    group_count = len(self.grouping_list)
    if group_count > 1:
        plan["opType"] = "MultiGroupByAggregate"
        plan["argGroupFields"] = [ref.position for ref in group_refs]
    elif group_count == 1:
        plan["opType"] = "SingleGroupByAggregate"
        plan["argGroupField"] = group_refs[0].position
    else:
        plan["opType"] = "Aggregate"
    return plan
def _convert_attribute_ref(self, cols, expr, input_scheme):
    """Return the entry of ``cols`` that an attribute reference points at.

    :param cols: sequence indexed by column position
    :param expr: the expression to convert; named references are first
        resolved against ``input_scheme``
    :param input_scheme: scheme used to resolve named references
    :raises NotImplementedError: if ``expr`` is not an attribute
        reference that ends up positional
    """
    if isinstance(expr, expression.NamedAttributeRef):
        expr = expression.toUnnamed(expr, input_scheme)

    # Deliberately a second, independent test: the conversion above may
    # have just produced an UnnamedAttributeRef.
    if not isinstance(expr, expression.UnnamedAttributeRef):
        raise NotImplementedError(
            "expression {} to sql".format(type(expr)))
    return cols[expr.position]
def check_no_shuffle(op):
    """Tell whether the group-by ``op`` can run without a shuffle.

    Returns False when there are no grouping columns; otherwise returns
    whatever ``check_partition_equality`` reports for the input and the
    positional grouping columns.
    """
    input_scheme = op.input.scheme()

    # Positional references to the grouping columns in the child scheme
    positional_groups = [expression.toUnnamed(grp, input_scheme)
                         for grp in op.grouping_list]

    if not positional_groups:
        return False
    return check_partition_equality(op.input, positional_groups)
def do_transfer(op):
    """Put a network transfer underneath the group-by ``op``.

    The input of ``op`` is replaced in place: by a Shuffle on the
    grouping columns when there are any, by a Collect otherwise.
    """
    input_scheme = op.input.scheme()

    # Positional references to the grouping columns in the child scheme
    positional_groups = [expression.toUnnamed(grp, input_scheme)
                         for grp in op.grouping_list]

    if positional_groups:
        # Grouped aggregate: shuffle on the grouping columns.
        op.input = algebra.Shuffle(op.input, positional_groups)
    else:
        # No grouping columns: gather all tuples in one place.
        op.input = algebra.Collect(op.input)
def fire(self, op):
    """Drop an Apply that merely re-emits its input unchanged.

    :param op: the operator to inspect
    :return: the input of ``op`` when ``op`` is an Apply whose emitters
        are all attribute references, whose scheme equals its input's
        scheme, and whose emitters keep every column in place;
        otherwise ``op`` itself
    """
    if not isinstance(op, algebra.Apply):
        return op

    # Any emitter that is more than a column copy means real work.
    if any(not isinstance(em[1], expression.AttributeRef)
           for em in op.emitters):
        return op

    source = op.input
    source_scheme = source.scheme()

    # A differing scheme means the Apply changes something.
    if source_scheme != op.scheme():
        return op

    refs = [expression.toUnnamed(em[1], source_scheme)
            for em in op.emitters]

    # Same scheme (names included): the Apply is a no-op only if each
    # output column reads the input column at the same position.
    for index, ref in enumerate(refs):
        if ref.position != index:
            return op
    return source
def resolve(self, attrref):
    """Resolve an attribute reference against this scheme.

    :param attrref: the attribute reference to resolve
    :return: a ``(name, type)`` tuple for the referenced column
    """
    positional = expression.toUnnamed(attrref, self)
    name = self.getName(positional.position)
    column_type = self.getType(positional.position)
    return name, column_type
def descend_tree(op, cond):
    """Push a selection condition as far down an operator tree as it goes.

    :param op: The root of an operator tree
    :type op: raco.algebra.Operator
    :param cond: The selection condition; it is rebased / re-indexed in
        place as it moves below joins, applies and group-bys
    :type cond: raco.expression.expression
    :return: A (possibly modified) operator.
    """
    if isinstance(op, algebra.Select):
        # Selects commute, so the condition can go below this one.
        op.input = PushSelects.descend_tree(op.input, cond)
        return op
    elif isinstance(op, algebra.CompositeBinaryOperator):
        # Joins and cross-products; consider conversion to an equijoin
        left_width = len(op.left.scheme())
        touched = accessed_columns(cond)
        on_left = [col < left_width for col in touched]

        if all(on_left):
            # Only left columns are used: push into the left sub-tree.
            op.left = PushSelects.descend_tree(op.left, cond)
            return op

        if not any(on_left):
            # Only right columns are used: rebase the column indexes,
            # then push into the right sub-tree.
            expression.rebase_expr(cond, left_width)
            op.right = PushSelects.descend_tree(op.right, cond)
            return op

        # Both children are involved; try to turn the condition into
        # an equijoin condition.
        join_cols = is_column_equality_comparison(cond)
        if join_cols:
            return op.add_equijoin_condition(join_cols[0], join_cols[1])
    elif isinstance(op, algebra.Apply):
        # A list (not a set) so the order is consistent below
        touched = list(accessed_columns(cond))
        sources = [op.emitters[col][1] for col in touched]

        if all(isinstance(src, expression.AttributeRef)
               for src in sources):
            # Every touched column is copied verbatim from the child,
            # so the condition can be re-indexed and pushed.
            remap = {}
            for col, src in zip(touched, sources):
                ref = expression.toUnnamed(src, op.input.scheme())
                remap[col] = ref.position
            expression.reindex_expr(cond, remap)
            op.input = PushSelects.descend_tree(op.input, cond)
            return op
    elif isinstance(op, algebra.GroupBy):
        # A list (not a set) so the order is consistent below
        touched = list(accessed_columns(cond))

        if not any(col >= len(op.grouping_list) for col in touched):
            keys = [op.grouping_list[col] for col in touched]

            # Only grouping keys are touched, and those are copied
            # verbatim from the child, so the condition can be pushed.
            assert all(isinstance(grp, expression.AttributeRef)
                       for grp in op.grouping_list)

            remap = {}
            for col, key in zip(touched, keys):
                ref = expression.toUnnamed(key, op.input.scheme())
                remap[col] = ref.position
            expression.reindex_expr(cond, remap)
            op.input = PushSelects.descend_tree(op.input, cond)
            return op

    # Can't push any more: instantiate the selection
    pushed_select = algebra.Select(cond, op)
    pushed_select.has_been_pushed = True
    return pushed_select