def table(self, emit_clause):
    """Build the plan for a table literal made of a single row.

    emit_clause: An iterable of emit arguments; each one is expanded
    (against an empty FROM mapping) into (name, expression) pairs.

    Returns a raco.algebra.Apply whose input is the merge of a singleton
    relation with any relations pulled in by unbox expressions.
    """
    # Flatten every emit argument into its (name, expression) pairs.
    columns = [pair
               for clause in emit_clause
               for pair in clause.expand({})]

    # The only explicit source is the singleton relation.
    sources = collections.OrderedDict()
    sources['$$SINGLETON$$'] = raco.algebra.SingletonRelation()

    # Unbox expressions may register further relations in ``sources``.
    for _unused_name, expression in columns:
        self.extract_unbox_args(sources, expression)

    plan, merge_info = multiway.merge(sources)

    # Express every emitted column in terms of the merged schema.
    rewritten = []
    for column_name, expression in columns:
        rewritten.append(
            (column_name,
             multiway.rewrite_refs(expression, sources, merge_info)))

    return raco.algebra.Apply(emitters=rewritten, input=plan)
def bagcomp(self, from_clause, where_clause, emit_clause, orderby_clause,
            limit_clause):
    """Evaluate a bag comprehension.

    from_clause: A list of tuples of the form (id, expr).  expr can be None,
    which means "read the value from the symbol table".

    where_clause: An optional scalar expression (raco.expression).

    emit_clause: A list of EmitArg instances, each defining one or more
    output columns.

    orderby_clause: An optional list of OrderbyArg instances.

    limit_clause: An optional integer expression; None means "no LIMIT".

    Returns the operator built for the comprehension, wrapped in OrderBy
    and Limit operators when those clauses are present.

    Raises DuplicateAliasException when an alias occurs more than once in
    from_clause, and InvalidStatementException when ORDER BY is given
    without LIMIT or LIMIT is given without ORDER BY.
    """

    # Make sure no aliases were reused: [FROM X, X EMIT *] is illegal
    from_aliases = {x[0] for x in from_clause}
    if len(from_aliases) != len(from_clause):
        raise DuplicateAliasException()

    # For each FROM argument, create a mapping from ID to operator
    # (id, raco.algebra.Operator)
    from_args = collections.OrderedDict()

    for _id, expr in from_clause:
        assert isinstance(_id, basestring)
        if expr:
            from_args[_id] = self.evaluate(expr)
        else:
            from_args[_id] = self.__lookup_symbol(_id)

    # Expand wildcards into a list of output columns
    assert emit_clause  # There should always be something to emit
    emit_args = []
    statemods = []

    for clause in emit_clause:
        emit_args.extend(clause.expand(from_args))
        statemods.extend(clause.get_statemods())

    # Remember the schema width before unbox relations are added.
    orig_op, _info = multiway.merge(from_args)
    orig_schema_length = len(orig_op.scheme())

    # Add unbox relations to the from_args dictionary
    for _name, sexpr in emit_args:
        self.extract_unbox_args(from_args, sexpr)
    if where_clause:
        self.extract_unbox_args(from_args, where_clause)

    # Create a single RA operation that is the cross of all targets
    op, info = multiway.merge(from_args)

    # HACK: calculate unboxed columns as implicit grouping columns,
    # so they can be used in grouping terms.
    new_schema_length = len(op.scheme())
    implicit_group_by_cols = range(orig_schema_length, new_schema_length)

    ################################################
    # Compile away unbox expressions in where, emit clauses
    ################################################

    if where_clause:
        where_clause = multiway.rewrite_refs(where_clause, from_args, info)
        # Extract the type of the where clause to force type safety
        # to be checked
        where_clause.typeof(op.scheme(), None)
        op = raco.algebra.Select(condition=where_clause, input=op)

    emit_args = [(name, multiway.rewrite_refs(sexpr, from_args, info))
                 for (name, sexpr) in emit_args]

    statemods = multiway.rewrite_statemods(statemods, from_args, info)

    if any(raco.expression.expression_contains_aggregate(ex)
           for _name, ex in emit_args):
        op = groupby.groupby(op, emit_args, implicit_group_by_cols,
                             statemods)
    elif statemods:
        # Assign instead of returning, so that the ORDER BY / LIMIT
        # handling below also applies to stateful applies.
        op = raco.algebra.StatefulApply(emit_args, statemods, op)
    elif not (len(from_args) == 1 and len(emit_clause) == 1 and
              isinstance(emit_clause[0],
                         (TableWildcardEmitArg, FullWildcardEmitArg))):
        # A lone wildcard over a single source skips the Apply
        # (presumably because it would be an identity projection).
        op = raco.algebra.Apply(emit_args, op)

    if orderby_clause:
        if limit_clause is None:
            raise InvalidStatementException(
                "An ORDER BY clause must be accompanied by a LIMIT clause")

        orderby_cols, orderby_ords = zip(*orderby_clause)
        orderby_idxs = [
            get_unnamed_ref(col, op.scheme()).position
            for col in orderby_cols
        ]
        op = raco.algebra.OrderBy(input=op, sort_columns=orderby_idxs,
                                  ascending=orderby_ords)

    # None is the "no LIMIT" sentinel (see the check above); testing
    # truthiness here would silently drop a limit of 0.
    if limit_clause is not None:
        if orderby_clause is None:
            raise InvalidStatementException(
                "A LIMIT clause must be accompanied by an ORDER BY clause")

        op = raco.algebra.Limit(input=op, count=limit_clause)

    return op
def bagcomp(self, from_clause, where_clause, emit_clause, orderby_clause,
            limit_clause):
    """Evaluate a bag comprehension.

    from_clause: A list of tuples of the form (id, expr).  expr can be None,
    which means "read the value from the symbol table".

    where_clause: An optional scalar expression (raco.expression).

    emit_clause: A list of EmitArg instances, each defining one or more
    output columns.

    orderby_clause: An optional list of OrderbyArg instances.

    limit_clause: An optional integer expression; None means "no LIMIT".

    Returns the operator built for the comprehension, wrapped in OrderBy
    and Limit operators when those clauses are present.

    Raises DuplicateAliasException when an alias occurs more than once in
    from_clause, and InvalidStatementException when ORDER BY is given
    without LIMIT or LIMIT is given without ORDER BY.
    """

    # Make sure no aliases were reused: [FROM X, X EMIT *] is illegal
    from_aliases = {x[0] for x in from_clause}
    if len(from_aliases) != len(from_clause):
        raise DuplicateAliasException()

    # For each FROM argument, create a mapping from ID to operator
    # (id, raco.algebra.Operator)
    from_args = collections.OrderedDict()

    for _id, expr in from_clause:
        assert isinstance(_id, basestring)
        if expr:
            from_args[_id] = self.evaluate(expr)
        else:
            from_args[_id] = self.__lookup_symbol(_id)

    # Expand wildcards into a list of output columns
    assert emit_clause  # There should always be something to emit
    emit_args = []
    statemods = []

    for clause in emit_clause:
        emit_args.extend(clause.expand(from_args))
        statemods.extend(clause.get_statemods())

    # Remember the schema width before unbox relations are added.
    orig_op, _info = multiway.merge(from_args)
    orig_schema_length = len(orig_op.scheme())

    # Add unbox relations to the from_args dictionary
    for _name, sexpr in emit_args:
        self.extract_unbox_args(from_args, sexpr)
    if where_clause:
        self.extract_unbox_args(from_args, where_clause)

    # Create a single RA operation that is the cross of all targets
    op, info = multiway.merge(from_args)

    # HACK: calculate unboxed columns as implicit grouping columns,
    # so they can be used in grouping terms.
    new_schema_length = len(op.scheme())
    implicit_group_by_cols = range(orig_schema_length, new_schema_length)

    ################################################
    # Compile away unbox expressions in where, emit clauses
    ################################################

    if where_clause:
        where_clause = multiway.rewrite_refs(where_clause, from_args, info)
        # Extract the type of the where clause to force type safety
        # to be checked
        where_clause.typeof(op.scheme(), None)
        op = raco.algebra.Select(condition=where_clause, input=op)

    emit_args = [(name, multiway.rewrite_refs(sexpr, from_args, info))
                 for (name, sexpr) in emit_args]

    statemods = multiway.rewrite_statemods(statemods, from_args, info)

    if any(raco.expression.expression_contains_aggregate(ex)
           for _name, ex in emit_args):
        op = groupby.groupby(op, emit_args, implicit_group_by_cols,
                             statemods)
    elif statemods:
        # Assign instead of returning, so that the ORDER BY / LIMIT
        # handling below also applies to stateful applies.
        op = raco.algebra.StatefulApply(emit_args, statemods, op)
    elif not (len(from_args) == 1 and len(emit_clause) == 1 and
              isinstance(emit_clause[0],
                         (TableWildcardEmitArg, FullWildcardEmitArg))):
        # A lone wildcard over a single source skips the Apply
        # (presumably because it would be an identity projection).
        op = raco.algebra.Apply(emit_args, op)

    if orderby_clause:
        if limit_clause is None:
            raise InvalidStatementException(
                "An ORDER BY clause must be accompanied by a LIMIT clause")

        orderby_cols, orderby_ords = zip(*orderby_clause)
        orderby_idxs = [get_unnamed_ref(col, op.scheme()).position
                        for col in orderby_cols]
        op = raco.algebra.OrderBy(input=op, sort_columns=orderby_idxs,
                                  ascending=orderby_ords)

    # None is the "no LIMIT" sentinel (see the check above); testing
    # truthiness here would silently drop a limit of 0.
    if limit_clause is not None:
        if orderby_clause is None:
            raise InvalidStatementException(
                "A LIMIT clause must be accompanied by an ORDER BY clause")

        op = raco.algebra.Limit(input=op, count=limit_clause)

    return op
def bagcomp(self, from_clause, where_clause, emit_clause):
    """Translate a bag comprehension into a relational-algebra operator.

    from_clause: A list of (id, expr) tuples.  A falsy expr (e.g. None)
    means the relation is looked up in the symbol table under that id.

    where_clause: An optional scalar expression (raco.expression) used as
    a selection condition.

    emit_clause: A non-empty list of EmitArg instances, each contributing
    one or more output columns.

    Raises DuplicateAliasException when the same alias is used twice in
    from_clause.
    """
    # [FROM X, X EMIT *] is illegal: every alias must be distinct.
    if len(set([entry[0] for entry in from_clause])) != len(from_clause):
        raise DuplicateAliasException()

    # Map each alias to the operator that produces it, in FROM order.
    from_args = collections.OrderedDict()
    for _id, expr in from_clause:
        from_args[_id] = (self.evaluate(expr) if expr
                          else self.__lookup_symbol(_id))

    # There should always be something to emit.
    assert emit_clause

    # Expand wildcards into concrete columns and gather state modifiers.
    columns = []
    state_updates = []
    for emit_arg in emit_clause:
        columns.extend(emit_arg.expand(from_args))
        state_updates.extend(emit_arg.get_statemods())

    # Width of the schema before any unbox relations are added.
    base_op, _ignored = multiway.merge(from_args)
    base_width = len(base_op.scheme())

    # Unbox expressions may add further relations to from_args.
    for _col_name, col_expr in columns:
        self.extract_unbox_args(from_args, col_expr)
    if where_clause:
        self.extract_unbox_args(from_args, where_clause)

    # One operator that is the cross of every target relation.
    plan, merge_info = multiway.merge(from_args)

    # HACK: columns contributed by unboxed relations are treated as
    # implicit grouping columns so grouping terms may refer to them.
    implicit_group_by_cols = range(base_width, len(plan.scheme()))

    # Rewrite every clause in terms of the merged schema.
    if where_clause:
        where_clause = multiway.rewrite_refs(where_clause, from_args,
                                             merge_info)
        # Asking for the type forces the type-safety check to run.
        where_clause.typeof(plan.scheme(), None)
        plan = raco.algebra.Select(condition=where_clause, input=plan)

    columns = [
        (col_name, multiway.rewrite_refs(col_expr, from_args, merge_info))
        for col_name, col_expr in columns
    ]

    rewritten_updates = []
    for state_name, init_expr, update_expr in state_updates:
        rewritten_updates.append(
            (state_name, init_expr,
             multiway.rewrite_refs(update_expr, from_args, merge_info)))
    state_updates = rewritten_updates

    # Any aggregate among the outputs turns the whole thing into a group-by.
    aggregate_flags = [raco.expression.isaggregate(col_expr)
                       for _col_name, col_expr in columns]
    if any(aggregate_flags):
        return groupby.groupby(plan, columns, implicit_group_by_cols)

    if state_updates:
        return raco.algebra.StatefulApply(columns, state_updates, plan)

    # A lone wildcard over a single source returns the plan as it is.
    lone_wildcard = (
        len(from_args) == 1 and
        len(emit_clause) == 1 and
        isinstance(emit_clause[0],
                   (TableWildcardEmitArg, FullWildcardEmitArg)))
    if lone_wildcard:
        return plan

    return raco.algebra.Apply(columns, plan)