Example #1
0
    def table(self, emit_clause):
        """Build the operator for a single-row table literal.

        emit_clause: An iterable of emit arguments.  Each one is expanded
        against an empty mapping into (name, scalar expression) pairs.

        Returns a raco.algebra.Apply whose input is the merge of the
        singleton relation and any relations collected by
        extract_unbox_args.
        """
        # Flatten all clauses into one list of (name, expression) columns.
        columns = [column
                   for clause in emit_clause
                   for column in clause.expand({})]

        # The singleton relation is registered first; extract_unbox_args
        # then adds unbox relations to the same ordered mapping.
        sources = collections.OrderedDict()
        sources['$$SINGLETON$$'] = raco.algebra.SingletonRelation()
        for _unused_name, scalar_expr in columns:
            self.extract_unbox_args(sources, scalar_expr)

        merged_op, merge_info = multiway.merge(sources)

        # Re-express every column in terms of the merged schema.
        rewritten = []
        for column_name, scalar_expr in columns:
            new_expr = multiway.rewrite_refs(scalar_expr, sources, merge_info)
            rewritten.append((column_name, new_expr))

        return raco.algebra.Apply(emitters=rewritten, input=merged_op)
Example #2
0
    def table(self, emit_clause):
        """Produce the operator tree for a one-row table literal.

        emit_clause: An iterable of emit arguments; expanding each of them
        (with an empty mapping) yields (name, scalar expression) pairs.

        Returns a raco.algebra.Apply evaluated over the merged inputs.
        """
        output_columns = []
        for emit_arg in emit_clause:
            output_columns += emit_arg.expand({})

        relations = collections.OrderedDict()
        relations['$$SINGLETON$$'] = raco.algebra.SingletonRelation()

        # Let every emitted expression contribute its unbox relations.
        for pair in output_columns:
            self.extract_unbox_args(relations, pair[1])

        merged = multiway.merge(relations)
        input_op, ref_info = merged

        # Rewrite the emitted expressions against the merged schema.
        emitters = [
            (label, multiway.rewrite_refs(expression, relations, ref_info))
            for label, expression in output_columns
        ]

        return raco.algebra.Apply(emitters=emitters, input=input_op)
Example #3
0
    def bagcomp(self, from_clause, where_clause, emit_clause, orderby_clause,
                limit_clause):
        """Evaluate a bag comprehension.

        from_clause: A list of tuples of the form (id, expr).  expr can
        be None, which means "read the value from the symbol table".

        where_clause: An optional scalar expression (raco.expression).

        emit_clause: A list of EmitArg instances, each defining one or more
        output columns.

        orderby_clause: An optional list of OrderbyArg instances.

        limit_clause: An optional integer expression

        Returns the operator that computes the comprehension, including any
        ORDER BY / LIMIT operators layered on top.

        Raises DuplicateAliasException when a FROM alias is reused, and
        InvalidStatementException when ORDER BY is supplied without LIMIT
        or LIMIT is supplied while orderby_clause is None.
        """

        # Make sure no aliases were reused: [FROM X, X EMIT *] is illegal
        from_aliases = set([x[0] for x in from_clause])
        if len(from_aliases) != len(from_clause):
            raise DuplicateAliasException()

        # For each FROM argument, create a mapping from ID to operator
        # (id, raco.algebra.Operator)
        from_args = collections.OrderedDict()

        for _id, expr in from_clause:
            assert isinstance(_id, basestring)
            if expr:
                from_args[_id] = self.evaluate(expr)
            else:
                from_args[_id] = self.__lookup_symbol(_id)

        # Expand wildcards into a list of output columns
        assert emit_clause  # There should always be something to emit
        emit_args = []
        statemods = []
        for clause in emit_clause:
            emit_args.extend(clause.expand(from_args))
            statemods.extend(clause.get_statemods())

        # Schema width before any unbox relations are added; used below to
        # identify the columns contributed by unboxing.
        orig_op, _info = multiway.merge(from_args)
        orig_schema_length = len(orig_op.scheme())

        # Add unbox relations to the from_args dictionary
        for _name, sexpr in emit_args:
            self.extract_unbox_args(from_args, sexpr)
        if where_clause:
            self.extract_unbox_args(from_args, where_clause)

        # Create a single RA operation that is the cross of all targets
        op, info = multiway.merge(from_args)

        # HACK: calculate unboxed columns as implicit grouping columns,
        # so they can be used in grouping terms.
        new_schema_length = len(op.scheme())
        implicit_group_by_cols = range(orig_schema_length, new_schema_length)

        ################################################
        # Compile away unbox expressions in where, emit clauses
        ################################################

        if where_clause:
            where_clause = multiway.rewrite_refs(where_clause, from_args, info)
            # Extract the type of the where clause to force type safety
            # to be checked
            where_clause.typeof(op.scheme(), None)
            op = raco.algebra.Select(condition=where_clause, input=op)

        emit_args = [(name, multiway.rewrite_refs(sexpr, from_args, info))
                     for (name, sexpr) in emit_args]

        statemods = multiway.rewrite_statemods(statemods, from_args, info)

        if any(raco.expression.expression_contains_aggregate(ex)
               for _name, ex in emit_args):
            op = groupby.groupby(op, emit_args, implicit_group_by_cols,
                                 statemods)
        elif statemods:
            # Assign rather than return: the ORDER BY / LIMIT handling below
            # must still be validated and applied to a stateful apply.
            op = raco.algebra.StatefulApply(emit_args, statemods, op)
        elif not (len(from_args) == 1 and len(emit_clause) == 1 and
                  isinstance(emit_clause[0],
                             (TableWildcardEmitArg, FullWildcardEmitArg))):
            # A lone wildcard over a single relation needs no Apply; every
            # other emit list does.
            op = raco.algebra.Apply(emit_args, op)

        if orderby_clause:
            if limit_clause is None:
                raise InvalidStatementException(
                    "An ORDER BY clause must be accompanied by a LIMIT clause")
            orderby_cols, orderby_ords = zip(*orderby_clause)
            # Resolve the sort columns against the schema of the operator
            # built so far.
            orderby_idxs = [
                get_unnamed_ref(col, op.scheme()).position
                for col in orderby_cols
            ]
            op = raco.algebra.OrderBy(input=op,
                                      sort_columns=orderby_idxs,
                                      ascending=orderby_ords)

        # NOTE(review): the limit is tested by truthiness, so a falsy limit
        # (e.g. 0) is treated as absent -- confirm this is intended.
        if limit_clause:
            if orderby_clause is None:
                raise InvalidStatementException(
                    "A LIMIT clause must be accompanied by an ORDER BY clause")
            op = raco.algebra.Limit(input=op, count=limit_clause)

        return op
Example #4
0
    def bagcomp(self, from_clause, where_clause, emit_clause,
                orderby_clause, limit_clause):
        """Evaluate a bag comprehension.

        from_clause: A list of tuples of the form (id, expr).  expr can
        be None, which means "read the value from the symbol table".

        where_clause: An optional scalar expression (raco.expression).

        emit_clause: A list of EmitArg instances, each defining one or more
        output columns.

        orderby_clause: An optional list of OrderbyArg instances.

        limit_clause: An optional integer expression

        Returns the operator that computes the comprehension, with any
        ORDER BY / LIMIT operators applied last.

        Raises DuplicateAliasException when a FROM alias is reused, and
        InvalidStatementException when ORDER BY is supplied without LIMIT
        or LIMIT is supplied while orderby_clause is None.
        """

        # Make sure no aliases were reused: [FROM X, X EMIT *] is illegal
        from_aliases = set([x[0] for x in from_clause])
        if len(from_aliases) != len(from_clause):
            raise DuplicateAliasException()

        # For each FROM argument, create a mapping from ID to operator
        # (id, raco.algebra.Operator)
        from_args = collections.OrderedDict()

        for _id, expr in from_clause:
            assert isinstance(_id, basestring)
            if expr:
                from_args[_id] = self.evaluate(expr)
            else:
                from_args[_id] = self.__lookup_symbol(_id)

        # Expand wildcards into a list of output columns
        assert emit_clause  # There should always be something to emit
        emit_args = []
        statemods = []
        for clause in emit_clause:
            emit_args.extend(clause.expand(from_args))
            statemods.extend(clause.get_statemods())

        # Record the schema width before unbox relations are added so the
        # unboxed columns can be identified afterwards.
        orig_op, _info = multiway.merge(from_args)
        orig_schema_length = len(orig_op.scheme())

        # Add unbox relations to the from_args dictionary
        for _name, sexpr in emit_args:
            self.extract_unbox_args(from_args, sexpr)
        if where_clause:
            self.extract_unbox_args(from_args, where_clause)

        # Create a single RA operation that is the cross of all targets
        op, info = multiway.merge(from_args)

        # HACK: calculate unboxed columns as implicit grouping columns,
        # so they can be used in grouping terms.
        new_schema_length = len(op.scheme())
        implicit_group_by_cols = range(orig_schema_length, new_schema_length)

        ################################################
        # Compile away unbox expressions in where, emit clauses
        ################################################

        if where_clause:
            where_clause = multiway.rewrite_refs(where_clause, from_args, info)
            # Extract the type of the where clause to force type safety
            # to be checked
            where_clause.typeof(op.scheme(), None)
            op = raco.algebra.Select(condition=where_clause, input=op)

        emit_args = [(name, multiway.rewrite_refs(sexpr, from_args, info))
                     for (name, sexpr) in emit_args]

        statemods = multiway.rewrite_statemods(statemods, from_args, info)

        if any(raco.expression.expression_contains_aggregate(ex)
               for _name, ex in emit_args):
            op = groupby.groupby(op, emit_args, implicit_group_by_cols,
                                 statemods)
        elif statemods:
            # Keep going instead of returning here, so that ORDER BY / LIMIT
            # are validated and applied for stateful applies as well.
            op = raco.algebra.StatefulApply(emit_args, statemods, op)
        elif not (len(from_args) == 1 and len(emit_clause) == 1 and
                  isinstance(emit_clause[0],
                             (TableWildcardEmitArg, FullWildcardEmitArg))):
            # Only a lone wildcard over a single relation can skip the Apply.
            op = raco.algebra.Apply(emit_args, op)

        if orderby_clause:
            if limit_clause is None:
                raise InvalidStatementException(
                    "An ORDER BY clause must be accompanied by a LIMIT clause")
            orderby_cols, orderby_ords = zip(*orderby_clause)
            # Sort columns are resolved against the schema of the operator
            # built so far.
            orderby_idxs = [get_unnamed_ref(col, op.scheme()).position
                            for col in orderby_cols]
            op = raco.algebra.OrderBy(input=op,
                                      sort_columns=orderby_idxs,
                                      ascending=orderby_ords)

        # NOTE(review): the limit is tested by truthiness, so a falsy limit
        # (e.g. 0) is treated as absent -- confirm this is intended.
        if limit_clause:
            if orderby_clause is None:
                raise InvalidStatementException(
                    "A LIMIT clause must be accompanied by an ORDER BY clause")
            op = raco.algebra.Limit(input=op, count=limit_clause)

        return op
Example #5
0
    def bagcomp(self, from_clause, where_clause, emit_clause):
        """Evaluate a bag comprehension and return its operator.

        from_clause: A list of (id, expr) tuples.  A false expr (such as
        None) means the relation is looked up in the symbol table by id.

        where_clause: An optional scalar expression (raco.expression).

        emit_clause: A non-empty list of EmitArg instances, each of which
        defines one or more output columns.

        Raises DuplicateAliasException when a FROM alias is reused.
        """

        # Reject reused aliases: [FROM X, X EMIT *] is illegal
        distinct_aliases = set(entry[0] for entry in from_clause)
        if len(from_clause) != len(distinct_aliases):
            raise DuplicateAliasException()

        # Map every FROM alias to the operator that produces it
        from_args = collections.OrderedDict()
        for alias, source_expr in from_clause:
            from_args[alias] = (self.evaluate(source_expr) if source_expr
                                else self.__lookup_symbol(alias))

        # Expand wildcards into output columns and gather state modifiers
        assert emit_clause  # There should always be something to emit
        emit_args = []
        statemods = []
        for emit_arg in emit_clause:
            emit_args += emit_arg.expand(from_args)
            statemods += emit_arg.get_statemods()

        # Width of the schema before any unbox relations are added
        base_op, _ignored = multiway.merge(from_args)
        base_width = len(base_op.scheme())

        # Add unbox relations to the from_args dictionary
        for _col_name, scalar_expr in emit_args:
            self.extract_unbox_args(from_args, scalar_expr)
        if where_clause:
            self.extract_unbox_args(from_args, where_clause)

        # One operator that is the cross of all targets
        op, info = multiway.merge(from_args)

        # HACK: the columns added by unboxing become implicit grouping
        # columns, so they can be used in grouping terms.
        implicit_group_by_cols = range(base_width, len(op.scheme()))

        # Rewrite the clauses in terms of the new schema
        if where_clause:
            condition = multiway.rewrite_refs(where_clause, from_args, info)
            # Computing the type of the condition forces type checking
            condition.typeof(op.scheme(), None)
            op = raco.algebra.Select(condition=condition, input=op)

        rewritten_emit = []
        for col_name, scalar_expr in emit_args:
            rewritten_emit.append(
                (col_name, multiway.rewrite_refs(scalar_expr, from_args, info)))
        emit_args = rewritten_emit

        rewritten_mods = []
        for mod_name, init, update in statemods:
            rewritten_mods.append(
                (mod_name, init,
                 multiway.rewrite_refs(update, from_args, info)))
        statemods = rewritten_mods

        # Every emitted expression is tested before the result is combined.
        aggregate_flags = [raco.expression.isaggregate(expression)
                           for _col_name, expression in emit_args]
        if any(aggregate_flags):
            return groupby.groupby(op, emit_args, implicit_group_by_cols)

        if statemods:
            return raco.algebra.StatefulApply(emit_args, statemods, op)

        # A single wildcard over a single relation needs no Apply at all
        lone_wildcard = (
            len(from_args) == 1 and len(emit_clause) == 1 and
            isinstance(emit_clause[0],
                       (TableWildcardEmitArg, FullWildcardEmitArg)))
        if lone_wildcard:
            return op

        return raco.algebra.Apply(emit_args, op)