Esempio n. 1
0
    def __init__(self, catalog, use_dummy_schema=False):
        """Set up the symbol table, expression processor and flow graph.

        :param catalog: Catalog handed to the expression processor; asserted
                        to be a raco.catalog.Catalog instance
        :param use_dummy_schema: Forwarded unchanged to ExpressionProcessor
        """
        # Identifier (alias) -> raco.algebra.Operation instance.  The very
        # same dictionary object is shared with the expression processor.
        symbol_table = {}
        self.symbols = symbol_table

        assert isinstance(catalog, raco.catalog.Catalog)
        self.catalog = catalog

        self.ep = ExpressionProcessor(symbol_table, catalog,
                                      use_dummy_schema)

        # Operators produced by statements are collected in this graph
        self.cfg = ControlFlowGraph()
Esempio n. 2
0
    def __init__(self, catalog=None, use_dummy_schema=False):
        """Set up the symbol table, expression processor and flow graph.

        :param catalog: Catalog handed to the expression processor; stored
                        as-is (defaults to None, no type check is done)
        :param use_dummy_schema: Forwarded unchanged to ExpressionProcessor
        """
        # Identifier (alias) -> raco.algebra.Operation instance.  The very
        # same dictionary object is shared with the expression processor.
        symbol_table = {}
        self.symbols = symbol_table

        self.catalog = catalog
        self.ep = ExpressionProcessor(symbol_table, catalog,
                                      use_dummy_schema)

        # Operators produced by statements are collected in this graph
        self.cfg = ControlFlowGraph()
Esempio n. 3
0
class StatementProcessor(object):
    """Evaluate a list of statements.

    Every statement handler builds raco.algebra operators and appends them
    to a control flow graph (``self.cfg``); the logical and physical query
    plans are derived from that graph.
    """

    def __init__(self, catalog, use_dummy_schema=False):
        """Create the symbol table, expression processor and flow graph.

        :param catalog: Catalog handed to the expression processor
        :type catalog: raco.catalog.Catalog
        :param use_dummy_schema: Forwarded unchanged to ExpressionProcessor
        """
        # Map from identifiers (aliases) to raco.algebra.Operation instances
        self.symbols = {}

        assert isinstance(catalog, raco.catalog.Catalog)
        self.catalog = catalog

        self.ep = ExpressionProcessor(self.symbols, catalog, use_dummy_schema)

        self.cfg = ControlFlowGraph()

    def evaluate(self, statements):
        """Evaluate a list of statements.

        :param statements: Sequences whose first entry names the statement
                           type; the remaining entries are its arguments
        """
        for statement in statements:
            # Switch on the first tuple entry: its lower-cased value is the
            # name of the method that handles the statement.
            method = getattr(self, statement[0].lower())
            method(*statement[1:])

    def __evaluate_expr(self, expr, _def):
        """Evaluate an expression; add a node to the control flow graph.

        :param expr: An expression to evaluate
        :type expr: Myrial AST tuple
        :param _def: The variable defined by the expression, or None for
                     non-statements
        :type _def: string
        :return: The operator produced by the expression processor
        """

        op = self.ep.evaluate(expr)
        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, _def, uses_set)
        return op

    def __do_assignment(self, _id, expr):
        """Process an assignment statement; add a node to the control flow
        graph.

        :param _id: The target variable name.
        :type _id: string
        :param expr: The relational expression to evaluate
        :type expr: A Myrial expression AST node tuple
        """

        child_op = self.ep.evaluate(expr)
        if _id in self.symbols:
            # Re-assignment: validate against the existing binding
            check_assignment_compatability(child_op, self.symbols[_id])

        op = raco.algebra.StoreTemp(_id, child_op)
        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, _id, uses_set)

        # Point future references of this symbol to a scan of the materialized
        # table. Note that this assumes there is no scoping in Myrial.
        self.symbols[_id] = raco.algebra.ScanTemp(_id, child_op.scheme())

    def assign(self, _id, expr):
        """Map a variable to the value of an expression."""
        self.__do_assignment(_id, expr)

    def idbassign(self, _id, agg, expr):
        """Map an IDB to the value of an expression.

        ``agg`` is accepted but not used by this method.
        """
        self.__do_assignment(_id, expr)

    def store(self, _id, rel_key, how_distributed):
        """Add a Store of the relation bound to ``_id`` to the flow graph.

        :param _id: Alias of the relation to store
        :param rel_key: Destination relation
        :type rel_key: relation_key.RelationKey
        :param how_distributed: "BROADCAST", "ROUND_ROBIN", a non-empty
                                iterable of column references (hash
                                partitioning), or a false value to store
                                the relation without redistributing it
        """
        assert isinstance(rel_key, relation_key.RelationKey)

        alias_expr = ("ALIAS", _id)
        child_op = self.ep.evaluate(alias_expr)

        # NOTE(review): ``Shuffle`` is referenced unqualified below; this
        # presumably relies on a module-level import -- confirm.
        if how_distributed == "BROADCAST":
            child_op = raco.algebra.Broadcast(child_op)
        elif how_distributed == "ROUND_ROBIN":
            child_op = raco.algebra.Shuffle(
                child_op, None, shuffle_type=Shuffle.ShuffleType.RoundRobin)
        # hash-partitioned
        elif how_distributed:
            scheme = child_op.scheme()
            col_list = [get_unnamed_ref(a, scheme) for a in how_distributed]
            child_op = raco.algebra.Shuffle(
                child_op, col_list, shuffle_type=Shuffle.ShuffleType.Hash)
        op = raco.algebra.Store(rel_key, child_op)

        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, None, uses_set)

    def sink(self, _id):
        """Add a Sink of the relation bound to ``_id`` to the flow graph."""
        alias_expr = ("ALIAS", _id)
        child_op = self.ep.evaluate(alias_expr)
        op = raco.algebra.Sink(child_op)

        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, None, uses_set)

    def dump(self, _id):
        """Add a Dump of the relation bound to ``_id`` to the flow graph."""
        alias_expr = ("ALIAS", _id)
        child_op = self.ep.evaluate(alias_expr)
        op = raco.algebra.Dump(child_op)
        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, None, uses_set)

    def dowhile(self, statement_list, termination_ex):
        """Process a do/while loop.

        :param statement_list: (type, id, expression) triples forming the
                               loop body; only 'ASSIGN' is accepted
        :param termination_ex: The loop condition expression
        :raises InvalidStatementException: if the body contains a statement
                                           that is not an assignment
        """
        first_op_id = self.cfg.next_op_id  # op ID of the top of the loop

        for _type, _id, expr in statement_list:
            if _type != 'ASSIGN':
                # TODO: Better error message
                raise InvalidStatementException('%s not allowed in do/while' %
                                                _type.lower())
            self.__do_assignment(_id, expr)

        # op ID that the loop condition receives when it is added below
        last_op_id = self.cfg.next_op_id

        self.__evaluate_expr(termination_ex, None)

        # Add a control flow edge from the loop condition to the top of the
        # loop
        self.cfg.add_edge(last_op_id, first_op_id)

    def check_schema(self, expr):
        """Placeholder check; always returns True."""
        return True

    def get_idb_leaves(self, expr, idbs):
        """Collect the IDB names referenced by an expression tree.

        :param expr: Expression tuple; its first entry is the operator name
        :param idbs: Container of known IDB names (membership is tested)
        :return: List of referenced IDB names in traversal order; a name
                 appears once per reference
        :raises InvalidStatementException: for an unrecognized operator
        """
        ret = []
        op = expr[0].lower()
        args = expr[1:]
        if op in ("bagcomp", "select"):
            # Both carry (alias, sub-expression) pairs; a missing
            # sub-expression means the alias itself names the source.
            sources = args[0] if op == "bagcomp" else args[0].from_
            for _id, arg in sources:
                if arg:
                    ret += self.get_idb_leaves(arg, idbs)
                elif _id in idbs:
                    ret.append(_id)
        elif op in ("join", "union", "cross", "diff", "intersect"):
            ret += self.get_idb_leaves(args[0], idbs)
            ret += self.get_idb_leaves(args[1], idbs)
        elif op == "unionall":
            for child in args[0]:
                ret += self.get_idb_leaves(child, idbs)
        elif op in ("limit", "countall", "distinct"):
            ret += self.get_idb_leaves(args[0], idbs)
        elif op == "alias":
            if args[0] in idbs:
                ret.append(args[0])
        else:
            raise InvalidStatementException('%s not recognized' % op)
        return ret

    def separate_inputs(self, expr, idbs, is_init):
        """Select the union-all branches of ``expr`` for one phase.

        Nested "unionall" nodes are flattened.  With ``is_init`` true the
        branches that reference no IDB are returned; otherwise the branches
        that reference at least one IDB are returned.

        :return: List of expression tuples (possibly empty)
        """
        if expr[0].lower() == "unionall":
            selected = []
            for branch in expr[1]:
                selected += self.separate_inputs(branch, idbs, is_init)
            return selected

        edb_only = not self.get_idb_leaves(expr, idbs)
        if (is_init and edb_only) or (not is_init and not edb_only):
            return [expr]
        return []

    def __combine_inputs(self, inputs):
        """Evaluate ``inputs`` and merge them into a single operator.

        No input yields an empty relation with an empty scheme, a single
        input yields its own operator and several inputs are wrapped in a
        UnionAll.
        """
        if not inputs:
            return raco.algebra.EmptyRelation(raco.scheme.Scheme())
        if len(inputs) == 1:
            return self.ep.evaluate(inputs[0])
        return raco.algebra.UnionAll(
            [self.ep.evaluate(branch) for branch in inputs])

    def untilconvergence(self, statement_list, recursion_mode,
                         pull_order_policy):
        """Process a block of IDB definitions evaluated until convergence.

        :param statement_list: (type, id, emits, expression) tuples, one
                               per IDB
        :param recursion_mode: Passed to every IDBController
        :param pull_order_policy: Passed to the UntilConvergence operator
        :raises InvalidStatementException: if an IDB name is already bound,
            or if the iterative inputs cannot be resolved because every
            remaining IDB depends on an IDB whose scheme is still unknown
        """
        idbs = {}
        for idx, (_type, _id, emits, expr) in enumerate(statement_list):
            if _id in self.symbols:
                raise InvalidStatementException('IDB %s is already used' % _id)
            # Children: [initial input, iterative input, empty relation];
            # the first two are filled in below.
            idbcontroller = raco.algebra.IDBController(
                _id, idx,
                [None, None,
                 raco.algebra.EmptyRelation(raco.scheme.Scheme())], emits,
                None, recursion_mode)
            idbs[_id] = idbcontroller
            self.symbols[_id] = raco.algebra.ScanIDB(_id, None, idbcontroller)

        # Initial inputs: the branches that do not reference any IDB
        for _type, _id, emits, expr in statement_list:
            initial_inputs = self.separate_inputs(expr, idbs, True)
            idbs[_id].children()[0] = self.__combine_inputs(initial_inputs)

        # Iterative inputs: an IDB can only be resolved once the scheme of
        # every IDB it references is known, so sweep until all are done.
        while True:
            stuck = []
            progressed = False
            for _type, _id, emits, expr in statement_list:
                if idbs[_id].children()[1] is not None:
                    continue
                leaves = self.get_idb_leaves(expr, idbs)
                if any(idbs[leaf].scheme() is None for leaf in leaves):
                    stuck.append(_id)
                    continue
                iterative_inputs = self.separate_inputs(expr, idbs, False)
                idbs[_id].children()[1] = self.__combine_inputs(
                    iterative_inputs)
                progressed = True

            if not stuck:
                break
            if not progressed:
                # A sweep that resolved nothing changed no state, so every
                # further sweep would be identical: fail instead of hanging.
                raise InvalidStatementException(
                    'cannot resolve the schema of IDB(s): %s' %
                    ', '.join(str(name) for name in stuck))

        # list(...) so the operator receives a real list on Python 3 as well
        op = raco.algebra.UntilConvergence(list(idbs.values()),
                                           pull_order_policy)
        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, None, uses_set)

    def get_logical_plan(self, **kwargs):
        """Return an operator representing the logical query plan.

        Recognized keyword arguments (both default to True):
        ``dead_code_elimination`` and ``apply_chaining``; any other keyword
        is ignored.
        """
        return self.cfg.get_logical_plan(
            dead_code_elimination=kwargs.get('dead_code_elimination', True),
            apply_chaining=kwargs.get('apply_chaining', True))

    def __get_physical_plan_for__(self, target_phys_algebra, **kwargs):
        """Optimize the logical plan for ``target_phys_algebra``.

        ``kwargs`` are forwarded both to get_logical_plan and, with
        ``target`` added, to optimize.
        """
        logical_plan = self.get_logical_plan(**kwargs)

        kwargs['target'] = target_phys_algebra
        return optimize(logical_plan, **kwargs)

    def get_physical_plan(self, **kwargs):
        """Return an operator representing the physical query plan.

        The target algebra is taken from the ``target_alg`` keyword; when it
        is missing or None, MyriaHyperCubeAlgebra is used if
        ``multiway_join`` is true and MyriaLeftDeepTreeAlgebra otherwise.
        """
        target_phys_algebra = kwargs.get('target_alg')
        if target_phys_algebra is None:
            if kwargs.get('multiway_join', False):
                target_phys_algebra = MyriaHyperCubeAlgebra(self.catalog)
            else:
                target_phys_algebra = MyriaLeftDeepTreeAlgebra()

        return self.__get_physical_plan_for__(target_phys_algebra, **kwargs)

    def get_json(self, **kwargs):
        """Return the result of compile_to_json for the physical plan.

        ``kwargs`` are forwarded to get_physical_plan.
        """
        # NOTE(review): the logical plan used for the description string is
        # built with default options, independent of kwargs -- confirm.
        lp = self.get_logical_plan()
        pps = self.get_physical_plan(**kwargs)

        # TODO This is not correct. The first argument is the raw query string,
        # not the string representation of the logical plan
        return compile_to_json(str(lp), pps, pps, "myrial")

    @classmethod
    def get_json_from_physical_plan(cls, pp):
        """Return the result of compile_to_json for a given physical plan."""
        # TODO This is not correct. The first argument is the raw query string,
        # not the string representation of the logical plan
        return compile_to_json("NOT_SOURCED_FROM_LOGICAL_RA", pp, pp,
                               "myrial")
Esempio n. 4
0
class StatementProcessor(object):

    """Evaluate a list of statements.

    Every statement handler builds raco.algebra operators and appends them
    to a control flow graph (``self.cfg``); the logical and physical query
    plans are derived from that graph.
    """

    def __init__(self, catalog, use_dummy_schema=False):
        """Create the symbol table, expression processor and flow graph.

        :param catalog: Catalog handed to the expression processor
        :type catalog: raco.catalog.Catalog
        :param use_dummy_schema: Forwarded unchanged to ExpressionProcessor
        """
        # Map from identifiers (aliases) to raco.algebra.Operation instances
        self.symbols = {}

        assert isinstance(catalog, raco.catalog.Catalog)
        self.catalog = catalog

        self.ep = ExpressionProcessor(self.symbols, catalog, use_dummy_schema)

        self.cfg = ControlFlowGraph()

    def evaluate(self, statements):
        """Evaluate a list of statements.

        :param statements: Sequences whose first entry names the statement
                           type; the remaining entries are its arguments
        """
        for statement in statements:
            # Switch on the first tuple entry: its lower-cased value is the
            # name of the method that handles the statement.
            method = getattr(self, statement[0].lower())
            method(*statement[1:])

    def __evaluate_expr(self, expr, _def):
        """Evaluate an expression; add a node to the control flow graph.

        :param expr: An expression to evaluate
        :type expr: Myrial AST tuple
        :param _def: The variable defined by the expression, or None for
                     non-statements
        :type _def: string
        :return: The operator produced by the expression processor
        """

        op = self.ep.evaluate(expr)
        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, _def, uses_set)
        return op

    def __do_assignment(self, _id, expr):
        """Process an assignment statement; add a node to the control flow
        graph.

        :param _id: The target variable name.
        :type _id: string
        :param expr: The relational expression to evaluate
        :type expr: A Myrial expression AST node tuple
        """

        child_op = self.ep.evaluate(expr)
        if _id in self.symbols:
            # Re-assignment: validate against the existing binding
            check_assignment_compatability(child_op, self.symbols[_id])

        op = raco.algebra.StoreTemp(_id, child_op)
        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, _id, uses_set)

        # Point future references of this symbol to a scan of the materialized
        # table. Note that this assumes there is no scoping in Myrial.
        self.symbols[_id] = raco.algebra.ScanTemp(_id, child_op.scheme())

    def assign(self, _id, expr):
        """Map a variable to the value of an expression."""
        self.__do_assignment(_id, expr)

    def idbassign(self, _id, agg, expr):
        """Map an IDB to the value of an expression.

        ``agg`` is accepted but not used by this method.
        """
        self.__do_assignment(_id, expr)

    def store(self, _id, rel_key, how_distributed):
        """Add a Store of the relation bound to ``_id`` to the flow graph.

        :param _id: Alias of the relation to store
        :param rel_key: Destination relation
        :type rel_key: relation_key.RelationKey
        :param how_distributed: "BROADCAST", "ROUND_ROBIN", a non-empty
                                iterable of column references (hash
                                partitioning), or a false value to store
                                the relation without redistributing it
        """
        assert isinstance(rel_key, relation_key.RelationKey)

        alias_expr = ("ALIAS", _id)
        child_op = self.ep.evaluate(alias_expr)

        # NOTE(review): ``Shuffle`` is referenced unqualified below; this
        # presumably relies on a module-level import -- confirm.
        if how_distributed == "BROADCAST":
            child_op = raco.algebra.Broadcast(child_op)
        elif how_distributed == "ROUND_ROBIN":
            child_op = raco.algebra.Shuffle(
                child_op, None, shuffle_type=Shuffle.ShuffleType.RoundRobin)
        # hash-partitioned
        elif how_distributed:
            scheme = child_op.scheme()
            col_list = [get_unnamed_ref(a, scheme) for a in how_distributed]
            child_op = raco.algebra.Shuffle(
                child_op, col_list, shuffle_type=Shuffle.ShuffleType.Hash)
        op = raco.algebra.Store(rel_key, child_op)

        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, None, uses_set)

    def sink(self, _id):
        """Add a Sink of the relation bound to ``_id`` to the flow graph."""
        alias_expr = ("ALIAS", _id)
        child_op = self.ep.evaluate(alias_expr)
        op = raco.algebra.Sink(child_op)

        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, None, uses_set)

    def export(self, _id, uri):
        """Add an Export of the relation bound to ``_id`` to the flow graph.

        The relation is wrapped in a Collect operator before being handed
        to Export together with ``uri``.
        """
        alias_expr = ("ALIAS", _id)
        child_op = self.ep.evaluate(alias_expr)
        collect_op = raco.algebra.Collect(child_op)
        op = raco.algebra.Export(uri, collect_op)

        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, None, uses_set)

    def dump(self, _id):
        """Add a Dump of the relation bound to ``_id`` to the flow graph."""
        alias_expr = ("ALIAS", _id)
        child_op = self.ep.evaluate(alias_expr)
        op = raco.algebra.Dump(child_op)
        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, None, uses_set)

    def dowhile(self, statement_list, termination_ex):
        """Process a do/while loop.

        :param statement_list: (type, id, expression) triples forming the
                               loop body; only 'ASSIGN' is accepted
        :param termination_ex: The loop condition expression
        :raises InvalidStatementException: if the body contains a statement
                                           that is not an assignment
        """
        first_op_id = self.cfg.next_op_id  # op ID of the top of the loop

        for _type, _id, expr in statement_list:
            if _type != 'ASSIGN':
                # TODO: Better error message
                raise InvalidStatementException('%s not allowed in do/while' %
                                                _type.lower())
            self.__do_assignment(_id, expr)

        # op ID that the loop condition receives when it is added below
        last_op_id = self.cfg.next_op_id

        self.__evaluate_expr(termination_ex, None)

        # Add a control flow edge from the loop condition to the top of the
        # loop
        self.cfg.add_edge(last_op_id, first_op_id)

    def check_schema(self, expr):
        """Placeholder check; always returns True."""
        return True

    def get_idb_leaves(self, expr, idbs):
        """Collect the IDB names referenced by an expression tree.

        :param expr: Expression tuple; its first entry is the operator name
        :param idbs: Container of known IDB names (membership is tested)
        :return: List of referenced IDB names in traversal order; a name
                 appears once per reference
        :raises InvalidStatementException: for an unrecognized operator
        """
        ret = []
        op = expr[0].lower()
        args = expr[1:]
        if op in ("bagcomp", "select"):
            # Both carry (alias, sub-expression) pairs; a missing
            # sub-expression means the alias itself names the source.
            sources = args[0] if op == "bagcomp" else args[0].from_
            for _id, arg in sources:
                if arg:
                    ret += self.get_idb_leaves(arg, idbs)
                elif _id in idbs:
                    ret.append(_id)
        elif op in ("join", "union", "cross", "diff", "intersect"):
            ret += self.get_idb_leaves(args[0], idbs)
            ret += self.get_idb_leaves(args[1], idbs)
        elif op == "unionall":
            for child in args[0]:
                ret += self.get_idb_leaves(child, idbs)
        elif op in ("limit", "countall", "distinct"):
            ret += self.get_idb_leaves(args[0], idbs)
        elif op == "alias":
            if args[0] in idbs:
                ret.append(args[0])
        else:
            raise InvalidStatementException('%s not recognized' % op)
        return ret

    def separate_inputs(self, expr, idbs, is_init):
        """Select the union-all branches of ``expr`` for one phase.

        Nested "unionall" nodes are flattened.  With ``is_init`` true the
        branches that reference no IDB are returned; otherwise the branches
        that reference at least one IDB are returned.

        :return: List of expression tuples (possibly empty)
        """
        if expr[0].lower() == "unionall":
            selected = []
            for branch in expr[1]:
                selected += self.separate_inputs(branch, idbs, is_init)
            return selected

        edb_only = not self.get_idb_leaves(expr, idbs)
        if (is_init and edb_only) or (not is_init and not edb_only):
            return [expr]
        return []

    def __combine_inputs(self, inputs):
        """Evaluate ``inputs`` and merge them into a single operator.

        No input yields an empty relation with an empty scheme, a single
        input yields its own operator and several inputs are wrapped in a
        UnionAll.
        """
        if not inputs:
            return raco.algebra.EmptyRelation(raco.scheme.Scheme())
        if len(inputs) == 1:
            return self.ep.evaluate(inputs[0])
        return raco.algebra.UnionAll(
            [self.ep.evaluate(branch) for branch in inputs])

    def untilconvergence(self, statement_list, recursion_mode,
                         pull_order_policy):
        """Process a block of IDB definitions evaluated until convergence.

        :param statement_list: (type, id, emits, expression) tuples, one
                               per IDB
        :param recursion_mode: Passed to every IDBController
        :param pull_order_policy: Passed to the UntilConvergence operator
        :raises InvalidStatementException: if an IDB name is already bound,
            or if the iterative inputs cannot be resolved because every
            remaining IDB depends on an IDB whose scheme is still unknown
        """
        idbs = {}
        for idx, (_type, _id, emits, expr) in enumerate(statement_list):
            if _id in self.symbols:
                raise InvalidStatementException('IDB %s is already used' % _id)
            # Children: [initial input, iterative input, empty relation];
            # the first two are filled in below.
            idbcontroller = raco.algebra.IDBController(
                _id, idx,
                [None, None, raco.algebra.EmptyRelation(raco.scheme.Scheme())],
                emits, None, recursion_mode)
            idbs[_id] = idbcontroller
            self.symbols[_id] = raco.algebra.ScanIDB(_id, None, idbcontroller)

        # Initial inputs: the branches that do not reference any IDB
        for _type, _id, emits, expr in statement_list:
            initial_inputs = self.separate_inputs(expr, idbs, True)
            idbs[_id].children()[0] = self.__combine_inputs(initial_inputs)

        # Iterative inputs: an IDB can only be resolved once the scheme of
        # every IDB it references is known, so sweep until all are done.
        while True:
            stuck = []
            progressed = False
            for _type, _id, emits, expr in statement_list:
                if idbs[_id].children()[1] is not None:
                    continue
                leaves = self.get_idb_leaves(expr, idbs)
                if any(idbs[leaf].scheme() is None for leaf in leaves):
                    stuck.append(_id)
                    continue
                iterative_inputs = self.separate_inputs(expr, idbs, False)
                idbs[_id].children()[1] = self.__combine_inputs(
                    iterative_inputs)
                progressed = True

            if not stuck:
                break
            if not progressed:
                # A sweep that resolved nothing changed no state, so every
                # further sweep would be identical: fail instead of hanging.
                raise InvalidStatementException(
                    'cannot resolve the schema of IDB(s): %s' %
                    ', '.join(str(name) for name in stuck))

        # list(...) so the operator receives a real list on Python 3 as well
        op = raco.algebra.UntilConvergence(list(idbs.values()),
                                           pull_order_policy)
        uses_set = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(op, None, uses_set)

    def get_logical_plan(self, **kwargs):
        """Return an operator representing the logical query plan.

        Recognized keyword arguments (both default to True):
        ``dead_code_elimination`` and ``apply_chaining``; any other keyword
        is ignored.
        """
        return self.cfg.get_logical_plan(
            dead_code_elimination=kwargs.get('dead_code_elimination', True),
            apply_chaining=kwargs.get('apply_chaining', True))

    def __get_physical_plan_for__(self, target_phys_algebra, **kwargs):
        """Optimize the logical plan for ``target_phys_algebra``.

        ``kwargs`` are forwarded both to get_logical_plan and, with
        ``target`` added, to optimize.
        """
        logical_plan = self.get_logical_plan(**kwargs)

        kwargs['target'] = target_phys_algebra
        return optimize(logical_plan, **kwargs)

    def get_physical_plan(self, **kwargs):
        """Return an operator representing the physical query plan.

        The target algebra is taken from the ``target_alg`` keyword; when it
        is missing or None, MyriaHyperCubeAlgebra is used if
        ``multiway_join`` is true and MyriaLeftDeepTreeAlgebra otherwise.
        """
        target_phys_algebra = kwargs.get('target_alg')
        if target_phys_algebra is None:
            if kwargs.get('multiway_join', False):
                target_phys_algebra = MyriaHyperCubeAlgebra(self.catalog)
            else:
                target_phys_algebra = MyriaLeftDeepTreeAlgebra()

        return self.__get_physical_plan_for__(target_phys_algebra, **kwargs)

    def get_json(self, **kwargs):
        """Return the result of compile_to_json for the physical plan.

        ``kwargs`` are forwarded to get_physical_plan.
        """
        # NOTE(review): the logical plan used for the description string is
        # built with default options, independent of kwargs -- confirm.
        lp = self.get_logical_plan()
        pps = self.get_physical_plan(**kwargs)

        # TODO This is not correct. The first argument is the raw query string,
        # not the string representation of the logical plan
        return compile_to_json(str(lp), pps, pps, "myrial")

    @classmethod
    def get_json_from_physical_plan(cls, pp):
        """Return the result of compile_to_json for a given physical plan."""
        # TODO This is not correct. The first argument is the raw query string,
        # not the string representation of the logical plan
        return compile_to_json(
            "NOT_SOURCED_FROM_LOGICAL_RA", pp, pp, "myrial")
Esempio n. 5
0
class StatementProcessor(object):
    """Turn a list of statements into operators on a control flow graph."""

    def __init__(self, catalog, use_dummy_schema=False):
        """Create the symbol table, expression processor and flow graph.

        :param catalog: Catalog handed to the expression processor; asserted
                        to be a raco.catalog.Catalog instance
        :param use_dummy_schema: Forwarded unchanged to ExpressionProcessor
        """
        # Identifier (alias) -> raco.algebra.Operation instance.  The very
        # same dictionary object is shared with the expression processor.
        symbol_table = {}
        self.symbols = symbol_table

        assert isinstance(catalog, raco.catalog.Catalog)
        self.catalog = catalog
        self.ep = ExpressionProcessor(symbol_table, catalog,
                                      use_dummy_schema)

        self.cfg = ControlFlowGraph()

    def evaluate(self, statements):
        """Run each statement through the handler named by its first entry.

        :param statements: Sequences of the form (keyword, arg, ...)
        """
        for stmt in statements:
            # The lower-cased keyword is the name of the handling method
            keyword = stmt[0]
            handler = getattr(self, keyword.lower())
            handler(*stmt[1:])

    def __evaluate_expr(self, expr, _def):
        """Evaluate ``expr`` and record the result in the flow graph.

        :param expr: Expression to evaluate
        :type expr: Myrial AST tuple
        :param _def: Name of the variable the expression defines, or None
                     when it defines none
        :type _def: string
        :return: The operator the expression evaluated to
        """
        result_op = self.ep.evaluate(expr)
        used = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(result_op, _def, used)
        return result_op

    def __do_assignment(self, _id, expr):
        """Handle an assignment and record it in the flow graph.

        :param _id: Name of the variable being assigned
        :type _id: string
        :param expr: Relational expression providing the value
        :type expr: A Myrial expression AST node tuple
        """
        rhs_op = self.ep.evaluate(expr)
        if _id in self.symbols:
            # The name is already bound: check the new value against it
            check_assignment_compatability(rhs_op, self.symbols[_id])

        used = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(raco.algebra.StoreTemp(_id, rhs_op), _id, used)

        # From now on the symbol resolves to a scan of the materialized
        # table.  This relies on Myrial having no scoping.
        self.symbols[_id] = raco.algebra.ScanTemp(_id, rhs_op.scheme())

    def assign(self, _id, expr):
        """Bind a variable to the value of an expression."""
        self.__do_assignment(_id, expr)

    def store(self, _id, rel_key, how_partitioned):
        """Record a Store of the relation bound to ``_id``.

        :param _id: Alias of the relation to store
        :param rel_key: Destination; asserted to be a
                        relation_key.RelationKey
        :param how_partitioned: Column references to shuffle on, or a false
                                value for no shuffle
        """
        assert isinstance(rel_key, relation_key.RelationKey)

        source_op = self.ep.evaluate(("ALIAS", _id))

        if how_partitioned:
            source_scheme = source_op.scheme()
            shuffle_cols = [get_unnamed_ref(col, source_scheme)
                            for col in how_partitioned]
            source_op = raco.algebra.Shuffle(source_op, shuffle_cols)

        store_op = raco.algebra.Store(rel_key, source_op)
        used = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(store_op, None, used)

    def sink(self, _id):
        """Record a Sink of the relation bound to ``_id``."""
        sink_op = raco.algebra.Sink(self.ep.evaluate(("ALIAS", _id)))
        used = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(sink_op, None, used)

    def dump(self, _id):
        """Record a Dump of the relation bound to ``_id``."""
        dump_op = raco.algebra.Dump(self.ep.evaluate(("ALIAS", _id)))
        used = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(dump_op, None, used)

    def dowhile(self, statement_list, termination_ex):
        """Handle a do/while loop.

        :param statement_list: (type, id, expression) triples of the loop
                               body; every type must be 'ASSIGN'
        :param termination_ex: Expression for the loop condition
        """
        # ID the first operator of the loop body is about to receive
        loop_top = self.cfg.next_op_id

        for _type, _id, expr in statement_list:
            if _type == 'ASSIGN':
                self.__do_assignment(_id, expr)
                continue
            # TODO: Better error message
            raise InvalidStatementException('%s not allowed in do/while' %
                                            _type.lower())

        # ID the loop condition is about to receive
        condition_id = self.cfg.next_op_id
        self.__evaluate_expr(termination_ex, None)

        # Back edge: loop condition -> top of the loop
        self.cfg.add_edge(condition_id, loop_top)

    def get_logical_plan(self, **kwargs):
        """Return an operator representing the logical query plan.

        ``dead_code_elimination`` and ``apply_chaining`` are read from the
        keyword arguments; both default to True.
        """
        eliminate_dead_code = kwargs.get('dead_code_elimination', True)
        chain = kwargs.get('apply_chaining', True)
        return self.cfg.get_logical_plan(
            dead_code_elimination=eliminate_dead_code,
            apply_chaining=chain)

    def __get_physical_plan_for__(self, target_phys_algebra, **kwargs):
        """Optimize the logical plan for the given target algebra."""
        plan = self.get_logical_plan(**kwargs)

        # Same keywords as received, plus the target algebra
        optimizer_args = dict(kwargs, target=target_phys_algebra)
        return optimize(plan, **optimizer_args)

    def get_physical_plan(self, **kwargs):
        """Return an operator representing the physical query plan.

        The algebra comes from the ``target_alg`` keyword; if that is
        missing or None the ``multiway_join`` keyword selects between the
        hyper-cube and the left-deep-tree algebra.
        """
        algebra = kwargs.get('target_alg')
        if algebra is None:
            algebra = (MyriaHyperCubeAlgebra(self.catalog)
                       if kwargs.get('multiway_join', False)
                       else MyriaLeftDeepTreeAlgebra())

        return self.__get_physical_plan_for__(algebra, **kwargs)

    def get_json(self, **kwargs):
        """Return compile_to_json's output for the physical plan."""
        logical = self.get_logical_plan()
        physical = self.get_physical_plan(**kwargs)

        # TODO This is not correct. The first argument is the raw query
        # string, not the string representation of the logical plan
        return compile_to_json(str(logical), physical, physical, "myrial")

    @classmethod
    def get_json_from_physical_plan(cls, pp):
        """Return compile_to_json's output for an existing physical plan."""
        # TODO This is not correct. The first argument is the raw query
        # string, not the string representation of the logical plan
        return compile_to_json(
            "NOT_SOURCED_FROM_LOGICAL_RA", pp, pp, "myrial")
Esempio n. 6
0
class StatementProcessor(object):
    """Turn a list of statements into operators on a control flow graph."""

    def __init__(self, catalog=None, use_dummy_schema=False):
        """Create the symbol table, expression processor and flow graph.

        :param catalog: Catalog handed to the expression processor; stored
                        as-is (defaults to None, no type check is done)
        :param use_dummy_schema: Forwarded unchanged to ExpressionProcessor
        """
        # Identifier (alias) -> raco.algebra.Operation instance.  The very
        # same dictionary object is shared with the expression processor.
        symbol_table = {}
        self.symbols = symbol_table

        self.catalog = catalog
        self.ep = ExpressionProcessor(symbol_table, catalog,
                                      use_dummy_schema)

        self.cfg = ControlFlowGraph()

    def evaluate(self, statements):
        """Run each statement through the handler named by its first entry.

        :param statements: Sequences of the form (keyword, arg, ...)
        """
        for stmt in statements:
            # The lower-cased keyword is the name of the handling method
            keyword = stmt[0]
            handler = getattr(self, keyword.lower())
            handler(*stmt[1:])

    def __evaluate_expr(self, expr, _def):
        """Evaluate ``expr`` and record the result in the flow graph.

        :param expr: Expression to evaluate
        :type expr: Myrial AST tuple
        :param _def: Name of the variable the expression defines, or None
                     when it defines none
        :type _def: string
        :return: The operator the expression evaluated to
        """
        result_op = self.ep.evaluate(expr)
        used = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(result_op, _def, used)
        return result_op

    def __do_assignment(self, _id, expr):
        """Handle an assignment and record it in the flow graph.

        :param _id: Name of the variable being assigned
        :type _id: string
        :param expr: Relational expression providing the value
        :type expr: A Myrial expression AST node tuple
        """
        rhs_op = self.ep.evaluate(expr)
        if _id in self.symbols:
            # The name is already bound: check the new value against it
            check_assignment_compatability(rhs_op, self.symbols[_id])

        used = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(raco.algebra.StoreTemp(_id, rhs_op), _id, used)

        # From now on the symbol resolves to a scan of the materialized
        # table.  This relies on Myrial having no scoping.
        self.symbols[_id] = raco.algebra.ScanTemp(_id, rhs_op.scheme())

    def assign(self, _id, expr):
        """Bind a variable to the value of an expression."""
        self.__do_assignment(_id, expr)

    def store(self, _id, rel_key, how_partitioned):
        """Record a Store of the relation bound to ``_id``.

        :param _id: Alias of the relation to store
        :param rel_key: Destination; asserted to be a
                        relation_key.RelationKey
        :param how_partitioned: Column references to shuffle on, or a false
                                value for no shuffle
        """
        assert isinstance(rel_key, relation_key.RelationKey)

        source_op = self.ep.evaluate(("ALIAS", _id))

        if how_partitioned:
            source_scheme = source_op.scheme()
            shuffle_cols = [get_unnamed_ref(col, source_scheme)
                            for col in how_partitioned]
            source_op = raco.algebra.Shuffle(source_op, shuffle_cols)

        store_op = raco.algebra.Store(rel_key, source_op)
        used = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(store_op, None, used)

    def dump(self, _id):
        """Record a Dump of the relation bound to ``_id``."""
        dump_op = raco.algebra.Dump(self.ep.evaluate(("ALIAS", _id)))
        used = self.ep.get_and_clear_uses_set()
        self.cfg.add_op(dump_op, None, used)

    def dowhile(self, statement_list, termination_ex):
        """Handle a do/while loop.

        :param statement_list: (type, id, expression) triples of the loop
                               body; every type must be 'ASSIGN'
        :param termination_ex: Expression for the loop condition
        """
        # ID the first operator of the loop body is about to receive
        loop_top = self.cfg.next_op_id

        for _type, _id, expr in statement_list:
            if _type == 'ASSIGN':
                self.__do_assignment(_id, expr)
                continue
            # TODO: Better error message
            raise InvalidStatementException('%s not allowed in do/while' %
                                            _type.lower())

        # ID the loop condition is about to receive
        condition_id = self.cfg.next_op_id
        self.__evaluate_expr(termination_ex, None)

        # Back edge: loop condition -> top of the loop
        self.cfg.add_edge(condition_id, loop_top)

    def get_logical_plan(self):
        """Return an operator representing the logical query plan."""
        plan = self.cfg.get_logical_plan()
        return plan

    def get_physical_plan(self):
        """Return an operator representing the physical query plan."""
        # TODO: Get rid of the dummy label argument here.
        labelled_plans = [('root', self.get_logical_plan())]
        optimized = optimize(labelled_plans,
                             target=MyriaAlgebra,
                             source=LogicalAlgebra)

        # Only one plan went in: take the first result and drop its label
        return optimized[0][1]

    def get_json(self):
        """Return compile_to_json's output for the optimized plan."""
        logical = self.get_logical_plan()
        optimized = optimize([(None, logical)], target=MyriaAlgebra,
                             source=LogicalAlgebra)
        physical = optimized[0][1]

        # TODO This is not correct. The first argument is the raw query
        # string, not the string representation of the logical plan
        return compile_to_json(str(logical), physical, physical)