Exemple #1
0
            def common_scope_of_scopes(scopes):
                """Return the last level on which all scope stacks agree.

                The ``scope_stack()`` of every entry in ``scopes`` is consumed
                from its end, one element per round. As long as every popped
                element compares equal, that element becomes the current
                answer; the search stops on the first disagreement or as soon
                as any stack runs out.

                Raises:
                    error.InvalidLogicalPlanException: if no level was ever
                        shared by all stacks.
                """
                stacks = [scope.scope_stack() for scope in scopes]

                shared = None
                diverged = False

                while not diverged:
                    level = []
                    for stack in stacks:
                        if len(stack) == 0:
                            # A stack is exhausted: no further level can match.
                            if shared is None:
                                raise error.InvalidLogicalPlanException(
                                    "Error getting "
                                    "common scope")
                            return shared

                        level.append(stack.pop())

                    if level.count(level[0]) == len(level):
                        shared = level[0]
                    else:
                        diverged = True

                if shared is None:
                    raise error.InvalidLogicalPlanException(
                        "Error getting common scope")
                return shared
Exemple #2
0
    def sink(self, scope, from_node):
        """Create a sink node fed by ``from_node`` inside ``scope``.

        The new node is marked infinite when
        ``LogicalPlan.Node.if_infinite([from_node], scope)`` is true, and is
        appended to this plan's node list before being returned.

        Raises:
            error.InvalidLogicalPlanException: if ``from_node`` is itself a
                sink node, or if its scope is not covered by ``scope``.
        """
        source_is_sink = (
            from_node.type() == logical_plan_pb2.PbLogicalPlanNode.SINK_NODE)
        if source_is_sink:
            raise error.InvalidLogicalPlanException(
                "Invalid plan: sinker's user cannot be sinker")

        covered = from_node.scope().is_cover_by(scope)
        if not covered:
            raise error.InvalidLogicalPlanException(
                "Sinker must be in right scope")

        new_sink = LogicalPlan.SinkNode(from_node, scope, self)
        if LogicalPlan.Node.if_infinite([from_node], scope):
            new_sink.set_infinite()

        self.__nodes.append(new_sink)
        return new_sink
Exemple #3
0
        def to_proto_message(self):
            """Build a ``PbLogicalPlanNode`` message describing this node.

            Common fields (id, type, debug info, cache flag, optional memory
            and cpu limits, infinite flag, objector, scope id) are filled in
            here; ``set_specific_field`` is then called to add the rest.

            Raises:
                error.InvalidLogicalPlanException: if the node has no objector
                    and is not a sink node.
            """
            pb_node = logical_plan_pb2.PbLogicalPlanNode()

            pb_node.id = self._id
            pb_node.type = self._node_type
            pb_node.debug_info = (
                self._debug_info
                + self._extra_debug_info
                + ", size: "
                + str(self._size)
            )
            pb_node.cache = self._cache

            # -1 means "no limit set"; the field is then left untouched.
            if self._memory_limit != -1:
                pb_node.memory_limit = self._memory_limit
            if self._cpu_limit != -1:
                pb_node.cpu_limit = self._cpu_limit
            pb_node.is_infinite = self._is_infinite

            if self._objector_entity is not None:
                pb_node.objector.CopyFrom(
                    self._objector_entity.to_proto_message())
            elif self._node_type != logical_plan_pb2.PbLogicalPlanNode.SINK_NODE:
                # Only sink nodes may go without an objector.
                raise error.InvalidLogicalPlanException(
                    "Non sink node must have objector!")

            pb_node.scope = self._scope.id()
            self.set_specific_field(pb_node)

            return pb_node
Exemple #4
0
    def process(self, scope=None, from_nodes=None):
        """Create a process node over ``from_nodes``.

        When ``scope`` is None the scope is obtained from
        ``LogicalPlan.Scope.common_scope(from_nodes)``. The new node is
        marked infinite when ``LogicalPlan.Node.if_infinite`` says so, and is
        appended to this plan's node list before being returned.

        Raises:
            error.InvalidLogicalPlanException: if ``from_nodes`` is not a
                non-empty list, contains something that is not a
                ``LogicalPlan.Node``, or contains a node whose scope is not
                covered by the chosen scope.
        """
        if from_nodes is None:
            from_nodes = []

        if not isinstance(from_nodes, list) or len(from_nodes) == 0:
            raise error.InvalidLogicalPlanException(
                "Invalid arguments: "
                "from_nodes must be valid list")

        def process_with_scope(_scope):
            # Type check first, scope coverage second.
            inputs_ok = (
                all(isinstance(node, LogicalPlan.Node) for node in from_nodes)
                and all(node.scope().is_cover_by(_scope) for node in from_nodes)
            )
            if not inputs_ok:
                raise error.InvalidLogicalPlanException(
                    "Invalid arguments: wrong processed nodes")

            new_node = LogicalPlan.ProcessNode(from_nodes, _scope, self)
            if LogicalPlan.Node.if_infinite(from_nodes, _scope):
                new_node.set_infinite()

            self.__nodes.append(new_node)
            return new_node

        if scope is None:
            scope = LogicalPlan.Scope.common_scope(from_nodes)
        return process_with_scope(scope)
Exemple #5
0
    def union(self, scope=None, nodes=None):
        """Create a union node over ``nodes``.

        When ``scope`` is None the scope is obtained from
        ``LogicalPlan.Scope.common_scope(nodes)``. The new node is marked
        infinite when ``LogicalPlan.Node.if_infinite`` says so, and is
        appended to this plan's node list before being returned.

        Raises:
            error.InvalidLogicalPlanException: if ``nodes`` is not a non-empty
                list, contains something that is not a ``LogicalPlan.Node``,
                or contains a node whose scope is not covered by the chosen
                scope.
        """
        if nodes is None:
            nodes = []

        if not isinstance(nodes, list) or len(nodes) == 0:
            raise error.InvalidLogicalPlanException(
                "Invalid arguments: "
                "nodes to union must be valid list")

        def union_with_scope(_scope):
            # Type check first, scope coverage second.
            inputs_ok = (
                all(isinstance(node, LogicalPlan.Node) for node in nodes)
                and all(node.scope().is_cover_by(_scope) for node in nodes)
            )
            if not inputs_ok:
                raise error.InvalidLogicalPlanException(
                    "Invalid arguments: wrong nodes to union")

            new_node = LogicalPlan.UnionNode(nodes, _scope, self)
            if LogicalPlan.Node.if_infinite(nodes, _scope):
                new_node.set_infinite()

            self.__nodes.append(new_node)
            return new_node

        if scope is None:
            scope = LogicalPlan.Scope.common_scope(nodes)
        return union_with_scope(scope)
Exemple #6
0
 def __getstate__(self):
     """
         Always raise, so that no PType can be pickled. This makes sure a
         PType will not be used in a lambda. Please use sideinput for PType.
     """
     # NOTE(review): the two literals are joined with no space after "PType.";
     # the text is kept as-is so the emitted message does not change.
     reason = (
         "Please use sideinput for PType."
         "More info here: http://bigflow.baidu.com/doc/guide.html#sideinputs"
     )
     raise error.InvalidLogicalPlanException(reason)
Exemple #7
0
    def create_and_setup(self):
        """Unpickle and return the object stored in this entity's config.

        Raises:
            error.InvalidLogicalPlanException: if ``is_empty()`` is true.
        """
        if self.is_empty():
            raise error.InvalidLogicalPlanException("Empty entity")

        # NOTE(review): pickle.loads can execute arbitrary code on crafted
        # input -- confirm the config only ever comes from a trusted source.
        return pickle.loads(self.__config)
Exemple #8
0
        def __set_scope_type(self, scope_type):
            """Set the type stored in this object's scope message.

            The type may be set while the message still has the DEFAULT type,
            or re-set to the value it already has.

            Raises:
                error.InvalidLogicalPlanException: if the message already has
                    a non-DEFAULT type different from ``scope_type``.
            """
            pb_scope = self._scope._Scope__message

            already_typed = pb_scope.type != logical_plan_pb2.PbScope.DEFAULT
            if already_typed and pb_scope.type != scope_type:
                raise error.InvalidLogicalPlanException("Invalid scope type")

            pb_scope.type = scope_type
Exemple #9
0
        def set_specific_field(self, message):
            """Fill ``message.load_node`` with this node's URIs and loader.

            Raises:
                error.InvalidLogicalPlanException: if no loader entity is set.
            """
            if self.__loader_entity is None:
                raise error.InvalidLogicalPlanException("Invalid loader")

            pb_load = message.load_node
            for uri in self.__uri_list:
                pb_load.uri.append(uri)

            loader_message = self.__loader_entity.to_proto_message()
            message.load_node.loader.CopyFrom(loader_message)
Exemple #10
0
        def leave_scope(self):
            """Return a union of this node created in the father scope.

            The union node is created through ``self._plan.union`` and is
            given this node's objector entity.

            Raises:
                error.InvalidLogicalPlanException: if this node's scope has no
                    father.
            """
            if self._scope.father() is None:
                raise error.InvalidLogicalPlanException(
                    "Trying to leave global scope")

            lifted = self._plan.union(self._scope.father(), [self])
            lifted.set_objector(self._objector_entity)
            return lifted
Exemple #11
0
 def from_proto_message(self, message):
     """Load this entity's name and config from a protobuf ``message``.

     The name is the attribute name in ``bigflow.core.entity_names`` whose
     string value equals ``message.name``. If several attributes match, the
     last one seen wins.

     The match is tracked in a local variable rather than read back from
     ``self.__name``. The attribute may not exist yet when this method is
     reached from the constructor, and on a reused instance a stale name
     would otherwise hide a failed lookup.

     Raises:
         error.InvalidLogicalPlanException: if no attribute of
             ``entity_names`` has the value ``message.name``.
     """
     from bigflow.core import entity_names

     matched_name = None
     for key, value in entity_names.__dict__.items():
         if isinstance(key, str) and isinstance(
                 value, str) and value == message.name:
             matched_name = key

     if matched_name is None:
         raise error.InvalidLogicalPlanException(
             "Invalid name/type for entity.")

     self.__name = matched_name
     self.__config = message.config
Exemple #12
0
        def set_specific_field(self, message):
            """Fill ``message.sink_node`` with the source node id and sinker.

            Raises:
                error.InvalidLogicalPlanException: if no sinker entity is set.
            """
            if self.__sinker_entity is None:
                raise error.InvalidLogicalPlanException("Invalid sinker")

            pb_sink = logical_plan_pb2.PbSinkNode()
            # "from" is a Python keyword, so the field cannot be assigned
            # with plain attribute syntax.
            setattr(pb_sink, "from", self.__from_node.id())
            pb_sink.sinker.CopyFrom(self.__sinker_entity.to_proto_message())

            message.sink_node.CopyFrom(pb_sink)
Exemple #13
0
        def by(self, loader_obj):
            """Use ``loader_obj`` as the loader and return ``self``.

            ``loader_obj`` is wrapped into a loader entity, which is set on
            this node and also copied into the ``input_scope.spliter`` field
            of the scope message.

            Raises:
                error.InvalidLogicalPlanException: if the scope message is not
                    of type INPUT. The loader has already been set on this
                    node by then.
            """
            wrapped_loader = entity.Entity.of(entity.Entity.loader, loader_obj)
            self.set_loader(wrapped_loader)

            pb_scope = self._scope._Scope__message
            is_input_scope = pb_scope.type == logical_plan_pb2.PbScope.INPUT
            if not is_input_scope:
                raise error.InvalidLogicalPlanException("Invalid message")

            pb_scope.input_scope.spliter.CopyFrom(
                wrapped_loader.to_proto_message())
            return self
Exemple #14
0
    def __init__(self, name="", operator=None, message=None):
        """Build an entity from a protobuf message or from an operator.

        When ``message`` is given, ``name`` and ``operator`` are ignored and
        the entity is loaded through ``from_proto_message``. Otherwise the
        name and config depend on ``operator``:

        * an ``EntitiedBySelf`` instance supplies both itself;
        * a ``str`` is stored as the config unchanged, under ``name``;
        * anything else is serialized with ``cloudpickle.dumps``, under
          ``name``.

        Raises:
            error.InvalidLogicalPlanException: if no message is given and
                ``name`` is empty or ``operator`` is None.
        """
        if message is not None:
            self.from_proto_message(message)
            return

        if len(name) == 0:
            raise error.InvalidLogicalPlanException(
                "Invalid name for entity.")
        if operator is None:
            raise error.InvalidLogicalPlanException(
                "Invalid operator(None) for entity.")

        if isinstance(operator, EntitiedBySelf):
            self.__name = operator.get_entity_name()
            self.__config = operator.get_entity_config()
            return

        self.__name = name
        if isinstance(operator, str):
            self.__config = operator
        else:
            self.__config = cloudpickle.dumps(operator)
Exemple #15
0
        def union_with_scope(_scope):
            """Create, register and return a union node in ``_scope``.

            ``nodes`` and ``self`` are not parameters; they are taken from
            the enclosing scope.

            Raises:
                error.InvalidLogicalPlanException: if an entry of ``nodes`` is
                    not a ``LogicalPlan.Node`` or its scope is not covered by
                    ``_scope``.
            """
            # Type check first, scope coverage second.
            inputs_ok = (
                all(isinstance(node, LogicalPlan.Node) for node in nodes)
                and all(node.scope().is_cover_by(_scope) for node in nodes)
            )
            if not inputs_ok:
                raise error.InvalidLogicalPlanException(
                    "Invalid arguments: wrong nodes to union")

            new_node = LogicalPlan.UnionNode(nodes, _scope, self)
            if LogicalPlan.Node.if_infinite(nodes, _scope):
                new_node.set_infinite()

            self.__nodes.append(new_node)
            return new_node
Exemple #16
0
        def process_with_scope(_scope):
            """Create, register and return a process node in ``_scope``.

            ``from_nodes`` and ``self`` are not parameters; they are taken
            from the enclosing scope.

            Raises:
                error.InvalidLogicalPlanException: if an entry of
                    ``from_nodes`` is not a ``LogicalPlan.Node`` or its scope
                    is not covered by ``_scope``.
            """
            # Type check first, scope coverage second.
            inputs_ok = (
                all(isinstance(node, LogicalPlan.Node) for node in from_nodes)
                and all(node.scope().is_cover_by(_scope) for node in from_nodes)
            )
            if not inputs_ok:
                raise error.InvalidLogicalPlanException(
                    "Invalid arguments: wrong processed nodes")

            new_node = LogicalPlan.ProcessNode(from_nodes, _scope, self)
            if LogicalPlan.Node.if_infinite(from_nodes, _scope):
                new_node.set_infinite()

            self.__nodes.append(new_node)
            return new_node
Exemple #17
0
    def __add_shuffle_node(self, source_node, target_scope):
        """Add a broadcast shuffle node from ``source_node`` into ``target_scope``.

        The shuffle group is the first registered one whose scope is
        ``target_scope`` (identity comparison). The new node is appended to
        this plan's node list and returned.

        Raises:
            error.InvalidLogicalPlanException: if ``source_node``'s scope is
                not the father of ``target_scope``, or if no shuffle group is
                registered for ``target_scope``.
        """
        if target_scope.father() is not source_node.scope():
            raise error.InvalidLogicalPlanException(
                "Source node should only belong to"
                " target scope's father")

        # Find ShuffleGroup for Scope
        group = next(
            (candidate for candidate in self.__shuffles
             if candidate.scope() is target_scope),
            None)

        if group is None:
            raise error.InvalidLogicalPlanException(
                "Unable to find corresponding "
                "Shuffle Group for target scope")

        broadcast_node = LogicalPlan.ShuffleNode(
            source_node, group, self).broadcast()
        self.__nodes.append(broadcast_node)

        return broadcast_node
Exemple #18
0
        def __init__(self, from_nodes, scope, plan):
            """Initialize a union node over ``from_nodes``.

            The size is the sum of the inputs' ``size()``. The serde is taken
            from the inputs (the last input wins), and the objector is the
            one shared by all inputs.

            Raises:
                error.InvalidLogicalPlanException: if an input has no
                    objector, or if the inputs' objectors are not all equal.
            """
            LogicalPlan.Node.__init__(
                self, logical_plan_pb2.PbLogicalPlanNode.UNION_NODE, scope,
                plan)
            self._type_str = "UnionNode"
            self.__from_nodes = from_nodes
            self._size = sum(node.size() for node in self.__from_nodes)

            shared_objector = None
            for source in from_nodes:
                assert source._serde is not None
                self._serde = source._serde

                if source.objector() is None:
                    raise error.InvalidLogicalPlanException(
                        "Error getting objector from inputs")

                if shared_objector is None:
                    # First input: remember its objector as the reference.
                    shared_objector = source.objector()
                    continue

                if shared_objector != source.objector():
                    raise error.InvalidLogicalPlanException(
                        "Union sources with different objectors"
                        ", user must set objector manually")

            self.set_objector(shared_objector)
Exemple #19
0
        def set_specific_field(self, message):
            """Fill ``message.process_node`` from this node's settings.

            Copies the processor entity, the scalar processing flags and one
            input message per entry of this node's inputs.

            Raises:
                error.InvalidLogicalPlanException: if no processor entity is
                    set.
            """
            if self.__processor_entity is None:
                raise error.InvalidLogicalPlanException("Invalid processor")

            pb_process = message.process_node

            pb_process.processor.CopyFrom(
                self.__processor_entity.to_proto_message())
            pb_process.least_prepared_inputs = self.__least_prepared_inputs
            pb_process.is_ignore_group = self.__is_ignore_group
            pb_process.is_stateful = self.__is_stateful
            pb_process.effective_key_num = self.__effective_key_num

            for node_input in self.__inputs:
                pb_process.input.add().CopyFrom(node_input.to_proto_message())
Exemple #20
0
    def shuffle(self, scope, from_nodes):
        """Shuffle ``from_nodes`` into a new scope and return the shuffle group.

        A new ``LogicalPlan.Scope`` is built from ``scope`` and a shuffle
        group is created for it; both are registered on this plan. One
        shuffle node is then created and registered per entry of
        ``from_nodes``.

        Raises:
            error.InvalidLogicalPlanException: if ``from_nodes`` is not a
                non-empty list.
        """
        if not isinstance(from_nodes, list) or len(from_nodes) == 0:
            raise error.InvalidLogicalPlanException(
                "Invalid arguments: "
                "nodes to shuffle must be valid list")

        new_scope = LogicalPlan.Scope(scope, self)
        self.__scopes.append(new_scope)

        group = LogicalPlan._ShuffleGroup(new_scope)
        self.__shuffles.append(group)

        for source in from_nodes:
            self.__nodes.append(LogicalPlan.ShuffleNode(source, group, self))

        return group
Exemple #21
0
    def to_proto_message(self):
        """Serialize the whole plan into a ``PbLogicalPlan`` message.

        Every registered node and scope is converted with its own
        ``to_proto_message``. If an environment is set, it is wrapped in an
        entity and copied into the message as well.

        Raises:
            error.InvalidLogicalPlanException: if the resulting message
                reports that it is not initialized.
        """
        pb_plan = logical_plan_pb2.PbLogicalPlan()

        for node in self.__nodes:
            pb_plan.node.add().CopyFrom(node.to_proto_message())

        for scope in self.__scopes:
            pb_plan.scope.add().CopyFrom(scope.to_proto_message())

        if self._environment:
            environment_entity = entity.Entity.of(
                "EntitiedBySelf", self._environment)
            pb_plan.environment.CopyFrom(
                environment_entity.to_proto_message())

        if pb_plan.IsInitialized():
            return pb_plan

        raise error.InvalidLogicalPlanException(
            "Message is not initialized")
Exemple #22
0
        def set_specific_field(self, message):
            """Fill ``message.shuffle_node`` from this shuffle node's settings.

            Always sets the source node id and the shuffle type. In addition:

            * KEY shuffles copy the key reader, which is mandatory;
            * SEQUENCE shuffles copy the partitioner when one is set;
            * WINDOW shuffles copy the time reader when one is set.

            Raises:
                error.InvalidLogicalPlanException: if the shuffle type is KEY
                    and no key reader is set.
            """
            pb_shuffle = message.shuffle_node
            # "from" is a Python keyword, so the field cannot be assigned
            # with plain attribute syntax.
            setattr(pb_shuffle, "from", self.__from_node.id())
            pb_shuffle.type = self.__shuffle_type

            if self.__shuffle_type == logical_plan_pb2.PbShuffleNode.KEY:
                if self.__key_reader is None:
                    raise error.InvalidLogicalPlanException(
                        "Invalid key reader")

                pb_shuffle.key_reader.CopyFrom(
                    self.__key_reader.to_proto_message())

            if self.__shuffle_type == logical_plan_pb2.PbShuffleNode.SEQUENCE:
                if self.__partitioner is not None:
                    pb_shuffle.partitioner.CopyFrom(
                        self.__partitioner.to_proto_message())

            if self.__shuffle_type == logical_plan_pb2.PbShuffleNode.WINDOW:
                if self.__time_reader is not None:
                    pb_shuffle.time_reader.CopyFrom(
                        self.__time_reader.to_proto_message())
Exemple #23
0
    def broadcast_to(self, source_node, target_scope):
        """Broadcast ``source_node`` into ``target_scope``.

        Starting at ``target_scope``, the chain of ``father()`` scopes is
        followed up to, but not including, the common scope of the source
        and target. One shuffle node is then added per scope in that chain,
        beginning with the scope nearest the common scope. Finally a union
        node over the last produced node is created in ``target_scope``,
        registered on this plan and returned.

        Raises:
            error.InvalidLogicalPlanException: if ``target_scope`` is the
                common scope but is not the source node's own scope.
        """
        shared_scope = LogicalPlan.Scope.common_scope(
            [source_node.scope(), target_scope])

        if target_scope is shared_scope \
                and target_scope is not source_node.scope():
            raise error.InvalidLogicalPlanException(
                "Up-forward broadcasting is forbidden")

        # Prepend while walking upwards, so the chain ends up ordered from
        # the scope nearest the common scope down to the target scope.
        chain = []
        cursor = target_scope
        while cursor is not None and cursor is not shared_scope:
            chain.insert(0, cursor)
            cursor = cursor.father()

        carrier = source_node
        for hop in chain:
            carrier = self.__add_shuffle_node(carrier, hop)

        result = LogicalPlan.UnionNode([carrier], target_scope, self)
        self.__nodes.append(result)

        return result