def common_scope_of_scopes(scopes):
    """Return the deepest scope shared by every scope in ``scopes``.

    Each scope's ``scope_stack()`` is consumed from its end; as long as all
    stacks yield an equal entry, that entry becomes the current answer.  The
    walk stops at the first disagreement or when any stack runs out.

    Args:
        scopes: iterable of objects exposing ``scope_stack()``.

    Returns:
        The last entry on which all stacks agreed.

    Raises:
        error.InvalidLogicalPlanException: if ``scopes`` is empty or the
            stacks do not agree on even their first (last-positioned) entry.
    """
    # Work on copies so popping never mutates a list owned by a scope
    # (defensive: whether scope_stack() returns a fresh list is not visible
    # from here).
    stacks = [list(scope.scope_stack()) for scope in scopes]
    if not stacks:
        # Without this guard the empty case surfaced as a bare IndexError.
        raise error.InvalidLogicalPlanException("Error getting common scope")

    result = None
    # Continue only while every stack still has an entry to compare.
    while all(stacks):
        top_scopes = [stack.pop() for stack in stacks]
        if top_scopes.count(top_scopes[0]) != len(top_scopes):
            break
        result = top_scopes[0]

    if result is None:
        raise error.InvalidLogicalPlanException("Error getting common scope")
    return result
def sink(self, scope, from_node):
    """Create a sink node fed by ``from_node`` and register it in the plan.

    Args:
        scope: scope handed to the new sink node;
            ``from_node.scope().is_cover_by(scope)`` must hold.
        from_node: upstream node; it must not be a ``SINK_NODE`` itself.

    Returns:
        The newly created ``LogicalPlan.SinkNode`` (already appended to the
        plan's node list).

    Raises:
        error.InvalidLogicalPlanException: if ``from_node`` is a sink node or
            its scope is not covered by ``scope``.
    """
    sink_type = logical_plan_pb2.PbLogicalPlanNode.SINK_NODE
    if from_node.type() == sink_type:
        raise error.InvalidLogicalPlanException(
            "Invalid plan: sinker's user cannot be sinker")

    covered = from_node.scope().is_cover_by(scope)
    if not covered:
        raise error.InvalidLogicalPlanException(
            "Sinker must be in right scope")

    new_sink = LogicalPlan.SinkNode(from_node, scope, self)
    # Propagate the infinite flag as decided by Node.if_infinite.
    if LogicalPlan.Node.if_infinite([from_node], scope):
        new_sink.set_infinite()

    self.__nodes.append(new_sink)
    return new_sink
def to_proto_message(self):
    """Serialize this node into a ``PbLogicalPlanNode`` message.

    Common fields (id, type, debug info, cache flag, optional resource
    limits, infinite flag, objector, scope id) are filled here; the
    node-kind-specific part is delegated to ``set_specific_field``.

    Returns:
        The populated ``logical_plan_pb2.PbLogicalPlanNode``.

    Raises:
        error.InvalidLogicalPlanException: if the node has no objector and is
            not a ``SINK_NODE``.
    """
    message = logical_plan_pb2.PbLogicalPlanNode()
    message.id = self._id
    message.type = self._node_type

    debug_prefix = self._debug_info + self._extra_debug_info
    message.debug_info = debug_prefix + ", size: " + str(self._size)

    message.cache = self._cache
    # -1 means "limit not set": the corresponding field is left untouched.
    if self._memory_limit != -1:
        message.memory_limit = self._memory_limit
    if self._cpu_limit != -1:
        message.cpu_limit = self._cpu_limit
    message.is_infinite = self._is_infinite

    if self._objector_entity is not None:
        message.objector.CopyFrom(self._objector_entity.to_proto_message())
    elif self._node_type != logical_plan_pb2.PbLogicalPlanNode.SINK_NODE:
        # Only sink nodes are allowed to go without an objector.
        raise error.InvalidLogicalPlanException(
            "Non sink node must have objector!")

    message.scope = self._scope.id()
    self.set_specific_field(message)
    return message
def process(self, scope=None, from_nodes=None):
    """Create a process node over ``from_nodes`` and register it in the plan.

    Args:
        scope: scope for the new node.  When ``None``, the scope is obtained
            from ``LogicalPlan.Scope.common_scope(from_nodes)``.
        from_nodes: non-empty ``list`` of ``LogicalPlan.Node`` inputs.

    Returns:
        The newly created ``LogicalPlan.ProcessNode``.

    Raises:
        error.InvalidLogicalPlanException: if ``from_nodes`` is not a
            non-empty list, contains a non-node, or contains a node whose
            scope is not covered by the chosen scope.
    """
    if from_nodes is None:
        from_nodes = []
    if not (isinstance(from_nodes, list) and len(from_nodes) != 0):
        raise error.InvalidLogicalPlanException(
            "Invalid arguments: "
            "from_nodes must be valid list")

    # Resolve the scope first; node validation happens against it afterwards.
    if scope is not None:
        target_scope = scope
    else:
        target_scope = LogicalPlan.Scope.common_scope(from_nodes)

    all_are_nodes = all(
        isinstance(node, LogicalPlan.Node) for node in from_nodes)
    if not (all_are_nodes and all(
            node.scope().is_cover_by(target_scope) for node in from_nodes)):
        raise error.InvalidLogicalPlanException(
            "Invalid arguments: wrong processed nodes")

    created = LogicalPlan.ProcessNode(from_nodes, target_scope, self)
    if LogicalPlan.Node.if_infinite(from_nodes, target_scope):
        created.set_infinite()
    self.__nodes.append(created)
    return created
def union(self, scope=None, nodes=None):
    """Create a union node over ``nodes`` and register it in the plan.

    Args:
        scope: scope for the new node.  When ``None``, the scope is obtained
            from ``LogicalPlan.Scope.common_scope(nodes)``.
        nodes: non-empty ``list`` of ``LogicalPlan.Node`` objects to union.

    Returns:
        The newly created ``LogicalPlan.UnionNode``.

    Raises:
        error.InvalidLogicalPlanException: if ``nodes`` is not a non-empty
            list, contains a non-node, or contains a node whose scope is not
            covered by the chosen scope.
    """
    if nodes is None:
        nodes = []
    is_usable_list = isinstance(nodes, list) and len(nodes) != 0
    if not is_usable_list:
        raise error.InvalidLogicalPlanException(
            "Invalid arguments: "
            "nodes to union must be valid list")

    # An explicit scope wins; otherwise fall back to the common scope.
    union_scope = (scope if scope is not None
                   else LogicalPlan.Scope.common_scope(nodes))

    types_ok = all(isinstance(member, LogicalPlan.Node) for member in nodes)
    if not (types_ok and all(
            member.scope().is_cover_by(union_scope) for member in nodes)):
        raise error.InvalidLogicalPlanException(
            "Invalid arguments: wrong nodes to union")

    merged = LogicalPlan.UnionNode(nodes, union_scope, self)
    if LogicalPlan.Node.if_infinite(nodes, union_scope):
        merged.set_infinite()
    self.__nodes.append(merged)
    return merged
def __getstate__(self):
    """Refuse pickling unconditionally.

    Making every PType unpicklable ensures a PType is never captured and
    used inside a lambda; side inputs should be used for PTypes instead.

    Raises:
        error.InvalidLogicalPlanException: always.
    """
    hint = ("Please use sideinput for PType."
            "More info here: http://bigflow.baidu.com/doc/guide.html#sideinputs")
    raise error.InvalidLogicalPlanException(hint)
def create_and_setup(self):
    """Rebuild the object stored in this entity's config.

    Returns:
        The object produced by unpickling ``self.__config``.

    Raises:
        error.InvalidLogicalPlanException: if ``is_empty()`` reports true.
    """
    if self.is_empty():
        raise error.InvalidLogicalPlanException("Empty entity")
    # NOTE(review): pickle.loads can execute arbitrary code; confirm the
    # config only ever originates from trusted sources.
    return pickle.loads(self.__config)
def __set_scope_type(self, scope_type):
    """Set the type on the scope's underlying message.

    The type may be set when it is still ``PbScope.DEFAULT`` or when it
    already equals ``scope_type``; any other transition is rejected.

    Args:
        scope_type: value to store in the scope message's ``type`` field.

    Raises:
        error.InvalidLogicalPlanException: if the message already carries a
            different non-default type.
    """
    # Reaches into Scope's name-mangled private message attribute.
    scope_message = self._scope._Scope__message
    current_type = scope_message.type
    already_typed = current_type != logical_plan_pb2.PbScope.DEFAULT
    if already_typed and current_type != scope_type:
        raise error.InvalidLogicalPlanException("Invalid scope type")
    scope_message.type = scope_type
def set_specific_field(self, message):
    """Fill the ``load_node`` part of ``message`` from this node.

    Every URI in the node's URI list is appended to ``load_node.uri`` and
    the loader entity's proto message is copied into ``load_node.loader``.

    Args:
        message: node message whose ``load_node`` field is populated.

    Raises:
        error.InvalidLogicalPlanException: if no loader entity is set.
    """
    loader = self.__loader_entity
    if loader is None:
        raise error.InvalidLogicalPlanException("Invalid loader")

    for source_uri in self.__uri_list:
        message.load_node.uri.append(source_uri)
    message.load_node.loader.CopyFrom(loader.to_proto_message())
def leave_scope(self):
    """Lift this node into the father of its scope via a one-input union.

    Returns:
        The union node returned by ``self._plan.union``, with this node's
        objector entity set on it.

    Raises:
        error.InvalidLogicalPlanException: if the current scope has no father
            (i.e. the node is in the global scope).
    """
    at_global_scope = self._scope.father() is None
    if at_global_scope:
        raise error.InvalidLogicalPlanException(
            "Trying to leave global scope")

    lifted = self._plan.union(self._scope.father(), [self])
    # The lifted node carries the same objector as this node.
    lifted.set_objector(self._objector_entity)
    return lifted
def from_proto_message(self, message):
    """Initialize this entity's name and config from a proto message.

    The name is found by reverse lookup in ``bigflow.core.entity_names``:
    the attribute whose string value equals ``message.name`` supplies the
    entity name.  If several attributes match, the last one iterated wins.

    Args:
        message: proto message exposing ``name`` and ``config``.

    Raises:
        error.InvalidLogicalPlanException: if no attribute of
            ``entity_names`` has a value equal to ``message.name``.
    """
    from bigflow.core import entity_names

    # Resolve into a local first.  Checking ``self.__name`` directly would
    # either fail with AttributeError when the attribute was never assigned,
    # or silently accept a stale name left over from an earlier value.
    resolved_name = None
    for key, value in entity_names.__dict__.items():
        if isinstance(key, str) and isinstance(value, str) \
                and value == message.name:
            resolved_name = key

    if resolved_name is None:
        raise error.InvalidLogicalPlanException(
            "Invalid name/type for entity.")

    self.__name = resolved_name
    self.__config = message.config
def set_specific_field(self, message):
    """Fill the ``sink_node`` part of ``message`` from this node.

    Args:
        message: node message whose ``sink_node`` field is populated.

    Raises:
        error.InvalidLogicalPlanException: if no sinker entity is set.
    """
    sinker = self.__sinker_entity
    if sinker is None:
        raise error.InvalidLogicalPlanException("Invalid sinker")

    sink_message = logical_plan_pb2.PbSinkNode()
    # "from" is a Python keyword, so the field cannot be assigned with
    # normal attribute syntax.
    setattr(sink_message, "from", self.__from_node.id())
    sink_message.sinker.CopyFrom(sinker.to_proto_message())
    message.sink_node.CopyFrom(sink_message)
def by(self, loader_obj):
    """Attach a loader to this node and to its input scope's message.

    The loader entity is installed with ``set_loader`` before the scope type
    is checked, so it remains set even when the check fails.

    Args:
        loader_obj: object wrapped into a loader entity via
            ``entity.Entity.of``.

    Returns:
        ``self``, to allow call chaining.

    Raises:
        error.InvalidLogicalPlanException: if the scope message's type is not
            ``PbScope.INPUT``.
    """
    loader_entity = entity.Entity.of(entity.Entity.loader, loader_obj)
    self.set_loader(loader_entity)

    # Reaches into Scope's name-mangled private message attribute.
    scope_message = self._scope._Scope__message
    if scope_message.type == logical_plan_pb2.PbScope.INPUT:
        scope_message.input_scope.spliter.CopyFrom(
            loader_entity.to_proto_message())
        return self
    raise error.InvalidLogicalPlanException("Invalid message")
def __init__(self, name="", operator=None, message=None):
    """Build an entity either from a proto message or from name + operator.

    Args:
        name: entity name; required (non-empty) when ``message`` is ``None``.
        operator: payload of the entity.  An ``EntitiedBySelf`` instance
            supplies its own name and config; a ``str`` is stored as the
            config verbatim; anything else is serialized with
            ``cloudpickle.dumps``.
        message: when given, the entity is initialized from it through
            ``from_proto_message`` and the other arguments are ignored.

    Raises:
        error.InvalidLogicalPlanException: if ``message`` is ``None`` and
            ``name`` is empty or ``operator`` is ``None``.
    """
    if message is not None:
        self.from_proto_message(message)
        return

    if len(name) == 0:
        raise error.InvalidLogicalPlanException(
            "Invalid name for entity.")
    if operator is None:
        raise error.InvalidLogicalPlanException(
            "Invalid operator(None) for entity.")

    if isinstance(operator, EntitiedBySelf):
        # The operator describes itself; the ``name`` argument is not used.
        self.__name = operator.get_entity_name()
        self.__config = operator.get_entity_config()
        return

    self.__name = name
    if isinstance(operator, str):
        self.__config = operator
    else:
        self.__config = cloudpickle.dumps(operator)
def union_with_scope(_scope):
    """Validate ``nodes`` against ``_scope`` and create the union node.

    ``nodes`` and ``self`` are free variables taken from the enclosing
    scope.

    Args:
        _scope: scope the union node is created in.

    Returns:
        The newly created ``LogicalPlan.UnionNode``.

    Raises:
        error.InvalidLogicalPlanException: if any entry is not a
            ``LogicalPlan.Node`` or its scope is not covered by ``_scope``.
    """
    # Type check runs over all entries before any scope check is evaluated.
    types_ok = all(isinstance(member, LogicalPlan.Node) for member in nodes)
    if not (types_ok and all(
            member.scope().is_cover_by(_scope) for member in nodes)):
        raise error.InvalidLogicalPlanException(
            "Invalid arguments: wrong nodes to union")

    merged = LogicalPlan.UnionNode(nodes, _scope, self)
    if LogicalPlan.Node.if_infinite(nodes, _scope):
        merged.set_infinite()
    self.__nodes.append(merged)
    return merged
def process_with_scope(_scope):
    """Validate ``from_nodes`` against ``_scope`` and create the process node.

    ``from_nodes`` and ``self`` are free variables taken from the enclosing
    scope.

    Args:
        _scope: scope the process node is created in.

    Returns:
        The newly created ``LogicalPlan.ProcessNode``.

    Raises:
        error.InvalidLogicalPlanException: if any entry is not a
            ``LogicalPlan.Node`` or its scope is not covered by ``_scope``.
    """
    # Type check runs over all entries before any scope check is evaluated.
    all_are_nodes = all(
        isinstance(source, LogicalPlan.Node) for source in from_nodes)
    if not (all_are_nodes and all(
            source.scope().is_cover_by(_scope) for source in from_nodes)):
        raise error.InvalidLogicalPlanException(
            "Invalid arguments: wrong processed nodes")

    created = LogicalPlan.ProcessNode(from_nodes, _scope, self)
    if LogicalPlan.Node.if_infinite(from_nodes, _scope):
        created.set_infinite()
    self.__nodes.append(created)
    return created
def __add_shuffle_node(self, source_node, target_scope):
    """Add a broadcast shuffle node carrying ``source_node`` into ``target_scope``.

    Args:
        source_node: node whose scope must be the father of ``target_scope``
            (identity comparison).
        target_scope: scope for which a shuffle group must already be
            registered in the plan.

    Returns:
        The result of ``ShuffleNode(...).broadcast()``, which is also
        appended to the plan's node list.

    Raises:
        error.InvalidLogicalPlanException: if ``source_node`` is not in the
            father scope of ``target_scope``, or no shuffle group exists for
            ``target_scope``.
    """
    if target_scope.father() is not source_node.scope():
        raise error.InvalidLogicalPlanException(
            "Source node should only belong to"
            " target scope's father")

    # First registered shuffle group whose scope is exactly target_scope.
    shuffle_group = next(
        (group for group in self.__shuffles
         if group.scope() is target_scope),
        None)
    if shuffle_group is None:
        raise error.InvalidLogicalPlanException(
            "Unable to find corresponding "
            "Shuffle Group for target scope")

    broadcast_node = LogicalPlan.ShuffleNode(
        source_node, shuffle_group, self).broadcast()
    self.__nodes.append(broadcast_node)
    return broadcast_node
def __init__(self, from_nodes, scope, plan):
    """Create a union node over ``from_nodes``.

    The node's size is the sum of the inputs' sizes, its serde is taken from
    the inputs (the last input's serde ends up stored), and its objector is
    the objector shared by all inputs.

    Args:
        from_nodes: input nodes being unioned.
        scope: scope passed on to ``LogicalPlan.Node.__init__``.
        plan: plan passed on to ``LogicalPlan.Node.__init__``.

    Raises:
        AssertionError: if an input has no serde (``_serde is None``).
        error.InvalidLogicalPlanException: if an input has no objector, or
            the inputs do not all share an equal objector.
    """
    LogicalPlan.Node.__init__(
        self, logical_plan_pb2.PbLogicalPlanNode.UNION_NODE, scope, plan)
    self._type_str = "UnionNode"
    self.__from_nodes = from_nodes
    self._size = sum(source.size() for source in self.__from_nodes)

    shared_objector = None
    for source in from_nodes:
        assert source._serde is not None
        self._serde = source._serde

        if source.objector() is None:
            raise error.InvalidLogicalPlanException(
                "Error getting objector from inputs")

        if shared_objector is None:
            # First input seen: its objector becomes the reference.
            shared_objector = source.objector()
        elif shared_objector != source.objector():
            raise error.InvalidLogicalPlanException(
                "Union sources with different objectors"
                ", user must set objector manually")

    self.set_objector(shared_objector)
def set_specific_field(self, message):
    """Fill the ``process_node`` part of ``message`` from this node.

    Copies the processor entity, the scalar process settings and one
    ``input`` entry per configured input.

    Args:
        message: node message whose ``process_node`` field is populated.

    Raises:
        error.InvalidLogicalPlanException: if no processor entity is set.
    """
    processor = self.__processor_entity
    if processor is None:
        raise error.InvalidLogicalPlanException("Invalid processor")

    target = message.process_node
    target.processor.CopyFrom(processor.to_proto_message())
    target.least_prepared_inputs = self.__least_prepared_inputs
    target.is_ignore_group = self.__is_ignore_group
    target.is_stateful = self.__is_stateful
    target.effective_key_num = self.__effective_key_num

    # add() creates the new entry, which is then filled from the input.
    for process_input in self.__inputs:
        target.input.add().CopyFrom(process_input.to_proto_message())
def shuffle(self, scope, from_nodes):
    """Open a new shuffle scope under ``scope`` and shuffle ``from_nodes`` into it.

    A new ``LogicalPlan.Scope`` and its ``_ShuffleGroup`` are registered in
    the plan, then one ``ShuffleNode`` per input node is created and
    appended to the plan's node list.

    Args:
        scope: passed as first argument to the new ``LogicalPlan.Scope``.
        from_nodes: non-empty ``list`` of nodes to shuffle.

    Returns:
        The newly created shuffle group.

    Raises:
        error.InvalidLogicalPlanException: if ``from_nodes`` is not a
            non-empty list.
    """
    if not (isinstance(from_nodes, list) and len(from_nodes) != 0):
        raise error.InvalidLogicalPlanException(
            "Invalid arguments: "
            "nodes to shuffle must be valid list")

    new_scope = LogicalPlan.Scope(scope, self)
    self.__scopes.append(new_scope)

    group = LogicalPlan._ShuffleGroup(new_scope)
    self.__shuffles.append(group)

    for source in from_nodes:
        created = LogicalPlan.ShuffleNode(source, group, self)
        self.__nodes.append(created)
    return group
def to_proto_message(self):
    """Serialize the whole plan into a ``PbLogicalPlan`` message.

    All nodes and scopes are serialized in list order; the environment is
    included only when ``self._environment`` is truthy.

    Returns:
        The populated ``logical_plan_pb2.PbLogicalPlan``.

    Raises:
        error.InvalidLogicalPlanException: if the resulting message reports
            ``IsInitialized()`` as false.
    """
    plan_message = logical_plan_pb2.PbLogicalPlan()

    for plan_node in self.__nodes:
        plan_message.node.add().CopyFrom(plan_node.to_proto_message())
    for plan_scope in self.__scopes:
        plan_message.scope.add().CopyFrom(plan_scope.to_proto_message())

    if self._environment:
        plan_message.environment.CopyFrom(
            entity.Entity.of(
                "EntitiedBySelf", self._environment).to_proto_message())

    if plan_message.IsInitialized():
        return plan_message
    raise error.InvalidLogicalPlanException(
        "Message is not initialized")
def set_specific_field(self, message):
    """Fill the ``shuffle_node`` part of ``message`` from this node.

    Besides the source node id and shuffle type, the type-specific entity is
    copied: the key reader for ``KEY`` (mandatory), the partitioner for
    ``SEQUENCE`` (if set) and the time reader for ``WINDOW`` (if set).

    Args:
        message: node message whose ``shuffle_node`` field is populated.

    Raises:
        error.InvalidLogicalPlanException: if the shuffle type is ``KEY`` and
            no key reader is set.
    """
    pb_shuffle = message.shuffle_node
    # "from" is a Python keyword, so the field cannot be assigned with
    # normal attribute syntax.
    setattr(pb_shuffle, "from", self.__from_node.id())

    shuffle_type = self.__shuffle_type
    pb_shuffle.type = shuffle_type

    shuffle_kinds = logical_plan_pb2.PbShuffleNode
    if shuffle_type == shuffle_kinds.KEY:
        if self.__key_reader is None:
            raise error.InvalidLogicalPlanException(
                "Invalid key reader")
        pb_shuffle.key_reader.CopyFrom(
            self.__key_reader.to_proto_message())
    if shuffle_type == shuffle_kinds.SEQUENCE:
        if self.__partitioner is not None:
            pb_shuffle.partitioner.CopyFrom(
                self.__partitioner.to_proto_message())
    if shuffle_type == shuffle_kinds.WINDOW:
        if self.__time_reader is not None:
            pb_shuffle.time_reader.CopyFrom(
                self.__time_reader.to_proto_message())
def broadcast_to(self, source_node, target_scope):
    """Broadcast ``source_node`` down into ``target_scope``.

    Starting from the common scope of the source's scope and
    ``target_scope``, one shuffle node is added for each scope on the path
    down to ``target_scope``; the final result is wrapped in a single-input
    union node placed in ``target_scope``.

    Args:
        source_node: node to broadcast.
        target_scope: scope that should receive the node.

    Returns:
        The ``LogicalPlan.UnionNode`` created in ``target_scope`` (already
        appended to the plan's node list).

    Raises:
        error.InvalidLogicalPlanException: if ``target_scope`` is the common
            scope but differs from the source's scope ("up-forward"
            broadcasting); errors from the per-level shuffle step propagate
            as well.
    """
    common_scope = LogicalPlan.Scope.common_scope(
        [source_node.scope(), target_scope])
    if target_scope is common_scope \
            and target_scope is not source_node.scope():
        raise error.InvalidLogicalPlanException(
            "Up-forward broadcasting is forbidden")

    # Collect scopes from target_scope upwards, excluding the common scope.
    path_up = []
    cursor = target_scope
    while not (cursor is None or cursor is common_scope):
        path_up.append(cursor)
        cursor = cursor.father()

    # Walk back down, outermost scope first, adding one shuffle per level.
    carried = source_node
    for level in path_up[::-1]:
        carried = self.__add_shuffle_node(carried, level)

    result_node = LogicalPlan.UnionNode([carried], target_scope, self)
    self.__nodes.append(result_node)
    return result_node