def __init__(self, node_dict):
    super(DAG, self).__init__(node_dict)

    self.subnodes = None
    # TODO: make a function to look up a parameter by name
    for parameter in self.node.parameters:
        if parameter.name == '_nodes':
            self.subnodes = parameter.value.value
    assert self.subnodes is not None, 'Could not find subnodes'

    self.node_id_to_node = {
        node._id: node for node in self.subnodes
    }

    # Map the number of unfinished dependencies to the ids of the nodes that have that many
    self.dependency_index_to_node_ids = defaultdict(lambda: set())
    self.node_id_to_dependents = defaultdict(lambda: set())
    self.node_id_to_dependency_index = defaultdict(lambda: 0)
    self.uncompleted_nodes_count = 0
    self._node_running_status = NodeRunningStatus.READY

    for node in self.subnodes:
        node_id = node._id
        if node_id == SpecialNodeId.INPUT:
            # Propagate the DAG's own inputs to the outputs of the special input node
            updated_resources_count = 0
            for output in node.outputs:
                for input in self.node.inputs:
                    if input.name == output.name:
                        updated_resources_count += 1
                        output.values = input.values
            if updated_resources_count != len(self.node.inputs):
                raise Exception('Used {} inputs for {} outputs'.format(updated_resources_count, len(self.node.inputs)))
        # Ignore nodes that are already in finished statuses
        if NodeRunningStatus.is_finished(node.node_running_status) and node_id != SpecialNodeId.OUTPUT:
            continue
        dependency_index = 0
        for node_input in node.inputs:
            for input_reference in node_input.input_references:
                dep_node_id = to_object_id(input_reference.node_id)
                self.node_id_to_dependents[dep_node_id].add(node_id)
                if not NodeRunningStatus.is_finished(self.node_id_to_node[dep_node_id].node_running_status):
                    dependency_index += 1

        if not NodeRunningStatus.is_finished(node.node_running_status):
            self.uncompleted_nodes_count += 1
        self.dependency_index_to_node_ids[dependency_index].add(node_id)
        self.node_id_to_dependency_index[node_id] = dependency_index

    self.monitoring_node_ids = set()

    if self.uncompleted_nodes_count == 0:
        self._node_running_status = NodeRunningStatus.SUCCESS
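# --- A minimal standalone sketch (hypothetical toy graph, not part of DAG) ---
# The bookkeeping above buckets every node by the number of its unfinished
# upstream dependencies, so bucket 0 always holds the nodes that are ready to
# run. `build_dependency_index` and the edge list below are illustrative names
# only, not part of the real codebase.
from collections import defaultdict


def build_dependency_index(node_ids, edges):
    """edges: iterable of (upstream_id, downstream_id) pairs."""
    dependency_index_to_node_ids = defaultdict(set)
    node_id_to_dependency_index = {node_id: 0 for node_id in node_ids}
    for _, downstream_id in edges:
        node_id_to_dependency_index[downstream_id] += 1
    for node_id, dependency_index in node_id_to_dependency_index.items():
        dependency_index_to_node_ids[dependency_index].add(node_id)
    return dependency_index_to_node_ids


# 'a' feeds 'b' and 'c'; 'b' and 'c' both feed 'd'
buckets = build_dependency_index('abcd', [('a', 'b'), ('a', 'c'), ('b', 'd'), ('c', 'd')])
assert buckets[0] == {'a'}         # no dependencies: runnable immediately
assert buckets[1] == {'b', 'c'}    # one unfinished dependency each
assert buckets[2] == {'d'}         # waits for both 'b' and 'c'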
def pop_jobs(self):
    """Return the list of nodes whose dependencies are all satisfied."""
    res = []
    logging.info("Pop jobs")

    # Sync the statuses of the nodes that are currently being monitored
    for running_node_dict in node_collection_manager.get_db_nodes_by_ids(self.monitoring_node_ids):
        if NodeRunningStatus.is_finished(running_node_dict['node_running_status']):
            node = Node.from_dict(running_node_dict)
            self.update_node(node)
            self.monitoring_node_ids.remove(node._id)

    if NodeRunningStatus.is_failed(self.node.node_running_status):
        logging.info("Job in DAG failed, pop_jobs returns []")
        return res

    cached_nodes = []
    for node_id in self.dependency_index_to_node_ids[0]:
        # Get the node and init its inputs, i.e. fill in its resource ids
        orig_node = self.node_id_to_node[node_id]
        for node_input in orig_node.inputs:
            for input_reference in node_input.input_references:
                node_input.values.extend(
                    self.node_id_to_node[to_object_id(input_reference.node_id)].get_output_by_name(
                        input_reference.output_id
                    ).values
                )
        orig_node.node_running_status = NodeRunningStatus.IN_QUEUE
        node = orig_node.copy()

        if DAG._cacheable(node) and False:    # TODO: re-enable once _cacheable is fixed
            try:
                cache = self.node_cache_manager.get(node, self.graph.author)
                if cache:
                    node.node_running_status = NodeRunningStatus.RESTORED
                    node.outputs = cache.outputs
                    node.logs = cache.logs
                    node.cache_url = '{}/graphs/{}?nid={}'.format(
                        self.WEB_CONFIG.endpoint.rstrip('/'),
                        str(cache.graph_id),
                        str(cache.node_id),
                    )
                    cached_nodes.append(node)
                    continue
            except Exception as err:
                logging.exception("Unable to update cache: `{}`".format(err))
        res.append(node)
    del self.dependency_index_to_node_ids[0]

    for node in cached_nodes:
        self.update_node(node)

    return res
def run(self):
    while not self.finished():
        new_jobs = self.pop_jobs()
        if len(new_jobs) == 0:
            time.sleep(_GRAPH_ITERATION_SLEEP)
            continue

        for node in new_jobs:
            self._execute_node(node)

    is_succeeded = NodeRunningStatus.is_succeeded(self.node.node_running_status)
    if is_succeeded:
        for node in self.subnodes:
            if node._id != SpecialNodeId.OUTPUT:
                continue
            updated_resources_count = 0
            for output in self.node.outputs:
                for input in node.inputs:
                    if input.name == output.name:
                        output.values = input.values
                        updated_resources_count += 1
                        break
            if updated_resources_count != len(node.inputs):
                raise Exception('Used {} inputs for {} outputs'.format(
                    updated_resources_count, len(node.inputs)))
    return JobReturnStatus.SUCCESS if is_succeeded else JobReturnStatus.FAILED
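# Hypothetical usage sketch (worker side): run() blocks until every subnode
# reaches a terminal status, so the caller can map its return value directly
# to the final job status. `node_dict` is assumed to be a serialized DAG node
# taken from the queue; it is not defined in this excerpt.
executor = DAG(node_dict)
if executor.run() == JobReturnStatus.SUCCESS:
    logging.info("DAG finished successfully")
else:
    logging.error("DAG failed")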
def _execute_node(self, node):
    if NodeRunningStatus.is_finished(node.node_running_status):    # e.g. NodeRunningStatus.SPECIAL
        return
    node.author = self.node.author    # Attribute the run to the user who started it
    node.save(collection=Collections.RUNS)
    self.monitoring_node_ids.add(node._id)
def _set_node_status(self, node_id, node_running_status):
    node = self.node_id_to_node[node_id]
    node.node_running_status = node_running_status
    logging.info("Node running status {} {}".format(node_running_status, node.title))

    if node_running_status == NodeRunningStatus.FAILED:
        # TODO: make the cancellation optional, based on a parameter
        self.kill()
        self._node_running_status = NodeRunningStatus.FAILED_WAITING

    if node_running_status in {NodeRunningStatus.SUCCESS, NodeRunningStatus.RESTORED}:
        for dependent_node_id in self.node_id_to_dependents[node_id]:
            dependent_node = self.node_id_to_node[dependent_node_id]
            prev_dependency_index = self.node_id_to_dependency_index[dependent_node_id]

            removed_dependencies = 0
            for node_input in dependent_node.inputs:
                for input_reference in node_input.input_references:
                    if to_object_id(input_reference.node_id) == to_object_id(node_id):
                        removed_dependencies += 1
            dependency_index = prev_dependency_index - removed_dependencies

            self.dependency_index_to_node_ids[prev_dependency_index].remove(dependent_node_id)
            self.dependency_index_to_node_ids[dependency_index].add(dependent_node_id)
            self.node_id_to_dependency_index[dependent_node_id] = dependency_index

        self.uncompleted_nodes_count -= 1

    if self.uncompleted_nodes_count == 0 and not NodeRunningStatus.is_failed(self._node_running_status):
        self._node_running_status = NodeRunningStatus.SUCCESS
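# --- A standalone sketch of the rebucketing above (hypothetical toy graph) ---
# When a node succeeds, each dependent drops to a lower bucket by the number
# of edges it had from the completed node; whatever lands in bucket 0 becomes
# runnable. `mark_succeeded` is an illustrative name, not part of the codebase.
from collections import defaultdict


def mark_succeeded(buckets, index, dependents, completed_id):
    buckets[index.pop(completed_id)].discard(completed_id)
    for dependent_id in dependents.get(completed_id, ()):
        prev_dependency_index = index[dependent_id]
        buckets[prev_dependency_index].remove(dependent_id)
        index[dependent_id] = prev_dependency_index - 1    # one edge per pair in this toy
        buckets[index[dependent_id]].add(dependent_id)


buckets = defaultdict(set, {0: {'a'}, 1: {'b', 'c'}, 2: {'d'}})
index = {'a': 0, 'b': 1, 'c': 1, 'd': 2}
dependents = {'a': {'b', 'c'}, 'b': {'d'}, 'c': {'d'}}

mark_succeeded(buckets, index, dependents, 'a')
assert buckets[0] == {'b', 'c'}    # both became runnable
mark_succeeded(buckets, index, dependents, 'b')
assert buckets[1] == {'d'}         # 'd' still waits for 'c'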
def call_executor_tick(self):
    while not self._stop_event.is_set():
        self._stop_event.wait(timeout=TickThread.TICK_TIMEOUT)
        if self.executor.is_updated():
            # Save the logs while the operation is running
            with self.executor._lock:
                if NodeRunningStatus.is_finished(self.executor.node.node_running_status):
                    continue
                self.executor.node.save(collection=Collections.RUNS)
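# A minimal sketch of the tick-thread pattern used above, assuming only the
# standard library. The Event doubles as an interruptible sleep: the loop
# wakes up every TICK_TIMEOUT seconds but exits promptly once stop() is
# called. This is an illustration, not the project's actual TickThread.
import threading


class TickThreadSketch(threading.Thread):
    TICK_TIMEOUT = 1.0

    def __init__(self, callback):
        super(TickThreadSketch, self).__init__()
        self._stop_event = threading.Event()
        self._callback = callback

    def run(self):
        while not self._stop_event.is_set():
            self._stop_event.wait(timeout=TickThreadSketch.TICK_TIMEOUT)
            self._callback()

    def stop(self):
        self._stop_event.set()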
def __init__(self, graph, node_collection=None):
    if isinstance(graph, Graph):
        self.graph_id = graph._id
        self.graph = graph
    else:
        self.graph_id = graph
        self.graph = Graph.load(self.graph_id)

    self.node_id_to_node = {
        node._id: node for node in self.graph.nodes
    }

    # number of dependencies to ids
    self.dependency_index_to_node_ids = defaultdict(lambda: set())
    self.node_id_to_dependents = defaultdict(lambda: set())
    self.node_id_to_dependency_index = defaultdict(lambda: 0)
    self.uncompleted_nodes_count = 0

    if node_collection:
        self.node_collection = node_collection
    else:
        self.node_collection = NodeCollection()

    for node in self.graph.nodes:
        # ignore nodes in finished statuses
        if NodeRunningStatus.is_finished(node.node_running_status):
            continue
        node_id = node._id
        dependency_index = 0
        for node_input in node.inputs:
            for input_value in node_input.values:
                parent_node_id = to_object_id(input_value.node_id)
                self.node_id_to_dependents[parent_node_id].add(node_id)
                if not NodeRunningStatus.is_finished(self.node_id_to_node[parent_node_id].node_running_status):
                    dependency_index += 1

        if not NodeRunningStatus.is_finished(node.node_running_status):
            self.uncompleted_nodes_count += 1
        self.dependency_index_to_node_ids[dependency_index].add(node_id)
        self.node_id_to_dependency_index[node_id] = dependency_index
def update_node(self, worker_id, node):
    """Return True if the job is in the queue, else False."""
    job_description = self.get_job_description(worker_id)
    if job_description:
        with self._graph_schedulers_lock:
            scheduler = self._graph_id_to_scheduler.get(job_description.graph_id, None)
            if scheduler:
                scheduler.update_node(node)
                if NodeRunningStatus.is_finished(node.node_running_status):
                    self._del_job_description(worker_id)
                return True
            else:
                logging.info("Scheduler was not found for worker `{}`".format(worker_id))
    return False