class nonloc:
    # Holder for mutable state, written as class attributes.
    # NOTE(review): presumably shared with nested callbacks of an enclosing
    # function that is not visible in this excerpt -- confirm.

    # Counter object constructed with an initial value of 0.
    completed_chunks = AtomicCounter(0)

    # List of `num_chunks` flags, every one starting out as False.
    merge_chunks = [False] * num_chunks

    # No error recorded initially.
    error = None
def run(self, taskClass):
    """Run one task per project path using a local and a remote worker.

    A ``taskClass(project_path, self.node, self.params)`` instance is
    created for every entry of ``self.project_paths`` and placed on a
    shared queue. A local worker thread and, when ``self.node_online`` is
    true, a remote worker thread take tasks from the queue and call
    ``task.process(local, handle_result)``. The calling thread polls until
    every task has finished or an error has been recorded, then stops the
    workers, joins the threads listed in ``self.params['threads']`` and
    removes whatever is still listed in ``self.params['tasks']``.

    Returns immediately (``None``) when ``self.project_paths`` is empty.

    Args:
        taskClass: Callable invoked as
            ``taskClass(project_path, self.node, self.params)``. The
            objects it returns must expose ``process``, ``retries``,
            ``max_retries``, ``retry_timeout``, ``remote_task`` and accept
            a ``wait_until`` attribute.

    Raises:
        exceptions.NodeConnectionError: If a worker recorded a
            ``NodeConnectionError``; it is re-created with a generic
            message so the original text (which may contain the access
            token) is not propagated.
        Exception: Any other error recorded by a worker is re-raised
            unchanged.
    """
    if not self.project_paths:
        return

    # Shared variables across threads
    class nonloc:
        error = None               # first non-retryable error seen by handle_result
        local_processing = False   # True while the local worker is inside task.process
        max_remote_tasks = None    # estimated node task limit; None until estimated

    calculate_task_limit_lock = threading.Lock()
    finished_tasks = AtomicCounter(0)
    remote_running_tasks = AtomicCounter(0)

    # Create queue
    q = queue.Queue()
    for pp in self.project_paths:
        log.ODM_INFO("LRE: Adding to queue %s" % pp)
        q.put(taskClass(pp, self.node, self.params))

    def remove_task_safe(task):
        """Return ``task.remove()``, or False if it raises ``OdmError``."""
        try:
            removed = task.remove()
        except exceptions.OdmError:
            removed = False
        return removed

    def cleanup_remote_tasks():
        """Attempt to remove every task still listed in ``self.params['tasks']``."""
        if self.params['tasks']:
            log.ODM_WARNING("LRE: Attempting to cleanup remote tasks")
        else:
            log.ODM_INFO("LRE: No remote tasks left to cleanup")

        for task in self.params['tasks']:
            log.ODM_INFO(
                "LRE: Removing remote task %s... %s"
                % (task.uuid, 'OK' if remove_task_safe(task) else 'NO'))

    def handle_result(task, local, error=None, partial=False):
        """Callback passed to ``task.process``; also called by the workers on exceptions.

        Args:
            task: The task the result belongs to.
            local: True when the task ran in the local worker; when False
                the remote running-task counter is decremented.
            error: The error that ended the task, if any.
            partial: When True (and no error), nothing is counted as
                finished, the remote task is kept and the queue item is
                not marked done.
        """
        def cleanup_remote():
            # Remove the remote counterpart of this task, unless the
            # result is only partial.
            if not partial and task.remote_task:
                log.ODM_INFO(
                    "LRE: Cleaning up remote task (%s)... %s"
                    % (task.remote_task.uuid,
                       'OK' if remove_task_safe(task.remote_task) else 'NO'))
                self.params['tasks'].remove(task.remote_task)
                task.remote_task = None

        if error:
            log.ODM_WARNING("LRE: %s failed with: %s" % (task, str(error)))

            # Special case in which the error is caused by a SIGTERM signal
            # this means a local processing was terminated either by CTRL+C or
            # by canceling the task.
            if str(error) == "Child was terminated by signal 15":
                system.exit_gracefully()

            task_limit_reached = isinstance(error, NodeTaskLimitReachedException)
            if task_limit_reached:
                # Estimate the maximum number of tasks based on how many tasks
                # are currently running
                with calculate_task_limit_lock:
                    # Only the first thread to get here computes the estimate.
                    if nonloc.max_remote_tasks is None:
                        node_task_limit = 0
                        for t in self.params['tasks']:
                            try:
                                info = t.info(with_output=-3)
                                if (info.status == TaskStatus.RUNNING
                                        and info.processing_time >= 0
                                        and len(info.output) >= 3):
                                    node_task_limit += 1
                            except exceptions.OdmError:
                                # Best effort: a task whose info cannot be
                                # fetched is simply not counted.
                                pass

                        # Never allow the limit to drop below one task.
                        nonloc.max_remote_tasks = max(1, node_task_limit)
                        # Log the limit that is actually enforced (the raw
                        # count may be 0 while the stored limit is 1).
                        log.ODM_INFO(
                            "LRE: Node task limit reached. Setting max remote tasks to %s"
                            % nonloc.max_remote_tasks)

            # Retry, but only if the error is not related to a task failure
            if task.retries < task.max_retries and not isinstance(
                    error, exceptions.TaskFailedError):
                # Put task back in queue
                # Don't increment the retry counter if this task simply reached the task
                # limit count.
                if not task_limit_reached:
                    task.retries += 1
                task.wait_until = datetime.datetime.now() + datetime.timedelta(
                    seconds=task.retries * task.retry_timeout)
                cleanup_remote()
                q.task_done()

                log.ODM_INFO("LRE: Re-queueing %s (retries: %s)" % (task, task.retries))
                q.put(task)
                if not local:
                    remote_running_tasks.increment(-1)
                return
            else:
                # Non-retryable: record the error so the main loop and the
                # workers stop.
                nonloc.error = error
                finished_tasks.increment()
                if not local:
                    remote_running_tasks.increment(-1)
        else:
            if not partial:
                log.ODM_INFO("LRE: %s finished successfully" % task)
                finished_tasks.increment()
                if not local:
                    remote_running_tasks.increment(-1)

        cleanup_remote()
        if not partial:
            q.task_done()

    def local_worker():
        """Take tasks from the queue and process them locally until stopped."""
        while True:
            # Block until a new queue item is available
            task = q.get()

            # A None item is the stop sentinel; a recorded error also stops the worker.
            if task is None or nonloc.error is not None:
                q.task_done()
                break

            # Process local
            try:
                nonloc.local_processing = True
                task.process(True, handle_result)
            except Exception as e:
                handle_result(task, True, e)
            finally:
                nonloc.local_processing = False

    def remote_worker():
        """Take tasks from the queue and process them remotely until stopped."""
        while True:
            # Block until a new queue item is available
            task = q.get()

            # A None item is the stop sentinel; a recorded error also stops the worker.
            if task is None or nonloc.error is not None:
                q.task_done()
                break

            # Yield to local processing: while the local worker is not busy,
            # hand the task back to the queue and retry shortly.
            if not nonloc.local_processing:
                log.ODM_INFO(
                    "LRE: Yielding to local processing, sending %s back to the queue" % task)
                q.put(task)
                q.task_done()
                time.sleep(0.05)
                continue

            # If we've found an estimate of the limit on the maximum number of tasks
            # a node can process, we block until some tasks have completed
            if (nonloc.max_remote_tasks is not None
                    and remote_running_tasks.value >= nonloc.max_remote_tasks):
                q.put(task)
                q.task_done()
                time.sleep(2)
                continue

            # Process remote
            try:
                remote_running_tasks.increment()
                task.process(False, handle_result)
            except Exception as e:
                handle_result(task, False, e)

    # Create queue thread
    local_thread = threading.Thread(target=local_worker)
    if self.node_online:
        remote_thread = threading.Thread(target=remote_worker)

    system.add_cleanup_callback(cleanup_remote_tasks)

    # Start workers
    local_thread.start()
    if self.node_online:
        remote_thread.start()

    # block until all tasks are done (or CTRL+C)
    try:
        while finished_tasks.value < len(self.project_paths) and nonloc.error is None:
            time.sleep(0.5)
    except KeyboardInterrupt:
        log.ODM_WARNING("LRE: CTRL+C")
        system.exit_gracefully()

    # stop workers (one sentinel per started worker)
    q.put(None)
    if self.node_online:
        q.put(None)

    # Wait for queue thread
    local_thread.join()
    if self.node_online:
        remote_thread.join()

    # Wait for all remaining threads
    for thrds in self.params['threads']:
        thrds.join()

    # The callback is no longer needed because cleanup is run explicitly here.
    system.remove_cleanup_callback(cleanup_remote_tasks)
    cleanup_remote_tasks()

    if nonloc.error is not None:
        # Try not to leak access token
        if isinstance(nonloc.error, exceptions.NodeConnectionError):
            raise exceptions.NodeConnectionError(
                "A connection error happened. Check the connection to the processing node and try again."
            )
        else:
            raise nonloc.error
class nonloc:
    # Holder for mutable state, written as class attributes.
    # NOTE(review): presumably shared with nested callbacks of an enclosing
    # function that is not visible in this excerpt -- confirm.

    # Counter object constructed with an initial value of 0.
    uploaded_files = AtomicCounter(0)

    # No error recorded initially.
    error = None