Ejemplo n.º 1
0
        def handle_result(task, local, error=None, partial=False):
            """Record the outcome of one processing attempt for ``task``.

            On error the task is either put back on the queue for another
            attempt, or the error is stored in ``nonloc.error``. Without an
            error the task is counted as finished unless the result is partial.

            :param task: the processed task; its ``retries``, ``max_retries``,
                ``retry_timeout``, ``wait_until`` and ``remote_task``
                attributes are read and/or updated here.
            :param local: when falsy, ``remote_running_tasks`` is decremented
                once the attempt is accounted for.
            :param error: the exception produced by the attempt, or None.
            :param partial: when True the remote task is never cleaned up;
                outside the retry path the queue item is not marked as done
                and, on success, nothing is counted as finished.
            """
            def cleanup_remote():
                # Remove the remote counterpart of this task (if any) and
                # forget about it. Skipped entirely for partial results.
                if not partial and task.remote_task:
                    log.ODM_INFO(
                        "LRE: Cleaning up remote task (%s)... %s" %
                        (task.remote_task.uuid,
                         'OK' if remove_task_safe(task.remote_task) else 'NO'))
                    self.params['tasks'].remove(task.remote_task)
                    task.remote_task = None

            if error:
                log.ODM_WARNING("LRE: %s failed with: %s" % (task, str(error)))

                # Special case in which the error is caused by a SIGTERM signal
                # this means a local processing was terminated either by CTRL+C or
                # by canceling the task.
                if str(error) == "Child was terminated by signal 15":
                    system.exit_gracefully()

                task_limit_reached = isinstance(error,
                                                NodeTaskLimitReachedException)
                if task_limit_reached:
                    # Estimate the maximum number of tasks based on how many tasks
                    # are currently running
                    with calculate_task_limit_lock:
                        # Only the first caller computes the estimate.
                        if nonloc.max_remote_tasks is None:
                            node_task_limit = 0
                            for t in self.params['tasks']:
                                try:
                                    info = t.info(with_output=-3)
                                    if info.status == TaskStatus.RUNNING and info.processing_time >= 0 and len(
                                            info.output) >= 3:
                                        node_task_limit += 1
                                except exceptions.OdmError:
                                    # Tasks whose info cannot be fetched are
                                    # simply not counted.
                                    pass

                            # Never go below one remote task, and log the
                            # value that is actually applied (not the raw
                            # count, which may be 0).
                            nonloc.max_remote_tasks = max(1, node_task_limit)
                            log.ODM_INFO(
                                "LRE: Node task limit reached. Setting max remote tasks to %s"
                                % nonloc.max_remote_tasks)

                # Retry, but only if the error is not related to a task failure
                if task.retries < task.max_retries and not isinstance(
                        error, exceptions.TaskFailedError):
                    # Put task back in queue
                    # Don't increment the retry counter if this task simply reached the task
                    # limit count.
                    if not task_limit_reached:
                        task.retries += 1
                    # The delay grows linearly with the number of retries.
                    task.wait_until = datetime.datetime.now(
                    ) + datetime.timedelta(seconds=task.retries *
                                           task.retry_timeout)
                    cleanup_remote()
                    q.task_done()

                    log.ODM_INFO("LRE: Re-queueing %s (retries: %s)" %
                                 (task, task.retries))
                    q.put(task)
                    if not local: remote_running_tasks.increment(-1)
                    return
                else:
                    # No more retries: remember the error and count the task
                    # as finished.
                    nonloc.error = error
                    finished_tasks.increment()
                    if not local: remote_running_tasks.increment(-1)
            else:
                if not partial:
                    log.ODM_INFO("LRE: %s finished successfully" % task)
                    finished_tasks.increment()
                    if not local: remote_running_tasks.increment(-1)

            cleanup_remote()
            if not partial: q.task_done()
Ejemplo n.º 2
0
    def run(self, taskClass):
        """Process every project path using a local and, optionally, a remote worker.

        One ``taskClass(project_path, self.node, self.params)`` instance is
        queued per entry of ``self.project_paths``. A local worker thread is
        always started; a remote worker thread is started only when
        ``self.node_online`` is truthy. The call blocks until every task has
        been counted as finished or an error has been recorded.

        :param taskClass: callable used to build the queued task objects.
        :raises exceptions.NodeConnectionError: with a generic message when
            the recorded error is a connection error (the original error is
            not re-raised, to avoid leaking the access token).
        :raises Exception: any other error recorded while processing.
        """
        if not self.project_paths:
            return

        # Shared variables across threads
        class nonloc:
            # Error that stopped processing (set by handle_result), or None.
            error = None
            # True while the local worker is inside task.process().
            local_processing = False
            # Estimated limit of concurrent remote tasks; None until estimated.
            max_remote_tasks = None

        calculate_task_limit_lock = threading.Lock()
        finished_tasks = AtomicCounter(0)
        remote_running_tasks = AtomicCounter(0)

        # Create queue
        q = queue.Queue()
        for pp in self.project_paths:
            log.ODM_INFO("LRE: Adding to queue %s" % pp)
            q.put(taskClass(pp, self.node, self.params))

        def remove_task_safe(task):
            """Remove ``task`` and return the result; False on OdmError."""
            try:
                removed = task.remove()
            except exceptions.OdmError:
                removed = False
            return removed

        def cleanup_remote_tasks():
            """Try to remove every task still listed in ``self.params['tasks']``."""
            if self.params['tasks']:
                log.ODM_WARNING("LRE: Attempting to cleanup remote tasks")
            else:
                log.ODM_INFO("LRE: No remote tasks left to cleanup")

            for task in self.params['tasks']:
                log.ODM_INFO(
                    "LRE: Removing remote task %s... %s" %
                    (task.uuid, 'OK' if remove_task_safe(task) else 'NO'))

        def handle_result(task, local, error=None, partial=False):
            """Record the outcome of one processing attempt for ``task``.

            On error the task is either put back on the queue for another
            attempt, or the error is stored in ``nonloc.error``. Without an
            error the task is counted as finished unless the result is partial.

            :param task: the processed task.
            :param local: True when called for the local worker, False for
                the remote worker (``remote_running_tasks`` is then
                decremented).
            :param error: the exception produced by the attempt, or None.
            :param partial: when True the remote task is never cleaned up;
                outside the retry path the queue item is not marked as done
                and, on success, nothing is counted as finished.
            """
            def cleanup_remote():
                # Remove the remote counterpart of this task (if any) and
                # forget about it. Skipped entirely for partial results.
                if not partial and task.remote_task:
                    log.ODM_INFO(
                        "LRE: Cleaning up remote task (%s)... %s" %
                        (task.remote_task.uuid,
                         'OK' if remove_task_safe(task.remote_task) else 'NO'))
                    self.params['tasks'].remove(task.remote_task)
                    task.remote_task = None

            if error:
                log.ODM_WARNING("LRE: %s failed with: %s" % (task, str(error)))

                # Special case in which the error is caused by a SIGTERM signal
                # this means a local processing was terminated either by CTRL+C or
                # by canceling the task.
                if str(error) == "Child was terminated by signal 15":
                    system.exit_gracefully()

                task_limit_reached = isinstance(error,
                                                NodeTaskLimitReachedException)
                if task_limit_reached:
                    # Estimate the maximum number of tasks based on how many tasks
                    # are currently running
                    with calculate_task_limit_lock:
                        # Only the first caller computes the estimate.
                        if nonloc.max_remote_tasks is None:
                            node_task_limit = 0
                            for t in self.params['tasks']:
                                try:
                                    info = t.info(with_output=-3)
                                    if info.status == TaskStatus.RUNNING and info.processing_time >= 0 and len(
                                            info.output) >= 3:
                                        node_task_limit += 1
                                except exceptions.OdmError:
                                    # Tasks whose info cannot be fetched are
                                    # simply not counted.
                                    pass

                            # Never go below one remote task, and log the
                            # value that is actually applied (not the raw
                            # count, which may be 0).
                            nonloc.max_remote_tasks = max(1, node_task_limit)
                            log.ODM_INFO(
                                "LRE: Node task limit reached. Setting max remote tasks to %s"
                                % nonloc.max_remote_tasks)

                # Retry, but only if the error is not related to a task failure
                if task.retries < task.max_retries and not isinstance(
                        error, exceptions.TaskFailedError):
                    # Put task back in queue
                    # Don't increment the retry counter if this task simply reached the task
                    # limit count.
                    if not task_limit_reached:
                        task.retries += 1
                    # The delay grows linearly with the number of retries.
                    task.wait_until = datetime.datetime.now(
                    ) + datetime.timedelta(seconds=task.retries *
                                           task.retry_timeout)
                    cleanup_remote()
                    q.task_done()

                    log.ODM_INFO("LRE: Re-queueing %s (retries: %s)" %
                                 (task, task.retries))
                    q.put(task)
                    if not local: remote_running_tasks.increment(-1)
                    return
                else:
                    # No more retries: remember the error and count the task
                    # as finished. Setting nonloc.error also stops the workers
                    # and the wait loop below.
                    nonloc.error = error
                    finished_tasks.increment()
                    if not local: remote_running_tasks.increment(-1)
            else:
                if not partial:
                    log.ODM_INFO("LRE: %s finished successfully" % task)
                    finished_tasks.increment()
                    if not local: remote_running_tasks.increment(-1)

            cleanup_remote()
            if not partial: q.task_done()

        def local_worker():
            """Consume queued tasks and process them locally until stopped."""
            while True:
                # Block until a new queue item is available
                task = q.get()

                # None is the stop sentinel; a recorded error also stops us.
                if task is None or nonloc.error is not None:
                    q.task_done()
                    break

                # Process local
                try:
                    nonloc.local_processing = True
                    task.process(True, handle_result)
                except Exception as e:
                    handle_result(task, True, e)
                finally:
                    nonloc.local_processing = False

        def remote_worker():
            """Consume queued tasks and process them remotely until stopped."""
            while True:
                # Block until a new queue item is available
                task = q.get()

                # None is the stop sentinel; a recorded error also stops us.
                if task is None or nonloc.error is not None:
                    q.task_done()
                    break

                # Yield to local processing
                # NOTE(review): the task is sent back while no local
                # processing is in progress, presumably so the local worker
                # gets to pick a task first -- confirm.
                if not nonloc.local_processing:
                    log.ODM_INFO(
                        "LRE: Yielding to local processing, sending %s back to the queue"
                        % task)
                    q.put(task)
                    q.task_done()
                    time.sleep(0.05)
                    continue

                # If we've found an estimate of the limit on the maximum number of tasks
                # a node can process, we block until some tasks have completed
                if nonloc.max_remote_tasks is not None and remote_running_tasks.value >= nonloc.max_remote_tasks:
                    q.put(task)
                    q.task_done()
                    time.sleep(2)
                    continue

                # Process remote
                try:
                    # Decremented again by handle_result (called with local=False).
                    remote_running_tasks.increment()
                    task.process(False, handle_result)
                except Exception as e:
                    handle_result(task, False, e)

        # Create queue thread
        local_thread = threading.Thread(target=local_worker)
        if self.node_online:
            remote_thread = threading.Thread(target=remote_worker)

        system.add_cleanup_callback(cleanup_remote_tasks)

        # Start workers
        local_thread.start()
        if self.node_online:
            remote_thread.start()

        # block until all tasks are done (or CTRL+C)
        try:
            while finished_tasks.value < len(
                    self.project_paths) and nonloc.error is None:
                time.sleep(0.5)
        except KeyboardInterrupt:
            log.ODM_WARNING("LRE: CTRL+C")
            system.exit_gracefully()

        # stop workers
        # One None sentinel per started worker thread.
        q.put(None)
        if self.node_online:
            q.put(None)

        # Wait for queue thread
        local_thread.join()
        if self.node_online:
            remote_thread.join()

        # Wait for all remaining threads
        for thrds in self.params['threads']:
            thrds.join()

        # Run the cleanup directly now instead of leaving it registered.
        system.remove_cleanup_callback(cleanup_remote_tasks)
        cleanup_remote_tasks()

        if nonloc.error is not None:
            # Try not to leak access token
            if isinstance(nonloc.error, exceptions.NodeConnectionError):
                raise exceptions.NodeConnectionError(
                    "A connection error happened. Check the connection to the processing node and try again."
                )
            else:
                raise nonloc.error