Esempio n. 1
0
    def manage_tasks(self, tasks):
        """Receive completed tasks from the clients and update the tasks file.

        Starts a watcher thread running ``self._taskq_worker`` and, for as
        long as that thread is alive, pulls finished tasks off the done-queue,
        merges them into ``tasks``, logs progress and an estimated time left,
        and saves ``tasks`` to ``tasks_pth`` (when configured) at most once
        per save interval. Once the watcher exits, the tasks are saved one
        last time, the active-clients queue is drained and joined, and all
        three queues are closed.

        Args:
            tasks (dict): Master mapping of task name to task dict. It is
                updated in place with every completed task received.

        """
        t0 = Time.time()
        tasks_pth = path(self.params().get("tasks_pth", ""))
        n_tasks = len(tasks)
        # Tasks that are no longer queued are counted as already done.
        n_done0 = n_tasks - self.taskq().qsize()
        n_done = n_done0
        # With no tasks at all there is nothing left to do; report 100%
        # instead of dividing by zero. float() keeps this consistent with the
        # in-loop progress computation below.
        percent = float(n_done) / n_tasks * 100. if n_tasks else 100.
        # str.format() does not treat `%` specially, so a single `%` is the
        # literal percent sign here (`%%` would be printed verbatim).
        util.info("[Server] Tasks queued. {}/{} ({:.1f}%) complete.".format(
            n_done, n_tasks, percent))
        save_time = Time.time()
        # Set up a thread that joins self.taskq and only returns once
        # all the tasks have completed. The while loop continues as
        # long as taskq_thread is alive.
        # NOTE(review): the thread receives the bound method `self.taskq`,
        # not the queue returned by `self.taskq()` -- confirm this is what
        # `_taskq_worker` expects.
        taskq_thread = Thread(name="taskq", target=self._taskq_worker,
                              args=(self.taskq,))
        taskq_thread.start()
        while taskq_thread.is_alive():
            # Wait for a done task to arrive.
            # NOTE(review): this get() blocks without a timeout. If the
            # watcher thread exits between the is_alive() check and this call
            # the loop can block indefinitely, and results still sitting in
            # doneq when the thread exits are never merged -- confirm.
            task = self.doneq().get()
            # Update the master tasks dict.
            task_name = task["task_name"]
            tasks[task_name].update(task)
            # presumably delta() is in seconds -- verify against Time.
            if tasks_pth and save_time.delta() > 10.:
                # Save tasks to disk and restart the save timer.
                update_tasks_file(tasks_pth, tasks, overwrite=True)
                save_time = Time.time()
            # Report progress.
            n_done += 1
            percent = float(n_done) / n_tasks * 100.
            dt = t0.delta()
            # Average over tasks completed during this call only; tasks that
            # were already done at start-up took none of `dt`.
            time_per_task = dt / float(n_done - n_done0)
            n_left = n_tasks - n_done
            t_left = Time(time_per_task * n_left)
            util.info("[Server] Task `{}` complete:\n\t\t {}/{} ({:.2f}%) {} "
                      "\n\t\t Time left: {}.".format(
                          task_name, n_done, n_tasks, percent, dt, t_left))
        # Save tasks to disk one last time, under the same guard as the
        # periodic save: without a configured path there is nowhere to write.
        if tasks_pth:
            update_tasks_file(tasks_pth, tasks, overwrite=True)
        # Wait for the clients to d/c.
        clients = []
        while not self.activeq().empty():
            clients.append(self.activeq().get())
        util.debug("[Server] Waiting for clients to disconnect:\n\t{}.".format(
            "\n\t".join(clients)))
        self.activeq().join()
        # Short pause before the queues are closed.
        time.sleep(0.5)
        self.activeq().close()
        self.taskq().close()
        self.doneq().close()
        util.info("[Server] Tasks completed.")
Esempio n. 2
0
 def _worker_process(self, finish, job, taskq, doneq, max_run_retries=3,
                     max_send_retries=10):
     """Run queued tasks with ``job`` until the queue is empty or stopped.

     Calls ``job.setup()``, then repeatedly takes a task from ``taskq``,
     runs it and sends its result. A task whose result was sent (or any task
     that ran when ``self.save`` is false) is flagged ``complete`` and put on
     ``doneq``; every other task is put back on ``taskq``. After the loop,
     ``job.teardown()`` is called and the process exits through
     ``sys.exit``.

     Args:
         finish: Event-like object. If ``finish.is_set()`` is true once a
             task has been handled, the loop stops and the exit code is 100.
         job: Object providing ``setup()``, ``run(task)`` and
             ``teardown()``; ``run`` returns a ``(progress, tmp_fid)`` pair.
         taskq: Queue of pending task dicts; must support ``task_done()``.
         doneq: Queue that receives the completed task dicts.
         max_run_retries (int): Number of ``EOFError`` failures of
             ``job.run`` tolerated (across all tasks) before the error is
             re-raised.
         max_send_retries (int): Passed to ``send_result`` as ``retries``.

     Raises:
         EOFError: When ``job.run`` fails with ``EOFError`` after
             ``max_run_retries`` earlier failures (but see the note on the
             ``finally`` clause below).
         SystemExit: Always on normal completion, with code 0, or 100 when
             stopped through ``finish``.

     """
     job.setup()
     run_retries = 0
     exitcode = 0
     # Loop over tasks.
     # NOTE(review): empty() followed by a blocking get() is racy when several
     # workers share taskq -- another worker may take the last task in
     # between, leaving this get() blocked. Confirm whether that can occur.
     while not taskq.empty():
         # Pop task off queue.
         task = taskq.get()
         success = False
         T0 = Time.time()
         try:
             progress, tmp_fid = job.run(task)
         except EOFError as err:
             # Report what went wrong and retry. EOFError has no `msg`
             # attribute, so format the exception itself.
             util.debug("[Client] {}".format(err))
             msg = "[Client] {}/{} failed run retries, {{}}".format(
                 run_retries, max_run_retries)
             if run_retries < max_run_retries:
                 run_retries += 1
                 util.debug(msg.format("retrying..."))
             else:
                 util.debug(msg.format("exiting."))
                 # Bare raise re-raises the active exception unchanged.
                 raise
         else:
             # Send results.
             sent = send_result(tmp_fid, task, retries=max_send_retries)
             if sent or not self.save:
                 # Mark simulation as complete.
                 task["complete"] = True
                 # Task is done.
                 doneq.put(task)
                 success = True
                 # Report progress.
                 progress.task = (taskq.qsize(), T0.delta())
                 progress.report()
         finally:
             if not success:
                 # Task did not complete successfully: put it back in taskq.
                 taskq.put(task)
             # Balance the get() above, also for a task that was re-queued.
             taskq.task_done()
             # NOTE(review): a `break` inside `finally` discards any exception
             # in flight, so when `finish` is set a re-raised EOFError (or any
             # other error from job.run/send_result) is swallowed and the
             # process exits with code 100 instead -- confirm this is intended.
             if finish.is_set():
                 exitcode = 100
                 break
     job.teardown()
     util.info("[Client] Process complete: {}.".format(ProcLabel()))
     sys.exit(exitcode)