Example #1
0
 def remove_job(self, job_dag):
     """Retire ``job_dag`` from the set of active jobs.

     Calls ``detach_job()`` on every executor attached to the job, drops
     the job from the commitment, free-executor and moving-executor
     trackers, moves it from ``job_dags`` to ``finished_job_dags`` and
     recomputes the action map from the jobs that remain.
     """
     # Walk a snapshot of the executor collection.
     # NOTE(review): presumably detach_job() mutates job_dag.executors,
     # which is why a copy is iterated -- confirm against Executor.
     attached = list(job_dag.executors)
     for worker in attached:
         worker.detach_job()

     # Forget the job in each per-job tracking structure.
     self.exec_commit.remove_job(job_dag)
     self.free_executors.remove_job(job_dag)
     self.moving_executors.remove_job(job_dag)

     # Move the job from the active set to the finished set.
     self.job_dags.remove(job_dag)
     self.finished_job_dags.add(job_dag)

     # The action map is derived from the active jobs, so rebuild it.
     self.action_map = compute_act_map(self.job_dags)
Example #2
0
 def remove_job(self, job_dag):
     """Retire ``job_dag`` from the set of active jobs.

     Calls ``detach_node()`` on every executor the job holds at each
     level in ``range(self.level_range)``, moves the job from
     ``job_dags`` to ``finished_job_dags`` and recomputes the action map.
     """
     for lvl in range(self.level_range):
         level_executors = job_dag.executors[lvl]
         # NOTE(review): iterates the live collection (no snapshot);
         # assumes detach_node() does not modify it -- confirm.
         for worker in level_executors:
             worker.detach_node()

     # The exec_commit, free_executors and moving_executors trackers are
     # not touched here: their remove_job calls are disabled in this
     # variant.

     # Move the job from the active set to the finished set.
     self.job_dags.remove(job_dag)
     self.finished_job_dags.add(job_dag)

     # The action map is derived from the active jobs, so rebuild it.
     self.action_map = compute_act_map(self.job_dags)
Example #3
0
 def reset(self, max_time=np.inf):
     """Reset the environment for a new episode.

     Args:
         max_time: value stored as ``self.max_time``; defaults to
             ``np.inf``.

     Clears the clock, the timeline and the per-episode trackers, resets
     every executor, generates a new set of jobs, registers them through
     ``add_job`` and marks all executors as awaiting a scheduling
     decision.
     """
     self.max_time = max_time

     # clocks, event queue and per-episode bookkeeping
     self.wall_time.reset()
     self.timeline.reset()
     self.exec_commit.reset()
     self.moving_executors.reset()
     self.reward_calculator.reset()
     self.finished_job_dags = OrderedSet()
     self.node_selected.clear()

     # reset each executor, then re-seed the free-executor tracker
     for exe in self.executors:
         exe.reset()
     self.free_executors.reset(self.executors)

     # generate a set of new jobs
     self.job_dags = generate_jobs(
         self.np_random, self.timeline, self.wall_time)

     # map action to dag_idx and node_idx
     self.action_map = compute_act_map(self.job_dags)

     # add initial set of jobs in the system
     for dag in self.job_dags:
         self.add_job(dag)

     # put all executors as source executors initially
     self.source_job = None
     self.num_source_exec = len(self.executors)
     self.exec_to_schedule = OrderedSet(self.executors)
Example #4
0
    def reset(self, max_time=np.inf):
        """Reset the environment for a new episode.

        Args:
            max_time: value stored as ``self.max_time``; defaults to
                ``np.inf``.

        Clears the clock, the timeline and the per-episode trackers,
        rebuilds the executor pool of every level from ``self.curve[0]``,
        generates a new set of jobs and pushes a ``Wave`` event onto the
        timeline.
        """
        self.max_time = max_time

        # clocks, event queue and per-episode bookkeeping
        # (exec_commit / moving_executors resets are disabled in this variant)
        self.wall_time.reset()
        self.timeline.reset()
        self.reward_calculator.reset()
        self.finished_job_dags = OrderedSet()
        self.node_selected.clear()

        # Rebuild the per-level executor pools. self.base holds the first
        # unused executor id, so ids run consecutively across levels.
        self.base = 0
        for level in range(self.level_range):
            pool = self.executors[level]
            pool.clear()
            self.usingExecutors[level].clear()

            count = self.curve[0][level]
            first_id = self.base
            for exec_id in range(first_id, count + first_id):
                pool.add(Executor(exec_id, level))
            self.base = first_id + count

        self.free_executors.reset(self.executors)

        # generate a set of new jobs
        self.job_dags = generate_jobs(
            self.np_random, self.timeline, self.wall_time)

        # map action to dag_idx and node_idx
        self.action_map = compute_act_map(self.job_dags)

        # Jobs are not registered through add_job here and no executors
        # are queued in exec_to_schedule; that code is disabled in this
        # variant.

        # push a Wave built from the first two curve entries at timeInterval
        first_wave = Wave(self.curve[0], self.curve[1])
        self.timeline.push(self.timeInterval, first_wave)
        self.nextTime = 2
Example #5
0
    def step(self, next_node, limit):
        """Commit a scheduling decision and advance the simulation.

        Args:
            next_node: the node to send source executors to, ``None``, or
                a list of such choices which are committed in order.
            limit: upper bound on the number of executors to commit. When
                ``next_node`` is a list, ``limit`` is indexed with the
                position of each entry (``limit[i]`` for ``next_node[i]``).

        Returns:
            ``(observation, reward, done)`` where ``observation`` is
            ``self.observe()``, ``reward`` comes from the reward
            calculator, and ``done`` is true when no source executor
            awaits a decision and either the timeline is empty or the
            wall time has reached ``self.max_time``.

        Raises:
            SystemExit: with code 1 if the timeline yields an event that
                is not a ``Task``, ``JobDAG`` or ``Executor``.
        """
        if isinstance(next_node, list):
            # batched decision: one commitment per (node, limit) pair
            for idx, node in enumerate(next_node):
                self._commit_source_executors(node, limit[idx])
        else:
            self._commit_source_executors(next_node, limit)

        if self.num_source_exec == 0:
            # now a new scheduling round, clean up node selection
            self.node_selected.clear()
            # all commitments are made, now schedule free executors
            self.schedule()

        # Now run to the next event in the virtual timeline. The loop
        # stops as soon as an event leaves source executors that need a
        # decision from the agent.
        while len(self.timeline) > 0 and self.num_source_exec == 0:
            new_time, obj = self.timeline.pop()
            self.wall_time.update_time(new_time)

            # case Task: a task completion event, frees up an executor
            # case JobDAG: a new job arrives
            # case Executor: an executor arrives at a certain job

            if isinstance(obj, Task):  # task completion event
                finished_task = obj
                node = finished_task.node
                node.num_finished_tasks += 1

                # bookkeeping for node completion
                frontier_changed = False
                if node.num_finished_tasks == node.num_tasks:
                    assert not node.tasks_all_done  # only complete once
                    node.tasks_all_done = True
                    node.job_dag.num_nodes_done += 1
                    node.node_finish_time = self.wall_time.curr_time

                    frontier_changed = node.job_dag.update_frontier_nodes(node)

                # assign new destination for the freed executor
                self.assign_executor(finished_task.executor, frontier_changed)

                # bookkeeping for job completion
                if node.job_dag.num_nodes_done == node.job_dag.num_nodes:
                    assert not node.job_dag.completed  # only complete once
                    node.job_dag.completed = True
                    node.job_dag.completion_time = self.wall_time.curr_time
                    self.remove_job(node.job_dag)

            elif isinstance(obj, JobDAG):  # new job arrival event
                job_dag = obj
                # a job may only arrive once
                assert not job_dag.arrived
                job_dag.arrived = True
                # inform agent about job arrival when stream is enabled
                self.job_dags.add(job_dag)
                self.add_job(job_dag)
                self.action_map = compute_act_map(self.job_dags)
                # assign free executors (if any) to the new job
                if len(self.free_executors[None]) > 0:
                    self.exec_to_schedule = \
                        OrderedSet(self.free_executors[None])
                    self.source_job = None
                    self.num_source_exec = \
                        len(self.free_executors[None])

            elif isinstance(obj, Executor):  # executor arrival event
                executor = obj
                # pop destination from the tracking record
                node = self.moving_executors.pop(executor)

                if node is not None:
                    # the job is not yet done when executor arrives
                    executor.job_dag = node.job_dag
                    node.job_dag.executors.add(executor)

                if node is not None and not node.no_more_tasks:
                    # the node is still schedulable
                    if node in node.job_dag.frontier_nodes:
                        # node is immediately runnable
                        task = node.schedule(executor)
                        self.timeline.push(task.finish_time, task)
                    else:
                        # free up the executor in this job
                        self.free_executors.add(executor.job_dag, executor)
                else:
                    # the node is saturated or the job is done
                    # by the time the executor arrives, use
                    # backup logic
                    self.backup_schedule(executor)

            else:
                print("illegal event type")
                # Same exception and exit code as the site-provided
                # exit(1), without depending on the site module.
                raise SystemExit(1)

        # compute reward
        reward = self.reward_calculator.get_reward(self.job_dags,
                                                   self.wall_time.curr_time)

        # no more decision to make, jobs all done or time is up
        done = (self.num_source_exec == 0) and \
               ((len(self.timeline) == 0) or
                (self.wall_time.curr_time >= self.max_time))

        if done:
            assert self.wall_time.curr_time >= self.max_time or \
                   len(self.job_dags) == 0

        return self.observe(), reward, done

    def _commit_source_executors(self, node, limit):
        """Commit up to ``limit`` source executors to ``node``.

        ``node`` may be ``None``; in that case exactly ``limit`` executors
        are committed. Otherwise the count is capped by the node's tasks
        that are neither started, already committed to, nor targeted by a
        moving executor.
        """
        # mark the node as selected
        assert node not in self.node_selected
        self.node_selected.add(node)

        # The commitment source is read off the first executor awaiting
        # scheduling: its node if it has one, otherwise its job.
        executor = next(iter(self.exec_to_schedule))
        source = executor.job_dag if executor.node is None else executor.node

        # compute number of valid executors to assign
        if node is not None:
            unclaimed_tasks = (node.num_tasks - node.next_task_idx
                               - self.exec_commit.node_commit[node]
                               - self.moving_executors.count(node))
            use_exec = min(unclaimed_tasks, limit)
        else:
            use_exec = limit
        assert use_exec > 0

        self.exec_commit.add(source, node, use_exec)
        # deduct the executors that know the destination
        self.num_source_exec -= use_exec
        assert self.num_source_exec >= 0