def remove_job(self, job_dag):
    """Retire ``job_dag`` from the active jobs and record it as finished.

    Each executor attached to the job has ``detach_job()`` called on it, the
    job is purged from the commitment, free-executor and moving-executor
    trackers, and ``self.action_map`` is rebuilt from the remaining jobs.

    Args:
        job_dag: the job to remove; must currently be in ``self.job_dags``.
    """
    # Walk a snapshot of the executor collection, not the live one.
    for attached in tuple(job_dag.executors):
        attached.detach_job()

    # Purge the job from every executor tracker, in the original order.
    trackers = (self.exec_commit, self.free_executors, self.moving_executors)
    for tracker in trackers:
        tracker.remove_job(job_dag)

    # Move the job from the active set to the finished set.
    self.job_dags.remove(job_dag)
    self.finished_job_dags.add(job_dag)

    # The set of active jobs changed, so the action lookup must be rebuilt.
    self.action_map = compute_act_map(self.job_dags)
def remove_job(self, job_dag):
    """Retire ``job_dag``: detach its executors' nodes and mark it finished.

    ``job_dag.executors`` is indexed by level, for ``self.level_range``
    levels; every executor found has ``detach_node()`` called on it. The job
    is then moved from ``self.job_dags`` to ``self.finished_job_dags`` and
    ``self.action_map`` is recomputed.

    Args:
        job_dag: the job to remove; must currently be in ``self.job_dags``.
    """
    executors_by_level = (
        job_dag.executors[lvl] for lvl in range(self.level_range)
    )
    for level_executors in executors_by_level:
        for attached in level_executors:
            attached.detach_node()

    # NOTE: the per-job cleanup calls on exec_commit, free_executors and
    # moving_executors (``.remove_job(job_dag)``) are disabled in this version.

    # Move the job from the active set to the finished set.
    self.job_dags.remove(job_dag)
    self.finished_job_dags.add(job_dag)

    # The set of active jobs changed, so the action lookup must be rebuilt.
    self.action_map = compute_act_map(self.job_dags)
def reset(self, max_time=np.inf):
    """Restore the environment to the start of a fresh episode.

    Resets the clock, timeline, executor trackers and reward calculator,
    generates a new set of jobs, and marks every executor as waiting for a
    scheduling decision.

    Args:
        max_time: stored on ``self.max_time``; defaults to ``np.inf``.
    """
    self.max_time = max_time

    # Reset the stateful components, keeping the original call order.
    for component in (
        self.wall_time,
        self.timeline,
        self.exec_commit,
        self.moving_executors,
        self.reward_calculator,
    ):
        component.reset()

    self.finished_job_dags = OrderedSet()
    self.node_selected.clear()

    # Executors are reset individually before being handed back as free.
    for worker in self.executors:
        worker.reset()
    self.free_executors.reset(self.executors)

    # Generate a new set of jobs.
    self.job_dags = generate_jobs(
        self.np_random, self.timeline, self.wall_time
    )

    # Map each action to its (dag_idx, node_idx).
    self.action_map = compute_act_map(self.job_dags)

    # Register the initial jobs with the system.
    for dag in self.job_dags:
        self.add_job(dag)

    # Every executor starts out as a source executor with no source job.
    self.source_job = None
    self.num_source_exec = len(self.executors)
    self.exec_to_schedule = OrderedSet(self.executors)
def reset(self, max_time=np.inf):
    """Restore the environment to the start of a fresh episode.

    Resets the clock, timeline and reward calculator, rebuilds the per-level
    executor pools from ``self.curve[0]``, generates a new set of jobs, and
    pushes the first ``Wave`` event onto the timeline.

    Args:
        max_time: stored on ``self.max_time``; defaults to ``np.inf``.
    """
    self.max_time = max_time

    # Reset the stateful components, keeping the original call order.
    # NOTE: the exec_commit and moving_executors resets are disabled in this
    # version.
    for component in (self.wall_time, self.timeline, self.reward_calculator):
        component.reset()

    self.finished_job_dags = OrderedSet()
    self.node_selected.clear()

    # Rebuild each level's pool; executor ids continue across levels because
    # self.base carries the running total of executors created so far.
    self.base = 0
    for level in range(self.level_range):
        self.executors[level].clear()
        self.usingExecutors[level].clear()

        level_count = self.curve[0][level]
        first_id = self.base
        for exec_id in range(first_id, level_count + first_id):
            self.executors[level].add(Executor(exec_id, level))
        self.base += level_count

    self.free_executors.reset(self.executors)

    # Generate a new set of jobs.
    self.job_dags = generate_jobs(
        self.np_random, self.timeline, self.wall_time
    )

    # Map each action to its (dag_idx, node_idx).
    self.action_map = compute_act_map(self.job_dags)

    # NOTE: registering the initial jobs via add_job() and seeding
    # exec_to_schedule with every executor are both disabled in this version.

    # Queue the first Wave event at self.timeInterval, built from the first
    # two entries of self.curve.
    self.timeline.push(self.timeInterval, Wave(self.curve[0], self.curve[1]))
    # Presumably the index of the next curve entry to consume - TODO confirm.
    self.nextTime = 2
def step(self, next_node, limit):
    """Commit a scheduling decision and advance to the next decision point.

    The decision is recorded in ``self.exec_commit``. Once every source
    executor has a destination (``self.num_source_exec == 0``) the free
    executors are scheduled and timeline events are processed until new
    source executors appear or the timeline is empty.

    Args:
        next_node: the node to assign executors to, or ``None``; may also be
            a ``list`` of such entries to commit several in one call.
        limit: upper bound on the executors to commit. When ``next_node`` is
            a list, ``limit`` is indexed with the same positions.

    Returns:
        A ``(observation, reward, done)`` tuple, where ``observation`` comes
        from ``self.observe()``.
    """
    # Normalise both call shapes into lazy (node, cap) pairs so the commit
    # logic exists only once. ``limit[idx]`` is still looked up per entry.
    if isinstance(next_node, list):
        decisions = ((node, limit[idx]) for idx, node in enumerate(next_node))
    else:
        decisions = ((next_node, limit),)

    for node, cap in decisions:
        # mark the node as selected; each node may be picked once per round
        assert node not in self.node_selected
        self.node_selected.add(node)

        # commit the source executor
        executor = next(iter(self.exec_to_schedule))
        source = executor.job_dag if executor.node is None else executor.node

        # compute number of valid executors to assign
        if node is not None:
            remaining = (
                node.num_tasks
                - node.next_task_idx
                - self.exec_commit.node_commit[node]
                - self.moving_executors.count(node)
            )
            use_exec = min(remaining, cap)
        else:
            use_exec = cap
        assert use_exec > 0

        self.exec_commit.add(source, node, use_exec)

        # deduct the executors that know the destination
        self.num_source_exec -= use_exec
        assert self.num_source_exec >= 0

    if self.num_source_exec == 0:
        # now a new scheduling round, clean up node selection
        self.node_selected.clear()
        # all commitments are made, now schedule free executors
        self.schedule()

    # Now run to the next event in the virtual timeline
    while len(self.timeline) > 0 and self.num_source_exec == 0:
        # consult agent by putting executors in source_exec
        new_time, obj = self.timeline.pop()
        self.wall_time.update_time(new_time)

        # case task: a task completion event, and frees up an executor.
        # case query: a new job arrives
        # case executor: an executor arrives at certain job

        if isinstance(obj, Task):  # task completion event
            finished_task = obj
            node = finished_task.node
            node.num_finished_tasks += 1

            # bookkeeping for node completion
            frontier_changed = False
            if node.num_finished_tasks == node.num_tasks:
                assert not node.tasks_all_done  # only complete once
                node.tasks_all_done = True
                node.job_dag.num_nodes_done += 1
                node.node_finish_time = self.wall_time.curr_time
                frontier_changed = node.job_dag.update_frontier_nodes(node)

            # assign new destination for the freed executor
            self.assign_executor(finished_task.executor, frontier_changed)

            # bookkeeping for job completion
            if node.job_dag.num_nodes_done == node.job_dag.num_nodes:
                assert not node.job_dag.completed  # only complete once
                node.job_dag.completed = True
                node.job_dag.completion_time = self.wall_time.curr_time
                self.remove_job(node.job_dag)

        elif isinstance(obj, JobDAG):  # new job arrival event
            job_dag = obj
            # job should be arrived at the first time
            assert not job_dag.arrived
            job_dag.arrived = True
            # inform agent about job arrival when stream is enabled
            self.job_dags.add(job_dag)
            self.add_job(job_dag)
            self.action_map = compute_act_map(self.job_dags)
            # assign free executors (if any) to the new job
            if len(self.free_executors[None]) > 0:
                self.exec_to_schedule = OrderedSet(self.free_executors[None])
                self.source_job = None
                self.num_source_exec = len(self.free_executors[None])

        elif isinstance(obj, Executor):  # executor arrival event
            executor = obj
            # pop destination from the tracking record
            node = self.moving_executors.pop(executor)

            if node is not None:
                # the job is not yet done when executor arrives
                executor.job_dag = node.job_dag
                node.job_dag.executors.add(executor)

            if node is not None and not node.no_more_tasks:
                # the node is still schedulable
                if node in node.job_dag.frontier_nodes:
                    # node is immediately runnable
                    task = node.schedule(executor)
                    self.timeline.push(task.finish_time, task)
                else:
                    # free up the executor in this job
                    self.free_executors.add(executor.job_dag, executor)
            else:
                # the node is saturated or the job is done by the time the
                # executor arrives, use backup logic
                self.backup_schedule(executor)

        else:
            # unknown event type on the timeline: report and abort
            print("illegal event type")
            exit(1)

    # compute reward
    reward = self.reward_calculator.get_reward(
        self.job_dags, self.wall_time.curr_time
    )

    # no more decision to make, jobs all done or time is up
    done = (self.num_source_exec == 0) and (
        (len(self.timeline) == 0)
        or (self.wall_time.curr_time >= self.max_time)
    )

    if done:
        assert (
            self.wall_time.curr_time >= self.max_time
            or len(self.job_dags) == 0
        )

    return self.observe(), reward, done