def generate_job_profile(self, user_id):
    """Build the Job, Stage and Task objects of one user from the profiles.

    A new empty list is appended to ``self.job_list`` and the jobs created
    here are stored in ``self.job_list[user_id]``, finally sorted by
    ``job.index``.

    For every entry of ``self.stage_profile`` the method:

    * resets ``self.job_durations[job_id]`` to 0;
    * creates ``"Task Number"`` tasks whose runtimes come from
      ``self.search_runtime(stage_id, i)``; every task gets a timeout of 0;
    * creates the stage, sets its ``monopolize_time`` to the largest task
      runtime (0 when the stage has no tasks), registers it in
      ``self.scheduler.stageIdToStage`` and adds its task count to the
      ``downstream_parallelism`` of each stage listed under ``"Parents"``;
    * attaches the stage to its job, creating the job from
      ``self.job_profile`` the first time the job is seen.  Jobs whose
      service type equals ``self.cluster.foreground_type`` also get empty
      reservation entries in ``self.cluster``.

    Object ids are namespaced as ``user_<user_id>_job_<id>``,
    ``user_<user_id>_stage_<id>`` and ``user_<user_id>_task_<n>``.

    Args:
        user_id: Identifier used to namespace the ids and to index
            ``self.job_list``.
            NOTE(review): assumes ``user_id`` is the index of the list
            appended by this call -- confirm against the caller.

    Raises:
        KeyError: If a stage refers to a job id that is missing from the
            job profile, to a profile field that is absent, or to a parent
            stage that is not registered in the scheduler yet.
    """
    self.job_list.append(list())
    print("enter generate_job_profile")

    # View of the job profile keyed by integer job id.
    # NOTE(review): assumes the "Job ID" values of the stage profile are
    # ints that match int() of the job profile keys -- confirm.
    job_info = {int(raw_job_id): self.job_profile[raw_job_id]
                for raw_job_id in self.job_profile}

    task_id = 0  # running counter shared by all stages of this user
    for stage_id in self.stage_profile:
        stage_entry = self.stage_profile[stage_id]
        job_id = stage_entry["Job ID"]
        self.job_durations[job_id] = 0
        Job_id = 'user_%s_job_%s' % (user_id, job_id)
        Stage_id = 'user_%s_stage_%s' % (user_id, stage_id)
        task_number = stage_entry["Task Number"]
        job_entry = job_info[job_id]
        priority = job_entry["Priority"]

        Parent_ids = list()
        if "Parents" in stage_entry:
            for parent_id in stage_entry["Parents"]:
                Parent_ids.append('user_%s_stage_%s' % (user_id, parent_id))

        # Generate the taskset of the stage.
        # NOTE: runtimes used to be multiplied by a factor chosen from the
        # job service type (0 or 1), but both factors were 1; likewise a
        # separate timeout for stages whose parent is at least as parallel
        # was disabled. Both no-ops were dropped: every task keeps its raw
        # runtime and a timeout of 0.
        taskset = list()
        max_time = 0
        for i in range(0, task_number):
            runtime = self.search_runtime(stage_id, i)
            max_time = max(max_time, runtime)
            Task_id = 'user_%s_task_%s' % (user_id, task_id)
            task = Task(Job_id, Stage_id, Task_id, i, runtime, 0, priority)
            task_id += 1
            task.user_id = user_id
            taskset.append(task)

        stage = Stage(Job_id, Stage_id, Parent_ids, taskset)
        stage.monopolize_time = max_time
        # Parents must already be registered in the scheduler here.
        for parent_stage_id in Parent_ids:
            self.scheduler.stageIdToStage[
                parent_stage_id].downstream_parallelism += len(taskset)
        self.scheduler.stageIdToStage[Stage_id] = stage
        for task in taskset:
            task.stage = stage
        stage.user_id = user_id

        # Look the job up once; create it on first sight.
        job = self.search_job_by_id(Job_id, user_id)
        if not job:
            job = Job(Job_id)
            job.index = int(job_id)
            job.user_id = user_id
            job.stages.append(stage)
            job.submit_time = job_entry["Submit Time"]
            job.priority = priority
            job.service_type = job_entry["Service Type"]
            job.weight = job_entry["Weight"]
            job.accelerate_factor = job_entry["Accelerate Factor"]
            if job.service_type == self.cluster.foreground_type:
                self.cluster.jobIdToReservedNumber[job.id] = 0
                self.cluster.jobIdToReservedMachineId[job.id] = set()
            job.set_curve(job_entry["curve"])
            job.monopolize_time = job_entry["Monopolize Time"]
            self.job_list[user_id].append(job)
        else:
            # This job already exists: just attach the new stage.
            job.stages.append(stage)
        stage.priority = job.priority
        stage.job = job

    # Set the not_completed_stage_ids for all the jobs.
    for job in self.job_list[user_id]:
        job.not_completed_stage_ids = [stage.id for stage in job.stages]
        for tstage in job.stages:
            job.stagesDict[tstage.id] = tstage
        job.submitted_stage_ids = list()
        job.completed_stage_ids = list()

    # TODO: this part shall be changed to sort by the submission time of a
    # job; for now job_list is sorted by job index.
    self.job_list[user_id] = sorted(
        self.job_list[user_id], key=lambda job: job.index)
    print("finish generate job profile")

    # Debug output for the first job of user 0. Guarded so that an empty
    # profile does not end in an IndexError.
    first_user_jobs = self.job_list[0]
    if first_user_jobs and first_user_jobs[0].stages:
        print("0: tasknumber:", len(first_user_jobs[0].stages[0].taskset))