Beispiel #1
0
    def generate_job_profile(self, user_id):
        """Build the Job / Stage / Task objects of one user from the profiles.

        Reads ``self.job_profile`` (per-job settings) and
        ``self.stage_profile`` (per-stage settings), creates one ``Task`` per
        task slot of every stage, one ``Stage`` per stage entry and one ``Job``
        per distinct job id, and links them to each other.

        Side effects:
            * appends a new empty list to ``self.job_list`` and fills
              ``self.job_list[user_id]``, sorted by ``job.index`` on return;
            * sets ``self.job_durations[job_id]`` to 0 for every job that owns
              a stage;
            * registers every stage in ``self.scheduler.stageIdToStage`` and
              adds its task count to ``downstream_parallelism`` of each parent;
            * for jobs whose service type equals
              ``self.cluster.foreground_type``, initialises
              ``self.cluster.jobIdToReservedNumber`` and
              ``self.cluster.jobIdToReservedMachineId``;
            * prints progress / debug lines to stdout.

        Args:
            user_id: identifier embedded in every generated job, stage and
                task id, and used as the index into ``self.job_list``.
                NOTE(review): a fresh list is appended and then addressed as
                ``self.job_list[user_id]``, so callers presumably pass
                0, 1, 2, ... in order -- confirm against the caller.

        Raises:
            KeyError: if a profile entry lacks an expected field, if a stage
                refers to a job id missing from ``self.job_profile``, or if a
                stage lists a parent that does not appear earlier in
                ``self.stage_profile``.
        """
        self.job_list.append(list())
        print("enter generate_job_profile")

        # One settings record per job, keyed by the integer job id.  All
        # fields are read eagerly so a malformed job entry fails right here.
        job_settings = dict()
        for raw_job_id in self.job_profile:
            profile = self.job_profile[raw_job_id]
            job_settings[int(raw_job_id)] = {
                "submit_time": profile["Submit Time"],
                "priority": profile["Priority"],
                "service_type": profile["Service Type"],
                "curve": profile["curve"],
                "monopolize_time": profile["Monopolize Time"],
                "weight": profile["Weight"],
                "accelerate_factor": profile["Accelerate Factor"],
            }

        no_timeout = 0  # every task is created with a time-out of 0
        next_task_number = 0  # task ids keep counting across stages
        seen_stage_ids = set()

        for stage_id in self.stage_profile:
            stage_entry = self.stage_profile[stage_id]
            job_id = stage_entry["Job ID"]
            self.job_durations[job_id] = 0
            settings = job_settings[job_id]

            qualified_job_id = 'user_%s_job_%s' % (user_id, job_id)
            qualified_stage_id = 'user_%s_stage_%s' % (user_id, stage_id)
            task_number = stage_entry["Task Number"]
            seen_stage_ids.add(qualified_stage_id)

            # Parents have to be generated before their children: their Stage
            # objects are updated below, so fail before anything is mutated.
            parent_stage_ids = list()
            if "Parents" in stage_entry:
                for parent_id in stage_entry["Parents"]:
                    parent_stage_id = 'user_%s_stage_%s' % (user_id,
                                                            parent_id)
                    if parent_stage_id not in seen_stage_ids:
                        raise KeyError(parent_stage_id)
                    parent_stage_ids.append(parent_stage_id)

            # Generate the task set of the stage and remember the longest
            # task runtime.
            taskset = list()
            longest_runtime = 0
            for slot in range(0, task_number):
                runtime = self.search_runtime(stage_id, slot)
                if runtime > longest_runtime:
                    longest_runtime = runtime
                task = Task(qualified_job_id, qualified_stage_id,
                            'user_%s_task_%s' % (user_id, next_task_number),
                            slot, runtime, no_timeout, settings["priority"])
                next_task_number += 1
                task.user_id = user_id
                taskset.append(task)

            stage = Stage(qualified_job_id, qualified_stage_id,
                          parent_stage_ids, taskset)
            stage.monopolize_time = longest_runtime

            for parent_stage_id in parent_stage_ids:
                parent_stage = self.scheduler.stageIdToStage[parent_stage_id]
                parent_stage.downstream_parallelism += len(taskset)

            self.scheduler.stageIdToStage[qualified_stage_id] = stage
            for task in taskset:
                task.stage = stage
            stage.user_id = user_id

            # Look the job up once; create it on the first stage that
            # mentions it, otherwise just attach the stage.
            job = self.search_job_by_id(qualified_job_id, user_id)
            if not job:
                job = Job(qualified_job_id)
                job.index = int(job_id)
                job.user_id = user_id
                job.stages.append(stage)
                job.submit_time = settings["submit_time"]
                job.priority = settings["priority"]
                job.service_type = settings["service_type"]
                job.weight = settings["weight"]
                job.accelerate_factor = settings["accelerate_factor"]
                if job.service_type == self.cluster.foreground_type:
                    self.cluster.jobIdToReservedNumber[job.id] = 0
                    self.cluster.jobIdToReservedMachineId[job.id] = set()
                job.set_curve(settings["curve"])
                job.monopolize_time = settings["monopolize_time"]
                self.job_list[user_id].append(job)
            else:
                job.stages.append(stage)
            stage.priority = job.priority
            stage.job = job

        # Reset the per-job stage bookkeeping now that all stages are known.
        for job in self.job_list[user_id]:
            job.not_completed_stage_ids = [stage.id for stage in job.stages]
            for tstage in job.stages:
                job.stagesDict[tstage.id] = tstage
            job.submitted_stage_ids = list()
            job.completed_stage_ids = list()

        # TODO: this part shall be changed to sort by the submission time of
        # a job; for now jobs are ordered by their index.
        self.job_list[user_id] = sorted(
            self.job_list[user_id],
            key=lambda job: job.index)
        print("finish generate job profile")

        # Debug output for the first job of user 0; skipped instead of
        # raising IndexError when that job (or its first stage) is missing.
        first_user_jobs = self.job_list[0]
        if first_user_jobs and first_user_jobs[0].stages:
            print("0: tasknumber:", len(first_user_jobs[0].stages[0].taskset))