Example #1
class Simulator:
    def __init__(self, cluster, json_dir, user_number):
        self.cluster = cluster
        self.log = Log()
        self.json_dir = json_dir
        self.scheduler = Scheduler(cluster)
        self.block_list = list()
        self.job_list = list()  # list of lists. A job list for each user.
        self.event_queue = Q.PriorityQueue()
        self.timestamp = 0
        self.user_number = user_number
        self.job_durations = {}
        self.stage_durations = {}
        self.job_execution_profile = {}  # record the execution information of jobs

        for user_index in range(0, user_number):
            """currently, we load the 'job info (job, stage, runtime)' for each user separately.
            which is equivalent to each user has 'exact same' job submition now!!!
            """
            stage_profile_path = "Workloads/stage_profile.json"
            self.stage_profile = json.load(open(stage_profile_path, 'r'),
                                           object_pairs_hook=OrderedDict)
            print "stage_profile loaded"

            runtime_path = "Workloads/runtime.json"
            self.runtime_profile = json.load(open(runtime_path, 'r'),
                                             object_pairs_hook=OrderedDict)
            print "runtime_profile loaded"

            job_path = "Workloads/job.json"
            self.job_profile = json.load(open(job_path, 'r'),
                                         object_pairs_hook=OrderedDict)
            print "job_profile loaded"
            self.generate_job_profile(user_index)

    def run(self):
        runtime = 0
        self.log.add(
            'Simulation Starts with %s machines.' %
            (len(self.cluster.machines)), 0)
        current_job_index = dict()  # map from user id to its current running job index
        for user_index in range(0, self.user_number):
            current_job_index[user_index] = 0
            for job_i in range(len(self.job_list[user_index])):
                self.event_queue.put(
                    EventJobSubmit(
                        self.job_list[user_index][job_i].submit_time,
                        self.job_list[user_index][job_i]))
        # - trigger the first allocation action
        self.event_queue.put(EventReAlloc(0))

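        # Discrete-event loop: events are popped in timestamp order; each
        # handler may emit follow-up events, which are pushed back onto the
        # priority queue at the end of the iteration.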
        while not self.event_queue.empty():
            event = self.event_queue.get()
            new_events = list()
            if isinstance(event, EventReAlloc):
                # - periodically trigger the allocation process every 1000 time units
                msg = self.scheduler.do_allocate(event.time)
                if len(self.cluster.finished_jobs) < len(self.job_list[0]):
                    new_events.append(EventReAlloc(event.time + 1000))
                for item in msg:
                    new_events.append(EventTaskSubmit(event.time, item[0]))
                    new_events.append(
                        EventTaskComplete(event.time + item[0].runtime,
                                          item[0], item[1]))

            elif isinstance(event, EventJobSubmit):
                current_job_index[event.job.user_id] = event.job.index
                ready_stages = self.scheduler.submit_job(event.job)
                for stage in ready_stages:
                    new_events.append(EventStageSubmit(event.time, stage))

            elif isinstance(event, EventStageSubmit):
                event.stage.submit_time = event.time
                msg = self.scheduler.submit_stage(event.stage, event.time)
                for item in msg:
                    new_events.append(EventTaskSubmit(event.time, item[0]))
                    new_events.append(
                        EventTaskComplete(event.time + item[0].runtime,
                                          item[0], item[1]))

            elif isinstance(event, EventTaskSubmit):
                event.task.start_time = event.time
                if self.cluster.isDebug:
                    print "time", event.time, " submit task ", event.task.id, "-job-", event.task.job_id, "-slot-", event.task.machine_id
                if len(event.task.stage.not_submitted_tasks) == 0:
                    event.task.stage.last_task_submit_time = event.time
                continue

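            # TaskComplete: free the slot, move the task to the stage's completed
            # list, then either backfill the next unsubmitted task of the same
            # stage onto the freed slot or run a full re-allocation.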
            elif isinstance(event, EventTaskComplete):
                event.task.finish_time = event.time
                if self.cluster.isDebug:
                    print "time", event.time, "   finish task ", event.task.id, "-job-", event.task.job_id, "-slot-", event.task.machine_id
                self.scheduler.stageIdToAllowedMachineId[
                    event.task.stage_id].append(event.task.machine_id)
                self.cluster.release_task(event.task)
                event.task.stage.not_completed_tasks.remove(event.task)
                event.task.stage.completed_tasks.append(event.task)
                if len(event.task.stage.not_completed_tasks) == 0:
                    new_events.append(
                        EventStageComplete(event.time, event.task.stage))
                if len(event.task.stage.not_submitted_tasks) > 0:
                    msg = [[
                        event.task.stage.not_submitted_tasks[0],
                        event.task.machine_id
                    ]]
                    runtime = self.cluster.assign_task(
                        event.task.machine_id,
                        event.task.stage.not_submitted_tasks[0], event.time)
                else:
                    msg = self.scheduler.do_allocate(event.time)
                for item in msg:
                    new_events.append(EventTaskSubmit(event.time, item[0]))
                    new_events.append(
                        EventTaskComplete(event.time + item[0].runtime,
                                          item[0], item[1]))

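            # StageComplete: record per-stage statistics (task count, slots used,
            # duration) and ask the scheduler which stages become ready next, or
            # whether the whole job has finished.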
            elif isinstance(event, EventStageComplete):
                stageSlots = set()
                for i in event.stage.taskset:
                    stageSlots.add(i.machine_id)
                event.stage.finish_time = event.time
                self.stage_durations[event.stage.id] = {}
                self.stage_durations[event.stage.id]["task num"] = len(
                    event.stage.taskset)
                self.stage_durations[event.stage.id]["used slot num"] = len(
                    stageSlots)
                self.stage_durations[event.stage.id]["duration"] = event.stage.finish_time - \
                    event.stage.submit_time
                # ready_stage or job (tell the simulator the entire job is done)
                msg = self.scheduler.stage_complete(event.stage)
                for item in msg:
                    if isinstance(item, Stage):  # stage ready to be submitted
                        new_events.append(EventStageSubmit(event.time, item))
                    else:  # must be job, which means the job is done
                        new_events.append(EventJobComplete(event.time, item))

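            # JobComplete: record the job's duration and allocation details in
            # job_execution_profile and let the scheduler clean up its state.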
            elif isinstance(event, EventJobComplete):
                event.job.completion_time = event.time
                event.job.duration = event.time - event.job.submit_time
                event.job.execution_time = event.time - event.job.start_execution_time
                print "time: ", event.time, "-", event.job.id, " (job) finishes, duration", event.job.duration, " job.alloc ", event.job.alloc
                self.scheduler.handle_job_completion(event.job)
                self.job_durations[int(
                    event.job.id.split("_")[-1])] = event.job.duration
                job_id = int(event.job.id.split("_")[-1])
                self.job_execution_profile[job_id] = {}
                self.job_execution_profile[job_id][
                    "duration"] = event.job.duration
                self.job_execution_profile[job_id][
                    "execution_time"] = event.job.execution_time
                #                self.job_execution_profile[job_id]["runtimes"] = [[i.runtime, i.machine_id, i.start_time, i.finish_time] for i in event.job.stages[0].taskset]
                if self.scheduler.scheduler_type == "paf":
                    self.job_execution_profile[job_id][
                        "fair_alloc"] = event.job.fairAlloc
                    self.job_execution_profile[job_id][
                        "target_alloc"] = event.job.targetAlloc
                else:
                    self.job_execution_profile[job_id][
                        "fair_alloc"] = event.job.alloc
                    self.job_execution_profile[job_id][
                        "target_alloc"] = event.job.alloc
                self.job_execution_profile[job_id]["alloc"] = event.job.alloc

            for new_event in new_events:
                self.event_queue.put(new_event)

        if self.scheduler.scheduler_type == "paf":
            fname = "ExecutionResult/" + str(self.cluster.machine_number) + "_" + \
                self.scheduler.scheduler_type + "_" + ".json"
        else:
            fname = "ExecutionResult/" + \
                str(self.cluster.machine_number) + "_" + \
                self.scheduler.scheduler_type + ".json"
        f = open(fname, 'w')
        json.dump(self.job_execution_profile, f, indent=2, sort_keys=True)
        f.close()

        return [runtime]

    def generate_job_profile(self, user_id):
        self.job_list.append(list())
        task_id = 0
        job_submit_time = dict()
        job_priority = dict()
        job_weight = dict()
        print "enter generate_job_profile"

        stageIdToParallelism = dict()
        for c_job_id in self.job_profile:
            # temporary setting
            job_submit_time[int(
                c_job_id)] = self.job_profile[c_job_id]["Submit Time"]
            job_priority[int(
                c_job_id)] = self.job_profile[c_job_id]["Priority"]
            job_weight[int(c_job_id)] = self.job_profile[c_job_id]["Weight"]

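        # Build the stage DAG: for each stage in stage_profile, create per-user
        # namespaced IDs, generate its tasks with runtimes looked up in
        # runtime_profile, link it to its parent stages, and attach it to its
        # job (creating the Job object the first time it is seen).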
        for stage_id in self.stage_profile:
            timeout_type = 0
            job_id = self.stage_profile[stage_id]["Job ID"]
            self.job_durations[job_id] = 0
            Job_id = 'user_%s_job_%s' % (user_id, job_id)
            Stage_id = 'user_%s_stage_%s' % (user_id, stage_id)
            task_number = self.stage_profile[stage_id]["Task Number"]
            # change parallelism

            stageIdToParallelism[Stage_id] = task_number

            Parent_ids = list()
            if "Parents" in self.stage_profile[stage_id]:
                parent_ids = self.stage_profile[stage_id]["Parents"]
                for parent_id in parent_ids:
                    Parent_ids.append('user_%s_stage_%s' %
                                      (user_id, parent_id))
                    if stageIdToParallelism[Parent_ids[-1]] >= task_number:
                        timeout_type = 1

            # generate taskset of the stage
            taskset = list()
            max_time = 0
            for i in range(0, task_number):
                runtime = self.search_runtime(stage_id, i)
                runtime *= 1  # no-op; placeholder for runtime scaling
                if runtime > max_time:
                    max_time = runtime
                Task_id = 'user_%s_task_%s' % (user_id, task_id)
                time_out = 0
                if timeout_type == 0:
                    task = Task(Job_id, Stage_id, Task_id, i, runtime,
                                time_out, job_priority[job_id])
                else:  # currently identical to the branch above, so timeout_type has no effect
                    task = Task(Job_id, Stage_id, Task_id, i, runtime,
                                time_out, job_priority[job_id])
                task_id += 1
                task.user_id = user_id
                taskset.append(task)
            stage = Stage(Job_id, Stage_id, Parent_ids, taskset)

            for id in Parent_ids:
                self.scheduler.stageIdToStage[
                    id].downstream_parallelism += len(taskset)

            self.scheduler.stageIdToStage[Stage_id] = stage
            for task in taskset:
                task.stage = stage
            stage.user_id = user_id

            if not self.search_job_by_id(Job_id, user_id):
                job = Job(Job_id)
                job.index = int(job_id)
                job.user_id = user_id
                job.stages.append(stage)
                job.submit_time = job_submit_time[job_id]
                job.priority = job_priority[job_id]
                job.weight = job_weight[job_id]
                self.job_list[user_id].append(job)
                stage.priority = job.priority
                stage.job = job
            else:  # this job already exists
                job = self.search_job_by_id(Job_id, user_id)
                job.stages.append(stage)
                stage.priority = job.priority
                stage.job = job

        # Set the not_completed_stage_ids for all the jobs
        for job in self.job_list[user_id]:
            job.not_completed_stage_ids = [stage.id for stage in job.stages]
            for tstage in job.stages:
                job.stagesDict[tstage.id] = tstage
            job.submitted_stage_ids = list()
            job.completed_stage_ids = list()

        # TODO: this should be changed to sort by the submission time of a job
        self.job_list[user_id] = sorted(
            self.job_list[user_id],
            key=lambda job: job.index)  # sort job_list by job_index
        print "finish generate job profile"
        print "0: tasknumber:", len(self.job_list[0][0].stages[0].taskset)

    def search_runtime(self, stage_id, task_index):
        return self.runtime_profile[str(stage_id)][str(task_index)]['runtime']

    def search_job_by_id(self, job_id, user_index):
        for job in self.job_list[user_index]:
            if job.id == job_id:
                return job
        return False

    def reset(self):
        for job in self.job_list:
            job.reset()
        self.cluster.reset()
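
A minimal driver for this Simulator might look like the sketch below. The Cluster class, its constructor argument, and the single-user setup are assumptions (they are not shown in the snippet); only the Simulator(cluster, json_dir, user_number) signature and the [runtime] return value of run() come from the code above.

# Hypothetical driver for Example #1; Cluster and machine_number are assumptions.
cluster = Cluster(machine_number=32)  # assumed constructor, not defined in the snippet
sim = Simulator(cluster, json_dir="Workloads", user_number=1)
result = sim.run()  # run() returns [runtime]
print("simulation finished, last recorded task runtime: %s" % result[0])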
Example #2
class Simulator:
    def __init__(self, cluster, json_dir, user_number):
        self.cluster = cluster
        self.log = Log()
        self.json_dir = json_dir
        self.scheduler = Scheduler(cluster)
        self.block_list = list()
        self.job_list = list()  # list of lists. A job list for each user.
        self.event_queue = queue.PriorityQueue()
        self.timestamp = 0
        self.user_number = user_number
        self.total_application_type = 1
        self.app_map = OrderedDict()  # map from user id to app id
        self.job_durations = {}
        self.stage_durations = {}
        self.job_execution_profile = {}  # record the execution information of jobs
        # generate the job list for each user. All users share the rdd_list and block list
        for user_index in range(0, user_number):
            # each user randomly chooses an application
            # application_number = random.randint(1, self.total_application_type)
            application_number = user_index + 1
            self.app_map[user_index] = application_number
            # stage_profile_path = 'Workloads/stage_profile.json' % (json_dir, application_name)
            stage_profile_path = "Workloads/stage_profile.json"  # read stage_profile XX
            self.stage_profile = json.load(open(stage_profile_path, 'r'),
                                           object_pairs_hook=OrderedDict)
            print("stage_profile loaded")

            runtime_path = "Workloads/runtime.json"  # read runtime fille XX
            self.runtime_profile = json.load(open(runtime_path, 'r'),
                                             object_pairs_hook=OrderedDict)
            print("runtime_profile loaded")
            # self.generate_rdd_profile(user_index)

            job_path = "Workloads/job.json"  # read job file XX
            self.job_profile = json.load(open(job_path, 'r'),
                                         object_pairs_hook=OrderedDict)
            print("job_profile loaded")
            self.generate_job_profile(user_index)

    def run(self):
        self.log.add(
            'Simulation Starts with %s machines.' %
            (len(self.cluster.machines)), 0)
        current_job_index = dict()  # map from user id to its current running job index
        for user_index in range(0, self.user_number):
            current_job_index[user_index] = 0
            for job_i in range(len(self.job_list[user_index])):
                self.event_queue.put(
                    EventJobSubmit(
                        self.job_list[user_index][job_i].submit_time,
                        self.job_list[user_index][job_i]))
        self.event_queue.put(EventReAlloc(0))

        while not self.event_queue.empty():
            event = self.event_queue.get()
            new_events = list()

            # ReAlloc
            # re-allocation is needed
            # if fewer jobs have finished than this user has, delay the next allocation by 1000 time units  TODO ?
            # when an allocation is possible, add a TaskSubmit and the matching TaskComplete
            if isinstance(event, EventReAlloc):
                msg = self.scheduler.do_allocate(event.time)
                if len(self.cluster.finished_jobs) < len(
                        self.job_list[0]):  # TODO job_list[0]
                    new_events.append(EventReAlloc(event.time + 1000))
                for item in msg:
                    new_events.append(EventTaskSubmit(event.time, item[0]))
                    new_events.append(
                        EventTaskComplete(event.time + item[0].runtime,
                                          item[0], item[1]))

            # JobSubmit
            # generate the corresponding stages and add StageSubmit events
            elif isinstance(event, EventJobSubmit):
                current_job_index[
                    event.job.user_id] = event.job.index  # TODO what is this for?
                ready_stages = self.scheduler.submit_job(event.job)
                for stage in ready_stages:
                    new_events.append(EventStageSubmit(event.time, stage))

            # StageSubmit
            # add the corresponding TaskSubmit and TaskComplete events
            elif isinstance(event, EventStageSubmit):
                event.stage.submit_time = event.time  # TODO what is this for?
                msg = self.scheduler.submit_stage(event.stage, event.time)
                for item in msg:
                    new_events.append(EventTaskSubmit(event.time, item[0]))
                    new_events.append(
                        EventTaskComplete(event.time + item[0].runtime,
                                          item[0], item[1]))

            # TaskSubmit
            # appears to be unused
            elif isinstance(event, EventTaskSubmit):
                event.task.start_time = event.time  # TODO what is this for?
                if self.cluster.isDebug:
                    print("time", event.time, "-submit task-", event.task.id,
                          "-job-", event.task.job_id, "-slot-",
                          event.task.machine_id)
                if len(event.task.stage.not_submitted_tasks) == 0:
                    event.task.stage.last_task_submit_time = event.time

            # TaskComplete
            # release the resources and, if possible, add a StageComplete
            # TODO if the service type is foreground
            elif isinstance(event, EventTaskComplete):
                event.task.finish_time = event.time
                if self.cluster.isDebug:
                    print("time", event.time, "-finish task-", event.task.id,
                          "-job-", event.task.job_id, "-slot-",
                          event.task.machine_id)
                if event.task.has_completed:
                    continue
                event.task.has_completed = True
                self.scheduler.stageIdToAllowedMachineId[
                    event.task.stage_id].append(event.task.machine_id)
                self.cluster.release_task(event.task)
                event.task.stage.not_completed_tasks.remove(event.task)
                event.task.stage.completed_tasks.append(event.task)
                if len(event.task.stage.not_completed_tasks) == 0:
                    new_events.append(
                        EventStageComplete(event.time, event.task.stage))

                if event.task.stage.job.service_type == self.cluster.foreground_type and \
                        len(event.task.stage.not_submitted_tasks) > 0 and \
                        self.cluster.open_machine_number == 0:
                    msg = [[
                        event.task.stage.not_submitted_tasks[0],
                        event.task.machine_id
                    ]]
                    runtime = self.cluster.assign_task(
                        event.task.machine_id,
                        event.task.stage.not_submitted_tasks[0], event.time)
                else:
                    msg = self.scheduler.do_allocate(event.time)
                for item in msg:
                    new_events.append(EventTaskSubmit(event.time, item[0]))
                    new_events.append(
                        EventTaskComplete(event.time + item[0].runtime,
                                          item[0], item[1]))

            # StageComplete
            # add the corresponding StageSubmit and JobComplete events
            elif isinstance(event, EventStageComplete):
                if event.stage.job.service_type == self.cluster.foreground_type:
                    stageSlots = set()
                    for i in event.stage.taskset:
                        stageSlots.add(i.machine_id)
                    print(
                        "stage finish: ", event.stage.id, "used slots number:",
                        len(stageSlots), "submit interval",
                        event.stage.last_task_submit_time -
                        event.stage.submit_time,
                        "currently reserved for this job:",
                        self.cluster.jobIdToReservedNumber[event.stage.job.id])
                    event.stage.finish_time = event.time
                    self.stage_durations[event.stage.id] = {}
                    self.stage_durations[event.stage.id]["task num"] = len(
                        event.stage.taskset)
                    self.stage_durations[
                        event.stage.id]["used slot num"] = len(stageSlots)
                    self.stage_durations[event.stage.id][
                        "monopolize"] = event.stage.monopolize_time
                    self.stage_durations[event.stage.id][
                        "duration"] = event.stage.finish_time - event.stage.submit_time

                msg = self.scheduler.stage_complete(event.stage)
                # ready_stage or job (tell the simulator the entire job is done)
                for item in msg:
                    if isinstance(item, Stage):  # stage ready to be submitted
                        new_events.append(EventStageSubmit(event.time, item))
                    else:  # must be job, which means the job is done
                        new_events.append(EventJobComplete(event.time, item))
                # print "# time:", event.time, "stage completion", event.stage.id, event.stage.job_id

            # JobComplete
            elif isinstance(event, EventJobComplete):
                event.job.completion_time = event.time
                event.job.duration = event.time - event.job.submit_time
                event.job.execution_time = event.time - event.job.start_execution_time
                print(
                    "-", event.job.id, " (job) finishes, duration",
                    event.job.duration, " job.alloc ", event.job.alloc, "PR:",
                    float(event.job.monopolize_time) /
                    event.job.execution_time)
                if event.job.service_type == self.cluster.foreground_type:
                    self.cluster.clear_reservation(event.job)
                    event.job.progress_rate = float(
                        event.job.monopolize_time) / event.job.execution_time
                self.scheduler.handle_job_completion(event.job)
                self.job_durations[int(
                    event.job.id.split("_")[-1])] = event.job.duration
                job_id = int(event.job.id.split("_")[-1])
                self.job_execution_profile[job_id] = {}
                self.job_execution_profile[job_id][
                    "duration"] = event.job.duration
                self.job_execution_profile[job_id]["demand"] = len(
                    event.job.curve) - 1
                self.job_execution_profile[job_id][
                    "execution_time"] = event.job.execution_time
                if self.scheduler.scheduler_type == "paf":
                    self.job_execution_profile[job_id][
                        "fair_alloc"] = event.job.fairAlloc
                    self.job_execution_profile[job_id][
                        "target_alloc"] = event.job.targetAlloc
                else:
                    self.job_execution_profile[job_id][
                        "fair_alloc"] = event.job.alloc
                    self.job_execution_profile[job_id][
                        "target_alloc"] = event.job.alloc
                self.job_execution_profile[job_id]["alloc"] = event.job.alloc
                self.job_execution_profile[job_id][
                    "progress_rate"] = event.job.progress_rate

            for new_event in new_events:
                self.event_queue.put(new_event)

        progress_rates = []
        for job in self.job_list[0]:
            if job.service_type == self.cluster.foreground_type:
                progress_rates.append(job.progress_rate)
        print("total average progress rate:",
              sum(progress_rates) / len(progress_rates))

        if self.scheduler.scheduler_type == "paf":
            fname = "ExecutionResult/" + \
                    str(self.cluster.machine_number) + "_" + self.scheduler.scheduler_type + "_" + str(
                self.cluster.alpha) + ".json"
        else:
            fname = "ExecutionResult/" + \
                    str(self.cluster.machine_number) + "_" + self.scheduler.scheduler_type + ".json"
        f = open(fname, 'w')
        json.dump(self.job_execution_profile, f, indent=2, sort_keys=True)
        f.close()

    def generate_job_profile(self, user_id):
        self.job_list.append(list())
        task_id = 0
        job_submit_time = dict()
        job_priority = dict()
        job_service_type = dict()
        job_curveString = dict()
        job_monopolize_time = dict()
        job_weight = dict()
        job_accelerate_factor = dict()
        print("enter generate_job_profile")

        stageIdToParallelism = dict()
        for c_job_id in self.job_profile:
            # temporary setting
            # XX job_profile
            job_submit_time[int(
                c_job_id)] = self.job_profile[c_job_id]["Submit Time"]
            job_priority[int(
                c_job_id)] = self.job_profile[c_job_id]["Priority"]
            job_service_type[int(
                c_job_id)] = self.job_profile[c_job_id]["Service Type"]
            job_curveString[int(
                c_job_id)] = self.job_profile[c_job_id]["curve"]
            job_monopolize_time[int(
                c_job_id)] = self.job_profile[c_job_id]["Monopolize Time"]
            job_weight[int(c_job_id)] = self.job_profile[c_job_id]["Weight"]
            job_accelerate_factor[int(
                c_job_id)] = self.job_profile[c_job_id]["Accelerate Factor"]

        for stage_id in self.stage_profile:
            # XX stage_profile
            timeout_type = 0
            job_id = self.stage_profile[stage_id]["Job ID"]
            self.job_durations[job_id] = 0
            Job_id = 'user_%s_job_%s' % (user_id, job_id)
            Stage_id = 'user_%s_stage_%s' % (user_id, stage_id)
            task_number = self.stage_profile[stage_id]["Task Number"]
            # change parallelism

            stageIdToParallelism[Stage_id] = task_number

            Parent_ids = list()
            if "Parents" in self.stage_profile[stage_id]:
                parent_ids = self.stage_profile[stage_id]["Parents"]
                for parent_id in parent_ids:
                    Parent_ids.append('user_%s_stage_%s' %
                                      (user_id, parent_id))
                    if stageIdToParallelism[Parent_ids[-1]] >= task_number:
                        timeout_type = 1

            # generate taskset of the stage
            taskset = list()
            max_time = 0
            for i in range(0, task_number):
                runtime = self.search_runtime(stage_id, i)
                if job_service_type[job_id] != 0:  # job_service_type is either 0 or 1
                    runtime *= 1  # no-op placeholder for runtime scaling
                else:
                    runtime *= 1  # no-op placeholder for runtime scaling
                if runtime > max_time:
                    max_time = runtime
                Task_id = 'user_%s_task_%s' % (user_id, task_id)
                time_out = 0
                if timeout_type == 0:
                    task = Task(Job_id, Stage_id, Task_id, i, runtime,
                                time_out, job_priority[job_id])
                else:
                    # task = Task(Job_id, Stage_id, Task_id, i, runtime, 3000, job_priority[job_id])
                    task = Task(Job_id, Stage_id, Task_id, i, runtime,
                                time_out, job_priority[job_id])
                task_id += 1
                task.user_id = user_id
                taskset.append(task)
            stage = Stage(Job_id, Stage_id, Parent_ids, taskset)
            stage.monopolize_time = max_time

            for id in Parent_ids:
                self.scheduler.stageIdToStage[
                    id].downstream_parallelism += len(taskset)

            self.scheduler.stageIdToStage[Stage_id] = stage
            for task in taskset:
                task.stage = stage
            stage.user_id = user_id

            if not self.search_job_by_id(Job_id, user_id):
                job = Job(Job_id)
                job.index = int(job_id)
                job.user_id = user_id
                job.stages.append(stage)
                job.submit_time = job_submit_time[job_id]
                job.priority = job_priority[job_id]
                job.service_type = job_service_type[job_id]
                job.weight = job_weight[job_id]
                job.accelerate_factor = job_accelerate_factor[job_id]
                if job.service_type == self.cluster.foreground_type:
                    self.cluster.jobIdToReservedNumber[job.id] = 0
                    self.cluster.jobIdToReservedMachineId[job.id] = set()
                job.set_curve(job_curveString[job_id])
                job.monopolize_time = job_monopolize_time[job_id]
                self.job_list[user_id].append(job)
                stage.priority = job.priority
                stage.job = job
            else:  # this job already exists
                job = self.search_job_by_id(Job_id, user_id)
                job.stages.append(stage)
                stage.priority = job.priority
                stage.job = job

        # Set the not_completed_stage_ids for all the jobs
        for job in self.job_list[user_id]:
            job.not_completed_stage_ids = [stage.id for stage in job.stages]
            for tstage in job.stages:
                job.stagesDict[tstage.id] = tstage
            job.submitted_stage_ids = list()
            job.completed_stage_ids = list()

        # TODO: this should be changed to sort by the submission time of a job
        self.job_list[user_id] = sorted(
            self.job_list[user_id],
            key=lambda job: job.index)  # sort job_list by job_index
        print("finish generate job profile")
        print("0: tasknumber:", len(self.job_list[0][0].stages[0].taskset))

    def search_runtime(self, stage_id, task_index):
        return self.runtime_profile[str(stage_id)][str(task_index)]['runtime']

    def search_job_by_id(self, job_id, user_index):
        for job in self.job_list[user_index]:
            if job.id == job_id:
                return job
        return False

    def reset(self):
        for job in self.job_list:
            job.reset()
        self.cluster.reset()
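
run() in this example dumps job_execution_profile to ExecutionResult/<machine_number>_<scheduler_type>[_<alpha>].json. A small post-processing sketch follows; the concrete file name is an assumption, while the per-job layout (duration, execution_time, alloc, progress_rate) comes from how run() builds the profile above.

# Post-processing sketch; "ExecutionResult/32_fair.json" is an illustrative path.
import json

with open("ExecutionResult/32_fair.json") as f:
    profile = json.load(f)

durations = [entry["duration"] for entry in profile.values()]
print("jobs:", len(durations),
      "mean duration:", sum(durations) / len(durations))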
Example #3
class Simulator:
    jobs = OrderedDict()  # map from user id to job id
    jobs[0] = 'j_3093365'
    '''
    Init
    '''
    def __init__(self, cluster, user_number):
        self.cluster = cluster
        # self.app_name = json_dir.split('/')[-1]
        self.log = Log()
        self.cluster = cluster
        self.rdd_list = list()
        self.block_list = list()
        self.job_list = list()
        self.event_queue = Q.PriorityQueue()
        self.user_number = user_number

    '''
    Reset the ref counts
    '''

    def reset(self):
        self.log.flush()
        self.cluster.reset()
        self.rdd_list = list()
        self.block_list = list()
        self.job_list = list()
        self.event_queue = Q.PriorityQueue()

        # generate the stage list for each user. update the block ref counts
        for user_index in range(0, self.user_number):
            job_id = Simulator.jobs[user_index]
            job_profile_path = 'job-profile/%s.json' % (job_id)
            if not os.path.exists(job_profile_path):
                print('Error: cannot find %s' % job_profile_path)
                exit(-1)
            else:
                job_profile = json.load(open(job_profile_path, 'r'),
                                        object_pairs_hook=OrderedDict)
                self.generate_blocks(user_index, job_profile)
                self.generate_tasks(user_index, job_profile)

        self.cluster.block_manager.rdd_list = self.rdd_list

    '''
    Search_rdd_by_name
    '''

    def search_rdd_by_name(self, name):
        for rdd in self.rdd_list:
            if rdd.name == name:
                return rdd
        return False

    '''
    Naming rule:
        rdd: user$i_rdd$j: the RDD generated in the j-th stage of user i's job
        block: user$i_rdd$j_block$k: the k-th block of that RDD
    '''

    def generate_blocks(self, user_id, job_profile):

        # generate all rdds and blocks
        for stage_id in job_profile.keys():
            task_num = job_profile[stage_id]['Task_Num']
            rdd_name = 'user%s_rdd%s' % (user_id, stage_id)
            this_rdd = RDD(
                rdd_name, task_num
            )  # task_num in this stage = partition_num of the generated rdd
            block_list = list()
            for index in range(0, task_num):
                this_block = Block(
                    this_rdd, this_rdd.name, index, user_id
                )  # block size will be chosen randomly according to the co-flow trace
                block_list.append(this_block)
            this_rdd.set_blocklist(block_list)
            self.rdd_list.append(this_rdd)

    '''
    Generate all stages and tasks; ref-counts of blocks and peer blocks
    '''

    def generate_tasks(self, user_id, job_profile):
        this_job = Job(user_id)
        stage_list = list()
        sorted_stage_id = np.sort(job_profile.keys())  # TODO check: sorts string keys lexicographically; needs list() under Python 3
        for stage_id in sorted_stage_id:
            this_stage = Stage(user_id, stage_id)
            task_num = job_profile[stage_id]['Task_Num']
            this_rdd = self.search_rdd_by_name('user%s_rdd%s' %
                                               (user_id, stage_id))
            parent_rdd_ids = job_profile[stage_id]['Parents']
            start_time = float(job_profile[stage_id]['Start_Time'])
            end_time = float(job_profile[stage_id]['End_Time'])

            task_list = list()
            peer_group = list()  # for sticky policies
            for task_id in range(0, task_num):
                this_task = Task(user_id, stage_id, task_id)

                # set start and end time
                this_task.set_start_time(start_time)
                this_task.set_end_time(end_time)

                # set dependent blocks and update their ref counts.
                dependent_blocks = list()
                for parent_rdd_id in parent_rdd_ids:
                    parent_rdd = self.search_rdd_by_name(
                        'user%s_rdd%s' % (user_id, parent_rdd_id))
                    dependent_block_index = task_id % parent_rdd.partitions  # Map the dependent block
                    dependent_block = parent_rdd.blocks[dependent_block_index]
                    dependent_block.add_ref_count()
                    dependent_blocks.append(dependent_block)
                this_task.set_dependent_blocks(dependent_blocks)

                # set peer-groups for LRC conservative policy
                for dependent_block in dependent_blocks:
                    peer_group.append(dependent_block)
                if (isinstance(self.cluster.block_manager.policy,
                               LRCConservativePolicy)):
                    self.cluster.block_manager.policy.add_peer_group(
                        peer_group)
                    peer_group = list()

                # set produced_block
                produced_block = this_rdd.blocks[task_id]
                this_task.set_produced_block(produced_block)
                task_list.append(this_task)
            if (isinstance(self.cluster.block_manager.policy,
                           LRCAggressivePolicy)):
                self.cluster.block_manager.policy.add_peer_group(peer_group)
            this_stage.set_tasks(task_list)
            stage_list.append(this_stage)

        this_job.set_stages(stage_list)
        self.job_list.append(this_job)

    '''
    Run
    '''

    def run(self):
        self.reset()  # update the ref-counts

        #self.cluster.block_manager.rdd_list = self.rdd_list  # Tell the block_manager how many partitions each rdd has
        total_hit = 0
        total_miss = 0
        task_hit = 0
        task_miss = 0
        stage_hit = 0
        stage_miss = 0

        for job in self.job_list:
            for stage in job.stages:
                for task in stage.tasks:
                    self.event_queue.put(EventTaskSubmit(
                        task.start_time, task))
                    self.event_queue.put(EventTaskComplete(
                        task.end_time, task))

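        # Replay the trace: each EventTaskSubmit is logged and handed to
        # cluster.submit_task; each EventTaskComplete accumulates the
        # per-task hit/miss counters.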
        while not self.event_queue.empty():
            event = self.event_queue.get()
            if isinstance(event, EventTaskSubmit):
                self.log.add('Task %s starts' % event.task.name, event.time)
                # if int(event.task.stage_id)==133:
                #     a=1
                self.cluster.submit_task(event.task, event.time)
            elif isinstance(event, EventTaskComplete):
                self.log.add('Task %s ends' % event.task.name, event.time)
                total_hit += event.task.hit_num
                total_miss += event.task.miss_num
                if event.task.task_hit:
                    task_hit += 1
                else:
                    task_miss += 1

        # get stage hit
        for job in self.job_list:
            for stage in job.stages:
                this_stage_hit = True
                for task in stage.tasks:
                    if not task.task_hit:
                        this_stage_hit = False
                        break
                if this_stage_hit:
                    stage_hit += 1
                else:
                    stage_miss += 1

        return [
            total_hit, total_miss, task_hit, task_miss, stage_hit, stage_miss
        ]
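
run() in this example returns raw counters in the order [total_hit, total_miss, task_hit, task_miss, stage_hit, stage_miss]. A usage sketch for turning them into hit rates follows; Cluster() and the single-user setup are assumptions, only the return order comes from the code above.

# Hypothetical usage; Cluster's constructor is an assumption.
cluster = Cluster()
sim = Simulator(cluster, user_number=1)
total_hit, total_miss, task_hit, task_miss, stage_hit, stage_miss = sim.run()

print('block hit rate: %.2f' % (float(total_hit) / max(total_hit + total_miss, 1)))
print('task hit rate:  %.2f' % (float(task_hit) / max(task_hit + task_miss, 1)))
print('stage hit rate: %.2f' % (float(stage_hit) / max(stage_hit + stage_miss, 1)))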