class ReverseEasyScheduler(EasyBackfillScheduler):
    """EASY backfilling scheduler whose backfill candidates are examined in
    the order produced by ``latest_sort_key``.

    Overrides the parent's ``_backfill_jobs`` hook and installs its own
    ``CpuSnapshot``.
    """

    def __init__(self, options):
        super(ReverseEasyScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])

    def _backfill_jobs(self, current_time):
        """Start every queued job behind the head that can run right now.

        Overrides the parent method. Started jobs are removed from
        ``self.unscheduled_jobs``, assigned in the CPU snapshot at
        ``current_time`` and returned as a list (empty when the queue holds
        at most one job).
        """
        queue = self.unscheduled_jobs
        if len(queue) < 2:
            return []

        head_job = queue[0]
        # Work on a copy of the tail: the queue itself is mutated in the loop.
        candidates = sorted(list_copy(queue[1:]), key=latest_sort_key)
        backfilled = []

        # Temporarily place the head job at its earliest slot while the
        # candidates are tested (presumably so that a backfilled job cannot
        # delay it -- confirm against CpuSnapshot.canJobStartNow).
        self.cpu_snapshot.assignJobEarliest(head_job, current_time)
        for candidate in candidates:
            if not self.cpu_snapshot.canJobStartNow(candidate, current_time):
                continue
            queue.remove(candidate)
            self.cpu_snapshot.assignJob(candidate, current_time)
            backfilled.append(candidate)
        # The head job's placement was only temporary; undo it.
        self.cpu_snapshot.delJobFromCpuSlices(head_job)

        return backfilled
class TailDoubleEasyScheduler(EasyBackfillScheduler):
    """EASY backfilling scheduler that doubles the predicted run time of
    backfill candidates.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?) -- citation unverified.
    """

    def __init__(self, options):
        super(TailDoubleEasyScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])

    def _backfill_jobs(self, current_time):
        """Backfill jobs behind the head using doubled run-time predictions.

        Overrides the parent method. Each candidate's ``predicted_run_time``
        is set to twice its ``user_estimated_run_time`` before the fit test.
        A job that is backfilled keeps the doubled value; a job that is not
        is reset to its plain user estimate. Returns the list of jobs
        started (empty when the queue holds at most one job).
        """
        queue = self.unscheduled_jobs
        if len(queue) < 2:
            return []

        head_job = queue[0]
        # Copy of the tail: the queue itself is mutated inside the loop.
        candidates = list_copy(queue[1:])
        backfilled = []

        # Temporary placement of the head job while candidates are tested.
        self.cpu_snapshot.assignJobEarliest(head_job, current_time)
        for candidate in candidates:
            # Doubling happens here, before the fit test.
            candidate.predicted_run_time = 2 * candidate.user_estimated_run_time
            if not self.cpu_snapshot.canJobStartNow(candidate, current_time):
                # Not backfilled: undo the doubling.
                candidate.predicted_run_time = candidate.user_estimated_run_time
                continue
            queue.remove(candidate)
            self.cpu_snapshot.assignJob(candidate, current_time)
            backfilled.append(candidate)
        self.cpu_snapshot.delJobFromCpuSlices(head_job)

        return backfilled
class ConservativeScheduler(Scheduler):
    """Scheduler that gives every job a slot in the CPU snapshot as soon as
    it is submitted and re-places not-yet-started jobs whenever a job
    terminates.
    """

    def __init__(self, options):
        super(ConservativeScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # Jobs are appended on submission and removed on termination.
        self.unfinished_jobs_by_submit_time = []

    def new_events_on_job_submission(self, job, current_time):
        """Place ``job`` at its earliest slot and return its start event.

        Returns a one-element list holding a ``JobStartEvent`` at the start
        time chosen by the CPU snapshot.
        """
        snapshot = self.cpu_snapshot
        snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.append(job)
        snapshot.assignJobEarliest(job, current_time)
        start_event = JobStartEvent(job.start_to_run_at_time, job)
        return [start_event]

    def new_events_on_job_termination(self, job, current_time):
        """Release the unused tail of a finished job and reschedule the rest.

        The tail of ``job`` is deleted from the CPU slices (relevant when it
        ended before its declared duration), then the remaining jobs are
        rescheduled. Returns a list of ``JobStartEvent`` objects for the
        jobs whose start time changed.
        """
        snapshot = self.cpu_snapshot
        snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.remove(job)
        snapshot.delTailofJobFromCpuSlices(job)
        return self._reschedule_jobs(current_time)

    def _reschedule_jobs(self, current_time):
        """Re-place every job that has not started yet.

        Returns a list of ``JobStartEvent`` objects, one per job whose start
        time differs from its previous one.
        """
        moved_events = []
        for pending in self.unfinished_jobs_by_submit_time:
            if pending.start_to_run_at_time <= current_time:
                # Already started: it cannot be rescheduled
                # (preemptions are not allowed).
                continue

            old_start = pending.start_to_run_at_time
            self.cpu_snapshot.delJobFromCpuSlices(pending)
            self.cpu_snapshot.assignJobEarliest(pending, current_time)
            # Rescheduling must never push a job later than before.
            assert old_start >= pending.start_to_run_at_time

            if old_start != pending.start_to_run_at_time:
                moved_events.append(
                    JobStartEvent(pending.start_to_run_at_time, pending)
                )
        return moved_events
class ConservativeScheduler(Scheduler):
    """Scheduler in which each job is placed in the CPU snapshot at
    submission time; jobs that have not started are re-placed after every
    job termination.
    """

    def __init__(self, options):
        super(ConservativeScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # Grows on submission, shrinks on termination.
        self.unfinished_jobs_by_submit_time = []

    def new_events_on_job_submission(self, job, current_time):
        """Assign ``job`` its earliest slot; return ``[JobStartEvent]`` for
        the start time the CPU snapshot picked.
        """
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.append(job)
        self.cpu_snapshot.assignJobEarliest(job, current_time)
        events = [JobStartEvent(job.start_to_run_at_time, job)]
        return events

    def new_events_on_job_termination(self, job, current_time):
        """Drop the finished job's unused tail, then reschedule the others.

        The tail of ``job`` is removed from the CPU slices (this matters when
        the job ended before its declared duration). Returns the list of
        ``JobStartEvent`` objects produced by ``_reschedule_jobs``.
        """
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.remove(job)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        rescheduled_events = self._reschedule_jobs(current_time)
        return rescheduled_events

    def _reschedule_jobs(self, current_time):
        """Try to move each not-yet-started job to its earliest slot.

        Returns a list with one ``JobStartEvent`` per job whose start time
        actually changed.
        """
        snapshot = self.cpu_snapshot
        fresh_events = []
        for waiting_job in self.unfinished_jobs_by_submit_time:
            # A job that started to run before cannot be rescheduled
            # (preemptions are not allowed).
            if waiting_job.start_to_run_at_time <= current_time:
                continue

            previous_start = waiting_job.start_to_run_at_time
            snapshot.delJobFromCpuSlices(waiting_job)
            snapshot.assignJobEarliest(waiting_job, current_time)
            # The new slot may be earlier or equal, never later.
            assert previous_start >= waiting_job.start_to_run_at_time

            if previous_start != waiting_job.start_to_run_at_time:
                fresh_events.append(
                    JobStartEvent(waiting_job.start_to_run_at_time, waiting_job)
                )
        return fresh_events
class EasySJBFScheduler(EasyBackfillScheduler):
    """EASY backfilling scheduler whose backfill candidates are examined in
    the order produced by ``sjf_sort_key``.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?) -- citation unverified.
    """

    def __init__(self, num_processors):
        super(EasySJBFScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)

    def _backfill_jobs(self, current_time):
        """Start every queued job behind the head that can run right now.

        Overrides the parent method. Started jobs are removed from
        ``self.unscheduled_jobs``, assigned in the CPU snapshot at
        ``current_time`` and returned as a list (empty when the queue holds
        at most one job).
        """
        queue = self.unscheduled_jobs
        if len(queue) < 2:
            return []

        head_job = queue[0]
        # Sort a copy of the tail; the queue itself is mutated in the loop.
        candidates = sorted(list_copy(queue[1:]), key=sjf_sort_key)
        backfilled = []

        # Temporary placement of the head job while candidates are tested.
        self.cpu_snapshot.assignJobEarliest(head_job, current_time)
        for candidate in candidates:
            if not self.cpu_snapshot.canJobStartNow(candidate, current_time):
                continue
            queue.remove(candidate)
            self.cpu_snapshot.assignJob(candidate, current_time)
            backfilled.append(candidate)
        self.cpu_snapshot.delJobFromCpuSlices(head_job)

        return backfilled
class EasyPlusPlusScheduler(Scheduler):
    """Backfilling scheduler that works with predicted run times supplied by
    a predictor and adjusted by a corrector.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?) -- citation unverified.
    """

    I_NEED_A_PREDICTOR = True

    def __init__(self, options):
        super(EasyPlusPlusScheduler, self).__init__(options)
        self.init_predictor(options)
        self.init_corrector(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        self.unscheduled_jobs = []

    def new_events_on_job_submission(self, job, current_time):
        """Predict the job's run time, queue it, and start whatever can run.

        Returns a list of ``JobStartEvent`` objects (all at ``current_time``)
        for the jobs that were started.
        """
        self.cpu_snapshot.archive_old_slices(current_time)
        self.predictor.predict(job, current_time, self.running_jobs)
        # Record the first prediction only; never overwrite an existing one.
        if not hasattr(job, "initial_prediction"):
            job.initial_prediction = job.predicted_run_time
        self.unscheduled_jobs.append(job)

        started_jobs = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, started) for started in started_jobs]

    def new_events_on_job_termination(self, job, current_time):
        """Feed the finished job to the predictor, free its unused tail and
        start whatever can run now.

        Returns a list of ``JobStartEvent`` objects at ``current_time``.
        """
        self.predictor.fit(job, current_time)
        # Only the "ninetynine" corrector uses the ratio estimator.
        if self.corrector.__name__ == "ninetynine":
            ratio = job.actual_run_time / job.user_estimated_run_time
            self.pestimator.fit(ratio)

        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)

        started_jobs = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, started) for started in started_jobs]

    def new_events_on_job_under_prediction(self, job, current_time):
        """Extend a running job whose prediction turned out too short.

        The corrector supplies a new predicted run time, the job's tail is
        assigned in the CPU snapshot accordingly, and a single
        ``JobStartEvent`` at ``current_time`` is returned in a list.
        """
        # The counter is created at 0 on the first call and incremented on
        # every later call.
        if hasattr(job, "num_underpredict"):
            job.num_underpredict += 1
        else:
            job.num_underpredict = 0

        # The "ninetynine" corrector takes the estimator as an extra argument.
        if self.corrector.__name__ == "ninetynine":
            corrected_run_time = self.corrector(self.pestimator, job, current_time)
        else:
            corrected_run_time = self.corrector(job, current_time)

        # Apply the new predicted run time.
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job, corrected_run_time)
        job.predicted_run_time = corrected_run_time
        return [JobStartEvent(current_time, job)]

    def _schedule_jobs(self, current_time):
        """Schedule the jobs that can run right now and return them."""
        started = self._schedule_head_of_list(current_time)
        started.extend(self._backfill_jobs(current_time))
        return started

    def _schedule_head_of_list(self, current_time):
        """Start jobs from the front of the queue while processors suffice.

        Stops at the first head job that does not fit. Returns the list of
        jobs started.
        """
        started = []
        while self.unscheduled_jobs:
            free_now = self.cpu_snapshot.free_processors_available_at(current_time)
            if free_now < self.unscheduled_jobs[0].num_required_processors:
                # The first job cannot be scheduled; stop here.
                break
            head_job = self.unscheduled_jobs.pop(0)
            self.cpu_snapshot.assignJob(head_job, current_time)
            started.append(head_job)
        return started

    def _backfill_jobs(self, current_time):
        """Backfill jobs behind the head, examined in ``sjf_sort_key`` order.

        Each backfilled job gets ``is_backfilled = 1``, is removed from the
        queue and assigned at ``current_time``. Returns the list of jobs
        started (empty when the queue holds at most one job).
        """
        queue = self.unscheduled_jobs
        if len(queue) < 2:
            return []

        head_job = queue[0]
        # Sort a copy of the tail; the queue itself is mutated in the loop.
        candidates = sorted(list_copy(queue[1:]), key=sjf_sort_key)
        backfilled = []

        # Temporary placement of the head job while candidates are tested.
        self.cpu_snapshot.assignJobEarliest(head_job, current_time)
        for candidate in candidates:
            if not self.cpu_snapshot.canJobStartNow(candidate, current_time):
                continue
            candidate.is_backfilled = 1
            queue.remove(candidate)
            self.cpu_snapshot.assignJob(candidate, current_time)
            backfilled.append(candidate)
        self.cpu_snapshot.delJobFromCpuSlices(head_job)

        return backfilled
class EasyPlusPlusScheduler(Scheduler):
    """Backfilling scheduler that predicts a job's run time from the actual
    run times of the same user's two most recently terminated jobs.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?) -- citation unverified.
    """

    def __init__(self, num_processors):
        super(EasyPlusPlusScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)
        self.unscheduled_jobs = []
        # Per-user history, keyed by job.user_id: actual run time of the
        # user's last terminated job and of the one before it (None until
        # enough jobs have terminated).
        self.user_run_time_prev = {}
        self.user_run_time_last = {}

    def new_events_on_job_submission(self, job, current_time):
        """Queue ``job`` and start whatever can run now.

        A user seen for the first time gets empty (``None``) history entries.
        Returns a list of ``JobStartEvent`` objects at ``current_time`` for
        the jobs that were started.
        """
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if job.user_id not in self.user_run_time_last:
            self.user_run_time_prev[job.user_id] = None
            self.user_run_time_last[job.user_id] = None

        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, started)
            for started in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Record the job's actual run time, free its tail, and start
        whatever can run now.

        Returns a list of ``JobStartEvent`` objects at ``current_time``.
        """
        # The history entries are created at submission time.
        assert job.user_id in self.user_run_time_last
        assert job.user_id in self.user_run_time_prev

        # Shift the two-entry history: last -> prev, this job -> last.
        self.user_run_time_prev[job.user_id] = self.user_run_time_last[job.user_id]
        self.user_run_time_last[job.user_id] = job.actual_run_time

        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, started)
            for started in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_under_prediction(self, job, current_time):
        """Fall back to the user's estimate when the prediction was too short.

        The job's tail is assigned in the CPU snapshot and its
        ``predicted_run_time`` is set to ``user_estimated_run_time``.
        Returns an empty list (no new events).
        """
        assert job.predicted_run_time <= job.user_estimated_run_time
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job)
        job.predicted_run_time = job.user_estimated_run_time
        return []

    def _schedule_jobs(self, current_time):
        """Schedule the jobs that can run right now and return them.

        Before scheduling, the prediction of every queued job whose user has
        two recorded run times is refreshed to the integer average of those
        two, capped by the user's own estimate.
        """
        for job in self.unscheduled_jobs:
            if self.user_run_time_prev[job.user_id] is not None:
                average = int(
                    (self.user_run_time_last[job.user_id]
                     + self.user_run_time_prev[job.user_id]) / 2
                )
                job.predicted_run_time = min(job.user_estimated_run_time, average)

        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs from the front of the queue while processors suffice.

        Stops at the first head job that does not fit. Returns the list of
        jobs started.
        """
        result = []
        while self.unscheduled_jobs:
            free_now = self.cpu_snapshot.free_processors_available_at(current_time)
            if free_now < self.unscheduled_jobs[0].num_required_processors:
                # The first job cannot be scheduled; stop here.
                break
            job = self.unscheduled_jobs.pop(0)
            self.cpu_snapshot.assignJob(job, current_time)
            result.append(job)
        return result

    def _backfill_jobs(self, current_time):
        """Backfill jobs behind the head, examined in ``sjf_sort_key`` order.

        Returns the list of jobs started (empty when the queue holds at most
        one job).
        """
        if len(self.unscheduled_jobs) <= 1:
            return []

        result = []
        first_job = self.unscheduled_jobs[0]
        # Sort a copy of the tail; the queue itself is mutated in the loop.
        tail = list_copy(self.unscheduled_jobs[1:])
        tail_of_jobs_by_sjf_order = sorted(tail, key=sjf_sort_key)

        # Temporary placement of the head job while candidates are tested.
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)
        for job in tail_of_jobs_by_sjf_order:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        self.cpu_snapshot.delJobFromCpuSlices(first_job)

        return result
class EasyPlusPlusScheduler(Scheduler):
    """Backfilling scheduler whose run-time predictions come from the actual
    run times of the two jobs the same user terminated most recently.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?) -- citation unverified.
    """

    def __init__(self, num_processors):
        super(EasyPlusPlusScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)
        self.unscheduled_jobs = []
        # Two-entry run-time history per user, keyed by job.user_id.
        # Entries are None until the user has terminated enough jobs.
        self.user_run_time_prev = {}
        self.user_run_time_last = {}

    def new_events_on_job_submission(self, job, current_time):
        """Queue ``job`` and start whatever can run now.

        History entries (``None``) are created for a user seen for the first
        time. Returns a list of ``JobStartEvent`` objects at
        ``current_time`` for the jobs that were started.
        """
        # Membership test with `in`: dict.has_key() was removed in Python 3.
        if job.user_id not in self.user_run_time_last:
            self.user_run_time_prev[job.user_id] = None
            self.user_run_time_last[job.user_id] = None

        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, started)
            for started in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Update the user's run-time history, free the job's tail, and
        start whatever can run now.

        Returns a list of ``JobStartEvent`` objects at ``current_time``.
        """
        # Both entries are created when the user's job is submitted.
        assert job.user_id in self.user_run_time_last
        assert job.user_id in self.user_run_time_prev

        # The previous "last" becomes "prev"; this job becomes "last".
        self.user_run_time_prev[job.user_id] = self.user_run_time_last[job.user_id]
        self.user_run_time_last[job.user_id] = job.actual_run_time

        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, started)
            for started in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_under_prediction(self, job, current_time):
        """Revert an under-predicted job to the user's own estimate.

        Assigns the job's tail in the CPU snapshot and sets
        ``predicted_run_time`` to ``user_estimated_run_time``. Returns an
        empty list (no new events).
        """
        assert job.predicted_run_time <= job.user_estimated_run_time
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job)
        job.predicted_run_time = job.user_estimated_run_time
        return []

    def _schedule_jobs(self, current_time):
        """Schedule the jobs that can run right now and return them.

        For each queued job whose user already has two recorded run times,
        the prediction is first reset to the integer average of the two,
        capped by the user's estimate.
        """
        for job in self.unscheduled_jobs:
            if self.user_run_time_prev[job.user_id] is not None:
                average = int(
                    (self.user_run_time_last[job.user_id]
                     + self.user_run_time_prev[job.user_id]) / 2
                )
                job.predicted_run_time = min(job.user_estimated_run_time, average)

        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs from the front of the queue while processors suffice.

        Stops at the first head job that does not fit. Returns the list of
        jobs started.
        """
        result = []
        while self.unscheduled_jobs:
            free_now = self.cpu_snapshot.free_processors_available_at(current_time)
            if free_now < self.unscheduled_jobs[0].num_required_processors:
                # The first job cannot be scheduled; stop here.
                break
            job = self.unscheduled_jobs.pop(0)
            self.cpu_snapshot.assignJob(job, current_time)
            result.append(job)
        return result

    def _backfill_jobs(self, current_time):
        """Backfill jobs behind the head, examined in ``sjf_sort_key`` order.

        Returns the list of jobs started (empty when the queue holds at most
        one job).
        """
        if len(self.unscheduled_jobs) <= 1:
            return []

        result = []
        first_job = self.unscheduled_jobs[0]
        # Sort a copy of the tail; the queue itself is mutated in the loop.
        tail = list_copy(self.unscheduled_jobs[1:])
        tail_of_jobs_by_sjf_order = sorted(tail, key=sjf_sort_key)

        # Temporary placement of the head job while candidates are tested.
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)
        for job in tail_of_jobs_by_sjf_order:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        self.cpu_snapshot.delJobFromCpuSlices(first_job)

        return result
class EasyPlusPlusScheduler(Scheduler):
    """Backfilling scheduler that works with predicted run times supplied by
    a predictor and adjusted by a corrector; it also logs actual versus
    predicted run times to a text file.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?) -- citation unverified.
    """

    I_NEED_A_PREDICTOR = True

    def __init__(self, options):
        super(EasyPlusPlusScheduler, self).__init__(options)
        self.init_predictor(options)
        self.init_corrector(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        self.unscheduled_jobs = []
        # Trace file: one "actual<TAB>predicted" line per submitted job.
        # NOTE(review): the handle is never closed in the code visible here.
        self.ff = open("times-epp-sgd.txt", 'w')

    def new_events_on_job_submission(self, job, current_time):
        """Predict the job's run time, log it, queue the job and start
        whatever can run.

        Returns a list of ``JobStartEvent`` objects (all at ``current_time``)
        for the jobs that were started.
        """
        self.cpu_snapshot.archive_old_slices(current_time)
        self.predictor.predict(job, current_time, self.running_jobs)

        trace_line = "%d\t%d\n" % (job.actual_run_time, job.predicted_run_time)
        self.ff.write(trace_line)
        self.ff.flush()

        # Record the first prediction only; never overwrite an existing one.
        if not hasattr(job, "initial_prediction"):
            job.initial_prediction = job.predicted_run_time
        self.unscheduled_jobs.append(job)

        started_jobs = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, started) for started in started_jobs]

    def new_events_on_job_termination(self, job, current_time):
        """Feed the finished job to the predictor, free its unused tail and
        start whatever can run now.

        Returns a list of ``JobStartEvent`` objects at ``current_time``.
        """
        self.predictor.fit(job, current_time)
        # Only the "ninetynine" corrector uses the ratio estimator.
        if self.corrector.__name__ == "ninetynine":
            ratio = job.actual_run_time / job.user_estimated_run_time
            self.pestimator.fit(ratio)

        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)

        started_jobs = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, started) for started in started_jobs]

    def new_events_on_job_under_prediction(self, job, current_time):
        """Extend a running job whose prediction turned out too short.

        The corrector supplies a new predicted run time, the job's tail is
        assigned in the CPU snapshot accordingly, and a single
        ``JobStartEvent`` at ``current_time`` is returned in a list.
        """
        assert job.predicted_run_time <= job.user_estimated_run_time

        # The counter is created at 0 on the first call and incremented on
        # every later call.
        if hasattr(job, "num_underpredict"):
            job.num_underpredict += 1
        else:
            job.num_underpredict = 0

        # The "ninetynine" corrector takes the estimator as an extra argument.
        if self.corrector.__name__ == "ninetynine":
            corrected_run_time = self.corrector(self.pestimator, job, current_time)
        else:
            corrected_run_time = self.corrector(job, current_time)

        # Apply the new predicted run time.
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job, corrected_run_time)
        job.predicted_run_time = corrected_run_time
        return [JobStartEvent(current_time, job)]

    def _schedule_jobs(self, current_time):
        """Schedule the jobs that can run right now and return them."""
        started = self._schedule_head_of_list(current_time)
        started.extend(self._backfill_jobs(current_time))
        return started

    def _schedule_head_of_list(self, current_time):
        """Start jobs from the front of the queue while processors suffice.

        Stops at the first head job that does not fit. Returns the list of
        jobs started.
        """
        started = []
        while self.unscheduled_jobs:
            free_now = self.cpu_snapshot.free_processors_available_at(current_time)
            if free_now < self.unscheduled_jobs[0].num_required_processors:
                # The first job cannot be scheduled; stop here.
                break
            head_job = self.unscheduled_jobs.pop(0)
            self.cpu_snapshot.assignJob(head_job, current_time)
            started.append(head_job)
        return started

    def _backfill_jobs(self, current_time):
        """Backfill jobs behind the head, examined in ``sjf_sort_key`` order.

        Each backfilled job gets ``is_backfilled = 1``, is removed from the
        queue and assigned at ``current_time``. Returns the list of jobs
        started (empty when the queue holds at most one job).
        """
        queue = self.unscheduled_jobs
        if len(queue) < 2:
            return []

        head_job = queue[0]
        # Sort a copy of the tail; the queue itself is mutated in the loop.
        candidates = sorted(list_copy(queue[1:]), key=sjf_sort_key)
        backfilled = []

        # Temporary placement of the head job while candidates are tested.
        self.cpu_snapshot.assignJobEarliest(head_job, current_time)
        for candidate in candidates:
            if not self.cpu_snapshot.canJobStartNow(candidate, current_time):
                continue
            candidate.is_backfilled = 1
            queue.remove(candidate)
            self.cpu_snapshot.assignJob(candidate, current_time)
            backfilled.append(candidate)
        self.cpu_snapshot.delJobFromCpuSlices(head_job)

        return backfilled