예제 #1
0
class ReverseEasyScheduler(EasyBackfillScheduler):
    """EASY-style scheduler whose backfill candidates are tried in the order
    produced by ``latest_sort_key`` rather than in queue order.
    """

    def __init__(self, options):
        super(ReverseEasyScheduler, self).__init__(options)
        # A fresh snapshot is installed after the parent constructor has run.
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])

    def _backfill_jobs(self, current_time):
        """Overriding parent method.

        Tries every waiting job except the head of the queue, in
        ``latest_sort_key`` order, and starts each one for which the
        snapshot's ``canJobStartNow`` check passes. The head job is placed
        with ``assignJobEarliest`` for the duration of the scan and taken
        out of the snapshot again afterwards.

        Returns the list of jobs started; they are also removed from
        ``self.unscheduled_jobs``.
        """
        queue = self.unscheduled_jobs
        if len(queue) < 2:
            # Nothing is waiting behind the head of the queue.
            return []

        head = queue[0]
        candidates = sorted(list_copy(queue[1:]), key=latest_sort_key)

        snapshot = self.cpu_snapshot
        snapshot.assignJobEarliest(head, current_time)

        started = []
        for candidate in candidates:
            if not snapshot.canJobStartNow(candidate, current_time):
                continue
            queue.remove(candidate)
            snapshot.assignJob(candidate, current_time)
            started.append(candidate)

        # The head job's placement was only needed while scanning.
        snapshot.delJobFromCpuSlices(head)
        return started
class TailDoubleEasyScheduler(EasyBackfillScheduler):
    """EASY-style scheduler that tests backfill candidates with twice their
    user-estimated run time.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?).
    """

    def __init__(self, options):
        super(TailDoubleEasyScheduler, self).__init__(options)
        # A fresh snapshot is installed after the parent constructor has run.
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])

    def _backfill_jobs(self, current_time):
        """Overriding parent method.

        Every waiting job behind the head of the queue gets its
        ``predicted_run_time`` set to twice ``user_estimated_run_time``
        before the ``canJobStartNow`` check. A job that passes is started
        with the doubled prediction; a job that fails has its prediction
        reset to the plain user estimate.

        Returns the list of jobs started; they are also removed from
        ``self.unscheduled_jobs``.
        """
        queue = self.unscheduled_jobs
        if len(queue) < 2:
            # Nothing is waiting behind the head of the queue.
            return []

        head = queue[0]
        candidates = list_copy(queue[1:])

        snapshot = self.cpu_snapshot
        snapshot.assignJobEarliest(head, current_time)

        started = []
        for candidate in candidates:
            # Doubling happens here, before the fit test.
            candidate.predicted_run_time = 2 * candidate.user_estimated_run_time
            if snapshot.canJobStartNow(candidate, current_time):
                queue.remove(candidate)
                snapshot.assignJob(candidate, current_time)
                started.append(candidate)
                continue
            # Could not be backfilled: undo the doubling.
            candidate.predicted_run_time = candidate.user_estimated_run_time

        # The head job's placement was only needed while scanning.
        snapshot.delJobFromCpuSlices(head)
        return started
예제 #3
0
class ConservativeScheduler(Scheduler):
    """Scheduler that places every job in the CPU snapshot as soon as it is
    submitted and tries to move waiting jobs earlier whenever a job ends.
    """

    def __init__(self, options):
        super(ConservativeScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # Jobs that were submitted and have not terminated yet, in the order
        # in which they were submitted.
        self.unfinished_jobs_by_submit_time = []

    def new_events_on_job_submission(self, job, current_time):
        """Place the submitted job at its earliest slot and return a
        one-element list holding its ``JobStartEvent``."""
        snapshot = self.cpu_snapshot
        snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.append(job)
        snapshot.assignJobEarliest(job, current_time)
        start_event = JobStartEvent(job.start_to_run_at_time, job)
        return [start_event]

    def new_events_on_job_termination(self, job, current_time):
        """Drop the unused tail of the terminated job from the snapshot,
        then reschedule the jobs that have not started yet.

        Returns the list of ``JobStartEvent`` objects built by
        ``_reschedule_jobs``.
        """
        snapshot = self.cpu_snapshot
        snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.remove(job)
        snapshot.delTailofJobFromCpuSlices(job)
        return self._reschedule_jobs(current_time)

    def _reschedule_jobs(self, current_time):
        """Re-place every job that has not started and return a
        ``JobStartEvent`` for each job whose start time changed."""
        moved = []
        for pending in self.unfinished_jobs_by_submit_time:
            old_start = pending.start_to_run_at_time
            if old_start <= current_time:
                # Already started; preemption is not allowed, so leave it.
                continue
            self.cpu_snapshot.delJobFromCpuSlices(pending)
            self.cpu_snapshot.assignJobEarliest(pending, current_time)
            new_start = pending.start_to_run_at_time
            # Re-placing a job must never push it later than before.
            assert old_start >= new_start
            if old_start != new_start:
                moved.append(JobStartEvent(new_start, pending))
        return moved
class ConservativeScheduler(Scheduler):
    """Scheduler that gives each job a slot in the CPU snapshot at
    submission time and re-places waiting jobs after every termination.
    """

    def __init__(self, options):
        super(ConservativeScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # Submitted-but-not-terminated jobs, kept in submission order.
        self.unfinished_jobs_by_submit_time = []

    def new_events_on_job_submission(self, job, current_time):
        """Register the job, place it at its earliest slot and return
        ``[JobStartEvent]`` for the start time it was given."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.append(job)
        self.cpu_snapshot.assignJobEarliest(job, current_time)
        events = []
        events.append(JobStartEvent(job.start_to_run_at_time, job))
        return events

    def new_events_on_job_termination(self, job, current_time):
        """Remove the terminated job's leftover tail from the snapshot and
        reschedule the remaining jobs.

        Returns a list of ``JobStartEvent`` objects, one per job whose
        start time changed.
        """
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.remove(job)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        new_start_events = self._reschedule_jobs(current_time)
        return new_start_events

    def _reschedule_jobs(self, current_time):
        """Walk the unfinished jobs in submission order and collect the
        start events of those that changed start time."""
        events = []
        for job in self.unfinished_jobs_by_submit_time:
            already_started = job.start_to_run_at_time <= current_time
            if already_started:
                # Preemption is not allowed, so a started job stays put.
                continue
            event = self._replace_reservation(job, current_time)
            if event is not None:
                events.append(event)
        return events

    def _replace_reservation(self, job, current_time):
        """Take one waiting job out of the snapshot and place it again.

        Returns a ``JobStartEvent`` when the start time changed, else None.
        """
        before = job.start_to_run_at_time
        self.cpu_snapshot.delJobFromCpuSlices(job)
        self.cpu_snapshot.assignJobEarliest(job, current_time)
        after = job.start_to_run_at_time
        # The new start time may be equal or earlier, never later.
        assert before >= after
        if before != after:
            return JobStartEvent(after, job)
        return None
예제 #5
0
class EasySjbfScheduler(EasyBackfillScheduler):
    """EASY-style scheduler whose backfill candidates are tried in
    ``sjf_sort_key`` order.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?).
    """

    def __init__(self, options):
        super(EasySjbfScheduler, self).__init__(options)
        # A fresh snapshot is installed after the parent constructor has run.
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])

    def _backfill_jobs(self, current_time):
        """Overriding parent method.

        Sorts the jobs behind the head of the queue with ``sjf_sort_key``
        and starts each one that passes ``canJobStartNow``, marking it with
        ``is_backfilled = 1``. The head job is placed with
        ``assignJobEarliest`` during the scan and unassigned afterwards.

        Returns the list of jobs started; they are also removed from
        ``self.unscheduled_jobs``.
        """
        queue = self.unscheduled_jobs
        if len(queue) < 2:
            # Nothing is waiting behind the head of the queue.
            return []

        head = queue[0]
        candidates = sorted(list_copy(queue[1:]), key=sjf_sort_key)

        snapshot = self.cpu_snapshot
        snapshot.assignJobEarliest(head, current_time)

        started = []
        for candidate in candidates:
            if not snapshot.canJobStartNow(candidate, current_time):
                continue
            candidate.is_backfilled = 1
            queue.remove(candidate)
            snapshot.assignJob(candidate, current_time)
            started.append(candidate)

        # The head job's placement was only needed while scanning.
        snapshot.unAssignJob(head)
        return started
예제 #6
0
class EasySJBFScheduler(EasyBackfillScheduler):
    """EASY-style scheduler whose backfill candidates are tried in
    ``sjf_sort_key`` order.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?).
    """

    def __init__(self, num_processors):
        super(EasySJBFScheduler, self).__init__(num_processors)
        # A fresh snapshot is installed after the parent constructor has run.
        self.cpu_snapshot = CpuSnapshot(num_processors)

    def _backfill_jobs(self, current_time):
        """Overriding parent method.

        Sorts the jobs behind the head of the queue with ``sjf_sort_key``
        and starts each one that passes ``canJobStartNow``. The head job is
        placed with ``assignJobEarliest`` during the scan and deleted from
        the snapshot afterwards.

        Returns the list of jobs started; they are also removed from
        ``self.unscheduled_jobs``.
        """
        queue = self.unscheduled_jobs
        if len(queue) < 2:
            # Nothing is waiting behind the head of the queue.
            return []

        head = queue[0]
        candidates = sorted(list_copy(queue[1:]), key=sjf_sort_key)

        snapshot = self.cpu_snapshot
        snapshot.assignJobEarliest(head, current_time)

        started = []
        for candidate in candidates:
            if not snapshot.canJobStartNow(candidate, current_time):
                continue
            queue.remove(candidate)
            snapshot.assignJob(candidate, current_time)
            started.append(candidate)

        # The head job's placement was only needed while scanning.
        snapshot.delJobFromCpuSlices(head)
        return started
예제 #7
0
class EasyBackfillScheduler(Scheduler):
    """Backfilling scheduler built on a waiting queue.

    Jobs at the head of ``unscheduled_jobs`` are started while enough
    processors are free; after that, jobs further back in the queue are
    started when the snapshot's ``canJobStartNow`` check passes while the
    head job is placed at its earliest slot.
    """

    def __init__(self, options):
        super(EasyBackfillScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # Waiting queue: submitted jobs that have not been started yet.
        self.unscheduled_jobs = []

    def new_events_on_job_submission(self, just_submitted_job, current_time):
        """Add the new job to the waiting list and try to schedule.

        Scheduling is attempted only when the processors free at
        ``current_time`` could hold the new job; otherwise no event is
        produced.

        Returns a list of ``JobStartEvent`` objects, one per job started.
        """
        # TODO: a probable performance bottleneck because we reschedule all the
        # jobs. Knowing that only one new job is added allows more efficient
        # scheduling here.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(just_submitted_job)

        free_now = self.cpu_snapshot.free_processors_available_at(current_time)
        if free_now >= just_submitted_job.num_required_processors:
            return [
                JobStartEvent(current_time, job)
                for job in self._schedule_jobs(current_time)
            ]
        return []

    def new_events_on_job_termination(self, job, current_time):
        """Delete the tail of the just-terminated job (in case it finished
        before its estimate) and try to schedule the waiting jobs.

        Returns a list of ``JobStartEvent`` objects, one per job started.
        """
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, started)
            for started in self._schedule_jobs(current_time)
        ]

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs from the front of the queue, in order, until the
        first job does not fit in the processors free at ``current_time``.

        Returns the list of jobs started.
        """
        result = []
        while self.unscheduled_jobs:
            free_now = self.cpu_snapshot.free_processors_available_at(current_time)
            if free_now >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # first job can't be scheduled
                break
        return result

    def _backfill_jobs(self, current_time):
        """
        Find jobs that can be backfilled and update the cpu snapshot.
        DEPRECATED FUNCTION !!!!!!

        Returns the list of backfilled jobs; each is marked with
        ``is_backfilled = 1`` and removed from ``self.unscheduled_jobs``.
        """
        if len(self.unscheduled_jobs) <= 1:
            return []

        result = []
        # Iterate over a copy, because backfilled jobs are removed from the
        # live queue inside the loop.
        tail_of_waiting_list = list_copy(self.unscheduled_jobs[1:])

        # The head job is placed only while the candidates are tested and
        # is unassigned again once the loop is done.
        first_job = self.unscheduled_jobs[0]
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)

        for job in tail_of_waiting_list:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                job.is_backfilled = 1
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        self.cpu_snapshot.unAssignJob(first_job)

        return result
class EasyPlusPlusScheduler(Scheduler):
    """Backfilling scheduler driven by a run-time predictor and a corrector.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?).
    """

    I_NEED_A_PREDICTOR = True

    def __init__(self, options):
        super(EasyPlusPlusScheduler, self).__init__(options)
        # init_predictor / init_corrector are not defined in this class;
        # presumably inherited from Scheduler -- TODO confirm.
        self.init_predictor(options)
        self.init_corrector(options)

        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # Waiting queue: submitted jobs that have not been started yet.
        self.unscheduled_jobs = []

    def new_events_on_job_submission(self, job, current_time):
        """Run the predictor on the new job, queue it and try to schedule.

        The first prediction made for a job is remembered in
        ``job.initial_prediction``. Returns a list of ``JobStartEvent``
        objects, one per job started.
        """
        self.cpu_snapshot.archive_old_slices(current_time)
        self.predictor.predict(job, current_time, self.running_jobs)
        if not hasattr(job, "initial_prediction"):
            job.initial_prediction = job.predicted_run_time
        self.unscheduled_jobs.append(job)

        started_jobs = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, started) for started in started_jobs]

    def new_events_on_job_termination(self, job, current_time):
        """Feed the finished job to the predictor, free the unused tail of
        its slot and try to schedule the waiting jobs.

        Returns a list of ``JobStartEvent`` objects, one per job started.
        """
        self.predictor.fit(job, current_time)

        if self.corrector.__name__ == "ninetynine":
            # This corrector's estimator is fitted on actual/estimated ratios.
            ratio = job.actual_run_time/job.user_estimated_run_time
            self.pestimator.fit(ratio)

        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)

        started_jobs = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, started) for started in started_jobs]

    def new_events_on_job_under_prediction(self, job, current_time):
        """Ask the corrector for a new predicted run time and apply it to
        both the snapshot and the job.

        ``job.num_underpredict`` is set to 0 on the first call for a job
        and incremented on each later call. Returns a one-element list
        holding a ``JobStartEvent`` for the job at ``current_time``.
        """
        # assert job.predicted_run_time <= job.user_estimated_run_time

        if hasattr(job, "num_underpredict"):
            job.num_underpredict += 1
        else:
            job.num_underpredict = 0

        if self.corrector.__name__ == "ninetynine":
            corrected = self.corrector(self.pestimator, job, current_time)
        else:
            corrected = self.corrector(job, current_time)

        # The snapshot is updated before the job's own attribute.
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job, corrected)
        job.predicted_run_time = corrected

        return [JobStartEvent(current_time, job)]

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        started = self._schedule_head_of_list(current_time)
        started += self._backfill_jobs(current_time)
        return started

    def _schedule_head_of_list(self, current_time):
        """Start jobs from the front of the queue, in order, until the
        first job does not fit in the processors free at ``current_time``.

        Returns the list of jobs started.
        """
        started = []
        while len(self.unscheduled_jobs) != 0:
            free_now = self.cpu_snapshot.free_processors_available_at(current_time)
            if not free_now >= self.unscheduled_jobs[0].num_required_processors:
                # The head of the queue does not fit; stop here.
                break
            head = self.unscheduled_jobs.pop(0)
            self.cpu_snapshot.assignJob(head, current_time)
            started.append(head)
        return started

    def _backfill_jobs(self, current_time):
        """Try the jobs behind the head of the queue in ``sjf_sort_key``
        order and start each one that passes ``canJobStartNow``.

        Started jobs are marked with ``is_backfilled = 1``, removed from
        ``self.unscheduled_jobs`` and returned as a list.
        """
        queue = self.unscheduled_jobs
        if len(queue) < 2:
            # Nothing is waiting behind the head of the queue.
            return []

        head = queue[0]
        candidates = sorted(list_copy(queue[1:]), key=sjf_sort_key)

        snapshot = self.cpu_snapshot
        snapshot.assignJobEarliest(head, current_time)

        started = []
        for candidate in candidates:
            if not snapshot.canJobStartNow(candidate, current_time):
                continue
            candidate.is_backfilled = 1
            queue.remove(candidate)
            snapshot.assignJob(candidate, current_time)
            started.append(candidate)

        # The head job's placement was only needed while scanning.
        snapshot.delJobFromCpuSlices(head)
        return started
class EasyPlusPlusScheduler(Scheduler):
    """Backfilling scheduler that predicts a job's run time from the actual
    run times of the same user's two most recently terminated jobs.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?).
    """

    def __init__(self, num_processors):
        super(EasyPlusPlusScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)
        # Waiting queue: submitted jobs that have not been started yet.
        self.unscheduled_jobs = []
        # Per user_id: actual run time of the second-to-last and of the last
        # terminated job; None until that many jobs have terminated.
        self.user_run_time_prev = {}
        self.user_run_time_last = {}

    def new_events_on_job_submission(self, job, current_time):
        """Queue the job, creating the per-user history entries on first
        sight of its ``user_id``, and try to schedule.

        Returns a list of ``JobStartEvent`` objects, one per job started.
        """
        # `in` replaces dict.has_key(), which does not exist on Python 3.
        if job.user_id not in self.user_run_time_last:
            self.user_run_time_prev[job.user_id] = None
            self.user_run_time_last[job.user_id] = None

        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, started)
            for started in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Record the job's actual run time in its user's history, free the
        unused tail of its slot and try to schedule the waiting jobs.

        Returns a list of ``JobStartEvent`` objects, one per job started.
        """
        # Both entries are created at submission time.
        assert job.user_id in self.user_run_time_last
        assert job.user_id in self.user_run_time_prev

        # Shift the history: the old "last" becomes "prev".
        self.user_run_time_prev[job.user_id] = self.user_run_time_last[
            job.user_id]
        self.user_run_time_last[job.user_id] = job.actual_run_time
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, started)
            for started in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_under_prediction(self, job, current_time):
        """Fall back to the user's estimate for a job that outlived its
        prediction. Produces no new events (returns an empty list)."""
        assert job.predicted_run_time <= job.user_estimated_run_time

        # The snapshot is updated before the job's own attribute.
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job)
        job.predicted_run_time = job.user_estimated_run_time
        return []

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"

        # Refresh predictions for users that have two recorded run times:
        # their average, capped at the user's own estimate.
        for job in self.unscheduled_jobs:
            if self.user_run_time_prev[job.user_id] is not None:
                average = int((self.user_run_time_last[job.user_id] +
                               self.user_run_time_prev[job.user_id]) / 2)
                job.predicted_run_time = min(job.user_estimated_run_time,
                                             average)

        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs from the front of the queue, in order, until the
        first job does not fit in the processors free at ``current_time``.

        Returns the list of jobs started.
        """
        result = []
        while self.unscheduled_jobs:
            # Try to schedule the first job
            if self.cpu_snapshot.free_processors_available_at(
                    current_time
            ) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # first job can't be scheduled
                break
        return result

    def _backfill_jobs(self, current_time):
        """Try the jobs behind the head of the queue in ``sjf_sort_key``
        order and start each one that passes ``canJobStartNow``.

        Started jobs are removed from ``self.unscheduled_jobs`` and
        returned as a list.
        """
        if len(self.unscheduled_jobs) <= 1:
            return []

        result = []
        first_job = self.unscheduled_jobs[0]
        # Work on a sorted copy, because started jobs are removed from the
        # live queue inside the loop.
        tail = list_copy(self.unscheduled_jobs[1:])
        tail_of_jobs_by_sjf_order = sorted(tail, key=sjf_sort_key)

        # The head job is placed only while the candidates are tested.
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)

        for job in tail_of_jobs_by_sjf_order:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)

        self.cpu_snapshot.delJobFromCpuSlices(first_job)

        return result
예제 #10
0
class EasyPlusPlusScheduler(Scheduler):
    """Backfilling scheduler that predicts a job's run time from the actual
    run times of the same user's two most recently terminated jobs.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?).
    """

    def __init__(self, num_processors):
        super(EasyPlusPlusScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)
        # Waiting queue: submitted jobs that have not been started yet.
        self.unscheduled_jobs = []
        # Per user_id: actual run time of the second-to-last and of the last
        # terminated job; None until that many jobs have terminated.
        self.user_run_time_prev = {}
        self.user_run_time_last = {}

    def new_events_on_job_submission(self, job, current_time):
        """Queue the job, creating the per-user history entries on first
        sight of its ``user_id``, and try to schedule.

        Returns a list of ``JobStartEvent`` objects, one per job started.
        """
        user = job.user_id
        # Membership test with `in`; dict.has_key() is gone on Python 3.
        if user not in self.user_run_time_last:
            self.user_run_time_prev[user] = None
            self.user_run_time_last[user] = None

        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, started)
            for started in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Record the job's actual run time in its user's history, free the
        unused tail of its slot and try to schedule the waiting jobs.

        Returns a list of ``JobStartEvent`` objects, one per job started.
        """
        user = job.user_id
        # Both entries are created at submission time.
        assert user in self.user_run_time_last
        assert user in self.user_run_time_prev

        # Shift the history: the old "last" becomes "prev".
        self.user_run_time_prev[user] = self.user_run_time_last[user]
        self.user_run_time_last[user] = job.actual_run_time
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, started)
            for started in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_under_prediction(self, job, current_time):
        """Fall back to the user's estimate for a job that outlived its
        prediction. Produces no new events (returns an empty list)."""
        assert job.predicted_run_time <= job.user_estimated_run_time

        # The snapshot is updated before the job's own attribute.
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job)
        job.predicted_run_time = job.user_estimated_run_time
        return []

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"

        # Refresh predictions for users that have two recorded run times:
        # their average, capped at the user's own estimate.
        for job in self.unscheduled_jobs:
            previous = self.user_run_time_prev[job.user_id]
            if previous is not None:
                latest = self.user_run_time_last[job.user_id]
                average = int((latest + previous) / 2)
                job.predicted_run_time = min(job.user_estimated_run_time, average)

        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs from the front of the queue, in order, until the
        first job does not fit in the processors free at ``current_time``.

        Returns the list of jobs started.
        """
        result = []
        while self.unscheduled_jobs:
            # Try to schedule the first job
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # first job can't be scheduled
                break
        return result

    def _backfill_jobs(self, current_time):
        """Try the jobs behind the head of the queue in ``sjf_sort_key``
        order and start each one that passes ``canJobStartNow``.

        Started jobs are removed from ``self.unscheduled_jobs`` and
        returned as a list.
        """
        if len(self.unscheduled_jobs) <= 1:
            return []

        result = []
        first_job = self.unscheduled_jobs[0]
        # Work on a sorted copy, because started jobs are removed from the
        # live queue inside the loop.
        tail = list_copy(self.unscheduled_jobs[1:])
        tail_of_jobs_by_sjf_order = sorted(tail, key=sjf_sort_key)

        # The head job is placed only while the candidates are tested.
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)

        for job in tail_of_jobs_by_sjf_order:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)

        self.cpu_snapshot.delJobFromCpuSlices(first_job)

        return result
예제 #11
0
class EasyPlusPlusScheduler(Scheduler):
    """Backfilling scheduler driven by a run-time predictor and a corrector,
    logging each job's actual and predicted run time to a text file.

    The original author's note attributes the algorithm to the paper of
    Tsafrir, Etzion and Feitelson (June 2007?).
    """

    I_NEED_A_PREDICTOR = True

    def __init__(self, options):
        super(EasyPlusPlusScheduler, self).__init__(options)
        # init_predictor / init_corrector are not defined in this class;
        # presumably inherited from Scheduler -- TODO confirm.
        self.init_predictor(options)
        self.init_corrector(options)

        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # Waiting queue: submitted jobs that have not been started yet.
        self.unscheduled_jobs = []

        # NOTE(review): this handle is never closed anywhere in this class.
        self.ff = open("times-epp-sgd.txt", 'w')

    def new_events_on_job_submission(self, job, current_time):
        """Run the predictor on the new job, log it, queue it and try to
        schedule.

        One "actual<TAB>predicted" line is written and flushed per
        submission. The first prediction made for a job is remembered in
        ``job.initial_prediction``. Returns a list of ``JobStartEvent``
        objects, one per job started.
        """
        self.cpu_snapshot.archive_old_slices(current_time)
        self.predictor.predict(job, current_time, self.running_jobs)

        log_line = "%d\t%d\n" % (job.actual_run_time, job.predicted_run_time)
        self.ff.write(log_line)
        self.ff.flush()

        if not hasattr(job, "initial_prediction"):
            job.initial_prediction = job.predicted_run_time
        self.unscheduled_jobs.append(job)

        started_jobs = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, started) for started in started_jobs]

    def new_events_on_job_termination(self, job, current_time):
        """Feed the finished job to the predictor, free the unused tail of
        its slot and try to schedule the waiting jobs.

        Returns a list of ``JobStartEvent`` objects, one per job started.
        """
        self.predictor.fit(job, current_time)

        if self.corrector.__name__ == "ninetynine":
            # This corrector's estimator is fitted on actual/estimated ratios.
            ratio = job.actual_run_time / job.user_estimated_run_time
            self.pestimator.fit(ratio)

        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)

        started_jobs = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, started) for started in started_jobs]

    def new_events_on_job_under_prediction(self, job, current_time):
        """Ask the corrector for a new predicted run time and apply it to
        both the snapshot and the job.

        ``job.num_underpredict`` is set to 0 on the first call for a job
        and incremented on each later call. Returns a one-element list
        holding a ``JobStartEvent`` for the job at ``current_time``.
        """
        assert job.predicted_run_time <= job.user_estimated_run_time

        if hasattr(job, "num_underpredict"):
            job.num_underpredict += 1
        else:
            job.num_underpredict = 0

        if self.corrector.__name__ == "ninetynine":
            corrected = self.corrector(self.pestimator, job, current_time)
        else:
            corrected = self.corrector(job, current_time)

        # The snapshot is updated before the job's own attribute.
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job, corrected)
        job.predicted_run_time = corrected

        return [JobStartEvent(current_time, job)]

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        started = self._schedule_head_of_list(current_time)
        started += self._backfill_jobs(current_time)
        return started

    def _schedule_head_of_list(self, current_time):
        """Start jobs from the front of the queue, in order, until the
        first job does not fit in the processors free at ``current_time``.

        Returns the list of jobs started.
        """
        started = []
        while len(self.unscheduled_jobs) != 0:
            free_now = self.cpu_snapshot.free_processors_available_at(current_time)
            if not free_now >= self.unscheduled_jobs[0].num_required_processors:
                # The head of the queue does not fit; stop here.
                break
            head = self.unscheduled_jobs.pop(0)
            self.cpu_snapshot.assignJob(head, current_time)
            started.append(head)
        return started

    def _backfill_jobs(self, current_time):
        """Try the jobs behind the head of the queue in ``sjf_sort_key``
        order and start each one that passes ``canJobStartNow``.

        Started jobs are marked with ``is_backfilled = 1``, removed from
        ``self.unscheduled_jobs`` and returned as a list.
        """
        queue = self.unscheduled_jobs
        if len(queue) < 2:
            # Nothing is waiting behind the head of the queue.
            return []

        head = queue[0]
        candidates = sorted(list_copy(queue[1:]), key=sjf_sort_key)

        snapshot = self.cpu_snapshot
        snapshot.assignJobEarliest(head, current_time)

        started = []
        for candidate in candidates:
            if not snapshot.canJobStartNow(candidate, current_time):
                continue
            candidate.is_backfilled = 1
            queue.remove(candidate)
            snapshot.assignJob(candidate, current_time)
            started.append(candidate)

        # The head job's placement was only needed while scanning.
        snapshot.delJobFromCpuSlices(head)
        return started