def __setstate__(self, state):
    self.reservations = state['reservations']
    if 'active' in state:
        self.active = state['active']
    else:
        self.active = True

    self.id_gen = IncrID()
    self.id_gen.set(state['next_res_id'])
    global bgsched_id_gen
    bgsched_id_gen = self.id_gen

    self.cycle_id_gen = IncrID()
    self.cycle_id_gen.set(state['next_cycle_id'])
    global bgsched_cycle_id_gen
    bgsched_cycle_id_gen = self.cycle_id_gen

    self.queues = QueueDict(self.COMP_QUEUE_MANAGER)
    self.jobs = JobDict(self.COMP_QUEUE_MANAGER)
    self.started_jobs = {}
    self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
    self.get_current_time = time.time

    self.lock = threading.Lock()
    self.statistics = Statistics()

    if state.has_key('msg_queue'):
        dbwriter.msg_queue = state['msg_queue']
    if state.has_key('overflow') and (dbwriter.max_queued != None):
        dbwriter.overflow = state['overflow']

def __setstate__(self, state):
    Component.__setstate__(self, state)
    self.reservations = state['reservations']
    if 'active' in state:
        self.active = state['active']
    else:
        self.active = True

    self.id_gen = IncrID()
    self.id_gen.set(state['next_res_id'])
    global bgsched_id_gen
    bgsched_id_gen = self.id_gen

    self.cycle_id_gen = IncrID()
    self.cycle_id_gen.set(state['next_cycle_id'])
    global bgsched_cycle_id_gen
    bgsched_cycle_id_gen = self.cycle_id_gen

    self.queues = QueueDict()
    self.jobs = JobDict()
    self.started_jobs = {}
    self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
    self.get_current_time = time.time

    if state.has_key('msg_queue'):
        dbwriter.msg_queue = state['msg_queue']
    if state.has_key('overflow') and (dbwriter.max_queued != None):
        dbwriter.overflow = state['overflow']

def __init__(self, *args, **kwargs): """Initialize a new BaseForker. All arguments are passed to the component constructor. """ Component.__init__(self, *args, **kwargs) self.children = {} self.id_gen = IncrID()
def __init__(self, *args, **kwargs):
    Component.__init__(self, *args, **kwargs)
    self.reservations = ReservationDict()
    self.queues = QueueDict()
    self.jobs = JobDict()
    self.started_jobs = {}
    self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
    self.active = True

    self.get_current_time = time.time
    self.id_gen = IncrID()
    global bgsched_id_gen
    bgsched_id_gen = self.id_gen

    self.cycle_id_gen = IncrID()
    global bgsched_cycle_id_gen
    bgsched_cycle_id_gen = self.cycle_id_gen

class ProcessGroupDict(DataDict): item_cls = ProcessGroup key = "id" def __init__(self): self.id_gen = IncrID() def q_add (self, specs, callback=None, cargs={}): for spec in specs: if spec.get("id", "*") != "*": raise DataCreationError("cannot specify an id") spec['id'] = self.id_gen.next() return DataDict.q_add(self, specs)
class JobDict(DataDict):
    item_cls = Job
    key = "id"

    def __init__(self):
        self.id_gen = IncrID()

    def q_add(self, specs, callback=None, cargs={}):
        for spec in specs:
            if "id" not in spec or spec['id'] == "*":
                spec['id'] = self.id_gen.next()
        return DataDict.q_add(self, specs)

class ProcessGroupDict(DataDict): item_cls = ProcessGroup key = "id" def __init__(self): self.id_gen = IncrID() def q_add(self, specs, callback=None, cargs={}): for spec in specs: if spec.get("id", "*") != "*": raise DataCreationError("cannot specify an id") spec['id'] = self.id_gen.next() return DataDict.q_add(self, specs)
class ProcessGroupDict(DataDict): """A container for holding process groups, keyed by id""" item_cls = ProcessGroup key = "id" def __init__(self): DataDict.__init__(self) self.id_gen = IncrID() def q_add(self, specs, callback=None, cargs={}): """Add a process group to the container""" for spec in specs: if spec.get("id", "*") != "*": raise DataCreationError("cannot specify an id") spec["id"] = self.id_gen.next() return DataDict.q_add(self, specs, callback, cargs)
import Cobalt.Util
from Cobalt.Components.bgq_cn_boot_states import BootPending, BootInitiating, BootRebooting, BootComplete, BootFailed
from Cobalt.Components.bgq_io_boot_states import IOBootPending, IOBootInitiating, IOBootComplete, IOBootFailed
from Cobalt.Data import IncrID
from Cobalt.Util import get_config_option
#FIXME: also make this handle cleanup
import Cobalt.Logging

_logger = logging.getLogger()

Cobalt.Util.init_cobalt_config()

_boot_id_gen = IncrID()


class BootContext(object):
    '''Context for an ongoing boot.  This should include any resources
    specific to that individual boot.  A pointer to one of these objects
    is passed to the boot state for processing.

    '''
    def __init__(self, block, job_id, user, block_lock, subblock_parent=None, timeout=None):
        self.block = block
        self.block_id = self.block.name
        self.job_id = job_id
        self.user = user
        self.block_lock = block_lock
        if subblock_parent == None:
            self.subblock_parent = self.block.name

def __init__(self):
    self.id_gen = IncrID()

def test_next(self, max=100):
    generator = IncrID()
    for count in itertools.count(1):
        assert generator.next() == count
        if count >= max:
            break

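# The test above pins down the contract these components assume from IncrID:
# next() yields 1, 2, 3, ... in order.  The sketch below is inferred from the
# call sites in this file (next(), set(), and the idnum attribute); it is a
# hypothetical stand-in, not the actual Cobalt.Data implementation, and the
# real class may differ in detail.
class _IncrIDSketch(object):
    def __init__(self):
        self.idnum = 0                   # last id handed out

    def next(self):
        """Return the next id in the sequence (1, 2, 3, ...)."""
        self.idnum += 1
        return self.idnum

    def set(self, id_num):
        # Restore from saved state so that the next call to next() returns
        # id_num; consistent with saving 'next_res_id' as idnum + 1 and with
        # force_res_id(), which assigns idnum = id_num - 1 directly.
        self.idnum = id_num - 1
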
class BGSched (Component):

    implementation = "bgsched"
    name = "scheduler"
    logger = logging.getLogger("Cobalt.Components.scheduler")

    _configfields = ['utility_file']
    _config = ConfigParser.ConfigParser()
    # print Cobalt.CONFIG_FILES
    _config.read(Cobalt.CONFIG_FILES)
    if not _config._sections.has_key('bgsched'):
        print '''"bgsched" section missing from cobalt config file'''
        sys.exit(1)
    config = _config._sections['bgsched']
    mfields = [field for field in _configfields if not config.has_key(field)]
    if mfields:
        print "Missing option(s) in cobalt config file [bgsched] section: %s" % (" ".join(mfields))
        sys.exit(1)
    if config.get("default_reservation_policy"):
        global DEFAULT_RESERVATION_POLICY
        DEFAULT_RESERVATION_POLICY = config.get("default_reservation_policy")

    def __init__(self, *args, **kwargs):
        Component.__init__(self, *args, **kwargs)
        self.reservations = ReservationDict()
        self.queues = QueueDict()
        self.jobs = JobDict()
        self.started_jobs = {}
        self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
        self.active = True

        self.get_current_time = time.time
        self.id_gen = IncrID()
        global bgsched_id_gen
        bgsched_id_gen = self.id_gen

        self.cycle_id_gen = IncrID()
        global bgsched_cycle_id_gen
        bgsched_cycle_id_gen = self.cycle_id_gen

    def __getstate__(self):
        state = {}
        state.update(Component.__getstate__(self))
        state.update({
            'sched_version': 1,
            'reservations': self.reservations,
            'active': self.active,
            'next_res_id': self.id_gen.idnum + 1,
            'next_cycle_id': self.cycle_id_gen.idnum + 1,
            'msg_queue': dbwriter.msg_queue,
            'overflow': dbwriter.overflow})
        return state

    def __setstate__(self, state):
        Component.__setstate__(self, state)
        self.reservations = state['reservations']
        if 'active' in state:
            self.active = state['active']
        else:
            self.active = True

        self.id_gen = IncrID()
        self.id_gen.set(state['next_res_id'])
        global bgsched_id_gen
        bgsched_id_gen = self.id_gen

        self.cycle_id_gen = IncrID()
        self.cycle_id_gen.set(state['next_cycle_id'])
        global bgsched_cycle_id_gen
        bgsched_cycle_id_gen = self.cycle_id_gen

        self.queues = QueueDict()
        self.jobs = JobDict()
        self.started_jobs = {}
        self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
        self.get_current_time = time.time

        if state.has_key('msg_queue'):
            dbwriter.msg_queue = state['msg_queue']
        if state.has_key('overflow') and (dbwriter.max_queued != None):
            dbwriter.overflow = state['overflow']

    # order the jobs with biggest utility first
    def utilitycmp(self, job1, job2):
        return -cmp(job1.score, job2.score)

    def prioritycmp(self, job1, job2):
        """Compare 2 jobs first using queue priority and then first-in, first-out."""
        val = cmp(self.queues[job1.queue].priority, self.queues[job2.queue].priority)
        if val == 0:
            return self.fifocmp(job1, job2)
        else:
            # we want the higher priority first
            return -val

    def fifocmp(self, job1, job2):
        """Compare 2 jobs for first-in, first-out."""
        def fifo_value(job):
            if job.index is not None:
                return int(job.index)
            else:
                return job.jobid
        # Implement some simple variations on FIFO scheduling
        # within a particular queue, based on queue policy
        fifoval = cmp(fifo_value(job1), fifo_value(job2))
        if job1.queue == job2.queue:
            qpolicy = self.queues[job1.queue].policy
            sizeval = cmp(int(job1.nodes), int(job2.nodes))
            wtimeval = cmp(int(job1.walltime), int(job2.walltime))
            if qpolicy == 'largest-first' and sizeval:
                return -sizeval
            elif qpolicy == 'smallest-first' and sizeval:
                return sizeval
            elif qpolicy == 'longest-first' and wtimeval:
                return -wtimeval
            elif qpolicy == 'shortest-first' and wtimeval:
                return wtimeval
            else:
                return fifoval
        else:
            return fifoval
        return cmp(fifo_value(job1), fifo_value(job2))

    def save_me(self):
        Component.save(self)
    save_me = automatic(save_me, float(get_bgsched_config('save_me_interval', 10)))

    # user_name in this context is the user setting/modifying the res.
    def add_reservations(self, specs, user_name):
        self.logger.info("%s adding reservation: %r" % (user_name, specs))
        added_reservations = self.reservations.q_add(specs)
        for added_reservation in added_reservations:
            self.logger.info("Res %s/%s: %s adding reservation: %r" %
                    (added_reservation.res_id, added_reservation.cycle_id, user_name, specs))
            dbwriter.log_to_db(user_name, "creating", "reservation", added_reservation)
        return added_reservations
    add_reservations = exposed(query(add_reservations))

    def del_reservations(self, specs, user_name):
        self.logger.info("%s releasing reservation: %r" % (user_name, specs))
        del_reservations = self.reservations.q_del(specs)
        for del_reservation in del_reservations:
            self.logger.info("Res %s/%s/: %s releasing reservation: %r" %
                    (del_reservation.res_id, del_reservation.cycle_id, user_name, specs))
            #dbwriter.log_to_db(user_name, "ending", "reservation", del_reservation)
        return del_reservations
    del_reservations = exposed(query(del_reservations))

    def get_reservations(self, specs):
        return self.reservations.q_get(specs)
    get_reservations = exposed(query(get_reservations))

    def set_reservations(self, specs, updates, user_name):
        log_str = "%s modifying reservation: %r with updates %r" % (user_name, specs, updates)
        self.logger.info(log_str)
        # handle defers as a special case: have to log these, and not drop a mod record.
        def _set_reservations(res, newattr):
            res.update(newattr)
        updates['__cmd_user'] = user_name
        mod_reservations = self.reservations.q_get(specs, _set_reservations, updates)
        for mod_reservation in mod_reservations:
            self.logger.info("Res %s/%s: %s modifying reservation: %r" %
                    (mod_reservation.res_id, mod_reservation.cycle_id, user_name, specs))
        return mod_reservations
    set_reservations = exposed(query(set_reservations))

    def release_reservations(self, specs, user_name):
        self.logger.info("%s requested release of reservation: %r" % (user_name, specs))
        self.logger.info("%s releasing reservation: %r" % (user_name, specs))
        rel_res = self.get_reservations(specs)
        for res in rel_res:
            dbwriter.log_to_db(user_name, "released", "reservation", res)
        del_reservations = self.reservations.q_del(specs)
        for del_reservation in del_reservations:
            self.logger.info("Res %s/%s/: %s releasing reservation: %r" %
                    (del_reservation.res_id, del_reservation.cycle_id, user_name, specs))
        return del_reservations
    release_reservations = exposed(query(release_reservations))

    def check_reservations(self):
        ret = ""
        reservations = self.reservations.values()
        for i in range(len(reservations)):
            for j in range(i + 1, len(reservations)):
                # if at least one reservation is cyclic, we want *that* reservation
                # to be the one getting its overlaps method called
                if reservations[i].cycle is not None:
                    res1 = reservations[i]
                    res2 = reservations[j]
                else:
                    res1 = reservations[j]
                    res2 = reservations[i]
                # we subtract a little bit because the overlaps method isn't really meant to do this
                # it will report warnings when one reservation starts at the same time another ends
                if res1.overlaps(res2.start, res2.duration - 0.00001):
                    # now we need to check for overlap in space
                    results = ComponentProxy("system").get_partitions(
                        [{'name': p, 'children': '*', 'parents': '*'} for p in res2.partitions.split(":")])
                    for p in res1.partitions.split(":"):
                        for r in results:
                            if p == r['name'] or p in r['children'] or p in r['parents']:
                                ret += "Warning: reservation '%s' overlaps reservation '%s'\n" % (res1.name, res2.name)
        return ret
    check_reservations = exposed(check_reservations)

    def sync_data(self):
        started = self.get_current_time()
        for item in [self.jobs, self.queues]:
            try:
                item.Sync()
            except (ComponentLookupError, xmlrpclib.Fault):
                # the ForeignDataDicts already include FailureMode stuff
                pass
        # print "took %f seconds for sync_data" % (time.time() - started, )
    #sync_data = automatic(sync_data)

    def _run_reservation_jobs(self, reservations_cache):
        # handle each reservation separately, as they shouldn't be competing for resources
        for cur_res in reservations_cache.itervalues():
            #print "trying to run res jobs in", cur_res.name, self.started_jobs
            queue = cur_res.queue
            if not (self.queues.has_key(queue) and self.queues[queue].state == 'running'):
                continue

            temp_jobs = self.jobs.q_get([{'is_runnable':True, 'queue':queue}])
            active_jobs = []
            for j in temp_jobs:
                if not self.started_jobs.has_key(j.jobid) and cur_res.job_within_reservation(j):
                    active_jobs.append(j)

            if not active_jobs:
                continue
            active_jobs.sort(self.utilitycmp)

            job_location_args = []
            for job in active_jobs:
                job_location_args.append({
                    'jobid': str(job.jobid),
                    'nodes': job.nodes,
                    'queue': job.queue,
                    'required': cur_res.partitions.split(":"),
                    'utility_score': job.score,
                    'walltime': job.walltime,
                    'attrs': job.attrs,
                    'user': job.user,
                })

            # there's no backfilling in reservations
            try:
                best_partition_dict = ComponentProxy("system").find_job_location(job_location_args, [])
            except:
                self.logger.error("failed to connect to system component")
                best_partition_dict = {}

            for jobid in best_partition_dict:
                job = self.jobs[int(jobid)]
                self._start_job(job, best_partition_dict[jobid], {str(job.jobid): cur_res.res_id})

    def _start_job(self, job, partition_list, resid=None):
        """Get the queue manager to start a job."""
        cqm = ComponentProxy("queue-manager")
        try:
            self.logger.info("trying to start job %d on partition %r" % (job.jobid, partition_list))
            cqm.run_jobs([{'tag':"job", 'jobid':job.jobid}], partition_list, None, resid)
        except ComponentLookupError:
            self.logger.error("failed to connect to queue manager")
            return
        self.started_jobs[job.jobid] = self.get_current_time()

    def schedule_jobs(self):
        '''look at the queued jobs, and decide which ones to start'''
        started_scheduling = self.get_current_time()

        if not self.active:
            return

        self.sync_data()

        # if we're missing information, don't bother trying to schedule jobs
        if not (self.queues.__oserror__.status and self.jobs.__oserror__.status):
            self.sync_state.Fail()
            return
        self.sync_state.Pass()

        self.component_lock_acquire()
        try:
            # cleanup any reservations which have expired
            for res in self.reservations.values():
                if res.is_over():
                    self.logger.info("reservation %s has ended; removing" % (res.name))
                    self.logger.info("Res %s/%s: Ending reservation: %r" % (res.res_id, res.cycle_id, res.name))
                    #dbwriter.log_to_db(None, "ending", "reservation",
                    #        res)
                    del_reservations = self.reservations.q_del([
                        {'name': res.name}])

            # FIXME: this isn't a deepcopy.  it copies references to each reservation
            # in the reservations dict.  is that really sufficient? --brt
            reservations_cache = self.reservations.copy()
        except:
            # just to make sure we don't keep the lock forever
            self.logger.error("error in schedule_jobs", exc_info=True)
        self.component_lock_release()

        # clean up the started_jobs cached data
        # TODO: Make this tunable.
        now = self.get_current_time()
        for job_name in self.started_jobs.keys():
            if (now - self.started_jobs[job_name]) > 60:
                del self.started_jobs[job_name]

        active_queues = []
        spruce_queues = []
        res_queues = set()
        for item in reservations_cache.q_get([{'queue':'*'}]):
            if self.queues.has_key(item.queue):
                if self.queues[item.queue].state == 'running':
                    res_queues.add(item.queue)

        for queue in self.queues.itervalues():
            if queue.name not in res_queues and queue.state == 'running':
                if queue.policy == "high_prio":
                    spruce_queues.append(queue)
                else:
                    active_queues.append(queue)

        # handle the reservation jobs that might be ready to go
        self._run_reservation_jobs(reservations_cache)

        # figure out stuff about queue equivalence classes
        res_info = {}
        for cur_res in reservations_cache.values():
            res_info[cur_res.name] = cur_res.partitions
        try:
            equiv = ComponentProxy("system").find_queue_equivalence_classes(
                    res_info, [q.name for q in active_queues + spruce_queues])
        except:
            self.logger.error("failed to connect to system component")
            return

        for eq_class in equiv:
            # recall that is_runnable is True for certain types of holds
            temp_jobs = self.jobs.q_get([{'is_runnable':True, 'queue':queue.name}
                for queue in active_queues if queue.name in eq_class['queues']])
            active_jobs = []
            for j in temp_jobs:
                if not self.started_jobs.has_key(j.jobid):
                    active_jobs.append(j)

            temp_jobs = self.jobs.q_get([{'is_runnable':True, 'queue':queue.name}
                for queue in spruce_queues if queue.name in eq_class['queues']])
            spruce_jobs = []
            for j in temp_jobs:
                if not self.started_jobs.has_key(j.jobid):
                    spruce_jobs.append(j)

            # if there are any pending jobs in high_prio queues, those are the only ones that can start
            if spruce_jobs:
                active_jobs = spruce_jobs

            # get the cutoff time for backfilling
            #
            # BRT: should we use 'has_resources' or 'is_active'?  has_resources returns to false
            # once the resource epilogue scripts have finished running while is_active only returns
            # to false once the job (not just the running task) has completely terminated.  the
            # difference is likely to be slight unless the job epilogue scripts are heavy weight.
            temp_jobs = [job for job in self.jobs.q_get([{'has_resources':True}]) if job.queue in eq_class['queues']]
            end_times = []
            for job in temp_jobs:
                # take the max so that jobs which have gone overtime and are being killed
                # continue to cast a small backfilling shadow (we need this for the case
                # that the final job in a drained partition runs overtime -- which otherwise
                # allows things to be backfilled into the drained partition)

                ##*AdjEst*
                if running_job_walltime_prediction:
                    runtime_estimate = float(job.walltime_p)
                else:
                    runtime_estimate = float(job.walltime)

                end_time = max(float(job.starttime) + 60 * runtime_estimate, now + 5*60)
                end_times.append([job.location, end_time])

            for res_name in eq_class['reservations']:
                cur_res = reservations_cache[res_name]
                if not cur_res.cycle:
                    end_time = float(cur_res.start) + float(cur_res.duration)
                else:
                    done_after = float(cur_res.duration) - ((now - float(cur_res.start)) % float(cur_res.cycle))
                    if done_after < 0:
                        done_after += cur_res.cycle
                    end_time = now + done_after
                if cur_res.is_active():
                    for part_name in cur_res.partitions.split(":"):
                        end_times.append([[part_name], end_time])

            if not active_jobs:
                continue
            active_jobs.sort(self.utilitycmp)

            # now smoosh lots of data together to be passed to the allocator in the system component
            job_location_args = []
            for job in active_jobs:
                forbidden_locations = set()
                for res_name in eq_class['reservations']:
                    cur_res = reservations_cache[res_name]
                    if cur_res.overlaps(self.get_current_time(), 60 * float(job.walltime) + SLOP_TIME):
                        forbidden_locations.update(cur_res.partitions.split(":"))

                job_location_args.append({
                    'jobid': str(job.jobid),
                    'nodes': job.nodes,
                    'queue': job.queue,
                    'forbidden': list(forbidden_locations),
                    'utility_score': job.score,
                    'walltime': job.walltime,
                    'walltime_p': job.walltime_p, #*AdjEst*
                    'attrs': job.attrs,
                    'user': job.user,
                })

            try:
                best_partition_dict = ComponentProxy("system").find_job_location(job_location_args, end_times)
            except:
                self.logger.error("failed to connect to system component", exc_info=True)
                best_partition_dict = {}

            for jobid in best_partition_dict:
                job = self.jobs[int(jobid)]
                self._start_job(job, best_partition_dict[jobid])

        # print "took %f seconds for scheduling loop" % (time.time() - started_scheduling, )
    schedule_jobs = locking(automatic(schedule_jobs, float(get_bgsched_config('schedule_jobs_interval', 10))))

    def get_resid(self, queue_name):
        return None
    get_resid = exposed(get_resid)

    def enable(self, user_name):
        """Enable scheduling"""
        self.logger.info("%s enabling scheduling", user_name)
        self.active = True
    enable = exposed(enable)

    def disable(self, user_name):
        """Disable scheduling"""
        self.logger.info("%s disabling scheduling", user_name)
        self.active = False
    disable = exposed(disable)

    def set_res_id(self, id_num):
        """Set the reservation id number."""
        self.id_gen.set(id_num)
        logger.info("Reset res_id generator to %s." % id_num)
    set_res_id = exposed(set_res_id)

    def set_cycle_id(self, id_num):
        """Set the cycle id number."""
        self.cycle_id_gen.set(id_num)
        logger.info("Reset cycle_id generator to %s." % id_num)
    set_cycle_id = exposed(set_cycle_id)

    def force_res_id(self, id_num):
        """Override the id-generator and change the resid to id_num"""
        self.id_gen.idnum = id_num - 1
        logger.warning("Forced res_id generator to %s." % id_num)
    force_res_id = exposed(force_res_id)

    def force_cycle_id(self, id_num):
        """Override the id-generator and change the cycleid to id_num"""
        self.cycle_id_gen.idnum = id_num - 1
        logger.warning("Forced cycle_id generator to %s." % id_num)
    force_cycle_id = exposed(force_cycle_id)

    def get_next_res_id(self):
        """Get what the next resid number would be"""
        return self.id_gen.idnum + 1
    get_next_res_id = exposed(get_next_res_id)

    def get_next_cycle_id(self):
        """Get what the next cycleid number would be"""
        return self.cycle_id_gen.idnum + 1
    get_next_cycle_id = exposed(get_next_cycle_id)

    def __flush_msg_queue(self):
        """Send queued messages to the database-writer component"""
        dbwriter.flush_queue()
    __flush_msg_queue = automatic(__flush_msg_queue, float(get_bgsched_config('db_flush_interval', 10)))

def __init__(self):
    DataDict.__init__(self)
    self.id_gen = IncrID()

def test_next(self, max=100):
    generator = IncrID()
    for count in itertools.count(1):
        assert generator.next() == count
        if count >= max:
            break

class BGSched(Component):

    implementation = "bgsched"
    name = "scheduler"
    logger = logging.getLogger("Cobalt.Components.scheduler")

    _configfields = ['utility_file']
    _config = ConfigParser.ConfigParser()
    print Cobalt.CONFIG_FILES
    _config.read(Cobalt.CONFIG_FILES)
    if not _config._sections.has_key('bgsched'):
        print '''"bgsched" section missing from cobalt config file'''
        sys.exit(1)
    config = _config._sections['bgsched']
    mfields = [field for field in _configfields if not config.has_key(field)]
    if mfields:
        print "Missing option(s) in cobalt config file [bgsched] section: %s" % (" ".join(mfields))
        sys.exit(1)
    if config.get("default_reservation_policy"):
        global DEFAULT_RESERVATION_POLICY
        DEFAULT_RESERVATION_POLICY = config.get("default_reservation_policy")

    def __init__(self, *args, **kwargs):
        Component.__init__(self, *args, **kwargs)
        self.COMP_QUEUE_MANAGER = "queue-manager"
        self.COMP_SYSTEM = "system"
        self.reservations = ReservationDict()
        self.queues = QueueDict(self.COMP_QUEUE_MANAGER)
        self.jobs = JobDict(self.COMP_QUEUE_MANAGER)
        self.started_jobs = {}
        self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
        self.active = True

        self.get_current_time = time.time
        self.id_gen = IncrID()
        global bgsched_id_gen
        bgsched_id_gen = self.id_gen

        self.cycle_id_gen = IncrID()
        global bgsched_cycle_id_gen
        bgsched_cycle_id_gen = self.cycle_id_gen

    def __getstate__(self):
        return {
            'reservations': self.reservations,
            'version': 1,
            'active': self.active,
            'next_res_id': self.id_gen.idnum + 1,
            'next_cycle_id': self.cycle_id_gen.idnum + 1,
            'msg_queue': dbwriter.msg_queue,
            'overflow': dbwriter.overflow
        }

    def __setstate__(self, state):
        self.reservations = state['reservations']
        if 'active' in state:
            self.active = state['active']
        else:
            self.active = True

        self.id_gen = IncrID()
        self.id_gen.set(state['next_res_id'])
        global bgsched_id_gen
        bgsched_id_gen = self.id_gen

        self.cycle_id_gen = IncrID()
        self.cycle_id_gen.set(state['next_cycle_id'])
        global bgsched_cycle_id_gen
        bgsched_cycle_id_gen = self.cycle_id_gen

        self.queues = QueueDict(self.COMP_QUEUE_MANAGER)
        self.jobs = JobDict(self.COMP_QUEUE_MANAGER)
        self.started_jobs = {}
        self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
        self.get_current_time = time.time

        self.lock = threading.Lock()
        self.statistics = Statistics()

        if state.has_key('msg_queue'):
            dbwriter.msg_queue = state['msg_queue']
        if state.has_key('overflow') and (dbwriter.max_queued != None):
            dbwriter.overflow = state['overflow']

    # order the jobs with biggest utility first
    def utilitycmp(self, job1, job2):
        return -cmp(job1.score, job2.score)

    def prioritycmp(self, job1, job2):
        """Compare 2 jobs first using queue priority and then first-in, first-out."""
        val = cmp(self.queues[job1.queue].priority, self.queues[job2.queue].priority)
        if val == 0:
            return self.fifocmp(job1, job2)
        else:
            # we want the higher priority first
            return -val

    def fifocmp(self, job1, job2):
        """Compare 2 jobs for first-in, first-out."""
        def fifo_value(job):
            if job.index is not None:
                return int(job.index)
            else:
                return job.jobid
        # Implement some simple variations on FIFO scheduling
        # within a particular queue, based on queue policy
        fifoval = cmp(fifo_value(job1), fifo_value(job2))
        if job1.queue == job2.queue:
            qpolicy = self.queues[job1.queue].policy
            sizeval = cmp(int(job1.nodes), int(job2.nodes))
            wtimeval = cmp(int(job1.walltime), int(job2.walltime))
            if qpolicy == 'largest-first' and sizeval:
                return -sizeval
            elif qpolicy == 'smallest-first' and sizeval:
                return sizeval
            elif qpolicy == 'longest-first' and wtimeval:
                return -wtimeval
            elif qpolicy == 'shortest-first' and wtimeval:
                return wtimeval
            else:
                return fifoval
        else:
            return fifoval
        return cmp(fifo_value(job1), fifo_value(job2))

    def save_me(self):
        Component.save(self)
    save_me = automatic(save_me)

    # user_name in this context is the user setting/modifying the res.
    def add_reservations(self, specs, user_name):
        self.logger.info("%s adding reservation: %r" % (user_name, specs))
        added_reservations = self.reservations.q_add(specs)
        for added_reservation in added_reservations:
            self.logger.info("Res %s/%s: %s adding reservation: %r" %
                    (added_reservation.res_id, added_reservation.cycle_id, user_name, specs))
            dbwriter.log_to_db(user_name, "creating", "reservation", added_reservation)
        return added_reservations
    add_reservations = exposed(query(add_reservations))

    def del_reservations(self, specs, user_name):
        self.logger.info("%s releasing reservation: %r" % (user_name, specs))
        del_reservations = self.reservations.q_del(specs)
        for del_reservation in del_reservations:
            self.logger.info("Res %s/%s/: %s releasing reservation: %r" %
                    (del_reservation.res_id, del_reservation.cycle_id, user_name, specs))
            #dbwriter.log_to_db(user_name, "ending", "reservation", del_reservation)
        return del_reservations
    del_reservations = exposed(query(del_reservations))

    def get_reservations(self, specs):
        return self.reservations.q_get(specs)
    get_reservations = exposed(query(get_reservations))

    def set_reservations(self, specs, updates, user_name):
        log_str = "%s modifying reservation: %r with updates %r" % (user_name, specs, updates)
        self.logger.info(log_str)
        # handle defers as a special case: have to log these, and not drop a mod record.
        def _set_reservations(res, newattr):
            res.update(newattr)
        updates['__cmd_user'] = user_name
        mod_reservations = self.reservations.q_get(specs, _set_reservations, updates)
        for mod_reservation in mod_reservations:
            self.logger.info("Res %s/%s: %s modifying reservation: %r" %
                    (mod_reservation.res_id, mod_reservation.cycle_id, user_name, specs))
        return mod_reservations
    set_reservations = exposed(query(set_reservations))

    def release_reservations(self, specs, user_name):
        self.logger.info("%s requested release of reservation: %r" % (user_name, specs))
        self.logger.info("%s releasing reservation: %r" % (user_name, specs))
        rel_res = self.get_reservations(specs)
        for res in rel_res:
            dbwriter.log_to_db(user_name, "released", "reservation", res)
        del_reservations = self.reservations.q_del(specs)
        for del_reservation in del_reservations:
            self.logger.info("Res %s/%s/: %s releasing reservation: %r" %
                    (del_reservation.res_id, del_reservation.cycle_id, user_name, specs))
        return del_reservations
    release_reservations = exposed(query(release_reservations))

    def check_reservations(self):
        ret = ""
        reservations = self.reservations.values()
        for i in range(len(reservations)):
            for j in range(i + 1, len(reservations)):
                # if at least one reservation is cyclic, we want *that* reservation
                # to be the one getting its overlaps method called
                if reservations[i].cycle is not None:
                    res1 = reservations[i]
                    res2 = reservations[j]
                else:
                    res1 = reservations[j]
                    res2 = reservations[i]
                # we subtract a little bit because the overlaps method isn't really meant to do this
                # it will report warnings when one reservation starts at the same time another ends
                if res1.overlaps(res2.start, res2.duration - 0.00001):
                    # now we need to check for overlap in space
                    results = ComponentProxy(self.COMP_SYSTEM).get_partitions(
                        [{'name': p, 'children': '*', 'parents': '*'} for p in res2.partitions.split(":")])
                    for p in res1.partitions.split(":"):
                        for r in results:
                            if p == r['name'] or p in r['children'] or p in r['parents']:
                                ret += "Warning: reservation '%s' overlaps reservation '%s'\n" % (res1.name, res2.name)
        return ret
    check_reservations = exposed(check_reservations)

    def sync_data(self):
        started = self.get_current_time()
        for item in [self.jobs, self.queues]:
            try:
                item.Sync()
            except (ComponentLookupError, xmlrpclib.Fault):
                # the ForeignDataDicts already include FailureMode stuff
                pass
        # print "took %f seconds for sync_data" % (time.time() - started, )
    #sync_data = automatic(sync_data)

    def _run_reservation_jobs(self, reservations_cache):
        # handle each reservation separately, as they shouldn't be competing for resources
        for cur_res in reservations_cache.itervalues():
            #print "trying to run res jobs in", cur_res.name, self.started_jobs
            queue = cur_res.queue
            if not (self.queues.has_key(queue) and self.queues[queue].state == 'running'):
                continue

            temp_jobs = self.jobs.q_get([{'is_runnable': True, 'queue': queue}])
            active_jobs = []
            for j in temp_jobs:
                if not self.started_jobs.has_key(j.jobid) and cur_res.job_within_reservation(j):
                    active_jobs.append(j)

            if not active_jobs:
                continue
            active_jobs.sort(self.utilitycmp)

            job_location_args = []
            for job in active_jobs:
                job_location_args.append({
                    'jobid': str(job.jobid),
                    'nodes': job.nodes,
                    'queue': job.queue,
                    'required': cur_res.partitions.split(":"),
                    'utility_score': job.score,
                    'walltime': job.walltime,
                    'attrs': job.attrs,
                    'user': job.user,
                })

            # there's no backfilling in reservations
            try:
                best_partition_dict = ComponentProxy(self.COMP_SYSTEM).find_job_location(job_location_args, [])
            except:
                self.logger.error("failed to connect to system component")
                best_partition_dict = {}

            for jobid in best_partition_dict:
                job = self.jobs[int(jobid)]
                self._start_job(job, best_partition_dict[jobid], {str(job.jobid): cur_res.res_id})

    def _start_job(self, job, partition_list, resid=None):
        """Get the queue manager to start a job."""
        cqm = ComponentProxy(self.COMP_QUEUE_MANAGER)
        try:
            self.logger.info("trying to start job %d on partition %r" % (job.jobid, partition_list))
            cqm.run_jobs([{'tag': "job", 'jobid': job.jobid}], partition_list, None, resid)
        except ComponentLookupError:
            self.logger.error("failed to connect to queue manager")
            return
        self.started_jobs[job.jobid] = self.get_current_time()

    def schedule_jobs(self):
        '''look at the queued jobs, and decide which ones to start'''
        started_scheduling = self.get_current_time()

        if not self.active:
            return

        self.sync_data()

        # if we're missing information, don't bother trying to schedule jobs
        if not (self.queues.__oserror__.status and self.jobs.__oserror__.status):
            self.sync_state.Fail()
            return
        self.sync_state.Pass()

        self.lock.acquire()
        try:
            # cleanup any reservations which have expired
            for res in self.reservations.values():
                if res.is_over():
                    self.logger.info("reservation %s has ended; removing" % (res.name))
                    self.logger.info("Res %s/%s: Ending reservation: %r" % (res.res_id, res.cycle_id, res.name))
                    #dbwriter.log_to_db(None, "ending", "reservation",
                    #        res)
                    del_reservations = self.reservations.q_del([
                        {'name': res.name}])

            reservations_cache = self.reservations.copy()
        except:
            # just to make sure we don't keep the lock forever
            self.logger.error("error in schedule_jobs", exc_info=True)
        self.lock.release()

        # clean up the started_jobs cached data
        # TODO: Make this tunable.
        now = self.get_current_time()
        for job_name in self.started_jobs.keys():
            if (now - self.started_jobs[job_name]) > 60:
                del self.started_jobs[job_name]

        active_queues = []
        spruce_queues = []
        res_queues = set()
        for item in reservations_cache.q_get([{'queue': '*'}]):
            if self.queues.has_key(item.queue):
                if self.queues[item.queue].state == 'running':
                    res_queues.add(item.queue)

        for queue in self.queues.itervalues():
            if queue.name not in res_queues and queue.state == 'running':
                if queue.policy == "high_prio":
                    spruce_queues.append(queue)
                else:
                    active_queues.append(queue)

        # handle the reservation jobs that might be ready to go
        self._run_reservation_jobs(reservations_cache)

        # figure out stuff about queue equivalence classes
        if __running_mode__ == "simulation":
            equiv = [{'reservations': [], 'queues': ['default']}]
        else:
            res_info = {}
            for cur_res in reservations_cache.values():
                res_info[cur_res.name] = cur_res.partitions
            try:
                equiv = ComponentProxy(self.COMP_SYSTEM).find_queue_equivalence_classes(
                        res_info, [q.name for q in active_queues + spruce_queues])
            except:
                self.logger.error("failed to connect to system component")
                return

        for eq_class in equiv:
            # recall that is_runnable is True for certain types of holds
            temp_jobs = self.jobs.q_get([{'is_runnable': True, 'queue': queue.name}
                for queue in active_queues if queue.name in eq_class['queues']])
            active_jobs = []
            for j in temp_jobs:
                if not self.started_jobs.has_key(j.jobid):
                    active_jobs.append(j)

            temp_jobs = self.jobs.q_get([{'is_runnable': True, 'queue': queue.name}
                for queue in spruce_queues if queue.name in eq_class['queues']])
            spruce_jobs = []
            for j in temp_jobs:
                if not self.started_jobs.has_key(j.jobid):
                    spruce_jobs.append(j)

            # if there are any pending jobs in high_prio queues, those are the only ones that can start
            if spruce_jobs:
                active_jobs = spruce_jobs

            # get the cutoff time for backfilling
            #
            # BRT: should we use 'has_resources' or 'is_active'?  has_resources returns to false
            # once the resource epilogue scripts have finished running while is_active only returns
            # to false once the job (not just the running task) has completely terminated.  the
            # difference is likely to be slight unless the job epilogue scripts are heavy weight.
            temp_jobs = [job for job in self.jobs.q_get([{'has_resources': True}]) if job.queue in eq_class['queues']]
            end_times = []
            for job in temp_jobs:
                # take the max so that jobs which have gone overtime and are being killed
                # continue to cast a small backfilling shadow (we need this for the case
                # that the final job in a drained partition runs overtime -- which otherwise
                # allows things to be backfilled into the drained partition)

                ##*AdjEst*
                if running_job_walltime_prediction:
                    runtime_estimate = float(job.walltime_p)
                else:
                    runtime_estimate = float(job.walltime)

                end_time = max(float(job.starttime) + 60 * runtime_estimate, now + 5 * 60)
                end_times.append([job.location, end_time])

            for res_name in eq_class['reservations']:
                cur_res = reservations_cache[res_name]
                if not cur_res.cycle:
                    end_time = float(cur_res.start) + float(cur_res.duration)
                else:
                    done_after = float(cur_res.duration) - ((now - float(cur_res.start)) % float(cur_res.cycle))
                    if done_after < 0:
                        done_after += cur_res.cycle
                    end_time = now + done_after
                if cur_res.is_active():
                    for part_name in cur_res.partitions.split(":"):
                        end_times.append([[part_name], end_time])

            if not active_jobs:
                continue
            active_jobs.sort(self.utilitycmp)

            # now smoosh lots of data together to be passed to the allocator in the system component
            job_location_args = []
            for job in active_jobs:
                forbidden_locations = set()
                for res_name in eq_class['reservations']:
                    cur_res = reservations_cache[res_name]
                    if cur_res.overlaps(self.get_current_time(), 60 * float(job.walltime) + SLOP_TIME):
                        forbidden_locations.update(cur_res.partitions.split(":"))

                job_location_args.append({
                    'jobid': str(job.jobid),
                    'nodes': job.nodes,
                    'queue': job.queue,
                    'forbidden': list(forbidden_locations),
                    'utility_score': job.score,
                    'walltime': job.walltime,
                    'walltime_p': job.walltime_p, #*AdjEst*
                    'attrs': job.attrs,
                    'user': job.user,
                })

            try:
                best_partition_dict = ComponentProxy(self.COMP_SYSTEM).find_job_location(job_location_args, end_times)
            except:
                self.logger.error("failed to connect to system component", exc_info=True)
                best_partition_dict = {}

            for jobid in best_partition_dict:
                job = self.jobs[int(jobid)]
                self._start_job(job, best_partition_dict[jobid])

        # print "took %f seconds for scheduling loop" % (time.time() - started_scheduling, )
    schedule_jobs = locking(automatic(schedule_jobs))

    def get_resid(self, queue_name):
        return None
    get_resid = exposed(get_resid)

    def enable(self, user_name):
        """Enable scheduling"""
        self.logger.info("%s enabling scheduling", user_name)
        self.active = True
    enable = exposed(enable)

    def disable(self, user_name):
        """Disable scheduling"""
        self.logger.info("%s disabling scheduling", user_name)
        self.active = False
    disable = exposed(disable)

    def set_res_id(self, id_num):
        """Set the reservation id number."""
        self.id_gen.set(id_num)
        logger.info("Reset res_id generator to %s." % id_num)
    set_res_id = exposed(set_res_id)

    def set_cycle_id(self, id_num):
        """Set the cycle id number."""
        self.cycle_id_gen.set(id_num)
        logger.info("Reset cycle_id generator to %s." % id_num)
    set_cycle_id = exposed(set_cycle_id)

    def force_res_id(self, id_num):
        """Override the id-generator and change the resid to id_num"""
        self.id_gen.idnum = id_num - 1
        logger.warning("Forced res_id generator to %s." % id_num)
    force_res_id = exposed(force_res_id)

    def force_cycle_id(self, id_num):
        """Override the id-generator and change the cycleid to id_num"""
        self.cycle_id_gen.idnum = id_num - 1
        logger.warning("Forced cycle_id generator to %s." % id_num)
    force_cycle_id = exposed(force_cycle_id)

    def get_next_res_id(self):
        """Get what the next resid number would be"""
        return self.id_gen.idnum + 1
    get_next_res_id = exposed(get_next_res_id)

    def get_next_cycle_id(self):
        """Get what the next cycleid number would be"""
        return self.cycle_id_gen.idnum + 1
    get_next_cycle_id = exposed(get_next_cycle_id)

    def __flush_msg_queue(self):
        """Send queued messages to the database-writer component"""
        dbwriter.flush_queue()
    __flush_msg_queue = automatic(__flush_msg_queue, float(get_bgsched_config('db_flush_interval', 10)))