Example #1
    def __setstate__(self, state):
        self.reservations = state['reservations']
        if 'active' in state:
            self.active = state['active']
        else:
            self.active = True

        self.id_gen = IncrID()
        self.id_gen.set(state['next_res_id'])
        global bgsched_id_gen
        bgsched_id_gen = self.id_gen

        self.cycle_id_gen = IncrID()
        self.cycle_id_gen.set(state['next_cycle_id'])
        global bgsched_cycle_id_gen
        bgsched_cycle_id_gen = self.cycle_id_gen

        self.queues = QueueDict(self.COMP_QUEUE_MANAGER)
        self.jobs = JobDict(self.COMP_QUEUE_MANAGER)
        self.started_jobs = {}
        self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")

        self.get_current_time = time.time
        self.lock = threading.Lock()
        self.statistics = Statistics()

        if state.has_key('msg_queue'):
            dbwriter.msg_queue = state['msg_queue']
        if state.has_key('overflow') and (dbwriter.max_queued != None):
            dbwriter.overflow = state['overflow']
Example #2
    def __setstate__(self, state):
        Component.__setstate__(self, state)

        self.reservations = state['reservations']
        if 'active' in state:
            self.active = state['active']
        else:
            self.active = True
        
        self.id_gen = IncrID()
        self.id_gen.set(state['next_res_id'])
        global bgsched_id_gen
        bgsched_id_gen = self.id_gen
        
        self.cycle_id_gen = IncrID()
        self.cycle_id_gen.set(state['next_cycle_id'])
        global bgsched_cycle_id_gen
        bgsched_cycle_id_gen = self.cycle_id_gen

        self.queues = QueueDict()
        self.jobs = JobDict()
        self.started_jobs = {}
        self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
        
        self.get_current_time = time.time

        if state.has_key('msg_queue'):
            dbwriter.msg_queue = state['msg_queue']
        if state.has_key('overflow') and (dbwriter.max_queued != None):
            dbwriter.overflow = state['overflow']
Example #3
 def __init__(self, *args, **kwargs):
     """Initialize a new BaseForker.
     
     All arguments are passed to the component constructor.
     """
     Component.__init__(self, *args, **kwargs)
     self.children = {}
     self.id_gen = IncrID()
Example #4
 def __init__(self, *args, **kwargs):
     Component.__init__(self, *args, **kwargs)
     self.reservations = ReservationDict()
     self.queues = QueueDict()
     self.jobs = JobDict()
     self.started_jobs = {}
     self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
     self.active = True
 
     self.get_current_time = time.time
     self.id_gen = IncrID()
     global bgsched_id_gen
     bgsched_id_gen = self.id_gen
     
     self.cycle_id_gen = IncrID()
     global bgsched_cycle_id_gen
     bgsched_cycle_id_gen = self.cycle_id_gen
Example #5
    def __init__(self, *args, **kwargs):
        Component.__init__(self, *args, **kwargs)
        self.reservations = ReservationDict()
        self.queues = QueueDict()
        self.jobs = JobDict()
        self.started_jobs = {}
        self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
        self.active = True

        self.get_current_time = time.time
        self.id_gen = IncrID()
        global bgsched_id_gen
        bgsched_id_gen = self.id_gen

        self.cycle_id_gen = IncrID()
        global bgsched_cycle_id_gen
        bgsched_cycle_id_gen = self.cycle_id_gen
Example #6
class ProcessGroupDict(DataDict):
    item_cls = ProcessGroup
    key = "id"
    
    def __init__(self):
        self.id_gen = IncrID()
 
    def q_add (self, specs, callback=None, cargs={}):
        for spec in specs:
            if spec.get("id", "*") != "*":
                raise DataCreationError("cannot specify an id")
            spec['id'] = self.id_gen.next()
        return DataDict.q_add(self, specs)
Example #7
class JobDict (DataDict):
    
    item_cls = Job
    key = "id"

    def __init__(self):
        self.id_gen = IncrID()
        
    def q_add (self, specs, callback=None, cargs={}):
        for spec in specs:
            if "id" not in spec or spec['id'] == "*":
                spec['id'] = self.id_gen.next()
        return DataDict.q_add(self, specs)
Example #8
class ProcessGroupDict(DataDict):
    item_cls = ProcessGroup
    key = "id"

    def __init__(self):
        self.id_gen = IncrID()

    def q_add(self, specs, callback=None, cargs={}):
        for spec in specs:
            if spec.get("id", "*") != "*":
                raise DataCreationError("cannot specify an id")
            spec['id'] = self.id_gen.next()
        return DataDict.q_add(self, specs)
Example #9
class ProcessGroupDict(DataDict):
    """A container for holding process groups, keyed by id"""

    item_cls = ProcessGroup
    key = "id"
    
    def __init__(self):
        DataDict.__init__(self)
        self.id_gen = IncrID()

    def q_add(self, specs, callback=None, cargs={}):
        """Add a process group to the container"""
        for spec in specs:
            if spec.get("id", "*") != "*":
                raise DataCreationError("cannot specify an id")
            spec["id"] = self.id_gen.next()
        return DataDict.q_add(self, specs, callback, cargs)
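
The ProcessGroupDict variants above (Examples #6, #8 and #9) and the JobDict in Example #7 all follow the same pattern: the container owns an IncrID and stamps each incoming spec with the next id inside q_add, either rejecting or overriding any caller-supplied id. Below is a minimal self-contained sketch of that add-time id-assignment pattern, with a plain itertools counter standing in for IncrID and a dict standing in for DataDict; the names are illustrative, not Cobalt APIs.

import itertools


class ProcessGroupStore(dict):
    """Illustrative stand-in for a DataDict-style container keyed by 'id'."""

    def __init__(self):
        dict.__init__(self)
        self._ids = itertools.count(1)    # stands in for IncrID()

    def q_add(self, specs):
        added = []
        for spec in specs:
            if spec.get("id", "*") != "*":
                raise ValueError("cannot specify an id")
            spec["id"] = next(self._ids)  # the container, not the caller, assigns the id
            self[spec["id"]] = spec
            added.append(spec)
        return added


store = ProcessGroupStore()
store.q_add([{"user": "alice"}, {"user": "bob"}])
print(sorted(store))  # [1, 2] -- ids are handed out in submission order
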
Example #10
import Cobalt.Util

from Cobalt.Components.bgq_cn_boot_states import BootPending, BootInitiating, BootRebooting, BootComplete, BootFailed
from Cobalt.Components.bgq_io_boot_states import IOBootPending, IOBootInitiating, IOBootComplete, IOBootFailed
from Cobalt.Data import IncrID
from Cobalt.Util import get_config_option


#FIXME: also make this handle cleanup

import Cobalt.Logging
_logger = logging.getLogger()

Cobalt.Util.init_cobalt_config()

_boot_id_gen = IncrID()

class BootContext(object):

    '''Context for an ongoing boot.  This should include any resources specific to that individual boot.
    A pointer to one of these objects is passed to the boot state for processing.

    '''
    def __init__(self, block, job_id, user, block_lock, subblock_parent=None, timeout=None):
        self.block = block
        self.block_id = self.block.name
        self.job_id = job_id
        self.user = user
        self.block_lock = block_lock
        if subblock_parent == None:
            self.subblock_parent = self.block.name
Example #11
 def __init__(self):
     self.id_gen = IncrID()
Example #12
 def test_next (self, max=100):
     generator = IncrID()
     for count in itertools.count(1):
         assert generator.next() == count
         if count >= max:
             break
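
Example #12's test pins down the counter contract the rest of this page relies on: the first call to next() returns 1 and each later call returns the previous value plus one. The larger excerpts additionally use set() and idnum, so that __getstate__/__setstate__ can persist next_res_id as idnum + 1 and later fast-forward a fresh generator to it. Here is a rough sketch of a counter with those inferred semantics; it is an assumption about how Cobalt.Data.IncrID behaves, not its actual source.

class SimpleIncrID(object):
    """Hypothetical IncrID-like counter matching the usage in these examples."""

    def __init__(self):
        self.idnum = 0               # last id handed out; nothing issued yet

    def next(self):
        """Return 1, 2, 3, ... on successive calls."""
        self.idnum += 1
        return self.idnum

    def set(self, value):
        """Fast-forward so the next call to next() returns value (assumed semantics)."""
        self.idnum = value - 1


gen = SimpleIncrID()
assert gen.next() == 1 and gen.next() == 2   # the contract test_next checks
gen.set(50)                                  # e.g. restoring state['next_res_id']
assert gen.next() == 50
assert gen.idnum + 1 == 51                   # the idnum + 1 bookkeeping from __getstate__

The module-level bgsched_id_gen / bgsched_cycle_id_gen globals and the _boot_id_gen in Example #10 simply publish a single generator so other code in the same module can share it.
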
Example #13
class BGSched (Component):
    
    implementation = "bgsched"
    name = "scheduler"
    logger = logging.getLogger("Cobalt.Components.scheduler")


    _configfields = ['utility_file']
    _config = ConfigParser.ConfigParser()
    # print Cobalt.CONFIG_FILES
    _config.read(Cobalt.CONFIG_FILES)
    if not _config._sections.has_key('bgsched'):
        print '''"bgsched" section missing from cobalt config file'''
        sys.exit(1)
    config = _config._sections['bgsched']
    mfields = [field for field in _configfields if not config.has_key(field)]
    if mfields:
        print "Missing option(s) in cobalt config file [bgsched] section: %s" % (" ".join(mfields))
        sys.exit(1)
    if config.get("default_reservation_policy"):
        global DEFAULT_RESERVATION_POLICY
        DEFAULT_RESERVATION_POLICY = config.get("default_reservation_policy")
    
    def __init__(self, *args, **kwargs):
        Component.__init__(self, *args, **kwargs)
        self.reservations = ReservationDict()
        self.queues = QueueDict()
        self.jobs = JobDict()
        self.started_jobs = {}
        self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
        self.active = True
    
        self.get_current_time = time.time
        self.id_gen = IncrID()
        global bgsched_id_gen
        bgsched_id_gen = self.id_gen
        
        self.cycle_id_gen = IncrID()
        global bgsched_cycle_id_gen
        bgsched_cycle_id_gen = self.cycle_id_gen

    def __getstate__(self):
        state = {}
        state.update(Component.__getstate__(self))
        state.update({
                'sched_version':1,
                'reservations':self.reservations,
                'active':self.active,
                'next_res_id':self.id_gen.idnum+1, 
                'next_cycle_id':self.cycle_id_gen.idnum+1, 
                'msg_queue': dbwriter.msg_queue, 
                'overflow': dbwriter.overflow})
        return state
    
    def __setstate__(self, state):
        Component.__setstate__(self, state)

        self.reservations = state['reservations']
        if 'active' in state:
            self.active = state['active']
        else:
            self.active = True
        
        self.id_gen = IncrID()
        self.id_gen.set(state['next_res_id'])
        global bgsched_id_gen
        bgsched_id_gen = self.id_gen
        
        self.cycle_id_gen = IncrID()
        self.cycle_id_gen.set(state['next_cycle_id'])
        global bgsched_cycle_id_gen
        bgsched_cycle_id_gen = self.cycle_id_gen

        self.queues = QueueDict()
        self.jobs = JobDict()
        self.started_jobs = {}
        self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
        
        self.get_current_time = time.time

        if state.has_key('msg_queue'):
            dbwriter.msg_queue = state['msg_queue']
        if state.has_key('overflow') and (dbwriter.max_queued != None):
            dbwriter.overflow = state['overflow']

    # order the jobs with biggest utility first
    def utilitycmp(self, job1, job2):
        return -cmp(job1.score, job2.score)
    
    def prioritycmp(self, job1, job2):
        """Compare 2 jobs first using queue priority and then first-in, first-out."""
        
        val = cmp(self.queues[job1.queue].priority, self.queues[job2.queue].priority)
        if val == 0:
            return self.fifocmp(job1, job2)
        else:
            # we want the higher priority first
            return -val
        
    def fifocmp (self, job1, job2):
        """Compare 2 jobs for first-in, first-out."""
        
        def fifo_value (job):
            if job.index is not None:
                return int(job.index)
            else:
                return job.jobid
            
        # Implement some simple variations on FIFO scheduling
        # within a particular queue, based on queue policy
        fifoval = cmp(fifo_value(job1), fifo_value(job2))
        if(job1.queue == job2.queue):
            qpolicy = self.queues[job1.queue].policy
            sizeval = cmp(int(job1.nodes), int(job2.nodes))
            wtimeval = cmp(int(job1.walltime), int(job2.walltime))
            if(qpolicy == 'largest-first' and sizeval):
                return -sizeval
            elif(qpolicy == 'smallest-first' and sizeval):
                return sizeval
            elif(qpolicy == 'longest-first' and wtimeval):
                return -wtimeval
            elif(qpolicy == 'shortest-first' and wtimeval):
                return wtimeval
            else:
                return fifoval
        else:
            return fifoval

        return cmp(fifo_value(job1), fifo_value(job2))

    def save_me(self):
        Component.save(self)
    save_me = automatic(save_me, float(get_bgsched_config('save_me_interval', 10)))

    #user_name in this context is the user setting/modifying the res.
    def add_reservations (self, specs, user_name):
        self.logger.info("%s adding reservation: %r" % (user_name, specs))
        added_reservations =  self.reservations.q_add(specs)
        for added_reservation in added_reservations:
            self.logger.info("Res %s/%s: %s adding reservation: %r" % 
                             (added_reservation.res_id,
                              added_reservation.cycle_id,
                              user_name, specs))
            dbwriter.log_to_db(user_name, "creating", "reservation", added_reservation)
        return added_reservations
    
    add_reservations = exposed(query(add_reservations))

    def del_reservations (self, specs, user_name):
        self.logger.info("%s releasing reservation: %r" % (user_name, specs))
        del_reservations = self.reservations.q_del(specs)
        for del_reservation in del_reservations:
            self.logger.info("Res %s/%s/: %s releasing reservation: %r" % 
                             (del_reservation.res_id,
                              del_reservation.cycle_id,
                              user_name, specs))
            #dbwriter.log_to_db(user_name, "ending", "reservation", del_reservation) 
        return del_reservations

    del_reservations = exposed(query(del_reservations))

    def get_reservations (self, specs):
        return self.reservations.q_get(specs)
    get_reservations = exposed(query(get_reservations))

    def set_reservations(self, specs, updates, user_name):
        log_str = "%s modifying reservation: %r with updates %r" % (user_name, specs, updates)
        self.logger.info(log_str)
        #handle defers as a special case:  have to log these, and not drop a mod record.
        def _set_reservations(res, newattr):
            res.update(newattr)
        updates['__cmd_user'] = user_name
        mod_reservations = self.reservations.q_get(specs, _set_reservations, updates)
        for mod_reservation in mod_reservations:
            self.logger.info("Res %s/%s: %s modifying reservation: %r" % 
                             (mod_reservation.res_id,
                              mod_reservation.cycle_id,
                              user_name, specs))
        return mod_reservations
        
    set_reservations = exposed(query(set_reservations))


    def release_reservations(self, specs, user_name):
        self.logger.info("%s requested release of reservation: %r" % (user_name, specs))
        self.logger.info("%s releasing reservation: %r" % (user_name, specs))
        rel_res = self.get_reservations(specs)
        for res in rel_res:
            dbwriter.log_to_db(user_name, "released", "reservation", res) 
        del_reservations = self.reservations.q_del(specs)
        for del_reservation in del_reservations:
            self.logger.info("Res %s/%s/: %s releasing reservation: %r" % 
                             (del_reservation.res_id,
                              del_reservation.cycle_id,
                              user_name, specs))
        return del_reservations

    release_reservations = exposed(query(release_reservations))

    def check_reservations(self):
        ret = ""
        reservations = self.reservations.values()
        for i in range(len(reservations)):
            for j in range(i+1, len(reservations)):
                # if at least one reservation is cyclic, we want *that* reservation to be the one getting its overlaps method
                # called
                if reservations[i].cycle is not None:
                    res1 = reservations[i]
                    res2 = reservations[j]
                else:
                    res1 = reservations[j]
                    res2 = reservations[i]

                # we subtract a little bit because the overlaps method isn't really meant to do this
                # it will report warnings when one reservation starts at the same time another ends
                if res1.overlaps(res2.start, res2.duration - 0.00001):
                    # now we need to check for overlap in space
                    results = ComponentProxy("system").get_partitions(
                        [ {'name': p, 'children': '*', 'parents': '*'} for p in res2.partitions.split(":") ]
                    )
                    for p in res1.partitions.split(":"):
                        for r in results:
                            if p==r['name'] or p in r['children'] or p in r['parents']:
                                ret += "Warning: reservation '%s' overlaps reservation '%s'\n" % (res1.name, res2.name)

        return ret
    check_reservations = exposed(check_reservations)

    def sync_data(self):
        started = self.get_current_time()
        for item in [self.jobs, self.queues]:
            try:
                item.Sync()
            except (ComponentLookupError, xmlrpclib.Fault):
                # the ForeignDataDicts already include FailureMode stuff
                pass
        # print "took %f seconds for sync_data" % (time.time() - started, )
    #sync_data = automatic(sync_data)

    def _run_reservation_jobs (self, reservations_cache):
        # handle each reservation separately, as they shouldn't be competing for resources
        for cur_res in reservations_cache.itervalues():
            #print "trying to run res jobs in", cur_res.name, self.started_jobs
            queue = cur_res.queue
            if not (self.queues.has_key(queue) and self.queues[queue].state == 'running'):
                continue
            
            temp_jobs = self.jobs.q_get([{'is_runnable':True, 'queue':queue}])
            active_jobs = []
            for j in temp_jobs:
                if not self.started_jobs.has_key(j.jobid) and cur_res.job_within_reservation(j):
                    active_jobs.append(j)
    
            if not active_jobs:
                continue
            active_jobs.sort(self.utilitycmp)
            
            job_location_args = []
            for job in active_jobs:
                job_location_args.append( 
                    { 'jobid': str(job.jobid), 
                      'nodes': job.nodes, 
                      'queue': job.queue, 
                      'required': cur_res.partitions.split(":"),
                      'utility_score': job.score,
                      'walltime': job.walltime,
                      'attrs': job.attrs,
                      'user': job.user,
                    } )

            # there's no backfilling in reservations
            try:
                best_partition_dict = ComponentProxy("system").find_job_location(job_location_args, [])
            except:
                self.logger.error("failed to connect to system component")
                best_partition_dict = {}
    
            for jobid in best_partition_dict:
                job = self.jobs[int(jobid)]
                self._start_job(job, best_partition_dict[jobid], {str(job.jobid):cur_res.res_id})

    def _start_job(self, job, partition_list, resid=None):
        """Get the queue manager to start a job."""

        cqm = ComponentProxy("queue-manager")
        
        try:
            self.logger.info("trying to start job %d on partition %r" % (job.jobid, partition_list))
            cqm.run_jobs([{'tag':"job", 'jobid':job.jobid}], partition_list, None, resid)
        except ComponentLookupError:
            self.logger.error("failed to connect to queue manager")
            return

        self.started_jobs[job.jobid] = self.get_current_time()



    def schedule_jobs (self):
        '''look at the queued jobs, and decide which ones to start'''

        started_scheduling = self.get_current_time()

        if not self.active:
            return
        
        self.sync_data()
        
        # if we're missing information, don't bother trying to schedule jobs
        if not (self.queues.__oserror__.status and 
                self.jobs.__oserror__.status):
            self.sync_state.Fail()
            return
        self.sync_state.Pass()
        
        self.component_lock_acquire()
        try:
            # cleanup any reservations which have expired
            for res in self.reservations.values():
                if res.is_over():
                    self.logger.info("reservation %s has ended; removing" % 
                            (res.name))
                    self.logger.info("Res %s/%s: Ending reservation: %r" % 
                             (res.res_id, res.cycle_id, res.name))
                    #dbwriter.log_to_db(None, "ending", "reservation", 
                    #        res) 
                    del_reservations = self.reservations.q_del([
                        {'name': res.name}])

            # FIXME: this isn't a deepcopy.  it copies references to each reservation in the reservations dict.  is that really
            # sufficient?  --brt
            reservations_cache = self.reservations.copy()
        except:
            # just to make sure we don't keep the lock forever
            self.logger.error("error in schedule_jobs", exc_info=True)
        self.component_lock_release()
        
        # clean up the started_jobs cached data
        # TODO: Make this tunable.
        now = self.get_current_time()
        for job_name in self.started_jobs.keys():
            if (now - self.started_jobs[job_name]) > 60:
                del self.started_jobs[job_name]

        active_queues = []
        spruce_queues = []
        res_queues = set()
        for item in reservations_cache.q_get([{'queue':'*'}]):
            if self.queues.has_key(item.queue):
                if self.queues[item.queue].state == 'running':
                    res_queues.add(item.queue)

        for queue in self.queues.itervalues():
            if queue.name not in res_queues and queue.state == 'running':
                if queue.policy == "high_prio":
                    spruce_queues.append(queue)
                else:
                    active_queues.append(queue)
        
        # handle the reservation jobs that might be ready to go
        self._run_reservation_jobs(reservations_cache)

        # figure out stuff about queue equivalence classes
        res_info = {}
        for cur_res in reservations_cache.values():
            res_info[cur_res.name] = cur_res.partitions
        try:
            equiv = ComponentProxy("system").find_queue_equivalence_classes(
                    res_info, [q.name for q in active_queues + spruce_queues])
        except:
            self.logger.error("failed to connect to system component")
            return
        
        for eq_class in equiv:
            # recall that is_runnable is True for certain types of holds
            temp_jobs = self.jobs.q_get([{'is_runnable':True, 'queue':queue.name} for queue in active_queues \
                if queue.name in eq_class['queues']])
            active_jobs = []
            for j in temp_jobs:
                if not self.started_jobs.has_key(j.jobid):
                    active_jobs.append(j)
    
            temp_jobs = self.jobs.q_get([{'is_runnable':True, 'queue':queue.name} for queue in spruce_queues \
                if queue.name in eq_class['queues']])
            spruce_jobs = []
            for j in temp_jobs:
                if not self.started_jobs.has_key(j.jobid):
                    spruce_jobs.append(j)
    
            # if there are any pending jobs in high_prio queues, those are the only ones that can start
            if spruce_jobs:
                active_jobs = spruce_jobs

            # get the cutoff time for backfilling
            #
            # BRT: should we use 'has_resources' or 'is_active'?  has_resources returns to false once the resource epilogue
            # scripts have finished running while is_active only returns to false once the job (not just the running task) has
            # completely terminated.  the difference is likely to be slight unless the job epilogue scripts are heavy weight.
            temp_jobs = [job for job in self.jobs.q_get([{'has_resources':True}]) if job.queue in eq_class['queues']]
            end_times = []
            for job in temp_jobs:
                # take the max so that jobs which have gone overtime and are being killed
                # continue to cast a small backfilling shadow (we need this for the case
                # that the final job in a drained partition runs overtime -- which otherwise
                # allows things to be backfilled into the drained partition)
                            
                ##*AdjEst*
                if running_job_walltime_prediction:
                    runtime_estimate = float(job.walltime_p)
                else:
                    runtime_estimate = float(job.walltime)
                
                end_time = max(float(job.starttime) + 60 * runtime_estimate, now + 5*60)
                end_times.append([job.location, end_time])
            
            for res_name in eq_class['reservations']:
                cur_res = reservations_cache[res_name]

                if not cur_res.cycle:
                    end_time = float(cur_res.start) + float(cur_res.duration)
                else:
                    done_after = float(cur_res.duration) - ((now - float(cur_res.start)) % float(cur_res.cycle))
                    if done_after < 0:
                        done_after += cur_res.cycle
                    end_time = now + done_after
                if cur_res.is_active():
                    for part_name in cur_res.partitions.split(":"):
                        end_times.append([[part_name], end_time])
    
            if not active_jobs:
                continue
            active_jobs.sort(self.utilitycmp)
            
            # now smoosh lots of data together to be passed to the allocator in the system component
            job_location_args = []
            for job in active_jobs:
                forbidden_locations = set()
                for res_name in eq_class['reservations']:
                    cur_res = reservations_cache[res_name]
                    if cur_res.overlaps(self.get_current_time(), 60 * float(job.walltime) + SLOP_TIME):
                        forbidden_locations.update(cur_res.partitions.split(":"))

                job_location_args.append( 
                    { 'jobid': str(job.jobid), 
                      'nodes': job.nodes, 
                      'queue': job.queue, 
                      'forbidden': list(forbidden_locations),
                      'utility_score': job.score,
                      'walltime': job.walltime,
                      'walltime_p': job.walltime_p, #*AdjEst*
                      'attrs': job.attrs,
                      'user': job.user,
                    } )

            try:
                best_partition_dict = ComponentProxy("system").find_job_location(job_location_args, end_times)
            except:
                self.logger.error("failed to connect to system component", exc_info=True)
                best_partition_dict = {}
    
            for jobid in best_partition_dict:
                job = self.jobs[int(jobid)]
                self._start_job(job, best_partition_dict[jobid])
    

        # print "took %f seconds for scheduling loop" % (time.time() - started_scheduling, )
    schedule_jobs = locking(automatic(schedule_jobs, float(get_bgsched_config('schedule_jobs_interval', 10))))

    def get_resid(self, queue_name):
        
        return None
    get_resid = exposed(get_resid)

    
    def enable(self, user_name):
        """Enable scheduling"""
        self.logger.info("%s enabling scheduling", user_name)
        self.active = True
    enable = exposed(enable)

    def disable(self, user_name):
        """Disable scheduling"""
        self.logger.info("%s disabling scheduling", user_name)
        self.active = False
    disable = exposed(disable)

    def set_res_id(self, id_num):
        """Set the reservation id number."""
        self.id_gen.set(id_num)
        logger.info("Reset res_id generator to %s." % id_num)

    set_res_id = exposed(set_res_id)
    
    def set_cycle_id(self, id_num):
        """Set the cycle id number."""
        self.cycle_id_gen.set(id_num)
        logger.info("Reset cycle_id generator to %s." % id_num)

    set_cycle_id = exposed(set_cycle_id)

    def force_res_id(self, id_num):
        """Override the id-generator and change the resid to id_num"""
        self.id_gen.idnum = id_num - 1
        logger.warning("Forced res_id generator to %s." % id_num)

    force_res_id = exposed(force_res_id)

    def force_cycle_id(self, id_num):
        """Override the id-generator and change the cycleid to id_num"""
        self.cycle_id_gen.idnum = id_num - 1
        logger.warning("Forced cycle_id generator to %s." % id_num)

    force_cycle_id = exposed(force_cycle_id)

    def get_next_res_id(self):
        """Get what the next resid number would be"""
        return self.id_gen.idnum + 1
    get_next_res_id = exposed(get_next_res_id)

    def get_next_cycle_id(self):
        """get what the next cycleid number would be"""
        return self.cycle_id_gen.idnum + 1
    get_next_cycle_id = exposed(get_next_cycle_id)

    def __flush_msg_queue(self):
        """Send queued messages to the database-writer component"""
        dbwriter.flush_queue()
    __flush_msg_queue = automatic(__flush_msg_queue, 
                float(get_bgsched_config('db_flush_interval', 10)))
Example #14
 def __init__(self):
     self.id_gen = IncrID()
Example #15
 def __init__(self):
     DataDict.__init__(self)
     self.id_gen = IncrID()
Example #16
 def test_next(self, max=100):
     generator = IncrID()
     for count in itertools.count(1):
         assert generator.next() == count
         if count >= max:
             break
Example #17
class BGSched(Component):

    implementation = "bgsched"
    name = "scheduler"
    logger = logging.getLogger("Cobalt.Components.scheduler")

    _configfields = ['utility_file']
    _config = ConfigParser.ConfigParser()
    print Cobalt.CONFIG_FILES
    _config.read(Cobalt.CONFIG_FILES)
    if not _config._sections.has_key('bgsched'):
        print '''"bgsched" section missing from cobalt config file'''
        sys.exit(1)
    config = _config._sections['bgsched']
    mfields = [field for field in _configfields if not config.has_key(field)]
    if mfields:
        print "Missing option(s) in cobalt config file [bgsched] section: %s" % (
            " ".join(mfields))
        sys.exit(1)
    if config.get("default_reservation_policy"):
        global DEFAULT_RESERVATION_POLICY
        DEFAULT_RESERVATION_POLICY = config.get("default_reservation_policy")

    def __init__(self, *args, **kwargs):
        Component.__init__(self, *args, **kwargs)
        self.COMP_QUEUE_MANAGER = "queue-manager"
        self.COMP_SYSTEM = "system"
        self.reservations = ReservationDict()
        self.queues = QueueDict(self.COMP_QUEUE_MANAGER)
        self.jobs = JobDict(self.COMP_QUEUE_MANAGER)
        self.started_jobs = {}
        self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")
        self.active = True

        self.get_current_time = time.time
        self.id_gen = IncrID()
        global bgsched_id_gen
        bgsched_id_gen = self.id_gen

        self.cycle_id_gen = IncrID()
        global bgsched_cycle_id_gen
        bgsched_cycle_id_gen = self.cycle_id_gen

    def __getstate__(self):
        return {
            'reservations': self.reservations,
            'version': 1,
            'active': self.active,
            'next_res_id': self.id_gen.idnum + 1,
            'next_cycle_id': self.cycle_id_gen.idnum + 1,
            'msg_queue': dbwriter.msg_queue,
            'overflow': dbwriter.overflow
        }

    def __setstate__(self, state):
        self.reservations = state['reservations']
        if 'active' in state:
            self.active = state['active']
        else:
            self.active = True

        self.id_gen = IncrID()
        self.id_gen.set(state['next_res_id'])
        global bgsched_id_gen
        bgsched_id_gen = self.id_gen

        self.cycle_id_gen = IncrID()
        self.cycle_id_gen.set(state['next_cycle_id'])
        global bgsched_cycle_id_gen
        bgsched_cycle_id_gen = self.cycle_id_gen

        self.queues = QueueDict(self.COMP_QUEUE_MANAGER)
        self.jobs = JobDict(self.COMP_QUEUE_MANAGER)
        self.started_jobs = {}
        self.sync_state = Cobalt.Util.FailureMode("Foreign Data Sync")

        self.get_current_time = time.time
        self.lock = threading.Lock()
        self.statistics = Statistics()

        if state.has_key('msg_queue'):
            dbwriter.msg_queue = state['msg_queue']
        if state.has_key('overflow') and (dbwriter.max_queued != None):
            dbwriter.overflow = state['overflow']

    # order the jobs with biggest utility first
    def utilitycmp(self, job1, job2):
        return -cmp(job1.score, job2.score)

    def prioritycmp(self, job1, job2):
        """Compare 2 jobs first using queue priority and then first-in, first-out."""

        val = cmp(self.queues[job1.queue].priority,
                  self.queues[job2.queue].priority)
        if val == 0:
            return self.fifocmp(job1, job2)
        else:
            # we want the higher priority first
            return -val

    def fifocmp(self, job1, job2):
        """Compare 2 jobs for first-in, first-out."""
        def fifo_value(job):
            if job.index is not None:
                return int(job.index)
            else:
                return job.jobid

        # Implement some simple variations on FIFO scheduling
        # within a particular queue, based on queue policy
        fifoval = cmp(fifo_value(job1), fifo_value(job2))
        if (job1.queue == job2.queue):
            qpolicy = self.queues[job1.queue].policy
            sizeval = cmp(int(job1.nodes), int(job2.nodes))
            wtimeval = cmp(int(job1.walltime), int(job2.walltime))
            if (qpolicy == 'largest-first' and sizeval):
                return -sizeval
            elif (qpolicy == 'smallest-first' and sizeval):
                return sizeval
            elif (qpolicy == 'longest-first' and wtimeval):
                return -wtimeval
            elif (qpolicy == 'shortest-first' and wtimeval):
                return wtimeval
            else:
                return fifoval
        else:
            return fifoval

        return cmp(fifo_value(job1), fifo_value(job2))

    def save_me(self):
        Component.save(self)

    save_me = automatic(save_me)

    #user_name in this context is the user setting/modifying the res.
    def add_reservations(self, specs, user_name):
        self.logger.info("%s adding reservation: %r" % (user_name, specs))
        added_reservations = self.reservations.q_add(specs)
        for added_reservation in added_reservations:
            self.logger.info("Res %s/%s: %s adding reservation: %r" %
                             (added_reservation.res_id,
                              added_reservation.cycle_id, user_name, specs))
            dbwriter.log_to_db(user_name, "creating", "reservation",
                               added_reservation)
        return added_reservations

    add_reservations = exposed(query(add_reservations))

    def del_reservations(self, specs, user_name):
        self.logger.info("%s releasing reservation: %r" % (user_name, specs))
        del_reservations = self.reservations.q_del(specs)
        for del_reservation in del_reservations:
            self.logger.info("Res %s/%s/: %s releasing reservation: %r" %
                             (del_reservation.res_id, del_reservation.cycle_id,
                              user_name, specs))
            #dbwriter.log_to_db(user_name, "ending", "reservation", del_reservation)
        return del_reservations

    del_reservations = exposed(query(del_reservations))

    def get_reservations(self, specs):
        return self.reservations.q_get(specs)

    get_reservations = exposed(query(get_reservations))

    def set_reservations(self, specs, updates, user_name):
        log_str = "%s modifying reservation: %r with updates %r" % (
            user_name, specs, updates)
        self.logger.info(log_str)

        #handle defers as a special case:  have to log these, and not drop a mod record.
        def _set_reservations(res, newattr):
            res.update(newattr)

        updates['__cmd_user'] = user_name
        mod_reservations = self.reservations.q_get(specs, _set_reservations,
                                                   updates)
        for mod_reservation in mod_reservations:
            self.logger.info("Res %s/%s: %s modifying reservation: %r" %
                             (mod_reservation.res_id, mod_reservation.cycle_id,
                              user_name, specs))
        return mod_reservations

    set_reservations = exposed(query(set_reservations))

    def release_reservations(self, specs, user_name):
        self.logger.info("%s requested release of reservation: %r" %
                         (user_name, specs))
        self.logger.info("%s releasing reservation: %r" % (user_name, specs))
        rel_res = self.get_reservations(specs)
        for res in rel_res:
            dbwriter.log_to_db(user_name, "released", "reservation", res)
        del_reservations = self.reservations.q_del(specs)
        for del_reservation in del_reservations:
            self.logger.info("Res %s/%s/: %s releasing reservation: %r" %
                             (del_reservation.res_id, del_reservation.cycle_id,
                              user_name, specs))
        return del_reservations

    release_reservations = exposed(query(release_reservations))

    def check_reservations(self):
        ret = ""
        reservations = self.reservations.values()
        for i in range(len(reservations)):
            for j in range(i + 1, len(reservations)):
                # if at least one reservation is cyclic, we want *that* reservation to be the one getting its overlaps method
                # called
                if reservations[i].cycle is not None:
                    res1 = reservations[i]
                    res2 = reservations[j]
                else:
                    res1 = reservations[j]
                    res2 = reservations[i]

                # we subtract a little bit because the overlaps method isn't really meant to do this
                # it will report warnings when one reservation starts at the same time another ends
                if res1.overlaps(res2.start, res2.duration - 0.00001):
                    # now we need to check for overlap in space
                    results = ComponentProxy(self.COMP_SYSTEM).get_partitions(
                        [{
                            'name': p,
                            'children': '*',
                            'parents': '*'
                        } for p in res2.partitions.split(":")])
                    for p in res1.partitions.split(":"):
                        for r in results:
                            if p == r['name'] or p in r['children'] or p in r['parents']:
                                ret += "Warning: reservation '%s' overlaps reservation '%s'\n" % (res1.name, res2.name)

        return ret

    check_reservations = exposed(check_reservations)

    def sync_data(self):
        started = self.get_current_time()
        for item in [self.jobs, self.queues]:
            try:
                item.Sync()
            except (ComponentLookupError, xmlrpclib.Fault):
                # the ForeignDataDicts already include FailureMode stuff
                pass
        # print "took %f seconds for sync_data" % (time.time() - started, )

    #sync_data = automatic(sync_data)

    def _run_reservation_jobs(self, reservations_cache):
        # handle each reservation separately, as they shouldn't be competing for resources
        for cur_res in reservations_cache.itervalues():
            #print "trying to run res jobs in", cur_res.name, self.started_jobs
            queue = cur_res.queue
            if not (self.queues.has_key(queue)
                    and self.queues[queue].state == 'running'):
                continue

            temp_jobs = self.jobs.q_get([{
                'is_runnable': True,
                'queue': queue
            }])
            active_jobs = []
            for j in temp_jobs:
                if not self.started_jobs.has_key(
                        j.jobid) and cur_res.job_within_reservation(j):
                    active_jobs.append(j)

            if not active_jobs:
                continue
            active_jobs.sort(self.utilitycmp)

            job_location_args = []
            for job in active_jobs:
                job_location_args.append({
                    'jobid': str(job.jobid),
                    'nodes': job.nodes,
                    'queue': job.queue,
                    'required': cur_res.partitions.split(":"),
                    'utility_score': job.score,
                    'walltime': job.walltime,
                    'attrs': job.attrs,
                    'user': job.user,
                })

            # there's no backfilling in reservations
            try:
                best_partition_dict = ComponentProxy(
                    self.COMP_SYSTEM).find_job_location(job_location_args, [])
            except:
                self.logger.error("failed to connect to system component")
                best_partition_dict = {}

            for jobid in best_partition_dict:
                job = self.jobs[int(jobid)]
                self._start_job(job, best_partition_dict[jobid],
                                {str(job.jobid): cur_res.res_id})

    def _start_job(self, job, partition_list, resid=None):
        """Get the queue manager to start a job."""

        cqm = ComponentProxy(self.COMP_QUEUE_MANAGER)

        try:
            self.logger.info("trying to start job %d on partition %r" %
                             (job.jobid, partition_list))
            cqm.run_jobs([{
                'tag': "job",
                'jobid': job.jobid
            }], partition_list, None, resid)
        except ComponentLookupError:
            self.logger.error("failed to connect to queue manager")
            return

        self.started_jobs[job.jobid] = self.get_current_time()

    def schedule_jobs(self):
        '''look at the queued jobs, and decide which ones to start'''

        started_scheduling = self.get_current_time()

        if not self.active:
            return

        self.sync_data()

        # if we're missing information, don't bother trying to schedule jobs
        if not (self.queues.__oserror__.status
                and self.jobs.__oserror__.status):
            self.sync_state.Fail()
            return
        self.sync_state.Pass()

        self.lock.acquire()
        try:
            # cleanup any reservations which have expired
            for res in self.reservations.values():
                if res.is_over():
                    self.logger.info("reservation %s has ended; removing" %
                                     (res.name))
                    self.logger.info("Res %s/%s: Ending reservation: %r" %
                                     (res.res_id, res.cycle_id, res.name))
                    #dbwriter.log_to_db(None, "ending", "reservation",
                    #        res)
                    del_reservations = self.reservations.q_del([{'name': res.name}])

            reservations_cache = self.reservations.copy()
        except:
            # just to make sure we don't keep the lock forever
            self.logger.error("error in schedule_jobs", exc_info=True)
        self.lock.release()

        # clean up the started_jobs cached data
        # TODO: Make this tunable.
        now = self.get_current_time()
        for job_name in self.started_jobs.keys():
            if (now - self.started_jobs[job_name]) > 60:
                del self.started_jobs[job_name]

        active_queues = []
        spruce_queues = []
        res_queues = set()
        for item in reservations_cache.q_get([{'queue': '*'}]):
            if self.queues.has_key(item.queue):
                if self.queues[item.queue].state == 'running':
                    res_queues.add(item.queue)

        for queue in self.queues.itervalues():
            if queue.name not in res_queues and queue.state == 'running':
                if queue.policy == "high_prio":
                    spruce_queues.append(queue)
                else:
                    active_queues.append(queue)

        # handle the reservation jobs that might be ready to go
        self._run_reservation_jobs(reservations_cache)

        # figure out stuff about queue equivalence classes
        if __running_mode__ == "simulation":
            equiv = [{'reservations': [], 'queues': ['default']}]
        else:
            res_info = {}
            for cur_res in reservations_cache.values():
                res_info[cur_res.name] = cur_res.partitions
            try:
                equiv = ComponentProxy(
                    self.COMP_SYSTEM).find_queue_equivalence_classes(
                        res_info,
                        [q.name for q in active_queues + spruce_queues])
            except:
                self.logger.error("failed to connect to system component")
                return

        for eq_class in equiv:
            # recall that is_runnable is True for certain types of holds
            temp_jobs = self.jobs.q_get([{'is_runnable':True, 'queue':queue.name} for queue in active_queues \
                if queue.name in eq_class['queues']])
            active_jobs = []
            for j in temp_jobs:
                if not self.started_jobs.has_key(j.jobid):
                    active_jobs.append(j)

            temp_jobs = self.jobs.q_get([{'is_runnable':True, 'queue':queue.name} for queue in spruce_queues \
                if queue.name in eq_class['queues']])
            spruce_jobs = []
            for j in temp_jobs:
                if not self.started_jobs.has_key(j.jobid):
                    spruce_jobs.append(j)

            # if there are any pending jobs in high_prio queues, those are the only ones that can start
            if spruce_jobs:
                active_jobs = spruce_jobs

            # get the cutoff time for backfilling
            #
            # BRT: should we use 'has_resources' or 'is_active'?  has_resources returns to false once the resource epilogue
            # scripts have finished running while is_active only returns to false once the job (not just the running task) has
            # completely terminated.  the difference is likely to be slight unless the job epilogue scripts are heavy weight.
            temp_jobs = [
                job for job in self.jobs.q_get([{
                    'has_resources': True
                }]) if job.queue in eq_class['queues']
            ]
            end_times = []
            for job in temp_jobs:
                # take the max so that jobs which have gone overtime and are being killed
                # continue to cast a small backfilling shadow (we need this for the case
                # that the final job in a drained partition runs overtime -- which otherwise
                # allows things to be backfilled into the drained partition)

                ##*AdjEst*
                if running_job_walltime_prediction:
                    runtime_estimate = float(job.walltime_p)
                else:
                    runtime_estimate = float(job.walltime)

                end_time = max(
                    float(job.starttime) + 60 * runtime_estimate, now + 5 * 60)
                end_times.append([job.location, end_time])

            for res_name in eq_class['reservations']:
                cur_res = reservations_cache[res_name]

                if not cur_res.cycle:
                    end_time = float(cur_res.start) + float(cur_res.duration)
                else:
                    done_after = float(cur_res.duration) - (
                        (now - float(cur_res.start)) % float(cur_res.cycle))
                    if done_after < 0:
                        done_after += cur_res.cycle
                    end_time = now + done_after
                if cur_res.is_active():
                    for part_name in cur_res.partitions.split(":"):
                        end_times.append([[part_name], end_time])

            if not active_jobs:
                continue
            active_jobs.sort(self.utilitycmp)

            # now smoosh lots of data together to be passed to the allocator in the system component
            job_location_args = []
            for job in active_jobs:
                forbidden_locations = set()
                for res_name in eq_class['reservations']:
                    cur_res = reservations_cache[res_name]
                    if cur_res.overlaps(self.get_current_time(),
                                        60 * float(job.walltime) + SLOP_TIME):
                        forbidden_locations.update(
                            cur_res.partitions.split(":"))

                job_location_args.append({
                    'jobid': str(job.jobid),
                    'nodes': job.nodes,
                    'queue': job.queue,
                    'forbidden': list(forbidden_locations),
                    'utility_score': job.score,
                    'walltime': job.walltime,
                    'walltime_p': job.walltime_p,  #*AdjEst*
                    'attrs': job.attrs,
                    'user': job.user,
                })

            try:
                best_partition_dict = ComponentProxy(
                    self.COMP_SYSTEM).find_job_location(
                        job_location_args, end_times)
            except:
                self.logger.error("failed to connect to system component",
                                  exc_info=True)
                best_partition_dict = {}

            for jobid in best_partition_dict:
                job = self.jobs[int(jobid)]
                self._start_job(job, best_partition_dict[jobid])

        # print "took %f seconds for scheduling loop" % (time.time() - started_scheduling, )

    schedule_jobs = locking(automatic(schedule_jobs))

    def get_resid(self, queue_name):

        return None

    get_resid = exposed(get_resid)

    def enable(self, user_name):
        """Enable scheduling"""
        self.logger.info("%s enabling scheduling", user_name)
        self.active = True

    enable = exposed(enable)

    def disable(self, user_name):
        """Disable scheduling"""
        self.logger.info("%s disabling scheduling", user_name)
        self.active = False

    disable = exposed(disable)

    def set_res_id(self, id_num):
        """Set the reservation id number."""
        self.id_gen.set(id_num)
        logger.info("Reset res_id generator to %s." % id_num)

    set_res_id = exposed(set_res_id)

    def set_cycle_id(self, id_num):
        """Set the cycle id number."""
        self.cycle_id_gen.set(id_num)
        logger.info("Reset cycle_id generator to %s." % id_num)

    set_cycle_id = exposed(set_cycle_id)

    def force_res_id(self, id_num):
        """Override the id-generator and change the resid to id_num"""
        self.id_gen.idnum = id_num - 1
        logger.warning("Forced res_id generator to %s." % id_num)

    force_res_id = exposed(force_res_id)

    def force_cycle_id(self, id_num):
        """Override the id-generator and change the cycleid to id_num"""
        self.cycle_id_gen.idnum = id_num - 1
        logger.warning("Forced cycle_id generator to %s." % id_num)

    force_cycle_id = exposed(force_cycle_id)

    def get_next_res_id(self):
        """Get what the next resid number would be"""
        return self.id_gen.idnum + 1

    get_next_res_id = exposed(get_next_res_id)

    def get_next_cycle_id(self):
        """get what the next cycleid number would be"""
        return self.cycle_id_gen.idnum + 1

    get_next_cycle_id = exposed(get_next_cycle_id)

    def __flush_msg_queue(self):
        """Send queued messages to the database-writer component"""
        dbwriter.flush_queue()

    __flush_msg_queue = automatic(
        __flush_msg_queue, float(get_bgsched_config('db_flush_interval', 10)))
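
The __getstate__/__setstate__ pairs in Examples #13 and #17 (and the standalone __setstate__ excerpts in Examples #1 and #2) persist these generators by storing next_res_id and next_cycle_id as idnum + 1 and then rebuilding fresh IncrIDs with set() on restore. A compact sketch of that round trip, using the same assumed counter semantics as the earlier sketch and not the actual Component save/restore machinery:

class SimpleIncrID(object):
    """Assumed IncrID-like counter; see the earlier sketch."""

    def __init__(self):
        self.idnum = 0

    def next(self):
        self.idnum += 1
        return self.idnum

    def set(self, value):
        self.idnum = value - 1


# Before a restart: the scheduler has issued reservation ids 1..42.
id_gen = SimpleIncrID()
for _ in range(42):
    id_gen.next()

state = {'next_res_id': id_gen.idnum + 1}    # what __getstate__ stores

# After a restart: __setstate__ builds a new generator and fast-forwards it.
restored = SimpleIncrID()
restored.set(state['next_res_id'])

assert restored.next() == 43                 # numbering continues where it left off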