def __init__(self, *args, **kwargs):
     """Initialise the base component, then the Heckle-specific state.

     Creates the process-group table (items built as HeckleProcessGroup),
     stores the result of get_resources() as the "default" queue
     assignment, and starts with an empty forbidden-node list.
     """
     Component.__init__(self, *args, **kwargs)

     pg_table = ProcessGroupDict()
     pg_table.item_cls = HeckleProcessGroup
     self.process_groups = pg_table

     self.queue_assignments["default"] = self.get_resources()

     # This is a temporary fix for the forbidden nodes issue
     self.hacky_forbidden_nodes = []
 def __init__(self, *args, **kwargs):
     """Initialise the base component, then the Heckle-specific state.

     Creates the process-group table (items built as HeckleProcessGroup)
     and a ResourceDict, and publishes the resource keys as the "default"
     queue assignment.
     """
     # NOTE(review): the line that opened this call was missing from the
     # listing (only the message and the closing paren survived);
     # logger.debug is assumed -- confirm the logger name and level.
     logger.debug(
         "heckle: System: init ... %s ... &&&&&&&&&&&&&&&&&&&&&&&&&&&&&  I am here as well &&&&&&&&&&&&&&&&&&&&&&&&&"
         % threading.current_thread().getName())
     Component.__init__(self, *args, **kwargs)
     self.process_groups = ProcessGroupDict()
     self.process_groups.item_cls = HeckleProcessGroup
     self.resources = ResourceDict()
     self.queue_assignments["default"] = self.resources.keys()
     # Single-argument print(...) emits the same text under the Python 2
     # print statement and the print function.
     print("\n\n\n\n")
     print("Queue assignments are: %s" % self.queue_assignments)
# Example #3
 def __init__(self, *args, **kwargs):
     """Initialise the base component, then the Heckle-specific state.

     Creates the process-group table (items built as HeckleProcessGroup)
     and a ResourceDict, and publishes the resource keys as the "default"
     queue assignment.
     """
     # NOTE(review): the opening of this call and its closing paren were
     # missing from the listing; logger.debug is assumed -- confirm the
     # logger name and level.
     logger.debug(
         "heckle: System: init ... %s ... &&&&&&&&&&&&&&&&&&&&&&&&&&&&&  I am here as well &&&&&&&&&&&&&&&&&&&&&&&&&"
         % threading.current_thread().getName())
     Component.__init__(self, *args, **kwargs)
     self.process_groups = ProcessGroupDict()
     self.process_groups.item_cls = HeckleProcessGroup
     self.resources = ResourceDict()
     self.queue_assignments["default"] = self.resources.keys()
     # Single-argument print(...) emits the same text under the Python 2
     # print statement and the print function.
     print("\n\n\n\n")
     print("Queue assignments are: %s" % self.queue_assignments)
class HeckleSystem(Component):
    """
    Cobalt System component for handling / interacting with Heckle resource manager

    External Methods:
         add_process_groups -- allocates nodes
         get_process_groups -- get process groups based on specs
         signal_process_groups -- signal a process group
         wait_process_groups -- removed process groups based on specs
    Internal Methods:
    Queue Manager Methods:
    """
    # NOTE(review): this class was recovered from a listing that had lost
    # many short lines (docstring quotes, try/else/continue, the openings of
    # logging calls, closing brackets).  Every line marked NOTE(review)
    # below is a reconstruction and must be checked against the real file.

    name = "system"
    implementation = "HeckleBreadboard"
    # Class-level dict: shared by every instance of the class.
    queue_assignments = {}

    def __init__(self, *args, **kwargs):
        """Initialise the component, its process groups and its resources.

        The resource keys are published as the "default" queue assignment.
        """
        # NOTE(review): call opener reconstructed; logger.debug assumed.
        logger.debug(
            "heckle: System: init ... %s ... &&&&&&&&&&&&&&&&&&&&&&&&&&&&&  I am here as well &&&&&&&&&&&&&&&&&&&&&&&&&"
            % threading.current_thread().getName())
        Component.__init__(self, *args, **kwargs)
        self.process_groups = ProcessGroupDict()
        self.process_groups.item_cls = HeckleProcessGroup
        self.resources = ResourceDict()
        self.queue_assignments["default"] = self.resources.keys()
        # Single-argument print(...) behaves the same as the Python 2
        # print statement used originally.
        print("\n\n\n\n")
        print("Queue assignments are: %s" % self.queue_assignments)

    def __repr__(self):
        """Return a flat text dump of the instance attributes and process groups."""
        pieces = ["Heckle System Object: Values"]
        indict = self.__dict__
        for element in indict:
            # str(None) is already "None", so the original's separate
            # None branch produced the same text as this single path.
            pieces.append(str(element) + "::" + str(indict[element]) + ", ")
        pieces.append("   Process Groups:")
        for element in self.process_groups:
            pieces.append(
                str(element) + "::" + str(self.process_groups[element]) + ", ")
        return "".join(pieces)

    # Main set of methods
    def add_process_groups(self, specs):
        """
        Allocate nodes and add the list of those allocated to the PGDict

        specs is a list of dictionaries
        Each dictionary contains the specifications for all the nodes in the
        process group.  Only specs[0] is used to make the Heckle reservation.

        Returns whatever self.process_groups.q_add returns.
        """
        #Debug - Take out to really rebuild
        ####    Need to check the environment variable for fakebuild
        # NOTE(review): the guard around these two statements was missing
        # from the listing (most likely a try/except); a membership test is
        # used so a job without the variable is simply left untouched.
        env = specs[0].get('env') or {}
        if 'fakebuild' in env:
            specs[0]['fakebuild'] = env['fakebuild']
            del specs[0]['env']['fakebuild']
        print("Heckle System:  add_process_groups: <<<<<<<<<<<<<<<<<<          OK< Debug< This< :  %s" % specs)
        HICCUP = HeckleConnector()
        reservation = HICCUP.make_reservation(**(specs[0]))
        # NOTE(review): the right-hand side was truncated in the listing;
        # presumably the reservation's id (it is logged with %i) -- confirm.
        heckle_res_id = reservation.id
        uid = specs[0]['user']
        logger.debug("Heckle System: heckle_res_id = %i" % heckle_res_id)
        specs[0]['heckle_res_id'] = heckle_res_id
        return self.process_groups.q_add(
            specs, lambda x, _: self._start_pg(
                x, heckle_res_id=heckle_res_id, uid=uid))
        #except Exception as hec_except:
        ## could do something here about problems
        ##    1)  Kill job, then resubmit job w/o node name(s)
        ##         Would require access to cqadm via api
        ##    2)  Put job / node in fail state
        ##    3)  Simply fail
        #raise Exception("Heckle System Object: add_process_groups: %s" % hec_except)

    add_process_groups = exposed(query(add_process_groups))

    def get_process_groups(self, specs):
        """get a list of existing allocations"""
        #logger.debug( "Heckle System: get_process_groups" )
        return self.process_groups.q_get(specs)

    get_process_groups = exposed(query(get_process_groups))

    def signal_process_groups(self, specs, sig):
        """Free the specified process group (set of allocated nodes)"""
        # NOTE(review): call opener reconstructed; logger.debug assumed.
        logger.debug(
            "Heckle System: signal_process_groups: Specs are %s, sig is %s" %
            (specs, sig))
        return self.process_groups.q_get(specs, lambda x, y: x.signal(y), sig)

    signal_process_groups = exposed(query(signal_process_groups))

    def wait_process_groups(self, specs):
        """Remove terminated process groups"""
        # NOTE(review): the format argument was truncated; `specs` assumed.
        logger.debug("Heckle System: wait_process_groups; specs are %s" %
                     specs)
        return self.process_groups.q_del(
            specs, lambda x, _: self._release_resources(x))

    wait_process_groups = exposed(query(wait_process_groups))

    # Methods for dealing with Process Groups

    def _start_pg(self, pgp, heckle_res_id, uid):
        """
        Populates the process group with its resources

        For every node in pgp.location the node's resource record gets a
        colon-separated MAC address and the reservation id, and its dict
        form is stored in pgp.resource_attributes.  pgp.uid is set last.
        """
        logger.debug("Heckle System: start_pg: PGP is %s" % pgp)
        nodelist = pgp.location
        for node in nodelist:
            node_attributes = self.resources[node]
            node_attributes['mac'] = node_attributes['mac'].replace("-", ":")
            node_attributes['heckle_res_id'] = heckle_res_id
            pgp.resource_attributes[node] = node_attributes._get_dict()
        pgp.uid = uid

    # NOTE(review): this re-wraps add_process_groups a second time and looks
    # like a copy/paste slip; kept because it is present in the original.
    add_process_groups = exposed(query(add_process_groups))

    def _check_builds_done(self):
        """
        Check to see if the nodes are done building
        Starts the process group if all nodes in them are done building

        Returns False if at least one process group ran out of pinging
        nodes during this pass, True otherwise.  Raises Exception when a
        node reports UNALLOCATED, CRITFAIL or READY.
        """
        #logger.debug( "heckle: System: Check Build Done: Waiting to Start..." )
        retval = True
        pg_list = [
            x for x in self.process_groups.itervalues()
            if (len(x.pinging_nodes) > 0)
        ]
        for pgp in pg_list:
            # Iterate over a copy: finished nodes are removed from the
            # live list inside the loop.
            for nodename in list(pgp.pinging_nodes):
                teststr = self.resources[nodename]['bootstate']
                if teststr == "COMPLETED":
                    # NOTE(review): call opener and the removal below are
                    # reconstructed from the log text ("Removing node").
                    logger.debug(
                        "heckle: System: Check Build Done: Removing node %s...%i pinging nodes left"
                        % (nodename, len(pgp.pinging_nodes) - 1))
                    pgp.pinging_nodes.remove(nodename)
                elif teststr in ["BOOTING", "", ""]:
                    logger.debug(
                        "Heckle System: Check Build Done: Node %s not done yet."
                        % nodename)
                elif teststr == "UNALLOCATED":
                    raise Exception(
                        "HIC_SO: _check_builds_done: Node says, 'UNALLOCATED'.  Possible build error, or system timed out."
                    )
                elif teststr == "CRITFAIL":
                    raise Exception(
                        "HIC_SO: _check_builds_done: Node says, 'CRITFAIL'.  It timed out while building."
                    )
                    ####      Need to figure a better way to fail gracefully on this one...
                elif teststr == "READY":
                    raise Exception(
                        "HIC_SO: _check_builds_done: Node says, 'READY'.  The Heckle Reservation is already ready already, skipping pinging."
                    )
            if len(pgp.pinging_nodes) == 0:
                logger.debug(
                    "Heckle System: Check Build Done: No Pinging Nodes left, Start PG Running."
                )
                # NOTE(review): the statement that starts the group was
                # missing from the listing; pgp.start() assumed -- confirm.
                pgp.start()
                retval = False
        return retval

    _check_builds_done = automatic(_check_builds_done)

    def _wait(self):
        """
        Calls the process group container's wait() method
        """
        #logger.debug( "Heckle System: wait" )
        for pgp in self.process_groups.itervalues():
            # NOTE(review): loop body was missing from the listing;
            # reconstructed from the docstring -- confirm.
            pgp.wait()

    _wait = automatic(_wait)

    def _release_resources(self, pgp):
        """
        Releases all the Heckle nodes, unreserving them
        """
        logger.debug("Heckle System: Release %s" % pgp.location)
        HICCUP = HeckleConnector()
        HICCUP.free_reserved_node(uid=pgp.uid, node_list=pgp.location)

    def get_resources(self, specs=None):
        """
        Returns a list of names for all the FREE resources (nodes) which match the given specs.

        specs -- optional dict of attributes to match; it is not modified.
        """
        # The original default was a shared `{}` that this method then
        # wrote into, and a caller-supplied dict was modified as well.
        # Work on a private copy instead.
        criteria = dict(specs) if specs else {}
        logger.debug("Heckle System: get Resources, specs are %s" % criteria)
        ###  Look at this as a future change
        criteria['current reservation'] = 9999999
        criteria['allocatable'] = 'True'
        res_list = self.resources >= criteria
        # NOTE(review): the format argument was truncated; `res_list` assumed.
        logger.debug("Heckle System: get Resources, resources are %s" %
                     res_list)
        return res_list

    get_resources = exposed(query(get_resources))

    # Methods for interacting with scheduler and queue-manager

    def validate_job(self, spec):
        """
        Validates a job for submission
        -- will the job ever run under the current Heckle configuration?
             1)  Validate Kernel
             2)  Validate HW
             3)  Validate Job versus overall

        Returns spec when the job is acceptable; raises Exception with a
        description of the problem otherwise.
        """
        logger.debug("Heckle System: Validate Job: Specs are %s" % spec)
        # NOTE(review): the fallbacks below were bare statements in the
        # listing (the try/except lines were lost); missing keys are
        # assumed to select the defaults.  The attrs dict is copied so the
        # matching keys added further down do not leak into spec.
        checklist = dict(spec.get('attrs') or {})
        nodecount = spec.get('nodecount', 1)
        glossary = self.resources.glossary
        dnelist = []  # for attributes which do not exist in glossary
        badlist = []  # for attributes in glossary which do not exist
        ###  Look at this as a future change
        ###  Think:  Refresh Resources Info
        #1st step:  Are there enough nodes at all?
        # NOTE(review): `>=` also rejects a request for exactly the number
        # of nodes in the system -- confirm this is intended.
        total_nodes = self.resources.node_count()
        if nodecount >= total_nodes:
            # The original message called self.resources.nodecount(), a
            # different name from the node_count() used in the test above.
            raise Exception(
                "Validate Job: Not enough nodes; Requested %s, only have %s in the system."
                % (nodecount, total_nodes))
        for att in checklist:
            val = checklist[att]
            # NOTE(review): the branch structure was lost; reconstructed
            # from the two trailing comments in the original.
            if att not in glossary:
                dnelist.append(att)  #Attribute does not exist
            elif val not in glossary[att]:
                badlist.append("%s:%s" % (att, val))  # Bad attribute
        checklist['current reservation'] = 9999999
        checklist['allocatable'] = 'True'
        retlist = self.resources >= checklist
        retcount = len(retlist)
        goodlen = retcount >= nodecount
        if goodlen and not badlist and not dnelist:
            return spec  #Good Job!
        retstr = "Validate Job: "
        if badlist or dnelist:
            if badlist:
                # The original wrote to `restr` here (NameError).
                retstr += "No value for attribute: %s" % badlist
            if dnelist:
                retstr += "Attributes Do Not Exist: %s" % dnelist
        else:
            retstr += "Need %s nodes, only have %s nodes:  %s" % (
                nodecount, retcount, retlist)
        raise Exception(retstr)

    validate_job = exposed(validate_job)

    def verify_locations(self, location_list):
        """
        Makes sure a location list is valid
        location list is a list of fully qualified strings of node names
        """
        logger.debug("heckle: System: Validate Job: Verify Locations")
        # NOTE(review): this tests the whole list for membership in the
        # glossary rather than each name -- confirm against the
        # ResourceDict glossary type.
        return location_list in self.resources.glossary

    verify_locations = exposed(verify_locations)

    def find_job_location(self, job_location_args, end_times):
        """
        Finds a group of not-busy nodes in which to run the job
             job_location_args -- A list of dictionaries with info about the job
                  jobid -- string identifier
                  nodes -- int number of nodes
                  queue -- string queue name
                  required -- ??
                  utility_score -- ??
                  threshold -- ??
                  walltime -- ??
                  attrs -- dictionary of attributes to match against
             end_times -- supposed time the job will end
        Returns: Dictionary with list of nodes a job can run on, keyed by jobid
        Raises Exception when a job cannot be given enough matching nodes.
        """
        logger.debug("heckle: System: find_job_location")
        locations = {}

        def jobsort(job):
            """Used to sort job list by utility score"""
            return job["utility_score"]

        #Try to match jobs to nodes which can run them
        for job in job_location_args:
            if "attrs" not in job or job["attrs"] is None:
                attrs = {}
            else:
                attrs = job['attrs']
            attrs['current reservation'] = 9999999
            attrs['allocatable'] = 'True'
            nodecount = int(job['nodes'])
            print("Heckle System: Find Job Location: Job is %s" % job)
            ###  Look at this as point of change
            ###  Think:  For node in unreserved nodes
            ###            Choose node from list
            ###            Remove node from unreserved nodes
            # NOTE(review): the call's closing paren was lost; a
            # no-argument call is assumed.
            print("Heckle System: Find Job Location: Free Nodes is %s" % self.resources.getfreenodes())
            nodelist = (self.resources >= attrs)  # get Matching Node
            print("Nodelist at this stage is %s" % nodelist)
            if len(nodelist) >= nodecount:
                print("Nodecount = %s" % nodecount)
                retlist = nodelist[:nodecount]
                print("Heckle System: Find Job Location: Remaining nodelist is %s" % retlist)
            else:
                raise Exception(
                    "Heckle System: find_job_locations: Not Enough matching Nodes Available"
                )
            locations[job["jobid"]] = retlist
            print("Locations is now: %s" % locations)
        # NOTE(review): the opening of this logging call was lost (its
        # message was fused onto the print above); logger.debug assumed.
        logger.debug("heckle: find_job_location: locations are %s" % locations)
        return locations

    find_job_location = exposed(find_job_location)

    def find_queue_equivalence_classes(self, reservation_dict,
                                       active_queue_names):
        """
        Finds equivalent queues
        An equivalent queue is a queue which can run upon the same partition(s)
        For now, with one partition (everything!) this is irrelevant.
        Returns: equiv= [{'reservations': [], 'queues': ['default']}]
        """
        # NOTE(review): the second parameter name is taken from its use in
        # the body; the merge, `continue`, `break` and append statements
        # below were missing from the listing and are reconstructed from
        # what the surviving lines require -- confirm.
        logger.debug("Heckle System: find queue equivalence classes")
        equiv = []
        #print "Reservation_Dict is: %s" % reservation_dict
        #print "Active_queue_names is %s" % active_queue_names
        #print "Queue assignments are: %s" % self.queue_assignments
        for queue in self.queue_assignments:
            # skip queues that aren't running
            if not queue in active_queue_names:
                continue
            found_a_match = False
            print("Heckle Queue is %s" % queue)
            for equ in equiv:
                print("Heckle Equ is %s" % equ)
                if equ['data'].intersection(self.queue_assignments[queue]):
                    equ['queues'].add(queue)
                    equ['data'].update(self.queue_assignments[queue])
                    found_a_match = True
                    break
            if not found_a_match:
                equiv.append({
                    'queues': set([queue]),
                    'data': set(self.queue_assignments[queue]),
                    'reservations': set()
                })
        # Second pass: merge classes that ended up sharing a queue.
        real_equiv = []
        for eq_class in equiv:
            found_a_match = False
            for equ in real_equiv:
                if equ['queues'].intersection(eq_class['queues']):
                    equ['queues'].update(eq_class['queues'])
                    equ['data'].update(eq_class['data'])
                    found_a_match = True
                    break
            if not found_a_match:
                real_equiv.append(eq_class)
        equiv = real_equiv
        for eq_class in equiv:
            for res_name in reservation_dict:
                for host_name in reservation_dict[res_name].split(":"):
                    if host_name in eq_class['data']:
                        eq_class['reservations'].add(res_name)
            # Sets become lists and the working 'data' key is dropped
            # before the result is returned.
            for key in eq_class:
                eq_class[key] = list(eq_class[key])
            del eq_class['data']
        return equiv

    find_queue_equivalence_classes = exposed(find_queue_equivalence_classes)

    def get_partitions(self, locations):
        """
        Work-around to get the cqadm to run a single job on this system
        PRE:  locations is a list of dict of strings of possible node names
        POST:  if good, return locations
               if not good, raise exception and list bad nodes
        """
        nodelist = self.resources.Glossary.nodelist
        # NOTE(review): `locs` was used but never assigned in the listing;
        # the first entry's 'name' is assumed, following the documented
        # shape of `locations` -- confirm.  The truncated log and error
        # arguments are assumed to be `locations`.
        locs = locations[0]['name']
        logger.debug("Heckle System: get_partitions: raw is are: %s" %
                     locations)
        logger.debug("Heckle System: get_partitions: vals are: %s" % locs)
        if locations in nodelist:
            return locations
        else:
            raise Exception(
                "heckle: System: get_partition: Bad Locations: %s " %
                locations)

    get_partitions = exposed(get_partitions)
 def __init__(self, *args, **kwargs):
     """Initialise the base component, then the Heckle-specific state.

     Creates the process-group table (items built as HeckleProcessGroup),
     stores the result of get_resources() as the "default" queue
     assignment, and starts with an empty forbidden-node list.
     """
     Component.__init__(self, *args, **kwargs)

     pg_table = ProcessGroupDict()
     pg_table.item_cls = HeckleProcessGroup
     self.process_groups = pg_table

     self.queue_assignments["default"] = self.get_resources()

     # This is a temporary fix for the forbidden nodes issue
     self.hacky_forbidden_nodes = []
class HeckleSystem(Component):
    """
    Cobalt System component for handling / interacting with Heckle resource manager

    External Methods:
        add_process_groups -- allocates nodes
        get_process_groups -- get process groups based on specs
        signal_process_groups -- signal a process group
        wait_process_groups -- removed process groups based on specs
    Internal Methods:
    Queue Manager Methods:
    """
    # NOTE(review): this class was recovered from a listing that had lost
    # many short lines (docstring quotes, try/else/continue, the openings of
    # logging calls, continuation lines).  Every line marked NOTE(review)
    # below is a reconstruction and must be checked against the real file.

    name = "system"
    implementation = "HeckleBreadboard"
    # Class-level dict: shared by every instance of the class.
    queue_assignments = {}

    def __init__(self, *args, **kwargs):
        """Initialise the component, its process groups and the default queue."""
        Component.__init__(self, *args, **kwargs)
        self.process_groups = ProcessGroupDict()
        self.process_groups.item_cls = HeckleProcessGroup
        self.queue_assignments["default"] = self.get_resources()
        self.hacky_forbidden_nodes = []   #This is a temporary fix for the forbidden nodes issue

    def __repr__(self):
        """Return a flat text dump of the instance attributes and process groups."""
        pieces = ["Heckle System Object: Values"]
        indict = self.__dict__
        for element in indict:
            # str(None) is already "None", so the original's separate
            # None branch produced the same text as this single path.
            pieces.append(str(element) + "::" + str(indict[element]) + ", ")
        pieces.append("   Process Groups:")
        for element in self.process_groups:
            pieces.append(
                str(element) + "::" + str(self.process_groups[element]) + ", ")
        return "".join(pieces)

    # Main set of methods
    def add_process_groups(self, specs):
        """
        This function takes the specs (a list of jobs) and initiates each job as
        a process group.
        The process group abstracts the actual job into an object, providing a
        single point of control and interaction for all the nodes within that job.
        Each job is described by a dict.  Each dict contains:
            kernel: a String, the name of the kernel image to load.
            executable: A string, the name of the command to execute upon the
                head node; this could be considered the actual job's file.
            stdin, stdout, stderr:  Three separate strings, each containing
                the file to use for standard communication with the job as it
                is running.  May be specified, or False.
            kerneloptions: A string containing various options for the kernel,
                or False.
            args: A list
            umask: An integer
            jobid: An integer
            cobalt_log_file: A string containing the log file to use in the
                initiation and running of the job itself.
            location:  List of strings of node / resource names
            env:  A dict of key:value strings, specifying the environment in
                which the job is to run on the node
            id: A number
            cwd:  A string, specifying the current working directory in which
                to run the job on the node
            walltime:  Integer; the time, in minutes, allocated for the job
                to run on the node.
            user:  A string, the name of the user under which this job is to run.
        """
        logstr = "System:add_process_groups:"
        LOGGER.debug( logstr + "Specs are %s" % specs )
        return self.process_groups.q_add(specs)
    add_process_groups = exposed(query(add_process_groups))

    def get_process_groups(self, specs):
        """get a list of existing allocations"""
        LOGGER.debug( "System:get_process_groups: specs are %s" % specs )
        return self.process_groups.q_get(specs)
    get_process_groups = exposed(query(get_process_groups))

    def signal_process_groups(self, specs, sig):
        """Free the specified process group (set of allocated nodes)"""
        # NOTE(review): call opener reconstructed; LOGGER.debug assumed.
        LOGGER.debug(
            "System:signal_process_groups: Specs are %s, sig is %s"
            % (specs, sig) )
        return self.process_groups.q_get(specs, lambda x, y:x.signal(y), sig)
    signal_process_groups = exposed(query(signal_process_groups))

    def wait_process_groups(self, specs):
        """Remove terminated process groups"""
        LOGGER.debug( "System:wait_process_groups; specs are %s" % specs )
        # NOTE(review): the continuation of this call was lost after
        # `lambda x, \`; releasing each group's resources is assumed.
        return self.process_groups.q_del(
            specs, lambda x, _: self._release_resources(x))
    wait_process_groups = exposed(query(wait_process_groups))

    # Methods for dealing with Process Groups
    def _check_builds_done(self):
        """
        Check to see if the nodes are done building
        Starts the process group if all nodes in them are done building

        Returns False if at least one process group ran out of pinging
        nodes during this pass, True otherwise.  Raises Exception when a
        node reports UNALLOCATED or CRITFAIL.
        """
        #LOGGER.debug( "System:Check Build Done: Waiting to Start..." )
        exstr = "System:check_build_done:"
        retval = True
        pg_list = [x for x in self.process_groups.itervalues()
                   if (len(x.pinging_nodes) > 0)]
        hiccup = HeckleConnector()
        for pgp in pg_list:
            # Iterate over a copy: finished nodes are removed from the
            # live list inside the loop.
            for nodename in list(pgp.pinging_nodes):
                teststr = hiccup.get_node_bootstate(nodename)
                if teststr == "READY":
                    if 'fakebuild' in pgp.__dict__ and pgp.fakebuild:
                        # The original applied `%` to the last literal only
                        # ('"a" + "%s ..." % (x, y)'), which raises
                        # TypeError; the message is joined before
                        # formatting so both values are substituted.
                        LOGGER.debug(
                            (exstr + "Node %s done building; "
                             + "%s pinging nodes left")
                            % (nodename, len(pgp.pinging_nodes) - 1))
                        # NOTE(review): removal reconstructed from the log
                        # text; the statement was missing from the listing.
                        pgp.pinging_nodes.remove(nodename)
                    else:
                        LOGGER.debug( exstr + "Node %s not done yet" %
                                          nodename )
                if  teststr == "COMPLETED":
                    LOGGER.debug( exstr +
                         "Removing node %s...%i pinging nodes left"
                              % (nodename, len(pgp.pinging_nodes)-1) )
                    # NOTE(review): removal reconstructed (see above).
                    pgp.pinging_nodes.remove(nodename)
                elif teststr in ["BOOTING", "", ""]:
                    LOGGER.debug( exstr +
                    "Node %s not done yet." % nodename)
                elif teststr == "UNALLOCATED":
                    raise Exception( exstr +
                        "Node 'UNALLOCATED'; Possible build error, or system timed out.")
                elif teststr == "CRITFAIL":
                    raise Exception( exstr +
                        "Node says, 'CRITFAIL'.  It timed out while building.")
                ####     Need to figure a better way to fail gracefully
            if len(pgp.pinging_nodes) == 0:
                # NOTE(review): call opener and the start() call are
                # reconstructed; LOGGER.debug / pgp.start() assumed.
                LOGGER.debug(
                    "System:Check Build Done: No Pinging Nodes left, Start PG %s Running."
                    % pgp.jobid)
                pgp.start()
                retval = False
        return retval
    _check_builds_done = automatic(_check_builds_done)

    def _wait(self):
        """
        Calls the process group container's wait() method
        """
        waitlen = len( self.process_groups.keys() )
        LOGGER.debug( "System:_wait:%s process groups." % waitlen )
        for pgp in self.process_groups.itervalues():
            # NOTE(review): the wait() call and the guard around the `del`
            # were missing from the listing.  As written, the `del` indexes
            # a list with pgp.location; if that is a list of node names the
            # statement always raises -- the guard keeps that harmless
            # until the intended behaviour is confirmed.
            pgp.wait()
            try:
                del( self.hacky_forbidden_nodes[pgp.location] )
            except (TypeError, IndexError):
                pass
    _wait = automatic(_wait)

    def _release_resources(self, pgp):
        """
        Releases all the Heckle nodes, unreserving them
        """
        LOGGER.debug( "System:release" )
        LOGGER.debug( "System:Locations are: %s" % pgp.location )
        hiccup = HeckleConnector()
        hiccup.free_reserved_node( uid = pgp.uid, node_list=pgp.location )
        # The original `del self.hacky_forbidden_nodes[pgp.location]`
        # indexes a list with the location value and cannot succeed.
        # NOTE(review): assumes pgp.location is a list of node names.
        # Each released node is removed individually, in place, so other
        # references to the list see the change.
        for node in pgp.location:
            if node in self.hacky_forbidden_nodes:
                self.hacky_forbidden_nodes.remove(node)

    def get_resources(self, specs=None ):
        """
        Returns a list of free resources (nodes) which match the given specs.
        Specs is a dict which describes a job
        With no specs, the connector's full node_list is returned.
        """
        LOGGER.debug( "System:get Resources" )
        ###  Look at this as a future change
        hiccup = HeckleConnector()
        if not specs:
            return hiccup.node_list
        else:
            return hiccup.list_available_nodes( **specs )
    get_resources = exposed(query(get_resources))

    # Methods for interacting with scheduler and queue-manager
    def validate_job(self, spec):
        """
        Validates a job for submission
        -- will the job ever run under the current Heckle configuration?
            1)  Validate Kernel
            2)  Validate HW
            3)  Validate Job versus overall

        Returns spec (with 'kernel' defaulted when absent); raises
        Exception when the kernel or hardware attributes are rejected.
        """
        LOGGER.debug( "System:Validate Job: Specs are %s" % spec )
        hiccup = HeckleConnector()
        # NOTE(review): the try/except lines around the kernel check were
        # lost.  Reconstructed so that a missing kernel selects 'default'
        # while an invalid one is reported; if the original swallowed the
        # "Bad Kernel" error as well, this is stricter -- confirm.
        if 'kernel' in spec:
            kernel = spec['kernel']
            valid_kernel = hiccup.validkernel( kernel )
            if not valid_kernel:
                raise Exception("System:Validate Job: Bad Kernel")
        else:
            spec['kernel'] = 'default'
        try:
            valid_hw = hiccup.validhw( **spec['attrs'] )
            if not valid_hw:
                raise Exception(
                "System:Validate Job: Bad Hardware Specs: %s" % spec )
        except Exception as strec:
            raise Exception("System:Validate Job:  Validate Job: %s" % strec)
            #valid_job = hiccup.validjob( **spec )
            #if not valid_job:
                #raise Exception(
                #"System: validate Job:  Never enough nodes")
            #raise Exception("System: validate Job: Never enough nodes")
        return spec
    validate_job = exposed(validate_job)

    def verify_locations(self, location_list):
        """
        Makes sure a location list is valid
        location list is a list of fully qualified strings of node names

        Returns location_list unchanged; raises Exception naming the
        entries that the connector does not list.
        """
        LOGGER.debug("System:validate Job: Verify Locations")
        hiccup = HeckleConnector()
        heckle_set = set(hiccup.list_all_nodes())
        location_set = set(location_list)
        if heckle_set >= location_set:
            return location_list
        else:
            not_valid_list = list( location_set.difference( heckle_set ) )
            raise Exception(
                "System:VerifyLocations: Invalid location names: %s" % not_valid_list)
    verify_locations = exposed( verify_locations )

    def find_job_location(self, job_location_args, end_times):
        """
        Finds a group of not-busy nodes in which to run the job
            job_location_args -- A list of dictionaries with info about the job
                jobid -- string identifier
                nodes -- int number of nodes
                queue -- string queue name
                required -- ??
                utility_score -- ??
                threshold -- ??
                walltime -- ??
                attrs -- dictionary of attributes to match against
            end_times -- supposed time the job will end
        Returns: Dictionary with list of nodes a job can run on, keyed by jobid
        """
        LOGGER.debug("System:find_job_location" )
        locations = {}
        def jobsort(job):
            """Used to sort job list by utility score"""
            return job["utility_score"]
        #Try to match jobs to nodes which can run them
        hiccup = HeckleConnector()
        for job in job_location_args:
            if "attrs" not in job or job["attrs"] is None:
                job["attrs"] = {}
            print("Job is %s" % job)
            tempjob = job.copy()
            if self.hacky_forbidden_nodes:
                # Build a fresh list: the original either aliased
                # self.hacky_forbidden_nodes or extended the caller's own
                # 'forbidden' list through the shallow copy.
                tempjob['forbidden'] = (
                    list(tempjob.get('forbidden') or [])
                    + list(self.hacky_forbidden_nodes))
            ###  Look at this as point of change
            ###  Think:  For node in unreserved nodes
            ###            Choose node from list
            ###            Remove node from unreserved nodes
            # NOTE(review): `try:`, both `continue`s and the logging call
            # opener were lost and are reconstructed.  The original passed
            # **job, leaving tempjob (and so the forbidden list) unused;
            # tempjob is passed here.  When the forbidden list is empty the
            # two are equal.  Confirm the connector accepts 'forbidden'.
            try:
                resources = hiccup.find_job_location(**tempjob)  #get matching nodes
                if not resources:
                    continue
            except Exception as err:
                LOGGER.debug(
                    "System:find_job_location: Error %s" % err)
                continue
            # Build a list of appropriate nodes
            # NOTE(review): the loop body was lost; collecting every
            # returned node is assumed.
            node_list = []
            for node in resources:
                node_list.append(node)
            locations[job["jobid"]] = node_list
        # NOTE(review): call opener reconstructed; LOGGER.debug assumed.
        LOGGER.debug("System:find_job_location: locations are %s" % locations )
        return locations
    find_job_location = exposed(find_job_location)

    def find_queue_equivalence_classes(self, reservation_dict,
                                       active_queue_names):
        """
        Finds equivalent queues
        An equivalent queue is a queue which can run upon the same partition(s)
        For now, with one partition (everything!) this is irrelevant.
        Returns: equiv= [{'reservations': [], 'queues': ['default']}]
        """
        # NOTE(review): the second parameter name is taken from its use in
        # the body; the merge, `continue`, `break` and append statements
        # below were missing from the listing and are reconstructed from
        # what the surviving lines require -- confirm.
        #LOGGER.debug("System:find queue equivalence classes" )
        equiv = []
        #print "Reservation_Dict is: %s" % reservation_dict
        #print "Active_queue_names is %s" % active_queue_names
        #print "Queue assignments are: %s" % self.queue_assignments
        for queue in self.queue_assignments:
            # skip queues that aren't running
            if not queue in active_queue_names:
                continue
            found_a_match = False
            #print "Heckle Queue is %s" % queue
            for equ in equiv:
                print("Heckle Equ is %s" % equ)
                if equ['data'].intersection(self.queue_assignments[queue]):
                    equ['queues'].add(queue)
                    equ['data'].update(self.queue_assignments[queue])
                    found_a_match = True
                    break
            if not found_a_match:
                equiv.append({'queues': set([queue]),
                                'data': set(self.queue_assignments[queue]),
                                'reservations': set()})
        # Second pass: merge classes that ended up sharing a queue.
        real_equiv = []
        for eq_class in equiv:
            found_a_match = False
            for equ in real_equiv:
                if equ['queues'].intersection(eq_class['queues']):
                    equ['queues'].update(eq_class['queues'])
                    equ['data'].update(eq_class['data'])
                    found_a_match = True
                    break
            if not found_a_match:
                real_equiv.append(eq_class)
        equiv = real_equiv
        for eq_class in equiv:
            for res_name in reservation_dict:
                for host_name in reservation_dict[res_name].split(":"):
                    if host_name in eq_class['data']:
                        eq_class['reservations'].add(res_name)
            # Sets become lists and the working 'data' key is dropped
            # before the result is returned.
            for key in eq_class:
                eq_class[key] = list(eq_class[key])
            del eq_class['data']
        return equiv
    find_queue_equivalence_classes = exposed(find_queue_equivalence_classes)

    def get_partitions(self, locations):
        """
        Work-around to get the cqadm to run a single job on this system
        PRE:  locations is a list of dict of strings of possible node names
        POST:  if good, return locations
                if not good, raise exception and list bad nodes
        """
        logstr = "System:get_partition: "
        hiccup = HeckleConnector()
        heckle_node_set = set(hiccup.list_all_nodes())
        locs = locations[0]['name']
        LOGGER.debug( logstr + "raw is are: %s" % locations )
        LOGGER.debug( logstr + "vals are: %s" % locs )
        if isinstance(locs, list):
            locset = set(locs)
            badlocations = locset.difference(heckle_node_set)
            if badlocations:
                raise Exception(
                    logstr + "Bad Locations: %s " % list(badlocations) )
        elif isinstance(locs, str):
            # The original tested `locs not in locations`, i.e. a node name
            # against the list of dicts it was taken from, which is always
            # true; the known node set is the intended reference.
            if locs not in heckle_node_set:
                raise Exception( logstr + "Bad Locations: %s" % locs)
        else:
            raise Exception( logstr +
                "location needs to be string or list of strings, you provided %s : %s"
                % ( type(locs), locs))
        return locations
    get_partitions = exposed(get_partitions)
class HeckleSystem(Component):
    """
    Cobalt System component for handling / interacting with Heckle resource manager

    External Methods:
        add_process_groups -- allocates nodes
        get_process_groups -- get process groups based on specs
        signal_process_groups -- signal a process group
        wait_process_groups -- removed process groups based on specs
    Internal Methods:
        _check_builds_done -- start process groups whose nodes finished building
        _wait -- call wait() on every process group
        _release_resources -- free the Heckle nodes of a process group
    Queue Manager Methods:
        get_resources, validate_job, verify_locations, find_job_location,
        find_queue_equivalence_classes, get_partitions
    """

    name = "system"
    implementation = "HeckleBreadboard"
    # Class-level dict: it is shared by every instance of this component.
    queue_assignments = {}

    def __init__(self, *args, **kwargs):
        """Set up the process-group container and the default queue."""
        Component.__init__(self, *args, **kwargs)
        self.process_groups = ProcessGroupDict()
        self.process_groups.item_cls = HeckleProcessGroup
        self.queue_assignments["default"] = self.get_resources()
        # This is a temporary fix for the forbidden nodes issue
        self.hacky_forbidden_nodes = []

    def __repr__(self):
        """
        printout representation of the class
        """
        parts = ["Heckle System Object: Values"]
        for key, value in self.__dict__.items():
            # str(None) is "None", so no special case is needed for None.
            parts.append("%s::%s, " % (key, value))
        parts.append("   Process Groups:")
        for key in self.process_groups:
            parts.append("%s::%s, " % (key, self.process_groups[key]))
        return "".join(parts)

    # Main set of methods
    def add_process_groups(self, specs):
        """
        This function takes the specs (a list of jobs) and initiates each job as
        a process group.
        The process group abstracts the actual job into an object, providing a
        single point of control and interaction for all the nodes within that job.
        Each job is described by a dict.  Each dict contains:
            kernel: a String, the name of the kernel image to load.
            executable: A string, the name of the command to execute upon the
                head node; this could be considered the actual job's file.
            stdin, stdout, stderr:  Three separate strings, each containing
                the file to use for standard communication with the job as it
                is running.  May be specified, or False.
            kerneloptions: A string containing various options for the kernel,
                or False.
            args: A list
            umask: An integer
            jobid: An integer
            cobalt_log_file: A string containing the log file to use in the
                initiation and running of the job itself.
            location:  List of strings of node / resource names
            env:  A dict of key:value strings, specifying the environment in
                which the job is to run on the node
            id: A number
            cwd:  A string, specifying the current working directory in which
                to run the job on the node
            walltime:  Integer; the time, in minutes, allocated for the job
                to run on the node.
            user:  A string, the name of the user under which this job is to run.
        """
        logstr = "System:add_process_groups:"
        LOGGER.debug(logstr + "Specs are %s" % specs)
        return self.process_groups.q_add(specs)

    add_process_groups = exposed(query(add_process_groups))

    def get_process_groups(self, specs):
        """get a list of existing allocations"""
        LOGGER.debug("System:get_process_groups: specs are %s" % specs)
        return self.process_groups.q_get(specs)

    get_process_groups = exposed(query(get_process_groups))

    def signal_process_groups(self, specs, sig):
        """Send signal sig to the process groups matching specs"""
        LOGGER.debug(
            "System:signal_process_groups: Specs are %s, sig is %s"
            % (specs, sig))
        return self.process_groups.q_get(specs, lambda x, y: x.signal(y), sig)

    signal_process_groups = exposed(query(signal_process_groups))

    def wait_process_groups(self, specs):
        """Remove terminated process groups"""
        LOGGER.debug("System:wait_process_groups; specs are %s" % specs)
        # Each removed process group has its Heckle nodes released.
        return self.process_groups.q_del(
            specs, lambda x, _: self._release_resources(x))

    wait_process_groups = exposed(query(wait_process_groups))

    # Methods for dealing with Process Groups

    def _check_builds_done(self):
        """
        Check to see if the nodes are done building
        Starts the process group if all nodes in them are done building

        Returns True unless at least one process group ran out of pinging
        nodes during this pass, in which case False is returned.
        """
        exstr = "System:check_build_done:"
        retval = True
        pg_list = [pgp for pgp in self.process_groups.itervalues()
                   if len(pgp.pinging_nodes) > 0]
        hiccup = HeckleConnector()
        for pgp in pg_list:
            # Iterate over a snapshot: finished nodes are removed from the
            # live list inside the loop, which would otherwise skip entries.
            for nodename in list(pgp.pinging_nodes):
                teststr = hiccup.get_node_bootstate(nodename)
                nodes_left = len(pgp.pinging_nodes) - 1
                if teststr == "READY":
                    if 'fakebuild' in pgp.__dict__ and pgp.fakebuild:
                        # The whole message is formatted at once; applying %
                        # to only the second half raised TypeError.
                        LOGGER.debug(
                            exstr
                            + "Node %s done building; %s pinging nodes left"
                            % (nodename, nodes_left))
                        pgp.pinging_nodes.remove(nodename)
                    else:
                        LOGGER.debug(
                            exstr + "Node %s not done yet" % nodename)
                elif teststr == "COMPLETED":
                    LOGGER.debug(
                        exstr + "Removing node %s...%i pinging nodes left"
                        % (nodename, nodes_left))
                    pgp.pinging_nodes.remove(nodename)
                elif teststr in ["BOOTING", "", ""]:
                    LOGGER.debug(exstr + "Node %s not done yet." % nodename)
                elif teststr == "UNALLOCATED":
                    raise Exception(
                        exstr +
                        "Node 'UNALLOCATED'; Possible build error, or system timed out."
                    )
                elif teststr == "CRITFAIL":
                    raise Exception(
                        exstr +
                        "Node says, 'CRITFAIL'.  It timed out while building.")
                ####     Need to figure a better way to fail gracefully
            if len(pgp.pinging_nodes) == 0:
                LOGGER.debug(
                    "System:Check Build Done: No Pinging Nodes left, Start PG %s Running."
                    % pgp.jobid)
                # NOTE(review): the start call was lost from this copy of the
                # file; restored from the docstring ("Starts the process
                # group ...") -- confirm against the upstream source.
                pgp.start()
                retval = False
        return retval

    _check_builds_done = automatic(_check_builds_done)

    def _wait(self):
        """
        Calls the process group container's wait() method
        """
        waitlen = len(self.process_groups.keys())
        LOGGER.debug("System:_wait:%s process groups." % waitlen)
        for pgp in self.process_groups.itervalues():
            # NOTE(review): the wait() call was lost from this copy of the
            # file; restored from the docstring -- confirm upstream.
            pgp.wait()
            try:
                # NOTE(review): hacky_forbidden_nodes is a list, so indexing
                # it with pgp.location (a list of names) raises TypeError and
                # this statement has no effect.  Kept as a guarded no-op
                # because the intended behaviour here is unclear.
                del (self.hacky_forbidden_nodes[pgp.location])
            except (TypeError, KeyError, IndexError):
                pass

    _wait = automatic(_wait)

    def _release_resources(self, pgp):
        """
        Releases all the Heckle nodes, unreserving them
        """
        LOGGER.debug("System:Locations are: %s" % pgp.location)
        hiccup = HeckleConnector()
        hiccup.free_reserved_node(uid=pgp.uid, node_list=pgp.location)
        # Released nodes must no longer be forbidden.  The list cannot be
        # indexed by a list of names, so drop the names one by one.
        released = set(pgp.location or [])
        self.hacky_forbidden_nodes[:] = [
            node for node in self.hacky_forbidden_nodes
            if node not in released]

    def get_resources(self, specs=None):
        """
        Returns a list of free resources (nodes) which match the given specs.
        Specs is a dict which describes a job

        With no specs, the connector's full node list is returned.
        """
        LOGGER.debug("System:get Resources")
        ###  Look at this as a future change
        hiccup = HeckleConnector()
        if not specs:
            return hiccup.node_list
        return hiccup.list_available_nodes(**specs)

    get_resources = exposed(query(get_resources))

    # Methods for interacting with scheduler and queue-manager

    def validate_job(self, spec):
        """
        Validates a job for submission
        -- will the job ever run under the current Heckle configuration?
            1)  Validate Kernel
            2)  Validate HW
            3)  Validate Job versus overall

        Returns the (possibly updated) spec; raises Exception when the kernel
        or the hardware attributes are not valid.
        """
        LOGGER.debug("System:Validate Job: Specs are %s" % spec)
        hiccup = HeckleConnector()
        try:
            kernel = spec['kernel']
        except KeyError:
            # No kernel requested: fall back to the default image.
            spec['kernel'] = 'default'
        else:
            if not hiccup.validkernel(kernel):
                raise Exception("System:Validate Job: Bad Kernel")
        try:
            valid_hw = hiccup.validhw(**spec['attrs'])
            if not valid_hw:
                raise Exception("System:Validate Job: Bad Hardware Specs: %s" %
                                spec['attrs'])
        except Exception as strec:
            raise Exception("System:Validate Job:  Validate Job: %s" % strec)
        # Step 3 (job versus overall node count) is not implemented yet.
        return spec

    validate_job = exposed(validate_job)

    def verify_locations(self, location_list):
        """
        Makes sure a location list is valid
        location list is a list of fully qualified strings of node names

        Returns location_list when every name is known to Heckle, otherwise
        raises Exception listing the unknown names.
        """
        LOGGER.debug("System:validate Job: Verify Locations")
        hiccup = HeckleConnector()
        heckle_set = set(hiccup.list_all_nodes())
        location_set = set(location_list)
        if heckle_set >= location_set:
            return location_list
        not_valid_list = list(location_set.difference(heckle_set))
        raise Exception(
            "System:VerifyLocations: Invalid location names: %s" %
            not_valid_list)

    verify_locations = exposed(verify_locations)

    def find_job_location(self, job_location_args, end_times):
        """
        Finds a group of not-busy nodes in which to run the job
            job_location_args -- A list of dictionaries with info about the job
                jobid -- string identifier
                nodes -- int number of nodes
                queue -- string queue name
                required -- ??
                utility_score -- ??
                threshold -- ??
                walltime -- ??
                attrs -- dictionary of attributes to match against
            end_times -- supposed time the job will end
        Returns: Dictionary with list of nodes a job can run on, keyed by jobid
        """
        locations = {}

        def jobsort(job):
            """Used to sort job list by utility score"""
            return job["utility_score"]
        # NOTE(review): jobsort is never applied in this copy of the file;
        # confirm whether the job list is meant to be sorted before matching.

        #Try to match jobs to nodes which can run them
        hiccup = HeckleConnector()
        for job in job_location_args:
            if "attrs" not in job or job["attrs"] is None:
                job["attrs"] = {}
            LOGGER.debug("Job is %s" % job)
            tempjob = job.copy()
            if self.hacky_forbidden_nodes:
                if 'forbidden' not in tempjob.keys():
                    tempjob['forbidden'] = self.hacky_forbidden_nodes
            # NOTE(review): tempjob (carrying the forbidden nodes) is built
            # but the connector is called with `job`; confirm which one the
            # connector is supposed to receive.
            ###  Look at this as point of change
            ###  Think:  For node in unreserved nodes
            ###            Choose node from list
            ###            Remove node from unreserved nodes
            try:
                resources = hiccup.find_job_location(**job)  #get matching nodes
            except Exception as err:
                # A failure for one job must not stop the others.
                LOGGER.error("System:find_job_location: Error %s" % err)
                continue
            if not resources:
                continue
            # Build a list of appropriate nodes
            locations[job["jobid"]] = list(resources)
        LOGGER.debug("System:find_job_location: locations are %s" % locations)
        return locations

    find_job_location = exposed(find_job_location)

    def find_queue_equivalence_classes(self, reservation_dict,
                                       active_queue_names):
        """
        Finds equivalent queues
        An equivalent queue is a queue which can run upon the same partition(s)
        For now, with one partition (everything!) this is irrelevant.
        Returns: equiv= [{'reservations': [], 'queues': ['default']}]
        """
        equiv = []
        for queue in self.queue_assignments:
            # skip queues that aren't running
            if queue not in active_queue_names:
                continue
            queue_nodes = self.queue_assignments[queue]
            found_a_match = False
            for equ in equiv:
                LOGGER.debug("Heckle Equ is %s" % equ)
                if equ['data'].intersection(queue_nodes):
                    # Queues that share nodes belong to the same class.
                    equ['queues'].add(queue)
                    equ['data'].update(queue_nodes)
                    found_a_match = True
                    break
            if not found_a_match:
                equiv.append({
                    'queues': set([queue]),
                    'data': set(queue_nodes),
                    'reservations': set()
                })
        # Second pass: merge classes that ended up sharing a queue.
        real_equiv = []
        for eq_class in equiv:
            found_a_match = False
            for equ in real_equiv:
                if equ['queues'].intersection(eq_class['queues']):
                    equ['queues'].update(eq_class['queues'])
                    equ['data'].update(eq_class['data'])
                    found_a_match = True
                    break
            if not found_a_match:
                real_equiv.append(eq_class)
        equiv = real_equiv
        for eq_class in equiv:
            # Attach every reservation that names a host of this class.
            for res_name in reservation_dict:
                for host_name in reservation_dict[res_name].split(":"):
                    if host_name in eq_class['data']:
                        eq_class['reservations'].add(res_name)
            # Sets are converted to lists for the caller; the node data is
            # internal bookkeeping and is dropped from the result.
            for key in eq_class:
                eq_class[key] = list(eq_class[key])
            del eq_class['data']
        return equiv

    find_queue_equivalence_classes = exposed(find_queue_equivalence_classes)

    def get_partitions(self, locations):
        """
        Work-around to get the cqadm to run a single job on this system
        PRE:  locations is a list of dict of strings of possible node names
        POST:  if good, return locations
                if not good, raise exception and list bad nodes
        """
        logstr = "System:get_partition: "
        hiccup = HeckleConnector()
        heckle_node_set = set(hiccup.list_all_nodes())
        locs = locations[0]['name']
        LOGGER.debug(logstr + "raw is are: %s" % locations)
        LOGGER.debug(logstr + "vals are: %s" % locs)
        if isinstance(locs, list):
            badlocations = set(locs).difference(heckle_node_set)
            if badlocations:
                raise Exception(logstr +
                                "Bad Locations: %s " % list(badlocations))
        elif isinstance(locs, str):
            # A single node name must be checked against the Heckle node set;
            # comparing it with `locations` (a list of dicts) never matches.
            if locs not in heckle_node_set:
                raise Exception(logstr + "Bad Locations: %s" % locs)
        else:
            raise Exception(
                logstr +
                "location needs to be string or list of strings, you provided %s : %s"
                % (type(locs), locs))
        return locations

    get_partitions = exposed(get_partitions)
# Example #8
class HeckleSystem(Component):
    """
    Cobalt System component for handling / interacting with Heckle resource manager

    External Methods:
        add_process_groups -- allocates nodes
        get_process_groups -- get process groups based on specs
        signal_process_groups -- signal a process group
        wait_process_groups -- removed process groups based on specs
    Internal Methods:
    Queue Manager Methods:
    """

    name = "system"
    implementation = "HeckleBreadboard"
    queue_assignments = {}

    def __init__(self, *args, **kwargs):
        """Set up the process-group container, the resource table and the
        default queue (which is assigned every known resource name)."""
        logger.debug(
            "heckle: System: init ... %s ... &&&&&&&&&&&&&&&&&&&&&&&&&&&&&  I am here as well &&&&&&&&&&&&&&&&&&&&&&&&&"
            % threading.current_thread().getName()
        )
        Component.__init__(self, *args, **kwargs)
        self.process_groups = ProcessGroupDict()
        self.process_groups.item_cls = HeckleProcessGroup
        self.resources = ResourceDict()
        self.queue_assignments["default"] = self.resources.keys()
        # Diagnostic output goes to the logger instead of stdout.
        logger.debug("Queue assignments are: %s" % self.queue_assignments)

    def __repr__(self):
        """
        printout representation of the class
        """
        parts = ["Heckle System Object: Values"]
        for key, value in self.__dict__.items():
            # str(None) is "None", so no special case is needed for None.
            parts.append("%s::%s, " % (key, value))
        parts.append("   Process Groups:")
        for key in self.process_groups:
            parts.append("%s::%s, " % (key, self.process_groups[key]))
        return "".join(parts)

    # Main set of methods
    def add_process_groups(self, specs):
        """
        Allocate nodes and add the list of those allocated to the PGDict
        specs is a list of dictionaries
        Each dictionary contains the specifications for all the nodes in the process group

        Only the first entry of specs is used to make the Heckle reservation.
        """
        first = specs[0]
        # Debug - Take out to really rebuild
        # The "fakebuild" flag travels in the job environment; move it to the
        # spec itself so it is not exported to the job.
        env = first.get("env")
        if env and "fakebuild" in env:
            first["fakebuild"] = env.pop("fakebuild")
        logger.debug(
            "Heckle System:  add_process_groups: <<<<<<<<<<<<<<<<<<          OK< Debug< This< :  %s"
            % specs)
        HICCUP = HeckleConnector()
        reservation = HICCUP.make_reservation(**first)
        # NOTE(review): the right-hand side of this assignment was lost from
        # this copy of the file; `reservation.id` is an assumption -- confirm
        # against HeckleConnector.make_reservation.
        heckle_res_id = reservation.id
        uid = first["user"]
        logger.debug("Heckle System: heckle_res_id = %i" % heckle_res_id)
        first["heckle_res_id"] = heckle_res_id
        # Reservation failures currently propagate to the caller; options
        # noted by the original author were to resubmit without node names,
        # to put the job / node in a fail state, or simply to fail.
        return self.process_groups.q_add(
            specs,
            lambda x, _: self._start_pg(x, heckle_res_id=heckle_res_id, uid=uid))

    add_process_groups = exposed(query(add_process_groups))

    def get_process_groups(self, specs):
        """Return the existing process groups (allocations) selected by
        specs, as produced by the process-group container's q_get."""
        matching = self.process_groups.q_get(specs)
        return matching

    get_process_groups = exposed(query(get_process_groups))

    def signal_process_groups(self, specs, sig):
        """Call signal(sig) on every process group selected by specs and
        return what the process-group container's q_get returns."""
        logger.debug("Heckle System: signal_process_groups: Specs are %s, sig is %s" % (specs, sig))

        def deliver(group, signum):
            # Callback applied by q_get to each selected process group.
            return group.signal(signum)

        return self.process_groups.q_get(specs, deliver, sig)

    signal_process_groups = exposed(query(signal_process_groups))

    def wait_process_groups(self, specs):
        """Delete the process groups selected by specs, releasing the Heckle
        nodes of each one as it is removed."""
        logger.debug("Heckle System: wait_process_groups; specs are %s" % specs)

        def release(group, _unused):
            # Callback applied by q_del to each removed process group.
            return self._release_resources(group)

        return self.process_groups.q_del(specs, release)

    wait_process_groups = exposed(query(wait_process_groups))

    # Methods for dealing with Process Groups

    def _start_pg(self, pgp, heckle_res_id, uid):
        """
        Populates the process group with its resources
            gets node information for nodes in process group
            Updates those attributes
            Places nodes in the pinging nodes list, to see if they're built

        pgp -- the process group being started
        heckle_res_id -- id of the Heckle reservation made for this group
        uid -- user the reservation belongs to
        """
        logger.debug("Heckle System: start_pg: PGP is %s" % pgp)
        for node in pgp.location:
            node_attributes = self.resources[node]
            # MAC addresses are stored dash-separated; use colons instead.
            node_attributes["mac"] = node_attributes["mac"].replace("-", ":")
            node_attributes["heckle_res_id"] = heckle_res_id
            pgp.resource_attributes[node] = node_attributes._get_dict()
            # NOTE(review): this line was lost from this copy of the file and
            # is restored from the docstring ("Places nodes in the pinging
            # nodes list") -- confirm against the upstream source.
            pgp.pinging_nodes.append(node)
        pgp.uid = uid

    def _check_builds_done(self):
        """
        Check to see if the nodes are done building
        Starts the process group if all nodes in them are done building

        Returns True unless at least one process group ran out of pinging
        nodes during this pass, in which case False is returned.
        """
        retval = True
        pg_list = [pgp for pgp in self.process_groups.itervalues()
                   if len(pgp.pinging_nodes) > 0]
        for pgp in pg_list:
            # Iterate over a snapshot: finished nodes are removed from the
            # live list inside the loop, which would otherwise skip entries.
            for nodename in list(pgp.pinging_nodes):
                teststr = self.resources[nodename]["bootstate"]
                if teststr == "COMPLETED":
                    logger.debug(
                        "heckle: System: Check Build Done: Removing node %s...%i pinging nodes left"
                        % (nodename, len(pgp.pinging_nodes) - 1)
                    )
                    pgp.pinging_nodes.remove(nodename)
                elif teststr in ["BOOTING", "", ""]:
                    logger.debug("Heckle System: Check Build Done: Node %s not done yet." % nodename)
                elif teststr == "UNALLOCATED":
                    raise Exception(
                        "HIC_SO: _check_builds_done: Node says, 'UNALLOCATED'.  Possible build error, or system timed out."
                    )
                elif teststr == "CRITFAIL":
                    raise Exception("HIC_SO: _check_builds_done: Node says, 'CRITFAIL'.  It timed out while building.")
                    ####      Need to figure a better way to fail gracefully on this one...
                elif teststr == "READY":
                    raise Exception(
                        "HIC_SO: _check_builds_done: Node says, 'READY'.  The Heckle Reservation is already ready already, skipping pinging."
                    )
            if len(pgp.pinging_nodes) == 0:
                logger.debug("Heckle System: Check Build Done: No Pinging Nodes left, Start PG Running.")
                # NOTE(review): the start call was lost from this copy of the
                # file; restored from the docstring -- confirm upstream.
                pgp.start()
                retval = False
        return retval

    _check_builds_done = automatic(_check_builds_done)

    def _wait(self):
        """
        Calls the process group container's wait() method
        """
        for pgp in self.process_groups.itervalues():
            # NOTE(review): the loop body was lost from this copy of the
            # file; restored from the docstring -- confirm upstream.
            pgp.wait()

    _wait = automatic(_wait)

    def _release_resources(self, pgp):
        """
        Releases all the Heckle nodes, unreserving them

        pgp -- process group whose nodes (pgp.location) are freed on behalf
               of its user (pgp.uid)
        """
        logger.debug("Heckle System: Release %s" % pgp.location)
        HICCUP = HeckleConnector()
        HICCUP.free_reserved_node(uid=pgp.uid, node_list=pgp.location)

    def get_resources(self, specs=None):
        """
        Returns a list of names for all the FREE resources (nodes) which match the given specs.

        specs -- optional dict of attributes to match; it is not modified.
        """
        logger.debug("Heckle System: get Resources, specs are %s" % specs)
        ###  Look at this as a future change
        # Work on a private copy: the filter keys added below must not leak
        # into the caller's dict, nor into a default shared between calls.
        criteria = dict(specs) if specs else {}
        criteria["current reservation"] = 9999999
        criteria["allocatable"] = "True"
        res_list = self.resources >= criteria
        logger.debug("Heckle System: get Resources, resources are %s" % res_list)
        return res_list

    get_resources = exposed(query(get_resources))

    # Methods for interacting with scheduler and queue-manager

    def validate_job(self, spec):
        """
        Validates a job for submission
        -- will the job ever run under the current Heckle configuration?
            1)  Validate Kernel
            2)  Validate HW
            3)  Validate Job versus overall

        Returns spec when the job is acceptable; raises Exception describing
        the problem otherwise.  spec itself is not modified.
        """
        logger.debug("Heckle System: Validate Job: Specs are %s" % spec)
        # Copy the attributes: the filter keys added further down must not
        # end up in the job's own attrs.
        checklist = dict(spec.get("attrs") or {})
        nodecount = spec.get("nodecount", 1)
        glossary = self.resources.glossary
        dnelist = []  # for attributes which do not exist in glossary
        badlist = []  # for attributes in glossary which do not exist
        ###  Look at this as a future change
        ###  Think:  Refresh Resources Info
        # 1st step:  Are there enough nodes at all?
        total_nodes = self.resources.node_count()
        if nodecount > total_nodes:
            raise Exception(
                "Validate Job: Not enough nodes; Requested %s, only have %s in the system."
                % (nodecount, total_nodes)
            )
        for att, val in checklist.items():
            try:
                known_values = glossary[att]
            except KeyError:
                dnelist.append(att)  # Attribute does not exist
                continue
            if val not in known_values:
                badlist.append("%s:%s" % (att, val))  # Bad attribute
        # NOTE(review): the statement guarding this section was lost from
        # this copy of the file; `if checklist:` is an assumption.
        if checklist:
            checklist["current reservation"] = 9999999
            checklist["allocatable"] = "True"
            retlist = self.resources >= checklist
            retcount = len(retlist)
            if retcount >= nodecount and not badlist and not dnelist:
                return spec  # Good Job!
            retstr = "Validate Job: "
            if badlist or dnelist:
                if badlist:
                    retstr += "No value for attribute: %s" % badlist
                if dnelist:
                    retstr += "Attributes Do Not Exist: %s" % dnelist
            else:
                retstr += "Need %s nodes, only have %s nodes:  %s" % (nodecount, retcount, retlist)
            raise Exception(retstr)
        return spec

    validate_job = exposed(validate_job)

    def verify_locations(self, location_list):
        """
        Makes sure a location list is valid
        location list is a list of fully qualified strings of node names

        Returns the result of testing location_list for membership in the
        resource glossary.
        """
        logger.debug("heckle: System: Validate Job: Verify Locations")
        # NOTE(review): this tests the list as a whole, not each node name;
        # confirm that the glossary's membership test supports that.
        return location_list in self.resources.glossary

    verify_locations = exposed(verify_locations)

    def find_job_location(self, job_location_args, end_times):
        """
        Finds a group of not-busy nodes in which to run the job
            job_location_args -- A list of dictionaries with info about the job
                jobid -- string identifier
                nodes -- int number of nodes
                queue -- string queue name
                required -- ??
                utility_score -- ??
                threshold -- ??
                walltime -- ??
                attrs -- dictionary of attributes to match against
            end_times -- supposed time the job will end
        Returns: Dictionary with list of nodes a job can run on, keyed by jobid

        Raises Exception when a job cannot be given enough matching nodes.
        """
        logger.debug("heckle: System: find_job_location")
        locations = {}

        def jobsort(job):
            """Used to sort job list by utility score"""
            return job["utility_score"]
        # NOTE(review): jobsort is never applied in this copy of the file;
        # confirm whether the job list is meant to be sorted before matching.

        # Try to match jobs to nodes which can run them
        for job in job_location_args:
            # Copy the attributes so the filter keys below are not written
            # back into the caller's job description.
            attrs = dict(job.get("attrs") or {})
            attrs["current reservation"] = 9999999
            attrs["allocatable"] = "True"
            nodecount = int(job["nodes"])
            logger.debug("Heckle System: Find Job Location: Job is %s" % job)
            ###  Look at this as point of change
            ###  Think:  For node in unreserved nodes
            ###            Choose node from list
            ###            Remove node from unreserved nodes
            logger.debug("Heckle System: Find Job Location: Free Nodes is %s" % self.resources.getfreenodes())
            nodelist = self.resources >= attrs  # get Matching Node
            logger.debug("Nodelist at this stage is %s" % nodelist)
            if len(nodelist) < nodecount:
                raise Exception("Heckle System: find_job_locations: Not Enough matching Nodes Available")
            logger.debug("Nodecount = %s" % nodecount)
            retlist = nodelist[:nodecount]
            logger.debug("Heckle System: Find Job Location: Remaining nodelist is %s" % retlist)
            locations[job["jobid"]] = retlist
            logger.debug("Locations is now: %s" % locations)
        logger.debug("heckle: find_job_location: locations are %s" % locations)
        return locations

    find_job_location = exposed(find_job_location)

    def find_queue_equivalence_classes(self, reservation_dict, active_queue_names):
        """
        Finds equivalent queues
        An equivalent queue is a queue which can run upon the same partition(s)
        For now, with one partition (everything!) this is irrelevant.
        Returns: equiv= [{'reservations': [], 'queues': ['default']}]
        """
        logger.debug("Heckle System: find queue equivalence classes")
        equiv = []
        for queue in self.queue_assignments:
            # skip queues that aren't running
            if queue not in active_queue_names:
                continue
            queue_nodes = self.queue_assignments[queue]
            found_a_match = False
            logger.debug("Heckle Queue is %s" % queue)
            for equ in equiv:
                logger.debug("Heckle Equ is %s" % equ)
                if equ["data"].intersection(queue_nodes):
                    # Queues that share nodes belong to the same class.
                    equ["queues"].add(queue)
                    equ["data"].update(queue_nodes)
                    found_a_match = True
                    break
            if not found_a_match:
                equiv.append(
                    {"queues": set([queue]), "data": set(queue_nodes), "reservations": set()}
                )
        # Second pass: merge classes that ended up sharing a queue.
        real_equiv = []
        for eq_class in equiv:
            found_a_match = False
            for equ in real_equiv:
                if equ["queues"].intersection(eq_class["queues"]):
                    equ["queues"].update(eq_class["queues"])
                    equ["data"].update(eq_class["data"])
                    found_a_match = True
                    break
            if not found_a_match:
                real_equiv.append(eq_class)
        equiv = real_equiv
        for eq_class in equiv:
            # Attach every reservation that names a host of this class.
            for res_name in reservation_dict:
                for host_name in reservation_dict[res_name].split(":"):
                    if host_name in eq_class["data"]:
                        eq_class["reservations"].add(res_name)
            # Sets are converted to lists for the caller; the node data is
            # internal bookkeeping and is dropped from the result.
            for key in eq_class:
                eq_class[key] = list(eq_class[key])
            del eq_class["data"]
        return equiv

    find_queue_equivalence_classes = exposed(find_queue_equivalence_classes)

    def get_partitions(self, locations):
        """
        Work-around to get the cqadm to run a single job on this system
        PRE:  locations is a list of dict of strings of possible node names
        POST:  if good, return locations
               if not good, raise exception and list bad nodes
        """
        # NOTE(review): other methods read `self.resources.glossary`
        # (lower case); confirm that `Glossary.nodelist` is the right name.
        nodelist = self.resources.Glossary.nodelist
        # presumably the same input shape as the sibling implementation
        # (node name(s) under the 'name' key of the first dict) -- TODO confirm
        locs = locations[0]["name"]
        logger.debug("Heckle System: get_partitions: raw is are: %s" % locations)
        logger.debug("Heckle System: get_partitions: vals are: %s" % locs)
        # Accept a single node name as well as a list of names.
        requested = locs if isinstance(locs, (list, tuple)) else [locs]
        badlocations = [loc for loc in requested if loc not in nodelist]
        if badlocations:
            raise Exception("heckle: System: get_partition: Bad Locations: %s " % badlocations)
        return locations

    get_partitions = exposed(get_partitions)