class GlobalVars():
    """Shared state for the queue-monitoring script.

    Queries the PBS server once at construction time and caches job
    bookkeeping fields, time stamps, word/byte -> GB conversion factors,
    the available FlexLM licenses, and the current `showres` output.
    """

    def __init__(self):
        self.pbs_query = PBSQuery()
        self.job_type = ""
        self.idle_jobs = []
        self.total_jobs = 0.0
        self.job_list_length = 0.0
        self.current_time = mktime(strptime(ctime(time())))  # in seconds
        # BUG FIX: ctime() pads single-digit days with an extra space
        # ("Mon Jan  1 ..."), so split(" ")[4] was the time, not the year,
        # on those days. split() collapses runs of whitespace; the year is
        # always the last field.
        self.current_year = ctime(time()).split()[-1]
        # System name is the server name up to the first "-".
        self.system = self.pbs_query.get_server_name().partition("-")[0]
        self.job_server = self.pbs_query.get_server_name()

        self.word = sys.maxsize  # double check word/gb conversions ...
        # Conversion factors: (mega/kilo)words and (mega/kilo/plain)bytes to GB.
        self.gb_from_mw = 1/(64.0**3)
        self.gb_from_kw = 1/(64.0**2)
        self.gb_from_w = 1/64.0
        self.gb_from_mb = 1/1024.0
        self.gb_from_kb = self.gb_from_mb/1024.0
        self.gb_from_b = self.gb_from_kb/1024.0

        self.available_licenses = self.find_available_licenses()
        self.showres = check_output(["showres"]).strip("\n").split("\n")

    def find_available_licenses(self):
        """Return a dict mapping license name -> available count.

        Parses the "ARES=name:count,name:count,..." section of the
        query-flexlm output.
        """
        query_flexlm = check_output(["/usr/local/sbin/query-flexlm"]).rstrip("\n").partition("ARES=")[2].split(",")
        self.available_licenses = {}

        for lic in query_flexlm:
            self.available_licenses[lic.partition(":")[0]] = int(lic.partition(":")[2])

        # BUG FIX: this method previously returned None, so the assignment in
        # __init__ overwrote self.available_licenses with None right after it
        # had been populated here.
        return self.available_licenses
# Esempio n. 2
# 0
    def is_running(self, queue_id):
        """Must return True/False whether the job is in the queue or not
            respectively.

        Input:
            queue_id: Unique identifier for a job.

        Output:
            in_queue: Boolean value. True if the job identified by 'queue_id'
                        is still running.
        """
        # Ask the queue manager for the current job table and look the id up.
        current_jobs = PBSQuery.PBSQuery().getjobs()
        return queue_id in current_jobs
# Esempio n. 3
# 0
def get_nodes(jobs):
    """Return the unique hostnames (first-seen order) the given jobs run on.

    Jobs with no 'exec_host' information are skipped.
    """
    query = PBSQuery.PBSQuery()
    hostnames = []

    for job_id in jobs:
        info = query.getjob(job_id, ['exec_host'])
        if not info or 'exec_host' not in info:
            continue
        # exec_host looks like "nodeA/0+nodeA/1+nodeB/0"; keep each host once.
        for slot in info['exec_host'][0].split('+'):
            host = slot.split('/')[0]
            if host not in hostnames:
                hostnames.append(host)
    return hostnames
# Esempio n. 4
# 0
    def _get_current_notes(self, nodes):
        '''Return a dict mapping node name -> its current 'note' value,
        restricted to the nodes given in *nodes*.'''
        if ARGS_VERBOSE:
            _print('class:SaraNodes func:_get_current_notes input:%s' %
                   str(nodes),
                   file=sys.stderr)

        pbsq = PBSQuery.PBSQuery()
        rdict = dict()

        ## We are only interested in the note
        for node, properties in pbsq.getnodes(['note']).items():
            # 'in' instead of dict.has_key(): has_key() was removed in
            # Python 3 and 'in' behaves identically on Python 2.
            if node in nodes and 'note' in properties:
                rdict[node] = properties['note']
        return rdict
# Esempio n. 5
# 0
def main(job_id):
    '''Monitor a job_id and run job_cleanup when the elapsed_time is within exit_time seconds of the wall_time.'''
    p=PBSQuery.PBSQuery()
    if not p.getjob(job_id):
        raise Exception,'invalid job id %s.' % job_id
    job=p.getjob(job_id)
    wall_time=hhmmss_to_seconds(job[job_id]['Resource_List.walltime'])
    try:
        sleep_time=wall_time-options.exit_time
    except NameError:
        # Try to use the default in case watchdog is being used as a module.
        sleep_time=wall_time-exit_time
    print 'Watchdog sleeping for %i' % (sleep_time)
    time.sleep(sleep_time)
    job_cleanup()
    sys.exit
# Esempio n. 6
# 0
        def print_list(self, args, options):
                '''
                A method that is used for collecting all nodes with the state down, offline or unknown
                '''
                # NOTE(review): the visible code ends right after fetching
                # 'nodes'; the remainder of this method appears truncated here.

                
                p = PBSQuery.PBSQuery()
                # Torque >= 4.0.0 provides a richer node/job data structure.
                if pbs.version_info >= ( 4,0,0 ):
                        if self.obj_sara_nodes.verbose:
                                print "Enabling new_data_structure for PBSQuery"
                        p.new_data_structure()

                try:
                    nodes = p.getnodes( ['state', 'note'] )
                except PBSQuery.PBSError, detail:
                    print "PBSQuery error: %s" %detail
                    sys.exit(1)
# Esempio n. 7
# 0
    def __init__(self, pbs_server=None):
        """Connect to the given PBS server, retrying up to _MAX_RETRY times
        with quadratic backoff before giving up.

        :param pbs_server: name of the PBS server to query (None = default)
        :raises civet_exceptions.CivetException: if no connection was made
        """
        self.pbsq = None
        retry = 0
        cached_exception = None
        while not self.pbsq and retry < _MAX_RETRY:
            try:
                self.pbsq = PBSQuery.PBSQuery(server=pbs_server)
            except PBSQuery.PBSError as e:
                cached_exception = e
                retry += 1
                time.sleep(retry ** 2)  # back off: 1s, 4s, 9s, ...

        if not self.pbsq:
            if cached_exception:
                # str() instead of .message: the .message attribute was
                # deprecated in Python 2.6 and removed in Python 3.
                raise civet_exceptions.CivetException(str(cached_exception))
            else:
                raise civet_exceptions.CivetException("Unable to instantiate PBSQuery instance, unknown error.")
        self.pbs_server = pbs_server
    def __init__(self):
        # One PBSQuery handle reused for all server queries below.
        self.pbs_query = PBSQuery()
        self.job_type = ""
        self.idle_jobs = []
        self.total_jobs = 0.0
        self.job_list_length = 0.0
        self.current_time = mktime(strptime(ctime(time())))  # in seconds
        # NOTE(review): ctime() pads single-digit days with an extra space,
        # so split(" ")[4] is the year only for two-digit days — verify;
        # split()[-1] would be robust.
        self.current_year = ctime(time()).split(" ")[4]
        # System name is the server name up to the first "-".
        self.system = self.pbs_query.get_server_name().partition("-")[0]
        self.job_server = self.pbs_query.get_server_name()

        self.word = sys.maxsize  # double check word/gb conversions ...
        # Conversion factors: (mega/kilo)words and (mega/kilo/plain)bytes to GB.
        self.gb_from_mw = 1/(64.0**3)
        self.gb_from_kw = 1/(64.0**2)
        self.gb_from_w = 1/64.0
        self.gb_from_mb = 1/1024.0
        self.gb_from_kb = self.gb_from_mb/1024.0
        self.gb_from_b = self.gb_from_kb/1024.0
        
        # NOTE(review): confirm find_available_licenses() returns the dict;
        # if it only sets the attribute, this assignment overwrites it with None.
        self.available_licenses = self.find_available_licenses() 
        self.showres = check_output(["showres"]).strip("\n").split("\n") 
# Esempio n. 9
# 0
    def status(self):
        """Return a tuple of number of jobs running and queued for the pipeline

        Inputs:
            None

        Outputs:
            running: The number of pipeline jobs currently marked as running 
                        by the queue manager.
            queued: The number of pipeline jobs currently marked as queued 
                        by the queue manager.
        """
        running = 0
        queued = 0
        jobs = PBSQuery.PBSQuery().getjobs()
        for jobid, info in jobs.items():
            # Only count jobs belonging to this pipeline.
            if not info['Job_Name'][0].startswith(self.job_basename):
                continue
            state = info['job_state']
            if 'R' in state:
                running += 1
            elif 'Q' in state:
                queued += 1
        return (running, queued)
# Esempio n. 10
# 0
    def delete(self, queue_id):
        """Remove the job identified by 'queue_id' from the queue.

        Input:
            queue_id: Unique identifier for a job.
        
        Output:
            None
            
            *** NOTE: A pipeline_utils.PipelineError is raised if
                        the job removal fails.
        """
        # FIX: run qsig with an argument list instead of shell=True string
        # interpolation (no shell parsing of queue_id), and wait for it to
        # finish — the old Popen was never waited on, leaving a zombie.
        subprocess.call(["qsig", "-s", "SIGINT", str(queue_id)])

        # Wait a few seconds a see if the job is still being tracked by
        # the queue manager, or if it marked as exiting.
        time.sleep(5)
        batch = PBSQuery.PBSQuery().getjobs()
        if (queue_id in batch) and ('E' not in batch[queue_id]['job_state']):
            errormsg = "The job (%s) is still in the queue " % queue_id
            errormsg += "and is not marked as exiting (status = 'E')!\n"
            raise pipeline_utils.PipelineError(errormsg)
# Esempio n. 11
# 0
    def _get_submit_node(self):
        """Return the name of the node to submit to.

            Inputs:
                None

            Output:
                node: The name of the node that the next job should be submitted to.
        """
        query = PBSQuery.PBSQuery()
        candidates = query.getnodes_with_property(self.property)
        best_node = None
        best_free = -1
        for name, info in candidates.items():
            running = len(info.setdefault('jobs', []))
            # Skip nodes that are not free or already at the per-node job cap.
            if info['state'] != ['free'] or running >= self.max_jobs_per_node:
                continue
            free_cpus = int(info['np'][0]) - running
            if free_cpus > best_free:
                best_free = free_cpus
                best_node = name
        return best_node
# Esempio n. 12
# 0
        def note( self, node, note_attr ):
                '''
                This method combines all note methods and returns the new note
                '''
                # Fetch the node's current state, including any existing note.
                p = PBSQuery.PBSQuery()
                p.new_data_structure()
                pbs_info = p.getnode( node )
                
                # pre_parts holds the first four comma-separated note fields;
                # NOTE(review): presumably header/date/user/ticket — verify
                # against the note format produced by note_init().
                pre_parts = list()
                old_note = None
                new_note = None

                if pbs_info.has_key( 'note' ):
                        pbs_note = pbs_info[ 'note' ]
                        if len( pbs_note ) > 4:
                                pre_parts = pbs_note[:4]
                                # Everything after the fourth field is the free text.
                                old_note = ', '.join( pbs_note[4:] )

                                # Refresh the date and username header fields.
                                pre_parts[1] = self.create_date()
                                pre_parts[2] = self.note_return_username( pre_parts[2] )

                else:
                        # No existing note: start from a fresh header.
                        pre_parts = self.note_init()

                if note_attr.has_key( 'ticket' ):
                	pre_parts[3] = self.note_check_ticket( note_attr['ticket'], pre_parts[3] )

                if note_attr.has_key( 'note' ) and note_attr.has_key( 'mode' ):
                        # mode 'a' appends to / 'w' overwrites the free-text part.
                        if note_attr[ 'note' ] and note_attr[ 'mode' ] in [ 'a','w' ]:
                                if old_note:
                                        new_note = self.note_create( note_attr[ 'note' ], note_attr[ 'mode' ], old_note )
                                else:
                                        new_note = self.note_create( note_attr[ 'note' ], note_attr[ 'mode' ] )
                        else:
                                new_note = old_note

                return '%s,%s' % ( ','.join( pre_parts ), new_note )
# Esempio n. 13
# 0
def print_get_nodes(hosts=None):
    '''Fetch node information from the batch system and split it in two.

    Nodes are selected either by the explicit *hosts* list or — when no
    hosts are given — by having a state in ALLOWED_STATES. Selected nodes
    whose name matches SPLIT_SORT go into the first returned dict, all
    others into the second.
    '''
    if ARGS_VERBOSE:
        _print('func:print_get_nodes input:%s' % str(hosts), file=sys.stderr)

    ## there are 2 possible filters, by hostname, or by state
    batch = PBSQuery.PBSQuery()
    matched = dict()
    unmatched = dict()

    if ARGS_VERBOSE:
        _print('func:print_get_nodes fetching node information',
               file=sys.stderr)
    ## Ask the batch system for every node, with its state and note
    for host, properties in batch.getnodes(['state', 'note']).items():
        selected = None
        if hosts:
            # Explicit host filter given by the caller.
            if host in hosts:
                selected = host
        elif bool(ALLOWED_STATES.intersection(set(properties.state))):
            # No host filter: keep nodes whose state is in the allowed set.
            selected = host

        # Sort any selected host into the two result buckets by basename.
        if selected:
            if SPLIT_SORT and re.findall(SPLIT_SORT, selected):
                matched[host] = properties
            else:
                unmatched[host] = properties

    if ARGS_VERBOSE:
        _print('func:print_get_nodes returning values', file=sys.stderr)
    return matched, unmatched
# Esempio n. 14
# 0
#!/usr/bin/env python
# GBNCC job-submitter script: polls the database for observations being
# processed and reconciles them with the batch queue.
# NOTE(review): 'time' is imported twice on the next line — redundant.
import os, subprocess, shutil, glob, time, datetime, pytz, config, utils, database, PBSQuery, time
prestodir = os.environ["PRESTO"]

#checkpoints = glob.glob(os.path.join(config.jobsdir, "*.checkpoint"))
checkpoints = []

# Single PBSQuery handle reused for every poll of the batch queue.
queue = PBSQuery.PBSQuery()

print("Starting GBNCC job submitter...")

# Poll loop. NOTE(review): the loop body is visibly truncated below — the
# remainder of each iteration is not part of this chunk.
while True:
    print("Connecting to database")
    db = database.Database("observations")
    # Rows whose ProcessingStatus is 'i' (presumably "in progress" — verify).
    query = "SELECT ProcessingID,FileName FROM GBNCC WHERE "\
            "ProcessingStatus='i'"
    print("Updating job states")
    db.execute(query)
    ret = db.fetchall()
    if len(ret) != 0:
        print("Getting all currently running jobs")
        alljobs = queue.getjobs()
        if alljobs is not None:
            for jobid, filenm in ret:
                # has_key() is Python-2-only; job ids are string keys.
                if alljobs.has_key(str(jobid)):
                    if alljobs[str(jobid)]["job_state"][0] == "R":
                        nodenm = alljobs[str(jobid)]["exec_host"][0]
                        jobnm = alljobs[str(jobid)]["Job_Name"][0]
                        #checkpoint = os.path.join(config.jobsdir, jobnm+".checkpoint")
                        #with open(checkpoint, "w") as f:
                        #    f.write(nodenm+"\n")
# Esempio n. 15
# 0
        def pbs_batch( self, nodes, attrs=None, note_attributes=None ):
                '''
                Set attributes and/or a note on a batch of nodes through the
                pbs manager API; in dryrun mode only report which of the
                given nodes exist. Raises sara_nodesException on failure.
                '''
                nodeserror = list()
                if not attrs and not note_attributes:
                        raise sara_nodesException, 'attrs and note_attributes can not be empty together!'

                if not self.dryrun:
                        # Append a note attropl after any caller-supplied attrs.
                        # NOTE(review): indexing attributes[1] assumes attrs has
                        # exactly one element — confirm against callers.
                        if note_attributes and len( note_attributes ) == 3:
                                if attrs:
                                        attributes = attrs + pbs.new_attropl(1)
                                        attributes[1].name = pbs.ATTR_NODE_note
                                        attributes[1].op = pbs.SET
                                else:
                                        attributes = pbs.new_attropl(1)
                                        attributes[0].name = pbs.ATTR_NODE_note
                                        attributes[0].op = pbs.SET
                        else:
                                attributes = attrs
                        # Some hacking here because some limitation in the Torque 2.4 version
                        # fetching note data first for all nodes!
                        tmp_node_note = dict()

                        for node in nodes:
                                if note_attributes and len( note_attributes ) == 3:
                                        tmp_node_note[ node ] = self.note( node, note_attributes )

                        pbs_server = pbs.pbs_default()

                        if not pbs_server:
                                raise sara_nodesException, 'Default pbs server not found!'

                        pbs_connection = pbs.pbs_connect( pbs_server )
                        for node in nodes:
                                if note_attributes and len( note_attributes ) == 3:
                                        try:
                                                if attrs:
                                                        attributes[1].value = tmp_node_note[ node ]
                                                else:
                                                        attributes[0].value = tmp_node_note[ node ]
                                        except KeyError:
                                                # No precomputed note for this node; leave value as-is.
                                                pass
                                rcode = pbs.pbs_manager( pbs_connection, pbs.MGR_CMD_SET, pbs.MGR_OBJ_NODE, node, attributes, 'NULL' )
                                if rcode > 0:
                                        # Collect per-node failures; all are reported at the end.
                                        errno, text = pbs.error()
                                        nodeserror.append( '%s: %s (%s)' % ( node, text, errno ) )
                else:
                        # Dryrun: just verify the nodes exist and pretty-print them.
                        p = PBSQuery.PBSQuery()
                        pbsnodes = p.getnodes().keys()

                        print '%*s:' % ( 7, 'Nodes' ),
                        firstitem = True

                        for node in nodes:

                                if node in pbsnodes:
                                        if firstitem:
                                                print '%s' % node
                                                firstitem = False
                                        else:
                                                print '%*s' % ( 17, node )
                                else:
                                        nodeserror.append( '%s: does not exist' % node )

                if len( nodeserror ) > 0:
                        raise sara_nodesException, nodeserror
# Esempio n. 16
# 0
 def __init__(self, pbs_server=None):
     """Create a PBSQuery handle for the given server (None = default)."""
     self.pbs_server = pbs_server
     self.pbsq = PBSQuery.PBSQuery(server=pbs_server)