예제 #1
0
 def check_all_jobs(self):
     """
     Query every PBS server referenced by the watched jobs.

     Returns a ``(failures, statuses)`` tuple. ``failures`` lists the names
     of the servers for which ``pbs.pbs_connect`` returned a handle <= 0.
     ``statuses`` is a dict merged from the results of
     ``self.convert_statjob_to_bunches`` for each server that could be
     contacted (described by the original author as "job_id : status"
     pairs).
     """
     unreachable = []
     collected = {}
     # Distinct server names in first-seen order; every watched job gets
     # its check counter bumped whether or not its server is new.
     server_names = []
     for watched_job in self.watched:
         server_name = self.__get_pbs_server(
             watched_job.job_destination.params)
         if server_name not in server_names:
             server_names.append(server_name)
         watched_job.check_count += 1
     for server_name in server_names:
         handle = pbs.pbs_connect(util.smart_str(server_name))
         if handle > 0:
             # Restrict the query to the three attributes of interest.
             wanted = pbs.new_attrl(3)
             wanted[0].name = pbs.ATTR_state
             wanted[1].name = pbs.ATTR_used
             wanted[2].name = pbs.ATTR_exitstat
             reply = pbs.pbs_statjob(handle, None, wanted, None)
             pbs.pbs_disconnect(handle)
             collected.update(self.convert_statjob_to_bunches(reply))
         else:
             log.debug(
                 "connection to PBS server %s for state check failed" %
                 server_name)
             unreachable.append(server_name)
     return (unreachable, collected)
예제 #2
0
파일: stat_job.py 프로젝트: raidenfox/nomad
def locate_attribs(id_conn, id_job):
    code = []
    attribs = []
    jobs = pbs.pbs_statjob(id_conn, id_job, "NULL", "NULL")
    i = 0
    index = len(jobs)
    for job in jobs:
        print job.name,"\n"
        attrib = job.attribs
        y = 0
        while i < index:
            lenght_a = len(attrib)
            while y < lenght_a:
                attribs.append(attrib[y].name)
                code.append(y)
                y = y + 1
            
            i = i+1
    
    
    #momentaneo - definire una ADT per memorizzare dianmicamente i codici degli attributi
    code_len = len(attribs)
    x = 0
    while x < code_len:
        print attribs[x], "->", code[x], "<-" ,jobs[0].attribs[x].value
        x = x + 1
    
    return attribs
예제 #3
0
파일: pbs.py 프로젝트: AAFC-MBB/galaxy-1
 def check_all_jobs( self ):
     """
     Poll the PBS servers used by the watched jobs for job status.

     Each watched job has its ``check_count`` incremented. Every distinct
     server is then contacted once, in first-seen order.

     Returns a ``(failures, statuses)`` tuple: the names of the servers
     whose ``pbs.pbs_connect`` handle was <= 0, and a dict merged from the
     results of ``self.convert_statjob_to_bunches`` for the others
     (described by the original author as "job_id : status" pairs).
     """
     failed_servers = []
     job_statuses = {}
     known_servers = []
     for job_state in self.watched:
         name = self.__get_pbs_server( job_state.job_destination.params )
         if name not in known_servers:
             known_servers.append( name )
         job_state.check_count += 1
     for name in known_servers:
         conn = pbs.pbs_connect( util.smart_str( name ) )
         if conn > 0:
             # Only these three attributes are requested from the server.
             attrl = pbs.new_attrl(3)
             attrl[0].name = pbs.ATTR_state
             attrl[1].name = pbs.ATTR_used
             attrl[2].name = pbs.ATTR_exitstat
             stat_result = pbs.pbs_statjob( conn, None, attrl, None )
             pbs.pbs_disconnect( conn )
             job_statuses.update( self.convert_statjob_to_bunches( stat_result ) )
         else:
             log.debug("connection to PBS server %s for state check failed" % name )
             failed_servers.append( name )
     return ( failed_servers, job_statuses )
예제 #4
0
def update_all_jobs(batchserver_name):
    """ Update info about all jobs of the given batchserver.
    """
    server,created = getBatchServer(batchserver_name)
    if not pbs_data_jobs.has_key(batchserver_name):
        pbs_data_jobs[batchserver_name] = {'last_update':None, 'jobs':{}}

    if pbs_data_jobs[batchserver_name]['last_update'] and (datetime.datetime.now()-pbs_data_jobs[batchserver_name]['last_update']).total_seconds()<GlobalConfiguration.objects.get(pk=1).max_lastupdate:
        logging.debug("jobs info is new enough for server: %s" % batchserver_name)
        print "not updated"
        return pbs_data_jobs
    print "updated"

    conn = pbs.pbs_connect(batchserver_name.encode('iso-8859-1', 'replace'))
    if conn==-1:
        logging.error("Cannot connect to %s - live data will be missing" % server.name)
        return
    statjobs = pbs.pbs_statjob(conn, "" , [], "")
    pbs.pbs_disconnect(conn)

    for sj in statjobs:
        jobid = sj.name
        attr_dict = dict([ (x.name,x.value) for x in sj.attribs])
        attr_dict = {}
        for x in sj.attribs:
            if x.resource:
                attr_dict[x.name+"_"+x.resource] = x.value
            else:
                attr_dict[x.name] = x.value

        pbs_data_jobs[batchserver_name]['jobs'][jobid] = update_one_job_from_pbs_data(jobid, attr_dict)
        pbs_data_jobs[batchserver_name]['last_update'] = datetime.datetime.now()

    return pbs_data_jobs
예제 #5
0
파일: stat_job.py 프로젝트: raidenfox/nomad
def statjob_detailed(id_conn,id_job):
    result = []
    jobs = pbs.pbs_statjob(id_conn, id_job, "NULL", "NULL")
    for job in jobs:
        print job.name,"\n"
        for attrib in job.attribs:
            print attrib.name,attrib.resource,attrib.value
            
    return result
예제 #6
0
파일: stat_job.py 프로젝트: raidenfox/nomad
def resource_name_forjob(id_conn,id_job,id_resource):
    """
    Return the ``resource`` field of an attribute of the queried job(s).

    ``id_resource`` is iterated as a sequence of indexes into each job's
    attribute list. Every index is read, and the value kept is the one read
    last (last index of the last job). An empty string is returned when no
    job is reported or ``id_resource`` is empty.
    """
    found = ""
    for job in pbs.pbs_statjob(id_conn, id_job, "NULL", "NULL"):
        entries = job.attribs
        for position in id_resource:
            found = entries[position].resource

    return found
예제 #7
0
파일: stat_job.py 프로젝트: raidenfox/nomad
def get_attribs(id_conn,id_job):
    """
    Return the attributes of the first matching job as a dict.

    Keys are the attribute names (converted with ``str``) and values are the
    attribute values. An extra ``'lenght_resources'`` entry holds the number
    of attributes plus one. The first job is accessed by index, so an empty
    reply from ``pbs.pbs_statjob`` makes that lookup fail.
    """
    job = pbs.pbs_statjob(id_conn, id_job, "NULL", "NULL")
    attr = {}
    for entry in job[0].attribs:
        attr[str(entry.name)] = entry.value

    attr['lenght_resources'] = len(job[0].attribs)+1
    return attr
예제 #8
0
 def get_status(self):
     """
     Return the current status of this job.

     A job already marked ``main.job.JOB_STATUS.FAIL`` keeps that status,
     and a job without a ``jobid`` is reported as ``PBS_STATUS['Q']``.
     Otherwise the server is queried and the first attribute value of the
     first job returned is mapped through ``PBS_STATUS``; values missing
     from that table fall back to ``PBS_STATUS['Q']``.

     The indexing error raised when the server returns no job is not
     handled here and propagates to the caller.
     """
     if self.status == main.job.JOB_STATUS.FAIL:
         return self.status
     if not self.jobid:
         return PBS_STATUS['Q']
     conn = self.connect()
     try:
         stats = pbs.pbs_statjob(conn, self.jobid, self.attrl, "NULL")
         status = stats[0].attribs[0].value
     finally:
         # Release the connection even when the query or the indexing
         # above raises; previously it was leaked in that case.
         self.disconnect()
     if status in PBS_STATUS:
         return PBS_STATUS[status]
     return PBS_STATUS['Q']
예제 #9
0
파일: stat_job.py 프로젝트: raidenfox/nomad
def statjob(id_conn,id_job):
    """
    Return the names of the nodes mentioned in a job's attribute values.

    Every attribute value of every job returned for ``id_job`` is searched
    for the name of each node reported by ``pbs.pbs_statnode``. A node name
    is appended once per attribute value that contains it, so the result
    may hold duplicates.
    """
    jobs = pbs.pbs_statjob(id_conn, id_job, "NULL", "NULL")
    nodes = pbs.pbs_statnode(id_conn, "", "NULL", "NULL")
    matches = []
    for job in jobs:
        for attrib in job.attribs:
            text = attrib.value
            matches.extend(
                node.name for node in nodes if text.find(node.name) != -1)

    return matches
    def run_cluster(self, pbs_server, job_script, settings):

        import pbs
        from threading import threa

        self.settings = copy.deepcopy(settings)
        # Launch script, wait for output to come back, return when it does

        # Create the job options struct
        attropl = pbs.new_attropl(4)

        # Set the name of the job
        #
        attropl[0].name = pbs.ATTR_N
        attropl[0].value = "inferno_" + self.name

        # Job is Rerunable
        #
        attropl[1].name = pbs.ATTR_r
        attropl[1].value = "y"

        # Walltime
        #
        attropl[2].name = pbs.ATTR_l
        attropl[2].resource = "walltime"
        attropl[2].value = "400"

        # Nodes
        #
        attropl[3].name = pbs.ATTR_l
        attropl[3].resource = "nodes"
        attropl[3].value = "1:ppn=4"

        # Run the job
        if pbs_server == None:
            pbs_server = pbs.pbs_default()
        job_id = pbs.pbs_submit(pbs_server, attropl, job_script, "NULL", "NULL")

        e, e_txt = pbs.error()
        if e:
            print e, e_txt

        # Save the job ID for later so we can check on the status
        self.job_id = job_id

        # TODO: Change this
        # Now loop, checking every 5 seconds or so if the job is done by
        # polling the pbs_server about the jobid.
        running = True
        while running:
            job_info = pbs.pbs_statjob(pbs_server, self.job_id, "NULL", "NULL")
            print job_info
            time.sleep(5)
예제 #11
0
    def _statjob(self, job_name='', attrib_list=None):
        """Fetch job information from the pbs server.

        When ``attrib_list`` is non-empty it is handed to
        ``self._list_2_attrib``; otherwise ``self.attribs`` is set to
        'NULL'. The reply of ``pbs.pbs_statjob`` is passed, together with
        ``job``, to ``self._list_2_dict``. Nothing is returned.
        """
        if not attrib_list:
            self.attribs = 'NULL'
        else:
            self._list_2_attrib(attrib_list)

        # The connection is held only for the duration of the query.
        self._connect()
        reply = pbs.pbs_statjob(self.con, job_name, self.attribs, 'NULL')
        self._disconnect()

        self._list_2_dict(reply, job)
예제 #12
0
	def _statjob(self, job_name='', attrib_list=None):
		"""Fetch job information from the pbs server.

		When ``attrib_list`` is non-empty it is handed to
		``self._list_2_attrib``; otherwise ``self.attribs`` is set to
		'NULL'. The reply of ``pbs.pbs_statjob`` is passed, together with
		``job``, to ``self._list_2_dict``. Nothing is returned.
		"""
		if not attrib_list:
			self.attribs = 'NULL'
		else:
			self._list_2_attrib(attrib_list)

		# The connection is held only for the duration of the query.
		self._connect()
		reply = pbs.pbs_statjob(self.con, job_name, self.attribs, 'NULL')
		self._disconnect()

		self._list_2_dict(reply, job)
예제 #13
0
    def info(self, types=None):
        """
        Look up this job on the PBS server and return its attributes.

        ``types`` may be a single attribute name or a list of names; when it
        is ``None``, ``NULL`` is passed to ``pbs.pbs_statjob`` as the
        attribute list.

        Returns ``None`` when ``self.jobid`` is not set or the server
        reports no job. Otherwise returns a dict mapping attribute names to
        values for the first job returned, with the job name stored under
        ``'id'``.
        """
        if not self.jobid:
            self.log.debug("no jobid, job is not submitted yet?")
            return None

        # a lone attribute name is wrapped so the code below sees a list
        if type(types) is str:
            types = [types]

        self.log.debug("Return info types %s" % types)

        # build the attribute list handed to pbs
        if types is not None:
            jobattr = pbs.new_attrl(len(types))
            for position, attr_name in enumerate(types):
                jobattr[position].name = attr_name
        else:
            jobattr = NULL

        # get a new connection (otherwise this seems to fail)
        if self.clean_conn:
            pbs.pbs_disconnect(self.pbsconn)
            self.pbsconn = pbs.pbs_connect(self.pbs_server)
        jobs = pbs.pbs_statjob(self.pbsconn, self.jobid, jobattr, NULL)

        found = len(jobs)
        if found == 0:
            # no job found, return None info
            res = None
            self.log.debug(
                "No job found. Wrong id %s or job finished? Returning %s" %
                (self.jobid, res))
            return res
        if found == 1:
            self.log.debug("Request for jobid %s returned one result %s" %
                           (self.jobid, jobs))
        else:
            self.log.error(
                "Request for jobid %s returned more then one result %s" %
                (self.jobid, jobs))

        # only the first element is used, even when several came back
        first = jobs[0]
        job_details = {}
        for attrib in first.attribs:
            job_details[attrib.name] = attrib.value
        # manually set 'id' attribute
        job_details['id'] = first.name
        self.log.debug("Found jobinfo %s" % job_details)
        return job_details
예제 #14
0
 def check_single_job( self, pbs_server_name, job_id ):
     """
     Ask one PBS server for the state of a single job, used to make sure
     a job is really dead.

     Returns ``None`` when the handle from ``pbs.pbs_connect`` is <= 0,
     otherwise the value of the first attribute of the first job returned
     (only ``pbs.ATTR_state`` is requested). An empty reply is not guarded
     against, so the resulting indexing error propagates to the caller.
     """
     handle = pbs.pbs_connect( util.smart_str( pbs_server_name ) )
     if handle > 0:
         wanted = pbs.new_attrl(1)
         wanted[0].name = pbs.ATTR_state
         reply = pbs.pbs_statjob( handle, job_id, wanted, None )
         pbs.pbs_disconnect( handle )
         return reply[0].attribs[0].value
     log.debug("connection to PBS server %s for state check failed" % pbs_server_name )
     return None
예제 #15
0
    def info(self, jobid, types=None, job_filter=None):
        """Return jobinfo

        Queries ``pbs.pbs_statjob`` on ``self.pbsconn`` for ``jobid`` and
        returns a list with one dict per job accepted by
        ``self.match_filter``. Each dict maps attribute names to values and
        holds the job name under ``'id'``. An empty list is returned when
        the server reports no job.

        ``types`` is a single attribute name or a list of names to request;
        with ``None``, 'NULL' is passed as the attribute list.
        ``job_filter`` is a dict that is merged with ``self.job_filter``
        (the latter wins on duplicate keys); its keys are added to the
        requested attributes. Neither argument is modified.
        """
        # # TODO restrict to current user jobs
        if isinstance(types, str):
            types = [types]
        self.log.debug("Return info types %s" % types)

        # # add all filter values to the types
        # Work on a private copy so the caller's dict is left untouched.
        job_filter = {} if job_filter is None else dict(job_filter)
        self.log.debug("Job filter passed %s" % job_filter)
        if self.job_filter is not None:
            self.log.debug("Job filter update with %s" % self.job_filter)
            job_filter.update(self.job_filter)
        self.log.debug("Job filter used %s" % job_filter)

        if types is None:
            # 'NULL' is passed as the attribute list, so there is no list to
            # extend; the old code raised TypeError here whenever a filter
            # was active.
            jobattr = 'NULL'
        else:
            types = list(types)
            for filter_name in job_filter:
                if filter_name not in types:
                    types.append(filter_name)
            jobattr = pbs.new_attrl(len(types))
            for idx, name in enumerate(types):
                jobattr[idx].name = name

        jobs = pbs.pbs_statjob(self.pbsconn, jobid, jobattr, 'NULL')
        if len(jobs) == 0:
            res = []  # return nothing
            self.log.debug("No job found. Wrong id %s or job finished? Returning %s" % (jobid, res))
            return res
        elif len(jobs) == 1:
            self.log.debug(
                "Request for jobid %s returned one result %s" % (jobid, jobs))
        else:
            self.log.error("Request for jobid %s returned more then one result %s" % (jobid, jobs))

        # # more then one, return value
        res = []
        for j in jobs:
            job_details = dict(
                [(attrib.name, attrib.value) for attrib in j.attribs])
            job_details['id'] = j.name  # add id
            if self.match_filter(job_details, job_filter):
                res.append(job_details)
        self.log.debug("Found jobinfo %s" % res)
        return res
예제 #16
0
파일: stat_job.py 프로젝트: raidenfox/nomad
def define_attribs(id_conn, id_job):
    """
    Return the attribute names of the first job reported for ``id_job``.

    Jobs after the first one are ignored. An empty list is returned when
    ``pbs.pbs_statjob`` yields no job.
    """
    names = []
    for job in pbs.pbs_statjob(id_conn, id_job, "NULL", "NULL"):
        names.extend(entry.name for entry in job.attribs)
        # Only the first job is inspected.
        break

    return names
예제 #17
0
    def info(self, types=None):
        """
        Query the PBS server for this job and return its attributes.

        ``types`` is either one attribute name or a list of names; with
        ``None``, ``NULL`` is passed to ``pbs.pbs_statjob`` as the attribute
        list.

        Returns ``None`` if ``self.jobid`` is unset or no job is reported.
        Otherwise returns a dict of attribute name to value for the first
        job in the reply, plus the job name under ``'id'``.
        """
        if not self.jobid:
            self.log.debug("no jobid, job is not submitted yet?")
            return None

        # normalise a single name to a one-element list
        if type(types) is str:
            types = [types]

        self.log.debug("Return info types %s" % types)

        # attribute list to query pbs with
        if types is not None:
            jobattr = pbs.new_attrl(len(types))
            for slot, wanted in enumerate(types):
                jobattr[slot].name = wanted
        else:
            jobattr = NULL

        # get a new connection (otherwise this seems to fail)
        if self.clean_conn:
            pbs.pbs_disconnect(self.pbsconn)
            self.pbsconn = pbs.pbs_connect(self.pbs_server)
        jobs = pbs.pbs_statjob(self.pbsconn, self.jobid, jobattr, NULL)

        count = len(jobs)
        if count == 0:
            # no job found, return None info
            res = None
            self.log.debug("No job found. Wrong id %s or job finished? Returning %s" % (self.jobid, res))
            return res
        if count == 1:
            self.log.debug("Request for jobid %s returned one result %s" % (self.jobid, jobs))
        else:
            self.log.error("Request for jobid %s returned more then one result %s" % (self.jobid, jobs))

        # only the first element is used, even when several came back
        head = jobs[0]
        job_details = {}
        for attrib in head.attribs:
            job_details[attrib.name] = attrib.value
        # manually set 'id' attribute
        job_details['id'] = head.name
        self.log.debug("Found jobinfo %s" % job_details)
        return job_details
예제 #18
0
    def query(self, obj, service='', objType='node'):
        """
        query status and eventually other scheduler related information
        It may use single 'node' scheduler id or bulk id for association

        For every job in ``obj.jobs`` accepted by ``self.valid`` the PBS
        server is asked for 'job_state' and 'exec_host', and the job's
        ``runningJob`` entries 'statusScheduler', 'status' and 'destination'
        are updated. A job the server no longer reports (PBS error 15001)
        is recorded as 'Done' with an empty destination.

        Raises ``SchedulerError`` when ``obj`` is not a ``Task`` or when PBS
        reports any error other than 15001. Nothing is returned.
        """
        if not isinstance(obj, Task):
            raise SchedulerError('wrong argument type', str(type(obj)))

        conn = self.pbs_conn()
        try:
            attrl = pbs.new_attrl(2)
            attrl[0].name = 'job_state'
            attrl[1].name = 'exec_host'

            for job in obj.jobs:
                if not self.valid(job.runningJob):
                    continue
                job_id = str(job.runningJob['schedulerId']).strip()
                jobstat = pbs.pbs_statjob(conn, job_id, attrl, 'Null')

                host = ''
                if not jobstat:
                    err, err_text = pbs.error()
                    if err != 15001:  # unknown job (probably finished)
                        self.logging.error('Error in job query for ' + job_id)
                        self.logging.error('PBS error code ' + str(err) + ': ' +
                                           err_text)
                        raise SchedulerError('PBS error',
                                             str(err) + ': ' + err_text)
                    pbs_stat = 'Done'
                else:
                    pbs_stat = jobstat[0].attribs[0].value
                    if len(jobstat[0].attribs) > 1:
                        host = jobstat[0].attribs[1].value
                job.runningJob['statusScheduler'] = pbs_stat
                job.runningJob['status'] = self.status_map[pbs_stat]
                job.runningJob['destination'] = host
        finally:
            # Disconnect on every exit path, including unexpected errors
            # such as a state missing from status_map.
            self.pbs_disconn(conn)
예제 #19
0
파일: updater.py 프로젝트: tomaso/goove
def update_one_job(conn, jobid, bs):
    """ Update info about one job

    The job is looked up on the PBS connection ``conn`` under the id
    ``"<jobid>.<bs.name>"``. Its attributes are converted with
    ``attribs_to_dict`` and copied onto the ``Job`` row for
    ``(jobid, bs)``, which is created if missing and then saved.

    Returns the saved ``Job``, or ``None`` (after logging an error) when PBS
    reports nothing for the job.
    """
    # TODO: I am a bit afraid this will take a lot of time
    encoded_id = ("%s.%s" % (jobid, bs.name)).encode('iso-8859-1', 'replace')
    reply = pbs.pbs_statjob(conn, encoded_id, [], "")
    if len(reply) > 0:
        details = attribs_to_dict(reply[0].attribs)

        record, is_new = Job.objects.get_or_create(jobid=jobid, server=bs)
        if is_new:
            log(LOG_INFO, "new job will be created: %s @ %s in queue: %s" % (jobid, bs.name, details['queue']))
        record.job_name = details['Job_Name']
        record.queue = Queue.objects.get(name=details['queue'])
        record.job_state = JobState.objects.get(shortname=details['job_state'])

        record.save()
        return record

    log(LOG_ERROR, "failed to update info from pbs about job: %s.%s" % (jobid,bs.name))
    return None
예제 #20
0
    def query(self, obj, service='', objType='node') :
        """
        query status and eventually other scheduler related information
        It may use single 'node' scheduler id or bulk id for association

        Each job in ``obj.jobs`` that passes ``self.valid`` is looked up on
        the PBS server ('job_state' and 'exec_host' are requested) and its
        ``runningJob`` entries 'statusScheduler', 'status' and 'destination'
        are refreshed. When the server does not report the job (PBS error
        15001) it is recorded as 'Done' with an empty destination.

        Raises ``SchedulerError`` when ``obj`` is not a ``Task`` or when PBS
        reports an error other than 15001. Nothing is returned.
        """
        if not isinstance(obj, Task):
            raise SchedulerError('wrong argument type', str(type(obj)))

        conn = self.pbs_conn()
        try:
            attrl = pbs.new_attrl(2)
            attrl[0].name = 'job_state'
            attrl[1].name = 'exec_host'

            for job in obj.jobs:
                if not self.valid(job.runningJob):
                    continue
                sched_id = str(job.runningJob['schedulerId']).strip()
                jobstat = pbs.pbs_statjob(conn, sched_id, attrl, 'Null')

                host = ''
                if not jobstat:
                    err, err_text = pbs.error()
                    if err != 15001:  # unknown job (probably finished)
                        self.logging.error('Error in job query for ' + sched_id)
                        self.logging.error('PBS error code ' + str(err) + ': ' + err_text)
                        raise SchedulerError('PBS error', str(err) + ': ' + err_text)
                    pbs_stat = 'Done'
                else:
                    pbs_stat = jobstat[0].attribs[0].value
                    if len(jobstat[0].attribs) > 1:
                        host = jobstat[0].attribs[1].value
                job.runningJob['statusScheduler'] = pbs_stat
                job.runningJob['status'] = self.status_map[pbs_stat]
                job.runningJob['destination'] = host
        finally:
            # One disconnect for every exit path, so the connection is not
            # leaked when an unexpected exception escapes the loop.
            self.pbs_disconn(conn)
예제 #21
0
    def info(self, jobid, types=None, job_filter=None):
        """Return jobinfo

        Queries ``pbs.pbs_statjob`` on ``self.pbsconn`` for ``jobid`` and
        returns a list with one dict per job accepted by
        ``self.match_filter``. Each dict maps attribute names to values and
        stores the job name under ``'id'``. An empty list is returned when
        no job is reported.

        ``types`` is a list of attribute names to request; with ``None``,
        'NULL' is passed as the attribute list. ``job_filter`` is a dict
        merged with ``self.job_filter`` (the latter wins on duplicate keys);
        its keys are added to the requested attributes. Neither argument is
        modified.
        """
        # add all filter values to the types
        # Work on a private copy so the caller's dict is left untouched.
        job_filter = {} if job_filter is None else dict(job_filter)
        self.log.debug("Job filter passed %s", job_filter)
        if self.job_filter is not None:
            self.log.debug("Job filter update with %s", self.job_filter)
            job_filter.update(self.job_filter)
        self.log.debug("Job filter used %s", job_filter)

        if types is None:
            # 'NULL' is passed as the attribute list, so there is no list to
            # extend; the old code raised TypeError here whenever a filter
            # was active.
            jobattr = 'NULL'
        else:
            types = list(types)
            for filter_name in job_filter:
                if filter_name not in types:
                    types.append(filter_name)
            jobattr = pbs.new_attrl(len(types))
            for idx, name in enumerate(types):
                jobattr[idx].name = name

        jobs = pbs.pbs_statjob(self.pbsconn, jobid, jobattr, 'NULL')
        if not jobs:
            self.log.debug("No job found. Wrong id %s or job finished?", jobid)
            return []

        self.log.debug("Request for jobid %s returned %d result(s) %s", jobid, len(jobs), jobs)
        res = []
        for j in jobs:
            job_details = dict([(attrib.name, attrib.value) for attrib in j.attribs])
            job_details['id'] = j.name  # add id
            if self.match_filter(job_details, job_filter):
                res.append(job_details)
        self.log.debug("Found jobinfo %s", res)
        return res
예제 #22
0
    sys.path.append(os.path.abspath("./"))
    import pbs
except:
    # Running from within the tests directory.
    sys.path.append(os.path.abspath("../"))
    import pbs

# You need to set the hostname of the PBS Server.
pbsserver = 'hpcnode0'

conn = pbs.pbs_connect(pbsserver)
if conn < 0:
    print('Error connecting to server.')
    sys.exit(1)

# Returns a batch_status structure.
b = pbs.pbs_statjob(conn, '', None, None)

# Both the job list and each job's attribute list are walked through their
# ``next`` links until ``None`` is reached.
while b is not None:
    print("\n------ Job: %s ------" % b.name)
    attribs = b.attribs
    while attribs is not None:
        # Attributes that carry a resource are shown as "name.resource".
        if attribs.resource is not None:
            print("    %s.%s = %s" %
                  (attribs.name, attribs.resource, attribs.value))
        else:
            print("    %s = %s" % (attribs.name, attribs.value))
        attribs = attribs.next

    b = b.next
예제 #23
0
def get_jobs(conn, extend=None):
    '''
    Get information on the PBS jobs.

    This function returns a list of jobs, where each job is a dictionary
    keyed by lower-cased attribute name.

    Attributes that carry a resource are stored under "<name>_<resource>",
    e.g. the requested resources of a job:
      Resource_List : mem = 120gb           -> 'resource_list_mem'
      Resource_List : ncpus = 24            -> 'resource_list_ncpus'
      Resource_List : walltime = 200:00:00  -> 'resource_list_walltime'

    Non-resource attributes are stored under their own name, e.g.:
      Job_Name : AuCuZn                     -> 'job_name'
      job_state : Q                         -> 'job_state'
      queue : workq                         -> 'queue'

    Every dictionary also holds 'job_id': the part of the job name before
    the first '.' (the name is a string like '137550.hpcnode0').

    conn is the PBS connection to query; extend is passed through to
    pbs.pbs_statjob unchanged.
    '''
    # Some jobs don't yet have a particular attribute as the job hasn't started yet.
    # We have to create that key and set it to something, otherwise we get errors like:
    #   NameError("name 'resources_used_ncpus' is not defined",)
    attribute_names = [
        'resources_used_ncpus', 'resources_used_mem', 'resources_used_vmem',
        'resources_used_walltime', 'exec_host', 'exec_vnode', 'stime',
        'etime', 'resources_time_left', 'resources_used_cpupercent',
    ]
    # Defaults are built once and copied for each job.
    defaults = dict.fromkeys(attribute_names, '')
    for name in ['resources_used_walltime', 'resources_used_cput']:
        defaults[name] = '0:0:0'

    jobs = []  # This will contain a list of dictionaries.

    b = pbs.pbs_statjob(conn, '', None, extend)
    while b is not None:
        attributes = dict(defaults)
        attributes['job_id'] = b.name.split('.')[0]

        attribs = b.attribs
        while attribs is not None:
            if attribs.resource is not None:
                keyname = '%s_%s' % (attribs.name, attribs.resource)
            else:
                keyname = attribs.name
            attributes[keyname.lower()] = attribs.value

            attribs = attribs.next

        jobs.append(attributes)
        b = b.next

    return jobs
예제 #24
0
#print combine[1].name
#print combine[2].name

nodes = pbs.pbs_statnode(con, "", 'NULL', "NULL")
for node in nodes:
  print node.name, ':'
  for prop in node.attribs:
     print '\t', prop.name, ' = ',  prop.value

queues = pbs.pbs_statque(con, "", 'NULL', "")
for queue in queues:
  print queue.name
  for attrib in queue.attribs:
    print '\t', attrib.name, ' = ',  attrib.value

jobs = pbs.pbs_statjob(con, "", 'NULL', "")
for job in jobs:
  print job.name
  for attrib in job.attribs:
    print '\t', attrib.name, ' = ',  attrib.value

sys.exit(0)

## OLD stuff, obsolete
##
# NOTE(review): this follows an unconditional sys.exit(0), so it is never
# executed. Nothing in the visible code advances batch_info or node_attr, so
# neither loop would terminate if it were run as shown -- the fragment may be
# truncated; confirm before reviving it.
while batch_info.this:
  node_attr = batch_info.attribs
  print batch_info.name, ':'
  while node_attr.this:
    # print node_attr.this
    print '\t', node_attr.name ,node_attr.value
예제 #25
0
 def _getJobStatus(self, jobid):
     """
     Return the value of the "job_state" attribute of job ``jobid``.

     The first job returned by ``pbs.pbs_statjob`` on
     ``self._connection_id`` is inspected. ``None`` is returned when it has
     no attribute named "job_state". An empty reply is not guarded against.
     """
     reply = pbs.pbs_statjob(self._connection_id, jobid, "NULL", "NULL")
     for entry in reply[0].attribs:
         if entry.name != "job_state":
             continue
         return entry.value
     return None