def _list_2_attrib(self, list):
    """Build a pbs attrl array from a python list of attribute names."""
    self.attribs = pbs.new_attrl(len(list))
    for idx, name in enumerate(list):
        self.attribs[idx].name = name
def __init__(self, job):
    """Wrap a job for PBS submission.

    job -- object exposing db_job.id, tool.config, tool.directory and
           command (see callers; exact type not visible here).
    """
    # No PBS id until the job is actually submitted.
    self.jobid = 0
    # Single-attribute query list used later when polling job state.
    self.attrl = pbs.new_attrl(1)
    self.attrl[0].name = 'job_state'
    # Submission attribute list (attropl) derived from job/tool config.
    self.attropl = self.get_pbs_attr(job.db_job.id, job.tool.config)
    # PBS_SCRIPT is a module-level template taking (directory, command)
    # -- TODO confirm its exact format elsewhere in the file.
    self.script = PBS_SCRIPT % (job.tool.directory, job.command)
    self.status = main.job.JOB_STATUS.READY
def check_all_jobs( self ):
    """
    Returns a list of servers that failed to be contacted and a dict
    of "job_id : status" pairs (where status is a bunchified version
    of the API's structure.
    """
    server_names = []
    failed = []
    job_statuses = {}
    # Collect the distinct servers backing the watched jobs (preserving
    # first-seen order) and bump each job's poll counter.
    for watched_job in self.watched:
        name = self.__get_pbs_server(watched_job.job_destination.params)
        if name not in server_names:
            server_names.append( name )
        watched_job.check_count += 1
    # Query each server once for the status of all of its jobs.
    for name in server_names:
        conn = pbs.pbs_connect( util.smart_str( name ) )
        if conn <= 0:
            log.debug("connection to PBS server %s for state check failed" % name )
            failed.append( name )
            continue
        wanted = pbs.new_attrl(3)
        wanted[0].name = pbs.ATTR_state
        wanted[1].name = pbs.ATTR_used
        wanted[2].name = pbs.ATTR_exitstat
        all_jobs = pbs.pbs_statjob( conn, None, wanted, None )
        pbs.pbs_disconnect( conn )
        job_statuses.update( self.convert_statjob_to_bunches( all_jobs ) )
    return ( failed, job_statuses )
def check_all_jobs(self):
    """
    Returns a list of servers that failed to be contacted and a dict of
    "job_id : status" pairs (where status is a bunchified version of the
    API's structure.
    """
    servers = []
    failures = []
    statuses = {}
    # Pass 1: count this poll attempt for every watched job and note
    # each distinct server, keeping first-seen order.
    for state in self.watched:
        state.check_count += 1
        server = self.__get_pbs_server(state.job_destination.params)
        if server not in servers:
            servers.append(server)
    # Pass 2: a single status query per server.
    for server in servers:
        handle = pbs.pbs_connect(util.smart_str(server))
        if handle <= 0:
            log.debug(
                "connection to PBS server %s for state check failed"
                % server)
            failures.append(server)
            continue
        attrs = pbs.new_attrl(3)
        attrs[0].name = pbs.ATTR_state
        attrs[1].name = pbs.ATTR_used
        attrs[2].name = pbs.ATTR_exitstat
        server_jobs = pbs.pbs_statjob(handle, None, attrs, None)
        pbs.pbs_disconnect(handle)
        statuses.update(self.convert_statjob_to_bunches(server_jobs))
    return (failures, statuses)
def test_init():
    # Smoke test: create a 2-element attrl array and show its wrapper type.
    print 'Test init'
    w = pbs.new_attrl(2)
    print type(w)
    print 'end Test init'
def _list_2_attrib(self, list):
    """Fill self.attribs with a pbs attrl array holding each name in list."""
    total = len(list)
    self.attribs = pbs.new_attrl(total)
    pos = 0
    while pos < total:
        self.attribs[pos].name = list[pos]
        pos += 1
def _list_2_attrib(self, list):
    """Convert a python list to an attrib list suitable for pbs."""
    self.attribs = pbs.new_attrl( len(list) )
    for idx, entry in enumerate(list):
        # Entries may carry a resource suffix ("name.resource"); only
        # the bare attribute name is stored.
        self.attribs[idx].name = entry.split('.')[0]
def test_loop():
    # Verify that attrl array elements can be fetched, mutated through
    # the returned references, and iterated.
    print 'Test loop'
    w = pbs.new_attrl(2)
    b = w[0]
    c = w[1]
    b.name = 'bas'
    b.value = 'vlies'
    c.name = 'jaap'
    c.value = 'dijkshoorn'
    for i in w:
        print i
def info(self, types=None):
    """
    Return jobinfo
    """
    # types -- attribute name or list of names to query; None queries all.
    # Returns a dict of attribute name -> value (plus 'id'), or None when
    # there is no jobid yet or the job is unknown/finished.
    if not self.jobid:
        self.log.debug("no jobid, job is not submitted yet?")
        return None

    # convert single type into list
    if type(types) is str:
        types = [types]

    self.log.debug("Return info types %s" % types)

    # create attribute list to query pbs with
    if types is None:
        # NULL is presumably a module-level alias the pbs bindings accept
        # for "all attributes" -- TODO confirm where it is defined.
        jobattr = NULL
    else:
        jobattr = pbs.new_attrl(len(types))
        for idx, attr in enumerate(types):
            jobattr[idx].name = attr

    # get a new connection (otherwise this seems to fail)
    if self.clean_conn:
        pbs.pbs_disconnect(self.pbsconn)
        self.pbsconn = pbs.pbs_connect(self.pbs_server)

    jobs = pbs.pbs_statjob(self.pbsconn, self.jobid, jobattr, NULL)
    if len(jobs) == 0:
        # no job found, return None info
        res = None
        self.log.debug(
            "No job found. Wrong id %s or job finished? Returning %s" %
            (self.jobid, res))
        return res
    elif len(jobs) == 1:
        self.log.debug("Request for jobid %s returned one result %s" %
                       (self.jobid, jobs))
    else:
        self.log.error(
            "Request for jobid %s returned more then one result %s" %
            (self.jobid, jobs))

    # only expect to have a list with one element
    j = jobs[0]

    # convert attribs into useable dict
    job_details = dict([(attrib.name, attrib.value)
                        for attrib in j.attribs])
    # manually set 'id' attribute
    job_details['id'] = j.name
    self.log.debug("Found jobinfo %s" % job_details)
    return job_details
def check_single_job( self, pbs_server_name, job_id ):
    """
    Returns the state of a single job, used to make sure a job
    is really dead.
    """
    conn = pbs.pbs_connect( util.smart_str( pbs_server_name ) )
    if conn <= 0:
        # Can't reach the server: caller gets None instead of a state.
        log.debug("connection to PBS server %s for state check failed" % pbs_server_name )
        return None
    wanted = pbs.new_attrl(1)
    wanted[0].name = pbs.ATTR_state
    result = pbs.pbs_statjob( conn, job_id, wanted, None )
    pbs.pbs_disconnect( conn )
    return result[0].attribs[0].value
def info(self, jobid, types=None, job_filter=None):
    """Return jobinfo for jobid as a list of dicts, one per matching job.

    jobid      -- PBS job id to query
    types      -- attribute name or list of names to query (None = all)
    job_filter -- dict of attribute:value pairs a job must match
    Returns [] when no job matches.
    """
    # TODO restrict to current user jobs
    if type(types) is str:
        types = [types]
    self.log.debug("Return info types %s" % types)

    # Merge the instance-level filter into the per-call filter.
    if job_filter is None:
        job_filter = {}
    self.log.debug("Job filter passed %s" % job_filter)
    if self.job_filter is not None:
        self.log.debug("Job filter update with %s" % self.job_filter)
        job_filter.update(self.job_filter)
    self.log.debug("Job filter used %s" % job_filter)

    # Every filtered attribute must also be queried.  Guard against
    # types being None here: the previous code raised TypeError
    # ("in None") whenever a filter was set without explicit types.
    if job_filter:
        if types is None:
            types = []
        for filter_name in job_filter.keys():
            if filter_name not in types:
                types.append(filter_name)

    if types is None:
        jobattr = 'NULL'  # let pbs return every attribute
    else:
        jobattr = pbs.new_attrl(len(types))
        for idx, name in enumerate(types):
            jobattr[idx].name = name

    jobs = pbs.pbs_statjob(self.pbsconn, jobid, jobattr, 'NULL')
    if len(jobs) == 0:
        res = []  # return nothing (dead placeholder list removed)
        self.log.debug("No job found. Wrong id %s or job finished? Returning %s" % (jobid, res))
        return res
    elif len(jobs) == 1:
        self.log.debug(
            "Request for jobid %s returned one result %s" % (jobid, jobs))
    else:
        self.log.error("Request for jobid %s returned more then one result %s" % (jobid, jobs))

    # Convert each matching job into a plain dict keyed by attribute name.
    res = []
    for j in jobs:
        job_details = dict(
            [(attrib.name, attrib.value) for attrib in j.attribs])
        job_details['id'] = j.name  # add id
        if self.match_filter(job_details, job_filter):
            res.append(job_details)
    self.log.debug("Found jobinfo %s" % res)
    return res
def test_getitem():
    # Check that indexing an attrl array yields attribute objects whose
    # name/value fields can be set independently per slot.
    print 'Test getitem'
    w = pbs.new_attrl(2)
    b = w[0]
    c = w[1]
    print b, type(b)
    b.name = 'bas'
    b.value = 'vlies'
    print 'b', b
    c.name = 'cbassssssssssss'
    c.value = 'cvlies'
    print 'c', c
def info(self, types=None):
    """
    Return jobinfo
    """
    # Queries PBS for self.jobid and returns {attribute_name: value, 'id': name}
    # or None when the job is not submitted / not found.
    if not self.jobid:
        self.log.debug("no jobid, job is not submitted yet?")
        return None

    # convert single type into list
    if type(types) is str:
        types = [types]

    self.log.debug("Return info types %s" % types)

    # create attribute list to query pbs with
    if types is None:
        # NULL: external alias accepted by the pbs bindings -- TODO confirm.
        jobattr = NULL
    else:
        jobattr = pbs.new_attrl(len(types))
        for idx, attr in enumerate(types):
            jobattr[idx].name = attr

    # get a new connection (otherwise this seems to fail)
    if self.clean_conn:
        pbs.pbs_disconnect(self.pbsconn)
        self.pbsconn = pbs.pbs_connect(self.pbs_server)

    jobs = pbs.pbs_statjob(self.pbsconn, self.jobid, jobattr, NULL)
    if len(jobs) == 0:
        # no job found, return None info
        res = None
        self.log.debug("No job found. Wrong id %s or job finished? Returning %s" % (self.jobid, res))
        return res
    elif len(jobs) == 1:
        self.log.debug("Request for jobid %s returned one result %s" % (self.jobid, jobs))
    else:
        self.log.error("Request for jobid %s returned more then one result %s" % (self.jobid, jobs))

    # only expect to have a list with one element
    j = jobs[0]

    # convert attribs into useable dict
    job_details = dict([ (attrib.name, attrib.value) for attrib in j.attribs ])
    # manually set 'id' attribute
    job_details['id'] = j.name

    self.log.debug("Found jobinfo %s" % job_details)
    return job_details
def main():
    # Connect to the default pbs server and print its 'pbs_version'
    # attribute for every returned status entry.
    pbs_server = pbs.pbs_default()
    if not pbs_server:
        print 'No default server'
        sys.exit(1)
    con = pbs.pbs_connect(pbs_server)
    attr_l = pbs.new_attrl(1)
    attr_l[0].name = 'pbs_version'
    server_info = pbs.pbs_statserver(con, attr_l, 'NULL')
    for entry in server_info:
        print entry.name
        for attrib in entry.attribs:
            print '\t', attrib.name, ' = ', attrib.value
def query(self, obj, service='', objType='node'):
    """
    query status and eventually other scheduler related information
    It may use single 'node' scheduler id or bulk id for association
    """
    if type(obj) != Task:
        raise SchedulerError('wrong argument type', str(type(obj)))
    jobids = []
    conn = self.pbs_conn()
    # Per-job attributes we care about: state and execution host.
    wanted = pbs.new_attrl(2)
    wanted[0].name = 'job_state'
    wanted[1].name = 'exec_host'
    for task_job in obj.jobs:
        if not self.valid(task_job.runningJob):
            continue
        sched_id = str(task_job.runningJob['schedulerId']).strip()
        result = pbs.pbs_statjob(conn, sched_id, wanted, 'Null')
        if not result:
            code, message = pbs.error()
            # 15001 == unknown job (probably finished); anything else
            # is a real failure.
            if code != 15001:
                self.logging.error('Error in job query for ' + sched_id)
                self.logging.error('PBS error code ' + str(code) + ': ' + message)
                self.pbs_disconn(conn)
                raise SchedulerError('PBS error', str(code) + ': ' + message)
        exec_host = ''
        if len(result) == 0:
            pbs_state = 'Done'
        else:
            pbs_state = result[0].attribs[0].value
            if len(result[0].attribs) > 1:
                exec_host = result[0].attribs[1].value
        task_job.runningJob['statusScheduler'] = pbs_state
        task_job.runningJob['status'] = self.status_map[pbs_state]
        task_job.runningJob['destination'] = exec_host
    self.pbs_disconn(conn)
def query(self, obj, service='', objType='node') :
    """
    query status and eventually other scheduler related information
    It may use single 'node' scheduler id or bulk id for association
    """
    if type(obj) != Task :
        raise SchedulerError('wrong argument type', str( type(obj) ))
    jobids=[]
    conn=self.pbs_conn()
    # Query each job for its state and the host it executes on.
    attrl=pbs.new_attrl(2)
    attrl[0].name='job_state'
    attrl[1].name='exec_host'
    for job in obj.jobs :
        if not self.valid( job.runningJob ):
            continue
        id=str(job.runningJob['schedulerId']).strip()
        jobstat=pbs.pbs_statjob(conn, id, attrl, 'Null')
        if not jobstat:
            err, err_text=pbs.error()
            # PBS error 15001 means "unknown job" (probably finished)
            # and is treated as Done below; anything else aborts.
            if err!=15001:
                self.logging.error('Error in job query for '+id)
                self.logging.error('PBS error code '+str(err)+': '+err_text)
                self.pbs_disconn(conn)
                raise SchedulerError('PBS error', str(err)+': '+err_text)
        host=''
        if len(jobstat)==0:
            pbs_stat='Done'
        else:
            pbs_stat=jobstat[0].attribs[0].value
            if len(jobstat[0].attribs)>1:
                host=jobstat[0].attribs[1].value
        # Mirror the raw and mapped state plus destination back onto the job.
        job.runningJob['statusScheduler']=pbs_stat
        job.runningJob['status'] = self.status_map[pbs_stat]
        job.runningJob['destination']=host
    self.pbs_disconn(conn)
def info(self, jobid, types=None, job_filter=None):
    """Return jobinfo for jobid as a list of dicts, one per matching job.

    jobid      -- PBS job id to query
    types      -- attribute name or list of names to query (None = all)
    job_filter -- dict of attribute:value pairs a job must match
    Returns [] when no job matches.
    """
    # Accept a single attribute name for convenience, consistent with
    # the other info() implementations in this codebase.
    if isinstance(types, str):
        types = [types]

    # add all filter values to the types
    if job_filter is None:
        job_filter = {}
    self.log.debug("Job filter passed %s", job_filter)
    if self.job_filter is not None:
        self.log.debug("Job filter update with %s", self.job_filter)
        job_filter.update(self.job_filter)
    self.log.debug("Job filter used %s", job_filter)

    # Every filtered attribute must be queried too.  types may still be
    # None here; the previous code raised TypeError ("in None") whenever
    # a filter was configured without explicit types.
    if job_filter:
        if types is None:
            types = []
        for filter_name in job_filter.keys():
            if filter_name not in types:
                types.append(filter_name)

    if types is None:
        jobattr = 'NULL'  # query every attribute
    else:
        jobattr = pbs.new_attrl(len(types))
        for idx, name in enumerate(types):
            jobattr[idx].name = name

    jobs = pbs.pbs_statjob(self.pbsconn, jobid, jobattr, 'NULL')
    if not jobs:
        self.log.debug("No job found. Wrong id %s or job finished?", jobid)
        return []

    self.log.debug("Request for jobid %s returned %d result(s) %s",
                   jobid, len(jobs), jobs)

    # Convert each matching job into a plain dict keyed by attribute name.
    res = []
    for j in jobs:
        job_details = dict([(attrib.name, attrib.value)
                            for attrib in j.attribs])
        job_details['id'] = j.name  # add id
        if self.match_filter(job_details, job_filter):
            res.append(job_details)
    self.log.debug("Found jobinfo %s", res)
    return res
def pbsmon():
    # Print a rack/node grid of node-state characters for the default
    # (or argv-supplied) pbs server, a per-state legend with counts, and
    # any nodes that do not match the gb-r%dn%d naming scheme.
    global NODES_PER_RACK, N_RACKS, PBS_STATES

    if len(sys.argv) > 1:
        pbs_server = sys.argv[1]
    else:
        pbs_server = pbs.pbs_default()
    if not pbs_server:
        print "No default pbs server, usage: %s [server]" % os.path.basename(sys.argv[0])
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)
    if con < 0:
        errno, text = pbs.error()
        print errno, text
        sys.exit(1)

    # get the state of the nodes
    attrl = pbs.new_attrl(2)
    attrl[0].name = "state"
    attrl[1].name = "jobs"
    nodes = pbs.pbs_statnode(con, "", attrl, "NULL")

    node_dict = {}
    count_states = {}
    for key in PBS_STATES.keys():
        count_states[key] = 0

    for node in nodes:
        node_attr = node.attribs
        # A node may report several comma-separated states; only the
        # first one is counted.
        temp = string.split(node_attr[0].value, ",")
        state = temp[0]
        state_char = PBS_STATES[state]
        count_states[state] = count_states[state] + 1
        if state == pbs.ND_free:
            # A "free" node that still lists jobs is re-classified under
            # the pseudo-state pbs_ND_single (module-level constant,
            # presumably "free but partially used" -- TODO confirm).
            if len(node_attr) > 1:
                # print 'TD: %s' % node.name, node_attr[1]
                state_char = PBS_STATES[pbs_ND_single]
                count_states[pbs.ND_free] = count_states[pbs.ND_free] - 1
                count_states[pbs_ND_single] = count_states[pbs_ND_single] + 1
        # print 'TD: %s %s' % (node.name, state_char)
        node_dict[node.name] = state_char

    legend = PBS_STATES.keys()
    legend.sort()

    # print nodes with gb-r%dn%d naming scheme
    print " ",
    for rack in xrange(1, N_RACKS + 1):
        print "%2d" % rack,
    print
    for node_nr in xrange(1, NODES_PER_RACK + 1):
        print "%2d" % node_nr,
        for rack in xrange(1, N_RACKS + 1):
            node_name = "gb-r%dn%d" % (rack, node_nr)
            if node_dict.has_key(node_name):
                print " %s" % node_dict[node_name],
                del node_dict[node_name]
            else:
                print " ",
        # One legend entry (char, state name, count) is appended per row.
        if node_nr - 1 < len(legend):
            state = legend[node_nr - 1]
            print " %s %-13s : %d" % (PBS_STATES[state], state, count_states[state])
        else:
            print
    print

    # any other nodes?
    arr = node_dict.keys()
    if arr:
        arr.sort()
        for node in arr:
            print "%s %s" % (node, node_dict[node])
        print
def main():
    # Collect the state of every node on the default (or argv-supplied)
    # pbs server and hand the lists to display_cluster_status().
    state_list = []
    node_list = []
    node_nr = 0

    if len(sys.argv) > 1:
        pbs_server = sys.argv[1]
    else:
        pbs_server = pbs.pbs_default()
    if not pbs_server:
        print "No default pbs server, usage: pbsmon [server] "
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)
    if con < 0:
        errno, text = pbs.error()
        print errno, text
        sys.exit(1)

    # We are only interested in the state and jobs of a node
    #
    attrl = pbs.new_attrl(2)
    attrl[0].name = "state"
    attrl[1].name = "jobs"
    nodes = pbs.pbs_statnode(con, "", attrl, "NULL")

    # Sometimes it is None, then again NULL -- beats me
    #
    for node in nodes:
        # display_node_status(batch_info)
        node_attr = node.attribs
        # A node can have serveral states, huh. We are only
        # interested in first entry.
        #
        temp = string.splitfields(node_attr[0].value, ",")
        state = temp[0]
        # look if on a free node a job is scheduled, then mark it
        # as other state
        #
        if state == pbs.ND_free:
            if len([x for x in node_attr if x.name == "jobs"]):
                state_list.append(translate_state[pbs_ND_free_and_job])
            else:
                state_list.append(translate_state[state])
        else:
            state_list.append(translate_state[state])
        # Use the numeric part of the hostname as the display label,
        # falling back to a running counter when no digits are found.
        re_host = re.compile(
            r"""
            (?P<name>\d+)
            """,
            re.VERBOSE,
        )
        result = re_host.search(node.name)
        if result:
            node_list.append(result.group("name"))
        else:
            node_nr = node_nr + 1
            node_list.append(str(node_nr))

    display_cluster_status(node_list, state_list)
#!/usr/bin/python import sys import pbs pbs_server = pbs.pbs_default() con = pbs.pbs_connect(pbs_server) print con z = pbs.new_attrl(1) z[0].name = 'state' print z[0].name batch_info = pbs.pbs_statnode(con, "", z, "NULL") #print type(batch_info), batch_info, batch_info.name print type(batch_info), batch_info print 'bas' print type(batch_info[0]) print batch_info[0] print batch_info[0].name #b = pbs.batch_statusPtr(batch_info[0]) #print type(b) #print b #print b.name #sys.exit(1) while batch_info.this: node_attr = batch_info.attribs print batch_info.name, ':'
# NOTE(review): this chunk is the tail of a larger test script; 'c' and
# 'w' are defined in code that is not visible here.
c.name = 'jaap'
c.value = 'dijkshoorn'
for i in w:
    print i

# Run the attrl unit-test style checks defined elsewhere in the script.
test_init()
test_getitem()
test_loop()

pbs_server = pbs.pbs_default()
con = pbs.pbs_connect(pbs_server)
print con

z = pbs.new_attrl(2)
z[0].name = 'state'
z[1].name = 'ntype'
#z.append('bla');
#print 'z', z
for entry in z:
    print 'entry', entry

x = pbs.new_attrl(1)
x[0].name = 'np'

# attrl arrays support '+' concatenation and len().
combine = z + x
print combine, len(combine)
def main():
    # Collect the state of every node on the default (or argv-supplied)
    # pbs server and hand the lists to display_cluster_status().
    state_list = []
    node_list = []
    node_nr = 0

    if len(sys.argv) > 1:
        pbs_server = sys.argv[1]
    else:
        pbs_server = pbs.pbs_default()
    if not pbs_server:
        print "No default pbs server, usage: pbsmon [server] "
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)
    if con < 0:
        errno, text = pbs.error()
        print errno, text
        sys.exit(1)

    # We are only interested in the state and jobs of a node
    #
    attrl = pbs.new_attrl(2)
    attrl[0].name = 'state'
    attrl[1].name = 'jobs'
    nodes = pbs.pbs_statnode(con, "", attrl, "NULL")

    # Sometimes it is None, then again NULL -- beats me
    #
    for node in nodes:
        # display_node_status(batch_info)
        node_attr = node.attribs
        # A node can have serveral states, huh. We are only
        # interested in first entry.
        #
        temp = string.splitfields(node_attr[0].value, ',')
        state = temp[0]
        # look if on a free node a job is scheduled, then mark it
        # as other state
        #
        if state == pbs.ND_free:
            if len([x for x in node_attr if x.name == 'jobs']):
                state_list.append(translate_state[pbs_ND_free_and_job])
            else:
                state_list.append(translate_state[state])
        else:
            state_list.append(translate_state[state])
        # Use the numeric part of the hostname as the display label,
        # falling back to a running counter when no digits are found.
        re_host = re.compile(r"""
            (?P<name>\d+)
            """, re.VERBOSE)
        result = re_host.search(node.name)
        if result:
            node_list.append(result.group('name'))
        else:
            node_nr = node_nr + 1
            node_list.append(str(node_nr))

    display_cluster_status(node_list, state_list)
def pbsmon():
    # Print a rack/node grid of node-state characters for the default
    # (or argv-supplied) pbs server, a per-state legend with counts, and
    # any nodes that do not match the gb-r%dn%d naming scheme.
    global NODES_PER_RACK, N_RACKS, PBS_STATES

    if len(sys.argv) > 1:
        pbs_server = sys.argv[1]
    else:
        pbs_server = pbs.pbs_default()
    if not pbs_server:
        print 'No default pbs server, usage: %s [server]' % os.path.basename(
            sys.argv[0])
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)
    if con < 0:
        errno, text = pbs.error()
        print errno, text
        sys.exit(1)

    # get the state of the nodes
    attrl = pbs.new_attrl(2)
    attrl[0].name = 'state'
    attrl[1].name = 'jobs'
    nodes = pbs.pbs_statnode(con, '', attrl, 'NULL')

    node_dict = {}
    count_states = {}
    for key in PBS_STATES.keys():
        count_states[key] = 0

    for node in nodes:
        node_attr = node.attribs
        # A node may report several comma-separated states; only the
        # first one is counted.
        temp = string.split(node_attr[0].value, ',')
        state = temp[0]
        state_char = PBS_STATES[state]
        count_states[state] = count_states[state] + 1
        if state == pbs.ND_free:
            # A "free" node that still lists jobs is re-classified under
            # the pseudo-state pbs_ND_single (module-level constant,
            # presumably "free but partially used" -- TODO confirm).
            if len(node_attr) > 1:
                # print 'TD: %s' % node.name, node_attr[1]
                state_char = PBS_STATES[pbs_ND_single]
                count_states[pbs.ND_free] = count_states[pbs.ND_free] - 1
                count_states[pbs_ND_single] = count_states[pbs_ND_single] + 1
        # print 'TD: %s %s' % (node.name, state_char)
        node_dict[node.name] = state_char

    legend = PBS_STATES.keys()
    legend.sort()

    # print nodes with gb-r%dn%d naming scheme
    print ' ',
    for rack in xrange(1, N_RACKS + 1):
        print '%2d' % rack,
    print
    for node_nr in xrange(1, NODES_PER_RACK + 1):
        print '%2d' % node_nr,
        for rack in xrange(1, N_RACKS + 1):
            node_name = 'gb-r%dn%d' % (rack, node_nr)
            if node_dict.has_key(node_name):
                print ' %s' % node_dict[node_name],
                del node_dict[node_name]
            else:
                print ' ',
        # One legend entry (char, state name, count) is appended per row.
        if node_nr - 1 < len(legend):
            state = legend[node_nr - 1]
            print ' %s %-13s : %d' % (PBS_STATES[state], state, count_states[state])
        else:
            print
    print

    # any other nodes?
    arr = node_dict.keys()
    if arr:
        arr.sort()
        for node in arr:
            print '%s %s' % (node, node_dict[node])
        print