def del_job(self, job_id, server):
    # Create a new connection for the child process
    c = pbs.pbs_connect(str(pbs.pbs_default()))
    if server is None:
        server = pbs.pbs_default()
    job_full_id = job_id + '.' + server
    result = pbs.pbs_deljob(c, job_full_id, 'NULL')
    # If the operation is successful, result == 0
    return result
def main():
    server = pbs.pbs_default()
    c = pbs.pbs_connect(server)

    nodes = pbs.pbs_statnode(c, '', 'NULL', 'NULL')
    for node in nodes:
        print node.name, ' :'
        attrs = node.attribs
        for attr in attrs:
            print '\t%s = %s' % (attr.name, attr.value)

        try:
            mom_port = socket.getservbyname('pbs_resmon', 'tcp')
        except socket.error:
            mom_port = pbs.PBS_MANAGER_SERVICE_PORT

        mom_id = pbs.openrm(node.name, mom_port)
        mom_keys = pbs.get_mom_values(mom_id)
        for key in mom_keys.keys():
            print '\t%s = %s' % (key, mom_keys[key])

        print '\nTesting list with user supplied keywords'
        l = ['bas', 'ncpus', 'loadave']
        mom_keys = pbs.get_mom_values(mom_id, l)
        for key in mom_keys.keys():
            print '\t%s = %s' % (key, mom_keys[key])

        print ''
        pbs.closerm(mom_id)
def alive(self, process_id):
    alive = False
    try:
        status = self.pbsquery.getjob(str(process_id))['job_state'][0]
    except Exception:
        # job not found
        status = -1
        sys.stderr.write("EXC: %s\n" % str(sys.exc_info()[0]))
        sys.stderr.write("Could not find job for process id %d\n" % process_id)

    if status == 'Q':
        sys.stderr.write("Job %d waiting in queue.\n" % (process_id))
        alive = True
    elif status == 'R':
        sys.stderr.write("Job %d is running.\n" % (process_id))
        alive = True
    elif status in ['H', 'S']:
        sys.stderr.write("Job %d is held or suspended.\n" % (process_id))
        alive = False

    if not alive:
        try:
            # Kill the job.
            c = pbs.pbs_connect(pbs.pbs_default())
            result = pbs.pbs_deljob(c, str(process_id))
            sys.stderr.write("Killed job %d.\n" % (process_id))
        except Exception:
            sys.stderr.write("Failed to kill job %d.\n" % (process_id))
        return False
    else:
        return True
def _process(self, batch_list):
    '''This function executes the changes on the batch server'''
    if ARGS_VERBOSE:
        _print('class:SaraNodes func:_process input:%s' % str(batch_list), file=sys.stderr)

    ## Always get the pbs_server name, even in dry-run mode
    pbs_server = pbs.pbs_default()
    if not pbs_server:
        _print('Could not locate a pbs server', file=sys.stderr)
        sys.exit(1)

    if ARGS_VERBOSE:
        _print('class:SaraNodes func:_process pbs_server:%s' % pbs_server, file=sys.stderr)

    ## If dry-run is not specified create a connection (once, reused for all nodes)
    if not ARGS_DRYRUN:
        pbs_connection = pbs.pbs_connect(pbs_server)

    ## Execute the changes
    for node in batch_list:
        if not ARGS_DRYRUN:
            rcode = pbs.pbs_manager(pbs_connection, pbs.MGR_CMD_SET, pbs.MGR_OBJ_NODE, node[0], node[1], 'NULL')
            if rcode > 0:
                errno, text = pbs.error()
                _print('PBS error for node \'%s\': %s (%s)' % (node[0], text, errno), file=sys.stderr)
        else:
            _print("pbs.pbs_manager(pbs_connection, pbs.MGR_CMD_SET, pbs.MGR_OBJ_NODE, %s, %s, 'NULL')" % (node[0], str(node[1])))

    ## Close the connection with the batch system
    if not ARGS_DRYRUN:
        pbs.pbs_disconnect(pbs_connection)
def __init__(self, script, name, env_vars=None, resources={}, conn=None, ppn=None):
    """Create a new Job to be submitted to PBS.

    env_vars is a dictionary with key-value pairs of environment variables
    that should be passed on to the job.

    resources is a dictionary with optional keys ['hours', 'cores'];
    both of these should be integer values. hours can be 1 - MAX_WALLTIME;
    cores depends on which cluster the job is being run on.
    """
    self.clean_conn = True
    self.log = fancylogger.getLogger(self.__class__.__name__, fname=False)
    self.script = script
    if env_vars:
        self.env_vars = env_vars.copy()
    else:
        self.env_vars = {}
    self.name = name

    if pbs_import_failed:
        self.log.error(pbs_import_failed)

    try:
        self.pbs_server = pbs.pbs_default()
        if conn:
            self.pbsconn = conn
            self.clean_conn = False
        else:
            self.pbsconn = pbs.pbs_connect(self.pbs_server)
    except Exception, err:
        self.log.error("Failed to connect to the default pbs server: %s" % err)
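# Illustrative only: a minimal, hypothetical use of the Job constructor
# above. The script body, job name, and resource values are invented for
# this sketch; see the docstring for the meaning of env_vars/resources.
job = Job(
    script="#!/bin/bash\necho hello",    # assumed job script contents
    name="example_job",
    env_vars={"FOO": "bar"},             # exported into the job environment
    resources={"hours": 2, "cores": 4},  # integers, per the docstring
)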
def main():
    pbs_server = pbs.pbs_default()
    if not pbs_server:
        print 'No default server'
        sys.exit(1)

    if len(sys.argv) < 2:
        print "Usage: set_property.py <hostname>"
        sys.exit(1)

    hostname = sys.argv[1]

    con = pbs.pbs_connect(pbs_server)

    attrop_l = pbs.new_attropl(1)
    attrop_l[0].name = 'note'
    attrop_l[0].value = 'set_something_useful'
    attrop_l[0].op = pbs.SET

    r = pbs.pbs_manager(con, pbs.MGR_CMD_SET, pbs.MGR_OBJ_NODE, hostname, attrop_l, 'NULL')
    if r > 0:
        print r, ";"
        errno, text = pbs.error()
        print errno, text
def connect(self, server=None):
    if not server:
        server = pbs.pbs_default()
    self._connection_id = pbs.pbs_connect(server)
    # pbs_connect() returns a negative value on failure
    if self._connection_id < 0:
        raise PBSException('could not connect to pbs server ' + str(server))
def submit_get_subfamilies_job(job):
    # This is how we are passing the fasta and job id to the script
    server_name = pbs.pbs_default()
    c = pbs.pbs_connect(server_name)

    attropl = pbs.new_attropl(5)
    attropl[0].name = pbs.ATTR_N
    attropl[0].value = "FAT-CAT Get Sub-Families: %s" % job.id
    attropl[1].name = pbs.ATTR_l
    attropl[1].resource = 'nodes'
    attropl[1].value = '1:ppn=1'
    attropl[2].name = pbs.ATTR_o
    attropl[2].value = JOB_LOG_FILE
    attropl[3].name = pbs.ATTR_e
    attropl[3].value = JOB_LOG_FILE
    attropl[4].name = pbs.ATTR_v
    attropl[4].value = "job_id=%s" % (job.id)

    job.status_id = 5
    job.save()

    job_id = pbs.pbs_submit(c, attropl, "/clusterfs/ohana/software/fatcat/scripts/get_best_nodes.py", 'web', 'NULL')
    logger.info("Submitting %s to the grid to get best nodes with id %s" % (job.id, job_id))

    if job_id:
        job.get_best_nodes_pbs_job_id = job_id
        job.save()

    pbs.pbs_disconnect(c)
    return job_id
def _connect_to_server(server=None):
    """
    Open a connection to a pbs_server at hostname server; if server is None,
    connect to the default server.

    This function is shared between JobManager and TorqueJobRunner.
    """
    server_name = server if server else pbs.pbs_default()

    retry = 0
    connection = pbs.pbs_connect(server_name)
    while connection <= 0 and retry < _MAX_RETRY:
        retry += 1
        time.sleep(retry ** 2)  # back off quadratically between attempts
        connection = pbs.pbs_connect(server_name)

    if connection <= 0:
        e, e_msg = pbs.error()
        # the batch system returned an error, throw exception
        raise Exception("Error connecting to pbs_server. "
                        "Torque error {0}: '{1}'".format(e, torque_strerror(e)))

    return connection
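# Illustrative only: one way the helper above might be used, assuming the
# surrounding module defines _MAX_RETRY and torque_strerror. Connect to
# the default server, stat the nodes, then release the connection.
conn = _connect_to_server()
try:
    for node in pbs.pbs_statnode(conn, "", "NULL", "NULL"):
        print node.name
finally:
    pbs.pbs_disconnect(conn)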
def pbs_conn(self):
    conn = pbs.pbs_connect(pbs.pbs_default())
    if conn < 0:
        err, err_text = pbs.error()
        self.logging.error('Error in PBS server connect')
        self.logging.error('PBS error code ' + str(err) + ': ' + err_text)
        raise SchedulerError('PBS error', str(err) + ': ' + err_text)
    return conn
def connect_to_server(pbs_server=None):
    """Connect to PBS server and return connection."""
    if pbs_import_failed:
        raise EasyBuildError(pbs_import_failed)

    if not pbs_server:
        pbs_server = pbs.pbs_default()

    return pbs.pbs_connect(pbs_server)
def connect_to_server(pbs_server=None):
    """Connect to PBS server and return connection."""
    if pbs_import_failed:
        _log.error(pbs_import_failed)
        return None

    if not pbs_server:
        pbs_server = pbs.pbs_default()

    return pbs.pbs_connect(pbs_server)
def __init__(self, *args, **kwargs):
    """Constructor."""
    pbs_server = kwargs.pop('pbs_server', None)

    super(PbsPython, self).__init__(*args, **kwargs)

    self.pbs_server = pbs_server or build_option('job_target_resource') or pbs.pbs_default()

    self.conn = None
    self._ppn = None
def hold_rls_job(self, job_id, server, mode, permission):
    '''
    Example:
        job_id:     183
        server:     jim-desktop
        mode:       hold | rls
        permission: u | o | s
    '''
    # Create a new connection for the child process
    c = pbs.pbs_connect(str(pbs.pbs_default()))
    if server is None:
        server = pbs.pbs_default()
    job_full_id = job_id + '.' + server
    if mode == 'hold':
        result = pbs.pbs_holdjob(c, job_full_id, permission, 'NULL')
    elif mode == 'rls':
        result = pbs.pbs_rlsjob(c, job_full_id, permission, 'NULL')
    # If the operation is successful, result == 0
    return result
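# Illustrative only: a hypothetical call using the docstring's example
# values ('scheduler' stands in for whatever object owns hold_rls_job).
# Holds job 183 on server jim-desktop with user-level (u) permission;
# a return value of 0 indicates success.
result = scheduler.hold_rls_job('183', 'jim-desktop', 'hold', 'u')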
def __init__(self, server=None):
    if not server:
        self.server = pbs.pbs_default()
    else:
        self.server = server

    self._connect()
    ## this is needed for getjob: a jobid is made of
    #  sequence_number.server (which is not self.server)
    #
    self.job_server_id = list(self.get_serverinfo())[0]
    self._disconnect()
def run_cluster(self, pbs_server, job_script, settings):
    import pbs

    self.settings = copy.deepcopy(settings)

    # Launch script, wait for output to come back, return when it does

    # Create the job options struct
    attropl = pbs.new_attropl(4)

    # Set the name of the job
    #
    attropl[0].name = pbs.ATTR_N
    attropl[0].value = "inferno_" + self.name

    # Job is Rerunable
    #
    attropl[1].name = pbs.ATTR_r
    attropl[1].value = "y"

    # Walltime
    #
    attropl[2].name = pbs.ATTR_l
    attropl[2].resource = "walltime"
    attropl[2].value = "400"

    # Nodes
    #
    attropl[3].name = pbs.ATTR_l
    attropl[3].resource = "nodes"
    attropl[3].value = "1:ppn=4"

    # Run the job
    if pbs_server is None:
        pbs_server = pbs.pbs_default()
    # pbs_submit() and pbs_statjob() expect a connection handle rather
    # than a server name, so open a connection first
    conn = pbs.pbs_connect(pbs_server)
    job_id = pbs.pbs_submit(conn, attropl, job_script, "NULL", "NULL")

    e, e_txt = pbs.error()
    if e:
        print e, e_txt

    # Save the job ID for later so we can check on the status
    self.job_id = job_id

    # TODO: Change this
    # Now loop, checking every 5 seconds or so if the job is done by
    # polling the pbs_server about the jobid.
    running = True
    while running:
        job_info = pbs.pbs_statjob(conn, self.job_id, "NULL", "NULL")
        print job_info
        time.sleep(5)
def __init__(self, **args):
    super(SchedulerPbs, self).__init__(**args)
    self.jobScriptDir = args['jobScriptDir']
    self.jobResDir = args['jobResDir']
    self.queue = args['queue']
    self.workerNodeWorkDir = args.get('workDir', '')

    self.res_dict = {}
    for a in args['resources'].split(','):
        if len(a) > 0:
            if a.find("=") != -1:
                res, val = a.split('=')
                self.res_dict.update({res: val})
            else:
                raise SchedulerError("PBS error",
                                     "Unknown resource format: " + a)

    env = []
    for v in ('HOME', 'LANG', 'LOGNAME', 'MAIL', 'PATH', 'SHELL'):
        env.append('PBS_O_' + v + '=' + os.environ[v])

    env.append('PBS_O_WORKDIR=' + os.getcwd())
    env.append('PBS_O_HOST=' + pbs.pbs_default())

    #if 'use_proxy' in args:
    #    if args['use_proxy'] == 1:
    #        proxy_location = ''
    #        try:
    #            proxy_location = os.environ['X509_USER_PROXY']
    #        except:
    #            proxy_location = '/tmp/x509up_u' + repr(os.getuid())
    #
    #        msg, ret = self.ExecuteCommand('cp ' + proxy_location + " " + self.cert)
    ##       proxy_path = self.getUserProxy()
    #        env.append('X509_USER_PROXY=' + self.cert)
    #        env.append('X509_USER_CERT=' + self.cert)
    #        env.append('X509_USER_KEY=' + self.cert)
    #    else:
    #        raise SchedulerError(str(args), self.cert)

    self.pbs_env = ','.join(env)
    self.status_map = {'E': 'R', 'H': 'SS', 'Q': 'SS', 'R': 'R',
                       'S': 'R', 'T': 'R', 'W': 'SS', 'Done': 'SD', 'C': 'SD'}
def __init__(self, options):
    super(Pbs, self).__init__(options)
    self.log = fancylogger.getLogger(self.__class__.__name__, fname=False)

    self.options = options
    self.log.debug("Provided options %s", options)

    self.pbs_server = pbs.pbs_default()
    self.pbsconn = pbs.pbs_connect(self.pbs_server)

    self.vars = {
        'cwd': 'PBS_O_WORKDIR',
        'jobid': 'PBS_JOBID',
    }
def submit_fxn_site_prediction_job(job):
    # This is how we are passing the fasta and job id to the script
    server_name = pbs.pbs_default()
    c = pbs.pbs_connect(server_name)
    print server_name
    print c

    attropl = pbs.new_attropl(7)
    attropl[0].name = pbs.ATTR_N
    attropl[0].value = "Functional Site Prediction Job: %s" % job.id
    attropl[1].name = pbs.ATTR_l
    attropl[1].resource = 'nodes'
    attropl[1].value = '1:ppn=1'
    attropl[2].name = pbs.ATTR_o
    attropl[2].value = JOB_LOG_FILE
    attropl[3].name = pbs.ATTR_e
    attropl[3].value = JOB_LOG_FILE
    attropl[4].name = pbs.ATTR_v
    attropl[4].value = "job_id=%s" % (job.id)
    attropl[5].name = pbs.ATTR_r
    attropl[5].value = 'y'
    attropl[6].name = pbs.ATTR_l
    attropl[6].resource = 'walltime'
    attropl[6].value = '1000'

    job.status_id = 2
    job.save()

    job_id = pbs.pbs_submit(c, attropl,
                            "/home/cyrus_afrasiabi/ohana_repository/bpg/fxn_site_prediction.py",
                            'web', 'NULL')
    logger.info("Submitting %s to the grid to get functional site predictions with id %s" % (job.id, job_id))

    if job_id:
        job.pbs_job_id = job_id
        job.save()

    pbs.pbs_disconnect(c)
    return job_id
def main():
    pbs_server = pbs.pbs_default()
    if not pbs_server:
        print "No default pbs server"
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)

    nodes = pbs.pbs_statnode(con, "", "NULL", "NULL")
    for node in nodes:
        print node.name
        for attrib in node.attribs:
            print '\t', attrib.name, '=', attrib.value
def submit_intrepid_job(job):
    # This is how we are passing the fasta and job id to the script
    server_name = pbs.pbs_default()
    c = pbs.pbs_connect(server_name)

    attropl = pbs.new_attropl(6)
    attropl[0].name = pbs.ATTR_N
    attropl[0].value = "INTREPID Job: %s" % job.id
    attropl[1].name = pbs.ATTR_l
    attropl[1].resource = 'nodes'
    attropl[1].value = '1:ppn=8'
    attropl[2].name = pbs.ATTR_o
    attropl[2].value = JOB_LOG_FILE
    attropl[3].name = pbs.ATTR_e
    attropl[3].value = JOB_LOG_FILE
    attropl[4].name = pbs.ATTR_v
    attropl[4].value = "job_id=%s" % (job.id)
    attropl[5].name = pbs.ATTR_l
    attropl[5].resource = 'walltime'
    attropl[5].value = '48:00:00'

    if job.development_job:
        job_id = pbs.pbs_submit(c, attropl,
                                "/clusterfs/ohana/software/intrepid/scripts/intrepid_development_pipeline.py",
                                'web', 'NULL')
    else:
        job_id = pbs.pbs_submit(c, attropl,
                                "/clusterfs/ohana/software/intrepid/scripts/intrepid_pipeline.py",
                                'web', 'NULL')

    logger.info("Submitting %s to the grid with id %s" % (job.id, job_id))

    if job_id:
        job.pbs_job_id = job_id
        job.status_id = JOB_SUBMITTED
        job.save()

    pbs.pbs_disconnect(c)
    return job_id
def submitScript(script):
    result = {}
    try:
        pbs_connection = pbs.pbs_connect(pbs.pbs_default())
        # queues = pbs.pbs_statque(pbs_connection, "batch", "NULL", "NULL")

        attropl = pbs.new_attropl(4)
        # Set the name of the job
        #
        attropl[0].name = pbs.ATTR_N
        attropl[0].value = str(script['jobName']) if script['jobName'] else "new_job"
        # Job is Rerunable
        #
        attropl[1].name = pbs.ATTR_r
        attropl[1].value = 'y'
        # Walltime
        #
        attropl[2].name = pbs.ATTR_l
        attropl[2].resource = 'walltime'
        attropl[2].value = str(script['maxTime']) if script['maxTime'] else '01:00:00'
        # Nodes
        #
        attropl[3].name = pbs.ATTR_l
        attropl[3].resource = 'nodes'
        attropl[3].value = ('1:ppn=' + str(script['cpuNumber'])) if script['cpuNumber'] else '1'

        # A1.tsk is the job script filename
        #
        job_id = pbs.pbs_submit(pbs_connection, attropl, str(script['scriptName']), str(script['queue']), 'NULL')

        e, e_txt = pbs.error()
        if e:
            result['Result'] = 'ERROR'
            result['Message'] = str(e) + ' : ' + e_txt
        else:
            result['Result'] = 'OK'
            result['Message'] = job_id
    except Exception as exc:
        result['Result'] = 'ERROR'
        result['Message'] = str(exc)

    return result
def main():
    pbs_server = pbs.pbs_default()
    if not pbs_server:
        print 'No default server'
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)

    attr_l = pbs.new_attrl(1)
    attr_l[0].name = 'pbs_version'

    server_info = pbs.pbs_statserver(con, attr_l, 'NULL')
    for entry in server_info:
        print entry.name
        for attrib in entry.attribs:
            print '\t', attrib.name, ' = ', attrib.value
def __init__(self):
    self.targets = collections.defaultdict(list)
    self.default = ""

    # Construct self.attrs from the available attributes in the pbs module.
    # This provides a mapping from human-readable names (no spaces) to the
    # module's ATTR_* names. Not all ATTR_ entities are interesting.
    self.attrs = {}
    pbs_module_attrs = [a for a in dir(pbs) if a[0:5] == "ATTR_"]
    for attr in pbs_module_attrs:
        self.attrs[getattr(pbs, attr)] = str

    srvname = pbs.pbs_default()
    self.conn = pbs.pbs_connect(srvname)

    # By default, submit jobs to pbs
    self.pbs(True)
    self.dotAliases = {}
def main():
    pbs_server = pbs.pbs_default()
    if not pbs_server:
        print 'No default server'
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)

    attrop_l = pbs.new_attropl(1)
    attrop_l[0].name = 'properties'
    attrop_l[0].value = 'set_something_useful'
    attrop_l[0].op = pbs.INCR

    r = pbs.pbs_manager(con, pbs.MGR_CMD_SET, pbs.MGR_OBJ_NODE, "e2", attrop_l, 'NULL')
    if r > 0:
        print r, ";"
        errno, text = pbs.error()
        print errno, text
def method1():
    pbs_server = pbs.pbs_default()
    if not pbs_server:
        print "No default pbs server"
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)
    if con == -1:
        print "Default pbs server connection failed"
        pbs_server = pbs.pbs_fbserver()
        if not pbs_server:
            print "No pbs fallback server"
            sys.exit(1)
        else:
            con = pbs.pbs_connect(pbs_server)
            if con == -1:
                print "pbs fallback server connection failed"
                sys.exit(1)

    print "Connected to %s" % (pbs_server)
def _connect_to_server(server):
    """
    Open a connection to a pbs_server at hostname server; if server is None,
    connect to the default server.

    This function is shared between JobManager and TorqueJobRunner.
    """
    if server:
        connection = pbs.pbs_connect(server)
    else:
        connection = pbs.pbs_connect(pbs.pbs_default())

    if connection <= 0:
        e, e_msg = pbs.error()
        # the batch system returned an error, throw exception
        raise Exception("Error connecting to pbs_server. "
                        "Torque error {0}: '{1}'".format(e, torque_strerror(e)))

    return connection
def create_job(self, username, Job_Name, queue, nodes, walltime, file):
    c = pbs.pbs_connect(str(pbs.pbs_default()))

    attrl = pbs.new_attropl(3)
    attrl[0].name = pbs.ATTR_N
    attrl[0].value = str(Job_Name)
    attrl[1].name = pbs.ATTR_l
    attrl[1].resource = 'nodes'
    attrl[1].value = str(nodes)
    attrl[2].name = pbs.ATTR_l
    attrl[2].resource = 'walltime'
    attrl[2].value = str(walltime)

    queue = str(queue)
    task_id = pbs.pbs_submit(c, attrl, str("media/" + username + "/" + file), queue, 'NULL')
    # Note: this returns the server's last error message (empty on success),
    # not the submitted task id
    return pbs.pbs_geterrmsg(c)
def main():
    state_list = []
    node_list = []
    node_nr = 0

    if len(sys.argv) > 1:
        pbs_server = sys.argv[1]
    else:
        pbs_server = pbs.pbs_default()

    if not pbs_server:
        print "No default pbs server, usage: pbsmon [server]"
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)
    if con < 0:
        errno, text = pbs.error()
        print errno, text
        sys.exit(1)

    # We are only interested in the state and jobs of a node
    #
    attrl = pbs.new_attrl(2)
    attrl[0].name = "state"
    attrl[1].name = "jobs"

    nodes = pbs.pbs_statnode(con, "", attrl, "NULL")
    # Sometimes it's None, then again NULL; beats me

    for node in nodes:
        # display_node_status(batch_info)
        node_attr = node.attribs

        # A node can have several states; we are only
        # interested in the first entry.
        #
        temp = string.splitfields(node_attr[0].value, ",")
        state = temp[0]

        # If a job is scheduled on a free node, mark it
        # as a different state
        #
        if state == pbs.ND_free:
            if len([x for x in node_attr if x.name == "jobs"]):
                state_list.append(translate_state[pbs_ND_free_and_job])
            else:
                state_list.append(translate_state[state])
        else:
            state_list.append(translate_state[state])

        re_host = re.compile(
            r""" (?P<name>\d+) """,
            re.VERBOSE,
        )
        result = re_host.search(node.name)
        if result:
            node_list.append(result.group("name"))
        else:
            node_nr = node_nr + 1
            node_list.append(str(node_nr))

    display_cluster_status(node_list, state_list)
#             data.blurb = "running"
#             data.flush()

# Windows fix for something that I don't fully understand:
# waiting for the stdout to close
if os.sep == '\\':
    time.sleep(5)

# set up for pbs (if we're using it)
if asbool(self.trans.app.config.use_pbs) and "/tools/data_source" not in command_line:
    log.debug("importing pbs module")
    import pkg_resources
    pkg_resources.require("pbs_python")
    import pbs, random
    from PBSQuery import PBSQuery
    pbs_server = pbs.pbs_default()
    if not pbs_server:
        self.trans.app.config.use_pbs = False

# data_source tools don't farm
#if not self.app.config.use_pbs or re.search("/tools/data_source/", command_line):
if (not asbool(self.trans.app.config.use_pbs)) or "/tools/data_source/" in command_line:
    for data in out_data.values():
        data.state = data.states.RUNNING
        data.blurb = "running"
        data.flush()

    # start the subprocess
    if self.command:
        log.debug('executing: %s' % command_line)
def pbsmon():
    global NODES_PER_RACK, N_RACKS, PBS_STATES

    if len(sys.argv) > 1:
        pbs_server = sys.argv[1]
    else:
        pbs_server = pbs.pbs_default()

    if not pbs_server:
        print 'No default pbs server, usage: %s [server]' % os.path.basename(sys.argv[0])
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)
    if con < 0:
        errno, text = pbs.error()
        print errno, text
        sys.exit(1)

    # get the state of the nodes
    attrl = pbs.new_attrl(2)
    attrl[0].name = 'state'
    attrl[1].name = 'jobs'
    nodes = pbs.pbs_statnode(con, '', attrl, 'NULL')

    node_dict = {}
    count_states = {}
    for key in PBS_STATES.keys():
        count_states[key] = 0

    for node in nodes:
        node_attr = node.attribs
        temp = string.split(node_attr[0].value, ',')
        state = temp[0]
        state_char = PBS_STATES[state]
        count_states[state] = count_states[state] + 1

        if state == pbs.ND_free:
            if len(node_attr) > 1:
                # print 'TD: %s' % node.name, node_attr[1]
                state_char = PBS_STATES[pbs_ND_single]
                count_states[pbs.ND_free] = count_states[pbs.ND_free] - 1
                count_states[pbs_ND_single] = count_states[pbs_ND_single] + 1

        # print 'TD: %s %s' % (node.name, state_char)
        node_dict[node.name] = state_char

    legend = PBS_STATES.keys()
    legend.sort()

    # print nodes with gb-r%dn%d naming scheme
    print '  ',
    for rack in xrange(1, N_RACKS + 1):
        print '%2d' % rack,
    print

    for node_nr in xrange(1, NODES_PER_RACK + 1):
        print '%2d' % node_nr,
        for rack in xrange(1, N_RACKS + 1):
            node_name = 'gb-r%dn%d' % (rack, node_nr)
            if node_dict.has_key(node_name):
                print ' %s' % node_dict[node_name],
                del node_dict[node_name]
            else:
                print '  ',
        if node_nr - 1 < len(legend):
            state = legend[node_nr - 1]
            print ' %s %-13s : %d' % (PBS_STATES[state], state, count_states[state])
        else:
            print
    print

    # any other nodes?
    arr = node_dict.keys()
    if arr:
        arr.sort()
        for node in arr:
            print '%s %s' % (node, node_dict[node])
        print
def default_pbs_server(self):
    if self.__default_pbs_server is None:
        self.__default_pbs_server = pbs.pbs_default()
        log.debug("Set default PBS server to %s" % self.default_pbs_server)
    return self.__default_pbs_server
def master_hostname():
    """Return hostname of master server of resource manager."""
    return pbs.pbs_default()
def connect(self):
    self.c = pbs.pbs_connect(pbs.pbs_default())
    return self.c
def pbs_batch(self, nodes, attrs=None, note_attributes=None):
    nodeserror = list()
    if not attrs and not note_attributes:
        raise sara_nodesException, 'attrs and note_attributes can not be empty together!'

    if not self.dryrun:
        if note_attributes and len(note_attributes) == 3:
            if attrs:
                attributes = attrs + pbs.new_attropl(1)
                attributes[1].name = pbs.ATTR_NODE_note
                attributes[1].op = pbs.SET
            else:
                attributes = pbs.new_attropl(1)
                attributes[0].name = pbs.ATTR_NODE_note
                attributes[0].op = pbs.SET
        else:
            attributes = attrs

        # Some hacking here because of a limitation in the Torque 2.4 version:
        # fetch the note data first for all nodes!
        tmp_node_note = dict()

        for node in nodes:
            if note_attributes and len(note_attributes) == 3:
                tmp_node_note[node] = self.note(node, note_attributes)

        pbs_server = pbs.pbs_default()
        if not pbs_server:
            raise sara_nodesException, 'Default pbs server not found!'

        pbs_connection = pbs.pbs_connect(pbs_server)
        for node in nodes:
            if note_attributes and len(note_attributes) == 3:
                try:
                    if attrs:
                        attributes[1].value = tmp_node_note[node]
                    else:
                        attributes[0].value = tmp_node_note[node]
                except KeyError:
                    pass

            rcode = pbs.pbs_manager(pbs_connection, pbs.MGR_CMD_SET, pbs.MGR_OBJ_NODE, node, attributes, 'NULL')
            if rcode > 0:
                errno, text = pbs.error()
                nodeserror.append('%s: %s (%s)' % (node, text, errno))
    else:
        p = PBSQuery.PBSQuery()
        pbsnodes = p.getnodes().keys()
        print '%*s:' % (7, 'Nodes'),
        firstitem = True

        for node in nodes:
            if node in pbsnodes:
                if firstitem:
                    print '%s' % node
                    firstitem = False
                else:
                    print '%*s' % (17, node)
            else:
                nodeserror.append('%s: does not exist' % node)

    if len(nodeserror) > 0:
        raise sara_nodesException, nodeserror
def __init__(self, server=None):
    if not server:
        self.server = pbs.pbs_default()
    else:
        self.server = server
def pp_predict_motifs(fastafile, outfile, analysis="small", organism="hg18", single=False,
                      background="", tools=None, job_server="", ncpus=8, logger=None,
                      max_time=None, fg_file=None, bg_file=None):
    if tools is None:
        tools = {}

    config = MotifConfig()

    if not tools:
        tools = dict([(x, 1) for x in config.get_default_params()["tools"].split(",")])

    #logger = logging.getLogger('prediction.pp_predict_motifs')

    wmin = 5
    step = 1
    if analysis in ["large", "xl"]:
        step = 2
        wmin = 6

    analysis_max = {"xs": 5, "small": 8, "medium": 10, "large": 14, "xl": 20}
    wmax = analysis_max[analysis]

    if analysis == "xs":
        sys.stderr.write("Setting analysis xs to small")
        analysis = "small"

    jobs = {}

    result = PredictionResult(outfile, logger=logger, fg_file=fg_file, bg_file=bg_file)

    # Dynamically load all tools
    toolio = [x[1]() for x in inspect.getmembers(
        tool_classes,
        lambda x: inspect.isclass(x) and issubclass(x, tool_classes.MotifProgram)
    ) if x[0] != 'MotifProgram']

    # TODO:
    # Add warnings for running time: Weeder, GADEM

    # Prepare PBS submission
    server = pbs.pbs_default()
    c = pbs.pbs_connect(server)
    q = PBSQuery()
    attropl = pbs.new_attropl(6)
    # Name
    attropl[0].name = pbs.ATTR_N
    # Restartable
    attropl[1].name = pbs.ATTR_r
    attropl[1].value = 'y'
    # Walltime
    attropl[2].name = pbs.ATTR_l
    attropl[2].resource = 'walltime'
    attropl[2].value = '600'
    # Node requirements
    attropl[3].name = pbs.ATTR_l
    attropl[3].resource = 'nodes'
    attropl[3].value = '1:ppn=1'
    # stdout / stderr paths (values are filled in per job below)
    attropl[4].name = pbs.ATTR_o
    attropl[5].name = pbs.ATTR_e

    rundir = os.path.join(os.path.split(os.path.abspath(fastafile))[0], "torque")
    if not os.path.exists(rundir):
        os.mkdir(rundir)

    params = {
        'analysis': analysis,
        'background': background,
        'single': single,
        'organism': organism,
    }

    jobs = {}
    for t in toolio:
        if tools.has_key(t.name) and tools[t.name]:
            if t.use_width:
                for i in range(wmin, wmax + 1, step):
                    logger.info("Starting %s job, width %s" % (t.name, i))
                    params['width'] = i
                    sh = write_shell_script(t.name, fastafile, rundir=rundir, params=params)
                    job_name = os.path.basename(os.path.splitext(sh)[0])
                    # submit
                    attropl[0].value = job_name
                    attropl[4].value = "{0}/{1}.stdout".format(rundir, job_name)
                    attropl[5].value = "{0}/{1}.stderr".format(rundir, job_name)
                    job_id = pbs.pbs_submit(c, attropl, sh, "batchq", 'NULL')
                    e, e_txt = pbs.error()
                    if e:
                        logger.error("Failed: {0}".format(e_txt))
                    else:
                        jobs[job_id] = job_name
            else:
                logger.debug("Starting %s job" % t.name)
                sh = write_shell_script(t.name, fastafile, rundir=rundir, params=params)
                job_name = os.path.basename(os.path.splitext(sh)[0])
                # submit
                attropl[0].value = job_name
                attropl[4].value = "{0}/{1}.stdout".format(rundir, job_name)
                attropl[5].value = "{0}/{1}.stderr".format(rundir, job_name)
                job_id = pbs.pbs_submit(c, attropl, sh, "batchq", 'NULL')
                e, e_txt = pbs.error()
                if e:
                    logger.error("Failed submission: {0}".format(e_txt))
                else:
                    jobs[job_id] = job_name
        else:
            logger.debug("Skipping %s" % t.name)

    ### Wait until all jobs are finished or the time runs out ###
    start_time = time()
    try:
        # Run until all jobs are finished
        while len(jobs) > 0 and (not max_time or time() - start_time < max_time):
            for job_id, job_name in jobs.items():
                job = q.getjob(job_id)
                if not job:  # or not job.is_running():
                    motifs = []
                    if job:
                        name = job['Job_Name']
                        # Some error checking here!
                    else:
                        pwmfile = os.path.join(rundir, "{0}.pwm".format(job_name))
                        if os.path.exists(pwmfile):
                            motifs = read_motifs(open(pwmfile), fmt="pwm")
                        else:
                            logger.error("Job {0} finished, but couldn't find {1}!".format(job_name, pwmfile))

                    stdout = open(os.path.join(rundir, "{0}.stdout".format(job_name))).read()
                    stderr = open(os.path.join(rundir, "{0}.stderr".format(job_name))).read()

                    result.add_motifs(job_id, (motifs, stdout, stderr))
                    #for fname in glob.glob("{0}*".format(job_name)):
                    #    logger.debug("Deleting {0}".format(fname))
                    #    #os.unlink(fname)

                    del jobs[job_id]
            sleep(5)
    ### Or the user gets impatient... ###
    except KeyboardInterrupt, e:
        # Destroy all running jobs
        logger.info("Caught interrupt, destroying all running jobs")
def submit(self):
    attropl = pbs.new_attropl(self.attribute_count + 1)
    attropl_idx = 0

    attropl[attropl_idx].name = pbs.ATTR_v
    attropl[attropl_idx].value = self.generate_env()
    attropl_idx += 1

    if self.name:
        attropl[attropl_idx].name = pbs.ATTR_N
        attropl[attropl_idx].value = self.name
        attropl_idx += 1

    if self.walltime:
        attropl[attropl_idx].name = pbs.ATTR_l
        attropl[attropl_idx].resource = 'walltime'
        attropl[attropl_idx].value = self.walltime
        attropl_idx += 1

    if self.nodes:
        attropl[attropl_idx].name = pbs.ATTR_l
        attropl[attropl_idx].resource = 'nodes'
        attropl[attropl_idx].value = self.nodes
        attropl_idx += 1

    if self.stdout_path:
        attropl[attropl_idx].name = pbs.ATTR_o
        attropl[attropl_idx].value = self.stdout_path
        attropl_idx += 1

    if self.stderr_path:
        attropl[attropl_idx].name = pbs.ATTR_e  # stderr path
        attropl[attropl_idx].value = self.stderr_path
        attropl_idx += 1

    if self.dependency_list:
        attropl[attropl_idx].name = pbs.ATTR_depend
        attropl[attropl_idx].value = self.dependency_list
        attropl_idx += 1

    if self.mail_options:
        attropl[attropl_idx].name = pbs.ATTR_m
        attropl[attropl_idx].value = self.mail_options
        attropl_idx += 1

    if self.mem:
        attropl[attropl_idx].name = pbs.ATTR_l
        attropl[attropl_idx].resource = 'mem'
        attropl[attropl_idx].value = self.mem
        attropl_idx += 1

    if self.vmem:
        attropl[attropl_idx].name = pbs.ATTR_l
        attropl[attropl_idx].resource = 'vmem'
        attropl[attropl_idx].value = self.vmem
        attropl_idx += 1

    connection = pbs.pbs_connect(pbs.pbs_default())
    self.job_id = pbs.pbs_submit(connection, attropl, self.job_script, None, None)
    pbs.pbs_disconnect(connection)

    e, e_msg = pbs.error()
    # the batch system returned an error, throw exception
    if e:
        message = "%d: %s" % (e, e_msg)
        raise Exception(message)

    return self.job_id
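# Illustrative only: the bare connect-submit-check pattern that the
# snippets above share, reduced to the pbs_python calls they all use.
# The script path and queue name are placeholders, not taken from any
# snippet in this collection.
import pbs

conn = pbs.pbs_connect(pbs.pbs_default())
attropl = pbs.new_attropl(1)
attropl[0].name = pbs.ATTR_N
attropl[0].value = "demo_job"

job_id = pbs.pbs_submit(conn, attropl, "/tmp/job.sh", "batch", 'NULL')
e, e_txt = pbs.error()
if e:
    print e, e_txt
else:
    print "submitted", job_id
pbs.pbs_disconnect(conn)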