Ejemplo n.º 1
0
    def _process(self, batch_list):
        '''Apply a batch of node changes to the batch server.

        Each item of batch_list is indexed as a pair: item[0] is handed to
        pbs.pbs_manager as the node name and item[1] as the attribute list.
        When ARGS_DRYRUN is set no connection is made; the call that would
        have been issued is printed instead.

        Exits the process with status 1 when no default pbs server is found.
        '''

        if ARGS_VERBOSE:
            _print('class:SaraNodes func:_process input:%s' % str(batch_list), file=sys.stderr)

        ## Always get the pbs_server name, even in dry-run mode
        pbs_server = pbs.pbs_default()
        if not pbs_server:
            _print('Could not locate a pbs server', file=sys.stderr)
            sys.exit(1)

        if ARGS_VERBOSE:
            _print('class:SaraNodes func:_process pbs_server:%s' % pbs_server, file=sys.stderr)

        ## Dry-run: only show what would be sent, never touch the server
        if ARGS_DRYRUN:
            for node in batch_list:
                _print("pbs.pbs_manager(pbs_connection, pbs.MGR_CMD_SET, pbs.MGR_OBJ_NODE, %s, %s, 'NULL')" % (node[0], str(node[1])))
            return

        ## One connection serves every change; connecting again for each
        ## node would leave all but the last handle open.
        pbs_connection = pbs.pbs_connect(pbs_server)
        try:
            for node in batch_list:
                rcode = pbs.pbs_manager(pbs_connection, pbs.MGR_CMD_SET, pbs.MGR_OBJ_NODE, node[0], node[1], 'NULL')
                if rcode > 0:
                    errno, text = pbs.error()
                    _print('PBS error for node \'%s\': %s (%s)' % (node[0], text, errno), file=sys.stderr)
        finally:
            ## Close the connection with the batch system, even on error
            pbs.pbs_disconnect(pbs_connection)
Ejemplo n.º 2
0
def _connect_to_server(server=None):
    """
        Return a connection handle for the pbs_server at hostname `server`;
        a falsy `server` selects the default server.

        A failed attempt is retried up to _MAX_RETRY times, sleeping
        attempt**2 seconds before each retry. An Exception is raised when
        no attempt yields a positive handle.

        This function is shared between JobManager and TorqueJobRunner
    """
    if server:
        target = server
    else:
        target = pbs.pbs_default()

    handle = pbs.pbs_connect(target)
    attempt = 0
    while handle <= 0 and attempt < _MAX_RETRY:
        attempt += 1
        # back off a little longer after every failure
        time.sleep(attempt ** 2)
        handle = pbs.pbs_connect(target)

    if handle > 0:
        return handle

    # the batch system returned an error, throw exception
    err_code, _err_text = pbs.error()
    raise Exception("Error connecting to pbs_server.  "
                    "Torque error {0}: '{1}'".format(
                        err_code, torque_strerror(err_code)))
Ejemplo n.º 3
0
def update_all_jobs(batchserver_name):
    """ Update info about all jobs of the given batchserver.
    """
    server,created = getBatchServer(batchserver_name)
    if not pbs_data_jobs.has_key(batchserver_name):
        pbs_data_jobs[batchserver_name] = {'last_update':None, 'jobs':{}}

    if pbs_data_jobs[batchserver_name]['last_update'] and (datetime.datetime.now()-pbs_data_jobs[batchserver_name]['last_update']).total_seconds()<GlobalConfiguration.objects.get(pk=1).max_lastupdate:
        logging.debug("jobs info is new enough for server: %s" % batchserver_name)
        print "not updated"
        return pbs_data_jobs
    print "updated"

    conn = pbs.pbs_connect(batchserver_name.encode('iso-8859-1', 'replace'))
    if conn==-1:
        logging.error("Cannot connect to %s - live data will be missing" % server.name)
        return
    statjobs = pbs.pbs_statjob(conn, "" , [], "")
    pbs.pbs_disconnect(conn)

    for sj in statjobs:
        jobid = sj.name
        attr_dict = dict([ (x.name,x.value) for x in sj.attribs])
        attr_dict = {}
        for x in sj.attribs:
            if x.resource:
                attr_dict[x.name+"_"+x.resource] = x.value
            else:
                attr_dict[x.name] = x.value

        pbs_data_jobs[batchserver_name]['jobs'][jobid] = update_one_job_from_pbs_data(jobid, attr_dict)
        pbs_data_jobs[batchserver_name]['last_update'] = datetime.datetime.now()

    return pbs_data_jobs
Ejemplo n.º 4
0
def update_all_queues(batchserver_name):
    """ Update info about all queues for the given batchserver.

    Returns early, without contacting the server, when
    server.queues_lastupdate is younger than the configured max_lastupdate
    (seconds). Also returns early, after logging an error, when the batch
    server cannot be contacted. Always returns None.
    """
    server, created = getBatchServer(batchserver_name)
    if server.queues_lastupdate and (datetime.datetime.now() - server.queues_lastupdate).total_seconds() < GlobalConfiguration.objects.get(pk=1).max_lastupdate:
        logging.debug("Queue info is new enough for server: %s" % batchserver_name)
        return

    conn = pbs.pbs_connect(batchserver_name.encode('iso-8859-1', 'replace'))
    if conn == -1:
        logging.error("Cannot connect to %s - live data will be missing" % server.name)
        return
    statqueues = pbs.pbs_statque(conn, "", [], "")
    pbs.pbs_disconnect(conn)

    for sq in statqueues:
        queue, created = getQueue(sq.name, server)
        attr_dict = dict([(x.name, x.value) for x in sq.attribs])
        update_one_queue_from_pbs_data(queue, attr_dict)
        queue.save()
    server.queues_lastupdate = datetime.datetime.now()
    server.save()
Ejemplo n.º 5
0
 def check_all_jobs(self):
     """
     Query each PBS server that has watched jobs for job status.

     Returns a (failures, statuses) tuple: the names of the servers that
     could not be contacted, and a dict of "job_id : status" pairs as
     produced by convert_statjob_to_bunches. Every watched job has its
     check_count incremented.
     """
     failures = []
     statuses = {}

     # Collect each server name once, in first-seen order.
     server_names = []
     for watched_job in self.watched:
         name = self.__get_pbs_server(
             watched_job.job_destination.params)
         if name not in server_names:
             server_names.append(name)
         watched_job.check_count += 1

     wanted_attrs = (pbs.ATTR_state, pbs.ATTR_used, pbs.ATTR_exitstat)
     for name in server_names:
         conn = pbs.pbs_connect(util.smart_str(name))
         if conn <= 0:
             log.debug(
                 "connection to PBS server %s for state check failed" %
                 name)
             failures.append(name)
             continue
         query = pbs.new_attrl(3)
         for position, attr_name in enumerate(wanted_attrs):
             query[position].name = attr_name
         stat_result = pbs.pbs_statjob(conn, None, query, None)
         pbs.pbs_disconnect(conn)
         statuses.update(self.convert_statjob_to_bunches(stat_result))
     return (failures, statuses)
Ejemplo n.º 6
0
    def stop_job( self, job ):
        """Attempts to delete a job from the PBS queue.

        Best effort: any failure is logged at debug level and never raised
        to the caller. Returns None.
        """
        job_id = job.get_job_runner_external_id().encode('utf-8')
        job_tag = "(%s/%s)" % ( job.get_id_tag(), job_id )
        log.debug( "%s Stopping PBS job" % job_tag )

        # c is only set once a connection is actually established, so the
        # cleanup below never disconnects a handle that failed to connect.
        c = None

        try:
            pbs_server_name = self.__get_pbs_server( job.destination_params )
            if pbs_server_name is None:
                log.debug("(%s) Job queued but no destination stored in job params, cannot delete"
                          % job_tag )
                return
            handle = pbs.pbs_connect( util.smart_str( pbs_server_name ) )
            if handle <= 0:
                log.debug("(%s) Connection to PBS server for job delete failed"
                          % job_tag )
                return
            c = handle
            pbs.pbs_deljob( c, job_id, '' )
            log.debug( "%s Removed from PBS queue before job completion"
                       % job_tag )
        except Exception:
            e = traceback.format_exc()
            log.debug( "%s Unable to stop job: %s" % ( job_tag, e ) )
        finally:
            # Cleanup: disconnect from the server.
            if c is not None:
                pbs.pbs_disconnect( c )
Ejemplo n.º 7
0
    def __init__(self, script, name, env_vars=None, resources={}, conn=None, ppn=None):
        """
        Set up a new Job to be submitted to PBS.

        env_vars is a dictionary with key-value pairs of environment variables that should be passed on to the job
        (it is copied, so the caller's dictionary is left alone).
        resources is a dictionary with optional keys: ['hours', 'cores'] both of these should be integer values.
        hours can be 1 - MAX_WALLTIME, cores depends on which cluster it is being run.
        conn is an existing connection to reuse; without it a connection to the default pbs server is opened.
        """
        # stays True only when the connection is opened here
        self.clean_conn = True
        self.log = fancylogger.getLogger(self.__class__.__name__, fname=False)
        self.script = script
        self.env_vars = env_vars.copy() if env_vars else {}
        self.name = name

        if pbs_import_failed:
            self.log.error(pbs_import_failed)

        try:
            self.pbs_server = pbs.pbs_default()
            if not conn:
                self.pbsconn = pbs.pbs_connect(self.pbs_server)
            else:
                # a connection handed in by the caller is adopted as-is
                self.pbsconn = conn
                self.clean_conn = False
        except Exception as err:
            self.log.error("Failed to connect to the default pbs server: %s" % err)
Ejemplo n.º 8
0
def main():
    pbs_server = pbs.pbs_default()
    if not pbs_server:
        print 'No default server'
        sys.exit(1)

    if len(sys.argv) < 2:
        print "Usage: set_property.py <hostname>"
        sys.exit(1)

    hostname = sys.argv[1]

    con = pbs.pbs_connect(pbs_server)

    attrop_l = pbs.new_attropl(1)
    attrop_l[0].name = 'note'
    attrop_l[0].value = 'set_something_useful'
    attrop_l[0].op = pbs.SET

    r = pbs.pbs_manager(con, pbs.MGR_CMD_SET, pbs.MGR_OBJ_NODE, hostname,
                        attrop_l, 'NULL')

    if r > 0:
        print r, ";"
        errno, text = pbs.error()
        print errno, text
Ejemplo n.º 9
0
    def stop_job(self, job):
        """Attempts to delete a job from the PBS queue.

        Best effort: any failure is logged at debug level and never raised
        to the caller. Returns None.
        """
        job_id = job.get_job_runner_external_id().encode('utf-8')
        job_tag = "(%s/%s)" % (job.get_id_tag(), job_id)
        log.debug("%s Stopping PBS job" % job_tag)

        # c is only set once a connection is actually established, so the
        # cleanup below never disconnects a handle that failed to connect.
        c = None

        try:
            pbs_server_name = self.__get_pbs_server(job.destination_params)
            if pbs_server_name is None:
                log.debug(
                    "(%s) Job queued but no destination stored in job params, cannot delete"
                    % job_tag)
                return
            handle = pbs.pbs_connect(util.smart_str(pbs_server_name))
            if handle <= 0:
                log.debug(
                    "(%s) Connection to PBS server for job delete failed" %
                    job_tag)
                return
            c = handle
            pbs.pbs_deljob(c, job_id, '')
            log.debug("%s Removed from PBS queue before job completion" %
                      job_tag)
        except Exception:
            e = traceback.format_exc()
            log.debug("%s Unable to stop job: %s" % (job_tag, e))
        finally:
            # Cleanup: disconnect from the server.
            if c is not None:
                pbs.pbs_disconnect(c)
Ejemplo n.º 10
0
    def alive(self, process_id):
        """Return True when job `process_id` is queued ('Q') or running ('R').

        For any other outcome (held, suspended, another state, or the job
        lookup failing) an attempt is made to delete the job and False is
        returned. Progress and errors are written to stderr.
        """
        try:
            status = self.pbsquery.getjob(str(process_id))['job_state'][0]
        except Exception:
            # job not found
            status = -1
            sys.stderr.write("EXC: %s\n" % str(sys.exc_info()[0]))
            sys.stderr.write("Could not find job for process id %d\n" % process_id)

        if status == 'Q':
            sys.stderr.write("Job %d waiting in queue.\n" % (process_id))
            return True
        if status == 'R':
            sys.stderr.write("Job %d is running.\n" % (process_id))
            return True
        if status in ('H', 'S'):
            sys.stderr.write("Job %d is held or suspended.\n" % (process_id))

        try:
            # Kill the job.
            c = pbs.pbs_connect(pbs.pbs_default())
            try:
                result = pbs.pbs_deljob(c, str(process_id))
            finally:
                # always release the handle opened just above
                pbs.pbs_disconnect(c)
            # a non-zero return code means the delete request was rejected
            if result:
                sys.stderr.write("Failed to kill job %d.\n" % (process_id))
            else:
                sys.stderr.write("Killed job %d.\n" % (process_id))
        except Exception:
            sys.stderr.write("Failed to kill job %d.\n" % (process_id))

        return False
Ejemplo n.º 11
0
def update_all_nodes(batchserver_name):
    """ Update info about all nodes of the given batchserver.
    """
    server,created = getBatchServer(batchserver_name)
    if not pbs_data_nodes.has_key(batchserver_name):
        pbs_data_nodes[batchserver_name] = {'last_update':None, 'nodes':{}}

    if pbs_data_nodes[batchserver_name]['last_update'] and (datetime.datetime.now()-pbs_data_nodes[batchserver_name]['last_update']).total_seconds()<GlobalConfiguration.objects.get(pk=1).max_lastupdate:
        logging.debug("Nodes info is new enough for server: %s" % batchserver_name)
        print "not updated"
        return pbs_data_nodes

    print "updated"

    conn = pbs.pbs_connect(batchserver_name.encode('iso-8859-1', 'replace'))
    if conn==-1:
        logging.error("Cannot connect to %s - live data will be missing" % server.name)
        return
    statnodes = pbs.pbs_statnode(conn, "" , [], "")
    pbs.pbs_disconnect(conn)

    for sn in statnodes:
        node,created = getNode(sn.name, server)
        attr_dict = dict([ (x.name,x.value) for x in sn.attribs])
        pbs_data_nodes[batchserver_name]['nodes'][node] = update_one_node_from_pbs_data(node, attr_dict)
        pbs_data_nodes[batchserver_name]['last_update'] = datetime.datetime.now()

    return pbs_data_nodes
def submit_get_subfamilies_job(job):
    """Submit the get_best_nodes.py script for `job` to the default pbs server.

    Marks the job with status_id 5 and saves it before submitting. When
    pbs_submit returns a truthy id, that id is stored on the job as
    get_best_nodes_pbs_job_id and the job is saved again.

    Returns whatever pbs.pbs_submit returned.
    """
    connection = pbs.pbs_connect(pbs.pbs_default())

    # (attribute name, resource or None, value). The script learns the job
    # id from the "job_id=..." value carried by the ATTR_v entry.
    settings = (
        (pbs.ATTR_N, None, "FAT-CAT Get Sub-Families: %s" % job.id),
        (pbs.ATTR_l, 'nodes', '1:ppn=1'),
        (pbs.ATTR_o, None, JOB_LOG_FILE),
        (pbs.ATTR_e, None, JOB_LOG_FILE),
        (pbs.ATTR_v, None, "job_id=%s" % (job.id)),
    )
    attropl = pbs.new_attropl(len(settings))
    for slot, (attr_name, attr_resource, attr_value) in enumerate(settings):
        attropl[slot].name = attr_name
        if attr_resource is not None:
            attropl[slot].resource = attr_resource
        attropl[slot].value = attr_value

    job.status_id = 5
    job.save()

    pbs_id = pbs.pbs_submit(connection, attropl, "/clusterfs/ohana/software/fatcat/scripts/get_best_nodes.py", 'web', 'NULL')
    logger.info("Submitting %s to the grid to get best nodes with id %s" % (job.id, pbs_id))

    if pbs_id:
        job.get_best_nodes_pbs_job_id = pbs_id
        job.save()

    pbs.pbs_disconnect(connection)

    return pbs_id
Ejemplo n.º 13
0
def main():
  server = pbs.pbs_default()
  c      = pbs.pbs_connect(server)

  nodes = pbs.pbs_statnode(c, '', 'NULL', 'NULL')

  for node in nodes:
    print node.name, ' :'
    attrs = node.attribs
    for attr in attrs:
      print '\t%s = %s' %(attr.name, attr.value)

    try:
      mom_port = socket.getservbyname('pbs_resmon', 'tcp')
    except socket.error:
      mom_port = pbs.PBS_MANAGER_SERVICE_PORT

    mom_id = pbs.openrm(node.name, mom_port)

    mom_keys = pbs.get_mom_values(mom_id)
    for key in mom_keys.keys():
      print '\t%s = %s' %(key, mom_keys[key])

    print '\nTesting list with user supplied keywords'

    l = [ 'bas', 'ncpus', 'loadave' ]
    mom_keys = pbs.get_mom_values(mom_id, l)
    for key in mom_keys.keys():
      print '\t%s = %s' %(key, mom_keys[key])
    print ''
    pbs.closerm(mom_id)
Ejemplo n.º 14
0
def queues_page():
    """Return the data for the queues page: the reformatted queue list
    under 'queues' and a formatted timestamp under 'now'."""
    connection = pbs.pbs_connect(pbsserver)
    raw_queues = get_queues(connection)
    # the connection is only needed for the query itself
    pbs.pbs_disconnect(connection)

    formatted = queue_attributes_reformat(raw_queues)
    stamp = datetime.datetime.now().strftime('%Y.%m.%d at %I:%M:%S %P')
    return dict(now=stamp, queues=formatted)
Ejemplo n.º 15
0
def jobs_page():
    """Return the data for the jobs page: the reformatted job list under
    'jobs' and a formatted timestamp under 'now'."""
    connection = pbs.pbs_connect(pbsserver)
    raw_jobs = get_jobs(connection)
    # the connection is only needed for the query itself
    pbs.pbs_disconnect(connection)

    formatted = job_attributes_reformat(raw_jobs)
    stamp = datetime.datetime.now().strftime('%Y.%m.%d at %I:%M:%S %P')
    return dict(now=stamp, jobs=formatted)
Ejemplo n.º 16
0
    def __init__(self, script, name, env_vars=None, resources={}, conn=None, ppn=None):
        """
        Prepare a new Job for submission to PBS.

        env_vars is a dictionary with key-value pairs of environment variables that should be passed on to the job;
        a copy is stored.
        resources is a dictionary with optional keys: ['hours', 'cores'] both of these should be integer values.
        hours can be 1 - MAX_WALLTIME, cores depends on which cluster it is being run.
        conn, when given, is used as the pbs connection instead of opening a new one.
        """
        self.clean_conn = True
        self.log = fancylogger.getLogger(self.__class__.__name__, fname=False)
        self.script = script
        if not env_vars:
            self.env_vars = {}
        else:
            self.env_vars = env_vars.copy()
        self.name = name

        if pbs_import_failed:
            self.log.error(pbs_import_failed)

        try:
            self.pbs_server = pbs.pbs_default()
            # clean_conn records whether the connection was opened here
            self.clean_conn = not conn
            if conn:
                self.pbsconn = conn
            else:
                self.pbsconn = pbs.pbs_connect(self.pbs_server)
        except Exception as err:
            self.log.error("Failed to connect to the default pbs server: %s" % err)
Ejemplo n.º 17
0
def main():
  pbs_server = pbs.pbs_default()
  if not pbs_server:
    print 'No default server'
    sys.exit(1)

  if len(sys.argv) < 2:
  	print "Usage: set_property.py <hostname>"
	sys.exit(1)

  hostname = sys.argv[1]

  con = pbs.pbs_connect(pbs_server)

  attrop_l = pbs.new_attropl(1)
  attrop_l[0].name  = 'note'
  attrop_l[0].value = 'set_something_useful'
  attrop_l[0].op    = pbs.SET

  r =  pbs.pbs_manager(con, pbs.MGR_CMD_SET, pbs.MGR_OBJ_NODE, 
                    hostname, attrop_l, 'NULL')

  if r > 0:
    print r, ";"
    errno, text = pbs.error() 
    print errno, text
Ejemplo n.º 18
0
def main():
    server = pbs.pbs_default()
    c = pbs.pbs_connect(server)

    nodes = pbs.pbs_statnode(c, '', 'NULL', 'NULL')

    for node in nodes:
        print node.name, ' :'
        attrs = node.attribs
        for attr in attrs:
            print '\t%s = %s' % (attr.name, attr.value)

        try:
            mom_port = socket.getservbyname('pbs_resmon', 'tcp')
        except socket.error:
            mom_port = pbs.PBS_MANAGER_SERVICE_PORT

        mom_id = pbs.openrm(node.name, mom_port)

        mom_keys = pbs.get_mom_values(mom_id)
        for key in mom_keys.keys():
            print '\t%s = %s' % (key, mom_keys[key])

        print '\nTesting list with user supplied keywords'

        l = ['bas', 'ncpus', 'loadave']
        mom_keys = pbs.get_mom_values(mom_id, l)
        for key in mom_keys.keys():
            print '\t%s = %s' % (key, mom_keys[key])
        print ''
        pbs.closerm(mom_id)
Ejemplo n.º 19
0
 def check_all_jobs( self ):
     """
     Poll the PBS servers of all watched jobs.

     Returns a tuple (failures, statuses): a list of servers that failed
     to be contacted and a dict of "job_id : status" pairs (where status
     is a bunchified version of the API's structure). The check_count of
     every watched job is incremented.
     """
     to_query = []
     for job_state in self.watched:
         server_name = self.__get_pbs_server(job_state.job_destination.params)
         if server_name not in to_query:
             to_query.append( server_name )
         job_state.check_count += 1

     unreachable = []
     collected = {}
     for server_name in to_query:
         handle = pbs.pbs_connect( util.smart_str( server_name ) )
         if handle > 0:
             # ask only for state, used resources and exit status
             attrl = pbs.new_attrl(3)
             attrl[0].name = pbs.ATTR_state
             attrl[1].name = pbs.ATTR_used
             attrl[2].name = pbs.ATTR_exitstat
             stat = pbs.pbs_statjob( handle, None, attrl, None )
             pbs.pbs_disconnect( handle )
             collected.update( self.convert_statjob_to_bunches( stat ) )
         else:
             log.debug("connection to PBS server %s for state check failed" % server_name )
             unreachable.append( server_name )
     return ( unreachable, collected )
Ejemplo n.º 20
0
    def connect(self, server=None):
        """Connect to `server` (the default pbs server when falsy) and keep
        the handle in self._connection_id.

        Raises PBSException when no usable handle is obtained.
        """
        if not server:
            server = pbs.pbs_default()
        self._connection_id = pbs.pbs_connect(server)

        # A failed pbs_connect yields a negative handle, which is truthy,
        # so a plain truthiness test would let the failure pass unnoticed.
        if self._connection_id <= 0:
            raise PBSException('could not connect to pbs server ' + str(server))
Ejemplo n.º 21
0
 def pbs_conn(self):
     """Connect to the default PBS server and return the handle.

     Logs the PBS error and raises SchedulerError when the handle is
     negative.
     """
     handle = pbs.pbs_connect(pbs.pbs_default())
     if handle >= 0:
         return handle

     code, message = pbs.error()
     self.logging.error('Error in PBS server conncet')
     # the same "<code>: <text>" detail goes to the log and the exception
     detail = str(code)+': '+message
     self.logging.error('PBS error code '+detail)
     raise SchedulerError('PBS error', detail)
Ejemplo n.º 22
0
def nodes_page():
    """Return the data for the nodes page: the reformatted node list under
    'nodes', its totals under 'node_totals' and a timestamp under 'now'."""
    connection = pbs.pbs_connect(pbsserver)
    raw_nodes = get_nodes(connection)
    # the connection is only needed for the query itself
    pbs.pbs_disconnect(connection)

    formatted = node_attributes_reformat(raw_nodes)
    totals = get_node_totals(formatted)
    stamp = datetime.datetime.now().strftime('%Y.%m.%d at %I:%M:%S %P')
    return dict(now=stamp, nodes=formatted, node_totals=totals)
Ejemplo n.º 23
0
def connect_to_server(pbs_server=None):
    """Connect to PBS server and return connection.

    A falsy pbs_server selects the default server. Raises EasyBuildError
    when pbs_import_failed is set.
    """
    if pbs_import_failed:
        raise EasyBuildError(pbs_import_failed)

    target = pbs_server or pbs.pbs_default()
    return pbs.pbs_connect(target)
Ejemplo n.º 24
0
 def pbs_conn(self):
     """Return a connection handle for the default PBS server.

     A negative handle is treated as failure: the PBS error is logged and
     SchedulerError is raised.
     """
     default_server = pbs.pbs_default()
     conn = pbs.pbs_connect(default_server)
     failed = conn < 0
     if failed:
         err, err_text = pbs.error()
         self.logging.error('Error in PBS server conncet')
         self.logging.error('PBS error code %s' % (str(err) + ': ' + err_text))
         raise SchedulerError('PBS error', str(err) + ': ' + err_text)
     return conn
Ejemplo n.º 25
0
def connect_to_server(pbs_server=None):
    """Connect to PBS server and return connection.

    The default server is used when no pbs_server is given; an
    EasyBuildError is raised when pbs_import_failed is set.
    """
    if pbs_import_failed:
        raise EasyBuildError(pbs_import_failed)

    if pbs_server:
        return pbs.pbs_connect(pbs_server)
    return pbs.pbs_connect(pbs.pbs_default())
Ejemplo n.º 26
0
 def del_job(self, job_id, server):
     """Delete job `job_id` through a fresh connection to the default server.

     The job is addressed as "<job_id>.<server>"; when `server` is None the
     default server name is used. Returns the pbs_deljob result.
     """
     c = pbs.pbs_connect(str(
         pbs.pbs_default()))  # Create new connection for the child process
     try:
         if server is None:
             server = pbs.pbs_default()
         job_full_id = job_id + '.' + server
         result = pbs.pbs_deljob(c, job_full_id, 'NULL')
         return result  # If operation is successfull, result == 0
     finally:
         # Release the handle opened above; a negative value means the
         # connection was never established.
         if c >= 0:
             pbs.pbs_disconnect(c)
Ejemplo n.º 27
0
def connect_to_server(pbs_server=None):
    """Connect to PBS server and return connection.

    Returns None, after logging the failure, when pbs_import_failed is
    set. A falsy pbs_server selects the default server.
    """
    if not pbs_import_failed:
        target = pbs_server or pbs.pbs_default()
        return pbs.pbs_connect(target)

    _log.error(pbs_import_failed)
    return None
Ejemplo n.º 28
0
def connect_to_server(pbs_server=None):
    """Connect to PBS server and return connection.

    When pbs_import_failed is set, the failure is logged and None is
    returned instead. The default server is used when pbs_server is falsy.
    """
    if pbs_import_failed:
        _log.error(pbs_import_failed)
        return None

    if pbs_server:
        chosen = pbs_server
    else:
        chosen = pbs.pbs_default()
    return pbs.pbs_connect(chosen)
Ejemplo n.º 29
0
def method1():
	pbs_server = pbs.pbs_default() 
	if not pbs_server:
		print "No default pbs server"
		sys.exit(1)

	con = pbs.pbs_connect(pbs_server)
	if con == -1:
		print "Default pbs server connection failed"
		pbs_server = pbs.pbs_fbserver()
		if not pbs_server:
			print "No pbs fallback server"
			sys.exit(1)
		else:
			con = pbs.pbs_connect(pbs_server)
			if con == -1:
				print "pbs fallback server connection failed"
				sys.exit(1)

	print "Connected to %s" %(pbs_server)
Ejemplo n.º 30
0
def method1():
    pbs_server = pbs.pbs_default()
    if not pbs_server:
        print "No default pbs server"
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)
    if con == -1:
        print "Default pbs server connection failed"
        pbs_server = pbs.pbs_fbserver()
        if not pbs_server:
            print "No pbs fallback server"
            sys.exit(1)
        else:
            con = pbs.pbs_connect(pbs_server)
            if con == -1:
                print "pbs fallback server connection failed"
                sys.exit(1)

    print "Connected to %s" % (pbs_server)
Ejemplo n.º 31
0
def _connect_to_server(server):
    """
        Return a connection handle for the pbs_server at hostname server;
        the default server is used when server is None (or otherwise falsy).

        Raises Exception when the handle is not positive.

        This function is shared between JobManager and TorqueJobRunner
    """
    target = server if server else pbs.pbs_default()
    handle = pbs.pbs_connect(target)
    if handle > 0:
        return handle

    # the batch system returned an error, throw exception
    err_code, _err_text = pbs.error()
    raise Exception("Error connecting to pbs_server.  "
                    "Torque error {0}: '{1}'".format(
                        err_code, torque_strerror(err_code)))
Ejemplo n.º 32
0
    def _process(self, batch_list):
        '''Apply a batch of node changes to the batch server.

        Each item of batch_list is indexed as a pair: item[0] is handed to
        pbs.pbs_manager as the node name and item[1] as the attribute list.
        When ARGS_DRYRUN is set no connection is made; the call that would
        have been issued is printed instead.

        Exits the process with status 1 when no default pbs server is found.
        '''

        if ARGS_VERBOSE:
            _print('class:SaraNodes func:_process input:%s' % str(batch_list),
                   file=sys.stderr)

        ## Always get the pbs_server name, even in dry-run mode
        pbs_server = pbs.pbs_default()
        if not pbs_server:
            _print('Could not locate a pbs server', file=sys.stderr)
            sys.exit(1)

        if ARGS_VERBOSE:
            _print('class:SaraNodes func:_process pbs_server:%s' % pbs_server,
                   file=sys.stderr)

        ## Dry-run: only show what would be sent, never touch the server
        if ARGS_DRYRUN:
            for node in batch_list:
                _print(
                    "pbs.pbs_manager(pbs_connection, pbs.MGR_CMD_SET, pbs.MGR_OBJ_NODE, %s, %s, 'NULL')"
                    % (node[0], str(node[1])))
            return

        ## One connection serves every change; connecting again for each
        ## node would leave all but the last handle open.
        pbs_connection = pbs.pbs_connect(pbs_server)
        try:
            for node in batch_list:
                rcode = pbs.pbs_manager(pbs_connection, pbs.MGR_CMD_SET,
                                        pbs.MGR_OBJ_NODE, node[0], node[1],
                                        'NULL')
                if rcode > 0:
                    errno, text = pbs.error()
                    _print('PBS error for node \'%s\': %s (%s)' %
                           (node[0], text, errno),
                           file=sys.stderr)
        finally:
            ## Close the connection with the batch system, even on error
            pbs.pbs_disconnect(pbs_connection)
Ejemplo n.º 33
0
def print_header():
    # try connecting to the PBS server
#    print " "
    print "Content-Type: text/html"
#    print " "
#    print " "
    try:
	con = pbs.pbs_connect(pbs_server)
	nodes = pbs.pbs_statnode(con, "", "NULL", "NULL")
    except pbserr, error:
	print "<h1>Error connecting to PBS server:</h1><tt>",error,"</tt>"
        sys.exit(1)	
Ejemplo n.º 34
0
    def __init__(self, options):
        """Store the options, connect to the default pbs server and set up
        the mapping of variable names kept in self.vars."""
        super(Pbs, self).__init__(options)
        self.log = fancylogger.getLogger(self.__class__.__name__, fname=False)
        self.options = options
        self.log.debug("Provided options %s", options)

        default_server = pbs.pbs_default()
        self.pbs_server = default_server
        self.pbsconn = pbs.pbs_connect(default_server)

        self.vars = dict(cwd='PBS_O_WORKDIR', jobid='PBS_JOBID')
def submit_fxn_site_prediction_job(job):
    # This is how we are passing the fasta and job id to the script
    server_name = pbs.pbs_default()
    c = pbs.pbs_connect(server_name)

    print server_name
    print c

    attropl = pbs.new_attropl(7)

    attropl[0].name = pbs.ATTR_N
    attropl[0].value = "Functional Site Prediction Job: %s" % job.id

    attropl[1].name = pbs.ATTR_l
    attropl[1].resource = 'nodes'
    attropl[1].value = '1:ppn=1'

    attropl[2].name = pbs.ATTR_o
    attropl[2].value = JOB_LOG_FILE

    attropl[3].name = pbs.ATTR_e
    attropl[3].value = JOB_LOG_FILE

    attropl[4].name = pbs.ATTR_v
    attropl[4].value = "job_id=%s" % (job.id)

    attropl[5].name = pbs.ATTR_r
    attropl[5].value = 'y'

    attropl[6].name = pbs.ATTR_l
    attropl[6].resource = 'walltime'
    attropl[6].value = '1000'

    job.status_id = 2
    job.save()

    job_id = pbs.pbs_submit(
        c, attropl,
        "/home/cyrus_afrasiabi/ohana_repository/bpg/fxn_site_prediction.py",
        'web', 'NULL')
    logger.info(
        "Submitting %s to the grid to get functional site predictions with id %s"
        % (job.id, job_id))

    if job_id:
        job.pbs_job_id = job_id
        job.save()

    pbs.pbs_disconnect(c)

    return job_id
Ejemplo n.º 36
0
    def info(self, types=None):
        """
        Return a dict with the attributes of this job as reported by
        pbs_statjob, plus an 'id' key holding the job name.

        types selects the attributes to query: a single name, a list of
        names, or None for no selection. Returns None when the job has no
        jobid yet or when the query finds no job.
        """
        if not self.jobid:
            self.log.debug("no jobid, job is not submitted yet?")
            return None

        # a single type is treated as a one-element list
        if type(types) is str:
            types = [types]

        self.log.debug("Return info types %s" % types)

        # build the attribute list to query pbs with
        if types is not None:
            jobattr = pbs.new_attrl(len(types))
            for position, attr_name in enumerate(types):
                jobattr[position].name = attr_name
        else:
            jobattr = NULL

        # get a new connection (otherwise this seems to fail)
        if self.clean_conn:
            pbs.pbs_disconnect(self.pbsconn)
            self.pbsconn = pbs.pbs_connect(self.pbs_server)
        jobs = pbs.pbs_statjob(self.pbsconn, self.jobid, jobattr, NULL)

        found = len(jobs)
        if found == 0:
            # no job found, return None info
            self.log.debug(
                "No job found. Wrong id %s or job finished? Returning %s" %
                (self.jobid, None))
            return None
        if found == 1:
            self.log.debug("Request for jobid %s returned one result %s" %
                           (self.jobid, jobs))
        else:
            self.log.error(
                "Request for jobid %s returned more then one result %s" %
                (self.jobid, jobs))

        # only the first result is used
        first = jobs[0]
        job_details = {}
        for attrib in first.attribs:
            job_details[attrib.name] = attrib.value
        # manually set 'id' attribute
        job_details['id'] = first.name
        self.log.debug("Found jobinfo %s" % job_details)
        return job_details
Ejemplo n.º 37
0
 def check_single_job( self, pbs_server_name, job_id ):
     """
     Ask one PBS server for the state of a single job; used to make sure
     a job is really dead.

     Returns the value of the first attribute of the first result, or
     None when the server cannot be contacted. There is no guard against
     an empty result.
     """
     conn = pbs.pbs_connect( util.smart_str( pbs_server_name ) )
     if conn > 0:
         # only the job state is requested
         state_query = pbs.new_attrl(1)
         state_query[0].name = pbs.ATTR_state
         found = pbs.pbs_statjob( conn, job_id, state_query, None )
         pbs.pbs_disconnect( conn )
         return found[0].attribs[0].value

     log.debug("connection to PBS server %s for state check failed" % pbs_server_name )
     return None
Ejemplo n.º 38
0
def runAsDaemon():
    """
    Run in daemon mode: update queue and node info for every BatchServer
    that can be contacted; unreachable servers are logged and skipped.
    """
    # TODO: detach from console and log in syslog

    for batch_server in BatchServer.objects.all():
        encoded_name = batch_server.name.encode('iso-8859-1', 'replace')
        conn = pbs.pbs_connect(encoded_name)
        if conn != -1:
            update_all_queues(conn,batch_server)
            # update_all_jobs(conn,batch_server) is commented out, so jobs
            # are not refreshed here
            update_all_nodes(conn,batch_server)
        else:
            log(LOG_ERROR, "Cannot connect to batch server %s" % batch_server.name)
Ejemplo n.º 39
0
def main():

    pbs_server = pbs.pbs_default()
    if not pbs_server:
        print "No default pbs server"
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)
    nodes = pbs.pbs_statnode(con, "", "NULL", "NULL")

    for node in nodes:
        print node.name
        for attrib in node.attribs:
            print '\t', attrib.name, '=', attrib.value
Ejemplo n.º 40
0
def main():

  pbs_server = pbs.pbs_default()
  if not pbs_server:
    print "No default pbs server"
    sys.exit(1)

  con = pbs.pbs_connect(pbs_server)
  nodes = pbs.pbs_statnode(con, "", "NULL", "NULL")


  for node in nodes:
    print node.name
    for attrib in node.attribs:
      print '\t', attrib.name, '=', attrib.value
def submit_intrepid_job(job):
    """Submit the INTREPID pipeline for ``job`` to the default PBS server.

    The job is queued on 'web' with 1 node / 8 processors and a 48 hour
    walltime; stdout and stderr both go to JOB_LOG_FILE. The development
    pipeline script is used when ``job.development_job`` is true.

    On success the PBS id is stored in ``job.pbs_job_id``, the status is
    set to JOB_SUBMITTED and the job is saved.

    Returns the id returned by pbs_submit (falsy when submission failed).
    """
    if job.development_job:
        script = "/clusterfs/ohana/software/intrepid/scripts/intrepid_development_pipeline.py"
    else:
        script = "/clusterfs/ohana/software/intrepid/scripts/intrepid_pipeline.py"

    server_name = pbs.pbs_default()
    c = pbs.pbs_connect(server_name)
    try:
        attropl = pbs.new_attropl(6)

        attropl[0].name = pbs.ATTR_N
        attropl[0].value = "INTREPID Job: %s" % job.id

        attropl[1].name = pbs.ATTR_l
        attropl[1].resource = 'nodes'
        attropl[1].value = '1:ppn=8'

        attropl[2].name = pbs.ATTR_o
        attropl[2].value = JOB_LOG_FILE

        attropl[3].name = pbs.ATTR_e
        attropl[3].value = JOB_LOG_FILE

        # This is how the job id is passed to the pipeline script
        attropl[4].name = pbs.ATTR_v
        attropl[4].value = "job_id=%s" % (job.id)

        attropl[5].name = pbs.ATTR_l
        attropl[5].resource = 'walltime'
        attropl[5].value = '48:00:00'

        job_id = pbs.pbs_submit(c, attropl, script, 'web', 'NULL')
        logger.info("Submitting %s to the grid with id %s" % (job.id, job_id))

        if job_id:
            job.pbs_job_id = job_id
            job.status_id = JOB_SUBMITTED
            job.save()
    finally:
        # close the connection even when logging or job.save() raises
        pbs.pbs_disconnect(c)

    return job_id
Ejemplo n.º 42
0
    def submitScript(script):
        """Submit a job script to the default PBS server.

        ``script`` is a mapping read for the keys 'jobName', 'maxTime',
        'cpuNumber', 'scriptName' and 'queue'. Empty values fall back to
        the job name "new_job", a walltime of '01:00:00' and a node
        specification of '1'.

        Returns a dict with 'Result' ('OK' or 'ERROR') and 'Message' (the
        job id on success, otherwise the error text). Exceptions raised
        while submitting are reported through the dict, not propagated.
        """
        result = {}
        pbs_connection = None
        try:
            pbs_connection = pbs.pbs_connect(pbs.pbs_default())

            attropl = pbs.new_attropl(4)

            # Set the name of the job
            attropl[0].name = pbs.ATTR_N
            attropl[0].value = str(script['jobName']) if script['jobName'] else "new_job"

            # Job is rerunnable
            attropl[1].name = pbs.ATTR_r
            attropl[1].value = 'y'

            # Walltime
            attropl[2].name = pbs.ATTR_l
            attropl[2].resource = 'walltime'
            attropl[2].value = str(script['maxTime']) if script['maxTime'] else '01:00:00'

            # Nodes: the conditional covers the whole value, so a missing
            # cpuNumber yields the plain node count '1'
            attropl[3].name = pbs.ATTR_l
            attropl[3].resource = 'nodes'
            attropl[3].value = ('1:ppn=' + str(script['cpuNumber'])) if script['cpuNumber'] else '1'

            job_id = pbs.pbs_submit(pbs_connection, attropl, str(script['scriptName']), str(script['queue']), 'NULL')

            # read the error state before the connection is closed
            e, e_txt = pbs.error()
            if e:
                result['Result'] = 'ERROR'
                result['Message'] = str(e) + ' : ' + e_txt
            else:
                result['Result'] = 'OK'
                result['Message'] = job_id
        except Exception as exc:
            result['Result'] = 'ERROR'
            result['Message'] = str(exc)
        finally:
            # the connection used to be left open after every submission
            if pbs_connection is not None and pbs_connection >= 0:
                pbs.pbs_disconnect(pbs_connection)

        return result
Ejemplo n.º 43
0
    def __init__(self):
        """Set up empty target tables, the attribute table and a PBS connection."""
        self.targets = collections.defaultdict(list)
        self.default = ""

        # Build self.attrs from the pbs module: every module-level name that
        # starts with "ATTR_" contributes its value as a key, mapped to str.
        self.attrs = {}
        for candidate in dir(pbs):
            if candidate.startswith("ATTR_"):
                self.attrs[getattr(pbs, candidate)] = str

        # Connect to the default server
        self.conn = pbs.pbs_connect(pbs.pbs_default())

        # By default, submit jobs to pbs
        self.pbs(True)
        self.dotAliases = {}
Ejemplo n.º 44
0
def main():
    """Print the 'pbs_version' attribute of the default PBS server.

    Exits with status 1 when no default server is configured or when the
    connection cannot be established.
    """
    pbs_server = pbs.pbs_default()
    if not pbs_server:
        print('No default server')
        sys.exit(1)

    con = pbs.pbs_connect(pbs_server)
    if con < 0:
        # a negative handle means the connection failed
        errno, text = pbs.error()
        print('Could not connect to server %s: %s (%s)' % (pbs_server, text, errno))
        sys.exit(1)

    # restrict the status query to the single attribute of interest
    attr_l = pbs.new_attrl(1)
    attr_l[0].name = 'pbs_version'

    try:
        server_info = pbs.pbs_statserver(con, attr_l, 'NULL')
    finally:
        pbs.pbs_disconnect(con)

    # Single-argument print() emits the same text as the former print
    # statements (including the doubled spaces around '=') and is also
    # valid on Python 3.
    for entry in server_info:
        print(entry.name)
        for attrib in entry.attribs:
            print('\t%s  =  %s' % (attrib.name, attrib.value))
Ejemplo n.º 45
0
def main():
  """Print the 'pbs_version' attribute of the default PBS server.

  Exits with status 1 when no default server is configured or when the
  connection cannot be established.
  """
  pbs_server = pbs.pbs_default()
  if not pbs_server:
    print('No default server')
    sys.exit(1)

  con = pbs.pbs_connect(pbs_server)
  if con < 0:
    # a negative handle means the connection failed
    errno, text = pbs.error()
    print('Could not connect to server %s: %s (%s)' % (pbs_server, text, errno))
    sys.exit(1)

  # restrict the status query to the single attribute of interest
  attr_l = pbs.new_attrl(1)
  attr_l[0].name = 'pbs_version'

  try:
    server_info = pbs.pbs_statserver(con, attr_l, 'NULL')
  finally:
    pbs.pbs_disconnect(con)

  # Single-argument print() emits the same text as the former print
  # statements (including the doubled spaces around '=') and is also
  # valid on Python 3.
  for entry in server_info:
    print(entry.name)
    for attrib in entry.attribs:
      print('\t%s  =  %s' % (attrib.name, attrib.value))
Ejemplo n.º 46
0
    def info(self, types=None):
        """
        Query the batch server for this job and return its attributes.

        ``types`` may be None (no attribute filter), a single attribute
        name, or a sequence of attribute names.

        Returns a dict of attribute name -> value with an extra 'id' key
        holding the job name, or None when there is no jobid yet or the
        server returns no record for it.
        """
        if not self.jobid:
            self.log.debug("no jobid, job is not submitted yet?")
            return None

        # a lone attribute name is handled as a one-element list
        if type(types) is str:
            types = [types]

        self.log.debug("Return info types %s" % types)

        # build the attribute list used to query pbs
        if types is not None:
            jobattr = pbs.new_attrl(len(types))
            for position, attr_name in enumerate(types):
                jobattr[position].name = attr_name
        else:
            jobattr = NULL

        # get a new connection (otherwise this seems to fail)
        if self.clean_conn:
            pbs.pbs_disconnect(self.pbsconn)
            self.pbsconn = pbs.pbs_connect(self.pbs_server)

        jobs = pbs.pbs_statjob(self.pbsconn, self.jobid, jobattr, NULL)
        found = len(jobs)

        if found == 0:
            # no job found, return None info
            self.log.debug("No job found. Wrong id %s or job finished? Returning %s" % (self.jobid, None))
            return None

        if found == 1:
            self.log.debug("Request for jobid %s returned one result %s" % (self.jobid, jobs))
        else:
            self.log.error("Request for jobid %s returned more then one result %s" % (self.jobid, jobs))

        # only the first record is used, even if several came back
        record = jobs[0]
        job_details = dict((attrib.name, attrib.value) for attrib in record.attribs)
        # manually set 'id' attribute
        job_details['id'] = record.name
        self.log.debug("Found jobinfo %s" % job_details)
        return job_details
Ejemplo n.º 47
0
def update_one_queue(queue):
    """ Update live info about the given queue

    Connects to the queue's server, fetches the queue status with
    pbs_statque, passes the attributes of the first returned record to
    update_one_queue_from_pbs_data and saves the queue.

    Returns None in every case. Connection failures and empty results are
    logged and leave the queue untouched.
    """
    conn = pbs.pbs_connect(queue.server.name.encode('iso-8859-1', 'replace'))
    # any negative handle signals failure, not only -1
    if conn < 0:
        # The message used the undefined name `server`, so a failed
        # connection raised NameError and was never logged.
        logging.error("Cannot connect to %s - live data will be missing" % queue.server.name)
        return
    try:
        statqueues = pbs.pbs_statque(conn, queue.name.encode('iso-8859-1', 'replace') , [], "")
    finally:
        pbs.pbs_disconnect(conn)
    if len(statqueues)==0:
        logging.error("pbs_statque failed for queue: %s" % queue.name)
        return
    if len(statqueues)>1:
        logging.warning("pbs_statque returned more than one records for queue: %s" % queue.name)

    # only the first record is used, even if several came back
    attr_dict = dict([ (x.name,x.value) for x in statqueues[0].attribs])
    update_one_queue_from_pbs_data(queue, attr_dict)
    queue.save()
Ejemplo n.º 48
0
 def hold_rls_job(self, job_id, server, mode, permission):
     '''
     Hold or release a job on the batch server.

     Example:
     job_id: 183
     server: jim-desktop (None means the default server name)
     mode: hold | rls
     permission: u | o | s

     Returns the result code of pbs_holdjob / pbs_rlsjob; 0 means the
     operation was successful.

     Raises ValueError when mode is neither 'hold' nor 'rls'.
     '''
     # Reject unknown modes up front: they used to open a connection and
     # then fail with UnboundLocalError at the return statement.
     if mode not in ('hold', 'rls'):
         raise ValueError("mode must be 'hold' or 'rls', got %r" % (mode,))

     # Create new connection for the child process
     c = pbs.pbs_connect(str(pbs.pbs_default()))
     try:
         if server is None:
             server = pbs.pbs_default()
         job_full_id = job_id + '.' + server
         if mode == 'hold':
             result = pbs.pbs_holdjob(c, job_full_id, permission, 'NULL')
         else:
             result = pbs.pbs_rlsjob(c, job_full_id, permission, 'NULL')
     finally:
         # the connection was previously never closed
         pbs.pbs_disconnect(c)
     return result  # If operation is successful, result == 0
Ejemplo n.º 49
0
def main():
  """Add the property 'set_something_useful' to node "e2" on the default server.

  Uses pbs_manager with MGR_CMD_SET on a node object. When the call
  returns a positive code, that code and the pbs error number/text are
  printed. Exits with status 1 when no default server is configured or
  when the connection cannot be established.
  """
  pbs_server = pbs.pbs_default()
  if not pbs_server:
    print('No default server')
    sys.exit(1)

  con = pbs.pbs_connect(pbs_server)
  if con < 0:
    # a negative handle means the connection failed
    errno, text = pbs.error()
    print('Could not connect to server %s: %s (%s)' % (pbs_server, text, errno))
    sys.exit(1)

  attrop_l = pbs.new_attropl(1)
  attrop_l[0].name  = 'properties'
  attrop_l[0].value = 'set_something_useful'
  attrop_l[0].op    = pbs.INCR

  try:
    r = pbs.pbs_manager(con, pbs.MGR_CMD_SET, pbs.MGR_OBJ_NODE,
                        "e2", attrop_l, 'NULL')

    # Single-argument print() emits the same text as the former print
    # statements and is also valid on Python 3.
    if r > 0:
      print("%s ;" % r)
      errno, text = pbs.error()
      print("%s %s" % (errno, text))
  finally:
    pbs.pbs_disconnect(con)
Ejemplo n.º 50
0
def shell_test():
    '''
    Function to do some tests/debugging when running in the shell.
    This is only used for debugging.

    Connects to the server named by ``pbsserver`` (a name defined outside
    this function), prints memory and core usage for every node and then
    the node totals. Exits with status 1 when the connection handle is
    negative.
    '''

    print 'Running in the shell only.'
    conn = pbs.pbs_connect(pbsserver)
    if conn < 0:
        print 'Error connecting to PBS server.'
        print 'Have you set the PBS server hostname in this code?'
        sys.exit(1)

    # Uncomment one or more of the sections below to print info on the nodes,
    # queues and jobs.

    # Print nodes information
    nodes = get_nodes(conn)
    # the reformatted entries are indexed by string keys below
    nodes = node_attributes_reformat(nodes)
    for node in sorted(nodes):
        print 'Node Name: %s' % node['node_name']
        print '  Mem:  ', node['resources_assigned_mem'], '/', node['resources_available_mem'], \
            'GB = ', '%3d' % node['mem_ratio'], '% used'
        print '  Cores:', node['resources_assigned_ncpus'],'/', node['resources_available_ncpus'], \
            '=', '%3d' % node['cpu_ratio'], '% used'

        #for key in node.keys():
        #    print '   ', key, ' = ', node[key]

    print '\nNode Totals: '
    print get_node_totals(nodes)
    # The string literal below is disabled code (queue printing) kept for
    # reference; as a bare string expression it has no effect at runtime.
    '''
    # Print queues information
    queues = get_queues(conn)
    queues = queue_attributes_reformat(queues)
    for queue in queues:
        print '------ Queue Name: %s ------' % queue['queue_name']
        for key in queue.keys():
            print '  ', key, ' = ', queue[key]
    '''
    '''
Ejemplo n.º 51
0
    def create_job(self, username, Job_Name, queue, nodes, walltime, file):
        """Submit the script media/<username>/<file> to ``queue`` on the default server.

        The job is given the name ``Job_Name`` and the resource requests
        ``nodes`` and ``walltime``; all values are converted with str().

        Returns whatever pbs_geterrmsg reports for the connection after the
        submit call. The id returned by pbs_submit is not returned.
        """
        c = pbs.pbs_connect(str(pbs.pbs_default()))
        try:
            attrl = pbs.new_attropl(3)

            attrl[0].name = pbs.ATTR_N
            attrl[0].value = str(Job_Name)

            attrl[1].name = pbs.ATTR_l
            attrl[1].resource = 'nodes'
            attrl[1].value = str(nodes)

            attrl[2].name = pbs.ATTR_l
            attrl[2].resource = 'walltime'
            attrl[2].value = str(walltime)

            pbs.pbs_submit(c, attrl,
                           str("media/" + username + "/" + file), str(queue),
                           'NULL')
            # fetch the message while the connection is still open
            return pbs.pbs_geterrmsg(c)
        finally:
            # the connection was previously never closed
            pbs.pbs_disconnect(c)
    def __init__(self, settings):
        """Initialise the job bookkeeping and, in cluster mode, connect to PBS.

        ``settings["global"]`` is read for 'use_cluster'; in cluster mode
        also for 'cluster_address', otherwise for 'n_processors', which
        becomes the local thread limit.

        In cluster mode the pbs module is imported on demand, the thread
        limit is effectively unbounded, and the process exits with status
        1 when the connection handle is negative.
        """
        # spawn own thread, start up queue, start connection to server
        self.queue = []
        self.finished = []
        self.running = []
        self.error = []
        self.job_ids = []
        self.threads = []
        self.connection = None
        self.curr_id = 0
        self.settings = settings

        # `== True` is kept on purpose: only values equal to True enable
        # cluster mode, exactly as before.
        self.use_cluster = settings["global"]["use_cluster"] == True

        if self.use_cluster:
            # imported here so local-only setups do not need the pbs module
            import pbs

            # Establish connection to PBS server
            serv_addr = settings["global"]["cluster_address"]

            # Let the cluster's jobman handle scheduling; sys.maxint does
            # not exist on Python 3, so fall back to sys.maxsize there.
            self.max_threads = getattr(sys, "maxint", sys.maxsize)
            self.connection = pbs.pbs_connect(serv_addr)
            if self.connection < 0:
                errno, text = pbs.error()
                print("Error, unable to establish connection to PBS server.")
                print("%s %s" % (errno, text))
                sys.exit(1)
        else:
            self.max_threads = settings["global"]["n_processors"]
Ejemplo n.º 53
0
 def _connect(self):
     """Open a connection to the PBS/Torque server named by self.server.

     The handle is stored in self.con; PBSError is raised when the
     handle is negative.
     """
     self.con = pbs.pbs_connect(self.server)
     if self.con >= 0:
         return
     raise PBSError("Could not make a connection with %s\n" %(self.server))
Ejemplo n.º 54
0
	def _connect(self):
		"""Open a connection to the PBS/Torque server named by self.server.

		The handle is stored in self.con; PBSError is raised when the
		handle is negative.
		"""
		self.con = pbs.pbs_connect(self.server)
		if self.con >= 0:
			return
		raise PBSError("Could not make a connection with %s\n" %(self.server))
Ejemplo n.º 55
0
 def connect_to_server(self):
     """Return the PBS connection, opening and caching it on first use."""
     if self.conn:
         # reuse the handle stored earlier
         return self.conn
     self.conn = pbs.pbs_connect(self.pbs_server)
     return self.conn
Ejemplo n.º 56
0
                job_file = "%s/database/pbs/%s.sh" % (os.getcwd(), job_name)
                fh = file(job_file, "w")
                fh.write(script)
                fh.close()

                # define job attributes
                ofile = "%s/database/pbs/%s.o" % (os.getcwd(), job_name)
                efile = "%s/database/pbs/%s.e" % (os.getcwd(), job_name)
                job_attrs = pbs.new_attropl(2)
                job_attrs[0].name = pbs.ATTR_o
                job_attrs[0].value = ofile
                job_attrs[1].name = pbs.ATTR_e
                job_attrs[1].value = efile

                # get a handle
                conn = pbs.pbs_connect(pbs_server)

                # queue it
                if os.access(job_file, os.R_OK):
                    log.debug("submitting file %s with output %s and error %s" % (job_file, ofile, efile) )
                    log.debug("command is: %s" % command_line)
                    job_id = pbs.pbs_submit(conn, job_attrs, job_file, None, None)

                    # monitor
                    if job_id:
                        p = PBSQuery()
                        job_data = p.getjob(job_id)
                        old_state = job_data[job_id]["job_state"]
                        log.debug("initial state is %s" % old_state)
                        running = False
                        while True:
Ejemplo n.º 57
0
 def connect_to_server(self):
     """Return the PBS connection, opening and caching it on first use."""
     if self.conn:
         # reuse the handle stored earlier
         return self.conn
     self.conn = pbs.pbs_connect(self.pbs_server)
     return self.conn