def get_njobs_in_queue(self, username=None):
    """
    Count the jobs that a user currently has in the queue.

    :param username: (str) user whose jobs are counted; when None, the
        name returned by getpass.getuser() is used
    :return: the value produced by self._parse_njobs for the status
        command's output, or None when the status command reports a
        non-zero return code (the error is logged)
    """
    logger_name = 'qadapter.{}'.format(self.q_name)
    queue_logger = self.get_qlogger(logger_name)

    # default to the user running this process
    if username is None:
        username = getpass.getuser()

    # run the status command; result is indexed as
    # [0] return code, [1] output to parse, [2] error response
    status_cmd = Command(self._get_status_cmd(username))
    result = status_cmd.run(timeout=5)

    # failure first: there's a problem talking to the queue server?
    if result[0] != 0:
        msgs = [
            'Error trying to get the number of jobs in the queue',
            'The error response reads: {}'.format(result[2]),
        ]
        log_fancy(queue_logger, msgs, 'error')
        return None

    # success: parse the job count out of the command output
    njobs = self._parse_njobs(result[1], username)
    queue_logger.info(
        'The number of jobs currently in the queue is: {}'.format(njobs))
    return njobs
def get_njobs_in_queue(self, username=None):
    """
    Return how many jobs the given user has in the queue.

    :param username: (str) the username of the jobs to count (default is
        to autodetect via getpass.getuser())
    :return: the job count as parsed by self._parse_njobs, or None if the
        status command did not return 0 (an error is logged in that case)
    """
    queue_logger = self.get_qlogger('qadapter.{}'.format(self.q_name))

    # autodetect the user when none was supplied
    user = getpass.getuser() if username is None else username

    # outcome[0] is the return code, outcome[1] the output handed to the
    # parser and outcome[2] the error response
    outcome = Command(self._get_status_cmd(user)).run(timeout=5)

    succeeded = outcome[0] == 0
    if succeeded:
        count = self._parse_njobs(outcome[1], user)
        summary = 'The number of jobs currently in the queue is: {}'.format(
            count)
        queue_logger.info(summary)
        return count

    # the status command failed; report what it said
    error_lines = ['Error trying to get the number of jobs in the queue']
    error_lines.append('The error response reads: {}'.format(outcome[2]))
    log_fancy(queue_logger, error_lines, 'error')
    return None
def submit_to_queue(self, script_file):
    """
    Submit a job script to the queue and return the id of the new job.

    :param script_file: (str) path of the script file to submit
    :return: the job id produced by self._parse_jobid from the submit
        command's output, or None when the submission or the job id
        parsing fails (failures are logged, not raised)
    :raises ValueError: if script_file does not exist
    """
    if not os.path.exists(script_file):
        raise ValueError(
            'Cannot find script file located at: {}'.format(script_file))

    queue_logger = self.get_qlogger('qadapter.{}'.format(self.q_name))
    submit_cmd = self.q_commands[self.q_type]["submit_cmd"]

    # submit the job
    try:
        if self.q_type == "Cobalt":
            # Cobalt requires scripts to be executable
            os.chmod(script_file,
                     stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP)

        cmd = [submit_cmd, script_file]

        # For most of the queues handled by common_adapter, it's best to
        # simply submit the file name as an argument. LoadSharingFacility
        # doesn't handle the header section (queue name, nodes, etc) when
        # taking file arguments, so the file needs to be passed as stdin
        # to make it work correctly.
        if self.q_type == 'LoadSharingFacility':
            with open(script_file, 'r') as input_file:
                p = subprocess.Popen([submit_cmd], stdin=input_file,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
        else:
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)

        # communicate() drains stdout and stderr while waiting; a plain
        # wait() can deadlock once the child fills one of the pipe buffers
        stdout, stderr = p.communicate()

        # check the returncode; 0 is treated as a successful submission
        if p.returncode == 0:
            try:
                job_id = self._parse_jobid(stdout.decode())
                queue_logger.info(
                    'Job submission was successful and job_id is {}'.format(
                        job_id))
                return job_id
            except Exception as ex:
                # probably error parsing job code
                log_exception(
                    queue_logger,
                    'Could not parse job id following {} due to error {}...'
                    .format(submit_cmd, str(ex)))
        else:
            # some qsub error, e.g. maybe wrong queue specified, don't
            # have permission to submit, etc...
            msgs = [
                'Error in job submission with {n} file {f} and cmd {c}'.format(
                    n=self.q_name, f=script_file, c=cmd),
                # decode so the log shows text rather than a bytes repr
                'The error response reads: {}'.format(
                    stderr.decode(errors='replace'))]
            log_fancy(queue_logger, msgs, 'error')

    except Exception:
        # random error, e.g. no qsub on machine!
        log_exception(
            queue_logger,
            'Running the command: {} caused an error...'.format(submit_cmd))

    # every failure path ends here
    return None
def submit_to_queue(self, script_file):
    """
    Submit a job script to the queue and return the resulting job id.

    :param script_file: (str) name of the script file to submit
    :return: the job id parsed by self._parse_jobid from the submit
        command's standard output; None if the submit command fails or its
        output cannot be parsed (both cases are logged rather than raised)
    :raises ValueError: if script_file does not exist on disk
    """
    if not os.path.exists(script_file):
        raise ValueError(
            'Cannot find script file located at: {}'.format(script_file))

    queue_logger = self.get_qlogger('qadapter.{}'.format(self.q_name))
    submit_cmd = self.q_commands[self.q_type]["submit_cmd"]

    # submit the job
    try:
        if self.q_type == "Cobalt":
            # Cobalt requires scripts to be executable
            os.chmod(script_file,
                     stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP)

        cmd = [submit_cmd, script_file]
        pipes = {'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE}

        # For most of the queues handled by common_adapter, it's best to
        # simply submit the file name as an argument. LoadSharingFacility
        # doesn't handle the header section (queue name, nodes, etc) when
        # taking file arguments, so the file needs to be passed as stdin
        # to make it work correctly.
        if self.q_type == 'LoadSharingFacility':
            with open(script_file, 'r') as input_file:
                p = subprocess.Popen([submit_cmd], stdin=input_file, **pipes)
        else:
            p = subprocess.Popen(cmd, **pipes)

        # Read both pipes to completion while waiting for the child.
        # Calling wait() first could block forever if the child writes
        # more than a pipe buffer's worth of output.
        stdout, stderr = p.communicate()

        # a returncode of 0 is treated as a successful submission
        if p.returncode == 0:
            try:
                job_id = self._parse_jobid(stdout.decode())
                queue_logger.info(
                    'Job submission was successful and job_id is {}'.format(
                        job_id))
                return job_id
            except Exception as ex:
                # probably error parsing job code
                log_exception(
                    queue_logger,
                    'Could not parse job id following {} due to error {}...'
                    .format(submit_cmd, str(ex)))
        else:
            # some qsub error, e.g. maybe wrong queue specified, don't
            # have permission to submit, etc...
            msgs = [
                'Error in job submission with {n} file {f} and cmd {c}'.format(
                    n=self.q_name, f=script_file, c=cmd),
                # decoded so the message is readable text, not b'...'
                'The error response reads: {}'.format(
                    stderr.decode(errors='replace'))]
            log_fancy(queue_logger, msgs, 'error')

    except Exception:
        # random error, e.g. no qsub on machine!
        log_exception(
            queue_logger,
            'Running the command: {} caused an error...'.format(submit_cmd))

    # reached only when the submission did not yield a job id
    return None
def submit_to_queue(self, script_file):
    """
    Submit a job script to the queue and return the id of the new job.

    :param script_file: (str) name of the script file to submit
    :return: the job id produced by self._parse_jobid from the submit
        command's standard output, or None when the submission or the job
        id parsing fails (failures are logged, not raised)
    :raises ValueError: if script_file does not exist
    """
    if not os.path.exists(script_file):
        raise ValueError(
            'Cannot find script file located at: {}'.format(script_file))

    queue_logger = self.get_qlogger('qadapter.{}'.format(self.q_name))

    # submit the job
    try:
        cmd = [self.submit_cmd, script_file]
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

        # communicate() drains both pipes while waiting; wait() alone can
        # deadlock when the child fills a pipe buffer
        stdout, stderr = p.communicate()

        # check the returncode; 0 is treated as a successful submission
        if p.returncode == 0:
            try:
                job_id = self._parse_jobid(stdout)
                queue_logger.info(
                    'Job submission was successful and job_id is {}'.format(
                        job_id))
                return job_id
            except Exception:
                # probably error parsing job code; Exception (not a bare
                # except) so KeyboardInterrupt/SystemExit still propagate
                log_exception(
                    queue_logger,
                    'Could not parse job id following {}...'.format(
                        self.submit_cmd))
        else:
            # some qsub error, e.g. maybe wrong queue specified, don't
            # have permission to submit, etc...
            msgs = [
                'Error in job submission with {n} file {f} and cmd {c}'.format(
                    n=self.q_name, f=script_file, c=cmd),
                'The error response reads: {}'.format(stderr)]
            log_fancy(queue_logger, msgs, 'error')

    except Exception:
        # random error, e.g. no qsub on machine!
        log_exception(
            queue_logger,
            'Running the command: {} caused an error...'.format(
                self.submit_cmd))

    # every failure path ends here
    return None
def submit_to_queue(self, script_file):
    """
    Submit a job script to the queue and return the resulting job id.

    :param script_file: (str) name of the script file to submit
    :return: the job id parsed by self._parse_jobid from the submit
        command's standard output; None if the command fails or its output
        cannot be parsed (both cases are logged rather than raised)
    :raises ValueError: if script_file does not exist on disk
    """
    if not os.path.exists(script_file):
        raise ValueError(
            'Cannot find script file located at: {}'.format(script_file))

    queue_logger = self.get_qlogger('qadapter.{}'.format(self.q_name))

    # submit the job
    try:
        cmd = [self.submit_cmd, script_file]
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

        # Read both pipes to completion while waiting for the child.
        # Calling wait() first could block forever if the child writes
        # more than a pipe buffer's worth of output.
        stdout, stderr = p.communicate()

        if p.returncode != 0:
            # some qsub error, e.g. maybe wrong queue specified, don't
            # have permission to submit, etc...
            msgs = [
                'Error in job submission with {n} file {f} and cmd {c}'.format(
                    n=self.q_name, f=script_file, c=cmd),
                'The error response reads: {}'.format(stderr)]
            log_fancy(queue_logger, msgs, 'error')
        else:
            # a returncode of 0 is treated as a successful submission
            try:
                job_id = self._parse_jobid(stdout)
                queue_logger.info(
                    'Job submission was successful and job_id is {}'.format(
                        job_id))
                return job_id
            except Exception:
                # probably error parsing job code; a bare except would
                # also swallow KeyboardInterrupt and SystemExit
                log_exception(
                    queue_logger,
                    'Could not parse job id following {}...'.format(
                        self.submit_cmd))

    except Exception:
        # random error, e.g. no qsub on machine!
        log_exception(
            queue_logger,
            'Running the command: {} caused an error...'.format(
                self.submit_cmd))

    # reached only when the submission did not yield a job id
    return None
def submit_to_queue(self, queue_params, script_file):
    """
    Submit a job script with sbatch and return the id of the new job.

    For further documentation, see parent object.

    :param queue_params: object whose logging_dir attribute is passed to
        get_fw_logger
    :param script_file: (str) path of the script file to submit
    :return: (int) the job id, or None when the submission or the job id
        parsing fails (failures are logged, not raised)
    :raises ValueError: if script_file does not exist
    """
    if not os.path.exists(script_file):
        raise ValueError(
            'Cannot find script file located at: {}'.format(script_file))

    # initialize logger
    slurm_logger = get_fw_logger('rocket.slurm', queue_params.logging_dir)

    # submit the job
    try:
        cmd = ['sbatch', script_file]
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

        # communicate() drains both pipes while waiting; wait() alone can
        # deadlock when the child fills a pipe buffer
        stdout, stderr = p.communicate()

        # check the returncode; 0 is treated as a successful submission
        if p.returncode == 0:
            try:
                # the job id is the fourth whitespace-separated token of
                # the output (presumably 'Submitted batch job <id>' -
                # TODO confirm against the sbatch version in use)
                job_id = int(stdout.split()[3])
                slurm_logger.info(
                    'Job submission was successful and job_id is {}'.format(
                        job_id))
                return job_id
            except Exception:
                # probably error parsing job code; Exception (not a bare
                # except) so KeyboardInterrupt/SystemExit still propagate
                log_exception(slurm_logger,
                              'Could not parse job id following slurm...')
        else:
            # some submit error, e.g. maybe wrong queue specified, don't
            # have permission to submit, etc...
            msgs = [
                'Error in job submission with SLURM file {f} and cmd {c}'.format(
                    f=script_file, c=cmd),
                'The error response reads: {}'.format(stderr)]
            log_fancy(slurm_logger, 'error', msgs)

    except Exception:
        # random error, e.g. no sbatch on machine!
        log_exception(slurm_logger, 'Running slurm caused an error...')

    # every failure path ends here
    return None
def get_njobs_in_queue(self, rocket_params, username=None):
    """
    Count the lines of ``qstat -a -u <username>`` output that mention the
    user, as an estimate of that user's jobs in the queue.

    For further documentation, see parent object.

    :param rocket_params: object whose logging_dir attribute is passed to
        get_fw_logger
    :param username: (str) user whose jobs are counted; when None, the
        name returned by getpass.getuser() is used
    :return: (int) number of matching output lines, or None when qstat
        exits with a non-zero return code (the error is logged)
    """
    # TODO: (low-priority) parse the qstat -x output as an alternate way
    # to get this working

    # initialize logger
    pbs_logger = get_fw_logger('rocket.pbs', rocket_params.logging_dir)

    # initialize username
    if username is None:
        username = getpass.getuser()

    # run qstat; stderr must be piped too, otherwise there is no error
    # text to report below. universal_newlines=True yields text output so
    # the username comparison works on str.
    cmd = ['qstat', '-a', '-u', username]
    p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, universal_newlines=True)

    # communicate() reads both pipes to completion while waiting, which
    # avoids the deadlock wait() risks when the output is large
    stdout, stderr = p.communicate()

    # parse the result
    if p.returncode == 0:
        # lines should have this form
        # '1339044.sdb username queuename 2012-02-29-16-43 20460 -- -- -- 00:20 C 00:09'
        # count lines that include the username in it; a plain substring
        # test is used so that regex metacharacters in the username are
        # matched literally
        # TODO: only count running or queued jobs. or rather, *don't*
        # count jobs that are 'C'.
        njobs = sum(1 for line in stdout.splitlines() if username in line)
        pbs_logger.info(
            'The number of jobs currently in the queue is: {}'.format(njobs))
        return njobs

    # there's a problem talking to qstat server?
    msgs = [
        'Error trying to get the number of jobs in the queue using qstat service',
        'The error response reads: {}'.format(stderr)]
    log_fancy(pbs_logger, 'error', msgs)
    return None
def get_njobs_in_queue(self, queue_params, username=None):
    """
    Count the lines of squeue output for a user that mention the user, as
    an estimate of that user's jobs in the queue.

    For further documentation, see parent object.

    :param queue_params: object whose logging_dir attribute is passed to
        get_fw_logger
    :param username: (str) user whose jobs are counted; when None, the
        name returned by getpass.getuser() is used
    :return: (int) number of matching output lines, or None when squeue
        exits with a non-zero return code (the error is logged)
    """
    # TODO: (low-priority) parse the qstat -x output as an alternate way
    # to get this working

    # initialize logger
    slurm_logger = get_fw_logger('rocket.slurm', queue_params.logging_dir)

    # initialize username
    if username is None:
        username = getpass.getuser()

    # run squeue; stderr must be piped too, otherwise there is no error
    # text to report below. universal_newlines=True yields text output so
    # the username comparison works on str.
    # NOTE(review): '-o "%u"' reaches squeue as one argument including the
    # literal quotes (shell=False); left unchanged - confirm it is intended
    cmd = ['squeue', '-o "%u"', '-u', username]
    p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, universal_newlines=True)

    # communicate() reads both pipes to completion while waiting, which
    # avoids the deadlock wait() risks when the output is large
    stdout, stderr = p.communicate()

    # parse the result
    if p.returncode == 0:
        # each job line is expected to carry the username; count lines
        # that include it. A plain substring test is used so that regex
        # metacharacters in the username are matched literally.
        njobs = sum(1 for line in stdout.splitlines() if username in line)
        slurm_logger.info(
            'The number of jobs currently in the queue is: {}'.format(njobs))
        return njobs

    # there's a problem talking to the squeue service?
    msgs = [
        'Error trying to get the number of jobs in the queue using squeue service',
        'The error response reads: {}'.format(stderr)]
    log_fancy(slurm_logger, 'error', msgs)
    return None
def get_njobs_in_queue(self, rocket_params, username=None):
    """
    Estimate how many jobs a user has in the PBS queue by counting the
    lines of ``qstat -a -u <username>`` output that contain the username.

    For further documentation, see parent object.

    :param rocket_params: object whose logging_dir attribute is passed to
        get_fw_logger
    :param username: (str) the username of the jobs to count (default is
        to autodetect via getpass.getuser())
    :return: (int) number of output lines containing the username; None
        if qstat returns a non-zero exit code (an error is logged)
    """
    # TODO: (low-priority) parse the qstat -x output as an alternate way
    # to get this working

    # initialize logger
    pbs_logger = get_fw_logger('rocket.pbs', rocket_params.logging_dir)

    # initialize username
    if username is None:
        username = getpass.getuser()

    # run qstat, capturing stderr as well: without stderr=PIPE the
    # Popen object's stderr is None and the error report below would
    # itself fail. Text mode keeps the output comparable with username.
    cmd = ['qstat', '-a', '-u', username]
    p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, universal_newlines=True)

    # drain both pipes while waiting so a chatty qstat cannot block on a
    # full pipe buffer
    stdout, stderr = p.communicate()

    if p.returncode != 0:
        # there's a problem talking to qstat server?
        msgs = [
            'Error trying to get the number of jobs in the queue using qstat service',
            'The error response reads: {}'.format(stderr)]
        log_fancy(pbs_logger, 'error', msgs)
        return None

    # lines should have this form
    # '1339044.sdb username queuename 2012-02-29-16-43 20460 -- -- -- 00:20 C 00:09'
    # count lines that include the username in it (literal substring
    # match, so characters like '.' in a username are not treated as
    # regex syntax)
    # TODO: only count running or queued jobs. or rather, *don't* count
    # jobs that are 'C'.
    njobs = sum(1 for line in stdout.splitlines() if username in line)
    pbs_logger.info(
        'The number of jobs currently in the queue is: {}'.format(njobs))
    return njobs
def get_njobs_in_queue(self, queue_params, username=None):
    """
    Estimate how many jobs a user has in the SLURM queue by counting the
    lines of squeue output that contain the username.

    For further documentation, see parent object.

    :param queue_params: object whose logging_dir attribute is passed to
        get_fw_logger
    :param username: (str) the username of the jobs to count (default is
        to autodetect via getpass.getuser())
    :return: (int) number of output lines containing the username; None
        if squeue returns a non-zero exit code (an error is logged)
    """
    # TODO: (low-priority) parse the qstat -x output as an alternate way
    # to get this working

    # initialize logger
    slurm_logger = get_fw_logger('rocket.slurm', queue_params.logging_dir)

    # initialize username
    if username is None:
        username = getpass.getuser()

    # run squeue, capturing stderr as well: without stderr=PIPE the
    # Popen object's stderr is None and the error report below would
    # itself fail. Text mode keeps the output comparable with username.
    # NOTE(review): '-o "%u"' is a single argv entry with literal quotes
    # (no shell is involved); kept as-is - confirm it is intended
    cmd = ['squeue', '-o "%u"', '-u', username]
    p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE, universal_newlines=True)

    # drain both pipes while waiting so a chatty squeue cannot block on a
    # full pipe buffer
    stdout, stderr = p.communicate()

    if p.returncode != 0:
        # there's a problem talking to the squeue service?
        msgs = [
            'Error trying to get the number of jobs in the queue using squeue service',
            'The error response reads: {}'.format(stderr)]
        log_fancy(slurm_logger, 'error', msgs)
        return None

    # count lines that include the username in it (literal substring
    # match, so characters like '.' in a username are not treated as
    # regex syntax)
    njobs = sum(1 for line in stdout.splitlines() if username in line)
    slurm_logger.info(
        'The number of jobs currently in the queue is: {}'.format(njobs))
    return njobs
def submit_to_queue(self, queue_params, script_file):
    """
    Submit a job script with sbatch and return the resulting job id.

    For further documentation, see parent object.

    :param queue_params: object whose logging_dir attribute is passed to
        get_fw_logger
    :param script_file: (str) name of the script file to submit
    :return: (int) the job id; None if sbatch fails or its output cannot
        be parsed (both cases are logged rather than raised)
    :raises ValueError: if script_file does not exist on disk
    """
    if not os.path.exists(script_file):
        raise ValueError(
            'Cannot find script file located at: {}'.format(script_file))

    # initialize logger
    slurm_logger = get_fw_logger('rocket.slurm', queue_params.logging_dir)

    # submit the job
    try:
        cmd = ['sbatch', script_file]
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

        # Read both pipes to completion while waiting for the child.
        # Calling wait() first could block forever if the child writes
        # more than a pipe buffer's worth of output.
        stdout, stderr = p.communicate()

        if p.returncode != 0:
            # some submit error, e.g. maybe wrong queue specified, don't
            # have permission to submit, etc...
            msgs = [
                'Error in job submission with SLURM file {f} and cmd {c}'.format(
                    f=script_file, c=cmd),
                'The error response reads: {}'.format(stderr)]
            log_fancy(slurm_logger, 'error', msgs)
        else:
            # a returncode of 0 is treated as a successful submission
            try:
                # the job id is read from the fourth whitespace-separated
                # token of the output (presumably 'Submitted batch job
                # <id>' - TODO confirm against the sbatch version in use)
                job_id = int(stdout.split()[3])
                slurm_logger.info(
                    'Job submission was successful and job_id is {}'.format(
                        job_id))
                return job_id
            except Exception:
                # probably error parsing job code; a bare except would
                # also swallow KeyboardInterrupt and SystemExit
                log_exception(slurm_logger,
                              'Could not parse job id following slurm...')

    except Exception:
        # random error, e.g. no sbatch on machine!
        log_exception(slurm_logger, 'Running slurm caused an error...')

    # reached only when the submission did not yield a job id
    return None