def kill_job_ids(job_ids):
    """
    Kills qsub jobs by issuing the ``qdel`` command

    Parameters
    ----------
    job_ids: list
        a list of job ID numbers; each entry is converted with ``str()``
        before being placed on the command line, so both integers and
        strings are accepted

    Notes
    -----
    If ``job_ids`` is empty or ``None``, no command is run and a debug
    message is logged instead. The stdout and stderr captured from the
    ``qdel`` command are written to the debug log.

    Examples
    --------
    Example usage::

        import qsub
        job_ids = ['4104004', '4104006', '4104009']
        qsub.kill_job_ids(job_ids = job_ids)

    """
    if job_ids:
        logger.debug('Killing jobs: {0}'.format(job_ids))
        # str.join() only accepts strings; convert so numeric IDs do not raise TypeError
        qdel_command = 'qdel {0}'.format(' '.join(str(job_id) for job_id in job_ids))
        cmd = tools.SubprocessCmd(command = qdel_command).run()
        logger.debug(cmd.proc_stdout)
        logger.debug(cmd.proc_stderr)
    else:
        logger.debug("No jobs passed")
def get_qacct(job_id):
    """
    Looks up the ``qacct`` entry for a completed qsub job

    Parameters
    ----------
    job_id:
        the ID of the job, inserted into the command ``qacct -j <job_id>``

    Returns
    -------
    the ``proc_stdout`` attribute of the finished ``tools.SubprocessCmd`` run
    """
    command = 'qacct -j {0}'.format(job_id)
    finished = tools.SubprocessCmd(command = command).run()
    return finished.proc_stdout
def vcf2annovar(vcf_file, **kwargs):
    """
    Converts a .vcf file to ANNOVAR .avinput format, using ANNOVAR ``convert2annovar.pl``

    Parameters
    ----------
    vcf_file: str
        the path to a .vcf file

    Keyword Arguments
    -----------------
    output_file: str
        the path to the ``.avinput`` file to be created, or ``None`` to use the
        path of ``vcf_file`` with its extension replaced by ``.avinput``
    bin_dir: str
        path to the ANNOVAR installation directory, or ``None`` to use the
        internally set default location (``configs['ANNOVAR_bin_dir']``)

    Notes
    -----
    Generates and executes a shell command in the format::

        annovar/convert2annovar.pl -format vcf4old /data/output/169.duplications.vcf -includeinfo > /data/output/169.duplications.avinput

    The input file, the ``convert2annovar.pl`` script, and the output file are each
    passed to ``tools.missing_item_kill``.

    Returns
    -------
    str
        the path to the output ``.avinput`` file
    """
    # an explicit None is documented as "use the default", so treat it the same
    # as an omitted keyword argument; defaults are only looked up when needed
    bin_dir = kwargs.pop('bin_dir', None)
    if bin_dir is None:
        bin_dir = configs['ANNOVAR_bin_dir']
    output_file = kwargs.pop('output_file', None)
    if output_file is None:
        output_file = os.path.splitext(vcf_file)[0] + '.avinput'

    # make sure input file exists
    tools.missing_item_kill(item = vcf_file, logger = logger)

    # path to binary to use
    convert_bin = os.path.join(bin_dir, 'convert2annovar.pl')
    tools.missing_item_kill(item = convert_bin, logger = logger)

    # shell command to run
    convert_command = ''' "{0}" -format vcf4old "{1}" -includeinfo > "{2}" '''.format(
        convert_bin, # 0
        vcf_file, # 1
        output_file # 2
    )

    # run
    logger.debug(convert_command)
    run_cmd = tools.SubprocessCmd(command = convert_command).run()
    logger.debug(run_cmd.proc_stdout)
    logger.debug(run_cmd.proc_stderr)

    # make sure output file exists
    tools.missing_item_kill(item = output_file, logger = logger)
    return(output_file)
def kill_jobs(jobs):
    """
    Kills qsub jobs by issuing the ``qdel`` command

    Parameters
    ----------
    jobs: list
        a list of ``Job`` objects; the ``id`` attribute of each one is
        converted with ``str()`` and placed on the ``qdel`` command line

    Notes
    -----
    If ``jobs`` is empty or ``None``, no command is run and a debug
    message is logged instead. The stdout and stderr captured from the
    ``qdel`` command are written to the debug log.
    """
    if jobs:
        logger.debug('Killing jobs: {0}'.format(jobs))
        # str.join() only accepts strings; convert so numeric IDs do not raise TypeError
        qdel_command = 'qdel {0}'.format(' '.join(str(job.id) for job in jobs))
        cmd = tools.SubprocessCmd(command = qdel_command).run()
        logger.debug(cmd.proc_stdout)
        logger.debug(cmd.proc_stderr)
    else:
        logger.debug("No jobs passed")
def get_qacct(self, job_id = None):
    """
    Fetches the `qacct` entry for a completed qsub job, used to determine if the job completed successfully

    Parameters
    ----------
    job_id:
        the ID of the job to look up; when omitted (or otherwise falsy), the
        object's own ``id`` attribute is used

    Notes
    -----
    This operation is extremely slow, takes about 10 - 30+ seconds to complete

    Returns
    -------
    str
        The character string representation of the stdout from the `qacct -j` command for the job
    """
    # fall back to this object's own ID when no explicit one was supplied
    lookup_id = job_id if job_id else self.id
    command = 'qacct -j {0}'.format(lookup_id)
    finished = tools.SubprocessCmd(command = command).run()
    return finished.proc_stdout
def monitor_jobs(jobs = None, kill_err = True, print_verbose = False, **kwargs):
    """
    Monitors a list of qsub `Job` objects for completion.

    Job monitoring is accomplished by calling each job's `present()` and `error()`
    methods, then waiting for several seconds. Jobs that are no longer present in
    `qstat` or have an error state will be removed from the monitoring queue. The
    function will repeatedly check each job and then wait, removing absent or
    errored jobs, until no jobs remain in the monitoring queue. Optionally, jobs
    that had an error status will be killed with the `qdel` command, or else they
    will remain in `qstat` indefinitely.

    This function allows your program to wait for jobs to finish running before continuing.

    Parameters
    ----------
    jobs: list
        a list of `Job` objects
    kill_err: bool
        `True` or `False`, whether or not jobs left in error state should be
        automatically killed. It is recommended to leave this `True`
    print_verbose: bool
        whether or not descriptions of the steps being taken should be printed to
        the console with Python's `print` function

    Returns
    -------
    tuple
        a tuple of lists containing `Job` objects, in the format:
        `(completed_jobs, err_jobs)`. If ``jobs`` is empty, ``None``, or not a
        list, an error is logged and an empty tuple ``()`` is returned instead.

    Notes
    -----
    This function will only check whether a job is present/absent in the `qstat`
    queue, or in an error state in the `qstat` queue; it does not actually check
    if a job is in a 'Running' state. If a job is present and not in error state,
    it is assumed to either be 'qw' (waiting to run), or 'r' (running). In both
    cases, it is assumed that the job will eventually finish and leave the `qstat`
    queue, and subsequently be removed from this function's monitoring queue. Jobs
    in 'Eqw' error state are stuck and will not leave on their own so must be
    removed automatically by this function, or killed manually by the end user.

    Each job is classified exactly once per pass: a job that is no longer present
    is recorded as completed and its `error()` method is not consulted; a job
    that is present and in error state is recorded as errored.

    The ``jobs`` is mutable and passed by reference; this means that upon
    completion of this function, the original ``jobs`` list will be depleted::

        >>> import qsub
        >>> jobs = []
        >>> len(jobs)
        0
        >>> for i in range(5):
        ...     job = qsub.submit('sleep 20')
        ...     jobs.append(job)
        ...
        >>> len(jobs)
        5
        >>> qsub.monitor_jobs(jobs = jobs)
        ([Job(id = 4098911, name = python, log_dir = None), Job(id = 4098913, name = python, log_dir = None), Job(id = 4098915, name = python, log_dir = None), Job(id = 4098912, name = python, log_dir = None), Job(id = 4098914, name = python, log_dir = None)], [])
        >>> len(jobs)
        0

    Examples
    --------
    Example usage::

        job = submit(print_verbose = True)
        completed_jobs, err_jobs = monitor_jobs([job], print_verbose = True)
        [job.validate_completion() for job in completed_jobs]

    """
    # make sure jobs were passed
    if not jobs:
        logger.error('No jobs to monitor')
        return()

    # make sure jobs is a list
    if not isinstance(jobs, list):
        logger.error('"jobs" passed is not a list')
        return()

    completed_jobs = []
    # jobs in error state; won't finish
    err_jobs = []

    num_jobs = len(jobs)
    start_message = 'Monitoring jobs for completion. Number of jobs in queue: {0}'.format(num_jobs)
    logger.debug(start_message)
    if print_verbose:
        print(start_message)

    while jobs:
        # check each job for presence & error state; the survivors are collected
        # in a separate list so that nothing is removed from `jobs` while it is
        # being iterated (which would skip entries or remove the wrong job)
        still_queued = []
        for job in jobs:
            if not job.present():
                completed_jobs.append(job)
            elif job.error():
                err_jobs.append(job)
            else:
                still_queued.append(job)
        # slice assignment mutates the caller's list in place, preserving the
        # documented depletion of the original `jobs` list
        jobs[:] = still_queued

        # report when the number of jobs in the queue has changed
        if num_jobs != len(jobs):
            num_jobs = len(jobs)
            queue_message = "Number of jobs in queue: {0}".format(num_jobs)
            logger.debug(queue_message)
            if print_verbose:
                print(queue_message)

        # only wait if there is still something left to check
        if jobs:
            sleep(5)

    done_message = 'No jobs remaining in the job queue'
    logger.debug(done_message)
    if print_verbose:
        print(done_message)

    # check if there were any jobs left in error state
    if err_jobs:
        err_message = '{0} jobs left were left in error state. Jobs: {1}'.format(len(err_jobs), [job.id for job in err_jobs])
        logger.error(err_message)
        if print_verbose:
            print(err_message)

        # kill the error jobs with the 'qdel' command
        if kill_err:
            kill_message = 'Killing jobs left in error state'
            logger.debug(kill_message)
            if print_verbose:
                print(kill_message)
            # str.join() only accepts strings; convert so numeric IDs do not raise TypeError
            qdel_command = 'qdel {0}'.format(' '.join(str(job.id) for job in err_jobs))
            cmd = tools.SubprocessCmd(command = qdel_command).run()
            logger.debug(cmd.proc_stdout)
            if print_verbose:
                print(cmd.proc_stdout)

    return((completed_jobs, err_jobs))
def table_annovar(avinput_file, **kwargs):
    """
    Runs ANNOVAR ``table_annovar.pl``

    Parameters
    ----------
    avinput_file: str
        path to ANNOVAR format ``.avinput`` file

    Keyword Arguments
    -----------------
    output_file_base: str
        file path base for the annotated output file; defaults to ``avinput_file``
        with its extension removed. The output is expected at this base with
        ``.<buildver>_multianno.txt`` appended
    bin_dir: str
        path to the ANNOVAR installation directory; defaults to ``configs['ANNOVAR_bin_dir']``
    db_dir: str
        path to the ANNOVAR database directory; defaults to ``configs['ANNOVAR_db_dir']``
    buildver: str
        the build version to use, e.g. "hg19"; defaults to ``configs['ANNOVAR_buildver']``
    protocol: str
        value passed to ``--protocol``, e.g. "cytoBand,refGene"; defaults to ``configs['ANNOVAR_protocol']``
    operation: str
        value passed to ``--operation``, e.g. "r,g"; defaults to ``configs['ANNOVAR_operation']``

    Notes
    -----
    Generates and executes a shell command in the format::

        "/annovar/table_annovar.pl" "example-data/Sample1.avinput" "/annovar/db" --outfile "example-data/Sample1" --buildver "hg19" --protocol "cytoBand,refGene" --operation "r,g" --nastring "." --remove

    The input file, the ``table_annovar.pl`` script, and the expected output file
    are each passed to ``tools.missing_item_kill``.

    Returns
    -------
    str
        the path to the expected output file,
        ``<output_file_base>.<buildver>_multianno.txt``
    """
    # get keyword arguments; each one is read from its own key
    bin_dir = kwargs.pop('bin_dir', configs['ANNOVAR_bin_dir'])
    db_dir = kwargs.pop('db_dir', configs['ANNOVAR_db_dir'])
    buildver = kwargs.pop('buildver', configs['ANNOVAR_buildver'])
    protocol = kwargs.pop('protocol', configs['ANNOVAR_protocol'])
    operation = kwargs.pop('operation', configs['ANNOVAR_operation'])
    output_file_base = kwargs.pop('output_file_base', os.path.splitext(avinput_file)[0])

    # make sure input file exists
    tools.missing_item_kill(item = avinput_file, logger = logger)

    # expected output file
    output_suffix = '.{0}_multianno.txt'.format(buildver)
    multianno_output = output_file_base + output_suffix

    # path to binary to use; checked before running, as is done for convert2annovar.pl
    table_annovar_bin = os.path.join(bin_dir, 'table_annovar.pl')
    tools.missing_item_kill(item = table_annovar_bin, logger = logger)

    # shell command to run
    table_annovar_command = (
        ' "{0}" "{1}" "{2}" --outfile "{3}" --buildver "{4}" '
        '--protocol "{5}" --operation "{6}" --nastring "." --remove '
    ).format(
        table_annovar_bin, # 0
        avinput_file, # 1
        db_dir, # 2
        output_file_base, # 3
        buildver, # 4
        protocol, # 5
        operation # 6
    )

    # run
    logger.debug(table_annovar_command)
    run_cmd = tools.SubprocessCmd(command = table_annovar_command).run()
    logger.debug(run_cmd.proc_stdout)
    logger.debug(run_cmd.proc_stderr)

    # make sure output file exists
    tools.missing_item_kill(item = multianno_output, logger = logger)
    return(multianno_output)