def test_execute_command_line(self):
    cl = 'hostname'
    popen_object = execute_command_line(cl, stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
    reported_hostname = popen_object.communicate()[0].strip()
    assert reported_hostname == socket.gethostname()
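# All of the snippets in this section call an execute_command_line() helper that
# is not itself shown. As a hedged sketch only (the real helper may differ), the
# behaviour implied by the tests and callers -- accept a string or list command
# line, forward keyword arguments to subprocess.Popen, return the Popen handle,
# and raise RuntimeError when the command cannot be launched -- could look
# roughly like this:
import shlex
import subprocess


def execute_command_line(command_line, **popen_kwargs):
    """Execute command_line (str or list); return the subprocess.Popen handle."""
    if isinstance(command_line, str) and not popen_kwargs.get("shell"):
        # Split plain strings into an argument list unless shell=True is requested
        command_line = shlex.split(command_line)
    try:
        return subprocess.Popen(command_line, **popen_kwargs)
    except OSError as e:
        raise RuntimeError('Could not execute command line "{}": '
                           '{}'.format(command_line, e))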
def run_multiqc(base_path, project_id, project_name, wait=False):
    project_path = os.path.join(base_path, 'ANALYSIS', project_id)
    result_path = os.path.join(base_path, 'ANALYSIS', project_id, 'multiqc')
    safe_makedir(result_path)
    command = ['multiqc', project_path, '-o', result_path,
               '-i', project_name, '-n', project_name, '-q', '-f']
    multiqc_stdout = ''
    multiqc_stderr = ''
    try:
        # If MultiQC is already running for this project, kill it first.
        ps_command = ["ps", "ux"]
        pcs = subprocess.check_output(ps_command)
        for line in pcs.splitlines():
            if " ".join(command) in line:
                os.kill(int(line.split()[1]), 9)
        # Then run MultiQC.
        handle = execute_command_line(command)
        if wait:
            (multiqc_stdout, multiqc_stderr) = handle.communicate()
            if multiqc_stdout or multiqc_stderr:
                combined_output = "{}\n{}".format(multiqc_stdout, multiqc_stderr)
                raise Exception(combined_output)
    except:
        raise
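# A minimal usage sketch for the variant above, assuming the project lives under
# <base_path>/ANALYSIS/<project_id>/ and that the 'multiqc' executable is on
# PATH; the paths and identifiers are hypothetical. With the '-q' (quiet) flag
# any stdout/stderr from MultiQC is unexpected, which is why the wait=True
# branch raises if output appears.
run_multiqc(base_path="/proj/ngi/analysis_root",
            project_id="P1234",
            project_name="P1234_example_project",
            wait=True)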
def launch_piper_job(command_line, project, log_file_path=None):
    """Launch the Piper command line.

    :param str command_line: The command line to execute
    :param Project project: The Project object (needed to set the CWD)

    :returns: The subprocess.Popen object for the process
    :rtype: subprocess.Popen
    """
    working_dir = os.path.join(project.base_path, "ANALYSIS", project.dirname)
    file_handle = None
    if log_file_path:
        try:
            file_handle = open(log_file_path, 'w')
        except Exception as e:
            LOG.error('Could not open log file "{}"; reverting to standard '
                      'logger (error: {})'.format(log_file_path, e))
            log_file_path = None
    popen_object = execute_command_line(command_line, cwd=working_dir, shell=True,
                                        stdout=(file_handle or subprocess.PIPE),
                                        stderr=(file_handle or subprocess.PIPE))
    if not log_file_path:
        log_process_non_blocking(popen_object.stdout, LOG.info)
        log_process_non_blocking(popen_object.stderr, LOG.warn)
    return popen_object
def queue_sbatch_file(sbatch_file_path):
    LOG.info("Queueing sbatch file {}".format(sbatch_file_path))
    p_handle = execute_command_line("sbatch {}".format(sbatch_file_path),
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
    p_out, p_err = p_handle.communicate()
    try:
        slurm_job_id = re.match(r'Submitted batch job (\d+)', p_out).groups()[0]
    except AttributeError:
        raise RuntimeError('Could not submit sbatch file "{}": '
                           '{}'.format(sbatch_file_path, p_err))
    return int(slurm_job_id)
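# A minimal usage sketch; the sbatch file path is hypothetical and the parsing
# assumes sbatch prints "Submitted batch job <id>" on success (the standard
# SLURM behaviour).
slurm_job_id = queue_sbatch_file("/proj/ngi/ANALYSIS/P1234/sbatch/P1234-wf.sbatch")
LOG.info("sbatch file queued as SLURM job {}".format(slurm_job_id))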
def run_multiqc(base_path, project_id, project_name):
    project_path = os.path.join(base_path, 'ANALYSIS', project_id)
    result_path = os.path.join(base_path, 'ANALYSIS', project_id, 'multiqc')
    safe_makedir(result_path)
    command = ['multiqc', project_path, '-o', result_path,
               '-i', project_name, '-n', project_name, '-q', '-f']
    multiqc_stdout = ''
    multiqc_stderr = ''
    try:
        handle = execute_command_line(command)
        (multiqc_stdout, multiqc_stderr) = handle.communicate()
        if multiqc_stdout or multiqc_stderr:
            combined_output = "{}\n{}".format(multiqc_stdout, multiqc_stderr)
            raise Exception(combined_output)
    except:
        raise
def execute_process(self, command_line, working_dir=None, **extra_args):
    """
    Execute the supplied command line. If the working directory that should be
    used does not exist, it will be created.

    :param command_line: command line to be executed, can be a string or a list
    :param working_dir: directory to use as working directory when executing the
    command line. Default is to use the current working directory used by this
    ProcessConnector. Will be created if it does not exist
    :param extra_args: any additional parameters passed will be ignored
    :return: the process id (pid) of the launched process
    """
    working_dir = working_dir or self.cwd
    safe_makedir(working_dir)
    with chdir(working_dir):
        try:
            proc = execute_command_line(command_line, shell=False, cwd=working_dir)
            return proc.pid
        except RuntimeError:
            raise
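# A minimal usage sketch, assuming a ProcessConnector class that exposes the
# execute_process() method above and keeps its default working directory in
# self.cwd; the constructor signature and paths are hypothetical.
connector = ProcessConnector(cwd="/proj/ngi/analysis_root")
pid = connector.execute_process(["bash", "run_analysis.sh"],
                                working_dir="/proj/ngi/analysis_root/P1234")
LOG.info("Launched local process with pid {}".format(pid))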
def execute_process(self, command_line, working_dir=None, exit_code_path=None,
                    job_name=None):
    """
    Wrap the supplied command line in a SLURM script and submit it to the job
    queue.

    :param command_line: command line to execute in the SLURM job, formatted as
    a string
    :param working_dir: the directory in which to create the SLURM script and
    use as working directory for the job. If it does not already exist, it will
    be created.
    :param exit_code_path: path to the file where the exit code from the command
    should be stored. If not specified, the exit code will be sent to /dev/null
    :param job_name: the job name to use when submitting to the cluster. If not
    specified, it will be constructed from the command line
    :return: the slurm job id
    """
    exit_code_path = exit_code_path or os.devnull
    job_name = job_name or command_line.replace(" ", "_")[0:20]
    # create the working dir if it does not exist already
    working_dir = working_dir or self.cwd
    safe_makedir(working_dir)
    with chdir(working_dir):
        slurm_script = self._slurm_script_from_command_line(
            command_line, working_dir, exit_code_path, job_name)
        # submit the sbatch file
        sbatch_command_line = "sbatch {}".format(slurm_script)
        proc = execute_command_line(sbatch_command_line,
                                    shell=False,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
        stdout, stderr = proc.communicate()
        try:
            # parse the slurm job id from the sbatch stdout
            slurm_job_id = re.match(r'Submitted batch job (\d+)',
                                    stdout).groups()[0]
            return slurm_job_id
        except AttributeError:
            raise RuntimeError(
                'Could not submit sbatch job for workflow "{}": '
                '{}'.format(job_name, stderr))
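# A minimal usage sketch, assuming a SlurmConnector-style class that exposes the
# execute_process() method above; the class name, constructor signature and
# paths are hypothetical.
slurm_connector = SlurmConnector(cwd="/proj/ngi/analysis_root/P1234")
job_id = slurm_connector.execute_process(
    "bash organise_and_deliver.sh",
    exit_code_path="/proj/ngi/analysis_root/P1234/exit_code.out",
    job_name="deliver_P1234")
LOG.info("Command wrapped in a SLURM script and submitted as job {}".format(job_id))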
def run_multiqc(base_path, project_id, project_name, wait=False):
    project_path = os.path.join(base_path, 'ANALYSIS', project_id)
    result_path = os.path.join(base_path, 'ANALYSIS', project_id, 'multiqc')
    safe_makedir(result_path)
    command = ['multiqc', project_path, '-o', result_path,
               '-i', project_name, '-n', project_name, '-q', '-f']
    multiqc_stdout = ''
    multiqc_stderr = ''
    try:
        handle = execute_command_line(command)
        if wait:
            (multiqc_stdout, multiqc_stderr) = handle.communicate()
            if multiqc_stdout or multiqc_stderr:
                combined_output = "{}\n{}".format(multiqc_stdout, multiqc_stderr)
                raise Exception(combined_output)
    except:
        raise
def sbatch_piper_sample(command_line_list, workflow_name, project, sample,
                        libprep=None, restart_finished_jobs=False,
                        config=None, config_file_path=None):
    """sbatch a piper sample-level workflow.

    :param list command_line_list: The list of command lines to execute (in order)
    :param str workflow_name: The name of the workflow to execute
    :param NGIProject project: The NGIProject
    :param NGISample sample: The NGISample
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file (optional)
    """
    job_identifier = "{}-{}-{}".format(project.project_id, sample, workflow_name)
    # Paths to the various data directories
    project_dirname = project.dirname
    sample_dirname = sample.dirname
    perm_analysis_dir = os.path.join(project.base_path, "ANALYSIS",
                                     project_dirname, "piper_ngi")
    scratch_analysis_dir = os.path.join("$SNIC_TMP/ANALYSIS/",
                                        project_dirname, "piper_ngi")
    scratch_aln_dir = os.path.join(scratch_analysis_dir, "01_raw_alignments")
    scratch_qc_dir = os.path.join(scratch_analysis_dir,
                                  "02_preliminary_alignment_qc")
    # ensure that the analysis dir exists
    safe_makedir(perm_analysis_dir)
    try:
        slurm_project_id = config["environment"]["project_id"]
    except KeyError:
        raise RuntimeError('No SLURM project id specified in configuration file '
                           'for job "{}"'.format(job_identifier))
    slurm_queue = config.get("slurm", {}).get("queue") or "core"
    num_cores = config.get("slurm", {}).get("cores") or 8
    slurm_time = config.get("piper", {}).get(
        "job_walltime", {}).get(workflow_name) or "4-00:00:00"
    slurm_out_log = os.path.join(perm_analysis_dir, "logs",
                                 "{}_sbatch.out".format(job_identifier))
    slurm_err_log = os.path.join(perm_analysis_dir, "logs",
                                 "{}_sbatch.err".format(job_identifier))
    for log_file in slurm_out_log, slurm_err_log:
        rotate_file(log_file)
    sbatch_text = create_sbatch_header(slurm_project_id=slurm_project_id,
                                       slurm_queue=slurm_queue,
                                       num_cores=num_cores,
                                       slurm_time=slurm_time,
                                       job_name="piper_{}".format(job_identifier),
                                       slurm_out_log=slurm_out_log,
                                       slurm_err_log=slurm_err_log)
    sbatch_text_list = sbatch_text.split("\n")
    sbatch_extra_params = config.get("slurm", {}).get("extra_params", {})
    for param, value in sbatch_extra_params.iteritems():
        sbatch_text_list.append("#SBATCH {} {}\n\n".format(param, value))
    modules_to_load = config.get("piper", {}).get("load_modules", [])
    if modules_to_load:
        sbatch_text_list.append("\n# Load required modules for Piper")
        for module_name in modules_to_load:
            sbatch_text_list.append("module load {}".format(module_name))
    project, src_aln_files, src_alnqc_files = \
        collect_files_for_sample_analysis(project, sample, restart_finished_jobs)
    # Fastq files to copy
    fastq_src_dst_list = []
    directories_to_create = set()
    for sample in project:
        for libprep in sample:
            for seqrun in libprep:
                project_specific_path = os.path.join(project.dirname,
                                                     sample.dirname,
                                                     libprep.dirname,
                                                     seqrun.dirname)
                directories_to_create.add(
                    os.path.join("$SNIC_TMP/DATA/", project_specific_path))
                for fastq in seqrun.fastq_files:
                    src_file = os.path.join(project.base_path, "DATA",
                                            project_specific_path, fastq)
                    dst_file = os.path.join("$SNIC_TMP/DATA/",
                                            project_specific_path, fastq)
                    fastq_src_dst_list.append([src_file, dst_file])
    sbatch_text_list.append("echo -ne '\\n\\nCopying fastq files at '")
    sbatch_text_list.append("date")
    if fastq_src_dst_list:
        for directory in directories_to_create:
            sbatch_text_list.append("mkdir -p {}".format(directory))
        for src_file, dst_file in fastq_src_dst_list:
            sbatch_text_list.append("rsync -rptoDLv {} {}".format(src_file,
                                                                  dst_file))
    else:
        raise ValueError('No valid fastq files available to process for '
                         'project/sample {}/{}'.format(project, sample))
    # BAM files / Alignment QC files
    input_files_list = [src_aln_files, src_alnqc_files]
    output_dirs_list = [scratch_aln_dir, scratch_qc_dir]
    echo_text_list = ["Copying any pre-existing alignment files",
                      "Copying any pre-existing alignment qc files"]
    for echo_text, input_files, output_dir in zip(echo_text_list,
                                                  input_files_list,
                                                  output_dirs_list):
        if input_files:
            sbatch_text_list.append("echo -ne '\\n\\n{}' at ".format(echo_text))
            sbatch_text_list.append("date")
            sbatch_text_list.append("mkdir -p {}".format(output_dir))
            sbatch_text_list.append(
                ("rsync -rptoDLv {input_files} "
                 "{output_directory}/").format(input_files=" ".join(input_files),
                                               output_directory=output_dir))
    sbatch_text_list.append("echo -ne '\\n\\nExecuting command lines at '")
    sbatch_text_list.append("date")
    sbatch_text_list.append("# Run the actual commands")
    for command_line in command_line_list:
        sbatch_text_list.append(command_line)
    piper_status_file = create_exit_code_file_path(
        workflow_subtask=workflow_name,
        project_base_path=project.base_path,
        project_name=project.dirname,
        project_id=project.project_id,
        sample_id=sample.name)
    sbatch_text_list.append("\nPIPER_RETURN_CODE=$?")
    #sbatch_text_list.append("if [[ $PIPER_RETURN_CODE == 0 ]]")
    #sbatch_text_list.append("then")
    sbatch_text_list.append("echo -ne '\\n\\nCopying back the resulting analysis files at '")
    sbatch_text_list.append("date")
    sbatch_text_list.append("mkdir -p {}".format(perm_analysis_dir))
    sbatch_text_list.append("rsync -rptoDLv {}/ {}/".format(scratch_analysis_dir,
                                                            perm_analysis_dir))
    sbatch_text_list.append("\nRSYNC_RETURN_CODE=$?")
    #sbatch_text_list.append("else")
    #sbatch_text_list.append("    echo -e '\\n\\nPiper job failed'")
    #sbatch_text_list.append("fi")
    # Record job completion status
    sbatch_text_list.append("if [[ $RSYNC_RETURN_CODE == 0 ]]")
    sbatch_text_list.append("then")
    sbatch_text_list.append("    if [[ $PIPER_RETURN_CODE == 0 ]]")
    sbatch_text_list.append("    then")
    sbatch_text_list.append("        echo '0'> {}".format(piper_status_file))
    sbatch_text_list.append("    else")
    sbatch_text_list.append("        echo '1'> {}".format(piper_status_file))
    sbatch_text_list.append("    fi")
    sbatch_text_list.append("else")
    sbatch_text_list.append("    echo '2'> {}".format(piper_status_file))
    sbatch_text_list.append("fi")
    # Write the sbatch file
    sbatch_dir = os.path.join(perm_analysis_dir, "sbatch")
    safe_makedir(sbatch_dir)
    sbatch_outfile = os.path.join(sbatch_dir, "{}.sbatch".format(job_identifier))
    rotate_file(sbatch_outfile)
    with open(sbatch_outfile, 'w') as f:
        f.write("\n".join(sbatch_text_list))
    LOG.info("Queueing sbatch file {} for job {}".format(sbatch_outfile,
                                                         job_identifier))
    # Queue the sbatch file
    p_handle = execute_command_line("sbatch {}".format(sbatch_outfile),
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
    p_out, p_err = p_handle.communicate()
    try:
        slurm_job_id = re.match(r'Submitted batch job (\d+)', p_out).groups()[0]
    except AttributeError:
        raise RuntimeError('Could not submit sbatch job for workflow "{}": '
                           '{}'.format(job_identifier, p_err))
    # Detail which seqruns we've started analyzing so we can update statuses later
    record_analysis_details(project, job_identifier)
    return int(slurm_job_id)
def test_execute_command_line_RuntimeError(self):
    cl = "nosuchcommand"
    with self.assertRaises(RuntimeError):
        execute_command_line(cl)
def sbatch_piper_sample(command_line_list, workflow_name, project, sample,
                        libprep=None, restart_finished_jobs=False,
                        files_to_copy=None, config=None, config_file_path=None):
    """sbatch a piper sample-level workflow.

    :param list command_line_list: The list of command lines to execute (in order)
    :param str workflow_name: The name of the workflow to execute
    :param NGIProject project: The NGIProject
    :param NGISample sample: The NGISample
    :param dict config: The parsed configuration file (optional)
    :param str config_file_path: The path to the configuration file (optional)
    """
    job_identifier = "{}-{}-{}".format(project.project_id, sample, workflow_name)
    # Paths to the various data directories
    project_dirname = project.dirname
    perm_analysis_dir = os.path.join(project.base_path, "ANALYSIS",
                                     project_dirname, "piper_ngi", "")
    scratch_analysis_dir = os.path.join("$SNIC_TMP/ANALYSIS/",
                                        project_dirname, "piper_ngi", "")
    # ensure that the analysis dir exists
    safe_makedir(perm_analysis_dir)
    try:
        slurm_project_id = config["environment"]["project_id"]
    except KeyError:
        raise RuntimeError(
            'No SLURM project id specified in configuration file '
            'for job "{}"'.format(job_identifier))
    slurm_queue = config.get("slurm", {}).get("queue") or "core"
    num_cores = config.get("slurm", {}).get("cores") or 16
    slurm_time = config.get("piper", {}).get(
        "job_walltime", {}).get(workflow_name) or "4-00:00:00"
    slurm_out_log = os.path.join(perm_analysis_dir, "logs",
                                 "{}_sbatch.out".format(job_identifier))
    slurm_err_log = os.path.join(perm_analysis_dir, "logs",
                                 "{}_sbatch.err".format(job_identifier))
    for log_file in slurm_out_log, slurm_err_log:
        rotate_file(log_file)
    sbatch_text = create_sbatch_header(
        slurm_project_id=slurm_project_id,
        slurm_queue=slurm_queue,
        num_cores=num_cores,
        slurm_time=slurm_time,
        job_name="piper_{}".format(job_identifier),
        slurm_out_log=slurm_out_log,
        slurm_err_log=slurm_err_log)
    sbatch_text_list = sbatch_text.split("\n")
    sbatch_extra_params = config.get("slurm", {}).get("extra_params", {})
    for param, value in sbatch_extra_params.iteritems():
        sbatch_text_list.append("#SBATCH {} {}\n\n".format(param, value))
    modules_to_load = config.get("piper", {}).get("load_modules", [])
    if modules_to_load:
        sbatch_text_list.append("\n# Load required modules for Piper")
        for module_name in modules_to_load:
            sbatch_text_list.append("module load {}".format(module_name))
    if not files_to_copy:
        project, files_to_copy = \
            collect_files_for_sample_analysis(project, sample,
                                              restart_finished_jobs)
    # Fastq files to copy
    fastq_src_dst_list = []
    directories_to_create = set()
    for libprep in sample:
        for seqrun in libprep:
            project_specific_path = os.path.join(project.dirname,
                                                 sample.dirname,
                                                 libprep.dirname,
                                                 seqrun.dirname)
            directories_to_create.add(
                os.path.join("$SNIC_TMP/DATA/", project_specific_path))
            for fastq in seqrun.fastq_files:
                src_file = os.path.join(project.base_path, "DATA",
                                        project_specific_path, fastq)
                dst_file = os.path.join("$SNIC_TMP/DATA/",
                                        project_specific_path, fastq)
                fastq_src_dst_list.append([src_file, dst_file])
    sbatch_text_list.append("echo -ne '\\n\\nCopying fastq files at '")
    sbatch_text_list.append("date")
    if fastq_src_dst_list:
        for directory in directories_to_create:
            sbatch_text_list.append("mkdir -p {}".format(directory))
        for src_file, dst_file in fastq_src_dst_list:
            sbatch_text_list.append("rsync -rptoDLv {} {}".format(src_file,
                                                                  dst_file))
    else:
        raise ValueError('No valid fastq files available to process for '
                         'project/sample {}/{}'.format(project, sample))
    # Pre-existing analysis files
    if files_to_copy:
        sbatch_text_list.append(
            "echo -ne '\\n\\nCopying pre-existing analysis files at '")
        sbatch_text_list.append("date")
        # Create the scratch analysis directory if it does not already exist
        sbatch_text_list.append(
            "if [ ! -d {output_directory} ]; then".format(
                output_directory=scratch_analysis_dir))
        sbatch_text_list.append(
            "mkdir {output_directory} ".format(
                output_directory=scratch_analysis_dir))
        sbatch_text_list.append("fi")
        sbatch_text_list.append(
            ("rsync -rptoDLv {input_files} "
             "{output_directory}/").format(
                input_files=" ".join(files_to_copy),
                output_directory=scratch_analysis_dir))
        # Delete pre-existing analysis files after copy
        sbatch_text_list.append(
            "echo -ne '\\n\\nDeleting pre-existing analysis files at '")
        sbatch_text_list.append("date")
        sbatch_text_list.append(
            "rm -rf {input_files}".format(input_files=" ".join(files_to_copy)))
    sbatch_text_list.append("echo -ne '\\n\\nExecuting command lines at '")
    sbatch_text_list.append("date")
    sbatch_text_list.append("# Run the actual commands")
    for command_line in command_line_list:
        sbatch_text_list.append(command_line)
    piper_status_file = create_exit_code_file_path(
        workflow_subtask=workflow_name,
        project_base_path=project.base_path,
        project_name=project.dirname,
        project_id=project.project_id,
        sample_id=sample.name)
    sbatch_text_list.append("\nPIPER_RETURN_CODE=$?")
    # Precalculate md5sums
    sbatch_text_list.append(
        'MD5FILES="$SNIC_TMP/ANALYSIS/{}/piper_ngi/05_processed_alignments/*{}*.bam'
        .format(project.project_id, sample.name))
    sbatch_text_list.append(
        '$SNIC_TMP/ANALYSIS/{}/piper_ngi/05_processed_alignments/*.table'
        .format(project.project_id))
    sbatch_text_list.append(
        '$SNIC_TMP/ANALYSIS/{}/piper_ngi/07_variant_calls/*{}*.genomic.vcf.gz'
        .format(project.project_id, sample.name))
    sbatch_text_list.append(
        '$SNIC_TMP/ANALYSIS/{}/piper_ngi/07_variant_calls/*{}*.annotated.vcf.gz"'
        .format(project.project_id, sample.name))
    sbatch_text_list.append('for f in $MD5FILES')
    sbatch_text_list.append('do')
    sbatch_text_list.append("    md5sum $f | awk '{printf $1}' > $f.md5 &")
    sbatch_text_list.append('done')
    sbatch_text_list.append('wait')
    # Copy back the resulting analysis files
    sbatch_text_list.append(
        "echo -ne '\\n\\nCopying back the resulting analysis files at '")
    sbatch_text_list.append("date")
    sbatch_text_list.append("mkdir -p {}".format(perm_analysis_dir))
    sbatch_text_list.append("rsync -rptoDLv {}/ {}/".format(scratch_analysis_dir,
                                                            perm_analysis_dir))
    sbatch_text_list.append("\nRSYNC_RETURN_CODE=$?")
    # Record job completion status
    sbatch_text_list.append("if [[ $RSYNC_RETURN_CODE == 0 ]]")
    sbatch_text_list.append("then")
    sbatch_text_list.append("    if [[ $PIPER_RETURN_CODE == 0 ]]")
    sbatch_text_list.append("    then")
    sbatch_text_list.append("        echo '0'> {}".format(piper_status_file))
    sbatch_text_list.append("    else")
    sbatch_text_list.append("        echo '1'> {}".format(piper_status_file))
    sbatch_text_list.append("    fi")
    sbatch_text_list.append("else")
    sbatch_text_list.append("    echo '2'> {}".format(piper_status_file))
    sbatch_text_list.append("fi")
    # Write the sbatch file
    sbatch_dir = os.path.join(perm_analysis_dir, "sbatch")
    safe_makedir(sbatch_dir)
    sbatch_outfile = os.path.join(sbatch_dir, "{}.sbatch".format(job_identifier))
    rotate_file(sbatch_outfile)
    with open(sbatch_outfile, 'w') as f:
        f.write("\n".join(sbatch_text_list))
    LOG.info("Queueing sbatch file {} for job {}".format(sbatch_outfile,
                                                         job_identifier))
    # Queue the sbatch file
    p_handle = execute_command_line("sbatch {}".format(sbatch_outfile),
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
    p_out, p_err = p_handle.communicate()
    try:
        slurm_job_id = re.match(r'Submitted batch job (\d+)', p_out).groups()[0]
    except AttributeError:
        raise RuntimeError('Could not submit sbatch job for workflow "{}": '
                           '{}'.format(job_identifier, p_err))
    # Detail which seqruns we've started analyzing so we can update statuses later
    record_analysis_details(project, job_identifier)
    return int(slurm_job_id)
def start_analysis(sbatch_path):
    cl = ["bash", sbatch_path]
    handle = execute_command_line(cl)
    return handle.pid
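# A minimal usage sketch; the script path is hypothetical. Note that
# start_analysis() only returns the pid of the launched "bash" process, so the
# caller has to track completion separately (for example via an exit-code file
# written by the script itself).
pid = start_analysis("/proj/ngi/ANALYSIS/P1234/sbatch/P1234-wf.sbatch")
LOG.info("Started local analysis process with pid {}".format(pid))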