def _create_genotyping_proc(setup):
    """Append 'samtools mpileup' and 'bcftools view' to the batch pipeline.

    The "filter" process already registered in setup["procs"] feeds mpileup,
    and mpileup in turn feeds bcftools. Both new processes are stored in
    setup["procs"] under the keys "mpileup" and "bcftools".

    Note that 'args' is not a parameter; it is resolved from the surrounding
    scope and supplies the user-provided mpileup / bcftools arguments.

    Returns the STDOUT pipe of the bcftools process.
    """
    # mpileup reads from STDIN ("-") and is given the batch BED file via -l
    pileup_cmd = build_call(
        call=("samtools", "mpileup"),
        args={"-u": None, "-l": setup["files"]["bed"]},
        new_args=args.mpileup_argument,
        positional=("-",),
    )
    sys.stderr.write("Running 'samtools mpileup': %s\n" % (" ".join(pileup_cmd)))

    running = setup["procs"]
    running["mpileup"] = processes.open_proc(
        pileup_cmd,
        stdin=running["filter"].stdout,
        stdout=processes.PIPE,
    )

    # bcftools consumes the mpileup output from STDIN ("-")
    caller_cmd = build_call(
        call=("bcftools", "view"),
        args={},
        new_args=args.bcftools_argument,
        positional=("-",),
    )
    sys.stderr.write("Running 'bcftools call': %s\n" % (" ".join(caller_cmd)))

    running["bcftools"] = processes.open_proc(
        caller_cmd,
        stdin=running["mpileup"].stdout,
        stdout=processes.PIPE,
    )

    return running["bcftools"].stdout
def _setup_paired_ended_pipeline(args, procs, bam_cleanup):
    """Start the paired-end front of the cleanup pipeline.

    Three processes are chained and stored in 'procs' under the keys "pipe",
    "fixmate" and "cleanup": the wrapper command in 'pipe' mode reading from
    STDIN, 'samtools fixmate', and the wrapper command in 'cleanup' mode.

    Returns the last ("cleanup") process; its STDOUT is an open pipe.
    """
    # Stage 1: convert input to (uncompressed) BAM
    procs["pipe"] = processes.open_proc(
        bam_cleanup + ["pipe"],
        stdin=sys.stdin,
        stdout=processes.PIPE,
    )
    # STDIN has been handed to the child process; close it here
    sys.stdin.close()

    # Stage 2: fix mate information for PE reads
    fixmate_cmd = ["samtools", "fixmate"]
    if args.samtools1x == "yes":
        fixmate_cmd += ["-O", "bam"]
    fixmate_cmd += ["-", "-"]

    procs["fixmate"] = processes.open_proc(
        fixmate_cmd,
        stdin=procs["pipe"].stdout,
        stdout=processes.PIPE,
    )
    procs["pipe"].stdout.close()

    # Stage 3: cleanup / filter reads. This must follow 'fixmate', as BWA may
    # produce hits where the mate-unmapped flag is incorrect, which 'fixmate'
    # fixes.
    procs["cleanup"] = processes.open_proc(
        bam_cleanup + ["cleanup"],
        stdin=procs["fixmate"].stdout,
        stdout=processes.PIPE,
    )
    procs["fixmate"].stdout.close()

    return procs["cleanup"]
def _setup_paired_ended_pipeline(args, procs, bam_cleanup):
    """Launch the pipe -> fixmate -> cleanup chain used for paired-end reads.

    Each launched process is recorded in 'procs' ("pipe", "fixmate" and
    "cleanup"). After a downstream process has been started, the upstream
    STDOUT handle held by this process is closed.

    Returns the "cleanup" process, whose STDOUT is left open as a pipe.
    """
    # Convert input (read from STDIN) to (uncompressed) BAM
    to_bam_cmd = bam_cleanup + ["pipe"]
    procs["pipe"] = processes.open_proc(
        to_bam_cmd, stdin=sys.stdin, stdout=processes.PIPE
    )
    sys.stdin.close()

    # Fix mate information for PE reads; the output format is only specified
    # explicitly when args.samtools1x is "yes"
    format_options = ["-O", "bam"] if args.samtools1x == "yes" else []
    fixmate_cmd = ["samtools", "fixmate"] + format_options + ["-", "-"]
    procs["fixmate"] = processes.open_proc(
        fixmate_cmd, stdin=procs["pipe"].stdout, stdout=processes.PIPE
    )
    procs["pipe"].stdout.close()

    # Cleanup / filter reads. Must be done after 'fixmate', as BWA may produce
    # hits where the mate-unmapped flag is incorrect, which 'fixmate' fixes.
    filter_cmd = bam_cleanup + ["cleanup"]
    procs["cleanup"] = processes.open_proc(
        filter_cmd, stdin=procs["fixmate"].stdout, stdout=processes.PIPE
    )
    procs["fixmate"].stdout.close()

    return procs["cleanup"]
def _create_genotyping_proc(setup):
    """Chain 'samtools mpileup' into 'bcftools view' for one batch.

    mpileup reads from the STDOUT of the "filter" process in setup["procs"];
    bcftools reads from mpileup. The two processes are registered in
    setup["procs"] as "mpileup" and "bcftools" respectively.

    'args' is taken from the surrounding scope rather than passed in; it
    provides the extra mpileup / bcftools command-line arguments.

    Returns the STDOUT pipe of the bcftools process.
    """
    bed_file = setup["files"]["bed"]
    mpileup_cmd = build_call(
        call=("samtools", "mpileup"),
        args={"-u": None, "-l": bed_file},
        new_args=args.mpileup_argument,
        positional=("-",),
    )
    sys.stderr.write("Running 'samtools mpileup': %s\n" % (" ".join(mpileup_cmd)))

    pipeline = setup["procs"]
    mpileup_proc = processes.open_proc(
        mpileup_cmd, stdin=pipeline["filter"].stdout, stdout=processes.PIPE
    )
    pipeline["mpileup"] = mpileup_proc

    bcftools_cmd = build_call(
        call=("bcftools", "view"),
        args={},
        new_args=args.bcftools_argument,
        positional=("-",),
    )
    sys.stderr.write("Running 'bcftools call': %s\n" % (" ".join(bcftools_cmd)))

    bcftools_proc = processes.open_proc(
        bcftools_cmd, stdin=pipeline["mpileup"].stdout, stdout=processes.PIPE
    )
    pipeline["bcftools"] = bcftools_proc

    return pipeline["bcftools"].stdout
def _read_sequences(file_type, filename, stats):
    """Sample base qualities from 'filename' by streaming it through 'cat'.

    The file is read via the "cat" command built by 'factory', its STDOUT is
    passed to _collect_qualities, and the resulting qualities are reduced to
    a reservoir sample of at most 100000 items.

    Raises NodeError if the 'cat' process finishes with a non-zero
    return-code; any error raised while reading is re-raised unchanged after
    the child process has been killed and reaped.
    """
    cat_call = factory.new("cat")
    cat_call.add_multiple_values((filename,))
    cat_call = cat_call.finalized_call

    cat = None
    try:
        cat = procs.open_proc(
            cat_call,
            bufsize=io.DEFAULT_BUFFER_SIZE,
            stderr=procs.PIPE,
            stdout=procs.PIPE,
        )
        qualities = _collect_qualities(cat.stdout, file_type, filename, stats)

        return sampling.reservoir_sampling(qualities, 100000)
    except Exception:
        if cat is not None:
            try:
                cat.kill()
            except OSError:
                # The process may already have terminated; nothing to kill
                pass
            cat.wait()
            # Prevents the 'finally' clause from replacing the error being
            # propagated with a NodeError about the (killed) process
            cat = None
        # Bare raise keeps the original traceback intact
        raise
    finally:
        rc_cat = cat.wait() if cat is not None else 0
        if rc_cat:
            message = "Error running 'paleomix cat':\n" \
                " Unicat return-code = %i\n\n%s" \
                % (rc_cat, cat.stderr.read())
            raise NodeError(message)
def _read_sequences(filename):
    """Sample base qualities from 'filename' by streaming it through 'cat'.

    The file is read via the "cat" command built by 'factory', its STDOUT is
    passed to _collect_qualities, and the resulting qualities are reduced to
    a reservoir sample of at most 100000 items.

    Raises NodeError if the 'cat' process finishes with a non-zero
    return-code; any error raised while reading is re-raised unchanged after
    the child process has been killed and reaped.
    """
    cat_call = factory.new("cat")
    cat_call.add_multiple_values((filename,))
    cat_call = cat_call.finalized_call

    cat = None
    try:
        cat = procs.open_proc(
            cat_call,
            bufsize=io.DEFAULT_BUFFER_SIZE,
            stderr=procs.PIPE,
            stdout=procs.PIPE,
        )
        qualities = _collect_qualities(cat.stdout, filename)

        return sampling.reservoir_sampling(qualities, 100000)
    except BaseException:
        # Deliberately broad: the child must be reaped on any failure,
        # including interrupts, before the error is propagated.
        if cat is not None:
            try:
                cat.kill()
            except OSError:
                # The process may already have terminated; a failure to kill
                # it must not replace the error that brought us here
                pass
            cat.wait()
            # Prevents the 'finally' clause from replacing the error being
            # propagated with a NodeError about the (killed) process
            cat = None
        raise
    finally:
        rc_cat = cat.wait() if cat is not None else 0
        if rc_cat:
            message = "Error running 'paleomix cat':\n" \
                " Unicat return-code = %i\n\n%s" \
                % (rc_cat, cat.stderr.read())
            raise NodeError(message)
def _setup_single_ended_pipeline(procs, bam_cleanup):
    """Start the single-end front of the cleanup pipeline.

    Runs the wrapper command in 'cleanup-sam' mode on STDIN, which converts
    the input to BAM and cleans up / filters reads. The process is stored in
    'procs' under the key "pipe" and returned; its STDOUT is an open pipe.
    """
    cleanup_cmd = bam_cleanup + ["cleanup-sam"]
    procs["pipe"] = processes.open_proc(
        cleanup_cmd,
        stdin=sys.stdin,
        stdout=processes.PIPE,
    )
    # STDIN has been handed to the child process; close it here
    sys.stdin.close()

    return procs["pipe"]
def _setup_single_ended_pipeline(procs, bam_cleanup):
    """Launch the single process needed before sorting single-end reads.

    The wrapper command is invoked with 'cleanup-sam' (convert input to BAM
    and cleanup / filter reads), reading from STDIN and writing to a pipe.
    The process is registered as procs["pipe"] and is also the return value.
    """
    procs["pipe"] = processes.open_proc(
        bam_cleanup + ["cleanup-sam"], stdin=sys.stdin, stdout=processes.PIPE
    )
    sys.stdin.close()

    last_proc = procs["pipe"]
    return last_proc
def setup_basic_batch(args, regions, prefix, func, first_batch=True):
    """Start the process pipeline for one batch of regions.

    A BED file is written for 'regions', a "genotype --filter-only" process
    is started on args.bamfile, and 'func' is called with the setup dict to
    attach the batch-specific processes; 'func' must return the STDOUT handle
    to be compressed. For all but the first batch, lines starting with '#'
    are removed using grep. The result is piped through bgzip into the file
    named by 'prefix'.

    Returns the setup dict, containing "files", "temp_files", "procs" and
    "handles". On any error the traceback is written to STDERR,
    cleanup_batch is called on the partial setup, and the error is re-raised.
    """
    setup = {"files": {}, "temp_files": {}, "procs": {}, "handles": {}}
    try:
        bed_file = write_bed_file(prefix, regions)
        setup["files"]["bed"] = bed_file
        # Also registered among the temporary files of the batch
        setup["temp_files"]["bed"] = setup["files"]["bed"]

        filter_builder = factory.new("genotype")
        filter_builder.set_option("--filter-only")
        filter_builder.set_option("--bedfile", setup["files"]["bed"])
        filter_builder.add_option(args.bamfile)
        filter_builder.add_option(args.destination)

        running = setup["procs"]
        running["filter"] = processes.open_proc(
            filter_builder.call,
            stdout=processes.PIPE,
        )

        stream = func(setup)
        if not first_batch:
            # Drop lines starting with '#' for every batch but the first
            running["grep"] = processes.open_proc(
                ("grep", "-v", "^#"),
                stdin=stream,
                stdout=processes.PIPE,
            )
            stream = running["grep"].stdout

        setup["handles"]["outfile"] = open(prefix, "w")
        running["gzip"] = processes.open_proc(
            ["bgzip"],
            stdin=stream,
            stdout=setup["handles"]["outfile"],
        )

        return setup
    except:
        # Catch-all is intentional: always clean up, then re-raise unchanged
        sys.stderr.write(traceback.format_exc() + "\n")
        cleanup_batch(setup)
        raise
def run(self, temp, wrap_errors=True):
    """Start the command, with output files placed in the folder 'temp'.

    Call commit() afterwards to move files to their final destination. Note
    that in contexts where the *Cmds classes are used, this function may
    block.

    Raises CmdError if the command is already running. Errors raised while
    starting the command are wrapped in CmdError unless 'wrap_errors' is
    false, in which case they are re-raised as is.
    """
    if self._running:
        raise CmdError("Calling 'run' on already running command.")

    self._temp = temp
    self._running = True

    # Filenames for pipes are always built relative to the current directory,
    # since the pipes are opened before (possibly) CD'ing to the temp
    # directory.
    stdin = stdout = stderr = None
    try:
        kwords = self._generate_filenames(self._files, root=temp)
        stdin = self._open_pipe(kwords, "IN_STDIN", "rb")
        stdout = self._open_pipe(kwords, "OUT_STDOUT", "wb")
        stderr = self._open_pipe(kwords, "OUT_STDERR", "wb")

        if self._set_cwd:
            # The command runs inside 'temp', so the call is generated with an
            # empty temp path
            cwd, temp = temp, ""
        else:
            cwd, temp = None, os.path.abspath(temp)

        call = self._generate_call(temp)

        # Explicitly set to DEVNULL to ensure that STDIN is not left open.
        if stdin is None:
            stdin = self.DEVNULL

        self._proc = procs.open_proc(
            call,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr,
            cwd=cwd,
            preexec_fn=os.setsid,
        )
    except Exception as error:
        if wrap_errors:
            message = (
                "Error running commands:\n"
                " Call = %r\n"
                " Error = %r"
            )
            raise CmdError(message % (self._command, error))
        raise
    finally:
        # Close pipes to allow the command to receive SIGPIPE
        not_closable = (None, self.PIPE, self.DEVNULL)
        for handle in (stdin, stdout, stderr):
            if handle not in not_closable:
                handle.close()

    # Allow subprocesses to be killed in case of a SIGTERM
    _add_to_killlist(self._proc)
def _run_cleanup_pipeline(args):
    """Run the cleanup pipeline on STDIN and wait for it to finish.

    The pipeline consists of the paired-end or single-end setup (selected by
    args.paired_end), followed by 'samtools sort' and, if args.fasta is set,
    'samtools calmd'. The final process in the chain inherits STDOUT.

    Returns 1 if any process reported a non-zero result, otherwise 0. If an
    error is raised, every process started so far is terminated and the error
    is re-raised.
    """
    bam_cleanup = _build_wrapper_command(args)
    procs = {}
    try:
        # Update 'procs' and get the last process in the pipeline
        if args.paired_end:
            last_proc = _setup_paired_ended_pipeline(args, procs, bam_cleanup)
        else:
            last_proc = _setup_single_ended_pipeline(procs, bam_cleanup)

        call_sort = ["samtools", "sort", "-l", "0"]
        if args.samtools1x == "yes":
            call_sort.extend(("-O", "bam", "-T", args.temp_prefix))
        else:
            # Sort, output to stdout (-o)
            call_sort.extend(("-o", "-", args.temp_prefix))

        # Sort only writes to a pipe if 'calmd' is going to consume its output
        sort_stdout = None if args.fasta is None else processes.PIPE
        procs["sort"] = processes.open_proc(
            call_sort,
            stdin=last_proc.stdout,
            stdout=sort_stdout,
        )
        last_proc.stdout.close()

        # Update NM and MD tags; output BAM (-b) to stdout
        if args.fasta is not None:
            call_calmd = ["samtools", "calmd", "-b", "-", args.fasta]
            procs["calmd"] = processes.open_proc(
                call_calmd,
                stdin=procs["sort"].stdout,
            )
            procs["sort"].stdout.close()

        if any(processes.join_procs(procs.values())):
            return 1
        return 0
    except BaseException:
        # Deliberately broad, so children are also stopped on interrupts.
        # 'values()' is used since 'itervalues()' does not exist on Python 3,
        # where it would hide the original error behind an AttributeError.
        for proc in procs.values():
            proc.terminate()
        raise
def _run_cleanup_pipeline(args):
    """Build, run and join the cleanup pipeline reading from STDIN.

    Depending on args.paired_end, the paired-end or the single-end setup is
    started first; its output is sorted with 'samtools sort' and, when
    args.fasta is given, passed through 'samtools calmd'. The final process
    in the chain inherits STDOUT.

    Returns 1 if any process reported a non-zero result, otherwise 0. If an
    error is raised, every process started so far is terminated and the error
    is re-raised.
    """
    bam_cleanup = _build_wrapper_command(args)
    procs = {}
    try:
        # Update 'procs' and get the last process in the pipeline
        if args.paired_end:
            last_proc = _setup_paired_ended_pipeline(args, procs, bam_cleanup)
        else:
            last_proc = _setup_single_ended_pipeline(procs, bam_cleanup)

        call_sort = ["samtools", "sort", "-l", "0"]
        if args.samtools1x == "yes":
            call_sort.extend(("-O", "bam", "-T", args.temp_prefix))
        else:
            # Sort, output to stdout (-o)
            call_sort.extend(("-o", "-", args.temp_prefix))

        # Without a FASTA file 'sort' is the final step and inherits STDOUT
        sort_stdout = None if args.fasta is None else processes.PIPE
        procs["sort"] = processes.open_proc(
            call_sort, stdin=last_proc.stdout, stdout=sort_stdout
        )
        last_proc.stdout.close()

        # Update NM and MD tags; output BAM (-b) to stdout
        if args.fasta is not None:
            call_calmd = ["samtools", "calmd", "-b", "-", args.fasta]
            procs["calmd"] = processes.open_proc(
                call_calmd, stdin=procs["sort"].stdout
            )
            procs["sort"].stdout.close()

        if any(processes.join_procs(procs.values())):
            return 1
        return 0
    except BaseException:
        # Deliberately broad, so children are also stopped on interrupts.
        # dict.itervalues() is unavailable on Python 3 and would replace the
        # error being handled with an AttributeError; values() works on both.
        for proc in procs.values():
            proc.terminate()
        raise
def _create_mpileup_proc(setup):
    """Append a 'samtools mpileup' process to the batch pipeline.

    mpileup reads from the STDOUT of the "filter" process in setup["procs"]
    and is given the batch BED file via -l. The process is stored in
    setup["procs"] under the key "mpileup".

    Note that 'args' is not a parameter; it is resolved from the surrounding
    scope and supplies the user-provided mpileup arguments.

    Returns the STDOUT pipe of the mpileup process.
    """
    pileup_cmd = build_call(
        call=("samtools", "mpileup"),
        args={"-l": setup["files"]["bed"]},
        new_args=args.mpileup_argument,
        positional=("-",),
    )
    sys.stderr.write("Running 'samtools mpileup': %s\n" % (" ".join(pileup_cmd)))

    running = setup["procs"]
    running["mpileup"] = processes.open_proc(
        pileup_cmd,
        stdin=running["filter"].stdout,
        stdout=processes.PIPE,
    )

    return running["mpileup"].stdout
def _run(call):
    """Carry out a system call and return its combined STDOUT / STDERR output.

    STDERR is redirected into STDOUT, so a single value containing both is
    returned. If an OSError is raised (e.g. due to missing executables), the
    exception object itself is returned in place of the output instead of
    being propagated.
    """
    try:
        child = procs.open_proc(
            call,
            stdout=procs.PIPE,
            # Merge STDERR with STDOUT output
            stderr=procs.STDOUT,
        )
        captured = child.communicate()
    except OSError as failure:
        return failure

    # First item is STDOUT, which includes the merged STDERR output
    return captured[0]
def setup_basic_batch(args, regions, prefix, func, first_batch=True):
    """Create the files, processes and handles needed to run one batch.

    Steps, in order: write a BED file for 'regions'; start the "genotype"
    command in '--filter-only' mode on args.bamfile; let 'func' attach its
    own processes and return the STDOUT handle to use; for all batches except
    the first, remove lines starting with '#' using grep; and finally pipe
    the result through bgzip into the file named by 'prefix'.

    Returns the setup dict ("files", "temp_files", "procs", "handles"). If
    anything fails, the traceback is written to STDERR, cleanup_batch is
    invoked on the partially built setup, and the error is re-raised.
    """
    setup = {"files": {}, "temp_files": {}, "procs": {}, "handles": {}}
    try:
        setup["files"]["bed"] = write_bed_file(prefix, regions)
        bed_file = setup["files"]["bed"]
        # Also registered among the temporary files of the batch
        setup["temp_files"]["bed"] = bed_file

        filter_builder = factory.new("genotype")
        filter_builder.set_option("--filter-only")
        filter_builder.set_option("--bedfile", setup["files"]["bed"])
        for positional in (args.bamfile, args.destination):
            filter_builder.add_option(positional)

        setup["procs"]["filter"] = processes.open_proc(
            filter_builder.call, stdout=processes.PIPE
        )

        output = func(setup)
        if not first_batch:
            # Drop lines starting with '#' for every batch but the first
            grep_proc = processes.open_proc(
                ("grep", "-v", "^#"), stdin=output, stdout=processes.PIPE
            )
            setup["procs"]["grep"] = grep_proc
            output = setup["procs"]["grep"].stdout

        setup["handles"]["outfile"] = open(prefix, "w")
        setup["procs"]["gzip"] = processes.open_proc(
            ["bgzip"], stdin=output, stdout=setup["handles"]["outfile"]
        )

        return setup
    except:
        # Catch-all is intentional: always clean up, then re-raise unchanged
        sys.stderr.write(traceback.format_exc() + "\n")
        cleanup_batch(setup)
        raise
def _create_mpileup_proc(setup):
    """Start 'samtools mpileup' on the output of the batch's filter process.

    The command is built from the batch BED file (-l), the extra arguments
    in args.mpileup_argument, and "-" as the positional argument. The
    resulting process is registered as setup["procs"]["mpileup"].

    'args' is taken from the surrounding scope rather than passed in.

    Returns the STDOUT pipe of the mpileup process.
    """
    bed_file = setup["files"]["bed"]
    mpileup_cmd = build_call(
        call=("samtools", "mpileup"),
        args={"-l": bed_file},
        new_args=args.mpileup_argument,
        positional=("-",),
    )
    sys.stderr.write("Running 'samtools mpileup': %s\n" % (" ".join(mpileup_cmd)))

    pipeline = setup["procs"]
    mpileup_proc = processes.open_proc(
        mpileup_cmd, stdin=pipeline["filter"].stdout, stdout=processes.PIPE
    )
    pipeline["mpileup"] = mpileup_proc

    return pipeline["mpileup"].stdout
def _run(call):
    """Execute 'call' and return what it wrote to STDOUT and STDERR.

    Both streams are captured through a single pipe (STDERR is merged into
    STDOUT). Should starting or communicating with the process raise an
    OSError (e.g. due to missing executables), that exception is returned as
    a value rather than raised.
    """
    # Merge STDERR with STDOUT output
    stream_options = {"stdout": procs.PIPE, "stderr": procs.STDOUT}
    try:
        proc = procs.open_proc(call, **stream_options)
        combined_output = proc.communicate()[0]
    except OSError as error:
        return error

    return combined_output
def run(self, temp, wrap_errors=True):
    """Runs the given command, saving files in the specified temp folder.

    To move files to their final destination, call commit(). Note that in
    contexts where the *Cmds classes are used, this function may block.

    Raises CmdError if the command is already running. Errors raised while
    starting the command are wrapped in CmdError unless 'wrap_errors' is
    false, in which case they are re-raised as is.
    """
    if self._running:
        raise CmdError("Calling 'run' on already running command.")

    self._temp = temp
    self._running = True

    # kwords for pipes are always built relative to the current directory,
    # since these are opened before (possibly) CD'ing to the temp
    # directory.
    stdin = stdout = stderr = None
    try:
        kwords = self._generate_filenames(self._files, root=temp)
        stdin = self._open_pipe(kwords, "IN_STDIN", "rb")
        stdout = self._open_pipe(kwords, "OUT_STDOUT", "wb")
        stderr = self._open_pipe(kwords, "OUT_STDERR", "wb")

        cwd = temp if self._set_cwd else None
        temp = "" if self._set_cwd else os.path.abspath(temp)
        call = self._generate_call(temp)

        # Explicitly set to DEVNULL to ensure that STDIN is not left open.
        if stdin is None:
            stdin = self.DEVNULL

        self._proc = procs.open_proc(
            call,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr,
            cwd=cwd,
            preexec_fn=os.setsid,
        )
    except Exception as error:
        # 'except X as e' is valid on both Python 2.6+ and Python 3, unlike
        # the 'except X, e' form
        if not wrap_errors:
            raise

        message = \
            "Error running commands:\n" \
            " Call = %r\n" \
            " Error = %r"
        raise CmdError(message % (self._command, error))
    finally:
        # The child process holds its own copies of these handles; release
        # ours whether or not the command was started. Values without a
        # close() method (presumably pipe sentinels) are skipped, as is
        # DEVNULL.
        for handle in (stdin, stdout, stderr):
            if handle not in (None, self.DEVNULL) and hasattr(handle, "close"):
                handle.close()
def _run_cleanup_pipeline(args):
    """Run the cleanup pipeline on STDIN and wait for it to finish.

    The commands are chained in order, each reading from the previous one:
    'samtools fixmate' (only if args.paired_end), the wrapper command in
    'cleanup' mode, 'samtools sort', and 'samtools calmd' (only if
    args.fasta is set). The final command inherits STDOUT.

    Returns 1 if any process reported a non-zero result, otherwise 0. If an
    error is raised, every process started so far is terminated and the error
    is re-raised.
    """
    bam_cleanup = _build_wrapper_command(args)
    commands = []
    # Created before the 'try' so that the error handler can always iterate
    # over the processes started so far, even if a failure occurs early
    procs = []
    try:
        if args.paired_end:
            # Convert input to (uncompressed) BAM and fix mate information for
            # PE reads
            commands.append(["samtools", "fixmate", "-O", "bam", "-", "-"])

        # Cleanup / filter reads. Must be done after 'fixmate', as BWA may
        # produce hits where the mate-unmapped flag is incorrect, which
        # 'fixmate' fixes.
        commands.append(bam_cleanup + ["cleanup"])

        # Sort by coordinates and output uncompressed BAM
        commands.append(
            ["samtools", "sort", "-l", "0", "-O", "bam", "-T", args.temp_prefix]
        )

        # Update NM and MD tags; output BAM (-b) to stdout
        if args.fasta is not None:
            commands.append(["samtools", "calmd", "-b", "-", args.fasta])

        last_out = sys.stdin
        for cmd in commands:
            # Only the final command writes to the inherited STDOUT
            proc_stdout = None if cmd is commands[-1] else processes.PIPE
            procs.append(
                processes.open_proc(cmd, stdin=last_out, stdout=proc_stdout)
            )

            # The child owns the handle now; close the copy held here
            last_out.close()
            last_out = procs[-1].stdout

        return int(any(processes.join_procs(procs)))
    except Exception:
        # 'procs' is a list, so it is iterated directly
        for proc in procs:
            proc.terminate()
        raise