Ejemplo n.º 1
0
    def _create_genotyping_proc(setup):
        """Chain 'samtools mpileup' and 'bcftools view' onto the filter process.

        Both processes are registered in setup["procs"] (under the keys
        "mpileup" and "bcftools"), each reading from the STDOUT of the
        previous process. Returns the STDOUT pipe of the bcftools process.
        """
        # The BED file written for this batch is passed to mpileup via '-l'
        pileup_options = {"-u": None, "-l": setup["files"]["bed"]}
        pileup_cmd = build_call(call=("samtools", "mpileup"),
                                args=pileup_options,
                                new_args=args.mpileup_argument,
                                positional=("-",))
        sys.stderr.write(
            "Running 'samtools mpileup': %s\n" % " ".join(pileup_cmd))

        running = setup["procs"]
        pileup_proc = processes.open_proc(pileup_cmd,
                                          stdin=running["filter"].stdout,
                                          stdout=processes.PIPE)
        running["mpileup"] = pileup_proc

        # No default options for bcftools; only the user-supplied ones
        genotype_cmd = build_call(call=("bcftools", "view"),
                                  args={},
                                  new_args=args.bcftools_argument,
                                  positional=("-",))
        sys.stderr.write(
            "Running 'bcftools call': %s\n" % " ".join(genotype_cmd))

        genotype_proc = processes.open_proc(genotype_cmd,
                                            stdin=pileup_proc.stdout,
                                            stdout=processes.PIPE)
        running["bcftools"] = genotype_proc

        return genotype_proc.stdout
Ejemplo n.º 2
0
def _setup_paired_ended_pipeline(args, procs, bam_cleanup):
    """Start the 'pipe' -> 'samtools fixmate' -> 'cleanup' chain for PE reads.

    Every started process is stored in `procs` (keys "pipe", "fixmate" and
    "cleanup"); the last process of the chain ("cleanup") is returned.
    """
    # Stage 1: convert the input on STDIN to (uncompressed) BAM
    to_bam = processes.open_proc(bam_cleanup + ["pipe"],
                                 stdin=sys.stdin,
                                 stdout=processes.PIPE)
    procs["pipe"] = to_bam
    sys.stdin.close()

    # Stage 2: fix mate information for the paired reads
    fixmate_cmd = ['samtools', 'fixmate']
    if args.samtools1x == "yes":
        fixmate_cmd += ["-O", "bam"]
    fixmate_cmd += ['-', '-']

    fixmate = processes.open_proc(fixmate_cmd,
                                  stdin=to_bam.stdout,
                                  stdout=processes.PIPE)
    procs["fixmate"] = fixmate
    # Our handle to the upstream pipe is closed once it has been handed on
    to_bam.stdout.close()

    # Stage 3: cleanup / filter reads. This has to follow 'fixmate', since
    # BWA may produce hits with an incorrect mate-unmapped flag, which
    # 'fixmate' corrects.
    cleanup = processes.open_proc(bam_cleanup + ['cleanup'],
                                  stdin=fixmate.stdout,
                                  stdout=processes.PIPE)
    procs["cleanup"] = cleanup
    fixmate.stdout.close()

    return cleanup
Ejemplo n.º 3
0
def _setup_paired_ended_pipeline(args, procs, bam_cleanup):
    """Build the three-stage process chain used for paired-end input.

    The stages ("pipe", "fixmate", "cleanup") are recorded in `procs` under
    those keys, each one reading the STDOUT of the previous stage. The
    "cleanup" process, i.e. the end of the chain, is returned.
    """
    # Convert whatever arrives on STDIN to (uncompressed) BAM
    pipe_proc = processes.open_proc(bam_cleanup + ["pipe"],
                                    stdin=sys.stdin,
                                    stdout=processes.PIPE)
    procs["pipe"] = pipe_proc
    sys.stdin.close()

    # Fix mate information for PE reads; the output format is only given
    # explicitly when args.samtools1x is "yes"
    fixmate_call = ['samtools', 'fixmate']
    if args.samtools1x == "yes":
        fixmate_call += ["-O", "bam"]
    fixmate_call += ['-', '-']

    fixmate_proc = processes.open_proc(fixmate_call,
                                       stdin=pipe_proc.stdout,
                                       stdout=processes.PIPE)
    procs["fixmate"] = fixmate_proc
    pipe_proc.stdout.close()

    # Cleanup / filter reads. Must run after 'fixmate', as BWA may produce
    # hits where the mate-unmapped flag is incorrect, which 'fixmate' fixes.
    cleanup_proc = processes.open_proc(bam_cleanup + ['cleanup'],
                                       stdin=fixmate_proc.stdout,
                                       stdout=processes.PIPE)
    procs["cleanup"] = cleanup_proc
    fixmate_proc.stdout.close()

    return cleanup_proc
Ejemplo n.º 4
0
    def _create_genotyping_proc(setup):
        """Start 'samtools mpileup' piped into 'bcftools view'.

        mpileup reads from the STDOUT of setup["procs"]["filter"]; both new
        processes are added to setup["procs"] as "mpileup" and "bcftools".
        The STDOUT pipe of the bcftools process is returned.
        """
        # '-l' points mpileup at the BED file recorded for this batch
        pileup_options = {"-u": None, "-l": setup["files"]["bed"]}
        pileup_cmd = build_call(call=("samtools", "mpileup"),
                                args=pileup_options,
                                new_args=args.mpileup_argument,
                                positional=("-",))
        sys.stderr.write(
            "Running 'samtools mpileup': %s\n" % " ".join(pileup_cmd))

        running = setup["procs"]
        pileup_proc = processes.open_proc(pileup_cmd,
                                          stdin=running["filter"].stdout,
                                          stdout=processes.PIPE)
        running["mpileup"] = pileup_proc

        # bcftools gets no built-in options, only args.bcftools_argument
        genotype_cmd = build_call(call=("bcftools", "view"),
                                  args={},
                                  new_args=args.bcftools_argument,
                                  positional=("-",))
        sys.stderr.write(
            "Running 'bcftools call': %s\n" % " ".join(genotype_cmd))

        genotype_proc = processes.open_proc(genotype_cmd,
                                            stdin=pileup_proc.stdout,
                                            stdout=processes.PIPE)
        running["bcftools"] = genotype_proc

        return genotype_proc.stdout
Ejemplo n.º 5
0
def _read_sequences(file_type, filename, stats):
    """Stream `filename` through the 'cat' command and sample its qualities.

    The STDOUT of the process is handed to _collect_qualities, and the
    values it yields are passed to sampling.reservoir_sampling with a size
    of 100000; that result is returned.

    Raises NodeError if the process finishes with a non-zero return-code;
    the message includes whatever the process wrote to STDERR.
    """
    builder = factory.new("cat")
    builder.add_multiple_values((filename,))
    command = builder.finalized_call

    proc = None
    try:
        proc = procs.open_proc(command,
                               bufsize=io.DEFAULT_BUFFER_SIZE,
                               stderr=procs.PIPE,
                               stdout=procs.PIPE)
        scores = _collect_qualities(proc.stdout, file_type, filename, stats)

        return sampling.reservoir_sampling(scores, 100000)
    except StandardError as error:
        if proc:
            try:
                proc.kill()
            except OSError:
                # A failure to kill the process is deliberately ignored
                pass
            proc.wait()
            # Cleared so that the 'finally' clause skips the return-code check
            proc = None
        raise error
    finally:
        if proc:
            return_code = proc.wait()
        else:
            return_code = 0

        if return_code:
            raise NodeError("Error running 'paleomix cat':\n"
                            "  Unicat return-code = %i\n\n%s"
                            % (return_code, proc.stderr.read()))
Ejemplo n.º 6
0
def _read_sequences(filename):
    """Stream `filename` through the 'cat' command and sample its qualities.

    The STDOUT of the process is handed to _collect_qualities, and the
    values it yields are passed to sampling.reservoir_sampling with a size
    of 100000; that result is returned.

    Raises NodeError if the process finishes with a non-zero return-code;
    the message includes whatever the process wrote to STDERR.
    """
    builder = factory.new("cat")
    builder.add_multiple_values((filename,))
    command = builder.finalized_call

    proc = None
    try:
        proc = procs.open_proc(command,
                               bufsize=io.DEFAULT_BUFFER_SIZE,
                               stderr=procs.PIPE,
                               stdout=procs.PIPE)
        scores = _collect_qualities(proc.stdout, filename)

        return sampling.reservoir_sampling(scores, 100000)
    except:
        # On any failure the process is killed and reaped, and the reference
        # is cleared so that the 'finally' clause skips the return-code check
        if proc:
            proc.kill()
            proc.wait()
            proc = None
        raise
    finally:
        if proc:
            return_code = proc.wait()
        else:
            return_code = 0

        if return_code:
            raise NodeError("Error running 'paleomix cat':\n"
                            "  Unicat return-code = %i\n\n%s"
                            % (return_code, proc.stderr.read()))
Ejemplo n.º 7
0
def _setup_single_ended_pipeline(procs, bam_cleanup):
    """Start the single 'cleanup-sam' process used for single-ended input.

    The process reads from STDIN, is stored in `procs` under "pipe", and is
    also returned to the caller.
    """
    # A single step converts the input to BAM and cleans up / filters reads
    cleanup_sam = processes.open_proc(bam_cleanup + ['cleanup-sam'],
                                      stdin=sys.stdin,
                                      stdout=processes.PIPE)
    procs["pipe"] = cleanup_sam
    # STDIN has been handed to the child; our own handle is closed
    sys.stdin.close()

    return cleanup_sam
Ejemplo n.º 8
0
def _setup_single_ended_pipeline(procs, bam_cleanup):
    """Launch 'cleanup-sam' on STDIN and register it as procs["pipe"].

    Returns the newly started process, which is the only stage needed for
    single-ended reads.
    """
    command = bam_cleanup + ['cleanup-sam']
    # Converts the input to BAM and cleans up / filters the reads in one go
    started = processes.open_proc(command,
                                  stdin=sys.stdin,
                                  stdout=processes.PIPE)
    procs["pipe"] = started
    sys.stdin.close()

    return started
Ejemplo n.º 9
0
def setup_basic_batch(args, regions, prefix, func, first_batch=True):
    """Start the filter -> func -> [grep] -> bgzip pipeline for one batch.

    A BED file is written for `regions`, a "genotype --filter-only" process
    is started on it, and `func(setup)` is called to attach further
    processes and obtain the stream to be compressed. For every batch but
    the first, lines starting with '#' are removed using grep. The stream is
    finally compressed with bgzip into the file named by `prefix`.

    Returns the `setup` dictionary holding the files, temporary files,
    processes and handles created along the way. On any error the traceback
    is written to STDERR, cleanup_batch(setup) is called, and the error is
    re-raised.
    """
    setup = {"files": {}, "temp_files": {}, "procs": {}, "handles": {}}
    running = setup["procs"]

    try:
        bed_path = write_bed_file(prefix, regions)
        setup["files"]["bed"] = bed_path
        # The BED file is also recorded as a temporary file
        setup["temp_files"]["bed"] = bed_path

        builder = factory.new("genotype")
        builder.set_option("--filter-only")
        builder.set_option("--bedfile", bed_path)
        builder.add_option(args.bamfile)
        builder.add_option(args.destination)

        running["filter"] = processes.open_proc(builder.call,
                                                stdout=processes.PIPE)

        stream = func(setup)
        if not first_batch:
            # Drop lines starting with '#' from this batch
            grep_proc = processes.open_proc(('grep', '-v', '^#'),
                                            stdin=stream,
                                            stdout=processes.PIPE)
            running["grep"] = grep_proc
            stream = grep_proc.stdout

        out_handle = open(prefix, "w")
        setup["handles"]["outfile"] = out_handle
        running["gzip"] = processes.open_proc(["bgzip"],
                                              stdin=stream,
                                              stdout=out_handle)

        return setup
    except:
        sys.stderr.write(traceback.format_exc() + "\n")
        cleanup_batch(setup)
        raise
Ejemplo n.º 10
0
    def run(self, temp, wrap_errors=True):
        """Start the command, saving files in the specified temp folder.

        To move files to their final destination, call commit(). Note that
        in contexts where the *Cmds classes are used, this function may
        block.

        Raises CmdError if the command is already running. If preparing or
        starting the process fails, the error is wrapped in a CmdError when
        `wrap_errors` is true, and re-raised unchanged otherwise.
        """
        if self._running:
            raise CmdError("Calling 'run' on already running command.")
        self._temp = temp
        self._running = True

        # Filenames for pipes are always resolved against the current
        # directory, as the handles are opened before (possibly) changing
        # to the temp directory.
        in_handle = out_handle = err_handle = None
        try:
            filenames = self._generate_filenames(self._files, root=temp)
            in_handle = self._open_pipe(filenames, "IN_STDIN", "rb")
            out_handle = self._open_pipe(filenames, "OUT_STDOUT", "wb")
            err_handle = self._open_pipe(filenames, "OUT_STDERR", "wb")

            if self._set_cwd:
                workdir, call_root = temp, ""
            else:
                workdir, call_root = None, os.path.abspath(temp)
            command = self._generate_call(call_root)

            # STDIN is explicitly set to DEVNULL so that it is never left
            # open for the command.
            if in_handle is None:
                in_handle = self.DEVNULL

            self._proc = procs.open_proc(
                command,
                stdin=in_handle,
                stdout=out_handle,
                stderr=err_handle,
                cwd=workdir,
                preexec_fn=os.setsid,
            )
        except Exception as error:
            if wrap_errors:
                template = "Error running commands:\n  Call = %r\n  Error = %r"
                raise CmdError(template % (self._command, error))
            raise
        finally:
            # Close our pipe handles so that the command can receive SIGPIPE
            for handle in (in_handle, out_handle, err_handle):
                if handle in (None, self.PIPE, self.DEVNULL):
                    continue
                handle.close()

        # Allow subprocesses to be killed in case of a SIGTERM
        _add_to_killlist(self._proc)
Ejemplo n.º 11
0
def _run_cleanup_pipeline(args):
    """Run the cleanup pipeline followed by 'samtools sort' and, optionally,
    'samtools calmd'.

    Returns 1 if any value returned by processes.join_procs is true, and 0
    otherwise. On any error every process started so far is terminated and
    the error is re-raised.
    """
    bam_cleanup = _build_wrapper_command(args)
    procs = {}
    try:
        # The setup functions fill in 'procs' and return the last process
        if args.paired_end:
            upstream = _setup_paired_ended_pipeline(args, procs, bam_cleanup)
        else:
            upstream = _setup_single_ended_pipeline(procs, bam_cleanup)

        sort_cmd = ['samtools', 'sort', '-l', '0']
        if args.samtools1x == "yes":
            sort_cmd += ['-O', 'bam', '-T', args.temp_prefix]
        else:
            # Sort, output to stdout (-o)
            sort_cmd += ['-o', '-', args.temp_prefix]

        # The output of sort is only piped when calmd is run afterwards
        run_calmd = args.fasta is not None
        sorter = processes.open_proc(
            sort_cmd,
            stdin=upstream.stdout,
            stdout=processes.PIPE if run_calmd else None)
        procs["sort"] = sorter
        upstream.stdout.close()

        if run_calmd:
            # Update NM and MD tags; output BAM (-b) to stdout
            calmd_cmd = ['samtools', 'calmd', '-b', '-', args.fasta]
            procs["calmd"] = processes.open_proc(calmd_cmd,
                                                 stdin=sorter.stdout)
            sorter.stdout.close()

        return 1 if any(processes.join_procs(procs.values())) else 0
    except:
        for proc in procs.itervalues():
            proc.terminate()
        raise
Ejemplo n.º 12
0
def _run_cleanup_pipeline(args):
    """Start and join the full cleanup / sort / (calmd) process chain.

    The first stages come from the paired- or single-ended setup function,
    depending on args.paired_end. 'samtools calmd' is appended only when
    args.fasta is set. The return value is 1 if any value returned by
    processes.join_procs is true, else 0. Should anything fail, all
    processes recorded so far are terminated before the error is re-raised.
    """
    bam_cleanup = _build_wrapper_command(args)
    procs = {}
    try:
        # Populate 'procs' and fetch the final process of the first stages
        if args.paired_end:
            tail = _setup_paired_ended_pipeline(args, procs, bam_cleanup)
        else:
            tail = _setup_single_ended_pipeline(procs, bam_cleanup)

        sort_call = ['samtools', 'sort', '-l', '0']
        if args.samtools1x == "yes":
            sort_call += ['-O', 'bam', '-T', args.temp_prefix]
        else:
            # Sort, output to stdout (-o)
            sort_call += ['-o', '-', args.temp_prefix]

        # Without a FASTA file 'sort' is the last process and is not piped
        with_calmd = args.fasta is not None
        sort_proc = processes.open_proc(
            sort_call,
            stdin=tail.stdout,
            stdout=processes.PIPE if with_calmd else None)
        procs["sort"] = sort_proc
        tail.stdout.close()

        if with_calmd:
            # Update NM and MD tags; output BAM (-b) to stdout
            calmd_call = ['samtools', 'calmd', '-b', '-', args.fasta]
            procs["calmd"] = processes.open_proc(calmd_call,
                                                 stdin=sort_proc.stdout)
            sort_proc.stdout.close()

        return 1 if any(processes.join_procs(procs.values())) else 0
    except:
        for proc in procs.itervalues():
            proc.terminate()
        raise
Ejemplo n.º 13
0
    def _create_mpileup_proc(setup):
        """Attach 'samtools mpileup' to the filter process.

        The new process is stored as setup["procs"]["mpileup"], and its
        STDOUT pipe is returned.
        """
        # '-l' points mpileup at the BED file recorded for this batch
        bed_option = {"-l": setup["files"]["bed"]}
        pileup_cmd = build_call(call=("samtools", "mpileup"),
                                args=bed_option,
                                new_args=args.mpileup_argument,
                                positional=("-",))

        sys.stderr.write(
            "Running 'samtools mpileup': %s\n" % " ".join(pileup_cmd))

        running = setup["procs"]
        pileup_proc = processes.open_proc(pileup_cmd,
                                          stdin=running["filter"].stdout,
                                          stdout=processes.PIPE)
        running["mpileup"] = pileup_proc

        return pileup_proc.stdout
Ejemplo n.º 14
0
def _run(call):
    """Run `call` and return its output, with STDERR merged into STDOUT.

    If an OSError is raised while starting or communicating with the
    process (e.g. due to a missing executable), the exception object itself
    is returned as the value instead of being raised.
    """
    try:
        # stderr=STDOUT merges both streams into the single captured pipe
        process = procs.open_proc(call,
                                  stdout=procs.PIPE,
                                  stderr=procs.STDOUT)
        captured = process.communicate()[0]
    except OSError as error:
        return error

    return captured
Ejemplo n.º 15
0
def setup_basic_batch(args, regions, prefix, func, first_batch=True):
    """Prepare and start all processes needed to genotype one batch.

    Steps: write a BED file for `regions`; start a "genotype --filter-only"
    process restricted to it; let `func(setup)` add its own processes and
    return the stream to compress; for batches other than the first, strip
    lines starting with '#' with grep; compress the result with bgzip into
    the file `prefix`.

    Returns the `setup` dictionary, which records the files, temporary
    files, processes and handles that were created. If anything fails, the
    traceback is written to STDERR, cleanup_batch(setup) is called and the
    error is re-raised.
    """
    setup = {
        "files": {},
        "temp_files": {},
        "procs": {},
        "handles": {},
    }
    started = setup["procs"]

    try:
        bed_file = write_bed_file(prefix, regions)
        setup["files"]["bed"] = bed_file
        # The BED file is also recorded as a temporary file
        setup["temp_files"]["bed"] = bed_file

        genotype = factory.new("genotype")
        genotype.set_option("--filter-only")
        genotype.set_option("--bedfile", bed_file)
        genotype.add_option(args.bamfile)
        genotype.add_option(args.destination)

        started["filter"] = processes.open_proc(genotype.call,
                                                stdout=processes.PIPE)

        output = func(setup)
        if not first_batch:
            # Remove lines starting with '#' from this batch
            grep = processes.open_proc(('grep', '-v', '^#'),
                                       stdin=output,
                                       stdout=processes.PIPE)
            started["grep"] = grep
            output = grep.stdout

        destination = open(prefix, "w")
        setup["handles"]["outfile"] = destination
        started["gzip"] = processes.open_proc(["bgzip"],
                                              stdin=output,
                                              stdout=destination)

        return setup
    except:
        sys.stderr.write(traceback.format_exc() + "\n")
        cleanup_batch(setup)
        raise
Ejemplo n.º 16
0
    def _create_mpileup_proc(setup):
        """Start 'samtools mpileup', reading from the filter process.

        Registers the process under setup["procs"]["mpileup"] and returns
        the pipe from which its output can be read.
        """
        # The BED file of the batch is passed on using the '-l' option
        options = {"-l": setup["files"]["bed"]}
        command = build_call(call=("samtools", "mpileup"),
                             args=options,
                             new_args=args.mpileup_argument,
                             positional=("-",))

        sys.stderr.write(
            "Running 'samtools mpileup': %s\n" % " ".join(command))

        started = setup["procs"]
        mpileup = processes.open_proc(command,
                                      stdin=started["filter"].stdout,
                                      stdout=processes.PIPE)
        started["mpileup"] = mpileup

        return mpileup.stdout
Ejemplo n.º 17
0
def _run(call):
    """Carry out a system call and return STDOUT and STDERR combined.

    Should an OSError be raised (e.g. due to a missing executable), the
    exception is not propagated; the exception object is returned as the
    value instead.
    """
    try:
        # Merge STDERR with STDOUT output, so only one pipe has to be read
        child = procs.open_proc(call,
                                stdout=procs.PIPE,
                                stderr=procs.STDOUT)
        combined = child.communicate()[0]
    except OSError as error:
        return error

    return combined
Ejemplo n.º 18
0
    def run(self, temp, wrap_errors=True):
        """Runs the given command, saving files in the specified temp folder.
        To move files to their final destination, call commit(). Note that in
        contexts where the *Cmds classes are used, this function may block.

        Raises CmdError if the command is already running. If preparing or
        starting the process raises a StandardError, it is wrapped in a
        CmdError when `wrap_errors` is true, and re-raised unchanged
        otherwise.
        """
        if self._running:
            raise CmdError("Calling 'run' on already running command.")
        self._temp = temp
        self._running = True

        # kwords for pipes are always built relative to the current directory,
        # since these are opened before (possibly) CD'ing to the temp
        # directory.
        stdin = stdout = stderr = None
        try:
            kwords = self._generate_filenames(self._files, root=temp)
            stdin = self._open_pipe(kwords, "IN_STDIN", "rb")
            stdout = self._open_pipe(kwords, "OUT_STDOUT", "wb")
            stderr = self._open_pipe(kwords, "OUT_STDERR", "wb")

            # When the working directory is changed to the temp folder, the
            # call is generated with an empty root; otherwise the absolute
            # path of the temp folder is used.
            cwd = temp if self._set_cwd else None
            temp = "" if self._set_cwd else os.path.abspath(temp)
            call = self._generate_call(temp)

            # Explicitly set to DEVNULL to ensure that STDIN is not left open.
            if stdin is None:
                stdin = self.DEVNULL

            self._proc = procs.open_proc(call,
                                         stdin=stdin,
                                         stdout=stdout,
                                         stderr=stderr,
                                         cwd=cwd,
                                         preexec_fn=os.setsid)
        # 'except X as name' is accepted by Python 2.6+, unlike the older
        # 'except X, name' form, which is a syntax error on Python 3.
        except StandardError as error:
            if not wrap_errors:
                raise

            message = \
                "Error running commands:\n" \
                "  Call = %r\n" \
                "  Error = %r"
            raise CmdError(message % (self._command, error))
Ejemplo n.º 19
0
def _run_cleanup_pipeline(args):
    """Run the cleanup pipeline as a chain of processes reading from STDIN.

    The chain consists of 'samtools fixmate' (paired-end input only), the
    cleanup command, 'samtools sort', and 'samtools calmd' (only when
    args.fasta is set). Each process reads the STDOUT of the previous one;
    the STDOUT of the last process is not redirected.

    Returns 1 if any value returned by processes.join_procs is true, and 0
    otherwise. If an error occurs, every process started so far is
    terminated and the error is re-raised.
    """
    bam_cleanup = _build_wrapper_command(args)
    commands = []
    # Bound before the 'try' block, so that the error handler can always
    # iterate over it, even if the failure happens before any process starts.
    procs = []

    try:
        if args.paired_end:
            # Convert input to (uncompressed) BAM and fix mate information for PE reads
            commands.append(["samtools", "fixmate", "-O", "bam", "-", "-"])

        # Cleanup / filter reads. Must be done after 'fixmate', as BWA may produce
        # hits where the mate-unmapped flag is incorrect, which 'fixmate' fixes.
        commands.append(bam_cleanup + ["cleanup"])

        # Sort by coordinates and output uncompressed BAM
        commands.append(
            ["samtools", "sort", "-l", "0", "-O", "bam", "-T", args.temp_prefix]
        )

        # Update NM and MD tags; output BAM (-b) to stdout
        if args.fasta is not None:
            commands.append(["samtools", "calmd", "-b", "-", args.fasta])

        last_out = sys.stdin
        for cmd in commands:
            # Only the final command keeps the default STDOUT
            proc_stdout = None if cmd is commands[-1] else processes.PIPE
            procs.append(processes.open_proc(cmd, stdin=last_out, stdout=proc_stdout))

            # Our handle is closed once it has been passed to the new process
            last_out.close()
            last_out = procs[-1].stdout

        return int(any(processes.join_procs(procs)))
    except Exception:
        # 'procs' is a list; calling .values() on it would raise an
        # AttributeError and hide the original error.
        for proc in procs:
            proc.terminate()
        raise