예제 #1
0
    def customize(cls, reference, infile, outfile, regions=None,
                  dependencies=()):
        """Build the piped commands used to genotype a BAM file.

        Three builders are chained through pipes: "samtools mpileup" feeds
        "bcftools view", which in turn feeds "bgzip"; the final command
        writes its stdout to `outfile`. `outfile` must end in ".vcf.bgz"
        (compared case-insensitively). When `regions` is truthy it is
        handed to mpileup through the "-l" option.

        Returns a dict with a single "commands" entry that maps the names
        "pileup", "genotype" and "bgzip" to their builders. `dependencies`
        is not referenced in this method body.
        """
        assert outfile.lower().endswith(".vcf.bgz")

        mpileup_cmd = AtomicCmdBuilder(
            ["samtools", "mpileup"],
            IN_REFERENCE=reference,
            IN_BAMFILE=infile,
            IN_REGIONS=regions,
            OUT_STDOUT=AtomicCmd.PIPE,
            CHECK_SAM=SAMTOOLS_VERSION)
        # "-u" requests uncompressed output from mpileup.
        mpileup_cmd.set_option("-u")
        mpileup_cmd.set_option("-f", "%(IN_REFERENCE)s")
        mpileup_cmd.add_value("%(IN_BAMFILE)s")
        if regions:
            mpileup_cmd.set_option("-l", "%(IN_REGIONS)s")

        # "-" is passed as the only positional value; input arrives on stdin.
        bcftools_cmd = AtomicCmdBuilder(
            ["bcftools", "view"],
            IN_STDIN=mpileup_cmd,
            OUT_STDOUT=AtomicCmd.PIPE)
        bcftools_cmd.add_value("-")

        bgzip_cmd = AtomicCmdBuilder(
            ["bgzip"],
            IN_STDIN=bcftools_cmd,
            OUT_STDOUT=outfile)

        stages = {
            "pileup": mpileup_cmd,
            "genotype": bcftools_cmd,
            "bgzip": bgzip_cmd,
        }
        return {"commands": stages}
예제 #2
0
    def customize(cls, input_file, output_file, algorithm="auto",
                  dependencies=()):
        """Create the command builder for one of the `_PRESETS` algorithms.

        `algorithm` is lower-cased and used as the key into `_PRESETS` to
        obtain the base call. `input_file` is appended to the call as a
        positional value, and stdout is directed to `output_file`.

        Returns a dict holding the builder under "command" and the
        unchanged `dependencies` under "dependencies".
        """
        base_call = _PRESETS[algorithm.lower()]

        builder = AtomicCmdBuilder(base_call)
        builder.add_value("%(IN_FASTA)s")
        builder.set_kwargs(
            IN_FASTA=input_file,
            OUT_STDOUT=output_file,
            CHECK_VERSION=MAFFT_VERSION)

        parameters = {"command": builder}
        parameters["dependencies"] = dependencies
        return parameters
예제 #3
0
    def __init__(self, config, reference, input_bam, output_bam, tags,
                 min_mapq=0, filter_unmapped=False, dependencies=()):
        """Set up a three-stage BAM cleanup: filter, re-tag / sort, calmd.

        The stages are finalized and run together as parallel commands:
          1. "samtools view -bu" reads `input_bam`; a "-q" option is set
             when `min_mapq` is truthy, and a "-F" option with the value
             "0x4" when `filter_unmapped` is truthy.
          2. Picard AddOrReplaceReadGroups reads the first stage via
             stdin, sets the ID, SM, LB, PU and PL options from `tags`,
             requests coordinate sort order, and writes to the temporary
             "bam.pipe".
          3. "samtools calmd -b" reads "bam.pipe" together with
             `reference` and sends its stdout to `output_bam`.
        """
        view_cmd = AtomicCmdBuilder(("samtools", "view", "-bu"),
                                    IN_BAM=input_bam,
                                    OUT_STDOUT=AtomicCmd.PIPE)
        if min_mapq:
            view_cmd.set_option("-q", min_mapq, sep="")
        if filter_unmapped:
            view_cmd.set_option("-F", "0x4", sep="")
        view_cmd.add_value("%(IN_BAM)s")

        picard_cmd = picard.picard_command(config, "AddOrReplaceReadGroups")
        fixed_options = (
            ("INPUT", "/dev/stdin"),
            # The JVM may, in some cases, print warnings on stdout, which
            # would corrupt a BAM written there; hence the named pipe.
            ("OUTPUT", "%(TEMP_OUT_BAM)s"),
            ("COMPRESSION_LEVEL", "0"),
            # The pipeline requires a sorted BAM, and sorting has to happen
            # before calmd runs to avoid pathological runtimes.
            ("SORT_ORDER", "coordinate"),
        )
        for key, value in fixed_options:
            picard_cmd.set_option(key, value, sep="=")

        # Every read-group tag is overwritten. ID is set explicitly because
        # the default (e.g. '1') trips up pysam's type inference: it is read
        # as a length 1 string, but written as a character.
        for tag_name in ("ID", "SM", "LB", "PU", "PL"):
            picard_cmd.set_option(tag_name, tags[tag_name], sep="=")

        picard_cmd.set_kwargs(IN_STDIN=view_cmd, TEMP_OUT_BAM="bam.pipe")

        calmd_cmd = AtomicCmdBuilder(
            ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
            IN_REF=reference,
            TEMP_IN_BAM="bam.pipe",
            OUT_STDOUT=output_bam)

        finalized = [view_cmd.finalize(),
                     picard_cmd.finalize(),
                     calmd_cmd.finalize()]
        label = "<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam)
        PicardNode.__init__(self,
                            command=ParallelCmds(finalized),
                            description=label,
                            dependencies=dependencies)
예제 #4
0
    def customize(cls, reference, in_bam, in_vcf, outfile, dependencies=()):
        """Build the "unicat" -> "vcf_create_pileup" command pair.

        "unicat" is called on `in_vcf` and piped into "vcf_create_pileup",
        which receives `outfile` as its first argument, `reference` through
        the "-f" option and `in_bam` as a positional value. `outfile` plus
        ".tbi" is registered as a second output of the pileup command.

        Returns a dict with a single "commands" entry mapping "unicat" and
        "pileup" to their builders. `dependencies` is not referenced in
        this method body.
        """
        reader = AtomicCmdBuilder(
            ["unicat", "%(IN_VCF)s"],
            IN_VCF=in_vcf,
            OUT_STDOUT=AtomicCmd.PIPE)

        pileup_cmd = AtomicCmdBuilder(
            ["vcf_create_pileup", "%(OUT_PILEUP)s"],
            IN_REF=reference,
            IN_BAM=in_bam,
            IN_STDIN=reader,
            OUT_PILEUP=outfile,
            OUT_TBI=outfile + ".tbi")
        pileup_cmd.add_value("%(IN_BAM)s")
        pileup_cmd.set_option("-f", "%(IN_REF)s")

        stages = {"unicat": reader, "pileup": pileup_cmd}
        return {"commands": stages}
예제 #5
0
    def customize(cls, reference, in_bam, in_vcf, outfile, dependencies=()):
        """Assemble the builders that create a pileup for the sites in a VCF.

        The first builder runs "unicat" on `in_vcf` with its stdout piped;
        the second runs "vcf_create_pileup" with that pipe as stdin,
        `outfile` as the leading argument, "-f" set to `reference`, and
        `in_bam` added as a value. The path `outfile` + ".tbi" is declared
        as an additional output.

        Returns {"commands": {"unicat": ..., "pileup": ...}} with the two
        builders. `dependencies` is not referenced in this method body.
        """
        tbi_path = outfile + ".tbi"

        unicat_builder = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                                          IN_VCF=in_vcf,
                                          OUT_STDOUT=AtomicCmd.PIPE)

        pileup_builder = AtomicCmdBuilder(["vcf_create_pileup",
                                           "%(OUT_PILEUP)s"],
                                          IN_REF=reference,
                                          IN_BAM=in_bam,
                                          IN_STDIN=unicat_builder,
                                          OUT_PILEUP=outfile,
                                          OUT_TBI=tbi_path)
        pileup_builder.add_value("%(IN_BAM)s")
        pileup_builder.set_option("-f", "%(IN_REF)s")

        return {
            "commands": {
                "unicat": unicat_builder,
                "pileup": pileup_builder,
            },
        }
예제 #6
0
def test_builder__finalize__calls_atomiccmd():
    """finalize() must instantiate AtomicCmd with the built call and kwargs."""
    expected_args = (["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"],)
    expected_kwargs = {"IN_FILE": "/in/file",
                       "OUT_FILE": "/out/file",
                       "set_cwd": True}
    invocations = []

    class _AtomicCmdMock:
        # Stand-in for AtomicCmd that checks its constructor arguments and
        # records that it was instantiated.
        def __init__(self, *args, **kwargs):
            assert_equal(args, expected_args)
            assert_equal(kwargs, expected_kwargs)
            invocations.append(True)

    with Monkeypatch("pypeline.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        cmd_builder = AtomicCmdBuilder("echo", set_cwd=True)
        cmd_builder.add_option("-out", "%(OUT_FILE)s")
        cmd_builder.add_value("%(IN_FILE)s")
        cmd_builder.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file")

        cmd_builder.finalize()
        # An empty list here means the mock was never constructed.
        assert invocations
예제 #7
0
    def __init__(self,
                 config,
                 reference,
                 input_bam,
                 output_bam,
                 tags,
                 min_mapq=0,
                 filter_unmapped=False,
                 dependencies=()):
        """Create a node that filters, re-tags, sorts and calmd's a BAM file.

        `input_bam` is passed through "samtools view -bu" (with "-q" set to
        `min_mapq` if truthy, and "-F" set to "0x4" if `filter_unmapped` is
        truthy), then through Picard AddOrReplaceReadGroups (built from
        `config`, with read-group values taken from `tags`), and finally
        through "samtools calmd -b" against `reference`, whose stdout is
        written to `output_bam`. The three commands are run in parallel.
        """
        # Stage 1: samtools view, streaming to the next stage.
        bam_filter = AtomicCmdBuilder(("samtools", "view", "-bu"),
                                      IN_BAM=input_bam,
                                      OUT_STDOUT=AtomicCmd.PIPE)
        if min_mapq:
            bam_filter.set_option("-q", min_mapq, sep="")
        if filter_unmapped:
            bam_filter.set_option("-F", "0x4", sep="")
        bam_filter.add_value("%(IN_BAM)s")

        # Stage 2: Picard AddOrReplaceReadGroups, reading from stdin.
        readgroups = picard.picard_command(config, "AddOrReplaceReadGroups")
        readgroups.set_option("INPUT", "/dev/stdin", sep="=")
        # A named pipe carries the output, because the JVM may in some cases
        # write warning messages to stdout, which would yield a malformed BAM.
        readgroups.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
        readgroups.set_option("COMPRESSION_LEVEL", "0", sep="=")
        # Sorting is required by the pipeline and must precede calmd, which
        # otherwise may show pathological runtimes.
        readgroups.set_option("SORT_ORDER", "coordinate", sep="=")

        # All tags are replaced; ID is included since its default (e.g. '1')
        # causes trouble with pysam's type inference (read as a length 1
        # string, but written as a character).
        for key in ("ID", "SM", "LB", "PU", "PL"):
            readgroups.set_option(key, tags[key], sep="=")

        readgroups.set_kwargs(IN_STDIN=bam_filter,
                              TEMP_OUT_BAM="bam.pipe")

        # Stage 3: samtools calmd, reading the pipe written by stage 2.
        calmd_call = ["samtools", "calmd", "-b",
                      "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
        md_tagger = AtomicCmdBuilder(calmd_call,
                                     IN_REF=reference,
                                     TEMP_IN_BAM="bam.pipe",
                                     OUT_STDOUT=output_bam)

        pipeline = []
        for builder in (bam_filter, readgroups, md_tagger):
            pipeline.append(builder.finalize())

        PicardNode.__init__(
            self,
            command=ParallelCmds(pipeline),
            description="<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam),
            dependencies=dependencies)
예제 #8
0
def test_builder__finalize__calls_atomiccmd():
    """Check that finalize() passes the call and kwargs on to AtomicCmd."""
    state = {"called": False}

    class _AtomicCmdMock:
        # Replacement for AtomicCmd; validates what the builder hands over.
        def __init__(self, *args, **kwargs):
            call = ["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"]
            assert_equal(args, (call, ))
            assert_equal(kwargs, {"IN_FILE": "/in/file",
                                  "OUT_FILE": "/out/file",
                                  "set_cwd": True})
            state["called"] = True

    with Monkeypatch("pypeline.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        echo = AtomicCmdBuilder("echo", set_cwd=True)
        echo.add_option("-out", "%(OUT_FILE)s")
        echo.add_value("%(IN_FILE)s")
        echo.set_kwargs(OUT_FILE="/out/file",
                        IN_FILE="/in/file")

        echo.finalize()
        # Fails if the patched AtomicCmd was never instantiated.
        assert state["called"]
예제 #9
0
    def __init__(self, d_bam, dependencies):
        """Create a node that runs the 'model_gc.R' script via Rscript.

        The script is located next to the `gccorrect` module. It is called
        with, in order: the output model path
        ('<d_bam.bam_name>_GC_Model.txt' inside `d_bam.bam_output`), the
        plot path (same path with a '.pdf' extension), and one input path
        per node in `dependencies`.

        Side effect: the model path is stored in
        `d_bam.opts['BamInfo']['--GCmodel']`.
        """
        aux_r = os.path.join(os.path.dirname(gccorrect.__file__), 'model_gc.R')
        dest = os.path.join(d_bam.bam_output,
                            '%s_GC_Model.txt' % (str(d_bam.bam_name)))
        plot_dest = os.path.splitext(dest)[0] + '.pdf'
        # NOTE(review): the output files of each dependency are concatenated
        # without a separator, so this presumably relies on every dependency
        # having exactly one output file -- confirm against the callers.
        infiles = [''.join(n.output_files) for n in dependencies]

        builder = AtomicCmdBuilder(("Rscript", "%(AUX_R)s"))
        builder.add_value('%(OUT_FILEPATH)s')
        builder.add_value('%(OUT_PLOT)s')
        builder.add_multiple_values(infiles)
        builder.set_kwargs(AUX_R=aux_r,
                           OUT_FILEPATH=dest,
                           OUT_PLOT=plot_dest,
                           CHECK_VERSION=Rscript_VERSION)
        cmd = builder.finalize()

        # Record where the model is written in the options for 'BamInfo'.
        d_bam.opts['BamInfo']['--GCmodel'] = dest
        # Fixed: the closing '>' was missing, leaving the description
        # unbalanced.
        description = "<CreateGCModel: '%s' -> '%s'>" % (d_bam.bam_temp_local,
                                                         dest)
        CommandNode.__init__(self,
                             description=description,
                             command=cmd,
                             dependencies=dependencies)
예제 #10
0
파일: bwa.py 프로젝트: schae234/pypeline
def _process_output(stdin, output_file, reference, run_fixmate=False):
    """Build the post-processing commands applied to aligner output.

    The chain is "safeSAM2BAM" -> optional "samtools fixmate" ->
    "samtools sort" -> "samtools calmd"; the last command writes its
    stdout to `output_file`. `stdin` is used as the input of the first
    command, and `reference` is passed to calmd. The fixmate step is only
    created when `run_fixmate` is true.

    Returns a tuple (order, commands): `order` is the list of command
    names in pipeline order, and `commands` maps those names to builders.
    """
    to_bam = AtomicCmdBuilder("safeSAM2BAM")
    to_bam.set_option("--flag-as-sorted")
    # "-F 0x4" removes misses.
    to_bam.set_option("-F", "0x4", sep="", fixed=False)
    to_bam.set_kwargs(IN_STDIN=stdin,
                      OUT_STDOUT=AtomicCmd.PIPE,
                      CHECK_PYSAM=PYSAM_VERSION,
                      CHECK_SAMTOOLS=SAMTOOLS_VERSION)

    mate_fixer = None
    if run_fixmate:
        mate_fixer = AtomicCmdBuilder(("samtools", "fixmate", "-", "-"),
                                      IN_STDIN=to_bam,
                                      OUT_STDOUT=AtomicCmd.PIPE,
                                      CHECK_SAMTOOLS=SAMTOOLS_VERSION)

    # The sorter reads from fixmate when present, else from the converter.
    sorter_input = mate_fixer or to_bam
    sorter = AtomicCmdBuilder(("samtools", "sort"))
    # "-o": output to STDOUT on completion.
    sorter.set_option("-o")
    sorter.add_value("-")
    sorter.add_value("%(TEMP_OUT_BAM)s")
    sorter.set_kwargs(IN_STDIN=sorter_input,
                      OUT_STDOUT=AtomicCmd.PIPE,
                      TEMP_OUT_BAM="sorted",
                      CHECK_SAM=SAMTOOLS_VERSION)

    md_tagger = AtomicCmdBuilder(("samtools", "calmd"))
    md_tagger.add_value("-")
    md_tagger.add_value("%(IN_REF)s")
    # "-b": output BAM.
    md_tagger.set_option("-b")
    md_tagger.set_kwargs(IN_REF=reference,
                         IN_STDIN=sorter,
                         OUT_STDOUT=output_file,
                         CHECK_SAM=SAMTOOLS_VERSION)

    commands = {"convert": to_bam, "sort": sorter, "calmd": md_tagger}
    order = ["convert", "sort", "calmd"]
    if run_fixmate:
        commands["fixmate"] = mate_fixer
        order.insert(1, "fixmate")

    return order, commands
예제 #11
0
파일: bwa.py 프로젝트: schae234/pypeline
def _process_output(stdin, output_file, reference, run_fixmate=False):
    """Create builders that convert, (optionally) fixmate, sort and calmd.

    `stdin` feeds "safeSAM2BAM"; its output is piped through
    "samtools fixmate" when `run_fixmate` is true, then through
    "samtools sort" and "samtools calmd", which receives `reference` and
    writes to `output_file`.

    Returns `(order, commands)`, where `order` lists the step names
    ("convert", optionally "fixmate", "sort", "calmd") in execution order
    and `commands` is a dict from step name to builder.
    """
    step_convert = AtomicCmdBuilder("safeSAM2BAM")
    step_convert.set_option("--flag-as-sorted")
    step_convert.set_option("-F", "0x4", sep="", fixed=False)  # Drop misses
    step_convert.set_kwargs(
        IN_STDIN=stdin,
        OUT_STDOUT=AtomicCmd.PIPE,
        CHECK_PYSAM=PYSAM_VERSION,
        CHECK_SAMTOOLS=SAMTOOLS_VERSION)

    step_fixmate = None
    if run_fixmate:
        step_fixmate = AtomicCmdBuilder(
            ("samtools", "fixmate", "-", "-"),
            IN_STDIN=step_convert,
            OUT_STDOUT=AtomicCmd.PIPE,
            CHECK_SAMTOOLS=SAMTOOLS_VERSION)

    step_sort = AtomicCmdBuilder(("samtools", "sort"))
    step_sort.set_option("-o")  # Written to STDOUT once sorting completes
    step_sort.add_value("-")
    step_sort.add_value("%(TEMP_OUT_BAM)s")
    step_sort.set_kwargs(
        IN_STDIN=step_fixmate or step_convert,
        OUT_STDOUT=AtomicCmd.PIPE,
        TEMP_OUT_BAM="sorted",
        CHECK_SAM=SAMTOOLS_VERSION)

    step_calmd = AtomicCmdBuilder(("samtools", "calmd"))
    step_calmd.add_value("-")
    step_calmd.add_value("%(IN_REF)s")
    step_calmd.set_option("-b")  # BAM output
    step_calmd.set_kwargs(
        IN_REF=reference,
        IN_STDIN=step_sort,
        OUT_STDOUT=output_file,
        CHECK_SAM=SAMTOOLS_VERSION)

    commands = {
        "convert": step_convert,
        "sort": step_sort,
        "calmd": step_calmd,
    }
    order = ["convert"]
    if run_fixmate:
        # fixmate sits between conversion and sorting.
        order.append("fixmate")
        commands["fixmate"] = step_fixmate
    order.extend(["sort", "calmd"])

    return order, commands
예제 #12
0
def test_builder__add_value__two_values():
    """Values added one after another appear in the call in that order."""
    cmd_builder = AtomicCmdBuilder("ls")
    for placeholder in ("%(IN_FILE)s", "%(OUT_FILE)s"):
        cmd_builder.add_value(placeholder)
    assert_equal(cmd_builder.call, ["ls", "%(IN_FILE)s", "%(OUT_FILE)s"])
예제 #13
0
def test_builder__add_value__two_values():
    """Two add_value() calls extend the call with both values, in order."""
    expected_call = ["ls", "%(IN_FILE)s", "%(OUT_FILE)s"]

    ls_builder = AtomicCmdBuilder("ls")
    ls_builder.add_value("%(IN_FILE)s")
    ls_builder.add_value("%(OUT_FILE)s")

    assert_equal(ls_builder.call, expected_call)