Esempio n. 1
0
def concatenate_input_bams(config, input_bams, out=AtomicCmd.PIPE):
    """Transparent concatenation of input BAMs.

    Return a tuple containing a list of nodes (0 or 1), and an
    object which may be passed to the IN_STDIN of an AtomicCmd
    (either an AtomicCmd, or a filename). This allows transparent
    concatenation when multiple files are specified, while
    avoiding needless overhead when there is only 1 input file."""

    input_bams = safe_coerce_to_tuple(input_bams)
    if len(input_bams) == 1:
        return [], input_bams[0]

    jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
    params = AtomicJavaCmdBuilder(config, jar_file)
    params.set_kwargs(CHECK_JAR=_picard_version(jar_file))

    if out == AtomicCmd.PIPE:
        params.set_kwargs(OUT_STDOUT=out)
        params.set_option("OUTPUT", "/dev/stdout", sep="=")
    else:
        params.set_option("OUTPUT", out, sep="=")

    params.set_option("CREATE_INDEX", "False", sep="=")
    params.set_option("COMPRESSION_LEVEL", 0, sep="=")

    for (index, filename) in enumerate(safe_coerce_to_tuple(input_bams),
                                       start=1):
        params.add_option("I", "%%(IN_BAM_%02i)s" % index, sep="=")
        params.set_kwargs(**{"IN_BAM_%02i" % index: filename})

    params.set_option("SO", "coordinate", sep="=", fixed=False)

    cmd = params.finalize()
    return [cmd], cmd
Esempio n. 2
0
    def customize(cls, reference, infiles, outfile, options, dependencies = ()):
        infiles = safe_coerce_to_tuple(infiles)
        jar_file = os.path.join(options.jar_root,"GenomeAnalysisTK.jar")
        UnifiedGenotyper = AtomicJavaCmdBuilder(options,jar_file)
        UnifiedGenotyper.set_option("-R", "%(IN_REFERENCE)s")
        UnifiedGenotyper.set_option("-T", "UnifiedGenotyper")
        for bam in infiles:
            assert os.path.exists(bam), "Couldn't find input BAM: {}".format(bam)
            UnifiedGenotyper.add_option("-I", bam)
        UnifiedGenotyper.set_option("-o", "%(OUT_VCFFILES)s")
        UnifiedGenotyper.set_option("-stand_call_conf", "30.0")
        UnifiedGenotyper.set_option("-stand_emit_conf", "10.0")
        UnifiedGenotyper.set_option("-dcov", "200")
        #UnifiedGenotyper.set_option("-nct", "3")
        UnifiedGenotyper.set_option("-L", "chrUn2:1-19213991")
    

        UnifiedGenotyper.set_kwargs(
            IN_REFERENCE = reference,
            OUT_VCFFILES = outfile,
            OUT_VCF_IDX  = outfile + ".idx"
        )

        return {
            "commands" : {
                "unifiedgenotyper" : UnifiedGenotyper
            }
        }
Esempio n. 3
0
    def customize(cls, config, input_bam, output_log = None, dependencies = ()):
        jar_file = os.path.join(config.jar_root, "ValidateSamFile.jar")
        params = AtomicJavaCmdBuilder(config, jar_file)

        params.set_option("I", "%(IN_BAM)s", sep = "=")
        params.set_kwargs(IN_BAM     = input_bam,
                         OUT_STDOUT = output_log or swap_ext(input_bam, ".validated"),
                         CHECK_JAR  = _picard_version(jar_file))

        return {"command"      : params,
                "dependencies" : dependencies}
Esempio n. 4
0
    def customize(cls, config, input_bam, output_log=None, dependencies=()):
        jar_file = os.path.join(config.jar_root, "ValidateSamFile.jar")
        params = AtomicJavaCmdBuilder(config, jar_file)

        params.set_option("I", "%(IN_BAM)s", sep="=")
        params.set_kwargs(IN_BAM=input_bam,
                          OUT_STDOUT=output_log
                          or swap_ext(input_bam, ".validated"),
                          CHECK_JAR=_picard_version(jar_file))

        return {"command": params, "dependencies": dependencies}
Esempio n. 5
0
    def customize(cls, config, reference, dependencies=()):
        jar_file = os.path.join(config.jar_root,
                                "CreateSequenceDictionary.jar")
        params = AtomicJavaCmdBuilder(config, jar_file)

        params.set_option("R", "%(IN_REF)s", sep="=")
        params.set_option("O", "%(OUT_DICT)s", sep="=")
        params.set_kwargs(IN_REF=reference,
                          OUT_DICT=swap_ext(reference, ".dict"),
                          CHECK_JAR=_picard_version(jar_file))

        return {"command": params, "dependencies": dependencies}
Esempio n. 6
0
    def customize(cls, config, reference, dependencies = ()):
        jar_file = os.path.join(config.jar_root, "CreateSequenceDictionary.jar")
        params = AtomicJavaCmdBuilder(config, jar_file)

        params.set_option("R", "%(IN_REF)s", sep = "=")
        params.set_option("O", "%(OUT_DICT)s", sep = "=")
        params.set_kwargs(IN_REF     = reference,
                          OUT_DICT   = swap_ext(reference, ".dict"),
                          CHECK_JAR  = _picard_version(jar_file))

        return {"command"      : params,
                "dependencies" : dependencies}
Esempio n. 7
0
    def customize(cls, config, input_bams, output_bam, output_metrics = None, dependencies = ()):
        jar_file = os.path.join(config.jar_root, "MarkDuplicates.jar")
        params = AtomicJavaCmdBuilder(config, jar_file)

        # Create .bai index, since it is required by a lot of other programs
        params.set_option("CREATE_INDEX", "True", sep = "=")

        params.set_option("OUTPUT", "%(OUT_BAM)s", sep = "=")
        params.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep = "=")

        input_bams = safe_coerce_to_tuple(input_bams)
        for (index, filename) in enumerate(input_bams):
            params.add_option("I", "%%(IN_BAM_%02i)s" % index, sep = "=")
            params.set_kwargs(**{("IN_BAM_%02i" % index) : filename})

        # Remove duplicates from output by default to save disk-space
        params.set_option("REMOVE_DUPLICATES", "True", sep = "=", fixed = False)

        params.set_kwargs(OUT_BAM     = output_bam,
                         OUT_BAI     = swap_ext(output_bam, ".bai"),
                         OUT_METRICS = output_metrics or swap_ext(output_bam, ".metrics"),
                         CHECK_JAR  = _picard_version(jar_file))

        return {"command"      : params,
                "dependencies" : dependencies}
Esempio n. 8
0
    def __init__(self, config, input_bams, pipename="input.bam"):
        self.pipe = pipename
        self.files = safe_coerce_to_tuple(input_bams)

        self.commands = []
        self.kwargs = {"TEMP_IN_BAM": self.pipe}
        if len(self.files) > 1:
            jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
            params = AtomicJavaCmdBuilder(jar=jar_file,
                                          temp_root=config.temp_root,
                                          jre_options=config.jre_options)

            params.set_option("SO", "coordinate", sep="=", fixed=False)
            params.set_option("CREATE_INDEX", "False", sep="=")
            params.set_option("COMPRESSION_LEVEL", 0, sep="=")
            params.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
            params.add_multiple_options("I", input_bams, sep="=")

            params.set_kwargs(CHECK_JAR=_picard_version(config, jar_file),
                              TEMP_OUT_BAM=self.pipe)

            self.commands = [params.finalize()]
        else:
            # Ensure that the actual command depends on the input
            self.kwargs["IN_FILE_00"] = self.files[0]
            self.kwargs["IN_FILE_01"] = swap_ext(self.files[0], ".bai")
Esempio n. 9
0
def concatenate_input_bams(config, input_bams, out = AtomicCmd.PIPE):
    """Transparent concatenation of input BAMs.

    Return a tuple containing a list of nodes (0 or 1), and an
    object which may be passed to the IN_STDIN of an AtomicCmd
    (either an AtomicCmd, or a filename). This allows transparent
    concatenation when multiple files are specified, while
    avoiding needless overhead when there is only 1 input file."""

    input_bams = safe_coerce_to_tuple(input_bams)
    if len(input_bams) == 1:
        return [], input_bams[0]

    jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
    params = AtomicJavaCmdBuilder(config, jar_file)
    params.set_kwargs(CHECK_JAR  = _picard_version(jar_file))

    if out == AtomicCmd.PIPE:
        params.set_kwargs(OUT_STDOUT = out)
        params.set_option("OUTPUT", "/dev/stdout", sep = "=")
    else:
        params.set_option("OUTPUT", out, sep = "=")

    params.set_option("CREATE_INDEX", "False", sep = "=")
    params.set_option("COMPRESSION_LEVEL",  0, sep = "=")

    for (index, filename) in enumerate(safe_coerce_to_tuple(input_bams), start = 1):
        params.add_option("I", "%%(IN_BAM_%02i)s" % index, sep = "=")
        params.set_kwargs(**{"IN_BAM_%02i" % index : filename})

    params.set_option("SO", "coordinate", sep = "=", fixed = False)

    cmd = params.finalize()
    return [cmd], cmd
Esempio n. 10
0
    def customize(cls, config, input_bams, output_bam, output_metrics=None,
                  keep_dupes=False, dependencies=()):
        jar_file = os.path.join(config.jar_root, "MarkDuplicates.jar")
        params = AtomicJavaCmdBuilder(jar_file, jre_options=config.jre_options)

        # Create .bai index, since it is required by a lot of other programs
        params.set_option("CREATE_INDEX", "True", sep="=")

        params.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        params.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")
        params.add_multiple_options("I", input_bams, sep="=")

        if not keep_dupes:
            # Remove duplicates from output by default to save disk-space
            params.set_option("REMOVE_DUPLICATES", "True",
                              sep="=", fixed=False)

        output_metrics = output_metrics or swap_ext(output_bam, ".metrics")
        params.set_kwargs(OUT_BAM=output_bam,
                          OUT_BAI=swap_ext(output_bam, ".bai"),
                          OUT_METRICS=output_metrics,
                          CHECK_JAR=_picard_version(config, jar_file))

        return {"command": params,
                "dependencies": dependencies}
Esempio n. 11
0
    def customize(cls,
                  config,
                  input_bams,
                  output_bam,
                  output_metrics=None,
                  dependencies=()):
        jar_file = os.path.join(config.jar_root, "MarkDuplicates.jar")
        params = AtomicJavaCmdBuilder(config, jar_file)

        # Create .bai index, since it is required by a lot of other programs
        params.set_option("CREATE_INDEX", "True", sep="=")

        params.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        params.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")

        input_bams = safe_coerce_to_tuple(input_bams)
        for (index, filename) in enumerate(input_bams):
            params.add_option("I", "%%(IN_BAM_%02i)s" % index, sep="=")
            params.set_kwargs(**{("IN_BAM_%02i" % index): filename})

        # Remove duplicates from output by default to save disk-space
        params.set_option("REMOVE_DUPLICATES", "True", sep="=", fixed=False)

        params.set_kwargs(OUT_BAM=output_bam,
                          OUT_BAI=swap_ext(output_bam, ".bai"),
                          OUT_METRICS=output_metrics
                          or swap_ext(output_bam, ".metrics"),
                          CHECK_JAR=_picard_version(jar_file))

        return {"command": params, "dependencies": dependencies}
Esempio n. 12
0
    def __init__(self, config, input_bams, pipename="input.bam"):
        self.pipe = pipename
        self.files = safe_coerce_to_tuple(input_bams)

        self.commands = []
        self.kwargs = {"TEMP_IN_BAM": self.pipe}
        if len(self.files) > 1:
            jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
            params = AtomicJavaCmdBuilder(jar=jar_file,
                                          temp_root=config.temp_root,
                                          jre_options=config.jre_options)

            params.set_option("SO", "coordinate", sep="=", fixed=False)
            params.set_option("CREATE_INDEX", "False", sep="=")
            params.set_option("COMPRESSION_LEVEL", 0, sep="=")
            params.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
            params.add_multiple_options("I", input_bams, sep="=")

            params.set_kwargs(CHECK_JAR=_picard_version(config, jar_file),
                              TEMP_OUT_BAM=self.pipe)

            self.commands = [params.finalize()]
        else:
            # Ensure that the actual command depends on the input
            self.kwargs["IN_FILE_00"] = self.files[0]
            self.kwargs["IN_FILE_01"] = swap_ext(self.files[0], ".bai")
Esempio n. 13
0
    def customize(cls,
                  config,
                  input_bams,
                  output_bam,
                  output_metrics=None,
                  keep_dupes=False,
                  dependencies=()):
        jar_file = os.path.join(config.jar_root, "MarkDuplicates.jar")
        params = AtomicJavaCmdBuilder(jar_file, jre_options=config.jre_options)

        # Create .bai index, since it is required by a lot of other programs
        params.set_option("CREATE_INDEX", "True", sep="=")

        params.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        params.set_option("METRICS_FILE", "%(OUT_METRICS)s", sep="=")
        params.add_multiple_options("I", input_bams, sep="=")

        if not keep_dupes:
            # Remove duplicates from output by default to save disk-space
            params.set_option("REMOVE_DUPLICATES",
                              "True",
                              sep="=",
                              fixed=False)

        output_metrics = output_metrics or swap_ext(output_bam, ".metrics")
        params.set_kwargs(OUT_BAM=output_bam,
                          OUT_BAI=swap_ext(output_bam, ".bai"),
                          OUT_METRICS=output_metrics,
                          CHECK_JAR=_picard_version(config, jar_file))

        return {"command": params, "dependencies": dependencies}
Esempio n. 14
0
    def __init__(self,
                 config,
                 reference,
                 input_bam,
                 output_bam,
                 tags,
                 min_mapq=0,
                 filter_unmapped=False,
                 dependencies=()):
        call = ["samtools", "view", "-bu"]
        if min_mapq > 0:
            call.append("-q%i" % min_mapq)
        if filter_unmapped:
            call.append("-F0x4")
        call.append("%(IN_BAM)s")

        flt = AtomicCmd(call, IN_BAM=input_bam, OUT_STDOUT=AtomicCmd.PIPE)

        jar_file = os.path.join(config.jar_root, "AddOrReplaceReadGroups.jar")
        params = AtomicJavaCmdBuilder(jar=jar_file,
                                      jre_options=config.jre_options)
        params.set_option("INPUT", "/dev/stdin", sep="=")
        params.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
        params.set_option("COMPRESSION_LEVEL", "0", sep="=")
        params.set_option("SORT_ORDER", "coordinate", sep="=")

        for tag in ("SM", "LB", "PU", "PL"):
            params.set_option(tag, tags[tag], sep="=")

        params.set_kwargs(IN_STDIN=flt, TEMP_OUT_BAM="bam.pipe")
        annotate = params.finalize()

        calmd = AtomicCmd(
            ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
            IN_REF=reference,
            TEMP_IN_BAM="bam.pipe",
            OUT_STDOUT=output_bam)

        description = "<Cleanup BAM: %s -> '%s'>" \
            % (input_bam, output_bam)
        PicardNode.__init__(self,
                            command=ParallelCmds([flt, annotate, calmd]),
                            description=description,
                            dependencies=dependencies)
Esempio n. 15
0
def picard_command(config, command):
    """Returns basic AtomicJavaCmdBuilder for Picard tools commands."""
    jar_path = os.path.join(config.jar_root, _PICARD_JAR)

    if jar_path not in _PICARD_VERSION_CACHE:
        params = AtomicJavaCmdBuilder(jar_path,
                                      temp_root=config.temp_root,
                                      jre_options=config.jre_options)

        # Arbitrary command, since just '--version' does not work
        params.set_option("MarkDuplicates")
        params.set_option("--version")

        requirement = versions.Requirement(call=params.finalized_call,
                                           name="Picard tools",
                                           search=r"^(\d+)\.(\d+)",
                                           checks=versions.GE(1, 124))
        _PICARD_VERSION_CACHE[jar_path] = requirement

    version = _PICARD_VERSION_CACHE[jar_path]
    params = AtomicJavaCmdBuilder(jar_path,
                                  temp_root=config.temp_root,
                                  jre_options=config.jre_options,
                                  CHECK_JAR=version)
    params.set_option(command)

    return params
Esempio n. 16
0
    def __init__(self, config, reference, input_bam, output_bam, tags,
                 min_mapq=0, filter_unmapped=False, dependencies=()):
        call = ["samtools", "view", "-bu"]
        if min_mapq > 0:
            call.append("-q%i" % min_mapq)
        if filter_unmapped:
            call.append("-F0x4")
        call.append("%(IN_BAM)s")

        flt = AtomicCmd(call,
                        IN_BAM=input_bam,
                        OUT_STDOUT=AtomicCmd.PIPE)

        jar_file = os.path.join(config.jar_root, "AddOrReplaceReadGroups.jar")
        params = AtomicJavaCmdBuilder(jar=jar_file,
                                      jre_options=config.jre_options)
        params.set_option("INPUT", "/dev/stdin", sep="=")
        params.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
        params.set_option("COMPRESSION_LEVEL", "0", sep="=")
        params.set_option("SORT_ORDER", "coordinate", sep="=")

        for tag in ("SM", "LB", "PU", "PL"):
            params.set_option(tag, tags[tag], sep="=")

        params.set_kwargs(IN_STDIN=flt,
                          TEMP_OUT_BAM="bam.pipe")
        annotate = params.finalize()

        calmd = AtomicCmd(["samtools", "calmd", "-b",
                           "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
                          IN_REF=reference,
                          TEMP_IN_BAM="bam.pipe",
                          OUT_STDOUT=output_bam)

        description = "<Cleanup BAM: %s -> '%s'>" \
            % (input_bam, output_bam)
        PicardNode.__init__(self,
                            command=ParallelCmds([flt, annotate, calmd]),
                            description=description,
                            dependencies=dependencies)
Esempio n. 17
0
def picard_command(config, command):
    """Returns basic AtomicJavaCmdBuilder for Picard tools commands."""
    jar_path = os.path.join(config.jar_root, _PICARD_JAR)

    if jar_path not in _PICARD_VERSION_CACHE:
        params = AtomicJavaCmdBuilder(jar_path,
                                      temp_root=config.temp_root,
                                      jre_options=config.jre_options)

        # Arbitrary command, since just '--version' does not work
        params.set_option("MarkDuplicates")
        params.set_option("--version")

        requirement = versions.Requirement(call=params.finalized_call,
                                           name="Picard tools",
                                           search=r"^(\d+)\.(\d+)",
                                           checks=versions.GE(1, 124))
        _PICARD_VERSION_CACHE[jar_path] = requirement

    version = _PICARD_VERSION_CACHE[jar_path]
    params = AtomicJavaCmdBuilder(jar_path,
                                  temp_root=config.temp_root,
                                  jre_options=config.jre_options,
                                  CHECK_JAR=version)
    params.set_option(command)

    return params
Esempio n. 18
0
    def customize(cls, config, input_bams, output_bam, dependencies=()):
        jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
        params = AtomicJavaCmdBuilder(jar_file, jre_options=config.jre_options)

        params.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        params.set_option("CREATE_INDEX", "True", sep="=")
        params.set_option("SO", "coordinate", sep="=", fixed=False)
        params.add_multiple_options("I", input_bams, sep="=")

        params.set_kwargs(OUT_BAM=output_bam,
                          OUT_BAI=swap_ext(output_bam, ".bai"),
                          CHECK_JAR=_picard_version(config, jar_file))

        return {"command": params, "dependencies": dependencies}
Esempio n. 19
0
    def customize(cls, config, input_bams, output_bam, dependencies=()):
        jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
        params = AtomicJavaCmdBuilder(jar_file, jre_options=config.jre_options)

        params.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        params.set_option("CREATE_INDEX", "True", sep="=")
        params.set_option("SO", "coordinate", sep="=", fixed=False)
        params.add_multiple_options("I", input_bams, sep="=")

        params.set_kwargs(OUT_BAM=output_bam,
                          OUT_BAI=swap_ext(output_bam, ".bai"),
                          CHECK_JAR=_picard_version(config, jar_file))

        return {"command": params,
                "dependencies": dependencies}
Esempio n. 20
0
    def customize(cls, config, input_bams, output_bam, dependencies=()):
        jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
        params = AtomicJavaCmdBuilder(config, jar_file)

        params.set_option("OUTPUT", "%(OUT_BAM)s", sep="=")
        params.set_option("CREATE_INDEX", "True", sep="=")
        params.set_kwargs(OUT_BAM=output_bam,
                          OUT_BAI=swap_ext(output_bam, ".bai"),
                          CHECK_JAR=_picard_version(jar_file))

        for (index, filename) in enumerate(input_bams, start=1):
            params.add_option("I", "%%(IN_BAM_%02i)s" % index, sep="=")
            params.set_kwargs(**{("IN_BAM_%02i" % index): filename})

        params.set_option("SO", "coordinate", sep="=", fixed=False)

        return {"command": params, "dependencies": dependencies}
Esempio n. 21
0
    def __init__(self, config, reference, infiles, outfile, dependencies=()):
        infiles = safe_coerce_to_tuple(infiles)
        jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        command = AtomicJavaCmdBuilder(jar_file, jre_options=config.jre_options)
        command.set_option("-T", "RealignerTargetCreator")
        command.set_option("-R", "%(IN_REFERENCE)s")
        command.set_option("-o", "%(OUT_INTERVALS)s")

        _set_input_files(command, infiles)
        command.set_kwargs(
            IN_REFERENCE=reference,
            IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
            OUT_INTERVALS=outfile,
            CHECK_GATK=_get_gatk_version_check(config),
        )

        description = "<Indel Realigner (training): %s -> %r>" % (describe_files(infiles), outfile)
        CommandNode.__init__(self, description=description, command=command.finalize(), dependencies=dependencies)
Esempio n. 22
0
    def customize(cls, config, input_bams, output_bam, dependencies = ()):
        jar_file = os.path.join(config.jar_root, "MergeSamFiles.jar")
        params = AtomicJavaCmdBuilder(config, jar_file)

        params.set_option("OUTPUT", "%(OUT_BAM)s", sep = "=")
        params.set_option("CREATE_INDEX", "True", sep = "=")
        params.set_kwargs(OUT_BAM = output_bam,
                         OUT_BAI = swap_ext(output_bam, ".bai"),
                         CHECK_JAR  = _picard_version(jar_file))

        for (index, filename) in enumerate(input_bams, start = 1):
            params.add_option("I", "%%(IN_BAM_%02i)s" % index, sep = "=")
            params.set_kwargs(**{("IN_BAM_%02i" % index) : filename})

        params.set_option("SO", "coordinate", sep = "=", fixed = False)

        return {"command"      : params,
                "dependencies" : dependencies}
Esempio n. 23
0
    def __init__(self, config, reference, infiles, outfile, dependencies=()):
        infiles = safe_coerce_to_tuple(infiles)
        jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        command = AtomicJavaCmdBuilder(config, jar_file)
        command.set_option("-T", "RealignerTargetCreator")
        command.set_option("-R", "%(IN_REFERENCE)s")
        command.set_option("-o", "%(OUT_INTERVALS)s")

        _set_input_files(command, infiles)
        command.set_kwargs(IN_REFERENCE=reference,
                           IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                           OUT_INTERVALS=outfile)

        description = "<Train Indel Realigner: %i file(s) -> '%s'>" \
            % (len(infiles), outfile)
        CommandNode.__init__(self,
                             description=description,
                             command=command.finalize(),
                             dependencies=dependencies)
Esempio n. 24
0
    def __init__(self, config, reference, infiles, outfile, dependencies = ()):
        infiles  = safe_coerce_to_tuple(infiles)
        jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        command  = AtomicJavaCmdBuilder(config, jar_file)
        command.set_option("-T", "RealignerTargetCreator")
        command.set_option("-R", "%(IN_REFERENCE)s")
        command.set_option("-o", "%(OUT_INTERVALS)s")

        _set_input_files(command, infiles)
        command.set_kwargs(IN_REFERENCE  = reference,
                           IN_REF_DICT   = fileutils.swap_ext(reference, ".dict"),
                           OUT_INTERVALS = outfile)

        description = "<Train Indel Realigner: %i file(s) -> '%s'>" \
            % (len(infiles), outfile)
        CommandNode.__init__(self,
                             description  = description,
                             command      = command.finalize(),
                             dependencies = dependencies)
Esempio n. 25
0
    def __init__(self, config, reference, infiles, outfile, dependencies=()):
        infiles = safe_coerce_to_tuple(infiles)
        jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        command = AtomicJavaCmdBuilder(jar_file,
                                       jre_options=config.jre_options)
        command.set_option("-T", "RealignerTargetCreator")
        command.set_option("-R", "%(IN_REFERENCE)s")
        command.set_option("-o", "%(OUT_INTERVALS)s")

        _set_input_files(command, infiles)
        command.set_kwargs(IN_REFERENCE=reference,
                           IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                           OUT_INTERVALS=outfile,
                           CHECK_GATK=_get_gatk_version_check(config))

        description = "<Indel Realigner (training): %s -> %r>" \
            % (describe_files(infiles), outfile)
        CommandNode.__init__(self,
                             description=description,
                             command=command.finalize(),
                             dependencies=dependencies)
Esempio n. 26
0
    def __init__(self,
                 config,
                 reference,
                 input_bam,
                 output_bam,
                 tags,
                 min_mapq=0,
                 dependencies=()):
        flt = AtomicCmd([
            "samtools", "view", "-bu", "-F0x4",
            "-q%i" % min_mapq, "%(IN_BAM)s"
        ],
                        IN_BAM=input_bam,
                        OUT_STDOUT=AtomicCmd.PIPE)

        jar_file = os.path.join(config.jar_root, "AddOrReplaceReadGroups.jar")
        params = AtomicJavaCmdBuilder(config, jar_file)
        params.set_option("INPUT", "/dev/stdin", sep="=")
        params.set_option("OUTPUT", "/dev/stdout", sep="=")
        params.set_option("QUIET", "true", sep="=")
        params.set_option("COMPRESSION_LEVEL", "0", sep="=")

        for (tag, value) in sorted(tags.iteritems()):
            if tag not in ("PG", "Target", "PU_src", "PU_cur"):
                params.set_option(tag, value, sep="=")
            elif tag == "PU_src":
                params.set_option("PU", value, sep="=")

        params.set_kwargs(IN_STDIN=flt, OUT_STDOUT=AtomicCmd.PIPE)
        annotate = params.finalize()

        calmd = AtomicCmd(["samtools", "calmd", "-b", "-", "%(IN_REF)s"],
                          IN_REF=reference,
                          IN_STDIN=annotate,
                          OUT_STDOUT=output_bam)

        description =  "<Cleanup BAM: %s -> '%s'>" \
            % (input_bam, output_bam)
        CommandNode.__init__(self,
                             command=ParallelCmds([flt, annotate, calmd]),
                             description=description,
                             dependencies=dependencies)
Esempio n. 27
0
    def __init__(self, config, reference, intervals, infiles, outfile,
                 dependencies=()):
        self._basename = os.path.basename(outfile)

        infiles = safe_coerce_to_tuple(infiles)
        jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        command = AtomicJavaCmdBuilder(jar_file,
                                       jre_options=config.jre_options)
        command.set_option("-T", "IndelRealigner")
        command.set_option("-R", "%(IN_REFERENCE)s")
        command.set_option("-targetIntervals", "%(IN_INTERVALS)s")
        command.set_option("-o", "%(OUT_BAMFILE)s")
        command.set_option("--bam_compression", 0)
        command.set_option("--disable_bam_indexing")
        _set_input_files(command, infiles)

        command.set_kwargs(IN_REFERENCE=reference,
                           IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                           IN_INTERVALS=intervals,
                           OUT_BAMFILE=outfile,
                           CHECK_GATK=_get_gatk_version_check(config))

        calmd = AtomicCmd(["samtools", "calmd", "-b",
                           "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
                          TEMP_IN_BAM=self._basename,
                          IN_REF=reference,
                          TEMP_OUT_STDOUT=self._basename + ".calmd",
                          CHECK_VERSION=SAMTOOLS_VERSION)

        description = "<Indel Realigner (aligning): %s -> %r>" \
            % (describe_files(infiles), outfile)
        CommandNode.__init__(self,
                             description=description,
                             command=ParallelCmds([command.finalize(), calmd]),
                             dependencies=dependencies)
Esempio n. 28
0
    def __init__(self, config, reference, input_bam, output_bam, tags, min_mapq = 0, dependencies = ()):
        flt = AtomicCmd(["samtools", "view", "-bu", "-F0x4", "-q%i" % min_mapq, "%(IN_BAM)s"],
                        IN_BAM  = input_bam,
                        OUT_STDOUT = AtomicCmd.PIPE)

        jar_file = os.path.join(config.jar_root, "AddOrReplaceReadGroups.jar")
        params = AtomicJavaCmdBuilder(config, jar_file)
        params.set_option("INPUT", "/dev/stdin", sep = "=")
        params.set_option("OUTPUT", "/dev/stdout", sep = "=")
        params.set_option("QUIET", "true", sep = "=")
        params.set_option("COMPRESSION_LEVEL", "0", sep = "=")

        for (tag, value) in sorted(tags.iteritems()):
            if tag not in ("PG", "Target", "PU_src", "PU_cur"):
                params.set_option(tag, value, sep = "=")
            elif tag == "PU_src":
                params.set_option("PU", value, sep = "=")

        params.set_kwargs(IN_STDIN   = flt,
                         OUT_STDOUT = AtomicCmd.PIPE)
        annotate = params.finalize()

        calmd = AtomicCmd(["samtools", "calmd", "-b", "-", "%(IN_REF)s"],
                          IN_REF   = reference,
                          IN_STDIN = annotate,
                          OUT_STDOUT = output_bam)

        description =  "<Cleanup BAM: %s -> '%s'>" \
            % (input_bam, output_bam)
        CommandNode.__init__(self,
                             command      = ParallelCmds([flt, annotate, calmd]),
                             description  = description,
                             dependencies = dependencies)
Esempio n. 29
0
    def __init__(self, config, reference, intervals, infiles, outfile, dependencies = ()):
        self._basename = os.path.basename(outfile)

        infiles  = safe_coerce_to_tuple(infiles)
        jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        command  = AtomicJavaCmdBuilder(config, jar_file)
        command.set_option("-T", "IndelRealigner")
        command.set_option("-R", "%(IN_REFERENCE)s")
        command.set_option("-targetIntervals", "%(IN_INTERVALS)s")
        command.set_option("-o", "%(OUT_BAMFILE)s")
        command.set_option("--bam_compression", 0)
        command.set_option("--disable_bam_indexing")
        _set_input_files(command, infiles)

        command.set_kwargs(IN_REFERENCE = reference,
                           IN_REF_DICT  = fileutils.swap_ext(reference, ".dict"),
                           IN_INTERVALS = intervals,
                           OUT_BAMFILE  = outfile)

        calmd   = AtomicCmd(["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
                            TEMP_IN_BAM     = self._basename,
                            IN_REF          = reference,
                            TEMP_OUT_STDOUT = self._basename + ".calmd")

        description = "<Indel Realign: %i file(s) -> '%s'>" \
            % (len(infiles), outfile)

        CommandNode.__init__(self,
                             description  = description,
                             command      = ParallelCmds([command.finalize(),
                                                          calmd]),
                             dependencies = dependencies)