Esempio n. 1
0
    def __init__(self, config, reference, intervals, infiles, outfile,
                 dependencies=()):
        """Set up a GATK IndelRealigner command run in parallel with calmd.

        Arguments:
          config -- object providing 'jar_root' and 'jre_options'.
          reference -- path to the reference; a '.dict' file with the same
                       stem is registered as an additional input.
          intervals -- path passed to GATK via '-targetIntervals'.
          infiles -- one or more input files (coerced to a tuple).
          outfile -- path given to GATK via '-o'.
          dependencies -- forwarded unchanged to CommandNode.
        """
        self._basename = os.path.basename(outfile)
        infiles = safe_coerce_to_tuple(infiles)

        gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        realigner = AtomicJavaCmdBuilder(gatk_jar,
                                         jre_options=config.jre_options)

        # Options taking a value, applied in a fixed order.
        valued_options = (
            ("-T", "IndelRealigner"),
            ("-R", "%(IN_REFERENCE)s"),
            ("-targetIntervals", "%(IN_INTERVALS)s"),
            ("-o", "%(OUT_BAMFILE)s"),
            ("--bam_compression", 0),
        )
        for option, value in valued_options:
            realigner.set_option(option, value)
        realigner.set_option("--disable_bam_indexing")
        _set_input_files(realigner, infiles)

        realigner_paths = {
            "IN_REFERENCE": reference,
            "IN_REF_DICT": fileutils.swap_ext(reference, ".dict"),
            "IN_INTERVALS": intervals,
            "OUT_BAMFILE": outfile,
            "CHECK_GATK": _get_gatk_version_check(config),
        }
        realigner.set_kwargs(**realigner_paths)

        # 'samtools calmd' reads a temporary file named after the output's
        # basename and writes its stdout to a temporary '.calmd' file.
        calmd_call = ["samtools", "calmd", "-b",
                      "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
        calmd = AtomicCmd(calmd_call,
                          TEMP_IN_BAM=self._basename,
                          IN_REF=reference,
                          TEMP_OUT_STDOUT=self._basename + ".calmd",
                          CHECK_VERSION=SAMTOOLS_VERSION)

        description = "<Indel Realigner (aligning): %s -> %r>" \
            % (describe_files(infiles), outfile)
        both_commands = ParallelCmds([realigner.finalize(), calmd])
        CommandNode.__init__(self,
                             description=description,
                             command=both_commands,
                             dependencies=dependencies)
Esempio n. 2
0
 def __init__(self, parameters):
     """Create the node from the finalized 'VariantRecal' command.

     'parameters' must provide 'commands' (a mapping containing the key
     'VariantRecal' whose value has a finalize() method), 'model_name'
     (a path; only its basename is shown) and 'dependencies'.
     """
     commands = [parameters.commands['VariantRecal'].finalize()]
     # The closing '>' was missing, leaving the description unbalanced.
     description = "<Variant Recalibrator: {}>".format(
         os.path.basename(parameters.model_name))
     CommandNode.__init__(self,
         description = description,
         command = ParallelCmds(commands),
         dependencies = parameters.dependencies)
Esempio n. 3
0
 def __init__(self, parameters):
     """Create the node from a prepared command builder.

     'parameters' must provide 'command' (with a 'paths' attribute and a
     finalize() method), 'input_alignment', 'output_alignment' and
     'dependencies'. The builder's paths are kept in self._kwargs.
     """
     self._kwargs = parameters.command.paths

     finalized = parameters.command.finalize()
     alignments = (parameters.input_alignment, parameters.output_alignment)
     description = "<RAxMLReduce: '%s' -> '%s'>" % alignments
     CommandNode.__init__(self,
                          description=description,
                          command=finalized,
                          dependencies=parameters.dependencies)
Esempio n. 4
0
 def __init__(self, parameters):
     """Create the node from the finalized 'Filter' command.

     'parameters' must provide 'commands' (a mapping containing the key
     'Filter' whose value has a finalize() method), 'outfile' (a path;
     only its basename is shown) and 'dependencies'.
     """
     commands = [parameters.commands['Filter'].finalize()]
     # The closing '>' was missing, leaving the description unbalanced.
     description = "<Variant Filter: {}>".format(
         os.path.basename(parameters.outfile))
     CommandNode.__init__(self,
         description = description,
         command = ParallelCmds(commands),
         dependencies = parameters.dependencies)
Esempio n. 5
0
    def __init__(self, config, d_make, bedn, mappa, unique, dependencies=()):
        """Run 'intersectmappabed.py' on one of the bed files of 'd_make'.

        The output is written to config.temp_local under the input's name
        with '_MappaOnly' inserted before the extension. Note that
        d_make.bedfiles[bedn] is overwritten with that output path.

        Arguments:
          config -- object providing 'temp_local'.
          d_make -- object with an indexable 'bedfiles' attribute.
          bedn -- key/index into d_make.bedfiles.
          mappa -- path passed to the script as its second argument.
          unique -- value passed (as a string) as the third argument.
          dependencies -- forwarded unchanged to CommandNode.
        """
        source_bed = d_make.bedfiles[bedn]
        stem, suffix = os.path.splitext(os.path.basename(source_bed))
        filtered_bed = os.path.join(config.temp_local,
                                    "{}_MappaOnly{}".format(stem, suffix))

        # Side effect: the caller's record now points at the filtered file.
        d_make.bedfiles[bedn] = filtered_bed

        script = os.path.join(PREFIX, "intersectmappabed.py")
        argv = ["python", script, "%(IN_BED)s", "%(IN_MAPPA)s",
                str(unique), "%(OUT_DEST)s"]
        intersect = AtomicCmd(argv,
                              IN_BED=source_bed,
                              IN_MAPPA=mappa,
                              OUT_DEST=filtered_bed,
                              CHECK_VERSION=PYTHON_VERSION)

        description = ("<CLEANBEDFILES: '%s' -> '%s', Uniqueness: '%s'>" %
                       (source_bed, filtered_bed, unique))
        CommandNode.__init__(self,
                             command=intersect,
                             description=description,
                             dependencies=dependencies)
Esempio n. 6
0
    def __init__(self,
                 infile,
                 outfile,
                 genome,
                 from_start=0,
                 from_end=0,
                 strand_relative=False,
                 dependencies=()):
        """Build a 'slopBed' command writing its stdout to 'outfile'.

        Arguments:
          infile -- file passed via '-i'.
          outfile -- file receiving the command's standard output.
          genome -- file passed via '-g'.
          from_start -- value passed via '-l'.
          from_end -- value passed via '-r'.
          strand_relative -- if true, '-s' is appended.
          dependencies -- forwarded unchanged to CommandNode.

        Raises ValueError if 'from_start' and 'from_end' are not of exactly
        the same type. '-pct' is appended only when 'from_start' is exactly
        a float (subclasses of float do not count).
        """
        if type(from_start) != type(from_end):
            raise ValueError(
                "'from_start' and 'from_end' should be of same type!")

        argv = ["slopBed", "-i", "%(IN_FILE)s", "-g", "%(IN_GENOME)s"]
        argv.extend(("-l", str(from_start)))
        argv.extend(("-r", str(from_end)))

        # Optional flags, in the same order as before: '-s' then '-pct'.
        if strand_relative:
            argv.append("-s")
        if type(from_start) is float:
            argv.append("-pct")

        slop = AtomicCmd(argv,
                         IN_FILE=infile,
                         IN_GENOME=genome,
                         OUT_STDOUT=outfile)

        CommandNode.__init__(self,
                             description="<SlopBed: '%s' -> '%s'>"
                             % (infile, outfile),
                             command=slop,
                             dependencies=dependencies)
Esempio n. 7
0
 def __init__(self, parameters):
     """Create the node from a prepared command builder.

     'parameters' must provide 'input_bams' (a sized collection),
     'output_bam', 'command' (with a finalize() method) and
     'dependencies'.
     """
     bam_count = len(parameters.input_bams)
     description = "<Merge BAMs: %i file(s) -> '%s'>" \
         % (bam_count, parameters.output_bam)
     merge_command = parameters.command.finalize()
     CommandNode.__init__(self,
                          description=description,
                          command=merge_command,
                          dependencies=parameters.dependencies)
Esempio n. 8
0
 def __init__(self, parameters):
     """Create the node from the finalized 'merge' command.

     'parameters' must provide 'commands' (a mapping containing the key
     'merge' whose value has a finalize() method) and 'dependencies'.
     """
     commands = [parameters.commands['merge'].finalize()]
     # The closing '>' was missing, leaving the description unbalanced.
     description = "<Variant Merge Node>"
     CommandNode.__init__(self,
                          description  = description,
                          command      = ParallelCmds(commands),
                          dependencies = parameters.dependencies)
Esempio n. 9
0
 def __init__(self, parameters):
     """Create the node from a prepared command builder.

     'parameters' must provide 'input_bams' (summarised for the
     description by describe_files), 'command' (with a finalize()
     method) and 'dependencies'.
     """
     inputs_summary = describe_files(parameters.input_bams)
     description = "<MarkDuplicates: %s>" % (inputs_summary, )
     markdup_command = parameters.command.finalize()
     CommandNode.__init__(self,
                          description=description,
                          command=markdup_command,
                          dependencies=parameters.dependencies)
Esempio n. 10
0
 def __init__(self, parameters):
     """Create the node from the 'pileup' and 'bcftools' commands.

     'parameters' must provide 'commands' (a mapping containing the keys
     'pileup' and 'bcftools', whose values have a finalize() method),
     'outfile' (a path; only its basename is shown) and 'dependencies'.
     The two finalized commands are wrapped in a single ParallelCmds.
     """
     commands = [parameters.commands[key].finalize()
                 for key in ('pileup', 'bcftools')]
     # The closing '>' was missing, leaving the description unbalanced.
     description = "<Samtools VariantCaller : {}>".format(
         os.path.basename(parameters.outfile))
     CommandNode.__init__(self,
                          description  = description,
                          command      = ParallelCmds(commands),
                          dependencies = parameters.dependencies)
Esempio n. 11
0
    def __init__(self, config, reference, input_files, output_directory,
                 dependencies):
        """Run 'mapDamage --no-stats' on the concatenated input BAMs.

        The input is read from stdin (as produced by
        concatenate_input_bams) and the listed result files are registered
        as outputs inside 'output_directory'.

        Arguments:
          config -- forwarded to concatenate_input_bams.
          reference -- value passed to mapDamage via '-r'.
          input_files -- sized collection of input files.
          output_directory -- directory under which outputs are registered.
          dependencies -- forwarded unchanged to CommandNode.
        """
        cat_cmds, cat_obj = concatenate_input_bams(config, input_files)

        def in_output_dir(filename):
            # Location of an expected result file in the output directory.
            return os.path.join(output_directory, filename)

        argv = ["mapDamage", "--no-stats",
                "-n", _MAPDAMAGE_MAX_READS,
                "-i", "-",
                "-d", "%(TEMP_DIR)s",
                "-r", reference]
        cmd_map = AtomicCmd(
            argv,
            IN_STDIN=cat_obj,
            OUT_FREQ_3p=in_output_dir("3pGtoA_freq.txt"),
            OUT_FREQ_5p=in_output_dir("5pCtoT_freq.txt"),
            OUT_COMP_USER=in_output_dir("dnacomp.txt"),
            OUT_PLOT_FRAG=in_output_dir("Fragmisincorporation_plot.pdf"),
            OUT_PLOT_LEN=in_output_dir("Length_plot.pdf"),
            OUT_LENGTH=in_output_dir("lgdistribution.txt"),
            OUT_MISINCORP=in_output_dir("misincorporation.txt"),
            OUT_LOG=in_output_dir("Runtime_log.txt"),
            CHECK_VERSION=MAPDAMAGE_VERSION)

        description = "<mapDamage: %i file(s) -> '%s'>" % (len(input_files),
                                                           output_directory)
        pipeline = ParallelCmds(cat_cmds + [cmd_map])
        CommandNode.__init__(self,
                             description=description,
                             command=pipeline,
                             dependencies=dependencies)
Esempio n. 12
0
    def __init__(self, config, reference, input_files, output_file,
                 dependencies):
        """Run mapDamage twice in sequence over the concatenated inputs.

        The first invocation runs mapDamage without '--rescale-only'; the
        second runs it with '--rescale-only' and writes the BAM given by
        'output_file' via '--rescale-out'. Each invocation gets its own
        set of concatenation commands feeding its stdin.

        Arguments:
          config -- forwarded to concatenate_input_bams.
          reference -- value passed to mapDamage via '-r'.
          input_files -- sized collection of input files.
          output_file -- destination of the rescaled BAM.
          dependencies -- forwarded unchanged to CommandNode.
        """
        # First pass: plain mapDamage run.
        first_cat, first_stdin = concatenate_input_bams(config, input_files)
        first_call = ["mapDamage",
                      "-n", _MAPDAMAGE_MAX_READS,
                      "-i", "-",
                      "-d", "%(TEMP_DIR)s",
                      "-r", reference]
        cmd_map = AtomicCmd(first_call,
                            IN_STDIN=first_stdin,
                            CHECK_VERSION=MAPDAMAGE_VERSION)
        train_cmds = ParallelCmds(first_cat + [cmd_map])

        # Second pass: rescale only, producing the output BAM.
        second_cat, second_stdin = concatenate_input_bams(config, input_files)
        second_call = ["mapDamage", "--rescale-only",
                       "-n", _MAPDAMAGE_MAX_READS,
                       "-i", "-",
                       "-d", "%(TEMP_DIR)s",
                       "-r", reference,
                       "--rescale-out", "%(OUT_BAM)s"]
        cmd_scale = AtomicCmd(second_call,
                              IN_STDIN=second_stdin,
                              OUT_BAM=output_file,
                              CHECK_VERSION=MAPDAMAGE_VERSION)
        rescale_cmds = ParallelCmds(second_cat + [cmd_scale])

        description = "<mapDamageRescale: %i file(s) -> '%s'>" % (
            len(input_files), output_file)
        both_passes = SequentialCmds([train_cmds, rescale_cmds])
        CommandNode.__init__(self,
                             description=description,
                             command=both_passes,
                             dependencies=dependencies)
Esempio n. 13
0
    def __init__(self, parameters):
        """Create the node from a prepared command builder.

        'parameters' must provide 'directory' (stored as self._directory
        and shown in the description), 'command' (with a finalize()
        method) and 'dependencies'.
        """
        directory = parameters.directory
        self._directory = directory

        description = "<mapDamage (model): %r>" % (directory, )
        model_command = parameters.command.finalize()
        CommandNode.__init__(self,
                             description=description,
                             command=model_command,
                             dependencies=parameters.dependencies)
Esempio n. 14
0
 def __init__(self, parameters):
     """Create the node from a prepared command builder.

     'parameters' must provide 'command' (with a finalize() method),
     'input_file', 'prefix' and 'dependencies'.
     """
     index_command = parameters.command.finalize()
     source_and_prefix = (parameters.input_file, parameters.prefix)
     CommandNode.__init__(self,
                          description="<Bowtie2 Index '%s' -> '%s.*'>"
                          % source_and_prefix,
                          command=index_command,
                          dependencies=parameters.dependencies)
Esempio n. 15
0
 def __init__(self, parameters):
     """Create the node from a prepared command builder.

     'parameters' must provide 'command' (with a finalize() method),
     'infile', 'outfile' and 'dependencies'.
     """
     regions_command = parameters.command.finalize()
     files = (parameters.infile, parameters.outfile)
     CommandNode.__init__(self,
                          command=regions_command,
                          description="<BuildRegions: '%s' -> '%s'>" % files,
                          dependencies=parameters.dependencies)
Esempio n. 16
0
 def __init__(self, parameters):
     """Create the node from the finalized 'Snp' command.

     'parameters' must provide 'commands' (a mapping containing the key
     'Snp' whose value has a finalize() method) and 'dependencies'.
     """
     snp_command = parameters.commands['Snp'].finalize()
     CommandNode.__init__(
         self,
         description="<SNP List Generator Node>",
         command=ParallelCmds([snp_command]),
         dependencies=parameters.dependencies,
     )
Esempio n. 17
0
 def __init__(self, parameters):
     """Create the node from the pileup, genotype and bgzip commands.

     'parameters' must provide 'commands' (a mapping containing the keys
     'pileup', 'genotype' and 'bgzip', whose values have a finalize()
     method), 'infile', 'outfile' and 'dependencies'. The finalized
     commands are wrapped, in that order, in a single ParallelCmds.
     """
     finalized = []
     for step in ("pileup", "genotype", "bgzip"):
         finalized.append(parameters.commands[step].finalize())

     files = (parameters.infile, parameters.outfile)
     description = "<Genotyper: '%s' -> '%s'>" % files
     CommandNode.__init__(self,
                          description=description,
                          command=ParallelCmds(finalized),
                          dependencies=parameters.dependencies)
Esempio n. 18
0
    def __init__(self, parameters):
        """Assemble the adapter-removal command with its (de)compressors.

        'parameters' must provide 'version', 'basename', 'input_files_1',
        'input_files_2', 'output_format', 'output_prefix', 'command' (with
        a finalize() method) and 'dependencies'.

        Raises CmdError if the number of mate 1 and mate 2 files differ.
        """
        self._version = parameters.version
        self._basename = parameters.basename

        n_mate_1 = len(parameters.input_files_1)
        n_mate_2 = len(parameters.input_files_2)
        if n_mate_1 != n_mate_2:
            raise CmdError("Number of mate 1 files differ from mate 2 files: %i != %i" \
                               % (n_mate_1, n_mate_2))

        def zip_output(suffix):
            # Compression command for one output stream of the trimmer.
            return _build_zip_command(parameters.output_format,
                                      parameters.output_prefix,
                                      suffix)

        zcat_pair_1 = _build_unicat_command(parameters.input_files_1,
                                            "uncompressed_input_1")
        zcat_pair_2 = _build_unicat_command(parameters.input_files_2,
                                            "uncompressed_input_2")
        zip_pair_1 = zip_output(".pair1.truncated")
        zip_pair_2 = zip_output(".pair2.truncated")
        zip_discarded = zip_output(".discarded")
        adapterrm = parameters.command.finalize()

        # The set of additional output streams depends on the version.
        if parameters.version == VERSION_15:
            extra_suffixes = (".collapsed",
                              ".collapsed.truncated",
                              ".singleton.truncated")
        else:
            extra_suffixes = (".singleton.aln.truncated",
                              ".singleton.unaln.truncated")

        commands = [adapterrm, zip_pair_1, zip_pair_2]
        for suffix in extra_suffixes:
            commands.append(zip_output(suffix))
        commands.extend((zip_discarded, zcat_pair_1, zcat_pair_2))

        # Opening of pipes block, so the order of these commands is dependent
        # upon the order of file-opens in atomiccmd and the programs
        # themselves.
        commands = ParallelCmds(commands)

        pairs_summary = fileutils.describe_files(parameters.input_files_1)
        description  = "<PE_AdapterRM: %s -> '%s.*'>" \
            % (pairs_summary.replace("file", "pair"),
               parameters.output_prefix)

        CommandNode.__init__(self,
                             description=description,
                             command=commands,
                             dependencies=parameters.dependencies)
Esempio n. 19
0
    def __init__(self, parameters):
        """Create the node from a prepared command builder.

        'parameters' must provide 'input_alignment', 'input_partitions'
        and 'output_tree' (all three are stored on the instance),
        'command' (with a finalize() method) and 'dependencies'.
        """
        self._input_alignment = parameters.input_alignment
        self._input_partitions = parameters.input_partitions
        self._output_tree = parameters.output_tree

        tree_command = parameters.command.finalize()
        description = "<RAxMLParsimonyTree: '%s' -> '%s'>" % (
            parameters.input_alignment, parameters.output_tree)
        CommandNode.__init__(self,
                             description=description,
                             command=tree_command,
                             dependencies=parameters.dependencies)
Esempio n. 20
0
 def __init__(self, parameters):
     """Create the node from the 'unicat' and 'pileup' commands.

     'parameters' must provide 'commands' (a mapping containing the keys
     'unicat' and 'pileup', whose values have a finalize() method),
     'in_bam', 'outfile' and 'dependencies'. The finalized commands are
     wrapped, in that order, in a single ParallelCmds.
     """
     finalized = []
     for step in ("unicat", "pileup"):
         finalized.append(parameters.commands[step].finalize())

     files = (parameters.in_bam, parameters.outfile)
     CommandNode.__init__(self,
                          description="<VCFPileup: '%s' -> '%s'>" % files,
                          command=ParallelCmds(finalized),
                          dependencies=parameters.dependencies)
Esempio n. 21
0
    def __init__(self, infile, dependencies = ()):
        """Build a 'samtools index' command for 'infile'.

        The index is written to the path obtained by swapping the
        extension of 'infile' for '.bai'. 'dependencies' is forwarded
        unchanged to CommandNode.
        """
        index_path = swap_ext(infile, ".bai")
        argv = ["samtools", "index", "%(IN_BAM)s", "%(OUT_BAI)s"]
        cmd_index = AtomicCmd(argv,
                              IN_BAM=infile,
                              OUT_BAI=index_path,
                              CHECK_SAM=SAMTOOLS_VERSION)

        description = "<BAMIndex: '%s'>" % (infile,)
        CommandNode.__init__(self,
                             command=cmd_index,
                             description=description,
                             dependencies=dependencies)
Esempio n. 22
0
    def __init__(self, parameters):
        """Create the node from a prepared command builder.

        'parameters' must provide 'input_alignment' (its real path is
        stored in self._symlinks), 'output_tree' (its basename is stored
        in self._output_tree), 'command' (with a finalize() method) and
        'dependencies'.
        """
        alignment_path = os.path.realpath(parameters.input_alignment)
        self._symlinks = [alignment_path]
        self._output_tree = os.path.basename(parameters.output_tree)

        parsimonator = parameters.command.finalize()
        description = "<Parsimonator: '%s' -> '%s'>" % (
            parameters.input_alignment, parameters.output_tree)
        CommandNode.__init__(self,
                             description=description,
                             command=parsimonator,
                             dependencies=parameters.dependencies)
Esempio n. 23
0
    def __init__(self, parameters):
        """Create the node from a prepared command builder.

        'parameters' must provide 'input_alignment' and 'input_partition'
        (both stored on the instance), 'output_alignment' (its basename
        is stored in self._output_alignment), 'command' (with a
        finalize() method) and 'dependencies'.
        """
        self._input_alignment = parameters.input_alignment
        self._input_partition = parameters.input_partition
        self._output_alignment = os.path.basename(parameters.output_alignment)

        bootstrap = parameters.command.finalize()
        description = "<RAxMLBootstrap: '%s' -> '%s'>" % (
            parameters.input_alignment, parameters.output_alignment)
        CommandNode.__init__(self,
                             description=description,
                             command=bootstrap,
                             dependencies=parameters.dependencies)
Esempio n. 24
0
    def __init__(self, parameters):
        """Create the node from a prepared command builder.

        'parameters' must provide 'output_file' (stored in
        self._output_file), 'algorithm', 'input_file', 'command' (with a
        finalize() method) and 'dependencies'.
        """
        self._output_file = parameters.output_file

        shown = (parameters.algorithm,
                 parameters.input_file,
                 parameters.output_file)
        description = "<MAFFTNode (%s): '%s' -> '%s'>" % shown

        mafft_command = parameters.command.finalize()
        CommandNode.__init__(self,
                             description=description,
                             command=mafft_command,
                             dependencies=parameters.dependencies)
Esempio n. 25
0
    def __init__(self, parameters):
        """Create the node from a prepared command builder.

        'parameters' must provide 'output_template' (its basename is
        stored in self._template), 'threads' (shown in the description and
        forwarded to CommandNode), 'input_binary', 'command' (with a
        finalize() method) and 'dependencies'.
        """
        self._template = os.path.basename(parameters.output_template)

        examl_command = parameters.command.finalize()
        description = "<EXaML (%i thread(s)): '%s' -> '%s'>" % (
            parameters.threads,
            parameters.input_binary,
            parameters.output_template)
        CommandNode.__init__(self,
                             description=description,
                             command=examl_command,
                             threads=parameters.threads,
                             dependencies=parameters.dependencies)
Esempio n. 26
0
    def __init__(self, parameters):
        """Create the node from a prepared command builder.

        'parameters' must provide 'infile_vcf' (stored in self._in_vcf),
        'outfile', 'command' (with a finalize() method) and
        'dependencies'.
        """
        self._in_vcf = parameters.infile_vcf

        pileup_command = parameters.command.finalize()
        files = (parameters.infile_vcf, parameters.outfile)
        CommandNode.__init__(self,
                             command=pileup_command,
                             description="<VCFPileup: '%s' -> '%s'>" % files,
                             dependencies=parameters.dependencies)
Esempio n. 27
0
 def __init__(self, parameters):
     """Create the node from the builder's ordered set of commands.

     'parameters' must provide 'prefix' (validated first by
     _check_bwa_prefix), 'commands' (a mapping whose values have a
     finalize() method), 'order' (the keys of 'commands' in the order to
     finalize them), 'threads', 'input_file_1' and 'dependencies'.
     """
     _check_bwa_prefix(parameters.prefix)

     finalized = []
     for step in parameters.order:
         finalized.append(parameters.commands[step].finalize())
     command = ParallelCmds(finalized)

     shown = (parameters.threads, parameters.input_file_1)
     CommandNode.__init__(self,
                          description="<PE_BWA (%i threads): '%s'>" % shown,
                          command=command,
                          threads=parameters.threads,
                          dependencies=parameters.dependencies)
Esempio n. 28
0
    def __init__(self, infile, dependencies=()):
        """Build a 'samtools faidx' command for the FASTA file 'infile'.

        The command is given the basename of 'infile' as a temporary
        input, while 'infile' itself is registered as an input and
        'infile' + '.fai' as an output. 'infile' is stored in
        self._infile; 'dependencies' is forwarded unchanged to
        CommandNode.
        """
        self._infile = infile

        argv = ["samtools", "faidx", "%(TEMP_IN_FASTA)s"]
        local_name = os.path.basename(infile)
        cmd_faidx = AtomicCmd(argv,
                              TEMP_IN_FASTA=local_name,
                              IN_FASTA=infile,
                              OUT_TBI=infile + ".fai",
                              CHECK_SAM=SAMTOOLS_VERSION)

        description = "<FastaIndex: '%s'>" % (infile,)
        CommandNode.__init__(self,
                             command=cmd_faidx,
                             description=description,
                             dependencies=dependencies)
Esempio n. 29
0
    def __init__(self, parameters):
        """Create the node from a prepared command builder.

        'parameters' must provide 'input_alignment' and 'input_partition'
        (their real paths are stored, in that order, in self._symlinks),
        'output_file' (its basename is stored in self._output_file),
        'command' (with a finalize() method) and 'dependencies'.
        """
        alignment_path = os.path.realpath(parameters.input_alignment)
        partition_path = os.path.realpath(parameters.input_partition)
        self._symlinks = [alignment_path, partition_path]
        self._output_file = os.path.basename(parameters.output_file)

        parser_command = parameters.command.finalize()
        description = "<EXaMLParser: '%s' -> '%s'>" % (
            parameters.input_alignment, parameters.output_file)
        CommandNode.__init__(self,
                             description=description,
                             command=parser_command,
                             dependencies=parameters.dependencies)
Esempio n. 30
0
    def __init__(self, config, input_bams, output_bam, dependencies=()):
        """Run 'bam_rmdup_collapsed --remove-duplicates' on the input BAMs.

        The inputs are concatenated by the commands returned from
        concatenate_input_bams and fed to the filter on stdin; the
        filter's stdout is written to 'output_bam'. 'dependencies' is
        forwarded unchanged to CommandNode.
        """
        cat_cmds, cat_obj = concatenate_input_bams(config, input_bams)

        argv = ["bam_rmdup_collapsed", "--remove-duplicates"]
        rmdup = AtomicCmd(argv,
                          IN_STDIN=cat_obj,
                          OUT_STDOUT=output_bam)
        pipeline = ParallelCmds(cat_cmds + [rmdup])

        inputs_summary = describe_files(input_bams)
        CommandNode.__init__(self,
                             description="<FilterCollapsedBAM: %s>"
                             % (inputs_summary, ),
                             command=pipeline,
                             dependencies=dependencies)