Exemple #1
0
    def __init__(self, config, reference, intervals, infiles, outfile,
                 dependencies=()):
        """Build a node running GATK IndelRealigner together with calmd.

        Arguments:
          config -- object providing 'jar_root' (folder in which
                    GenomeAnalysisTK.jar is looked up) and 'jre_options'.
          reference -- passed to GATK via -R and to 'samtools calmd'; the
                       result of swap_ext(reference, ".dict") is registered
                       as IN_REF_DICT.
          intervals -- file passed to GATK via -targetIntervals.
          infiles -- one or more input files; coerced to a tuple.
          outfile -- registered as OUT_BAMFILE of the GATK command.
          dependencies -- forwarded unchanged to CommandNode.
        """
        self._basename = os.path.basename(outfile)
        infiles = safe_coerce_to_tuple(infiles)

        gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        realigner = AtomicJavaCmdBuilder(gatk_jar,
                                         jre_options=config.jre_options)

        # Key / value options, set in the same order as before
        for option, value in (("-T", "IndelRealigner"),
                              ("-R", "%(IN_REFERENCE)s"),
                              ("-targetIntervals", "%(IN_INTERVALS)s"),
                              ("-o", "%(OUT_BAMFILE)s"),
                              ("--bam_compression", 0)):
            realigner.set_option(option, value)
        realigner.set_option("--disable_bam_indexing")
        _set_input_files(realigner, infiles)

        realigner.set_kwargs(
            IN_REFERENCE=reference,
            IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
            IN_INTERVALS=intervals,
            OUT_BAMFILE=outfile,
            CHECK_GATK=_get_gatk_version_check(config))

        # 'samtools calmd' reads the file named after the output basename
        # and writes its result to "<basename>.calmd" via STDOUT.
        calmd_call = ["samtools", "calmd", "-b",
                      "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
        calmd = AtomicCmd(calmd_call,
                          TEMP_IN_BAM=self._basename,
                          IN_REF=reference,
                          TEMP_OUT_STDOUT=self._basename + ".calmd",
                          CHECK_VERSION=SAMTOOLS_VERSION)

        description = "<GATK Indel Realigner (aligning): %s -> %r>" % (
            describe_files(infiles), outfile)
        commands = ParallelCmds([realigner.finalize(), calmd])

        CommandNode.__init__(self,
                             description=description,
                             command=commands,
                             dependencies=dependencies)
Exemple #2
0
    def __init__(self, samples, prefix, output_prefix, dependencies=()):
        """Build a node producing the TreeMix tree / residual plots and the
        variance text file, using three commands run sequentially.

        Arguments:
          samples -- file registered as IN_SAMPLES for both plots.
          prefix -- TreeMix prefix; passed as given and in absolute form
                    to self._plot_command.
          output_prefix -- outputs are written to '<output_prefix>_*'.
          dependencies -- forwarded unchanged to CommandNode.
        """
        abs_prefix = os.path.abspath(prefix)
        temp_name = os.path.basename(output_prefix)

        def _figure(mode, suffix):
            # Shared layout for the two plotting commands (PDF + PNG)
            return self._plot_command(
                prefix, mode, abs_prefix,
                "%(IN_SAMPLES)s", "%(TEMP_OUT_PREFIX)s",
                IN_SAMPLES=samples,
                TEMP_OUT_PREFIX=temp_name + suffix,
                OUT_PDF=output_prefix + suffix + ".pdf",
                OUT_PNG=output_prefix + suffix + ".png")

        # TreeMix plots with migration edges
        tree_cmd = _figure("plot_tree", "_tree")
        # Heatmap showing TreeMix residuals
        residuals_cmd = _figure("plot_residuals", "_residuals")
        # Text file containing % of variance explained by model
        variance_cmd = self._plot_command(
            prefix, "variance", abs_prefix, "%(OUT_TXT)s",
            OUT_TXT=output_prefix + "_variance.txt")

        description = "<PlotTreemix -> '%s.*'>" % (output_prefix,)
        commands = SequentialCmds((tree_cmd, residuals_cmd, variance_cmd))

        CommandNode.__init__(self,
                             description=description,
                             command=commands,
                             dependencies=dependencies)
Exemple #3
0
    def __init__(self, config, reference, infiles, outfile,
                 threads=1, dependencies=()):
        """Build a node running GATK RealignerTargetCreator.

        Arguments:
          config -- object providing 'jar_root' (folder in which
                    GenomeAnalysisTK.jar is looked up) and 'jre_options'.
          reference -- passed to GATK via -R; the result of
                       swap_ext(reference, ".dict") is registered as
                       IN_REF_DICT.
          infiles -- one or more input files; coerced to a tuple.
          outfile -- intervals file written by GATK (-o).
          threads -- requested thread count; the value actually used is
                     whatever _get_max_threads(reference, threads) returns.
          dependencies -- forwarded unchanged to CommandNode.
        """
        threads = _get_max_threads(reference, threads)
        infiles = safe_coerce_to_tuple(infiles)

        gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        trainer = AtomicJavaCmdBuilder(gatk_jar,
                                       jre_options=config.jre_options)
        for option, value in (("-T", "RealignerTargetCreator"),
                              ("-R", "%(IN_REFERENCE)s"),
                              ("-o", "%(OUT_INTERVALS)s"),
                              ("-nt", threads)):
            trainer.set_option(option, value)

        _set_input_files(trainer, infiles)
        trainer.set_kwargs(
            IN_REFERENCE=reference,
            IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
            OUT_INTERVALS=outfile,
            CHECK_GATK=_get_gatk_version_check(config))

        description = "<GATK Indel Realigner (training): %s -> %r>" % (
            describe_files(infiles), outfile)

        CommandNode.__init__(self,
                             threads=threads,
                             description=description,
                             command=trainer.finalize(),
                             dependencies=dependencies)
Exemple #4
0
    def __init__(self,
                 output_prefix,
                 tfam,
                 tped,
                 indep_filter=None,
                 indep_parameters=None,
                 plink_parameters=None,
                 dependencies=()):
        """Build a node converting TPED / TFAM files to BED files via plink.

        Arguments:
          output_prefix -- outputs are '<output_prefix>.bed', '.bim',
                           '.fam' and '.log'.
          tfam, tped -- input files given to plink via --tfam / --tped.
          indep_filter, indep_parameters -- accepted, but not used by this
                                            constructor.
          plink_parameters -- extra parameters; converted to command-line
                              arguments by self._parse_parameters.
          dependencies -- forwarded unchanged to CommandNode.
        """
        temp_prefix = os.path.basename(output_prefix)

        call = ["plink", "--make-bed", "--noweb"]
        call += ["--tped", "%(IN_TPED)s"]
        call += ["--tfam", "%(IN_TFAM)s"]
        call += ["--out", "%(TEMP_OUT_PREFIX)s"]
        call.extend(self._parse_parameters(plink_parameters))

        files = dict(IN_TPED=tped,
                     IN_TFAM=tfam,
                     TEMP_OUT_PREFIX=temp_prefix,
                     OUT_BED=output_prefix + ".bed",
                     OUT_BIM=output_prefix + ".bim",
                     OUT_FAM=output_prefix + ".fam",
                     OUT_LOG=output_prefix + ".log",
                     TEMP_OUT_NOSEX=temp_prefix + ".nosex",
                     TEMP_OUT_NOF=temp_prefix + ".nof",
                     CHECK_VERSION=PLINK_VERSION)
        plink = AtomicCmd(call, set_cwd=True, **files)

        description = "<BuildBEDFiles -> '%s.*'>" % (output_prefix,)
        CommandNode.__init__(self,
                             description=description,
                             command=plink,
                             dependencies=dependencies)
Exemple #5
0
    def __init__(self, infile, outfile, genome, from_start=0, from_end=0,
                 strand_relative=False, dependencies=()):
        """Build a node that runs 'bedtools slop' on 'infile'.

        Arguments:
          infile -- file passed via -i.
          outfile -- registered as the command's OUT_STDOUT.
          genome -- file passed via -g.
          from_start, from_end -- values passed via -l and -r; both must
                                  be of the same type, and -pct is added
                                  when they are floats.
          strand_relative -- if true, the -s flag is added.
          dependencies -- forwarded unchanged to CommandNode.

        Raises ValueError if 'from_start' and 'from_end' differ in type.
        """
        if type(from_start) != type(from_end):
            raise ValueError("Parameters 'from_start' and 'from_end' should "
                             "be of same type!")

        slop_call = ["bedtools", "slop"]
        slop_call += ["-i", "%(IN_FILE)s"]
        slop_call += ["-g", "%(IN_GENOME)s"]
        slop_call += ["-l", str(from_start)]
        slop_call += ["-r", str(from_end)]

        # Optional flags, appended in a fixed order
        optional_flags = (("-s", strand_relative),
                          ("-pct", type(from_start) is float))
        slop_call.extend(flag for flag, wanted in optional_flags if wanted)

        slop = AtomicCmd(slop_call,
                         IN_FILE=infile,
                         IN_GENOME=genome,
                         OUT_STDOUT=outfile,
                         CHECK_VERSION=BEDTOOLS_VERSION)

        CommandNode.__init__(self,
                             description="<SlopBed: '%s' -> '%s'>"
                             % (infile, outfile),
                             command=slop,
                             dependencies=dependencies)
Exemple #6
0
    def __init__(self,
                 output_root,
                 table,
                 bamfile,
                 downsample,
                 dependencies=()):
        """Build a node running the 'zonkey_tped' command on a BAM file.

        Arguments:
          output_root -- folder in which 'common.tfam', 'common.summary',
                         'incl_ts.tped' and 'excl_ts.tped' are registered
                         as outputs.
          table -- file registered as IN_TABLE.
          bamfile -- file registered as IN_BAM.
          downsample -- value passed via --downsample; if false, the
                        '.bai' index of 'bamfile' is also required.
          dependencies -- forwarded unchanged to CommandNode.
        """
        def _output(filename):
            return os.path.join(output_root, filename)

        builder = factory.new("zonkey_tped")
        builder.set_option("--name", "Sample")
        builder.set_option("--downsample", downsample)
        for placeholder in ("%(TEMP_DIR)s", "%(IN_TABLE)s", "%(IN_BAM)s"):
            builder.add_value(placeholder)

        if not downsample:
            # Needed for random access (chromosomes are read 1 ... 31)
            builder.set_kwargs(IN_BAI=fileutils.swap_ext(bamfile, ".bai"))

        builder.set_kwargs(OUT_TFAM=_output("common.tfam"),
                           OUT_SUMMARY=_output("common.summary"),
                           OUT_TPED_INCL_TS=_output("incl_ts.tped"),
                           OUT_TPED_EXCL_TS=_output("excl_ts.tped"),
                           IN_TABLE=table,
                           IN_BAM=bamfile)

        description = "<BuildTPEDFiles -> %r>" % (_output('*'),)
        CommandNode.__init__(self,
                             description=description,
                             command=builder.finalize(),
                             dependencies=dependencies)
Exemple #7
0
    def __init__(self, samples, prefix, output_prefix, dependencies=()):
        """Build a node plotting a PCA with the zonkey 'pca.r' R script.

        Arguments:
          samples -- file registered as IN_SAMPLES.
          prefix -- '<prefix>.eval' and '<prefix>.evec' are registered as
                    inputs; the absolute form of the prefix is passed to
                    the script.
          output_prefix -- outputs are '<output_prefix>.pdf' and '.png'.
          dependencies -- forwarded unchanged to CommandNode.
        """
        abs_prefix = os.path.abspath(prefix)
        pca_script = rtools.rscript("zonkey", "pca.r")

        settings = dict(AUX_SCRIPT=pca_script,
                        IN_FILE_EVAL=prefix + ".eval",
                        IN_FILE_EVEC=prefix + ".evec",
                        IN_SAMPLES=samples,
                        TEMP_OUT_PREFIX=os.path.basename(output_prefix),
                        OUT_PDF=output_prefix + ".pdf",
                        OUT_PNG=output_prefix + ".png",
                        CHECK_R=RSCRIPT_VERSION,
                        CHECK_R_GGPLOT2=rtools.requirement("ggplot2"),
                        CHECK_R_LABELS=rtools.requirement("ggrepel"))

        plot = AtomicCmd(["Rscript", pca_script, abs_prefix,
                          "%(IN_SAMPLES)s", "%(TEMP_OUT_PREFIX)s"],
                         set_cwd=True,
                         **settings)

        description = "<PlotPCA -> '%s.*'>" % (output_prefix,)
        CommandNode.__init__(self,
                             description=description,
                             command=plot,
                             dependencies=dependencies)
Exemple #8
0
    def __init__(self, infile, index_format='.bai', dependencies=()):
        """Build a node indexing a BAM file using 'samtools index'.

        Three commands are run sequentially: 'ln -s' links the BAM under
        its basename, 'samtools index' is run on that link, and 'mv' moves
        the resulting index to swap_ext(infile, index_format).

        Arguments:
          infile -- the BAM file to be indexed.
          index_format -- either '.bai' or '.csi' (the latter adds -c).
          dependencies -- forwarded unchanged to CommandNode.

        Raises ValueError for any other 'index_format'.
        """
        link_name = os.path.basename(infile)

        if index_format not in ('.bai', '.csi'):
            raise ValueError("Unknown format type %r; expected .bai or .csi" %
                             (index_format, ))

        index_call = ["samtools", "index"]
        if index_format == '.bai':
            required_version = SAMTOOLS_VERSION
        else:
            required_version = SAMTOOLS_VERSION_1x
            index_call.append("-c")
        index_call.append("%(TEMP_IN_BAM)s")

        link_cmd = AtomicCmd(["ln", "-s", "%(IN_BAM)s", "%(TEMP_OUT_BAM)s"],
                             IN_BAM=infile,
                             TEMP_OUT_BAM=link_name,
                             set_cwd=True)
        index_cmd = AtomicCmd(index_call,
                              TEMP_IN_BAM=link_name,
                              CHECK_SAM=required_version)
        rename_cmd = AtomicCmd(["mv", "%(TEMP_IN_BAM)s", "%(OUT_BAM)s"],
                               TEMP_IN_BAM=link_name + index_format,
                               OUT_BAM=swap_ext(infile, index_format))

        description = "<BAMIndex (%s): '%s'>" % (index_format[1:].upper(),
                                                 infile)
        CommandNode.__init__(self,
                             description=description,
                             command=SequentialCmds((link_cmd,
                                                     index_cmd,
                                                     rename_cmd)),
                             dependencies=dependencies)
Exemple #9
0
    def __init__(self, samples, prefix, output_prefix, dependencies=()):
        """Build a node that plots TreeMix results.

        Three commands created via self._plot_command are run in sequence;
        they write '<output_prefix>_tree.(pdf|png)',
        '<output_prefix>_residuals.(pdf|png)', and
        '<output_prefix>_variance.txt'.

        Arguments:
          samples -- file registered as IN_SAMPLES for the two plots.
          prefix -- TreeMix prefix; also passed in absolute form.
          output_prefix -- common prefix of all output files.
          dependencies -- forwarded unchanged to CommandNode.
        """
        abs_prefix = os.path.abspath(prefix)
        basename = os.path.basename(output_prefix)

        # "plot_tree": TreeMix plots with migration edges
        # "plot_residuals": Heatmap showing TreeMix residuals
        steps = []
        for mode, suffix in (("plot_tree", "_tree"),
                             ("plot_residuals", "_residuals")):
            steps.append(self._plot_command(
                prefix, mode, abs_prefix,
                "%(IN_SAMPLES)s", "%(TEMP_OUT_PREFIX)s",
                IN_SAMPLES=samples,
                TEMP_OUT_PREFIX=basename + suffix,
                OUT_PDF=output_prefix + suffix + ".pdf",
                OUT_PNG=output_prefix + suffix + ".png"))

        # Text file containing % of variance explained by model
        steps.append(self._plot_command(
            prefix, "variance", abs_prefix, "%(OUT_TXT)s",
            OUT_TXT=output_prefix + "_variance.txt"))

        CommandNode.__init__(self,
                             description="<PlotTreemix -> '%s.*'>"
                             % (output_prefix,),
                             command=SequentialCmds(tuple(steps)),
                             dependencies=dependencies)
Exemple #10
0
    def __init__(self, samples, prefix, output_prefix, dependencies=()):
        """Build a node that runs the zonkey 'pca.r' script via Rscript.

        '<prefix>.eval' and '<prefix>.evec' are registered as inputs along
        with 'samples'; '<output_prefix>.pdf' and '<output_prefix>.png'
        are registered as outputs. The R packages 'ggplot2' and
        'directlabels' are listed as requirements.
        """
        abs_prefix = os.path.abspath(prefix)
        r_script = rtools.rscript("zonkey", "pca.r")
        temp_prefix_key = "%(TEMP_OUT_PREFIX)s"

        arguments = ["Rscript", r_script]
        arguments += [abs_prefix, "%(IN_SAMPLES)s", temp_prefix_key]

        files_and_checks = dict(
            AUX_SCRIPT=r_script,
            IN_FILE_EVAL=prefix + ".eval",
            IN_FILE_EVEC=prefix + ".evec",
            IN_SAMPLES=samples,
            TEMP_OUT_PREFIX=os.path.basename(output_prefix),
            OUT_PDF=output_prefix + ".pdf",
            OUT_PNG=output_prefix + ".png",
            CHECK_R=RSCRIPT_VERSION,
            CHECK_R_GGPLOT2=rtools.requirement("ggplot2"),
            CHECK_R_LABELS=rtools.requirement("directlabels"))

        pca_cmd = AtomicCmd(arguments, set_cwd=True, **files_and_checks)

        CommandNode.__init__(self,
                             description="<PlotPCA -> '%s.*'>"
                             % (output_prefix,),
                             command=pca_cmd,
                             dependencies=dependencies)
Exemple #11
0
    def __init__(self, input_prefix, output_prefix, tfam,
                 parameters=None, dependencies=()):
        """Build a node running 'plink --freq --missing' and gzip'ing the
        resulting '.frq.strat' file.

        Arguments:
          input_prefix -- prefix of the '.bed', '.bim' and '.fam' input
                          files; passed to plink via --bfile.
          output_prefix -- outputs are '<output_prefix>.frq.strat.gz',
                           '.frq.strat.nosex' and '.frq.strat.log'.
          tfam -- stored as self._tfam; not used further by this
                  constructor.
          parameters -- optional string of additional plink arguments,
                        split on whitespace.
          dependencies -- forwarded unchanged to CommandNode.
        """
        basename = os.path.basename(output_prefix)

        plink_cmd = ["plink", "--freq", "--missing", "--noweb",
                     "--bfile", input_prefix,
                     "--within", "%(TEMP_OUT_CLUST)s",
                     "--out", "%(TEMP_OUT_PREFIX)s"]

        if parameters:
            plink_cmd.extend(parameters.split())

        plink = AtomicCmd(plink_cmd,
                          IN_BED=input_prefix + ".bed",
                          IN_BIM=input_prefix + ".bim",
                          IN_FAM=input_prefix + ".fam",
                          TEMP_OUT_CLUST="samples.clust",
                          OUT_NOSEX=output_prefix + ".frq.strat.nosex",
                          OUT_LOG=output_prefix + ".frq.strat.log",
                          TEMP_OUT_PREFIX=basename,
                          CHECK_VERSION=PLINK_VERSION)

        gzip = AtomicCmd(["gzip", "%(TEMP_IN_FREQ)s"],
                         TEMP_IN_FREQ=basename + ".frq.strat",
                         OUT_FREQ=output_prefix + ".frq.strat.gz")

        # FIXME: the original note here was truncated ("Can be"). These
        # attributes are not read by this constructor; presumably other
        # methods of the class use them -- verify before removing.
        self._tfam = tfam
        self._basename = basename

        # The closing '>' was missing from this description.
        CommandNode.__init__(self,
                             description="<BuildFreqFiles -> '%s.*'>"
                             % (output_prefix,),
                             command=SequentialCmds((plink, gzip)),
                             dependencies=dependencies)
Exemple #12
0
    def __init__(self, samples, treefile, bootstraps, output_prefix,
                 dependencies=()):
        """Build a node drawing a phylogeny with the 'tinytree.r' script.

        Arguments:
          samples -- file registered as IN_SAMPLES.
          treefile -- registered as IN_FILE and stored as self._treefile.
          bootstraps -- registered as IN_BOOTSTRAPS and stored as
                        self._bootstraps.
          output_prefix -- outputs are '<output_prefix>.pdf' and '.png'.
          dependencies -- forwarded unchanged to CommandNode.
        """
        tree_script = rtools.rscript("zonkey", "tinytree.r")

        call = ("Rscript", tree_script,
                "%(TEMP_OUT_FILE)s", "%(IN_SAMPLES)s", "%(TEMP_OUT_PREFIX)s")
        settings = dict(
            AUX_RSCRIPT=tree_script,
            IN_SAMPLES=samples,
            IN_FILE=treefile,
            IN_BOOTSTRAPS=bootstraps,
            TEMP_OUT_FILE="rerooted.newick",
            TEMP_OUT_PREFIX=os.path.basename(output_prefix),
            OUT_TREE_PDF=output_prefix + ".pdf",
            OUT_TREE_PNG=output_prefix + ".png",
            CHECK_RSCRIPT=RSCRIPT_VERSION,
            CHECK_RSCRIPT_APE=rtools.requirement("ape"),
            CHECK_RSCRIPT_GGPLOT2=rtools.requirement("ggplot2"),
            CHECK_RSCRIPT_GRID=rtools.requirement("grid"))
        draw_cmd = AtomicCmd(call, **settings)

        self._treefile = treefile
        self._bootstraps = bootstraps

        description = "<DrawPhylogeny -> '%s.*'>" % (output_prefix,)
        CommandNode.__init__(self,
                             description=description,
                             command=draw_cmd,
                             dependencies=dependencies)
Exemple #13
0
    def __init__(self, infile, outfile, regions, options, dependencies=()):
        """Build a node running 'vcf_filter' with its output sent to bgzip.

        Arguments:
          infile -- VCF file registered as IN_VCF.
          outfile -- registered as the OUT_STDOUT of bgzip.
          regions -- mapping; each contig in regions["HomozygousContigs"]
                     is passed via --homozygous-chromosome.
          options -- user options, applied to the vcf_filter command using
                     apply_options.
          dependencies -- forwarded unchanged to CommandNode.
        """
        filter_builder = factory.new("vcf_filter")
        filter_builder.add_value("%(IN_VCF)s")

        homozygous_contigs = regions["HomozygousContigs"]
        for contig_name in homozygous_contigs:
            filter_builder.add_option("--homozygous-chromosome", contig_name)
        filter_builder.set_kwargs(IN_VCF=infile, OUT_STDOUT=AtomicCmd.PIPE)

        apply_options(filter_builder, options)

        # bgzip takes the filter command as its STDIN
        bgzip_builder = AtomicCmdBuilder(["bgzip"],
                                         IN_STDIN=filter_builder,
                                         OUT_STDOUT=outfile)

        description = "<VCFFilter: '%s' -> '%s'>" % (infile, outfile)
        pipeline = ParallelCmds([filter_builder.finalize(),
                                 bgzip_builder.finalize()])

        CommandNode.__init__(self,
                             description=description,
                             command=pipeline,
                             dependencies=dependencies)
Exemple #14
0
    def __init__(self,
                 contigs,
                 mapping,
                 input_file,
                 output_prefix,
                 dependencies=()):
        """Build a node plotting coverage with the 'coverage.r' R script.

        Arguments:
          contigs -- stored as self._contigs.
          mapping -- dict; stored inverted (values -> keys) as
                     self._mapping.
          input_file -- registered as IN_FILE; stored as self._input_file.
          output_prefix -- outputs are '<output_prefix>.pdf' and '.png'.
          dependencies -- forwarded unchanged to CommandNode.
        """
        self._contigs = contigs
        # Inverse lookup table: value -> key
        self._mapping = dict((value, key) for key, value in mapping.items())
        self._input_file = input_file

        coverage_script = rtools.rscript("zonkey", "coverage.r")
        call = ("Rscript", coverage_script,
                "%(TEMP_OUT_TABLE)s", "%(TEMP_OUT_PREFIX)s")
        settings = dict(
            AUX_RSCRIPT=coverage_script,
            IN_FILE=input_file,
            TEMP_OUT_TABLE="contigs.table",
            OUT_PDF=output_prefix + ".pdf",
            OUT_PNG=output_prefix + ".png",
            TEMP_OUT_PREFIX=os.path.basename(output_prefix),
            CHECK_R=RSCRIPT_VERSION,
            CHECK_R_GGPLOT2=rtools.requirement("ggplot2"))
        plot_cmd = AtomicCmd(call, set_cwd=True, **settings)

        description = "<CoveragePlot -> '%s.*'>" % (output_prefix,)
        CommandNode.__init__(self,
                             description=description,
                             command=plot_cmd,
                             dependencies=dependencies)
Exemple #15
0
    def __init__(
            self,
            input_file_1,
            output_file,
            reference,
            prefix,
            input_file_2=None,
            threads=1,
            algorithm="mem",
            mapping_options=None,
            cleanup_options=None,
            dependencies=(),
    ):
        """Build a node running 'bwa mem' or 'bwa bwasw' with its output
        sent to a cleanup command.

        Arguments:
          input_file_1 -- first (or only) input file given to bwa.
          output_file -- passed to _new_cleanup_command as the output.
          reference -- passed to _get_max_threads and to the cleanup
                       command.
          prefix -- BWA prefix, passed on the bwa command-line.
          input_file_2 -- optional second input file; if given, the run is
                          described as paired-end ("_PE").
          threads -- requested thread count; the value actually used is
                     whatever _get_max_threads(reference, threads) returns.
          algorithm -- either "mem" or "bwasw".
          mapping_options -- options applied to the bwa command; None (the
                             default) is equivalent to an empty dict.
          cleanup_options -- options applied to the cleanup command; None
                             (the default) is equivalent to an empty dict.
          dependencies -- forwarded unchanged to CommandNode.

        Raises NotImplementedError for any other 'algorithm'.
        """
        if algorithm not in ("mem", "bwasw"):
            raise NotImplementedError("BWA algorithm %r not implemented" %
                                      (algorithm, ))

        # None is used in place of mutable '{}' defaults, so that no dict
        # object is shared between calls.
        if mapping_options is None:
            mapping_options = {}
        if cleanup_options is None:
            cleanup_options = {}

        threads = _get_max_threads(reference, threads)

        aln = _new_bwa_command(
            ("bwa", algorithm, prefix, "%(IN_FILE_1)s"),
            prefix,
            IN_FILE_1=input_file_1,
            OUT_STDOUT=AtomicCmd.PIPE,
        )

        if input_file_2:
            aln.add_value("%(IN_FILE_2)s")
            aln.set_kwargs(IN_FILE_2=input_file_2)

        aln.set_option("-t", threads)
        # Mark alternative hits as secondary; required by e.g. Picard
        aln.set_option("-M")

        cleanup = _new_cleanup_command(aln,
                                       output_file,
                                       reference,
                                       paired_end=input_file_1
                                       and input_file_2)

        # User-supplied options are applied after the options set above
        apply_options(aln, mapping_options)
        apply_options(cleanup, cleanup_options)

        description = _get_node_description(
            name="BWA",
            algorithm="%s%s" %
            (algorithm.upper(), "_PE" if input_file_2 else "_SE"),
            input_files_1=input_file_1,
            input_files_2=input_file_2,
            prefix=prefix,
        )

        CommandNode.__init__(
            self,
            command=ParallelCmds([aln.finalize(),
                                  cleanup.finalize()]),
            description=description,
            threads=threads,
            dependencies=dependencies,
        )
Exemple #16
0
    def __init__(self,
                 samples,
                 treefile,
                 bootstraps,
                 output_prefix,
                 dependencies=()):
        """Build a node that runs the zonkey 'tinytree.r' script via Rscript.

        'samples', 'treefile' and 'bootstraps' are registered as inputs,
        and '<output_prefix>.pdf' / '<output_prefix>.png' as outputs. The
        tree and bootstrap files are also stored on the node, as
        self._treefile and self._bootstraps.
        """
        rscript = rtools.rscript("zonkey", "tinytree.r")
        temp_prefix = os.path.basename(output_prefix)

        arguments = ("Rscript", rscript) + ("%(TEMP_OUT_FILE)s",
                                            "%(IN_SAMPLES)s",
                                            "%(TEMP_OUT_PREFIX)s")

        # R itself plus the packages listed as requirements
        requirements = dict(
            CHECK_RSCRIPT=RSCRIPT_VERSION,
            CHECK_RSCRIPT_APE=rtools.requirement("ape"),
            CHECK_RSCRIPT_GGPLOT2=rtools.requirement("ggplot2"),
            CHECK_RSCRIPT_GRID=rtools.requirement("grid"))

        phylogeny_cmd = AtomicCmd(arguments,
                                  AUX_RSCRIPT=rscript,
                                  IN_SAMPLES=samples,
                                  IN_FILE=treefile,
                                  IN_BOOTSTRAPS=bootstraps,
                                  TEMP_OUT_FILE="rerooted.newick",
                                  TEMP_OUT_PREFIX=temp_prefix,
                                  OUT_TREE_PDF=output_prefix + ".pdf",
                                  OUT_TREE_PNG=output_prefix + ".png",
                                  **requirements)

        self._treefile = treefile
        self._bootstraps = bootstraps

        CommandNode.__init__(self,
                             description="<DrawPhylogeny -> '%s.*'>"
                             % (output_prefix,),
                             command=phylogeny_cmd,
                             dependencies=dependencies)
Exemple #17
0
    def __init__(
            self,
            target_name,
            input_file,
            output_file,
            prefix,
            regions_file=None,
            dependencies=(),
    ):
        """Build a node running the 'depths' command on a BAM file.

        Arguments:
          target_name -- value passed via --target-name.
          input_file -- BAM file registered as IN_BAM.
          output_file -- registered as OUT_FILE.
          prefix -- mapping; prefix["IndexFormat"] is read only when a
                    regions file is given.
          regions_file -- optional file passed via --regions-file; if set,
                          'input_file' + index format is also required.
          dependencies -- forwarded unchanged to CommandNode.
        """
        if regions_file:
            index_format = prefix["IndexFormat"]
        else:
            index_format = regions_file

        depths = factory.new("depths")
        for placeholder in ("%(IN_BAM)s", "%(OUT_FILE)s"):
            depths.add_value(placeholder)
        depths.set_option("--target-name", target_name)
        depths.set_kwargs(OUT_FILE=output_file, IN_BAM=input_file)

        if regions_file:
            depths.set_option("--regions-file", "%(IN_REGIONS)s")
            depths.set_kwargs(IN_REGIONS=regions_file,
                              TEMP_IN_INDEX=input_file + index_format)

        description = "<DepthHistogram: %s -> '%s'>" % (input_file,
                                                        output_file)

        CommandNode.__init__(self,
                             command=depths.finalize(),
                             description=description,
                             dependencies=dependencies)
Exemple #18
0
 def __init__(self, parameters):
     """Build the node from a pre-configured 'parameters' object.

     The object is expected to provide 'command' (a builder with 'kwargs'
     and 'finalize()'), 'input_alignment', 'output_alignment', and
     'dependencies'; the builder's kwargs are kept as self._kwargs.
     """
     self._kwargs = parameters.command.kwargs

     finalized_command = parameters.command.finalize()
     description = "<RAxMLReduce: '%s' -> '%s'>" % (
         parameters.input_alignment,
         parameters.output_alignment)

     CommandNode.__init__(self,
                          command=finalized_command,
                          description=description,
                          dependencies=parameters.dependencies)
Exemple #19
0
    def __init__(self, infile, index_format='.bai', dependencies=()):
        """Build a node that creates a '.bai' or '.csi' index for a BAM.

        The BAM is symlinked under its basename, indexed using
        'samtools index' (with -c for '.csi'), and the index is then moved
        to swap_ext(infile, index_format); the steps run sequentially.

        Raises ValueError if 'index_format' is neither '.bai' nor '.csi'.
        """
        basename = os.path.basename(infile)

        if index_format == '.bai':
            samtools_version, extra_flags = SAMTOOLS_VERSION, []
        elif index_format == '.csi':
            samtools_version, extra_flags = SAMTOOLS_VERSION_1x, ["-c"]
        else:
            raise ValueError("Unknown format type %r; expected .bai or .csi"
                             % (index_format,))
        samtools_call = ["samtools", "index"] + extra_flags
        samtools_call.append("%(TEMP_IN_BAM)s")

        steps = (
            # 1. Link the BAM under its basename
            AtomicCmd(["ln", "-s", "%(IN_BAM)s", "%(TEMP_OUT_BAM)s"],
                      IN_BAM=infile,
                      TEMP_OUT_BAM=basename,
                      set_cwd=True),
            # 2. Index the linked BAM
            AtomicCmd(samtools_call,
                      TEMP_IN_BAM=basename,
                      CHECK_SAM=samtools_version),
            # 3. Move the index to its final destination
            AtomicCmd(["mv", "%(TEMP_IN_BAM)s", "%(OUT_BAM)s"],
                      TEMP_IN_BAM=basename + index_format,
                      OUT_BAM=swap_ext(infile, index_format)),
        )

        format_name = index_format[1:].upper()
        CommandNode.__init__(self,
                             description="<BAMIndex (%s): '%s'>"
                             % (format_name, infile),
                             command=SequentialCmds(steps),
                             dependencies=dependencies)
Exemple #20
0
    def __init__(self, output_root, table, bamfile, downsample,
                 dependencies=()):
        """Build a node running the 'build_tped' command on a BAM file.

        Outputs are 'common.tfam', 'common.summary', 'incl_ts.tped' and
        'excl_ts.tped' in 'output_root'. The '.bai' index of 'bamfile' is
        required only when 'downsample' is false.
        """
        tped = factory.new("build_tped")
        tped.set_option("--name", "Sample")
        tped.set_option("--downsample", downsample)
        tped.add_value("%(TEMP_DIR)s")
        tped.add_value("%(IN_TABLE)s")
        tped.add_value("%(IN_BAM)s")

        if not downsample:
            # Needed for random access (chromosomes are read 1 ... 31)
            bam_index = fileutils.swap_ext(bamfile, ".bai")
            tped.set_kwargs(IN_BAI=bam_index)

        output_files = dict(
            OUT_TFAM=os.path.join(output_root, "common.tfam"),
            OUT_SUMMARY=os.path.join(output_root, "common.summary"),
            OUT_TPED_INCL_TS=os.path.join(output_root, "incl_ts.tped"),
            OUT_TPED_EXCL_TS=os.path.join(output_root, "excl_ts.tped"))
        tped.set_kwargs(IN_TABLE=table, IN_BAM=bamfile, **output_files)

        wildcard = os.path.join(output_root, '*')
        CommandNode.__init__(self,
                             description="<BuildTPEDFiles -> %r>"
                             % (wildcard,),
                             command=tped.finalize(),
                             dependencies=dependencies)
Exemple #21
0
    def __init__(self,
                 infile,
                 bedfile,
                 outfile,
                 padding,
                 options=None,
                 dependencies=()):
        """Build a node running the 'vcf_to_fasta' command.

        Arguments:
          infile -- VCF file passed via --genotype; 'infile' + ".tbi" is
                    also registered as an input.
          bedfile -- file passed via --intervals.
          outfile -- registered as the command's OUT_STDOUT.
          padding -- value passed via --padding.
          options -- user options applied using apply_options; None (the
                     default) is equivalent to an empty dict.
          dependencies -- forwarded unchanged to CommandNode.
        """
        # None is used in place of a mutable '{}' default, so that no dict
        # object is shared between calls.
        if options is None:
            options = {}

        params = factory.new("vcf_to_fasta")
        params.set_option("--padding", padding)
        params.set_option("--genotype", "%(IN_VCFFILE)s")
        params.set_option("--intervals", "%(IN_INTERVALS)s")

        params.set_kwargs(
            IN_VCFFILE=infile,
            IN_TABIX=infile + ".tbi",
            IN_INTERVALS=bedfile,
            OUT_STDOUT=outfile,
        )

        # User-supplied options are applied after the options set above
        apply_options(params, options)

        description = "<BuildRegions: '%s' -> '%s'>" % (
            infile,
            outfile,
        )
        CommandNode.__init__(
            self,
            description=description,
            command=params.finalize(),
            dependencies=dependencies,
        )
Exemple #22
0
    def __init__(self, output_prefix, tfam, tped,
                 indep_filter=None, indep_parameters=None,
                 plink_parameters=None,
                 dependencies=()):
        """Build a node that runs 'plink --make-bed' on TPED / TFAM files.

        Outputs are '<output_prefix>.bed', '.bim', '.fam' and '.log'.
        Extra arguments are derived from 'plink_parameters' by
        self._parse_parameters; 'indep_filter' and 'indep_parameters' are
        accepted but not used by this constructor.
        """
        temp_prefix = os.path.basename(output_prefix)

        def _final(extension):
            return output_prefix + extension

        def _temporary(extension):
            return temp_prefix + extension

        base_call = ["plink", "--make-bed", "--noweb",
                     "--tped", "%(IN_TPED)s",
                     "--tfam", "%(IN_TFAM)s",
                     "--out", "%(TEMP_OUT_PREFIX)s"]
        full_call = base_call + list(self._parse_parameters(plink_parameters))

        make_bed = AtomicCmd(full_call,
                             IN_TPED=tped,
                             IN_TFAM=tfam,
                             TEMP_OUT_PREFIX=temp_prefix,
                             OUT_BED=_final(".bed"),
                             OUT_BIM=_final(".bim"),
                             OUT_FAM=_final(".fam"),
                             OUT_LOG=_final(".log"),
                             TEMP_OUT_NOSEX=_temporary(".nosex"),
                             TEMP_OUT_NOF=_temporary(".nof"),
                             CHECK_VERSION=PLINK_VERSION,
                             set_cwd=True)

        CommandNode.__init__(self,
                             description="<BuildBEDFiles -> '%s.*'>"
                             % (output_prefix,),
                             command=make_bed,
                             dependencies=dependencies)
Exemple #23
0
    def __init__(self, input_file, output_prefix, order, samples,
                 dependencies=()):
        """Build a node plotting admixture results using 'admixture.r'.

        Arguments:
          input_file -- file registered as IN_FILE.
          output_prefix -- outputs are '<output_prefix>.pdf' and '.png'.
          order -- iterable of names; stored with "Sample" appended as
                   self._order.
          samples -- registered as IN_SAMPLES; stored as self._samples.
          dependencies -- forwarded unchanged to CommandNode.
        """
        self._samples = samples
        self._order = tuple(order) + ("Sample",)

        admixture_script = rtools.rscript("zonkey", "admixture.r")
        call = ("Rscript", admixture_script, "%(IN_FILE)s",
                "%(TEMP_OUT_NAMES)s", "%(TEMP_OUT_PREFIX)s")

        settings = dict(
            AUX_RSCRIPT=admixture_script,
            IN_FILE=input_file,
            IN_SAMPLES=samples,
            OUT_PDF=output_prefix + ".pdf",
            OUT_PNG=output_prefix + ".png",
            TEMP_OUT_NAMES="samples.txt",
            TEMP_OUT_PREFIX=os.path.basename(output_prefix),
            CHECK_R=RSCRIPT_VERSION,
            CHECK_R_GGPLOT2=rtools.requirement("ggplot2"),
            CHECK_R_RESHAPE2=rtools.requirement("reshape2"))
        plot_cmd = AtomicCmd(call, set_cwd=True, **settings)

        description = "<AdmixturePlot -> '%s.*'>" % (output_prefix,)
        CommandNode.__init__(self,
                             description=description,
                             command=plot_cmd,
                             dependencies=dependencies)
Exemple #24
0
    def __init__(self, infile, index_format=".bai", dependencies=()):
        """Build a node indexing a BAM file using 'samtools index'.

        Arguments:
          infile -- BAM file to index; the index is written to
                    'infile' + index_format.
          index_format -- either ".bai" or ".csi" (the latter adds -c).
          dependencies -- forwarded unchanged to CommandNode.

        Raises ValueError for any other 'index_format'.
        """
        if index_format not in (".bai", ".csi"):
            raise ValueError("Unknown format type %r; expected .bai or .csi" %
                             (index_format, ))

        index_call = ["samtools", "index"]
        if index_format == ".csi":
            index_call.append("-c")
        index_call += ["%(IN_BAM)s", "%(OUT_IDX)s"]

        index_cmd = AtomicCmd(index_call,
                              IN_BAM=infile,
                              OUT_IDX=infile + index_format,
                              CHECK_SAM=SAMTOOLS_VERSION)

        description = "<BAMIndex (%s): '%s'>" % (index_format[1:].upper(),
                                                 infile)
        CommandNode.__init__(self,
                             description=description,
                             command=index_cmd,
                             dependencies=dependencies)
Exemple #25
0
    def __init__(self,
                 input_file,
                 output_prefix,
                 order,
                 samples,
                 dependencies=()):
        """Build a node that runs the zonkey 'admixture.r' script.

        'input_file' and 'samples' are registered as inputs, and
        '<output_prefix>.pdf' / '<output_prefix>.png' as outputs. The
        samples are stored as self._samples, and 'order' (with "Sample"
        appended) as self._order.
        """
        self._samples = samples
        self._order = tuple(order) + ("Sample", )

        rscript = rtools.rscript("zonkey", "admixture.r")
        temp_prefix = os.path.basename(output_prefix)

        placeholders = ("%(IN_FILE)s",
                        "%(TEMP_OUT_NAMES)s",
                        "%(TEMP_OUT_PREFIX)s")

        # R itself plus the packages listed as requirements
        requirements = dict(
            CHECK_R=RSCRIPT_VERSION,
            CHECK_R_GGPLOT2=rtools.requirement("ggplot2"),
            CHECK_R_RESHAPE2=rtools.requirement("reshape2"))

        admixture_cmd = AtomicCmd(("Rscript", rscript) + placeholders,
                                  AUX_RSCRIPT=rscript,
                                  IN_FILE=input_file,
                                  IN_SAMPLES=samples,
                                  OUT_PDF=output_prefix + ".pdf",
                                  OUT_PNG=output_prefix + ".png",
                                  TEMP_OUT_NAMES="samples.txt",
                                  TEMP_OUT_PREFIX=temp_prefix,
                                  set_cwd=True,
                                  **requirements)

        CommandNode.__init__(self,
                             description="<AdmixturePlot -> '%s.*'>"
                             % (output_prefix,),
                             command=admixture_cmd,
                             dependencies=dependencies)
Exemple #26
0
    def __init__(self, config, input_bams, command, index_format=None,
                 description=None, threads=1, dependencies=()):
        """Set up a node running 'command' on one or more input BAM files.

        If more than one input BAM is given, a Picard MergeSamFiles command is
        built and run in parallel with 'command'; the merged output is written
        to self.PIPE_FILE.

        Arguments:
        config       -- Passed to picard_command when a merge is required.
        input_bams   -- One or more BAM files; coerced to a tuple.
        command      -- The command to be run by this node.
        index_format -- None, ".bai" or ".csi"; may only be set when exactly
                        one input BAM is given.
        description  -- Passed on unchanged to CommandNode.
        threads      -- Passed on unchanged to CommandNode.
        dependencies -- Passed on unchanged to CommandNode.

        Raises ValueError if no input files are given, if an index is
        requested for more than one file, or if index_format is unknown.
        """
        self._input_bams = safe_coerce_to_tuple(input_bams)
        self._index_format = index_format

        if not self._input_bams:
            raise ValueError("No input BAM files specified!")
        elif len(self._input_bams) > 1 and index_format:
            raise ValueError("BAM index cannot be required for > 1 file")
        elif index_format not in (None, ".bai", ".csi"):
            raise ValueError("Unknown index format %r" % (index_format,))

        if len(self._input_bams) > 1:
            merge = picard_command(config, "MergeSamFiles")
            merge.set_option("SO", "coordinate", sep="=")
            merge.set_option("COMPRESSION_LEVEL", 0, sep="=")
            merge.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
            # Validation is mostly left to manual ValidateSamFile runs; this
            # is because .csi indexed BAM records can have "invalid" bins.
            merge.set_option("VALIDATION_STRINGENCY", "LENIENT", sep="=")
            # Use the tuple stored above rather than the raw 'input_bams'
            # argument: if the caller passed a single-pass iterable (e.g. a
            # generator), it has presumably been consumed by the coercion
            # and would yield no inputs here.
            merge.add_multiple_options("I", self._input_bams, sep="=")

            merge.set_kwargs(TEMP_OUT_BAM=self.PIPE_FILE)

            command = ParallelCmds([merge.finalize(), command])

        CommandNode.__init__(self,
                             command=command,
                             description=description,
                             threads=threads,
                             dependencies=dependencies)
Exemple #27
0
    def __init__(self,
                 input_file,
                 output_file,
                 algorithm="auto",
                 options=None,
                 dependencies=()):
        """Set up a node running MAFFT on a FASTA file.

        Arguments:
        input_file   -- FASTA file appended to the preset command line.
        output_file  -- File receiving the standard output of the command.
        algorithm    -- Key (case-insensitive) into _PRESETS selecting the
                        base command line; an unknown key raises KeyError.
        options      -- Optional mapping handed to apply_options; None (the
                        default) is treated as an empty mapping.
        dependencies -- Passed on unchanged to CommandNode.
        """
        # A fresh dict per call; avoids sharing one mutable default object
        # between every invocation of this constructor.
        if options is None:
            options = {}

        command = AtomicCmdBuilder(
            _PRESETS[algorithm.lower()] + ["%(IN_FASTA)s"],
            IN_FASTA=input_file,
            OUT_STDOUT=output_file,
            CHECK_VERSION=MAFFT_VERSION,
        )

        apply_options(command, options)

        self._output_file = output_file

        CommandNode.__init__(
            self,
            command=command.finalize(),
            description="<MAFFTNode (%s): '%s' -> '%s'>" % (
                algorithm,
                input_file,
                output_file,
            ),
            dependencies=dependencies,
        )
Exemple #28
0
    def __init__(self, input_prefix, output_prefix, nchroms, dependencies=()):
        """Set up a node running 'smartpca' on a set of .bed/.bim/.fam files.

        Arguments:
        input_prefix  -- Prefix of the input '.bed', '.bim' and '.fam' files.
        output_prefix -- Prefix of the '.log', '.evec', '.eval' and
                         '.deleted_snps' output files.
        nchroms       -- Stored in self._nchroms; not used in this method.
        dependencies  -- Passed on unchanged to CommandNode.
        """
        self._input_prefix = input_prefix
        self._output_prefix = output_prefix
        self._nchroms = nchroms

        cmd = AtomicCmd(
            ("smartpca", "-p", "%(TEMP_OUT_PARAMS)s"),
            # The parameter file itself is not written in this method.
            TEMP_OUT_PARAMS="parameters.txt",
            IN_FILE_BED=input_prefix + ".bed",
            IN_FILE_BIM=input_prefix + ".bim",
            IN_FILE_FAM=input_prefix + ".fam",
            OUT_STDOUT=output_prefix + ".log",
            OUT_EVEC=output_prefix + ".evec",
            OUT_EVAL=output_prefix + ".eval",
            OUT_SNPS=output_prefix + ".deleted_snps",
            CHECK_VERSION=SMARTPCA_VERSION,
            set_cwd=True,
        )

        CommandNode.__init__(
            self,
            # Closing quote added; the label previously read "'prefix.*>".
            description="<SmartPCA -> '%s.*'>" % (output_prefix, ),
            command=cmd,
            dependencies=dependencies,
        )
Exemple #29
0
    def __init__(self, control_file, sequence_file, trees_file, output_tar,
                 exclude_groups=(), dependencies=()):
        """Set up a node that runs 'codeml' and then tars the resulting files.

        Arguments:
        control_file   -- Registered as an input of the codeml command.
        sequence_file  -- Registered as an input of the codeml command.
        trees_file     -- Registered as an input of the codeml command.
        output_tar     -- Destination of the archive written by 'tar cvzf'.
        exclude_groups -- Stored as a frozenset in self._exclude_groups.
        dependencies   -- Passed on unchanged to CommandNode.

        The two commands are run sequentially, both with set_cwd=True.
        """
        self._exclude_groups = safe_coerce_to_frozenset(exclude_groups)
        self._control_file = control_file
        self._sequence_file = sequence_file
        self._trees_file = trees_file

        codeml_outputs = CodemlNode._get_codeml_files("TEMP_OUT_CODEML")
        paml_cmd = AtomicCmd(["codeml", "template.ctl"],
                             IN_CONTROL_FILE=control_file,
                             IN_SEQUENCE_FILE=sequence_file,
                             IN_TREES_FILE=trees_file,
                             TEMP_OUT_CTL="template.ctl",
                             TEMP_OUT_SEQS="template.seqs",
                             TEMP_OUT_TREES="template.trees",
                             TEMP_OUT_STDOUT="template.stdout",
                             TEMP_OUT_STDERR="template.stderr",
                             TEMP_OUT_4FOLD="4fold.nuc",
                             # Prevent prompts from blocking
                             IN_STDIN="/dev/null",
                             set_cwd=True,
                             **codeml_outputs)

        # One "%(KEY)s" placeholder per file to be archived
        tar_inputs = CodemlNode._get_codeml_files("TEMP_IN_CODEML")
        tar_call = ["tar", "cvzf", "%(OUT_FILE)s"]
        tar_call.extend("%%(%s)s" % (key,) for key in tar_inputs)

        tar_cmd = AtomicCmd(tar_call,
                            OUT_FILE=output_tar,
                            set_cwd=True,
                            **tar_inputs)

        label = "<CodemlNode: %r -> %r>" % (sequence_file, output_tar)
        CommandNode.__init__(self,
                             description=label,
                             command=SequentialCmds([paml_cmd, tar_cmd]),
                             dependencies=dependencies)
Exemple #30
0
    def __init__(self,
                 input_file,
                 output_prefix,
                 threads=1,
                 options=None,
                 dependencies=()):
        """Set up a node running AdapterRemoval on single-ended reads.

        Arguments:
        input_file    -- File passed to the command via '--file1'.
        output_prefix -- Prefix of the '.settings', '.truncated.gz' and
                         '.discarded.gz' output files.
        threads       -- Passed to _get_common_parameters and CommandNode.
        options       -- Optional mapping passed to _get_common_parameters
                         and apply_options; None (the default) is treated as
                         an empty mapping.
        dependencies  -- Passed on unchanged to CommandNode.
        """
        # A fresh dict per call: the mapping is handed to two helpers, so a
        # shared mutable default could leak state between nodes if either
        # helper modifies it.
        if options is None:
            options = {}

        # See below for parameters in common between SE/PE
        cmd = _get_common_parameters(threads=threads, options=options)

        # Prefix for output files, ensure that all end up in temp folder
        cmd.set_option("--basename", "%(TEMP_OUT_BASENAME)s")

        output_tmpl = output_prefix + ".%s.gz"
        cmd.set_kwargs(
            TEMP_OUT_BASENAME=os.path.basename(output_prefix),
            OUT_SETTINGS=output_prefix + ".settings",
            OUT_MATE_1=output_tmpl % ("truncated", ),
            OUT_DISCARDED=output_tmpl % ("discarded", ),
        )

        cmd.set_option("--file1", "%(IN_READS_1)s")
        cmd.set_kwargs(IN_READS_1=input_file)

        apply_options(cmd, options)

        CommandNode.__init__(
            self,
            command=cmd.finalize(),
            threads=threads,
            description="<AdapterRM (SE): %s -> '%s.*'>" % (
                fileutils.describe_files(input_file),
                output_prefix,
            ),
            dependencies=dependencies,
        )
Exemple #31
0
    def __init__(self, parameters):
        """Set up the mapDamage (model) node from a 'parameters' object.

        Reads 'directory' (stored in self._directory and shown in the
        description), 'command' (finalized) and 'dependencies' from it.
        """
        directory = parameters.directory
        self._directory = directory

        label = "<mapDamage (model): %r>" % (directory,)
        CommandNode.__init__(
            self,
            command=parameters.command.finalize(),
            description=label,
            dependencies=parameters.dependencies,
        )
Exemple #32
0
 def __init__(self, parameters):
     """Set up the BWA Index node from a 'parameters' object.

     Reads 'command' (finalized), 'input_file', 'prefix' and 'dependencies'
     from it.
     """
     finalized = parameters.command.finalize()
     label = "<BWA Index '%s' -> '%s.*'>" % (
         parameters.input_file,
         parameters.prefix,
     )

     CommandNode.__init__(
         self,
         command=finalized,
         description=label,
         dependencies=parameters.dependencies,
     )
Exemple #33
0
 def __init__(self, parameters):
     """Set up the BuildRegions node from a 'parameters' object.

     Reads 'command' (finalized), 'infile', 'outfile' and 'dependencies'
     from it.
     """
     finalized = parameters.command.finalize()
     label = "<BuildRegions: '%s' -> '%s'>" % (
         parameters.infile,
         parameters.outfile,
     )

     CommandNode.__init__(
         self,
         description=label,
         command=finalized,
         dependencies=parameters.dependencies,
     )
Exemple #34
0
    def __init__(self, parameters):
        """Set up the mapDamage (model) node from a 'parameters' object.

        The 'directory' attribute is kept in self._directory and used in the
        description; 'command' is finalized and 'dependencies' is forwarded
        to CommandNode.
        """
        target_dir = parameters.directory
        self._directory = target_dir

        CommandNode.__init__(
            self,
            command=parameters.command.finalize(),
            description="<mapDamage (model): %r>" % (target_dir, ),
            dependencies=parameters.dependencies,
        )
Exemple #35
0
    def __init__(self,
                 data,
                 input_file,
                 output_prefix,
                 m=0,
                 k=100,
                 outgroup=(),
                 dependencies=()):
        """Set up a node running 'treemix' with the '-global' option.

        Arguments:
        data          -- Object providing 'contigs' and 'settings'; only read
                         when 'k' is a (key, path) tuple.
        input_file    -- File passed to treemix via '-i'.
        output_prefix -- Prefix of all output files.
        m             -- Value passed to treemix via '-m'.
        k             -- Either an int, passed directly via '-k', or a tuple
                         of two strings that is unpacked into
                         (self._k_field, self._k_file).
        outgroup      -- Optional names, joined by ',' and passed via '-root'.
        dependencies  -- Passed on unchanged to CommandNode.

        Raises ValueError if 'k' is neither an int nor a tuple of strings.
        """
        call = [
            "treemix", "-i", "%(IN_FILE)s", "-o", "%(TEMP_OUT_PREFIX)s",
            "-global", "-m", m
        ]

        if outgroup:
            call.extend(("-root", ",".join(outgroup)))

        self._param_m = m
        self._param_outgroup = outgroup
        self._params_file = output_prefix + ".parameters.txt"

        if isinstance(k, int):
            call.extend(("-k", k))
            self._param_k = k
            self._k_file = self._k_field = None
        elif isinstance(k, tuple) and all(isinstance(v, str) for v in k):
            # NOTE(review): '-k' is not added to the call and self._param_k
            # is not set in this branch; presumably handled elsewhere.
            self._k_field, self._k_file = k
            # dict.itervalues() does not exist on Python 3; values() is
            # available on both Python 2 and 3.
            self._genome_size = sum(value["Size"]
                                    for value in data.contigs.values())
            self._snp_distance = data.settings["SNPDistance"]
        else:
            raise ValueError("k must be int or (key, path) in TreemixNode")

        # Output file whose name embeds a hash of the parameters
        self._parameters_hash \
            = "%s.%s" % (output_prefix,
                         hash_params(k=k, m=m, global_set=True,
                                     outgroup=tuple(sorted(outgroup))))

        cmd = AtomicCmd(call,
                        IN_FILE=input_file,
                        TEMP_OUT_PREFIX=os.path.basename(output_prefix),
                        OUT_FILE_COV=output_prefix + ".cov.gz",
                        OUT_FILE_COVSE=output_prefix + ".covse.gz",
                        OUT_FILE_EDGES=output_prefix + ".edges.gz",
                        OUT_FILE_LLIK=output_prefix + ".llik",
                        OUT_FILE_MODELCOV=output_prefix + ".modelcov.gz",
                        OUT_FILE_TREEOUT=output_prefix + ".treeout.gz",
                        OUT_FILE_VERTICES=output_prefix + ".vertices.gz",
                        OUT_FILE_PARAMS=self._params_file,
                        OUT_FILE_PARAMS_HASH=self._parameters_hash,
                        CHECK_VERSION=TREEMIX_VERSION,
                        set_cwd=True)

        CommandNode.__init__(self,
                             description="<Treemix -> '%s.*'>" %
                             (output_prefix, ),
                             command=cmd,
                             dependencies=dependencies)
Exemple #36
0
    def __init__(
            self,
            input_binary,
            initial_tree,
            output_template,
            model="GAMMA",
            threads=1,
            dependencies=(),
    ):
        """
        Set up a node running ExaML (via AtomicMPICmdBuilder).

        Arguments:
        input_binary     -- A binary alignment file in a format readable by
                            ExaML; passed via '-s'.
        initial_tree     -- Tree file passed to ExaML via '-t'.
        output_template  -- A template string used to construct the final
                            filenames. Should consist of a full path,
                            including a single '%s', which is replaced with
                            the variable part of the output filenames (here
                            'info', 'result', 'log' and 'modelFile').

                            Example destination: '/disk/project/SN013420.RAxML.%s'
                            Example output:      '/disk/project/SN013420.RAxML.info'
        model            -- Value passed to ExaML via '-m'.
        threads          -- Passed to AtomicMPICmdBuilder and CommandNode.
        dependencies     -- Passed on unchanged to CommandNode.
        """

        # TODO: Make MPIParams!
        builder = AtomicMPICmdBuilder("examl", threads=threads)

        # Output is written to the temporary directory
        builder.set_option("-w", "%(TEMP_DIR)s")

        builder.set_option("-s", "%(IN_ALN)s")
        builder.set_option("-t", "%(IN_TREE)s")
        builder.set_option("-n", "Pypeline")

        # Only generated by newer versions of ExaML
        modelfile = os.path.basename(output_template % "modelFile")

        builder.set_kwargs(
            IN_ALN=input_binary,
            IN_TREE=initial_tree,
            # Final output files, are not created directly
            OUT_INFO=output_template % "info",
            OUT_BESTTREE=output_template % "result",
            OUT_BOOTSTRAP=output_template % "log",
            TEMP_OUT_MODELFILE=modelfile,
            CHECK_EXAML=EXAML_VERSION,
        )

        # The GAMMA model is used unless the caller specifies otherwise
        builder.set_option("-m", model)

        self._dirname, self._template = (
            os.path.dirname(output_template),
            os.path.basename(output_template),
        )

        label = "<ExaML (%i thread(s)): '%s' -> '%s'>" % (
            threads, input_binary, output_template)

        CommandNode.__init__(
            self,
            command=builder.finalize(),
            description=label,
            threads=threads,
            dependencies=dependencies,
        )
Exemple #37
0
    def __init__(self, parameters):
        """Set up the Parsimonator node from a 'parameters' object.

        Stores the absolute path of 'input_alignment' in self._symlinks and
        the basename of 'output_tree' in self._output_tree; 'command' is
        finalized and 'dependencies' is forwarded to CommandNode.
        """
        alignment = parameters.input_alignment
        tree = parameters.output_tree

        self._symlinks = [os.path.abspath(alignment)]
        self._output_tree = os.path.basename(tree)

        label = "<Parsimonator: '%s' -> '%s'>" % (alignment, tree)
        CommandNode.__init__(self,
                             command=parameters.command.finalize(),
                             description=label,
                             dependencies=parameters.dependencies)
Exemple #38
0
    def __init__(self, parameters):
        """Set up the Parsimonator node from a 'parameters' object.

        self._symlinks holds the absolute path of 'input_alignment', and
        self._output_tree the basename of 'output_tree'. The finalized
        'command' and the 'dependencies' are handed to CommandNode.
        """
        source = parameters.input_alignment
        destination = parameters.output_tree

        self._symlinks = [os.path.abspath(source)]
        self._output_tree = os.path.basename(destination)

        CommandNode.__init__(
            self,
            command=parameters.command.finalize(),
            description="<Parsimonator: '%s' -> '%s'>" % (source,
                                                          destination),
            dependencies=parameters.dependencies,
        )
Exemple #39
0
    def __init__(self, parameters):
        """Set up the RAxMLParsimonyTree node from a 'parameters' object.

        Copies 'input_alignment', 'input_partitions' and 'output_tree' onto
        the instance; 'command' is finalized and 'dependencies' is forwarded
        to CommandNode.
        """
        alignment = parameters.input_alignment
        tree = parameters.output_tree

        self._input_alignment = alignment
        self._input_partitions = parameters.input_partitions
        self._output_tree = tree

        label = "<RAxMLParsimonyTree: '%s' -> '%s'>" % (alignment, tree)
        CommandNode.__init__(self,
                             command=parameters.command.finalize(),
                             description=label,
                             dependencies=parameters.dependencies)
Exemple #40
0
    def __init__(self,
                 output_prefix,
                 tfam,
                 tped,
                 indep_filter=None,
                 indep_parameters=None,
                 plink_parameters=None,
                 dependencies=()):
        """Set up a node running two sequential plink commands.

        The first command applies the selected '--indep*' filter, writing
        its results under the temporary prefix "indep"; the second extracts
        the sites listed in "indep.prune.in" and writes .bed/.bim/.fam files.

        Arguments:
        output_prefix    -- Prefix of the final .log/.bed/.bim/.fam files.
        tfam, tped       -- Input files passed via '--tfam' and '--tped'.
        indep_filter     -- One of 'indep', 'indep-pairphase' or
                            'indep-pairwise' (asserted).
        indep_parameters -- Sequence of exactly 3 values (asserted), added
                            after the filter option.
        plink_parameters -- Parsed by self._parse_parameters and appended to
                            both commands.
        dependencies     -- Passed on unchanged to CommandNode.
        """
        known_filters = ('indep', 'indep-pairphase', 'indep-pairwise')
        assert indep_filter in known_filters, indep_filter
        assert len(indep_parameters) == 3, indep_parameters

        parameters = self._parse_parameters(plink_parameters)

        # Step 1: determine which sites to keep
        indep_call = ["plink", "--noweb"]
        indep_call += ["--tped", "%(IN_TPED)s"]
        indep_call += ["--tfam", "%(IN_TFAM)s"]
        indep_call += ["--out", "%(TEMP_OUT_PREFIX)s"]
        indep_call.append('--' + indep_filter)
        indep_call.extend(indep_parameters)
        indep_call.extend(parameters)

        cmd_indep = AtomicCmd(
            indep_call,
            IN_TFAM=tfam,
            IN_TPED=tped,
            TEMP_OUT_PREFIX="indep",
            TEMP_OUT_LOG="indep.log",
            TEMP_OUT_NOSEX="indep.nosex",
            TEMP_OUT_PRUNE_IN="indep.prune.in",
            TEMP_OUT_PRUNE_OUT="indep.prune.out",
            set_cwd=True,
        )

        # Step 2: extract the retained sites into binary PED files
        basename = os.path.basename(output_prefix)
        filter_call = [
            "plink", "--noweb", "--make-bed",
            "--tped", "%(IN_TPED)s",
            "--tfam", "%(IN_TFAM)s",
            "--extract", "%(TEMP_IN_PRUNE)s",
            "--out", "%(TEMP_OUT_PREFIX)s",
        ] + parameters

        cmd_filter = AtomicCmd(
            filter_call,
            IN_TFAM=tfam,
            IN_TPED=tped,
            TEMP_OUT_PREFIX=basename,
            TEMP_IN_PRUNE="indep.prune.in",
            TEMP_OUT_NOSEX=basename + ".nosex",
            TEMP_OUT_LOG=basename + ".log",
            OUT_LOG=output_prefix + ".log",
            OUT_BED=output_prefix + ".bed",
            OUT_BIM=output_prefix + ".bim",
            OUT_FAM=output_prefix + ".fam",
            set_cwd=True,
        )

        label = "<BuildFilteredBEDFiles -> '%s.*'>" % (output_prefix, )
        CommandNode.__init__(self,
                             description=label,
                             command=SequentialCmds((cmd_indep, cmd_filter)),
                             dependencies=dependencies)
Exemple #41
0
    def __init__(self, parameters):
        """Set up the MAFFT node from a 'parameters' object.

        Stores 'output_file' in self._output_file; 'algorithm', 'input_file'
        and 'output_file' are shown in the description, 'command' is
        finalized and 'dependencies' is forwarded to CommandNode.
        """
        self._output_file = parameters.output_file

        label_fields = (parameters.algorithm,
                        parameters.input_file,
                        parameters.output_file)
        label = "<MAFFTNode (%s): '%s' -> '%s'>" % label_fields

        CommandNode.__init__(self,
                             command=parameters.command.finalize(),
                             description=label,
                             dependencies=parameters.dependencies)
Exemple #42
0
    def __init__(self, parameters):
        """Set up the VCFPileup node from a 'parameters' object.

        Stores 'infile_vcf' in self._in_vcf; 'command' is finalized and
        'dependencies' is forwarded to CommandNode.
        """
        vcf_path = parameters.infile_vcf
        self._in_vcf = vcf_path

        finalized = parameters.command.finalize()
        label = "<VCFPileup: '%s' -> '%s'>" % (vcf_path, parameters.outfile)

        CommandNode.__init__(
            self,
            description=label,
            command=finalized,
            dependencies=parameters.dependencies,
        )
Exemple #43
0
    def __init__(self, parameters):
        """Set up the ExaMLParser node from a 'parameters' object.

        Stores the absolute paths of 'input_alignment' and 'input_partition'
        in self._symlinks, and the basename of 'output_file' in
        self._output_file; 'command' is finalized and 'dependencies' is
        forwarded to CommandNode.
        """
        alignment = parameters.input_alignment
        partition = parameters.input_partition
        output = parameters.output_file

        self._symlinks = [os.path.abspath(path)
                          for path in (alignment, partition)]
        self._output_file = os.path.basename(output)

        label = "<ExaMLParser: '%s' -> '%s'>" % (alignment, output)
        CommandNode.__init__(self,
                             command=parameters.command.finalize(),
                             description=label,
                             dependencies=parameters.dependencies)
Exemple #44
0
    def __init__(self, parameters):
        """Set up the MAFFT node from a 'parameters' object.

        The 'output_file' attribute is kept in self._output_file. The
        description shows 'algorithm', 'input_file' and 'output_file'; the
        finalized 'command' and 'dependencies' are handed to CommandNode.
        """
        output = parameters.output_file
        self._output_file = output

        CommandNode.__init__(
            self,
            command=parameters.command.finalize(),
            description="<MAFFTNode (%s): '%s' -> '%s'>" % (
                parameters.algorithm,
                parameters.input_file,
                output,
            ),
            dependencies=parameters.dependencies,
        )
Exemple #45
0
    def __init__(self, output_prefix, tfam, tped,
                 indep_filter=None, indep_parameters=None,
                 plink_parameters=None,
                 dependencies=()):
        """Set up a node that filters sites using plink, in two steps.

        First plink is run with the chosen '--indep*' option (temporary
        prefix "indep"); then plink is run with '--extract' on the resulting
        "indep.prune.in" list, producing .bed/.bim/.fam files.

        Arguments:
        output_prefix    -- Prefix of the final .log/.bed/.bim/.fam files.
        tfam, tped       -- Input files passed via '--tfam' and '--tped'.
        indep_filter     -- 'indep', 'indep-pairphase' or 'indep-pairwise'
                            (asserted).
        indep_parameters -- Exactly 3 values (asserted) following the filter
                            option on the command line.
        plink_parameters -- Parsed by self._parse_parameters; the result is
                            appended to both plink calls.
        dependencies     -- Passed on unchanged to CommandNode.
        """
        assert indep_filter in (
            'indep', 'indep-pairphase', 'indep-pairwise'), indep_filter
        assert len(indep_parameters) == 3, indep_parameters

        parameters = self._parse_parameters(plink_parameters)

        # Options shared by both plink invocations
        shared_inputs = ["--tped", "%(IN_TPED)s", "--tfam", "%(IN_TFAM)s"]

        prune_call = ["plink", "--noweb"] + shared_inputs
        prune_call += ["--out", "%(TEMP_OUT_PREFIX)s", '--' + indep_filter]
        prune_call.extend(indep_parameters)
        prune_call.extend(parameters)

        cmd_indep = AtomicCmd(
            prune_call,
            IN_TFAM=tfam,
            IN_TPED=tped,
            TEMP_OUT_PREFIX="indep",
            TEMP_OUT_LOG="indep.log",
            TEMP_OUT_NOSEX="indep.nosex",
            TEMP_OUT_PRUNE_IN="indep.prune.in",
            TEMP_OUT_PRUNE_OUT="indep.prune.out",
            set_cwd=True,
        )

        basename = os.path.basename(output_prefix)
        extract_call = ["plink", "--noweb", "--make-bed"] + shared_inputs
        extract_call += ["--extract", "%(TEMP_IN_PRUNE)s",
                         "--out", "%(TEMP_OUT_PREFIX)s"]

        cmd_filter = AtomicCmd(
            extract_call + parameters,
            IN_TFAM=tfam,
            IN_TPED=tped,
            TEMP_OUT_PREFIX=basename,
            TEMP_IN_PRUNE="indep.prune.in",
            TEMP_OUT_NOSEX=basename + ".nosex",
            TEMP_OUT_LOG=basename + ".log",
            OUT_LOG=output_prefix + ".log",
            OUT_BED=output_prefix + ".bed",
            OUT_BIM=output_prefix + ".bim",
            OUT_FAM=output_prefix + ".fam",
            set_cwd=True,
        )

        CommandNode.__init__(
            self,
            description="<BuildFilteredBEDFiles -> '%s.*'>" % (
                output_prefix,),
            command=SequentialCmds((cmd_indep, cmd_filter)),
            dependencies=dependencies,
        )
Exemple #46
0
    def __init__(self, input_file, destination, dependencies=()):
        """Set up a node that runs 'md5sum' on a file.

        Arguments:
        input_file   -- File passed to md5sum.
        destination  -- File receiving the standard output of md5sum.
        dependencies -- Passed on unchanged to CommandNode.
        """
        checksum_cmd = AtomicCmd(
            ("md5sum", "%(IN_FILE)s"),
            IN_FILE=input_file,
            OUT_STDOUT=destination,
        )

        CommandNode.__init__(
            self,
            description="<MD5Sum %s -> %s>" % (input_file, destination),
            command=checksum_cmd,
            dependencies=dependencies,
        )
Exemple #47
0
    def __init__(self,
                 input_prefix,
                 output_prefix,
                 tfam,
                 parameters=None,
                 dependencies=()):
        """Set up a node running 'plink --freq --missing' followed by gzip.

        Arguments:
        input_prefix  -- Prefix of the input .bed/.bim/.fam files; passed to
                         plink (as an absolute path) via '--bfile'.
        output_prefix -- Prefix of the '.frq.strat.gz', '.frq.strat.log' and
                         '.frq.strat.nosex' output files.
        tfam          -- Stored in self._tfam; not used in this method.
        parameters    -- Optional string of extra plink options, split on
                         whitespace and appended to the command line.
        dependencies  -- Passed on unchanged to CommandNode.
        """
        basename = os.path.basename(output_prefix)

        plink_cmd = [
            "plink",
            "--freq",
            "--missing",
            "--noweb",
            "--bfile",
            os.path.abspath(input_prefix),
            "--within",
            "%(TEMP_OUT_CLUST)s",
            "--out",
            "%(TEMP_OUT_PREFIX)s",
        ]

        if parameters:
            plink_cmd.extend(parameters.split())

        plink = AtomicCmd(
            plink_cmd,
            IN_BED=input_prefix + ".bed",
            IN_BIM=input_prefix + ".bim",
            IN_FAM=input_prefix + ".fam",
            # The cluster file itself is not written in this method.
            TEMP_OUT_CLUST="samples.clust",
            TEMP_OUT_IMISS=basename + ".imiss",
            TEMP_OUT_LMISS=basename + ".lmiss",
            OUT_NOSEX=output_prefix + ".frq.strat.nosex",
            OUT_LOG=output_prefix + ".frq.strat.log",
            TEMP_OUT_PREFIX=basename,
            CHECK_VERSION=PLINK_VERSION,
            set_cwd=True,
        )

        gzip = AtomicCmd(
            ["gzip", "%(TEMP_IN_FREQ)s"],
            TEMP_IN_FREQ=basename + ".frq.strat",
            OUT_FREQ=output_prefix + ".frq.strat.gz",
        )

        self._tfam = tfam
        self._basename = basename
        CommandNode.__init__(
            self,
            # Closing '>' added; the label was previously left unterminated.
            description="<BuildFreqFiles -> '%s.*'>" % (output_prefix, ),
            command=SequentialCmds((plink, gzip)),
            dependencies=dependencies,
        )
Exemple #48
0
    def __init__(self, parameters):
        """Set up the RAxMLRapidBS node from a 'parameters' object.

        Stores 'input_alignment' and 'input_partition' in self._symlinks and
        the basename of 'output_template' in self._template; 'command' is
        finalized, and 'threads' and 'dependencies' are forwarded to
        CommandNode.
        """
        alignment = parameters.input_alignment
        template = parameters.output_template

        self._symlinks = [alignment, parameters.input_partition]
        self._template = os.path.basename(template)

        # The description shows the template with '*' filled in
        label = "<RAxMLRapidBS: '%s' -> '%s'>" % (alignment,
                                                  template % ("*",))

        CommandNode.__init__(
            self,
            command=parameters.command.finalize(),
            description=label,
            threads=parameters.threads,
            dependencies=parameters.dependencies,
        )
Exemple #49
0
    def __init__(self, parameters):
        """Set up the ExaML node from a 'parameters' object.

        Splits 'output_template' into self._dirname and self._template;
        'command' is finalized, and 'threads' and 'dependencies' are
        forwarded to CommandNode.
        """
        template = parameters.output_template
        self._dirname = os.path.dirname(template)
        self._template = os.path.basename(template)

        threads = parameters.threads
        label = "<ExaML (%i thread(s)): '%s' -> '%s'>" % (
            threads, parameters.input_binary, template)

        CommandNode.__init__(self,
                             command=parameters.command.finalize(),
                             description=label,
                             threads=threads,
                             dependencies=parameters.dependencies)
Exemple #50
0
    def __init__(self, parameters):
        """Set up the ExaML node from a 'parameters' object.

        The directory and basename of 'output_template' are kept in
        self._dirname and self._template. The finalized 'command', the
        'threads' and the 'dependencies' are handed to CommandNode.
        """
        output_template = parameters.output_template
        self._dirname, self._template = (
            os.path.dirname(output_template),
            os.path.basename(output_template),
        )

        CommandNode.__init__(
            self,
            command=parameters.command.finalize(),
            description="<ExaML (%i thread(s)): '%s' -> '%s'>" % (
                parameters.threads,
                parameters.input_binary,
                output_template,
            ),
            threads=parameters.threads,
            dependencies=parameters.dependencies,
        )
Exemple #51
0
    def __init__(self, infile, dependencies=()):
        """Set up a node that runs 'samtools faidx' on a FASTA file.

        Arguments:
        infile       -- FASTA file to index; the expected output is
                        'infile + ".fai"'. samtools is given the basename of
                        this file as a temporary input.
        dependencies -- Passed on unchanged to CommandNode.
        """
        self._infile = infile

        faidx_call = ["samtools", "faidx", "%(TEMP_IN_FASTA)s"]
        index_cmd = AtomicCmd(
            faidx_call,
            TEMP_IN_FASTA=os.path.basename(infile),
            IN_FASTA=infile,
            OUT_TBI=infile + ".fai",
            CHECK_SAM=SAMTOOLS_VERSION,
        )

        CommandNode.__init__(
            self,
            description="<FastaIndex: '%s'>" % (infile,),
            command=index_cmd,
            dependencies=dependencies,
        )
Exemple #52
0
    def __init__(self, data, input_file, output_prefix, m=0, k=100,
                 outgroup=(), dependencies=()):
        """Set up a node running 'treemix' with the '-global' option.

        Arguments:
        data          -- Object providing 'contigs' and 'settings'; only read
                         when 'k' is a (key, path) tuple.
        input_file    -- File passed to treemix via '-i'.
        output_prefix -- Prefix of all output files.
        m             -- Value passed to treemix via '-m'.
        k             -- Either an int, passed directly via '-k', or a tuple
                         of two strings that is unpacked into
                         (self._k_field, self._k_file).
        outgroup      -- Optional names, joined by ',' and passed via '-root'.
        dependencies  -- Passed on unchanged to CommandNode.

        Raises ValueError if 'k' is neither an int nor a tuple of strings.
        """
        call = ["treemix",
                "-i", "%(IN_FILE)s",
                "-o", "%(TEMP_OUT_PREFIX)s",
                "-global",
                "-m", m]

        if outgroup:
            call.extend(("-root", ",".join(outgroup)))

        self._param_m = m
        self._param_outgroup = outgroup
        self._params_file = output_prefix + ".parameters.txt"

        if isinstance(k, int):
            call.extend(("-k", k))
            self._param_k = k
            self._k_file = self._k_field = None
        elif isinstance(k, tuple) and all(isinstance(v, str) for v in k):
            # NOTE(review): '-k' is not added to the call and self._param_k
            # is not set in this branch; presumably handled elsewhere.
            self._k_field, self._k_file = k
            # dict.itervalues() does not exist on Python 3; values() is
            # available on both Python 2 and 3.
            self._genome_size = sum(value["Size"]
                                    for value in data.contigs.values())
            self._snp_distance = data.settings["SNPDistance"]
        else:
            raise ValueError("k must be int or (key, path) in TreemixNode")

        # Output file whose name embeds a hash of the parameters
        self._parameters_hash \
            = "%s.%s" % (output_prefix,
                         hash_params(k=k, m=m, global_set=True,
                                     outgroup=tuple(sorted(outgroup))))

        cmd = AtomicCmd(call,
                        IN_FILE=input_file,
                        TEMP_OUT_PREFIX=os.path.basename(output_prefix),
                        OUT_FILE_COV=output_prefix + ".cov.gz",
                        OUT_FILE_COVSE=output_prefix + ".covse.gz",
                        OUT_FILE_EDGES=output_prefix + ".edges.gz",
                        OUT_FILE_LLIK=output_prefix + ".llik",
                        OUT_FILE_MODELCOV=output_prefix + ".modelcov.gz",
                        OUT_FILE_TREEOUT=output_prefix + ".treeout.gz",
                        OUT_FILE_VERTICES=output_prefix + ".vertices.gz",
                        OUT_FILE_PARAMS=self._params_file,
                        OUT_FILE_PARAMS_HASH=self._parameters_hash,
                        CHECK_VERSION=TREEMIX_VERSION,
                        set_cwd=True)

        CommandNode.__init__(self,
                             description="<Treemix -> '%s.*'>"
                             % (output_prefix,),
                             command=cmd,
                             dependencies=dependencies)
Exemple #53
0
    def __init__(self, parameters):
        """Set up the RAxMLBootstrap node from a 'parameters' object.

        Copies 'input_alignment', 'input_partition', 'template', 'bootstraps'
        and 'start' onto the instance; 'command' is finalized and
        'dependencies' is forwarded to CommandNode. The description shows the
        inclusive range 'start .. start + bootstraps - 1'.
        """
        self._input_alignment = parameters.input_alignment
        self._input_partition = parameters.input_partition
        self._output_template = parameters.template
        self._bootstrap_num = parameters.bootstraps
        self._bootstrap_start = parameters.start

        last_bootstrap = parameters.start + parameters.bootstraps - 1
        # Closing ')' added; the range was previously left unterminated.
        description = "<RAxMLBootstrap: '%s' -> '%s' (%i .. %i)>" \
            % (parameters.input_alignment, parameters.template,
               parameters.start, last_bootstrap)

        CommandNode.__init__(self,
                             command=parameters.command.finalize(),
                             description=description,
                             dependencies=parameters.dependencies)
Exemple #54
0
    def __init__(self, parameters):
        """Set up the BWA Samse node from a 'parameters' object.

        The commands in 'parameters.commands' are finalized in the order
        given by 'parameters.order' and run in parallel; 'input_file_fq' and
        'prefix' are used for the description.
        """
        finalized = []
        for key in parameters.order:
            finalized.append(parameters.commands[key].finalize())
        parallel = ParallelCmds(finalized)

        label = _get_node_description(name="BWA Samse",
                                      input_files_1=parameters.input_file_fq,
                                      prefix=parameters.prefix)

        CommandNode.__init__(
            self,
            command=parallel,
            description=label,
            dependencies=parameters.dependencies,
        )
Exemple #55
0
    def __init__(self, parameters):
        """Set up the BWA (Backtrack) node from a 'parameters' object.

        The commands in 'parameters.commands' are finalized in the order
        given by 'parameters.order' and run in parallel; 'input_file',
        'prefix' and 'threads' are used for the description, and 'threads'
        and 'dependencies' are forwarded to CommandNode.
        """
        finalized = []
        for key in parameters.order:
            finalized.append(parameters.commands[key].finalize())
        parallel = ParallelCmds(finalized)

        label = _get_node_description(
            name="BWA",
            algorithm='Backtrack',
            input_files_1=parameters.input_file,
            prefix=parameters.prefix,
            threads=parameters.threads,
        )

        CommandNode.__init__(
            self,
            command=parallel,
            description=label,
            threads=parameters.threads,
            dependencies=parameters.dependencies,
        )
Exemple #56
0
    def __init__(self, parameters):
        """Set up the Bowtie2 node from a 'parameters' object.

        The commands in 'parameters.commands' are finalized in the order
        given by 'parameters.order' and run in parallel. The description is
        labelled "PE" when 'input_file_2' is set and "SE" otherwise;
        'threads' and 'dependencies' are forwarded to CommandNode.
        """
        finalized = []
        for key in parameters.order:
            finalized.append(parameters.commands[key].finalize())
        parallel = ParallelCmds(finalized)

        if parameters.input_file_2:
            mode = "PE"
        else:
            mode = "SE"

        label = _get_node_description(
            name="Bowtie2",
            algorithm=mode,
            input_files_1=parameters.input_file_1,
            input_files_2=parameters.input_file_2,
            prefix=parameters.prefix,
            threads=parameters.threads,
        )

        CommandNode.__init__(
            self,
            command=parallel,
            description=label,
            threads=parameters.threads,
            dependencies=parameters.dependencies,
        )