Example #1
0
    def __init__(self, config, reference, intervals, infiles, outfile,
                 dependencies=()):
        """Build a GATK IndelRealigner command paired with 'samtools calmd'.

        The realigner is run on 'infiles' using the target 'intervals' and is
        told to write to 'outfile'; the calmd command reads a temporary file
        named after the basename of 'outfile'. Both commands are wrapped in a
        single ParallelCmds object, which is passed on to CommandNode.
        """
        self._basename = os.path.basename(outfile)
        infiles = safe_coerce_to_tuple(infiles)

        gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        realigner = AtomicJavaCmdBuilder(gatk_jar,
                                         jre_options=config.jre_options)
        realigner.set_option("-T", "IndelRealigner")
        realigner.set_option("-R", "%(IN_REFERENCE)s")
        realigner.set_option("-targetIntervals", "%(IN_INTERVALS)s")
        realigner.set_option("-o", "%(OUT_BAMFILE)s")
        # The realigner output is written uncompressed and without an index
        realigner.set_option("--bam_compression", 0)
        realigner.set_option("--disable_bam_indexing")
        _set_input_files(realigner, infiles)

        realigner_files = {
            "IN_REFERENCE": reference,
            "IN_REF_DICT": fileutils.swap_ext(reference, ".dict"),
            "IN_INTERVALS": intervals,
            "OUT_BAMFILE": outfile,
            "CHECK_GATK": _get_gatk_version_check(config),
        }
        realigner.set_kwargs(**realigner_files)

        calmd_call = ["samtools", "calmd", "-b",
                      "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
        calmd = AtomicCmd(calmd_call,
                          TEMP_IN_BAM=self._basename,
                          IN_REF=reference,
                          TEMP_OUT_STDOUT=self._basename + ".calmd",
                          CHECK_VERSION=SAMTOOLS_VERSION)

        description = ("<GATK Indel Realigner (aligning): %s -> %r>"
                       % (describe_files(infiles), outfile))
        commands = ParallelCmds([realigner.finalize(), calmd])

        CommandNode.__init__(self,
                             description=description,
                             command=commands,
                             dependencies=dependencies)
Example #2
0
    def __init__(self, samples, prefix, output_prefix, dependencies=()):
        """Create the three TreeMix plotting commands and run them in sequence.

        Two commands ("plot_tree" and "plot_residuals") each produce a PDF and
        a PNG named after 'output_prefix'; the third ("variance") produces a
        text file. All three are built via self._plot_command.
        """
        abs_prefix = os.path.abspath(prefix)
        temp_name = os.path.basename(output_prefix)

        # The tree plot (with migration edges) and the residuals heatmap are
        # built identically, apart from the plotting mode and filename suffix
        plot_modes = (
            ("plot_tree", "_tree"),
            ("plot_residuals", "_residuals"),
        )

        plot_cmds = []
        for mode, suffix in plot_modes:
            plot_cmds.append(
                self._plot_command(prefix, mode, abs_prefix,
                                   "%(IN_SAMPLES)s", "%(TEMP_OUT_PREFIX)s",
                                   IN_SAMPLES=samples,
                                   TEMP_OUT_PREFIX=temp_name + suffix,
                                   OUT_PDF=output_prefix + suffix + ".pdf",
                                   OUT_PNG=output_prefix + suffix + ".png"))

        # Text file containing % of variance explained by model
        variance_cmd = self._plot_command(
            prefix, "variance", abs_prefix,
            "%(OUT_TXT)s",
            OUT_TXT=output_prefix + "_variance.txt")

        commands = SequentialCmds(tuple(plot_cmds) + (variance_cmd,))
        description = "<PlotTreemix -> '%s.*'>" % (output_prefix,)

        CommandNode.__init__(self,
                             description=description,
                             command=commands,
                             dependencies=dependencies)
Example #3
0
    def __init__(self, config, reference, infiles, outfile,
                 threads=1, dependencies=()):
        """Build a GATK RealignerTargetCreator command writing to 'outfile'.

        The requested number of threads is first passed through
        _get_max_threads; the resulting value is given to GATK via "-nt" and
        is also reported to CommandNode.
        """
        threads = _get_max_threads(reference, threads)
        infiles = safe_coerce_to_tuple(infiles)

        gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        builder = AtomicJavaCmdBuilder(gatk_jar,
                                       jre_options=config.jre_options)
        builder.set_option("-T", "RealignerTargetCreator")
        builder.set_option("-R", "%(IN_REFERENCE)s")
        builder.set_option("-o", "%(OUT_INTERVALS)s")
        builder.set_option("-nt", threads)
        _set_input_files(builder, infiles)

        builder_files = {
            "IN_REFERENCE": reference,
            "IN_REF_DICT": fileutils.swap_ext(reference, ".dict"),
            "OUT_INTERVALS": outfile,
            "CHECK_GATK": _get_gatk_version_check(config),
        }
        builder.set_kwargs(**builder_files)

        description = ("<GATK Indel Realigner (training): %s -> %r>"
                       % (describe_files(infiles), outfile))

        CommandNode.__init__(self,
                             threads=threads,
                             description=description,
                             command=builder.finalize(),
                             dependencies=dependencies)
Example #4
0
    def __init__(self,
                 infile,
                 bedfile,
                 outfile,
                 padding,
                 options=None,
                 dependencies=()):
        """Build the "vcf_to_fasta" command writing its STDOUT to 'outfile'.

        Arguments:
          infile  -- VCF file passed via --genotype; a matching "<infile>.tbi"
                     file is registered as an additional input.
          bedfile -- Intervals file passed via --intervals.
          outfile -- Destination of the command's standard output.
          padding -- Value passed via --padding.
          options -- Optional mapping of extra options, applied to the command
                     using apply_options; defaults to no extra options.
          dependencies -- Passed on unchanged to CommandNode.
        """
        # A None sentinel is used in place of a mutable default argument, so
        # that no dictionary is shared between separate calls
        if options is None:
            options = {}

        params = factory.new("vcf_to_fasta")
        params.set_option("--padding", padding)
        params.set_option("--genotype", "%(IN_VCFFILE)s")
        params.set_option("--intervals", "%(IN_INTERVALS)s")

        params.set_kwargs(
            IN_VCFFILE=infile,
            IN_TABIX=infile + ".tbi",
            IN_INTERVALS=bedfile,
            OUT_STDOUT=outfile,
        )

        apply_options(params, options)

        description = "<BuildRegions: '%s' -> '%s'>" % (
            infile,
            outfile,
        )
        CommandNode.__init__(
            self,
            description=description,
            command=params.finalize(),
            dependencies=dependencies,
        )
Example #5
0
    def __init__(self,
                 output_prefix,
                 tfam,
                 tped,
                 indep_filter=None,
                 indep_parameters=None,
                 plink_parameters=None,
                 dependencies=()):
        """Build a 'plink --make-bed' command from a TPED / TFAM file pair.

        The .bed, .bim, .fam and .log files are registered as outputs named
        after 'output_prefix'; the .nosex and .nof files are registered as
        temporary outputs. Extra arguments are derived from 'plink_parameters'
        using self._parse_parameters.
        """
        temp_prefix = os.path.basename(output_prefix)

        call = ["plink", "--make-bed", "--noweb"]
        call += ["--tped", "%(IN_TPED)s"]
        call += ["--tfam", "%(IN_TFAM)s"]
        call += ["--out", "%(TEMP_OUT_PREFIX)s"]
        call.extend(self._parse_parameters(plink_parameters))

        plink = AtomicCmd(call,
                          IN_TPED=tped,
                          IN_TFAM=tfam,
                          TEMP_OUT_PREFIX=temp_prefix,
                          OUT_BED=output_prefix + ".bed",
                          OUT_BIM=output_prefix + ".bim",
                          OUT_FAM=output_prefix + ".fam",
                          OUT_LOG=output_prefix + ".log",
                          TEMP_OUT_NOSEX=temp_prefix + ".nosex",
                          TEMP_OUT_NOF=temp_prefix + ".nof",
                          CHECK_VERSION=PLINK_VERSION,
                          set_cwd=True)

        description = "<BuildBEDFiles -> '%s.*'>" % (output_prefix, )

        CommandNode.__init__(self,
                             description=description,
                             command=plink,
                             dependencies=dependencies)
Example #6
0
    def __init__(self, samples, treefile, bootstraps, output_prefix,
                 dependencies=()):
        """Build the Rscript call running the zonkey "tinytree.r" script.

        The script is given a temporary newick file, the samples table, and a
        temporary output prefix; a PDF and a PNG named after 'output_prefix'
        are registered as outputs. 'treefile' and 'bootstraps' are registered
        as inputs and also stored on the node.
        """
        script = rtools.rscript("zonkey", "tinytree.r")
        call = ("Rscript", script,
                "%(TEMP_OUT_FILE)s",
                "%(IN_SAMPLES)s",
                "%(TEMP_OUT_PREFIX)s")

        draw_cmd = AtomicCmd(
            call,
            AUX_RSCRIPT=script,
            IN_SAMPLES=samples,
            IN_FILE=treefile,
            IN_BOOTSTRAPS=bootstraps,
            TEMP_OUT_FILE="rerooted.newick",
            TEMP_OUT_PREFIX=os.path.basename(output_prefix),
            OUT_TREE_PDF=output_prefix + ".pdf",
            OUT_TREE_PNG=output_prefix + ".png",
            CHECK_RSCRIPT=RSCRIPT_VERSION,
            CHECK_RSCRIPT_APE=rtools.requirement("ape"),
            CHECK_RSCRIPT_GGPLOT2=rtools.requirement("ggplot2"),
            CHECK_RSCRIPT_GRID=rtools.requirement("grid"))

        self._treefile = treefile
        self._bootstraps = bootstraps

        description = "<DrawPhylogeny -> '%s.*'>" % (output_prefix,)
        CommandNode.__init__(self,
                             description=description,
                             command=draw_cmd,
                             dependencies=dependencies)
Example #7
0
    def __init__(self, infile, outfile, genome, from_start=0, from_end=0,
                 strand_relative=False, dependencies=()):
        """Build a 'bedtools slop' command writing its STDOUT to 'outfile'.

        'from_start' and 'from_end' are passed via -l and -r, and must be of
        exactly the same type, or a ValueError is raised. The -s flag is added
        if 'strand_relative' is set, and -pct if 'from_start' is a float.
        """
        if type(from_start) != type(from_end):
            raise ValueError("Parameters 'from_start' and 'from_end' should "
                             "be of same type!")

        call = ["bedtools", "slop"]
        call.extend(("-i", "%(IN_FILE)s"))
        call.extend(("-g", "%(IN_GENOME)s"))
        call.extend(("-l", str(from_start)))
        call.extend(("-r", str(from_end)))

        optional_flags = (
            ("-s", strand_relative),
            ("-pct", type(from_start) is float),
        )
        for flag, enabled in optional_flags:
            if enabled:
                call.append(flag)

        slop_cmd = AtomicCmd(call,
                             IN_FILE=infile,
                             IN_GENOME=genome,
                             OUT_STDOUT=outfile,
                             CHECK_VERSION=BEDTOOLS_VERSION)

        CommandNode.__init__(self,
                             description="<SlopBed: '%s' -> '%s'>"
                             % (infile, outfile),
                             command=slop_cmd,
                             dependencies=dependencies)
Example #8
0
    def __init__(
            self,
            target_name,
            input_file,
            output_file,
            prefix,
            regions_file=None,
            dependencies=(),
    ):
        """Build the "depths" command for 'input_file', writing 'output_file'.

        If 'regions_file' is given, it is passed via --regions-file, and the
        index of the input file (named using prefix["IndexFormat"]) is
        registered as a temporary input.
        """
        # The index format is only looked up when regions are in use
        index_format = prefix["IndexFormat"] if regions_file else regions_file

        depths = factory.new("depths")
        depths.add_value("%(IN_BAM)s")
        depths.add_value("%(OUT_FILE)s")
        depths.set_option("--target-name", target_name)
        depths.set_kwargs(OUT_FILE=output_file, IN_BAM=input_file)

        if regions_file:
            index_file = input_file + index_format

            depths.set_option("--regions-file", "%(IN_REGIONS)s")
            depths.set_kwargs(IN_REGIONS=regions_file,
                              TEMP_IN_INDEX=index_file)

        CommandNode.__init__(
            self,
            command=depths.finalize(),
            description="<DepthHistogram: %s -> '%s'>"
            % (input_file, output_file),
            dependencies=dependencies,
        )
Example #9
0
    def _setup(self, config, temp):
        """Symlink the input file into 'temp', then run CommandNode._setup.

        The link is placed at the path that reroot_path returns for the input
        file and the 'temp' folder, and points at the absolute input path.
        """
        os.symlink(os.path.abspath(self._infile),
                   reroot_path(temp, self._infile))

        CommandNode._setup(self, config, temp)
Example #10
0
    def _teardown(self, config, temp):
        """Remove the pipe file (and its index, if any) before the teardown.

        The index file is only removed if an index format was set for this
        node; CommandNode._teardown is called once the files are gone.
        """
        leftovers = [self.PIPE_FILE]
        if self._index_format:
            leftovers.append(swap_ext(self.PIPE_FILE, self._index_format))

        for filename in leftovers:
            os.remove(os.path.join(temp, filename))

        CommandNode._teardown(self, config, temp)
Example #11
0
    def __init__(self, config, input_bams, command, index_format=None,
                 description=None, threads=1, dependencies=()):
        """Wrap 'command' so that it can be run on one or more BAM files.

        If more than one BAM file is given, a Picard MergeSamFiles command
        writing to self.PIPE_FILE is created, and is run in parallel with the
        user supplied 'command'.

        Raises ValueError if no BAM files are given, if an index is required
        for more than one BAM file, or if 'index_format' is not one of None,
        ".bai", or ".csi".
        """
        self._input_bams = safe_coerce_to_tuple(input_bams)
        self._index_format = index_format

        if not self._input_bams:
            raise ValueError("No input BAM files specified!")
        elif len(self._input_bams) > 1 and index_format:
            raise ValueError("BAM index cannot be required for > 1 file")
        elif index_format not in (None, ".bai", ".csi"):
            raise ValueError("Unknown index format %r" % (index_format,))

        if len(self._input_bams) > 1:
            merge = picard_command(config, "MergeSamFiles")
            merge.set_option("SO", "coordinate", sep="=")
            merge.set_option("COMPRESSION_LEVEL", 0, sep="=")
            merge.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
            # Validation is mostly left to manual ValidateSamFile runs; this
            # is because .csi indexed BAM records can have "invalid" bins.
            merge.set_option("VALIDATION_STRINGENCY", "LENIENT", sep="=")
            # Use the coerced tuple that was validated above; the raw
            # 'input_bams' argument may be an iterable that can only be
            # traversed once, and which was consumed by the coercion.
            merge.add_multiple_options("I", self._input_bams, sep="=")

            merge.set_kwargs(TEMP_OUT_BAM=self.PIPE_FILE)

            command = ParallelCmds([merge.finalize(), command])

        CommandNode.__init__(self,
                             command=command,
                             description=description,
                             threads=threads,
                             dependencies=dependencies)
Example #12
0
    def __init__(self,
                 input_file,
                 output_file,
                 algorithm="auto",
                 options=None,
                 dependencies=()):
        """Build a MAFFT command aligning 'input_file' into 'output_file'.

        Arguments:
          input_file  -- FASTA file appended to the preset command-line.
          output_file -- Destination of the command's standard output.
          algorithm   -- Key (case-insensitive) used to look up the base
                         command-line in _PRESETS; an unknown key results in
                         a KeyError.
          options     -- Optional mapping of extra options, applied to the
                         command using apply_options; defaults to none.
          dependencies -- Passed on unchanged to CommandNode.
        """
        # A None sentinel is used in place of a mutable default argument, so
        # that no dictionary is shared between separate calls
        if options is None:
            options = {}

        command = AtomicCmdBuilder(
            _PRESETS[algorithm.lower()] + ["%(IN_FASTA)s"],
            IN_FASTA=input_file,
            OUT_STDOUT=output_file,
            CHECK_VERSION=MAFFT_VERSION,
        )

        apply_options(command, options)

        self._output_file = output_file

        CommandNode.__init__(
            self,
            command=command.finalize(),
            description="<MAFFTNode (%s): '%s' -> '%s'>" % (
                algorithm,
                input_file,
                output_file,
            ),
            dependencies=dependencies,
        )
Example #13
0
    def __init__(self, infile, index_format='.bai', dependencies=()):
        """Index 'infile' using 'samtools index' via a symlink in the temp dir.

        Three commands are run in sequence: the BAM is symlinked, the symlink
        is indexed, and the resulting index is moved to the final destination
        given by swap_ext(infile, index_format).

        Raises ValueError if 'index_format' is neither '.bai' nor '.csi'.
        """
        basename = os.path.basename(infile)

        if index_format not in ('.bai', '.csi'):
            raise ValueError("Unknown format type %r; expected .bai or .csi"
                             % (index_format,))

        if index_format == '.csi':
            # CSI indexing is requested using the '-c' flag
            samtools_version = SAMTOOLS_VERSION_1x
            index_flags = ["-c"]
        else:
            samtools_version = SAMTOOLS_VERSION
            index_flags = []

        samtools_call = ["samtools", "index"] + index_flags \
            + ["%(TEMP_IN_BAM)s"]

        link_bam = AtomicCmd(["ln", "-s", "%(IN_BAM)s", "%(TEMP_OUT_BAM)s"],
                             IN_BAM=infile,
                             TEMP_OUT_BAM=basename,
                             set_cwd=True)

        index_bam = AtomicCmd(samtools_call,
                              TEMP_IN_BAM=basename,
                              CHECK_SAM=samtools_version)

        move_index = AtomicCmd(["mv", "%(TEMP_IN_BAM)s", "%(OUT_BAM)s"],
                               TEMP_IN_BAM=basename + index_format,
                               OUT_BAM=swap_ext(infile, index_format))

        description = "<BAMIndex (%s): '%s'>" \
            % (index_format[1:].upper(), infile)

        CommandNode.__init__(self,
                             description=description,
                             command=SequentialCmds((link_bam,
                                                     index_bam,
                                                     move_index)),
                             dependencies=dependencies)
Example #14
0
    def _teardown(self, config, temp):
        """Remove per-user folders left in 'temp' before the regular teardown.

        Two folders are removed (if possible) using try_rmtree: one named
        after the current user, and one named "hsperfdata_<user>".
        """
        username = getpass.getuser()
        leftover_dirs = (
            # Picard creates a folder named after the user in the temp-root
            username,
            # Some JREs may create a folder for temporary performance counters
            "hsperfdata_" + username,
        )

        for dirname in leftover_dirs:
            try_rmtree(os.path.join(temp, dirname))

        CommandNode._teardown(self, config, temp)
Example #15
0
    def __init__(self,
                 samples,
                 treefile,
                 bootstraps,
                 output_prefix,
                 dependencies=()):
        """Set up an Rscript invocation of the zonkey "tinytree.r" script.

        The command takes a temporary newick file, the samples table, and a
        temporary output prefix as arguments, and is expected to produce
        '<output_prefix>.pdf' and '<output_prefix>.png'. The tree and
        bootstrap files are registered as inputs and kept on the node.
        """
        tinytree = rtools.rscript("zonkey", "tinytree.r")
        temp_prefix = os.path.basename(output_prefix)
        pdf_file = output_prefix + ".pdf"
        png_file = output_prefix + ".png"

        phylogeny_cmd = AtomicCmd(
            ("Rscript", tinytree, "%(TEMP_OUT_FILE)s",
             "%(IN_SAMPLES)s", "%(TEMP_OUT_PREFIX)s"),
            AUX_RSCRIPT=tinytree,
            IN_SAMPLES=samples,
            IN_FILE=treefile,
            IN_BOOTSTRAPS=bootstraps,
            TEMP_OUT_FILE="rerooted.newick",
            TEMP_OUT_PREFIX=temp_prefix,
            OUT_TREE_PDF=pdf_file,
            OUT_TREE_PNG=png_file,
            CHECK_RSCRIPT=RSCRIPT_VERSION,
            CHECK_RSCRIPT_APE=rtools.requirement("ape"),
            CHECK_RSCRIPT_GGPLOT2=rtools.requirement("ggplot2"),
            CHECK_RSCRIPT_GRID=rtools.requirement("grid"),
        )

        self._treefile = treefile
        self._bootstraps = bootstraps

        CommandNode.__init__(
            self,
            description="<DrawPhylogeny -> '%s.*'>" % (output_prefix, ),
            command=phylogeny_cmd,
            dependencies=dependencies,
        )
Example #16
0
    def __init__(self, output_prefix, tfam, tped,
                 indep_filter=None, indep_parameters=None,
                 plink_parameters=None,
                 dependencies=()):
        """Set up 'plink --make-bed' for the given TPED and TFAM files.

        Output (.bed, .bim, .fam, .log) is named using 'output_prefix', while
        the .nosex and .nof files are treated as temporary files. Additional
        command-line arguments are obtained by passing 'plink_parameters' to
        self._parse_parameters.
        """
        temp_prefix = os.path.basename(output_prefix)
        extra_args = self._parse_parameters(plink_parameters)

        plink_cmd = [
            "plink", "--make-bed", "--noweb",
            "--tped", "%(IN_TPED)s",
            "--tfam", "%(IN_TFAM)s",
            "--out", "%(TEMP_OUT_PREFIX)s",
        ]
        plink_cmd.extend(extra_args)

        command = AtomicCmd(
            plink_cmd,
            IN_TPED=tped,
            IN_TFAM=tfam,
            TEMP_OUT_PREFIX=temp_prefix,
            OUT_BED=output_prefix + ".bed",
            OUT_BIM=output_prefix + ".bim",
            OUT_FAM=output_prefix + ".fam",
            OUT_LOG=output_prefix + ".log",
            TEMP_OUT_NOSEX=temp_prefix + ".nosex",
            TEMP_OUT_NOF=temp_prefix + ".nof",
            CHECK_VERSION=PLINK_VERSION,
            set_cwd=True,
        )

        CommandNode.__init__(
            self,
            description="<BuildBEDFiles -> '%s.*'>" % (output_prefix,),
            command=command,
            dependencies=dependencies,
        )
Example #17
0
    def _setup(self, config, temp):
        """Write "contigs.table" to 'temp', based on idxstats for the input.

        The table contains one row per contig for which the name (as mapped
        by contig_name_to_plink_name) is either numerical or 'X', and which
        is present in self._contigs. Each row lists the ID, the size and the
        number of Ns recorded in self._contigs, and the hits from idxstats.

        Raises NodeError if the size of a contig in the BAM file differs from
        the size recorded in self._contigs.
        """
        with open(os.path.join(temp, "contigs.table"), "w") as handle:
            handle.write("ID\tSize\tNs\tHits\n")

            # Workaround for pysam < 0.9 returning list, >= 0.9 returning str
            for line in "".join(pysam.idxstats(self._input_file)).split('\n'):
                line = line.strip()
                if not line:
                    continue

                name, size, hits, _ = line.split('\t')
                name = contig_name_to_plink_name(name)
                if name is None or not (name.isdigit() or name == 'X'):
                    continue
                elif name not in self._contigs:
                    # Excluding contigs is allowed
                    continue

                contig = self._contigs[name]
                if int(size) != contig['Size']:
                    # The database holds the expected size, whereas the size
                    # found is the one reported for the BAM file
                    raise NodeError(
                        "Size mismatch between database and BAM; "
                        "expected size %i, found %i for contig %r" %
                        (contig['Size'], int(size), name))

                row = {
                    'ID': name,
                    'Size': contig['Size'],
                    'Ns': contig['Ns'],
                    'Hits': hits,
                }

                handle.write('{ID}\t{Size}\t{Ns}\t{Hits}\n'.format(**row))

        CommandNode._setup(self, config, temp)
Example #18
0
    def _setup(self, config, temp):
        """Clean up 'temp' and resume from the latest checkpoint, if usable.

        Old pipes and the ExaML info file are removed after the regular setup
        has run. If checkpoint files are found, these are either removed (if
        the file status cache reports them as outdated relative to the input
        files), or the checkpoint with the highest numerical suffix is passed
        to the command using the "-R" option.
        """
        CommandNode._setup(self, config, temp)

        # The temp folder may contain files from an earlier run; old pipes
        # must be removed to prevent a failure at _teardown
        stale_pipes = glob.glob(os.path.join(temp, "pipe*"))
        for stale_pipe in stale_pipes:
            fileutils.try_remove(stale_pipe)
        # ExaML refuses to overwrite old info files
        fileutils.try_remove(os.path.join(temp, "ExaML_info.Pypeline"))

        pattern = os.path.join(temp, "ExaML_binaryCheckpoint.Pypeline_*")
        checkpoints = glob.glob(pattern)
        if not checkpoints:
            return

        cache = FileStatusCache()
        if cache.are_files_outdated(self.input_files, checkpoints):
            # Checkpoints cannot be used, and are therefore discarded
            for checkpoint in checkpoints:
                fileutils.try_remove(checkpoint)
            return

        def _checkpoint_number(fname):
            return int(fname.rsplit("_", 1)[-1])

        checkpoints.sort(key=_checkpoint_number)
        latest_checkpoint = checkpoints[-1]

        # FIXME: Less hacky solution to modifying AtomicCmds needed
        self._command._command.extend(["-R", latest_checkpoint])
Example #19
0
    def __init__(self, samples, prefix, output_prefix, dependencies=()):
        """Build the Rscript call running the zonkey "pca.r" script.

        The script is given the absolute 'prefix', the samples table and a
        temporary output prefix; the '.eval' and '.evec' files for 'prefix'
        are registered as inputs, and a PDF and a PNG named after
        'output_prefix' are registered as outputs.
        """
        abs_prefix = os.path.abspath(prefix)
        pca_script = rtools.rscript("zonkey", "pca.r")

        plot_cmd = AtomicCmd(
            ["Rscript", pca_script, abs_prefix,
             "%(IN_SAMPLES)s", "%(TEMP_OUT_PREFIX)s"],
            AUX_SCRIPT=pca_script,
            IN_FILE_EVAL=prefix + ".eval",
            IN_FILE_EVEC=prefix + ".evec",
            IN_SAMPLES=samples,
            TEMP_OUT_PREFIX=os.path.basename(output_prefix),
            OUT_PDF=output_prefix + ".pdf",
            OUT_PNG=output_prefix + ".png",
            CHECK_R=RSCRIPT_VERSION,
            CHECK_R_GGPLOT2=rtools.requirement("ggplot2"),
            CHECK_R_LABELS=rtools.requirement("ggrepel"),
            set_cwd=True,
        )

        description = "<PlotPCA -> '%s.*'>" % (output_prefix, )
        CommandNode.__init__(self,
                             description=description,
                             command=plot_cmd,
                             dependencies=dependencies)
Example #20
0
    def __init__(self, infile, index_format='.bai', dependencies=()):
        """Create the link / index / rename commands used to index 'infile'.

        The BAM file is symlinked under its basename, the symlink is indexed
        using 'samtools index' (with '-c' for '.csi' indices), and the index
        is finally moved to swap_ext(infile, index_format).

        Raises ValueError for index formats other than '.bai' and '.csi'.
        """
        basename = os.path.basename(infile)

        samtools_call = ["samtools", "index"]
        if index_format == '.bai':
            samtools_version = SAMTOOLS_VERSION
        elif index_format == '.csi':
            samtools_version = SAMTOOLS_VERSION_1x
            samtools_call.append("-c")
        else:
            raise ValueError("Unknown format type %r; expected .bai or .csi" %
                             (index_format, ))
        samtools_call.append("%(TEMP_IN_BAM)s")

        steps = (
            # 1. Symlink the BAM file under its basename
            AtomicCmd(["ln", "-s", "%(IN_BAM)s", "%(TEMP_OUT_BAM)s"],
                      IN_BAM=infile,
                      TEMP_OUT_BAM=basename,
                      set_cwd=True),
            # 2. Index the symlinked BAM file
            AtomicCmd(samtools_call,
                      TEMP_IN_BAM=basename,
                      CHECK_SAM=samtools_version),
            # 3. Move the index to its final location
            AtomicCmd(["mv", "%(TEMP_IN_BAM)s", "%(OUT_BAM)s"],
                      TEMP_IN_BAM=basename + index_format,
                      OUT_BAM=swap_ext(infile, index_format)),
        )

        index_name = index_format[1:].upper()
        CommandNode.__init__(self,
                             description="<BAMIndex (%s): '%s'>" %
                             (index_name, infile),
                             command=SequentialCmds(steps),
                             dependencies=dependencies)
Example #21
0
 def _setup(self, config, temp):
     """Symlink four tables from self._directory into the 'temp' folder.

     The regular CommandNode setup is run before the links are created.
     """
     CommandNode._setup(self, config, temp)

     table_names = (
         "3pGtoA_freq.txt",
         "5pCtoT_freq.txt",
         "dnacomp.txt",
         "misincorporation.txt",
     )

     for table_name in table_names:
         source = os.path.abspath(os.path.join(self._directory, table_name))
         os.symlink(source, os.path.join(temp, table_name))
Example #22
0
    def _setup(self, config, temp):
        """Symlink the input files into 'temp', and write the .pop file.

        The input file and the matching .bim and .fam files are linked under
        their basenames. For supervised runs, a .pop file is written to
        'temp', containing one line per line in the .fam file; each line holds
        the group recorded for that sample under the "Group(k)" key, or "-"
        if no group was recorded.
        """
        CommandNode._setup(self, config, temp)

        bim_filename = fileutils.swap_ext(self._input_file, ".bim")
        fam_filename = fileutils.swap_ext(self._input_file, ".fam")

        for filename in (self._input_file, bim_filename, fam_filename):
            link_name = os.path.join(temp, os.path.basename(filename))
            os.symlink(os.path.abspath(filename), link_name)

        if not self._supervised:
            return

        pop_filename = fileutils.reroot_path(
            temp, fileutils.swap_ext(fam_filename, ".pop"))
        key = "Group(%i)" % (self._k_groups, )

        with open(fam_filename) as fam_handle, \
                open(pop_filename, "w") as pop_handle:
            for line in fam_handle:
                # The first column of each line is used as the sample name
                sample, _rest = line.split(None, 1)
                group = self._samples.get(sample, {}).get(key, "-")

                pop_handle.write("%s\n" % (group, ))
Example #23
0
    def _teardown(self, config, temp):
        """Clean up user-named folders in 'temp', then run the base teardown.

        Removal is attempted using try_rmtree for the folder named after the
        current user and for the matching "hsperfdata_" folder.
        """
        user = getpass.getuser()

        # Picard creates a folder named after the user in the temp-root
        picard_dir = os.path.join(temp, user)
        try_rmtree(picard_dir)

        # Some JREs may create a folder for temporary performance counters
        perfdata_dir = os.path.join(temp, "hsperfdata_" + user)
        try_rmtree(perfdata_dir)

        CommandNode._teardown(self, config, temp)
Example #24
0
    def __init__(
            self,
            input_file_1,
            output_file,
            reference,
            prefix,
            input_file_2=None,
            threads=1,
            algorithm="mem",
            mapping_options=None,
            cleanup_options=None,
            dependencies=(),
    ):
        """Build a 'bwa mem' / 'bwa bwasw' command and its cleanup command.

        The aligner writes to a pipe and is run in parallel with the cleanup
        command created by _new_cleanup_command.

        Arguments:
          input_file_1 -- First (or only) input file passed to BWA.
          output_file  -- Passed to _new_cleanup_command.
          reference    -- Passed to _get_max_threads and to the cleanup
                          command.
          prefix       -- Prefix passed to BWA.
          input_file_2 -- Optional second input file; if set, the node is
                          described as paired-end ("_PE").
          threads      -- Requested number of threads; the value returned by
                          _get_max_threads is the one actually used.
          algorithm    -- Either "mem" or "bwasw".
          mapping_options -- Optional mapping of extra options applied to the
                          BWA command; defaults to no extra options.
          cleanup_options -- Optional mapping of extra options applied to the
                          cleanup command; defaults to no extra options.
          dependencies -- Passed on unchanged to CommandNode.

        Raises NotImplementedError for any other value of 'algorithm'.
        """
        if algorithm not in ("mem", "bwasw"):
            raise NotImplementedError("BWA algorithm %r not implemented" %
                                      (algorithm, ))

        # None sentinels are used in place of mutable default arguments, so
        # that no dictionary is shared between separate calls
        if mapping_options is None:
            mapping_options = {}
        if cleanup_options is None:
            cleanup_options = {}

        threads = _get_max_threads(reference, threads)

        aln = _new_bwa_command(
            ("bwa", algorithm, prefix, "%(IN_FILE_1)s"),
            prefix,
            IN_FILE_1=input_file_1,
            OUT_STDOUT=AtomicCmd.PIPE,
        )

        if input_file_2:
            aln.add_value("%(IN_FILE_2)s")
            aln.set_kwargs(IN_FILE_2=input_file_2)

        aln.set_option("-t", threads)
        # Mark alternative hits as secondary; required by e.g. Picard
        aln.set_option("-M")

        cleanup = _new_cleanup_command(aln,
                                       output_file,
                                       reference,
                                       paired_end=input_file_1
                                       and input_file_2)

        apply_options(aln, mapping_options)
        apply_options(cleanup, cleanup_options)

        description = _get_node_description(
            name="BWA",
            algorithm="%s%s" %
            (algorithm.upper(), "_PE" if input_file_2 else "_SE"),
            input_files_1=input_file_1,
            input_files_2=input_file_2,
            prefix=prefix,
        )

        CommandNode.__init__(
            self,
            command=ParallelCmds([aln.finalize(),
                                  cleanup.finalize()]),
            description=description,
            threads=threads,
            dependencies=dependencies,
        )
Example #25
0
    def _setup(self, config, temp):
        """Write "contigs.table" to 'temp', based on idxstats for the input.

        The table contains one row per contig for which the name (as mapped
        by contig_name_to_plink_name) is either numerical or 'X', and which
        is present in self._contigs. Each row lists the ID, the size and the
        number of Ns recorded in self._contigs, and the hits from idxstats.

        Raises NodeError if the size of a contig in the BAM file differs from
        the size recorded in self._contigs.
        """
        with open(os.path.join(temp, "contigs.table"), "w") as handle:
            handle.write("ID\tSize\tNs\tHits\n")

            # Workaround for pysam < 0.9 returning list, >= 0.9 returning str
            for line in "".join(pysam.idxstats(self._input_file)).split('\n'):
                line = line.strip()
                if not line:
                    continue

                name, size, hits, _ = line.split('\t')
                name = contig_name_to_plink_name(name)
                if name is None or not (name.isdigit() or name == 'X'):
                    continue
                elif name not in self._contigs:
                    # Contigs missing from the database are skipped, rather
                    # than causing the lookups below to fail with a KeyError
                    continue

                contig = self._contigs[name]
                if int(size) != contig['Size']:
                    # The database holds the expected size, whereas the size
                    # found is the one reported for the BAM file
                    raise NodeError(
                        "Size mismatch between database and BAM; "
                        "expected size %i, found %i for contig %r" %
                        (contig['Size'], int(size), name))

                row = {
                    'ID': name,
                    'Size': contig['Size'],
                    'Ns': contig['Ns'],
                    'Hits': hits,
                }

                handle.write('{ID}\t{Size}\t{Ns}\t{Hits}\n'.format(**row))

        CommandNode._setup(self, config, temp)
Example #26
0
    def __init__(self, input_prefix, output_prefix, nchroms, dependencies=()):
        """Build a 'smartpca' command reading a temporary parameters file.

        The .bed, .bim and .fam files for 'input_prefix' are registered as
        inputs; the log (STDOUT), .evec, .eval and .deleted_snps files for
        'output_prefix' are registered as outputs. The prefixes and 'nchroms'
        are stored on the node.
        """
        self._input_prefix = input_prefix
        self._output_prefix = output_prefix
        self._nchroms = nchroms

        cmd = AtomicCmd(
            ("smartpca", "-p", "%(TEMP_OUT_PARAMS)s"),
            TEMP_OUT_PARAMS="parameters.txt",
            IN_FILE_BED=input_prefix + ".bed",
            IN_FILE_BIM=input_prefix + ".bim",
            IN_FILE_FAM=input_prefix + ".fam",
            OUT_STDOUT=output_prefix + ".log",
            OUT_EVEC=output_prefix + ".evec",
            OUT_EVAL=output_prefix + ".eval",
            OUT_SNPS=output_prefix + ".deleted_snps",
            CHECK_VERSION=SMARTPCA_VERSION,
            set_cwd=True,
        )

        CommandNode.__init__(
            self,
            # The closing quote was previously missing from the description
            description="<SmartPCA -> '%s.*'>" % (output_prefix, ),
            command=cmd,
            dependencies=dependencies,
        )
Example #27
0
    def __init__(self,
                 input_file,
                 output_prefix,
                 order,
                 samples,
                 dependencies=()):
        """Build the Rscript call running the zonkey "admixture.r" script.

        The script is given the input file, a temporary "samples.txt" file
        and a temporary output prefix; a PDF and a PNG named after
        'output_prefix' are registered as outputs. The samples and the
        ordering (with "Sample" appended) are stored on the node.
        """
        self._samples = samples
        self._order = tuple(order) + ("Sample", )

        admixture_script = rtools.rscript("zonkey", "admixture.r")
        call = ("Rscript", admixture_script, "%(IN_FILE)s",
                "%(TEMP_OUT_NAMES)s", "%(TEMP_OUT_PREFIX)s")

        plot_cmd = AtomicCmd(
            call,
            AUX_RSCRIPT=admixture_script,
            IN_FILE=input_file,
            IN_SAMPLES=samples,
            OUT_PDF=output_prefix + ".pdf",
            OUT_PNG=output_prefix + ".png",
            TEMP_OUT_NAMES="samples.txt",
            TEMP_OUT_PREFIX=os.path.basename(output_prefix),
            CHECK_R=RSCRIPT_VERSION,
            CHECK_R_GGPLOT2=rtools.requirement("ggplot2"),
            CHECK_R_RESHAPE2=rtools.requirement("reshape2"),
            set_cwd=True,
        )

        description = "<AdmixturePlot -> '%s.*'>" % (output_prefix, )
        CommandNode.__init__(self,
                             description=description,
                             command=plot_cmd,
                             dependencies=dependencies)
Example #28
0
    def __init__(self, output_root, table, bamfile, downsample,
                 dependencies=()):
        """Build the "build_tped" command writing its files to 'output_root'.

        The command is given the temp directory, the table and the BAM file
        as arguments. The .bai index of the BAM file is only registered as an
        input when 'downsample' is not set.
        """
        def _in_root(filename):
            return os.path.join(output_root, filename)

        builder = factory.new("build_tped")
        builder.set_option("--name", "Sample")
        builder.set_option("--downsample", downsample)
        for value in ("%(TEMP_DIR)s", "%(IN_TABLE)s", "%(IN_BAM)s"):
            builder.add_value(value)

        if not downsample:
            # Needed for random access (chromosomes are read 1 ... 31)
            builder.set_kwargs(IN_BAI=fileutils.swap_ext(bamfile, ".bai"))

        builder.set_kwargs(OUT_TFAM=_in_root("common.tfam"),
                           OUT_SUMMARY=_in_root("common.summary"),
                           OUT_TPED_INCL_TS=_in_root("incl_ts.tped"),
                           OUT_TPED_EXCL_TS=_in_root("excl_ts.tped"),
                           IN_TABLE=table,
                           IN_BAM=bamfile)

        description = "<BuildTPEDFiles -> %r>" % (_in_root('*'),)
        CommandNode.__init__(self,
                             description=description,
                             command=builder.finalize(),
                             dependencies=dependencies)
Example #29
0
    def __init__(self, infile, index_format=".bai", dependencies=()):
        """Build a 'samtools index' command writing to infile + index_format.

        The '-c' flag is passed to samtools for '.csi' indices.

        Raises ValueError if 'index_format' is neither '.bai' nor '.csi'.
        """
        if index_format not in (".bai", ".csi"):
            raise ValueError("Unknown format type %r; expected .bai or .csi" %
                             (index_format, ))

        samtools_call = ["samtools", "index"]
        if index_format == ".csi":
            samtools_call.append("-c")
        samtools_call += ["%(IN_BAM)s", "%(OUT_IDX)s"]

        index_cmd = AtomicCmd(
            samtools_call,
            IN_BAM=infile,
            OUT_IDX=infile + index_format,
            CHECK_SAM=SAMTOOLS_VERSION,
        )

        index_name = index_format[1:].upper()
        description = "<BAMIndex (%s): '%s'>" % (index_name, infile)

        CommandNode.__init__(
            self,
            description=description,
            command=index_cmd,
            dependencies=dependencies,
        )
Example #30
0
    def __init__(self, input_prefix, output_prefix, tfam,
                 parameters=None, dependencies=()):
        """Set up a plink frequency run followed by gzip of the result.

        input_prefix  -- prefix of the ".bed", ".bim" and ".fam" inputs.
        output_prefix -- prefix of the ".frq.strat.*" outputs.
        tfam          -- stored on the node as `_tfam`; not used by the
                         commands built here.
        parameters    -- optional string; split on whitespace and appended
                         to the plink call.
        dependencies  -- passed through to CommandNode.
        """
        basename = os.path.basename(output_prefix)

        plink_cmd = ["plink", "--freq", "--missing", "--noweb",
                     "--bfile", input_prefix,
                     "--within", "%(TEMP_OUT_CLUST)s",
                     "--out", "%(TEMP_OUT_PREFIX)s"]

        if parameters:
            plink_cmd.extend(parameters.split())

        plink = AtomicCmd(plink_cmd,
                          IN_BED=input_prefix + ".bed",
                          IN_BIM=input_prefix + ".bim",
                          IN_FAM=input_prefix + ".fam",
                          TEMP_OUT_CLUST="samples.clust",
                          OUT_NOSEX=output_prefix + ".frq.strat.nosex",
                          OUT_LOG=output_prefix + ".frq.strat.log",
                          TEMP_OUT_PREFIX=basename,
                          CHECK_VERSION=PLINK_VERSION)

        # gzip consumes the temporary ".frq.strat" file written by plink
        gzip = AtomicCmd(["gzip", "%(TEMP_IN_FREQ)s"],
                         TEMP_IN_FREQ=basename + ".frq.strat",
                         OUT_FREQ=output_prefix + ".frq.strat.gz")

        # FIXME: the original note here was left unfinished ("Can be").
        # NOTE(review): presumably `_tfam` is read later to generate the
        # "samples.clust" file declared above -- confirm against _setup.
        self._tfam = tfam
        self._basename = basename

        # The closing '>' was missing from this description, unlike the
        # descriptions of the other nodes.
        CommandNode.__init__(self,
                             description="<BuildFreqFiles -> '%s.*'>"
                             % (output_prefix,),
                             command=SequentialCmds((plink, gzip)),
                             dependencies=dependencies)
Example #31
0
    def __init__(self, infile, outfile, regions, options, dependencies=()):
        """Set up "vcf_filter" on `infile`, piped through bgzip to `outfile`.

        One "--homozygous-chromosome" option is added for each entry of
        regions["HomozygousContigs"]; `options` is handed to apply_options.
        """
        filter_cmd = factory.new("vcf_filter")
        filter_cmd.add_value("%(IN_VCF)s")

        for contig in regions["HomozygousContigs"]:
            filter_cmd.add_option("--homozygous-chromosome", contig)
        filter_cmd.set_kwargs(IN_VCF=infile, OUT_STDOUT=AtomicCmd.PIPE)

        apply_options(filter_cmd, options)

        # bgzip takes the filter command as its STDIN
        compress_cmd = AtomicCmdBuilder(["bgzip"],
                                        IN_STDIN=filter_cmd,
                                        OUT_STDOUT=outfile)

        description = "<VCFFilter: '%s' -> '%s'>" % (infile, outfile)
        commands = ParallelCmds([filter_cmd.finalize(),
                                 compress_cmd.finalize()])

        CommandNode.__init__(self,
                             description=description,
                             command=commands,
                             dependencies=dependencies)
Example #32
0
    def _setup(self, config, temp):
        """Clean stale files from `temp` and resume from a checkpoint.

        If checkpoint files exist and are not outdated relative to the
        input files, "-R <checkpoint>" is appended to the command, using
        the checkpoint with the highest numeric suffix. Otherwise the
        checkpoint files are removed.
        """
        CommandNode._setup(self, config, temp)

        # The temp folder may contain old files:
        # Remove old pipes to prevent failure at _teardown
        stale_pipes = glob.glob(os.path.join(temp, "pipe*"))
        for stale_pipe in stale_pipes:
            fileutils.try_remove(stale_pipe)
        # ExaML refuses to overwrite old info files
        fileutils.try_remove(os.path.join(temp, "ExaML_info.Pypeline"))

        pattern = os.path.join(temp, "ExaML_binaryCheckpoint.Pypeline_*")
        checkpoints = glob.glob(pattern)
        if not checkpoints:
            return

        if FileStatusCache().are_files_outdated(self.input_files,
                                                checkpoints):
            # The checkpoints cannot be used; discard all of them
            for outdated in checkpoints:
                fileutils.try_remove(outdated)
            return

        def _checkpoint_number(fname):
            # Numeric suffix following the final underscore
            return int(fname.rsplit("_", 1)[-1])

        latest = sorted(checkpoints, key=_checkpoint_number)[-1]

        # FIXME: Less hacky solution to modifying AtomicCmds needed
        self._command._command.append("-R")
        self._command._command.append(latest)
Example #33
0
    def _setup(self, config, temp):
        """See CommandNode._setup."""
        # Symlink the input file at the path reroot_path yields for 'temp',
        # before running the regular setup.
        os.symlink(os.path.abspath(self._infile),
                   reroot_path(temp, self._infile))

        CommandNode._setup(self, config, temp)
Example #34
0
    def _setup(self, config, temp):
        """Write "contigs.table" into `temp`, then run the regular setup.

        One row is written per idxstats record whose (mapped) name is in
        `self._contigs`; Size and Ns come from `self._contigs`, Hits from
        the third idxstats column.
        """
        table_path = os.path.join(temp, "contigs.table")
        with open(table_path, "w") as handle:
            handle.write("ID\tSize\tNs\tHits\n")

            # Workaround for pysam < 0.9 returning list, >= 0.9 returning str
            idxstats = "".join(pysam.idxstats(self._input_file))
            for record in idxstats.split("\n"):
                record = record.strip()
                if not record:
                    continue

                # Exactly four tab-separated columns are expected
                name, _, hits, _ = record.split("\t")
                name = self._mapping.get(name, name)
                if name not in self._contigs:
                    # Excluding contigs is allowed
                    continue

                stats = self._contigs[name]
                handle.write("{ID}\t{Size}\t{Ns}\t{Hits}\n".format(
                    ID=name,
                    Size=stats["Size"],
                    Ns=stats["Ns"],
                    Hits=hits,
                ))

        CommandNode._setup(self, config, temp)
Example #35
0
    def __init__(self,
                 contigs,
                 mapping,
                 input_file,
                 output_prefix,
                 dependencies=()):
        """Set up the Rscript call that runs the zonkey "coverage.r" script.

        `mapping` is stored inverted (values become keys), and
        `<output_prefix>.pdf` / `<output_prefix>.png` are declared as
        outputs.
        """
        self._contigs = contigs
        # Inverted lookup: value -> key
        self._mapping = {value: key for key, value in mapping.items()}
        self._input_file = input_file

        script = rtools.rscript("zonkey", "coverage.r")
        call = ("Rscript", script,
                "%(TEMP_OUT_TABLE)s", "%(TEMP_OUT_PREFIX)s")

        cmd = AtomicCmd(call,
                        AUX_RSCRIPT=script,
                        IN_FILE=input_file,
                        TEMP_OUT_TABLE="contigs.table",
                        OUT_PDF=output_prefix + ".pdf",
                        OUT_PNG=output_prefix + ".png",
                        TEMP_OUT_PREFIX=os.path.basename(output_prefix),
                        CHECK_R=RSCRIPT_VERSION,
                        CHECK_R_GGPLOT2=rtools.requirement("ggplot2"),
                        set_cwd=True)

        description = "<CoveragePlot -> '%s.*'>" % (output_prefix, )
        CommandNode.__init__(self,
                             description=description,
                             command=cmd,
                             dependencies=dependencies)
Example #36
0
    def _setup(self, config, temp):
        """Symlink the input files into `temp`; write a ".pop" file if needed.

        For supervised runs, one line is written per line of the ".fam"
        file: the sample's "Group(k)" value from `self._samples`, or "-"
        when the sample or the key is missing.
        """
        CommandNode._setup(self, config, temp)

        bim_filename = fileutils.swap_ext(self._input_file, ".bim")
        fam_filename = fileutils.swap_ext(self._input_file, ".fam")

        for filename in (self._input_file, bim_filename, fam_filename):
            link_name = os.path.join(temp, os.path.basename(filename))
            os.symlink(os.path.abspath(filename), link_name)

        if not self._supervised:
            return

        pop_filename = fileutils.reroot_path(
            temp, fileutils.swap_ext(fam_filename, ".pop"))

        key = "Group(%i)" % (self._k_groups,)
        with open(fam_filename) as fam_handle, \
                open(pop_filename, "w") as pop_handle:
            for line in fam_handle:
                # Each line must hold a sample name plus at least one field
                sample, _ = line.split(None, 1)
                group = self._samples.get(sample, {}).get(key, "-")

                pop_handle.write("%s\n" % (group,))
Example #37
0
def test_commandnode_run__exception_on_error():
    """_run raises CmdNodeError for a mock built with return_codes=(1,)."""
    failing_cmd = _build_cmd_mock(return_codes=(1, ))
    node = CommandNode(failing_cmd)

    with pytest.raises(CmdNodeError):
        node._run(None, "xTMPx")

    # The command must have been run and joined, and nothing else
    expected_calls = [call.run("xTMPx"), call.join()]
    assert failing_cmd.mock_calls == expected_calls
Example #38
0
    def __init__(self, input_file, output_prefix, order, samples,
                 dependencies=()):
        """Set up the Rscript call that runs the zonkey "admixture.r" script.

        `samples` is stored on the node and registered as an input; `order`
        is stored as a tuple with "Sample" appended. `<output_prefix>.pdf`
        and `<output_prefix>.png` are declared as outputs.
        """
        self._samples = samples
        self._order = tuple(order) + ("Sample",)

        script = rtools.rscript("zonkey", "admixture.r")
        call = ("Rscript", script, "%(IN_FILE)s",
                "%(TEMP_OUT_NAMES)s", "%(TEMP_OUT_PREFIX)s")

        cmd = AtomicCmd(
            call,
            AUX_RSCRIPT=script,
            IN_FILE=input_file,
            IN_SAMPLES=samples,
            OUT_PDF=output_prefix + ".pdf",
            OUT_PNG=output_prefix + ".png",
            TEMP_OUT_NAMES="samples.txt",
            TEMP_OUT_PREFIX=os.path.basename(output_prefix),
            CHECK_R=RSCRIPT_VERSION,
            CHECK_R_GGPLOT2=rtools.requirement("ggplot2"),
            CHECK_R_RESHAPE2=rtools.requirement("reshape2"),
            set_cwd=True,
        )

        description = "<AdmixturePlot -> '%s.*'>" % (output_prefix,)
        CommandNode.__init__(
            self,
            description=description,
            command=cmd,
            dependencies=dependencies,
        )
Example #39
0
    def __init__(self, control_file, sequence_file, trees_file, output_tar,
                 exclude_groups=(), dependencies=()):
        """Set up a "codeml" run followed by "tar" of its output files.

        The control, sequence and trees files are stored on the node and
        registered as inputs; the files named by _get_codeml_files are
        archived into `output_tar`.
        """
        self._exclude_groups = safe_coerce_to_frozenset(exclude_groups)
        self._control_file = control_file
        self._sequence_file = sequence_file
        self._trees_file = trees_file

        codeml_outputs = CodemlNode._get_codeml_files("TEMP_OUT_CODEML")
        paml_cmd = AtomicCmd(
            ["codeml", "template.ctl"],
            IN_CONTROL_FILE=control_file,
            IN_SEQUENCE_FILE=sequence_file,
            IN_TREES_FILE=trees_file,
            TEMP_OUT_CTL="template.ctl",
            TEMP_OUT_SEQS="template.seqs",
            TEMP_OUT_TREES="template.trees",
            TEMP_OUT_STDOUT="template.stdout",
            TEMP_OUT_STDERR="template.stderr",
            TEMP_OUT_4FOLD="4fold.nuc",
            # Prevent prompts from blocking
            IN_STDIN="/dev/null",
            set_cwd=True,
            **codeml_outputs
        )

        # One "%(KEY)s" placeholder per file to be archived
        tar_pairs = CodemlNode._get_codeml_files("TEMP_IN_CODEML")
        tar_call = ["tar", "cvzf", "%(OUT_FILE)s"]
        tar_call.extend("%%(%s)s" % (key,) for key in tar_pairs)
        tar_cmd = AtomicCmd(tar_call,
                            OUT_FILE=output_tar,
                            set_cwd=True,
                            **tar_pairs)

        description = "<CodemlNode: %r -> %r>" % (sequence_file, output_tar)
        CommandNode.__init__(self,
                             description=description,
                             command=SequentialCmds([paml_cmd, tar_cmd]),
                             dependencies=dependencies)
Example #40
0
    def __init__(self,
                 output_root,
                 table,
                 bamfile,
                 downsample,
                 dependencies=()):
        """Set up the "zonkey_tped" command writing its files to `output_root`.

        `table` and `bamfile` are registered as IN_TABLE and IN_BAM, and
        `downsample` is forwarded as the "--downsample" option. When
        `downsample` is falsy, the ".bai" index of `bamfile` is registered
        as an additional input.
        """
        builder = factory.new("zonkey_tped")
        builder.set_option("--name", "Sample")
        builder.set_option("--downsample", downsample)
        builder.add_value("%(TEMP_DIR)s")
        builder.add_value("%(IN_TABLE)s")
        builder.add_value("%(IN_BAM)s")

        if not downsample:
            # Needed for random access (chromosomes are read 1 ... 31)
            index_file = fileutils.swap_ext(bamfile, ".bai")
            builder.set_kwargs(IN_BAI=index_file)

        tfam_path = os.path.join(output_root, "common.tfam")
        summary_path = os.path.join(output_root, "common.summary")
        incl_ts_path = os.path.join(output_root, "incl_ts.tped")
        excl_ts_path = os.path.join(output_root, "excl_ts.tped")

        builder.set_kwargs(
            OUT_TFAM=tfam_path,
            OUT_SUMMARY=summary_path,
            OUT_TPED_INCL_TS=incl_ts_path,
            OUT_TPED_EXCL_TS=excl_ts_path,
            IN_TABLE=table,
            IN_BAM=bamfile,
        )

        description = "<BuildTPEDFiles -> %r>" % (
            os.path.join(output_root, '*'), )
        CommandNode.__init__(
            self,
            description=description,
            command=builder.finalize(),
            dependencies=dependencies,
        )
Example #41
0
 def __init__(self, parameters):
     """Create the node from `parameters`, keeping the command's kwargs."""
     self._kwargs = parameters.command.kwargs

     reduce_cmd = parameters.command.finalize()
     label = "<RAxMLReduce: '%s' -> '%s'>" % (
         parameters.input_alignment,
         parameters.output_alignment,
     )
     CommandNode.__init__(self,
                          command=reduce_cmd,
                          description=label,
                          dependencies=parameters.dependencies)
Example #42
0
    def __init__(self, samples, prefix, output_prefix, dependencies=()):
        """Set up the three TreeMix plotting commands, run in sequence.

        Produces `<output_prefix>_tree` and `<output_prefix>_residuals`
        (each as .pdf and .png) and `<output_prefix>_variance.txt`.
        """
        abs_prefix = os.path.abspath(prefix)
        basename = os.path.basename(output_prefix)

        def _figure_command(function, suffix):
            # Plotting command with PDF/PNG outputs named after 'suffix'
            return self._plot_command(
                prefix, function, abs_prefix,
                "%(IN_SAMPLES)s", "%(TEMP_OUT_PREFIX)s",
                IN_SAMPLES=samples,
                TEMP_OUT_PREFIX=basename + suffix,
                OUT_PDF=output_prefix + suffix + ".pdf",
                OUT_PNG=output_prefix + suffix + ".png")

        # TreeMix plots with migration edges
        tree_cmd = _figure_command("plot_tree", "_tree")

        # Heatmap showing TreeMix residuals
        residuals_cmd = _figure_command("plot_residuals", "_residuals")

        # Text file containing % of variance explained by model
        variance_cmd = self._plot_command(
            prefix, "variance", abs_prefix,
            "%(OUT_TXT)s",
            OUT_TXT=output_prefix + "_variance.txt")

        description = "<PlotTreemix -> '%s.*'>" % (output_prefix,)
        commands = SequentialCmds((tree_cmd, residuals_cmd, variance_cmd))
        CommandNode.__init__(self,
                             description=description,
                             command=commands,
                             dependencies=dependencies)
Example #43
0
 def _setup(self, config, temp):
     """Run the regular setup, then symlink four files into `temp`.

     The files are looked up by name in `self._directory`.
     """
     CommandNode._setup(self, config, temp)

     filenames = ("3pGtoA_freq.txt", "5pCtoT_freq.txt", "dnacomp.txt",
                  "misincorporation.txt")
     for fname in filenames:
         source = os.path.abspath(os.path.join(self._directory, fname))
         os.symlink(source, os.path.join(temp, fname))
Example #44
0
    def __init__(self, samples, prefix, output_prefix, dependencies=()):
        """Set up the Rscript call that runs the zonkey "pca.r" script.

        `<prefix>.eval`, `<prefix>.evec` and `samples` are registered as
        inputs; `<output_prefix>.pdf` and `<output_prefix>.png` as outputs.
        """
        abs_prefix = os.path.abspath(prefix)
        script = rtools.rscript("zonkey", "pca.r")

        cmd = AtomicCmd(
            ["Rscript", script,
             abs_prefix, "%(IN_SAMPLES)s", "%(TEMP_OUT_PREFIX)s"],
            AUX_SCRIPT=script,
            IN_FILE_EVAL=prefix + ".eval",
            IN_FILE_EVEC=prefix + ".evec",
            IN_SAMPLES=samples,
            TEMP_OUT_PREFIX=os.path.basename(output_prefix),
            OUT_PDF=output_prefix + ".pdf",
            OUT_PNG=output_prefix + ".png",
            CHECK_R=RSCRIPT_VERSION,
            CHECK_R_GGPLOT2=rtools.requirement("ggplot2"),
            CHECK_R_LABELS=rtools.requirement("directlabels"),
            set_cwd=True,
        )

        description = "<PlotPCA -> '%s.*'>" % (output_prefix,)
        CommandNode.__init__(
            self,
            description=description,
            command=cmd,
            dependencies=dependencies,
        )
Example #45
0
    def __init__(self,
                 input_file,
                 output_prefix,
                 threads=1,
                 options=None,
                 dependencies=()):
        """Set up the single-end (SE) adapter-removal command.

        input_file    -- reads passed to the command via "--file1".
        output_prefix -- prefix of the ".settings", ".truncated.gz" and
                         ".discarded.gz" outputs.
        threads       -- forwarded to the common parameters and to
                         CommandNode.
        options       -- mapping passed to _get_common_parameters and
                         apply_options; None (the default) means no
                         options.
        dependencies  -- passed through to CommandNode.
        """
        if options is None:
            # Use a fresh dict per call rather than a shared mutable default
            options = {}

        # See below for parameters in common between SE/PE
        cmd = _get_common_parameters(threads=threads, options=options)

        # Prefix for output files, ensure that all end up in temp folder
        cmd.set_option("--basename", "%(TEMP_OUT_BASENAME)s")

        output_tmpl = output_prefix + ".%s.gz"
        cmd.set_kwargs(
            TEMP_OUT_BASENAME=os.path.basename(output_prefix),
            OUT_SETTINGS=output_prefix + ".settings",
            OUT_MATE_1=output_tmpl % ("truncated", ),
            OUT_DISCARDED=output_tmpl % ("discarded", ),
        )

        cmd.set_option("--file1", "%(IN_READS_1)s")
        cmd.set_kwargs(IN_READS_1=input_file)

        apply_options(cmd, options)

        CommandNode.__init__(
            self,
            command=cmd.finalize(),
            threads=threads,
            description="<AdapterRM (SE): %s -> '%s.*'>" % (
                fileutils.describe_files(input_file),
                output_prefix,
            ),
            dependencies=dependencies,
        )
Example #46
0
 def __init__(self, parameters):
     """Create the node from the finalized command of `parameters`."""
     index_cmd = parameters.command.finalize()
     label = "<BWA Index '%s' -> '%s.*'>" % (
         parameters.input_file,
         parameters.prefix,
     )
     CommandNode.__init__(
         self,
         command=index_cmd,
         description=label,
         dependencies=parameters.dependencies,
     )
Example #47
0
    def _teardown(self, config, temp):
        """Drop the RAxML info file and rename the tree before teardown.

        "RAxML_parsimonyTree.output.0" is moved to the path that
        reroot_path gives for `self._output_tree`.
        """
        os.remove(os.path.join(temp, "RAxML_info.output"))

        fileutils.move_file(
            os.path.join(temp, "RAxML_parsimonyTree.output.0"),
            fileutils.reroot_path(temp, self._output_tree),
        )

        CommandNode._teardown(self, config, temp)
Example #48
0
    def _teardown(self, config, temp):
        """Remove "RAxML_info.output", move the tree, then tear down."""
        info_file = os.path.join(temp, "RAxML_info.output")
        os.remove(info_file)

        # Move the tree to the name given by reroot_path for _output_tree
        tree_file = os.path.join(temp, "RAxML_parsimonyTree.output.0")
        renamed_tree = fileutils.reroot_path(temp, self._output_tree)
        fileutils.move_file(tree_file, renamed_tree)

        CommandNode._teardown(self, config, temp)
Example #49
0
    def _teardown(self, config, temp):
        """Remove PIPE_FILE (and its index file, if any) from `temp`."""
        pipe_path = os.path.join(temp, self.PIPE_FILE)
        os.remove(pipe_path)

        if self._index_format:
            # The index is named after the pipe, with the extension swapped
            index_name = swap_ext(self.PIPE_FILE, self._index_format)
            os.remove(os.path.join(temp, index_name))

        CommandNode._teardown(self, config, temp)
Example #50
0
    def __init__(self, parameters):
        """Create the node from `parameters`, remembering its directory."""
        self._directory = parameters.directory

        label = "<mapDamage (model): %r>" % (parameters.directory, )
        model_cmd = parameters.command.finalize()
        CommandNode.__init__(
            self,
            command=model_cmd,
            description=label,
            dependencies=parameters.dependencies,
        )
Example #51
0
    def _setup(self, config, temp):
        """Symlink the alignment and partition files into `temp`.

        The link names are taken from the matching "TEMP_" entries of
        `self._kwargs`; the regular setup runs afterwards.
        """
        for key in ("IN_ALIGNMENT", "IN_PARTITION"):
            link_name = os.path.join(temp, self._kwargs["TEMP_" + key])
            os.symlink(os.path.abspath(self._kwargs[key]), link_name)

        CommandNode._setup(self, config, temp)
Example #52
0
    def __init__(self, parameters):
        """Store the mapDamage directory and initialize the CommandNode."""
        self._directory = parameters.directory

        node_kwargs = {
            "description": "<mapDamage (model): %r>" % (parameters.directory,),
            "command": parameters.command.finalize(),
            "dependencies": parameters.dependencies,
        }
        CommandNode.__init__(self, **node_kwargs)
Example #53
0
 def _teardown(self, config, temp):
     """Move each bootstrap file to its rerooted name, then tear down."""
     pairs = self._bootstraps(self._output_template,
                              self._bootstrap_num,
                              self._bootstrap_start)
     for (temp_name, final_name) in pairs:
         fileutils.move_file(os.path.join(temp, temp_name),
                             fileutils.reroot_path(temp, final_name))

     CommandNode._teardown(self, config, temp)
Example #54
0
    def _setup(self, config, temp):
        """Run the regular setup, then write "samples.clust" into `temp`.

        Each line holds the first field of a line from the `self._tfam`
        file, repeated three times.
        """
        CommandNode._setup(self, config, temp)

        # Read every sample name before the output file is created
        samples = []
        with open(self._tfam) as in_handle:
            for line in in_handle:
                samples.append(line.split(None, 1)[0])

        clust_path = os.path.join(temp, "samples.clust")
        with open(clust_path, "w") as handle:
            handle.writelines("{0} {0} {0}\n".format(sample)
                              for sample in samples)
Example #55
0
    def _setup(self, config, temp):
        """Run the regular setup, then symlink `self._symlinks` into `temp`."""
        CommandNode._setup(self, config, temp)

        # Required to avoid the creation of files outside the temp folder
        for filename in self._symlinks:
            link_name = os.path.join(temp, os.path.basename(filename))
            os.symlink(os.path.abspath(filename), link_name)
Example #56
0
    def _teardown(self, config, temp):
        """Write the parameters file and an empty hash file, then tear down.

        Both paths are produced by reroot_path from `temp`.
        """
        params_path = fileutils.reroot_path(temp, self._params_file)
        with open(params_path, "w") as out:
            out.write("k: %i\n" % (self._param_k,))
            out.write("m: %i\n" % (self._param_m,))
            out.write("outgroup: %r\n" % (list(self._param_outgroup),))

        # Create (or truncate) the hash file; nothing is written to it
        hash_path = fileutils.reroot_path(temp, self._parameters_hash)
        with open(hash_path, "w"):
            pass

        CommandNode._teardown(self, config, temp)
Example #57
0
    def __init__(self, parameters):
        """Create the node from `parameters`.

        Stores the absolute path of the input alignment in `_symlinks` and
        the basename of the output tree in `_output_tree`.
        """
        self._symlinks = [os.path.abspath(parameters.input_alignment)]
        self._output_tree = os.path.basename(parameters.output_tree)

        parsimonator_cmd = parameters.command.finalize()
        label = "<Parsimonator: '%s' -> '%s'>" % (
            parameters.input_alignment,
            parameters.output_tree,
        )
        CommandNode.__init__(self,
                             command=parsimonator_cmd,
                             description=label,
                             dependencies=parameters.dependencies)