Ejemplo n.º 1
1
    def _teardown(self, config, temp):
        """Remove per-user folders from `temp`, then run CommandNode._teardown.

        Two folders are removed with try_rmtree(): one named after the current
        user and one named "hsperfdata_<user>".
        """
        username = getpass.getuser()
        # Picard creates a folder named after the user in the temp-root, and
        # some JREs may create a folder for temporary performance counters.
        for dirname in (username, "hsperfdata_" + username):
            try_rmtree(os.path.join(temp, dirname))

        CommandNode._teardown(self, config, temp)
Ejemplo n.º 2
0
    def _setup(self, config, temp):
        """Clean stale files out of `temp` and resume from a checkpoint if possible.

        After CommandNode._setup has run, leftover "pipe*" files and the
        "ExaML_info.Pypeline" file are removed. If checkpoint files exist and
        FileStatusCache reports them up to date relative to self.input_files,
        "-R <checkpoint>" is appended to the command, using the checkpoint
        with the highest numeric suffix; otherwise the checkpoints are removed.
        """
        CommandNode._setup(self, config, temp)

        # The temp folder may contain old files: old pipes are removed to
        # prevent failure at _teardown ...
        stale_files = glob.glob(os.path.join(temp, "pipe*"))
        # ... and ExaML refuses to overwrite old info files.
        stale_files.append(os.path.join(temp, "ExaML_info.Pypeline"))
        for stale_fname in stale_files:
            fileutils.try_remove(stale_fname)

        # Resume from last checkpoint, if one such was generated
        pattern = os.path.join(temp, "ExaML_binaryCheckpoint.Pypeline_*")
        checkpoints = glob.glob(pattern)
        if not checkpoints:
            return

        if not FileStatusCache().files_up_to_date(checkpoints, self.input_files):
            # Checkpoints are out of date; discard all of them
            for fpath in checkpoints:
                fileutils.try_remove(fpath)
            return

        def _checkpoint_number(fname):
            # The checkpoint number is the text following the final "_"
            return int(fname.rsplit("_", 1)[-1])

        newest = sorted(checkpoints, key=_checkpoint_number)[-1]

        # FIXME: Less hacky solution to modifying AtomicCmds needed
        for argument in ("-R", newest):
            self._command._command.append(argument)
Ejemplo n.º 3
0
 def __init__(self, parameters):
     """Initialise the node from `parameters`.

     Reads `input_bams`, `command` (finalized here) and `dependencies`
     from `parameters`.
     """
     input_summary = describe_files(parameters.input_bams)
     description = "<MarkDuplicates: %s>" % (input_summary,)
     command = parameters.command.finalize()

     CommandNode.__init__(self,
                          command=command,
                          description=description,
                          dependencies=parameters.dependencies)
Ejemplo n.º 4
0
 def __init__(self, parameters):
     """Initialise the node from `parameters`.

     Reads `input_bams`, `output_bam`, `command` (finalized here) and
     `dependencies` from `parameters`.
     """
     num_inputs = len(parameters.input_bams)
     description = "<Merge BAMs: %i file(s) -> '%s'>" % (num_inputs, parameters.output_bam)
     command = parameters.command.finalize()

     CommandNode.__init__(self,
                          command=command,
                          description=description,
                          dependencies=parameters.dependencies)
Ejemplo n.º 5
0
    def __init__(self, config, reference, input_files, output_file, dependencies):
        """Build a two-step mapDamage node: a plain run followed by a rescale run.

        Each step reads the stream set up by concatenate_input_bams() on
        stdin. The second step ("--rescale-only") writes `output_file`. The
        two steps are combined with SequentialCmds.
        """
        # Arguments shared by both mapDamage invocations
        shared_args = ["-n", _MAPDAMAGE_MAX_READS,
                       "-i", "-",
                       "-d", "%(TEMP_DIR)s",
                       "-r", reference]

        # Step 1: plain mapDamage run
        train_cat, train_stdin = concatenate_input_bams(config, input_files)
        train_cmd = AtomicCmd(["mapDamage"] + shared_args,
                              IN_STDIN=train_stdin,
                              CHECK_VERSION=MAPDAMAGE_VERSION)
        train_cmds = ParallelCmds(train_cat + [train_cmd])

        # Step 2: rescale-only run, which writes the output BAM
        rescale_cat, rescale_stdin = concatenate_input_bams(config, input_files)
        rescale_call = ["mapDamage", "--rescale-only"] + shared_args \
            + ["--rescale-out", "%(OUT_BAM)s"]
        rescale_cmd = AtomicCmd(rescale_call,
                                IN_STDIN=rescale_stdin,
                                OUT_BAM=output_file,
                                CHECK_VERSION=MAPDAMAGE_VERSION)
        rescale_cmds = ParallelCmds(rescale_cat + [rescale_cmd])

        num_inputs = len(input_files)
        description = "<mapDamageRescale: %i file(s) -> '%s'>" % (num_inputs, output_file)
        CommandNode.__init__(self,
                             command=SequentialCmds([train_cmds, rescale_cmds]),
                             description=description,
                             dependencies=dependencies)
Ejemplo n.º 6
0
    def __init__(self, control_file, sequence_file, trees_file, output_prefix, dependencies = ()):
        """Create a node running "codeml template.ctl" with the temp dir as cwd.

        The control, sequence and trees files are registered as inputs, and
        `output_prefix` + ".codeml" as the single non-temporary output. All
        other files named below are registered as temporary outputs.
        """
        self._control_file = control_file
        self._sequence_file = sequence_file
        self._trees_file = trees_file
        self._output_prefix = output_prefix

        # Temporary outputs, keyed by the AtomicCmd keyword they are passed as
        temp_outputs = {
            "TEMP_OUT_CTL": "template.ctl",
            "TEMP_OUT_SEQS": "template.seqs",
            "TEMP_OUT_TREES": "template.trees",
            "TEMP_OUT_STDOUT": "template.stdout",
            "TEMP_OUT_STDERR": "template.stderr",
            "TEMP_OUT_2NG_DN": "2NG.dN",
            "TEMP_OUT_2NG_DS": "2NG.dS",
            "TEMP_OUT_2NG_T": "2NG.t",
            "TEMP_OUT_4FOLD": "4fold.nuc",
            "TEMP_OUT_LNF": "lnf",
            "TEMP_OUT_RST": "rst",
            "TEMP_OUT_RST1": "rst1",
            "TEMP_OUT_RUB": "rub",
        }

        command = AtomicCmd(["codeml", "template.ctl"],
                            IN_CONTROL_FILE=control_file,
                            IN_SEQUENCE_FILE=sequence_file,
                            IN_TREES_FILE=trees_file,
                            OUT_CODEML=output_prefix + ".codeml",
                            # Prevent prompts from blocking
                            IN_STDIN="/dev/null",
                            set_cwd=True,
                            **temp_outputs)

        description = "<CodemlNode: '%s' -> '%s.*'>" % (sequence_file, output_prefix)
        CommandNode.__init__(self,
                             description=description,
                             command=command,
                             dependencies=dependencies)
Ejemplo n.º 7
0
    def __init__(self, infile, outfile, genome, from_start = 0, from_end = 0, strand_relative = False, dependencies = ()):
        """Create a node running "bedtools slop" on `infile`, writing stdout to `outfile`.

        `from_start` and `from_end` are passed as "-l" and "-r" and must be of
        exactly the same type. "-pct" is added when `from_start` is a float,
        and "-s" when `strand_relative` is true.

        Raises:
            ValueError: if `from_start` and `from_end` differ in type.
        """
        if type(from_start) != type(from_end):
            raise ValueError("'from_start' and 'from_end' should be of same type!")

        call = ["bedtools", "slop",
                "-i", "%(IN_FILE)s",
                "-g", "%(IN_GENOME)s",
                "-l", str(from_start),
                "-r", str(from_end)]

        # Optional flags, appended in this order when enabled
        optional_flags = (("-s", strand_relative),
                          ("-pct", type(from_start) is float))
        for flag, enabled in optional_flags:
            if enabled:
                call.append(flag)

        command = AtomicCmd(call,
                            IN_FILE=infile,
                            IN_GENOME=genome,
                            OUT_STDOUT=outfile,
                            CHECK_VERSION=BEDTOOLS_VERSION)

        CommandNode.__init__(self,
                             description="<SlopBed: '%s' -> '%s'>" % (infile, outfile),
                             command=command,
                             dependencies=dependencies)
Ejemplo n.º 8
0
    def __init__(self, control_file, sequence_file, trees_file, output_tar, exclude_groups = (), dependencies = ()):
        """Create a node that runs "codeml template.ctl" and then tars the results.

        The codeml step runs with the temp dir as cwd. The tar step
        ("tar cvzf") packs the files returned by
        CodemlNode._get_codeml_files("TEMP_IN_CODEML") into `output_tar`. The
        two steps run one after the other via SequentialCmds.
        """
        self._exclude_groups = safe_coerce_to_frozenset(exclude_groups)
        self._control_file = control_file
        self._sequence_file = sequence_file
        self._trees_file = trees_file

        codeml_kwargs = {
            "IN_CONTROL_FILE": control_file,
            "IN_SEQUENCE_FILE": sequence_file,
            "IN_TREES_FILE": trees_file,
            "TEMP_OUT_CTL": "template.ctl",
            "TEMP_OUT_SEQS": "template.seqs",
            "TEMP_OUT_TREES": "template.trees",
            "TEMP_OUT_STDOUT": "template.stdout",
            "TEMP_OUT_STDERR": "template.stderr",
            "TEMP_OUT_4FOLD": "4fold.nuc",
            # Prevent prompts from blocking
            "IN_STDIN": "/dev/null",
        }
        paml_cmd = AtomicCmd(["codeml", "template.ctl"],
                             set_cwd=True,
                             **dict(codeml_kwargs,
                                    **CodemlNode._get_codeml_files("TEMP_OUT_CODEML")))

        # One "%(KEY)s" placeholder per file handed to tar
        tar_pairs = CodemlNode._get_codeml_files("TEMP_IN_CODEML")
        tar_call = ["tar", "cvzf", "%(OUT_FILE)s"]
        tar_call += ["%%(%s)s" % (key,) for key in tar_pairs]
        tar_cmd = AtomicCmd(tar_call,
                            OUT_FILE=output_tar,
                            set_cwd=True,
                            **tar_pairs)

        description = "<CodemlNode: %r -> %r>" % (sequence_file, output_tar)
        CommandNode.__init__(self,
                             description=description,
                             command=SequentialCmds([paml_cmd, tar_cmd]),
                             dependencies=dependencies)
Ejemplo n.º 9
0
    def __init__(self, config, reference, intervals, infiles, outfile, dependencies = ()):
        """Create a node running the IndelRealigner tool alongside "samtools calmd".

        The Java command is built from "GenomeAnalysisTK.jar" under
        config.jar_root and writes `outfile`. The calmd command reads a temp
        file named after the basename of `outfile` and writes its stdout to
        "<basename>.calmd". Both commands are combined with ParallelCmds.
        """
        self._basename = os.path.basename(outfile)
        infiles = safe_coerce_to_tuple(infiles)

        gatk_jar = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
        builder = AtomicJavaCmdBuilder(config, gatk_jar)
        # Each tuple is passed to set_option() as its positional arguments
        options = (("-T", "IndelRealigner"),
                   ("-R", "%(IN_REFERENCE)s"),
                   ("-targetIntervals", "%(IN_INTERVALS)s"),
                   ("-o", "%(OUT_BAMFILE)s"),
                   ("--bam_compression", 0),
                   ("--disable_bam_indexing",))
        for option in options:
            builder.set_option(*option)
        _set_input_files(builder, infiles)

        builder.set_kwargs(IN_REFERENCE=reference,
                           IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                           IN_INTERVALS=intervals,
                           OUT_BAMFILE=outfile)

        calmd_call = ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
        calmd = AtomicCmd(calmd_call,
                          TEMP_IN_BAM=self._basename,
                          IN_REF=reference,
                          TEMP_OUT_STDOUT=self._basename + ".calmd")

        description = "<Indel Realign: %i file(s) -> '%s'>" % (len(infiles), outfile)
        realigner = builder.finalize()
        CommandNode.__init__(self,
                             description=description,
                             command=ParallelCmds([realigner, calmd]),
                             dependencies=dependencies)
Ejemplo n.º 10
0
    def __init__(self, config, reference, input_bam, output_bam, tags, min_mapq = 0, dependencies = ()):
        """Create a three-stage pipeline: samtools view | AddOrReplaceReadGroups | samtools calmd.

        The view stage applies "-F0x4" and "-q<min_mapq>" to `input_bam`. The
        Java stage receives one option per entry in `tags`, except that "PG",
        "Target" and "PU_cur" are skipped and "PU_src" is passed as "PU". The
        calmd stage writes `output_bam`.
        """
        view_call = ["samtools", "view", "-bu", "-F0x4", "-q%i" % min_mapq, "%(IN_BAM)s"]
        flt = AtomicCmd(view_call,
                        IN_BAM=input_bam,
                        OUT_STDOUT=AtomicCmd.PIPE)

        picard_jar = os.path.join(config.jar_root, "AddOrReplaceReadGroups.jar")
        builder = AtomicJavaCmdBuilder(config, picard_jar)
        fixed_options = (("INPUT", "/dev/stdin"),
                         ("OUTPUT", "/dev/stdout"),
                         ("QUIET", "true"),
                         ("COMPRESSION_LEVEL", "0"))
        for key, value in fixed_options:
            builder.set_option(key, value, sep="=")

        # Tags are applied in sorted order
        for (tag, value) in sorted(tags.iteritems()):
            if tag == "PU_src":
                builder.set_option("PU", value, sep="=")
            elif tag not in ("PG", "Target", "PU_cur"):
                builder.set_option(tag, value, sep="=")

        builder.set_kwargs(IN_STDIN=flt,
                           OUT_STDOUT=AtomicCmd.PIPE)
        annotate = builder.finalize()

        calmd = AtomicCmd(["samtools", "calmd", "-b", "-", "%(IN_REF)s"],
                          IN_REF=reference,
                          IN_STDIN=annotate,
                          OUT_STDOUT=output_bam)

        description = "<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam)
        CommandNode.__init__(self,
                             command=ParallelCmds([flt, annotate, calmd]),
                             description=description,
                             dependencies=dependencies)
Ejemplo n.º 11
0
    def _setup(self, config, temp):
        """Symlink the input file into `temp`, then run CommandNode._setup.

        The link points at the absolute path of self._infile and is created
        at the path returned by reroot_path(temp, self._infile).
        """
        link_target = os.path.abspath(self._infile)
        link_name = reroot_path(temp, self._infile)
        os.symlink(link_target, link_name)

        CommandNode._setup(self, config, temp)
Ejemplo n.º 12
0
 def __init__(self, parameters):
     """Initialise the node from `parameters`, keeping the command's kwargs.

     Reads `command` (its kwargs are stored, then it is finalized),
     `input_alignment`, `output_alignment` and `dependencies`.
     """
     self._kwargs = parameters.command.kwargs

     command = parameters.command.finalize()
     description = "<RAxMLReduce: '%s' -> '%s'>" % (parameters.input_alignment,
                                                    parameters.output_alignment)
     CommandNode.__init__(self,
                          command=command,
                          description=description,
                          dependencies=parameters.dependencies)
Ejemplo n.º 13
0
    def __init__(self, parameters):
        """Initialise the node from `parameters`.

        The "cat", "filter" and "bgzip" entries of parameters.commands are
        finalized in that order and combined with ParallelCmds.
        """
        commands = []
        for key in ("cat", "filter", "bgzip"):
            commands.append(parameters.commands[key].finalize())

        description = "<VCFFilter: '%s' -> '%s'>" % (parameters.infile, parameters.outfile)
        CommandNode.__init__(self,
                             description=description,
                             command=ParallelCmds(commands),
                             dependencies=parameters.dependencies)
Ejemplo n.º 14
0
 def __init__(self, parameters):
     """Initialise the node from `parameters`.

     Reads `input_bams`, `output_bam`, `command` (finalized here) and
     `dependencies` from `parameters`.
     """
     summary = (len(parameters.input_bams), parameters.output_bam)
     description = "<Merge BAMs: %i file(s) -> '%s'>" % summary
     command = parameters.command.finalize()

     CommandNode.__init__(self,
                          command=command,
                          description=description,
                          dependencies=parameters.dependencies)
Ejemplo n.º 15
0
 def _setup(self, config, temp):
     """Run CommandNode._setup, then symlink four table files into `temp`.

     Each link points at the absolute path of the file under
     self._directory and is created under the same name in `temp`.
     """
     CommandNode._setup(self, config, temp)

     table_names = ("3pGtoA_freq.txt", "5pCtoT_freq.txt", "dnacomp.txt",
                    "misincorporation.txt")
     for table_name in table_names:
         link_target = os.path.abspath(os.path.join(self._directory, table_name))
         link_name = os.path.join(temp, table_name)
         os.symlink(link_target, link_name)
Ejemplo n.º 16
0
    def __init__(self, config, d_make, bedn, mappa, unique, dependencies=()):
        """Create a node running "intersectmappabed.py" on one bed file.

        The input is d_make.bedfiles[bedn]; the output is written under
        config.temp_local as "<basename>_MappaOnly<ext>". Note that
        d_make.bedfiles[bedn] is overwritten with the output path.
        """
        inbedfile = d_make.bedfiles[bedn]
        stem, suffix = os.path.splitext(os.path.basename(inbedfile))
        dest = os.path.join(config.temp_local,
                            "{}_MappaOnly{}".format(stem, suffix))

        # Later users of d_make see the output path instead of the input
        d_make.bedfiles[bedn] = dest

        script = os.path.join(PREFIX, "intersectmappabed.py")
        call = ["python", script,
                "%(IN_BED)s", "%(IN_MAPPA)s", str(unique), "%(OUT_DEST)s"]
        cmd = AtomicCmd(call,
                        IN_BED=inbedfile,
                        IN_MAPPA=mappa,
                        OUT_DEST=dest,
                        CHECK_VERSION=PYTHON_VERSION)

        description = "<CLEANBEDFILES: '%s' -> '%s', Uniqueness: '%s'>" \
            % (inbedfile, dest, unique)
        CommandNode.__init__(self,
                             description=description,
                             command=cmd,
                             dependencies=dependencies)
Ejemplo n.º 17
0
 def __init__(self, parameters):
     """Initialise the node from `parameters`.

     Only the "fastq_dump" entry of parameters.commands is finalized; it is
     wrapped in ParallelCmds.
     """
     fastq_dump = parameters.commands["fastq_dump"].finalize()

     CommandNode.__init__(self,
                          description="<Mapping Pipeline>",
                          command=ParallelCmds([fastq_dump]),
                          dependencies=parameters.dependencies)
Ejemplo n.º 18
0
    def _setup(self, config, temp):
        """Check the input FASTQ files and create three FIFOs in `temp`.

        The FIFOs are "<basename>.truncated", "<basename>.discarded" and
        "uncompressed_input". CommandNode._setup runs last.
        """
        check_fastq_files(self.input_files, self._quality_offset)

        fifo_names = (self._basename + ".truncated",
                      self._basename + ".discarded",
                      "uncompressed_input")
        for fifo_name in fifo_names:
            os.mkfifo(os.path.join(temp, fifo_name))

        CommandNode._setup(self, config, temp)
Ejemplo n.º 19
0
 def __init__(self, parameters):
     """Initialise the node from `parameters`.

     The "unicat" and "pileup" entries of parameters.commands are finalized
     in that order and combined with ParallelCmds.
     """
     commands = []
     for key in ("unicat", "pileup"):
         commands.append(parameters.commands[key].finalize())

     description = "<VCFPileup: '%s' -> '%s'>" % (parameters.in_bam, parameters.outfile)
     CommandNode.__init__(self,
                          description=description,
                          command=ParallelCmds(commands),
                          dependencies=parameters.dependencies)
Ejemplo n.º 20
0
 def __init__(self, parameters):
     """Initialise the node from `parameters`.

     Reads `command` (finalized here), `infile`, `outfile` and
     `dependencies` from `parameters`.
     """
     command = parameters.command.finalize()
     files = (parameters.infile, parameters.outfile)

     CommandNode.__init__(self,
                          description="<BuildRegions: '%s' -> '%s'>" % files,
                          command=command,
                          dependencies=parameters.dependencies)
Ejemplo n.º 21
0
    def __init__(self, parameters):
        """Initialise the node from `parameters`, remembering its directory.

        Reads `directory` (stored as self._directory), `command` (finalized
        here) and `dependencies` from `parameters`.
        """
        directory = parameters.directory
        self._directory = directory

        description = "<mapDamage (model): %r>" % (directory,)
        command = parameters.command.finalize()
        CommandNode.__init__(self,
                             command=command,
                             description=description,
                             dependencies=parameters.dependencies)
Ejemplo n.º 22
0
 def __init__(self, parameters):
     """Initialise the node from `parameters`.

     Reads `command` (finalized here), `infile`, `outfile` and
     `dependencies` from `parameters`.
     """
     finalized = parameters.command.finalize()
     description = "<BuildRegions: '%s' -> '%s'>" \
         % (parameters.infile, parameters.outfile)

     CommandNode.__init__(self,
                          description=description,
                          command=finalized,
                          dependencies=parameters.dependencies)
Ejemplo n.º 23
0
    def _setup(self, config, temp):
        """Copy the alignment and partition inputs into `temp`, then run CommandNode._setup.

        For each of "IN_ALIGNMENT" and "IN_PARTITION", the file in
        self._kwargs[key] is copied to the name in self._kwargs["TEMP_" + key]
        inside `temp`.
        """
        kwargs = self._kwargs
        for key in ("IN_ALIGNMENT", "IN_PARTITION"):
            temp_name = kwargs["TEMP_" + key]
            fileutils.copy_file(kwargs[key], os.path.join(temp, temp_name))

        CommandNode._setup(self, config, temp)
Ejemplo n.º 24
0
    def _setup(self, config, temp):
        """Symlink the alignment and partition inputs into `temp`, then run CommandNode._setup.

        For each of "IN_ALIGNMENT" and "IN_PARTITION", a link named
        self._kwargs["TEMP_" + key] is created in `temp`, pointing at the
        absolute path of self._kwargs[key].
        """
        kwargs = self._kwargs
        for key in ("IN_ALIGNMENT", "IN_PARTITION"):
            link_target = os.path.abspath(kwargs[key])
            link_name = os.path.join(temp, kwargs["TEMP_" + key])
            os.symlink(link_target, link_name)

        CommandNode._setup(self, config, temp)
Ejemplo n.º 25
0
    def _teardown(self, config, temp):
        """Delete the RAxML info file, rename the tree file, then run CommandNode._teardown.

        "RAxML_parsimonyTree.output.0" in `temp` is moved to the path returned
        by fileutils.reroot_path(temp, self._output_tree).
        """
        info_file = os.path.join(temp, "RAxML_info.output")
        os.remove(info_file)

        tree_file = os.path.join(temp, "RAxML_parsimonyTree.output.0")
        fileutils.move_file(tree_file,
                            fileutils.reroot_path(temp, self._output_tree))

        CommandNode._teardown(self, config, temp)
Ejemplo n.º 26
0
    def _setup(self, config, temp):
        """Link both input files into `temp` before running CommandNode._setup.

        The keys "IN_ALIGNMENT" and "IN_PARTITION" of self._kwargs give the
        source paths; the matching "TEMP_"-prefixed keys give the link names.
        """
        for input_key in ("IN_ALIGNMENT", "IN_PARTITION"):
            temp_key = "TEMP_" + input_key
            os.symlink(os.path.abspath(self._kwargs[input_key]),
                       os.path.join(temp, self._kwargs[temp_key]))

        CommandNode._setup(self, config, temp)
Ejemplo n.º 27
0
 def __init__(self, parameters):
     """Initialise the node from `parameters`.

     Reads `command` (finalized here), `input_file`, `prefix` and
     `dependencies` from `parameters`.
     """
     command = parameters.command.finalize()
     names = (parameters.input_file, parameters.prefix)

     CommandNode.__init__(self,
                          command=command,
                          description="<Bowtie2 Index '%s' -> '%s.*'>" % names,
                          dependencies=parameters.dependencies)
Ejemplo n.º 28
0
    def _setup(self, config, temp):
        """Run CommandNode._setup, then symlink every file in self._symlinks into `temp`.

        Each link keeps the basename of the file it points to.
        """
        CommandNode._setup(self, config, temp)

        # Required to avoid the creation of files outside the temp folder
        for filename in self._symlinks:
            link_name = os.path.join(temp, os.path.basename(filename))
            os.symlink(os.path.abspath(filename), link_name)
Ejemplo n.º 29
0
 def __init__(self, parameters):
     """Initialise the node from `parameters` after checking the BWA prefix.

     The commands named in parameters.order are finalized in that order and
     combined with ParallelCmds. parameters.threads is passed on as the
     node's thread count.
     """
     _check_bwa_prefix(parameters.prefix)

     finalized = []
     for key in parameters.order:
         finalized.append(parameters.commands[key].finalize())
     command = ParallelCmds(finalized)

     description = "<PE_BWA (%i threads): '%s'>" \
         % (parameters.threads, parameters.input_file_1)
     CommandNode.__init__(self,
                          command=command,
                          description=description,
                          threads=parameters.threads,
                          dependencies=parameters.dependencies)
Ejemplo n.º 30
0
 def _teardown(self, config, temp):
     """Rename the bootstrap files inside `temp`, then run CommandNode._teardown.

     self._bootstraps() yields (source, destination) name pairs. Each source
     is looked up in `temp` and moved to the path returned by
     fileutils.reroot_path(temp, destination).
     """
     pairs = self._bootstraps(self._output_template,
                              self._bootstrap_num,
                              self._bootstrap_start)
     for (src_name, dst_name) in pairs:
         src_path = os.path.join(temp, src_name)
         dst_path = fileutils.reroot_path(temp, dst_name)
         fileutils.move_file(src_path, dst_path)

     CommandNode._teardown(self, config, temp)
Ejemplo n.º 31
0
    def __init__(self, parameters):
        """Initialise the node from `parameters`.

        Stores the absolute path of the input alignment in self._symlinks and
        the basename of the output tree in self._output_tree.
        """
        alignment = parameters.input_alignment
        tree = parameters.output_tree

        self._symlinks = [os.path.abspath(alignment)]
        self._output_tree = os.path.basename(tree)

        command = parameters.command.finalize()
        description = "<Parsimonator: '%s' -> '%s'>" % (alignment, tree)
        CommandNode.__init__(self,
                             command=command,
                             description=description,
                             dependencies=parameters.dependencies)
Ejemplo n.º 32
0
    def __init__(self, infile, dependencies = ()):
        """Create a node running "samtools index" on `infile`.

        The index path is `infile` with its extension swapped for ".bai"
        (via swap_ext).
        """
        index_file = swap_ext(infile, ".bai")
        index_call = ["samtools", "index", "%(IN_BAM)s", "%(OUT_BAI)s"]
        cmd_index = AtomicCmd(index_call,
                              IN_BAM=infile,
                              OUT_BAI=index_file,
                              CHECK_SAM=SAMTOOLS_VERSION)

        description = "<BAMIndex: '%s'>" % (infile,)
        CommandNode.__init__(self,
                             description=description,
                             command=cmd_index,
                             dependencies=dependencies)
Ejemplo n.º 33
0
 def _run(self, config, temp):
     """Run the command, adding detail to the error for a "Giving up." failure.

     If CommandNode._run raises NodeError and self._command.join() returns
     [1, None], the last line of "template.stdout" in `temp` is inspected.
     When it contains "Giving up.", a new NodeError is raised that carries
     the original message followed by that line. Otherwise the original
     error is re-raised.

     Raises:
         NodeError: the original error, or the extended one described above.
     """
     try:
         CommandNode._run(self, config, temp)
     # "except X as e" works on Python 2.6+ and 3; the older
     # "except X, e" form is a syntax error on Python 3.
     except NodeError as error:
         if self._command.join() == [1, None]:
             stdout_path = fileutils.reroot_path(temp, "template.stdout")
             with open(stdout_path) as handle:
                 lines = handle.readlines()
             if lines and ("Giving up." in lines[-1]):
                 error = NodeError("%s\n\n%s" % (error, lines[-1]))
         raise error
Ejemplo n.º 34
0
    def _teardown(self, config, temp):
        """Rename RAxML output files in `temp`, then run CommandNode._teardown.

        Every file in `temp` whose name starts with "RAxML_<name>.Pypeline" is
        moved to self._template % (<name>,), still inside `temp`.
        """
        for filename in os.listdir(temp):
            # The dot is escaped so that only a literal ".Pypeline" matches;
            # unescaped, it would accept any character in that position.
            match = re.match(r"RAxML_(.*)\.Pypeline", filename)
            if match:
                source = os.path.join(temp, filename)
                destination = os.path.join(temp, self._template % match.groups())

                fileutils.move_file(source, destination)

        CommandNode._teardown(self, config, temp)
Ejemplo n.º 35
0
    def __init__(self, parameters):
        """Initialise the node from `parameters`.

        Stores the input alignment, input partitions and output tree paths on
        the instance; `command` is finalized here.
        """
        alignment = parameters.input_alignment
        tree = parameters.output_tree

        self._input_alignment = alignment
        self._input_partitions = parameters.input_partitions
        self._output_tree = tree

        command = parameters.command.finalize()
        description = "<RAxMLParsimonyTree: '%s' -> '%s'>" % (alignment, tree)
        CommandNode.__init__(self,
                             command=command,
                             description=description,
                             dependencies=parameters.dependencies)