def add_nodes(self, *nodes):
    """Appends one or more Node objects to this object's node list.

    Each positional argument may be a single Node or a (nested) sequence
    of Nodes; both levels are flattened via safe_coerce_to_tuple.

    Raises TypeError if any flattened value is not a Node instance.
    """
    for subnodes in safe_coerce_to_tuple(nodes):
        for node in safe_coerce_to_tuple(subnodes):
            if not isinstance(node, Node):
                # Fixed typo in error message ("recieved" -> "received")
                raise TypeError("Node object expected, received %s"
                                % repr(node))
            self._nodes.append(node)
def customize(cls, input_file_1, input_file_2, output_file, reference,
              prefix, threads=2, log_file=None, dependencies=()):
    """Builds the customizable Bowtie2 alignment command.

    Either 'input_file_1' alone (single-end / unpaired reads) or both
    'input_file_1' and 'input_file_2' (paired-end reads) must be given;
    a NodeError is raised otherwise. Returns a dict with the sub-commands,
    their execution order, the thread count actually used, and the node
    dependencies.
    """
    # Setting IN_FILE_2 to None makes AtomicCmd ignore this key
    aln = _bowtie2_template(("bowtie2", ), prefix,
                            OUT_STDOUT=AtomicCmd.PIPE,
                            CHECK_VERSION=BOWTIE2_VERSION)
    aln.set_option("-x", prefix)

    if log_file is not None:
        aln.set_kwargs(OUT_STDERR=log_file)

    if input_file_1 and not input_file_2:
        # Unpaired reads; each file is registered as IN_FILE_1_nn
        aln.add_multiple_options("-U", safe_coerce_to_tuple(input_file_1),
                                 template="IN_FILE_1_%02i")
    elif input_file_1 and input_file_2:
        # Paired reads; mate 1 and mate 2 files are passed separately
        aln.add_multiple_options("-1", safe_coerce_to_tuple(input_file_1),
                                 template="IN_FILE_1_%02i")
        aln.add_multiple_options("-2", safe_coerce_to_tuple(input_file_2),
                                 template="IN_FILE_2_%02i")
    else:
        raise NodeError("Input 1, OR both input 1 and input 2 must "
                        "be specified for Bowtie2 node")

    # The thread count may be capped depending on the reference
    max_threads = _get_max_threads(reference, threads)
    aln.set_option("--threads", max_threads)

    # Fixmate is only meaningful for paired-end data
    run_fixmate = input_file_1 and input_file_2
    order, commands = _process_output(aln, output_file, reference,
                                      run_fixmate=run_fixmate)

    commands["aln"] = aln
    return {
        "commands": commands,
        "order": ["aln"] + order,
        "threads": max_threads,
        "dependencies": dependencies
    }
def __init__(self, main_tree_files, support_tree_files, output_file,
             dependencies=()):
    """Node adding support values to the trees in 'main_tree_files',
    derived from the trees in 'support_tree_files'; the annotated trees
    are written to 'output_file'.
    """
    self._output_file = output_file
    self._main_tree_files = safe_coerce_to_tuple(main_tree_files)
    self._support_tree_files = safe_coerce_to_tuple(support_tree_files)

    Node.__init__(self,
                  description="<NewickSupport: %s>"
                  % (describe_files(main_tree_files),),
                  input_files=self._main_tree_files
                  + self._support_tree_files,
                  output_files=output_file,
                  dependencies=dependencies)
def __init__(self, config, reference, infiles, outfile, threads=1,
             dependencies=()):
    """Node running GATK RealignerTargetCreator, which identifies
    candidate intervals for indel realignment in the input BAMs.

    config    -- pipeline configuration (provides jar_root / jre_options).
    reference -- reference FASTA; the matching .dict file is required too.
    infiles   -- one or more input BAM files (coerced to a tuple).
    outfile   -- output intervals file.
    threads   -- requested thread count; may be capped for the reference.
    """
    threads = _get_max_threads(reference, threads)
    infiles = safe_coerce_to_tuple(infiles)
    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
    command = AtomicJavaCmdBuilder(jar_file,
                                   jre_options=config.jre_options)
    command.set_option("-T", "RealignerTargetCreator")
    command.set_option("-R", "%(IN_REFERENCE)s")
    command.set_option("-o", "%(OUT_INTERVALS)s")
    # -nt: number of data threads used by GATK
    command.set_option("-nt", threads)
    _set_input_files(command, infiles)
    command.set_kwargs(IN_REFERENCE=reference,
                       IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                       OUT_INTERVALS=outfile,
                       CHECK_GATK=_get_gatk_version_check(config))

    description = "<GATK Indel Realigner (training): %s -> %r>" \
        % (describe_files(infiles), outfile)
    CommandNode.__init__(self,
                         threads=threads,
                         description=description,
                         command=command.finalize(),
                         dependencies=dependencies)
def __init__(self, config, target, prefix, lanes, name):
    """Library-level node: merges per-lane BAMs, optionally removes or
    marks PCR duplicates, and runs mapDamage / duplicate-histogram steps.
    """
    self.name = name
    self.lanes = safe_coerce_to_tuple(lanes)
    # BUGFIX: read shared values via self.lanes (the coerced tuple); the
    # original indexed the raw 'lanes' argument, which fails if a single
    # Lane object (rather than a sequence) is passed in.
    self.options = self.lanes[0].options
    self.folder = os.path.dirname(self.lanes[0].folder)

    # All lanes in a library must share the same folder and options
    assert all((self.folder == os.path.dirname(lane.folder))
               for lane in self.lanes)
    assert all((self.options == lane.options) for lane in self.lanes)

    lane_bams = self._collect_bams_by_type(self.lanes)

    pcr_duplicates = self.options["PCRDuplicates"]
    if pcr_duplicates:
        # pcr_duplicates may be "mark" or any trueish value
        lane_bams = self._remove_pcr_duplicates(config, prefix,
                                                lane_bams,
                                                pcr_duplicates)

    # At this point we no longer need to differentiate between types of reads
    files_and_nodes = self._collect_files_and_nodes(lane_bams)

    # Collect output bams, possible following rescaling
    self.bams, mapdamage_nodes \
        = self._build_mapdamage_nodes(config, target, prefix,
                                      files_and_nodes)

    nodes = [self._build_dataduplication_node(lane_bams)]
    nodes.extend(mapdamage_nodes)

    histogram_node = self._build_duphist_nodes(config, target, prefix,
                                               lane_bams)
    if histogram_node:
        nodes.append(histogram_node)

    self.nodes = tuple(nodes)
def __init__(self, config, input_bams, pipename="input.bam", indexed=True):
    """Describes how a set of input BAMs is supplied to a command through
    a named pipe: multiple BAMs are merged on the fly using Picard
    MergeSamFiles, while a single BAM is used directly.

    config     -- pipeline configuration used to build the Picard command.
    input_bams -- one or more BAM paths (coerced to a tuple).
    pipename   -- temporary filename used for the BAM pipe.
    indexed    -- if True and there is one input, also depend on its .bai.
    """
    self.pipe = pipename
    self.indexed = indexed
    self.files = safe_coerce_to_tuple(input_bams)
    self.commands = []
    self.kwargs = {"TEMP_IN_BAM": self.pipe}
    if len(self.files) > 1:
        # Merge the inputs into the pipe; compression is disabled since
        # the output is consumed immediately by the downstream command
        params = picard_command(config, "MergeSamFiles")
        params.set_option("SO", "coordinate", sep="=", fixed=False)
        params.set_option("CREATE_INDEX", "False", sep="=")
        params.set_option("COMPRESSION_LEVEL", 0, sep="=")
        params.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
        params.add_multiple_options("I", input_bams, sep="=")

        params.set_kwargs(TEMP_OUT_BAM=self.pipe)

        self.commands = [params.finalize()]
    else:
        # Ensure that the actual command depends on the input
        self.kwargs["IN_FILE_00"] = self.files[0]

        if indexed:
            self.kwargs["IN_FILE_01"] = swap_ext(self.files[0], ".bai")
def add_support(self, bootstraps, fmt = "{Support}"):
    """Adds support values to the current tree, based on a set of trees
    containing the same taxa.

    It is assumed that the support trees represent unrooted or arbitrarily
    rooted trees, and no weight is given to the rooted topology of these
    trees. The main tree should itself be rooted, and the topology and
    ordering of this tree is preserved, with node-names updated using the
    formatting string 'fmt'. Formatting is carried out using str.format,
    with these fields:

      {Support}    -- The total number of trees in which a clade is
                      supported.
      {Percentage} -- The percentage of trees in which a clade is
                      supported (float).
      {Fraction}   -- The fraction of trees in which a clade is supported
                      (float).

    For example, typical percentage support-values can be realized by
    setting 'fmt' to the value "{Percentage:.0f}" to produce integer
    values.

    Raises NewickError if the main tree contains duplicate leaf names, or
    if a support tree does not contain the same set of leaves.
    """
    clade_counts = {}
    leaf_names_lst = list(self.get_leaf_names())
    leaf_names = frozenset(leaf_names_lst)
    # Duplicate leaves would make clade identities ambiguous
    if len(leaf_names) != len(leaf_names_lst):
        raise NewickError("Cannot add support values to trees with duplicate leaf names")

    bootstraps = safe_coerce_to_tuple(bootstraps)
    for support_tree in bootstraps:
        support_tree_names = frozenset(support_tree.get_leaf_names())
        if leaf_names != support_tree_names:
            raise NewickError("Support tree does not contain same set of leaf nodes")

        # Count every clade (leaf partition) present in this support tree
        support_graph = _NewickGraph(support_tree)
        for clade in support_graph.get_clade_names():
            clade_counts[clade] = clade_counts.get(clade, 0) + 1

    return self._add_support(self, len(bootstraps), clade_counts, fmt)
def __init__(self, config, reference, intervals, infiles, outfile,
             dependencies=()):
    """Node running GATK IndelRealigner over 'infiles', realigning reads
    around the given 'intervals', followed by 'samtools calmd' which
    recomputes per-read tags on the realigned output.
    """
    self._basename = os.path.basename(outfile)

    infiles = safe_coerce_to_tuple(infiles)
    jar_file = os.path.join(config.jar_root, "GenomeAnalysisTK.jar")
    command = AtomicJavaCmdBuilder(jar_file,
                                   jre_options=config.jre_options)
    command.set_option("-T", "IndelRealigner")
    command.set_option("-R", "%(IN_REFERENCE)s")
    command.set_option("-targetIntervals", "%(IN_INTERVALS)s")
    command.set_option("-o", "%(OUT_BAMFILE)s")
    # Compression and indexing are skipped; calmd rewrites the BAM next
    command.set_option("--bam_compression", 0)
    command.set_option("--disable_bam_indexing")
    _set_input_files(command, infiles)
    command.set_kwargs(IN_REFERENCE=reference,
                       IN_REF_DICT=fileutils.swap_ext(reference, ".dict"),
                       IN_INTERVALS=intervals,
                       OUT_BAMFILE=outfile,
                       CHECK_GATK=_get_gatk_version_check(config))

    calmd = AtomicCmd(["samtools", "calmd", "-b",
                       "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
                      TEMP_IN_BAM=self._basename,
                      IN_REF=reference,
                      TEMP_OUT_STDOUT=self._basename + ".calmd",
                      CHECK_VERSION=SAMTOOLS_VERSION)

    description = "<GATK Indel Realigner (aligning): %s -> %r>" \
        % (describe_files(infiles), outfile)
    CommandNode.__init__(self,
                         description=description,
                         command=ParallelCmds([command.finalize(), calmd]),
                         dependencies=dependencies)
def __init__(self, config, input_bams, command, index_format=None,
             description=None, threads=1, dependencies=()):
    """Base node for commands that read one or more BAMs through a pipe.

    A single BAM is piped directly (optionally with an index); multiple
    BAMs are merged on the fly using Picard MergeSamFiles, in which case
    an index cannot be required.

    Raises ValueError if no inputs are given, if 'index_format' is set
    for more than one input, or for index formats other than None,
    ".bai", or ".csi".
    """
    self._input_bams = safe_coerce_to_tuple(input_bams)
    self._index_format = index_format

    if not self._input_bams:
        raise ValueError("No input BAM files specified!")
    elif len(self._input_bams) > 1 and index_format:
        raise ValueError("BAM index cannot be required for > 1 file")
    elif index_format not in (None, ".bai", ".csi"):
        raise ValueError("Unknown index format %r" % (index_format,))

    if len(self._input_bams) > 1:
        merge = picard_command(config, "MergeSamFiles")
        merge.set_option("SO", "coordinate", sep="=")
        # Output goes straight into the wrapped command; skip compression
        merge.set_option("COMPRESSION_LEVEL", 0, sep="=")
        merge.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
        # Validation is mostly left to manual ValidateSamFile runs; this
        # is because .csi indexed BAM records can have "invalid" bins.
        merge.set_option("VALIDATION_STRINGENCY", "LENIENT", sep="=")
        merge.add_multiple_options("I", input_bams, sep="=")

        merge.set_kwargs(TEMP_OUT_BAM=self.PIPE_FILE)

        command = ParallelCmds([merge.finalize(), command])

    CommandNode.__init__(self,
                         command=command,
                         description=description,
                         threads=threads,
                         dependencies=dependencies)
def __init__(self, infiles, out_prefix, exclude_groups=(), reduce=False,
             dependencies=(), file_dependencies=()):
    """
    infiles = {names : {"partitions" : ..., "filenames" : [...]}}

    Node writing 'out_prefix.phy' and 'out_prefix.partitions' from the
    given input files; raises TypeError/ValueError for malformed input.
    """
    if not (isinstance(infiles, dict)
            and all(isinstance(dd, dict) for dd in infiles.values())):
        raise TypeError("'infiles' must be a dictionary of dictionaries")

    input_filenames = []
    for (name, subdd) in infiles.iteritems():
        # Only keys listed in _VALID_KEYS are recognized per input entry
        if set(subdd) - _VALID_KEYS:
            raise ValueError("Invalid keys found for %r: %s"
                             % (name, ", ".join(set(subdd) - _VALID_KEYS)))
        elif not isinstance(subdd["filenames"], list):
            raise ValueError("filenames must be a list of strings")
        input_filenames.extend(subdd["filenames"])
    # Optional file dependencies; used to depend on the list of sequences
    input_filenames.extend(safe_coerce_to_tuple(file_dependencies))

    self._reduce = bool(reduce)
    self._infiles = copy.deepcopy(infiles)
    self._out_prefix = out_prefix
    self._excluded = safe_coerce_to_frozenset(exclude_groups)

    description = "<FastaToPartitionedPhy%s: %i file(s) -> '%s.*'>" % \
        (" (reducing)" if reduce else "", len(infiles), out_prefix)

    Node.__init__(self,
                  description=description,
                  input_files=input_filenames,
                  output_files=[out_prefix + ".phy",
                                out_prefix + ".partitions"],
                  dependencies=dependencies)
def customize(self, config, reference, input_files, output_file,
              directory, dependencies=()):
    """Builds the customizable mapDamage rescaling command, which rewrites
    base qualities in the input BAMs using a previously computed model
    (Stats_out_MCMC_correct_prob.csv) and writes the rescaled BAM to
    'output_file'. Returns a dict of parameters for the node.
    """
    input_files = safe_coerce_to_tuple(input_files)

    stats_out_fname = "Stats_out_MCMC_correct_prob.csv"
    command = AtomicCmdBuilder([
        "mapDamage", "--rescale-only",
        "-i", "%(TEMP_IN_BAM)s",
        "-d", "%(TEMP_DIR)s",
        "-r", "%(IN_REFERENCE)s",
        "--rescale-out", "%(OUT_BAM)s"
    ],
        TEMP_IN_BAM=MultiBAMInputNode.PIPE_FILE,
        IN_REFERENCE=reference,
        TEMP_OUT_LOG="Runtime_log.txt",
        TEMP_OUT_CSV=stats_out_fname,
        OUT_BAM=output_file,
        CHECK_VERSION=MAPDAMAGE_VERSION)
    # Register each input BAM as an auto-numbered input dependency
    command.add_multiple_kwargs(input_files)

    return {
        "command": command,
        "config": config,
        "input_files": input_files,
        "directory": directory,
        "dependencies": dependencies
    }
def customize(self, config, reference, input_files, output_file,
              directory, dependencies=()):
    """Builds the customizable mapDamage rescaling command, which rewrites
    base qualities in the input BAMs using a previously computed model
    (Stats_out_MCMC_correct_prob.csv) and writes the rescaled BAM to
    'output_file'. Returns a dict of parameters for the node.
    """
    input_files = safe_coerce_to_tuple(input_files)

    stats_out_fname = "Stats_out_MCMC_correct_prob.csv"
    command = AtomicCmdBuilder(["mapDamage", "--rescale-only",
                                "-i", "%(TEMP_IN_BAM)s",
                                "-d", "%(TEMP_DIR)s",
                                "-r", "%(IN_REFERENCE)s",
                                "--rescale-out", "%(OUT_BAM)s"],
                               TEMP_IN_BAM=MultiBAMInputNode.PIPE_FILE,
                               IN_REFERENCE=reference,
                               TEMP_OUT_LOG="Runtime_log.txt",
                               TEMP_OUT_CSV=stats_out_fname,
                               OUT_BAM=output_file,
                               CHECK_VERSION=MAPDAMAGE_VERSION)
    # Register each input BAM as an auto-numbered input dependency
    command.add_multiple_kwargs(input_files)

    return {"command": command,
            "config": config,
            "input_files": input_files,
            "directory": directory,
            "dependencies": dependencies}
def __init__(self, config, target, prefix, lanes, name):
    """Library-level node: merges per-lane BAMs, optionally removes or
    marks PCR duplicates, and runs mapDamage-related steps.
    """
    self.name = name
    self.lanes = safe_coerce_to_tuple(lanes)
    # BUGFIX: read options via self.lanes (the coerced tuple); the
    # original indexed the raw 'lanes' argument, which fails if a single
    # Lane object (rather than a sequence) is passed in.
    self.options = self.lanes[0].options
    self.folder = os.path.dirname(os.path.dirname(self.lanes[0].folder))

    # All lanes must share the same library folder and the same options
    assert all(
        (self.folder == os.path.dirname(os.path.dirname(lane.folder)))
        for lane in self.lanes
    )
    assert all((self.options == lane.options) for lane in self.lanes)

    lane_bams = self._collect_bams_by_type(self.lanes)

    pcr_duplicates = self.options["PCRDuplicates"]
    if pcr_duplicates:
        # pcr_duplicates may be "mark" or any trueish value
        lane_bams = self._remove_pcr_duplicates(
            config, prefix, lane_bams, pcr_duplicates
        )

    # At this point we no longer need to differentiate between read types
    files_and_nodes = self._collect_files_and_nodes(lane_bams)

    # Collect output bams, possible following rescaling
    self.bams, mapdamage_nodes = self._build_mapdamage_nodes(
        config, target, prefix, files_and_nodes
    )

    nodes = [self._build_dataduplication_node(lane_bams)]
    nodes.extend(mapdamage_nodes)

    self.nodes = tuple(nodes)
def __init__(self, config, input_bams, output_bam, keep_dupes=True,
             dependencies=()):
    """Node running the 'rmdup_collapsed' tool on the input BAMs; when
    'keep_dupes' is False, the --remove-duplicates flag is passed so that
    duplicates are dropped rather than retained.
    """
    input_bams = safe_coerce_to_tuple(input_bams)
    builder = factory.new("rmdup_collapsed")
    builder.add_value("%(TEMP_IN_BAM)s")
    builder.set_kwargs(OUT_STDOUT=output_bam,
                       TEMP_IN_BAM=MultiBAMInputNode.PIPE_FILE)
    builder.add_multiple_kwargs(input_bams)

    if not keep_dupes:
        builder.set_option("--remove-duplicates")

    description = "<FilterCollapsedBAM: %s>" \
        % (describe_files(input_bams),)
    MultiBAMInputNode.__init__(self,
                               config=config,
                               input_bams=input_bams,
                               command=builder.finalize(),
                               description=description,
                               dependencies=dependencies)
def __init__(self, config, prefix, samples, features, target):
    """Prefix-level node aggregating per-sample BAMs for one reference
    prefix; depending on 'features', raw and/or realigned merged BAMs are
    produced, otherwise the per-sample BAMs are exposed directly.
    """
    self.name = prefix["Name"]
    self.label = prefix.get("Label") or self.name
    self.reference = prefix["Reference"]
    self.roi = prefix.get("RegionsOfInterest", {})

    self.samples = safe_coerce_to_tuple(samples)
    self.folder = config.destination
    self.target = target

    files_and_nodes = {}
    for sample in self.samples:
        files_and_nodes.update(sample.bams.iteritems())

    self.datadup_check = self._build_dataduplication_node(prefix,
                                                          files_and_nodes)

    self.bams = {}
    if features["RawBAM"]:
        self.bams.update(self._build_raw_bam(config, prefix,
                                             files_and_nodes))
    if features["RealignedBAM"]:
        self.bams.update(self._build_realigned_bam(config, prefix,
                                                   files_and_nodes))

    if not self.bams:
        # No merged BAM requested; expose the per-sample BAMs directly
        for sample in self.samples:
            self.bams.update(sample.bams)

    nodes = []
    for sample in self.samples:
        nodes.extend(sample.nodes)

    # NOTE(review): unlike sibling implementations, self.datadup_check is
    # not included in 'nodes' here -- presumably it is scheduled via some
    # other path; confirm this is intentional.
    self.nodes = tuple(nodes)
def __init__(self, tree_files, output_file, taxa=(), dependencies=()):
    """Node rerooting the trees in 'tree_files' and writing the result to
    'output_file'; roots on the clade containing 'taxa' when given, and
    on the midpoint otherwise.
    """
    self._output_file = output_file
    self._tree_files = safe_coerce_to_tuple(tree_files)
    self._reroot_on_taxa = safe_coerce_to_tuple(taxa)

    if self._reroot_on_taxa:
        reroot_on = repr("', '".join(sorted(self._reroot_on_taxa)))
    else:
        reroot_on = "midpoint"

    Node.__init__(self,
                  description="<NewickReroot (on %s): %s>"
                  % (reroot_on, describe_files(tree_files),),
                  input_files=self._tree_files,
                  output_files=self._output_file,
                  dependencies=dependencies)
def __init__(self, tree_files, output_file, taxa=(), dependencies=()):
    """Node rerooting the trees in 'tree_files' and writing the result to
    'output_file'; roots on the clade containing 'taxa' when given, and
    on the midpoint otherwise.
    """
    self._output_file = output_file
    self._tree_files = safe_coerce_to_tuple(tree_files)
    self._reroot_on_taxa = safe_coerce_to_tuple(taxa)

    if self._reroot_on_taxa:
        reroot_on = repr("', '".join(sorted(self._reroot_on_taxa)))
    else:
        reroot_on = "midpoint"

    Node.__init__(self,
                  description="<NewickReroot (on %s): %s>"
                  % (reroot_on, describe_files(tree_files),),
                  input_files=self._tree_files,
                  output_files=self._output_file,
                  dependencies=dependencies)
def __init__(self, commands):
    """Command-set executing its members in parallel; every member must
    be an AtomicCmd or a _CommandSet, otherwise CmdError is raised.
    """
    self._ready = False

    commands = safe_coerce_to_tuple(commands)
    for item in commands:
        if not isinstance(item, (AtomicCmd, _CommandSet)):
            raise CmdError("ParallelCmds must only contain AtomicCmds or other ParallelCmds!")

    _CommandSet.__init__(self, commands)
def _validate_filenames(filenames):
    """Sanity checks for filenames handled by 'describe_files' and
    'describe_paired_files'; coerces the argument to a tuple, raises
    ValueError on any non-string entry, and returns the tuple.
    """
    result = safe_coerce_to_tuple(filenames)
    for fname in result:
        if not isinstance(fname, types.StringTypes):
            raise ValueError("Only string types are allowed for filenames, not %s"
                             % (fname.__class__.__name__,))

    return result
def __init__(self, config, prefixes, name):
    """Aggregates the BAMs and nodes of a set of prefix objects."""
    self.name = name
    self.prefixes = safe_coerce_to_tuple(prefixes)

    bams = {}
    nodes = []
    for current in self.prefixes:
        nodes.extend(current.nodes)
        bams.update(current.bams.iteritems())

    self.bams = bams
    self.nodes = nodes
def __init__(self, main_tree_files, support_tree_files, output_file,
             dependencies=()):
    """Node adding support values to the trees in 'main_tree_files',
    derived from the trees in 'support_tree_files'; the annotated trees
    are written to 'output_file'.
    """
    self._output_file = output_file
    self._main_tree_files = safe_coerce_to_tuple(main_tree_files)
    self._support_tree_files = safe_coerce_to_tuple(support_tree_files)

    Node.__init__(self,
                  description="<NewickSupport: %s>"
                  % (describe_files(main_tree_files),),
                  input_files=self._main_tree_files
                  + self._support_tree_files,
                  output_files=output_file,
                  dependencies=dependencies)
def __init__(self, commands):
    """Command-set executing its members in parallel; every member must
    be an AtomicCmd or a _CommandSet, otherwise CmdError is raised.
    """
    self._ready = False

    commands = safe_coerce_to_tuple(commands)
    for item in commands:
        if not isinstance(item, (AtomicCmd, _CommandSet)):
            raise CmdError(
                "ParallelCmds must only contain AtomicCmds or other ParallelCmds!"
            )

    _CommandSet.__init__(self, commands)
def __init__(self, call, search, checks, name=None, priority=0):
    """See function 'Requirement' for a description of parameters."""
    self._call = safe_coerce_to_tuple(call)
    self._done = None
    self._rege = re.compile(search)
    self._version = None

    # Default to the executable / function name when no name is given
    self.name = str(name or self._call[0])
    self.priority = int(priority)
    self.checks = checks
def __init__(self, call, **kwargs):
    """Builder collecting options/values for a command; see
    AtomicCmd.__init__ for the meaning of 'call' and keyword arguments.
    """
    self._call = safe_coerce_to_tuple(call)
    self._object = None
    self._options = []
    self._values = []
    self._kwargs = {}

    self.set_kwargs(**kwargs)
def __init__(self, config, prefix, libraries, name):
    """Aggregates the BAMs and nodes of the libraries in a sample."""
    self.name = name
    self.libraries = safe_coerce_to_tuple(libraries)
    self.folder = os.path.dirname(self.libraries[0].folder)

    self.bams = {}
    nodes = []
    for library in self.libraries:
        self.bams.update(library.bams.items())
        nodes.extend(library.nodes)

    self.nodes = tuple(nodes)
def __init__(self, call, threads = 1, **kwargs):
    """Builder for commands optionally run under MPI.

    call    -- the command to invoke (string or sequence of strings).
    threads -- number of MPI processes; with 1 the command is invoked
               directly (EXEC_MPI still registers "mpirun" -- presumably
               as an executable requirement; confirm against
               AtomicCmdBuilder), with > 1 it becomes
               "mpirun -n <threads> <call>".

    Raises TypeError for non-integer threads and ValueError for
    threads < 1.
    """
    if not isinstance(threads, (types.IntType, types.LongType)):
        raise TypeError("'threads' must be an integer value, not %r"
                        % threads.__class__.__name__)
    elif threads < 1:
        raise ValueError("'threads' must be 1 or greater, not %i" % threads)
    elif threads == 1:
        AtomicCmdBuilder.__init__(self, call, EXEC_MPI = "mpirun", **kwargs)
    else:
        call = safe_coerce_to_tuple(call)
        mpi_call = ["mpirun", "-n", threads]
        mpi_call.extend(call)

        AtomicCmdBuilder.__init__(self, mpi_call, EXEC_MAIN = call[0], **kwargs)
def __init__(self, config, prefix, libraries, name):
    """Aggregates the BAMs and nodes of the libraries in a sample."""
    self.name = name
    self.libraries = safe_coerce_to_tuple(libraries)
    self.folder = os.path.dirname(self.libraries[0].folder)

    self.bams = {}
    nodes = []
    for library in self.libraries:
        self.bams.update(library.bams.iteritems())
        nodes.extend(library.nodes)

    self.nodes = tuple(nodes)
def __init__(self, config, prefix, samples, features, target):
    """Prefix-level node merging per-sample BAMs for one reference
    prefix. Depending on 'features', a raw and/or a GATK-realigned
    merged BAM is produced; realignment is disabled (with a one-time
    warning per reference) for .csi-indexed references, since GATK does
    not support .csi index files.
    """
    self.name = prefix["Name"]
    self.label = prefix.get("Label") or self.name
    self.roi = prefix.get("RegionsOfInterest", {})

    self.samples = safe_coerce_to_tuple(samples)
    self.folder = config.destination
    self.target = target

    files_and_nodes = {}
    for sample in self.samples:
        files_and_nodes.update(sample.bams.iteritems())

    self.datadup_check = self._build_dataduplication_node(
        prefix, files_and_nodes)

    build_raw_bam = features["RawBAM"]
    build_realigned_bam = features["RealignedBAM"]
    if build_realigned_bam and prefix['IndexFormat'] == '.csi':
        # Warn only once per reference path
        if prefix['Path'] not in _CSI_WARNINGS:
            ui.print_err("\nWARNING: Realigned BAMs enabled for reference "
                         "genome %r, but the file contains sequences too "
                         "large for GATK, which does not support .csi "
                         "index files. Raw BAMs will be built instead of "
                         "realigned BAMs, for this reference sequence."
                         % (prefix['Path']))
            # TODO: Add reference to FAQ when written.
            _CSI_WARNINGS.add(prefix['Path'])
        build_realigned_bam = False
        build_raw_bam = True

    self.bams = {}
    if build_raw_bam:
        self.bams.update(
            self._build_raw_bam(config, prefix, files_and_nodes))
    if build_realigned_bam:
        self.bams.update(
            self._build_realigned_bam(config, prefix, files_and_nodes))

    if not self.bams:
        # No merged BAM requested; expose the per-sample BAMs directly
        for sample in self.samples:
            self.bams.update(sample.bams)

    nodes = [self.datadup_check]
    for sample in self.samples:
        nodes.extend(sample.nodes)

    self.nodes = tuple(nodes)
def __init__(self, config, prefix, samples, features, target):
    """Prefix-level node merging per-sample BAMs for one reference
    prefix. Depending on 'features', a raw and/or a GATK-realigned
    merged BAM is produced; realignment is disabled (with a one-time
    warning per reference) for .csi-indexed references, since GATK does
    not support .csi index files.
    """
    self.name = prefix["Name"]
    self.label = prefix.get("Label") or self.name
    self.roi = prefix.get("RegionsOfInterest", {})

    self.samples = safe_coerce_to_tuple(samples)
    self.folder = config.destination
    self.target = target

    files_and_nodes = {}
    for sample in self.samples:
        files_and_nodes.update(sample.bams.iteritems())

    self.datadup_check = self._build_dataduplication_node(
        prefix, files_and_nodes)

    build_raw_bam = features["RawBAM"]
    build_realigned_bam = features["RealignedBAM"]
    if build_realigned_bam and prefix['IndexFormat'] == '.csi':
        # Warn only once per reference path
        if prefix['Path'] not in _CSI_WARNINGS:
            ui.print_err("\nWARNING: Realigned BAMs enabled for reference "
                         "genome %r, but the file contains sequences too "
                         "large for GATK, which does not support .csi "
                         "index files. Raw BAMs will be built instead of "
                         "realigned BAMs, for this reference sequence."
                         % (prefix['Path']))
            # TODO: Add reference to FAQ when written.
            _CSI_WARNINGS.add(prefix['Path'])
        build_realigned_bam = False
        build_raw_bam = True

    self.bams = {}
    if build_raw_bam:
        self.bams.update(self._build_raw_bam(
            config, prefix, files_and_nodes))
    if build_realigned_bam:
        self.bams.update(self._build_realigned_bam(
            config, prefix, files_and_nodes))

    if not self.bams:
        # No merged BAM requested; expose the per-sample BAMs directly
        for sample in self.samples:
            self.bams.update(sample.bams)

    nodes = [self.datadup_check]
    for sample in self.samples:
        nodes.extend(sample.nodes)

    self.nodes = tuple(nodes)
def customize(self, config, reference, input_files, output_directory,
              title="mapDamage", dependencies=()):
    """Builds the customizable mapDamage plotting command, which computes
    damage statistics for the input BAMs and writes tables and plots into
    'output_directory'. Returns a dict of parameters for the node.
    """
    input_files = safe_coerce_to_tuple(input_files)

    command = AtomicCmdBuilder(
        [
            "mapDamage", "--no-stats",
            # Prevent references with many contigs from using excessive
            # amounts of memory, at the cost of per-contig statistics:
            "--merge-reference-sequences",
            "-t", title,
            "-i", "%(TEMP_IN_BAM)s",
            "-d", "%(TEMP_DIR)s",
            "-r", "%(IN_REFERENCE)s"
        ],
        TEMP_IN_BAM=MultiBAMInputNode.PIPE_FILE,
        IN_REFERENCE=reference,
        OUT_FREQ_3p=os.path.join(output_directory, "3pGtoA_freq.txt"),
        OUT_FREQ_5p=os.path.join(output_directory, "5pCtoT_freq.txt"),
        OUT_COMP_USER=os.path.join(output_directory, "dnacomp.txt"),
        OUT_PLOT_FRAG=os.path.join(output_directory,
                                   "Fragmisincorporation_plot.pdf"),
        OUT_PLOT_LEN=os.path.join(output_directory, "Length_plot.pdf"),
        OUT_LENGTH=os.path.join(output_directory, "lgdistribution.txt"),
        OUT_MISINCORP=os.path.join(output_directory,
                                   "misincorporation.txt"),
        OUT_LOG=os.path.join(output_directory, "Runtime_log.txt"),
        TEMP_OUT_STDOUT="pipe_mapDamage.stdout",
        TEMP_OUT_STDERR="pipe_mapDamage.stderr",
        CHECK_RSCRIPT=RSCRIPT_VERSION,
        CHECK_MAPDAMAGE=MAPDAMAGE_VERSION)
    # Register each input BAM as an auto-numbered input dependency
    command.add_multiple_kwargs(input_files)

    return {
        "command": command,
        "config": config,
        "input_files": input_files,
        "dependencies": dependencies
    }
def customize(cls, input_file_1, input_file_2, output_file, reference,
              prefix, threads=2, log_file=None, dependencies=()):
    """Builds the customizable Bowtie2 alignment command.

    Either 'input_file_1' alone (single-end / unpaired reads) or both
    'input_file_1' and 'input_file_2' (paired-end reads) must be given;
    a NodeError is raised otherwise. Returns a dict with the sub-commands,
    their execution order, the thread count actually used, and the node
    dependencies.
    """
    # Setting IN_FILE_2 to None makes AtomicCmd ignore this key
    aln = _bowtie2_template(("bowtie2",), prefix,
                            OUT_STDOUT=AtomicCmd.PIPE,
                            CHECK_VERSION=BOWTIE2_VERSION)
    aln.set_option("-x", prefix)

    if log_file is not None:
        aln.set_kwargs(OUT_STDERR=log_file)

    if input_file_1 and not input_file_2:
        # Unpaired reads; each file is registered as IN_FILE_1_nn
        aln.add_multiple_options("-U", safe_coerce_to_tuple(input_file_1),
                                 template="IN_FILE_1_%02i")
    elif input_file_1 and input_file_2:
        # Paired reads; mate 1 and mate 2 files are passed separately
        aln.add_multiple_options("-1", safe_coerce_to_tuple(input_file_1),
                                 template="IN_FILE_1_%02i")
        aln.add_multiple_options("-2", safe_coerce_to_tuple(input_file_2),
                                 template="IN_FILE_2_%02i")
    else:
        raise NodeError("Input 1, OR both input 1 and input 2 must "
                        "be specified for Bowtie2 node")

    # The thread count may be capped depending on the reference
    max_threads = _get_max_threads(reference, threads)
    aln.set_option("--threads", max_threads)

    # Fixmate is only meaningful for paired-end data
    run_fixmate = input_file_1 and input_file_2
    order, commands = _process_output(aln, output_file, reference,
                                      run_fixmate=run_fixmate)

    commands["aln"] = aln
    return {"commands": commands,
            "order": ["aln"] + order,
            "threads": max_threads,
            "dependencies": dependencies}
def __init__(self, description, destination, source_nodes):
    """Node copying every output file of 'source_nodes' into the
    'destination' folder.
    """
    source_nodes = safe_coerce_to_tuple(source_nodes)

    input_files = []
    for node in source_nodes:
        input_files.extend(node.output_files)

    output_files = [reroot_path(destination, fpath)
                    for fpath in input_files]
    self._files = zip(input_files, output_files)

    Node.__init__(self,
                  description="<Copy %s output to %r>" % (description,
                                                          destination),
                  input_files=input_files,
                  output_files=output_files,
                  dependencies=source_nodes)
def __init__(self, call, threads=1, **kwargs):
    """Builder for commands optionally run under MPI.

    call    -- the command to invoke (string or sequence of strings).
    threads -- number of MPI processes; with 1 the command is invoked
               directly (EXEC_MPI still registers "mpirun" -- presumably
               as an executable requirement; confirm against
               AtomicCmdBuilder), with > 1 it becomes
               "mpirun -n <threads> <call>".

    Raises TypeError for non-integer threads and ValueError for
    threads < 1.
    """
    if not isinstance(threads, (types.IntType, types.LongType)):
        raise TypeError("'threads' must be an integer value, not %r"
                        % threads.__class__.__name__)
    elif threads < 1:
        raise ValueError("'threads' must be 1 or greater, not %i" % threads)
    elif threads == 1:
        AtomicCmdBuilder.__init__(self, call, EXEC_MPI="mpirun", **kwargs)
    else:
        call = safe_coerce_to_tuple(call)
        mpi_call = ["mpirun", "-n", threads]
        mpi_call.extend(call)

        AtomicCmdBuilder.__init__(
            self, mpi_call, EXEC_MAIN=call[0], **kwargs)
def __init__(self, config, input_files, output_file, dependencies=()):
    """Node running the 'duphist' tool over the input BAMs, writing the
    resulting duplicate histogram to 'output_file'.
    """
    input_files = safe_coerce_to_tuple(input_files)

    builder = factory.new("duphist")
    builder.add_value('%(TEMP_IN_BAM)s')
    builder.set_kwargs(OUT_STDOUT=output_file,
                       TEMP_IN_BAM=MultiBAMInputNode.PIPE_FILE)
    builder.add_multiple_kwargs(input_files)

    MultiBAMInputNode.__init__(self,
                               config=config,
                               input_bams=input_files,
                               command=builder.finalize(),
                               description="<DuplicateHistogram: %s -> %r>"
                               % (describe_files(input_files),
                                  output_file),
                               dependencies=dependencies)
def Requirement(call, search, checks, name=None, priority=0):
    # Ignore function naming scheme
    # pylint: disable=C0103
    """Returns a singleton Requirement object, based on the parameters,
    which may be used to check that version requirements are met for a
    given program/utility/module, etc.

    Parameters:
      call     -- A string, or a tuple containing strings for a system
                  call, or a tuple containing a function at the first
                  position, and a set of positional parameters. In the
                  case of system calls, stdout and stderr are returned as
                  a single string, in the case of a function call, the
                  return value is expected to be a str.
      search   -- A regular expression (string or re object), used to
                  search the output of the "call". Groups are assumed to
                  represent version numbers.
      checks   -- A callable that implements the interface described in
                  the Check class.
      name     -- Descriptive name for the executable/module/etc. If not
                  specified, first value in 'call' will be used; if
                  multiple otherwise identical checks are made, the last
                  name that does not equal the first value of 'call' will
                  be used.
      priority -- Order in which requirements are checked; if multiple
                  otherwise identical checks are made with different
                  priority, the highest priority takes precedence.

    Implementation detail: To reduce the need for performing calls or
    system-calls multiple times, caches are implemented using the call
    object as keys. Thus the same calls should be passed in a manner
    which allow equality between the same calls to be established.
    """
    call = safe_coerce_to_tuple(call)
    # The full (call, search, checks) triple identifies a requirement
    key = (call, search, checks)

    try:
        requirement = _REQUIREMENT_CACHE[key]

        # Highest priority takes precedence
        requirement.priority = max(requirement.priority, priority)
        # Last explicitly specified name takes precedence
        requirement.name = name or requirement.name
    except KeyError:
        # First time this requirement is seen; create and cache it
        requirement = RequirementObj(*key, name=name, priority=priority)
        _REQUIREMENT_CACHE[key] = requirement

    return requirement
def __init__(
    self,
    infiles,
    out_prefix,
    exclude_groups=(),
    reduce=False,
    dependencies=(),
    file_dependencies=(),
):
    """
    infiles = {names : {"partitions" : ..., "filenames" : [...]}}

    Node writing 'out_prefix.phy' and 'out_prefix.partitions' from the
    given input files; raises TypeError/ValueError for malformed input.
    """
    if not (isinstance(infiles, dict)
            and all(isinstance(dd, dict) for dd in infiles.values())):
        raise TypeError("'infiles' must be a dictionary of dictionaries")

    input_filenames = []
    for (name, subdd) in infiles.items():
        # Only keys listed in _VALID_KEYS are recognized per input entry
        if set(subdd) - _VALID_KEYS:
            raise ValueError("Invalid keys found for %r: %s"
                             % (name, ", ".join(set(subdd) - _VALID_KEYS)))
        elif not isinstance(subdd["filenames"], list):
            raise ValueError("filenames must be a list of strings")
        input_filenames.extend(subdd["filenames"])
    # Optional file dependencies; used to depend on the list of sequences
    input_filenames.extend(safe_coerce_to_tuple(file_dependencies))

    self._reduce = bool(reduce)
    self._infiles = copy.deepcopy(infiles)
    self._out_prefix = out_prefix
    self._excluded = safe_coerce_to_frozenset(exclude_groups)

    description = "<FastaToPartitionedPhy%s: %i file(s) -> '%s.*'>" % (
        " (reducing)" if reduce else "",
        len(infiles),
        out_prefix,
    )

    Node.__init__(
        self,
        description=description,
        input_files=input_filenames,
        output_files=[out_prefix + ".phy", out_prefix + ".partitions"],
        dependencies=dependencies,
    )
def __init__(self, description, destination, source_nodes):
    """Node copying every output file of 'source_nodes' into the
    'destination' folder.
    """
    source_nodes = safe_coerce_to_tuple(source_nodes)

    input_files = []
    for node in source_nodes:
        input_files.extend(node.output_files)

    output_files = [reroot_path(destination, fpath)
                    for fpath in input_files]
    self._files = zip(input_files, output_files)

    Node.__init__(self,
                  description="<Copy %s output to %r>" % (description,
                                                          destination),
                  input_files=input_files,
                  output_files=output_files,
                  dependencies=source_nodes)
def __init__(self, config, prefix, samples, features, target):
    """Prefix-level node aggregating per-sample BAMs mapped against a
    single reference prefix and building the merged BAM(s).
    """
    self.samples = safe_coerce_to_tuple(samples)
    self.name = prefix["Name"]
    self.roi = prefix.get("RegionsOfInterest", {})
    self.folder = config.destination
    self.target = target

    files_and_nodes = {}
    for sample in self.samples:
        files_and_nodes.update(sample.bams.items())

    self.datadup_check = self._build_dataduplication_node(
        prefix, files_and_nodes)
    self.bams = self._build_bam(config, prefix, files_and_nodes)

    node_list = [self.datadup_check]
    for sample in self.samples:
        node_list.extend(sample.nodes)

    self.nodes = tuple(node_list)
def customize(self, config, reference, input_files, output_directory,
              title="mapDamage", dependencies=()):
    """Builds the customizable mapDamage plotting command, which computes
    damage statistics for the input BAMs and writes tables and plots into
    'output_directory'. Returns a dict of parameters for the node.
    """
    input_files = safe_coerce_to_tuple(input_files)

    command = AtomicCmdBuilder(
        ["mapDamage", "--no-stats",
         # Prevent references with many contigs from using excessive
         # amounts of memory, at the cost of per-contig statistics:
         "--merge-reference-sequences",
         "-t", title,
         "-i", "%(TEMP_IN_BAM)s",
         "-d", "%(TEMP_DIR)s",
         "-r", "%(IN_REFERENCE)s"],
        TEMP_IN_BAM=MultiBAMInputNode.PIPE_FILE,
        IN_REFERENCE=reference,
        OUT_FREQ_3p=os.path.join(output_directory, "3pGtoA_freq.txt"),
        OUT_FREQ_5p=os.path.join(output_directory, "5pCtoT_freq.txt"),
        OUT_COMP_USER=os.path.join(output_directory, "dnacomp.txt"),
        OUT_PLOT_FRAG=os.path.join(output_directory,
                                   "Fragmisincorporation_plot.pdf"),
        OUT_PLOT_LEN=os.path.join(output_directory, "Length_plot.pdf"),
        OUT_LENGTH=os.path.join(output_directory, "lgdistribution.txt"),
        OUT_MISINCORP=os.path.join(output_directory,
                                   "misincorporation.txt"),
        OUT_LOG=os.path.join(output_directory, "Runtime_log.txt"),
        TEMP_OUT_STDOUT="pipe_mapDamage.stdout",
        TEMP_OUT_STDERR="pipe_mapDamage.stderr",
        CHECK_RSCRIPT=RSCRIPT_VERSION,
        CHECK_MAPDAMAGE=MAPDAMAGE_VERSION)
    # Register each input BAM as an auto-numbered input dependency
    command.add_multiple_kwargs(input_files)

    return {"command": command,
            "config": config,
            "input_files": input_files,
            "dependencies": dependencies}
def add_support(self, bootstraps, fmt="{Support}"):
    """Adds support values to the current tree, based on a set of trees
    containing the same taxa.

    It is assumed that the support trees represent unrooted or arbitrarily
    rooted trees, and no weight is given to the rooted topology of these
    trees. The main tree should itself be rooted, and the topology and
    ordering of this tree is preserved, with node-names updated using the
    formatting string 'fmt'. Formatting is carried out using str.format,
    with these fields:

      {Support}    -- The total number of trees in which a clade is
                      supported.
      {Percentage} -- The percentage of trees in which a clade is
                      supported (float).
      {Fraction}   -- The fraction of trees in which a clade is supported
                      (float).

    For example, typical percentage support-values can be realized by
    setting 'fmt' to the value "{Percentage:.0f}" to produce integer
    values.

    Raises NewickError if the main tree contains duplicate leaf names, or
    if a support tree does not contain the same set of leaves.
    """
    clade_counts = {}
    leaf_names_lst = list(self.get_leaf_names())
    leaf_names = frozenset(leaf_names_lst)
    # Duplicate leaves would make clade identities ambiguous
    if len(leaf_names) != len(leaf_names_lst):
        raise NewickError(
            "Cannot add support values to trees with duplicate leaf names"
        )

    bootstraps = safe_coerce_to_tuple(bootstraps)
    for support_tree in bootstraps:
        support_tree_names = frozenset(support_tree.get_leaf_names())
        if leaf_names != support_tree_names:
            raise NewickError(
                "Support tree does not contain same set of leaf nodes"
            )

        # Count every clade (leaf partition) present in this support tree
        support_graph = _NewickGraph(support_tree)
        for clade in support_graph.get_clade_names():
            clade_counts[clade] = clade_counts.get(clade, 0) + 1

    return self._add_support(self, len(bootstraps), clade_counts, fmt)
def __init__(self, config, input_bams, command, index_format=None,
             description=None, threads=1, dependencies=()):
    """Node wrapping 'command', reading one or more BAMs via a pipe.

    With multiple input BAMs, a Picard MergeSamFiles process is run in
    parallel with 'command', streaming a coordinate-sorted merge into the
    pipe. An index can only be required for a single input file, and
    'index_format' must be None, ".bai" or ".csi".
    """
    bam_files = safe_coerce_to_tuple(input_bams)
    self._input_bams = bam_files
    self._index_format = index_format

    # Guard clauses; each raises for an unusable configuration.
    if not bam_files:
        raise ValueError("No input BAM files specified!")
    if len(bam_files) > 1 and index_format:
        raise ValueError("BAM index cannot be required for > 1 file")
    if index_format not in (None, ".bai", ".csi"):
        raise ValueError("Unknown index format %r" % (index_format, ))

    if len(bam_files) > 1:
        merge_cmd = picard_command(config, "MergeSamFiles")
        for key, value in (("SO", "coordinate"),
                           ("COMPRESSION_LEVEL", 0),
                           ("OUTPUT", "%(TEMP_OUT_BAM)s"),
                           # Validation is mostly left to manual
                           # ValidateSamFile runs; this is because .csi
                           # indexed BAM records can have "invalid" bins.
                           ("VALIDATION_STRINGENCY", "LENIENT")):
            merge_cmd.set_option(key, value, sep="=")
        merge_cmd.add_multiple_options("I", input_bams, sep="=")
        merge_cmd.set_kwargs(TEMP_OUT_BAM=self.PIPE_FILE)

        # The merge streams into the pipe while 'command' consumes it.
        command = ParallelCmds([merge_cmd.finalize(), command])

    CommandNode.__init__(self,
                         command=command,
                         description=description,
                         threads=threads,
                         dependencies=dependencies)
def __init__(self, config, target_name, input_files, output_file, prefix,
             regions_file=None, dependencies=()):
    """Node computing a depth histogram from one or more BAM files.

    When 'regions_file' is given, the histogram is restricted to those
    regions, which requires a BAM index in the format declared by
    prefix['IndexFormat'].
    """
    bam_files = safe_coerce_to_tuple(input_files)
    # An index is only required when restricting to a regions file.
    index_format = regions_file and prefix['IndexFormat']

    depths = factory.new("depths")
    depths.add_value("%(TEMP_IN_BAM)s")
    depths.add_value("%(OUT_FILE)s")
    depths.set_option("--target-name", target_name)
    depths.set_kwargs(OUT_FILE=output_file,
                      TEMP_IN_BAM=MultiBAMInputNode.PIPE_FILE)
    depths.add_multiple_kwargs(bam_files)

    if regions_file:
        depths.set_option('--regions-file', '%(IN_REGIONS)s')
        depths.set_kwargs(IN_REGIONS=regions_file,
                          TEMP_IN_INDEX=swap_ext(MultiBAMInputNode.PIPE_FILE,
                                                 index_format))

    MultiBAMInputNode.__init__(
        self,
        config=config,
        input_bams=bam_files,
        index_format=index_format,
        command=depths.finalize(),
        description="<DepthHistogram: %s -> '%s'>"
        % (describe_files(bam_files), output_file),
        dependencies=dependencies)
def test_safe_coerce_to_tuple__list():
    # A list is converted element-for-element into a tuple.
    result = utils.safe_coerce_to_tuple([1, 3, 2])
    assert_equal(result, (1, 3, 2))
def __init__(self, command, set_cwd=False, **kwargs):
    """Takes a command and a set of files.

    The command is expected to be an iterable starting with the name of an
    executable, with each item representing one string on the command line.
    Thus, the command "find /etc -name 'profile*'" might be represented as
    the list ["find", "/etc", "-name", "profile*"].

    Commands typically consist of an executable, one or more input files,
    one or more output files, and one or more pipes. In atomic command,
    such files are not specified directly, but instead are specified using
    keywords, which allows easy tracking of requirements and other
    features. Note that only files, and not directories, are supported as
    input/output!

    Each keyword represents a type of file, as determined by the prefix:
       IN_    -- Path to input file transformed/analysed by the executable.
       OUT_   -- Path to output file generated by the executable. During
                 execution of the AtomicCmd, these paths are modified to
                 point to the temporary directory.
       EXEC_  -- Name of / path to executable. The first item in the
                 command is always one of the executables, even if not
                 specified in this manner.
       AUX_   -- Auxiliary files required by the executable(s), which are
                 themselves not executable. Examples include scripts,
                 config files, data-bases, and the like.
       CHECK_ -- A callable, which upon calling does version checking,
                 raising an exception in the case of requirements not
                 being met. This may be used to ensure that prerequisites
                 are met before running the command. The function is not
                 called by AtomicCmd itself.

    EXAMPLE 1: Creating a gzipped tar-archive from two files
    The command "tar cjf output-file input-file-1 input-file-2" could be
    represented using the following AtomicCmd:
    cmd = AtomicCmd(["tar", "cjf", "%(OUT_FILE)s",
                     "%(IN_FILE_1)s", "%(IN_FILE_2)s"],
                    OUT_FILE="output-file",
                    IN_FILE_1="input-file-1",
                    IN_FILE_2="input-file-2")

    Note that files that are not directly invoked may be included above,
    in order to allow the specification of requirements. This could
    include required data files, or executables indirectly executed by a
    script.

    If the above is prefixed with "TEMP_", files are read from / written
    to the temporary folder in which the command is executed. Note that
    all TEMP_OUT_ files are deleted when commit is called (if they exist),
    and only filenames (not dirname component) are allowed for TEMP_
    values.

    In addition, the following special names may be used with the above:
       STDIN_  -- Takes a filename, or an AtomicCmd, in which case stdout
                  of that command is piped to the stdin of this instance.
       STDOUT_ -- Takes a filename, or the special value PIPE to allow
                  another AtomicCmd instance to use the output directly.
       STDERR_ -- Takes a filename.

    Each pipe can only be used once, with or without the TEMP_ prefix.

    EXAMPLE 2: zcat'ing an archive
    The command "zcat input-file > output-file" could be represented
    using the following AtomicCmd:
    cmd = AtomicCmd(["zcat", "%(IN_FILE)s"], OUT_STDOUT="output-file")

    If 'set_cwd' is True, the current working directory is set to the
    temporary directory before the command is executed. Input paths are
    automatically turned into absolute paths in this case."""
    self._proc = None
    self._temp = None
    self._running = False
    # list(...) is required for Python 3 compatibility, where map()
    # returns a lazy iterator; the emptiness check and [0] indexing below
    # need a concrete sequence. Under Python 2 this is a no-op copy.
    self._command = list(map(str, safe_coerce_to_tuple(command)))
    self._set_cwd = set_cwd
    if not self._command or not self._command[0]:
        raise ValueError("Empty command in AtomicCmd constructor")

    arguments = self._process_arguments(id(self), self._command, kwargs)
    self._files = self._build_files_dict(arguments)
    self._file_sets = self._build_files_map(self._command, arguments)

    # Dry-run, to catch errors early
    self._generate_call("/tmp")
def test_safe_coerce_to_tuple__dict():
    # A dict is treated as a single value, not iterated over.
    result = utils.safe_coerce_to_tuple({1: 2, 3: 4})
    assert_equal(result, ({1: 2, 3: 4}, ))
def test_safe_coerce_to_tuple__iterable():
    # A lazy iterable (xrange) is fully consumed into a tuple.
    result = utils.safe_coerce_to_tuple(xrange(3))
    assert_equal(result, (0, 1, 2))
def test_safe_coerce_to_tuple__tuple():
    # A tuple is passed through unchanged.
    result = utils.safe_coerce_to_tuple((1, 3, 2))
    assert_equal(result, (1, 3, 2))
def __init__(self, commands):
    """Takes a non-empty command, or sequence of commands, and validates
    them; raises CmdError if no commands were given."""
    # Coerce first, so a single command and a sequence are handled alike.
    self._commands = safe_coerce_to_tuple(commands)
    if not self._commands:
        raise CmdError("Empty list passed to command set")

    self._validate_commands()