Example #1
    def __init__(self, config, dependencies=()):
        self._root = config.destination
        self._data = config.database
        self._samples = config.samples
        self._sample_keys = self._samples.keys()

        input_files = set()
        self._reports = {}
        for sample, info in self._samples.iteritems():
            report = AnalysisReport(config=config,
                                    root=os.path.join(self._root, sample),
                                    has_nuc="Nuc" in info["Files"],
                                    has_mt="Mito" in info["Files"])

            input_files.update(report.input_files())
            self._reports[sample] = report

        output_prefix = os.path.join(self._root, "summary")
        Node.__init__(self,
                      description="<SummaryReport -> %r>"
                      % (output_prefix + '.html',),
                      input_files=input_files,
                      output_files=(output_prefix + '.html',
                                    output_prefix + '.css'),
                      dependencies=dependencies)
Example #2
    def __init__(self, replicates, output_root, dependencies=()):
        replicates = tuple(replicates)
        if not replicates:
            raise ValueError("No replicates passed to SelectBestAdmixture")

        input_files = []
        ref_filenames = None
        for node in replicates:
            filenames = frozenset(
                os.path.basename(filename) for filename in node.output_files)

            if ref_filenames is None:
                ref_filenames = filenames
            elif ref_filenames != filenames:
                raise RuntimeError("Node %r does not contain expected files, "
                                   "%r, vs %r" %
                                   (node, ref_filenames, filenames))

            input_files.extend(node.output_files)

        output_files = [
            os.path.join(output_root, filename) for filename in ref_filenames
        ]

        self._ref_filenames = ref_filenames
        self._files = tuple(node.output_files for node in replicates)
        self._output_root = output_root

        Node.__init__(self,
                      description="<SelectBestAdmixture -> %r>" %
                      (output_root, ),
                      input_files=input_files,
                      output_files=output_files,
                      dependencies=tuple(dependencies) + tuple(replicates))
Example #3
    def __init__(self, fasta_files, sequences, destination, dependencies=()):
        """
        fasta_files -- { taxon_name_1 : filename_1, ... }
        sequences   -- { interval_name_1, ... }
        """

        self._infiles = copy.deepcopy(fasta_files)
        self._sequences = utilities.safe_coerce_to_frozenset(sequences)
        self._destination = copy.copy(destination)
        self._outfiles = [
            os.path.join(destination, name + ".fasta") for name in self._sequences
        ]

        input_files = list(self._infiles.values())
        for filename in self._infiles.values():
            input_files.append(filename + ".fai")

        desc = "<CollectSequences: %i sequences from %i files -> '%s'>" % (
            len(self._sequences),
            len(self._infiles),
            self._destination,
        )
        Node.__init__(
            self,
            description=desc,
            input_files=input_files,
            output_files=self._outfiles,
            dependencies=dependencies,
        )
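
A minimal usage sketch for the node above, assuming the class is named CollectSequences as in its description string and following its docstring; the taxon names, file paths, and destination directory are illustrative placeholders, not values from the source project.

# Hypothetical invocation of the CollectSequences node shown above.
# Taxon names and paths are placeholders.
node = CollectSequences(
    fasta_files={"taxon_A": "alignments/taxon_A.fasta",
                 "taxon_B": "alignments/taxon_B.fasta"},
    sequences={"region_1", "region_2"},
    destination="results/sequences",
)
# Each FASTA is read together with its ".fai" index, and one output file is
# declared per sequence, e.g. results/sequences/region_1.fasta.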
Example #4
    def __init__(self, input_files, output_file, dependencies=()):
        Node.__init__(self,
                      description="<Detect Input Duplication: %s>"
                      % (describe_files(input_files)),
                      input_files=input_files,
                      output_files=output_file,
                      dependencies=dependencies)
Example #5
    def __init__(self,
                 infiles,
                 out_partitions,
                 partition_by="123",
                 dependencies=()):
        if (len(partition_by) != 3):
            raise ValueError("Default 'partition_by' must be 3 entries long!")
        elif not isinstance(infiles, dict):
            raise TypeError("'infiles' must be a dictionary")
        elif any(
                len(dd.get("partition_by", "123")) != 3
                for dd in infiles.itervalues()):
            raise ValueError("'partition_by' must be 3 entries long!")
        elif not all(isinstance(dd, dict) for dd in infiles.values()):
            raise TypeError("'infiles' must be a dictionary of dictionaries")
        elif not any(("name" in dd) for dd in infiles.values()):
            raise ValueError("'name' must be specified for all input files")
        elif any((set(dd) - _VALID_KEYS) for dd in infiles.values()):
            # Collect the offending keys for the error message; the generator
            # variable 'dd' is not defined outside the expression above.
            invalid_keys = set()
            for dd in infiles.values():
                invalid_keys.update(set(dd) - _VALID_KEYS)
            raise ValueError("Invalid keys found: %s" %
                             ", ".join(sorted(invalid_keys)))

        self._infiles = infiles
        self._out_part = out_partitions
        self._part_by = partition_by

        description  = "<FastaToPartitions (default: %s): %i file(s) -> '%s'>" % \
            (partition_by, len(infiles), out_partitions)

        Node.__init__(self,
                      description=description,
                      input_files=infiles.keys(),
                      output_files=out_partitions,
                      dependencies=dependencies)
Example #6
    def __init__(self, replicates, output_root, dependencies=()):
        replicates = tuple(replicates)
        if not replicates:
            raise ValueError("No replicates passed to SelectBestAdmixture")

        input_files = []
        ref_filenames = None
        for node in replicates:
            filenames = frozenset(os.path.basename(filename)
                                  for filename in node.output_files)

            if ref_filenames is None:
                ref_filenames = filenames
            elif ref_filenames != filenames:
                raise RuntimeError("Node %r does not contain expected files, "
                                   "%r, vs %r" % (node, ref_filenames,
                                                  filenames))

            input_files.extend(node.output_files)

        output_files = [os.path.join(output_root, filename)
                        for filename in ref_filenames]

        self._ref_filenames = ref_filenames
        self._files = tuple(node.output_files for node in replicates)
        self._output_root = output_root

        Node.__init__(self,
                      description="<SelectBestAdmixture -> %r>"
                      % (output_root,),
                      input_files=input_files,
                      output_files=output_files,
                      dependencies=tuple(dependencies) + tuple(replicates))
Example #7
    def __init__(self, input_files, output_file, dependencies=()):
        Node.__init__(self,
                      description="<Detect Input Duplication: %s>" %
                      (describe_files(input_files)),
                      input_files=input_files,
                      output_files=output_file,
                      dependencies=dependencies)
Example #8
    def __init__(self, infiles, out_partitions, partition_by = "123", dependencies = ()):
        if (len(partition_by) != 3):
            raise ValueError("Default 'partition_by' must be 3 entries long!")
        elif not isinstance(infiles, dict):
            raise TypeError("'infiles' must be a dictionary")
        elif any(len(dd.get("partition_by", "123")) != 3 for dd in infiles.itervalues()):
            raise ValueError("'partition_by' must be 3 entries long!")
        elif not all(isinstance(dd, dict) for dd in infiles.values()):
            raise TypeError("'infiles' must be a dictionary of dictionaries")
        elif not any(("name" in dd) for dd in infiles.values()):
            raise ValueError("'name' must be specified for all input files")
        elif any((set(dd) - _VALID_KEYS) for dd in infiles.values()):
            # Collect the offending keys for the error message; the generator
            # variable 'dd' is not defined outside the expression above.
            invalid_keys = set()
            for dd in infiles.values():
                invalid_keys.update(set(dd) - _VALID_KEYS)
            raise ValueError("Invalid keys found: %s" % ", ".join(sorted(invalid_keys)))

        self._infiles   = infiles
        self._out_part  = out_partitions
        self._part_by   = partition_by

        description  = "<FastaToPartitions (default: %s): %i file(s) -> '%s'>" % \
            (partition_by, len(infiles), out_partitions)

        Node.__init__(self,
                      description  = description,
                      input_files  = infiles.keys(),
                      output_files = out_partitions,
                      dependencies = dependencies)
Example #9
    def __init__(self, infiles, out_prefix, exclude_groups=(), reduce=False,
                 dependencies=(), file_dependencies=()):
        """
        infiles = {names : {"partitions" : ..., "filenames" : [...]}}
        """
        if not (isinstance(infiles, dict)
                and all(isinstance(dd, dict) for dd in infiles.values())):
            raise TypeError("'infiles' must be a dictionary of dictionaries")

        input_filenames = []
        for (name, subdd) in infiles.iteritems():
            if set(subdd) - _VALID_KEYS:
                raise ValueError("Invalid keys found for %r: %s"
                                 % (name, ", ".join(set(subdd) - _VALID_KEYS)))
            elif not isinstance(subdd["filenames"], list):
                raise ValueError("filenames must be a list of strings")
            input_filenames.extend(subdd["filenames"])
        # Optional file dependencies; used to depend on the list of sequences
        input_filenames.extend(safe_coerce_to_tuple(file_dependencies))

        self._reduce = bool(reduce)
        self._infiles = copy.deepcopy(infiles)
        self._out_prefix = out_prefix
        self._excluded = safe_coerce_to_frozenset(exclude_groups)

        description = "<FastaToPartitionedPhy%s: %i file(s) -> '%s.*'>" % \
            (" (reducing)" if reduce else "", len(infiles), out_prefix)

        Node.__init__(self,
                      description=description,
                      input_files=input_filenames,
                      output_files=[out_prefix + ".phy",
                                    out_prefix + ".partitions"],
                      dependencies=dependencies)
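
A minimal usage sketch for FastaToPartitionedPhy, following the infiles layout in the docstring above; the class name is taken from the description string, and the paths, group name, and the three-character "partitions" value are assumptions (the latter by analogy with the partition_by strings used by FastaToPartitions earlier), not values from the source project.

# Hypothetical invocation of the FastaToPartitionedPhy node shown above.
# The "partitions" value is assumed to be a codon-position string like the
# "123" defaults used by FastaToPartitions; paths are placeholders.
node = FastaToPartitionedPhy(
    infiles={"region_1": {"partitions": "123",
                          "filenames": ["alignments/region_1.fasta"]}},
    out_prefix="results/supermatrix",
    exclude_groups=("outgroup_A",),
)
# Declared outputs are results/supermatrix.phy and results/supermatrix.partitions.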
Example #10
    def __init__(self, config, root, has_nuc, has_mt, dependencies=()):
        """

        Arguments:
          config -- Config object generated using paleomix.tools.zonkey.config.
          root -- Root folder containing current analysis.
          has_nuc -- True if a nuclear BAM was provided.
          has_mt -- True if a mitochondrial BAM was provided.
          dependencies -- Nodes for ReportNode to depend on.
        """

        self._root = root
        self._data = copy.deepcopy(config.database)
        self._report = AnalysisReport(config, root, has_nuc, has_mt)
        self._has_nuc = bool(has_nuc)
        self._has_mt = bool(has_mt)

        self._treemix_outgroup = config.treemix_outgroup
        self._treemix_k = config.treemix_k
        if self._treemix_k is None:
            self._treemix_k = '<automatic>'

        Node.__init__(self,
                      description="<Report -> %r>"
                      % (os.path.join(self._root, "report.html"),),
                      input_files=self._report.input_files(),
                      output_files=(os.path.join(self._root, "report.html"),
                                    os.path.join(self._root, "report.css")),
                      dependencies=dependencies)
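
A minimal usage sketch for the report node above; the ReportNode name is taken from the docstring, the config object is assumed to come from paleomix.tools.zonkey.config as described there, and the sample folder is a placeholder.

# Hypothetical invocation of the ReportNode described above. 'config' is
# assumed to be a paleomix.tools.zonkey.config object; paths are placeholders.
report = ReportNode(
    config=config,
    root="analysis/sample_1",
    has_nuc=True,    # a nuclear BAM was provided
    has_mt=False,    # no mitochondrial BAM was provided
)
# Declared outputs are analysis/sample_1/report.html and
# analysis/sample_1/report.css.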
Example #11
    def __init__(self, config, dependencies=()):
        self._root = config.destination
        self._data = config.database
        self._samples = config.samples
        self._sample_keys = list(self._samples.keys())

        input_files = set()
        self._reports = {}
        for sample, info in self._samples.items():
            report = AnalysisReport(
                config=config,
                root=os.path.join(self._root, sample),
                has_nuc="Nuc" in info["Files"],
                has_mt="Mito" in info["Files"],
            )

            input_files.update(report.input_files())
            self._reports[sample] = report

        output_prefix = os.path.join(self._root, "summary")
        Node.__init__(
            self,
            description="<SummaryReport -> %r>" % (output_prefix + ".html", ),
            input_files=input_files,
            output_files=(output_prefix + ".html", output_prefix + ".css"),
            dependencies=dependencies,
        )
Example #12
    def __init__(self, config, root, has_nuc, has_mt, dependencies=()):
        """

        Arguments:
          config -- Config object generated using paleomix.tools.zonkey.config.
          root -- Root folder containing current analysis.
          has_nuc -- True if a nuclear BAM was provided.
          has_mt -- True if a mitochondrial BAM was provided.
          dependencies -- Nodes for ReportNode to depend on.
        """

        self._root = root
        self._data = copy.deepcopy(config.database)
        self._report = AnalysisReport(config, root, has_nuc, has_mt)
        self._has_nuc = bool(has_nuc)
        self._has_mt = bool(has_mt)

        self._treemix_outgroup = config.treemix_outgroup
        self._treemix_k = config.treemix_k
        if self._treemix_k is None:
            self._treemix_k = '<automatic>'

        Node.__init__(self,
                      description="<Report -> %r>" %
                      (os.path.join(self._root, "report.html"), ),
                      input_files=self._report.input_files(),
                      output_files=(os.path.join(self._root, "report.html"),
                                    os.path.join(self._root, "report.css")),
                      dependencies=dependencies)
Example #13
    def __init__(self, config, output_file, dependencies=()):
        self._samples = config.database.samples

        Node.__init__(self,
                      description="<WriteSampleList -> %r>" % (output_file,),
                      input_files=(config.tablefile,),
                      output_files=(output_file,),
                      dependencies=dependencies)
Example #14
    def __init__(self, input_file, output_file, dependencies=()):
        Node.__init__(
            self,
            description="<FreqToTreemix -> %r>" % (output_file, ),
            input_files=(input_file, ),
            output_files=(output_file, ),
            dependencies=dependencies,
        )
Example #15
    def __init__(self, input_files, output_file, offset, dependencies=()):
        self._offset = offset
        Node.__init__(self,
                      description="<Validate FASTQ Files: %s>"
                      % (describe_files(input_files)),
                      input_files=input_files,
                      output_files=output_file,
                      dependencies=dependencies)
Example #16
    def __init__(self, config, output_file, dependencies=()):
        self._samples = config.database.samples

        Node.__init__(self,
                      description="<WriteSampleList -> %r>" % (output_file, ),
                      input_files=(config.tablefile, ),
                      output_files=(output_file, ),
                      dependencies=dependencies)
Example #17
    def __init__(self, input_files, output_file, dependencies=()):
        Node.__init__(self,
                      description="<Validate FASTA Files: %s>"
                      % (describe_files(input_files)),
                      input_files=input_files,
                      output_files=output_file,
                      dependencies=dependencies)

        assert len(self.output_files) == 1, self.output_files
Example #18
    def __init__(self, input_files, output_file, dependencies=()):
        self._output_file = output_file

        Node.__init__(self,
                      description="<MergeCoverage: %s -> '%s'>" %
                      (describe_files(input_files), self._output_file),
                      input_files=input_files,
                      output_files=self._output_file,
                      dependencies=dependencies)
Example #19
    def __init__(self, input_files, output_file, dependencies=()):
        Node.__init__(self,
                      description="<Validate FASTA Files: %s>" %
                      (describe_files(input_files)),
                      input_files=input_files,
                      output_files=output_file,
                      dependencies=dependencies)

        assert len(self.output_files) == 1, self.output_files
Example #20
    def __init__(self,
                 config,
                 makefile,
                 target,
                 cov_for_lanes,
                 cov_for_libs,
                 dependencies=()):
        self._target = target.name
        self._output_file = os.path.join(config.destination,
                                         self._target + ".summary")
        self._prefixes = makefile["Prefixes"]
        self._makefile = makefile["Statistics"]

        self._in_raw_bams = cov_for_lanes
        self._in_lib_bams = cov_for_libs
        input_files = set()
        input_files.update(sum(map(list, self._in_raw_bams.values()), []))
        input_files.update(sum(map(list, self._in_lib_bams.values()), []))

        self._in_raw_read = collections.defaultdict(list)
        for prefix in target.prefixes:
            for sample in prefix.samples:
                for library in sample.libraries:
                    for lane in library.lanes:
                        filename = None
                        filetype = None

                        if lane.reads.stats:
                            filetype = 'Raw'
                            filename = lane.reads.stats
                        elif lane.reads.validation:
                            filename = lane.reads.validation
                            fileset = set(lane.reads.files)

                            if fileset & _PE_READS and fileset & _SE_READS:
                                filetype = '*'
                            elif fileset & _PE_READS:
                                filetype = 'PE'
                            elif fileset & _SE_READS:
                                filetype = 'SE'
                            else:
                                assert False, lane.reads.files
                                continue

                            input_files.add(filename)
                        else:
                            assert False

                        self._in_raw_read[(sample.name, library.name,
                                           lane.name)] = (filetype, filename)

        Node.__init__(self,
                      description="<Summary: %s>" % self._output_file,
                      input_files=filter(None, input_files),
                      output_files=[self._output_file],
                      dependencies=dependencies)
Example #21
    def __init__(self, infile, outfile, fai_file, amount=0, dependencies=()):
        self._amount = int(amount)
        self._infile = infile
        self._outfile = outfile
        self._fai_file = fai_file

        Node.__init__(self,
                      description='<PaddedBed (%i): %r -> %r>'
                      % (amount, infile, outfile),
                      input_files=(infile, fai_file),
                      output_files=(outfile,),
                      dependencies=dependencies)
Example #22
    def __init__(self, reference, bedfile, outfile, dependencies=()):
        self._reference = reference
        self._bedfile = bedfile
        self._outfile = outfile

        description = "<ExtractReference: '%s' -> '%s'>" \
            % (reference, outfile)
        Node.__init__(self,
                      description=description,
                      input_files=[reference, bedfile],
                      output_files=[outfile],
                      dependencies=dependencies)
Example #23
    def __init__(self, infile, outfile, fai_file, amount=0, dependencies=()):
        self._amount = int(amount)
        self._infile = infile
        self._outfile = outfile
        self._fai_file = fai_file

        Node.__init__(self,
                      description='<PaddedBed (%i): %r -> %r>' %
                      (amount, infile, outfile),
                      input_files=(infile, fai_file),
                      output_files=(outfile, ),
                      dependencies=dependencies)
Example #24
    def __init__(self, reference, bedfile, outfile, dependencies=()):
        self._reference = reference
        self._bedfile = bedfile
        self._outfile = outfile

        description = "<ExtractReference: '%s' -> '%s'>" \
            % (reference, outfile)
        Node.__init__(self,
                      description=description,
                      input_files=[reference, bedfile],
                      output_files=[outfile],
                      dependencies=dependencies)
Example #25
    def __init__(self, infiles, out_phy, add_flag=False, dependencies=()):
        self._add_flag = add_flag
        self._out_phy = out_phy

        description  = "<FastaToInterleavedPhy: %i file(s) -> '%s'%s>" % \
            (len(infiles), out_phy, (" (w/ flag)" if add_flag else ""))

        Node.__init__(self,
                      description=description,
                      input_files=infiles,
                      output_files=[out_phy],
                      dependencies=dependencies)
Example #26
    def __init__(self, infiles, out_phy, add_flag = False, dependencies = ()):
        self._add_flag  = add_flag
        self._out_phy   = out_phy

        description  = "<FastaToInterleavedPhy: %i file(s) -> '%s'%s>" % \
            (len(infiles), out_phy, (" (w/ flag)" if add_flag else ""))

        Node.__init__(self,
                      description  = description,
                      input_files  = infiles,
                      output_files = [out_phy],
                      dependencies = dependencies)
Example #27
    def __init__(self, input_alignment, input_partition, output_alignment,
                 seed = None, dependencies = ()):
        self._input_phy  = input_alignment
        self._input_part = input_partition
        self._output_phy = output_alignment
        self._seed       = seed

        Node.__init__(self,
                      description  = "<PHYLIPBootstrap: %r -> %r>" \
                        % (input_alignment, output_alignment),
                      input_files  = (input_alignment, input_partition),
                      output_files = (output_alignment,),
                      dependencies = dependencies)
Example #28
    def __init__(self, main_tree_files, support_tree_files, output_file, dependencies = ()):
        self._output_file        = output_file
        self._main_tree_files    = safe_coerce_to_tuple(main_tree_files)
        self._support_tree_files = safe_coerce_to_tuple(support_tree_files)
        input_files = self._main_tree_files + self._support_tree_files

        description  = "<NewickSupport: %s>" % \
          (describe_files(main_tree_files),)

        Node.__init__(self,
                      description  = description,
                      input_files  = input_files,
                      output_files = output_file,
                      dependencies = dependencies)
Example #29
    def __init__(self, description, destination, source_nodes):
        source_nodes = safe_coerce_to_tuple(source_nodes)

        input_files  = []
        for source_node in source_nodes:
            input_files.extend(source_node.output_files)

        output_files = [reroot_path(destination, fpath) for fpath in input_files]
        self._files  = zip(input_files, output_files)

        Node.__init__(self,
                      description  = "<Copy %s output to %r>" % (description, destination),
                      input_files  = input_files,
                      output_files = output_files,
                      dependencies = source_nodes)
Example #30
    def __init__(self, config, makefile, target, cov_for_lanes, cov_for_libs, dependencies = ()):
        self._target        = target.name
        self._output_file   = os.path.join(config.destination, self._target + ".summary")
        self._prefixes      = makefile["Prefixes"]
        self._makefile      = makefile["Statistics"]

        self._in_raw_bams = cov_for_lanes
        self._in_lib_bams = cov_for_libs
        input_files = set()
        input_files.update(sum(map(list, self._in_raw_bams.values()), []))
        input_files.update(sum(map(list, self._in_lib_bams.values()), []))

        self._in_raw_read = collections.defaultdict(list)
        for prefix in target.prefixes:
            for sample in prefix.samples:
                for library in sample.libraries:
                    for lane in library.lanes:
                        filename = None
                        filetype = None

                        if lane.reads.stats:
                            filetype = 'Raw'
                            filename = lane.reads.stats
                        elif lane.reads.validation:
                            filename = lane.reads.validation
                            fileset = set(lane.reads.files)

                            if fileset & _PE_READS and fileset & _SE_READS:
                                filetype = '*'
                            elif fileset & _PE_READS:
                                filetype = 'PE'
                            elif fileset & _SE_READS:
                                filetype = 'SE'
                            else:
                                assert False, lane.reads.files
                                continue

                            input_files.add(filename)
                        else:
                            assert False

                        self._in_raw_read[(sample.name, library.name, lane.name)] = (filetype, filename)

        Node.__init__(self,
                      description  = "<Summary: %s>" % self._output_file,
                      input_files  = filter(None, input_files),
                      output_files = [self._output_file],
                      dependencies = dependencies)
Example #31
    def __init__(self, tree_files, output_file, taxa=(), dependencies=()):
        self._output_file = output_file
        self._tree_files = safe_coerce_to_tuple(tree_files)
        self._reroot_on_taxa = safe_coerce_to_tuple(taxa)

        reroot_on = "midpoint"
        if self._reroot_on_taxa:
            reroot_on = repr("', '".join(sorted(self._reroot_on_taxa)))

        description  = "<NewickReroot (on %s): %s>" % \
          (reroot_on, describe_files(tree_files),)

        Node.__init__(self,
                      description=description,
                      input_files=self._tree_files,
                      output_files=self._output_file,
                      dependencies=dependencies)
Example #32
    def __init__(self, input_files, output_file, offset, dependencies=()):
        self._offset = offset
        self._files = set()
        for (read_type, filename) in input_files.iteritems():
            if read_type == "Paired":
                self._files.add((read_type, filename.format(Pair=1)))
                self._files.add((read_type, filename.format(Pair=2)))
            else:
                self._files.add((read_type, filename))

        input_files = [filename for _, filename in self._files]
        Node.__init__(self,
                      description="<Validate FASTQ Files: %s>" %
                      (describe_files(input_files)),
                      input_files=input_files,
                      output_files=output_file,
                      dependencies=dependencies)
Example #33
    def __init__(self, tree_files, output_file, taxa = (), dependencies = ()):
        self._output_file    = output_file
        self._tree_files     = safe_coerce_to_tuple(tree_files)
        self._reroot_on_taxa = safe_coerce_to_tuple(taxa)

        reroot_on = "midpoint"
        if self._reroot_on_taxa:
            reroot_on = repr("', '".join(sorted(self._reroot_on_taxa)))

        description  = "<NewickReroot (on %s): %s>" % \
          (reroot_on, describe_files(tree_files),)

        Node.__init__(self,
                      description  = description,
                      input_files  = self._tree_files,
                      output_files = self._output_file,
                      dependencies = dependencies)
Example #34
    def __init__(self, input_files, output_file, offset, dependencies=()):
        self._offset = offset
        self._files = set()
        for (read_type, filename) in input_files.iteritems():
            if read_type == "Paired":
                self._files.add((read_type, filename.format(Pair=1)))
                self._files.add((read_type, filename.format(Pair=2)))
            else:
                self._files.add((read_type, filename))

        input_files = [filename for _, filename in self._files]
        Node.__init__(self,
                      description="<Validate FASTQ Files: %s>"
                      % (describe_files(input_files)),
                      input_files=input_files,
                      output_files=output_file,
                      dependencies=dependencies)
Example #35
    def __init__(self,
                 input_alignment,
                 input_partition,
                 output_alignment,
                 seed=None,
                 dependencies=()):
        self._input_phy = input_alignment
        self._input_part = input_partition
        self._output_phy = output_alignment
        self._seed = seed

        Node.__init__(self,
                      description  = "<PHYLIPBootstrap: %r -> %r>" \
                        % (input_alignment, output_alignment),
                      input_files  = (input_alignment, input_partition),
                      output_files = (output_alignment,),
                      dependencies = dependencies)
Example #36
    def __init__(
            self,
            infiles,
            out_prefix,
            exclude_groups=(),
            reduce=False,
            dependencies=(),
            file_dependencies=(),
    ):
        """
        infiles = {names : {"partitions" : ..., "filenames" : [...]}}
        """
        if not (isinstance(infiles, dict)
                and all(isinstance(dd, dict) for dd in infiles.values())):
            raise TypeError("'infiles' must be a dictionary of dictionaries")

        input_filenames = []
        for (name, subdd) in infiles.items():
            if set(subdd) - _VALID_KEYS:
                raise ValueError("Invalid keys found for %r: %s" %
                                 (name, ", ".join(set(subdd) - _VALID_KEYS)))
            elif not isinstance(subdd["filenames"], list):
                raise ValueError("filenames must be a list of strings")
            input_filenames.extend(subdd["filenames"])
        # Optional file dependencies; used to depend on the list of sequences
        input_filenames.extend(safe_coerce_to_tuple(file_dependencies))

        self._reduce = bool(reduce)
        self._infiles = copy.deepcopy(infiles)
        self._out_prefix = out_prefix
        self._excluded = safe_coerce_to_frozenset(exclude_groups)

        description = "<FastaToPartitionedPhy%s: %i file(s) -> '%s.*'>" % (
            " (reducing)" if reduce else "",
            len(infiles),
            out_prefix,
        )

        Node.__init__(
            self,
            description=description,
            input_files=input_filenames,
            output_files=[out_prefix + ".phy", out_prefix + ".partitions"],
            dependencies=dependencies,
        )
Example #37
    def __init__(self,
                 main_tree_files,
                 support_tree_files,
                 output_file,
                 dependencies=()):
        self._output_file = output_file
        self._main_tree_files = safe_coerce_to_tuple(main_tree_files)
        self._support_tree_files = safe_coerce_to_tuple(support_tree_files)
        input_files = self._main_tree_files + self._support_tree_files

        description  = "<NewickSupport: %s>" % \
          (describe_files(main_tree_files),)

        Node.__init__(self,
                      description=description,
                      input_files=input_files,
                      output_files=output_file,
                      dependencies=dependencies)
Example #38
    def __init__(self, description, destination, source_nodes):
        source_nodes = safe_coerce_to_tuple(source_nodes)

        input_files = []
        for source_node in source_nodes:
            input_files.extend(source_node.output_files)

        output_files = [
            reroot_path(destination, fpath) for fpath in input_files
        ]
        self._files = zip(input_files, output_files)

        Node.__init__(self,
                      description="<Copy %s output to %r>" %
                      (description, destination),
                      input_files=input_files,
                      output_files=output_files,
                      dependencies=source_nodes)
Example #39
    def __init__(self, input_file, output_file, filter_by, dependencies):
        self._input_file = input_file
        self._output_file = output_file
        self._filter_by = dict(filter_by)
        for (to_filter, groups) in self._filter_by.items():
            # The taxon to be filtered is implied to be part of the group,
            # but is not needed when actually carrying out the filtering
            groups = utilities.safe_coerce_to_frozenset(groups) \
                - utilities.safe_coerce_to_frozenset(to_filter)

            if not groups:
                raise RuntimeError("Singleton filtering must involve at least "
                                   "one other taxon")
            self._filter_by[to_filter] = groups

        Node.__init__(self,
                      description="<FilterSingleton: '%s' -> '%s'>"
                      % (input_file, output_file),
                      input_files=[input_file],
                      output_files=[output_file],
                      dependencies=dependencies)
Example #40
    def __init__(self, input_file, output_file, filter_by, dependencies):
        self._input_file = input_file
        self._output_file = output_file
        self._filter_by = dict(filter_by)
        for (to_filter, groups) in self._filter_by.items():
            # The taxon to be filtered is implied to be part of the group,
            # but is not needed when actually carrying out the filtering
            groups = utilities.safe_coerce_to_frozenset(groups) \
                - utilities.safe_coerce_to_frozenset(to_filter)

            if not groups:
                raise RuntimeError("Singleton filtering must involve at least "
                                   "one other taxon")
            self._filter_by[to_filter] = groups

        Node.__init__(self,
                      description="<FilterSingleton: '%s' -> '%s'>" %
                      (input_file, output_file),
                      input_files=[input_file],
                      output_files=[output_file],
                      dependencies=dependencies)
Example #41
    def __init__(self, fasta_files, sequences, destination, dependencies=()):
        """
        fasta_files -- { taxon_name_1 : filename_1, ... }
        sequences   -- { interval_name_1, ... }
        """

        self._infiles = copy.deepcopy(fasta_files)
        self._sequences = utilities.safe_coerce_to_frozenset(sequences)
        self._destination = copy.copy(destination)
        self._outfiles = [os.path.join(destination, name + ".fasta")
                          for name in self._sequences]

        input_files = list(self._infiles.itervalues())
        for filename in self._infiles.itervalues():
            input_files.append(filename + ".fai")

        desc = "<CollectSequences: %i sequences from %i files -> '%s'>" \
               % (len(self._sequences), len(self._infiles), self._destination)
        Node.__init__(self,
                      description=desc,
                      input_files=input_files,
                      output_files=self._outfiles,
                      dependencies=dependencies)
Example #42
    def __init__(self, input_file, output_file, dependencies=()):
        Node.__init__(self,
                      description="<FreqToTreemix -> %r>" % (output_file,),
                      input_files=(input_file,),
                      output_files=(output_file,),
                      dependencies=dependencies)