def modified_after(younger, older):
    """Returns true if any of the files expected to be 'younger' have been
    modified after any of the files expected to be 'older'."""
    def _mtimes(paths):
        for path in paths:
            yield os.path.getmtime(path)

    newest_younger = max(_mtimes(safe_coerce_to_frozenset(younger)))
    oldest_older = min(_mtimes(safe_coerce_to_frozenset(older)))

    return newest_younger > oldest_older
def __init__(self, nodes, cache_factory=FileStatusCache):
    """Builds the reverse-dependency graph for 'nodes', validates file /
    executable / version requirements, and computes the initial states.
    Progress of each (potentially slow) step is logged."""
    self._cache_factory = cache_factory
    self._state_observers = []
    self._states = {}

    nodes = safe_coerce_to_frozenset(nodes)

    self._logger = logging.getLogger(__name__)
    self._reverse_dependencies = collections.defaultdict(set)
    self._collect_reverse_dependencies(nodes, self._reverse_dependencies, set())
    self._intersections = {}
    # Graph roots: nodes that no other node depends upon
    self._top_nodes = []
    for node, rev_deps in self._reverse_dependencies.iteritems():
        if not rev_deps:
            self._top_nodes.append(node)

    self._logger.info(" - Checking file dependencies ...")
    self._check_file_dependencies(self._reverse_dependencies)
    self._logger.info(" - Checking for required executables ...")
    self._check_required_executables(self._reverse_dependencies)
    self._logger.info(" - Checking version requirements ...")
    self._check_version_requirements(self._reverse_dependencies)
    self._logger.info(" - Determining states ...")
    self.refresh_states()
    self._logger.info(" - Ready ...\n")
def _validate_requirements(cls, requirements):
    """Coerces 'requirements' to a frozenset, raising TypeError unless
    every entry is callable."""
    requirements = safe_coerce_to_frozenset(requirements)
    for req in requirements:
        if not isinstance(req, collections.Callable):
            raise TypeError("'requirements' must be callable, not %r"
                            % (type(req),))
    return requirements
def _validate_files(cls, files):
    """Coerces 'files' to a frozenset, raising TypeError unless every
    entry is a (byte or unicode) string."""
    files = safe_coerce_to_frozenset(files)
    for fname in files:
        if not isinstance(fname, types.StringTypes):
            raise TypeError('Files must be strings, not %r'
                            % fname.__class__.__name__)
    return files
def __init__(self, fasta_files, sequences, destination, dependencies=()):
    """
    fasta_files -- { taxon_name_1 : filename_1, ... }
    sequences -- { interval_name_1, ... }
    """
    self._infiles = copy.deepcopy(fasta_files)
    self._sequences = utilities.safe_coerce_to_frozenset(sequences)
    self._destination = copy.copy(destination)
    self._outfiles = []
    for name in self._sequences:
        self._outfiles.append(os.path.join(destination, name + ".fasta"))

    # Every input FASTA must be accompanied by its faidx index file
    input_files = list(self._infiles.itervalues())
    input_files.extend(fname + ".fai"
                       for fname in self._infiles.itervalues())

    desc = "<CollectSequences: %i sequences from %i files -> '%s'>" \
        % (len(self._sequences), len(self._infiles), self._destination)
    Node.__init__(self,
                  description=desc,
                  input_files=input_files,
                  output_files=self._outfiles,
                  dependencies=dependencies)
def __init__(self, control_file, sequence_file, trees_file, output_tar,
             exclude_groups = (), dependencies = ()):
    """Sets up a codeml run followed by tar'ing up of its output files.

    control_file  -- codeml control (.ctl) file; staged as 'template.ctl'.
    sequence_file -- sequence alignment file passed to codeml.
    trees_file    -- tree file passed to codeml.
    output_tar    -- path of the gzipped tarball collecting codeml output.
    """
    self._exclude_groups = safe_coerce_to_frozenset(exclude_groups)
    self._control_file = control_file
    self._sequence_file = sequence_file
    self._trees_file = trees_file

    # codeml is run inside the temp dir (set_cwd), reading the staged
    # control file and writing its various output files there.
    paml_cmd = AtomicCmd(["codeml", "template.ctl"],
                         IN_CONTROL_FILE = control_file,
                         IN_SEQUENCE_FILE = sequence_file,
                         IN_TREES_FILE = trees_file,
                         TEMP_OUT_CTL = "template.ctl",
                         TEMP_OUT_SEQS = "template.seqs",
                         TEMP_OUT_TREES = "template.trees",
                         TEMP_OUT_STDOUT = "template.stdout",
                         TEMP_OUT_STDERR = "template.stderr",
                         TEMP_OUT_4FOLD = "4fold.nuc",
                         IN_STDIN = "/dev/null",  # Prevent prompts from blocking
                         set_cwd = True,
                         **CodemlNode._get_codeml_files("TEMP_OUT_CODEML"))

    tar_pairs = CodemlNode._get_codeml_files("TEMP_IN_CODEML")
    # Tar members are expressed as AtomicCmd template keys, i.e. "%(KEY)s"
    tar_files = ["%%(%s)s" % (key,) for key in tar_pairs]
    tar_cmd = AtomicCmd(["tar", "cvzf", "%(OUT_FILE)s"] + tar_files,
                        OUT_FILE = output_tar,
                        set_cwd = True,
                        **tar_pairs)

    CommandNode.__init__(self,
                         description = "<CodemlNode: %r -> %r>" % (sequence_file, output_tar),
                         command = SequentialCmds([paml_cmd, tar_cmd]),
                         dependencies = dependencies)
def __init__(self, infiles, out_prefix, exclude_groups=(), reduce=False,
             dependencies=(), file_dependencies=()):
    """
    infiles = {names : {"partitions" : ..., "filenames" : [...]}}

    Raises TypeError if 'infiles' is not a dict of dicts, and ValueError
    if a sub-dict contains unknown keys or lacks a list of filenames.
    """
    if not (isinstance(infiles, dict)
            and all(isinstance(dd, dict) for dd in infiles.values())):
        raise TypeError("'infiles' must be a dictionary of dictionaries")

    input_filenames = []
    for (name, subdd) in infiles.iteritems():
        invalid_keys = set(subdd) - _VALID_KEYS
        if invalid_keys:
            raise ValueError("Invalid keys found for %r: %s"
                             % (name, ", ".join(invalid_keys)))
        # Using .get() so that a missing "filenames" key raises an
        # informative ValueError rather than an unexpected KeyError
        elif not isinstance(subdd.get("filenames"), list):
            raise ValueError("filenames must be a list of strings")
        input_filenames.extend(subdd["filenames"])
    # Optional file dependencies; used to depend on the list of sequences
    input_filenames.extend(safe_coerce_to_tuple(file_dependencies))

    self._reduce = bool(reduce)
    self._infiles = copy.deepcopy(infiles)
    self._out_prefix = out_prefix
    self._excluded = safe_coerce_to_frozenset(exclude_groups)

    description = "<FastaToPartitionedPhy%s: %i file(s) -> '%s.*'>" % \
        (" (reducing)" if reduce else "", len(infiles), out_prefix)
    Node.__init__(self,
                  description=description,
                  input_files=input_filenames,
                  output_files=[out_prefix + ".phy", out_prefix + ".partitions"],
                  dependencies=dependencies)
def missing_files(filenames):
    """Given a list of filenames, returns a list of those that does not
    exist. Note that this function does not differentiate between files
    and folders."""
    return [fname
            for fname in safe_coerce_to_frozenset(filenames)
            if not os.path.exists(fname)]
def __init__(self, input_file, output_file, filter_by, dependencies):
    """Filters singleton taxa from 'input_file' into 'output_file'; each
    'filter_by' entry maps a taxon onto the group used to filter it."""
    self._input_file = input_file
    self._output_file = output_file
    self._filter_by = dict(filter_by)
    for (to_filter, groups) in self._filter_by.items():
        # The taxa to be filtered is implied to be part of the group,
        # but is not needed when actually carrying out the filtering
        group_set = utilities.safe_coerce_to_frozenset(groups)
        group_set -= utilities.safe_coerce_to_frozenset(to_filter)
        if not group_set:
            raise RuntimeError("Singleton filtering must involve at least "
                               "one other taxa")
        self._filter_by[to_filter] = group_set

    Node.__init__(self,
                  description="<FilterSingleton: '%s' -> '%s'>"
                              % (input_file, output_file),
                  input_files=[input_file],
                  output_files=[output_file],
                  dependencies=dependencies)
def reroot_on_taxa(self, taxa):
    """Returns a new tree rerooted with the given taxa as the outgroup;
    raises ValueError if no taxa were specified."""
    outgroup = safe_coerce_to_frozenset(taxa)
    if not outgroup:
        raise ValueError("No taxa in outgroup")

    clades = self._collect_clades()
    root_on = self._collect_nodes_from_names(outgroup)
    # Because None is the id of the root atm:
    # pylint: disable=W1111
    root = self._create_root_with_clade(clades, root_on)

    return self.rebuild_tree(root, root)
def __init__(self, input_file, output_file, filter_by, dependencies):
    """Node filtering singleton taxa; 'filter_by' maps each taxon to be
    filtered onto the group of taxa used for the filtering."""
    self._input_file = input_file
    self._output_file = output_file
    self._filter_by = dict(filter_by)
    for (to_filter, groups) in self._filter_by.items():
        # The taxa to be filtered is implied to be part of the group,
        # but is not needed when actually carrying out the filtering
        others = utilities.safe_coerce_to_frozenset(groups) \
            - utilities.safe_coerce_to_frozenset(to_filter)
        if not others:
            raise RuntimeError("Singleton filtering must involve at least "
                               "one other taxa")
        self._filter_by[to_filter] = others

    Node.__init__(self,
                  description="<FilterSingleton: '%s' -> '%s'>"
                              % (input_file, output_file),
                  input_files=[input_file],
                  output_files=[output_file],
                  dependencies=dependencies)
def _collect_nodes(self, nodes, description):
    """Coerces 'nodes' into a frozenset of Node objects; raises TypeError
    listing any entries that are not Node instances. None yields an
    empty frozenset."""
    if nodes is None:
        return frozenset()

    nodes = safe_coerce_to_frozenset(nodes)
    offenders = [repr(node) for node in nodes
                 if not isinstance(node, Node)]
    if offenders:
        raise TypeError(
            "%s-list contain non-Node objects:\n\t- Command: %s\n\t- Objects: %s"
            % (description, self, "\n\t ".join(offenders)))

    return nodes
def __init__(self, nodes):
    """Builds the reverse-dependency graph for 'nodes', checks file and
    executable requirements, and determines the initial node states."""
    nodes = safe_coerce_to_frozenset(nodes)

    self._reverse_dependencies = collections.defaultdict(set)
    self._collect_reverse_dependencies(nodes, self._reverse_dependencies)
    self._intersections = self._calculate_intersections()
    # Graph roots: nodes that no other node depends upon
    self._top_nodes = []
    for node, rev_deps in self._reverse_dependencies.iteritems():
        if not rev_deps:
            self._top_nodes.append(node)

    ui.print_info(" - Checking file dependencies ...", file = sys.stderr)
    self._check_file_dependencies(self._reverse_dependencies)
    ui.print_info(" - Checking for required executables ...", file = sys.stderr)
    self._check_required_executables(self._reverse_dependencies)
    ui.print_info("", file = sys.stderr)

    self._states = {}
    self.refresh_states()
def _group(self, selection, extra = None):
    """Partitions the records by name into (included, excluded, other),
    where 'other' is the single record named 'extra' (or None). Raises
    MSAError / ValueError / KeyError on invalid selections."""
    selection = safe_coerce_to_frozenset(selection)
    if extra in selection:
        raise MSAError("Key used for multiple selections: %r" % extra)
    elif not selection:
        raise ValueError("No FASTA names given")

    missing_keys = selection - self.names()
    if missing_keys:
        raise KeyError("Key(s) not found: %r"
                       % (", ".join(map(str, missing_keys))))

    included, excluded, other = [], [], None
    for record in self:
        if record.name in selection:
            included.append(record)
        elif record.name == extra:
            other = record
        else:
            excluded.append(record)

    return included, excluded, other
def __init__(self, infiles, out_prefix, exclude_groups=(), reduce=False,
             dependencies=(), file_dependencies=()):
    """
    infiles = {names : {"partitions" : ..., "filenames" : [...]}}

    Raises TypeError if 'infiles' is not a dict of dicts, and ValueError
    if a sub-dict contains unknown keys or lacks a list of filenames.
    """
    if not (isinstance(infiles, dict)
            and all(isinstance(dd, dict) for dd in infiles.values())):
        raise TypeError("'infiles' must be a dictionary of dictionaries")

    input_filenames = []
    for (name, subdd) in infiles.iteritems():
        invalid_keys = set(subdd) - _VALID_KEYS
        if invalid_keys:
            raise ValueError("Invalid keys found for %r: %s"
                             % (name, ", ".join(invalid_keys)))
        # .get() ensures that a missing "filenames" key produces the
        # informative ValueError below, not an unexpected KeyError
        elif not isinstance(subdd.get("filenames"), list):
            raise ValueError("filenames must be a list of strings")
        input_filenames.extend(subdd["filenames"])
    # Optional file dependencies; used to depend on the list of sequences
    input_filenames.extend(safe_coerce_to_tuple(file_dependencies))

    self._reduce = bool(reduce)
    self._infiles = copy.deepcopy(infiles)
    self._out_prefix = out_prefix
    self._excluded = safe_coerce_to_frozenset(exclude_groups)

    description = "<FastaToPartitionedPhy%s: %i file(s) -> '%s.*'>" % \
        (" (reducing)" if reduce else "", len(infiles), out_prefix)
    Node.__init__(
        self,
        description=description,
        input_files=input_filenames,
        output_files=[out_prefix + ".phy", out_prefix + ".partitions"],
        dependencies=dependencies)
def __init__(self, nodes, cache_factory=FileStatusCache):
    """Constructs the dependency graph for 'nodes'; validates files,
    executables and version requirements, then computes initial states,
    logging the progress of each step."""
    self._cache_factory = cache_factory
    self._state_observers = []
    self._states = {}

    nodes = safe_coerce_to_frozenset(nodes)

    self._logger = logging.getLogger(__name__)
    self._reverse_dependencies = collections.defaultdict(set)
    self._collect_reverse_dependencies(nodes, self._reverse_dependencies, set())
    self._intersections = {}
    # Top nodes are those without any reverse dependencies
    self._top_nodes = [node
                       for node, rdeps in self._reverse_dependencies.iteritems()
                       if not rdeps]

    self._logger.info(" - Checking file dependencies ...")
    self._check_file_dependencies(self._reverse_dependencies)
    self._logger.info(" - Checking for required executables ...")
    self._check_required_executables(self._reverse_dependencies)
    self._logger.info(" - Checking version requirements ...")
    self._check_version_requirements(self._reverse_dependencies)
    self._logger.info(" - Determining states ...")
    self.refresh_states()
    self._logger.info(" - Ready ...\n")
def __init__(self, fasta_files, sequences, destination, dependencies=()):
    """
    fasta_files -- { taxon_name_1 : filename_1, ... }
    sequences -- { interval_name_1, ... }
    """
    self._infiles = copy.deepcopy(fasta_files)
    self._sequences = utilities.safe_coerce_to_frozenset(sequences)
    self._destination = copy.copy(destination)
    self._outfiles = [os.path.join(destination, seq_name + ".fasta")
                      for seq_name in self._sequences]

    # Each FASTA file is expected to have a matching faidx index
    input_files = list(self._infiles.itervalues())
    for fpath in self._infiles.itervalues():
        input_files.append(fpath + ".fai")

    desc = "<CollectSequences: %i sequences from %i files -> '%s'>" \
        % (len(self._sequences), len(self._infiles), self._destination)
    Node.__init__(self,
                  description=desc,
                  input_files=input_files,
                  output_files=self._outfiles,
                  dependencies=dependencies)
def test_safe_coerce_to_frozenset__tuple():
    # A tuple is treated as an iterable of values
    expected = frozenset((1, 3, 2))
    assert_equal(utils.safe_coerce_to_frozenset((1, 3, 2)), expected)
def test_safe_coerce_to_frozenset__list():
    # A list is treated as an iterable of values
    observed = utils.safe_coerce_to_frozenset([1, 3, 2])
    assert_equal(observed, frozenset((1, 3, 2)))
def test_safe_coerce_to_frozenset__int():
    # A bare integer is wrapped in a single-element frozenset
    observed = utils.safe_coerce_to_frozenset(17)
    assert_equal(observed, frozenset((17,)))
def test_safe_coerce_to_frozenset__unicode():
    # A unicode string is wrapped whole, not split into characters
    observed = utils.safe_coerce_to_frozenset(u"foo")
    assert_equal(observed, frozenset((u"foo",)))
def test_safe_coerce_to_frozenset__str():
    # A byte string is wrapped whole, not split into characters
    observed = utils.safe_coerce_to_frozenset("foo")
    assert_equal(observed, frozenset(("foo",)))
def test_safe_coerce_to_frozenset__iterable():
    # An arbitrary (lazy) iterable is consumed into a frozenset
    observed = utils.safe_coerce_to_frozenset(xrange(3))
    assert_equal(observed, frozenset((0, 1, 2)))
def missing_executables(filenames):
    """Returns the subset of 'filenames' for which no executable exists."""
    return [fname
            for fname in safe_coerce_to_frozenset(filenames)
            if not executable_exists(fname)]
def test_safe_coerce_to_frozenset__unicode():
    # Unicode strings are not iterated character by character
    result = utils.safe_coerce_to_frozenset(u"foo")
    assert_equal(result, frozenset((u"foo",)))
def test_safe_coerce_to_frozenset__iterable():
    # Lazy iterables are fully consumed
    result = utils.safe_coerce_to_frozenset(xrange(3))
    assert_equal(result, frozenset((0, 1, 2)))
def test_safe_coerce_to_frozenset__dict():
    # Coercing a dict must not raise; only its acceptance is checked here
    utils.safe_coerce_to_frozenset({1: 2, 3: 4})
def test_safe_coerce_to_frozenset__int():
    # A non-iterable scalar is wrapped in a singleton frozenset
    result = utils.safe_coerce_to_frozenset(17)
    assert_equal(result, frozenset((17,)))
def _do_test_constructor__single_value(key, value):
    # 'input_files' is always required; 'key' may override it
    kwargs = {"input_files": _EMPTY_FILE, key: value}
    node = Node(**kwargs)
    assert_equal(getattr(node, key), safe_coerce_to_frozenset(value))
def test_safe_coerce_to_frozenset__dict():
    # Dicts must be accepted without raising an exception
    utils.safe_coerce_to_frozenset({1 : 2, 3 : 4})
def test_safe_coerce_to_frozenset__list():
    # Lists are converted element-wise
    result = utils.safe_coerce_to_frozenset([1, 3, 2])
    assert_equal(result, frozenset((1, 3, 2)))
def _do_test_constructor__single_value(key, value):
    # The constructor should coerce 'value' to a frozenset for 'key'
    kwargs = {key: value}
    observed = getattr(Node(**kwargs), key)
    assert_equal(observed, safe_coerce_to_frozenset(value))
def _do_test_constructor__single_value(key, value):
    # Start from the mandatory 'input_files' and overlay the tested key
    params = dict(input_files=_EMPTY_FILE)
    params[key] = value
    node = Node(**params)
    expected = safe_coerce_to_frozenset(value)
    assert_equal(getattr(node, key), expected)
def test_safe_coerce_to_frozenset__str():
    # Byte strings are kept whole rather than split into characters
    result = utils.safe_coerce_to_frozenset("foo")
    assert_equal(result, frozenset(("foo",)))