def __init__(self, filepath, rule=None):
    """Store ``filepath`` on ``self.file``, wrapping it in an IOFile if needed.

    Arguments
    filepath -- a SourceFile (its ``get_path_or_uri()`` result is
                stringified and wrapped), an existing _IOFile (stored
                unchanged), or any other value accepted by ``IOFile``
    rule     -- passed to ``IOFile`` whenever a new wrapper is created
    """
    if isinstance(filepath, SourceFile):
        location = str(filepath.get_path_or_uri())
        wrapped = IOFile(location, rule=rule)
    elif isinstance(filepath, _IOFile):
        # already an IOFile object: keep it (and whatever rule it carries)
        wrapped = filepath
    else:
        wrapped = IOFile(filepath, rule=rule)
    self.file = wrapped
def cleanup(f):
    """Return a fresh IOFile for ``f``, bound to ``self.rule``.

    If a default remote provider is configured, ``f`` starts with the
    default remote prefix and ``f`` is not flagged "local", the prefix and
    the one character following it are cut off ``f``. Otherwise the
    constraint-stripped string is wrapped and the remote object of ``f``
    is cloned onto the result.
    """
    workflow = self.rule.workflow
    prefix = workflow.default_remote_prefix
    # remove constraints and turn this into a plain string
    stripped = strip_wildcard_constraints(f)
    uses_default_remote = (
        workflow.default_remote_provider is not None
        and f.startswith(prefix)
        and not is_flagged(f, "local")
    )
    if uses_default_remote:
        # note: the slice is taken from f itself, not from `stripped`
        return IOFile(f[len(prefix) + 1:], rule=self.rule)
    result = IOFile(AnnotatedString(stripped), rule=self.rule)
    result.clone_remote_object(f)
    return result
def concretize_logfile(f, wildcards):
    """Turn a log file entry into a concrete IOFile.

    An existing _IOFile gets ``wildcards`` applied (without filling missing
    wildcards); anything else is wrapped in a new IOFile bound to ``self``
    and returned without applying wildcards.
    """
    if isinstance(f, _IOFile):
        return f.apply_wildcards(
            wildcards,
            fill_missing=False,
            fail_dynamic=self.dynamic_output,
        )
    return IOFile(f, rule=self)
def benchmark(self, benchmark):
    """Set the benchmark file of this rule.

    A non-callable value is first passed through ``apply_default_remote``
    and ``_update_item_wildcard_constraints``. The value is then wrapped in
    an IOFile bound to this rule, stored in ``self._benchmark``, and its
    wildcard names are registered.
    """
    target = benchmark
    if not callable(target):
        with_remote = self.apply_default_remote(target)
        target = self._update_item_wildcard_constraints(with_remote)
    self._benchmark = IOFile(target, rule=self)
    names = self._benchmark.get_wildcard_names()
    self.register_wildcards(names)
def concretize_logfile(f, wildcards, is_from_callable):
    """Turn a log file entry into a concrete IOFile.

    Arguments
    f                -- the log file entry
    wildcards        -- wildcard values applied to entries that did not
                        come from a callable
    is_from_callable -- if true, ``f`` is only wrapped in a new IOFile
                        bound to ``self``; no wildcards are applied
    """
    if not is_from_callable:
        return f.apply_wildcards(
            wildcards,
            fill_missing=False,
            fail_dynamic=self.dynamic_output,
        )
    return IOFile(f, rule=self)
def shadowed_path(self, f):
    """Return the location of IOFile ``f`` below the shadow directory.

    Without a shadow directory ``f`` is returned unchanged. Otherwise a
    new IOFile for the joined path is created and the flags of ``f`` are
    cloned onto it.
    """
    if self.shadow_dir:
        shadowed = IOFile(os.path.join(self.shadow_dir, f), self.rule)
        shadowed.clone_flags(f)
        return shadowed
    return f
def update_wildcard_constraints(self):
    """Replace each output file, in place, by a constraint-updated IOFile.

    Every entry of ``self.output`` is passed through
    ``_update_item_wildcard_constraints`` and wrapped in a new IOFile
    bound to this rule.
    """
    for index, old_item in enumerate(self.output):
        constrained = self._update_item_wildcard_constraints(old_item)
        replacement = IOFile(constrained, rule=self)
        # the updated item has to have the same flags
        replacement.clone_flags(old_item)
        self.output[index] = replacement
def _set_inoutput_item(self, item, output=False, name=None):
    """
    Register an item as input or output of this rule.

    Strings are wrapped in an IOFile bound to this rule and recorded in
    the collections matching their flags. Callables are stored unchanged
    (input only). Any other value is iterated and each element is
    registered recursively.

    Arguments
    item   -- a string, a callable, or an iterable of such items
    output -- if true, register into self.output, otherwise self.input
    name   -- an optional name for the item

    Raises SyntaxError for a callable output, for an output flagged
    "subworkflow", and when a TypeError occurs while processing an item
    that is neither a string nor callable.
    """
    target = self.output if output else self.input

    if isinstance(item, str):
        # add the rule to the dependencies
        if isinstance(item, _IOFile):
            self.dependencies[item] = item.rule
        iofile = IOFile(item, rule=self)

        # these flags are only recorded for outputs; on inputs they are ignored
        output_only = (
            ("temp", self.temp_output),
            ("protected", self.protected_output),
            ("touch", self.touch_output),
        )
        for flag, registry in output_only:
            if is_flagged(item, flag) and output:
                registry.add(iofile)

        if is_flagged(item, "dynamic"):
            dynamic_registry = self.dynamic_output if output else self.dynamic_input
            dynamic_registry.add(iofile)

        if is_flagged(item, "subworkflow"):
            if output:
                raise SyntaxError(
                    "Only input files may refer to a subworkflow")
            # record the workflow this item comes from
            self.subworkflow_input[iofile] = item.flags["subworkflow"]

        target.append(iofile)
        if name:
            target.add_name(name)
        return

    if callable(item):
        if output:
            raise SyntaxError(
                "Only input files can be specified as functions")
        target.append(item)
        if name:
            target.add_name(name)
        return

    try:
        first_index = len(target)
        for element in item:
            self._set_inoutput_item(element, output=output)
        if name:
            # if the list was named, make it accessible
            target.set_name(name, first_index, end=len(target))
    except TypeError:
        raise SyntaxError(
            "Input and output files have to be specified as strings or lists of strings."
        )
def benchmark(self, benchmark):
    """Set the benchmark file of this rule.

    A ``Path`` is converted to a string. A non-callable value is then
    passed through ``apply_path_modifier`` (property "benchmark") and
    ``_update_item_wildcard_constraints``. The value is wrapped in an
    IOFile bound to this rule, stored in ``self._benchmark``, and its
    wildcard names are registered.
    """
    target = str(benchmark) if isinstance(benchmark, Path) else benchmark
    if not callable(target):
        modified = self.apply_path_modifier(target, property="benchmark")
        target = self._update_item_wildcard_constraints(modified)
    self._benchmark = IOFile(target, rule=self)
    names = self._benchmark.get_wildcard_names()
    self.register_wildcards(names)
def cleanup(f):
    """Return a fresh IOFile for ``f``, bound to ``self.rule``.

    If a default remote provider is configured, ``f`` starts with the
    default remote prefix and ``f`` is not flagged "local", the prefix and
    the one character following it are cut off ``f``. Otherwise the
    constraint-stripped string is wrapped and the remote object of ``f``
    is cloned onto the result. A path-modifier flag value found on ``f``
    is copied to the result in both cases.
    """
    workflow = self.rule.workflow
    prefix = workflow.default_remote_prefix
    # remove constraints and turn this into a plain string
    stripped = strip_wildcard_constraints(f)
    modified_by = get_flag_value(f, PATH_MODIFIER_FLAG)

    uses_default_remote = (
        workflow.default_remote_provider is not None
        and f.startswith(prefix)
        and not is_flagged(f, "local")
    )
    if uses_default_remote:
        # note: the slice is taken from f itself, not from `stripped`
        result = IOFile(f[len(prefix) + 1:], rule=self.rule)
    else:
        result = IOFile(AnnotatedString(stripped), rule=self.rule)
        result.clone_remote_object(f)

    if modified_by is not None:
        result.flags[PATH_MODIFIER_FLAG] = modified_by
    return result
def inputfile(self, path):
    """Mark file as being an input file of the workflow.

    As a consequence, any --default-remote-provider/prefix settings are
    applied to this file. The file is returned as an _IOFile object, so
    it can e.g. be opened transparently with _IOFile.open().
    """
    # Pathlib compatibility
    location = str(path) if isinstance(path, Path) else path
    if self.default_remote_provider is not None:
        location = self.apply_default_remote(location)
    return IOFile(location)
def expanded_output(self):
    """Yield the output files, with dynamic output replaced by its expansion.

    Non-dynamic files are yielded as they are. For a dynamic file the
    rule's pattern is expanded; each expanded path is yielded as an IOFile
    carrying the pattern's flags. If the expansion is empty, the pattern
    itself is yielded.
    """
    for concrete, pattern in zip(self.output, self.rule.output):
        if concrete not in self.dynamic_output:
            yield concrete
            continue
        expansion = self.expand_dynamic(pattern)
        if not expansion:
            yield pattern
        for expanded_path, _ in expansion:
            expanded_file = IOFile(expanded_path, self.rule)
            expanded_file.clone_flags(pattern)
            yield expanded_file
def partially_expand(f, wildcards):
    """Prepare ``f`` so that only the wildcards in ``wildcards`` get expanded.

    Every brace in the string form of ``f`` is doubled (escaped). The
    placeholders whose names are keys of ``wildcards`` are then turned
    back into single-brace placeholders. The result is an IOFile that
    shares the flags and the rule of ``f``.
    """
    # perform the partial expansion from f's string representation
    escaped = str(f).replace("{", "{{").replace("}", "}}")
    for key in wildcards.keys():
        doubled = "{{{{{}}}}}".format(key)
        single = "{{{}}}".format(key)
        escaped = escaped.replace(doubled, single)

    # build result
    annotated = AnnotatedString(escaped)
    annotated.flags = f.flags
    return IOFile(annotated, f.rule)
def dynamic_branch(self, wildcards, input=True):
    """Create a copy of this rule with its dynamic files expanded.

    Arguments
    wildcards -- mapping of wildcard name to a sequence of values
    input     -- expand dynamic input if true, dynamic output otherwise

    Returns None if the expansion raises a KeyError. Returns the branch
    rule when ``input`` is true, otherwise the tuple
    ``(branch, non_dynamic_wildcards)``.
    """
    def get_io(rule):
        if input:
            return rule.input, rule.dynamic_input
        return rule.output, rule.dynamic_output

    io, dynamic_io = get_io(self)
    branch = Rule(self)
    branch_io, branch_dynamic_io = get_io(branch)

    expansion = defaultdict(list)
    for index, pattern in enumerate(io):
        if pattern not in dynamic_io:
            continue
        try:
            for expanded in reversed(expand(pattern, zip, **wildcards)):
                expansion[index].append(IOFile(expanded, rule=branch))
        except KeyError:
            return None

    # replace the dynamic files with the expanded files
    replacements = [
        (index, io[index], files)
        for index, files in reversed(list(expansion.items()))
    ]
    for index, old, files in replacements:
        branch_dynamic_io.remove(old)
        branch_io.insert_items(index, files)

    if input:
        return branch

    # carry the output flags of each replaced file over to its expansion
    for _, old, files in replacements:
        for flagged in (branch.temp_output,
                        branch.protected_output,
                        branch.touch_output):
            if old in flagged:
                flagged.discard(old)
                flagged.update(files)

    branch.wildcard_names.clear()
    # wildcards that take one single value across all combinations
    non_dynamic_wildcards = {
        name: values[0]
        for name, values in wildcards.items()
        if len(set(values)) == 1
    }
    # TODO have a look into how to concretize dependencies here
    expanded_items = branch.expand_wildcards(wildcards=non_dynamic_wildcards)
    (branch._input, branch._output, branch._params, branch._log,
     branch._benchmark, _, branch.dependencies) = expanded_items
    return branch, non_dynamic_wildcards
def expanded_output(self):
    """Yield the output files, with dynamic output replaced by its expansion.

    Non-dynamic files are yielded as they are. For a dynamic file the
    rule's pattern is expanded, restricted to this job's wildcards; each
    expanded path is yielded as a new IOFile. If the expansion is empty,
    the pattern itself is yielded.
    """
    for concrete, pattern in zip(self.output, self.rule.output):
        if concrete not in self.dynamic_output:
            yield concrete
            continue
        expansion = self.expand_dynamic(
            pattern,
            restriction=self.wildcards,
            omit_value=_IOFile.dynamic_fill,
        )
        if not expansion:
            yield pattern
        for expanded_path, _ in expansion:
            yield IOFile(expanded_path, self.rule)
def concretize_iofile(f, wildcards, is_from_callable):
    """Apply ``wildcards`` to an input/output entry.

    Arguments
    f                -- the entry to concretize
    wildcards        -- wildcard values to apply
    is_from_callable -- if true, ``f`` (a ``Path`` is stringified first)
                        is wrapped in a new IOFile bound to ``self``
                        before the wildcards are applied

    Missing wildcards are filled only if ``f`` is in ``self.dynamic_input``.
    """
    if is_from_callable:
        if isinstance(f, Path):
            f = str(f)
        target = IOFile(f, rule=self)
    else:
        target = f
    return target.apply_wildcards(
        wildcards,
        fill_missing=f in self.dynamic_input,
        fail_dynamic=self.dynamic_output,
    )
def _set_log_item(self, item, name=None):
    """Register a log file entry of this rule.

    Strings are wrapped in an IOFile bound to this rule and callables are
    stored unchanged. Any other value is iterated and each element is
    registered recursively.

    Arguments
    item -- a string, a callable, or an iterable of such items
    name -- an optional name for the item

    Raises SyntaxError when a TypeError occurs while processing an item
    that is neither a string nor callable.
    """
    if not (isinstance(item, str) or callable(item)):
        try:
            first_index = len(self.log)
            for element in item:
                self._set_log_item(element)
            if name:
                self.log.set_name(name, first_index, end=len(self.log))
        except TypeError:
            raise SyntaxError("Log files have to be specified as strings.")
        return

    if isinstance(item, str):
        entry = IOFile(item, rule=self)
    else:
        entry = item
    self.log.append(entry)
    if name:
        self.log.add_name(name)
def _set_inoutput_item(self, item, output=False, name=None):
    """
    Register an item as input or output of this rule.

    Strings are wrapped in an IOFile bound to this rule and recorded in
    the collections matching their flags. Callables are stored unchanged
    (input only). Any other value is iterated and each element is
    registered recursively.

    Arguments
    item   -- a string, a callable, or an iterable of such items
    output -- if true, register into self.output, otherwise self.input
    name   -- an optional name for the item

    Raises SyntaxError for an input flagged "temp" or "protected", for a
    callable output, and when a TypeError occurs while processing an item
    that is neither a string nor callable.
    """
    target = self.output if output else self.input

    if isinstance(item, str):
        iofile = IOFile(item, rule=self)

        # flags that are an error on inputs
        output_only = (
            ("temp", "Only output files may be temporary", self.temp_output),
            ("protected", "Only output files may be protected",
             self.protected_output),
        )
        for flag, message, registry in output_only:
            if not is_flagged(item, flag):
                continue
            if not output:
                raise SyntaxError(message)
            registry.add(iofile)

        if is_flagged(item, "dynamic"):
            dynamic_registry = self.dynamic_output if output else self.dynamic_input
            dynamic_registry.add(iofile)

        target.append(iofile)
        if name:
            target.add_name(name)
        return

    if callable(item):
        if output:
            raise SyntaxError(
                "Only input files can be specified as functions")
        target.append(item)
        if name:
            target.add_name(name)
        return

    try:
        first_index = len(target)
        for element in item:
            self._set_inoutput_item(element, output=output)
        if name:
            # if the list was named, make it accessible
            target.set_name(name, first_index, end=len(target))
    except TypeError:
        raise SyntaxError(
            "Input and output files have to be specified as strings.")
def _set_log_item(self, item, name=None):
    """Register a log file entry of this rule.

    A non-callable string is first passed through ``apply_default_remote``
    and ``_update_item_wildcard_constraints``. Strings are stored wrapped
    in an IOFile bound to this rule and callables are stored unchanged.
    Any other value is iterated and each element is registered
    recursively.

    Arguments
    item -- a string, a callable, or an iterable of such items
    name -- an optional name for the item

    Raises SyntaxError when a TypeError occurs while processing an item
    that is neither a string nor callable.
    """
    if not (isinstance(item, str) or callable(item)):
        try:
            first_index = len(self.log)
            for element in item:
                self._set_log_item(element)
            if name:
                self.log.set_name(name, first_index, end=len(self.log))
        except TypeError:
            raise SyntaxError("Log files have to be specified as strings.")
        return

    if not callable(item):
        with_remote = self.apply_default_remote(item)
        item = self._update_item_wildcard_constraints(with_remote)

    if isinstance(item, str):
        entry = IOFile(item, rule=self)
    else:
        entry = item
    self.log.append(entry)
    if name:
        self.log.add_name(name)
def _set_log_item(self, item, name=None):
    """Register a log file entry of this rule.

    A ``Path`` is converted to a string. A non-callable string is passed
    through ``apply_path_modifier`` (property "log") and
    ``_update_item_wildcard_constraints``. Strings are stored wrapped in
    an IOFile bound to this rule and callables are stored unchanged. Any
    other value is iterated and each element is registered recursively.

    Arguments
    item -- a string, a Path, a callable, or an iterable of such items
    name -- an optional name for the item

    Raises SyntaxError when a TypeError occurs while processing an item
    that is neither a string nor callable.
    """
    # Pathlib compatibility
    if isinstance(item, Path):
        item = str(item)

    if not (isinstance(item, str) or callable(item)):
        try:
            first_index = len(self.log)
            for element in item:
                self._set_log_item(element)
            if name:
                self.log._set_name(name, first_index, end=len(self.log))
        except TypeError:
            raise SyntaxError("Log files have to be specified as strings.")
        return

    if not callable(item):
        modified = self.apply_path_modifier(item, property="log")
        item = self._update_item_wildcard_constraints(modified)

    if isinstance(item, str):
        entry = IOFile(item, rule=self)
    else:
        entry = item
    self.log.append(entry)
    if name:
        self.log._add_name(name)
def benchmark(self, benchmark):
    """Store ``benchmark`` in ``self._benchmark`` as an IOFile bound to this rule."""
    wrapped = IOFile(benchmark, rule=self)
    self._benchmark = wrapped
def _set_inoutput_item(self, item, output=False, name=None):
    """
    Set an item to be input or output.

    Strings (and Path objects, which are stringified) are passed through
    the path modifier, checked for flags that do not fit the direction,
    wrapped in an IOFile bound to this rule and recorded in the
    collections matching their flags. Callables are stored unchanged
    (input only). Any other value is iterated and each element is
    registered recursively.

    Arguments
    item   -- the item
    output -- if true, register into self.output, otherwise self.input
    name   -- an optional name for the item

    Raises SyntaxError for a callable output, for an output flagged
    "subworkflow", and when a TypeError occurs while processing an item
    that is neither a string nor callable. Raises WorkflowError if an
    input is associated with two different subworkflows.
    """
    inoutput = self.output if output else self.input
    # Check to see if the item is a path, if so, just make it a string
    if isinstance(item, Path):
        item = str(item)
    if isinstance(item, str):
        if ON_WINDOWS:
            # replace os.sep by os.altsep; for _IOFile/AnnotatedString the
            # replacement is rebuilt through new_from() instead of being
            # left as the plain result of str.replace()
            if isinstance(item, (_IOFile, AnnotatedString)):
                item = item.new_from(item.replace(os.sep, os.altsep))
            else:
                item = item.replace(os.sep, os.altsep)

        # remember the producing rule now, because item is rebound below
        rule_dependency = None
        if isinstance(item, _IOFile) and item.rule and item in item.rule.output:
            rule_dependency = item.rule

        item = self.apply_path_modifier(
            item, property="output" if output else "input")

        # Check to see that all flags are valid
        # Note that "remote", "dynamic", and "expand" are valid for both inputs and outputs.
        if isinstance(item, AnnotatedString):
            for flag in item.flags:
                # misplaced flags only produce a warning, not an error
                if not output and flag in [
                    "protected",
                    "temp",
                    "temporary",
                    "directory",
                    "touch",
                    "pipe",
                ]:
                    logger.warning(
                        "The flag '{}' used in rule {} is only valid for outputs, not inputs."
                        .format(flag, self))
                if output and flag in ["ancient"]:
                    logger.warning(
                        "The flag '{}' used in rule {} is only valid for inputs, not outputs."
                        .format(flag, self))

        # add the rule to the dependencies
        if rule_dependency is not None:
            self.dependencies[item] = rule_dependency

        if output:
            item = self._update_item_wildcard_constraints(item)
        else:
            if (contains_wildcard_constraints(item)
                    and self.workflow.mode != Mode.subprocess):
                logger.warning(
                    "Wildcard constraints in inputs are ignored. (rule: {})"
                    .format(self))

        if self.workflow.all_temp and output:
            # mark as temp if all output files shall be marked as temp
            item = snakemake.io.flag(item, "temp")

        # wrap the item in an IOFile bound to this rule
        _item = IOFile(item, rule=self)

        # "temp", "protected" and "touch" are only recorded for outputs
        if is_flagged(item, "temp"):
            if output:
                self.temp_output.add(_item)
        if is_flagged(item, "protected"):
            if output:
                self.protected_output.add(_item)
        if is_flagged(item, "touch"):
            if output:
                self.touch_output.add(_item)
        if is_flagged(item, "dynamic"):
            if output:
                self.dynamic_output.add(_item)
            else:
                self.dynamic_input.add(_item)
        if is_flagged(item, "report"):
            report_obj = item.flags["report"]
            if report_obj.caption is not None:
                # rebuild the report object with its caption joined onto
                # the workflow's current base directory
                r = ReportObject(
                    os.path.join(self.workflow.current_basedir,
                                 report_obj.caption),
                    report_obj.category,
                    report_obj.subcategory,
                    report_obj.patterns,
                    report_obj.htmlindex,
                )
                item.flags["report"] = r
        if is_flagged(item, "subworkflow"):
            if output:
                raise SyntaxError(
                    "Only input files may refer to a subworkflow")
            else:
                # record the workflow this item comes from
                sub = item.flags["subworkflow"]
                if _item in self.subworkflow_input:
                    other = self.subworkflow_input[_item]
                    if sub != other:
                        raise WorkflowError(
                            "The input file {} is ambiguously "
                            "associated with two subworkflows "
                            "{} and {}.".format(item, sub, other),
                            rule=self,
                        )
                self.subworkflow_input[_item] = sub
        inoutput.append(_item)
        if name:
            inoutput._add_name(name)
    elif callable(item):
        if output:
            raise SyntaxError(
                "Only input files can be specified as functions")
        inoutput.append(item)
        if name:
            inoutput._add_name(name)
    else:
        # anything else is treated as an iterable of items; a TypeError
        # raised anywhere in this block is reported as a SyntaxError
        try:
            start = len(inoutput)
            for i in item:
                self._set_inoutput_item(i, output=output)
            if name:
                # if the list was named, make it accessible
                inoutput._set_name(name, start, end=len(inoutput))
        except TypeError:
            raise SyntaxError(
                "Input and output files have to be specified as strings or lists of strings."
            )
def conda_env(self, conda_env):
    """Store ``conda_env`` in ``self._conda_env`` as an IOFile bound to this rule."""
    wrapped = IOFile(conda_env, rule=self)
    self._conda_env = wrapped
def dynamic_branch(self, wildcards, input=True):
    """Create a copy of this rule with its dynamic files expanded.

    Arguments
    wildcards -- mapping of wildcard name to a sequence of values
    input     -- expand dynamic input if true, dynamic output otherwise

    Returns None if the expansion raises a KeyError. Returns the branch
    rule when ``input`` is true, otherwise the tuple
    ``(branch, non_dynamic_wildcards)``.
    """
    def get_io(rule):
        # pick the (files, dynamic files) pair for the requested direction
        return ((rule.input, rule.dynamic_input) if input else
                (rule.output, rule.dynamic_output))

    def partially_expand(f, wildcards):
        """Expand the wildcards in f from the ones present in wildcards

        This is done by replacing all wildcard delimiters by `{{` or `}}`
        that are not in `wildcards.keys()`.
        """
        # perform the partial expansion from f's string representation
        s = str(f).replace("{", "{{").replace("}", "}}")
        for key in wildcards.keys():
            s = s.replace("{{{{{}}}}}".format(key), "{{{}}}".format(key))
        # build result
        anno_s = AnnotatedString(s)
        anno_s.flags = f.flags
        return IOFile(anno_s, f.rule)

    io, dynamic_io = get_io(self)
    branch = Rule(self)
    io_, dynamic_io_ = get_io(branch)

    # index of a dynamic file in io -> the IOFiles it expands to
    expansion = collections.defaultdict(list)
    for i, f in enumerate(io):
        if f in dynamic_io:
            f = partially_expand(f, wildcards)
            try:
                for e in reversed(expand(str(f), zip, **wildcards)):
                    # need to clone the flags so intermediate
                    # dynamic remote file paths are expanded and
                    # removed appropriately
                    ioFile = IOFile(e, rule=branch)
                    ioFile.clone_flags(f)
                    expansion[i].append(ioFile)
            except KeyError:
                return None

    # replace the dynamic files with the expanded files
    replacements = [(i, io[i], e)
                    for i, e in reversed(list(expansion.items()))]
    for i, old, exp in replacements:
        dynamic_io_.remove(old)
        io_._insert_items(i, exp)

    if not input:
        # carry the output flags of each replaced file over to its expansion
        for i, old, exp in replacements:
            if old in branch.temp_output:
                branch.temp_output.discard(old)
                branch.temp_output.update(exp)
            if old in branch.protected_output:
                branch.protected_output.discard(old)
                branch.protected_output.update(exp)
            if old in branch.touch_output:
                branch.touch_output.discard(old)
                branch.touch_output.update(exp)

        branch.wildcard_names.clear()
        # wildcards that take one single value across all combinations
        non_dynamic_wildcards = dict((name, values[0])
                                     for name, values in wildcards.items()
                                     if len(set(values)) == 1)
        # TODO have a look into how to concretize dependencies here
        branch._input, _, branch.dependencies = branch.expand_input(
            non_dynamic_wildcards)
        branch._output, _ = branch.expand_output(non_dynamic_wildcards)

        resources = branch.expand_resources(non_dynamic_wildcards,
                                            branch._input, 1)
        branch._params = branch.expand_params(
            non_dynamic_wildcards,
            branch._input,
            branch._output,
            resources,
            omit_callable=True,
        )
        branch.resources = dict(resources.items())

        branch._log = branch.expand_log(non_dynamic_wildcards)
        branch._benchmark = branch.expand_benchmark(non_dynamic_wildcards)
        branch._conda_env = branch.expand_conda_env(non_dynamic_wildcards)
        return branch, non_dynamic_wildcards
    return branch
def benchmark(self):
    """Return the rule's benchmark, stripped of wildcard constraints, as an IOFile bound to the rule."""
    owner = self.rule
    unconstrained = strip_wildcard_constraints(owner.benchmark)
    return IOFile(unconstrained, rule=owner)
def _to_iofile(self, files):
    """Replace every entry of ``files``, in place, by an IOFile bound to ``self.rule``.

    Returns the same ``files`` object that was passed in.
    """
    for index, entry in enumerate(files):
        files[index] = IOFile(entry, rule=self.rule)
    return files
def _set_inoutput_item(self, item, output=False, name=None):
    """
    Set an item to be input or output.

    Strings (and Path objects, which are stringified) are passed through
    apply_default_remote, wrapped in an IOFile bound to this rule and
    recorded in the collections matching their flags. Callables are
    stored unchanged (input only). Any other value is iterated and each
    element is registered recursively.

    Arguments
    item   -- the item
    output -- if true, register into self.output, otherwise self.input
    name   -- an optional name for the item

    Raises SyntaxError for a callable output, for an output flagged
    "subworkflow", and when a TypeError occurs while processing an item
    that is neither a string nor callable. Raises IOFileException if
    updating the wildcard constraints of an output raises ValueError, and
    WorkflowError if an input is associated with two different
    subworkflows.
    """
    inoutput = self.output if output else self.input
    # Check to see if the item is a path, if so, just make it a string
    if isinstance(item, Path):
        item = str(item)
    if isinstance(item, str):
        item = self.apply_default_remote(item)

        # add the rule to the dependencies
        if (isinstance(item, _IOFile) and item.rule
                and item in item.rule.output):
            self.dependencies[item] = item.rule
        if output:
            rule = self
            # only outputs get their wildcard constraints updated
            if self.wildcard_constraints or self.workflow._wildcard_constraints:
                try:
                    item = update_wildcard_constraints(
                        item, self.wildcard_constraints,
                        self.workflow._wildcard_constraints)
                except ValueError as e:
                    raise IOFileException(str(e),
                                          snakefile=self.snakefile,
                                          lineno=self.lineno)
        else:
            # NOTE(review): both branches assign rule = self, so the IOFile
            # below is bound to this rule for inputs as well
            rule = self
            if contains_wildcard_constraints(
                    item) and self.workflow.mode != Mode.subprocess:
                logger.warning(
                    "wildcard constraints in inputs are ignored")
        # wrap the item in an IOFile bound to `rule`
        _item = IOFile(item, rule=rule)
        # "temp", "protected" and "touch" are only recorded for outputs
        if is_flagged(item, "temp"):
            if output:
                self.temp_output.add(_item)
        if is_flagged(item, "protected"):
            if output:
                self.protected_output.add(_item)
        if is_flagged(item, "touch"):
            if output:
                self.touch_output.add(_item)
        if is_flagged(item, "dynamic"):
            if output:
                self.dynamic_output.add(_item)
            else:
                self.dynamic_input.add(_item)
        if is_flagged(item, "report"):
            # replace the flag value by its path joined onto the workflow's
            # current base directory
            item.flags["report"] = os.path.join(
                self.workflow.current_basedir, item.flags["report"])
        if is_flagged(item, "subworkflow"):
            if output:
                raise SyntaxError(
                    "Only input files may refer to a subworkflow")
            else:
                # record the workflow this item comes from
                sub = item.flags["subworkflow"]
                if _item in self.subworkflow_input:
                    other = self.subworkflow_input[_item]
                    if sub != other:
                        raise WorkflowError(
                            "The input file {} is ambiguously "
                            "associated with two subworkflows "
                            "{} and {}.".format(item, sub, other),
                            rule=self)
                self.subworkflow_input[_item] = sub
        inoutput.append(_item)
        if name:
            inoutput.add_name(name)
    elif callable(item):
        if output:
            raise SyntaxError(
                "Only input files can be specified as functions")
        inoutput.append(item)
        if name:
            inoutput.add_name(name)
    else:
        # anything else is treated as an iterable of items; a TypeError
        # raised anywhere in this block is reported as a SyntaxError
        try:
            start = len(inoutput)
            for i in item:
                self._set_inoutput_item(i, output=output)
            if name:
                # if the list was named, make it accessible
                inoutput.set_name(name, start, end=len(inoutput))
        except TypeError:
            raise SyntaxError(
                "Input and output files have to be specified as strings or lists of strings."
            )
def _set_inoutput_item(self, item, output=False, name=None):
    """
    Register an item as input or output of this rule.

    Strings are wrapped in an IOFile and recorded in the collections
    matching their flags. The IOFile is bound to this rule for outputs
    and to no rule for inputs. Callables are stored unchanged (input
    only). Any other value is iterated and each element is registered
    recursively.

    Arguments
    item   -- a string, a callable, or an iterable of such items
    output -- if true, register into self.output, otherwise self.input
    name   -- an optional name for the item

    Raises SyntaxError for a callable output, for an output flagged
    "subworkflow", and when a TypeError occurs while processing an item
    that is neither a string nor callable. Raises IOFileException if
    updating the wildcard constraints of an output raises ValueError.
    """
    target = self.output if output else self.input

    if isinstance(item, str):
        # add the rule to the dependencies
        if isinstance(item, _IOFile) and item.rule:
            self.dependencies[item] = item.rule

        if output:
            owner = self
            has_constraints = (self.wildcard_constraints
                               or self.workflow._wildcard_constraints)
            if has_constraints:
                try:
                    item = update_wildcard_constraints(
                        item,
                        self.wildcard_constraints,
                        self.workflow._wildcard_constraints,
                    )
                except ValueError as e:
                    raise IOFileException(
                        str(e), snakefile=self.snakefile, lineno=self.lineno)
        else:
            owner = None
            if (contains_wildcard_constraints(item)
                    and self.workflow.mode != Mode.subprocess):
                logger.warning(
                    "wildcard constraints in inputs are ignored")

        # the rule is only recorded on the IOFile for output files
        iofile = IOFile(item, rule=owner)

        # these flags are only recorded for outputs; on inputs they are ignored
        output_only = (
            ("temp", self.temp_output),
            ("protected", self.protected_output),
            ("touch", self.touch_output),
        )
        for flag, registry in output_only:
            if is_flagged(item, flag) and output:
                registry.add(iofile)

        if is_flagged(item, "dynamic"):
            dynamic_registry = self.dynamic_output if output else self.dynamic_input
            dynamic_registry.add(iofile)

        if is_flagged(item, "subworkflow"):
            if output:
                raise SyntaxError(
                    "Only input files may refer to a subworkflow")
            # record the workflow this item comes from
            self.subworkflow_input[iofile] = item.flags["subworkflow"]

        target.append(iofile)
        if name:
            target.add_name(name)
        return

    if callable(item):
        if output:
            raise SyntaxError(
                "Only input files can be specified as functions")
        target.append(item)
        if name:
            target.add_name(name)
        return

    try:
        first_index = len(target)
        for element in item:
            self._set_inoutput_item(element, output=output)
        if name:
            # if the list was named, make it accessible
            target.set_name(name, first_index, end=len(target))
    except TypeError:
        raise SyntaxError(
            "Input and output files have to be specified as strings or lists of strings.")
def log(self, log):
    """Store ``log`` in ``self._log`` as an IOFile bound to this rule."""
    wrapped = IOFile(log, rule=self)
    self._log = wrapped