def format_wildcards(string, job_properties):
    """Fill the placeholders of ``string`` with values taken from a job.

    Args:
        string: Template text handed to ``format``.
        job_properties: Mapping describing the job. Every key becomes an
            attribute of a throw-away job object; the optional ``params``
            and ``wildcards`` entries are wrapped in ``Wildcards`` and the
            optional ``rule`` entry is exposed to the template as well.

    Returns:
        Whatever ``format(string, params=..., wildcards=...[, rule=...])``
        returns.

    Raises:
        WorkflowError: If formatting raises ``NameError`` or ``IndexError``.
    """

    class Job(object):
        # Exposes the entries of the properties mapping as attributes.
        def __init__(self, job_properties):
            for key in job_properties:
                setattr(self, key, job_properties[key])

    job = Job(job_properties)

    job._format_params = (
        Wildcards(fromdict=job_properties["params"])
        if "params" in job_properties
        else None
    )
    job._format_wildcards = (
        Wildcards(fromdict=job_properties["wildcards"])
        if "wildcards" in job_properties
        else None
    )

    _variables = {
        "params": job._format_params,
        "wildcards": job._format_wildcards,
    }
    if hasattr(job, "rule"):
        _variables["rule"] = job.rule

    # NOTE(review): ``format`` is called with keyword arguments, which the
    # builtin does not accept -- presumably a module-level helper of the
    # same name is in scope; confirm.
    try:
        return format(string, **_variables)
    except NameError as ex:
        message = "NameError with group job {}: {}".format(job.jobid, str(ex))
        raise WorkflowError(message)
    except IndexError as ex:
        message = "IndexError with group job {}: {}".format(job.jobid, str(ex))
        raise WorkflowError(message)
def __init__(self, rule, dag, wildcards_dict=None, format_wildcards=None):
    """Set up a job of ``rule`` for the given wildcard values.

    Args:
        rule: Rule the job instantiates; its ``expand_input`` and
            ``expand_output`` are called with ``wildcards_dict``.
        dag: Owning DAG; ``dag.workflow.attempt`` seeds ``_attempt``.
        wildcards_dict: Mapping of wildcard names to values.
            NOTE(review): the default is ``None`` but the hash loop at the
            end calls ``.values()`` on it -- callers presumably always pass
            a dict; confirm.
        format_wildcards: Optional mapping wrapped into the ``Wildcards``
            stored as ``_format_wildcards``; when ``None`` the job's own
            wildcards object is reused.

    Raises:
        WorkflowError: If an input file is associated with two different
            subworkflows.
    """
    self.rule = rule
    self.dag = dag
    self.wildcards_dict = wildcards_dict
    self.wildcards = Wildcards(fromdict=self.wildcards_dict)
    if format_wildcards is None:
        self._format_wildcards = self.wildcards
    else:
        self._format_wildcards = Wildcards(fromdict=format_wildcards)

    expanded_input = self.rule.expand_input(self.wildcards_dict)
    self.input, input_mapping, self.dependencies = expanded_input
    expanded_output = self.rule.expand_output(self.wildcards_dict)
    self.output, output_mapping = expanded_output

    # other properties are lazy to be able to use additional parameters
    # and check already existing files
    self._params = self._log = self._benchmark = self._resources = None
    self._conda_env_file = self._conda_env = self._group = None
    self.shadow_dir = self._inputsize = None

    self._attempt = self.dag.workflow.attempt

    # TODO get rid of these
    self.dynamic_output = set()
    self.dynamic_input = set()
    self.temp_output = set()
    self.protected_output = set()
    self.touch_output = set()
    self.subworkflow_input = dict()

    # Sort every concrete output into the sets whose rule-level
    # counterpart contains the pattern it was expanded from.
    output_kinds = (
        "dynamic_output",
        "temp_output",
        "protected_output",
        "touch_output",
    )
    for concrete in self.output:
        pattern = output_mapping[concrete]
        for kind in output_kinds:
            if pattern in getattr(self.rule, kind):
                getattr(self, kind).add(concrete)

    for concrete in self.input:
        pattern = input_mapping[concrete]
        if pattern in self.rule.dynamic_input:
            self.dynamic_input.add(concrete)

        if pattern in self.rule.subworkflow_input:
            self.subworkflow_input[concrete] = self.rule.subworkflow_input[pattern]
            continue
        if "subworkflow" not in concrete.flags:
            continue

        # The file itself carries a subworkflow flag; it must agree with
        # any association recorded earlier for the same file.
        sub = concrete.flags["subworkflow"]
        if concrete in self.subworkflow_input:
            other = self.subworkflow_input[concrete]
            if sub != other:
                raise WorkflowError("The input file {} is ambiguously "
                                    "associated with two subworkflows {} "
                                    "and {}.".format(concrete, sub, other),
                                    rule=self.rule)
        self.subworkflow_input[concrete] = sub

    # The job hash XORs the rule hash with the hash of each wildcard value.
    self._hash = self.rule.__hash__()
    for value in self.wildcards_dict.values():
        self._hash ^= value.__hash__()
def __init__(self, rule, dag, targetfile=None, format_wildcards=None):
    """Set up a job of ``rule`` that produces ``targetfile``.

    Args:
        rule: Rule the job instantiates. Its ``get_wildcards``,
            ``expand_wildcards`` and ``resources`` are used here.
        dag: Owning DAG (only stored).
        targetfile: File passed to ``rule.get_wildcards`` to obtain the
            wildcard values.
        format_wildcards: Optional mapping wrapped into the ``Wildcards``
            stored as ``_format_wildcards``; when ``None`` the job's own
            wildcards object is reused.

    Raises:
        ValueError: If a callable resource does not return an ``int``.
    """
    self.rule = rule
    self.dag = dag
    self.targetfile = targetfile

    self.wildcards_dict = self.rule.get_wildcards(targetfile)
    self.wildcards = Wildcards(fromdict=self.wildcards_dict)
    if format_wildcards is None:
        self._format_wildcards = self.wildcards
    else:
        self._format_wildcards = Wildcards(fromdict=format_wildcards)

    expanded = rule.expand_wildcards(self.wildcards_dict)
    (self.input, self.output, self.params, self.log, self.benchmark,
     self.ruleio, self.dependencies) = expanded

    # Resolve callable resources, then cap each value by the workflow-wide
    # limit of the same name (if one is defined).
    self.resources_dict = {}
    for res_name, res_value in rule.resources.items():
        if callable(res_value):
            res_value = res_value(self.wildcards)
            if not isinstance(res_value, int):
                raise ValueError("Callable for resources must return int")
        global_limit = self.rule.workflow.global_resources.get(
            res_name, res_value)
        self.resources_dict[res_name] = min(global_limit, res_value)
    self.threads = self.resources_dict["_cores"]
    self.resources = Resources(fromdict=self.resources_dict)

    self.shadow_dir = None
    self._inputsize = None

    self.dynamic_output = set()
    self.dynamic_input = set()
    self.temp_output = set()
    self.protected_output = set()
    self.touch_output = set()
    self.subworkflow_input = dict()

    # Sort every concrete output into the sets whose rule-level
    # counterpart contains the pattern it was expanded from.
    output_kinds = (
        "dynamic_output",
        "temp_output",
        "protected_output",
        "touch_output",
    )
    for concrete in self.output:
        pattern = self.ruleio[concrete]
        for kind in output_kinds:
            if pattern in getattr(self.rule, kind):
                getattr(self, kind).add(concrete)

    for concrete in self.input:
        pattern = self.ruleio[concrete]
        if pattern in self.rule.dynamic_input:
            self.dynamic_input.add(concrete)
        if pattern in self.rule.subworkflow_input:
            self.subworkflow_input[concrete] = self.rule.subworkflow_input[pattern]

    # The job hash XORs the rule hash with the hash of every output file.
    self._hash = self.rule.__hash__()
    for out_file in self.output:
        self._hash ^= out_file.__hash__()
def __init__(self, rule, dag, targetfile=None, format_wildcards=None):
    """Set up a job of ``rule`` that produces ``targetfile``.

    Args:
        rule: Rule the job instantiates. Its ``get_wildcards``,
            ``expand_input``, ``expand_output`` and ``restart_times`` are
            used here.
        dag: Owning DAG (only stored).
        targetfile: File passed to ``rule.get_wildcards`` to obtain the
            wildcard values.
        format_wildcards: Optional mapping wrapped into the ``Wildcards``
            stored as ``_format_wildcards``; when ``None`` the job's own
            wildcards object is reused.
    """
    self.rule = rule
    self.dag = dag
    self.targetfile = targetfile

    self.wildcards_dict = self.rule.get_wildcards(targetfile)
    self.wildcards = Wildcards(fromdict=self.wildcards_dict)
    if format_wildcards is None:
        self._format_wildcards = self.wildcards
    else:
        self._format_wildcards = Wildcards(fromdict=format_wildcards)

    expanded_input = self.rule.expand_input(self.wildcards_dict)
    self.input, input_mapping, self.dependencies = expanded_input
    expanded_output = self.rule.expand_output(self.wildcards_dict)
    self.output, output_mapping = expanded_output

    # other properties are lazy to be able to use additional parameters
    # and check already existing files
    self._params = self._log = self._benchmark = self._resources = None
    self._conda_env_file = self._conda_env = None
    self.shadow_dir = self._inputsize = None

    self.restart_times = self.rule.restart_times

    self.dynamic_output = set()
    self.dynamic_input = set()
    self.temp_output = set()
    self.protected_output = set()
    self.touch_output = set()
    self.subworkflow_input = dict()

    # Sort every concrete output into the sets whose rule-level
    # counterpart contains the pattern it was expanded from.
    output_kinds = (
        "dynamic_output",
        "temp_output",
        "protected_output",
        "touch_output",
    )
    for concrete in self.output:
        pattern = output_mapping[concrete]
        for kind in output_kinds:
            if pattern in getattr(self.rule, kind):
                getattr(self, kind).add(concrete)

    for concrete in self.input:
        pattern = input_mapping[concrete]
        if pattern in self.rule.dynamic_input:
            self.dynamic_input.add(concrete)
        if pattern in self.rule.subworkflow_input:
            self.subworkflow_input[concrete] = self.rule.subworkflow_input[pattern]

    # The job hash XORs the rule hash with the hash of every output file.
    self._hash = self.rule.__hash__()
    for out_file in self.output:
        self._hash ^= out_file.__hash__()
def apply_input_function(self, func, wildcards,
                         incomplete_checkpoint_func=lambda e: None,
                         raw_exceptions=False, **aux_params):
    """Call an input function with the wildcards and matching aux params.

    Args:
        func: The callable to evaluate. ``_IOFile`` and
            ``AnnotatedString`` instances are unwrapped to the callable
            they carry.
        wildcards: Mapping wrapped in ``Wildcards`` and passed as the
            first positional argument.
        incomplete_checkpoint_func: Called with the exception when
            ``func`` raises ``IncompleteCheckpointException``; its return
            value becomes the result.
        raw_exceptions: When true, exceptions of ``func`` are re-raised
            unchanged instead of being wrapped.
        **aux_params: Candidate keyword arguments; only those named in the
            signature of ``func`` are forwarded.

    Returns:
        Tuple ``(value, incomplete)`` where ``incomplete`` is true only
        when an ``IncompleteCheckpointException`` was handled.

    Raises:
        FileNotFoundError: Re-raised when the missing file is not one of
            the ``input`` aux parameter's files (or no ``input`` aux
            parameter was given).
        InputFunctionException: Wraps any other exception of ``func``
            unless ``raw_exceptions`` is set.
    """
    incomplete = False
    if isinstance(func, _IOFile):
        func = func._file.callable
    elif isinstance(func, AnnotatedString):
        func = func.callable
    sig = inspect.signature(func)

    _aux_params = {
        k: v
        for k, v in aux_params.items() if k in sig.parameters
    }

    try:
        value = func(Wildcards(fromdict=wildcards), **_aux_params)
    except IncompleteCheckpointException as e:
        value = incomplete_checkpoint_func(e)
        incomplete = True
    except FileNotFoundError as e:
        # Function evaluation can depend on input files. Since expansion
        # can happen during dryrun, where input files are not yet present,
        # we need to skip such cases and mark them as <TBD>.
        # The membership guard keeps a call without an ``input`` aux
        # parameter from masking the real error with a KeyError.
        if "input" in aux_params and e.filename in aux_params["input"]:
            value = TBDString()
        else:
            raise
    except BaseException as e:
        if raw_exceptions:
            raise
        raise InputFunctionException(e, rule=self, wildcards=wildcards)
    return value, incomplete
def apply_input_function(self, func, wildcards,
                         incomplete_checkpoint_func=lambda e: None,
                         raw_exceptions=False, **aux_params):
    """Call an input function with the wildcards and matching aux params.

    ``_IOFile`` and ``AnnotatedString`` instances are unwrapped to the
    callable they carry. Only those ``aux_params`` that appear in the
    signature of the callable are forwarded.

    Returns:
        Tuple ``(value, incomplete)``; ``incomplete`` is true only when an
        ``IncompleteCheckpointException`` was handled, in which case
        ``value`` is the result of ``incomplete_checkpoint_func``.

    Raises:
        InputFunctionException: Wraps any other exception of the callable
            unless ``raw_exceptions`` is set, in which case the original
            exception is re-raised.
    """
    if isinstance(func, _IOFile):
        func = func._file.callable
    elif isinstance(func, AnnotatedString):
        func = func.callable

    accepted = inspect.signature(func).parameters
    _aux_params = {}
    for key, val in aux_params.items():
        if key in accepted:
            _aux_params[key] = val

    incomplete = False
    try:
        value = func(Wildcards(fromdict=wildcards), **_aux_params)
    except IncompleteCheckpointException as e:
        value = incomplete_checkpoint_func(e)
        incomplete = True
    except BaseException as e:
        if raw_exceptions:
            raise e
        raise InputFunctionException(e, rule=self, wildcards=wildcards)
    return value, incomplete
def apply_input_function(self, func, wildcards, **aux_params):
    """Call ``func`` with the wildcards and the aux params it accepts.

    Args:
        func: Callable to evaluate.
        wildcards: Mapping wrapped in ``Wildcards`` and passed as the
            first positional argument.
        **aux_params: Candidate keyword arguments; only those named in the
            signature of ``func`` are forwarded.

    Returns:
        The return value of ``func``.

    Raises:
        InputFunctionException: Wraps any exception raised by ``func``.
    """
    accepted = inspect.signature(func).parameters
    _aux_params = {}
    for key, val in aux_params.items():
        if key in accepted:
            _aux_params[key] = val

    try:
        return func(Wildcards(fromdict=wildcards), **_aux_params)
    except BaseException as e:
        raise InputFunctionException(e, rule=self, wildcards=wildcards)
def __init__(self, rule, dag, targetfile=None, format_wildcards=None):
    """Set up a job of ``rule`` that produces ``targetfile``.

    Args:
        rule: Rule the job instantiates. Its ``get_wildcards``,
            ``expand_wildcards`` and ``resources`` are used here.
        dag: Owning DAG (only stored).
        targetfile: File passed to ``rule.get_wildcards`` to obtain the
            wildcard values.
        format_wildcards: Optional mapping wrapped into the ``Wildcards``
            stored as ``_format_wildcards``; when ``None`` the job's own
            wildcards object is reused.
    """
    self.rule = rule
    self.dag = dag
    self.targetfile = targetfile

    self.wildcards_dict = self.rule.get_wildcards(targetfile)
    self.wildcards = Wildcards(fromdict=self.wildcards_dict)
    if format_wildcards is None:
        self._format_wildcards = self.wildcards
    else:
        self._format_wildcards = Wildcards(fromdict=format_wildcards)

    expanded = rule.expand_wildcards(self.wildcards_dict)
    (self.input, self.output, self.params, self.log, self.benchmark,
     self.ruleio, self.dependencies) = expanded

    # Cap each rule resource by the workflow-wide limit of the same name
    # (if one is defined).
    capped = {}
    for res_name, res_value in rule.resources.items():
        global_limit = self.rule.workflow.global_resources.get(
            res_name, res_value)
        capped[res_name] = min(global_limit, res_value)
    self.resources_dict = capped
    self.threads = self.resources_dict["_cores"]
    self.resources = Resources(fromdict=self.resources_dict)

    self._inputsize = None

    self.dynamic_output = set()
    self.dynamic_input = set()
    self.temp_output = set()
    self.protected_output = set()
    self.touch_output = set()
    self.subworkflow_input = dict()

    # Sort every concrete output into the sets whose rule-level
    # counterpart contains the pattern it was expanded from.
    output_kinds = (
        "dynamic_output",
        "temp_output",
        "protected_output",
        "touch_output",
    )
    for concrete in self.output:
        pattern = self.ruleio[concrete]
        for kind in output_kinds:
            if pattern in getattr(self.rule, kind):
                getattr(self, kind).add(concrete)

    for concrete in self.input:
        pattern = self.ruleio[concrete]
        if pattern in self.rule.dynamic_input:
            self.dynamic_input.add(concrete)
        if pattern in self.rule.subworkflow_input:
            self.subworkflow_input[concrete] = self.rule.subworkflow_input[pattern]

    # The job hash XORs the rule hash with the hash of every output file.
    self._hash = self.rule.__hash__()
    for out_file in self.output:
        self._hash ^= out_file.__hash__()
def __init__(self, id, jobs, wildcards_dict):
    """Set up a group of jobs.

    Args:
        id: Identifier stored as ``groupid``.
        jobs: The jobs belonging to the group (only stored).
        wildcards_dict: Mapping of wildcard names to values; also wrapped
            into the ``Wildcards`` object stored as ``wildcards``.
    """
    self.groupid = id
    self.jobs = jobs

    # Everything below is initialised empty.
    self.toposorted = self._resources = None
    self._input = self._output = self._log = None
    self._inputsize = self._all_products = None

    # NOTE(review): ``self.dag`` is not assigned in this method --
    # presumably a property or class attribute; confirm.
    self._attempt = self.dag.workflow.attempt

    self.wildcards_dict = wildcards_dict
    self.wildcards = Wildcards(fromdict=self.wildcards_dict)
def __init__(self, input=None, output=None, params=None, wildcards=None,
             log="", config=None):
    """Build a mock object from plain values.

    Args:
        input: Passed through ``make_namedlist`` and wrapped in
            ``InputFiles``.
        output: Passed through ``make_namedlist`` and wrapped in
            ``OutputFiles``.
        params: Passed through ``make_namedlist`` and wrapped in
            ``Params``.
        wildcards: Passed through ``make_namedlist`` and wrapped in
            ``Wildcards``.
        log: Wrapped in ``Log``.
        config: Stored as is; a falsy value is replaced by an empty dict.
    """
    input_items = self.make_namedlist(input)
    self.input = InputFiles(input_items)

    output_items = self.make_namedlist(output)
    self.output = OutputFiles(output_items)

    param_items = self.make_namedlist(params)
    self.params = Params(param_items)

    wildcard_items = self.make_namedlist(wildcards)
    self.wildcards = Wildcards(wildcard_items)

    self.log = Log(log)
    self.config = config if config else {}
    self.rulename = "mock"
def apply_input_function(
    self,
    func,
    wildcards,
    incomplete_checkpoint_func=lambda e: None,
    raw_exceptions=False,
    groupid=None,
    **aux_params,
):
    """Call an input function with the wildcards and matching aux params.

    Args:
        func: The callable to evaluate. ``_IOFile`` and
            ``AnnotatedString`` instances are unwrapped to the callable
            they carry.
        wildcards: Mapping wrapped in ``Wildcards`` and passed as the
            first positional argument.
        incomplete_checkpoint_func: Called with the exception when
            ``func`` raises ``IncompleteCheckpointException``; its return
            value becomes the result.
        raw_exceptions: When true, exceptions of ``func`` are re-raised
            unchanged instead of being wrapped.
        groupid: Added to the aux params when ``func`` declares a
            ``groupid`` parameter.
        **aux_params: Candidate keyword arguments, filtered by
            ``get_input_function_aux_params`` before the call.

    Returns:
        Tuple ``(value, incomplete)``. ``([], True)`` is returned without
        calling ``func`` when it wants a ``groupid`` that is not known
        yet. A generator result is collected into a list.

    Raises:
        FileNotFoundError: Re-raised when the missing file is not one of
            the ``input`` aux parameter's files.
        InputFunctionException: Wraps any other exception of ``func``
            unless ``raw_exceptions`` is set.
    """
    if isinstance(func, _IOFile):
        func = func._file.callable
    elif isinstance(func, AnnotatedString):
        func = func.callable

    if "groupid" in get_function_params(func):
        if groupid is None:
            # Return empty list of files and incomplete marker
            # the job will be reevaluated once groupids have been determined
            return [], True
        aux_params["groupid"] = groupid

    _aux_params = get_input_function_aux_params(func, aux_params)

    incomplete = False
    try:
        value = func(Wildcards(fromdict=wildcards), **_aux_params)
        if isinstance(value, types.GeneratorType):
            # generators should be immediately collected here,
            # otherwise we would miss any exceptions and
            # would have to capture them again later.
            value = list(value)
    except IncompleteCheckpointException as e:
        value = incomplete_checkpoint_func(e)
        incomplete = True
    except FileNotFoundError as e:
        # Function evaluation can depend on input files. Since expansion
        # can happen during dryrun, where input files are not yet present,
        # we need to skip such cases and mark them as <TBD>.
        if "input" not in aux_params or e.filename not in aux_params["input"]:
            raise e
        value = TBDString()
    except BaseException as e:
        if raw_exceptions:
            raise e
        raise InputFunctionException(e, rule=self, wildcards=wildcards)
    return value, incomplete
def __init__(self, rule, dag, wildcards_dict=None, format_wildcards=None,
             targetfile=None):
    """Set up a job of ``rule`` for the given wildcard values.

    Args:
        rule: Rule the job instantiates; its ``expand_input`` and
            ``expand_output`` are called with ``wildcards_dict``.
        dag: Owning DAG; ``dag.workflow.attempt`` seeds ``_attempt``.
        wildcards_dict: Mapping of wildcard names to values.
            NOTE(review): the default is ``None`` but the hash loop at the
            end calls ``.values()`` on it -- callers presumably always pass
            a dict; confirm.
        format_wildcards: Optional mapping wrapped into the ``Wildcards``
            stored as ``_format_wildcards``; when ``None`` the job's own
            wildcards object is reused.
        targetfile: The target file that led to this job (only stored).

    Raises:
        WorkflowError: If an input file is associated with two different
            subworkflows.
    """
    self.rule = rule
    self.dag = dag

    # the targetfile that led to the job
    # it is important to record this, since we need it to submit the
    # job on a cluster. In contrast, an arbitrary targetfile could
    # lead to a different composition of wildcard values (in case of
    # ambiguity in matching).
    self.targetfile = targetfile
    self.wildcards_dict = wildcards_dict
    self.wildcards = Wildcards(fromdict=self.wildcards_dict)
    if format_wildcards is None:
        self._format_wildcards = self.wildcards
    else:
        self._format_wildcards = Wildcards(fromdict=format_wildcards)

    expanded_input = self.rule.expand_input(self.wildcards_dict)
    self.input, input_mapping, self.dependencies = expanded_input
    expanded_output = self.rule.expand_output(self.wildcards_dict)
    self.output, output_mapping = expanded_output

    # other properties are lazy to be able to use additional parameters
    # and check already existing files
    self._params = self._log = self._benchmark = self._resources = None
    self._conda_env_file = self._conda_env = self._group = None
    self.shadow_dir = self._inputsize = None
    self.is_updated = False

    self._attempt = self.dag.workflow.attempt

    # TODO get rid of these
    self.dynamic_output = set()
    self.dynamic_input = set()
    self.temp_output = set()
    self.protected_output = set()
    self.touch_output = set()
    self.subworkflow_input = dict()

    # Sort every concrete output into the sets whose rule-level
    # counterpart contains the pattern it was expanded from.
    output_kinds = (
        "dynamic_output",
        "temp_output",
        "protected_output",
        "touch_output",
    )
    for concrete in self.output:
        pattern = output_mapping[concrete]
        for kind in output_kinds:
            if pattern in getattr(self.rule, kind):
                getattr(self, kind).add(concrete)

    for concrete in self.input:
        pattern = input_mapping[concrete]
        if pattern in self.rule.dynamic_input:
            self.dynamic_input.add(concrete)

        if pattern in self.rule.subworkflow_input:
            self.subworkflow_input[concrete] = self.rule.subworkflow_input[pattern]
            continue
        if "subworkflow" not in concrete.flags:
            continue

        # The file itself carries a subworkflow flag; it must agree with
        # any association recorded earlier for the same file.
        sub = concrete.flags["subworkflow"]
        if concrete in self.subworkflow_input:
            other = self.subworkflow_input[concrete]
            if sub != other:
                raise WorkflowError(
                    "The input file {} is ambiguously "
                    "associated with two subworkflows {} "
                    "and {}.".format(concrete, sub, other),
                    rule=self.rule,
                )
        self.subworkflow_input[concrete] = sub

    # The job hash XORs the rule hash with the hash of each wildcard value.
    self._hash = self.rule.__hash__()
    for value in self.wildcards_dict.values():
        self._hash ^= value.__hash__()
def cluster_wildcards(self, job):
    """Return the cluster configuration that applies to ``job``.

    The ``"__default__"`` entry of ``self.cluster_config`` is copied and
    then updated with the entry named after the job's rule; either entry
    may be absent.

    Returns:
        A ``Wildcards`` object built from the merged mapping.
    """
    defaults = self.cluster_config.get("__default__", dict())
    merged = defaults.copy()
    rule_specific = self.cluster_config.get(job.rule.name, dict())
    merged.update(rule_specific)
    return Wildcards(fromdict=merged)
def expand_wildcards(self, wildcards=None):
    """Expand the rule's input, params, output, log and benchmark.

    Args:
        wildcards: Mapping of wildcard names to values; ``None`` is
            treated as an empty mapping.

    Returns:
        Tuple ``(input, output, params, log, benchmark, ruleio,
        dependencies)``. ``ruleio`` maps each concrete input/output item
        to the rule-level item it was produced from.

    Raises:
        RuleException: If a wildcard of the rule has no value, if an
            input or log function returns a non-string item, or if a
            ``WildcardError`` occurs during expansion.
        InputFunctionException: If a callable item raises.
    """

    def concretize_iofile(f, wildcards):
        # Plain values become IOFile objects; _IOFile patterns get the
        # wildcard values applied.
        if isinstance(f, _IOFile):
            return f.apply_wildcards(wildcards,
                                     fill_missing=f in self.dynamic_input,
                                     fail_dynamic=self.dynamic_output)
        return IOFile(f, rule=self)

    def concretize_param(p, wildcards):
        # Only string params carry wildcards; anything else passes through.
        return apply_wildcards(p, wildcards) if isinstance(p, str) else p

    def check_string_type(f):
        if not isinstance(f, str):
            raise RuleException(
                "Input function did not return str or list of str.",
                rule=self)

    def _apply_wildcards(newitems,
                         olditems,
                         wildcards,
                         wildcards_obj,
                         concretize=apply_wildcards,
                         check_return_type=check_string_type,
                         ruleio=None,
                         no_flattening=False):
        # Concretize every item of ``olditems`` and append it to
        # ``newitems``, carrying over item names.
        for name, item in olditems.allitems():
            first = len(newitems)
            if callable(item):
                try:
                    item = item(wildcards_obj)
                except BaseException as e:
                    raise InputFunctionException(e,
                                                 rule=self,
                                                 wildcards=wildcards)

            is_iterable = not (not_iterable(item) or no_flattening)
            if not is_iterable:
                item = [item]

            for entry in item:
                check_return_type(entry)
                concrete = concretize(entry, wildcards)
                newitems.append(concrete)
                if ruleio is not None:
                    ruleio[concrete] = entry

            if name:
                # A single (non-flattened) item is named without an end
                # index, an iterable one with the range it occupies.
                last = len(newitems) if is_iterable else None
                newitems.set_name(name, first, end=last)

    if wildcards is None:
        wildcards = dict()
    missing_wildcards = self.wildcard_names - set(wildcards.keys())

    if missing_wildcards:
        message = "Could not resolve wildcards in rule {}:\n{}".format(
            self.name, "\n".join(self.wildcard_names))
        raise RuleException(message,
                            lineno=self.lineno,
                            snakefile=self.snakefile)

    ruleio = dict()

    try:
        wildcards_obj = Wildcards(fromdict=wildcards)

        concrete_input = InputFiles()
        _apply_wildcards(concrete_input,
                         self.input,
                         wildcards,
                         wildcards_obj,
                         concretize=concretize_iofile,
                         ruleio=ruleio)

        concrete_params = Params()
        # When applying wildcards to params, the return type need not be
        # a string, so the check is disabled.
        _apply_wildcards(concrete_params,
                         self.params,
                         wildcards,
                         wildcards_obj,
                         concretize=concretize_param,
                         check_return_type=lambda x: None,
                         no_flattening=True)

        concrete_output = OutputFiles(
            o.apply_wildcards(wildcards) for o in self.output)
        concrete_output.take_names(self.output.get_names())

        dependencies = {}
        for dep_file, dep_rule in self.dependencies.items():
            if dep_file is None:
                dependencies[None] = dep_rule
            else:
                dependencies[dep_file.apply_wildcards(wildcards)] = dep_rule

        ruleio.update(zip(concrete_output, self.output))

        concrete_log = Log()
        _apply_wildcards(concrete_log,
                         self.log,
                         wildcards,
                         wildcards_obj,
                         concretize=concretize_iofile)

        if self.benchmark:
            benchmark = self.benchmark.apply_wildcards(wildcards)
        else:
            benchmark = None
        return (concrete_input, concrete_output, concrete_params,
                concrete_log, benchmark, ruleio, dependencies)
    except WildcardError as ex:
        # this can only happen if an input contains an unresolved wildcard.
        raise RuleException(
            "Wildcards in input, params, log or benchmark file of rule {} cannot be "
            "determined from output files:\n{}".format(self, str(ex)),
            lineno=self.lineno,
            snakefile=self.snakefile)
def auto_report(dag, path):
    """Render an HTML report for the jobs of ``dag`` and write it to ``path``.

    Files flagged with ``report`` among the jobs' expanded outputs and
    inputs are collected per category, per-job metadata is read from
    ``dag.workflow.persistence``, and everything is rendered through the
    ``report.html`` template.

    Args:
        dag: DAG whose ``jobs`` and ``workflow`` are inspected.
        path: Destination file; must end with ``.html``.

    Raises:
        WorkflowError: If jinja2 or pygments cannot be imported, if
            ``path`` does not end with ``.html``, if a file flagged for
            the report does not exist, or if a flagged directory has no
            valid ``patterns`` list.
    """
    try:
        from jinja2 import Template, Environment, PackageLoader
    except ImportError as e:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports.")

    if not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html")

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    persistence = dag.workflow.persistence
    results = defaultdict(list)
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError("File {} marked for report but does "
                                        "not exist.".format(f))
                report_obj = get_flag_value(f, "report")
                category = Category(report_obj.category)

                def register_file(f, wildcards_overwrite=None):
                    # Record ``f`` under the current category and remember
                    # it so that it is not reported twice.
                    results[category].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            wildcards_overwrite=wildcards_overwrite,
                        ))
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                if os.path.isdir(f):
                    if not isinstance(report_obj.patterns, list):
                        raise WorkflowError(
                            "Invalid patterns given for report. Must be list.",
                            rule=job.rule,
                        )
                    if not report_obj.patterns:
                        raise WorkflowError(
                            "Directory marked for report but no file patterns given via patterns=[...]. "
                            "See report documentation.",
                            rule=job.rule,
                        )
                    for pattern in report_obj.patterns:
                        pattern = os.path.join(f, pattern)
                        wildcards = glob_wildcards(pattern)._asdict()
                        names = wildcards.keys()
                        for w in zip(*wildcards.values()):
                            w = dict(zip(names, w))
                            w.update(job.wildcards_dict)
                            w = Wildcards(fromdict=w)
                            # Use a separate name for the matched file:
                            # rebinding ``f`` here would make the next
                            # pattern be joined onto a file path instead
                            # of the flagged directory.
                            matched_file = apply_wildcards(pattern, w)
                            register_file(matched_file, wildcards_overwrite=w)

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning("Missing metadata for file {}. Maybe metadata "
                               "was deleted or it was created using an older "
                               "version of Snakemake. This is a non critical "
                               "warning.".format(f))
                continue
            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                # One record per (job hash, rule); the time span is
                # widened over all output files of that job.
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = min(rec.starttime, meta["starttime"])
                rec.endtime = max(rec.endtime, meta["endtime"])
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                print(e)
                logger.warning("Metadata for file {} was created with a too "
                               "old Snakemake version.".format(f))

    for catresults in results.values():
        catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [{
        "rule": rec.rule,
        "runtime": rec.endtime - rec.starttime
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare end times
    timeline = [{
        "rule": rec.rule,
        "starttime": datetime.datetime.fromtimestamp(rec.starttime).isoformat(),
        "endtime": datetime.datetime.fromtimestamp(rec.endtime).isoformat(),
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare per-rule information
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            # Merge into an equal rule record if one exists, otherwise
            # keep this one as a separate entry.
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    # Flat list of results, keeping only the first one per target.
    seen = set()
    files = [
        seen.add(res.target) or res for cat in results.values()
        for res in cat if res.target not in seen
    ]

    rst_links = textwrap.dedent("""

    .. _Results: #results
    .. _Rules: #rules
    .. _Statistics: #stats
    {% for cat, catresults in categories|dictsort %}
    .. _{{ cat.name }}: #{{ cat.id }}
    {% for res in files %}
    .. _{{ res.target }}: #{{ res.id }}
    {% endfor %}
    {% endfor %}
    .. _
    """)
    for cat, catresults in results.items():
        for res in catresults:
            res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(snakemake=Snakemake,
                                             categories=results,
                                             files=files),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(res.size for cat in results.values() for res in cat)

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports.")

    # render HTML
    template = env.get_template("report.html")
    with open(path, "w", encoding="utf-8") as out:
        out.write(
            template.render(
                results=results,
                results_size=results_size,
                configfiles=configfiles,
                text=text,
                rulegraph_nodes=rulegraph["nodes"],
                rulegraph_links=rulegraph["links"],
                rulegraph_width=xmax + 20,
                rulegraph_height=ymax + 20,
                runtimes=runtimes,
                timeline=timeline,
                rules=[rec for recs in rules.values() for rec in recs],
                version=__version__,
                now=now,
                pygments_css=HtmlFormatter(
                    style="trac").get_style_defs(".source"),
            ))
    logger.info("Report created.")
def cluster_wildcards(self, job):
    """Return the result of ``self.cluster_params(job)`` as ``Wildcards``."""
    params = self.cluster_params(job)
    return Wildcards(fromdict=params)
def auto_report(dag, path, stylesheet=None):
    """Render a report for the jobs of ``dag`` and write it to ``path``.

    Files flagged with ``report`` among the jobs' expanded outputs and
    inputs are collected per category and subcategory, per-job metadata is
    read from ``dag.workflow.persistence``, and everything is rendered
    through the ``report.html.jinja2`` template.

    Args:
        dag: DAG whose ``jobs`` and ``workflow`` are inspected.
        path: Destination file. ``.html`` writes the rendered page alone;
            ``.zip`` writes an archive holding the page together with the
            result files, thumbnails and auxiliary files.
        stylesheet: Optional path of a custom stylesheet whose content is
            handed to the template.

    Raises:
        WorkflowError: If jinja2 or pygments cannot be imported, if
            ``path`` ends with neither ``.html`` nor ``.zip``, if the
            stylesheet cannot be read, if a file flagged for the report
            does not exist, or if a flagged directory is not usable (no
            patterns/htmlindex, invalid patterns, htmlindex missing, or
            htmlindex requested for a pure HTML report).
    """
    try:
        from jinja2 import Template, Environment, PackageLoader
    except ImportError as e:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports."
        )

    mode_embedded = True
    if path.endswith(".zip"):
        mode_embedded = False
    elif not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html or .zip")

    custom_stylesheet = None
    if stylesheet is not None:
        try:
            with open(stylesheet) as s:
                custom_stylesheet = s.read()
        except (Exception, BaseException) as e:
            raise WorkflowError("Unable to read custom report stylesheet.", e)

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    persistence = dag.workflow.persistence
    results = defaultdict(lambda: defaultdict(list))
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError(
                        "File {} marked for report but does "
                        "not exist.".format(f)
                    )
                report_obj = get_flag_value(f, "report")

                def register_file(
                    f, wildcards_overwrite=None, aux_files=None, name_overwrite=None
                ):
                    # Record ``f`` under its category/subcategory and
                    # remember it so that it is not reported twice.
                    wildcards = wildcards_overwrite or job.wildcards
                    category = Category(
                        report_obj.category, wildcards=wildcards, job=job
                    )
                    subcategory = Category(
                        report_obj.subcategory, wildcards=wildcards, job=job
                    )

                    results[category][subcategory].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            dag.workflow,
                            wildcards_overwrite=wildcards_overwrite,
                            mode_embedded=mode_embedded,
                            aux_files=aux_files,
                            name_overwrite=name_overwrite,
                        )
                    )
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                elif os.path.isdir(f):
                    if report_obj.htmlindex:
                        if mode_embedded:
                            raise WorkflowError(
                                "Directory marked for report specifies htmlindex. "
                                "This is unsupported when requesting a pure HTML report. "
                                "Please use store as zip instead (--report report.zip)."
                            )
                        # Everything in the directory except the index
                        # page and the timestamp marker is an aux file.
                        aux_files = []
                        index_found = False
                        for root, dirs, files in os.walk(f):
                            for name in files:
                                if name != ".snakemake_timestamp":
                                    filepath = os.path.join(root, name)
                                    if (
                                        os.path.relpath(filepath, f)
                                        != report_obj.htmlindex
                                    ):
                                        aux_files.append(filepath)
                                    else:
                                        index_found = True
                        if not index_found:
                            raise WorkflowError(
                                "Given htmlindex {} not found in directory "
                                "marked for report".format(report_obj.htmlindex)
                            )
                        register_file(
                            os.path.join(f, report_obj.htmlindex),
                            aux_files=aux_files,
                            name_overwrite="{}.html".format(os.path.basename(f)),
                        )
                    elif report_obj.patterns:
                        if not isinstance(report_obj.patterns, list):
                            raise WorkflowError(
                                "Invalid patterns given for report. Must be list.",
                                rule=job.rule,
                            )

                        for pattern in report_obj.patterns:
                            pattern = os.path.join(f, pattern)
                            wildcards = glob_wildcards(pattern)._asdict()
                            names = wildcards.keys()
                            for w in zip(*wildcards.values()):
                                w = dict(zip(names, w))
                                w.update(job.wildcards_dict)
                                w = Wildcards(fromdict=w)
                                # Use a separate name for the matched
                                # file: rebinding ``f`` here would make
                                # the next pattern be joined onto a file
                                # path instead of the flagged directory.
                                matched_file = apply_wildcards(pattern, w)
                                register_file(matched_file, wildcards_overwrite=w)
                    else:
                        raise WorkflowError(
                            "Directory marked for report but neither file patterns "
                            "given via patterns=[...], nor htmlindex given. "
                            "See report documentation.",
                            rule=job.rule,
                        )

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning(
                    "Missing metadata for file {}. Maybe metadata "
                    "was deleted or it was created using an older "
                    "version of Snakemake. This is a non critical "
                    "warning.".format(f)
                )
                continue

            def get_time(rectime, metatime, sel_func):
                # Keep the recorded time when the metadata has none.
                if metatime is None:
                    return rectime
                return sel_func(metatime, rectime)

            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                # One record per (job hash, rule); the time span is
                # widened over all output files of that job.
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = get_time(rec.starttime, meta["starttime"], min)
                rec.endtime = get_time(rec.endtime, meta["endtime"], max)
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                print(e)
                logger.warning(
                    "Metadata for file {} was created with a too "
                    "old Snakemake version.".format(f)
                )

    for subcats in results.values():
        for catresults in subcats.values():
            catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [
        {"rule": rec.rule, "runtime": rec.endtime - rec.starttime}
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    def get_datetime(rectime):
        # ISO timestamp, or None when the platform rejects the value.
        try:
            return datetime.datetime.fromtimestamp(rectime).isoformat()
        except OSError:
            return None

    # prepare end times
    timeline = [
        {
            "rule": rec.rule,
            "starttime": get_datetime(rec.starttime),
            "endtime": get_datetime(rec.endtime),
        }
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    # prepare per-rule information
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            # Merge into an equal rule record if one exists, otherwise
            # keep this one as a separate entry.
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    # Flat list of results, keeping only the first one per target.
    seen = set()
    files = [
        seen.add(res.target) or res
        for cat in results.values()
        for subcat in cat.values()
        for res in subcat
        if res.target not in seen
    ]

    rst_links = textwrap.dedent(
        """

    .. _Workflow: javascript:show_panel('workflow')
    .. _Statistics: javascript:show_panel('statistics')
    {% for cat, catresults in categories|dictsort %}
    .. _{{ cat.name }}: javascript:show_panel("{{ cat.id }}")
    {% endfor %}
    {% for res in files %}
    .. _{{ res.target }}: javascript:show_panel("{{ res.category.id }}")
    {% endfor %}
    """
    )
    for cat, subcats in results.items():
        for subcat, catresults in subcats.items():
            for res in catresults:
                res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with dag.workflow.sourcecache.open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(
                    snakemake=Snakemake, categories=results, files=files
                ),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(
        res.size
        for cat in results.values()
        for subcat in cat.values()
        for res in subcat
    )

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports."
        )

    template = env.get_template("report.html.jinja2")

    logger.info("Downloading resources and rendering HTML.")

    rendered = template.render(
        results=results,
        results_size=results_size,
        configfiles=configfiles,
        text=text,
        rulegraph_nodes=rulegraph["nodes"],
        rulegraph_links=rulegraph["links"],
        rulegraph_width=xmax + 20,
        rulegraph_height=ymax + 20,
        runtimes=runtimes,
        timeline=timeline,
        rules=[rec for recs in rules.values() for rec in recs],
        version=__version__,
        now=now,
        pygments_css=HtmlFormatter(style="trac").get_style_defs(".source"),
        custom_stylesheet=custom_stylesheet,
        mode_embedded=mode_embedded,
    )

    # TODO look into supporting .WARC format, also see (https://webrecorder.io)

    if not mode_embedded:
        with ZipFile(path, compression=ZIP_DEFLATED, mode="w") as zipout:
            # All archive members live below a folder named after the
            # archive file's stem.
            folder = Path(Path(path).stem)
            # store results in data folder
            for subcats in results.values():
                for catresults in subcats.values():
                    for result in catresults:
                        # write raw data
                        zipout.write(result.path, str(folder.joinpath(result.data_uri)))
                        # write thumbnail
                        if result.is_img and result.png_content:
                            zipout.writestr(
                                str(folder.joinpath(result.png_uri)), result.png_content
                            )
                        # write aux files, keeping their position relative
                        # to the directory of the result file
                        parent = folder.joinpath(result.data_uri).parent
                        for aux_path in result.aux_files:
                            zipout.write(
                                aux_path,
                                str(
                                    parent.joinpath(
                                        os.path.relpath(
                                            aux_path, os.path.dirname(result.path)
                                        )
                                    )
                                ),
                            )

            # write report html
            zipout.writestr(str(folder.joinpath("report.html")), rendered)
    else:
        with open(path, "w", encoding="utf-8") as htmlout:
            htmlout.write(rendered)

    logger.info("Report created: {}.".format(path))
def expand_wildcards(self, wildcards=None):
    """Expand the rule's input, params, output and log.

    Args:
        wildcards: Mapping of wildcard names to values; ``None`` is
            treated as an empty mapping.

    Returns:
        Tuple ``(input, output, params, log, ruleio)``. ``ruleio`` maps
        each concrete input/output item to the rule-level item it was
        produced from.

    Raises:
        RuleException: If a wildcard of the rule has no value, if a
            callable item returns a non-string entry, or if a
            ``WildcardError`` occurs during expansion.
    """

    def concretize_iofile(f, wildcards):
        # Plain values become IOFile objects; _IOFile patterns get the
        # wildcard values applied.
        if isinstance(f, _IOFile):
            return f.apply_wildcards(wildcards,
                                     fill_missing=f in self.dynamic_input,
                                     fail_dynamic=self.dynamic_output)
        return IOFile(f, rule=self)

    def _apply_wildcards(newitems,
                         olditems,
                         wildcards,
                         wildcards_obj,
                         concretize=apply_wildcards,
                         ruleio=None):
        # Concretize every item of ``olditems`` and append it to
        # ``newitems``, carrying over item names.
        for name, item in olditems.allitems():
            first = len(newitems)

            # Only the results of callables are type-checked.
            from_function = callable(item)
            if from_function:
                item = item(wildcards_obj)
            if not_iterable(item):
                item = [item]

            for entry in item:
                if from_function and not isinstance(entry, str):
                    raise RuleException(
                        "Input function did not return str or list of str.",
                        rule=self)
                concrete = concretize(entry, wildcards)
                newitems.append(concrete)
                if ruleio is not None:
                    ruleio[concrete] = entry

            if name:
                newitems.set_name(name, first, end=len(newitems))

    if wildcards is None:
        wildcards = dict()
    # TODO validate
    missing_wildcards = self.wildcard_names - set(wildcards.keys())

    if missing_wildcards:
        message = "Could not resolve wildcards in rule {}:\n{}".format(
            self.name, "\n".join(self.wildcard_names))
        raise RuleException(message,
                            lineno=self.lineno,
                            snakefile=self.snakefile)

    ruleio = dict()

    try:
        wildcards_obj = Wildcards(fromdict=wildcards)

        concrete_input = InputFiles()
        _apply_wildcards(concrete_input,
                         self.input,
                         wildcards,
                         wildcards_obj,
                         concretize=concretize_iofile,
                         ruleio=ruleio)

        concrete_params = Params()
        _apply_wildcards(concrete_params, self.params, wildcards,
                         wildcards_obj)

        concrete_output = OutputFiles(
            o.apply_wildcards(wildcards) for o in self.output)
        concrete_output.take_names(self.output.get_names())

        ruleio.update(zip(concrete_output, self.output))

        if self.log:
            log = self.log.apply_wildcards(wildcards)
        else:
            log = None
        return concrete_input, concrete_output, concrete_params, log, ruleio
    except WildcardError as ex:
        # this can only happen if an input contains an unresolved wildcard.
        raise RuleException(
            "Wildcards in input or log file of rule {} cannot be "
            "determined from output files:\n{}".format(self, str(ex)),
            lineno=self.lineno,
            snakefile=self.snakefile)