def concretize_param(p, wildcards, is_from_callable):
    # Apply wildcards to string params (and to string items inside lists);
    # values returned by input functions are passed through verbatim.
    if not is_from_callable:
        if isinstance(p, str):
            return apply_wildcards(p, wildcards)
        if isinstance(p, list):
            return [
                (apply_wildcards(v, wildcards) if isinstance(v, str) else v)
                for v in p
            ]
    return p
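# A minimal usage sketch (not part of the original module), assuming
# apply_wildcards substitutes "{name}" placeholders from a dict-like
# wildcards object as in snakemake.io; all values are hypothetical:
#
#   concretize_param("plots/{sample}.pdf", {"sample": "A"}, is_from_callable=False)
#   # -> "plots/A.pdf"
#   concretize_param(["{sample}.bam", 42], {"sample": "A"}, is_from_callable=False)
#   # -> ["A.bam", 42]   (non-string list items pass through untouched)
#   concretize_param("{sample}.bam", {"sample": "A"}, is_from_callable=True)
#   # -> "{sample}.bam"  (callable-derived values are taken verbatim)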
def get_outputfiles(self, job: Job):
    # For multiext() outputs, key each file by its extension relative to the
    # shared, wildcard-expanded prefix; otherwise cache the single output file.
    if job.rule.output[0].is_multiext:
        prefix_len = len(
            apply_wildcards(job.rule.output[0].multiext_prefix, job.wildcards)
        )
        yield from ((f, f[prefix_len:]) for f in job.output)
    else:
        yield (job.output[0], "")
def expand_group(self, wildcards):
    """Expand the group given wildcards."""
    if callable(self.group):
        return self.apply_input_function(self.group, wildcards)
    elif isinstance(self.group, str):
        return apply_wildcards(self.group, wildcards, dynamic_fill=DYNAMIC_FILL)
    else:
        return self.group
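# Sketch of the three cases above (hypothetical values):
#   group=lambda wildcards: ...   -> resolved via apply_input_function
#   group="batch_{sample}"        -> "batch_A" for wildcards {"sample": "A"}
#   group=None                    -> returned unchanged (no group assigned)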
def get_outputfiles(self, job: Job):
    if job.rule.output[0].is_multiext:
        prefix_len = len(
            apply_wildcards(job.rule.output[0].multiext_prefix, job.wildcards)
        )
        yield from ((f, f[prefix_len:]) for f in job.output)
    else:
        # Without multiext there is no way to key multiple outputs, so a
        # cacheable job must declare exactly one output file.
        assert (
            len(job.output) == 1
        ), "bug: multiple output files in cacheable job but multiext not used for declaring them"
        yield (job.output[0], "")
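# Worked example for the multiext branch above (hypothetical rule): with
#   output: multiext("results/{sample}", ".bam", ".bam.bai")
# and wildcards {"sample": "A"}, the expanded prefix is "results/A"
# (prefix_len == 9), so the generator yields
#   ("results/A.bam", ".bam") and ("results/A.bam.bai", ".bam.bai"),
# i.e. each output is keyed by its extension for cache lookup.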
def __init__(self, name, wildcards, job):
    if name is None:
        name = "Other"
        self.is_other = True
    else:
        self.is_other = False
        try:
            name = apply_wildcards(name, wildcards)
        except AttributeError as e:
            raise WorkflowError("Failed to resolve wildcards.", e, rule=job.rule)
    self.name = name
    # Derive a stable id for the category from the SHA-256 of its name.
    h = hashlib.sha256()
    h.update(name.encode())
    self.id = h.hexdigest()
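# Standalone illustration of the id derivation above (hypothetical name):
import hashlib

print(hashlib.sha256("Quality control".encode()).hexdigest())
# -> a stable 64-character hex digest, safe to use as an HTML element id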
def __init__(self, name, wildcards, job):
    if name is None:
        name = "Other"
        self.is_other = True
    else:
        self.is_other = False
        try:
            name = apply_wildcards(name, wildcards)
        except AttributeError as e:
            raise WorkflowError("Failed to resolve wildcards.", e, rule=job.rule)
    self.name = name
    # Variant id scheme: keep the name readable by percent-encoding it
    # instead of hashing.
    self.id = "results-{name}".format(name=urllib.parse.quote(name))
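# Standalone illustration of the percent-encoded id scheme above
# (hypothetical name):
import urllib.parse

print("results-{name}".format(name=urllib.parse.quote("Quality control")))
# -> "results-Quality%20control"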
def expand_report_argument(item, wildcards, job):
    # Report arguments may be callables (input-function style): call them with
    # the job's wildcards plus any auxiliary params they declare, then apply
    # wildcards to any resulting string.
    if is_callable(item):
        aux_params = get_input_function_aux_params(item, {"params": job.params})
        try:
            item = item(wildcards, **aux_params)
        except Exception as e:
            raise InputFunctionException(e, rule=job.rule, wildcards=wildcards)
    if isinstance(item, str):
        try:
            return apply_wildcards(item, wildcards)
        except AttributeError as e:
            raise WorkflowError("Failed to resolve wildcards.", e, rule=job.rule)
    else:
        return item
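# Sketch (hypothetical): a report argument such as
#   caption=lambda wildcards, params: "captions/{}.rst".format(params.kind)
# is first called with the job's wildcards (plus the params it asked for),
# and the returned string then has any remaining "{...}" wildcards applied.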
def auto_report(dag, path, stylesheet=None):
    try:
        from jinja2 import Template, Environment, PackageLoader
    except ImportError as e:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports."
        )

    mode_embedded = True
    if path.endswith(".zip"):
        mode_embedded = False
    elif not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html or .zip")

    custom_stylesheet = None
    if stylesheet is not None:
        try:
            with open(stylesheet) as s:
                custom_stylesheet = s.read()
        except (Exception, BaseException) as e:
            raise WorkflowError("Unable to read custom report stylesheet.", e)

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    persistence = dag.workflow.persistence
    results = defaultdict(lambda: defaultdict(list))
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError(
                        "File {} marked for report but does "
                        "not exist.".format(f)
                    )
                report_obj = get_flag_value(f, "report")

                def register_file(
                    f, wildcards_overwrite=None, aux_files=None, name_overwrite=None
                ):
                    wildcards = wildcards_overwrite or job.wildcards
                    category = Category(
                        report_obj.category, wildcards=wildcards, job=job
                    )
                    subcategory = Category(
                        report_obj.subcategory, wildcards=wildcards, job=job
                    )
                    results[category][subcategory].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            dag.workflow,
                            wildcards_overwrite=wildcards_overwrite,
                            mode_embedded=mode_embedded,
                            aux_files=aux_files,
                            name_overwrite=name_overwrite,
                        )
                    )
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                elif os.path.isdir(f):
                    if report_obj.htmlindex:
                        if mode_embedded:
                            raise WorkflowError(
                                "Directory marked for report specifies htmlindex. "
                                "This is unsupported when requesting a pure HTML report. "
                                "Please use store as zip instead (--report report.zip)."
                            )
                        aux_files = []
                        index_found = False
                        for root, dirs, files in os.walk(f):
                            for name in files:
                                if name != ".snakemake_timestamp":
                                    filepath = os.path.join(root, name)
                                    if (
                                        os.path.relpath(filepath, f)
                                        != report_obj.htmlindex
                                    ):
                                        aux_files.append(filepath)
                                    else:
                                        index_found = True
                        if not index_found:
                            raise WorkflowError(
                                "Given htmlindex {} not found in directory "
                                "marked for report".format(report_obj.htmlindex)
                            )
                        register_file(
                            os.path.join(f, report_obj.htmlindex),
                            aux_files=aux_files,
                            name_overwrite="{}.html".format(os.path.basename(f)),
                        )
                    elif report_obj.patterns:
                        if not isinstance(report_obj.patterns, list):
                            raise WorkflowError(
                                "Invalid patterns given for report. Must be list.",
                                rule=job.rule,
                            )
                        for pattern in report_obj.patterns:
                            pattern = os.path.join(f, pattern)
                            wildcards = glob_wildcards(pattern)._asdict()
                            names = wildcards.keys()
                            for w in zip(*wildcards.values()):
                                w = dict(zip(names, w))
                                w.update(job.wildcards_dict)
                                w = Wildcards(fromdict=w)
                                f = apply_wildcards(pattern, w)
                                register_file(f, wildcards_overwrite=w)
                    else:
                        raise WorkflowError(
                            "Directory marked for report but neither file patterns "
                            "given via patterns=[...], nor htmlindex given. "
                            "See report documentation.",
                            rule=job.rule,
                        )

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning(
                    "Missing metadata for file {}. Maybe metadata "
                    "was deleted or it was created using an older "
                    "version of Snakemake. This is a non critical "
                    "warning.".format(f)
                )
                continue

            def get_time(rectime, metatime, sel_func):
                if metatime is None:
                    return rectime
                return sel_func(metatime, rectime)

            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = get_time(rec.starttime, meta["starttime"], min)
                rec.endtime = get_time(rec.endtime, meta["endtime"], max)
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                logger.warning(
                    "Metadata for file {} was created with a too "
                    "old Snakemake version.".format(f)
                )

    for subcats in results.values():
        for catresults in subcats.values():
            catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [
        {"rule": rec.rule, "runtime": rec.endtime - rec.starttime}
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    def get_datetime(rectime):
        try:
            return datetime.datetime.fromtimestamp(rectime).isoformat()
        except OSError:
            return None

    # prepare end times
    timeline = [
        {
            "rule": rec.rule,
            "starttime": get_datetime(rec.starttime),
            "endtime": get_datetime(rec.endtime),
        }
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    # prepare per-rule information
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    seen = set()
    files = [
        seen.add(res.target) or res
        for cat in results.values()
        for subcat in cat.values()
        for res in subcat
        if res.target not in seen
    ]

    rst_links = textwrap.dedent(
        """
        .. _Workflow: javascript:show_panel('workflow')
        .. _Statistics: javascript:show_panel('statistics')
        {% for cat, catresults in categories|dictsort %}
        .. _{{ cat.name }}: javascript:show_panel("{{ cat.id }}")
        {% endfor %}
        {% for res in files %}
        .. _{{ res.target }}: javascript:show_panel("{{ res.category.id }}")
        {% endfor %}
        """
    )

    for cat, subcats in results.items():
        for subcat, catresults in subcats.items():
            for res in catresults:
                res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with dag.workflow.sourcecache.open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(
                    snakemake=Snakemake, categories=results, files=files
                ),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(
        res.size
        for cat in results.values()
        for subcat in cat.values()
        for res in subcat
    )

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports."
        )

    template = env.get_template("report.html.jinja2")

    logger.info("Downloading resources and rendering HTML.")

    rendered = template.render(
        results=results,
        results_size=results_size,
        configfiles=configfiles,
        text=text,
        rulegraph_nodes=rulegraph["nodes"],
        rulegraph_links=rulegraph["links"],
        rulegraph_width=xmax + 20,
        rulegraph_height=ymax + 20,
        runtimes=runtimes,
        timeline=timeline,
        rules=[rec for recs in rules.values() for rec in recs],
        version=__version__,
        now=now,
        pygments_css=HtmlFormatter(style="trac").get_style_defs(".source"),
        custom_stylesheet=custom_stylesheet,
        mode_embedded=mode_embedded,
    )

    # TODO look into supporting .WARC format, also see (https://webrecorder.io)

    if not mode_embedded:
        with ZipFile(path, compression=ZIP_DEFLATED, mode="w") as zipout:
            folder = Path(Path(path).stem)
            # store results in data folder
            for subcats in results.values():
                for catresults in subcats.values():
                    for result in catresults:
                        # write raw data
                        zipout.write(
                            result.path, str(folder.joinpath(result.data_uri))
                        )
                        # write thumbnail
                        if result.is_img and result.png_content:
                            zipout.writestr(
                                str(folder.joinpath(result.png_uri)),
                                result.png_content,
                            )
                        # write aux files
                        parent = folder.joinpath(result.data_uri).parent
                        for aux_path in result.aux_files:
                            zipout.write(
                                aux_path,
                                str(
                                    parent.joinpath(
                                        os.path.relpath(
                                            aux_path, os.path.dirname(result.path)
                                        )
                                    )
                                ),
                            )
            # write report html
            zipout.writestr(str(folder.joinpath("report.html")), rendered)
    else:
        with open(path, "w", encoding="utf-8") as htmlout:
            htmlout.write(rendered)
    logger.info("Report created: {}.".format(path))
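# Standalone sketch of the pattern-expansion idiom used in the directory
# branch above: glob_wildcards returns one value list per wildcard name, and
# zipping those lists back together yields one wildcard dict per matched file
# (the dict below is a hypothetical stand-in for glob_wildcards(...)._asdict()):
wildcards = {"sample": ["A", "B"], "group": ["x", "y"]}
names = wildcards.keys()
for w in zip(*wildcards.values()):
    print(dict(zip(names, w)))
# -> {'sample': 'A', 'group': 'x'}
# -> {'sample': 'B', 'group': 'y'}
# Note also that the zip report variant above nests all entries under
# Path(path).stem, e.g. under "report/" for --report report.zip.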
def format(self, item, *args, **kwargs):
    if 'wc' in kwargs:
        item = apply_wildcards(item, kwargs['wc'])
    return item
def apply_wildcards(self, wildcards, _):
    # Delegates to the module-level apply_wildcards helper (which this method
    # name shadows) and wraps the resolved name in a new CondaEnvNameSpec.
    return CondaEnvNameSpec(apply_wildcards(self.name, wildcards))
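# Sketch (hypothetical values): with self.name == "env_{sample}" and
# wildcards {"sample": "A"}, this returns CondaEnvNameSpec("env_A").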
def concretize_param(p, wildcards):
    if isinstance(p, str):
        return apply_wildcards(p, wildcards)
    return p
def auto_report(dag, path):
    try:
        from jinja2 import Template, Environment, PackageLoader
    except ImportError as e:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports."
        )

    if not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html")

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    persistence = dag.workflow.persistence
    results = defaultdict(list)
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError(
                        "File {} marked for report but does "
                        "not exist.".format(f)
                    )
                report_obj = get_flag_value(f, "report")
                category = Category(report_obj.category)

                def register_file(f, wildcards_overwrite=None):
                    results[category].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            wildcards_overwrite=wildcards_overwrite,
                        )
                    )
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                if os.path.isdir(f):
                    if not isinstance(report_obj.patterns, list):
                        raise WorkflowError(
                            "Invalid patterns given for report. Must be list.",
                            rule=job.rule,
                        )
                    if not report_obj.patterns:
                        raise WorkflowError(
                            "Directory marked for report but no file patterns given via patterns=[...]. "
                            "See report documentation.",
                            rule=job.rule,
                        )
                    for pattern in report_obj.patterns:
                        pattern = os.path.join(f, pattern)
                        wildcards = glob_wildcards(pattern)._asdict()
                        names = wildcards.keys()
                        for w in zip(*wildcards.values()):
                            w = dict(zip(names, w))
                            w.update(job.wildcards_dict)
                            w = Wildcards(fromdict=w)
                            f = apply_wildcards(pattern, w)
                            register_file(f, wildcards_overwrite=w)

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning(
                    "Missing metadata for file {}. Maybe metadata "
                    "was deleted or it was created using an older "
                    "version of Snakemake. This is a non critical "
                    "warning.".format(f)
                )
                continue
            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = min(rec.starttime, meta["starttime"])
                rec.endtime = max(rec.endtime, meta["endtime"])
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                logger.warning(
                    "Metadata for file {} was created with a too "
                    "old Snakemake version.".format(f)
                )

    for catresults in results.values():
        catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [
        {"rule": rec.rule, "runtime": rec.endtime - rec.starttime}
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    # prepare end times
    timeline = [
        {
            "rule": rec.rule,
            "starttime": datetime.datetime.fromtimestamp(rec.starttime).isoformat(),
            "endtime": datetime.datetime.fromtimestamp(rec.endtime).isoformat(),
        }
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    # prepare per-rule information
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    seen = set()
    files = [
        seen.add(res.target) or res
        for cat in results.values()
        for res in cat
        if res.target not in seen
    ]

    rst_links = textwrap.dedent(
        """
        .. _Results: #results
        .. _Rules: #rules
        .. _Statistics: #stats
        {% for cat, catresults in categories|dictsort %}
        .. _{{ cat.name }}: #{{ cat.id }}
        {% for res in files %}
        .. _{{ res.target }}: #{{ res.id }}
        {% endfor %}
        {% endfor %}
        .. _
        """
    )

    for cat, catresults in results.items():
        for res in catresults:
            res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(
                    snakemake=Snakemake, categories=results, files=files
                ),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(res.size for cat in results.values() for res in cat)

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports."
        )

    # render HTML
    template = env.get_template("report.html")
    with open(path, "w", encoding="utf-8") as out:
        out.write(
            template.render(
                results=results,
                results_size=results_size,
                configfiles=configfiles,
                text=text,
                rulegraph_nodes=rulegraph["nodes"],
                rulegraph_links=rulegraph["links"],
                rulegraph_width=xmax + 20,
                rulegraph_height=ymax + 20,
                runtimes=runtimes,
                timeline=timeline,
                rules=[rec for recs in rules.values() for rec in recs],
                version=__version__,
                now=now,
                pygments_css=HtmlFormatter(style="trac").get_style_defs(".source"),
            )
        )
    logger.info("Report created.")
def concretize_param(p, wildcards, is_from_callable):
    if not is_from_callable and isinstance(p, str):
        return apply_wildcards(p, wildcards)
    return p