Example #1
def format_wildcards(string, job_properties):
    """ Format a string with variables from the job. """
    class Job(object):
        def __init__(self, job_properties):
            for key in job_properties:
                setattr(self, key, job_properties[key])

    job = Job(job_properties)
    if "params" in job_properties:
        job._format_params = Wildcards(fromdict=job_properties["params"])
    else:
        job._format_params = None
    if "wildcards" in job_properties:
        job._format_wildcards = Wildcards(fromdict=job_properties["wildcards"])
    else:
        job._format_wildcards = None
    _variables = dict()
    _variables.update(
        dict(params=job._format_params, wildcards=job._format_wildcards))
    if hasattr(job, "rule"):
        _variables.update(dict(rule=job.rule))
    try:
        return format(string, **_variables)
    except (NameError, IndexError) as ex:
        raise WorkflowError("{} with group job {}: {}".format(
            type(ex).__name__, job.jobid, str(ex)))
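A minimal usage sketch for format_wildcards (the job_properties dict below is hypothetical, modeled on what a cluster submission script receives; format is assumed to be snakemake.utils.format and Wildcards snakemake.io.Wildcards):

from snakemake.io import Wildcards
from snakemake.utils import format

job_properties = {
    "jobid": 3,
    "rule": "align",
    "params": {"threads": 8},
    "wildcards": {"sample": "A"},
}

# Both {wildcards.*} and {params.*} placeholders are resolved against the job.
cmd = format_wildcards("align {wildcards.sample} -p {params.threads}",
                       job_properties)
# cmd == "align A -p 8"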
Example #2
    def __init__(self, rule, dag, wildcards_dict=None, format_wildcards=None):
        self.rule = rule
        self.dag = dag

        self.wildcards_dict = wildcards_dict
        self.wildcards = Wildcards(fromdict=self.wildcards_dict)
        self._format_wildcards = (self.wildcards if format_wildcards is None
                                  else Wildcards(fromdict=format_wildcards))

        self.input, input_mapping, self.dependencies = self.rule.expand_input(self.wildcards_dict)
        self.output, output_mapping = self.rule.expand_output(self.wildcards_dict)
        # Other properties are evaluated lazily, so that they can use
        # additional parameters and check for files that already exist.
        self._params = None
        self._log = None
        self._benchmark = None
        self._resources = None
        self._conda_env_file = None
        self._conda_env = None
        self._group = None

        self.shadow_dir = None
        self._inputsize = None

        self._attempt = self.dag.workflow.attempt

        # TODO get rid of these
        self.dynamic_output, self.dynamic_input = set(), set()
        self.temp_output, self.protected_output = set(), set()
        self.touch_output = set()
        self.subworkflow_input = dict()
        for f in self.output:
            f_ = output_mapping[f]
            if f_ in self.rule.dynamic_output:
                self.dynamic_output.add(f)
            if f_ in self.rule.temp_output:
                self.temp_output.add(f)
            if f_ in self.rule.protected_output:
                self.protected_output.add(f)
            if f_ in self.rule.touch_output:
                self.touch_output.add(f)
        for f in self.input:
            f_ = input_mapping[f]
            if f_ in self.rule.dynamic_input:
                self.dynamic_input.add(f)
            if f_ in self.rule.subworkflow_input:
                self.subworkflow_input[f] = self.rule.subworkflow_input[f_]
            elif "subworkflow" in f.flags:
                sub = f.flags["subworkflow"]
                if f in self.subworkflow_input:
                    other = self.subworkflow_input[f]
                    if sub != other:
                        raise WorkflowError("The input file {} is ambiguously "
                                            "associated with two subworkflows {} "
                                            "and {}.".format(f, sub, other),
                                            rule=self.rule)
                self.subworkflow_input[f] = sub
        self._hash = self.rule.__hash__()
        for wildcard_value in self.wildcards_dict.values():
            self._hash ^= wildcard_value.__hash__()
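Several of these constructors build Wildcards(fromdict=...) objects. As a brief aside, a sketch of how that object behaves (assuming snakemake.io.Wildcards, a Namedlist subclass):

from snakemake.io import Wildcards

w = Wildcards(fromdict={"sample": "A", "replicate": "1"})
assert w.sample == "A"        # named attribute access, as used in rules
assert list(w) == ["A", "1"]  # it is also a plain list of the values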
Example #3
    def __init__(self, rule, dag, targetfile=None, format_wildcards=None):
        self.rule = rule
        self.dag = dag
        self.targetfile = targetfile

        self.wildcards_dict = self.rule.get_wildcards(targetfile)
        self.wildcards = Wildcards(fromdict=self.wildcards_dict)
        self._format_wildcards = (self.wildcards if format_wildcards is None
                                  else Wildcards(fromdict=format_wildcards))

        (self.input, self.output, self.params, self.log, self.benchmark,
         self.ruleio,
         self.dependencies) = rule.expand_wildcards(self.wildcards_dict)

        self.resources_dict = {}
        for name, res in rule.resources.items():
            if callable(res):
                res = res(self.wildcards)
                if not isinstance(res, int):
                    raise ValueError("Callable for resources must return int")
            self.resources_dict[name] = min(
                self.rule.workflow.global_resources.get(name, res), res)

        self.threads = self.resources_dict["_cores"]
        self.resources = Resources(fromdict=self.resources_dict)
        self.shadow_dir = None
        self._inputsize = None

        self.dynamic_output, self.dynamic_input = set(), set()
        self.temp_output, self.protected_output = set(), set()
        self.touch_output = set()
        self.subworkflow_input = dict()
        for f in self.output:
            f_ = self.ruleio[f]
            if f_ in self.rule.dynamic_output:
                self.dynamic_output.add(f)
            if f_ in self.rule.temp_output:
                self.temp_output.add(f)
            if f_ in self.rule.protected_output:
                self.protected_output.add(f)
            if f_ in self.rule.touch_output:
                self.touch_output.add(f)
        for f in self.input:
            f_ = self.ruleio[f]
            if f_ in self.rule.dynamic_input:
                self.dynamic_input.add(f)
            if f_ in self.rule.subworkflow_input:
                self.subworkflow_input[f] = self.rule.subworkflow_input[f_]
        self._hash = self.rule.__hash__()
        if True or not self.dynamic_output:
            for o in self.output:
                self._hash ^= o.__hash__()
Example #4
    def __init__(self, rule, dag, targetfile=None, format_wildcards=None):
        self.rule = rule
        self.dag = dag
        self.targetfile = targetfile

        self.wildcards_dict = self.rule.get_wildcards(targetfile)
        self.wildcards = Wildcards(fromdict=self.wildcards_dict)
        self._format_wildcards = (self.wildcards if format_wildcards is None
                                  else Wildcards(fromdict=format_wildcards))

        self.input, input_mapping, self.dependencies = self.rule.expand_input(
            self.wildcards_dict)
        self.output, output_mapping = self.rule.expand_output(
            self.wildcards_dict)
        # Other properties are evaluated lazily, so that they can use
        # additional parameters and check for files that already exist.
        self._params = None
        self._log = None
        self._benchmark = None
        self._resources = None
        self._conda_env_file = None
        self._conda_env = None

        self.shadow_dir = None
        self._inputsize = None

        self.restart_times = self.rule.restart_times

        self.dynamic_output, self.dynamic_input = set(), set()
        self.temp_output, self.protected_output = set(), set()
        self.touch_output = set()
        self.subworkflow_input = dict()
        for f in self.output:
            f_ = output_mapping[f]
            if f_ in self.rule.dynamic_output:
                self.dynamic_output.add(f)
            if f_ in self.rule.temp_output:
                self.temp_output.add(f)
            if f_ in self.rule.protected_output:
                self.protected_output.add(f)
            if f_ in self.rule.touch_output:
                self.touch_output.add(f)
        for f in self.input:
            f_ = input_mapping[f]
            if f_ in self.rule.dynamic_input:
                self.dynamic_input.add(f)
            if f_ in self.rule.subworkflow_input:
                self.subworkflow_input[f] = self.rule.subworkflow_input[f_]
        self._hash = self.rule.__hash__()
        if True or not self.dynamic_output:
            for o in self.output:
                self._hash ^= o.__hash__()
Example #5
    def apply_input_function(self,
                             func,
                             wildcards,
                             incomplete_checkpoint_func=lambda e: None,
                             raw_exceptions=False,
                             **aux_params):
        incomplete = False
        if isinstance(func, _IOFile):
            func = func._file.callable
        elif isinstance(func, AnnotatedString):
            func = func.callable
        sig = inspect.signature(func)
        _aux_params = {
            k: v
            for k, v in aux_params.items() if k in sig.parameters
        }
        try:
            value = func(Wildcards(fromdict=wildcards), **_aux_params)
        except IncompleteCheckpointException as e:
            value = incomplete_checkpoint_func(e)
            incomplete = True
        except FileNotFoundError as e:
            # Function evaluation can depend on input files. Since expansion
            # can happen during dryrun, where input files are not yet present,
            # we need to skip such cases and mark them as <TBD>.
            if e.filename in aux_params["input"]:
                value = TBDString()
            else:
                raise e
        except (Exception, BaseException) as e:
            if raw_exceptions:
                raise e
            else:
                raise InputFunctionException(e, rule=self, wildcards=wildcards)
        return value, incomplete
Example #6
    def apply_input_function(self,
                             func,
                             wildcards,
                             incomplete_checkpoint_func=lambda e: None,
                             raw_exceptions=False,
                             **aux_params):
        incomplete = False
        if isinstance(func, _IOFile):
            func = func._file.callable
        elif isinstance(func, AnnotatedString):
            func = func.callable
        sig = inspect.signature(func)
        _aux_params = {
            k: v
            for k, v in aux_params.items() if k in sig.parameters
        }
        try:
            value = func(Wildcards(fromdict=wildcards), **_aux_params)
        except IncompleteCheckpointException as e:
            value = incomplete_checkpoint_func(e)
            incomplete = True
        except (Exception, BaseException) as e:
            if raw_exceptions:
                raise e
            else:
                raise InputFunctionException(e, rule=self, wildcards=wildcards)
        return value, incomplete
Example #7
    def apply_input_function(self, func, wildcards, **aux_params):
        sig = inspect.signature(func)
        _aux_params = {k: v for k, v in aux_params.items() if k in sig.parameters}
        try:
            value = func(Wildcards(fromdict=wildcards), **_aux_params)
        except (Exception, BaseException) as e:
            raise InputFunctionException(e, rule=self, wildcards=wildcards)
        return value
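A sketch of the kind of input function this method evaluates (function name and path are hypothetical). Because of the signature filtering above, extra keyword arguments in aux_params are forwarded only if the function declares them:

def my_input(wildcards):
    # Receives Wildcards(fromdict=wildcards) as its first argument.
    return "data/{sample}.fastq".format(sample=wildcards.sample)

# Inside a Rule, evaluation would then look like:
#     value = self.apply_input_function(my_input, {"sample": "A"})
# which yields "data/A.fastq".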
Example #8
    def __init__(self, rule, dag, targetfile=None, format_wildcards=None):
        self.rule = rule
        self.dag = dag
        self.targetfile = targetfile

        self.wildcards_dict = self.rule.get_wildcards(targetfile)
        self.wildcards = Wildcards(fromdict=self.wildcards_dict)
        self._format_wildcards = (self.wildcards if format_wildcards is None
                                  else Wildcards(fromdict=format_wildcards))

        (self.input, self.output, self.params, self.log, self.benchmark,
         self.ruleio,
         self.dependencies) = rule.expand_wildcards(self.wildcards_dict)

        self.resources_dict = {
            name: min(self.rule.workflow.global_resources.get(name, res), res)
            for name, res in rule.resources.items()
        }
        self.threads = self.resources_dict["_cores"]
        self.resources = Resources(fromdict=self.resources_dict)
        self._inputsize = None

        self.dynamic_output, self.dynamic_input = set(), set()
        self.temp_output, self.protected_output = set(), set()
        self.touch_output = set()
        self.subworkflow_input = dict()
        for f in self.output:
            f_ = self.ruleio[f]
            if f_ in self.rule.dynamic_output:
                self.dynamic_output.add(f)
            if f_ in self.rule.temp_output:
                self.temp_output.add(f)
            if f_ in self.rule.protected_output:
                self.protected_output.add(f)
            if f_ in self.rule.touch_output:
                self.touch_output.add(f)
        for f in self.input:
            f_ = self.ruleio[f]
            if f_ in self.rule.dynamic_input:
                self.dynamic_input.add(f)
            if f_ in self.rule.subworkflow_input:
                self.subworkflow_input[f] = self.rule.subworkflow_input[f_]
        self._hash = self.rule.__hash__()
        if True or not self.dynamic_output:
            for o in self.output:
                self._hash ^= o.__hash__()
Example #9
    def __init__(self, id, jobs, wildcards_dict):
        self.groupid = id
        self.jobs = jobs
        self.toposorted = None
        self._resources = None
        self._input = None
        self._output = None
        self._log = None
        self._inputsize = None
        self._all_products = None
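        # NOTE: self.dag is never assigned in this __init__; the access on
        # the next line presumably relies on a dag property defined elsewhere
        # on the class (e.g. derived from the member jobs).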
        self._attempt = self.dag.workflow.attempt

        self.wildcards_dict = wildcards_dict
        self.wildcards = Wildcards(fromdict=self.wildcards_dict)
Example #10
    def __init__(self,
                 input=None,
                 output=None,
                 params=None,
                 wildcards=None,
                 log="",
                 config=None):
        self.input = InputFiles(self.make_namedlist(input))
        self.output = OutputFiles(self.make_namedlist(output))
        self.params = Params(self.make_namedlist(params))
        self.wildcards = Wildcards(self.make_namedlist(wildcards))
        self.log = Log(log)
        self.config = config or {}
        self.rulename = "mock"
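A minimal usage sketch for this mock object, e.g. to unit-test a script outside of a workflow. The enclosing class name (Snakemake) and the acceptance of plain dicts/lists by make_namedlist are assumptions, since neither is shown in the excerpt:

snakemake = Snakemake(
    input=["data/A.fastq"],
    output=["results/A.bam"],
    params={"threads": 4},
    wildcards={"sample": "A"},
    log="logs/A.log",
    config={"genome": "hg38"},
)
assert snakemake.wildcards.sample == "A"
assert snakemake.rulename == "mock"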
Example #11
    def apply_input_function(
        self,
        func,
        wildcards,
        incomplete_checkpoint_func=lambda e: None,
        raw_exceptions=False,
        groupid=None,
        **aux_params,
    ):
        incomplete = False
        if isinstance(func, _IOFile):
            func = func._file.callable
        elif isinstance(func, AnnotatedString):
            func = func.callable

        if "groupid" in get_function_params(func):
            if groupid is not None:
                aux_params["groupid"] = groupid
            else:
                # Return an empty list of files and the incomplete marker;
                # the job will be reevaluated once groupids have been determined.
                return [], True

        _aux_params = get_input_function_aux_params(func, aux_params)

        try:
            value = func(Wildcards(fromdict=wildcards), **_aux_params)
            if isinstance(value, types.GeneratorType):
                # generators should be immediately collected here,
                # otherwise we would miss any exceptions and
                # would have to capture them again later.
                value = list(value)
        except IncompleteCheckpointException as e:
            value = incomplete_checkpoint_func(e)
            incomplete = True
        except FileNotFoundError as e:
            # Function evaluation can depend on input files. Since expansion can happen during dryrun,
            # where input files are not yet present, we need to skip such cases and
            # mark them as <TBD>.
            if "input" in aux_params and e.filename in aux_params["input"]:
                value = TBDString()
            else:
                raise e
        except (Exception, BaseException) as e:
            if raw_exceptions:
                raise e
            else:
                raise InputFunctionException(e, rule=self, wildcards=wildcards)
        return value, incomplete
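Why this variant collects generators eagerly (a sketch; the input function below is hypothetical): a generator body does not run until it is iterated, so any exception it raises would otherwise escape the try/except above and surface later, without rule and wildcards context.

def gen_input(wildcards):
    for sample in ["A", "B"]:
        yield "data/{}.fastq".format(sample)

# list(gen_input(...)) inside the try block forces evaluation immediately,
# so errors are wrapped in InputFunctionException right away.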
Example #12
    def __init__(self,
                 rule,
                 dag,
                 wildcards_dict=None,
                 format_wildcards=None,
                 targetfile=None):
        self.rule = rule
        self.dag = dag

        # The targetfile that led to this job.
        # It is important to record this, since we need it to submit the job
        # on a cluster. An arbitrary targetfile could otherwise lead to a
        # different composition of wildcard values (in case of ambiguity in
        # matching).
        self.targetfile = targetfile
        self.wildcards_dict = wildcards_dict
        self.wildcards = Wildcards(fromdict=self.wildcards_dict)
        self._format_wildcards = (self.wildcards if format_wildcards is None
                                  else Wildcards(fromdict=format_wildcards))

        self.input, input_mapping, self.dependencies = self.rule.expand_input(
            self.wildcards_dict)

        self.output, output_mapping = self.rule.expand_output(
            self.wildcards_dict)
        # Other properties are evaluated lazily, so that they can use
        # additional parameters and check for files that already exist.
        self._params = None
        self._log = None
        self._benchmark = None
        self._resources = None
        self._conda_env_file = None
        self._conda_env = None
        self._group = None

        self.shadow_dir = None
        self._inputsize = None
        self.is_updated = False

        self._attempt = self.dag.workflow.attempt

        # TODO get rid of these
        self.dynamic_output, self.dynamic_input = set(), set()
        self.temp_output, self.protected_output = set(), set()
        self.touch_output = set()
        self.subworkflow_input = dict()
        for f in self.output:
            f_ = output_mapping[f]
            if f_ in self.rule.dynamic_output:
                self.dynamic_output.add(f)
            if f_ in self.rule.temp_output:
                self.temp_output.add(f)
            if f_ in self.rule.protected_output:
                self.protected_output.add(f)
            if f_ in self.rule.touch_output:
                self.touch_output.add(f)
        for f in self.input:
            f_ = input_mapping[f]
            if f_ in self.rule.dynamic_input:
                self.dynamic_input.add(f)
            if f_ in self.rule.subworkflow_input:
                self.subworkflow_input[f] = self.rule.subworkflow_input[f_]
            elif "subworkflow" in f.flags:
                sub = f.flags["subworkflow"]
                if f in self.subworkflow_input:
                    other = self.subworkflow_input[f]
                    if sub != other:
                        raise WorkflowError(
                            "The input file {} is ambiguously "
                            "associated with two subworkflows {} "
                            "and {}.".format(f, sub, other),
                            rule=self.rule,
                        )
                self.subworkflow_input[f] = sub
        self._hash = self.rule.__hash__()
        for wildcard_value in self.wildcards_dict.values():
            self._hash ^= wildcard_value.__hash__()
Example #13
    def cluster_wildcards(self, job):
        cluster = self.cluster_config.get("__default__", dict()).copy()
        cluster.update(self.cluster_config.get(job.rule.name, dict()))
        return Wildcards(fromdict=cluster)
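A hypothetical cluster_config of the kind this method consumes (as loaded via --cluster-config), showing how per-rule settings are layered over __default__:

cluster_config = {
    "__default__": {"partition": "standard", "mem": "4G"},
    "align": {"mem": "32G"},
}

# For a job of rule "align", cluster_wildcards returns a Wildcards object
# with cluster.partition == "standard" and cluster.mem == "32G", usable in
# e.g. --cluster "sbatch -p {cluster.partition} --mem {cluster.mem}".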
Example #14
    def expand_wildcards(self, wildcards=None):
        """
        Expand wildcards depending on the requested output
        or given wildcards dict.
        """
        def concretize_iofile(f, wildcards):
            if not isinstance(f, _IOFile):
                return IOFile(f, rule=self)
            else:
                return f.apply_wildcards(wildcards,
                                         fill_missing=f in self.dynamic_input,
                                         fail_dynamic=self.dynamic_output)

        def concretize_param(p, wildcards):
            if isinstance(p, str):
                return apply_wildcards(p, wildcards)
            return p

        def check_string_type(f):
            if not isinstance(f, str):
                raise RuleException(
                    "Input function did not return str or list of str.",
                    rule=self)

        def _apply_wildcards(newitems,
                             olditems,
                             wildcards,
                             wildcards_obj,
                             concretize=apply_wildcards,
                             check_return_type=check_string_type,
                             ruleio=None,
                             no_flattening=False):
            for name, item in olditems.allitems():
                start = len(newitems)
                is_iterable = True

                if callable(item):
                    try:
                        item = item(wildcards_obj)
                    except (Exception, BaseException) as e:
                        raise InputFunctionException(e,
                                                     rule=self,
                                                     wildcards=wildcards)

                if not_iterable(item) or no_flattening:
                    item = [item]
                    is_iterable = False
                for item_ in item:
                    check_return_type(item_)
                    concrete = concretize(item_, wildcards)
                    newitems.append(concrete)
                    if ruleio is not None:
                        ruleio[concrete] = item_

                if name:
                    newitems.set_name(
                        name,
                        start,
                        end=len(newitems) if is_iterable else None)

        if wildcards is None:
            wildcards = dict()
        missing_wildcards = self.wildcard_names - set(wildcards.keys())

        if missing_wildcards:
            raise RuleException(
                "Could not resolve wildcards in rule {}:\n{}".format(
                    self.name, "\n".join(self.wildcard_names)),
                lineno=self.lineno,
                snakefile=self.snakefile)

        ruleio = dict()

        try:
            input = InputFiles()
            wildcards_obj = Wildcards(fromdict=wildcards)
            _apply_wildcards(input,
                             self.input,
                             wildcards,
                             wildcards_obj,
                             concretize=concretize_iofile,
                             ruleio=ruleio)

            params = Params()
            # When applying wildcards to params, the return type need not be
            # a string, so the check is disabled.
            _apply_wildcards(params,
                             self.params,
                             wildcards,
                             wildcards_obj,
                             concretize=concretize_param,
                             check_return_type=lambda x: None,
                             no_flattening=True)

            output = OutputFiles(
                o.apply_wildcards(wildcards) for o in self.output)
            output.take_names(self.output.get_names())

            dependencies = {
                None if f is None else f.apply_wildcards(wildcards): rule
                for f, rule in self.dependencies.items()
            }

            ruleio.update(dict((f, f_) for f, f_ in zip(output, self.output)))

            log = Log()
            _apply_wildcards(log,
                             self.log,
                             wildcards,
                             wildcards_obj,
                             concretize=concretize_iofile)

            benchmark = self.benchmark.apply_wildcards(
                wildcards) if self.benchmark else None
            return input, output, params, log, benchmark, ruleio, dependencies
        except WildcardError as ex:
            # this can only happen if an input contains an unresolved wildcard.
            raise RuleException(
                "Wildcards in input, params, log or benchmark file of rule {} cannot be "
                "determined from output files:\n{}".format(self, str(ex)),
                lineno=self.lineno,
                snakefile=self.snakefile)
Example #15
def auto_report(dag, path):
    try:
        from jinja2 import Template, Environment, PackageLoader
    except ImportError as e:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports.")

    if not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html")

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    persistence = dag.workflow.persistence
    results = defaultdict(list)
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError("File {} marked for report but does "
                                        "not exist.".format(f))
                report_obj = get_flag_value(f, "report")
                category = Category(report_obj.category)

                def register_file(f, wildcards_overwrite=None):
                    results[category].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            wildcards_overwrite=wildcards_overwrite,
                        ))
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                if os.path.isdir(f):
                    if not isinstance(report_obj.patterns, list):
                        raise WorkflowError(
                            "Invalid patterns given for report. Must be list.",
                            rule=job.rule,
                        )
                    if not report_obj.patterns:
                        raise WorkflowError(
                            "Directory marked for report but no file patterns given via patterns=[...]. "
                            "See report documentation.",
                            rule=job.rule,
                        )
                    for pattern in report_obj.patterns:
                        pattern = os.path.join(f, pattern)
                        wildcards = glob_wildcards(pattern)._asdict()
                        names = wildcards.keys()
                        for w in zip(*wildcards.values()):
                            w = dict(zip(names, w))
                            w.update(job.wildcards_dict)
                            w = Wildcards(fromdict=w)
                            f = apply_wildcards(pattern, w)
                            register_file(f, wildcards_overwrite=w)

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning("Missing metadata for file {}. Maybe metadata "
                               "was deleted or it was created using an older "
                               "version of Snakemake. This is a non critical "
                               "warning.".format(f))
                continue
            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = min(rec.starttime, meta["starttime"])
                rec.endtime = max(rec.endtime, meta["endtime"])
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                print(e)
                logger.warning("Metadata for file {} was created with a too "
                               "old Snakemake version.".format(f))

    for catresults in results.values():
        catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [{
        "rule": rec.rule,
        "runtime": rec.endtime - rec.starttime
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare end times
    timeline = [{
        "rule": rec.rule,
        "starttime": datetime.datetime.fromtimestamp(rec.starttime).isoformat(),
        "endtime": datetime.datetime.fromtimestamp(rec.endtime).isoformat(),
    } for rec in sorted(records.values(), key=lambda rec: rec.rule)]

    # prepare per-rule information
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    seen = set()
    files = [
        seen.add(res.target) or res for cat in results.values() for res in cat
        if res.target not in seen
    ]

    rst_links = textwrap.dedent("""

    .. _Results: #results
    .. _Rules: #rules
    .. _Statistics: #stats
    {% for cat, catresults in categories|dictsort %}
    .. _{{ cat.name }}: #{{ cat.id }}
    {% for res in files %}
    .. _{{ res.target }}: #{{ res.id }}
    {% endfor %}
    {% endfor %}
    .. _
    """)
    for cat, catresults in results.items():
        for res in catresults:
            res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(snakemake=Snakemake,
                                             categories=results,
                                             files=files),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(res.size for cat in results.values() for res in cat)

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports.")

    # render HTML
    template = env.get_template("report.html")
    with open(path, "w", encoding="utf-8") as out:
        out.write(
            template.render(
                results=results,
                results_size=results_size,
                configfiles=configfiles,
                text=text,
                rulegraph_nodes=rulegraph["nodes"],
                rulegraph_links=rulegraph["links"],
                rulegraph_width=xmax + 20,
                rulegraph_height=ymax + 20,
                runtimes=runtimes,
                timeline=timeline,
                rules=[rec for recs in rules.values() for rec in recs],
                version=__version__,
                now=now,
                pygments_css=HtmlFormatter(
                    style="trac").get_style_defs(".source"),
            ))
    logger.info("Report created.")
Example #16
    def cluster_wildcards(self, job):
        return Wildcards(fromdict=self.cluster_params(job))
Example #17
def auto_report(dag, path, stylesheet=None):
    try:
        from jinja2 import Template, Environment, PackageLoader
    except ImportError as e:
        raise WorkflowError(
            "Python package jinja2 must be installed to create reports."
        )

    mode_embedded = True
    if path.endswith(".zip"):
        mode_embedded = False
    elif not path.endswith(".html"):
        raise WorkflowError("Report file does not end with .html or .zip")

    custom_stylesheet = None
    if stylesheet is not None:
        try:
            with open(stylesheet) as s:
                custom_stylesheet = s.read()
        except (Exception, BaseException) as e:
            raise WorkflowError("Unable to read custom report stylesheet.", e)

    logger.info("Creating report...")

    env = Environment(
        loader=PackageLoader("snakemake", "report"),
        trim_blocks=True,
        lstrip_blocks=True,
    )
    env.filters["get_resource_as_string"] = get_resource_as_string

    persistence = dag.workflow.persistence
    results = defaultdict(lambda: defaultdict(list))
    records = defaultdict(JobRecord)
    recorded_files = set()
    for job in dag.jobs:
        for f in itertools.chain(job.expanded_output, job.input):
            if is_flagged(f, "report") and f not in recorded_files:
                if not f.exists:
                    raise WorkflowError(
                        "File {} marked for report but does " "not exist.".format(f)
                    )
                report_obj = get_flag_value(f, "report")

                def register_file(
                    f, wildcards_overwrite=None, aux_files=None, name_overwrite=None
                ):
                    wildcards = wildcards_overwrite or job.wildcards
                    category = Category(
                        report_obj.category, wildcards=wildcards, job=job
                    )
                    subcategory = Category(
                        report_obj.subcategory, wildcards=wildcards, job=job
                    )

                    results[category][subcategory].append(
                        FileRecord(
                            f,
                            job,
                            report_obj.caption,
                            env,
                            category,
                            dag.workflow,
                            wildcards_overwrite=wildcards_overwrite,
                            mode_embedded=mode_embedded,
                            aux_files=aux_files,
                            name_overwrite=name_overwrite,
                        )
                    )
                    recorded_files.add(f)

                if os.path.isfile(f):
                    register_file(f)
                elif os.path.isdir(f):
                    if report_obj.htmlindex:
                        if mode_embedded:
                            raise WorkflowError(
                                "Directory marked for report specifies htmlindex. "
                                "This is unsupported when requesting a pure HTML report. "
                                "Please use store as zip instead (--report report.zip)."
                            )
                        aux_files = []
                        index_found = False
                        for root, dirs, files in os.walk(f):
                            for name in files:
                                if name != ".snakemake_timestamp":
                                    filepath = os.path.join(root, name)
                                    if (
                                        os.path.relpath(filepath, f)
                                        != report_obj.htmlindex
                                    ):
                                        aux_files.append(filepath)
                                    else:
                                        index_found = True
                        if not index_found:
                            raise WorkflowError(
                                "Given htmlindex {} not found in directory "
                                "marked for report".format(report_obj.htmlindex)
                            )
                        register_file(
                            os.path.join(f, report_obj.htmlindex),
                            aux_files=aux_files,
                            name_overwrite="{}.html".format(os.path.basename(f)),
                        )
                    elif report_obj.patterns:
                        if not isinstance(report_obj.patterns, list):
                            raise WorkflowError(
                                "Invalid patterns given for report. Must be list.",
                                rule=job.rule,
                            )

                        for pattern in report_obj.patterns:
                            pattern = os.path.join(f, pattern)
                            wildcards = glob_wildcards(pattern)._asdict()
                            names = wildcards.keys()
                            for w in zip(*wildcards.values()):
                                w = dict(zip(names, w))
                                w.update(job.wildcards_dict)
                                w = Wildcards(fromdict=w)
                                f = apply_wildcards(pattern, w)
                                register_file(f, wildcards_overwrite=w)
                    else:
                        raise WorkflowError(
                            "Directory marked for report but neither file patterns "
                            "given via patterns=[...], nor htmlindex given. "
                            "See report documentation.",
                            rule=job.rule,
                        )

        for f in job.expanded_output:
            meta = persistence.metadata(f)
            if not meta:
                logger.warning(
                    "Missing metadata for file {}. Maybe metadata "
                    "was deleted or it was created using an older "
                    "version of Snakemake. This is a non-critical "
                    "warning.".format(f)
                )
                continue

            def get_time(rectime, metatime, sel_func):
                if metatime is None:
                    return rectime
                return sel_func(metatime, rectime)

            try:
                job_hash = meta["job_hash"]
                rule = meta["rule"]
                rec = records[(job_hash, rule)]
                rec.rule = rule
                rec.job = job
                rec.starttime = get_time(rec.starttime, meta["starttime"], min)
                rec.endtime = get_time(rec.endtime, meta["endtime"], max)
                rec.conda_env_file = None
                rec.conda_env = meta["conda_env"]
                rec.container_img_url = meta["container_img_url"]
                rec.output.append(f)
            except KeyError as e:
                print(e)
                logger.warning(
                    "Metadata for file {} was created with a "
                    "Snakemake version that is too old.".format(f)
                )

    for subcats in results.values():
        for catresults in subcats.values():
            catresults.sort(key=lambda res: res.name)

    # prepare runtimes
    runtimes = [
        {"rule": rec.rule, "runtime": rec.endtime - rec.starttime}
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    def get_datetime(rectime):
        try:
            return datetime.datetime.fromtimestamp(rectime).isoformat()
        except OSError:
            return None

    # prepare end times
    timeline = [
        {
            "rule": rec.rule,
            "starttime": get_datetime(rec.starttime),
            "endtime": get_datetime(rec.endtime),
        }
        for rec in sorted(records.values(), key=lambda rec: rec.rule)
    ]

    # prepare per-rule information
    rules = defaultdict(list)
    for rec in records.values():
        rule = RuleRecord(rec.job, rec)
        if rec.rule not in rules:
            rules[rec.rule].append(rule)
        else:
            merged = False
            for other in rules[rec.rule]:
                if rule == other:
                    other.add(rec)
                    merged = True
                    break
            if not merged:
                rules[rec.rule].append(rule)

    # rulegraph
    rulegraph, xmax, ymax = rulegraph_d3_spec(dag)

    # configfiles
    configfiles = [ConfigfileRecord(f) for f in dag.workflow.configfiles]

    seen = set()
    files = [
        seen.add(res.target) or res
        for cat in results.values()
        for subcat in cat.values()
        for res in subcat
        if res.target not in seen
    ]

    rst_links = textwrap.dedent(
        """

    .. _Workflow: javascript:show_panel('workflow')
    .. _Statistics: javascript:show_panel('statistics')
    {% for cat, catresults in categories|dictsort %}
    .. _{{ cat.name }}: javascript:show_panel("{{ cat.id }}")
    {% endfor %}
    {% for res in files %}
    .. _{{ res.target }}: javascript:show_panel("{{ res.category.id }}")
    {% endfor %}
    """
    )
    for cat, subcats in results.items():
        for subcat, catresults in subcats.items():
            for res in catresults:
                res.render(env, rst_links, results, files)

    # global description
    text = ""
    if dag.workflow.report_text:
        with dag.workflow.sourcecache.open(dag.workflow.report_text) as f:

            class Snakemake:
                config = dag.workflow.config

            text = f.read() + rst_links
            text = publish_parts(
                env.from_string(text).render(
                    snakemake=Snakemake, categories=results, files=files
                ),
                writer_name="html",
            )["body"]

    # record time
    now = "{} {}".format(datetime.datetime.now().ctime(), time.tzname[0])
    results_size = sum(
        res.size
        for cat in results.values()
        for subcat in cat.values()
        for res in subcat
    )

    try:
        from pygments.formatters import HtmlFormatter
    except ImportError:
        raise WorkflowError(
            "Python package pygments must be installed to create reports."
        )

    template = env.get_template("report.html.jinja2")

    logger.info("Downloading resources and rendering HTML.")

    rendered = template.render(
        results=results,
        results_size=results_size,
        configfiles=configfiles,
        text=text,
        rulegraph_nodes=rulegraph["nodes"],
        rulegraph_links=rulegraph["links"],
        rulegraph_width=xmax + 20,
        rulegraph_height=ymax + 20,
        runtimes=runtimes,
        timeline=timeline,
        rules=[rec for recs in rules.values() for rec in recs],
        version=__version__,
        now=now,
        pygments_css=HtmlFormatter(style="trac").get_style_defs(".source"),
        custom_stylesheet=custom_stylesheet,
        mode_embedded=mode_embedded,
    )

    # TODO look into supporting .WARC format, also see (https://webrecorder.io)

    if not mode_embedded:
        with ZipFile(path, compression=ZIP_DEFLATED, mode="w") as zipout:
            folder = Path(Path(path).stem)
            # store results in data folder
            for subcats in results.values():
                for catresults in subcats.values():
                    for result in catresults:
                        # write raw data
                        zipout.write(result.path, str(folder.joinpath(result.data_uri)))
                        # write thumbnail
                        if result.is_img and result.png_content:
                            zipout.writestr(
                                str(folder.joinpath(result.png_uri)), result.png_content
                            )
                        # write aux files
                        parent = folder.joinpath(result.data_uri).parent
                        for aux_path in result.aux_files:
                            # print(aux_path, parent, str(parent.joinpath(os.path.relpath(aux_path, os.path.dirname(result.path)))))
                            zipout.write(
                                aux_path,
                                str(
                                    parent.joinpath(
                                        os.path.relpath(
                                            aux_path, os.path.dirname(result.path)
                                        )
                                    )
                                ),
                            )

            # write report html
            zipout.writestr(str(folder.joinpath("report.html")), rendered)
    else:
        with open(path, "w", encoding="utf-8") as htmlout:
            htmlout.write(rendered)

    logger.info("Report created: {}.".format(path))
Example #18
    def expand_wildcards(self, wildcards=None):
        """
        Expand wildcards depending on the requested output
        or given wildcards dict.
        """
        def concretize_iofile(f, wildcards):
            if not isinstance(f, _IOFile):
                return IOFile(f, rule=self)
            else:
                return f.apply_wildcards(wildcards,
                                         fill_missing=f in self.dynamic_input,
                                         fail_dynamic=self.dynamic_output)

        def _apply_wildcards(newitems,
                             olditems,
                             wildcards,
                             wildcards_obj,
                             concretize=apply_wildcards,
                             ruleio=None):
            for name, item in olditems.allitems():
                start = len(newitems)
                if callable(item):
                    item = item(wildcards_obj)
                    if not_iterable(item):
                        item = [item]
                    for item_ in item:
                        if not isinstance(item_, str):
                            raise RuleException(
                                "Input function did not return str or list of str.",
                                rule=self)
                        concrete = concretize(item_, wildcards)
                        newitems.append(concrete)
                        if ruleio is not None:
                            ruleio[concrete] = item_
                else:
                    if not_iterable(item):
                        item = [item]
                    for item_ in item:
                        concrete = concretize(item_, wildcards)
                        newitems.append(concrete)
                        if ruleio is not None:
                            ruleio[concrete] = item_
                if name:
                    newitems.set_name(name, start, end=len(newitems))

        if wildcards is None:
            wildcards = dict()
        # TODO validate
        missing_wildcards = self.wildcard_names - set(wildcards.keys())

        if missing_wildcards:
            raise RuleException(
                "Could not resolve wildcards in rule {}:\n{}".format(
                    self.name, "\n".join(self.wildcard_names)),
                lineno=self.lineno,
                snakefile=self.snakefile)

        ruleio = dict()

        try:
            input = InputFiles()
            wildcards_obj = Wildcards(fromdict=wildcards)
            _apply_wildcards(input,
                             self.input,
                             wildcards,
                             wildcards_obj,
                             concretize=concretize_iofile,
                             ruleio=ruleio)

            params = Params()
            _apply_wildcards(params, self.params, wildcards, wildcards_obj)

            output = OutputFiles(
                o.apply_wildcards(wildcards) for o in self.output)
            output.take_names(self.output.get_names())

            ruleio.update(dict((f, f_) for f, f_ in zip(output, self.output)))

            log = self.log.apply_wildcards(wildcards) if self.log else None
            return input, output, params, log, ruleio
        except WildcardError as ex:
            # this can only happen if an input contains an unresolved wildcard.
            raise RuleException(
                "Wildcards in input or log file of rule {} cannot be "
                "determined from output files:\n{}".format(self, str(ex)),
                lineno=self.lineno,
                snakefile=self.snakefile)