Example 1
    def code(self):
        try:
            from pygments.lexers import get_lexer_by_name
            from pygments.formatters import HtmlFormatter
            from pygments import highlight
            import pygments.util
        except ImportError:
            raise WorkflowError(
                "Python package pygments must be installed to create reports.")
        source, language = None, None
        if self._rule.shellcmd is not None:
            source = self._rule.shellcmd
            language = "bash"
        elif self._rule.script is not None:
            logger.info("Loading script code for rule {}".format(self.name))
            _, source, language = script.get_source(self._rule.script,
                                                    self._rule.basedir)
            source = source.decode()
        elif self._rule.wrapper is not None:
            logger.info("Loading wrapper code for rule {}".format(self.name))
            _, source, language = script.get_source(
                wrapper.get_script(self._rule.wrapper,
                                   prefix=self._rule.workflow.wrapper_prefix))
            source = source.decode()

        try:
            lexer = get_lexer_by_name(language)
            return highlight(
                source, lexer,
                HtmlFormatter(linenos=True, cssclass="source", wrapcode=True))
        except pygments.util.ClassNotFound:
            return "<pre><code>source</code></pre>"
Example 2
    def code(self):
        try:
            from pygments.lexers import get_lexer_by_name
            from pygments.formatters import HtmlFormatter
            from pygments import highlight
            import pygments.util
        except ImportError:
            raise WorkflowError(
                "Python package pygments must be installed to create reports.")
        sources, language = None, None
        if self._rule.shellcmd is not None:
            sources = [self._rule.shellcmd]
            language = "bash"
        elif self._rule.script is not None and not contains_wildcard(
                self._rule.script):
            logger.info("Loading script code for rule {}".format(self.name))
            _, source, language = script.get_source(self._rule.script,
                                                    self._rule.basedir)
            sources = [source.decode()]
        elif self._rule.wrapper is not None and not contains_wildcard(
                self._rule.wrapper):
            logger.info("Loading wrapper code for rule {}".format(self.name))
            _, source, language = script.get_source(
                wrapper.get_script(self._rule.wrapper,
                                   prefix=self._rule.workflow.wrapper_prefix))
            sources = [source.decode()]
        elif self._rule.notebook is not None and not contains_wildcard(
                self._rule.notebook):
            _, source, language = script.get_source(self._rule.notebook,
                                                    self._rule.basedir)
            language = language.split("_")[1]
            sources = notebook.get_cell_sources(source)
        else:
            # A run directive. There is no easy way yet to obtain
            # the actual uncompiled source code.
            sources = []
            language = "python"

        try:
            lexer = get_lexer_by_name(language)

            highlighted = [
                highlight(
                    source,
                    lexer,
                    HtmlFormatter(linenos=True,
                                  cssclass="source",
                                  wrapcode=True),
                ) for source in sources
            ]

            return highlighted
        except pygments.util.ClassNotFound:
            return [
                '<pre class="source"><code>{}</code></pre>'.format(source)
                for source in sources
            ]
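
Example 2 additionally splits a notebook into per-cell sources via notebook.get_cell_sources. As a rough illustration only, such a helper could be approximated from the standard Jupyter JSON layout; the sketch below is an assumption, not the actual Snakemake implementation.

import json


def get_cell_sources(notebook_source):
    # Assumed approximation of a cell-source extractor, based on the standard
    # Jupyter notebook JSON layout: {"cells": [{"cell_type": ..., "source": [...]}]}.
    nb = json.loads(notebook_source)
    return [
        "".join(cell["source"]) for cell in nb["cells"]
        if cell["cell_type"] == "code"
    ]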
Example 3
    def _get_provenance_hash(self, job: Job):
        """
        Recursively calculate hash for the output of the given job
        and all upstream jobs in a blockchain fashion.

        This is based on an idea of Sven Nahnsen.
        Fails if job has more than one output file. The reason is that there
        is no way to generate a per-output file hash without generating the files.
        This hash, however, shall work without having to generate the files,
        just by describing all steps down to a given job.
        """
        if job in self._hashes:
            return self._hashes[job]

        workflow = job.dag.workflow
        h = hashlib.sha256()

        # Hash shell command or script.
        if job.is_shell:
            # We cannot use the formatted shell command, because it also contains threads,
            # resources, and filenames (which shall be irrelevant for the hash).
            h.update(job.rule.shellcmd.encode())
        elif job.is_script:
            _, source, _ = script.get_source(
                job.rule.script,
                basedir=job.rule.basedir,
                wildcards=job.wildcards,
                params=job.params,
            )
            h.update(source)
        elif job.is_notebook:
            _, source, _ = script.get_source(
                job.rule.notebook,
                basedir=job.rule.basedir,
                wildcards=job.wildcards,
                params=job.params,
            )
            h.update(source)
        elif job.is_wrapper:
            _, source, _ = script.get_source(
                wrapper.get_script(job.rule.wrapper,
                                   prefix=workflow.wrapper_prefix),
                basedir=job.rule.basedir,
                wildcards=job.wildcards,
                params=job.params,
            )
            h.update(source)

        # Hash params.
        for key, value in sorted(job.params._allitems()):
            if key is not None:
                h.update(key.encode())
            # If this raises a TypeError, we cannot calculate a reliable hash.
            try:
                h.update(json.dumps(value, sort_keys=True).encode())
            except TypeError as e:
                raise WorkflowError(
                    "Rule {} cannot be cached, because params "
                    "are not JSON serializable. "
                    "Consider converting them into a suitable format "
                    "if you are sure that caching is necessary. "
                    "Otherwise, deactivate caching for this rule "
                    "by removing it from the --cache command line argument "
                    "or removing the cache: true directive from the rule itself."
                    .format(job.rule.name),
                    e,
                )

        # Hash input files that are not generated by other jobs (sorted by hash value).
        for file_hash in sorted(
                hash_file(f) for f in job.input if not any(
                    f in depfiles
                    for depfiles in job.dag.dependencies[job].values())):
            h.update(file_hash.encode())

        # Hash used containers or conda environments.
        if workflow.use_conda and job.conda_env:
            if workflow.use_singularity and job.conda_env.container_img_url:
                h.update(job.conda_env.container_img_url.encode())
            h.update(job.conda_env.content)
        elif workflow.use_singularity and job.container_img_url:
            h.update(job.container_img_url.encode())

        # Generate hashes of dependencies, and add them in a blockchain fashion (as input to the current hash, sorted by hash value).
        for dep_hash in sorted(
                self._get_provenance_hash(dep)
                for dep in set(job.dag.dependencies[job].keys())):
            h.update(dep_hash.encode())

        provenance_hash = h.hexdigest()

        # Store for re-use.
        self._hashes[job] = provenance_hash

        return provenance_hash
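
The order-independence of this scheme comes from feeding the sorted dependency digests into the parent digest. The following stripped-down sketch (a standalone approximation, not Snakemake's _get_provenance_hash) shows how code, JSON-serialized params, and upstream hashes chain together.

import hashlib
import json


def provenance_hash(code, params, dep_hashes):
    # Standalone approximation of the scheme above: hash the code, then the
    # JSON-serialized params, then the sorted digests of upstream jobs, so
    # that each digest transitively covers the whole upstream DAG.
    h = hashlib.sha256()
    h.update(code.encode())
    for key, value in sorted(params.items()):
        h.update(key.encode())
        h.update(json.dumps(value, sort_keys=True).encode())
    for dep_hash in sorted(dep_hashes):
        h.update(dep_hash.encode())
    return h.hexdigest()


# Changing anything upstream changes every downstream digest.
leaf = provenance_hash("echo hello > {output}", {"greeting": "hello"}, [])
root = provenance_hash("sort {input} > {output}", {}, [leaf])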
Example 4
    def _get_provenance_hash(self, job: Job):
        """
        Recursively calculate hash for the output of the given job
        and all upstream jobs in a blockchain fashion.

        This is based on an idea of Sven Nahnsen.
        Fails if job has more than one output file. The reason is that there
        is no way to generate a per-output file hash without generating the files.
        This hash, however, shall work without having to generate the files,
        just by describing all steps down to a given job.
        """
        if job in self._hashes:
            return self._hashes[job]

        workflow = job.dag.workflow
        h = hashlib.sha256()

        # Hash shell command or script.
        if job.is_shell:
            # We cannot use the formatted shell command, because it also contains threads,
            # resources, and filenames (which shall be irrelevant for the hash).
            h.update(job.rule.shellcmd.encode())
        elif job.is_script:
            _, source, _ = script.get_source(job.rule.script)
            h.update(source)
        elif job.is_wrapper:
            _, source, _ = script.get_source(
                wrapper.get_script(job.rule.wrapper,
                                   prefix=workflow.wrapper_prefix))
            h.update(source)

        # Hash params.
        for key, value in sorted(job.params._allitems()):
            h.update(key.encode())
            # If this raises a TypeError, we cannot calculate a reliable hash.
            h.update(json.dumps(value, sort_keys=True).encode())

        # Hash input files that are not generated by other jobs.
        for f in job.input:
            if not any(f in depfiles
                       for depfiles in job.dag.dependencies[job].values()):
                with open(f, "rb") as f:
                    # Read and update hash string value in blocks of 4K
                    for byte_block in iter(lambda: f.read(4096), b""):
                        h.update(byte_block)

        # Hash used containers or conda environments.
        if workflow.use_conda and job.conda_env:
            if workflow.use_singularity and job.conda_env.singularity_img_url:
                h.update(job.conda_env.singularity_img_url.encode())
            h.update(job.conda_env.content)
        elif workflow.use_singularity and job.singularity_img_url:
            h.update(job.singularity_img_url.encode())

        # Generate hashes of dependencies, and add them in a blockchain fashion (as input to the current hash).
        for dep_hash in sorted(
                self._get_provenance_hash(dep)
                for dep in set(job.dag.dependencies[job].keys())):
            h.update(dep_hash.encode())

        provenance_hash = h.hexdigest()

        # Store for re-use.
        self._hashes[job] = provenance_hash

        return provenance_hash
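
Example 3 delegates per-file hashing to a hash_file helper, while this older version inlines the block-wise read. A helper with the same behaviour could look roughly like this (the name and signature are taken from the call in Example 3; the body is an assumption based on the loop above).

import hashlib


def hash_file(path):
    # Assumed equivalent of the hash_file() call in Example 3: stream the
    # file in 4K blocks so large inputs are never read into memory at once.
    h = hashlib.sha256()
    with open(path, "rb") as infile:
        for byte_block in iter(lambda: infile.read(4096), b""):
            h.update(byte_block)
    return h.hexdigest()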