Example #1
    def code(self):
        try:
            from pygments.lexers import get_lexer_by_name
            from pygments.formatters import HtmlFormatter
            from pygments import highlight
            import pygments.util
        except ImportError:
            raise WorkflowError(
                "Python package pygments must be installed to create reports.")
        source, language = None, None
        if self._rule.shellcmd is not None:
            source = self._rule.shellcmd
            language = "bash"
        elif self._rule.script is not None:
            logger.info("Loading script code for rule {}".format(self.name))
            _, source, language = script.get_source(self._rule.script,
                                                    self._rule.basedir)
            source = source.decode()
        elif self._rule.wrapper is not None:
            logger.info("Loading wrapper code for rule {}".format(self.name))
            _, source, language = script.get_source(
                wrapper.get_script(self._rule.wrapper,
                                   prefix=self._rule.workflow.wrapper_prefix))
            source = source.decode()

        try:
            lexer = get_lexer_by_name(language)
            return highlight(
                source, lexer,
                HtmlFormatter(linenos=True, cssclass="source", wrapcode=True))
        except pygments.util.ClassNotFound:
            return "<pre><code>source</code></pre>"
Example #2
    def code(self):
        try:
            from pygments.lexers import get_lexer_by_name
            from pygments.formatters import HtmlFormatter
            from pygments import highlight
            import pygments.util
        except ImportError:
            raise WorkflowError(
                "Python package pygments must be installed to create reports.")
        sources, language = None, None
        if self._rule.shellcmd is not None:
            sources = [self._rule.shellcmd]
            language = "bash"
        elif self._rule.script is not None and not contains_wildcard(
                self._rule.script):
            logger.info("Loading script code for rule {}".format(self.name))
            _, source, language = script.get_source(self._rule.script,
                                                    self._rule.basedir)
            sources = [source.decode()]
        elif self._rule.wrapper is not None and not contains_wildcard(
                self._rule.wrapper):
            logger.info("Loading wrapper code for rule {}".format(self.name))
            _, source, language = script.get_source(
                wrapper.get_script(self._rule.wrapper,
                                   prefix=self._rule.workflow.wrapper_prefix))
            sources = [source.decode()]
        elif self._rule.notebook is not None and not contains_wildcard(
                self._rule.notebook):
            _, source, language = script.get_source(self._rule.notebook,
                                                    self._rule.basedir)
            language = language.split("_")[1]
            sources = notebook.get_cell_sources(source)
        else:
            # A run directive. There is no easy way yet to obtain
            # the actual uncompiled source code.
            sources = []
            language = "python"

        try:
            lexer = get_lexer_by_name(language)

            highlighted = [
                highlight(
                    source,
                    lexer,
                    HtmlFormatter(linenos=True,
                                  cssclass="source",
                                  wrapcode=True),
                ) for source in sources
            ]

            return highlighted
        except pygments.util.ClassNotFound:
            return [
                '<pre class="source"><code>{}</code></pre>'.format(source)
                for source in sources
            ]
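
Example #2 additionally splits notebooks into per-cell sources via notebook.get_cell_sources before highlighting each cell separately. A hedged sketch of what such a helper could look like, based only on the standard .ipynb JSON layout (an illustration, not Snakemake's actual implementation):

import json


def get_cell_sources(source):
    # An .ipynb file is JSON with a top-level "cells" list; each code cell
    # stores its source either as a list of lines or as a single string.
    nb = json.loads(source)
    cells = []
    for cell in nb.get("cells", []):
        if cell.get("cell_type") != "code":
            continue
        src = cell.get("source", "")
        cells.append("".join(src) if isinstance(src, list) else src)
    return cells
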
Example #3
def notebook(
    path,
    basedir,
    input,
    output,
    params,
    wildcards,
    threads,
    resources,
    log,
    config,
    rulename,
    conda_env,
    singularity_img,
    singularity_args,
    env_modules,
    bench_record,
    jobid,
    bench_iteration,
    cleanup_scripts,
    shadow_dir,
):
    """
    Load a script from the given basedir + path and execute it.
    """
    path, source, language = get_source(path, basedir)

    ExecClass = {
        "jupyter_python": PythonJupyterNotebook,
        "jupyter_r": RJupyterNotebook,
    }.get(language, None)
    if ExecClass is None:
        raise ValueError(
            "Unsupported notebook: Expecting Jupyter Notebook (.ipynb).")

    executor = ExecClass(
        path,
        source,
        basedir,
        input,
        output,
        params,
        wildcards,
        threads,
        resources,
        log,
        config,
        rulename,
        conda_env,
        singularity_img,
        singularity_args,
        env_modules,
        bench_record,
        jobid,
        bench_iteration,
        cleanup_scripts,
        shadow_dir,
    )
    executor.evaluate()
Example #4
def notebook(
    path,
    basedir,
    input,
    output,
    params,
    wildcards,
    threads,
    resources,
    log,
    config,
    rulename,
    conda_env,
    container_img,
    singularity_args,
    env_modules,
    bench_record,
    jobid,
    bench_iteration,
    cleanup_scripts,
    shadow_dir,
    edit=None,
):
    """
    Load a script from the given basedir + path and execute it.
    """
    draft = False
    if edit is not None:
        if urlparse(path).scheme == "":
            if not os.path.isabs(path):
                local_path = os.path.join(basedir, path)
            else:
                local_path = path
            if not os.path.exists(local_path):
                # draft the notebook, it does not exist yet
                language = None
                draft = True
                path = "file://{}".format(os.path.abspath(local_path))
                if path.endswith(".py.ipynb"):
                    language = "jupyter_python"
                elif path.endswith(".r.ipynb"):
                    language = "jupyter_r"
                else:
                    raise WorkflowError(
                        "Notebook to edit has to end on .py.ipynb or .r.ipynb in order "
                        "to decide which programming language shall be used.")
        else:
            raise WorkflowError(
                "Notebook {} is not local, but edit mode is only allowed for "
                "local notebooks.".format(path))

    if not draft:
        path, source, language = get_source(path, basedir)
    else:
        source = None

    exec_class = get_exec_class(language)

    executor = exec_class(
        path,
        source,
        basedir,
        input,
        output,
        params,
        wildcards,
        threads,
        resources,
        log,
        config,
        rulename,
        conda_env,
        container_img,
        singularity_args,
        env_modules,
        bench_record,
        jobid,
        bench_iteration,
        cleanup_scripts,
        shadow_dir,
    )

    if draft:
        executor.draft(listen=edit)
    else:
        executor.evaluate(edit=edit)
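
In the draft branch above, the notebook language is inferred purely from the filename suffix. The same rule, pulled out into a small standalone helper (the function name is invented for illustration; the real code raises WorkflowError rather than ValueError):

def infer_notebook_language(path):
    # .py.ipynb notebooks get the Python kernel, .r.ipynb notebooks the R
    # kernel; any other suffix is rejected because the language cannot be decided.
    if path.endswith(".py.ipynb"):
        return "jupyter_python"
    if path.endswith(".r.ipynb"):
        return "jupyter_r"
    raise ValueError(
        "Notebook to edit has to end on .py.ipynb or .r.ipynb in order "
        "to decide which programming language shall be used.")
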
Example #5
    def _get_provenance_hash(self, job: Job):
        """
        Recursively calculate hash for the output of the given job
        and all upstream jobs in a blockchain fashion.

        This is based on an idea of Sven Nahnsen.
        Fails if job has more than one output file. The reason is that there
        is no way to generate a per-output file hash without generating the files.
        This hash, however, shall work without having to generate the files,
        just by describing all steps down to a given job.
        """
        if job in self._hashes:
            return self._hashes[job]

        workflow = job.dag.workflow
        h = hashlib.sha256()

        # Hash shell command or script.
        if job.is_shell:
            # We cannot use the formatted shell command, because it also contains threads,
            # resources, and filenames (which shall be irrelevant for the hash).
            h.update(job.rule.shellcmd.encode())
        elif job.is_script:
            _, source, _ = script.get_source(
                job.rule.script,
                basedir=job.rule.basedir,
                wildcards=job.wildcards,
                params=job.params,
            )
            h.update(source)
        elif job.is_notebook:
            _, source, _ = script.get_source(
                job.rule.notebook,
                basedir=job.rule.basedir,
                wildcards=job.wildcards,
                params=job.params,
            )
            h.update(source)
        elif job.is_wrapper:
            _, source, _ = script.get_source(
                wrapper.get_script(job.rule.wrapper,
                                   prefix=workflow.wrapper_prefix),
                basedir=job.rule.basedir,
                wildcards=job.wildcards,
                params=job.params,
            )
            h.update(source)

        # Hash params.
        for key, value in sorted(job.params._allitems()):
            if key is not None:
                h.update(key.encode())
            # If this raises a TypeError, we cannot calculate a reliable hash.
            try:
                h.update(json.dumps(value, sort_keys=True).encode())
            except TypeError as e:
                raise WorkflowError(
                    "Rule {} cannot be cached, because params "
                    "are not JSON serializable. "
                    "Consider converting them into a suitable format "
                    "if you are sure that caching is necessary. "
                    "Otherwise, deactivate caching for this rule "
                    "by removing it from the --cache command line argument "
                    "or removing the cache: true directive from the rule itself."
                    .format(job.rule.name),
                    e,
                )

        # Hash input files that are not generated by other jobs (sorted by hash value).
        for file_hash in sorted(
                hash_file(f) for f in job.input if not any(
                    f in depfiles
                    for depfiles in job.dag.dependencies[job].values())):
            h.update(file_hash.encode())

        # Hash used containers or conda environments.
        if workflow.use_conda and job.conda_env:
            if workflow.use_singularity and job.conda_env.container_img_url:
                h.update(job.conda_env.container_img_url.encode())
            h.update(job.conda_env.content)
        elif workflow.use_singularity and job.container_img_url:
            h.update(job.container_img_url.encode())

        # Generate hashes of dependencies, and add them in a blockchain fashion (as input to the current hash, sorted by hash value).
        for dep_hash in sorted(
                self._get_provenance_hash(dep)
                for dep in set(job.dag.dependencies[job].keys())):
            h.update(dep_hash.encode())

        provenance_hash = h.hexdigest()

        # Store for re-use.
        self._hashes[job] = provenance_hash

        return provenance_hash
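
The core idea behind _get_provenance_hash, independent of Snakemake's job objects: a node's hash is the SHA-256 of its own description combined with the sorted hashes of its dependencies, so any upstream change propagates into every downstream hash. A minimal sketch over an invented node structure (plain dicts with "cmd", "params" and "deps" keys), purely for illustration:

import hashlib
import json


def provenance_hash(node, cache=None):
    cache = {} if cache is None else cache
    key = id(node)
    if key in cache:
        return cache[key]
    h = hashlib.sha256()
    h.update(node["cmd"].encode())
    # Params must be JSON-serializable so the hash is deterministic.
    h.update(json.dumps(node["params"], sort_keys=True).encode())
    # Chain in dependency hashes, sorted so their order does not matter.
    for dep_hash in sorted(provenance_hash(dep, cache) for dep in node["deps"]):
        h.update(dep_hash.encode())
    cache[key] = h.hexdigest()
    return cache[key]


a = {"cmd": "echo a", "params": {}, "deps": []}
b = {"cmd": "echo b > out.txt", "params": {"x": 1}, "deps": [a]}
print(provenance_hash(b))
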
Example #6
def notebook(
    path,
    basedir,
    input,
    output,
    params,
    wildcards,
    threads,
    resources,
    log,
    config,
    rulename,
    conda_env,
    conda_base_path,
    container_img,
    singularity_args,
    env_modules,
    bench_record,
    jobid,
    bench_iteration,
    cleanup_scripts,
    shadow_dir,
    edit,
    runtime_sourcecache_path,
):
    """
    Load a script from the given basedir + path and execute it.
    """
    draft = False
    if edit is not None:
        if is_local_file(path):
            if not os.path.isabs(path):
                local_path = os.path.join(basedir, path)
            else:
                local_path = path
            if not os.path.exists(local_path):
                # draft the notebook, it does not exist yet
                language = None
                draft = True
                path = "file://{}".format(os.path.abspath(local_path))
                if path.endswith(".py.ipynb"):
                    language = "jupyter_python"
                elif path.endswith(".r.ipynb"):
                    language = "jupyter_r"
                else:
                    raise WorkflowError(
                        "Notebook to edit has to end on .py.ipynb or .r.ipynb in order "
                        "to decide which programming language shall be used.")
        else:
            raise WorkflowError(
                "Notebook {} is not local, but edit mode is only allowed for "
                "local notebooks.".format(path))

    if not draft:
        path, source, language, is_local = get_source(
            path, SourceCache(runtime_sourcecache_path), basedir, wildcards,
            params)
    else:
        source = None
        is_local = True
        path = infer_source_file(path)

    exec_class = get_exec_class(language)

    executor = exec_class(
        path,
        source,
        basedir,
        input,
        output,
        params,
        wildcards,
        threads,
        resources,
        log,
        config,
        rulename,
        conda_env,
        conda_base_path,
        container_img,
        singularity_args,
        env_modules,
        bench_record,
        jobid,
        bench_iteration,
        cleanup_scripts,
        shadow_dir,
        is_local,
    )

    if edit is None:
        executor.evaluate(edit=edit)
    elif edit.draft_only:
        executor.draft()
        msg = "Generated skeleton notebook:\n{} ".format(path)
        if conda_env and not container_img:
            msg += (
                "\n\nEditing with VSCode:\nOpen notebook, run command 'Select notebook kernel' (Ctrl+Shift+P or Cmd+Shift+P), and choose:"
                "\n{}\n".format(
                    str(
                        Path(conda_env) / "bin" /
                        executor.get_interpreter_exec())))
            msg += ("\nEditing with Jupyter CLI:"
                    "\nconda activate {}\njupyter notebook {}\n".format(
                        conda_env, path))
        logger.info(msg)
    elif draft:
        executor.draft_and_edit(listen=edit)
    else:
        executor.evaluate(edit=edit)
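
Compared to Example #4, the inline urlparse check has been factored into an is_local_file helper. A plausible sketch of such a check, assuming it only inspects the URL scheme (Snakemake's actual helper may behave differently):

from urllib.parse import urlparse


def is_local_file(path):
    # Plain paths ("scripts/nb.py.ipynb") have no scheme, and file:// URLs are
    # explicitly local; schemes like https:// or s3:// indicate remote sources.
    return urlparse(path).scheme in ("", "file")
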
Example #7
    def _get_provenance_hash(self, job: Job):
        """
        Recursively calculate hash for the output of the given job
        and all upstream jobs in a blockchain fashion.

        This is based on an idea of Sven Nahnsen.
        Fails if job has more than one output file. The reason is that there
        is no way to generate a per-output file hash without generating the files.
        This hash, however, shall work without having to generate the files,
        just by describing all steps down to a given job.
        """
        if job in self._hashes:
            return self._hashes[job]

        workflow = job.dag.workflow
        h = hashlib.sha256()

        # Hash shell command or script.
        if job.is_shell:
            # We cannot use the formatted shell command, because it also contains threads,
            # resources, and filenames (which shall be irrelevant for the hash).
            h.update(job.rule.shellcmd.encode())
        elif job.is_script:
            _, source, _ = script.get_source(job.rule.script)
            h.update(source)
        elif job.is_wrapper:
            _, source, _ = script.get_source(
                wrapper.get_script(job.rule.wrapper,
                                   prefix=workflow.wrapper_prefix))
            h.update(source)

        # Hash params.
        for key, value in sorted(job.params._allitems()):
            h.update(key.encode())
            # If this raises a TypeError, we cannot calculate a reliable hash.
            h.update(json.dumps(value, sort_keys=True).encode())

        # Hash input files that are not generated by other jobs.
        for f in job.input:
            if not any(f in depfiles
                       for depfiles in job.dag.dependencies[job].values()):
                with open(f, "rb") as f:
                    # Read and update hash string value in blocks of 4K
                    for byte_block in iter(lambda: f.read(4096), b""):
                        h.update(byte_block)

        # Hash used containers or conda environments.
        if workflow.use_conda and job.conda_env:
            if workflow.use_singularity and job.conda_env.singularity_img_url:
                h.update(job.conda_env.singularity_img_url.encode())
            h.update(job.conda_env.content)
        elif workflow.use_singularity and job.singularity_img_url:
            h.update(job.singularity_img_url.encode())

        # Generate hashes of dependencies, and add them in a blockchain fashion (as input to the current hash).
        for dep_hash in sorted(
                self._get_provenance_hash(dep)
                for dep in set(job.dag.dependencies[job].keys())):
            h.update(dep_hash.encode())

        provenance_hash = h.hexdigest()

        # Store for re-use.
        self._hashes[job] = provenance_hash

        return provenance_hash
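
Example #5 folds this per-file hashing into a hash_file helper. A minimal sketch of such a function, reading the file in 4 KiB blocks exactly as the loop above does (the body is illustrative, not Snakemake's implementation):

import hashlib


def hash_file(path):
    h = hashlib.sha256()
    with open(path, "rb") as infile:
        # Read in 4 KiB blocks so large inputs never have to fit in memory.
        for byte_block in iter(lambda: infile.read(4096), b""):
            h.update(byte_block)
    return h.hexdigest()
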