Example #1
0
    def merge_match_into_notebook(
        self,
        nb: nbf.NotebookNode,
        nb_meta=("kernelspec", "language_info", "widgets"),
        cell_meta=None,
    ) -> Tuple[int, nbf.NotebookNode]:
        """Match to an executed notebook and return a merged version

        :param nb: The input notebook
        :param nb_meta: metadata keys to merge from the cached notebook (all if None)
        :param cell_meta: cell metadata keys to merge from cached notebook (all if None)
        :raises KeyError: if no match is found
        :return: pk, input notebook with cached code cells and metadata merged.
        """
        pk = self.match_cache_notebook(nb).pk
        cache_nb = self.get_cache_bundle(pk).nb
        nb = copy.deepcopy(nb)
        if nb_meta is None:
            nb.metadata = cache_nb.metadata
        else:
            for key in nb_meta:
                if key in cache_nb.metadata:
                    nb.metadata[key] = cache_nb.metadata[key]
        for idx in range(len(nb.cells)):
            if nb.cells[idx].cell_type == "code":
                cache_cell = cache_nb.cells.pop(0)
                if cell_meta is not None:
                    # update the input metadata with select cached notebook metadata
                    # then add the input metadata to the cached cell
                    nb.cells[idx].metadata.update(
                        {k: v for k, v in cache_cell.metadata.items() if k in cell_meta}
                    )
                    cache_cell.metadata = nb.cells[idx].metadata
                nb.cells[idx] = cache_cell
        return pk, nb
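A minimal usage sketch for the method above, assuming it lives on a jupyter-cache cache
object (the get_cache import and the file names are assumptions, not taken from the snippet):

    from jupyter_cache import get_cache
    import nbformat as nbf

    cache = get_cache(".jupyter_cache")            # assumed cache location
    nb = nbf.read("analysis.ipynb", as_version=4)  # un-executed input notebook
    # Raises KeyError if no cached execution matches the notebook's code cells
    pk, merged = cache.merge_match_into_notebook(nb)
    nbf.write(merged, "analysis.executed.ipynb")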
Example #2
0
def config_from_metadata(nb: NotebookNode) -> dict:
    """Extract configuration data from notebook/cell metadata."""
    nb_metadata = nb.get("metadata", {}).get(META_KEY, {})
    validate_metadata(nb_metadata, "/metadata")

    diff_replace = [tuple(d) for d in nb_metadata.get("diff_replace", [])]
    diff_ignore = set(nb_metadata.get("diff_ignore", []))

    for i, cell in enumerate(nb.get("cells", [])):
        cell_metadata = cell.get("metadata", {}).get(META_KEY, {})
        validate_metadata(cell_metadata, f"/cells/{i}/metadata")

        diff_replace.extend([
            (f"/cells/{i}{p}", x, r)
            for p, x, r in cell_metadata.get("diff_replace", [])
        ])
        diff_ignore.update(
            [f"/cells/{i}{p}" for p in cell_metadata.get("diff_ignore", [])])

    return MetadataConfig(
        tuple(diff_replace),
        diff_ignore,
        nb_metadata.get("skip", False),
        nb_metadata.get("skip_reason", ""),
    )
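A sketch of the metadata shape this function consumes; the META_KEY value is an assumption
(it is defined elsewhere in the host package), and the call is left commented out because
validate_metadata and MetadataConfig are also package-level names:

    META_KEY = "nbreg"  # assumed value, not confirmed by the snippet

    example_nb = {
        "metadata": {
            META_KEY: {
                "diff_ignore": ["/metadata/language_info/version"],
                "diff_replace": [["/outputs/0/data/text/plain", r"\d+\.\d+", "X.Y"]],
                "skip": False,
            }
        },
        "cells": [
            {"metadata": {META_KEY: {"diff_ignore": ["/outputs"]}}},
        ],
    }
    # config = config_from_metadata(nbformat.from_dict(example_nb))
    # -> diff_ignore would contain "/metadata/language_info/version" and "/cells/0/outputs"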
Example #3
0
 def _create_hashable_nb(
         self,
         nb: nbf.NotebookNode,
         compare_nb_meta=("kernelspec", ),
         compare_cell_meta=None,
 ):
     """Create a notebook containing only content desired for hashing."""
     nb = copy.deepcopy(nb)
     nb.metadata = nbf.from_dict({
         k: v
         for k, v in nb.metadata.items()
         if compare_nb_meta is None or (k in compare_nb_meta)
     })
     diff_cells = []
     for cell in nb.cells:
         if cell.cell_type != "code":
             continue
         diff_cell = nbf.from_dict({
             "cell_type": cell.cell_type,
             "source": cell.source,
             "metadata": {
                 k: v
                 for k, v in cell.metadata.items()
                 if compare_cell_meta is None or (k in compare_cell_meta)
             },
             "execution_count": None,
             "outputs": [],
         })
         diff_cells.append(diff_cell)
     nb.cells = diff_cells
     return nb
Example #4
0
def patch_dict(obj, diff):
    newobj = {}
    deleted_keys = set()

    for e in diff:
        op = e.op
        key = e.key
        assert isinstance(key, str), 'dict key must be string'
        assert key not in newobj, 'multiple diff entries target same key: %r' % key

        if op == DiffOp.ADD:
            assert key not in obj, 'patch add value not found for key: %r' % key
            newobj[key] = e.value
        elif op == DiffOp.REMOVE:
            deleted_keys.add(key)
        elif op == DiffOp.REPLACE:
            assert key not in deleted_keys, 'cannot replace deleted key: %r' % key
            newobj[key] = e.value
        elif op == DiffOp.PATCH:
            assert key not in deleted_keys, 'cannot patch deleted key: %r' % key
            newobj[key] = patch(obj[key], e.diff)
        else:
            raise NBDiffFormatError("Invalid op {}.".format(op))

    # Take items not mentioned in diff
    for key in obj:
        if key not in deleted_keys and key not in newobj:
            newobj[key] = copy.deepcopy(obj[key])

    return NotebookNode(newobj)
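A stand-in sketch of what patch_dict consumes: a base dict plus diff entries exposing
.op/.key/.value/.diff. The namedtuple here is an illustrative stand-in, not nbdime's real
diff-entry class:

    from collections import namedtuple

    Entry = namedtuple("Entry", "op key value diff", defaults=(None, None))

    obj = {"a": 1, "b": 2}
    diff = [
        Entry(DiffOp.ADD, "c", 3),   # DiffOp constants as referenced above
        Entry(DiffOp.REMOVE, "b"),
    ]
    # patch_dict(obj, diff) -> NotebookNode({"a": 1, "c": 3})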
Example #5
0
def test_main(mongo_host):
    with mock.patch("notebooker.execute_notebook.pm.execute_notebook") as exec_nb, mock.patch(
        "notebooker.utils.conversion.jupytext.read"
    ) as read_nb, mock.patch("notebooker.utils.conversion.PDFExporter") as pdf_exporter:
        pdf_contents = b"This is a PDF."
        pdf_exporter().from_notebook_node.return_value = (pdf_contents, None)
        versions = nbv.split(".")
        major, minor = int(versions[0]), int(versions[1])
        if major >= 5:
            major, minor = 4, 4
        read_nb.return_value = NotebookNode({"cells": [], "metadata": {}, "nbformat": major, "nbformat_minor": minor})
        exec_nb.side_effect = mock_nb_execute
        job_id = "ttttteeeesssstttt"
        runner = CliRunner()
        cli_result = runner.invoke(
            execute_notebook.main, ["--report-name", "test_report", "--mongo-host", mongo_host, "--job-id", job_id]
        )
        assert cli_result.exit_code == 0
        serializer = PyMongoNotebookResultSerializer(
            mongo_host=mongo_host, database_name="notebooker", result_collection_name="NOTEBOOK_OUTPUT"
        )
        result = serializer.get_check_result(job_id)
        assert isinstance(result, NotebookResultComplete), "Result is not instance of {}, it is {}".format(
            NotebookResultComplete, type(result)
        )
        assert result.raw_ipynb_json
        assert result.pdf == pdf_contents
Example #6
0
def check_active_cell(ext, active_dict):
    text = ("" if ext == ".py" else HEADER[ext]) + active_dict[ext]
    nb = jupytext.reads(text, ext)
    assert len(nb.cells) == 1
    compare(jupytext.writes(nb, ext), text)
    cell = NotebookNode(active_dict[".ipynb"])
    compare_cells(nb.cells, [cell], compare_ids=False)
Example #7
0
 def _prepare_nb_for_cache(self, nb: nbf.NotebookNode, deepcopy=False):
     """Prepare in-place, we remove non-code cells.
     """
     if deepcopy:
         nb = copy.deepcopy(nb)
     nb.cells = [cell for cell in nb.cells if cell.cell_type == "code"]
     return nb
Example #8
0
def test_main(mongo_host, cli_args, expected_mailto):
    with mock.patch(
            "notebooker.execute_notebook.pm.execute_notebook"
    ) as exec_nb, mock.patch(
            "notebooker.utils.conversion.jupytext.read"
    ) as read_nb, mock.patch(
            "notebooker.execute_notebook.send_result_email") as send_email:
        exec_nb.side_effect = Exception()
        versions = nbv.split(".")
        major, minor = int(versions[0]), int(versions[1])
        if major >= 5:
            major, minor = 4, 4
        read_nb.return_value = NotebookNode({
            "cells": [],
            "metadata": {},
            "nbformat": major,
            "nbformat_minor": minor
        })
        job_id = "ttttteeeesssstttt"
        runner = CliRunner()
        cli_result = runner.invoke(
            base_notebooker,
            [
                "--serializer-cls",
                DEFAULT_SERIALIZER,
                "--mongo-host",
                mongo_host,
                "execute-notebook",
                "--job-id",
                job_id,
            ] + cli_args,
        )

        mailto = send_email.call_args_list[0][0][0].mailto
        assert mailto == expected_mailto
Example #9
0
def patch_dict(obj, diff):
    newobj = {}
    deleted_keys = set()

    for e in diff:
        op = e.op
        key = e.key
        assert isinstance(key, string_types)
        assert key not in newobj

        if op == DiffOp.ADD:
            assert key not in obj
            newobj[key] = e.value
        elif op == DiffOp.REMOVE:
            deleted_keys.add(key)
        elif op == DiffOp.REPLACE:
            assert key not in deleted_keys
            newobj[key] = e.value
        elif op == DiffOp.PATCH:
            assert key not in deleted_keys
            newobj[key] = patch(obj[key], e.diff)
        else:
            raise NBDiffFormatError("Invalid op {}.".format(op))

    # Take items not mentioned in diff
    for key in obj:
        if key not in deleted_keys and key not in newobj:
            newobj[key] = copy.deepcopy(obj[key])

    return NotebookNode(newobj)
Example #10
0
 def preprocess(self, notebook, resources):
     filepath = join(resources['metadata']['path'], resources['metadata']['name']) + '.ipynb'
     notebook.cells.append(NotebookNode(
         cell_type='markdown',
         metadata={},
         source=f'<Binder filepath="{filepath}" />'
     ))
     return notebook, resources
Example #11
0
 def preprocess_cell(self, cell: NotebookNode, resources, index):
     if cell.cell_type == 'code':
         for match in re.finditer(r'^%{1,2}(?P<magic>[^ \n\t]*) (?P<parameters>.*)$', cell.source):
             magic = match.group('magic')
             parameters = match.group('parameters').split()
             resources['magics'][magic] = parameters
             cell.source = cell.source[:match.start()] + cell.source[match.end() + 1:]
     return cell, resources
Example #12
0
def _run_code_type(outs, runner, msg_type, content):
    out = NotebookNode(output_type=msg_type)
    if msg_type in ('status', 'pyin', 'execute_input'):
        return outs
    elif msg_type == 'stream':
        out.stream = content['name']
        if 'text' in content:
            out.text = content['text']
        else:
            out.text = content['data']
    elif msg_type in ('display_data', 'pyout'):
        for mime, data in content['data'].items():
            try:
                attr = runner.MIME_MAP[mime]
            except KeyError:
                raise NotImplementedError('unhandled mime type: %s' % mime)

            setattr(out, attr, data)
    elif msg_type == 'pyerr':
        out.ename = content['ename']
        out.evalue = content['evalue']
        out.traceback = content['traceback']
    elif msg_type == 'clear_output':
        outs = list()
        return outs
    else:
        raise NotImplementedError('unhandled iopub message: %s' % msg_type)
    outs.append(out)
    return outs
Example #13
0
 def preprocess(self, nb, resources):
     syspathstr = 'sys.path = {} + sys.path'.format(str(sys.path))
     cell = {'cell_type': 'code',
             'execution_count': None,
             'metadata': {},
             'outputs': [],
             'source': 'import sys\n' + syspathstr}
     nb.cells.insert(0, NotebookNode(cell))
     return nb, resources
Example #14
0
    def create_hashed_notebook(
        self,
        nb: nbf.NotebookNode,
        nb_metadata: Optional[Iterable[str]] = ("kernelspec", ),
        cell_metadata: Optional[Iterable[str]] = None,
    ) -> Tuple[nbf.NotebookNode, str]:
        """Convert a notebook to a standard format and hash.

        Note: we always hash notebooks as version 4.4,
        to allow for matching notebooks of different versions

        :param nb_metadata: The notebook metadata keys to hash (if None, use all)
        :param cell_metadata: The cell metadata keys to hash (if None, use all)

        :return: (notebook, hash)
        """
        # copy the notebook
        nb = copy.deepcopy(nb)
        # update the notebook to consistent version 4.4
        nb = nbf.convert(nb, to_version=NB_VERSION)
        if nb.nbformat_minor > 5:
            raise CachingError(
                "notebook version greater than 4.5 not yet supported")
        # remove non-code cells
        nb.cells = [cell for cell in nb.cells if cell.cell_type == "code"]
        # create notebook for hashing, with selected metadata
        hash_nb = nbf.from_dict({
            "nbformat":
            nb.nbformat,
            "nbformat_minor":
            4,  # v4.5 includes cell ids, which we do not cache
            "metadata": {
                k: v
                for k, v in nb.metadata.items()
                if nb_metadata is None or (k in nb_metadata)
            },
            "cells": [{
                "cell_type": cell.cell_type,
                "source": cell.source,
                "metadata": {
                    k: v
                    for k, v in cell.metadata.items()
                    if cell_metadata is None or (k in cell_metadata)
                },
                "execution_count": None,
                "outputs": [],
            } for cell in nb.cells if cell.cell_type == "code"],
        })

        # hash notebook
        string = nbf.writes(hash_nb, nbf.NO_CONVERT)
        hash_string = hashlib.md5(string.encode()).hexdigest()

        return (nb, hash_string)
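A sketch of the intended behaviour, assuming `cache` is an instance of the class that defines
create_hashed_notebook: notebooks differing only in outputs or execution counts should hash
identically.

    import nbformat.v4 as nbf4

    nb_a = nbf4.new_notebook(cells=[nbf4.new_code_cell("print('hi')")])
    nb_b = nbf4.new_notebook(cells=[nbf4.new_code_cell(
        "print('hi')",
        execution_count=1,
        outputs=[nbf4.new_output("stream", name="stdout", text="hi\n")],
    )])
    # _, hash_a = cache.create_hashed_notebook(nb_a)
    # _, hash_b = cache.create_hashed_notebook(nb_b)
    # assert hash_a == hash_b  # outputs/execution counts are stripped before hashing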
Example #15
0
def run_cell(kc, cell, tout):
    kc.execute(cell.input)

    retval = kc.get_shell_msg(timeout=tout)

    print("CONTENT_STATUS: %s" % retval['content']['status'])
    if retval['content']['status'] == 'error':
        print("ENAME: ")
        pprint(retval['content']['ename'])
        print("EVALUE:")
        pprint(retval['content']['evalue'])
        print("TRACEBACK:")
        for i in retval['content']['traceback']:
            print(i)

    outs = []
    while True:
        try:
            msg = kc.get_iopub_msg(timeout=0.5)
        except Empty:
            break
        msg_type = msg['msg_type']
        if msg_type in ('status', 'execute_input'):
            continue
        elif msg_type == 'clear_output':
            outs = []
            continue

        content = msg['content']
        # print msg_type, content
        out = NotebookNode(output_type=msg_type)

        if msg_type == 'stream':
            out.stream = content['name']
            out.text = content['text']
        elif msg_type in ('display_data', 'execute_result'):
            out['metadata'] = content['metadata']
            for mime, data in content['data'].items():
                attr = mime.split('/')[-1].lower()
                # this gets most right, but fix svg+html, plain
                attr = attr.replace('+xml', '').replace('plain', 'text')
                setattr(out, attr, data)
            if msg_type == 'execute_result':
                out.prompt_number = content['execution_count']
        elif msg_type == 'error':
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
        else:
            print "unhandled iopub msg:", msg_type
        # print "msg_type: %s" % msg_type
        outs.append(out)

    return retval['content']['status'], outs
Example #16
0
    def test_non_code_cell(self, executor, cell_mock, message_mock):
        cell_mock = NotebookNode(source='"foo" = "bar"',
                                 metadata={},
                                 cell_type='raw',
                                 outputs=[])
        # Should NOT raise nor execute any code
        executor.execute_cell(cell_mock, 0)

        # No messages should have been processed
        assert message_mock.call_count == 0
        # The cell outputs should be left untouched
        assert cell_mock.outputs == []
Example #17
0
def filter_cells(notebook: nbformat.NotebookNode, filter):
    """
    Return new notebook object that matches filter function
    """
    picked_cells = []
    for cell in notebook.cells:
        if filter(cell):
            picked_cells.append(cell)

    new_nb = notebook.copy()
    new_nb.cells = picked_cells

    return new_nb
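A minimal sketch of filter_cells, keeping only the code cells of a notebook:

    from nbformat.v4 import new_notebook, new_code_cell, new_markdown_cell

    nb = new_notebook(cells=[new_markdown_cell("# Title"), new_code_cell("x = 1")])
    code_only = filter_cells(nb, lambda cell: cell.cell_type == "code")
    assert len(code_only.cells) == 1
    assert len(nb.cells) == 2  # the original notebook is left untouched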
Example #18
0
def extract_glue_data_cell(
        cell: NotebookNode) -> list[tuple[str, NotebookNode]]:
    """Extract glue data from a single cell."""
    outputs = []
    data = []
    for output in cell.get("outputs", []):
        meta = output.get("metadata", {})
        if "scrapbook" not in meta:
            outputs.append(output)
            continue
        key = meta["scrapbook"]["name"]
        mime_prefix = len(meta["scrapbook"].get("mime_prefix", ""))
        output["data"] = {
            k[mime_prefix:]: v
            for k, v in output["data"].items()
        }
        data.append((key, output))
        if not mime_prefix:
            # assume that the output is a displayable object
            outputs.append(output)
    cell.outputs = outputs
    return data
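A sketch of the output shape this function consumes (scrapbook/glue-style metadata); the
mime prefix string below is an assumption:

    from nbformat import from_dict

    PREFIX = "application/papermill.record/"  # assumed glue mime prefix
    cell = from_dict({
        "cell_type": "code", "source": "", "metadata": {}, "execution_count": 1,
        "outputs": [{
            "output_type": "display_data",
            "metadata": {"scrapbook": {"name": "my_key", "mime_prefix": PREFIX}},
            "data": {PREFIX + "text/plain": "'42'"},
        }],
    })
    data = extract_glue_data_cell(cell)
    assert data[0][0] == "my_key"
    assert data[0][1]["data"] == {"text/plain": "'42'"}
    assert cell.outputs == []  # prefixed outputs are removed from the rendered cell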
Example #19
0
def nb_output_to_disc(ntbk: nbf.NotebookNode,
                      document: nodes.document) -> Path:
    """Write the notebook's output to disk

    We remove all the mime prefixes added by the "glue" step,
    so that writing properly captures the glued images.
    """
    replace_mime = []
    for cell in ntbk.cells:
        if hasattr(cell, "outputs"):
            for out in cell.outputs:
                if "data" in out:
                    # Only do the mimebundle replacing for the scrapbook outputs
                    mime_prefix = (out.get("metadata",
                                           {}).get("scrapbook",
                                                   {}).get("mime_prefix"))
                    if mime_prefix:
                        out["data"] = {
                            key.replace(mime_prefix, ""): val
                            for key, val in out["data"].items()
                        }
                        replace_mime.append(out)

    # Write the notebook's output to disk. This changes metadata in notebook cells
    path_doc = Path(document.settings.env.docname)
    doc_relpath = path_doc.parent
    doc_filename = path_doc.name
    build_dir = Path(document.settings.env.app.outdir).parent
    output_dir = build_dir.joinpath("jupyter_execute", doc_relpath)

    # Write a script too.
    if not ntbk.metadata.get("language_info"):
        # TODO: we can remove this
        # once https://github.com/executablebooks/MyST-NB/issues/177 is merged
        ntbk.metadata["language_info"] = {"file_extension": ".txt"}
        SPHINX_LOGGER.warning(
            "Notebook code has no file extension metadata, "
            "defaulting to `.txt`",
            location=document.settings.env.docname,
        )
    write_notebook_output(ntbk, str(output_dir), doc_filename)

    # Now add back the mime prefixes to the right outputs so they aren't rendered
    # until called from the role/directive
    for out in replace_mime:
        out["data"] = {
            f"{GLUE_PREFIX}{key}": val
            for key, val in out["data"].items()
        }

    return path_doc
Example #20
0
def add_conflicts_record(value, le, re):
    """Add an item 'nbdime-conflicts' to a metadata dict.

    Simply storing metadata conflicts for mergetool inspection.
    """
    assert isinstance(value, dict)
    c = {}
    if le is not None:
        c["local"] = le
    if re is not None:
        c["remote"] = re
    newvalue = NotebookNode(value)
    newvalue["nbdime-conflicts"] = c
    return newvalue
Example #21
0
def blacken_code(cell: NotebookNode, resources: dict,
                 index: int) -> Tuple[NotebookNode, dict]:
    """Format python source code with black (see https://black.readthedocs.io)."""
    try:
        import black
    except ImportError:
        raise ImportError(
            "black not installed: see https://black.readthedocs.io")

    if cell.get("cell_type", None) != "code":
        return cell, resources

    # TODO use metadata to set target versions and whether to raise on exceptions
    # i.e. black.FileMode(target_versions, {black.TargetVersion.PY36})
    try:
        cell.source = black.format_str(cell.source, mode=black.FileMode())
    except (SyntaxError, black.InvalidInput):
        logger.debug(f"cell {index} could not be formatted by black.")

    # code cells don't require a trailing new line
    cell.source = cell.source.rstrip()

    return cell, resources
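A quick sketch of blacken_code on a single cell (black must be installed):

    from nbformat.v4 import new_code_cell

    cell = new_code_cell("x=[1,2 ,3]\nprint( x )")
    cell, _ = blacken_code(cell, resources={}, index=0)
    print(cell.source)  # -> "x = [1, 2, 3]\nprint(x)"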
Example #22
0
def pytabs_create_cells(main_name, other_names=tuple()):
    tab_nav_template = Template(TAB_NAV_TEMPLATE)

    nb1 = jupytext.read(main_name) if isinstance(main_name, str) else main_name
    other_nbs = [jupytext.read(fname) for fname in other_names]
    other_nb_cells = [get_pytab_cells_dict(nb.cells) for nb in other_nbs]

    # collect cells from other notebooks
    #   - main notebook will have meta.pytabs.class
    #   - other notebooks will have matching cells
    # need index from main notebook for next step

    # result should be a list of cells (first being a nav list)
    pytab_cells = {}
    for nb1_indx, cell in enum_pytab_cells(nb1.cells):
        tab_class = cell.metadata["pytabs"]["class"]

        # add cell ids
        new_cell = create_node_with_id(cell)

        other_cells_all = [
            process_other_cell(cell_dict, tab_class)
            for cell_dict in other_nb_cells
        ]
        other_cells = [cell for cell in other_cells_all if cell is not None]
        other_cell_meta = [cell.metadata for cell in other_cells]

        # run template ------------
        nav_cell = NotebookNode(
            cell_type='markdown',
            metadata=NotebookNode(),
            source=tab_nav_template.render(
                tabs=[new_cell.metadata, *other_cell_meta]))

        # output list of cells -------
        pytab_cells[nb1_indx] = [nav_cell, new_cell, *other_cells]
    return pytab_cells
Example #23
0
    def test_no_source(self, executor, cell_mock, message_mock):
        cell_mock = NotebookNode(
            # Stripped source is empty
            source='     ',
            metadata={},
            cell_type='code',
            outputs=[],
        )
        # Should NOT raise nor execute any code
        executor.execute_cell(cell_mock, 0)

        # No messages should have been processed
        assert message_mock.call_count == 0
        # The cell outputs should be left untouched
        assert cell_mock.outputs == []
Example #24
0
def coalesce_streams(cell: NotebookNode, resources: dict,
                     index: int) -> Tuple[NotebookNode, dict]:
    """Merge all stream outputs with shared names into single streams.

    This ensures deterministic outputs.

    Adapted from:
    https://github.com/computationalmodelling/nbval/blob/master/nbval/plugin.py.
    """

    if "outputs" not in cell:
        return cell, resources

    new_outputs = []
    streams = {}
    for output in cell.outputs:
        if output.output_type == "stream":
            if output.name in streams:
                streams[output.name].text += output.text
            else:
                new_outputs.append(output)
                streams[output.name] = output
        else:
            new_outputs.append(output)

    # process \r and \b characters
    for output in streams.values():
        old = output.text
        while len(output.text) < len(old):
            old = output.text
            # Cancel out anything-but-newline followed by backspace
            output.text = RGX_BACKSPACE.sub("", output.text)
        # Replace all carriage returns not followed by newline
        output.text = RGX_CARRIAGERETURN.sub("", output.text)

    # We also want to ensure stdout and stderr are always in the same consecutive order,
    # because they are asynchronous, so order isn't guaranteed.
    for i, output in enumerate(new_outputs):
        if output.output_type == "stream" and output.name == "stderr":
            if (len(new_outputs) >= i + 2
                    and new_outputs[i + 1].output_type == "stream"
                    and new_outputs[i + 1].name == "stdout"):
                stdout = new_outputs.pop(i + 1)
                new_outputs.insert(i, stdout)

    cell.outputs = new_outputs

    return cell, resources
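A sketch of coalesce_streams merging consecutive stdout outputs. RGX_BACKSPACE and
RGX_CARRIAGERETURN are module-level patterns assumed by the function above; the definitions
below are plausible stand-ins only so the sketch is self-contained:

    import re
    from nbformat.v4 import new_code_cell, new_output

    RGX_BACKSPACE = re.compile(r"[^\n]\b")             # stand-in pattern
    RGX_CARRIAGERETURN = re.compile(r".*\r(?=[^\n])")  # stand-in pattern

    cell = new_code_cell("print('a'); print('b')", outputs=[
        new_output("stream", name="stdout", text="a\n"),
        new_output("stream", name="stdout", text="b\n"),
    ])
    cell, _ = coalesce_streams(cell, resources={}, index=0)
    assert len(cell.outputs) == 1
    assert cell.outputs[0].text == "a\nb\n"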
Example #25
0
        def test_mock_wrapper(self):
            """
            This inner function wrapper populates the preprocessor object with
            the fake kernel client. This client has its iopub and shell
            channels mocked so as to fake the setup handshake and return
            the messages passed into prepare_cell_mocks as the run_cell loop
            processes them.
            """
            cell_mock = NotebookNode(source='"foo" = "bar"', outputs=[])
            preprocessor = build_preprocessor({})
            preprocessor.nb = {'cells': [cell_mock]}

            # self.kc.iopub_channel.get_msg => message_mock.side_effect[i]
            message_mock = iopub_messages_mock()
            preprocessor.kc = MagicMock(
                iopub_channel=MagicMock(get_msg=message_mock),
                shell_channel=MagicMock(get_msg=shell_channel_message_mock()),
                execute=MagicMock(return_value=parent_id))
            return func(self, preprocessor, cell_mock, message_mock)
Example #26
0
def test_main(mongo_host):
    with mock.patch(
            "notebooker.execute_notebook.pm.execute_notebook"
    ) as exec_nb, mock.patch(
            "notebooker.utils.conversion.jupytext.read"
    ) as read_nb, mock.patch(
            "notebooker.utils.conversion.PDFExporter") as pdf_exporter:
        pdf_contents = b"This is a PDF."
        pdf_exporter().from_notebook_node.return_value = (pdf_contents, None)
        versions = nbv.split(".")
        major, minor = int(versions[0]), int(versions[1])
        if major >= 5:
            major, minor = 4, 4
        read_nb.return_value = NotebookNode({
            "cells": [],
            "metadata": {},
            "nbformat": major,
            "nbformat_minor": minor
        })
        exec_nb.side_effect = mock_nb_execute
        job_id = "ttttteeeesssstttt"
        runner = CliRunner()
        # usually the parent process calls this and sets up the environment, then also explicitly passes
        # values on the CLI
        setup_env_vars()
        cli_result = runner.invoke(execute_notebook.main, [
            "--report-name", "test_report", "--mongo-host", mongo_host,
            "--job-id", job_id
        ])
        assert cli_result.exit_code == 0
        serializer = PyMongoNotebookResultSerializer(
            mongo_host=mongo_host,
            database_name=os.environ["DATABASE_NAME"],
            result_collection_name=os.environ["RESULT_COLLECTION_NAME"],
        )
        result = serializer.get_check_result(job_id)
        assert isinstance(result, NotebookResultComplete
                          ), "Result is not instance of {}, it is {}".format(
                              NotebookResultComplete, type(result))
        assert result.raw_ipynb_json
        assert result.pdf == pdf_contents
Example #27
0
def beautifulsoup(cell: NotebookNode, resources: dict,
                  index: int) -> Tuple[NotebookNode, dict]:
    """Format text/html and image/svg+xml outputs with beautiful-soup.

    See: https://beautiful-soup-4.readthedocs.io.
    """
    try:
        from bs4 import BeautifulSoup
    except ImportError:
        raise ImportError(
            "bs4 not installed: see https://beautiful-soup-4.readthedocs.io")

    if cell.get("cell_type", None) != "code":
        return cell, resources

    if "outputs" not in cell:
        return cell, resources

    for i, output in enumerate(cell.outputs):
        if output.output_type not in ["execute_result", "display_data"]:
            continue
        for mimetype, value in output.get("data", {}).items():
            if mimetype not in ["text/html", "image/svg+xml"]:
                continue
            path = f"/cells/{index}/outputs/{i}/{mimetype}"
            # TODO use metadata to set builder and whether to raise on exceptions
            try:
                output["data"][mimetype] = BeautifulSoup(
                    output["data"][mimetype], "html.parser").prettify()
                # record which paths have been formatted (mainly for testing)
                resources.setdefault("beautifulsoup", []).append(path)
            except Exception:  # TODO what exceptions might be raised?
                logger.debug(
                    f"{path} could not be formatted by beautiful-soup.")

    return cell, resources
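A quick sketch of the beautifulsoup post-processor on one HTML output (beautifulsoup4 must
be installed):

    from nbformat.v4 import new_code_cell, new_output

    cell = new_code_cell("df", execution_count=1, outputs=[
        new_output("execute_result", data={"text/html": "<div><p>hi</p></div>"},
                   execution_count=1),
    ])
    resources = {}
    cell, resources = beautifulsoup(cell, resources, index=0)
    print(cell.outputs[0]["data"]["text/html"])  # pretty-printed HTML
    print(resources["beautifulsoup"])            # ["/cells/0/outputs/0/text/html"]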
Example #28
0
        .. _launchbinder: http://mybinder.org:/repo/brian-team/brian2-binder/notebooks/tutorials/{tutorial}.ipynb
    
        .. note::
           This tutorial is a static non-editable version. You can launch an
           interactive, editable version without installing any local files
           using the Binder service (although note that at some times this
           may be slow or fail to open): |launchbinder|_
    
           Alternatively, you can download a copy of the notebook file
           to use locally: :download:`{tutorial}.ipynb`
    
           See the :doc:`tutorial overview page <index>` for more details.

    '''.format(tutorial=basename))
    notebook.cells.insert(
        1, NotebookNode(cell_type=u'raw', metadata={}, source=note))
    exporter = RSTExporter()
    output, resources = exporter.from_notebook_node(
        notebook, resources={'unique_key': basename + '_image'})
    with codecs.open(output_rst_fname, 'w', encoding='utf-8') as f:
        f.write(output)

    for image_name, image_data in resources['outputs'].items():
        with open(os.path.join(target_dir, image_name), 'wb') as f:
            f.write(image_data)

print('Generating index.rst')

text = '''
..
    This is a generated file, do not edit directly.
Example #29
0
    def runtest(self):
        """
        Run test is called by pytest for each of these nodes that are
        collected i.e. a notebook cell. Runs all the cell tests in one
        kernel without restarting.  It is very common for ipython
        notebooks to run through assuming a single kernel.  The cells
        are tested that they execute without errors and that the
        output matches the output stored in the notebook.

        """
        # Simply skip cell if configured to
        if self.options['skip']:
            pytest.skip()

        kernel = self.parent.kernel
        if not kernel.is_alive():
            raise RuntimeError("Kernel dead on test start")

        # Execute the code in the current cell in the kernel. Returns the
        # message id of the corresponding response from iopub.
        msg_id = kernel.execute_cell_input(
            self.cell.source, allow_stdin=False)

        # Timeout for the cell execution
        # after code is sent for execution, the kernel sends a message on
        # the shell channel. Timeout if no message received.
        timeout = self.config.option.nbval_cell_timeout
        timed_out_this_run = False

        # Poll the shell channel to get a message
        try:
            self.parent.kernel.await_reply(msg_id, timeout=timeout)
        except Empty:  # Timeout reached
            # Try to interrupt kernel, as this will give us traceback:
            kernel.interrupt()
            self.parent.timed_out = True
            timed_out_this_run = True

        # This list stores the output information for the entire cell
        outs = []
        # TODO: Only store if comparing with nbdime, to save on memory usage
        self.test_outputs = outs

        # Now get the outputs from the iopub channel
        while True:
            # The iopub channel broadcasts a range of messages. We keep reading
            # them until we find the message containing the side-effects of our
            # code execution.
            try:
                # Get a message from the kernel iopub channel
                msg = self.parent.get_kernel_message(timeout=self.output_timeout)

            except Empty:
                # This is not working: ! The code will not be checked
                # if the time is out (when the cell stops to be executed?)
                # Halt kernel here!
                kernel.stop()
                if timed_out_this_run:
                    self.raise_cell_error(
                        "Timeout of %g seconds exceeded while executing cell."
                        " Failed to interrupt kernel in %d seconds, so "
                        "failing without traceback." %
                            (timeout, self.output_timeout),
                    )
                else:
                    self.parent.timed_out = True
                    self.raise_cell_error(
                        "Timeout of %d seconds exceeded waiting for output." %
                            self.output_timeout,
                    )



            # now we must handle the message by checking the type and reply
            # info and we store the output of the cell in a notebook node object
            msg_type = msg['msg_type']
            reply = msg['content']
            out = NotebookNode(output_type=msg_type)

            # Is the iopub message related to this cell execution?
            if msg['parent_header'].get('msg_id') != msg_id:
                continue

            # When the kernel starts to execute code, it will enter the 'busy'
            # state and when it finishes, it will enter the 'idle' state.
            # The kernel will publish state 'starting' exactly
            # once at process startup.
            if msg_type == 'status':
                if reply['execution_state'] == 'idle':
                    break
                else:
                    continue

            # execute_input: To let all frontends know what code is
            # being executed at any given time, these messages contain a
            # re-broadcast of the code portion of an execute_request,
            # along with the execution_count.
            elif msg_type == 'execute_input':
                continue

            # com? execute reply?
            elif msg_type.startswith('comm'):
                continue
            elif msg_type == 'execute_reply':
                continue

            # This message type is used to clear the output that is
            # visible on the frontend
            # elif msg_type == 'clear_output':
            #     outs = []
            #     continue


            # elif (msg_type == 'clear_output'
            #       and msg_type['execution_state'] == 'idle'):
            #     outs = []
            #     continue

            # 'execute_result' is equivalent to a display_data message.
            # The object being displayed is passed to the display
            # hook, i.e. the *result* of the execution.
            # The only difference is that 'execute_result' has an
            # 'execution_count' number, which does not seem useful
            # (we will filter it in the sanitize function)
            #
            # When the reply is display_data or execute_result,
            # the dictionary contains
            # a 'data' sub-dictionary with the 'text' AND the 'image/png'
            # picture (in hexadecimal). There is also a 'metadata' entry,
            # but it is currently not of much use; sometimes it holds information
            # such as the height and width of the image (CHECK the documentation)
            # Thus we iterate through the keys (mimes) 'data' sub-dictionary
            # to obtain the 'text' and 'image/png' information
            elif msg_type in ('display_data', 'execute_result'):
                out['metadata'] = reply['metadata']
                out['data'] = reply['data']
                outs.append(out)

                if msg_type == 'execute_result':
                    out.execution_count = reply['execution_count']


            # if the message is a stream then we store the output
            elif msg_type == 'stream':
                out.name = reply['name']
                out.text = reply['text']
                outs.append(out)


            # if the message type is an error then an error has occurred during
            # cell execution. Therefore raise a cell error and pass the
            # traceback information.
            elif msg_type == 'error':
                # Store error in output first
                out['ename'] = reply['ename']
                out['evalue'] = reply['evalue']
                out['traceback'] = reply['traceback']
                outs.append(out)
                if not self.options['check_exception']:
                    # Ensure we flush iopub before raising error
                    try:
                        self.parent.kernel.await_idle(msg_id, self.output_timeout)
                    except Empty:
                        self.stop()
                        raise RuntimeError('Timed out waiting for idle kernel!')
                    traceback = '\n' + '\n'.join(reply['traceback'])
                    if out['ename'] == 'KeyboardInterrupt' and self.parent.timed_out:
                        msg = "Timeout of %g seconds exceeded executing cell" % timeout
                    else:
                        msg = "Cell execution caused an exception"
                    self.raise_cell_error(msg, traceback)

            # any other message type is not expected
            # should this raise an error?
            else:
                print("unhandled iopub msg:", msg_type)

        outs[:] = coalesce_streams(outs)

        # Cells where the reference is not run, will not check outputs:
        unrun = self.cell.execution_count is None
        if unrun and self.cell.outputs:
            self.raise_cell_error('Unrun reference cell has outputs')

        # Compare if the outputs have the same number of lines
        # and throw an error if it fails
        # if len(outs) != len(self.cell.outputs):
        #     self.diff_number_outputs(outs, self.cell.outputs)
        #     failed = True
        failed = False
        if self.options['check'] and not unrun:
            if not self.compare_outputs(outs, coalesce_streams(self.cell.outputs)):
                failed = True

        # If the comparison failed then we raise an exception.
        if failed:
            # The traceback containing the difference in the outputs is
            # stored in the variable comparison_traceback
            self.raise_cell_error(
                "Cell outputs differ",
                # Here we must put the traceback output:
                '\n'.join(self.comparison_traceback),
            )
Example #30
0
def _execute_cell(cell, shell, iopub, timeout=300):
    """
    Execute an IPython Notebook Cell and return the cell output.

    Parameters
    ----------
    cell : IPython.nbformat.current.NotebookNode
        The IPython Notebook cell to execute.
    shell : IPython.kernel.blocking.channels.BlockingShellChannel
        The shell channel which the cell is submitted to for execution.
    iopub : IPython.kernel.blocking.channels.BlockingIOPubChannel
        The iopub channel used to retrieve the result of the execution.
    timeout : int
        The number of seconds to wait for the execution to finish before giving
        up.

    Returns
    -------
    cell_outputs : list
        The list of NotebookNodes holding the result of the execution.

    """

    # Execute input
    shell.execute(cell.input)
    exe_result = shell.get_shell_msg(timeout=timeout)
    if exe_result['content']['status'] == 'error':
        raise RuntimeError('Failed to execute cell due to error: {!r}'.format(
            str(exe_result['content']['evalue'])))

    cell_outputs = list()

    # Poll for iopub messages until no more messages are available
    while True:
        try:
            msg = iopub.get_iopub_msg(timeout=0.5)
        except Empty:
            break

        msg_type = msg['msg_type']
        if msg_type in ('status', 'pyin', 'execute_input', 'execute_result'):
            continue

        content = msg['content']
        node = NotebookNode(output_type=msg_type)

        if msg_type == 'stream':
            node.stream = content['name']
            if 'text' in content:
                # v4 notebook format
                node.text = content['text']
            else:
                # v3 notebook format
                node.text = content['data']

            bug_text = 'Using Anaconda Cloud api site https://api.anaconda.org'
            if bug_text in node.text:
                # Ignore conda (spam) messages/warnings
                continue
        elif msg_type in ('display_data', 'pyout'):
            node['metadata'] = content['metadata']
            for mime, data in list(content['data'].items()):
                attr = mime.split('/')[-1].lower()
                attr = attr.replace('+xml', '').replace('plain', 'text')
                setattr(node, attr, data)
            if msg_type == 'pyout':
                node.prompt_number = content['execution_count']
        elif msg_type == 'pyerr':
            node.ename = content['ename']
            node.evalue = content['evalue']
            node.traceback = content['traceback']
        else:
            raise RuntimeError('Unhandled iopub message of type: {}'.format(
                msg_type))

        cell_outputs.append(node)

    return cell_outputs
Example #31
0
def new_latex_cell(source=''):
    return NotebookNode(
        cell_type='raw',
        metadata=NotebookNode(raw_mimetype='text/latex'),
        source=source,
    )
Example #32
0
    def runtest(self):
        """
        Run test is called by pytest for each of these nodes that are
        collected i.e. a notebook cell. Runs all the cell tests in one
        kernel without restarting.  It is very common for ipython
        notebooks to run through assuming a single kernel.  The cells
        are tested that they execute without errors and that the
        output matches the output stored in the notebook.

        """
        # Simply skip cell if configured to
        if self.options['skip']:
            pytest.skip()

        kernel = self.parent.kernel
        if not kernel.is_alive():
            raise RuntimeError("Kernel dead on test start")

        # Execute the code in the current cell in the kernel. Returns the
        # message id of the corresponding response from iopub.
        msg_id = kernel.execute_cell_input(
            self.cell.source, allow_stdin=False)

        # Timeout for the cell execution
        # after code is sent for execution, the kernel sends a message on
        # the shell channel. Timeout if no message received.
        timeout = self.config.option.nbval_cell_timeout
        timed_out_this_run = False

        # Poll the shell channel to get a message
        try:
            self.parent.kernel.await_reply(msg_id, timeout=timeout)
        except Empty:  # Timeout reached
            # Try to interrupt kernel, as this will give us traceback:
            kernel.interrupt()
            self.parent.timed_out = True
            timed_out_this_run = True

        # This list stores the output information for the entire cell
        outs = []
        # TODO: Only store if comparing with nbdime, to save on memory usage
        self.test_outputs = outs

        # Now get the outputs from the iopub channel
        while True:
            # The iopub channel broadcasts a range of messages. We keep reading
            # them until we find the message containing the side-effects of our
            # code execution.
            try:
                # Get a message from the kernel iopub channel
                msg = self.parent.get_kernel_message(timeout=self.output_timeout)

            except Empty:
                # This is not working: ! The code will not be checked
                # if the time is out (when the cell stops to be executed?)
                # Halt kernel here!
                kernel.stop()
                if timed_out_this_run:
                    self.raise_cell_error(
                        "Timeout of %g seconds exceeded while executing cell."
                        " Failed to interrupt kernel in %d seconds, so "
                        "failing without traceback." %
                            (timeout, self.output_timeout),
                    )
                else:
                    self.parent.timed_out = True
                    self.raise_cell_error(
                        "Timeout of %d seconds exceeded waiting for output." %
                            self.output_timeout,
                    )



            # now we must handle the message by checking the type and reply
            # info and we store the output of the cell in a notebook node object
            msg_type = msg['msg_type']
            reply = msg['content']
            out = NotebookNode(output_type=msg_type)

            # Is the iopub message related to this cell execution?
            if msg['parent_header'].get('msg_id') != msg_id:
                continue

            # When the kernel starts to execute code, it will enter the 'busy'
            # state and when it finishes, it will enter the 'idle' state.
            # The kernel will publish state 'starting' exactly
            # once at process startup.
            if msg_type == 'status':
                if reply['execution_state'] == 'idle':
                    break
                else:
                    continue

            # execute_input: To let all frontends know what code is
            # being executed at any given time, these messages contain a
            # re-broadcast of the code portion of an execute_request,
            # along with the execution_count.
            elif msg_type == 'execute_input':
                continue

            # com? execute reply?
            elif msg_type.startswith('comm'):
                continue
            elif msg_type == 'execute_reply':
                continue

            # This message type is used to clear the output that is
            # visible on the frontend
            # elif msg_type == 'clear_output':
            #     outs = []
            #     continue


            # elif (msg_type == 'clear_output'
            #       and msg_type['execution_state'] == 'idle'):
            #     outs = []
            #     continue

            # 'execute_result' is equivalent to a display_data message.
            # The object being displayed is passed to the display
            # hook, i.e. the *result* of the execution.
            # The only difference is that 'execute_result' has an
            # 'execution_count' number, which does not seem useful
            # (we will filter it in the sanitize function)
            #
            # When the reply is display_data or execute_result,
            # the dictionary contains
            # a 'data' sub-dictionary with the 'text' AND the 'image/png'
            # picture (in hexadecimal). There is also a 'metadata' entry,
            # but it is currently not of much use; sometimes it holds information
            # such as the height and width of the image (CHECK the documentation)
            # Thus we iterate through the keys (mimes) 'data' sub-dictionary
            # to obtain the 'text' and 'image/png' information
            elif msg_type in ('display_data', 'execute_result'):
                out['metadata'] = reply['metadata']
                out['data'] = reply['data']
                outs.append(out)

                if msg_type == 'execute_result':
                    out.execution_count = reply['execution_count']


            # if the message is a stream then we store the output
            elif msg_type == 'stream':
                out.name = reply['name']
                out.text = reply['text']
                outs.append(out)


            # if the message type is an error then an error has occurred during
            # cell execution. Therefore raise a cell error and pass the
            # traceback information.
            elif msg_type == 'error':
                # Store error in output first
                out['ename'] = reply['ename']
                out['evalue'] = reply['evalue']
                out['traceback'] = reply['traceback']
                outs.append(out)
                if not self.options['check_exception']:
                    # Ensure we flush iopub before raising error
                    try:
                        self.parent.kernel.await_idle(msg_id, self.output_timeout)
                    except Empty:
                        self.stop()
                        raise RuntimeError('Timed out waiting for idle kernel!')
                    traceback = '\n' + '\n'.join(reply['traceback'])
                    if out['ename'] == 'KeyboardInterrupt' and self.parent.timed_out:
                        msg = "Timeout of %g seconds exceeded executing cell" % timeout
                    else:
                        msg = "Cell execution caused an exception"
                    self.raise_cell_error(msg, traceback)

            # any other message type is not expected
            # should this raise an error?
            else:
                print("unhandled iopub msg:", msg_type)

        outs[:] = coalesce_streams(outs)

        # Cells where the reference is not run, will not check outputs:
        unrun = self.cell.execution_count is None
        if unrun and self.cell.outputs:
            self.raise_cell_error('Unrun reference cell has outputs')

        # Compare if the outputs have the same number of lines
        # and throw an error if it fails
        # if len(outs) != len(self.cell.outputs):
        #     self.diff_number_outputs(outs, self.cell.outputs)
        #     failed = True
        failed = False
        if self.options['check'] and not unrun:
            if not self.compare_outputs(outs, coalesce_streams(self.cell.outputs)):
                failed = True

        # If the comparison failed then we raise an exception.
        if failed:
            # The traceback containing the difference in the outputs is
            # stored in the variable comparison_traceback
            self.raise_cell_error(
                "Cell outputs differ",
                # Here we must put the traceback output:
                '\n'.join(self.comparison_traceback),
            )
Example #33
0
    def run_cell(self, line, code):
        self.kc.execute(code, allow_stdin=True)
        reply = self.kc.get_shell_msg()
        status = reply['content']['status']

        outs = list()
        while True:
            try:
                msg = self.kc.get_iopub_msg(timeout=1)
                if msg['msg_type'] == 'status':
                    if msg['content']['execution_state'] == 'idle':
                        break
            except Empty:
                print("empty ?!")
                raise

            content = msg['content']
            msg_type = msg['msg_type']

            notebook3_format_conversions = {
                'error': 'pyerr',
                'execute_result': 'pyout'
            }
            msg_type = notebook3_format_conversions.get(msg_type, msg_type)

            out = NotebookNode(output_type=msg_type)

            if msg_type == 'pyout':
                print(content['data']['text/plain'])
                continue
            if msg_type in ('status', 'pyin', 'execute_input'):
                continue
            elif msg_type in ('comm_open', 'comm_msg', 'comm_close'):
                # TODO handle this msg ?!?!?!
                continue
            elif msg_type == 'stream':
                out.stream = content['name']
                if 'text' in content:
                    out.text = content['text']
                else:
                    out.text = content['data']
            elif msg_type in ('display_data', 'pyout'):
                for mime, data in content['data'].items():
                    try:
                        attr = self.MIME_MAP[mime]
                    except KeyError:
                        print("unhandled mime")
                        raise NotImplementedError('unhandled mime type: %s' %
                                                  mime)

                    setattr(out, attr, data)
            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            elif msg_type == 'clear_output':
                outs = list()
                continue
            else:
                print("unhandled " + msg_type)
                raise NotImplementedError('unhandled iopub message: %s' %
                                          msg_type)
            outs.append(out)
            # NOTE: Ver 4 format still have 'pyout', Why?
            # upgrade_outputs(outs)

            print(str(outs))
            print("status: {}".format(status))
Example #34
0
    def run_cell(self, cell, cidx):
        '''
        Run a notebook cell and update the output of that cell in-place.
        '''
        logging.debug('running cell {}'.format(cidx))
        # logging.debug(u'cell.input {}'.format(cell.input))
        self.kc.execute(cell.source)
        reply = self.kc.get_shell_msg()
        status = reply['content']['status']
        max_mem = system_memory_used()
        logging.info('  memory used: {}'.format(sizeof_fmt(max_mem)))
        if status == 'error':
            traceback_text = 'Cell raised uncaught exception: \n' + \
                '\n'.join(reply['content']['traceback'])
            traceback_text = remove_ansicolor(traceback_text)
            if 'NoDataFound' not in traceback_text:
                logging.error(traceback_text)
        else:
            logging.debug('run_cell ok')

        outs = list()
        while True:
            try:
                msg = self.kc.get_iopub_msg(timeout=1)
                if msg['msg_type'] == 'status':
                    if msg['content']['execution_state'] == 'idle':
                        break
            except Empty:
                # execution state should return to idle before the queue
                # becomes empty,
                # if it doesn't, something bad has happened
                logging.error("empty exception")
                raise

            content = msg['content']
            msg_type = msg['msg_type']

            # IPython 3.0.0-dev writes pyerr/pyout in the notebook format but
            # uses error/execute_result in the message spec. This does the
            # translation needed for tests to pass with IPython 3.0.0-dev
            notebook3_format_conversions = {
                'error': 'pyerr',
                'execute_result': 'pyout'
            }
            msg_type = notebook3_format_conversions.get(msg_type, msg_type)

            out = NotebookNode(output_type=msg_type)

            #if 'execution_count' in content:
                #cell['prompt_number'] = content['execution_count']
                #out.prompt_number = content['execution_count']

            if msg_type in ('status', 'pyin', 'execute_input'):
                continue
            elif msg_type == 'stream':
                out.stream = content['name']
                if 'text' in content:
                    out.text = content['text']
                else:
                    out.text = content['data']
                # print(out.text, end='')
            elif msg_type in ('display_data', 'pyout'):
                for mime, data in content['data'].items():
                    try:
                        attr = self.MIME_MAP[mime]
                    except KeyError:
                        logging.error("unhandled mime")
                        raise NotImplementedError('unhandled mime type: %s' %
                                                  mime)

                    setattr(out, attr, data)
            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            elif msg_type == 'clear_output':
                outs = list()
                continue
            else:
                logging.error("unhandled iopub")
                raise NotImplementedError('unhandled iopub message: %s' %
                                          msg_type)
            outs.append(out)
        # NOTE: Ver 4 format still have 'pyout', Why?
        cell['outputs'] = upgrade_outputs(outs)

        logging.debug("status: {}".format(status))
        if status == 'error':
            if 'NoDataFound' in traceback_text:
                raise NoDataFound(traceback_text.split('\n')[-1])
            else:
                logging.debug(u"NotebookError raised")
                raise NotebookError(traceback_text)
Example #35
0
    def runtest(self):
        """
        Run test is called by pytest for each of these nodes that are
        collected i.e. a notebook cell. Runs all the cell tests in one
        kernel without restarting.  It is very common for ipython
        notebooks to run through assuming a single kernel.  The cells
        are tested that they execute without errors and that the
        output matches the output stored in the notebook.

        """
        # Execute the code in the current cell in the kernel. Returns the
        # message id of the execute_request; the replies reference it in
        # their parent_header.
        msg_id = self.parent.kernel.execute_cell_input(
            self.cell.source, allow_stdin=False)

        # Timeout for the cell execution: after the code is sent, the
        # kernel replies on the shell channel. Give up if no message is
        # received within this many seconds.
        timeout = 2000

        # Poll the shell channel to get a message
        while True:
            try:
                msg = self.parent.get_kernel_message(stream='shell',
                                                     timeout=timeout)
            except Empty:
                raise NbCellError("Timeout of %d seconds exceeded"
                                  " executing cell: %s" (timeout,
                                                         self.cell.input))

            # Is this the message we are waiting for?
            if msg['parent_header'].get('msg_id') == msg_id:
                break
            else:
                continue

        # This list stores the output information for the entire cell
        outs = []
        # TODO: Only store if comparing with nbdime, to save on memory usage
        self.test_outputs = outs

        # Now get the outputs from the iopub channel, need smaller timeout
        timeout = 5
        while True:
            # The iopub channel broadcasts a range of messages. We keep reading
            # them until we find the message containing the side-effects of our
            # code execution.
            try:
                # Get a message from the kernel iopub channel
                msg = self.parent.get_kernel_message(timeout=timeout)

            except Empty:
                # FIXME: the remaining output will not be checked if the
                # timeout is hit before the cell has finished executing.
                raise NbCellError("Timeout of %d seconds exceeded"
                                  " waiting for output." % timeout)

            # now we must handle the message by checking the type and reply
            # info and we store the output of the cell in a notebook node object
            msg_type = msg['msg_type']
            reply = msg['content']
            out = NotebookNode(output_type=msg_type)

            # Is the iopub message related to this cell execution?
            if msg['parent_header'].get('msg_id') != msg_id:
                continue

            # When the kernel starts to execute code, it will enter the 'busy'
            # state and when it finishes, it will enter the 'idle' state.
            # The kernel will publish state 'starting' exactly
            # once at process startup.
            if msg_type == 'status':
                if reply['execution_state'] == 'idle':
                    break
                else:
                    continue

            # execute_input: To let all frontends know what code is
            # being executed at any given time, these messages contain a
            # re-broadcast of the code portion of an execute_request,
            # along with the execution_count.
            elif msg_type == 'execute_input':
                continue

            # comm_* messages (e.g. widget traffic) and execute_reply are
            # not cell outputs to compare; skip them.
            elif msg_type.startswith('comm'):
                continue
            elif msg_type == 'execute_reply':
                continue

            # The 'clear_output' message type is used to clear the output
            # that is visible on the frontend; it is currently not handled.
            # elif msg_type == 'clear_output':
            #     outs = []
            #     continue

            # 'execute_result' is equivalent to a display_data message:
            # the object being displayed is passed to the display hook,
            # i.e. it is the *result* of the execution.
            # The only difference is that 'execute_result' carries an
            # 'execution_count' number, which does not seem useful here
            # (it is filtered out in the sanitize function).
            #
            # When the reply is display_data or execute_result, its content
            # contains a 'data' sub-dictionary keyed by MIME type, e.g.
            # 'text/plain' and 'image/png' (the latter base64-encoded).
            # There is also a 'metadata' entry, currently not of much use;
            # it sometimes holds information such as the height and width
            # of an image. We therefore iterate over the MIME keys of the
            # 'data' sub-dictionary to collect every representation.
            elif msg_type in ('display_data', 'execute_result'):
                out['metadata'] = reply['metadata']
                out['data'] = {}
                for mime, data in six.iteritems(reply['data']):
                    # This could be useful for reference or backward compatibility
                    #     attr = mime.split('/')[-1].lower()
                    #     attr = attr.replace('+xml', '').replace('plain', 'text')
                    #     setattr(out, attr, data)

                    # Return the relevant entries from data:
                    # plain/text, image/png, execution_count, etc
                    # We could use a mime types list for this (MAYBE)
                    out.data[mime] = data
                outs.append(out)

                if msg_type == 'execute_result':
                    out.execution_count = reply['execution_count']


            # if the message is a stream then we store the output
            elif msg_type == 'stream':
                out.stream = reply['name']
                out.text = reply['text']
                outs.append(out)


            # if the message type is an error then an error has occurred during
            # cell execution. Therefore raise a cell error and pass the
            # traceback information.
            elif msg_type == 'error':
                # Store error in output first
                out['ename'] = reply['ename']
                out['evalue'] = reply['evalue']
                out['traceback'] = reply['traceback']
                outs.append(out)
                traceback = '\n' + '\n'.join(reply['traceback'])
                raise NbCellError(self.cell_num, "Cell execution caused an exception",
                                  self.cell.source, traceback)

            # any other message type is not expected
            # should this raise an error?
            else:
                print("unhandled iopub msg:", msg_type)

        # Compare if the outputs have the same number of lines
        # and throw an error if it fails
        # if len(outs) != len(self.cell.outputs):
        #     self.diff_number_outputs(outs, self.cell.outputs)
        #     failed = True
        failed = False
        if self.docompare:
            if not self.compare_outputs(outs, self.cell.outputs):
                failed = True


        # If the comparison failed then we raise an exception.
        if failed:
            # The traceback containing the difference in the outputs is
            # stored in the variable comparison_traceback
            raise NbCellError(self.cell_num,
                              "Cell outputs differ",
                              self.cell.source,
                              # Here we must put the traceback output:
                              '\n'.join(self.comparison_traceback))
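
Example #35 delegates the actual check to compare_outputs and mentions a sanitize step, neither of which is shown here. A rough sketch of such a comparison, assuming outputs are reduced to their stream text or 'text/plain' data and that volatile values are masked before comparing (function names and normalisation rules below are assumptions):

import re

def sanitize(text):
    # Assumed normalisation: strip ANSI colours and mask memory addresses.
    text = re.sub(r'\x1b\[[0-9;]*m', '', text)
    text = re.sub(r'<[\w.]+ object at 0x[0-9a-f]+>', '<OBJ>', text)
    return text.strip()

def compare_outputs(test_outputs, reference_outputs):
    # Reduce each output to a comparable string, then compare the two lists.
    def as_text(outputs):
        reduced = []
        for out in outputs:
            if out.get('output_type') == 'stream':
                reduced.append(sanitize(out.get('text', '')))
            else:
                reduced.append(sanitize(out.get('data', {}).get('text/plain', '')))
        return reduced
    return as_text(test_outputs) == as_text(reference_outputs)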
Example #36
0
    def runtest(self):
        kc = self.parent.runner.kc
        cell = self.cell

        if ("SKIPCI" in self.cell_description) and ("CI" in os.environ):
            return

        if self.parent.fixture_cell:
            kc.execute(self.parent.fixture_cell.input, allow_stdin=False)

        if self.cell_description.lower().startswith("fixture") or self.cell_description.lower().startswith("setup"):
            self.parent.fixture_cell = self.cell

        kc.execute(cell.input, allow_stdin=False)
        # XXX: as currently implemented there is no point in handling a
        # timeout here, since raising an exception on timeout breaks the
        # rest of the tests. The correct approach would be to send an
        # interrupt to the kernel and then report the timeout, so that the
        # remaining tests can continue.
        reply = kc.get_shell_msg()

        status = reply['content']['status']
        traceback_text = ''
        if status == 'error':
            traceback_text = 'Cell raised uncaught exception: \n' + \
                '\n'.join(reply['content']['traceback'])

        # extract various outputs and streams
        outs = list()
        timeout = 20
        while True:
            try:
                msg = kc.get_iopub_msg(timeout=timeout)
                if msg['msg_type'] == 'status':
                    if msg['content']['execution_state'] == 'idle':
                        break
            except Empty:
                # execution state should return to idle
                # before the queue becomes empty,
                # if it doesn't, something bad has happened
                raise

            content = msg['content']
            msg_type = msg['msg_type']

            out = NotebookNode(output_type=msg_type)

            if 'execution_count' in content:
                out.prompt_number = content['execution_count']

            if msg_type in ('status', 'pyin', 'execute_input'):
                continue
            elif msg_type == 'stream':
                out.stream = content['name']
                if 'text' in content: out.text = content['text']
            # execute_result == Out[] (_)
            elif msg_type in ('display_data', 'execute_result'):
                for mime, data in content['data'].items():
                    try:
                        attr = self.MIME_MAP[mime]
                    except KeyError:
                        raise NotImplementedError(f'unhandled mime type: {mime}')

                    json_encode = (mime == "application/json")
                    data_out = data if not json_encode else json.dumps(data)
                    setattr(out, "data_type", attr)
                    setattr(out, "data", data_out)
            elif msg_type == 'error':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']
            elif msg_type == 'clear_output':
                # ignore
                #outs = list()
                #continue
                pass
            else:
                raise NotImplementedError(f'unhandled iopub message: {msg_type}')
            outs.append(out)
        #pprint(outs)

        if status == 'error':
            # Get all output streams, so that we can display them in the exception
            pyout = []
            stdout_data = ''
            stderr_data = ''
            for out in outs:
                if out.output_type == 'execute_result':
                    # text, html, json (others are binary)
                    if out.data_type in ["text", "html", "json"]:
                        pyout.append(out.data)
                    else:
                        pyout.append(f"[{out.data_type} object]")
                elif 'stream' in out:
                    if out.stream == 'stdout':
                        stdout_data = out.text
                    elif out.stream == 'stderr':
                        stderr_data = out.text

            raise IPyNbException(self.cell_num, self.cell.input, "\n".join(pyout),
                                 stdout_data, stderr_data, traceback_text)
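
The runtest above raises an IPyNbException carrying the cell number, source, result text, stream data and traceback. The exception class itself is not shown; a plausible sketch matching the call signature used above (attribute names and formatting are assumptions):

class IPyNbException(Exception):
    """Failure report for a single notebook cell (assumed shape)."""

    def __init__(self, cell_num, source, pyout, stdout, stderr, traceback_text):
        super().__init__(traceback_text)
        self.cell_num = cell_num
        self.source = source
        self.pyout = pyout
        self.stdout = stdout
        self.stderr = stderr
        self.traceback_text = traceback_text

    def __str__(self):
        return ("Error in cell #%d:\n%s\n--- result ---\n%s\n"
                "--- stdout ---\n%s\n--- stderr ---\n%s\n%s"
                % (self.cell_num, self.source, self.pyout,
                   self.stdout, self.stderr, self.traceback_text))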
Example #37
0
    def run_cell(self, index_cell, cell, clean_function=None):
        '''
        Run a notebook cell and update the output of that cell in-place.

        @param      index_cell          index of the cell
        @param      cell                cell to execute
        @param      clean_function      cleaning function to apply to the code before running it
        @return                         output of the cell
        '''
        iscell, codei = NotebookRunner.get_cell_code(cell)

        self.fLOG('-- running cell:\n%s\n' % codei)

        code = self.clean_code(codei)
        if clean_function is not None:
            code = clean_function(code)
        if len(code) == 0:
            return ""
        if self.kc is None:
            raise ValueError(
                "No kernel was started, specify kernel=True when initializing the instance.")
        self.kc.execute(code)

        reply = self.kc.get_shell_msg()
        reason = None
        try:
            status = reply['content']['status']
        except KeyError:
            status = 'error'
            reason = "no status key in reply['content']"

        if status == 'error':
            ansi_escape = re.compile(r'\x1b[^m]*m')
            try:
                tr = [ansi_escape.sub('', _)
                      for _ in reply['content']['traceback']]
            except KeyError:
                tr = ["No traceback, available keys in reply['content']"] + \
                    [_ for _ in reply['content']]
            traceback_text = '\n'.join(tr)
            self.fLOG("ERR:\n", traceback_text)
        else:
            traceback_text = ''
            self.fLOG('-- cell returned')

        outs = list()
        nbissue = 0
        while True:
            try:
                msg = self.kc.get_iopub_msg(timeout=1)
                if msg['msg_type'] == 'status':
                    if msg['content']['execution_state'] == 'idle':
                        break
            except Empty:
                # execution state should return to idle before the queue becomes empty,
                # if it doesn't, something bad has happened
                status = "error"
                reason = "exception Empty was raised"
                nbissue += 1
                if nbissue > 10:
                    # too many consecutive empty reads; give up and return no output
                    return ""
                else:
                    continue

            content = msg['content']
            msg_type = msg['msg_type']

            # IPython 3.0.0-dev writes pyerr/pyout in the notebook format but uses
            # error/execute_result in the message spec. This does the translation
            # needed for tests to pass with IPython 3.0.0-dev
            notebook3_format_conversions = {
                'error': 'pyerr',
                'execute_result': 'pyout'
            }
            msg_type = notebook3_format_conversions.get(msg_type, msg_type)

            out = NotebookNode(output_type=msg_type)

            if 'execution_count' in content:
                if iscell:
                    cell['prompt_number'] = content['execution_count']
                out.prompt_number = content['execution_count']

            if msg_type in ('status', 'pyin', 'execute_input'):
                continue

            elif msg_type == 'stream':
                out.stream = content['name']
                # in msgspec 5, this is name, text
                # in msgspec 4, this is name, data
                if 'text' in content:
                    out.text = content['text']
                else:
                    out.data = content['data']

            elif msg_type in ('display_data', 'pyout'):
                out.data = content['data']

            elif msg_type == 'pyerr':
                out.ename = content['ename']
                out.evalue = content['evalue']
                out.traceback = content['traceback']

            elif msg_type == 'clear_output':
                outs = list()
                continue

            elif msg_type == 'comm_open' or msg_type == 'comm_msg':
                # widgets in a notebook
                out.data = content["data"]
                out.comm_id = content["comm_id"]

            else:
                dcontent = "\n".join("{0}={1}".format(k, v)
                                     for k, v in sorted(content.items()))
                raise NotImplementedError(
                    'unhandled iopub message: %s' % msg_type + "\nCONTENT:\n" + dcontent)

            outs.append(out)

        if iscell:
            cell['outputs'] = outs

        raw = []
        for _ in outs:
            try:
                t = _.data
            except AttributeError:
                continue

            # see MIMEMAP to see the available output type
            for k, v in t.items():
                if k.startswith("text"):
                    raw.append(v)

        sraw = "\n".join(raw)
        self.fLOG(sraw)

        def reply2string(reply):
            sreply = []
            for k, v in sorted(reply.items()):
                if isinstance(v, dict):
                    temp = []
                    for _, __ in sorted(v.items()):
                        temp.append("    [{0}]={1}".format(_, str(__)))
                    v = "\n".join(temp)
                    sreply.append("reply['{0}']=dict\n{1}".format(k, v))
                else:
                    sreply.append("reply['{0}']={1}".format(k, str(v)))
            sreply = "\n".join(sreply)
            return sreply

        if status == 'error':
            sreply = reply2string(reply)
            if len(code) < 5:
                scode = [code]
            else:
                scode = ""
            mes = "FILENAME\n{10}:1:1\n{7}\nCELL status={8}, reason={9} -- {4} length={5} -- {6}:\n-----------------\n{0}" + \
                  "\n-----------------\nTRACE:\n{1}\nRAW:\n{2}REPLY:\n{3}"
            raise NotebookError(mes.format(
                code, traceback_text, sraw, sreply, index_cell, len(
                    code), scode, self.comment, status, reason,
                self._filename))
        return outs
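
A short usage sketch for run_cell above, exercising the clean_function hook to strip IPython line magics before execution. The constructor arguments and the runner.nb attribute are assumptions; only kernel=True is implied by the check inside run_cell:

def strip_magics(code):
    # Drop line magics and shell escapes so the cell runs as plain Python.
    return "\n".join(line for line in code.splitlines()
                     if not line.lstrip().startswith(("%", "!")))

runner = NotebookRunner(nb, kernel=True)             # kernel=True required by run_cell
for index_cell, cell in enumerate(runner.nb.cells):  # 'nb.cells' is an assumed attribute
    try:
        outs = runner.run_cell(index_cell, cell, clean_function=strip_magics)
    except NotebookError as e:
        print("cell %d failed:\n%s" % (index_cell, e))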