Ejemplo n.º 1
0
    def loadnb(self):
        """
        Load the notebook and parse all the cells, assigning them filenames
        and extracting their contents.

        The notebook is read from ``self.root``. For every markdown cell,
        code cell, stream output and display_data/execute_result MIME entry
        a synthetic filename is generated; ``self.fnames[fname]`` receives a
        ``(kind, payload)`` tuple and ``self.fdata[fname]`` the content as
        bytes.

        Returns the notebook node (converted to format 4 if it was older).
        """
        with open(self.root) as f:
            node = nbformat.reader.read(f)

        # ipynb format 4 is current; update the data structure
        # for consistency if it is an older version
        ipynb_version = nbformat.reader.get_version(node)
        if ipynb_version < (4, 0):
            node = nbformat.convert(node, 4)

        # extension for code cells; language_info is optional notebook
        # metadata, so fall back to "no extension" instead of raising
        codeext = node.metadata.get("language_info", {}).get(
            "file_extension", "")

        # assign filenames to parts of the data
        for i, cell in enumerate(node.cells):
            if cell.cell_type == "markdown":
                fname = "cell{0}.md".format(i)
                self.fnames[fname] = ("markdown", cell)
                self.fdata[fname] = maybe_join(cell.source).encode("utf-8")
            elif cell.cell_type == "code":
                fname = "cell{0}{1}".format(i, codeext)
                self.fnames[fname] = ("code", cell)
                self.fdata[fname] = maybe_join(cell.source).encode("utf-8")
                for j, output in enumerate(cell.outputs):
                    if output.output_type == "stream":
                        fname = "cell{0}_out{1}_{2}.txt".format(
                            i, j, output.name)
                        self.fnames[fname] = ("stream", output)
                        self.fdata[fname] = maybe_join(
                            output.text).encode("utf-8")
                    elif output.output_type in ("display_data",
                                                "execute_result"):
                        for k, mime in enumerate(output.data):
                            # guess_extension returns None for MIME types it
                            # does not know; avoid a literal "None" suffix
                            ext = mimetypes.guess_extension(mime) or ""
                            fname = "cell{0}_out{1}_data{2}{3}".format(
                                i, j, k, ext)
                            payload = output.data[mime]
                            self.fnames[fname] = ("data", (mime, payload))

                            # interpreting these types as base64 and everything
                            # else as text matches behaviour in nbconvert
                            # but it's probably not extensible
                            # the nbformat really needs to say how display_data
                            # is encoded
                            if mime in ("image/png", "image/jpeg",
                                        "application/pdf"):
                                # decodebytes replaces decodestring, which
                                # was removed in Python 3.9
                                self.fdata[fname] = base64.decodebytes(
                                    bytes(maybe_join(payload), "ascii"))
                            else:
                                self.fdata[fname] = maybe_join(
                                    payload).encode("utf-8")

        return node
Ejemplo n.º 2
0
def copy_current_notebook(oldpath,
                          newpath,
                          cell=0,
                          copy_dirs='dirs',
                          copy_root='root'):
    """Write a trimmed, scrubbed copy of a notebook with radiopadre boilerplate.

    The notebook at ``oldpath`` is read, converted to the current nbformat,
    stripped of its leading cells, scrubbed of outputs, given a generated
    markdown header and code cell, and written to ``newpath``.

    :param oldpath: path of the notebook to copy
    :param newpath: path the new notebook is written to
    :param cell: index of the last leading cell to drop; cells ``0..cell``
        (inclusive) are removed
    :param copy_dirs: variable name that the generated code cell binds to
        ``radiopadre.DirList('.')``
    :param copy_root: variable name bound to ``<copy_dirs>[0]`` in the
        generated code; a falsy value omits that line
    :raises RuntimeError: if the converted notebook exposes ``worksheets``
    :return: ``newpath``
    """
    # read notebook data
    with open(oldpath) as infile:
        data = infile.read()
    version = json.loads(data)['nbformat']
    nbdata = nbformat.reads(data, version)
    # convert to current format
    current_version = nbformat.current_nbformat
    nbdata = nbformat.convert(nbdata, current_version)
    current_format = getattr(nbformat, 'v' + str(current_version))
    # worksheets are not supported
    if hasattr(nbdata, 'worksheets'):
        # raise a real exception instance; raising a tuple is a TypeError
        # on Python 3 and loses the message on Python 2
        raise RuntimeError(
            "copy_current_notebook: not compatible with worksheets")
    metadata = nbdata['metadata']
    cells = nbdata['cells']
    # strip out all cells up to and including indicated one
    del cells[:cell + 1]
    # scrub cell output
    for c in cells:
        scrub_cell(c)
    # insert boilerplate code
    code = "import radiopadre\n" + \
           "%s = radiopadre.DirList('.')" % copy_dirs
    if copy_root:
        code += "\n%s = %s[0]" % (copy_root, copy_dirs)
    code += "\n%s.show()" % copy_dirs
    # insert output
    output = current_format.new_output(
        "display_data",
        data={
            "text/html": [
                "<b style='color: red'>Please select Cell|Run all from the menu to render this notebook.</b>"
            ]
        })
    cells.insert(0, current_format.new_code_cell(code, outputs=[output]))
    # insert markdown
    cells.insert(
        0,
        current_format.new_markdown_cell(
            """# %s\nThis
                radiopadre notebook was automatically generated from ``%s`` 
                using the 'copy notebook' feature. Please select "Cell|Run all"
                from the menu to render this notebook.
                """ % (newpath, oldpath), ))
    # cleanup metadata
    metadata['radiopadre_notebook_protect'] = 0
    metadata['radiopadre_notebook_scrub'] = 0
    if 'signature' in metadata:
        metadata['signature'] = ""
    # save, written in the major version the source notebook declared
    with open(newpath, 'w') as outfile:
        nbformat.write(nbdata, outfile, version)
    return newpath
Ejemplo n.º 3
0
    def loadnb(self):
        """
        Load the notebook and parse all the cells, assigning them filenames
        and extracting their contents.

        The notebook is read from ``self.root``. For every markdown cell,
        code cell, stream output and display_data/execute_result MIME entry
        a synthetic filename is generated; ``self.fnames[fname]`` receives a
        ``(kind, payload)`` tuple and ``self.fdata[fname]`` the content as
        bytes.

        Returns the notebook node (converted to format 4 if it was older).
        """
        with open(self.root) as f:
            node = nbformat.reader.read(f)

        # ipynb format 4 is current; update the data structure
        # for consistency if it is an older version
        ipynb_version = nbformat.reader.get_version(node)
        if ipynb_version < (4, 0):
            node = nbformat.convert(node, 4)

        # extension for code cells; language_info is optional notebook
        # metadata, so fall back to "no extension" instead of raising
        codeext = node.metadata.get("language_info", {}).get(
            "file_extension", "")

        # assign filenames to parts of the data
        for i, cell in enumerate(node.cells):
            if cell.cell_type == "markdown":
                fname = "cell{0}.md".format(i)
                self.fnames[fname] = ("markdown", cell)
                self.fdata[fname] = maybe_join(cell.source).encode("utf-8")
            elif cell.cell_type == "code":
                fname = "cell{0}{1}".format(i, codeext)
                self.fnames[fname] = ("code", cell)
                self.fdata[fname] = maybe_join(cell.source).encode("utf-8")
                for j, output in enumerate(cell.outputs):
                    if output.output_type == "stream":
                        fname = "cell{0}_out{1}_{2}.txt".format(i, j,
                                                                output.name)
                        self.fnames[fname] = ("stream", output)
                        self.fdata[fname] = maybe_join(
                            output.text).encode("utf-8")
                    elif output.output_type in ("display_data",
                                                "execute_result"):
                        for k, mime in enumerate(output.data):
                            # guess_extension returns None for MIME types it
                            # does not know; avoid a literal "None" suffix
                            ext = mimetypes.guess_extension(mime) or ""
                            fname = "cell{0}_out{1}_data{2}{3}".format(i, j,
                                                                       k, ext)
                            payload = output.data[mime]
                            self.fnames[fname] = ("data", (mime, payload))

                            # interpreting these types as base64 and everything
                            # else as text matches behaviour in nbconvert
                            # but it's probably not extensible
                            # the nbformat really needs to say how display_data
                            # is encoded
                            if mime in ("image/png", "image/jpeg",
                                        "application/pdf"):
                                # decodebytes replaces decodestring, which
                                # was removed in Python 3.9
                                self.fdata[fname] = base64.decodebytes(
                                    bytes(maybe_join(payload), "ascii"))
                            else:
                                self.fdata[fname] = maybe_join(
                                    payload).encode("utf-8")

        return node
    def create_hashed_notebook(
        self,
        nb: nbf.NotebookNode,
        nb_metadata: Optional[Iterable[str]] = ("kernelspec", ),
        cell_metadata: Optional[Iterable[str]] = None,
    ) -> Tuple[nbf.NotebookNode, str]:
        """Normalise a notebook and compute a hash of its code content.

        The input is deep-copied, converted to ``NB_VERSION`` and reduced to
        its code cells. A second, stripped-down notebook (selected metadata,
        cell sources, no outputs, no execution counts, minor version pinned
        to 4) is serialised and MD5-hashed.

        :param nb: The notebook to process (not modified)
        :param nb_metadata: The notebook metadata keys to hash (if None, use all)
        :param cell_metadata: The cell metadata keys to hash (if None, use all)
        :raises CachingError: if the converted notebook's minor version is > 5

        :return: (converted notebook containing only code cells, hash)
        """
        # work on a private copy, brought to the common notebook version
        nb = nbf.convert(copy.deepcopy(nb), to_version=NB_VERSION)
        if nb.nbformat_minor > 5:
            raise CachingError(
                "notebook version greater than 4.5 not yet supported")

        # only code cells take part in caching
        code_cells = [entry for entry in nb.cells if entry.cell_type == "code"]
        nb.cells = code_cells

        # notebook-level metadata selected for hashing
        kept_nb_metadata = {}
        for key, value in nb.metadata.items():
            if nb_metadata is None or (key in nb_metadata):
                kept_nb_metadata[key] = value

        # per-cell payload: type, source and selected metadata only
        hashed_cells = []
        for cell in code_cells:
            kept_cell_metadata = {}
            for key, value in cell.metadata.items():
                if cell_metadata is None or (key in cell_metadata):
                    kept_cell_metadata[key] = value
            hashed_cells.append({
                "cell_type": cell.cell_type,
                "source": cell.source,
                "metadata": kept_cell_metadata,
                "execution_count": None,
                "outputs": [],
            })

        hash_nb = nbf.from_dict({
            "nbformat": nb.nbformat,
            # v4.5 include cell ids, which we do not cache
            "nbformat_minor": 4,
            "metadata": kept_nb_metadata,
            "cells": hashed_cells,
        })

        # serialise without conversion and hash the resulting text
        serialised = nbf.writes(hash_nb, nbf.NO_CONVERT)
        digest = hashlib.md5(serialised.encode()).hexdigest()

        return (nb, digest)
Ejemplo n.º 5
0
def process_repository(session, status, repository, query_iter):
    """Reload cell sources from a repository's notebooks and store them.

    ``query_iter`` yields 3-tuples whose first item is a cell row and whose
    second item is the notebook row it belongs to. Rows are grouped by
    notebook, each notebook file is loaded (from ``repository.path`` or,
    when that does not exist, from the tar archive at
    ``repository.zip_path``), and every cell row receives the source found
    at its index in the notebook.

    :param session: object exposing ``add`` and ``commit`` (presumably a
        database session -- confirm against caller)
    :param status: object whose ``count`` is advanced by the number of rows
        when the repository cannot be found
    :param repository: repository row providing ``path``, ``zip_path``,
        ``hash_dir2`` and ``processed``
    :param query_iter: iterable of ``(cell, notebook, _)`` tuples
    :return: ``"ok"`` on success, otherwise a failure message
    """
    query_iter = list(query_iter)
    zip_path = None
    tarzip = None
    if not repository.path.exists():
        if not repository.zip_path.exists():
            repository.processed |= consts.R_UNAVAILABLE_FILES
            session.add(repository)
            status.count += len(query_iter)
            return "Failed. Repository not found: {}".format(repository)
        tarzip = tarfile.open(str(repository.zip_path))
        zip_path = Path(repository.hash_dir2)

    try:
        shell = InteractiveShell.instance()
        # NOTE(review): groupby only merges consecutive rows, so this
        # assumes query_iter is ordered by notebook -- confirm with caller.
        for notebook, new_iter in groupby(query_iter, lambda x: x[1]):
            # materialise this notebook's rows so the count in the log
            # refers to the current group, not to the whole query
            cells = list(new_iter)
            vprint(1, "Processing notebook: {}. Found {} cells".format(notebook, len(cells)))
            name = notebook.name
            vprint(2, "Loading notebook file")
            if tarzip is not None:
                notebook = nbf.read(
                    tarzip.extractfile(tarzip.getmember(str(zip_path / name))),
                    nbf.NO_CONVERT
                )
            else:
                with open(str(repository.path / name)) as ofile:
                    notebook = nbf.read(ofile, nbf.NO_CONVERT)
            notebook = nbf.convert(notebook, 4)
            metadata = notebook["metadata"]
            language_info = metadata.get("language_info", {})
            language_name = language_info.get("name", "unknown")

            for cell, _, _ in cells:
                vprint(2, "Loading cell {}".format(cell.index))

                index = int(cell.index)
                notebook_cell = notebook["cells"][index]
                source = notebook_cell.get("source", "")
                if language_name == "python" and notebook_cell.get("cell_type") == "code":
                    try:
                        source = shell.input_transformer_manager.transform_cell(source)
                    except (IndentationError, SyntaxError):
                        # best effort: keep the untransformed source
                        pass
                cell.source = source
                # clear the "marked for extraction" flag if it is set
                if cell.processed & consts.C_MARKED_FOR_EXTRACTION:
                    cell.processed -= consts.C_MARKED_FOR_EXTRACTION
                session.add(cell)
            session.commit()
    finally:
        # release the archive handle even when a notebook fails to load
        if tarzip is not None:
            tarzip.close()
    return "ok"
Ejemplo n.º 6
0
def py_file_to_notebook(filename, nbver=None):
    """Build a notebook object from the Python file at *filename*.

    The file is first turned into an ipy-format string by
    ``py_file_to_ipy_string`` and then parsed with ``nbpy.read``. When
    *nbver* is given, the result is additionally passed through
    ``nbf.convert`` to that version.
    """
    # Parse the intermediate text through an in-memory stream
    # (per the original note, nbpy is the v3 nbformat reader).
    buffer = StringIO(py_file_to_ipy_string(filename))
    with buffer as stream:
        notebook = nbpy.read(stream)

    # No target version requested: hand back the notebook as parsed.
    if nbver is None:
        return notebook

    return nbf.convert(notebook, nbver)
Ejemplo n.º 7
0
def py_string_to_notebook(str, nbver=None):
    """Build a notebook object from the text of a regular Python script.

    The script text is turned into an ipy-format string by
    ``py_string_to_ipy_string`` and then parsed with ``nbpy.read``. When
    *nbver* is given, the result is additionally passed through
    ``nbf.convert`` to that version.
    """
    # Parse the intermediate text through an in-memory stream
    # (per the original note, nbpy is the v3 nbformat reader).
    buffer = StringIO(py_string_to_ipy_string(str))
    with buffer as stream:
        notebook = nbpy.read(stream)

    # No target version requested: hand back the notebook as parsed.
    if nbver is None:
        return notebook

    return nbf.convert(notebook, nbver)
Ejemplo n.º 8
0
def render_nb():
    """Run ``stock_infos.ipynb`` and serve the result as a static HTML page.

    The notebook is read as format 3, executed with ``NotebookRunner``,
    converted to format 4, exported with ``HTMLExporter`` and written to
    ``static/stock_infos.html``.

    Returns the response produced by ``app.send_static_file``.
    """
    import io  # local import: encoding-aware open() on Python 2 and 3

    # need ipynb v3 to play nice with runipy
    with open("stock_infos.ipynb") as nb_file:
        notebook = read(nb_file, 3)

    runner = NotebookRunner(notebook, pylab=True)
    runner.run_notebook()

    # need ipynb v4 to play nice with Jupyter
    nb = nbformat.convert(runner.nb, 4)

    html_exporter = HTMLExporter()
    body, resources = html_exporter.from_notebook_node(nb)

    # Let the file object do the UTF-8 encoding: writing pre-encoded bytes
    # to a text-mode handle raises TypeError on Python 3. The handle is
    # closed even if the write fails.
    with io.open("static/stock_infos.html", "w", encoding="utf-8",
                 errors="ignore") as html_file:
        html_file.write(body)

    return app.send_static_file('stock_infos.html')
Ejemplo n.º 9
0
def render_nb():
    """Run ``stock_infos.ipynb`` and serve the result as a static HTML page.

    The notebook is read as format 3, executed with ``NotebookRunner``,
    converted to format 4, exported with ``HTMLExporter`` and written to
    ``static/stock_infos.html``.

    Returns the response produced by ``app.send_static_file``.
    """
    import io  # local import: encoding-aware open() on Python 2 and 3

    # need ipynb v3 to play nice with runipy
    with open("stock_infos.ipynb") as nb_file:
        notebook = read(nb_file, 3)

    runner = NotebookRunner(notebook, pylab=True)
    runner.run_notebook()

    # need ipynb v4 to play nice with Jupyter
    nb = nbformat.convert(runner.nb, 4)

    html_exporter = HTMLExporter()
    body, resources = html_exporter.from_notebook_node(nb)

    # Let the file object do the UTF-8 encoding: writing pre-encoded bytes
    # to a text-mode handle raises TypeError on Python 3. The handle is
    # closed even if the write fails.
    with io.open("static/stock_infos.html", "w", encoding="utf-8",
                 errors="ignore") as html_file:
        html_file.write(body)

    return app.send_static_file('stock_infos.html')
    def merge_match_into_notebook(
        self,
        nb: nbf.NotebookNode,
        nb_meta: Optional[Iterable[str]] = ("kernelspec", "language_info",
                                            "widgets"),
        cell_meta: Optional[Iterable[str]] = None,
    ) -> Tuple[int, nbf.NotebookNode]:
        """Look up the cached notebook matching *nb* and merge it into a copy.

        The input notebook is deep-copied and converted to ``NB_VERSION``.
        Each of its code cells is then replaced, in order, by the next cell
        taken from the front of the cached notebook's cell list.

        :param nb: The input notebook
        :param nb_meta: metadata keys to merge from the cached notebook (all if None)
        :param cell_meta: cell metadata keys to merge from cached notebook (all if None)
        :raises KeyError: if no match is found
        :return: pk, input notebook with cached code cells and metadata merged.

        """
        pk = self.match_cache_notebook(nb).pk
        cache_nb = self.get_cache_bundle(pk).nb
        nb = nbf.convert(copy.deepcopy(nb), NB_VERSION)

        # notebook-level metadata: take everything, or only the listed keys
        if nb_meta is None:
            nb.metadata = cache_nb.metadata
        else:
            for key in nb_meta:
                if key not in cache_nb.metadata:
                    continue
                nb.metadata[key] = cache_nb.metadata[key]

        for position, in_cell in enumerate(nb.cells):
            if in_cell.cell_type != "code":
                continue
            # cached cells are consumed front-to-back, one per code cell
            cache_cell = cache_nb.cells.pop(0)
            if cell_meta is not None:
                # copy the selected cached keys onto the input metadata,
                # then let the cached cell carry that merged metadata
                selected = {
                    key: value
                    for key, value in cache_cell.metadata.items()
                    if key in cell_meta
                }
                in_cell.metadata.update(selected)
                cache_cell.metadata = in_cell.metadata
            # cell ids are kept only from minor version 5 upwards
            if nb.nbformat_minor < 5:
                cache_cell.pop("id", None)
            else:
                cache_cell.id = in_cell.id
            nb.cells[position] = cache_cell
        return pk, nb
Ejemplo n.º 11
0
def run_notebook(nb_name_input, nb_name_output, nb_kwargs=None,
                 insert_pos=1, timeout=3600, execute_kwargs=None):
    """Execute a notebook and save the executed result.

    :param nb_name_input: notebook to read (passed to ``nbformat.read``)
    :param nb_name_output: path the executed notebook / HTML export is
        written to
    :param nb_kwargs: optional dict rendered to code by ``dict_to_code`` and
        inserted as an extra code cell; nothing is inserted when it is
        ``None`` or empty
    :param insert_pos: index at which the generated code cell is inserted
    :param timeout: timeout handed to ``ExecutePreprocessor``
    :param execute_kwargs: extra keyword arguments for
        ``ExecutePreprocessor``
    :raises: whatever ``ExecutePreprocessor.preprocess`` raises, re-raised
        after a message is printed and the output has been written
    """
    timestamp_cell = "**Executed:** %s\n\n**Duration:** %d seconds."

    if execute_kwargs is None:
        execute_kwargs = {}
    ep = ExecutePreprocessor(timeout=timeout, **execute_kwargs)
    nb = nbformat.read(nb_name_input, as_version=4)

    # nb_kwargs defaults to None, so test truthiness rather than len()
    if nb_kwargs:
        header = '# Cell inserted during automated execution.'
        code = dict_to_code(nb_kwargs)
        code_cell = '\n'.join((header, code))
        nb['cells'].insert(insert_pos, nbformat.v4.new_code_cell(code_cell))

    start_time = time.time()
    try:
        # Execute the notebook
        ep.preprocess(nb, {'metadata': {'path': './'}})
    except BaseException:
        # Execution failed, print a message then raise.
        msg = 'Error executing the notebook "%s".\n\n' % nb_name_input
        msg += 'See notebook "%s" for the traceback.' % nb_name_output
        print(msg)
        raise
    else:
        # On successful execution, add timestamping cell
        duration = time.time() - start_time
        timestamp_cell = timestamp_cell % (time.ctime(start_time), duration)
        nb['cells'].insert(0, nbformat.v4.new_markdown_cell(timestamp_cell))
    finally:
        # Save the notebook to HTML output, even when it raises an error
        # NOTE(review): both writes below target nb_name_output, so the
        # HTML export replaces the notebook written first -- confirm that
        # this is intended.
        nbformat.write(nb, nb_name_output)
        exporter = HTMLExporter()
        output, resources = exporter.from_notebook_node(
            nbformat.convert(nb, nbformat.current_nbformat)
        )
        with codecs.open(nb_name_output, 'w', encoding='utf-8') as html_file:
            html_file.write(output)
Ejemplo n.º 12
0
def read_ipynb(infile, header=None):
    """Read the notebook at *infile* and pick up an optional Rmd header.

    The notebook is upgraded to format 4 when it is older. A warning is
    printed when its declared language is not ``R``. If *header* is None
    and the notebook metadata holds ``Rmd_header``, that value is used as
    the header.

    Returns ``(node, header)``.
    """
    with open(infile) as handle:
        node = nbformat.reader.read(handle)

    # ipynb format 4 is current as of IPython 3.0; anything older is
    # upgraded so later code sees one consistent structure
    if nbformat.reader.get_version(node) < (4, 0):
        node = nbformat.convert(node, 4)

    language = node.metadata.get('language_info', {}).get('name', None)
    if language != 'R':
        print('Warning: Notebook language "{0}" != R'.format(language))
        print("Output is unlikely to be a valid Rmd document")

    # round-tripping: with no explicit header, reuse the one stored in the
    # notebook metadata (NotebookNode objects rather than plain dicts)
    if header is None and "Rmd_header" in node.metadata:
        header = node.metadata["Rmd_header"]

    return node, header
Ejemplo n.º 13
0
def read_ipynb(infile, header=None):
    """Read the notebook at *infile* and pick up an optional Rmd header.

    The notebook is upgraded to format 4 when it is older. A warning is
    printed when its declared language is not ``R``. If *header* is None
    and the notebook metadata holds ``Rmd_header``, that value is used as
    the header.

    Returns ``(node, header)``.
    """
    with open(infile) as handle:
        node = nbformat.reader.read(handle)

    # ipynb format 4 is current as of IPython 3.0; anything older is
    # upgraded so later code sees one consistent structure
    if nbformat.reader.get_version(node) < (4, 0):
        node = nbformat.convert(node, 4)

    language = node.metadata.get('language_info', {}).get('name', None)
    if language != 'R':
        print('Warning: Notebook language "{0}" != R'.format(language))
        print("Output is unlikely to be a valid Rmd document")

    # round-tripping: with no explicit header, reuse the one stored in the
    # notebook metadata (NotebookNode objects rather than plain dicts)
    if header is None and "Rmd_header" in node.metadata:
        header = node.metadata["Rmd_header"]

    return node, header
Ejemplo n.º 14
0
        extra_arguments = ['--pylab=inline'] + extra_arguments

    used_output_filter = [t_name.strip() for t_name in args.ttypes.split(',')]
    used_output_types = filter(
        lambda x: any(f in x for f in used_output_filter),
        registered_output_types)

    if verbose:
        tv.write(tv.blue('>>> using the following content types to compare\n'))
        for tt in used_output_types:
            tv.write(tt + '\n', indent=4)
    with open(ipynb, encoding='utf-8') as f:
        nb = nbformat.reads(f.read(), 4)
        # Convert all notebooks to the format IPython 3.0.0 uses to
        # simplify comparison
        nb = nbformat.convert(nb, 4)

    notebook_restart = True
    notebook_run_count = 0

    while notebook_restart:
        notebook_restart = False
        notebook_run_count += 1

        tv.reset()
        tv.write("starting kernel ... ")
        with IPyKernel(extra_arguments=extra_arguments) as ipy:
            ipy.default_timeout = args.timeout
            tv.writeln("ok")

            nbs = ipynb.split('/')[-1].split('.')
Ejemplo n.º 15
0
def writes(notebook, fmt, version=nbformat.NO_CONVERT, config=None, **kwargs):
    """Serialise a notebook to text in the requested Jupytext format.

    :param notebook: the notebook
    :param fmt: the jupytext format like `md`, `py:percent`, ...
    :param version: see nbformat.writes; either nbformat.NO_CONVERT or an
        integer major version the notebook is converted to first
    :param config: (optional) a Jupytext configuration object
    :param kwargs: forwarded to nbformat.writes when the target extension is
        `.ipynb`; not used for text formats
    :raises TypeError: if `version` is neither NO_CONVERT nor an int
    :raises NotSupportedNBFormatVersion: if the notebook is older than v4
    :return: the text representation of the notebook
    """
    convert_requested = version is not nbformat.NO_CONVERT
    if convert_requested:
        if not isinstance(version, int):
            raise TypeError(
                "The argument 'version' should be either nbformat.NO_CONVERT, or an integer."
            )
        notebook = nbformat.convert(notebook, version)

    # From here on `version` is the notebook's actual major version.
    version, minor = nbformat.reader.get_version(notebook)
    if version < 4:
        raise NotSupportedNBFormatVersion(
            f"Notebooks in nbformat version {version}.{minor} are not supported by Jupytext. "
            f"Please convert your notebooks to nbformat version 4 with "
            f"'jupyter nbconvert --to notebook --inplace', or call this function with 'version=4'."
        )
    if (version, minor) > (4, 5):
        warnings.warn(
            f"Notebooks in nbformat version {version}.{minor} "
            f"have not been tested with Jupytext version {__version__}."
        )

    # Work on copies so the caller's notebook metadata and fmt stay intact.
    metadata = deepcopy(notebook.metadata)
    rearrange_jupytext_metadata(metadata)
    fmt = long_form_one_format(copy(fmt), metadata)
    ext = fmt["extension"]
    format_name = fmt.get("format_name")
    jupytext_metadata = metadata.get("jupytext", {})

    if ext == ".ipynb":
        # Remove jupytext section if empty
        jupytext_metadata.pop("text_representation", {})
        if not jupytext_metadata:
            metadata.pop("jupytext", {})
        ipynb_node = NotebookNode(
            nbformat=notebook.nbformat,
            nbformat_minor=notebook.nbformat_minor,
            metadata=metadata,
            cells=notebook.cells,
        )
        return nbformat.writes(ipynb_node, version, **kwargs)

    # Text formats: settle on a format name, record it, then delegate.
    format_name = format_name or format_name_for_ext(
        metadata, ext, explicit_default=False)

    if format_name:
        fmt["format_name"] = format_name
        update_jupytext_formats_metadata(metadata, fmt)

    return TextNotebookConverter(fmt, config).writes(notebook, metadata)
Ejemplo n.º 16
0
def render_page(nbname, config=None):
    """Render the notebook *nbname* to HTML, optionally executing it first.

    :param nbname: name of the notebook, resolved through ``nbmanager``
    :param config: optional overrides merged on top of
        ``flask_app.base_config``; the keys read here are ``'run'`` and
        ``'template'``
    :return: the HTML produced by ``HTMLExporter``

    Aborts the request with a 404 (``flask.abort``) when the notebook does
    not exist. When ``config['run']`` is set, the notebook is executed with
    the module-level ``runner``; an ``Empty`` queue error is retried up to
    ``N_RUN_RETRIES`` times and re-raised on the last attempt.
    """
    global runner

    # Combine base config with any provided overrides. A None default
    # replaces the former shared mutable ``{}`` default.
    config = dict(flask_app.base_config, **(config or {}))

    if not nbmanager.notebook_exists(nbname):
        print("Notebook %s does not exist." % nbname)
        flask.abort(404)

    print("Loading notebook %s" % nbname)
    #nbmanager.trust_notebook(nbname)
    nb = nbmanager.get_notebook(nbname)

    if config['run']:
        print("Making runner...")

        # This is an ugly little bit to deal with a sporadic
        #  'queue empty' bug in jupyter that only seems to
        #  happen on the integration servers...
        #  see https://github.com/paulgb/runipy/issues/36
        N_RUN_RETRIES = 4
        # The module is called ``Queue`` on Python 2 and ``queue`` on 3.
        try:
            from Queue import Empty
        except ImportError:
            from queue import Empty

        # Do as complete of a reset of the kernel as we can.
        # Unfortunately, this doesn't really do a 'hard' reset
        # of any modules...
        class ResetCell(dict):
            """Simulates just enough of a notebook cell to get this
            'reset cell' executed using the existing runipy
             machinery."""
            input = "get_ipython().reset(new_session=True)"

        for i in range(N_RUN_RETRIES):
            try:
                # the runner is created on a background thread; wait for
                # it if it is not available yet
                if runner is None:
                    make_notebook_runner_thread.join()

                runner.run_cell(ResetCell())
                runner.nb = nb
                print("Running notebook")
                runner.run_notebook(skip_exceptions=True)
                break
            except Empty:
                print("WARNING: Empty bug happened.")
                if i >= (N_RUN_RETRIES - 1):
                    raise
        nb = runner.nb
    # else:
    #     nb = nb['content']
    print("Exporting notebook")
    exporter = HTMLExporter(config=Config({
        'HTMLExporter': {
            'template_file':
            config['template'],
            'template_path':
            ['.', os.path.join(os.path.split(__file__)[0], 'templates')]
        }
    }))
    output, resources = exporter.from_notebook_node(
        convert(nb, current_nbformat))
    print("Returning.")
    return output
Ejemplo n.º 17
0
        extra_arguments = ['--pylab=inline'] + extra_arguments

    used_output_filter = [t_name.strip() for t_name in args.ttypes.split(',')]
    used_output_types = filter(
        lambda x: any(f in x for f in used_output_filter),
        registered_output_types)

    if verbose:
        tv.write(tv.blue('>>> using the following content types to compare\n'))
        for tt in used_output_types:
            tv.write(tt + '\n', indent=4)
    with open(ipynb, encoding='utf-8') as f:
        nb = nbformat.reads(f.read(), 4)
        # Convert all notebooks to the format IPython 3.0.0 uses to
        # simplify comparison
        nb = nbformat.convert(nb, 4)

    notebook_restart = True
    notebook_run_count = 0

    while notebook_restart:
        notebook_restart = False
        notebook_run_count += 1

        tv.reset()
        tv.write("starting kernel ... ")
        with IPyKernel(extra_arguments=extra_arguments) as ipy:
            ipy.default_timeout = args.timeout
            tv.writeln("ok")

            nbs = ipynb.split('/')[-1].split('.')
Ejemplo n.º 18
0
 def from_notebook_node(self, nb, resources=None, **kw):
     """Export *nb* via the parent exporter and serialise the result.

     Notebooks whose ``nbformat`` is not 4 are converted to version 4
     first. Returns ``(serialised notebook, resources)``.
     """
     needs_conversion = nb.nbformat != 4
     if needs_conversion:
         nb = _nbformat.convert(nb, to_version=4)
     exported, resources = super().from_notebook_node(nb, resources, **kw)
     serialised = _jf.serialize(exported)
     return serialised, resources
Ejemplo n.º 19
0
async def api_convert(request: web.Request) -> web.Response:
    """HTTP POST route at /api/convert for the JSON nbconvert API.

    The request body must be JSON with a 'notebook' and an 'exporter'
    field and may carry a 'config' dict. On success the JSON response is
    the conversion result; its 'body' field is a Base64 encoded string if
    the conversion returned bytes. Problems with the request or the
    conversion are reported through ``make_REST_error_response``.
    """
    data = await request.json()

    # Both fields are mandatory; report the first one that is absent.
    required = {}
    for field in ("notebook", "exporter"):
        try:
            required[field] = data[field]
        except KeyError:
            return make_REST_error_response("Missing field",
                                            f"Missing {field} field")
    notebook_data = required["notebook"]
    exporter_type = required["exporter"]

    known_exporters = get_exporter_names()
    if exporter_type not in known_exporters:
        return make_REST_error_response(
            "Invalid field",
            f"Invalid exporter {exporter_type!r}, must be one of "
            f"{known_exporters}")

    # 'config' is optional, but when present it has to be a dict.
    try:
        config = data['config']
    except KeyError:
        config = None
    else:
        if not isinstance(config, dict):
            return make_REST_error_response(
                "Invalid field",
                f"Invalid config field value {config!r}, must be a dict")

    # Load the notebook, bring it to the target version and validate it.
    notebook = nbformat.convert(nbformat.from_dict(notebook_data),
                                to_version=TO_VERSION)
    try:
        nbformat.validate(notebook)
    except nbformat.ValidationError as err:
        return make_REST_error_response(
            "Invalid field",
            f"Notebook JSON invalid for version {TO_VERSION}: {err}")

    # Get notebook-json format of notebook
    major, _minor = nbformat.reader.get_version(notebook)
    notebook_dict = nbformat.versions[major].to_notebook_json(notebook)

    # Run the synchronous conversion in the executor pool.
    loop = asyncio.get_event_loop()
    try:
        result = await loop.run_in_executor(pool, convert_notebook_sync,
                                            notebook_dict, exporter_type,
                                            config)
    except Exception as err:
        return make_REST_error_response("Unknown error", str(err))

    # Bytes are not JSON serialisable; ship them as Base64 text instead.
    payload = result.copy()
    body = payload['body']
    if isinstance(body, bytes):
        payload['body'] = base64.b64encode(body).decode('utf-8')

    return web.json_response(payload)
Ejemplo n.º 20
0
def load_notebook(repository_id, path, notebook_file, nbrow):
    """Extract notebook information and cells from notebook

    Reads ``path / notebook_file``, records its original format version,
    converts it to nbformat 4 and fills ``nbrow`` with notebook-level
    information while building one dict per cell.

    :param repository_id: stored in every cell dict and used in the
        repair SQL printed for broken symlinks
    :param path: directory containing the notebook (joined with ``/``)
    :param notebook_file: notebook path relative to ``path``
    :param nbrow: dict updated in place; its counter keys (``total_cells``,
        ``code_cells``, ...) are incremented with ``+=`` and so must already
        be present
    :return: ``(nbrow, cells_info)``; ``cells_info`` is empty when the
        notebook could not be opened or parsed
    """
    # pylint: disable=too-many-locals
    status = 0
    try:
        with open(str(path / notebook_file)) as ofile:
            notebook = nbf.read(ofile, nbf.NO_CONVERT)
        # keep the version the file declared before converting it
        nbrow["nbformat"] = "{0[nbformat]}".format(notebook)
        if "nbformat_minor" in notebook:
            nbrow["nbformat"] += ".{0[nbformat_minor]}".format(notebook)
        notebook = nbf.convert(notebook, 4)
        metadata = notebook["metadata"]
    except OSError as e:
        # the file could not be opened or read
        vprint(3, "Failed to open notebook {}".format(e))
        nbrow["processed"] = consts.N_LOAD_ERROR
        if os.path.islink(str(path / notebook_file)):
            import textwrap
            vprint(3, "Notebook is broken link. Use the following SQL to fix:")
            text = (textwrap.dedent("""\
            select notebooks_count, (char_length(newtext) - char_length(replace(newtext, '''', ''))), concat(
                'update repositories ',
                'set notebooks_count = ',
                (char_length(newtext) - char_length(replace(newtext, ';', ''))) + 1,
                ', notebooks = ''',
                newtext,
                ''' where id = ',
                id,
                ';'
            ) from (
                select id, notebooks_count, replace(
                    replace(
                        replace(
                            notebooks,
                            '{0};', ''
                        ),
                        ';{0}', ''
                    ),
                    '''', ''''''
                ) as newtext
                from repositories where id = {1}
            ) as foo;
            """.format(notebook_file, repository_id)))
            # collapse the statement onto a single line before printing
            text = " ".join(x.strip() for x in text.split("\n"))
            print(text)
        return nbrow, []

    except Exception as e:  # pylint: disable=broad-except
        # anything else raised while reading or converting the notebook
        vprint(3, "Failed to load notebook {}".format(e))
        nbrow["processed"] = consts.N_LOAD_FORMAT_ERROR
        return nbrow, []

    nbrow["kernel"] = metadata.get("kernelspec", {}).get("name", "no-kernel")

    language_info = metadata.get("language_info", {})
    nbrow["language"] = language_info.get("name", "unknown")
    nbrow["language_version"] = language_info.get("version", "unknown")
    shell = InteractiveShell.instance()
    is_python = nbrow["language"] == "python"
    is_unknown_version = nbrow["language_version"] == "unknown"

    cells = notebook["cells"]
    cells_info = []
    # highest execution count seen so far; stays -1 if no cell has one
    exec_count = -1
    for index, cell in enumerate(cells):
        vprint(3, "Loading cell {}".format(index))
        # a missing, None or 0 execution count is treated as -1
        cell_exec_count = cell.get("execution_count") or -1
        if isinstance(cell_exec_count, str) and cell_exec_count.isdigit():
            cell_exec_count = int(cell_exec_count)
        if isinstance(cell_exec_count, int):
            exec_count = max(exec_count, cell_exec_count)
        output_formats = ";".join(set(cell_output_formats(cell)))

        cell_processed = consts.C_OK
        if is_unknown_version:
            cell_processed = consts.C_UNKNOWN_VERSION

        try:
            # normalise a falsy source to "" on the cell itself as well
            source = cell["source"] = cell["source"] or ""
            if is_python and cell.get("cell_type") == "code":
                try:
                    source = shell.input_transformer_manager.transform_cell(source)
                except (IndentationError, SyntaxError) as err:
                    # the cell is kept, with an empty source and an error flag
                    vprint(3, "Error on cell transformation: {}".format(err))
                    source = ""
                    status = consts.N_LOAD_SYNTAX_ERROR
                    cell_processed |= consts.C_SYNTAX_ERROR
                # this check is only reached for Python code cells
                if "\0" in source:
                    vprint(3, "Found null byte in source. Replacing it by \\n")
                    source = source.replace("\0", "\n")

            cellrow = {
                "repository_id": repository_id,
                "notebook_id": None,
                "index": index,
                "cell_type": cell.get("cell_type", "<unknown>"),
                "execution_count": cell.get("execution_count"),
                # counted on the original cell source, not the transformed one
                "lines": cell["source"].count("\n") + 1,
                "output_formats": output_formats,
                "source": source,
                "python": is_python,
                "processed": cell_processed,
            }
            cells_info.append(cellrow)
            nbrow["total_cells"] += 1
            if cell.get("cell_type") == "code":
                nbrow["code_cells"] += 1
                if output_formats:
                    nbrow["code_cells_with_output"] += 1
            elif cell.get("cell_type") == "markdown":
                nbrow["markdown_cells"] += 1
            elif cell.get("cell_type") == "raw":
                nbrow["raw_cells"] += 1
            else:
                nbrow["unknown_cell_formats"] += 1
            if not cell["source"].strip():
                nbrow["empty_cells"] += 1
        except KeyError as err:
            # a required key was missing; mark the notebook and move on to
            # the next cell
            vprint(3, "Error on cell extraction: {}".format(err))
            status = consts.N_LOAD_FORMAT_ERROR
    # a notebook without any counted cell is reported as a format error
    if nbrow["total_cells"] == 0:
        status = consts.N_LOAD_FORMAT_ERROR

    nbrow["max_execution_count"] = exec_count
    nbrow["processed"] = status
    return nbrow, cells_info
Ejemplo n.º 21
0
import json
import nbformat

# Read the raw notebook JSON text once; each call below parses it again.
with open("test_graph.ipynb", 'r') as f:
    nbjson = f.read()

# Print the result of validating the parsed JSON. A single pre-formatted
# argument is used so the statement parses, and prints the same text, under
# both Python 2 and Python 3.
print("validate %s" % (nbformat.validate(json.loads(nbjson)),))

# Route 1: let nbformat parse the JSON text itself, requesting version 4.
node = nbformat.reads(nbjson, 4)

# Route 2: parse the JSON ourselves, wrap the resulting dict with
# from_dict(), then convert that node to version 4.
noded = nbformat.from_dict(json.loads(nbjson))
noded4 = nbformat.convert(noded, 4)



Ejemplo n.º 22
0
def render_page(nbname, config=None):
    """Render the notebook called *nbname* to HTML, optionally running it first.

    Args:
        nbname: Name of the notebook, as understood by ``nbmanager``.
        config: Optional mapping of overrides merged on top of
            ``flask_app.base_config``. The keys read here are ``'run'``
            (execute the notebook before exporting) and ``'template'``
            (template file handed to the HTML exporter).

    Returns:
        The output returned by ``HTMLExporter.from_notebook_node`` for the
        (possibly executed) notebook.

    Raises:
        Empty: the queue "Empty" exception, re-raised when every one of the
            run attempts fails with it.

    If the notebook does not exist, ``flask.abort(404)`` is called.
    """
    global runner

    # Combine base config with any provided overrides. ``None`` replaces the
    # former mutable ``{}`` default; callers passing a mapping are unaffected.
    config = dict(flask_app.base_config, **(config or {}))

    if not nbmanager.notebook_exists(nbname):
        print("Notebook %s does not exist." % nbname)
        flask.abort(404)

    print("Loading notebook %s" % nbname)
    # NOTE: the notebook is not explicitly trusted here; the
    # nbmanager.trust_notebook(nbname) call is disabled.
    nb = nbmanager.get_notebook(nbname)

    if config['run']:
        print("Making runner...")

        # This is an ugly little bit to deal with a sporadic
        #  'queue empty' bug in jupyter that only seems to
        #  happen on the integration servers...
        #  see https://github.com/paulgb/runipy/issues/36
        N_RUN_RETRIES = 4
        try:
            from queue import Empty  # Python 3 module name
        except ImportError:
            from Queue import Empty  # Python 2 module name

        # Do as complete of a reset of the kernel as we can.
        # Unfortunately, this doesn't really do a 'hard' reset
        # of any modules...
        class ResetCell(dict):
            """Simulates just enough of a notebook cell to get this
            'reset cell' executed using the existing runipy
            machinery."""
            input = "get_ipython().reset(new_session=True)"

        for attempt in range(N_RUN_RETRIES):
            try:
                if runner is None:
                    # Wait for the thread that builds the runner to finish.
                    make_notebook_runner_thread.join()

                runner.run_cell(ResetCell())
                runner.nb = nb
                print("Running notebook")
                runner.run_notebook(skip_exceptions=True)
                break
            except Empty:
                print("WARNING: Empty bug happened.")
                # Out of retries: let the last failure propagate.
                if attempt >= (N_RUN_RETRIES - 1):
                    raise
        # Export whatever notebook the runner now holds.
        nb = runner.nb

    print("Exporting notebook")
    template_dir = os.path.join(os.path.dirname(__file__), 'templates')
    exporter = HTMLExporter(
        config=Config({
            'HTMLExporter': {
                'template_file': config['template'],
                'template_path': ['.', template_dir],
            }
        })
    )
    output, resources = exporter.from_notebook_node(
        convert(nb, current_nbformat)
    )
    print("Returning.")
    return output