예제 #1
0
파일: _notebook.py 프로젝트: podoc/podoc
 def write(self, ast, context=None):
     """Convert an AST into a validated notebook object.

     The AST is first passed through `CodeCellWrapper.wrap()`. Every
     top-level node of the result then becomes one notebook cell: nodes
     named `CodeCell` go through `self.new_code_cell()`, all other nodes
     go through `self.new_markdown_cell()`.

     `context` is an optional mapping. When it has a non-empty `path`
     entry, the directory of that path is stored in `self._dir_path`;
     otherwise `self._dir_path` is set to `None` and a warning is logged.

     Returns the notebook after `nbformat.validate()` has accepted it.
     """
     self.execution_count = 1
     self._md = MarkdownPlugin()
     # Add code cells in the AST.
     ast = CodeCellWrapper().wrap(ast)
     # Find the directory containing the notebook file.
     doc_path = (context or {}).get('path', None)
     if doc_path:
         self._dir_path = op.dirname(op.realpath(doc_path))
     else:
         # `Logger.warn` is a deprecated alias of `Logger.warning`.
         logger.warning("No input path, unable to resolve the image relative paths.")
         self._dir_path = None
     # Create the notebook.
     # TODO: kernelspec
     nb = new_notebook()
     # Go through all top-level blocks.
     for index, node in enumerate(ast.children):
         # Only `CodeCell` nodes are code cells; anything else is Markdown.
         node_type = 'code' if node.name == 'CodeCell' else 'markdown'
         # Dispatch to `new_code_cell()` or `new_markdown_cell()`.
         make_cell = getattr(self, 'new_{}_cell'.format(node_type))
         cell = make_cell(node, index)
         nb.cells.append(cell)
     nbformat.validate(nb)
     return nb
예제 #2
0
def test_notebook_reader_hello():
    """The `hello.ipynb` notebook must read as the AST of `hello *world*`."""
    # The test notebook contains just one Markdown cell.
    notebook = open_notebook(get_test_file_path('notebook', 'hello.ipynb'))
    # Convert the notebook to an AST and display it.
    tree = NotebookReader().read(notebook)
    tree.show()
    # The AST must equal the one obtained from the same single Markdown line.
    expected = MarkdownPlugin().read('hello *world*')
    assert tree == expected
예제 #3
0
파일: _notebook.py 프로젝트: podoc/podoc
 def read_markdown(self, cell, cell_index=None):
     """Add the contents of one Markdown cell to `self.tree`.

     When `self._markdown_tree` still holds pre-parsed cell trees, the
     first one is removed from the list and its children are added to
     `self.tree.children`. Otherwise `cell.source` is parsed on its own
     with `MarkdownPlugin` and a warning is logged, because an isolated
     call is slow. A cell that parses to no children is skipped.

     `cell_index` is accepted but not used.
     """
     if self._markdown_tree:
         cell_tree = self._markdown_tree.pop(0)
         self.tree.children.extend(cell_tree.children)
         return
     # `Logger.warn` is a deprecated alias of `Logger.warning`.
     logger.warning("Isolated read_markdown() call: slow because of pandoc call overhead.")
     ast = MarkdownPlugin().read(cell.source)
     if not ast.children:
         logger.debug("Skipping empty node.")
         return
     # Add the parsed blocks, not the root node wrapping them, so that both
     # paths leave `self.tree` with the same flat structure.
     self.tree.children.extend(ast.children)  # pragma: no cover
예제 #4
0
파일: _notebook.py 프로젝트: podoc/podoc
 def _read_all_markdown(self, cells):
     """Parse every Markdown cell in a single call and queue one tree per cell.

     The sources of the cells whose `cell_type` is `'markdown'` are joined
     with `self._NEW_CELL_DELIMITER` (surrounded by blank lines) and parsed
     with one `MarkdownPlugin().read()` call. The top-level nodes of the
     result are then split again at every delimiter node, and each group is
     appended to `self._markdown_tree` as a new `root` node.

     Nothing is queued when the parsed document has no children.
     """
     markdown_sources = []
     for cell in cells:
         if cell.cell_type == 'markdown':
             markdown_sources.append(cell.source)
     separator = '\n\n%s\n\n' % self._NEW_CELL_DELIMITER
     parsed = MarkdownPlugin().read(separator.join(markdown_sources))
     if not parsed.children:
         logger.debug("Skipping empty node.")
         return
     cell_tree = ASTNode('root')
     for block in parsed.children:
         if block.children and block.children[0] == self._NEW_CELL_DELIMITER:
             # A delimiter closes the current cell: queue its tree (the
             # delimiter node itself is dropped) and start the next one.
             self._markdown_tree.append(cell_tree)
             cell_tree = ASTNode('root')
         else:
             cell_tree.children.append(block)
     # Queue the tree of the last cell, unless it is empty.
     if cell_tree.children:
         self._markdown_tree.append(cell_tree)
예제 #5
0
파일: _notebook.py 프로젝트: rossant/podoc
 def write(self, ast, resources=None):
     """Build a validated notebook from an AST.

     `resources` is an optional mapping `{filename: data}`; it is stored in
     `self.resources` (an empty dict when not given). The AST is passed
     through `wrap_code_cells()`, then each top-level node becomes a cell:
     `CodeCell` nodes via `self.new_code_cell()`, every other node via
     `self.new_markdown_cell()`.

     Returns the notebook once `nbformat.validate()` has accepted it.
     """
     self.resources = resources if resources else {}
     self.execution_count = 1
     self._md = MarkdownPlugin()
     # Group the code blocks of the AST into code cells.
     wrapped = wrap_code_cells(ast)
     nb = new_notebook()
     for position, block in enumerate(wrapped.children):
         # Pick the cell factory from the block type.
         kind = 'code' if block.name == 'CodeCell' else 'markdown'
         make_cell = getattr(self, 'new_{}_cell'.format(kind))
         cell = make_cell(block, position)
         nb.cells.append(cell)
     nbformat.validate(nb)
     return nb
예제 #6
0
def test_notebook_reader_notebook():
    """The `simplenb.ipynb` notebook must convert to the reference Markdown."""
    # Open the test notebook, which has a code cell, and convert it to an AST.
    nb_path = get_test_file_path('notebook', 'simplenb.ipynb')
    notebook = open_notebook(nb_path)
    reader = NotebookReader()
    tree = reader.read(notebook)
    tree.show()

    # Load the Markdown version used as the reference.
    md_path = get_test_file_path('markdown', 'simplenb.md')
    expected = load_text(md_path)
    # Render the AST to Markdown and point the resource placeholders at the
    # `simplenb_files/` directory. The reference file ends with a new line,
    # the rendered AST does not, so one is appended.
    rendered = MarkdownPlugin().write(tree)
    rendered = re.sub(r'\{resource:([^\}]+)\}', r'simplenb_files/\1', rendered) + '\n'
    # The conversion is done without an output path, so the image file name
    # differs from the one in the reference file.
    expected = expected.replace('simplenb_4_1.png', 'output_4_1.png')
    assert rendered == expected

    assert 'output_4_1.png' in reader.resources
예제 #7
0
파일: _notebook.py 프로젝트: rossant/podoc
class NotebookWriter(object):
    """Write an AST as a Jupyter notebook.

    Top-level `CodeCell` nodes become code cells with their outputs; every
    other top-level node becomes a Markdown cell.
    """

    def write(self, ast, resources=None):
        """Build a validated notebook from `ast`.

        `resources` is an optional mapping `{filename: data}`, read by
        `_get_b64_resource()` when an image output is embedded.

        Returns the notebook once `nbformat.validate()` has accepted it.
        """
        # Mapping {filename: data}.
        self.resources = resources or {}
        self.execution_count = 1
        self._md = MarkdownPlugin()
        # Add code cells in the AST.
        ast = wrap_code_cells(ast)
        # Create the notebook.
        nb = new_notebook()
        # Go through all top-level blocks.
        for index, node in enumerate(ast.children):
            # Only `CodeCell` nodes are code cells; anything else is Markdown.
            node_type = 'code' if node.name == 'CodeCell' else 'markdown'
            # Dispatch to `new_code_cell()` or `new_markdown_cell()`.
            cell = getattr(self, 'new_{}_cell'.format(node_type))(node, index)
            nb.cells.append(cell)
        nbformat.validate(nb)
        return nb

    def new_markdown_cell(self, node, index=None):
        """Return a Markdown cell holding `node` rendered by `self._md`.

        `index` is accepted but not used.
        """
        return new_markdown_cell(self._md.write(node))

    def _get_b64_resource(self, fn):
        """Return the base64 of a resource from its filename.

        The mapping `resources={fn: data}` needs to be passed to the `write()`
        method.

        The encoded text is split into lines of 76 characters, each ending
        with a new line. An empty string is returned, and a warning logged,
        when the resource is missing or empty.

        """
        data = self.resources.get(fn, None)
        if not data:  # pragma: no cover
            # `Logger.warn` is a deprecated alias of `Logger.warning`.
            logger.warning("Resource `%s` couldn't be found.", fn)
            return ''
        out = base64.b64encode(data).decode('utf8')
        # NOTE: split the output in multiple lines of 76 characters,
        # to make easier the comparison with actual Jupyter Notebook files.
        N = 76
        out = '\n'.join([out[i:i + N] for i in range(0, len(out), N)]) + '\n'
        return out

    def new_code_cell(self, node, index=None):
        """Return a code cell built from a `CodeCell` node.

        The first child of `node` must be a `CodeBlock` and provides the cell
        input. Each following child adds one output:

        * a `CodeBlock` whose `lang` is `stdout` or `stderr` becomes a
          `stream` output; one whose `lang` is `result` (or empty) becomes an
          `execute_result` output;
        * a `Para` whose first child is an `Image` becomes a `display_data`
          output with the base64 image data and the caption as plain text.

        Children of any other type are skipped with a warning.
        `self.execution_count` is incremented once per cell. `index` is
        accepted but not used.
        """
        # Get the code cell input: the first child of the CodeCell block.
        input_block = node.children[0]
        assert input_block.name == 'CodeBlock'
        cell = new_code_cell(input_block.children[0],
                             execution_count=self.execution_count,
                             )
        # Next we need to add the outputs: the next children in the CodeCell.
        for child in node.children[1:]:
            if child.name == 'CodeBlock':
                # The output's type is given by the code block's language.
                # It can be: `stdout`, `stderr`, `result`.
                output_type = child.lang or 'result'
                assert output_type in ('stdout', 'stderr', 'result')
                contents = child.children[0]
                if output_type == 'result':
                    kwargs = dict(execution_count=self.execution_count,
                                  data={'text/plain': contents})
                    # Output type to pass to nbformat.
                    output_type = 'execute_result'
                else:
                    # Standard output or error.
                    # NOTE: append new lines at the end of every line in
                    # stdout and stderr contents, to match with the Jupyter
                    # Notebook.
                    kwargs = dict(text=_append_newlines(contents),
                                  name=output_type)
                    # Output type to pass to nbformat.
                    output_type = 'stream'
            elif child.name == 'Para':
                # A Markdown paragraph containing an image.
                img = child.children[0]
                assert img.name == 'Image'
                fn = img.url
                caption = self._md.write(img.children[0])
                output_type = 'display_data'
                data = {}  # Dictionary {mimetype: data_buffer}.
                # Infer the mime type of the file, from its filename and
                # extension.
                mime_type = guess_type(fn)[0]
                assert mime_type  # unknown extension: this shouldn't happen!
                data[mime_type] = self._get_b64_resource(fn)
                assert data[mime_type]  # TODO
                data['text/plain'] = caption
                kwargs = dict(data=data)
            else:
                # Without this branch, `output_type` and `kwargs` would be
                # unbound, or left over from the previous child, which would
                # duplicate that child's output.
                logger.warning("Skipping unsupported output node `%s`.", child.name)
                continue
            cell.outputs.append(new_output(output_type, **kwargs))
        self.execution_count += 1
        return cell

    def new_raw_cell(self, node, index=None):
        """Placeholder for raw cells: not implemented, returns `None`."""
        # TODO
        pass
예제 #8
0
파일: _notebook.py 프로젝트: podoc/podoc
class NotebookWriter(object):
    """Write an AST as a Jupyter notebook.

    Top-level `CodeCell` nodes become code cells with their outputs; every
    other top-level node becomes a Markdown cell.
    """

    def write(self, ast, context=None):
        """Build a validated notebook from `ast`.

        `context` is an optional mapping. When it has a non-empty `path`
        entry, the directory of that path is stored in `self._dir_path` and
        used by `new_code_cell()` to locate image files; otherwise
        `self._dir_path` is `None` and a warning is logged.

        Returns the notebook once `nbformat.validate()` has accepted it.
        """
        self.execution_count = 1
        self._md = MarkdownPlugin()
        # Add code cells in the AST.
        ast = CodeCellWrapper().wrap(ast)
        # Find the directory containing the notebook file.
        doc_path = (context or {}).get('path', None)
        if doc_path:
            self._dir_path = op.dirname(op.realpath(doc_path))
        else:
            # `Logger.warn` is a deprecated alias of `Logger.warning`.
            logger.warning("No input path, unable to resolve the image relative paths.")
            self._dir_path = None
        # Create the notebook.
        # TODO: kernelspec
        nb = new_notebook()
        # Go through all top-level blocks.
        for index, node in enumerate(ast.children):
            # Only `CodeCell` nodes are code cells; anything else is Markdown.
            node_type = 'code' if node.name == 'CodeCell' else 'markdown'
            # Dispatch to `new_code_cell()` or `new_markdown_cell()`.
            cell = getattr(self, 'new_{}_cell'.format(node_type))(node, index)
            nb.cells.append(cell)
        nbformat.validate(nb)
        return nb

    def new_markdown_cell(self, node, index=None):
        """Return a Markdown cell holding `node` rendered by `self._md`.

        `index` is accepted but not used.
        """
        return new_markdown_cell(self._md.write(node))

    def new_code_cell(self, node, index=None):
        """Return a code cell built from a `CodeCell` node.

        The first child of `node` must be a `CodeBlock` and provides the cell
        input. Each following child adds one output:

        * a `CodeBlock` whose `lang` is `{output:stdout}` or
          `{output:stderr}` becomes a `stream` output; one whose `lang` is
          `{output:result}` (or empty) becomes an `execute_result` output;
        * a `Para` whose first child is an `Image` becomes a `display_data`
          output. The image file is read from disk when it can be found, and
          the caption is stored both as the output's plain text and in the
          cell metadata under `podoc` / `output_text`.

        Children of any other type are skipped with a warning.
        `self.execution_count` is incremented once per cell. `index` is
        accepted but not used.
        """
        # Get the code cell input: the first child of the CodeCell block.
        input_block = node.children[0]
        assert input_block.name == 'CodeBlock'
        cell = new_code_cell(input_block.children[0],
                             execution_count=self.execution_count,
                             )
        # Next we need to add the outputs: the next children in the CodeCell.
        for child in node.children[1:]:
            if child.name == 'CodeBlock':
                # The output's type is given by the code block's language.
                output_type = child.lang or '{output:result}'
                assert output_type.startswith('{output')
                contents = child.children[0]
                if output_type == '{output:result}':
                    kwargs = dict(execution_count=self.execution_count,
                                  data={'text/plain': contents})
                    # Output type to pass to nbformat.
                    output_type = 'execute_result'
                else:
                    # NOTE: append new lines at the end of every line in
                    # stdout and stderr contents, to match with the Jupyter
                    # Notebook.
                    contents = _append_newlines(contents)
                    if output_type in ('{output:stdout}', '{output:stderr}'):
                        # Standard output or error.
                        # NOTE: strip {output } and only keep stdout/stderr
                        # in name.
                        kwargs = dict(text=contents, name=output_type[8:-1])
                        # Output type to pass to nbformat.
                        output_type = 'stream'
            elif child.name == 'Para':
                # A Markdown paragraph containing an image.
                img = child.children[0]
                assert img.name == 'Image'
                fn = img.url
                caption = self._md.write(img.children[0])
                output_type = 'display_data'
                data = {}  # Dictionary {mimetype: data_buffer}.
                # Infer the mime type of the file, from its filename and
                # extension.
                mime_type = guess_type(fn)[0]
                assert mime_type  # unknown extension: this shouldn't happen!
                # Get the resource data.
                if self._dir_path:
                    image_path = op.join(self._dir_path, fn)
                # The image path could be absolute.
                elif op.isabs(fn):
                    image_path = fn
                else:  # pragma: no cover
                    image_path = None
                # If the image path exists, open it.
                if image_path and op.exists(image_path):
                    with open(image_path, 'rb') as f:
                        data[mime_type] = _get_b64_resource(f.read())
                else:  # pragma: no cover
                    logger.debug("File `%s` doesn't exist.", image_path)
                # Save the caption in the output text.
                data['text/plain'] = caption
                # Save the caption in the cell metadata too, so that it is not lost when
                # executing the notebook.
                if 'podoc' not in cell.metadata:
                    cell.metadata['podoc'] = {}
                cell.metadata['podoc'].update({'output_text': caption})
                kwargs = dict(data=data)
            else:
                # Without this branch, `output_type` and `kwargs` would be
                # unbound, or left over from the previous child, which would
                # duplicate that child's output.
                logger.warning("Skipping unsupported output node `%s`.", child.name)
                continue
            # An `{output:...}` language that is not handled above is an error.
            assert not output_type.startswith('{output')
            cell.outputs.append(new_output(output_type, **kwargs))
        self.execution_count += 1
        return cell

    def new_raw_cell(self, node, index=None):
        """Placeholder for raw cells: not implemented, returns `None`."""
        # TODO
        pass