コード例 #1
0
def test_notebook_empty():
    """Write a tree holding one empty Para to a notebook and read it back.

    The tree obtained from the reader must have no children.
    """
    source_tree = ASTNode('root')
    source_tree.add_child(ASTNode('Para'))
    # AST -> notebook -> AST.
    notebook = NotebookWriter().write(source_tree)
    tree_read_back = NotebookReader().read(notebook)
    assert not tree_read_back.children
コード例 #2
0
ファイル: _markdown.py プロジェクト: pombredanne/podoc
    def transform(self, obj):
        """Recursively convert `obj` into an ASTNode (or a string).

        `obj` is either a string, which is returned unchanged, or a
        CommonMark.Node instance. For a node, a `transform_<name>` method is
        looked up on `self` (falling back to `transform_Node`) and called
        with the CommonMark node and a freshly created ASTNode. The handler
        may return:

        * an ASTNode or a string, which is returned directly;
        * a list of children still to be processed, each of which is
          transformed recursively and attached to the new node.
        """
        # Strings are leaves: nothing to convert.
        if isinstance(obj, string_types):
            return obj

        # From here on, obj is a CommonMark.Node instance.
        name = self.get_node_name(obj)
        if name != 'List':
            handler = getattr(self, 'transform_%s' % name,
                              self.transform_Node)
        else:
            # CommonMark has a single List node type; whether it is a
            # Bullet or an Ordered list is stored in list_data['type'].
            # The AST name becomes BulletList or OrderedList.
            list_type = obj.list_data['type']
            name = list_type + 'List'
            if list_type == 'Bullet':
                handler = self.transform_BulletList
            else:
                handler = self.transform_OrderedList

        node = ASTNode(name)
        result = handler(obj, node)
        # A handler that returns a finished node or a string short-circuits
        # the generic children processing below.
        if isinstance(result, (ASTNode, string_types)):
            return result
        # Otherwise the handler returned the list of raw children.
        assert isinstance(result, list)
        node.children = list(map(self.transform, result))
        return node
コード例 #3
0
ファイル: _notebook.py プロジェクト: pombredanne/podoc
 def read_code(self, cell, cell_index=None):
     """Convert a notebook code cell into a CodeCell node appended to the tree.

     The CodeCell's first child is a CodeBlock holding the cell source,
     tagged with the notebook's language (`self.language`). One extra child
     is then added per supported output:

     * `stream` outputs become a CodeBlock whose lang is the stream name;
     * `display_data` / `execute_result` outputs become either a CodeBlock
       with lang `result` holding the plain-text representation, or an
       Image node when `extract_output()` finds extractable data. In that
       case the data is stored in `self.resources` under the generated
       file name.

     Outputs of any other type are skipped.

     :param cell: the notebook code cell.
     :param cell_index: index of the cell, used to name output files.
     """
     node = ASTNode('CodeCell')
     # The first child is the source.
     # NOTE: the language of the code block is the notebook's language.
     node.add_child(ASTNode('CodeBlock',
                            lang=self.language,
                            children=[cell.source]))
     # Then, we add one extra child per output.
     for output_index, output in enumerate(cell.get('outputs', [])):
         if output.output_type == 'stream':
             child = ASTNode('CodeBlock', lang=output.name,  # stdout/stderr
                             children=[output.text])
         elif output.output_type in ('display_data', 'execute_result'):
             # Output text node.
             text = output.data.get('text/plain', 'Output')
             # Extract image output, if any.
             out = extract_output(output)
             if out is None:
                 child = ASTNode('CodeBlock', lang='result',
                                 children=[text])
             else:
                 mime_type, data = out
                 fn = output_filename(mime_type=mime_type,
                                      cell_index=cell_index,
                                      output_index=output_index,
                                      unique_key=None,  # TODO
                                      )
                 self.resources[fn] = data
                 child = ASTNode('Image', url=fn, children=[text])
         else:
             # Unsupported output type (e.g. 'error'). Without this branch
             # `child` would be unbound for the first output, or would
             # still reference the previous output's node and be added
             # twice. Skip the output instead.
             continue
         node.add_child(child)
     self.tree.children.append(node)
コード例 #4
0
ファイル: _markdown.py プロジェクト: ellisonbg/podoc
    def transform(self, obj):
        """Turn a CommonMark node (or a string) into its AST counterpart.

        Strings pass through untouched. For a CommonMark.Node, the matching
        `transform_*` method (default: `transform_Node`) receives the
        CommonMark node together with a new ASTNode. If it returns an
        ASTNode or a string, that value is the result; if it returns a list,
        the items are treated as unprocessed children, transformed
        recursively and stored in the new node's `children`.
        """
        if isinstance(obj, string_types):
            # Text leaf: keep as is.
            return obj

        # obj is a CommonMark.Node instance.
        node_name = self.get_node_name(obj)
        if node_name == 'List':
            # CommonMark exposes one List type only; the concrete flavour
            # (Bullet or Ordered) lives in list_data['type'], which gives
            # the AST names BulletList and OrderedList.
            flavour = obj.list_data['type']
            node_name = flavour + 'List'
            func = (self.transform_BulletList if flavour == 'Bullet'
                    else self.transform_OrderedList)
        else:
            func = getattr(self, 'transform_%s' % node_name,
                           self.transform_Node)

        node = ASTNode(node_name)
        out = func(obj, node)
        # NOTE: a node or a string returned by the function is final and is
        # handed back directly.
        if isinstance(out, ASTNode) or isinstance(out, string_types):
            return out

        # Anything else must be the list of children left to process.
        assert isinstance(out, list)
        transformed = []
        for raw_child in out:
            transformed.append(self.transform(raw_child))
        node.children = transformed
        return node
コード例 #5
0
ファイル: _notebook.py プロジェクト: podoc/podoc
class CodeCellWrapper(object):
    """Wrap top-level source CodeBlocks, and the outputs following them,
    into CodeCell nodes."""

    def infer_language(self, ast):
        """Return the most frequent language among the top-level CodeBlock
        and CodeCell nodes, which is taken to be the notebook's language.

        Falls back to 'python' when there is no such node.
        """
        langs = [_get_cell_lang(child) for child in ast.children
                 if child.name in ('CodeBlock', 'CodeCell')]
        ranking = Counter(langs).most_common(1)
        if not ranking:
            return 'python'
        return ranking[0][0]

    def wrap(self, ast):
        """Return a copy of `ast` in which source blocks and their outputs
        are grouped into CodeCell nodes."""
        self.ast = ast.copy()
        self.ast.children = []
        self._code_cell = None
        # Infer the notebook's language.
        self.language = self.infer_language(ast)
        for node in ast.children:
            if self._code_cell:
                if self.is_output(node) or self.is_image(node):
                    # Still inside the open cell: attach and move on.
                    self.add_output(node)
                    continue
                # Anything else closes the open cell.
                self.end_code_cell()
            # No cell is open at this point.
            if self.is_source(node):
                self.start_code_cell(node)
            else:
                self.append(node)
        # Ensure the last cell is appended.
        self.end_code_cell()
        return self.ast

    def is_output(self, node):
        """Whether `node` is a CodeBlock with no language or with a
        language starting with '{output'."""
        if node.name != 'CodeBlock':
            return False
        lang = node.lang
        return lang in (None, '') or lang.startswith('{output')

    def is_image(self, node):
        """Whether `node` is a Para whose only child is an Image node."""
        children = node.children
        if node.name != 'Para' or len(children) != 1:
            return False
        only_child = children[0]
        return isinstance(only_child, ASTNode) and only_child.name == 'Image'

    def is_source(self, node):
        """Whether `node` is a CodeBlock in the inferred language."""
        if node.name != 'CodeBlock':
            return False
        return node.lang == self.language

    def start_code_cell(self, node):
        """Open a new CodeCell with `node` as its source CodeBlock."""
        self._code_cell = cell = ASTNode('CodeCell')
        cell.add_child(node)

    def add_output(self, node):
        """Attach `node` to the currently open CodeCell."""
        self._code_cell.add_child(node)

    def end_code_cell(self):
        """Append the open CodeCell, if any, to the output tree and close it."""
        if not self._code_cell:
            return
        self.append(self._code_cell)
        self._code_cell = None

    def append(self, node):
        """Add `node` as a top-level child of the output tree."""
        self.ast.add_child(node)
コード例 #6
0
def test_wrap_code_cells_1():
    """wrap_code_cells() on a tree holding a single Python CodeBlock."""
    tree = ASTNode('root')
    tree.add_child(ASTNode('CodeBlock', lang='python', children=['']))

    wrapped = wrap_code_cells(tree)
    wrapped.show()

    # Expected: the lone CodeBlock sits inside a CodeCell under the root.
    expected = ASTNode('root')
    expected_cell = ASTNode('CodeCell', children=[tree.children[0]])
    expected.add_child(expected_cell)

    assert wrapped == expected
コード例 #7
0
ファイル: _notebook.py プロジェクト: rossant/podoc
class CodeCellWrapper(object):
    """Group top-level source CodeBlocks and the output nodes that follow
    them into CodeCell nodes.

    A node following a source block belongs to the same cell as long as it
    is an output CodeBlock (see `is_output`) or an image paragraph (see
    `is_image`).
    """

    # Language of the CodeBlocks treated as cell sources. Override it on a
    # subclass or on an instance to wrap code written in another language.
    language = 'python'

    def wrap(self, ast):
        """Return a copy of `ast` whose top-level source CodeBlocks, with
        their outputs, are wrapped within CodeCell nodes."""
        self.ast = ast.copy()
        self.ast.children = []
        self._code_cell = None
        for node in ast.children:
            if self._code_cell:
                if self.is_output(node) or self.is_image(node):
                    self.add_output(node)
                else:
                    # The node is not part of the open cell: close it.
                    self.end_code_cell()
            if not self._code_cell:
                if self.is_source(node):
                    self.start_code_cell(node)
                else:
                    self.append(node)
        # Ensure the last cell is appended.
        self.end_code_cell()
        return self.ast

    def is_output(self, node):
        """Whether `node` is a CodeBlock with an output-specific language
        (none, empty, stdout, stderr or result)."""
        return ((node.name == 'CodeBlock') and
                (node.lang in (None, '', 'stdout', 'stderr', 'result')))

    def is_image(self, node):
        """Whether `node` is a Para whose single child is an Image node."""
        children = node.children
        return ((node.name == 'Para') and
                (len(children) == 1) and
                (isinstance(children[0], ASTNode)) and
                (children[0].name == 'Image'))

    def is_source(self, node):
        """Whether `node` is a CodeBlock written in `self.language`."""
        return node.name == 'CodeBlock' and node.lang == self.language

    def start_code_cell(self, node):
        """Open a new CodeCell whose first child is the source `node`."""
        self._code_cell = ASTNode('CodeCell')
        # Source CodeBlock.
        self._code_cell.add_child(node)

    def add_output(self, node):
        """Add `node` to the currently open CodeCell."""
        self._code_cell.add_child(node)

    def end_code_cell(self):
        """Append the open CodeCell, if any, to the output tree."""
        if self._code_cell:
            self.append(self._code_cell)
            self._code_cell = None

    def append(self, node):
        """Add `node` as a top-level child of the output tree."""
        self.ast.add_child(node)
コード例 #8
0
ファイル: test_markdown.py プロジェクト: pombredanne/podoc
def ast():
    """Build a small tree: a root holding one Para made of the string
    'hello ' followed by an Emph node containing 'world'."""
    # TODO: move this to conftest
    emphasis = ASTNode(name='Emph')
    emphasis.add_child('world')

    # First block
    paragraph = ASTNode(name='Para', children=['hello '])
    paragraph.add_child(emphasis)

    root = ASTNode('root')
    root.add_child(paragraph)
    return root
コード例 #9
0
ファイル: _notebook.py プロジェクト: podoc/podoc
    def read(self, notebook, context=None):
        """Read a notebook and return the corresponding tree.

        :param notebook: an `nbformat.NotebookNode` instance.
        :param context: optional mapping; its `output` and `path` entries,
            when present, are used to derive the unique key for image names.
        :returns: the root node of the tree built from the notebook cells.

        Besides the returned tree, `self.resources` is reset to an empty
        dictionary {filename: data}.
        """
        assert isinstance(notebook, nbformat.NotebookNode)
        self.resources = {}  # Dictionary {filename: data}.
        if not context:
            context = {}
        # Unique key for image names: the extension-less basename of the
        # output file if there is one, otherwise of the path, otherwise None.
        key = op.basename(context.get('output', None) or '')
        if not key:
            key = op.basename(context.get('path', None) or '')
        self._unique_key = op.splitext(key)[0] or None
        # Create the output tree.
        self.tree = ASTNode('root')
        # Language of the notebook.
        # NOTE: Python is used by default when the metadata has no language.
        language_info = notebook.metadata.get('language_info', {})
        self.language = language_info.get('name', 'python')

        # NOTE: for performance reasons, the Markdown of all cells is parsed
        # at once to reduce the overhead of calling pandoc.
        self._markdown_tree = []
        self._read_all_markdown(notebook.cells)

        # Dispatch every cell to the read_<cell_type>() method.
        for cell_index, cell in enumerate(notebook.cells):
            cell_reader = getattr(self, 'read_{}'.format(cell.cell_type))
            cell_reader(cell, cell_index)

        return self.tree
コード例 #10
0
ファイル: _markdown.py プロジェクト: ellisonbg/podoc
 def transform_CodeBlock(self, obj, node):
     """Fill `node` from a CommonMark code block.

     The node's `lang` is set to the block's info string. A block whose
     language is `math` is turned into a Para holding a single MathBlock
     (with the stripped contents) and the node itself is returned. Any
     other block returns its literal contents as a one-item children list.
     """
     node.lang = obj.info
     literal = obj.literal
     if node.lang != 'math':
         return [literal]
     # Math block element: rewrite the node as Para > MathBlock.
     node.name = 'Para'
     math_block = ASTNode('MathBlock', children=[literal.strip()])
     node.children = [math_block]
     return node
コード例 #11
0
ファイル: _notebook.py プロジェクト: pombredanne/podoc
def wrap_code_cells(ast, language='python'):
    """Take an AST and wrap top-level CodeBlocks within CodeCells.

    Every top-level CodeBlock whose language is `language` starts a new
    CodeCell. The blocks that immediately follow it are added to the same
    cell as long as they are outputs, i.e. either a CodeBlock with a
    notebook-specific language (none, empty, stdout, stderr, result) or a
    Para containing a single Image. Any other block closes the cell.

    :param ast: the tree to process; it is not modified, a copy is returned.
    :param language: language of the CodeBlocks considered as cell sources.
    :returns: a copy of `ast` with the new list of top-level children.
    """
    out = ast.copy()
    out.children = []
    current_cell = None
    for child in ast.children:
        # Notebook code cell.
        if child.name == 'CodeBlock' and child.lang == language:
            # A source block directly following another cell closes that
            # cell: flush it first, otherwise it would be silently dropped.
            if current_cell is not None:
                out.add_child(current_cell)
            # Wrap CodeBlocks within CodeCells.
            current_cell = ASTNode('CodeCell')
            current_cell.add_child(child)
            continue

        if current_cell is not None:
            # Decide whether the block is part of the current cell.
            name = child.name
            children = child.children
            # Case 1: a code block with a notebook-specific language.
            is_output = ((name == 'CodeBlock') and
                         (child.lang in (None, '', 'stdout',
                                         'stderr', 'result')))
            # Case 2: just an image.
            is_image = ((name == 'Para') and
                        (len(children) == 1) and
                        (isinstance(children[0], ASTNode)) and
                        (children[0].name == 'Image'))
            if is_output or is_image:
                # Add the current block to the cell's outputs.
                current_cell.add_child(child)
                continue
            # The block is no longer part of the current cell: add the
            # cell that has just finished before the block itself.
            out.add_child(current_cell)
            current_cell = None

        out.add_child(child)
    # Add the last current cell, if it is still open.
    if current_cell is not None:
        out.add_child(current_cell)
    return out
コード例 #12
0
ファイル: _notebook.py プロジェクト: podoc/podoc
 def _read_all_markdown(self, cells):
     """Parse the Markdown of all cells in one go and split the result
     into one tree per cell, stored in `self._markdown_tree`.

     The sources of the Markdown cells are joined with
     `self._NEW_CELL_DELIMITER` and parsed in a single MarkdownPlugin
     call. The parsed blocks are then split again at each delimiter block.
     """
     markdown_sources = [c.source for c in cells
                         if c.cell_type == 'markdown']
     delimiter = self._NEW_CELL_DELIMITER
     separator = '\n\n%s\n\n' % delimiter
     ast = MarkdownPlugin().read(separator.join(markdown_sources))
     if not ast.children:
         logger.debug("Skipping empty node.")
         return
     cell_tree = ASTNode('root')
     for block in ast.children:
         if block.children and block.children[0] == delimiter:
             # Delimiter block: it is not kept. Store the tree of the cell
             # that just ended and start the next one.
             self._markdown_tree.append(cell_tree)
             cell_tree = ASTNode('root')
         else:
             cell_tree.children.append(block)
     # Append the last cell tree if not empty.
     if cell_tree.children:
         self._markdown_tree.append(cell_tree)
コード例 #13
0
ファイル: test_notebook.py プロジェクト: rossant/podoc
def test_wrap_code_cells_1():
    """Check wrap_code_cells() on a tree made of one Python CodeBlock."""
    root = ASTNode('root')
    root.add_child(ASTNode('CodeBlock', lang='python', children=['']))

    actual = wrap_code_cells(root)
    actual.show()

    # Expected tree: root > CodeCell > the original CodeBlock.
    code_cell = ASTNode('CodeCell', children=[root.children[0]])
    expected = ASTNode('root')
    expected.add_child(code_cell)

    assert_equal(actual, expected)
コード例 #14
0
ファイル: _notebook.py プロジェクト: rossant/podoc
 def start_code_cell(self, node):
     """Open a new CodeCell and add `node` as its first child."""
     self._code_cell = new_cell = ASTNode('CodeCell')
     # `node` is the source CodeBlock of the cell.
     new_cell.add_child(node)
コード例 #15
0
ファイル: _notebook.py プロジェクト: podoc/podoc
 def read_code(self, cell, cell_index=None):
     """Convert a notebook code cell into a CodeCell node appended to
     `self.tree`.

     The first child of the CodeCell is a CodeBlock with the right-stripped
     cell source, in the notebook's language. Each output then adds one
     child:

     * `stream`: a CodeBlock with lang `{output:<stream name>}`;
     * `error`: a CodeBlock with lang `{output:error}` holding the
       traceback lines joined with newlines;
     * `display_data` / `execute_result`: a CodeBlock with lang
       `{output:result}` holding the output text, or, when an image or a
       table can be extracted, a Para wrapping an Image node whose data is
       stored in `self.resources`.

     ANSI codes are removed from all output texts.

     :param cell: the notebook code cell.
     :param cell_index: index of the cell, used to name output files.
     :raises ValueError: if an output has an unknown type.
     """
     code_cell = ASTNode('CodeCell')
     # TODO: improve this.
     code_cell._visit_meta['is_block'] = True
     # The source comes first.
     # NOTE: the code block gets the notebook's language.
     source_block = ASTNode('CodeBlock',
                            lang=self.language,
                            children=[cell.source.rstrip()])
     code_cell.add_child(source_block)
     # Then comes one extra child per output.
     for output_index, output in enumerate(cell.get('outputs', [])):
         kind = output.output_type
         if kind == 'stream':
             stream_lang = '{output:' + output.name + '}'  # stdout/stderr
             stream_text = _remove_ansi(output.text.rstrip())
             child = ASTNode('CodeBlock', lang=stream_lang,
                             children=[stream_text])
         elif kind == 'error':  # pragma: no cover
             traceback_text = _remove_ansi('\n'.join(output.traceback))
             child = ASTNode('CodeBlock', lang='{output:error}',
                             children=[traceback_text])
         elif kind in ('display_data', 'execute_result'):
             # Output text: the cell metadata has priority over the
             # output's plain-text representation.
             text = cell.metadata.get('podoc', {}).get('output_text', None)
             if not text:
                 text = output.data.get('text/plain', 'Output')
             # Remove color codes.
             text = _remove_ansi(text)
             # Look for an image first, then for a table.
             extracted = extract_image(output)
             if extracted is None:
                 extracted = extract_table(output)
             if extracted is not None:
                 mime_type, data = extracted
                 fn = output_filename(mime_type=mime_type,
                                      cell_index=cell_index,
                                      output_index=output_index,
                                      unique_key=self._unique_key,
                                      )
                 self.resources[fn] = data
                 # Prevent multi-line image legend.
                 if '\n' in text:  # pragma: no cover
                     text = 'Output'
                 # The Image node is wrapped in a Para.
                 image = ASTNode('Image', url='{resource:%s}' % fn,
                                 children=[text])
                 child = ASTNode('Para', children=[image])
             else:
                 child = ASTNode('CodeBlock',
                                 lang='{output:result}',
                                 children=[text])
         else:  # pragma: no cover
             raise ValueError("Unknown output type `%s`." % kind)
         code_cell.add_child(child)
     self.tree.children.append(code_cell)
コード例 #16
0
ファイル: test_notebook.py プロジェクト: rossant/podoc
def test_wrap_code_cells_2():
    """wrap_code_cells() on two consecutive Python CodeBlocks: each one is
    expected to end up in its own CodeCell."""
    code_blocks = [ASTNode('CodeBlock', lang='python', children=[source])
                   for source in ('a', 'b')]

    tree = ASTNode('root')
    for code_block in code_blocks:
        tree.add_child(code_block)

    tree.show()
    wrapped = wrap_code_cells(tree)
    wrapped.show()

    # Expected tree: one CodeCell per CodeBlock, in the same order.
    expected = ASTNode('root')
    for code_block in code_blocks:
        code_cell = ASTNode('CodeCell')
        code_cell.add_child(code_block)
        expected.add_child(code_cell)
    expected.show()

    assert_equal(wrapped, expected)
コード例 #17
0
ファイル: _notebook.py プロジェクト: podoc/podoc
 def start_code_cell(self, node):
     """Begin a new CodeCell; `node`, the source CodeBlock, becomes its
     first child."""
     self._code_cell = cell = ASTNode('CodeCell')
     cell.add_child(node)
コード例 #18
0
def ast():
    """Return a root node with a single Para child: the string 'hello '
    followed by an Emph node wrapping 'world'."""
    # TODO: move this to conftest
    tree = ASTNode('root')

    # First block: text followed by an emphasized word.
    para = ASTNode(name='Para', children=['hello '])
    emph = ASTNode(name='Emph')
    emph.add_child('world')
    para.add_child(emph)

    tree.add_child(para)
    return tree
コード例 #19
0
def test_wrap_code_cells_2():
    """Two Python CodeBlocks in a row must produce two separate CodeCells."""
    first_block = ASTNode('CodeBlock', lang='python', children=['a'])
    second_block = ASTNode('CodeBlock', lang='python', children=['b'])

    tree = ASTNode('root')
    tree.add_child(first_block)
    tree.add_child(second_block)

    tree.show()
    wrapped = wrap_code_cells(tree)
    wrapped.show()

    # Build the two expected cells, each wrapping one of the blocks.
    first_cell = ASTNode('CodeCell')
    first_cell.add_child(first_block)
    second_cell = ASTNode('CodeCell')
    second_cell.add_child(second_block)

    expected = ASTNode('root')
    expected.add_child(first_cell)
    expected.add_child(second_cell)
    expected.show()

    assert wrapped == expected
コード例 #20
0
ファイル: _markdown.py プロジェクト: ellisonbg/podoc
 def transform_main(self, cm):
     """Transform every child block of `cm` and return them under a new
     'root' ASTNode."""
     # TODO: should be def transform() for consistency with the other way
     blocks = self.get_node_children(cm)
     transformed = list(map(self.transform, blocks))
     return ASTNode('root', children=transformed)