def test_notebook_empty():
    """Write a tree holding one empty paragraph and read it back: no children remain."""
    tree = ASTNode('root')
    tree.add_child(ASTNode('Para'))
    notebook = NotebookWriter().write(tree)
    # Round-trip through the reader; the result must have no children.
    round_tripped = NotebookReader().read(notebook)
    assert not round_tripped.children
def transform(self, obj):
    """Recursively convert a CommonMark node (or a plain string) into an AST node.

    Strings are returned unchanged. For any other object, the node name is
    looked up with ``self.get_node_name`` and the matching ``transform_<name>``
    method is called with the object and a freshly created ``ASTNode``
    (``self.transform_Node`` is the fallback).

    The handler may return:

    * an ``ASTNode`` or a string, which is returned as is;
    * a list of unprocessed children, which are transformed recursively and
      assigned to the new node.
    """
    # Strings are leaves: nothing to transform.
    if isinstance(obj, string_types):
        return obj

    # From here on, obj is a CommonMark.Node instance.
    node_name = self.get_node_name(obj)

    if node_name != 'List':
        handler = getattr(self, 'transform_%s' % node_name, self.transform_Node)
    else:
        # CommonMark has a single List node type; the flavor (Bullet or
        # Ordered) is stored in list_data['type'], and the AST node is named
        # BulletList or OrderedList accordingly.
        list_type = obj.list_data['type']
        node_name = list_type + 'List'
        if list_type == 'Bullet':
            handler = self.transform_BulletList
        else:
            handler = self.transform_OrderedList

    node = ASTNode(node_name)
    result = handler(obj, node)

    # A handler may short-circuit by returning a finished node or a string.
    if isinstance(result, (ASTNode, string_types)):
        return result

    # Otherwise the handler returned the list of children still to process.
    assert isinstance(result, list)
    node.children = [self.transform(item) for item in result]
    return node
def read_code(self, cell, cell_index=None):
    """Convert a notebook code cell into a ``CodeCell`` node appended to ``self.tree``.

    The first child of the ``CodeCell`` is a ``CodeBlock`` holding the cell
    source, tagged with the notebook's language (``self.language``). Each
    supported output then adds one child:

    * ``stream`` outputs become a ``CodeBlock`` whose ``lang`` is the stream
      name (stdout/stderr);
    * ``display_data`` / ``execute_result`` outputs become either a
      ``CodeBlock`` with ``lang='result'`` or, when ``extract_output`` finds
      extractable data, an ``Image`` node. In the latter case the data is
      stored in ``self.resources`` under the generated file name.

    Outputs of any other type (e.g. ``error``) are skipped.

    Parameters
    ----------
    cell : notebook cell
        Must expose ``source`` and, optionally, ``outputs``.
    cell_index : int, optional
        Index of the cell in the notebook; forwarded to ``output_filename``.
    """
    node = ASTNode('CodeCell')
    # The first child is the source.
    # NOTE: the language of the code block is the notebook's language.
    node.add_child(ASTNode('CodeBlock',
                           lang=self.language,
                           children=[cell.source]))
    # Then, we add one extra child per output.
    for output_index, output in enumerate(cell.get('outputs', [])):
        if output.output_type == 'stream':
            child = ASTNode('CodeBlock',
                            lang=output.name,  # stdout/stderr
                            children=[output.text])
        elif output.output_type in ('display_data', 'execute_result'):
            # Output text node.
            text = output.data.get('text/plain', 'Output')
            # Extract image output, if any.
            out = extract_output(output)
            if out is None:
                child = ASTNode('CodeBlock',
                                lang='result',
                                children=[text])
            else:
                mime_type, data = out
                fn = output_filename(mime_type=mime_type,
                                     cell_index=cell_index,
                                     output_index=output_index,
                                     unique_key=None,  # TODO
                                     )
                self.resources[fn] = data
                child = ASTNode('Image', url=fn, children=[text])
        else:
            # Unsupported output type: skip it. Without this branch `child`
            # would be unbound (first output) or still refer to the previous
            # output, which would then be added a second time.
            continue
        node.add_child(child)
    self.tree.children.append(node)
class CodeCellWrapper(object):
    """Group top-level source code blocks and their outputs into ``CodeCell`` nodes."""

    def infer_language(self, ast):
        """Return the most common CodeBlock language: it is supposed to be
        the notebook's language."""
        langs = [_get_cell_lang(child)
                 for child in ast.children
                 if child.name in ('CodeBlock', 'CodeCell')]
        ranked = Counter(langs).most_common(1)
        # Fall back to Python when the tree has no code at all.
        if not ranked:
            return 'python'
        return ranked[0][0]

    def wrap(self, ast):
        """Return a copy of ``ast`` whose code blocks are wrapped in code cells.

        A cell starts at a block written in the inferred notebook language
        and absorbs the output blocks and image paragraphs that directly
        follow it. Every other node is copied over unchanged.
        """
        self.ast = ast.copy()
        self.ast.children = []
        self._code_cell = None
        # Infer the notebook's language.
        self.language = self.infer_language(ast)
        for child in ast.children:
            # While a cell is open, outputs and images belong to it.
            if self._code_cell and (self.is_output(child) or
                                    self.is_image(child)):
                self.add_output(child)
                continue
            # Any other node closes the open cell (no-op if none is open)...
            self.end_code_cell()
            # ...and either opens a new cell or is kept as is.
            if self.is_source(child):
                self.start_code_cell(child)
            else:
                self.append(child)
        # Ensure the last cell is appended.
        self.end_code_cell()
        return self.ast

    def is_output(self, node):
        """Whether ``node`` is a CodeBlock with no language or an ``{output...`` language."""
        if node.name != 'CodeBlock':
            return False
        lang = node.lang
        return lang in (None, '') or lang.startswith('{output')

    def is_image(self, node):
        """Whether ``node`` is a paragraph whose only child is an Image node."""
        children = node.children
        if node.name != 'Para':
            return False
        if len(children) != 1:
            return False
        only_child = children[0]
        return isinstance(only_child, ASTNode) and only_child.name == 'Image'

    def is_source(self, node):
        """Whether ``node`` is a CodeBlock written in the notebook's language."""
        return node.name == 'CodeBlock' and node.lang == self.language

    def start_code_cell(self, node):
        """Open a new CodeCell whose first child is the source block ``node``."""
        self._code_cell = ASTNode('CodeCell')
        # Source CodeBlock.
        self._code_cell.add_child(node)

    def add_output(self, node):
        """Attach ``node`` to the currently open code cell."""
        self._code_cell.add_child(node)

    def end_code_cell(self):
        """Append the open code cell, if any, to the output tree and close it."""
        if not self._code_cell:
            return
        self.append(self._code_cell)
        self._code_cell = None

    def append(self, node):
        """Add ``node`` at the top level of the output tree."""
        self.ast.add_child(node)
def test_wrap_code_cells_1():
    """wrap_code_cells() with a single code cell: the block ends up in one CodeCell."""
    source_tree = ASTNode('root')
    code_block = ASTNode('CodeBlock', lang='python', children=[''])
    source_tree.add_child(code_block)

    wrapped = wrap_code_cells(source_tree)
    wrapped.show()

    # Expected: the root holds one CodeCell wrapping the original block.
    expected = ASTNode('root')
    expected.add_child(ASTNode('CodeCell',
                               children=[source_tree.children[0]]))
    assert wrapped == expected
class CodeCellWrapper(object):
    """Group top-level source code blocks and their outputs into ``CodeCell`` nodes.

    A cell starts at a ``CodeBlock`` whose language equals ``language`` and
    absorbs the output blocks and image paragraphs that directly follow it.
    """

    # Language that marks a CodeBlock as the source of a code cell.
    # Override it on an instance or subclass to wrap another language.
    language = 'python'

    # CodeBlock languages that mark a block as the output of a cell.
    _output_langs = (None, '', 'stdout', 'stderr', 'result')

    def wrap(self, ast):
        """Return a copy of ``ast`` whose code blocks are wrapped in code cells.

        Nodes that are neither a cell source nor an output attached to an
        open cell are copied over unchanged, in their original order.
        """
        self.ast = ast.copy()
        self.ast.children = []
        self._code_cell = None
        for node in ast.children:
            if self._code_cell:
                if self.is_output(node) or self.is_image(node):
                    self.add_output(node)
                else:
                    # Anything else closes the open cell; the node itself
                    # is handled just below.
                    self.end_code_cell()
            if not self._code_cell:
                if self.is_source(node):
                    self.start_code_cell(node)
                else:
                    self.append(node)
        # Ensure the last cell is appended.
        self.end_code_cell()
        return self.ast

    def is_output(self, node):
        """Whether ``node`` is a CodeBlock with an output (or missing) language."""
        return ((node.name == 'CodeBlock') and
                (node.lang in self._output_langs))

    def is_image(self, node):
        """Whether ``node`` is a paragraph whose only child is an Image node."""
        children = node.children
        return ((node.name == 'Para') and
                (len(children) == 1) and
                (isinstance(children[0], ASTNode)) and
                (children[0].name == 'Image'))

    def is_source(self, node):
        """Whether ``node`` is a CodeBlock written in ``self.language``."""
        return node.name == 'CodeBlock' and node.lang == self.language

    def start_code_cell(self, node):
        """Open a new CodeCell whose first child is the source block ``node``."""
        self._code_cell = ASTNode('CodeCell')
        # Source CodeBlock.
        self._code_cell.add_child(node)

    def add_output(self, node):
        """Attach ``node`` to the currently open code cell."""
        self._code_cell.add_child(node)

    def end_code_cell(self):
        """Append the open code cell, if any, to the output tree and close it."""
        if self._code_cell:
            self.append(self._code_cell)
            self._code_cell = None

    def append(self, node):
        """Add ``node`` at the top level of the output tree."""
        self.ast.add_child(node)
def ast():
    """Build a small tree: a root with one paragraph, 'hello ' followed by an emphasized 'world'."""
    # TODO: move this to conftest
    root = ASTNode('root')

    # First block: a paragraph that starts with plain text...
    paragraph = ASTNode(name='Para', children=['hello '])

    # ...and ends with an emphasized inline element.
    emphasis = ASTNode(name='Emph')
    emphasis.add_child('world')
    paragraph.add_child(emphasis)

    root.add_child(paragraph)
    return root
def read(self, notebook, context=None):
    """Convert a notebook into an AST and return the root node.

    Parameters
    ----------
    notebook : nbformat.NotebookNode
        The notebook to convert.
    context : dict, optional
        May provide ``output`` and/or ``path``; the base name (without
        extension) of the first non-empty one is used as the unique key for
        image file names.

    Side effects: resets ``self.resources``, ``self.tree``,
    ``self.language``, ``self._unique_key`` and ``self._markdown_tree``.
    """
    assert isinstance(notebook, nbformat.NotebookNode)
    self.resources = {}  # Dictionary {filename: data}.
    if not context:
        context = {}

    # Unique key for image names: basename of the output file if there is
    # one, otherwise of the input path; None when neither is available.
    key = (op.basename(context.get('output') or '') or
           op.basename(context.get('path') or ''))
    self._unique_key = op.splitext(key)[0] or None

    # Create the output tree.
    self.tree = ASTNode('root')

    # Language of the notebook.
    # NOTE: if no language is available in the metadata, use Python
    # by default.
    language_info = notebook.metadata.get('language_info', {})
    self.language = language_info.get('name', 'python')

    # NOTE: for performance reasons, we parse the Markdown of all cells at
    # once to reduce the overhead of calling pandoc.
    self._markdown_tree = []
    self._read_all_markdown(notebook.cells)

    # Dispatch every cell to the read_<cell_type>() method.
    for cell_index, cell in enumerate(notebook.cells):
        reader = getattr(self, 'read_{}'.format(cell.cell_type))
        reader(cell, cell_index)
    return self.tree
def transform_CodeBlock(self, obj, node):
    """Fill ``node`` from a CommonMark code block.

    The block's info string becomes ``node.lang``. A regular block returns
    its literal contents as a one-element list of children. A block whose
    language is ``math`` is turned into a ``Para`` node wrapping a
    ``MathBlock``, and that node is returned directly.
    """
    node.lang = obj.info
    code = obj.literal
    if node.lang != 'math':
        return [code]
    # Math block: rename the node and wrap the stripped contents.
    node.name = 'Para'
    node.children = [ASTNode('MathBlock', children=[code.strip()])]
    return node
def wrap_code_cells(ast):
    """Take an AST and wrap top-level CodeBlocks within CodeCells.

    A Python ``CodeBlock`` opens a new ``CodeCell``. The nodes that directly
    follow it are added to the cell as long as they are outputs: a
    ``CodeBlock`` whose language is empty, ``stdout``, ``stderr`` or
    ``result``, or a paragraph containing a single ``Image``. Any other node
    closes the cell and is kept at the top level.

    Returns a copy of ``ast`` with the regrouped children; the children of
    ``ast`` itself are not reassigned.
    """
    out = ast.copy()
    out.children = []
    current_cell = None
    for child in ast.children:
        # Notebook code cell.
        # TODO: parameterizable language
        if child.name == 'CodeBlock' and child.lang == 'python':
            # A new source block closes the cell that is still open, if
            # any. Without this, the first of two consecutive code cells
            # would be dropped from the output.
            if current_cell is not None:
                out.add_child(current_cell)
            # Wrap CodeBlocks within CodeCells.
            current_cell = ASTNode('CodeCell')
            current_cell.add_child(child)
            continue

        # Outside of a cell, nodes are copied over as is.
        if current_cell is None:
            out.add_child(child)
            continue

        # Decide whether we're part of the current cell.
        name = child.name
        children = child.children
        # Case 1: we're a code block with a notebook-specific language.
        is_output = ((name == 'CodeBlock') and
                     (child.lang in (None, '', 'stdout', 'stderr', 'result')))
        # Case 2: we're just an image.
        is_image = ((name == 'Para') and
                    (len(children) == 1) and
                    (isinstance(children[0], ASTNode)) and
                    (children[0].name == 'Image'))
        if is_output or is_image:
            # Add the current block to the cell's outputs.
            current_cell.add_child(child)
        else:
            # We're no longer part of the current cell.
            # First, we add the cell that has just finished.
            out.add_child(current_cell)
            # Then, we add the current block.
            out.add_child(child)
            current_cell = None
    # Add the cell that is still open at the end of the tree.
    if current_cell is not None:
        out.add_child(current_cell)
    return out
def _read_all_markdown(self, cells):
    """Parse the Markdown of all cells in one pass and split the result per cell.

    The sources of the Markdown cells are joined with
    ``self._NEW_CELL_DELIMITER`` as a separate paragraph and parsed with a
    single ``MarkdownPlugin().read`` call. The resulting tree is then cut at
    every delimiter node, and each piece is appended to
    ``self._markdown_tree`` as its own ``root`` tree.

    NOTE(review): the tree that follows the last delimiter is only appended
    when it has children, so a trailing empty Markdown cell produces no
    tree -- confirm that consumers of ``self._markdown_tree`` expect this.
    """
    delimiter = self._NEW_CELL_DELIMITER
    markdown_sources = [cell.source for cell in cells
                        if cell.cell_type == 'markdown']
    joined = ('\n\n%s\n\n' % delimiter).join(markdown_sources)
    parsed = MarkdownPlugin().read(joined)
    if not parsed.children:
        logger.debug("Skipping empty node.")
        return

    cell_tree = ASTNode('root')
    for block in parsed.children:
        if block.children and block.children[0] == self._NEW_CELL_DELIMITER:
            # Delimiter node: it is not kept; the tree built so far is
            # stored and a new one is started for the next cell.
            self._markdown_tree.append(cell_tree)
            cell_tree = ASTNode('root')
        else:
            cell_tree.children.append(block)

    # Append the last cell tree if not empty.
    if cell_tree.children:
        self._markdown_tree.append(cell_tree)
def test_wrap_code_cells_1():
    """wrap_code_cells() with a single code cell: the block ends up in one CodeCell."""
    tree = ASTNode('root')
    source_block = ASTNode('CodeBlock', lang='python', children=[''])
    tree.add_child(source_block)

    actual = wrap_code_cells(tree)
    actual.show()

    # Expected: the root holds one CodeCell wrapping the original block.
    wanted = ASTNode('root')
    wanted.add_child(ASTNode('CodeCell', children=[tree.children[0]]))
    assert_equal(actual, wanted)
def start_code_cell(self, node):
    """Open a new ``CodeCell`` and make ``node`` (the source CodeBlock) its first child."""
    self._code_cell = cell = ASTNode('CodeCell')
    # The source block always comes first in a code cell.
    cell.add_child(node)
def read_code(self, cell, cell_index=None):
    """Convert a notebook code cell into a ``CodeCell`` node appended to ``self.tree``.

    The first child is a ``CodeBlock`` holding the right-stripped cell source,
    tagged with the notebook's language. Each output adds one child:

    * ``stream``: a ``CodeBlock`` with language ``{output:<stream name>}``;
    * ``error``: a ``CodeBlock`` with language ``{output:error}`` holding the
      joined traceback;
    * ``display_data`` / ``execute_result``: a ``Para`` wrapping an ``Image``
      when an image or a table can be extracted (the data is stored in
      ``self.resources``), otherwise a ``CodeBlock`` with language
      ``{output:result}``.

    ANSI color codes are removed from all output text.

    Raises
    ------
    ValueError
        If an output has an unknown output type.
    """
    code_cell = ASTNode('CodeCell')
    # TODO: improve this.
    code_cell._visit_meta['is_block'] = True

    # The first child is the source.
    # NOTE: the language of the code block is the notebook's language.
    source_block = ASTNode('CodeBlock',
                           lang=self.language,
                           children=[cell.source.rstrip()])
    code_cell.add_child(source_block)

    # Then, we add one extra child per output.
    for output_index, output in enumerate(cell.get('outputs', [])):
        if output.output_type == 'stream':
            stream_lang = '{output:' + output.name + '}'  # stdout/stderr
            stream_text = _remove_ansi(output.text.rstrip())
            output_node = ASTNode('CodeBlock',
                                  lang=stream_lang,
                                  children=[stream_text])
        elif output.output_type == 'error':  # pragma: no cover
            traceback_text = _remove_ansi('\n'.join(output.traceback))
            output_node = ASTNode('CodeBlock',
                                  lang='{output:error}',
                                  children=[traceback_text])
        elif output.output_type in ('display_data', 'execute_result'):
            # Output text: the cell metadata takes precedence over the
            # plain-text representation of the output.
            legend = cell.metadata.get('podoc', {}).get('output_text', None)
            legend = legend or output.data.get('text/plain', 'Output')
            # Remove color codes.
            legend = _remove_ansi(legend)

            # Look for an image first, then for a table.
            extracted = extract_image(output)
            if extracted is None:
                extracted = extract_table(output)

            if extracted is not None:
                mime_type, data = extracted
                filename = output_filename(mime_type=mime_type,
                                           cell_index=cell_index,
                                           output_index=output_index,
                                           unique_key=self._unique_key,
                                           )
                self.resources[filename] = data
                # Prevent multi-line image legend.
                if '\n' in legend:  # pragma: no cover
                    legend = 'Output'
                # Wrap the Image node in a Para.
                image = ASTNode('Image',
                                url='{resource:%s}' % filename,
                                children=[legend])
                output_node = ASTNode('Para', children=[image])
            else:
                output_node = ASTNode('CodeBlock',
                                      lang='{output:result}',
                                      children=[legend])
        else:  # pragma: no cover
            raise ValueError("Unknown output type `%s`." % output.output_type)
        code_cell.add_child(output_node)

    self.tree.children.append(code_cell)
def test_wrap_code_cells_2():
    """wrap_code_cells() with two code cells: each block gets its own CodeCell."""
    tree = ASTNode('root')
    first_block = ASTNode('CodeBlock', lang='python', children=['a'])
    second_block = ASTNode('CodeBlock', lang='python', children=['b'])
    for source_block in (first_block, second_block):
        tree.add_child(source_block)
    tree.show()

    actual = wrap_code_cells(tree)
    actual.show()

    # Expected: one CodeCell per source block, in the original order.
    wanted = ASTNode('root')
    for source_block in (first_block, second_block):
        code_cell = ASTNode('CodeCell')
        code_cell.add_child(source_block)
        wanted.add_child(code_cell)
    wanted.show()

    assert_equal(actual, wanted)
def test_wrap_code_cells_2():
    """wrap_code_cells() with two code cells: each block gets its own CodeCell."""
    source_tree = ASTNode('root')
    block_a = ASTNode('CodeBlock', lang='python', children=['a'])
    block_b = ASTNode('CodeBlock', lang='python', children=['b'])
    source_tree.add_child(block_a)
    source_tree.add_child(block_b)
    source_tree.show()

    wrapped = wrap_code_cells(source_tree)
    wrapped.show()

    # Expected: one CodeCell per source block, in the original order.
    expected = ASTNode('root')
    for block in (block_a, block_b):
        cell = ASTNode('CodeCell')
        cell.add_child(block)
        expected.add_child(cell)
    expected.show()

    assert wrapped == expected
def transform_main(self, cm):
    """Transform every child of the CommonMark document ``cm`` and return them under a ``root`` node."""
    # TODO: should be def transform() for consistency with the other way
    blocks = self.get_node_children(cm)
    transformed = []
    for block in blocks:
        transformed.append(self.transform(block))
    return ASTNode('root', children=transformed)