def write(self, ast, context=None):
    """Convert an AST into a notebook.

    The AST is first passed through `CodeCellWrapper.wrap()`. Every top-level
    node named `CodeCell` is turned into a code cell with `new_code_cell()`,
    and every other top-level node into a Markdown cell with
    `new_markdown_cell()`.

    `context` is an optional mapping. Its `'path'` entry, when set, is used to
    compute the directory stored in `self._dir_path`; otherwise
    `self._dir_path` is set to `None` and a warning is logged.

    Return the notebook created with `new_notebook()`, after it has been
    checked with `nbformat.validate()`.

    """
    self.execution_count = 1
    self._md = MarkdownPlugin()
    # Add code cells in the AST.
    ast = CodeCellWrapper().wrap(ast)
    # Find the directory containing the notebook file.
    doc_path = (context or {}).get('path', None)
    if doc_path:
        self._dir_path = op.dirname(op.realpath(doc_path))
    else:
        # NOTE: use `warning()`; `warn()` is a deprecated alias.
        logger.warning("No input path, unable to resolve the image relative paths.")
        self._dir_path = None
    # Create the notebook.
    # TODO: kernelspec
    nb = new_notebook()
    # Go through all top-level blocks.
    for index, node in enumerate(ast.children):
        # Determine the block type, which selects the cell factory method.
        node_type = 'code' if node.name == 'CodeCell' else 'markdown'
        cell = getattr(self, 'new_{}_cell'.format(node_type))(node, index)
        nb.cells.append(cell)
    nbformat.validate(nb)
    return nb
def test_notebook_reader_hello():
    """Reading `hello.ipynb` must give the same AST as the Markdown `hello *world*`."""
    # Load the test notebook, which contains just 1 Markdown cell.
    notebook = open_notebook(get_test_file_path('notebook', 'hello.ipynb'))
    # Read the notebook into an AST and show it.
    reader = NotebookReader()
    ast = reader.read(notebook)
    ast.show()
    # The AST must be equal to the one of a simple Markdown line.
    expected = MarkdownPlugin().read('hello *world*')
    assert ast == expected
def read_markdown(self, cell, cell_index=None):
    """Add the contents of a Markdown cell to `self.tree`.

    When `self._markdown_tree` is not empty, its first tree is removed and its
    children are added to `self.tree.children`. Otherwise, `cell.source` is
    read on its own with `MarkdownPlugin`, which logs a warning because of the
    pandoc call overhead.

    `cell_index` is not used by this method.

    """
    if self._markdown_tree:
        # Consume the next cell tree from `self._markdown_tree`.
        cell_tree = self._markdown_tree.pop(0)
        self.tree.children.extend(cell_tree.children)
        return
    # NOTE: use `warning()`; `warn()` is a deprecated alias.
    logger.warning("Isolated read_markdown() call: slow because of pandoc call overhead.")
    ast = MarkdownPlugin().read(cell.source)
    if not ast.children:
        logger.debug("Skipping empty node.")
        return
    # NOTE(review): the whole tree returned by `read()` is appended here, whereas
    # the branch above extends with the children of the cell tree -- confirm
    # that this asymmetry is intended.
    self.tree.children.append(ast)  # pragma: no cover
def _read_all_markdown(self, cells):
    """Read all Markdown cells in a single call and split the result per cell.

    The sources of the cells whose `cell_type` is `markdown` are joined with
    `self._NEW_CELL_DELIMITER` and read once with `MarkdownPlugin`. The
    top-level nodes of the result are then grouped into `root` trees, a new
    tree being started at every delimiter node. The trees are appended to
    `self._markdown_tree`.

    """
    markdown_sources = [c.source for c in cells if c.cell_type == 'markdown']
    delimiter = self._NEW_CELL_DELIMITER
    separator = '\n\n%s\n\n' % delimiter
    ast = MarkdownPlugin().read(separator.join(markdown_sources))
    if not ast.children:
        logger.debug("Skipping empty node.")
        return
    cell_tree = ASTNode('root')
    for node in ast.children:
        if node.children and node.children[0] == delimiter:
            # Delimiter node: it is not kept. Store the current cell tree
            # and start the next one.
            self._markdown_tree.append(cell_tree)
            cell_tree = ASTNode('root')
        else:
            cell_tree.children.append(node)
    # Store the last cell tree if it is not empty.
    if cell_tree.children:
        self._markdown_tree.append(cell_tree)
def write(self, ast, resources=None):
    """Convert an AST into a notebook.

    `resources` is an optional mapping `{filename: data}`, stored in
    `self.resources` (an empty dictionary when it is not given).

    The AST is passed through `wrap_code_cells()`. Top-level `CodeCell` nodes
    become code cells, every other top-level node becomes a Markdown cell. The
    notebook is checked with `nbformat.validate()` before being returned.

    """
    # Mapping {filename: data}.
    self.resources = resources or {}
    self.execution_count = 1
    self._md = MarkdownPlugin()
    # Wrap the code cells in the AST.
    wrapped = wrap_code_cells(ast)
    notebook = new_notebook()
    # Every top-level block gives one cell.
    for index, node in enumerate(wrapped.children):
        # The block type selects the cell factory method.
        if node.name == 'CodeCell':
            make_cell = self.new_code_cell
        else:
            make_cell = self.new_markdown_cell
        cell = make_cell(node, index)
        notebook.cells.append(cell)
    nbformat.validate(notebook)
    return notebook
def test_notebook_reader_notebook():
    """Reading `simplenb.ipynb` and writing it as Markdown must give `simplenb.md`."""
    # Load a test notebook with a code cell and read it into an AST.
    notebook = open_notebook(get_test_file_path('notebook', 'simplenb.ipynb'))
    reader = NotebookReader()
    ast = reader.read(notebook)
    ast.show()

    # Load the expected Markdown version.
    expected = load_text(get_test_file_path('markdown', 'simplenb.md'))

    # Write the AST as Markdown and point the resources to the files directory.
    converted = MarkdownPlugin().write(ast)
    converted = re.sub(r'\{resource:([^\}]+)\}', r'simplenb_files/\1', converted)
    # The test file has a trailing new line, but not the AST.
    converted += '\n'

    # The conversion is done without output path, so the image filename differs.
    expected = expected.replace('simplenb_4_1.png', 'output_4_1.png')

    assert converted == expected
    assert 'output_4_1.png' in reader.resources
class NotebookWriter(object):
    """Write an AST to a notebook, taking the output images from a `resources` mapping."""

    def write(self, ast, resources=None):
        """Convert an AST into a notebook.

        `resources` is an optional mapping `{filename: data}`, stored in
        `self.resources` and read by `_get_b64_resource()`.

        Top-level `CodeCell` nodes (as returned by `wrap_code_cells()`) become
        code cells, every other top-level node becomes a Markdown cell. The
        notebook is checked with `nbformat.validate()` before being returned.

        """
        # Mapping {filename: data}.
        self.resources = resources or {}
        self.execution_count = 1
        self._md = MarkdownPlugin()
        # Add code cells in the AST.
        ast = wrap_code_cells(ast)
        # Create the notebook.
        nb = new_notebook()
        # Go through all top-level blocks.
        for index, node in enumerate(ast.children):
            # Determine the block type, which selects the cell factory method.
            node_type = 'code' if node.name == 'CodeCell' else 'markdown'
            cell = getattr(self, 'new_{}_cell'.format(node_type))(node, index)
            nb.cells.append(cell)
        nbformat.validate(nb)
        return nb

    def new_markdown_cell(self, node, index=None):
        """Return a Markdown cell containing `node` written as Markdown.

        `index` is not used by this method.

        """
        return new_markdown_cell(self._md.write(node))

    def _get_b64_resource(self, fn):
        """Return the base64 of a resource from its filename.

        The mapping `resources={fn: data}` needs to be passed to the `write()` method.

        An empty string is returned, and a warning logged, when the resource is
        missing or empty.

        """
        data = self.resources.get(fn, None)
        if not data:  # pragma: no cover
            # NOTE: use `warning()`; `warn()` is a deprecated alias.
            logger.warning("Resource `%s` couldn't be found.", fn)
            return ''
        out = base64.b64encode(data).decode('utf8')
        # NOTE: split the output in multiple lines of 76 characters,
        # to make easier the comparison with actual Jupyter Notebook files.
        N = 76
        out = '\n'.join([out[i:i + N] for i in range(0, len(out), N)]) + '\n'
        return out

    def new_code_cell(self, node, index=None):
        """Return a code cell built from a `CodeCell` node.

        The first child of `node` must be a `CodeBlock` with the cell input.
        The next children are the outputs:

        * `CodeBlock` nodes, whose `lang` is `stdout`, `stderr` or `result`
          (`result` when `lang` is empty),
        * `Para` nodes, whose first child is an `Image`.

        Any other child is skipped with a warning. `self.execution_count` is
        incremented once per cell. `index` is not used by this method.

        """
        # Get the code cell input: the first child of the CodeCell block.
        input_block = node.children[0]
        assert input_block.name == 'CodeBlock'
        cell = new_code_cell(input_block.children[0],
                             execution_count=self.execution_count,
                             )
        # Next we need to add the outputs: the next children in the CodeCell.
        for child in node.children[1:]:
            # Outputs can be code blocks or Markdown paragraphs containing
            # an image.
            if child.name == 'CodeBlock':
                # The output's type depends on the code block's name.
                # It can be: `stdout`, `stderr`, `result`.
                output_type = child.lang or 'result'
                assert output_type in ('stdout', 'stderr', 'result')
                contents = child.children[0]
                if output_type == 'result':
                    kwargs = dict(execution_count=self.execution_count,
                                  data={'text/plain': contents})
                    # Output type to pass to nbformat.
                    output_type = 'execute_result'
                else:
                    # Standard output or error.
                    # NOTE: append new lines at the end of every line in stdout
                    # and stderr contents, to match with the Jupyter Notebook.
                    contents = _append_newlines(contents)
                    kwargs = dict(text=contents, name=output_type)
                    # Output type to pass to nbformat.
                    output_type = 'stream'
            elif child.name == 'Para':
                img = child.children[0]
                assert img.name == 'Image'
                fn = img.url
                caption = self._md.write(img.children[0])
                output_type = 'display_data'
                data = {}  # Dictionary {mimetype: data_buffer}.
                # Infer the mime type of the file, from its filename and
                # extension.
                mime_type = guess_type(fn)[0]
                assert mime_type  # unknown extension: this shouldn't happen!
                data[mime_type] = self._get_b64_resource(fn)
                assert data[mime_type]  # TODO
                data['text/plain'] = caption
                kwargs = dict(data=data)
            else:
                # Without this branch, `output_type` and `kwargs` would be
                # unbound, or would silently duplicate the previous output.
                logger.warning("Unsupported output node `%s` in code cell, skipping.",
                               child.name)
                continue
            output = new_output(output_type, **kwargs)
            cell.outputs.append(output)
        self.execution_count += 1
        return cell

    def new_raw_cell(self, node, index=None):
        """Not implemented yet: return `None`."""
        # TODO
        pass
class NotebookWriter(object):
    """Write an AST to a notebook, reading the output images from the file system."""

    def write(self, ast, context=None):
        """Convert an AST into a notebook.

        `context` is an optional mapping. Its `'path'` entry, when set, is used
        to compute the directory stored in `self._dir_path`, against which
        `new_code_cell()` joins the image filenames. Otherwise
        `self._dir_path` is `None` and a warning is logged.

        Top-level `CodeCell` nodes (as returned by `CodeCellWrapper.wrap()`)
        become code cells, every other top-level node becomes a Markdown cell.
        The notebook is checked with `nbformat.validate()` before being
        returned.

        """
        self.execution_count = 1
        self._md = MarkdownPlugin()
        # Add code cells in the AST.
        ccw = CodeCellWrapper()
        ast = ccw.wrap(ast)
        # Find the directory containing the notebook file.
        doc_path = (context or {}).get('path', None)
        if doc_path:
            self._dir_path = op.dirname(op.realpath(doc_path))
        else:
            # NOTE: use `warning()`; `warn()` is a deprecated alias.
            logger.warning("No input path, unable to resolve the image relative paths.")
            self._dir_path = None
        # Create the notebook.
        # TODO: kernelspec
        nb = new_notebook()
        # Go through all top-level blocks.
        for index, node in enumerate(ast.children):
            # Determine the block type, which selects the cell factory method.
            node_type = 'code' if node.name == 'CodeCell' else 'markdown'
            cell = getattr(self, 'new_{}_cell'.format(node_type))(node, index)
            nb.cells.append(cell)
        nbformat.validate(nb)
        return nb

    def new_markdown_cell(self, node, index=None):
        """Return a Markdown cell containing `node` written as Markdown.

        `index` is not used by this method.

        """
        return new_markdown_cell(self._md.write(node))

    def new_code_cell(self, node, index=None):
        """Return a code cell built from a `CodeCell` node.

        The first child of `node` must be a `CodeBlock` with the cell input.
        The next children are the outputs:

        * `CodeBlock` nodes, whose `lang` is `{output:stdout}`,
          `{output:stderr}` or `{output:result}` (the latter when `lang` is
          empty),
        * `Para` nodes, whose first child is an `Image`. The image file is read
          and embedded in base64 when it exists, and the caption is saved both
          in the output text and in the cell metadata.

        Any other child is skipped with a warning. `self.execution_count` is
        incremented once per cell. `index` is not used by this method.

        """
        # Get the code cell input: the first child of the CodeCell block.
        input_block = node.children[0]
        assert input_block.name == 'CodeBlock'
        cell = new_code_cell(input_block.children[0],
                             execution_count=self.execution_count,
                             )
        # Next we need to add the outputs: the next children in the CodeCell.
        for child in node.children[1:]:
            # Outputs can be code blocks or Markdown paragraphs containing
            # an image.
            if child.name == 'CodeBlock':
                # The output's type depends on the code block's name.
                # It can be: `stdout`, `stderr`, `result`.
                output_type = child.lang or '{output:result}'
                assert output_type.startswith('{output')
                contents = child.children[0]
                # NOTE: append new lines at the end of every line in stdout
                # and stderr contents, to match with the Jupyter Notebook.
                if output_type != '{output:result}':
                    contents = _append_newlines(contents)
                if output_type == '{output:result}':
                    kwargs = dict(execution_count=self.execution_count,
                                  data={'text/plain': contents})
                    # Output type to pass to nbformat.
                    output_type = 'execute_result'
                elif output_type in ('{output:stdout}', '{output:stderr}'):
                    # Standard output or error.
                    # NOTE: strip {output } and only keep stdout/stderr in name.
                    kwargs = dict(text=contents, name=output_type[8:-1])
                    # Output type to pass to nbformat.
                    output_type = 'stream'
            elif child.name == 'Para':
                img = child.children[0]
                assert img.name == 'Image'
                fn = img.url
                caption = self._md.write(img.children[0])
                output_type = 'display_data'
                data = {}  # Dictionary {mimetype: data_buffer}.
                # Infer the mime type of the file, from its filename and
                # extension.
                mime_type = guess_type(fn)[0]
                assert mime_type  # unknown extension: this shouldn't happen!
                # Get the resource data.
                if self._dir_path:
                    image_path = op.join(self._dir_path, fn)
                # The image path could be absolute.
                elif op.isabs(fn):
                    image_path = fn
                else:  # pragma: no cover
                    image_path = None
                # If the image path exists, open it.
                if image_path and op.exists(image_path):
                    with open(image_path, 'rb') as f:
                        data[mime_type] = _get_b64_resource(f.read())
                else:  # pragma: no cover
                    logger.debug("File `%s` doesn't exist.", image_path)
                # Save the caption in the output text.
                data['text/plain'] = caption
                # Save the caption in the cell metadata too, so that it is not lost when
                # executing the notebook.
                if 'podoc' not in cell.metadata:
                    cell.metadata['podoc'] = {}
                cell.metadata['podoc'].update({'output_text': caption})
                kwargs = dict(data=data)
            else:
                # Without this branch, `output_type` and `kwargs` would be
                # unbound, or would silently duplicate the previous output.
                logger.warning("Unsupported output node `%s` in code cell, skipping.",
                               child.name)
                continue
            # An unknown `{output:...}` type is still unconverted at this point.
            assert not output_type.startswith('{output')
            output = new_output(output_type, **kwargs)
            cell.outputs.append(output)
        self.execution_count += 1
        return cell

    def new_raw_cell(self, node, index=None):
        """Not implemented yet: return `None`."""
        # TODO
        pass