def copy_cells(from_notebook='', from_cells=(0, 0), to_notebook='', at_cell=0):
    """Copy a slice of cells from one notebook into another.

    Parameters
    ----------
    from_notebook : str
        Source notebook filename; '.ipynb' is appended if missing.
    from_cells : tuple of (int, int)
        (start, end) slice indices of the cells to copy.
    to_notebook : str
        Destination notebook filename ('.ipynb' appended if missing);
        the file is rewritten in place.
    at_cell : int
        Index in the destination cell list where the copied cells are
        inserted.
    """
    # Bug fix: the original tested `'.ipynb' not in name`, which skips
    # appending the extension for any name that merely *contains*
    # '.ipynb' (e.g. 'nb.ipynb.bak'); endswith() is the correct check.
    if not from_notebook.endswith('.ipynb'):
        from_notebook += '.ipynb'
    if not to_notebook.endswith('.ipynb'):
        to_notebook += '.ipynb'
    with open(from_notebook) as nb1_f:
        nb1_raw = nb1_f.read()
    with open(to_notebook) as nb2_f:
        nb2_raw = nb2_f.read()
    nb1 = nbformat.reads(nb1_raw, as_version=ipy_ver)
    nb2 = nbformat.reads(nb2_raw, as_version=ipy_ver)
    start_id, end_id = from_cells
    # Uses the v3 'worksheets' layout — assumes ipy_ver selects v3;
    # TODO confirm against the module-level ipy_ver.
    copied_cells = nb1['worksheets'][0]['cells'][start_id:end_id]
    active_cells = nb2['worksheets'][0]['cells']
    nb2['worksheets'][0]['cells'] = (
        active_cells[:at_cell] + copied_cells + active_cells[at_cell:])
    nb2_modified_raw = nbformat.writes(nb2, as_version=ipy_ver)
    with open(to_notebook, 'w') as nb2_f:
        nb2_f.write(nb2_modified_raw)
def copy_cells(from_notebook='', from_cells=(0, 0), to_notebook='', at_cell=0):
    """Insert cells [start:end] of one notebook into another notebook.

    Parameters
    ----------
    from_notebook : str
        Source notebook filename; '.ipynb' is appended when missing.
    from_cells : tuple of (int, int)
        (start, end) slice bounds of the cells to copy.
    to_notebook : str
        Destination notebook filename ('.ipynb' appended when missing);
        rewritten in place.
    at_cell : int
        Insertion index in the destination cell list.
    """
    # Bug fix: substring membership ('.ipynb' not in name) wrongly skips
    # names that contain but do not end with the extension; use endswith.
    if not from_notebook.endswith('.ipynb'):
        from_notebook += '.ipynb'
    if not to_notebook.endswith('.ipynb'):
        to_notebook += '.ipynb'
    with open(from_notebook) as nb1_f:
        nb1_raw = nb1_f.read()
    with open(to_notebook) as nb2_f:
        nb2_raw = nb2_f.read()
    nb1 = nbformat.reads(nb1_raw, as_version=ipy_ver)
    nb2 = nbformat.reads(nb2_raw, as_version=ipy_ver)
    start_id, end_id = from_cells
    # v3 'worksheets' layout — assumes ipy_ver targets nbformat v3;
    # TODO confirm against module-level ipy_ver.
    copied_cells = nb1['worksheets'][0]['cells'][start_id:end_id]
    active_cells = nb2['worksheets'][0]['cells']
    nb2['worksheets'][0]['cells'] = (
        active_cells[:at_cell] + copied_cells + active_cells[at_cell:])
    nb2_modified_raw = nbformat.writes(nb2, as_version=ipy_ver)
    with open(to_notebook, 'w') as nb2_f:
        nb2_f.write(nb2_modified_raw)
def from_notebook_node(self, nb, resources=None, **kw):
    """Serialize a notebook node to JSON and record an output suffix.

    The suffix is '.vN' when the notebook was converted to a different
    nbformat version than the input, otherwise '.nbconvert'.
    """
    nb_copy, resources = super(NotebookExporter, self).from_notebook_node(
        nb, resources, **kw)
    # Pick the suffix according to whether a version conversion happened.
    suffix = ('.v%i' % self.nbformat_version
              if self.nbformat_version != nb_copy.nbformat
              else '.nbconvert')
    resources['output_suffix'] = suffix
    return nbformat.writes(nb_copy, version=self.nbformat_version), resources
def _save_notebook(self, os_path, nb):
    """Save a notebook to an os_path.

    Writes native notebook JSON for '.ipynb' paths and converts to
    markdown for markdown paths; any other detected file type is
    silently ignored (unchanged original behavior).
    """
    with self.atomic_writing(os_path, encoding="utf-8") as f:
        # Hoist the file-type detection: the original called
        # ftdetect(os_path) twice when saving markdown files.
        ftype = ftdetect(os_path)
        if ftype == "notebook":
            nbformat.write(nb, f, version=nbformat.NO_CONVERT)
        elif ftype == "markdown":
            nbjson = nbformat.writes(nb, version=nbformat.NO_CONVERT)
            markdown = convert(nbjson, informat="notebook",
                               outformat="markdown")
            f.write(markdown)
def test_write_downgrade_2(self):
    """Downgrade a v3 notebook to v2."""
    # Load the version-3 test notebook.
    with self.fopen(u'test3.ipynb', 'r') as f:
        nb = read(f, as_version=3)
    # Serialize as version 2, then parse the raw JSON back to inspect it.
    serialized = writes(nb, version=2)
    roundtripped = json.loads(serialized)
    major, minor = get_version(roundtripped)
    self.assertEqual(major, 2)
def notebook_content(self, content):
    """Store notebook content as a string.

    Accepts either a ready-made string, which is stored as-is, or a
    notebook node, which is serialized with nbformat first.
    """
    if isinstance(content, compat.string_types):
        self._notebook_content = content
        return
    # Not a string: assume it is a notebook node and serialize it.
    # (The original wrapped this in a bare `try: ... except: raise`,
    # which re-raises everything unchanged — a no-op that only obscured
    # the code, so it has been removed; behavior is identical.)
    content = nbformat.writes(content, version=nbformat.NO_CONVERT)
    self._notebook_content = content
def _save_notebook(self, os_path, nb):
    """Save a notebook to an os_path.

    Notebook paths get native JSON; markdown paths get a converted
    markdown rendering. Other detected types are silently ignored,
    matching the original behavior.
    """
    with self.atomic_writing(os_path, encoding='utf-8') as f:
        # Detect once — the original invoked ftdetect(os_path) twice
        # on the markdown path.
        ftype = ftdetect(os_path)
        if ftype == 'notebook':
            nbformat.write(nb, f, version=nbformat.NO_CONVERT)
        elif ftype == 'markdown':
            nbjson = nbformat.writes(nb, version=nbformat.NO_CONVERT)
            markdown = convert(nbjson, informat='notebook',
                               outformat='markdown')
            f.write(markdown)
def to_json(self, filename, encoding="utf8"):
    """
    convert the notebook into json

    @param      filename        filename or stream
    """
    if isinstance(filename, str):  # str covers unicode on Python 3
        # Given a path: open it and recurse with the open stream.
        with open(filename, "w", encoding=encoding) as payload:
            self.to_json(payload)
    else:
        # Given a writable stream: serialize the notebook into it.
        filename.write(writes(self.nb))
def test_run_nb(self):
    """Test %run notebook.ipynb"""
    from IPython.nbformat import v4, writes
    # Build a two-cell notebook: one markdown cell, one code cell.
    cells = [
        v4.new_markdown_cell("The Ultimate Question of Everything"),
        v4.new_code_cell("answer=42"),
    ]
    notebook = v4.new_notebook(cells=cells)
    self.mktmp(writes(notebook, version=4), ext='.ipynb')
    # Running the notebook should execute the code cell.
    _ip.magic("run %s" % self.fname)
    nt.assert_equal(_ip.user_ns['answer'], 42)
def save_notebook(self, model, name='', path=''):
    """Save the notebook model and return the model with no content.

    Validates the incoming model, creates the backing gist when one does
    not exist yet, signs the notebook, and persists it through gisthub.
    Raises web.HTTPError(400) on missing content, missing path,
    attempted path change, or a save failure.
    """
    path = path.strip('/')
    if 'content' not in model:
        raise web.HTTPError(400, u'No notebook JSON data provided')
    if not path:
        raise web.HTTPError(400, u'We require path for saving.')
    nb = nbformat.from_dict(model['content'])
    gist = self._get_gist(name, path)
    if gist is None:
        # First save: create a gist tagged with the name-derived tags
        # (and the path, when present).
        tags = parse_tags(name)
        if path:
            tags.append(path)
        content = nbformat.writes(nb, version=nbformat.NO_CONVERT)
        gist = self.gisthub.create_gist(name, tags, content)
    # One checkpoint should always exist
    #if self.notebook_exists(name, path) and not self.list_checkpoints(name, path):
    #    self.create_checkpoint(name, path)
    new_path = model.get('path', path).strip('/')
    new_name = model.get('name', name)
    if path != new_path:
        raise web.HTTPError(400, u'Gist backend does not support path change')
    # remove [gist_id] if we're being sent old key_name
    gist.name = gist.strip_gist_id(new_name)
    gist.notebook_content = nb
    self.check_and_sign(nb, self.fullpath(new_path, new_name))
    # Blank the embedded metadata name; presumably the gist name is the
    # single source of truth — TODO confirm.
    if 'name' in nb['metadata']:
        nb['metadata']['name'] = u''
    try:
        self.log.debug("Autosaving notebook %s %s", path, name)
        self.gisthub.save(gist)
    except Exception as e:
        raise web.HTTPError(
            400, u'Unexpected error while autosaving notebook: %s %s %s' %
            (path, name, e))
    # NOTE: since gist.name might not have [gist_id] suffix on rename
    # we use gist.key_name
    model = self.get_notebook(gist.key_name, new_path, content=False)
    return model
def test_run_nb(self):
    """Test %run notebook.ipynb"""
    from IPython.nbformat import v4, writes
    # Assemble a minimal notebook whose code cell defines `answer`.
    md = v4.new_markdown_cell("The Ultimate Question of Everything")
    code = v4.new_code_cell("answer=42")
    src = writes(v4.new_notebook(cells=[md, code]), version=4)
    self.mktmp(src, ext='.ipynb')
    # %run should execute the code cell into the user namespace.
    _ip.magic("run %s" % self.fname)
    nt.assert_equal(_ip.user_ns['answer'], 42)
def write(cells, nb_version=4):
    """Turn cells list into valid IPython notebook code.

    Parameters
    ----------
    cells : list of (cell_type, language, block) tuples
        Parsed cells; cell_type is "markdown" or "codecell".
    nb_version : int
        Target notebook format version, 3 or 4.

    Returns
    -------
    str
        The notebook serialized as a JSON string.

    Raises
    ------
    ValueError
        If nb_version is not 3 or 4. (The original silently fell
        through every branch and died with an UnboundLocalError on
        `filestr` at the return statement.)
    """
    if nb_version not in (3, 4):
        raise ValueError("nb_version must be 3 or 4, got %r" % (nb_version,))
    # Use IPython.nbformat functionality for writing the notebook
    if nb_version == 3:
        from IPython.nbformat.v3 import (
            new_code_cell,
            new_text_cell,
            new_worksheet,
            new_notebook,
            new_metadata,
            new_author,
        )
        nb = new_worksheet()
    elif nb_version == 4:
        from IPython.nbformat.v4 import new_code_cell, new_markdown_cell, new_notebook
    nb_cells = []
    for cell_tp, language, block in cells:
        if cell_tp == "markdown":
            if nb_version == 3:
                nb.cells.append(new_text_cell(u"markdown", source=block))
            elif nb_version == 4:
                nb_cells.append(new_markdown_cell(source=block))
        elif cell_tp == "codecell":
            if nb_version == 3:
                nb.cells.append(new_code_cell(input=block))
            elif nb_version == 4:
                nb_cells.append(new_code_cell(source=block))
    if nb_version == 3:
        nb = new_notebook(worksheets=[nb], metadata=new_metadata())
        # Let us make v4 notebook here by upgrading
        from IPython.nbformat.v4 import upgrade
        nb = upgrade(nb)
        import IPython.nbformat.v4.nbjson as nbjson
        # Convert nb to json format
        filestr = nbjson.writes(nb)
    elif nb_version == 4:
        nb = new_notebook(cells=nb_cells)
        from IPython.nbformat import writes
        filestr = writes(nb, version=4)
    return filestr
def save_notebook(self, model, name='', path=''):
    """Save the notebook model and return the model with no content.

    Creates the backing gist on first save, signs the notebook, and
    persists it via gisthub. Raises web.HTTPError(400) for missing
    content, missing path, attempted path changes, or save failures.
    """
    path = path.strip('/')
    if 'content' not in model:
        raise web.HTTPError(400, u'No notebook JSON data provided')
    if not path:
        raise web.HTTPError(400, u'We require path for saving.')
    nb = nbformat.from_dict(model['content'])
    gist = self._get_gist(name, path)
    if gist is None:
        # First save: create a gist with name-derived tags plus the path.
        tags = parse_tags(name)
        if path:
            tags.append(path)
        content = nbformat.writes(nb, version=nbformat.NO_CONVERT)
        gist = self.gisthub.create_gist(name, tags, content)
    # One checkpoint should always exist
    #if self.notebook_exists(name, path) and not self.list_checkpoints(name, path):
    #    self.create_checkpoint(name, path)
    new_path = model.get('path', path).strip('/')
    new_name = model.get('name', name)
    if path != new_path:
        raise web.HTTPError(400, u'Gist backend does not support path change')
    # remove [gist_id] if we're being sent old key_name
    gist.name = gist.strip_gist_id(new_name)
    gist.notebook_content = nb
    self.check_and_sign(nb, self.fullpath(new_path, new_name))
    # Clear the embedded metadata name — presumably the gist name is
    # authoritative; TODO confirm.
    if 'name' in nb['metadata']:
        nb['metadata']['name'] = u''
    try:
        self.log.debug("Autosaving notebook %s %s", path, name)
        self.gisthub.save(gist)
    except Exception as e:
        raise web.HTTPError(400, u'Unexpected error while autosaving notebook: %s %s %s' % (path, name, e))
    # NOTE: since gist.name might not have [gist_id] suffix on rename
    # we use gist.key_name
    model = self.get_notebook(gist.key_name, new_path, content=False)
    return model
def merge_notebooks(filenames):
    """Concatenate the cells of several notebooks and print the result.

    Parameters
    ----------
    filenames : sequence of str
        Paths of the notebooks to merge, in order.

    Raises
    ------
    ValueError
        If filenames is empty. (The original crashed later with an
        AttributeError on `merged.metadata` when given no files.)
    """
    if not filenames:
        raise ValueError("need at least one notebook to merge")
    merged = None
    for fname in filenames:
        with io.open(fname, 'r', encoding='utf-8') as f:
            nb = nbformat.read(f, as_version=4)
        if merged is None:
            merged = nb
        else:
            # TODO: add an optional marker between joined notebooks
            # like an horizontal rule, for example, or some other arbitrary
            # (user specified) markdown cell)
            merged.cells.extend(nb.cells)
    if not hasattr(merged.metadata, 'name'):
        merged.metadata.name = ''
    merged.metadata.name += "_merged"
    print(nbformat.writes(merged))
def write(cells):
    """Turn cells list into valid IPython notebook code."""
    # Use IPython.nbformat functionality for writing the notebook.
    from IPython.nbformat.v4 import (
        new_code_cell, new_markdown_cell, new_notebook)
    nb_cells = []
    for cell_tp, language, block in cells:
        # Dispatch on the cell type; unknown types are skipped,
        # matching the original if/elif chain.
        if cell_tp == 'markdown':
            cell = new_markdown_cell(source=block)
        elif cell_tp == 'codecell':
            cell = new_code_cell(source=block)
        else:
            continue
        nb_cells.append(cell)
    from IPython.nbformat import writes
    return writes(new_notebook(cells=nb_cells), version=4)
def merge_notebooks(outfile, filenames):
    """Merge several notebooks into one and write it to an open stream.

    The first file whose name starts with 'Appendix' keeps its links and
    receives the appendix treatment; links are removed from every other
    notebook. Formatting cells are removed everywhere.

    Parameters
    ----------
    outfile : file-like
        Open, writable text stream that receives the merged notebook JSON.
    filenames : sequence of str
        Notebook paths, merged in order.
    """
    merged = None
    added_appendix = False
    for fname in filenames:
        with io.open(fname, 'r', encoding='utf-8') as f:
            nb = nbformat.read(f, nbformat.NO_CONVERT)
        remove_formatting(nb)
        # startswith() replaces the brittle fname[0:8] == 'Appendix'
        # slice comparison — identical behavior, clearer intent.
        if not added_appendix and fname.startswith('Appendix'):
            remove_links_add_appendix(nb)
            added_appendix = True
        else:
            remove_links(nb)
        if merged is None:
            merged = nb
        else:
            merged.cells.extend(nb.cells)
    #merged.metadata.name += "_merged"
    outfile.write(nbformat.writes(merged, nbformat.NO_CONVERT))
def test_roundtrip():
    """Run nbconvert using our custom markdown template to recover
    original markdown from a notebook.
    """
    # markdown -> notebook
    reader = notedown.MarkdownReader()
    nb = reader.to_notebook(roundtrip_markdown)
    # notebook -> json -> notebook, forcing a serialization round trip
    nb = nbformat.reads(nbformat.writes(nb), as_version=4)
    # notebook -> markdown
    writer = notedown.MarkdownWriter(template_file='notedown/templates/markdown.tpl',
                                     strip_outputs=True)
    recovered = writer.writes(nb)
    nt.assert_multi_line_equal(roundtrip_markdown, recovered)
def test_roundtrip():
    """Run nbconvert using our custom markdown template to recover
    original markdown from a notebook.
    """
    # Parse the reference markdown into a notebook.
    nb = notedown.MarkdownReader().to_notebook(roundtrip_markdown)
    # Force a JSON round trip through nbformat.
    serialized = nbformat.writes(nb)
    nb = nbformat.reads(serialized, as_version=4)
    # Render back to markdown with the custom template.
    writer = notedown.MarkdownWriter(
        template_file='notedown/templates/markdown.tpl',
        strip_outputs=True)
    nt.assert_multi_line_equal(roundtrip_markdown, writer.writes(nb))
def write(cells, nb_version=4):
    """Turn cells list into valid IPython notebook code.

    Parameters
    ----------
    cells : list of (cell_type, language, block) tuples
        Parsed cells; cell_type is 'markdown' or 'codecell'.
    nb_version : int
        Target notebook format version, 3 or 4.

    Returns
    -------
    str
        The notebook serialized as a JSON string.

    Raises
    ------
    ValueError
        If nb_version is not 3 or 4. (The original fell through every
        branch and failed with UnboundLocalError on `filestr`.)
    """
    if nb_version not in (3, 4):
        raise ValueError("nb_version must be 3 or 4, got %r" % (nb_version,))
    # Use IPython.nbformat functionality for writing the notebook
    if nb_version == 3:
        from IPython.nbformat.v3 import (new_code_cell, new_text_cell,
                                         new_worksheet, new_notebook,
                                         new_metadata, new_author)
        nb = new_worksheet()
    elif nb_version == 4:
        from IPython.nbformat.v4 import (new_code_cell, new_markdown_cell,
                                         new_notebook)
    nb_cells = []
    for cell_tp, language, block in cells:
        if cell_tp == 'markdown':
            if nb_version == 3:
                nb.cells.append(new_text_cell(u'markdown', source=block))
            elif nb_version == 4:
                nb_cells.append(new_markdown_cell(source=block))
        elif cell_tp == 'codecell':
            if nb_version == 3:
                nb.cells.append(new_code_cell(input=block))
            elif nb_version == 4:
                nb_cells.append(new_code_cell(source=block))
    if nb_version == 3:
        nb = new_notebook(worksheets=[nb], metadata=new_metadata())
        # Let us make v4 notebook here by upgrading
        from IPython.nbformat.v4 import upgrade
        nb = upgrade(nb)
        import IPython.nbformat.v4.nbjson as nbjson
        # Convert nb to json format
        filestr = nbjson.writes(nb)
    elif nb_version == 4:
        nb = new_notebook(cells=nb_cells)
        from IPython.nbformat import writes
        filestr = writes(nb, version=4)
    return filestr
def _save_notebook(self, os_path, model, path): """save a notebook to shock""" # Get name name = path.rsplit('/', 1)[-1] # Get attributes attr = {} attr['name'] = self._strip_ext(name) attr['type'] = self.node_type attr['format'] = 'json' attr['last_modified'] = tz.utcnow().isoformat() # creation timestamp if 'created' in model: attr['created'] = model['created'].isoformat() elif name in self.nb_list: attr['created'] = self.nb_list[name]['attributes']['created'] else: attr['created'] = attr['last_modified'] # original id if name in self.nb_list: attr['original'] = self.nb_list[name]['attributes']['original'] else: attr['original'] = str(uuid.uuid4()) attr_str = json.dumps(attr) # Get the notebook content nb = nbformat.from_dict(model['content']) self.check_and_sign(nb, name) nb_str = nbformat.writes(nb, version=nbformat.NO_CONVERT) # Save to shock try: self.log.debug("Saving %s to Shock", name) node = self._post_shock_node(name, nb_str, attr_str) except Exception as e: raise web.HTTPError(400, u'Unexpected error while saving notebook: %s' %e) # update lists self.nb_list[name] = node open(os_path, 'w').close()
def ipynb_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """Translate a DocOnce-preprocessed document to IPython notebook JSON.

    Typesets admonition environments, rewrites code/tex blocks into
    notebook cells, and serializes via IPython.nbformat (v3 or v4,
    chosen by --ipynb_version). Returns the notebook as a JSON string.

    # We expand all newcommands now
    from html import embed_newcommands
    newcommands = embed_newcommands(filestr)
    if newcommands:
        filestr = newcommands + filestr
    """
    # Fix pandoc citations to normal internal links: [[key]](#key)
    filestr = re.sub(r'\[@(.+?)\]', r'[[\g<1>]](#\g<1>)', filestr)

    # filestr becomes json list after this function so we must typeset
    # envirs here. All envirs are typeset as pandoc_quote.
    from common import _CODE_BLOCK, _MATH_BLOCK
    envir_format = option('ipynb_admon=', 'paragraph')

    # Remove all !bpop-!epop environments (they cause only problens and
    # have no use)
    for envir in 'pop', 'slidecell':
        filestr = re.sub('^<!-- !b%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub('^<!-- !e%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
    filestr = re.sub('^<!-- !bnotes.*?<!-- !enotes -->\n', '', filestr,
                     flags=re.DOTALL|re.MULTILINE)
    filestr = re.sub('^<!-- !split -->\n', '', filestr, flags=re.MULTILINE)

    from doconce import doconce_envirs
    envirs = doconce_envirs()[8:-2]
    for envir in envirs:
        pattern = r'^!b%s(.*?)\n(.+?)\s*^!e%s' % (envir, envir)
        if envir_format in ('quote', 'paragraph', 'hrule'):
            def subst(m):
                title = m.group(1).strip()
                # Text size specified in parenthesis?
                m2 = re.search('^\s*\((.+?)\)', title)
                if title == '' and envir not in ('block', 'quote'):
                    title = envir.capitalize() + '.'
                elif title.lower() == 'none':
                    # NOTE(review): `title == ''` is a comparison, not an
                    # assignment — 'none' titles are not actually cleared;
                    # looks like a latent bug, confirm upstream.
                    title == ''
                elif m2:
                    text_size = m2.group(1).lower()
                    title = title.replace('(%s)' % text_size, '').strip()
                elif title and title[-1] not in ('.', ':', '!', '?'):
                    # Make sure the title ends with puncuation
                    title += '.'
                # Recall that this formatting is called very late
                # so native format must be used!
                if title:
                    title = '**' + title + '**\n'
                    # Could also consider subsubsection formatting
                block = m.group(2)
                # Always use quote typesetting for quotes
                if envir_format == 'quote' or envir == 'quote':
                    # Make Markdown quote of the block: lines start with >
                    lines = []
                    for line in block.splitlines():
                        # Just quote plain text
                        if not (_MATH_BLOCK in line or
                                _CODE_BLOCK in line or
                                line.startswith('FIGURE:') or
                                line.startswith('MOVIE:') or
                                line.startswith('|')):
                            lines.append('> ' + line)
                        else:
                            lines.append('\n' + line + '\n')
                    block = '\n'.join(lines) + '\n\n'
                    # Add quote and a blank line after title
                    if title:
                        title = '> ' + title + '>\n'
                else:
                    # Add a blank line after title
                    if title:
                        title += '\n'

                if envir_format == 'hrule':
                    # Native ------ does not work, use <hr/>
                    #text = '\n\n----------\n' + title + '----------\n' + \
                    #       block + '\n----------\n\n'
                    text = '\n\n<hr/>\n' + title + \
                           block + '\n<hr/>\n\n'
                else:
                    text = title + block + '\n\n'
                return text
        else:
            errwarn('*** error: --ipynb_admon=%s is not supported'
                    % envir_format)
        filestr = re.sub(pattern, subst, filestr,
                         flags=re.DOTALL | re.MULTILINE)

    # Fix pyshell and ipy interactive sessions: remove prompt and output.
    # or split in multiple cells such that output comes out at the end of a cell
    # Fix sys environments and use run prog.py so programs can be run in cell
    # Insert %matplotlib inline in the first block using matplotlib
    # Only typeset Python code as blocks, otherwise !bc environmens
    # become plain indented Markdown.
    from doconce import dofile_basename
    from sets import Set
    ipynb_tarfile = 'ipynb-%s-src.tar.gz' % dofile_basename
    src_paths = Set()
    mpl_inline = False

    split_pyshell = option('ipynb_split_pyshell=', 'on')
    if split_pyshell is None:
        split_pyshell = False
    elif split_pyshell in ('no', 'False', 'off'):
        split_pyshell = False
    else:
        split_pyshell = True

    ipynb_code_tp = [None]*len(code_blocks)
    for i in range(len(code_blocks)):
        # Check if continuation lines are in the code block, because
        # doconce.py inserts a blank after the backslash
        if '\\ \n' in code_blocks[i]:
            code_blocks[i] = code_blocks[i].replace('\\ \n', '\\\n')
        if not mpl_inline and (
            re.search(r'import +matplotlib', code_blocks[i]) or \
            re.search(r'from +matplotlib', code_blocks[i]) or \
            re.search(r'import +scitools', code_blocks[i]) or \
            re.search(r'from +scitools', code_blocks[i])):
            code_blocks[i] = '%matplotlib inline\n\n' + code_blocks[i]
            mpl_inline = True

        tp = code_block_types[i]
        if tp.endswith('-t'):
            # Standard Markdown code with pandoc/github extension
            language = tp[:-2]
            language_spec = language2pandoc.get(language, '')
            #code_blocks[i] = '\n' + indent_lines(code_blocks[i], format) + '\n'
            code_blocks[i] = "```%s\n" % language_spec + \
                             indent_lines(code_blocks[i].strip(), format) + \
                             "```"
            ipynb_code_tp[i] = 'markdown'
        elif tp.startswith('pyshell') or tp.startswith('ipy'):
            lines = code_blocks[i].splitlines()
            last_cell_end = -1
            if split_pyshell:
                new_code_blocks = []
                # Split for each output an put in separate cell
                for j in range(len(lines)):
                    if lines[j].startswith('>>>') or lines[j].startswith('... '):
                        lines[j] = lines[j][4:]
                    elif lines[j].startswith('In ['):   # IPython
                        lines[j] = ':'.join(lines[j].split(':')[1:]).strip()
                    elif lines[j].startswith('   ...: '):   # IPython
                        lines[j] = lines[j][8:]
                    else:
                        # output (no prefix or Out)
                        lines[j] = ''
                        new_code_blocks.append(
                            '\n'.join(lines[last_cell_end+1:j+1]))
                        last_cell_end = j
                code_blocks[i] = new_code_blocks
                ipynb_code_tp[i] = 'cell'
            else:
                # Remove prompt and output lines; leave code executable in cell
                for j in range(len(lines)):
                    if lines[j].startswith('>>> ') or lines[j].startswith('... '):
                        lines[j] = lines[j][4:]
                    elif lines[j].startswith('In ['):
                        lines[j] = ':'.join(lines[j].split(':')[1:]).strip()
                    else:
                        # output
                        lines[j] = ''
                for j in range(lines.count('')):
                    lines.remove('')
                code_blocks[i] = '\n'.join(lines)
                ipynb_code_tp[i] = 'cell'
        elif tp.startswith('sys'):
            # Do we find execution of python file? If so, copy the file
            # to separate subdir and make a run file command in a cell.
            # Otherwise, it is just a plain verbatim Markdown block.
            found_unix_lines = False
            lines = code_blocks[i].splitlines()
            for j in range(len(lines)):
                m = re.search(r'(.+?>|\$) *python +([A-Za-z_0-9]+?\.py)',
                              lines[j])
                if m:
                    name = m.group(2).strip()
                    if os.path.isfile(name):
                        src_paths.add(os.path.dirname(name))
                        # NOTE(review): `fullpath` is not defined in this
                        # scope — presumably should be `name`; confirm
                        # upstream before relying on this branch.
                        lines[j] = '%%run "%s"' % fullpath
                else:
                    found_unix_lines = True
            src_paths = list(src_paths)
            if src_paths and not found_unix_lines:
                # This is a sys block with run commands only
                code_blocks[i] = '\n'.join(lines)
                ipynb_code_tp[i] = 'cell'
            else:
                # Standard Markdown code
                code_blocks[i] = '\n'.join(lines)
                code_blocks[i] = indent_lines(code_blocks[i], format)
                ipynb_code_tp[i] = 'markdown'
        elif tp.endswith('hid'):
            ipynb_code_tp[i] = 'cell_hidden'
        elif tp.startswith('py'):
            ipynb_code_tp[i] = 'cell'
        else:
            # Should support other languages as well, but not for now
            code_blocks[i] = indent_lines(code_blocks[i], format)
            ipynb_code_tp[i] = 'markdown'

    # figure_files and movie_files are global variables and contain
    # all figures and movies referred to
    src_paths = list(src_paths)
    if figure_files:
        src_paths += figure_files
    if movie_files:
        src_paths += movie_files
    if src_paths:
        # Make tar file with all the source dirs with files
        # that need to be executed
        os.system('tar cfz %s %s' % (ipynb_tarfile, ' '.join(src_paths)))
        errwarn('collected all required additional files in ' + ipynb_tarfile
                + ' which must be distributed with the notebook')
    elif os.path.isfile(ipynb_tarfile):
        os.remove(ipynb_tarfile)

    # Parse document into markdown text, code blocks, and tex blocks.
    # Store in nested list notebook_blocks.
    notebook_blocks = [[]]
    authors = ''
    for line in filestr.splitlines():
        if line.startswith('authors = [new_author(name='):  # old author method
            authors = line[10:]
        elif _CODE_BLOCK in line:
            code_block_tp = line.split()[-1]
            if code_block_tp in ('pyhid',) or not code_block_tp.endswith('hid'):
                notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()
                notebook_blocks.append(line)
            # else: hidden block to be dropped (may include more languages
            # with time in the above tuple)
        elif _MATH_BLOCK in line:
            notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()
            notebook_blocks.append(line)
        else:
            if not isinstance(notebook_blocks[-1], list):
                notebook_blocks.append([])
            notebook_blocks[-1].append(line)
    if isinstance(notebook_blocks[-1], list):
        notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()

    # Add block type info
    pattern = r'(\d+) +%s'
    for i in range(len(notebook_blocks)):
        if re.match(pattern % _CODE_BLOCK, notebook_blocks[i]):
            m = re.match(pattern % _CODE_BLOCK, notebook_blocks[i])
            idx = int(m.group(1))
            if ipynb_code_tp[idx] == 'cell':
                notebook_blocks[i] = ['cell', notebook_blocks[i]]
            elif ipynb_code_tp[idx] == 'cell_hidden':
                notebook_blocks[i] = ['cell_hidden', notebook_blocks[i]]
            else:
                notebook_blocks[i] = ['text', notebook_blocks[i]]
        elif re.match(pattern % _MATH_BLOCK, notebook_blocks[i]):
            notebook_blocks[i] = ['math', notebook_blocks[i]]
        else:
            notebook_blocks[i] = ['text', notebook_blocks[i]]

    # Go through tex_blocks and wrap in $$
    # (doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks)
    label2tag = {}
    tag_counter = 1
    for i in range(len(tex_blocks)):
        # Extract labels and add tags
        labels = re.findall(r'label\{(.+?)\}', tex_blocks[i])
        for label in labels:
            label2tag[label] = tag_counter
            # Insert tag to get labeled equation
            tex_blocks[i] = tex_blocks[i].replace(
                'label{%s}' % label,
                'label{%s} \\tag{%s}' % (label, tag_counter))
            tag_counter += 1
        # Remove \[ and \] or \begin/end{equation*} in single equations
        tex_blocks[i] = tex_blocks[i].replace(r'\[', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\]', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\begin{equation*}', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\end{equation*}', '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_blocks[i])
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                errwarn("""\
*** warning: latex envir \\begin{%s} does not work well in Markdown.
    Stick to \\[ ... \\], equation, equation*, align, or align*
    environments in math environments.
""" % envir)
        eq_type = 'heading'  # or '$$'
        eq_type = '$$'
        # Markdown: add $$ on each side of the equation
        if eq_type == '$$':
            # Make sure there are no newline after equation
            tex_blocks[i] = '$$\n' + tex_blocks[i].strip() + '\n$$'
        # Here: use heading (###) and simple formula (remove newline
        # in math expressions to keep everything within a heading) as
        # the equation then looks bigger
        elif eq_type == 'heading':
            tex_blocks[i] = '### $ ' + ' '.join(tex_blocks[i].splitlines()) + ' $'

        # Add labels for the eqs above the block (for reference)
        if labels:
            #label_tp = '<a name="%s"></a>'
            label_tp = '<div id="%s"></div>'
            tex_blocks[i] = '<!-- Equation labels as ordinary links -->\n' + \
                            ' '.join([label_tp % label for label in labels]) \
                            + '\n\n' + tex_blocks[i]

    # blocks is now a list of text chunks in markdown and math/code line
    # instructions. Insert code and tex blocks
    for i in range(len(notebook_blocks)):
        if _CODE_BLOCK in notebook_blocks[i][1] or _MATH_BLOCK in notebook_blocks[i][1]:
            words = notebook_blocks[i][1].split()
            # start of notebook_blocks[i]: number block-indicator code-type
            n = int(words[0])
            if _CODE_BLOCK in notebook_blocks[i][1]:
                notebook_blocks[i][1] = code_blocks[n]  # can be list!
            if _MATH_BLOCK in notebook_blocks[i][1]:
                notebook_blocks[i][1] = tex_blocks[n]

    # Make IPython structures
    nb_version = int(option('ipynb_version=', '4'))
    if nb_version == 3:
        try:
            from IPython.nbformat.v3 import (
                new_code_cell, new_text_cell, new_worksheet, new_notebook,
                new_metadata, new_author)
            nb = new_worksheet()
        except ImportError:
            errwarn('*** error: could not import IPython.nbformat.v3!')
            errwarn('    set --ipynb_version=4 or leave out --ipynb_version=3')
            _abort()
    elif nb_version == 4:
        try:
            from nbformat.v4 import (
                new_code_cell, new_markdown_cell, new_notebook)
        except ImportError:
            # Try old style
            try:
                from IPython.nbformat.v4 import (
                    new_code_cell, new_markdown_cell, new_notebook)
            except ImportError:
                errwarn('*** error: cannot do import nbformat.v4 or IPython.nbformat.v4')
                errwarn('    make sure IPython notebook or Jupyter is installed correctly')
                _abort()

    cells = []
    mdstr = []  # plain md format of the notebook
    prompt_number = 1
    for block_tp, block in notebook_blocks:
        if (block_tp == 'text' or block_tp == 'math') and block != '':
            # Pure comments between math/code and math/code come
            # out as empty blocks, should detect that situation
            # (challenging - can have multiple lines of comments,
            # or begin and end comment lines with important things between)
            if nb_version == 3:
                nb.cells.append(new_text_cell(u'markdown', source=block))
            elif nb_version == 4:
                cells.append(new_markdown_cell(source=block))
            mdstr.append(('markdown', block))
        elif block_tp == 'cell' and block != '' and block != []:
            if isinstance(block, list):
                for block_ in block:
                    block_ = block_.rstrip()
                    if block_ != '':
                        if nb_version == 3:
                            nb.cells.append(new_code_cell(
                                input=block_,
                                prompt_number=prompt_number,
                                collapsed=False))
                        elif nb_version == 4:
                            cells.append(new_code_cell(
                                source=block_,
                                execution_count=prompt_number,
                                metadata=dict(collapsed=False)))
                        prompt_number += 1
                        mdstr.append(('codecell', block_))
            else:
                block = block.rstrip()
                if block != '':
                    if nb_version == 3:
                        nb.cells.append(new_code_cell(
                            input=block,
                            prompt_number=prompt_number,
                            collapsed=False))
                    elif nb_version == 4:
                        cells.append(new_code_cell(
                            source=block,
                            execution_count=prompt_number,
                            metadata=dict(collapsed=False)))
                    prompt_number += 1
                    mdstr.append(('codecell', block))
        elif block_tp == 'cell_hidden' and block != '':
            block = block.rstrip()
            if nb_version == 3:
                nb.cells.append(new_code_cell(
                    input=block,
                    prompt_number=prompt_number,
                    collapsed=True))
            elif nb_version == 4:
                cells.append(new_code_cell(
                    source=block,
                    execution_count=prompt_number,
                    metadata=dict(collapsed=True)))
            prompt_number += 1
            mdstr.append(('codecell', block))

    """
    # Dump the notebook cells in a simple ASCII format
    # (doc/src/ipynb/ipynb_generator.py can translate it back to .ipynb file)
    f = open(dofile_basename + '.md-ipynb', 'w')
    for cell_tp, block in mdstr:
        if cell_tp == 'markdown':
            f.write('\n-----\n\n')
        elif cell_tp == 'codecell':
            f.write('\n-----py\n\n')
        f.write(block)
    f.close()
    """

    if nb_version == 3:
        # Catch the title as the first heading
        m = re.search(r'^#+\s*(.+)$', filestr, flags=re.MULTILINE)
        title = m.group(1).strip() if m else ''
        # md below is not used for anything
        if authors:
            authors = eval(authors)
            md = new_metadata(name=title, authors=authors)
        else:
            md = new_metadata(name=title)
        nb = new_notebook(worksheets=[nb], metadata=new_metadata())
        # Let us make v4 notebook here by upgrading
        from IPython.nbformat.v4 import upgrade
        nb = upgrade(nb)
        import IPython.nbformat.v4.nbjson as nbjson
        # Convert nb to json format
        filestr = nbjson.writes(nb)
    elif nb_version == 4:
        nb = new_notebook(cells=cells)
        from IPython.nbformat import writes
        filestr = writes(nb, version=4)

    # Check that there are no empty cells:
    if '"input": []' in filestr:
        errwarn('*** error: empty cells in notebook - report bug in DocOnce')
        _abort()

    # must do the replacements here at the very end when json is written out
    # \eqref and labels will not work, but labels (only in math) do no harm
    filestr = re.sub(r'([^\\])label\{', r'\g<1>\\\\label{', filestr,
                     flags=re.MULTILINE)
    # \\eqref{} just gives (???) link at this stage - future versions
    # will probably support labels
    #filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)
    # Now we use explicit references to tags
    def subst(m):
        label = m.group(1)
        try:
            return r'[(%s)](#%s)' % (label2tag[label], label)
        except KeyError as e:
            errwarn('*** error: label "%s" is not defined' % str(e))
    filestr = re.sub(r'\(ref\{(.+?)\}\)', subst, filestr)
    """
    # MathJax reference to tag (recall that the equations have both label
    # and tag (know that tag only works well in HTML, but this mjx-eqn-no
    # label does not work in ipynb)
    filestr = re.sub(r'\(ref\{(.+?)\}\)',
                     lambda m: r'[(%s)](#mjx-eqn-%s)' %
                     (label2tag[m.group(1)], label2tag[m.group(1)]), filestr)
    """
    #filestr = re.sub(r'\(ref\{(.+?)\}\)', r'Eq (\g<1>)', filestr)

    '''
    # Final fixes: replace all text between cells by markdown code cells
    # Note: the patterns are overlapping so a plain re.sub will not work,
    # here we run through all blocks found and subsitute the first remaining
    # one, one by one.
    pattern = r'   \},\n(.+?)\{\n    "cell_type":'
    begin_pattern = r'^(.+?)\{\n    "cell_type":'
    remaining_block_begin = re.findall(begin_pattern, filestr, flags=re.DOTALL)
    remaining_blocks = re.findall(pattern, filestr, flags=re.DOTALL)
    import string
    for block in remaining_block_begin + remaining_blocks:
        filestr = string.replace(filestr, block, json_markdown(block) + '   ',
                                 maxreplace=1)
    filestr_end = re.sub(r'   \{\n    "cell_type": .+?\n   \},\n', '',
                         filestr, flags=re.DOTALL)
    filestr = filestr.replace(filestr_end, json_markdown(filestr_end))
    filestr = """{
 "metadata": {
  "name": "SOME NAME"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
""" + filestr.rstrip() + '\n' + \
    json_pycode('', final_prompt_no+1, 'python').rstrip()[:-1] + """
   ],
   "metadata": {}
  }
 ]
}"""
    '''
    return filestr
def create_json_notebook(markdown):
    """Parse markdown with notedown and return the notebook as JSON."""
    parsed = notedown.MarkdownReader().reads(markdown)
    return nbformat.writes(parsed)
def writes_base64(nb, version=NBFORMAT_VERSION):
    """
    Write a notebook as base64.
    """
    # Serialize to JSON first, then base64-encode the UTF-8 bytes.
    serialized = writes(nb, version=version)
    return b64encode(serialized.encode('utf-8'))
def ipynb_code(filestr, code_blocks, code_block_types, tex_blocks, format):
    """
    # We expand all newcommands now
    from html import embed_newcommands
    newcommands = embed_newcommands(filestr)
    if newcommands:
        filestr = newcommands + filestr
    """
    # Translate the DocOnce-preprocessed document `filestr` into an
    # IPython/Jupyter notebook JSON string.  `code_blocks`/`code_block_types`
    # and `tex_blocks` hold the code and math extracted earlier by doconce.py;
    # `format` is the output-format name consumed by helpers (indent_lines).
    # Returns the notebook JSON as a string.
    # NOTE(review): this block uses Python 2 syntax (print statements,
    # `from sets import Set`) and legacy IPython.nbformat imports.

    # Fix pandoc citations to normal internal links: [[key]](#key)
    filestr = re.sub(r'\[@(.+?)\]', r'[[\g<1>]](#\g<1>)', filestr)

    # filestr becomes json list after this function so we must typeset
    # envirs here. All envirs are typeset as pandoc_quote.
    from common import _CODE_BLOCK, _MATH_BLOCK
    envir_format = option('ipynb_admon=', 'paragraph')

    # Remove all !bpop-!epop environments (they cause only problems and
    # have no use)
    for envir in 'pop', 'slidecell':
        filestr = re.sub('^<!-- !b%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
        filestr = re.sub('^<!-- !e%s .*\n' % envir, '', filestr,
                         flags=re.MULTILINE)
    filestr = re.sub('^<!-- !bnotes.*?<!-- !enotes -->\n', '', filestr,
                     flags=re.DOTALL | re.MULTILINE)
    filestr = re.sub('^<!-- !split -->\n', '', filestr, flags=re.MULTILINE)

    from doconce import doconce_envirs
    envirs = doconce_envirs()[8:-2]
    for envir in envirs:
        pattern = r'^!b%s(.*?)\n(.+?)\s*^!e%s' % (envir, envir)
        if envir_format in ('quote', 'paragraph', 'hrule'):
            def subst(m):
                # Typeset one !b<envir> ... !e<envir> admonition block.
                title = m.group(1).strip()
                # Text size specified in parenthesis?
                m2 = re.search('^\s*\((.+?)\)', title)
                if title == '' and envir not in ('block', 'quote'):
                    title = envir.capitalize() + '.'
                elif title.lower() == 'none':
                    # NOTE(review): `==` here is a no-op comparison; the
                    # intent is almost certainly the assignment `title = ''`,
                    # so "none" titles are NOT actually suppressed.
                    title == ''
                elif m2:
                    text_size = m2.group(1).lower()
                    title = title.replace('(%s)' % text_size, '').strip()
                elif title and title[-1] not in ('.', ':', '!', '?'):
                    # Make sure the title ends with punctuation
                    title += '.'
                # Recall that this formatting is called very late
                # so native format must be used!
                if title:
                    title = '**' + title + '**\n'
                # Could also consider subsubsection formatting
                block = m.group(2)
                # Always use quote typesetting for quotes
                if envir_format == 'quote' or envir == 'quote':
                    # Make Markdown quote of the block: lines start with >
                    lines = []
                    for line in block.splitlines():
                        # Just quote plain text
                        if not (_MATH_BLOCK in line or
                                _CODE_BLOCK in line or
                                line.startswith('FIGURE:') or
                                line.startswith('MOVIE:') or
                                line.startswith('|')):
                            lines.append('> ' + line)
                        else:
                            lines.append('\n' + line + '\n')
                    block = '\n'.join(lines) + '\n\n'
                    # Add quote and a blank line after title
                    if title:
                        title = '> ' + title + '>\n'
                else:
                    # Add a blank line after title
                    if title:
                        title += '\n'
                if envir_format == 'hrule':
                    # Native ------ does not work, use <hr/>
                    #text = '\n\n----------\n' + title + '----------\n' + \
                    #       block + '\n----------\n\n'
                    text = '\n\n<hr/>\n' + title + \
                           block + '\n<hr/>\n\n'
                else:
                    text = title + block + '\n\n'
                return text
        else:
            print '*** error: --ipynb_admon=%s is not supported' % envir_format
        filestr = re.sub(pattern, subst, filestr,
                         flags=re.DOTALL | re.MULTILINE)

    # Fix pyshell and ipy interactive sessions: remove prompt and output,
    # or split in multiple cells such that output comes out at the end of
    # a cell.
    # Fix sys environments and use run prog.py so programs can be run in cell.
    # Insert %matplotlib inline in the first block using matplotlib.
    # Only typeset Python code as blocks, otherwise !bc environments
    # become plain indented Markdown.
    from doconce import dofile_basename
    from sets import Set
    ipynb_tarfile = 'ipynb-%s-src.tar.gz' % dofile_basename
    src_paths = Set()
    mpl_inline = False

    split_pyshell = option('ipynb_split_pyshell=', 'on')
    if split_pyshell is None:
        split_pyshell = False
    elif split_pyshell in ('no', 'False', 'off'):
        split_pyshell = False
    else:
        split_pyshell = True

    # Per-block verdict: 'cell', 'cell_hidden', or 'markdown'
    ipynb_code_tp = [None] * len(code_blocks)
    for i in range(len(code_blocks)):
        # Check if continuation lines are in the code block, because
        # doconce.py inserts a blank after the backslash
        if '\\ \n' in code_blocks[i]:
            code_blocks[i] = code_blocks[i].replace('\\ \n', '\\\n')

        if not mpl_inline and (
            re.search(r'import +matplotlib', code_blocks[i]) or \
            re.search(r'from +matplotlib', code_blocks[i]) or \
            re.search(r'import +scitools', code_blocks[i]) or \
            re.search(r'from +scitools', code_blocks[i])):
            code_blocks[i] = '%matplotlib inline\n\n' + code_blocks[i]
            mpl_inline = True

        tp = code_block_types[i]
        if tp.endswith('-t'):
            # Standard Markdown code with pandoc/github extension
            language = tp[:-2]
            language_spec = language2pandoc.get(language, '')
            #code_blocks[i] = '\n' + indent_lines(code_blocks[i], format) + '\n'
            code_blocks[i] = "```%s\n" % language_spec + \
                             indent_lines(code_blocks[i].strip(), format) + \
                             "```"
            ipynb_code_tp[i] = 'markdown'
        elif tp.startswith('pyshell') or tp.startswith('ipy'):
            lines = code_blocks[i].splitlines()
            last_cell_end = -1
            if split_pyshell:
                new_code_blocks = []
                # Split for each output and put in separate cell
                for j in range(len(lines)):
                    if lines[j].startswith('>>>') or lines[j].startswith(
                        '... '):
                        lines[j] = lines[j][4:]
                    elif lines[j].startswith('In ['):
                        # IPython
                        lines[j] = ':'.join(lines[j].split(':')[1:]).strip()
                    elif lines[j].startswith('   ...: '):
                        # IPython continuation prompt; assumes the 8-char
                        # prefix '   ...: ' -- TODO confirm exact spacing
                        lines[j] = lines[j][8:]
                    else:
                        # output (no prefix or Out): close the current cell
                        lines[j] = ''
                        new_code_blocks.append(
                            '\n'.join(lines[last_cell_end + 1:j + 1]))
                        last_cell_end = j
                code_blocks[i] = new_code_blocks
                ipynb_code_tp[i] = 'cell'
            else:
                # Remove prompt and output lines; leave code executable in cell
                for j in range(len(lines)):
                    if lines[j].startswith('>>> ') or lines[j].startswith(
                        '... '):
                        lines[j] = lines[j][4:]
                    elif lines[j].startswith('In ['):
                        lines[j] = ':'.join(lines[j].split(':')[1:]).strip()
                    else:
                        # output
                        lines[j] = ''
                for j in range(lines.count('')):
                    lines.remove('')
                code_blocks[i] = '\n'.join(lines)
                ipynb_code_tp[i] = 'cell'
        elif tp.startswith('sys'):
            # Do we find execution of python file? If so, copy the file
            # to separate subdir and make a run file command in a cell.
            # Otherwise, it is just a plain verbatim Markdown block.
            found_unix_lines = False
            lines = code_blocks[i].splitlines()
            for j in range(len(lines)):
                m = re.search(r'(.+?>|\$) *python +([A-Za-z_0-9]+?\.py)',
                              lines[j])
                if m:
                    name = m.group(2).strip()
                    if os.path.isfile(name):
                        src_paths.add(os.path.dirname(name))
                        # NOTE(review): `fullpath` is never defined in this
                        # function - this line raises NameError whenever a
                        # runnable .py file is found; `name` was probably
                        # intended.
                        lines[j] = '%%run "%s"' % fullpath
                else:
                    found_unix_lines = True
            # NOTE(review): converting the Set to a list here means any
            # later sys block's `src_paths.add(...)` above would fail
            # (list has no .add) - looks like a bug; verify against
            # upstream DocOnce.
            src_paths = list(src_paths)
            if src_paths and not found_unix_lines:
                # This is a sys block with run commands only
                code_blocks[i] = '\n'.join(lines)
                ipynb_code_tp[i] = 'cell'
            else:
                # Standard Markdown code
                code_blocks[i] = '\n'.join(lines)
                code_blocks[i] = indent_lines(code_blocks[i], format)
                ipynb_code_tp[i] = 'markdown'
        elif tp.endswith('hid'):
            ipynb_code_tp[i] = 'cell_hidden'
        elif tp.startswith('py'):
            ipynb_code_tp[i] = 'cell'
        else:
            # Should support other languages as well, but not for now
            code_blocks[i] = indent_lines(code_blocks[i], format)
            ipynb_code_tp[i] = 'markdown'

    # figure_files and movie_files are global variables and contain
    # all figures and movies referred to
    src_paths = list(src_paths)
    if figure_files:
        src_paths += figure_files
    if movie_files:
        src_paths += movie_files
    if src_paths:
        # Make tar file with all the source dirs with files
        # that need to be executed
        os.system('tar cfz %s %s' % (ipynb_tarfile, ' '.join(src_paths)))
        print 'collected all required additional files in', ipynb_tarfile, 'which must be distributed with the notebook'
    elif os.path.isfile(ipynb_tarfile):
        os.remove(ipynb_tarfile)

    # Parse document into markdown text, code blocks, and tex blocks.
    # Store in nested list notebook_blocks.
    notebook_blocks = [[]]
    authors = ''
    for line in filestr.splitlines():
        if line.startswith('authors = [new_author(name='):
            # old author method
            authors = line[10:]
        elif _CODE_BLOCK in line:
            code_block_tp = line.split()[-1]
            if code_block_tp in ('pyhid',) or \
                   not code_block_tp.endswith('hid'):
                notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()
                notebook_blocks.append(line)
            # else: hidden block to be dropped (may include more languages
            # with time in the above tuple)
        elif _MATH_BLOCK in line:
            notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()
            notebook_blocks.append(line)
        else:
            if not isinstance(notebook_blocks[-1], list):
                notebook_blocks.append([])
            notebook_blocks[-1].append(line)
    if isinstance(notebook_blocks[-1], list):
        notebook_blocks[-1] = '\n'.join(notebook_blocks[-1]).strip()

    # Add block type info: each entry becomes [type, content]
    pattern = r'(\d+) +%s'
    for i in range(len(notebook_blocks)):
        if re.match(pattern % _CODE_BLOCK, notebook_blocks[i]):
            m = re.match(pattern % _CODE_BLOCK, notebook_blocks[i])
            idx = int(m.group(1))
            if ipynb_code_tp[idx] == 'cell':
                notebook_blocks[i] = ['cell', notebook_blocks[i]]
            elif ipynb_code_tp[idx] == 'cell_hidden':
                notebook_blocks[i] = ['cell_hidden', notebook_blocks[i]]
            else:
                notebook_blocks[i] = ['text', notebook_blocks[i]]
        elif re.match(pattern % _MATH_BLOCK, notebook_blocks[i]):
            notebook_blocks[i] = ['math', notebook_blocks[i]]
        else:
            notebook_blocks[i] = ['text', notebook_blocks[i]]

    # Go through tex_blocks and wrap in $$
    # (doconce.py runs align2equations so there are no align/align*
    # environments in tex blocks)
    label2tag = {}
    tag_counter = 1
    for i in range(len(tex_blocks)):
        # Extract labels and add tags
        labels = re.findall(r'label\{(.+?)\}', tex_blocks[i])
        for label in labels:
            label2tag[label] = tag_counter
            # Insert tag to get labeled equation
            tex_blocks[i] = tex_blocks[i].replace(
                'label{%s}' % label,
                'label{%s} \\tag{%s}' % (label, tag_counter))
            tag_counter += 1
        # Remove \[ and \] or \begin/end{equation*} in single equations
        tex_blocks[i] = tex_blocks[i].replace(r'\[', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\]', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\begin{equation*}', '')
        tex_blocks[i] = tex_blocks[i].replace(r'\end{equation*}', '')
        # Check for illegal environments
        m = re.search(r'\\begin\{(.+?)\}', tex_blocks[i])
        if m:
            envir = m.group(1)
            if envir not in ('equation', 'equation*', 'align*', 'align',
                             'array'):
                print """\
*** warning: latex envir \\begin{%s} does not work well in Markdown.
    Stick to \\[ ... \\], equation, equation*, align, or align*
    environments in math environments.
""" % envir
        eq_type = 'heading'  # or '$$'
        eq_type = '$$'
        # Markdown: add $$ on each side of the equation
        if eq_type == '$$':
            # Make sure there are no newline after equation
            tex_blocks[i] = '$$\n' + tex_blocks[i].strip() + '\n$$'
        # Here: use heading (###) and simple formula (remove newline
        # in math expressions to keep everything within a heading) as
        # the equation then looks bigger
        elif eq_type == 'heading':
            tex_blocks[i] = '### $ ' + ' '.join(
                tex_blocks[i].splitlines()) + ' $'

        # Add labels for the eqs above the block (for reference)
        if labels:
            #label_tp = '<a name="%s"></a>'
            label_tp = '<div id="%s"></div>'
            tex_blocks[i] = '<!-- Equation labels as ordinary links -->\n' + \
                            ' '.join([label_tp % label
                                      for label in labels]) + '\n\n' + \
                            tex_blocks[i]

    # blocks is now a list of text chunks in markdown and math/code line
    # instructions. Insert code and tex blocks
    for i in range(len(notebook_blocks)):
        if _CODE_BLOCK in notebook_blocks[i][1] or \
               _MATH_BLOCK in notebook_blocks[i][1]:
            words = notebook_blocks[i][1].split()
            # start of notebook_blocks[i]: number block-indicator code-type
            n = int(words[0])
            if _CODE_BLOCK in notebook_blocks[i][1]:
                notebook_blocks[i][1] = code_blocks[n]  # can be list!
            if _MATH_BLOCK in notebook_blocks[i][1]:
                notebook_blocks[i][1] = tex_blocks[n]

    # Make IPython structures
    nb_version = int(option('ipynb_version=', '3'))
    if nb_version == 3:
        from IPython.nbformat.v3 import (
            new_code_cell, new_text_cell, new_worksheet, new_notebook,
            new_metadata, new_author)
        nb = new_worksheet()
    elif nb_version == 4:
        from IPython.nbformat.v4 import (
            new_code_cell, new_markdown_cell, new_notebook)
        cells = []

    mdstr = []  # plain md format of the notebook
    prompt_number = 1
    for block_tp, block in notebook_blocks:
        if (block_tp == 'text' or block_tp == 'math') and block != '':
            # Pure comments between math/code and math/code come
            # out as empty blocks, should detect that situation
            # (challenging - can have multiple lines of comments,
            # or begin and end comment lines with important things between)
            if nb_version == 3:
                nb.cells.append(new_text_cell(u'markdown', source=block))
            elif nb_version == 4:
                cells.append(new_markdown_cell(source=block))
            mdstr.append(('markdown', block))
        elif block_tp == 'cell' and block != '' and block != []:
            if isinstance(block, list):
                # Split pyshell/ipy blocks: one code cell per chunk
                for block_ in block:
                    block_ = block_.rstrip()
                    if block_ != '':
                        if nb_version == 3:
                            nb.cells.append(
                                new_code_cell(input=block_,
                                              prompt_number=prompt_number,
                                              collapsed=False))
                        elif nb_version == 4:
                            cells.append(
                                new_code_cell(source=block_,
                                              execution_count=prompt_number))
                        prompt_number += 1
                        mdstr.append(('codecell', block_))
            else:
                block = block.rstrip()
                if block != '':
                    if nb_version == 3:
                        nb.cells.append(
                            new_code_cell(input=block,
                                          prompt_number=prompt_number,
                                          collapsed=False))
                    elif nb_version == 4:
                        cells.append(
                            new_code_cell(source=block,
                                          execution_count=prompt_number))
                    prompt_number += 1
                    mdstr.append(('codecell', block))
        elif block_tp == 'cell_hidden' and block != '':
            block = block.rstrip()
            if nb_version == 3:
                nb.cells.append(
                    new_code_cell(input=block,
                                  prompt_number=prompt_number,
                                  collapsed=True))
            elif nb_version == 4:
                # NOTE(review): the v4 branch does not mark the cell as
                # collapsed/hidden, unlike the v3 branch - confirm intent.
                cells.append(
                    new_code_cell(source=block,
                                  execution_count=prompt_number))
            prompt_number += 1
            mdstr.append(('codecell', block))
    """
    # Dump the notebook cells in a simple ASCII format
    # (doc/src/ipynb/ipynb_generator.py can translate it back to .ipynb file)
    f = open(dofile_basename + '.md-ipynb', 'w')
    for cell_tp, block in mdstr:
        if cell_tp == 'markdown':
            f.write('\n-----\n\n')
        elif cell_tp == 'codecell':
            f.write('\n-----py\n\n')
        f.write(block)
    f.close()
    """

    if nb_version == 3:
        # Catch the title as the first heading
        m = re.search(r'^#+\s*(.+)$', filestr, flags=re.MULTILINE)
        title = m.group(1).strip() if m else ''
        # md below is not used for anything
        if authors:
            authors = eval(authors)
            md = new_metadata(name=title, authors=authors)
        else:
            md = new_metadata(name=title)
        nb = new_notebook(worksheets=[nb], metadata=new_metadata())
        # Let us make v4 notebook here by upgrading
        from IPython.nbformat.v4 import upgrade
        nb = upgrade(nb)
        import IPython.nbformat.v4.nbjson as nbjson
        # Convert nb to json format
        filestr = nbjson.writes(nb)
    elif nb_version == 4:
        nb = new_notebook(cells=cells)
        from IPython.nbformat import writes
        filestr = writes(nb, version=4)

    # Check that there are no empty cells:
    if '"input": []' in filestr:
        print '*** error: empty cells in notebook - report bug in DocOnce'
        _abort()
    # must do the replacements here at the very end when json is written out
    # \eqref and labels will not work, but labels (only in math) do no harm
    filestr = re.sub(r'([^\\])label\{', r'\g<1>\\\\label{', filestr,
                     flags=re.MULTILINE)
    # \\eqref{} just gives (???) link at this stage - future versions
    # will probably support labels
    #filestr = re.sub(r'\(ref\{(.+?)\}\)', r'\\eqref{\g<1>}', filestr)
    # Now we use explicit references to tags
    def subst(m):
        # Replace (ref{label}) by a Markdown link to the tagged equation.
        label = m.group(1)
        try:
            return r'[(%s)](#%s)' % (label2tag[label], label)
        except KeyError as e:
            # NOTE(review): returns None on unknown label, so re.sub would
            # raise; an _abort() after the message may be intended.
            print '*** error: label "%s" is not defined' % str(e)
    filestr = re.sub(r'\(ref\{(.+?)\}\)', subst, filestr)
    """
    # MathJax reference to tag (recall that the equations have both label
    # and tag (know that tag only works well in HTML, but this mjx-eqn-no
    # label does not work in ipynb)
    filestr = re.sub(r'\(ref\{(.+?)\}\)',
                     lambda m: r'[(%s)](#mjx-eqn-%s)' %
                     (label2tag[m.group(1)], label2tag[m.group(1)]), filestr)
    """
    #filestr = re.sub(r'\(ref\{(.+?)\}\)', r'Eq (\g<1>)', filestr)
    '''
    # Final fixes: replace all text between cells by markdown code cells
    # Note: the patterns are overlapping so a plain re.sub will not work,
    # here we run through all blocks found and subsitute the first remaining
    # one, one by one.
    pattern = r'   \},\n(.+?)\{\n   "cell_type":'
    begin_pattern = r'^(.+?)\{\n   "cell_type":'
    remaining_block_begin = re.findall(begin_pattern, filestr, flags=re.DOTALL)
    remaining_blocks = re.findall(pattern, filestr, flags=re.DOTALL)
    import string
    for block in remaining_block_begin + remaining_blocks:
        filestr = string.replace(filestr, block, json_markdown(block) + '   ',
                                 maxreplace=1)
    filestr_end = re.sub(r'   \{\n   "cell_type": .+?\n   \},\n', '', filestr,
                         flags=re.DOTALL)
    filestr = filestr.replace(filestr_end, json_markdown(filestr_end))
    filestr = """{
 "metadata": {
  "name": "SOME NAME"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
""" + filestr.rstrip() + '\n'+ \
    json_pycode('', final_prompt_no+1, 'python').rstrip()[:-1] + """
   ],
   "metadata": {}
  }
 ]
}"""
    '''
    return filestr
def post(self, convert=False):
    """Handle a project POST request.

    When ``convert`` is truthy, the JSON body is a project payload whose
    ``bundles`` are each turned into a v4 notebook (task description,
    shared-notes board, and one read-only description + log + code cell
    per action); the serialized notebook is stored back on the bundle as
    ``notebook`` and the payload is echoed to the client with status 201.
    Otherwise the payload is stored via ``db.addProject`` and a per-project
    FTP work folder is created.
    """
    # Check if this is a convert operation
    if convert:
        # Get JSON payload
        json_data = tornado.escape.json_decode(self.request.body)
        # Go through all the assignments / bundles in the project payload
        for x in json_data['bundles']:
            # Create a notebook version4 object
            nb = nbfv4.new_notebook()
            # Create the task description
            task_description = '## Task \n' + x['description'] + '\n'\
                               '___ \n' \
                               '#### '+ x['owner'] + '\n' \
                               '___ \n'
            # Create the general description string
            common_description ='#### Temporary folder \n' \
                'Set your working dir to following folder '+ json_data['gid']+'. Upload your csv/data files into '\
                'this directoy to use them.<br/>'\
                '`ftp://pycard.ifi.uzh.ch/data/'+json_data['gid']+'`'\
                '<br/><br/>'\
                'Use with R Kernel <br/>' \
                '`setwd("./'+ json_data['gid']+'")` <br/><br/>' \
                'Use with Python Kernel <br/> ' \
                '`import os` <br/>' \
                '`os.chdir("./'+ json_data['gid']+'")` \n' \
                '___ \n' \
                '#### Notes board \n' \
                'In order to avoid conflicts between notebooks and have a clean transition from one step to another, use the shared notes file ' \
                'shared.txt . The contents of the file will be loaded and made in every notebook, so it is a good place to register variable names used in the different steps, or to provide feedback after each iteration. <br/><br/>'
            # Add the task_description as a markdown cell
            heading = nbfv4.new_markdown_cell(task_description)
            # Set the task_description as read-only
            heading['metadata'].run_control = dict()
            heading['metadata'].run_control['read_only'] = True
            # Append cell to notebook
            nb['cells'].append(heading)
            # Add the common description cell as a markdown cell
            common = nbfv4.new_markdown_cell(common_description)
            # Set the common description cell as read only
            common['metadata'].run_control = dict()
            common['metadata']['common'] = True
            common['metadata'].run_control['read_only'] = True
            # Add the cell to the notebook
            nb['cells'].append(common)
            # Create a markdown cell for the note board, set the
            # variable_cell metadata to true
            variablesh = nbfv4.new_markdown_cell()
            variablesh['metadata']['variable_cell'] = True
            nb['cells'].append(variablesh)
            # Set the notebook kernel in metadata
            nb['metadata']['language'] = json_data['kernel']
            # Set cell toolbar to Side Comments in metadata
            nb['metadata']['celltoolbar'] = "Side Comments"
            # Set project ID in metadata
            nb['metadata']['pgid'] = json_data['gid']
            # Set id of notes board for this project (shared_notes.txt file)
            nb['metadata']['variablesid'] = json_data['variablesid']
            # Set Google ID for this notebook
            nb['metadata']['id'] = x['gid']
            # Set the worker assigned to this task
            nb['metadata']['bundle-owner'] = x['owner']
            # Go through all the actions in the assignment
            for a in x['actions']:
                # Create action description text
                text = '#### This is the description of the actions that need to be implemented.' \
                       '\n' \
                       '### ' + a['name'] + '\n' \
                       'Description: ' + a['description'] + '<br>' \
                       'Input: ' + a['input'] + '<br>' \
                       'Output: ' + a['output']
                code = "# Enter implementation here."
                # Add the description cell as a markdown cell, set it read-only
                desc = nbfv4.new_markdown_cell(text)
                desc['metadata'].run_control = dict()
                desc['metadata'].run_control['read_only'] = True
                nb['cells'].append(desc)
                # Create a cell for logging the work
                log_text = '# Log and description \n' \
                           'Please record here all information needed to reproduce and understand your work: \n' \
                           '- Algorithms used\n' \
                           '- Things tried but discarded\n' \
                           '- Explanation **why** you have solved the problem the way you solved it.\n'
                # Add the description cell as a markdown cell, set it read-only
                log_desc = nbfv4.new_markdown_cell(log_text)
                nb['cells'].append(log_desc)
                # Create the cell code for this action
                code_cell = nbfv4.new_code_cell(code)
                code_cell['metadata'].side_comments = dict()
                # Create the cell code for this action, set the section id
                # needed for the SideComments extension as metadata
                # (20-char random alphanumeric id from a CSPRNG)
                code_cell['metadata'].side_comments['id'] = ''.join(
                    random.SystemRandom().choice(string.ascii_uppercase +
                                                 string.digits)
                    for _ in range(20))
                logger.info(code_cell['metadata'])
                # Add cell to notebook
                nb['cells'].append(code_cell)
            # Add the contents of the created notebook a the bundle
            # {notebook} property in the project payload
            x['notebook'] = nbf.writes(nb, version=4)
        json_data['variables'] = html2text.html2text(
            json_data['variables'])
        # Send the answer back to client
        self.write(json.dumps(json_data, cls=Encoder))
        self.set_status(201)
    else:
        # Get the project payload from the client
        json_data = tornado.escape.json_decode(self.request.body)
        # Add the project to the database
        ret = db.addProject(json_data)
        if ret['ok'] == 1.0:
            # If project was added, create temporary work folder for the
            # project which can be accessed through ftp
            if not os.path.exists('/ftp/ipython/data/' + json_data['gid']):
                os.makedirs('/ftp/ipython/data/' + json_data['gid'])
                os.chmod('/ftp/ipython/data/' + json_data['gid'], 0o755)
            self.set_status(201)
def strip_output(nb):
    """Remove from *nb* every cell marked to be skipped in slideshow metadata.

    A cell is dropped when ``cell.metadata.slideshow.slide_type`` contains
    the substring ``'skip'``; cells without slideshow metadata are kept.
    Returns the (mutated) notebook object.

    NOTE(review): despite the name, this does not strip cell *outputs*.
    It also iterates ``_cells(nb)`` but pops from ``nb.cells`` - these must
    refer to the same list for the indices to be valid; confirm `_cells`.
    """
    # nb.metadata.pop('signature', None)
    skip_indices = []
    for i, cell in enumerate(_cells(nb)):
        try:
            if 'skip' in cell.metadata.slideshow.slide_type:
                skip_indices.append(i)
        except AttributeError:
            # No slideshow metadata on this cell -> keep it
            pass
    # Pop from the end so earlier indices stay valid
    for i in reversed(skip_indices):
        nb.cells.pop(i)
    return nb


if __name__ == '__main__':
    filename = sys.argv[1]
    with io.open(filename, 'r', encoding='utf8') as f:
        nb = read(f, as_version=NO_CONVERT)
    nb = strip_output(nb)
    print(writes(nb))
    # with io.open(filename, 'w', encoding='utf8') as f:
    #     write(nb, f)
def post(self, convert=False):
    """Handle a project POST request.

    When ``convert`` is truthy, the JSON body is a project payload whose
    ``bundles`` are each turned into a v4 notebook (task description,
    shared-notes board, and one read-only description + code cell per
    action); the serialized notebook is stored back on the bundle as
    ``notebook`` and the payload is echoed to the client with status 201.
    Otherwise the payload is stored via ``db.addProject`` and a per-project
    FTP work folder is created.
    """
    # Check if this is a convert operation
    if convert:
        # Get JSON payload
        json_data = tornado.escape.json_decode(self.request.body)
        # Go through all the assignments / bundles in the project payload
        for x in json_data['bundles']:
            # Create a notebook version4 object
            nb = nbfv4.new_notebook()
            # Create the task description
            task_description = '## Task \n' + x['description'] + '\n'\
                               '___ \n' \
                               '#### '+ x['owner'] + '\n' \
                               '___ \n'
            # Create the general description string
            common_description ='#### Temporary folder \n' \
                'Set your working dir to following folder '+ json_data['gid']+'. Upload your csv/data files into '\
                'this directoy to use them.<br/>'\
                '`ftp://pycard.ifi.uzh.ch/data/'+json_data['gid']+'`'\
                '<br/><br/>'\
                'Use with R Kernel <br/>' \
                '`setwd("./'+ json_data['gid']+'")` <br/><br/>' \
                'Use with Python Kernel <br/> ' \
                '`import os` <br/>' \
                '`os.chdir("./'+ json_data['gid']+'")` \n' \
                '___ \n' \
                '#### Notes board \n' \
                'In order to avoid conflicts between notebooks and have a clean transition from one step to another, use the shared notes file ' \
                'shared.txt . The contents of the file will be loaded and made in every notebook, so it is a good place to register variable names used in the different steps, or to provide feedback after each iteration. <br/><br/>'
            # Add the task_description as a markdown cell
            heading = nbfv4.new_markdown_cell(task_description)
            # Set the task_description as read-only
            heading['metadata'].run_control = dict()
            heading['metadata'].run_control['read_only'] = True
            # Append cell to notebook
            nb['cells'].append(heading)
            # Add the common description cell as a markdown cell
            common = nbfv4.new_markdown_cell(common_description)
            # Set the common description cell as read only
            common['metadata'].run_control = dict()
            common['metadata']['common'] = True
            common['metadata'].run_control['read_only'] = True
            # Add the cell to the notebook
            nb['cells'].append(common)
            # Create a markdown cell for the note board, set the
            # variable_cell metadata to true
            variablesh = nbfv4.new_markdown_cell()
            variablesh['metadata']['variable_cell'] = True
            nb['cells'].append(variablesh)
            # Set the notebook kernel in metadata
            nb['metadata']['language'] = json_data['kernel']
            # Set cell toolbar to Side Comments in metadata
            nb['metadata']['celltoolbar'] = "Side Comments"
            # Set project ID in metadata
            nb['metadata']['pgid'] = json_data['gid']
            # Set id of notes board for this project (shared_notes.txt file)
            nb['metadata']['variablesid'] = json_data['variablesid']
            # Set Google ID for this notebook
            nb['metadata']['id'] = x['gid']
            # Go through all the actions in the assignment
            for a in x['actions']:
                # Create action description text
                text = '#### This is the description of the actions that need to be implemented.' \
                       '\n' \
                       '### ' + a['name'] + '\n' \
                       'Description: ' + a['description'] + '<br>' \
                       'Input: ' + a['input'] + '<br>' \
                       'Output: ' + a['output']
                code = "# Enter implementation here."
                # Add the description cell as a markdown cell, set it read-only
                desc = nbfv4.new_markdown_cell(text)
                desc['metadata'].run_control = dict()
                desc['metadata'].run_control['read_only'] = True
                nb['cells'].append(desc)
                # Create the cell code for this action
                code_cell = nbfv4.new_code_cell(code)
                code_cell['metadata'].side_comments = dict()
                # Create the cell code for this action, set the section id
                # needed for the SideComments extension as metadata
                # (20-char random alphanumeric id from a CSPRNG)
                code_cell['metadata'].side_comments['id'] = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(20))
                logger.info(code_cell['metadata'])
                # Add cell to notebook
                nb['cells'].append(code_cell)
            # Add the contents of the created notebook a the bundle
            # {notebook} property in the project payload
            x['notebook'] = nbf.writes(nb,version=4)
        json_data['variables'] = html2text.html2text(json_data['variables'])
        # Send the answer back to client
        self.write(json.dumps(json_data, cls=Encoder))
        self.set_status(201)
    else:
        # Get the project payload from the client
        json_data = tornado.escape.json_decode(self.request.body)
        # Add the project to the database
        ret = db.addProject(json_data)
        if ret['ok'] == 1.0:
            # If project was added, create temporary work folder for the
            # project which can be accessed through ftp
            if not os.path.exists('/ftp/ipython/data/' + json_data['gid']):
                os.makedirs('/ftp/ipython/data/' + json_data['gid'])
                os.chmod('/ftp/ipython/data/' + json_data['gid'], 0o755)
            self.set_status(201)