def test_match_arbitrary():
    mr = notedown.MarkdownReader(match='attr')
    nb = mr.to_notebook(attribute_markdown)
    assert(nb.cells[0]['cell_type'] == 'markdown')
    assert(nb.cells[2]['cell_type'] == 'code')
    assert(nb.cells[3]['cell_type'] == 'code')
def get_codes_to_save(input_fn, save_mark):
    """Get the code blocks (import, class, def) that will be saved."""
    reader = notedown.MarkdownReader(match='strict')
    with open(input_fn, 'r') as f:
        nb = reader.read(f)
    saved = []
    for cell in nb.cells:
        if cell.cell_type == 'code':
            lines = cell.source.split('\n')
            for i, l in enumerate(lines):
                if l.strip().startswith('#') and save_mark in l:
                    block = [lines[i+1]]
                    if lines[i+1].startswith('import') or lines[i+1].startswith('from'):
                        # an import block ends at the first empty or indented line
                        for j in range(i+2, len(lines)):
                            l = lines[j]
                            if l.startswith(' ') or not l:
                                break
                            block.append(l)
                    else:
                        # a def/class block ends at the first non-indented,
                        # non-empty line
                        for j in range(i+2, len(lines)):
                            l = lines[j]
                            if not l.startswith(' ') and len(l):
                                break
                            block.append(l)
                    saved.append(block)
    return saved
def get_code_to_save(input_fn, save_mark):
    """get the code blocks (import, class, def) that will be saved"""
    reader = notedown.MarkdownReader(match='strict')
    with open(input_fn, 'r') as f:
        nb = reader.read(f)
    saved = []
    for cell in nb.cells:
        if cell.cell_type == 'code':
            lines = cell.source.split('\n')
            for i, l in enumerate(lines):
                if l.strip().startswith('#') and save_mark in l:
                    block = [lines[i + 1]]
                    # For code blocks only containing import statements
                    # (e.g., in preface.md)
                    if lines[i + 1].startswith('import') or lines[
                            i + 1].startswith('from'):
                        for j in range(i + 2, len(lines)):
                            block.append(lines[j])
                    # For code blocks containing def or class
                    else:
                        for j in range(i + 2, len(lines)):
                            l = lines[j]
                            if not l.startswith(' ') and len(l):
                                break
                            block.append(l)
                    if len(block[-1]) == 0:
                        del block[-1]
                    saved.append(block)
    return saved
def eval_notebook(input_fn, output_fn, run_cells, timeout=20*60, lang='python'):
    # pre-process: add empty lines before and after a mark, otherwise it
    # confuses the rst parser...
    with open(input_fn, 'r') as f:
        md = f.read()
    lines = md.split('\n')
    in_code = CharInMDCode(lines)
    for i, line in enumerate(lines):
        m = mark_re_md.match(line)
        if (m is not None and m[1] not in ('ref', 'numref', 'eqref')
                and not in_code.in_code(i, 0) and m.end() == len(line)):
            lines[i] = '\n' + line + '\n'
    reader = notedown.MarkdownReader(match='strict')
    notebook = reader.reads('\n'.join(lines))
    # evaluate
    if run_cells:
        # change to the notebook directory to resolve relative paths properly
        cwd = os.getcwd()
        os.chdir(os.path.join(cwd, os.path.dirname(output_fn)))
        notedown.run(notebook, timeout)
        os.chdir(cwd)
    # write
    notebook['metadata'].update({'language_info': {'name': lang}})
    with open(output_fn, 'w') as f:
        f.write(nbformat.writes(notebook))
def _release_notebook(dst_dir):
    """Convert .md files into notebooks and make a zip file."""
    reader = notedown.MarkdownReader(match='strict')
    files = glob.glob('*/*.md')
    package_files = ['environment.yml', 'utils.py', 'README.md', 'LICENSE']
    for fname in files:
        # check whether the markdown file is actually a jupyter notebook
        with open(fname, 'r') as fp:
            valid = '```{.python .input' in fp.read()
        if not valid:
            package_files.append(fname)
            continue
        # read
        with open(fname, 'r') as f:
            notebook = reader.read(f)
        # write
        new_fname = _replace_ext(fname, 'ipynb')
        with open(new_fname, 'w') as f:
            f.write(nbformat.writes(notebook))
        package_files.append(new_fname)
    print('=== Packing ', package_files)
    with ZipFile(os.path.join(dst_dir, 'gluon_tutorials_zh.zip'), 'w') as pkg:
        for f in package_files:
            pkg.write(f)
    with tarfile.open(os.path.join(dst_dir, 'gluon_tutorials_zh.tar.gz'),
                      'w:gz') as tar:
        for f in package_files:
            tar.add(f)
    for f in glob.glob('*/*.ipynb'):
        os.remove(f)
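# _replace_ext is defined elsewhere in the source; a minimal sketch of what
# it presumably does (an assumption, not the original implementation):
def _replace_ext(fname, new_ext):
    """Replace the extension, e.g. chapter/intro.md -> chapter/intro.ipynb."""
    base, _ = os.path.splitext(fname)
    return base + '.' + new_ext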
def md2ipynb():
    assert len(sys.argv) == 3, 'usage: input.md output.rst'
    (src_fn, input_fn, output_fn) = sys.argv
    # timeout for each notebook, in sec
    timeout = 60 * 60
    # whether to enable evaluation
    do_eval = int(os.environ.get('EVAL', True))
    # skip these notebooks as some of their APIs are no longer used
    skip_list = ["pytorch.md", "mnist.md", "custom-loss.md",
                 "fit_api_tutorial.md", "01-ndarray-intro.md",
                 "02-ndarray-operations.md", "03-ndarray-contexts.md",
                 "gotchas_numpy_in_mxnet.md", "csr.md", "row_sparse.md",
                 "fine_tuning_gluon.md", "inference_on_onnx_model.md",
                 "amp.md", "profiler.md"]
    require_gpu = []
    # these files will be ignored for execution
    ignore_execution = skip_list + require_gpu
    reader = notedown.MarkdownReader(match='strict')
    with open(input_fn, 'r', encoding='utf8') as f:
        notebook = reader.read(f)
    if do_eval:
        if not any([i in input_fn for i in ignore_execution]):
            tic = time.time()
            notedown.run(notebook, timeout)
            print('%s: Evaluated %s in %f sec' % (src_fn, input_fn,
                                                  time.time() - tic))
    # need to add language info for syntax highlighting
    notebook['metadata'].update({'language_info': {'name': 'python'}})
    with open(output_fn, 'w', encoding='utf-8') as f:
        f.write(nbformat.writes(notebook))
    print('%s: Write results into %s' % (src_fn, output_fn))
def md2ipynb():
    assert len(sys.argv) == 3, 'usage: input.md output.rst'
    (src_fn, input_fn, output_fn) = sys.argv
    # timeout for each notebook, in sec
    timeout = 20 * 60
    # whether to enable evaluation
    do_eval = int(os.environ.get('EVAL', True))
    reader = notedown.MarkdownReader()
    with open(input_fn, 'r') as f:
        notebook = reader.read(f)
    # need to add language info for syntax highlighting
    notebook['metadata'].update({'language_info': {'name': 'R'}})
    if do_eval:
        tic = time.time()
        executor = ExecutePreprocessor(timeout=timeout, kernel_name='ir')
        try:
            notebook, resources = executor.preprocess(notebook, resources={})
            # print the timing after execution, not before
            print('%s: Evaluated %s in %f sec' % (src_fn, input_fn,
                                                  time.time() - tic))
        except CellExecutionError:
            msg = 'Error executing the notebook "%s".\n\n' % input_fn
            msg += 'See notebook "%s" for the traceback.' % output_fn
            print(msg)
            raise
        finally:
            # nbformat.writes returns str, so write in text mode rather than
            # encoding to bytes
            with open(output_fn, 'w') as f:
                f.write(nbformat.writes(notebook))
            print('%s: Write results into %s' % (src_fn, output_fn))
def convert_md():
    """Find all markdown files and convert them into jupyter notebooks."""
    converted_files = []
    reader = notedown.MarkdownReader(match='strict')
    files = glob.glob('*/*.md')
    # evaluate the newest file first, so we can catch errors earlier
    files.sort(key=os.path.getmtime, reverse=True)
    do_eval = int(os.environ.get('DO_EVAL', True))
    if do_eval:
        do_eval = int(os.environ.get('EVAL', True))
    if not do_eval:
        print('=== Will skip evaluating notebooks')
    for fname in files:
        new_fname = _get_new_fname(fname)
        # check whether the markdown file is actually a jupyter notebook
        with open(fname, 'r') as fp:
            data = fp.read()
        valid = '```{.python .input' in data or '```python' in data
        if not valid:
            if new_fname != fname:
                print('=== Rename %s -> %s' % (fname, new_fname))
                shutil.copyfile(fname, new_fname)
                converted_files.append((fname, new_fname))
            continue
        # read
        with open(fname, 'r') as f:
            notebook = reader.read(f)
        if do_eval and not (_has_output(notebook) or any(
                [i in fname for i in ignore_execution])):
            print('=== Evaluate %s with timeout %d sec' % (fname, timeout))
            tic = time.time()
            # update the data path from ../data to data
            for c in notebook.cells:
                if c.get('cell_type', None) == 'code':
                    c['source'] = c['source'].replace(
                        '"../data', '"data').replace("'../data", "'data")
            notedown.run(notebook, timeout)
            print('=== Finished in %f sec' % (time.time() - tic))
        # even though we will check it again later, do it earlier so we can
        # see the error message before evaluating all notebooks
        _check_notebook(notebook)
        # write; need to add language info for syntax highlighting
        notebook['metadata'].update({'language_info': {'name': 'python'}})
        new_fname = _replace_ext(new_fname, 'ipynb')
        print('=== Convert %s -> %s' % (fname, new_fname))
        with open(new_fname, 'w') as f:
            f.write(nbformat.writes(notebook))
        converted_files.append((fname, new_fname))
    return converted_files
def create_json_notebook():
    reader = notedown.MarkdownReader()
    writer = notedown.JSONWriter()
    notebook = reader.reads(sample_markdown)
    json_notebook = writer.writes(notebook)
    return json_notebook
def test_alt_lang():
    """Specifying a language that isn't python should generate code
    blocks using %%language magic."""
    reader = notedown.MarkdownReader(code_regex='fenced')
    all_blocks = reader.parse_blocks(alt_lang)
    code_blocks = [b for b in all_blocks if b['type'] == reader.code]
    magic_block = code_blocks[0]
    reader.process_code_block(magic_block)
    assert(magic_block['content'] == alt_lang_code)
def outputcheck(self):
    notebooks, _, _ = self._find_md_files()
    reader = notedown.MarkdownReader()
    error = False
    for fn in notebooks:
        with open(fn, 'r') as f:
            notebook = reader.read(f)
        for c in notebook.cells:
            if 'outputs' in c and len(c['outputs']):
                logging.error("Found execution outputs in %s", fn)
                error = True
    if error:
        exit(-1)
def convert(path, timeout=40 * 60):
    with path.open() as in_file:
        notebook = notedown.MarkdownReader().read(in_file)

    start = time.time()
    notedown.run(notebook, timeout)
    print(f"=== {path.name} finished evaluation in {time.time() - start} sec")

    # need to add language info for syntax highlighting
    notebook["metadata"].update(language_info={"name": "python"})

    with path.with_suffix(".ipynb").open("w") as out_file:
        out_file.write(nbformat.writes(notebook))
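# Hypothetical driver for convert() above; the glob pattern is an assumption,
# not taken from the source.
from pathlib import Path

if __name__ == "__main__":
    for md_path in sorted(Path(".").glob("*/*.md")):
        convert(md_path)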
def _get_subpages(input_fn):
    """Read the toc in input_fn and return the subpages it contains."""
    subpages = []
    reader = notedown.MarkdownReader()
    with open(input_fn, 'r', encoding='UTF-8') as f:
        nb = reader.read(f)
    for cell in nb.cells:
        if (cell.cell_type == 'code' and 'attributes' in cell.metadata
                and 'toc' in cell.metadata.attributes['classes']):
            for l in cell.source.split('\n'):
                l = l.strip()
                if not l.startswith(':'):
                    fn = os.path.join(os.path.dirname(input_fn), l + '.md')
                    if os.path.exists(fn):
                        subpages.append(fn)
    return subpages
def test_roundtrip():
    """Run nbconvert using our custom markdown template to recover
    original markdown from a notebook.
    """
    # create a notebook from the markdown
    mr = notedown.MarkdownReader()
    roundtrip_notebook = mr.to_notebook(roundtrip_markdown)

    # write the notebook into json
    notebook_json = nbformat.writes(roundtrip_notebook)

    # read the json back into a notebook
    notebook = nbformat.reads(notebook_json, as_version=4)

    # convert notebook to markdown
    mw = notedown.MarkdownWriter(template_file='notedown/templates/markdown.tpl',
                                 strip_outputs=True)
    markdown = mw.writes(notebook)

    nt.assert_multi_line_equal(roundtrip_markdown, markdown)
def md2ipynb():
    assert len(sys.argv) == 3, 'usage: input.md output.rst'
    (src_fn, input_fn, output_fn) = sys.argv
    # timeout for each notebook, in sec
    timeout = 20 * 60
    # whether to enable evaluation
    do_eval = int(os.environ.get('EVAL', True))
    reader = notedown.MarkdownReader(match='strict')
    with open(input_fn, 'r', encoding='utf8') as f:
        notebook = reader.read(f)
    if do_eval:
        tic = time.time()
        notedown.run(notebook, timeout)
        print('%s: Evaluated %s in %f sec' % (src_fn, input_fn,
                                              time.time() - tic))
    # need to add language info for syntax highlighting
    notebook['metadata'].update({'language_info': {'name': 'python'}})
    with open(output_fn, 'w') as f:
        f.write(nbformat.writes(notebook))
    print('%s: Write results into %s' % (src_fn, output_fn))
def test_R():
    """Check that the R notebook generated from Rmd looks the same
    as the reference (without output cells).
    """
    knitr = notedown.Knitr()
    with open('r-examples/r-example.Rmd') as rmd:
        knitted_markdown_file = knitr.knit(rmd)

    reader = notedown.MarkdownReader(precode=r"%load_ext rpy2.ipython",
                                     magic=True)
    notebook = reader.read(knitted_markdown_file)

    with open('r-examples/r-example.ipynb') as f:
        reference_notebook = nbformat.read(f, as_version=4)

    notedown.main.strip(notebook)
    notedown.main.strip(reference_notebook)

    writer = nbformat
    nbjson = writer.writes(notebook)
    reference_nbjson = writer.writes(reference_notebook)

    nt.assert_multi_line_equal(nbjson, reference_nbjson)
import sys
import time

import nbformat
import notedown

assert len(sys.argv) == 2, "usage: input.md"

# timeout for each notebook, in sec
timeout = 40 * 60
# these files will be ignored for execution
ignore_execution = []

input_fn = sys.argv[1]
output_fn = ".".join(input_fn.split(".")[:-1] + ["ipynb"])

reader = notedown.MarkdownReader()

# read
with open(input_fn, "r") as f:
    notebook = reader.read(f)

if not any([i in input_fn for i in ignore_execution]):
    tic = time.time()
    notedown.run(notebook, timeout)
    print("=== Finished evaluation in %f sec" % (time.time() - tic))

# write; need to add language info for syntax highlighting
notebook["metadata"].update({"language_info": {"name": "python"}})
with open(output_fn, "w") as f:
    f.write(nbformat.writes(notebook))
    ret.append(' ')
    ret.append(string[-1])
    return ''.join(ret)

# timeout for each notebook, in sec
timeout = 20 * 60
# these files will be ignored for execution
ignore_execution = []

input_fn = sys.argv[1]
output_fn = sys.argv[2]

reader = notedown.MarkdownReader(match='strict')
do_eval = int(os.environ.get('EVAL', True))

# read
with open(input_fn, 'r') as f:
    notebook = reader.read(f)

for c in notebook.cells:
    c.source = add_space_between_ascii_and_non_ascii(c.source)

if do_eval and not any([i in input_fn for i in ignore_execution]):
    tic = time.time()
    notedown.run(notebook, timeout)
    print('=== Finished evaluation in %f sec' % (time.time() - tic))
def create_json_notebook(markdown):
    reader = notedown.MarkdownReader()
    notebook = reader.reads(markdown)
    json_notebook = nbformat.writes(notebook)
    return json_notebook
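# Minimal usage sketch for create_json_notebook; the sample markdown below is
# hypothetical. With the default reader, fenced code blocks become code cells.
sample = "Some prose.\n\n```\nprint(1 + 1)\n```\n"
print(create_json_notebook(sample))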
def parse_cells(text, regex=None):
    reader = notedown.MarkdownReader(code_regex=regex)
    return reader.parse_blocks(text)
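# Hypothetical check using parse_cells: with regex='fenced', only fenced
# blocks are parsed as code (the 'code' literal mirrors reader.code in the
# tests above).
blocks = parse_cells("text\n\n```\nx = 1\n```\n", regex='fenced')
assert any(b['type'] == 'code' for b in blocks)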
def test_markdown_markdown():
    mr = notedown.MarkdownReader()
    mw = notedown.MarkdownWriter(notedown.markdown_template)
    nb = mr.reads(roundtrip_markdown)
    markdown = mw.writes(nb)
    nt.assert_multi_line_equal(markdown, roundtrip_markdown)
def read_markdown(source: Union[str, List[str]]) -> notebooknode.NotebookNode:
    """Returns a notebook from markdown source."""
    if not isinstance(source, str):
        source = '\n'.join(source)
    reader = notedown.MarkdownReader(match='strict')
    return reader.reads(source)
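# Usage sketch for read_markdown, assuming the convention used throughout
# these snippets that match='strict' only converts ```{.python .input}
# fenced blocks into code cells:
nb = read_markdown(['# Title', '', '```{.python .input}', 'x = 1 + 1', '```'])
assert nb.cells[-1].cell_type == 'code'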
finalcseguidE = (pYPKa.linearize(EcoRV) + pcr(fp, rp, template)).looped().cseguid()

content = t.format(tp=insertname,
                   gbref=gbref,
                   gblink=gblink,
                   templatesize=templatesize,
                   insertseguid=insertseguid,
                   finalcseguidZ=finalcseguidZ,
                   finalcseguidE=finalcseguidE,
                   fpn=fp.name,
                   fps=fp.seq,
                   rpn=rp.name,
                   rps=rp.seq)

obj = notedown.MarkdownReader()
nb = obj.to_notebook(content)

pp = ExecutePreprocessor(timeout=600, kernel_name='python3')
pp.timeout = 120  # seconds
pp.interrupt_on_timeout = True
pp.preprocess(nb, resources={})

with open(newname, 'wt') as f:
    nbformat.write(nb, f)

# os.chdir(cwd)
# with open("README_template.md", "r", encoding="utf8") as f:
def test_match_fenced():
    mr = notedown.MarkdownReader(match='fenced')
    nb = mr.to_notebook(sample_markdown)
    assert(nb.cells[1]['cell_type'] == 'code')
    assert(nb.cells[3]['cell_type'] == 'markdown')