def eval_notebook(input_fn, output_fn, run_cells, timeout=20*60, lang='python'):
    """Convert a markdown file into a notebook, optionally execute it, save it.

    Parameters
    ----------
    input_fn : str
        Path of the source markdown file.
    output_fn : str
        Path of the notebook to write; when running, cells execute relative
        to its directory.
    run_cells : bool
        If True, execute all code cells before writing.
    timeout : int
        Per-notebook execution timeout in seconds.
    lang : str
        Language name written into the notebook metadata for syntax highlight.
    """
    # process: add empty lines before and after a mark, otherwise it confuses
    # the rst parser...
    with open(input_fn, 'r') as f:
        md = f.read()
    lines = md.split('\n')
    in_code = CharInMDCode(lines)
    for i, line in enumerate(lines):
        m = mark_re_md.match(line)
        # Pad only marks that span the whole line, sit outside code blocks,
        # and are not cross-reference roles (handled elsewhere).
        if (m is not None
                and m[1] not in ('ref', 'numref', 'eqref')
                and not in_code.in_code(i, 0)
                and m.end() == len(line)):
            lines[i] = '\n' + line + '\n'
    reader = notedown.MarkdownReader(match='strict')
    notebook = reader.reads('\n'.join(lines))
    # evaluate
    if run_cells:
        # change to the notebook directory to resolve the relpaths properly
        cwd = os.getcwd()
        os.chdir(os.path.join(cwd, os.path.dirname(output_fn)))
        try:
            notedown.run(notebook, timeout)
        finally:
            # restore the working directory even if execution raises;
            # previously a failed run left the process in the wrong cwd
            os.chdir(cwd)
    # write
    notebook['metadata'].update({'language_info': {'name': lang}})
    with open(output_fn, 'w') as f:
        f.write(nbformat.writes(notebook))
def md2ipynb():
    """Read the markdown file named on the command line, evaluate it as a
    notebook unless disabled or skip-listed, and write the result as ipynb.
    """
    assert len(sys.argv) == 3, 'usage: input.md output.rst'
    src_fn, input_fn, output_fn = sys.argv
    # timeout for each notebook, in sec
    timeout = 60 * 60
    # if enable evaluation
    do_eval = int(os.environ.get('EVAL', True))
    # Skip these notebooks as some APIs will no longer be used
    skip_list = [
        "pytorch.md", "mnist.md", "custom-loss.md", "fit_api_tutorial.md",
        "01-ndarray-intro.md", "02-ndarray-operations.md",
        "03-ndarray-contexts.md", "gotchas_numpy_in_mxnet.md", "csr.md",
        "row_sparse.md", "fine_tuning_gluon.md", "inference_on_onnx_model.md",
        "amp.md", "profiler.md",
    ]
    require_gpu = []
    # the files will be ignored for execution
    ignore_execution = skip_list + require_gpu
    reader = notedown.MarkdownReader(match='strict')
    with open(input_fn, 'r', encoding="utf8") as f:
        notebook = reader.read(f)
    ignored = any(name in input_fn for name in ignore_execution)
    if do_eval and not ignored:
        tic = time.time()
        notedown.run(notebook, timeout)
        print('%s: Evaluated %s in %f sec' %
              (src_fn, input_fn, time.time() - tic))
    # need to add language info to for syntax highlight
    notebook['metadata'].update({'language_info': {'name': 'python'}})
    with open(output_fn, 'w', encoding='utf-8') as f:
        f.write(nbformat.writes(notebook))
    print('%s: Write results into %s' % (src_fn, output_fn))
def process_and_eval_notebook(input_fn, output_fn, run_cells, timeout=20 * 60,
                              lang='python', tab=None, default_tab=None):
    """Read a markdown file as a notebook, optionally select one tab and
    execute it, then save it to ``output_fn``.

    Parameters
    ----------
    input_fn : str
        Path of the source markdown file.
    output_fn : str
        Path of the notebook to write; when running, cells execute relative
        to its directory.
    run_cells : bool
        If True, execute all code cells before writing.
    timeout : int
        Per-notebook execution timeout in seconds.
    lang : str
        Language name recorded in the notebook metadata.
    tab, default_tab : str or None
        When ``tab`` is given, keep only cells belonging to that tab;
        ``default_tab`` names the implicit tab of unmarked cells.
    """
    with open(input_fn, 'r') as f:
        md = f.read()
    nb = notebook.read_markdown(md)
    if tab:
        # get the tab
        nb = notebook.split_markdown_cell(nb)
        nb = notebook.get_tab_notebook(nb, tab, default_tab)
        if not nb:
            logging.info(f"Skip to eval tab {tab} for {input_fn}")
            # write an empty file to track the dependencies
            # (close the handle instead of leaking it)
            open(output_fn, 'w').close()
            return
    # evaluate
    if run_cells:
        # change to the notebook directory to resolve the relpaths properly
        cwd = os.getcwd()
        os.chdir(os.path.join(cwd, os.path.dirname(output_fn)))
        try:
            notedown.run(nb, timeout)
        finally:
            # restore the working directory even if execution raises
            os.chdir(cwd)
    # write
    nb['metadata'].update({'language_info': {'name': lang}})
    with open(output_fn, 'w') as f:
        f.write(nbformat.writes(nb))
def _job(nb, output_fn, run_cells, timeout, lang):
    """Optionally execute a notebook, normalize its outputs, and save it.

    Parameters
    ----------
    nb : notebook node
        The notebook to process (mutated in place).
    output_fn : str
        Path to write; when running, cells execute relative to its directory.
    run_cells : bool
        If True, execute all code cells first.
    timeout : int
        Per-notebook execution timeout in seconds.
    lang : str
        Language name recorded in the notebook metadata.
    """
    # evaluate
    if run_cells:
        # change to the notebook directory to resolve the relpaths properly
        cwd = os.getcwd()
        os.chdir(os.path.join(cwd, os.path.dirname(output_fn)))
        try:
            notedown.run(nb, timeout)
        finally:
            # restore the working directory even if execution raises;
            # previously a failed run left the process in the wrong cwd
            os.chdir(cwd)
    # change stderr output to stdout output
    for cell in nb.cells:
        if cell.cell_type == 'code' and 'outputs' in cell:
            outputs = []
            for out in cell['outputs']:
                if ('data' in out and 'text/plain' in out['data'] and
                        out['data']['text/plain'].startswith('HBox')):
                    # that's tqdm progress bar cannot displayed properly.
                    continue
                if 'name' in out and out['name'] == 'stderr':
                    out['name'] = 'stdout'
                outputs.append(out)
            cell['outputs'] = outputs
    # write
    nb['metadata'].update({'language_info': {'name': lang}})
    with open(output_fn, 'w') as f:
        f.write(nbformat.writes(nb))
def convert_md():
    """Find all markdown files, convert into jupyter notebooks

    Globs ``*/*.md``; plain markdown files are copied to their new name,
    while files containing code blocks are read as notebooks, evaluated when
    enabled, checked, and written out as ``.ipynb``.  Returns a list of
    ``(old_name, new_name)`` pairs for every file processed.

    NOTE(review): relies on module-level names ``timeout``,
    ``ignore_execution``, ``_get_new_fname``, ``_has_output``,
    ``_check_notebook`` and ``_replace_ext`` defined elsewhere in this file
    — confirm they are in scope.
    """
    converted_files = []
    reader = notedown.MarkdownReader(match='strict')
    files = glob.glob('*/*.md')
    # evaluate the newest file first, so we can catchup error ealier
    files.sort(key=os.path.getmtime, reverse=True)
    # evaluation is on unless DO_EVAL (or, if that is truthy, EVAL) is a
    # falsy integer in the environment
    do_eval = int(os.environ.get('DO_EVAL', True))
    if do_eval:
        do_eval = int(os.environ.get('EVAL', True))
    if not do_eval:
        print('=== Will skip evaluating notebooks')
    for fname in files:
        new_fname = _get_new_fname(fname)
        # parse if each markdown file is actually a jupyter notebook
        with open(fname, 'r') as fp:
            data = fp.read()
        valid = '```{.python .input' in data or '```python' in data
        if not valid:
            # plain markdown: copy to the new location, no conversion needed
            if new_fname != fname:
                print('=== Rename %s -> %s' % (fname, new_fname))
                shutil.copyfile(fname, new_fname)
                converted_files.append((fname, new_fname))
            continue
        # read
        with open(fname, 'r') as f:
            notebook = reader.read(f)
        # skip evaluation when outputs already exist or the file is on the
        # ignore list
        if do_eval and not (_has_output(notebook) or any(
                [i in fname for i in ignore_execution])):
            print('=== Evaluate %s with timeout %d sec' % (fname, timeout))
            tic = time.time()
            # update from ../data to data
            for c in notebook.cells:
                if c.get('cell_type', None) == 'code':
                    c['source'] = c['source'].replace('"../data', '"data').replace(
                        "'../data", "'data")
            notedown.run(notebook, timeout)
            print('=== Finished in %f sec' % (time.time() - tic))
        # even that we will check it later, but do it ealier so we can see the
        # error message before evaluating all notebooks
        _check_notebook(notebook)
        # write
        # need to add language info to for syntax highlight
        notebook['metadata'].update({'language_info': {'name': 'python'}})
        new_fname = _replace_ext(new_fname, 'ipynb')
        print('=== Convert %s -> %s' % (fname, new_fname))
        with open(new_fname, 'w') as f:
            f.write(nbformat.writes(notebook))
        converted_files.append((fname, new_fname))
    return converted_files
def convert_md():
    """Locate every markdown file and turn each one into a jupyter notebook.

    Returns a list of ``(old_name, new_name)`` pairs for every file handled.
    """
    converted_files = []
    reader = notedown.MarkdownReader(match='strict')
    files = glob.glob('*/*.md')
    # newest files first so errors in fresh edits surface sooner
    files.sort(key=os.path.getmtime, reverse=True)
    do_eval = int(os.environ.get('DO_EVAL', True))
    if do_eval:
        do_eval = int(os.environ.get('EVAL', True))
    if not do_eval:
        print('=== Will skip evaluating notebooks')
    for md_fname in files:
        new_fname = _get_new_fname(md_fname)
        # a markdown file counts as a notebook only if it has code blocks
        with open(md_fname, 'r') as fp:
            is_notebook = '```{.python .input' in fp.read()
        if not is_notebook:
            # plain markdown: just copy it under the new name
            if new_fname != md_fname:
                print('=== Rename %s -> %s' % (md_fname, new_fname))
                shutil.copyfile(md_fname, new_fname)
                converted_files.append((md_fname, new_fname))
            continue
        with open(md_fname, 'r') as f:
            notebook = reader.read(f)
        skip = _has_output(notebook) or any(
            i in md_fname for i in ignore_execution)
        if do_eval and not skip:
            print('=== Evaluate %s with timeout %d sec' % (md_fname, timeout))
            tic = time.time()
            # rewrite references from ../data to data before running
            for cell in notebook.cells:
                if cell.get('cell_type', None) == 'code':
                    src = cell['source']
                    src = src.replace('"../data', '"data')
                    src = src.replace("'../data", "'data")
                    cell['source'] = src
            notedown.run(notebook, timeout)
            print('=== Finished in %f sec' % (time.time() - tic))
        # check early so errors show up before all notebooks are evaluated
        _check_notebook(notebook)
        # language info is needed for syntax highlighting
        notebook['metadata'].update({'language_info': {'name': 'python'}})
        new_fname = _replace_ext(new_fname, 'ipynb')
        print('=== Convert %s -> %s' % (md_fname, new_fname))
        with open(new_fname, 'w') as f:
            f.write(nbformat.writes(notebook))
        converted_files.append((md_fname, new_fname))
    return converted_files
def convert(path, timeout=40 * 60):
    """Evaluate the markdown notebook at ``path`` and save it as ``.ipynb``.

    Parameters
    ----------
    path : pathlib.Path
        Markdown file to read; the output gets the same name with an
        ``.ipynb`` suffix.
    timeout : int
        Per-notebook execution timeout in seconds.
    """
    with path.open() as in_file:
        notebook = notedown.MarkdownReader().read(in_file)
    start = time.time()
    notedown.run(notebook, timeout)
    elapsed = time.time() - start
    print(f"=== {path.name} finished evaluation in {elapsed} sec")
    # need to add language info to for syntax highlight
    notebook["metadata"].update(language_info={"name": "python"})
    out_path = path.with_suffix(".ipynb")
    with out_path.open("w") as out_file:
        out_file.write(nbformat.writes(notebook))
def _process_and_eval_notebook(input_fn, output_fn, run_cells, config,
                               timeout=20 * 60, lang='python'):
    """Read a markdown file as a notebook, select the configured tab,
    optionally execute it, normalize its outputs, and write ``output_fn``.

    Parameters
    ----------
    input_fn : str
        Path of the source markdown file.
    output_fn : str
        Path of the notebook to write; when running, cells execute relative
        to its directory.
    run_cells : bool
        If True, execute all code cells before writing.
    config : object
        Build configuration providing ``tab``, ``default_tab`` and
        ``library`` (per-tab alias mappings).
    timeout : int
        Per-notebook execution timeout in seconds.
    lang : str
        Language name recorded in the notebook metadata.
    """
    with open(input_fn, 'r') as f:
        md = f.read()
    nb = notebook.read_markdown(md)
    tab = config.tab
    if tab:
        # get the tab
        nb = notebook.split_markdown_cell(nb)
        nb = notebook.get_tab_notebook(nb, tab, config.default_tab)
        if not nb:
            logging.info(f"Skip to eval tab {tab} for {input_fn}")
            # write an empty file to track the dependencies
            # (close the handle instead of leaking it)
            open(output_fn, 'w').close()
            return
        # replace alias
        if tab in config.library:
            nb = library.replace_alias(nb, config.library[tab])
    # evaluate
    if run_cells:
        # change to the notebook directory to resolve the relpaths properly
        cwd = os.getcwd()
        os.chdir(os.path.join(cwd, os.path.dirname(output_fn)))
        try:
            notedown.run(nb, timeout)
        finally:
            # restore the working directory even if execution raises
            os.chdir(cwd)
    # change stderr output to stdout output
    for cell in nb.cells:
        if cell.cell_type == 'code' and 'outputs' in cell:
            outputs = []
            for out in cell['outputs']:
                if ('data' in out and 'text/plain' in out['data'] and
                        out['data']['text/plain'].startswith('HBox')):
                    # that's tqdm progress bar cannot displayed properly.
                    continue
                if 'name' in out and out['name'] == 'stderr':
                    out['name'] = 'stdout'
                outputs.append(out)
            cell['outputs'] = outputs
    # write
    nb['metadata'].update({'language_info': {'name': lang}})
    with open(output_fn, 'w') as f:
        f.write(nbformat.writes(nb))
def md2ipynb():
    """Read the markdown file named on the command line, evaluate it as a
    notebook unless disabled via the EVAL environment variable, and write
    the result as ipynb."""
    assert len(sys.argv) == 3, 'usage: input.md output.rst'
    src_fn, input_fn, output_fn = sys.argv
    # timeout for each notebook, in sec
    timeout = 20 * 60
    # if enable evaluation
    do_eval = int(os.environ.get('EVAL', True))
    reader = notedown.MarkdownReader(match='strict')
    with open(input_fn, 'r', encoding="utf8") as f:
        notebook = reader.read(f)
    if do_eval:
        tic = time.time()
        notedown.run(notebook, timeout)
        elapsed = time.time() - tic
        print('%s: Evaluated %s in %f sec' % (src_fn, input_fn, elapsed))
    # need to add language info to for syntax highlight
    notebook['metadata'].update({'language_info': {'name': 'python'}})
    with open(output_fn, 'w') as f:
        f.write(nbformat.writes(notebook))
    print('%s: Write results into %s' % (src_fn, output_fn))
import nbformat

# Convert the markdown file named on the command line into an evaluated
# jupyter notebook saved next to it with an .ipynb extension.
assert len(sys.argv) == 2, "usage: input.md"

# timeout for each notebook, in sec
timeout = 40 * 60
# the files will be ignored for execution
ignore_execution = []

input_fn = sys.argv[1]
# swap the final extension for .ipynb
name_parts = input_fn.split(".")
output_fn = ".".join(name_parts[:-1] + ["ipynb"])

reader = notedown.MarkdownReader()

# read
with open(input_fn, "r") as f:
    notebook = reader.read(f)

if not any(marker in input_fn for marker in ignore_execution):
    tic = time.time()
    notedown.run(notebook, timeout)
    print("=== Finished evaluation in %f sec" % (time.time() - tic))

# write
# need to add language info to for syntax highlight
notebook["metadata"].update({"language_info": {"name": "python"}})
with open(output_fn, "w") as f:
    f.write(nbformat.writes(notebook))
# timeout for each notebook, in sec
timeout = 20 * 60
# the files will be ignored for execution
ignore_execution = []

input_fn = sys.argv[1]
output_fn = sys.argv[2]

reader = notedown.MarkdownReader(match='strict')
do_eval = int(os.environ.get('EVAL', True))

# read
with open(input_fn, 'r') as f:
    notebook = reader.read(f)

# normalize the spacing of every cell's source text
for cell in notebook.cells:
    cell.source = add_space_between_ascii_and_non_ascii(cell.source)

skip = any(marker in input_fn for marker in ignore_execution)
if do_eval and not skip:
    tic = time.time()
    notedown.run(notebook, timeout)
    print('=== Finished evaluation in %f sec' % (time.time() - tic))

# write
# need to add language info to for syntax highlight
notebook['metadata'].update({'language_info': {'name': 'python'}})
with open(output_fn, 'w') as f:
    f.write(nbformat.writes(notebook))