def main():
    """Strip notebook output, either in place or as a stdin/stdout filter.

    With no command-line arguments a notebook is read from ``sys.stdin``,
    stripped and written to ``sys.stdout``.  Otherwise the first argument
    selects help (exits with status 1), installation, or -- optionally
    preceded by ``-f``/``--force`` -- a list of files to strip in place.
    Files not ending in ``.ipynb`` are skipped unless forcing is enabled.
    """
    cli_args = sys.argv[1:]

    if not cli_args:
        # Filter mode: stdin -> stdout.
        stripped = strip_output(read(sys.stdin, as_version=NO_CONVERT))
        write(stripped, sys.stdout)
        return

    first = cli_args[0]
    if first in ('help', '-h', '--help'):
        print(__doc__, file=sys.stderr)
        sys.exit(1)
    if first in ('install', '--install'):
        sys.exit(install())

    force = first in ('-f', '--force')
    targets = cli_args[1:] if force else cli_args

    for target in targets:
        if not (force or target.endswith('.ipynb')):
            continue
        try:
            with io.open(target, 'r', encoding='utf8') as src:
                notebook = strip_output(read(src, as_version=NO_CONVERT))
            with io.open(target, 'w', encoding='utf8') as dst:
                write(notebook, dst)
        except Exception:
            # Report which file failed, then let the error propagate.
            print("Could not strip '{}'".format(target))
            raise
def main_diff(afn, bfn, dfn=None):
    """Diff two notebooks, print the diff, and optionally save it as JSON.

    Parameters
    ----------
    afn, bfn : str
        Paths of the two notebooks to compare.
    dfn : str, optional
        If given, the diff is also dumped to this path as indented JSON.

    Returns
    -------
    int
        1 if either input file does not exist, otherwise 0.
    """
    missing = next((p for p in (afn, bfn) if not os.path.exists(p)), None)
    if missing is not None:
        print("Missing file {}".format(missing))
        return 1

    nb_a = nbformat.read(afn, as_version=4)
    nb_b = nbformat.read(bfn, as_version=4)

    # TODO: Split lines here?

    diff = diff_notebooks(nb_a, nb_b)

    # The diff is always pretty-printed.
    pretty_print_notebook_diff(afn, bfn, nb_a, diff)

    if dfn:
        with open(dfn, "w") as out:
            # Verbose (indented) JSON rather than the compact form.
            json.dump(diff, out, indent=2, separators=(",", ": "))
    return 0
def main_diff(args):
    """Diff ``args.base`` against ``args.remote``.

    When ``args.output`` is set the diff is written there as indented JSON
    (UTF-8); otherwise it is pretty-printed through ``print``.

    Returns 1 if either notebook file is missing, otherwise 0.
    """
    base_path, remote_path, out_path = args.base, args.remote, args.output

    missing = next(
        (p for p in (base_path, remote_path) if not os.path.exists(p)), None)
    if missing is not None:
        print("Missing file {}".format(missing))
        return 1

    base_nb = nbformat.read(base_path, as_version=4)
    remote_nb = nbformat.read(remote_path, as_version=4)
    diff = diff_notebooks(base_nb, remote_nb)

    if out_path:
        with io.open(out_path, "w", encoding="utf8") as out:
            # Verbose (indented) JSON rather than the compact form.
            json.dump(diff, out, indent=2, separators=(",", ": "))
        return 0

    # This printer is to keep the unit tests passing: some tests capture
    # output with capsys, which doesn't pick up on sys.stdout.write().
    class Printer:
        def write(self, text):
            print(text, end="")

    pretty_print_notebook_diff(base_path, remote_path, base_nb, diff, Printer())
    return 0
def setUp(self):
    """Create a notary with an in-memory database and load the test notebook.

    ``test3.ipynb`` is read twice: as version 4 into ``self.nb`` and as
    version 3 into ``self.nb3``.
    """
    self.data_dir = tempfile.mkdtemp()
    self.notary = sign.NotebookNotary(
        data_dir=self.data_dir,
        db_file=':memory:',
        secret=b'secret',
    )
    for attr_name, version in (('nb', 4), ('nb3', 3)):
        with self.fopen(u'test3.ipynb', u'r') as f:
            setattr(self, attr_name, read(f, as_version=version))
def main_merge(args):
    """Three-way merge of the notebooks named on ``args``.

    Reads ``args.base``, ``args.local`` and ``args.remote``, merges them and
    prints whether conflicts occurred.  If ``args.output`` is set, a clean
    merge is written to that path, while a conflicted merge is written as
    JSON to ``<output>-merge``.  Without an output path the conflicts (if
    any) are pretty-printed.

    Returns 1 if an input file is missing, otherwise 0.
    """
    base_path, local_path, remote_path = args.base, args.local, args.remote
    out_path = args.output

    for path in (base_path, local_path, remote_path):
        if os.path.exists(path):
            continue
        print("Cannot find file '{}'".format(path))
        print(_usage)
        return 1

    base_nb = nbformat.read(base_path, as_version=4)
    local_nb = nbformat.read(local_path, as_version=4)
    remote_nb = nbformat.read(remote_path, as_version=4)

    # TODO: Split lines here?

    merged, local_conflicts, remote_conflicts = merge_notebooks(
        base_nb, local_nb, remote_nb, args)
    conflicted = bool(local_conflicts or remote_conflicts)

    if conflicted:
        print("Conflicts occured during merge operation.")
    else:
        print("Merge completed successfully with no unresolvable conflicts.")

    if not out_path:
        # FIXME: Display conflicts in a useful way
        if conflicted:
            print("Local conflicts:")
            pprint(local_conflicts)
            print("Remote conflicts:")
            pprint(remote_conflicts)
        return 0

    if conflicted:
        # Write partial merge and conflicts to a foo.ipynb-merge file
        payload = {
            "merged": merged,
            "local_conflicts": local_conflicts,
            "remote_conflicts": remote_conflicts
        }
        with open(out_path + "-merge", "w") as mf:
            json.dump(payload, mf)
    else:
        # Write fully completed merge to given foo.ipynb filename
        with open(out_path, "w") as mf:
            nbformat.write(merged, mf)
    return 0
def main():
    """Command-line entry point.

    Parses the options, runs at most one of the install / uninstall /
    is-installed / status / version tasks (each exits the process), and
    otherwise strips output from the given files in place.  Files without
    an ``.ipynb`` extension are skipped unless ``--force`` is given.  With
    no files, a notebook is filtered from ``input_stream`` to
    ``output_stream``.
    """
    parser = ArgumentParser(epilog=__doc__,
                            formatter_class=RawDescriptionHelpFormatter)
    task = parser.add_mutually_exclusive_group()
    task.add_argument(
        '--install', action='store_true',
        help=('Install nbstripout in the current repository '
              '(set up the git filter and attributes)'))
    task.add_argument(
        '--uninstall', action='store_true',
        help=('Uninstall nbstripout from the current repository '
              '(remove the git filter and attributes)'))
    task.add_argument(
        '--is-installed', action='store_true',
        help='Check if nbstripout is installed in current repository')
    task.add_argument(
        '--status', action='store_true',
        help=('Print status of nbstripout installation in current '
              'repository and configuration summary if installed'))
    parser.add_argument(
        '--attributes', metavar='FILEPATH',
        help=('Attributes file to add the filter to (in combination with '
              '--install/--uninstall), defaults to .git/info/attributes'))
    task.add_argument('--version', action='store_true', help='Print version')
    parser.add_argument(
        '--force', '-f', action='store_true',
        help='Strip output also from files with non ipynb extension')
    parser.add_argument('files', nargs='*', help='Files to strip output from')
    opts = parser.parse_args()

    if opts.install:
        sys.exit(install(opts.attributes))
    if opts.uninstall:
        sys.exit(uninstall(opts.attributes))
    if opts.is_installed:
        sys.exit(status(verbose=False))
    if opts.status:
        sys.exit(status(verbose=True))
    if opts.version:
        print(__version__)
        sys.exit(0)

    if not opts.files:
        # Filter mode: no files were named on the command line.
        stripped = strip_output(read(input_stream, as_version=NO_CONVERT))
        write(stripped, output_stream)
        return

    for target in opts.files:
        if not opts.force and not target.endswith('.ipynb'):
            continue
        try:
            with io.open(target, 'r', encoding='utf8') as src:
                notebook = strip_output(read(src, as_version=NO_CONVERT))
            with io.open(target, 'w', encoding='utf8') as dst:
                write(notebook, dst)
        except Exception:
            # Report which file failed, then let the error propagate.
            print("Could not strip '{}'".format(target))
            raise
def main():
    """Strip output from one notebook file, or filter stdin to stdout.

    The first command-line argument may request help (exits with status 1),
    installation, or name the notebook file to strip in place.  With no
    arguments the notebook is read from ``sys.stdin`` and the stripped
    result is written to ``sys.stdout``.
    """
    if len(sys.argv) <= 1:
        stripped = strip_output(read(sys.stdin, as_version=NO_CONVERT))
        write(stripped, sys.stdout)
        return

    first = sys.argv[1]
    if first in ('help', '-h', '--help'):
        print(__doc__, file=sys.stderr)
        sys.exit(1)
    if first in ('install', '--install'):
        sys.exit(install())

    with io.open(first, 'r', encoding='utf8') as src:
        notebook = read(src, as_version=NO_CONVERT)
    notebook = strip_output(notebook)
    with io.open(first, 'w', encoding='utf8') as dst:
        write(notebook, dst)
def update_notebook_by_run(path):
    """Re-run the notebook at ``path`` and record the run in ``rundb``.

    The notebook is loaded, its outputs are cleared, the init code is run,
    and then every cell is executed while progress is reported through
    ``_progress_cell``.  A ``NotebookError`` raised during execution is
    captured as text in ``err``.
    """
    rundb.reset_run(path)
    logging.debug(u'update_notebook_by_run {}'.format(path))

    # init runner
    with codecs.open(path, 'r', 'utf8') as fp:
        nb = read(fp, IPYNB_VER)
    runner = NotebookRunner(nb)
    runner.clear_outputs()

    # run config & startup
    run_init(runner, path)

    # run cells
    rundb.start_run(path, runner.cellcnt)

    def report_progress(cur):
        return _progress_cell(path, cur)

    err = None
    memory_used = []
    try:
        runner.run_notebook(memory_used, report_progress)
        run_code(runner,
                 "if 'manifest_' in globals() and manifest_ is not None: "
                 "manifest_._check_output_hdf()")
    except NotebookError as e:
        logging.debug("except NotebookError")
        err = unicode(e)
def test_tutorials(testfolder, tutorials=('ligand-binding-analysis',
                                          'protein-folding-analysis')):
    """Export the selected tutorial notebooks to test scripts and run pytest.

    Every ``*.ipynb`` next to this file whose base name is listed in
    ``tutorials`` is converted with the ``simplepython.tpl`` template and
    written to ``<testfolder>/<name>/test_<name>.py``.

    Returns the result of ``pytest.main([testfolder])``.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    exporter = PythonExporter()
    exporter.template_file = os.path.join(here, 'simplepython.tpl')

    # For each tutorial notebook
    for nb_path in glob(os.path.join(here, '*.ipynb')):
        name, _ext = os.path.splitext(os.path.basename(nb_path))
        if name not in tutorials:
            continue

        target_dir = os.path.abspath(os.path.join(testfolder, name))
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        # Read notebook in
        with open(nb_path, 'r') as fi:
            notebook = nbformat.read(fi, nbformat.NO_CONVERT)
        output, resources = exporter.from_notebook_node(notebook)

        # Write it out in .py format
        script_path = os.path.join(target_dir, 'test_{}.py'.format(name))
        with open(script_path, 'w') as fo:
            fo.writelines(format_script(output, target_dir, name))

    return pytest.main([testfolder])
def test_custom_kernel_manager(self):
    """Check that a custom kernel manager class is actually used.

    The ``HelloWorld.ipynb`` fixture is run through a preprocessor built
    with ``FakeCustomKernelManager``; afterwards every method listed in
    ``FakeCustomKernelManager.expected_methods`` must have a non-zero
    call count.
    """
    from .fake_kernelmanager import FakeCustomKernelManager

    current_dir = os.path.dirname(__file__)
    filename = os.path.join(current_dir, 'files', 'HelloWorld.ipynb')

    with io.open(filename) as f:
        input_nb = nbformat.read(f, 4)

    preprocessor = self.build_preprocessor({
        'kernel_manager_class': FakeCustomKernelManager
    })

    # Run on a copy with execution counts and outputs removed.
    cleaned_input_nb = copy.deepcopy(input_nb)
    for cell in cleaned_input_nb.cells:
        if 'execution_count' in cell:
            del cell['execution_count']
        cell['outputs'] = []

    # Override terminal size to standardise traceback format
    with modified_env({'COLUMNS': '80', 'LINES': '24'}):
        preprocessor(cleaned_input_nb, self.build_resources())

    expected = FakeCustomKernelManager.expected_methods.items()
    for method, call_count in expected:
        # The message is shown when the assertion FAILS, i.e. when the
        # count is still zero, so it must say the method was not called.
        self.assertNotEqual(call_count, 0,
                            '{} was not called'.format(method))
def execute_nb(src, dst, allow_errors=False, timeout=1000, kernel_name=''):
    """
    Execute notebook in `src` and write the output to `dst`

    Parameters
    ----------
    src, dst: str
        path to notebook
    allow_errors: bool
    timeout: int
    kernel_name: str
        defaults to value set in notebook metadata

    Returns
    -------
    dst: str
    """
    import nbformat
    from nbconvert.preprocessors import ExecutePreprocessor

    with io.open(src, encoding='utf-8') as source_file:
        notebook = nbformat.read(source_file, as_version=4)

    executor = ExecutePreprocessor(
        allow_errors=allow_errors,
        timeout=timeout,
        kernel_name=kernel_name,
    )
    executor.preprocess(notebook, resources={})

    with io.open(dst, 'wt', encoding='utf-8') as target_file:
        nbformat.write(notebook, target_file)
    return dst
def list_examples(self):
    """Return a list of dicts describing every example notebook.

    The reviewed directory is scanned first, then the unreviewed one.  Each
    entry holds the notebook's absolute path, file names, modification
    time, metadata, category, the owner's display name (``None`` if the uid
    has no passwd entry) and whether the current user owns the file.
    """
    sources = (
        ('reviewed', self.reviewed_example_dir),
        ('unreviewed', self.unreviewed_example_dir),
    )
    current_uid = os.getuid()
    all_examples = []

    for category, directory in sources:
        for found in glob.glob(os.path.join(directory, '*.ipynb')):
            filepath = os.path.abspath(found)
            node = nbformat.read(filepath, nbformat.NO_CONVERT)
            st = os.stat(filepath)
            try:
                pw_entry = pwd.getpwuid(st.st_uid)
            except KeyError:
                owner = None
            else:
                owner = pw_entry.pw_gecos or pw_entry.pw_name
            all_examples.append({
                'filepath': filepath,
                'user': owner,
                'datetime': st.st_mtime,
                'filename': os.path.basename(filepath),
                'metadata': node.metadata,
                'category': category,
                'basename': os.path.basename(filepath),
                'owned': st.st_uid == current_uid,
            })
    return all_examples
def clear_notebooks(root):
    """Clear the outputs of documentation notebooks.

    Walks ``root`` recursively and, for every ``.ipynb`` file (in sorted
    order within each directory), clears cell outputs with
    ``ClearOutputPreprocessor``, resets the notebook metadata to ``{}`` and
    writes the notebook back in place as format version 4.

    Files are opened explicitly as UTF-8 so the result does not depend on
    the platform's default text encoding.
    """
    import io

    preprocessor = ClearOutputPreprocessor()

    for dirpath, dirnames, filenames in os.walk(root):
        for filename in sorted(filenames):
            if os.path.splitext(filename)[1] != '.ipynb':
                continue

            # read in the notebook
            pth = os.path.join(dirpath, filename)
            with io.open(pth, 'r', encoding='utf-8') as fh:
                nb = read(fh, 4)

            # clear the outputs of all the cells
            nb = preprocessor.preprocess(nb, {})[0]

            # clear metadata
            nb.metadata = {}

            # write the notebook back to disk
            with io.open(pth, 'w', encoding='utf-8') as fh:
                write(nb, fh, 4)
def execute(self):
    """Regenerate the HTML reports from the notebooks in ``lowfat/reports``.

    Every file in ``lowfat/reports/html`` is removed first.  Then each
    ``.ipynb`` file in ``lowfat/reports`` is executed and exported with the
    ``basic`` HTML template to ``lowfat/reports/html/<notebook>.html``.
    """
    print("Cleaning lowfat/reports/html ...")
    for stale in os.listdir("lowfat/reports/html"):
        stale_path = "lowfat/reports/html/{}".format(stale)
        print("- Removing lowfat/reports/html/{}".format(stale))
        os.remove(stale_path)
    print("Cleaning of lowfat/reports/html is complete.")

    notebooks = [
        entry for entry in os.listdir("lowfat/reports")
        if entry.endswith(".ipynb")
    ]
    for notebook_filename in notebooks:
        print("Processing lowfat/reports/{}".format(notebook_filename))

        # Based on Executing notebooks, nbconvert Documentation by Jupyter Development Team.
        # https://nbconvert.readthedocs.io/en/latest/execute_api.html
        with open("lowfat/reports/{}".format(notebook_filename)) as file_:
            notebook = nbformat.read(file_, as_version=4)

        # Kernel is provided by https://github.com/django-extensions/django-extensions/
        executor = ExecutePreprocessor(timeout=600,
                                       kernel_name='django_extensions')
        executor.preprocess(notebook, {'metadata': {'path': '.'}})

        html_exporter = HTMLExporter()
        html_exporter.template_file = 'basic'
        body, dummy_resources = html_exporter.from_notebook_node(notebook)

        html_path = 'lowfat/reports/html/{}.html'.format(notebook_filename)
        with open(html_path, 'wt') as file_:
            file_.write(body)
def test_iter_validation_error(self):
    """``iter_validate`` reports exactly three errors for ``invalid.ipynb``."""
    with self.fopen(u'invalid.ipynb', u'r') as f:
        nb = read(f, as_version=4)

    refs = [err.ref for err in iter_validate(nb)]
    assert len(refs) == 3
    assert set(refs) == {'markdown_cell', 'heading_cell', 'bad stream'}
def setUp(self):
    """Build the reference notebook and round-trip it through ipyrmd.

    ``self.orig`` is built from ``self.cells`` and the chosen metadata,
    written to a temporary directory, converted to Rmd (or spin, when
    ``self.use_rmd`` is false) and back.  The intermediate text is stored
    in ``self.rmd`` and the re-read notebook in ``self.roundtrip``.
    """
    metadata = self.default_metadata if self.metadata is None else self.metadata

    self.orig = nbformat.from_dict({
        "nbformat": NBFORMAT_VERSION,
        "nbformat_minor": 0,
        "metadata": metadata,
        "cells": self.cells
    })

    with tempfile.TemporaryDirectory() as d:
        ipynb0_name, rmd_name, ipynb1_name = d + "/0", d + "/1", d + "/2"

        with open(ipynb0_name, "w") as f:
            nbformat.write(self.orig, f)

        if not self.use_rmd:
            ipyrmd.ipynb_to_spin(ipynb0_name, rmd_name)
            ipyrmd.spin_to_ipynb(rmd_name, ipynb1_name)
        else:
            ipyrmd.ipynb_to_rmd(ipynb0_name, rmd_name)
            ipyrmd.rmd_to_ipynb(rmd_name, ipynb1_name)

        with open(rmd_name) as f:
            self.rmd = f.read()

        with open(ipynb1_name) as f:
            self.roundtrip = nbformat.read(f, NBFORMAT_VERSION)
def _notebook_run(path):
    """
    Execute a notebook via nbconvert and collect output.
    Taken from
    https://blog.thedataincubator.com/2016/06/testing-jupyter-notebooks/

    The process working directory is changed to the notebook's directory
    before nbconvert is invoked.

    Args:
        path (str): file path for the notebook object

    Returns: (parsed nb object, execution errors)

    Raises:
        subprocess.CalledProcessError: if the nbconvert command fails.
    """
    # Resolve the path BEFORE changing directory.  Otherwise a relative
    # path such as "docs/nb.ipynb" no longer points at the notebook once
    # the cwd is "docs", and a bare filename yields os.chdir("").
    path = os.path.abspath(path)
    os.chdir(os.path.dirname(path))

    with tempfile.NamedTemporaryFile(suffix=".ipynb") as fout:
        args = ["jupyter", "nbconvert", "--to", "notebook", "--execute",
                "--ExecutePreprocessor.timeout=60",
                "--output", fout.name, path]
        subprocess.check_call(args)

        fout.seek(0)
        nb = nbformat.read(fout, nbformat.current_nbformat)

    # Collect every output of type "error" from cells that have outputs.
    errors = [output
              for cell in nb.cells if "outputs" in cell
              for output in cell["outputs"]
              if output.output_type == "error"]

    return nb, errors
def main_patch(args):
    """Apply the JSON diff in ``args.patch`` to the notebook ``args.base``.

    The patched notebook is written to ``args.output`` when that is set,
    otherwise it is printed.

    Returns 1 if the base or patch file is missing, otherwise 0.
    """
    base_path, patch_path, out_path = args.base, args.patch, args.output

    missing = next(
        (p for p in (base_path, patch_path) if not os.path.exists(p)), None)
    if missing is not None:
        print("Missing file {}".format(missing))
        return 1

    before = nbformat.read(base_path, as_version=4)
    with open(patch_path) as patch_file:
        raw_diff = json.load(patch_file)
    diff = to_diffentry_dicts(raw_diff)

    # TODO: Split lines here? Must be consistent with the diff for
    # patch_notebook to work correctly!?

    after = patch_notebook(before, diff)

    if not out_path:
        print(after)
    else:
        nbformat.write(after, out_path)
    return 0
def load_module(self, fullname):
    """Import a notebook as a module.

    The notebook found for ``fullname`` is read, a new module object is
    registered in ``sys.modules``, and each code cell is transformed by the
    shell's input transformer and executed in the module's namespace.

    Returns the new module.
    """
    path = find_notebook(fullname, self.path)

    # load the notebook object
    with io.open(path, 'r', encoding='utf-8') as f:
        nb = nbformat.read(f, 4)

    # create the module and add it to sys.modules
    mod = types.ModuleType(fullname)
    mod.__file__ = path
    mod.__loader__ = self
    namespace = mod.__dict__
    namespace['get_ipython'] = get_ipython
    sys.modules[fullname] = mod

    # extra work to ensure that magics that would affect the user_ns
    # actually affect the notebook module's ns
    previous_user_ns = self.shell.user_ns
    self.shell.user_ns = namespace

    try:
        for cell in nb.cells:
            if cell.cell_type != 'code':
                continue
            # transform the input to executable Python
            transformer = self.shell.input_transformer_manager
            code = transformer.transform_cell(cell.source)
            # run the code in the module
            exec(code, namespace)
    finally:
        self.shell.user_ns = previous_user_ns
    return mod
def split_into_units(nb_name, include_header=True):
    """Split notebook into units.

    A unit starts at each markdown cell whose source begins with ``'# '``
    and runs up to the next such cell.  Cells before the first header are
    not part of any unit.  Each unit's ``name`` metadata is the first line
    of its header cell without the leading ``'# '``.

    Parameters
    ----------
    nb_name : str
        Path of the notebook to split.
    include_header : bool
        If false, the header line is removed from the first cell of every
        unit.

    Returns
    -------
    list
        The unit notebooks; empty if the file does not exist (a message is
        then written to stderr).

    Raises
    ------
    IOError
        For any I/O failure other than "file not found".
    """
    import errno

    try:
        nb = nbformat.read(nb_name, as_version=4)
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
        # ``file=`` is required: a positional sys.stderr would be printed
        # as a second value on stdout instead of selecting the stream.
        print('File not found: {0}'.format(nb_name), file=sys.stderr)
        return []

    cells = nb.cells
    indexes = [i for i, cell in enumerate(cells)
               if cell.cell_type == 'markdown' and cell.source.startswith('# ')]

    # Pair each header index with the next one (None = end of notebook).
    separated_cells = [cells[i:j]
                       for i, j in zip(indexes, indexes[1:] + [None])]

    units = [
        current.new_notebook(
            cells=unit_cells,
            metadata={'name': unit_cells[0].source.split('\n')[0][2:]})
        for unit_cells in separated_cells
    ]

    if not include_header:
        for unit in units:
            # The first line is the header.
            header_cell = unit.cells[0]
            header_cell.source = '\n'.join(header_cell.source.split('\n')[1:])

    return units
def from_file(self, filename):
    """Convert the notebook ``filename`` to markdown and store it in ``self.kp``.

    The notebook is exported with the ``TEMPLATE`` markdown template, the
    body and extracted output images are written to the post, and a copy of
    the notebook with code-cell outputs and execution counts cleared is
    stored as a source file.
    """
    import nbformat
    from nbconvert import MarkdownExporter
    from jinja2 import DictLoader
    from traitlets.config import Config

    config = Config()
    config.ExtractOutputPreprocessor.output_filename_template = \
        'images/{unique_key}_{cell_index}_{index}{extension}'
    config.NbConvertBase.display_data_priority = [
        'application/javascript',
        'text/html',
        'text/markdown',
        'image/svg+xml',
        'text/latex',
        'image/png',
        'image/jpeg',
        'text/plain',
    ]

    nb = nbformat.read(filename, as_version=4)

    loader = DictLoader({'full.tpl': TEMPLATE})
    md_exporter = MarkdownExporter(config=config, extra_loaders=[loader],
                                   template_file='full.tpl')
    body, resources = md_exporter.from_notebook_node(nb)

    # Image names are stored relative to the 'images/' prefix.
    images = {}
    for name, data in resources.get('outputs', {}).items():
        images[name.split('images/')[1]] = data
    self.kp.write(body, images=images)

    # Add cleaned ipynb file
    for cell in nb['cells']:
        if cell['cell_type'] != 'code':
            continue
        cell['outputs'] = []  # remove output data
        cell['execution_count'] = None  # reset to not executed
    self.kp.write_src(os.path.basename(filename), nbformat.writes(nb))
def execute_nb(src, dst, allow_errors=False, timeout=1000, kernel_name=None):
    """
    Execute notebook in `src` and write the output to `dst`

    The notebook is executed with ``notebooks/`` as the execution path.

    Parameters
    ----------
    src, dst: str
        path to notebook
    allow_errors: bool
    timeout: int
    kernel_name: str
        defaults to value set in notebook metadata

    Returns
    -------
    dst: str
    """
    with io.open(src, encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)

    ep = ExecutePreprocessor(allow_errors=allow_errors,
                             timeout=timeout,
                             kernel_name=kernel_name)
    # The resources key must be spelled "metadata"; with the previous
    # "metadta" typo the execution path was silently ignored.
    ep.preprocess(nb, {"metadata": {"path": "notebooks/"}})

    with io.open(dst, "wt", encoding="utf-8") as f:
        nbformat.write(nb, f)
    return dst
def get_notebook_argument(self, argname):
    """Load the notebook named by ``argname`` in the JSON request body.

    The request body is expected to look like ``{"argname": arg}``.  ``arg``
    is first treated as a filename relative to ``self.params["cwd"]``; if
    no such file exists it is fetched as a URI.

    Returns the notebook read as format version 4.

    Raises ``web.HTTPError(400)`` if ``arg`` is not a string, or if the
    notebook cannot be fetched, read or validated.
    """
    # Assuming a request on the form "{'argname':arg}"
    body = json.loads(escape.to_unicode(self.request.body))
    arg = body[argname]

    # Currently assuming arg is a filename relative to
    # where the server was started from, later we may
    # want to accept urls or full notebooks as well.
    if not isinstance(arg, string_types):
        raise web.HTTPError(400, "Expecting a filename.")

    path = os.path.join(self.params["cwd"], arg)

    # Let nbformat do the reading and validation.  The existence check is
    # done once, and the remote fetch sits inside the try so that a bad
    # URI is reported as a 400 instead of an unhandled error.
    try:
        if os.path.exists(path):
            nb = nbformat.read(path, as_version=4)
        else:
            # Assume file is URI.
            # NOTE(review): this fetches a client-supplied address from the
            # server; confirm that is acceptable for this deployment.
            r = requests.get(arg)
            nb = nbformat.reads(r.text, as_version=4)
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into HTTP errors.
        raise web.HTTPError(400, "Invalid notebook: %s" % truncate_filename(arg))

    return nb
def _process(self):
    """Render the attached notebook as an HTML preview response.

    The notebook is exported with the ``basic`` HTML template and the
    ``CppHighlighter`` preprocessor, wrapped in the preview template, and
    returned with Content-Security-Policy headers that only allow scripts
    from MathJax's CDN or carrying the per-response nonce.
    """
    exporter_config = Config()
    exporter_config.HTMLExporter.preprocessors = [CppHighlighter]
    exporter_config.HTMLExporter.template_file = 'basic'

    with self.attachment.file.open() as f:
        notebook = nbformat.read(f, as_version=4)

    exporter = HTMLExporter(config=exporter_config)
    body, resources = exporter.from_notebook_node(notebook)
    inline_css = resources['inlining'].get('css', [])
    css_code = '\n'.join(inline_css)

    nonce = str(uuid4())
    html = render_template('previewer_jupyter:ipynb_preview.html',
                           attachment=self.attachment,
                           html_code=body, css_code=css_code, nonce=nonce)
    response = current_app.response_class(html)

    # Use CSP to restrict access to possibly malicious scripts or inline JS
    csp_header = "script-src cdn.mathjax.org 'nonce-{}';".format(nonce)
    for header_name in ('Content-Security-Policy', 'X-Webkit-CSP'):
        response.headers[header_name] = csp_header
    # IE10 doesn't have proper CSP support, so we need to be more strict
    response.headers['X-Content-Security-Policy'] = "sandbox allow-same-origin;"
    return response
def _runTest(self):
    """Execute the notebook ``self.nbfile`` and write the result back.

    When ``self.cov`` is set, a cell that starts coverage and extends
    ``sys.path`` with the notebook's directory is inserted before execution
    and removed again afterwards.  The notebook runs in
    ``$WRADLIB_BUILD_DIR`` if that is set, otherwise in its own directory.
    """
    kernel = 'python%d' % sys.version_info[0]
    cur_dir = os.path.dirname(self.nbfile)

    with open(self.nbfile) as f:
        nb = nbformat.read(f, as_version=4)

    if self.cov:
        coverage_source = ('import coverage\n'
                           'coverage.process_startup()\n'
                           'import sys\n'
                           'sys.path.append("{0}")\n').format(cur_dir)
        covdict = {'cell_type': 'code', 'execution_count': 1,
                   'metadata': {'collapsed': True}, 'outputs': [],
                   'nbsphinx': 'hidden',
                   'source': coverage_source}
        nb['cells'].insert(0, nbformat.from_dict(covdict))

    exproc = ExecutePreprocessor(kernel_name=kernel, timeout=600)

    try:
        run_dir = os.getenv('WRADLIB_BUILD_DIR', cur_dir)
        resources = {'metadata': {'path': run_dir}}
        exproc.preprocess(nb, resources)
    except CellExecutionError as e:
        raise e

    if self.cov:
        # Drop the injected coverage cell before saving.
        nb['cells'].pop(0)

    with io.open(self.nbfile, 'wt') as f:
        nbformat.write(nb, f)

    self.assertTrue(True)
def test_mergedriver(git_repo):
    """Exercise the git merge driver with and without conflicts.

    A conflict-free merge must go through ``nbmergeapp`` and reproduce the
    expected merged notebook.  A conflicting merge must fail, leave
    ``merge-conflict.ipynb`` in the git status, diff through ``nbdiff``,
    and still leave a valid notebook on disk.
    """
    def slurp(filename):
        with open(filename) as f:
            return f.read()

    files_dir = filespath()

    # enable diff/merge drivers
    nbdime.gitdiffdriver.main(['config', '--enable'])
    nbdime.gitmergedriver.main(['config', '--enable'])

    # run merge with no conflicts
    merge_output = get_output('git merge remote-no-conflict', err=True)
    assert 'nbmergeapp' in merge_output

    merged = slurp('merge-no-conflict.ipynb')
    expected = slurp(os.path.join(files_dir, 'multilevel-test-merged.ipynb'))

    # verify merge success
    assert merged == expected

    # reset
    call('git reset local --hard')

    # run merge with conflicts
    with pytest.raises(CalledProcessError):
        call('git merge remote-conflict')

    status = get_output('git status')
    assert 'merge-conflict.ipynb' in status

    diff_output = get_output('git diff HEAD')
    assert 'nbdiff' in diff_output

    # verify that the conflicted result is a valid notebook
    conflicted_nb = nbformat.read('merge-conflict.ipynb', as_version=4)
    nbformat.validate(conflicted_nb)
def _read_notebook(self, os_path, as_version=4):
    """Read a notebook from an os path.

    If reading fails and atomic writing is enabled, the intermediate file
    left by an atomic write (if it exists) is restored over ``os_path``,
    the unreadable file is moved aside, and the read is retried.
    Otherwise an HTTP 400 error describing the original failure is raised.
    """
    with self.open(os_path, 'r', encoding='utf-8') as f:
        try:
            return nbformat.read(f, as_version=as_version)
        except Exception as e:
            e_orig = e

        # If use_atomic_writing is enabled, we'll guess that it was also
        # enabled when this notebook was written and look for a valid
        # atomic intermediate.
        tmp_path = path_to_intermediate(os_path)

        can_recover = self.use_atomic_writing and os.path.exists(tmp_path)
        if not can_recover:
            raise HTTPError(
                400,
                u"Unreadable Notebook: %s %r" % (os_path, e_orig),
            )

        # Move the bad file aside, restore the intermediate, and try again.
        invalid_file = path_to_invalid(os_path)
        # Rename over existing file doesn't work on Windows
        if os.name == 'nt' and os.path.exists(invalid_file):
            os.remove(invalid_file)
        os.rename(os_path, invalid_file)
        os.rename(tmp_path, os_path)
        return self._read_notebook(os_path, as_version)
def execute_nb(src, dst, allow_errors=False, timeout=1000, kernel_name=None):
    '''
    Execute notebook in `src` and write the output to `dst`

    The notebook is executed with ``SOURCE_DIR`` as the execution path.

    Parameters
    ----------
    src, dst: str
        path to notebook
    allow_errors: bool
        forwarded to ``ExecutePreprocessor``
    timeout: int
    kernel_name: str
        defaults to value set in notebook metadata

    Returns
    -------
    dst: str
    '''
    with io.open(src, encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    # Forward the caller's `allow_errors`; it used to be hard-coded to
    # False, which made the parameter a no-op.
    ep = ExecutePreprocessor(allow_errors=allow_errors,
                             timeout=timeout,
                             kernel_name=kernel_name)
    ep.preprocess(nb, {'metadata': {'path': SOURCE_DIR}})

    with io.open(dst, 'wt', encoding='utf-8') as f:
        nbformat.write(nb, f)
    return dst
def copy_notebooks():
    """Copy the source notebooks and figures into the website content tree.

    The figures directory is replaced with a fresh copy.  For every
    notebook, links to other notebooks are rewritten to the lower-cased
    ``.html`` names and figure paths to their site-absolute form.  The
    notebook is written to ``NB_DEST_DIR`` and a page stub generated from
    ``PAGEFILE`` is written to ``PAGE_DEST_DIR``.

    Raises ``ValueError`` if a non-index notebook's third cell is not a
    single-line markdown heading.
    """
    nblist = sorted(entry for entry in os.listdir(NB_SOURCE_DIR)
                    if entry.endswith('.ipynb'))
    name_map = {entry: entry.rsplit('.', 1)[0].lower() + '.html'
                for entry in nblist}

    figsource = abspath_from_here('..', 'notebooks', 'figures')
    figdest = abspath_from_here('content', 'figures')

    if os.path.exists(figdest):
        shutil.rmtree(figdest)
    shutil.copytree(figsource, figdest)

    figurelist = os.listdir(abspath_from_here('content', 'figures'))
    figure_map = {
        os.path.join('figures', fig):
            os.path.join('/PythonDataScienceHandbook/figures', fig)
        for fig in figurelist
    }

    # Notebook-name replacements are applied first, then figure paths.
    replacements = list(name_map.items()) + list(figure_map.items())

    for nb in nblist:
        base, ext = os.path.splitext(nb)
        print('-', nb)

        content = nbformat.read(os.path.join(NB_SOURCE_DIR, nb),
                                as_version=4)

        if nb == 'Index.ipynb':
            cells = '1:'
            template = 'page'
            title = 'Python Data Science Handbook'
            content.cells[2].source = INTRO_TEXT
        else:
            cells = '2:'
            template = 'booksection'
            title = content.cells[2].source
            if not title.startswith('#') or len(title.splitlines()) > 1:
                raise ValueError('title not found in third cell')
            title = title.lstrip('#').strip()

            # put nav below title
            content.cells[0], content.cells[1], content.cells[2] = \
                content.cells[2], content.cells[0], content.cells[1]

        # Replace internal URLs and figure links in notebook
        for cell in content.cells:
            if cell.cell_type != 'markdown':
                continue
            for old, new in replacements:
                if old in cell.source:
                    cell.source = cell.source.replace(old, new)

        nbformat.write(content, os.path.join(NB_DEST_DIR, nb))

        pagefile = os.path.join(PAGE_DEST_DIR, base + '.md')
        htmlfile = base.lower() + '.html'
        with open(pagefile, 'w') as f:
            f.write(PAGEFILE.format(title=title,
                                    htmlfile=htmlfile,
                                    notebook_file=nb,
                                    template=template,
                                    cells=cells))
def collect(self):
    """Yield a ``Teststep`` for each Python code cell under a step heading.

    Markdown cells drive the state: a "## Test Results" heading stops
    collection, "## Test Configurations" / "## Environmental Needs" start a
    setup section, "## TC..." sets the current case, and a step heading
    ("### Given", "### And", "### When", "### Then", "### But") ends setup
    and names the following steps.  Code cells seen during setup are stored
    in ``self.test_setup``; code cells before the first step heading are
    skipped.
    """
    # Read through a context manager so the file handle is closed
    # promptly instead of being left to the garbage collector.
    with self.fspath.open() as fh:
        nb = nbformat.read(fh, 4)

    self.km = testbook.km
    self.kc = testbook.kc
    self.name = ntpath.basename(self.name).replace(".ipynb", "")
    name = "Default Name"
    self.case = ""
    setup = False
    self.test_setup = []

    step_prefixes = ("### Given", "### And", "### When", "### Then", "### But")

    for cell in nb.cells:
        if cell.cell_type == 'markdown':
            if "## Test Results" in cell.source:
                return
            if ('## Test Configurations' in cell.source
                    or '## Environmental Needs' in cell.source):
                setup = True
                continue
            if cell.source.startswith("## TC"):
                # Keep only the text before the link target.
                case, _, _ = cell.source.partition("](https")
                self.case = case.replace("## ", "").replace("[", "")
            for step in step_prefixes:
                if cell.source.startswith(step):
                    setup = False
                    self.header = cell.source.split("\n")[0].replace("### ", "")
                    header = re.sub(r'### |\(|\)|\"|\'', '', self.header)
                    self.header = ".".join([self.name, self.header])
                    name = header.strip().replace(" ", "_").lower()

        if (cell.cell_type == 'code'
                and nb.metadata.kernelspec.language == 'python'):
            if setup:
                self.test_setup.append(cell.source)
                continue
            if name == "Default Name":
                continue
            yield Teststep(self.case, self.header, name, self, cell)
def run_notebook(path):
    """Executes notebook `path` and shows any exceptions. Useful for testing.

    The notebook is read without version conversion, executed with a
    600 second timeout, and ``done`` is printed when execution finishes.
    """
    # Use a context manager so the file handle is closed once the notebook
    # has been parsed.
    with open(path) as fh:
        nb = nbformat.read(fh, as_version=nbformat.NO_CONVERT)
    ExecutePreprocessor(timeout=600).preprocess(nb, {})
    print('done')
def convert_notebooks_to_html_partial(notebook_paths):
    """
    Converts notebooks in notebook_paths to HTML partials in NOTEBOOK_HTML_DIR

    For each notebook the exported HTML is wrapped with an "Interact" link
    built from the notebook and the datasets it references, written to
    ``NOTEBOOK_HTML_DIR``, and the extracted output images are written to
    ``NOTEBOOK_IMAGE_DIR``.
    """
    for notebook_path in notebook_paths:
        # Computes <name>.ipynb from notebooks/<name>.ipynb
        parent, _, filename = notebook_path.rpartition('/')
        filepath = parent.partition('/')[2]
        # Computes <name> from <name>.ipynb
        basename = filename.rpartition('.')[0]
        # Computes <name>.html from notebooks/<name>.ipynb
        outfile_name = basename + '.html'
        print("filepath", filepath, "filename", filename, "basename",
              basename, outfile_name)

        # This sets the img tag URL in the rendered HTML. This restricts the
        # the chapter markdown files to be one level deep. It isn't ideal, but
        # the only way around it is to buy a domain for the staging textbook as
        # well and we'd rather not have to do that.
        extract_output_config = {
            # This results in images like AB_5_1.png for a notebook called
            # AB.ipynb
            'unique_key': basename,
            'output_files_dir': '../' + NOTEBOOK_IMAGE_DIR,
        }

        notebook = nbformat.read(notebook_path, 4)
        raw_html, resources = html_exporter.from_notebook_node(
            notebook, resources=extract_output_config)

        html = preamble + _extract_cells(raw_html) + postscript

        # Get dependencies from notebook
        all_sources = '\n'.join([cell['source'] for cell in notebook.cells])
        dependencies = [
            match.group('dataset')
            for match in DATASET_REGEX.finditer(all_sources)
        ]
        dependencies.append(filename)
        if filepath != "":
            dependencies = [filepath + "/" + dep for dep in dependencies]
        print(dependencies)

        paths = '&'.join([PATH_PREFIX.format(dep) for dep in dependencies])
        print(INTERACT_LINK.format(paths=paths))

        with_wrapper = """<div id="ipython-notebook"> <a class="interact-button" href="{interact_link}">Interact</a> {html} </div>""".format(
            interact_link=INTERACT_LINK.format(paths=paths), html=html)

        # Remove newlines before closing div tags
        final_output = CLOSING_DIV_REGEX.sub('</div>', with_wrapper)

        # Write out HTML
        outfile_path = os.path.join(os.curdir, NOTEBOOK_HTML_DIR, outfile_name)
        with open(outfile_path, 'w') as outfile:
            outfile.write(final_output)

        # Write out images
        for relative_path, image_data in resources['outputs'].items():
            image_name = relative_path.split('/')[-1]
            final_image_path = '{}/{}'.format(NOTEBOOK_IMAGE_DIR, image_name)
            with open(final_image_path, 'wb') as outimage:
                outimage.write(image_data)
        print(outfile_path + " written.")
nb = read(f, as_version=4) nb.metadata.colab = {"name": file} with open(file, "w", encoding="utf-8") as f: write(nb, f) def calls_input(file): with open(file) as f: nb = read(f, as_version=4) cells = nb.cells code_cells = [c for c in cells if c.cell_type == "code"] for cell in code_cells: if "input(" in cell.source: return True return False for file in os.listdir("."): if file.endswith(".ipynb"): set_colab_name(file) # skip running cells if input is called in notebook if calls_input(file): print("Skip", file) continue with open(file) as f: nb = read(f, as_version=4) ep.preprocess(nb) with open(file, "w", encoding="utf-8") as f: write(nb, f) print("Ran", file)
def scan_files(files, args, pattern):
    '''
    Process the notebooks in the list of files.  Open each notebook and
    scan each cell, subjecting each cell to a series of filters (cell type,
    nbgrader id, search pattern).  Cells that pass all filters are then
    printed to stdout -- as tag names when --tags is set, otherwise as cell
    contents.  Files that cannot be read as notebooks are reported and
    skipped.
    '''

    def highlight(text, cn=1):
        return '\033[03{:1d}m{}\033[m'.format(cn, text)

    def colorized(text):
        if args.plain or not pattern:
            return text
        return re.sub(pattern, lambda m: highlight(m.group(0), 4), text,
                      flags=re.IGNORECASE)

    def print_cell_ids(fn, lst):
        '''
        If --tags is on the command line print cell tag names instead
        of cell contents.
        '''
        print(fn)
        for tag in lst:
            print(' ', tag)
        print()

    def print_hits(fn, lst):
        '''
        Print the contents of cells that pass all the filters.  fn is the
        name of the file that contains the cells, lst is a list of cells
        from that file.  Unless --plain was specified on the command line
        the file name and the parts of the cell that match the search
        pattern are printed in color.
        '''
        if not args.plain:
            print(highlight(fn))
            print(highlight('='*len(fn)))
        for source in lst:
            print(colorized(source))
            if not args.plain:
                print(highlight('---------'))
        print()

    for fn in files:
        try:
            nb = nbformat.read(fn, args.nbformat)
        except (NotJSONError, UnicodeDecodeError) as e:
            print(e, fn)
            continue

        if args.code:
            wanted_type = 'code'
        elif args.markdown:
            wanted_type = 'markdown'
        else:
            wanted_type = None

        matches = []
        for cell in nb['cells']:
            metadata = cell['metadata']
            if 'nbgrader' in metadata:
                tag = metadata['nbgrader'].get('grade_id')
            else:
                tag = None

            if wanted_type and cell['cell_type'] != wanted_type:
                continue
            if args.id and tag != args.id:
                continue
            if pattern and re.search(pattern, cell['source'], re.IGNORECASE) is None:
                continue

            if not args.tags:
                matches.append(cell['source'])
            elif tag is not None:
                matches.append(tag)

        if len(matches) > 0:
            printer = print_cell_ids if args.tags else print_hits
            printer(fn, matches)
def run(self):
    """Copy the example notebooks into the tutorial folder and execute them.

    Does nothing when ``self.execute_notebooks`` is ``"never"``.  Otherwise
    the ``test_data`` directory and the ``Basic`` / ``Advanced`` example
    notebooks are copied next to the documentation sources, and each
    notebook is executed in place when ``self.execute_notebooks`` is
    ``"always"``, or when it is ``"auto"`` and the notebook has code but no
    stored output.

    Raises:
        nbconvert.preprocessors.execute.CellExecutionError: re-raised only
            when the ``TRAVIS`` environment variable is set; otherwise an
            execution failure is just reported on stdout.
    """
    if self.execute_notebooks == "never":
        return

    # Setting os.environ["CI"] will disable interactive (blocking) mode in
    # Jupyter notebooks
    os.environ["CI"] = "true"

    # Copy test_data directory to the tutorial folder
    test_data_in_dir = (Path(self.current_file_dir).parent / "examples" /
                        "test_data")
    test_data_out_dir = Path(self.current_file_dir) / "test_data"
    if test_data_out_dir.exists():
        shutil.rmtree(test_data_out_dir)
    shutil.copytree(test_data_in_dir, test_data_out_dir)

    # Copy and execute notebooks in the tutorial folder
    nb_paths = []
    nb_ignored = ['tensor.ipynb']
    example_dirs = ["Basic", "Advanced"]
    for example_dir in example_dirs:
        in_dir = (Path(self.current_file_dir).parent / "examples" /
                  "python" / example_dir)
        out_dir = Path(self.current_file_dir) / "tutorial" / example_dir
        shutil.copy(
            in_dir.parent / "open3d_tutorial.py",
            out_dir.parent / "open3d_tutorial.py",
        )

        if self.clean_notebooks:
            for nb_out_path in out_dir.glob("*.ipynb"):
                if nb_out_path.name not in nb_ignored:
                    print("Delete: {}".format(nb_out_path))
                    nb_out_path.unlink()

        for nb_in_path in in_dir.glob("*.ipynb"):
            nb_out_path = out_dir / nb_in_path.name
            if not nb_out_path.is_file():
                print("Copy: {}\n -> {}".format(nb_in_path, nb_out_path))
                shutil.copy(nb_in_path, nb_out_path)
            else:
                # Fixed: .format() used to sit inside the string literal, so
                # the placeholder text was printed instead of the path.
                print("Copy skipped: {}".format(nb_out_path))
            nb_paths.append(nb_out_path)

    # Execute Jupyter notebooks
    for nb_path in nb_paths:
        print("[Processing notebook {}]".format(nb_path.name))
        with open(nb_path, encoding="utf-8") as f:
            nb = nbformat.read(f, as_version=4)

        # https://github.com/spatialaudio/nbsphinx/blob/master/src/nbsphinx.py
        has_code = any(c.source for c in nb.cells if c.cell_type == "code")
        has_output = any(
            c.get("outputs") or c.get("execution_count")
            for c in nb.cells
            if c.cell_type == "code")
        execute = (self.execute_notebooks == "auto" and has_code and
                   not has_output) or self.execute_notebooks == "always"
        print("has_code: {}, has_output: {}, execute: {}".format(
            has_code, has_output, execute))

        if execute:
            ep = nbconvert.preprocessors.ExecutePreprocessor(timeout=6000)
            try:
                ep.preprocess(nb, {"metadata": {"path": nb_path.parent}})
            except nbconvert.preprocessors.execute.CellExecutionError:
                print(
                    "Execution of {} failed, this will cause Travis to fail."
                    .format(nb_path.name))
                if "TRAVIS" in os.environ:
                    raise

            # The notebook is written back even after a tolerated failure.
            with open(nb_path, "w", encoding="utf-8") as f:
                nbformat.write(nb, f)
def run_notebook(notebook, notebook_dir, kernel=None, no_cache=False, temp_dir='tmp_notebook'):
    """Run tutorial Jupyter notebook to catch any execution error.

    Parameters
    ----------
    notebook : string
        the name of the notebook to be tested
    notebook_dir : string
        the directory of the notebook to be tested
    kernel : string, None
        controls which kernel to use when running the notebook. e.g: python2
    no_cache : '1' or False
        controls whether to clean the temporary directory in which the
        notebook was run and re-download any resource file. The default
        behavior is to not clean the directory. Set to '1' to force clean the
        directory.
        NB: in the real CI, the tests will re-download everything since they
        start from a clean workspace.
    temp_dir: string
        The temporary sub-directory in which to run the notebook.

    Returns
    -------
    Returns true if the workbook runs with no warning or exception.
    """
    logging.info("Running notebook '{}'".format(notebook))
    notebook_path = os.path.join(*([notebook_dir] + notebook.split('/')))
    working_dir = os.path.join(*([temp_dir] + notebook.split('/')))

    if no_cache == '1':
        logging.info("Cleaning and setting up temp directory '{}'".format(working_dir))
        shutil.rmtree(temp_dir, ignore_errors=True)

    errors = []
    # From here on `notebook` holds the loaded notebook object (or None if
    # loading failed), no longer the notebook name.
    notebook = None
    if not os.path.isdir(working_dir):
        os.makedirs(working_dir)
    try:
        notebook = nbformat.read(notebook_path + '.ipynb', as_version=IPYTHON_VERSION)
        # Adding a small delay to allow time for sockets to be freed
        # stop-gap measure to battle the 1000ms linger of socket hard coded
        # in the kernel API code
        time.sleep(1.1)
        if kernel is not None:
            eprocessor = ExecutePreprocessor(timeout=TIME_OUT, kernel_name=kernel)
        else:
            eprocessor = ExecutePreprocessor(timeout=TIME_OUT)
        eprocessor.preprocess(notebook, {'metadata': {'path': working_dir}})
    except Exception as err:
        # Any failure (loading or execution) is recorded, not propagated.
        errors.append(str(err))
    finally:
        if notebook is not None:
            output_file = os.path.join(working_dir, "output.txt")
            nbformat.write(notebook, output_file)
            # Scan the written notebook for warnings; the context manager
            # makes sure the file handle is released (it used to leak).
            with io.open(output_file, mode='r', encoding='utf-8') as output_nb:
                for line in output_nb:
                    if "Warning:" in line:
                        errors.append("Warning:\n" + line)
    if len(errors) > 0:
        logging.error('\n'.join(errors))
        return False
    return True
# Generates the additional notebook variants used for the different
# experimental conditions from the single source notebook.
import nbformat as nbf

nb_id = "communities/communities"

# Baseline variant. (the toy original is the pandas-only control)
nb = nbf.read(f"{nb_id}.ipynb", as_version=4)
code = "import lux"
nb["cells"].insert(0, nbf.v4.new_code_cell(code))
# Explicit compute_meta_recs at every non-print cell (the first three cells
# are left untouched)
for i, cell in enumerate(nb["cells"]):
    if i <= 2:
        continue
    source = cell["source"]
    if "# {{NO LUX}}" in source and "df" in source:
        cell["source"] = source + "\ndf.compute_meta_recs()"
nbf.write(nb, f'{nb_id}_baseline.ipynb')

# Variant o1: lazy maintenance switched on.
nb = nbf.read(f"{nb_id}.ipynb", as_version=4)
code = "import lux\nlux.config.lazy_maintain = True"
nb["cells"].insert(0, nbf.v4.new_code_cell(code))
nbf.write(nb, f'{nb_id}_o1.ipynb')

# Variant o1o2: lazy maintenance plus early pruning.
nb = nbf.read(f"{nb_id}.ipynb", as_version=4)
code = "import lux\nlux.config.lazy_maintain = True\nlux.config.early_pruning = True"
nb["cells"].insert(0, nbf.v4.new_code_cell(code))
nbf.write(nb, f'{nb_id}_o1o2.ipynb')

# Next variant: additionally enables streaming.
nb = nbf.read(f"{nb_id}.ipynb", as_version=4)
code = "import lux\nlux.config.lazy_maintain = True\nlux.config.early_pruning = True\nlux.config.streaming = True"
def replace_links_in_notebook(nb_file):
    """Rewrite the link URLs in every markdown cell of a notebook, in place.

    Args:
        nb_file: Path of the notebook; it is read as nbformat version 4 and
            overwritten with the updated content.
    """
    notebook = nbf.read(nb_file, as_version=4)
    for cell in markdown_cells(notebook):
        cell['source'] = replace_link_urls(cell['source'])
    # Notebook files are UTF-8 JSON. Writing with the platform default
    # encoding could corrupt (or fail on) non-ASCII cell content.
    with open(nb_file, 'w', encoding='utf-8') as f:
        nbf.write(notebook, f)
def cli(configfile):
    """Render all notebooks of a project to themed HTML pages.

    Loads the default configuration, overlays the user's ``fletteconf.yaml``
    (best effort), converts every ``*.ipynb`` in the configured source
    directory to HTML with a table of contents built from its headings,
    renders each page through the theme template and finally copies the
    theme's static files into the target directory.

    Args:
        configfile: Path of the config file, or of a directory that
            contains ``fletteconf.yaml``.

    Exits with status 1 when the source directory contains no notebooks.
    """
    # default config
    # yaml.safe_load replaces yaml.load: calling yaml.load without a Loader
    # can construct arbitrary Python objects and is rejected by PyYAML >= 6.
    with open(resource("fletteconf-default.yaml")) as f:
        config = yaml.safe_load(f)

    # load config file
    configfile = os.path.abspath(configfile)
    if os.path.isdir(configfile):
        configfile = os.path.join(configfile, "fletteconf.yaml")
    try:
        with open(configfile) as f:
            config.update(yaml.safe_load(f))
    except Exception:
        # Deliberate best effort: a missing, empty or malformed user config
        # falls back to the defaults. KeyboardInterrupt/SystemExit are no
        # longer swallowed.
        pass

    # load template from theme
    themedir = resource("theme-" + config["theme"])
    if not os.path.isdir(themedir):
        themedir = config["theme"]  # absolute theme directory
    with open(os.path.join(themedir, "template.html.j2")) as tf:
        tmpl = Template(tf.read())

    # setup html exporter
    html_exporter = FletteHTMLExporter()
    html_exporter.template_file = "basic"

    # directories
    nbdir = os.path.join(os.path.dirname(configfile), config["source"])
    wwwdir = os.path.join(os.path.dirname(configfile), config["target"])

    # parse notebooks
    notebooks = sorted(glob.glob(os.path.join(nbdir, "*.ipynb")))
    if len(notebooks) == 0:
        print("\033[31merror\033[0m no notebooks found in {}".format(nbdir))
        sys.exit(1)
    data = []
    with click.progressbar(notebooks, label="process notebooks") as bar:
        for nb_fn in bar:
            rel_fn = os.path.splitext(os.path.relpath(nb_fn, nbdir))[0]

            # read notebook
            nb = nbformat.read(nb_fn, as_version=4)
            nb.metadata["language_info"]["pygments_lexer"] = "ipython"
            body, _ = html_exporter.from_notebook_node(nb)

            # toc
            toc = []
            titles = PyQuery(body).remove("a.anchor-link")("h1,h2,h3,h4,h5,h6")
            for h in titles.items():
                li = PyQuery("<li class='nav-item'><a>{}</a></li>")
                li("a").html(h.html())
                li("a").attr("href", "#" + h.attr("id"))
                # third character of "<hN ...>" is the heading level digit
                li.addClass("indent-{}".format(h.outer_html()[2]))
                toc.append(li)

            # fix: html_exporter self-closes empty html tags (<i/> -> <i></i>)
            body = PyQuery(body).outer_html(method="html")

            # template variables
            htmlfile = rel_fn.split(".", 1)[1] + ".html"  # remove numeric prefix
            # NOTE(review): toc[0] assumes every notebook has at least one
            # heading; a notebook without headings raises IndexError here.
            data.append(
                dict(htmlfile=htmlfile,
                     body=body,
                     title=toc[0]("a").html(method="html"),
                     toc=[t.outer_html(method="html") for t in toc]))

    # `title_sidebar: true` means "use the title of the first page"
    if config["title_sidebar"] == True:
        config["title_sidebar"] = data[0]["title"]

    # render
    for i, d in enumerate(data):
        htmlfile_full = os.path.join(wwwdir, d["htmlfile"])
        os.makedirs(os.path.dirname(htmlfile_full), exist_ok=True)
        # NOTE(review): the page is written with the platform default
        # encoding; confirm this matches the charset the template declares.
        with open(htmlfile_full, "w") as f:
            c = tmpl.render(
                toc_pre=data[:i],
                toc_post=data[i + 1:],
                **config,
                **d)
            f.write(c)

    # copy theme files
    with open(os.path.join(themedir, "jpy-flette-theme.yaml")) as f:
        theme_files = yaml.safe_load(f)
    with click.progressbar(theme_files, label="copy theme files ") as bar:
        for f in bar:
            try:
                # e.g. "- node_modules/jquery.js: static/jquery.js"
                srcf, trgf = list(f.items())[0]
            except (AttributeError, IndexError):
                # e.g. "- static/fonts/graviola/stylesheet.css"
                srcf = trgf = f
            if os.path.isfile(os.path.join(themedir, srcf)):
                os.makedirs(os.path.dirname(os.path.join(wwwdir, trgf)),
                            exist_ok=True)
                shutil.copy(os.path.join(themedir, srcf),
                            os.path.join(wwwdir, trgf))
            else:
                print("\n\033[31merror\033[0m {} is not a file".format(srcf))
def read_notebook(nb_path):
    """Load the notebook at ``nb_path`` as an nbformat version-4 notebook."""
    notebook = nbformat.read(nb_path, as_version=4)
    return notebook
def nbmemestra(nbfile, decorator, filepath):
    """Read ``nbfile`` as a version-4 notebook and hand it, together with
    ``decorator`` and ``filepath``, to ``nbmemestra_from_nbnode``."""
    notebook_node = nbformat.read(nbfile, as_version=4)
    result = nbmemestra_from_nbnode(notebook_node, decorator, filepath)
    return result
def __init__(self, path_to_jnb, path_to_save="dh-analytics.ipynb"):
    """Load a notebook and store the save path on the instance.

    Args:
        path_to_jnb: Path of the notebook file; it is read as nbformat
            version 4 into ``self.nb``.
        path_to_save: Stored unchanged as ``self.path_to_save``.
    """
    # Notebook files are UTF-8 JSON; do not rely on the platform default
    # encoding, which breaks on non-ASCII content under e.g. cp1252.
    with open(path_to_jnb, encoding="utf-8") as fp:
        self.nb = nbf.read(fp, 4)
    self.path_to_save = path_to_save
def output_HTML(read_file, output_file):
    """Convert a notebook to HTML and write the result as UTF-8.

    Args:
        read_file: Path of the source '.ipynb' notebook.
        output_file: Path of the '.html' file to create or overwrite.
    """
    exporter = HTMLExporter()
    # read_file is '.ipynb', output_file is '.html'
    output_notebook = nbformat.read(read_file, as_version=4)
    output, _resources = exporter.from_notebook_node(output_notebook)
    # Use a context manager so the handle is flushed and closed
    # deterministically (it was previously left to the garbage collector).
    with codecs.open(output_file, 'w', encoding='utf-8') as html_file:
        html_file.write(output)
import os
import nbformat as nbf

# Append "## Konsol" and "## Quiz" markdown headers to every notebook in the
# current directory that does not already contain a "## Quiz" cell.
for file in os.listdir("."):
    if file.endswith(".ipynb"):
        # Read with the same explicit UTF-8 encoding that is used for
        # writing below; the platform default may differ.
        with open(file, encoding="utf-8") as f:
            nb = nbf.read(f, as_version=4)
        if any(cell["source"] == "## Quiz" for cell in nb.cells):
            print("Headers already in", file)
            continue
        nb.cells.append(nbf.v4.new_markdown_cell(source="## Konsol"))
        nb.cells.append(nbf.v4.new_markdown_cell(source="## Quiz"))
        with open(file, "w", encoding="utf-8") as f:
            nbf.write(nb, f)
        print("Headers added to", file)
def get_notebook(self):
    """Read 'DiabetesNB/Diabetes.ipynb' as nbformat version 4 into ``self.nb``."""
    # Notebook files are UTF-8 JSON; open explicitly as UTF-8 instead of
    # relying on the platform default encoding.
    with open('DiabetesNB/Diabetes.ipynb', encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)
    self.nb = nb
###########################################
##### Otterifies Notebooks from Gofer #####
###########################################

import json
import sys
import nbformat
import re
import os

GOFER_IMPORT_REGEX = r"from gofer\.ok import check"
GOFER_REGEX = r"check\([\"'](.+)\.py[\"']\)"


def _otter_check_call(match):
    """Return the ``grader.check`` call for one matched Gofer ``check`` call.

    The question id is the file name (without directory) captured by
    GOFER_REGEX.
    """
    q_id = os.path.split(match[1])[1]
    return f"grader.check(\"{q_id}\")"


# Every notebook named on the command line is converted in place.
for notebook in sys.argv[1:]:
    print(f"Converting {notebook}")
    nb = nbformat.read(notebook, as_version=nbformat.NO_CONVERT)
    for cell in nb.cells:
        if cell.cell_type != 'code':
            continue
        # A replacement function gives each check(...) call its own question
        # id. Previously the id of the first match was substituted into every
        # call of the cell. It also avoids escape processing of the id.
        cell.source = re.sub(GOFER_REGEX, _otter_check_call, cell.source)
        cell.source = re.sub(GOFER_IMPORT_REGEX,
                             "import otter\ngrader = otter.Notebook()",
                             cell.source)
    nbformat.write(nb, notebook)
parser.add_argument('-k', '--key-value', dest='args_dict', action=StoreDictKeyPair, nargs='+', metavar='KEY=VAL') return parser.parse_args() if __name__ == '__main__': options = get_options() if options.args_dict is None: options.args_dict = {} with open(options.template) as template: nb = nbformat.read(template, as_version=4) orig_parameters = extract_parameters(nb) params = parameter_values(orig_parameters, **options.args_dict) print('Old parameters: %s' % orig_parameters) print('New parameters: %s' % params) new_nb = replace_definitions(nb, params, execute=False) with open(options.output, 'w') as output: nbformat.write(new_nb, output)
def process_notebook(self, disable_warnings: bool = True) -> None:
    """Process the notebook and create all the pictures and files

    This method runs the notebook using the :mod:`nbconvert` and
    :mod:`nbformat` modules. It creates the :attr:`outfile` notebook,
    a python and a rst file

    Parameters
    ----------
    disable_warnings: bool
        If True, a temporary cell that routes warnings through
        :mod:`logging` at ERROR level is inserted before the notebook is
        processed and removed again afterwards"""
    infile = self.infile
    outfile = self.outfile
    in_dir = os.path.dirname(infile) + os.path.sep
    odir = os.path.dirname(outfile) + os.path.sep
    create_dirs(os.path.join(odir, 'images'))
    ep = nbconvert.preprocessors.ExecutePreprocessor(
        timeout=300)
    cp = nbconvert.preprocessors.ClearOutputPreprocessor(
        timeout=300)

    self.nb = nb = nbformat.read(infile, nbformat.current_nbformat)
    # disable warnings in the rst file
    if disable_warnings:
        # Find the first code cell; `i` is its index and is reused further
        # down to remove the temporary cell again.
        # NOTE(review): with an empty notebook neither `i` nor `cell` is
        # bound and the lines below fail; without any code cell the last
        # cell is used as the template instead - confirm both are acceptable.
        for i, cell in enumerate(nb.cells):
            if cell['cell_type'] == 'code':
                cell = cell.copy()
                break
        cell = cell.copy()
        cell.source = """
import logging
logging.captureWarnings(True)
logging.getLogger('py.warnings').setLevel(logging.ERROR)
"""
        nb.cells.insert(i, cell)
    # write and process rst_file
    if self.preprocess:
        t = dt.datetime.now()
        logger.info('Processing %s', self.infile)
        try:
            ep.preprocess(nb, {'metadata': {'path': in_dir}})
        except nbconvert.preprocessors.execute.CellExecutionError:
            # A failing cell is logged but does not abort the processing.
            logger.critical(
                'Error while processing %s!', self.infile, exc_info=True)
        else:
            logger.info('Done. Seconds needed: %i',
                        (dt.datetime.now() - t).seconds)
    if disable_warnings:
        # Remove the temporary warning-suppression cell inserted above.
        nb.cells.pop(i)

    self.py_file = self.get_out_file('py')

    if self.remove_tags:
        tp = nbconvert.preprocessors.TagRemovePreprocessor(timeout=300)
        for key, val in self.tag_options.items():
            setattr(tp, key, set(val))
        # Tag removal is applied to a copy so that the notebook written
        # below keeps all of its cells.
        nb4rst = deepcopy(nb)
        tp.preprocess(nb4rst, {'metadata': {'path': in_dir}})
    else:
        nb4rst = nb

    self.create_rst(nb4rst, in_dir, odir)

    if self.clear:
        cp.preprocess(nb, {'metadata': {'path': in_dir}})
    # write notebook file
    nbformat.write(nb, outfile)
    self.create_py(nb)
def export_notebook_code(notebook_name, path=None):
    """Print the code of notebook ``notebook_name`` as a Python module.

    A header with the module name and the notebook's modification time comes
    first.  Each code cell is then emitted verbatim, commented out, or
    wrapped for ``__main__``-only execution, depending on which of the
    module's cell patterns it matches.  Of the other cells only those
    starting with ``#`` are used: their first line is emitted as a comment
    plus a ``print`` of the title.  ``path`` is currently unused.
    """
    notebook_path = notebook_name

    # load the notebook
    with io.open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, 4)

    # Get versioning info
    mtime = os.path.getmtime(notebook_path)
    modified = datetime.datetime.fromtimestamp(mtime).astimezone()
    timestamp = modified.isoformat(sep=' ', timespec='seconds')
    module, _ext = os.path.splitext(os.path.basename(notebook_name))

    print_utf8(HEADER.format(module=module, timestamp=timestamp))

    sep = ''
    for cell in notebook.cells:
        if cell.cell_type != 'code':
            # Anything else
            contents = cell.source
            if not contents.startswith('#'):
                # We don't include contents, as they fall under a different
                # license
                continue

            # Header
            line = first_line(contents)
            print_utf8("\n" + prefix_code(decode_title(line), "# ") + "\n")
            print_if_main("print(" + repr(sep + decode_title(line)) + ")\n\n")
            sep = '\n'
            continue

        code = cell.source
        if not code.strip():
            # Empty code
            continue

        bang = code.startswith('!')
        if bang:
            # Shell escape: run the command through os.system instead
            code = "import os\nos.system(" + repr(code[1:]) + ")"

        if RE_IMPORT_FUZZINGBOOK_UTILS.match(code):
            # Don't import all of fuzzingbook_utils
            # (requires nbformat & Ipython)
            print_if_main(SET_FIXED_SEED)
        elif RE_IMPORT_IF_MAIN.match(code):
            print_if_main(code)
        elif RE_FROM_FUZZINGBOOK_UTILS.match(code):
            # This would be "from fuzzingbook_utils import HTML"
            print_utf8("\n" + fix_imports(code) + "\n")
        elif RE_IGNORE.match(code):
            # Code to ignore - comment out
            print_utf8("\n" + prefix_code(code, "# ") + "\n")
        elif RE_CODE.match(code) and not bang:
            # imports and defs
            print_utf8("\n" + fix_imports(code) + "\n")
        elif is_all_comments(code):
            # Only comments
            print_utf8("\n" + code + "\n")
        else:
            print_if_main(code)
def main():
    """Command-line entry point of nbstripout.

    Parses the command line, handles the installation/status/version tasks
    (each of which exits the process), and otherwise strips output from the
    given notebook files in place - or, without files, from a notebook read
    on stdin and written to stdout.

    Exits with status 1 when an input is not a valid notebook or a file is
    not found; any other error while stripping a file is reported on stderr
    and re-raised.
    """
    parser = ArgumentParser(epilog=__doc__,
                            formatter_class=RawDescriptionHelpFormatter)
    task = parser.add_mutually_exclusive_group()
    task.add_argument('--dry-run', action='store_true',
                      help='Print which notebooks would have been stripped')
    task.add_argument('--install', action='store_true',
                      help='Install nbstripout in the current repository (set '
                           'up the git filter and attributes)')
    task.add_argument('--uninstall', action='store_true',
                      help='Uninstall nbstripout from the current repository '
                           '(remove the git filter and attributes)')
    task.add_argument(
        '--is-installed', action='store_true',
        help='Check if nbstripout is installed in current repository')
    task.add_argument(
        '--status', action='store_true',
        help='Print status of nbstripout installation in current '
             'repository and configuration summary if installed')
    parser.add_argument('--keep-count', action='store_true',
                        help='Do not strip the execution count/prompt number')
    parser.add_argument('--keep-output', action='store_true',
                        help='Do not strip output', default=None)
    parser.add_argument(
        '--extra-keys', default='',
        help='Extra keys to strip from metadata, e.g. metadata.foo '
             'cell.metadata.bar')
    parser.add_argument('--attributes', metavar='FILEPATH',
                        help='Attributes file to add the filter to (in '
                             'combination with --install/--uninstall), '
                             'defaults to .git/info/attributes')
    parser.add_argument('--global', dest='_global', action='store_true',
                        help='Use global git config (default is local config)')
    task.add_argument('--version', action='store_true',
                      help='Print version')
    parser.add_argument(
        '--force', '-f', action='store_true',
        help='Strip output also from files with non ipynb extension')
    parser.add_argument('--textconv', '-t', action='store_true',
                        help='Prints stripped files to STDOUT')
    parser.add_argument('files', nargs='*', help='Files to strip output from')
    args = parser.parse_args()

    git_config = ['git', 'config'] + (['--global'] if args._global else [])
    if args.install:
        sys.exit(
            install(git_config, user=args._global, attrfile=args.attributes))
    if args.uninstall:
        sys.exit(
            uninstall(git_config, user=args._global, attrfile=args.attributes))
    if args.is_installed:
        sys.exit(status(git_config, user=args._global, verbose=False))
    if args.status:
        sys.exit(status(git_config, user=args._global, verbose=True))
    if args.version:
        print(__version__)
        sys.exit(0)

    try:
        # universal_newlines=True makes check_output return text. Without it
        # Python 3 returns bytes, the join below raises TypeError, and bytes
        # keys would be handed to strip_output.
        extra_keys = check_output(
            git_config + ['filter.nbstripout.extrakeys'],
            universal_newlines=True).strip()
        if args.extra_keys:
            extra_keys = ' '.join((extra_keys, args.extra_keys))
    except (CalledProcessError, FileNotFoundError):
        # No configured extra keys (or git not available): use the CLI value.
        extra_keys = args.extra_keys

    input_stream = None
    if sys.version_info < (3, 0):
        import codecs
        # Use UTF8 reader/writer for stdin/stdout
        # http://stackoverflow.com/a/1169209
        if sys.stdin:
            input_stream = codecs.getreader('utf8')(sys.stdin)
        output_stream = codecs.getwriter('utf8')(sys.stdout)
    else:
        # Wrap input/output stream in UTF-8 encoded text wrapper
        # https://stackoverflow.com/a/16549381
        if sys.stdin:
            input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
        output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8',
                                         newline='')

    for filename in args.files:
        if not (args.force or filename.endswith('.ipynb')):
            continue
        try:
            with io.open(filename, 'r', encoding='utf8') as f:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=UserWarning)
                    nb = read(f, as_version=NO_CONVERT)

            nb = strip_output(nb, args.keep_output, args.keep_count,
                              extra_keys)

            if args.dry_run:
                output_stream.write(
                    'Dry run: would have stripped {}\n'.format(filename))
                continue

            if args.textconv:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=UserWarning)
                    output_stream.write(writes(nb))
                    output_stream.flush()
            else:
                with io.open(filename, 'w', encoding='utf8',
                             newline='') as f:
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore", category=UserWarning)
                        f.write(writes(nb))
        except NotJSONError:
            print("'{}' is not a valid notebook".format(filename),
                  file=sys.stderr)
            sys.exit(1)
        except FileNotFoundError:
            print("Could not strip '{}': file not found".format(filename),
                  file=sys.stderr)
            sys.exit(1)
        except Exception:
            # Ignore exceptions for non-notebook files.
            print("Could not strip '{}'".format(filename), file=sys.stderr)
            raise

    # Filter mode: no files given, so strip the notebook arriving on stdin.
    if not args.files and input_stream:
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=UserWarning)
                nb = read(input_stream, as_version=NO_CONVERT)

            nb = strip_output(nb, args.keep_output, args.keep_count,
                              extra_keys)

            if args.dry_run:
                output_stream.write('Dry run: would have stripped input from '
                                    'stdin\n')
            else:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore", category=UserWarning)
                    output_stream.write(writes(nb))
                    output_stream.flush()
        except NotJSONError:
            print('No valid notebook detected', file=sys.stderr)
            sys.exit(1)
def test_notebook_execution_with_pandas_backend(
    titanic_data_context_no_data_docs_no_checkpoint_store,
):
    """
    Check that the scaffold notebook is written to disk and executes
    without error.

    Setup:
    - create an expectation suite and confirm no validation has run yet

    Then:
    - render the scaffold notebook and execute it (a failing cell raises,
      e.g. CellExecutionError)
    - create a new context from disk
    - verify that a validation has been run with our expectation suite
    """
    # Since we'll run the notebook, we use a context with no data docs to
    # avoid the renderer's default behavior of building and opening docs,
    # which is not part of this test.
    context = titanic_data_context_no_data_docs_no_checkpoint_store
    root_dir = context.root_directory
    uncommitted_dir = os.path.join(root_dir, "uncommitted")

    suite_name = "my_suite"
    suite = context.create_expectation_suite(suite_name)

    batch_kwargs = {
        "datasource": "mydatasource",
        "path": os.path.join(root_dir, "..", "data", "Titanic.csv"),
    }

    # Sanity check test setup
    expected_datasource = {
        "module_name": "great_expectations.datasource",
        "class_name": "PandasDatasource",
        "data_asset_type": {
            "module_name": "great_expectations.dataset",
            "class_name": "PandasDataset",
        },
        "batch_kwargs_generators": {
            "mygenerator": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "../data",
            }
        },
        "name": "mydatasource",
    }
    assert context.list_expectation_suite_names() == [suite_name]
    assert context.list_datasources() == [expected_datasource]
    assert context.get_validation_result(suite_name) == {}

    notebook_path = os.path.join(uncommitted_dir, f"{suite_name}.ipynb")
    assert not os.path.isfile(notebook_path)

    # Create notebook
    SuiteScaffoldNotebookRenderer(
        context, suite, batch_kwargs
    ).render_to_disk(notebook_path)
    assert os.path.isfile(notebook_path)

    with open(notebook_path) as f:
        nb = nbformat.read(f, as_version=4)

    # Run notebook
    executor = ExecutePreprocessor(timeout=600, kernel_name="python3")
    executor.preprocess(nb, {"metadata": {"path": uncommitted_dir}})

    # Useful to inspect executed notebook
    output_notebook = os.path.join(uncommitted_dir, "output.ipynb")
    with open(output_notebook, "w") as f:
        nbformat.write(nb, f)

    # Assertions about output
    context = DataContext(root_dir)
    expected_statistics = {
        "evaluated_expectations": 2,
        "successful_expectations": 2,
        "unsuccessful_expectations": 0,
        "success_percent": 100,
    }
    obs_validation_result = context.get_validation_result(suite_name)
    assert obs_validation_result.statistics == expected_statistics

    suite = context.get_expectation_suite(suite_name)
    assert suite.expectations

    columns_and_expectations = get_set_of_columns_and_expectations_from_suite(
        suite)
    columns_with_expectations, expectations_from_suite = columns_and_expectations
    assert columns_with_expectations == set()
    assert expectations_from_suite == {
        "expect_table_columns_to_match_ordered_list",
        "expect_table_row_count_to_be_between",
    }
def run(self):
    """Stage the Jupyter tutorials for the docs build and execute them.

    Returns immediately when ``self.execute_notebooks`` is ``"never"``.
    Otherwise the test data, the notebooks of every example directory and
    any ``images`` folders are copied into the tutorial tree.  Notebooks not
    listed for direct copy are executed in place when the mode is
    ``"always"``, or ``"auto"`` with code but no stored output.  Finally the
    ``t_icp_registration`` tutorial is overwritten via
    ``self.overwrite_tutorial_file``.
    """
    if self.execute_notebooks == "never":
        return

    # Setting os.environ["CI"] will disable interactive (blocking) mode in
    # Jupyter notebooks
    os.environ["CI"] = "true"

    docs_dir = Path(self.current_file_dir)

    # Copy test_data directory to the tutorial folder
    test_data_src = docs_dir.parent / "examples" / "test_data"
    test_data_dst = docs_dir / "test_data"
    if test_data_dst.exists():
        shutil.rmtree(test_data_dst)
    shutil.copytree(test_data_src, test_data_dst)

    # Copy and execute notebooks in the tutorial folder
    nb_paths = []
    nb_direct_copy = [
        'tensor.ipynb', 'hashmap.ipynb', 't_icp_registration.ipynb',
        'jupyter_visualization.ipynb'
    ]
    example_dirs = [
        "geometry", "core", "pipelines", "visualization", "t_pipelines"
    ]
    for example_dir in example_dirs:
        in_dir = docs_dir / "jupyter" / example_dir
        out_dir = docs_dir / "tutorial" / example_dir
        out_dir.mkdir(parents=True, exist_ok=True)
        shutil.copy(
            in_dir.parent / "open3d_tutorial.py",
            out_dir.parent / "open3d_tutorial.py",
        )

        if self.clean_notebooks:
            for stale_nb in out_dir.glob("*.ipynb"):
                print("Delete: {}".format(stale_nb))
                stale_nb.unlink()

        for nb_in_path in in_dir.glob("*.ipynb"):
            nb_out_path = out_dir / nb_in_path.name
            _update_file(nb_in_path, nb_out_path)
            nb_paths.append(nb_out_path)

        # Copy the 'images' dir present in some example dirs.
        images_src = in_dir / "images"
        images_dst = out_dir / "images"
        if images_src.is_dir():
            if images_dst.exists():
                shutil.rmtree(images_dst)
            print("Copy: {}\n -> {}".format(images_src, images_dst))
            shutil.copytree(images_src, images_dst)

    # Execute Jupyter notebooks
    for nb_path in nb_paths:
        if nb_path.name in nb_direct_copy:
            print("[Processing notebook {}, directly copied]".format(
                nb_path.name))
            continue

        print("[Processing notebook {}]".format(nb_path.name))
        with open(nb_path, encoding="utf-8") as f:
            nb = nbformat.read(f, as_version=4)

        # https://github.com/spatialaudio/nbsphinx/blob/master/src/nbsphinx.py
        has_code = any(c.source for c in nb.cells if c.cell_type == "code")
        has_output = any(
            c.get("outputs") or c.get("execution_count")
            for c in nb.cells
            if c.cell_type == "code")
        if self.execute_notebooks == "auto" and has_code and not has_output:
            execute = True
        else:
            execute = self.execute_notebooks == "always"
        print("has_code: {}, has_output: {}, execute: {}".format(
            has_code, has_output, execute))

        if not execute:
            continue

        ep = nbconvert.preprocessors.ExecutePreprocessor(timeout=6000)
        try:
            ep.preprocess(nb, {"metadata": {"path": nb_path.parent}})
        except nbconvert.preprocessors.execute.CellExecutionError:
            print("Execution of {} failed, this will cause CI to fail.".format(
                nb_path.name))
            # Only a GitHub Actions run treats the failure as fatal.
            if "GITHUB_ACTIONS" in os.environ:
                raise

        with open(nb_path, "w", encoding="utf-8") as f:
            nbformat.write(nb, f)

    url = "https://github.com/isl-org/Open3D/files/7592880/t_icp_registration.zip"
    output_file = "t_icp_registration.ipynb"
    output_file_path = os.path.join(
        self.current_file_dir,
        "tutorial/t_pipelines/t_icp_registration.ipynb")
    self.overwrite_tutorial_file(url, output_file, output_file_path)
def gen_cells(ipynb_file) -> nbformat.notebooknode.NotebookNode:
    """Yield the cells of the notebook at ``ipynb_file`` one by one.

    The notebook is read without converting it to another format version.
    """
    notebook = nbformat.read(ipynb_file, nbformat.NO_CONVERT)
    yield from notebook['cells']
def bundle_declarative_widgets(output_path, notebook_file, widget_folder='static'):
    '''
    Copies the declarative widgets frontend files into a dashboard bundle.

    Nothing is copied unless at least one notebook cell mentions
    'urth-core-'. Otherwise two directories are created under output_path
    (inside widget_folder, when one is given):

    urth_widgets/js: The js of the urth_widgets extension
    urth_components: The bower components shipped with the extension

    NOTE: This function is too specific to urth widgets. In the future we
    should investigate ways to make this more generic.

    :param output_path: The output path of the dashboard being assembled
    :param notebook_file: The absolute path to the notebook file being packaged
    :param widget_folder: Subfolder name in which the widgets should be
        contained, or None to place them directly under output_path.
    :raises web.HTTPError: 500 when the urth_widgets extension is not found
    '''
    # Check if any of the cells contain widgets; if not, we do not need to
    # copy the bower components.
    notebook = nbformat.read(notebook_file, 4)

    # A plain substring test is used instead of a regex to help future-proof
    # against changes in how users write urth-core-import
    # (i.e. <link is=urth-core-import> vs. <urth-core-import>)
    uses_widgets = False
    for cell in notebook.cells:
        if 'urth-core-' in cell.get('source'):
            uses_widgets = True
            break
    if not uses_widgets:
        return

    # Directory of declarative widgets extension
    widgets_dir = get_extension_path('urth_widgets')
    if widgets_dir is None:
        raise web.HTTPError(500, 'Missing jupyter_declarativewidgets extension')

    if widget_folder is not None:
        # Root of declarative widgets within a dashboard app
        output_widgets_dir = pjoin(output_path, widget_folder, 'urth_widgets/')
        # Web referenceable path from which all urth widget components
        # will be served
        output_components_dir = pjoin(output_path, widget_folder,
                                      'urth_components/')
    else:
        output_widgets_dir = pjoin(output_path, 'urth_widgets/')
        output_components_dir = pjoin(output_path, 'urth_components/')

    # JavaScript entry point for widgets in dashboard app
    output_js_dir = pjoin(output_widgets_dir, 'js')

    # Copy declarative widgets js into the app under the output directory
    shutil.copytree(pjoin(widgets_dir, 'js'), output_js_dir)

    # Widgets bower components could be under 'urth_components' or
    # 'bower_components' depending on the version of widgets being used.
    widgets_components_dir = pjoin(widgets_dir, 'urth_components')
    if not os.path.isdir(widgets_components_dir):
        widgets_components_dir = pjoin(widgets_dir, 'bower_components')

    # Install the widget components into the output components directory
    shutil.copytree(widgets_components_dir, output_components_dir)
def notebook_execute(options, status):
    """Execute the notebook described by ``options`` and write it back.

    A setup cell is inserted at the front of the notebook, every cell is
    run through the notebook client, the setup cell is removed again and
    the notebook is written to the input file. When caching is enabled the
    result may instead be served from, and is stored in, ``.jupyter_cache``.

    State is kept across calls as attributes on this function
    (``python_cmd`` and ``kernel_deps``).

    Args:
        options: Dict of execution options; the keys read here are
            "python_cmd", "target", "format", "resourceDir", "params",
            "cwd" and "quiet".
        status: Callable taking a string, used for progress output.

    Returns:
        True when the notebook was served from the cache; otherwise
        whether ``notebook_execute.kernel_deps`` is set (not None).

    Raises:
        RestartKernel: If the python command or a recorded kernel
            dependency differs from the value seen on a previous call.
        ImportError: If caching is requested but ``get_cache`` is
            unavailable.
    """
    # if this is a re-execution of a previously loaded kernel,
    # make sure the underlying python version hasn't changed
    python_cmd = options.get("python_cmd", None)
    if python_cmd:
        if hasattr(notebook_execute, "python_cmd"):
            if notebook_execute.python_cmd != python_cmd:
                raise RestartKernel
        else:
            notebook_execute.python_cmd = python_cmd

    # unpack options
    # NOTE(review): the locals `input`, `format` and `eval` shadow builtins
    # for the rest of this function.
    input = options["target"]["input"]
    format = options["format"]
    resource_dir = options["resourceDir"]
    params = options.get("params", None)
    run_path = options.get("cwd", "")
    quiet = options.get('quiet', False)

    # change working directory and strip dir off of paths
    original_input = input
    os.chdir(Path(input).parent)
    input = Path(input).name

    # read variables out of format
    execute = format["execute"]

    eval = execute["eval"]
    allow_errors = bool(execute["error"])
    fig_width = execute["fig-width"]
    fig_height = execute["fig-height"]
    fig_format = execute["fig-format"]
    fig_dpi = execute["fig-dpi"]
    if "cache" in execute:
        cache = execute["cache"]
    else:
        cache = "user"

    # set environment variables
    os.environ["QUARTO_FIG_WIDTH"] = str(fig_width)
    os.environ["QUARTO_FIG_HEIGHT"] = str(fig_height)
    if fig_format == "retina":
        # "retina" is exported as png at twice the configured dpi
        os.environ["QUARTO_FIG_DPI"] = str(fig_dpi * 2)
        os.environ["QUARTO_FIG_FORMAT"] = "png"
    else:
        os.environ["QUARTO_FIG_DPI"] = str(fig_dpi)
        os.environ["QUARTO_FIG_FORMAT"] = fig_format

    # read the notebook
    nb = nbformat.read(input, as_version=NB_FORMAT_VERSION)

    # inject parameters if provided
    if params:
        nb_parameterize(nb, params)

    # insert setup cell
    setup_cell = nb_setup_cell(nb.metadata.kernelspec, resource_dir,
                               fig_width, fig_height, fig_format, fig_dpi,
                               run_path)
    nb.cells.insert(0, setup_cell)

    # are we using the cache, if so connect to the cache, and then if we aren't in 'refresh'
    # (forced re-execution) mode then try to satisfy the execution request from the cache
    if cache == True or cache == "refresh":
        if not get_cache:
            raise ImportError(
                'The jupyter-cache package is required for cached execution')
        nb_cache = get_cache(".jupyter_cache")
        if not cache == "refresh":
            cached_nb = nb_from_cache(nb, nb_cache)
            if cached_nb:
                # drop the setup cell before writing the cached notebook
                cached_nb.cells.pop(0)
                nb_write(cached_nb, input)
                status("(Notebook read from cache)\n\n")
                return True  # can persist kernel
    else:
        nb_cache = None

    # create resources for execution
    resources = dict({"metadata": {
        "input": original_input,
    }})
    if run_path:
        resources["metadata"]["path"] = run_path

    # create NotebookClient
    client, created = notebook_init(nb, resources, allow_errors)

    # complete progress if necessary
    if (not quiet) and created:
        status("Done\n")

    # compute total code cells (for progress)
    current_code_cell = 1
    total_code_cells = sum(cell.cell_type == 'code'
                           for cell in client.nb.cells)

    # execute the cells
    for index, cell in enumerate(client.nb.cells):
        # progress (the setup cell at index 0 is not reported, hence the
        # "- 1" in the displayed counters)
        progress = (not quiet) and cell.cell_type == 'code' and index > 0
        if progress:
            status(" Cell {0}/{1}...".format(current_code_cell - 1,
                                             total_code_cells - 1))

        # clear cell output
        cell = cell_clear_output(cell)

        # execute cell
        if cell.cell_type == 'code':
            cell = cell_execute(
                client,
                cell,
                index,
                current_code_cell,
                eval,
                index > 0  # add_to_history
            )
            cell.execution_count = current_code_cell

        # if this was the setup cell, see if we need to exit b/c dependencies are out of date
        if index == 0:
            kernel_deps = nb_kernel_depenencies(cell)
            if kernel_deps:
                if hasattr(notebook_execute, "kernel_deps"):
                    for path in kernel_deps.keys():
                        if path in notebook_execute.kernel_deps.keys():
                            if notebook_execute.kernel_deps[
                                    path] != kernel_deps[path]:
                                raise RestartKernel
                        else:
                            notebook_execute.kernel_deps[path] = kernel_deps[
                                path]
                else:
                    notebook_execute.kernel_deps = kernel_deps
            else:
                notebook_execute.kernel_deps = {}

            # we are done w/ setup (with no restarts) so it's safe to print 'Executing...'
            if not quiet:
                status("\nExecuting '{0}'\n".format(input))

        # assign cell
        client.nb.cells[index] = cell

        # increment current code cell
        if cell.cell_type == 'code':
            current_code_cell += 1

        # end progress
        if progress:
            status("Done\n")

    # set widgets metadata
    client.set_widgets_metadata()

    # write to the cache
    if nb_cache:
        nb_write(client.nb, input)
        nb_cache.cache_notebook_file(path=Path(input), overwrite=True)

    # remove setup cell (then renumber execution_Count)
    client.nb.cells.pop(0)
    for index, cell in enumerate(client.nb.cells):
        if cell.cell_type == 'code':
            cell.execution_count = cell.execution_count - 1

    # re-write without setup cell
    nb_write(client.nb, input)

    # execute cleanup cell (appended only for the duration of the call, so
    # it is not part of the notebook written above)
    cleanup_cell = nb_cleanup_cell(nb.metadata.kernelspec, resource_dir)
    nb.cells.append(cleanup_cell)
    client.execute_cell(cell=cleanup_cell,
                        cell_index=len(client.nb.cells) - 1,
                        store_history=False)
    nb.cells.pop()

    # progress
    if not quiet:
        status("\n")

    # return flag indicating whether we should persist
    persist = notebook_execute.kernel_deps != None
    return persist
def main(arglist):
    """Process IPython notebooks from a list of files.

    Each eligible notebook is loaded, optionally checked for sequential
    execution, and executed from top to bottom.  Failures are collected and
    reported only after every notebook has been attempted.  Notebooks whose
    path starts with ``tutorials`` are then post-processed: Colab badges and
    metadata are fixed, the executed notebook is written back, and a student
    version plus extracted solution images and snippets are written to
    sibling sub-directories.

    Args:
        arglist: Command-line arguments, forwarded to ``parse_args``.

    This function does not return normally: every path ends in ``sys.exit``
    or ``exit(errors)``.
    """
    args = parse_args(arglist)

    # Filter paths from the git manifest
    # - Only process .ipynb
    # - Don't process student notebooks
    # - Don't process deleted notebooks
    def should_process(path):
        # Short-circuit so the filesystem is only touched for candidates
        # that already passed the cheap string checks.
        return (
            path.endswith(".ipynb")
            and "student/" not in path
            and os.path.isfile(path)
        )

    nb_paths = [arg for arg in args.files if should_process(arg)]
    if not nb_paths:
        print("No notebook files found")
        sys.exit(0)

    # Allow environment to override stored kernel name
    exec_kws = {"timeout": 600}
    if "NB_KERNEL" in os.environ:
        exec_kws["kernel_name"] = os.environ["NB_KERNEL"]

    # Defer failures until after processing all notebooks
    errors = {}
    notebooks = {}
    for nb_path in nb_paths:

        # Load the notebook structure.  Notebook files are UTF-8 JSON, so the
        # encoding is stated explicitly instead of relying on the platform
        # default (which is not UTF-8 everywhere).
        with open(nb_path, encoding="utf-8") as f:
            nb = nbformat.read(f, nbformat.NO_CONVERT)

        if not sequentially_executed(nb):
            # NOTE(review): the attribute is spelled `require_sequntial`; it
            # must match the destination name defined in parse_args — confirm.
            if args.require_sequntial:
                err = (
                    "Notebook is not sequentially executed on a fresh kernel."
                    "\n"
                    "Please do 'Restart and run all' before pushing to Github."
                )
                errors[nb_path] = err
                continue

        # Run the notebook from top to bottom, catching errors
        print(f"Executing {nb_path}")
        executor = ExecutePreprocessor(**exec_kws)
        try:
            executor.preprocess(nb)
        except Exception as err:
            # Log the error, but then continue
            errors[nb_path] = err
        else:
            notebooks[nb_path] = nb

    # NOTE(review): `exit` receives the errors dict; this assumes the module
    # defines its own `exit(errors)` reporting helper — confirm.
    if errors or args.check_only:
        exit(errors)

    # TODO Check compliancy with PEP8, generate a report, but don't fail

    # Further filter the notebooks to run post-processing only on tutorials
    tutorials = {
        nb_path: nb
        for nb_path, nb in notebooks.items()
        if nb_path.startswith("tutorials")
    }

    # TODO Check notebook name format?
    # (If implemented, update the CI workflow to only run on tutorials)

    # Post-process notebooks to remove solution code and write both versions
    for nb_path, nb in tutorials.items():

        # Extract components of the notebook path
        nb_dir, nb_fname = os.path.split(nb_path)
        nb_name, _ = os.path.splitext(nb_fname)

        # Loop through the cells and fix any Colab badges we encounter
        for cell in nb.get("cells", []):
            if has_colab_badge(cell):
                redirect_colab_badge_to_master_branch(cell)

        # Set the colab metadata to have the notebook name match the filepath
        if "colab" in nb["metadata"]:
            nb["metadata"]["colab"]["name"] = f"NeuromatchAcademy_{nb_name}"

        # Write out the executed version of the original notebooks
        print(f"Writing complete notebook to {nb_path}")
        with open(nb_path, "w", encoding="utf-8") as f:
            nbformat.write(nb, f)

        # Create subdirectories, if they don't exist
        student_dir = make_sub_dir(nb_dir, "student")
        static_dir = make_sub_dir(nb_dir, "static")
        solutions_dir = make_sub_dir(nb_dir, "solutions")

        # Generate the student version and save it to a subdirectory
        print(f"Extracting solutions from {nb_path}")
        processed = extract_solutions(nb, nb_dir, nb_name)
        student_nb, static_images, solution_snippets = processed

        # Loop through cells and point the colab badge at the student version
        for cell in student_nb.get("cells", []):
            if has_colab_badge(cell):
                redirect_colab_badge_to_student_version(cell)

        # Write the student version of the notebook
        student_nb_path = os.path.join(student_dir, nb_fname)
        print(f"Writing student notebook to {student_nb_path}")
        with open(student_nb_path, "w", encoding="utf-8") as f:
            nbformat.write(student_nb, f)

        # Write the images extracted from the solution cells
        print(f"Writing solution images to {static_dir}")
        for fname, image in static_images.items():
            fname = fname.replace("static", static_dir)
            image.save(fname)

        # Write the solution snippets
        print(f"Writing solution snippets to {solutions_dir}")
        for fname, snippet in solution_snippets.items():
            fname = fname.replace("solutions", solutions_dir)
            with open(fname, "w", encoding="utf-8") as f:
                f.write(snippet)

    exit(errors)
def export(self, file):
    """Convert the notebook stored at ``file`` using ``self.exporter``.

    The file is opened with ``self.read_encoding`` and parsed as a version-4
    notebook.  The exported body and resources are then handed, together
    with the original path, to ``self.replace_image_names``, whose result is
    returned.
    """
    with open(file, "r", encoding=self.read_encoding) as handle:
        notebook = nbformat.read(handle, as_version=4)
        exported = self.exporter.from_notebook_node(notebook)
        body, resources = exported
        return self.replace_image_names(body, resources, file)
yield cell  # tail of a generator whose header lies above this span; left untouched


def link_fix(text, name_dict):
    """
    Replace old file names with new in markdown links.

    For every ``old -> new`` pair in ``name_dict``, each occurrence of the
    link target ``](old)`` in ``text`` is rewritten to ``](new)``.  The
    replacements are applied one after another in the dict's iteration
    order, so a later pair also sees the output of earlier pairs.

    Returns the rewritten text; ``text`` itself is not modified.
    """
    new_text = text
    for old, new in name_dict.items():
        # Match only the "](target)" part so link labels and plain prose
        # that merely mention the file name are left alone.
        new_text = new_text.replace(f']({old})', f']({new})')
    return new_text


if __name__ == '__main__':
    # Build the old-name -> new-name mapping from the CSV table.
    # assumes each row unpacks into exactly two values — TODO confirm
    names = {k: v for k, v in Table.read('old-and-new-names.csv')}

    # Only the notebooks listed under their *new* names are visited.
    for notebook_name in names.values():
        try:
            notebook = nbf.read(notebook_name, as_version=4)
        except FileNotFoundError:
            # Listed in the table but not present on disk: nothing to fix.
            continue

        for cell in markdown_cells(notebook):
            new_source = link_fix(cell['source'], names)
            if new_source != cell['source']:
                print(f'fixed link in {notebook_name}')
                cell['source'] = new_source

        # The notebook is written back whether or not any link changed.
        with open(notebook_name, 'w') as f:
            nbf.write(notebook, f)
def run(self, parent_trace):
    """Load, execute and optionally save the notebook, then return it as a dictionary.

    The notebook is read from ``self.src_folder``/``self.src_filename``,
    executed with an ``ExecutePreprocessor`` whose working directory is
    ``self.destination_folder``, and written to
    ``self.destination_folder``/``self.destination_filename`` when both of
    those attributes are set.

    Args:
        parent_trace: Trace object used to derive a step-specific trace for
            each phase and passed to the warning utilities.

    Returns:
        The result of ``NotebookUtils._val_to_dict`` applied to the executed
        notebook.

    Raises:
        ApodeixiError: If loading, executing or saving the notebook fails.
            The original exception is attached as the cause.
    """
    # Catch warnings and handle them so that we avoid spurious noise in the CLI due to noisy 3rd party libraries
    with warnings.catch_warnings(record=True) as w:
        WarningUtils().turn_traceback_on(parent_trace, warnings_list=w)

        # As documented in https://nbconvert.readthedocs.io/en/latest/execute_api.html
        #
        # The file is opened with an explicit UTF-8 encoding: with the
        # platform default (e.g. cp1252) a notebook containing a character
        # such as '\u2610' fails with a 'charmap' codec error.
        my_trace = parent_trace.doing("Attempting to load notebook")
        try:
            with open(self.src_folder + '/' + self.src_filename, encoding="utf8") as f:
                nb = _nbformat.read(f, as_version=4)
        except Exception as ex:
            # The trace goes first, matching the execute-step raise below.
            raise ApodeixiError(
                my_trace,
                "Encountered this error while loading notebook: " + str(ex),
                data={
                    'src_folder': self.src_folder,
                    'src_filename': self.src_filename
                }) from ex

        my_trace = parent_trace.doing("Attempting to execute notebook")
        try:
            # Use virtual-env's kernel, so don't specify: kernel_name='python3'
            ep = ExecutePreprocessor(timeout=600)

            # Notebook executes in the directory specified by the 'path' metadata field.
            # NOTE(review): if destination_folder is None this concatenation
            # fails and surfaces as an execution error — confirm that is acceptable.
            ep.preprocess(nb, {'metadata': {'path': self.destination_folder + '/'}})
        except Exception as ex:
            raise ApodeixiError(
                my_trace,
                "Encountered this error while executing notebook: " + str(ex),
                data={
                    'src_folder': self.src_folder,
                    'src_filename': self.src_filename
                }) from ex

        my_trace = parent_trace.doing("Attempting to save notebook")
        try:
            # Saving is optional: skip it unless a full destination is configured.
            if self.destination_folder is not None and self.destination_filename is not None:
                with open(self.destination_folder + '/' + self.destination_filename,
                          'w',
                          encoding='utf-8') as f:
                    _nbformat.write(nb, f)
        except Exception as ex:
            raise ApodeixiError(
                my_trace,
                "Encountered this error while saving notebook: " + str(ex),
                data={
                    'destination_folder': self.destination_folder,
                    'destination_filename': self.destination_filename
                }) from ex

        WarningUtils().handle_warnings(parent_trace, warning_list=w)

        my_trace = parent_trace.doing(
            "Converting notebook to dictionary after executing it")
        return NotebookUtils._val_to_dict(my_trace, nb)
def shorten_notebook(notebook_path, args):
    """Drop "excursion" cells from a notebook and emit the shortened notebook.

    Cells from a markdown cell matching ``RE_BEGIN_EXCURSION`` up to and
    including the next markdown cell matching ``RE_END_EXCURSION`` are
    skipped.  When ``args.link_to`` is set, the opening cell is kept and its
    source is replaced by a sentence linking to the online version.

    Args:
        notebook_path: Path of the notebook to read, or ``'-'`` to read it
            from standard input.
        args: Options object; the attributes read here are ``link_to``,
            ``skip_slides`` and ``in_place``.

    Output:
        With ``args.in_place`` the notebook file is replaced (only if at
        least one cell was skipped) and a one-line summary is printed.
        Otherwise the UTF-8 encoded notebook is written to standard output.
    """
    # load the notebook
    if notebook_path == '-':
        notebook = nbformat.read(sys.stdin, 4)
    else:
        with io.open(notebook_path, 'r', encoding='utf-8') as f:
            notebook = nbformat.read(f, 4)

    in_excursion = False
    skipped_cells = 0
    new_cells = []
    notebook_title = None

    for cell in notebook.cells:
        # Cells inside an excursion are skipped unless a marker below says otherwise.
        skip_this_cell = in_excursion

        # The first cell matching the title pattern provides the notebook title.
        if notebook_title is None:
            match_notebook_title = RE_NOTEBOOK_TITLE.match(cell.source)
            if match_notebook_title:
                notebook_title = match_notebook_title.group('title')

        if cell.cell_type == 'markdown':
            match_begin_excursion = RE_BEGIN_EXCURSION.match(cell.source)
            match_end_excursion = RE_END_EXCURSION.match(cell.source)

            if match_begin_excursion:
                skip_this_cell = True
                in_excursion = True

                if args.link_to:
                    # Add a link to online version
                    title = match_begin_excursion.group('title')
                    cell.source = f'({title} can be found in ["{notebook_title}" online]({link(args.link_to, notebook_path, title)}).)'
                    skip_this_cell = False

            elif match_end_excursion:
                # The closing marker itself is skipped as well.
                skip_this_cell = True
                in_excursion = False

        if skip_this_cell:
            skipped_cells += 1

            if args.skip_slides:
                # Don't include in slides
                # NOTE(review): a cell flagged here is still not appended to
                # new_cells, so it is dropped from the output — confirm intended.
                if 'metadata' not in cell:
                    cell['metadata'] = {}
                if 'slideshow' not in cell.metadata:
                    cell.metadata['slideshow'] = {}
                if 'slide_type' not in cell.metadata.slideshow:
                    cell.metadata.slideshow['slide_type'] = 'skip'
        else:
            new_cells.append(cell)

    notebook.cells = new_cells
    notebook_contents = (nbformat.writes(notebook) + '\n').encode('utf-8')

    if args.in_place:
        if skipped_cells > 0:
            # Write to a temporary sibling first, then swap it into place.
            temp_notebook_path = notebook_path + "~"
            with io.open(temp_notebook_path, 'wb') as f:
                f.write(notebook_contents)
            # os.replace overwrites an existing destination on every
            # platform; os.rename raises on Windows when the target exists,
            # which it always does here.
            os.replace(temp_notebook_path, notebook_path)
            print("%s: %d cell(s) skipped" % (notebook_path, skipped_cells))
        else:
            print("%s: unchanged" % notebook_path)
    else:
        sys.stdout.buffer.write(notebook_contents)