def write_notebook(self, include_html=True):
    suffix = "_responses_with_names" if self.include_usernames else "_responses"
    nb_name = self.nb_name_stem + suffix
    output_file = os.path.join(PROCESSED_NOTEBOOK_DIR, nb_name + '.ipynb')
    html_output = os.path.join(PROCESSED_NOTEBOOK_DIR, nb_name + '.html')

    remove_duplicate_answers = not self.include_usernames

    filtered_cells = []
    for prompt in self.question_prompts:
        filtered_cells += prompt.cells
        answers = prompt.answers_without_duplicates if remove_duplicate_answers else prompt.answers
        for gh_username, response_cells in answers.items():
            if self.include_usernames:
                filtered_cells.append(
                    NotebookUtils.markdown_heading_cell(self.gh_username_to_fullname(gh_username), 4))
            filtered_cells.extend(response_cells)

    answer_book = deepcopy(self.template)
    answer_book['cells'] = filtered_cells
    nb = nbformat.from_dict(answer_book)

    print("Writing", output_file)
    with io.open(output_file, 'wt') as fp:
        nbformat.write(nb, fp, version=4)

    if include_html:
        # TODO why is the following necessary?
        nb = nbformat.reads(nbformat.writes(nb, version=4), as_version=4)
        html_content, _ = nbconvert.export_html(nb)
        print("Writing", html_output)
        with io.open(html_output, 'w') as fp:
            fp.write(html_content)
def fix_notebook(filename, grade_id, source):
    with io.open(filename, "r", encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)
    for i, cell in enumerate(nb.cells):
        if "nbgrader" in cell["metadata"] and cell["metadata"]["nbgrader"]["grade_id"] == grade_id:
            nb.cells[i]["source"] = source
    return nbformat.writes(nb)
def bundle(handler, model):
    """Create a compressed tarball containing the notebook document.

    Parameters
    ----------
    handler : tornado.web.RequestHandler
        Handler that serviced the bundle request
    model : dict
        Notebook model from the configured ContentManager
    """
    notebook_filename = model['name']
    notebook_content = nbformat.writes(model['content']).encode('utf-8')

    notebook_name = os.path.splitext(notebook_filename)[0]
    tar_filename = '{}.tar.gz'.format(notebook_name)

    info = tarfile.TarInfo(notebook_filename)
    info.size = len(notebook_content)

    with io.BytesIO() as tar_buffer:
        with tarfile.open(tar_filename, "w:gz", fileobj=tar_buffer) as tar:
            tar.addfile(info, io.BytesIO(notebook_content))

        handler.set_attachment_header(tar_filename)
        handler.set_header('Content-Type', 'application/gzip')

        # Return the buffer value as the response
        handler.finish(tar_buffer.getvalue())
def from_file(self, filename):
    import nbformat
    from nbconvert import MarkdownExporter
    from jinja2 import DictLoader
    from traitlets.config import Config

    c = Config()
    # c.ExtractOutputPreprocessor.extract_output_types = set()
    c.ExtractOutputPreprocessor.output_filename_template = 'images/{unique_key}_{cell_index}_{index}{extension}'
    c.NbConvertBase.display_data_priority = ['application/javascript', 'text/html', 'text/markdown',
                                             'image/svg+xml', 'text/latex', 'image/png', 'image/jpeg',
                                             'text/plain']

    nb = nbformat.read(filename, as_version=4)
    dl = DictLoader({'full.tpl': TEMPLATE})
    md_exporter = MarkdownExporter(config=c, extra_loaders=[dl],
                                   template_file='full.tpl')
    (body, resources) = md_exporter.from_notebook_node(nb)

    self.kp.write(body, images={name.split('images/')[1]: data
                                for name, data in resources.get('outputs', {}).items()})

    # Add cleaned ipynb file
    for cell in nb['cells']:
        if cell['cell_type'] == 'code':
            cell['outputs'] = []  # remove output data
            cell['execution_count'] = None  # reset to not executed
    self.kp.write_src(os.path.basename(filename), nbformat.writes(nb))
def from_notebook_node(self, nb, resources=None, **kw):
    nb_copy, resources = super(NotebookExporter, self).from_notebook_node(nb, resources, **kw)
    if self.nbformat_version != nb_copy.nbformat:
        resources['output_suffix'] = '.v%i' % self.nbformat_version
    else:
        resources['output_suffix'] = '.nbconvert'
    output = nbformat.writes(nb_copy, version=self.nbformat_version)
    return output, resources
def notebook_node_to_string_list(notebook_node):
    """
    Writes a NotebookNode to a list of strings.

    :param notebook_node: The notebook as NotebookNode to write.
    :return: The notebook as list of strings (linewise).
    """
    return nbformat.writes(notebook_node, nbformat.NO_CONVERT).splitlines(True)
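# A minimal usage sketch for the helper above (hedged: the one-cell notebook
# built here is invented for illustration, not part of the original source).
# splitlines(True) keeps the line endings, so joining the pieces restores the
# exact JSON text.
import nbformat
from nbformat.v4 import new_notebook, new_code_cell

nb = new_notebook(cells=[new_code_cell("print('hi')")])
lines = notebook_node_to_string_list(nb)
assert ''.join(lines) == nbformat.writes(nb, nbformat.NO_CONVERT)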
def convert_md():
    """Find all markdown files, convert into jupyter notebooks
    """
    converted_files = []
    reader = notedown.MarkdownReader(match='strict')
    files = glob.glob('*/*.md')
    # evaluate the newest file first, so we can catch errors earlier
    files.sort(key=os.path.getmtime, reverse=True)

    do_eval = int(os.environ.get('DO_EVAL', True))
    if do_eval:
        do_eval = int(os.environ.get('EVAL', True))
    if not do_eval:
        print('=== Will skip evaluating notebooks')

    for fname in files:
        new_fname = _get_new_fname(fname)
        # parse if each markdown file is actually a jupyter notebook
        with open(fname, 'r') as fp:
            valid = '```{.python .input' in fp.read()
        if not valid:
            if new_fname != fname:
                print('=== Rename %s -> %s' % (fname, new_fname))
                shutil.copyfile(fname, new_fname)
                converted_files.append((fname, new_fname))
            continue

        # read
        with open(fname, 'r') as f:
            notebook = reader.read(f)

        if do_eval and not (_has_output(notebook) or
                            any([i in fname for i in ignore_execution])):
            print('=== Evaluate %s with timeout %d sec' % (fname, timeout))
            tic = time.time()
            # update from ../data to data
            for c in notebook.cells:
                if c.get('cell_type', None) == 'code':
                    c['source'] = c['source'].replace(
                        '"../data', '"data').replace("'../data", "'data")
            notedown.run(notebook, timeout)
            print('=== Finished in %f sec' % (time.time() - tic))

        # even though we check it again later, do it earlier so we can see the
        # error message before evaluating all notebooks
        _check_notebook(notebook)

        # write
        # need to add language info for syntax highlighting
        notebook['metadata'].update({'language_info': {'name': 'python'}})
        new_fname = _replace_ext(new_fname, 'ipynb')
        print('=== Convert %s -> %s' % (fname, new_fname))
        with open(new_fname, 'w') as f:
            f.write(nbformat.writes(notebook))

        converted_files.append((fname, new_fname))
    return converted_files
def save(self, keep_alt=False):
    if keep_alt:
        # xxx store in alt filename
        outfilename = "{}.alt.ipynb".format(self.name)
    else:
        outfilename = self.filename
    # xxx don't specify output version for now
    new_contents = nbformat.writes(self.notebook)
    if replace_file_with_string(outfilename, new_contents):
        print("{} saved into {}".format(self.name, outfilename))
def test_write_downgrade_2(self):
    """downgrade a v3 notebook to v2"""
    # Open a version 3 notebook.
    with self.fopen(u'test3.ipynb', 'r') as f:
        nb = read(f, as_version=3)

    jsons = writes(nb, version=2)
    nb2 = json.loads(jsons)

    (major, minor) = get_version(nb2)
    self.assertEqual(major, 2)
def _save_notebook(self, os_path, nb):
    """Save a notebook to an os_path."""
    with self.atomic_writing(os_path, encoding='utf-8') as f:
        if ftdetect(os_path) == 'notebook':
            nbformat.write(nb, f, version=nbformat.NO_CONVERT)
        elif ftdetect(os_path) == 'markdown':
            nbjson = nbformat.writes(nb, version=nbformat.NO_CONVERT)
            markdown = convert(nbjson, informat='notebook', outformat='markdown')
            f.write(markdown)
def _save_notebook(self, path, nb):
    self.log.debug('_save_notebook: %s', locals())
    k = boto.s3.key.Key(self.bucket)
    k.key = self._path_to_s3_key(path)
    try:
        notebook_json = nbformat.writes(nb, version=nbformat.NO_CONVERT)
        k.set_contents_from_string(notebook_json)
    except Exception as e:
        raise web.HTTPError(400, u"Unexpected Error Writing Notebook: %s %s" % (path, e))
def notebook_content(self, content):
    if isinstance(content, compat.string_types):
        self._notebook_content = content
        return
    try:
        # maybe this is a notebook
        content = nbformat.writes(content, version=nbformat.NO_CONVERT)
        self._notebook_content = content
    except:
        raise
def write(self, filepath, notebookNode, version=4):
    """
    Write a notebook to Storage

    :param filepath: The path to the notebook to write on the Storage
    :param notebookNode: notebookNode object to write
    :param version: Version of the notebook
    :return boolean
    """
    self.log.debug("Write the notebook '%s' to storage" % filepath)
    content = nbformat.writes(notebookNode, version)
    return self.do_write(filepath, content)
def to_json(self, filename=None, encoding="utf8"):
    """
    convert the notebook into json

    @param      filename    filename or stream
    @param      encoding    encoding
    @return                 Json string if filename is None, None otherwise

    .. versionchanged:: 1.4
        The function now returns the json string if filename is None.
    """
    if isinstance(filename, str):  # str also covers unicode here
        with open(filename, "w", encoding=encoding) as payload:
            self.to_json(payload)
    elif filename is None:
        st = StringIO()
        st.write(writes(self.nb))
        return st.getvalue()
    else:
        filename.write(writes(self.nb))
def translate(self):
    visitor = NBTranslator(self.document, self.app, self.docpath)
    self.document.walkabout(visitor)
    nb = _finilize_markdown_cells(visitor.nb)

    if self.app.config.nbexport_execute:
        ep = ExecutePreprocessor(allow_errors=True)
        try:
            ep.preprocess(nb, {'metadata': {}})
        except CellExecutionError as e:
            self.app.warn(str(e))

    self.output = nbformat.writes(nb)
def test_run_nb(self):
    """Test %run notebook.ipynb"""
    from nbformat import v4, writes
    nb = v4.new_notebook(
        cells=[
            v4.new_markdown_cell("The Ultimate Question of Everything"),
            v4.new_code_cell("answer=42")
        ]
    )
    src = writes(nb, version=4)
    self.mktmp(src, ext='.ipynb')

    _ip.magic("run %s" % self.fname)

    nt.assert_equal(_ip.user_ns['answer'], 42)
def save_notebook(self, model, name='', path=''):
    """Save the notebook model and return the model with no content."""
    path = path.strip('/')

    if 'content' not in model:
        raise web.HTTPError(400, u'No notebook JSON data provided')

    if not path:
        raise web.HTTPError(400, u'We require path for saving.')

    nb = nbformat.from_dict(model['content'])

    gist = self._get_gist(name, path)
    if gist is None:
        tags = parse_tags(name)
        if path:
            tags.append(path)
        content = nbformat.writes(nb, version=nbformat.NO_CONVERT)
        gist = self.gisthub.create_gist(name, tags, content)

    # One checkpoint should always exist
    #if self.notebook_exists(name, path) and not self.list_checkpoints(name, path):
    #    self.create_checkpoint(name, path)

    new_path = model.get('path', path).strip('/')
    new_name = model.get('name', name)

    if path != new_path:
        raise web.HTTPError(400, u'Gist backend does not support path change')

    # remove [gist_id] if we're being sent old key_name
    gist.name = gist.strip_gist_id(new_name)
    gist.notebook_content = nb

    self.check_and_sign(nb, self.fullpath(new_path, new_name))

    if 'name' in nb['metadata']:
        nb['metadata']['name'] = u''

    try:
        self.log.debug("Autosaving notebook %s %s", path, name)
        self.gisthub.save(gist)
    except Exception as e:
        raise web.HTTPError(400, u'Unexpected error while autosaving notebook: %s %s %s' % (path, name, e))

    # NOTE: since gist.name might not have [gist_id] suffix on rename
    # we use gist.key_name
    model = self.get_notebook(gist.key_name, new_path, content=False)
    return model
def merge_notebooks(filenames):
    merged = None
    for fname in filenames:
        with io.open(fname, 'r', encoding='utf-8') as f:
            nb = nbformat.read(f, as_version=4)
        if merged is None:
            merged = nb
        else:
            # TODO: add an optional marker between joined notebooks,
            # like a horizontal rule, for example, or some other arbitrary
            # (user specified) markdown cell
            merged.cells.extend(nb.cells)
    if not hasattr(merged.metadata, 'name'):
        merged.metadata.name = ''
    merged.metadata.name += "_merged"
    print(nbformat.writes(merged))
def fetch(self, filepath, dest):
    """
    Fetch a notebook from Storage to the home directory

    :param filepath: Path to the notebook on the storage
    :param dest: Path to the notebook fetched in the home directory of the user
    :return the path to the notebook in the home directory of the user
    """
    self.log.debug("Fetch notebook '%s' to '%s'" % (filepath, dest))
    nb = self.read(filepath)
    if not dest.endswith('.ipynb'):
        dest += '.ipynb'
    # Write the notebook on local storage
    self.local.write(dest, nbformat.writes(nb))
    return os.path.join(self.local.sharedNotebook, dest)
def test_directory_empty_mainipynb(self):
    import nbformat
    doc = Document()
    source = nbformat.v4.new_notebook()
    result = {}

    def load(filename):
        handler = bahd.DirectoryHandler(filename=filename)
        handler.modify_document(doc)
        result['handler'] = handler
        result['filename'] = filename
        if handler.failed:
            raise RuntimeError(handler.error)

    with_directory_contents({
        'main.ipynb': nbformat.writes(source)
    }, load)

    assert not doc.roots
def merge_notebooks(outfile, filenames):
    merged = None
    added_appendix = False
    for fname in filenames:
        with io.open(fname, 'r', encoding='utf-8') as f:
            nb = nbformat.read(f, nbformat.NO_CONVERT)
            remove_formatting(nb)
            if not added_appendix and fname[0:8] == 'Appendix':
                remove_links_add_appendix(nb)
                added_appendix = True
            else:
                remove_links(nb)
        if merged is None:
            merged = nb
        else:
            merged.cells.extend(nb.cells)
    #merged.metadata.name += "_merged"
    outfile.write(nbformat.writes(merged, nbformat.NO_CONVERT))
def test_roundtrip():
    """Run nbconvert using our custom markdown template to recover
    original markdown from a notebook.
    """
    # create a notebook from the markdown
    mr = notedown.MarkdownReader()
    roundtrip_notebook = mr.to_notebook(roundtrip_markdown)

    # write the notebook into json
    notebook_json = nbformat.writes(roundtrip_notebook)

    # read the json back into a notebook
    notebook = nbformat.reads(notebook_json, as_version=4)

    # convert notebook to markdown
    mw = notedown.MarkdownWriter(template_file='notedown/templates/markdown.tpl',
                                 strip_outputs=True)
    markdown = mw.writes(notebook)

    nt.assert_multi_line_equal(roundtrip_markdown, markdown)
def merge_notebooks(filenames):
    merged = None
    for fname in filenames:
        with io.open(fname, 'r', encoding='utf-8') as f:
            nb = nbformat.read(f, as_version=4)
        # iterate over a copy: removing from nb.cells while enumerating it
        # would skip the cell that follows each removal
        for cell in nb.cells[:]:
            if ('nbgrader' in cell['metadata']
                    and cell['metadata']['nbgrader']['grade_id'] == 'header'):
                nb.cells.remove(cell)
        if merged is None:
            merged = nb
        else:
            # TODO: add an optional marker between joined notebooks,
            # like a horizontal rule, for example, or some other arbitrary
            # (user specified) markdown cell
            merged.cells.extend(nb.cells)
    if not hasattr(merged.metadata, 'name'):
        merged.metadata.name = ''
    merged.metadata.name += "_merged"
    return nbformat.writes(merged)
def test_directory_mainipynb_adds_roots(self) -> None:
    import nbformat
    doc = Document()
    source = nbformat.v4.new_notebook()
    code = script_adds_two_roots('SomeModelInNbTestDirectory',
                                 'AnotherModelInNbTestDirectory')
    source.cells.append(nbformat.v4.new_code_cell(code))
    result: Dict[str, Handler] = {}

    def load(filename: str):
        handler = bahd.DirectoryHandler(filename=filename)
        handler.modify_document(doc)
        result['handler'] = handler
        result['filename'] = filename
        if handler.failed:
            raise RuntimeError(handler.error)

    with_directory_contents({'main.ipynb': nbformat.writes(source)}, load)

    assert len(doc.roots) == 2
def update_ipynb_toc(root):
    """Change the toc code block into a list of clickable links"""
    notebooks = find_files('**/*.ipynb', root)
    for fn in notebooks:
        nb = notebook.read(fn)
        if not nb:
            continue
        for cell in nb.cells:
            if (cell.cell_type == 'markdown' and '```toc' in cell.source):
                md_cells = markdown.split_markdown(cell.source)
                for c in md_cells:
                    if c['type'] == 'code' and c['class'] == 'toc':
                        toc = []
                        for l in c['source'].split('\n'):
                            if l and not l.startswith(':'):
                                toc.append(' - [%s](%s.ipynb)' % (l, l))
                        c['source'] = '\n'.join(toc)
                        c['type'] = 'markdown'
                cell.source = markdown.join_markdown_cells(md_cells)
        with open(fn, 'w') as f:
            f.write(nbformat.writes(nb))
def md2ipynb():
    assert len(sys.argv) == 3, 'usage: input.md output.ipynb'
    (src_fn, input_fn, output_fn) = sys.argv
    # timeout for each notebook, in sec
    timeout = 20 * 60
    # whether to enable evaluation
    do_eval = int(os.environ.get('EVAL', True))
    reader = notedown.MarkdownReader(match='strict')
    with open(input_fn, 'r', encoding="utf8") as f:
        notebook = reader.read(f)
    if do_eval:
        tic = time.time()
        notedown.run(notebook, timeout)
        print('%s: Evaluated %s in %f sec' % (src_fn, input_fn, time.time() - tic))
    # need to add language info for syntax highlighting
    notebook['metadata'].update({'language_info': {'name': 'python'}})
    with open(output_fn, 'w') as f:
        f.write(nbformat.writes(notebook))
    print('%s: Write results into %s' % (src_fn, output_fn))
def writes(notebook, fmt, version=nbformat.NO_CONVERT, **kwargs):
    """
    Return the text representation of a notebook in the given jupytext format

    :param notebook: the notebook
    :param fmt: the jupytext format like `md`, `py:percent`, ...
    :param version: see nbformat.writes
    :param kwargs: (not used) additional parameters for nbformat.writes
    :return: the text representation of the notebook
    """
    metadata = deepcopy(notebook.metadata)
    rearrange_jupytext_metadata(metadata)
    fmt = copy(fmt)
    fmt = long_form_one_format(fmt, metadata)
    ext = fmt['extension']
    format_name = fmt.get('format_name')

    jupytext_metadata = metadata.get('jupytext', {})

    if ext == '.ipynb':
        # Remove jupytext section if empty
        jupytext_metadata.pop('text_representation', {})
        if not jupytext_metadata:
            metadata.pop('jupytext', {})
        return nbformat.writes(
            NotebookNode(
                nbformat=notebook.nbformat,
                nbformat_minor=notebook.nbformat_minor,
                metadata=metadata,
                cells=notebook.cells),
            version, **kwargs)

    if not format_name:
        format_name = format_name_for_ext(metadata, ext, explicit_default=False)

    if format_name:
        fmt['format_name'] = format_name
        update_jupytext_formats_metadata(metadata, fmt)

    writer = TextNotebookConverter(fmt)
    return writer.writes(notebook, metadata)
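# A hedged round-trip sketch using the public jupytext API that wraps the
# function above (the one-cell notebook and the chosen format are invented
# here for illustration):
import jupytext
from nbformat.v4 import new_notebook, new_code_cell

nb = new_notebook(cells=[new_code_cell("x = 1")])
text = jupytext.writes(nb, "py:percent")      # script text with '# %%' cell markers
nb2 = jupytext.reads(text, fmt="py:percent")  # parse the text back to a notebook
assert nb2.cells[-1].source == "x = 1"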
def compare_multiple_pipelines(self, workspace_id: Text):
    u_api = ce_api.UsersApi(self.client)
    user = api_utils.api_call(u_api.get_loggedin_user_api_v1_users_me_get)

    info = {
        constants.ACTIVE_USER: user.email,
        user.email: {
            constants.TOKEN: self.client.configuration.access_token,
            constants.ACTIVE_WORKSPACE: workspace_id
        }
    }

    # generate notebook
    nb = nbf.v4.new_notebook()
    nb['cells'] = [
        nbf.v4.new_code_cell(evaluation.import_block()),
        nbf.v4.new_code_cell(evaluation.info_block(info)),
        nbf.v4.new_code_cell(evaluation.application_block()),
        nbf.v4.new_code_cell(evaluation.interface_block()),
    ]

    # write notebook
    config_folder = click.get_app_dir(constants.APP_NAME)
    if not (os.path.exists(config_folder) and os.path.isdir(config_folder)):
        os.makedirs(config_folder)

    final_out_path = os.path.join(config_folder, constants.COMPARISON_NOTEBOOK)
    s = nbf.writes(nb)
    if isinstance(s, bytes):
        s = s.decode('utf8')

    with open(final_out_path, 'w') as f:
        f.write(s)

    # serve notebook
    os.system('panel serve "{}" --show'.format(final_out_path))
def from_notebook_node(self, notebook, resources=None, **kwargs):
    notebook, resources = super().from_notebook_node(
        notebook, resources=resources, **kwargs
    )

    # if it is unset or an empty value, set it
    if resources.get("ipywidgets_base_url", "") == "":
        resources["ipywidgets_base_url"] = "https://unpkg.com/"

    with tempfile.TemporaryDirectory(suffix="nb-as-pdf") as name:
        pdf_fname = os.path.join(name, "output.pdf")
        pdf_fname2 = os.path.join(name, "output-with-attachment.pdf")
        pyppeteer_args = ["--no-sandbox"] if self.no_sandbox else None

        self.pool.submit(
            asyncio.run,
            notebook_to_pdf(
                notebook,
                pdf_fname,
                config=self.config,
                resources=resources,
                pyppeteer_args=pyppeteer_args,
                **kwargs,
            ),
        ).result()
        resources["output_extension"] = ".pdf"

        attach_notebook(
            pdf_fname,
            pdf_fname2,
            {
                "file_name": f"{resources['metadata']['name']}.ipynb",
                "contents": nbformat.writes(notebook).encode("utf-8"),
            },
        )

        with open(pdf_fname2, "rb") as f:
            pdf_bytes = f.read()

    return (pdf_bytes, resources)
def notebook(root_dir):
    # Build sub directory (define the path unconditionally so the write
    # below works even when the directory already exists).
    subdir = root_dir / 'foo'
    if not subdir.is_dir():
        subdir.mkdir()

    # Build a notebook programmatically.
    nb = new_notebook()
    nb.cells.append(new_markdown_cell(u'Created by test ³'))
    cc1 = new_code_cell(source=u'print(2*6)')
    cc1.outputs.append(new_output(output_type="stream", text=u'12'))
    cc1.outputs.append(
        new_output(
            output_type="execute_result",
            data={'image/png': png_green_pixel},
            execution_count=1,
        ))
    nb.cells.append(cc1)

    # Write file to tmp dir.
    nbfile = subdir / 'testnb.ipynb'
    nbfile.write_text(writes(nb, version=4))
def test_directory_mainipynb_adds_roots(self):
    import nbformat
    doc = Document()
    source = nbformat.v4.new_notebook()
    code = script_adds_two_roots('SomeModelInNbTestDirectory',
                                 'AnotherModelInNbTestDirectory')
    source.cells.append(nbformat.v4.new_code_cell(code))
    result = {}

    def load(filename):
        handler = bahd.DirectoryHandler(filename=filename)
        handler.modify_document(doc)
        result['handler'] = handler
        result['filename'] = filename
        if handler.failed:
            raise RuntimeError(handler.error)

    with_directory_contents({
        'main.ipynb': nbformat.writes(source)
    }, load)

    assert len(doc.roots) == 2
def test_directory_both_mainipynb_and_mainpy(self) -> None:
    doc = Document()

    def load(filename: str):
        handler = bahd.DirectoryHandler(filename=filename)
        handler.modify_document(doc)
        if handler.failed:
            raise RuntimeError(handler.error)

    import nbformat
    source = nbformat.v4.new_notebook()

    with_directory_contents(
        {
            'main.py': script_adds_two_roots('SomeModelInTestDirectory',
                                             'AnotherModelInTestDirectory'),
            'main.ipynb': nbformat.writes(source),
        }, load)

    assert len(doc.roots) == 2
def notebook(jp_root_dir):
    # Build sub directory.
    subdir = jp_root_dir / "foo"
    if not jp_root_dir.joinpath("foo").is_dir():
        subdir.mkdir()

    # Build a notebook programmatically.
    nb = new_notebook()
    nb.cells.append(new_markdown_cell(u"Created by test ³"))
    cc1 = new_code_cell(source=u"print(2*6)")
    cc1.outputs.append(new_output(output_type="stream", text=u"12"))
    cc1.outputs.append(
        new_output(
            output_type="execute_result",
            data={"image/png": png_green_pixel},
            execution_count=1,
        ))
    nb.cells.append(cc1)

    # Write file to tmp dir.
    nbfile = subdir / "testnb.ipynb"
    nbfile.write_text(writes(nb, version=4), encoding="utf-8")
def from_meeting(self, meeting: Meeting):
    self.meeting = meeting
    nb = read_notebook(meeting)
    resources = {"output_extension": FileExtensions.Solutionbook}

    # NotebookExporter.from_notebook_node returns a notebook as a string
    notebook, resources = super().from_notebook_node(nb, resources=resources)

    # to operate over the notebook, we need it to be a NotebookNode
    notebook = nbformat.reads(notebook, as_version=4)
    notebook.cells.insert(0, self._notebook_heading())

    # to write to disk, it now needs to be a string
    notebook = nbformat.writes(notebook)

    filename = (repositories.local_meeting_root(meeting) /
                repr(meeting)).with_suffix(FileExtensions.Solutionbook)

    # use a context manager so the file handle is closed promptly
    with open(filename, "w") as f:
        f.write(notebook)

    return notebook, resources
def generate_notebooks(config, eval_dir, colab_dir):
    """Add a colab setup code cell and then save to colab_dir"""
    if not config['github_repo']:
        return

    # copy notebooks from eval_dir to colab_dir
    run_cmd(['rm -rf', colab_dir])
    run_cmd(['cp -r', eval_dir, colab_dir])

    notebooks = find_files('**/*.ipynb', colab_dir)
    for fn in notebooks:
        with open(fn, 'r') as f:
            notebook = nbformat.read(f, as_version=4)
        # Use Python3 as the kernel
        notebook['metadata'].update({"kernelspec": {
            "name": "python3",
            "display_name": "Python 3"
        }})
        # Check if GPU is needed
        use_gpu = False
        for cell in notebook.cells:
            if cell.cell_type == 'code':
                if config['gpu_pattern'] in cell.source:
                    use_gpu = True
                    break
        if use_gpu:
            notebook['metadata'].update({"accelerator": "GPU"})
            logging.info('Use GPU for ' + fn)
        # Update SVG image URLs
        if config['replace_svg_url']:
            update_svg_urls(notebook, config['replace_svg_url'], fn, colab_dir)
        # Add additional libraries
        if config['libs']:
            cell = get_installation_cell(notebook, config['libs'])
            if cell:
                notebook.cells.insert(0, cell)
                if config['libs_header']:
                    notebook.cells.insert(
                        0, nbformat.v4.new_markdown_cell(source=config['libs_header']))
        with open(fn, 'w') as f:
            f.write(nbformat.writes(notebook))
def make_source_bundle(model, environment, ext_resources_dir, extra_files=[]):
    """Create a bundle containing the specified notebook and python environment.

    Returns a file-like object containing the bundle tarball.
    """
    nb_name = model['name']
    nb_content = nbformat.writes(model['content'], nbformat.NO_CONVERT) + '\n'

    manifest = make_source_manifest(nb_name, environment, 'jupyter-static')
    manifest_add_buffer(manifest, nb_name, nb_content)
    manifest_add_buffer(manifest, environment['filename'], environment['contents'])

    paths = []
    if extra_files:
        paths = list_files(ext_resources_dir, extra_files)
        skip = [nb_name, environment['filename'], 'manifest.json']
        paths = sorted(list(set(paths) - set(skip)))

    for rel_path in paths:
        abs_path = normpath(join(ext_resources_dir, rel_path))
        if not abs_path.startswith(ext_resources_dir):
            raise ValueError('Path %s is not within the notebook directory' % rel_path)
        manifest_add_file(manifest, rel_path, ext_resources_dir)

    log.debug('manifest: %r', manifest)

    bundle_file = tempfile.TemporaryFile(prefix='rsc_bundle')
    with tarfile.open(mode='w:gz', fileobj=bundle_file) as bundle:
        # add the manifest first in case we want to partially untar the bundle for inspection
        bundle_add_buffer(bundle, 'manifest.json', json.dumps(manifest, indent=2))
        bundle_add_buffer(bundle, nb_name, nb_content)
        bundle_add_buffer(bundle, environment['filename'], environment['contents'])

        for rel_path in paths:
            bundle_add_file(bundle, rel_path, ext_resources_dir)

    bundle_file.seek(0)
    return bundle_file
def merge_notebooks(filenames, remove_header=False):
    merged = None
    for fname in filenames:
        nb = read_notebook(fname)
        if remove_header:
            for i, cell in enumerate(nb.cells[:]):
                grade_id_exists = ('nbgrader' in cell['metadata']) and ('grade_id' in cell['metadata']['nbgrader'])
                header_exists = grade_id_exists and (cell['metadata']['nbgrader']['grade_id'] == 'header')
                due_date_exists = grade_id_exists and (cell['metadata']['nbgrader']['grade_id'] == 'due_date')
                if header_exists or due_date_exists:
                    nb.cells.remove(cell)
        if merged is None:
            merged = nb
        else:
            # TODO: add an optional marker between joined notebooks,
            # like a horizontal rule, for example, or some other arbitrary
            # (user specified) markdown cell
            merged.cells.extend(nb.cells)
    if not hasattr(merged.metadata, 'name'):
        merged.metadata.name = ''
    merged.metadata.name += "_merged"
    return nbformat.writes(merged)
def translatenb_v4(cells):
    from nbformat.v4 import (new_code_cell, new_markdown_cell, new_notebook)
    from nbformat.v4.nbbase import new_raw_cell

    nb_cells = []
    for cell_type, language, block in cells:
        block = '\n'.join(block)
        if cell_type == 'markdown':
            if block != "":
                nb_cells.append(new_markdown_cell(source=block))
        elif cell_type == 'code':
            nb_cells.append(new_code_cell(source=block))
        elif cell_type == 'raw':
            nb_cells.append(new_raw_cell(source=block))
        else:
            raise ValueError(
                'Wrong cell_type was given [{}]'.format(cell_type))

    nb = new_notebook(cells=nb_cells)

    from nbformat import writes
    return writes(nb, version=4)
def run_internal(self, context, **kwargs):
    NotebookCommand.init_jupyter(context)
    task = context.get_task(kwargs.get('task'), task_class=ETLTask)
    import_lines = get_imports(task)

    if isinstance(task, SingleSourceETLTask):
        source = task.source
        if isinstance(source, six.string_types):
            body_lines = ['source = """{}"""\n'.format(source)]
        else:
            body_lines = ['source = {}\n'.format({'a': 1})]
        if task.is_source_query:
            body_lines.append(
                'context.load_query(source, conn=\'{}\')\n'.format(
                    task.source_conn))
        else:
            body_lines.append('context.load(source, conn=\'{}\')\n'.format(
                task.source_conn))
    else:
        body_lines = get_function_body(task.execute_internal)
        body_lines = list(
            map(lambda x: x.replace('return ', ''), body_lines))

    source_lines = import_lines + ['\n\n'] + body_lines

    notebook = nbformat.v4.new_notebook()
    cell = nbformat.v4.new_code_cell(source=source_lines)
    notebook.cells.append(cell)

    ipynb_path = os.path.join(context.workdir, '.jupyter', 'notebook',
                              task.name + '.ipynb')
    with open(ipynb_path, 'wb') as f:
        ipy_content = nbformat.writes(notebook)
        f.write(ipy_content.encode())
        f.flush()

    os.system('jupyter notebook {}.ipynb'.format(task.name))
def write_notebook_output(notebook, output_dir, notebook_name):
    """Extract output from notebook cells and write to files in output_dir.

    This also modifies 'notebook' in-place, adding metadata to each cell that
    maps output mime-types to the filenames the output was saved under.
    """
    resources = dict(unique_key=os.path.join(output_dir, notebook_name),
                     outputs={})

    # Modifies 'resources' in-place
    ExtractOutputPreprocessor().preprocess(notebook, resources)

    # Write the cell outputs to files where we can (images and PDFs),
    # as well as the notebook file.
    FilesWriter(build_directory=output_dir).write(
        nbformat.writes(notebook), resources,
        os.path.join(output_dir, notebook_name + ".ipynb"),
    )

    # Write a script too.
    ext = notebook.metadata.language_info.file_extension
    contents = "\n\n".join(cell.source for cell in notebook.cells)
    with open(os.path.join(output_dir, notebook_name + ext), "w") as f:
        f.write(contents)
def executenb(text, nbversion=4, timeout=600, kernel_name='python3', run_path='.', outputname=None):
    import nbformat
    from nbconvert.preprocessors import ExecutePreprocessor
    from nbconvert.preprocessors.execute import CellExecutionError

    nb = nbformat.reads(text, as_version=nbversion)
    ep = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name)
    try:
        out = ep.preprocess(nb, {'metadata': {'path': run_path}})
    except CellExecutionError:
        msg = 'Error executing the notebook.'
        if outputname is not None:
            msg += ' See notebook "{:s}" for the traceback.'.format(outputname)
        logger.error(msg)
        raise
    finally:
        # note: returning from `finally` swallows the re-raised exception,
        # so the (possibly partially executed) notebook is always returned
        return nbformat.writes(nb)
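# Hypothetical usage sketch for executenb above (assumes ipykernel is
# installed so a 'python3' kernel can start; the one-cell notebook is
# invented for illustration):
import nbformat
from nbformat.v4 import new_notebook, new_code_cell

src = nbformat.writes(new_notebook(cells=[new_code_cell("1 + 1")]))
executed_json = executenb(src, timeout=60)  # JSON text of the executed notebook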
def upload_notebook(notebook, notebook_configuration, time, engine, user_id=None):
    # Get UID
    notebook_string = nbf.writes(notebook)
    notebook_uid = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(9))

    # Upload to Bucket
    client = storage.Client()
    bucket = client.get_bucket('jupyter-notebook-generator')
    blob = bucket.blob('{notebook_uid}/{notebook_configuration[notebook][title]}.ipynb'.format(**locals()))
    blob.upload_from_string(notebook_string, content_type='text/html')
    blob.make_public()

    # Upload dataset
    dataset = notebook_configuration['data']['parameters'].get('gse') if notebook_configuration['data']['parameters'].get('gse') else notebook_configuration['data']['parameters'].get('uid')
    if not dataset:
        dataset = 'gtex'
    notebook_dataframe = pd.Series({'notebook_uid': notebook_uid,
                                    'notebook_title': notebook_configuration['notebook']['title'],
                                    'notebook_configuration': json.dumps(notebook_configuration),
                                    'version': notebook_configuration['notebook']['version'],
                                    'time': time,
                                    'dataset': dataset,
                                    'user_fk': user_id,
                                    'private': 1 if user_id else 0}).to_frame().T
    notebook_dataframe.to_sql('notebook', engine, if_exists='append', index=False)

    # Get tool IDs
    tool_dict = pd.read_sql_table('tool', engine).set_index('tool_string')['id'].to_dict()

    # Get notebook ID
    notebook_id = pd.read_sql_query('SELECT id FROM notebook WHERE notebook_uid = "{}"'.format(notebook_uid), engine)['id'][0]

    # Notebook-tool dataframe
    notebook_tool_dataframe = pd.DataFrame({'tool_fk': [tool_dict[x['tool_string']] for x in notebook_configuration['tools']],
                                            'notebook_fk': notebook_id})
    notebook_tool_dataframe.to_sql('notebook_tool', engine, if_exists='append', index=False)

    # Notebook-tag dataframe
    if notebook_configuration.get('terms'):
        notebook_tag_dataframe = pd.DataFrame({'ontology_term_fk': [x for x in notebook_configuration.get('terms')],
                                               'notebook_fk': notebook_id})
        notebook_tag_dataframe.to_sql('notebook_ontology_term', engine, if_exists='append', index=False)

    # Return
    return notebook_uid
def upload_notebook(notebook, notebook_configuration, engine):
    # Get UID
    notebook_string = nbf.writes(notebook)
    notebook_uid = ''.join(
        random.choice(string.ascii_letters + string.digits) for _ in range(9))

    # Upload to Bucket
    client = storage.Client()
    bucket = client.get_bucket('jupyter-notebook-generator')
    blob = bucket.blob(
        '{notebook_uid}/{notebook_configuration[notebook][title]}.ipynb'.format(**locals()))
    blob.upload_from_string(notebook_string, content_type='text/html')
    blob.make_public()
    notebook_url = urllib.parse.unquote(blob.public_url)

    # Upload to database
    notebook_dataframe = pd.Series({
        'notebook_uid': notebook_uid,
        'notebook_url': notebook_url,
        'notebook_configuration': json.dumps(notebook_configuration),
        'version': notebook_configuration['notebook']['version'],
        'gse': notebook_configuration['data']['parameters'].get('gse')
    }).to_frame().T
    notebook_dataframe.to_sql('notebooks', engine, if_exists='append', index=False)

    # Return
    return notebook_url
def test_upload_instance(client, github_auth_header, mocker):
    responses.add(responses.GET, 'https://api.github.com/user',
                  status=200, json={'login': '******'})
    responses.add(responses.GET, 'https://api.github.com/user/orgs',
                  status=200, json=[{'login': '******'}])
    responses.add(responses.GET, 'https://keeper.lsst.codes/token',
                  status=200, json={'token': 'ltdtoken'})

    mock_task = mocker.patch(
        'uservice_nbreport.routes.uploadnb.publish_instance')
    mock_url_for = mocker.patch('uservice_nbreport.routes.uploadnb.url_for')
    mock_url_for.return_value = 'https://example.com/12345'

    # Create a mock notebook
    nb = nbformat.v4.new_notebook()
    nb.cells.append(nbformat.v4.new_markdown_cell(source='Hello world'))
    nb_data = nbformat.writes(nb, version=4)

    with client:
        headers = dict(github_auth_header)
        headers['Content-Type'] = 'application/x-ipynb+json'
        response = client.post(
            '/nbreport/reports/testr-000/instances/1/notebook',
            headers=headers,
            data=nb_data)

    assert response.status_code == 202
    mock_task.apply_async.assert_called_once()
    mock_url_for.assert_called_once()
def read(cls, path):
    resolved_path, fmt = cls.extract_format(path)
    text = _resolved_handler(resolved_path).read(resolved_path)

    # Read the document
    nb = jupytext.reads(text, fmt=fmt)

    # Set a kernel if there was none
    if nb.metadata.get('kernelspec', {}).get('name') is None:
        language = (nb.metadata.get('jupytext', {}).get('main_language')
                    or nb.metadata['kernelspec']['language'])
        if not language:
            raise ValueError(
                'Cannot infer a kernel as the document language is not defined')
        kernelspec = kernelspec_from_language(language)
        if not kernelspec:
            raise ValueError('Found no kernel for {}'.format(language))
        nb.metadata['kernelspec'] = kernelspec

    # Return the notebook as a JSON string
    return nbformat.writes(nb)
def sample_perf(nb, n=30):
    samples = pd.DataFrame(
        pd.np.NaN,
        index=pd.MultiIndex.from_product(
            (range(n), ['nbformat'] + JUPYTEXT_FORMATS),
            names=['sample', 'implementation']),
        columns=pd.Index(['size', 'read', 'write'], name='measure'))

    for i, fmt in samples.index:
        t0 = time.time()
        if fmt == 'nbformat':
            text = nbformat.writes(nb)
        else:
            text = jupytext.writes(nb, fmt)
        t1 = time.time()
        samples.loc[(i, fmt), 'write'] = t1 - t0
        samples.loc[(i, fmt), 'size'] = len(text)

        t0 = time.time()
        if fmt == 'nbformat':
            nbformat.reads(text, as_version=4)
        else:
            jupytext.reads(text, fmt)
        t1 = time.time()
        samples.loc[(i, fmt), 'read'] = t1 - t0

    return samples
def _release_notebook(dst_dir):
    """convert .md into notebooks and make a zip file"""
    reader = notedown.MarkdownReader(match='strict')
    files = glob.glob('*/*.md')
    package_files = ['environment.yml', 'utils.py', 'README.md', 'LICENSE']
    package_files.extend(glob.glob('img/*'))
    package_files.extend(glob.glob('data/*'))

    for fname in files:
        # parse if each markdown file is actually a jupyter notebook
        with open(fname, 'r') as fp:
            valid = '```{.python .input' in fp.read()
        if not valid:
            package_files.append(fname)
            continue

        # read
        with open(fname, 'r') as f:
            notebook = reader.read(f)

        # write
        new_fname = _replace_ext(fname, 'ipynb')
        with open(new_fname, 'w') as f:
            f.write(nbformat.writes(notebook))
        package_files.append(new_fname)

    print('=== Packing ', package_files)

    with ZipFile(os.path.join(dst_dir, 'gluon_tutorials_zh.zip'), 'w') as pkg:
        for f in package_files:
            pkg.write(f)

    with tarfile.open(os.path.join(dst_dir, 'gluon_tutorials_zh.tar.gz'),
                      "w:gz") as tar:
        for f in package_files:
            tar.add(f)

    for f in glob.glob('*/*.ipynb'):
        os.remove(f)
def write_doc(self, docname, doctree): # work around multiple string % tuple issues in docutils; # replace tuples in attribute values with lists doctree = doctree.deepcopy() destination = docutils.io.StringOutput(encoding="utf-8") ### output notebooks for executing for single pdfs, the urlpath should be set to website url self.writer._set_ref_urlpath(self.config["jupyter_pdf_urlpath"]) self.writer._set_jupyter_download_nb_image_urlpath(None) self.writer.write(doctree, destination) # get a NotebookNode object from a string nb = nbformat.reads(self.writer.output, as_version=4) nb = self.update_Metadata(nb) ### execute the notebook - keep it forcefully on strDocname = str(docname) if strDocname in self.execution_vars['dependency_lists'].keys(): self.execution_vars['delayed_notebooks'].update({strDocname: nb}) else: self._execute_notebook_class.execute_notebook( self, nb, docname, self.execution_vars, self.execution_vars['futures']) ### mkdir if the directory does not exist outfilename = os.path.join(self.outdir, os_path(docname) + self.out_suffix) ensuredir(os.path.dirname(outfilename)) try: with codecs.open(outfilename, "w", "utf-8") as f: self.writer.output = nbformat.writes(nb, version=4) f.write(self.writer.output) except (IOError, OSError) as err: self.logger.warning("error writing file %s: %s" % (outfilename, err))
def with_file_object(f):
    nbsource = nbformat.writes(contents)
    f.write(nbsource.encode("UTF-8"))
    f.flush()
    func(f.name)
def write_doc(self, docname, doctree): # work around multiple string % tuple issues in docutils; # replace tuples in attribute values with lists doctree = doctree.deepcopy() destination = docutils.io.StringOutput(encoding="utf-8") ### print an output for downloading notebooks as well with proper links if variable is set if "jupyter_download_nb" in self.config and self.config["jupyter_download_nb"]: outfilename = os.path.join(self.downloadsdir, os_path(docname) + self.out_suffix) ensuredir(os.path.dirname(outfilename)) self.writer._set_ref_urlpath(self.config["jupyter_download_nb_urlpath"]) self.writer._set_jupyter_download_nb_image_urlpath((self.config["jupyter_download_nb_image_urlpath"])) self.writer.write(doctree, destination) # get a NotebookNode object from a string nb = nbformat.reads(self.writer.output, as_version=4) nb = self.update_Metadata(nb) try: with codecs.open(outfilename, "w", "utf-8") as f: self.writer.output = nbformat.writes(nb, version=4) f.write(self.writer.output) except (IOError, OSError) as err: self.warn("error writing file %s: %s" % (outfilename, err)) ### executing downloaded notebooks if (self.config['jupyter_download_nb_execute']): strDocname = str(docname) if strDocname in self.download_execution_vars['dependency_lists'].keys(): self.download_execution_vars['delayed_notebooks'].update({strDocname: nb}) else: self._execute_notebook_class.execute_notebook(self, nb, docname, self.download_execution_vars, self.download_execution_vars['futures']) ### output notebooks for executing self.writer._set_ref_urlpath(None) self.writer._set_jupyter_download_nb_image_urlpath(None) self.writer.write(doctree, destination) # get a NotebookNode object from a string nb = nbformat.reads(self.writer.output, as_version=4) nb = self.update_Metadata(nb) ### execute the notebook if (self.config["jupyter_execute_notebooks"]): strDocname = str(docname) if strDocname in self.execution_vars['dependency_lists'].keys(): self.execution_vars['delayed_notebooks'].update({strDocname: nb}) else: self._execute_notebook_class.execute_notebook(self, nb, docname, self.execution_vars, self.execution_vars['futures']) else: #do not execute if (self.config['jupyter_generate_html']): language_info = nb.metadata.kernelspec.language self._convert_class = convertToHtmlWriter(self) self._convert_class.convert(nb, docname, language_info, self.outdir) ### mkdir if the directory does not exist outfilename = os.path.join(self.outdir, os_path(docname) + self.out_suffix) ensuredir(os.path.dirname(outfilename)) try: with codecs.open(outfilename, "w", "utf-8") as f: self.writer.output = nbformat.writes(nb, version=4) f.write(self.writer.output) except (IOError, OSError) as err: self.logger.warning("error writing file %s: %s" % (outfilename, err))
def add_toctree(app, docname, source):
    # If no globaltoc is given, we'll skip this part
    if not app.config["globaltoc_path"]:
        return

    # First check whether this page has any descendants
    # If so, then we'll manually add them as a toctree object
    path = app.env.doc2path(docname, base=None)
    toc = app.config["globaltoc"]
    page = find_name(toc, _no_suffix(path))

    # If we didn't find this page in the TOC, raise an error
    if page is None:
        raise FileNotFoundError(
            f"The following path in your table of contents couldn't be found:\n\n{path}.\n\nDouble check your `_toc.yml` file to make sure the paths are correct."
        )

    # If we have no sections, then don't worry about a toctree
    sections = [(ii.get("file"), ii.get("name")) for ii in page.get("pages", [])]
    if len(sections) == 0:
        return

    for ii, (path_sec, name) in enumerate(sections):
        # Update path so it is relative to the root of the parent
        path_parent_folder = Path(page["file"]).parent
        path_sec = os.path.relpath(path_sec, path_parent_folder)

        # Decide whether we'll over-ride with a name in the toctree
        this_section = f"{path_sec}"
        if name:
            this_section = f"{name} <{this_section}>"
        sections[ii] = this_section

    # Parse flags in the page metadata
    options = []
    if page.get("numbered"):
        options.append("numbered")
    options = "\n".join([f":{ii}:" for ii in options])

    # Figure out what kind of text defines a toctree directive for this file
    # currently, assumed to be markdown
    suff = Path(path).suffix
    toctree_text = dedent("""
        ```{{toctree}}
        :hidden:
        :titlesonly:
        {options}

        {sections}
        ```
        """)

    # Create the markdown directive for our toctree
    toctree = toctree_text.format(options=options, sections="\n".join(sections))
    if suff == ".md":
        source[0] += toctree + "\n"
    elif suff == ".ipynb":
        # Lazy import nbformat because we only need it if we have an ipynb file
        import nbformat as nbf
        ntbk = nbf.reads(source[0], nbf.NO_CONVERT)
        md = nbf.v4.new_markdown_cell(toctree)
        ntbk.cells.append(md)
        source[0] = nbf.writes(ntbk)
    else:
        raise ValueError("Only markdown and ipynb files are supported.")
def create_notebook_checkpoint(self, nb, path):
    content = nbformat.writes(nb)
    return self.create_file_checkpoint(content, 'text', path)
def write_nb(nb, nb_path, mode='w'):
    with open(nb_path, mode) as f:
        f.write(nbformat.writes(nbformat.from_dict(nb), version=4))
def save(self):
    outfilename = self.filename
    # don't specify output version for now
    new_contents = nbformat.writes(self.notebook) + "\n"
    if replace_file_with_string(outfilename, new_contents):
        print(f"{self.name} saved into {outfilename}")
import nbformat
from sys import argv

# TODO: if name main stuff for usage and stderr
file_input = argv[1]
with open(file_input, 'r', encoding='utf-8') as f:
    nb = nbformat.read(f, as_version=4)

# only cells whose slideshow type is 'slide' are whitelisted
def is_assigned(c):
    if hasattr(c.metadata, 'slideshow'):
        if c.metadata.slideshow.slide_type == 'slide':
            return True
        else:
            return False
    else:
        return False

new_cells = list(filter(is_assigned, nb['cells']))
nb['cells'] = new_cells
print(nbformat.writes(nb))
def writes_base64(nb, version=NBFORMAT_VERSION):
    """
    Write a notebook as base64.
    """
    return b64encode(writes(nb, version=version).encode('utf-8')).decode()
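# A hedged companion sketch, not from the original source: the inverse of
# writes_base64, decoding the base64 payload back into a NotebookNode.
from base64 import b64decode
import nbformat

def reads_base64(nb_b64, as_version=4):
    """Decode a base64-encoded notebook produced by writes_base64."""
    return nbformat.reads(b64decode(nb_b64).decode('utf-8'), as_version=as_version)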
def dumps(self, nb):
    return nbformat.writes(nb, _NBFORMAT_VERSION)