def post(self, path, categories):
    """Publish a notebook or Markdown file as a new post.

    For ``.ipynb`` files the notebook is converted to Markdown. If the first
    cell's source starts with ``'- '`` it is parsed as a YAML list of
    single-key mappings (``title``, ``tags``, ``category``), removed from the
    notebook, and used as post metadata.

    Args:
        path: Path of the ``.ipynb`` or ``.md`` file to publish.
        categories: Categories for the post. When falsy and the notebook
            front matter has a ``category`` entry, that comma-separated
            value is used instead.

    Returns:
        Whatever ``self.typecho.new_post`` returns, or ``None`` when the
        file type is not supported.
    """
    filename, filetype = os.path.splitext(os.path.basename(path))

    if filetype == '.ipynb':
        with open(path, 'r') as handle:
            jake_notebook = nbformat.reads(handle.read(), as_version=4)

        front_matter = None
        if jake_notebook.cells:
            source = str(jake_notebook.cells[0].source)
            if source.startswith('- '):
                # safe_load: the cell is user-authored text, so never let it
                # construct arbitrary Python objects.
                front_matter = {}
                for entry in yaml.safe_load(source) or []:
                    front_matter.update(entry)
                # The metadata cell must not appear in the rendered post.
                del jake_notebook.cells[0]

        content, _ = MarkdownExporter().from_notebook_node(jake_notebook)

        if front_matter is not None:
            # Explicit categories win; otherwise fall back to the notebook's.
            post_categories = categories or [
                name.strip()
                for name in str(front_matter.get("category", '')).split(',')
                if name.strip()
            ]
            post = Post(
                title=front_matter.get("title", filename),
                description=content,
                mt_keywords=front_matter.get("tags", ''),
                categories=post_categories,
            )
        else:
            post = Post(
                title=self.name_convent(filename),
                description=content,
                categories=categories,
            )
    elif filetype == '.md':
        with open(path, 'r') as handle:
            content = handle.read()
        post = Post(
            title=self.name_convent(filename),
            description=content,
            categories=categories,
        )
    else:
        print("error {}".format(filetype))
        # Nothing to publish: do not hand ``None`` to the blog client.
        return None

    return self.typecho.new_post(post, publish=True)
def from_file(self, filename):
    """Render a notebook to Markdown and store it through ``self.kp``.

    The Markdown body and the extracted output images are passed to
    ``self.kp.write``; a copy of the notebook with code-cell outputs and
    execution counts cleared is passed to ``self.kp.write_src``.

    Args:
        filename: Path of the notebook to read.
    """
    import nbformat
    from nbconvert import MarkdownExporter
    from jinja2 import DictLoader
    from traitlets.config import Config

    export_config = Config()
    export_config.ExtractOutputPreprocessor.output_filename_template = (
        'images/{unique_key}_{cell_index}_{index}{extension}'
    )
    export_config.NbConvertBase.display_data_priority = [
        'application/javascript',
        'text/html',
        'text/markdown',
        'image/svg+xml',
        'text/latex',
        'image/png',
        'image/jpeg',
        'text/plain',
    ]

    notebook = nbformat.read(filename, as_version=4)
    template_loader = DictLoader({'full.tpl': TEMPLATE})
    exporter = MarkdownExporter(
        config=export_config,
        extra_loaders=[template_loader],
        template_file='full.tpl',
    )
    body, resources = exporter.from_notebook_node(notebook)

    # Keys are stored without the leading 'images/' folder.
    images = {}
    for name, data in resources.get('outputs', {}).items():
        images[name.split('images/')[1]] = data
    self.kp.write(body, images=images)

    # Add cleaned ipynb file: no output data, marked as never executed.
    code_cells = [
        cell for cell in notebook['cells'] if cell['cell_type'] == 'code'
    ]
    for cell in code_cells:
        cell['outputs'] = []
        cell['execution_count'] = None
    self.kp.write_src(os.path.basename(filename), nbformat.writes(notebook))
def convert(self):
    """Convert the notebook at ``self.filepath`` into a Markdown post.

    The notebook is exported with ``IPYNB_TEMPLATE``. Every extracted output
    image is handed to ``self.save_image`` under its base name, and the
    Markdown is written to the path returned by ``generate_post_path``.

    Returns:
        The path of the written Markdown post.
    """
    import nbformat
    from traitlets.config import Config
    from nbconvert import MarkdownExporter

    notebook = nbformat.read(self.filepath, as_version=4)

    # Image references in the post point into the per-user static folder.
    export_config = Config()
    export_config.ExtractOutputPreprocessor.output_filename_template = path.join(
        '/images',
        'team',
        config['username'],
        self.post_slug,
        '{unique_key}_{cell_index}_{index}{extension}',
    )

    exporter = MarkdownExporter(config=export_config, raw_template=IPYNB_TEMPLATE)
    post, images = exporter.from_notebook_node(notebook)

    for image_path, content in images['outputs'].items():
        self.save_image(path.basename(image_path), content)

    post_path = generate_post_path('{}.md'.format(self.post_slug))
    click.secho('Saving post content to {}'.format(post_path), fg='green')
    with open(post_path, 'w') as output:
        output.write(post)
    return post_path
def notebook_to_markdown(path: Union[Path, str]) -> str: """ Convert jupyter notebook to hugo-formatted markdown string Args: path: path to notebook Returns: hugo-formatted markdown """ # first, update the notebook's metadata update_notebook_metadata(path) with open(Path(path)) as fp: notebook = nbformat.read(fp, as_version=4) assert 'front-matter' in notebook[ 'metadata'], "You must have a front-matter field in the notebook's metadata" front_matter_dict = dict(notebook['metadata']['front-matter']) front_matter = json.dumps(front_matter_dict, indent=2) c = Config() c.MarkdownExporter.preprocessors = [CustomPreprocessor] markdown_exporter = MarkdownExporter(config=c) markdown, _ = markdown_exporter.from_notebook_node(notebook) doctored_md = doctor(markdown) # added <!--more--> comment to prevent summary creation output = '\n'.join( ('---', front_matter, '---', '<!--more-->', doctored_md)) return output
def convert_notebook_to_markdown(filename):
    """Convert a notebook into a dated Jekyll post under ``ROOT_DIR/_posts``.

    Post metadata is taken from the notebook's file name, which has the form
    ``title|categories|tags`` (the last two parts are optional). The post is
    dated one day before the current time.

    Args:
        filename: Path of the notebook to convert.
    """
    # Notebooks are UTF-8 JSON; do not depend on the platform default encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, 4)

    # Set the metadata from the '|'-separated file name.
    fname = os.path.splitext(os.path.split(filename)[1])[0]
    metadata = [name.strip() for name in fname.split('|')]
    notebook.metadata['title'] = metadata[0]
    if len(metadata) >= 2:
        notebook.metadata['categories'] = metadata[1].strip()
    if len(metadata) >= 3:
        notebook.metadata['tags'] = metadata[2].strip()

    curr_time = datetime.now() - timedelta(days=1)
    # Explicit format: slicing str(datetime) breaks when microsecond == 0,
    # because the fractional part is then omitted from the string.
    notebook.metadata['modified_date'] = curr_time.strftime("%Y-%m-%d %H:%M:%S")

    exporter = MarkdownExporter()
    exporter.template_file = os.path.join(ROOT_DIR, 'jekyll.tpl')
    body, _resources = exporter.from_notebook_node(notebook)

    curr_date = curr_time.strftime("%Y-%m-%d")
    markdown_path = os.path.join(
        ROOT_DIR, "_posts", curr_date + "-" + metadata[0] + ".md")
    with open(markdown_path, "w", encoding='utf-8') as f:
        f.write(body)
def to_md(self):
    """Export ``self.nb`` to a Markdown file next to the notebook.

    Output files are written below ``self.nb_home`` in a freshly recreated
    ``<stem>_files`` directory (spaces in the stem become underscores); the
    Markdown goes to the notebook path with a ``.md`` suffix.
    """
    files_dir_name = self.path.stem.replace(' ', '_') + '_files'

    # Start from an empty output directory on every run.
    files_dir = self.nb_home / files_dir_name
    if files_dir.is_dir():
        shutil.rmtree(files_dir)
    files_dir.mkdir()

    export_resources = {
        "metadata": {"path": str(self.nb_home)},
        # This is relative to the above path
        'output_files_dir': str(files_dir_name),
    }
    exporter_config = {
        "NbConvertBase": {
            "display_data_priority": self.DATA_DISPLAY_PRIORITY
        }
    }
    exporter = MarkdownExporter(config=exporter_config)
    markdown_text, exported = exporter.from_notebook_node(self.nb, export_resources)

    # Binary output files come back in the exporter's resources.
    for relative_name, payload in exported["outputs"].items():
        with open(self.nb_home / relative_name, "wb") as out_file:
            out_file.write(payload)

    markdown_path = self.path.with_suffix(".md")
    with open(markdown_path, mode="w") as out_file:
        out_file.write(markdown_text)
def render_notebook(to_render):
    """Render a notebook to ``OUTPUT_DIR/<dir>.md`` with YAML front matter.

    ``<dir>`` is the name of the directory containing the notebook; its first
    ten characters are used as the publish date. The title is taken from the
    first ``# ...`` heading of the first cell and removed from that cell.
    In Markdown cells, ``$...$`` and ``$$...$$`` spans get their double
    backslashes and underscores escaped before export. Exported output files
    are written to a freshly recreated ``OUTPUT_DIR/<dir>`` directory.

    Args:
        to_render: Path of the notebook file.
    """
    os.chdir(BASE_DIR)
    nb_dir = basename(dirname(to_render))
    markdown_file = os.path.join(OUTPUT_DIR, nb_dir + ".md")
    resources_dir = os.path.join(OUTPUT_DIR, nb_dir)

    if os.path.isdir(resources_dir):
        shutil.rmtree(resources_dir)
    os.makedirs(resources_dir)

    with open(to_render) as notebook_file:
        notebook = nbformat.reads(notebook_file.read(), as_version=4)

    first_cell = notebook.cells[0]
    heading = re.search(r'# *(.*)\n', first_cell.source, re.M)
    title = heading.group(1)
    first_cell.source = first_cell.source.replace(heading.group(0), "")
    publish_date = nb_dir[:10]
    front_matter = "".join([
        "---\n",
        f"title: \"{title}\"\n",
        f"date: {publish_date}\n",
        "---\n",
    ])

    inline_math = re.compile(r'(?:[^\$]|^)\$[^\$]+\$(?:[^\$]|$)')
    multiline_math = re.compile(r'\$\$[^\$]+\$\$')

    def escape_math(text, pattern):
        # Matches are collected once, then substituted one after another.
        for fragment in pattern.findall(text):
            escaped = fragment.replace(r"\\", r"\\\\\\\\")
            escaped = escaped.replace("_", r"\_")
            text = text.replace(fragment, escaped)
        return text

    for cell in notebook.cells:
        if cell['cell_type'] != 'markdown':
            continue
        # Inline spans first; display math is searched in the updated text.
        text = escape_math(cell['source'], inline_math)
        cell['source'] = escape_math(text, multiline_math)

    from nbconvert import MarkdownExporter
    body, resources = MarkdownExporter().from_notebook_node(notebook)

    for filename, payload in resources['outputs'].items():
        with open(os.path.join(resources_dir, filename), "wb") as out_file:
            out_file.write(payload)

    with open(markdown_file, "w") as output:
        output.write(front_matter)
        output.write(body)
def convert_nb_to_markdown(
        nb_contents: nbformat.notebooknode.NotebookNode) -> list:
    """Export a notebook node to Markdown and return it as a list of lines."""
    markdown_text: str
    markdown_text, _resources = MarkdownExporter().from_notebook_node(nb_contents)
    return markdown_text.splitlines()
def convertNotebook(notebookPath, modulePath):
    """Export the notebook at ``notebookPath`` as Markdown to ``modulePath``.

    The notebook is read without version conversion.
    """
    with open(notebookPath) as notebook_file:
        raw_notebook = notebook_file.read()
    notebook = nbformat.reads(raw_notebook, nbformat.NO_CONVERT)

    markdown_text, _meta = MarkdownExporter().from_notebook_node(notebook)

    with open(modulePath, "w+") as target:
        target.write(str(markdown_text))
def load_file(ipynb_file):
    """Export a notebook to Markdown, dropping a leading title line.

    Args:
        ipynb_file: Path of the notebook to read.

    Returns:
        A ``(body, outputs)`` pair: the stripped Markdown text without its
        first line when that line starts with ``#``, and the exporter's
        ``outputs`` resource.
    """
    with open(ipynb_file) as notebook_file:
        notebook = nbformat.reads(notebook_file.read(), as_version=4)

    markdown_text, resources = MarkdownExporter().from_notebook_node(notebook)
    markdown_text = markdown_text.strip()

    if markdown_text.startswith('#'):
        # The document opens with a title: keep only what follows that line.
        _title, _newline, markdown_text = markdown_text.partition('\n')

    return markdown_text, resources['outputs']
def convert_to_markdown(filename):
    """Read a notebook file and export it to Markdown.

    Returns:
        The ``(body, resources)`` pair produced by the Markdown exporter.
    """
    with open(filename, mode='r') as notebook_file:
        notebook = nbformat.reads(notebook_file.read(), as_version=4)

    exporter = MarkdownExporter()
    body, resources = exporter.from_notebook_node(notebook)
    return (body, resources)
def markdown():
    """Execute every notebook from ``get_files("ipynb")`` and export it.

    Each notebook is run, a trailing cell with empty source is dropped, and
    the Markdown is written to ``<stem>.md`` in the current directory.
    """
    runner = preprocessors.ExecutePreprocessor()
    md_exporter = MarkdownExporter()

    for nb_path in get_files("ipynb"):
        with open(nb_path, encoding="utf8") as source_file:
            node = nbformat.read(source_file, as_version=4)

        runner.preprocess(node)

        last_cell = node.cells[-1]
        if not last_cell["source"]:
            node.cells.pop()

        rendered, _ = md_exporter.from_notebook_node(node)
        with open(f"{nb_path.stem}.md", "w", encoding="utf8") as target:
            target.write(rendered)
def convert(notebook_fname, strip_code=False):
    """Convert a notebook into a post under ``../_posts`` with its assets.

    The title is the notebook's base name from the 12th character on, with
    dashes turned into spaces. Output files are written to
    ``../assets/posts/<basename>/`` and their references in the Markdown are
    rewritten to ``/assets/posts/<basename>/<file>``.

    Args:
        notebook_fname: Notebook file name.
        strip_code: When true, cells are passed through ``strip_code_source``
            and the empty python code fences left behind are removed.
    """
    site_root = '..'
    basename = os.path.splitext(notebook_fname)[0]
    title = basename[11:].replace('-', ' ')
    mathjax = '<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript"></script>'
    header = [
        '---',
        'title: ' + title,
        'layout: post',
        '---',
        '',
        mathjax,
        '',
    ]

    asset_dir = '/assets/posts/' + basename
    asset_realpath = site_root + asset_dir + '/'
    if not os.path.exists(asset_realpath):
        os.makedirs(asset_realpath)
    post_realpath = site_root + '/_posts'

    notebook = nbformat.read(notebook_fname, as_version=4)
    if strip_code:
        notebook['cells'] = [strip_code_source(cell) for cell in notebook['cells']]

    body, resources = MarkdownExporter().from_notebook_node(notebook)
    if strip_code:
        body = body.replace('```python\n\n```', '')

    outputs = resources['outputs']
    output_paths = {name: asset_dir + '/' + name for name in outputs.keys()}
    # Links are only rewritten when the notebook produced output files.
    final_body = replace_by_dict(body, output_paths) if output_paths else body
    markdown_lines = header + final_body.split('\n')

    for output_name, payload in outputs.items():
        with open(asset_realpath + output_name, 'wb') as asset_file:
            asset_file.write(payload)

    markdown_fname = basename + '.md'
    with open(markdown_fname, 'w') as mdfile:
        mdfile.write('\n'.join(markdown_lines))
    shutil.move(markdown_fname, post_realpath + '/' + markdown_fname)
def notebook_to_markdown(path, date, slug, **kwargs):
    """Convert a notebook to a Markdown blog post with a YAML header.

    The post is written to ``<root>/content/post/<date>-<slug>.md`` and the
    exported output files to ``<root>/static/<year>/<month>/<slug>/``, where
    ``<root>`` is the grandparent directory of the notebook.

    Args:
        path: str, path to notebook
        date: datestring in YYYY-MM-DD format
        slug: str, front-matter parameter, used to compose address of blogpost
        kwargs: str, float, int, list, tuple, other front-matter parameters
            recommended to pass title

    Raises:
        AssertionError: if ``date`` is malformed, the notebook does not
            exist, or the post directory does not exist.
    """
    # Validate first: the date is split into path components just below, and
    # a malformed value would otherwise surface as an unrelated IndexError.
    assert bool(re.match('[0-9]{4}-[0-1][0-9]-[0-3][0-9]', date)), \
        'Incorrect date format, need YYYY-MM-DD'

    path_nb = Path(path)
    year, month = date.split('-')[:2]
    site_root = path_nb.parents[1]
    path_out = site_root / 'static' / year / month / slug
    path_post = site_root / 'content/post/' / (date + '-' + slug + '.md')

    assert path_nb.exists()
    assert path_post.parent.exists()

    # convert notebook to .md
    with path_nb.open() as fp:
        notebook = nbformat.read(fp, as_version=4)

    c = Config()
    c.MarkdownExporter.preprocessors = [CustomPreprocessor]
    markdown_exporter = MarkdownExporter(config=c)
    markdown, resources = markdown_exporter.from_notebook_node(notebook)
    md = doctor(markdown)

    # 'true' (previously misspelled 'ture') switches mathjax on in the header.
    header = make_yaml_header(date=date, slug=slug, mathjax='true', **kwargs)

    with path_post.open('w') as f:
        f.write(header + md)

    # write outputs (e.g. png files) next to the static assets
    if 'outputs' in resources:
        if not path_out.exists():
            path_out.mkdir(parents=True)
        for key, payload in resources['outputs'].items():
            with (path_out / key).open('wb') as f:
                f.write(payload)
def convert_notebook(name, notebook, output_dir):
    """Export a notebook as ``<name>.py`` and ``<name>.md`` in ``output_dir``.

    Extracted output files are written below ``output_dir`` using the
    ``<name>/...`` file-name template, and the Markdown body goes through
    ``post_process_markdown`` before it is written.

    Args:
        name: Base name of the generated files and of the outputs folder.
        notebook: Notebook node to export.
        output_dir: Target directory; it is combined with ``/``, so it is
            presumably a ``pathlib.Path`` — verify against callers.
    """
    markdown_config = Config()
    markdown_config.ExtractOutputPreprocessor.output_filename_template = (
        name + '/{unique_key}_{cell_index}_{index}{extension}'
    )
    markdown_exporter = MarkdownExporter(config=markdown_config)
    python_exporter = PythonExporter()

    script_source, _ = python_exporter.from_notebook_node(notebook)
    with open(output_dir / f'{name}.py', 'w') as script_file:
        script_file.write(script_source)

    markdown_source, resources = markdown_exporter.from_notebook_node(notebook)
    with open(output_dir / f'{name}.md', 'w') as markdown_file:
        markdown_file.write(post_process_markdown(markdown_source))

    for relative_name, payload in resources['outputs'].items():
        target = output_dir / relative_name
        target.parent.mkdir(exist_ok=True, parents=True)
        with open(target, 'wb') as resource_file:
            resource_file.write(payload)
def convert(notebook_fname, strip_code=True):
    """Convert a notebook into a post under ``../_posts`` with its assets.

    The title is the notebook's base name from the 12th character on, with
    dashes turned into spaces. Output files are written to
    ``../assets/posts/<basename>/`` and their references in the Markdown are
    rewritten to ``/assets/posts/<basename>/<file>``.

    Args:
        notebook_fname: Notebook file name.
        strip_code: When true (the default, matching the previous hard-coded
            behaviour), cells are passed through ``strip_code_source`` and
            the empty python code fences left behind are removed.
    """
    root = '..'
    basename, _ext = os.path.splitext(notebook_fname)
    title = basename[11:].replace('-', ' ')
    header = [
        "---",
        "title: " + title,
        "layout: post",
        "---",
    ]

    asset_dir = '/assets/posts/' + basename
    asset_realpath = root + asset_dir + '/'
    os.makedirs(asset_realpath, exist_ok=True)
    post_realpath = root + '/_posts'

    notebook = nbformat.read(notebook_fname, as_version=4)
    if strip_code:
        notebook['cells'] = [strip_code_source(c) for c in notebook['cells']]

    exporter = MarkdownExporter()
    body, resources = exporter.from_notebook_node(notebook)
    if strip_code:
        body = body.replace('```python\n\n```', '')

    outputs = resources['outputs']
    output_paths = {k: asset_dir + '/' + k for k in outputs}
    # Nothing to rewrite when the notebook produced no output files.
    # NOTE(review): presumably replace_by_dict is not meant to receive an
    # empty mapping — confirm against its definition.
    if output_paths:
        body = replace_by_dict(body, output_paths)
    markdown_lines = header + body.split('\n')

    for fname, data in outputs.items():
        with open(asset_realpath + fname, 'wb') as f:
            f.write(data)

    markdown_fname = basename + '.md'
    with open(markdown_fname, 'w') as mdfile:
        mdfile.write('\n'.join(markdown_lines))
    shutil.move(markdown_fname, post_realpath + '/' + markdown_fname)
def prepare_readme():
    """Convert notebook to markdown and write it to the readme.

    Reads ``graphql_example/graphql_example.ipynb`` and overwrites
    ``README.md`` in the current directory, printing progress messages.
    """
    from nbconvert import MarkdownExporter
    import nbformat

    print('reading notebook')
    notebook = nbformat.reads(
        Path('graphql_example/graphql_example.ipynb').read_text(),
        as_version=4)

    # The messages used to say "RST" although the output is Markdown.
    print('converting notebook to Markdown')
    exporter = MarkdownExporter()
    body, *_ = exporter.from_notebook_node(notebook)

    print('writing Markdown to readme')
    with open('README.md', 'w') as readme:
        readme.write(body)
    print('success')
class NotebookConverter(BasePlugin):
    """Plugin that serves ``.ipynb`` pages as Markdown exported by nbconvert."""

    def __init__(self):
        self.exporter = MarkdownExporter()

    def can_load(self, path):
        """Return True for ``.ipynb`` paths outside checkpoint folders."""
        lowered = path.lower()
        return lowered.endswith('.ipynb') and 'ipynb_checkpoints' not in lowered

    def on_config(self, config, **kwargs):
        """Append the MathJax script to the site's extra JavaScript."""
        mathjax_url = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js?config=TeX-MML-AM_CHTML'
        config['extra_javascript'].append(mathjax_url)

    def on_page_read_source(self, something, **kwargs):
        """Return the Markdown for a notebook page and write its outputs.

        Returns ``None`` for pages that are not notebooks.
        """
        page = kwargs['page']
        config = kwargs['config']
        src_path = page.file.src_path
        if not self.can_load(src_path):
            return

        notebook = nbformat.read(
            os.path.join(config['docs_dir'], src_path), as_version=4)

        # we'll place the supporting files alongside the final HTML
        stem, _extension = os.path.splitext(os.path.basename(src_path))
        (body, resources) = self.exporter.from_notebook_node(
            notebook, resources={'output_files_dir': stem})

        # folder in site may not have been created yet, create it so that we
        # can drop the support files in there
        target_in_site = os.path.join(config['site_dir'], page.abs_url[1:])
        os.makedirs(target_in_site, exist_ok=True)

        for output_name, payload in resources['outputs'].items():
            output_path = os.path.join(target_in_site, '..', output_name)
            with open(output_path, 'wb') as out_file:
                out_file.write(payload)

        return body
def convert_notebook_to_presentation(notebook_path, markdown_path):
    """Export a notebook with the ``hidecode.tplx`` template.

    The template is looked up in the directory containing this module, the
    ``ChangeIbynbLink`` preprocessor is applied, and the result is handed to
    nbconvert's ``FilesWriter`` with ``markdown_path`` as the target name.

    Args:
        notebook_path: Path of the notebook to read (UTF-8).
        markdown_path: Passed to ``FilesWriter.write`` as its third argument.
    """
    with open(notebook_path, encoding="utf8") as notebook_file:
        notebook = nbformat.read(notebook_file, as_version=4)

    module_dir = os.path.dirname(os.path.abspath(__file__))

    exporter_config = Config()
    exporter_config.MarkdownExporter.preprocessors = [ChangeIbynbLink]

    exporter = MarkdownExporter(config=exporter_config)
    exporter.template_file = os.path.join(module_dir, 'hidecode.tplx')

    body, resources = exporter.from_notebook_node(notebook)

    nbconvert.writers.FilesWriter().write(body, resources, markdown_path)
def to_md(self):
    """Export ``self.nb`` to ``<document_name>.md`` in ``self.final_nb_home``.

    On the first export (``self.first``) the images directory is created and
    the 'md' preprocessors are run. Output files returned by the exporter
    are written below ``self.final_nb_home``, and the resources are reset
    at the end.
    """
    if self.first:
        md_preprocessors = self.get_preprocessors('md')
        self.create_images_dir()
        self.preprocess(md_preprocessors)

    exporter_config = {
        'NbConvertBase': {
            'display_data_priority': self.DISPLAY_DATA_PRIORITY
        }
    }
    exporter = MarkdownExporter(config=exporter_config)
    markdown_text, self.resources = exporter.from_notebook_node(
        self.nb, self.resources)

    # Binary output files come back in the exporter's resources.
    for relative_name, payload in self.resources['outputs'].items():
        with open(self.final_nb_home / relative_name, 'wb') as out_file:
            out_file.write(payload)

    markdown_path = self.final_nb_home / (self.document_name + '.md')
    with open(markdown_path, mode='w') as out_file:
        out_file.write(markdown_text)

    self.reset_resources()
def to_md(self):
    """Export ``self.nb`` to Markdown with image links into the image dir.

    Every image name in the Markdown is replaced by its URL-quoted path
    below ``self.image_dir_name``. In web-app mode the Markdown and image
    data are stored in ``self.return_data``; otherwise the image directory
    is recreated and the images and ``<document_name>.md`` are written to
    ``self.final_nb_home``.
    """
    exporter_config = {
        'NbConvertBase': {
            'display_data_priority': self.DISPLAY_DATA_PRIORITY
        }
    }
    exporter = MarkdownExporter(config=exporter_config)
    markdown_text, self.resources = exporter.from_notebook_node(
        self.nb, self.resources)

    # Exporter outputs first; entries of 'image_data_dict' win on key clashes.
    images = {
        **self.resources['outputs'],
        **self.resources['image_data_dict']
    }

    for image_name in images:
        quoted_path = urllib.parse.quote(
            str(Path(self.image_dir_name) / image_name))
        markdown_text = markdown_text.replace(image_name, quoted_path)

    if self.web_app:
        self.return_data['md_data'] = markdown_text
        self.return_data['md_images'] = images
        self.return_data['image_dir_name'] = self.image_dir_name
        return

    image_dir = self.final_nb_home / self.image_dir_name
    if image_dir.is_dir():
        shutil.rmtree(image_dir)
    image_dir.mkdir()

    for image_name, payload in images.items():
        with open(image_dir / image_name, 'wb') as image_file:
            image_file.write(payload)

    markdown_path = self.final_nb_home / (self.document_name + '.md')
    with open(markdown_path, mode='w') as markdown_file:
        markdown_file.write(markdown_text)
"ms.service: machine-learning\n" \ "ms.subservice: core\n" \ "ms.topic: conceptual\n" \ "ms.author: " + nb_metadata.get("ms.author") + "\n" \ "author: \n" \ "ms.reviewer: " + nb_metadata.get("ms.author") + "\n" \ "ms.date: 02/13/2020 \n" \ "---\n\n" content = nb_as_md[0] content = meta_string + content copy = (content, nb_as_md[1]) return copy # load nb and extract metadata nb = nbformat.read("model-register-and-deploy.ipynb", as_version=4) nb_metadata = parse_metadata(nb) # process and convert to .md config = Config() config.MarkdownExporter.preprocessors = [CustomPreprocessor] custom_exporter = MarkdownExporter(config=config) nb_as_md = custom_exporter.from_notebook_node(nb) # append msdocs metadata nb_as_md = build_append_md_metadata(nb_metadata, nb_as_md) with open("model-register-and-deploy.md", 'w') as f: f.write(nb_as_md[0])
def ipynb2md(readIn):
    """Convert notebook JSON text to UTF-8 encoded Markdown bytes.

    Adapted from data8/textbook, with the intention to repurpose it for
    Markdown content:
    https://github.com/data-8/textbook/blob/gh-pages/convert_notebooks_to_html_partial.py

    Output images are extracted by nbconvert's ``ExtractOutputPreprocessor``
    and written, under their base names, to the ``img`` directory relative
    to the current working directory.

    Args:
        readIn: Notebook contents as a JSON string (not a path).

    Returns:
        The Markdown document encoded as UTF-8 ``bytes``.
    """
    from nbformat import reads
    from nbconvert import MarkdownExporter
    from traitlets.config import Config

    imageDirectory = "img"  # output notebook images here

    # extracts images as separate files
    config = Config()
    config.MarkdownExporter.preprocessors = [
        "nbconvert.preprocessors.ExtractOutputPreprocessor"
    ]

    # assign unique key to each image based on notebook name
    # NOTE(review): `filename` is not defined in this function; it must be a
    # module-level name or this line raises NameError — confirm.
    extractOutputConfig = {
        "unique_key": filename,
        "output_files_dir": "/" + imageDirectory,
    }

    nb = reads(readIn, 4)  # originally read `path`
    mdExporter = MarkdownExporter(config=config)
    md, resources = mdExporter.from_notebook_node(
        nb, resources=extractOutputConfig)

    # write out images under their base names
    os.makedirs(imageDirectory, exist_ok=True)
    for relPath, imgData in resources["outputs"].items():
        imgName = relPath.split("/")[-1]  # get filename
        imgPath = "{}/{}".format(imageDirectory, imgName)  # build newpath
        with open(imgPath, "wb") as outImg:
            outImg.write(imgData)

    return md.encode("utf-8")
def on_files(self, files, config):
    """Export the configured notebooks to Markdown and register them.

    Every ``*.ipynb`` below the plugin's ``input_dir`` (recursively when the
    ``recursive`` option is set) is exported to a ``.md`` file under the
    plugin's ``output_dir`` inside ``docs_dir``. Exported output files are
    written next to both the source and the destination of the generated
    page, and a ``File`` object for the page is appended to ``files``.

    Args:
        files: The site's file collection; generated pages are appended.
        config: The global site configuration.

    Returns:
        ``files``, including the generated Markdown pages.
    """
    import sys

    logger = self._logger
    logger.info('nbconvert: plugin config=%s', pformat(self.config))

    # deal with dirs
    config_file_dir = os.path.dirname(config['config_file_path'])
    input_dir = os.path.normpath(self.config['input_dir'])
    output_dir = os.path.realpath(
        os.path.join(config['docs_dir'],
                     os.path.normpath(self.config['output_dir'])))
    if not os.path.isabs(input_dir):
        input_dir = os.path.realpath(
            os.path.join(config_file_dir, input_dir))

    # glob match
    # Compare version tuples, not strings: '3.10' >= '3.5' is False as text,
    # which silently disabled recursion on Python 3.10 and later.
    glob_recursive = (
        self.config['recursive'] if sys.version_info >= (3, 5) else False)
    if glob_recursive:
        nb_paths_iter = iglob(
            os.path.join(config_file_dir, input_dir, '**', '*.ipynb'),
            recursive=True)
    else:
        nb_paths_iter = iglob(
            os.path.join(config_file_dir, input_dir, '*.ipynb'))

    # Exporter
    md_exporter = MarkdownExporter()

    # Converting
    for nb_path in nb_paths_iter:
        # Prepare output file/dir
        nb_dirname, nb_basename = os.path.split(nb_path)
        nb_basename_root, _ = os.path.splitext(nb_basename)
        nb_subdir = os.path.relpath(nb_dirname, input_dir)
        md_dir = os.path.join(output_dir, nb_subdir)
        md_basename = '{0}.md'.format(nb_basename_root)
        md_path = os.path.join(md_dir, md_basename)
        md_rel_dir = os.path.relpath(md_dir, config['docs_dir'])
        md_rel_path = os.path.join(md_rel_dir, md_basename)

        logger.debug('nbconvert: markdown export %s => %s', nb_path, md_path)

        # run nbconvert (notebooks are UTF-8 JSON)
        with open(nb_path, encoding='UTF8') as fp:
            nb_node = nbformat.read(fp, nbformat.NO_CONVERT)
        body, resources = md_exporter.from_notebook_node(nb_node)

        # save exported
        os.makedirs(md_dir, exist_ok=True)
        with open(md_path, 'w', encoding='UTF8') as fp:
            fp.write(body)

        file_obj = File(path=md_rel_path,
                        src_dir=config['docs_dir'],
                        dest_dir=config['site_dir'],
                        use_directory_urls=config['use_directory_urls'])

        # Resources are written next to the page source and its destination.
        resource_src_dir = os.path.dirname(file_obj.abs_src_path)
        resource_dest_dir = os.path.dirname(file_obj.abs_dest_path)
        for resource_name, resource_data in resources['outputs'].items():
            resource_src_path = os.path.join(resource_src_dir, resource_name)
            os.makedirs(os.path.dirname(resource_src_path), exist_ok=True)
            with open(resource_src_path, 'wb') as fp:
                fp.write(resource_data)

            resource_dest_path = os.path.join(resource_dest_dir, resource_name)
            logger.debug(
                'nbconvert: resource output(%dBytes): resource_name --> %s',
                len(resource_data), resource_dest_path)
            os.makedirs(os.path.dirname(resource_dest_path), exist_ok=True)
            with open(resource_dest_path, 'wb') as fp:
                fp.write(resource_data)

        logger.debug(
            'nbconvert: add file object<abs_src_path=%s abs_dest_path=%s url=%s>',
            file_obj.abs_src_path, file_obj.abs_dest_path, file_obj.url)
        files.append(file_obj)

    return files
def render(which='notebooks', rm=False):
    """Render every notebook in ``_<which>`` to Markdown plus a pandoc TOC.

    For each ``*.ipynb`` file directly inside ``_<which>``:

    * output files go to ``assets/notebooks/<name>_files``;
    * references to them in the Markdown are wrapped in a
      ``relative_url`` filter expression;
    * the Markdown is written to ``_includes/notebooks/<name>.md``;
    * ``pandoc`` generates ``_includes/notebooks/toc/<name>.md``.

    All paths are relative to the current working directory.

    Args:
        which: Suffix of the notebook source directory (``_<which>``).
        rm: When true, the Markdown and asset output directories are
            deleted and recreated first.
    """
    path_notebooks = f'_{which}'
    path_markdown = '_includes/notebooks'
    path_toc = '_includes/notebooks/toc'
    path_output = 'assets/notebooks'

    # Clear the output directories
    if rm:
        shutil.rmtree(path_markdown)
        os.makedirs(path_markdown)
        os.makedirs(path_toc)
        shutil.rmtree(path_output)
        os.makedirs(path_output)

    # Create new output
    for fname in os.listdir(path_notebooks):
        fpath = os.path.join(path_notebooks, fname)

        # Ignore directories
        if not os.path.isfile(fpath):
            continue

        # Ignore non-notebooks. splitext copes with names that have no dot
        # or several dots, which crashed the former two-way split.
        name, ext = os.path.splitext(fname)
        if ext != '.ipynb':
            continue

        # Output location; also the URL path relative to the website root.
        output_files_dir = 'assets/notebooks/%s_files' % name

        # Render the notebook
        with open(fpath) as f:
            nb = nbformat.read(f, as_version=4)
        md_exporter = MarkdownExporter()
        (body, resources) = md_exporter.from_notebook_node(
            nb, resources={'output_files_dir': output_files_dir})

        # Handle output files
        os.makedirs(output_files_dir, exist_ok=True)
        for www_name, payload in resources['outputs'].items():
            with open(www_name, 'wb') as fw:
                fw.write(payload)

        # Replace output paths to get relative urls. The directory name is
        # escaped so notebook names cannot act as regex syntax.
        search = r'\b%s/(.*)\b' % re.escape(output_files_dir)

        def to_relative_url(match, files_dir=output_files_dir):
            return '{{ "/%s/%s" | relative_url }}' % (files_dir, match.group(1))

        body = re.sub(search, to_relative_url, body)

        # Write markdown file
        os.makedirs(path_toc, exist_ok=True)  # also creates path_markdown
        outname = '%s.md' % name
        outpath = os.path.join(path_markdown, outname)
        with open(outpath, 'w') as fw:
            fw.write(body)

        # Construct the table of contents. An argument list without a shell
        # keeps file names with spaces or shell metacharacters intact.
        cmd = ['pandoc', '--template=_toc-template.md', '--toc',
               '-t', 'markdown', outpath]
        ps = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT)
        output = ps.communicate()[0]
        tocpath = os.path.join(path_toc, outname)
        with open(tocpath, 'wb') as fw:
            fw.write(output)
#! /usr/bin/python # -*- coding: utf-8 -*- #get header from jupyter notebook import nbformat from nbconvert import MarkdownExporter with open("content/motivation.ipynb", "r") as f: raw = f.read() notebook = nbformat.reads(raw, as_version=4) #access cells notebook.cells[0] #convert md_exporter = MarkdownExporter() md = md_exporter.from_notebook_node(notebook) #then md can be used with the below code import markdown md = markdown.Markdown(extensions=["toc"]) with open("content/units/units.md", "r") as f: md.convert(f.read()) md.toc_tokens
class Formatter:
    """Convert notebooks to PDF, RST or Markdown and compute output paths.

    Paths are handled as ``/``-separated strings. In ``"mirror_folder"``
    destination mode, outputs go below ``converted/to_<format>/``; otherwise
    they are placed relative to the notebook's own folder. The ``rst`` and
    ``md`` formats get one extra folder per notebook.
    """

    def __init__(self, output_format, destination_mode):
        assert output_format in SUPPORTED_FORMATS, f"supported formats are {SUPPORTED_FORMATS}"
        assert (
            destination_mode in SUPPORTED_DESTINATIONS_MODES
        ), f"supported destination modes are {SUPPORTED_DESTINATIONS_MODES}"
        self.read_encoding = "utf-8"
        self.write_encoding = "utf-8"
        self.format = output_format
        self.destination_mode = destination_mode
        if self.format == "pdf":
            pdf = PDFExporter()
            pdf.exclude_output_prompt = True
            pdf.exclude_input = True
            self.exporter = pdf
        elif self.format == "rst":
            self.exporter = RSTExporter()
        else:
            self.exporter = MarkdownExporter()

    def get_output_dir_base(self, file):
        """Return the root prefix of all outputs ('' unless mirroring)."""
        isMirrorFolder = self.destination_mode == "mirror_folder"
        return f"converted/to_{self.format}/" if isMirrorFolder else ""

    def get_output_file_path(self, file):
        """Return the output folder of ``file`` below the base, with '/'."""
        filePath = file.split("/")[0:-1]
        isMultiFilesFormat = self.format in ["rst", "md"]
        if isMultiFilesFormat:
            # These formats get a folder named after the notebook.
            filePath.append(self.dest_file(file, withFormat=False))
        return "/".join(filePath) + "/" if len(filePath) > 0 else ""

    def dst_folder(self, file):
        """Return the full destination folder for ``file``."""
        return self.get_output_dir_base(file) + self.get_output_file_path(file)

    def dest_file(self, file, withFormat=True):
        """Return the output file name, with or without the new extension."""
        suffix = "." + self.format if withFormat else ""
        return file.split("/")[-1].replace(".ipynb", suffix)

    def dst_path(self, file):
        """Return the full path of the converted file."""
        return self.dst_folder(file) + self.dest_file(file)

    def convert(self, file):
        """Export ``file`` and write the result to its destination folder."""
        assert os.path.exists(
            file), f"this should not happen, path {file} must exist"
        body, resources = self.export(file)
        fw = FilesWriter()
        fw._makedir(self.dst_folder(file))
        fw.build_directory = self.dst_folder(file)
        fw.write(body, resources,
                 notebook_name=self.dest_file(file, withFormat=False))

    def export(self, file):
        """Read ``file`` and return the exporter's ``(body, resources)``."""
        with open(file, "r", encoding=self.read_encoding) as f:
            nb = nbformat.read(f, as_version=4)
        body, resources = self.exporter.from_notebook_node(nb)
        return body, resources

    def needs_format(self, file):
        """Return True when the output is missing or older than ``file``."""
        f_path = self.dst_path(file)
        if not os.path.exists(f_path):
            return True
        notebook_modified = os.stat(file).st_mtime
        formatted_modified = os.stat(f_path).st_mtime
        return notebook_modified > formatted_modified

    def save_figures(self, resources):
        """Print the name and byte size of every entry in 'outputs'.

        Despite its name this method only reports the figures; nothing is
        written to disk.
        """
        if "outputs" not in resources:
            return
        # .items() is required: iterating the dict itself yields only the
        # keys, and unpacking a file name into two variables fails.
        for name, bytes_ in resources["outputs"].items():
            print(f"name = {name}, bytes = {len(bytes_)}")
class Formatter:
    """Convert notebooks to PDF, RST or Markdown next to the source file.

    Extracted output files are renamed to ``<notebook>_<index><ext>`` and
    the references in the exported body are rewritten accordingly.
    """

    def __init__(self, output):
        assert output in SUPPORTED_FORMATS, f"supported formats are {SUPPORTED_FORMATS}"
        self.read_encoding = "utf-8"
        self.write_encoding = "utf-8"
        self.format = output
        if self.format == "pdf":
            pdf = PDFExporter()
            pdf.exclude_output_prompt = True
            pdf.exclude_input = True
            self.exporter = pdf
        elif self.format == "rst":
            self.exporter = RSTExporter()
        else:
            self.exporter = MarkdownExporter()

    def convert(self, file):
        """Export ``file`` and write the result into the notebook's folder."""
        assert os.path.exists(
            file), f"this should not happen, path {file} must exist"
        body, resources = self.export(file)
        fw = FilesWriter()
        fw.build_directory = os.path.dirname(file)
        f_name = os.path.basename(file).replace(".ipynb", "")
        fw.write(body, resources, notebook_name=f_name)

    def dst_path(self, file):
        """Return the path of the converted file (extension swapped)."""
        return file.replace(".ipynb", f".{self.format}")

    def export(self, file):
        """Read ``file`` and return ``(body, resources)`` with renamed images."""
        with open(file, "r", encoding=self.read_encoding) as f:
            nb = nbformat.read(f, as_version=4)
        body, resources = self.exporter.from_notebook_node(nb)
        return self.replace_image_names(body, resources, file)

    def replace_image_names(self, body, resources, file):
        """Rename the outputs after the notebook and update ``body``.

        Returns:
            ``(body, resources)``; both are returned unchanged when there
            are no named outputs.
        """
        names = self._get_output_names(resources)
        if not names:
            return body, resources
        f_name = os.path.basename(file).replace(".ipynb", "")
        new_outputs = {}
        for i, old_key in enumerate(names):
            _, image_extension = os.path.splitext(old_key)
            output_name = f"{f_name}_{i}{image_extension}"
            new_outputs[output_name] = resources["outputs"][old_key]
            body = body.replace(old_key, output_name)
        resources["outputs"] = new_outputs
        return body, resources

    def _get_output_names(self, resources):
        """Return the output names, or [] when there are none.

        'outputs' may be missing, empty or contain a string.
        Ask forgiveness, not permission.
        """
        try:
            return list(resources["outputs"].keys())
        except (KeyError, AttributeError, TypeError):
            # Missing key, a value without .keys(), or non-mapping resources.
            return []

    def needs_format(self, file):
        """Return True when the output is missing or older than ``file``."""
        f_path = self.dst_path(file)
        if not os.path.exists(f_path):
            return True
        notebook_modified = os.stat(file).st_mtime
        formatted_modified = os.stat(f_path).st_mtime
        return notebook_modified > formatted_modified

    def save_figures(self, resources):
        """Print the name and byte size of every entry in 'outputs'.

        Despite its name this method only reports the figures; nothing is
        written to disk.
        """
        if "outputs" not in resources:
            return
        # .items() is required: iterating the dict itself yields only the
        # keys, and unpacking a file name into two variables fails.
        for name, bytes_ in resources["outputs"].items():
            print(f"name = {name}, bytes = {len(bytes_)}")