def export_notebook(notebook_filename, resources):
    """Step 2: render a notebook file to Markdown with the Jekyll template.

    A ``MarkdownExporter`` is configured to look up ``Jekyll_template.tpl``
    in the ``templates/`` directory that sits next to this module and to
    expose the ``jekyllpath`` filter to the template.  The exporter output
    is passed through ``parse_html`` before being returned.

    Parameters
    ----------
    notebook_filename : str
        Name of the notebook file.
    resources : dict
        Resources handed to the exporter.

    Returns
    -------
    content
        Exporter output after ``parse_html`` has been applied.
    resources : dict
        Resources returned by the exporter (possibly modified).
    """
    template_dir = os.path.join(os.path.dirname(__file__), 'templates/')
    markdown_exporter = MarkdownExporter(
        config=Config(),
        template_path=[template_dir],
        template_file='Jekyll_template.tpl',
        filters={'jekyllpath': jekyllpath},
    )
    rendered, resources = markdown_exporter.from_filename(
        notebook_filename, resources=resources)
    return parse_html(rendered), resources
def notebook_to_markdown(path: Union[Path, str]) -> str:
    """
    Convert jupyter notebook to hugo-formatted markdown string

    The notebook's metadata is first refreshed with
    ``update_notebook_metadata``.  The ``front-matter`` entry of the notebook
    metadata is then dumped as indented JSON between ``---`` fences and
    placed in front of the converted (and ``doctor``-ed) markdown.

    Args:
        path: path to notebook

    Returns: hugo-formatted markdown

    Raises:
        AssertionError: if the notebook metadata has no ``front-matter``
            field.
    """
    # first, update the notebook's metadata
    update_notebook_metadata(path)

    # Read explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(Path(path), encoding='utf-8') as fp:
        notebook = nbformat.read(fp, as_version=4)

    # Raised explicitly (instead of an ``assert`` statement) so the check is
    # still performed when Python runs with -O; the exception type is kept.
    if 'front-matter' not in notebook['metadata']:
        raise AssertionError(
            "You must have a front-matter field in the notebook's metadata")

    front_matter_dict = dict(notebook['metadata']['front-matter'])
    front_matter = json.dumps(front_matter_dict, indent=2)

    c = Config()
    c.MarkdownExporter.preprocessors = [CustomPreprocessor]
    markdown_exporter = MarkdownExporter(config=c)
    markdown, _ = markdown_exporter.from_notebook_node(notebook)
    doctored_md = doctor(markdown)

    # added <!--more--> comment to prevent summary creation
    return '\n'.join(
        ('---', front_matter, '---', '<!--more-->', doctored_md))
def nb2md(self, include_front_matter=True):
    """Convert each notebook to Markdown and write it to MARKDOWN_DIRECTORY.

    For every ``(name, notebook)`` pair from ``self.names`` and
    ``self.notebooks`` the target file is chosen as follows:

    * if an existing file in MARKDOWN_DIRECTORY has ``name`` as the part
      after its last ``-`` (and before the first following ``.``), that file
      is overwritten;
    * otherwise, if the directory is empty, a new file named
      ``<date_created>-<name>.md`` is created;
    * otherwise a new file named ``<date_modified>-<name>.md`` is created.

    Parameters
    ----------
    include_front_matter : bool
        When true, ``self._add_front_matter`` is called on the open output
        file before the Markdown body is written.
    """
    for name, notebook in zip(self.names, self.notebooks):
        markdown, _ = MarkdownExporter().from_notebook_node(
            notebook["notebook"])

        # The directory must exist *before* it is listed; previously the
        # existence check came after os.listdir and a missing directory
        # raised FileNotFoundError.
        os.makedirs(MARKDOWN_DIRECTORY, exist_ok=True)
        post_paths = os.listdir(MARKDOWN_DIRECTORY)

        for path in post_paths:
            if path.split("-")[-1].split(".")[0] == name:
                markdown_path = os.path.join(MARKDOWN_DIRECTORY, path)
                print("Updating", path.split("-")[-1], "...")
                break
        else:
            # No existing post for this notebook: the very first post in an
            # empty directory is stamped with the creation date, later ones
            # with the modification date.
            date_key = "date_modified" if post_paths else "date_created"
            markdown_path = os.path.join(
                MARKDOWN_DIRECTORY,
                notebook[date_key] + "-" + name + ".md",
            )

        with open(markdown_path, "wb") as markdown_file:
            if include_front_matter:
                self._add_front_matter(name, notebook, markdown_file)
            markdown_file.write(markdown.encode("utf-8"))
def get_output_resources(self, output_path=None):
    """Export the notebook at ``self.notebook_path`` to Markdown.

    Parameters
    ----------
    output_path : str, optional
        Directory (appended to ``self.blogpath``) for extracted output
        files.  Defaults to
        ``<sep>assets<sep>images<sep><self.type><sep><self.notebook_name>``.

    Side effects
    ------------
    Sets ``self.output_path``, ``self.config``, ``self.output`` and
    ``self.resources``; tries to create the output directory.
    """
    if output_path is None:
        output_path = os.path.join(os.path.sep, "assets", "images",
                                   self.type, self.notebook_name)

    try:
        # We are creating nested directories, hence makedirs.
        os.makedirs(self.blogpath + output_path)
    except OSError:
        # Best effort, as before (typically the directory already exists),
        # but no longer swallowing KeyboardInterrupt/SystemExit or
        # programming errors the way a bare ``except`` did.
        pass

    self.output_path = self.blogpath + "/" + output_path

    md = MarkdownExporter()

    # Extract the resources dictionary for this notebook.  Per the original
    # author's note it has the keys 'config_dir', 'unique_key' and
    # 'output_files_dir'; the last one is overridden below.
    nbapp = nbconvertapp.NbConvertApp()
    self.config = nbapp.init_single_notebook_resources(self.notebook_path)
    self.config['output_files_dir'] = output_path

    self.output, self.resources = md.from_filename(self.notebook_path,
                                                   self.config)
def convert_notebook_to_markdown(filename):
    """Convert a notebook to a Jekyll post under ``ROOT_DIR/_posts``.

    The notebook's base file name (without extension) is split on ``|``
    into ``title | categories | tags``; the pieces are stored in the
    notebook metadata before it is rendered with the ``jekyll.tpl``
    template found in ``ROOT_DIR``.  The post is written to
    ``<ROOT_DIR>/_posts/<date>-<title>.md`` where the date is the current
    time minus one day.

    Parameters
    ----------
    filename : str
        Path of the notebook file.
    """
    # Read explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, 4)

    # Set up the metadata from the file name.
    fname = os.path.splitext(os.path.split(filename)[1])[0]
    metadata = [name.strip() for name in fname.split('|')]
    notebook.metadata['title'] = metadata[0]
    if len(metadata) >= 2:
        notebook.metadata['categories'] = metadata[1]
    if len(metadata) >= 3:
        notebook.metadata['tags'] = metadata[2]

    curr_time = datetime.now() - timedelta(days=1)
    # Format without microseconds.  Slicing str(curr_time)[:-7] gave the same
    # text, except that it cut into the seconds whenever the microsecond
    # field happened to be 0 (str() then omits the fractional part).
    notebook.metadata['modified_date'] = curr_time.strftime(
        "%Y-%m-%d %H:%M:%S")

    exporter = MarkdownExporter()
    exporter.template_file = os.path.join(ROOT_DIR, 'jekyll.tpl')
    body, _ = exporter.from_notebook_node(notebook)

    curr_date = curr_time.strftime("%Y-%m-%d")
    markdown_path = os.path.join(ROOT_DIR, "_posts",
                                 curr_date + "-" + metadata[0] + ".md")

    with open(markdown_path, "w", encoding='utf-8') as f:
        f.write(body)
def to_md(self):
    """Write the notebook as ``<self.path stem>.md`` plus extracted files.

    A fresh ``<stem>_files`` directory (spaces in the stem replaced by
    underscores) is created under ``self.nb_home``; any existing directory
    of that name is removed first.  Files extracted by the exporter are
    written relative to ``self.nb_home``.
    """
    files_dir_name = self.path.stem.replace(' ', '_') + '_files'
    images_dir = self.nb_home / files_dir_name
    if images_dir.is_dir():
        shutil.rmtree(images_dir)
    images_dir.mkdir()

    export_resources = {
        "metadata": {"path": str(self.nb_home)},
        # Relative to the metadata path above.
        'output_files_dir': str(files_dir_name),
    }
    exporter_config = {
        "NbConvertBase": {
            "display_data_priority": self.DATA_DISPLAY_PRIORITY,
        },
    }
    exporter = MarkdownExporter(config=exporter_config)
    markdown_text, exported = exporter.from_notebook_node(
        self.nb, export_resources)

    # The extracted binary outputs come back in the exporter's resources.
    for relative_name, payload in exported["outputs"].items():
        with open(self.nb_home / relative_name, "wb") as binary_file:
            binary_file.write(payload)

    markdown_path = self.path.with_suffix(".md")
    with open(markdown_path, mode="w") as markdown_file:
        markdown_file.write(markdown_text)
def from_file(self, filename):
    """Convert a notebook file to Markdown and store it through ``self.kp``.

    The rendered body and the extracted images are handed to
    ``self.kp.write``; afterwards a copy of the notebook with all code-cell
    outputs and execution counts cleared is stored with
    ``self.kp.write_src``.
    """
    import nbformat
    from nbconvert import MarkdownExporter
    from jinja2 import DictLoader
    from traitlets.config import Config

    exporter_config = Config()
    exporter_config.ExtractOutputPreprocessor.output_filename_template = (
        'images/{unique_key}_{cell_index}_{index}{extension}')
    exporter_config.NbConvertBase.display_data_priority = [
        'application/javascript',
        'text/html',
        'text/markdown',
        'image/svg+xml',
        'text/latex',
        'image/png',
        'image/jpeg',
        'text/plain',
    ]

    notebook = nbformat.read(filename, as_version=4)

    template_loader = DictLoader({'full.tpl': TEMPLATE})
    exporter = MarkdownExporter(config=exporter_config,
                                extra_loaders=[template_loader],
                                template_file='full.tpl')
    body, resources = exporter.from_notebook_node(notebook)

    # Key each extracted image by the part of its name after 'images/'.
    images = {}
    for output_name, payload in resources.get('outputs', {}).items():
        images[output_name.split('images/')[1]] = payload
    self.kp.write(body, images=images)

    # Add cleaned ipynb file
    for cell in notebook['cells']:
        if cell['cell_type'] != 'code':
            continue
        cell['outputs'] = []  # remove output data
        cell['execution_count'] = None  # reset to not executed
    self.kp.write_src(os.path.basename(filename), nbformat.writes(notebook))
def convert(self):
    """Convert the Jupyter notebook at ``self.filepath`` for use in Journal.

    Specifically, this function:

    * renders the notebook to Markdown with ``IPYNB_TEMPLATE``, naming
      extracted outputs under
      ``/images/team/<username>/<post_slug>/``;
    * passes every extracted output (by base name) to ``self.save_image``;
    * writes the Markdown to the path returned by ``generate_post_path``
      for ``<post_slug>.md``.

    Returns the path the post was written to.
    """
    import nbformat
    from traitlets.config import Config
    from nbconvert import MarkdownExporter

    notebook = nbformat.read(self.filepath, as_version=4)

    # Point extracted outputs at the static folder for this user and post.
    exporter_config = Config()
    exporter_config.ExtractOutputPreprocessor.output_filename_template = (
        path.join('/images', 'team', config['username'], self.post_slug,
                  '{unique_key}_{cell_index}_{index}{extension}'))
    exporter = MarkdownExporter(config=exporter_config,
                                raw_template=IPYNB_TEMPLATE)
    post, resources = exporter.from_notebook_node(notebook)

    for image_path, content in resources['outputs'].items():
        self.save_image(path.basename(image_path), content)

    post_path = generate_post_path('{}.md'.format(self.post_slug))
    click.secho('Saving post content to {}'.format(post_path), fg='green')
    with open(post_path, 'w') as post_file:
        post_file.write(post)
    return post_path
def _escape_math_spans(source, pattern):
    """Escape backslash pairs and underscores in each span matched by pattern.

    The matches are collected once from the incoming text; each matched span
    is then replaced in turn by its escaped form.
    """
    for span in pattern.findall(source):
        escaped = span.replace(r"\\", r"\\\\\\\\").replace("_", r"\_")
        source = source.replace(span, escaped)
    return source


def render_notebook(to_render):
    """Render one notebook to ``OUTPUT_DIR/<notebook dir name>.md``.

    The name of the directory containing the notebook is used as the output
    name; its first 10 characters are used as the publish date.  The title
    is taken from the first ``# ...`` line of the first cell, which is then
    removed from that cell.  Math spans in markdown cells get their
    backslash pairs and underscores escaped before conversion.  Extracted
    output files are written to a freshly created
    ``OUTPUT_DIR/<notebook dir name>`` directory.
    """
    os.chdir(BASE_DIR)

    nb_dir = basename(dirname(to_render))
    markdown_file = os.path.join(OUTPUT_DIR, nb_dir + ".md")
    resources_dir = os.path.join(OUTPUT_DIR, nb_dir)

    # Start from an empty resources directory.
    if os.path.isdir(resources_dir):
        shutil.rmtree(resources_dir)
    os.makedirs(resources_dir)

    with open(to_render) as notebook_file:
        notebook = nbformat.reads(notebook_file.read(), as_version=4)

    # Pull the title out of the first cell and drop that line from it.
    first_cell = notebook.cells[0]
    heading = re.search(r'# *(.*)\n', first_cell.source, re.M)
    title = heading.group(1)
    first_cell.source = first_cell.source.replace(heading.group(0), "")

    publish_date = nb_dir[:10]
    front_matter = "".join((
        "---\n",
        f"title: \"{title}\"\n",
        f"date: {publish_date}\n",
        "---\n",
    ))

    inline_math = re.compile(r'(?:[^\$]|^)\$[^\$]+\$(?:[^\$]|$)')
    multiline_math = re.compile(r'\$\$[^\$]+\$\$')

    for cell in notebook.cells:
        if cell['cell_type'] != 'markdown':
            continue
        text = _escape_math_spans(cell['source'], inline_math)
        cell['source'] = _escape_math_spans(text, multiline_math)

    from nbconvert import MarkdownExporter
    md_exporter = MarkdownExporter()
    body, resources = md_exporter.from_notebook_node(notebook)

    for output_name, payload in resources['outputs'].items():
        with open(os.path.join(resources_dir, output_name), "wb") as f:
            f.write(payload)

    with open(markdown_file, "w") as markdown_out:
        markdown_out.write(front_matter)
        markdown_out.write(body)
def convert_nb_to_markdown(
        nb_contents: nbformat.notebooknode.NotebookNode) -> list:
    """Render a notebook node to Markdown and return it as a list of lines.

    The resources returned by the exporter are discarded.
    """
    markdown_text: str
    markdown_text, _unused_resources = MarkdownExporter().from_notebook_node(
        nb_contents)
    return markdown_text.splitlines()
def __init__(self, template, root):
    """Store ``template`` and ``root`` and build the conversion helpers.

    ``self.mexporter`` is a default ``MarkdownExporter``; ``self.markdown``
    is a ``Markdown`` object built from the same template and root.
    """
    self.template = template
    self.root = root
    self.mexporter = MarkdownExporter()
    self.markdown = Markdown(template, root)
def load_file(ipynb_file):
    """Convert a notebook file to Markdown.

    Returns a ``(body, outputs)`` pair: the stripped Markdown text, with its
    first line removed when the text starts with ``#``, and the ``outputs``
    entry of the exporter resources.
    """
    with open(ipynb_file) as handle:
        notebook = nbformat.reads(handle.read(), as_version=4)

    markdown_text, resources = MarkdownExporter().from_notebook_node(notebook)
    markdown_text = markdown_text.strip()

    # If the Markdown starts with a title line, drop that line.
    if markdown_text.startswith('#'):
        _title, _newline, markdown_text = markdown_text.partition('\n')

    return markdown_text, resources['outputs']
def convertNotebook(notebookPath, modulePath):
    """Convert the notebook at ``notebookPath`` to Markdown at ``modulePath``.

    The notebook is read without version conversion
    (``nbformat.NO_CONVERT``); the exporter's resources are discarded.

    Parameters
    ----------
    notebookPath : str
        Path of the notebook file to read.
    modulePath : str
        Path of the file to (over)write with the Markdown text.
    """
    # Explicit UTF-8 keeps reading and writing independent of the platform's
    # default encoding.
    with open(notebookPath, encoding="utf-8") as fh:
        nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)

    exporter = MarkdownExporter()
    source, _ = exporter.from_notebook_node(nb)

    # A single write(); writelines() on a string iterated it character by
    # character.  Plain "w" suffices because the file is never read back.
    with open(modulePath, "w", encoding="utf-8") as fh:
        fh.write(str(source))
def generate_clrs():
    """Convert every notebook under ``docs/CLRS`` to a sibling ``.md`` file.

    Notebooks are processed in sorted path order; the path of each Markdown
    file is printed before it is written.
    """
    for notebook_path in sorted(Path('docs/CLRS').glob('**/*.ipynb')):
        exporter = MarkdownExporter()
        with open(notebook_path) as notebook_file:
            converted = exporter.from_file(notebook_file)
        markdown, _ = converted

        target = notebook_path.parent / f'{notebook_path.stem}.md'
        print(target)
        target.write_text(markdown)
def convert_to_markdown(filename):
    """Read a notebook file and return the exporter's ``(body, resources)``."""
    with open(filename, mode='r') as notebook_file:
        raw_text = notebook_file.read()

    parsed = nbformat.reads(raw_text, as_version=4)
    body, resources = MarkdownExporter().from_notebook_node(parsed)
    return (body, resources)
def markdown():
    """Execute each ``ipynb`` file and write it out as ``<stem>.md``.

    For every notebook returned by ``get_files("ipynb")`` the notebook is
    run through an ``ExecutePreprocessor``, a trailing cell with empty
    source is dropped, and the Markdown is written to ``<stem>.md`` (a
    relative path, so it lands in the current working directory).
    """
    run_cells = preprocessors.ExecutePreprocessor()
    to_markdown = MarkdownExporter()

    for notebook_path in get_files("ipynb"):
        with open(notebook_path, encoding="utf8") as notebook_file:
            nb = nbformat.read(notebook_file, as_version=4)

        run_cells.preprocess(nb)

        # Drop a trailing cell whose source is empty.
        last_cell = nb.cells[-1]
        if not last_cell["source"]:
            nb.cells.pop()

        rendered, _ = to_markdown.from_notebook_node(nb)
        with open(f"{notebook_path.stem}.md", "w", encoding="utf8") as out:
            out.write(rendered)
def notebook_to_markdown(path, date, slug, **kwargs):
    """
    Convert notebook to Markdown format

    The post is written to ``<root>/content/post/<date>-<slug>.md`` and the
    extracted outputs to ``<root>/static/<year>/<month>/<slug>/``, where
    ``<root>`` is the grandparent directory of the notebook.

    Args:
        path: str, path to notebook
        date: datestring in YYYY-MM-DD format
        slug: str, front-matter parameter, used to compose address of blogpost
        kwargs: str, float, int, list, tuple, other front-matter parameters
                recommended to pass title

    Raises:
        AssertionError: if the date does not start with a YYYY-MM-DD
            pattern, the notebook does not exist, or the post directory
            does not exist.
    """
    # Validate the date before it is split into path components; previously
    # a malformed date failed in date.split('-')[1] before this check ran.
    assert bool(re.match(r'[0-9]{4}-[0-1][0-9]-[0-3][0-9]', date)
                ), 'Incorrect date format, need YYYY-MM-DD'

    path_nb = Path(path)
    year, month = date.split('-')[:2]
    path_out = path_nb.parents[1] / 'static' / year / month / slug
    path_post = path_nb.parents[1] / 'content/post/' / (
        date + '-' + slug + '.md')

    assert path_nb.exists()
    assert path_post.parent.exists()

    # convert notebook to .md -------------------------------------------------
    # Explicit UTF-8 keeps reading and writing independent of the platform's
    # default encoding.
    with path_nb.open(encoding='utf-8') as fp:
        notebook = nbformat.read(fp, as_version=4)

    c = Config()
    c.MarkdownExporter.preprocessors = [CustomPreprocessor]
    markdown_exporter = MarkdownExporter(config=c)
    markdown, resources = markdown_exporter.from_notebook_node(notebook)
    md = doctor(markdown)

    # NOTE(review): 'ture' looks like a typo for 'true'; left unchanged
    # because how make_yaml_header renders the value is not visible here.
    yaml = make_yaml_header(date=date, slug=slug, mathjax='ture', **kwargs)
    md = yaml + md

    with path_post.open('w', encoding='utf-8') as f:
        f.write(md)

    # write outputs as png ----------------------------------------------------
    if 'outputs' in resources:
        path_out.mkdir(parents=True, exist_ok=True)
        for key, data in resources['outputs'].items():
            with (path_out / key).open('wb') as f:
                f.write(data)
def post(self, path, categories):
    """Publish a notebook or Markdown file through ``self.typecho``.

    For ``.ipynb`` files the notebook is converted to Markdown.  If its
    first cell starts with ``- `` that cell is parsed as a YAML list of
    mappings supplying ``title`` and ``tags``, and is removed from the
    notebook before conversion.  Otherwise, and for ``.md`` files, the
    title is derived from the file name with ``self.name_convent``.

    Parameters
    ----------
    path : str
        Path of the ``.ipynb`` or ``.md`` file.
    categories
        Categories passed to the created ``Post``.

    Returns
    -------
    Whatever ``self.typecho.new_post`` returns.
    """
    filename, filetype = os.path.splitext(os.path.basename(path))
    post = None

    if filetype == '.ipynb':
        # Context manager so the file handle is closed deterministically.
        with open(path, 'r') as notebook_file:
            jake_notebook = nbformat.reads(notebook_file.read(),
                                           as_version=4)
        mark = MarkdownExporter()

        # check title
        if len(jake_notebook.cells) >= 1:
            source = str(jake_notebook.cells[0].source)
            if source.startswith('- '):
                # safe_load: the cell is file content, and yaml.load without
                # an explicit Loader is unsafe (and rejected outright by
                # recent PyYAML releases).
                front_matter = {}
                for entry in yaml.safe_load(source):
                    front_matter.update(entry)
                title = front_matter.get("title", filename)
                tags = front_matter.get("tags", '')
                # NOTE(review): the original computed a fallback from the
                # cell's "category" entry and then immediately overwrote it
                # with ``categories``; the effective behaviour (always use
                # ``categories``) is kept. Confirm which was intended.
                del jake_notebook.cells[0]
                content, _ = mark.from_notebook_node(jake_notebook)
                post = Post(
                    title=title,
                    description=content,
                    mt_keywords=tags,
                    categories=categories,
                )

        if not post:
            # Convert only once, after deciding whether the first cell had
            # to be removed.
            content, _ = mark.from_notebook_node(jake_notebook)
            post = Post(
                title=self.name_convent(filename),
                description=content,
                categories=categories,
            )
    elif filetype == '.md':
        with open(path, 'r') as markdown_file:
            content = markdown_file.read()
        post = Post(
            title=self.name_convent(filename),
            description=content,
            categories=categories,
        )
    else:
        # NOTE(review): ``post`` stays None here and is still handed to
        # new_post below, as in the original; confirm this is intended.
        print("error {}".format(filetype))

    return self.typecho.new_post(post, publish=True)
def __init__(self, output):
    """Select the exporter that matches the requested output format.

    ``output`` must be one of ``SUPPORTED_FORMATS``.  ``"pdf"`` gets a
    ``PDFExporter`` with output prompts and inputs excluded, ``"rst"`` an
    ``RSTExporter``, and anything else a ``MarkdownExporter``.  Read and
    write encodings are both set to UTF-8.
    """
    assert output in SUPPORTED_FORMATS, f"supported formats are {SUPPORTED_FORMATS}"

    self.read_encoding = self.write_encoding = "utf-8"
    self.format = output

    if self.format == "rst":
        chosen = RSTExporter()
    elif self.format != "pdf":
        chosen = MarkdownExporter()
    else:
        chosen = PDFExporter()
        chosen.exclude_output_prompt = True
        chosen.exclude_input = True
    self.exporter = chosen
def convert_notebook(name, notebook, output_dir):
    """Export a notebook as ``<name>.py`` and ``<name>.md`` in ``output_dir``.

    The Markdown is passed through ``post_process_markdown`` before being
    written.  Outputs extracted by the Markdown exporter are named under a
    ``<name>/`` prefix and written below ``output_dir``, creating parent
    directories as needed.
    """
    md_config = Config()
    md_config.ExtractOutputPreprocessor.output_filename_template = (
        name + '/{unique_key}_{cell_index}_{index}{extension}')
    md_exporter = MarkdownExporter(config=md_config)
    py_exporter = PythonExporter()

    script_source, _ = py_exporter.from_notebook_node(notebook)
    with open(output_dir / f'{name}.py', 'w') as script_file:
        script_file.write(script_source)

    markdown_source, resources = md_exporter.from_notebook_node(notebook)
    with open(output_dir / f'{name}.md', 'w') as markdown_file:
        markdown_file.write(post_process_markdown(markdown_source))

    for relative_name, payload in resources['outputs'].items():
        target = output_dir / relative_name
        target.parent.mkdir(exist_ok=True, parents=True)
        with open(target, 'wb') as asset_file:
            asset_file.write(payload)
def convert_notebooks(dirs):
    """Convert every ``.ipynb`` file found under the given directories.

    All directories are walked first; the collected notebooks are then
    converted one by one with a single shared exporter (template
    ``ext_md``, with this module's directory added to the template search
    path) and a single ``FilesWriter``.
    """
    notebooks = []
    for top in dirs:
        for root, _, filenames in os.walk(top):
            notebooks.extend(
                os.path.join(root, filename)
                for filename in filenames
                if filename.endswith('.ipynb')
            )

    exporter = MarkdownExporter()
    exporter.extra_template_basedirs.append(os.path.dirname(__file__))
    exporter.template_name = 'ext_md'
    writer = writers.FilesWriter()

    for notebook_path in notebooks:
        convert_notebook(notebook_path, exporter=exporter, writer=writer)
def prepare_readme():
    """Convert notebook to markdown and write it to the readme.

    Reads ``graphql_example/graphql_example.ipynb``, renders it with
    ``MarkdownExporter`` and overwrites ``README.md`` in the current working
    directory, printing a progress message before each step.
    """
    from nbconvert import MarkdownExporter
    import nbformat

    print('reading notebook')
    # Explicit UTF-8 keeps reading and writing independent of the platform's
    # default encoding.
    notebook = nbformat.reads(
        Path('graphql_example/graphql_example.ipynb').read_text(
            encoding='utf-8'),
        as_version=4)

    # The progress messages used to say "RST" although the exporter and the
    # output file are Markdown.
    print('converting notebook to Markdown')
    exporter = MarkdownExporter()
    body, *_ = exporter.from_notebook_node(notebook)

    print('writing Markdown to readme')
    with open('README.md', 'w', encoding='utf-8') as readme:
        readme.write(body)

    print('success')
def __init__(self, output_format, destination_mode):
    """Validate the arguments and select the matching exporter.

    ``output_format`` must be one of ``SUPPORTED_FORMATS`` and
    ``destination_mode`` one of ``SUPPORTED_DESTINATIONS_MODES``.  ``"pdf"``
    gets a ``PDFExporter`` with output prompts and inputs excluded,
    ``"rst"`` an ``RSTExporter``, and anything else a ``MarkdownExporter``.
    Read and write encodings are both set to UTF-8.
    """
    assert output_format in SUPPORTED_FORMATS, f"supported formats are {SUPPORTED_FORMATS}"
    assert (
        destination_mode in SUPPORTED_DESTINATIONS_MODES
    ), f"supported destination modes are {SUPPORTED_DESTINATIONS_MODES}"

    self.read_encoding = self.write_encoding = "utf-8"
    self.format = output_format
    self.destination_mode = destination_mode

    if self.format == "rst":
        chosen = RSTExporter()
    elif self.format != "pdf":
        chosen = MarkdownExporter()
    else:
        chosen = PDFExporter()
        chosen.exclude_output_prompt = True
        chosen.exclude_input = True
    self.exporter = chosen
def convert_notebook_to_presentation(notebook_path, markdown_path):
    """Render a notebook to Markdown with the ``hidecode.tplx`` template.

    The notebook is run through the ``ChangeIbynbLink`` preprocessor and the
    result is written with an nbconvert ``FilesWriter`` under the name
    ``markdown_path``.
    """
    # 1. Load the notebook.
    with open(notebook_path, encoding="utf8") as notebook_file:
        nb = nbformat.read(notebook_file, as_version=4)

    # 2. Build the exporter: the preprocessor comes from the config, the
    #    template file lives next to this module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    exporter_config = Config()
    exporter_config.MarkdownExporter.preprocessors = [ChangeIbynbLink]
    markdown_exporter = MarkdownExporter(config=exporter_config)
    markdown_exporter.template_file = os.path.join(module_dir,
                                                   'hidecode.tplx')

    # 3. Convert the notebook and hand the result to the writer.
    body, resources = markdown_exporter.from_notebook_node(nb)
    nbconvert.writers.FilesWriter().write(body, resources, markdown_path)
def on_config(self, config):
    """Build the notebook exporter and store it in ``config``.

    A ``MarkdownExporter`` configured from this object's ``execute``,
    ``preamble`` and ``timeout`` settings is stored under
    ``config["notebook_exporter"]``; the same ``config`` object is returned.
    When ``execute`` is falsy the exporter gets an empty ``Config``.
    """
    c = Config()
    if self.config["execute"]:
        if self.config["preamble"]:
            # NOTE(review): this is the list object held by the class-level
            # trait, not a copy, so the insert below modifies it in place;
            # presumably every call to on_config inserts the entry again --
            # confirm.
            default_preprocessors = MarkdownExporter.default_preprocessors.default_args[
                0]
            # Place the preamble-aware preprocessor directly before the
            # stock ExecutePreprocessor entry.
            default_preprocessors.insert(
                default_preprocessors.index(
                    "nbconvert.preprocessors.ExecutePreprocessor"),
                "nbconvert_utils.ExecuteWithPreamble",
            )
            # NOTE(review): assigned on the root Config rather than under a
            # class section such as ``c.MarkdownExporter`` -- verify that
            # this assignment has any effect of its own.
            c.default_preprocessors = default_preprocessors
            c.ExecutePreprocessor.timeout = self.config["timeout"]
            c.ExecuteWithPreamble.enabled = True
            c.ExecuteWithPreamble.preamble_scripts = [
                self.config["preamble"]
            ]
        else:
            # Execution requested without a preamble script.
            c.Executor.enabled = True
    config["notebook_exporter"] = MarkdownExporter(config=c)
    return config
def to_md(self):
    """Write the notebook as Markdown into ``self.final_nb_home``.

    When ``self.first`` is true, the ``'md'`` preprocessors are fetched, the
    images directory is created and the preprocessors are run.  The
    notebook is then exported, the extracted output files are written
    relative to ``self.final_nb_home``, the Markdown is saved as
    ``<self.document_name>.md`` and the resources are reset.
    """
    if self.first:
        md_preprocessors = self.get_preprocessors('md')
        self.create_images_dir()
        self.preprocess(md_preprocessors)

    exporter_config = {
        'NbConvertBase': {
            'display_data_priority': self.DISPLAY_DATA_PRIORITY,
        },
    }
    exporter = MarkdownExporter(config=exporter_config)
    markdown_text, self.resources = exporter.from_notebook_node(
        self.nb, self.resources)

    # The extracted binary outputs come back in the exporter's resources.
    for relative_name, payload in self.resources['outputs'].items():
        with open(self.final_nb_home / relative_name, 'wb') as binary_file:
            binary_file.write(payload)

    markdown_path = self.final_nb_home / (self.document_name + '.md')
    with open(markdown_path, mode='w') as markdown_file:
        markdown_file.write(markdown_text)

    self.reset_resources()
def export_md(jupyter_output, output_notebook, add_nunit_attachment,
              file_ext='.md', root="."):
    """Export Jupyter output to a Markdown file.

    Delegates to ``export_notebook`` with a fresh ``MarkdownExporter``; all
    arguments are forwarded unchanged.

    :param jupyter_output: forwarded to ``export_notebook``
    :param output_notebook: forwarded to ``export_notebook``
    :param add_nunit_attachment: forwarded to ``export_notebook``
    :param file_ext: forwarded to ``export_notebook`` (default ``'.md'``)
    :param root: forwarded to ``export_notebook`` as ``root`` (default ``"."``)
    """
    export_notebook(
        MarkdownExporter(),
        jupyter_output,
        output_notebook,
        add_nunit_attachment,
        file_ext,
        root=root,
    )
def to_md(self):
    """Export the notebook to Markdown, returning or writing the result.

    Every image file name occurring in the Markdown is replaced by the
    URL-quoted path ``<self.image_dir_name>/<file name>``.  In web-app mode
    the Markdown, the images and the image directory name are stored in
    ``self.return_data``; otherwise the images are written to a freshly
    created image directory under ``self.final_nb_home`` and the Markdown
    to ``<self.document_name>.md`` next to it.
    """
    exporter_config = {
        'NbConvertBase': {
            'display_data_priority': self.DISPLAY_DATA_PRIORITY,
        },
    }
    exporter = MarkdownExporter(config=exporter_config)
    md_data, self.resources = exporter.from_notebook_node(
        self.nb, self.resources)

    # Extracted outputs first; entries of 'image_data_dict' win on clashes.
    image_data_dict = dict(self.resources['outputs'])
    image_data_dict.update(self.resources['image_data_dict'])

    for image_name in image_data_dict:
        quoted_path = urllib.parse.quote(
            str(Path(self.image_dir_name) / image_name))
        md_data = md_data.replace(image_name, quoted_path)

    if self.web_app:
        self.return_data['md_data'] = md_data
        self.return_data['md_images'] = image_data_dict
        self.return_data['image_dir_name'] = self.image_dir_name
        return

    # Start from an empty image directory.
    image_dir = self.final_nb_home / self.image_dir_name
    if image_dir.is_dir():
        shutil.rmtree(image_dir)
    image_dir.mkdir()

    for image_name, payload in image_data_dict.items():
        with open(image_dir / image_name, 'wb') as image_file:
            image_file.write(payload)

    markdown_path = self.final_nb_home / (self.document_name + '.md')
    with open(markdown_path, mode='w') as markdown_file:
        markdown_file.write(md_data)
else: td.string = format_number(val, 3) output.data['text/html'] = str(soup) try: nb.metadata.notebook except AttributeError: nb.metadata.notebook = notebook_name md_writer.files = files return nb, resources app = NbConvertApp(output_base=output_name) md_exporter = MarkdownExporter(template_file='./index.md.j2', preprocessors=[CustomPreprocess]) md_writer = FilesWriter(build_directory=output_dir) app.exporter = md_exporter app.writer = md_writer app.convert_single_notebook(notebook) if len(md_writer.files) > 0: rootLogger.info("Collating files...") for file in md_writer.files: src = normpath(join(output_dir, file)) dst = join(output_dir, output_name + '_files', basename(file)) rename(src, dst) rootLogger.info("Moving '{}'".format(src)) rootLogger.info("to '{}'".format(dst)) rootLogger.info("...done.")
import os
import re
from pathlib import Path

from nbconvert import MarkdownExporter

from headers import headers


def atoi(text):
    """Return ``text`` as an int if it consists only of digits, else as is."""
    return int(text) if text.isdigit() else text


def natural_keys(text):
    """Split ``text`` into digit and non-digit runs for natural sorting.

    Digit runs become ints, so that e.g. ``"nb10"`` sorts after ``"nb2"``.
    """
    # Raw string: "\d" in a plain string literal is an invalid escape.
    return [atoi(chunk) for chunk in re.split(r"(\d+)", text)]


# Named so that the builtin ``dir`` is not shadowed.
tutorials_dir = Path("../../../../tutorials")

# Sort notebooks based on the numbers within their names.
notebooks = sorted(
    (x for x in os.listdir(tutorials_dir) if x.endswith(".ipynb")),
    key=natural_keys,
)

e = MarkdownExporter(exclude_output=True)
# Output files and header entries are numbered from 1.
for i, nb in enumerate(notebooks, start=1):
    body, resources = e.from_filename(tutorials_dir / nb)
    # Explicit UTF-8 keeps the output independent of the platform default.
    with open(f"{i}.md", "w", encoding="utf-8") as f:
        f.write(headers[i] + "\n\n")
        f.write(body)