Ejemplo n.º 1
0
    def post(self, path, categories):
        """Publish the file at ``path`` as a new post through ``self.typecho``.

        ``.ipynb`` files are converted to Markdown with nbconvert. When the
        first notebook cell starts with ``- `` and parses as a YAML list of
        mappings, it is treated as front matter: ``title``, ``tags`` and
        ``category`` are read from it and the cell is removed from the
        rendered body. ``.md`` files are posted verbatim. Any other extension
        prints an error and nothing is published.

        Args:
            path: Path to the ``.ipynb`` or ``.md`` file to publish.
            categories: Categories for the post. When falsy, the
                comma-separated ``category`` front-matter entry (if present)
                is used instead.
        """
        filename, filetype = os.path.splitext(os.path.basename(path))

        if filetype == '.ipynb':
            with open(path, 'r') as notebook_file:
                notebook = nbformat.reads(notebook_file.read(), as_version=4)

            # Fields used when the notebook carries no front-matter cell.
            post_fields = {
                'title': self.name_convent(filename),
                'categories': categories,
            }

            if notebook.cells:
                source = str(notebook.cells[0].source)
                if source.startswith('- '):
                    # safe_load: the cell is document content, so it must not
                    # be able to construct arbitrary Python objects.
                    entries = yaml.safe_load(source)
                    # An ordinary Markdown bullet list also starts with "- ";
                    # only a list of mappings counts as front matter.
                    if (isinstance(entries, list)
                            and all(isinstance(e, dict) for e in entries)):
                        meta = {}
                        for entry in entries:
                            meta.update(entry)
                        post_fields = {
                            'title': meta.get("title", filename),
                            'mt_keywords': meta.get("tags", ''),
                            # Explicit categories win; otherwise fall back to
                            # the front-matter value.
                            'categories': (
                                categories
                                or meta.get("category", '').split(',')),
                        }
                        # The front-matter cell is not part of the post body.
                        del notebook.cells[0]

            content, _ = MarkdownExporter().from_notebook_node(notebook)
            post = Post(description=content, **post_fields)
        elif filetype == '.md':
            with open(path, 'r') as markdown_file:
                content = markdown_file.read()
            post = Post(
                title=self.name_convent(filename),
                description=content,
                categories=categories,
            )
        else:
            print("error {}".format(filetype))
            return

        self.typecho.new_post(post, publish=True)
Ejemplo n.º 2
0
    def from_file(self, filename):
        """Render the notebook at ``filename`` to Markdown and store it in ``self.kp``.

        The Markdown body and the extracted output images are passed to
        ``self.kp.write``; a copy of the notebook with code-cell outputs and
        execution counts cleared is passed to ``self.kp.write_src``.

        Args:
            filename: Path of the notebook to read.
        """
        import nbformat
        from nbconvert import MarkdownExporter
        from jinja2 import DictLoader
        from traitlets.config import Config

        export_config = Config()
        # Extracted outputs are named under an "images/" prefix; the prefix is
        # stripped again below when the image mapping is built.
        export_config.ExtractOutputPreprocessor.output_filename_template = (
            'images/{unique_key}_{cell_index}_{index}{extension}')
        export_config.NbConvertBase.display_data_priority = [
            'application/javascript',
            'text/html',
            'text/markdown',
            'image/svg+xml',
            'text/latex',
            'image/png',
            'image/jpeg',
            'text/plain',
        ]

        notebook = nbformat.read(filename, as_version=4)

        template_loader = DictLoader({'full.tpl': TEMPLATE})
        exporter = MarkdownExporter(
            config=export_config,
            extra_loaders=[template_loader],
            template_file='full.tpl',
        )
        body, resources = exporter.from_notebook_node(notebook)

        images = {}
        for output_name, payload in resources.get('outputs', {}).items():
            images[output_name.split('images/')[1]] = payload
        self.kp.write(body, images=images)

        # Store a cleaned copy of the notebook source alongside the post.
        for cell in notebook['cells']:
            if cell['cell_type'] != 'code':
                continue
            cell['outputs'] = []  # drop output data
            cell['execution_count'] = None  # mark as not executed
        self.kp.write_src(os.path.basename(filename),
                          nbformat.writes(notebook))
Ejemplo n.º 3
0
    def convert(self):
        """Convert the notebook at ``self.filepath`` into a Markdown post.

        The notebook is exported with ``IPYNB_TEMPLATE``. Extracted outputs
        are named under ``/images/team/<username>/<post_slug>/`` and each one
        is handed to ``self.save_image`` by its base name. The Markdown is
        written to the path returned by ``generate_post_path``.

        Returns:
            The path the Markdown post was written to.
        """
        import nbformat
        from traitlets.config import Config
        from nbconvert import MarkdownExporter

        # Point extracted outputs at the per-user, per-post image folder.
        export_config = Config()
        export_config.ExtractOutputPreprocessor.output_filename_template = \
            path.join('/images', 'team', config['username'], self.post_slug,
                      '{unique_key}_{cell_index}_{index}{extension}')

        source_notebook = nbformat.read(self.filepath, as_version=4)
        md_exporter = MarkdownExporter(config=export_config,
                                       raw_template=IPYNB_TEMPLATE)
        markdown_text, resources = md_exporter.from_notebook_node(
            source_notebook)

        for output_path, payload in resources['outputs'].items():
            self.save_image(path.basename(output_path), payload)

        post_path = generate_post_path('{}.md'.format(self.post_slug))
        click.secho('Saving post content to {}'.format(post_path), fg='green')
        with open(post_path, 'w') as post_file:
            post_file.write(markdown_text)
        return post_path
Ejemplo n.º 4
0
def notebook_to_markdown(path: Union[Path, str]) -> str:
    """
    Render a Jupyter notebook as a Hugo-formatted Markdown string.

    The notebook's ``front-matter`` metadata is serialised as JSON between
    ``---`` fences, followed by a ``<!--more-->`` marker and the exported
    Markdown after it has been passed through ``doctor``.

    Args:
        path: path to notebook

    Returns: hugo-formatted markdown

    """
    # Refresh the notebook's metadata on disk before reading it back.
    update_notebook_metadata(path)

    with open(Path(path)) as notebook_file:
        notebook = nbformat.read(notebook_file, as_version=4)

    metadata = notebook['metadata']
    assert 'front-matter' in metadata, \
        "You must have a front-matter field in the notebook's metadata"
    front_matter = json.dumps(dict(metadata['front-matter']), indent=2)

    exporter_config = Config()
    exporter_config.MarkdownExporter.preprocessors = [CustomPreprocessor]
    exporter = MarkdownExporter(config=exporter_config)
    markdown, _ = exporter.from_notebook_node(notebook)

    # The <!--more--> comment right after the front matter prevents summary
    # creation.
    sections = ['---', front_matter, '---', '<!--more-->', doctor(markdown)]
    return '\n'.join(sections)
Ejemplo n.º 5
0
def convert_notebook_to_markdown(filename):
    """Export a notebook to a Jekyll post under ``ROOT_DIR/_posts``.

    The notebook's file name (without extension) is split on ``|`` into up
    to three fields, stored in the notebook metadata as ``title``,
    ``categories`` and ``tags``. The post is rendered with the
    ``jekyll.tpl`` template in ``ROOT_DIR`` and written to
    ``_posts/<date>-<title>.md``.

    Args:
        filename: Path of the notebook to convert.
    """
    # Notebooks are UTF-8 JSON; be explicit so the platform default encoding
    # cannot break non-ASCII content.
    with open(filename, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, 4)

    # Set up the metadata from the file name.
    fname = os.path.splitext(os.path.basename(filename))[0]
    metadata = [name.strip() for name in fname.split('|')]

    notebook.metadata['title'] = metadata[0]
    if len(metadata) >= 2:
        notebook.metadata['categories'] = metadata[1]
    if len(metadata) >= 3:
        notebook.metadata['tags'] = metadata[2]

    # NOTE(review): the timestamp is deliberately set one day in the past,
    # presumably so the post is never treated as future-dated — confirm.
    curr_time = datetime.now() - timedelta(days=1)
    # Format explicitly: slicing str(curr_time) to drop the microseconds
    # truncates the wrong characters when the microsecond field is 0.
    notebook.metadata['modified_date'] = curr_time.strftime(
        "%Y-%m-%d %H:%M:%S")

    exporter = MarkdownExporter()
    exporter.template_file = os.path.join(ROOT_DIR, 'jekyll.tpl')
    body, _ = exporter.from_notebook_node(notebook)

    curr_date = curr_time.strftime("%Y-%m-%d")
    markdown_path = os.path.join(ROOT_DIR, "_posts",
                                 curr_date + "-" + metadata[0] + ".md")

    with open(markdown_path, "w", encoding='utf-8') as f:
        f.write(body)
Ejemplo n.º 6
0
    def to_md(self):
        """Export ``self.nb`` to Markdown next to the notebook.

        Output files go into a freshly created ``<stem>_files`` directory
        under ``self.nb_home`` (spaces in the stem become underscores), and
        the Markdown text is written to ``self.path`` with a ``.md`` suffix.
        """
        files_dir_name = self.path.stem.replace(' ', '_') + '_files'
        files_dir = self.nb_home / files_dir_name
        # Always start from an empty output directory.
        if files_dir.is_dir():
            shutil.rmtree(files_dir)
        files_dir.mkdir()

        exporter = MarkdownExporter(config={
            "NbConvertBase": {
                "display_data_priority": self.DATA_DISPLAY_PRIORITY
            }
        })
        export_resources = {
            "metadata": {
                "path": str(self.nb_home)
            },
            # This is relative to the metadata path above.
            'output_files_dir': str(files_dir_name)
        }
        markdown_text, produced = exporter.from_notebook_node(
            self.nb, export_resources)

        # The exporter returns the binary output files in produced["outputs"].
        for relative_name, payload in produced["outputs"].items():
            with open(self.nb_home / relative_name, "wb") as sink:
                sink.write(payload)

        with open(self.path.with_suffix(".md"), mode="w") as sink:
            sink.write(markdown_text)
Ejemplo n.º 7
0
def render_notebook(to_render):
    """Render a notebook to a Markdown file with YAML front matter.

    The working directory is changed to ``BASE_DIR``. The title is taken
    from the first ``# ...`` line of the first cell (and removed from it);
    the publish date is the first 10 characters of the notebook's parent
    directory name. Backslash pairs and underscores inside ``$...$`` and
    ``$$...$$`` spans of Markdown cells are escaped before export. Output
    files are written to ``OUTPUT_DIR/<dir>/`` and the Markdown to
    ``OUTPUT_DIR/<dir>.md``.

    Args:
        to_render: Path of the notebook to render.

    Raises:
        ValueError: If the notebook has no cells or its first cell has no
            ``# Title`` line terminated by a newline.
    """
    os.chdir(BASE_DIR)
    nb_dir = basename(dirname(to_render))
    markdown_file = os.path.join(OUTPUT_DIR, nb_dir + ".md")
    resources_dir = os.path.join(OUTPUT_DIR, nb_dir)

    with open(to_render) as notebook_file:
        notebook = nbformat.reads(notebook_file.read(), as_version=4)

    # Validate the input before touching any existing output on disk.
    title_match = None
    if notebook.cells:
        title_match = re.search(r'# *(.*)\n', notebook.cells[0].source, re.M)
    if title_match is None:
        raise ValueError(
            "{}: first cell must contain a '# Title' line".format(to_render))

    # Start from an empty resources directory.
    if os.path.isdir(resources_dir):
        shutil.rmtree(resources_dir)
    os.makedirs(resources_dir)

    title = title_match.group(1)
    first_cell = notebook.cells[0]
    first_cell.source = first_cell.source.replace(title_match.group(0), "")

    publish_date = nb_dir[:10]
    front_matter = "---\n"
    front_matter += f"title: \"{title}\"\n"
    front_matter += f"date: {publish_date}\n"
    front_matter += "---\n"

    inline_math = re.compile(r'(?:[^\$]|^)\$[^\$]+\$(?:[^\$]|$)')
    multiline_math = re.compile(r'\$\$[^\$]+\$\$')

    for cell in notebook.cells:
        if cell['cell_type'] != 'markdown':
            continue
        source = cell['source']

        # Inline spans are handled first; display spans are then searched in
        # the already-updated text.
        for pattern in (inline_math, multiline_math):
            for span in pattern.findall(source):
                escaped = span.replace(r"\\", r"\\\\\\\\")
                escaped = escaped.replace("_", r"\_")
                source = source.replace(span, escaped)

        cell['source'] = source

    from nbconvert import MarkdownExporter
    md_exporter = MarkdownExporter()
    body, resources = md_exporter.from_notebook_node(notebook)

    for filename, data in resources['outputs'].items():
        with open(os.path.join(resources_dir, filename), "wb") as f:
            f.write(data)

    with open(markdown_file, "w") as output:
        output.write(front_matter)
        output.write(body)
Ejemplo n.º 8
0
def convert_nb_to_markdown(
        nb_contents: nbformat.notebooknode.NotebookNode) -> list:
    """Export a notebook node to Markdown and return it as a list of lines."""
    markdown_text: str
    markdown_text, _resources = MarkdownExporter().from_notebook_node(
        nb_contents)
    return markdown_text.splitlines()
def convertNotebook(notebookPath, modulePath):
    """Export the notebook at ``notebookPath`` to Markdown at ``modulePath``.

    The notebook is read without version conversion
    (``nbformat.NO_CONVERT``).

    Args:
        notebookPath: Path of the notebook to read.
        modulePath: Path of the Markdown file to write (overwritten).
    """
    with open(notebookPath) as fh:
        nb = nbformat.reads(fh.read(), nbformat.NO_CONVERT)

    source, _ = MarkdownExporter().from_notebook_node(nb)

    # write(), not writelines(): writelines() on a str iterates it one
    # character at a time. Plain "w" is enough since nothing is read back.
    with open(modulePath, "w") as fh:
        fh.write(str(source))
Ejemplo n.º 10
0
 def load_file(ipynb_file):
     """Export a notebook to Markdown, dropping a leading title line.

     Returns:
         A ``(body, outputs)`` tuple: the stripped Markdown text (without its
         first line when that line starts with ``#``) and
         ``resources['outputs']`` from the exporter.
     """
     with open(ipynb_file) as notebook_file:
         notebook = nbformat.reads(notebook_file.read(), as_version=4)
     markdown_text, resources = MarkdownExporter().from_notebook_node(
         notebook)
     markdown_text = markdown_text.strip()
     if markdown_text.startswith('#'):
         # The first line is a heading: keep only what follows it.
         _, _, markdown_text = markdown_text.partition('\n')
     return markdown_text, resources['outputs']
Ejemplo n.º 11
0
def convert_to_markdown(filename):
    """Read the notebook at ``filename`` and export it to Markdown.

    Returns:
        The ``(body, resources)`` pair produced by ``MarkdownExporter``.
    """
    with open(filename, mode='r') as handle:
        notebook = nbformat.reads(handle.read(), as_version=4)

    exporter = MarkdownExporter()
    body, resources = exporter.from_notebook_node(notebook)
    return (body, resources)
Ejemplo n.º 12
0
def markdown():
    """Execute each notebook from ``get_files("ipynb")`` and export it to Markdown.

    Every notebook is run with ``ExecutePreprocessor``, a trailing cell with
    empty source is dropped, and the result is written to ``<stem>.md`` in
    the current working directory.
    """
    executor = preprocessors.ExecutePreprocessor()
    exporter = MarkdownExporter()
    for notebook in get_files("ipynb"):
        with open(notebook, encoding="utf8") as nb_file:
            nb = nbformat.read(nb_file, as_version=4)
        executor.preprocess(nb)

        # Drop a trailing empty cell. The `nb.cells` guard keeps a notebook
        # without any cells from raising IndexError.
        if nb.cells and not nb.cells[-1]["source"]:
            nb.cells.pop()
        # Named `rendered` so the local does not shadow this function.
        rendered, _ = exporter.from_notebook_node(nb)
        with open(f"{notebook.stem}.md", "w", encoding="utf8") as writable:
            writable.write(rendered)
Ejemplo n.º 13
0
def convert(notebook_fname, strip_code=False):
    """Convert a notebook into a Jekyll post with its output assets.

    The Markdown file is written as ``<basename>.md`` in the current
    directory and then moved to ``../_posts``; extracted outputs are written
    to ``../assets/posts/<basename>/`` and referenced in the body through
    ``/assets/posts/<basename>/<name>``.

    Args:
        notebook_fname: Notebook file name; its name without extension is
            used for the post, and the title is that name from the 12th
            character on with ``-`` replaced by spaces.
        strip_code: When true, every cell is passed through
            ``strip_code_source`` and empty python code fences are removed
            from the exported body.
    """
    root = '..'
    basename, _ext = os.path.splitext(notebook_fname)
    # NOTE(review): skips an 11-character prefix, presumably 'YYYY-MM-DD-'
    # — confirm against the file naming convention.
    title = basename[11:].replace('-', ' ')

    mathjax = '<script src="https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript"></script>'
    header = [
        '---',
        'title: ' + title,
        'layout: post',
        '---',
        '',
        mathjax,
        '',
    ]

    asset_dir = '/assets/posts/' + basename
    asset_realpath = root + asset_dir + '/'
    if not os.path.exists(asset_realpath):
        os.makedirs(asset_realpath)

    post_realpath = root + '/_posts'

    notebook = nbformat.read(notebook_fname, as_version=4)
    if strip_code:
        notebook['cells'] = [
            strip_code_source(cell) for cell in notebook['cells']
        ]

    body, resources = MarkdownExporter().from_notebook_node(notebook)
    if strip_code:
        body = body.replace('```python\n\n```', '')

    outputs = resources['outputs']
    # Map each extracted output name to its site-absolute asset path.
    output_paths = {name: asset_dir + '/' + name for name in outputs}
    final_body = replace_by_dict(body, output_paths) if output_paths else body
    markdown_lines = header + final_body.split('\n')

    for output_name, payload in outputs.items():
        with open(asset_realpath + output_name, 'wb') as asset_file:
            asset_file.write(payload)

    markdown_fname = basename + '.md'
    with open(markdown_fname, 'w') as mdfile:
        mdfile.write('\n'.join(markdown_lines))

    shutil.move(markdown_fname, post_realpath + '/' + markdown_fname)
Ejemplo n.º 14
0
def notebook_to_markdown(path, date, slug, **kwargs):
    """
    Convert notebook to Markdown format

    The post is written to ``content/post/<date>-<slug>.md`` and the
    extracted outputs to ``static/<year>/<month>/<slug>/``, both located two
    levels above the notebook.

    Args:
        path: str, path to notebook
        date: datestring in YYYY-MM-DD format
        slug: str, front-matter parameter, used to compose address of blogpost
        kwargs: str, float, int, list, tuple, other front-matter parameters recommended to pass title

    """
    # Validate the date first: it is split below to build the output paths,
    # and a malformed value would otherwise fail with an unrelated IndexError.
    assert bool(re.match('[0-9]{4}-[0-1][0-9]-[0-3][0-9]', date)), 'Incorrect date format, need YYYY-MM-DD'

    path_nb = Path(path)
    year, month = date.split('-')[:2]
    path_out = path_nb.parents[1] / 'static' / year / month / slug
    path_post = path_nb.parents[1] / 'content/post/' / (date + '-' + slug + '.md')

    assert path_nb.exists()
    assert path_post.parent.exists()

    # convert notebook to .md ------------------------------------------------

    with path_nb.open() as fp:
        notebook = nbformat.read(fp, as_version=4)

    c = Config()
    c.MarkdownExporter.preprocessors = [CustomPreprocessor]
    markdown_exporter = MarkdownExporter(config=c)

    markdown, resources = markdown_exporter.from_notebook_node(notebook)
    md = doctor(markdown)

    # 'true' (previously misspelled 'ture') so the flag is a proper value.
    # Named `header` to avoid shadowing a `yaml` module import.
    header = make_yaml_header(date=date,
                              slug=slug,
                              mathjax='true',
                              **kwargs)

    md = header + md

    with path_post.open('w') as f:
        f.write(md)

    # write outputs as png ---------------------------------------------------

    if 'outputs' in resources:
        path_out.mkdir(parents=True, exist_ok=True)
        for key, data in resources['outputs'].items():
            with (path_out / key).open('wb') as f:
                f.write(data)
Ejemplo n.º 15
0
def convert_notebook(name, notebook, output_dir):
    """Export ``notebook`` as ``<name>.py`` and ``<name>.md`` in ``output_dir``.

    Extracted outputs of the Markdown export are named under a ``<name>/``
    prefix and written below ``output_dir``; the Markdown body is passed
    through ``post_process_markdown`` before it is saved.

    Args:
        name: Base name for the generated files and the outputs folder.
        notebook: Notebook node to export.
        output_dir: Destination directory (joined with ``/``).
    """
    export_config = Config()
    export_config.ExtractOutputPreprocessor.output_filename_template = (
        name + '/{unique_key}_{cell_index}_{index}{extension}')
    markdown_exporter = MarkdownExporter(config=export_config)
    python_exporter = PythonExporter()

    python_source, _ = python_exporter.from_notebook_node(notebook)
    with open(output_dir / f'{name}.py', 'w') as script_file:
        script_file.write(python_source)

    markdown_body, resources = markdown_exporter.from_notebook_node(notebook)
    with open(output_dir / f'{name}.md', 'w') as markdown_file:
        markdown_file.write(post_process_markdown(markdown_body))

    for relative_name, payload in resources['outputs'].items():
        target = output_dir / relative_name
        # The outputs folder may not exist yet.
        target.parent.mkdir(exist_ok=True, parents=True)
        with open(target, 'wb') as output_file:
            output_file.write(payload)
def convert(notebook_fname):
    """Convert a notebook into a code-stripped Jekyll post with its assets.

    Every cell is passed through ``strip_code_source`` and empty python code
    fences are removed from the exported body. The Markdown file is written
    as ``<basename>.md`` in the current directory and then moved to
    ``../_posts``; extracted outputs go to ``../assets/posts/<basename>/``.

    Args:
        notebook_fname: Notebook file name; its name without extension is
            used for the post, and the title is that name from the 12th
            character on with ``-`` replaced by spaces.
    """
    root = '..'
    basename, _ext = os.path.splitext(notebook_fname)
    # NOTE(review): skips an 11-character prefix, presumably 'YYYY-MM-DD-'
    # — confirm against the file naming convention.
    title = basename[11:].replace('-', ' ')

    header = [
        "---",
        "title: " + title,
        "layout: post",
        "---",
    ]

    asset_dir = '/assets/posts/' + basename
    asset_realpath = root + asset_dir + '/'
    if not os.path.exists(asset_realpath):
        os.makedirs(asset_realpath)

    post_realpath = root + '/_posts'

    # Code stripping is always on in this variant.
    strip_code = True
    notebook = nbformat.read(notebook_fname, as_version=4)
    if strip_code:
        notebook['cells'] = [
            strip_code_source(cell) for cell in notebook['cells']
        ]

    body, resources = MarkdownExporter().from_notebook_node(notebook)
    if strip_code:
        body = body.replace('```python\n\n```', '')

    outputs = resources['outputs']
    # Map each extracted output name to its site-absolute asset path.
    output_paths = {name: asset_dir + '/' + name for name in outputs}
    markdown_lines = header + replace_by_dict(body, output_paths).split('\n')

    for output_name, payload in outputs.items():
        with open(asset_realpath + output_name, 'wb') as asset_file:
            asset_file.write(payload)

    markdown_fname = basename + '.md'
    with open(markdown_fname, 'w') as mdfile:
        mdfile.write('\n'.join(markdown_lines))

    shutil.move(markdown_fname, post_realpath + '/' + markdown_fname)
Ejemplo n.º 17
0
def prepare_readme():
    """Convert the example notebook to Markdown and write it to README.md.

    Reads ``graphql_example/graphql_example.ipynb`` and overwrites
    ``README.md`` in the current working directory, printing progress
    messages along the way.
    """
    from nbconvert import MarkdownExporter
    import nbformat

    print('reading notebook')
    notebook = nbformat.reads(
        Path('graphql_example/graphql_example.ipynb').read_text(),
        as_version=4)

    # The messages previously said "RST", but the export format is Markdown.
    print('converting notebook to Markdown')
    exporter = MarkdownExporter()
    body, *_ = exporter.from_notebook_node(notebook)

    print('writing Markdown to readme')
    with open('README.md', 'w') as readme:
        readme.write(body)

    print('success')
Ejemplo n.º 18
0
class NotebookConverter(BasePlugin):
    """Plugin that serves ``.ipynb`` pages as Markdown exported by nbconvert."""

    def __init__(self):
        self.exporter = MarkdownExporter()

    def can_load(self, path):
        """Return True for ``.ipynb`` paths outside ``ipynb_checkpoints``."""
        lowered = path.lower()
        is_notebook = lowered.endswith('.ipynb')
        return is_notebook and 'ipynb_checkpoints' not in lowered

    def on_config(self, config, **kwargs):
        """Append the MathJax script URL to ``config['extra_javascript']``."""
        mathjax_url = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/MathJax.js?config=TeX-MML-AM_CHTML'
        config['extra_javascript'].append(mathjax_url)

    def on_page_read_source(self, something, **kwargs):
        """Return the Markdown for a notebook page, or None for other pages.

        The notebook's output files are written into the site directory as a
        side effect.
        """
        page = kwargs['page']
        config = kwargs['config']
        input_path = page.file.src_path

        if not self.can_load(input_path):
            return

        notebook = nbformat.read(
            os.path.join(config['docs_dir'], input_path), as_version=4)

        # Supporting files are placed alongside the final HTML, in a folder
        # named after the notebook.
        stem, _ext = os.path.splitext(os.path.basename(input_path))
        (body, resources) = self.exporter.from_notebook_node(
            notebook, resources={'output_files_dir': stem})

        # The page's folder in the site may not exist yet; create it so the
        # support files can be dropped in there.
        target_in_site = os.path.join(config['site_dir'], page.abs_url[1:])
        os.makedirs(target_in_site, exist_ok=True)

        for output_name, payload in resources['outputs'].items():
            destination = os.path.join(target_in_site, '..', output_name)
            with open(destination, 'wb') as f:
                f.write(payload)

        return body
Ejemplo n.º 19
0
def convert_notebook_to_presentation(notebook_path, markdown_path):
    """Export a notebook to Markdown with the ``hidecode.tplx`` template.

    The notebook is preprocessed with ``ChangeIbynbLink`` and the result is
    written through nbconvert's ``FilesWriter``.

    Args:
        notebook_path: Path of the notebook to read (UTF-8).
        markdown_path: Notebook name passed to ``FilesWriter.write``.
    """
    with open(notebook_path, encoding="utf8") as notebook_file:
        notebook = nbformat.read(notebook_file, as_version=4)

    exporter_config = Config()
    exporter_config.MarkdownExporter.preprocessors = [ChangeIbynbLink]

    # The template lives in the same directory as this module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    exporter = MarkdownExporter(config=exporter_config)
    exporter.template_file = os.path.join(module_dir, 'hidecode.tplx')

    (body, resources) = exporter.from_notebook_node(notebook)

    nbconvert.writers.FilesWriter().write(body, resources, markdown_path)
Ejemplo n.º 20
0
    def to_md(self):
        """Export ``self.nb`` to ``<document_name>.md`` in ``self.final_nb_home``.

        On the first run (``self.first``) the images directory is created and
        the ``'md'`` preprocessors are applied. Output files returned by the
        exporter are written below ``self.final_nb_home``, and the resources
        are reset at the end.
        """
        if self.first:
            md_preprocessors = self.get_preprocessors('md')
            self.create_images_dir()
            self.preprocess(md_preprocessors)

        exporter_config = {
            'NbConvertBase': {
                'display_data_priority': self.DISPLAY_DATA_PRIORITY
            }
        }
        exporter = MarkdownExporter(config=exporter_config)
        markdown_text, self.resources = exporter.from_notebook_node(
            self.nb, self.resources)

        # The exporter returns the binary output files in resources['outputs'].
        for output_name, payload in self.resources['outputs'].items():
            with open(self.final_nb_home / output_name, 'wb') as sink:
                sink.write(payload)

        target = self.final_nb_home / (self.document_name + '.md')
        with open(target, mode='w') as sink:
            sink.write(markdown_text)
        self.reset_resources()
Ejemplo n.º 21
0
    def to_md(self):
        """Export ``self.nb`` to Markdown, pointing images at the image directory.

        Every image name from ``resources['outputs']`` and
        ``resources['image_data_dict']`` is replaced in the Markdown by its
        URL-quoted path under ``self.image_dir_name``. In web-app mode the
        results are stored in ``self.return_data``; otherwise the image
        directory is recreated and the images and ``<document_name>.md`` are
        written below ``self.final_nb_home``.
        """
        exporter_config = {
            'NbConvertBase': {
                'display_data_priority': self.DISPLAY_DATA_PRIORITY
            }
        }
        exporter = MarkdownExporter(config=exporter_config)
        md_data, self.resources = exporter.from_notebook_node(
            self.nb, self.resources)

        # Merge both image sources; entries of 'image_data_dict' win on
        # duplicate names.
        image_data_dict = dict(self.resources['outputs'])
        image_data_dict.update(self.resources['image_data_dict'])

        for image_name in image_data_dict:
            relocated = urllib.parse.quote(
                str(Path(self.image_dir_name) / image_name))
            md_data = md_data.replace(image_name, relocated)

        if self.web_app:
            self.return_data['md_data'] = md_data
            self.return_data['md_images'] = image_data_dict
            self.return_data['image_dir_name'] = self.image_dir_name
            return

        # Start from an empty image directory.
        image_dir = self.final_nb_home / self.image_dir_name
        if image_dir.is_dir():
            shutil.rmtree(image_dir)
        image_dir.mkdir()

        for image_name, payload in image_data_dict.items():
            with open(image_dir / image_name, 'wb') as sink:
                sink.write(payload)

        target = self.final_nb_home / (self.document_name + '.md')
        with open(target, mode='w') as sink:
            sink.write(md_data)
Ejemplo n.º 22
0
    "ms.service: machine-learning\n" \
    "ms.subservice: core\n" \
    "ms.topic: conceptual\n" \
    "ms.author: " + nb_metadata.get("ms.author") + "\n" \
    "author: \n" \
    "ms.reviewer: " + nb_metadata.get("ms.author") + "\n" \
    "ms.date: 02/13/2020 \n" \
    "---\n\n"

    content = nb_as_md[0]
    content = meta_string + content
    copy = (content, nb_as_md[1])
    return copy


# load nb and extract metadata
nb = nbformat.read("model-register-and-deploy.ipynb", as_version=4)
nb_metadata = parse_metadata(nb)

# process and convert to .md
config = Config()
config.MarkdownExporter.preprocessors = [CustomPreprocessor]
custom_exporter = MarkdownExporter(config=config)
nb_as_md = custom_exporter.from_notebook_node(nb)

# append msdocs metadata
nb_as_md = build_append_md_metadata(nb_metadata, nb_as_md)

with open("model-register-and-deploy.md", 'w') as f:
    f.write(nb_as_md[0])
Ejemplo n.º 23
0
def ipynb2md(readIn):
    """
    Convert notebook JSON text to Markdown and save its images under ``img/``.

    Adapted from data8/textbook, with the intention to repurpose it for
    Markdown content:

    https://github.com/data-8/textbook/blob/gh-pages/convert_notebooks_to_html_partial.py

    Args:
        readIn: Notebook contents as a string, parsed as nbformat version 4.

    Returns:
        The Markdown body encoded as UTF-8 bytes.
    """
    from nbformat import reads
    from nbconvert import MarkdownExporter
    from traitlets.config import Config

    imageDirectory = "img"  # output notebook images here

    # Extract images as separate files.
    config = Config()
    config.MarkdownExporter.preprocessors = [
        "nbconvert.preprocessors.ExtractOutputPreprocessor"
    ]

    # Assign a unique key to each image based on the notebook name.
    # NOTE(review): `filename` is not defined inside this function; unless a
    # module-level `filename` exists this raises NameError — confirm.
    extractOutputConfig = {
        "unique_key": filename,
        "output_files_dir": "/" + imageDirectory,
    }

    nb = reads(readIn, 4)
    mdExporter = MarkdownExporter(config=config)
    md, resources = mdExporter.from_notebook_node(
        nb, resources=extractOutputConfig)

    # Write every extracted image into the image directory under its base
    # name.
    os.makedirs(imageDirectory, exist_ok=True)
    for relPath, imgData in resources["outputs"].items():
        imgName = relPath.split("/")[-1]  # get filename
        imgPath = "{}/{}".format(imageDirectory, imgName)  # build newpath
        with open(imgPath, "wb") as outImg:
            outImg.write(imgData)

    return md.encode("utf-8")
Ejemplo n.º 24
0
    def on_files(self, files, config):
        """Export notebooks to Markdown and register them as site files.

        Every ``*.ipynb`` under the plugin's ``input_dir`` (searched
        recursively when the ``recursive`` option is set) is exported with
        ``MarkdownExporter``. The Markdown is written below ``output_dir``
        inside ``docs_dir``, the exported resources are written next to both
        the source and the destination of the new file, and a ``File`` object
        is appended to ``files`` for each notebook.

        Args:
            files: Collection of site files; new entries are appended.
            config: Site configuration mapping (reads ``config_file_path``,
                ``docs_dir``, ``site_dir`` and ``use_directory_urls``).

        Returns:
            The ``files`` collection that was passed in.
        """
        import sys

        logger = self._logger
        logger.info('nbconvert: plugin config=%s', pformat(self.config))
        # deal with dirs
        config_file_dir = os.path.dirname(config['config_file_path'])
        input_dir = os.path.normpath(self.config['input_dir'])
        output_dir = os.path.realpath(
            os.path.join(config['docs_dir'],
                         os.path.normpath(self.config['output_dir'])))
        if not os.path.isabs(input_dir):
            input_dir = os.path.realpath(
                os.path.join(config_file_dir, input_dir))
        # glob match
        # Recursive globbing needs Python 3.5+. Compare version tuples, not
        # strings: as strings '3.10' >= '3.5' is False, which silently
        # disabled recursion on newer interpreters.
        glob_recursive = self.config[
            'recursive'] if sys.version_info >= (3, 5) else False
        if glob_recursive:
            nb_paths_iter = iglob(os.path.join(config_file_dir, input_dir,
                                               '**', '*.ipynb'),
                                  recursive=True)
        else:
            nb_paths_iter = iglob(
                os.path.join(config_file_dir, input_dir, '*.ipynb'))
        # Exporter
        md_exporter = MarkdownExporter()
        # Converting
        for nb_path in nb_paths_iter:
            # Prepare output file/dir, mirroring the notebook's sub-directory
            # below input_dir.
            nb_dirname, nb_basename = os.path.split(nb_path)
            nb_basename_root, _ = os.path.splitext(nb_basename)
            nb_subdir = os.path.relpath(nb_dirname, input_dir)
            md_dir = os.path.join(output_dir, nb_subdir)
            md_basename = '{0}.md'.format(nb_basename_root)
            md_path = os.path.join(md_dir, md_basename)
            md_rel_dir = os.path.relpath(md_dir, config['docs_dir'])
            md_rel_path = os.path.join(md_rel_dir, md_basename)
            #
            logger.debug('nbconvert: markdown export %s => %s', nb_path,
                         md_path)
            # run nbconvert
            with open(nb_path) as fp:
                nb_node = nbformat.read(fp, nbformat.NO_CONVERT)
            body, resources = md_exporter.from_notebook_node(nb_node)
            # save exported
            os.makedirs(md_dir, exist_ok=True)
            with open(md_path, 'w', encoding='UTF8') as fp:
                fp.write(body)
            file_obj = File(path=md_rel_path,
                            src_dir=config['docs_dir'],
                            dest_dir=config['site_dir'],
                            use_directory_urls=config['use_directory_urls'])

            # Resources are written twice: next to the Markdown source and
            # next to the file's destination.
            resource_src_dir = os.path.dirname(file_obj.abs_src_path)
            resource_dest_dir = os.path.dirname(file_obj.abs_dest_path)
            for resource_name, resource_data in resources['outputs'].items():
                resource_src_path = os.path.join(resource_src_dir,
                                                 resource_name)
                os.makedirs(resource_src_dir, exist_ok=True)
                with open(resource_src_path, 'wb') as fp:
                    fp.write(resource_data)
                resource_dest_path = os.path.join(resource_dest_dir,
                                                  resource_name)
                logger.debug(
                    'nbconvert: resource output(%dBytes): %s --> %s',
                    len(resource_data), resource_name, resource_dest_path)
                os.makedirs(resource_dest_dir, exist_ok=True)
                with open(resource_dest_path, 'wb') as fp:
                    fp.write(resource_data)

            logger.debug(
                'nbconvert: add file object<abs_src_path=%s abs_dest_path=%s url=%s>',
                file_obj.abs_src_path, file_obj.abs_dest_path, file_obj.url)
            files.append(file_obj)
        return files
Ejemplo n.º 25
0
def render(which='notebooks', rm=False):
    """Render every notebook in ``_<which>`` to Markdown plus a table of contents.

    For each ``*.ipynb`` file, the Markdown goes to
    ``_includes/notebooks/<name>.md``, the output files to
    ``assets/notebooks/<name>_files/``, and a pandoc-generated table of
    contents to ``_includes/notebooks/toc/<name>.md``. Output paths inside
    the Markdown are wrapped in a ``relative_url`` template filter.

    Args:
        which: Suffix of the source directory (``_<which>``).
        rm: When true, the Markdown and asset output directories are deleted
            and recreated first.
    """
    path_notebooks = f'_{which}'

    # Output directories
    path_markdown = '_includes/notebooks'
    path_toc = '_includes/notebooks/toc'
    path_output = 'assets/notebooks'

    if rm:
        shutil.rmtree(path_markdown)
        os.makedirs(path_markdown)
        os.makedirs(path_toc)
        shutil.rmtree(path_output)
        os.makedirs(path_output)

    # Create new output
    for fname in os.listdir(path_notebooks):
        fpath = os.path.join(path_notebooks, fname)

        # Ignore directories
        if not os.path.isfile(fpath):
            continue

        # Ignore non-notebooks. splitext copes with names that contain no
        # dot or several dots, which a two-way unpack of split('.') does not.
        name, ext = os.path.splitext(fname)
        if ext != '.ipynb':
            continue

        # Output location for this notebook's files (relative to the website
        # root /).
        output_files_dir = 'assets/notebooks/%s_files' % name

        # Render the notebook
        with open(fpath) as f:
            nb = nbformat.read(f, as_version=4)
        md_exporter = MarkdownExporter()
        (body, resources) = md_exporter.from_notebook_node(
            nb, resources={'output_files_dir': output_files_dir})

        # Handle output files
        os.makedirs(output_files_dir, exist_ok=True)
        for www_name, data in resources['outputs'].items():
            # Output names already carry the output_files_dir prefix.
            with open(www_name, 'wb') as fw:
                fw.write(data)

        # Replace output paths to get relative urls
        search = r'\b%s/(.*)\b' % output_files_dir
        replace = r'{{ "/%s/\1" | relative_url }}' % output_files_dir
        body = re.sub(search, replace, body)

        # Write markdown file
        outname = '%s.md' % name
        outpath = os.path.join(path_markdown, outname)
        with open(outpath, 'w') as fw:
            fw.write(body)

        # Construct the table of contents. An argument list without a shell
        # keeps file names with spaces or shell metacharacters from being
        # interpreted.
        toc = subprocess.run(
            ['pandoc', '--template=_toc-template.md', '--toc',
             '-t', 'markdown', outpath],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        ).stdout
        tocpath = os.path.join(path_toc, outname)
        with open(tocpath, 'wb') as fw:
            fw.write(toc)
Ejemplo n.º 26
0
#! /usr/bin/python
# -*- coding: utf-8 -*-

# Demo script: load a Jupyter notebook, export it to Markdown, then parse a
# Markdown file with the "toc" extension enabled.
import nbformat
from nbconvert import MarkdownExporter

with open("content/motivation.ipynb", "r") as f:
    raw = f.read()

# Parse the raw notebook text into a version-4 notebook node.
notebook = nbformat.reads(raw, as_version=4)
# Access the first cell. This is a bare expression: it only shows a value in
# an interactive session and has no effect when run as a script.
notebook.cells[0]

# Convert the notebook with the Markdown exporter.
# NOTE(review): from_notebook_node presumably returns a (body, resources)
# pair rather than a plain string -- confirm before using `md` as text.
md_exporter = MarkdownExporter()
md = md_exporter.from_notebook_node(notebook)
# NOTE: `md` is rebound below to a markdown.Markdown instance, so the export
# result assigned above is no longer reachable after that point.


import markdown

# Markdown converter with the table-of-contents ("toc") extension enabled.
md = markdown.Markdown(extensions=["toc"])

with open("content/units/units.md", "r") as f:
    # The converted output is discarded; the call is made for its effect on `md`.
    md.convert(f.read())

# Bare expression again: inspect the TOC tokens (presumably filled in by the
# "toc" extension during convert() -- confirm against the markdown docs).
md.toc_tokens


Ejemplo n.º 27
0
class Formatter:
    """Convert ``.ipynb`` notebooks to PDF, reStructuredText or Markdown.

    The exporter is chosen from ``output_format``; the destination folder is
    derived from ``destination_mode`` and the notebook's own path. Paths are
    handled as "/"-separated strings.
    """

    def __init__(self, output_format, destination_mode):
        """Validate the arguments and pick the matching nbconvert exporter.

        Args:
            output_format: one of ``SUPPORTED_FORMATS``. ``"pdf"`` and
                ``"rst"`` get their dedicated exporter; anything else falls
                back to the Markdown exporter.
            destination_mode: one of ``SUPPORTED_DESTINATIONS_MODES``.

        Raises:
            AssertionError: if either argument is not supported.
        """
        assert output_format in SUPPORTED_FORMATS, f"supported formats are {SUPPORTED_FORMATS}"
        assert (
            destination_mode in SUPPORTED_DESTINATIONS_MODES
        ), f"supported destination modes are {SUPPORTED_DESTINATIONS_MODES}"

        self.read_encoding = "utf-8"
        self.write_encoding = "utf-8"
        self.format = output_format
        self.destination_mode = destination_mode

        if self.format == "pdf":
            # PDF output hides the code cells and the output prompts.
            pdf = PDFExporter()
            pdf.exclude_output_prompt = True
            pdf.exclude_input = True
            self.exporter = pdf
        elif self.format == "rst":
            self.exporter = RSTExporter()
        else:
            self.exporter = MarkdownExporter()

    def get_output_dir_base(self, file):
        """Return the root folder prefix for converted files.

        ``"converted/to_<format>/"`` in ``"mirror_folder"`` mode, otherwise
        an empty string. ``file`` is accepted but not used.
        """
        if self.destination_mode == "mirror_folder":
            return f"converted/to_{self.format}/"
        return ""

    def get_output_file_path(self, file):
        """Return the folder (with trailing "/") the output is written into.

        This is the directory part of ``file``. For the multi-file formats
        (``"rst"`` and ``"md"``) an extra sub-folder named after the notebook
        (without extension) is appended. Returns ``""`` when there is no
        directory component at all.
        """
        parts = file.split("/")[0:-1]
        if self.format in ["rst", "md"]:
            parts.append(self.dest_file(file, withFormat=False))
        return "/".join(parts) + "/" if len(parts) > 0 else ""

    def dst_folder(self, file):
        """Return the full destination folder: base prefix + output path."""
        return self.get_output_dir_base(file) + self.get_output_file_path(file)

    def dest_file(self, file, withFormat=True):
        """Return the output file name for ``file``.

        The last "/"-separated component of ``file`` with ``".ipynb"``
        replaced by ``".<format>"``, or removed when ``withFormat`` is False.
        """
        suffix = "." + self.format if withFormat else ""
        return file.split("/")[-1].replace(".ipynb", suffix)

    def dst_path(self, file):
        """Return the full destination path (folder + file name)."""
        return self.dst_folder(file) + self.dest_file(file)

    def convert(self, file):
        """Export ``file`` and write the result into its destination folder.

        Raises:
            AssertionError: if ``file`` does not exist.
        """
        assert os.path.exists(
            file), f"this should not happen, path {file} must exist"
        body, resources = self.export(file)

        # Compute the destination once; it is used both to create the folder
        # and as the writer's build directory.
        target_dir = self.dst_folder(file)
        fw = FilesWriter()
        fw._makedir(target_dir)
        fw.build_directory = target_dir
        fw.write(body,
                 resources,
                 notebook_name=self.dest_file(file, withFormat=False))

    def export(self, file):
        """Read ``file`` as a version-4 notebook and run the exporter on it.

        Returns:
            The ``(body, resources)`` pair produced by the exporter.
        """
        with open(file, "r", encoding=self.read_encoding) as f:
            nb = nbformat.read(f, as_version=4)
            body, resources = self.exporter.from_notebook_node(nb)
            return body, resources

    def needs_format(self, file):
        """Return True when ``file`` has to be (re)converted.

        That is the case when the destination file does not exist yet, or
        when the notebook was modified more recently than the destination.
        """
        f_path = self.dst_path(file)

        if not os.path.exists(f_path):
            return True

        notebook_modified = os.stat(file).st_mtime
        formatted_modified = os.stat(f_path).st_mtime

        return notebook_modified > formatted_modified

    def save_figures(self, resources):
        """Print the name and size of every entry in ``resources["outputs"]``.

        Despite its name this method only reports the entries; it does not
        write anything to disk. Does nothing when ``"outputs"`` is missing
        or is not a dict.
        """
        if "outputs" not in resources:
            return

        outputs = resources["outputs"]
        if not isinstance(outputs, dict):
            return

        # Iterate over (name, data) pairs. Iterating the dict itself yields
        # only the keys, which cannot be unpacked into two names.
        for name, bytes_ in outputs.items():
            print(f"name = {name}, bytes = {len(bytes_)}")
Ejemplo n.º 28
0
class Formatter:
    """Convert ``.ipynb`` notebooks to PDF, reStructuredText or Markdown.

    Output is written next to the notebook. Extracted output files are
    renamed to ``<notebook>_<index><ext>`` and the exported body is updated
    to reference the new names.
    """

    def __init__(self, output):
        """Validate ``output`` and pick the matching nbconvert exporter.

        Args:
            output: one of ``SUPPORTED_FORMATS``. ``"pdf"`` and ``"rst"``
                get their dedicated exporter; anything else falls back to
                the Markdown exporter.

        Raises:
            AssertionError: if ``output`` is not supported.
        """
        assert output in SUPPORTED_FORMATS, f"supported formats are {SUPPORTED_FORMATS}"
        self.read_encoding = "utf-8"
        self.write_encoding = "utf-8"
        self.format = output

        if self.format == "pdf":
            # PDF output hides the code cells and the output prompts.
            pdf = PDFExporter()
            pdf.exclude_output_prompt = True
            pdf.exclude_input = True
            self.exporter = pdf
        elif self.format == "rst":
            self.exporter = RSTExporter()
        else:
            self.exporter = MarkdownExporter()

    def convert(self, file):
        """Export ``file`` and write the result into the notebook's folder.

        Raises:
            AssertionError: if ``file`` does not exist.
        """
        assert os.path.exists(
            file), f"this should not happen, path {file} must exist"
        body, resources = self.export(file)

        fw = FilesWriter()
        fw.build_directory = os.path.dirname(file)
        f_name = os.path.basename(file).replace(".ipynb", "")
        fw.write(body, resources, notebook_name=f_name)

    def dst_path(self, file):
        """Return ``file`` with ``".ipynb"`` replaced by ``".<format>"``."""
        return file.replace(".ipynb", f".{self.format}")

    def export(self, file):
        """Read ``file`` as a version-4 notebook, export it, rename outputs.

        Returns:
            The ``(body, resources)`` pair after ``replace_image_names``.
        """
        with open(file, "r", encoding=self.read_encoding) as f:
            nb = nbformat.read(f, as_version=4)
            body, resources = self.exporter.from_notebook_node(nb)
            return self.replace_image_names(body, resources, file)

    def replace_image_names(self, body, resources, file):
        """Rename extracted outputs after the notebook and patch ``body``.

        Every key of ``resources["outputs"]`` becomes
        ``<notebook name>_<index><original extension>``; each occurrence of
        the old key in ``body`` is replaced by the new name. ``resources``
        is updated in place. When there are no outputs, both arguments are
        returned unchanged.

        Returns:
            The ``(body, resources)`` pair.
        """
        names = self._get_output_names(resources)
        if not names:
            return body, resources

        f_name = os.path.basename(file).replace(".ipynb", "")
        new_outputs = {}
        for i, old_key in enumerate(names):
            _, image_extension = os.path.splitext(old_key)
            output_name = f"{f_name}_{i}{image_extension}"
            new_outputs[output_name] = resources["outputs"][old_key]
            body = body.replace(old_key, output_name)
        # Swap the mapping only after the loop so the keys being iterated
        # are never modified mid-iteration.
        resources["outputs"] = new_outputs

        return body, resources

    def _get_output_names(self, resources):
        """'outputs' may be empty or contain a string. Ask forgiveness, not permission.

        Returns the keys of ``resources["outputs"]``, or an empty list when
        the entry is missing or is not a mapping.
        """
        try:
            return resources["outputs"].keys()
        except (KeyError, TypeError, AttributeError):
            # Missing key, un-indexable resources, or a non-mapping value.
            return []

    def needs_format(self, file):
        """Return True when ``file`` has to be (re)converted.

        That is the case when the destination file does not exist yet, or
        when the notebook was modified more recently than the destination.
        """
        f_path = self.dst_path(file)

        if not os.path.exists(f_path):
            return True

        notebook_modified = os.stat(file).st_mtime
        formatted_modified = os.stat(f_path).st_mtime

        return notebook_modified > formatted_modified

    def save_figures(self, resources):
        """Print the name and size of every entry in ``resources["outputs"]``.

        Despite its name this method only reports the entries; it does not
        write anything to disk. Does nothing when ``"outputs"`` is missing
        or is not a dict.
        """
        if "outputs" not in resources:
            return

        outputs = resources["outputs"]
        if not isinstance(outputs, dict):
            return

        # Iterate over (name, data) pairs. Iterating the dict itself yields
        # only the keys, which cannot be unpacked into two names.
        for name, bytes_ in outputs.items():
            print(f"name = {name}, bytes = {len(bytes_)}")