Exemple #1
0
def test_split_yaml(tmpdir):
    path_build_test = op.join(tmpdir.dirpath(), 'tmp_test', 'test')
    with open(op.join(path_build_test, '_build', 'tests', 'features.md'),
              'r') as ff:
        lines = ff.readlines()

    # Make sure the yaml remains in the file
    assert is_in(lines, "yaml_frontmatter: true")

    # Edgecases etc on the splitter function
    assert _split_yaml([]) == ([], [])
    assert _split_yaml(['foo\n', 'bar\n']) == ([], ['foo\n', 'bar\n'])
    assert _split_yaml(['---\n', 'foo\n',
                        'bar\n']) == ([], ['---\n', 'foo\n', 'bar\n'])
    exp = ['---\n', 'foo\n', '---\n']
    assert _split_yaml(exp) == (['foo\n'], [])
    assert (_split_yaml(['---\n', 'foo\n', '---\n', 'baz\n',
                         'barf\n']) == (['foo\n'], ['baz\n', 'barf\n']))
    assert (_split_yaml(
        ['---\n', 'foo\n', 'bar\n', '---\n', 'baz\n',
         'barf\n']) == (['foo\n', 'bar\n'], ['baz\n', 'barf\n']))
    assert (_split_yaml(
        ['\n', '\n', '---\n', 'foo\n', '---\n', 'baz\n',
         'barf\n']) == (['foo\n'], ['baz\n', 'barf\n']))
    assert (_split_yaml(
        ['   \n', ' \n', '---\n', 'foo\n', '---\n', 'baz\n',
         'barf\n']) == (['foo\n'], ['baz\n', 'barf\n']))
Exemple #2
0
def build_book(path_book,
               path_toc_yaml=None,
               config_file=None,
               path_template=None,
               local_build=False,
               execute=False,
               overwrite=False):
    """Build the markdown for a book using its TOC and a content folder.

    Parameters
    ----------
    path_book : str
        Path to the root of the book repository
    path_toc_yaml : str | None
        Path to the Table of Contents YAML file
    config_file : str | None
        Path to the Jekyll configuration file
    path_template : str | None
        Path to the template nbconvert uses to build markdown
        files
    local_build : bool
        Specify you are building site locally for later upload
    execute : bool
        Whether to execute notebooks before converting to markdown
    overwrite : bool
        Whether to overwrite existing markdown files
    """

    PATH_IMAGES_FOLDER = op.join(path_book, '_build', 'images')
    BUILD_FOLDER = op.join(path_book, BUILD_FOLDER_NAME)

    ###############################################
    # Read in textbook configuration

    # Load the yaml for this site
    with open(config_file, 'r') as ff:
        site_yaml = yaml.safe_load(ff.read())
    CONTENT_FOLDER_NAME = site_yaml.get('content_folder_name').strip('/')
    PATH_CONTENT_FOLDER = op.join(path_book, CONTENT_FOLDER_NAME)

    # Load the textbook yaml for this site
    if not op.exists(path_toc_yaml):
        raise _error("No toc.yml file found, please create one at `{}`".format(
            path_toc_yaml))
    with open(path_toc_yaml, 'r') as ff:
        toc = yaml.safe_load(ff.read())

    # Drop divider items and non-linked pages in the sidebar, un-nest sections
    toc = _prepare_toc(toc)

    ################################################
    # Generating the Jekyll files for all content

    n_skipped_files = 0
    n_built_files = 0
    case_check = _case_sensitive_fs(BUILD_FOLDER) and local_build
    print("Convert and copy notebook/md files...")
    for ix_file, page in enumerate(tqdm(list(toc))):
        url_page = page.get('url', None)
        title = page.get('title', None)
        if page.get('external', None):
            # If its an external link, just pass
            continue

        # Make sure URLs (file paths) have correct structure
        _check_url_page(url_page, CONTENT_FOLDER_NAME)

        ##############################################
        # Create path to old/new file and create directory

        # URL will be relative to the CONTENT_FOLDER
        path_url_page = os.path.join(PATH_CONTENT_FOLDER, url_page.lstrip('/'))
        path_url_folder = os.path.dirname(path_url_page)

        # URLs shouldn't have the suffix in there already so
        # now we find which one to add
        for suf in SUPPORTED_FILE_SUFFIXES:
            if op.exists(path_url_page + suf):
                path_url_page = path_url_page + suf
                break

        if not op.exists(path_url_page):
            raise _error(
                "Could not find file called {} with any of these extensions: {}"
                .format(path_url_page, SUPPORTED_FILE_SUFFIXES))

        # Create and check new folder / file paths
        path_build_new_folder = path_url_folder.replace(
            os.sep + CONTENT_FOLDER_NAME, os.sep + BUILD_FOLDER_NAME) + os.sep
        path_build_new_file = op.join(
            path_build_new_folder,
            op.basename(path_url_page).replace('.ipynb', '.md'))

        if overwrite is False and op.exists(path_build_new_file) \
           and os.stat(path_build_new_file).st_mtime > os.stat(path_url_page).st_mtime:
            n_skipped_files += 1
            continue

        if not op.isdir(path_build_new_folder):
            os.makedirs(path_build_new_folder)

        ################################################
        # Generate previous/next page URLs
        if ix_file == 0:
            url_prev_page = ''
            prev_file_title = ''
        else:
            prev_file_title = toc[ix_file - 1].get('title')
            url_prev_page = toc[ix_file - 1].get('url')
            pre_external = toc[ix_file - 1].get('external', False)
            if pre_external is False:
                url_prev_page = _prepare_url(url_prev_page)

        if ix_file == len(toc) - 1:
            url_next_page = ''
            next_file_title = ''
        else:
            next_file_title = toc[ix_file + 1].get('title')
            url_next_page = toc[ix_file + 1].get('url')
            next_external = toc[ix_file + 1].get('external', False)
            if next_external is False:
                url_next_page = _prepare_url(url_next_page)

        ###############################################################################
        # Get kernel name and presence of widgets from notebooks metadata

        kernel_name = ''
        if path_url_page.endswith('.ipynb'):
            data = nbf.read(path_url_page, nbf.NO_CONVERT)
            if 'metadata' in data and 'kernelspec' in data['metadata']:
                kernel_name = data['metadata']['kernelspec']['name']
            has_widgets = "true" if any(
                "interactive" in cell['metadata'].get('tags', [])
                for cell in data['cells']) else "false"

        ############################################
        # Content conversion

        # Convert notebooks or just copy md if no notebook.
        if path_url_page.endswith('.ipynb'):
            notebook_name = op.splitext(op.basename(path_url_page))[0]
            ntbk = nbf.read(path_url_page, nbf.NO_CONVERT)

            ########################################
            # Notebook cleaning

            # Clean up the file before converting
            cleaner = NotebookCleaner(ntbk)
            cleaner.remove_cells(empty=True)
            cleaner.clear('stderr')
            ntbk = cleaner.ntbk
            _clean_notebook_cells(ntbk)

            #############################################
            # Conversion to Jekyll Markdown
            # create a configuration object that changes the preprocessors
            c = Config()
            c.FilesWriter.build_directory = path_build_new_folder

            if execute is True:
                # Excution of the notebook if we wish
                ep = ExecutePreprocessor(timeout=600, kernel_name=kernel_name)
                ep.preprocess(
                    ntbk, {'metadata': {
                        'path': op.dirname(path_url_folder)
                    }})

            # Define the path to images and then the relative path to where they'll originally be placed
            path_after_build_folder = path_build_new_folder.split(
                os.sep + BUILD_FOLDER_NAME + os.sep)[-1]
            path_images_new_folder = op.join(PATH_IMAGES_FOLDER,
                                             path_after_build_folder)
            path_images_rel = op.relpath(path_images_new_folder,
                                         path_build_new_folder)

            # Generate Markdown from our notebook using the template
            output_resources = {
                'output_files_dir': path_images_rel,
                'unique_key': notebook_name
            }
            exp = MarkdownExporter(template_file=path_template, config=c)
            markdown, resources = exp.from_notebook_node(
                ntbk, resources=output_resources)

            # Now write the markdown and resources
            writer = FilesWriter(config=c)
            writer.write(markdown, resources, notebook_name=notebook_name)

        elif path_url_page.endswith('.md'):
            # If a non-notebook file, just copy it over.
            # If markdown we'll add frontmatter later
            sh.copy2(path_url_page, path_build_new_file)
        else:
            raise _error("Files must end in ipynb or md. Found file {}".format(
                path_url_page))

        ###############################################################################
        # Modify the generated Markdown to work with Jekyll
        # Clean markdown for Jekyll quirks (e.g. extra escape characters)
        with open(path_build_new_file, 'r', encoding='utf8') as ff:
            lines = ff.readlines()
        lines = _clean_lines(lines, path_build_new_file, path_book,
                             PATH_IMAGES_FOLDER)

        # Split off original yaml
        yaml_orig, lines = _split_yaml(lines)

        # Front-matter YAML
        yaml_fm = []
        yaml_fm += ['---']
        # In case pre-existing links are sanitized
        sanitized = url_page.lower().replace('_', '-')
        if sanitized != url_page:
            if case_check and url_page.lower() == sanitized:
                raise RuntimeError(
                    'Redirect {} clashes with page {} for local build on '
                    'case-insensitive FS\n'.format(sanitized, url_page) +
                    'Rename source page to lower case or build on a case '
                    'sensitive FS, e.g. case-sensitive disk image on Mac')
            yaml_fm += ['redirect_from:']
            yaml_fm += ['  - "{}"'.format(sanitized)]
        if path_url_page.endswith('.ipynb'):
            interact_path = CONTENT_FOLDER_NAME + '/' + \
                path_url_page.split(CONTENT_FOLDER_NAME + '/')[-1]
            yaml_fm += ['interact_link: {}'.format(interact_path)]
            yaml_fm += ["kernel_name: {}".format(kernel_name)]
            yaml_fm += ["has_widgets: {}".format(has_widgets)]

        # Page metadata
        yaml_fm += ["title: '{}'".format(title)]
        yaml_fm += ['prev_page:']
        yaml_fm += ['  url: {}'.format(url_prev_page)]
        yaml_fm += ["  title: '{}'".format(prev_file_title)]
        yaml_fm += ['next_page:']
        yaml_fm += ['  url: {}'.format(url_next_page)]
        yaml_fm += ["  title: '{}'".format(next_file_title)]

        # Add back any original YaML, and end markers
        yaml_fm += yaml_orig
        yaml_fm += [
            'comment: "***PROGRAMMATICALLY GENERATED, DO NOT EDIT. SEE ORIGINAL FILES IN /{}***"'
            .format(CONTENT_FOLDER_NAME)
        ]
        yaml_fm += ['---']
        yaml_fm = [ii + '\n' for ii in yaml_fm]
        lines = yaml_fm + lines

        # Write the result as UTF-8.
        with open(path_build_new_file, 'w', encoding='utf8') as ff:
            ff.writelines(lines)
        n_built_files += 1

    #######################################################
    # Finishing up...

    # Copy non-markdown files in notebooks/ in case they're referenced in the notebooks
    print('Copying non-content files inside `{}/`...'.format(
        CONTENT_FOLDER_NAME))
    _copy_non_content_files(PATH_CONTENT_FOLDER, CONTENT_FOLDER_NAME,
                            BUILD_FOLDER_NAME)

    # Message at the end
    msg = [
        "Generated {} new files\nSkipped {} already-built files".format(
            n_built_files, n_skipped_files)
    ]
    if n_built_files == 0:
        msg += [
            "Delete the markdown files in '{}' for any pages that you wish to re-build, or use --overwrite option to re-build all."
            .format(BUILD_FOLDER_NAME)
        ]
    msg += ["Your Jupyter Book is now in `{}/`.".format(BUILD_FOLDER_NAME)]
    msg += ["Demo your Jupyter book with `make serve` or push to GitHub!"]
    print_message_box('\n'.join(msg))
def build_book():
    """Build the markdown for a book using its TOC and a content folder."""
    parser = argparse.ArgumentParser(description=DESCRIPTION)
    parser.add_argument("path_book",
                        help="Path to the root of the book repository.")
    parser.add_argument(
        "--template",
        default=None,
        help="Path to the template nbconvert uses to build markdown files")
    parser.add_argument("--config",
                        default=None,
                        help="Path to the Jekyll configuration file")
    parser.add_argument("--toc",
                        default=None,
                        help="Path to the Table of Contents YAML file")
    parser.add_argument("--overwrite",
                        action='store_true',
                        help="Overwrite md files if they already exist.")
    parser.add_argument("--execute",
                        action='store_true',
                        help="Execute notebooks before converting to MD.")
    parser.add_argument(
        "--local-build",
        action='store_true',
        help="Specify you are building site locally for later upload.")
    parser.set_defaults(overwrite=False, execute=False)

    ###############################################################################
    # Default values and arguments

    args = parser.parse_args(sys.argv[2:])
    overwrite = bool(args.overwrite)
    execute = bool(args.execute)

    # Paths for our notebooks
    PATH_BOOK = op.abspath(args.path_book)

    PATH_TOC_YAML = args.toc if args.toc is not None else op.join(
        PATH_BOOK, '_data', 'toc.yml')
    CONFIG_FILE = args.config if args.config is not None else op.join(
        PATH_BOOK, '_config.yml')
    PATH_TEMPLATE = args.template if args.template is not None else op.join(
        PATH_BOOK, 'scripts', 'templates', 'jekyllmd.tpl')
    PATH_IMAGES_FOLDER = op.join(PATH_BOOK, '_build', 'images')
    BUILD_FOLDER = op.join(PATH_BOOK, BUILD_FOLDER_NAME)

    ###############################################################################
    # Read in textbook configuration

    # Load the yaml for this site
    with open(CONFIG_FILE, 'r') as ff:
        site_yaml = yaml.load(ff.read())
    CONTENT_FOLDER_NAME = site_yaml.get('content_folder_name').strip('/')
    PATH_CONTENT_FOLDER = op.join(PATH_BOOK, CONTENT_FOLDER_NAME)

    # Load the textbook yaml for this site
    if not op.exists(PATH_TOC_YAML):
        raise _error("No toc.yml file found, please create one at `{}`".format(
            PATH_TOC_YAML))
    with open(PATH_TOC_YAML, 'r') as ff:
        toc = yaml.load(ff.read())

    # Drop divider items and non-linked pages in the sidebar, un-nest sections
    toc = _prepare_toc(toc)

    ###############################################################################
    # Generating the Jekyll files for all content

    n_skipped_files = 0
    n_built_files = 0
    case_check = _case_sensitive_fs(BUILD_FOLDER) and args.local_build
    print("Convert and copy notebook/md files...")
    for ix_file, page in enumerate(tqdm(list(toc))):
        url_page = page.get('url', None)
        title = page.get('title', None)
        if page.get('external', None):
            # If its an external link, just pass
            continue

        # Make sure URLs (file paths) have correct structure
        _check_url_page(url_page, CONTENT_FOLDER_NAME)

        ###############################################################################
        # Create path to old/new file and create directory

        # URL will be relative to the CONTENT_FOLDER
        path_url_page = os.path.join(PATH_CONTENT_FOLDER, url_page.lstrip('/'))
        path_url_folder = os.path.dirname(path_url_page)

        # URLs shouldn't have the suffix in there already so now we find which one to add
        for suf in SUPPORTED_FILE_SUFFIXES:
            if op.exists(path_url_page + suf):
                path_url_page = path_url_page + suf
                break

        if not op.exists(path_url_page):
            raise _error(
                "Could not find file called {} with any of these extensions: {}"
                .format(path_url_page, SUPPORTED_FILE_SUFFIXES))

        # Create and check new folder / file paths
        path_new_folder = path_url_folder.replace(os.sep + CONTENT_FOLDER_NAME,
                                                  os.sep + BUILD_FOLDER_NAME)
        path_new_file = op.join(
            path_new_folder,
            op.basename(path_url_page).replace('.ipynb', '.md'))

        if overwrite is False and op.exists(path_new_file) \
           and os.stat(path_new_file).st_mtime > os.stat(path_url_page).st_mtime:
            n_skipped_files += 1
            continue

        if not op.isdir(path_new_folder):
            os.makedirs(path_new_folder)

        ###############################################################################
        # Generate previous/next page URLs
        if ix_file == 0:
            url_prev_page = ''
            prev_file_title = ''
        else:
            prev_file_title = toc[ix_file - 1].get('title')
            url_prev_page = toc[ix_file - 1].get('url')
            url_prev_page = _prepare_url(url_prev_page)

        if ix_file == len(toc) - 1:
            url_next_page = ''
            next_file_title = ''
        else:
            next_file_title = toc[ix_file + 1].get('title')
            url_next_page = toc[ix_file + 1].get('url')
            url_next_page = _prepare_url(url_next_page)

        ###############################################################################
        # Get kernel name from notebooks metadata

        kernel_name = ''
        if path_url_page.endswith('.ipynb'):
            data = nbf.read(path_url_page, nbf.NO_CONVERT)
            kernel_name = data['metadata']['kernelspec']['name']

        ###############################################################################
        # Content conversion

        # Convert notebooks or just copy md if no notebook.
        if path_url_page.endswith('.ipynb'):
            # Create a temporary version of the notebook we can modify
            tmp_notebook = path_url_page + '_TMP'
            sh.copy2(path_url_page, tmp_notebook)

            ###############################################################################
            # Notebook cleaning

            # Clean up the file before converting
            cleaner = NotebookCleaner(tmp_notebook)
            cleaner.remove_cells(empty=True)
            if site_yaml.get('hide_cell_text', False):
                cleaner.remove_cells(
                    search_text=site_yaml.get('hide_cell_text'))
            if site_yaml.get('hide_code_text', False):
                cleaner.clear(kind="content",
                              search_text=site_yaml.get('hide_code_text'))
            cleaner.clear('stderr')
            cleaner.save(tmp_notebook)
            _clean_notebook_cells(tmp_notebook)

            ###############################################################################
            # Conversion to Jekyll Markdown

            # Run nbconvert moving it to the output folder
            # This is the output directory for `.md` files
            build_call = '--FilesWriter.build_directory={}'.format(
                path_new_folder)
            # Copy notebook output images to the build directory using the base folder name
            path_after_build_folder = path_new_folder.split(os.sep +
                                                            BUILD_FOLDER_NAME +
                                                            os.sep)[-1]
            nb_output_folder = op.join(PATH_IMAGES_FOLDER,
                                       path_after_build_folder)
            images_call = '--NbConvertApp.output_files_dir={}'.format(
                nb_output_folder)
            call = [
                'jupyter', 'nbconvert', '--log-level="CRITICAL"', '--to',
                'markdown', '--template', PATH_TEMPLATE, images_call,
                build_call, tmp_notebook
            ]
            if execute is True:
                call.insert(-1, '--execute')

            check_call(call)
            os.remove(tmp_notebook)
        elif path_url_page.endswith('.md'):
            # If a non-notebook file, just copy it over.
            # If markdown we'll add frontmatter later
            sh.copy2(path_url_page, path_new_file)
        else:
            raise _error("Files must end in ipynb or md. Found file {}".format(
                path_url_page))

        ###############################################################################
        # Modify the generated Markdown to work with Jekyll

        # Clean markdown for Jekyll quirks (e.g. extra escape characters)
        with open(path_new_file, 'r') as ff:
            lines = ff.readlines()
        lines = _clean_lines(lines, path_new_file, PATH_BOOK,
                             PATH_IMAGES_FOLDER)

        # Split off original yaml
        yaml_orig, lines = _split_yaml(lines)

        # Front-matter YAML
        yaml_fm = []
        yaml_fm += ['---']
        # In case pre-existing links are sanitized
        sanitized = url_page.lower().replace('_', '-')
        if sanitized != url_page:
            if case_check and url_page.lower() == sanitized:
                raise RuntimeError(
                    'Redirect {} clashes with page {} for local build on '
                    'case-insensitive FS\n'.format(sanitized, url_page) +
                    'Rename source page to lower case or build on a case '
                    'sensitive FS, e.g. case-sensitive disk image on Mac')
            yaml_fm += ['redirect_from:']
            yaml_fm += ['  - "{}"'.format(sanitized)]
        if path_url_page.endswith('.ipynb'):
            interact_path = CONTENT_FOLDER_NAME + '/' + path_url_page.split(
                CONTENT_FOLDER_NAME + '/')[-1]
            yaml_fm += ['interact_link: {}'.format(interact_path)]
            yaml_fm += ["kernel_name: {}".format(kernel_name)]
        yaml_fm += ["title: '{}'".format(title)]
        yaml_fm += ['prev_page:']
        yaml_fm += ['  url: {}'.format(url_prev_page)]
        yaml_fm += ["  title: '{}'".format(prev_file_title)]
        yaml_fm += ['next_page:']
        yaml_fm += ['  url: {}'.format(url_next_page)]
        yaml_fm += ["  title: '{}'".format(next_file_title)]

        # Add back any original YaML, and end markers
        yaml_fm += yaml_orig
        yaml_fm += [
            'comment: "***PROGRAMMATICALLY GENERATED, DO NOT EDIT. SEE ORIGINAL FILES IN /{}***"'
            .format(CONTENT_FOLDER_NAME)
        ]
        yaml_fm += ['---']
        yaml_fm = [ii + '\n' for ii in yaml_fm]
        lines = yaml_fm + lines

        # Write the result
        with open(path_new_file, 'w') as ff:
            ff.writelines(lines)
        n_built_files += 1

    ###############################################################################
    # Finishing up...

    # Copy non-markdown files in notebooks/ in case they're referenced in the notebooks
    print('Copying non-content files inside `{}/`...'.format(
        CONTENT_FOLDER_NAME))
    _copy_non_content_files(PATH_CONTENT_FOLDER, CONTENT_FOLDER_NAME,
                            BUILD_FOLDER_NAME)

    # Message at the end
    msg = [
        "Generated {} new files\nSkipped {} already-built files".format(
            n_built_files, n_skipped_files)
    ]
    if n_built_files == 0:
        msg += [
            "Delete the markdown files in '{}' for any pages that you wish to re-build, or use --overwrite option to re-build all."
            .format(BUILD_FOLDER_NAME)
        ]
    msg += ["Your Jupyter Book is now in `{}/`.".format(BUILD_FOLDER_NAME)]
    msg += ["Demo your Jupyter book with `make serve` or push to GitHub!"]
    print_message_box('\n'.join(msg))