Esempio n. 1
0
def generate_operators_page(raw_operators_api, destination_dir, lang_dirs):
    operators_output = ''

    try:
        operators = clean_json_string(raw_operators_api)

        # Go through all the operators and construct a new HTML object.
        operator_template = Template(OPERATOR_TEMPLATE)

        operators_output += OPERATORS_WRAPPER[0]

        for operator in operators:
            if 'comment' in operator:

                formula_map = {}
                comment = reserve_formulas(operator['comment'],
                                           formula_map,
                                           only_reserve_double_dollar=True)

                comment = markdown.markdown(comment,
                                            extensions=MARKDOWN_EXTENSIONS)

                #if len(operator_comment_line) > 0:
                if 'markdown-equation' in comment:
                    soup = BeautifulSoup('<p>' + comment + '</p>', 'lxml')
                    markdown_equation_placeholders = soup.select(
                        '.markdown-equation')

                    for equation in markdown_equation_placeholders:
                        equation.string = formula_map[equation.get('id')]

                    comment = unicode(
                        str(soup.select('body')[0])[6:-7], 'utf-8')

                operator['comment'] = comment

            operators_output += operator_template.render(Context(operator))

        operators_output += OPERATORS_WRAPPER[1]

        for lang in lang_dirs:
            operators_output_path = '%s/%s/operators.html' % (destination_dir,
                                                              lang)

            print 'Saving operators.html to %s' % operators_output_path
            if not os.path.exists(os.path.dirname(operators_output_path)):
                os.makedirs(os.path.dirname(operators_output_path))

            with codecs.open(operators_output_path, 'w',
                             'utf-8') as operators_output_file:
                operators_output_file.write(operators_output)

    except Exception, e:
        print 'Failed to build operator docs because: ', e
def generate_book_docs(original_documentation_dir, output_dir_name):
    """
    Convert the book's markdown sources into HTML partials and copy images.

    Any previously generated destination directory is removed first. Then
    `original_documentation_dir` is walked:

    * Files whose name contains '.md' are read, passed through
      `sanitize_markdown`, rendered with `markdown.markdown` and written to
      the destination wrapped in `{% verbatim %}` / `{% endverbatim %}`.
      Files whose markdown body is empty after sanitizing produce no output.
    * Files whose relative path contains 'image/' are copied unchanged.
    * Everything else is ignored.

    For files with a '.md' extension the output path ends in '.html' and the
    first 'README' in the relative path is replaced with 'index'.

    Args:
        original_documentation_dir: Directory holding the book sources.
        output_dir_name: Passed to `_get_destination_documentation_dir` to
            obtain the destination directory.

    Returns:
        The destination directory the partials were written to.

    Raises:
        Exception: If the parent directory of `original_documentation_dir`
            does not exist.
    """
    destination_documentation_dir = _get_destination_documentation_dir(
        output_dir_name)

    # Remove old generated docs directory (isdir is False for missing paths).
    if os.path.isdir(destination_documentation_dir):
        shutil.rmtree(destination_documentation_dir)

    # NOTE(review): this tests the *parent* of the source directory, while the
    # message below names the directory itself. Left as-is because callers may
    # rely on it -- confirm whether the directory itself should be checked.
    if not os.path.exists(os.path.dirname(original_documentation_dir)):
        raise Exception('Cannot generate book, directory %s does not exists.' %
                        original_documentation_dir)

    for subdir, dirs, all_files in os.walk(original_documentation_dir):
        for file_name in all_files:
            source_path = os.path.join(subdir, file_name)
            subpath = source_path[len(original_documentation_dir):]

            # Replace .md with .html, and the first 'README' with 'index'.
            (name, extension) = os.path.splitext(subpath)
            if extension == '.md':
                subpath = name.replace('README', 'index', 1) + '.html'

            new_path = '%s/%s' % (destination_documentation_dir, subpath)

            is_markdown = '.md' in file_name
            is_image = 'image/' in subpath
            if not (is_markdown or is_image):
                continue

            new_dir = os.path.dirname(new_path)
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)

            if not is_markdown:
                shutil.copyfile(source_path, new_path)
                continue

            # Convert the contents of the MD file.
            with open(source_path) as original_md_file:
                markdown_body = sanitize_markdown(original_md_file.read())

            # Mathjax formula like $n$ would cause the conversion from
            # markdown to html mal-formatted. So we first store the existing
            # formulas to formula_map and replace them with <span></span>.
            # After the conversion, we put them back.
            markdown_body = unicode(str(markdown_body), 'utf-8')
            formula_map = {}
            markdown_body = reserve_formulas(markdown_body, formula_map)

            # NOTE: This ignores the root index files.
            if not markdown_body:
                continue

            with codecs.open(new_path, 'w', 'utf-8') as new_html_partial:
                converted_content = markdown.markdown(
                    markdown_body, extensions=MARKDOWN_EXTENSIONS)

                soup = BeautifulSoup(converted_content, 'lxml')
                for equation in soup.select('.markdown-equation'):
                    equation.string = formula_map[equation.get('id')]

                try:
                    # NOTE: The 6:-7 removes the opening and closing body tag.
                    new_html_partial.write(
                        '{% verbatim %}\n' + unicode(
                            str(soup.select('body')[0])[6:-7],
                            'utf-8') + '\n{% endverbatim %}')
                except Exception:
                    # Best effort: report the page and keep going. Catching
                    # Exception (not a bare except) lets KeyboardInterrupt
                    # and SystemExit still stop the run.
                    print('Cannot generated a page for: ' + subpath)

    return destination_documentation_dir
def generate_models_docs(original_documentation_dir, output_dir_name, options=None):
    """
    Convert the models' markdown sources into HTML partials and copy images.

    `original_documentation_dir` is walked:

    * Files whose name contains '.md' are read, passed through
      `sanitize_markdown`, rendered with `markdown.markdown` and written to
      the destination wrapped in `{% verbatim %}` / `{% endverbatim %}`.
      Links that point at the PaddlePaddle/models GitHub tree, and links that
      start with './', are rewritten through `_update_link_path`.
    * Files whose relative path contains 'images' are copied unchanged.
    * Everything else is ignored.

    For files with a '.md' extension the output path ends in '.html'.

    Args:
        original_documentation_dir: Directory holding the models sources.
        output_dir_name: Passed to `_get_destination_documentation_dir` to
            obtain the destination directory.
        options: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        The destination directory the partials were written to.
    """
    destination_documentation_dir = _get_destination_documentation_dir(output_dir_name)

    github_url = 'https://github.com/PaddlePaddle/models/tree/'

    # The attribute values are quoted: they contain ':', '/' and '.', which
    # are not valid in an unquoted CSS attribute value, so newer Beautiful
    # Soup releases reject the unquoted form.
    github_link_selector = 'a[href^="%s"]' % github_url
    relative_link_selector = 'a[href^="%s"]' % './'

    for subdir, dirs, all_files in os.walk(original_documentation_dir):
        for file_name in all_files:
            source_path = os.path.join(subdir, file_name)
            subpath = source_path[len(original_documentation_dir):]

            # Replace .md with .html.
            (name, extension) = os.path.splitext(subpath)
            if extension == '.md':
                subpath = name + '.html'

            new_path = '%s/%s' % (destination_documentation_dir, subpath)

            is_markdown = '.md' in file_name
            is_image = 'images' in subpath
            if not (is_markdown or is_image):
                continue

            new_dir = os.path.dirname(new_path)
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)

            if not is_markdown:
                shutil.copyfile(source_path, new_path)
                continue

            # Convert the contents of the MD file.
            with open(source_path) as original_md_file:
                markdown_body = sanitize_markdown(original_md_file.read())

            # Preserve all formula
            formula_map = {}
            markdown_body = reserve_formulas(markdown_body, formula_map)

            with codecs.open(new_path, 'w', 'utf-8') as new_html_partial:
                converted_content = markdown.markdown(
                    unicode(markdown_body, 'utf-8'),
                    extensions=MARKDOWN_EXTENSIONS
                )

                soup = BeautifulSoup(converted_content, 'lxml')

                # Insert the preserved formulas
                for equation in soup.select('.markdown-equation'):
                    equation.string = formula_map[equation.get('id')]

                for link in soup.select(github_link_selector):
                    link_path, md_extension = os.path.splitext(link['href'])

                    # Remove the github link and version.
                    link_path = link_path.replace(github_url, '')
                    link_path = re.sub(r"^v?[0-9]+\.[0-9]+\.[0-9]+/|^develop/", '', link_path)
                    link['href'] = _update_link_path(link_path, md_extension)

                # Note: Some files have links to local md files. Change those
                # links to local html files. This runs after the GitHub pass,
                # as before, so a rewritten link starting with './' is
                # processed again here.
                for link in soup.select(relative_link_selector):
                    link_path, md_extension = os.path.splitext(link['href'])
                    link['href'] = _update_link_path(link_path, md_extension)

                try:
                    # NOTE: The 6:-7 removes the opening and closing body tag.
                    new_html_partial.write('{% verbatim %}\n' + unicode(
                        str(soup.select('body')[0])[6:-7], 'utf-8'
                    ) + '\n{% endverbatim %}')
                except Exception:
                    # Best effort: report the page and keep going. Catching
                    # Exception (not a bare except) lets KeyboardInterrupt
                    # and SystemExit still stop the run.
                    print('Cannot generated a page for: ' + subpath)

    return destination_documentation_dir