Beispiel #1
0
def render_page(state: State, path, filename, env):
    """Render a single reST page into ``<path[0]>.html``.

    The file at ``filename`` is read and passed through ``publish_rst()``.
    Every ``field`` element found in the document's docinfo is collected as
    page metadata (with lowercased names) and exposed as an attribute on the
    page object handed to the ``page.html`` template.

    ``path`` has to be a single-element list, page hierarchy is not
    supported. Returns a one-element list with the page's index entry, or an
    empty list if the page is the index itself.
    """
    logging.debug("generating %s.html", '.'.join(path))

    # Give all registered page begin hooks a chance to run
    for pre_page_hook in state.hooks_pre_page:
        pre_page_hook()

    # Publish the file contents
    with open(filename, 'r') as source:
        pub = publish_rst(state, source.read())

    # Collect metadata out of the docinfo fields
    metadata = {}
    for docinfo in pub.document.traverse(docutils.nodes.docinfo):
        for field in docinfo.children:
            if field.tagname != 'field':
                continue

            name_elem, body_elem = field.children
            field_name = name_elem.astext()

            # Plain metadata are taken as text, nothing more to do
            if field_name not in state.config['FORMATTED_METADATA']:
                metadata[field_name.lower()] = body_elem.astext()
                continue

            # Formatted metadata go through a dedicated translator that
            # doesn't wrap the content in <dd>; the <p> around is disabled
            # explicitly as well since it's not always wanted.
            # TODO: uncrapify this a bit
            translator = m.htmlsanity._SaneFieldBodyTranslator(pub.document)
            translator.compact_field_list = True
            body_elem.walkabout(translator)
            metadata[field_name.lower()] = translator.astext()

    # Only a flat breadcrumb, there's no page hierarchy yet
    assert len(path) == 1
    title = pub.writer.parts.get('title')
    breadcrumb = [(title, path[0] + '.html')]

    page = Empty()
    page.url = breadcrumb[-1][1]
    page.breadcrumb = breadcrumb
    page.prefix_wbr = path[0]

    # Page body first, the metadata get applied on top of it
    page.content = pub.writer.parts.get('body').rstrip()
    for attribute, content in metadata.items():
        setattr(page, attribute, content)
    if not hasattr(page, 'summary'):
        page.summary = ''

    render(state.config, 'page.html', page, env)

    # The index page itself doesn't get listed in the page index
    if path == ['index']:
        return []

    index_entry = IndexEntry()
    index_entry.kind = 'page'
    index_entry.name = title
    index_entry.url = page.url
    index_entry.summary = page.summary
    return [index_entry]
Beispiel #2
0
def render_module(state: State, path, module, env):
    """Render the documentation page of a module and of everything nested
    in it.

    ``path`` is a list of name components under which the module is
    documented; joined with dots it forms the page URL and the lookup key
    into ``state.module_docs``. ``module`` is the imported module object and
    ``env`` the Jinja environment passed through to ``render()``.

    If the module defines ``__all__``, exactly the names listed there are
    documented and ``state.module_mapping`` is updated with entries mapping
    the real location of each re-exported object to its public path.
    Otherwise the members are enumerated with ``inspect.getmembers()`` and
    filtered through ``is_internal_or_imported_module_member()``.

    Returns the ``IndexEntry`` of kind ``'module'`` whose ``children`` hold
    the entries returned by the nested ``render_module()`` and
    ``render_class()`` calls.
    """
    logging.debug("generating %s.html", '.'.join(path))

    # Call all registered page begin hooks
    for hook in state.hooks_pre_page: hook()

    url_base = ''
    breadcrumb = []
    for i in path:
        url_base += i + '.'
        breadcrumb += [(i, url_base + 'html')]

    page = Empty()
    page.summary = extract_summary(state, state.module_docs, path, module.__doc__)
    page.url = breadcrumb[-1][1]
    page.breadcrumb = breadcrumb
    page.prefix_wbr = '.<wbr />'.join(path + [''])
    page.modules = []
    page.classes = []
    page.enums = []
    page.functions = []
    page.data = []
    page.has_enum_details = False

    # External page content, if provided
    path_str = '.'.join(path)
    if path_str in state.module_docs:
        page.content = render_rst(state, state.module_docs[path_str]['content'])
        state.module_docs[path_str]['used'] = True

    # Index entry for this module, returned together with children at the end
    index_entry = IndexEntry()
    index_entry.kind = 'module'
    index_entry.name = breadcrumb[-1][0]
    index_entry.url = page.url
    index_entry.summary = page.summary

    # List of inner modules and classes to render, these will be done after the
    # current module introspection is done to have some better memory
    # allocation pattern
    modules_to_render = []
    classes_to_render = []

    # This is actually complicated -- if the module defines __all__, use that.
    # The __all__ is meant to expose the public API, so we don't filter out
    # underscored things.
    if hasattr(module, '__all__'):
        # Names exposed in __all__ could be also imported from elsewhere, for
        # example this is a common pattern with native libraries and we want
        # Foo, Bar, submodule and *everything* in submodule to be referred to
        # as `library.RealName` (`library.submodule.func()`, etc.) instead of
        # `library._native.Foo`, `library._native.sub.func()` etc.
        #
        #   from ._native import Foo as PublicName
        #   from ._native import sub as submodule
        #   __all__ = ['PublicName', 'submodule']
        #
        # The name references can be cyclic so extract the mapping in a
        # separate pass before everything else.
        for name in module.__all__:
            # Everything available in __all__ is already imported, so get those
            # directly
            member = getattr(module, name)
            subpath = path + [name]

            # Modules have __name__ while other objects have __module__, need
            # to check both.
            if inspect.ismodule(member) and member.__name__ != '.'.join(subpath):
                assert member.__name__ not in state.module_mapping
                state.module_mapping[member.__name__] = '.'.join(subpath)
            elif hasattr(member, '__module__'):
                # Plain data such as instances of user-defined classes see
                # __module__ through their class but have no __name__ of
                # their own, and __module__ itself can be None. Neither can
                # be referenced by a qualified name, so there's nothing to
                # map for them.
                member_module = member.__module__
                member_name = getattr(member, '__name__', None)
                if not isinstance(member_module, str) or not isinstance(member_name, str):
                    continue

                subname = member_module + '.' + member_name
                if subname != '.'.join(subpath):
                    assert subname not in state.module_mapping
                    state.module_mapping[subname] = '.'.join(subpath)

        # Now extract the actual docs
        for name in module.__all__:
            member = getattr(module, name)
            subpath = path + [name]

            # We allow undocumented submodules (since they're often in the
            # standard lib), but not undocumented classes etc. Render the
            # submodules and subclasses recursively.
            if inspect.ismodule(member):
                page.modules += [extract_module_doc(state, subpath, member)]
                index_entry.children += [render_module(state, subpath, member, env)]
            elif inspect.isclass(member) and not is_enum(state, member):
                page.classes += [extract_class_doc(state, subpath, member)]
                index_entry.children += [render_class(state, subpath, member, env)]
            elif inspect.isclass(member) and is_enum(state, member):
                enum_ = extract_enum_doc(state, subpath, member)
                page.enums += [enum_]
                if enum_.has_details: page.has_enum_details = True
            elif inspect.isfunction(member) or inspect.isbuiltin(member):
                page.functions += extract_function_doc(state, module, subpath, member)
            # Assume everything else is data. The builtin help help() (from
            # pydoc) does the same:
            # https://github.com/python/cpython/blob/d29b3dd9227cfc4a23f77e99d62e20e063272de1/Lib/pydoc.py#L113
            # TODO: unify this query
            elif not inspect.isframe(member) and not inspect.istraceback(member) and not inspect.iscode(member):
                page.data += [extract_data_doc(state, module, subpath, member)]
            else: # pragma: no cover
                logging.warning("unknown symbol %s in %s", name, '.'.join(path))

    # Otherwise, enumerate the members using inspect. However, inspect lists
    # also imported modules, functions and classes, so take only those which
    # have __module__ equivalent to `path`.
    else:
        # Get (and render) inner modules
        for name, member in inspect.getmembers(module, inspect.ismodule):
            if is_internal_or_imported_module_member(state, module, path, name, member): continue

            subpath = path + [name]
            page.modules += [extract_module_doc(state, subpath, member)]
            modules_to_render += [(subpath, member)]

        # Get (and render) inner classes
        for name, member in inspect.getmembers(module, lambda o: inspect.isclass(o) and not is_enum(state, o)):
            if is_internal_or_imported_module_member(state, module, path, name, member): continue

            subpath = path + [name]
            if not member.__doc__: logging.warning("%s is undocumented", '.'.join(subpath))

            page.classes += [extract_class_doc(state, subpath, member)]
            classes_to_render += [(subpath, member)]

        # Get enums
        for name, member in inspect.getmembers(module, lambda o: is_enum(state, o)):
            if is_internal_or_imported_module_member(state, module, path, name, member): continue

            subpath = path + [name]
            if not member.__doc__: logging.warning("%s is undocumented", '.'.join(subpath))

            enum_ = extract_enum_doc(state, subpath, member)
            page.enums += [enum_]
            if enum_.has_details: page.has_enum_details = True

        # Get inner functions
        for name, member in inspect.getmembers(module, lambda o: inspect.isfunction(o) or inspect.isbuiltin(o)):
            if is_internal_or_imported_module_member(state, module, path, name, member): continue

            subpath = path + [name]
            if not member.__doc__: logging.warning("%s() is undocumented", '.'.join(subpath))

            page.functions += extract_function_doc(state, module, subpath, member)

        # Get data
        # TODO: unify this query
        for name, member in inspect.getmembers(module, lambda o: not inspect.ismodule(o) and not inspect.isclass(o) and not inspect.isroutine(o) and not inspect.isframe(o) and not inspect.istraceback(o) and not inspect.iscode(o)):
            if is_internal_or_imported_module_member(state, module, path, name, member): continue

            page.data += [extract_data_doc(state, module, path + [name], member)]

    # Render the module, free the page data to avoid memory rising indefinitely
    render(state.config, 'module.html', page, env)
    del page

    # Render submodules and subclasses
    for subpath, member in modules_to_render:
        index_entry.children += [render_module(state, subpath, member, env)]
    for subpath, member in classes_to_render:
        index_entry.children += [render_class(state, subpath, member, env)]

    return index_entry
Beispiel #3
0
def render_class(state: State, path, class_, env):
    """Render the documentation page of a class and, recursively, the pages
    of its inner classes.

    ``path`` is a list of name components leading to the class; joined with
    dots it forms the page URL and the lookup key into ``state.class_docs``.
    ``env`` is the Jinja environment passed through to ``render()``.

    Returns the ``IndexEntry`` of kind ``'class'`` with the entries of all
    inner classes appended to its ``children``.
    """
    dotted_path = '.'.join(path)
    logging.debug("generating %s.html", dotted_path)

    # Give all registered page begin hooks a chance to run
    for pre_page_hook in state.hooks_pre_page:
        pre_page_hook()

    # One breadcrumb item per path component, each linking to the page of
    # the path up to and including that component
    breadcrumb = [
        (component, '.'.join(path[:depth + 1]) + '.html')
        for depth, component in enumerate(path)]

    page = Empty()
    page.summary = extract_summary(state, state.class_docs, path, class_.__doc__)
    page.url = breadcrumb[-1][1]
    page.breadcrumb = breadcrumb
    page.prefix_wbr = '.<wbr />'.join(path + [''])
    page.classes = []
    page.enums = []
    page.classmethods = []
    page.staticmethods = []
    page.dunder_methods = []
    page.methods = []
    page.properties = []
    page.data = []
    page.has_enum_details = False

    # Content supplied externally, if there's any for this class
    if dotted_path in state.class_docs:
        external = state.class_docs[dotted_path]
        page.content = render_rst(state, external['content'])
        external['used'] = True

    # Index entry for this class, the children get filled in at the very end
    index_entry = IndexEntry()
    index_entry.kind = 'class'
    index_entry.name = breadcrumb[-1][0]
    index_entry.url = page.url
    index_entry.summary = page.summary

    # Member filters for the inspect.getmembers() calls below
    def is_inner_class(candidate):
        return inspect.isclass(candidate) and not is_enum(state, candidate)

    def is_inner_enum(candidate):
        return is_enum(state, candidate)

    # TODO: unify this query
    def is_data(candidate):
        return not (
            inspect.ismodule(candidate) or
            inspect.isclass(candidate) or
            inspect.isroutine(candidate) or
            inspect.isframe(candidate) or
            inspect.istraceback(candidate) or
            inspect.iscode(candidate) or
            inspect.isdatadescriptor(candidate))

    # Inner classes are only remembered here and rendered once the page of
    # this class is done and freed, for a better memory allocation pattern
    pending_classes = []
    for name, member in inspect.getmembers(class_, is_inner_class):
        # TODO: the explicit __base__ / __class__ check
        if name in ('__base__', '__class__') or name.startswith('_'):
            continue

        subpath = path + [name]
        if not member.__doc__:
            logging.warning("%s is undocumented", '.'.join(subpath))

        page.classes.append(extract_class_doc(state, subpath, member))
        pending_classes.append((subpath, member))

    # Enums
    for name, member in inspect.getmembers(class_, is_inner_enum):
        if name.startswith('_'):
            continue

        subpath = path + [name]
        if not member.__doc__:
            logging.warning("%s is undocumented", '.'.join(subpath))

        enum_doc = extract_enum_doc(state, subpath, member)
        page.enums.append(enum_doc)
        if enum_doc.has_details:
            page.has_enum_details = True

    # Methods
    for name, member in inspect.getmembers(class_, inspect.isroutine):
        # Underscored methods are skipped, dunder methods are not
        if is_internal_function_name(name):
            continue

        # Dunder methods that only carry the builtin docs are skipped too
        is_dunder = name.startswith('__')
        if is_dunder and (name, member.__doc__) in _filtered_builtin_functions:
            continue

        subpath = path + [name]
        if not member.__doc__:
            logging.warning("%s() is undocumented", '.'.join(subpath))

        for function in extract_function_doc(state, class_, subpath, member):
            if is_dunder:
                bucket = page.dunder_methods
            elif function.is_classmethod:
                bucket = page.classmethods
            elif function.is_staticmethod:
                bucket = page.staticmethods
            else:
                bucket = page.methods
            bucket.append(function)

    # Properties
    for name, member in inspect.getmembers(class_, inspect.isdatadescriptor):
        # TODO: are there any dunder props?
        if (name, member.__doc__) in _filtered_builtin_properties or name.startswith('_'):
            continue

        subpath = path + [name]
        if not member.__doc__:
            logging.warning("%s is undocumented", '.'.join(subpath))

        page.properties.append(extract_property_doc(state, subpath, member))

    # Data
    for name, member in inspect.getmembers(class_, is_data):
        if name.startswith('_'):
            continue

        page.data.append(extract_data_doc(state, class_, path + [name], member))

    # Render the class and drop the page data right after so the memory use
    # doesn't keep rising while recursing
    render(state.config, 'class.html', page, env)
    del page

    # Now the inner classes
    for subpath, inner_class in pending_classes:
        index_entry.children += [render_class(state, subpath, inner_class, env)]

    return index_entry
Beispiel #4
0
def run(basedir, config, templates):
    """Generate the whole documentation output.

    ``basedir`` is the directory against which ``config['INPUT']`` and the
    ``config['INPUT_DOCS']`` files are resolved, ``config`` is the
    configuration dictionary and ``templates`` is the path handed to the
    Jinja ``FileSystemLoader``.

    Note that ``config`` is modified in place: ``INPUT`` and ``OUTPUT`` are
    replaced with paths joined onto ``basedir`` and a non-empty ``FAVICON``
    is turned into a ``(path, MIME type)`` tuple.

    The function imports and registers plugins, renders all input modules
    and pages, writes the ``modules.html``, ``classes.html`` and
    ``pages.html`` index files, copies the referenced files to the output
    directory and finally calls the registered post-run hooks.
    """
    # Prepare Jinja environment
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(templates), trim_blocks=True,
        lstrip_blocks=True, enable_async=True)
    # Filter to return file basename or the full URL, if absolute
    def basename_or_url(path):
        if urllib.parse.urlparse(path).netloc: return path
        return os.path.basename(path)
    # Filter to return URL for given symbol or the full URL, if absolute
    def path_to_url(path):
        if urllib.parse.urlparse(path).netloc: return path
        return path + '.html'
    env.filters['basename_or_url'] = basename_or_url
    env.filters['path_to_url'] = path_to_url
    env.filters['urljoin'] = urljoin

    # Populate the INPUT, if not specified, make it absolute
    if config['INPUT'] is None: config['INPUT'] = basedir
    else: config['INPUT'] = os.path.join(basedir, config['INPUT'])

    # Make the output dir absolute. Creating it with exist_ok avoids the
    # window between checking for existence and creating the directory.
    config['OUTPUT'] = os.path.join(config['INPUT'], config['OUTPUT'])
    os.makedirs(config['OUTPUT'], exist_ok=True)

    # Guess MIME type of the favicon
    if config['FAVICON']:
        config['FAVICON'] = (config['FAVICON'], mimetypes.guess_type(config['FAVICON'])[0])

    state = State(config)

    # Set up extra plugin paths. The one for m.css plugins was added above.
    # The presence check has to be done on the same path that gets appended,
    # otherwise it never matches and repeated runs keep adding duplicates.
    for path in config['PLUGIN_PATHS']:
        plugin_path = os.path.join(config['INPUT'], path)
        if plugin_path not in sys.path: sys.path.append(plugin_path)

    # Import plugins
    for plugin in ['m.htmlsanity'] + config['PLUGINS']:
        module = importlib.import_module(plugin)
        module.register_mcss(
            mcss_settings=config,
            jinja_environment=env,
            module_doc_contents=state.module_docs,
            class_doc_contents=state.class_docs,
            data_doc_contents=state.data_docs,
            hooks_pre_page=state.hooks_pre_page,
            hooks_post_run=state.hooks_post_run)

    # Call all registered page begin hooks for the first time
    for hook in state.hooks_pre_page: hook()

    # First process the doc input files so we have all data for rendering
    # module pages
    # NOTE(review): these are resolved against basedir while INPUT_PAGES
    # below are resolved against config['INPUT'] -- confirm that's intended
    for file in config['INPUT_DOCS']:
        render_doc(state, os.path.join(basedir, file))

    # Modules can be given either as names to import or as already imported
    # module objects
    for module in config['INPUT_MODULES']:
        if isinstance(module, str):
            module_name = module
            module = importlib.import_module(module)
        else:
            module_name = module.__name__

        state.class_index += [render_module(state, [module_name], module, env)]

    # Warn if there are any unused contents left after processing everything
    unused_module_docs = [key for key, value in state.module_docs.items() if 'used' not in value]
    unused_class_docs = [key for key, value in state.class_docs.items() if 'used' not in value]
    unused_data_docs = [key for key, value in state.data_docs.items() if 'used' not in value]
    if unused_module_docs:
        logging.warning("The following module doc contents were unused: %s", unused_module_docs)
    if unused_class_docs:
        logging.warning("The following class doc contents were unused: %s", unused_class_docs)
    if unused_data_docs:
        logging.warning("The following data doc contents were unused: %s", unused_data_docs)

    for page in config['INPUT_PAGES']:
        state.page_index += render_page(state, [os.path.splitext(os.path.basename(page))[0]], os.path.join(config['INPUT'], page), env)

    # Recurse into the tree and mark every module node that has nested modules
    # with has_nestable_children. Returns whether the passed list itself
    # contains any module.
    def mark_nested_modules(entries: List[IndexEntry]):
        has_nestable_children = False
        for i in entries:
            if i.kind != 'module': continue
            has_nestable_children = True
            i.has_nestable_children = mark_nested_modules(i.children)
        return has_nestable_children
    mark_nested_modules(state.class_index)

    # Create module and class index
    index = Empty()
    index.classes = state.class_index
    index.pages = state.page_index
    for file in ['modules.html', 'classes.html', 'pages.html']:
        template = env.get_template(file)
        rendered = template.render(index=index, FILENAME=file, **config)
        with open(os.path.join(config['OUTPUT'], file), 'wb') as f:
            f.write(rendered.encode('utf-8'))
            # Add back a trailing newline so we don't need to bother with
            # patching test files to include a trailing newline to make Git
            # happy
            # TODO could keep_trailing_newline fix this better?
            f.write(b'\n')

    # Create index.html if it was not provided by the user
    if 'index.rst' not in [os.path.basename(i) for i in config['INPUT_PAGES']]:
        logging.debug("writing index.html for an empty main page")

        page = Empty()
        page.breadcrumb = [(config['PROJECT_TITLE'], 'index.html')]
        page.url = page.breadcrumb[-1][1]
        render(config, 'page.html', page, env)

    # Copy referenced files
    for i in config['STYLESHEETS'] + config['EXTRA_FILES'] + ([config['FAVICON'][0]] if config['FAVICON'] else []) + list(state.external_data) + ([] if config['SEARCH_DISABLED'] else ['search.js']):
        # Skip absolute URLs
        if urllib.parse.urlparse(i).netloc: continue

        # If file is found relative to the conf file, use that
        if os.path.exists(os.path.join(config['INPUT'], i)):
            i = os.path.join(config['INPUT'], i)

        # Otherwise use path relative to script directory
        else:
            i = os.path.join(os.path.dirname(os.path.realpath(__file__)), i)

        logging.debug("copying %s to output", i)
        shutil.copy(i, os.path.join(config['OUTPUT'], os.path.basename(i)))

    # Call all registered finalization hooks
    for hook in state.hooks_post_run: hook()