def render_page(state: State, path, filename, env):
    """Render one standalone reST page and describe it for the page index.

    The file is published through ``publish_rst()``, its docinfo fields are
    turned into attributes of the page object and the result is passed to
    ``render()`` with the ``page.html`` template.

    :param state: Generator state; ``hooks_pre_page`` and ``config`` are used
    :param path: Page path as a list of names. Page hierarchy isn't
        implemented, so exactly one element is expected (checked by an
        assertion).
    :param filename: Path to the reST source file
    :param env: Template environment, passed through to ``render()``
    :return: An empty list if ``path`` is ``['index']``, otherwise a
        one-element list containing the index entry of kind ``'page'``
    """
    logging.debug("generating %s.html", '.'.join(path))

    # Call all registered page begin hooks
    for hook in state.hooks_pre_page:
        hook()

    # Render the file. The encoding is explicit so the input is not decoded
    # with a locale-dependent default; generated files are written as UTF-8
    # as well.
    with open(filename, 'r', encoding='utf-8') as f:
        pub = publish_rst(state, f.read())

    # Extract metadata from the page
    metadata = {}
    for docinfo in pub.document.traverse(docutils.nodes.docinfo):
        for element in docinfo.children:
            if element.tagname != 'field':
                continue

            name_elem, body_elem = element.children
            name = name_elem.astext()
            if name in state.config['FORMATTED_METADATA']:
                # If the metadata are formatted, format them. Use a special
                # translator that doesn't add <dd> tags around the content,
                # also explicitly disable the <p> around as we don't always
                # need it.
                # TODO: uncrapify this a bit
                visitor = m.htmlsanity._SaneFieldBodyTranslator(pub.document)
                visitor.compact_field_list = True
                body_elem.walkabout(visitor)
                value = visitor.astext()
            else:
                value = body_elem.astext()
            # Field names are case-normalized to become page attributes
            metadata[name.lower()] = value

    # Breadcrumb, we don't do page hierarchy yet
    assert len(path) == 1
    breadcrumb = [(pub.writer.parts.get('title'), path[0] + '.html')]

    page = Empty()
    page.url = breadcrumb[-1][1]
    page.breadcrumb = breadcrumb
    page.prefix_wbr = path[0]

    # Set page content and add extra metadata from there. Metadata are
    # applied last, so a field named like one of the attributes set above
    # replaces it.
    page.content = pub.writer.parts.get('body').rstrip()
    for key, value in metadata.items():
        setattr(page, key, value)
    if not hasattr(page, 'summary'):
        page.summary = ''

    render(state.config, 'page.html', page, env)

    # Index entry for this page, return only if it's not an index
    if path == ['index']:
        return []

    index_entry = IndexEntry()
    index_entry.kind = 'page'
    index_entry.name = breadcrumb[-1][0]
    index_entry.url = page.url
    index_entry.summary = page.summary
    return [index_entry]
def render_module(state: State, path, module, env):
    """Render the documentation page of a module and, recursively, of the
    modules and classes it contains.

    If the module defines ``__all__``, exactly the names listed there are
    documented (including underscored ones). Otherwise the members are
    enumerated with ``inspect.getmembers()`` and filtered through
    ``is_internal_or_imported_module_member()``.

    :param state: Generator state; ``hooks_pre_page``, ``module_docs``,
        ``module_mapping`` and ``config`` are used
    :param path: Fully qualified module name as a list of name components
    :param module: The imported module object
    :param env: Template environment, passed through to ``render()``
    :return: Index entry of kind ``'module'`` with the entries of rendered
        submodules and classes in its ``children``
    """
    logging.debug("generating %s.html", '.'.join(path))

    # Call all registered page begin hooks
    for hook in state.hooks_pre_page:
        hook()

    # Every path prefix gets its own breadcrumb item: a.html, a.b.html, ...
    url_base = ''
    breadcrumb = []
    for i in path:
        url_base += i + '.'
        breadcrumb += [(i, url_base + 'html')]

    page = Empty()
    page.summary = extract_summary(state, state.module_docs, path, module.__doc__)
    page.url = breadcrumb[-1][1]
    page.breadcrumb = breadcrumb
    page.prefix_wbr = '.<wbr />'.join(path + [''])
    page.modules = []
    page.classes = []
    page.enums = []
    page.functions = []
    page.data = []
    page.has_enum_details = False

    # External page content, if provided
    path_str = '.'.join(path)
    if path_str in state.module_docs:
        page.content = render_rst(state, state.module_docs[path_str]['content'])
        state.module_docs[path_str]['used'] = True

    # Index entry for this module, returned together with children at the end
    index_entry = IndexEntry()
    index_entry.kind = 'module'
    index_entry.name = breadcrumb[-1][0]
    index_entry.url = page.url
    index_entry.summary = page.summary

    # List of inner modules and classes to render, these will be done after the
    # current class introspection is done to have some better memory allocation
    # pattern
    modules_to_render = []
    classes_to_render = []

    # This is actually complicated -- if the module defines __all__, use that.
    # The __all__ is meant to expose the public API, so we don't filter out
    # underscored things.
    if hasattr(module, '__all__'):
        # Names exposed in __all__ could be also imported from elsewhere, for
        # example this is a common pattern with native libraries and we want
        # Foo, Bar, submodule and *everything* in submodule to be referred to
        # as `library.RealName` (`library.submodule.func()`, etc.) instead of
        # `library._native.Foo`, `library._native.sub.func()` etc.
        #
        #   from ._native import Foo as PublicName
        #   from ._native import sub as submodule
        #   __all__ = ['PublicName', 'submodule']
        #
        # The name references can be cyclic so extract the mapping in a
        # separate pass before everything else.
        for name in module.__all__:
            # Everything available in __all__ is already imported, so get those
            # directly
            member = getattr(module, name)
            public_name = '.'.join(path + [name])

            # Modules have __name__ while other objects have __module__, need
            # to check both.
            if inspect.ismodule(member) and member.__name__ != public_name:
                assert member.__name__ not in state.module_mapping
                state.module_mapping[member.__name__] = public_name
            elif hasattr(member, '__module__'):
                origin_module = member.__module__
                origin_name = getattr(member, '__name__', None)

                # Data instances inherit __module__ from their class but have
                # no __name__ of their own, and __module__ may be None. Such
                # objects have no original qualified name to map from, so
                # skip them instead of failing on the concatenation below.
                if not isinstance(origin_module, str) or not isinstance(origin_name, str):
                    continue

                subname = origin_module + '.' + origin_name
                if subname != public_name:
                    assert subname not in state.module_mapping
                    state.module_mapping[subname] = public_name

        # Now extract the actual docs
        for name in module.__all__:
            member = getattr(module, name)
            subpath = path + [name]

            # We allow undocumented submodules (since they're often in the
            # standard lib), but not undocumented classes etc. Render the
            # submodules and subclasses recursively.
            if inspect.ismodule(member):
                page.modules += [extract_module_doc(state, subpath, member)]
                index_entry.children += [render_module(state, subpath, member, env)]
            elif inspect.isclass(member) and not is_enum(state, member):
                page.classes += [extract_class_doc(state, subpath, member)]
                index_entry.children += [render_class(state, subpath, member, env)]
            elif inspect.isclass(member) and is_enum(state, member):
                enum_ = extract_enum_doc(state, subpath, member)
                page.enums += [enum_]
                if enum_.has_details:
                    page.has_enum_details = True
            elif inspect.isfunction(member) or inspect.isbuiltin(member):
                page.functions += extract_function_doc(state, module, subpath, member)
            # Assume everything else is data. The builtin help() (from pydoc)
            # does the same:
            # https://github.com/python/cpython/blob/d29b3dd9227cfc4a23f77e99d62e20e063272de1/Lib/pydoc.py#L113
            # TODO: unify this query
            elif not inspect.isframe(member) and not inspect.istraceback(member) and not inspect.iscode(member):
                page.data += [extract_data_doc(state, module, subpath, member)]
            else: # pragma: no cover
                logging.warning("unknown symbol %s in %s", name, '.'.join(path))

    # Otherwise, enumerate the members using inspect. However, inspect lists
    # also imported modules, functions and classes, so take only those which
    # have __module__ equivalent to `path`.
    else:
        # Get (and render) inner modules
        for name, member in inspect.getmembers(module, inspect.ismodule):
            if is_internal_or_imported_module_member(state, module, path, name, member):
                continue

            subpath = path + [name]
            page.modules += [extract_module_doc(state, subpath, member)]
            modules_to_render += [(subpath, member)]

        # Get (and render) inner classes
        for name, member in inspect.getmembers(module, lambda o: inspect.isclass(o) and not is_enum(state, o)):
            if is_internal_or_imported_module_member(state, module, path, name, member):
                continue

            subpath = path + [name]
            if not member.__doc__:
                logging.warning("%s is undocumented", '.'.join(subpath))

            page.classes += [extract_class_doc(state, subpath, member)]
            classes_to_render += [(subpath, member)]

        # Get enums
        for name, member in inspect.getmembers(module, lambda o: is_enum(state, o)):
            if is_internal_or_imported_module_member(state, module, path, name, member):
                continue

            subpath = path + [name]
            if not member.__doc__:
                logging.warning("%s is undocumented", '.'.join(subpath))

            enum_ = extract_enum_doc(state, subpath, member)
            page.enums += [enum_]
            if enum_.has_details:
                page.has_enum_details = True

        # Get inner functions
        for name, member in inspect.getmembers(module, lambda o: inspect.isfunction(o) or inspect.isbuiltin(o)):
            if is_internal_or_imported_module_member(state, module, path, name, member):
                continue

            subpath = path + [name]
            if not member.__doc__:
                logging.warning("%s() is undocumented", '.'.join(subpath))

            page.functions += extract_function_doc(state, module, subpath, member)

        # Get data
        # TODO: unify this query
        for name, member in inspect.getmembers(module, lambda o: not inspect.ismodule(o) and not inspect.isclass(o) and not inspect.isroutine(o) and not inspect.isframe(o) and not inspect.istraceback(o) and not inspect.iscode(o)):
            if is_internal_or_imported_module_member(state, module, path, name, member):
                continue

            page.data += [extract_data_doc(state, module, path + [name], member)]

    # Render the module, free the page data to avoid memory rising indefinitely
    render(state.config, 'module.html', page, env)
    del page

    # Render submodules and subclasses
    for subpath, member in modules_to_render:
        index_entry.children += [render_module(state, subpath, member, env)]
    for subpath, member in classes_to_render:
        index_entry.children += [render_class(state, subpath, member, env)]

    return index_entry
def render_class(state: State, path, class_, env):
    """Render the documentation page of a class, followed by the pages of
    its inner classes.

    :param state: Generator state; ``hooks_pre_page``, ``class_docs`` and
        ``config`` are used
    :param path: Fully qualified class name as a list of name components
    :param class_: The class object to document
    :param env: Template environment, passed through to ``render()``
    :return: Index entry of kind ``'class'`` with the entries of rendered
        inner classes in its ``children``
    """
    dotted_path = '.'.join(path)
    logging.debug("generating %s.html", dotted_path)

    # Let every registered page begin hook run
    for pre_page_hook in state.hooks_pre_page:
        pre_page_hook()

    # One breadcrumb item per path prefix: a.html, a.b.html, ...
    breadcrumb = [
        (component, '.'.join(path[:depth]) + '.html')
        for depth, component in enumerate(path, start=1)
    ]

    page = Empty()
    page.summary = extract_summary(state, state.class_docs, path, class_.__doc__)
    page.url = breadcrumb[-1][1]
    page.breadcrumb = breadcrumb
    page.prefix_wbr = '.<wbr />'.join(path + [''])
    for member_list in ('classes', 'enums', 'classmethods', 'staticmethods',
                        'dunder_methods', 'methods', 'properties', 'data'):
        setattr(page, member_list, [])
    page.has_enum_details = False

    # Externally supplied page content, marked as used once picked up
    if dotted_path in state.class_docs:
        page.content = render_rst(state, state.class_docs[dotted_path]['content'])
        state.class_docs[dotted_path]['used'] = True

    # Index entry for this class, returned together with children at the end
    index_entry = IndexEntry()
    index_entry.kind = 'class'
    index_entry.name = breadcrumb[-1][0]
    index_entry.url = page.url
    index_entry.summary = page.summary

    # Inner classes are only collected here and rendered once this page is
    # done, to have some better memory allocation pattern
    classes_to_render = []

    # Member predicates for inspect.getmembers()
    def is_plain_class(candidate):
        return inspect.isclass(candidate) and not is_enum(state, candidate)

    def is_enum_member(candidate):
        return is_enum(state, candidate)

    def is_data(candidate):
        return not (inspect.ismodule(candidate)
                    or inspect.isclass(candidate)
                    or inspect.isroutine(candidate)
                    or inspect.isframe(candidate)
                    or inspect.istraceback(candidate)
                    or inspect.iscode(candidate)
                    or inspect.isdatadescriptor(candidate))

    # Inner classes
    for name, member in inspect.getmembers(class_, is_plain_class):
        # TODO: __base__ and __class__ are skipped explicitly
        if name in ['__base__', '__class__'] or name.startswith('_'):
            continue

        subpath = path + [name]
        if not member.__doc__:
            logging.warning("%s is undocumented", '.'.join(subpath))

        page.classes.append(extract_class_doc(state, subpath, member))
        classes_to_render.append((subpath, member))

    # Enums
    for name, member in inspect.getmembers(class_, is_enum_member):
        if name.startswith('_'):
            continue

        subpath = path + [name]
        if not member.__doc__:
            logging.warning("%s is undocumented", '.'.join(subpath))

        enum_ = extract_enum_doc(state, subpath, member)
        page.enums.append(enum_)
        if enum_.has_details:
            page.has_enum_details = True

    # Methods
    for name, member in inspect.getmembers(class_, inspect.isroutine):
        # Underscored methods are dropped, dunder methods are kept ...
        if is_internal_function_name(name):
            continue

        # ... unless the dunder method has no docs of its own
        is_dunder = name.startswith('__')
        if is_dunder and (name, member.__doc__) in _filtered_builtin_functions:
            continue

        subpath = path + [name]
        if not member.__doc__:
            logging.warning("%s() is undocumented", '.'.join(subpath))

        for function in extract_function_doc(state, class_, subpath, member):
            if is_dunder:
                target = page.dunder_methods
            elif function.is_classmethod:
                target = page.classmethods
            elif function.is_staticmethod:
                target = page.staticmethods
            else:
                target = page.methods
            target.append(function)

    # Properties
    for name, member in inspect.getmembers(class_, inspect.isdatadescriptor):
        if (name, member.__doc__) in _filtered_builtin_properties:
            continue
        # TODO: are there any dunder props?
        if name.startswith('_'):
            continue

        subpath = path + [name]
        if not member.__doc__:
            logging.warning("%s is undocumented", '.'.join(subpath))

        page.properties.append(extract_property_doc(state, subpath, member))

    # Data
    # TODO: unify this query
    for name, member in inspect.getmembers(class_, is_data):
        if name.startswith('_'):
            continue

        page.data.append(extract_data_doc(state, class_, path + [name], member))

    # Render the class, then drop the page data so memory doesn't keep rising
    render(state.config, 'class.html', page, env)
    del page

    # Render the inner classes collected above
    for subpath, inner_class in classes_to_render:
        index_entry.children += [render_class(state, subpath, inner_class, env)]

    return index_entry
def run(basedir, config, templates):
    """Generate the whole documentation output.

    Sets up the Jinja environment, resolves the input and output paths,
    imports the plugins, renders all configured docs, modules and pages,
    writes the index files and copies the referenced files to the output
    directory.

    :param basedir: Directory against which ``config['INPUT']`` and the
        ``config['INPUT_DOCS']`` entries are resolved
    :param config: Configuration dictionary. It is modified in place:
        ``INPUT`` and ``OUTPUT`` are replaced with joined paths and
        ``FAVICON``, if set, with a ``(path, MIME type)`` tuple.
    :param templates: Template search path passed to
        ``jinja2.FileSystemLoader``
    """
    # Prepare Jinja environment
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(templates), trim_blocks=True,
        lstrip_blocks=True, enable_async=True)

    # Filter to return file basename or the full URL, if absolute
    def basename_or_url(path):
        if urllib.parse.urlparse(path).netloc:
            return path
        return os.path.basename(path)

    # Filter to return URL for given symbol or the full URL, if absolute
    def path_to_url(path):
        if urllib.parse.urlparse(path).netloc:
            return path
        return path + '.html'

    env.filters['basename_or_url'] = basename_or_url
    env.filters['path_to_url'] = path_to_url
    env.filters['urljoin'] = urljoin

    # Populate the INPUT, if not specified, make it absolute
    if config['INPUT'] is None:
        config['INPUT'] = basedir
    else:
        config['INPUT'] = os.path.join(basedir, config['INPUT'])

    # Make the output dir absolute and create it. exist_ok avoids the race
    # between checking for the directory and creating it.
    config['OUTPUT'] = os.path.join(config['INPUT'], config['OUTPUT'])
    os.makedirs(config['OUTPUT'], exist_ok=True)

    # Guess MIME type of the favicon
    if config['FAVICON']:
        config['FAVICON'] = (config['FAVICON'], mimetypes.guess_type(config['FAVICON'])[0])

    state = State(config)

    # Set up extra plugin paths. The one for m.css plugins was added above.
    # The membership test has to use the same joined path that gets appended,
    # otherwise it never matches and repeated runs keep adding duplicates.
    for plugin_dir in config['PLUGIN_PATHS']:
        plugin_path = os.path.join(config['INPUT'], plugin_dir)
        if plugin_path not in sys.path:
            sys.path.append(plugin_path)

    # Import plugins
    for plugin in ['m.htmlsanity'] + config['PLUGINS']:
        module = importlib.import_module(plugin)
        module.register_mcss(
            mcss_settings=config,
            jinja_environment=env,
            module_doc_contents=state.module_docs,
            class_doc_contents=state.class_docs,
            data_doc_contents=state.data_docs,
            hooks_pre_page=state.hooks_pre_page,
            hooks_post_run=state.hooks_post_run)

    # Call all registered page begin hooks for the first time
    for hook in state.hooks_pre_page:
        hook()

    # First process the doc input files so we have all data for rendering
    # module pages
    for file in config['INPUT_DOCS']:
        render_doc(state, os.path.join(basedir, file))

    # Modules can be given either by name or as already imported objects
    for module in config['INPUT_MODULES']:
        if isinstance(module, str):
            module_name = module
            module = importlib.import_module(module)
        else:
            module_name = module.__name__

        state.class_index += [render_module(state, [module_name], module, env)]

    # Warn if there are any unused contents left after processing everything
    unused_module_docs = [key for key, value in state.module_docs.items() if 'used' not in value]
    unused_class_docs = [key for key, value in state.class_docs.items() if 'used' not in value]
    unused_data_docs = [key for key, value in state.data_docs.items() if 'used' not in value]
    if unused_module_docs:
        logging.warning("The following module doc contents were unused: %s", unused_module_docs)
    if unused_class_docs:
        logging.warning("The following class doc contents were unused: %s", unused_class_docs)
    if unused_data_docs:
        logging.warning("The following data doc contents were unused: %s", unused_data_docs)

    for page in config['INPUT_PAGES']:
        state.page_index += render_page(state, [os.path.splitext(os.path.basename(page))[0]], os.path.join(config['INPUT'], page), env)

    # Recurse into the tree and mark every node that has nested modules with
    # has_nestable_children. Returns whether the given list contains any
    # module entry.
    def mark_nested_modules(entries: List[IndexEntry]):
        has_nestable_children = False
        for entry in entries:
            if entry.kind != 'module':
                continue
            has_nestable_children = True
            entry.has_nestable_children = mark_nested_modules(entry.children)
        return has_nestable_children

    mark_nested_modules(state.class_index)

    # Create module and class index
    index = Empty()
    index.classes = state.class_index
    index.pages = state.page_index
    for file in ['modules.html', 'classes.html', 'pages.html']:
        template = env.get_template(file)
        rendered = template.render(index=index, FILENAME=file, **config)
        with open(os.path.join(config['OUTPUT'], file), 'wb') as f:
            f.write(rendered.encode('utf-8'))
            # Add back a trailing newline so we don't need to bother with
            # patching test files to include a trailing newline to make Git
            # happy
            # TODO could keep_trailing_newline fix this better?
            f.write(b'\n')

    # Create index.html if it was not provided by the user
    if 'index.rst' not in [os.path.basename(i) for i in config['INPUT_PAGES']]:
        logging.debug("writing index.html for an empty main page")

        page = Empty()
        page.breadcrumb = [(config['PROJECT_TITLE'], 'index.html')]
        page.url = page.breadcrumb[-1][1]
        render(config, 'page.html', page, env)

    # Copy referenced files
    for i in config['STYLESHEETS'] + config['EXTRA_FILES'] + ([config['FAVICON'][0]] if config['FAVICON'] else []) + list(state.external_data) + ([] if config['SEARCH_DISABLED'] else ['search.js']):
        # Skip absolute URLs
        if urllib.parse.urlparse(i).netloc:
            continue

        # If file is found relative to the conf file, use that
        if os.path.exists(os.path.join(config['INPUT'], i)):
            i = os.path.join(config['INPUT'], i)

        # Otherwise use path relative to script directory
        else:
            i = os.path.join(os.path.dirname(os.path.realpath(__file__)), i)

        logging.debug("copying %s to output", i)
        shutil.copy(i, os.path.join(config['OUTPUT'], os.path.basename(i)))

    # Call all registered finalization hooks
    for hook in state.hooks_post_run:
        hook()