def add_page(output_root, parent, *, id=None, path=None, title=None):
    """Append a ``tab`` element to *parent* and optionally stage its page.

    Without *path* only the navigation element is created. With *path*, the
    Markdown file ``OMZ_ROOT / path`` is additionally written to
    ``output_root / path`` with a ``{#<id>}`` label appended to its first
    line, and every image the page references through a relative URL is
    copied to the same relative location under *output_root*.

    Args:
        output_root: Directory that receives the staged page and images.
            Not used when *path* is omitted.
        parent: Element that receives the new ``tab`` child. If it is a
            ``tab`` itself, its ``type`` attribute is set to ``usergroup``.
        id: Page ID used for the ``@ref`` URL and the heading label.
            Required when *path* is given.
        path: Location of the Markdown file relative to ``OMZ_ROOT``
            (``str`` or ``Path``).
        title: Tab title. Required when *path* is omitted; otherwise it
            defaults to the title of the page.

    Returns:
        The newly created ``tab`` element.

    Raises:
        ValueError: If neither *path* nor *title* is given, or if *path* is
            given without *id*. Raised before anything is modified.
        RuntimeError: If the page has no title or its first line is not a
            level 1 heading.
    """
    # Function-scope import: this block cannot rely on a file-level one.
    from pathlib import Path

    # Explicit checks instead of `assert`, which is stripped under
    # `python -O` and would let a `{#None}` label or a `None` title through.
    if not path and not title:
        raise ValueError("title must be specified if path isn't")
    if path and not id:
        raise ValueError("id must be specified if path is")

    if parent.tag == 'tab':
        parent.attrib['type'] = 'usergroup'

    # `title` may still be None here; it is filled in from the page below.
    element = ET.SubElement(parent, 'tab', type='user', title=title,
        url='@ref ' + id if id else '')

    if not path:
        return element

    # Normalise once so that `.parent` below also works for string paths.
    rel_path = Path(path)

    output_path = output_root / rel_path
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with (OMZ_ROOT / rel_path).open('r', encoding='utf-8') as input_file:
        lines = input_file.readlines()

    page = omzdocs.DocumentationPage(''.join(lines))

    if page.title is None:
        raise RuntimeError(f'{path}: must begin with level 1 heading')

    if not title:
        title = page.title

    element.attrib['title'] = title

    # the only way to override the ID that Doxygen gives Markdown pages
    # is to add a label to the top-level heading. For simplicity, we hardcode
    # the assumption that the file immediately begins with that heading.
    if not lines[0].startswith('# '):
        raise RuntimeError(f'{path}: line 1 must contain the level 1 heading')

    lines[0] = lines[0].rstrip('\n') + f' {{#{id}}}\n'

    with output_path.open('w', encoding='utf-8') as output_file:
        output_file.writelines(lines)

    # copy all referenced images
    image_urls = [ref.url for ref in page.external_references() if ref.type == 'image']

    for image_url in image_urls:
        parsed_image_url = urllib.parse.urlparse(image_url)
        if parsed_image_url.scheme or parsed_image_url.netloc:
            continue # not a relative URL

        image_rel_path = rel_path.parent / urllib.request.url2pathname(parsed_image_url.path)

        (output_root / image_rel_path.parent).mkdir(parents=True, exist_ok=True)
        shutil.copyfile(OMZ_ROOT / image_rel_path, output_root / image_rel_path)

    return element
def add_page(output_root, parent, *, id=None, path=None, title=None, index=-1):
    """Add a ``tab`` element to *parent* and optionally stage its page.

    Without *path* only the navigation element is created. With *path*, the
    Markdown file ``OMZ_ROOT / path`` is recorded in
    ``documentation_md_paths``, written to ``output_root / path`` with a
    ``{#<id>}`` label appended to its first line, and its relative image and
    link references are processed:

    * images are copied to the same relative location under *output_root*
      and registered by file name in ``all_images_paths``;
    * relative links without a fragment must end in ``.md`` and are
      recorded in ``all_md_paths``.

    Args:
        output_root: Directory that receives the staged page and images.
            Not used when *path* is omitted.
        parent: Element that receives the new ``tab`` child. If it is a
            ``tab`` itself, its ``type`` attribute is set to ``usergroup``.
        id: Page ID used for the ``@ref`` URL and the heading label.
            Required when *path* is given.
        path: Location of the Markdown file relative to ``OMZ_ROOT``
            (``str`` or ``Path``).
        title: Tab title. Required when *path* is omitted; otherwise it
            defaults to the title of the page.
        index: Position at which the element is inserted into *parent*.
            The default ``-1`` is a sentinel meaning "append at the end".

    Returns:
        The newly created ``tab`` element.

    Raises:
        ValueError: If *index* is not an integer, if neither *path* nor
            *title* is given, or if *path* is given without *id*. Raised
            before anything is modified.
        RuntimeError: If the page has no title, its first line is not a
            level 1 heading, an image file name is already registered for a
            different file, or a relative link targets a non-Markdown file.
    """
    if not isinstance(index, int):
        raise ValueError('index must be an integer')

    # Explicit checks instead of `assert`, which is stripped under
    # `python -O` and would let a `{#None}` label or a `None` title through.
    if not path and not title:
        raise ValueError("title must be specified if path isn't")
    if path and not id:
        raise ValueError("id must be specified if path is")

    if parent.tag == 'tab':
        parent.attrib['type'] = 'usergroup'

    # `title` may still be None here; it is filled in from the page below.
    element = ET.Element('tab', type='user', title=title,
        url='@ref ' + id if id else '')

    if index == -1:
        parent.append(element)
    else:
        parent.insert(index, element)

    if not path:
        return element

    # Normalise once so every path operation below works for string paths
    # too; `path` itself is kept for messages so their text is unchanged.
    rel_path = Path(path)

    documentation_md_paths.add(rel_path)

    output_path = output_root / rel_path
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with (OMZ_ROOT / rel_path).open('r', encoding='utf-8') as input_file:
        lines = input_file.readlines()

    page = omzdocs.DocumentationPage(''.join(lines))

    if page.title is None:
        raise RuntimeError(f'{path}: must begin with level 1 heading')

    if not title:
        title = page.title

    element.attrib['title'] = title

    # the only way to override the ID that Doxygen gives Markdown pages
    # is to add a label to the top-level heading. For simplicity, we hardcode
    # the assumption that the file immediately begins with that heading.
    if not lines[0].startswith('# '):
        raise RuntimeError(f'{path}: line 1 must contain the level 1 heading')

    lines[0] = lines[0].rstrip('\n') + f' {{#{id}}}\n'

    with output_path.open('w', encoding='utf-8') as output_file:
        output_file.writelines(lines)

    # Collect the references once; both loops below read from this list.
    references = list(page.external_references())

    # copy all referenced images
    image_urls = [ref.url for ref in references if ref.type == 'image']

    for image_url in image_urls:
        parsed_image_url = urllib.parse.urlparse(image_url)
        if parsed_image_url.scheme or parsed_image_url.netloc:
            continue # not a relative URL

        image_rel_path = rel_path.parent / urllib.request.url2pathname(
            parsed_image_url.path)
        image_filename = image_rel_path.name
        image_abs_path = (OMZ_ROOT / image_rel_path).resolve()

        # The same file name may only ever refer to one source file.
        if image_filename in all_images_paths and all_images_paths[
                image_filename] != image_abs_path:
            raise RuntimeError(
                f'{path}: Image with "{image_filename}" filename already exists. '
                f'Rename "{image_rel_path}" to unique name.')
        all_images_paths[image_filename] = image_abs_path

        (output_root / image_rel_path.parent).mkdir(parents=True, exist_ok=True)
        shutil.copyfile(image_abs_path, output_root / image_rel_path)

    links = [ref.url for ref in references if ref.type == 'link']

    for link in links:
        parsed_link = urllib.parse.urlparse(link)

        if parsed_link.scheme or parsed_link.netloc:
            continue # not a relative URL

        if parsed_link.fragment:
            continue # link to markdown section

        relative_path = (OMZ_ROOT / rel_path.parent / link).resolve().relative_to(OMZ_ROOT)

        if link.endswith('.md'):
            all_md_paths[relative_path] = rel_path
        else:
            suggested_path = OMZ_PREFIX + relative_path.as_posix()
            raise RuntimeError(
                f'{path}: Relative link to non-markdown file "{link}". '
                f'Replace it by `{suggested_path}`')

    return element
def main():
    """Check the Markdown documentation and exit with 0 on success, 1 otherwise.

    Every file produced by ``find_md_files()`` is checked for:

    * URL references that cannot be parsed, are local but not relative,
      resolve outside ``OMZ_ROOT`` or do not resolve to an existing file;
    * ``<omz_dir>/...`` code spans that contain a ``..`` component or name
      a path that does not exist under ``OMZ_ROOT``;
    * HTML fragments that do not match ``HTML_FRAGMENT_RE`` or whose element
      is not in ``ALLOWED_HTML_ELEMENTS``.

    The three index pages are additionally required to link to every
    README.md found below their directory, unless one of the directories
    strictly between the two already contains a README.md.

    Each problem is printed to stderr; the function always terminates the
    process through ``sys.exit``.
    """
    found_problems = False

    def complain(message):
        nonlocal found_problems
        found_problems = True
        print(message, file=sys.stderr)

    def resolve_local_url(md_path, md_label, url):
        """Return the file a local URL points to, or None if there is none.

        None is returned both for URLs that need no check (non-local ones and
        self-links) and for invalid ones, which are reported via complain().
        """
        try:
            components = urllib.parse.urlparse(url)
        except ValueError:
            complain(f'{md_label}: invalid URL reference {url!r}')
            return None

        if components.scheme:
            return None  # non-local URL

        if components.netloc or components.path.startswith('/'):
            complain(f'{md_label}: non-relative local URL reference "{url}"')
            return None

        if not components.path:
            return None  # self-link

        local_path = urllib.request.url2pathname(components.path)
        target = (md_path.parent / local_path).resolve()

        if OMZ_ROOT not in target.parents:
            complain(
                f'{md_label}: URL reference "{url}" points outside the OMZ directory')
            return None

        if not target.is_file():
            complain(f'{md_label}: URL reference "{url}" target'
                ' does not exist or is not a file')
            return None

        return target

    index_pages = (
        OMZ_ROOT / 'models/intel/index.md',
        OMZ_ROOT / 'models/public/index.md',
        OMZ_ROOT / 'demos/README.md',
    )

    md_files = tuple(find_md_files())

    # For each existing index page, the README.md files it has to link to.
    required_links = {}

    for index_page in index_pages:
        if not index_page.exists():
            complain(f'{index_page}: file not found')
            continue

        index_dir = index_page.parent
        children = []

        for candidate in md_files:
            if candidate.name != "README.md" or candidate.parent == index_dir:
                continue

            try:
                below_index = candidate.relative_to(index_dir)
            except ValueError:
                continue  # not located under this index page's directory

            # Drop the README's own directory (first entry) and the index
            # directory itself (last entry); only the ones in between count.
            intermediate_dirs = list(below_index.parents)[1:-1]

            covered = False
            for directory in intermediate_dirs:
                if (index_dir / directory / 'README.md').exists():
                    covered = True
                    break

            if not covered:
                children.append(candidate)

        children.sort()
        required_links[index_page] = children

    omz_reference_prefix = '<omz_dir>/'

    for md_path in sorted(md_files):
        linked_files = set()
        md_label = md_path.relative_to(OMZ_ROOT)
        page = omzdocs.DocumentationPage(md_path.read_text(encoding='UTF-8'))
        is_index_page = md_path in required_links

        # check local link validity
        for url in sorted(ref.url for ref in page.external_references()):
            target = resolve_local_url(md_path, md_label, url)
            if target is not None and is_index_page:
                linked_files.add(target)

        # check <omz_dir> reference validity
        for code_span in page.code_spans():
            if not code_span.startswith(omz_reference_prefix):
                continue

            referenced = Path(code_span[len(omz_reference_prefix):])

            if ".." in referenced.parts:
                complain(f'{md_label}: OMZ reference "{code_span}"'
                    ' contains a ".." component.')
            elif not (OMZ_ROOT / referenced).exists():
                complain(f'{md_label}: OMZ reference "{code_span}" target'
                    ' does not exist')

        # check for existence of links to README.md files of models and demos
        for required in required_links.get(md_path, ()):
            if required not in linked_files:
                complain(
                    f"{md_label}: {required.relative_to(OMZ_ROOT)} is not referenced")

        # check for HTML fragments that are unsupported by Doxygen
        for html_fragment in page.html_fragments():
            match = HTML_FRAGMENT_RE.match(html_fragment)
            if not match:
                complain(
                    f'{md_label}: cannot parse HTML fragment {html_fragment!r}')
            elif match.group(1).lower() not in ALLOWED_HTML_ELEMENTS:
                complain(
                    f'{md_label}: unknown/disallowed HTML element in {html_fragment!r}')

    sys.exit(1 if found_problems else 0)