def write_page(self, page, output):
    """Post-process the page's HTML, then delegate to Formatter.write_page.

    ``page.detailed_description`` is parsed as HTML and rewritten:

    * every ``h1``-``h5`` and ``img`` element that has no ``id`` attribute
      but has text (``alt`` text for images) receives an id derived from
      that text, suffixed with a counter until it is unique in the page;
      when a section number is available it is prepended to the element's
      text;
    * every empty local link (``<a href="#...">`` with no text and no
      child element) gets the text of the element it points to, or a
      ``FIXME`` placeholder plus a ``bad-local-link`` warning when no
      element carries that id;
    * the tree is serialized back into ``page.detailed_description``.

    Args:
        page: object exposing ``detailed_description`` and ``source_file``.
        output: forwarded unchanged to ``Formatter.write_page``.

    Returns:
        Whatever ``Formatter.write_page`` returns.
    """
    root = etree.HTML(unicode(page.detailed_description))

    # Text content of every element that already carries an id.
    id_nodes = {n.attrib['id']: "".join(n.itertext())
                for n in root.xpath('.//*[@id]')}

    section_numbers = self.__init_section_numbers(root)
    targets = root.xpath(
        './/*[self::h1 or self::h2 or self::h3 or '
        'self::h4 or self::h5 or self::img]')

    for target in targets:
        # Called for every target, before any of the skips below.
        section_number = self.__update_section_number(
            target, section_numbers)

        if 'id' in target.attrib:
            continue

        if target.tag == 'img':
            text = target.attrib.get('alt')
        else:
            text = "".join(target.itertext())

        if not text:
            continue

        # Append 1, 2, 3... to the base id until it no longer collides.
        id_ = id_from_text(text)
        ref_id = id_
        index = 1
        while id_ in id_nodes:
            id_ = '%s%s' % (ref_id, index)
            index += 1

        if section_number:
            target.text = '%s %s' % (section_number, target.text or '')

        target.attrib['id'] = id_
        id_nodes[id_] = text

    empty_links = root.xpath('.//a[not(text()) and not(*)]')
    for link in empty_links:
        href = link.attrib.get('href')
        if href and href.startswith('#'):
            # Drop only the leading '#': str.strip('#') would also eat a
            # trailing '#' (or several leading ones) and look up the wrong
            # id.
            title = id_nodes.get(href[1:])
            if title:
                link.text = title
            else:
                warn('bad-local-link',
                     "Empty anchor link to %s in %s points nowhere" %
                     (href, page.source_file))
                link.text = "FIXME broken link to %s" % href

    page.detailed_description = lxml.html.tostring(
        root, doctype="<!DOCTYPE html>", encoding='unicode',
        include_meta_content_type=True)
    return Formatter.write_page(self, page, output)
def _make_title_id(self, node, id_nodes):
    """Build an id for ``node`` that is not already a key of ``id_nodes``.

    The id is derived from the node's ``alt`` attribute for ``img``
    elements and from its concatenated text otherwise. If the derived id
    is already present in ``id_nodes``, the suffixes 1, 2, 3... are tried
    in turn until a free id is found.

    Args:
        node: element exposing ``tag``, ``attrib`` and ``itertext()``.
        id_nodes: container of ids already in use (tested with ``in``).

    Returns:
        The free id, or ``None`` when the node yields no text.
    """
    label = (node.attrib.get('alt') if node.tag == 'img'
             else "".join(node.itertext()))
    if not label:
        return None

    base = id_from_text(label)
    candidate = base
    suffix = 0
    while candidate in id_nodes:
        suffix += 1
        candidate = '%s%s' % (base, suffix)
    return candidate
def __format_page_comment(self, formatter, link_resolver):
    """Render this object's comment into its title, summary and contents.

    Does nothing when ``self.comment`` is falsy. Otherwise:

    * ``self.short_description`` and ``self.title`` are set from the
      comment's corresponding fields (when those are truthy), formatted
      with ``formatter.format_comment`` and stripped of whitespace; if the
      result starts with ``<p>``, its first 3 and last 4 characters are
      removed;
    * when ``self.title`` is truthy, an ``<h1>`` whose id is
      ``id_from_text(title) + "-page"`` is appended to
      ``self.formatted_contents``;
    * the formatted comment itself is appended to
      ``self.formatted_contents``.

    Args:
        formatter: object providing ``format_comment(comment, resolver)``.
        link_resolver: passed through to ``formatter.format_comment``.
    """
    comment = self.comment
    if not comment:
        return

    def render_inline(raw):
        # Format, trim, and peel off an enclosing <p>...</p> if present.
        html = formatter.format_comment(raw, link_resolver).strip()
        return html[3:-4] if html.startswith('<p>') else html

    if comment.short_description:
        self.short_description = render_inline(comment.short_description)

    if comment.title:
        self.title = render_inline(comment.title)

    if self.title:
        heading = '<h1 id="%s-page">%s</h1>' % (
            id_from_text(self.title), self.title)
        self.formatted_contents += heading

    self.formatted_contents += formatter.format_comment(
        comment, link_resolver)
def __validate_html(self, project, page, doc_root):
    """Assign heading ids, fix up local links and look up assets.

    Works in place on ``doc_root``:

    * every ``h1``-``h5`` and ``img`` element that has no ``id`` attribute
      but has text (``alt`` text for images) receives an id derived from
      that text, suffixed with a counter until it is unique in the
      document; when a section number is available it is prepended to the
      element's text;
    * inside the element marked ``data-hotdoc-role="main"``, every link
      whose ``href`` starts with ``#`` is rewritten to
      ``rel_path + href``; if such a link has no text and no children it
      first gets the text of the element it points to, or a ``FIXME``
      placeholder plus a ``bad-local-link`` warning when no element
      carries that id;
    * every element with a ``src`` attribute inside the main element is
      passed to ``self.__lookup_asset``.

    Args:
        project: forwarded to ``self.__lookup_asset``.
        page: object exposing ``link.ref`` and ``source_file``.
        doc_root: parsed document tree, modified in place.
    """
    rel_path = os.path.join(self.get_output_folder(page), page.link.ref)

    # Text content of every element that already carries an id.
    id_nodes = {
        n.attrib['id']: "".join(n.itertext())
        for n in doc_root.xpath('.//*[@id]')
    }

    section_numbers = self.__init_section_numbers(doc_root)
    targets = doc_root.xpath('.//*[self::h1 or self::h2 or self::h3 or '
                             'self::h4 or self::h5 or self::img]')

    for target in targets:
        # Called for every target, before any of the skips below.
        section_number = self.__update_section_number(
            target, section_numbers)

        if 'id' in target.attrib:
            continue

        if target.tag == 'img':
            text = target.attrib.get('alt')
        else:
            text = "".join(target.itertext())

        if not text:
            continue

        # Append 1, 2, 3... to the base id until it no longer collides.
        id_ = id_from_text(text)
        ref_id = id_
        index = 1
        while id_ in id_nodes:
            id_ = '%s%s' % (ref_id, index)
            index += 1

        if section_number:
            target.text = '%s %s' % (section_number, target.text or '')

        target.attrib['id'] = id_
        id_nodes[id_] = text

    main_node = doc_root.find('.//*[@data-hotdoc-role="main"]')
    links = main_node.xpath('.//a')
    for link in links:
        href = link.attrib.get('href')
        if href and href.startswith('#'):
            # len(link) counts child elements; it replaces the deprecated
            # getchildren() call.
            if not link.text and len(link) == 0:
                # Drop only the leading '#': str.strip('#') would also eat
                # a trailing '#' (or several leading ones) and look up the
                # wrong id.
                title = id_nodes.get(href[1:])
                if title:
                    link.text = title
                else:
                    warn(
                        'bad-local-link',
                        "Empty anchor link to %s in %s points nowhere" %
                        (href, page.source_file))
                    link.text = "FIXME broken link to %s" % href
            link.attrib["href"] = rel_path + href

    assets = main_node.xpath('.//*[@src]')
    # All required assets should now be in place
    for asset in assets:
        self.__lookup_asset(asset, project, page)
def write_page(self, page, build_root, output):
    """Post-process the page's HTML, write it out and check local images.

    ``page.detailed_description`` is parsed as HTML and rewritten:

    * every ``h1``-``h5`` and ``img`` element that has no ``id`` attribute
      but has text (``alt`` text for images) receives an id derived from
      that text, suffixed with a counter until it is unique in the page;
      when a section number is available it is prepended to the element's
      text;
    * every empty local link (``<a href="#...">`` with no text and no
      child element) gets the text of the element it points to, or a
      ``FIXME`` placeholder plus a ``bad-local-link`` warning when no
      element carries that id;
    * the tree is serialized back into ``page.detailed_description``.

    The page is then written through ``Formatter.write_page``. Afterwards
    every ``img`` is checked: a missing ``src`` triggers a
    ``no-image-src`` warning, and a ``src`` without a URL scheme that does
    not exist relative to the written page's directory triggers a
    ``bad-image-src`` warning.

    Args:
        page: object exposing ``detailed_description`` and ``source_file``.
        build_root: forwarded unchanged to ``Formatter.write_page``.
        output: forwarded unchanged to ``Formatter.write_page``.

    Returns:
        The value returned by ``Formatter.write_page``, which is used here
        as the path of the written page.
    """
    root = etree.HTML(unicode(page.detailed_description))

    # Text content of every element that already carries an id.
    id_nodes = {n.attrib['id']: "".join(n.itertext())
                for n in root.xpath('.//*[@id]')}

    section_numbers = self.__init_section_numbers(root)
    targets = root.xpath(
        './/*[self::h1 or self::h2 or self::h3 or '
        'self::h4 or self::h5 or self::img]')

    for target in targets:
        # Called for every target, before any of the skips below.
        section_number = self.__update_section_number(
            target, section_numbers)

        if 'id' in target.attrib:
            continue

        if target.tag == 'img':
            text = target.attrib.get('alt')
        else:
            text = "".join(target.itertext())

        if not text:
            continue

        # Append 1, 2, 3... to the base id until it no longer collides.
        id_ = id_from_text(text)
        ref_id = id_
        index = 1
        while id_ in id_nodes:
            id_ = '%s%s' % (ref_id, index)
            index += 1

        if section_number:
            target.text = '%s %s' % (section_number, target.text or '')

        target.attrib['id'] = id_
        id_nodes[id_] = text

    empty_links = root.xpath('.//a[not(text()) and not(*)]')
    for link in empty_links:
        href = link.attrib.get('href')
        if href and href.startswith('#'):
            # Drop only the leading '#': str.strip('#') would also eat a
            # trailing '#' (or several leading ones) and look up the wrong
            # id.
            title = id_nodes.get(href[1:])
            if title:
                link.text = title
            else:
                warn('bad-local-link',
                     "Empty anchor link to %s in %s points nowhere" %
                     (href, page.source_file))
                link.text = "FIXME broken link to %s" % href

    page.detailed_description = lxml.html.tostring(
        root, doctype="<!DOCTYPE html>", encoding='unicode',
        include_meta_content_type=True)

    full_path = Formatter.write_page(self, page, build_root, output)

    images = root.xpath('.//img')
    # All required assets should now be in place
    for img in images:
        src = img.attrib.get('src')
        if not src:
            warn('no-image-src',
                 'Empty image source in %s' % page.source_file)
            continue

        # Sources with a URL scheme are not checked on disk.
        comps = urlparse.urlparse(src)
        if comps.scheme:
            continue

        # Local sources are resolved relative to the written page.
        path = os.path.abspath(os.path.join(
            os.path.dirname(full_path), src))
        if not os.path.exists(path):
            warn('bad-image-src',
                 ('In %s, a local image refers to an unknown source (%s). '
                  'It should be available in the build folder, at %s') %
                 (page.source_file, src, path))

    return full_path