Exemple #1
0
    def write_page(self, page, output):
        """Post-process the page's HTML, then delegate to the base formatter.

        The markup in ``page.detailed_description`` is parsed and rewritten:

        * every ``h1``-``h5`` and ``img`` element without an ``id`` attribute
          gets one derived from its text (the ``alt`` attribute for images),
          made unique within the page by appending a counter;
        * elements that receive an id this way have their text prefixed
          with the section number, when one is provided for them;
        * anchors with no text and no child element whose ``href`` starts
          with ``#`` are given the text of the element they point to; when
          the target id is unknown a ``bad-local-link`` warning is emitted
          and a visible FIXME text is inserted instead.

        The serialized document is stored back in
        ``page.detailed_description``.

        Args:
            page: page object; ``detailed_description`` and ``source_file``
                are the attributes used here.
            output: forwarded unchanged to ``Formatter.write_page``.

        Returns:
            Whatever ``Formatter.write_page`` returns.
        """
        root = etree.HTML(unicode(page.detailed_description))

        # Text content of every element that already carries an id.
        id_nodes = {node.attrib['id']: "".join(node.itertext())
                    for node in root.xpath('.//*[@id]')}

        section_numbers = self.__init_section_numbers(root)

        targets = root.xpath(
            './/*[self::h1 or self::h2 or self::h3 or '
            'self::h4 or self::h5 or self::img]')

        for target in targets:
            # Must run for every target, including the ones skipped below.
            section_number = self.__update_section_number(
                target, section_numbers)

            if 'id' in target.attrib:
                continue

            if target.tag == 'img':
                text = target.attrib.get('alt')
            else:
                text = "".join(target.itertext())

            if not text:
                continue

            id_ = id_from_text(text)
            ref_id = id_
            index = 1

            # Append 1, 2, 3... until the id is unused in this document.
            while id_ in id_nodes:
                id_ = '%s%s' % (ref_id, index)
                index += 1

            if section_number:
                target.text = '%s %s' % (section_number, target.text or '')

            target.attrib['id'] = id_
            # Registered with the un-numbered text captured above.
            id_nodes[id_] = text

        empty_links = root.xpath('.//a[not(text()) and not(*)]')
        for link in empty_links:
            href = link.attrib.get('href')
            if not href or not href.startswith('#'):
                continue

            # Drop only the leading '#': str.strip('#') would also remove
            # any '#' at the end of the fragment and change the lookup key.
            title = id_nodes.get(href[1:])
            if title:
                link.text = title
            else:
                warn('bad-local-link',
                     "Empty anchor link to %s in %s points nowhere" %
                     (href, page.source_file))
                link.text = "FIXME broken link to %s" % href

        page.detailed_description = lxml.html.tostring(
            root, doctype="<!DOCTYPE html>", encoding='unicode',
            include_meta_content_type=True)
        return Formatter.write_page(self, page, output)
    def write_page(self, page, output):
        """Post-process the page's HTML, then delegate to the base formatter.

        The markup in ``page.detailed_description`` is parsed and rewritten:

        * every ``h1``-``h5`` and ``img`` element without an ``id`` attribute
          gets one derived from its text (the ``alt`` attribute for images),
          made unique within the page by appending a counter;
        * elements that receive an id this way have their text prefixed
          with the section number, when one is provided for them;
        * anchors with no text and no child element whose ``href`` starts
          with ``#`` are given the text of the element they point to; when
          the target id is unknown a ``bad-local-link`` warning is emitted
          and a visible FIXME text is inserted instead.

        The serialized document is stored back in
        ``page.detailed_description``.

        Args:
            page: page object; ``detailed_description`` and ``source_file``
                are the attributes used here.
            output: forwarded unchanged to ``Formatter.write_page``.

        Returns:
            Whatever ``Formatter.write_page`` returns.
        """
        root = etree.HTML(unicode(page.detailed_description))

        # Text content of every element that already carries an id.
        id_nodes = {node.attrib['id']: "".join(node.itertext())
                    for node in root.xpath('.//*[@id]')}

        section_numbers = self.__init_section_numbers(root)

        targets = root.xpath(
            './/*[self::h1 or self::h2 or self::h3 or '
            'self::h4 or self::h5 or self::img]')

        for target in targets:
            # Must run for every target, including the ones skipped below.
            section_number = self.__update_section_number(
                target, section_numbers)

            if 'id' in target.attrib:
                continue

            if target.tag == 'img':
                text = target.attrib.get('alt')
            else:
                text = "".join(target.itertext())

            if not text:
                continue

            id_ = id_from_text(text)
            ref_id = id_
            index = 1

            # Append 1, 2, 3... until the id is unused in this document.
            while id_ in id_nodes:
                id_ = '%s%s' % (ref_id, index)
                index += 1

            if section_number:
                target.text = '%s %s' % (section_number, target.text or '')

            target.attrib['id'] = id_
            # Registered with the un-numbered text captured above.
            id_nodes[id_] = text

        empty_links = root.xpath('.//a[not(text()) and not(*)]')
        for link in empty_links:
            href = link.attrib.get('href')
            if not href or not href.startswith('#'):
                continue

            # Drop only the leading '#': str.strip('#') would also remove
            # any '#' at the end of the fragment and change the lookup key.
            title = id_nodes.get(href[1:])
            if title:
                link.text = title
            else:
                warn('bad-local-link',
                     "Empty anchor link to %s in %s points nowhere" %
                     (href, page.source_file))
                link.text = "FIXME broken link to %s" % href

        page.detailed_description = lxml.html.tostring(
            root, doctype="<!DOCTYPE html>", encoding='unicode',
            include_meta_content_type=True)
        return Formatter.write_page(self, page, output)
Exemple #3
0
    def _make_title_id(self, node, id_nodes):
        """Build an id for *node* that is not already a key of *id_nodes*.

        The id is derived with ``id_from_text`` from the node's ``alt``
        attribute when it is an ``img`` element, and from its concatenated
        text content otherwise. If that id is already taken, the suffixes
        1, 2, 3... are tried in turn until a free one is found.

        Args:
            node: element to build an id for.
            id_nodes: container of the ids already in use; only membership
                is tested, it is not modified.

        Returns:
            The id string, or ``None`` when the node has no usable text.
        """
        label = (node.attrib.get('alt') if node.tag == 'img'
                 else "".join(node.itertext()))
        if not label:
            return None

        base = id_from_text(label)
        candidate = base
        suffix = 0
        while candidate in id_nodes:
            suffix += 1
            candidate = '%s%s' % (base, suffix)

        return candidate
Exemple #4
0
    def __format_page_comment(self, formatter, link_resolver):
        """Format this page's comment into its summary, title and contents.

        Does nothing when ``self.comment`` is falsy. Otherwise:

        * ``self.short_description`` and ``self.title`` are set from the
          formatted comment fields of the same name, when those are set;
        * an ``<h1>`` whose id is derived from the title is appended to
          ``self.formatted_contents`` when ``self.title`` is truthy;
        * the formatted comment itself is appended to
          ``self.formatted_contents``.

        Args:
            formatter: object providing ``format_comment(item, link_resolver)``.
            link_resolver: forwarded unchanged to ``format_comment``.
        """
        if not self.comment:
            return

        def format_inline(raw):
            # Format *raw* and drop the enclosing <p>...</p> only when the
            # result is exactly one paragraph. Slicing [3:-4] without
            # checking the tail would cut four arbitrary characters from
            # markup lacking a final </p>, and would leave unbalanced tags
            # behind for multi-paragraph markup.
            markup = formatter.format_comment(raw, link_resolver).strip()
            if markup.startswith('<p>') and markup.endswith('</p>'):
                inner = markup[3:-4]
                if '</p>' not in inner:
                    return inner
            return markup

        if self.comment.short_description:
            self.short_description = format_inline(
                self.comment.short_description)
        if self.comment.title:
            self.title = format_inline(self.comment.title)

        if self.title:
            self.formatted_contents += '<h1 id="%s-page">%s</h1>' % (
                id_from_text(self.title), self.title)

        self.formatted_contents += formatter.format_comment(
            self.comment, link_resolver)
Exemple #5
0
    def __validate_html(self, project, page, doc_root):
        """Assign ids, resolve local anchors and look up assets in a page tree.

        Working in place on *doc_root*:

        * every ``h1``-``h5`` and ``img`` element without an ``id`` attribute
          gets one derived from its text (the ``alt`` attribute for images),
          made unique within the document by appending a counter;
        * elements that receive an id this way have their text prefixed
          with the section number, when one is provided for them;
        * inside the element marked ``data-hotdoc-role="main"``, every
          anchor whose ``href`` starts with ``#`` has the page's relative
          path prepended to it; those with no text and no child are first
          given the text of their target, or a FIXME text along with a
          ``bad-local-link`` warning when the target id is unknown;
        * every element with a ``src`` attribute inside that same main
          element is handed to ``__lookup_asset``.

        Args:
            project: forwarded unchanged to ``__lookup_asset``.
            page: page object; ``link.ref`` and ``source_file`` are the
                attributes used here.
            doc_root: parsed document tree to modify.
        """
        rel_path = os.path.join(self.get_output_folder(page), page.link.ref)

        # Text content of every element that already carries an id.
        id_nodes = {
            node.attrib['id']: "".join(node.itertext())
            for node in doc_root.xpath('.//*[@id]')
        }

        section_numbers = self.__init_section_numbers(doc_root)

        targets = doc_root.xpath('.//*[self::h1 or self::h2 or self::h3 or '
                                 'self::h4 or self::h5 or self::img]')

        for target in targets:
            # Must run for every target, including the ones skipped below.
            section_number = self.__update_section_number(
                target, section_numbers)

            if 'id' in target.attrib:
                continue

            if target.tag == 'img':
                text = target.attrib.get('alt')
            else:
                text = "".join(target.itertext())

            if not text:
                continue

            id_ = id_from_text(text)
            ref_id = id_
            index = 1

            # Append 1, 2, 3... until the id is unused in this document.
            while id_ in id_nodes:
                id_ = '%s%s' % (ref_id, index)
                index += 1

            if section_number:
                target.text = '%s %s' % (section_number, target.text or '')

            target.attrib['id'] = id_
            # Registered with the un-numbered text captured above.
            id_nodes[id_] = text

        # NOTE(review): find() returns None when nothing matches, which would
        # make the xpath() calls below raise AttributeError; this assumes the
        # document always contains a main element -- confirm with callers.
        main_node = doc_root.find('.//*[@data-hotdoc-role="main"]')

        for link in main_node.xpath('.//a'):
            href = link.attrib.get('href')
            if not href or not href.startswith('#'):
                continue

            # len() counts child elements; getchildren() is deprecated.
            if not link.text and len(link) == 0:
                # Drop only the leading '#': str.strip('#') would also remove
                # any '#' at the end of the fragment and change the lookup
                # key.
                title = id_nodes.get(href[1:])
                if title:
                    link.text = title
                else:
                    warn(
                        'bad-local-link',
                        "Empty anchor link to %s in %s points nowhere" %
                        (href, page.source_file))
                    link.text = "FIXME broken link to %s" % href
            link.attrib["href"] = rel_path + href

        # All required assets should now be in place
        for asset in main_node.xpath('.//*[@src]'):
            self.__lookup_asset(asset, project, page)
Exemple #6
0
    def write_page(self, page, build_root, output):
        """Post-process the page's HTML, write it, then check local images.

        The markup in ``page.detailed_description`` is parsed and rewritten:

        * every ``h1``-``h5`` and ``img`` element without an ``id`` attribute
          gets one derived from its text (the ``alt`` attribute for images),
          made unique within the page by appending a counter;
        * elements that receive an id this way have their text prefixed
          with the section number, when one is provided for them;
        * anchors with no text and no child element whose ``href`` starts
          with ``#`` are given the text of the element they point to; when
          the target id is unknown a ``bad-local-link`` warning is emitted
          and a visible FIXME text is inserted instead.

        The serialized document is stored back in
        ``page.detailed_description`` and the page is written through
        ``Formatter.write_page``. Afterwards every ``img`` is checked: a
        missing ``src`` triggers a ``no-image-src`` warning, and a ``src``
        without URL scheme that does not exist relative to the written
        page's directory triggers a ``bad-image-src`` warning.

        Args:
            page: page object; ``detailed_description`` and ``source_file``
                are the attributes used here.
            build_root: forwarded unchanged to ``Formatter.write_page``.
            output: forwarded unchanged to ``Formatter.write_page``.

        Returns:
            The value returned by ``Formatter.write_page``, which is used
            here as the path of the written page.
        """
        root = etree.HTML(unicode(page.detailed_description))

        # Text content of every element that already carries an id.
        id_nodes = {node.attrib['id']: "".join(node.itertext())
                    for node in root.xpath('.//*[@id]')}

        section_numbers = self.__init_section_numbers(root)

        targets = root.xpath(
            './/*[self::h1 or self::h2 or self::h3 or '
            'self::h4 or self::h5 or self::img]')

        for target in targets:
            # Must run for every target, including the ones skipped below.
            section_number = self.__update_section_number(
                target, section_numbers)

            if 'id' in target.attrib:
                continue

            if target.tag == 'img':
                text = target.attrib.get('alt')
            else:
                text = "".join(target.itertext())

            if not text:
                continue

            id_ = id_from_text(text)
            ref_id = id_
            index = 1

            # Append 1, 2, 3... until the id is unused in this document.
            while id_ in id_nodes:
                id_ = '%s%s' % (ref_id, index)
                index += 1

            if section_number:
                target.text = '%s %s' % (section_number, target.text or '')

            target.attrib['id'] = id_
            # Registered with the un-numbered text captured above.
            id_nodes[id_] = text

        empty_links = root.xpath('.//a[not(text()) and not(*)]')
        for link in empty_links:
            href = link.attrib.get('href')
            if not href or not href.startswith('#'):
                continue

            # Drop only the leading '#': str.strip('#') would also remove
            # any '#' at the end of the fragment and change the lookup key.
            title = id_nodes.get(href[1:])
            if title:
                link.text = title
            else:
                warn('bad-local-link',
                     "Empty anchor link to %s in %s points nowhere" %
                     (href, page.source_file))
                link.text = "FIXME broken link to %s" % href

        page.detailed_description = lxml.html.tostring(
            root, doctype="<!DOCTYPE html>", encoding='unicode',
            include_meta_content_type=True)
        full_path = Formatter.write_page(self, page, build_root, output)

        page_dir = os.path.dirname(full_path)

        # All required assets should now be in place
        for img in root.xpath('.//img'):
            src = img.attrib.get('src')
            if not src:
                warn('no-image-src',
                     'Empty image source in %s' % page.source_file)
                continue

            # Sources carrying a URL scheme are not checked on disk.
            if urlparse.urlparse(src).scheme:
                continue

            path = os.path.abspath(os.path.join(page_dir, src))
            if not os.path.exists(path):
                warn('bad-image-src',
                     ('In %s, a local image refers to an unknown source (%s). '
                      'It should be available in the build folder, at %s') %
                     (page.source_file, src, path))
        return full_path