Example #1
0
def _skip_blank_text_siblings(siblings, idx):
    """Return ``idx`` advanced past text nodes that follow ``siblings[idx]``.

    Each skipped text node is serialized with ``writexml`` and must be
    whitespace-only; otherwise ``AssertionError`` is raised.
    """
    while idx + 1 < len(siblings):
        node = siblings[idx + 1]
        if node.nodeType != node.TEXT_NODE:
            break
        buf = StringIO()
        node.writexml(buf)
        text = buf.getvalue()
        if text.strip() != '':
            # Raised explicitly (rather than via ``assert``) so the
            # check still runs when Python is started with -O.
            raise AssertionError(
                'Subtitle cannot be followed by plain text: %r' % text)
        idx += 1
    return idx


def _read_link_fields(container, link):
    """Return the link taken from the 'field' elements of ``container``.

    Every field must have a ``th`` whose text is ``'link:'``; the text
    of its ``td`` becomes the link, prefixed with ``/`` when missing.
    The last field wins. If there are no fields, ``link`` is returned
    unchanged.
    """
    for field in htmlutil.getElementsByClass(container, 'field'):
        # TODO use classes for locating th and td, just
        # missing getOnlyElementByClass
        th = htmlutil.getOnlyElementByTagName(field, 'th')
        name = htmlutil.getNodeContentsAsText(th)
        if name != 'link:':
            raise AssertionError(
                'Unexpected field name: %r' % (name,))
        td = htmlutil.getOnlyElementByTagName(field, 'td')
        link = htmlutil.getNodeContentsAsText(td)
        if not link.startswith('/'):
            link = '/' + link
    return link


def get_sections(dom):
    """Yield one dict per navigational entry found in ``dom``.

    Entries come first from elements with class ``subtitle`` and then
    from elements with class ``section``. Each yielded dict has the keys
    ``link``, ``title`` and ``description``:

    - ``title`` is the text of the subtitle, or of the ``a`` inside the
      section's ``h1``.
    - ``link`` defaults to ``'/' + title.lower()`` and is overridden by
      a ``link:`` field (docinfo table after a subtitle, or any
      ``field-list`` inside a section).
    - ``description`` is the text of the paragraph that follows the
      subtitle (after the optional docinfo table), or of the first
      ``p`` inside the section; ``None`` when there is none.

    Raises ``AssertionError`` when the document structure is not the
    expected one (non-blank text right after a subtitle, a table after a
    subtitle that is not ``docinfo``, or a field not named ``link:``).
    """
    # special case where a lone header is lifted to be
    # subtitle.. didn't really want that to happen, but
    # it's easier to work around.. who has a single
    # navigational element, anyway?
    for elem in htmlutil.getElementsByClass(dom, 'subtitle'):
        title = htmlutil.getNodeContentsAsText(elem)
        siblings = elem.parentNode.childNodes
        idx = siblings.index(elem)
        description = None
        link = '/' + title.lower()

        idx = _skip_blank_text_siblings(siblings, idx)

        # Optional docinfo table carrying an explicit link.
        if idx + 1 < len(siblings):
            candidate = siblings[idx + 1]
            if (candidate.nodeType == candidate.ELEMENT_NODE
                    and candidate.nodeName == 'table'):
                if candidate.getAttribute('class') != 'docinfo':
                    raise AssertionError(
                        'Table following a subtitle must have class'
                        ' docinfo, got %r'
                        % (candidate.getAttribute('class'),))
                link = _read_link_fields(candidate, link)
                idx += 1

        idx = _skip_blank_text_siblings(siblings, idx)

        # Optional paragraph used as the description.
        if idx + 1 < len(siblings):
            candidate = siblings[idx + 1]
            if (candidate.nodeType == candidate.ELEMENT_NODE
                    and candidate.nodeName == 'p'):
                description = htmlutil.getNodeContentsAsText(candidate)

        yield dict(
            link=link,
            title=title,
            description=description,
            )

    for section in htmlutil.getElementsByClass(dom, 'section'):
        h1 = htmlutil.getOnlyElementByTagName(section, 'h1')
        a = htmlutil.getOnlyElementByTagName(h1, 'a')
        title = htmlutil.getNodeContentsAsText(a)
        link = '/' + title.lower()

        for table in htmlutil.getElementsByClass(section, 'field-list'):
            link = _read_link_fields(table, link)

        paragraphs = section.getElementsByTagName('p')
        if paragraphs:
            description = htmlutil.getNodeContentsAsText(paragraphs[0])
        else:
            description = None

        yield dict(
            link=link,
            title=title,
            description=description,
            )
Example #2
0
File: rst.py Project: mchubby/roast
def asDOM(
    text,
    source_path=None,
    template=None,
    flavor=None,
    s5_theme_url=None,
    navigation=None,
    operation=None,
    ):
    """Publish reStructuredText ``text`` and return it as a minidom tree.

    Parameters:

    - ``text``: the source handed to the publisher.
    - ``source_path``: passed through to the publisher unchanged.
    - ``template``: for the ``html`` flavor, an XML template string the
      rendered body is wrapped in (together with ``title`` and
      ``navigation``); must be ``None`` for the ``s5`` flavor.
    - ``flavor``: ``'html'`` (used when ``None``) or ``'s5'``.
    - ``s5_theme_url``: required for the ``s5`` flavor; becomes the
      ``theme_url`` setting.
    - ``navigation``: passed to the template.
    - ``operation``: stored in the settings as ``roast_operation``.

    Returns the ``xml.dom.minidom`` document, after the generator meta
    tags and linked stylesheet have been removed and
    ``htmlutil.fixXMLTags`` has been applied.

    Raises ``ValueError`` for an unknown ``flavor``. The ``s5`` flavor
    asserts that ``template`` is ``None`` and ``s5_theme_url`` is given.
    """
    if flavor is None:
        flavor = 'html'

    # Reject bad input before doing any work. The 1-tuple keeps the
    # formatting correct even if ``flavor`` itself is a tuple.
    if flavor not in ('html', 's5'):
        raise ValueError('Unknown RST flavor: %r' % (flavor,))

    settings = dict(
        input_encoding='utf-8',
        output_encoding='utf-8',
        embed_stylesheet=False,
        stylesheet_path=htmlutil.KLUDGE_KILL_CSS,
        generator=False,

        # TODO file insertion should really be disabled
        # but can't do that now, as that would make the
        # original include directive fail.. also can't
        # just temporarily enable it to kludge, as that
        # would mean the included file sees it as fully
        # enabled.. will have to reimplement include.

#         file_insertion_enabled=0,

        # TODO ponder disabling raw; it allows content creators to
        # attack the site

#         raw_enabled=0,

        _disable_config=1,
        roast_operation=operation,
        )

    if flavor == 's5':
        writer = s5_html.Writer()
        assert template is None
        assert s5_theme_url is not None
        settings.update(dict(
                theme=None,
                theme_url=s5_theme_url,
                current_slide=True,
                ))
    else:
        # Only 'html' can reach here; other values were rejected above.
        writer = html4css1.Writer()

    # Docutils stores default `foo` role in global state that persists
    # from one parser to another. Parsing directive "default-role"
    # sets that, usually from s5defs.txt. To avoid infecting all
    # latter runs (`foo` will create <span
    # class="incremental">foo</span> instead of <cite>foo</cite>), we
    # try to contain the damage, and restore the default role to
    # original settings before every run.
    try:
        del roles._roles['']
    except KeyError:
        pass

    html, publisher = publish_programmatically(
        source_class=io.StringInput,
        source=text,
        source_path=source_path,
        destination_class=io.StringOutput,
        destination=None,
        destination_path=None,
        reader=None,
        reader_name='standalone',
        parser=None,
        parser_name='restructuredtext',
        writer=writer,
        writer_name=None,
        settings=None,
        settings_spec=None,
        settings_overrides=settings,
        config_section=None,
        enable_exit_status=None)

    tree = minidom.parseString(html)
    title = htmlutil.getTitle(tree)
    title = htmlutil.getNodeContentsAsText(title)

    # kill generator meta tag
    htmlutil.killGeneratorMetaTags(tree)

    # kill stylesheet
    htmlutil.killLinkedStylesheet(tree)

    if flavor == 'html':
        body = htmlutil.getOnlyElementByTagName(tree, 'body')

        # Narrow to the single "document" element when there is
        # exactly one; otherwise keep working on the whole body.
        docs = htmlutil.getElementsByClass(body, 'document')
        if len(docs) == 1:
            body = docs[0]

        # remove the headings rst promoted to top level,
        # the template will take care of that
        for h1 in body.getElementsByTagName('h1'):
            if htmlutil.elementHasClass(h1, 'title'):
                h1.parentNode.removeChild(h1)
                break

        if template is not None:
            template = Template(original=body,
                                docFactory=loaders.xmlstr(template),
                                title=title,
                                navigation=navigation,
                                )
            html = flat.flatten(template)
            tree = minidom.parseString(html)

    htmlutil.fixXMLTags(tree)
    return tree