Example #1
0
    def commentSubstitutions(self, ElementTree, w3c_compat=False,
                             w3c_compat_substitutions=False,
                             w3c_compat_crazy_substitutions=False, **kwargs):
        """Expand marker comments in ``ElementTree`` in place.

        Two families of marker comments are processed:

        * ``begin-link`` / ``end-link``: a new ``a`` element is inserted
          right after the ``begin-link`` comment.  Every following node
          whose parent is the parent of ``begin-link`` is deep-copied into
          that element, and all nodes visited before ``end-link`` are
          scheduled for removal.  At ``end-link`` the element's ``href`` is
          set to its own text content.
        * For each ``(comment, sub)`` pair in the module-level
          ``basic_comment_subs``: nodes between ``begin-<comment>`` and
          ``end-<comment>`` are scheduled for removal and a copy of ``sub``
          is inserted after the begin marker.  A lone ``<comment>`` comment
          is replaced by a begin marker, a copy of ``sub`` and an end
          marker.

        ``w3c_compat`` or ``w3c_compat_substitutions`` additionally enable
        the ``logo`` and ``copyright`` substitutions (names taken from the
        enclosing module scope).  ``w3c_compat_crazy_substitutions`` and
        ``**kwargs`` are accepted but not used by this method.

        Raises ``DifferentParentException`` when a begin marker and its end
        marker do not share the same parent node.
        """
        # Basic substitutions
        instance_basic_comment_subs = basic_comment_subs

        # Add more basic substitutions in compat. mode
        if w3c_compat or w3c_compat_substitutions:
            instance_basic_comment_subs += ((logo, logo_sub),
                                            (copyright, copyright_sub))

        # Nodes are only collected while iterating and removed at the very
        # end, so the tree is not pruned underneath a running iterator.
        to_remove = set()

        # Link
        in_link = False
        for node in ElementTree.iter():
            if in_link:
                if node.tag is etree.Comment and \
                   node.text.strip(utils.spaceCharacters) == u"end-link":
                    if node.getparent() is not link_parent:
                        raise DifferentParentException(
                            u"begin-link and end-link have different parents")
                    utils.removeInteractiveContentChildren(link)
                    link.set(u"href", utils.textContent(link))
                    in_link = False
                else:
                    # Only direct siblings of the begin marker are copied;
                    # their descendants come along with the deep copy.
                    if node.getparent() is link_parent:
                        link.append(deepcopy(node))
                    to_remove.add(node)
            elif node.tag is etree.Comment and \
                 node.text.strip(utils.spaceCharacters) == u"begin-link":
                link_parent = node.getparent()
                in_link = True
                link = etree.Element(u"a")
                # The text following the marker becomes the link's leading
                # text.
                link.text = node.tail
                node.tail = None
                node.addnext(link)

        # Basic substitutions
        for comment, sub in instance_basic_comment_subs:
            begin_sub = u"begin-" + comment
            end_sub = u"end-" + comment
            in_sub = False
            for node in ElementTree.iter():
                if in_sub:
                    if node.tag is etree.Comment and \
                       node.text.strip(utils.spaceCharacters) == end_sub:
                        if node.getparent() is not sub_parent:
                            # The arguments must be a tuple: "%" binds
                            # tighter than ",", so without the parentheses
                            # the format string receives a single argument
                            # and raises TypeError instead.
                            raise DifferentParentException(
                                u"%s and %s have different parents" %
                                (begin_sub, end_sub))
                        in_sub = False
                    else:
                        to_remove.add(node)
                elif node.tag is etree.Comment:
                    if node.text.strip(utils.spaceCharacters) == begin_sub:
                        sub_parent = node.getparent()
                        in_sub = True
                        node.tail = None
                        node.addnext(deepcopy(sub))
                    elif node.text.strip(utils.spaceCharacters) == comment:
                        # Expand the short form into begin / content / end
                        # and hand the old tail over to the end marker.
                        node.addprevious(etree.Comment(begin_sub))
                        node.addprevious(deepcopy(sub))
                        node.addprevious(etree.Comment(end_sub))
                        node.getprevious().tail = node.tail
                        to_remove.add(node)

        # Remove nodes
        for node in to_remove:
            node.getparent().remove(node)
Example #2
0
    def commentSubstitutions(self, ElementTree, w3c_compat=False,
                             w3c_compat_substitutions=False,
                             w3c_compat_crazy_substitutions=False, **kwargs):
        """Expand marker comments in ``ElementTree`` in place.

        Two families of marker comments are processed:

        * ``begin-link`` / ``end-link``: a new ``a`` element is inserted
          right after the ``begin-link`` comment.  Every following node
          whose parent is the parent of ``begin-link`` is deep-copied into
          that element, and all nodes visited before ``end-link`` are
          scheduled for removal.  At ``end-link`` the element's ``href`` is
          set to its own text content.
        * For each ``(comment, sub)`` pair in the module-level
          ``basic_comment_subs``: nodes between ``begin-<comment>`` and
          ``end-<comment>`` are scheduled for removal and a copy of ``sub``
          is inserted after the begin marker.  A lone ``<comment>`` comment
          is replaced by a begin marker, a copy of ``sub`` and an end
          marker.

        ``w3c_compat`` or ``w3c_compat_substitutions`` additionally enable
        the ``logo`` and ``copyright`` substitutions (names taken from the
        enclosing module scope).  ``w3c_compat_crazy_substitutions`` and
        ``**kwargs`` are accepted but not used by this method.

        Raises ``DifferentParentException`` when a begin marker and its end
        marker do not share the same parent node.
        """
        # Basic substitutions
        instance_basic_comment_subs = basic_comment_subs

        # Add more basic substitutions in compat. mode
        if w3c_compat or w3c_compat_substitutions:
            instance_basic_comment_subs += ((logo, logo_sub), (copyright,
                                                               copyright_sub))

        # Nodes are collected here and only detached once all passes over
        # the tree have finished.
        to_remove = set()

        # Link
        in_link = False
        for node in ElementTree.iter():
            if in_link:
                if node.tag is etree.Comment and \
                   node.text.strip(utils.spaceCharacters) == u"end-link":
                    if node.getparent() is not link_parent:
                        raise DifferentParentException(
                            u"begin-link and end-link have different parents")
                    utils.removeInteractiveContentChildren(link)
                    link.set(u"href", utils.textContent(link))
                    in_link = False
                else:
                    # Copy only the marker's siblings; nested nodes are
                    # already part of their ancestor's deep copy.
                    if node.getparent() is link_parent:
                        link.append(deepcopy(node))
                    to_remove.add(node)
            elif node.tag is etree.Comment and \
                 node.text.strip(utils.spaceCharacters) == u"begin-link":
                link_parent = node.getparent()
                in_link = True
                link = etree.Element(u"a")
                # Text that followed the marker becomes the start of the
                # link's content.
                link.text = node.tail
                node.tail = None
                node.addnext(link)

        # Basic substitutions
        for comment, sub in instance_basic_comment_subs:
            begin_sub = u"begin-" + comment
            end_sub = u"end-" + comment
            in_sub = False
            for node in ElementTree.iter():
                if in_sub:
                    if node.tag is etree.Comment and \
                       node.text.strip(utils.spaceCharacters) == end_sub:
                        if node.getparent() is not sub_parent:
                            # Both names have to be passed as one tuple;
                            # "fmt % a, b" formats with "a" alone and fails
                            # with TypeError before the exception is built.
                            raise DifferentParentException(
                                u"%s and %s have different parents" %
                                (begin_sub, end_sub))
                        in_sub = False
                    else:
                        to_remove.add(node)
                elif node.tag is etree.Comment:
                    if node.text.strip(utils.spaceCharacters) == begin_sub:
                        sub_parent = node.getparent()
                        in_sub = True
                        node.tail = None
                        node.addnext(deepcopy(sub))
                    elif node.text.strip(utils.spaceCharacters) == comment:
                        # Rewrite the short form as begin / content / end;
                        # the end marker inherits the old comment's tail.
                        node.addprevious(etree.Comment(begin_sub))
                        node.addprevious(deepcopy(sub))
                        node.addprevious(etree.Comment(end_sub))
                        node.getprevious().tail = node.tail
                        to_remove.add(node)

        # Remove nodes
        for node in to_remove:
            node.getparent().remove(node)
Example #3
0
    def commentSubstitutions(self,
                             ElementTree,
                             w3c_compat=False,
                             w3c_compat_substitutions=False,
                             w3c_compat_crazy_substitutions=False,
                             enable_woolly=False,
                             **kwargs):
        """Apply comment-driven substitutions to ``ElementTree`` in place.

        ``begin-link`` / ``end-link`` comment pairs are turned into an ``a``
        element that receives copies of the nodes sharing the begin marker's
        parent and whose ``href`` is its own text content.  Afterwards every
        ``(comment, sub)`` pair from ``basic_comment_subs`` is handed to
        ``utils.replaceComment`` together with ``**kwargs``.

        With ``w3c_compat`` or ``w3c_compat_substitutions`` set, ``logo`` and
        ``copyright`` substitutions are added; the copyright year is taken
        from ``self.pubdate`` and ``enable_woolly`` appends the CSS WG logo.
        ``w3c_compat_crazy_substitutions`` is not used by this method.

        Raises ``utils.DifferentParentException`` when ``begin-link`` and
        ``end-link`` have different parents.
        """
        substitutions = basic_comment_subs

        # Compat. mode contributes the W3C logo and copyright paragraphs
        if w3c_compat or w3c_compat_substitutions:
            year = time.strftime("%Y", self.pubdate)
            copyright_markup = (
                '<p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> &#xA9; %s <a href="http://www.w3.org/"><abbr title="World Wide Web Consortium">W3C</abbr></a><sup>&#xAE;</sup> (<a href="http://www.csail.mit.edu/"><abbr title="Massachusetts Institute of Technology">MIT</abbr></a>, <a href="http://www.ercim.eu/"><abbr title="European Research Consortium for Informatics and Mathematics">ERCIM</abbr></a>, <a href="http://www.keio.ac.jp/">Keio</a>, <a href="http://ev.buaa.edu.cn/">Beihang</a>), All Rights Reserved. W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>, <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a> and <a href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a> rules apply.</p>'
                % year)
            copyright_element = etree.fromstring(copyright_markup)

            logo_parts = [
                '<a href="http://www.w3.org/"><img height="48" width="72" alt="W3C" src="https://www.w3.org/Icons/w3c_home"/></a>'
            ]
            if enable_woolly:
                logo_parts.append(
                    '<a class="logo" href="https://www.w3.org/Style/Group/" rel="in-activity"><img alt="CSS WG" src="https://www.w3.org/Style/Woolly/woolly-icon"/></a>'
                )
            logo_element = etree.fromstring('<p>%s</p>' % "".join(logo_parts))

            substitutions += (("logo", logo_element),
                              ("copyright", copyright_element))

        # Nodes swallowed by a link; detached only after all processing
        doomed = set()

        # Link: ``anchor_parent`` doubles as the "inside a link" flag
        anchor_parent = None
        anchor = None
        for node in ElementTree.iter():
            is_comment = node.tag is etree.Comment

            if anchor_parent is None:
                # Outside a link: only a begin marker is of interest
                if is_comment and \
                   node.text.strip(utils.spaceCharacters) == "begin-link":
                    anchor_parent = node.getparent()
                    anchor = etree.Element("a")
                    anchor.text = node.tail
                    node.tail = None
                    node.addnext(anchor)
                continue

            if is_comment and \
               node.text.strip(utils.spaceCharacters) == "end-link":
                if node.getparent() is not anchor_parent:
                    raise utils.DifferentParentException(
                        "begin-link and end-link have different parents")
                utils.removeInteractiveContentChildren(anchor)
                anchor.set("href", utils.textContent(anchor))
                anchor_parent = None
                continue

            # Inside a link: copy siblings of the marker, drop everything
            if node.getparent() is anchor_parent:
                anchor.append(deepcopy(node))
            doomed.add(node)

        # Basic substitutions
        for name, replacement in substitutions:
            utils.replaceComment(ElementTree, name, replacement, **kwargs)

        # Detach everything that was moved into a link
        for node in doomed:
            node.getparent().remove(node)
Example #4
0
    def buildToc(self, ElementTree, min_depth=2, max_depth=6, w3c_compat=False,
                 w3c_compat_class_toc=False, **kwargs):
        """Number the document's section headers and fill ``self.toc``.

        The document outline is obtained from ``outliner.Outliner`` and its
        sections are visited depth-first.  For every section that has a
        header, existing ``span`` elements with class ``secno`` are removed
        from the header.  For sections whose depth lies within
        ``min_depth``..``max_depth`` (both 1-based, inclusive) the method
        additionally:

        * maintains the list of per-level section counters,
        * appends an ``li`` to the matching (possibly newly created) nested
          ``ol`` below ``self.toc``,
        * sets an ``id`` on the section header,
        * prepends a ``span`` with class ``secno`` holding the dotted
          section number, unless the header has class ``no-num``,
        * adds a cleaned-up copy of the header, turned into an ``a`` link,
          to the TOC item, unless the header has class ``no-toc``.

        When ``w3c_compat`` or ``w3c_compat_class_toc`` is true, nested
        ``ol`` elements created here get ``class="toc"``.  ``**kwargs`` is
        forwarded to the outliner, ``self.indentNode`` and
        ``utils.generateID``.

        Nothing is returned; ``ElementTree`` and ``self.toc`` are modified
        in place.

        NOTE(review): relies on the Python 2 builtin ``unicode``.
        """
        # Build the outline of the document
        outline_creator = outliner.Outliner(ElementTree, **kwargs)
        outline = outline_creator.build(**kwargs)

        # Get a list of all the top level sections, and their depth (0).
        # The list is used as a stack, hence the reversed order.
        sections = [(section, 0) for section in reversed(outline)]

        # Numbering: one counter per TOC level, outermost first
        num = []

        # Loop over all sections in a DFS
        while sections:
            # Get the section and depth at the end of list
            section, depth = sections.pop()

            # If we have a header, regardless of how deep we are
            if section.header is not None:
                # Get the element that represents the section header's text
                if section.header.tag == u"hgroup":
                    # Inside an hgroup, take the first match when searching
                    # for h1, then h2, ... up to h6
                    i = 1
                    while i <= 6:
                        header_text = section.header.find(u".//h" + unicode(i))
                        if header_text is not None:
                            break
                        i += 1
                    else:
                        # No h1-h6 found anywhere inside the hgroup
                        header_text = None
                else:
                    header_text = section.header
            else:
                header_text = None

            # If we have a section heading text element, regardless of depth
            if header_text is not None:
                # Remove any existing number
                for element in header_text.findall(u".//span"):
                    if utils.elementHasClass(element, u"secno"):
                        # Copy content, to prepare for the node being
                        # removed (presumably only the tail is preserved
                        # with these flags -- confirm against utils)
                        utils.copyContentForRemoval(element, text=False,
                                                    children=False)
                        # Remove the element (we can do this as we're not
                        # iterating over the elements, but over a list)
                        element.getparent().remove(element)

            # Check we're in the valid depth range (min/max_depth are 1 based,
            # depth is 0 based)
            if depth >= min_depth - 1 and depth <= max_depth - 1:
                # Calculate the corrected depth (i.e., the actual depth within
                # the numbering/TOC)
                corrected_depth = depth - min_depth + 1

                # Numbering:
                # No children, no sibling, move back to parent's sibling
                # (drop the counters of all deeper levels)
                if corrected_depth + 1 < len(num):
                    del num[corrected_depth + 1:]
                # Children (first section seen at this level: new counter)
                elif corrected_depth == len(num):
                    num.append(0)

                # Increment the current section's number, either for a
                # header without class no-num, or for a header-less section
                # that is truthy (presumably: has subsections -- confirm
                # against the outliner's section type)
                if header_text is not None and \
                   not utils.elementHasClass(header_text, u"no-num") or \
                   header_text is None and section:
                    num[-1] += 1

                # Get the current TOC section for this depth, and add another
                # item to it (same rule as above, but keyed on no-toc)
                if header_text is not None and \
                   not utils.elementHasClass(header_text, u"no-toc") or \
                   header_text is None and section:
                    # Find the appropriate section of the TOC by descending
                    # one ol/li level per unit of corrected depth
                    i = 0
                    toc_section = self.toc
                    while i < corrected_depth:
                        try:
                            # If the final li has no children, or the last
                            # child isn't an ol element
                            if len(toc_section[-1]) == 0 or \
                               toc_section[-1][-1].tag != u"ol":
                                toc_section[-1].append(etree.Element(u"ol"))
                                self.indentNode(toc_section[-1][-1],
                                                (i + 1) * 2, **kwargs)
                                if w3c_compat or w3c_compat_class_toc:
                                    toc_section[-1][-1].set(u"class", u"toc")
                        except IndexError:
                            # If the current ol has no li in it, create an
                            # li holding a fresh ol
                            toc_section.append(etree.Element(u"li"))
                            self.indentNode(toc_section[0], (i + 1) * 2 - 1,
                                            **kwargs)
                            toc_section[0].append(etree.Element(u"ol"))
                            self.indentNode(toc_section[0][0], (i + 1) * 2,
                                            **kwargs)
                            if w3c_compat or w3c_compat_class_toc:
                                toc_section[0][0].set(u"class", u"toc")
                        # TOC Section is now the final child (ol) of the final
                        # item (li) in the previous section
                        assert toc_section[-1].tag == u"li"
                        assert toc_section[-1][-1].tag == u"ol"
                        toc_section = toc_section[-1][-1]
                        i += 1
                    # Add the current item to the TOC
                    item = etree.Element(u"li")
                    toc_section.append(item)
                    self.indentNode(item, (i + 1) * 2 - 1, **kwargs)

                # If we have a header
                if header_text is not None:
                    # Add ID to header: generated from the header text
                    # element, but always stored on the section header
                    # itself (which differs for an hgroup)
                    id = utils.generateID(header_text, **kwargs)
                    if header_text.get(u"id") is not None:
                        del header_text.attrib[u"id"]
                    section.header.set(u"id", id)

                    # Add number, if @class doesn't contain no-num
                    if not utils.elementHasClass(header_text, u"no-num"):
                        # Insert the span as first child and move the
                        # header's leading text behind it
                        header_text[0:0] = [etree.Element(u"span", {u"class":
                                                                    u"secno"})]
                        header_text[0].tail = header_text.text
                        header_text.text = None
                        header_text[0].text = u".".join(map(unicode, num))
                        header_text[0].text += u" "
                    # Add to TOC, if @class doesn't contain no-toc
                    if not utils.elementHasClass(header_text, u"no-toc"):
                        # "item" is the li created above; the no-toc check
                        # there guarantees it exists on this path
                        link = deepcopy(header_text)
                        item.append(link)
                        # Make it link to the header
                        link.tag = u"a"
                        link.set(u"href", u"#" + id)
                        # Remove interactive content child elements
                        utils.removeInteractiveContentChildren(link)
                        # Remove other child elements
                        for element_name in remove_elements_from_toc:
                            # Iterate over all the descendants of the new link
                            # with that element name
                            for element in link.findall(u".//" + element_name):
                                # Copy content, to prepare for the node being
                                # removed
                                utils.copyContentForRemoval(element)
                                # Remove the element (we can do this as we're
                                # not iterating over the elements, but over a
                                # list)
                                element.getparent().remove(element)
                        # Remove unwanted attributes
                        for element in link.iter(tag=etree.Element):
                            for attribute_name in remove_attributes_from_toc:
                                if element.get(attribute_name) is not None:
                                    del element.attrib[attribute_name]
                        # We don't want the old tail
                        link.tail = None
                        # Check we haven't changed the content in all of that
                        assert utils.textContent(header_text) == \
                               utils.textContent(link)
            # Add subsections in reverse order (so the next one is executed
            # next) with a higher depth value
            sections.extend([(child_section, depth + 1)
                             for child_section in reversed(section)])