def stringSubstitutions(self, ElementTree, w3c_compat=False,
                        w3c_compat_substitutions=False,
                        w3c_compat_crazy_substitutions=False, **kwargs):
    """Expand the textual placeholders of the document, in place.

    Every substitution is a ``(regex, replacement, identifier)`` triple:
    ``identifier`` is a cheap substring pre-check and ``regex`` performs
    the replacement.  Element text, tail text and attribute values of
    every node are processed.

    ElementTree -- tree whose nodes are rewritten.
    w3c_compat, w3c_compat_substitutions -- either one also enables the
        status and long-status substitutions.
    w3c_compat_crazy_substitutions -- also enables the W3C stylesheet URL
        substitution.

    Raises KeyError when a compat mode is on and ``self.w3c_status`` has
    no entry in ``longstatus_map``.
    """
    # Get doc_title from the title element; a missing <head> or <title>
    # gives an empty title
    try:
        doc_title = utils.textContent(
            ElementTree.getroot().find(u"head").find(u"title"))
    except (AttributeError, TypeError):
        doc_title = u""

    # The title is arbitrary document text, but regex.sub() treats its
    # replacement argument as a template in which backslashes introduce
    # escapes and group references.  Double them so that the title is
    # inserted literally.
    title_sub = doc_title.replace(u"\\", u"\\\\")

    compat_subs = w3c_compat or w3c_compat_substitutions

    if compat_subs:
        # Get the right long status
        doc_longstatus = longstatus_map[self.w3c_status]

    if w3c_compat_crazy_substitutions:
        # Get the right stylesheet
        doc_w3c_stylesheet = u"http://www.w3.org/StyleSheets/TR/W3C-" + \
            self.w3c_status

    # Get all the subs we want
    instance_string_subs = string_subs + \
        ((title, title_sub, title_identifier), )

    # And even more in compat. mode
    if compat_subs:
        instance_string_subs += ((status, self.w3c_status, status_identifier),
                                 (longstatus, doc_longstatus,
                                  longstatus_identifier))

    # And more that aren't even enabled by default in compat. mode
    if w3c_compat_crazy_substitutions:
        instance_string_subs += ((w3c_stylesheet, doc_w3c_stylesheet,
                                  w3c_stylesheet_identifier), )

    for node in ElementTree.iter():
        for regex, sub, identifier in instance_string_subs:
            if node.text is not None and identifier in node.text:
                node.text = regex.sub(sub, node.text)
            if node.tail is not None and identifier in node.tail:
                node.tail = regex.sub(sub, node.tail)
            # items() is re-read for every substitution so that each one
            # sees the attribute values left by the previous one
            for name, value in node.attrib.items():
                if identifier in value:
                    node.attrib[name] = regex.sub(sub, value)
def getTerm(self, element, w3c_compat=False,
            w3c_compat_xref_normalization=False, **kwargs):
    """Return the normalized cross-reference term for ``element``.

    The raw term is the first attribute that is present among
    ``data-anolis-xref``, ``data-x`` and ``title`` (in that order); when
    none is set, the element's text content is used.  The raw term is
    stripped of ``utils.spaceCharacters``, lower-cased, and has every match
    of ``utils.spacesRegex`` replaced by a single space.

    The ``w3c_compat`` flags are accepted but not used by this
    implementation.
    """
    for attribute in ("data-anolis-xref", "data-x", "title"):
        term = element.get(attribute)
        if term is not None:
            break
    else:
        # No attribute supplied a term: fall back to the element's text
        term = utils.textContent(element)

    lowered = term.strip(utils.spaceCharacters).lower()
    return utils.spacesRegex.sub(" ", lowered)
def getTerm(self, element, w3c_compat=False,
            w3c_compat_xref_normalization=False, **kwargs):
    """Return the normalized cross-reference term for ``element``.

    The raw term is the ``data-anolis-xref`` attribute when present,
    otherwise the ``title`` attribute when present, otherwise the
    element's text content.  It is stripped of ``utils.spaceCharacters``,
    lower-cased, and has every match of ``utils.spacesRegex`` replaced by
    a single space.

    When ``w3c_compat`` or ``w3c_compat_xref_normalization`` is true, every
    match of ``non_alphanumeric_spaces`` is additionally removed.
    """
    explicit = element.get(u"data-anolis-xref")
    if explicit is not None:
        raw = explicit
    else:
        titled = element.get(u"title")
        raw = titled if titled is not None else utils.textContent(element)

    normalized = utils.spacesRegex.sub(
        u" ", raw.strip(utils.spaceCharacters).lower())

    if not (w3c_compat or w3c_compat_xref_normalization):
        return normalized
    return non_alphanumeric_spaces.sub(u"", normalized)
def stringSubstitutions(self, ElementTree, w3c_compat=False,
                        w3c_compat_substitutions=False,
                        w3c_compat_crazy_substitutions=False, **kwargs):
    """Expand the textual placeholders of the document, in place.

    Every substitution is a ``(regex, replacement, identifier)`` triple:
    ``identifier`` is a cheap substring pre-check and ``regex`` performs
    the replacement.  Element text, tail text and attribute values of
    every node are processed.

    ElementTree -- tree whose nodes are rewritten.
    w3c_compat, w3c_compat_substitutions -- either one also enables the
        status and long-status substitutions.
    w3c_compat_crazy_substitutions -- also enables the W3C stylesheet URL
        substitution.

    Raises KeyError when a compat mode is on and ``self.w3c_status`` has
    no entry in ``longstatus_map``.
    """
    # Get doc_title from the title element; a missing <head> or <title>
    # gives an empty title
    try:
        doc_title = utils.textContent(ElementTree.getroot().find(u"head")
                                      .find(u"title"))
    except (AttributeError, TypeError):
        doc_title = u""

    # The title is arbitrary document text, but regex.sub() treats its
    # replacement argument as a template in which backslashes introduce
    # escapes and group references.  Double them so that the title is
    # inserted literally.
    title_sub = doc_title.replace(u"\\", u"\\\\")

    compat_subs = w3c_compat or w3c_compat_substitutions

    if compat_subs:
        # Get the right long status
        doc_longstatus = longstatus_map[self.w3c_status]

    if w3c_compat_crazy_substitutions:
        # Get the right stylesheet
        doc_w3c_stylesheet = u"http://www.w3.org/StyleSheets/TR/W3C-" + \
            self.w3c_status

    # Get all the subs we want
    instance_string_subs = string_subs + \
        ((title, title_sub, title_identifier), )

    # And even more in compat. mode
    if compat_subs:
        instance_string_subs += ((status, self.w3c_status, status_identifier),
                                 (longstatus, doc_longstatus,
                                  longstatus_identifier))

    # And more that aren't even enabled by default in compat. mode
    if w3c_compat_crazy_substitutions:
        instance_string_subs += ((w3c_stylesheet, doc_w3c_stylesheet,
                                  w3c_stylesheet_identifier), )

    for node in ElementTree.iter():
        for regex, sub, identifier in instance_string_subs:
            if node.text is not None and identifier in node.text:
                node.text = regex.sub(sub, node.text)
            if node.tail is not None and identifier in node.tail:
                node.tail = regex.sub(sub, node.tail)
            # items() is re-read for every substitution so that each one
            # sees the attribute values left by the previous one
            for name, value in node.attrib.items():
                if identifier in value:
                    node.attrib[name] = regex.sub(sub, value)
def readDoc(self, ElementTree, name, localName, captionLocalName, figures):
    """Number the ``localName`` elements and label their captions, in place.

    Every ``localName`` descendant of the root is counted in document
    order.  Each one that does not carry the ``no-num`` class is given an
    ``id`` if it has none, gets a ``captionLocalName`` child reading
    "(untitled)" if it has no caption, has "<name> <number>: " put in front
    of the caption's leading text, and is recorded in ``figures``.

    ElementTree -- tree to scan and modify.
    name -- label used in the caption prefix.
    localName -- tag name of the elements to number.
    captionLocalName -- tag name of the caption element looked up (and
        created when missing) inside each numbered element.
    figures -- list receiving one ``(id, caption text)`` tuple per
        numbered element; the caption text is read before the prefix is
        added.
    """
    count = 0
    for element in ElementTree.getroot().findall(u".//%s" % localName):
        # The counter advances before the no-num check, so skipped
        # elements still use up a number.
        # NOTE(review): kept as in the original; confirm this is intended.
        count += 1
        if utils.elementHasClass(element, u"no-num"):
            continue

        if u"id" not in element.attrib:
            element.set(u"id", u"anolis-%s-%d" % (localName, count))
        element_id = element.get(u"id")

        cap = element.find(u".//%s" % captionLocalName)
        if cap is None:
            cap = etree.Element(u"%s" % captionLocalName)
            cap.text = u"(untitled)"
            element.append(cap)

        caption = utils.textContent(cap)
        # cap.text is None when the caption begins with a child element;
        # use an empty string so the prefix does not end in "None"
        cap.text = u"%s %d: %s" % (name, count, cap.text or u"")
        figures.append((element_id, caption))
def commentSubstitutions(self, ElementTree, w3c_compat=False,
                         w3c_compat_substitutions=False,
                         w3c_compat_crazy_substitutions=False, **kwargs):
    """Expand the comment-driven substitutions of the document, in place.

    Two kinds of marker comments are handled:

    * ``begin-link`` ... ``end-link``: the nodes between the two comments
      are copied into a new ``<a>`` element placed after the begin marker,
      its ``href`` is set to its own text content, and the original nodes
      are removed.
    * For every ``(comment, sub)`` pair: the nodes between ``begin-X`` and
      ``end-X`` are removed and a copy of ``sub`` is placed after the begin
      marker; a lone ``X`` comment is replaced by a begin marker, a copy of
      ``sub`` and an end marker.

    ElementTree -- tree to modify.
    w3c_compat, w3c_compat_substitutions -- either one adds the logo and
        copyright substitutions to the basic ones.

    Raises DifferentParentException when a begin marker and its end marker
    do not have the same parent.
    """
    # Basic substitutions.  Work on a private tuple so that extending it
    # below can never modify the module-level sequence.
    instance_basic_comment_subs = tuple(basic_comment_subs)

    # Add more basic substitutions in compat. mode
    if w3c_compat or w3c_compat_substitutions:
        instance_basic_comment_subs += ((logo, logo_sub),
                                        (copyright, copyright_sub))

    # Set of nodes to remove
    to_remove = set()

    # Link
    in_link = False
    for node in ElementTree.iter():
        if in_link:
            if node.tag is etree.Comment and \
               node.text.strip(utils.spaceCharacters) == u"end-link":
                if node.getparent() is not link_parent:
                    raise DifferentParentException(
                        u"begin-link and end-link have different parents")
                utils.removeInteractiveContentChildren(link)
                link.set(u"href", utils.textContent(link))
                in_link = False
            else:
                # Only siblings of the begin marker are copied (the copy
                # brings their descendants along); every node in between
                # is removed from its original place
                if node.getparent() is link_parent:
                    link.append(deepcopy(node))
                to_remove.add(node)
        elif node.tag is etree.Comment and \
                node.text.strip(utils.spaceCharacters) == u"begin-link":
            link_parent = node.getparent()
            in_link = True
            link = etree.Element(u"a")
            link.text = node.tail
            node.tail = None
            node.addnext(link)

    # Basic substitutions
    for comment, sub in instance_basic_comment_subs:
        begin_sub = u"begin-" + comment
        end_sub = u"end-" + comment
        in_sub = False
        for node in ElementTree.iter():
            if in_sub:
                if node.tag is etree.Comment and \
                   node.text.strip(utils.spaceCharacters) == end_sub:
                    if node.getparent() is not sub_parent:
                        # Both names must be passed as one tuple: "%"
                        # binds tighter than the comma
                        raise DifferentParentException(
                            u"%s and %s have different parents" %
                            (begin_sub, end_sub))
                    in_sub = False
                else:
                    to_remove.add(node)
            elif node.tag is etree.Comment:
                if node.text.strip(utils.spaceCharacters) == begin_sub:
                    sub_parent = node.getparent()
                    in_sub = True
                    node.tail = None
                    node.addnext(deepcopy(sub))
                elif node.text.strip(utils.spaceCharacters) == comment:
                    node.addprevious(etree.Comment(begin_sub))
                    node.addprevious(deepcopy(sub))
                    node.addprevious(etree.Comment(end_sub))
                    # The end marker inherits the text that followed the
                    # comment being replaced
                    node.getprevious().tail = node.tail
                    to_remove.add(node)

    # Remove nodes
    for node in to_remove:
        node.getparent().remove(node)
def buildTerms(self, ElementTree, w3c_compat=False, **kwargs):
    """Fill ``self.terms`` with an index of the document's <dfn> terms.

    For every <dfn> that has an id, or whose parent has one, a <dl> entry
    is appended to ``self.terms``.  It holds the term itself (<dt>), an
    excerpt of the defining paragraph when several <dfn>s share the same
    text (<dd class="dfn-excerpt">), and one <dd> per section heading that
    contains the definition or a link to it.  Alphabetic navigation bars
    are inserted at the top, before each new initial letter and at the end.

    The document is modified as well: every element that defines or links
    to an indexed term is given an id.

    ElementTree -- document tree to index.
    w3c_compat -- accepted but not used here.
    kwargs -- forwarded to ``utils.generateID``.

    When the document has no <dfn> at all, ``self.terms`` only gets its
    whitespace text and tail.
    """
    self.terms.text = "\n"

    # make a list of all the defining instances of "terms" in the document
    # -- <dfn> elements
    dfnList = ElementTree.findall("//dfn")
    if not dfnList:
        # nothing to index, and therefore no navigation bars to build
        self.terms.tail = "\n"
        return

    indexNavTop = etree.Element(u"div", {
        u"class": "index-nav",
        u"id": "index-terms_top"
    })
    indexNavTop.text = "\n"
    indexNavTop.tail = "\n"
    indexNavHelpers = {"top": indexNavTop}
    self.terms.append(indexNavTop)
    prevTermFirstLetter = None
    firstLetters = ["top"]

    # count the <dfn>s whose whitespace-normalized text is a
    # case-insensitive match for $content
    sameTextCount = (
        "count(//dfn"
        "[normalize-space(translate(.,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'))"
        "=normalize-space(translate($content,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz'))])"
    )

    # sort the list of <dfn> terms by the lowercase value of the DOM
    # textContent of the <dfn> element (concatenation of the <dfn> text
    # nodes and that of any of its descendant elements)
    dfnList.sort(key=lambda dfn: utils.textContent(dfn).lower())

    for dfn in dfnList:
        # we don't need the tail, so copy the <dfn> and drop the tail
        term = deepcopy(dfn)
        term.tail = None

        # use the id of the <dfn> itself or, failing that, the id of its
        # parent node; with neither there is nothing to link to, so no
        # index entry is created for this <dfn>
        termID = dfn.get("id")
        dfnHasID = bool(termID)
        if not dfnHasID:
            termID = dfn.getparent().get("id")
        if not termID:
            continue

        # the index entry gets an id based on the id found above; if that
        # id came from the parent and this <dfn> has preceding sibling
        # <dfn>s that also lack an id, qualify it further to keep it
        # unique
        dfnSiblings = int(
            dfn.xpath("count(preceding-sibling::dfn[not(@id)])"))
        if not dfnHasID and dfnSiblings > 0:
            entryID = termID + "_" + str(dfnSiblings) + "_index"
        else:
            entryID = termID + "_index"
        indexEntry = etree.Element(u"dl", {u"id": entryID})
        indexEntry.text = "\n"

        # termName is container of the name of the term as it appears in
        # the index
        termName = etree.Element(u"dt")
        if "id" in term.attrib:
            del term.attrib["id"]
        term.tag = "span"
        term.tail = "\n"
        termName.append(term)
        termName.tail = "\n"
        indexEntry.append(termName)

        # the copy is not modified from here on, so read its text once
        termText = utils.textContent(term)

        if ElementTree.xpath(sameTextCount, content=termText) > 1:
            # more than one <dfn> in the document has this text; attempt
            # to disambiguate them by including a copy of the parent node
            # of the <dfn> as an excerpt that provides context
            dfnParentNode = deepcopy(dfn.getparent())
            # if the <dfn> is the only child of its parent there is no
            # useful context to show, and if the parent is an h1-h6
            # heading it is already listed in the entry
            if len(dfnParentNode) > 1 and not re.match(
                    "^[hH][1-6]$", dfnParentNode.tag):
                dfnContext = etree.Element(u"dd",
                                           {u"class": u"dfn-excerpt"})
                dfnContext.text = "\n"
                dfnContext.tail = "\n"
                # drop the text that precedes the first child element;
                # unless that child is a <dfn>, put a marker in its place
                # to show that something has been elided
                if dfnParentNode[0].tag != "dfn":
                    dfnParentNode.text = "*** "
                else:
                    dfnParentNode.text = ""
                dfnParentNode.tag = "span"
                # remove IDs so that we don't duplicate them
                if "id" in dfnParentNode.attrib:
                    del dfnParentNode.attrib["id"]
                descendants = dfnParentNode.xpath(".//*[self::dfn or @id]")
                for descendant in descendants:
                    if descendant.tag == "dfn":
                        descendant.tag = "span"
                    if "id" in descendant.attrib:
                        del descendant.attrib["id"]
                    if utils.textContent(descendant).lower() == \
                            termText.lower():
                        # same text as the term: don't repeat it, put an
                        # ellipsis in its place.  clear() also drops the
                        # tail, so the tail is folded into the new text.
                        tail = ""
                        if descendant.tail is not None:
                            tail = descendant.tail
                        descendant.clear()
                        descendant.text = "..." + tail
                    elif descendant == descendants[0]:
                        # the first selected descendant is not the
                        # current term, so mark the elided start with
                        # characters other than an ellipsis
                        dfnParentNode.text = "*** "
                dfnContext.append(dfnParentNode)
                indexEntry.append(dfnContext)

        # we need a first letter so that we can build navigational links
        # for the alphabetic nav bars injected into the index; slicing
        # gives "" instead of an IndexError for a <dfn> without any text
        termFirstLetter = termText[:1].upper()
        if termFirstLetter != prevTermFirstLetter and \
                termFirstLetter.isalpha():
            firstLetters.append(termFirstLetter)
            navHelper = etree.Element(u"div", {
                u"class": "index-nav",
                u"id": "index-terms_" + termFirstLetter
            })
            indexNavHelpers[termFirstLetter] = navHelper
            prevTermFirstLetter = termFirstLetter
            self.terms.append(navHelper)

        # all the hyperlinked references back to the defining instance of
        # this term, together with the defining instance itself
        instanceList = ElementTree.xpath(
            "//a[substring-after(@href,'#')=$targetID]|//*[@id=$targetID]",
            targetID=termID)
        instanceItem = None
        lastLinkToHeading = None
        n = 0
        for instance in instanceList:
            # each instance needs an id so that the index can link back
            # to it
            instanceID = utils.generateID(instance, **kwargs)
            instance.set(u"id", instanceID)
            # a link that is a copy of the h1-h6 heading of the section
            # containing this instance
            linkToHeading = self.getAncestorHeadingLink(instance,
                                                        instanceID)
            if instance.tag != u"a":
                linkToHeading.set(u"class", "dfn-ref")
            if lastLinkToHeading is None or \
                    utils.textContent(linkToHeading) != \
                    utils.textContent(lastLinkToHeading):
                # a heading not yet listed for this term: start a new
                # item and attach it to the entry right away, so that no
                # item can be left out when the loop ends
                instanceItem = etree.Element(u"dd")
                instanceItem.text = "\n"
                instanceItem.tail = "\n"
                lastLinkToHeading = linkToHeading
                n = 1
                linkToHeading.tail = "\n"
                instanceItem.append(linkToHeading)
                indexEntry.append(instanceItem)
            else:
                # same heading as the previous instance: instead of
                # repeating the heading, append a numbered link to it
                n += 1
                counterClass = "index-counter"
                if instance.tag != u"a":
                    counterClass = "dfn-ref"
                counterLink = etree.Element(u"a", {
                    u"href": "#" + instanceID,
                    u"class": counterClass
                })
                counterLink.text = "(" + str(n) + ")"
                counterLink.tail = "\n"
                instanceItem.append(counterLink)

        if len(instanceList) <= 1:
            # the <dfn> defining instance is the only item, i.e. the
            # document contains no hyperlinked reference to it; flag that
            indexEntry.set(u"class", "has-norefs")
        self.terms.append(indexEntry)
        indexEntry.tail = "\n"

    # inject some alphabetic nav hyperlink bars into the index, strictly
    # for convenience purposes
    navLetters = etree.Element(u"p")
    navLetters.text = "\n"
    navLetters.tail = "\n"
    firstLetters.append("end")
    for letter in firstLetters:
        navLetter = etree.Element(u"a", {u"href": "#index-terms_" + letter})
        navLetter.text = letter
        navLetter.tail = "\n"
        navLetters.append(navLetter)

    # every nav container needs its own copy of the bar: appending the
    # same node twice would move it instead of duplicating it
    for navNode in indexNavHelpers.values():
        navNode.text = "\n"
        navNode.append(deepcopy(navLetters))
        navNode.tail = "\n"

    indexNavEnd = etree.Element(u"div", {
        u"class": "index-nav",
        u"id": "index-terms_end"
    })
    indexNavEnd.text = "\n"
    indexNavEnd.tail = "\n"
    indexNavEnd.append(deepcopy(navLetters))
    self.terms.append(indexNavEnd)
    self.terms.tail = "\n"
def commentSubstitutions(self, ElementTree, w3c_compat=False,
                         w3c_compat_substitutions=False,
                         w3c_compat_crazy_substitutions=False,
                         enable_woolly=False, **kwargs):
    """Expand the comment-driven substitutions of the document, in place.

    The nodes between a ``begin-link`` and an ``end-link`` comment are
    copied into a new ``<a>`` element placed after the begin marker, whose
    ``href`` is set to its own text content; the original nodes are
    removed.  Afterwards every ``(comment, sub)`` pair is handed to
    ``utils.replaceComment``.

    ElementTree -- tree to modify.
    w3c_compat, w3c_compat_substitutions -- either one adds the "logo" and
        "copyright" substitutions, the latter carrying the year of
        ``self.pubdate``.
    enable_woolly -- adds the CSS WG icon to the logo paragraph.
    kwargs -- forwarded to ``utils.replaceComment``.

    Raises utils.DifferentParentException when ``begin-link`` and
    ``end-link`` do not have the same parent.
    """
    comment_subs = basic_comment_subs

    # Compat. mode brings two more substitutions, built on the fly
    if w3c_compat or w3c_compat_substitutions:
        pub_year = time.strftime("%Y", self.pubdate)
        copyright_sub = etree.fromstring(
            '<p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice#Copyright">Copyright</a> © %s <a href="http://www.w3.org/"><abbr title="World Wide Web Consortium">W3C</abbr></a><sup>®</sup> (<a href="http://www.csail.mit.edu/"><abbr title="Massachusetts Institute of Technology">MIT</abbr></a>, <a href="http://www.ercim.eu/"><abbr title="European Research Consortium for Informatics and Mathematics">ERCIM</abbr></a>, <a href="http://www.keio.ac.jp/">Keio</a>, <a href="http://ev.buaa.edu.cn/">Beihang</a>), All Rights Reserved. W3C <a href="http://www.w3.org/Consortium/Legal/ipr-notice#Legal_Disclaimer">liability</a>, <a href="http://www.w3.org/Consortium/Legal/ipr-notice#W3C_Trademarks">trademark</a> and <a href="http://www.w3.org/Consortium/Legal/copyright-documents">document use</a> rules apply.</p>'
            % pub_year)

        logo_parts = [
            '<a href="http://www.w3.org/"><img height="48" width="72" alt="W3C" src="https://www.w3.org/Icons/w3c_home"/></a>'
        ]
        if enable_woolly:
            logo_parts.append(
                '<a class="logo" href="https://www.w3.org/Style/Group/" rel="in-activity"><img alt="CSS WG" src="https://www.w3.org/Style/Woolly/woolly-icon"/></a>'
            )
        logo_sub = etree.fromstring('<p>%s</p>' % "".join(logo_parts))

        comment_subs += (("logo", logo_sub), ("copyright", copyright_sub))

    # Nodes to take out of the tree once all the walking is done
    to_remove = set()

    # Links: link_parent doubles as the "inside a link" flag
    link_parent = None
    link = None
    for node in ElementTree.iter():
        marker = None
        if node.tag is etree.Comment:
            marker = node.text.strip(utils.spaceCharacters)

        if link_parent is None:
            if marker == "begin-link":
                link_parent = node.getparent()
                link = etree.Element("a")
                link.text = node.tail
                node.tail = None
                node.addnext(link)
            continue

        if marker != "end-link":
            # Only siblings of the begin marker are copied (the copy
            # brings their descendants along); every node in between is
            # removed from its original place
            if node.getparent() is link_parent:
                link.append(deepcopy(node))
            to_remove.add(node)
            continue

        if node.getparent() is not link_parent:
            raise utils.DifferentParentException(
                "begin-link and end-link have different parents")
        utils.removeInteractiveContentChildren(link)
        link.set("href", utils.textContent(link))
        link_parent = None

    # Basic substitutions
    for comment, sub in comment_subs:
        utils.replaceComment(ElementTree, comment, sub, **kwargs)

    # Remove nodes
    for stale in to_remove:
        stale.getparent().remove(stale)
def stringSubstitutions(self, ElementTree, w3c_compat=False,
                        w3c_compat_substitutions=False,
                        w3c_compat_crazy_substitutions=False,
                        w3c_shortname='', **kwargs):
    """Expand the textual placeholders of the document, in place.

    ``[YEAR…]``, ``[DATE…]``, ``[CDATE…]`` and ``[UDATE…]`` are replaced by
    renderings of ``self.pubdate``, and the title placeholder by the text
    of the document's <title>.  Element text, tail text and attribute
    values of every node are processed.

    ElementTree -- tree whose nodes are rewritten.
    w3c_compat, w3c_compat_substitutions -- either one also enables the
        status, long-status, shortname, latest-version and this-version
        substitutions.
    w3c_compat_crazy_substitutions -- also enables the W3C stylesheet URL
        substitution.
    w3c_shortname -- shortname of the document; when empty, the name of
        the current working directory is used.

    Raises KeyError when a compat mode is on and ``self.w3c_status`` has
    no entry in ``longstatus_map``.
    """
    # Get doc_title from the title element; a missing <head> or <title>
    # gives an empty title
    try:
        doc_title = utils.textContent(
            ElementTree.getroot().find("head").find("title"))
    except (AttributeError, TypeError):
        doc_title = ""

    year = re.compile(r"\[YEAR[^\]]*\]")
    year_sub = time.strftime("%Y", self.pubdate)
    year_identifier = "[YEAR"

    date = re.compile(r"\[DATE[^\]]*\]")
    # no leading zero on the day of the month
    date_sub = time.strftime("%d %B %Y", self.pubdate).lstrip("0")
    date_identifier = "[DATE"

    cdate = re.compile(r"\[CDATE[^\]]*\]")
    cdate_sub = time.strftime("%Y%m%d", self.pubdate)
    cdate_identifier = "[CDATE"

    udate = re.compile(r"\[UDATE[^\]]*\]")
    udate_sub = time.strftime("%Y-%m-%d", self.pubdate)
    udate_identifier = "[UDATE"

    string_subs = ((year, year_sub, year_identifier),
                   (date, date_sub, date_identifier),
                   (cdate, cdate_sub, cdate_identifier),
                   (udate, udate_sub, udate_identifier))

    compat_subs = w3c_compat or w3c_compat_substitutions

    if compat_subs:
        # Get the right long status
        doc_longstatus = longstatus_map[self.w3c_status]

    if w3c_compat_crazy_substitutions:
        # Get the right stylesheet
        doc_w3c_stylesheet = "http://www.w3.org/StyleSheets/TR/W3C-%s" % (
            self.w3c_status, )

    # Get all the subs we want
    string_subs += ((title, doc_title, title_identifier), )

    # And even more in compat. mode
    if compat_subs:
        try:
            shortname_sub = w3c_shortname or os.path.basename(os.getcwd())
        except OSError:
            # the working directory could not be determined
            shortname_sub = ""
        latest_sub = "http://www.w3.org/TR/%s/" % (shortname_sub, )
        version_sub = "http://www.w3.org/TR/%s/%s-%s-%s/" % (
            year_sub, self.w3c_status, shortname_sub, cdate_sub)
        string_subs += ((status, self.w3c_status, status_identifier),
                        (longstatus, doc_longstatus, longstatus_identifier),
                        (shortname, shortname_sub, shortname_identifier),
                        (latest, latest_sub, latest_identifier),
                        (version, version_sub, version_identifier))

    # And more that aren't even enabled by default in compat. mode
    if w3c_compat_crazy_substitutions:
        string_subs += ((w3c_stylesheet, doc_w3c_stylesheet,
                         w3c_stylesheet_identifier), )

    # All the replacement values are plain data (title, shortname, dates,
    # URLs), but regex.sub() treats its replacement argument as a template
    # in which backslashes introduce escapes and group references.  Double
    # the backslashes so that every value is inserted literally.
    string_subs = tuple((regex, sub.replace("\\", "\\\\"), identifier)
                        for regex, sub, identifier in string_subs)

    for node in ElementTree.iter():
        for regex, sub, identifier in string_subs:
            if node.text is not None and identifier in node.text:
                node.text = regex.sub(sub, node.text)
            if node.tail is not None and identifier in node.tail:
                node.tail = regex.sub(sub, node.tail)
            # items() is re-read for every substitution so that each one
            # sees the attribute values left by the previous one
            for name, value in node.attrib.items():
                if identifier in value:
                    node.attrib[name] = regex.sub(sub, value)
def commentSubstitutions(self, ElementTree, w3c_compat=False,
                         w3c_compat_substitutions=False,
                         w3c_compat_crazy_substitutions=False, **kwargs):
    """Expand the comment-driven substitutions of the document, in place.

    Two kinds of marker comments are handled:

    * ``begin-link`` ... ``end-link``: the nodes between the two comments
      are copied into a new ``<a>`` element placed after the begin marker,
      its ``href`` is set to its own text content, and the original nodes
      are removed.
    * For every ``(comment, sub)`` pair: the nodes between ``begin-X`` and
      ``end-X`` are removed and a copy of ``sub`` is placed after the begin
      marker; a lone ``X`` comment is replaced by a begin marker, a copy of
      ``sub`` and an end marker.

    ElementTree -- tree to modify.
    w3c_compat, w3c_compat_substitutions -- either one adds the logo and
        copyright substitutions to the basic ones.

    Raises DifferentParentException when a begin marker and its end marker
    do not have the same parent.
    """
    # Basic substitutions.  Work on a private tuple so that extending it
    # below can never modify the module-level sequence.
    instance_basic_comment_subs = tuple(basic_comment_subs)

    # Add more basic substitutions in compat. mode
    if w3c_compat or w3c_compat_substitutions:
        instance_basic_comment_subs += ((logo, logo_sub),
                                        (copyright, copyright_sub))

    # Set of nodes to remove
    to_remove = set()

    # Link
    in_link = False
    for node in ElementTree.iter():
        if in_link:
            if node.tag is etree.Comment and \
               node.text.strip(utils.spaceCharacters) == u"end-link":
                if node.getparent() is not link_parent:
                    raise DifferentParentException(
                        u"begin-link and end-link have different parents")
                utils.removeInteractiveContentChildren(link)
                link.set(u"href", utils.textContent(link))
                in_link = False
            else:
                # Only siblings of the begin marker are copied (the copy
                # brings their descendants along); every node in between
                # is removed from its original place
                if node.getparent() is link_parent:
                    link.append(deepcopy(node))
                to_remove.add(node)
        elif node.tag is etree.Comment and \
                node.text.strip(utils.spaceCharacters) == u"begin-link":
            link_parent = node.getparent()
            in_link = True
            link = etree.Element(u"a")
            link.text = node.tail
            node.tail = None
            node.addnext(link)

    # Basic substitutions
    for comment, sub in instance_basic_comment_subs:
        begin_sub = u"begin-" + comment
        end_sub = u"end-" + comment
        in_sub = False
        for node in ElementTree.iter():
            if in_sub:
                if node.tag is etree.Comment and \
                   node.text.strip(utils.spaceCharacters) == end_sub:
                    if node.getparent() is not sub_parent:
                        # Both names must be passed as one tuple: "%"
                        # binds tighter than the comma
                        raise DifferentParentException(
                            u"%s and %s have different parents" %
                            (begin_sub, end_sub))
                    in_sub = False
                else:
                    to_remove.add(node)
            elif node.tag is etree.Comment:
                if node.text.strip(utils.spaceCharacters) == begin_sub:
                    sub_parent = node.getparent()
                    in_sub = True
                    node.tail = None
                    node.addnext(deepcopy(sub))
                elif node.text.strip(utils.spaceCharacters) == comment:
                    node.addprevious(etree.Comment(begin_sub))
                    node.addprevious(deepcopy(sub))
                    node.addprevious(etree.Comment(end_sub))
                    # The end marker inherits the text that followed the
                    # comment being replaced
                    node.getprevious().tail = node.tail
                    to_remove.add(node)

    # Remove nodes
    for node in to_remove:
        node.getparent().remove(node)
def buildToc(self, ElementTree, min_depth=2, max_depth=6, w3c_compat=False,
             w3c_compat_class_toc=False, **kwargs):
    """Number the section headings and build the table of contents.

    The document outline is walked depth first.  Any existing
    ``span.secno`` is removed from every heading.  For sections whose
    depth lies between ``min_depth`` and ``max_depth`` (both 1-based), the
    heading is given an id and, unless it has the ``no-num`` class, a
    ``span.secno`` holding the dotted section number; unless it has the
    ``no-toc`` class, a linked copy of the heading is added as an <li>
    under ``self.toc``.

    ElementTree -- document tree, modified in place.
    min_depth, max_depth -- range of outline depths that are numbered and
        listed.
    w3c_compat, w3c_compat_class_toc -- either one puts ``class="toc"`` on
        the nested <ol> elements.
    kwargs -- forwarded to the outliner, ``self.indentNode`` and
        ``utils.generateID``.
    """
    # Build the outline of the document
    outline_creator = outliner.Outliner(ElementTree, **kwargs)
    outline = outline_creator.build(**kwargs)

    # Get a list of all the top level sections, and their depth (0)
    sections = [(section, 0) for section in reversed(outline)]

    # Numbering
    num = []

    class_toc = w3c_compat or w3c_compat_class_toc

    # Loop over all sections in a DFS
    while sections:
        # Get the section and depth at the end of list
        section, depth = sections.pop()

        # Get the element that represents the section header's text: the
        # header itself or, for an hgroup, its highest-ranking heading
        header_text = None
        if section.header is not None:
            if section.header.tag == u"hgroup":
                for level in range(1, 7):
                    # "%d" formatting replaces the Python 2-only
                    # unicode() builtin and yields the same text
                    candidate = section.header.find(u".//h%d" % level)
                    if candidate is not None:
                        header_text = candidate
                        break
            else:
                header_text = section.header

        # If we have a section heading text element, regardless of depth
        if header_text is not None:
            # Remove any existing number
            for element in header_text.findall(u".//span"):
                if utils.elementHasClass(element, u"secno"):
                    # Copy content, to prepare for the node being removed
                    utils.copyContentForRemoval(element, text=False,
                                                children=False)
                    # Remove the element (we can do this as we're not
                    # iterating over the elements, but over a list)
                    element.getparent().remove(element)

        # Check we're in the valid depth range (min/max_depth are 1 based,
        # depth is 0 based)
        if depth >= min_depth - 1 and depth <= max_depth - 1:
            # Calculate the corrected depth (i.e., the actual depth within
            # the numbering/TOC)
            corrected_depth = depth - min_depth + 1

            # Numbering:
            # No children, no sibling, move back to parent's sibling
            if corrected_depth + 1 < len(num):
                del num[corrected_depth + 1:]
            # Children
            elif corrected_depth == len(num):
                num.append(0)

            # Increment the current section's number
            if (header_text is not None and
                    not utils.elementHasClass(header_text, u"no-num")) or \
                    (header_text is None and section):
                num[-1] += 1

            # Get the current TOC section for this depth, and add another
            # item to it
            if (header_text is not None and
                    not utils.elementHasClass(header_text, u"no-toc")) or \
                    (header_text is None and section):
                # Find the appropriate section of the TOC
                i = 0
                toc_section = self.toc
                while i < corrected_depth:
                    try:
                        # If the final li has no children, or the last
                        # children isn't an ol element
                        if len(toc_section[-1]) == 0 or \
                           toc_section[-1][-1].tag != u"ol":
                            toc_section[-1].append(etree.Element(u"ol"))
                            self.indentNode(toc_section[-1][-1],
                                            (i + 1) * 2, **kwargs)
                            if class_toc:
                                toc_section[-1][-1].set(u"class", u"toc")
                    except IndexError:
                        # If the current ol has no li in it
                        toc_section.append(etree.Element(u"li"))
                        self.indentNode(toc_section[0], (i + 1) * 2 - 1,
                                        **kwargs)
                        toc_section[0].append(etree.Element(u"ol"))
                        self.indentNode(toc_section[0][0], (i + 1) * 2,
                                        **kwargs)
                        if class_toc:
                            toc_section[0][0].set(u"class", u"toc")
                    # TOC Section is now the final child (ol) of the final
                    # item (li) in the previous section
                    assert toc_section[-1].tag == u"li"
                    assert toc_section[-1][-1].tag == u"ol"
                    toc_section = toc_section[-1][-1]
                    i += 1
                # Add the current item to the TOC
                item = etree.Element(u"li")
                toc_section.append(item)
                self.indentNode(item, (i + 1) * 2 - 1, **kwargs)

            # If we have a header
            if header_text is not None:
                # Add ID to header: it goes on the section header, which
                # is the hgroup when there is one
                header_id = utils.generateID(header_text, **kwargs)
                if header_text.get(u"id") is not None:
                    del header_text.attrib[u"id"]
                section.header.set(u"id", header_id)

                # Add number, if @class doesn't contain no-num
                if not utils.elementHasClass(header_text, u"no-num"):
                    secno = etree.Element(u"span", {u"class": u"secno"})
                    header_text.insert(0, secno)
                    # The heading's leading text now follows the number
                    secno.tail = header_text.text
                    header_text.text = None
                    secno.text = u".".join(u"%d" % part
                                           for part in num) + u" "

                # Add to TOC, if @class doesn't contain no-toc
                if not utils.elementHasClass(header_text, u"no-toc"):
                    link = deepcopy(header_text)
                    item.append(link)
                    # Make it link to the header
                    link.tag = u"a"
                    link.set(u"href", u"#" + header_id)
                    # Remove interactive content child elements
                    utils.removeInteractiveContentChildren(link)
                    # Remove other child elements
                    for element_name in remove_elements_from_toc:
                        # Iterate over all the descendants of the new
                        # link with that element name
                        for element in link.findall(u".//" + element_name):
                            # Copy content, to prepare for the node being
                            # removed
                            utils.copyContentForRemoval(element)
                            # Remove the element (we can do this as we're
                            # not iterating over the elements, but over a
                            # list)
                            element.getparent().remove(element)
                    # Remove unwanted attributes
                    for element in link.iter(tag=etree.Element):
                        for attribute_name in remove_attributes_from_toc:
                            if element.get(attribute_name) is not None:
                                del element.attrib[attribute_name]
                    # We don't want the old tail
                    link.tail = None
                    # Check we haven't changed the content in all of that
                    assert utils.textContent(header_text) == \
                        utils.textContent(link)

        # Add subsections in reverse order (so the next one is executed
        # next) with a higher depth value
        sections.extend([(child_section, depth + 1)
                         for child_section in reversed(section)])
def buildToc(ElementTree, **kwargs):
    """Return an <ol> element that mirrors the outline of the document.

    The outline is walked depth first.  Every section becomes an <li>
    nested in as many <ol> levels as its depth; the <li> holds the text of
    the section's heading, or an italic "Untitled Section" when the
    section has no heading.

    ElementTree -- document tree to outline.
    kwargs -- forwarded to the outliner.
    """
    # Create root element of TOC
    toc = etree.Element(u"ol")

    # Build the outline of the document
    outline_creator = outliner.Outliner(ElementTree, **kwargs)
    outline = outline_creator.build(**kwargs)

    # Get a list of all the top level sections, and their depth (0)
    sections = [(section, 0) for section in reversed(outline)]

    # Loop over all sections in a DFS
    while sections:
        # Get the section and depth at the end of list
        section, depth = sections.pop()

        # Get the element that represents the section header's text: the
        # header itself or, for an hgroup, its highest-ranking heading
        header_text = None
        if section.header is not None:
            if section.header.tag == u"hgroup":
                for level in range(1, 7):
                    # "%d" formatting replaces the Python 2-only
                    # unicode() builtin and yields the same text
                    candidate = section.header.find(u".//h%d" % level)
                    if candidate is not None:
                        header_text = candidate
                        break
            else:
                header_text = section.header

        # Find the appropriate section of the TOC
        toc_section = toc
        for _ in range(depth):
            try:
                # If the final li has no children, or the last children
                # isn't an ol element
                if len(toc_section[-1]) == 0 or \
                   toc_section[-1][-1].tag != u"ol":
                    toc_section[-1].append(etree.Element(u"ol"))
            except IndexError:
                # If the current ol has no li in it
                toc_section.append(etree.Element(u"li"))
                toc_section[0].append(etree.Element(u"ol"))
            # TOC Section is now the final child (ol) of the final item
            # (li) in the previous section
            assert toc_section[-1].tag == u"li"
            assert toc_section[-1][-1].tag == u"ol"
            toc_section = toc_section[-1][-1]

        # Add the current item to the TOC
        item = etree.Element(u"li")
        toc_section.append(item)

        # If we have a header
        if header_text is not None:
            item.text = utils.textContent(header_text)
        else:
            italics = etree.Element(u"i")
            italics.text = "Untitled Section"
            item.append(italics)

        # Add subsections in reverse order (so the next one is executed
        # next) with a higher depth value
        sections.extend([(child_section, depth + 1)
                         for child_section in reversed(section)])

    return toc