def parseHTML(str):
    """Parse an HTML fragment and return the contents of its <body>.

    The input is passed through u() and parsed with html5lib using the
    lxml tree builder (HTML elements left un-namespaced).

    Returns a list holding the body's leading text (only when it is not
    None) followed by the body's child elements.
    """
    tree = html5lib.parse(u(str), treebuilder='lxml', namespaceHTMLElements=False)
    # The second child of the root is taken to be <body>
    # (presumably <head> always comes first in html5lib's output).
    body = tree.getroot()[1]
    nodes = list(body.iterchildren())
    if body.text is not None:
        nodes.insert(0, body.text)
    return nodes
def die(msg, *formatArgs):
    """Report a fatal error and, unless debugging, terminate the program.

    msg is a str.format() template; it and every entry of formatArgs are
    passed through u() before formatting.  The finished message is printed
    only the first time it is seen (tracked in the module-level `messages`
    collection).  The process then exits with status 1 unless
    config.debug is truthy.
    """
    formatted = u(msg).format(*[u(arg) for arg in formatArgs])
    msg = u"\033[1;31mFATAL ERROR:\033[0m " + formatted
    alreadyReported = msg in messages
    if not alreadyReported:
        messages.add(msg)
        print(msg)
    if config.debug:
        return
    sys.exit(1)
def linkTextsFromElement(el, preserveCasing=False):
    """Return the list of link texts that an element defines.

    * An explicitly empty 'title' attribute yields no texts at all.
    * A non-empty 'title' is split on '|'; each piece is stripped and
      passed through u().
    * Otherwise the element's stripped text content is the single text.

    The texts are lowercased unless preserveCasing is true.
    """
    from lib.htmlhelpers import textContent

    title = el.get('title')
    if title == '':
        return []

    if title:
        texts = [u(piece.strip()) for piece in title.split('|')]
    else:
        texts = [textContent(el).strip()]

    if not preserveCasing:
        texts = [text.lower() for text in texts]
    return texts
def setSpecData(self, spec):
    """Record the current spec's status, level and names on this object.

    Statuses "ED", "DREAM" and "UD" are stored as "ED"; every other
    status is stored as "TR".  Once the fields are set,
    removeSameSpecRefs() is invoked.
    """
    # I'll want to make this more complex later,
    # to enforce pubrules linking policy.
    isDraft = spec.status in ("ED", "DREAM", "UD")
    self.specStatus = "ED" if isDraft else "TR"

    self.specLevel = spec.level
    self.specName = spec.shortname
    # Need to get a real versioned shortname,
    # with the possibility of overriding the "shortname-level" pattern.
    self.specVName = u"-".join([spec.shortname, u(spec.level)]) if False else spec.shortname + "-" + u(spec.level)
    self.removeSameSpecRefs()
def innerHTML(el):
    """Serialize the contents of an element (not the element itself).

    Returns an empty unicode string for None.  Otherwise returns the
    element's leading text followed by the serialization of each child,
    as produced by html.tostring(..., encoding="unicode").
    """
    if el is None:
        return u''
    leadingText = el.text or u''
    serializedChildren = [
        u(html.tostring(child, encoding="unicode")) for child in el
    ]
    return u(leadingText + u''.join(serializedChildren))
def outerHTML(el):
    """Serialize an element, including its own tag but not its tail text.

    Returns an empty unicode string when el is None.
    """
    if el is not None:
        markup = html.tostring(el, with_tail=False, encoding="unicode")
        return u(markup)
    return u''
def say(msg, *formatArgs):
    """Print a formatted informational message unless config.quiet is set.

    msg is a str.format() template; it and every entry of formatArgs are
    passed through u() before formatting.
    """
    if config.quiet:
        return
    template = u(msg)
    print(template.format(*[u(arg) for arg in formatArgs]))
def warn(msg, *formatArgs):
    """Print a formatted warning once, unless config.quiet is set.

    msg is a str.format() template; it and every entry of formatArgs are
    passed through u() before formatting.  Each distinct finished message
    is printed only the first time it is seen (tracked in the
    module-level `messages` collection).
    """
    if config.quiet:
        return
    formatted = u(msg).format(*[u(arg) for arg in formatArgs])
    msg = u"\033[1;33mWARNING:\033[0m " + formatted
    if msg in messages:
        return
    messages.add(msg)
    print(msg)
def escapeAttr(str):
    """Escape a string for safe use inside an HTML attribute value.

    The input is passed through u(), then '&', "'" and '"' are replaced
    with the character references &amp;, &apos; and &quot; respectively.

    Returns the escaped text.
    """
    text = u(str)
    # Ampersands go first so the '&' introduced by the later
    # replacements is not escaped a second time.
    text = text.replace(u'&', u'&amp;')
    text = text.replace(u"'", u'&apos;')
    text = text.replace(u'"', u'&quot;')
    return text
def escapeHTML(str):
    """Escape a string for safe use as HTML text content.

    The input is passed through u(), then '&' and '<' are replaced with
    the character references &amp; and &lt; respectively.

    Returns the escaped text.
    """
    text = u(str)
    # Ampersands go first so the '&' in '&lt;' is not escaped again.
    text = text.replace(u'&', u'&amp;')
    text = text.replace(u'<', u'&lt;')
    return text
def parseDocument(str):
    """Parse a complete HTML document and return the resulting tree.

    The input is passed through u() and parsed with html5lib using the
    lxml tree builder, with HTML elements left un-namespaced.
    """
    return html5lib.parse(
        u(str),
        treebuilder='lxml',
        namespaceHTMLElements=False)
def textContent(el):
    """Return the text of an element, serialized with method='text'.

    The element's tail text is excluded (with_tail=False).
    """
    serialized = html.tostring(
        el, method='text', with_tail=False, encoding="unicode")
    return u(serialized)
def textContent(el):
    """Return an element's text content as produced by html.tostring.

    Serialization uses method='text' and leaves out the element's tail
    text (with_tail=False); the result is passed through u().
    """
    options = dict(method='text', with_tail=False, encoding="unicode")
    plainText = html.tostring(el, **options)
    return u(plainText)
def headingLevelOfElement(el):
    """Return the 'data-level' of the first relevant heading that has one.

    Iterates the headings yielded by relevantHeadings(el) for levels 2
    through 6 and returns the first 'data-level' attribute found, passed
    through u().  Returns None when no heading carries the attribute.
    """
    for heading in relevantHeadings(el, levels=[2, 3, 4, 5, 6]):
        level = heading.get('data-level')
        if level is not None:
            return u(level)
    return None
def escapeAttr(str):
    """Escape a string for safe use inside an HTML attribute value.

    The input is passed through u(); then '&' becomes &amp;, "'" becomes
    &apos; and '"' becomes &quot;.

    Returns the escaped text.
    """
    # Order matters: '&' is handled first so the ampersands that the
    # quote replacements introduce are left alone.
    return (u(str)
            .replace(u'&', u'&amp;')
            .replace(u"'", u'&apos;')
            .replace(u'"', u'&quot;'))
def headingLevelOfElement(el):
    """Find the heading level recorded for an element.

    Looks through the headings yielded by relevantHeadings(el) for
    levels 2 through 6 and returns the first 'data-level' attribute that
    is present, passed through u().  Returns None if none is present.
    """
    headings = relevantHeadings(el, levels=[2, 3, 4, 5, 6])
    # Lazy on purpose: stop consuming headings at the first match.
    candidates = (heading.get('data-level') for heading in headings)
    found = next((lvl for lvl in candidates if lvl is not None), None)
    if found is None:
        return None
    return u(found)