def __call__(self, data, **metadata):
    """
    Pretty-print markup that strutils.is_xml() accepts, using lxml's HTML parser.

    Returns a ("HTML", format_text(...)) pair for accepted data and None
    otherwise. The **metadata keyword arguments are accepted but not used.
    """
    if not strutils.is_xml(data):
        return None

    html_parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True)
    root = lxml.html.fromstring(data, parser=html_parser)
    # Carry the parsed document's doctype over to the serialized output.
    doctype = root.getroottree().docinfo.doctype
    pretty = lxml.etree.tostring(
        root,
        pretty_print=True,
        doctype=doctype,
        encoding='utf8'
    )
    return "HTML", format_text(pretty)
def __call__(self, data, **metadata):
    """
    Re-serialize XML-looking data as pretty-printed HTML.

    Data rejected by strutils.is_xml() yields None. Otherwise the data is
    parsed with an lxml HTMLParser (strip_cdata and remove_blank_text
    enabled) and serialized as utf8 with the parsed document's doctype;
    the result is returned as ("HTML", format_text(serialized)).
    The **metadata keyword arguments are not consulted.
    """
    if not strutils.is_xml(data):
        return None

    tree = lxml.html.fromstring(
        data,
        parser=lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True)
    )
    serialized = lxml.etree.tostring(
        tree,
        pretty_print=True,
        doctype=tree.getroottree().docinfo.doctype,
        encoding='utf8'
    )
    return "HTML", format_text(serialized)
def __call__(self, data, **metadata):
    """
    Render XML-looking data as HTML with its text rewritten in pig latin.

    The data is parsed with lxml's HTML parser, the text and tail of every
    node are rewritten word by word, and the tree is serialized
    pretty-printed with the parsed document's doctype.

    Returns ("HTML", contentviews.format_text(serialized)), or None when
    strutils.is_xml() rejects the data. **metadata is not used.
    """
    if not strutils.is_xml(data):
        return None

    def piglify(src):
        """Rewrite each whitespace-separated word; words are re-joined with single spaces."""
        converted = []
        for word in src.split():
            # ``cut`` is where the run of trailing punctuation begins. It never
            # drops below 1, so the first character always stays in the stem.
            cut = len(word)
            while cut > 1 and word[cut - 1] in string.punctuation:
                cut -= 1
            stem, trailer = word[:cut], word[cut:]
            if word[0].lower() in 'aeiou':
                converted.append(stem + "hay" + trailer)
            else:
                # Move the leading character behind the rest of the stem.
                converted.append(stem[1:] + stem[0] + "ay" + trailer)
        return ' '.join(converted)

    def walk(node):
        """Piglify the text and tail of node, then of all its descendants."""
        if getattr(node, 'text', None):
            node.text = piglify(node.text)
        if getattr(node, 'tail', None):
            node.tail = piglify(node.tail)
        for child in node:
            walk(child)

    html_parser = lxml.etree.HTMLParser(strip_cdata=True, remove_blank_text=True)
    document = lxml.html.fromstring(data, parser=html_parser)
    info = document.getroottree().docinfo
    walk(document)
    serialized = lxml.etree.tostring(
        document,
        pretty_print=True,
        doctype=info.doctype
    )
    return "HTML", contentviews.format_text(serialized)
def __call__(self, data, **metadata):
    """
    Choose a view for ``data`` and return that view's result.

    Selection order:

    1. If there is data and a "content-type" header, the parsed
       "type/subtype" is looked up in content_types_map and the first
       mapped view is used. Failing that, data accepted by
       strutils.is_xml() goes to the "XML" view.
    2. If metadata carries a truthy "query", the "Query" view is used.
    3. Data that strutils.is_mostly_bin() flags goes to the "Hex" view.
    4. Empty data yields ("No content", []).
    5. Anything else goes to the "Raw" view.

    ``metadata`` may contain "headers" (an object with a dict-style
    ``get``) and "query"; it is forwarded to the content-type, XML and
    Query views only.
    """
    headers = metadata.get("headers", {})
    ctype = headers.get("content-type")
    if data and ctype:
        parsed = http.parse_content_type(ctype)
        # NOTE(review): parse_content_type presumably returns None for a
        # header value it cannot split into type/subtype - confirm against
        # its documentation. Subscripting that result unconditionally would
        # raise TypeError, so a malformed header skips the map lookup and
        # falls through to the sniffing below instead.
        if parsed:
            ct = "%s/%s" % (parsed[0], parsed[1])
            if ct in content_types_map:
                return content_types_map[ct][0](data, **metadata)
        if strutils.is_xml(data):
            return get("XML")(data, **metadata)
    if metadata.get("query"):
        return get("Query")(data, **metadata)
    if data and strutils.is_mostly_bin(data):
        return get("Hex")(data)
    if not data:
        return "No content", []
    return get("Raw")(data)
def test_is_xml():
    """strutils.is_xml rejects plain text and accepts input whose first non-blank character is '<'."""
    plain = b"foo"
    xml_like = (b"<foo", b" \n<foo")

    assert not strutils.is_xml(plain)
    for payload in xml_like:
        assert strutils.is_xml(payload)