def identify_mime(binary):
    """
    Detect the MIME type of a byte buffer, telling XHTML apart from HTML.

    The buffer is first sniffed with ``magic.from_buffer``. When the sniffed
    type equals ``mime.HTML`` the content is additionally parsed: if the root
    tag is ``(None, 'html')`` and the XHTML namespace URI is registered in
    ``root.prefixs`` with a ``None`` value, ``mime.XHTML`` is returned.

    NOTE(review): ``mime`` is assumed to be a module-level object exposing
    ``HTML`` and ``XHTML`` constants -- confirm against the file's imports.

    :param binary: bytes html
    :return: mime
    """
    # The detected value must NOT be stored in a local called ``mime``: that
    # would shadow the outer ``mime`` object and make ``mime.HTML`` an
    # attribute lookup on a str (AttributeError).
    detected = magic.from_buffer(binary, mime=True)
    if isinstance(detected, bytes):
        # Some python-magic versions return bytes, others str.
        detected = detected.decode()

    if detected != mime.HTML:
        return detected

    xhtml_uri = 'http://www.w3.org/1999/xhtml'
    try:
        root = parse(binary.decode()).root
        # presumably ``prefixs`` maps namespace URI -> prefix, with None
        # meaning the default namespace -- TODO confirm.
        if root.tag == (None, 'html') and root.prefixs[xhtml_uri] is None:
            return mime.XHTML
    except KeyError:
        # A KeyError here (e.g. the XHTML URI missing from ``prefixs``) means
        # the document is treated as plain HTML.
        return mime.HTML
    return detected
def test_parse():
    """
    Round-trip check for ``parse``, ``clean_whitespaces`` and ``pretty_insert``.

    Reads the document at ``TEST_XML_PATH``, parses it, and asserts that:

    * cleaning a pretty-inserted tree gives the same string as the cleaned tree,
    * ``pretty_insert`` on the cleaned tree yields the same string when repeated,
    * the cleaned and pretty-inserted strings differ,

    then feeds the serialized document to ``Et.fromstring`` with the original,
    cleaned and pretty-inserted roots in turn.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(TEST_XML_PATH) as xml_file:
        xmlstr = xml_file.read()

    xl = parse(xmlstr)
    e = xl.root
    clean_e = clean_whitespaces(e)
    pretty_e = pretty_insert(clean_e, dont_do_when_one_child=True)

    assert clean_e.string() == clean_whitespaces(pretty_e).string()
    assert pretty_e.string() == pretty_insert(clean_e, dont_do_when_one_child=True).string()
    assert clean_e.string() != pretty_e.string()

    # The document as parsed must be accepted by Et.fromstring ...
    Et.fromstring(xl.string())
    # ... and so must the document with each transformed root swapped in.
    for root in (clean_e, pretty_e):
        xl.root = root
        Et.fromstring(xl.string())