def html_minify(html_code, ignore_comments=True, parser="html5lib"):
    """Return a whitespace-minified unicode rendering of ``html_code``.

    The input is first passed through ``force_decode``, then parsed by
    BeautifulSoup with the requested ``parser``; the resulting tree is
    handed to ``space_minify`` together with ``ignore_comments`` and the
    outcome is converted with ``unicode``.

    :param html_code: the HTML markup to minify.
    :param ignore_comments: forwarded unchanged to ``space_minify``.
    :param parser: name of the BeautifulSoup tree builder to use.
    :returns: the minified document as a unicode string.
    """
    decoded_markup = force_decode(html_code)
    minified_tree = space_minify(
        bs4.BeautifulSoup(decoded_markup, parser),
        ignore_comments,
    )

    if FOLD_DOCTYPE is True:
        # Monkey patch: drop the newline bs4 emits after the doctype.
        # NOTE: this assigns a class attribute on bs4's Doctype, so it
        # stays in effect after this call returns.
        bs4.element.Doctype.SUFFIX = u'>'

    return unicode(minified_tree)
def test_should_be_able_to_chose_the_encoding(self):
    # force_decode must honour an explicitly supplied ``encoding`` keyword.
    codec_name = 'IBM857'
    # Build a byte string in the target codec from the source literal
    # (assumes this source file is saved as UTF-8 -- TODO confirm).
    encoded_bytes = "Blá blá".decode("utf-8").encode(codec_name)
    raw_input = str(encoded_bytes)
    decoded = force_decode(raw_input, encoding=codec_name)
    self.assertEqual(u"Blá blá", decoded)
def test_shoulde_decode_a_latin_string(self):
    # With no ``encoding`` argument, force_decode is expected to turn
    # latin-1 encoded bytes back into the original unicode text.
    # (Assumes this source file is saved as UTF-8 -- TODO confirm.)
    latin_bytes = "Blá blá".decode("utf-8").encode("latin-1")
    raw_input = str(latin_bytes)
    decoded = force_decode(raw_input)
    self.assertEqual(u"Blá blá", decoded)
def test_should_decode_a_utf8_string(self):
    # The plain byte-string literal is passed straight to force_decode,
    # which is expected to return the equivalent unicode text.
    # (The literal's bytes follow this file's source encoding,
    # presumably UTF-8 -- verify against the file's coding declaration.)
    raw_input = "Blá blá"
    decoded = force_decode(raw_input)
    self.assertEqual(u"Blá blá", decoded)