Example No. 1
# Imports inferred from the calls below; `tidy` is the helper these examples demonstrate.
from lxml import html
import antimarkdown
from antimarkdown import nodes

def normalize_html(html_text):
    # Serialize each parsed fragment back to markup, one fragment per line.
    html_text = u'\n'.join(html.tostring(el, encoding=unicode)
                           for el in antimarkdown.parse_fragments(html_text))
    # Put each tag on its own stripped line before pretty-printing with tidy().
    tidied = tidy(
        u'\n'.join(line.strip()
                   for line in nodes.whitespace(html_text.strip()).replace(u'>', u'>\n').splitlines()),
        pretty_print=True)
    return tidied.decode('utf-8')
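A minimal usage sketch (the input is hypothetical, and the exact pretty-printed output depends on the underlying tidy() implementation):

# Hypothetical call; normalize_html returns well-formed, pretty-printed markup
# as a unicode string.
fragment = u'<p>Hello <b>world'
print(normalize_html(fragment))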
Example No. 2
def _fix_document(self, doc, use_soup=False):
    if use_soup:
        # BeautifulSoup path: prettify() returns the formatted markup and does
        # not modify `soup`, so its result is discarded here.
        soup = BeautifulSoup(doc)
        soup.prettify()
        doc = unicode(soup)
    else:
        # Fall back to the tidy() helper.
        doc = tidy(doc)
    return doc
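Note that BeautifulSoup's prettify() returns the re-indented markup rather than mutating the soup in place, so its return value is dropped above and unicode(soup) serializes the unprettified tree. A sketch of a variant that keeps the prettified output (a suggestion, not the original author's code):

def _fix_document_prettified(self, doc):
    # Hypothetical variant: use prettify()'s return value directly.
    soup = BeautifulSoup(doc)
    return soup.prettify()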
Example No. 3
def dump(self, item):
    # Uses os and urllib2 (Python 2) from the enclosing module; tidy() and
    # CAN_TIDY are module-level helpers in the source project.
    # Mirror the item's path under self.folder, creating directories as needed.
    fullpath = item.filename + item.extension
    dirname, basename = os.path.split(fullpath)
    target_dir = os.path.join(self.folder, dirname)
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    target_file = os.path.join(target_dir, basename)
    request = urllib2.Request(
        item.url,
        item.data,
        item.headers
    )
    with open(target_file, 'wb') as target_stream:
        input_stream = urllib2.urlopen(request)
        # Pretty-print the downloaded document when tidy support is available,
        # otherwise write the raw response body unchanged.
        if CAN_TIDY:
            data = tidy(input_stream, pretty_print=True, encoding="utf-8")
        else:
            data = input_stream.read()
        target_stream.write(data)
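This snippet targets Python 2 (urllib2). On Python 3 the same download-and-tidy step would go through urllib.request; a rough sketch, carrying the item fields and the tidy() call over unchanged:

from urllib.request import Request, urlopen

# Python 3 sketch of the download step above; assumes the same item, target_file,
# CAN_TIDY and tidy() names as in the original snippet.
request = Request(item.url, data=item.data, headers=item.headers)
with open(target_file, 'wb') as target_stream:
    with urlopen(request) as input_stream:
        if CAN_TIDY:
            data = tidy(input_stream, pretty_print=True, encoding="utf-8")
        else:
            data = input_stream.read()
        target_stream.write(data)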
Example No. 4

def html5tidy(src):
    # mark_safe comes from django.utils.safestring; the tidied fragment is
    # marked safe so Django templates will not re-escape it.
    return mark_safe(tidy(src, fragment=True))
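mark_safe points to Django; a minimal sketch of exposing html5tidy as a custom template filter (the templatetags module and filter name are assumptions, not part of the original project):

# templatetags/tidy_tags.py (hypothetical module)
from django import template
from django.utils.safestring import mark_safe

register = template.Library()

@register.filter(name='html5tidy')
def html5tidy_filter(src):
    # Same pattern as above; tidy() is assumed importable from the project.
    return mark_safe(tidy(src, fragment=True))

In a template this would read {{ value|html5tidy }} after loading the tag library.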