Esempio n. 1
0
def cleanTags(string):
  """Remove all html tags from the string and collapse whitespace runs.

    >>> cleanTags("<html><head><title>Hello</title><body>Test</body></html>")
    'HelloTest'

  The markup is stripped by a C{Cleaner} built with C{allow_tags=['']},
  C{remove_unknown_tags=False} and C{style=True}; a C{<div>} wrapper
  around the cleaned output is removed, runs of two or more whitespace
  characters are replaced by a single space, and the result is stripped
  of leading/trailing whitespace.

  @type  string: string
  @param string: the string to clean; a falsy value (C{None}, C{""}) is
                 replaced by the placeholder text C{u"dummy"} before it is
                 handed to the cleaner
  @rtype: string
  @return: the cleaned up string
  """
  # http://lxml.de/api/lxml.html.clean.Cleaner-class.html
  # NOTE(review): allow_tags=[''] presumably acts as a whitelist that no
  # real tag name can match, so every element is dropped -- confirm
  # against the Cleaner documentation linked above.
  cleaner = Cleaner(allow_tags=[''], remove_unknown_tags=False, style=True)
  htmlCleaned = cleaner.clean_html(string or u"dummy")

  # Peel off the <div> wrapper. The closing tag is only removed when it is
  # really there, so trailing content is never chopped off blindly.
  nice = htmlCleaned
  if nice.startswith("<div>"):
    nice = nice[len("<div>"):]
    if nice.endswith("</div>"):
      nice = nice[:-len("</div>")]

  # One pass is enough: the pattern is greedy, so every run of 2+
  # whitespace characters is consumed whole and no such run can remain
  # afterwards (assumes resub is re.sub -- TODO confirm at the import).
  return resub(r"\s\s+", " ", nice).strip()