Example #1
0
	def strip_and_clean(self, html):
		"""Return the text content of an HTML string with all markup removed.

		Processing steps, in order:

		1. Every line is stripped of leading/trailing whitespace and the
		   lines are concatenated without a separator.
		2. Any DOCTYPE declaration is removed (matched case-insensitively).
		3. The result is passed through ``StringUtil.br2nl``.
		4. The markup is parsed with BeautifulSoup and all text nodes are
		   joined into a single string.

		:param html: the HTML source as a string.
		:return: the concatenated text nodes of the parsed document.
		"""
		# Collapse the document onto one line so the DOCTYPE pattern below can
		# match a declaration that was spread over several source lines.
		# NOTE(review): lines are joined with no separator, so words that were
		# separated only by a line break end up glued together -- confirm
		# this is intended.
		html = "".join(line.strip() for line in html.split("\n"))

		# The inline (?i) flag makes the match case-insensitive, so the HTML5
		# spelling "<!doctype html>" is removed as well as "<!DOCTYPE ...>".
		# Presumably the declaration is dropped here so the parser below does
		# not report it as a text node -- TODO confirm.
		html = re.sub(r'(?i)<!DOCTYPE.*?>', '', html)

		# Presumably turns <br> tags into newline characters (helper is
		# defined outside this block) -- verify against StringUtil.
		html = StringUtil.br2nl(html)

		# strip html: keep only the text nodes of the parsed tree
		return ''.join(BeautifulSoup(html).findAll(text=True))