def convert(self, doc):
    """Convert HTML data to raw text.

    ``doc`` is fed in one piece to a fresh ``StripTagParser``; the
    parser is then closed and its ``str()`` form is returned.
    """
    parser = StripTagParser()
    parser.feed(doc)
    # close() is called before reading the result so the parser can
    # process anything it is still buffering.
    parser.close()
    return str(parser)
def convert(self, doc, encoding, mimetype, logError=False, raiseException=False):
    """Strip markup from ``doc`` and return the result as UTF-8.

    The effective input encoding is chosen in this order: the value
    captured by ``encoding_reg`` in ``doc`` (the XML preamble), then the
    ``encoding`` argument, then the module-level ``default_encoding``.

    ``mimetype``, ``logError`` and ``raiseException`` are accepted but
    not used by this implementation.

    Returns a 2-tuple ``(text, 'utf-8')`` where ``text`` is the
    ``str()`` form of the ``StripTagParser`` that was fed the document.
    """
    # An encoding declared in the XML preamble overrides the caller's.
    preamble = encoding_reg.search(doc)
    if preamble:
        encoding = preamble.group(1)
    encoding = encoding or default_encoding

    # Byte strings are decoded with 'replace' so undecodable bytes do
    # not raise; input that is already unicode is left untouched.
    if not isinstance(doc, unicode):
        doc = unicode(doc, encoding, 'replace')

    utf8_doc = convert_entities(doc).encode('utf-8')

    parser = StripTagParser()
    parser.feed(utf8_doc)
    parser.close()
    return str(parser), 'utf-8'