Esempio n. 1
0
    def convert(self, doc):
        """Convert html data to raw text.

        ``doc`` is fed to a fresh ``StripTagParser``; once the parser has
        been closed, its ``str()`` representation is returned.
        """
        stripper = StripTagParser()
        stripper.feed(doc)
        # Close before stringifying so the parser has seen the end of input.
        stripper.close()
        text = str(stripper)
        return text
Esempio n. 2
0
    def convert(self, doc, encoding, mimetype,
                logError=False, raiseException=False):
        """Strip the markup from ``doc`` and return ``(text, 'utf-8')``.

        The document is decoded to unicode (unless it already is unicode),
        passed through ``convert_entities``, re-encoded as UTF-8 and fed to
        a ``StripTagParser``. The first element of the returned tuple is
        ``str()`` of that parser; the second is always ``'utf-8'``.

        doc -- the document, either a byte string or a unicode string.
        encoding -- encoding used to decode a byte-string ``doc``. An
            encoding matched by ``encoding_reg`` in the document itself
            takes precedence; an empty value falls back to
            ``default_encoding``.
        mimetype, logError, raiseException -- accepted but not used by
            this method.

        An unknown codec name no longer aborts the conversion: decoding
        falls back to ``default_encoding``.
        """

        # Use encoding from XML preamble if present
        mo = encoding_reg.search(doc)
        if mo:
            encoding = mo.group(1)

        if not encoding:
            encoding = default_encoding

        if not isinstance(doc, unicode):
            try:
                doc = unicode(doc, encoding, 'replace')
            except LookupError:
                # The declared/supplied codec name is unknown to Python
                # (e.g. a typo in the preamble). 'replace' only covers
                # undecodable bytes, not unknown codecs, so retry with
                # the default encoding.
                doc = unicode(doc, default_encoding, 'replace')
        doc = convert_entities(doc)
        # The parser is fed UTF-8 bytes, matching the encoding reported
        # in the return value.
        doc = doc.encode('utf-8')
        p = StripTagParser()
        p.feed(doc)
        p.close()
        return str(p), 'utf-8'
Esempio n. 3
0
    def convert(self, doc):
        """Convert html data to raw text.

        A new ``StripTagParser`` consumes ``doc`` and is closed; the
        return value is ``str()`` of that parser.
        """
        tag_stripper = StripTagParser()
        tag_stripper.feed(doc)
        # Signal end of input before reading the result back.
        tag_stripper.close()
        raw_text = str(tag_stripper)
        return raw_text
Esempio n. 4
0
    def convert(self,
                doc,
                encoding,
                mimetype,
                logError=False,
                raiseException=False):
        """Strip the markup from ``doc`` and return ``(text, 'utf-8')``.

        The document is decoded to unicode (unless it already is unicode),
        passed through ``convert_entities``, re-encoded as UTF-8 and fed to
        a ``StripTagParser``. The first element of the returned tuple is
        ``str()`` of that parser; the second is always ``'utf-8'``.

        doc -- the document, either a byte string or a unicode string.
        encoding -- encoding used to decode a byte-string ``doc``. An
            encoding matched by ``encoding_reg`` in the document itself
            takes precedence; an empty value falls back to
            ``default_encoding``.
        mimetype, logError, raiseException -- accepted but not used by
            this method.

        An unknown codec name no longer aborts the conversion: decoding
        falls back to ``default_encoding``.
        """

        # Use encoding from XML preamble if present
        mo = encoding_reg.search(doc)
        if mo:
            encoding = mo.group(1)

        if not encoding:
            encoding = default_encoding

        if not isinstance(doc, unicode):
            try:
                doc = unicode(doc, encoding, 'replace')
            except LookupError:
                # The declared/supplied codec name is unknown to Python
                # (e.g. a typo in the preamble). 'replace' only covers
                # undecodable bytes, not unknown codecs, so retry with
                # the default encoding.
                doc = unicode(doc, default_encoding, 'replace')
        doc = convert_entities(doc)
        # The parser is fed UTF-8 bytes, matching the encoding reported
        # in the return value.
        doc = doc.encode('utf-8')
        p = StripTagParser()
        p.feed(doc)
        p.close()
        return str(p), 'utf-8'