Ejemplo n.º 1
0
    def _parse(self, str):
        """ Cleans up a string and returns the result.

        The string is passed, in this order, through replace_entities(),
        strip_tags() and collapse_spaces().
        """

        return collapse_spaces(strip_tags(replace_entities(str)))
Ejemplo n.º 2
0
    def _parse(self, str):
        """ Cleans up a string and returns the result.

        The string is passed, in this order, through replace_entities(),
        strip_tags() and collapse_spaces().
        """

        return collapse_spaces(strip_tags(replace_entities(str)))
Ejemplo n.º 3
0
    def load(self, data):
        """ Initializes the underlying BeautifulSoup parser with the given data.

        Entities in data are replaced with replace_entities() before parsing.
        When parsing raises, the failure is stored in self.error
        (PageUnicodeError for a UnicodeEncodeError, PageParseError for any
        other exception) and the parser is initialized with an empty
        document instead. self.error is not touched when parsing succeeds.
        """

        data = replace_entities(data)
        try:
            BeautifulSoup.__init__(self, data)
        except UnicodeEncodeError:
            self.error = PageUnicodeError()
            BeautifulSoup.__init__(self, "")
        except Exception:
            # Exception instead of a bare except: KeyboardInterrupt and
            # SystemExit must propagate, not be recorded as parse errors.
            self.error = PageParseError()
            BeautifulSoup.__init__(self, "")
Ejemplo n.º 4
0
    def load(self, data):
        """ Initializes the underlying BeautifulSoup parser with the given data.

        Entities in data are replaced with replace_entities() before parsing.
        When parsing raises, the failure is stored in self.error
        (PageUnicodeError for a UnicodeEncodeError, PageParseError for any
        other exception) and the parser is initialized with an empty
        document instead. self.error is not touched when parsing succeeds.
        """

        data = replace_entities(data)
        try:
            BeautifulSoup.__init__(self, data)
        except UnicodeEncodeError:
            self.error = PageUnicodeError()
            BeautifulSoup.__init__(self, "")
        except Exception:
            # Exception instead of a bare except: KeyboardInterrupt and
            # SystemExit must propagate, not be recorded as parse errors.
            self.error = PageParseError()
            BeautifulSoup.__init__(self, "")
Ejemplo n.º 5
0
    def _parse(self, e, tag):
        """ Parses the text data from an XML element defined by tag.
        """

        tags = e.getElementsByTagName(tag)
        children = tags[0].childNodes
        if len(children) != 1: return None
        assert children[0].nodeType == xml.dom.minidom.Element.TEXT_NODE

        s = children[0].nodeValue
        s = format_data(s)
        s = replace_entities(s)

        return s
Ejemplo n.º 6
0
    def _parse(self, e, tag):

        """ Parses the text data from an XML element defined by tag.
        """

        tags = e.getElementsByTagName(tag)
        children = tags[0].childNodes
        if len(children) != 1:
            return None
        assert children[0].nodeType == xml.dom.minidom.Element.TEXT_NODE

        s = children[0].nodeValue
        s = format_data(s)
        s = replace_entities(s)

        return s