Beispiel #1
0
 def parse_html(self, file):
     """Parse the HTML file at ``file`` and return its root element.

     The file content is read as bytes, decoded as UTF-8 (bytes that
     cannot be decoded are dropped), passed through ``convertentities``
     and finally parsed with ``etree.HTMLParser``.

     Args:
         file: Path of the HTML file to read.

     Returns:
         The root element of the parsed tree (``tree.getroot()``).
     """
     # Binary mode guarantees ``read()`` yields bytes, so the explicit
     # ``decode`` below is always valid; the ``with`` block closes the
     # handle even when reading raises.
     with open(file, 'rb') as handle:
         raw = handle.read()
     html = raw.decode('utf-8', 'ignore')
     # NOTE(review): ``convertentities`` is defined elsewhere; presumably
     # it replaces HTML entities in the decoded text -- confirm.
     html = convertentities(html)
     parser = etree.HTMLParser()
     tree = etree.parse(StringIO(html), parser)
     return tree.getroot()
Beispiel #2
0
 def parse_html(self, file):
     """Read an HTML file and return the root element of its parsed tree.

     Steps: read the raw bytes, decode them as UTF-8 while ignoring
     undecodable bytes, run the text through ``convertentities``, then
     parse it with an ``etree.HTMLParser``.

     Args:
         file: Path of the HTML file to parse.

     Returns:
         The result of ``getroot()`` on the parsed tree.
     """
     # Open in binary mode so the content is bytes and ``decode`` is
     # well-defined; ``with`` ensures the file is closed afterwards
     # instead of leaking the handle.
     with open(file, 'rb') as source:
         data = source.read()
     text = data.decode('utf-8', 'ignore')
     # NOTE(review): ``convertentities`` comes from outside this block;
     # its exact transformation is not visible here -- confirm.
     text = convertentities(text)
     tree = etree.parse(StringIO(text), etree.HTMLParser())
     return tree.getroot()
Beispiel #3
0
 def load_source(self):
     """Load the file at ``self.ft_source`` and store its cleaned content.

     Processing order:
       1. read the raw bytes of the file;
       2. strip the literal markers ``<!-- body`` and ``endbody -->``;
       3. decode as UTF-8 (undecodable bytes are dropped) and pass the
          text through ``convertentities``;
       4. remove ``<?CDATA ... ?>`` spans that sit on a single line
          (``.`` does not match newlines here).

     The result is stored in ``self.source_content`` and
     ``self.source_loaded`` is set to ``True``.
     """
     # Binary mode makes the bytes -> text boundary explicit; the file is
     # closed as soon as its content has been read.
     with open(self.ft_source, 'rb') as f:
         raw_bytes = f.read()
     # Bytes pattern, because this substitution runs before decoding.
     raw_bytes = re.sub(br'(<!-- body|endbody -->)', b'', raw_bytes)
     # NOTE(review): ``convertentities`` is defined elsewhere; presumably
     # it replaces entities in the decoded text -- confirm.
     raw_xml = convertentities(raw_bytes.decode('utf-8', 'ignore'))
     # Raw string: ``\?`` is a regex escape, not a string escape.
     raw_xml = re.sub(r'<\?CDATA.+?\?>', '', raw_xml)
     self.source_content = raw_xml
     self.source_loaded = True
Beispiel #4
0
 def load_source(self):
     """Read ``self.ft_source``, clean it up and cache the result.

     The raw bytes have the literal ``<!-- body`` and ``endbody -->``
     markers removed, are decoded as UTF-8 (ignoring undecodable bytes),
     run through ``convertentities``, and then have single-line
     ``<?CDATA ... ?>`` spans removed. The cleaned text is assigned to
     ``self.source_content``; ``self.source_loaded`` is set to ``True``.
     """
     # Read bytes explicitly so the later ``decode`` call is well-defined;
     # the handle is released before any processing starts.
     with open(self.ft_source, 'rb') as source:
         payload = source.read()
     # Still bytes at this point, hence the bytes pattern and replacement.
     payload = re.sub(br'(<!-- body|endbody -->)', b'', payload)
     # NOTE(review): ``convertentities`` lives outside this block; its
     # exact behaviour is not visible here -- confirm.
     cleaned = convertentities(payload.decode('utf-8', 'ignore'))
     # Raw string keeps ``\?`` as a regex escape rather than an invalid
     # string escape sequence.
     cleaned = re.sub(r'<\?CDATA.+?\?>', '', cleaned)
     self.source_content = cleaned
     self.source_loaded = True