def test_site_pages(self):
    """
    Tests from real pages. More reliable and easy to build for more complicated structures

    Walks the numbered sample pairs ``samples/samples_pageparsing_<n>.html``
    and ``samples/samples_pageparsing_<n>.json`` (n = 0, 1, 2, ...) and stops
    at the first index whose JSON file does not exist. Each HTML page is fed
    to a ``TemplatePageParser`` and every annotation it produces is compared,
    slot by slot, with the next expected entry from the JSON file.
    """
    SAMPLES_FILE_PREFIX = os.path.join(path, "samples/samples_pageparsing")
    count = 0
    fname = "%s_%d.json" % (SAMPLES_FILE_PREFIX, count)
    while os.path.exists(fname):
        # Context managers close both sample files deterministically;
        # the bare open(...).read() form leaked one handle per file.
        with open("%s_%d.html" % (SAMPLES_FILE_PREFIX, count), "rb") as html_file:
            source = html_file.read().decode('utf-8')
        with open(fname, "rb") as json_file:
            annotations = json.loads(json_file.read().decode('utf-8'))
        template = HtmlPage(body=source)
        parser = TemplatePageParser(TokenDict())
        parser.feed(template)
        for annotation in parser.annotations:
            # Expected annotations are consumed in order, front first.
            test_annotation = annotations.pop(0)
            for s in annotation.__slots__:
                if s == "tag_attributes":
                    # Compared pair by pair: the parsed pairs are converted
                    # to lists before being matched against the JSON entries.
                    for pair in getattr(annotation, s):
                        self.assertEqual(list(pair), test_annotation[s].pop(0))
                else:
                    self.assertEqual(getattr(annotation, s), test_annotation[s])
        # Every expected annotation must have been matched by the parser.
        self.assertEqual(annotations, [])
        count += 1
        fname = "%s_%d.json" % (SAMPLES_FILE_PREFIX, count)
Exemple #2
0
 def test_site_pages(self):
     """
     Check the template parser against samples taken from real pages.

     For every ``(source, annotations)`` pair yielded by
     ``iter_samples('pageparsing')``, the source is parsed with a
     ``TemplatePageParser`` and each resulting annotation is compared,
     slot by slot, with the next expected entry.
     """
     for source, annotations in iter_samples('pageparsing'):
         template = HtmlPage(body=source)
         page_parser = TemplatePageParser(TokenDict())
         page_parser.feed(template)
         for parsed in page_parser.annotations:
             # Expected entries are consumed in order, front first.
             expected = annotations.pop(0)
             for slot in parsed.__slots__:
                 actual = getattr(parsed, slot)
                 if slot != "tag_attributes":
                     self.assertEqual(actual, expected[slot])
                     continue
                 # tag_attributes is matched pair by pair, each parsed pair
                 # being converted to a list before comparison.
                 for pair in actual:
                     self.assertEqual(list(pair), expected[slot].pop(0))
         # Nothing expected may be left unmatched for this sample.
         self.assertEqual(annotations, [])
Exemple #3
0
 def test_site_pages(self):
     """
     Run the template page parser over samples built from real pages.

     Each sample from ``iter_samples('pageparsing')`` supplies a page source
     and a list of expected annotations; the annotations produced by the
     parser must match that list entry by entry, and none may be left over.
     """
     for html, expected_list in iter_samples('pageparsing'):
         page = HtmlPage(body=html)
         template_parser = TemplatePageParser(TokenDict())
         template_parser.feed(page)
         for found in template_parser.annotations:
             # Take the next expected annotation off the front of the list.
             wanted = expected_list.pop(0)
             for name in found.__slots__:
                 if name != "tag_attributes":
                     self.assertEqual(getattr(found, name), wanted[name])
                 else:
                     # Attribute pairs are compared one at a time, as lists.
                     for attr_pair in getattr(found, name):
                         self.assertEqual(list(attr_pair), wanted[name].pop(0))
         # All expected annotations must have been consumed.
         self.assertEqual(expected_list, [])