Ejemplo n.º 1
0
 def test_must_found_a_subject_mentioned(self):
     """Searching the parsed document for 'alvo' must yield one result.

     The fixture holds three ``div.item`` blocks; only the third one
     contains the word 'alvo' (as its link text and as trailing text).
     """
     markup = '<html>    <body>      <div class="item">meu item 1            <div>           <a href="link-1">teste</a>          </div>      </div>      <div class="item">meu item 2            <div>               <a href="link-2">teste</a>          </div>      </div>      <div class="item">meu item 3            <div>               <a href="link-3">alvo</a>           </div>      alvo</div>  </body></html>'
     html_parser = HtmlParser()
     html_parser.feed(markup)

     # The finder works on the document structure built by the parser.
     matches = HtmlFinder(html_parser.document).find_by('alvo')
     self.assertEqual(1, len(matches))
Ejemplo n.º 2
0
 def test_must_found_references_to_subject(self):
     """Looking up references for 'link' must yield two results.

     The fixture has three anchors whose texts are 'teste', 'LInk' and
     'Link'; the expected count of two suggests a case-insensitive match
     on the anchor text -- NOTE(review): confirm against HtmlFinder.
     """
     markup = '<html>    <body>      <div class="item">meu item 1            <div>           <a href="link-1">teste</a>          </div>      </div>      <div class="item">meu item 2            <div>               <a href="link-2">LInk</a>          </div>Alvo Existente      </div>      <div class="item">meu item 3            <div>               <a href="link-3">Link</a>           </div>Alvo Existente      </div>  </body></html>'
     html_parser = HtmlParser()
     html_parser.feed(markup)

     references = HtmlFinder(html_parser.document).find_references_by('link')
     self.assertEqual(2, len(references))
Ejemplo n.º 3
0
 def test_must_read_a_element_with_attributes(self):
     """A parsed ``<a>`` element must expose both of its attributes.

     The fixture contains a single anchor carrying ``target`` and ``href``.
     The test checks the overall document size, that exactly one anchor
     entry was produced, and that the anchor reports two attributes with
     ``href`` equal to ``'link-1'``.
     """
     parser = HtmlParser()
     parser.feed('<html><body><div class="item">meu <a target="_blank" href="link-1">item</a> item 1</div></body></html>')
     self.assertEqual(4, len(parser.document))

     anchors_seen = 0
     for idx in parser.document:
         element = parser.document[idx]
         if element.get('tag', None) != 'a':
             continue
         anchors_seen += 1
         # Attributes are looked up by name below, so the fallback must be
         # a mapping rather than a list.
         attributes = element.get('attributes', {})
         self.assertEqual(len(attributes), 2)
         self.assertEqual(attributes['href'], 'link-1')

     # Guard against a vacuous pass: if the parser never emitted an 'a'
     # entry, none of the attribute assertions above would have run.
     self.assertEqual(1, anchors_seen)
Ejemplo n.º 4
0
    def test_must_read_html(self):
        """Every anchor in the nested fixture must be parsed under a div.

        The fixture has three ``div.item`` blocks, each wrapping an inner
        ``div`` that holds one ``<a>``. The test counts the anchor entries
        and, whenever an anchor records a truthy ``parent`` key, checks that
        the referenced entry is a ``div``.
        """
        markup = '<html>    <body>      <div class="item">meu item 1            <div>           <a href="link-1">teste</a>          </div>      </div>      <div class="item">meu item 2            <div>               <a href="link-2">teste</a>          </div>      </div>      <div class="item">meu item 3            <div>               <a href="link-3">teste</a>          </div>      </div>  </body></html>'
        html_parser = HtmlParser()
        html_parser.feed(markup)

        anchors_found = 0
        for key in html_parser.document:
            node = html_parser.document[key]
            if node.get('tag', None) != 'a':
                continue
            anchors_found += 1

            parent_key = node.get('parent', None)
            if not parent_key:
                # No (truthy) parent recorded: nothing to verify here.
                continue
            self.assertEqual(html_parser.document[parent_key]['tag'], 'div')

        self.assertEqual(anchors_found, 3)
Ejemplo n.º 5
0
 def test_must_read_a_simple_html(self):
     """Parsing a flat document must produce five document entries.

     The fixture is ``html > body`` with three sibling ``div.item``
     elements -- presumably one entry per element; confirm against
     HtmlParser.
     """
     markup = '<html><body><div class="item">meu item 1</div><div class="item">meu item 2</div><div class="item">meu item 3</div></body></html>'
     html_parser = HtmlParser()
     html_parser.feed(markup)

     entry_total = len(html_parser.document)
     self.assertEqual(5, entry_total)