Example #1
0
 def test_extraction(self):
     """Check what a default-configured extractor returns for ``self.response``.

     The same URL is expected once per anchor (sample3 appears twice, each
     time with its own text), and the google.com link is expected with
     empty text.
     """
     # No constructor arguments: exercise the extractor's defaults.
     extractor = HtmlParserLinkExtractor()
     found = extractor.extract_links(self.response)

     wanted = [
         Link(url='http://example.com/sample2.html', text=u'sample 2'),
         Link(url='http://example.com/sample3.html', text=u'sample 3 text'),
         Link(url='http://example.com/sample3.html', text=u'sample 3 repetition'),
         Link(url='http://www.google.com/something', text=u''),
         Link(url='http://example.com/innertag.html', text=u'inner tag'),
     ]
     self.assertEqual(found, wanted)
 def test_extraction(self):
     """Default extraction: compare the extracted links with the expected list.

     The comparison is an exact list equality, so both the order of the
     links and the repeated sample3 URL (with differing text) matter.
     """
     expected_links = [
         Link(url='http://example.com/sample2.html', text=u'sample 2'),
         Link(url='http://example.com/sample3.html', text=u'sample 3 text'),
         Link(url='http://example.com/sample3.html', text=u'sample 3 repetition'),
         Link(url='http://www.google.com/something', text=u''),
         Link(url='http://example.com/innertag.html', text=u'inner tag'),
     ]

     # Default arguments
     link_extractor = HtmlParserLinkExtractor()
     self.assertEqual(link_extractor.extract_links(self.response), expected_links)
Example #3
0
 def test_link_wrong_href(self):
     """A malformed href must be skipped without losing the surrounding links.

     The second anchor's host has an unbalanced ``[``; only the first and
     third anchors are expected in the result, both with ``nofollow=False``.
     """
     # Bytes literal: no encoding is passed to HtmlResponse, so the body
     # must already be bytes (a text body would need an explicit encoding
     # on Python 3). On Python 2 the ``b`` prefix changes nothing.
     html = b"""
     <a href="http://example.org/item1.html">Item 1</a>
     <a href="http://[example.org/item2.html">Item 2</a>
     <a href="http://example.org/item3.html">Item 3</a>
     """
     response = HtmlResponse("http://example.org/index.html", body=html)
     lx = HtmlParserLinkExtractor()
     # list() materialises the result for comparison; the previous identity
     # comprehension did the same thing more verbosely.
     self.assertEqual(list(lx.extract_links(response)), [
         Link(url='http://example.org/item1.html', text=u'Item 1', nofollow=False),
         Link(url='http://example.org/item3.html', text=u'Item 3', nofollow=False),
     ])
 def test_link_wrong_href(self):
     """Links around an unparsable href are still extracted.

     Three anchors are fed in; the middle one has a broken host
     (``http://[example.org/...``) and is expected to be absent from the
     extracted links, while items 1 and 3 are returned in document order.
     """
     # The body is given as bytes because the response is built without an
     # encoding; passing text here fails on Python 3. The ``b`` prefix is a
     # no-op on Python 2, so the fix is backward-compatible.
     html = b"""
     <a href="http://example.org/item1.html">Item 1</a>
     <a href="http://[example.org/item2.html">Item 2</a>
     <a href="http://example.org/item3.html">Item 3</a>
     """
     response = HtmlResponse("http://example.org/index.html", body=html)
     lx = HtmlParserLinkExtractor()

     expected = [
         Link(url='http://example.org/item1.html', text=u'Item 1', nofollow=False),
         Link(url='http://example.org/item3.html', text=u'Item 3', nofollow=False),
     ]
     self.assertEqual(list(lx.extract_links(response)), expected)
 def test_extraction(self):
     """Verify default link extraction against a table of (url, text) pairs.

     Each pair becomes one expected ``Link``; the extracted list must equal
     the expected list exactly, including order and the repeated sample3 URL.
     """
     url_text_pairs = (
         ("http://example.com/sample2.html", u"sample 2"),
         ("http://example.com/sample3.html", u"sample 3 text"),
         ("http://example.com/sample3.html", u"sample 3 repetition"),
         ("http://www.google.com/something", u""),
         ("http://example.com/innertag.html", u"inner tag"),
     )

     # Default arguments
     default_extractor = HtmlParserLinkExtractor()
     actual = default_extractor.extract_links(self.response)

     expected = [Link(url=href, text=label) for href, label in url_text_pairs]
     self.assertEqual(actual, expected)