예제 #1
0
    def test_scrape_ignores_links_that_fail_the_rules(self):
        """Check that links rejected by the crawler rules are left out of the result.

        The scraper is stubbed so the start page links to ``/about``, which in
        turn links to an external site.  The rules stub passes only the first
        link list through unchanged and returns an empty list for any other
        input, so the expected tree stops at the ``/about`` page.
        """
        link_scraper = mock()
        crawler_rules = mock()
        mock_writer = mock()

        # The only link list that the stubbed rules let through.
        same_domain_links1 = [
            Link(url="/about",
                 label="About",
                 parent_url="http://samplepage.com")
        ]

        # Stubbed scrape results: start page -> /about -> external site.
        when(link_scraper).scrape_links("http://samplepage.com").thenReturn(
            same_domain_links1)
        when(link_scraper).scrape_links(
            "http://samplepage.com/about").thenReturn([
                Link(url="http://anotherdoamin.com",
                     label="External",
                     parent_url="http://samplepage.com/about")
            ])
        when(link_scraper).scrape_links("http://anotherdoamin.com").thenReturn(
            [
                Link(url="/anotherabout",
                     label="External About",
                     parent_url="http://anotherdoamin.com")
            ])
        when(link_scraper).scrape_links(
            "http://anotherdoamin.com/anotherabout").thenReturn([])

        # Rules keep the first list as-is; any list not equal to it is
        # answered with an empty list.
        when(crawler_rules).apply_rules(same_domain_links1).thenReturn(
            same_domain_links1)
        when(crawler_rules).apply_rules(neq(same_domain_links1)).thenReturn([])

        spider = Spider(link_scraper, crawler_rules)
        expected_links = {
            "page_url":
            "http://samplepage.com",
            "child_links": [{
                "page_url": "http://samplepage.com/about",
                "child_links": []
            }]
        }

        links = spider.scrape("http://samplepage.com", mock_writer)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest.TestCase in Python 3.12.
        self.assertEqual(expected_links, links)
예제 #2
0
 def testShouldNotSatisfyIfArgMatchesGivenValue(self):
     """A ``neq("foo")`` matcher must not match the argument ``"foo"``."""
     matcher = neq("foo")
     self.assertFalse(matcher.matches("foo"))
예제 #3
0
 def testShouldSatisfyIfArgDoesNotMatchGivenValue(self):
     """A ``neq("foo")`` matcher must match the different argument ``"bar"``."""
     matcher = neq("foo")
     self.assertTrue(matcher.matches("bar"))