Esempio n. 1
0
    def test_crawling_over_page_with_limit(self, mocker, correct_response):
        """Check that crawling stops once the limit value has been met.

        ``requests.get`` is patched so that every call returns the
        ``correct_response`` fixture; no real HTTP request is made.
        The crawler is built with ``1`` as its second positional argument
        (presumably the page limit -- confirm against ``Crawler``), and
        only the base page is expected in ``links`` afterwards.
        """
        # Configure the stubbed return value directly in the patch call.
        mocker.patch('requests.get', return_value=correct_response)

        limited_crawler = Crawler('test1.com', 1)
        limited_crawler.analyze()

        # Only the start URL (with the scheme added) should be collected.
        assert limited_crawler.links == {'http://test1.com'}
Esempio n. 2
0
    def test_analyzing_page_with_mailto(self, mocker, response_with_mail,
                                        not_found_response):
        """Check that only the base page is collected for the mailto case.

        ``requests.get`` is patched to return ``response_with_mail`` on the
        first call and ``not_found_response`` on the second one.
        NOTE(review): judging by the fixture name, the first page presumably
        contains a ``mailto:`` link -- the fixture body is not visible here.
        """
        # Consecutive calls to the stub consume this list in order.
        stubbed_responses = [response_with_mail, not_found_response]
        mocker.patch('requests.get', side_effect=stubbed_responses)

        mail_crawler = Crawler('test2.com')
        mail_crawler.analyze()

        # http://test2.com/help.html does not exist (second stubbed
        # response), so only the base page ends up in the collected links.
        assert mail_crawler.links == {'http://test2.com'}
Esempio n. 3
0
    def test_analyzing_page_different_style(self, mocker, correct_response):
        """Check that links found in the page content are collected.

        ``requests.get`` is patched so that every call returns the
        ``correct_response`` fixture. With no second argument passed to
        ``Crawler``, both the base page and ``help.html`` are expected in
        ``links`` after ``analyze()``.
        """
        # Configure the stubbed return value directly in the patch call.
        mocker.patch('requests.get', return_value=correct_response)

        page_crawler = Crawler('test1.com')
        page_crawler.analyze()

        wanted_links = {'http://test1.com', 'http://test1.com/help.html'}
        assert page_crawler.links == wanted_links