Beispiel #1
0
    def test_page_twice(self):
        # Creating an identical page a second time must raise IntegrityError.
        page_fields = dict(url='http://www.example.com/foo',
                           content='hi world',
                           status_code=200)

        Page.create(**page_fields)
        # Exactly one row here also shows that no pages were left over from
        # other tests.
        self.assertEqual(Page.select().count(), 1)

        with self.assertRaises(IntegrityError):
            Page.create(**page_fields)
Beispiel #2
0
    def test_page_twice(self):
        # The first insert succeeds and is the only row in the table, which
        # also confirms the database starts empty for this test.
        duplicate_kwargs = {
            'url': 'http://www.example.com/foo',
            'content': 'hi world',
            'status_code': 200,
        }
        Page.create(**duplicate_kwargs)
        self.assertEqual(Page.select().count(), 1)

        # Repeating the very same insert is expected to raise IntegrityError.
        with self.assertRaises(IntegrityError):
            Page.create(**duplicate_kwargs)
Beispiel #3
0
    def test_crawl_existing_page(self):
        # Store a page for link1 up front so crawl_page() is invoked on a URL
        # that already has a row.
        existing = Page.create(url=link1, content='foobar', status_code=200)

        returned_page, was_crawled = crawl_page(link1)

        # The second element of the result must be falsy, and the returned
        # page must be the row created above.
        self.assertFalse(was_crawled)
        self.assertEqual(existing.id, returned_page.id)
Beispiel #4
0
    def test_crawl_existing_page(self):
        # Pre-populate the table with a page for link1.
        stored_page = Page.create(url=link1,
                                  content='foobar',
                                  status_code=200)

        # crawl_page() yields a (page, flag) pair; for a stored URL the flag
        # is asserted falsy and the page id must match the stored row.
        found_page, did_crawl = crawl_page(link1)

        self.assertFalse(did_crawl)
        self.assertEqual(stored_page.id, found_page.id)
Beispiel #5
0
    def test_add_page_info_to_page(self, requests_get, requests_head):
        # requests_get / requests_head are mock objects handed to the test;
        # how they are patched in is not visible from this method.
        body = '<div>hi world</div>'
        mime_type = 'text/html'

        # The HEAD reply advertises the content type with a charset suffix,
        # the GET reply carries the body text.
        head_reply = MagicMock(
            status_code=200,
            headers={'content-type': '{}; charset=utf-8'.format(mime_type)},
        )
        get_reply = MagicMock(status_code=200, text=body)
        requests_head.return_value = head_reply
        requests_get.return_value = get_reply

        page = Page.create(url="http://www.example.com/foo",
                           content='',
                           status_code=0)
        add_page_info_to_page(page)

        # The page must now hold the body and the content type without the
        # charset suffix.
        self.assertEqual(page.content, body)
        self.assertEqual(page.content_type, mime_type)

        # Exactly one HEAD and one GET call are expected.
        for mocked_call in (requests_head, requests_get):
            self.assertEqual(mocked_call.call_count, 1)
Beispiel #6
0
    def test_add_page_info_to_page(self, requests_get, requests_head):
        # Both request arguments are mocks supplied to the test; the patching
        # itself lives outside this method.
        expected_text = '<div>hi world</div>'
        expected_type = 'text/html'
        content_type_header = '{}; charset=utf-8'.format(expected_type)

        # Canned replies: HEAD exposes the header, GET exposes the text.
        requests_head.return_value = MagicMock(
            status_code=200,
            headers={'content-type': content_type_header},
        )
        requests_get.return_value = MagicMock(status_code=200,
                                              text=expected_text)

        # Start from an empty page record and let the function fill it in.
        page = Page.create(url="http://www.example.com/foo",
                           content='',
                           status_code=0)
        add_page_info_to_page(page)

        # Content comes from the GET text; content_type is the header value
        # with the charset part removed.
        self.assertEqual(page.content, expected_text)
        self.assertEqual(page.content_type, expected_type)

        # Each mock must have been called exactly once.
        self.assertEqual(requests_head.call_count, 1)
        self.assertEqual(requests_get.call_count, 1)
Beispiel #7
0
    def test_permanent_redirect(self, requests_get, requests_head):
        source_url = "http://www.example.com/foo"
        target_url = "http://www.example.com/bar"

        # The mocked HEAD reply is a 301 whose location header names the
        # redirect target.
        requests_head.return_value = MagicMock(
            status_code=301,
            headers={'location': target_url},
        )

        page = Page.create(url=source_url, content='', status_code=0)
        add_page_info_to_page(page)

        # A page row for the redirect target must exist afterwards ...
        to_page = Page.select().where(Page.url == target_url).first()
        self.assertTrue(to_page)

        # ... together with a Link row from the original page to that target.
        redirect_links = Link.select().where(
            Link.from_page == page,
            Link.to_page == to_page,
        )
        self.assertTrue(redirect_links.exists())

        # One HEAD call is expected and the GET mock must stay untouched.
        self.assertEqual(requests_head.call_count, 1)
        self.assertFalse(requests_get.called)

        # The original page's content must equal the redirect target URL.
        self.assertEqual(page.content, target_url)
Beispiel #8
0
    def test_permanent_redirect(self, requests_get, requests_head):
        origin = "http://www.example.com/foo"
        destination = "http://www.example.com/bar"

        # Canned HEAD reply: status 301 with the destination in 'location'.
        moved_reply = MagicMock(status_code=301,
                                headers={'location': destination})
        requests_head.return_value = moved_reply

        page = Page.create(url=origin, content='', status_code=0)
        add_page_info_to_page(page)

        # The destination must have its own page row after the call.
        destination_query = Page.select().where(Page.url == destination)
        to_page = destination_query.first()
        self.assertTrue(to_page)

        # A Link row must point from the original page to the destination.
        link_query = Link.select().where(Link.from_page == page,
                                         Link.to_page == to_page)
        self.assertTrue(link_query.exists())

        # Exactly one HEAD call; the GET mock must not have been called.
        self.assertEqual(requests_head.call_count, 1)
        self.assertFalse(requests_get.called)

        # The page content is expected to be the destination URL.
        self.assertEqual(page.content, destination)
Beispiel #9
0
 def test_page(self):
     # A freshly created page must carry a first_visited timestamp that is
     # later than the instant captured just before creation.
     # NOTE(review): utcnow() returns a naive datetime and is deprecated
     # since Python 3.12. It is kept because the timezone handling of
     # Page.first_visited is not visible here -- confirm before switching.
     before_create = datetime.datetime.utcnow()
     page = Page.create(url='http://www.example.com/foo',
                        content='hi world',
                        status_code=200)
     # assertGreater reports both timestamps on failure, whereas
     # assertTrue(a > b) only reports "False is not true".
     self.assertGreater(page.first_visited, before_create)
Beispiel #10
0
 def test_page(self):
     # Capture a reference instant, create a page, and require that the
     # page's first_visited value is strictly later than that instant.
     # NOTE(review): utcnow() is naive and deprecated since Python 3.12;
     # left unchanged because it is not visible here whether
     # Page.first_visited is naive or timezone-aware -- confirm first.
     reference_time = datetime.datetime.utcnow()
     page = Page.create(
         url='http://www.example.com/foo',
         content='hi world',
         status_code=200,
     )
     # Use assertGreater so a failure shows both compared timestamps
     # instead of the opaque "False is not true" from assertTrue.
     self.assertGreater(page.first_visited, reference_time)