def test_page_twice(self):
    # Make sure the database is cleared out between tests: right after a
    # single insert the Page table must contain exactly one row.
    fields = dict(
        url='http://www.example.com/foo',
        content='hi world',
        status_code=200,
    )
    Page.create(**fields)
    self.assertEqual(Page.select().count(), 1)

    # Inserting a second page with identical field values must be rejected.
    with self.assertRaises(IntegrityError):
        Page.create(**fields)
def test_page_twice(self):
    # Make sure the database is cleared out between tests: right after a
    # single insert the Page table must contain exactly one row.
    fields = dict(
        url='http://www.example.com/foo',
        content='hi world',
        status_code=200,
    )
    Page.create(**fields)
    self.assertEqual(Page.select().count(), 1)

    # Inserting a second page with identical field values must be rejected.
    with self.assertRaises(IntegrityError):
        Page.create(**fields)
def test_crawl_existing_page(self):
    # When a Page row for link1 is already stored, crawl_page is expected
    # to hand back that same row and flag that no new crawl took place.
    existing = Page.create(url=link1, content='foobar', status_code=200)

    found_page, was_crawled = crawl_page(link1)

    self.assertFalse(was_crawled)
    self.assertEqual(existing.id, found_page.id)
def test_crawl_existing_page(self):
    # When a Page row for link1 is already stored, crawl_page is expected
    # to hand back that same row and flag that no new crawl took place.
    existing = Page.create(url=link1, content='foobar', status_code=200)

    found_page, was_crawled = crawl_page(link1)

    self.assertFalse(was_crawled)
    self.assertEqual(existing.id, found_page.id)
def test_add_page_info_to_page(self, requests_get, requests_head):
    # requests_get / requests_head act as mocks here (presumably injected
    # by patch decorators outside this view -- confirm).
    content_type = 'text/html'
    text = '<div>hi world</div>'

    # HEAD reports the content type (with a charset suffix); GET serves
    # the body.
    requests_head.return_value = MagicMock(
        status_code=200,
        headers={'content-type': '{}; charset=utf-8'.format(content_type)},
    )
    requests_get.return_value = MagicMock(status_code=200, text=text)

    page = Page.create(
        url="http://www.example.com/foo", content='', status_code=0)
    add_page_info_to_page(page)

    # The page must now carry the fetched body and the bare content type
    # (no charset suffix).
    self.assertEqual(page.content, text)
    self.assertEqual(page.content_type, content_type)

    # Exactly one HEAD and one GET request were issued.
    for mocked in (requests_head, requests_get):
        self.assertEqual(mocked.call_count, 1)
def test_add_page_info_to_page(self, requests_get, requests_head):
    # requests_get / requests_head act as mocks here (presumably injected
    # by patch decorators outside this view -- confirm).
    content_type = 'text/html'
    text = '<div>hi world</div>'

    # HEAD reports the content type (with a charset suffix); GET serves
    # the body.
    requests_head.return_value = MagicMock(
        status_code=200,
        headers={'content-type': '{}; charset=utf-8'.format(content_type)},
    )
    requests_get.return_value = MagicMock(status_code=200, text=text)

    page = Page.create(
        url="http://www.example.com/foo", content='', status_code=0)
    add_page_info_to_page(page)

    # The page must now carry the fetched body and the bare content type
    # (no charset suffix).
    self.assertEqual(page.content, text)
    self.assertEqual(page.content_type, content_type)

    # Exactly one HEAD and one GET request were issued.
    for mocked in (requests_head, requests_get):
        self.assertEqual(mocked.call_count, 1)
def test_permanent_redirect(self, requests_get, requests_head):
    # requests_get / requests_head act as mocks here (presumably injected
    # by patch decorators outside this view -- confirm).
    source_url = "http://www.example.com/foo"
    redirect_url = "http://www.example.com/bar"

    # HEAD answers 301 with a location header pointing at redirect_url.
    requests_head.return_value = MagicMock(
        status_code=301, headers={'location': redirect_url})

    page = Page.create(url=source_url, content='', status_code=0)
    add_page_info_to_page(page)

    # A Page row for the redirect target must exist afterwards ...
    to_page = Page.select().where(Page.url == redirect_url).first()
    self.assertTrue(to_page)

    # ... together with a Link going from the original page to it.
    redirect_links = Link.select().where(
        Link.from_page == page,
        Link.to_page == to_page,
    )
    self.assertTrue(redirect_links.exists())

    # Only the HEAD request was made; the body was never fetched, and the
    # page content holds the redirect target instead.
    self.assertEqual(requests_head.call_count, 1)
    self.assertFalse(requests_get.called)
    self.assertEqual(page.content, redirect_url)
def test_permanent_redirect(self, requests_get, requests_head):
    # requests_get / requests_head act as mocks here (presumably injected
    # by patch decorators outside this view -- confirm).
    source_url = "http://www.example.com/foo"
    redirect_url = "http://www.example.com/bar"

    # HEAD answers 301 with a location header pointing at redirect_url.
    requests_head.return_value = MagicMock(
        status_code=301, headers={'location': redirect_url})

    page = Page.create(url=source_url, content='', status_code=0)
    add_page_info_to_page(page)

    # A Page row for the redirect target must exist afterwards ...
    to_page = Page.select().where(Page.url == redirect_url).first()
    self.assertTrue(to_page)

    # ... together with a Link going from the original page to it.
    redirect_links = Link.select().where(
        Link.from_page == page,
        Link.to_page == to_page,
    )
    self.assertTrue(redirect_links.exists())

    # Only the HEAD request was made; the body was never fetched, and the
    # page content holds the redirect target instead.
    self.assertEqual(requests_head.call_count, 1)
    self.assertFalse(requests_get.called)
    self.assertEqual(page.content, redirect_url)
def test_page(self):
    # A freshly created Page must carry a first_visited timestamp that is
    # not earlier than a clock reading taken just before creation.
    # NOTE(review): the comparison against utcnow() implies first_visited
    # is a naive UTC datetime -- confirm against the Page model.
    before = datetime.datetime.utcnow()
    page = Page.create(
        url='http://www.example.com/foo',
        content='hi world',
        status_code=200,
    )
    # Use >= instead of a strict >: with a coarse system clock the two
    # readings can be identical, which made the strict check flaky.
    # assertGreaterEqual also prints both values when it fails.
    self.assertGreaterEqual(page.first_visited, before)
def test_page(self):
    # A freshly created Page must carry a first_visited timestamp that is
    # not earlier than a clock reading taken just before creation.
    # NOTE(review): the comparison against utcnow() implies first_visited
    # is a naive UTC datetime -- confirm against the Page model.
    before = datetime.datetime.utcnow()
    page = Page.create(
        url='http://www.example.com/foo',
        content='hi world',
        status_code=200,
    )
    # Use >= instead of a strict >: with a coarse system clock the two
    # readings can be identical, which made the strict check flaky.
    # assertGreaterEqual also prints both values when it fails.
    self.assertGreaterEqual(page.first_visited, before)