def test_cannot_resolve_with_wrong_params_dict(self):
    # Check how resolution of the newsView URL reacts to missing,
    # undeclared, and wrong-valued params.
    article_url = "http://www.mediagaon.or.kr/jsp/sch/mnews/newsView.jsp"

    # Omitted params: the bare URL must not resolve to KindsArticlePage.
    without_params = base.resolve(article_url)
    self.assertNotIsInstance(without_params, KindsArticlePage)

    # Params not declared in pagetype: resolution must fail.
    with self.assertRaises(base.NotResolvedError):
        base.resolve(url=article_url, params={"foo": "bar"})

    # A declared param carrying a wrong value does not raise
    # NotResolvedError, so this call sits outside the assertRaises block.
    base.resolve(url=article_url, params={"newsId": "bar"})
def test_raise_exception_on_implicit_fetching(self):
    # A resolved page that was never fetched must refuse to hand out links,
    # both with the default arguments and with fetch=False.
    unfetched = base.resolve("http://example.com")
    not_fetched = base.NotFetchedYetError

    with self.assertRaises(not_fetched):
        unfetched.get_links()

    with self.assertRaises(not_fetched):
        unfetched.get_links(fetch=False)
def test_select_and_parse_properties(self):
    # Fetch a Google blog post and check its parsed "title" and "body".
    post = base.resolve(
        "https://googleblog.blogspot.kr/2015/11/google-gobble-thanksgiving-trends-on.html"
    )
    post.fetch()

    # Properties are read once per key, exactly as two separate lookups.
    parsed_title = post.get_properties()["title"]
    parsed_body = post.get_properties()["body"]

    self.assertEqual(parsed_title, "Google gobble: Thanksgiving trends on Search")
    self.assertTrue(parsed_body.startswith("In just a few hours"))
def test_parse_example_webpages(self):
    # example.com resolves to the generic Hypertext type and, once fetched,
    # exposes exactly one link.
    example = base.resolve("http://example.com")
    self.assertIsInstance(example, base.Hypertext)

    example.fetch()
    found_links = example["links"]

    self.assertEqual(len(found_links), 1)
    self.assertEqual("http://www.iana.org/domains/example", found_links[0])
def test_resolve_with_params_and_parse_properties(self):
    # Resolving the newsView URL with a newsId param yields a
    # KindsArticlePage whose parsed title matches the expected headline.
    query = {"newsId": "01100101.20151102100000159"}
    article = base.resolve(
        "http://www.mediagaon.or.kr/jsp/sch/mnews/newsView.jsp",
        params=query,
    )
    self.assertIsInstance(article, KindsArticlePage)

    article.fetch()
    headline = article.get_properties()["title"]
    self.assertEqual(
        headline,
        "[한·중·일 정상회의] 3국 정상, 회의 전엔 ‘미소 촬영’ 회견 땐 웃음기 ‘싹’…비빔밥으로 만찬",
    )
def test_resolve_with_multiple_params(self):
    # Resolving the search URL with a start/end date pair yields a
    # KindsSearchPage.
    date_range = {"startDate": "2015.01.01", "endDate": "2015.10.05"}
    search_page = base.resolve(
        url="http://www.mediagaon.or.kr/jsp/sch/mnews/search.jsp",
        params=date_range,
    )
    self.assertIsInstance(search_page, KindsSearchPage)

    search_page.fetch()
    # NOTE(review): the links are only printed, never asserted on --
    # confirm whether an expectation was intended here.
    print(search_page["links"])
def test_parse_cern(self):
    # info.cern.ch resolves to the generic Hypertext type and, once fetched,
    # exposes the four expected links in order.
    page = base.resolve("http://info.cern.ch")
    self.assertIsInstance(page, base.Hypertext)

    page.fetch()
    links = page["links"]
    self.assertEqual(len(links), 4)

    # list() materializes the links for comparison against a plain list;
    # an identity comprehension adds nothing over it.
    self.assertEqual(
        list(links),
        [
            "http://info.cern.ch/hypertext/WWW/TheProject.html",
            "http://line-mode.cern.ch/www/hypertext/WWW/TheProject.html",
            "http://home.web.cern.ch/topics/birth-web",
            "http://home.web.cern.ch/about",
        ],
    )
def test_resolve_url_with_pattern_vars(self):
    # A concrete Google blog post URL must resolve to GoogleBlogPage.
    post_url = "https://googleblog.blogspot.kr/2015/11/google-gobble-thanksgiving-trends-on.html"
    resolved = base.resolve(post_url)
    self.assertIsInstance(resolved, GoogleBlogPage)
def test_resolve_pagetype_with_url(self):
    # The CERN "TheProject" page URL must resolve to W3Page.
    project_url = "http://info.cern.ch/hypertext/WWW/TheProject.html"
    resolved = base.resolve(project_url)
    self.assertIsInstance(resolved, W3Page)
def test_base_hypertext_does_not_parse_any_content(self):
    # With fetch=True, the properties of info.cern.ch must contain the
    # single key "links" and nothing else.
    cern_page = base.resolve("http://info.cern.ch")
    parsed = cern_page.get_properties(fetch=True)
    property_names = parsed.keys()
    self.assertEqual(property_names, {"links"})
def test_cannot_parse_example_dot_com_with_post_method(self):
    # Resolving example.com with method="POST" must raise NotResolvedError.
    with self.assertRaises(base.NotResolvedError):
        # The call is expected to raise, so its result is deliberately
        # not bound to a name.
        base.resolve("http://example.com", method="POST")