def test_should_return_the_root_url(self):
    # Every input form below must yield "<scheme>://<host>/" as root_url.
    # Pages are requested one at a time, each right before its own check,
    # so a failing assertion stops the test before the next get().
    failure_note = "Should return the root url"

    plain_http = metainspector.get("http://example.com")
    self.assertEqual("http://example.com/", plain_http.root_url, failure_note)

    over_https = metainspector.get(
        "https://example.com/",
        request_function=ignore_ssl_request_function,
    )
    self.assertEqual("https://example.com/", over_https.root_url, failure_note)

    without_scheme = metainspector.get("example.com")
    self.assertEqual("http://example.com/", without_scheme.root_url, failure_note)

    with_path = metainspector.get("http://example.com/faqs")
    self.assertEqual("http://example.com/", with_path.root_url, failure_note)
def test_get_rss_feed(self):
    # Three pages: one expected to expose an RSS feed URL, one an Atom
    # feed URL, and one for which .feed must be None.
    rss_page = metainspector.get("http://www.iteh.at")
    self.assertEqual(
        "http://www.iteh.at/de/rss/",
        rss_page.feed,
        "Should get rss feed",
    )

    atom_page = metainspector.get("http://www.tea-tron.com/jbravo/blog/")
    self.assertEqual(
        "http://www.tea-tron.com/jbravo/blog/feed/",
        atom_page.feed,
        "Should get atom feed",
    )

    feedless_page = metainspector.get("http://www.alazan.com")
    self.assertIsNone(feedless_page.feed, "Should return None if no feed found")
def test_should_find_og_image(self):
    # Both pages under test are expected to carry an og:image.
    onion_expected = "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
    onion_page = metainspector.get(
        "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/"
    )
    self.assertEqual(onion_expected, onion_page.image, "Should find the og:image")

    youtube_expected = "http://i2.ytimg.com/vi/iaGSSrp49uc/mqdefault.jpg"
    youtube_page = metainspector.get("http://www.youtube.com/watch?v=iaGSSrp49uc")
    self.assertEqual(youtube_expected, youtube_page.image, "Should find image on youtube")
def test_should_find_all_page_images(self):
    # First page: .images must be exactly the one known image URL.
    alert_page = metainspector.get("http://pagerankalert.com")
    self.assertEqual(
        ['http://pagerankalert.com/images/pagerank_alert.png?1305794559'],
        alert_page.images,
        "Should find all page images",
    )

    # Second page: .images is checked by count first, then by exact
    # content and order (compared as one "; "-joined string).
    result = metainspector.get("https://twitter.com/markupvalidator")
    image_count = len(result.images)
    self.assertEqual(
        6,
        image_count,
        "Should find 6 images on twitter (image without src should be ignored",
    )

    expected_joined = "https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png; https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_normal.png; https://twimg0-a.akamaihd.net/profile_images/2293774732/v0pgo4xpdd9rou2xq5h0_normal.png; https://twimg0-a.akamaihd.net/profile_images/1538528659/jaime_nov_08_normal.jpg; https://si0.twimg.com/sticky/default_profile_images/default_profile_6_mini.png; https://twimg0-a.akamaihd.net/a/1342841381/images/bigger_spinner.gif"
    actual_joined = "; ".join(result.images)
    self.assertEqual(expected_joined, actual_joined, "Should find images on twitter")
def test_basic_scrape_hastitle_noimage(self):
    # The page under test has a title but no og:image.
    result = metainspector.get("http://pagerankalert.com")

    expected_title = "PageRankAlert.com :: Track your PageRank changes & receive alerts"
    self.assertEqual(expected_title, result.title, "Should get the title")

    # With no og:image present, .image must be None.
    self.assertIsNone(result.image, "Should not find an image")
def test_edit_url_should_update_the_url(self):
    # Reassigning .url on an existing page must be reflected by url,
    # scheme, host and root_url.
    page = metainspector.get("http://first.com")
    page.url = "https://second.com/"

    # (expected value, attribute to read, failure message), checked in
    # this order; each attribute is read only when its turn comes.
    expectations = (
        ("https://second.com/", "url", "Should update the url"),
        ("https", "scheme", "Should update the scheme"),
        ("second.com", "host", "Should update the host"),
        ("https://second.com/", "root_url", "Should update the root url"),
    )
    for wanted, attribute, failure_note in expectations:
        self.assertEqual(wanted, getattr(page, attribute), failure_note)
def test_edit_url_should_add_missing_scheme_and_normalize(self):
    # Assigning a bare host to .url must read back with "http://"
    # prepended and a trailing slash appended.
    page = metainspector.get("http://first.com")
    page.url = "second.com"

    normalized = page.url
    self.assertEqual(
        "http://second.com/",
        normalized,
        "Should add the missing scheme and normalize",
    )
def test_should_return_the_host(self):
    # .host must be "example.com" for http, https and scheme-less input.
    expected_host = "example.com"
    failure_note = "Should return the host"

    plain_http = metainspector.get("http://example.com")
    self.assertEqual(expected_host, plain_http.host, failure_note)

    over_https = metainspector.get(
        "https://example.com/",
        request_function=ignore_ssl_request_function,
    )
    self.assertEqual(expected_host, over_https.host, failure_note)

    without_scheme = metainspector.get("example.com")
    self.assertEqual(expected_host, without_scheme.host, failure_note)
def test_should_return_the_scheme(self):
    # .scheme must mirror an explicit scheme and be "http" when the
    # input URL has none.
    failure_note = "Should return the scheme"

    plain_http = metainspector.get("http://example.com")
    self.assertEqual("http", plain_http.scheme, failure_note)

    over_https = metainspector.get(
        "https://example.com/",
        request_function=ignore_ssl_request_function,
    )
    self.assertEqual("https", over_https.scheme, failure_note)

    without_scheme = metainspector.get("example.com")
    self.assertEqual("http", without_scheme.scheme, failure_note)
def test_should_accept_url_with_international_characters(self):
    # The non-ASCII "é" in the path must come back percent-encoded
    # as "%C3%A9" in .url.
    page = metainspector.get("http://www.international.com/olé")
    self.assertEqual(
        "http://www.international.com/ol%C3%A9",
        page.url,
        "Should accept an URL with international characters",
    )
def test_should_use_http_as_default_scheme(self):
    # Both a bare host and a "//host" URL must end up as
    # "http://example.com/". Cases run in order; each get() happens
    # only when its case is reached.
    cases = (
        ("example.com", "Should use http:// as a default scheme"),
        ("//example.com", "Should use http:// as a default scheme on omitted scheme url"),
    )
    for raw_url, failure_note in cases:
        self.assertEqual(
            "http://example.com/",
            metainspector.get(raw_url).url,
            failure_note,
        )
def test_should_accept_an_url_with_scheme(self):
    # URLs that already carry a scheme must be returned unchanged.
    failure_note = "Should accept an URL with scheme"

    http_page = metainspector.get("http://example.com/")
    self.assertEqual("http://example.com/", http_page.url, failure_note)

    https_page = metainspector.get(
        "https://example.com/",
        request_function=ignore_ssl_request_function,
    )
    self.assertEqual("https://example.com/", https_page.url, failure_note)
def test_should_normalize_urls(self):
    # A URL given without a trailing slash must read back with one.
    page = metainspector.get("http://example.com")
    self.assertEqual(
        "http://example.com/",
        page.url,
        "Should normalize URLs",
    )