def test_should_return_the_root_url(self):
     # The root URL is expected to be scheme + host + "/" whether the input
     # has no path, a trailing slash, no scheme, or a deeper path.
     failure_note = "Should return the root url"

     plain_http = metainspector.get("http://example.com")
     self.assertEqual("http://example.com/", plain_http.root_url, failure_note)

     # The https case passes ignore_ssl_request_function as the request function.
     secure = metainspector.get("https://example.com/", request_function=ignore_ssl_request_function)
     self.assertEqual("https://example.com/", secure.root_url, failure_note)

     schemeless = metainspector.get("example.com")
     self.assertEqual("http://example.com/", schemeless.root_url, failure_note)

     with_path = metainspector.get("http://example.com/faqs")
     self.assertEqual("http://example.com/", with_path.root_url, failure_note)
 def test_get_rss_feed(self):
     # First site: ``feed`` should be the RSS feed URL.
     rss_page = metainspector.get("http://www.iteh.at")
     self.assertEqual("http://www.iteh.at/de/rss/", rss_page.feed, "Should get rss feed")

     # Second site: same check; the failure message describes it as an atom feed.
     atom_page = metainspector.get("http://www.tea-tron.com/jbravo/blog/")
     self.assertEqual("http://www.tea-tron.com/jbravo/blog/feed/", atom_page.feed, "Should get atom feed")

     # Third site: ``feed`` is expected to be None because no feed is found.
     feedless_page = metainspector.get("http://www.alazan.com")
     self.assertIsNone(feedless_page.feed, "Should return None if no feed found")
    def test_should_find_og_image(self):
        # Page that has an og:image.
        onion_url = "http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/"
        onion_image = metainspector.get(onion_url).image
        self.assertEqual(
            "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg",
            onion_image,
            "Should find the og:image",
        )

        # A YouTube watch page should also report an image.
        youtube_image = metainspector.get("http://www.youtube.com/watch?v=iaGSSrp49uc").image
        self.assertEqual("http://i2.ytimg.com/vi/iaGSSrp49uc/mqdefault.jpg",
                         youtube_image,
                         "Should find image on youtube")
    def test_should_find_all_page_images(self):
        # ``images`` should list every image URL found on the page.
        self.assertEqual(['http://pagerankalert.com/images/pagerank_alert.png?1305794559'],
                         metainspector.get("http://pagerankalert.com").images,
                         "Should find all page images")

        # The Twitter profile page is expected to yield exactly these six
        # images, in this order; per the failure message below, an image
        # without a src must not be counted.
        expected_twitter_images = [
            "https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png",
            "https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_normal.png",
            "https://twimg0-a.akamaihd.net/profile_images/2293774732/v0pgo4xpdd9rou2xq5h0_normal.png",
            "https://twimg0-a.akamaihd.net/profile_images/1538528659/jaime_nov_08_normal.jpg",
            "https://si0.twimg.com/sticky/default_profile_images/default_profile_6_mini.png",
            "https://twimg0-a.akamaihd.net/a/1342841381/images/bigger_spinner.gif",
        ]

        result = metainspector.get("https://twitter.com/markupvalidator")
        # The message previously lacked its closing parenthesis.
        self.assertEqual(6, len(result.images),
                         "Should find 6 images on twitter (image without src should be ignored)")
        # Both sides are joined with "; " so the comparison is string-to-string,
        # exactly as before; only the expected value is now built from a list.
        self.assertEqual("; ".join(expected_twitter_images),
                         "; ".join(result.images),
                         "Should find images on twitter")
    def test_basic_scrape_hastitle_noimage(self):
        # This page has a title but no og:image.
        page = metainspector.get("http://pagerankalert.com")
        expected_title = "PageRankAlert.com :: Track your PageRank changes & receive alerts"

        self.assertEqual(expected_title, page.title, "Should get the title")
        self.assertIsNone(page.image, "Should not find an image")
 def test_edit_url_should_update_the_url(self):
     # Reassigning ``url`` should be reflected by url, scheme, host and root_url.
     page = metainspector.get("http://first.com")
     page.url = "https://second.com/"

     # (attribute name, expected value, failure message), checked in this order.
     expectations = (
         ("url", "https://second.com/", "Should update the url"),
         ("scheme", "https", "Should update the scheme"),
         ("host", "second.com", "Should update the host"),
         ("root_url", "https://second.com/", "Should update the root url"),
     )
     for attribute, expected, message in expectations:
         self.assertEqual(expected, getattr(page, attribute), message)
 def test_edit_url_should_add_missing_scheme_and_normalize(self):
     # Assigning a bare host to ``url`` should read back as a full http URL
     # with a trailing slash.
     page = metainspector.get("http://first.com")
     page.url = "second.com"

     normalized = page.url
     self.assertEqual("http://second.com/", normalized, "Should add the missing scheme and normalize")
 def test_should_return_the_host(self):
     # The host is expected to be "example.com" for http, https and
     # scheme-less inputs alike.
     failure_note = "Should return the host"

     http_page = metainspector.get("http://example.com")
     self.assertEqual("example.com", http_page.host, failure_note)

     # The https case passes ignore_ssl_request_function as the request function.
     https_page = metainspector.get("https://example.com/", request_function=ignore_ssl_request_function)
     self.assertEqual("example.com", https_page.host, failure_note)

     bare_page = metainspector.get("example.com")
     self.assertEqual("example.com", bare_page.host, failure_note)
 def test_should_return_the_scheme(self):
     # An explicit scheme should be reported as given; a scheme-less input
     # is expected to report "http".
     failure_note = "Should return the scheme"

     http_page = metainspector.get("http://example.com")
     self.assertEqual("http", http_page.scheme, failure_note)

     # The https case passes ignore_ssl_request_function as the request function.
     https_page = metainspector.get("https://example.com/", request_function=ignore_ssl_request_function)
     self.assertEqual("https", https_page.scheme, failure_note)

     bare_page = metainspector.get("example.com")
     self.assertEqual("http", bare_page.scheme, failure_note)
 def test_should_accept_url_with_international_characters(self):
     # The non-ASCII path character is expected to come back percent-encoded.
     page = metainspector.get("http://www.international.com/olé")
     percent_encoded = "http://www.international.com/ol%C3%A9"
     self.assertEqual(percent_encoded, page.url, "Should accept an URL with international characters")
 def test_should_use_http_as_default_scheme(self):
     # Both a bare host and a "//host" URL should come back with the http scheme.
     inputs = (
         ("example.com", "Should use http:// as a default scheme"),
         ("//example.com", "Should use http:// as a default scheme on omitted scheme url"),
     )
     for raw_url, message in inputs:
         self.assertEqual("http://example.com/", metainspector.get(raw_url).url, message)
 def test_should_accept_an_url_with_scheme(self):
     # URLs that already carry a scheme should be returned unchanged.
     failure_note = "Should accept an URL with scheme"

     http_page = metainspector.get("http://example.com/")
     self.assertEqual("http://example.com/", http_page.url, failure_note)

     # The https case passes ignore_ssl_request_function as the request function.
     https_page = metainspector.get("https://example.com/", request_function=ignore_ssl_request_function)
     self.assertEqual("https://example.com/", https_page.url, failure_note)
 def test_should_normalize_urls(self):
     # A URL without a trailing slash is expected to gain one.
     page = metainspector.get("http://example.com")
     self.assertEqual("http://example.com/", page.url, "Should normalize URLs")