Beispiel #1
0
 def test__has_login_form(self):
     """Verify ``_has_login_form`` returns exactly ``True`` for each sample.

     The samples embed "sign in", "Sign in" and "Log in" inside filler
     text.
     """
     samples = (
         'blablablasign inlololakdkljas',
         'blablablaSign inlololakdkljas',
         'blablablaLog inlololakdkljas',
     )
     for sample in samples:
         assert WebSiteInformation._has_login_form(sample) is True
Beispiel #2
0
 def test_to_dict(self, mock_requests):
     """Check that ``to_dict`` exposes each attribute under its own name.

     ``mock_requests`` is set up to return a 200 response carrying a small
     HTML page before the instance is created.
     """
     page = '<!DOCTYPE html><html><head><title>Test</title></head><body><h2>HTML Links</h2><p><a href="https://www.google.com/html/">This will go to google</a></p></body></html>'
     mock_requests.return_value = Mock(status_code=200, text=page)
     site = WebSiteInformation("http://google.com")
     as_dict = site.to_dict()
     # Every exported key must equal the attribute of the same name.
     for field in ("url", "title", "headers", "links", "has_login"):
         assert as_dict[field] == getattr(site, field)
Beispiel #3
0
 def test_get_website_information(self, mock_requests):
     """Check the attributes set by ``get_website_information``.

     The mocked response is a page titled "Test" with one ``<h2>`` and one
     link; the test expects the title, a dict of links with that link
     listed under "internal", and a dict of headers counting one "h2".
     """
     page = '<!DOCTYPE html><html><head><title>Test</title></head><body><h2>HTML Links</h2><p><a href="https://www.google.com/html/">This will go to google</a></p></body></html>'
     mock_requests.return_value = Mock(status_code=200, text=page)
     site = WebSiteInformation("http://google.com")
     site.get_website_information()

     # Title taken from the <title> element.
     assert site.title == "Test"

     # Links are grouped in a dict; the page's only link is "internal".
     assert isinstance(site.links, dict)
     assert "https://www.google.com/html/" in site.links["internal"]

     # Headers are a dict keyed by tag name, holding a count.
     assert isinstance(site.headers, dict)
     assert "h2" in site.headers
     assert site.headers["h2"] == 1
Beispiel #4
0
 def test_from_json(self, mock_requests):
     """Check that ``from_json`` matches a freshly constructed instance.

     The JSON payload carries the URL, a null title and has_login, empty
     headers and empty link lists; every attribute of the restored object
     must equal that of ``WebSiteInformation("http://google.com")``.
     """
     page = '<!DOCTYPE html><html><head><title>Test</title></head><body><h2>HTML Links</h2><p><a href="https://www.google.com/html/">This will go to google</a></p></body></html>'
     mock_requests.return_value = Mock(status_code=200, text=page)
     payload = '{"url": "http://google.com", "title": null, "headers": {}, "links": {"internal": [], "external": [], "unreachable": []}, "has_login": null}'
     restored = WebSiteInformation.from_json(payload)
     fresh = WebSiteInformation("http://google.com")
     for field in ("url", "title", "has_login", "headers", "links"):
         assert getattr(restored, field) == getattr(fresh, field)
Beispiel #5
0
 def test__get_all_links(self, mock_requests):
     """Check that ``_get_all_links`` groups anchors by destination.

     The markup holds a relative link (``./test``), an absolute link to
     another host, and an anchor without ``href``. With the base URL
     ``http://test.com`` the relative link is expected under "internal"
     (resolved to ``http://test.com/test``) and the absolute one under
     "external".
     """
     mock_requests.return_value = Mock(status_code=200)
     html = "<html><a href='./test'></a><a href='http://google.com'></a><a></a></html>"
     soup = BeautifulSoup(html, "html.parser")
     links = WebSiteInformation._get_all_links('http://test.com', soup)
     # isinstance rather than an exact type match: the test should not
     # break if the implementation returns a dict subclass.
     assert isinstance(links, dict)
     assert "http://google.com" in links["external"]
     assert "http://test.com/test" in links["internal"]
Beispiel #6
0
 def test__format_href(self):
     """Check ``_format_href`` against a relative and an absolute href.

     With base ``http://google.com.br``, a href made of several ``../``
     hops followed by ``test`` is expected to become
     ``http://google.com.br/test``, and an absolute href is expected to
     come back unchanged.
     """
     base = "http://google.com.br"
     cases = (
         ("../../../../../test", "http://google.com.br/test"),
         ("http://rte.ie", "http://rte.ie"),
     )
     for href, expected in cases:
         assert expected == WebSiteInformation._format_href(base, href)
Beispiel #7
0
 def test__get_all_headers(self):
     """Check that ``_get_all_headers`` reports one "h1" and one "h2".

     The parsed markup contains exactly one ``<h1>`` and one ``<h2>``.
     """
     markup = "<html><h1>test</h1><h2>test2</h2></html>"
     parsed = BeautifulSoup(markup, "html.parser")
     counts = WebSiteInformation._get_all_headers(parsed)
     for tag in ("h1", "h2"):
         assert 1 == counts[tag]
Beispiel #8
0
 def test__get_website_information_fails_for_invalid_urls(self):
     """Expect ``InvalidURLException`` when constructing with a bad URL.

     The exception must be raised by the constructor call itself.
     """
     bad_url = 'invalid_url'
     with self.assertRaises(InvalidURLException):
         WebSiteInformation(url=bad_url)
Beispiel #9
0
 def test_WebSiteInformation(self):
     """Smoke test: construction must not raise for either URL form.

     Covers a URL with an explicit ``http://`` scheme and the same host
     without a scheme.
     """
     for candidate in ("http://google.com", "google.com"):
         WebSiteInformation(candidate)
Beispiel #10
0
 def test__is_valid_url(self):
     """Check ``is_valid_url`` against accepted and rejected inputs.

     Only ``http://google.com`` is expected to be valid; the scheme-less
     host and both ``invalid_website.com`` variants are expected to be
     rejected. Results must be the exact booleans.
     """
     expectations = (
         ("google.com", False),
         ("http://google.com", True),
         ("invalid_website.com", False),
         ("http://invalid_website.com", False),
     )
     for url, expected in expectations:
         assert WebSiteInformation.is_valid_url(url) is expected
Beispiel #11
0
    def get(self, url):
        """Return the dict form of the website information for ``url``.

        Builds a ``WebSiteInformation`` for ``url``, calls
        ``get_website_information()`` on it, and returns ``to_dict()``.
        """
        site_info = WebSiteInformation(url)
        site_info.get_website_information()
        return site_info.to_dict()