예제 #1
0
 def test_recursively_finds_http_links(self, mock_get):
     """Check that a max_depth=2 crawl returns the links of every page.

     Four pages of three links each are served, so twelve distinct links
     are expected in the result.
     """
     # One group of links per served page, in the order the mocked
     # responses are handed out.
     link_groups = (
         [
             "http://cs.ucla.edu/a",
             "http://cs.ucla.edu/b",
             "http://cs.ucla.edu/c",
         ],
         [
             "http://cs.ucla.edu/d",
             "http://cs.ucla.edu/e",
             "http://cs.ucla.edu/f",
         ],
         [
             "http://cs.ucla.edu/g",
             "http://cs.ucla.edu/h",
             "http://cs.ucla.edu/i",
         ],
         [
             "http://cs.ucla.edu/j",
             "http://cs.ucla.edu/k",
             "http://cs.ucla.edu/l",
         ],
     )
     pages = []
     for group in link_groups:
         pages.append(make_page_links(*group))

     # `text` is installed on the mock's type as a PropertyMock so that
     # successive reads of the attribute step through `pages`.
     response_type = type(mock_get.return_value)
     response_type.text = mock.PropertyMock(side_effect=pages)

     found = get_page_links("http://cs.ucla.edu", max_depth=2)

     expected = [link for group in link_groups for link in group]
     assert len(found) == len(expected)
     assert set(found) == set(expected)
예제 #2
0
 def test_returns_unique_links_in_order(self, mock_get):
     """Check that repeated links come back once, in first-seen order."""
     unique_links = [
         "http://cs.ucla.edu/a",
         "http://cs.ucla.edu/b",
         "http://cs.ucla.edu/c",
     ]
     # The page lists the same three links three times over.
     repeated_links = unique_links * 3
     mock_get.return_value.text = make_page_links(*repeated_links)

     found = get_page_links("http://cs.ucla.edu")

     assert found == unique_links
예제 #3
0
 def test_ignores_links_to_another_domain(self, mock_get):
     """Check that only links on the crawled host are returned."""
     same_host_links = [
         "http://cs.ucla.edu/a",
         "http://cs.ucla.edu/b",
         "http://cs.ucla.edu/c",
     ]
     # Parent domain, sibling subdomain and an unrelated site.
     other_host_links = [
         "http://ucla.edu",
         "http://seas.ucla.edu",
         "http://stanford.edu",
     ]
     page_links = same_host_links + other_host_links
     mock_get.return_value.text = make_page_links(*page_links)

     found = get_page_links("http://cs.ucla.edu")

     assert found == same_host_links
예제 #4
0
 def test_ignores_non_http_links(self, mock_get):
     """Check that links with a non-HTTP scheme are left out of the result."""
     expected_links = [
         "http://cs.ucla.edu/a",
         "http://cs.ucla.edu/b",
         "http://cs.ucla.edu/c",
     ]
     # ftp, rss and an inline data URI: none of these should be returned.
     rejected_links = [
         "ftp://b.com/test",
         "rss://a/feed",
         "data:image/gif;base64,R0lGODlhEAAJAIAAAP///wAAACH5BAEAAAAALAAAAAAQAAkAAAIKhI+py+0Po5yUFQA7",
     ]
     page_links = expected_links + rejected_links
     mock_get.return_value.text = make_page_links(*page_links)

     found = get_page_links("http://cs.ucla.edu")

     assert found == expected_links
예제 #5
0
 def test_returns_empty_if_request_status_not_200(self, mock_get):
     """Check that a failing raise_for_status() yields an empty result."""
     # The mocked response raises as soon as its status is checked.
     status_error = requests.exceptions.RequestException()
     mock_get.return_value.raise_for_status.side_effect = status_error

     found = get_page_links("http://cs.ucla.edu")

     assert not found
예제 #6
0
 def test_returns_empty_array_when_max_depth_reached(self):
     """Check that calling with max_depth=0 yields an empty result."""
     found = get_page_links("http://cs.ucla.edu", max_depth=0)

     assert not found
예제 #7
0
 def test_returns_empty_if_invalid_html_received(self, mock_get):
     """Check that a malformed page with no anchors yields no links."""
     # <body> is never closed, and the document contains no links.
     broken_page = "<html><body></html>"
     mock_get.return_value.text = broken_page

     found = get_page_links("http://cs.ucla.edu")

     assert not found
예제 #8
0
 def test_raises_if_nonrequest_related_exception_occurs(self, mock_get):
     """Check that a RuntimeError raised by the mocked get propagates."""
     start_url = "http://cs.ucla.edu"
     # RuntimeError is not a requests exception, so it is expected to
     # surface to the caller.
     mock_get.side_effect = RuntimeError()

     with pytest.raises(RuntimeError):
         get_page_links(start_url)
예제 #9
0
 def test_returns_empty_if_request_fails(self, mock_get):
     """Check that a RequestException from the mocked get yields no links."""
     # The mocked get itself raises, so no response object is produced.
     mock_get.side_effect = requests.exceptions.RequestException()

     found = get_page_links("http://cs.ucla.edu")

     assert not found