Exemple #1
0
 def test_no_query_full_address(self):
     """Check that a full URL loses its query and fragment but keeps params.

     ``clean_url`` is called with ``True`` as its second argument; the
     expected result retains the ``;someparam=...;otherparam=...`` path
     parameters while the ``?query`` and ``#frag`` parts are gone.
     """
     raw_url = (
         "http://www.domain.com/index.html;someparam=some;otherparam=other"
         "?query1=val1&query2=val2#frag"
     )
     expected = (
         "http://www.domain.com/index.html;someparam=some;otherparam=other"
     )
     self.assertEqual(CrawlerState.clean_url(raw_url, True), expected)
Exemple #2
0
 def test_no_query_with_fragment_query_and_ending_slash(self):
     """Check the bare host is returned for ``/?query#fragment`` input.

     ``clean_url`` is called with ``True`` as its second argument; the
     trailing slash, query string and fragment are all expected to vanish.
     """
     raw_url = "http://www.domain.com/?query=value#fragment"
     expected = "http://www.domain.com"
     self.assertEqual(CrawlerState.clean_url(raw_url, True), expected)
Exemple #3
0
 def test_no_query_with_queries(self):
     """Check that several query parameters are dropped together.

     ``clean_url`` is called with ``True`` as its second argument; only
     the scheme and host are expected to remain.
     """
     raw_url = "http://www.domain.com/?query=value&query2=value2"
     expected = "http://www.domain.com"
     self.assertEqual(CrawlerState.clean_url(raw_url, True), expected)
Exemple #4
0
 def test_with_fragment_and_ending_slash(self):
     """Check that ``/#fragment`` is reduced to the bare scheme and host."""
     raw_url = "http://www.domain.com/#fragment"
     expected = "http://www.domain.com"
     self.assertEqual(CrawlerState.clean_url(raw_url), expected)
Exemple #5
0
 def test_with_fragment(self):
     """Check that a fragment directly after the host is stripped."""
     raw_url = "http://www.domain.com#fragment"
     expected = "http://www.domain.com"
     self.assertEqual(CrawlerState.clean_url(raw_url), expected)
Exemple #6
0
 def test_with_query_and_ending_slash(self):
     """Check the query survives while the slash before it is removed.

     ``clean_url`` is called with the URL only (no second argument), and
     the expected result still carries ``?query=value``.
     """
     raw_url = "http://www.domain.com/?query=value"
     expected = "http://www.domain.com?query=value"
     self.assertEqual(CrawlerState.clean_url(raw_url), expected)
Exemple #7
0
 def test_with_way_too_much_ending_slash(self):
     """Check that a run of trailing slashes is stripped entirely."""
     raw_url = "http://www.domain.com//////"
     expected = "http://www.domain.com"
     self.assertEqual(CrawlerState.clean_url(raw_url), expected)
Exemple #8
0
 def test_simple_path(self):
     """Check that an already-clean URL is returned unchanged."""
     raw_url = "http://www.domain.com"
     # The input is its own expected output: nothing should be altered.
     self.assertEqual(CrawlerState.clean_url(raw_url), "http://www.domain.com")