def test_question_mark_are_removed_from_target(self):
    """A URL and its ``?query`` variant must yield one query-free target.

    Passes a plain URL together with the same URL carrying a query string
    to ``get_clean_urls`` and expects exactly one ``https`` URL back,
    without the query part.
    """
    expected = ['https://www.test.de/test_2']
    cleaned = get_clean_urls([
        'http://www.test.de/test_2',
        'http://www.test.de/test_2?something',
    ])
    print(cleaned)
    assert cleaned == expected
def test_anchors_are_removed_from_target(self):
    """A URL and its ``#anchor`` variant must yield one anchor-free target.

    Passes a plain URL together with the same URL carrying a fragment to
    ``get_clean_urls`` and expects exactly one ``https`` URL back, without
    the fragment.
    """
    expected = ['https://www.test.de/test_2']
    cleaned = get_clean_urls([
        'http://www.test.de/test_2',
        'http://www.test.de/test_2#anchor',
    ])
    print(cleaned)
    assert cleaned == expected
def test_get_clean_tasks(self):
    """Check ``get_clean_urls`` on a mixed batch of raw URLs.

    The input mixes http/https variants, a host without ``www``, a URL
    with an anchor and a URL with a query string. The expected result is
    one ``https://www.`` URL per distinct path, in any order.
    """
    raw_urls = [
        'http://www.test.de/test_1',
        'https://www.test.de/test_1',
        'http://www.test.de/test_2',
        'http://test.de/test_2',
        'http://www.test.de/test_3#anchor',
        'http://www.test.de/test_4?something',
    ]
    expected_cleaned_urls = [
        'https://www.test.de/test_1',
        'https://www.test.de/test_2',
        'https://www.test.de/test_3',
        'https://www.test.de/test_4',
    ]
    computed_cleaned_urls = get_clean_urls(raw_urls)
    print(computed_cleaned_urls)
    # Compare sorted copies: still order-insensitive, but unlike a length
    # check plus membership test, a duplicated entry in the result can no
    # longer hide a missing expected URL.
    assert sorted(computed_cleaned_urls) == sorted(expected_cleaned_urls)
def test_anchor_is_removed(self):
    """A single URL with an upper-case ``#ANCHOR`` loses its fragment.

    Expects ``get_clean_urls`` to return the one ``https`` URL without the
    fragment.
    """
    expected = ['https://www.test.de/test_2']
    cleaned = get_clean_urls(['http://www.test.de/test_2#ANCHOR'])
    print(cleaned)
    assert cleaned == expected
def test_duplicate_is_ignored(self):
    """The same raw URL given twice must appear only once in the result.

    Expects ``get_clean_urls`` to return a single ``https`` URL.
    """
    expected = ['https://www.test.de/test_2']
    cleaned = get_clean_urls([
        'http://www.test.de/test_2',
        'http://www.test.de/test_2',
    ])
    print(cleaned)
    assert cleaned == expected
def test_with_www_and_without_www_articles_result_in_only_one_target(self):
    """``www`` and non-``www`` forms of one URL must yield a single target.

    Expects ``get_clean_urls`` to return only the ``www`` form of the URL.
    """
    expected = ['https://www.test.de/test_1']
    cleaned = get_clean_urls([
        'https://www.test.de/test_1',
        'https://test.de/test_1',
    ])
    print(cleaned)
    assert cleaned == expected