def test_urls_storage_write_new_urls(self):
    """Writing three page URLs leaves them in the storage after the base URL.

    The storage is compared against a plain list, so both the content and
    the order (base URL first, then the pages as written) are checked.
    """
    storage = UrlsStorage('http://podcast-site.com/')

    storage.write([
        'http://podcast-site.com/page/1',
        'http://podcast-site.com/page/2',
        'http://podcast-site.com/page/3',
    ])

    expected = [
        'http://podcast-site.com/',
        'http://podcast-site.com/page/1',
        'http://podcast-site.com/page/2',
        'http://podcast-site.com/page/3',
    ]
    assert storage == expected
def build_kwargs_dark_keeper(base_url_raw, mongo_uri_raw):
    """Assemble a dict of collaborator objects keyed by keyword-argument name.

    Args:
        base_url_raw: Value forwarded to ``UrlsStorage`` as ``base_url``.
        mongo_uri_raw: Value forwarded to ``ExportMongo`` as ``mongo_uri``.

    Returns:
        A dict with the keys ``http_client``, ``urls_storage``,
        ``data_storage`` and ``export_mongo``.
        NOTE(review): presumably unpacked into ``DarkKeeper(...)`` together
        with a parser -- confirm against the callers.
    """
    browser_user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125'
    )

    # Collaborators are created in the same order as the keys they fill.
    client = HttpClient(delay=0, user_agent=browser_user_agent)
    urls = UrlsStorage(base_url=base_url_raw)
    records = DataStorage()
    exporter = ExportMongo(mongo_uri=mongo_uri_raw)

    return {
        'http_client': client,
        'urls_storage': urls,
        'data_storage': records,
        'export_mongo': exporter,
    }
# Tail of a parsing routine whose header lies outside this excerpt.
# It builds one dict per post block found in `content`.
data = []
for post_item in content.get_block_items(
        '.posts-list .posts-list-item'):
    post_data = dict(
        title=post_item.parse_text('.number-title'),
        desc=post_item.parse_text('.post-podcast-content'),
        mp3=post_item.parse_attr('.post-podcast-content audio', 'src'),
    )
    # Keep a post only when both its title and its mp3 value are truthy;
    # `desc` is allowed to be empty.
    if post_data['title'] and post_data['mp3']:
        data.append(post_data)

return data


if __name__ == '__main__':
    # Script entry point: build a DarkKeeper from its collaborators and run it.
    pk = DarkKeeper(
        http_client=HttpClient(
            delay=2,
            # The three adjacent literals are concatenated into one
            # user-agent string.
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125',
        ),
        parser=PodcastParser(),
        urls_storage=UrlsStorage(base_url='https://radio-t.com/'),
        data_storage=DataStorage(),
        export_mongo=ExportMongo(
            mongo_uri='mongodb://localhost/podcasts.radio-t.com'),
    )
    pk.run()
def test_urls_storage(self):
    """A storage created from an http base URL equals a one-item list of it."""
    storage = UrlsStorage('http://podcast-site.com/')

    expected = ['http://podcast-site.com/']
    assert storage == expected
def test_urls_storage_write_new_urls_blank_value(self):
    """Writing a list of blank values ('' and None) adds nothing.

    After the write the storage must still equal a list holding only the
    base URL.
    """
    storage = UrlsStorage('http://podcast-site.com/')

    blank_values = ['', None]
    storage.write(blank_values)

    expected = ['http://podcast-site.com/']
    assert storage == expected
def test_urls_storage_write_validation_wrong_url(self):
    """Writing free-text strings that are not URLs leaves the storage as is.

    Each candidate is passed to ``write`` as a single string, and the storage
    is expected to still equal a list holding only the base URL.
    """
    storage = UrlsStorage('http://podcast-site.com/')

    rejected = ('wrong url', '123 456 789', 'test url for validation')
    for candidate in rejected:
        storage.write(candidate)
        assert storage == ['http://podcast-site.com/']
def test_urls_storage_write_validation_only_domain(self):
    """Writing a bare domain with no scheme ('wrong-url.ru') adds nothing."""
    storage = UrlsStorage('http://podcast-site.com/')

    bare_domain = 'wrong-url.ru'
    storage.write(bare_domain)

    expected = ['http://podcast-site.com/']
    assert storage == expected
def test_urls_storage_write_validation_blank_url(self):
    """Writing an empty string leaves only the base URL in the storage."""
    storage = UrlsStorage('http://podcast-site.com/')

    empty_url = ''
    storage.write(empty_url)

    expected = ['http://podcast-site.com/']
    assert storage == expected
def test_urls_storage_validation_only_domain(self):
    """A bare domain with no scheme as base URL yields an empty storage."""
    storage = UrlsStorage('wrong-url.ru')

    expected = []
    assert storage == expected
def test_urls_storage_validation_wrong_url(self):
    """Free-text strings used as the base URL each yield an empty storage."""
    rejected = ('wrong url', '123 456 789', 'test url for validation')

    for candidate in rejected:
        # A fresh storage per candidate keeps the checks independent.
        storage = UrlsStorage(candidate)
        assert storage == []
def test_urls_storage_validation_blank_url(self):
    """An empty string as base URL yields an empty storage."""
    storage = UrlsStorage('')

    expected = []
    assert storage == expected
def test_urls_storage_no_base_url(self):
    """Constructing ``UrlsStorage`` without ``base_url`` raises ``TypeError``.

    The message is checked by its tail rather than by exact equality because
    its prefix differs between interpreter versions.
    """
    with pytest.raises(TypeError) as e:
        UrlsStorage()

    # Python 3.10+ reports the qualified name ("UrlsStorage.__init__() ..."),
    # while older versions report just "__init__() ...". The suffix below is
    # the same in both, so the test passes on either.
    assert str(e.value).endswith(
        "__init__() missing 1 required positional argument: 'base_url'"
    )