Exemple #1
0
 def test_urls_storage_write_new_urls(self):
     """Page URLs passed to ``write`` are expected after the base URL."""
     pages = [
         'http://podcast-site.com/page/1',
         'http://podcast-site.com/page/2',
         'http://podcast-site.com/page/3',
     ]
     # Build the expectation up front so it is independent of whatever
     # ``write`` does with the list it receives.
     expected = ['http://podcast-site.com/'] + pages
     storage = UrlsStorage('http://podcast-site.com/')
     storage.write(pages)
     assert storage == expected
Exemple #2
0
def build_kwargs_dark_keeper(base_url_raw, mongo_uri_raw):
    """Build a dict of freshly constructed collaborator objects.

    Keys are ``http_client``, ``urls_storage``, ``data_storage`` and
    ``export_mongo``. ``base_url_raw`` is forwarded to ``UrlsStorage`` and
    ``mongo_uri_raw`` to ``ExportMongo``; the HTTP client is created with
    ``delay=0`` and a fixed browser user-agent string.
    """
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125'
    )
    return {
        'http_client': HttpClient(delay=0, user_agent=user_agent),
        'urls_storage': UrlsStorage(base_url=base_url_raw),
        'data_storage': DataStorage(),
        'export_mongo': ExportMongo(mongo_uri=mongo_uri_raw),
    }
Exemple #3
0
        data = []
        for post_item in content.get_block_items(
                '.posts-list .posts-list-item'):
            post_data = dict(
                title=post_item.parse_text('.number-title'),
                desc=post_item.parse_text('.post-podcast-content'),
                mp3=post_item.parse_attr('.post-podcast-content audio', 'src'),
            )

            if post_data['title'] and post_data['mp3']:
                data.append(post_data)

        return data


if __name__ == '__main__':
    # Script entry point: assemble a DarkKeeper for radio-t.com and run it.
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125'
    )
    mongo_uri = 'mongodb://localhost/podcasts.radio-t.com'

    pk = DarkKeeper(
        http_client=HttpClient(delay=2, user_agent=user_agent),
        parser=PodcastParser(),
        urls_storage=UrlsStorage(base_url='https://radio-t.com/'),
        data_storage=DataStorage(),
        export_mongo=ExportMongo(mongo_uri=mongo_uri),
    )
    pk.run()
Exemple #4
0
 def test_urls_storage(self):
     """A storage built from a valid base URL should equal ``[base_url]``."""
     storage = UrlsStorage('http://podcast-site.com/')
     expected = ['http://podcast-site.com/']
     assert storage == expected
Exemple #5
0
 def test_urls_storage_write_new_urls_blank_value(self):
     """Writing a list of blank values (``''``, ``None``) must add nothing."""
     storage = UrlsStorage('http://podcast-site.com/')
     storage.write(['', None])
     expected = ['http://podcast-site.com/']
     assert storage == expected
Exemple #6
0
 def test_urls_storage_write_validation_wrong_url(self):
     """Writing malformed URL strings must leave only the base URL stored."""
     storage = UrlsStorage('http://podcast-site.com/')
     malformed = ('wrong url', '123 456 789', 'test url for validation')
     for candidate in malformed:
         # Each string is written on its own, not wrapped in a list.
         storage.write(candidate)
     assert storage == ['http://podcast-site.com/']
Exemple #7
0
 def test_urls_storage_write_validation_only_domain(self):
     """Writing a bare domain string must not add it to the storage."""
     storage = UrlsStorage('http://podcast-site.com/')
     storage.write('wrong-url.ru')
     expected = ['http://podcast-site.com/']
     assert storage == expected
Exemple #8
0
 def test_urls_storage_write_validation_blank_url(self):
     """Writing an empty string must leave only the base URL stored."""
     storage = UrlsStorage('http://podcast-site.com/')
     storage.write('')
     expected = ['http://podcast-site.com/']
     assert storage == expected
Exemple #9
0
 def test_urls_storage_validation_only_domain(self):
     """A storage built from a bare domain string should equal ``[]``."""
     storage = UrlsStorage('wrong-url.ru')
     assert storage == []
Exemple #10
0
 def test_urls_storage_validation_wrong_url(self):
     """A storage built from a malformed base URL should equal ``[]``."""
     malformed = ('wrong url', '123 456 789', 'test url for validation')
     for candidate in malformed:
         storage = UrlsStorage(candidate)
         assert storage == []
Exemple #11
0
 def test_urls_storage_validation_blank_url(self):
     """A storage built from an empty base URL should equal ``[]``."""
     storage = UrlsStorage('')
     assert storage == []
Exemple #12
0
 def test_urls_storage_no_base_url(self):
     """Constructing ``UrlsStorage`` without ``base_url`` raises ``TypeError``.

     Only the tail of the message is checked. Python 3.10+ prefixes the
     callable with its qualified name (``UrlsStorage.__init__()``), while
     older versions print the bare ``__init__()``; an exact match against
     either form fails on the other.
     """
     with pytest.raises(TypeError) as e:
         UrlsStorage()
     # Version-independent part of the interpreter's error message.
     assert str(e.value).endswith(
         "missing 1 required positional argument: 'base_url'"
     )