def test_write_manifest_file_wrong_algorithm(self, mock_session, tmpdir):
     """Verify that requesting a manifest for an unknown algorithm fails.

     The mocked session hands back a ``MockResponse200`` instance, and a
     request for a 'sha2' manifest must raise ``wc.WASAPIManifestError``.
     """
     mock_session.return_value.get.return_value = MockResponse200()
     target_dir = tmpdir.mkdir('downloads')
     dl = wc.Downloads(WASAPI_URL, download=False, destination=str(target_dir))
     # 'sha2' is the algorithm this test expects to be rejected.
     with pytest.raises(wc.WASAPIManifestError):
         dl.write_manifest_file('sha2')
 def test_populate_downloads_urls(self, mock_session):
     """Verify ``urls`` is filled with the first location of each file."""
     mock_session.return_value.get.return_value = MockResponse200()
     dl = wc.Downloads(WASAPI_URL, download=False)
     expected_urls = (
         'https://warcs.example.com/webdatafile/AIT-JOB256123-00000.warc.gz',
         'https://warcs.example.com/webdatafile/AIT-JOB256118-00000.warc.gz',
     )
     assert len(dl.urls) == 2
     # Check each URL separately so a failure names the missing one.
     for expected in expected_urls:
         assert expected in dl.urls
 def test_populate_downloads_generate_manifest(self, mock_session, tmpdir):
     """Verify a checksum manifest file is written for every algorithm.

     After ``generate_manifests()`` the destination directory must hold
     exactly the md5 and sha1 manifest files.
     """
     mock_session.return_value.get.return_value = MockResponse200()
     target_dir = tmpdir.mkdir('downloads')
     dl = wc.Downloads(WASAPI_URL, download=False, destination=str(target_dir))
     dl.generate_manifests()
     written = target_dir.listdir()
     assert len(written) == 2
     expected_names = ('manifest-md5.txt', 'manifest-sha1.txt')
     for manifest_name in expected_names:
         assert target_dir.join(manifest_name) in written
# Example no. 4
# 0
 def test_populate_downloads(self, mock_session):
     """Verify ``get_q`` is a queue holding the expected ``DataFile`` items."""
     mock_session.return_value.get.return_value = MockResponse200()
     dl = wc.Downloads(WASAPI_URL, download=True)
     queue = dl.get_q
     assert queue.qsize() == 2
     # Empty the JoinableQueue before the test finishes; leaving items in
     # it leads to a BrokenPipeError. There may be a cleaner way to do this.
     while queue.qsize() != 0:
         item = queue.get()
         assert isinstance(item, wc.DataFile)
         queue.task_done()
 def test_write_manifest_file(self, mock_session, tmpdir):
     """Verify a manifest file is written for the requested algorithm.

     Only ``manifest-sha1.txt`` should appear in the destination, and each
     line must pair a sha1 digest with the file's path under that directory.
     """
     mock_session.return_value.get.return_value = MockResponse200()
     target_dir = tmpdir.mkdir('downloads')
     dl = wc.Downloads(WASAPI_URL, download=False, destination=str(target_dir))
     dl.write_manifest_file('sha1')
     assert len(target_dir.listdir()) == 1
     # {p} is the destination directory and {s} the platform path separator.
     template = (
         'edef6bca652d75d0587ef411d5f028335341b074  {p}{s}AIT-JOB256123-00000.warc.gz\n'
         '54a466421471ef7d8cb4d6bbfb85afd76022a378  {p}{s}ARCHIVEIT-JOB256118-00000.warc.gz\n'
     )
     expected = template.format(p=target_dir, s=os.sep)
     manifest = target_dir.join('manifest-sha1.txt')
     assert manifest.read() == expected
 def test_populate_downloads_manifest(self, mock_session):
     """Verify the ``checksums`` dict is populated for md5 and sha1.

     Each algorithm key is expected to map to a list of
     ``(digest, filename)`` tuples.
     """
     mock_session.return_value.get.return_value = MockResponse200()
     dl = wc.Downloads(WASAPI_URL, download=False)
     expected_md5 = [
         ('61f818912d1f39bc9dd15d4b87461110', 'AIT-JOB256123-00000.warc.gz'),
         ('748120fd9672b22df5942bb44e9cde81', 'ARCHIVEIT-JOB256118-00000.warc.gz'),
     ]
     expected_sha1 = [
         ('edef6bca652d75d0587ef411d5f028335341b074', 'AIT-JOB256123-00000.warc.gz'),
         ('54a466421471ef7d8cb4d6bbfb85afd76022a378', 'ARCHIVEIT-JOB256118-00000.warc.gz'),
     ]
     assert len(dl.checksums)
     assert dl.checksums['md5'] == expected_md5
     assert dl.checksums['sha1'] == expected_sha1
# Example no. 7
# 0
 def test_populate_downloads_multi_page(self, mock_session):
     """Verify the queue collects items across multiple result pages."""
     # Point the first page's "next" field at a second page; the second
     # response is the unmodified default.
     first_page = WASAPI_TEXT.replace('"next":null', '"next":"http://test?page=2"')
     mock_session.return_value.get.side_effect = [
         MockResponse200(first_page),
         MockResponse200(),
     ]
     dl = wc.Downloads(WASAPI_URL, download=True)
     queue = dl.get_q
     assert queue.qsize() == 4
     # Empty the JoinableQueue so the test does not end in a BrokenPipeError.
     while queue.qsize() != 0:
         item = queue.get()
         assert isinstance(item, wc.DataFile)
         queue.task_done()
 def test_populate_downloads_no_get_q(self, mock_session):
     """Verify no ``get_q`` attribute exists when ``download=False``."""
     mock_session.return_value.get.return_value = MockResponse200()
     dl = wc.Downloads(WASAPI_URL, download=False)
     # Plain attribute access must fail because the attribute was never set.
     with pytest.raises(AttributeError):
         dl.get_q