def test_resume_large_file_upload(self):
    def wrapped_write(*args, **kwargs):
        data = args[1]
        # Exit only on last block
        if len(data) < arvados.config.KEEP_BLOCK_SIZE:
            raise SystemExit("Simulated error")
        return self.arvfile_write(*args, **kwargs)
    with mock.patch('arvados.arvfile.ArvadosFileWriter.write',
                    autospec=True) as mocked_write:
        mocked_write.side_effect = wrapped_write
        writer = arv_put.ArvPutUploadJob([self.large_file_name],
                                         replication_desired=1)
        with self.assertRaises(SystemExit):
            writer.start(save_collection=False)
        # Confirm that the file was partially uploaded
        self.assertGreater(writer.bytes_written, 0)
        self.assertLess(writer.bytes_written,
                        os.path.getsize(self.large_file_name))
    # Retry the upload
    writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
                                      replication_desired=1)
    writer2.start(save_collection=False)
    self.assertEqual(writer.bytes_written + writer2.bytes_written - writer2.bytes_skipped,
                     os.path.getsize(self.large_file_name))
    writer2.destroy_cache()
def test_no_resume_when_no_cache(self):
    def wrapped_write(*args, **kwargs):
        data = args[1]
        # Exit only on last block
        if len(data) < arvados.config.KEEP_BLOCK_SIZE:
            # Simulate a checkpoint before quitting.
            self.writer._update()
            raise SystemExit("Simulated error")
        return self.arvfile_write(*args, **kwargs)
    with mock.patch('arvados.arvfile.ArvadosFileWriter.write',
                    autospec=True) as mocked_write:
        mocked_write.side_effect = wrapped_write
        writer = arv_put.ArvPutUploadJob([self.large_file_name],
                                         replication_desired=1)
        # We'll be accessing from inside the wrapper
        self.writer = writer
        with self.assertRaises(SystemExit):
            writer.start(save_collection=False)
        # Confirm that the file was partially uploaded
        self.assertGreater(writer.bytes_written, 0)
        self.assertLess(writer.bytes_written,
                        os.path.getsize(self.large_file_name))
    # Retry the upload, this time without cache usage
    writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
                                      replication_desired=1,
                                      resume=False,
                                      use_cache=False)
    writer2.start(save_collection=False)
    self.assertEqual(writer2.bytes_skipped, 0)
    self.assertEqual(writer2.bytes_written,
                     os.path.getsize(self.large_file_name))
    writer2.destroy_cache()
    del self.writer
def test_expected_bytes_for_tree(self):
    tree = self.make_tmpdir()
    shutil.copyfile(__file__, os.path.join(tree, 'one'))
    shutil.copyfile(__file__, os.path.join(tree, 'two'))

    writer = arv_put.ArvPutUploadJob([tree])
    self.assertEqual(self.TEST_SIZE * 2, writer.bytes_expected)
    writer = arv_put.ArvPutUploadJob([tree, __file__])
    self.assertEqual(self.TEST_SIZE * 3, writer.bytes_expected)
def test_writer_works_with_cache(self):
    with tempfile.NamedTemporaryFile() as f:
        # NamedTemporaryFile opens in binary mode, so write bytes
        f.write(b'foo')
        f.flush()
        cwriter = arv_put.ArvPutUploadJob([f.name])
        cwriter.start(save_collection=False)
        self.assertEqual(3, cwriter.bytes_written - cwriter.bytes_skipped)
        # Don't destroy the cache, and start another upload
        cwriter_new = arv_put.ArvPutUploadJob([f.name])
        cwriter_new.start(save_collection=False)
        cwriter_new.destroy_cache()
        self.assertEqual(0,
                         cwriter_new.bytes_written - cwriter_new.bytes_skipped)
def test_symlinks_are_not_followed_when_requested(self):
    cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink],
                                      follow_links=False)
    cwriter.start(save_collection=False)
    self.assertNotIn('linkeddir', cwriter.manifest_text())
    self.assertNotIn('linkedfile', cwriter.manifest_text())
    cwriter.destroy_cache()
def test_progress_reporting(self):
    with tempfile.NamedTemporaryFile() as f:
        f.write(b'foo')
        f.flush()
        for expect_count in (None, 8):
            progression, reporter = self.make_progress_tester()
            cwriter = arv_put.ArvPutUploadJob([f.name],
                                              reporter=reporter)
            cwriter.bytes_expected = expect_count
            cwriter.start(save_collection=False)
            cwriter.destroy_cache()
            self.assertIn((3, expect_count), progression)
def test_dry_run_feature(self):
    def wrapped_write(*args, **kwargs):
        data = args[1]
        # Exit only on last block
        if len(data) < arvados.config.KEEP_BLOCK_SIZE:
            # Simulate a checkpoint before quitting.
            self.writer._update()
            raise SystemExit("Simulated error")
        return self.arvfile_write(*args, **kwargs)
    with mock.patch('arvados.arvfile.ArvadosFileWriter.write',
                    autospec=True) as mocked_write:
        mocked_write.side_effect = wrapped_write
        writer = arv_put.ArvPutUploadJob([self.large_file_name],
                                         replication_desired=1)
        # We'll be accessing from inside the wrapper
        self.writer = writer
        with self.assertRaises(SystemExit):
            writer.start(save_collection=False)
        # Confirm that the file was partially uploaded
        self.assertGreater(writer.bytes_written, 0)
        self.assertLess(writer.bytes_written,
                        os.path.getsize(self.large_file_name))
    # Retry the upload using dry_run to check if there is a pending upload
    writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
                                      replication_desired=1,
                                      dry_run=True)
    with self.assertRaises(arv_put.ArvPutUploadIsPending):
        writer2.start(save_collection=False)
    # Complete the pending upload
    writer3 = arv_put.ArvPutUploadJob([self.large_file_name],
                                      replication_desired=1)
    writer3.start(save_collection=False)
    # Confirm there's no pending upload with dry_run=True
    writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
                                      replication_desired=1,
                                      dry_run=True)
    with self.assertRaises(arv_put.ArvPutUploadNotPending):
        writer4.start(save_collection=False)
    writer4.destroy_cache()
    # Test obvious cases
    with self.assertRaises(arv_put.ArvPutUploadIsPending):
        arv_put.ArvPutUploadJob([self.large_file_name],
                                replication_desired=1,
                                dry_run=True,
                                resume=False,
                                use_cache=False)
    with self.assertRaises(arv_put.ArvPutUploadIsPending):
        arv_put.ArvPutUploadJob([self.large_file_name],
                                replication_desired=1,
                                dry_run=True,
                                resume=False)
    del self.writer
def test_graceful_exit_while_repacking_small_blocks(self):
    def wrapped_commit(*args, **kwargs):
        raise SystemExit("Simulated error")
    with mock.patch('arvados.arvfile._BlockManager.commit_bufferblock',
                    autospec=True) as mocked_commit:
        mocked_commit.side_effect = wrapped_commit
        # Upload a little more than 1 block; wrapped_commit will make the
        # first block commit fail.
        # arv-put should not exit with an exception from trying to commit
        # the collection while it's in an inconsistent state.
        writer = arv_put.ArvPutUploadJob([self.small_files_dir],
                                         replication_desired=1)
        try:
            with self.assertRaises(SystemExit):
                writer.start(save_collection=False)
        except arvados.arvfile.UnownedBlockError:
            self.fail("arv-put command is trying to use a corrupted BlockManager. See https://dev.arvados.org/issues/11002")
    writer.destroy_cache()
def test_expected_bytes_for_device(self):
    writer = arv_put.ArvPutUploadJob(['/dev/null'])
    self.assertIsNone(writer.bytes_expected)
    writer = arv_put.ArvPutUploadJob([__file__, '/dev/null'])
    self.assertIsNone(writer.bytes_expected)
def test_expected_bytes_for_file(self):
    writer = arv_put.ArvPutUploadJob([__file__])
    self.assertEqual(self.TEST_SIZE, writer.bytes_expected)
def test_writer_upload_directory(self):
    cwriter = arv_put.ArvPutUploadJob([self.tempdir])
    cwriter.start(save_collection=False)
    cwriter.destroy_cache()
    self.assertEqual(1024 * (1 + 2 + 3 + 4 + 5), cwriter.bytes_written)
def test_writer_works_without_cache(self):
    cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False)
    cwriter.start(save_collection=False)
    self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:null\n",
                     cwriter.manifest_text())
def test_passing_nonexistant_path_raise_exception(self):
    uuid_str = str(uuid.uuid4())
    with self.assertRaises(arv_put.PathDoesNotExistError):
        arv_put.ArvPutUploadJob(
            ["/this/path/does/not/exist/{}".format(uuid_str)])
def test_symlinks_are_followed_by_default(self):
    cwriter = arv_put.ArvPutUploadJob([self.tempdir_with_symlink])
    cwriter.start(save_collection=False)
    self.assertIn('linkeddir', cwriter.manifest_text())
    self.assertIn('linkedfile', cwriter.manifest_text())
    cwriter.destroy_cache()