# Imports assumed by these tests (normally declared at the top of the test
# module; the azure/local_files/tempdir fixtures, azure_teardown and test_dir
# come from the suite's shared testing helpers):
import os
import time

from azure.datalake.store.core import AzureDLPath
from azure.datalake.store.multithread import ADLUploader
from azure.datalake.store.transfer import ADLTransferClient


def test_update_progress(azure):
    """
    Upload a 32-byte file in 8-byte chunks and check that progress is
    reported incrementally.
    """
    calls = []

    def recording_callback(progress, total):
        calls.append((progress, total))

    def transfer(adlfs, src, dst, offset, size, blocksize, buffersize,
                 shutdown_event=None):
        return size, None

    client = ADLTransferClient(azure, transfer=transfer, chunksize=8,
                               chunked=True,
                               progress_callback=recording_callback)
    client.submit('foo', AzureDLPath('bar'), 32)
    client.run()

    assert calls == [(8, 32), (16, 32), (24, 32), (32, 32)]

def test_merge(azure):
    calls = []

    def merge(adlfs, outfile, files, shutdown_event=None, overwrite=False):
        calls.append(files)

    def transfer(adlfs, src, dst, offset, size, blocksize, buffersize,
                 shutdown_event=None):
        return size, None

    class XLoaderMock(object):
        _overwrite = False

    file_size = 32
    chunk_size = 8

    client = ADLTransferClient(azure, parent=XLoaderMock(), transfer=transfer,
                               merge=merge, chunksize=chunk_size, chunked=True)
    client.submit('foo', AzureDLPath('bar'), file_size)
    client.run()

    assert len(calls[0]) == file_size / chunk_size

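# Helper, not a test: builds an ADLTransferClient whose transfer and merge
# callables are no-ops, so tests can exercise the client's bookkeeping
# without performing any I/O against the store.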
def mock_client(adl, nthreads):
    def transfer(adlfs, src, dst, offset, size, buffersize, blocksize,
                 shutdown_event=None):
        pass

    def merge(adlfs, outfile, files, shutdown_event=None):
        pass

    return ADLTransferClient(
        adl,
        'foo',
        transfer=transfer,
        merge=merge,
        nthreads=nthreads)

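# A minimal usage sketch for mock_client (hypothetical; not part of the
# original suite). It relies only on behaviour demonstrated elsewhere in
# this file: after submit(), per-file progress entries exist in the
# 'pending' state until run() is called.
def test_mock_client_sketch(azure):
    client = mock_client(azure, nthreads=2)
    client.submit('foo', AzureDLPath('bar'), 16)
    assert client.progress[0].state == 'pending'
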
def test_upload_one_empty_file(local_files, azure):
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # single chunk, empty file
        up = ADLUploader(azure, test_dir / 'emptyfile', emptyfile, nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / 'emptyfile')['length'] == 0

        azure.rm(test_dir / 'emptyfile')

def test_upload_single_file_in_dir(tempdir, azure):
    with azure_teardown(azure):
        lpath_dir = tempdir
        lfilename = os.path.join(lpath_dir, 'singlefile')
        with open(lfilename, 'wb') as f:
            f.write(b'0123456789')

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        up = ADLUploader(azure, test_dir / 'singlefiledir', lpath_dir,
                         nthreads=1, overwrite=True)
        assert azure.info(test_dir / 'singlefiledir' / 'singlefile')['length'] == 10

        azure.rm(test_dir / 'singlefiledir' / 'singlefile')

def test_submit_and_run(azure):
    def transfer(adlfs, src, dst, offset, size, blocksize, buffersize,
                 shutdown_event=None):
        time.sleep(0.1)
        return size, None

    client = ADLTransferClient(azure, transfer=transfer, chunksize=8,
                               chunked=False)

    client.submit('foo', 'bar', 16)
    client.submit('abc', '123', 8)

    nfiles = len(client.progress)
    assert nfiles == 2
    assert len([client.progress[i].chunks for i in range(nfiles)])
    assert all([client.progress[i].state == 'pending' for i in range(nfiles)])
    assert all([chunk.state == 'pending'
                for f in client.progress
                for chunk in f.chunks])

    # 16 bytes at chunksize=8 yields two chunks for 'bar'; 8 bytes yield a
    # single chunk for '123'
    expected = {('bar', 0), ('bar', 8), ('123', 0)}
    assert {(chunk.name, chunk.offset)
            for f in client.progress
            for chunk in f.chunks} == expected

    client.run()

    assert all([client.progress[i].state == 'finished' for i in range(nfiles)])
    assert all([chunk.state == 'finished'
                for f in client.progress
                for chunk in f.chunks])
    assert all([chunk.expected == chunk.actual
                for f in client.progress
                for chunk in f.chunks])

def test_upload_empty_folder(tempdir, azure):
    with azure_teardown(azure):
        os.mkdir(os.path.join(tempdir, "dir1"))
        os.mkdir(os.path.join(tempdir, "dir1", "b"))

        with open(os.path.join(tempdir, "dir1", "file.txt"), 'wb') as f:
            f.write(b'0123456789')

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # upload the tree, including the empty subdirectory
        up = ADLUploader(azure, test_dir / "dir1",
                         os.path.join(tempdir, "dir1"), nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / "dir1" / "b")['type'] == 'DIRECTORY'

        azure.rm(test_dir / "dir1", recursive=True)

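# test_shutdown exercises cooperative cancellation: run(monitor=False)
# returns while chunks are still in flight, and shutdown() sets the
# shutdown_event polled by the transfer callable, letting it exit cleanly
# and report its chunk as finished.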
def test_shutdown(azure):
    def transfer(adlfs, src, dst, offset, size, blocksize, buffersize,
                 retries=5, shutdown_event=None):
        while shutdown_event and not shutdown_event.is_set():
            time.sleep(0.1)
        return size, None

    client = ADLTransferClient(azure, transfer=transfer, chunksize=1,
                               chunked=False)
    client.submit('foo', 'bar', 16)
    client.run(monitor=False)
    client.shutdown()

    assert client.progress[0].state == 'finished'

def test_upload_one(local_files, azure):
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # single chunk
        up = ADLUploader(azure, test_dir / 'littlefile', littlefile,
                         nthreads=1, overwrite=True)
        assert azure.info(test_dir / 'littlefile')['length'] == 10

        # multiple chunks, one thread
        size = 10000
        up = ADLUploader(azure, test_dir / 'bigfile', bigfile, nthreads=1,
                         chunksize=size // 5, client=client, run=False,
                         overwrite=True)
        up.run()
        assert azure.info(test_dir / 'bigfile')['length'] == size

        azure.rm(test_dir / 'bigfile')