import os
import time

from azure.datalake.store.core import AzureDLPath
from azure.datalake.store.multithread import ADLUploader
from azure.datalake.store.transfer import ADLTransferClient
# NOTE: the azure/local_files/tempdir fixtures, plus test_dir and
# azure_teardown, are assumed to come from the package's test helpers.


def test_update_progress(azure):
    """
    Upload a 32-byte file in 8-byte chunks and check that progress is
    reported incrementally after each chunk completes.
    """
    calls = []

    def recording_callback(progress, total):
        calls.append((progress, total))

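    # mock transfer that immediately reports each chunk as fully written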
    def transfer(adlfs,
                 src,
                 dst,
                 offset,
                 size,
                 blocksize,
                 buffersize,
                 shutdown_event=None):
        return size, None

    client = ADLTransferClient(azure,
                               transfer=transfer,
                               chunksize=8,
                               chunked=True,
                               progress_callback=recording_callback)

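    # a 32-byte file with 8-byte chunks -> progress reported at 8, 16, 24, 32 bytes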
    client.submit('foo', AzureDLPath('bar'), 32)
    client.run()

    assert calls == [(8, 32), (16, 32), (24, 32), (32, 32)]


def test_merge(azure):
    """
    Upload a chunked 32-byte file and check that the merge step receives one
    chunk file per 8-byte chunk.
    """

    calls = []

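    # mock merge that records the list of chunk files it is asked to combine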
    def merge(adlfs, outfile, files, shutdown_event=None, overwrite=False):
        calls.append(files)

    def transfer(adlfs,
                 src,
                 dst,
                 offset,
                 size,
                 blocksize,
                 buffersize,
                 shutdown_event=None):
        return size, None

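    # minimal stand-in for the parent uploader, exposing only the _overwrite flag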
    class XLoaderMock(object):
        _overwrite = False

    file_size = 32
    chunk_size = 8
    client = ADLTransferClient(azure,
                               parent=XLoaderMock(),
                               transfer=transfer,
                               merge=merge,
                               chunksize=chunk_size,
                               chunked=True)

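    # a 32-byte file with 8-byte chunks should hand 4 chunk files to merge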
    client.submit('foo', AzureDLPath('bar'), file_size)
    client.run()

    assert len(calls[0]) == file_size / chunk_size


def mock_client(adl, nthreads):
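    # helper: build a transfer client whose transfer and merge steps are no-ops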
    def transfer(adlfs, src, dst, offset, size, buffersize, blocksize, shutdown_event=None):
        pass

    def merge(adlfs, outfile, files, shutdown_event=None):
        pass

    return ADLTransferClient(
        adl,
        'foo',
        transfer=transfer,
        merge=merge,
        nthreads=nthreads)


def test_upload_one_empty_file(local_files, azure):
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # single chunk, empty file
        up = ADLUploader(azure, test_dir / 'emptyfile', emptyfile, nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / 'emptyfile')['length'] == 0
        azure.rm(test_dir / 'emptyfile')


def test_upload_single_file_in_dir(tempdir, azure):
    with azure_teardown(azure):
        lpath_dir = tempdir
        lfilename = os.path.join(lpath_dir, 'singlefile')
        with open(lfilename, 'wb') as f:
            f.write(b'0123456789')

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        up = ADLUploader(azure, test_dir / 'singlefiledir', lpath_dir, nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / 'singlefiledir' / 'singlefile')['length'] == 10
        azure.rm(test_dir / 'singlefiledir' / 'singlefile')


def test_submit_and_run(azure):
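    # mock transfer that sleeps briefly, then reports the chunk as fully written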
    def transfer(adlfs,
                 src,
                 dst,
                 offset,
                 size,
                 blocksize,
                 buffersize,
                 shutdown_event=None):
        time.sleep(0.1)
        return size, None

    client = ADLTransferClient(azure,
                               transfer=transfer,
                               chunksize=8,
                               chunked=False)

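    # with chunksize=8, the 16-byte file yields two chunks and the 8-byte file one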
    client.submit('foo', 'bar', 16)
    client.submit('abc', '123', 8)

    nfiles = len(client.progress)
    assert nfiles == 2
    assert len([client.progress[i].chunks for i in range(nfiles)])

    assert all([client.progress[i].state == 'pending' for i in range(nfiles)])
    assert all([
        chunk.state == 'pending' for f in client.progress for chunk in f.chunks
    ])

    expected = {('bar', 0), ('bar', 8), ('123', 0)}
    assert {(chunk.name, chunk.offset)
            for f in client.progress for chunk in f.chunks} == expected

    client.run()

    assert all([client.progress[i].state == 'finished' for i in range(nfiles)])
    assert all([
        chunk.state == 'finished' for f in client.progress
        for chunk in f.chunks
    ])
    assert all([
        chunk.expected == chunk.actual for f in client.progress
        for chunk in f.chunks
    ])


def test_upload_empty_folder(tempdir, azure):
    with azure_teardown(azure):
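        # build a local tree: dir1/ containing file.txt and an empty subdirectory dir1/b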
        os.mkdir(os.path.join(tempdir, "dir1"))
        os.mkdir(os.path.join(tempdir, "dir1", "b"))

        with open(os.path.join(tempdir, "dir1", "file.txt"), 'wb') as f:
            f.write(b'0123456789')

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # upload the directory tree, including the empty subdirectory "b"
        up = ADLUploader(azure, test_dir / "dir1", os.path.join(tempdir, "dir1"), nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / "dir1" / "b")['type'] == 'DIRECTORY'
        azure.rm(test_dir / "dir1", recursive=True)
def test_shutdown(azure):
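    # transfer that blocks until the shutdown event is set, simulating an in-flight chunk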
    def transfer(adlfs,
                 src,
                 dst,
                 offset,
                 size,
                 blocksize,
                 buffersize,
                 retries=5,
                 shutdown_event=None):
        while shutdown_event and not shutdown_event.is_set():
            time.sleep(0.1)
        return size, None

    client = ADLTransferClient(azure,
                               transfer=transfer,
                               chunksize=1,
                               chunked=False)
    client.submit('foo', 'bar', 16)
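    # run(monitor=False) starts the transfer without blocking; shutdown() then sets
    # the event so the blocked transfer can finish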
    client.run(monitor=False)
    client.shutdown()

    assert client.progress[0].state == 'finished'


def test_upload_one(local_files, azure):
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # single chunk
        up = ADLUploader(azure, test_dir / 'littlefile', littlefile, nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / 'littlefile')['length'] == 10

        # multiple chunks, one thread
        size = 10000
        up = ADLUploader(azure, test_dir / 'bigfile', bigfile, nthreads=1,
                         chunksize=size//5, client=client, run=False,
                         overwrite=True)
        up.run()

        assert azure.info(test_dir / 'bigfile')['length'] == size

        azure.rm(test_dir / 'bigfile')