Beispiel #1
0
def test_read_delimited_block(azure):
    """Check ``read_block`` results for a small newline-delimited file.

    Three newline-joined records (``123``, ``456``, ``789``) are written to a
    remote file and read back through ``azure.read_block`` for a series of
    offset/length pairs. Afterwards ``utils.read_block`` is checked to raise
    ``IndexError`` on an in-memory 2**22-byte buffer, both when it holds no
    delimiter at all and when the only delimiter sits just past that size.
    """
    fn = '/tmp/test/a'
    newline = b'\n'
    payload = newline.join((b'123', b'456', b'789'))

    # (offset, length, expected bytes) for reads that pass a delimiter.
    delimited_cases = (
        (0, 1, b'1'),
        (0, 2, b'12'),
        (0, 3, b'123'),
        (0, 4, b'123\n'),
        (0, 5, b'123\n'),
        (0, 8, b'123\n456\n'),
        (0, 100, b'123\n456\n'),
        (1, 1, b'2'),
        (1, 5, b'23\n'),
        (1, 8, b'23\n456\n'),
    )

    with azure_teardown(azure):
        with azure.open(fn, 'wb') as remote:
            remote.write(payload)

        # TODO: add E2E validation with the transfer client once delimiters are hooked up
        # Read without a delimiter first.
        assert azure.read_block(fn, 1, 2) == b'23'
        for offset, length, expected in delimited_cases:
            assert azure.read_block(fn, offset, length, delimiter=b'\n') == expected

        azure.rm(fn)

        # Negative cases: these exercise only the utility-level read_block.
        limit = 2**22
        with io.BytesIO(bytearray([1] * limit)) as buf:
            # No delimiter anywhere in the buffer.
            with pytest.raises(IndexError):
                utils.read_block(buf, 0, limit, delimiter=b'\n')

            # ensure it throws if the new line is past 4MB
            buf.seek(limit)
            buf.write(b'\n')
            buf.seek(0)
            with pytest.raises(IndexError):
                utils.read_block(buf, 0, 1 + limit, delimiter=b'\n')
Beispiel #2
0
def test_upload_one(local_files, azure):
    """Upload one small file and one multi-chunk file and check their lengths.

    The small file goes up with default settings on a single thread and is
    expected to be 10 bytes remotely. The big file is uploaded through an
    explicitly constructed transfer client, split into five chunks, started
    manually via ``run()``, and is expected to be 10000 bytes remotely.
    """
    with azure_teardown(azure):
        # Only the first two fixtures are used; the rest are unpacked so the
        # tuple shape is still validated.
        bigfile, littlefile, _emptyfile, _a, _b, _c = local_files

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # single chunk
        remote_little = test_dir / 'littlefile'
        uploader = ADLUploader(azure, remote_little, littlefile,
                               nthreads=1, overwrite=True)
        assert azure.info(remote_little)['length'] == 10

        # multiple chunks, one thread
        expected_length = 10000
        remote_big = test_dir / 'bigfile'
        uploader = ADLUploader(azure, remote_big, bigfile,
                               nthreads=1,
                               chunksize=expected_length // 5,
                               client=client,
                               run=False,
                               overwrite=True)
        # Constructed with run=False, so the transfer is started explicitly.
        uploader.run()

        assert azure.info(remote_big)['length'] == expected_length

        azure.rm(remote_big)
Beispiel #3
0
def setup_dir(azure):
    """Create the remote directory ``<working_dir>/foo`` and yield its path.

    The path is yielded as a string. When the generator is resumed or closed
    the directory is removed recursively.
    """
    base = working_dir()
    target = str(base / 'foo')
    azure.mkdir(target)
    try:
        yield target
    finally:
        # Always clean up, even if the consumer raised.
        azure.rm(target, recursive=True)
Beispiel #4
0
def test_rm(azure):
    """Touch and remove ``a``, checking existence without cache invalidation."""

    def cached_exists():
        # Every check deliberately skips cache invalidation.
        return azure.exists(a, invalidate_cache=False)

    with azure_teardown(azure):
        assert not cached_exists()

        azure.touch(a)
        assert cached_exists()

        azure.rm(a)
        assert not cached_exists()
Beispiel #5
0
def test_chmod(azure):
    """Check that ``chmod`` updates the reported permission and is enforced.

    Part one works on the file ``a``: it is touched, expected to report
    permission ``'770'``, switched to ``'555'``, and an append is then
    expected to fail with ``OSError``/``IOError``. Part two works on the
    directory ``test_dir / 'deep'``: after it is switched to ``'660'``,
    listing it is expected to fail with the same exception family.
    Permissions are reset to ``'770'`` at the end of each part.
    """
    with azure_teardown(azure):
        azure.touch(a)

        assert azure.info(a)['permission'] == '770'

        azure.chmod(a, '0555')
        assert azure.info(a)['permission'] == '555'

        # With write permission removed, appending must fail.
        with pytest.raises((OSError, IOError)):
            with azure.open(a, 'ab') as f:
                try:
                    f.write(b'data')
                except Exception as e:
                    # Show the error text in the captured output, then
                    # re-raise with a bare ``raise`` so the original
                    # traceback is propagated unchanged.
                    print(e)
                    raise

        # Restore permissions so the file can be removed.
        azure.chmod(a, '0770')
        azure.rm(a)

        azure.mkdir(test_dir / 'deep')
        azure.touch(test_dir / 'deep' / 'file')
        azure.chmod(test_dir / 'deep', '660')

        # Listing the directory is expected to be refused under '660'.
        with pytest.raises((OSError, IOError)):
            azure.ls(test_dir / 'deep')

        # Restore permissions on the directory before leaving the test.
        azure.chmod(test_dir / 'deep', '770')
Beispiel #6
0
def test_rm(azure):
    """Touch and remove ``a``, checking ``exists`` with its default arguments."""

    def present():
        return azure.exists(a)

    with azure_teardown(azure):
        # Nothing there before the test creates it.
        assert not present()

        azure.touch(a)
        assert present()

        azure.rm(a)
        assert not present()
Beispiel #7
0
def setup_file(azure):
    """Write ``123456`` to ``<working_dir>/foo/bar`` and yield the path.

    The path is yielded as a string. When the generator is resumed or closed
    the file is removed.
    """
    target = str(working_dir() / 'foo' / 'bar')
    with azure.open(target, 'wb') as handle:
        handle.write(b'123456')
    try:
        yield target
    finally:
        # Always clean up, even if the consumer raised.
        azure.rm(target)
Beispiel #8
0
def test_file_creation_open(azure):
    """Check that opening a path for writing creates the file immediately.

    The file must already exist while the handle is still open, and must
    report a length of 0 once the handle is closed without writing anything.
    """
    with azure_teardown(azure):
        # Start from a clean slate regardless of earlier state.
        if azure.exists(a):
            azure.rm(a)
        assert not azure.exists(a)

        f = azure.open(a, "wb")
        try:
            # The existence check happens before close on purpose.
            assert azure.exists(a)
        finally:
            # Close even when the assertion fails so the handle is not leaked.
            f.close()

        assert azure.info(a)['length'] == 0
Beispiel #9
0
def setup_tree(azure):
    """Build a small remote tree under ``test_dir`` and yield; clean up after.

    ``x.csv``, ``y.csv`` and ``z.txt`` (each containing ``123456``) are
    created in ``test_dir`` itself and in its ``data/a`` and ``data/b``
    subdirectories. After the yield, every entry listed under ``test_dir``
    that still exists is removed recursively.
    """
    subdirs = ('', 'data/a', 'data/b')
    filenames = ('x.csv', 'y.csv', 'z.txt')

    for subdir in subdirs:
        base = test_dir / subdir
        azure.mkdir(base)
        for filename in filenames:
            with azure.open(base / filename, 'wb') as out:
                out.write(b'123456')

    try:
        yield
    finally:
        # Listing and existence checks both skip cache invalidation.
        for entry in azure.ls(test_dir, invalidate_cache=False):
            if not azure.exists(entry, invalidate_cache=False):
                continue
            azure.rm(entry, recursive=True)
def test_upload_one_empty_file(local_files, azure):
    """Upload the empty local file and check the remote copy has length 0."""
    with azure_teardown(azure):
        # Only the empty file is used; the rest are unpacked so the tuple
        # shape is still validated.
        _bigfile, _littlefile, emptyfile, _a, _b, _c = local_files

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure,
                                   transfer=put_chunk,
                                   unique_temporary=False)

        # single chunk, empty file
        remote_empty = test_dir / 'emptyfile'
        uploader = ADLUploader(azure,
                               remote_empty,
                               emptyfile,
                               nthreads=1,
                               overwrite=True)

        assert azure.info(remote_empty)['length'] == 0
        azure.rm(remote_empty)
Beispiel #11
0
def test_concat(azure):
    """Concatenate two remote files and check the combined content.

    ``a`` receives ``b'hello '`` and ``b`` receives ``b'world'``; after
    ``azure.concat(c, [a, b])`` the content of ``c`` must be
    ``b'hello world'``. The result file is removed before the final assert.
    """
    with azure.open(a, 'wb') as f:
        f.write(b'hello ')
    with azure.open(b, 'wb') as f:
        f.write(b'world')

    # Best-effort removal of a stale target. Only filesystem-style errors are
    # ignored (e.g. the target does not exist yet); a bare ``except`` would
    # also swallow KeyboardInterrupt and SystemExit.
    try:
        azure.rm(c)
    except (IOError, OSError):
        pass
    azure.concat(c, [a, b])

    out = azure.cat(c)
    azure.rm(c)

    assert out == b'hello world'
def test_upload_single_file_in_dir(tempdir, azure):
    """Upload a local directory holding one 10-byte file and check the result.

    The file ``singlefile`` is created inside ``tempdir``, the directory is
    uploaded to ``test_dir / 'singlefiledir'``, and the remote copy of the
    file is expected to be 10 bytes long.
    """
    with azure_teardown(azure):
        local_dir = tempdir
        local_file = os.path.join(local_dir, 'singlefile')
        with open(local_file, 'wb') as out:
            out.write(b'0123456789')

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure,
                                   transfer=put_chunk,
                                   unique_temporary=False)

        # Upload the whole directory, not the individual file.
        remote_dir = test_dir / 'singlefiledir'
        uploader = ADLUploader(azure,
                               remote_dir,
                               local_dir,
                               nthreads=1,
                               overwrite=True)

        remote_file = remote_dir / 'singlefile'
        assert azure.info(remote_file)['length'] == 10
        azure.rm(remote_file)
Beispiel #13
0
def test_exists_remove_invalidate_cache(azure, second_azure):
    """Check how ``invalidate_cache`` affects ``exists`` across two clients.

    ``azure`` performs the touch and the remove; ``second_azure`` only
    observes. Without cache invalidation the observer is expected to keep
    its earlier answer, and with invalidation it is expected to see the
    current state.
    """

    def cached(client):
        return client.exists(a, invalidate_cache=False)

    def refreshed(client):
        return client.exists(a, invalidate_cache=True)

    with azure_teardown(azure):
        # The file is absent up front for both clients; the cache is irrelevant.
        assert not cached(azure)
        assert not cached(second_azure)

        azure.touch(a)
        # The client that did the work sees the file; the other one does not.
        assert cached(azure)
        assert not cached(second_azure)
        # With cache invalidation the second client sees it too.
        assert refreshed(second_azure)

        azure.rm(a)
        # Same idea for removal: gone for client 1 (cached or not), but still
        # reported by client 2 until its cache is invalidated.
        assert not cached(azure)
        assert cached(second_azure)
        assert not refreshed(second_azure)
Beispiel #14
0
def test_concat(azure):
    """Concatenate two remote files whose names contain ``+``.

    The sources are ``a + "+file1"`` (``b'hello '``) and ``b + "+file2"``
    (``b'world'``); the target is ``c + "+res"``. After ``azure.concat`` the
    target content must be ``b'hello world'``. The result file is removed
    before the final assert.
    """
    aplus = a + "+file1"
    bplus = b + "+file2"
    cplus = c + "+res"
    with azure.open(aplus, 'wb') as f:
        f.write(b'hello ')
    with azure.open(bplus, 'wb') as f:
        f.write(b'world')

    # Best-effort removal of a stale target. Only filesystem-style errors are
    # ignored (e.g. the target does not exist yet); a bare ``except`` would
    # also swallow KeyboardInterrupt and SystemExit.
    try:
        azure.rm(cplus)
    except (IOError, OSError):
        pass

    azure.concat(cplus, [aplus, bplus])
    out = azure.cat(cplus)
    azure.rm(cplus)

    assert out == b'hello world'
Beispiel #15
0
def test_upload_empty_folder(tempdir, azure):
    """Upload a local tree with an empty subdirectory and check it arrives.

    ``tempdir/dir1`` is created with an empty subdirectory ``b`` and a
    10-byte ``file.txt``. After the upload to ``test_dir / "dir1"``, the
    remote ``b`` entry must report type ``'DIRECTORY'``.
    """
    with azure_teardown(azure):
        local_root = os.path.join(tempdir, "dir1")
        os.mkdir(local_root)
        os.mkdir(os.path.join(local_root, "b"))

        with open(os.path.join(local_root, "file.txt"), 'wb') as out:
            out.write(b'0123456789')

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure,
                                   transfer=put_chunk,
                                   unique_temporary=False)

        # Upload the directory tree, including the empty subdirectory.
        remote_root = test_dir / "dir1"
        uploader = ADLUploader(azure,
                               remote_root,
                               local_root,
                               nthreads=1,
                               overwrite=True)

        assert azure.info(remote_root / "b")['type'] == 'DIRECTORY'
        azure.rm(remote_root, recursive=True)
Beispiel #16
0
def test_errors(azure):
    """Check that operations on non-existent paths raise I/O errors.

    Opening for read, removing and moving paths under a directory that was
    never created must raise ``IOError`` or ``OSError``; removing an unknown
    top-level entry under ``test_dir`` must raise ``IOError``.
    """
    io_errors = (IOError, OSError)
    ghost_dir = test_dir / 'shfoshf'

    with pytest.raises(io_errors):
        azure.open(ghost_dir, 'rb')

    # NOTE: touching ghost_dir / 'x' is intentionally not checked here. This is
    # totally OK: the directory is silently created. Re-enabling such a check
    # will need extended invalidate_cache handling.

    with pytest.raises(io_errors):
        azure.rm(ghost_dir / 'xxx')

    with pytest.raises(io_errors):
        azure.mv(ghost_dir / 'x', test_dir / 'shfoshxbf' / 'y')

    # NOTE: IOError checks for chown('unknown', 'someone', 'group') and for
    # chmod on an unknown path are currently disabled.

    with pytest.raises(IOError):
        azure.rm(test_dir / 'unknown')