def test_read_delimited_block(azure):
    fn = '/tmp/test/a'
    delimiter = b'\n'
    data = delimiter.join([b'123', b'456', b'789'])
    with azure_teardown(azure):
        with azure.open(fn, 'wb') as f:
            f.write(data)

        # TODO: add E2E validation with the transfer client once delimiters are hooked up
        assert azure.read_block(fn, 1, 2) == b'23'
        assert azure.read_block(fn, 0, 1, delimiter=b'\n') == b'1'
        assert azure.read_block(fn, 0, 2, delimiter=b'\n') == b'12'
        assert azure.read_block(fn, 0, 3, delimiter=b'\n') == b'123'
        assert azure.read_block(fn, 0, 4, delimiter=b'\n') == b'123\n'
        assert azure.read_block(fn, 0, 5, delimiter=b'\n') == b'123\n'
        assert azure.read_block(fn, 0, 8, delimiter=b'\n') == b'123\n456\n'
        assert azure.read_block(fn, 0, 100, delimiter=b'\n') == b'123\n456\n'
        assert azure.read_block(fn, 1, 1, delimiter=b'\n') == b'2'
        assert azure.read_block(fn, 1, 5, delimiter=b'\n') == b'23\n'
        assert azure.read_block(fn, 1, 8, delimiter=b'\n') == b'23\n456\n'

        azure.rm(fn)

        # test the negative cases of just the util read_block
        with io.BytesIO(bytearray([1] * 2**22)) as data:
            with pytest.raises(IndexError):
                utils.read_block(data, 0, 2**22, delimiter=b'\n')

            # ensure it throws if the newline is past 4MB
            data.seek(2**22)
            data.write(b'\n')
            data.seek(0)
            with pytest.raises(IndexError):
                utils.read_block(data, 0, 1 + 2**22, delimiter=b'\n')

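# The helper below is NOT part of the library; it is a hypothetical pure-Python
# reference sketch of the delimiter semantics the asserts above encode: take
# `length` bytes starting at `offset`, then truncate just past the last
# complete delimiter found in that window (or return the raw slice if none).
def _read_block_reference(buf, offset, length, delimiter=None):
    block = buf[offset:offset + length]
    if delimiter is not None:
        end = block.rfind(delimiter)
        if end != -1:
            block = block[:end + len(delimiter)]
    return block

# Mirrors the asserts above, e.g.:
#   _read_block_reference(b'123\n456\n789', 0, 5, b'\n') == b'123\n'
#   _read_block_reference(b'123\n456\n789', 1, 8, b'\n') == b'23\n456\n'
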
def test_upload_one(local_files, azure):
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # single chunk
        up = ADLUploader(azure, test_dir / 'littlefile', littlefile, nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / 'littlefile')['length'] == 10

        # multiple chunks, one thread
        size = 10000
        up = ADLUploader(azure, test_dir / 'bigfile', bigfile, nthreads=1,
                         chunksize=size // 5, client=client, run=False,
                         overwrite=True)
        up.run()
        assert azure.info(test_dir / 'bigfile')['length'] == size

        azure.rm(test_dir / 'bigfile')

@pytest.fixture()
def setup_dir(azure):
    d = str(working_dir() / 'foo')
    azure.mkdir(d)
    try:
        yield d
    finally:
        azure.rm(d, recursive=True)

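# Hypothetical companion test (not from the suite) showing how pytest consumes
# the generator-style fixture above: the yielded path is injected as the
# argument value, and the finally block runs as teardown even if the test fails.
def test_setup_dir_exists(azure, setup_dir):
    assert azure.exists(setup_dir)
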
def test_rm(azure):
    with azure_teardown(azure):
        assert not azure.exists(a, invalidate_cache=False)
        azure.touch(a)
        assert azure.exists(a, invalidate_cache=False)
        azure.rm(a)
        assert not azure.exists(a, invalidate_cache=False)

def test_chmod(azure):
    with azure_teardown(azure):
        azure.touch(a)
        assert azure.info(a)['permission'] == '770'

        azure.chmod(a, '0555')
        assert azure.info(a)['permission'] == '555'

        with pytest.raises((OSError, IOError)):
            with azure.open(a, 'ab') as f:
                try:
                    f.write(b'data')
                except Exception as e:
                    print(e)
                    raise e

        azure.chmod(a, '0770')
        azure.rm(a)

        azure.mkdir(test_dir / 'deep')
        azure.touch(test_dir / 'deep' / 'file')
        azure.chmod(test_dir / 'deep', '660')

        with pytest.raises((OSError, IOError)):
            azure.ls(test_dir / 'deep')

        azure.chmod(test_dir / 'deep', '770')

# renamed from test_rm so it does not shadow the cache-aware variant above
def test_rm_default_cache(azure):
    with azure_teardown(azure):
        assert not azure.exists(a)
        azure.touch(a)
        assert azure.exists(a)
        azure.rm(a)
        assert not azure.exists(a)

@pytest.fixture()
def setup_file(azure):
    tmp = str(working_dir() / 'foo' / 'bar')
    with azure.open(tmp, 'wb') as f:
        f.write('123456'.encode())
    try:
        yield tmp
    finally:
        azure.rm(tmp)

def test_file_creation_open(azure):
    with azure_teardown(azure):
        if azure.exists(a):
            azure.rm(a)
        assert not azure.exists(a)
        f = azure.open(a, "wb")
        assert azure.exists(a)
        f.close()
        assert azure.info(a)['length'] == 0

@pytest.fixture()
def setup_tree(azure):
    for directory in ['', 'data/a', 'data/b']:
        azure.mkdir(test_dir / directory)
        for filename in ['x.csv', 'y.csv', 'z.txt']:
            with azure.open(test_dir / directory / filename, 'wb') as f:
                f.write(b'123456')
    try:
        yield
    finally:
        for path in azure.ls(test_dir, invalidate_cache=False):
            if azure.exists(path, invalidate_cache=False):
                azure.rm(path, recursive=True)

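# Hedged companion sketch of what the tree above provides: each of the two
# 'data' subdirectories holds two CSVs, so a pattern query should find four.
# This assumes AzureDLFileSystem.glob accepts the same path-joining style used
# elsewhere in these tests; the test name is illustrative, not from the suite.
def test_setup_tree_glob(azure, setup_tree):
    files = azure.glob(test_dir / 'data' / '*' / '*.csv')
    assert len(files) == 4
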
def test_upload_one_empty_file(local_files, azure):
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # single chunk, empty file
        up = ADLUploader(azure, test_dir / 'emptyfile', emptyfile, nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / 'emptyfile')['length'] == 0

        azure.rm(test_dir / 'emptyfile')

def test_concat(azure):
    with azure.open(a, 'wb') as f:
        f.write(b'hello ')
    with azure.open(b, 'wb') as f:
        f.write(b'world')
    try:
        azure.rm(c)
    except Exception:
        pass
    azure.concat(c, [a, b])
    out = azure.cat(c)
    azure.rm(c)
    assert out == b'hello world'

def test_upload_single_file_in_dir(tempdir, azure):
    with azure_teardown(azure):
        lpath_dir = tempdir
        lfilename = os.path.join(lpath_dir, 'singlefile')
        with open(lfilename, 'wb') as f:
            f.write(b'0123456789')

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        up = ADLUploader(azure, test_dir / 'singlefiledir', lpath_dir,
                         nthreads=1, overwrite=True)
        assert azure.info(test_dir / 'singlefiledir' / 'singlefile')['length'] == 10
        azure.rm(test_dir / 'singlefiledir' / 'singlefile')

def test_exists_remove_invalidate_cache(azure, second_azure):
    with azure_teardown(azure):
        # test to ensure file does not exist up front, cache doesn't matter
        assert not azure.exists(a, invalidate_cache=False)
        assert not second_azure.exists(a, invalidate_cache=False)

        azure.touch(a)

        # now ensure that it exists in the client that did the work, but not in the other
        assert azure.exists(a, invalidate_cache=False)
        assert not second_azure.exists(a, invalidate_cache=False)

        # now, with cache invalidation it should exist
        assert second_azure.exists(a, invalidate_cache=True)

        azure.rm(a)

        # same idea with remove: it should no longer exist (cache invalidated
        # or not) in client 1, but still exist in client 2
        assert not azure.exists(a, invalidate_cache=False)
        assert second_azure.exists(a, invalidate_cache=False)

        # now ensure it does not exist when we do invalidate the cache
        assert not second_azure.exists(a, invalidate_cache=True)

# renamed from test_concat so it does not shadow the basic variant above;
# this one exercises paths containing a '+' character
def test_concat_plus_in_name(azure):
    aplus = a + "+file1"
    bplus = b + "+file2"
    cplus = c + "+res"

    with azure.open(aplus, 'wb') as f:
        f.write(b'hello ')
    with azure.open(bplus, 'wb') as f:
        f.write(b'world')
    try:
        azure.rm(cplus)
    except Exception:
        pass
    azure.concat(cplus, [aplus, bplus])
    out = azure.cat(cplus)
    azure.rm(cplus)
    assert out == b'hello world'

def test_upload_empty_folder(tempdir, azure):
    with azure_teardown(azure):
        os.mkdir(os.path.join(tempdir, "dir1"))
        os.mkdir(os.path.join(tempdir, "dir1", "b"))

        with open(os.path.join(tempdir, "dir1", "file.txt"), 'wb') as f:
            f.write(b'0123456789')

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # upload the directory; the empty subdirectory should be created remotely too
        up = ADLUploader(azure, test_dir / "dir1", os.path.join(tempdir, "dir1"),
                         nthreads=1, overwrite=True)
        assert azure.info(test_dir / "dir1" / "b")['type'] == 'DIRECTORY'
        azure.rm(test_dir / "dir1", recursive=True)

def test_errors(azure):
    with pytest.raises((IOError, OSError)):
        azure.open(test_dir / 'shfoshf', 'rb')

    # This is totally OK: directory is silently created
    # Will need to extend invalidate_cache
    # with pytest.raises((IOError, OSError)):
    #     azure.touch(test_dir / 'shfoshf' / 'x')

    with pytest.raises((IOError, OSError)):
        azure.rm(test_dir / 'shfoshf' / 'xxx')

    with pytest.raises((IOError, OSError)):
        azure.mv(test_dir / 'shfoshf' / 'x', test_dir / 'shfoshxbf' / 'y')

    # with pytest.raises(IOError):
    #     azure.chown('unknown', 'someone', 'group')

    # with pytest.raises(IOError):
    #     azure.chmod('unknonwn', 'rb')

    with pytest.raises(IOError):
        azure.rm(test_dir / 'unknown')