def test_download_path(azure):
    with setup_tree(azure):
        down = ADLDownloader(azure, lpath="/lpath/test/testfolder",
                             rpath='/' + test_dir.name, run=False)
        for lfile, rfile in down._file_pairs:
            if 'data' in lfile:
                lfile = AzureDLPath(lfile)
                assert lfile.as_posix().startswith('/lpath/test/testfolder/data')
def __init__(self, fs, path, mode="rb", blocksize=2**25, delimiter=None):
    super().__init__(
        azure=fs.azure_fs,
        path=AzureDLPath(path),
        mode=mode,
        blocksize=blocksize,
        delimiter=delimiter,
    )
    self.fs = fs
    self.path = AzureDLPath(path)
    self.mode = mode
def isfile(self, path):
    apath = AzureDLPath(path).trim().as_posix()
    try:
        # A file lists as exactly one entry: itself.
        contents = self.fs.ls(apath)
        return len(contents) == 1 and contents[0] == apath
    except OSError:
        return False
def test_update_progress(azure):
    """
    Upload a 32-byte file in 8-byte chunks and check that progress is
    updated incrementally.
    """
    calls = []

    def recording_callback(progress, total):
        calls.append((progress, total))

    def transfer(adlfs, src, dst, offset, size, blocksize, buffersize,
                 shutdown_event=None):
        return size, None

    client = ADLTransferClient(azure, transfer=transfer, chunksize=8,
                               chunked=True,
                               progress_callback=recording_callback)
    client.submit('foo', AzureDLPath('bar'), 32)
    client.run()

    assert calls == [(8, 32), (16, 32), (24, 32), (32, 32)]
def test_merge(azure):
    calls = []

    def merge(adlfs, outfile, files, shutdown_event=None, overwrite=False):
        calls.append(files)

    def transfer(adlfs, src, dst, offset, size, blocksize, buffersize,
                 shutdown_event=None):
        return size, None

    class XLoaderMock(object):
        _overwrite = False

    # Defined at function scope so the bare names below resolve.
    file_size = 32
    chunk_size = 8

    client = ADLTransferClient(azure, parent=XLoaderMock(), transfer=transfer,
                               merge=merge, chunksize=chunk_size, chunked=True)
    client.submit('foo', AzureDLPath('bar'), file_size)
    client.run()

    # A 32-byte file split into 8-byte chunks should hand merge() four parts.
    assert len(calls[0]) == file_size / chunk_size
def test_upload_glob(tempdir, azure):
    for directory in ['a', 'b']:
        d = os.path.join(tempdir, 'data', directory)
        os.makedirs(d)
        for data in ['x.csv', 'y.csv', 'z.txt']:
            with open(os.path.join(d, data), 'wb') as f:
                f.write(b'0123456789')

    with azure_teardown(azure):
        local_path = os.path.join(tempdir, 'data', 'a', '*.csv')
        up = ADLUploader(azure, test_dir, local_path, run=False,
                         overwrite=True)

        file_pair_dict = dict(up._file_pairs)
        assert len(file_pair_dict.keys()) == 2

        rfiles = [posix(AzureDLPath(f).relative_to(test_dir))
                  for f in file_pair_dict.values()]
        assert sorted(rfiles) == sorted(['x.csv', 'y.csv'])

        local_path = os.path.join(tempdir, 'data', '*', '*.csv')
        up = ADLUploader(azure, test_dir, local_path, run=False,
                         overwrite=True)

        file_pair_dict = dict(up._file_pairs)
        assert len(file_pair_dict.keys()) == 4

        rfiles = [posix(AzureDLPath(f).relative_to(test_dir))
                  for f in file_pair_dict.values()]
        assert sorted(rfiles) == sorted([
            posix('a', 'x.csv'), posix('a', 'y.csv'),
            posix('b', 'x.csv'), posix('b', 'y.csv')])

        local_path = os.path.join(tempdir, 'data', '*', 'z.txt')
        up = ADLUploader(azure, test_dir, local_path, run=False,
                         overwrite=True)

        file_pair_dict = dict(up._file_pairs)
        assert len(file_pair_dict.keys()) == 2

        rfiles = [posix(AzureDLPath(f).relative_to(test_dir))
                  for f in file_pair_dict.values()]
        assert sorted(rfiles) == sorted([posix('a', 'z.txt'),
                                         posix('b', 'z.txt')])
def generate_paths():
    def iter_all_strings():
        # Yields 'a', 'b', ..., 'z', 'aa', 'ab', ... in lexicographic order.
        for size in itertools.count(1):
            for s in itertools.product(ascii_lowercase, repeat=size):
                yield "".join(s)

    for s in itertools.islice(iter_all_strings(), number_of_files):
        s = AzureDLPath(prefix + s + ".txt")
        yield root_path / s
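# Minimal usage sketch for generate_paths. The values below are hypothetical
# stand-ins for the outer-scope names the generator closes over
# (number_of_files, prefix, root_path):
#
#   number_of_files = 3
#   prefix = 'testfile'
#   root_path = AzureDLPath('/bulk')
#   list(generate_paths())
#   # -> [AzureDLPath('/bulk/testfilea.txt'),
#   #     AzureDLPath('/bulk/testfileb.txt'),
#   #     AzureDLPath('/bulk/testfilec.txt')]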
def test_temporary_path(azure):
    def transfer(adlfs, src, dst, offset, size, blocksize, buffersize):
        return size, None

    client = ADLTransferClient(azure, transfer=transfer, chunksize=8,
                               unique_temporary=False)
    client.submit('foo', AzureDLPath('bar'), 16)

    assert os.path.dirname(posix(client.progress[0].chunks[0].name)) == 'bar.segments'
def isdir(self, path):
    apath = AzureDLPath(path).trim().as_posix()
    try:
        contents = self.fs.ls(apath)
        # Listing a file returns only the file itself; anything else is
        # treated as a directory.
        if len(contents) == 1 and contents[0] == apath:
            return False
        else:
            return True
    except OSError:
        return False
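# Illustration of the ls-based check that both isfile and isdir above rely
# on (hypothetical paths, assuming the ADL ls semantics where listing a file
# returns a single entry for the file itself):
#
#   fs.ls('/data/x.csv')  ->  ['/data/x.csv']               # file
#   fs.ls('/data')        ->  ['/data/x.csv', '/data/sub']  # directory
#   fs.ls('/missing')     ->  raises OSError                # neither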
def __init__(
    self,
    fs,
    path,
    mode="rb",
    autocommit=True,
    block_size=2 ** 25,
    cache_type="bytes",
    cache_options=None,
    *,
    delimiter=None,
    **kwargs,
):
    super().__init__(
        azure=fs.azure_fs,
        path=AzureDLPath(path),
        mode=mode,
        blocksize=block_size,
        delimiter=delimiter,
    )
    self.fs = fs
    self.path = AzureDLPath(path)
    self.mode = mode
def test_forward_slash():
    from azure.datalake.store.core import AzureDLPath

    posix_abspath = '/foo/bar'
    posix_relpath = 'foo/bar'

    abspath = AzureDLPath('/foo/bar')
    relpath = AzureDLPath('foo/bar')

    assert posix(abspath) == posix_abspath
    assert posix(abspath.trim()) == posix_relpath

    assert 'foo' in abspath
    assert 'foo' in relpath

    assert posix(AzureDLPath('/*').globless_prefix) == '/'
    assert posix(AzureDLPath('/foo/*').globless_prefix) == '/foo'
    assert posix(AzureDLPath('/foo/b*').globless_prefix) == '/foo'
def test_backslash():
    from azure.datalake.store.core import AzureDLPath

    posix_abspath = '/foo/bar'
    posix_relpath = 'foo/bar'

    win_abspath = AzureDLPath('\\foo\\bar')
    win_relpath = AzureDLPath('foo\\bar')

    assert posix(win_abspath) == posix_abspath
    assert posix(win_abspath.trim()) == posix_relpath

    assert 'foo' in win_abspath
    assert 'foo' in win_relpath

    assert posix(AzureDLPath('\\*').globless_prefix) == '/'
    assert posix(AzureDLPath('\\foo\\*').globless_prefix) == '/foo'
    assert posix(AzureDLPath('\\foo\\b*').globless_prefix) == '/foo'
def working_dir():
    if not hasattr(working_dir, "path"):
        unique_dir = 'azure_python_sdk_test_dir' + str(uuid.uuid4())
        working_dir.path = AzureDLPath(unique_dir)
    return working_dir.path
def posix(*args):
    return AzureDLPath(*args).as_posix()
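# Usage (illustrative): the helper joins its arguments as an AzureDLPath and
# renders the result with forward slashes.
#
#   posix('foo', 'bar')             # -> 'foo/bar'
#   posix(AzureDLPath('/foo/bar'))  # -> '/foo/bar'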
def working_dir():
    if not hasattr(working_dir, "path"):
        working_dir.path = AzureDLPath('azure_test_dir')
    return working_dir.path
def test_download_root_folder(azure, tempdir):
    with setup_tree(azure):
        rpath = AzureDLPath('/' / test_dir / 'data/single/single' / 'single.txt')
        ADLDownloader(azure, rpath=rpath, lpath=tempdir)
        assert os.path.isfile(os.path.join(tempdir, 'single.txt'))