def test_download_path(azure):
    with setup_tree(azure):
        down = ADLDownloader(azure,
                             lpath="/lpath/test/testfolder",
                             rpath='/' + test_dir.name,
                             run=False)
        for lfile, rfile in down._file_pairs:
            if 'data' in lfile:
                lfile = AzureDLPath(lfile)
                assert lfile.as_posix().startswith(
                    '/lpath/test/testfolder/data')
Example 2
# __init__ of a file-like wrapper class (enclosing class omitted in this
# snippet); it delegates buffering to the azure-datalake-store base class.
def __init__(self, fs, path, mode="rb", blocksize=2**25, delimiter=None):
    super().__init__(
        azure=fs.azure_fs,
        path=AzureDLPath(path),
        mode=mode,
        blocksize=blocksize,
        delimiter=delimiter,
    )
    self.fs = fs
    self.path = AzureDLPath(path)
    self.mode = mode
Example 3
def isfile(self, path):
    apath = AzureDLPath(path).trim().as_posix()
    try:
        # ls on a file returns exactly one entry: the path itself
        contents = self.fs.ls(apath)
        return len(contents) == 1 and contents[0] == apath
    except OSError:
        return False
def test_update_progress(azure):
    """
    Upload a 32 bytes file in chunks of 8 and test that the progress is incrementally
    updated.
    """
    calls = []

    def recording_callback(progress, total):
        calls.append((progress, total))

    def transfer(adlfs,
                 src,
                 dst,
                 offset,
                 size,
                 blocksize,
                 buffersize,
                 shutdown_event=None):
        return size, None

    client = ADLTransferClient(azure,
                               transfer=transfer,
                               chunksize=8,
                               chunked=True,
                               progress_callback=recording_callback)

    client.submit('foo', AzureDLPath('bar'), 32)
    client.run()

    assert calls == [(8, 32), (16, 32), (24, 32), (32, 32)]
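
The callback pattern above also applies to the high-level helpers. A minimal
sketch, assuming a build of azure-datalake-store whose ADLUploader accepts the
same progress_callback argument (the paths here are made up):

from azure.datalake.store.multithread import ADLUploader

def report(progress, total):
    # invoked as chunks complete, e.g. (8, 32), (16, 32), ... as in the test above
    print('transferred %d of %d bytes' % (progress, total))

# 'azure' is an AzureDLFileSystem instance, as in these tests
up = ADLUploader(azure, rpath='/remote/target.bin', lpath='local.bin',
                 progress_callback=report)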
def test_merge(azure):

    calls = []

    def merge(adlfs, outfile, files, shutdown_event=None, overwrite=False):
        calls.append(files)

    def transfer(adlfs,
                 src,
                 dst,
                 offset,
                 size,
                 blocksize,
                 buffersize,
                 shutdown_event=None):
        return size, None

    class XLoaderMock(object):
        _overwrite = False

    file_size = 32
    chunk_size = 8
    client = ADLTransferClient(azure,
                               parent=XLoaderMock(),
                               transfer=transfer,
                               merge=merge,
                               chunksize=chunk_size,
                               chunked=True)

    client.submit('foo', AzureDLPath('bar'), file_size)
    client.run()

    # merge is handed one staged chunk file per chunk of the source file
    assert len(calls[0]) == file_size // chunk_size
def test_upload_glob(tempdir, azure):
    for directory in ['a', 'b']:
        d = os.path.join(tempdir, 'data', directory)
        os.makedirs(d)
        for data in ['x.csv', 'y.csv', 'z.txt']:
            with open(os.path.join(d, data), 'wb') as f:
                f.write(b'0123456789')

    with azure_teardown(azure):
        local_path = os.path.join(tempdir, 'data', 'a', '*.csv')
        up = ADLUploader(azure, test_dir, local_path, run=False,
                         overwrite=True)

        file_pair_dict = dict(up._file_pairs)
        assert len(file_pair_dict) == 2
        rfiles = [posix(AzureDLPath(f).relative_to(test_dir))
                  for f in file_pair_dict.values()]
        assert sorted(rfiles) == sorted(['x.csv', 'y.csv'])

        local_path = os.path.join(tempdir, 'data', '*', '*.csv')
        up = ADLUploader(azure, test_dir, local_path, run=False,
                         overwrite=True)

        file_pair_dict = dict(up._file_pairs)
        assert len(file_pair_dict) == 4

        rfiles = [posix(AzureDLPath(f).relative_to(test_dir))
                  for f in file_pair_dict.values()]
        assert sorted(rfiles) == sorted([
            posix('a', 'x.csv'),
            posix('a', 'y.csv'),
            posix('b', 'x.csv'),
            posix('b', 'y.csv')])

        local_path = os.path.join(tempdir, 'data', '*', 'z.txt')
        up = ADLUploader(azure, test_dir, local_path, run=False,
                         overwrite=True)

        file_pair_dict = dict(up._file_pairs)
        assert len(file_pair_dict) == 2

        rfiles = [posix(AzureDLPath(f).relative_to(test_dir))
                  for f in file_pair_dict.values()]

        assert sorted(rfiles) == sorted([posix('a', 'z.txt'), posix('b', 'z.txt')])
Example 7
    def generate_paths():
        # number_of_files, prefix and root_path come from the enclosing scope
        def iter_all_strings():
            for size in itertools.count(1):
                for s in itertools.product(ascii_lowercase, repeat=size):
                    yield "".join(s)

        for s in itertools.islice(iter_all_strings(), number_of_files):
            s = AzureDLPath(prefix + s + ".txt")
            yield root_path / s
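
Detached from its fixture, the inner generator enumerates lowercase names in
shortlex order ('a'..'z', then 'aa', 'ab', ...). A runnable sketch using only
the stdlib; number_of_files, prefix and root_path are stand-in values:

import itertools
from pathlib import PurePosixPath
from string import ascii_lowercase

def iter_all_strings():
    for size in itertools.count(1):
        for s in itertools.product(ascii_lowercase, repeat=size):
            yield "".join(s)

root_path, prefix, number_of_files = PurePosixPath('/tmp'), 'f_', 28
paths = [root_path / (prefix + s + ".txt")
         for s in itertools.islice(iter_all_strings(), number_of_files)]
assert paths[0].name == 'f_a.txt' and paths[-1].name == 'f_ab.txt'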
def test_temporary_path(azure):
    def transfer(adlfs, src, dst, offset, size, blocksize, buffersize):
        return size, None

    client = ADLTransferClient(azure, transfer=transfer, chunksize=8,
                               unique_temporary=False)
    client.submit('foo', AzureDLPath('bar'), 16)

    # chunk files are staged under '<target>.segments' when unique_temporary=False
    assert os.path.dirname(posix(client.progress[0].chunks[0].name)) == 'bar.segments'
Example 9
def isdir(self, path):
    apath = AzureDLPath(path).trim().as_posix()
    try:
        contents = self.fs.ls(apath)
        # a listing that is just the path itself means a file, not a directory
        if len(contents) == 1 and contents[0] == apath:
            return False
        else:
            return True
    except OSError:
        return False
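
isfile (Example 3) and isdir above rely on the same ls convention: listing a
file returns exactly one entry equal to the path itself, while listing a
directory returns its children. A self-contained illustration, with a
hypothetical MockFS standing in for the wrapped filesystem:

class MockFS:
    # hypothetical stand-in mimicking the ls behaviour the predicates rely on
    _tree = {
        '/a': ['/a/file.txt'],           # directory: ls lists its children
        '/a/file.txt': ['/a/file.txt'],  # file: ls echoes the path back
    }

    def ls(self, path):
        try:
            return self._tree[path]
        except KeyError:
            raise OSError(path)

fs = MockFS()
assert fs.ls('/a/file.txt') == ['/a/file.txt']  # -> isfile: True, isdir: False
assert fs.ls('/a') != ['/a']                    # -> isfile: False, isdir: True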
Example 10
# fsspec-style __init__ of the same file wrapper (enclosing class omitted);
# autocommit, cache_type, cache_options and **kwargs are accepted for
# interface compatibility but not forwarded to the base class.
def __init__(
    self,
    fs,
    path,
    mode="rb",
    autocommit=True,
    block_size=2**25,
    cache_type="bytes",
    cache_options=None,
    *,
    delimiter=None,
    **kwargs,
):
    super().__init__(
        azure=fs.azure_fs,
        path=AzureDLPath(path),
        mode=mode,
        blocksize=block_size,
        delimiter=delimiter,
    )
    self.fs = fs
    self.path = AzureDLPath(path)
    self.mode = mode
Example 11
def test_forward_slash():
    from azure.datalake.store.core import AzureDLPath

    posix_abspath = '/foo/bar'
    posix_relpath = 'foo/bar'

    abspath = AzureDLPath('/foo/bar')
    relpath = AzureDLPath('foo/bar')

    assert posix(abspath) == posix_abspath
    assert posix(abspath.trim()) == posix_relpath

    assert 'foo' in abspath
    assert 'foo' in relpath

    assert posix(AzureDLPath('/*').globless_prefix) == '/'
    assert posix(AzureDLPath('/foo/*').globless_prefix) == '/foo'
    assert posix(AzureDLPath('/foo/b*').globless_prefix) == '/foo'
Example 12
def test_backslash():
    from azure.datalake.store.core import AzureDLPath

    posix_abspath = '/foo/bar'
    posix_relpath = 'foo/bar'

    win_abspath = AzureDLPath('\\foo\\bar')
    win_relpath = AzureDLPath('foo\\bar')

    assert posix(win_abspath) == posix_abspath
    assert posix(win_abspath.trim()) == posix_relpath

    assert 'foo' in win_abspath
    assert 'foo' in win_relpath

    assert posix(AzureDLPath('\\*').globless_prefix) == '/'
    assert posix(AzureDLPath('\\foo\\*').globless_prefix) == '/foo'
    assert posix(AzureDLPath('\\foo\\b*').globless_prefix) == '/foo'
Example 13
def working_dir():
    # memoize on a function attribute: one unique test directory per run
    if not hasattr(working_dir, "path"):
        unique_dir = 'azure_python_sdk_test_dir' + str(uuid.uuid4())
        working_dir.path = AzureDLPath(unique_dir)
    return working_dir.path
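
The function attribute caches the path so every caller shares one unique
directory per test run; functools.lru_cache expresses the same memoization
more declaratively (a sketch of the equivalent):

import functools
import uuid

from azure.datalake.store.core import AzureDLPath

@functools.lru_cache(maxsize=None)
def working_dir():
    # body runs once; the AzureDLPath is cached for all later calls
    return AzureDLPath('azure_python_sdk_test_dir' + str(uuid.uuid4()))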
Example 14
def posix(*args):
    return AzureDLPath(*args).as_posix()
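
For instance, both of these hold (the second is exactly what test_backslash
above asserts):

assert posix('foo', 'bar') == 'foo/bar'
assert posix(AzureDLPath('\\foo\\bar')) == '/foo/bar'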
Example 15
def working_dir():
    if not hasattr(working_dir, "path"):
        working_dir.path = AzureDLPath('azure_test_dir')
    return working_dir.path
def test_download_root_folder(azure, tempdir):
    with setup_tree(azure):
        rpath = AzureDLPath('/' / test_dir / 'data/single/single' / 'single.txt')
        ADLDownloader(azure, rpath=rpath, lpath=tempdir)
        assert os.path.isfile(os.path.join(tempdir, 'single.txt'))