Code Example #1
def test_upload_glob(tempdir, azure):
    for directory in ['a', 'b']:
        d = os.path.join(tempdir, 'data', directory)
        os.makedirs(d)
        for data in ['x.csv', 'y.csv', 'z.txt']:
            with open(os.path.join(d, data), 'wb') as f:
                f.write(b'0123456789')

    with azure_teardown(azure):
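        # glob matching only the CSV files directly under data/a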
        local_path = os.path.join(tempdir, 'data', 'a', '*.csv')
        up = ADLUploader(azure, test_dir, local_path, run=False,
                         overwrite=True)

        file_pair_dict = dict(up._file_pairs)
        assert len(file_pair_dict.keys()) == 2
        rfiles = [posix(AzureDLPath(f).relative_to(test_dir))
                  for f in file_pair_dict.values()]
        assert sorted(rfiles) == sorted(['x.csv', 'y.csv'])

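        # a wildcard directory segment matches the CSVs under both a and b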
        local_path = os.path.join(tempdir, 'data', '*', '*.csv')
        up = ADLUploader(azure, test_dir, local_path, run=False,
                         overwrite=True)

        file_pair_dict = dict(up._file_pairs)
        assert len(file_pair_dict.keys()) == 4

        rfiles = [posix(AzureDLPath(f).relative_to(test_dir))
                  for f in file_pair_dict.values()]
        assert sorted(rfiles) == sorted([
            posix('a', 'x.csv'),
            posix('a', 'y.csv'),
            posix('b', 'x.csv'),
            posix('b', 'y.csv')])

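        # fixed filename with a wildcard directory: z.txt from both a and b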
        local_path = os.path.join(tempdir, 'data', '*', 'z.txt')
        up = ADLUploader(azure, test_dir, local_path, run=False,
                         overwrite=True)

        file_pair_dict = dict(up._file_pairs)
        assert len(file_pair_dict.keys()) == 2

        rfiles = [posix(AzureDLPath(f).relative_to(test_dir))
                  for f in file_pair_dict.values()]

        assert sorted(rfiles) == sorted([posix('a', 'z.txt'), posix('b', 'z.txt')])
Code Example #2
def test_upload_overwrite(local_files, azure):
    bigfile, littlefile, emptyfile, a, b, c = local_files

    with azure_teardown(azure):
        # make the file already exist.
        azure.touch('/{}/littlefile'.format(test_dir.as_posix()))

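        # without overwrite=True, uploading onto an existing remote file raises OSError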
        with pytest.raises(OSError) as e:
            ADLUploader(azure, test_dir, littlefile, nthreads=1)
        assert test_dir.as_posix() in str(e)
Code Example #3
def test_upload_many(local_files, azure):
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files
        root = os.path.dirname(bigfile)

        # single thread
        up = ADLUploader(azure, test_dir, root, nthreads=1, overwrite=True)
        assert azure.info(test_dir / 'littlefile')['length'] == 10
        assert azure.cat(test_dir / 'nested1/nested2/a') == b'0123456789'
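        # the fixture yields six files totalling 10040 bytes (10000 for bigfile)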
        assert len(azure.du(test_dir, deep=True)) == 6
        assert azure.du(test_dir, deep=True, total=True) == 10000 + 40
Code Example #4
def upload_to_adls(account_name,
                   source_path,
                   destination_path,
                   thread_count=None,
                   overwrite=False):
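    # resolve the Data Lake Store filesystem client for the target account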
    client = cf_dls_filesystem(account_name)
    ADLUploader(client,
                destination_path,
                source_path,
                thread_count,
                overwrite=overwrite)
Code Example #5
def test_save_up(local_files, azure):
    bigfile, littlefile, emptyfile, a, b, c = local_files
    root = os.path.dirname(bigfile)

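    # run=False builds the transfer plan without executing it, so it can be saved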
    up = ADLUploader(azure, '', root, 1, 1000000, run=False, overwrite=True)
    up.save()

    alluploads = ADLUploader.load()
    assert up.hash in alluploads

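    # keep=False drops the transfer from the persisted set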
    up.save(keep=False)
    alluploads = ADLUploader.load()
    assert up.hash not in alluploads
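
The save/load round trip above is what makes transfers resumable. A minimal sketch of the full workflow follows; adl (an authenticated AzureDLFileSystem instance), '/remote/dest', and local_dir are hypothetical stand-ins rather than names taken from the examples:

from azure.datalake.store.multithread import ADLUploader

# plan the transfer without starting it, then persist the plan
up = ADLUploader(adl, '/remote/dest', local_dir, nthreads=4, run=False,
                 overwrite=True)
up.save()

# later: look the saved transfer up by its hash and execute it
resumed = ADLUploader.load()[up.hash]
resumed.run()
resumed.save(keep=False)  # remove the finished transfer from the saved set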
Code Example #6
def test_upload_overwrite(local_files, azure):
    bigfile, littlefile, emptyfile, a, b, c = local_files

    with azure_teardown(azure):
        # create the folder that we want to make sure the overwrite
        # test fails on if it doesn't already exist
        if not azure.exists(test_dir):
            azure.mkdir(test_dir)

        with pytest.raises(OSError) as e:
            ADLUploader(azure, test_dir, littlefile, nthreads=1)
        assert test_dir.as_posix() in str(e)
Code Example #7
def test_upload_one(local_files, azure):
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure,
                                   transfer=put_chunk,
                                   unique_temporary=False)

        # single chunk
        up = ADLUploader(azure,
                         test_dir / 'littlefile',
                         littlefile,
                         nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / 'littlefile')['length'] == 10

        # multiple chunks, one thread
        size = 10000
        up = ADLUploader(azure,
                         test_dir / 'bigfile',
                         bigfile,
                         nthreads=1,
                         chunksize=size // 5,
                         client=client,
                         run=False,
                         overwrite=True)
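        # run=False above only registered the transfer; run() actually starts it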
        up.run()

        assert azure.info(test_dir / 'bigfile')['length'] == size

        azure.rm(test_dir / 'bigfile')
Code Example #8
    def do_put(self, line):
        parser = argparse.ArgumentParser(prog="put", add_help=False)
        parser.add_argument('local_path', type=str)
        parser.add_argument('remote_path', type=str, nargs='?', default='.')
        parser.add_argument('-b', '--chunksize', type=int, default=2**28)
        parser.add_argument('-c', '--threads', type=int, default=None)
        parser.add_argument('-f', '--force', action='store_true')
        try:
            args = parser.parse_args(line.split())
        except SystemExit:
            # argparse exits on bad arguments; return instead of falling
            # through with args undefined
            return

        ADLUploader(self._fs, args.remote_path, args.local_path,
                    nthreads=args.threads, chunksize=args.chunksize,
                    overwrite=args.force)
Code Example #9
File: custom.py  Project: yuan776/azure-cli
def upload_to_adls(cmd, account_name, source_path, destination_path, chunk_size, buffer_size, block_size,
                   thread_count=None, overwrite=False, progress_callback=None):
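    # resolve the filesystem client from the CLI context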
    client = cf_dls_filesystem(cmd.cli_ctx, account_name)
    ADLUploader(
        client,
        destination_path,
        source_path,
        thread_count,
        chunksize=chunk_size,
        buffersize=buffer_size,
        blocksize=block_size,
        overwrite=overwrite,
        progress_callback=progress_callback or get_update_progress(cmd.cli_ctx))
Code Example #10
def test_upload_one_empty_file(local_files, azure):
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # single chunk, empty file
        up = ADLUploader(azure, test_dir / 'emptyfile', emptyfile, nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / 'emptyfile')['length'] == 0
        azure.rm(test_dir / 'emptyfile')
Code Example #11
def test_upload_single_file_in_dir(tempdir, azure):
    with azure_teardown(azure):
        lpath_dir = tempdir
        lfilename = os.path.join(lpath_dir, 'singlefile')
        with open(lfilename, 'wb') as f:
            f.write(b'0123456789')

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

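        # uploading a directory: its single file lands under the remote directory name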
        up = ADLUploader(azure, test_dir / 'singlefiledir', lpath_dir, nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / 'singlefiledir' / 'singlefile')['length'] == 10
        azure.rm(test_dir / 'singlefiledir' / 'singlefile')
Code Example #12
def test_upload_empty_folder(tempdir, azure):
    with azure_teardown(azure):
        os.mkdir(os.path.join(tempdir, "dir1"))
        os.mkdir(os.path.join(tempdir, "dir1", "b"))

        with open(os.path.join(tempdir, "dir1", "file.txt"), 'wb') as f:
            f.write(b'0123456789')

        # transfer client w/ deterministic temporary directory
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # upload a directory tree; the empty subfolder 'b' should be created remotely
        up = ADLUploader(azure, test_dir / "dir1", os.path.join(tempdir, "dir1"), nthreads=1,
                         overwrite=True)
        assert azure.info(test_dir / "dir1" / "b")['type'] == 'DIRECTORY'
        azure.rm(test_dir / "dir1", recursive=True)
Code Example #13
File: custom.py  Project: vjrantal/azure-cli
def upload_to_adls(account_name,
                   source_path,
                   destination_path,
                   chunk_size,
                   buffer_size,
                   block_size,
                   thread_count=None,
                   overwrite=False):
    client = cf_dls_filesystem(account_name)
    ADLUploader(
        client,
        destination_path,
        source_path,
        thread_count,
        chunksize=chunk_size,
        buffersize=buffer_size,
        blocksize=block_size,
        overwrite=overwrite)
Code Example #14
    def do_resume_upload(self, line):
        try:
            up = ADLUploader.load()[line]
            up.run()
        except KeyError:
            print("No such upload")
Code Example #15
    def do_list_uploads(self, line):
        print(ADLUploader.load())
Code Example #16
    def do_clear_uploads(self, line):
        ADLUploader.clear_saved()
Code Example #17
import time
from azure.datalake.store.multithread import ADLUploader

def abc():
    xyz()

def xyz():
    cat()

def cat():
    # stand-in for a long-running operation
    time.sleep(100)

abc()

# clear_saved() is a class-level operation and needs no configured uploader;
# calling ADLUploader() with no arguments would raise a TypeError. It removes
# all persisted transfers and returns None.
ADLUploader.clear_saved()