def test_download_glob_single_file(tempdir, azure):
    """Glob patterns that match exactly one remote file download only it."""
    with setup_tree(azure):
        # Wildcard in the filename component.
        remote_path = test_dir / 'data/single/single' / '*.txt'
        down = ADLDownloader(azure,
                             remote_path,
                             tempdir,
                             run=False,
                             overwrite=True)
        file_pair_dict = dict(down._file_pairs)

        assert len(file_pair_dict) == 1

        lfiles = [os.path.relpath(f, tempdir) for f in file_pair_dict.keys()]
        # Single-argument os.path.join and sorted() on one-element lists
        # were no-ops; compare directly.
        assert lfiles == ['single.txt.inprogress']

        # Wildcard in an intermediate directory component.
        remote_path = test_dir / 'data/*/single' / 'single.txt'
        down = ADLDownloader(azure,
                             remote_path,
                             tempdir,
                             run=False,
                             overwrite=True)
        file_pair_dict = dict(down._file_pairs)
        assert len(file_pair_dict) == 1

        lfiles = [os.path.relpath(f, tempdir) for f in file_pair_dict.keys()]
        assert lfiles == [
            os.path.join('single', 'single', 'single.txt.inprogress')]
# Example #2
def test_download_single_file(tempdir, azure):
    """Download one remote CSV both in a single chunk and in several chunks."""
    with azure_teardown(azure):
        name = posix(test_dir, 'remote.csv')
        lines = 100
        fname = os.path.join(tempdir, 'local.csv')
        size, checksum = create_remote_csv(azure, name, 10, 5, lines)

        # First pass: one chunk covers the whole file.
        # Second pass: multiple chunks, still one thread.
        for chunksize in (size + 10, size // 5):
            try:
                ADLDownloader(azure,
                              name,
                              fname,
                              1,
                              chunksize,
                              overwrite=True)
                assert md5sum(fname) == checksum
                assert os.stat(fname).st_size == size
                assert linecount(fname) == lines
            finally:
                if os.path.isfile(fname):
                    os.remove(fname)
# Example #3
def test_download_glob(tempdir, azure):
    """Glob patterns expand to the expected set of planned local files."""
    with setup_tree(azure):
        def relative_targets(pattern):
            # Plan (run=False) a download for *pattern* and return the
            # local target paths relative to tempdir, sorted.
            planned = ADLDownloader(azure,
                                    pattern,
                                    tempdir,
                                    run=False,
                                    overwrite=True)
            pairs = dict(planned._file_pairs)
            return sorted(os.path.relpath(p, tempdir) for p in pairs)

        # Wildcard in the filename only.
        targets = relative_targets(test_dir / 'data' / 'a' / '*.csv')
        assert len(targets) == 2
        assert targets == sorted(['x.csv.inprogress', 'y.csv.inprogress'])

        # Wildcards in both directory and filename.
        targets = relative_targets(test_dir / 'data' / '*' / '*.csv')
        assert len(targets) == 4
        assert targets == sorted([
            os.path.join('a', 'x.csv.inprogress'),
            os.path.join('a', 'y.csv.inprogress'),
            os.path.join('b', 'x.csv.inprogress'),
            os.path.join('b', 'y.csv.inprogress')
        ])

        # Wildcard in the directory, fixed filename.
        targets = relative_targets(test_dir / 'data' / '*' / 'z.txt')
        assert len(targets) == 2
        assert targets == sorted([
            os.path.join('a', 'z.txt.inprogress'),
            os.path.join('b', 'z.txt.inprogress')
        ])
def test_download_many(tempdir, azure):
    """Downloading a whole remote tree yields more than one local file."""
    with setup_tree(azure):
        ADLDownloader(azure, test_dir, tempdir, 1, 2**24, overwrite=True)
        total_files = sum(
            len(filenames) for _, _, filenames in os.walk(tempdir))
        assert total_files > 1
# Example #5
def test_download_overwrite(tempdir, azure):
    """Without overwrite=True, a colliding local file aborts the transfer."""
    with setup_tree(azure):
        # Create a local file that collides with a remote one.
        with open(os.path.join(tempdir, 'x.csv'), 'w') as f:
            f.write('12345')

        with pytest.raises(OSError) as e:
            ADLDownloader(azure, test_dir, tempdir, 1, 2**24, run=False)
        # Inspect the raised exception itself, not the ExceptionInfo
        # wrapper: str(e) is not guaranteed to contain the message.
        assert tempdir in str(e.value)
def test_save_down(tempdir, azure):
    """save() persists the transfer; save(keep=False) removes it again."""
    with setup_tree(azure):
        down = ADLDownloader(azure, test_dir, tempdir, 1, 2**24, run=False,
                             overwrite=True)

        down.save()
        assert down.hash in ADLDownloader.load()

        down.save(keep=False)
        assert down.hash not in ADLDownloader.load()
def test_download_empty_directory(tempdir, azure):
    """Empty remote directories are recreated locally."""
    with setup_tree(azure):
        down = ADLDownloader(azure,
                             test_dir,
                             tempdir,
                             1,
                             2**24,
                             overwrite=True)
        # Join path components individually so the separator is correct
        # on every platform (the 'data/empty' literal mixed separators
        # on Windows).
        dirname = os.path.join(tempdir, 'data', 'empty')
        assert os.path.isdir(dirname)
def test_download_path(azure):
    """Planned local paths are rooted under the requested lpath."""
    with setup_tree(azure):
        down = ADLDownloader(azure,
                             lpath="/lpath/test/testfolder",
                             rpath='/' + test_dir.name,
                             run=False)
        expected_prefix = '/lpath/test/testfolder/data'
        for local_file, _remote_file in down._file_pairs:
            if 'data' not in local_file:
                continue
            posix_path = AzureDLPath(local_file).as_posix()
            assert posix_path.startswith(expected_prefix)
# Example #9
def download_from_adls(account_name,
                       source_path,
                       destination_path,
                       thread_count=None,
                       overwrite=False):
    """Download *source_path* from the Data Lake Store account named
    *account_name* to *destination_path* on the local filesystem.
    """
    filesystem = cf_dls_filesystem(account_name)
    ADLDownloader(filesystem,
                  source_path,
                  destination_path,
                  thread_count,
                  overwrite=overwrite)
def test_download_single_file_in_directory(tempdir, azure):
    """A lone file nested inside a directory tree is downloaded in place."""
    with setup_tree(azure):
        down = ADLDownloader(azure,
                             test_dir,
                             tempdir,
                             1,
                             2**24,
                             overwrite=True)
        # Join path components individually for cross-platform separators
        # (the 'data/single/single' literal mixed separators on Windows).
        dirname = os.path.join(tempdir, 'data', 'single', 'single')
        assert os.path.isdir(dirname)
        assert os.path.isfile(os.path.join(dirname, 'single.txt'))
    def do_get(self, line):
        """Shell command: get remote_path [local_path] [-b chunksize] [-c threads] [-f]."""
        parser = argparse.ArgumentParser(prog="get", add_help=False)
        parser.add_argument('remote_path', type=str)
        parser.add_argument('local_path', type=str, nargs='?', default='.')
        parser.add_argument('-b', '--chunksize', type=int, default=2**28)
        parser.add_argument('-c', '--threads', type=int, default=None)
        parser.add_argument('-f', '--force', action='store_true')
        try:
            args = parser.parse_args(line.split())
        except SystemExit:
            # argparse prints its own usage message and raises SystemExit
            # on bad input; abort the command here. The old bare
            # `except: pass` fell through with `args` unbound, crashing
            # with NameError on the next line.
            return

        ADLDownloader(self._fs, args.remote_path, args.local_path,
                      nthreads=args.threads, chunksize=args.chunksize,
                      overwrite=args.force)
# Example #12
def download_from_adls(cmd, account_name, source_path, destination_path, chunk_size, buffer_size, block_size,
                       thread_count=None, overwrite=False, progress_callback=None):
    """Download *source_path* from the given account to *destination_path*,
    reporting progress through *progress_callback* (or the CLI default).
    """
    client = cf_dls_filesystem(cmd.cli_ctx, account_name)
    callback = progress_callback or get_update_progress(cmd.cli_ctx)
    ADLDownloader(
        client,
        source_path,
        destination_path,
        thread_count,
        chunksize=chunk_size,
        buffersize=buffer_size,
        blocksize=block_size,
        overwrite=overwrite,
        progress_callback=callback)
def test_download_single_to_dir(tempdir, azure):
    """Downloading a file to a directory target lands it inside that dir."""
    with azure_teardown(azure):
        name = posix(test_dir, 'remote.csv')
        lines = 100
        size, checksum = create_remote_csv(azure, name, 10, 5, lines)
        fname = os.path.join(tempdir, 'remote.csv')
        try:
            # Target is tempdir itself; the file keeps its remote name.
            ADLDownloader(azure, name, tempdir, 1, 2**24, overwrite=True)
            assert os.stat(fname).st_size == size
            assert md5sum(fname) == checksum
            assert linecount(fname) == lines
        finally:
            if os.path.isfile(fname):
                os.remove(fname)
def test_download_single_empty_file(tempdir, azure):
    """A zero-byte remote file downloads cleanly as an empty local file."""
    with azure_teardown(azure):
        name = posix(test_dir, 'remote.csv')
        lines = 0  # the remote file contains no bytes at all
        size, checksum = create_remote_csv(azure, name, 10, 5, lines)
        fname = os.path.join(tempdir, 'local.csv')

        try:
            # A single chunk is more than enough for an empty file.
            ADLDownloader(azure, name, fname, 1, size + 10, overwrite=True)
            assert os.stat(fname).st_size == size
            assert md5sum(fname) == checksum
            assert linecount(fname) == lines
        finally:
            if os.path.isfile(fname):
                os.remove(fname)
# Example #15
def download_from_adls(account_name,
                       source_path,
                       destination_path,
                       chunk_size,
                       buffer_size,
                       block_size,
                       thread_count=None,
                       overwrite=False):
    """Download *source_path* from the Data Lake Store account named
    *account_name* to *destination_path*, with explicit transfer tuning
    (chunk, buffer and block sizes).
    """
    fs_client = cf_dls_filesystem(account_name)
    ADLDownloader(fs_client,
                  source_path,
                  destination_path,
                  thread_count,
                  chunksize=chunk_size,
                  buffersize=buffer_size,
                  blocksize=block_size,
                  overwrite=overwrite)
 def do_list_downloads(self, line):
     """Shell command: print every persisted download transfer."""
     saved = ADLDownloader.load()
     print(saved)
def test_download_root_folder(azure, tempdir):
    """An absolute rpath to a single file downloads it directly into lpath."""
    with setup_tree(azure):
        remote = (AzureDLPath('/') / test_dir / 'data' / 'single'
                  / 'single' / 'single.txt')
        ADLDownloader(azure, rpath=remote, lpath=tempdir)
        assert os.path.isfile(os.path.join(tempdir, 'single.txt'))
        
 def do_resume_download(self, line):
     """Shell command: resume the saved download whose hash is *line*."""
     try:
         transfer = ADLDownloader.load()[line]
         transfer.run()
     except KeyError:
         print("No such download")
def test_download_overwrite(tempdir, azure):
    """Re-downloading over an existing local tree without overwrite fails."""
    with setup_tree(azure):
        with pytest.raises(OSError) as e:
            ADLDownloader(azure, test_dir, tempdir, 1, 2**24, run=False)
        # Inspect the raised exception itself, not the ExceptionInfo
        # wrapper: str(e) is not guaranteed to contain the message.
        assert tempdir in str(e.value)
 def do_clear_downloads(self, line):
     """Shell command: discard all persisted download transfers."""
     ADLDownloader.clear_saved()
# Example #21
# coding=utf-8
from azure.datalake.store import core, lib
from azure.datalake.store.multithread import ADLDownloader
import logging

if __name__ == '__main__':
    # Fill in the credential/account values under quotes, up to the
    # ################################### section, before running.
    TENANT_ID = ""
    CLIENT_ID = ""
    CLIENT_SECRET = ""
    ACCOUNT_NAME = "targetadlssandbox"
    PATH_TO_DOWNLOAD = "/data_files/TGT_NSN_201904241016.zip"
    LOG_FILE_NAME = "adls.log"
    #####################################################################################

    # Authenticate with a service principal and open the store's filesystem.
    token = lib.auth(tenant_id=TENANT_ID, client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
    adlfs = core.AzureDLFileSystem(token, store_name=ACCOUNT_NAME) # Add name of account
    # Route the SDK's DEBUG-level logging to a file for troubleshooting.
    adls_log_handler = logging.FileHandler(filename=LOG_FILE_NAME)
    adls_logger = logging.getLogger('azure.datalake.store')
    adls_logger.setLevel(logging.DEBUG)
    adls_logger.addHandler(adls_log_handler)
    # Print file metadata (size etc.) before transferring.
    print(adlfs.info(path=PATH_TO_DOWNLOAD))
    # Download the remote file into the current working directory.
    ADLDownloader(adlfs, PATH_TO_DOWNLOAD, "./")