def test_download_single_file(tempdir, azure):
    """Download one remote CSV to a local file path and verify the copy.

    The download runs twice with one thread: first with a chunk size larger
    than the file (a single chunk), then with a chunk size of one fifth of
    the file (multiple chunks).  After each pass the local file must match
    the checksum, byte size and line count reported for the remote file.
    """
    with azure_teardown(azure):
        remote_path = posix(test_dir, 'remote.csv')
        expected_lines = 100
        local_path = os.path.join(tempdir, 'local.csv')
        expected_size, expected_md5 = create_remote_csv(
            azure, remote_path, 10, 5, expected_lines)

        chunk_sizes = (
            expected_size + 10,  # single chunk
            expected_size // 5,  # multiple chunks, one thread
        )
        for chunk_size in chunk_sizes:
            try:
                # The checks below rely on the file being present as soon as
                # the constructor returns.
                downloader = ADLDownloader(
                    azure, remote_path, local_path, 1, chunk_size,
                    overwrite=True)
                assert md5sum(local_path) == expected_md5
                assert os.stat(local_path).st_size == expected_size
                assert linecount(local_path) == expected_lines
            finally:
                # Remove the local copy whether or not the checks passed, so
                # the next pass (and other tests) start from a clean slate.
                if os.path.isfile(local_path):
                    os.remove(local_path)
def checksum(path):
    """Generate a checksum for the content of a file or directory.

    Returns ``None`` when ``path`` does not exist and ``md5sum(path)`` when
    it is a regular file.  For anything else the tree under ``path`` is
    walked, ``md5sum`` is taken of every file that still exists, and the
    MD5 hex digest of the sorted, concatenated per-file sums is returned.
    Only the per-file sums feed the final digest; file names and directory
    layout do not.
    """
    if not os.path.exists(path):
        return None
    if os.path.isfile(path):
        return md5sum(path)

    # Sorting makes the result independent of the order os.walk yields files.
    file_sums = sorted(
        str.encode(md5sum(candidate))
        for parent, _subdirs, names in os.walk(path)
        for candidate in (os.path.join(parent, entry) for entry in names)
        # Skip entries that are listed but do not resolve (e.g. removed
        # between listing and hashing).
        if os.path.exists(candidate)
    )
    return hashlib.md5(b''.join(file_sums)).hexdigest()
def test_download_single_to_dir(tempdir, azure):
    """Download one remote CSV into a directory and verify the copy.

    The download target is the directory ``tempdir`` rather than a file
    path; the test expects the file to appear there as ``remote.csv`` and
    to match the checksum, byte size and line count of the remote file.
    """
    with azure_teardown(azure):
        remote_path = posix(test_dir, 'remote.csv')
        expected_lines = 100
        expected_size, expected_md5 = create_remote_csv(
            azure, remote_path, 10, 5, expected_lines)
        # Where the downloaded file is expected to land inside the directory.
        local_path = os.path.join(tempdir, 'remote.csv')

        thread_count = 1  # presumably the worker-thread count; confirm against ADLDownloader
        chunk_size = 2**24  # presumably the chunk size in bytes; confirm against ADLDownloader
        try:
            downloader = ADLDownloader(
                azure, remote_path, tempdir, thread_count, chunk_size,
                overwrite=True)
            assert md5sum(local_path) == expected_md5
            assert os.stat(local_path).st_size == expected_size
            assert linecount(local_path) == expected_lines
        finally:
            # Clean up the local copy regardless of the outcome.
            if os.path.isfile(local_path):
                os.remove(local_path)
def test_download_single_empty_file(tempdir, azure):
    """Download a remote CSV created with zero lines and verify the copy.

    The local file must match the checksum, byte size and line count
    reported for the (empty) remote file.
    """
    with azure_teardown(azure):
        remote_path = posix(test_dir, 'remote.csv')
        expected_lines = 0  # the file should have no bytes in it
        expected_size, expected_md5 = create_remote_csv(
            azure, remote_path, 10, 5, expected_lines)
        local_path = os.path.join(tempdir, 'local.csv')

        # single chunk: the chunk size exceeds the file size
        chunk_size = expected_size + 10
        try:
            downloader = ADLDownloader(
                azure, remote_path, local_path, 1, chunk_size,
                overwrite=True)
            assert md5sum(local_path) == expected_md5
            assert os.stat(local_path).st_size == expected_size
            assert linecount(local_path) == expected_lines
        finally:
            # Clean up the local copy regardless of the outcome.
            if os.path.isfile(local_path):
                os.remove(local_path)