def test_iter_archive_file(tar_nested_jsonl_path):
    """Check that ``iter_archive`` can walk an archive nested inside another.

    The outer archive at ``tar_nested_jsonl_path`` is expected to yield
    exactly one entry. Passing the second element of that entry back into
    ``iter_archive`` is expected to yield exactly two members, each of which
    is validated with ``_test_jsonl``.
    """
    # NOTE(review): another test with this same name appears further down in
    # this view; if both live in the same module, the later definition rebinds
    # the name and this one is never collected -- confirm which should stay.
    dl_manager = StreamingDownloadManager()
    # A ``for`` loop never binds its targets when the iterable is empty, so
    # start the counters at zero: an empty archive then fails the assertions
    # below instead of raising UnboundLocalError.
    num_tar, num_jsonl = 0, 0
    outer_entries = dl_manager.iter_archive(str(tar_nested_jsonl_path))
    for num_tar, (_path, file) in enumerate(outer_entries, start=1):
        inner_entries = dl_manager.iter_archive(file)
        for num_jsonl, (subpath, subfile) in enumerate(inner_entries, start=1):
            _test_jsonl(subpath, subfile)
    assert num_tar == 1
    assert num_jsonl == 2
def test_iter_archive_path(tar_jsonl_path):
    """Check that ``iter_archive`` yields the members of an archive given by path.

    The archive at ``tar_jsonl_path`` is expected to yield exactly two
    ``(path, file)`` pairs, each of which is validated with ``_test_jsonl``.
    """
    # NOTE(review): another test with this same name appears further down in
    # this view; if both live in the same module, the later definition rebinds
    # the name and this one is never collected -- confirm which should stay.
    dl_manager = StreamingDownloadManager()
    # A ``for`` loop never binds its target when the iterable is empty, so
    # start the counter at zero: an empty archive then fails the assertion
    # below instead of raising UnboundLocalError.
    num_jsonl = 0
    entries = dl_manager.iter_archive(str(tar_jsonl_path))
    for num_jsonl, (path, file) in enumerate(entries, start=1):
        _test_jsonl(path, file)
    assert num_jsonl == 2
def test_iter_archive_file(tar_nested_jsonl_path):
    """Walk a nested archive twice through the same ``iter_archive`` iterable.

    Each pass over the outer iterable is expected to produce exactly one
    entry, and re-opening that entry with ``iter_archive`` is expected to
    produce exactly two members, each validated by ``_test_jsonl``.
    """
    manager = StreamingDownloadManager()
    outer_archive = manager.iter_archive(str(tar_nested_jsonl_path))

    # Two passes over the *same* iterable object: the second pass makes sure
    # it is reset correctly after being exhausted once.
    for _ in range(2):
        tar_count = jsonl_count = 0
        for tar_count, (_outer_path, inner_archive) in enumerate(outer_archive, start=1):
            inner_members = manager.iter_archive(inner_archive)
            for jsonl_count, (member_path, member_file) in enumerate(inner_members, start=1):
                _test_jsonl(member_path, member_file)
        assert tar_count == 1
        assert jsonl_count == 2
def test_iter_archive_path(tar_jsonl_path):
    """Iterate an archive given by path twice through the same iterable.

    Each pass is expected to produce exactly two ``(path, file)`` pairs,
    each validated by ``_test_jsonl``.
    """
    manager = StreamingDownloadManager()
    members = manager.iter_archive(str(tar_jsonl_path))

    # Two passes over the *same* iterable object: the second pass makes sure
    # it is reset correctly after being exhausted once.
    for _ in range(2):
        jsonl_count = 0
        for jsonl_count, (member_path, member_file) in enumerate(members, start=1):
            _test_jsonl(member_path, member_file)
        assert jsonl_count == 2