Exemple #1
0
def test_iter_archive_file(tar_nested_jsonl_path):
    """Iterate an archive whose members are themselves archives.

    The archive at ``tar_nested_jsonl_path`` is opened by path with
    ``iter_archive``; each file object it yields is handed back to
    ``iter_archive`` and every inner ``(path, file)`` pair is passed to
    ``_test_jsonl``. The test expects exactly one outer member, and two
    inner members in the last outer member visited.
    """
    dl_manager = StreamingDownloadManager()
    # Start both counters at zero: if either archive yields nothing the loop
    # variables would otherwise stay unbound and the asserts below would
    # raise UnboundLocalError instead of failing with a clear comparison.
    num_tar, num_jsonl = 0, 0
    outer_members = dl_manager.iter_archive(str(tar_nested_jsonl_path))
    for num_tar, (_path, file) in enumerate(outer_members, start=1):
        inner_members = dl_manager.iter_archive(file)
        for num_jsonl, (subpath, subfile) in enumerate(inner_members,
                                                       start=1):
            _test_jsonl(subpath, subfile)
    assert num_tar == 1
    assert num_jsonl == 2
Exemple #2
0
def test_iter_archive_path(tar_jsonl_path):
    """Iterate an archive opened by path and check each member.

    Every ``(path, file)`` pair yielded by ``iter_archive`` for
    ``tar_jsonl_path`` is passed to ``_test_jsonl``; the test expects exactly
    two members.
    """
    dl_manager = StreamingDownloadManager()
    # Start at zero so an empty archive fails the assert below rather than
    # raising UnboundLocalError on the never-assigned loop variable.
    num_jsonl = 0
    members = dl_manager.iter_archive(str(tar_jsonl_path))
    for num_jsonl, (path, file) in enumerate(members, start=1):
        _test_jsonl(path, file)
    assert num_jsonl == 2
def test_iter_archive_file(tar_nested_jsonl_path):
    """Walk a nested archive twice through one ``iter_archive`` iterable.

    The iterable returned for ``tar_nested_jsonl_path`` is created once and
    consumed in two consecutive passes. In each pass, every yielded file
    object is fed back into ``iter_archive`` and each inner pair is passed to
    ``_test_jsonl``. Both passes must see one outer member and two inner
    members.
    """
    manager = StreamingDownloadManager()
    outer_archive = manager.iter_archive(str(tar_nested_jsonl_path))
    # Second pass: do it twice to make sure the iterable is reset correctly.
    for _pass in range(2):
        outer_count = 0
        inner_count = 0
        for outer_count, (_outer_path, outer_file) in enumerate(
                outer_archive, start=1):
            inner_archive = manager.iter_archive(outer_file)
            for inner_count, (inner_path, inner_file) in enumerate(
                    inner_archive, start=1):
                _test_jsonl(inner_path, inner_file)
        assert outer_count == 1
        assert inner_count == 2
def test_iter_archive_path(tar_jsonl_path):
    """Consume one ``iter_archive`` iterable twice and check every member.

    The iterable for ``tar_jsonl_path`` is built once; each of the two passes
    sends every yielded ``(path, file)`` pair to ``_test_jsonl`` and must
    count exactly two members.
    """
    manager = StreamingDownloadManager()
    members = manager.iter_archive(str(tar_jsonl_path))
    # Second pass: do it twice to make sure the iterable is reset correctly.
    for _pass in range(2):
        member_count = 0
        for member_count, (member_path, member_file) in enumerate(members,
                                                                  start=1):
            _test_jsonl(member_path, member_file)
        assert member_count == 2