Beispiel #1
0
def test_resolve_patterns_locally_or_by_urls_with_extensions(complex_data_dir, pattern, size, extensions):
    if size > 0:
        resolved_data_files = resolve_patterns_locally_or_by_urls(
            complex_data_dir, [pattern], allowed_extensions=extensions
        )
        assert len(resolved_data_files) == size
    else:
        with pytest.raises(FileNotFoundError):
            resolve_patterns_locally_or_by_urls(complex_data_dir, [pattern], allowed_extensions=extensions)
Beispiel #2
0
def test_resolve_patterns_locally_or_by_urls(complex_data_dir, pattern, pattern_results):
    try:
        resolved_data_files = resolve_patterns_locally_or_by_urls(complex_data_dir, [pattern])
        assert sorted(str(f) for f in resolved_data_files) == pattern_results[pattern]
        assert all(isinstance(path, Path) for path in resolved_data_files)
    except FileNotFoundError:
        assert len(pattern_results[pattern]) == 0
Beispiel #3
0
def test_resolve_patterns_locally_or_by_urls_sorted_files(tmp_path_factory):
    path = str(tmp_path_factory.mktemp("unsorted_text_files"))
    unsorted_names = ["0.txt", "2.txt", "3.txt"]
    for name in unsorted_names:
        with open(os.path.join(path, name), "w"):
            pass
    resolved_data_files = resolve_patterns_locally_or_by_urls(path, ["*"])
    resolved_names = [os.path.basename(data_file) for data_file in resolved_data_files]
    assert resolved_names == sorted(unsorted_names)
def test_fail_resolve_patterns_locally_or_by_urls(complex_data_dir):
    with pytest.raises(FileNotFoundError):
        resolve_patterns_locally_or_by_urls(complex_data_dir, ["blablabla"])
def test_resolve_patterns_locally_or_by_urls_with_absolute_path(
        tmp_path, complex_data_dir):
    abs_path = os.path.join(complex_data_dir, "data", "train.txt")
    resolved_data_files = resolve_patterns_locally_or_by_urls(
        str(tmp_path / "blabla"), [abs_path])
    assert len(resolved_data_files) == 1
Beispiel #6
0
def test_resolve_patterns_locally_or_by_urls(complex_data_dir, pattern, pattern_results):
    resolved_data_files = resolve_patterns_locally_or_by_urls(complex_data_dir, [pattern])
    assert sorted(str(f) for f in resolved_data_files) == pattern_results[pattern]
    assert all(isinstance(path, Path) for path in resolved_data_files)