def test_resolve_patterns_in_dataset_repository_with_base_path(hub_dataset_info, pattern, size, base_path):
    """Check pattern resolution in a dataset repository when ``base_path`` is given.

    When ``size`` is positive, resolving ``pattern`` must yield exactly ``size``
    data files. Otherwise the resolver is expected to raise ``FileNotFoundError``.

    NOTE(review): ``pattern``, ``size`` and ``base_path`` are presumably supplied
    by a parametrize decorator or fixtures outside this view -- confirm.
    """
    if size > 0:
        resolved_data_files = resolve_patterns_in_dataset_repository(
            hub_dataset_info, [pattern], base_path=base_path
        )
        assert len(resolved_data_files) == size
    else:
        # Only the exception matters here, so the return value is not bound.
        with pytest.raises(FileNotFoundError):
            resolve_patterns_in_dataset_repository(hub_dataset_info, [pattern], base_path=base_path)
def test_resolve_patterns_in_dataset_repository_with_extensions(hub_dataset_info, pattern, size, extensions):
    """Check pattern resolution in a dataset repository with an extension filter.

    ``extensions`` is forwarded as ``allowed_extensions``. When ``size`` is
    positive, resolving ``pattern`` must yield exactly ``size`` data files.
    Otherwise the resolver is expected to raise ``FileNotFoundError``.

    NOTE(review): ``pattern``, ``size`` and ``extensions`` are presumably supplied
    by a parametrize decorator or fixtures outside this view -- confirm.
    """
    if size > 0:
        resolved_data_files = resolve_patterns_in_dataset_repository(
            hub_dataset_info, [pattern], allowed_extensions=extensions
        )
        assert len(resolved_data_files) == size
    else:
        # Only the exception matters here, so the return value is not bound.
        with pytest.raises(FileNotFoundError):
            resolve_patterns_in_dataset_repository(
                hub_dataset_info, [pattern], allowed_extensions=extensions
            )
def test_resolve_patterns_in_dataset_repository_sorted_files():
    """Check that resolved data files come back sorted by name.

    A ``DatasetInfo`` is built whose siblings are listed out of order, every
    file is resolved with the ``"*"`` pattern, and the base names of the
    results are compared against the sorted list of input names.
    """
    # Deliberately out of order: an already-sorted input would let the test
    # pass even if the resolver merely preserved the sibling order.
    unsorted_names = ["3.txt", "0.txt", "2.txt"]
    siblings = [{"rfilename": name} for name in unsorted_names]
    datasets_infos = DatasetInfo(id="test_unsorted_files", siblings=siblings, sha="foobar")
    resolved_data_files = resolve_patterns_in_dataset_repository(datasets_infos, ["*"])
    resolved_names = [os.path.basename(data_file) for data_file in resolved_data_files]
    assert resolved_names == sorted(unsorted_names)
def test_resolve_patterns_in_dataset_repository(hub_dataset_info, pattern, hub_dataset_info_patterns_results):
    """Compare resolved data files for ``pattern`` with the expected results.

    On success, the sorted string forms of the resolved files must equal
    ``hub_dataset_info_patterns_results[pattern]`` and every resolved item must
    be a ``Url``. A ``FileNotFoundError`` is tolerated only when the expected
    result for ``pattern`` is empty.

    NOTE(review): another function with this exact name appears later in this
    file; if both live in the same module the later definition shadows this
    one and only that one is collected -- confirm and rename or remove one.
    """
    try:
        resolved_urls = resolve_patterns_in_dataset_repository(hub_dataset_info, [pattern])
    except FileNotFoundError:
        # Nothing matched: acceptable only when no result was expected.
        assert len(hub_dataset_info_patterns_results[pattern]) == 0
    else:
        resolved_as_text = sorted(str(resolved) for resolved in resolved_urls)
        assert resolved_as_text == hub_dataset_info_patterns_results[pattern]
        assert all(isinstance(resolved, Url) for resolved in resolved_urls)
def test_fail_resolve_patterns_in_dataset_repository(hub_dataset_info):
    """Check that a pattern matching nothing raises ``FileNotFoundError``."""
    with pytest.raises(FileNotFoundError):
        # Pass a list of patterns, as every other call in this file does; a
        # bare string would be iterated character by character instead of
        # being treated as the single pattern "blablabla".
        resolve_patterns_in_dataset_repository(hub_dataset_info, ["blablabla"])
def test_resolve_patterns_in_dataset_repository(hub_dataset_info, pattern, hub_dataset_info_patterns_results):
    """Compare resolved data files for ``pattern`` with the expected results.

    The sorted string forms of the resolved files must equal
    ``hub_dataset_info_patterns_results[pattern]``, and every resolved item
    must be a ``Url`` instance.
    """
    resolved_urls = resolve_patterns_in_dataset_repository(hub_dataset_info, [pattern])

    resolved_as_text = sorted(map(str, resolved_urls))
    assert resolved_as_text == hub_dataset_info_patterns_results[pattern]

    # Equivalent to a single all(...) assertion over the resolved items.
    type_checks = [isinstance(resolved, Url) for resolved in resolved_urls]
    assert all(type_checks)