def test_DataFilesList_from_local_or_remote(complex_data_dir, pattern_results, pattern):
    """Check ``DataFilesList.from_local_or_remote`` for a single pattern.

    The pattern is passed as a one-element list together with
    ``complex_data_dir``. Two outcomes are accepted:

    * The call raises ``FileNotFoundError``: ``pattern_results[pattern]``
      must then be empty.
    * The call returns: the entries, converted to ``str`` and sorted, must
      equal ``pattern_results[pattern]``, every entry must be a ``Path``,
      and ``origin_metadata`` must be non-empty.
    """
    expected = pattern_results[pattern]
    try:
        data_files_list = DataFilesList.from_local_or_remote([pattern], complex_data_dir)
    except FileNotFoundError:
        # The call raised instead of returning; that is only acceptable
        # when no results are expected for this pattern.
        assert len(expected) == 0
    else:
        # The checks live outside the ``try`` so that a FileNotFoundError
        # raised while evaluating them cannot be mistaken for the
        # "call raised" outcome handled above.
        assert sorted(str(f) for f in data_files_list) == expected
        assert all(isinstance(path, Path) for path in data_files_list)
        assert len(data_files_list.origin_metadata) > 0
def test_DataFilesList_from_hf_repo(hub_dataset_info, hub_dataset_info_patterns_results, pattern):
    """Check ``DataFilesList.from_hf_repo`` for a single pattern.

    The pattern is passed as a one-element list together with
    ``hub_dataset_info``. Two outcomes are accepted:

    * The call raises ``FileNotFoundError``:
      ``hub_dataset_info_patterns_results[pattern]`` must then be empty.
    * The call returns: the entries, converted to ``str`` and sorted, must
      equal ``hub_dataset_info_patterns_results[pattern]``, every entry
      must be a ``Url``, and ``origin_metadata`` must be non-empty.
    """
    expected = hub_dataset_info_patterns_results[pattern]
    try:
        data_files_list = DataFilesList.from_hf_repo([pattern], hub_dataset_info)
    except FileNotFoundError:
        # The call raised instead of returning; that is only acceptable
        # when no results are expected for this pattern.
        assert len(expected) == 0
    else:
        # The checks live outside the ``try`` so that a FileNotFoundError
        # raised while evaluating them cannot be mistaken for the
        # "call raised" outcome handled above.
        assert sorted(str(f) for f in data_files_list) == expected
        assert all(isinstance(url, Url) for url in data_files_list)
        assert len(data_files_list.origin_metadata) > 0
def test_DataFilesList_from_local_or_remote_with_extra_files(complex_data_dir, text_file):
    """Check ``from_local_or_remote`` with a URL and a file path as inputs.

    ``_TEST_URL`` and ``str(text_file)`` are passed together with
    ``complex_data_dir``. The result must list ``Url(_TEST_URL)`` followed
    by ``Path(text_file)`` and carry exactly two ``origin_metadata`` items.
    """
    requested = [_TEST_URL, str(text_file)]
    resolved = DataFilesList.from_local_or_remote(requested, complex_data_dir)

    actual_entries = list(resolved)
    expected_entries = [Url(_TEST_URL), Path(text_file)]
    assert actual_entries == expected_entries

    metadata_count = len(resolved.origin_metadata)
    assert metadata_count == 2