Esempio n. 1
0
def test_DataFilesList_from_local_or_remote(complex_data_dir, pattern_results, pattern):
    """Check what ``DataFilesList.from_local_or_remote`` resolves for ``pattern``.

    ``pattern_results[pattern]`` is compared against the sorted string form of
    the resolved entries. If resolution raises ``FileNotFoundError``, the
    expected result for that pattern must be empty.
    """
    expected = pattern_results[pattern]
    # Guard only the resolution call: a FileNotFoundError raised anywhere else
    # must not be mistaken for "the pattern matched nothing".
    try:
        data_files_list = DataFilesList.from_local_or_remote([pattern], complex_data_dir)
    except FileNotFoundError:
        assert len(expected) == 0, f"pattern {pattern!r} resolved to nothing, expected {expected!r}"
    else:
        assert sorted(str(f) for f in data_files_list) == expected
        assert all(isinstance(path, Path) for path in data_files_list)
        assert len(data_files_list.origin_metadata) > 0
Esempio n. 2
0
def test_DataFilesList_from_hf_repo(hub_dataset_info, hub_dataset_info_patterns_results, pattern):
    """Check what ``DataFilesList.from_hf_repo`` resolves for ``pattern``.

    ``hub_dataset_info_patterns_results[pattern]`` is compared against the
    sorted string form of the resolved entries, each of which must be a
    ``Url``. If resolution raises ``FileNotFoundError``, the expected result
    for that pattern must be empty.
    """
    expected = hub_dataset_info_patterns_results[pattern]
    # Guard only the resolution call: a FileNotFoundError raised anywhere else
    # must not be mistaken for "the pattern matched nothing".
    try:
        data_files_list = DataFilesList.from_hf_repo([pattern], hub_dataset_info)
    except FileNotFoundError:
        assert len(expected) == 0, f"pattern {pattern!r} resolved to nothing, expected {expected!r}"
    else:
        assert sorted(str(f) for f in data_files_list) == expected
        assert all(isinstance(url, Url) for url in data_files_list)
        assert len(data_files_list.origin_metadata) > 0
def test_DataFilesList_from_local_or_remote_with_extra_files(
        complex_data_dir, text_file):
    """Resolve a URL plus an explicit file path and check the resulting entries.

    The resolved list must contain a ``Url`` for ``_TEST_URL`` followed by a
    ``Path`` for ``text_file``, with exactly two origin-metadata entries.
    """
    patterns = [_TEST_URL, str(text_file)]
    resolved = DataFilesList.from_local_or_remote(patterns, complex_data_dir)

    expected_entries = [Url(_TEST_URL), Path(text_file)]
    assert list(resolved) == expected_entries
    assert len(resolved.origin_metadata) == 2