def test_files_integrity(self, dataset):
    """Verify the dataset's annexed files with `git annex fsck`.

    The test fails if fsck reports anything, if fsck raises, or if the
    whole check does not complete within ``TIME_LIMIT`` seconds.
    """
    TIME_LIMIT = 300
    completed = False
    with timeout(TIME_LIMIT):
        try:
            # Some datasets still carry out-of-date DataLad metadata. The
            # datasets remain functional, but stale metadata can make fsck
            # report failures, so the metadata directory is excluded here.
            #
            # Those metadata are likely to be removed eventually; once that
            # happens, the `exclude=".datalad/metadata/**"` argument should
            # be dropped.
            report = git.Repo(dataset).git.annex(
                "fsck",
                fast=True,
                quiet=True,
                exclude=".datalad/metadata/**",
            )
            if report:
                pytest.fail(report, pytrace=False)
        except Exception as exc:
            pytest.fail(str(exc), pytrace=False)
        # Reached only if fsck (and its error handling) ran to completion
        # before the timeout fired.
        completed = True
    if not completed:
        pytest.fail(
            f"The dataset timed out after {TIME_LIMIT} seconds before retrieving a file."
            + "\nCannot determine if the test is valid."
        )
def test_download(self, dataset):
    """Download a sample of the smallest files of the dataset.

    Skips straight out (returning ``True``) when the dataset contains no
    files to download.
    """
    # NOTE(review): a later method in this class defines `test_download`
    # again, so this definition is shadowed and never collected by pytest —
    # confirm whether it should be removed or renamed.
    eval_config(dataset)
    authenticate(dataset)
    filenames = get_filenames(dataset)
    if not filenames:
        return True
    smallest = get_approx_ksmallests(dataset, filenames)

    # Restricted Zenodo datasets require to download the whole archive before
    # downloading individual files.
    project = project_name2env(dataset.split("/")[-1])
    if os.getenv(f"{project}_ZENODO_TOKEN", None):
        with timeout(300):
            api.get(path=dataset, on_failure="ignore")

    download_files(dataset, smallest)
def test_download(self, dataset):
    """Download a sample of the smallest files of the dataset and of each
    of its proper submodules."""
    eval_config(dataset)
    authenticate(dataset)
    sample = get_approx_ksmallests(dataset, get_filenames(dataset))

    # Restricted Zenodo datasets require to download the whole archive before
    # downloading individual files.
    project = project_name2env(dataset.split("/")[-1])
    if os.getenv(f"{project}_ZENODO_TOKEN", None):
        with timeout(300):
            api.get(path=dataset, on_failure="ignore")

    download_files(dataset, sample)

    # Test the download of proper submodules.
    for submodule in get_proper_submodules(dataset):
        sample = get_approx_ksmallests(submodule, get_filenames(submodule))
        download_files(submodule, sample)