def _test_checksums(self):
  """Verify every URL passed to `dl_manager.download` has a recorded checksum.

  Looks up checksums first on the dataset class (dataset-as-folder layout),
  then falls back to the legacy `checksums/` directory. Fails the test if the
  checksums file is missing entirely or if any downloaded URL lacks an entry.

  Raises:
    FileNotFoundError: If no checksums file exists in either location.
  """
  # If no call to `dl_manager.download`, then no need to check url presence.
  if not self._download_urls:
    return

  # BUGFIX: the original message concatenated the URL directly with the next
  # sentence ("...run_the_generation_codeIf want..."), corrupting the link and
  # dropping a word. Separate with a newline and fix the grammar.
  err_msg = (
      "Did you forget to record checksums with `--register_checksums` ? See "
      "instructions at: "
      "https://www.tensorflow.org/datasets/add_dataset#run_the_generation_code"
      "\nIf you want to opt-out of checksums validation, please add "
      "`SKIP_CHECKSUMS = True` to the `DatasetBuilderTestCase`.\n")

  url_infos = self.DATASET_CLASS.url_infos
  filepath = self.DATASET_CLASS._checksums_path  # pylint: disable=protected-access

  # Legacy checksums: search in the `checksums/` dir.
  if url_infos is None:
    legacy_filepath = checksums._checksum_paths().get(self.builder.name)  # pylint: disable=protected-access
    if legacy_filepath and legacy_filepath.exists():
      filepath = legacy_filepath
      url_infos = checksums.load_url_infos(filepath)

  # Checksums present neither in the legacy dir nor in the package.
  if url_infos is None:
    raise FileNotFoundError(
        f"Checksums file not found at: {filepath}\n"
        f"{err_msg}\n")

  # Every downloaded URL must have a corresponding checksum entry.
  missing_urls = self._download_urls - set(url_infos.keys())
  self.assertEmpty(
      missing_urls,
      f"Some urls checksums are missing at: {filepath}\n{err_msg}")
def _collect_path_to_url_infos(
) -> Dict[tfds.core.ReadWritePath, Dict[Url, checksums.UrlInfo]]:
  """Collect checksums paths to url_infos."""
  # Start with the legacy checksums locations (`checksums/` dir).
  candidate_paths = list(checksums._checksum_paths().values())  # pylint: disable=protected-access

  # Add the dataset-as-folder checksums file of every registered builder,
  # keeping only the files that actually exist on disk.
  for builder_name in tfds.list_builders():
    folder_path = tfds.builder_cls(builder_name)._checksums_path  # pylint: disable=protected-access
    if folder_path.exists():
      candidate_paths.append(folder_path)

  # Load each file, keyed by its writable path representation.
  path_to_infos = {}
  for write_path in (tfds.core.utils.to_write_path(p) for p in candidate_paths):
    path_to_infos[write_path] = typing.cast(
        Dict[Url, checksums.UrlInfo],
        checksums.load_url_infos(write_path))
  return path_to_infos