def test_dataset_has_valid_etag(self, dataset_name):
    """Assert that a HEAD request for the dataset's script yields an ``Etag``.

    The script filename is the last non-empty ``/``-separated component of
    ``dataset_name`` with ``.py`` appended. Its URL is obtained from
    ``hf_bucket_url(..., dataset=True)`` and queried with ``requests.head``
    (redirects followed, 10 second timeout).

    Args:
        dataset_name: Name of the dataset; may contain ``/`` separators.
    """
    # Drop empty components so leading/trailing slashes do not yield an empty name.
    name_parts = [part for part in dataset_name.split("/") if part]
    script_filename = name_parts[-1] + ".py"
    script_url = hf_bucket_url(dataset_name, filename=script_filename, dataset=True)

    etag = None
    try:
        head_response = requests.head(script_url, allow_redirects=True, proxies=None, timeout=10)
        # Only a 200 response is trusted to carry the header; anything else leaves etag unset.
        etag = head_response.headers.get("Etag") if head_response.status_code == 200 else None
    except (EnvironmentError, requests.exceptions.Timeout):
        # Request failures are not re-raised: etag stays None and the
        # assertion below reports the failure.
        pass

    self.assertIsNotNone(etag)
def download_dummy_data(self):
    """Resolve the dummy data via ``cached_path`` and store its extracted location.

    The source is a local path under ``datasets/<dataset_name>/`` when
    ``self.is_local`` is exactly ``True``; otherwise it is the URL returned by
    ``hf_bucket_url`` for ``self.path_to_dummy_file``. The source is passed to
    ``cached_path`` with extraction forced, and the returned path joined with
    ``self.dummy_data_extracted_folder_name`` is written to
    ``self.complete_path_to_dummy_file``.
    """
    if self.is_local is not True:
        # Remote case: get the URL to the dummy data on the AWS S3 bucket.
        dummy_data_source = hf_bucket_url(self.dataset_name, filename=self.path_to_dummy_file)
    else:
        # Local case: the dummy data lives inside the local "datasets" directory.
        dummy_data_source = os.path.join("datasets", self.dataset_name, self.path_to_dummy_file)

    # cached_path downloads the dummy data (if needed) and returns the local path.
    extracted_root = cached_path(
        dummy_data_source,
        cache_dir=self.cache_dir,
        extract_compressed_file=True,
        force_extract=True,
    )
    self.complete_path_to_dummy_file = os.path.join(
        extracted_root, self.dummy_data_extracted_folder_name
    )