Beispiel #1
0
    def test_dataset_has_valid_etag(self, dataset_name):
        """Assert that a HEAD request for the dataset's script returns an ETag.

        The script file name is the last non-empty ``/``-separated component
        of ``dataset_name`` with ``.py`` appended. Its URL is obtained from
        ``hf_bucket_url(..., dataset=True)`` and queried with a HEAD request
        (redirects followed, 10 second timeout).

        The test fails when the request raises ``EnvironmentError`` or a
        ``requests`` timeout, when the final status code is not 200, or when
        the response carries no ``Etag`` header. The assertion message states
        which of these happened.

        Args:
            dataset_name: Dataset identifier, possibly containing ``/``
                separators (a trailing ``/`` is tolerated).
        """
        # Drop empty components so "a/b/" resolves to "b" rather than "".
        script_name = [part for part in dataset_name.split("/") if part][-1] + ".py"
        dataset_url = hf_bucket_url(dataset_name, filename=script_name, dataset=True)

        etag = None
        try:
            response = requests.head(dataset_url, allow_redirects=True, proxies=None, timeout=10)
        except (EnvironmentError, requests.exceptions.Timeout) as err:
            # Still a test failure (etag stays None), but keep the cause so the
            # assertion below can report it instead of a bare "None".
            failure = "request failed: {!r}".format(err)
        else:
            if response.status_code == 200:
                etag = response.headers.get("Etag")
                failure = "response carried no Etag header"
            else:
                failure = "unexpected HTTP status {}".format(response.status_code)

        self.assertIsNotNone(etag, msg="No ETag for {}: {}".format(dataset_url, failure))
Beispiel #2
0
    def download_dummy_data(self):
        """Resolve the dummy data through ``cached_path`` and record its location.

        When ``self.is_local`` is exactly ``True`` the source is the relative
        path ``datasets/<dataset_name>/<path_to_dummy_file>``; otherwise it is
        the URL returned by ``hf_bucket_url`` for the same dataset and file.
        The source is handed to ``cached_path`` with extraction forced, and
        the returned path joined with ``self.dummy_data_extracted_folder_name``
        is stored in ``self.complete_path_to_dummy_file``.

        Returns:
            None. The result is stored on the instance.
        """
        if self.is_local is not True:
            # Remote case: ask hf_bucket_url for the location of the dummy file.
            source = hf_bucket_url(self.dataset_name, filename=self.path_to_dummy_file)
        else:
            # Local case: the dummy file lives under the "datasets" directory.
            source = os.path.join("datasets", self.dataset_name, self.path_to_dummy_file)

        # cached_path returns the local path of the (extracted) data.
        extracted_root = cached_path(
            source,
            cache_dir=self.cache_dir,
            extract_compressed_file=True,
            force_extract=True,
        )
        self.complete_path_to_dummy_file = os.path.join(extracted_root, self.dummy_data_extracted_folder_name)