def download_raw_dataset(self):
    """Download the raw dataset files and store in the cache location.

    Each URL in ``self.download_url`` is copied (recursively, so directory
    URLs also work) into a staging directory that ``upload_output_directory``
    publishes to ``self.raw_dataset_path`` on exit.
    """
    with upload_output_directory(self.raw_dataset_path) as (tmpdir, _):
        for url in self.download_url:
            # Name the local copy after the URL's final path component.
            target = os.path.join(tmpdir, url.split("/")[-1])
            filesystem, _ = get_fs_and_path(url)
            filesystem.get(url, target, recursive=True)
def download_raw_dataset(self):
    """Download the raw dataset and store that in the cache location."""
    with upload_output_directory(self.raw_dataset_path) as (tmpdir, _):
        for url in self.download_urls:
            name = url.split("/")[-1]
            destination = os.path.join(tmpdir, name)
            # TqdmUpTo renders a byte-scaled progress bar; urlretrieve drives
            # it through the reporthook callback (t.update_to).
            with TqdmUpTo(unit="B", unit_scale=True, unit_divisor=1024, miniters=1, desc=name) as t:
                urllib.request.urlretrieve(url, destination, t.update_to)
def download_raw_dataset(self):
    """Download the raw dataset, extract the tar archive, and store the
    contents in the cache location.

    Each URL is fetched with a progress bar, the archive is unpacked into
    the staging directory, and the files inside the archive's top-level
    folder are copied up to the staging directory root so they land
    directly in ``self.raw_dataset_path``.
    """
    with upload_output_directory(self.raw_dataset_path) as (tmpdir, _):
        for url in self.download_urls:
            filename = url.split('/')[-1]
            with TqdmUpTo(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, desc=filename) as t:
                urllib.request.urlretrieve(url, os.path.join(tmpdir, filename), t.update_to)
            # "name.tar.gz" -> "name": the folder the archive unpacks into.
            # assumes the archive's top-level folder matches this — TODO confirm per dataset.
            download_folder_name = filename.split('.')[0]
            file_path = os.path.join(tmpdir, filename)
            # NOTE(review): extractall trusts member paths in the archive;
            # fine for first-party archives, unsafe for untrusted URLs
            # (consider the `filter="data"` argument where available).
            with tarfile.open(file_path) as tar_file:
                tar_file.extractall(path=tmpdir)
            for entry in os.scandir(os.path.join(tmpdir, download_folder_name)):
                # Fix: scandir also yields subdirectories, and copyfile
                # raises IsADirectoryError on those — copy regular files only.
                if entry.is_file():
                    shutil.copyfile(entry.path, os.path.join(tmpdir, entry.name))
def download_raw_dataset(self):
    """Download the raw dataset files and store in the cache location."""
    with upload_output_directory(self.raw_dataset_path) as (tmpdir, _):
        for url in self.download_url:
            # The URL's last path component names the local file.
            local_name = url.split('/')[-1]
            urllib.request.urlretrieve(url, os.path.join(tmpdir, local_name))
def download_raw_dataset(self):
    """Download the raw dataset and extract the contents of the zip file and
    store that in the cache location.
    """
    with upload_output_directory(self.raw_dataset_path) as (tmpdir, _):
        for url in self.download_urls:
            # The whole archive is buffered in memory before extraction.
            with urlopen(url) as response:
                payload = BytesIO(response.read())
            with ZipFile(payload) as archive:
                archive.extractall(tmpdir)
def download_raw_dataset(self):
    """Download the raw dataset, decompress the gzip file, and store the
    result in the cache location.

    Fix: the previous docstring claimed zip extraction, but the code
    handles gzip streams — each download is inflated to a file named
    after the download minus its final extension (e.g. "x.csv.gz" ->
    "x.csv").
    """
    with upload_output_directory(self.raw_dataset_path) as (tmpdir, _):
        for file_download_url in self.download_urls:
            filename = file_download_url.split("/")[-1]
            # TqdmUpTo shows byte-scaled progress via urlretrieve's reporthook.
            with TqdmUpTo(unit="B", unit_scale=True, unit_divisor=1024, miniters=1, desc=filename) as t:
                urllib.request.urlretrieve(file_download_url, os.path.join(tmpdir, filename), t.update_to)
            # Strip the trailing ".gz"-style extension to name the output file.
            gzip_content_file = ".".join(filename.split(".")[:-1])
            with gzip.open(os.path.join(tmpdir, filename)) as gzfile:
                with open(os.path.join(tmpdir, gzip_content_file), "wb") as output:
                    # Stream-copy so large files are never fully in memory.
                    shutil.copyfileobj(gzfile, output)
def download_raw_dataset(self):
    """Download the raw dataset and extract the contents of the zip file and store that in the
    cache location.

    If the user has not specified creds in the kaggle.json file we lookup the passed in
    username and the api key and perform authentication.
    """
    with self.update_env(KAGGLE_USERNAME=self.kaggle_username, KAGGLE_KEY=self.kaggle_key):
        # Authenticate explicitly so freshly injected credentials take effect.
        api = create_kaggle_client()
        api.authenticate()
        with upload_output_directory(self.raw_dataset_path) as (tmpdir, _):
            # Competitions and plain datasets are served by different endpoints.
            fetch = api.competition_download_files if self.is_kaggle_competition else api.dataset_download_files
            # Download all files for a competition/dataset.
            fetch(self.competition_name, path=tmpdir)
            archive = os.path.join(tmpdir, self.archive_filename)
            with ZipFile(archive, "r") as zipped:
                zipped.extractall(tmpdir)