コード例 #1
0
 def download_raw_dataset(self):
     """Copy every remote file listed in ``self.download_url`` into the staging directory.

     For each URL, the first value returned by ``get_fs_and_path`` is used to
     ``get`` the remote object (recursively) into a local path named after the
     last ``/``-separated segment of the URL.  The staging directory is the
     one yielded by ``upload_output_directory(self.raw_dataset_path)``.

     NOTE(review): presumably ``upload_output_directory`` publishes the staging
     directory to ``self.raw_dataset_path`` on exit -- confirm against its
     definition.
     """
     with upload_output_directory(self.raw_dataset_path) as (staging_dir, _):
         for source_url in self.download_url:
             # Text after the final "/" becomes the local file name.
             basename = source_url.rpartition("/")[2]
             filesystem, _ignored_path = get_fs_and_path(source_url)
             destination = os.path.join(staging_dir, basename)
             filesystem.get(source_url, destination, recursive=True)
コード例 #2
0
 def download_raw_dataset(self):
     """Retrieve each URL in ``self.download_urls`` into the staging directory.

     Every file is saved under the last ``/``-separated segment of its URL,
     inside the directory yielded by
     ``upload_output_directory(self.raw_dataset_path)``.  A ``TqdmUpTo``
     instance, labelled with the file name, is opened around each transfer
     and its ``update_to`` method is passed to ``urlretrieve`` as the
     report hook.
     """
     with upload_output_directory(self.raw_dataset_path) as (staging_dir, _):
         for source_url in self.download_urls:
             basename = source_url.rpartition("/")[2]
             with TqdmUpTo(
                 unit="B",
                 unit_scale=True,
                 unit_divisor=1024,
                 miniters=1,
                 desc=basename,
             ) as progress:
                 destination = os.path.join(staging_dir, basename)
                 urllib.request.urlretrieve(source_url, destination, progress.update_to)
コード例 #3
0
ファイル: download.py プロジェクト: kanishk16/ludwig
    def download_raw_dataset(self):
        """Download each tar archive in ``self.download_urls`` and unpack it.

        For every URL the archive is saved into the staging directory yielded
        by ``upload_output_directory(self.raw_dataset_path)`` under the last
        ``/``-separated segment of the URL, extracted there, and the entries
        of the extracted top-level folder are then copied up into the staging
        directory itself.

        The top-level folder is expected to be named after the archive file
        name up to its first ``.`` (e.g. ``data.tar.gz`` -> ``data``).

        Raises:
            Whatever ``urlretrieve``, ``tarfile`` or ``shutil.copyfile`` raise;
            nothing is caught here.  ``shutil.copyfile`` only handles regular
            files, so a sub-directory inside the extracted folder makes the
            copy step fail.
        """
        with upload_output_directory(self.raw_dataset_path) as (tmpdir, _):
            for url in self.download_urls:
                filename = url.split('/')[-1]
                # Computed once and reused for both the download and the
                # extraction below.
                file_path = os.path.join(tmpdir, filename)
                with TqdmUpTo(unit='B',
                              unit_scale=True,
                              unit_divisor=1024,
                              miniters=1,
                              desc=filename) as t:
                    urllib.request.urlretrieve(url, file_path, t.update_to)

                # NOTE(review): extractall() trusts the member paths stored in
                # the archive.  If these URLs can ever point at untrusted
                # content, restrict extraction (e.g. tarfile extraction
                # filters) -- behaviour is intentionally left unchanged here.
                with tarfile.open(file_path) as tar_file:
                    tar_file.extractall(path=tmpdir)

                download_folder_name = filename.split('.')[0]
                extracted_dir = os.path.join(tmpdir, download_folder_name)
                # scandir() holds an OS directory handle; the context manager
                # releases it even when a copy raises part-way through.
                with os.scandir(extracted_dir) as entries:
                    for entry in entries:
                        shutil.copyfile(entry, os.path.join(tmpdir, entry.name))
コード例 #4
0
ファイル: download.py プロジェクト: kanishk16/ludwig
 def download_raw_dataset(self):
     """Retrieve every URL in ``self.download_url`` into the staging directory.

     Each file is written under the last ``/``-separated segment of its URL,
     inside the directory yielded by
     ``upload_output_directory(self.raw_dataset_path)``.
     """
     with upload_output_directory(self.raw_dataset_path) as (staging_dir, _):
         for source_url in self.download_url:
             basename = source_url.rpartition('/')[2]
             destination = os.path.join(staging_dir, basename)
             urllib.request.urlretrieve(source_url, destination)
コード例 #5
0
    def download_raw_dataset(self):
        """Fetch each zip archive in ``self.download_urls`` and unpack it in place.

        The full response body is read into memory, opened as a zip archive,
        and extracted into the directory yielded by
        ``upload_output_directory(self.raw_dataset_path)``.  The archive itself
        is never written to disk.
        """
        with upload_output_directory(self.raw_dataset_path) as (staging_dir, _):
            for archive_url in self.download_urls:
                with urlopen(archive_url) as response:
                    # ZipFile needs a seekable object, hence the in-memory buffer.
                    payload = BytesIO(response.read())
                    with ZipFile(payload) as archive:
                        archive.extractall(staging_dir)
コード例 #6
0
 def download_raw_dataset(self):
     """Download each gzip file in ``self.download_urls`` and decompress it.

     Every URL is retrieved into the directory yielded by
     ``upload_output_directory(self.raw_dataset_path)`` under the last
     ``/``-separated segment of the URL, with a ``TqdmUpTo`` instance wrapped
     around the transfer.  The downloaded file is then decompressed next to
     itself, into a file whose name is the original name with its final
     ``.``-suffix removed (``data.csv.gz`` -> ``data.csv``).  The compressed
     file is left in place.
     """
     with upload_output_directory(self.raw_dataset_path) as (staging_dir, _):
         for source_url in self.download_urls:
             basename = source_url.rpartition("/")[2]
             with TqdmUpTo(
                 unit="B",
                 unit_scale=True,
                 unit_divisor=1024,
                 miniters=1,
                 desc=basename,
             ) as progress:
                 urllib.request.urlretrieve(
                     source_url,
                     os.path.join(staging_dir, basename),
                     progress.update_to,
                 )

             # Everything before the last "." (empty when the name has no ".").
             decompressed_name = basename.rpartition(".")[0]
             compressed_path = os.path.join(staging_dir, basename)
             decompressed_path = os.path.join(staging_dir, decompressed_name)
             with gzip.open(compressed_path) as source, open(decompressed_path, "wb") as sink:
                 shutil.copyfileobj(source, sink)
コード例 #7
0
    def download_raw_dataset(self):
        """Download the Kaggle archive and unzip it into the staging directory.

        A Kaggle client is created and authenticated inside
        ``self.update_env(...)``, which is given ``self.kaggle_username`` and
        ``self.kaggle_key`` as ``KAGGLE_USERNAME`` / ``KAGGLE_KEY``.
        NOTE(review): presumably ``update_env`` sets these as environment
        variables only for the duration of the block -- confirm.

        The client then downloads all files for ``self.competition_name`` --
        through the competition endpoint when ``self.is_kaggle_competition`` is
        truthy, otherwise through the dataset endpoint -- into the directory
        yielded by ``upload_output_directory(self.raw_dataset_path)``, and the
        archive named ``self.archive_filename`` is extracted there.
        """
        with self.update_env(KAGGLE_USERNAME=self.kaggle_username, KAGGLE_KEY=self.kaggle_key):
            # Authenticate explicitly so the client picks up the credentials
            # supplied through update_env.
            client = create_kaggle_client()
            client.authenticate()

        with upload_output_directory(self.raw_dataset_path) as (staging_dir, _):
            # Competitions and plain datasets use different client methods.
            fetch_all = (
                client.competition_download_files
                if self.is_kaggle_competition
                else client.dataset_download_files
            )
            fetch_all(self.competition_name, path=staging_dir)

            archive_path = os.path.join(staging_dir, self.archive_filename)
            with ZipFile(archive_path, "r") as archive:
                archive.extractall(staging_dir)