def download_kaggle_dataset(dataset_url, data_dir, force=False, dry_run=False):
    """Ask for Kaggle credentials, then download and unzip a dataset.

    The username typed at the prompt and the value returned by
    ``_get_kaggle_key()`` are exported through the ``KAGGLE_USERNAME`` and
    ``KAGGLE_KEY`` environment variables before the Kaggle client
    authenticates. Credentials are requested even for a dry run.

    Args:
        dataset_url: Passed to ``get_kaggle_dataset_id`` to obtain the
            dataset identifier.
        data_dir: Parent directory; files land in a sub-directory named
            after the second '/'-separated part of the dataset identifier.
        force: Forwarded to ``api.dataset_download_files``.
        dry_run: When true, nothing is downloaded and a notice is printed.
    """
    print("Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds")
    username = click.prompt("Your Kaggle username")
    os.environ['KAGGLE_USERNAME'] = username
    os.environ['KAGGLE_KEY'] = _get_kaggle_key()

    dataset_id = get_kaggle_dataset_id(dataset_url)

    if dry_run:
        print("This is a dry run, skipping..")
        return

    # Imported lazily, after the credentials are in the environment.
    from kaggle import api

    api.authenticate()
    dataset_name = dataset_id.split('/')[1]
    destination = os.path.join(data_dir, dataset_name)
    api.dataset_download_files(
        dataset_id,
        destination,
        force=force,
        quiet=False,
        unzip=True,
    )
def download_coronavirus_data(path='New\\ Data/', verbose=False):
    """Download the Kaggle novel-corona-virus-2019 dataset and find its main CSV.

    The ``kaggle`` package is imported, and installed with pip first if it
    is missing. The dataset
    'sudalairajkumar/novel-corona-virus-2019-dataset' is then downloaded and
    unzipped into ``path`` (always with ``force=True``).

    Args:
        path (str): Folder to extract the dataset into. It is created if it
            does not exist. A trailing '/' is optional.
        verbose (bool): Print extra progress messages when true.

    Returns:
        str: Path of the first CSV in ``path`` whose name contains
        'covid_19_data.csv'.

    Raises:
        IndexError: If no such CSV is present after extraction.
    """
    import glob
    import importlib
    import os
    import subprocess
    import sys

    os.makedirs(path, exist_ok=True)

    try:
        import kaggle.api as kaggle
    except ImportError:
        # Only a missing package should trigger an install; any other
        # import-time failure propagates instead of being masked by pip.
        # Running pip through the current interpreter installs into the
        # environment this code is actually running in.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "kaggle", "--upgrade"],
            check=False,
        )
        # Let the import system notice the freshly installed package.
        importlib.invalidate_caches()
        if verbose:
            print('\t- Installed kaggle command line tool.')
        import kaggle.api as kaggle

    kaggle.authenticate()
    kaggle.dataset_download_files(
        'sudalairajkumar/novel-corona-virus-2019-dataset',
        path=path,
        force=True,
        unzip=True,
    )
    print('[i] Extraction Complete.')

    # os.path.join works whether or not ``path`` ends with a separator.
    csv_paths = glob.glob(os.path.join(path, "*.csv"))

    main_file = [
        csv_path for csv_path in csv_paths
        if 'covid_19_data.csv' in csv_path
    ]
    if verbose:
        print(f"[i] The main file name is {main_file}")
    return main_file[0]
def download_kaggle_dataset(dataset_url, data_dir, force=False, dry_run=False):
    """Download a Kaggle dataset or competition into ``data_dir``.

    The target directory is ``data_dir`` joined with the second
    '/'-separated part of the identifier returned by
    ``get_kaggle_dataset_id``. If that directory already has entries and
    ``force`` is false, the download is skipped. If ``read_kaggle_creds()``
    returns a falsy value, the user is prompted and the credentials are
    exported through ``KAGGLE_USERNAME`` / ``KAGGLE_KEY``.

    Identifiers whose first part is 'competitions' or 'c' are fetched as
    competition archives, extracted with ``extract_archive`` and the zip is
    then removed. Anything else is fetched as a dataset with ``unzip=True``.

    Args:
        dataset_url: Passed to ``get_kaggle_dataset_id``.
        data_dir: Parent directory for the download.
        force: Download even when the target directory is non-empty; also
            forwarded to the Kaggle API calls.
        dry_run: When true, skip the download and print a notice.
    """
    dataset_id = get_kaggle_dataset_id(dataset_url)
    id_parts = dataset_id.split('/')
    slug = id_parts[1]
    target_dir = os.path.join(data_dir, slug)

    already_downloaded = (
        not force
        and os.path.exists(target_dir)
        and bool(os.listdir(target_dir))
    )
    if already_downloaded:
        print(
            'Skipping, found downloaded files in "{}" (use force=True to force download)'
            .format(target_dir))
        return

    if not read_kaggle_creds():
        print(
            "Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds"
        )
        os.environ['KAGGLE_USERNAME'] = click.prompt("Your Kaggle username")
        os.environ['KAGGLE_KEY'] = _get_kaggle_key()

    if dry_run:
        print("This is a dry run, skipping..")
        return

    # Imported lazily, after credentials have been resolved.
    from kaggle import api

    api.authenticate()

    if id_parts[0] not in ('competitions', 'c'):
        api.dataset_download_files(dataset_id,
                                   target_dir,
                                   force=force,
                                   quiet=False,
                                   unzip=True)
        return

    # The competition download leaves a zip that is extracted and removed here.
    api.competition_download_files(slug,
                                   target_dir,
                                   force=force,
                                   quiet=False)
    zip_fname = target_dir + '/' + slug + '.zip'
    extract_archive(zip_fname, target_dir)
    try:
        os.remove(zip_fname)
    except OSError as err:
        print('Could not delete zip file, got' + str(err))
import kaggle.api
from kaggle.api.kaggle_api_extended import KaggleApi

# Build an authenticated Kaggle client for the download below.
api = KaggleApi()
api.authenticate()

# Download all files of a dataset.
# Signature: dataset_download_files(dataset, path=None, force=False, quiet=True, unzip=False)
# api.dataset_download_files('avenn98/world-of-warcraft-demographics')

# Download a single file (for reference; not used below).
# Signature: dataset_download_file(dataset, file_name, path=None, force=False, quiet=True)

# Download every file of the dataset into the given folder. The identifier
# uses the "<owner>/<dataset-slug>" form, like the example above: a leading
# '/' would leave the owner segment empty.
api.dataset_download_files(
    'shashikant9198/nlp-and-glove-word-embeddings-sentimental-analysis',
    path='/Users/fred/OneDrive - Adobe/Data/NLP_sentiment/Kaggle_Files')
def get_kaggle(dsname, fpath):
    """Authenticate with Kaggle, then download and unzip ``dsname`` into 'temp/'.

    Args:
        dsname: Dataset identifier handed to ``dataset_download_files``.
        fpath: Accepted but not used by this function.
            NOTE(review): the destination is fixed to 'temp/'; confirm
            whether ``fpath`` was meant to control it.
    """
    import kaggle.api as kaggle_api

    kaggle_api.authenticate()

    download_options = {'path': 'temp/', 'unzip': True}
    kaggle_api.dataset_download_files(dsname, **download_options)
def download_coronavirus_data(self, path=None, verbose=None):
    """Download the Kaggle novel-corona-virus-2019 dataset into ``path``.

    The ``kaggle`` package is imported, and installed with pip first if it
    is missing. The dataset
    'sudalairajkumar/novel-corona-virus-2019-dataset' is then downloaded and
    unzipped (always with ``force=True``). Finally
    ``self.get_data_fpath(path)`` is called.

    Args:
        path (str): Folder to extract the dataset into; defaults to
            ``self._data_folder``. It is created if it does not exist.
        verbose (bool): Print progress messages; defaults to the instance's
            ``__verbose`` setting.

    Returns:
        None. The result of ``self.get_data_fpath(path)`` is not returned.
        NOTE(review): confirm whether callers expect the file path(s) back.
    """
    import importlib
    import os
    import subprocess
    import sys

    if verbose is None:
        verbose = self.__verbose

    if verbose:
        print('[i] DOWNLOADING DATA USING KAGGLE API')
        print(
            "\thttps://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset"
        )

    if path is None:
        path = self._data_folder

    os.makedirs(path, exist_ok=True)

    try:
        import kaggle.api as kaggle
    except ImportError:
        # Only a missing package should trigger an install; any other
        # import-time failure propagates instead of being masked by pip.
        # Running pip through the current interpreter installs into the
        # environment this code is actually running in.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "kaggle", "--upgrade"],
            check=False,
        )
        # Let the import system notice the freshly installed package.
        importlib.invalidate_caches()
        if verbose:
            print('\t- Installed kaggle command line tool.')
        import kaggle.api as kaggle

    kaggle.authenticate()
    kaggle.dataset_download_files(
        'sudalairajkumar/novel-corona-virus-2019-dataset',
        path=path,
        force=True,
        unzip=True,
    )

    # Use the resolved ``verbose`` so an explicit argument is respected here
    # just as it is for the messages above.
    if verbose:
        print(f'\t- Downloaded dataset .zip and extracted to:"{path}"')

    self.get_data_fpath(path)
def main(dataset_dir, force=False):
    """Download the 'datasnaek/youtube-new' Kaggle dataset into ``dataset_dir``.

    The directory, including any missing parents, is created when absent.
    The download runs only when the directory is empty or ``force`` is true;
    otherwise the function does nothing.

    Args:
        dataset_dir: Destination directory for the unzipped dataset.
        force: Download even if ``dataset_dir`` already contains entries.
    """
    # makedirs(exist_ok=True) avoids the exists()/mkdir() race and also
    # handles nested paths whose parents do not exist yet.
    os.makedirs(dataset_dir, exist_ok=True)

    if not os.listdir(dataset_dir) or force:
        api.dataset_download_files(
            "datasnaek/youtube-new",
            path=dataset_dir,
            force=True,
            quiet=False,
            unzip=True,
        )