def execute(self, context):
    """Authenticate with Kaggle and download the configured dataset(s) or file."""
    # Authenticate using the credentials kaggle picks up from the environment.
    api = kaggle.KaggleApi()
    api.authenticate()
    self.log.info('kaggle authentication successful.')

    self.log.info('Importing data from kaggle...')
    if self.dataset_or_file == "dataset":
        # Download (and unzip) every configured dataset in full.
        for entry in self.dataset:
            api.dataset_download_files(entry.get('dataset'),
                                       path=self.path,
                                       force=True,
                                       quiet=True,
                                       unzip=True)
    elif self.dataset_or_file == "file":
        # Download a single named file from the dataset (left zipped).
        api.dataset_download_file(self.dataset,
                                  self.file_name,
                                  path=self.path,
                                  force=True,
                                  quiet=True)
    self.log.info('Data imported to local.')
def submit_prediction(run_name, model_name):
    """Submit a run's prediction.csv to the Kaggle competition.

    Expects ``Runs\\<run_name>\\prediction.csv`` and ``kaggle.json`` in the
    current working directory; copies the API key into the user's
    ``~/.kaggle`` directory if it is not already there, then submits.

    Raises:
        FileNotFoundError: if the prediction file or the API config is missing.
    """
    # Check for file.  Raise instead of assert: asserts are stripped
    # when Python runs with -O, silently skipping the validation.
    cur_dir = os.getcwd()
    prediction_file = '%s\\Runs\\%s\\prediction.csv' % (cur_dir, run_name)
    if not os.path.exists(prediction_file):
        raise FileNotFoundError('No submission file found.')
    kaggle_config = '%s\\kaggle.json' % cur_dir
    if not os.path.exists(kaggle_config):
        raise FileNotFoundError(
            'No kaggle API config found, create a API key in My Account.')

    # Check to make sure kaggle.json is in the user's .kaggle folder.
    # expanduser('~') works even where os.getlogin() fails (e.g. when run
    # without a controlling terminal) and does not assume the profile
    # lives under C:\Users.  makedirs also tolerates a missing parent.
    kaggle_path = os.path.join(os.path.expanduser('~'), '.kaggle')
    os.makedirs(kaggle_path, exist_ok=True)
    dst = os.path.join(kaggle_path, 'kaggle.json')
    if not os.path.exists(dst):
        copyfile(kaggle_config, dst)

    print("Submitting prediction...")
    # Imported lazily so the key file is in place before kaggle reads it.
    import kaggle
    client = kaggle.KaggleApi()
    client.authenticate()
    client.competition_submit(prediction_file,
                              'Run = %s, Model = %s' % (run_name, model_name),
                              'uwb-css-485-fall-2018')
def update(self):
    """Fetch today's revision of the dataset unless it is already present."""
    target = self.get_today_revision_dir(include_subid=False)
    # Skip the download when today's revision directory already has content.
    if exists(target) and listdir(target):
        return
    self.client = kaggle.KaggleApi()
    self.client.authenticate()
    self.client.dataset_download_files(self.dataset, target)
    # Kaggle names the archive after the dataset slug (the part after "/").
    slug = self.dataset.split('/')[-1]
    with zipfile.ZipFile(f'{target}/{slug}.zip', 'r') as archive:
        archive.extractall(target)
# # NeuroByte Tech is the Developer Company of Rohan Mathew. # # Project: kDigitRecognizer # File Name: main.py # Last Modified: 22/09/2020, 19:54 import torch as T import torch.nn as nn import torch.nn.functional as F import numpy as np import pandas as pd import kaggle as k device = T.device("cuda" if T.cuda.is_available() else "cpu") cli = k.KaggleApi() cli.authenticate() cli.competition_submissions("digit-recognizer") # Load model class ConvNet(nn.Module): def __init__(self, dropout=0.2): super().__init__() # Input 28 * 28 images self.conv1 = nn.Conv2d(1, 16, 3, padding=1) self.conv2 = nn.Conv2d(16, 32, 3, padding=1) # Input into fc 32 * 7 * 7 self.fc1 = nn.Linear(32 * 7 * 7, 128)
# %% from pyprojroot import here import kaggle import zipfile # %% root_p = here(".", [".here"]) data_p = root_p / "data/original" # create data dir if not exists if not data_p.is_dir(): data_p.mkdir() # %% # load Kaggle api and authenticate api = kaggle.KaggleApi() api.authenticate() print("Download data from kaggle...", end="") api.competition_download_files("dogs-vs-cats", str(data_p)) print("done") # %% # extract root archive archive = zipfile.ZipFile(root_p / "data/dogs-vs-cats.zip") archive.extractall(data_p) # %% # extract train archive train_archive = zipfile.ZipFile(root_p / "data/train.zip")
import kaggle
import pandas as pd
import re

print("hello1")

# Authenticate against the Kaggle API (credentials from kaggle.json).
akg = kaggle.KaggleApi()
akg.authenticate()

# Look up the Denver crime dataset's metadata and its file list.
adatasetloc = 'paultimothymooney/denver-crime-data'
adataset = akg.dataset_view(adatasetloc)
# NOTE(review): expression result is discarded — looks intended for
# interactive inspection only.
adataset.files
print("hello")

# Download and unzip the dataset into the current working directory.
akg.dataset_download_cli(adatasetloc, unzip=True, force=True)

# Load the offense-code lookup table with explicit column names.
myds = pd.read_csv('offense_codes.csv',
                   header=0,
                   names=[
                       'off_code', 'offcode_ext', 'offtype_id', 'offtype_name',
                       'offcategory_id', 'offcategory_name', 'crime', 'traffic'
                   ],
                   sep=',',
                   engine='c')
def main():
    """Interactive driver: choose a competition, train/test, optionally submit.

    Side effects: reads/writes the Kaggle config file, downloads competition
    data and leaderboard files, and prompts the user on stdin.
    """
    kapi = kaggle.KaggleApi()
    kapi.authenticate()
    config = kapi.read_config_file()
    if 'competition' not in config:
        # No competition stored yet: search interactively until one is
        # chosen, then persist the choice in the Kaggle config.
        competitions = []
        found = False
        while not found:
            while len(competitions) == 0:
                keyword = input("Search competitions [KEYWORD]: ")
                if len(keyword) > 0:
                    competitions = search(keyword)
                else:
                    print("No results... try again.")
            print("Which competition would you like? [Select number]")
            for idx, comp in enumerate(competitions):
                print(' {}) {}'.format(idx + 1, comp))
            # 1-based menu selection from the user.
            comp_no = int(input('[SELECTION]: '))
            if comp_no - 1 < len(competitions):
                competition = competitions[comp_no - 1]
                kapi.set_config_value(name='competition', value=competition)
                found = True
            else:
                print('Invalid number.')
    else:
        competition = config['competition']
    # Download the competition data only when the local dataset dir is empty.
    datafiles = [f.parts[-1] for f in DATASET_DIR.iterdir()]
    if len(datafiles) == 0:
        competition_files = list_files(kapi, competition)
        download(kapi, competition)
    else:
        competition_files = datafiles
    # NOTE(review): competition_files is assigned but not used below — verify.
    # Any file other than train.csv/test.csv is treated as the ground truth.
    gt_file = [f for f in datafiles if (f != 'train.csv' and f != 'test.csv')]
    gt_file = 'dataset/{}'.format(gt_file[0]) if len(gt_file) > 0 else None
    kaction = KaggleAction(train_filename='dataset/train.csv',
                           test_filename='dataset/test.csv',
                           groundtruth_filename=gt_file)
    kaction.train()
    kaction.test()
    list_submissions(kapi, competition)
    # Fetch the leaderboard once; afterwards reuse the cached local copy.
    if len(list(LEADERBOARD_DIR.iterdir())) == 0:
        download_leaderboard(kapi, competition)
    view_leaderboard(kapi, competition)
    # Prompt until the user gives a recognisable yes/no answer.
    answer = False
    while not answer:
        choice = input(
            colored('Would you like to submit this result? [Y/n]: ',
                    attrs=['bold']))
        choice = choice.upper()
        if choice == 'Y' or choice == 'YES':
            submit(kapi, competition)
            answer = True
        elif choice == 'N' or choice == 'NO':
            answer = True
        else:
            print('Input not understood. Try again.')
# Install kaggle api. pip3 install kaggle import kaggle import re import pandas as pd # Refer video session: # Create account in https://kaggle.com if you do not have one already # Download access token from kaggle website and save it as kaggle.json in ~.kaggle directory # Kaggle API reference - https://github.com/Kaggle/kaggle-api & https://www.kaggle.com/docs/api # No direct python supporting documents are available. Let us learn how can we explore ourselves # Choose a data set and describe the use case for this data and jod down the steps that involves to achieve the same # https://www.kaggle.com/tunguz/movietweetings # Connect and authenticate Kaggle API orskl_kaggle = kaggle.KaggleApi() orskl_kaggle.authenticate() orskl_dataset_loc = 'tunguz/movietweetings' orskl_dataset = orskl_kaggle.dataset_view(orskl_dataset_loc) orskl_dataset.files # Pull data files from this datasets orskl_kaggle.dataset_download_cli(orskl_dataset_loc, unzip=True) # Understand three data files # movies.dat, ratings.dat and users.dat orskl_ratings_data = pd.read_csv('ratings.dat', header=None, names=['user_id', 'movie_id', 'rating', 'rating_timestamp'], sep='::', engine='python') # Why the default engine = 'c' doesnt work, what happens if we don't use this engine
def extract_data(competition, file, file_path):
    """Download one file from a Kaggle competition into *file_path*."""
    api = kaggle.KaggleApi()
    api.authenticate()
    api.competition_download_file(competition, file_name=file, path=file_path)