def execute(self, context):
        """Authenticate against Kaggle and download data into ``self.path``.

        ``self.dataset_or_file`` selects what is fetched:

        * ``"dataset"`` - ``self.dataset`` is iterated and, for every entry,
          the value stored under its ``'dataset'`` key is downloaded in full
          and unzipped.
        * ``"file"`` - the single file ``self.file_name`` of ``self.dataset``
          is downloaded.

        Any other value downloads nothing. ``context`` is not used.
        """
        api = kaggle.KaggleApi()
        api.authenticate()

        self.log.info('kaggle authentication successful.')
        self.log.info('Importing data from kaggle...')

        mode = self.dataset_or_file

        if mode == "file":
            # one specific file of the dataset
            api.dataset_download_file(
                self.dataset,
                self.file_name,
                path=self.path,
                force=True,
                quiet=True,
            )
        elif mode == "dataset":
            # every listed dataset, complete and extracted
            for entry in self.dataset:
                api.dataset_download_files(
                    entry.get('dataset'),
                    path=self.path,
                    force=True,
                    quiet=True,
                    unzip=True,
                )

        self.log.info('Data imported to local.')
Esempio n. 2
0
def submit_prediction(run_name, model_name,
                      competition='uwb-css-485-fall-2018'):
    """Submit ``Runs/<run_name>/prediction.csv`` to a Kaggle competition.

    The prediction file and a ``kaggle.json`` credentials file are looked up
    relative to the current working directory. If the user's ``~/.kaggle``
    directory has no ``kaggle.json`` yet, the local one is copied there
    before the Kaggle client is imported and authenticated.

    Args:
        run_name: Name of the run directory below ``Runs`` that holds
            ``prediction.csv``; also written into the submission message.
        model_name: Model name written into the submission message.
        competition: Competition identifier to submit to. Defaults to the
            competition that used to be hard-coded here.

    Raises:
        AssertionError: If the prediction file or the local ``kaggle.json``
            is missing. The exception type is kept for existing callers,
            but it is raised explicitly so the checks survive ``python -O``.
    """
    # Check for file
    cur_dir = os.getcwd()
    # os.path.join instead of hand-written '\\' so the lookup also works on
    # platforms whose path separator is not a backslash.
    prediction_file = os.path.join(cur_dir, 'Runs', run_name,
                                   'prediction.csv')
    if not os.path.exists(prediction_file):
        raise AssertionError('No submission file found.')

    kaggle_config = os.path.join(cur_dir, 'kaggle.json')
    if not os.path.exists(kaggle_config):
        raise AssertionError(
            'No kaggle API config found, create a API key in My Account.')

    # Check to make sure kaggle.json is in kaggle folder.
    # The home directory is resolved with expanduser instead of assuming
    # C:\Users\<login>; os.getlogin() can also fail without a terminal.
    kaggle_path = os.path.join(os.path.expanduser('~'), '.kaggle')
    os.makedirs(kaggle_path, exist_ok=True)

    dst = os.path.join(kaggle_path, 'kaggle.json')
    if not os.path.exists(dst):
        copyfile(kaggle_config, dst)

    print("Submitting prediction...")

    # Imported only now, after the credentials file is in place
    # (presumably because importing kaggle already looks for it - confirm).
    import kaggle

    client = kaggle.KaggleApi()
    client.authenticate()
    client.competition_submit(prediction_file,
                              'Run = %s, Model = %s' % (run_name, model_name),
                              competition)
    def update(self):
        """Download and unpack ``self.dataset`` into today's revision directory.

        Nothing is done when the revision directory already exists and
        contains at least one entry. Otherwise a Kaggle client is created,
        stored on ``self.client`` and authenticated, the dataset files are
        downloaded into the revision directory, and the archive named after
        the last ``/``-separated part of ``self.dataset`` is extracted there.
        """
        target_dir = self.get_today_revision_dir(include_subid=False)

        already_populated = exists(target_dir) and listdir(target_dir)
        if already_populated:
            return

        self.client = kaggle.KaggleApi()
        self.client.authenticate()
        self.client.dataset_download_files(self.dataset, target_dir)

        # "<owner>/<name>" -> "<name>"; the archive is opened as "<name>.zip"
        slug = self.dataset.split('/')[-1]
        archive_path = f'{target_dir}/{slug}.zip'
        with zipfile.ZipFile(archive_path, 'r') as bundle:
            bundle.extractall(target_dir)
Esempio n. 4
0
#
#  NeuroByte Tech is the Developer Company of Rohan Mathew.
#
#  Project: kDigitRecognizer
#  File Name: main.py
#  Last Modified: 22/09/2020, 19:54

import torch as T
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import kaggle as k

# Select the CUDA device when torch reports one as available, else the CPU.
device = T.device("cuda" if T.cuda.is_available() else "cpu")
# Create a Kaggle API client and authenticate it at import time.
cli = k.KaggleApi()
cli.authenticate()
# NOTE(review): the result of this call is discarded - presumably a leftover
# from interactive exploration; confirm whether it is still needed.
cli.competition_submissions("digit-recognizer")


# Load model
class ConvNet(nn.Module):
    """Layer container for a small network over 28 * 28 one-channel images.

    The constructor registers, in this order, two 3x3 convolutions with
    padding 1 (1 -> 16 and 16 -> 32 channels) and a linear layer mapping
    32 * 7 * 7 features to 128. ``dropout`` is accepted but not used by the
    code shown here.
    """

    def __init__(self, dropout=0.2):
        super().__init__()

        # Input 28 * 28 images
        self.conv1 = nn.Conv2d(
            in_channels=1, out_channels=16, kernel_size=3, padding=1
        )
        self.conv2 = nn.Conv2d(
            in_channels=16, out_channels=32, kernel_size=3, padding=1
        )

        # Input into fc 32 * 7 * 7
        self.fc1 = nn.Linear(in_features=32 * 7 * 7, out_features=128)
Esempio n. 5
0
# %%
from pyprojroot import here
import kaggle
import zipfile

# %%
root_p = here(".", [".here"])
data_p = root_p / "data/original"

# create data dir if not exists
# parents=True because "data/" itself may be missing on a fresh checkout;
# exist_ok=True makes a separate is_dir() check unnecessary.
data_p.mkdir(parents=True, exist_ok=True)

# %%
# load Kaggle api and authenticate
api = kaggle.KaggleApi()
api.authenticate()

print("Download data from kaggle...", end="")

api.competition_download_files("dogs-vs-cats", str(data_p))
print("done")

# %%
# extract root archive
# The download above targets data_p, so the archive is opened from there
# (it used to be looked up one directory higher, in "data/").
# The with-block closes the archive handle once extraction is finished.
with zipfile.ZipFile(data_p / "dogs-vs-cats.zip") as archive:
    archive.extractall(data_p)

# %%
# extract train archive
# The root archive was unpacked into data_p, so train.zip is opened from
# there as well. The handle is left open for the code that follows.
train_archive = zipfile.ZipFile(data_p / "train.zip")
Esempio n. 6
0
import kaggle
import pandas as pd
import re

# Debug marker printed before any Kaggle call is made.
print("hello1")
# Create a Kaggle API client and authenticate it.
akg = kaggle.KaggleApi()
akg.authenticate()
# Dataset identifier handed to the API calls below.
adatasetloc = 'paultimothymooney/denver-crime-data'
adataset = akg.dataset_view(adatasetloc)
# Bare expression: its value is only echoed in an interactive session or
# notebook; when run as a script the result is discarded.
adataset.files
# Debug marker printed after the dataset lookup.
print("hello")
# Download the dataset. unzip=True / force=True are forwarded to the API
# (presumably: extract the archive and re-download existing files - confirm
# against the Kaggle API documentation).
akg.dataset_download_cli(adatasetloc, unzip=True, force=True)

# Load offense_codes.csv from the current working directory. header=0 treats
# the file's first row as a header row, and `names` replaces those header
# labels with the explicit column names listed here.
myds = pd.read_csv('offense_codes.csv',
                   header=0,
                   names=[
                       'off_code', 'offcode_ext', 'offtype_id', 'offtype_name',
                       'offcategory_id', 'offcategory_name', 'crime', 'traffic'
                   ],
                   sep=',',
                   engine='c')
Esempio n. 7
0
def main():
    """Run the interactive Kaggle workflow from competition choice to submit.

    Steps, in order:

    1. Authenticate and read the Kaggle config. If it has no
       ``'competition'`` entry, prompt for a search keyword, list the
       results and store the selected one in the config.
    2. If ``DATASET_DIR`` is empty, list and download the competition files.
    3. Train and test a ``KaggleAction`` on ``dataset/train.csv`` and
       ``dataset/test.csv``.
    4. List submissions, download the leaderboard if ``LEADERBOARD_DIR`` is
       empty, show it, and ask whether the result should be submitted.
    """
    kapi = kaggle.KaggleApi()
    kapi.authenticate()

    config = kapi.read_config_file()
    if 'competition' in config:
        competition = config['competition']
    else:
        competitions = []
        found = False

        while not found:
            # Keep asking until a search yields at least one competition.
            while len(competitions) == 0:
                keyword = input("Search competitions [KEYWORD]: ")
                if keyword:
                    competitions = search(keyword)
                else:
                    print("No results... try again.")

            print("Which competition would you like? [Select number]")
            for number, comp in enumerate(competitions, start=1):
                print('  {})  {}'.format(number, comp))

            # Non-numeric input is treated like an out-of-range number
            # instead of aborting the program with a ValueError.
            try:
                comp_no = int(input('[SELECTION]: '))
            except ValueError:
                print('Invalid number.')
                continue

            # The lower bound matters: without it 0 or a negative number
            # would index from the end of the list and pick a competition
            # that was never offered under that number.
            if 1 <= comp_no <= len(competitions):
                competition = competitions[comp_no - 1]
                kapi.set_config_value(name='competition', value=competition)
                found = True
            else:
                print('Invalid number.')

    datafiles = [f.parts[-1] for f in DATASET_DIR.iterdir()]
    if len(datafiles) == 0:
        # The listing result was never used; the call itself is kept in case
        # list_files has side effects such as printing.
        list_files(kapi, competition)
        download(kapi, competition)

    # NOTE(review): `datafiles` was read before the download above, so on a
    # first run no ground-truth file is detected - confirm this is intended.
    extra_files = [f for f in datafiles
                   if f not in ('train.csv', 'test.csv')]
    gt_file = 'dataset/{}'.format(extra_files[0]) if extra_files else None
    kaction = KaggleAction(train_filename='dataset/train.csv',
                           test_filename='dataset/test.csv',
                           groundtruth_filename=gt_file)
    kaction.train()
    kaction.test()

    list_submissions(kapi, competition)

    if not any(LEADERBOARD_DIR.iterdir()):
        download_leaderboard(kapi, competition)
    view_leaderboard(kapi, competition)

    answer = False
    while not answer:
        choice = input(
            colored('Would you like to submit this result? [Y/n]: ',
                    attrs=['bold']))
        choice = choice.upper()
        if choice in ('Y', 'YES'):
            submit(kapi, competition)
            answer = True
        elif choice in ('N', 'NO'):
            answer = True
        else:
            print('Input not understood. Try again.')
Esempio n. 8
0
# Install kaggle api. pip3 install kaggle
import kaggle
import re
import pandas as pd


# Refer video session:
# Create account in https://kaggle.com if you do not have one already
# Download an access token from the Kaggle website and save it as kaggle.json in the ~/.kaggle directory
# Kaggle API reference - https://github.com/Kaggle/kaggle-api & https://www.kaggle.com/docs/api
# No dedicated Python documentation is available, so let us learn how we can explore the API ourselves

# Choose a data set, describe the use case for this data, and jot down the steps involved in achieving it
# https://www.kaggle.com/tunguz/movietweetings
# Connect and authenticate Kaggle API
# Create a Kaggle API client and authenticate it.
orskl_kaggle = kaggle.KaggleApi()
orskl_kaggle.authenticate()
# Dataset identifier handed to the API calls below.
orskl_dataset_loc = 'tunguz/movietweetings'
orskl_dataset = orskl_kaggle.dataset_view(orskl_dataset_loc)
# Bare expression: its value is only echoed in an interactive session or
# notebook; when run as a script the result is discarded.
orskl_dataset.files

# Pull data files from this dataset
# unzip=True is forwarded to the API (presumably extracts the downloaded
# archive - confirm against the Kaggle API documentation).
orskl_kaggle.dataset_download_cli(orskl_dataset_loc, unzip=True)

# Understand three data files
# movies.dat, ratings.dat and users.dat
# ratings.dat is read from the current working directory with no header row
# (header=None); columns are named explicitly and fields are split on the
# two-character separator '::' using the python parsing engine.
orskl_ratings_data = pd.read_csv('ratings.dat', header=None,
                                 names=['user_id', 'movie_id', 'rating', 'rating_timestamp'],
                                 sep='::', engine='python')
# Why doesn't the default engine='c' work here? What happens if we don't specify engine='python'?
Esempio n. 9
0
def extract_data(competition,file,file_path):
    """Download a single file of a Kaggle competition.

    A new Kaggle API client is created and authenticated on every call.

    Args:
        competition: Competition identifier passed to the API.
        file: Name of the competition file, passed as ``file_name``.
        file_path: Passed as ``path`` to the download call.
    """
    api = kaggle.KaggleApi()
    api.authenticate()
    api.competition_download_file(
        competition,
        file_name=file,
        path=file_path,
    )