Example #1
def kernel_update(request):
    # pull the most recent version of the kernel
    api = KaggleApi()
    api.authenticate()
    api.kernels_pull_cli("{}/{}".format(USERNAME, KERNEL_SLUG), path=PATH, metadata=True)

    # push our notebook
    api.kernels_push_cli(PATH)

    # save a copy of our notebook in our bucket (if you would prefer
    # not to save a copy, delete all lines from here to the end of the file).
    bucket = storage.bucket(BUCKET)
    metadata_blob = bucket.blob("kernel-metadata.json")
    notebook_blob = bucket.blob("{}.{}".format(KERNEL_SLUG, KERNEL_EXTENSION))

    metadata_blob.upload_from_filename("{}/kernel-metadata.json".format(PATH))
    notebook_blob.upload_from_filename("{}/{}.{}".format(PATH, KERNEL_SLUG, KERNEL_EXTENSION))
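
# The handler above presumably runs as a Google Cloud Function and relies on
# module-level imports and constants that this excerpt does not show. A
# minimal sketch of that assumed setup, with placeholder values:
from firebase_admin import initialize_app, storage
from kaggle.api.kaggle_api_extended import KaggleApi

USERNAME = "your-kaggle-username"
KERNEL_SLUG = "your-kernel-slug"
KERNEL_EXTENSION = "ipynb"
PATH = "/tmp/kernel"  # Cloud Functions only permit writes under /tmp
BUCKET = "your-project.appspot.com"

initialize_app()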
# Authorize APIs
import json
import re

import tweepy as tw
from kaggle.api.kaggle_api_extended import KaggleApi

# Authorize Twitter API
f = open('/home/pi/twitter_api_creds.json')
creds = json.load(f)
consumer_key = creds['consumer_key']
consumer_secret = creds['consumer_secret']
access_token = creds['access_token']
access_token_secret = creds['access_token_secret']
f.close()

auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth, wait_on_rate_limit=True)
#Authorize Kaggle's API
kapi = KaggleApi()
kapi.authenticate()

# In[11]:


#Twitter Functions
#Calls on clean() for text cleanup then removes the URL
def remove_url(txt):
    #Call on clean to clean text first
    txt = clean(txt)
    #removes URL
    return " ".join(re.sub("([^0-9A-Za-z \t])|(\w+:\/\/\S+)", "", txt).split())


#Cleans up the text like newlines before url is removed.
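# The body of clean() is missing from this excerpt; below is a hypothetical
# reconstruction consistent with the comment above, collapsing newlines and
# runs of whitespace before remove_url() strips links.
def clean(txt):
    return " ".join(txt.split())
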
from PIL import Image
import os
from kaggle.api.kaggle_api_extended import KaggleApi
api = KaggleApi()
api.authenticate()
api.competition_download_file('dogs-vs-cats-redux-kernels-edition','test.zip')
api.competition_download_file('dogs-vs-cats-redux-kernels-edition','train.zip')

from zipfile import ZipFile

with ZipFile('test.zip', 'r') as zip_file:
    zip_file.extractall()

with ZipFile('train.zip', 'r') as zip_file:
    zip_file.extractall()

def image_gs_scale(typ, num, scale):
    directory = os.getcwd() + '/train/'
    for i in range(num):
        img = Image.open(directory + typ + '.' + str(i) + '.jpg').convert('L')
        (wid, hei) = img.size
        wid *= scale
        hei *= scale
        img = img.resize((int(wid), int(hei)))
        img.save('gs_' + typ + str(i) + '.jpg')
        
        
def image_gs_size(typ, num, wid, hei):
    # The original body is missing from this excerpt; reconstructed by
    # analogy with image_gs_scale above, resizing to a fixed width/height
    # instead of a scale factor (an assumption).
    directory = os.getcwd() + '/train/'
    for i in range(num):
        img = Image.open(directory + typ + '.' + str(i) + '.jpg').convert('L')
        img = img.resize((int(wid), int(hei)))
        img.save('gs_' + typ + str(i) + '.jpg')
Example #4
import git
import zipfile
import os
import pandas as pd

from kaggle.api.kaggle_api_extended import KaggleApi

repo = git.Repo("./", search_parent_directories=True)
homedir = repo.working_dir
datadir = f"hotexamples_com/data/kor/covid/"

# This requires a valid kaggle.json file in ~/.kaggle/
api = KaggleApi()
api.authenticate()
# Download the complete DS4C SK dataset
api.dataset_download_files('kimjihoo/coronavirusdataset',
                           path=datadir,
                           force=True)

#unzip dataset to destination
with zipfile.ZipFile(f"{datadir}coronavirusdataset.zip", 'r') as zip_ref:
    zip_ref.extractall(datadir)

# Generate dictionary for file renaming
rename_dict = {
    "Case.csv": "kaggle_case_city_kor.csv",
    "PatientInfo.csv": "kaggle_patient-info_patient_kor.csv",
    "PatientRoute.csv": "kaggle_patient-route_patient_kor.csv",
    "Region.csv": "kaggle_province-demography_province_kor.csv",
    "SearchTrend.csv": "kaggle_search-trend_country_kor.csv",
    "SeoulFloating.csv": "kaggle_seoul-floating_city_kor.csv",
Example #5
from __future__ import absolute_import
from kaggle.api.kaggle_api_extended import KaggleApi
from kaggle.api_client import ApiClient

api = KaggleApi(ApiClient())
api.authenticate()
Example #6
# importing libraries
import pandas as pd
import numpy as np
from kaggle.api.kaggle_api_extended import KaggleApi
import csv

# Using an API token in a JSON file located in C:\Users\s_dun\.kaggle.
# API authentication uses my Kaggle account credentials to retrieve the dataset.
api = KaggleApi()
api.authenticate()

# downloading from kaggle.com/codebreaker619/alcohol-comsumption-around-the-world
# Writing the dataset file to my current project directory path with './'
api.dataset_download_file(
    'codebreaker619/alcohol-comsumption-around-the-world',
    file_name='drinks.csv')

# Read in my .csv file, setting index column.
pd.set_option("display.max_rows", 200, "display.max_columns", 5)
df_drinks = pd.read_csv(
    r'C:\Users\S_Dun\Desktop\UCDPA_SineadDunne\drinks.csv',
    header=0,
    names=['COUNTRY', 'BEER', 'SPIRIT', 'WINE', 'TOTAL_LITRES'])

# Ensure column names are updated correctly by displaying them out in a list.
for col in df_drinks.columns:
    print(col)

# First view of my dataset
print(df_drinks.head())
from kaggle.api.kaggle_api_extended import KaggleApi
api = KaggleApi()
api.authenticate()  # requires your computer to have a JSON file with your API keys

api.competition_download_files('coleridgeinitiative-show-us-the-data')  # downloads as a zip, you will need to unzip

print("Done")
Example #8
                # InsertOne({'_id':teleArr[2]}),            
                InsertOne({
                    'ID': arr[i][0],
                    'County': arr[i][1],
                    }) for i in range(len(arr))
                ])
        except BulkWriteError as bwe:
            pprint(bwe.details)
        # logger.info(f'Loaded {} matches into db')
    


if __name__ == "__main__":

    if not os.path.exists(dataset_path + '/' + file_name):
        api = KaggleApi()
        api.authenticate()
        api.dataset_download_files('sobhanmoosavi/us-accidents', path=dataset_path, unzip=True)

    with open('./public/unemployment-by-county-2017.csv') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for row in csv_reader:
            # county = row[1].split(' ')[0]
            countyIDArr.append([row[0], row[1].split(' ')[0]])

    with open('sample.csv') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for row in csv_reader:

            # parse accident
            teleArr.append(row)
Example #9
from kaggle.api.kaggle_api_extended import KaggleApi
api = KaggleApi()
api.authenticate()

# slug: [Description, Competition or Freestyle]
competition = {
    "intro-to-seismic-salt-and-how-to-geophysics":
    "Geophysics competition to identify salt bodies in seismic data. The competition was hosted by the seismic contractor TGS and provided patched seismic images with varying amounts of noise. The masks were provided as pixel-encoded masks to save bandwidth.",
    "intro-chest-xray-dicom-viz-u-nets-full-data":
    "Medical competition to identify pneumothorax in chest X-rays. Data provided in medical DICOM format. Very large images that benefit from downsampling.",
    "intro-to-santa-s-2019-viz-costs-22-s-and-search":
    "Fun kaggle competition to optimize allocation for families wanting to visit Santa's workshop. Optimization challenge where the global optimum was soon found.",
    "intro-to-connextx-env-and-minimax":
    "First reinforcement learning playground using Connect 4 on Kaggle to test new environment. Agents play against each other on the leaderboard. ConnectX is usually best approach by the Negamax algorithm, a Minimax variant.",
    "intro-to-deep-fakes-videos-and-metadata-eda":
    "Deep Fake competition to identify visual and sound manipulation usually using GANs in video footage.",
    "getting-started-with-standard-gans-tutorial":
    "Least Squares GAN on TPU to generate Monet painting from noise. Uses data augmentation to enrich the training set.",
    "understanding-and-improving-cyclegans-tutorial":
    "CycleGAN on TPUs to generate Monet painting from photographs. Uses Least Squares implementation of GANs to improve training and also implements some basic augmentation to get the competition going.",
}

# slug: [Description, Competition or Freestyle]
freestyle = {
    "the-reason-we-re-happy":
    "Exploration of the 'World Happiness Report' data. Found a strong correlation of final score with wealth indicators. June winner in Reddit community [r/dataisbeautiful](https://www.reddit.com/r/dataisbeautiful/comments/c89mz2/battle_dataviz_battle_for_the_month_of_july_2019/eskzdhd/).",
    "berlin-airbnbs-is-it-really-all-about-location":
    "AirBnB has been a source of income for many with a spare room, but become controversial due to commercial players buying apartments to rent out on the platform. In this analysis, we are looking at AirBnB data in Berlin, Germany. The questions are, what is the main influence on price of a rental. These questions are important for tourism, city planning, and renters alike. Where do affluent tourists rent AirBnBs? What decides a good price of your flat. Where can city planners accomodate for this new development of short term rental on the market?",
}

Example #10
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 13 18:55:36 2018

@author: chunhui zhu
"""

import pandas as pd
import numpy as np

import pickle
from kaggle.api.kaggle_api_extended import KaggleApi

download_path = "C:\\Users\\czhu5\\Desktop\\Data-622-2-ML\\hw3"

api = KaggleApi()

api.authenticate()

#download the kaggle data set to local folder
api.competition_download_files('titanic')

test = pd.read_csv('test.csv')
train = pd.read_csv('train.csv')

print("finish load for test data set")
print(test.head())
print("")
print("finish load for train data set")
print(train.head())
Example #11
import os
import zipfile

from kaggle.api.kaggle_api_extended import KaggleApi


def download_kaggle_files(source=None, destination=None, delete_source=False):
    # source, destination, and delete_source were defined elsewhere in the
    # original script; they are surfaced as parameters here so the excerpt
    # runs. Note the manual unzip step is redundant when unzip=True.
    api = KaggleApi()
    api.authenticate()
    api.dataset_download_files('netflix-inc/netflix-prize-data',
                               path='/Users/pcc33/Downloads/',
                               unzip=True)
    if source and destination:
        # extract file in destination
        with zipfile.ZipFile(source, 'r') as zipref:
            zipref.extractall(destination)

    if delete_source:
        # delete source file
        os.remove(source)


# %% --------------------
# imports (not shown in this excerpt)
import albumentations
import pandas as pd

from kaggle.api.kaggle_api_extended import KaggleApi

# %% --------------------
test_original_dimension = pd.read_csv(
    "D:/GWU/4 Spring 2021/6501 Capstone/VBD CXR/PyCharm "
    "Workspace/vbd_cxr/7_POC/test_original_dimension_1024_sample.csv")

# %% --------------------
api = KaggleApi()
api.authenticate()

# , "00bcb82818ea83d6a86df241762cd7d0",
#                  "013893a5fa90241c65c3efcdbdd2cec1", "01ee6e560f083255a630c41bba779405"


# %% --------------------
def resize_image_test(img_arr, smallest_max_size):
    # create resize transform pipeline
    transform = albumentations.Compose([
        albumentations.SmallestMaxSize(max_size=smallest_max_size,
                                       always_apply=True)
    ])

    return transform(image=img_arr)
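
# A usage sketch for resize_image_test(); the input array here is synthetic.
import numpy as np

img = np.random.randint(0, 256, (512, 768, 3), dtype=np.uint8)
resized = resize_image_test(img, smallest_max_size=1024)["image"]
print(resized.shape)  # smallest side scaled to 1024, aspect ratio preserved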
Example #13
    def download(self):
        import os
        import glob
        import os.path as osp
        from zipfile import ZipFile
        from tqdm import tqdm
        try:
            from kaggle.api.kaggle_api_extended import KaggleApi
        except ImportError:
            raise RuntimeError(
                'please install and setup the kaggle '
                'competition api: https://github.com/Kaggle/kaggle-api')

        api = KaggleApi()
        api.authenticate()

        kgl_comp = 'trackml-particle-identification'
        test_file = 'train_sample.zip'

        if self.full_dataset:
            kgl_file = 'trackml-particle-identification.zip'
            print(
                'Downloading full TrackML dataset (~80GB), this may take a while...'
            )
            api.competition_download_files(kgl_comp,
                                           path=self.root,
                                           quiet=False,
                                           force=False)
            training_samples = None
            with ZipFile(os.path.join(self.root, kgl_file), 'r') as zf:
                training_samples = [
                    fname for fname in zf.namelist()
                    if 'train' in fname and 'sample' not in fname
                    and 'blacklist' not in fname
                ]

                for name in tqdm(training_samples, desc='extracting zipballs'):
                    if not os.path.exists(os.path.join(self.root, name)):
                        zf.extract(name, path=self.root)

            for sample in training_samples:
                with ZipFile(os.path.join(self.root, sample), 'r') as zf:
                    fnames = zf.namelist()
                    action = f'unpacking {sample}'
                    for name in tqdm(fnames, desc=action):
                        sample_dir = sample.split('.')[0] + '/'
                        if name == sample_dir:
                            continue
                        outname = os.path.join(self.raw_dir,
                                               os.path.basename(name))
                        if os.path.exists(outname):
                            raise Exception(f'{outname} already exists!')
                        with open(outname, 'wb') as fout:
                            fout.write(zf.read(name))

        else:
            kgl_file = test_file
            print(
                'Downloading training example from TrackML dataset, only 100 training events...'
            )
            api.competition_download_file(kgl_comp,
                                          test_file,
                                          path=self.root,
                                          quiet=False,
                                          force=False)
            with ZipFile(os.path.join(self.root, kgl_file), 'r') as zf:
                fnames = zf.namelist()
                for name in tqdm(fnames):
                    if name == 'train_100_events/':
                        continue
                    with open(
                            os.path.join(self.raw_dir, os.path.basename(name)),
                            'wb') as fout:
                        fout.write(zf.read(name))

        events = glob.glob(
            osp.join(osp.join(self.root, 'raw'), 'event*-hits.csv'))
        events = [e.split(osp.sep)[-1].split('-')[0][5:] for e in events]
        self.events = sorted(events)
        if self.n_events > 0:
            self.events = self.events[:self.n_events]
Example #14
"""
Script to download the dataset from kaggle.

Author: Tom Fleet
Created: 02/01/2021
"""

from kaggle.api.kaggle_api_extended import KaggleApi

from src.config import RAW_DATA

if not RAW_DATA.exists():
    RAW_DATA.mkdir(parents=True)

api = KaggleApi()
api.authenticate()

api.dataset_download_files("saurograndi/airplane-crashes-since-1908",
                           path=RAW_DATA,
                           unzip=True)
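
# RAW_DATA comes from src.config, which is not shown here; it is presumably a
# pathlib.Path along these lines (a sketch, not the project's actual config):
from pathlib import Path

RAW_DATA = Path(__file__).resolve().parents[1] / "data" / "raw"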
# Machine Learning - Unsupervised Machine Learning
# Course Code: DLBDSMLUSL01

# Feature Importance
# Chi²

#%% import libraries
import pandas as pd
from sklearn.feature_selection import SelectKBest, chi2
from kaggle.api.kaggle_api_extended import KaggleApi
import zipfile
from sklearn.preprocessing import LabelEncoder

#%% load sample data
# log into kaggle
api = KaggleApi()
api.authenticate()

# download the data
kaggle_user = '******'
kaggle_project = 'churn-prediction-of-bank-customers'
api.dataset_download_files(kaggle_user + '/' + kaggle_project)

# unzip the data
zipfile.ZipFile(kaggle_project + '.zip').extractall()

# load the data
churn_df = pd.read_csv('Churn_Modelling.csv')

#%% prepare the data
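# The original script is cut off after this marker. Given the imports above
# (LabelEncoder, SelectKBest, chi2), a plausible continuation is sketched
# below; the column names are assumptions based on the public
# Churn_Modelling.csv layout.
churn_df['Gender'] = LabelEncoder().fit_transform(churn_df['Gender'])

#%% score features with chi-squared
X = churn_df[['CreditScore', 'Gender', 'Age', 'Tenure', 'NumOfProducts']]
y = churn_df['Exited']

selector = SelectKBest(chi2, k=2).fit(X, y)
print(dict(zip(X.columns, selector.scores_)))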
Example #16
#!/usr/bin/python
#
# Copyright 2018 Kaggle Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from kaggle.api.kaggle_api_extended import KaggleApi
from kaggle.api_client import ApiClient

api = KaggleApi(ApiClient())
api.authenticate()
Example #17
def update_dataset(folder, note):
    api = KaggleApi()
    api.authenticate()

    return api.dataset_create_version(folder, note, delete_old_versions=True)
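
# Usage sketch: dataset_create_version() expects the folder to contain a
# dataset-metadata.json describing the dataset. The path and note below are
# hypothetical.
result = update_dataset('./my-dataset', 'Weekly data refresh')
print(result)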
Example #18
print('Establishing environment credentials...')
import os
os.environ['KAGGLE_USERNAME'] = "******"  # For demo use only
os.environ['KAGGLE_KEY'] = "******"  # Please use your own API token if making frequent queries

print('Querying Kaggle API...')
from kaggle.api.kaggle_api_extended import KaggleApi
api = KaggleApi()
api.authenticate()
api.competition_download_file('widsdatathon2020', 'training_v2.csv')

print('Extracting zip file...')
import zipfile
with zipfile.ZipFile('training_v2.csv.zip', 'r') as zip_ref:
    zip_ref.extractall()

print('Renaming csv and removing zip file...')
try:
    os.rename('training_v2.csv', 'data.csv')
    os.remove('training_v2.csv.zip')
except OSError as e:
    print('Error renaming files:', e)
Example #19
import matplotlib.pyplot as plt
import numpy
import pandas as pd
from kaggle.api.kaggle_api_extended import KaggleApi
#import kaggle
import zipfile

api = KaggleApi()
api.authenticate()

api.dataset_download_file(
    'antgoldbloom/covid19-data-from-john-hopkins-university',
    'CONVENIENT_global_confirmed_cases.csv')

#api.dataset_download_file('antgoldbloom/covid19-data-from-john-hopkins-university','CONVENIENT_global_confirm_cases.csv')

df = pd.read_csv('CONVENIENT_global_confirmed_cases.csv')

print(df)

total = df['South Africa'].sum()

df_sa = df['South Africa']
print(df_sa)
print('Total ', total)

df['South Africa'].plot()

plt.plot(df['South Africa'].rolling(window=21).mean(), label='MA 21 days')
plt.plot(df['South Africa'].rolling(window=7).mean(), label='MA 7 days')
plt.title('South Africa Daily Reported New Cases')
plt.legend()
plt.show()
# get the Corona tweet dataset from Kaggle

# Getting the token: source https://www.kaggle.com/docs/api#interacting-with-datasets
# In order to use the Kaggle’s public API, you must first authenticate using an API token. From the site header, click
# on your user profile picture, then on “My Account” from the dropdown menu. This will take you to your account settings
# at https://www.kaggle.com/account. Scroll down to the section of the page labelled API:
#
# To create a new token, click on the “Create New API Token” button. This will download a fresh authentication token
# onto your machine.
#
# If you are using the Kaggle CLI tool, the tool will look for this token at ~/.kaggle/kaggle.json on Linux, OSX, and
# other UNIX-based operating systems, and at C:\Users\<Windows-username>\.kaggle\kaggle.json on Windows. If the token is
# not there, an error will be raised. Hence, once you’ve downloaded the token, you should move it from your Downloads
# folder to this folder.
#
# If you are using the Kaggle API directly, where you keep the token doesn’t matter, so long as you are able to provide
# your credentials at runtime.

from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()
files = api.dataset_download_files('smid80/coronavirus-covid19-tweets',
                                   unzip=True,
                                   path='data/',
                                   quiet=False)
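
# The dataset unzips into one CSV per day; a loading sketch (the exact file
# name is an assumption based on the dataset's daily naming pattern):
import pandas as pd

tweets = pd.read_csv('data/2020-03-29 Coronavirus Tweets.CSV')
print(tweets.shape)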
Example #21
def chess_analysis():
    # Start time count to gauge process run time
    start = time.time()
    api = KaggleApi()
    api.authenticate()

    # downloading datasets for Chess games
    api.dataset_download_files('arevel/chess-games')

    # Read the data in chunks of 100000 rows, then concatenate the chunks into one dataframe to speed up reading
    zf = zipfile.ZipFile('chess-games.zip')
    csv = pd.read_csv(zf.open('chess_games.csv'), chunksize=100000)
    chess_df = pd.concat(csv)

    # Remove any duplicate user names to limit data to one game per user
    chess_df = chess_df.drop_duplicates(subset=['White', 'Black'])

    # remove any rows with stockfish evaluation as this clogs up the data at a later stage
    chess_df = chess_df.drop(chess_df[chess_df.AN.str.contains(r'[{}]')].index)

    # use iterrows to print out data
    for index, row in chess_df.head(1000).iterrows():
        print(index, row)

    # reset index after dropping duplicate users and removing stockfish evaluations
    chess_df = chess_df.reset_index()

    # Define average elo rank per game
    chess_df['AverageElo'] = (chess_df['WhiteElo'] + chess_df['BlackElo']) / 2

    # create lists of conditions for np.select, which turns numeric Elo values into grouped categories
    white_conditions = [
        (chess_df['WhiteElo'] >= 2700),
        (chess_df['WhiteElo'] < 2700) & (chess_df['WhiteElo'] >= 2500),
        (chess_df['WhiteElo'] < 2500) & (chess_df['WhiteElo'] >= 2400),
        (chess_df['WhiteElo'] < 2400) & (chess_df['WhiteElo'] >= 2300),
        (chess_df['WhiteElo'] < 2300) & (chess_df['WhiteElo'] >= 2200),
        (chess_df['WhiteElo'] < 2200) & (chess_df['WhiteElo'] >= 2000),
        (chess_df['WhiteElo'] < 2000) & (chess_df['WhiteElo'] >= 1800),
        (chess_df['WhiteElo'] < 1800) & (chess_df['WhiteElo'] >= 1600),
        (chess_df['WhiteElo'] < 1600) & (chess_df['WhiteElo'] >= 1400),
        (chess_df['WhiteElo'] < 1400) & (chess_df['WhiteElo'] >= 1200),
        (chess_df['WhiteElo'] < 1200) & (chess_df['WhiteElo'] >= 0)
    ]

    black_conditions = [
        (chess_df['BlackElo'] >= 2700),
        (chess_df['BlackElo'] < 2700) & (chess_df['BlackElo'] >= 2500),
        (chess_df['BlackElo'] < 2500) & (chess_df['BlackElo'] >= 2400),
        (chess_df['BlackElo'] < 2400) & (chess_df['BlackElo'] >= 2300),
        (chess_df['BlackElo'] < 2300) & (chess_df['BlackElo'] >= 2200),
        (chess_df['BlackElo'] < 2200) & (chess_df['BlackElo'] >= 2000),
        (chess_df['BlackElo'] < 2000) & (chess_df['BlackElo'] >= 1800),
        (chess_df['BlackElo'] < 1800) & (chess_df['BlackElo'] >= 1600),
        (chess_df['BlackElo'] < 1600) & (chess_df['BlackElo'] >= 1400),
        (chess_df['BlackElo'] < 1400) & (chess_df['BlackElo'] >= 1200),
        (chess_df['BlackElo'] < 1200) & (chess_df['BlackElo'] >= 0)
    ]

    average_conditions = [
        (chess_df['AverageElo'] >= 2700),
        (chess_df['AverageElo'] < 2700) & (chess_df['AverageElo'] >= 2500),
        (chess_df['AverageElo'] < 2500) & (chess_df['AverageElo'] >= 2400),
        (chess_df['AverageElo'] < 2400) & (chess_df['AverageElo'] >= 2300),
        (chess_df['AverageElo'] < 2300) & (chess_df['AverageElo'] >= 2200),
        (chess_df['AverageElo'] < 2200) & (chess_df['AverageElo'] >= 2000),
        (chess_df['AverageElo'] < 2000) & (chess_df['AverageElo'] >= 1800),
        (chess_df['AverageElo'] < 1800) & (chess_df['AverageElo'] >= 1600),
        (chess_df['AverageElo'] < 1600) & (chess_df['AverageElo'] >= 1400),
        (chess_df['AverageElo'] < 1400) & (chess_df['AverageElo'] >= 1200),
        (chess_df['AverageElo'] < 1200) & (chess_df['AverageElo'] >= 0)
    ]

    outcome_conditions = [(chess_df['Result']) == "1-0",
                          (chess_df['Result']) == "0-1",
                          (chess_df['Result']) == "1/2-1/2",
                          (chess_df['Result']) == "*"]

    # create a list of the values to assign for each condition
    elo = [
        'Super GM', 'GM', 'GM/IM', 'FM/IM', 'CM/NM', 'Experts', 'Class A',
        'Class B', 'Class C', 'Class D', 'Novices'
    ]
    outcome = ['White Wins', 'Black Wins', 'Draw', 'No Result']

    # create new columns and use np.select to assign values to it using the lists as arguments
    chess_df['WhiteEloRank'] = np.select(white_conditions, elo)
    chess_df['BlackEloRank'] = np.select(black_conditions, elo)
    chess_df['AverageEloRank'] = np.select(average_conditions, elo)
    chess_df['Outcome'] = np.select(outcome_conditions, outcome)

    # create dataframe for moves
    moves_df = chess_df["AN"].str.split(" ", n=30, expand=True)
    moves_df = moves_df.drop(moves_df.iloc[:, 0:31:3], axis=1)

    # append moves dataframe to chess dataframe
    chess_df = pd.concat([chess_df, moves_df], axis=1)
    chess_df.reset_index(inplace=True)

    # sort data from lowest average elo to highest average elo
    chess_df = chess_df.sort_values(by='AverageElo', ascending=False)

    # change data type from object to numeric values
    chess_df[["WhiteElo", "BlackElo", "AverageElo"]] = chess_df[["WhiteElo", "BlackElo", "AverageElo"]].\
        apply(pd.to_numeric)

    classical_df1 = chess_df[chess_df.Event == ' Classical ']
    classical_df2 = chess_df[chess_df.Event == 'Classical ']
    classical = pd.merge(classical_df1, classical_df2, how='outer')

    classical_tournament_df1 = chess_df[chess_df.Event ==
                                        ' Classical tournament ']
    classical_tournament_df2 = chess_df[chess_df.Event ==
                                        'Classical tournament ']
    classical_tournament = pd.merge(classical_tournament_df1,
                                    classical_tournament_df2,
                                    how='outer')

    blitz_df1 = chess_df[chess_df.Event == ' Blitz ']
    blitz_df2 = chess_df[chess_df.Event == 'Blitz ']
    blitz = pd.merge(blitz_df1, blitz_df2, how='outer')

    blitz_tournament_df1 = chess_df[chess_df.Event == ' Blitz tournament ']
    blitz_tournament_df2 = chess_df[chess_df.Event == 'Blitz tournament ']
    blitz_tournament = pd.merge(blitz_tournament_df1,
                                blitz_tournament_df2,
                                how='outer')

    bullet_df1 = chess_df[chess_df.Event == ' Bullet ']
    bullet_df2 = chess_df[chess_df.Event == 'Bullet ']
    bullet = pd.merge(bullet_df1, bullet_df2, how='outer')

    bullet_tournament_df1 = chess_df[chess_df.Event == ' Bullet tournament ']
    bullet_tournament_df2 = chess_df[chess_df.Event == 'Bullet tournament ']
    bullet_tournament = pd.merge(bullet_tournament_df1,
                                 bullet_tournament_df2,
                                 how='outer')

    correspondence_df1 = chess_df[chess_df.Event == ' Correspondence ']
    correspondence_df2 = chess_df[chess_df.Event == 'Correspondence ']
    correspondence = pd.merge(correspondence_df1,
                              correspondence_df2,
                              how='outer')

    # Plot results
    #  Categorical Data
    plots = ['Termination', 'Outcome', 'AverageEloRank']
    plots_1 = ['AverageElo']
    plots_2 = [1, 2]
    game_types = [
        classical, classical_tournament, blitz, blitz_tournament, bullet,
        bullet_tournament, correspondence
    ]
    game_types_str = [
        'Classical', 'Classical Tournament', 'Blitz', 'Blitz Tournament',
        'Bullet', 'Bullet Tournament', 'Correspondence'
    ]

    z = 0
    y = 0
    w = 0
    for x in game_types:
        a = 1  # number of rows, set to 1 to retrieve individual graph groups based on game type
        b = int(len(plots))  # number of columns
        c = 1  # initialize plot counter
        d = 1  # number of rows, set to 1 to retrieve individual graph groups based on game type
        e = int(len(plots_1))  # number of columns
        f = 1  # initialize plot counter
        g = 1  # number of rows, set to 1 to retrieve individual graph groups based on game type
        h = int(len(plots_2))  # number of columns
        k = 1  # initialize plot counter
        for i in plots:
            plt.subplot(a, b, c)
            plt.title(str(game_types_str[z]))
            plt.xlabel(i)
            plt.subplots_adjust(bottom=0.095, top=0.97, hspace=1, wspace=0.45)
            sns.countplot(x=x[i])
            plt.xticks(rotation=30)
            c = c + 1
        z = z + 1
        plt.show()
        plt.clf()

        for i in plots_1:
            plt.subplot(d, e, f)
            plt.title(str(game_types_str[y]))
            plt.xlabel(i)
            plt.subplots_adjust(bottom=0.095, top=0.97, hspace=1)
            sns.histplot(x=x[i], kde=True, bins=25)
            plt.xticks(rotation=30)
            f = f + 1
        y = y + 1
        plt.show()
        plt.clf()
        for i in plots_2:
            plt.subplot(g, h, k)
            plt.title(str(game_types_str[w]))
            plt.xlabel(i)
            plt.subplots_adjust(bottom=0.095, top=0.97, hspace=1)
            sns.countplot(x=x[i])
            plt.xticks(rotation=30)
            k = k + 1
        w = w + 1
        plt.show()
        plt.clf()

    end = time.time()

    print("Run Time: ", (end - start), 'Seconds')
Example #22
from kaggle.api.kaggle_api_extended import KaggleApi
import datetime
from datetime import timezone
import time

api = KaggleApi()
api.authenticate()
COMPETITION = 'competition name (same as the -c option for dataset download)'
result_ = api.competition_submissions(COMPETITION)[0]
latest_ref = str(result_)  # ref of the latest submission
submit_time = result_.date
status = ''
print('check this submit, ', latest_ref)
while status != 'complete':
    list_of_submission = api.competition_submissions(COMPETITION)
    for result in list_of_submission:
        if str(result.ref) == latest_ref:
            break
    status = result.status
    now = datetime.datetime.now(timezone.utc).replace(tzinfo=None)
    elapsed_time = int((now - submit_time).seconds / 60) + 1
    if status == 'complete':
        print('\r', f'run-time: {elapsed_time} min, LB: {result.publicScore}')
    else:
        print('\r', f'elapsed time: {elapsed_time} min', end='')
        time.sleep(60)
Example #23
def download_data_files():
    api = KaggleApi()
    api.authenticate()
    api.dataset_download_file(dataset=dataset, file_name=confirm_cases_path)
    api.dataset_download_file(dataset=dataset, file_name=global_methadata_path)
    api.dataset_download_file(dataset=dataset,
                              file_name=RAW_global_confirm_path)
    api.dataset_download_file(dataset=dataset, file_name=death_cases_path)
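
# The module-level names used above are defined elsewhere in the original
# script. Plausible values, flagged as assumptions (the slug and file names
# follow the Johns Hopkins COVID-19 dataset used in other examples here):
dataset = 'antgoldbloom/covid19-data-from-john-hopkins-university'
confirm_cases_path = 'CONVENIENT_global_confirmed_cases.csv'
global_methadata_path = 'CONVENIENT_global_metadata.csv'
RAW_global_confirm_path = 'RAW_global_confirmed_cases.csv'
death_cases_path = 'CONVENIENT_global_deaths.csv'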
Example #24
import os
import sys
import zipfile

import numpy as np

# Check if the API token has been set up.
try:
    from kaggle.api.kaggle_api_extended import KaggleApi
except OSError as e:
    sys.exit(e)

DATASET_URL = 'farhanhubble/multimnistm2nist'
SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = SCRIPT_PATH + 2 * (os.sep + os.pardir) + '/data'
RAW_DATA_PATH = os.path.join(DATA_PATH, 'raw')
PROCESSED_DATA_PATH = os.path.join(DATA_PATH, 'processed')

api = KaggleApi()
api.authenticate()

# Download M2NIST dataset
if not os.path.exists(os.path.join(RAW_DATA_PATH, 'multimnistm2nist.zip')):
    print('Downloading M2NIST dataset...', end='')
    api.dataset_download_files(DATASET_URL, path=RAW_DATA_PATH)
    print('Done.')
else:
    print('Dataset already downloaded.')

# Check if extracted files already exist. Ignore existing files.
dataset = zipfile.ZipFile(os.path.join(RAW_DATA_PATH, 'multimnistm2nist.zip'),
                          'r')
files = dataset.namelist()
for f in files:
    # Assumed completion: the original loop body is cut off here. Extracting
    # only missing files matches the comment above.
    if not os.path.exists(os.path.join(RAW_DATA_PATH, f)):
        dataset.extract(f, path=RAW_DATA_PATH)
Example #25
from kaggle.api.kaggle_api_extended import KaggleApi
api = KaggleApi()
api.authenticate()
api.dataset_download_files(
    'rajyellow46/wine-quality',
    path='D:/Netology/Python/pyda-16-em/DS Project Flow/WineQualityPrediction/data_external/'
)
Example #26
import os
from zipfile import ZipFile
from kaggle.api.kaggle_api_extended import KaggleApi

# Folders for Repos
if not os.path.isdir('./data'):
    os.mkdir('./data')
if not os.path.isdir('./code'):
    os.mkdir('./code')
if not os.path.isdir('./figures'):
    os.mkdir('./figures')
if not os.path.isdir('./sub'):
    os.mkdir('./sub')

api = KaggleApi()
api.authenticate()
comp = 'walmart-recruiting-store-sales-forecasting'
api.competition_download_files(comp, path='./data')

# Extract competition files
with ZipFile('./data/' + comp + '.zip', 'r') as zf:
    zf.extractall('./data/')
os.remove('./data/walmart-recruiting-store-sales-forecasting.zip')

os.chdir('./data')
for file in os.listdir():
    print(file)
    if file.endswith('zip'):
        with ZipFile(file, 'r') as zipObj:
            zipObj.extractall()
        os.remove(file)
Example #27
import os
import random

import i18n

from datetime import datetime as dt
from kaggle.api.kaggle_api_extended import KaggleApi
from kaggle.api_client import ApiClient
from discord.ext import commands
from dotenv import load_dotenv
from pytz import timezone

if os.environ.get("PRODUCTION") is None:
    load_dotenv(verbose=True)

i18n.set('locale', os.environ.get('LOCALE'))
i18n.load_path.append('./locale')

api = KaggleApi()
api.authenticate()

# Store discord token in the variable named token
token = os.getenv('DISCORD_TOKEN')

# This will set the command prefix
bot = commands.Bot(command_prefix='!')

# This function will list all 🏆 current competitions
@bot.command(name='competitions', help='Responds with a list of competitions')
async def competitions(comp):
    now = dt.now()
    now = now.astimezone(timezone('UTC'))
    await comp.send(i18n.t('kaggle.hi', hour=now.hour))
    competitions_list = api.competitions_list()
Example #28
    assert isinstance(html, str)

    s = StackOverflowParser()
    s.feed(html)
    return [item.replace('\n', ' ') for item in s.code_blocks]
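
# The StackOverflowParser class used above is missing from this excerpt; a
# hypothetical reconstruction that collects the text inside <code> tags:
from html.parser import HTMLParser

class StackOverflowParser(HTMLParser):
    def __init__(self):
        super().__init__()
        self.code_blocks = []
        self._in_code = False

    def handle_starttag(self, tag, attrs):
        if tag == 'code':
            self._in_code = True
            self.code_blocks.append('')

    def handle_endtag(self, tag):
        if tag == 'code':
            self._in_code = False

    def handle_data(self, data):
        if self._in_code:
            self.code_blocks[-1] += data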
            


# In[ ]:


from kaggle.api.kaggle_api_extended import KaggleApi

# Import the dataset directly from Kaggle 
# Requires a Kaggle account linked to an API key on your device 
api = KaggleApi()
api.authenticate()
api.dataset_download_files('stackoverflow/pythonquestions', path='./', unzip=True)


# In[2]:


import pandas as pd

# File Paths 
file_questions = 'Questions.csv'
file_answers = 'Answers.csv'
file_tags = 'Tags.csv'

dates = ["CreationDate"]
Example #29
def auth():
    api = KaggleApi()
    api.authenticate()
    return api


    # Fragment from a separate test class (the class definition is not part
    # of this excerpt):
    def test_config_actions(self):
        api = KaggleApi()

        self.assertTrue(api.config_dir.endswith('.kaggle'))
        self.assertEqual(api.get_config_value('doesntexist'), None)
Example #31
    def __init__(self):
        # authenticate
        self.__api = KaggleApi()
        self.__api.authenticate()
Example #32
    #================================
    # Visualization
    #================================
    classes = {0: 'cat', 1: 'dog'}
    fig, axes = plt.subplots(5, 5, figsize=(16, 20), facecolor='w')
    for i, ax in enumerate(axes.ravel()):
        if y_preds[i] > 0.5:
            label = 1
        else:
            label = 0
            
        ax.set_title( '{}.jpg'.format(i+1) + " / " + classes[label])
        img = Image.open( os.path.join(args.dataset_dir, "test", '{}.jpg'.format(i+1)) )
        ax.imshow(img)

    fig.savefig( os.path.join(args.results_dir, args.exper_name, "classification.png"), dpi = 300, bbox_inches = 'tight' )

    #================================
    # Submitting via the Kaggle API
    #================================
    # Fill values into the submission data
    ds_submission = pd.read_csv( os.path.join(args.dataset_dir, "sample_submission.csv" ) )
    ds_submission['label'][0:len(y_preds)] = list(map(float, y_preds))
    ds_submission.to_csv( os.path.join(args.results_dir, args.exper_name, args.submit_file), index=False)
    if( args.submit ):
        # submit via the Kaggle API
        api = KaggleApi()
        api.authenticate()
        api.competition_submit( os.path.join(args.results_dir, args.exper_name, args.submit_file), args.submit_message, args.competition_id)
        os.system('kaggle competitions submissions -c {}'.format(args.competition_id) )
Example #33
from __future__ import absolute_import
from kaggle.api.kaggle_api_extended import KaggleApi
from kaggle.api_client import ApiClient

api = KaggleApi(ApiClient())
api.authenticate()
api.downloadPath(quiet=True)