def kernel_update(request):
    """Refresh the Kaggle kernel and copy its files into the storage bucket.

    Pulls the kernel ``USERNAME/KERNEL_SLUG`` (with metadata) into ``PATH``,
    pushes the contents of ``PATH`` back to Kaggle, and then uploads the
    metadata file and the notebook to ``BUCKET``.

    ``request`` is accepted but never read by this function.
    """
    kaggle_client = KaggleApi()
    kaggle_client.authenticate()

    # pull the most recent version of the kernel
    kernel_ref = f"{USERNAME}/{KERNEL_SLUG}"
    work_dir = f"{PATH}"
    kaggle_client.kernels_pull_cli(kernel_ref, path=work_dir, metadata=True)

    # push our notebook
    kaggle_client.kernels_push_cli(work_dir)

    # save a copy of our notebook in our bucket (if you would prefer
    # not to save a copy, delete everything from here to the end of the
    # function).
    notebook_name = f"{KERNEL_SLUG}.{KERNEL_EXTENSION}"
    bucket = storage.bucket(BUCKET)
    metadata_blob = bucket.blob("kernel-metadata.json")
    notebook_blob = bucket.blob(notebook_name)
    metadata_blob.upload_from_filename(f"{PATH}/kernel-metadata.json")
    notebook_blob.upload_from_filename(f"{PATH}/{notebook_name}")
#Authorize API's
#Authorize Twitter API
# The credentials file is read inside a context manager so that it is closed
# even when the JSON is malformed or a key is missing.
with open('/home/pi/twitter_api_creds.json') as f:
    creds = json.load(f)
    consumer_key = creds['consumer_key']
    consumer_secret = creds['consumer_secret']
    access_token = creds['access_token']
    access_token_secret = creds['access_token_secret']

auth = tw.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth, wait_on_rate_limit=True)

#Authorize Kaggle's API
kapi = KaggleApi()
kapi.authenticate()

# In[11]:


#Twitter Functions
def remove_url(txt):
    """Clean ``txt`` with ``clean()`` and strip URLs and punctuation from it.

    After cleaning, every ``scheme://...`` run and every character that is
    not an ASCII letter, digit, space or tab is removed, and the remaining
    words are re-joined with single spaces.

    Args:
        txt: The text to process; it is passed to ``clean()`` first.

    Returns:
        The processed text as a single space-separated string.
    """
    # Call on clean to clean text first
    txt = clean(txt)
    # Raw string: the pattern contains regex escapes (\w, \S, \/) that are
    # invalid escape sequences in an ordinary string literal.
    return " ".join(re.sub(r"([^0-9A-Za-z \t])|(\w+:\/\/\S+)", "", txt).split())

#Cleans up the text like newlines before url is removed.
from PIL import Image import os from kaggle.api.kaggle_api_extended import KaggleApi api = KaggleApi() api.authenticate() api.competition_download_file('dogs-vs-cats-redux-kernels-edition','test.zip') api.competition_download_file('dogs-vs-cats-redux-kernels-edition','train.zip') from zipfile import ZipFile with ZipFile('test.zip', 'r') as zip: zip.extractall() with ZipFile('train.zip', 'r') as zip: zip.extractall() def image_gs_scale(typ,num,scale): directory=os.getcwd()+'/train/' for i in range(num): img=Image.open(directory+typ+'.'+str(i)+'.jpg').convert('L') (wid,hei)=img.size wid*=scale hei*=scale img=img.resize((int(wid),int(hei))) img.save('gs_'+typ+str(i)+'.jpg') def image_gs_size(typ,num,wid,hei):
import git import zipfile import os import pandas as pd from kaggle.api.kaggle_api_extended import KaggleApi repo = git.Repo("./", search_parent_directories=True) homedir = repo.working_dir datadir = f"hotexamples_com/data/kor/covid/" # This requires a valid kaggle.json file in ~/.kaggle/ api = KaggleApi() api.authenticate() # Download the complete DS4C SK dataset api.dataset_download_files('kimjihoo/coronavirusdataset', path=datadir, force=True) #unzip dataset to destination with zipfile.ZipFile(f"{datadir}coronavirusdataset.zip", 'r') as zip_ref: zip_ref.extractall(datadir) # Generate dictionary for file renaming rename_dict = { "Case.csv": "kaggle_case_city_kor.csv", "PatientInfo.csv": "kaggle_patient-info_patient_kor.csv", "PatientRoute.csv": "kaggle_patient-route_patient_kor.csv", "Region.csv": "kaggle_province-demography_province_kor.csv", "SearchTrend.csv": "kaggle_search-trend_country_kor.csv", "SeoulFloating.csv": "kaggle_seoul-floating_city_kor.csv",
from __future__ import absolute_import

from kaggle.api.kaggle_api_extended import KaggleApi
from kaggle.api_client import ApiClient

# Build the module-level Kaggle client on top of a fresh ApiClient and
# authenticate it as soon as this module is imported.
api = KaggleApi(ApiClient())
api.authenticate()
# importing libraries
import pandas as pd
import numpy as np
from kaggle.api.kaggle_api_extended import KaggleApi
import csv

# API authentication with the Kaggle account credentials. The author's note
# says the API token lives in a json file located in C:\Windows\s_dun\.kaggle.
api = KaggleApi()
api.authenticate()

# Download drinks.csv from
# kaggle.com/codebreaker619/alcohol-comsumption-around-the-world.
# No download path is passed; the author's note says the file is written to
# the current project directory.
api.dataset_download_file(
    'codebreaker619/alcohol-comsumption-around-the-world',
    file_name='drinks.csv')

# Read the .csv file, replacing the header row with our own column names.
pd.set_option("display.max_rows", 200, "display.max_columns", 5)
df_drinks = pd.read_csv(
    r'C:\Users\S_Dun\Desktop\UCDPA_SineadDunne\drinks.csv',
    header=0,
    names=['COUNTRY', 'BEER', 'SPIRIT', 'WINE', 'TOTAL_LITRES'])

# Print every column name to confirm the renaming worked.
for col in df_drinks.columns:
    print(col)

# First view of the dataset
print(df_drinks.head())
from kaggle.api.kaggle_api_extended import KaggleApi

# Authentication requires your computer to have a JSON file with your API keys.
api = KaggleApi()
api.authenticate()

# The competition files arrive as a zip archive; you will need to unzip it.
api.competition_download_files('coleridgeinitiative-show-us-the-data')
print("Done")
# InsertOne({'_id':teleArr[2]}), InsertOne({ 'ID': arr[i][0], 'County': arr[i][1], }) for i in range(len(arr)) ]) except BulkWriteError as bwe: pprint(bwe.details) # logger.info(f'Loaded {} matches into db') if __name__ == "__main__": if os.path.exists(dataset_path + '/' + file_name) == False: api = KaggleApi() api.authenticate() api.dataset_download_files('sobhanmoosavi/us-accidents',path = dataset_path,unzip=True) with open('./public/unemployment-by-county-2017.csv') as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') for row in csv_reader: # county = row[1].split(' ')[0] countyIDArr.append([row[0], row[1].split(' ')[0]]) with open('sample.csv') as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') for row in csv_reader: # parse accident teleArr.append(row)
from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()

# Competition notebooks, keyed by slug; each value is the description text.
competition = {
    "intro-to-seismic-salt-and-how-to-geophysics": "Geophysics competition to identify salt bodies in seismic data. The competition was hosted by the seismic contractor TGS and provided patched seismic images with varying amounts of noise. The masks were provided as pixel-encoded masks to save bandwidth.",
    "intro-chest-xray-dicom-viz-u-nets-full-data": "Medical competition to identify pneumothorax in chest X-rays. Data provided in medical DICOM format. Very large images that benefit from downsampling.",
    "intro-to-santa-s-2019-viz-costs-22-s-and-search": "Fun kaggle competition to optimize allocation for families wanting to visit Santa's workshop. Optimization challenge where the global optimum was soon found.",
    "intro-to-connextx-env-and-minimax": "First reinforcement learning playground using Connect 4 on Kaggle to test new environment. Agents play against each other on the leaderboard. ConnectX is usually best approach by the Negamax algorithm, a Minimax variant.",
    "intro-to-deep-fakes-videos-and-metadata-eda": "Deep Fake competition to identify visual and sound manipulation usually using GANs in video footage.",
    "getting-started-with-standard-gans-tutorial": "Least Squares GAN on TPU to generate Monet painting from noise. Uses data augmentation to enrich the training set.",
    "understanding-and-improving-cyclegans-tutorial": "CycleGAN on TPUs to generate Monet painting from photographs. Uses Least Squares implementation of GANs to improve training and also implements some basic augmentation to get the competition going.",
}

# Freestyle notebooks, keyed by slug; each value is the description text.
freestyle = {
    # NOTE(review): this description contained a raw line break inside the
    # quoted string, which is not valid Python; the break is kept as an
    # explicit "\n" -- confirm that a newline (not a space) is intended.
    "the-reason-we-re-happy": "Exploration of the 'World Happiness Report' data. Found a strong correlation of final score with wealth indicators. \nJune winner in Reddit community [r/dataisbeautiful](https://www.reddit.com/r/dataisbeautiful/comments/c89mz2/battle_dataviz_battle_for_the_month_of_july_2019/eskzdhd/).",
    "berlin-airbnbs-is-it-really-all-about-location": "AirBnB has been a source of income for many with a spare room, but become controversial due to commercial players buying apartments to rent out on the platform. In this analysis, we are looking at AirBnB data in Berlin, Germany. The questions are, what is the main influence on price of a rental. These questions are important for tourism, city planning, and renters alike. Where do affluent tourists rent AirBnBs? What decides a good price of your flat. Where can city planners accomodate for this new development of short term rental on the market?",
}

## Sad times
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 13 18:55:36 2018

@author: chunhui zhu
"""
import pandas as pd
import numpy as np
import pickle
from kaggle.api.kaggle_api_extended import KaggleApi

# NOTE(review): download_path is defined but not passed to the download call
# below, and the CSV files are read with relative paths -- confirm intent.
download_path = "C:\\Users\\czhu5\\Desktop\\Data-622-2-ML\\hw3"

api = KaggleApi()
api.authenticate()

# download the kaggle data set (no target folder is given to the call)
api.competition_download_files('titanic')

test = pd.read_csv('test.csv')
train = pd.read_csv('train.csv')

# Show the first rows of each data set as a quick sanity check.
print("finish load for test data set")
print(test.head())
print("")
print("finish load for train data set")
print(train.head())
def download_kaggle_files(dataset='netflix-inc/netflix-prize-data',
                          path='/Users/pcc33/Downloads/'):
    """Download a Kaggle dataset and unzip it.

    Called without arguments this behaves exactly as before: the Netflix
    Prize data is downloaded to the original hard-coded folder.

    Args:
        dataset: Kaggle dataset reference passed to the download call.
        path: Folder passed to the download call as the destination.
    """
    api = KaggleApi()
    api.authenticate()
    api.dataset_download_files(dataset, path=path, unzip=True)
# extract file in destination with zipfile.ZipFile(source, 'r') as zipref: zipref.extractall(destination) if delete_source: # delete source file os.remove(source) # %% -------------------- test_original_dimension = pd.read_csv( "D:/GWU/4 Spring 2021/6501 Capstone/VBD CXR/PyCharm " "Workspace/vbd_cxr/7_POC/test_original_dimension_1024_sample.csv") # %% -------------------- api = KaggleApi() api.authenticate() # , "00bcb82818ea83d6a86df241762cd7d0", # "013893a5fa90241c65c3efcdbdd2cec1", "01ee6e560f083255a630c41bba779405" # %% -------------------- def resize_image_test(img_arr, smallest_max_size): # create resize transform pipeline transform = albumentations.Compose([ albumentations.SmallestMaxSize(max_size=smallest_max_size, always_apply=True) ]) return transform(image=img_arr)
def download(self):
    """Fetch TrackML data from Kaggle, unpack it, and record the event ids.

    When ``self.full_dataset`` is truthy the complete competition archive is
    downloaded into ``self.root``, its training archives are extracted, and
    every member of those archives is written into ``self.raw_dir``.
    Otherwise only ``train_sample.zip`` is downloaded and unpacked into
    ``self.raw_dir``.

    Afterwards ``self.events`` is set to the sorted event ids parsed from
    the ``event*-hits.csv`` file names under ``<self.root>/raw``, truncated
    to ``self.n_events`` entries when that value is positive.

    Raises:
        RuntimeError: If the kaggle package cannot be imported.
        Exception: If a file about to be unpacked from the full dataset
            already exists.
    """
    import os
    from zipfile import ZipFile
    try:
        from kaggle.api.kaggle_api_extended import KaggleApi
    except ImportError:
        raise RuntimeError(
            'please install and setup the kaggle '
            'competition api: https://github.com/Kaggle/kaggle-api')

    api = KaggleApi()
    api.authenticate()

    kgl_comp = 'trackml-particle-identification'
    test_file = 'train_sample.zip'

    if self.full_dataset:
        kgl_file = 'trackml-particle-identification.zip'
        print(
            'Downloading full TrackML dataset (~80GB), this may take a while...'
        )
        api.competition_download_files(kgl_comp,
                                       path=self.root,
                                       quiet=False,
                                       force=False)

        with ZipFile(os.path.join(self.root, kgl_file), 'r') as zf:
            # Keep the training archives, skipping the sample and blacklist.
            training_samples = [
                member for member in zf.namelist()
                if 'train' in member
                and 'sample' not in member
                and 'blacklist' not in member
            ]
            for name in tqdm(training_samples, desc='extracting zipballs'):
                # Archives already present in the root are not re-extracted.
                if not os.path.exists(os.path.join(self.root, name)):
                    zf.extract(name, path=self.root)

        for sample in training_samples:
            with ZipFile(os.path.join(self.root, sample), 'r') as zf:
                fnames = zf.namelist()
                action = f'unpacking {sample}'
                # The archive's own top-level directory entry is skipped.
                sample_dir = sample.split('.')[0] + '/'
                for name in tqdm(fnames, desc=action):
                    if name == sample_dir:
                        continue
                    outname = os.path.join(self.raw_dir,
                                           os.path.basename(name))
                    if os.path.exists(outname):
                        raise Exception(f'{outname} already exists!')
                    with open(outname, 'wb') as fout:
                        fout.write(zf.read(name))
    else:
        kgl_file = test_file
        print(
            'Downloading training example from TrackML dataset, only 100 training events...'
        )
        api.competition_download_file(kgl_comp,
                                      test_file,
                                      path=self.root,
                                      quiet=False,
                                      force=False)
        with ZipFile(os.path.join(self.root, kgl_file), 'r') as zf:
            fnames = zf.namelist()
            for name in tqdm(fnames):
                # Skip the directory entry itself.
                if name == 'train_100_events/':
                    continue
                target = os.path.join(self.raw_dir, os.path.basename(name))
                with open(target, 'wb') as fout:
                    fout.write(zf.read(name))

    # Derive the event ids from the hits files: take the file name, keep the
    # part before the first '-', and drop its first five characters.
    hit_files = glob.glob(
        osp.join(osp.join(self.root, 'raw'), 'event*-hits.csv'))
    event_ids = [
        path.split(osp.sep)[-1].split('-')[0][5:] for path in hit_files
    ]
    self.events = sorted(event_ids)
    if self.n_events > 0:
        self.events = self.events[:self.n_events]
"""
Script to download the dataset from kaggle.

Author: Tom Fleet
Created: 02/01/2021
"""

from kaggle.api.kaggle_api_extended import KaggleApi

from src.config import RAW_DATA

# Create the raw-data folder (and any missing parents) on first use.
if not RAW_DATA.exists():
    RAW_DATA.mkdir(parents=True)

# Authenticate, then download and unzip the dataset into RAW_DATA.
api = KaggleApi()
api.authenticate()
api.dataset_download_files(
    "saurograndi/airplane-crashes-since-1908",
    path=RAW_DATA,
    unzip=True,
)
# Machine Learning - Unsupervised Machine Learning
# Course Code: DLBDSMLUSL01
# Feature Importance
# Chi²

#%% import libraries
import pandas as pd
from sklearn.feature_selection import SelectKBest, chi2
from kaggle.api.kaggle_api_extended import KaggleApi
import zipfile
from sklearn.preprocessing import LabelEncoder

#%% load sample data
# log into kaggle
api = KaggleApi()
api.authenticate()

# download the data
kaggle_user = '******'
kaggle_project = 'churn-prediction-of-bank-customers'
api.dataset_download_files(kaggle_user + '/' + kaggle_project)

# unzip the data
# The archive is opened in a context manager so it is closed after
# extraction. The previous form bound the result of extractall() (None) to
# the name `zip`, hiding the builtin for the rest of the script.
with zipfile.ZipFile(kaggle_project + '.zip') as archive:
    archive.extractall()

# load the data
churn_df = pd.read_csv('Churn_Modelling.csv')

#%% prepare the data
#!/usr/bin/python
#
# Copyright 2018 Kaggle Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import

from kaggle.api.kaggle_api_extended import KaggleApi
from kaggle.api_client import ApiClient

# Build the module-level Kaggle client on top of a fresh ApiClient and
# authenticate it as soon as this module is imported.
api = KaggleApi(ApiClient())
api.authenticate()
def update_dataset(folder, note):
    """Create a new version of a Kaggle dataset, deleting the old versions.

    Args:
        folder: Passed to ``dataset_create_version`` as its first argument.
        note: Passed to ``dataset_create_version`` as its second argument.

    Returns:
        Whatever ``dataset_create_version`` returns.
    """
    client = KaggleApi()
    client.authenticate()
    outcome = client.dataset_create_version(
        folder,
        note,
        delete_old_versions=True,
    )
    return outcome
print('Establishing environment credentials...')
import os

# NOTE(review): the API key below is hard-coded in the source; anyone with
# access to this file can use it. Consider supplying it through the
# environment instead.
os.environ['KAGGLE_USERNAME'] = "******"  # For demo use only
os.environ['KAGGLE_KEY'] = "f3f9220e8d85a7427864bd4f96f23ff2"  # Please use your own API token if making frequent queries

print('Querying Kaggle API...')
from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()
api.competition_download_file('widsdatathon2020', 'training_v2.csv')

print('Extracing zip file...')
import zipfile

with zipfile.ZipFile('training_v2.csv.zip', 'r') as zip_ref:
    zip_ref.extractall()

print('Renaming csv and removing zip file...')
try:
    os.rename('training_v2.csv', 'data.csv')
    os.remove('training_v2.csv.zip')
except OSError:
    # Only file-system failures are reported here; a bare `except:` would
    # also swallow KeyboardInterrupt and programming errors.
    print('Error renaming files')
import matplotlib.pyplot as plt
import numpy
import pandas as pd
from kaggle.api.kaggle_api_extended import KaggleApi
#import kaggle
import zipfile

api = KaggleApi()
api.authenticate()

# Download the confirmed-cases file and load it.
api.dataset_download_file(
    'antgoldbloom/covid19-data-from-john-hopkins-university',
    'CONVENIENT_global_confirmed_cases.csv')
#api.dataset_download_file('antgoldbloom/covid19-data-from-john-hopkins-university','CONVENIENT_global_confirm_cases.csv')
df = pd.read_csv('CONVENIENT_global_confirmed_cases.csv')
print(df)

# Work with the 'South Africa' column: print it together with its sum.
df_sa = df['South Africa']
total = df_sa.sum()
print(df_sa)
print('Total ', total)

# Plot the column and its 21- and 7-row rolling means.
df_sa.plot()
plt.plot(df_sa.rolling(window=21).mean(), label='MA 21 days')
plt.plot(df_sa.rolling(window=7).mean(), label='MA 7 days')
plt.title('South Africa Daily Reporter New Cases')
# Get the Corona tweet dataset from Kaggle.
#
# Getting the token (source:
# https://www.kaggle.com/docs/api#interacting-with-datasets):
# To use Kaggle's public API you must first authenticate with an API token.
# From the site header, click on your user profile picture, then on
# "My Account" in the dropdown menu. This takes you to your account settings
# at https://www.kaggle.com/account. Scroll down to the section of the page
# labelled API.
#
# To create a new token, click the "Create New API Token" button. This
# downloads a fresh authentication token onto your machine.
#
# If you are using the Kaggle CLI tool, it looks for this token at
# ~/.kaggle/kaggle.json on Linux, OSX and other UNIX-based operating systems,
# and at C:\Users\<Windows-username>\.kaggle\kaggle.json on Windows. If the
# token is not there, an error is raised. Hence, once you have downloaded the
# token, move it from your Downloads folder to this folder.
#
# If you are using the Kaggle API directly, where you keep the token does not
# matter, so long as you are able to provide your credentials at runtime.
from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()

# Download the dataset into data/ and unzip it, with progress output enabled.
files = api.dataset_download_files(
    'smid80/coronavirus-covid19-tweets',
    unzip=True,
    path='data/',
    quiet=False,
)
def _elo_conditions(elo):
    """Return the eleven rating-band masks for an Elo column, highest first."""
    floors = [2700, 2500, 2400, 2300, 2200, 2000, 1800, 1600, 1400, 1200, 0]
    # The top band is open-ended and includes its floor, so a rating of
    # exactly 2700 is always assigned to a band.
    conditions = [elo >= floors[0]]
    conditions.extend(
        (elo < upper) & (elo >= lower)
        for upper, lower in zip(floors, floors[1:])
    )
    return conditions


def _games_for_event(chess_df, event):
    """Return the games whose Event is ``event`` in either padded spelling."""
    padded = chess_df[chess_df.Event == f' {event} ']
    trailing = chess_df[chess_df.Event == f'{event} ']
    return pd.merge(padded, trailing, how='outer')


def chess_analysis():
    """Download the chess-games dataset, categorise the games, and plot them.

    Steps, in order:
      1. Download ``arevel/chess-games`` and read ``chess_games.csv`` from
         the zip archive in chunks of 100000 rows.
      2. Keep one game per (White, Black) pairing and drop rows whose move
         text contains a brace.
      3. Print the first 1000 rows.
      4. Add the average Elo, a rating band for White, Black and the
         average, and a textual outcome.
      5. Split the move text into columns and append them to the frame.
      6. For each of seven event types, show count plots of termination,
         outcome and average band, a histogram of the average Elo, and
         count plots of move columns 1 and 2.

    The elapsed time is printed at the end. Nothing is returned.
    """
    # Start time count to gauge process run time
    start = time.time()

    api = KaggleApi()
    api.authenticate()
    # downloading datasets for Chess games
    api.dataset_download_files('arevel/chess-games')

    # Read data in chunks of 100000 rows and concatenate them into one
    # dataframe. The archive and the member are closed once the read is done.
    with zipfile.ZipFile('chess-games.zip') as zf, \
            zf.open('chess_games.csv') as member:
        chunks = pd.read_csv(member, chunksize=100000)
        chess_df = pd.concat(chunks)

    # Keep only the first game for each (White, Black) pairing
    chess_df = chess_df.drop_duplicates(subset=['White', 'Black'])
    # Remove rows whose move text contains a brace; the original note says
    # these carry stockfish evaluations that clog up the data later on.
    chess_df = chess_df.drop(chess_df[chess_df.AN.str.contains(r'[{}]')].index)

    # use iterrows to print out data
    for index, row in chess_df.head(1000).iterrows():
        print(index, row)

    # reset index after dropping duplicate pairings and annotated games
    chess_df = chess_df.reset_index()

    # Define average elo rank per game
    chess_df['AverageElo'] = (chess_df['WhiteElo'] + chess_df['BlackElo']) / 2

    # Conditions used by np.select to turn numeric ratings into categories
    white_conditions = _elo_conditions(chess_df['WhiteElo'])
    black_conditions = _elo_conditions(chess_df['BlackElo'])
    average_conditions = _elo_conditions(chess_df['AverageElo'])
    outcome_conditions = [
        (chess_df['Result']) == "1-0",
        (chess_df['Result']) == "0-1",
        (chess_df['Result']) == "1/2-1/2",
        (chess_df['Result']) == "*",
    ]

    # The values to assign for each condition, in the same order
    elo = [
        'Super GM', 'GM', 'GM/IM', 'FM/IM', 'CM/NM', 'Experts', 'Class A',
        'Class B', 'Class C', 'Class D', 'Novices'
    ]
    outcome = ['White Wins', 'Black Wins', 'Draw', 'No Result']

    # create new columns and use np.select to assign values to them
    chess_df['WhiteEloRank'] = np.select(white_conditions, elo)
    chess_df['BlackEloRank'] = np.select(black_conditions, elo)
    chess_df['AverageEloRank'] = np.select(average_conditions, elo)
    chess_df['Outcome'] = np.select(outcome_conditions, outcome)

    # create dataframe for moves: split on spaces (at most 30 splits) and
    # drop every third column starting with column 0
    moves_df = chess_df["AN"].str.split(" ", n=30, expand=True)
    moves_df = moves_df.drop(moves_df.iloc[:, 0:31:3], axis=1)

    # append moves dataframe to chess dataframe
    chess_df = pd.concat([chess_df, moves_df], axis=1)
    chess_df.reset_index(inplace=True)

    # sort data from highest average elo to lowest average elo
    chess_df = chess_df.sort_values(by='AverageElo', ascending=False)

    # make sure the rating columns hold numeric values
    elo_columns = ["WhiteElo", "BlackElo", "AverageElo"]
    chess_df[elo_columns] = chess_df[elo_columns].apply(pd.to_numeric)

    # One dataframe per event type
    classical = _games_for_event(chess_df, 'Classical')
    classical_tournament = _games_for_event(chess_df, 'Classical tournament')
    blitz = _games_for_event(chess_df, 'Blitz')
    blitz_tournament = _games_for_event(chess_df, 'Blitz tournament')
    bullet = _games_for_event(chess_df, 'Bullet')
    bullet_tournament = _games_for_event(chess_df, 'Bullet tournament')
    correspondence = _games_for_event(chess_df, 'Correspondence')

    # Plot results
    plots = ['Termination', 'Outcome', 'AverageEloRank']  # categorical data
    plots_1 = ['AverageElo']
    plots_2 = [1, 2]  # move columns kept from the split above
    game_types = [
        classical, classical_tournament, blitz, blitz_tournament, bullet,
        bullet_tournament, correspondence
    ]
    game_types_str = [
        'Classical', 'Classical Tournament', 'Blitz', 'Blitz Tournament',
        'Bullet', 'Bullet Tournament', 'Correspondence'
    ]

    # Each figure is a single row with one subplot per plotted column.
    for games, game_label in zip(game_types, game_types_str):
        for position, column in enumerate(plots, start=1):
            plt.subplot(1, len(plots), position)
            plt.title(str(game_label))
            plt.xlabel(column)
            plt.subplots_adjust(bottom=0.095, top=0.97, hspace=1, wspace=0.45)
            sns.countplot(x=games[column])
            plt.xticks(rotation=30)
        plt.show()
        plt.clf()

        for position, column in enumerate(plots_1, start=1):
            plt.subplot(1, len(plots_1), position)
            plt.title(str(game_label))
            plt.xlabel(column)
            plt.subplots_adjust(bottom=0.095, top=0.97, hspace=1)
            sns.histplot(x=games[column], kde=True, bins=25)
            plt.xticks(rotation=30)
        plt.show()
        plt.clf()

        for position, column in enumerate(plots_2, start=1):
            plt.subplot(1, len(plots_2), position)
            plt.title(str(game_label))
            plt.xlabel(column)
            plt.subplots_adjust(bottom=0.095, top=0.97, hspace=1)
            sns.countplot(x=games[column])
            plt.xticks(rotation=30)
        plt.show()
        plt.clf()

    end = time.time()
    print("Run Time: ", (end - start), 'Seconds')
from kaggle.api.kaggle_api_extended import KaggleApi
import datetime
from datetime import timezone
import time

api = KaggleApi()
api.authenticate()

# Placeholder: replace with the competition name (the original note says it
# is the same value as the -c option of the download command).
COMPETITION = 'コンペ名(dataset downloadの-cオプションと同じ)'

# The first entry of the submission list is the one being watched.
result_ = api.competition_submissions(COMPETITION)[0]
latest_ref = str(result_)  # reference of the latest submission
submit_time = result_.date

status = ''
print('check this submit, ', latest_ref)

# Poll until the watched submission reports the status 'complete'.
while status != 'complete':
    list_of_submission = api.competition_submissions(COMPETITION)
    # Find the watched submission again in the refreshed list.
    for result in list_of_submission:
        if str(result.ref) == latest_ref:
            break
    status = result.status

    # submit_time is compared against a naive UTC timestamp.
    now = datetime.datetime.now(timezone.utc).replace(tzinfo=None)
    # total_seconds() covers the whole interval; the `.seconds` attribute
    # wraps around after 24 hours.
    elapsed_time = int((now - submit_time).total_seconds() / 60) + 1

    if status == 'complete':
        print('\r', f'run-time: {elapsed_time} min, LB: {result.publicScore}')
    else:
        print('\r', f'elapsed time: {elapsed_time} min', end='')
        # Wait a minute before polling again; no wait once it is complete.
        time.sleep(60)
def download_data_files():
    """Download the four data files of ``dataset`` through the Kaggle API.

    The files are requested one after another, in this order: confirmed
    cases, global metadata, raw global confirmed cases, death cases.
    """
    api = KaggleApi()
    api.authenticate()

    wanted_files = (
        confirm_cases_path,
        global_methadata_path,
        RAW_global_confirm_path,
        death_cases_path,
    )
    for wanted in wanted_files:
        api.dataset_download_file(dataset=dataset, file_name=wanted)
import os import numpy as np # Check if API token has been setup. try: from kaggle.api.kaggle_api_extended import KaggleApi except OSError as e: sys.exit(e) DATASET_URL = 'farhanhubble/multimnistm2nist' SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__)) DATA_PATH = SCRIPT_PATH + 2 * (os.sep + os.pardir) + '/data' RAW_DATA_PATH = os.path.join(DATA_PATH, 'raw') PROCESSED_DATA_PATH = os.path.join(DATA_PATH, 'processed') api = KaggleApi() api.authenticate() # Download M2NIST dataset if not os.path.exists(os.path.join(RAW_DATA_PATH, 'multimnistm2nist.zip')): print('Downloading M2NIST dataset...', end='') api.dataset_download_files(DATASET_URL, path=RAW_DATA_PATH) print('Done.') else: print('Dataset already downloaded.') # Check if extracted files already exist. Ignore existing files. dataset = zipfile.ZipFile(os.path.join(RAW_DATA_PATH, 'multimnistm2nist.zip'), 'r') files = dataset.namelist() for f in files:
from kaggle.api.kaggle_api_extended import KaggleApi

# Authenticate and download the wine-quality dataset into the project's
# external-data folder.
api = KaggleApi()
api.authenticate()
api.dataset_download_files(
    'rajyellow46/wine-quality',
    path='D:/Netology/Python/pyda-16-em/DS Project Flow/WineQualityPrediction/data_external/',
)
import os
from zipfile import ZipFile
from kaggle.api.kaggle_api_extended import KaggleApi

# Folders for Repos: create each one unless it is already a directory.
for folder in ('./data', './code', './figures', './sub'):
    if not os.path.isdir(folder):
        os.mkdir(folder)

api = KaggleApi()
api.authenticate()
comp = 'walmart-recruiting-store-sales-forecasting'
api.competition_download_files(comp, path='./data')

# Extract competition files. The archive is closed before it is deleted,
# because removing a file that is still open fails on Windows.
with ZipFile('./data/' + comp + '.zip', 'r') as zf:
    zf.extractall('./data/')
os.remove('./data/walmart-recruiting-store-sales-forecasting.zip')

# Unpack every zip archive that came out of the competition archive and
# delete each one once it has been extracted and closed.
os.chdir('./data')
for file in os.listdir():
    print(file)
    if file.endswith('zip'):
        with ZipFile(file, 'r') as zipObj:
            zipObj.extractall()
        os.remove(file)
import random
from datetime import datetime as dt
from kaggle.api.kaggle_api_extended import KaggleApi
from kaggle.api_client import ApiClient
from discord.ext import commands
from dotenv import load_dotenv
from pytz import timezone

# Load the .env file unless the PRODUCTION environment variable is set.
if os.environ.get("PRODUCTION") is None:
    load_dotenv(verbose=True)

# Translation setup: locale from the environment, catalogues from ./locale.
i18n.set('locale', os.environ.get('LOCALE'))
i18n.load_path.append('./locale')

api = KaggleApi()
api.authenticate()

# Store discord token in the variable named token
token = os.getenv('DISCORD_TOKEN')

# This will set the command prefix
bot = commands.Bot(command_prefix='!')


# This function will list all 🏆 current competitions
@bot.command(name='competitions', help='Responds with a list of competitions')
async def competitions(comp):
    """Send the localized greeting and fetch the Kaggle competition list.

    ``comp`` is the object the reply is sent through. The greeting is given
    the current hour in UTC.
    """
    now = dt.now().astimezone(timezone('UTC'))
    await comp.send(i18n.t('kaggle.hi', hour=now.hour))
    competitions_list = api.competitions_list()
assert isinstance(html,str) s = StackOverflowParser() s.feed(html) return [item.replace('\n', ' ') for item in s.code_blocks] # In[ ]: from kaggle.api.kaggle_api_extended import KaggleApi # Import the dataset directly from Kaggle # Requires a Kaggle account linked to an API key on your device api = KaggleApi() api.authenticate() api.dataset_download_files('stackoverflow/pythonquestions', path='./', unzip=True) # In[2]: import pandas as pd # File Paths file_questions = 'Questions.csv' file_answers = 'Answers.csv' file_tags = 'Tags.csv' dates = ["CreationDate"]
def auth():
    """Create a ``KaggleApi`` client, authenticate it, and return it."""
    client = KaggleApi()
    client.authenticate()
    return client
def test_config_actions(self):
    """Check the config directory suffix and the lookup of an unknown key."""
    api = KaggleApi()

    # The config directory path must end with '.kaggle'.
    config_dir = api.config_dir
    self.assertTrue(config_dir.endswith('.kaggle'))

    # A key that was never configured must come back as None.
    missing_value = api.get_config_value('doesntexist')
    self.assertEqual(missing_value, None)
def __init__(self):
    """Create the private Kaggle API client and authenticate it."""
    # The client is stored on the instance first, then authenticated.
    self.__api = KaggleApi()
    client = self.__api
    client.authenticate()
#================================
# Visualization
#================================
classes = {0: 'cat', 1: 'dog'}
fig, axes = plt.subplots(5, 5, figsize=(16, 20), facecolor='w')
for i, ax in enumerate(axes.ravel()):
    # A prediction above 0.5 is shown as class 1, anything else as class 0.
    label = 1 if y_preds[i] > 0.5 else 0

    ax.set_title('{}.jpg'.format(i+1) + " / " + classes[label])
    img = Image.open(os.path.join(args.dataset_dir, "test", '{}.jpg'.format(i+1)))
    ax.imshow(img)

fig.savefig(
    os.path.join(args.results_dir, args.exper_name, "classification.png"),
    dpi=300,
    bbox_inches='tight',
)

#================================
# Submit through the Kaggle API
#================================
# Fill the submission template with the predicted values.
ds_submission = pd.read_csv(os.path.join(args.dataset_dir, "sample_submission.csv"))
# Write through a single .iloc call: assigning via ds_submission['label'][...]
# is chained indexing, which may modify a temporary copy instead of the frame.
label_position = ds_submission.columns.get_loc('label')
ds_submission.iloc[0:len(y_preds), label_position] = list(map(float, y_preds))
ds_submission.to_csv(
    os.path.join(args.results_dir, args.exper_name, args.submit_file),
    index=False,
)

if args.submit:
    # Submit with the Kaggle API, then list the competition's submissions.
    api = KaggleApi()
    api.authenticate()
    api.competition_submit(
        os.path.join(args.results_dir, args.exper_name, args.submit_file),
        args.submit_message,
        args.competition_id,
    )
    os.system('kaggle competitions submissions -c {}'.format(args.competition_id))
from __future__ import absolute_import

from kaggle.api.kaggle_api_extended import KaggleApi
from kaggle.api_client import ApiClient

# Build the module-level Kaggle client on top of a fresh ApiClient and
# authenticate it as soon as this module is imported.
api = KaggleApi(ApiClient())
api.authenticate()

# NOTE(review): downloadPath is called on the client with quiet=True;
# confirm that the installed KaggleApi provides this method.
api.downloadPath(quiet=True)