def create_dataset(
    api: KaggleApi,
    dataset_slug: str,
    license_name: str,
    target_dir: Path,
    quiet: bool = False,
):
    # Kaggle requires dataset slugs/titles to be 6-50 characters long.
    if len(dataset_slug) < 6 or len(dataset_slug) > 50:
        raise ValueError("The dataset slug must be between 6 and 50 characters")

    owner_slug = get_username(api)
    request = DatasetNewRequest(
        title=dataset_slug,
        slug=dataset_slug,
        owner_slug=owner_slug,
        license_name=license_name,
        subtitle=None,
        description=None,
        files=[],
        is_private=True,
        convert_to_csv=False,
        category_ids=[],
    )
    # upload_files() pushes the contents of target_dir (tarred) and fills
    # request.files with upload tokens before the create call below.
    api.upload_files(request, None, target_dir, quiet, dir_mode="tar")
    result = DatasetNewResponse(
        api.process_response(api.datasets_create_new_with_http_info(request))
    )
    return result
def update_dataset(
    api: KaggleApi,
    dataset_slug: str,
    target_dir: Path,
    quiet=False,
    delete_old_versions=True,
):
    owner_slug = get_username(api)
    request = DatasetNewVersionRequest(
        version_notes="test",
        subtitle=None,
        description=None,
        files=[],
        convert_to_csv=False,
        category_ids=[],
        delete_old_versions=delete_old_versions,
    )
    api.upload_files(request, None, target_dir, quiet, dir_mode="tar")
    result = DatasetNewVersionResponse(
        api.process_response(
            api.datasets_create_version_with_http_info(
                owner_slug, dataset_slug, request
            )
        )
    )
    return result
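# Usage sketch for the two dataset helpers above; the slug
# "my-requirement-pkgs" and the ./pkgs directory are illustrative
# stand-ins, and get_username is assumed to behave as in the snippets above.
from pathlib import Path
from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()
create_dataset(api, "my-requirement-pkgs", "CC0-1.0", Path("./pkgs"))
# On later runs, push the refreshed directory as a new version instead:
# update_dataset(api, "my-requirement-pkgs", Path("./pkgs"))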
def __init__(self, compete, work_dir, default_submission_id=0): self.kaggle_api = KaggleApi() self.kaggle_api.authenticate() self.work_dir = work_dir self.compete = compete self.default_submission_id = default_submission_id
def push(
    api: KaggleApi, params: KernelPushParams, script_body: str
) -> KernelPushResponse:
    """
    Build a KernelPushRequest from the given push parameters and script
    body, and push it to Kaggle via the Kernels API.

    Parameters
    ==========
    api: an authenticated KaggleApi client
    params: kernel metadata (slug, title, visibility, data sources, ...)
    script_body: the source code to push as the kernel's content
    """
    language = "python"
    kernel_push_request = KernelPushRequest(
        id=params.id_no,
        slug=params.slug,
        new_title=params.new_title,
        text=script_body,
        language=language,
        kernel_type=params.kernel_type,
        is_private=params.is_private,
        enable_gpu=params.enable_gpu,
        enable_internet=params.enable_internet,
        dataset_data_sources=params.dataset_data_sources,
        competition_data_sources=params.competition_data_sources,
        kernel_data_sources=params.kernel_data_sources,
        category_ids=params.category_ids,
    )
    result = KernelPushResponse(
        api.process_response(
            api.kernel_push_with_http_info(kernel_push_request=kernel_push_request)
        )
    )
    return result
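# Hedged sketch of calling push(). KernelPushParams is this project's own
# container, so the constructor shape and values below are assumptions for
# illustration only.
params = KernelPushParams(
    id_no=None,
    slug="your-username/hello-kernel",
    new_title="hello-kernel",
    kernel_type="script",
    is_private=True,
    enable_gpu=False,
    enable_internet=True,
    dataset_data_sources=[],
    competition_data_sources=[],
    kernel_data_sources=[],
    category_ids=[],
)
response = push(api, params, script_body="print('hello from Kaggle')")
print(response)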
def __get_authenticated_kaggle_api(self, configfile):
    if configfile is not None:
        self.login_with_configfile(configfile)
    api = KaggleApi()
    api.authenticate()
    return api
def get_notify_competitions_list():
    try:
        api = KaggleApi()
        api.authenticate()
        competitions_list = []
        for info in api.competitions_list(sort_by='recentlyCreated'):
            competition = Competition(info)
            if competition.notify_message != DO_NOT_NOTIFY:
                competitions_list.append(competition)
        return competitions_list
    except Exception as e:
        logger.error(e)
        # Return an empty list rather than None so callers can iterate safely.
        return []
def publish_data(api, path):
    # kag_api is the KaggleApi class itself, so this is an unbound-method
    # call with `api` passed explicitly as self.
    response = kag_api.dataset_create_version(
        api, path,
        f"Dataset updated till (UTC): {datetime.utcnow()}",
        convert_to_csv=True,
        delete_old_versions=False)
    print("[INFO] Kaggle Dataset uploaded.")
    clear_dir(path)
def wait_for_install_kernel_completion(api: KaggleApi,
                                       meta_data: Dict,
                                       kernel_slug: str,
                                       quiet: bool = False) -> Dict[str, Any]:
    owner_slug = get_username(api)
    while True:
        response = api.process_response(
            api.kernel_output_with_http_info(owner_slug, kernel_slug))
        if response["log"] != "":
            # Wait for the kernel status to finish synchronizing.
            time.sleep(5)
            result = kernel_proc.status(api, kernel_slug)
            if result["status"] != "complete" or result["failureMessage"]:
                logs = json.loads(response["log"])
                err_messages = get_error_messages(logs)
                raise InstallKernelError(err_messages)
            return response
        if not quiet:
            click.echo("Wait for install kernel completion...")
        time.sleep(10)
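# Sketch of driving the polling helper above; "install-requirements" is an
# illustrative kernel slug, and InstallKernelError comes from this codebase.
try:
    output = wait_for_install_kernel_completion(
        api, meta_data={}, kernel_slug="install-requirements"
    )
    print(output["log"])
except InstallKernelError as err:
    print("install kernel failed:", err)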
def competition_prompt(api: KaggleApi) -> str:
    competition_query = click.prompt("competition", default="", show_default=False)
    competitions = api.competitions_list(search=competition_query)
    for i, c in enumerate(competitions):
        click.echo(f"{i} {c}")
    competition_index = click.prompt(">", type=int, show_choices=False,
                                     prompt_suffix=" ")
    return str(competitions[competition_index])
def download_data(data_dir: Optional[str] = None) -> None:
    """
    Download the configured Kaggle dataset, keep only the files listed in
    the config, and flatten any kept directories of images.

    :param data_dir: target directory; defaults to config.get_data_dir()
    :return: None
    """
    if data_dir is None:
        data_dir_path = config.get_data_dir()
    else:
        data_dir_path = Path(data_dir)
    data_dir_path.mkdir(exist_ok=True)
    dataset = config.get_dataset_attributes()
    api = KaggleApi()
    api.authenticate()
    api.dataset_download_cli(dataset["name"], path=data_dir_path, unzip=True)
    selected_files = [
        data_dir_path / dataset_file for dataset_file in dataset["files"]
    ]
    # Remove everything the config does not ask for.
    for obj in data_dir_path.glob("*"):
        if obj not in selected_files:
            if obj.is_dir():
                shutil.rmtree(obj)
            else:
                obj.unlink()
    # Flatten kept directories: move their .jpg files up one level, then
    # drop the now-empty directory.
    for obj in selected_files:
        if obj.is_dir():
            for image in obj.glob("*.jpg"):
                image.rename(image.parents[1] / image.name)
            obj.rmdir()
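# download_data() leans on config helpers that are not shown here; a
# plausible (assumed) shape for what they return, plus a call:
#     config.get_data_dir()            -> Path("data")
#     config.get_dataset_attributes()  -> {"name": "owner/dataset-slug",
#                                          "files": ["images", "labels.csv"]}
download_data(data_dir="./data")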
def submit_report(date_report):
    api = KaggleApi()
    api.authenticate()
    last_update = datetime.strftime(date_report, "%m/%d/%Y %H:%M")
    api.dataset_create_version(
        "data/", f"Auto update - {last_update} GMT-3", delete_old_versions=True
    )
def make_kernels_url():
    api = KaggleApi()
    api.authenticate()
    kernels_list = api.kernels_list(competition=COMPETITION_NAME,
                                    page_size=18,
                                    language='python',
                                    sort_by='scoreAscending')
    kernels_url = ''
    kernels_url_2 = ''
    # Split the 18 kernels across two messages of up to 9 entries each.
    for i, kernel_info in enumerate(kernels_list):
        title = getattr(kernel_info, 'title')
        url = getattr(kernel_info, 'ref')
        if i <= 8:
            kernels_url += '*{}\n'.format(title)
            kernels_url += 'url : https://www.kaggle.com/{}\n'.format(url)
        else:
            kernels_url_2 += '*{}\n'.format(title)
            kernels_url_2 += 'url : https://www.kaggle.com/{}\n'.format(url)
    logger.debug('Get {} kernels'.format(len(kernels_list)))
    return kernels_url, kernels_url_2
def upload_requirement_pkgs(api: KaggleApi, meta_data: Dict, target_dir: Path,
                            quiet: bool = False):
    slug = get_dataset_slug(api, meta_data)
    _, dataset_slug = slug.split("/")[-2:]
    license_name = "CC0-1.0"
    # dataset_status() tells us whether the dataset already exists: create
    # it on the first upload, otherwise push a new version.
    status = api.dataset_status(slug)
    if status is None:
        return kernel_proc.create_dataset(
            api,
            dataset_slug=dataset_slug,
            license_name=license_name,
            target_dir=target_dir,
            quiet=quiet,
        )
    else:
        return kernel_proc.update_dataset(
            api,
            dataset_slug=dataset_slug,
            target_dir=target_dir,
            quiet=quiet,
        )
def status(api: KaggleApi, kernel_slug: str):
    user_name = api.config_values[api.CONFIG_NAME_USER]
    return api.kernel_status(user_name, kernel_slug)
def get_kaggle_api() -> Any:
    return KaggleApi(ApiClient())
def _authenticated_client():
    client = KaggleApi()
    client.authenticate()
    return client
def kaggle_authenticate():
    # kag_api is the KaggleApi class; authenticate is called unbound with
    # the fresh instance passed as self.
    api = kag_api()
    kag_api.authenticate(api)
    print("\n[INFO] Kaggle api authenticated.")
    return api
def kaggle_dataset_download(api, dataset_name, path):
    kag_api.dataset_download_files(api, dataset_name, unzip=True, path=path)
    print("[INFO] Dataset downloaded.")
def get_kaggle_client(credentials=DEFAULT_CREDENTIALS):
    load_credentials(credentials)
    api = KaggleApi()
    api.authenticate()
    return api
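# load_credentials() is not shown here; a minimal sketch of what it
# presumably does, assuming a kaggle.json-style credentials file. The
# helper body is hypothetical, but KAGGLE_USERNAME and KAGGLE_KEY are the
# standard environment variables KaggleApi.authenticate() reads.
import json
import os

def load_credentials(path):
    with open(path) as f:
        creds = json.load(f)
    os.environ["KAGGLE_USERNAME"] = creds["username"]
    os.environ["KAGGLE_KEY"] = creds["key"]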
payload = {"message": message} if image_name: try: files = {"imageFile": open(image_name, "rb")} requests.post(url, headers=headers, params=payload, files=files) except: requests.post(url, headers=headers, params=payload) else: requests.post(url, headers=headers, params=payload) # message = 'test' # files = {"imageFile": open("./fig/uni1.jpg", "rb")} api = KaggleApi() api.authenticate() # api.competitions_list() klist = api.kernels_list(competition=COMP_NAME, page_size=999) if len(glob.glob(KERNEL_LIST)) > 0: klist_old_ref = pickle_read(KERNEL_LIST) """ ref is url key """ # for key in dir(klist[0]): # print('{}: {}'.format(key, getattr(klist[0], key))) # kernel_notifier('https://www.kaggle.com/' + klist[0].ref) klist_ref = [k.ref for k in klist]
class Submitter:
    def __init__(self, compete, work_dir, default_submission_id=0):
        self.kaggle_api = KaggleApi()
        self.kaggle_api.authenticate()
        self.work_dir = work_dir
        self.compete = compete
        self.default_submission_id = default_submission_id

    def submit(self, predicted: pd.DataFrame, file_name='submission.csv',
               message=None, save_model=True, model=None, model_name=None,
               submit=True, submission_id=None, submission_name=None,
               open_in_browser=False):
        # Folder and files
        if submission_id is None:
            submission_id = self.default_submission_id
            self.default_submission_id += 1
        if submission_name is None:
            # Guard: ' '.join() below requires a string.
            submission_name = file_name
        submission_folder_name = ' '.join(
            [str(submission_id), ' -- ', submission_name])
        new_folder_path = self.work_dir + f'/{submission_folder_name}'
        if not os.path.exists(new_folder_path):
            os.mkdir(new_folder_path)

        # Save model
        if model_name is None:
            model_name = str(model)
        if save_model:
            with open(f'{new_folder_path}/{model_name}.pickle', 'wb') as pickle_file:
                pickle.dump(model, pickle_file)

        # Submission
        predicted.to_csv(f'{new_folder_path}/{file_name}', index=False)
        if message is None:
            if model is None:
                message = file_name
            else:
                message = str(model)
        with open(new_folder_path + '/message.txt', 'w') as message_file:
            message_file.write(' '.join(
                [str(submission_id), ' -- ', submission_name, '\n', message]))

        # Upload via the kaggle CLI
        if submit:
            print('Uploading submission...')
            command = (f'kaggle competitions submit -c {self.compete} '
                       f'-f "{new_folder_path}/{file_name}" -m "{message}"')
            print(command)
            output = os.system(command)
            print('Output: ', output)

        # Open in browser
        if open_in_browser:
            webbrowser.open(
                f'https://www.kaggle.com/c/{self.compete}/submissions', new=2)

    def check_submission(self):
        last_submission = self.kaggle_api.competitions_submissions_list(
            self.compete)[0]
        print('Description: ', last_submission['description'])
        print('Date: ', last_submission['date'])
        print('Status: ', last_submission['status'])
        print('Score: ', last_submission['publicScore'])
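# Usage sketch for Submitter; the competition, directory, and predictions
# are illustrative. submit=False keeps the example from shelling out to
# the kaggle CLI.
import pandas as pd

submitter = Submitter(compete="titanic", work_dir="./submissions")
preds = pd.DataFrame({"PassengerId": [892, 893], "Survived": [0, 1]})
submitter.submit(preds, message="baseline", save_model=False,
                 submission_name="baseline", submit=False)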
def new_kaggle_api():
    api = KaggleApi()
    api.authenticate()
    return api
""" Pulls data from Kaggle API """ from kaggle import KaggleApi api = KaggleApi() api.authenticate() api.dataset_download_files("shivamb/Netflix-shows", unzip= True) api.kernels_output("eugenioscionti/scraping-rotten-tomatoes-to-enrich-netflix-dataset", "./")
def list_outputs(api: KaggleApi, kernel_slug: str):
    user_name = api.config_values[api.CONFIG_NAME_USER]
    return api.process_response(
        api.kernel_output_with_http_info(user_name, kernel_slug)
    )
from kaggle import KaggleApi
import os
import json

if __name__ == '__main__':
    config_path = "kaggle.json"
    data_path = 'data'
    filename = 'submission.csv'

    print("auth")
    with open(config_path, 'r') as f:
        config_dict = json.load(f)
    api = KaggleApi()
    # _load_config is a private KaggleApi helper; it takes credentials from
    # the dict instead of the default ~/.kaggle/kaggle.json lookup.
    api._load_config(config_dict)

    print("submit")
    api.competition_submit(
        file_name=os.path.join(data_path, filename),
        competition="titanic",
        message="test submission",
        quiet=False
    )
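# Optional follow-up (a sketch; competition_submissions is a standard
# kaggle-api call, though the printed fields depend on the client version):
#     latest = api.competition_submissions("titanic")[0]
#     print(latest)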
class Submission:
    def __init__(self, compete, name, work_dir, description=None, create_readme=False):
        self.id = str(time.time())
        self.compete = compete
        self.name = f'{name} - {self.id}'
        self.description = description
        if work_dir[-1] == '/':
            self.new_folder_path = work_dir + self.name
        else:
            self.new_folder_path = work_dir + '/' + self.name
        self.kaggle_api = KaggleApi()
        self.kaggle_api.authenticate()
        if not os.path.exists(self.new_folder_path):
            os.mkdir(self.new_folder_path)
        self.readme = Readme(self.name, self.description) if create_readme else None

    def save_model(self, model, file_name=None):
        if file_name is None:
            file_name = str(model).replace('\\', '')
        with open(f'{self.new_folder_path}/{file_name}.pickle', 'wb') as pickle_file:
            pickle.dump(model, pickle_file)

    def save_keras_model(self, model, file_name=None, save_format='pickle',
                         save_summary_to_readme=True, *args, **kwargs):
        from tensorflow.keras import Model
        if not isinstance(model, Model):
            raise Exception('Model should be an instance of keras.Model')
        if file_name is None:
            file_name = str(model).replace('\\', '')
        if save_summary_to_readme:
            summary = io.StringIO()
            model.summary(print_fn=lambda s: print(s, file=summary))
            if self.readme is None:
                raise Exception("'create_readme' should be True")
            self.readme.model_summary = summary.getvalue()
        if save_format == 'pickle':
            with open(f'{self.new_folder_path}/{file_name}.pickle', 'wb') as pickle_file:
                pickle.dump(model, pickle_file)
        elif save_format == 'config':
            with open(f'{self.new_folder_path}/{file_name}.json', 'w') as config_file:
                json.dump(model.get_config(), config_file)
        elif save_format == 'h5':
            model.save(f'{self.new_folder_path}/{file_name}', *args, **kwargs)
        else:
            raise Exception('Undefined save_format')
        return self

    def save_predictions(self, predictions, columns, index, file_name='predictions.csv'):
        # columns names two columns: the first maps to index, the second to
        # the predictions.
        pd.DataFrame(dict(zip(columns, [index, predictions]))) \
            .to_csv(f'{self.new_folder_path}/{file_name}', index=False)
        return self

    def open_in_browser(self):
        webbrowser.open(f'https://www.kaggle.com/c/{self.compete}/submissions', new=2)
        return self

    def submit(self, predictions_file_name='predictions.csv'):
        print('Uploading submission...')
        command = (f'kaggle competitions submit -c {self.compete} '
                   f'-f "{self.new_folder_path}/{predictions_file_name}" '
                   f'-m "{self.description}"')
        print(command)
        output = os.system(command)
        print()
        print('Output: ', output)
        return self

    def check_results(self, timeout=5):
        time.sleep(timeout)
        last_submission = self.kaggle_api.competitions_submissions_list(
            self.compete)[0]
        if self.readme is not None:
            self.readme.score = last_submission['publicScore']
            self.readme.date = last_submission['date']
            self.readme.status = last_submission['status']
        print('Description: ', last_submission['description'])
        print('Date: ', last_submission['date'])
        print('Status: ', last_submission['status'])
        print('Score: ', last_submission['publicScore'])
        return self

    def save_readme(self):
        # Use a context manager so the README is flushed and closed.
        with open(f'{self.new_folder_path}/README.md', 'w') as readme_file:
            readme_file.write(self.readme.markdown())
        return self
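# Usage sketch for the fluent Submission interface above; the competition,
# paths, and values are illustrative.
submission = Submission("titanic", "baseline", "./work",
                        description="logreg baseline", create_readme=True)
submission.save_predictions([0, 1], columns=["PassengerId", "Survived"],
                            index=[892, 893]) \
    .submit() \
    .check_results() \
    .save_readme()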
from kaggle import KaggleApi
import os
import json
import zipfile

if __name__ == '__main__':
    config_path = "kaggle.json"
    data_path = 'data'

    print("auth")
    with open(config_path, 'r') as f:
        config_dict = json.load(f)
    api = KaggleApi()
    api._load_config(config_dict)

    print("download")
    api.competition_download_files(competition="titanic", path=data_path, quiet=False)

    print("extract")
    for file in os.listdir(data_path):
        if file.endswith('.zip'):
            print(file)
            with zipfile.ZipFile(os.path.join(data_path, file), 'r') as zip_ref:
                zip_ref.extractall(data_path)
def fetch_pins_people(resize=.5,
                      min_faces_per_person=0,
                      color=False,
                      slice_=(slice(25, 275), slice(25, 275)),
                      download_if_missing=True):
    """Load the PINS dataset.

    Uses the PINS dataset provided on Kaggle and leverages the scikit-learn
    memory optimizations.

    Args:
        resize (float, optional): Image resize factor. Defaults to .5.
        min_faces_per_person (int, optional): Minimal number of images per
            person. Defaults to 0.
        color (bool): Toggle whether images should be RGB or single channel.
            Defaults to False.
        slice_ (tuple, optional): A rectangle to which images are sliced.
            Defaults to (slice(25, 275), slice(25, 275)).
        download_if_missing (bool, optional): Whether the dataset should be
            downloaded if not present on the machine. Defaults to True.

    Returns:
        sklearn.utils.Bunch: Collection of the data set.
    """
    from kaggle import KaggleApi

    # Resolve the archive and extraction paths inside the Kaggle config dir.
    kaggle_api = KaggleApi()
    kaggle_home = kaggle_api.read_config_file()['path']
    path_to_zip = os.path.join(kaggle_home, 'datasets', PINS_DATASET['name'],
                               PINS_DATASET['zip'])
    path_to_files = os.path.join(kaggle_home, 'datasets', PINS_DATASET['name'],
                                 PINS_DATASET['folder'])

    # Download if missing
    if download_if_missing and not os.path.exists(path_to_zip):
        kaggle_api.authenticate()
        kaggle_api.dataset_download_files(PINS_DATASET['name'], quiet=False)

    # Extract the ZIP archive if it has not been unpacked yet.
    if not os.path.exists(path_to_files):
        with ZipFile(path_to_zip, 'r') as zipObj:
            extraction_path = os.path.join(kaggle_home, 'datasets',
                                           PINS_DATASET['name'])
            zipObj.extractall(extraction_path)

    # Load data in memory, cached on disk via joblib.Memory.
    m = Memory(location=kaggle_home, compress=6, verbose=0)
    load_func = m.cache(_fetch_lfw_people)
    faces, target, target_names = load_func(
        path_to_files, resize=resize,
        min_faces_per_person=min_faces_per_person, color=color, slice_=slice_)

    X = faces.reshape(len(faces), -1)

    # Normalize names: strip the 'pins ' prefix and ' face' suffix, then
    # title-case.
    with np.nditer(target_names, op_flags=['readwrite']) as it:
        for x in it:
            x[...] = np.core.defchararray.replace(x, 'pins ', '')
            x[...] = np.core.defchararray.replace(x, ' face', '')
            x[...] = np.core.defchararray.title(x)

    # Pack the results as a Bunch instance
    return Bunch(data=X, images=faces, target=target, target_names=target_names)
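# Usage sketch; PINS_DATASET and the joblib-cached loader come from the
# module above, so only the call itself is shown.
people = fetch_pins_people(resize=0.4, min_faces_per_person=20)
print(people.images.shape, people.target_names[:5])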