def download_data(data_dir: Optional[str] = None) -> None:
    """
    Download the configured Kaggle dataset and keep only the selected files.

    The dataset described by ``config.get_dataset_attributes()`` is downloaded
    and unzipped into the data directory. Every entry of that directory that
    is not listed in ``dataset["files"]`` is then deleted. Finally, for each
    selected entry that is a directory, its ``*.jpg`` files are moved one
    level up and the directory itself is removed.

    :param data_dir: target directory; when ``None`` the directory returned by
        ``config.get_data_dir()`` is used. Missing parent directories are
        created.
    :return: None
    """
    data_dir_path = Path(config.get_data_dir() if data_dir is None else data_dir)
    # parents=True: a nested target such as "a/b/data" must not fail just
    # because "a/b" does not exist yet.
    data_dir_path.mkdir(parents=True, exist_ok=True)

    dataset = config.get_dataset_attributes()
    api = KaggleApi()
    api.authenticate()
    api.dataset_download_cli(dataset["name"], path=data_dir_path, unzip=True)

    selected_files = [
        data_dir_path / dataset_file for dataset_file in dataset["files"]
    ]

    # Snapshot the listing first: entries are deleted inside the loop and the
    # directory must not change under a live glob generator.
    for obj in list(data_dir_path.glob("*")):
        if obj in selected_files:
            continue
        if obj.is_dir():
            shutil.rmtree(obj)
        else:
            obj.unlink()

    for obj in selected_files:
        if not obj.is_dir():
            continue
        # Same reason as above: files are moved out while we walk the folder.
        for image in list(obj.glob("*.jpg")):
            image.rename(obj.parent / image.name)
        # rmdir (not rmtree) on purpose: it fails loudly if anything other
        # than the moved images is still inside.
        obj.rmdir()
def __get_authenticated_kaggle_api(self, configfile):
    """Return a ``KaggleApi`` instance on which ``authenticate()`` was called.

    When ``configfile`` is not ``None`` it is first handed to
    ``self.login_with_configfile`` before the client is created.
    """
    wants_config_login = configfile is not None
    if wants_config_login:
        self.login_with_configfile(configfile)

    client = KaggleApi()
    client.authenticate()
    return client
def submit_report(date_report):
    """Create a new version of the dataset stored in ``data/``.

    The version notes carry ``date_report`` formatted as
    ``MM/DD/YYYY HH:MM``; older versions are requested to be deleted.
    """
    kaggle_client = KaggleApi()
    kaggle_client.authenticate()

    last_update = datetime.strftime(date_report, "%m/%d/%Y %H:%M")
    version_notes = f"Auto update - {last_update} GMT-3"

    kaggle_client.dataset_create_version(
        "data/",
        version_notes,
        delete_old_versions=True,
    )
def get_notify_competitions_list():
    """Return the recently created competitions that should be notified.

    Every entry returned by ``api.competitions_list(sort_by='recentlyCreated')``
    is wrapped in ``Competition``; those whose ``notify_message`` equals
    ``DO_NOT_NOTIFY`` are dropped.

    Returns:
        list | None: the list of ``Competition`` objects, or ``None`` when any
        step failed (the failure is logged, not raised).
    """
    try:
        api = KaggleApi()
        api.authenticate()
        candidates = (
            Competition(info)
            for info in api.competitions_list(sort_by='recentlyCreated')
        )
        return [
            competition
            for competition in candidates
            if competition.notify_message != DO_NOT_NOTIFY
        ]
    except Exception as e:
        # Best-effort by design: keep the caller running, but log the full
        # traceback so the failure can actually be diagnosed.
        logger.exception(e)
        return None
def make_kernels_url():
    """Build two text blocks listing kernels of ``COMPETITION_NAME``.

    Up to 18 Python kernels are requested, sorted by ``scoreAscending``. The
    first nine go into the first string and the remaining ones into the
    second; each kernel contributes a title line and a URL line.

    Returns:
        tuple[str, str]: the two text blocks.
    """
    client = KaggleApi()
    client.authenticate()
    kernels_list = client.kernels_list(
        competition=COMPETITION_NAME,
        page_size=18,
        language='python',
        sort_by='scoreAscending',
    )

    first_block = []
    second_block = []
    for position, kernel_info in enumerate(kernels_list):
        # Positions 0..8 belong to the first block, everything after to the second.
        target = first_block if position <= 8 else second_block
        target.append('*{}\n'.format(kernel_info.title))
        target.append('url : https://www.kaggle.com/{}\n'.format(kernel_info.ref))

    logger.debug('Get {} kernels'.format(len(kernels_list)))
    return ''.join(first_block), ''.join(second_block)
def kaggle_authenticate():
    """Create a ``kag_api`` instance, authenticate it and return it.

    A confirmation line is printed once ``authenticate`` has returned.
    """
    client = kag_api()
    client.authenticate()
    print("\n[INFO] Kaggle api authenticated.")
    return client
class Submitter:
    """Store submission artifacts in a work directory and upload them to Kaggle.

    Each call to :meth:`submit` creates one sub-folder of ``work_dir`` holding
    the predictions CSV, a ``message.txt`` and optionally the pickled model.
    """

    def __init__(self, compete, work_dir, default_submission_id=0):
        """Create an authenticated Kaggle client and remember the settings.

        Args:
            compete: competition identifier used for uploads and URLs.
            work_dir: directory under which submission folders are created.
            default_submission_id: id given to the next submission that does
                not pass an explicit ``submission_id``.
        """
        self.kaggle_api = KaggleApi()
        self.kaggle_api.authenticate()
        self.work_dir = work_dir
        self.compete = compete
        self.default_submission_id = default_submission_id

    def submit(self, predicted: pd.DataFrame, file_name='submission.csv',
               message=None, save_model=True, model=None, model_name=None,
               submit=True, submission_id=None, submission_name=None,
               open_in_browser=False):
        """Write the submission folder and optionally upload it.

        Args:
            predicted: predictions, written with ``to_csv(index=False)``.
            file_name: name of the CSV inside the submission folder.
            message: submission message; defaults to ``str(model)`` or, when
                no model is given, to ``file_name``.
            save_model: pickle ``model`` into the folder. Skipped when
                ``model`` is ``None`` (there is nothing to save).
            model: model object to pickle / describe.
            model_name: pickle base name; defaults to ``str(model)``.
            submit: run ``kaggle competitions submit`` for the CSV.
            submission_id: explicit id; when ``None`` the running
                ``default_submission_id`` is used and then incremented.
            submission_name: optional label appended to the folder name.
            open_in_browser: open the competition's submissions page.
        """
        # Local import: only this method needs it, and the file's import
        # header is not visible from here.
        import subprocess

        # Folder and files
        if submission_id is None:
            submission_id = self.default_submission_id
            self.default_submission_id += 1

        if submission_name is None:
            # The name is optional; joining None used to raise TypeError.
            label = str(submission_id)
        else:
            label = ' '.join([str(submission_id), ' -- ', submission_name])

        new_folder_path = os.path.join(self.work_dir, label)
        os.makedirs(new_folder_path, exist_ok=True)

        # Save model
        if save_model and model is not None:
            if model_name is None:
                model_name = str(model)
            model_path = os.path.join(new_folder_path, f'{model_name}.pickle')
            with open(model_path, 'wb') as pickle_file:
                pickle.dump(model, pickle_file)

        # Submission
        submission_path = os.path.join(new_folder_path, file_name)
        predicted.to_csv(submission_path, index=False)

        if message is None:
            message = file_name if model is None else str(model)

        with open(os.path.join(new_folder_path, 'message.txt'), 'w') as message_file:
            message_file.write(' '.join([label, '\n', message]))

        # Upload
        if submit:
            print('Uploading submission...')
            # Argument list instead of a shell string: the message (often a
            # model repr) may contain quotes or newlines that would break or
            # alter a string-built shell command.
            command = ['kaggle', 'competitions', 'submit',
                       '-c', str(self.compete),
                       '-f', submission_path,
                       '-m', message]
            print(' '.join(command))
            try:
                output = subprocess.run(command).returncode
            except FileNotFoundError:
                # CLI not installed: report the conventional "command not
                # found" status rather than raising, as the shell call did.
                output = 127
            print('Output: ', output)

        # Open in browser
        if open_in_browser:
            webbrowser.open(
                f'https://www.kaggle.com/c/{self.compete}/submissions', new=2)

    def check_submission(self):
        """Print description, date, status and public score of one submission.

        The first element returned by ``competitions_submissions_list`` is
        used (presumably the most recent one — TODO confirm ordering).
        """
        submissions = self.kaggle_api.competitions_submissions_list(
            self.compete)
        if not submissions:
            # Nothing submitted yet; indexing [0] would raise IndexError.
            print('No submissions found for', self.compete)
            return

        last_submission = submissions[0]
        print('Description: ', last_submission['description'])
        print('Date: ', last_submission['date'])
        print('Status: ', last_submission['status'])
        print('Score: ', last_submission['publicScore'])
class Submission:
    """One Kaggle submission with its own artifact folder.

    The folder ``<work_dir>/<name> - <timestamp>`` is created on construction.
    All ``save_*`` / action methods return ``self`` so calls can be chained.
    """

    def __init__(self, compete, name, work_dir, description=None, create_readme=False):
        """Create the submission folder and an authenticated Kaggle client.

        Args:
            compete: competition identifier used for uploads and URLs.
            name: human-readable name; the creation timestamp is appended.
            work_dir: directory under which the submission folder is created.
            description: submission message, also handed to ``Readme``.
            create_readme: when true a ``Readme`` object is attached;
                otherwise ``self.readme`` is ``None``.
        """
        self.id = str(time.time())
        self.compete = compete
        self.name = f'{name} - {self.id}'
        self.description = description
        # os.path.join copes with both a trailing separator and an empty
        # work_dir (indexing work_dir[-1] raised IndexError on '').
        self.new_folder_path = os.path.join(work_dir, self.name)

        self.kaggle_api = KaggleApi()
        self.kaggle_api.authenticate()

        os.makedirs(self.new_folder_path, exist_ok=True)

        self.readme = Readme(self.name, self.description) if create_readme else None

    def save_model(self, model, file_name=None):
        """Pickle ``model`` into the submission folder.

        Args:
            model: any picklable object.
            file_name: base name without extension; defaults to ``str(model)``
                with backslashes removed.

        Returns:
            Submission: ``self``, like the other ``save_*`` methods.
        """
        if file_name is None:
            file_name = str(model).replace('\\', '')
        target = os.path.join(self.new_folder_path, f'{file_name}.pickle')
        with open(target, 'wb') as pickle_file:
            pickle.dump(model, pickle_file)
        return self

    def save_keras_model(self, model, file_name=None, save_format='pickle', save_summary_to_readme=True, *args,
                         **kwargs):
        """Persist a Keras model as pickle, JSON config or via ``model.save``.

        Args:
            model: instance of ``tensorflow.keras.Model``.
            file_name: base name; defaults to ``str(model)`` with backslashes
                removed.
            save_format: ``'pickle'``, ``'config'`` (``get_config()`` as JSON)
                or ``'h5'`` (delegates to ``model.save``).
            save_summary_to_readme: store ``model.summary()`` text on the
                readme; requires the submission to have been created with
                ``create_readme=True``.
            *args, **kwargs: forwarded to ``model.save`` for ``'h5'``.

        Returns:
            Submission: ``self``.

        Raises:
            Exception: wrong model type, readme requested but disabled, or
                unknown ``save_format``.
        """
        from tensorflow.keras import Model

        if not isinstance(model, Model):
            raise Exception('Model should be instance of keras.Model')

        if file_name is None:
            file_name = str(model).replace('\\', '')

        if save_summary_to_readme:
            if self.readme is None:
                raise Exception("'create_readme' should be True")
            summary = io.StringIO()
            model.summary(print_fn=lambda s: print(s, file=summary))
            self.readme.model_summary = summary.getvalue()

        base_path = os.path.join(self.new_folder_path, file_name)
        if save_format == 'pickle':
            with open(f'{base_path}.pickle', 'wb') as pickle_file:
                pickle.dump(model, pickle_file)
        elif save_format == 'config':
            with open(f'{base_path}.json', 'w') as config_file:
                json.dump(model.get_config(), config_file)
        elif save_format == 'h5':
            model.save(base_path, *args, **kwargs)
        else:
            raise Exception('Undefined save_format')

        return self

    def save_predictions(self, predictions, columns, index, file_name='predictions.csv'):
        """Write a two-column CSV: ``columns[0]`` = index, ``columns[1]`` = predictions.

        Returns:
            Submission: ``self``.
        """
        frame = pd.DataFrame(dict(zip(columns, [index, predictions])))
        frame.to_csv(os.path.join(self.new_folder_path, file_name), index=False)
        return self

    def open_in_browser(self):
        """Open the competition's submissions page in a new browser tab."""
        webbrowser.open(f'https://www.kaggle.com/c/{self.compete}/submissions', new=2)
        return self

    def submit(self, predictions_file_name='predictions.csv'):
        """Upload a predictions file with the ``kaggle`` command line tool.

        Args:
            predictions_file_name: file inside the submission folder.

        Returns:
            Submission: ``self``.
        """
        # Local import: only this method needs it, and the file's import
        # header is not visible from here.
        import subprocess

        print('Uploading submission...')
        # Argument list instead of a shell string so a description holding
        # quotes or shell metacharacters is passed through verbatim.
        command = ['kaggle', 'competitions', 'submit',
                   '-c', str(self.compete),
                   '-f', os.path.join(self.new_folder_path, predictions_file_name),
                   '-m', str(self.description)]
        print(' '.join(command))
        try:
            output = subprocess.run(command).returncode
        except FileNotFoundError:
            # CLI not installed: report the conventional "command not found"
            # status rather than raising, as the shell call did.
            output = 127
        print()
        print('Output: ', output)
        return self

    def check_results(self, timeout=5):
        """Wait ``timeout`` seconds, then print (and record) one submission's result.

        The first element returned by ``competitions_submissions_list`` is
        used (presumably the most recent one — TODO confirm ordering). When a
        readme is attached its ``score``, ``date`` and ``status`` are updated.

        Returns:
            Submission: ``self``.
        """
        time.sleep(timeout)
        submissions = self.kaggle_api.competitions_submissions_list(
            self.compete)
        if not submissions:
            # Nothing to report yet; indexing [0] would raise IndexError.
            print('No submissions found for', self.compete)
            return self

        last_submission = submissions[0]
        if self.readme is not None:
            self.readme.score = last_submission['publicScore']
            self.readme.date = last_submission['date']
            self.readme.status = last_submission['status']

        print('Description: ', last_submission['description'])
        print('Date: ', last_submission['date'])
        print('Status: ', last_submission['status'])
        print('Score: ', last_submission['publicScore'])
        return self

    def save_readme(self):
        """Write ``README.md`` from the attached readme.

        Returns:
            Submission: ``self``.

        Raises:
            Exception: the submission was created without ``create_readme``.
        """
        # Check before opening so a failed call does not leave an empty
        # README.md behind.
        if self.readme is None:
            raise Exception("'create_readme' should be True")
        with open(os.path.join(self.new_folder_path, 'README.md'), 'w') as readme_file:
            readme_file.write(self.readme.markdown())
        return self
""" Pulls data from Kaggle API """
from kaggle import KaggleApi

# Remote resources fetched by this script and the local output directory.
_DATASET_REF = "shivamb/Netflix-shows"
_KERNEL_REF = "eugenioscionti/scraping-rotten-tomatoes-to-enrich-netflix-dataset"
_OUTPUT_DIR = "./"

api = KaggleApi()
api.authenticate()

# Download the dataset files and unzip them.
api.dataset_download_files(_DATASET_REF, unzip=True)

# Fetch the output files of the enrichment kernel into the output directory.
api.kernels_output(_KERNEL_REF, _OUTPUT_DIR)
def new_kaggle_api():
    """Return a freshly created ``KaggleApi`` after calling ``authenticate()`` on it."""
    client = KaggleApi()
    client.authenticate()
    return client
def get_kaggle_client(credentials=DEFAULT_CREDENTIALS):
    """Load ``credentials`` and return an authenticated ``KaggleApi``.

    ``load_credentials`` is called before the client is created, so whatever
    it sets up is in place when ``authenticate()`` runs.
    """
    load_credentials(credentials)

    client = KaggleApi()
    client.authenticate()
    return client
def _authenticated_client():
    """Build a ``KaggleApi`` instance, authenticate it and hand it back."""
    kaggle_client = KaggleApi()
    kaggle_client.authenticate()
    return kaggle_client
def fetch_pins_people(resize=.5,
                      min_faces_per_person=0,
                      color=False,
                      slice_=(slice(25, 275), slice(25, 275)),
                      download_if_missing=True):
    """Load PINS dataset.

    Use the PINS dataset provided by Kaggle and leverage the scikit-learn
    memory optimizations (the loader is cached on disk with ``Memory``).

    Args:
        resize (float, optional): Image resize factor. Defaults to .5.
        min_faces_per_person (int, optional): Minimal number of images per
            person. Defaults to 0.
        color (bool): Toggle if images should be in RGB or 1 channel.
            Defaults to False.
        slice_ (tuple, optional): A rectangle to which images are sliced.
            Defaults to (slice(25, 275), slice(25, 275)).
        download_if_missing (bool, optional): Set if the dataset should be
            downloaded if the ZIP archive is not present on the machine.
            Defaults to True.

    Returns:
        sklearn.utils.Bunch: Collection of data set with the fields ``data``
        (images flattened to one row each), ``images``, ``target`` and
        ``target_names`` (with the ``'pins '`` / ``' face'`` markers removed
        and title-cased).
    """
    from kaggle import KaggleApi

    # Locate the dataset inside the Kaggle home directory
    kaggle_api = KaggleApi()
    kaggle_home = kaggle_api.read_config_file()['path']
    dataset_dir = os.path.join(kaggle_home, 'datasets', PINS_DATASET['name'])
    path_to_zip = os.path.join(dataset_dir, PINS_DATASET['zip'])
    path_to_files = os.path.join(dataset_dir, PINS_DATASET['folder'])

    # Download if missing
    if download_if_missing and not os.path.exists(path_to_zip):
        kaggle_api.authenticate()
        kaggle_api.dataset_download_files(PINS_DATASET['name'], quiet=False)

    # Extract ZIP dataset
    if not os.path.exists(path_to_files):
        with ZipFile(path_to_zip, 'r') as zip_obj:
            zip_obj.extractall(dataset_dir)

    # Load data in memory
    memory = Memory(location=kaggle_home, compress=6, verbose=0)
    load_func = memory.cache(_fetch_lfw_people)

    faces, target, target_names = load_func(
        path_to_files,
        resize=resize,
        min_faces_per_person=min_faces_per_person,
        color=color,
        slice_=slice_)

    X = faces.reshape(len(faces), -1)

    # Fix names: whole-array string operations through the public np.char
    # namespace (np.core.defchararray is a deprecated private path).
    target_names = np.char.replace(target_names, 'pins ', '')
    target_names = np.char.replace(target_names, ' face', '')
    target_names = np.char.title(target_names)

    # pack the results as a Bunch instance
    return Bunch(data=X,
                 images=faces,
                 target=target,
                 target_names=target_names)