예제 #1
0
파일: kernel_proc.py 프로젝트: ar90n/kkt
def create_dataset(
    api: KaggleApi,
    dataset_slug: str,
    license_name: str,
    target_dir: Path,
    quiet: bool = False,
):
    """Create a new private Kaggle dataset.

    The slug is used both as the dataset slug and as its title. The request
    is first handed to ``api.upload_files`` together with ``target_dir``
    (directory mode "tar") and then submitted through the create endpoint.

    Args:
        api: Kaggle API client.
        dataset_slug: Slug and title of the dataset; 6 to 50 characters.
        license_name: License name stored in the request.
        target_dir: Directory passed to ``api.upload_files``.
        quiet: Passed through to ``api.upload_files``.

    Returns:
        A ``DatasetNewResponse`` built from the processed API response.

    Raises:
        ValueError: If the slug is shorter than 6 or longer than 50 characters.
    """
    if not 6 <= len(dataset_slug) <= 50:
        raise ValueError("The dataset slug must be between 6 and 50 characters")

    new_dataset_request = DatasetNewRequest(
        title=dataset_slug,
        slug=dataset_slug,
        owner_slug=get_username(api),
        license_name=license_name,
        subtitle=None,
        description=None,
        files=[],
        is_private=True,
        convert_to_csv=False,
        category_ids=[],
    )
    api.upload_files(new_dataset_request, None, target_dir, quiet, dir_mode="tar")

    raw_response = api.datasets_create_new_with_http_info(new_dataset_request)
    return DatasetNewResponse(api.process_response(raw_response))
예제 #2
0
파일: kernel_proc.py 프로젝트: ar90n/kkt
def update_dataset(
    api: KaggleApi,
    dataset_slug: str,
    target_dir: Path,
    quiet=False,
    delete_old_versions=True,
):
    """Publish a new version of an existing dataset owned by the current user.

    The request is handed to ``api.upload_files`` together with
    ``target_dir`` (directory mode "tar") and then submitted through the
    create-version endpoint. The version notes are always ``"test"``.

    Args:
        api: Kaggle API client.
        dataset_slug: Slug of the dataset to update.
        target_dir: Directory passed to ``api.upload_files``.
        quiet: Passed through to ``api.upload_files``.
        delete_old_versions: Stored in the request under the same name.

    Returns:
        A ``DatasetNewVersionResponse`` built from the processed API response.
    """
    owner = get_username(api)
    version_request = DatasetNewVersionRequest(
        version_notes="test",
        subtitle=None,
        description=None,
        files=[],
        convert_to_csv=False,
        category_ids=[],
        delete_old_versions=delete_old_versions,
    )
    api.upload_files(version_request, None, target_dir, quiet, dir_mode="tar")

    raw_response = api.datasets_create_version_with_http_info(
        owner, dataset_slug, version_request
    )
    return DatasetNewVersionResponse(api.process_response(raw_response))
예제 #3
0
    def __init__(self, compete, work_dir, default_submission_id=0):
        """Authenticate against Kaggle and remember the submission settings.

        Args:
            compete: Competition identifier stored on the instance.
            work_dir: Working directory stored on the instance.
            default_submission_id: Initial value of the submission counter.
        """
        client = KaggleApi()
        self.kaggle_api = client
        client.authenticate()

        self.work_dir, self.compete = work_dir, compete
        self.default_submission_id = default_submission_id
예제 #4
0
파일: kernel_proc.py 프로젝트: ar90n/kkt
def push(
    api: KaggleApi, params: KernelPushParams, script_body: str
) -> KernelPushResponse:
    """Push a kernel to Kaggle through the Kernel API.

    Builds a ``KernelPushRequest`` from ``params`` and ``script_body`` (the
    language is always "python") and sends it.

    Parameters
    ==========
    api: the Kaggle API client
    params: kernel settings copied field by field into the request
    script_body: the kernel source, sent as the request text

    Returns a ``KernelPushResponse`` built from the processed API response.
    """
    request_fields = dict(
        id=params.id_no,
        slug=params.slug,
        new_title=params.new_title,
        text=script_body,
        language="python",
        kernel_type=params.kernel_type,
        is_private=params.is_private,
        enable_gpu=params.enable_gpu,
        enable_internet=params.enable_internet,
        dataset_data_sources=params.dataset_data_sources,
        competition_data_sources=params.competition_data_sources,
        kernel_data_sources=params.kernel_data_sources,
        category_ids=params.category_ids,
    )
    kernel_push_request = KernelPushRequest(**request_fields)

    raw_response = api.kernel_push_with_http_info(
        kernel_push_request=kernel_push_request
    )
    return KernelPushResponse(api.process_response(raw_response))
예제 #5
0
    def __get_authenticated_kaggle_api(self, configfile):
        """Return an authenticated ``KaggleApi`` client.

        When ``configfile`` is given, ``self.login_with_configfile`` is called
        with it before the client is created and authenticated.
        """
        has_configfile = configfile is not None
        if has_configfile:
            self.login_with_configfile(configfile)

        client = KaggleApi()
        client.authenticate()
        return client
예제 #6
0
def get_notify_competitions_list():
    """Return the recently created competitions that should be notified.

    Each entry returned by the Kaggle API is wrapped in ``Competition``; those
    whose ``notify_message`` equals ``DO_NOT_NOTIFY`` are dropped.

    Any exception is logged through ``logger.error`` and the function then
    returns ``None`` implicitly.
    """
    try:
        client = KaggleApi()
        client.authenticate()

        # Lazily wrap each entry so wrapping and filtering stay interleaved.
        wrapped = (
            Competition(info)
            for info in client.competitions_list(sort_by='recentlyCreated')
        )
        return [
            competition
            for competition in wrapped
            if competition.notify_message != DO_NOT_NOTIFY
        ]
    except Exception as e:
        logger.error(e)
예제 #7
0
def publish_data(api, path):
    """Upload ``path`` as a new dataset version, then clear the directory.

    The version note records the current UTC time. CSV conversion is enabled
    and old versions are kept. After the upload, ``clear_dir(path)`` is called.
    """
    version_note = f"Dataset updated till (UTC): {datetime.utcnow()}"
    kag_api.dataset_create_version(
        api,
        path,
        version_note,
        convert_to_csv=True,
        delete_old_versions=False,
    )
    print("[INFO] Kaggle Dataset uploaded.")
    clear_dir(path)
예제 #8
0
파일: install.py 프로젝트: ar90n/kkt
def wait_for_install_kernel_completion(api: KaggleApi,
                                       meta_data: Dict,
                                       kernel_slug: str,
                                       quiet: bool = False) -> Dict[str, Any]:
    """Poll the install kernel's output until its log is non-empty.

    While the log is empty the function sleeps 10 seconds between polls
    (echoing a progress line unless ``quiet``). Once a log is present it waits
    another 5 seconds, fetches the kernel status and either returns the output
    response or raises.

    Args:
        api: Kaggle API client.
        meta_data: Not used by this function.
        kernel_slug: Slug of the kernel owned by the current user.
        quiet: Suppress the progress message when true.

    Returns:
        The processed kernel output response.

    Raises:
        InstallKernelError: If the status is not "complete" or a failure
            message is present; carries the messages extracted from the log.
    """
    owner = get_username(api)
    while True:
        response = api.process_response(
            api.kernel_output_with_http_info(owner, kernel_slug))

        if response["log"] == "":
            # Nothing logged yet: report progress and poll again later.
            if not quiet:
                click.echo("Wait for install kernel completion...")
            time.sleep(10)
            continue

        time.sleep(5)  # wait for the kernel status to finish synchronizing
        result = kernel_proc.status(api, kernel_slug)
        failed = result["status"] != "complete" or result["failureMessage"]
        if failed:
            parsed_log = json.loads(response["log"])
            raise InstallKernelError(get_error_messages(parsed_log))
        return response
예제 #9
0
파일: init.py 프로젝트: ar90n/kkt
def competition_prompt(api: KaggleApi) -> str:
    """Interactively pick a competition and return it as a string.

    Prompts for a search query (empty by default), lists the matching
    competitions with their index, then prompts for the index to select.
    """
    query = click.prompt("competition", default="", show_default=False)
    matches = api.competitions_list(search=query)

    for position, candidate in enumerate(matches):
        click.echo(f"{position} {candidate}")

    selected = click.prompt(
        ">", type=int, show_choices=False, prompt_suffix=" "
    )
    return str(matches[selected])
예제 #10
0
def download_data(data_dir: Optional[str] = None) -> None:
    """Download the configured dataset and keep only the selected files.

    The dataset named in ``config.get_dataset_attributes()`` is downloaded and
    unzipped into the data directory. Every top-level entry that is not listed
    under ``"files"`` is deleted. For each listed entry that is a directory,
    its ``*.jpg`` files are moved up into the data directory and the directory
    is then removed.

    :param data_dir: target directory; ``config.get_data_dir()`` when None
    :return: None
    """
    data_dir_path = config.get_data_dir() if data_dir is None else Path(data_dir)
    data_dir_path.mkdir(exist_ok=True)

    dataset = config.get_dataset_attributes()
    client = KaggleApi()
    client.authenticate()
    client.dataset_download_cli(dataset["name"], path=data_dir_path, unzip=True)

    selected_files = [data_dir_path / name for name in dataset["files"]]

    # Drop everything the configuration did not ask for.
    for entry in data_dir_path.glob("*"):
        if entry in selected_files:
            continue
        if entry.is_dir():
            shutil.rmtree(entry)
        else:
            entry.unlink()

    # Flatten selected directories: lift their images one level up.
    for kept in selected_files:
        if not kept.is_dir():
            continue
        for image in kept.glob("*.jpg"):
            image.rename(image.parents[1] / image.name)
        kept.rmdir()
def submit_report(date_report):
    """Publish the ``data/`` folder as a new dataset version.

    The version note embeds ``date_report`` formatted as
    ``%m/%d/%Y %H:%M``; older versions are deleted.
    """
    client = KaggleApi()
    client.authenticate()

    last_update = datetime.strftime(date_report, "%m/%d/%Y %H:%M")
    version_note = f"Auto update - {last_update} GMT-3"
    client.dataset_create_version("data/", version_note, delete_old_versions=True)
예제 #12
0
def make_kernels_url():
    """Build two text blocks listing kernel titles and URLs for a competition.

    Requests up to 18 Python kernels for ``COMPETITION_NAME`` sorted by
    ascending score. The first nine entries go into the first string and the
    remainder into the second.

    Returns:
        A ``(first_block, second_block)`` tuple of strings.
    """
    client = KaggleApi()
    client.authenticate()
    kernels_list = client.kernels_list(competition=COMPETITION_NAME,
                                       page_size=18,
                                       language='python',
                                       sort_by='scoreAscending')

    entry_template = '*{}\nurl : https://www.kaggle.com/{}\n'
    first_block, second_block = [], []
    for position, kernel_info in enumerate(kernels_list):
        entry = entry_template.format(kernel_info.title, kernel_info.ref)
        bucket = first_block if position <= 8 else second_block
        bucket.append(entry)

    logger.debug('Get {} kernels'.format(len(kernels_list)))

    return ''.join(first_block), ''.join(second_block)
예제 #13
0
    def __init__(self,
                 compete,
                 name,
                 work_dir,
                 description=None,
                 create_readme=False):
        """Set up a uniquely named submission folder and a Kaggle client.

        The folder name is ``name`` suffixed with the current timestamp and is
        created under ``work_dir`` when it does not exist yet. A ``Readme`` is
        attached only when ``create_readme`` is true.
        """
        self.id = str(time.time())
        self.compete = compete
        self.name = f'{name} - {self.id}'
        self.description = description

        # Avoid a doubled separator when work_dir already ends with one.
        separator = '' if work_dir[-1] == '/' else '/'
        self.new_folder_path = work_dir + separator + self.name

        self.kaggle_api = KaggleApi()
        self.kaggle_api.authenticate()

        folder_missing = not os.path.exists(self.new_folder_path)
        if folder_missing:
            os.mkdir(self.new_folder_path)

        if create_readme:
            self.readme = Readme(self.name, self.description)
        else:
            self.readme = None
예제 #14
0
파일: install.py 프로젝트: ar90n/kkt
def upload_requirement_pkgs(api: KaggleApi,
                            meta_data: Dict,
                            target_dir: Path,
                            quiet: bool = False):
    """Upload the requirement packages as a Kaggle dataset.

    The dataset is created (license "CC0-1.0") when ``api.dataset_status``
    returns ``None`` for its slug; otherwise a new version is published.

    Args:
        api: Kaggle API client.
        meta_data: Passed to ``get_dataset_slug`` to derive the dataset slug.
        target_dir: Directory handed to the create/update helper.
        quiet: Passed through to the create/update helper.

    Returns:
        Whatever ``kernel_proc.create_dataset`` or
        ``kernel_proc.update_dataset`` returns.
    """
    full_slug = get_dataset_slug(api, meta_data)
    _, dataset_slug = full_slug.split("/")[-2:]

    if api.dataset_status(full_slug) is not None:
        return kernel_proc.update_dataset(
            api,
            dataset_slug=dataset_slug,
            target_dir=target_dir,
            quiet=quiet,
        )

    return kernel_proc.create_dataset(
        api,
        dataset_slug=dataset_slug,
        license_name="CC0-1.0",
        target_dir=target_dir,
        quiet=quiet,
    )
예제 #15
0
파일: kernel_proc.py 프로젝트: ar90n/kkt
def status(api: KaggleApi, kernel_slug: str):
    """Return ``api.kernel_status`` for a kernel of the configured user.

    The user name is read from ``api.config_values`` under
    ``api.CONFIG_NAME_USER``.
    """
    owner = api.config_values[api.CONFIG_NAME_USER]
    kernel_status = api.kernel_status(owner, kernel_slug)
    return kernel_status
예제 #16
0
파일: kkt_command.py 프로젝트: ar90n/kkt
def get_kaggle_api() -> Any:
    """Return a ``KaggleApi`` built on a fresh ``ApiClient``."""
    api_client = ApiClient()
    return KaggleApi(api_client)
예제 #17
0
def _authenticated_client():
    """Create a ``KaggleApi`` client, authenticate it and return it."""
    kaggle_client = KaggleApi()
    kaggle_client.authenticate()
    return kaggle_client
예제 #18
0
def kaggle_authenticate():
    """Create and authenticate a ``kag_api`` client, report it, and return it."""
    client = kag_api()
    client.authenticate()
    print("\n[INFO] Kaggle api authenticated.")
    return client
예제 #19
0
def kaggle_dataset_download(api, dataset_name, path):
    """Download and unzip ``dataset_name`` into ``path``, then report it."""
    download_options = {"unzip": True, "path": path}
    kag_api.dataset_download_files(api, dataset_name, **download_options)
    print("[INFO] Dataset downloaded.")
예제 #20
0
def get_kaggle_client(credentials=DEFAULT_CREDENTIALS):
    """Load ``credentials`` and return an authenticated ``KaggleApi`` client."""
    load_credentials(credentials)
    client = KaggleApi()
    client.authenticate()
    return client
    payload = {"message": message}
    if image_name:
        try:
            files = {"imageFile": open(image_name, "rb")}
            requests.post(url, headers=headers, params=payload, files=files)
        except:
            requests.post(url, headers=headers, params=payload)
    else:
        requests.post(url, headers=headers, params=payload)

    # message = 'test'
    # files = {"imageFile": open("./fig/uni1.jpg", "rb")}


# Authenticate once at module level; the statements below use this client.
api = KaggleApi()
api.authenticate()
# api.competitions_list()

# Fetch the kernels of the competition in a single page of up to 999 entries.
klist = api.kernels_list(competition=COMP_NAME, page_size=999)

# Only when a previously saved kernel list exists on disk.
if len(glob.glob(KERNEL_LIST)) > 0:
    # Load what was stored by an earlier run (presumably kernel refs, given
    # the variable name — confirm against the code that writes KERNEL_LIST).
    klist_old_ref = pickle_read(KERNEL_LIST)

    """ ref is url key """
    # for key in dir(klist[0]):
    #     print('{}: {}'.format(key, getattr(klist[0], key)))
    # kernel_notifier('https://www.kaggle.com/' + klist[0].ref)

    # Refs of the kernels that are currently listed.
    klist_ref = [k.ref for k in klist]
예제 #22
0
class Submitter:
    """Store submission artifacts on disk and optionally upload them to Kaggle.

    Each call to :meth:`submit` creates one folder under ``work_dir`` holding
    the predictions CSV, a ``message.txt`` and, optionally, the pickled model.
    """

    def __init__(self, compete, work_dir, default_submission_id=0):
        """Authenticate against Kaggle and remember the submission settings.

        Args:
            compete: Competition identifier used for uploads and queries.
            work_dir: Directory under which submission folders are created.
            default_submission_id: First id handed out when ``submit`` is
                called without an explicit ``submission_id``.
        """
        self.kaggle_api = KaggleApi()
        self.kaggle_api.authenticate()

        self.work_dir = work_dir
        self.compete = compete
        self.default_submission_id = default_submission_id

    def submit(self,
               predicted: pd.DataFrame,
               file_name='submission.csv',
               message=None,
               save_model=True,
               model=None,
               model_name=None,
               submit=True,
               submission_id=None,
               submission_name=None,
               open_in_browser=False):
        """Write the submission files and optionally upload them.

        Args:
            predicted: Predictions written as CSV (without the index).
            file_name: Name of the CSV file inside the submission folder.
            message: Submission message; defaults to ``str(model)`` or, when
                no model is given, to ``file_name``.
            save_model: Pickle ``model`` into the submission folder. Skipped
                when ``model`` is ``None``.
            model: Model object to pickle and/or describe in the message.
            model_name: Base name of the pickle file; defaults to
                ``str(model)``.
            submit: Upload the CSV with the ``kaggle`` command line tool.
            submission_id: Explicit id; otherwise the internal counter is used
                and incremented.
            submission_name: Optional label appended to the folder name.
            open_in_browser: Open the competition's submissions page.
        """
        import subprocess

        # Folder and files
        if submission_id is None:
            submission_id = self.default_submission_id
            self.default_submission_id += 1

        # A missing name used to crash ' '.join(); fall back to the bare id.
        # The spacing for named submissions is kept as it always was.
        if submission_name is None:
            submission_label = str(submission_id)
        else:
            submission_label = ' '.join(
                [str(submission_id), ' -- ', submission_name])

        new_folder_path = f'{self.work_dir}/{submission_label}'
        os.makedirs(new_folder_path, exist_ok=True)

        # Save model (there is nothing worth pickling when no model is given)
        if save_model and model is not None:
            if model_name is None:
                model_name = str(model)
            with open(f'{new_folder_path}/{model_name}.pickle',
                      'wb') as pickle_file:
                pickle.dump(model, pickle_file)

        # Submission
        submission_path = f'{new_folder_path}/{file_name}'
        predicted.to_csv(submission_path, index=False)
        if message is None:
            message = file_name if model is None else str(model)

        with open(new_folder_path + '/message.txt', 'w') as message_file:
            message_file.write(' '.join([submission_label, '\n', message]))

        # Upload
        if submit:
            print('Uploading submission...')
            # An argument list avoids shell quoting problems (and injection)
            # when the message or a path contains quotes or metacharacters.
            command = [
                'kaggle', 'competitions', 'submit',
                '-c', str(self.compete),
                '-f', submission_path,
                '-m', str(message),
            ]
            print(' '.join(command))
            try:
                output = subprocess.run(command, check=False).returncode
            except OSError as error:
                # e.g. the kaggle CLI is not installed; report, don't crash.
                output = error
            print('Output: ', output)

        # Open in browser
        if open_in_browser:
            webbrowser.open(
                f'https://www.kaggle.com/c/{self.compete}/submissions', new=2)

    def check_submission(self):
        """Print description, date, status and public score of the first
        entry returned by ``competitions_submissions_list``."""
        last_submission = self.kaggle_api.competitions_submissions_list(
            self.compete)[0]
        print('Description: ', last_submission['description'])
        print('Date: ', last_submission['date'])
        print('Status: ', last_submission['status'])
        print('Score: ', last_submission['publicScore'])
예제 #23
0
파일: batch.py 프로젝트: todokku/kagoole
def new_kaggle_api():
    """Return a freshly created, authenticated ``KaggleApi`` client."""
    client = KaggleApi()
    client.authenticate()
    return client
"""
Pulls data from Kaggle API
"""
from kaggle import KaggleApi
# Create the client and authenticate before issuing any request.
api = KaggleApi()
api.authenticate()

# Download the "shivamb/Netflix-shows" dataset and unzip it.
api.dataset_download_files("shivamb/Netflix-shows", unzip= True)

# Fetch the output of the Rotten Tomatoes scraping kernel; "./" is passed as
# the destination path.
api.kernels_output("eugenioscionti/scraping-rotten-tomatoes-to-enrich-netflix-dataset", "./")
예제 #25
0
파일: kernel_proc.py 프로젝트: ar90n/kkt
def list_outputs(api: KaggleApi, kernel_slug: str):
    """Return the processed kernel-output response for the configured user.

    The user name is read from ``api.config_values`` under
    ``api.CONFIG_NAME_USER``.
    """
    owner = api.config_values[api.CONFIG_NAME_USER]
    raw_response = api.kernel_output_with_http_info(owner, kernel_slug)
    return api.process_response(raw_response)
예제 #26
0
from kaggle import KaggleApi

import os
import json

def main():
    """Load credentials from ``kaggle.json`` and submit ``data/submission.csv``
    to the "titanic" competition."""
    config_path = "kaggle.json"
    submission_file = os.path.join('data', 'submission.csv')

    print("auth")
    with open(config_path, 'r') as config_file:
        credentials = json.load(config_file)

    client = KaggleApi()
    # Credentials come from the local JSON file instead of authenticate().
    client._load_config(credentials)

    print("submit")
    client.competition_submit(
        file_name=submission_file,
        competition="titanic",
        message="test submission",
        quiet=False
    )


if __name__ == '__main__':
    main()
예제 #27
0
class Submission:
    """A timestamped submission folder with helpers to save artifacts,
    upload predictions to Kaggle and record the result in a README.

    Most methods return ``self`` so calls can be chained.
    """

    def __init__(self,
                 compete,
                 name,
                 work_dir,
                 description=None,
                 create_readme=False):
        """Create the submission folder and an authenticated Kaggle client.

        Args:
            compete: Competition identifier used for uploads and queries.
            name: Human readable name; the current timestamp is appended.
            work_dir: Directory under which the submission folder is created.
            description: Used as the submission message and README text.
            create_readme: Attach a ``Readme`` object to the submission.
        """
        self.id = str(time.time())
        self.compete = compete
        self.name = f'{name} - {self.id}'
        self.description = description

        # os.path.join copes with a trailing separator and with an empty
        # work_dir, which the former work_dir[-1] check crashed on.
        self.new_folder_path = os.path.join(work_dir, self.name)

        self.kaggle_api = KaggleApi()
        self.kaggle_api.authenticate()

        os.makedirs(self.new_folder_path, exist_ok=True)

        self.readme = Readme(self.name,
                             self.description) if create_readme else None

    def save_model(self, model, file_name=None):
        """Pickle ``model`` into the submission folder.

        ``file_name`` defaults to ``str(model)`` with backslashes removed.
        """
        if file_name is None:
            file_name = str(model).replace('\\', '')

        with open(f'{self.new_folder_path}/{file_name}.pickle',
                  'wb') as pickle_file:
            pickle.dump(model, pickle_file)

    def save_keras_model(self,
                         model,
                         file_name=None,
                         save_format='pickle',
                         save_summary_to_readme=True,
                         *args,
                         **kwargs):
        """Save a Keras model as pickle, JSON config or via ``model.save``.

        Args:
            model: A ``tensorflow.keras.Model`` instance.
            file_name: Base file name; defaults to ``str(model)`` with
                backslashes removed.
            save_format: One of ``'pickle'``, ``'config'`` or ``'h5'``.
            save_summary_to_readme: Store ``model.summary()`` in the README.
            *args, **kwargs: Forwarded to ``model.save`` for ``'h5'``.

        Returns:
            This submission.

        Raises:
            Exception: If ``model`` is not a Keras model, if the summary is
                requested without a README, or if ``save_format`` is unknown.
        """
        from tensorflow.keras import Model

        if not isinstance(model, Model):
            raise Exception('Model should be instance of keras.Model')

        if file_name is None:
            file_name = str(model).replace('\\', '')

        if save_summary_to_readme:
            # Fail before doing any work when there is nowhere to store it.
            if self.readme is None:
                raise Exception("'create_readme' should be True")

            summary = io.StringIO()
            model.summary(print_fn=lambda s: print(s, file=summary))
            self.readme.model_summary = summary.getvalue()

        if save_format == 'pickle':
            with open(f'{self.new_folder_path}/{file_name}.pickle',
                      'wb') as pickle_file:
                pickle.dump(model, pickle_file)

        elif save_format == 'config':
            with open(f'{self.new_folder_path}/{file_name}.json',
                      'w') as config_file:
                json.dump(model.get_config(), config_file)

        elif save_format == 'h5':
            model.save(f'{self.new_folder_path}/{file_name}', *args, **kwargs)

        else:
            raise Exception('Undefined save_format')

        return self

    def save_predictions(self,
                         predictions,
                         columns,
                         index,
                         file_name='predictions.csv'):
        """Write a two-column CSV: ``columns[0]`` holds ``index`` and
        ``columns[1]`` holds ``predictions``. Returns this submission."""
        pd.DataFrame(dict(zip(columns, [index, predictions]))) \
            .to_csv(f'{self.new_folder_path}/{file_name}', index=False)

        return self

    def open_in_browser(self):
        """Open the competition's submissions page. Returns this submission."""
        webbrowser.open(f'https://www.kaggle.com/c/{self.compete}/submissions',
                        new=2)

        return self

    def submit(self, predictions_file_name='predictions.csv'):
        """Upload the predictions file with the ``kaggle`` command line tool,
        using the description as the message. Returns this submission."""
        import subprocess

        print('Uploading submission...')
        # An argument list avoids shell quoting problems (and injection) when
        # the description or the folder name contains quotes.
        command = [
            'kaggle', 'competitions', 'submit',
            '-c', str(self.compete),
            '-f', f'{self.new_folder_path}/{predictions_file_name}',
            '-m', str(self.description),
        ]
        print(' '.join(command))
        try:
            output = subprocess.run(command, check=False).returncode
        except OSError as error:
            # e.g. the kaggle CLI is not installed; report, don't crash.
            output = error
        print()
        print('Output: ', output)

        return self

    def check_results(self, timeout=5):
        """Wait ``timeout`` seconds, then print the first entry returned by
        ``competitions_submissions_list`` and copy its score, date and status
        into the README when there is one. Returns this submission."""
        time.sleep(timeout)
        last_submission = self.kaggle_api.competitions_submissions_list(
            self.compete)[0]

        if self.readme is not None:
            self.readme.score = last_submission['publicScore']
            self.readme.date = last_submission['date']
            self.readme.status = last_submission['status']

        print('Description: ', last_submission['description'])
        print('Date: ', last_submission['date'])
        print('Status: ', last_submission['status'])
        print('Score: ', last_submission['publicScore'])

        return self

    def save_readme(self):
        """Write the README's markdown to ``README.md`` in the folder.

        Returns:
            This submission.

        Raises:
            Exception: If the submission was created without a README.
        """
        if self.readme is None:
            raise Exception("'create_readme' should be True")

        # The context manager closes the handle even if markdown() raises.
        with open(f'{self.new_folder_path}/README.md', 'w') as readme_file:
            readme_file.write(self.readme.markdown())

        return self
예제 #28
0
from kaggle import KaggleApi

import os
import json
import zipfile

def extract_zip_archives(directory):
    """Extract every ``*.zip`` file found directly in ``directory`` into it.

    Only names that end with ``.zip`` (case-insensitive) are treated as
    archives, so files such as ``notes.zip.txt`` are left alone. Each archive
    name is printed before extraction.

    Returns:
        The list of archive file names that were extracted, sorted by name.
    """
    extracted = []
    for file_name in sorted(os.listdir(directory)):
        if not file_name.lower().endswith('.zip'):
            continue
        print(file_name)
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(os.path.join(directory, file_name), 'r') as zip_ref:
            zip_ref.extractall(directory)
        extracted.append(file_name)
    return extracted


if __name__ == '__main__':
    config_path = "kaggle.json"
    data_path = 'data'
    print("auth")
    with open(config_path, 'r') as f:
        config_dict = json.load(f)

    api = KaggleApi()
    # Credentials come from the local JSON file instead of authenticate().
    api._load_config(config_dict)
    print("download")
    api.competition_download_files(competition="titanic",
                                   path=data_path,
                                   quiet=False)
    print("extract")
    extract_zip_archives(data_path)
예제 #29
0
def fetch_pins_people(resize=.5,
                      min_faces_per_person=0,
                      color=False,
                      slice_=(slice(25, 275), slice(25, 275)),
                      download_if_missing=True):
    """Load PINS dataset.

    Use the PINS dataset provided by Kaggle and leverage the scikit-learn
    memory optimizations (the loader call is cached on disk).

    Args:
        resize (float, optional): Image resize factor. Defaults to .5.
        min_faces_per_person (int, optional): Minimal number of images per
            person. Defaults to 0.
        color (bool): Toggle if images should be in RGB or 1 channel.
            Defaults to False.
        slice_ (tuple, optional): A rectangle to which images are sliced.
            Defaults to (slice(25, 275), slice(25, 275)).
        download_if_missing (bool, optional): Set if the dataset should be
            downloaded if not present on the machine. Defaults to True.

    Returns:
        sklearn.utils.Bunch: Collection of data set with the fields ``data``
        (flattened images), ``images``, ``target`` and ``target_names``.
    """
    from kaggle import KaggleApi

    # Locate the ZIP archive and the extracted folder below the Kaggle home
    kaggle_api = KaggleApi()
    kaggle_home = kaggle_api.read_config_file()['path']
    dataset_dir = os.path.join(kaggle_home, 'datasets', PINS_DATASET['name'])
    path_to_zip = os.path.join(dataset_dir, PINS_DATASET['zip'])
    path_to_files = os.path.join(dataset_dir, PINS_DATASET['folder'])

    # Download if missing
    if download_if_missing and not os.path.exists(path_to_zip):
        kaggle_api.authenticate()
        kaggle_api.dataset_download_files(PINS_DATASET['name'], quiet=False)

    # Extract only once; later calls reuse the extracted folder
    if not os.path.exists(path_to_files):
        with ZipFile(path_to_zip, 'r') as zipObj:
            zipObj.extractall(dataset_dir)

    # Load data in memory
    m = Memory(location=kaggle_home, compress=6, verbose=0)
    load_func = m.cache(_fetch_lfw_people)

    faces, target, target_names = load_func(
        path_to_files,
        resize=resize,
        min_faces_per_person=min_faces_per_person,
        color=color,
        slice_=slice_)

    X = faces.reshape(len(faces), -1)

    # Fix names: drop the "pins " prefix and " face" suffix, then title-case.
    # np.char is the public alias of the private np.core.defchararray module,
    # whose np.core path is deprecated in NumPy 2.x.
    with np.nditer(target_names, op_flags=['readwrite']) as it:
        for x in it:
            x[...] = np.char.replace(x, 'pins ', '')
            x[...] = np.char.replace(x, ' face', '')
            x[...] = np.char.title(x)

    # pack the results as a Bunch instance
    return Bunch(data=X,
                 images=faces,
                 target=target,
                 target_names=target_names)