"""
Pulls data from Kaggle API
"""
from kaggle import KaggleApi
# Kaggle resources fetched by this script.
NETFLIX_DATASET = "shivamb/Netflix-shows"
ROTTEN_TOMATOES_KERNEL = (
    "eugenioscionti/scraping-rotten-tomatoes-to-enrich-netflix-dataset"
)
OUTPUT_DIR = "./"

# Single client instance, authenticated once and reused for both requests.
api = KaggleApi()
api.authenticate()

# Fetch the dataset files and unzip them; no destination is passed, so the
# Kaggle client decides where they land.
api.dataset_download_files(NETFLIX_DATASET, unzip=True)

# Fetch the output of the kernel, passing OUTPUT_DIR as the target path.
api.kernels_output(ROTTEN_TOMATOES_KERNEL, OUTPUT_DIR)
# Example #2
0
def kaggle_dataset_download(api, dataset_name, path):
    """Download a Kaggle dataset with unzipping enabled and report completion.

    Args:
        api: Forwarded as the first positional argument of
            ``kag_api.dataset_download_files`` (presumably an authenticated
            Kaggle API client -- confirm against the caller).
        dataset_name: Dataset identifier forwarded to the Kaggle API.
        path: Forwarded as the ``path`` keyword of the download call.
    """
    # NOTE(review): ``kag_api`` is not defined inside this function; it is
    # expected to be available at module level.
    download_options = {"unzip": True, "path": path}
    kag_api.dataset_download_files(api, dataset_name, **download_options)
    print("[INFO] Dataset downloaded.")
# Example #3
0
def fetch_pins_people(resize=.5,
                      min_faces_per_person=0,
                      color=False,
                      slice_=(slice(25, 275), slice(25, 275)),
                      download_if_missing=True):
    """Load the PINS dataset.

    Use the PINS dataset provided by Kaggle and leverage the scikit-learn
    memory optimizations: images are read through ``_fetch_lfw_people``
    wrapped in a ``Memory`` cache located in the Kaggle download directory.

    Args:
        resize (float, optional): Image resize factor. Defaults to .5.
        min_faces_per_person (int, optional): Minimal number of images per
            person. Defaults to 0.
        color (bool): Toggle if images should be in RGB or 1 channel.
            Defaults to False.
        slice_ (tuple, optional): A rectangle to which images are sliced.
            Defaults to (slice(25, 275), slice(25, 275)).
        download_if_missing (bool, optional): Set if the dataset should be
            downloaded if not present on the machine. Defaults to True.

    Returns:
        sklearn.utils.Bunch: Collection of data set with the fields
        ``data`` (images flattened to one row per image), ``images``,
        ``target`` and ``target_names``.

    Raises:
        FileNotFoundError: If the dataset has not been extracted yet and
            the ZIP archive is not available (for instance because
            ``download_if_missing`` is False).
    """
    # Imported lazily so that the kaggle package is only needed when this
    # loader is actually called.
    from kaggle import KaggleApi

    # Locate the dataset inside the Kaggle download directory.
    # NOTE: the Kaggle configuration is expected to provide a 'path' entry;
    # the lookup below fails otherwise.
    kaggle_api = KaggleApi()
    kaggle_home = kaggle_api.read_config_file()['path']
    dataset_dir = os.path.join(kaggle_home, 'datasets', PINS_DATASET['name'])
    path_to_zip = os.path.join(dataset_dir, PINS_DATASET['zip'])
    path_to_files = os.path.join(dataset_dir, PINS_DATASET['folder'])

    # Download if missing
    if download_if_missing and not os.path.exists(path_to_zip):
        kaggle_api.authenticate()
        kaggle_api.dataset_download_files(PINS_DATASET['name'], quiet=False)

    # Extract ZIP dataset
    if not os.path.exists(path_to_files):
        if not os.path.exists(path_to_zip):
            # Fail with an explicit message instead of the bare error that
            # ZipFile would raise for a missing archive.
            raise FileNotFoundError(
                "PINS archive not found at '{}'; download it or call with "
                "download_if_missing=True.".format(path_to_zip))
        with ZipFile(path_to_zip, 'r') as archive:
            archive.extractall(dataset_dir)

    # Load data in memory
    m = Memory(location=kaggle_home, compress=6, verbose=0)
    load_func = m.cache(_fetch_lfw_people)

    faces, target, target_names = load_func(
        path_to_files,
        resize=resize,
        min_faces_per_person=min_faces_per_person,
        color=color,
        slice_=slice_)

    X = faces.reshape(len(faces), -1)

    # Fix names: remove the 'pins ' and ' face' markers and title-case the
    # result. ``np.char`` is the public spelling of the string helpers
    # (``np.core`` is a private, deprecated path). Writing back through
    # ``[...]`` keeps the update in place, so the array keeps its dtype.
    cleaned = np.char.replace(target_names, 'pins ', '')
    cleaned = np.char.replace(cleaned, ' face', '')
    target_names[...] = np.char.title(cleaned)

    # pack the results as a Bunch instance
    return Bunch(data=X,
                 images=faces,
                 target=target,
                 target_names=target_names)