Example 1
    def _download_and_prepare(self, dataset_dir, scratch_dir, _):
        # Download dataset
        tmp_zip_path = os.path.join(scratch_dir, "dataset.zip")
        etaw.download_google_drive_file(self._GDRIVE_ID, path=tmp_zip_path)

        # Extract zip
        logger.info("Extracting dataset")
        etau.extract_zip(tmp_zip_path, delete_zip=True)
        _move_dir(os.path.join(scratch_dir, self._DIR_IN_ZIP), dataset_dir)

        # Get metadata
        logger.info("Parsing dataset metadata")
        classes = foud.FiftyOneDatasetImporter.get_classes(dataset_dir)
        num_samples = foud.FiftyOneDatasetImporter.get_num_samples(dataset_dir)
        logger.info("Found %d samples", num_samples)

        dataset_type = fot.FiftyOneDataset()
        return dataset_type, num_samples, classes
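Note: _move_dir is a private helper that is not shown in this example. A minimal sketch of what such a helper might do, assuming it simply relocates the extracted contents into the dataset directory (hypothetical; the real implementation may differ):

import os
import shutil


def _move_dir(src, dst):
    # Hypothetical helper: move the contents of `src` into `dst`,
    # creating the destination if needed, then remove the empty source
    os.makedirs(dst, exist_ok=True)
    for item in os.listdir(src):
        shutil.move(os.path.join(src, item), os.path.join(dst, item))

    os.rmdir(src)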
Example 2
def download_coco_dataset_split(dataset_dir, split, year="2017", cleanup=True):
    """Downloads and extracts the given split of the COCO dataset to the
    specified directory.

    Any existing files are not re-downloaded.

    Args:
        dataset_dir: the directory into which to download the dataset
        split: the split to download. Supported values are
            ``("train", "validation", "test")``
        year ("2017"): the dataset year to download. Supported values are
            ``("2014", "2017")``
        cleanup (True): whether to clean up the zip files after extraction

    Returns:
        a tuple of

        -   images_dir: the path to the directory containing the extracted
            images
        -   anno_path: the path to the annotations JSON file
    """
    if year not in _IMAGE_DOWNLOAD_LINKS:
        raise ValueError(
            "Unsupported year '%s'; supported values are %s"
            % (year, tuple(_IMAGE_DOWNLOAD_LINKS.keys()))
        )

    if split not in _IMAGE_DOWNLOAD_LINKS[year]:
        raise ValueError(
            "Unsupported split '%s'; supported values are %s"
            % (split, tuple(_IMAGE_DOWNLOAD_LINKS[year].keys()))
        )

    #
    # Download images
    #

    images_src_path = _IMAGE_DOWNLOAD_LINKS[year][split]
    images_zip_path = os.path.join(
        dataset_dir, os.path.basename(images_src_path)
    )
    images_dir = os.path.join(
        dataset_dir, os.path.splitext(os.path.basename(images_src_path))[0]
    )

    if not os.path.isdir(images_dir):
        logger.info("Downloading images zip to '%s'", images_zip_path)
        etaw.download_file(images_src_path, path=images_zip_path)
        logger.info("Extracting images to '%s'", images_dir)
        etau.extract_zip(images_zip_path, delete_zip=cleanup)
    else:
        logger.info("Image folder '%s' already exists", images_dir)

    #
    # Download annotations
    #

    anno_path = os.path.join(dataset_dir, _ANNOTATION_PATHS[year][split])

    if split == "test":
        # Test split has no annotations, so we must populate the labels file
        # manually
        images = _make_images_list(images_dir)

        labels = {
            "info": {},
            "licenses": [],
            "categories": [],
            "images": images,
            "annotations": [],
        }
        etas.write_json(labels, anno_path)
    else:
        anno_src_path = _ANNOTATION_DOWNLOAD_LINKS[year]
        anno_zip_path = os.path.join(
            dataset_dir, os.path.basename(anno_src_path)
        )

        if not os.path.isfile(anno_path):
            logger.info("Downloading annotations zip to '%s'", anno_zip_path)
            etaw.download_file(anno_src_path, path=anno_zip_path)
            logger.info("Extracting annotations to '%s'", anno_path)
            etau.extract_zip(anno_zip_path, delete_zip=cleanup)
        else:
            logger.info("Annotations file '%s' already exists", anno_path)

    return images_dir, anno_path
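The test branch above relies on _make_images_list, which is not shown. A minimal sketch, assuming it builds COCO-style image records from the files on disk (the field set is an assumption; the real helper may also record image dimensions):

def _make_images_list(images_dir):
    # Hypothetical sketch: one COCO-style record per image file
    logger.info("Computing image metadata for '%s'", images_dir)

    images = []
    for idx, filename in enumerate(sorted(os.listdir(images_dir))):
        images.append({"id": idx, "file_name": filename})

    return images

For illustration, a hypothetical call to download_coco_dataset_split (the target directory is an assumption for this sketch):

images_dir, anno_path = download_coco_dataset_split(
    "/tmp/coco", "validation", year="2017", cleanup=True
)
print("Images directory:", images_dir)
print("Annotations file:", anno_path)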
Example 3
# pragma pylint: disable=redefined-builtin
# pragma pylint: disable=unused-wildcard-import
# pragma pylint: disable=wildcard-import
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from builtins import *
# pragma pylint: enable=redefined-builtin
# pragma pylint: enable=unused-wildcard-import
# pragma pylint: enable=wildcard-import

import logging
import os

import eta.core.web as etaw
import eta.core.utils as etau


logger = logging.getLogger(__name__)


FILE_ID = "0B7phNvpRqNdpNEVpVjE2VXQxOWc"


logger.info("Downloading example data from Google Drive")
path = os.path.join(os.path.dirname(__file__), "data.zip")
etaw.download_google_drive_file(FILE_ID, path=path)
etau.extract_zip(path, delete_zip=True)
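Unlike Example 2, this script re-downloads the archive on every run. A hedged variant that skips the work when the extracted data already exists (the "data" directory name is an assumption based on the zip name):

# Hypothetical variant: skip download/extraction when the data is
# already present, mirroring the idempotent pattern from Example 2
data_dir = os.path.join(os.path.dirname(__file__), "data")
if os.path.isdir(data_dir):
    logger.info("Data directory '%s' already exists", data_dir)
else:
    logger.info("Downloading example data from Google Drive")
    etaw.download_google_drive_file(FILE_ID, path=path)
    etau.extract_zip(path, delete_zip=True)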