def _download_and_prepare(self, dataset_dir, scratch_dir, _):
    # Download dataset
    tmp_zip_path = os.path.join(scratch_dir, "dataset.zip")
    etaw.download_google_drive_file(self._GDRIVE_ID, path=tmp_zip_path)

    # Extract zip
    logger.info("Extracting dataset")
    etau.extract_zip(tmp_zip_path, delete_zip=True)
    _move_dir(os.path.join(scratch_dir, self._DIR_IN_ZIP), dataset_dir)

    # Get metadata
    logger.info("Parsing dataset metadata")
    classes = foud.FiftyOneDatasetImporter.get_classes(dataset_dir)
    num_samples = foud.FiftyOneDatasetImporter.get_num_samples(dataset_dir)
    logger.info("Found %d samples", num_samples)

    dataset_type = fot.FiftyOneDataset()

    return dataset_type, num_samples, classes
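# The `_move_dir` helper referenced above is not defined in this snippet. A
# minimal sketch of what such a helper might look like is given below; the
# name and exact behavior are assumptions for illustration, not the library's
# actual implementation.
import os
import shutil


def _move_dir(src, dst):
    # Move every entry from `src` into `dst`, creating `dst` if necessary
    os.makedirs(dst, exist_ok=True)
    for entry in os.listdir(src):
        shutil.move(os.path.join(src, entry), os.path.join(dst, entry))

    # Remove the now-empty source directory
    os.rmdir(src)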
def download_coco_dataset_split(dataset_dir, split, year="2017", cleanup=True):
    """Downloads and extracts the given split of the COCO dataset to the
    specified directory.

    Any existing files are not re-downloaded.

    Args:
        dataset_dir: the directory to download the dataset
        split: the split to download. Supported values are
            ``("train", "validation", "test")``
        year ("2017"): the dataset year to download. Supported values are
            ``("2014", "2017")``
        cleanup (True): whether to cleanup the zip files after extraction

    Returns:
        a tuple of

        -   images_dir: the path to the directory containing the extracted
            images
        -   anno_path: the path to the annotations JSON file
    """
    if year not in _IMAGE_DOWNLOAD_LINKS:
        raise ValueError(
            "Unsupported year '%s'; supported values are %s"
            % (year, tuple(_IMAGE_DOWNLOAD_LINKS.keys()))
        )

    if split not in _IMAGE_DOWNLOAD_LINKS[year]:
        raise ValueError(
            "Unsupported split '%s'; supported values are %s"
            % (split, tuple(_IMAGE_DOWNLOAD_LINKS[year].keys()))
        )

    #
    # Download images
    #

    images_src_path = _IMAGE_DOWNLOAD_LINKS[year][split]
    images_zip_path = os.path.join(
        dataset_dir, os.path.basename(images_src_path)
    )
    images_dir = os.path.join(
        dataset_dir, os.path.splitext(os.path.basename(images_src_path))[0]
    )

    if not os.path.isdir(images_dir):
        logger.info("Downloading images zip to '%s'", images_zip_path)
        etaw.download_file(images_src_path, path=images_zip_path)
        logger.info("Extracting images to '%s'", images_dir)
        etau.extract_zip(images_zip_path, delete_zip=cleanup)
    else:
        logger.info("Image folder '%s' already exists", images_dir)

    #
    # Download annotations
    #

    anno_path = os.path.join(dataset_dir, _ANNOTATION_PATHS[year][split])

    if split == "test":
        # Test split has no annotations, so we must populate the labels file
        # manually
        images = _make_images_list(images_dir)

        labels = {
            "info": {},
            "licenses": [],
            "categories": [],
            "images": images,
            "annotations": [],
        }
        etas.write_json(labels, anno_path)
    else:
        anno_src_path = _ANNOTATION_DOWNLOAD_LINKS[year]
        anno_zip_path = os.path.join(
            dataset_dir, os.path.basename(anno_src_path)
        )

        if not os.path.isfile(anno_path):
            logger.info("Downloading annotations zip to '%s'", anno_zip_path)
            etaw.download_file(anno_src_path, path=anno_zip_path)
            logger.info("Extracting annotations to '%s'", anno_path)
            etau.extract_zip(anno_zip_path, delete_zip=cleanup)
        else:
            logger.info("Annotations file '%s' already exists", anno_path)

    return images_dir, anno_path
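# Minimal usage sketch for `download_coco_dataset_split`. The output
# directory below is an illustrative assumption; any writable directory
# works, per the docstring above.
if __name__ == "__main__":
    images_dir, anno_path = download_coco_dataset_split(
        "/tmp/coco", "validation", year="2017", cleanup=True
    )
    print("Images extracted to:", images_dir)
    print("Annotations written to:", anno_path)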
'''
# pragma pylint: disable=redefined-builtin
# pragma pylint: disable=unused-wildcard-import
# pragma pylint: disable=wildcard-import
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from builtins import *

# pragma pylint: enable=redefined-builtin
# pragma pylint: enable=unused-wildcard-import
# pragma pylint: enable=wildcard-import

import logging
import os

import eta.core.web as etaw
import eta.core.utils as etau


logger = logging.getLogger(__name__)


FILE_ID = "0B7phNvpRqNdpNEVpVjE2VXQxOWc"

logger.info("Downloading example data from Google Drive")
path = os.path.join(os.path.dirname(__file__), "data.zip")
etaw.download_google_drive_file(FILE_ID, path=path)
etau.extract_zip(path, delete_zip=True)
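# Note: the script above logs via `logging` but does not configure a handler,
# so its `logger.info` messages are silent by default. A minimal sketch of how
# logging could be enabled before running it is shown below; this is an
# assumption about the desired setup, not part of the script itself (ETA also
# provides its own logging configuration that could be used instead).
import logging

logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")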