def maybe_download_and_store_google_drive(file_pair: Dict[str, str],
                                          root_key: str,
                                          description: str = None,
                                          force_download: bool = False,
                                          use_subkeys=True,
                                          **kwargs) -> List[str]:
    """Download files from Google Drive and register them under ``root_key``.

    Args:
        file_pair: Mapping of destination file name -> Google Drive file id.
        root_key: Datastore key under which every downloaded item is stored.
        description: Description forwarded to the datastore registration calls.
        force_download: When True, skip the "already stored" shortcut and
            forward the flag to ``maybe_download_google_drive``.
        use_subkeys: For downloads that resolve to a directory, register the
            directory contents via ``register_to_datastore`` when True;
            otherwise add the directory as a single folder entry.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The list handed to ``validate_subkeys`` when the shortcut is taken
        (presumably filled by it with the existing keys -- confirm against
        that helper); otherwise the keys registered by this call followed by
        ``root_key``.
    """
    old_keys: List[str] = []
    if (not force_download and DATA_STORE.is_valid(root_key)
            and validate_subkeys(root_key, old_keys)):
        return old_keys

    keys: List[str] = []
    DATA_STORE.create_key(root_key, 'root.key', force=True)

    for file_name, file_id in file_pair.items():
        log_message(f"Downloading {file_name}")
        file_dest = os.path.join(DATA_STORE.working_directory, file_name)
        data_path = maybe_download_google_drive(file_id,
                                                file_dest,
                                                force_download=force_download)
        data_path = post_process(data_path)
        # Fixed: the original message lacked the space before "to", gluing
        # the file name and the word together.
        log_message(f"Decompressed {file_name} to {data_path}")

        if not os.path.isdir(data_path):
            # Single file: key is the file name up to its first dot.
            file_key = os.path.join(root_key, file_name.split(".")[0])
            DATA_STORE.add_file(file_key, data_path, description, force=True)
            keys.append(file_key)
        elif use_subkeys:
            keys.extend(register_to_datastore(data_path, root_key, description))
        else:
            # Whole directory stored under one key named after the archive.
            folder_key = os.path.join(root_key, file_name.split(".zip")[0])
            DATA_STORE.add_folder(folder_key, data_path, force=True)
            keys.append(folder_key)

        log_message(f"Completed {file_name}")

    # NOTE(review): verify that re-creating the root key here does not discard
    # the entries registered above.
    DATA_STORE.create_key(root_key, 'root.key', force=True)
    return [*keys, root_key]
def maybe_download_and_store_zip(url: str,
                                 root_key: str,
                                 description: str = None,
                                 use_subkeys=True,
                                 **kwargs) -> List[str]:
    """Download a zip archive, unpack it and register it under ``root_key``.

    Args:
        url: Location of the archive; its last path segment is used as the
            local file name.
        root_key: Datastore key under which the content is stored.
        description: Description forwarded to ``register_to_datastore``.
        use_subkeys: When True the unpacked content is registered through
            ``register_to_datastore``; otherwise the unpacked path is added
            as one folder at ``root_key``.
        **kwargs: Forwarded unchanged to ``maybe_download``.

    Returns:
        The list handed to ``validate_subkeys`` when ``root_key`` is already
        valid; otherwise the registered sub-keys, each joined onto
        ``root_key`` (an empty list when ``use_subkeys`` is False).
    """
    cached_keys: List[str] = []
    already_stored = (DATA_STORE.is_valid(root_key)
                      and validate_subkeys(root_key, cached_keys))
    if already_stored:
        return cached_keys

    # Ensure one layer file structure for zip file? TODO (Karen)
    archive_name = url.split("/")[-1]
    extracted_path = maybe_download(file_name=archive_name,
                                    source_url=url,
                                    work_directory=DATA_STORE.working_directory,
                                    postprocess=unzip,
                                    **kwargs)

    if not use_subkeys:
        DATA_STORE.add_folder(root_key, extracted_path, force=True)
        return []

    sub_keys = register_to_datastore(extracted_path, root_key, description)
    # A DATA_STORE.create_key(root_key, 'root.key', force=True) call used to
    # follow here; it was removed because it wiped the files stored by the
    # register_to_datastore call above. (Karen)
    return [os.path.join(root_key, sub_key) for sub_key in sub_keys]
def __init__(self, force_rebuild: bool = False, nohashcheck: bool = True) -> None:
    """Set up class metadata and datastore keys/paths for Pascal VOC 2012.

    Args:
        force_rebuild: When True, download the trainval archive and add the
            images, segmentation and annotation folders to ``DATA_STORE``
            before the paths are resolved.
        nohashcheck: Not used by this method; kept so existing callers that
            pass it keep working.
    """
    archive_url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"

    self._classes = (
        '__background__',  # always index 0
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
        'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
        'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
    # Derived from the tuple above (21 entries) so the two cannot drift apart.
    self.num_classes = len(self._classes)
    self._class_to_ind = dict(zip(self._classes, range(self.num_classes)))
    self.max_num_obj = 50

    self.voc_root_key = "pascal/voc/2012"
    # NOTE(review): the first component is shorter than the archive's base
    # name; presumably it matches the directory the tar helper extracts to --
    # confirm.
    self.file_structure = os.path.join("VOCtrainval_11-May-2", "VOCdevkit", "VOC2012")

    # Locations of the extracted dataset inside the working directory.
    work_file_path = os.path.join(DATA_STORE.working_directory, self.file_structure)
    annotation_dir = os.path.join(work_file_path, "Annotations")
    problems_dir = os.path.join(work_file_path, "ImageSets")
    images_dir = os.path.join(work_file_path, "JPEGImages")
    segmentation_class_dir = os.path.join(work_file_path, "SegmentationClass")
    segmentation_object_dir = os.path.join(work_file_path, "SegmentationObject")

    # Datastore keys for each part of the dataset.
    self.annotation_key = os.path.join(self.voc_root_key, "annotations")
    self.images_key = os.path.join(self.voc_root_key, "images")
    self.segmentation_key = os.path.join(self.voc_root_key, "segmentation", "class")
    self.segmentation_obj_key = os.path.join(self.voc_root_key, "segmentation", "obj")

    if force_rebuild:
        log_message("Copying data to destination folder in flux")
        maybe_download_and_store_tar(url=archive_url,
                                     root_key=self.voc_root_key,
                                     use_subkeys=False)
        DATA_STORE.add_folder(self.images_key, images_dir)
        DATA_STORE.add_folder(self.segmentation_key, segmentation_class_dir)
        # Fixed: this previously read the never-assigned attribute
        # ``self.segmentation_obj`` and raised AttributeError.
        DATA_STORE.add_folder(self.segmentation_obj_key, segmentation_object_dir)
        DATA_STORE.add_folder(self.annotation_key, annotation_dir)

    self.problems_key = retrieve_subkeys(self.voc_root_key)
    if not self.problems_key:
        log_message("Building Problem Keys")
        registered = register_to_datastore(problems_dir, self.voc_root_key, "")
        # NOTE(review): only freshly registered keys are prefixed with the
        # root key; this assumes retrieve_subkeys already returns
        # root-qualified keys -- confirm.
        self.problems_key = [
            os.path.join(self.voc_root_key, key) for key in registered
        ]

    # Resolve the datastore keys to the stored locations.
    self.image_path = DATA_STORE[self.images_key]
    self.annotation_path = DATA_STORE[self.annotation_key]
    self.seg_class_path = DATA_STORE[self.segmentation_key]
    self.seg_obj_path = DATA_STORE[self.segmentation_obj_key]