Exemple #1
0
def maybe_download_and_store_google_drive(file_pair: Dict[str, str],
                                          root_key: str,
                                          description: str = None,
                                          force_download: bool = False,
                                          use_subkeys=True,
                                          **kwargs) -> List[str]:
    """Download Google Drive files and register them under ``root_key``.

    Every entry of ``file_pair`` is downloaded into the data store's working
    directory, passed through ``post_process`` and then registered in
    ``DATA_STORE``.

    Args:
        file_pair: Mapping from destination file name to Google Drive file id.
        root_key: Data store key under which all results are registered.
        description: Description forwarded to the data store when registering
            sub-keys or single files.
        force_download: When True, the cached-key shortcut is skipped and the
            flag is forwarded to ``maybe_download_google_drive``.
        use_subkeys: For results that are directories, register their content
            through ``register_to_datastore`` when True; otherwise add the
            whole directory as one folder key.
        **kwargs: Accepted for call compatibility; not used here.

    Returns:
        The list handed to ``validate_subkeys`` when the root key is already
        valid (presumably filled in place by that helper -- confirm);
        otherwise the keys registered by this call followed by ``root_key``.
    """
    cached_keys: List[str] = []
    if (not force_download and DATA_STORE.is_valid(root_key)
            and validate_subkeys(root_key, cached_keys)):
        return cached_keys

    keys: List[str] = []
    DATA_STORE.create_key(root_key, 'root.key', force=True)

    for file_name, file_id in file_pair.items():
        log_message(f"Downloading {file_name}")
        file_dest = os.path.join(DATA_STORE.working_directory, file_name)
        data_path = maybe_download_google_drive(file_id,
                                                file_dest,
                                                force_download=force_download)
        data_path = post_process(data_path)
        # The original message was missing the space before "to".
        log_message(f"Decompressed {file_name} to {data_path}")

        if not os.path.isdir(data_path):
            # Single file: key is the file name up to its first dot.
            file_key = os.path.join(root_key, file_name.split(".")[0])
            DATA_STORE.add_file(file_key, data_path, description, force=True)
            keys.append(file_key)
        elif use_subkeys:
            keys.extend(
                register_to_datastore(data_path, root_key, description))
        else:
            # Whole directory as one key, named after the archive without
            # its ".zip" suffix.
            folder_key = os.path.join(root_key, file_name.split(".zip")[0])
            DATA_STORE.add_folder(folder_key, data_path, force=True)
            keys.append(folder_key)
        log_message(f"Completed {file_name}")

    # NOTE(review): the root key is created a second time after registration;
    # confirm that force=True here does not discard the entries added above.
    DATA_STORE.create_key(root_key, 'root.key', force=True)

    return keys + [root_key]
Exemple #2
0
def maybe_download_and_store_zip(url: str,
                                 root_key: str,
                                 description: str = None,
                                 use_subkeys=True,
                                 **kwargs) -> List[str]:
    """Download a zip archive and register its content under ``root_key``.

    Args:
        url: Source URL; its last path segment is used as the file name.
        root_key: Data store key under which the content is registered.
        description: Description forwarded to ``register_to_datastore``.
        use_subkeys: When True, register the content through
            ``register_to_datastore``; otherwise add the whole extracted
            path as a single folder at ``root_key``.
        **kwargs: Forwarded unchanged to ``maybe_download``.

    Returns:
        The list handed to ``validate_subkeys`` when the root key is already
        valid; otherwise the registered sub-keys joined onto ``root_key``
        (an empty list when ``use_subkeys`` is False).
    """
    cached_keys: List[str] = []
    already_stored = (DATA_STORE.is_valid(root_key)
                      and validate_subkeys(root_key, cached_keys))
    if already_stored:
        return cached_keys

    # TODO(Karen): ensure a one-layer file structure for the zip file?
    archive_name = url.split("/")[-1]
    extracted_path = maybe_download(
        file_name=archive_name,
        source_url=url,
        work_directory=DATA_STORE.working_directory,
        postprocess=unzip,
        **kwargs)

    if not use_subkeys:
        DATA_STORE.add_folder(root_key, extracted_path, force=True)
        return []

    # DATA_STORE.create_key(root_key, 'root.key', force=True) is deliberately
    # not called here: per Karen, it removes the files stored by the
    # register_to_datastore call.
    sub_keys = register_to_datastore(extracted_path, root_key, description)
    return [os.path.join(root_key, sub_key) for sub_key in sub_keys]
Exemple #3
0
    def __init__(self,
                 force_rebuild: bool = False,
                 nohashcheck: bool = True) -> None:
        """Set up class labels, data store keys and paths for PASCAL VOC 2012.

        Args:
            force_rebuild: When True, call ``maybe_download_and_store_tar``
                for the VOC 2012 train/val archive and add the image,
                segmentation and annotation folders to the data store.
            nohashcheck: Not used by this initializer.
        """
        url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar"
        self._classes = (
            '__background__',  # always index 0
            'aeroplane',
            'bicycle',
            'bird',
            'boat',
            'bottle',
            'bus',
            'car',
            'cat',
            'chair',
            'cow',
            'diningtable',
            'dog',
            'horse',
            'motorbike',
            'person',
            'pottedplant',
            'sheep',
            'sofa',
            'train',
            'tvmonitor')
        # Derived from the tuple above (21 entries) so the two cannot drift.
        self.num_classes = len(self._classes)
        self._class_to_ind = dict(zip(self._classes, range(self.num_classes)))
        self.max_num_obj = 50
        self.voc_root_key = "pascal/voc/2012"
        # NOTE(review): "VOCtrainval_11-May-2" looks truncated relative to
        # the archive name; presumably it matches the directory the
        # extraction step produces -- confirm before changing.
        self.file_structure = os.path.join("VOCtrainval_11-May-2", "VOCdevkit",
                                           "VOC2012")

        # Locations of the extracted dataset inside the working directory.
        work_file_path = os.path.join(DATA_STORE.working_directory,
                                      self.file_structure)
        annotation_dir = os.path.join(work_file_path, "Annotations")
        problems_dir = os.path.join(work_file_path, "ImageSets")
        images_dir = os.path.join(work_file_path, "JPEGImages")
        segmentation_class_dir = os.path.join(work_file_path,
                                              "SegmentationClass")
        segmentation_object_dir = os.path.join(work_file_path,
                                               "SegmentationObject")

        # Data store keys, all rooted at voc_root_key.
        self.annotation_key = os.path.join(self.voc_root_key, "annotations")
        self.images_key = os.path.join(self.voc_root_key, "images")
        self.segmentation_key = os.path.join(self.voc_root_key, "segmentation",
                                             "class")
        self.segmentation_obj_key = os.path.join(self.voc_root_key,
                                                 "segmentation", "obj")

        if force_rebuild:
            log_message("Copying data to destination folder in flux")
            maybe_download_and_store_tar(url=url,
                                         root_key=self.voc_root_key,
                                         use_subkeys=False)
            DATA_STORE.add_folder(self.images_key, images_dir)
            DATA_STORE.add_folder(self.segmentation_key,
                                  segmentation_class_dir)
            # Fixed: previously read self.segmentation_obj, which this
            # initializer never sets.
            DATA_STORE.add_folder(self.segmentation_obj_key,
                                  segmentation_object_dir)
            DATA_STORE.add_folder(self.annotation_key, annotation_dir)

        self.problems_key = retrieve_subkeys(self.voc_root_key)
        if not self.problems_key:
            log_message("Building Problem Keys")
            registered = register_to_datastore(problems_dir,
                                               self.voc_root_key, "")
            self.problems_key = [
                os.path.join(self.voc_root_key, key) for key in registered
            ]

        # Look up the stored entries for each key.
        self.image_path = DATA_STORE[self.images_key]
        self.annotation_path = DATA_STORE[self.annotation_key]
        self.seg_class_path = DATA_STORE[self.segmentation_key]
        self.seg_obj_path = DATA_STORE[self.segmentation_obj_key]