Esempio n. 1
0
def coco(
    image_archive: Union[str, pathlib.Path],
    annotation_archive: Union[str, pathlib.Path],
    decoder: Optional[str] = "pil",
):
    """Assemble a sample datapipe from an image archive and an annotation archive.

    Both archives are loaded from disk and read with ``ReadFilesFromZip``.
    The annotation entries are decoded and expanded by
    ``IterateOverAnnotations``; the image entries are optionally decoded and
    passed through ``_collate_image``. The two streams are then combined by
    ``DependentGroupByKey`` and every combined item is mapped through
    ``_collate_sample``.

    Args:
        image_archive: Path to the archive containing the images.
        annotation_archive: Path to the archive containing the annotations.
        decoder: Name handed to ``imagehandler`` to decode the image entries.
            A falsy value skips the image decoding step.

    Returns:
        The datapipe obtained by mapping ``_collate_sample`` over the grouped
        annotation/image entries.
    """
    # Annotation branch: archive -> zip members -> decoded -> annotations.
    annotation_path = pathlib.Path(annotation_archive).resolve()
    annotations: Iterable = (str(annotation_path),)
    annotations = dp.iter.LoadFilesFromDisk(annotations)
    annotations = dp.iter.ReadFilesFromZip(annotations)
    # No handlers are passed here, so RoutedDecoder uses its own defaults.
    annotations = dp.iter.RoutedDecoder(annotations)
    annotations = IterateOverAnnotations(annotations)

    # Image branch: archive -> zip members -> (optionally decoded) -> collated.
    image_path = pathlib.Path(image_archive).resolve()
    images: Iterable = (str(image_path),)
    images = dp.iter.LoadFilesFromDisk(images)
    images = dp.iter.ReadFilesFromZip(images)
    if decoder:
        image_handlers = [imagehandler(decoder)]
        images = dp.iter.RoutedDecoder(images, handlers=image_handlers)
    images = dp.iter.Map(images, _collate_image)

    # Items of both branches are keyed by their first element.
    grouped = DependentGroupByKey(
        annotations, images, key_fn=lambda data: data[0]
    )
    return dp.iter.Map(grouped, _collate_sample)
Esempio n. 2
0
def caltech101(
    root: Union[str, pathlib.Path], image_decoder: Optional[str] = "pil"
) -> Iterable[Dict[str, Any]]:
    """Assemble the sample datapipe from the two archives found under ``root``.

    Images are read from ``101_ObjectCategories.tar.gz`` and annotations from
    ``101_Annotations.tar``. The two streams are combined by
    ``DependentGroupByKey`` (keyed with ``_images_key_fn``) and each combined
    item is mapped through ``_collate_sample``.

    Args:
        root: Directory containing both archives.
        image_decoder: Name handed to ``imagehandler`` to decode the image
            entries. A falsy value skips the image decoding step.

    Returns:
        The datapipe obtained by mapping ``_collate_sample`` over the grouped
        image/annotation entries.
    """
    base_dir = pathlib.Path(root).resolve()

    # Image branch: archive -> tar members -> filtered -> (optionally decoded).
    images: Iterable = (str(base_dir / "101_ObjectCategories.tar.gz"),)
    images = dp.iter.LoadFilesFromDisk(images)
    images = dp.iter.ReadFilesFromTar(images)
    images = Drop(images, _images_drop_condition)
    if image_decoder:
        image_handlers = [imagehandler(image_decoder)]
        images = dp.iter.RoutedDecoder(images, handlers=image_handlers)

    # Annotation branch: archive -> tar members -> decoded via mathandler -> collated.
    annotations: Iterable = (str(base_dir / "101_Annotations.tar"),)
    annotations = dp.iter.LoadFilesFromDisk(annotations)
    annotations = dp.iter.ReadFilesFromTar(annotations)
    annotations = dp.iter.RoutedDecoder(annotations, handlers=[mathandler()])
    annotations = dp.iter.Map(annotations, _collate_ann)

    grouped = DependentGroupByKey(images, annotations, key_fn=_images_key_fn)
    return dp.iter.Map(grouped, fn=_collate_sample)
Esempio n. 3
0
def _make_image_datapipe(
        datapipe: Iterable, *,
        decoder: Optional[str]) -> Iterable[Tuple[str, Dict[str, Any]]]:
    """Optionally decode the entries of ``datapipe`` and collate them as images.

    Args:
        datapipe: Upstream iterable of image entries.
        decoder: Name handed to ``imagehandler``. A falsy value skips the
            decoding step so entries reach ``_collate_image`` undecoded.

    Returns:
        ``datapipe`` mapped through ``_collate_image``, with a
        ``RoutedDecoder`` stage in front when ``decoder`` is truthy.
    """
    if decoder:
        image_handlers = [imagehandler(decoder)]
        datapipe = dp.iter.RoutedDecoder(datapipe, handlers=image_handlers)
    return dp.iter.Map(datapipe, _collate_image)
Esempio n. 4
0
def _images_datapipe(root: pathlib.Path, split_datapipe: Iterable[Tuple[str,
                                                                        bool]],
                     *, decoder: Optional[str]) -> Iterable[Tuple[str, Any]]:
    """Build the image datapipe for ``img_align_celeba.zip`` under ``root``.

    The archive members are passed through ``DependentDrop`` together with
    ``split_datapipe`` (keyed with ``_key_fn``) and are then optionally
    decoded.

    Args:
        root: Directory containing ``img_align_celeba.zip``.
        split_datapipe: Iterable handed to ``DependentDrop`` as its second
            input; presumably it marks which entries belong to the requested
            split - TODO confirm against ``DependentDrop``.
        decoder: Name handed to ``imagehandler``. A falsy value returns the
            entries undecoded.

    Returns:
        The filtered (and, if requested, decoded) image datapipe.
    """
    archive = str(root / "img_align_celeba.zip")
    members = dp.iter.ReadFilesFromZip(dp.iter.LoadFilesFromDisk((archive, )))
    selected = DependentDrop(members, split_datapipe, key_fn=_key_fn)

    if not decoder:
        return selected
    return dp.iter.RoutedDecoder(selected, handlers=[imagehandler(decoder)])
Esempio n. 5
0
def caltech256(
    root: Union[str, pathlib.Path],
    handler: Optional[str] = "pil",
) -> Iterable[Dict[str, Any]]:
    """Assemble the sample datapipe from ``256_ObjectCategories.tar`` under ``root``.

    Args:
        root: Directory containing ``256_ObjectCategories.tar``.
        handler: Name handed to ``imagehandler`` to decode the archive
            members. A falsy value skips the decoding step.

    Returns:
        The archive members (optionally decoded) mapped through
        ``_caltech256_sample_map``.
    """
    archive = pathlib.Path(root).resolve() / "256_ObjectCategories.tar"

    samples: Iterable = (str(archive), )
    samples = dp.iter.LoadFilesFromDisk(samples)
    samples = dp.iter.ReadFilesFromTar(samples)
    if handler:
        image_handlers = [imagehandler(handler)]
        samples = dp.iter.RoutedDecoder(samples, handlers=image_handlers)

    return dp.iter.Map(samples, fn=_caltech256_sample_map)
Esempio n. 6
0
def _make_target_datapipe(
        datapipe: Iterable, *, target_type: str,
        decoder: Optional[str]) -> Iterable[Tuple[str, Dict[str, Any]]]:
    """Attach the target-type specific processing steps to ``datapipe``.

    Args:
        datapipe: Upstream iterable of target entries.
        target_type: Either ``"detection"`` or ``"segmentation"``.
        decoder: Name handed to ``imagehandler``. Only used for segmentation
            targets; a falsy value skips the decoding step.

    Returns:
        ``datapipe`` mapped through the collate function that belongs to
        ``target_type``. Segmentation targets additionally get a
        ``RoutedDecoder`` stage in front when ``decoder`` is truthy.

    Raises:
        ValueError: If ``target_type`` is neither ``"detection"`` nor
            ``"segmentation"``.
    """
    if target_type == "detection":
        # TODO: left unspecified by the original author; note that detection
        # targets get no decoding stage here.
        collate = _collate_target_detection
    elif target_type == "segmentation":
        if decoder:
            datapipe = dp.iter.RoutedDecoder(datapipe,
                                             handlers=[imagehandler(decoder)])

        collate = _collate_target_segmentation
    else:
        # Fail loudly instead of silently treating a typo (or a not yet
        # supported target type) as segmentation.
        raise ValueError(
            f"Unknown target_type {target_type!r}; "
            "expected 'detection' or 'segmentation'.")
    return dp.iter.Map(datapipe, collate)
Esempio n. 7
0
    def __init__(
        self,
        root: Union[str, pathlib.Path],
        *,
        year: str = "2012",
        split: str = "train",
        target_type: str = "detection",  # segmentation
        decoder: Optional[str] = "pil",
    ):
        """Scan the archive once and index the entries needed for the dataset.

        Every member of ``VOCtrainval_11-May-2012.tar`` under ``root`` is
        visited and sorted by the name of its parent folder into split files,
        images (``JPEGImages``) or targets; everything else is skipped. The
        entries are stored under the key returned by ``self._data_to_key``.

        Args:
            root: Directory containing the archive.
            year: Currently not read by this method; the archive name is
                hard-coded (see the TODO below).
            split: Key of the split file to read the sample keys from.
            target_type: Key into ``SPLIT_FOLDER`` and ``TARGET_TYPE_FOLDER``
                selecting which folders hold the split files and the targets.
            decoder: Name handed to ``imagehandler``; a falsy value sets
                ``self.decoder`` to ``None``.

        Raises:
            KeyError: If ``target_type`` is missing from ``SPLIT_FOLDER`` or
                ``TARGET_TYPE_FOLDER``, or if no split file was indexed under
                ``split``.
        """
        self.target_type = target_type
        if decoder:
            self.decoder = Decoder([imagehandler(decoder)])
        else:
            self.decoder = None

        # TODO: make this variable based on the input
        archive_path = pathlib.Path(root).resolve() / "VOCtrainval_11-May-2012.tar"

        split_folder = SPLIT_FOLDER[target_type]
        target_type_folder = TARGET_TYPE_FOLDER[target_type]

        entries = dp.iter.LoadFilesFromDisk((str(archive_path), ))
        entries = dp.iter.ReadFilesFromTar(entries)

        split_files: Dict[str, Tuple[str, io.BufferedIOBase]] = {}
        self.images: Dict[str, Tuple[str, io.BufferedIOBase]] = {}
        self.targets: Dict[str, Tuple[str, io.BufferedIOBase]] = {}

        for entry in entries:
            folder = pathlib.Path(entry[0]).parent
            folder_name = folder.name

            # Route the entry to the index matching its parent folder.
            if folder.parent.name == "ImageSets" and folder_name == split_folder:
                bucket = split_files
            elif folder_name == "JPEGImages":
                bucket = self.images
            elif folder_name == target_type_folder:
                bucket = self.targets
            else:
                continue

            bucket[self._data_to_key(entry)] = entry

        self.keys = ReadLineFromFile((split_files[split], ))
Esempio n. 8
0
    def __init__(
        self,
        root: Union[str, pathlib.Path],
        *,
        split: str = "train",
        decoder: Optional[str] = "pil",
    ):
        """Set up the metadata and the image datapipe for one split.

        Args:
            root: Directory containing ``ILSVRC2012_img_{split}.tar``.
            split: Name of the split; it selects the archive and is forwarded
                to ``_ImageNetMeta``.
            decoder: Name handed to ``imagehandler`` to decode the archive
                members. A falsy value leaves them undecoded.
        """
        self.root = pathlib.Path(root)
        self.split = split
        self._meta = _ImageNetMeta(self.root, split=self.split)

        archive_path = (self.root / f"ILSVRC2012_img_{split}.tar").resolve()
        members = dp.iter.LoadFilesFromDisk((str(archive_path), ))
        members = dp.iter.ReadFilesFromTar(members)
        if split == "train":
            # the train archive is a tar of tars, so unpack one more level
            members = dp.iter.ReadFilesFromTar(members)
        if decoder:
            image_handlers = [imagehandler(decoder)]
            members = dp.iter.RoutedDecoder(members, handlers=image_handlers)
        self.datapipe = members