예제 #1
0
    def __init__(self, path):
        """Set up the extractor from a dataset directory.

        Every directory entry without a ``.json`` extension is treated as a
        subset. Label categories come from the dataset meta file when one is
        present; items are then loaded subset by subset.

        Raises:
            FileNotFoundError: if ``path`` is not a directory, or if it
                contains no subset entries.
        """
        if not osp.isdir(path):
            raise FileNotFoundError("Can't read dataset directory '%s'" % path)

        # Entries with a '.json' extension (the dataset meta file) are not
        # subsets
        subsets = []
        for entry in os.listdir(path):
            _, ext = osp.splitext(entry)
            if ext != '.json':
                subsets.append(entry)

        if not subsets:
            raise FileNotFoundError("Can't read subsets in directory '%s'" %
                                    path)

        super().__init__(subsets=sorted(subsets))
        self._path = path

        self._items = []
        self._categories = {}

        if has_meta_file(self._path):
            label_names = parse_meta_file(self._path).keys()
            self._categories = {
                AnnotationType.label:
                LabelCategories.from_iterable(label_names),
            }

        for subset_name in self._subsets:
            self._load_items(subset_name)
예제 #2
0
 def _load_categories(self):
     """Fill ``self._categories`` with the label categories of the dataset.

     Label sources, in priority order:
       1. the dataset meta file in the dataset directory;
       2. the labels file, one ``<label> [<class name>]`` entry per line
          (the optional second token is stored as the label parent);
       3. the names of the sub-directories of every subset directory,
          except the "no label" images directory.
     """
     label_cat = LabelCategories()
     path = osp.join(self._dataset_dir, VggFace2Path.LABELS_FILE)
     if has_meta_file(self._dataset_dir):
         for label in parse_meta_file(self._dataset_dir).keys():
             label_cat.add(label)
     elif osp.isfile(path):
         with open(path, encoding='utf-8') as labels_file:
             for line in labels_file:
                 objects = line.split()
                 if not objects:
                     # Blank line: there is no label to read, and indexing
                     # the empty token list would raise IndexError
                     continue
                 class_name = objects[1] if 1 < len(objects) else None
                 label_cat.add(objects[0], parent=class_name)
     else:
         for subset in self._subsets:
             subset_path = osp.join(self._dataset_dir, subset)
             if not osp.isdir(subset_path):
                 continue
             # sorted() keeps the label order independent of the
             # directory listing order
             for images_dir in sorted(os.listdir(subset_path)):
                 if osp.isdir(osp.join(subset_path, images_dir)) and \
                         images_dir != VggFace2Path.IMAGES_DIR_NO_LABEL:
                     label_cat.add(images_dir)
     self._categories[AnnotationType.label] = label_cat
예제 #3
0
    def __init__(self, path, image_meta=None):
        """Set up the extractor from a dataset directory.

        Args:
            path: dataset root directory.
            image_meta: image meta information. Either a dict (copied), a
                path to an image meta file relative to ``path``, or None,
                in which case the default image meta file from the
                annotations directory is used if it exists.

        Raises:
            FileNotFoundError: if ``path`` is not a directory.
        """
        if not osp.isdir(path):
            raise FileNotFoundError("Can't read dataset directory '%s'" % path)

        super().__init__()

        self._dataset_dir = path

        annotations_dir = osp.join(path, OpenImagesPath.ANNOTATIONS_DIR)
        self._annotation_files = os.listdir(annotations_dir)

        self._categories = {}
        self._items = []

        assert image_meta is None or isinstance(image_meta, (dict, str))
        if image_meta is None:
            default_meta_path = osp.join(annotations_dir,
                DEFAULT_IMAGE_META_FILE_NAME)
            try:
                self._image_meta = load_image_meta_file(default_meta_path)
            except FileNotFoundError:
                # The default meta file is optional
                self._image_meta = {}
        elif isinstance(image_meta, dict):
            self._image_meta = dict(image_meta)
        elif isinstance(image_meta, str):
            self._image_meta = load_image_meta_file(osp.join(path, image_meta))

        if not has_meta_file(path):
            self._load_categories()
        else:
            label_names = parse_meta_file(path).keys()
            self._categories = {
                AnnotationType.label:
                LabelCategories.from_iterable(label_names),
            }
        self._load_items()
예제 #4
0
 def _load_categories(self):
     """Build the label categories of the dataset.

     Label sources, in priority order:
       1. the dataset meta file in the dataset directory;
       2. the labels file, one label per line;
       3. the default label plus labels derived from the names of the
          image directories of the current subset.

     Returns:
         A dict mapping ``AnnotationType.label`` to the label categories.
     """
     label_cat = LabelCategories()
     labels_path = osp.join(self._dataset_dir, WiderFacePath.LABELS_FILE)
     if has_meta_file(self._dataset_dir):
         for label in parse_meta_file(self._dataset_dir).keys():
             label_cat.add(label)
     elif osp.isfile(labels_path):
         with open(labels_path, encoding='utf-8') as labels_file:
             for line in labels_file:
                 label = line.strip()
                 # Skip blank lines, they don't describe a label
                 if label:
                     label_cat.add(label)
     else:
         label_cat.add(WiderFacePath.DEFAULT_LABEL)
         subset_path = osp.join(self._dataset_dir,
                                WiderFacePath.SUBSET_DIR + self._subset,
                                WiderFacePath.IMAGES_DIR)
         if osp.isdir(subset_path):
             for images_dir in sorted(os.listdir(subset_path)):
                 if osp.isdir(osp.join(subset_path, images_dir)) and \
                         images_dir != WiderFacePath.IMAGES_DIR_NO_LABEL:
                     # For "<prefix>--<name>" directories the label is
                     # the part after the first '--'
                     if '--' in images_dir:
                         images_dir = images_dir.split('--')[1]
                     if images_dir != WiderFacePath.DEFAULT_LABEL:
                         label_cat.add(images_dir)
         # Only the default label was found: report no labels at all
         if len(label_cat) == 1:
             label_cat = LabelCategories()
     return {AnnotationType.label: label_cat}
예제 #5
0
 def _load_categories(self, path):
     """Build dataset categories from the best available label map.

     The dataset meta file is preferred, then the label map file in
     ``path``, and finally the built-in label map.
     """
     if has_meta_file(path):
         label_map = parse_meta_file(path)
     else:
         label_map_path = osp.join(path, SynthiaPath.LABELMAP_FILE)
         if osp.isfile(label_map_path):
             label_map = parse_label_map(label_map_path)
         else:
             label_map = SYNTHIA_LABEL_MAP
     return make_categories(label_map)
예제 #6
0
    def _load_categories(self, path):
        """Load categories according to the extractor task.

        Segmentation is delegated to ``_load_categories_segmentation``.
        Detection uses the labels of the dataset meta file when it exists,
        and an empty label list otherwise. Any other task yields None.
        """
        task = self._task
        if task == KittiTask.segmentation:
            return self._load_categories_segmentation(path)

        if task == KittiTask.detection:
            if has_meta_file(path):
                label_cat = LabelCategories.from_iterable(
                    parse_meta_file(path).keys())
                return {AnnotationType.label: label_cat}

            return {AnnotationType.label: LabelCategories()}
예제 #7
0
 def _load_categories(self, path):
     """Build dataset categories and remember the label names.

     The label map comes from the dataset meta file if present, otherwise
     from the label map file in ``path``, otherwise from the built-in map.
     The label names are stored in ``self._labels``.
     """
     label_map_path = osp.join(path, CamvidPath.LABELMAP_FILE)
     if has_meta_file(path):
         label_map = parse_meta_file(path)
     elif osp.isfile(label_map_path):
         label_map = parse_label_map(label_map_path)
     else:
         label_map = CamvidLabelMap
     self._labels = list(label_map)
     return make_camvid_categories(label_map)
예제 #8
0
    def test_can_write_and_parse_dataset_meta_file(self):
        """A label map saved as dataset meta is parsed back unchanged."""
        expected_label_map = KittiLabelMap
        categories = make_kitti_categories(expected_label_map)

        with TestDir() as test_dir:
            dataset = Dataset.from_iterable([], categories=categories)

            KittiConverter.convert(dataset, test_dir, save_dataset_meta=True)
            parsed_label_map = parse_meta_file(test_dir)

            self.assertEqual(expected_label_map, parsed_label_map)
예제 #9
0
    def test_can_write_and_parse_dataset_meta_file(self):
        """A label map saved as dataset meta is parsed back unchanged."""
        expected_label_map = Cityscapes.CITYSCAPES_LABEL_MAP
        categories = Cityscapes.make_cityscapes_categories(expected_label_map)

        with TestDir() as test_dir:
            dataset = Dataset.from_iterable([], categories=categories)

            CityscapesConverter.convert(dataset, test_dir,
                save_dataset_meta=True)
            parsed_label_map = parse_meta_file(test_dir)

            self.assertEqual(expected_label_map, parsed_label_map)
예제 #10
0
    def _load_categories_segmentation(self, path):
        """Build segmentation categories and remember the label names.

        The label map comes from the dataset meta file if present,
        otherwise from the label map file in ``path``, otherwise from the
        built-in map. The label names are stored in ``self._labels``.
        """
        label_map_path = osp.join(path, KittiPath.LABELMAP_FILE)
        if has_meta_file(path):
            label_map = parse_meta_file(path)
        elif osp.isfile(label_map_path):
            label_map = parse_label_map(label_map_path)
        else:
            label_map = KittiLabelMap

        self._labels = list(label_map)
        return make_kitti_categories(label_map)
예제 #11
0
    def __init__(self, path):
        """Set up the extractor from a dataset directory.

        Labels are taken from the dataset meta file when it exists;
        otherwise the label list starts empty. Items are loaded right away.

        Raises:
            FileNotFoundError: if ``path`` is not a directory.
        """
        if not osp.isdir(path):
            raise FileNotFoundError("Can't read dataset directory '%s'" % path)

        super().__init__()

        if has_meta_file(path):
            label_cat = LabelCategories.from_iterable(
                parse_meta_file(path).keys())
        else:
            label_cat = LabelCategories()
        self._categories = {AnnotationType.label: label_cat}

        loaded_items = self._load_items(path)
        self._items = list(loaded_items.values())
예제 #12
0
 def __init__(self, path, subset=None):
     """Set up the extractor from a subset directory.

     Categories come from the dataset meta file in ``path`` when it
     exists, otherwise from the labels file in the masks directory.
     """
     assert osp.isdir(path), path
     super().__init__(subset=subset)

     self._images_dir = osp.join(path, 'images')
     self._anno_dir = osp.join(path, MotsPath.MASKS_DIR)

     if not has_meta_file(path):
         labels_path = osp.join(self._anno_dir, MotsPath.LABELS_FILE)
         self._categories = self._parse_categories(labels_path)
     else:
         label_names = parse_meta_file(path).keys()
         self._categories = {
             AnnotationType.label:
             LabelCategories.from_iterable(label_names),
         }

     self._items = self._parse_items()
예제 #13
0
    def _load_categories(names_path):
        """Read label categories for the dataset owning ``names_path``.

        The dataset meta file located next to ``names_path`` takes
        precedence. Otherwise ``names_path`` is read line by line, one
        label per non-empty line.
        """
        dataset_dir = osp.dirname(names_path)
        if has_meta_file(dataset_dir):
            return LabelCategories.from_iterable(
                parse_meta_file(dataset_dir).keys())

        label_categories = LabelCategories()

        with open(names_path, 'r', encoding='utf-8') as names_file:
            for name in map(str.strip, names_file):
                if not name:
                    continue
                label_categories.add(name)

        return label_categories
예제 #14
0
    def _load_categories(self, path):
        """Load label categories.

        The dataset meta file in ``self._dataset_dir`` takes precedence.
        Otherwise, if ``path`` is a file, every line consisting of exactly
        two tab-separated fields contributes its first field as a label.
        """
        if has_meta_file(self._dataset_dir):
            label_names = parse_meta_file(self._dataset_dir).keys()
            return {
                AnnotationType.label:
                LabelCategories.from_iterable(label_names),
            }

        label_cat = LabelCategories()
        if not osp.isfile(path):
            return {AnnotationType.label: label_cat}

        with open(path, encoding='utf-8') as labels_file:
            for line in labels_file:
                fields = line.strip().split('\t')
                if len(fields) != 2:
                    continue
                label_cat.add(fields[0])
        return {AnnotationType.label: label_cat}
예제 #15
0
    def __init__(self, path):
        """Set up the extractor from an annotation file.

        The subset name is the annotation file name without its extension
        and without the part after the last '-'. Labels are taken from the
        dataset meta file when one exists; otherwise the label list starts
        empty. Items are loaded right away.

        Raises:
            FileNotFoundError: if ``path`` is not a file.
        """
        if not osp.isfile(path):
            raise FileNotFoundError("Can't read annotation file '%s'" % path)

        super().__init__(subset=osp.splitext(osp.basename(path))[0].rsplit(
            '-', maxsplit=1)[0])

        # `path` is the annotation file, so the dataset meta file has to be
        # searched for in the directory that contains it
        dataset_dir = osp.dirname(path)
        if has_meta_file(dataset_dir):
            self._categories = {
                AnnotationType.label:
                LabelCategories.from_iterable(
                    parse_meta_file(dataset_dir).keys())
            }
        else:
            self._categories = {AnnotationType.label: LabelCategories()}

        self._items = list(self._load_items(path).values())
예제 #16
0
    def __init__(self,
                 path,
                 labels=None,
                 occlusion_threshold=0,
                 is_gt=None,
                 subset=None):
        """Set up the extractor from an annotation file of a sequence.

        Args:
            path: annotation file; the sequence root is two directory
                levels above it.
            labels: a list of label names, a path to a labels file, or
                None to use the labels file next to ``path`` (or the
                default labels when it does not exist). Overridden by the
                dataset meta file in the sequence root, if any.
            occlusion_threshold: stored as a float.
            is_gt: whether the file holds ground truth. When None, it is
                deduced from the file name.
            subset: subset name passed to the base class.

        Raises:
            TypeError: if ``labels`` is neither a string nor a list.
        """
        super().__init__(subset=subset)

        assert osp.isfile(path)
        seq_root = osp.dirname(osp.dirname(path))

        default_image_dir = osp.join(seq_root, MotPath.IMAGE_DIR)
        self._image_dir = \
            default_image_dir if osp.isdir(default_image_dir) else ''

        # The sequence info file, when present, overrides the image
        # directory
        seq_info_path = osp.join(seq_root, MotPath.SEQINFO_FILE)
        seq_info = None
        if osp.isfile(seq_info_path):
            seq_info = self._parse_seq_info(seq_info_path)
            self._image_dir = osp.join(seq_root, seq_info['imdir'])
        self._seq_info = seq_info

        self._occlusion_threshold = float(occlusion_threshold)

        assert is_gt in {None, True, False}
        if is_gt is None:
            # Everything but the detections file is treated as ground truth
            is_gt = osp.basename(path) != MotPath.DET_FILENAME
        self._is_gt = is_gt

        if has_meta_file(seq_root):
            labels = list(parse_meta_file(seq_root).keys())
        if labels is None:
            labels_path = osp.join(osp.dirname(path), MotPath.LABELS_FILE)
            if osp.isfile(labels_path):
                labels = labels_path
            else:
                labels = [lbl.name for lbl in MotLabel]

        if isinstance(labels, str):
            labels = self._parse_labels(labels)
        elif isinstance(labels, list):
            assert all(isinstance(lbl, str) for lbl in labels), labels
        else:
            raise TypeError("Unexpected type of 'labels' argument: %s" %
                            labels)

        self._categories = self._load_categories(labels)
        self._items = list(self._load_items(path).values())
예제 #17
0
    def _load_categories(self, path, use_train_label_map=False):
        """Build dataset categories and remember the label names.

        The label map comes from the dataset meta file if present,
        otherwise from the label map file in ``path``. When neither yields
        a label map, a built-in one is used: the "train" variant if
        ``use_train_label_map`` is set, the regular one otherwise.
        The label names are stored in ``self._labels``.
        """
        label_map = None
        label_map_path = osp.join(path, CityscapesPath.LABELMAP_FILE)
        if has_meta_file(path):
            label_map = parse_meta_file(path)
        elif osp.isfile(label_map_path):
            label_map = parse_label_map(label_map_path)

        if label_map is None:
            label_map = TRAIN_CITYSCAPES_LABEL_MAP if use_train_label_map \
                else CITYSCAPES_LABEL_MAP

        self._labels = list(label_map)
        return make_cityscapes_categories(label_map)
예제 #18
0
    def _parse(self, dataset_root):
        """Parse every XML annotation file found under ``dataset_root``.

        XML files are searched recursively and processed in sorted order.
        The first path component of a file relative to ``dataset_root``
        (if the file is nested) is used as the subset name.

        Returns:
            A tuple ``(items, categories, subsets)``: the list of parsed
            dataset items, the categories dict and the set of subset names.
        """
        items = []
        subsets = set()

        # The labels are added to an instance that carries the common
        # attributes. from_iterable() is a class-level factory (it is
        # invoked on the class itself elsewhere), so chaining it on
        # LabelCategories(attributes=...) would not keep these attributes.
        label_cat = LabelCategories(attributes={ 'occluded', 'username' })
        if has_meta_file(dataset_root):
            for label in parse_meta_file(dataset_root).keys():
                label_cat.add(label)
        categories = { AnnotationType.label: label_cat }

        for xml_path in sorted(
                glob(osp.join(dataset_root, '**', '*.xml'), recursive=True)):
            item_path = osp.relpath(xml_path, dataset_root)
            path_parts = split_path(item_path)
            subset = ''
            if 1 < len(path_parts):
                subset = path_parts[0]
                item_path = osp.join(*path_parts[1:]) # pylint: disable=no-value-for-parameter

            root = ElementTree.parse(xml_path)

            # The item id is "<folder>/<filename>" from the XML contents;
            # the XML file path is used if that turns out to be empty
            item_id = osp.join(root.find('folder').text or '',
                    root.find('filename').text) or \
                item_path
            # The image is expected next to the XML file
            image_path = osp.join(osp.dirname(xml_path), osp.basename(item_id))
            item_id = osp.splitext(item_id)[0]

            image_size = None
            imagesize_elem = root.find('imagesize')
            if imagesize_elem is not None:
                width_elem = imagesize_elem.find('ncols')
                height_elem = imagesize_elem.find('nrows')
                # stored as (height, width)
                image_size = (int(height_elem.text), int(width_elem.text))

            image = Image(path=image_path, size=image_size)

            annotations = self._parse_annotations(root,
                osp.join(dataset_root, subset), categories)

            items.append(DatasetItem(id=item_id, subset=subset,
                image=image, annotations=annotations))
            subsets.add(items[-1].subset)
        return items, categories, subsets
예제 #19
0
    def _load_categories(self, label_map_source):
        """Resolve the label map and derive the converter categories.

        ``label_map_source`` can be:
          - the name of the 'kitti' label map type: the default label map;
          - the name of the 'source' label map type: the labels (and the
            colormap, if the source has mask categories) of the extractor;
          - a dict: used as the label map, ordered by key;
          - a path to an existing file: parsed as a dataset meta file or
            as a label map file.

        Sets ``self._categories``, ``self._label_map`` and
        ``self._label_id_mapping``.

        Raises:
            Exception: if ``label_map_source`` matches none of the above.
        """
        if label_map_source == LabelmapType.kitti.name:
            # use the default KITTI colormap
            label_map = KittiLabelMap

        elif label_map_source == LabelmapType.source.name:
            src_categories = self._extractor.categories()
            if AnnotationType.mask in src_categories:
                # use source colormap
                labels = src_categories[AnnotationType.label]
                colors = src_categories[AnnotationType.mask]
                label_map = OrderedDict()
                for idx, item in enumerate(labels.items):
                    color = colors.colormap.get(idx)
                    if color is None:
                        # labels without a color are left out
                        continue
                    label_map[item.name] = color
            else:
                # generate colormap for input labels
                labels = src_categories.get(AnnotationType.label,
                    LabelCategories())
                label_map = OrderedDict(
                    (item.name, None) for item in labels.items)

        elif isinstance(label_map_source, dict):
            ordered_entries = sorted(label_map_source.items(),
                key=lambda e: e[0])
            label_map = OrderedDict(ordered_entries)

        elif isinstance(label_map_source, str) and \
                osp.isfile(label_map_source):
            parse = parse_meta_file if is_meta_file(label_map_source) \
                else parse_label_map
            label_map = parse(label_map_source)

        else:
            raise Exception("Wrong labelmap specified, "
                "expected one of %s or a file path" % \
                ', '.join(t.name for t in LabelmapType))

        self._categories = make_kitti_categories(label_map)
        self._label_map = label_map
        self._label_id_mapping = self._make_label_id_map()
예제 #20
0
    def __init__(self, path, task,
        subset=None,
        use_original_config=False,
        keep_original_category_ids=False
    ):
        """Set up the extractor from a subset directory.

        Args:
            path: subset directory. It must contain one of the known
                annotation directories.
            task: the task to load annotations for (instances, or
                panoptic for any other value).
            subset: subset name; defaults to the base name of ``path``.
            use_original_config: stored in ``self._use_original_config``.
            keep_original_category_ids: forwarded to the panoptic
                categories loader.

        Raises:
            NotADirectoryError: if no annotation directory is found.
        """
        assert osp.isdir(path), path
        self._path = path
        if subset is None:
            subset = osp.basename(self._path)
        super().__init__(subset=subset)

        annotations_dirs = [d for d in os.listdir(path)
            if d in MapillaryVistasPath.ANNOTATION_DIRS]

        if len(annotations_dirs) == 0:
            # Report exactly the names that were searched for above
            raise NotADirectoryError("Can't find annotation directory at %s. "
                "Expected one of these directories: %s" % (path,
                ','.join(MapillaryVistasPath.ANNOTATION_DIRS))
            )
        elif len(annotations_dirs) > 1:
            log.warning("Directory(-es): %s will be skipped, dataset should contain "
                "only one annotation directory" % ','.join(annotations_dirs[1:])
            )

        self._use_original_config = use_original_config
        # The name of the annotation directory doubles as the format version
        self._format_version = annotations_dirs[0]
        self._annotations_dir = osp.join(path, annotations_dirs[0])
        self._images_dir = osp.join(path, MapillaryVistasPath.IMAGES_DIR)
        self._task = task

        if self._task == MapillaryVistasTask.instances:
            if has_meta_file(path):
                self._categories = make_mapillary_instance_categories(
                    parse_meta_file(path))
            else:
                self._categories = self._load_instances_categories()
            self._items = self._load_instances_items()
        else:
            panoptic_config = self._load_panoptic_config(self._annotations_dir)
            self._categories = self._load_panoptic_categories(
                panoptic_config['categories'], keep_original_category_ids)
            self._items = self._load_panoptic_items(panoptic_config)
    def __init__(self, path: str, *,
        labels: Union[Iterable[str], str] = _LabelsSource.file.name,
        labels_file: str = ImagenetTxtPath.LABELS_FILE,
        image_dir: Optional[str] = None,
        subset: Optional[str] = None,
    ):
        """Set up the extractor from an annotation file.

        Args:
            path: annotation file.
            labels: either the name of a labels source ('file' or
                'generate') or an iterable of label names.
            labels_file: labels file name, relative to the directory of
                ``path``. Only read for the 'file' source when there is no
                dataset meta file.
            image_dir: image directory, relative to the directory of
                ``path``. Falls back to the default image directory.
            subset: subset name. Falls back to the name of ``path``
                without the extension.
        """
        assert osp.isfile(path), path

        if not subset:
            subset, _ = osp.splitext(osp.basename(path))
        super().__init__(subset=subset)

        root_dir = osp.dirname(path)
        self.image_dir = osp.join(root_dir,
            image_dir or ImagenetTxtPath.IMAGE_DIR)

        self._generate_labels = False

        if not isinstance(labels, str):
            assert all(isinstance(e, str) for e in labels)
        else:
            labels_source = _LabelsSource[labels]

            if labels_source == _LabelsSource.generate:
                # Labels are produced later, start with none
                labels = ()
                self._generate_labels = True
            elif labels_source == _LabelsSource.file:
                # The dataset meta file wins over the labels file
                if has_meta_file(root_dir):
                    labels = parse_meta_file(root_dir).keys()
                else:
                    labels = self._parse_labels(
                        osp.join(root_dir, labels_file))
            else:
                assert False, "Unhandled labels source %s" % labels_source

        self._categories = self._load_categories(labels)

        self._items = list(self._load_items(path).values())
예제 #22
0
    def _load_categories(self, json_data, *, keep_original_ids):
        """Initialize ``self._categories`` (and the label map).

        When a dataset meta file exists in ``self._rootpath``, its labels
        are used and ``self._label_map`` maps ids 1..N to indices 0..N-1.
        Otherwise categories are loaded from ``json_data['categories']``
        according to the current task.
        """
        if not has_meta_file(self._rootpath):
            self._categories = {}

            tasks_with_labels = [
                CocoTask.instances, CocoTask.labels,
                CocoTask.person_keypoints, CocoTask.stuff, CocoTask.panoptic
            ]
            if self._task in tasks_with_labels:
                self._load_label_categories(
                    json_data['categories'],
                    keep_original_ids=keep_original_ids,
                )

            if self._task == CocoTask.person_keypoints:
                self._load_person_kp_categories(json_data['categories'])
            return

        labels = parse_meta_file(self._rootpath).keys()
        self._categories = {
            AnnotationType.label: LabelCategories.from_iterable(labels)
        }
        # 0 is reserved for no class
        self._label_map = {idx + 1: idx for idx, _ in enumerate(labels)}
예제 #23
0
    def _load_categories(self):
        """Load label categories for the dataset directory.

        Label sources, in priority order: the dataset meta file, the
        'labels.txt' file (one label per non-empty line), and finally the
        ten digit labels '0'..'9'.
        """
        if has_meta_file(self._dataset_dir):
            label_names = parse_meta_file(self._dataset_dir).keys()
            return {
                AnnotationType.label:
                LabelCategories.from_iterable(label_names),
            }

        label_cat = LabelCategories()

        labels_file = osp.join(self._dataset_dir, 'labels.txt')
        if not osp.isfile(labels_file):
            for digit in range(10):
                label_cat.add(str(digit))
        else:
            with open(labels_file, encoding='utf-8') as f:
                for name in map(str.strip, f):
                    if name:
                        label_cat.add(name)

        return {AnnotationType.label: label_cat}
예제 #24
0
    def _load_categories(self, path):
        """Load label categories for the dataset in ``path``.

        Label sources, in priority order: the dataset meta file, the
        CIFAR-10 or CIFAR-100 meta file, and finally the default CIFAR-10
        labels. For a CIFAR-100 style meta file the coarse label names are
        stored in ``self._coarse_labels``.
        """
        if has_meta_file(path):
            label_names = parse_meta_file(path).keys()
            return {
                AnnotationType.label:
                LabelCategories.from_iterable(label_names)
            }

        label_cat = LabelCategories()

        meta_file = osp.join(path, CifarPath.META_10_FILE)
        if not osp.isfile(meta_file):
            meta_file = osp.join(path, CifarPath.META_100_FILE)

        if not osp.isfile(meta_file):
            for label in Cifar10Label:
                label_cat.add(label)
            return { AnnotationType.label: label_cat }

        # CIFAR-10:
        # num_cases_per_batch: 1000
        # label_names: ['airplane', 'automobile', 'bird', 'cat', 'deer',
        #               'dog', 'frog', 'horse', 'ship', 'truck']
        # num_vis: 3072
        # CIFAR-100:
        # fine_label_names: ['apple', 'aquarium_fish', 'baby', ...]
        # coarse_label_names: ['aquatic_mammals', 'fish', 'flowers', ...]
        with open(meta_file, 'rb') as labels_file:
            data = PickleLoader.restricted_load(labels_file)

        labels = data.get('label_names')
        if labels is None:
            # No CIFAR-10 key, try the CIFAR-100 ones
            labels = data.get('fine_label_names')
            self._coarse_labels = data.get('coarse_label_names', [])

        if labels is not None:
            for label in labels:
                label_cat.add(label)

        return { AnnotationType.label: label_cat }
예제 #25
0
    def _parse(cls, path):
        """Parse a tracklets XML file into per-frame annotations.

        The document is read as a stream of start/end events. The
        outermost ``item`` element opens a track, an ``item`` element
        nested in a track opens a shape (pose) of that track, and
        ``attribute`` elements are attached to the innermost open object.

        Args:
            path: path to the XML annotation file.

        Returns:
            A tuple ``(items, categories)``. ``items`` maps a frame index
            (track start frame + shape position) to a dict with an
            'annotations' list; ``categories`` is the categories dict.

        Raises:
            ValueError: if an attribute has an empty name.
            Exception: if the document ends with an unfinished track,
                shape or attribute.
        """
        # Imported here because this function didn't need os.path before
        import os.path as osp

        tracks = []
        track = None
        shape = None
        attr = None
        labels = {}
        point_tags = {'tx', 'ty', 'tz', 'rx', 'ry', 'rz'}

        # Can fail with "XML declaration not well-formed" on documents with
        # <?xml ... standalone="true"?>
        #                       ^^^^
        # (like the original Kitti dataset), while
        # <?xml ... standalone="yes"?>
        #                       ^^^
        # works.
        tree = ET.iterparse(path, events=("start", "end"))
        for ev, elem in tree:
            if ev == "start":
                if elem.tag == 'item':
                    if track is None:
                        track = {
                            'shapes': [],
                            'scale': {},
                            'label': None,
                            'attributes': {},
                            'start_frame': None,
                            'length': None,
                        }
                    else:
                        # an item inside an open track is a shape
                        shape = {
                            'points': {},
                            'attributes': {},
                            'occluded': None,
                            'occluded_kf': False,
                            'truncated': None,
                        }

                elif elem.tag == 'attribute':
                    attr = {}

            elif ev == "end":
                if elem.tag == 'item':
                    assert track is not None

                    if shape:
                        track['shapes'].append(shape)
                        shape = None
                    else:
                        # the track itself is closed
                        assert track['length'] == len(track['shapes'])

                        if track['label']:
                            # collect every attribute name seen for
                            # the label, on the track and on its shapes
                            labels.setdefault(track['label'], set())

                            for a in track['attributes']:
                                labels[track['label']].add(a)

                            for s in track['shapes']:
                                for a in s['attributes']:
                                    labels[track['label']].add(a)

                        tracks.append(track)
                        track = None

                # track tags
                elif track and elem.tag == 'objectType':
                    track['label'] = elem.text
                elif track and elem.tag in {'h', 'w', 'l'}:
                    track['scale'][elem.tag] = float(elem.text)
                elif track and elem.tag == 'first_frame':
                    track['start_frame'] = int(elem.text)
                elif track and elem.tag == 'count':
                    track['length'] = int(elem.text)

                # pose tags
                elif shape and elem.tag in point_tags:
                    shape['points'][elem.tag] = float(elem.text)
                elif shape and elem.tag == 'occlusion':
                    shape['occluded'] = OcclusionStates(int(elem.text))
                elif shape and elem.tag == 'occlusion_kf':
                    shape['occluded_kf'] = elem.text == '1'
                elif shape and elem.tag == 'truncation':
                    shape['truncated'] = TruncationStates(int(elem.text))

                # common tags
                elif attr is not None and elem.tag == 'name':
                    if not elem.text:
                        raise ValueError("Attribute name can't be empty")
                    attr['name'] = elem.text
                elif attr is not None and elem.tag == 'value':
                    attr['value'] = elem.text or ''
                elif attr is not None and elem.tag == 'attribute':
                    if shape:
                        shape['attributes'][attr['name']] = attr['value']
                    else:
                        track['attributes'][attr['name']] = attr['value']
                    attr = None

        if track is not None or shape is not None or attr is not None:
            raise Exception("Failed to parse anotations from '%s'" % path)

        special_attrs = KittiRawPath.SPECIAL_ATTRS
        common_attrs = ['occluded']

        # `path` is the annotation file, so the dataset meta file has to be
        # searched for in the directory that contains it
        dataset_dir = osp.dirname(path)
        if has_meta_file(dataset_dir):
            categories =  { AnnotationType.label: LabelCategories.
                from_iterable(parse_meta_file(dataset_dir).keys()) }
        else:
            label_cat = LabelCategories(attributes=common_attrs)
            for label, attrs in sorted(labels.items(), key=lambda e: e[0]):
                label_cat.add(label, attributes=set(attrs) - special_attrs)

            categories = {AnnotationType.label: label_cat}

        items = {}
        for idx, track in enumerate(tracks):
            # track ids are 1-based
            track_id = idx + 1
            for i, ann in enumerate(
                    cls._parse_track(track_id, track, categories)):
                frame_desc = items.setdefault(track['start_frame'] + i,
                    {'annotations': []})
                frame_desc['annotations'].append(ann)

        return items, categories