def _load_categories(self, label_map_source):
    """Initialize VOC categories from ``label_map_source``.

    The source may be a ``LabelmapType`` name, a label_name -> [color,
    parts, actions] mapping, or a path to a labelmap file. Sets
    ``self._categories``, ``self._label_map`` and
    ``self._label_id_mapping`` as side effects.
    """
    if label_map_source == LabelmapType.voc.name:
        # use the default VOC colormap
        label_map = make_voc_label_map()

    elif label_map_source == LabelmapType.source.name and \
            AnnotationType.mask not in self._extractor.categories():
        # generate colormap for input labels
        labels = self._extractor.categories() \
            .get(AnnotationType.label, LabelCategories())
        label_map = OrderedDict((item.name, [None, [], []])
            for item in labels.items)

    elif label_map_source == LabelmapType.source.name and \
            AnnotationType.mask in self._extractor.categories():
        # use source colormap
        labels = self._extractor.categories()[AnnotationType.label]
        colors = self._extractor.categories()[AnnotationType.mask]
        label_map = OrderedDict()
        for idx, item in enumerate(labels.items):
            color = colors.colormap.get(idx)
            if color is not None:
                label_map[item.name] = [color, [], []]

    elif isinstance(label_map_source, dict):
        label_map = OrderedDict(
            sorted(label_map_source.items(), key=lambda e: e[0]))

    elif isinstance(label_map_source, str) and osp.isfile(label_map_source):
        if has_meta_file(label_map_source):
            label_map = parse_meta_file(label_map_source)
        else:
            label_map = parse_label_map(label_map_source)

    else:
        raise Exception("Wrong labelmap specified: '%s', "
            "expected one of %s or a file path" % \
            (label_map_source, ', '.join(t.name for t in LabelmapType)))

    # The background label must be first so it receives index 0
    bg_label = find(label_map.items(), lambda x: x[1][0] == (0, 0, 0))
    if bg_label is not None:
        # Fix: find() returns the matching (name, desc) pair, so the label
        # name must be extracted - otherwise move_to_end() below raises
        # KeyError whenever an existing label already uses color (0, 0, 0).
        bg_label = bg_label[0]
    else:
        bg_label = 'background'
        if bg_label not in label_map:
            has_colors = any(v[0] is not None for v in label_map.values())
            color = (0, 0, 0) if has_colors else None
            label_map[bg_label] = [color, [], []]
    label_map.move_to_end(bg_label, last=False)

    self._categories = make_voc_categories(label_map)

    # Update colors with assigned values
    colormap = self._categories[AnnotationType.mask].colormap
    for label_id, color in colormap.items():
        label_desc = label_map[self._categories[
            AnnotationType.label].items[label_id].name]
        label_desc[0] = color

    self._label_map = label_map
    self._label_id_mapping = self._make_label_id_map()
def __init__(self, path):
    """Scan ``path`` for subset directories and load each subset."""
    if not osp.isdir(path):
        raise FileNotFoundError("Can't read dataset directory '%s'" % path)

    # exclude dataset meta file
    subsets = [entry for entry in os.listdir(path)
        if osp.splitext(entry)[-1] != '.json']
    if not subsets:
        raise FileNotFoundError("Can't read subsets in directory '%s'" % path)

    super().__init__(subsets=sorted(subsets))

    self._path = path
    self._items = []

    if has_meta_file(self._path):
        self._categories = {
            AnnotationType.label: LabelCategories.from_iterable(
                parse_meta_file(self._path).keys())
        }
    else:
        self._categories = {}

    for subset in self._subsets:
        self._load_items(subset)
def _load_categories(self):
    """Populate label categories from the meta file, the labels list file,
    or, failing both, from the per-subset image directory names."""
    categories = LabelCategories()
    labels_path = osp.join(self._dataset_dir, VggFace2Path.LABELS_FILE)

    if has_meta_file(self._dataset_dir):
        for name in parse_meta_file(self._dataset_dir).keys():
            categories.add(name)
    elif osp.isfile(labels_path):
        with open(labels_path, encoding='utf-8') as f:
            rows = [line.strip().split() for line in f]
        for fields in rows:
            # optional second column is the class name the label belongs to
            parent = fields[1] if len(fields) > 1 else None
            categories.add(fields[0], parent=parent)
    else:
        # fall back to directory names: one label per image folder
        for subset in self._subsets:
            subset_dir = osp.join(self._dataset_dir, subset)
            if not osp.isdir(subset_dir):
                continue
            for entry in sorted(os.listdir(subset_dir)):
                if entry == VggFace2Path.IMAGES_DIR_NO_LABEL:
                    continue
                if osp.isdir(osp.join(subset_dir, entry)):
                    categories.add(entry)

    self._categories[AnnotationType.label] = categories
def _load_categories(self):
    """Collect labels from the meta file, the labels file, or the event
    directory names of the subset images."""
    label_cat = LabelCategories()

    if has_meta_file(self._dataset_dir):
        for name in parse_meta_file(self._dataset_dir).keys():
            label_cat.add(name)
        return {AnnotationType.label: label_cat}

    labels_path = osp.join(self._dataset_dir, WiderFacePath.LABELS_FILE)
    if osp.isfile(labels_path):
        with open(labels_path, encoding='utf-8') as f:
            for line in f:
                label_cat.add(line.strip())
        return {AnnotationType.label: label_cat}

    label_cat.add(WiderFacePath.DEFAULT_LABEL)
    subset_path = osp.join(self._dataset_dir,
        WiderFacePath.SUBSET_DIR + self._subset,
        WiderFacePath.IMAGES_DIR)
    if osp.isdir(subset_path):
        for entry in sorted(os.listdir(subset_path)):
            if not osp.isdir(osp.join(subset_path, entry)) or \
                    entry == WiderFacePath.IMAGES_DIR_NO_LABEL:
                continue
            # directory names look like '<id>--<name>'; keep the name part
            if '--' in entry:
                entry = entry.split('--')[1]
            if entry != WiderFacePath.DEFAULT_LABEL:
                label_cat.add(entry)
    if len(label_cat) == 1:
        # only the default label was found - treat the set as unlabeled
        label_cat = LabelCategories()

    return {AnnotationType.label: label_cat}
def __init__(self, path, image_meta=None):
    """Open an Open Images dataset directory.

    image_meta may be a ready mapping, a path to a meta file (relative
    to ``path``), or None to try the default meta file location.
    """
    if not osp.isdir(path):
        raise FileNotFoundError("Can't read dataset directory '%s'" % path)

    super().__init__()

    self._dataset_dir = path
    self._annotation_files = os.listdir(
        osp.join(path, OpenImagesPath.ANNOTATIONS_DIR))
    self._categories = {}
    self._items = []

    assert image_meta is None or isinstance(image_meta, (dict, str))
    if image_meta is None:
        # best effort: a missing default meta file simply means no meta
        try:
            self._image_meta = load_image_meta_file(osp.join(
                path, OpenImagesPath.ANNOTATIONS_DIR,
                DEFAULT_IMAGE_META_FILE_NAME))
        except FileNotFoundError:
            self._image_meta = {}
    elif isinstance(image_meta, dict):
        self._image_meta = dict(image_meta)
    else:
        self._image_meta = load_image_meta_file(osp.join(path, image_meta))

    if has_meta_file(path):
        self._categories = {
            AnnotationType.label: LabelCategories.from_iterable(
                parse_meta_file(path).keys())
        }
    else:
        self._load_categories()

    self._load_items()
def _load_categories(self, path):
    """Build categories from the meta file, the labelmap file, or the
    built-in SYNTHIA label map, in that order of preference."""
    if has_meta_file(path):
        return make_categories(parse_meta_file(path))

    labelmap_path = osp.join(path, SynthiaPath.LABELMAP_FILE)
    label_map = parse_label_map(labelmap_path) \
        if osp.isfile(labelmap_path) else SYNTHIA_LABEL_MAP
    return make_categories(label_map)
def _load_categories(self, dataset_path):
    """Build VOC categories from the meta file or the labelmap file.

    When neither file exists the label map stays None; presumably
    make_voc_categories() supplies the defaults then - confirm in callee.
    """
    if has_meta_file(dataset_path):
        return make_voc_categories(parse_meta_file(dataset_path))

    label_map = None
    labelmap_path = osp.join(dataset_path, VocPath.LABELMAP_FILE)
    if osp.isfile(labelmap_path):
        label_map = parse_label_map(labelmap_path)
    return make_voc_categories(label_map)
def _load_categories(self, path):
    """Dispatch category loading by the configured KITTI task."""
    if self._task == KittiTask.segmentation:
        return self._load_categories_segmentation(path)

    if self._task == KittiTask.detection and has_meta_file(path):
        return {
            AnnotationType.label: LabelCategories.from_iterable(
                parse_meta_file(path).keys())
        }

    # no meta information available - start with empty label categories
    return {AnnotationType.label: LabelCategories()}
def _load_categories(self, path):
    """Load the Camvid label map (meta file > labelmap file > defaults)
    and build categories from it."""
    if has_meta_file(path):
        label_map = parse_meta_file(path)
    else:
        labelmap_path = osp.join(path, CamvidPath.LABELMAP_FILE)
        if osp.isfile(labelmap_path):
            label_map = parse_label_map(labelmap_path)
        else:
            label_map = CamvidLabelMap

    # remember label order for later item parsing
    self._labels = list(label_map)
    return make_camvid_categories(label_map)
def _load_categories_segmentation(self, path):
    """Load the KITTI segmentation label map (meta file > labelmap file
    > defaults) and build categories from it."""
    if has_meta_file(path):
        label_map = parse_meta_file(path)
    else:
        labelmap_path = osp.join(path, KittiPath.LABELMAP_FILE)
        if osp.isfile(labelmap_path):
            label_map = parse_label_map(labelmap_path)
        else:
            label_map = KittiLabelMap

    # remember label order for later item parsing
    self._labels = list(label_map)
    return make_kitti_categories(label_map)
def _load_categories(names_path):
    """Read label names from the dataset meta file next to ``names_path``,
    or from the names file itself (one label per line, blanks skipped)."""
    meta_dir = osp.dirname(names_path)
    if has_meta_file(meta_dir):
        return LabelCategories.from_iterable(
            parse_meta_file(meta_dir).keys())

    categories = LabelCategories()
    with open(names_path, 'r', encoding='utf-8') as f:
        for raw in f:
            name = raw.strip()
            if name:
                categories.add(name)
    return categories
def __init__(self, path, subset=None):
    """Open a MOTS dataset directory and parse its items."""
    assert osp.isdir(path), path
    super().__init__(subset=subset)

    self._images_dir = osp.join(path, 'images')
    self._anno_dir = osp.join(path, MotsPath.MASKS_DIR)

    if has_meta_file(path):
        self._categories = {
            AnnotationType.label: LabelCategories.from_iterable(
                parse_meta_file(path).keys())
        }
    else:
        self._categories = self._parse_categories(
            osp.join(self._anno_dir, MotsPath.LABELS_FILE))

    self._items = self._parse_items()
def __init__(self, path):
    """Open a dataset directory, loading labels from the meta file when
    present and starting with empty categories otherwise."""
    if not osp.isdir(path):
        raise FileNotFoundError("Can't read dataset directory '%s'" % path)

    super().__init__()

    if has_meta_file(path):
        self._categories = {
            AnnotationType.label: LabelCategories.from_iterable(
                parse_meta_file(path).keys())
        }
    else:
        self._categories = {AnnotationType.label: LabelCategories()}

    self._items = list(self._load_items(path).values())
def _load_categories(self, path):
    """Load labels from the dataset meta file, or from a tab-separated
    labels file at ``path`` (first column is the label name)."""
    if has_meta_file(self._dataset_dir):
        return {
            AnnotationType.label: LabelCategories.from_iterable(
                parse_meta_file(self._dataset_dir).keys())
        }

    label_cat = LabelCategories()
    if osp.isfile(path):
        with open(path, encoding='utf-8') as f:
            for line in f:
                parts = line.strip().split('\t')
                # expect exactly two columns; skip malformed lines
                if len(parts) == 2:
                    label_cat.add(parts[0])
    return {AnnotationType.label: label_cat}
def __init__(self, path):
    """Open an annotation file.

    The subset name is the file name up to the last '-' separator.
    """
    if not osp.isfile(path):
        raise FileNotFoundError("Can't read annotation file '%s'" % path)

    super().__init__(subset=osp.splitext(osp.basename(path))[0].rsplit(
        '-', maxsplit=1)[0])

    # Fix: has_meta_file()/parse_meta_file() expect a directory (they join
    # the given path with the meta file name), but ``path`` here is the
    # annotation file itself, so the old check could never succeed. Look
    # for the meta file in the directory containing the annotation file.
    dataset_dir = osp.dirname(path)
    if has_meta_file(dataset_dir):
        self._categories = {
            AnnotationType.label: LabelCategories.from_iterable(
                parse_meta_file(dataset_dir).keys())
        }
    else:
        self._categories = {AnnotationType.label: LabelCategories()}

    self._items = list(self._load_items(path).values())
def __init__(self, path, labels=None, occlusion_threshold=0, is_gt=None,
        subset=None):
    """Open a MOT annotations file.

    labels may be None (auto-detect), a path to a labels file, or a list
    of label names. is_gt=None infers ground truth from the file name.
    """
    super().__init__(subset=subset)

    assert osp.isfile(path)
    seq_root = osp.dirname(osp.dirname(path))

    # default image directory; may be overridden by seqinfo below
    image_dir = osp.join(seq_root, MotPath.IMAGE_DIR)
    self._image_dir = image_dir if osp.isdir(image_dir) else ''

    seq_info_path = osp.join(seq_root, MotPath.SEQINFO_FILE)
    if osp.isfile(seq_info_path):
        seq_info = self._parse_seq_info(seq_info_path)
        self._image_dir = osp.join(seq_root, seq_info['imdir'])
    else:
        seq_info = None
    self._seq_info = seq_info

    self._occlusion_threshold = float(occlusion_threshold)

    assert is_gt in {None, True, False}
    if is_gt is None:
        # a detections file is not ground truth; anything else is
        is_gt = osp.basename(path) != MotPath.DET_FILENAME
    self._is_gt = is_gt

    # label sources, in priority order: dataset meta file, the explicit
    # argument, a labels file next to the annotations, built-in MOT labels
    if has_meta_file(seq_root):
        labels = list(parse_meta_file(seq_root).keys())
    if labels is None:
        labels = osp.join(osp.dirname(path), MotPath.LABELS_FILE)
        if not osp.isfile(labels):
            labels = [lbl.name for lbl in MotLabel]
    if isinstance(labels, str):
        labels = self._parse_labels(labels)
    elif isinstance(labels, list):
        assert all(isinstance(lbl, str) for lbl in labels), labels
    else:
        raise TypeError("Unexpected type of 'labels' argument: %s" % labels)

    self._categories = self._load_categories(labels)
    self._items = list(self._load_items(path).values())
def _parse(self, dataset_root):
    """Parse a LabelMe dataset rooted at ``dataset_root``.

    Walks all '*.xml' annotation files (first path component, if any,
    becomes the subset name) and returns (items, categories, subsets).
    """
    items = []
    subsets = set()

    # Every LabelMe annotation carries these attributes.
    # Fix: LabelCategories.from_iterable() is a classmethod, so calling it
    # on a freshly-constructed instance returned a NEW LabelCategories and
    # silently discarded the attributes passed to the constructor. Build
    # the categories explicitly so the attributes are kept.
    common_attrs = {'occluded', 'username'}
    if has_meta_file(dataset_root):
        label_cat = LabelCategories(attributes=common_attrs)
        for label in parse_meta_file(dataset_root):
            label_cat.add(label)
        categories = {AnnotationType.label: label_cat}
    else:
        categories = {
            AnnotationType.label: LabelCategories(attributes=common_attrs)
        }

    for xml_path in sorted(
            glob(osp.join(dataset_root, '**', '*.xml'), recursive=True)):
        item_path = osp.relpath(xml_path, dataset_root)
        path_parts = split_path(item_path)
        subset = ''
        if 1 < len(path_parts):
            subset = path_parts[0]
            item_path = osp.join(*path_parts[1:]) # pylint: disable=no-value-for-parameter

        root = ElementTree.parse(xml_path)

        # prefer the id recorded in the annotation; fall back to the path
        item_id = osp.join(root.find('folder').text or '',
                root.find('filename').text) or \
            item_path
        image_path = osp.join(osp.dirname(xml_path), osp.basename(item_id))
        item_id = osp.splitext(item_id)[0]

        image_size = None
        imagesize_elem = root.find('imagesize')
        if imagesize_elem is not None:
            width_elem = imagesize_elem.find('ncols')
            height_elem = imagesize_elem.find('nrows')
            image_size = (int(height_elem.text), int(width_elem.text))
        image = Image(path=image_path, size=image_size)

        annotations = self._parse_annotations(root,
            osp.join(dataset_root, subset), categories)

        items.append(DatasetItem(id=item_id, subset=subset,
            image=image, annotations=annotations))
        subsets.add(items[-1].subset)

    return items, categories, subsets
def _load_categories(self, path, use_train_label_map=False):
    """Load the Cityscapes label map (meta file > labelmap file >
    built-in defaults) and build categories from it."""
    label_map = None
    if has_meta_file(path):
        label_map = parse_meta_file(path)
    else:
        labelmap_path = osp.join(path, CityscapesPath.LABELMAP_FILE)
        if osp.isfile(labelmap_path):
            label_map = parse_label_map(labelmap_path)

    if label_map is None:
        # no explicit map found - pick one of the built-in defaults
        label_map = TRAIN_CITYSCAPES_LABEL_MAP if use_train_label_map \
            else CITYSCAPES_LABEL_MAP

    # remember label order for later item parsing
    self._labels = list(label_map)
    return make_cityscapes_categories(label_map)
def __init__(self, path, task, subset=None, use_original_config=False,
        keep_original_category_ids=False):
    """Open a Mapillary Vistas dataset directory for the given task."""
    assert osp.isdir(path), path
    self._path = path

    if subset is None:
        subset = osp.basename(self._path)
    super().__init__(subset=subset)

    annotations_dirs = [d for d in os.listdir(path)
        if d in MapillaryVistasPath.ANNOTATION_DIRS]

    if not annotations_dirs:
        raise NotADirectoryError("Can't find annotation directory at %s. "
            "Expected one of these directories: %s" % (path,
            ','.join(MapillaryVistasPath.ANNOTATIONS_DIR_PATTERNS)))
    elif len(annotations_dirs) > 1:
        # only the first annotation directory is used
        log.warning("Directory(-es): %s will be skipped, dataset should contain "
            "only one annotation directory" % ','.join(annotations_dirs[1:]))

    self._use_original_config = use_original_config
    self._format_version = annotations_dirs[0]
    self._annotations_dir = osp.join(path, annotations_dirs[0])
    self._images_dir = osp.join(path, MapillaryVistasPath.IMAGES_DIR)
    self._task = task

    if self._task == MapillaryVistasTask.instances:
        if has_meta_file(path):
            self._categories = make_mapillary_instance_categories(
                parse_meta_file(path))
        else:
            self._categories = self._load_instances_categories()
        self._items = self._load_instances_items()
    else:
        panoptic_config = self._load_panoptic_config(self._annotations_dir)
        self._categories = self._load_panoptic_categories(
            panoptic_config['categories'], keep_original_category_ids)
        self._items = self._load_panoptic_items(panoptic_config)
def __init__(self, path: str, *,
        labels: Union[Iterable[str], str] = _LabelsSource.file.name,
        labels_file: str = ImagenetTxtPath.LABELS_FILE,
        image_dir: Optional[str] = None,
        subset: Optional[str] = None,
):
    """Open an ImageNet-txt annotation file.

    labels may be a _LabelsSource name ('file' or 'generate') or an
    explicit iterable of label names.
    """
    assert osp.isfile(path), path

    if not subset:
        subset = osp.splitext(osp.basename(path))[0]
    super().__init__(subset=subset)

    root_dir = osp.dirname(path)
    self.image_dir = osp.join(root_dir,
        image_dir if image_dir else ImagenetTxtPath.IMAGE_DIR)

    self._generate_labels = False
    if isinstance(labels, str):
        labels_source = _LabelsSource[labels]
        if labels_source == _LabelsSource.generate:
            # labels will be discovered while reading the annotations
            labels = ()
            self._generate_labels = True
        elif labels_source == _LabelsSource.file:
            if has_meta_file(root_dir):
                labels = parse_meta_file(root_dir).keys()
            else:
                labels = self._parse_labels(osp.join(root_dir, labels_file))
        else:
            assert False, "Unhandled labels source %s" % labels_source
    else:
        assert all(isinstance(e, str) for e in labels)

    self._categories = self._load_categories(labels)
    self._items = list(self._load_items(path).values())
def _load_categories(self):
    """Load labels from the meta file, 'labels.txt', or default to the
    ten digit classes '0'..'9'."""
    if has_meta_file(self._dataset_dir):
        return {
            AnnotationType.label: LabelCategories.from_iterable(
                parse_meta_file(self._dataset_dir).keys())
        }

    label_cat = LabelCategories()
    labels_path = osp.join(self._dataset_dir, 'labels.txt')
    if osp.isfile(labels_path):
        with open(labels_path, encoding='utf-8') as f:
            for raw in f:
                name = raw.strip()
                if name:
                    label_cat.add(name)
    else:
        # no labels file - fall back to digit class names
        for digit in range(10):
            label_cat.add(str(digit))

    return {AnnotationType.label: label_cat}
def _load_categories(self, json_data, *, keep_original_ids):
    """Initialize categories from the meta file or the COCO json data."""
    if has_meta_file(self._rootpath):
        labels = parse_meta_file(self._rootpath).keys()
        self._categories = {
            AnnotationType.label: LabelCategories.from_iterable(labels)
        }
        # 0 is reserved for no class
        self._label_map = {i + 1: i for i in range(len(labels))}
        return

    self._categories = {}

    label_tasks = (CocoTask.instances, CocoTask.labels,
        CocoTask.person_keypoints, CocoTask.stuff, CocoTask.panoptic)
    if self._task in label_tasks:
        self._load_label_categories(json_data['categories'],
            keep_original_ids=keep_original_ids)

    if self._task == CocoTask.person_keypoints:
        self._load_person_kp_categories(json_data['categories'])
def _load_categories(self, path):
    """Load CIFAR labels from the meta file, the pickled batches.meta
    file (CIFAR-10 or CIFAR-100 layout), or the built-in CIFAR-10 names."""
    if has_meta_file(path):
        return {
            AnnotationType.label: LabelCategories.from_iterable(
                parse_meta_file(path).keys())
        }

    label_cat = LabelCategories()

    meta_file = osp.join(path, CifarPath.META_10_FILE)
    if not osp.isfile(meta_file):
        meta_file = osp.join(path, CifarPath.META_100_FILE)

    if osp.isfile(meta_file):
        # CIFAR-10:
        # num_cases_per_batch: 1000
        # label_names: ['airplane', 'automobile', 'bird', 'cat', 'deer',
        #               'dog', 'frog', 'horse', 'ship', 'truck']
        # num_vis: 3072
        # CIFAR-100:
        # fine_label_names: ['apple', 'aquarium_fish', 'baby', ...]
        # coarse_label_names: ['aquatic_mammals', 'fish', 'flowers', ...]
        with open(meta_file, 'rb') as f:
            data = PickleLoader.restricted_load(f)

        labels = data.get('label_names')
        if labels is None:
            # CIFAR-100 layout: the fine labels are the actual classes
            labels = data.get('fine_label_names')
            self._coarse_labels = data.get('coarse_label_names', [])
        if labels is not None:
            for label in labels:
                label_cat.add(label)
    else:
        # no meta file at all - use the standard CIFAR-10 label set
        for label in Cifar10Label:
            label_cat.add(label)

    return {AnnotationType.label: label_cat}
def _parse(cls, path):
    """Parse a KITTI raw tracklet XML file.

    Walks the document with an event-driven parser, accumulating per-track
    dictionaries (label, scale, start frame, length, attributes) and their
    per-frame pose shapes, then builds label categories and a mapping of
    frame index -> {'annotations': [...]}.

    Returns a (items, categories) tuple.
    """
    tracks = []
    track = None  # the tracklet currently being read, if any
    shape = None  # the per-frame pose currently being read, if any
    attr = None   # the <attribute> element currently being read, if any
    labels = {}   # label name -> set of attribute names seen for it
    point_tags = {'tx', 'ty', 'tz', 'rx', 'ry', 'rz'}

    # Can fail with "XML declaration not well-formed" on documents with
    # <?xml ... standalone="true"?>
    #                        ^^^^
    # (like the original Kitti dataset), while
    # <?xml ... standalone="yes"?>
    #                       ^^^
    # works.
    tree = ET.iterparse(path, events=("start", "end"))
    for ev, elem in tree:
        if ev == "start":
            # A nested <item> inside an open track is a pose entry;
            # a top-level <item> opens a new track.
            if elem.tag == 'item':
                if track is None:
                    track = {
                        'shapes': [],
                        'scale': {},
                        'label': None,
                        'attributes': {},
                        'start_frame': None,
                        'length': None,
                    }
                else:
                    shape = {
                        'points': {},
                        'attributes': {},
                        'occluded': None,
                        'occluded_kf': False,
                        'truncated': None,
                    }
            elif elem.tag == 'attribute':
                attr = {}
        elif ev == "end":
            if elem.tag == 'item':
                assert track is not None
                if shape:
                    # closing a pose entry: attach it to the open track
                    track['shapes'].append(shape)
                    shape = None
                else:
                    # closing the track itself: collect its attribute
                    # names under its label, then store it
                    assert track['length'] == len(track['shapes'])
                    if track['label']:
                        labels.setdefault(track['label'], set())
                        for a in track['attributes']:
                            labels[track['label']].add(a)
                        for s in track['shapes']:
                            for a in s['attributes']:
                                labels[track['label']].add(a)
                    tracks.append(track)
                    track = None
            # track tags
            elif track and elem.tag == 'objectType':
                track['label'] = elem.text
            elif track and elem.tag in {'h', 'w', 'l'}:
                track['scale'][elem.tag] = float(elem.text)
            elif track and elem.tag == 'first_frame':
                track['start_frame'] = int(elem.text)
            # NOTE(review): the trailing "and track" is redundant - the
            # leading "track and" already guards this branch
            elif track and elem.tag == 'count' and track:
                track['length'] = int(elem.text)
            # pose tags
            elif shape and elem.tag in point_tags:
                shape['points'][elem.tag] = float(elem.text)
            elif shape and elem.tag == 'occlusion':
                shape['occluded'] = OcclusionStates(int(elem.text))
            elif shape and elem.tag == 'occlusion_kf':
                shape['occluded_kf'] = elem.text == '1'
            elif shape and elem.tag == 'truncation':
                shape['truncated'] = TruncationStates(int(elem.text))
            # common tags
            elif attr is not None and elem.tag == 'name':
                if not elem.text:
                    raise ValueError("Attribute name can't be empty")
                attr['name'] = elem.text
            elif attr is not None and elem.tag == 'value':
                attr['value'] = elem.text or ''
            elif attr is not None and elem.tag == 'attribute':
                # attribute closes: attach to the open shape if any,
                # otherwise to the track
                if shape:
                    shape['attributes'][attr['name']] = attr['value']
                else:
                    track['attributes'][attr['name']] = attr['value']
                attr = None

    # Any state left open means the document ended mid-element.
    # NOTE(review): "anotations" typo in the message text (runtime string,
    # left unchanged here)
    if track is not None or shape is not None or attr is not None:
        raise Exception("Failed to parse anotations from '%s'" % path)

    special_attrs = KittiRawPath.SPECIAL_ATTRS
    common_attrs = ['occluded']
    if has_meta_file(path):
        categories = {
            AnnotationType.label: LabelCategories.
                from_iterable(parse_meta_file(path).keys())
        }
    else:
        # derive categories from the labels actually seen in the file
        label_cat = LabelCategories(attributes=common_attrs)
        for label, attrs in sorted(labels.items(), key=lambda e: e[0]):
            label_cat.add(label, attributes=set(attrs) - special_attrs)
        categories = {AnnotationType.label: label_cat}

    # Group the parsed track annotations by frame index
    items = {}
    for idx, track in enumerate(tracks):
        track_id = idx + 1
        for i, ann in enumerate(
                cls._parse_track(track_id, track, categories)):
            frame_desc = items.setdefault(track['start_frame'] + i,
                {'annotations': []})
            frame_desc['annotations'].append(ann)

    return items, categories