Example #1
0
    def test_can_decode_compiled_mask(self):
        """A CompiledMask built from a class layer and an instance layer
        should map each instance id back to its class id."""
        label = 1000
        instance = 10000
        # One-pixel mask: class layer holds the class id,
        # instance layer holds the instance id
        base = np.array([1])
        combined = CompiledMask(base * label, base * instance)

        decoded = combined.get_instance_labels()

        self.assertEqual({instance: label}, decoded)
Example #2
0
    def _load_annotations(self, item_id):
        """Loads VOC segmentation annotations for a single item.

        Reads the class segmentation mask and the instance mask files, if
        present, and converts them into Mask annotations.

        Returns:
            list: Mask annotations for the item (possibly empty).
        """
        item_annotations = []

        # Class segmentation: pixel value encodes the class label id
        class_mask = None
        segm_path = osp.join(self._dataset_dir, VocPath.SEGMENTATION_DIR,
                             item_id + VocPath.SEGM_EXT)
        if osp.isfile(segm_path):
            inverse_cls_colormap = \
                self._categories[AnnotationType.mask].inverse_colormap
            # lazy_mask defers reading the file until the data is needed
            class_mask = lazy_mask(segm_path, inverse_cls_colormap)

        # Instance segmentation: pixel value encodes the instance id
        instances_mask = None
        inst_path = osp.join(self._dataset_dir, VocPath.INSTANCES_DIR,
                             item_id + VocPath.SEGM_EXT)
        if osp.isfile(inst_path):
            instances_mask = lazy_mask(inst_path, _inverse_inst_colormap)

        if instances_mask is not None:
            compiled_mask = CompiledMask(class_mask, instances_mask)

            if class_mask is not None:
                # Map each instance id to its class label
                label_cat = self._categories[AnnotationType.label]
                instance_labels = compiled_mask.get_instance_labels(
                    class_count=len(label_cat.items))
            else:
                # No class layer available - instances stay unlabeled
                instance_labels = {
                    i: None
                    for i in range(compiled_mask.instance_count)
                }

            for instance_id, label_id in instance_labels.items():
                image = compiled_mask.lazy_extract(instance_id)

                attributes = {}
                if label_id is not None:
                    # Initialize all declared label attributes (actions)
                    # to False by default
                    actions = {
                        a: False
                        for a in label_cat.items[label_id].attributes
                    }
                    attributes.update(actions)

                item_annotations.append(
                    Mask(image=image,
                         label=label_id,
                         attributes=attributes,
                         group=instance_id))
        elif class_mask is not None:
            # logging.warn() is deprecated - use warning() with lazy args
            log.warning("item '%s': has only class segmentation, "
                        "instance masks will not be available", item_id)
            class_mask = class_mask()  # materialize the lazy mask
            classes = np.unique(class_mask)
            for label_id in classes:
                image = self._lazy_extract_mask(class_mask, label_id)
                item_annotations.append(Mask(image=image, label=label_id))

        return item_annotations
Example #3
0
    def _save_item(self, subset_name, subset, item):
        """Exports one item in ICDAR segmentation format: optionally its
        image, a '<id>_GT.txt' annotation file and a painted GT mask.

        Raises:
            Exception: if a mask lacks the 'index' attribute or a valid
                RGB 'color' attribute.
        """
        if self._save_images and item.has_image:
            self._save_image(item,
                             subdir=osp.join(subset_name,
                                             IcdarPath.IMAGES_DIR))

        annotation = ''
        # Colormap slot 0 is the background color (white)
        colormap = [(255, 255, 255)]
        anns = [a for a in item.annotations if a.type == AnnotationType.mask]
        if anns:
            # Every mask must carry an 'index' attribute - it orders the
            # records and defines the 1-based instance ids in the GT mask.
            # Previously a missing 'index' surfaced later as a bare
            # KeyError; validate up-front like save_annotations() does.
            if any('index' not in a.attributes for a in anns):
                raise Exception("Item %s: a mask must have "
                                "'index' attribute" % item.id)
            anns = sorted(anns, key=lambda a: int(a.attributes['index']))
            group = anns[0].group
            for ann in anns:
                # Groups of records are separated by an empty line
                if ann.group != group or (not ann.group
                                          and anns[0].group != 0):
                    annotation += '\n'
                text = ''
                if ann.attributes:
                    if 'text' in ann.attributes:
                        text = ann.attributes['text']
                    # A single-space text marks a "do not care" region
                    if text == ' ':
                        annotation += '#'
                    if 'color' in ann.attributes and \
                            len(ann.attributes['color'].split()) == 3:
                        color = ann.attributes['color'].split()
                        colormap.append(
                            (int(color[0]), int(color[1]), int(color[2])))
                        annotation += ' '.join(p for p in color)
                    else:
                        raise Exception(
                            "Item %s: a mask must have "
                            "an RGB color attribute, e.g. '10 7 50'" % item.id)
                    if 'center' in ann.attributes:
                        annotation += ' %s' % ann.attributes['center']
                    else:
                        annotation += ' - -'
                bbox = ann.get_bbox()
                annotation += ' %s %s %s %s' % (bbox[0], bbox[1], bbox[0] +
                                                bbox[2], bbox[1] + bbox[3])
                annotation += ' \"%s\"' % text
                annotation += '\n'
                group = ann.group

            # Instance ids in the mask are 1-based; 0 is background
            mask = CompiledMask.from_instance_masks(
                anns,
                instance_labels=[m.attributes['index'] + 1 for m in anns])
            mask = paint_mask(mask.class_mask,
                              {i: colormap[i]
                               for i in range(len(colormap))})
            save_image(osp.join(self._save_dir, subset_name,
                                item.id + '_GT' + IcdarPath.GT_EXT),
                       mask,
                       create_dir=True)

        anno_file = osp.join(self._save_dir, subset_name,
                             item.id + '_GT' + '.txt')
        os.makedirs(osp.dirname(anno_file), exist_ok=True)
        with open(anno_file, 'w', encoding='utf-8') as f:
            f.write(annotation)
    def apply(self):
        """Writes out every subset: the compiled class masks, optionally
        the item images, a per-subset segmentation list and the label map.
        """
        subset_dir = self._save_dir
        os.makedirs(subset_dir, exist_ok=True)

        for subset_name, subset in self._extractor.subsets().items():
            segm_list = {}
            for item in subset:
                masks = [ann for ann in item.annotations
                         if ann.type == AnnotationType.mask]

                if masks:
                    # Merge the instance masks into one class-id map
                    labels = [self._label_id_mapping(m.label) for m in masks]
                    compiled_mask = CompiledMask.from_instance_masks(
                        masks, instance_labels=labels)

                    mask_path = osp.join(
                        subset_dir, subset_name + CamvidPath.SEGM_DIR,
                        item.id + CamvidPath.IMAGE_EXT)
                    self.save_segm(mask_path, compiled_mask.class_mask)

                # Record whether this item has a segmentation mask
                segm_list[item.id] = bool(masks)

                if self._save_images:
                    self._save_image(
                        item,
                        osp.join(subset_dir, subset_name,
                                 item.id + CamvidPath.IMAGE_EXT))

            self.save_segm_lists(subset_name, segm_list)
        self.save_label_map()
Example #5
0
    def apply(self):
        """Exports all subsets: optionally the images, the compiled class
        masks, per-subset (image, mask) path lists and the label map."""
        os.makedirs(self._save_dir, exist_ok=True)

        for subset_name, subset in self._extractor.subsets().items():
            segm_list = {}
            for item in subset:
                image_path = self._make_image_filename(item,
                                                       subdir=subset_name)
                if self._save_images:
                    self._save_image(item,
                                     osp.join(self._save_dir, image_path))

                masks = [ann for ann in item.annotations
                         if ann.type == AnnotationType.mask]

                # Items without masks get an empty mask path in the list
                mask_path = ''
                if masks:
                    labels = [self._label_id_mapping(m.label) for m in masks]
                    compiled_mask = CompiledMask.from_instance_masks(
                        masks, instance_labels=labels)

                    mask_path = osp.join(subset_name + CamvidPath.SEGM_DIR,
                                         item.id + CamvidPath.MASK_EXT)
                    self.save_segm(osp.join(self._save_dir, mask_path),
                                   compiled_mask.class_mask)

                segm_list[item.id] = (image_path, mask_path)

            self.save_segm_lists(subset_name, segm_list)
        self.save_label_map()
    def save_annotations(self, item, path):
        """Renders the item's mask annotations into an ICDAR segmentation
        text record (stored in ``self.annotations[item.id]``) and writes
        the painted '<id>_GT' mask image under *path*.

        Raises:
            Exception: if a mask lacks the 'index' attribute or a valid
                RGB 'color' attribute.
        """
        annotation = ''
        # Colormap slot 0 is the background color (white)
        colormap = [(255, 255, 255)]
        anns = [a for a in item.annotations if a.type == AnnotationType.mask]
        if anns:
            # Every mask must carry an 'index' attribute - it orders the
            # records and defines the 1-based instance ids in the GT mask.
            # (The original error message lacked a space: "must have'index'")
            if any('index' not in a.attributes for a in anns):
                raise Exception("Item %s: a mask must have "
                                "'index' attribute" % item.id)
            anns = sorted(anns, key=lambda a: a.attributes['index'])
            group = anns[0].group
            for ann in anns:
                # Groups of records are separated by an empty line
                if ann.group != group or (not ann.group
                                          and anns[0].group != 0):
                    annotation += '\n'
                text = ''
                if ann.attributes:
                    if 'text' in ann.attributes:
                        text = ann.attributes['text']
                    # A single-space text marks a "do not care" region
                    if text == ' ':
                        annotation += '#'
                    if 'color' in ann.attributes and \
                            len(ann.attributes['color'].split()) == 3:
                        color = ann.attributes['color'].split()
                        colormap.append(
                            (int(color[0]), int(color[1]), int(color[2])))
                        annotation += ' '.join(p for p in color)
                    else:
                        raise Exception(
                            "Item %s: a mask must have "
                            "an RGB color attribute, e. g. '10 7 50'" %
                            item.id)
                    if 'center' in ann.attributes:
                        annotation += ' %s' % ann.attributes['center']
                    else:
                        annotation += ' - -'
                bbox = ann.get_bbox()
                annotation += ' %s %s %s %s' % (bbox[0], bbox[1], bbox[0] +
                                                bbox[2], bbox[1] + bbox[3])
                annotation += ' \"%s\"' % text
                annotation += '\n'
                group = ann.group

            # Instance ids in the mask are 1-based; 0 is background
            mask = CompiledMask.from_instance_masks(
                anns,
                instance_labels=[m.attributes['index'] + 1 for m in anns])
            mask = paint_mask(mask.class_mask,
                              {i: colormap[i]
                               for i in range(len(colormap))})
            save_image(osp.join(path, item.id + '_GT' + IcdarPath.GT_EXT),
                       mask,
                       create_dir=True)
        self.annotations[item.id] = annotation
    def save_subsets(self):
        """Exports every subset of the extractor in PASCAL VOC layout.

        Depending on ``self._tasks``, writes per-item XML annotation files
        (detection / person layout / action classification), class and
        instance segmentation masks, and the per-task ImageSets file lists
        via the ``save_*_lists`` helpers.
        """
        for subset_name, subset in self._extractor.subsets().items():
            # Per-subset accumulators for the ImageSets file lists
            class_lists = OrderedDict()
            clsdet_list = OrderedDict()
            action_list = OrderedDict()
            layout_list = OrderedDict()
            segm_list = OrderedDict()
            # Track which annotation kinds actually occurred, so only
            # the relevant list files are written at the end
            has_classes = False
            has_dets = False
            has_actions = False
            has_layouts = False
            has_masks = False

            for item in subset:
                log.debug("Converting item '%s'", item.id)

                image_filename = self._make_image_filename(item)
                if self._save_images:
                    if item.has_image and item.image.has_data:
                        self._save_image(
                            item, osp.join(self._images_dir, image_filename))
                    else:
                        log.debug("Item '%s' has no image", item.id)

                # Split the item's annotations by type
                labels = []
                bboxes = []
                masks = []
                for a in item.annotations:
                    if a.type == AnnotationType.label:
                        labels.append(a)
                    elif a.type == AnnotationType.bbox:
                        bboxes.append(a)
                    elif a.type == AnnotationType.mask:
                        masks.append(a)

                if self._tasks & {
                        VocTask.detection, VocTask.person_layout,
                        VocTask.action_classification
                }:
                    # Build the per-item XML annotation document
                    root_elem = ET.Element('annotation')
                    # VOC ids look like '<folder>_<name>'; recover the folder
                    if '_' in item.id:
                        folder = item.id[:item.id.find('_')]
                    else:
                        folder = ''
                    ET.SubElement(root_elem, 'folder').text = folder
                    ET.SubElement(root_elem, 'filename').text = image_filename

                    source_elem = ET.SubElement(root_elem, 'source')
                    ET.SubElement(source_elem, 'database').text = 'Unknown'
                    ET.SubElement(source_elem, 'annotation').text = 'Unknown'
                    ET.SubElement(source_elem, 'image').text = 'Unknown'

                    if item.has_image:
                        h, w = item.image.size
                        size_elem = ET.SubElement(root_elem, 'size')
                        ET.SubElement(size_elem, 'width').text = str(w)
                        ET.SubElement(size_elem, 'height').text = str(h)
                        ET.SubElement(size_elem, 'depth').text = ''

                    item_segmented = 0 < len(masks)
                    ET.SubElement(root_elem, 'segmented').text = \
                        str(int(item_segmented))

                    objects_with_parts = []
                    objects_with_actions = defaultdict(dict)

                    # Separate whole-object boxes from body-part boxes
                    main_bboxes = []
                    layout_bboxes = []
                    for bbox in bboxes:
                        label = self.get_label(bbox.label)
                        if self._is_part(label):
                            layout_bboxes.append(bbox)
                        elif self._is_label(label):
                            main_bboxes.append(bbox)

                    for new_obj_id, obj in enumerate(main_bboxes):
                        attr = obj.attributes

                        obj_elem = ET.SubElement(root_elem, 'object')

                        obj_label = self.get_label(obj.label)
                        ET.SubElement(obj_elem, 'name').text = obj_label

                        # Standard VOC object attributes, written only
                        # when present on the annotation
                        if 'pose' in attr:
                            ET.SubElement(obj_elem, 'pose').text = \
                                str(attr['pose'])

                        if 'truncated' in attr:
                            truncated = _convert_attr('truncated', attr, int,
                                                      0)
                            ET.SubElement(obj_elem, 'truncated').text = \
                                '%d' % truncated

                        if 'difficult' in attr:
                            difficult = _convert_attr('difficult', attr, int,
                                                      0)
                            ET.SubElement(obj_elem, 'difficult').text = \
                                '%d' % difficult

                        if 'occluded' in attr:
                            occluded = _convert_attr('occluded', attr, int, 0)
                            ET.SubElement(obj_elem, 'occluded').text = \
                                '%d' % occluded

                        bbox = obj.get_bbox()
                        if bbox is not None:
                            _write_xml_bbox(bbox, obj_elem)

                        # Attach part boxes that share the object's group
                        for part_bbox in filter(
                                lambda x: obj.group and obj.group == x.group,
                                layout_bboxes):
                            part_elem = ET.SubElement(obj_elem, 'part')
                            ET.SubElement(part_elem, 'name').text = \
                                self.get_label(part_bbox.label)
                            _write_xml_bbox(part_bbox.get_bbox(), part_elem)

                            objects_with_parts.append(new_obj_id)

                        label_actions = self._get_actions(obj_label)
                        actions_elem = ET.Element('actions')
                        for action in label_actions:
                            present = 0
                            if action in attr:
                                present = _convert_attr(
                                    action, attr, lambda v: int(v == True), 0)
                                ET.SubElement(actions_elem, action).text = \
                                    '%d' % present

                            objects_with_actions[new_obj_id][action] = present
                        if len(actions_elem) != 0:
                            obj_elem.append(actions_elem)

                        if self._allow_attributes:
                            # Custom (non-VOC) attributes go into a
                            # separate 'attributes' section
                            native_attrs = set(self.BUILTIN_ATTRS)
                            native_attrs.update(label_actions)

                            attrs_elem = ET.Element('attributes')
                            for k, v in attr.items():
                                if k in native_attrs:
                                    continue
                                attr_elem = ET.SubElement(
                                    attrs_elem, 'attribute')
                                ET.SubElement(attr_elem, 'name').text = str(k)
                                ET.SubElement(attr_elem, 'value').text = str(v)
                            if len(attrs_elem):
                                obj_elem.append(attrs_elem)

                    if self._tasks & {
                            VocTask.detection, VocTask.person_layout,
                            VocTask.action_classification
                    }:
                        ann_path = osp.join(self._ann_dir, item.id + '.xml')
                        os.makedirs(osp.dirname(ann_path), exist_ok=True)
                        with open(ann_path, 'w') as f:
                            f.write(
                                ET.tostring(root_elem,
                                            encoding='unicode',
                                            pretty_print=True))

                    clsdet_list[item.id] = True
                    layout_list[item.id] = objects_with_parts
                    action_list[item.id] = objects_with_actions
                    has_dets = True
                    has_layouts |= len(objects_with_parts) != 0
                    has_actions |= len(objects_with_actions) != 0

                for label_ann in labels:
                    label = self.get_label(label_ann.label)
                    if not self._is_label(label):
                        continue
                    class_list = class_lists.get(item.id, set())
                    class_list.add(label_ann.label)
                    class_lists[item.id] = class_list
                    has_classes = True

                    clsdet_list[item.id] = True

                if masks:
                    # Merge instance masks into combined class/instance maps
                    compiled_mask = CompiledMask.from_instance_masks(
                        masks,
                        instance_labels=[
                            self._label_id_mapping(m.label) for m in masks
                        ])

                    self.save_segm(
                        osp.join(self._segm_dir, item.id + VocPath.SEGM_EXT),
                        compiled_mask.class_mask)
                    self.save_segm(osp.join(self._inst_dir,
                                            item.id + VocPath.SEGM_EXT),
                                   compiled_mask.instance_mask,
                                   colormap=VocInstColormap)

                    segm_list[item.id] = True
                    has_masks = True

                if len(item.annotations) == 0:
                    # Annotation-less item: record it with an explicit
                    # None marker in every list
                    clsdet_list[item.id] = None
                    layout_list[item.id] = None
                    action_list[item.id] = None
                    segm_list[item.id] = None

            # Write the ImageSets lists only for tasks that had data
            if (has_classes or has_dets) and self._tasks & {
                    VocTask.classification, VocTask.detection,
                    VocTask.action_classification, VocTask.person_layout
            }:
                self.save_clsdet_lists(subset_name, clsdet_list)
                if has_classes and self._tasks & {VocTask.classification}:
                    self.save_class_lists(subset_name, class_lists)
            if has_actions and self._tasks & {VocTask.action_classification}:
                self.save_action_lists(subset_name, action_list)
            if has_layouts and self._tasks & {VocTask.person_layout}:
                self.save_layout_lists(subset_name, layout_list)
            if has_masks and self._tasks & {VocTask.segmentation}:
                self.save_segm_lists(subset_name, segm_list)
Example #8
0
    def save_subsets(self):
        """Exports every subset of the extractor in PASCAL VOC layout:
        per-item XML annotations, segmentation masks and the ImageSets
        file lists.

        Fix: the list files are now written once per subset. Previously
        the ``save_*_lists`` calls sat inside the per-item loop, rewriting
        every list file after each item (same final result, redundant I/O,
        and partially-written lists if interrupted).
        """
        subsets = self._extractor.subsets()
        if len(subsets) == 0:
            # No named subsets - export the whole extractor as the default
            subsets = [None]

        for subset_name in subsets:
            if subset_name:
                subset = self._extractor.get_subset(subset_name)
            else:
                subset_name = DEFAULT_SUBSET_NAME
                subset = self._extractor

            # Per-subset accumulators for the ImageSets file lists
            class_lists = OrderedDict()
            clsdet_list = OrderedDict()
            action_list = OrderedDict()
            layout_list = OrderedDict()
            segm_list = OrderedDict()

            for item in subset:
                log.debug("Converting item '%s'", item.id)

                image_filename = ''
                if item.has_image:
                    image_filename = item.image.filename
                if self._save_images:
                    if item.has_image and item.image.has_data:
                        if image_filename:
                            image_filename = osp.splitext(image_filename)[0]
                        else:
                            image_filename = item.id
                        image_filename += VocPath.IMAGE_EXT
                        save_image(osp.join(self._images_dir, image_filename),
                                   item.image.data)
                    else:
                        # Lazy %-args, consistent with the other log calls
                        log.debug("Item '%s' has no image", item.id)

                # Split the item's annotations by type
                labels = []
                bboxes = []
                masks = []
                for a in item.annotations:
                    if a.type == AnnotationType.label:
                        labels.append(a)
                    elif a.type == AnnotationType.bbox:
                        bboxes.append(a)
                    elif a.type == AnnotationType.mask:
                        masks.append(a)

                if len(bboxes) != 0:
                    # Build the per-item XML annotation document
                    root_elem = ET.Element('annotation')
                    # VOC ids look like '<folder>_<name>'; recover the folder
                    if '_' in item.id:
                        folder = item.id[:item.id.find('_')]
                    else:
                        folder = ''
                    ET.SubElement(root_elem, 'folder').text = folder
                    ET.SubElement(root_elem, 'filename').text = image_filename

                    source_elem = ET.SubElement(root_elem, 'source')
                    ET.SubElement(source_elem, 'database').text = 'Unknown'
                    ET.SubElement(source_elem, 'annotation').text = 'Unknown'
                    ET.SubElement(source_elem, 'image').text = 'Unknown'

                    if item.has_image:
                        h, w = item.image.size
                        if item.image.has_data:
                            image_shape = item.image.data.shape
                            c = 1 if len(image_shape) == 2 else image_shape[2]
                        else:
                            # Assume 3-channel when pixel data is unavailable
                            c = 3
                        size_elem = ET.SubElement(root_elem, 'size')
                        ET.SubElement(size_elem, 'width').text = str(w)
                        ET.SubElement(size_elem, 'height').text = str(h)
                        ET.SubElement(size_elem, 'depth').text = str(c)

                    item_segmented = 0 < len(masks)
                    ET.SubElement(root_elem, 'segmented').text = \
                        str(int(item_segmented))

                    objects_with_parts = []
                    objects_with_actions = defaultdict(dict)

                    # Separate whole-object boxes from body-part boxes
                    main_bboxes = []
                    layout_bboxes = []
                    for bbox in bboxes:
                        label = self.get_label(bbox.label)
                        if self._is_part(label):
                            layout_bboxes.append(bbox)
                        elif self._is_label(label):
                            main_bboxes.append(bbox)

                    for new_obj_id, obj in enumerate(main_bboxes):
                        attr = obj.attributes

                        obj_elem = ET.SubElement(root_elem, 'object')

                        obj_label = self.get_label(obj.label)
                        ET.SubElement(obj_elem, 'name').text = obj_label

                        # Standard VOC object attributes, written only
                        # when present on the annotation
                        if 'pose' in attr:
                            pose = _convert_attr('pose', attr,
                                                 lambda v: VocPose[v],
                                                 VocPose.Unspecified)
                            ET.SubElement(obj_elem, 'pose').text = pose.name

                        if 'truncated' in attr:
                            truncated = _convert_attr('truncated', attr, int,
                                                      0)
                            ET.SubElement(obj_elem, 'truncated').text = \
                                '%d' % truncated

                        if 'difficult' in attr:
                            difficult = _convert_attr('difficult', attr, int,
                                                      0)
                            ET.SubElement(obj_elem, 'difficult').text = \
                                '%d' % difficult

                        if 'occluded' in attr:
                            occluded = _convert_attr('occluded', attr, int, 0)
                            ET.SubElement(obj_elem, 'occluded').text = \
                                '%d' % occluded

                        bbox = obj.get_bbox()
                        if bbox is not None:
                            _write_xml_bbox(bbox, obj_elem)

                        # Attach part boxes that share the object's group
                        for part_bbox in filter(
                                lambda x: obj.group and obj.group == x.group,
                                layout_bboxes):
                            part_elem = ET.SubElement(obj_elem, 'part')
                            ET.SubElement(part_elem, 'name').text = \
                                self.get_label(part_bbox.label)
                            _write_xml_bbox(part_bbox.get_bbox(), part_elem)

                            objects_with_parts.append(new_obj_id)

                        label_actions = self._get_actions(obj_label)
                        actions_elem = ET.Element('actions')
                        for action in label_actions:
                            present = 0
                            if action in attr:
                                present = _convert_attr(
                                    action, attr, lambda v: int(v == True), 0)
                                ET.SubElement(actions_elem, action).text = \
                                    '%d' % present

                            objects_with_actions[new_obj_id][action] = present
                        if len(actions_elem) != 0:
                            obj_elem.append(actions_elem)

                    if self._tasks & {
                            None, VocTask.detection, VocTask.person_layout,
                            VocTask.action_classification
                    }:
                        with open(osp.join(self._ann_dir, item.id + '.xml'),
                                  'w') as f:
                            f.write(
                                ET.tostring(root_elem,
                                            encoding='unicode',
                                            pretty_print=True))

                    clsdet_list[item.id] = True
                    layout_list[item.id] = objects_with_parts
                    action_list[item.id] = objects_with_actions

                for label_ann in labels:
                    label = self.get_label(label_ann.label)
                    if not self._is_label(label):
                        continue
                    class_list = class_lists.get(item.id, set())
                    class_list.add(label_ann.label)
                    class_lists[item.id] = class_list

                    clsdet_list[item.id] = True

                if masks:
                    # Merge instance masks into combined class/instance maps
                    compiled_mask = CompiledMask.from_instance_masks(
                        masks,
                        instance_labels=[
                            self._label_id_mapping(m.label) for m in masks
                        ])

                    self.save_segm(
                        osp.join(self._segm_dir, item.id + VocPath.SEGM_EXT),
                        compiled_mask.class_mask)
                    self.save_segm(osp.join(self._inst_dir,
                                            item.id + VocPath.SEGM_EXT),
                                   compiled_mask.instance_mask,
                                   colormap=VocInstColormap)

                    segm_list[item.id] = True

                if len(item.annotations) == 0:
                    # Annotation-less item: record it with an explicit
                    # None marker in every list
                    clsdet_list[item.id] = None
                    layout_list[item.id] = None
                    action_list[item.id] = None
                    segm_list[item.id] = None

            # Write the ImageSets lists once per subset (previously this
            # block ran inside the item loop above)
            if self._tasks & {
                    None, VocTask.classification, VocTask.detection,
                    VocTask.action_classification, VocTask.person_layout
            }:
                self.save_clsdet_lists(subset_name, clsdet_list)
                if self._tasks & {None, VocTask.classification}:
                    self.save_class_lists(subset_name, class_lists)
            if self._tasks & {None, VocTask.action_classification}:
                self.save_action_lists(subset_name, action_list)
            if self._tasks & {None, VocTask.person_layout}:
                self.save_layout_lists(subset_name, layout_list)
            if self._tasks & {None, VocTask.segmentation}:
                self.save_segm_lists(subset_name, segm_list)
Example #9
0
    def _get_annotations(self, item_id):
        """Collect all annotations available for a dataset item.

        Depending on the extractor task, gathers segmentation masks,
        classification labels and detection bounding boxes for the item.

        Args:
            item_id: item identifier (file name without extension).

        Returns:
            list: annotation objects (Mask, Label, Bbox) for the item.
        """
        item_annotations = []

        if self._task is VocTask.segmentation:
            item_annotations += self._parse_segm_annotations(item_id)

        item_annotations += self._parse_cls_annotations(item_id)
        item_annotations += self._parse_det_annotations(item_id)

        return item_annotations

    def _parse_segm_annotations(self, item_id):
        # Loads class and instance segmentation masks, if present on disk.
        annotations = []

        class_mask = None
        segm_path = osp.join(self._path, VocPath.SEGMENTATION_DIR,
                             item_id + VocPath.SEGM_EXT)
        if osp.isfile(segm_path):
            # Masks are loaded lazily - decoded only on first access
            inverse_cls_colormap = \
                self._categories[AnnotationType.mask].inverse_colormap
            class_mask = lazy_mask(segm_path, inverse_cls_colormap)

        instances_mask = None
        inst_path = osp.join(self._path, VocPath.INSTANCES_DIR,
                             item_id + VocPath.SEGM_EXT)
        if osp.isfile(inst_path):
            instances_mask = lazy_mask(inst_path, _inverse_inst_colormap)

        if instances_mask is not None:
            compiled_mask = CompiledMask(class_mask, instances_mask)

            if class_mask is not None:
                # Match every instance with its class from the class mask
                label_cat = self._categories[AnnotationType.label]
                instance_labels = compiled_mask.get_instance_labels(
                    class_count=len(label_cat.items))
            else:
                # No class info - report instances without labels
                instance_labels = {
                    i: None
                    for i in range(compiled_mask.instance_count)
                }

            for instance_id, label_id in instance_labels.items():
                image = compiled_mask.lazy_extract(instance_id)

                attributes = {}
                if label_id is not None:
                    # All actions known for this label default to "absent"
                    attributes.update(
                        (a, False)
                        for a in label_cat.items[label_id].attributes)

                annotations.append(
                    Mask(image=image,
                         label=label_id,
                         attributes=attributes,
                         group=instance_id))
        elif class_mask is not None:
            # logging.warn() is a deprecated alias, use warning()
            log.warning("item '%s': has only class segmentation, "
                        "instance masks will not be available" % item_id)
            # NOTE(review): ndarray has no unique() method; if .image is a
            # plain numpy array this should be np.unique(class_mask.image) -
            # verify the type returned by lazy_mask
            classes = class_mask.image.unique()
            for label_id in classes:
                image = self._lazy_extract_mask(class_mask, label_id)
                annotations.append(Mask(image=image, label=label_id))

        return annotations

    def _parse_cls_annotations(self, item_id):
        # Produces Label annotations; only active in the classification task.
        annotations = []

        cls_annotations = self._annotations.get(VocTask.classification)
        if cls_annotations is not None and \
                self._task is VocTask.classification:
            item_labels = cls_annotations.get(item_id)
            if item_labels is not None:
                annotations.extend(
                    Label(label_id) for label_id in item_labels)

        return annotations

    @staticmethod
    def _parse_flag(parent_elem, tag):
        # True iff the child element exists and its text is '1'
        child = parent_elem.find(tag)
        return child is not None and child.text == '1'

    def _parse_det_annotations(self, item_id):
        # Parses the item's XML detection annotations into Bbox objects.
        annotations = []

        det_annotations = self._annotations.get(VocTask.detection)
        if det_annotations is not None:
            det_annotations = det_annotations.get(item_id)
        if det_annotations is None:
            return annotations

        root_elem = ET.fromstring(det_annotations)

        # Object ids (and groups) are 1-based
        for obj_id, object_elem in enumerate(root_elem.findall('object'), 1):
            attributes = {}
            group = obj_id

            obj_label_id = None
            label_elem = object_elem.find('name')
            if label_elem is not None:
                obj_label_id = self._get_label_id(label_elem.text)

            obj_bbox = self._parse_bbox(object_elem)

            # Skip objects with no usable label or box
            if obj_label_id is None or obj_bbox is None:
                continue

            attributes['difficult'] = \
                self._parse_flag(object_elem, 'difficult')
            attributes['truncated'] = \
                self._parse_flag(object_elem, 'truncated')
            attributes['occluded'] = \
                self._parse_flag(object_elem, 'occluded')

            pose_elem = object_elem.find('pose')
            if pose_elem is not None:
                attributes['pose'] = pose_elem.text

            point_elem = object_elem.find('point')
            if point_elem is not None:
                point_x = point_elem.find('x')
                point_y = point_elem.find('y')
                attributes['point'] = [
                    float(point_x.text), float(point_y.text)]

            # All actions known for this label default to False,
            # then the ones marked in the file are switched on
            actions_elem = object_elem.find('actions')
            actions = {a: False
                for a in self._categories[AnnotationType.label] \
                    .items[obj_label_id].attributes}
            if actions_elem is not None:
                for action_elem in actions_elem:
                    actions[action_elem.tag] = (action_elem.text == '1')
            attributes.update(actions)

            has_parts = False
            for part_elem in object_elem.findall('part'):
                part = part_elem.find('name').text
                part_label_id = self._get_label_id(part)
                part_bbox = self._parse_bbox(part_elem)

                # Body parts are only relevant in the layout task
                if self._task is not VocTask.person_layout:
                    break
                if part_bbox is None:
                    continue
                has_parts = True
                annotations.append(
                    Bbox(*part_bbox, label=part_label_id, group=group))

            if self._task is VocTask.person_layout and not has_parts:
                continue
            if self._task is VocTask.action_classification and not actions:
                continue

            annotations.append(
                Bbox(*obj_bbox,
                     label=obj_label_id,
                     attributes=attributes,
                     id=obj_id,
                     group=group))

        return annotations