Ejemplo n.º 1
0
 def test_xml_parse(self):
     """Check what ``core.parse_xml`` collects for a SMIL-only manifest.

     The mocked book reports a single manifest entry whose media type is
     ``application/smil+xml`` and is given an empty text list. Only the
     fragment identifiers are expected to be populated; every other
     collection on the collector must stay empty.
     """
     def fake_manifest_iter():
         # Built afresh on every call, exactly like a mock side effect.
         return bk_manifest_iter([
             ('media_overlays1', 'file_href1', 'application/smil+xml'),
         ])

     def fake_text_iter():
         return bk_text_iter([])

     self.bk.manifest_iter.side_effect = fake_manifest_iter
     self.bk.text_iter.side_effect = fake_text_iter

     attributes = core.XHTMLAttributes()
     core.parse_xml(self.bk, attributes, self.prefs)

     expected_fragments = {
         'ch3_figure1',
         'ch3_figure1_title',
         'ch3_figure1_caption',
         'ch3_figure1_text1',
         'ch3_figure1_text2',
     }

     self.assertEqual(attributes.class_names, set())
     self.assertEqual(attributes.literal_class_values, set())
     self.assertEqual(attributes.id_values, set())
     self.assertEqual(attributes.fragment_identifier, expected_fragments)
     self.assertEqual(attributes.info_class_names, {})
     self.assertEqual(attributes.info_id_values, {})
Ejemplo n.º 2
0
def _load_pascal_annotations(_abs_xml_path, _args):
    """
    Collect the category and area of every object in a Pascal VOC style XML.

    :param _abs_xml_path: absolute path of the xml file, handed to ``parse_xml``
    :param _args: input args (not read by this function)
    :return: dict holding two parallel lists, for example
        {'name': [class1, class2, ...], 'area': [area1, area2, ...]}
    """
    entries = parse_xml(_abs_xml_path)['annotation']

    # An XML without objects is only reported; the (empty) result is still
    # returned to the caller.
    if len(entries) == 0:
        tqdm.write(" There is no objects in xml : %s" % os.path.basename(_abs_xml_path))

    names = []
    areas = []
    for entry in entries:
        names.append(entry['category'])
        areas.append(entry['area'])

    return {'name': names, 'area': areas}
Ejemplo n.º 3
0
def _load_pascal_annotations(_abs_xml_path, _args):
    """
    Collect the category and area of every object in a Pascal VOC style XML.

    :param _abs_xml_path: absolute path of the xml file, handed to ``parse_xml``
    :param _args: input args (not read by this function)
    :return: dict holding two parallel lists, for example
        {'name': [class1, class2, ...], 'area': [area1, area2, ...]},
        or None when ``parse_xml`` returns None for the file
    """
    parsed = parse_xml(_abs_xml_path)
    if parsed is None:
        # Nothing usable came back from the parser; let the caller skip it.
        return None

    entries = parsed['annotation']
    if len(entries) == 0:
        tqdm.write(" There is no objects in xml : %s" %
                   os.path.basename(_abs_xml_path))

    collected = {'name': [], 'area': []}
    for entry in entries:
        # hl -> the disabled lines below were used to filter out xml files
        # with incorrect labels:
        # if entry['category'] == 'unknow' or entry['category'] == 'Copy of bus tail' or entry['category'] == 'Copy of car tail' \
        #  or entry['category'] == 'Copy of car head'  or entry['category'] == 'high_60' or entry['category'] == 'high_40':
        #     print(f"\n{_abs_xml_path} is {entry['category']}")
        #     return None

        collected['name'].append(entry['category'])
        collected['area'].append(entry['area'])

    return collected
Ejemplo n.º 4
0
if __name__ == '__main__':
    # Script entry point: for every XML in ``args.org_path``, compare the
    # width/height stored in the annotation with the real size of the
    # matching PNG under ``args.image_path`` and correct the annotation's
    # image description in memory when they disagree.
    args = parse_args()

    src_path = args.org_path
    dst_path = args.save_path

    xmls = [item for item in os.listdir(src_path) if item.endswith('.xml')]

    for xml_name in tqdm(xmls, ncols=100):
        abs_src_xml_path = os.path.join(src_path, xml_name)
        abs_dst_xml_path = os.path.join(dst_path, xml_name)
        abs_image_path = os.path.join(args.image_path, "%s.png" % os.path.splitext(xml_name)[0])
        annotation = parse_xml(abs_src_xml_path)

        image_des = annotation['image']

        if not os.path.exists(abs_image_path):
            tqdm.write("Non existed pic path: %s" % abs_image_path)
            continue

        cv_image = cv2.imread(abs_image_path)
        if cv_image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # instead of raising; skip it rather than crash on ``.shape``.
            tqdm.write("Unreadable pic path: %s" % abs_image_path)
            continue

        image_height, image_width = cv_image.shape[:2]

        if image_height != image_des['height'] or image_width != image_des['width']:
            # Trust the decoded image over the size recorded in the XML.
            image_des['height'] = image_height
            image_des['width'] = image_width

            tqdm.write("Invalid image width/height found in: %s" % xml_name)
Ejemplo n.º 5
0
    def convert(self):
        """Fill ``self.m_json_dict`` from the indexed XML files and save it as JSON.

        For each name in ``self.m_xml_index`` the matching ``<name>.xml`` under
        ``self.m_xml_path`` is parsed. Its image description is appended to
        ``images`` and one entry per accepted object is appended to
        ``annotations``. Image ids and annotation ids both start at 1.

        An XML whose image file is missing under ``self.m_pics_path`` is
        reported and skipped without consuming an image id. Labels are
        lower-cased, ``'prohibit'`` is stored as ``'limitspeed'``, and labels
        absent from ``self.m_categories`` are dropped. Finally every category
        except ``'background'`` is listed under ``categories`` and the whole
        dictionary is written to ``self.m_save_path``.

        :raises ValueError: when an indexed xml file does not exist
        """
        next_box_id = 1
        next_image_id = 1

        # Walk through every indexed XML file.
        for xml in tqdm(self.m_xml_index, ncols=100, desc="VOC2COCO"):
            abs_xml_path = os.path.join(self.m_xml_path, "%s.xml" % xml)
            if not os.path.exists(abs_xml_path):
                raise ValueError("Non existed xml path: %s" % abs_xml_path)

            parsed = parse_xml(abs_xml_path, "png")
            print(abs_xml_path + '\n')
            image_des = parsed['image']
            image_des['id'] = next_image_id

            abs_image_path = os.path.join(self.m_pics_path,
                                          image_des['file_name'])
            if not os.path.exists(abs_image_path):
                tqdm.write("Non existed pic path: %s" % abs_image_path)
                continue

            self.m_json_dict['images'].append(image_des)

            # TODO: Support segmentation. Currently we do not support segmentation.
            objs = parsed['annotation']
            if len(objs) == 0:
                tqdm.write(" There is no objects in xml : %s" %
                           os.path.basename(xml))

            for obj in objs:
                # Labels are stored lower-case so that XML files containing
                # upper-case letters are handled as well.
                label = obj['category'].lower()
                category = 'limitspeed' if label == 'prohibit' else label

                # Drop labels that are not wanted.
                if category not in self.m_categories:
                    continue

                # Disabled size filter, kept for reference:
                #   if obj['area'] < 40*40:
                #       print('area')
                #       continue

                self.m_json_dict['annotations'].append({
                    'area': obj['area'],
                    'iscrowd': 0,
                    'bbox': obj['bbox'],
                    'category_id': self.m_categories[category],
                    'id': next_box_id,
                    'ignore': 0,
                    'segmentation': [],
                    'image_id': next_image_id,
                })
                next_box_id += 1

            next_image_id += 1

        # Category table; the background entry is left out.
        self.m_json_dict['categories'].extend(
            {
                'supercategory': name.split(' ')[0],
                'id': number,
                'name': name,
            }
            for name, number in self.m_categories.items()
            if name != 'background'
        )

        # Save json.
        with open(self.m_save_path, 'w') as fjson:  ##todo
            json.dump(self.m_json_dict, fjson)
Ejemplo n.º 6
0
    # Read the entries listed in args.image_set and turn each one into an
    # xml file name by appending the '.xml' extension.
    xml_list = mmcv.list_from_file(args.image_set)
    xml_list = list(map(lambda x: x + '.xml', xml_list))

    # Number of xml files that are missing from at least one annotation dir.
    mismatched_nums = 0
    # Iterate over the xml list
    for i in tqdm(range(len(xml_list)), ncols=100, desc='Merging '):
        xml = xml_list[i]
        abs_src_xml_path1 = os.path.join(args.anno_path1, xml)
        abs_src_xml_path2 = os.path.join(args.anno_path2, xml)

        # Both sources must provide the file; otherwise count it, report it
        # and move on without writing anything for this entry.
        if not (os.path.exists(abs_src_xml_path1)
                and os.path.exists(abs_src_xml_path2)):
            mismatched_nums += 1
            tqdm.write("**** Mismatched xml: %s" % xml)
            continue

        src_anno1 = parse_xml(abs_src_xml_path1)
        src_anno2 = parse_xml(abs_src_xml_path2)

        # Disabled sanity check that both annotations describe the same image:
        # # check whether image is same
        # assert src_anno1['image']['file_name'] == src_anno2['image']['file_name'] \
        #        and src_anno1['image']['width'] == src_anno2['image']['width'] \
        #        and src_anno1['image']['height'] == src_anno2['image']['height'], "Mismatched xml %s" % xml

        # Start from the first annotation and append the objects of the second.
        # NOTE(review): if parse_xml returns a plain dict, .copy() is shallow,
        # so the += below also extends src_anno1['annotation'] in place; that
        # is harmless here because src_anno1 is not used again in this loop.
        dst_anno = src_anno1.copy()
        dst_anno['annotation'] += src_anno2['annotation']

        abs_dst_xml_path = os.path.join(args.save_path, xml)
        dump_xml(dst_anno, abs_dst_xml_path)

    print("There are %d mismatched xmls in total!!!" % mismatched_nums)