Beispiel #1
0
 def load_data(self, data_path: str) -> list:
     """
     Read text-line (and optionally character) polygons and ground truth from JSON annotation files.

     Every file is parsed with ``load``; its ``data_list`` entries describe one
     image each, and ``data_root`` is joined with ``img_name`` to build the
     image path. Annotations with an empty polygon or empty text are skipped.
     When ``self.load_char_annotation`` is set, the per-character entries under
     ``chars`` are appended to the same lists as the text-line entries.

     :param data_path: iterable of annotation-file paths; a single path given
         as a plain string is also accepted and treated as a one-element list.
     :return: one dict per image with the keys ``img_path``, ``img_name``,
         ``text_polys`` (``np.array`` of the kept polygons), ``texts`` and
         ``ignore_tags`` (the ``illegibility`` flags of the kept annotations).
     """
     # A bare string would otherwise be iterated character by character.
     if isinstance(data_path, str):
         data_path = [data_path]
     data_list = []
     for path in data_path:
         content = load(path)
         for gt in tqdm(content['data_list'], desc='read file {}'.format(path)):
             img_path = os.path.join(content['data_root'], gt['img_name'])
             polygons = []
             texts = []
             illegibility_list = []
             for annotation in gt['annotations']:
                 polygon = annotation['polygon']
                 text = annotation['text']
                 if len(polygon) == 0 or len(text) == 0:
                     continue
                 # NOTE(review): despite the flag name, the expansion is applied
                 # to texts longer than one character -- confirm this is intended.
                 if len(text) > 1 and self.expand_one_char:
                     polygon = expand_polygon(polygon)
                 polygons.append(polygon)
                 texts.append(text)
                 illegibility_list.append(annotation['illegibility'])
                 if self.load_char_annotation:
                     for char_annotation in annotation['chars']:
                         char_polygon = char_annotation['polygon']
                         char_text = char_annotation['char']
                         if len(char_polygon) == 0 or len(char_text) == 0:
                             continue
                         polygons.append(char_polygon)
                         texts.append(char_text)
                         illegibility_list.append(char_annotation['illegibility'])
             data_list.append({'img_path': img_path, 'img_name': gt['img_name'], 'text_polys': np.array(polygons),
                               'texts': texts, 'ignore_tags': illegibility_list})
     return data_list
Beispiel #2
0
    def load_data(self, data_path: str) -> list:
        """
        Read text polygons and labels from per-image JSON annotation files.

        The JSON/image file pairs are discovered by ``self._check_json``. Each
        JSON file is expected to hold a ``shapes`` list whose entries provide
        ``points`` (the polygon) and ``label`` (the text). Shapes with an empty
        polygon or empty label are skipped. Character-level annotations are not
        read by this loader, and every kept shape gets an ignore tag of
        ``False``.

        :param data_path: container whose first element is handed to
            ``self._check_json``; a plain string is also accepted and used as
            that element directly.
        :return: one dict per JSON/image pair with the keys ``img_path``,
            ``img_name``, ``text_polys`` (``np.array`` of the kept polygons),
            ``texts`` and ``ignore_tags``.
        """
        data_list = []
        # Indexing a bare string would yield only its first character.
        root = data_path if isinstance(data_path, str) else data_path[0]
        json_list, image_list = self._check_json(root)
        for json_path, img_path in tqdm(
                zip(json_list, image_list),
                desc='search file in {}'.format(data_path)):
            polygons = []
            texts = []
            illegibility_list = []

            # Context manager so the handle is closed even if parsing fails.
            with open(json_path, 'r') as json_file:
                info = json.load(json_file)
            for obj in info['shapes']:
                poly = obj['points']
                if len(poly) == 2:
                    # Two points are treated as opposite corners of a box and
                    # expanded to its four corners.
                    [xmin, ymin], [xmax, ymax] = poly[0], poly[1]
                    poly = [[xmin, ymin], [xmax, ymin], [xmax, ymax],
                            [xmin, ymax]]

                text = obj['label']
                if len(poly) == 0 or len(text) == 0:
                    continue
                # NOTE(review): despite the flag name, the expansion is applied
                # to texts longer than one character -- confirm this is intended.
                if len(text) > 1 and self.expand_one_char:
                    poly = expand_polygon(poly)
                polygons.append(poly)
                texts.append(text)
                illegibility_list.append(False)
            data_list.append({
                'img_path': img_path,
                'img_name': os.path.basename(img_path),
                'text_polys': np.array(polygons),
                'texts': texts,
                'ignore_tags': illegibility_list
            })
        print('Got {} useful samples!'.format(len(data_list)))
        return data_list