コード例 #1
0
def cvt(gt_path, save_path, img_folder):
    """
    Convert ICDAR2015-format ground truth into the JSON-style annotation dict.

    Every ``.txt`` file returned by ``get_file_list`` for ``gt_path`` is read
    with ``load``. Each non-blank line is parsed as
    ``x1,y1,x2,y2,x3,y3,x4,y4,transcription`` and becomes one annotation of
    the image named after the label file (``.txt`` replaced by ``.jpg``).
    The resulting dict is handed to ``save``.

    :param gt_path: location searched for ``.txt`` label files
    :param save_path: destination passed to ``save``
    :param img_folder: stored unchanged under the ``data_root`` key
    :return: None
    """
    gt_dict = {'data_root': img_folder}
    data_list = []
    for file_path in tqdm(get_file_list(gt_path, p_postfix=['.txt'])):
        content = load(file_path)
        file_path = pathlib.Path(file_path)
        img_name = file_path.name.replace('.txt', '.jpg')
        cur_gt = {'img_name': img_name, 'annotations': []}
        for line in content:
            # A blank line carries no annotation and cannot be parsed as
            # eight floats, so skip it instead of failing.
            if not line.strip():
                continue
            # Split at most 8 times: the transcription is the ninth field and
            # may itself contain commas, which must stay part of the text.
            fields = line.split(',', 8)
            # Text-line level information
            x1, y1, x2, y2, x3, y3, x4, y4 = map(float, fields[:8])
            text = fields[-1]
            cur_gt['annotations'].append({
                'polygon': [[x1, y1], [x2, y2], [x3, y3], [x4, y4]],
                'text': text,
                # '*' and '###' are treated as unreadable-text markers.
                'illegibility': text in ('*', '###'),
                'language': 'Latin',
                # This format has no character-level labels, so a single
                # empty placeholder entry is emitted.
                'chars': [{
                    'polygon': [],
                    'char': '',
                    'illegibility': False,
                    'language': 'Latin',
                }],
            })
        data_list.append(cur_gt)
    gt_dict['data_list'] = data_list
    save(gt_dict, save_path)
コード例 #2
0
def _find_char_span(char_texts, num_chars, transcription):
    """
    Find the first run of consecutive entries whose texts join to ``transcription``.

    :param char_texts: per-character texts, in annotation order
    :param num_chars: number of start positions to try
    :param transcription: text of the line being matched
    :return: ``(start, stop)`` slice bounds into ``char_texts``, or ``None``
        when no run matches (always ``None`` for an empty transcription)
    """
    max_run = len(transcription)
    for start in range(num_chars):
        for run in range(1, max_run + 1):
            if ''.join(char_texts[start:start + run]) == transcription:
                return start, start + run
    return None


def cvt(gt_path, save_path, img_folder):
    """
    Convert per-image JSON ground truth (lines plus chars) into the JSON-style annotation dict.

    Every ``.json`` file returned by ``get_file_list`` for ``gt_path`` is read
    with ``load``. Its ``lines`` entries become annotations, and the
    characters decoded from its ``chars`` entry by ``decode_chars`` are
    attached to the line whose transcription they spell out. The resulting
    dict is handed to ``save``.

    :param gt_path: location searched for ``.json`` label files
    :param save_path: destination passed to ``save``
    :param img_folder: stored unchanged under the ``data_root`` key
    :return: None
    """
    gt_dict = {'data_root': img_folder}
    data_list = []
    for file_path in tqdm(get_file_list(gt_path, p_postfix=['.json'])):
        content = load(file_path)
        file_path = pathlib.Path(file_path)
        img_name = file_path.stem + '.jpg'
        cur_gt = {'img_name': img_name, 'annotations': []}
        char_polygon_list, char_illegibility_list, char_text_list = decode_chars(
            content['chars'])
        for line in content['lines']:
            transcription = line['transcription']
            # Text-line level information
            cur_line_gt = {
                'polygon': np.array(line['points']).reshape(-1, 2).tolist(),
                'text': transcription,
                'illegibility': line['ignore'] == 1,
                'language': 'Latin',
                # Placeholder kept when no characters can be matched below.
                'chars': [{
                    'polygon': [],
                    'char': '',
                    'illegibility': False,
                    'language': 'Latin',
                }],
            }
            # Character level information
            span = _find_char_span(char_text_list, len(char_polygon_list),
                                   transcription)
            if span is not None:
                start, stop = span
                cur_line_gt['chars'] = [{
                    'polygon': char_polygon_list[j],
                    'char': char_text_list[j],
                    'illegibility': char_illegibility_list[j],
                    'language': 'Latin',
                } for j in range(start, stop)]
                # Drop everything up to the end of the match so that later
                # lines only search the remaining characters. The cut is made
                # at the number of entries actually matched, not at the
                # transcription length, so no unmatched entry is discarded.
                char_polygon_list = char_polygon_list[stop:]
                char_text_list = char_text_list[stop:]
                char_illegibility_list = char_illegibility_list[stop:]
            cur_gt['annotations'].append(cur_line_gt)
        data_list.append(cur_gt)
    gt_dict['data_list'] = data_list
    save(gt_dict, save_path)
コード例 #3
0
# -*- coding: utf-8 -*-
# @Time    : 2020/3/21 10:37
# @Author  : zhoujun
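"""
Give every image in a folder a ``.jpg`` extension.

Files are only renamed; the image data itself is not re-encoded.
"""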
import os
import pathlib
from tqdm import tqdm
from convert.utils import get_file_list

if __name__ == '__main__':
    # Rename every file under img_folder so that its extension becomes '.jpg'.
    img_folder = r'D:\dataset\mlt2019\detection\imgs'
    for img_path in tqdm(get_file_list(img_folder, p_postfix=['.*'])):
        img_path = pathlib.Path(img_path)
        save_path = img_path.parent / (img_path.stem + '.jpg')
        if img_path == save_path:
            # Already carries the target name.
            continue
        # os.rename replaces an existing target silently on POSIX and raises
        # on Windows, so never rename onto a *different* existing file.
        # samefile() still lets a case-only rename through on
        # case-insensitive file systems.
        if save_path.exists() and not save_path.samefile(img_path):
            print('skip {}: {} already exists'.format(img_path, save_path))
            continue
        os.rename(img_path, save_path)