def crop(save_gt_path, json_path, save_path):
    """Crop legible text regions out of the gt images and emit recognition samples.

    Each cropped patch is written to ``save_path`` and one label line
    ``<patch_path>\t<text>\t<language>`` is collected into ``save_gt_path``.

    :param save_gt_path: path of the output label file
    :param json_path: path of the input gt json (read via ``load_gt``)
    :param save_path: folder receiving the cropped images (recreated from scratch)
    """
    # Start from a clean output folder.
    if os.path.exists(save_path):
        shutil.rmtree(save_path, ignore_errors=True)
    os.makedirs(save_path, exist_ok=True)

    labels = []
    for img_path, gt in tqdm(load_gt(json_path).items()):
        src_img = Image.open(img_path).convert('RGB')
        stem = pathlib.Path(img_path).stem
        regions = zip(gt['polygons'], gt['texts'],
                      gt['illegibility_list'], gt['language_list'])
        for idx, (poly, text, illegible, language) in enumerate(regions):
            if illegible:
                continue
            poly = np.array(poly)
            out_path = os.path.join(save_path, '{}_{}.jpg'.format(stem, idx))
            if len(poly) == 4:
                # Quadrilateral: rectify via perspective transform before saving.
                warped = four_point_transform(np.asarray(src_img), poly)
                patch = Image.fromarray(warped).convert('RGB')
            else:
                # Arbitrary polygon: fall back to an axis-aligned bounding-box crop.
                bbox = (poly[:, 0].min(), poly[:, 1].min(),
                        poly[:, 0].max(), poly[:, 1].max())
                patch = src_img.crop(bbox)
            patch.save(out_path)
            labels.append(out_path + '\t' + text + '\t' + language)
    save(labels, save_gt_path)
def cvt_det(gt_path, save_path, img_folder):
    """Convert an icdar2015-style gt dict into the common json format.

    :param gt_path: path of the source gt file (read via ``load``)
    :param save_path: path of the output json
    :param img_folder: image root recorded as ``data_root``
    """
    data_list = []
    for img_name, gt in tqdm(load(gt_path).items()):
        annotations = []
        for line in gt:
            # Line-level information straight from the source annotation.
            annotations.append({
                'polygon': line['points'],
                'text': line['transcription'],
                'illegibility': line['illegibility'],
                'language': line['language'],
                # Placeholder char-level entry; the source gt has no char boxes.
                'chars': [{'polygon': [], 'char': '',
                           'illegibility': False, 'language': 'Latin'}],
            })
        data_list.append({'img_name': img_name + '.jpg',
                          'annotations': annotations})
    save({'data_root': img_folder, 'data_list': data_list}, save_path)
def cvt(gt_path, save_path, img_folder):
    """Convert icdar2015-style txt gts into the common json format.

    Each gt line is ``x1,y1,x2,y2,x3,y3,x4,y4,transcription``.

    :param gt_path: folder containing the ``.txt`` gt files
    :param save_path: path of the output json
    :param img_folder: image root recorded as ``data_root``
    """
    gt_dict = {'data_root': img_folder}
    data_list = []
    for file_path in tqdm(get_file_list(gt_path, p_postfix=['.txt'])):
        content = load(file_path)
        file_path = pathlib.Path(file_path)
        img_name = file_path.name.replace('.txt', '.jpg')
        cur_gt = {'img_name': img_name, 'annotations': []}
        for line in content:
            cur_line_gt = {'polygon': [], 'text': '',
                           'illegibility': False, 'language': 'Latin'}
            # Placeholder char-level entry; icdar2015 has no char annotations.
            cur_line_gt['chars'] = [{'polygon': [], 'char': '',
                                     'illegibility': False, 'language': 'Latin'}]
            parts = line.split(',')
            # Line-level information: first 8 fields are the quad coordinates.
            x1, y1, x2, y2, x3, y3, x4, y4 = map(float, parts[:8])
            cur_line_gt['polygon'] = [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
            # BUGFIX: the transcription is everything after the 8 coordinates.
            # Using only ``parts[-1]`` truncated texts that themselves contain
            # commas (e.g. "1,000").
            cur_line_gt['text'] = ','.join(parts[8:])
            cur_line_gt['illegibility'] = cur_line_gt['text'] in ('*', '###')
            cur_gt['annotations'].append(cur_line_gt)
        data_list.append(cur_gt)
    gt_dict['data_list'] = data_list
    save(gt_dict, save_path)
def cvt(gt_path, save_path, imgs_folder):
    """Convert a gt dict of ``img_id -> annotation list`` into the common json
    format; images without annotations are dropped.

    :param gt_path: path of the source gt file (read via ``load``)
    :param save_path: path of the output json
    :param imgs_folder: image root recorded as ``data_root``
    """
    data_list = []
    for img_id, anns in tqdm(load(gt_path).items()):
        annotations = []
        for ann in anns:
            line_gt = {
                'polygon': ann['points'],
                'text': '',
                'illegibility': ann['illegibility'],
                'language': 'Latin',
            }
            # Placeholder char-level entry; no char boxes in the source gt.
            line_gt['chars'] = [{
                'polygon': [],
                'char': '',
                'illegibility': False,
                'language': 'Latin',
            }]
            annotations.append(line_gt)
        # Keep only images that actually carry annotations.
        if annotations:
            data_list.append({'img_name': img_id.replace('gt', 'img') + '.jpg',
                              'annotations': annotations})
    gt_dict = {'data_root': imgs_folder, 'data_list': data_list}
    save(gt_dict, save_path)
    print(len(gt_dict), len(data_list))
def cvt_rec(gt_path, save_path, img_folder):
    """Flatten a one-instance-per-image gt dict into a recognition label file.

    Each output line is ``<img_path>\t<transcription>\t<language>``.

    :param gt_path: path of the source gt file (read via ``load``)
    :param save_path: path of the output label file
    :param img_folder: folder that contains the images
    """
    origin_gt = load(gt_path)
    lines = []
    for img_name, gt in tqdm(origin_gt.items()):
        # Every image is expected to carry exactly one text instance.
        assert len(gt) == 1
        ann = gt[0]
        img_path = os.path.join(img_folder, img_name + '.jpg')
        lines.append('\t'.join([img_path, ann['transcription'], ann['language']]))
    save(lines, save_path)
def cvt(gt_path, save_path, img_folder):
    """Convert tab-separated gt lines into ``<img_path>\t<text>\tChinese`` entries.

    :param gt_path: path of the source gt file (read via ``load``)
    :param save_path: path of the output label file
    :param img_folder: folder that contains the images
    """
    entries = []
    for raw in tqdm(load(gt_path)):
        fields = raw.split('\t')
        img_path = os.path.join(img_folder, fields[-2])
        # Best-effort check: report missing images but keep the entry.
        if not os.path.exists(img_path):
            print(img_path)
        entries.append(img_path + '\t' + fields[-1] + '\t' + 'Chinese')
    save(entries, save_path)
def cvt(gt_path, save_path, img_folder):
    """Convert gt lines of the form ``<relpath>.jpg <text>`` into
    ``<img_path>\t<text>\tChinese`` entries.

    Malformed lines (no ``'.jpg '`` separator) are reported and skipped.

    :param gt_path: path of the source gt file (read via ``load``)
    :param save_path: path of the output label file
    :param img_folder: folder that contains the images
    """
    content = load(gt_path)
    file_list = []
    for i, line in tqdm(enumerate(content)):
        try:
            parts = line.split('.jpg ')
            img_path = os.path.join(img_folder, parts[-2])
            file_list.append(img_path + '.jpg' + '\t' + parts[-1] + '\t' + 'Chinese')
        except IndexError:
            # BUGFIX: was a bare ``except`` with a dummy ``a = 1`` body, which
            # silently swallowed every possible error. The only expected
            # failure is a line without the separator (single-element split),
            # so catch just IndexError and report the skipped line.
            print('skip malformed line {}: {!r}'.format(i, line))
    save(file_list, save_path)
def cvt(gt_path, save_path, img_folder):
    """Build ``<img_path>\t<label>\tEnglish`` entries where the label is the
    second ``_``-separated token of the image file name.

    :param gt_path: path of the source gt file (read via ``load``)
    :param save_path: path of the output label file
    :param img_folder: folder that contains the images
    """
    records = []
    for raw in tqdm(load(gt_path)):
        rel_path = raw.split(' ')[0]
        full_path = pathlib.Path(os.path.join(img_folder, rel_path))
        # The transcription is embedded in the file name: ``<idx>_<label>_...``.
        label = full_path.stem.split('_')[1]
        # Best-effort check: report missing images but keep the entry.
        if not full_path.exists():
            print(full_path)
        records.append(str(full_path) + '\t' + label + '\t' + 'English')
    save(records, save_path)
def cvt(gt_path, save_path, imgs_folder):
    """Convert COCO-Text validation annotations into the common json format.

    Annotations with an empty ``utf8_string`` are dropped, and images left
    with no annotations are skipped entirely.

    :param gt_path: path of the COCO-Text annotation file
    :param save_path: path of the output json
    :param imgs_folder: image root recorded as ``data_root``
    """
    ct = COCO_Text(gt_path)
    data_list = []
    # Iterate over the validation-split image ids.
    for img_id in tqdm(ct.getImgIds(imgIds=ct.val)):
        img = ct.loadImgs(img_id)[0]
        cur_gt = {'img_name': img['file_name'], 'annotations': []}
        for ann in ct.loadAnns(ct.getAnnIds(imgIds=img['id'])):
            if len(ann['utf8_string']) == 0:
                continue
            line_gt = {
                'polygon': np.array(ann['mask']).reshape(-1, 2).tolist(),
                'text': ann['utf8_string'],
                'illegibility': ann['legibility'] == "illegible",
                'language': ann['language'],
            }
            # Placeholder char-level entry tagged with the line's language.
            line_gt['chars'] = [{
                'polygon': [],
                'char': '',
                'illegibility': False,
                'language': ann['language'],
            }]
            cur_gt['annotations'].append(line_gt)
        if len(cur_gt['annotations']) > 0:
            data_list.append(cur_gt)
    gt_dict = {'data_root': imgs_folder, 'data_list': data_list}
    save(gt_dict, save_path)
    print(len(gt_dict), len(data_list))
def cvt(self):
    """Convert the in-memory (imageNames, wordBBoxes, transcripts) triples
    into the common detection-gt json format and save it.

    NOTE(review): ``save_path`` is not defined inside this method — it
    presumably comes from a module-level global (or was meant to be
    ``self.save_path``); verify before running standalone.
    """
    gt_dict = {'data_root': self.img_folder}
    data_list = []
    pbar = tqdm(total=len(self.imageNames))
    for imageName, wordBBoxes, texts in zip(self.imageNames, self.wordBBoxes,
                                            self.transcripts):
        # Ensure a trailing per-word axis even when there is a single word.
        wordBBoxes = np.expand_dims(
            wordBBoxes, axis=2) if (wordBBoxes.ndim == 2) else wordBBoxes
        _, _, numOfWords = wordBBoxes.shape
        # Column-major flatten so each row holds the 8 coords of one word.
        text_polys = wordBBoxes.reshape([8, numOfWords],
                                        order='F').T  # num_words * 8
        text_polys = text_polys.reshape(numOfWords, 4,
                                        2)  # num_of_words * 4 * 2
        # Split all text lines into individual whitespace-separated words.
        transcripts = [word for line in texts for word in line.split()]
        # Skip samples where the box count and the word count disagree.
        if numOfWords != len(transcripts):
            continue
        cur_gt = {'img_name': imageName, 'annotations': []}
        for polygon, text in zip(text_polys, transcripts):
            cur_line_gt = {
                'polygon': [],
                'text': '',
                'illegibility': False,
                'language': 'Latin'
            }
            # Placeholder char-level entry; no char annotations available.
            chars_gt = [{
                'polygon': [],
                'char': '',
                'illegibility': False,
                'language': 'Latin'
            }]
            cur_line_gt['chars'] = chars_gt
            cur_line_gt['text'] = text
            cur_line_gt['polygon'] = polygon.tolist()
            # '###' and '*' are the conventional ignore markers.
            cur_line_gt['illegibility'] = text in ['###', '*']
            cur_gt['annotations'].append(cur_line_gt)
        data_list.append(cur_gt)
        pbar.update(1)
    pbar.close()
    gt_dict['data_list'] = data_list
    save(gt_dict, save_path)
def cvt(gt_path, save_path, img_folder):
    """Convert json gts carrying both line- and char-level boxes into the
    common json format, greedily aligning the flat char list to each line.

    :param gt_path: folder containing the ``.json`` gt files
    :param save_path: path of the output json
    :param img_folder: image root recorded as ``data_root``
    :return: None (result is written via ``save``)
    """
    gt_dict = {'data_root': img_folder}
    data_list = []
    for file_path in tqdm(get_file_list(gt_path, p_postfix=['.json'])):
        content = load(file_path)
        file_path = pathlib.Path(file_path)
        img_name = file_path.stem + '.jpg'
        cur_gt = {'img_name': img_name, 'annotations': []}
        # Flat, ordered char-level lists covering the whole image.
        char_polygon_list, char_illegibility_list, char_text_list = decode_chars(
            content['chars'])
        for line in content['lines']:
            cur_line_gt = {
                'polygon': [],
                'text': '',
                'illegibility': False,
                'language': 'Latin'
            }
            # Default placeholder chars; replaced below if a match is found.
            chars_gt = [{
                'polygon': [],
                'char': '',
                'illegibility': False,
                'language': 'Latin'
            }]
            cur_line_gt['chars'] = chars_gt
            # Line-level information.
            cur_line_gt['polygon'] = np.array(line['points']).reshape(
                -1, 2).tolist()
            cur_line_gt['text'] = line['transcription']
            cur_line_gt[
                'illegibility'] = True if line['ignore'] == 1 else False
            str_len = len(line['transcription'])
            # Greedy search: find the first window of char entries whose
            # concatenated text equals this line's transcription.
            flag = False
            for char_idx in range(len(char_polygon_list)):
                for str_idx in range(1, str_len + 1):
                    if ''.join(
                            char_text_list[char_idx:char_idx +
                                           str_idx]) == line['transcription']:
                        chars_gt = []
                        for j in range(char_idx, char_idx + str_idx):
                            chars_gt.append({
                                'polygon': char_polygon_list[j],
                                'char': char_text_list[j],
                                'illegibility': char_illegibility_list[j],
                                'language': 'Latin'
                            })
                        cur_line_gt['chars'] = chars_gt
                        # Consume the matched prefix so later lines only see
                        # the remaining chars.
                        # NOTE(review): the slices advance by ``str_len`` while
                        # the matched window spans ``str_idx`` entries; these
                        # only agree when every char entry is one character —
                        # verify against the gt data if multi-char entries exist.
                        char_polygon_list = char_polygon_list[char_idx +
                                                              str_len:]
                        char_text_list = char_text_list[char_idx + str_len:]
                        char_illegibility_list = char_illegibility_list[
                            char_idx + str_len:]
                        flag = True
                        break
                if flag:
                    break
            cur_gt['annotations'].append(cur_line_gt)
        data_list.append(cur_gt)
    gt_dict['data_list'] = data_list
    save(gt_dict, save_path)