import os
import pathlib
import shutil

import numpy as np
from PIL import Image
from tqdm import tqdm

# Import path for the project helpers is assumed from how convert.utils is used
# elsewhere in this repo; adjust if load_gt / save / four_point_transform live elsewhere.
from convert.utils import load_gt, save, four_point_transform


def crop(save_gt_path, json_path, save_path):
    # Start from a clean output directory
    if os.path.exists(save_path):
        shutil.rmtree(save_path, ignore_errors=True)
    os.makedirs(save_path, exist_ok=True)
    data = load_gt(json_path)
    file_list = []
    for img_path, gt in tqdm(data.items()):
        img = Image.open(img_path).convert('RGB')
        img_name = pathlib.Path(img_path).stem
        for i, (polygon, text, illegibility, language) in enumerate(
                zip(gt['polygons'], gt['texts'], gt['illegibility_list'], gt['language_list'])):
            if illegibility:
                continue
            polygon = np.array(polygon)
            roi_img_save_path = os.path.join(save_path, '{}_{}.jpg'.format(img_name, i))
            # For regions with exactly four points, rectify with a perspective transform before saving
            if len(polygon) == 4:
                np_img = np.asarray(img)
                roi_img = four_point_transform(np_img, polygon)
                roi_img = Image.fromarray(roi_img).convert('RGB')
            else:
                # Otherwise crop the axis-aligned bounding box of the polygon
                x_min = polygon[:, 0].min()
                x_max = polygon[:, 0].max()
                y_min = polygon[:, 1].min()
                y_max = polygon[:, 1].max()
                roi_img = img.crop((x_min, y_min, x_max, y_max))
            roi_img.save(roi_img_save_path)
            file_list.append(roi_img_save_path + '\t' + text + '\t' + language)
            # plt.title(text)
            # plt.imshow(roi_img)
            # plt.show()
    save(file_list, save_gt_path)
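
# A minimal usage sketch (an addition, not from the original file). The paths below are
# hypothetical placeholders; point them at a gt json produced by this repo's converters.
if __name__ == '__main__':
    crop(save_gt_path=r'D:\dataset\crop\train.txt',
         json_path=r'D:\dataset\train.json',
         save_path=r'D:\dataset\crop\imgs')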
# -*- coding: utf-8 -*-
# @Time    : 2020/3/20 20:33
# @Author  : zhoujun
"""
Check whether the generated json ground-truth file has any problems by drawing its boxes
"""
from PIL import Image
from tqdm import tqdm
from matplotlib import pyplot as plt

from convert.utils import show_bbox_on_image, load_gt

if __name__ == '__main__':
    json_path = r'D:\dataset\自然场景文字检测挑战赛初赛数据\验证集\validation_new.json'
    data = load_gt(json_path)
    for img_path, gt in tqdm(data.items()):
        # print(gt['illegibility_list'])
        # print(gt['texts'])
        img = Image.open(img_path)
        img = show_bbox_on_image(img, gt['polygons'], gt['texts'])
        plt.imshow(img)
        plt.show()
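
# Rough sketch (an assumption, not the actual convert.utils helper) of what
# show_bbox_on_image roughly does: draw each polygon outline on the image and
# return it. The real implementation may also render the text labels.
from PIL import ImageDraw
import numpy as np


def show_bbox_on_image_sketch(image, polygons, texts):
    draw = ImageDraw.Draw(image)
    for polygon in polygons:
        points = [tuple(p) for p in np.array(polygon).reshape(-1, 2)]
        draw.polygon(points, outline=(255, 0, 0))
    return image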
    # Tail of createDataset(): close the lmdb environment and report how many samples were written
    env.close()
    print('Created dataset with %d samples' % nSamples)


def show_demo(demo_number, image_path_list, label_list):
    print('\nShow some demo samples to make sure the lmdb data was created correctly')
    print('The first line is the path to the image and the second line is the image label')
    for i in range(demo_number):
        print('image: %s\nlabel: %s\n' % (image_path_list[i], label_list[i]))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # parser.add_argument('--out', type=str, required=True, help='lmdb data output path')
    parser.add_argument('--json_path', type=str,
                        default='E:\\zj\\dataset\\icdar2015 (2)\\detection\\test.json',
                        help='path to gt json')
    parser.add_argument('--save_floder', type=str,
                        default=r'E:\zj\dataset\icdar2015 (2)',
                        help='path to save lmdb')
    args = parser.parse_args()

    data_dict = load_gt(args.json_path)
    out_lmdb = os.path.join(args.save_floder, 'train')
    createDataset(out_lmdb, data_dict, map_size=79951162)
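    # Optional sanity check (an addition, not part of the original script): read the sample
    # count back from the freshly written lmdb. This assumes createDataset() stores a
    # CRNN-style 'num-samples' key in a default (subdir-style) lmdb environment; adjust if not.
    import lmdb
    env = lmdb.open(out_lmdb, readonly=True, lock=False)
    with env.begin() as txn:
        num_samples = txn.get('num-samples'.encode())
        print('lmdb reports num-samples =',
              num_samples.decode() if num_samples else 'unknown (key not found)')
    env.close()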