def main(args):
    # Load the image
    image, image_meta, _, _ = image_utils.load_image_gt(np.random.randint(10),
                                                        args.image_path,
                                                        config.IMAGE_SHAPE[0],
                                                        None)
    # Load the model
    config.IMAGES_PER_GPU = 1
    m = models.ctpn_net(config, 'test')
    if args.weight_path is not None:
        m.load_weights(args.weight_path)
    else:
        m.load_weights(config.WEIGHT_PATH)
    # m.summary()

    # Model prediction
    text_boxes, text_scores, _ = m.predict(np.array([image]))
    text_boxes = np_utils.remove_pad(text_boxes[0])
    text_scores = np_utils.remove_pad(text_scores[0])[:, 0]

    # Text line detector
    image_meta = image_utils.parse_image_meta(image_meta)
    detector = TextDetector(config)
    text_lines = detector.detect(text_boxes, text_scores, config.IMAGE_SHAPE, image_meta['window'])
    # print("text_lines:{}".format(text_lines))

    # Visualize and save
    boxes_num = 15
    fig = plt.figure(figsize=(16, 16))
    ax = fig.add_subplot(1, 1, 1)
    visualize.display_polygons(image, text_lines[:boxes_num, :8], text_lines[:boxes_num, 8], ax=ax)
    fig.savefig('examples.{}.png'.format(np.random.randint(10)))
def __init__(self):
    # Build the inference model once and load the trained weights
    config.USE_SIDE_REFINE = True
    config.IMAGES_PER_GPU = 1
    config.IMAGE_SHAPE = (1024, 1024, 3)
    self.m = models.ctpn_net(config, 'test')
    self.m.load_weights(config.WEIGHT_PATH, by_name=True)
    self.m.summary()
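# The rest of this class is not shown in the excerpt. A minimal sketch of a
# detect method, assuming the same single-image pipeline used by the predict
# scripts in this section (hypothetical, not the original implementation):
def detect(self, image_path):
    # Load and pad the image to the configured shape
    image, image_meta, _, _ = image_utils.load_image_gt(0, image_path, config.IMAGE_SHAPE[0], None)
    # Predict text proposals and scores
    text_boxes, text_scores, _ = self.m.predict([np.array([image]), np.array([image_meta])])
    text_boxes = np_utils.remove_pad(text_boxes[0])
    text_scores = np_utils.remove_pad(text_scores[0])[:, 0]
    # Merge proposals into text lines
    image_meta = image_utils.parse_image_meta(image_meta)
    detector = TextDetector(config)
    return detector.detect(text_boxes, text_scores, config.IMAGE_SHAPE, image_meta['window'])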
def main(args):
    # Override config parameters
    config.USE_SIDE_REFINE = bool(args.use_side_refine)
    if args.weight_path is not None:
        config.WEIGHT_PATH = args.weight_path
    config.IMAGES_PER_GPU = 1
    config.IMAGE_SHAPE = (1024, 1024, 3)

    # Load the image
    image, image_meta, _, _ = image_utils.load_image_gt(np.random.randint(10),
                                                        args.image_path,
                                                        config.IMAGE_SHAPE[0],
                                                        None)
    # Load the model
    m = models.ctpn_net(config, 'test')
    m.load_weights(config.WEIGHT_PATH, by_name=True)
    # m.summary()

    # Model prediction
    text_boxes, text_scores, _ = m.predict([np.array([image]), np.array([image_meta])])
    text_boxes = np_utils.remove_pad(text_boxes[0])
    text_scores = np_utils.remove_pad(text_scores[0])[:, 0]

    # Text line detector
    image_meta = image_utils.parse_image_meta(image_meta)
    detector = TextDetector(config)
    text_lines = detector.detect(text_boxes, text_scores, config.IMAGE_SHAPE, image_meta['window'])

    # Visualize and save the result image
    boxes_num = 30
    fig = plt.figure(figsize=(16, 16))
    ax = fig.add_subplot(1, 1, 1)
    visualize.display_polygons(image, text_lines[:boxes_num, :8], text_lines[:boxes_num, 8], ax=ax)
    image_name = os.path.basename(args.image_path)
    fig.savefig('{}.{}.jpg'.format(os.path.splitext(image_name)[0], int(config.USE_SIDE_REFINE)))
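# A minimal CLI entry point sketch for the script above; the flag names match
# the attributes this main() reads from `args`, but the defaults and help text
# are assumptions (mirroring the __main__ block of the training script below):
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--image_path", type=str, default=None, help="image path")
    parser.add_argument("--use_side_refine", type=int, default=1, help="1: use side refine; 0: do not")
    parser.add_argument("--weight_path", type=str, default=None, help="weight path")
    main(parser.parse_args(sys.argv[1:]))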
def main(args):
    # Override config parameters
    config.USE_SIDE_REFINE = bool(args.use_side_refine)
    if args.weight_path is None:
        args.weight_path = config.WEIGHT_PATH
    config.IMAGES_PER_GPU = 1
    config.IMAGE_SHAPE = (720, 720, 3)
    config.set_root(args.root)
    os.makedirs(args.output_dir, exist_ok=True)

    # Image paths
    image_path_list = file_utils.get_sub_files(args.root)

    # Load the model
    m = models.ctpn_net(config, 'test')
    m.load_weights(args.weight_path, by_name=True)

    # Predict
    start_time = datetime.datetime.now()
    gen = generator(image_path_list, config.IMAGE_SHAPE)
    text_boxes, text_scores, image_metas = m.predict_generator(generator=gen,
                                                               steps=len(image_path_list),
                                                               use_multiprocessing=True)
    end_time = datetime.datetime.now()
    print("====== Evaluated {} images in {}".format(len(image_path_list), end_time - start_time))

    # Remove padding
    text_boxes = [np_utils.remove_pad(text_box) for text_box in text_boxes]
    text_scores = [np_utils.remove_pad(text_score)[:, 0] for text_score in text_scores]
    image_metas = image_utils.batch_parse_image_meta(image_metas)

    # Text line detection
    detector = TextDetector(config)
    text_lines = [detector.detect(boxes, scores, config.IMAGE_SHAPE, window)
                  for boxes, scores, window in zip(text_boxes, text_scores, image_metas["window"])]

    # Map detected text line boxes back to original image coordinates
    text_lines = [image_utils.recover_detect_quad(boxes, window, scale)
                  for boxes, window, scale in zip(text_lines, image_metas["window"], image_metas["scale"])]

    # Write results to text files
    for image_path, boxes in zip(image_path_list, text_lines):
        output_filename = os.path.splitext('res_' + os.path.basename(image_path))[0] + '.txt'
        with open(os.path.join(args.output_dir, output_filename), mode='w') as f:
            for box in boxes.astype(np.int32):
                f.write("{},{},{},{},{},{},{},{}\r\n".format(box[0], box[1], box[2], box[3],
                                                             box[4], box[5], box[6], box[7]))
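# `generator` is referenced above but not included in this excerpt. A minimal
# sketch, assuming each step yields the [image, image_meta] input pair that the
# 'test' model consumes (matching the m.predict([...]) calls elsewhere in this
# section); the real implementation may differ.
def generator(image_path_list, image_shape):
    for image_id, image_path in enumerate(image_path_list):
        # load_image_gt resizes/pads the image to image_shape and returns its meta
        image, image_meta, _, _ = image_utils.load_image_gt(image_id, image_path, image_shape[0], None)
        yield [np.array([image]), np.array([image_meta])]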
def main(args):
    # Override config parameters
    config.USE_SIDE_REFINE = bool(args.use_side_refine)
    if args.weight_path is not None:
        config.WEIGHT_PATH = args.weight_path
    config.IMAGES_PER_GPU = 1
    config.IMAGE_SHAPE = (1024, 1024, 3)

    # Image paths
    image_path_list = file_utils.get_sub_files(args.image_dir)

    # Load the model
    m = models.ctpn_net(config, 'test')
    m.load_weights(config.WEIGHT_PATH, by_name=True)

    # Predict
    start_time = datetime.datetime.now()
    gen = generator(image_path_list, config.IMAGE_SHAPE)
    text_boxes, text_scores, image_metas = m.predict_generator(generator=gen,
                                                               steps=len(image_path_list),
                                                               use_multiprocessing=True)
    end_time = datetime.datetime.now()
    print("====== Evaluated {} images in {}".format(len(image_path_list), end_time - start_time))

    # Remove padding
    text_boxes = [np_utils.remove_pad(text_box) for text_box in text_boxes]
    text_scores = [np_utils.remove_pad(text_score)[:, 0] for text_score in text_scores]
    image_metas = image_utils.batch_parse_image_meta(image_metas)

    # Text line detection
    detector = TextDetector(config)
    text_lines = [detector.detect(boxes, scores, config.IMAGE_SHAPE, window)
                  for boxes, scores, window in zip(text_boxes, text_scores, image_metas["window"])]

    # Map detected text line boxes back to original image coordinates
    text_lines = [image_utils.recover_detect_quad(boxes, window, scale)
                  for boxes, window, scale in zip(text_lines, image_metas["window"], image_metas["scale"])]

    # Write results to text files
    for image_path, boxes in zip(image_path_list, text_lines):
        output_filename = os.path.splitext('res_' + os.path.basename(image_path))[0] + '.txt'
        with open(os.path.join(args.output_dir, output_filename), mode='w') as f:
            for box in boxes.astype(np.int32):
                f.write("{},{},{},{},{},{},{},{}\r\n".format(box[0], box[1], box[2], box[3],
                                                             box[4], box[5], box[6], box[7]))
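# A minimal CLI entry point sketch; the flag names match the attributes this
# main() reads from `args` (defaults and help text are assumptions):
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--image_dir", type=str, default=None, help="directory of input images")
    parser.add_argument("--output_dir", type=str, default=None, help="directory for res_*.txt files")
    parser.add_argument("--use_side_refine", type=int, default=1, help="1: use side refine; 0: do not")
    parser.add_argument("--weight_path", type=str, default=None, help="weight path")
    main(parser.parse_args(sys.argv[1:]))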
def main(args):
    set_gpu_growth()

    # Load the annotations
    annotation_files = file_utils.get_sub_files(config.IMAGE_GT_DIR)
    image_annotations = [reader.load_annotation(file, config.IMAGE_DIR) for file in annotation_files]
    # Filter out missing images; some ICDAR2017 images cannot be found
    image_annotations = [ann for ann in image_annotations if os.path.exists(ann['image_path'])]

    # Load the model
    m = models.ctpn_net(config, 'train')
    models.compile(m, config, loss_names=['ctpn_regress_loss', 'ctpn_class_loss'])
    if args.init_epochs > 0:
        m.load_weights(args.weight_path, by_name=True)
    else:
        m.load_weights(config.PRE_TRAINED_WEIGHT, by_name=True)
    m.summary()

    # Generator
    gen = generator(image_annotations,
                    config.IMAGES_PER_GPU,
                    config.IMAGE_SHAPE,
                    config.ANCHORS_WIDTH,
                    config.MAX_GT_INSTANCES)

    # Train
    m.fit_generator(gen,
                    steps_per_epoch=len(image_annotations) // config.IMAGES_PER_GPU,
                    epochs=args.epochs,
                    initial_epoch=args.init_epochs,
                    verbose=True,
                    callbacks=get_call_back(),
                    use_multiprocessing=True)

    # Save the model
    m.save(config.WEIGHT_PATH)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=50, help="number of training epochs")
    parser.add_argument("--init_epochs", type=int, default=0, help="initial epoch (resume training)")
    parser.add_argument("--weight_path", type=str, default=None, help="weight path")
    arguments = parser.parse_args(sys.argv[1:])
    main(arguments)
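# `get_call_back` is referenced above but not shown in this excerpt. A minimal
# sketch using standard Keras callbacks (assumes `import keras`); the checkpoint
# pattern 'ctpn.{epoch:03d}.h5' matches the "ctpn.NNN.h5" file names parsed
# elsewhere in this repo, but the save directory, monitored metric, and log
# directory are assumptions. In the actual script it would be defined before
# the entry point above.
def get_call_back():
    # Save weights after every epoch, tagged with the epoch number
    checkpoint = keras.callbacks.ModelCheckpoint(filepath='ctpn.{epoch:03d}.h5',
                                                 monitor='loss',
                                                 verbose=1,
                                                 save_best_only=False,
                                                 save_weights_only=True)
    # Write training curves for TensorBoard
    log = keras.callbacks.TensorBoard(log_dir='log')
    return [checkpoint, log]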
def main(args):
    set_gpu_growth()

    # Load the annotations
    annotation_files = file_utils.get_sub_files(config.IMAGE_GT_DIR)
    image_annotations = [reader.load_annotation(file, config.IMAGE_DIR) for file in annotation_files]
    # Filter out missing images; some ICDAR2017 images cannot be found
    image_annotations = [ann for ann in image_annotations if os.path.exists(ann['image_path'])]

    # Load the model
    m = models.ctpn_net(config, 'train')
    models.compile(m, config, loss_names=['ctpn_regress_loss', 'ctpn_class_loss', 'side_regress_loss'])

    # Add metrics
    output = models.get_layer(m, 'ctpn_target').output
    models.add_metrics(m, ['gt_num', 'pos_num', 'neg_num', 'gt_min_iou', 'gt_avg_iou'], output[-5:])

    if args.init_epochs > 0:
        m.load_weights(args.weight_path, by_name=True)
    else:
        m.load_weights(config.PRE_TRAINED_WEIGHT, by_name=True)
    m.summary()

    # Generator
    gen = generator(image_annotations,
                    config.IMAGES_PER_GPU,
                    config.IMAGE_SHAPE,
                    config.ANCHORS_WIDTH,
                    config.MAX_GT_INSTANCES)

    # Train
    m.fit_generator(gen,
                    steps_per_epoch=len(image_annotations) // config.IMAGES_PER_GPU * 2,
                    epochs=args.epochs,
                    initial_epoch=args.init_epochs,
                    verbose=True,
                    callbacks=get_call_back(),
                    workers=2,
                    use_multiprocessing=True)

    # Save the model
    m.save(config.WEIGHT_PATH)
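# This variant omits the CLI entry point; a sketch assuming the same flags as
# the trainer above (mirroring that script's __main__ block):
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=50, help="number of training epochs")
    parser.add_argument("--init_epochs", type=int, default=0, help="initial epoch (resume training)")
    parser.add_argument("--weight_path", type=str, default=None, help="weight path")
    main(parser.parse_args(sys.argv[1:]))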
import os
import random as rng

import numpy as np
import matplotlib

matplotlib.use('Agg')
from matplotlib import pyplot as plt

from rect_op import merge_bounding_boxes
from tilt_align import get_lines, get_rotation_angle, ctpn_coordinate_pair, rotate_image
from ctpn.utils import image_utils, np_utils, visualize
from ctpn.utils.detector import TextDetector
from ctpn.config import cur_config as config
from ctpn.layers import models

config.USE_SIDE_REFINE = True
config.IMAGES_PER_GPU = 1
config.IMAGE_SHAPE = (1024, 1024, 3)
m = models.ctpn_net(config, 'test')
m.load_weights(config.WEIGHT_PATH, by_name=True)
m.summary()


def get_text_lines(name: str, image_dir=r'D:\citizenIdData\Train_DataSet'):
    # Load the image
    image, image_meta, _, _ = image_utils.load_image_gt(np.random.randint(10),
                                                        os.path.join(image_dir, name),
                                                        config.IMAGE_SHAPE[0],
                                                        None)
    # The model is already loaded once at module level above

    # Model prediction
    text_boxes, text_scores, _ = m.predict([np.array([image]), np.array([image_meta])])
    text_boxes = np_utils.remove_pad(text_boxes[0])
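    # The excerpt ends here. A sketch of the remaining steps, mirroring the
    # single-image prediction scripts above (an assumption, not the original code):
    text_scores = np_utils.remove_pad(text_scores[0])[:, 0]

    # Merge the predicted proposals into text lines
    image_meta = image_utils.parse_image_meta(image_meta)
    detector = TextDetector(config)
    text_lines = detector.detect(text_boxes, text_scores, config.IMAGE_SHAPE, image_meta['window'])
    return text_lines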
def main(args):
    set_gpu_growth()

    # Load the annotations
    annotation_files = file_utils.get_sub_files(config.IMAGE_GT_DIR)
    # annotation_files_1 = [file for file in annotation_files if 'rctw' in file]
    annotation_files = [file for file in annotation_files if 'rects' in file]
    # annotation_files = annotation_files_1 + annotation_files_2
    image_annotations = [reader.load_annotation(file, config.IMAGE_DIR) for file in annotation_files]
    # Filter out missing images; some ICDAR2017 images cannot be found
    image_annotations = [ann for ann in image_annotations if os.path.exists(ann['image_path'])]

    # Load the model
    m = models.ctpn_net(config, 'train')
    models.compile(m, config, loss_names=['ctpn_regress_loss', 'ctpn_class_loss', 'side_regress_loss'])

    # Add metrics
    output = models.get_layer(m, 'ctpn_target').output
    models.add_metrics(m, ['gt_num', 'pos_num', 'neg_num', 'gt_min_iou', 'gt_avg_iou'], output[-5:])

    if args.init_epochs > 0:
        m.load_weights(args.weight_path, by_name=True)
    else:
        m.load_weights(config.PRE_TRAINED_WEIGHT, by_name=True)
    m.summary()

    # Generators: hold out the last 100 samples for validation
    gen = generator(image_annotations[:-100],
                    config.IMAGES_PER_GPU,
                    config.IMAGE_SHAPE,
                    config.ANCHORS_WIDTH,
                    config.MAX_GT_INSTANCES,
                    horizontal_flip=False,
                    random_crop=False)
    val_gen = generator(image_annotations[-100:],
                        config.IMAGES_PER_GPU,
                        config.IMAGE_SHAPE,
                        config.ANCHORS_WIDTH,
                        config.MAX_GT_INSTANCES)

    # Train
    m.fit_generator(gen,
                    steps_per_epoch=len(image_annotations) // config.IMAGES_PER_GPU * 2,
                    epochs=args.epochs,
                    initial_epoch=args.init_epochs,
                    validation_data=val_gen,
                    validation_steps=100 // config.IMAGES_PER_GPU,
                    verbose=True,
                    callbacks=get_call_back(),
                    workers=2,
                    use_multiprocessing=True)

    # Save the model
    m.save(config.WEIGHT_PATH)
def main(args):
    set_gpu_growth()
    config.set_root(args.root)
    image_annotations = load_folder_annotation(args.root)
    if len(image_annotations) < 5:
        print("Too small dataset...")
        return

    # Debugging snippet (kept for reference): inspect the shapes of one batch
    # from the training and validation generators.
    # gen = generator(image_annotations[:-100],
    #                 config.IMAGES_PER_GPU,
    #                 config.IMAGE_SHAPE,
    #                 config.ANCHORS_WIDTH,
    #                 config.MAX_GT_INSTANCES,
    #                 horizontal_flip=False,
    #                 random_crop=False)
    # val_gen = generator(image_annotations[-100:],
    #                     config.IMAGES_PER_GPU,
    #                     config.IMAGE_SHAPE,
    #                     config.ANCHORS_WIDTH,
    #                     config.MAX_GT_INSTANCES)
    # for bat in range(100):
    #     print(bat)
    #     val, _ = next(gen)
    #     for key in val.keys():
    #         print(val[key].shape, end=",")
    #     print()
    #     val, _ = next(val_gen)
    #     for key in val.keys():
    #         print(val[key].shape, end=",")
    #     print()
    #     print()
    # exit(1)

    # Load the model
    m = models.ctpn_net(config, 'train')
    models.compile(m, config, loss_names=['ctpn_regress_loss', 'ctpn_class_loss', 'side_regress_loss'])

    # Add metrics
    output = models.get_layer(m, 'ctpn_target').output
    models.add_metrics(m, ['gt_num', 'pos_num', 'neg_num', 'gt_min_iou', 'gt_avg_iou'], output[-5:])

    # If starting from scratch, fall back to the default (resnet50-based) weights
    if args.weight_path is None:
        args.weight_path = config.WEIGHT_PATH
    # Recover the current epoch from the weight file name.
    if args.init_epochs == 0:
        res = re.match(r".*ctpn\.(\d+)\.h5", args.weight_path)
        if res is not None:
            args.init_epochs = int(res.group(1))
    m.load_weights(args.weight_path, by_name=True)
    m.summary()
    # print(len(image_annotations[:-100]), len(image_annotations[-100:]))

    # Generators: all but the last 10 samples for training, the last 10 for validation.
    gen = generator(image_annotations[:-10],
                    config.IMAGES_PER_GPU,
                    config.IMAGE_SHAPE,
                    config.ANCHORS_WIDTH,
                    config.MAX_GT_INSTANCES,
                    horizontal_flip=False,
                    random_crop=False)
    val_gen = generator(image_annotations[-10:],
                        config.IMAGES_PER_GPU,
                        config.IMAGE_SHAPE,
                        config.ANCHORS_WIDTH,
                        config.MAX_GT_INSTANCES)

    # Train
    m.fit_generator(gen,
                    steps_per_epoch=len(image_annotations) // config.IMAGES_PER_GPU * 2,
                    epochs=args.epochs,
                    initial_epoch=args.init_epochs,
                    validation_data=val_gen,
                    validation_steps=100 // config.IMAGES_PER_GPU,
                    verbose=True,
                    callbacks=get_call_back(),
                    workers=args.jobs,
                    use_multiprocessing=True)

    # Save the model, tagging the file with the final epoch number
    path = os.path.split(config.WEIGHT_PATH)
    m.save(os.path.join(path[0], "ctpn.%03d.h5" % args.epochs))
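# Example invocation (hypothetical script name and paths; the flags --root,
# --epochs, --init_epochs, --weight_path, and --jobs are the attributes this
# main() reads from `args`). Resuming from "ctpn.050.h5" would set
# init_epochs=50 via the file-name match above:
#   python train.py --root /data/my_dataset --epochs 100 --jobs 2 \
#       --weight_path /path/to/ctpn.050.h5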