def main(args):
    """Run car-plate OCR on ./video/test.mp4 and show annotated frames live.

    A detection is kept only when its score clears ``drop_score`` and the
    text looks like a Chinese licence plate: first character in ``prov_list``
    (province abbreviation), second in ``city_list``, and total length of
    exactly 7 or 8 characters.
    """
    cap = cv2.VideoCapture("./video/test.mp4")
    text_sys = TextSystem(args)
    is_visualize = True
    font_path = args.vis_font_path
    drop_score = 0.5
    while cap.isOpened():
        flag, img = cap.read()
        if not flag:
            # End of video (or read error): stop before feeding a None
            # frame to the OCR pipeline (the original crashed here).
            break
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        print("Predict time: %.3fs" % (elapse))
        boxes = []
        txts = []
        scores = []
        for box, (text, score) in zip(dt_boxes, rec_res):
            # Bug fix: the original condition was `len(text) == 7 or 8`,
            # which is always true because the literal 8 is truthy.
            if (score >= drop_score and text[0] in prov_list
                    and text[1] in city_list and len(text) in (7, 8)):
                text_str = "%s, %.3f" % (text, score)
                txts.append(text)
                scores.append(score)
                boxes.append(box)
                print(text_str)
        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            draw_img = draw_ocr_box_txt(image,
                                        boxes,
                                        txts,
                                        scores,
                                        drop_score=drop_score,
                                        font_path=font_path)
            # draw_ocr_box_txt output is RGB; OpenCV display expects BGR.
            cv2.imshow('Car Plate Recognition', draw_img[:, :, ::-1])
            cv2.waitKey(1)
    cap.release()
    cv2.destroyAllWindows()
def main(args):
    """Run OCR on live webcam frames (device 0) and display annotated output.

    Press Esc (key code 27) in the preview window to quit.  Recognized texts
    above ``drop_score`` are printed as "<text>, <score>".
    """
    cap = cv2.VideoCapture(0)
    text_sys = TextSystem(args)
    is_visualize = True
    drop_score = 0.5  # hoisted: was re-assigned on every frame
    try:
        while True:
            success, img = cap.read()
            if not success:
                # Bug fix: the original ignored `success`; a failed grab
                # passed a None frame straight into the OCR pipeline.
                break
            dt_boxes, rec_res = text_sys(img)
            for text, score in rec_res:
                if score >= drop_score:
                    text_str = "%s, %.3f" % (text, score)
                    print(text_str)
            if is_visualize:
                image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                boxes = dt_boxes
                txts = [rec[0] for rec in rec_res]
                scores = [rec[1] for rec in rec_res]
                draw_img = draw_ocr_box_txt(image, boxes, txts, scores)
                cv2.imshow('img', draw_img[:, :, ::-1])
                k = cv2.waitKey(1)
                if k == 27:  # Esc
                    break
    finally:
        # Bug fix: the original leaked the capture device and left the
        # preview window open on exit.
        cap.release()
        cv2.destroyAllWindows()
def main(args):
    """Run OCR over every image in ``args.image_dir`` and save visualizations.

    For each image: detect + recognize text, print timing and results above
    ``drop_score``, and write an annotated copy into ``./results``.
    """
    image_file_list = get_image_file_list(args.image_dir)
    text_sys = TextSystem(args)
    is_visualize = True
    font_path = args.vis_font_path
    drop_score = 0.5  # hoisted: was re-assigned inside the loop
    for image_file in image_file_list:
        # GIFs need the dedicated reader; everything else goes through imread.
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        # Cleanup: removed leftover debug prints (print(1111), print(1),
        # duplicate print(image_file)) that cluttered the output.
        print("Predict time of %s: %.3fs" % (image_file, elapse))
        for text, score in rec_res:
            if score >= drop_score:
                text_str = "%s, %.3f" % (text, score)
                print(text_str)
        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            boxes = dt_boxes
            txts = [rec[0] for rec in rec_res]
            scores = [rec[1] for rec in rec_res]
            draw_img = draw_ocr_box_txt(image,
                                        boxes,
                                        txts,
                                        scores,
                                        drop_score=drop_score,
                                        font_path=font_path)
            draw_img_save = "./results"
            os.makedirs(draw_img_save, exist_ok=True)
            out_path = os.path.join(draw_img_save,
                                    os.path.basename(image_file))
            # RGB -> BGR for cv2.imwrite.
            cv2.imwrite(out_path, draw_img[:, :, ::-1])
            print("The visualized image saved in {}".format(out_path))
def draw_structure_result(image, result, font_path):
    """Render the OCR texts of a layout-analysis ``result`` onto ``image``.

    Table regions are skipped; for every other region each (box, rec) pair
    from ``region['res']`` is collected and drawn with drop_score=0 so that
    nothing is filtered out.
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    boxes = []
    txts = []
    scores = []
    for region in result:
        if region['type'] == 'Table':
            # Tables are rendered elsewhere; skip them here.
            continue
        region_boxes, region_recs = region['res'][0], region['res'][1]
        for box, rec in zip(region_boxes, region_recs):
            boxes.append(np.array(box).reshape(-1, 2))
            txts.append(rec[0])
            scores.append(rec[1])
    im_show = draw_ocr_box_txt(
        image, boxes, txts, scores, font_path=font_path, drop_score=0)
    return im_show
def main(args):
    """Run the full OCR pipeline on each image under ``args.image_dir``.

    Logs per-image timing and every recognized (text, score) pair, then
    saves an annotated visualization into ./inference_results/.  GIF inputs
    are written back under a .png name.
    """
    image_file_list = get_image_file_list(args.image_dir)
    text_sys = TextSystem(args)
    is_visualize = True
    font_path = args.vis_font_path
    drop_score = args.drop_score
    for image_file in image_file_list:
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        tic = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - tic
        logger.info("Predict time of %s: %.3fs" % (image_file, elapse))
        for text, score in rec_res:
            logger.info("{}, {:.3f}".format(text, score))
        if not is_visualize:
            continue
        image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        boxes = dt_boxes
        txts = [text for text, _ in rec_res]
        scores = [score for _, score in rec_res]
        draw_img = draw_ocr_box_txt(image,
                                    boxes,
                                    txts,
                                    scores,
                                    drop_score=drop_score,
                                    font_path=font_path)
        draw_img_save = "./inference_results/"
        if not os.path.exists(draw_img_save):
            os.makedirs(draw_img_save)
        if flag:
            # Animated GIF was decoded to one frame; save it as PNG.
            image_file = image_file[:-3] + "png"
        save_path = os.path.join(draw_img_save, os.path.basename(image_file))
        cv2.imwrite(save_path, draw_img[:, :, ::-1])
        logger.info("The visualized image saved in {}".format(save_path))
def main(args):
    """Sample up to 20 images from ``args.image_dir``, OCR them, save texts.

    With ``args.is_save`` set, the recognized lines of each image go to
    ./final_results/20/<stem>.txt (one recognized string per line; images
    whose result file already exists are skipped).  Visualization stays
    behind ``is_visualize`` (off by default).
    """
    image_file_list = get_image_file_list(args.image_dir)
    image_file_list = image_file_list[args.process_id::args.total_process_num]
    text_sys = TextSystem(args)
    is_visualize = False
    font_path = args.vis_font_path
    drop_score = args.drop_score
    num = 1
    loop_count = 20
    # Bug fix: random.sample raises ValueError when k exceeds the
    # population size, so clamp k to the number of available images.
    selected_imgs = random.sample(image_file_list,
                                  k=min(20, len(image_file_list)))
    for image_file in selected_imgs:
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            continue
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        for text, score in rec_res:
            logger.info("{}, {:.3f}".format(text, score))
        if args.is_save:
            dataset_dir = './final_results/20/'
            txts = [rec_res[i][0] for i in range(len(rec_res))]
            # Write the recognized lines into res_text.
            print(image_file)
            path = os.path.join(
                dataset_dir,
                os.path.splitext(os.path.basename(image_file))[0])
            if os.path.exists(path + '.txt'):
                continue
            # Bug fix: the file handle was never closed; a context manager
            # guarantees the results are flushed to disk.
            with open(path + '.txt', 'w', encoding="utf-8") as res_txt:
                for item in txts:
                    res_txt.write(item + '\n')
        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            boxes = dt_boxes
            txts = [rec_res[i][0] for i in range(len(rec_res))]
            scores = [rec_res[i][1] for i in range(len(rec_res))]
            draw_img = draw_ocr_box_txt(image,
                                        boxes,
                                        txts,
                                        scores,
                                        drop_score=drop_score,
                                        font_path=font_path)
            draw_img_save = "./inference_results/"
            if not os.path.exists(draw_img_save):
                os.makedirs(draw_img_save)
            cv2.imwrite(
                os.path.join(draw_img_save, os.path.basename(image_file)),
                draw_img[:, :, ::-1])
            logger.info("The visualized image saved in {}".format(
                os.path.join(draw_img_save, os.path.basename(image_file))))
        num = num + 1
        if num > loop_count:
            break
def main(args):
    """Walk ``args.image_dir``, OCR every image, redact the name field of an
    ID card, and save per-image JSON results plus a redacted visualization.
    """
    for root, dirs, files in os.walk(args.image_dir):
        for file in files:
            # NOTE(review): the file is opened only to build its path; the
            # handle `f` is never used — presumably a leftover.
            with open(root + "/" + file, "r") as f:
                imgpath = root + "/" + file
            image_file_list = get_image_file_list(imgpath)
            # NOTE(review): a fresh TextSystem is built per file, which is
            # expensive — confirm whether it can be hoisted out of the walk.
            text_sys = TextSystem(args)
            is_visualize = True
            font_path = args.vis_font_path
            for image_file in image_file_list:
                img, flag = check_and_read_gif(image_file)
                if not flag:
                    img = cv2.imread(image_file)
                if img is None:
                    logger.info(
                        "error in loading image:{}".format(image_file))
                    continue
                starttime = time.time()
                # Disabled experiments with denoising filters before OCR:
                # img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                # img_filter = cv2.bilateralFilter(img_gray, 5, 100, 100)
                # img_filter = cv2.blur(img_gray, (5, 5))
                # img_filter = cv2.GaussianBlur(img_gray, (5, 5), 0)
                # img_filter = cv2.medianBlur(img_gray, 5)
                # img_filter = cv2.cvtColor(img_filter, cv2.COLOR_GRAY2BGR)
                # dt_boxes, rec_res = text_sys(img_filter)
                dt_boxes, rec_res = text_sys(img)
                roi = ()
                # Placeholder box for the card holder's name.  NOTE(review):
                # np.empty is uninitialized memory — if no name is matched
                # below, roi is built from garbage values; verify every
                # input actually contains a name field.
                name_box = np.empty(shape=(4, 2))
                # Find the "姓名" (name) label among the recognitions.  When
                # the matched text is short it is assumed to be the label
                # alone and the actual name is the *next* detection;
                # otherwise label and name share one detection.  The chosen
                # entry is removed so it is not drawn in the visualization.
                for i, val in enumerate(rec_res):
                    if "姓名" in val[0]:
                        if len(val[0]) < 3:
                            name_box = dt_boxes[i + 1]
                            rec_res.pop(i + 1)
                            dt_boxes.pop(i + 1)
                            break
                        else:
                            name_box = dt_boxes[i]
                            rec_res.pop(i)
                            dt_boxes.pop(i)
                            break
                    if "名" in val[0]:
                        if len(val[0]) <= 2:
                            name_box = dt_boxes[i + 1]
                            rec_res.pop(i + 1)
                            dt_boxes.pop(i + 1)
                            break
                        elif len(val[0]) <= 6:
                            name_box = dt_boxes[i]
                            rec_res.pop(i)
                            dt_boxes.pop(i)
                            break
                # (left, top, right, bottom) of the name box — used below to
                # black out the name region in the saved image.
                roi = (int(name_box[0][0]), int(name_box[0][1]),
                       int(name_box[2][0]), int(name_box[2][1]))
                elapse = time.time() - starttime
                print("Predict time of %s: %.3fs" % (image_file, elapse))
                drop_score = 0.5
                # Mirror the input directory structure under the JSON output
                # root.  NOTE(review): the "\\" separator assumes Windows
                # style paths — confirm target platform.
                json_img_save = "./inference_results_json/" + \
                    root.replace(args.image_dir+"\\", "")
                if not os.path.exists(json_img_save):
                    os.makedirs(json_img_save)
                with open(json_img_save + "/" + file.replace(".jpg", "")
                          + ".json", 'w', encoding='utf-8') as file_obj:
                    # Persist only the recognized strings, one
                    # {'str': ...} entry per remaining detection.
                    ans_json = {'data': [{'str': i[0]} for i in rec_res]}
                    json.dump(ans_json, file_obj, indent=4,
                              ensure_ascii=False)
                if is_visualize:
                    image = Image.fromarray(
                        cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                    boxes = dt_boxes
                    txts = [rec_res[i][0] for i in range(len(rec_res))]
                    scores = [rec_res[i][1] for i in range(len(rec_res))]
                    draw_img = draw_ocr_box_txt(image, boxes, txts, scores,
                                                drop_score=args.drop_score,
                                                font_path=font_path)
                    # Black out the name region for privacy, then convert
                    # back to a numpy array for cv2.imwrite.  NOTE(review):
                    # .paste implies draw_ocr_box_txt returns a PIL Image in
                    # this project — confirm against its definition.
                    draw_img.paste((0, 0, 0), roi)
                    draw_img = np.array(draw_img)
                    draw_img_save = "./inference_results/" + \
                        root.replace(args.image_dir+"\\", "")
                    if not os.path.exists(draw_img_save):
                        os.makedirs(draw_img_save)
                    cv2.imwrite(
                        os.path.join(draw_img_save,
                                     os.path.basename(image_file)),
                        draw_img[:, :, ::-1])
                    print("The visualized image saved in {}".format(
                        os.path.join(draw_img_save,
                                     os.path.basename(image_file))))
def main(args):
    """OCR every image under ``args.image_dir`` and, per image, dump an
    ``assorted_results`` JSON (boxes + texts + empty field slots for later
    key/value assignment) next to the image, plus a visualization in
    ./inference_results/.
    """
    image_file_list = get_image_file_list(args.image_dir)
    text_sys = TextSystem(args)
    is_visualize = True
    font_path = args.vis_font_path
    drop_score = args.drop_score
    for image_file in image_file_list:
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        logger.info("Predict time of %s: %.3fs" % (image_file, elapse))
        for text, score in rec_res:
            logger.info("{}, {:.3f}".format(text, score))
        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            boxes = dt_boxes
            # Disabled experiment: re-derive each quadrilateral from its
            # center/size/angle via xy_rotate_box.
            # for box in boxes:
            #     xy_sum = np.sum(box, axis=0) / 4.0
            #     cx = xy_sum[0]
            #     cy = xy_sum[1]
            #     degree = np.arcsin((box[1][1] - box[0][1]) / (box[1][0] - box[0][0]))
            #     w = abs(box[0][0] - box[1][0])
            #     h = abs(box[0][1] - box[3][1])
            #     x1, y1, x2, y2, x3, y3, x4, y4 = xy_rotate_box(cx, cy, w, h, degree / 180 * np.pi)
            #     box[0][0] = x1
            #     box[0][1] = y1
            #     box[1][0] = x2
            #     box[1][1] = y2
            #     box[2][0] = x3
            #     box[2][1] = y3
            #     box[3][0] = x4
            #     box[3][1] = y4
            txts = [rec_res[i][0] for i in range(len(rec_res))]
            scores = [rec_res[i][1] for i in range(len(rec_res))]
            # Per-image result record: "text_boxes" holds one entry per
            # detection with an axis-aligned bbox taken from the top-left
            # (index 0) and bottom-right (index 2) corners of the detected
            # quad; "fields" are empty slots, presumably filled by a later
            # key/value-extraction stage — TODO confirm downstream consumer.
            assorted_results = {"text_boxes": [{'id': i + 1,
                                                'bbox': [float(dt_boxes[i][0][0]),
                                                         float(dt_boxes[i][0][1]),
                                                         float(dt_boxes[i][2][0]),
                                                         float(dt_boxes[i][2][1])],
                                                'text': rec_res[i][0]}
                                               for i in range(len(rec_res))],
                                "fields": [{"field_name": "customer_number",
                                            "value_id": [], "value_text": [],
                                            "key_id": [], "key_text": []},
                                           {"field_name": "name",
                                            "value_id": [], "value_text": [],
                                            "key_id": [], "key_text": []},
                                           {"field_name": "address",
                                            "value_id": [], "value_text": [],
                                            "key_id": [], "key_text": []},
                                           {"field_name": "amount",
                                            "value_id": [], "value_text": [],
                                            "key_id": [], "key_text": []},
                                           {"field_name": "date",
                                            "value_id": [], "value_text": [],
                                            "key_id": [], "key_text": []},
                                           {"field_name": "content",
                                            "value_id": [], "value_text": [],
                                            "key_id": [], "key_text": []}],
                                "global_attributes": {
                                    # NOTE(review): '/'-split assumes POSIX
                                    # paths; os.path.basename would be the
                                    # portable form.
                                    "file_id": image_file.split('/')[-1]}
                                }
            # The JSON lands next to the source image as "<image>.json".
            with open(image_file + '.json', 'w', encoding='utf-8') as outfile:
                json.dump(assorted_results, outfile)
            #res = trainTicket.trainTicket(assorted_results, img=image)
            #res = res.res
            ##compare_img = clip_ground_truth_and_draw_txt(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), boxes, txts, scores, font_path=font_path)
            draw_img = draw_ocr_box_txt(
                image,
                boxes,
                txts,
                scores,
                drop_score=drop_score,
                font_path=font_path)
            draw_img_save = "./inference_results/"
            if not os.path.exists(draw_img_save):
                os.makedirs(draw_img_save)
            cv2.imwrite(
                os.path.join(draw_img_save, os.path.basename(image_file)),
                draw_img[:, :, ::-1])
            # cv2.imwrite(
            #     os.path.join(draw_img_save, os.path.basename(image_file)),
            #     compare_img)
            logger.info("The visualized image saved in {}".format(
                os.path.join(draw_img_save, os.path.basename(image_file))))
def main(args):
    """Benchmark-friendly OCR driver over ``args.image_dir``.

    Supports multi-process sharding via process_id/total_process_num, an
    optional 10-iteration warmup on random noise, per-image debug logging,
    visualization into ``args.draw_img_save_dir``, and autolog benchmark
    reports when ``args.benchmark`` is set.
    """
    image_file_list = get_image_file_list(args.image_dir)
    image_file_list = image_file_list[args.process_id::args.total_process_num]
    text_sys = TextSystem(args)
    is_visualize = True
    font_path = args.vis_font_path
    drop_score = args.drop_score

    # warm up 10 times
    if args.warmup:
        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
        for i in range(10):
            res = text_sys(img)

    total_time = 0
    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
    _st = time.time()
    count = 0
    for idx, image_file in enumerate(image_file_list):
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.debug("error in loading image:{}".format(image_file))
            continue
        tic = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - tic
        total_time += elapse
        logger.debug(
            str(idx) + " Predict time of %s: %.3fs" % (image_file, elapse))
        for text, score in rec_res:
            logger.debug("{}, {:.3f}".format(text, score))
        if not is_visualize:
            continue
        image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        txts = [text for text, _ in rec_res]
        scores = [score for _, score in rec_res]
        draw_img = draw_ocr_box_txt(image,
                                    dt_boxes,
                                    txts,
                                    scores,
                                    drop_score=drop_score,
                                    font_path=font_path)
        draw_img_save_dir = args.draw_img_save_dir
        os.makedirs(draw_img_save_dir, exist_ok=True)
        if flag:
            # A GIF was decoded to one frame; save it under a .png name.
            image_file = image_file[:-3] + "png"
        out_path = os.path.join(draw_img_save_dir,
                                os.path.basename(image_file))
        cv2.imwrite(out_path, draw_img[:, :, ::-1])
        logger.debug("The visualized image saved in {}".format(out_path))

    logger.info("The predict total time is {}".format(time.time() - _st))
    if args.benchmark:
        text_sys.text_detector.autolog.report()
        text_sys.text_recognizer.autolog.report()
def main(args):
    """OCR every image in ``args.image_dir`` and persist the predictions.

    For each image, detections scoring above ``drop_score`` are serialized
    as one label-file line ("<path>\\t<json>") and finally written via
    ``save_results_to_txt`` into ``args.draw_img_save_dir``.  Optional
    warmup, sharding, visualization, and benchmark reporting mirror the
    other drivers in this file.
    """
    image_file_list = get_image_file_list(args.image_dir)
    image_file_list = image_file_list[args.process_id::args.total_process_num]
    text_sys = TextSystem(args)
    is_visualize = args.is_visualize
    font_path = args.vis_font_path
    drop_score = args.drop_score

    # warm up 10 times
    if args.warmup:
        img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
        for i in range(10):
            res = text_sys(img)

    total_time = 0
    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
    _st = time.time()
    count = 0
    save_res = []
    for idx, image_file in enumerate(image_file_list):
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.debug("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        total_time += elapse

        # save results: keep only predictions that clear drop_score.
        # Cleanup: removed a dead `text_str` assignment that was never used.
        preds = []
        for box, (text, score) in zip(dt_boxes, rec_res):
            if score >= drop_score:
                preds.append({
                    "transcription": text,
                    "points": np.array(box).tolist()
                })
        save_res.append(
            image_file + '\t' + json.dumps(preds, ensure_ascii=False) + '\n')

        # print predicted results
        logger.debug(
            str(idx) + " Predict time of %s: %.3fs" % (image_file, elapse))
        for text, score in rec_res:
            logger.debug("{}, {:.3f}".format(text, score))

        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            boxes = dt_boxes
            txts = [rec_res[i][0] for i in range(len(rec_res))]
            scores = [rec_res[i][1] for i in range(len(rec_res))]
            draw_img = draw_ocr_box_txt(image,
                                        boxes,
                                        txts,
                                        scores,
                                        drop_score=drop_score,
                                        font_path=font_path)
            draw_img_save_dir = args.draw_img_save_dir
            os.makedirs(draw_img_save_dir, exist_ok=True)
            if flag:
                image_file = image_file[:-3] + "png"
            cv2.imwrite(
                os.path.join(draw_img_save_dir,
                             os.path.basename(image_file)),
                draw_img[:, :, ::-1])
            logger.debug("The visualized image saved in {}".format(
                os.path.join(draw_img_save_dir,
                             os.path.basename(image_file))))

    # The predicted results are saved in
    # os.path.join(args.draw_img_save_dir, "results.txt").
    # (Comment fix: the original referenced nonexistent os.draw_img_save_dir.)
    save_results_to_txt(save_res, args.draw_img_save_dir)
    logger.info("The predict total time is {}".format(time.time() - _st))
    if args.benchmark:
        text_sys.text_detector.autolog.report()
        text_sys.text_recognizer.autolog.report()