def text_detect(text_detector, image_path, img_type):
    """Detect text lines in one image and measure how much of it they cover.

    Args:
        text_detector: object whose ``detect(image)`` is called on the
            resized image.
        image_path: path given to ``cv2.imread``; also used as the caption
            and as the prefix of the optional debug output file.
        img_type: image category label. ``"others"`` skips detection.

    Returns:
        ``(text_lines, text_area_ratio)``. For ``img_type == "others"`` the
        result is ``([], 0)`` and the image is never read.
    """
    banner = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    print(banner)
    print("Image: %s" % image_path)

    # Images tagged "others" are reported as containing no text at all.
    if img_type == "others":
        return [], 0

    original = cv2.imread(image_path)
    resized, scale = resize_im(original, Config.SCALE, Config.MAX_SCALE)

    stopwatch = Timer()
    stopwatch.tic()

    # Detection runs on the resized image; dividing by the resize factor
    # projects the boxes back to the size of the original image.
    boxes = text_detector.detect(resized) / scale
    boxes = refine_boxes(
        original,
        boxes,
        expand_pixel_len=Config.DILATE_PIXEL,
        pixel_blank=Config.BREATH_PIXEL,
        binary_thresh=Config.BINARY_THRESH,
    )
    text_area_ratio = calc_area_ratio(boxes, original.shape)

    print("Number of the detected text lines: %s" % len(boxes))
    print("Detection Time: %f" % stopwatch.toc())
    print(banner)

    if Config.DEBUG_SAVE_BOX_IMG:
        annotated = draw_boxes(
            original, boxes, is_display=False, caption=image_path, wait=False
        )
        # Only write the debug image when draw_boxes actually produced one.
        if annotated is not None:
            cv2.imwrite(image_path+'_boxes.jpg', annotated)

    return boxes, text_area_ratio
def text_detect(args, text_detector, image_path):
    """Detect, refine and rank the text lines of the image at ``image_path``.

    Args:
        args: options object; only ``args.SAVE_IMAGE_WITH_BOX`` is read.
        text_detector: object whose ``detect(image)`` is called on the
            resized image.
        image_path: path given to ``cv2.imread``; also used as the caption
            and as the prefix of the optional ``_boxes.jpg`` output file.

    Returns:
        ``(im, text_lines)``: the image as loaded by ``cv2.imread`` and the
        boxes returned by ``rank_boxes``.
    """
    banner = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
    print(banner)
    print("Image: %s" % image_path)

    original = cv2.imread(image_path)
    resized, scale = resize_im(original, cfg.SCALE, cfg.MAX_SCALE)

    stopwatch = Timer()
    stopwatch.tic()

    # Boxes are found on the resized image; dividing by the resize factor
    # projects them back to the size of the original image.
    boxes = text_detector.detect(resized) / scale
    boxes = rank_boxes(refine_boxes(original, boxes))

    print("Number of the detected text lines: %s" % len(boxes))
    print("Detection Time: %f" % stopwatch.toc())
    print(banner)

    if args.SAVE_IMAGE_WITH_BOX:
        annotated = draw_boxes(
            original, boxes, is_display=False, caption=image_path, wait=False
        )
        # Only write the annotated image when draw_boxes produced one.
        if annotated is not None:
            cv2.imwrite(image_path + '_boxes.jpg', annotated)

    return original, boxes
def detect(root, img_file, model_file):
    """Run the CTPN network on one image and write the drawn boxes to disk.

    The weights in ``model_file`` are loaded into ``Net.CTPN``. Every anchor
    whose text score exceeds 0.7 is decoded into a proposal, overlapping
    proposals are removed with NMS, the survivors are linked into text lines,
    and the boxes returned by ``other.draw_boxes`` are written one per line
    (comma-separated) to a ``.txt`` file.

    Args:
        root: directory that contains the image.
        img_file: image file name inside ``root``. The part before the first
            ``'.'`` becomes the stem of the output file name.
        model_file: path handed to ``torch.load`` for the state dict.

    Returns:
        None. The result is the text file written to disk.
    """
    # NOTE(review): the output file goes under the module-level ``img_dir``,
    # not under ``root`` -- confirm this is intended.
    net = Net.CTPN()
    net.load_state_dict(torch.load(model_file))
    # net.cuda()
    print(net)
    net.eval()
    text_connector = TextProposalConnector()

    im = cv2.imread(os.path.join(root, img_file))
    # im = Dataset.scale_img(im, None, shortest_side=600)

    # Build the network input from a copy so ``im`` stays intact for drawing:
    # move the last axis to the front, then add a leading axis of size 1.
    img = copy.deepcopy(im)
    img = img.transpose(2, 0, 1)
    img = img[np.newaxis, :, :, :]
    img = torch.Tensor(img)
    # img = img.cuda()

    v, score, side = net(img, val=True)
    score = score.cpu().detach().numpy()[:, :, :, 1]
    result = np.where(score > 0.7)

    # Decode each kept (anchor, row, column) triple into a scored box.
    for_nms = []
    for anchor, height, width in zip(result[0], result[1], result[2]):
        vc = v[anchor, 0, height, width]
        vh = v[anchor, 1, height, width]
        # presumably a 16 px stride with the anchor centred in it -- TODO confirm
        cya = height * 16 + 7.5
        ha = anchor_height[anchor]
        cy = vc * ha + cya
        h = math.pow(10, vh) * ha
        pt = other.trans_to_2pt(width, cy, h)
        for_nms.append(
            [pt[0], pt[1], pt[2], pt[3], score[anchor, height, width]])
    for_nms = np.array(for_nms, dtype=np.float32)
    nms_result = nms.cpu_nms(for_nms, TEXT_LINE_NMS_THRESH)

    # Split the surviving rows into coordinates and (normalized) scores.
    text_proposals = np.array([for_nms[i, 0:4] for i in nms_result])
    text_proposal_score = np.array([for_nms[i, 4] for i in nms_result])
    text_proposal_score = other.normalize(text_proposal_score)

    text_lines = text_connector.get_text_lines(
        text_proposals, text_proposal_score, im.shape[:2])

    keep_index = filter_boxes(text_lines)
    text_lines = text_lines[keep_index]

    # nms for text lines
    if text_lines.shape[0] != 0:
        keep_inds = nms.cpu_nms(text_lines, TEXT_LINE_NMS_THRESH)
        text_lines = text_lines[keep_inds]

    rec = other.draw_boxes(im, text_lines)
    rec = rec.tolist()

    # The context manager guarantees the file is flushed and closed even if
    # a write fails; the original left the handle open.
    out_path = os.path.join(img_dir, img_file.split('.')[0] + '.txt')
    with open(out_path, 'w') as rec_file:
        for box in rec:
            rec_file.write(','.join(str(pt) for pt in box))
            rec_file.write('\n')
def getTextRec(text_detector, im):
    """Detect text on a resized copy of ``im`` and draw the resulting boxes.

    Args:
        text_detector: object whose ``detect(image)`` returns the text lines.
        im: input image, resized with ``cfg.SCALE`` / ``cfg.MAX_SCALE`` first.

    Returns:
        ``(tmp, imsrc, text_recs)``: the copy of the resized image that was
        handed to ``draw_boxes``, followed by the two values ``draw_boxes``
        returned.
    """
    resized, _scale = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)

    # The detector sees the resized image itself; drawing happens on a copy.
    canvas = resized.copy()
    detected_lines = text_detector.detect(resized)

    imsrc, text_recs = draw_boxes(
        canvas, detected_lines, caption='im_name', wait=True
    )
    return canvas, imsrc, text_recs
def getCharBlock(text_detector, im):
    """Detect text lines on a resized ``im`` and draw them on a copy.

    Args:
        text_detector: object whose ``detect(image)`` returns the text lines.
        im: input image, resized with ``cfg.SCALE`` / ``cfg.MAX_SCALE`` first.

    Returns:
        ``(tmp, text_recs)``: the copy of the resized image that was handed
        to ``draw_boxes`` and the value ``draw_boxes`` returned.
    """
    resized, _scale = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)

    # Detection runs on the resized image; drawing happens on a copy.
    canvas = resized.copy()
    detected_lines = text_detector.detect(resized)

    text_recs = draw_boxes(
        canvas, detected_lines, caption='im_name', wait=True
    )
    return canvas, text_recs
def getCharBlock(text_detector,im):
    """Show the resized ``im``, detect text lines on it and draw them on a copy.

    Args:
        text_detector: object whose ``detect(image)`` returns the text lines.
        im: input image, resized with ``cfg.SCALE`` / ``cfg.MAX_SCALE`` first.

    Returns:
        ``(tmp, text_recs)``: the copy of the resized image that was handed
        to ``draw_boxes`` and the value ``draw_boxes`` returned.
    """
    resized, _scale = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)

    # Display the resized input in the "src" window before any drawing.
    cv2.imshow("src", resized)

    # Detection runs on the resized image; drawing happens on a copy.
    canvas = resized.copy()
    detected_lines = text_detector.detect(resized)

    text_recs = draw_boxes(
        canvas, detected_lines, caption='im_name', wait=True
    )
    return canvas, text_recs
# Demo driver: build the detector once, then run it over every entry of
# DEMO_IMAGE_DIR and display the detected text lines.
text_proposals_detector = TextProposalDetector(
    CaffeModel(NET_DEF_FILE, MODEL_FILE))
text_detector = TextDetector(text_proposals_detector)

demo_imnames = os.listdir(DEMO_IMAGE_DIR)
timer = Timer()

for im_name in demo_imnames:
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("Image: %s" % im_name)

    im_file = osp.join(DEMO_IMAGE_DIR, im_name)
    im = cv2.imread(im_file)
    if im is None:
        # os.listdir yields every directory entry and cv2.imread returns
        # None for anything it cannot decode; skip instead of crashing.
        print("Skipping unreadable image: %s" % im_name)
        continue

    # The reported time covers resizing and detection.
    timer.tic()
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)

    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())

    im_with_text_lines = draw_boxes(im, text_lines, caption=im_name,
                                    wait=False)

print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("Thank you for trying our demo. Press any key to exit...")
cv2.waitKey(0)
# Demo driver: run the detector over every entry of DEMO_IMAGE_DIR and save
# each annotated image as "<name without extension>_res.jpg".
text_detector = TextDetector(text_proposals_detector)

demo_imnames = os.listdir(DEMO_IMAGE_DIR)
timer = Timer()

for im_name in demo_imnames:
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("Image: %s" % im_name)

    im_file = osp.join(DEMO_IMAGE_DIR, im_name)
    im = cv2.imread(im_file)
    if im is None:
        # os.listdir yields every directory entry and cv2.imread returns
        # None for anything it cannot decode; skip instead of crashing.
        print("Skipping unreadable image: %s" % im_name)
        continue

    # The reported time covers resizing and detection.
    timer.tic()
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)

    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())

    im_with_text_lines = draw_boxes(im, text_lines, is_display=False,
                                    caption=im_name, wait=False)
    # splitext drops the real extension whatever its length; the previous
    # im_name[:-4] was only right for three-character extensions.
    cv2.imwrite("%s_res.jpg" % osp.splitext(im_name)[0], im_with_text_lines)

print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("Thank you for trying our demo. Press any key to exit...")
cv2.waitKey(0)
# Demo loop: detect text in every image named in demo_imnames. Results are
# shown in a window when IS_CV_DISPLAY is set; otherwise they are written to
# args.output_dir.
timer = Timer()

for im_name in demo_imnames:
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("Image: %s" % im_name)

    im_file = osp.join(DEMO_IMAGE_DIR, im_name)
    im = cv2.imread(im_file)
    if im is None:
        # cv2.imread returns None when it cannot decode the file; skip it
        # instead of crashing inside resize_im.
        print("Skipping unreadable image: %s" % im_name)
        continue

    # The reported time covers resizing and detection.
    timer.tic()
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)

    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())

    im_with_text_lines = draw_boxes(
        im, text_lines, is_display=IS_CV_DISPLAY, caption=im_name, wait=False)
    if not IS_CV_DISPLAY:
        cv2.imwrite(f'{args.output_dir}/output_{im_name}', im_with_text_lines)

print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
if IS_CV_DISPLAY:
    # Keep the windows open until the user presses a key.
    print("Thank you for trying our demo. Press any key to exit...")
    cv2.waitKey(0)
# Interactive demo: keep asking for a file name under "<cwd>/../img/", run
# the detector on it and display the boxes, until the user types "exit".
path = os.path.abspath(os.curdir)
timer = Timer()

print("\ninput exit break\n")

while True:
    # Python 3 has no raw_input; choose the line reader for the interpreter.
    if sys.version_info < (3, 0):
        im_name = raw_input("\nplease input file name:")
    else:
        im_name = input("\nplease input file name:")

    if im_name == "exit":
        break

    im_path = path + "/../img/" + im_name
    im = cv2.imread(im_path)
    # Unreadable or missing file: ask again.
    if im is None:
        continue

    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    cv2.imshow("src", im)

    # Boxes are drawn on a copy, not on the image shown in the "src" window.
    tmp = im.copy()

    # Only the detection call itself is timed.
    timer.tic()
    text_lines = text_detector.detect(im)

    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())

    im_with_text_lines = draw_boxes(tmp, text_lines, caption=im_name,
                                    wait=True)
# Interactive demo: build the detector once, then keep asking for a file name
# under "<cwd>/demo_images/", run detection on it and display the boxes,
# until the user types "exit".
text_proposals_detector = TextProposalDetector(
    CaffeModel(NET_DEF_FILE, MODEL_FILE))
text_detector = TextDetector(text_proposals_detector)

path = os.path.abspath(os.curdir)
timer = Timer()

print("\ninput exit break\n")

while True:
    im_name = raw_input("\nplease input file name:")
    if im_name == "exit":
        break

    im_path = path + "/demo_images/" + im_name
    im = cv2.imread(im_path)
    # Unreadable or missing file: ask again.
    if im is None:
        continue

    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    cv2.imshow("src", im)

    # Boxes are drawn on a copy, not on the image shown in the "src" window.
    tmp = im.copy()

    # Only the detection call itself is timed.
    timer.tic()
    text_lines = text_detector.detect(im)

    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())

    im_with_text_lines = draw_boxes(tmp, text_lines, caption=im_name,
                                    wait=True)
# NOTE(review): this call may be the tail of a CPU/GPU mode branch whose
# header is outside this view -- confirm its indentation against the full file.
caffe.set_device(cfg.TEST_GPU_ID)

# initialize the detectors
text_proposals_detector = TextProposalDetector(
    CaffeModel(NET_DEF_FILE, MODEL_FILE))
text_detector = TextDetector(text_proposals_detector)

demo_imnames = os.listdir(DEMO_IMAGE_DIR)
timer = Timer()

for im_name in demo_imnames:
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    print("Image: %s" % im_name)

    im_file = osp.join(DEMO_IMAGE_DIR, im_name)
    im = cv2.imread(im_file)
    if im is None:
        # os.listdir yields every directory entry and cv2.imread returns
        # None for anything it cannot decode; skip instead of crashing.
        print("Skipping unreadable image: %s" % im_name)
        continue

    # The reported time covers resizing and detection.
    timer.tic()
    im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
    text_lines = text_detector.detect(im)

    print("Number of the detected text lines: %s" % len(text_lines))
    print("Time: %f" % timer.toc())

    im_with_text_lines = draw_boxes(im, text_lines, caption=im_name,
                                    wait=False)

print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
print("Thank you for trying our demo. Press any key to exit...")
cv2.waitKey(0)
# NOTE(review): this exit() looks like the tail of a guard (for example a
# usage check) whose header is outside this view -- confirm its indentation
# against the full file; as written here it runs unconditionally.
exit()

# initialize the detectors
text_proposals_detector = TextProposalDetector(
    CaffeModel(NET_DEF_FILE, MODEL_FILE))
text_detector = TextDetector(text_proposals_detector)

timer = Timer()
cam = cv2.VideoCapture(vid)

try:
    while True:
        ret, im = cam.read()
        # No frame could be read: stop.
        if not ret:
            break

        # The reported time covers resizing and detection of one frame.
        timer.tic()
        im, f = resize_im(im, cfg.SCALE, cfg.MAX_SCALE)
        text_lines = text_detector.detect(im)

        print("Number of the detected text lines: %s" % len(text_lines))
        print("Time: %f" % timer.toc())

        im_with_text_lines = draw_boxes(im, text_lines,
                                        caption="Text Detection", wait=False)

        # Key code 27 (Esc) ends the loop.
        key = cv2.waitKey(1)
        if key == 27:
            break
finally:
    # Release the capture even when detection or drawing raises; the
    # original never released it.
    cam.release()