def get_boxes(img_c):
    """Detect text boxes in ``img_c`` and return them in input-image coordinates.

    Relies on the module-level ``net``, ``square_size``, ``mag_ratio``,
    ``text_threshold``, ``link_threshold`` and ``low_text``.

    Args:
        img_c: Image passed to ``imgproc.resize_aspect_ratio``
            (presumably an H x W x C array -- confirm against the caller).

    Returns:
        The boxes produced by ``craft_utils.getDetBoxes`` (``poly=False``),
        rescaled by ``craft_utils.adjustResultCoordinates``.
    """
    # Resize; the reciprocal of the applied ratio maps results back later.
    img_r, target_ratio, _ = imgproc.resize_aspect_ratio(
        img_c,
        square_size=square_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # Preprocessing: [h, w, c] -> [c, h, w] -> [b, c, h, w]
    x = imgproc.normalizeMeanVariance(img_r)
    x = torch.from_numpy(x).permute(2, 0, 1).unsqueeze(0)

    # Forward pass. Only NumPy copies of the output are used below, so
    # gradient tracking is disabled to avoid building an autograd graph.
    with torch.no_grad():
        y, _ = net(x)

    # Channel 0 is the text score map, channel 1 the link score map.
    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    # Post-processing
    boxes, _ = craft_utils.getDetBoxes(score_text,
                                       score_link,
                                       text_threshold=text_threshold,
                                       link_threshold=link_threshold,
                                       low_text=low_text,
                                       poly=False)

    # Coordinate adjustment
    return craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
def test_net(canvas_size, mag_ratio, net, image, text_threshold, link_threshold, low_text, poly, device):
    """Run ``net`` on one image and return its detected boxes and polygons.

    Args:
        canvas_size: Size argument forwarded to ``resize_aspect_ratio``.
        mag_ratio: Magnification ratio forwarded to ``resize_aspect_ratio``.
        net: Callable returning ``(y, feature)`` for a batched input tensor.
        image: Input image (presumably H x W x C -- confirm against caller).
        text_threshold, link_threshold, low_text, poly: Forwarded unchanged
            to ``getDetBoxes``.
        device: Target of the ``.to(device)`` call on the input tensor.

    Returns:
        ``(boxes, polys)``; every ``None`` entry of ``polys`` is replaced by
        the box at the same index.
    """
    resized, scale, _heatmap_size = resize_aspect_ratio(
        image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio)
    inv_scale = 1 / scale  # same factor is used for width and height

    # [h, w, c] -> [c, h, w] -> [b, c, h, w], then move to the device.
    chw = torch.from_numpy(normalizeMeanVariance(resized)).permute(2, 0, 1)
    batch = Variable(chw.unsqueeze(0)).to(device)

    with torch.no_grad():
        scores, _feature = net(batch)

    text_map = scores[0, :, :, 0].cpu().data.numpy()
    link_map = scores[0, :, :, 1].cpu().data.numpy()

    boxes, polys = getDetBoxes(text_map, link_map, text_threshold,
                               link_threshold, low_text, poly)

    boxes = adjustResultCoordinates(boxes, inv_scale, inv_scale)
    polys = adjustResultCoordinates(polys, inv_scale, inv_scale)

    # Fall back to the plain box wherever no polygon was produced.
    for idx, candidate in enumerate(polys):
        if candidate is None:
            polys[idx] = boxes[idx]

    return boxes, polys
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    """Detect text in ``image`` and return boxes, polygons and a heatmap.

    Reads ``args.canvas_size`` and ``args.mag_ratio`` from the module-level
    ``args``.

    Args:
        net: Callable returning ``(y, feature)`` for a batched input tensor.
        image: Input image (presumably H x W x C -- confirm against caller).
        text_threshold, link_threshold, low_text, poly: Forwarded unchanged
            to ``craft_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved with ``.cuda()``.

    Returns:
        ``(boxes, polys, ret_score_text)`` where ``ret_score_text`` is the
        heatmap image of the text and link score maps stacked side by side.
    """
    # Resize (this variant uses INTER_AREA interpolation).
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_AREA,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # Preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # Forward pass
    with torch.no_grad():
        y, _ = net(x)

    # Make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # Coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k, poly_k in enumerate(polys):
        if poly_k is None:
            polys[k] = boxes[k]

    # Render results (optional)
    render_img = np.hstack((score_text.copy(), score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    return boxes, polys, ret_score_text
def process(self, craft, seq, key, sub_img):
    """Detect text boxes in ``sub_img`` and run recognition on each one.

    Nothing is returned. ``seq`` is resized in place to one ``None`` slot per
    horizontal box, and ``self.predict`` is invoked once per box with
    ``(model, vocab, seq, key, index, crop)``.

    Args:
        craft: Detector callable returning ``(y, feature)``.
        seq: Mutable sequence, overwritten in place with one slot per box.
        key: Opaque value forwarded to ``self.predict``.
        sub_img: Image to process; it is sliced as ``[y0:y1, x0:x1, :]`` and
            converted with ``cv2.COLOR_BGR2RGB``.
    """
    img_resized, target_ratio, _ = resize_aspect_ratio(
        sub_img, 2560, interpolation=cv2.INTER_LINEAR, mag_ratio=1.)
    ratio_h = ratio_w = 1 / target_ratio

    x = normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]
    x = x.to(self.device)

    # Inference only: the outputs are converted to NumPy right away, so no
    # autograd graph is needed.
    with torch.no_grad():
        y, _ = craft(x)
    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    boxes, polys = getDetBoxes(score_text,
                               score_link,
                               text_threshold=0.7,
                               link_threshold=0.4,
                               low_text=0.4,
                               poly=False)
    boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k, poly_k in enumerate(polys):
        if poly_k is None:
            polys[k] = boxes[k]

    # Flatten every polygon to a 1-D int32 coordinate vector.
    result = [np.array(box).astype(np.int32).reshape((-1)) for box in polys]

    horizontal_list, free_list = group_text_box(result,
                                                slope_ths=0.8,
                                                ycenter_ths=0.5,
                                                height_ths=1,
                                                width_ths=1,
                                                add_margin=0.1)

    min_size = 20
    if min_size:
        # NOTE(review): horizontal boxes are filtered with a literal 10 while
        # free-form boxes use ``min_size``; kept as in the original.
        horizontal_list = [
            b for b in horizontal_list if max(b[1] - b[0], b[3] - b[2]) > 10
        ]
        # NOTE(review): ``free_list`` is filtered but not used afterwards.
        free_list = [
            b for b in free_list
            if max(diff([c[0] for c in b]), diff([c[1] for c in b])) > min_size
        ]

    seq[:] = [None] * len(horizontal_list)

    # NOTE(review): the recogniser is rebuilt and its weights reloaded on
    # every call; consider caching it on the instance.
    model, vocab = build_model(self.config)
    model.load_state_dict(
        torch.load(self.weights, map_location=torch.device('cpu')))

    for i, ele in enumerate(horizontal_list):
        # Clamp negative coordinates so slicing does not wrap around.
        x0, x1, y0, y1 = [0 if v < 0 else v for v in ele]
        img = sub_img[y0:y1, x0:x1, :]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img.astype(np.uint8))
        p = threading.Thread(target=self.predict,
                             args=(model, vocab, seq, key, i, img))
        p.start()
        # NOTE(review): each worker is awaited before the next one starts,
        # so every prediction has finished when this method returns.
        p.join()
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, image_path):
    """Detect text in ``image`` and return the boxes plus a score heatmap.

    Reads ``args.canvas_size``, ``args.mag_ratio``, ``args.debug`` and
    ``args.show_time`` from the module-level ``args``. With ``args.debug``
    set, both raw score maps are saved under ``./debug``.

    Args:
        net: Callable returning a pair whose first item is the score tensor.
        image: Input image (presumably H x W x C -- confirm against caller).
        text_threshold, link_threshold, low_text: Forwarded unchanged to
            ``craft_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved with ``.cuda()``.
        image_path: Path whose base name (up to the first ``.``) names the
            debug dump files.

    Returns:
        ``(boxes, ret_score_text)``.
    """
    t0 = time.time()

    # Resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # Preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # Forward pass; inference only, so gradient tracking is disabled.
    with torch.no_grad():
        y, _ = net(x)

    # Make score and link map
    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    t0 = time.time() - t0
    t1 = time.time()

    if args.debug:
        stem = os.path.basename(image_path).split('.')[0]
        np.save(os.path.join('./debug', stem + '_score_text.npy'), score_text)
        np.save(os.path.join('./debug', stem + '_score_link.npy'), score_link)

    # Post-processing (this getDetBoxes variant returns the boxes only).
    boxes = craft_utils.getDetBoxes(score_text, score_link, text_threshold,
                                    link_threshold, low_text)
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    t1 = time.time() - t1

    # Render results (optional)
    render_img = np.hstack((score_text.copy(), score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, ret_score_text
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, ocr_type):
    """Detect text in ``image``; optionally cluster-sort single-char boxes.

    Reads ``args.canvas_size``, ``args.mag_ratio`` and ``args.show_time``
    from the module-level ``args``.

    Args:
        net: Callable returning a pair whose first item is the score tensor.
        image: Input image; ``image.shape`` is passed to
            ``utils.cluster_sort``.
        text_threshold, link_threshold, low_text, poly, ocr_type: Forwarded
            unchanged to ``utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved with ``.cuda()``.

    Returns:
        ``(boxes, polys, ret_score_text)``.
    """
    t0 = time.time()

    # Resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # Preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # Forward pass; inference only, so gradient tracking is disabled.
    with torch.no_grad():
        y, _ = net(x)

    # Make score and link map
    score_text = y[0, :, :, 0].cpu().detach().numpy()
    score_link = y[0, :, :, 1].cpu().detach().numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = utils.getDetBoxes(score_text, score_link, text_threshold,
                                     link_threshold, low_text, poly, ocr_type)

    # Coordinate adjustment
    boxes = utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    if ocr_type == 'single_char':
        boxes = utils.cluster_sort(image.shape, boxes)
    for k, poly_k in enumerate(polys):
        if poly_k is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    # Render results (optional)
    render_img = np.hstack((score_text.copy(), score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
def __getitem__(self, idx):
    """Load, resize and normalise the image at position ``idx``.

    Returns:
        ``(x, 1)`` where ``x`` is the normalised image permuted from
        [h, w, c] to [c, h, w]; the second item is the constant ``1``.
    """
    raw = imgproc.loadImage(self.image_list[idx])
    resized, _ratio, _heatmap_size = imgproc.resize_aspect_ratio(
        raw,
        self.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=self.mag_ratio)
    normalised = imgproc.normalizeMeanVariance(resized)
    # No batch dimension is added here.
    return torch.from_numpy(normalised).permute(2, 0, 1), 1
def test_net(self, image_opencv):
    """Detect text in ``image_opencv`` using the instance's configuration.

    Uses ``self.net``, the optional ``self.refine_net`` and the thresholds,
    ``canvas_size``, ``interpolation``, ``mag_ratio``, ``cuda``, ``poly`` and
    ``show_time`` attributes stored on the instance.

    Args:
        image_opencv: Input image (presumably H x W x C -- confirm against
            the caller).

    Returns:
        ``(boxes, polys)``; ``None`` polygons are replaced by their box.
    """
    # Start timing before inference so the reported "infer" time covers the
    # whole forward pass, not only the link refinement.
    t0 = time.time()

    # Resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image_opencv,
        self.canvas_size,
        interpolation=self.interpolation,
        mag_ratio=self.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # Preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if self.cuda:
        x = x.cuda()

    # Inference only: outputs are converted to NumPy, so gradient tracking
    # is disabled for both networks.
    with torch.no_grad():
        # Forward pass
        y, feature = self.net(x)

        # Make score and link map
        score_text = y[0, :, :, 0].detach().cpu().numpy()
        score_link = y[0, :, :, 1].detach().cpu().numpy()

        # Refine link
        if self.refine_net is not None:
            y_refiner = self.refine_net(y, feature)
            score_link = y_refiner[0, :, :, 0].detach().cpu().numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           self.text_threshold,
                                           self.link_threshold, self.low_text,
                                           self.poly)

    # Coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k, poly_k in enumerate(polys):
        if poly_k is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    if self.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys
def test_net(self, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None):
    """Run ``self.net`` (and optionally ``refine_net``) on one image.

    ``canvas_size`` and ``mag_ratio`` are read from module scope.

    Args:
        image: Input image (presumably H x W x C -- confirm against caller).
        text_threshold, link_threshold, low_text, poly: Forwarded unchanged
            to ``craft_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved with ``.cuda()``.
        refine_net: Optional callable taking ``(y, feature)``; when given,
            its channel 0 replaces the link score map.

    Returns:
        ``(boxes, polys, ret_score_text)``.
    """
    infer_started = time.time()

    resized, scale, _heatmap_size = imgproc.resize_aspect_ratio(
        image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio)
    inv_scale = 1 / scale  # same factor is used for width and height

    # [h, w, c] -> [c, h, w] -> [b, c, h, w]
    chw = torch.from_numpy(imgproc.normalizeMeanVariance(resized)).permute(2, 0, 1)
    batch = Variable(chw.unsqueeze(0))
    if cuda:
        batch = batch.cuda()

    with torch.no_grad():
        scores, feature = self.net(batch)

    text_map = scores[0, :, :, 0].cpu().data.numpy()
    link_map = scores[0, :, :, 1].cpu().data.numpy()

    if refine_net is not None:
        with torch.no_grad():
            refined = refine_net(scores, feature)
        link_map = refined[0, :, :, 0].cpu().data.numpy()

    infer_elapsed = time.time() - infer_started
    post_started = time.time()

    boxes, polys = craft_utils.getDetBoxes(text_map, link_map, text_threshold,
                                           link_threshold, low_text, poly)

    boxes = craft_utils.adjustResultCoordinates(boxes, inv_scale, inv_scale)
    polys = craft_utils.adjustResultCoordinates(polys, inv_scale, inv_scale)
    for idx, candidate in enumerate(polys):
        if candidate is None:
            polys[idx] = boxes[idx]

    # Timings are collected but not reported: the report line is disabled.
    post_elapsed = time.time() - post_started

    # Text and link maps side by side, rendered as one heatmap image.
    stacked = np.hstack((text_map.copy(), link_map))
    ret_score_text = imgproc.cvt2HeatmapImg(stacked)

    return boxes, polys, ret_score_text
def get_prediction(net, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None,
                   canvas_size=1280, mag_ratio=1.5):
    """Detect text in ``image`` and return boxes and polygons.

    Args:
        net: Callable returning ``(y, feature)`` for a batched input tensor.
        image: Input image (presumably H x W x C -- confirm against caller).
        text_threshold, link_threshold, low_text, poly: Forwarded unchanged
            to ``craft_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved with ``.cuda()``.
        refine_net: Optional callable taking ``(y, feature)``; when given,
            its channel 0 replaces the link score map.
        canvas_size: Size argument for ``imgproc.resize_aspect_ratio``.
            Defaults to the previously hard-coded 1280.
        mag_ratio: Magnification ratio for the resize. Defaults to the
            previously hard-coded 1.5.

    Returns:
        ``(boxes, polys)``; ``None`` polygons are replaced by their box.
    """
    # Resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # Preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    with torch.no_grad():
        # Forward pass
        y, feature = net(x)

        # Make score and link map
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()

        # Refine link
        if refine_net is not None:
            y_refiner = refine_net(y, feature)
            score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # Coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k, poly_k in enumerate(polys):
        if poly_k is None:
            polys[k] = boxes[k]

    return boxes, polys
def test_net(net, image, text_threshold, link_threshold, low_text, cuda):
    """Detect character-, word- and line-level polygons in ``image``.

    Reads ``opt.MAXIMUM_IMAGE_SIZE`` and ``opt.MAG_RATIO`` from the
    module-level ``opt``.

    Args:
        net: Callable returning a pair whose first item is the score tensor.
        image: Input image (presumably H x W x C -- confirm against caller).
        text_threshold, link_threshold, low_text: Forwarded unchanged to
            ``ltd_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved with ``.cuda()``.

    Returns:
        ``(polys, word_polys, line_polys, score_text)``; in each polygon
        list, ``None`` entries are replaced by the matching box.
    """
    # Resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image,
        opt.MAXIMUM_IMAGE_SIZE,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=opt.MAG_RATIO)
    ratio_h = ratio_w = 1 / target_ratio

    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)
    x = Variable(x.unsqueeze(0))
    if cuda:
        x = x.cuda()

    # Predict; inference only, so gradient tracking is disabled.
    with torch.no_grad():
        y, _ = net(x)

    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    # Post-process: get shape of bounding box
    boxes, polys, word_boxes, word_polys, line_boxes, line_polys = ltd_utils.getDetBoxes(
        score_text, score_link, text_threshold, link_threshold, low_text)

    boxes = ltd_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = ltd_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    word_boxes = ltd_utils.adjustResultCoordinates(word_boxes, ratio_w, ratio_h)
    word_polys = ltd_utils.adjustResultCoordinates(word_polys, ratio_w, ratio_h)
    line_boxes = ltd_utils.adjustResultCoordinates(line_boxes, ratio_w, ratio_h)
    line_polys = ltd_utils.adjustResultCoordinates(line_polys, ratio_w, ratio_h)

    # Wherever a polygon is missing, fall back to the box at the same index.
    for fallback, target in ((boxes, polys),
                             (word_boxes, word_polys),
                             (line_boxes, line_polys)):
        for idx, item in enumerate(target):
            if item is None:
                target[idx] = fallback[idx]

    return polys, word_polys, line_polys, score_text
def representative_data_gen():
    """Yield preprocessed sample inputs from the first ten dataset entries.

    Reads the module-level ``dataset_path``. Each listed file is loaded,
    resized to ``dsize=(800, 1280)``, passed through the detector
    preprocessing and yielded as a one-element list holding a
    [b, c, h, w] NumPy array.
    """
    for file in os.listdir(dataset_path)[:10]:
        # os.path.join works whether or not dataset_path ends with a
        # separator; plain string concatenation did not.
        file_path = os.path.join(dataset_path, file)
        image = imgproc.loadImage(file_path)
        image = cv2.resize(image,
                           dsize=(800, 1280),
                           interpolation=cv2.INTER_LINEAR)
        img_resized, _, _ = imgproc.resize_aspect_ratio(
            image, 1280, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)

        # Preprocessing
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]
        yield [x.cpu().detach().numpy()]
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, filename, result_folder=result_folder):
    """Run the TensorFlow detector on one image and save its score heatmap.

    ``canvas_size`` and ``mag_ratio`` are read from module scope. The heatmap
    is written to ``result_folder + filename + "_mask.jpg"``.

    Args:
        net: Callable returning a pair whose first item is the score tensor.
        image: Input image (presumably H x W x C -- confirm against caller).
        text_threshold, link_threshold, low_text, poly: Forwarded unchanged
            to ``craft_utils.getDetBoxes``.
        cuda: Not used by this variant.
        filename: Name fragment used to build the output file name.
        result_folder: Output prefix; defaults to the module-level value
            captured when the function is defined.

    Returns:
        ``(boxes, polys, ret_score_text)``.
    """
    infer_started = time.time()

    resized, scale, _heatmap_size = imgproc.resize_aspect_ratio(
        image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio)
    inv_scale = 1 / scale  # same factor is used for width and height

    # Normalise, then add the leading batch axis.
    normalised = imgproc.normalizeMeanVariance(resized)
    print("###")
    batch = tf.expand_dims(normalised, 0)
    print(batch.shape)

    scores, _ = net(batch)

    text_map = scores[0, :, :, 0].numpy()
    link_map = scores[0, :, :, 1].numpy()

    infer_elapsed = time.time() - infer_started
    post_started = time.time()

    boxes, polys = craft_utils.getDetBoxes(text_map, link_map, text_threshold,
                                           link_threshold, low_text, poly)

    boxes = craft_utils.adjustResultCoordinates(boxes, inv_scale, inv_scale)
    polys = craft_utils.adjustResultCoordinates(polys, inv_scale, inv_scale)
    for idx, candidate in enumerate(polys):
        if candidate is None:
            polys[idx] = boxes[idx]

    # Timings are collected but not reported: the report line is disabled.
    post_elapsed = time.time() - post_started

    # Text and link maps side by side, rendered as one heatmap image.
    stacked = np.hstack((text_map.copy(), link_map))
    ret_score_text = imgproc.cvt2HeatmapImg(stacked)
    cv2.imwrite(result_folder + filename + "_mask.jpg", ret_score_text)

    return boxes, polys, ret_score_text
def __init__(self, args):
    """Preload every training image that has a ground-truth directory.

    Images are listed from ``./data/train/data``; labels are read from
    ``./data/train/ground_truth/<dataset>/gt_<file stem>/``. Images without
    such a directory are skipped.

    Args:
        args: Object providing ``canvas_size`` and ``mag_ratio`` for
            ``imgproc.resize_aspect_ratio``.
    """
    filelist, _, _ = file_utils.list_files('./data/train/data')

    self.images = []
    self.confmaps = []
    self.scores_region = []
    self.scores_link = []

    for filename in filelist:
        # Get data path. splitext returns (stem, ext); only the stem belongs
        # in the label directory name.
        dataset = os.path.dirname(filename).split(os.sep)[-1]
        filenum = os.path.splitext(os.path.basename(filename))[0]
        label_dir = './data/train/ground_truth/{}/gt_{}/'.format(
            dataset, filenum)

        # Skip images whose ground truth has not been generated.
        if not os.path.exists(label_dir):
            continue

        image = imgproc.loadImage(filename)
        # NOTE(review): cv2.resize below needs NumPy arrays; confirm these
        # files do not hold torch tensors.
        score_region = torch.load(label_dir + 'region.pt')
        score_link = torch.load(label_dir + 'link.pt')
        conf_map = torch.load(label_dir + 'conf.pt')

        # Resize
        img_resized, _, _ = imgproc.resize_aspect_ratio(
            image,
            args.canvas_size,
            interpolation=cv2.INTER_LINEAR,
            mag_ratio=args.mag_ratio)

        # Image preprocess
        x = imgproc.normalizeMeanVariance(img_resized)
        x = x.transpose((2, 0, 1))  # [h, w, c] to [c, h, w]

        # GT reshape to half the resized image. cv2.resize expects an
        # integer (width, height) pair, hence floor division and w before h.
        h, w, _ = img_resized.shape
        half_size = (w // 2, h // 2)
        score_region = cv2.resize(score_region, dsize=half_size)
        score_link = cv2.resize(score_link, dsize=half_size)
        conf_map = cv2.resize(conf_map, dsize=half_size)

        self.scores_region.append(score_region)
        self.scores_link.append(score_link)
        self.confmaps.append(conf_map)
        self.images.append(x)
def gt_net(net, image, args):
    """Return the text score map of ``image`` and the resize ratio used.

    Args:
        net: Callable returning ``(y, feature)`` for a batched input tensor.
        image: Input image (presumably H x W x C -- confirm against caller).
        args: Object providing ``canvas_size``, ``mag_ratio`` and ``cuda``.

    Returns:
        ``(score_text, target_ratio)`` where ``score_text`` is channel 0 of
        the first batch item as a NumPy array.
    """
    resized, target_ratio, _heatmap_size = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)

    # [h, w, c] -> [b, c, h, w]
    normalised = imgproc.normalizeMeanVariance(resized)
    batch = torch.tensor(normalised).permute(2, 0, 1).unsqueeze(0)
    if args.cuda:
        batch = batch.cuda()

    with torch.no_grad():
        scores, _feature = net(batch)

    return scores[0, :, :, 0].cpu().data.numpy(), target_ratio
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    """Detect text in ``image`` and return boxes, polygons and a heatmap.

    Reads ``args.canvas_size``, ``args.mag_ratio`` and ``args.show_time``
    from the module-level ``args``.

    Args:
        net: Callable returning a pair whose first item is the score tensor.
        image: Input image (presumably H x W x C -- confirm against caller).
        text_threshold, link_threshold, low_text, poly: Forwarded unchanged
            to ``craft_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved with ``.cuda()``.

    Returns:
        ``(boxes, polys, ret_score_text)``.
    """
    t0 = time.time()

    # Resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # Preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # Forward pass; inference only, so gradient tracking is disabled.
    with torch.no_grad():
        y, _ = net(x)

    # Build the score and link maps
    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # Coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k, poly_k in enumerate(polys):
        if poly_k is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    # Render results (optional)
    render_img = np.hstack((score_text.copy(), score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
def detect(self, image):
    """Detect text in ``image`` and return one ``box2xyxy`` result per box.

    Uses ``self.net``, the optional ``self.refine_net`` and the thresholds,
    ``canvas_size``, ``mag_ratio``, ``cuda`` and ``poly`` attributes stored
    on the instance.

    Args:
        image: Input image; ``image.shape[0:2]`` is passed to ``box2xyxy``.

    Returns:
        A list with ``box2xyxy(box, image.shape[0:2])`` for every box.
    """
    resized, scale, _heatmap_size = imgproc.resize_aspect_ratio(
        image,
        self.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=self.mag_ratio)
    inv_scale = 1 / scale  # same factor is used for width and height

    # [h, w, c] -> [c, h, w] -> [b, c, h, w]
    chw = torch.from_numpy(imgproc.normalizeMeanVariance(resized)).permute(2, 0, 1)
    batch = Variable(chw.unsqueeze(0))
    if self.cuda:
        batch = batch.cuda()

    with torch.no_grad():
        scores, feature = self.net(batch)

    text_map = scores[0, :, :, 0].cpu().data.numpy()
    link_map = scores[0, :, :, 1].cpu().data.numpy()

    # When a refiner is configured its channel 0 replaces the link map.
    if self.refine_net is not None:
        with torch.no_grad():
            refined = self.refine_net(scores, feature)
        link_map = refined[0, :, :, 0].cpu().data.numpy()

    boxes, _ = craft_utils.getDetBoxes(text_map, link_map,
                                       self.text_threshold,
                                       self.link_threshold, self.low_text,
                                       self.poly)
    boxes = craft_utils.adjustResultCoordinates(boxes, inv_scale, inv_scale)

    return [box2xyxy(box, image.shape[0: 2]) for box in boxes]
def test_net(self, net, image, text_threshold, link_threshold, low_text, poly, refine_net=None):
    """Run ``net`` on one image with a fixed 1280 / 1.5 resize configuration.

    Args:
        net: Callable returning ``(y, feature)`` for a batched input tensor.
        image: Input image (presumably H x W x C -- confirm against caller).
        text_threshold, link_threshold, low_text, poly: Forwarded unchanged
            to ``craft_utils.getDetBoxes``.
        refine_net: Accepted but not used by this variant.

    Returns:
        ``(boxes, polys, ret_score_text)``.
    """
    resized, scale, _heatmap_size = imgproc.resize_aspect_ratio(
        image, 1280, interpolation=cv.INTER_LINEAR, mag_ratio=1.5)
    inv_scale = 1 / scale  # same factor is used for width and height

    # [h, w, c] -> [c, h, w] -> [b, c, h, w]
    chw = torch.from_numpy(imgproc.normalizeMeanVariance(resized)).permute(2, 0, 1)
    batch = Variable(chw.unsqueeze(0))

    with torch.no_grad():
        scores, _feature = net(batch)

    text_map = scores[0, :, :, 0].cpu().data.numpy()
    link_map = scores[0, :, :, 1].cpu().data.numpy()

    boxes, polys = craft_utils.getDetBoxes(text_map, link_map, text_threshold,
                                           link_threshold, low_text, poly)

    boxes = craft_utils.adjustResultCoordinates(boxes, inv_scale, inv_scale)
    polys = craft_utils.adjustResultCoordinates(polys, inv_scale, inv_scale)
    for idx, candidate in enumerate(polys):
        if candidate is None:
            polys[idx] = boxes[idx]

    # Text and link maps side by side, rendered as one heatmap image.
    stacked = np.hstack((text_map.copy(), link_map))
    ret_score_text = imgproc.cvt2HeatmapImg(stacked)

    return boxes, polys, ret_score_text
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    """Return only the text score map predicted for ``image``.

    Reads ``args.canvas_size`` and ``args.mag_ratio`` from the module-level
    ``args``.

    Args:
        net: Callable returning a pair whose first item is the score tensor.
        image: Input image (presumably H x W x C -- confirm against caller).
        text_threshold, link_threshold, low_text, poly: Accepted for
            signature compatibility; not used by this variant.
        cuda: When truthy the input tensor is moved with ``.cuda()``.

    Returns:
        Channel 0 of the first batch item as a NumPy array.
    """
    # Resize
    img_resized, _, _ = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)

    # Preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # Forward pass; inference only, so gradient tracking is disabled.
    with torch.no_grad():
        y, _ = net(x)

    return y[0, :, :, 0].detach().cpu().numpy()
def __getitem__(self, i):
    """Return the preprocessed image and its three label tensors.

    Returns:
        ``(x, region, link, conf)``: ``x`` is the normalised image permuted
        from [h, w, c] to [c, h, w]; the other three are float64 tensors
        loaded from ``region.pt``, ``link.pt`` and ``conf.pt`` under
        ``self.labels[i]``.
    """
    # Image: load, resize, fill the canvas, normalise.
    raw = imgproc.loadImage(self.images[i])
    resized, _ratio, _heatmap_size = imgproc.resize_aspect_ratio(
        raw,
        self.args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=self.args.mag_ratio)
    canvas = imgproc.fill_canvas(resized, self.args.canvas_size)
    x = torch.tensor(imgproc.normalizeMeanVariance(canvas)).permute(2, 0, 1)

    # Labels: label_dir is used as a plain string prefix.
    label_dir = self.labels[i]
    region, link, conf = [
        torch.tensor(torch.load(label_dir + name), dtype=torch.float64)
        for name in ('region.pt', 'link.pt', 'conf.pt')
    ]

    return x, region, link, conf
def main():
    """Run detection, OCR, masking, inpainting, translation and rendering.

    Processes the image at ``args.image`` and writes intermediate and final
    images under ``result/``. Reads ``args.size``, ``args.text_threshold``,
    ``args.link_threshold`` and ``args.low_text`` from the module-level
    ``args``; loads ``alphabet-all-v5.txt``, ``ocr.ckpt`` and ``detect.ckpt``
    from the working directory.

    Raises:
        FileNotFoundError: If ``args.image`` cannot be read by OpenCV.
    """
    import os
    os.makedirs('result', exist_ok=True)
    text_render.prepare_renderer()

    # The alphabet holds non-ASCII characters, so the encoding is stated
    # explicitly instead of depending on the platform default.
    with open('alphabet-all-v5.txt', 'r', encoding='utf-8') as fp:
        dictionary = [s[:-1] for s in fp.readlines()]

    model_ocr = OCR(dictionary, 768)
    model_ocr.load_state_dict(torch.load('ocr.ckpt', map_location='cpu'),
                              strict=False)
    model_ocr.eval()

    model = CRAFT_net()
    sd = torch.load('detect.ckpt', map_location='cpu')
    model.load_state_dict(sd['model'])
    model = model.cpu()
    model.eval()

    img = cv2.imread(args.image)
    if img is None:
        # cv2.imread returns None instead of raising on a bad path.
        raise FileNotFoundError('cannot read image: {}'.format(args.image))
    img_bbox = np.copy(img)
    img_bbox_all = np.copy(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    img_resized, target_ratio, _, pad_w, pad_h = imgproc.resize_aspect_ratio(
        img, args.size, cv2.INTER_LINEAR, mag_ratio=1)
    img_to_overlay = np.copy(img_resized)
    ratio_h = ratio_w = 1 / target_ratio
    img_resized = imgproc.normalizeMeanVariance(img_resized)
    print(img_resized.shape)

    rscore, ascore, mask = test(model, img_resized)
    overlay = imgproc.cvt2HeatmapImg(rscore + ascore)

    boxes, polys = craft_utils.getDetBoxes(rscore, ascore,
                                           args.text_threshold,
                                           args.link_threshold, args.low_text,
                                           False)
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h,
                                                ratio_net=2)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h,
                                                ratio_net=2)
    for k, poly_k in enumerate(polys):
        if poly_k is None:
            polys[k] = boxes[k]

    # Merge textlines
    polys = merge_bboxes(polys, can_merge_textline)
    for [tl, tr, br, bl] in polys:
        x = int(tl[0])
        y = int(tl[1])
        width = int(tr[0] - tl[0])
        height = int(br[1] - tr[1])
        cv2.rectangle(img_bbox_all, (x, y), (x + width, y + height),
                      color=(255, 0, 0), thickness=2)

    # Run OCR for each textline
    textlines = run_ocr(img_bbox, polys, dictionary, model_ocr, 32)

    # Merge textlines into text regions; drop regions without characters.
    text_regions: List[BBox] = []
    new_textlines = []
    for (poly_regions, textline_indices,
         majority_dir) in merge_bboxes_text_region(textlines):
        [tl, tr, br, bl] = poly_regions
        # Grow the region by 5 px on every side.
        x = int(tl[0]) - 5
        y = int(tl[1]) - 5
        width = int(tr[0] - tl[0]) + 10
        height = int(br[1] - tr[1]) + 10

        text = ''
        logprob_lengths = []
        for textline_idx in textline_indices:
            line = textlines[textline_idx]
            if not text:
                text = line.text
            else:
                last_ch = text[-1]
                cur_ch = line.text[0]
                # Join without a space only when both neighbouring
                # characters have code points above 255.
                if ord(last_ch) > 255 and ord(cur_ch) > 255:
                    text += line.text
                else:
                    text += ' ' + line.text
            logprob_lengths.append((np.log(line.prob), len(line.text)))

        vc = count_valuable_text(text)
        # Filter text regions without characters
        if vc > 1:
            # Length-weighted mean log-probability. Computed only for kept
            # regions, so a region with no text cannot divide by zero.
            total_logprobs = 0.0
            for (logprob, length) in logprob_lengths:
                total_logprobs += logprob * length
            total_logprobs /= sum([item[1] for item in logprob_lengths])

            region = BBox(x, y, width, height, text, np.exp(total_logprobs))
            region.textline_indices = []
            region.majority_dir = majority_dir
            text_regions.append(region)
            for textline_idx in textline_indices:
                # Re-index into the filtered textline list.
                region.textline_indices.append(len(new_textlines))
                new_textlines.append(textlines[textline_idx])
    textlines = new_textlines

    # Create mask
    from text_mask_utils import filter_masks, main_process
    mask_resized = cv2.resize(mask, (mask.shape[1] * 2, mask.shape[0] * 2),
                              interpolation=cv2.INTER_LINEAR)
    # Strip the padding reported by resize_aspect_ratio.
    if pad_h > 0:
        mask_resized = mask_resized[:-pad_h, :]
    elif pad_w > 0:
        mask_resized = mask_resized[:, :-pad_w]
    mask_resized = cv2.resize(mask_resized,
                              (img.shape[1] // 2, img.shape[0] // 2),
                              interpolation=cv2.INTER_LINEAR)
    img_resized_2 = cv2.resize(img, (img.shape[1] // 2, img.shape[0] // 2),
                               interpolation=cv2.INTER_LINEAR)
    mask_resized[mask_resized > 250] = 255
    text_lines = [(a.x // 2, a.y // 2, a.w // 2, a.h // 2) for a in textlines]
    mask_ccs, cc2textline_assignment = filter_masks(mask_resized, text_lines)
    cv2.imwrite('result/mask_filtered.png', reduce(cv2.bitwise_or, mask_ccs))
    final_mask, textline_colors = main_process(img_resized_2, mask_ccs,
                                               text_lines,
                                               cc2textline_assignment)
    final_mask = cv2.resize(final_mask, (img.shape[1], img.shape[0]),
                            interpolation=cv2.INTER_LINEAR)

    # Run inpainting
    img_inpainted = run_inpainting(img, final_mask)

    # Translate text region texts
    texts = '\n'.join([r.text for r in text_regions])
    trans_ret = baidu_translator.translate('ja', 'zh-CN', texts)
    # Truncate or pad with '' so there is one translation per region.
    batch = len(text_regions)
    translated_sentences = list(trans_ret[:batch])
    translated_sentences.extend([''] * (batch - len(translated_sentences)))

    # Render translated texts
    img_canvas = np.copy(img_inpainted)
    for trans_text, region in zip(translated_sentences, text_regions):
        print(region.text)
        print(trans_text)
        print(region.majority_dir, region.x, region.y, region.w, region.h)
        img_bbox = cv2.rectangle(img_bbox, (region.x, region.y),
                                 (region.x + region.w, region.y + region.h),
                                 color=(0, 0, 255), thickness=2)
        for idx in region.textline_indices:
            txtln = textlines[idx]
            img_bbox = cv2.rectangle(img_bbox, (txtln.x, txtln.y),
                                     (txtln.x + txtln.w, txtln.y + txtln.h),
                                     color=textline_colors[idx], thickness=2)
        # ``idx`` is left over from the loop above, i.e. the region's last
        # textline; its colour is used for the rendered text.
        if region.majority_dir == 'h':
            text_render.put_text_horizontal(img_canvas, trans_text,
                                            len(region.textline_indices),
                                            region.x, region.y, region.w,
                                            region.h, textline_colors[idx],
                                            None)
        else:
            text_render.put_text_vertical(img_canvas, trans_text,
                                          len(region.textline_indices),
                                          region.x, region.y, region.w,
                                          region.h, textline_colors[idx],
                                          None)

    cv2.imwrite('result/rs.png', imgproc.cvt2HeatmapImg(rscore))
    cv2.imwrite('result/as.png', imgproc.cvt2HeatmapImg(ascore))
    cv2.imwrite('result/textline.png', overlay)
    cv2.imwrite('result/bbox.png', img_bbox)
    cv2.imwrite('result/bbox_unfiltered.png', img_bbox_all)
    cv2.imwrite(
        'result/overlay.png',
        cv2.cvtColor(
            overlay_image(
                img_to_overlay,
                cv2.resize(overlay,
                           (img_resized.shape[1], img_resized.shape[0]),
                           interpolation=cv2.INTER_LINEAR)),
            cv2.COLOR_RGB2BGR))
    cv2.imwrite('result/mask.png', final_mask)
    cv2.imwrite('result/masked.png',
                cv2.cvtColor(img_inpainted, cv2.COLOR_RGB2BGR))
    cv2.imwrite('result/final.png',
                cv2.cvtColor(img_canvas, cv2.COLOR_RGB2BGR))
return new_state_dict # load net net = CRAFT() # initialize net = net.cuda() #net = torch.nn.DataParallel(net) net.load_state_dict(copyStateDict(torch.load('./weights/craft_mlt_25k.pth'))) net.eval() # load data image = imgproc.loadImage('./test_data/chi/0021_crop.jpg') # resize img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio( image, 384, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5) ratio_h = ratio_w = 1 / target_ratio # preprocessing x = imgproc.normalizeMeanVariance(img_resized) x = torch.from_numpy(x).permute(2, 0, 1) # [h, w, c] to [c, h, w] x = Variable(x.unsqueeze(0)) # [c, h, w] to [b, c, h, w] onnx_input = x.data.numpy() x = x.cuda() # trace export torch.onnx.export(net, x, './craft_opset10.onnx', export_params=True, verbose=True,
def _box_bounds(box):
    """Return ``(left, top, right, bottom)`` of a four-point box, untruncated."""
    xs = (box[0][0], box[1][0], box[2][0], box[3][0])
    ys = (box[0][1], box[1][1], box[2][1], box[3][1])
    return min(xs), min(ys), max(xs), max(ys)


def _merge_adjacent_boxes(boxes, distance_threshold=20, max_neighbors=200):
    """Group boxes whose centres are close and return one rectangle per group.

    Boxes are linked breadth-first: two boxes belong to the same group when
    the distance between their (x-compressed) centres is at most
    ``distance_threshold``.  Each group is returned as
    ``[left, top, right, bottom]`` with integer (floored) coordinates.

    Raises:
        RuntimeError: if a single box has more than ``max_neighbors``
            neighbours within the threshold.
    """
    centers = np.zeros((len(boxes), 2))
    for i, box in enumerate(boxes):
        left, top, right, bottom = _box_bounds(box)
        # x is divided by 4 so horizontal gaps weigh less than vertical ones
        # (the original author marked this factor as still needing tuning).
        centers[i][0] = ((int(left) + int(right)) / 2) / 4
        centers[i][1] = (int(top) + int(bottom)) / 2

    mat_distance = [
        np.sqrt(np.sum((centers - center) ** 2, axis=-1)) for center in centers
    ]
    print("generate distance mat;len:", len(mat_distance))

    segment_group = []
    search_queue = deque()
    processed = set()
    while len(processed) < len(centers):
        if not search_queue:
            # Queue exhausted: start a new group from the first unvisited box.
            seed = next(i for i in range(len(centers)) if i not in processed)
            search_queue.append(seed)
            segment_group.append([])
        current = search_queue.popleft()  # FIFO order is what makes this BFS
        if current in processed:
            continue
        processed.add(current)
        segment_group[-1].append(boxes[current])

        neighbours = 0
        for index in np.argsort(mat_distance[current]):
            if mat_distance[current][index] > distance_threshold:
                break  # sorted by distance, so nothing closer follows
            neighbours += 1
            if neighbours > max_neighbors:
                raise RuntimeError(
                    "box %d has more than %d neighbours within distance %s"
                    % (current, max_neighbors, distance_threshold))
            if index not in search_queue:
                search_queue.append(index)

    merged = []
    for segment in segment_group:
        bounds = [_box_bounds(box) for box in segment]
        merged.append([
            min(math.floor(b[0]) for b in bounds),
            min(math.floor(b[1]) for b in bounds),
            max(math.floor(b[2]) for b in bounds),
            max(math.floor(b[3]) for b in bounds),
        ])
    return merged


def _glyph_rects(binary_img):
    """Return bounding rects ``(x, y, w, h)`` of external contours, sorted by x."""
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; index -2 is the contour list in all.
    contours = cv2.findContours(binary_img, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    return sorted((cv2.boundingRect(c) for c in contours),
                  key=lambda r: r[0])


def _merge_stacked_glyphs(group):
    """Merge, in place, rects that sit above/below the previous rect.

    This joins the separate strokes of symbols such as ``=`` or a division
    sign.  ``group`` must be sorted by x; it is returned for convenience.
    An empty list is returned unchanged.
    """
    if not group:
        return group
    last = group[0]
    last_x_start = last[0]
    last_x_end = last[0] + last[2]
    i = 1
    while 1 <= i < len(group):
        now = group[i]
        cx = now[0] + now[2] / 2
        cy = now[1] + now[3] / 2
        last_cy = last[1] + last[3] / 2
        y_near = abs(last_cy - cy) < (last_x_end - last_x_start) * 0.6
        if last_x_start < cx < last_x_end and y_near:
            # Fold the current rect into its predecessor.
            group.pop(i)
            i -= 1
            prev = group[i]
            x1 = min(now[0], prev[0])
            y1 = min(now[1], prev[1])
            x2 = max(now[0] + now[2], prev[0] + prev[2])
            y2 = max(now[1] + now[3], prev[1] + prev[3])
            group[i] = (x1, y1, x2 - x1, y2 - y1)
        else:
            last = group[i]
            last_x_start = last[0]
            last_x_end = last[0] + last[2]
        i += 1
    return group


def _classify_glyph(rect_char, binary_img):
    """Classify one glyph rect of ``binary_img`` and return the class index."""
    x, y, w, h = rect_char
    crop_char = binary_img[y:y + h, x:x + w]
    crop_char = torch.tensor(crop_char, dtype=torch.int)
    crop_char = adapt_size(crop_char)
    crop_char = crop_char.float().cuda()
    res = classifer_box.eval(crop_char.unsqueeze(0)).squeeze().int().item()
    debug_write(crop_char[0].cpu().int().numpy().astype(np.uint8) * 255,
                config.CLASS_toString[res])
    return res


def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly,
             refine_net=None):
    """Detect text with CRAFT, merge nearby boxes and check simple equations.

    Besides running the detector, this function reads the module-level
    grayscale image ``i_image``, draws verdict rectangles on the module-level
    ``i_image_3_color``, writes crops under ``./res/`` and finally shows the
    annotated image in a blocking OpenCV window.

    Args:
        net: CRAFT network; called as ``net(x)`` and expected to return
            ``(y, feature)``.
        image: input image passed to ``imgproc.resize_aspect_ratio``.
        text_threshold, link_threshold, low_text, poly: forwarded to
            ``craft_utils.getDetBoxes``.
        cuda: move the input tensor to the GPU when true.
        refine_net: optional link refiner called as ``refine_net(y, feature)``.

    Returns:
        ``(boxes, polys, ret_score_text)`` where ``ret_score_text`` is the
        heat-map rendering of the text and link scores side by side.

    Raises:
        RuntimeError: if one box has more than 200 close neighbours
            (previously the process was terminated with ``exit()``).
    """
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, args.canvas_size, interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    # Detected boxes are often split; put neighbouring ones in one group and
    # merge every group into a single rectangle.
    merge_boxes = _merge_adjacent_boxes(boxes)

    for rect in merge_boxes:
        threshold_hw = min(rect[3] - rect[1], rect[2] - rect[0]) * 0.2
        crop = i_image[rect[1]:rect[3], rect[0]:rect[2]]
        if crop.size == 0:
            continue  # degenerate rectangle; thresholding would fail
        _, binary_img = cv2.threshold(
            crop, 175, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # Merge the strokes of symbols such as '=' and '/'.
        group = _merge_stacked_glyphs(_glyph_rects(binary_img))
        if len(group) < 4 or len(group) > 16:
            continue

        # Classify every glyph that is large enough.
        rect_set = []
        res_set = []
        for rect_char in group:
            if max(rect_char[2], rect_char[3]) < threshold_hw:
                continue
            res_set.append(_classify_glyph(rect_char, binary_img))
            rect_set.append(rect_char)

        res_str = ''
        for res, rect_char in zip(res_set, rect_set):
            res_str += config.CLASS_toString[res]
            if not config.CLASS_is_eq(res):
                continue
            # The answer right of the equals sign is lighter in colour:
            # binarise that part separately and detect it again.
            right = i_image[rect[1]:rect[3],
                            rect[0]:rect[2]][:, rect_char[0] + rect_char[2]:]
            if right.shape[0] * right.shape[1] < 4:
                break
            right = convert_to_binary_inv(right)
            debug_write(right, '')
            for rect_right in _glyph_rects(right):
                if max(rect_right[2], rect_right[3]) < right.shape[0] * 0.3:
                    continue
                res_right = _classify_glyph(rect_right, right)
                res_str += config.CLASS_toString[res_right]
            break

        eq = res_str.split('=')
        if len(eq) == 2:
            res_str = res_str.replace("/", "d")
            print(res_str)
            top_left = (rect[0], rect[1])
            bottom_right = (rect[2], rect[3])
            expression_crop = i_image[rect[1]:rect[3], rect[0]:rect[2]]
            if str_to_num(eq[0]) == str_to_num(eq[1]):
                # both sides evaluate to the same value
                cv2.rectangle(i_image_3_color, top_left, bottom_right,
                              (46, 255, 87), 2)
                cv2.imwrite('./res/' + res_str + '.png', expression_crop)
            elif eq[1] == "":
                # nothing recognised right of the equals sign
                cv2.rectangle(i_image_3_color, top_left, bottom_right,
                              (46, 87, 255), 2)
                cv2.imwrite('./res/' + res_str + '.png', expression_crop)
            else:
                # sides differ
                cv2.rectangle(i_image_3_color, top_left, bottom_right,
                              (255, 46, 87), 2)
                cv2.imwrite('./res/x_' + res_str + '.png', expression_crop)

    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    # Blocks until a key is pressed.
    cv2.imshow('', i_image_3_color)
    cv2.waitKey()

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
fps_last_frame = None last_frame_resized = None while cap.isOpened(): ret, frame = cap.read() if not ret: break if counter % (args.skip_frame + 1) != 0: counter += 1 last_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) continue frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # RGB order frame_timestamp_ms = cap.get(cv2.CAP_PROP_POS_MSEC) frame_resized, target_ratio, _ = imgproc.resize_aspect_ratio( frame, args.size, cv2.INTER_AREA, mag_ratio=1) if last_frame_resized is not None and not args.verbose: ssim = metrics.structural_similarity( cv2.cvtColor(last_frame_resized, cv2.COLOR_RGB2GRAY), cv2.cvtColor(frame_resized, cv2.COLOR_RGB2GRAY)) if ssim > 0.9: counter += 1 last_frame_resized = frame_resized continue last_frame_resized = frame_resized frame_resized = cv2.bilateralFilter(frame_resized, 17, 80, 80) ratio_h = ratio_w = 1 / target_ratio frame_norm = imgproc.normalizeMeanVariance(frame_resized, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) #frame_norm = imgproc.normalizeMeanVariance(frame_resized) # if batch is None :
def process(self, craft, model, seq, key, sub_img):
    """Detect horizontal text lines in ``sub_img`` and recognise each one.

    The detector ``craft`` locates text, the boxes are grouped into
    horizontal lines by ``group_text_box``, and every line crop is decoded
    greedily with ``model`` (CNN + transformer encoder/decoder).

    Args:
        craft: detection network, called as ``craft(x)`` and expected to
            return ``(y, feature)``.
        model: recognition model exposing ``cnn`` and ``transformer``.
        seq: output list; it is resized in place to one entry per detected
            horizontal line and filled with the decoded strings.
        key: unused here; kept for interface compatibility.
        sub_img: image to process (indexed as ``[rows, cols, channels]``).

    Returns:
        None. Results are written into ``seq``.
    """
    img_resized, target_ratio, _ = resize_aspect_ratio(
        sub_img, 2560, interpolation=cv2.INTER_LINEAR, mag_ratio=1.)
    ratio_h = ratio_w = 1 / target_ratio

    x = normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]
    x = x.to(self.device)

    # Inference only: avoid building an autograd graph for the detector.
    with torch.no_grad():
        y, _ = craft(x)

    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    boxes, polys = getDetBoxes(score_text, score_link,
                               text_threshold=0.7, link_threshold=0.4,
                               low_text=0.4, poly=False)
    boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    result = [np.array(box).astype(np.int32).reshape((-1)) for box in polys]

    # Only the horizontal groups are recognised below; free-form boxes are
    # ignored, so they are no longer filtered either.
    horizontal_list, _ = group_text_box(result, slope_ths=0.8,
                                        ycenter_ths=0.5, height_ths=1,
                                        width_ths=1, add_margin=0.1)
    # Entries are indexed as [x_min, x_max, y_min, y_max]; drop tiny ones.
    horizontal_list = [
        box for box in horizontal_list
        if max(box[1] - box[0], box[3] - box[2]) > 10
    ]

    seq[:] = [None] * len(horizontal_list)
    for line_idx, ele in enumerate(horizontal_list):
        ele = [0 if v < 0 else v for v in ele]  # clamp to the image origin
        img = sub_img[ele[2]:ele[3], ele[0]:ele[1], :]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img.astype(np.uint8))
        img = process_input(img, self.config['dataset']['image_height'],
                            self.config['dataset']['image_min_width'],
                            self.config['dataset']['image_max_width'])
        img = img.to(self.config['device'])

        with torch.no_grad():
            src = model.cnn(img)
            memory = model.transformer.forward_encoder(src)

            # Greedy decoding: start every sequence with token 1 and stop
            # once each sequence contains token 2 or the length cap is hit.
            translated_sentence = [[1] * len(img)]
            max_length = 0
            while max_length <= 128 and not all(
                    np.any(np.asarray(translated_sentence).T == 2, axis=1)):
                tgt_inp = torch.LongTensor(translated_sentence).to(
                    self.device)
                output = model.transformer.forward_decoder(tgt_inp, memory)
                output = output.to('cpu')
                _, indices = torch.topk(output, 5)
                indices = indices[:, -1, 0]  # best token at the last step
                translated_sentence.append(indices.tolist())
                max_length += 1
                del output
            translated_sentence = np.asarray(translated_sentence).T

        s = translated_sentence[0].tolist()
        # The original stored into ``seq[idx]``, but ``idx`` is not defined
        # in this method; the line index of the enclosing loop is intended.
        seq[line_idx] = self.vocab.decode(s)
def get_bounding_box(self, image_file, verbose=False):
    """Detect text boxes in an image file.

    :param image_file: path of the image, read with ``cv2.imread``
    :param verbose: accepted for backward compatibility; it currently has
        no observable effect
    :return: ``(boxes, list_images, center_point, img_dim)`` where ``boxes``
        are the detected boxes in original-image coordinates,
        ``list_images`` is the result of ``get_box_img(boxes, image)``,
        ``center_point`` holds ``[x, y]`` of the centre of each box's
        axis-aligned bounding rectangle, and ``img_dim`` is ``image.shape``
    :raises OSError: if the image cannot be read
    """
    image = cv2.imread(image_file)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("could not read image: {}".format(image_file))
    img_dim = image.shape

    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, self.canvas_size, interpolation=cv2.INTER_LINEAR,
        mag_ratio=self.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if self.cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = self.net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    boxes, _ = craft_utils.getDetBoxes(score_text, score_link,
                                       self.text_threshold,
                                       self.link_threshold,
                                       self.low_text, self.poly)
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    center_point = []
    for box in boxes:
        # int32 rather than int16: coordinates above 32767 would overflow.
        b = np.array(box, dtype=np.int32)
        xmin = np.min(b[:, 0])
        ymin = np.min(b[:, 1])
        xmax = np.max(b[:, 0])
        ymax = np.max(b[:, 1])
        center_point.append([xmin + (xmax - xmin) / 2,
                             ymin + (ymax - ymin) / 2])

    list_images = get_box_img(boxes, image)

    return boxes, list_images, center_point, img_dim
def _box_bounds(box):
    """Return ``(left, top, right, bottom)`` of a four-point box, untruncated."""
    xs = (box[0][0], box[1][0], box[2][0], box[3][0])
    ys = (box[0][1], box[1][1], box[2][1], box[3][1])
    return min(xs), min(ys), max(xs), max(ys)


def _merge_adjacent_boxes(boxes, distance_threshold=20, max_neighbors=200):
    """Group boxes whose centres are close and return one rectangle per group.

    Boxes are linked breadth-first: two boxes belong to the same group when
    the distance between their (x-compressed) centres is at most
    ``distance_threshold``.  Each group is returned as
    ``[left, top, right, bottom]`` with integer (floored) coordinates.

    Raises:
        RuntimeError: if a single box has more than ``max_neighbors``
            neighbours within the threshold.
    """
    centers = np.zeros((len(boxes), 2))
    for i, box in enumerate(boxes):
        left, top, right, bottom = _box_bounds(box)
        # x is divided by 4 so horizontal gaps weigh less than vertical ones
        # (the original author marked this factor as still needing tuning).
        centers[i][0] = ((int(left) + int(right)) / 2) / 4
        centers[i][1] = (int(top) + int(bottom)) / 2

    mat_distance = [
        np.sqrt(np.sum((centers - center) ** 2, axis=-1)) for center in centers
    ]
    print("generate distance mat;len:", len(mat_distance))

    segment_group = []
    search_queue = deque()
    processed = set()
    while len(processed) < len(centers):
        if not search_queue:
            # Queue exhausted: start a new group from the first unvisited box.
            seed = next(i for i in range(len(centers)) if i not in processed)
            search_queue.append(seed)
            segment_group.append([])
        current = search_queue.popleft()  # FIFO order is what makes this BFS
        if current in processed:
            continue
        processed.add(current)
        segment_group[-1].append(boxes[current])

        neighbours = 0
        for index in np.argsort(mat_distance[current]):
            if mat_distance[current][index] > distance_threshold:
                break  # sorted by distance, so nothing closer follows
            neighbours += 1
            if neighbours > max_neighbors:
                raise RuntimeError(
                    "box %d has more than %d neighbours within distance %s"
                    % (current, max_neighbors, distance_threshold))
            if index not in search_queue:
                search_queue.append(index)

    merged = []
    for segment in segment_group:
        bounds = [_box_bounds(box) for box in segment]
        merged.append([
            min(math.floor(b[0]) for b in bounds),
            min(math.floor(b[1]) for b in bounds),
            max(math.floor(b[2]) for b in bounds),
            max(math.floor(b[3]) for b in bounds),
        ])
    return merged


def _glyph_rects(binary_img):
    """Return bounding rects ``(x, y, w, h)`` of external contours, sorted by x."""
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; index -2 is the contour list in all.
    contours = cv2.findContours(binary_img, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    return sorted((cv2.boundingRect(c) for c in contours),
                  key=lambda r: r[0])


def _merge_stacked_glyphs(group):
    """Merge, in place, rects that sit above/below the previous rect.

    This joins the separate strokes of symbols such as ``=`` or a division
    sign.  ``group`` must be sorted by x; it is returned for convenience.
    An empty list is returned unchanged.
    """
    if not group:
        return group
    last = group[0]
    last_x_start = last[0]
    last_x_end = last[0] + last[2]
    i = 1
    while 1 <= i < len(group):
        now = group[i]
        cx = now[0] + now[2] / 2
        cy = now[1] + now[3] / 2
        last_cy = last[1] + last[3] / 2
        y_near = abs(last_cy - cy) < (last_x_end - last_x_start) * 0.6
        if last_x_start < cx < last_x_end and y_near:
            # Fold the current rect into its predecessor.
            group.pop(i)
            i -= 1
            prev = group[i]
            x1 = min(now[0], prev[0])
            y1 = min(now[1], prev[1])
            x2 = max(now[0] + now[2], prev[0] + prev[2])
            y2 = max(now[1] + now[3], prev[1] + prev[3])
            group[i] = (x1, y1, x2 - x1, y2 - y1)
        else:
            last = group[i]
            last_x_start = last[0]
            last_x_end = last[0] + last[2]
        i += 1
    return group


def _recognize_glyph(rect_char, binary_img):
    """Run the text recogniser on one glyph rect and return its text.

    Returns an empty string when the crop is smaller than 2 pixels in both
    dimensions.
    """
    x, y, w, h = rect_char
    crop_char = binary_img[y:y + h, x:x + w]
    if crop_char.shape[0] < 2 and crop_char.shape[1] < 2:
        return ''  # region too small
    return compress(recognizer(crop_char))


def detect_net(net, image, text_threshold, link_threshold, low_text, cuda,
               poly, refine_net, res_path):
    """Detect equations in ``image``, check them and return a JSON report.

    Side effects: writes the score maps to ``core_link.jpg``,
    ``score_link.jpg`` and ``xxxx.png``, one JSON file per expression under
    ``resjson/``, one crop per expression under ``./res/`` and the annotated
    image to ``res_path``.

    Args:
        net: CRAFT network; called as ``net(x)`` and expected to return
            ``(y, feature)``.
        image: input image; converted with ``cv2.COLOR_BGR2GRAY`` for
            recognition and copied for annotation.
        text_threshold, link_threshold, low_text, poly: forwarded to
            ``craft_utils.getDetBoxes``.
        cuda: move the input tensor to the GPU when true.
        refine_net: optional link refiner (``None`` to disable).
        res_path: path the annotated image is written to.

    Returns:
        A JSON string: a list with one ``{'rect_expression', 'expression'}``
        entry per recognised expression that contains an ``=``.

    Raises:
        RuntimeError: if one box has more than 200 close neighbours
            (previously the process was terminated with ``exit()``).
    """
    origin_image_1_channel = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    origin_image_3_color = np.array(image)

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, args.canvas_size, interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()
    cv2.imwrite("core_link.jpg", score_text * 255)
    cv2.imwrite("score_link.jpg", score_link * 255)

    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    # Post-processing: boxes produced by CRAFT
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    # Detected boxes are often split; put neighbouring ones in one group and
    # merge every group into a single rectangle.
    merge_boxes = _merge_adjacent_boxes(boxes)

    json_record = []
    for rect in merge_boxes:
        threshold_hw = min(rect[3] - rect[1], rect[2] - rect[0]) * 0.2
        crop = origin_image_1_channel[rect[1]:rect[3], rect[0]:rect[2]]
        if crop.size == 0:
            continue  # degenerate rectangle; thresholding would fail

        binary_img = cv2.adaptiveThreshold(crop, 255,
                                           cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY_INV, 31, 10)
        debug_write(binary_img, "all")

        # Merge the strokes of symbols such as '=' and '/'.
        group = _merge_stacked_glyphs(_glyph_rects(binary_img))

        # Per-character records are not collected at the moment, so the
        # 'expression' entry of every record is an empty list.
        json_record_perline = []

        # Recognise every glyph that is large enough, left to right.
        res_str = ''
        for rect_char in group:
            if max(rect_char[2], rect_char[3]) < threshold_hw:
                continue
            res_str += _recognize_glyph(rect_char, binary_img)
        print(res_str)

        eq = res_str.split('=')
        if len(eq) >= 2:
            res_str = res_str.replace("/", "d")
            record = {
                'rect_expression': (rect[0], rect[1],
                                    rect[2] - rect[0], rect[3] - rect[1]),
                'expression': json_record_perline
            }
            json_record.append(record)
            with open("resjson/" + res_str + ".json", 'w') as file_object:
                file_object.write(json.dumps(record))

            expression_crop = origin_image_1_channel[rect[1]:rect[3],
                                                     rect[0]:rect[2]]
            if str_to_num(eq[0]) == str_to_num(eq[-1]):
                # first and last parts agree: underline the expression
                cv2.line(origin_image_3_color, (rect[0], rect[3]),
                         (rect[2], rect[3]), (46, 255, 87), 2)
                cv2.imwrite('./res/' + res_str + '.png', expression_crop)
            elif eq[-1] == "":
                # nothing recognised after the last equals sign
                cv2.rectangle(origin_image_3_color, (rect[0], rect[1]),
                              (rect[2], rect[3]), (255, 46, 87), 2)
                cv2.imwrite('./res/O' + res_str + '.png', expression_crop)
            else:
                # parts differ
                cv2.rectangle(origin_image_3_color, (rect[0], rect[1]),
                              (rect[2], rect[3]), (46, 87, 255), 2)
                cv2.imwrite('./res/X' + res_str + '.png', expression_crop)

    print(res_path)
    cv2.imwrite(res_path, origin_image_3_color)

    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
    cv2.imwrite("xxxx.png", ret_score_text)

    return json.dumps(json_record)
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    """Run the network and post-process every output channel separately.

    Args:
        net: network called as ``net(x)``; the first element of its result
            is indexed as ``y[0, :, :, :]`` (channels last).
        image: input image passed to ``imgproc.resize_aspect_ratio``.
        text_threshold, low_text, poly: forwarded to
            ``craft_utils.getDetBoxes`` for each channel.
        link_threshold: unused here; kept for interface compatibility.
        cuda: move the input tensor to the GPU when true.

    Returns:
        ``(gh_pred, boxes_pred, polys_pred, size_heatmap)`` where
        ``gh_pred`` is the prediction with channels first, and
        ``boxes_pred`` / ``polys_pred`` are tuples with one entry per
        channel.  Box coordinates are NOT rescaled to the original image.
    """
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, args.canvas_size, interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass (inference only, so no autograd graph is needed)
    with torch.no_grad():
        y, _ = net(x)

    # channels last -> channels first, one score map per channel
    gh_pred = y[0, :, :, :].permute((2, 0, 1)).cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing, channel by channel
    postproc = [
        craft_utils.getDetBoxes(score_text, text_threshold, low_text, poly)
        for score_text in gh_pred
    ]
    # zip(*[]) yields nothing to unpack, so handle "no channels" explicitly.
    boxes_pred, polys_pred = zip(*postproc) if postproc else ((), ())

    # Fall back to the box wherever no polygon was produced.
    for boxes, polys in zip(boxes_pred, polys_pred):
        for k in range(len(polys)):
            if polys[k] is None:
                polys[k] = boxes[k]

    t1 = time.time() - t1

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return gh_pred, boxes_pred, polys_pred, size_heatmap
feature = interpreter.get_tensor(output_details[1]['index']) return y, feature if __name__ == '__main__': image_path = sys.argv[1] start_time = time.time() image = imgproc.loadImage(image_path) image = cv2.resize(image, dsize=(800, 1280), interpolation=cv2.INTER_LINEAR) img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio( image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio) ratio_h = ratio_w = 1 / target_ratio # preprocessing x = imgproc.normalizeMeanVariance(img_resized) x = torch.from_numpy(x).permute(2, 0, 1) # [h, w, c] to [c, h, w] x = Variable(x.unsqueeze(0)) # [c, h, w] to [b, c, h, w] # forward pass x = x.cpu().detach().numpy() y, feature = run_tflite_model(x) y = torch.from_numpy(y) feature = torch.from_numpy(feature)
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly,
             image_path, refine_net=None):
    """Detect text with CRAFT and save binarised helper images.

    Side effects: creates ``./output/resize``, ``./output/og_bri`` and
    ``./output/score`` if needed and writes, for the base name of
    ``image_path``: the Otsu-binarised input (``og_<name>.jpg``), the image
    produced by ``imgproc.cvMakeScores`` from the region score
    (``score_<name>.jpg``) and the resized input
    (``resize_<name>_mask.jpg``), all scaled to the size of ``image``.

    Args:
        net: CRAFT network; called as ``net(x)`` and expected to return
            ``(y, feature)``.
        image: input image with shape ``(height, width, channels)``.
        text_threshold, link_threshold, low_text, poly: forwarded to
            ``craft_utils.getDetBoxes``.  ``low_text`` used to be ignored in
            favour of a hard-coded ``0.4``; it is honoured now.
        cuda: move the input tensor to the GPU when true.
        image_path: path of the input image; only its base name is used to
            name the output files.
        refine_net: optional link refiner called as ``refine_net(y, feature)``.

    Returns:
        ``(boxes, polys, ret_score_text)`` where ``ret_score_text`` is the
        heat-map rendering of the region score.
    """
    t0 = time.time()
    img_h, img_w, _ = image.shape

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, args.canvas_size, interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass (inference only, so no autograd graph is needed)
    with torch.no_grad():
        y, feature = net(x)

        # make score and link map
        score_text = y[0, :, :, 0].cpu().data.numpy()  # region score
        score_link = y[0, :, :, 1].cpu().data.numpy()  # affinity score

        # refine link
        if refine_net is not None:
            y_refiner = refine_net(y, feature)
            score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing: this is where CRAFT derives the boxes
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
    plus_score_text = imgproc.cvMakeScores(render_img)

    filename, _ = os.path.splitext(os.path.basename(image_path))

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    resize_folder = './output/resize'  # resized copies of the input image
    os.makedirs(resize_folder + '/', exist_ok=True)
    resize_file = resize_folder + "/resize_" + filename + '_mask.jpg'

    # Channel-swapped copy of the image CRAFT resized.
    img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)

    # Binarise the input for later compositing.
    # NOTE(review): the * 255 scaling is kept from the original; confirm the
    # value range of img_resized before relying on this image.
    gray = cv2.cvtColor((img_rgb * 255).astype(np.uint8), cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    text_score = cv2.resize(plus_score_text, None, fx=2, fy=2,
                            interpolation=cv2.INTER_LINEAR)

    # Scale both images back to the size of the original input.
    thresh = cv2.resize(thresh, (img_w, img_h))  # binarised input
    text_score = cv2.resize(text_score, (img_w, img_h))  # region-score image
    text_score = text_score.astype(np.uint8)

    os.makedirs('./output/og_bri' + '/', exist_ok=True)  # binarised inputs
    os.makedirs('./output/score/', exist_ok=True)  # region-score images

    cv2.imwrite('./output/og_bri' + "/og_" + filename + '.jpg', thresh)
    cv2.imwrite('./output/score' + "/score_" + filename + '.jpg', text_score)

    # Resize the colour image back to the original size as well.
    img_rgb = cv2.resize(img_rgb, (img_w, img_h))
    cv2.imwrite(resize_file, img_rgb)

    return boxes, polys, ret_score_text