Ejemplo n.º 1
0
def get_bbox(image_batch, ratio_w, ratio_h):
    """Detect text in a single image and return merged bounding boxes.

    The array is turned into a tensor, given a leading batch axis, moved to
    the GPU and permuted with ``(0, 3, 1, 2)`` before it is passed to the
    module-level ``model``.  Channel 0 of the prediction is used as the
    region score map and channel 1 as the affinity score map.

    Args:
        image_batch: NumPy array holding one image (presumably ``[h, w, c]``
            and already normalised -- confirm against the caller).
        ratio_w: Horizontal factor handed to
            ``craft_utils.adjustResultCoordinates``.
        ratio_h: Vertical factor handed to
            ``craft_utils.adjustResultCoordinates``.

    Returns:
        list[dict]: One dict per merged polygon of the first batch entry,
        with integer ``x``, ``y``, ``width`` and ``height`` keys.
    """
    batch = torch.from_numpy(image_batch).unsqueeze(0).cuda()
    batch = batch.permute(0, 3, 1, 2)
    with torch.no_grad(), torch.jit.optimized_execution(True):
        prediction = model(batch)
        region_maps = prediction[:, 0, :, :].cpu().numpy()
        affinity_maps = prediction[:, 1, :, :].cpu().numpy()

    # Region and affinity maps of the first entry, side by side, as a heat map.
    side_by_side = np.hstack((region_maps[0].copy(), affinity_maps[0]))
    heatmap = imgproc.cvt2HeatmapImg(side_by_side)
    if args.verbose:
        cv2.imshow('score', heatmap)

    per_frame = []
    for region_map, affinity_map in zip(region_maps, affinity_maps):
        # Post-processing: score maps -> boxes / polygons.
        boxes, polys = craft_utils.getDetBoxes(region_map, affinity_map,
                                               text_threshold, link_threshold,
                                               low_text, use_poly)

        # Coordinate adjustment.
        boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)

        # A missing polygon falls back to the box at the same index.
        for idx, poly in enumerate(polys):
            if poly is None:
                polys[idx] = boxes[idx]

        merged = merge_bboxes(polys)
        frame_bboxes = [{
            'x': int(tl[0]),
            'y': int(tl[1]),
            'width': int(tr[0] - tl[0]),
            'height': int(br[1] - tr[1])
        } for tl, tr, br, _bl in merged]
        per_frame.append({'bboxes': frame_bboxes})

    return per_frame[0]['bboxes']
Ejemplo n.º 2
0
def get_boxes(img_c):
    """Detect text boxes in ``img_c`` and return them in input-image scale.

    Relies on module-level configuration (``square_size``, ``mag_ratio``,
    ``text_threshold``, ``link_threshold``, ``low_text``) and on the
    module-level network ``net``.

    Args:
        img_c: Image array accepted by ``imgproc.resize_aspect_ratio``
            (presumably ``[h, w, c]`` -- confirm against the caller).

    Returns:
        The boxes produced by ``craft_utils.getDetBoxes`` (called with
        ``poly=False``) after ``craft_utils.adjustResultCoordinates`` has
        rescaled them with the inverse of the resize ratio.
    """
    # Resize
    img_r, target_ratio, _ = imgproc.resize_aspect_ratio(
        img_c,
        square_size=square_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=mag_ratio)
    # The same inverse ratio is used for both axes.
    ratio_h = ratio_w = 1 / target_ratio

    # Preprocessing of the image
    x = imgproc.normalizeMeanVariance(img_r)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]

    # Forward pass. This is inference only, so autograd bookkeeping is
    # switched off to avoid building (and holding on to) a graph.
    with torch.no_grad():
        y, _ = net(x)

    # Make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # Post-processing
    boxes, _ = craft_utils.getDetBoxes(score_text,
                                       score_link,
                                       text_threshold=text_threshold,
                                       link_threshold=link_threshold,
                                       low_text=low_text,
                                       poly=False)

    # Coordinate adjustment
    return craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
Ejemplo n.º 3
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    """Run ``net`` on ``image`` and return boxes, polygons and a heat map.

    The image is resized with ``args.canvas_size`` / ``args.mag_ratio``
    (``cv2.INTER_AREA`` interpolation), normalised, fed through ``net``
    without gradient tracking, and the two output channels are
    post-processed by ``craft_utils.getDetBoxes``.

    Args:
        net: Callable returning ``(y, feature)`` for a ``[b, c, h, w]`` input.
        image: Image array accepted by ``imgproc.resize_aspect_ratio``.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved to the GPU.
        poly: Forwarded to ``craft_utils.getDetBoxes``.

    Returns:
        tuple: ``(boxes, polys, ret_score_text)`` where missing polygons have
        been replaced by the box at the same index and ``ret_score_text`` is
        the heat-map rendering of both score maps stacked horizontally.
    """
    t0 = time.time()  # taken at entry; this variant never reports it

    # Resize; the same inverse ratio is applied to both axes afterwards.
    resized, scale, _heatmap_size = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_AREA,
        mag_ratio=args.mag_ratio)
    inv_scale = 1 / scale

    # Preprocess: normalise, [h, w, c] -> [c, h, w] -> [b, c, h, w].
    tensor = torch.from_numpy(imgproc.normalizeMeanVariance(resized))
    tensor = Variable(tensor.permute(2, 0, 1).unsqueeze(0))
    if cuda:
        tensor = tensor.cuda()

    # Forward pass.
    with torch.no_grad():
        output, _feature = net(tensor)

    # Channel 0 is used as text score, channel 1 as link score.
    score_text = output[0, :, :, 0].cpu().data.numpy()
    score_link = output[0, :, :, 1].cpu().data.numpy()

    # Post-processing and coordinate adjustment.
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)
    boxes = craft_utils.adjustResultCoordinates(boxes, inv_scale, inv_scale)
    polys = craft_utils.adjustResultCoordinates(polys, inv_scale, inv_scale)
    for idx, candidate in enumerate(polys):
        if candidate is None:
            polys[idx] = boxes[idx]

    # Render both score maps side by side.
    stacked = np.hstack((score_text.copy(), score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(stacked)
    return boxes, polys, ret_score_text
Ejemplo n.º 4
0
def test_net(canvas_size, mag_ratio, net, image, text_threshold,
             link_threshold, low_text, poly, device):
    """Run ``net`` on ``image`` on ``device`` and return boxes and polygons.

    Args:
        canvas_size: Forwarded to ``resize_aspect_ratio``.
        mag_ratio: Forwarded to ``resize_aspect_ratio``.
        net: Callable returning ``(y, feature)`` for a ``[b, c, h, w]`` input.
        image: Image array accepted by ``resize_aspect_ratio``.
        text_threshold: Forwarded to ``getDetBoxes``.
        link_threshold: Forwarded to ``getDetBoxes``.
        low_text: Forwarded to ``getDetBoxes``.
        poly: Forwarded to ``getDetBoxes``.
        device: Target passed to ``Tensor.to`` for the input tensor.

    Returns:
        tuple: ``(boxes, polys)`` rescaled by the inverse resize ratio; a
        polygon that is ``None`` is replaced by the box at the same index.
    """
    # Resize; one inverse ratio serves both axes.
    resized, scale, _heatmap_size = resize_aspect_ratio(
        image,
        canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=mag_ratio)
    inv_scale = 1 / scale

    # Preprocess: normalise, [h, w, c] -> [c, h, w] -> [b, c, h, w].
    tensor = torch.from_numpy(normalizeMeanVariance(resized))
    tensor = Variable(tensor.permute(2, 0, 1).unsqueeze(0)).to(device)

    # Forward pass.
    with torch.no_grad():
        output, _feature = net(tensor)

    # Channel 0 is used as text score, channel 1 as link score.
    score_text = output[0, :, :, 0].cpu().data.numpy()
    score_link = output[0, :, :, 1].cpu().data.numpy()

    # Post-processing.
    boxes, polys = getDetBoxes(score_text, score_link, text_threshold,
                               link_threshold, low_text, poly)

    # Coordinate adjustment.
    boxes = adjustResultCoordinates(boxes, inv_scale, inv_scale)
    polys = adjustResultCoordinates(polys, inv_scale, inv_scale)
    for idx, candidate in enumerate(polys):
        if candidate is None:
            polys[idx] = boxes[idx]

    return boxes, polys
Ejemplo n.º 5
0
    def saveInput(self, imagename, image, region_scores, affinity_scores,
                  confidence_mask):
        """Write a debug montage for one sample to ``output/<imagename>_input.jpg``.

        Boxes are detected from the score maps (both divided by 255 first),
        doubled, clamped to the image size and drawn onto ``image``.  The
        saved picture places ``image`` on the left and, to its right, a
        two-row grid: region heat map | affinity heat map on top, and a
        blank tile | confidence heat map below.

        Args:
            imagename: Name used to build the output file name.
            image: Image array; it is drawn on in place by ``cv2.polylines``.
            region_scores: Region score map (presumably in the 0-255 range,
                since it is divided by 255 -- confirm against the caller).
            affinity_scores: Affinity score map, treated like
                ``region_scores``.
            confidence_mask: Mask rendered with ``imgproc.cvt2HeatmapImg``.

        Returns:
            None.  The montage is written with ``cv2.imwrite`` and the output
            path is printed.
        """
        boxes, _ = craft_utils.getDetBoxes(region_scores / 255,
                                           affinity_scores / 255, 0.7, 0.4,
                                           0.4, False)
        # Boxes are doubled before drawing (presumably the score maps are
        # half the image resolution -- confirm).
        boxes = np.array(boxes, np.int32) * 2
        if len(boxes) > 0:
            # np.clip returns a new array; the result must be written back
            # for the clamping to take effect.
            boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, image.shape[1])
            boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, image.shape[0])
            for box in boxes:
                cv2.polylines(image, [np.reshape(box, (-1, 1, 2))], True,
                              (0, 0, 255))

        region_heatmap = imgproc.cvt2HeatmapImg(region_scores / 255)
        affinity_heatmap = imgproc.cvt2HeatmapImg(affinity_scores / 255)
        confidence_heatmap = imgproc.cvt2HeatmapImg(confidence_mask)

        gt_scores = np.hstack([region_heatmap, affinity_heatmap])
        # A blank tile keeps the confidence map aligned under the affinity map.
        confidence_row = np.hstack(
            [np.zeros_like(confidence_heatmap), confidence_heatmap])
        output = np.concatenate([gt_scores, confidence_row], axis=0)
        output = np.hstack([image, output])

        # Joining the parts (rather than concatenating '/output') keeps the
        # path relative when dirname(__file__) is empty instead of pointing
        # at the filesystem root.
        out_dir = os.path.join(os.path.dirname(__file__), 'output')
        outpath = os.path.join(out_dir, "%s_input.jpg" % imagename)
        print(outpath)
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(os.path.dirname(outpath), exist_ok=True)
        cv2.imwrite(outpath, output)
Ejemplo n.º 6
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda,
             image_path):
    """Run ``net`` on ``image`` and return boxes plus a score heat map.

    When ``args.debug`` is set, the raw text and link score maps are saved
    as ``.npy`` files under ``./debug``; the file stem is the part of the
    base name of ``image_path`` before its first dot.

    Args:
        net: Callable returning ``(y, feature)`` for a ``[b, c, h, w]`` input.
        image: Image array accepted by ``imgproc.resize_aspect_ratio``.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved to the GPU.
        image_path: Path of the source image; only used for debug file names.

    Returns:
        tuple: ``(boxes, ret_score_text)`` -- the rescaled result of
        ``craft_utils.getDetBoxes`` and the heat-map rendering of both score
        maps stacked horizontally.
    """
    t0 = time.time()

    # resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass; inference only, so no autograd graph is needed
    with torch.no_grad():
        y, _ = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    if args.debug:
        debug_dir = './debug'
        # np.save does not create missing directories.
        os.makedirs(debug_dir, exist_ok=True)
        stem = os.path.basename(image_path).split('.')[0]
        np.save(os.path.join(debug_dir, stem + '_score_text.npy'), score_text)
        np.save(os.path.join(debug_dir, stem + '_score_link.npy'), score_link)

    # Post-processing
    boxes = craft_utils.getDetBoxes(score_text, score_link, text_threshold,
                                    link_threshold, low_text)
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    t1 = time.time() - t1

    # render results (optional)
    render_img = np.hstack((score_text, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, ret_score_text
Ejemplo n.º 7
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None):
    """Run ``net`` (and optionally ``refine_net``) on ``image``.

    This variant calls a ``craft_utils.getDetBoxes`` that takes the resized
    image as its first argument and returns three values; the third one is
    discarded.

    Args:
        net: Callable returning ``(y, feature)`` for a ``[b, c, h, w]`` input.
        image: Image array accepted by ``imgproc.resize_aspect_ratio``.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved to the GPU.
        poly: Forwarded to ``craft_utils.getDetBoxes``.
        refine_net: Optional callable ``refine_net(y, feature)``; when given,
            channel 0 of its output replaces the link score map.

    Returns:
        tuple: ``(boxes, polys, ret_score_text)`` where missing polygons have
        been replaced by the box at the same index and ``ret_score_text`` is
        the heat-map rendering of both score maps stacked horizontally.
    """
    t0 = time.time()

    # resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image, args.canvas_size, interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    print("x.shape", x.shape)

    # Inference only: run both networks without building an autograd graph.
    with torch.no_grad():
        # forward pass
        y, feature = net(x)

        # make score and link map
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()

        # refine link
        if refine_net is not None:
            y_refiner = refine_net(y, feature)
            score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys, _ = craft_utils.getDetBoxes(
        img_resized, score_text, score_link, text_threshold,
        link_threshold, low_text, poly,
    )

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k, poly_k in enumerate(polys):
        if poly_k is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = np.hstack((score_text, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
Ejemplo n.º 8
0
    def test_net(self, image_opencv):
        """Detect text in ``image_opencv`` using the instance's networks.

        Reads ``self.canvas_size``, ``self.interpolation``, ``self.mag_ratio``,
        ``self.cuda``, ``self.net``, ``self.refine_net``,
        ``self.text_threshold``, ``self.link_threshold``, ``self.low_text``,
        ``self.poly`` and ``self.show_time``.

        Args:
            image_opencv: Image array accepted by
                ``imgproc.resize_aspect_ratio``.

        Returns:
            tuple: ``(boxes, polys)`` rescaled by the inverse resize ratio; a
            polygon that is ``None`` is replaced by the box at the same index.
        """
        # Start the "infer" timer at entry so that it covers the forward
        # pass, not only the optional refiner.
        t0 = time.time()

        # resize
        img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
            image_opencv,
            self.canvas_size,
            interpolation=self.interpolation,
            mag_ratio=self.mag_ratio)
        ratio_h = ratio_w = 1 / target_ratio

        # preprocessing
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]

        if self.cuda:
            x = x.cuda()

        # Inference only: run both networks without an autograd graph.
        with torch.no_grad():
            # forward pass
            y, feature = self.net(x)

            # make score and link map
            score_text = y[0, :, :, 0].cpu().data.numpy()
            score_link = y[0, :, :, 1].cpu().data.numpy()

            # refine link
            if self.refine_net is not None:
                y_refiner = self.refine_net(y, feature)
                score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

        t0 = time.time() - t0
        t1 = time.time()

        # Post-processing
        boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                               self.text_threshold,
                                               self.link_threshold,
                                               self.low_text, self.poly)

        # coordinate adjustment
        boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
        for k, poly_k in enumerate(polys):
            if poly_k is None:
                polys[k] = boxes[k]
        t1 = time.time() - t1

        if self.show_time:
            print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))
        return boxes, polys
Ejemplo n.º 9
0
def get_prediction(net,
                   image,
                   text_threshold,
                   link_threshold,
                   low_text,
                   cuda,
                   poly,
                   refine_net=None,
                   canvas_size=1280,
                   mag_ratio=1.5):
    """Run ``net`` (and optionally ``refine_net``) on ``image``.

    Args:
        net: Callable returning ``(y, feature)`` for a ``[b, c, h, w]`` input.
        image: Image array accepted by ``imgproc.resize_aspect_ratio``.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved to the GPU.
        poly: Forwarded to ``craft_utils.getDetBoxes``.
        refine_net: Optional callable ``refine_net(y, feature)``; when given,
            channel 0 of its output replaces the link score map.
        canvas_size: Size argument for ``imgproc.resize_aspect_ratio``.
            Defaults to the previously hard-coded 1280.
        mag_ratio: Magnification passed to ``imgproc.resize_aspect_ratio``.
            Defaults to the previously hard-coded 1.5.

    Returns:
        tuple: ``(boxes, polys)`` rescaled by the inverse resize ratio; a
        polygon that is ``None`` is replaced by the box at the same index.
    """
    # resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image, canvas_size, interpolation=cv2.INTER_LINEAR,
        mag_ratio=mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k, poly_k in enumerate(polys):
        if poly_k is None:
            polys[k] = boxes[k]

    return boxes, polys
Ejemplo n.º 10
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly,
             filename, result_folder=result_folder):
    """Run a TensorFlow ``net`` on ``image`` and save the score heat map.

    Uses the module-level ``canvas_size`` and ``mag_ratio`` for resizing.
    The heat map is written to ``result_folder + filename + "_mask.jpg"``
    (plain string concatenation, so ``result_folder`` must already end with
    a path separator if it is meant as a directory).

    Args:
        net: Callable returning ``(y, _)`` for a batched input.
        image: Image array accepted by ``imgproc.resize_aspect_ratio``.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: Unused by this variant.
        poly: Forwarded to ``craft_utils.getDetBoxes``.
        filename: Stem used for the saved heat-map file.
        result_folder: Prefix for the saved heat-map file; defaults to the
            module-level ``result_folder`` as evaluated at definition time.

    Returns:
        tuple: ``(boxes, polys, ret_score_text)``.
    """
    infer_time = time.time()
    resized, scale, _heatmap_size = imgproc.resize_aspect_ratio(
        image, canvas_size, interpolation=cv2.INTER_LINEAR,
        mag_ratio=mag_ratio)
    inv_scale = 1 / scale

    # Preprocessing: normalise, then add the leading batch axis.
    batch = imgproc.normalizeMeanVariance(resized)
    print("###")
    batch = tf.expand_dims(batch, 0)
    print(batch.shape)

    # Forward pass.
    output, _ = net(batch)

    # Channel 0 is used as text score, channel 1 as link score.
    score_text = output[0, :, :, 0].numpy()
    score_link = output[0, :, :, 1].numpy()

    # Both durations are measured but not reported by this variant.
    infer_time = time.time() - infer_time
    postproc_time = time.time()

    # Post-processing and coordinate adjustment.
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)
    boxes = craft_utils.adjustResultCoordinates(boxes, inv_scale, inv_scale)
    polys = craft_utils.adjustResultCoordinates(polys, inv_scale, inv_scale)
    for idx, candidate in enumerate(polys):
        if candidate is None:
            polys[idx] = boxes[idx]

    postproc_time = time.time() - postproc_time

    # Render both score maps side by side and persist the picture.
    stacked = np.hstack((score_text.copy(), score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(stacked)
    cv2.imwrite(result_folder + filename + "_mask.jpg", ret_score_text)

    return boxes, polys, ret_score_text
Ejemplo n.º 11
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    """Run ``net`` on ``image`` and return boxes, polygons and a heat map.

    Args:
        net: Callable returning ``(y, feature)`` for a ``[b, c, h, w]`` input.
        image: Image array accepted by ``imgproc.resize_aspect_ratio``.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved to the GPU.
        poly: Forwarded to ``craft_utils.getDetBoxes``.

    Returns:
        tuple: ``(boxes, polys, ret_score_text)`` where missing polygons have
        been replaced by the box at the same index and ``ret_score_text`` is
        the heat-map rendering of both score maps stacked horizontally.
    """
    t0 = time.time()

    # resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image, args.canvas_size, interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass; inference only, so no autograd graph is needed
    with torch.no_grad():
        y, _ = net(x)

    # build the score and link maps
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k, poly_k in enumerate(polys):
        if poly_k is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = np.hstack((score_text, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
Ejemplo n.º 12
0
    def detect(self, image):
        """Detect text in ``image`` and return one converted box per hit.

        Reads ``self.canvas_size``, ``self.mag_ratio``, ``self.cuda``,
        ``self.net``, ``self.refine_net``, ``self.text_threshold``,
        ``self.link_threshold``, ``self.low_text`` and ``self.poly``.

        Args:
            image: Image array accepted by ``imgproc.resize_aspect_ratio``;
                its first two ``shape`` entries are handed to ``box2xyxy``.

        Returns:
            list: ``box2xyxy(box, image.shape[0:2])`` for every detected box,
            in detection order.
        """
        # Resize; one inverse ratio serves both axes.
        resized, scale, _heatmap_size = imgproc.resize_aspect_ratio(
            image,
            self.canvas_size,
            interpolation=cv2.INTER_LINEAR,
            mag_ratio=self.mag_ratio)
        inv_scale = 1 / scale

        # Preprocess: normalise, [h, w, c] -> [c, h, w] -> [b, c, h, w].
        tensor = torch.from_numpy(imgproc.normalizeMeanVariance(resized))
        tensor = Variable(tensor.permute(2, 0, 1).unsqueeze(0))
        if self.cuda:
            tensor = tensor.cuda()

        # Forward pass.
        with torch.no_grad():
            output, feature = self.net(tensor)

        # Channel 0 is the text score; the link score comes from the refiner
        # when one is configured, otherwise from channel 1.
        score_text = output[0, :, :, 0].cpu().data.numpy()
        if self.refine_net is None:
            score_link = output[0, :, :, 1].cpu().data.numpy()
        else:
            with torch.no_grad():
                refined = self.refine_net(output, feature)
            score_link = refined[0, :, :, 0].cpu().data.numpy()

        # Post-processing; the polygon output is not needed here.
        boxes, _ = craft_utils.getDetBoxes(score_text, score_link,
                                           self.text_threshold,
                                           self.link_threshold,
                                           self.low_text, self.poly)

        # Coordinate adjustment.
        boxes = craft_utils.adjustResultCoordinates(boxes, inv_scale,
                                                    inv_scale)
        return [box2xyxy(box, image.shape[0:2]) for box in boxes]
Ejemplo n.º 13
0
    def test_net(self,
                 net,
                 image,
                 text_threshold,
                 link_threshold,
                 low_text,
                 poly,
                 refine_net=None):
        """Run ``net`` on ``image`` and return boxes, polygons and a heat map.

        The image is resized with a fixed canvas size of 1280 and a
        magnification ratio of 1.5.  The input tensor is not moved to any
        device by this method.

        Args:
            net: Callable returning ``(y, feature)`` for a ``[b, c, h, w]``
                input.
            image: Image array accepted by ``imgproc.resize_aspect_ratio``.
            text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
            link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
            low_text: Forwarded to ``craft_utils.getDetBoxes``.
            poly: Forwarded to ``craft_utils.getDetBoxes``.
            refine_net: Accepted but not used by this method.

        Returns:
            tuple: ``(boxes, polys, ret_score_text)`` where missing polygons
            have been replaced by the box at the same index.
        """
        # Resize; one inverse ratio serves both axes.
        resized, scale, _heatmap_size = imgproc.resize_aspect_ratio(
            image, 1280, interpolation=cv.INTER_LINEAR, mag_ratio=1.5)
        inv_scale = 1 / scale

        # Preprocess: normalise, [h, w, c] -> [c, h, w] -> [b, c, h, w].
        tensor = torch.from_numpy(imgproc.normalizeMeanVariance(resized))
        tensor = Variable(tensor.permute(2, 0, 1).unsqueeze(0))

        # Forward pass.
        with torch.no_grad():
            output, _feature = net(tensor)

        # Channel 0 is used as text score, channel 1 as link score.
        score_text = output[0, :, :, 0].cpu().data.numpy()
        score_link = output[0, :, :, 1].cpu().data.numpy()

        # Post-processing and coordinate adjustment.
        boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                               text_threshold, link_threshold,
                                               low_text, poly)
        boxes = craft_utils.adjustResultCoordinates(boxes, inv_scale,
                                                    inv_scale)
        polys = craft_utils.adjustResultCoordinates(polys, inv_scale,
                                                    inv_scale)
        for idx, candidate in enumerate(polys):
            if candidate is None:
                polys[idx] = boxes[idx]

        # Render both score maps side by side.
        stacked = np.hstack((score_text.copy(), score_link))
        ret_score_text = imgproc.cvt2HeatmapImg(stacked)

        return boxes, polys, ret_score_text
Ejemplo n.º 14
0
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    # forward pass

    x = x.cpu().detach().numpy()
    y, feature = run_tflite_model(x)

    y = torch.from_numpy(y)
    feature = torch.from_numpy(feature)
    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    file_utils.saveResult(image_path,
                          image[:, :, ::-1],
Ejemplo n.º 15
0
        image = np.array(plt.imread(image_path))  # 225*517*3
        region = np.load(region_path)
        affinity = np.load(affinity_path)

        # resize
        # img_resized=352*800*3, target_ratio=1.5
        # size_heatmap=400*176, ratio_h=w=0.66666667
        #        img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio)
        #        ratio_h = ratio_w = 1 / target_ratio
        #        plt.imshow(img_resized.astype(np.int))
        #        region = cv2.resize(region,(img_resized.shape[1]//2,img_resized.shape[0]//2))
        #        affinity = cv2.resize(affinity,(img_resized.shape[1]//2,img_resized.shape[0]//2))

        # Post-processing
        boxes, polys = craft_utils.getDetBoxes(region, affinity,
                                               text_threshold, link_threshold,
                                               low_text, poly)

        # coordinate adjustment
        #        boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
        #        polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
        #        for k in range(len(polys)):
        #            if polys[k] is None:
        #                polys[k] = boxes[k]

        # render results (optional)
        render_img = region.copy()
        render_img = np.hstack((render_img, affinity))
        ret_score_text = imgproc.cvt2HeatmapImg(render_img)
        for i, box in enumerate(boxes):
            _, (kernel_w, kernel_h), _ = cv2.minAreaRect(
Ejemplo n.º 16
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    """Run ``net`` on ``image`` and post-process every output channel.

    Unlike the two-map variants, each channel of the network output is
    passed on its own to ``craft_utils.getDetBoxes(score, text_threshold,
    low_text, poly)``.  No coordinate adjustment is applied, so the returned
    boxes and polygons stay in the coordinate system produced by
    ``getDetBoxes``.

    Args:
        net: Callable returning ``(y, _)`` for a ``[b, c, h, w]`` input.
        image: Image array accepted by ``imgproc.resize_aspect_ratio``.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Unused by this variant; kept for signature parity.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved to the GPU.
        poly: Forwarded to ``craft_utils.getDetBoxes``.

    Returns:
        tuple: ``(gh_pred, boxes_pred, polys_pred, size_heatmap)`` --
        the channel-first prediction array, one boxes entry and one polygons
        entry per channel (as tuples), and the heat-map size reported by
        ``imgproc.resize_aspect_ratio``.
    """
    t0 = time.time()

    # resize
    img_resized, _, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass; inference only, so no autograd graph is needed
    with torch.no_grad():
        y, _ = net(x)

    # Move the last axis of the first batch entry to the front so that
    # iterating over the array yields one score map per channel.
    gh_pred = y[0, :, :, :].permute((2, 0, 1)).cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing, one channel at a time.
    postproc = [
        craft_utils.getDetBoxes(score_map, text_threshold, low_text, poly)
        for score_map in gh_pred
    ]
    boxes_pred, polys_pred = zip(*postproc)

    # A missing polygon falls back to the box at the same index.
    for boxes, polys in zip(boxes_pred, polys_pred):
        for k, poly_k in enumerate(polys):
            if poly_k is None:
                polys[k] = boxes[k]

    t1 = time.time() - t1

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return gh_pred, boxes_pred, polys_pred, size_heatmap
Ejemplo n.º 17
0
def test_net(net,
             image,
             text_threshold,
             link_threshold,
             low_text,
             cuda,
             poly,
             refine_net=None,
             overlap=0.0):
    """Run ``net`` on ``image`` at native size, optionally on tiles.

    The image is not resized.  When ``0 < overlap < 1`` the normalised image
    is split with ``splitOverlap`` and the per-tile score maps are merged
    again with ``joinOverlap``.  Otherwise a single ``[h, w, c]`` image gets
    a leading batch axis and its score maps are taken from batch entry 0.

    Args:
        net: Callable returning ``(y, feature)`` for a ``[b, c, h, w]`` input.
        image: Image array accepted by ``imgproc.normalizeMeanVariance``.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: When truthy the input tensor is moved to the GPU.
        poly: Forwarded to ``craft_utils.getDetBoxes``.
        refine_net: Optional callable ``refine_net(y, feature)``; when given,
            channel 0 of its output is used as the link score map.
        overlap: Tile overlap handed to ``splitOverlap``; tiling is only
            used for values strictly between 0 and 1.

    Returns:
        tuple: ``(boxes, polys, ret_score_text)`` where missing polygons have
        been replaced by the box at the same index and ``ret_score_text`` is
        the heat-map rendering of both score maps stacked horizontally.
    """
    t0 = time.time()

    # The image is used as-is, so detected coordinates need no rescaling
    # beyond what adjustResultCoordinates applies by itself.
    ratio_h = ratio_w = 1

    # preprocessing
    x = imgproc.normalizeMeanVariance(image)

    split_coord = []
    if 0.0 < overlap < 1.0:
        x, split_coord = splitOverlap(x, overlap)

    # Without tiling x is still a single [h, w, c] image; the 4-axis permute
    # below would reject it, so give it a batch axis of length one.
    single_image = x.ndim == 3
    if single_image:
        x = x[np.newaxis, ...]

    x = torch.from_numpy(x).permute(0, 3, 1, 2)  # [b, h, w, c] to [b, c, h, w]
    x = Variable(x)

    if cuda:
        x = x.cuda()

    def merge_maps(maps):
        """Turn per-batch score maps into one 2-D score map."""
        maps = maps.cpu().data.numpy()
        if single_image:
            # Nothing was split, so there is nothing to stitch together.
            return maps[0]
        return joinOverlap(maps, split_coord)

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = merge_maps(y[:, :, :, 0])

    if refine_net is None:
        score_link = merge_maps(y[:, :, :, 1])
    else:
        # refine link
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = merge_maps(y_refiner[:, :, :, 0])

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k, poly_k in enumerate(polys):
        if poly_k is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = np.hstack((score_text, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
Ejemplo n.º 18
0
def main():
    import os
    os.makedirs('result', exist_ok=True)
    text_render.prepare_renderer()

    with open('alphabet-all-v5.txt', 'r') as fp:
        dictionary = [s[:-1] for s in fp.readlines()]
    model_ocr = OCR(dictionary, 768)
    model_ocr.load_state_dict(torch.load('ocr.ckpt', map_location='cpu'),
                              strict=False)
    model_ocr.eval()

    model = CRAFT_net()
    sd = torch.load('detect.ckpt', map_location='cpu')
    model.load_state_dict(sd['model'])
    model = model.cpu()
    model.eval()
    img = cv2.imread(args.image)
    img_bbox = np.copy(img)
    img_bbox_all = np.copy(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_resized, target_ratio, _, pad_w, pad_h = imgproc.resize_aspect_ratio(
        img, args.size, cv2.INTER_LINEAR, mag_ratio=1)
    img_to_overlay = np.copy(img_resized)
    ratio_h = ratio_w = 1 / target_ratio
    img_resized = imgproc.normalizeMeanVariance(img_resized)
    print(img_resized.shape)
    rscore, ascore, mask = test(model, img_resized)
    overlay = imgproc.cvt2HeatmapImg(rscore + ascore)
    boxes, polys = craft_utils.getDetBoxes(rscore, ascore, args.text_threshold,
                                           args.link_threshold, args.low_text,
                                           False)
    boxes = craft_utils.adjustResultCoordinates(boxes,
                                                ratio_w,
                                                ratio_h,
                                                ratio_net=2)
    polys = craft_utils.adjustResultCoordinates(polys,
                                                ratio_w,
                                                ratio_h,
                                                ratio_net=2)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]
    # merge textlines
    polys = merge_bboxes(polys, can_merge_textline)
    for [tl, tr, br, bl] in polys:
        x = int(tl[0])
        y = int(tl[1])
        width = int(tr[0] - tl[0])
        height = int(br[1] - tr[1])
        cv2.rectangle(img_bbox_all, (x, y), (x + width, y + height),
                      color=(255, 0, 0),
                      thickness=2)
    # run OCR for each textline
    textlines = run_ocr(img_bbox, polys, dictionary, model_ocr, 32)
    # merge textline to text region, filter textlines without characters
    text_regions: List[BBox] = []
    new_textlines = []
    for (poly_regions, textline_indices,
         majority_dir) in merge_bboxes_text_region(textlines):
        [tl, tr, br, bl] = poly_regions
        x = int(tl[0]) - 5
        y = int(tl[1]) - 5
        width = int(tr[0] - tl[0]) + 10
        height = int(br[1] - tr[1]) + 10
        text = ''
        logprob_lengths = []
        for textline_idx in textline_indices:
            if not text:
                text = textlines[textline_idx].text
            else:
                last_ch = text[-1]
                cur_ch = textlines[textline_idx].text[0]
                if ord(last_ch) > 255 and ord(cur_ch) > 255:
                    text += textlines[textline_idx].text
                else:
                    text += ' ' + textlines[textline_idx].text
            logprob_lengths.append((np.log(textlines[textline_idx].prob),
                                    len(textlines[textline_idx].text)))
        vc = count_valuable_text(text)
        total_logprobs = 0.0
        for (logprob, length) in logprob_lengths:
            total_logprobs += logprob * length
        total_logprobs /= sum([x[1] for x in logprob_lengths])
        # filter text region without characters
        if vc > 1:
            region = BBox(x, y, width, height, text, np.exp(total_logprobs))
            region.textline_indices = []
            region.majority_dir = majority_dir
            text_regions.append(region)
            for textline_idx in textline_indices:
                region.textline_indices.append(len(new_textlines))
                new_textlines.append(textlines[textline_idx])
    textlines = new_textlines
    # create mask
    from text_mask_utils import filter_masks, main_process
    mask_resized = cv2.resize(mask, (mask.shape[1] * 2, mask.shape[0] * 2),
                              interpolation=cv2.INTER_LINEAR)
    if pad_h > 0:
        mask_resized = mask_resized[:-pad_h, :]
    elif pad_w > 0:
        mask_resized = mask_resized[:, :-pad_w]
    mask_resized = cv2.resize(mask_resized,
                              (img.shape[1] // 2, img.shape[0] // 2),
                              interpolation=cv2.INTER_LINEAR)
    img_resized_2 = cv2.resize(img, (img.shape[1] // 2, img.shape[0] // 2),
                               interpolation=cv2.INTER_LINEAR)
    mask_resized[mask_resized > 250] = 255
    text_lines = [(a.x // 2, a.y // 2, a.w // 2, a.h // 2) for a in textlines]
    mask_ccs, cc2textline_assignment = filter_masks(mask_resized, text_lines)
    cv2.imwrite('result/mask_filtered.png', reduce(cv2.bitwise_or, mask_ccs))
    final_mask, textline_colors = main_process(img_resized_2, mask_ccs,
                                               text_lines,
                                               cc2textline_assignment)
    final_mask = cv2.resize(final_mask, (img.shape[1], img.shape[0]),
                            interpolation=cv2.INTER_LINEAR)
    # run inpainting
    img_inpainted = run_inpainting(img, final_mask)
    # translate text region texts
    texts = '\n'.join([r.text for r in text_regions])
    trans_ret = baidu_translator.translate('ja', 'zh-CN', texts)
    translated_sentences = []
    batch = len(text_regions)
    if len(trans_ret) < batch:
        translated_sentences.extend(trans_ret)
        translated_sentences.extend([''] * (batch - len(trans_ret)))
    elif len(trans_ret) > batch:
        translated_sentences.extend(trans_ret[:batch])
    else:
        translated_sentences.extend(trans_ret)
    # render translated texts
    img_canvas = np.copy(img_inpainted)
    for trans_text, region in zip(translated_sentences, text_regions):
        print(region.text)
        print(trans_text)
        print(region.majority_dir, region.x, region.y, region.w, region.h)
        img_bbox = cv2.rectangle(img_bbox, (region.x, region.y),
                                 (region.x + region.w, region.y + region.h),
                                 color=(0, 0, 255),
                                 thickness=2)
        for idx in region.textline_indices:
            txtln = textlines[idx]
            img_bbox = cv2.rectangle(img_bbox, (txtln.x, txtln.y),
                                     (txtln.x + txtln.w, txtln.y + txtln.h),
                                     color=textline_colors[idx],
                                     thickness=2)
        if region.majority_dir == 'h':
            text_render.put_text_horizontal(img_canvas, trans_text,
                                            len(region.textline_indices),
                                            region.x, region.y, region.w,
                                            region.h, textline_colors[idx],
                                            None)
        else:
            text_render.put_text_vertical(img_canvas, trans_text,
                                          len(region.textline_indices),
                                          region.x, region.y, region.w,
                                          region.h, textline_colors[idx], None)

    cv2.imwrite('result/rs.png', imgproc.cvt2HeatmapImg(rscore))
    cv2.imwrite('result/as.png', imgproc.cvt2HeatmapImg(ascore))
    cv2.imwrite('result/textline.png', overlay)
    cv2.imwrite('result/bbox.png', img_bbox)
    cv2.imwrite('result/bbox_unfiltered.png', img_bbox_all)
    cv2.imwrite(
        'result/overlay.png',
        cv2.cvtColor(
            overlay_image(
                img_to_overlay,
                cv2.resize(overlay,
                           (img_resized.shape[1], img_resized.shape[0]),
                           interpolation=cv2.INTER_LINEAR)),
            cv2.COLOR_RGB2BGR))
    cv2.imwrite('result/mask.png', final_mask)
    cv2.imwrite('result/masked.png',
                cv2.cvtColor(img_inpainted, cv2.COLOR_RGB2BGR))
    cv2.imwrite('result/final.png', cv2.cvtColor(img_canvas,
                                                 cv2.COLOR_RGB2BGR))
Ejemplo n.º 19
0
def test_net(net,
             image,
             text_threshold,
             link_threshold,
             low_text,
             cuda,
             poly,
             image_path,
             refine_net=None):
    """Run the text detector on one image and write debug images to disk.

    The image is resized and normalised, pushed through ``net`` (and through
    ``refine_net`` when one is given), and the resulting region/affinity
    score maps are converted into boxes and polygons that are scaled back
    with the inverse of the resize ratio.

    Side effects (folders are created when missing):
        * ``./output/og_bri/og_<name>.jpg``  - Otsu-binarised input image
        * ``./output/score/score_<name>.jpg`` - output of
          ``imgproc.cvMakeScores`` resized to the input size
        * ``./output/resize/resize_<name>_mask.jpg`` - colour-converted
          resized image, scaled back to the input size

    Args:
        net: detection network; called as ``net(x)`` and must return
            ``(y, feature)``.
        image: input image array with exactly three dimensions
            ``(height, width, channels)``.
        text_threshold: forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: forwarded to ``craft_utils.getDetBoxes``.
        low_text: accepted but currently NOT used; see the review note at
            the ``getDetBoxes`` call.
        cuda: when truthy the input tensor is moved to the GPU.
        poly: forwarded to ``craft_utils.getDetBoxes``.
        image_path: path of the source image; only its base name (without
            extension) is used to name the output files.
        refine_net: optional link refiner, called as ``refine_net(y, feature)``.

    Returns:
        Tuple ``(boxes, polys, ret_score_text)``: the adjusted boxes, the
        adjusted polygons (a missing polygon is replaced by its box) and the
        heat-map rendering of the region score.
    """
    t0 = time.time()
    img_h, img_w, _ = image.shape

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass; inference only, so do not record autograd state
    with torch.no_grad():
        y, feature = net(x)

        # make score and link map
        score_text = y[0, :, :, 0].cpu().data.numpy()  # region score
        score_link = y[0, :, :, 1].cpu().data.numpy()  # affinity score

        # refine link
        if refine_net is not None:
            y_refiner = refine_net(y, feature)
            score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing: this is where the boxes are produced.
    # NOTE(review): the `low_text` argument is ignored and 0.4 is hard-coded
    # here; kept as-is to preserve current results - confirm whether the
    # parameter should be honoured instead.
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold, 0.4,
                                           poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)

    # fall back to the plain box wherever no polygon was produced
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]
    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
    Plus_score_text = imgproc.cvMakeScores(render_img)

    filename = os.path.splitext(os.path.basename(image_path))[0]

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    resize_folder = './output/resize'  # resized original images go here
    os.makedirs(resize_folder, exist_ok=True)
    resize_file = resize_folder + "/resize_" + filename + '_mask.jpg'

    # convert the resized image from BGR to RGB channel order
    IMG_RGB2 = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)

    # Binarise the resized image (used for compositing).
    # NOTE(review): the `* 255` assumes img_resized holds values in [0, 1];
    # if it is already 0-255 the uint8 cast wraps around - confirm.
    # NOTE(review): the array is in RGB order at this point but is converted
    # with COLOR_BGR2GRAY - confirm this is intended.
    pil_image = Image.fromarray((IMG_RGB2 * 255).astype(np.uint8))
    images = np.array(pil_image)
    images = cv2.cvtColor(images, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(images, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # upscale the score image by 2x first
    text_score = cv2.resize(Plus_score_text,
                            None,
                            fx=2,
                            fy=2,
                            interpolation=cv2.INTER_LINEAR)

    # bring both images to the size of the input image
    thresh = cv2.resize(thresh, (img_w, img_h))  # binarised original
    text_score = cv2.resize(text_score, (img_w, img_h))  # region score image

    text_score = Image.fromarray((text_score).astype(np.uint8))
    text_score = np.array(text_score)

    os.makedirs('./output/og_bri', exist_ok=True)  # binarised originals
    os.makedirs('./output/score', exist_ok=True)  # score images

    cv2.imwrite('./output/og_bri' + "/og_" + filename + '.jpg', thresh)
    cv2.imwrite('./output/score' + "/score_" + filename + '.jpg', text_score)

    # scale the colour-converted image back to the input size and save it
    IMG_RGB2 = cv2.resize(IMG_RGB2, (img_w, img_h))
    cv2.imwrite(resize_file, IMG_RGB2)

    return boxes, polys, ret_score_text
Ejemplo n.º 20
0
def test_net(net,
             image,
             text_threshold,
             link_threshold,
             low_text,
             cuda,
             poly,
             refine_net=None):
    """Detect text boxes, cluster them into lines and check ``lhs=rhs`` strings.

    Steps:
        1. Resize/normalise ``image`` and run ``net`` (plus ``refine_net``
           when given) to obtain the region and link score maps.
        2. Convert the score maps to boxes and scale them back with the
           inverse resize ratio.
        3. Cluster neighbouring boxes with a breadth-first search over box
           centres (the x coordinate is divided by 4 so that horizontal
           distance counts less) and merge each cluster into one
           axis-aligned rectangle ``[left, top, right, bottom]``.
        4. For each rectangle, binarise the crop taken from the module-level
           ``i_image``, extract character contours, classify each one with
           ``classifer_box`` and join the results into a string.
        5. When the string contains exactly one ``=``, compare both sides
           with ``str_to_num``, draw a rectangle on the module-level
           ``i_image_3_color`` and save the crop under ``./res/``.

    Side effects: prints progress, writes PNG files to ``./res/``, draws on
    ``i_image_3_color``, opens a window with ``cv2.imshow`` and blocks in
    ``cv2.waitKey()``. Calls ``exit()`` if more than 200 neighbours fall
    within the distance threshold of a single box.

    Args:
        net: detection network; called as ``net(x)``, returns ``(y, feature)``.
        image: input image passed to ``imgproc.resize_aspect_ratio``.
        text_threshold: forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: forwarded to ``craft_utils.getDetBoxes``.
        low_text: forwarded to ``craft_utils.getDetBoxes``.
        cuda: when truthy the input tensor is moved to the GPU.
        poly: forwarded to ``craft_utils.getDetBoxes``.
        refine_net: optional link refiner, called as ``refine_net(y, feature)``.

    Returns:
        Tuple ``(boxes, polys, ret_score_text)``: adjusted boxes, adjusted
        polygons (a missing polygon is replaced by its box) and the heat-map
        rendering of the region and link scores stacked side by side.
    """
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    # --- Handle split boxes: put neighbouring boxes into the same group ---
    # Build the table of box centres used for the pairwise distances.
    all_rect_cx_cy = np.zeros((len(boxes), 2))
    for i, box in enumerate(boxes):
        left = int(min(box[0][0], box[1][0], box[2][0], box[3][0]))
        right = int(max(box[0][0], box[1][0], box[2][0], box[3][0]))
        top = int(min(box[0][1], box[1][1], box[2][1], box[3][1]))
        bottom = int(max(box[0][1], box[1][1], box[2][1], box[3][1]))

        # Shrink x to reduce the influence of horizontal distance
        # (author's note: still needs tuning).
        all_rect_cx_cy[i][0] = ((left + right) / 2) / 4
        all_rect_cx_cy[i][1] = ((top + bottom) / 2)

    # mat_distance[i][j] is the distance between centre i and centre j
    mat_distance = []
    for i in range(len(all_rect_cx_cy)):
        mat_distance.append(
            np.sqrt(np.sum((all_rect_cx_cy - all_rect_cx_cy[i])**2, axis=-1)))
    print("generate distance mat;len:", len(mat_distance))

    segment_group = []
    ind_group = -1
    search_queue = deque()
    cnt_processed = 0
    processed = set()
    # Breadth-first search: keep going until every box has been processed.
    while cnt_processed < len(all_rect_cx_cy):
        if (len(search_queue) == 0):
            # Queue ran dry: seed a new group with the first unprocessed box.
            for i in range(len(all_rect_cx_cy)):
                if (i not in processed):
                    search_queue.append(i)
                    segment_group.append([])
                    ind_group += 1
                    break
        current_node = search_queue.popleft()  # FIFO order is what makes it BFS
        if current_node not in processed:
            cnt_processed += 1
            processed.add(current_node)
            inds = np.argsort(mat_distance[current_node])
            segment_group[ind_group].append(boxes[current_node])
            cnt_company = 0
            distance_threshold = 20
            # Walk the neighbours from nearest to farthest and stop at the
            # first one beyond the threshold.
            for index in inds:
                if mat_distance[current_node][index] > distance_threshold:
                    break
                cnt_company += 1
                if cnt_company > 200:
                    print("error")
                    exit()
                if index not in search_queue:  # enqueue only if not queued yet
                    search_queue.append(index)

    # --- Merge the boxes of each group into one bounding rectangle ---
    merge_boxes = []
    for segment in segment_group:
        left_s = []
        right_s = []
        top_s = []
        bottom_s = []
        for box in segment:
            left_s.append(
                math.floor(min(box[0][0], box[1][0], box[2][0], box[3][0])))
            right_s.append(
                math.floor(max(box[0][0], box[1][0], box[2][0], box[3][0])))
            top_s.append(
                math.floor(min(box[0][1], box[1][1], box[2][1], box[3][1])))
            bottom_s.append(
                math.floor(max(box[0][1], box[1][1], box[2][1], box[3][1])))
        merge_boxes.append(
            [min(left_s), min(top_s),
             max(right_s),
             max(bottom_s)])

    def detect_rect(rect_char, binary_img):
        """Classify the ``(x, y, w, h)`` region of ``binary_img``; return the class id."""
        crop_char = binary_img[rect_char[1]:rect_char[1] + rect_char[3],
                               rect_char[0]:rect_char[0] + rect_char[2]]
        crop_char = torch.tensor(crop_char, dtype=torch.int)
        crop_char = adapt_size(crop_char)
        crop_char = crop_char.float().cuda()
        res = classifer_box.eval(
            crop_char.unsqueeze(0)).squeeze().int().item()
        debug_write(
            crop_char[0].cpu().int().numpy().astype(np.uint8) * 255,
            config.CLASS_toString[res])
        return res

    for rect in merge_boxes:
        # characters smaller than 20% of the rect's short side are ignored
        threshold_hw = min(rect[3] - rect[1], rect[2] - rect[0]) * 0.2
        crop = i_image[rect[1]:rect[3], rect[0]:rect[2]]
        # With THRESH_OTSU the threshold is chosen automatically, so the
        # 175 passed here should have no effect (per the OpenCV docs).
        _, binary_img = cv2.threshold(
            crop, 175, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        # findContours returns 3 values on OpenCV 3.x but 2 on 2.x/4.x;
        # the contour list is the second-to-last item in every version.
        contours = cv2.findContours(binary_img, cv2.RETR_EXTERNAL,
                                    cv2.CHAIN_APPROX_SIMPLE)[-2]

        # bounding rects (x, y, w, h) of all contours, ordered left to right
        group = [cv2.boundingRect(contour) for contour in contours]
        group.sort(key=lambda r: r[0])

        # Nothing was found in this crop: skip it instead of failing on
        # group[0] (fewer than 4 characters are discarded below anyway).
        if not group:
            continue

        last_x_start = group[0][0]
        last_x_end = group[0][0] + group[0][2]
        last = group[0]
        i = 1

        # Merge multi-part symbols such as '=' or '/': a rect whose centre
        # lies within the x-range of the reference rect and is vertically
        # close to it is fused into its left neighbour.
        while i < len(group):
            now = group[i]
            cx = now[0] + now[2] / 2
            cy = now[1] + now[3] / 2
            last_cy = last[1] + last[3] / 2
            y_near = abs(last_cy - cy) < (last_x_end - last_x_start) * 0.6
            if (last_x_start < cx and cx < last_x_end and y_near):
                group.pop(i)
                i -= 1
                x1 = min(now[0], group[i][0])
                y1 = min(now[1], group[i][1])
                x2 = max(now[0] + now[2], group[i][0] + group[i][2])
                y2 = max(now[1] + now[3], group[i][1] + group[i][3])
                group[i] = (x1, y1, x2 - x1, y2 - y1)
            else:
                last_x_start = group[i][0]
                last_x_end = group[i][0] + group[i][2]
                last = group[i]
            i += 1
        if (len(group) < 4 or len(group) > 16):
            continue

        # Classify every sufficiently large character rect.
        rect_set = []
        res_set = []
        for rect_char in group:
            if max(rect_char[2], rect_char[3]) < threshold_hw:
                continue
            res_set.append(detect_rect(rect_char, binary_img))
            rect_set.append(rect_char)

        res_str = ''
        for res, eq_rect in zip(res_set, rect_set):
            res_str += config.CLASS_toString[res]
            # The part right of the equals sign is lighter in colour, so it
            # is binarised separately and detected again; everything after
            # the first equals sign comes from that second pass.
            if (config.CLASS_is_eq(res)):
                crop = i_image[rect[1]:rect[3],
                               rect[0]:rect[2]][:, eq_rect[0] + eq_rect[2]:]
                if (crop.shape[0] * crop.shape[1] < 4):
                    break
                crop = convert_to_binary_inv(crop)
                debug_write(crop, '')
                contours_right = cv2.findContours(
                    crop, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

                group_right = [cv2.boundingRect(c) for c in contours_right]
                group_right.sort(key=lambda r: r[0])
                for rect_right in group_right:
                    if (max(rect_right[2], rect_right[3]) <
                            crop.shape[0] * 0.3):
                        continue
                    res_right = detect_rect(rect_right, crop)
                    res_str += config.CLASS_toString[res_right]
                break

        eq = res_str.split('=')
        if (len(eq) == 2):
            # '/' cannot appear in a file name, so it is written as 'd'
            res_str = res_str.replace("/", "d")
            print(res_str)
            if str_to_num(eq[0]) == str_to_num(eq[1]):
                # both sides evaluate to the same value
                color, prefix = (46, 255, 87), ''
            elif eq[1] == "":
                # nothing recognised on the right-hand side
                color, prefix = (46, 87, 255), ''
            else:
                # sides differ: saved with an 'x_' prefix
                color, prefix = (255, 46, 87), 'x_'
            cv2.rectangle(i_image_3_color, (rect[0], rect[1]),
                          (rect[2], rect[3]), color, 2)
            cv2.imwrite('./res/' + prefix + res_str + '.png',
                        i_image[rect[1]:rect[3], rect[0]:rect[2]])

    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    # fall back to the plain box wherever no polygon was produced
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    # show the annotated image and wait for a key press
    cv2.imshow('', i_image_3_color)
    cv2.waitKey()
    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
Ejemplo n.º 21
0
    def get_bounding_box(self, image_file, verbose=False):
        """
        Get the bounding boxes from image_file

        The image is read with ``cv2.imread``, resized and normalised, and
        run through ``self.net``; the score maps are converted to boxes that
        are scaled back with the inverse of the resize ratio.

        :param image_file: path of the image to read
        :param verbose: accepted for backward compatibility; it currently
            has no effect
        :return: tuple ``(boxes, list_images, center_point, img_dim)`` where
            ``boxes`` are the adjusted detection boxes, ``list_images`` is
            the result of ``get_box_img(boxes, image)``, ``center_point`` is
            a list of ``[x, y]`` centres of each box's axis-aligned extent
            (computed from integer-truncated corners) and ``img_dim`` is the
            ``shape`` of the image as read
        :raises OSError: if the image cannot be read
        """
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread signals failure by returning None, not by raising
            raise OSError("could not read image: {}".format(image_file))
        img_dim = image.shape
        img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
            image,
            self.canvas_size,
            interpolation=cv2.INTER_LINEAR,
            mag_ratio=self.mag_ratio)

        ratio_h = ratio_w = 1 / target_ratio

        # preprocessing
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
        if self.cuda:
            x = x.cuda()

        # forward pass
        with torch.no_grad():
            y, feature = self.net(x)

        # make score and link map
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()
        boxes, _ = craft_utils.getDetBoxes(score_text, score_link,
                                           self.text_threshold,
                                           self.link_threshold,
                                           self.low_text, self.poly)

        boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

        center_point = []
        for _b in boxes:
            # int32 rather than int16 so coordinates above 32767 still fit
            b = np.array(_b, dtype=np.int32)
            xmin = np.min(b[:, 0])
            ymin = np.min(b[:, 1])
            xmax = np.max(b[:, 0])
            ymax = np.max(b[:, 1])
            center_point.append(
                [xmin + (xmax - xmin) / 2, ymin + (ymax - ymin) / 2])

        list_images = get_box_img(boxes, image)

        return boxes, list_images, center_point, img_dim
Ejemplo n.º 22
0
def detect_net(net, image, text_threshold, link_threshold, low_text, cuda,
               poly, refine_net, res_path):
    t0 = time.time()

    origin_image_1_channel = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    origin_image_3_color = np.array(image)
    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    cv2.imwrite("core_link.jpg", score_text * 255)
    cv2.imwrite("score_link.jpg", score_link * 255)

    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    # 获取CRAFT生成的框
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment

    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    '处理裂开的box,相邻的放在同一组'
    # 广度优先合并相邻的框
    # 距离矩阵构建
    all_rect_cx_cy = np.zeros((len(boxes), 2))
    for i in range(len(boxes)):
        box = boxes[i]
        left = min(box[0][0], box[1][0], box[2][0], box[3][0])
        right = max(box[0][0], box[1][0], box[2][0], box[3][0])
        top = min(box[0][1], box[1][1], box[2][1], box[3][1])
        bottom = max(box[0][1], box[1][1], box[2][1], box[3][1])
        top = int(top)
        bottom = int(bottom)
        left = int(left)
        right = int(right)

        all_rect_cx_cy[i][0] = ((left + right) / 2) / 4
        # 减少x轴的影响
        # 还需调整
        all_rect_cx_cy[i][1] = ((top + bottom) / 2)
    mat_distance = []
    for i in range(len(all_rect_cx_cy)):
        mat_distance.append(
            np.sqrt(np.sum((all_rect_cx_cy - all_rect_cx_cy[i])**2, axis=-1)))

    print("generate distance mat;len:", len(mat_distance))

    segment_group = []
    ind_group = -1
    search_queue = deque()
    cnt_processed = 0
    processed = set()
    # 广度优先
    while cnt_processed < len(all_rect_cx_cy):  # 只要搜索队列中有数据就一直遍历下去
        if (len(search_queue) == 0):
            for i in range(len(all_rect_cx_cy)):
                if (i not in processed):
                    search_queue.append(i)
                    segment_group.append([])
                    ind_group += 1
                    break
        current_node = search_queue.popleft()  # 从队列前边获取节点,即先进先出,这是BFS的核心
        if current_node not in processed:  # 当前节点是否被访问过
            cnt_processed += 1
            processed.add(current_node)
            inds = np.argsort(mat_distance[current_node])
            segment_group[ind_group].append(boxes[current_node])
            cnt_company = 0
            distance_threshold = 20  # max(all_rect[current_node][2],all_rect[current_node][3])
            # print(distance_threshold)
            for index in inds:  # 遍历相邻节点,判断相邻节点是否已经在搜索队列
                if mat_distance[current_node][index] > distance_threshold:
                    break
                cnt_company += 1
                if cnt_company > 200:
                    print("error")
                    exit()
                if index not in search_queue:  # 如果相邻节点不在搜索队列则进行添加
                    search_queue.append(index)

    '合并在同一组的框'
    merge_boxes = []
    for segment in segment_group:
        left_s = []
        right_s = []
        top_s = []
        bottom_s = []
        for box in segment:
            left = min(box[0][0], box[1][0], box[2][0], box[3][0])
            right = max(box[0][0], box[1][0], box[2][0], box[3][0])
            top = min(box[0][1], box[1][1], box[2][1], box[3][1])
            bottom = max(box[0][1], box[1][1], box[2][1], box[3][1])
            top = math.floor(top)
            bottom = math.floor(bottom)
            left = math.floor(left)
            right = math.floor(right)

            left_s.append(left)
            right_s.append(right)
            top_s.append(top)
            bottom_s.append(bottom)
        merge_boxes.append(
            [min(left_s), min(top_s),
             max(right_s),
             max(bottom_s)])

    json_record = []
    for rect in merge_boxes:
        threshold_hw = min(rect[3] - rect[1], rect[2] - rect[0]) * 0.2
        crop = origin_image_1_channel[rect[1]:rect[3], rect[0]:rect[2]]
        # debug_write(crop,"exp");

        # adaptiveThreshold
        binary_img = cv2.adaptiveThreshold(crop, 255,
                                           cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY_INV, 31, 10)
        debug_write(binary_img, "all")

        # ret, binary_img = cv2.threshold(crop, 175, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # debug_write(binary_img,"dilate")
        # kernel = np.ones((1, 2), np.uint8)

        # binary_img_dilate = cv2.erode(binary_img, kernel, iterations=1)

        # debug_write(binary_img_dilate,"dilate")
        # print(binary_img.max(),binary_img.min())
        _, contours, _ = cv2.findContours(binary_img, cv2.RETR_EXTERNAL,
                                          cv2.CHAIN_APPROX_SIMPLE)

        group = []
        for i in range(len(contours)):
            rect_char = cv2.boundingRect(contours[i])
            group.append(rect_char)
        group.sort(key=lambda rect: rect[0])

        if (len(group) >= 1):
            last_x_start = group[0][0]
            last_x_end = group[0][0] + group[0][2]
            last = group[0]
        i = 1

        '合并=/等符号'
        while i < len(group) and i >= 1:
            now = group[i]
            cx = now[0] + now[2] / 2
            cy = now[1] + now[3] / 2
            last_cy = last[1] + last[3] / 2
            y_near = abs(last_cy - cy) < (last_x_end - last_x_start) * 0.6
            if (last_x_start < cx and cx < last_x_end and y_near):
                group.pop(i)
                i -= 1
                x1 = min(now[0], group[i][0])
                y1 = min(now[1], group[i][1])
                x2 = max(now[0] + now[2], group[i][0] + group[i][2])
                y2 = max(now[1] + now[3], group[i][1] + group[i][3])
                group[i] = (x1, y1, x2 - x1, y2 - y1)
            else:
                last_x_start = group[i][0]
                last_x_end = group[i][0] + group[i][2]
                last = group[i]
            i += 1
        # if(len(group)<4 or len(group)>16):
        #     continue
        '检测每个框及其结果'

        json_record_perline = []

        rect_set = []
        res_set = []

        # def detect_rect(rect_char, binary_img):
        #
        #     crop_char = binary_img[
        #                 rect_char[1]:
        #                 rect_char[1] + rect_char[3],
        #                 rect_char[0]:
        #                 rect_char[0] + rect_char[2]]
        #
        #     debug_crop_char = crop_char
        #     if crop_char.shape[0]*6 < crop_char.shape[1]:
        #         return '-'
        #     if crop_char.shape[0] < 2 or crop_char.shape[1] < 2:
        #         return ''
        #     debug_write(crop_char, "detect_rect")
        #     crnn_text_result = recognizer(crop_char)
        #
        #     crop_char = torch.tensor(crop_char, dtype=torch.int)
        #
        #     crop_char = adapt_size(crop_char)
        #     crop_char = crop_char.float().to(device)
        #     res = classifer_box.eval(crop_char.unsqueeze(0)).squeeze().int().item()
        #
        #     print(config.CLASS[res], crnn_text_result)
        #
        #     return config.CLASS_toString[res]

        def detect_rect(rect_char, binary_img, before_str):

            crop_char = binary_img[rect_char[1]:rect_char[1] + rect_char[3],
                                   rect_char[0]:rect_char[0] + rect_char[2]]

            # 减号
            # print(crop_char.shape)
            # if crop_char.shape[0] * 3 < crop_char.shape[1] and crop_char.mean() > 128:
            #     return '-'
            # if crop_char.shape[1] * 3 < crop_char.shape[0] and crop_char.mean() > 128:
            #     return '1'
            # 区域过小
            if crop_char.shape[0] < 2 and crop_char.shape[1] < 2:
                return ''

            # debug_write(crop_char, "detect_rect")

            # if crop_char.shape[1] < crop_char.shape[0] // 2:
            #     fx = 4
            # else:
            #     fx = fy

            # crnn
            crnn_text_result = recognizer(crop_char)
            # debug_write(crop_char,crnn_text_result.replace('/','d'))

            # dense
            # crop_char = torch.tensor(crop_char, dtype=torch.int)
            # crop_char = adapt_size(crop_char)
            # crop_char = crop_char.float().to(device)
            # res = classifer_box.eval(crop_char.unsqueeze(0)).squeeze().int().item()

            # print(crnn_text_result,compress(crnn_text_result))

            # print(crnn_text_result)
            return compress(crnn_text_result)

        res_str = ''
        for i in range(len(group)):
            rect_char = group[i]
            if max(rect_char[2], rect_char[3]) < threshold_hw:
                continue
            res = detect_rect(rect_char, binary_img, before_str=res_str)
            res_set.append(res)
            rect_set.append(rect_char)
            res_str += res
        print(res_str)
        # for i in range(len(res_set)):
        #     res = res_set[i]
        #     res_str += config.CLASS_toString[res]
        #
        #     json_record_perline.append({'rect_char': rect_set[i], 'char': config.CLASS_toString[res]})
        #
        #     # print('left',res)
        #     '等号右边颜色浅 针对右边进行二值化后重新检测'
        #     if (config.CLASS_is_eq(res)):
        #         rect_char = rect_set[i]
        #
        #         crop = origin_image_1_channel[rect[1]:rect[3], rect[0]:rect[2]][:, rect_char[0] + rect_char[2]:]
        #
        #         # 记录相对位置
        #         relative = (rect_char[0] + rect_char[2], 0, 0, 0)
        #
        #         if (crop.shape[0] * crop.shape[1] < 4):
        #             break
        #         # 自适应算法
        #         # crop = convert_to_binary_inv(crop)
        #         crop = cv2.adaptiveThreshold(crop, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        #                               cv2.THRESH_BINARY_INV, 31, 10)
        #         # debug_write(crop,'')
        #
        #         _, contours_right, _ = cv2.findContours(crop, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        #
        #         group_right = []
        #         for i in range(len(contours_right)):
        #             rect_char_right = cv2.boundingRect(contours_right[i])
        #             group_right.append(rect_char_right)
        #         group_right.sort(key=lambda rect: rect[0])
        #         for rect_char in group_right:
        #             if (max(rect_char[2], rect_char[3]) < crop.shape[0] * 0.3):
        #                 continue
        #             res_right = detect_rect(rect_char, crop)
        #             res_str += config.CLASS_toString[res_right]
        #             json_record_perline.append({'rect_char': (
        #                 relative[0] + rect_char[0],
        #                 relative[1] + rect_char[1],
        #                 rect_char[2],
        #                 rect_char[3]
        #             ), 'char': config.CLASS_toString[res_right]})
        #
        #         break

        eq = res_str.split('=')
        if (len(eq) >= 2):
            res_str = res_str.replace("/", "d")

            json_record.append({
                'rect_expression':
                (rect[0], rect[1], rect[2] - rect[0], rect[3] - rect[1]),
                'expression':
                json_record_perline
            })
            with open("resjson/" + res_str + ".json", 'w') as file_object:
                file_object.write(
                    json.dumps({
                        'rect_expression':
                        (rect[0], rect[1], rect[2] - rect[0],
                         rect[3] - rect[1]),
                        'expression':
                        json_record_perline
                    }))

            if str_to_num(eq[0]) == str_to_num(eq[-1]):
                # cv2.rectangle(origin_image_3_color, (rect[0], rect[1]), (rect[2] , rect[3]),	(46,255,87), 2)
                cv2.line(origin_image_3_color, (rect[0], rect[3]),
                         (rect[2], rect[3]), (46, 255, 87), 2)
                cv2.imwrite(
                    './res/' + res_str + '.png',
                    origin_image_1_channel[rect[1]:rect[3], rect[0]:rect[2]])
            elif eq[-1] == "":
                cv2.rectangle(origin_image_3_color, (rect[0], rect[1]),
                              (rect[2], rect[3]), (255, 46, 87), 2)
                cv2.imwrite(
                    './res/O' + res_str + '.png',
                    origin_image_1_channel[rect[1]:rect[3], rect[0]:rect[2]])
            else:
                cv2.rectangle(origin_image_3_color, (rect[0], rect[1]),
                              (rect[2], rect[3]), (46, 87, 255), 2)
                cv2.imwrite(
                    './res/X' + res_str + '.png',
                    origin_image_1_channel[rect[1]:rect[3], rect[0]:rect[2]])

    print(res_path)
    cv2.imwrite(res_path, origin_image_3_color)

    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]
    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
    cv2.imwrite("xxxx.png", ret_score_text)

    # for line in json_record:
    #     print(line)
    data2 = json.dumps(json_record)
    return data2