Example #1
def get_boxes(img_c):
    # Resize
    img_r, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        img_c,
        square_size=square_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=mag_ratio)
    # ratios for mapping detections back to the original image size
    ratio_h = ratio_w = 1 / target_ratio
    # preprocessing of the image
    x = imgproc.normalizeMeanVariance(img_r)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]
    # forward pass
    y, _ = net(x)
    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()
    # Post-processing
    boxes, _ = craft_utils.getDetBoxes(score_text,
                                       score_link,
                                       text_threshold=text_threshold,
                                       link_threshold=link_threshold,
                                       low_text=low_text,
                                       poly=False)
    # Coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    return boxes
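A minimal driver for the helper above, as a hedged sketch: get_boxes relies on module-level globals (net, square_size, mag_ratio and the thresholds), so this assumes it lives in the same script; the CRAFT import, weight path and file names are illustrative assumptions, not part of the original example.

# Hypothetical driver for get_boxes(); every name defined here is an assumption.
import torch
import imgproc
from craft import CRAFT  # assumed model definition used by these repositories

square_size, mag_ratio = 1280, 1.5                      # assumed canvas / magnification settings
text_threshold, link_threshold, low_text = 0.7, 0.4, 0.4

net = CRAFT()
# official checkpoints may need key renaming first (see copyStateDict in Example #22)
net.load_state_dict(torch.load('craft_mlt_25k.pth', map_location='cpu'))
net.eval()

img_c = imgproc.loadImage('sample.jpg')                 # RGB image as a numpy array
with torch.no_grad():
    boxes = get_boxes(img_c)                            # each box is a 4x2 array of corner points
print(len(boxes), 'text boxes found')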
Example #2
def test_net(canvas_size, mag_ratio, net, image, text_threshold,
             link_threshold, low_text, poly, device):
    # resize
    img_resized, target_ratio, size_heatmap = resize_aspect_ratio(
        image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    x = x.to(device)

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # Post-processing
    boxes, polys = getDetBoxes(score_text, score_link, text_threshold,
                               link_threshold, low_text, poly)

    # coordinate adjustment
    boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]

    return boxes, polys
Example #3
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    t0 = time.time()
    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_AREA,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio
    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()
    # forward pass
    with torch.no_grad():
        y, feature = net(x)
    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()
    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)
    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]
    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
    return boxes, polys, ret_score_text
Example #4
    def process(self, craft, seq, key, sub_img):
        img_resized, target_ratio, size_heatmap = resize_aspect_ratio(
            sub_img, 2560, interpolation=cv2.INTER_LINEAR, mag_ratio=1.)
        ratio_h = ratio_w = 1 / target_ratio

        x = normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]
        x = x.to(self.device)
        y, feature = craft(x)
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()
        boxes, polys = getDetBoxes(score_text,
                                   score_link,
                                   text_threshold=0.7,
                                   link_threshold=0.4,
                                   low_text=0.4,
                                   poly=False)
        boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = adjustResultCoordinates(polys, ratio_w, ratio_h)
        for k in range(len(polys)):
            if polys[k] is None:
                polys[k] = boxes[k]
        result = []
        for i, box in enumerate(polys):
            poly = np.array(box).astype(np.int32).reshape((-1))
            result.append(poly)
        horizontal_list, free_list = group_text_box(result,
                                                    slope_ths=0.8,
                                                    ycenter_ths=0.5,
                                                    height_ths=1,
                                                    width_ths=1,
                                                    add_margin=0.1)
        # horizontal_list = [i for i in horizontal_list if i[0] > 0 and i[1] > 0]
        min_size = 20
        if min_size:
            horizontal_list = [
                i for i in horizontal_list
                if max(i[1] - i[0], i[3] - i[2]) > 10
            ]
            free_list = [
                i for i in free_list
                if max(diff([c[0] for c in i]), diff([c[1]
                                                      for c in i])) > min_size
            ]
        seq[:] = [None] * len(horizontal_list)
        model, vocab = build_model(self.config)
        model.load_state_dict(
            torch.load(self.weights, map_location=torch.device('cpu')))

        for i, ele in enumerate(horizontal_list):
            ele = [0 if v < 0 else v for v in ele]  # clamp negative coordinates to 0
            img = sub_img[ele[2]:ele[3], ele[0]:ele[1], :]
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(img.astype(np.uint8))
            p = threading.Thread(target=self.predict,
                                 args=(model, vocab, seq, key, i, img))
            p.start()
            p.join()  # joining immediately after start() makes the predictions run sequentially
Example #5
def test_net(net, image, text_threshold, link_threshold, low_text, cuda,
             image_path):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    y, _ = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    if args.debug:
        np.save(
            os.path.join(
                './debug',
                os.path.basename(image_path).split('.')[0] +
                '_score_text.npy'), score_text)
        np.save(
            os.path.join(
                './debug',
                os.path.basename(image_path).split('.')[0] +
                '_score_link.npy'), score_link)

    # Post-processing
    boxes, _ = craft_utils.getDetBoxes(score_text, score_link, text_threshold,
                                       link_threshold, low_text)
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, ret_score_text
Example #6
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly,
             ocr_type):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    y, _ = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().detach().numpy()
    score_link = y[0, :, :, 1].cpu().detach().numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = utils.getDetBoxes(score_text, score_link, text_threshold,
                                     link_threshold, low_text, poly, ocr_type)

    # coordinate adjustment
    boxes = utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = utils.adjustResultCoordinates(polys, ratio_w, ratio_h)

    if ocr_type == 'single_char':
        boxes = utils.cluster_sort(image.shape, boxes)

    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
Example #7
 def __getitem__(self, idx):
     image = imgproc.loadImage(self.image_list[idx])
     img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
         image,
         self.canvas_size,
         interpolation=cv2.INTER_LINEAR,
         mag_ratio=self.mag_ratio)
     x = imgproc.normalizeMeanVariance(img_resized)
     x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
     # x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
     return x, 1
Example #8
    def test_net(self, image_opencv):

        # resize
        img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
            image_opencv,
            self.canvas_size,
            interpolation=self.interpolation,
            mag_ratio=self.mag_ratio)
        ratio_h = ratio_w = 1 / target_ratio

        # preprocessing
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]

        if self.cuda:
            x = x.cuda()

        # forward pass
        y, feature = self.net(x)

        # make score and link map
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()

        # refine link
        t0 = time.time()
        if self.refine_net is not None:
            y_refiner = self.refine_net(y, feature)
            score_link = y_refiner[0, :, :, 0].cpu().data.numpy()
        t0 = time.time() - t0
        t1 = time.time()

        # Post-processing
        boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                               self.text_threshold,
                                               self.link_threshold,
                                               self.low_text, self.poly)
        #print(boxes)

        # coordinate adjustment
        boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
        for k in range(len(polys)):
            if polys[k] is None: polys[k] = boxes[k]
        t1 = time.time() - t1

        if self.show_time:
            print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))
        return boxes, polys
Example #9
    def test_net(self, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None):
        t0 = time.time()

        # resize
        img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio)
        ratio_h = ratio_w = 1 / target_ratio

        # preprocessing
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
        x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
        if cuda:
            x = x.cuda()

        # forward pass
        with torch.no_grad():
            y, feature = self.net(x)

        # make score and link map
        score_text = y[0,:,:,0].cpu().data.numpy()
        score_link = y[0,:,:,1].cpu().data.numpy()

        # refine link
        if refine_net is not None:
            with torch.no_grad():
                y_refiner = refine_net(y, feature)
            score_link = y_refiner[0,:,:,0].cpu().data.numpy()

        t0 = time.time() - t0
        t1 = time.time()

        # Post-processing
        boxes, polys = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text, poly)

        # coordinate adjustment
        boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
        for k in range(len(polys)):
            if polys[k] is None: polys[k] = boxes[k]

        t1 = time.time() - t1

        # render results (optional)
        render_img = score_text.copy()
        render_img = np.hstack((render_img, score_link))
        ret_score_text = imgproc.cvt2HeatmapImg(render_img)

        # if show_time : print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

        return boxes, polys, ret_score_text
Example #10
def get_prediction(net,
                   image,
                   text_threshold,
                   link_threshold,
                   low_text,
                   cuda,
                   poly,
                   refine_net=None):

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, 1280, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]

    return boxes, polys
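get_prediction follows the same pattern with an optional link refiner; below is a hedged wiring sketch, assuming the loading helpers shown in Example #22 (CRAFT, copyStateDict, imgproc.loadImage) and an illustrative weight path.

# Hypothetical setup for get_prediction(); paths and default thresholds are assumptions.
import torch
import imgproc
from craft import CRAFT

net = CRAFT()
net.load_state_dict(copyStateDict(torch.load('./weights/craft_mlt_25k.pth', map_location='cpu')))
net.eval()

image = imgproc.loadImage('./test_data/sample.jpg')
boxes, polys = get_prediction(net, image,
                              text_threshold=0.7, link_threshold=0.4, low_text=0.4,
                              cuda=False, poly=False,
                              refine_net=None)          # pass a loaded refiner model here to refine links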
Example #11
def test_net(net, image, text_threshold, link_threshold, low_text, cuda):

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        opt.MAXIMUM_IMAGE_SIZE,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=opt.MAG_RATIO)
    ratio_h = ratio_w = 1 / target_ratio

    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)
    x = Variable(x.unsqueeze(0))

    if cuda: x = x.cuda()

    # predict
    y, _ = net(x)

    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # post-process : get shape of bounding box
    boxes, polys, word_boxes, word_polys, line_boxes, line_polys = ltd_utils.getDetBoxes(
        score_text, score_link, text_threshold, link_threshold, low_text)

    boxes = ltd_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = ltd_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)

    word_boxes = ltd_utils.adjustResultCoordinates(word_boxes, ratio_w,
                                                   ratio_h)
    word_polys = ltd_utils.adjustResultCoordinates(word_polys, ratio_w,
                                                   ratio_h)

    line_boxes = ltd_utils.adjustResultCoordinates(line_boxes, ratio_w,
                                                   ratio_h)
    line_polys = ltd_utils.adjustResultCoordinates(line_polys, ratio_w,
                                                   ratio_h)

    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]
    for a in range(len(word_polys)):
        if word_polys[a] is None: word_polys[a] = word_boxes[a]
    for l in range(len(line_polys)):
        if line_polys[l] is None: line_polys[l] = line_boxes[l]

    return polys, word_polys, line_polys, score_text
Example #12
def representative_data_gen():
    for file in os.listdir(dataset_path)[:10]:
        file_path = dataset_path + file
        image = imgproc.loadImage(file_path)
        image = cv2.resize(image,
                           dsize=(800, 1280),
                           interpolation=cv2.INTER_LINEAR)
        img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
            image, 1280, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)
        ratio_h = ratio_w = 1 / target_ratio

        # preprocessing
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
        x = x.cpu().detach().numpy()
        yield [x]
Example #13
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly,filename,result_folder=result_folder):
    t0 = time.time()
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    #cv2.imwrite("test.jpg",x)
    print("###")
    x = tf.expand_dims(x,0)
    print(x.shape)

    # forward pass
    y, _ = net(x)

    # make score and link map
    score_text = y[0,:,:,0].numpy()
    score_link = y[0,:,:,1].numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
    #print("score")
    #print(ret_score_text.shape)
    cv2.imwrite(result_folder + filename + "_mask.jpg",ret_score_text)


    #if show_time : print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
Example #14
    def __init__(self, args):
        filelist, _, _ = file_utils.list_files('./data/train/data')
        self.images = []
        self.confmaps = []
        self.scores_region = []
        self.scores_link = []
        for filename in filelist:
            # get datapath
            dataset = os.path.dirname(filename).split(os.sep)[-1]
            filenum = os.path.splitext(os.path.basename(filename))[0]
            label_dir = './data/train/ground_truth/{}/gt_{}/'.format(
                dataset, filenum)

            # Skip samples whose ground truth has not been generated
            if not os.path.exists(label_dir):
                continue

            image = imgproc.loadImage(filename)
            score_region = torch.load(label_dir + 'region.pt')
            score_link = torch.load(label_dir + 'link.pt')
            conf_map = torch.load(label_dir + 'conf.pt')

            # resize
            img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
                image,
                args.canvas_size,
                interpolation=cv2.INTER_LINEAR,
                mag_ratio=args.mag_ratio)

            # Image Preprocess
            x = imgproc.normalizeMeanVariance(img_resized)
            x = x.transpose((2, 0, 1))  # [h, w, c] to [c, h, w]

            h, w, _ = img_resized.shape

            # GT reshape: heatmaps are half resolution; cv2.resize expects dsize as (width, height)
            score_region = cv2.resize(score_region, dsize=(w // 2, h // 2))
            score_link = cv2.resize(score_link, dsize=(w // 2, h // 2))
            conf_map = cv2.resize(conf_map, dsize=(w // 2, h // 2))

            self.scores_region.append(score_region)
            self.scores_link.append(score_link)
            self.confmaps.append(conf_map)
            self.images.append(x)
Example #15
def gt_net(net, image, args):
    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size,
                                                                          interpolation=cv2.INTER_LINEAR,
                                                                          mag_ratio=args.mag_ratio)
    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.tensor(x).permute(2, 0, 1).unsqueeze(0)  # [h, w, c] to [b, c, h, w]
    if args.cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()

    return score_text, target_ratio
Example #16
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    y, _ = net(x)

    # make score and link maps
    score_text = y[0,:,:,0].cpu().data.numpy()
    score_link = y[0,:,:,1].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time : print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
Example #17
    def detect(self, image):
        # resize
        img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, self.canvas_size,
                                                                              interpolation=cv2.INTER_LINEAR,
                                                                              mag_ratio=self.mag_ratio)
        ratio_h = ratio_w = 1 / target_ratio

        # preprocessing
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]

        if self.cuda:
            x = x.cuda()

        # forward pass
        with torch.no_grad():
            y, feature = self.net(x)

        # make score and link map
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()

        # refine link
        if self.refine_net is not None:
            with torch.no_grad():
                y_refiner = self.refine_net(y, feature)
            score_link = y_refiner[0, :, :, 0].cpu().data.numpy()


        # Post-processing
        boxes, _ = craft_utils.getDetBoxes(score_text, score_link, self.text_threshold, self.link_threshold,
                                               self.low_text, self.poly)
        # coordinate adjustment
        boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
        toRet = []
        for box in boxes:
            toRet.append(box2xyxy(box, image.shape[0: 2]))

        return toRet
Example #18
    def test_net(self,
                 net,
                 image,
                 text_threshold,
                 link_threshold,
                 low_text,
                 poly,
                 refine_net=None):
        img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
            image, 1280, interpolation=cv.INTER_LINEAR, mag_ratio=1.5)
        ratio_h = ratio_w = 1 / target_ratio
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]

        with torch.no_grad():
            y, feature = net(x)

        # make score and link map
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()

        # Post-processing
        boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                               text_threshold, link_threshold,
                                               low_text, poly)

        # coordinate adjustment
        boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
        for k in range(len(polys)):
            if polys[k] is None: polys[k] = boxes[k]

        # render results (optional)
        render_img = score_text.copy()
        render_img = np.hstack((render_img, score_link))
        ret_score_text = imgproc.cvt2HeatmapImg(render_img)

        return boxes, polys, ret_score_text
Example #19
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    y, _ = net(x)

    # make score and link map
    score_text = y[0,:,:,0].cpu().data.numpy()
    score_link = y[0,:,:,1].cpu().data.numpy()

    return score_text
Example #20
    def __getitem__(self, i):
        # Image loading
        image = imgproc.loadImage(self.images[i])

        # Preprocess image
        img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
            image,
            self.args.canvas_size,
            interpolation=cv2.INTER_LINEAR,
            mag_ratio=self.args.mag_ratio)
        img_resized = imgproc.fill_canvas(img_resized, self.args.canvas_size)
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.tensor(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]

        # Load labels
        label_dir = self.labels[i]
        region = torch.tensor(torch.load(label_dir + 'region.pt'),
                              dtype=torch.float64)
        link = torch.tensor(torch.load(label_dir + 'link.pt'),
                            dtype=torch.float64)
        conf = torch.tensor(torch.load(label_dir + 'conf.pt'),
                            dtype=torch.float64)

        return x, region, link, conf
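A hedged sketch of how a dataset with the __getitem__ above might be consumed; CraftDataset is an assumed class name and constructor, and default batch collation is assumed to work because fill_canvas pads every sample to the same canvas size.

# Hypothetical training-loop skeleton; CraftDataset(args) is an assumed constructor.
from torch.utils.data import DataLoader

dataset = CraftDataset(args)
loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=2)

for x, region, link, conf in loader:
    # x:      [b, c, h, w] normalized image batch
    # region: region-score ground truth
    # link:   affinity (link) score ground truth
    # conf:   confidence map used to weight the loss
    pass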
Example #21
def main():
    import os
    os.makedirs('result', exist_ok=True)
    text_render.prepare_renderer()

    with open('alphabet-all-v5.txt', 'r') as fp:
        dictionary = [s[:-1] for s in fp.readlines()]
    model_ocr = OCR(dictionary, 768)
    model_ocr.load_state_dict(torch.load('ocr.ckpt', map_location='cpu'),
                              strict=False)
    model_ocr.eval()

    model = CRAFT_net()
    sd = torch.load('detect.ckpt', map_location='cpu')
    model.load_state_dict(sd['model'])
    model = model.cpu()
    model.eval()
    img = cv2.imread(args.image)
    img_bbox = np.copy(img)
    img_bbox_all = np.copy(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_resized, target_ratio, _, pad_w, pad_h = imgproc.resize_aspect_ratio(
        img, args.size, cv2.INTER_LINEAR, mag_ratio=1)
    img_to_overlay = np.copy(img_resized)
    ratio_h = ratio_w = 1 / target_ratio
    img_resized = imgproc.normalizeMeanVariance(img_resized)
    print(img_resized.shape)
    rscore, ascore, mask = test(model, img_resized)
    overlay = imgproc.cvt2HeatmapImg(rscore + ascore)
    boxes, polys = craft_utils.getDetBoxes(rscore, ascore, args.text_threshold,
                                           args.link_threshold, args.low_text,
                                           False)
    boxes = craft_utils.adjustResultCoordinates(boxes,
                                                ratio_w,
                                                ratio_h,
                                                ratio_net=2)
    polys = craft_utils.adjustResultCoordinates(polys,
                                                ratio_w,
                                                ratio_h,
                                                ratio_net=2)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]
    # merge textlines
    polys = merge_bboxes(polys, can_merge_textline)
    for [tl, tr, br, bl] in polys:
        x = int(tl[0])
        y = int(tl[1])
        width = int(tr[0] - tl[0])
        height = int(br[1] - tr[1])
        cv2.rectangle(img_bbox_all, (x, y), (x + width, y + height),
                      color=(255, 0, 0),
                      thickness=2)
    # run OCR for each textline
    textlines = run_ocr(img_bbox, polys, dictionary, model_ocr, 32)
    # merge textline to text region, filter textlines without characters
    text_regions: List[BBox] = []
    new_textlines = []
    for (poly_regions, textline_indices,
         majority_dir) in merge_bboxes_text_region(textlines):
        [tl, tr, br, bl] = poly_regions
        x = int(tl[0]) - 5
        y = int(tl[1]) - 5
        width = int(tr[0] - tl[0]) + 10
        height = int(br[1] - tr[1]) + 10
        text = ''
        logprob_lengths = []
        for textline_idx in textline_indices:
            if not text:
                text = textlines[textline_idx].text
            else:
                last_ch = text[-1]
                cur_ch = textlines[textline_idx].text[0]
                if ord(last_ch) > 255 and ord(cur_ch) > 255:
                    text += textlines[textline_idx].text
                else:
                    text += ' ' + textlines[textline_idx].text
            logprob_lengths.append((np.log(textlines[textline_idx].prob),
                                    len(textlines[textline_idx].text)))
        vc = count_valuable_text(text)
        total_logprobs = 0.0
        for (logprob, length) in logprob_lengths:
            total_logprobs += logprob * length
        total_logprobs /= sum([x[1] for x in logprob_lengths])
        # filter text region without characters
        if vc > 1:
            region = BBox(x, y, width, height, text, np.exp(total_logprobs))
            region.textline_indices = []
            region.majority_dir = majority_dir
            text_regions.append(region)
            for textline_idx in textline_indices:
                region.textline_indices.append(len(new_textlines))
                new_textlines.append(textlines[textline_idx])
    textlines = new_textlines
    # create mask
    from text_mask_utils import filter_masks, main_process
    mask_resized = cv2.resize(mask, (mask.shape[1] * 2, mask.shape[0] * 2),
                              interpolation=cv2.INTER_LINEAR)
    if pad_h > 0:
        mask_resized = mask_resized[:-pad_h, :]
    elif pad_w > 0:
        mask_resized = mask_resized[:, :-pad_w]
    mask_resized = cv2.resize(mask_resized,
                              (img.shape[1] // 2, img.shape[0] // 2),
                              interpolation=cv2.INTER_LINEAR)
    img_resized_2 = cv2.resize(img, (img.shape[1] // 2, img.shape[0] // 2),
                               interpolation=cv2.INTER_LINEAR)
    mask_resized[mask_resized > 250] = 255
    text_lines = [(a.x // 2, a.y // 2, a.w // 2, a.h // 2) for a in textlines]
    mask_ccs, cc2textline_assignment = filter_masks(mask_resized, text_lines)
    cv2.imwrite('result/mask_filtered.png', reduce(cv2.bitwise_or, mask_ccs))
    final_mask, textline_colors = main_process(img_resized_2, mask_ccs,
                                               text_lines,
                                               cc2textline_assignment)
    final_mask = cv2.resize(final_mask, (img.shape[1], img.shape[0]),
                            interpolation=cv2.INTER_LINEAR)
    # run inpainting
    img_inpainted = run_inpainting(img, final_mask)
    # translate text region texts
    texts = '\n'.join([r.text for r in text_regions])
    trans_ret = baidu_translator.translate('ja', 'zh-CN', texts)
    translated_sentences = []
    batch = len(text_regions)
    if len(trans_ret) < batch:
        translated_sentences.extend(trans_ret)
        translated_sentences.extend([''] * (batch - len(trans_ret)))
    elif len(trans_ret) > batch:
        translated_sentences.extend(trans_ret[:batch])
    else:
        translated_sentences.extend(trans_ret)
    # render translated texts
    img_canvas = np.copy(img_inpainted)
    for trans_text, region in zip(translated_sentences, text_regions):
        print(region.text)
        print(trans_text)
        print(region.majority_dir, region.x, region.y, region.w, region.h)
        img_bbox = cv2.rectangle(img_bbox, (region.x, region.y),
                                 (region.x + region.w, region.y + region.h),
                                 color=(0, 0, 255),
                                 thickness=2)
        for idx in region.textline_indices:
            txtln = textlines[idx]
            img_bbox = cv2.rectangle(img_bbox, (txtln.x, txtln.y),
                                     (txtln.x + txtln.w, txtln.y + txtln.h),
                                     color=textline_colors[idx],
                                     thickness=2)
        if region.majority_dir == 'h':
            text_render.put_text_horizontal(img_canvas, trans_text,
                                            len(region.textline_indices),
                                            region.x, region.y, region.w,
                                            region.h, textline_colors[idx],
                                            None)
        else:
            text_render.put_text_vertical(img_canvas, trans_text,
                                          len(region.textline_indices),
                                          region.x, region.y, region.w,
                                          region.h, textline_colors[idx], None)

    cv2.imwrite('result/rs.png', imgproc.cvt2HeatmapImg(rscore))
    cv2.imwrite('result/as.png', imgproc.cvt2HeatmapImg(ascore))
    cv2.imwrite('result/textline.png', overlay)
    cv2.imwrite('result/bbox.png', img_bbox)
    cv2.imwrite('result/bbox_unfiltered.png', img_bbox_all)
    cv2.imwrite(
        'result/overlay.png',
        cv2.cvtColor(
            overlay_image(
                img_to_overlay,
                cv2.resize(overlay,
                           (img_resized.shape[1], img_resized.shape[0]),
                           interpolation=cv2.INTER_LINEAR)),
            cv2.COLOR_RGB2BGR))
    cv2.imwrite('result/mask.png', final_mask)
    cv2.imwrite('result/masked.png',
                cv2.cvtColor(img_inpainted, cv2.COLOR_RGB2BGR))
    cv2.imwrite('result/final.png', cv2.cvtColor(img_canvas,
                                                 cv2.COLOR_RGB2BGR))
Example #22
    return new_state_dict


# load net
net = CRAFT()  # initialize
net = net.cuda()
#net = torch.nn.DataParallel(net)

net.load_state_dict(copyStateDict(torch.load('./weights/craft_mlt_25k.pth')))
net.eval()

# load data
image = imgproc.loadImage('./test_data/chi/0021_crop.jpg')

# resize
img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
    image, 384, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)
ratio_h = ratio_w = 1 / target_ratio

# preprocessing
x = imgproc.normalizeMeanVariance(img_resized)
x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
onnx_input = x.data.numpy()
x = x.cuda()

# trace export
torch.onnx.export(net,
                  x,
                  './craft_opset10.onnx',
                  export_params=True,
                  verbose=True,
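The export call above is truncated in this listing; assuming it completes, a hedged sanity check of the exported graph with onnxruntime (not part of the original script) could reuse the onnx_input array kept above.

# Hypothetical verification of the exported model with onnxruntime.
import onnxruntime as ort

sess = ort.InferenceSession('./craft_opset10.onnx', providers=['CPUExecutionProvider'])
input_name = sess.get_inputs()[0].name
outputs = sess.run(None, {input_name: onnx_input})   # onnx_input is the numpy copy of x made above
score_text = outputs[0][0, :, :, 0]
score_link = outputs[0][0, :, :, 1]
print(score_text.shape, score_link.shape)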
Example #23
def test_net(net,
             image,
             text_threshold,
             link_threshold,
             low_text,
             cuda,
             poly,
             refine_net=None):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment

    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    # Group split boxes: put adjacent boxes in the same group
    # merge neighbouring boxes with a breadth-first search
    # build the distance matrix
    all_rect_cx_cy = np.zeros((len(boxes), 2))
    for i in range(len(boxes)):
        box = boxes[i]
        left = min(box[0][0], box[1][0], box[2][0], box[3][0])
        right = max(box[0][0], box[1][0], box[2][0], box[3][0])
        top = min(box[0][1], box[1][1], box[2][1], box[3][1])
        bottom = max(box[0][1], box[1][1], box[2][1], box[3][1])
        top = int(top)
        bottom = int(bottom)
        left = int(left)
        right = int(right)

        all_rect_cx_cy[i][0] = ((left + right) / 2) / 4
        # reduce the influence of the x axis
        # still needs tuning
        all_rect_cx_cy[i][1] = ((top + bottom) / 2)
    mat_distance = []
    for i in range(len(all_rect_cx_cy)):
        mat_distance.append(
            np.sqrt(np.sum((all_rect_cx_cy - all_rect_cx_cy[i])**2, axis=-1)))
    print("generate distance mat;len:", len(mat_distance))

    segment_group = []
    ind_group = -1
    search_queue = deque()
    cnt_processed = 0
    processed = set()
    # breadth-first search
    while cnt_processed < len(all_rect_cx_cy):  # keep traversing while there is data in the search queue
        if (len(search_queue) == 0):
            for i in range(len(all_rect_cx_cy)):
                if (i not in processed):
                    search_queue.append(i)
                    segment_group.append([])
                    ind_group += 1
                    break
        current_node = search_queue.popleft()  # pop from the front of the queue (FIFO), the core of BFS
        if current_node not in processed:  # has the current node been visited?
            cnt_processed += 1
            processed.add(current_node)
            inds = np.argsort(mat_distance[current_node])
            segment_group[ind_group].append(boxes[current_node])
            cnt_company = 0
            distance_threshold = 20  #max(all_rect[current_node][2],all_rect[current_node][3])
            # print(distance_threshold)
            for index in inds:  # visit neighbouring nodes and check whether they are already queued
                if mat_distance[current_node][index] > distance_threshold:
                    break
                cnt_company += 1
                if cnt_company > 200:
                    print("error")
                    exit()
                if index not in search_queue:  # add the neighbour if it is not already in the search queue
                    search_queue.append(index)

    # merge the boxes that belong to the same group
    merge_boxes = []
    for segment in segment_group:
        left_s = []
        right_s = []
        top_s = []
        bottom_s = []
        for box in segment:
            left = min(box[0][0], box[1][0], box[2][0], box[3][0])
            right = max(box[0][0], box[1][0], box[2][0], box[3][0])
            top = min(box[0][1], box[1][1], box[2][1], box[3][1])
            bottom = max(box[0][1], box[1][1], box[2][1], box[3][1])
            top = math.floor(top)
            bottom = math.floor(bottom)
            left = math.floor(left)
            right = math.floor(right)

            left_s.append(left)
            right_s.append(right)
            top_s.append(top)
            bottom_s.append(bottom)
        merge_boxes.append(
            [min(left_s), min(top_s),
             max(right_s),
             max(bottom_s)])

    for rect in merge_boxes:
        threshold_hw = min(rect[3] - rect[1], rect[2] - rect[0]) * 0.2
        crop = i_image[rect[1]:rect[3], rect[0]:rect[2]]
        ret, binary_img = cv2.threshold(
            crop, 175, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        _, contours, _ = cv2.findContours(binary_img, cv2.RETR_EXTERNAL,
                                          cv2.CHAIN_APPROX_SIMPLE)

        group = []
        for i in range(len(contours)):
            rect_char = cv2.boundingRect(contours[i])
            group.append(rect_char)
        group.sort(key=lambda rect: rect[0])

        last_x_start = group[0][0]
        last_x_end = group[0][0] + group[0][2]
        last = group[0]
        i = 1

        # merge '=' and similar split symbols
        while i < len(group) and i >= 1:
            now = group[i]
            cx = now[0] + now[2] / 2
            cy = now[1] + now[3] / 2
            last_cy = last[1] + last[3] / 2
            y_near = abs(last_cy - cy) < (last_x_end - last_x_start) * 0.6
            if (last_x_start < cx and cx < last_x_end and y_near):
                group.pop(i)
                i -= 1
                x1 = min(now[0], group[i][0])
                y1 = min(now[1], group[i][1])
                x2 = max(now[0] + now[2], group[i][0] + group[i][2])
                y2 = max(now[1] + now[3], group[i][1] + group[i][3])
                group[i] = (x1, y1, x2 - x1, y2 - y1)
            else:
                last_x_start = group[i][0]
                last_x_end = group[i][0] + group[i][2]
                last = group[i]
            i += 1
        if (len(group) < 4 or len(group) > 16):
            continue
        # detect each character box and collect its classification result
        rect_set = []
        res_set = []

        def detect_rect(rect_char, binary_img):
            crop_char = binary_img[rect_char[1]:rect_char[1] + rect_char[3],
                                   rect_char[0]:rect_char[0] + rect_char[2]]
            crop_char = torch.tensor(crop_char, dtype=torch.int)
            crop_char = adapt_size(crop_char)
            crop_char = crop_char.float().cuda()
            res = classifer_box.eval(
                crop_char.unsqueeze(0)).squeeze().int().item()
            debug_write(
                crop_char[0].cpu().int().numpy().astype(np.uint8) * 255,
                config.CLASS_toString[res])
            return res

        for i in range(len(group)):
            rect_char = group[i]
            if max(rect_char[2], rect_char[3]) < threshold_hw:
                continue
            res = detect_rect(rect_char, binary_img)
            res_set.append(res)
            rect_set.append(rect_char)

        res_str = ''
        for i in range(len(res_set)):
            res = res_set[i]
            res_str += config.CLASS_toString[res]
            # print('left',res)
            # the right side of the '=' is faint, so re-binarize that region and detect again
            if (config.CLASS_is_eq(res)):
                rect_char = rect_set[i]

                crop = i_image[rect[1]:rect[3],
                               rect[0]:rect[2]][:,
                                                rect_char[0] + rect_char[2]:]
                if (crop.shape[0] * crop.shape[1] < 4):
                    break
                crop = convert_to_binary_inv(crop)
                debug_write(crop, '')
                _, contours_right, _ = cv2.findContours(
                    crop, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

                group_right = []
                for i in range(len(contours_right)):
                    rect_char_right = cv2.boundingRect(contours_right[i])
                    group_right.append(rect_char_right)
                group_right.sort(key=lambda rect: rect[0])
                for rect_char in group_right:
                    if (max(rect_char[2], rect_char[3]) < crop.shape[0] * 0.3):
                        continue
                    res_right = detect_rect(rect_char, crop)
                    res_str += config.CLASS_toString[res_right]
                break
        eq = res_str.split('=')
        if (len(eq) == 2):
            global i_image_3_color
            res_str = res_str.replace("/", "d")
            print(res_str)
            if str_to_num(eq[0]) == str_to_num(eq[1]):
                cv2.rectangle(i_image_3_color, (rect[0], rect[1]),
                              (rect[2], rect[3]), (46, 255, 87), 2)
                cv2.imwrite('./res/' + res_str + '.png',
                            i_image[rect[1]:rect[3], rect[0]:rect[2]])
            elif eq[1] == "":
                cv2.rectangle(i_image_3_color, (rect[0], rect[1]),
                              (rect[2], rect[3]), (46, 87, 255), 2)
                cv2.imwrite('./res/' + res_str + '.png',
                            i_image[rect[1]:rect[3], rect[0]:rect[2]])
            else:
                cv2.rectangle(i_image_3_color, (rect[0], rect[1]),
                              (rect[2], rect[3]), (255, 46, 87), 2)
                cv2.imwrite('./res/x_' + res_str + '.png',
                            i_image[rect[1]:rect[3], rect[0]:rect[2]])
            # print(str_to_num(eq[0])
            # print(str_to_num(eq[1])

        # cv2.imwrite('./res/'+res_str+'.png', binary_img)

    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]

    cv2.imshow('', i_image_3_color)
    cv2.waitKey()
    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
Example #24
fps_last_frame = None
last_frame_resized = None

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    if counter % (args.skip_frame + 1) != 0:
        counter += 1
        last_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        continue

    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # RGB order

    frame_timestamp_ms = cap.get(cv2.CAP_PROP_POS_MSEC)
    frame_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        frame, args.size, cv2.INTER_AREA, mag_ratio=1)
    if last_frame_resized is not None and not args.verbose:
        ssim = metrics.structural_similarity(
            cv2.cvtColor(last_frame_resized, cv2.COLOR_RGB2GRAY),
            cv2.cvtColor(frame_resized, cv2.COLOR_RGB2GRAY))
        if ssim > 0.9:
            counter += 1
            last_frame_resized = frame_resized
            continue
    last_frame_resized = frame_resized
    frame_resized = cv2.bilateralFilter(frame_resized, 17, 80, 80)
    ratio_h = ratio_w = 1 / target_ratio
    frame_norm = imgproc.normalizeMeanVariance(frame_resized, (0.5, 0.5, 0.5),
                                               (0.5, 0.5, 0.5))
    #frame_norm = imgproc.normalizeMeanVariance(frame_resized)
    # if batch is None :
Example #25
    def process(self, craft, model, seq, key, sub_img):
        img_resized, target_ratio, size_heatmap = resize_aspect_ratio(
            sub_img, 2560, interpolation=cv2.INTER_LINEAR, mag_ratio=1.)
        ratio_h = ratio_w = 1 / target_ratio

        x = normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]
        x = x.to(self.device)
        y, feature = craft(x)
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()
        boxes, polys = getDetBoxes(score_text,
                                   score_link,
                                   text_threshold=0.7,
                                   link_threshold=0.4,
                                   low_text=0.4,
                                   poly=False)
        boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = adjustResultCoordinates(polys, ratio_w, ratio_h)
        for k in range(len(polys)):
            if polys[k] is None:
                polys[k] = boxes[k]
        result = []
        for i, box in enumerate(polys):
            poly = np.array(box).astype(np.int32).reshape((-1))
            result.append(poly)
        horizontal_list, free_list = group_text_box(result,
                                                    slope_ths=0.8,
                                                    ycenter_ths=0.5,
                                                    height_ths=1,
                                                    width_ths=1,
                                                    add_margin=0.1)
        # horizontal_list = [i for i in horizontal_list if i[0] > 0 and i[1] > 0]
        min_size = 20
        if min_size:
            horizontal_list = [
                i for i in horizontal_list
                if max(i[1] - i[0], i[3] - i[2]) > 10
            ]
            free_list = [
                i for i in free_list
                if max(diff([c[0] for c in i]), diff([c[1]
                                                      for c in i])) > min_size
            ]
        seq[:] = [None] * len(horizontal_list)

        for i, ele in enumerate(horizontal_list):
            ele = [0 if v < 0 else v for v in ele]  # clamp negative coordinates to 0
            img = sub_img[ele[2]:ele[3], ele[0]:ele[1], :]
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(img.astype(np.uint8))
            img = process_input(img, self.config['dataset']['image_height'],
                                self.config['dataset']['image_min_width'],
                                self.config['dataset']['image_max_width'])
            img = img.to(self.config['device'])
            with torch.no_grad():
                src = model.cnn(img)
                memory = model.transformer.forward_encoder(src)
                translated_sentence = [[1] * len(img)]
                max_length = 0
                while max_length <= 128 and not all(
                        np.any(np.asarray(translated_sentence).T == 2,
                               axis=1)):
                    tgt_inp = torch.LongTensor(translated_sentence).to(
                        self.device)
                    output = model.transformer.forward_decoder(tgt_inp, memory)
                    output = output.to('cpu')
                    values, indices = torch.topk(output, 5)
                    indices = indices[:, -1, 0]
                    indices = indices.tolist()
                    translated_sentence.append(indices)
                    max_length += 1
                    del output
                translated_sentence = np.asarray(translated_sentence).T
            s = translated_sentence[0].tolist()
            s = self.vocab.decode(s)
            seq[i] = s
Example #26
    def get_bounding_box(self, image_file, verbose=False):
        """
        Get the bounding boxes from image_file
        :param image_file
        :param verbose
        :return:
        """
        image = cv2.imread(image_file)
        img_dim = image.shape
        img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
            image,
            self.canvas_size,
            interpolation=cv2.INTER_LINEAR,
            mag_ratio=self.mag_ratio)

        ratio_h = ratio_w = 1 / target_ratio

        # preprocessing
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
        if self.cuda:
            x = x.cuda()

        # forward pass
        with torch.no_grad():
            y, feature = self.net(x)

        # make score and link map
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()
        boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                               self.text_threshold,
                                               self.link_threshold,
                                               self.low_text, self.poly)

        boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

        center_point = []
        for i, _b in enumerate(boxes):
            b = np.array(_b, dtype=np.int16)
            xmin = np.min(b[:, 0])
            ymin = np.min(b[:, 1])

            xmax = np.max(b[:, 0])
            ymax = np.max(b[:, 1])
            x_m = xmin + (xmax - xmin) / 2
            y_m = ymin + (ymax - ymin) / 2
            center_point.append([x_m, y_m])

        list_images = get_box_img(boxes, image)

        if verbose:
            for _b in boxes:
                b = np.array(_b, dtype=np.int16)
                xmin = np.min(b[:, 0])
                ymin = np.min(b[:, 1])

                xmax = np.max(b[:, 0])
                ymax = np.max(b[:, 1])

                r = image[ymin:ymax, xmin:xmax, :].copy()

        return boxes, list_images, center_point, img_dim
Example #27
def detect_net(net, image, text_threshold, link_threshold, low_text, cuda,
               poly, refine_net, res_path):
    t0 = time.time()

    origin_image_1_channel = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    origin_image_3_color = np.array(image)
    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    cv2.imwrite("core_link.jpg", score_text * 255)
    cv2.imwrite("score_link.jpg", score_link * 255)

    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    # Boxes produced by CRAFT
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment

    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    # Handle boxes that were split apart: put adjacent boxes into the same group
    # Merge adjacent boxes with a breadth-first search
    # Build the distance matrix between box centers
    all_rect_cx_cy = np.zeros((len(boxes), 2))
    for i in range(len(boxes)):
        box = boxes[i]
        left = min(box[0][0], box[1][0], box[2][0], box[3][0])
        right = max(box[0][0], box[1][0], box[2][0], box[3][0])
        top = min(box[0][1], box[1][1], box[2][1], box[3][1])
        bottom = max(box[0][1], box[1][1], box[2][1], box[3][1])
        top = int(top)
        bottom = int(bottom)
        left = int(left)
        right = int(right)

        all_rect_cx_cy[i][0] = ((left + right) / 2) / 4
        # x is divided by 4 to reduce its influence on the grouping
        # (this factor may still need tuning)
        all_rect_cx_cy[i][1] = ((top + bottom) / 2)
    mat_distance = []
    for i in range(len(all_rect_cx_cy)):
        mat_distance.append(
            np.sqrt(np.sum((all_rect_cx_cy - all_rect_cx_cy[i])**2, axis=-1)))

    print("generate distance mat;len:", len(mat_distance))

    segment_group = []
    ind_group = -1
    search_queue = deque()
    cnt_processed = 0
    processed = set()
    # Breadth-first search over the distance matrix
    while cnt_processed < len(all_rect_cx_cy):  # keep going while any box is unprocessed
        if (len(search_queue) == 0):
            for i in range(len(all_rect_cx_cy)):
                if (i not in processed):
                    search_queue.append(i)
                    segment_group.append([])
                    ind_group += 1
                    break
        current_node = search_queue.popleft()  # pop from the front of the queue (FIFO), the core of BFS
        if current_node not in processed:  # skip nodes that have already been visited
            cnt_processed += 1
            processed.add(current_node)
            inds = np.argsort(mat_distance[current_node])
            segment_group[ind_group].append(boxes[current_node])
            cnt_company = 0
            distance_threshold = 20  # max(all_rect[current_node][2],all_rect[current_node][3])
            # print(distance_threshold)
            for index in inds:  # visit neighbors in order of increasing distance
                if mat_distance[current_node][index] > distance_threshold:
                    break
                cnt_company += 1
                if cnt_company > 200:
                    print("error")
                    exit()
                if index not in search_queue:  # enqueue the neighbor if it is not already queued
                    search_queue.append(index)

    # Merge the boxes within each group
    merge_boxes = []
    for segment in segment_group:
        left_s = []
        right_s = []
        top_s = []
        bottom_s = []
        for box in segment:
            left = min(box[0][0], box[1][0], box[2][0], box[3][0])
            right = max(box[0][0], box[1][0], box[2][0], box[3][0])
            top = min(box[0][1], box[1][1], box[2][1], box[3][1])
            bottom = max(box[0][1], box[1][1], box[2][1], box[3][1])
            top = math.floor(top)
            bottom = math.floor(bottom)
            left = math.floor(left)
            right = math.floor(right)

            left_s.append(left)
            right_s.append(right)
            top_s.append(top)
            bottom_s.append(bottom)
        merge_boxes.append(
            [min(left_s), min(top_s),
             max(right_s),
             max(bottom_s)])

    json_record = []
    for rect in merge_boxes:
        threshold_hw = min(rect[3] - rect[1], rect[2] - rect[0]) * 0.2
        crop = origin_image_1_channel[rect[1]:rect[3], rect[0]:rect[2]]
        # debug_write(crop,"exp");

        # adaptiveThreshold
        binary_img = cv2.adaptiveThreshold(crop, 255,
                                           cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY_INV, 31, 10)
        debug_write(binary_img, "all")

        # ret, binary_img = cv2.threshold(crop, 175, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # debug_write(binary_img,"dilate")
        # kernel = np.ones((1, 2), np.uint8)

        # binary_img_dilate = cv2.erode(binary_img, kernel, iterations=1)

        # debug_write(binary_img_dilate,"dilate")
        # print(binary_img.max(),binary_img.min())
        _, contours, _ = cv2.findContours(binary_img, cv2.RETR_EXTERNAL,
                                          cv2.CHAIN_APPROX_SIMPLE)  # OpenCV 3.x signature

        group = []
        for i in range(len(contours)):
            rect_char = cv2.boundingRect(contours[i])
            group.append(rect_char)
        group.sort(key=lambda rect: rect[0])

        if (len(group) >= 1):
            last_x_start = group[0][0]
            last_x_end = group[0][0] + group[0][2]
            last = group[0]
        i = 1

        # Merge multi-contour symbols such as '='
        while i < len(group) and i >= 1:
            now = group[i]
            cx = now[0] + now[2] / 2
            cy = now[1] + now[3] / 2
            last_cy = last[1] + last[3] / 2
            y_near = abs(last_cy - cy) < (last_x_end - last_x_start) * 0.6
            if (last_x_start < cx and cx < last_x_end and y_near):
                group.pop(i)
                i -= 1
                x1 = min(now[0], group[i][0])
                y1 = min(now[1], group[i][1])
                x2 = max(now[0] + now[2], group[i][0] + group[i][2])
                y2 = max(now[1] + now[3], group[i][1] + group[i][3])
                group[i] = (x1, y1, x2 - x1, y2 - y1)
            else:
                last_x_start = group[i][0]
                last_x_end = group[i][0] + group[i][2]
                last = group[i]
            i += 1
        # if(len(group)<4 or len(group)>16):
        #     continue
        # Recognize each character box and collect the results

        json_record_perline = []

        rect_set = []
        res_set = []

        # def detect_rect(rect_char, binary_img):
        #
        #     crop_char = binary_img[
        #                 rect_char[1]:
        #                 rect_char[1] + rect_char[3],
        #                 rect_char[0]:
        #                 rect_char[0] + rect_char[2]]
        #
        #     debug_crop_char = crop_char
        #     if crop_char.shape[0]*6 < crop_char.shape[1]:
        #         return '-'
        #     if crop_char.shape[0] < 2 or crop_char.shape[1] < 2:
        #         return ''
        #     debug_write(crop_char, "detect_rect")
        #     crnn_text_result = recognizer(crop_char)
        #
        #     crop_char = torch.tensor(crop_char, dtype=torch.int)
        #
        #     crop_char = adapt_size(crop_char)
        #     crop_char = crop_char.float().to(device)
        #     res = classifer_box.eval(crop_char.unsqueeze(0)).squeeze().int().item()
        #
        #     print(config.CLASS[res], crnn_text_result)
        #
        #     return config.CLASS_toString[res]

        def detect_rect(rect_char, binary_img, before_str):

            crop_char = binary_img[rect_char[1]:rect_char[1] + rect_char[3],
                                   rect_char[0]:rect_char[0] + rect_char[2]]

            # minus-sign / thin-stroke heuristics (disabled below)
            # print(crop_char.shape)
            # if crop_char.shape[0] * 3 < crop_char.shape[1] and crop_char.mean() > 128:
            #     return '-'
            # if crop_char.shape[1] * 3 < crop_char.shape[0] and crop_char.mean() > 128:
            #     return '1'
            # region too small to recognize
            if crop_char.shape[0] < 2 and crop_char.shape[1] < 2:
                return ''

            # debug_write(crop_char, "detect_rect")

            # if crop_char.shape[1] < crop_char.shape[0] // 2:
            #     fx = 4
            # else:
            #     fx = fy

            # crnn
            crnn_text_result = recognizer(crop_char)
            # debug_write(crop_char,crnn_text_result.replace('/','d'))

            # dense
            # crop_char = torch.tensor(crop_char, dtype=torch.int)
            # crop_char = adapt_size(crop_char)
            # crop_char = crop_char.float().to(device)
            # res = classifer_box.eval(crop_char.unsqueeze(0)).squeeze().int().item()

            # print(crnn_text_result,compress(crnn_text_result))

            # print(crnn_text_result)
            return compress(crnn_text_result)

        res_str = ''
        for i in range(len(group)):
            rect_char = group[i]
            if max(rect_char[2], rect_char[3]) < threshold_hw:
                continue
            res = detect_rect(rect_char, binary_img, before_str=res_str)
            res_set.append(res)
            rect_set.append(rect_char)
            res_str += res
        print(res_str)
        # for i in range(len(res_set)):
        #     res = res_set[i]
        #     res_str += config.CLASS_toString[res]
        #
        #     json_record_perline.append({'rect_char': rect_set[i], 'char': config.CLASS_toString[res]})
        #
        #     # print('left',res)
        #     # The right side of '=' is lighter; binarize that region separately and re-detect
        #     if (config.CLASS_is_eq(res)):
        #         rect_char = rect_set[i]
        #
        #         crop = origin_image_1_channel[rect[1]:rect[3], rect[0]:rect[2]][:, rect_char[0] + rect_char[2]:]
        #
        #         # record the relative position
        #         relative = (rect_char[0] + rect_char[2], 0, 0, 0)
        #
        #         if (crop.shape[0] * crop.shape[1] < 4):
        #             break
        #         # adaptive thresholding
        #         # crop = convert_to_binary_inv(crop)
        #         crop = cv2.adaptiveThreshold(crop, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        #                               cv2.THRESH_BINARY_INV, 31, 10)
        #         # debug_write(crop,'')
        #
        #         _, contours_right, _ = cv2.findContours(crop, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        #
        #         group_right = []
        #         for i in range(len(contours_right)):
        #             rect_char_right = cv2.boundingRect(contours_right[i])
        #             group_right.append(rect_char_right)
        #         group_right.sort(key=lambda rect: rect[0])
        #         for rect_char in group_right:
        #             if (max(rect_char[2], rect_char[3]) < crop.shape[0] * 0.3):
        #                 continue
        #             res_right = detect_rect(rect_char, crop)
        #             res_str += config.CLASS_toString[res_right]
        #             json_record_perline.append({'rect_char': (
        #                 relative[0] + rect_char[0],
        #                 relative[1] + rect_char[1],
        #                 rect_char[2],
        #                 rect_char[3]
        #             ), 'char': config.CLASS_toString[res_right]})
        #
        #         break

        eq = res_str.split('=')
        if (len(eq) >= 2):
            res_str = res_str.replace("/", "d")

            json_record.append({
                'rect_expression':
                (rect[0], rect[1], rect[2] - rect[0], rect[3] - rect[1]),
                'expression':
                json_record_perline
            })
            with open("resjson/" + res_str + ".json", 'w') as file_object:
                file_object.write(
                    json.dumps({
                        'rect_expression':
                        (rect[0], rect[1], rect[2] - rect[0],
                         rect[3] - rect[1]),
                        'expression':
                        json_record_perline
                    }))

            if str_to_num(eq[0]) == str_to_num(eq[-1]):
                # cv2.rectangle(origin_image_3_color, (rect[0], rect[1]), (rect[2] , rect[3]),	(46,255,87), 2)
                cv2.line(origin_image_3_color, (rect[0], rect[3]),
                         (rect[2], rect[3]), (46, 255, 87), 2)
                cv2.imwrite(
                    './res/' + res_str + '.png',
                    origin_image_1_channel[rect[1]:rect[3], rect[0]:rect[2]])
            elif eq[-1] == "":
                cv2.rectangle(origin_image_3_color, (rect[0], rect[1]),
                              (rect[2], rect[3]), (255, 46, 87), 2)
                cv2.imwrite(
                    './res/O' + res_str + '.png',
                    origin_image_1_channel[rect[1]:rect[3], rect[0]:rect[2]])
            else:
                cv2.rectangle(origin_image_3_color, (rect[0], rect[1]),
                              (rect[2], rect[3]), (46, 87, 255), 2)
                cv2.imwrite(
                    './res/X' + res_str + '.png',
                    origin_image_1_channel[rect[1]:rect[3], rect[0]:rect[2]])

    print(res_path)
    cv2.imwrite(res_path, origin_image_3_color)

    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]
    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
    cv2.imwrite("xxxx.png", ret_score_text)

    # for line in json_record:
    #     print(line)
    data2 = json.dumps(json_record)
    return data2
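
Example #27 depends on helpers such as recognizer, debug_write, compress, and str_to_num that are defined elsewhere. As a rough, hypothetical sketch (not the source's implementation), compress might collapse repeated characters in the CRNN output and str_to_num might evaluate one side of the recognized arithmetic expression:

def compress(text):
    # Hypothetical: drop consecutive duplicate characters from the recognizer output.
    out = []
    for ch in text:
        if not out or out[-1] != ch:
            out.append(ch)
    return ''.join(out)


def str_to_num(expr):
    # Hypothetical: evaluate one side of a simple arithmetic expression such as "3+4".
    expr = expr.replace('x', '*').replace('d', '/')
    try:
        return eval(expr)  # expr is expected to contain only digits and + - * /
    except Exception:
        return None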
예제 #28
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    y, _ = net(x)

    # # make score and link map
    # score_text = y[0,:,:,0].cpu().data.numpy()
    # score_link = y[0,:,:,1].cpu().data.numpy()

    gh_pred = y[0, :, :, :].permute((2, 0, 1)).cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    boxes, polys = None, None

    # # Post-processing
    # boxes, polys = craft_utils.getDetBoxes(score_text, text_threshold, low_text, poly)
    postproc = [
        craft_utils.getDetBoxes(score_text, text_threshold, low_text, poly)
        for score_text in gh_pred
    ]
    boxes_pred, polys_pred = zip(*postproc)

    # # coordinate adjustment
    # boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    # polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)

    for boxes, polys in zip(boxes_pred, polys_pred):
        for k in range(len(polys)):
            if polys[k] is None: polys[k] = boxes[k]

    t1 = time.time() - t1

    # # render results (optional)
    # render_img = score_text.copy()
    # render_img = np.hstack((render_img, score_link))
    # ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return gh_pred, boxes_pred, polys_pred, size_heatmap

    # return boxes, polys, ret_score_text  # unreachable leftover from the single-channel version
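
A minimal usage sketch for this multi-channel variant, assuming net is an already loaded model whose output stacks one score map per channel (the thresholds below are placeholders):

# Hypothetical usage of the multi-channel variant above.
gh_pred, boxes_pred, polys_pred, size_heatmap = test_net(
    net, image, text_threshold=0.7, link_threshold=0.4,
    low_text=0.4, cuda=True, poly=False)
for ch, (boxes, polys) in enumerate(zip(boxes_pred, polys_pred)):
    print("channel {}: {} boxes".format(ch, len(boxes)))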
예제 #29
0
    feature = interpreter.get_tensor(output_details[1]['index'])

    return y, feature


if __name__ == '__main__':
    image_path = sys.argv[1]

    start_time = time.time()
    image = imgproc.loadImage(image_path)
    image = cv2.resize(image,
                       dsize=(800, 1280),
                       interpolation=cv2.INTER_LINEAR)
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    # forward pass

    x = x.cpu().detach().numpy()
    y, feature = run_tflite_model(x)

    y = torch.from_numpy(y)
    feature = torch.from_numpy(feature)
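
The run_tflite_model call above is defined elsewhere; the fragment at the top of this example only shows its last two lines. A minimal sketch of such a wrapper around the TensorFlow Lite Interpreter, assuming a converted model file craft.tflite whose first output is the score map and whose second output is the feature map (the path and output ordering are assumptions):

import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="craft.tflite")  # hypothetical model path
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

def run_tflite_model(x):
    # x: numpy array matching the model's input shape and dtype
    interpreter.set_tensor(input_details[0]['index'], x.astype(np.float32))
    interpreter.invoke()
    y = interpreter.get_tensor(output_details[0]['index'])
    feature = interpreter.get_tensor(output_details[1]['index'])
    return y, feature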
예제 #30
0
def test_net(net,
             image,
             text_threshold,
             link_threshold,
             low_text,
             cuda,
             poly,
             image_path,
             refine_net=None):
    t0 = time.time()
    img_h, img_w, c = image.shape
    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio
    h, w, c = image.shape
    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    y, feature = net(x)
    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()  # region score
    score_link = y[0, :, :, 1].cpu().data.numpy()  # affinity score
    # refine link
    if refine_net is not None:
        y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold, 0.4,
                                           poly)  # CRAFT box generation (low_text hardcoded to 0.4)

    # coordinate adjustment

    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)

    #print(scores)

    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]
    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()

    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
    Plus_score_text = imgproc.cvMakeScores(render_img)

    filename, file_ext = os.path.splitext(os.path.basename(image_path))

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))
    post_folder = './output/post'  # binarized originals are saved here
    resize_folder = './output/resize'  # resized originals are saved here

    if not os.path.isdir(resize_folder + '/'):
        os.makedirs(resize_folder + '/')

    resize_file = resize_folder + "/resize_" + filename + '_mask.jpg'  # resized original image

    IMG_RGB2 = cv2.cvtColor(img_resized,
                            cv2.COLOR_BGR2RGB)  # convert the CRAFT-resized image to RGB

    # Build the composite image
    pil_image = Image.fromarray((IMG_RGB2 * 255).astype(np.uint8))
    images = np.array(pil_image)
    images = cv2.cvtColor(images, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(images, 0, 255,
                                cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # binarize (Otsu) for image compositing

    text_score = cv2.resize(Plus_score_text,
                            None,
                            fx=2,
                            fy=2,
                            interpolation=cv2.INTER_LINEAR)  # scale back toward the original size

    thresh = cv2.resize(thresh, (img_w, img_h))  # binarized original image
    text_score = cv2.resize(text_score, (img_w, img_h))  # binarized region-score image

    text_score = Image.fromarray((text_score).astype(np.uint8))
    text_score = np.array(text_score)

    if not os.path.isdir('./output/og_bri' + '/'):  # folder for binarized originals
        os.makedirs('./output/og_bri' + '/')

    if not os.path.isdir('./output/score/'):  # folder for binarized score maps
        os.makedirs('./output/score/')

    cv2.imwrite('./output/og_bri' + "/og_" + filename + '.jpg',
                thresh)  # save the binarized original image
    cv2.imwrite('./output/score' + "/score_" + filename + '.jpg',
                text_score)  # save the binarized score image

    img_h = thresh.shape[0]
    img_w = thresh.shape[1]

    IMG_RGB2 = cv2.resize(IMG_RGB2, (img_w, img_h))  # resize back to the original size
    cv2.imwrite(resize_file, IMG_RGB2)

    return boxes, polys, ret_score_text
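
A minimal sketch of how this last variant might be driven, assuming a loaded CRAFT model net and the usual args namespace with canvas_size, mag_ratio, and show_time (the thresholds and image path below are placeholders):

# Hypothetical driver for the variant above.
image_path = './input/sample.jpg'
image = imgproc.loadImage(image_path)
boxes, polys, score_text = test_net(net, image,
                                    text_threshold=0.7,
                                    link_threshold=0.4,
                                    low_text=0.4,
                                    cuda=True,
                                    poly=False,
                                    image_path=image_path)
print(len(boxes), 'boxes detected')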