예제 #1
0
    def test_net(self,
                 net,
                 image,
                 text_threshold,
                 link_threshold,
                 low_text,
                 cuda,
                 poly,
                 refine_net=None):
        """Run text detection on one image.

        Args:
            net: Detection network; ``net(x)`` must return a ``(y, feature)``
                pair.
            image: Input image array, handed to
                ``imgproc.resize_aspect_ratio``.
            text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
            link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
            low_text: Forwarded to ``craft_utils.getDetBoxes``.
            cuda: When truthy, the input tensor is moved to the GPU.
            poly: Forwarded to ``craft_utils.getDetBoxes``.
            refine_net: Optional network called as ``refine_net(y, feature)``;
                when given, channel 0 of its output replaces the link score.

        Returns:
            ``(boxes, polys, ret_score_text)``: the detected boxes and
            polygons after ``craft_utils.adjustResultCoordinates`` (a polygon
            that is ``None`` is replaced by its box), and the result of
            ``imgproc.cvt2HeatmapImg`` applied to the text and link score
            maps placed side by side.
        """
        infer_started = time.time()

        # Resize to the configured canvas; only the image and the ratio are
        # needed afterwards.
        img_resized, target_ratio, _size_heatmap = imgproc.resize_aspect_ratio(
            image,
            self.canvas_size,
            interpolation=cv2.INTER_LINEAR,
            mag_ratio=self.mag_ratio)
        inv_ratio = 1 / target_ratio

        # Normalise, then reorder [h, w, c] -> [c, h, w] -> [b, c, h, w].
        normalized = imgproc.normalizeMeanVariance(img_resized)
        chw = torch.from_numpy(normalized).permute(2, 0, 1)
        batch = Variable(chw.unsqueeze(0))
        if cuda:
            batch = batch.cuda()

        # Forward pass without building an autograd graph.
        with torch.no_grad():
            y, feature = net(batch)

        # Channel 0 is used as the text score, channel 1 as the link score.
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()

        # An optional refiner overrides the link score.
        if refine_net is not None:
            with torch.no_grad():
                y_refiner = refine_net(y, feature)
            score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

        infer_elapsed = time.time() - infer_started
        post_started = time.time()

        # Turn the score maps into boxes/polygons and rescale the coordinates
        # by the inverse of the resize ratio.
        boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                               text_threshold, link_threshold,
                                               low_text, poly)
        boxes = craft_utils.adjustResultCoordinates(boxes, inv_ratio, inv_ratio)
        polys = craft_utils.adjustResultCoordinates(polys, inv_ratio, inv_ratio)
        for idx, candidate in enumerate(polys):
            if candidate is None:
                polys[idx] = boxes[idx]

        post_elapsed = time.time() - post_started

        # Heatmap of both score maps side by side (np.hstack allocates a new
        # array, so the score maps themselves are left untouched).
        side_by_side = np.hstack((score_text, score_link))
        ret_score_text = imgproc.cvt2HeatmapImg(side_by_side)

        if self.show_time:
            print("\ninfer/postproc time : {:.3f}/{:.3f}".format(
                infer_elapsed, post_elapsed))

        return boxes, polys, ret_score_text
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly, image_path, refine_net=None):
    """Detect text in ``image`` and save binarized/resized debug images.

    Args:
        net: Detection network; ``net(x)`` must return a ``(y, feature)`` pair.
        image: Input image as an ``(h, w, c)`` array.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: When truthy, the input tensor is moved to the GPU.
        poly: Forwarded to ``craft_utils.getDetBoxes``.
        image_path: Path of the source image; only its base name (without
            extension) is used, to name the files written by this function.
        refine_net: Optional network called as ``refine_net(y, feature)``;
            when given, channel 0 of its output replaces the link score.

    Returns:
        ``(boxes, polys, ret_score_text)``: boxes and polygons after
        ``craft_utils.adjustResultCoordinates`` (a polygon that is ``None`` is
        replaced by its box) and the heatmap image of the text score map.

    Side effects:
        Creates ``./resize``, ``./og_bri`` and ``./score`` if needed and
        writes ``./og_bri/og_<name>.jpg`` (Otsu-binarized input),
        ``./score/score_<name>.jpg`` (score image from
        ``imgproc.cvMakeScores``) and ``./resize/resize_<name>_mask.jpg``
        (resized input), each scaled to the size of ``image``.
    """
    t0 = time.time()
    img_h, img_w, _ = image.shape

    # resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image, args.canvas_size, interpolation=cv2.INTER_LINEAR, mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass; inference only, so do not record an autograd graph
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()  # region score
    score_link = y[0, :, :, 1].cpu().data.numpy()  # affinity score

    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing: this is where the boxes are produced. The caller's
    # low_text is honoured here (it used to be ignored in favour of a
    # hard-coded 0.4).
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link, text_threshold, link_threshold, low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k, candidate in enumerate(polys):
        if candidate is None:
            polys[k] = boxes[k]
    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
    plus_score_text = imgproc.cvMakeScores(render_img)

    filename = os.path.splitext(os.path.basename(image_path))[0]

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    # Output folders: resized input, binarized input, binarized score.
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    resize_folder = './resize'
    os.makedirs(resize_folder, exist_ok=True)
    os.makedirs('./og_bri', exist_ok=True)
    os.makedirs('./score', exist_ok=True)

    resize_file = resize_folder + "/resize_" + filename + '_mask.jpg'

    # Swap the channel order of the image that was resized for the network.
    img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)

    # Binarize the input (Otsu) so it can be combined with the score image.
    # NOTE(review): the "* 255" assumes img_resized holds values in [0, 1];
    # if resize_aspect_ratio returns 0-255 data this wraps in uint8 - confirm.
    pil_image = Image.fromarray((img_rgb * 255).astype(np.uint8))
    gray = cv2.cvtColor(np.array(pil_image), cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Upscale the score image by 2, then bring both images to the size of
    # the original input.
    text_score = cv2.resize(plus_score_text, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
    thresh = cv2.resize(thresh, (img_w, img_h))
    text_score = cv2.resize(text_score, (img_w, img_h))

    text_score = Image.fromarray((text_score).astype(np.uint8))
    text_score = np.array(text_score)

    cv2.imwrite('./og_bri' + "/og_" + filename + '.jpg', thresh)          # binarized input
    cv2.imwrite('./score' + "/score_" + filename + '.jpg', text_score)    # score image

    # thresh was resized to (img_w, img_h) above, so these are its dimensions.
    img_rgb = cv2.resize(img_rgb, (img_w, img_h))  # back to the original size
    cv2.imwrite(resize_file, img_rgb)

    return boxes, polys, ret_score_text
예제 #3
0
            x, y, w, h = cv2.boundingRect(contour)
            coor = [[x, y], [x+w, y], [x+w, y+h], [x, y+h]]
            coordinates.append(coor)

        coordinates = np.array(coordinates, np.float64)

        height, width = np_image.shape[:2]
        mag_ratio = args.mag_ratio
        square_size = args.canvas_size
        target_size = mag_ratio * max(height, width)
        if target_size > square_size:
            target_size = square_size
        target_ratio = target_size / max(height, width)    
        ratio_h = ratio_w = 1 / target_ratio

        coordinates = craft_utils.adjustResultCoordinates(coordinates, ratio_w, ratio_h)
        coordinates = coordinates.astype(np.int64)

        a2 = 0.22
        for k, coor in enumerate(coordinates):
            x0, y0 = coor[0]
            x2, y2 = coor[2]
            mi = math.ceil(a2 * math.sqrt((x2-x0) * (y2-y0)))
            if 2 * h < w:
                char_image = image[:, x0-mi:x2+mi, :]
            elif 2 * w < h:
                char_image = image[y0-mi:y2+mi, :, :]
            else:
                char_image = image[y0-mi:y2+mi, x0-mi:x2+mi, :]

            if char_image.size:
def main():
    """Run the whole image-translation pipeline on ``args.image``.

    Stages, in order: load the models, read the image, detect text lines,
    run OCR, merge text lines into text regions, build a text mask, inpaint,
    translate the region texts and render the translations. Intermediate and
    final images are written into the ``result`` directory.

    Raises:
        OSError: If ``args.image`` cannot be read by ``cv2.imread``.
    """
    print(' -- Loading models')
    import os
    os.makedirs('result', exist_ok=True)
    text_render.prepare_renderer()
    dictionary, model_ocr = load_ocr_model()
    model_detect = load_detect_model()
    model_inpainting = load_inpainting_model()

    print(' -- Read image')
    img = cv2.imread(args.image)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f'Could not read image: {args.image}')
    img_bbox = np.copy(img)
    img_bbox_all = np.copy(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_resized, target_ratio, _, pad_w, pad_h = imgproc.resize_aspect_ratio(
        img, args.size, cv2.INTER_LINEAR, mag_ratio=1)
    img_to_overlay = np.copy(img_resized)
    ratio_h = ratio_w = 1 / target_ratio
    img_resized = imgproc.normalizeMeanVariance(img_resized)
    print(
        f'Detection resolution: {img_resized.shape[1]}x{img_resized.shape[0]}')

    print(' -- Running text detection')
    rscore, ascore, mask = run_detect(model_detect, img_resized)
    overlay = imgproc.cvt2HeatmapImg(rscore + ascore)
    boxes, polys = craft_utils.getDetBoxes(rscore, ascore, args.text_threshold,
                                           args.link_threshold, args.low_text,
                                           False)
    boxes = craft_utils.adjustResultCoordinates(boxes,
                                                ratio_w,
                                                ratio_h,
                                                ratio_net=2)
    polys = craft_utils.adjustResultCoordinates(polys,
                                                ratio_w,
                                                ratio_h,
                                                ratio_net=2)
    for k, candidate in enumerate(polys):
        if candidate is None:
            polys[k] = boxes[k]
    # merge textlines
    polys = merge_bboxes(polys, can_merge_textline)
    # Draw every merged text line (before OCR filtering) for debugging.
    for [tl, tr, br, bl] in polys:
        x = int(tl[0])
        y = int(tl[1])
        width = int(tr[0] - tl[0])
        height = int(br[1] - tr[1])
        cv2.rectangle(img_bbox_all, (x, y), (x + width, y + height),
                      color=(255, 0, 0),
                      thickness=2)

    print(' -- Running OCR')
    # run OCR for each textline
    textlines = run_ocr(img_bbox, polys, dictionary, model_ocr, 32)
    # merge textline to text region, filter textlines without characters
    text_regions: List[BBox] = []
    new_textlines = []
    for (poly_regions, textline_indices, majority_dir, fg_r, fg_g, fg_b, bg_r,
         bg_g, bg_b) in merge_bboxes_text_region(textlines):
        [tl, tr, br, bl] = poly_regions
        # Region rectangle, padded by 5 px on every side.
        x = int(tl[0]) - 5
        y = int(tl[1]) - 5
        width = int(tr[0] - tl[0]) + 10
        height = int(br[1] - tr[1]) + 10
        text = ''
        logprob_lengths = []
        for textline_idx in textline_indices:
            line = textlines[textline_idx]
            if not text:
                text = line.text
            elif line.text:
                # Join two non-Latin-1 characters directly; otherwise insert
                # a space. Empty OCR lines are skipped here, they used to
                # raise IndexError on ``text[0]``.
                if ord(text[-1]) > 255 and ord(line.text[0]) > 255:
                    text += line.text
                else:
                    text += ' ' + line.text
            logprob_lengths.append((np.log(line.prob), len(line.text)))
        vc = count_valuable_text(text)
        # Length-weighted mean of the per-line log probabilities.
        total_length = sum(length for _, length in logprob_lengths)
        total_logprobs = sum(
            (logprob * length for logprob, length in logprob_lengths), 0.0)
        if total_length > 0:
            # Guarded: a region whose lines are all empty has length 0.
            total_logprobs /= total_length
        # filter text region without characters
        if vc > 1:
            region = BBox(x, y, width, height, text, np.exp(total_logprobs),
                          fg_r, fg_g, fg_b, bg_r, bg_g, bg_b)
            region.textline_indices = []
            region.majority_dir = majority_dir
            text_regions.append(region)
            # Re-index the kept text lines into the filtered list.
            for textline_idx in textline_indices:
                region.textline_indices.append(len(new_textlines))
                new_textlines.append(textlines[textline_idx])
    textlines = new_textlines

    print(' -- Generating text mask')
    # create mask
    from text_mask_utils import filter_masks, complete_mask
    mask_resized = cv2.resize(mask, (mask.shape[1] * 2, mask.shape[0] * 2),
                              interpolation=cv2.INTER_LINEAR)
    # Cut off the padding added by resize_aspect_ratio.
    # NOTE(review): only one of the two paddings is removed; confirm that
    # pad_h and pad_w can never both be non-zero.
    if pad_h > 0:
        mask_resized = mask_resized[:-pad_h, :]
    elif pad_w > 0:
        mask_resized = mask_resized[:, :-pad_w]
    # Mask refinement runs at half of the original resolution.
    mask_resized = cv2.resize(mask_resized,
                              (img.shape[1] // 2, img.shape[0] // 2),
                              interpolation=cv2.INTER_LINEAR)
    img_resized_2 = cv2.resize(img, (img.shape[1] // 2, img.shape[0] // 2),
                               interpolation=cv2.INTER_LINEAR)
    mask_resized[mask_resized > 250] = 255
    text_lines = [(a.x // 2, a.y // 2, a.w // 2, a.h // 2) for a in textlines]
    mask_ccs, cc2textline_assignment = filter_masks(mask_resized, text_lines)
    cv2.imwrite('result/mask_filtered.png', reduce(cv2.bitwise_or, mask_ccs))
    final_mask = complete_mask(img_resized_2, mask_ccs, text_lines,
                               cc2textline_assignment)
    final_mask = cv2.resize(final_mask, (img.shape[1], img.shape[0]),
                            interpolation=cv2.INTER_LINEAR)
    final_mask[final_mask > 0] = 255

    print(' -- Running inpainting')
    # run inpainting
    img_inpainted, inpaint_input = run_inpainting(model_inpainting, img,
                                                  final_mask,
                                                  args.inpainting_size)

    print(' -- Translating')
    # translate text region texts (one region per line); skip the request
    # entirely when there is nothing to translate
    texts = '\n'.join([r.text for r in text_regions])
    if text_regions:
        trans_ret = baidu_translator.translate('ja', 'zh-CN', texts)
    else:
        trans_ret = []
    # Force exactly one translation per region: truncate extras, pad with ''.
    batch = len(text_regions)
    translated_sentences = list(trans_ret[:batch])
    translated_sentences.extend([''] * (batch - len(translated_sentences)))

    print(' -- Rendering translated text')
    # render translated texts
    img_canvas = np.copy(img_inpainted)
    for trans_text, region in zip(translated_sentences, text_regions):
        print(region.text)
        print(trans_text)
        print(region.majority_dir, region.x, region.y, region.w, region.h)
        img_bbox = cv2.rectangle(img_bbox, (region.x, region.y),
                                 (region.x + region.w, region.y + region.h),
                                 color=(0, 0, 255),
                                 thickness=2)
        fg = (region.fg_b, region.fg_g, region.fg_r)
        for idx in region.textline_indices:
            txtln = textlines[idx]
            img_bbox = cv2.rectangle(img_bbox, (txtln.x, txtln.y),
                                     (txtln.x + txtln.w, txtln.y + txtln.h),
                                     color=fg,
                                     thickness=2)
        if region.majority_dir == 'h':
            text_render.put_text_horizontal(img_canvas, trans_text,
                                            len(region.textline_indices),
                                            region.x, region.y, region.w,
                                            region.h, fg, None)
        else:
            text_render.put_text_vertical(img_canvas, trans_text,
                                          len(region.textline_indices),
                                          region.x, region.y, region.w,
                                          region.h, fg, None)

    print(' -- Saving results')
    cv2.imwrite('result/rs.png', imgproc.cvt2HeatmapImg(rscore))
    cv2.imwrite('result/as.png', imgproc.cvt2HeatmapImg(ascore))
    cv2.imwrite('result/textline.png', overlay)
    cv2.imwrite('result/bbox.png', img_bbox)
    cv2.imwrite('result/bbox_unfiltered.png', img_bbox_all)
    cv2.imwrite(
        'result/overlay.png',
        cv2.cvtColor(
            overlay_image(
                img_to_overlay,
                cv2.resize(overlay,
                           (img_resized.shape[1], img_resized.shape[0]),
                           interpolation=cv2.INTER_LINEAR)),
            cv2.COLOR_RGB2BGR))
    cv2.imwrite('result/mask.png', final_mask)
    cv2.imwrite('result/inpainted.png',
                cv2.cvtColor(img_inpainted, cv2.COLOR_RGB2BGR))
    if inpaint_input is not None:
        cv2.imwrite('result/inpaint_input.png',
                    cv2.cvtColor(inpaint_input, cv2.COLOR_RGB2BGR))
    cv2.imwrite('result/final.png', cv2.cvtColor(img_canvas,
                                                 cv2.COLOR_RGB2BGR))
예제 #5
0
    def get_bounding_box(self, image_file, verbose=False):
        """
        Detect text boxes in an image file.

        :param image_file: path of the image, read with ``cv2.imread``
        :param verbose: kept for backward compatibility; it has no effect
            (the branch it used to guard only built a crop and discarded it)
        :return: tuple ``(boxes, list_images, center_point, img_dim)`` where
            ``boxes`` are the detected boxes after
            ``craft_utils.adjustResultCoordinates``, ``list_images`` is the
            result of ``get_box_img(boxes, image)``, ``center_point`` holds one
            ``[x, y]`` centre per box (centre of the axis-aligned extent of
            the integer-truncated corners) and ``img_dim`` is ``image.shape``
        :raises OSError: if the image cannot be read
        """
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError('Could not read image: {}'.format(image_file))
        img_dim = image.shape
        img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
            image,
            self.canvas_size,
            interpolation=cv2.INTER_LINEAR,
            mag_ratio=self.mag_ratio)

        ratio_h = ratio_w = 1 / target_ratio

        # preprocessing
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
        if self.cuda:
            x = x.cuda()

        # forward pass
        with torch.no_grad():
            y, _feature = self.net(x)

        # make score and link map
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()
        boxes, _ = craft_utils.getDetBoxes(score_text, score_link,
                                           self.text_threshold,
                                           self.link_threshold,
                                           self.low_text, self.poly)

        boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

        center_point = []
        for box in boxes:
            # int32 rather than int16 so coordinates above 32767 do not wrap.
            corners = np.array(box, dtype=np.int32)
            xs, ys = corners[:, 0], corners[:, 1]
            xmin, xmax = np.min(xs), np.max(xs)
            ymin, ymax = np.min(ys), np.max(ys)
            center_point.append([xmin + (xmax - xmin) / 2,
                                 ymin + (ymax - ymin) / 2])

        list_images = get_box_img(boxes, image)

        return boxes, list_images, center_point, img_dim
예제 #6
0
    x = x.cpu().detach().numpy()
    y, feature = run_tflite_model(x)

    y = torch.from_numpy(y)
    feature = torch.from_numpy(feature)
    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    file_utils.saveResult(image_path,
                          image[:, :, ::-1],
                          polys,
                          dirname=result_folder)
    filename, file_ext = os.path.splitext(os.path.basename(image_path))
    print("Total time taken to run CRAFT tflite model......",