Esempio n. 1
0
def get_boxes(img_c):
    """Detect text boxes in ``img_c`` and return them in input coordinates.

    Relies on module-level configuration: ``net``, ``square_size``,
    ``mag_ratio``, ``text_threshold``, ``link_threshold`` and ``low_text``.

    Args:
        img_c: image handed to ``imgproc.resize_aspect_ratio``
            (presumably an H x W x C array -- confirm against the caller).

    Returns:
        The boxes from ``craft_utils.getDetBoxes`` after rescaling with
        ``craft_utils.adjustResultCoordinates``.
    """
    # Resize to the working canvas; the heatmap size is not needed here.
    img_r, target_ratio, _ = imgproc.resize_aspect_ratio(
        img_c,
        square_size=square_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=mag_ratio)
    # The same inverse ratio maps detections back on both axes.
    ratio_h = ratio_w = 1 / target_ratio

    # Preprocess: normalize, then [h, w, c] -> [b, c, h, w].
    x = imgproc.normalizeMeanVariance(img_r)
    x = torch.from_numpy(x).permute(2, 0, 1).unsqueeze(0)

    # Inference only: do not record an autograd graph for the forward pass.
    with torch.no_grad():
        y, _ = net(x)

    # Last axis of the output: index 0 is the text score, index 1 the link score.
    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    # Post-processing (polygons are not requested).
    boxes, _ = craft_utils.getDetBoxes(score_text,
                                       score_link,
                                       text_threshold=text_threshold,
                                       link_threshold=link_threshold,
                                       low_text=low_text,
                                       poly=False)
    # Coordinate adjustment back to the original image scale.
    return craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
Esempio n. 2
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    """Run the detector on one image; return boxes, polygons and a heatmap.

    Reads ``args.canvas_size`` and ``args.mag_ratio`` from module scope.

    Args:
        net: callable returning ``(y, feature)`` for a batched input tensor.
        image: image passed to ``imgproc.resize_aspect_ratio``.
        text_threshold, link_threshold, low_text, poly: forwarded unchanged
            to ``craft_utils.getDetBoxes``.
        cuda: when truthy, the input tensor is moved to the GPU.

    Returns:
        ``(boxes, polys, ret_score_text)``: rescaled boxes, rescaled polygons
        (``None`` entries replaced by the matching box) and the output of
        ``imgproc.cvt2HeatmapImg`` on the side-by-side score maps.
    """
    t0 = time.time()  # start timestamp; never read back in this variant

    # Resize to the working canvas.
    resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_AREA,
        mag_ratio=args.mag_ratio)
    inv_ratio = 1 / target_ratio  # used for both width and height

    # Normalize, then [h, w, c] -> [b, c, h, w].
    batch = torch.from_numpy(imgproc.normalizeMeanVariance(resized))
    batch = Variable(batch.permute(2, 0, 1).unsqueeze(0))
    if cuda:
        batch = batch.cuda()

    with torch.no_grad():
        y, feature = net(batch)

    # Index 0 of the last axis is the text score, index 1 the link score.
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # Map detections back to the original image scale.
    boxes = craft_utils.adjustResultCoordinates(boxes, inv_ratio, inv_ratio)
    polys = craft_utils.adjustResultCoordinates(polys, inv_ratio, inv_ratio)
    for idx, candidate in enumerate(polys):
        if candidate is None:
            polys[idx] = boxes[idx]

    # Text and link maps side by side, rendered as a heatmap image.
    side_by_side = np.hstack((score_text.copy(), score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(side_by_side)
    return boxes, polys, ret_score_text
Esempio n. 3
0
    def train_data_transform_webtoon(self, idx):
        """Build one augmented training sample for dataset index ``idx``.

        Returns:
            ``(image, region_score_GT, affinity_score_GT, confidence)`` as
            float tensors. The image is permuted from [h, w, c] to [c, h, w];
            both score maps are divided by 255.
        """
        # --- Prepare the data for training ---
        image, char_bboxes = self.load_image_and_gt(idx)
        generator = self.gaussian_generator
        region_map = generator.region(image, char_bboxes)
        affinity_map = generator.affinity(image, char_bboxes)
        region_map, affinity_map = self.train_data_resize(
            region_map, affinity_map)

        # Normalize first, then resize to a square of side ``self.image_size``.
        side = self.image_size
        image = cv2.resize(imgproc.normalizeMeanVariance(image), (side, side),
                           interpolation=cv2.INTER_LINEAR)

        # All-ones confidence map with the region map's first two dimensions.
        conf_map = np.ones((region_map.shape[0], region_map.shape[1]),
                           np.float32)

        # --- Augment the data for training ---
        augmenter = Data_Augmentation(image, region_map, affinity_map,
                                      conf_map)
        image, region_map, affinity_map, conf_map = (
            augmenter.select_augmentation_method())

        # --- Convert the data for model prediction ---
        image_t = torch.from_numpy(image).float().permute(2, 0, 1)
        region_t = torch.from_numpy(region_map / 255).float()
        affinity_t = torch.from_numpy(affinity_map / 255).float()
        conf_t = torch.from_numpy(conf_map).float()

        return image_t, region_t, affinity_t, conf_t
Esempio n. 4
0
def test_net(canvas_size, mag_ratio, net, image, text_threshold,
             link_threshold, low_text, poly, device):
    """Detect text in ``image`` on ``device``; return boxes and polygons.

    Args:
        canvas_size, mag_ratio: forwarded to ``resize_aspect_ratio``.
        net: callable returning ``(y, feature)`` for a batched input tensor.
        image: image passed to ``resize_aspect_ratio``.
        text_threshold, link_threshold, low_text, poly: forwarded unchanged
            to ``getDetBoxes``.
        device: target passed to ``Tensor.to`` for the input batch.

    Returns:
        ``(boxes, polys)`` rescaled to the original image; ``None`` polygon
        entries are replaced by the box at the same index.
    """
    # Resize to the working canvas.
    resized, target_ratio, size_heatmap = resize_aspect_ratio(
        image,
        canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=mag_ratio)
    inv_ratio = 1 / target_ratio  # same factor for width and height

    # Normalize, then [h, w, c] -> [b, c, h, w], then move to the device.
    batch = torch.from_numpy(normalizeMeanVariance(resized)).permute(2, 0, 1)
    batch = Variable(batch.unsqueeze(0)).to(device)

    with torch.no_grad():
        y, feature = net(batch)

    # Index 0 of the last axis is the text score, index 1 the link score.
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    boxes, polys = getDetBoxes(score_text, score_link, text_threshold,
                               link_threshold, low_text, poly)

    # Map detections back to the original image scale.
    boxes = adjustResultCoordinates(boxes, inv_ratio, inv_ratio)
    polys = adjustResultCoordinates(polys, inv_ratio, inv_ratio)
    for idx, candidate in enumerate(polys):
        if candidate is None:
            polys[idx] = boxes[idx]

    return boxes, polys
Esempio n. 5
0
    def process(self, craft, seq, key, sub_img):
        """Detect text regions in ``sub_img`` and run ``self.predict`` on each.

        Args:
            craft: detector callable returning ``(y, feature)``.
            seq: list resized in place to one slot per horizontal box; it is
                handed to ``self.predict`` together with the slot index.
            key: opaque value forwarded to ``self.predict``.
            sub_img: image indexed as ``[rows, cols, channels]`` and converted
                with ``cv2.COLOR_BGR2RGB`` before recognition.

        Returns:
            None. Results are expected to arrive through ``seq``
            (presumably written by ``self.predict`` -- confirm).
        """
        img_resized, target_ratio, _ = resize_aspect_ratio(
            sub_img, 2560, interpolation=cv2.INTER_LINEAR, mag_ratio=1.)
        ratio_h = ratio_w = 1 / target_ratio

        x = normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]
        x = x.to(self.device)
        # Inference only: do not record an autograd graph.
        with torch.no_grad():
            y, _ = craft(x)
        score_text = y[0, :, :, 0].detach().cpu().numpy()
        score_link = y[0, :, :, 1].detach().cpu().numpy()
        boxes, polys = getDetBoxes(score_text,
                                   score_link,
                                   text_threshold=0.7,
                                   link_threshold=0.4,
                                   low_text=0.4,
                                   poly=False)
        boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = adjustResultCoordinates(polys, ratio_w, ratio_h)
        for k in range(len(polys)):
            if polys[k] is None:
                polys[k] = boxes[k]

        # Flatten every polygon to a 1-D int32 coordinate vector.
        result = [
            np.array(box).astype(np.int32).reshape((-1)) for box in polys
        ]
        horizontal_list, free_list = group_text_box(result,
                                                    slope_ths=0.8,
                                                    ycenter_ths=0.5,
                                                    height_ths=1,
                                                    width_ths=1,
                                                    add_margin=0.1)
        min_size = 20
        if min_size:
            # NOTE(review): horizontal boxes are filtered against a literal 10
            # while free-form boxes use min_size -- confirm this is intended.
            horizontal_list = [
                b for b in horizontal_list
                if max(b[1] - b[0], b[3] - b[2]) > 10
            ]
            # free_list is filtered but not used further in this method.
            free_list = [
                quad for quad in free_list
                if max(diff([c[0] for c in quad]), diff([c[1]
                                                         for c in quad])) > min_size
            ]
        seq[:] = [None] * len(horizontal_list)
        # NOTE(review): the recognizer is rebuilt and its weights reloaded on
        # every call; caching it would need state outside this method.
        model, vocab = build_model(self.config)
        model.load_state_dict(
            torch.load(self.weights, map_location=torch.device('cpu')))

        for i, ele in enumerate(horizontal_list):
            # Clamp negative coordinates to 0 before slicing. A distinct name
            # keeps the comprehension from shadowing the loop index ``i``.
            ele = [0 if v < 0 else v for v in ele]
            img = sub_img[ele[2]:ele[3], ele[0]:ele[1], :]
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(img.astype(np.uint8))
            p = threading.Thread(target=self.predict,
                                 args=(model, vocab, seq, key, i, img))
            # Each thread is joined right away, so crops are processed
            # one at a time (kept as in the original).
            p.start()
            p.join()
Esempio n. 6
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda,
             image_path):
    """Run the detector on one image; return boxes and a score heatmap.

    Reads ``args.canvas_size``, ``args.mag_ratio``, ``args.debug`` and
    ``args.show_time`` from module scope.

    Args:
        net: callable returning a pair whose first item is the score tensor.
        image: image passed to ``imgproc.resize_aspect_ratio``.
        text_threshold, link_threshold, low_text: forwarded unchanged to
            ``craft_utils.getDetBoxes``.
        cuda: when truthy, the input tensor is moved to the GPU.
        image_path: source path; its base name (up to the first dot) names
            the debug dumps written when ``args.debug`` is set.

    Returns:
        ``(boxes, ret_score_text)``: the rescaled result of ``getDetBoxes``
        and the heatmap rendering of the side-by-side score maps.
    """
    t0 = time.time()

    # resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass (inference only: no autograd graph is recorded)
    with torch.no_grad():
        y, _ = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    t0 = time.time() - t0
    t1 = time.time()

    if args.debug:
        debug_dir = './debug'
        # np.save does not create missing directories.
        os.makedirs(debug_dir, exist_ok=True)
        stem = os.path.basename(image_path).split('.')[0]
        np.save(os.path.join(debug_dir, stem + '_score_text.npy'), score_text)
        np.save(os.path.join(debug_dir, stem + '_score_link.npy'), score_link)

    # Post-processing
    boxes = craft_utils.getDetBoxes(score_text, score_link, text_threshold,
                                    link_threshold, low_text)
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    t1 = time.time() - t1

    # render results (optional)
    render_img = np.hstack((score_text.copy(), score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, ret_score_text
Esempio n. 7
0
    def pull_item(self, index):
        """Load, augment and convert one training sample to TensorFlow tensors.

        Returns:
            ``(image_tensor, region_scores_tensor, affinity_scores_tensor,
            confidence_mask_tensor, confidences)``. ``confidences`` is the
            mean of the loaded confidences, or ``1.0`` when there are none.
        """
        (image, character_bboxes, words, confidence_mask,
         confidences) = self.load_image_gt_and_confidencemask(index)

        # Collapse per-item confidences to one scalar (1.0 when none given).
        confidences = (1.0 if len(confidences) == 0
                       else np.array(confidences).mean())

        # Empty score maps matching the image's height and width.
        map_shape = (image.shape[0], image.shape[1])
        region_scores = np.zeros(map_shape, dtype=np.float32)
        affinity_scores = np.zeros(map_shape, dtype=np.float32)
        affinity_bboxes = []

        if len(character_bboxes) > 0:
            transformer = self.gaussianTransformer
            region_scores = transformer.generate_region(
                region_scores.shape, character_bboxes)
            affinity_scores, affinity_bboxes = transformer.generate_affinity(
                region_scores.shape, character_bboxes, words)

        if self.viz:
            self.saveImage(self.get_imagename(index), image.copy(),
                           character_bboxes, affinity_bboxes, region_scores,
                           affinity_scores, confidence_mask)

        # Apply the same random crop / flip / rotate to image and all maps.
        bundle = [image, region_scores, affinity_scores, confidence_mask]
        bundle = random_crop(bundle, (self.target_size, self.target_size),
                             character_bboxes)
        bundle = random_horizontal_flip(bundle)
        bundle = random_rotate(bundle)
        cvimage, region_scores, affinity_scores, confidence_mask = bundle

        region_scores = self.resizeGt(region_scores)
        affinity_scores = self.resizeGt(affinity_scores)
        confidence_mask = self.resizeGt(confidence_mask)

        if self.viz:
            self.saveInput(self.get_imagename(index), cvimage, region_scores,
                           affinity_scores, confidence_mask)

        # Brightness/saturation jitter is disabled in this variant.
        rgb_image = Image.fromarray(cvimage).convert('RGB')
        image = imgproc.normalizeMeanVariance(np.array(rgb_image),
                                              mean=(0.485, 0.456, 0.406),
                                              variance=(0.229, 0.224, 0.225))

        # The image stays in [h, w, c] layout (no transpose is applied).
        image_tensor = tf.convert_to_tensor(image, np.float32)
        region_scores_tensor = tf.convert_to_tensor(region_scores / 255,
                                                    np.float32)
        affinity_scores_tensor = tf.convert_to_tensor(affinity_scores / 255,
                                                      np.float32)
        # NOTE(review): the confidence mask is divided by 255 as well --
        # confirm the mask's value range.
        confidence_mask_tensor = tf.convert_to_tensor(confidence_mask / 255,
                                                      np.float32)
        return (image_tensor, region_scores_tensor, affinity_scores_tensor,
                confidence_mask_tensor, confidences)
Esempio n. 8
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly,
             ocr_type):
    """Run the detector on one image; return boxes, polygons and a heatmap.

    Reads ``args.canvas_size``, ``args.mag_ratio`` and ``args.show_time``
    from module scope.

    Args:
        net: callable returning a pair whose first item is the score tensor.
        image: image passed to ``imgproc.resize_aspect_ratio``; its ``shape``
            is also handed to ``utils.cluster_sort``.
        text_threshold, link_threshold, low_text, poly, ocr_type: forwarded
            unchanged to ``utils.getDetBoxes``.
        cuda: when truthy, the input tensor is moved to the GPU.

    Returns:
        ``(boxes, polys, ret_score_text)``.
    """
    t0 = time.time()

    # resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = x.unsqueeze(0)  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass (inference only: no autograd graph is recorded)
    with torch.no_grad():
        y, _ = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().detach().numpy()
    score_link = y[0, :, :, 1].cpu().detach().numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = utils.getDetBoxes(score_text, score_link, text_threshold,
                                     link_threshold, low_text, poly, ocr_type)

    # coordinate adjustment
    boxes = utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = utils.adjustResultCoordinates(polys, ratio_w, ratio_h)

    if ocr_type == 'single_char':
        # NOTE(review): boxes are re-ordered before the index-based polygon
        # fallback below -- confirm cluster_sort keeps length and pairing.
        boxes = utils.cluster_sort(image.shape, boxes)

    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = np.hstack((score_text.copy(), score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
Esempio n. 9
0
 def __getitem__(self, idx):
     """Return ``(tensor, 1)`` for the image at ``self.image_list[idx]``.

     The image is loaded, resized with ``imgproc.resize_aspect_ratio``,
     normalized and permuted from [h, w, c] to [c, h, w]. No batch
     dimension is added here.
     """
     source = imgproc.loadImage(self.image_list[idx])
     resized, _ratio, _heatmap_size = imgproc.resize_aspect_ratio(
         source,
         self.canvas_size,
         interpolation=cv2.INTER_LINEAR,
         mag_ratio=self.mag_ratio)
     normalized = imgproc.normalizeMeanVariance(resized)
     chw = torch.from_numpy(normalized).permute(2, 0, 1)
     return chw, 1
Esempio n. 10
0
    def test_net(self, image, text_threshold, link_threshold, low_text, cuda, poly, refine_net=None):
        """Run ``self.net`` (and optionally a refiner) on one image.

        Reads ``canvas_size`` and ``mag_ratio`` from module scope.

        Args:
            image: image passed to ``imgproc.resize_aspect_ratio``.
            text_threshold, link_threshold, low_text, poly: forwarded
                unchanged to ``craft_utils.getDetBoxes``.
            cuda: when truthy, the input tensor is moved to the GPU.
            refine_net: optional callable ``(y, feature) -> tensor``; when
                given, index 0 of its output's last axis replaces the link
                score.

        Returns:
            ``(boxes, polys, ret_score_text)``.
        """
        t0 = time.time()

        # Resize to the working canvas.
        resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
            image, canvas_size, interpolation=cv2.INTER_LINEAR,
            mag_ratio=mag_ratio)
        inv_ratio = 1 / target_ratio  # same factor for width and height

        # Normalize, then [h, w, c] -> [b, c, h, w].
        batch = torch.from_numpy(imgproc.normalizeMeanVariance(resized))
        batch = Variable(batch.permute(2, 0, 1).unsqueeze(0))
        if cuda:
            batch = batch.cuda()

        with torch.no_grad():
            y, feature = self.net(batch)

        # Index 0 of the last axis is the text score, index 1 the link score.
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()

        # Optional link refinement overrides the raw link map.
        if refine_net is not None:
            with torch.no_grad():
                y_refiner = refine_net(y, feature)
            score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

        # Timings are computed but only consumed by a disabled print.
        t0 = time.time() - t0
        t1 = time.time()

        boxes, polys = craft_utils.getDetBoxes(
            score_text, score_link, text_threshold, link_threshold, low_text,
            poly)

        # Map detections back to the original image scale.
        boxes = craft_utils.adjustResultCoordinates(boxes, inv_ratio, inv_ratio)
        polys = craft_utils.adjustResultCoordinates(polys, inv_ratio, inv_ratio)
        for idx, candidate in enumerate(polys):
            if candidate is None:
                polys[idx] = boxes[idx]

        t1 = time.time() - t1

        # Text and link maps side by side, rendered as a heatmap image.
        side_by_side = np.hstack((score_text.copy(), score_link))
        ret_score_text = imgproc.cvt2HeatmapImg(side_by_side)

        return boxes, polys, ret_score_text
Esempio n. 11
0
    def test_net(self, image_opencv):
        """Detect text in ``image_opencv`` using the instance configuration.

        Uses ``self.canvas_size``, ``self.interpolation``, ``self.mag_ratio``,
        ``self.cuda``, ``self.net``, ``self.refine_net`` and the threshold
        attributes. Prints timings when ``self.show_time`` is truthy.

        Returns:
            ``(boxes, polys)`` rescaled to the original image; ``None``
            polygon entries are replaced by the box at the same index.
        """
        # Start timing before the forward pass so the reported "infer" time
        # covers the detector itself, not only the optional refiner.
        t0 = time.time()

        # resize
        img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
            image_opencv,
            self.canvas_size,
            interpolation=self.interpolation,
            mag_ratio=self.mag_ratio)
        ratio_h = ratio_w = 1 / target_ratio

        # preprocessing
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]

        if self.cuda:
            x = x.cuda()

        # Inference only: do not record an autograd graph for either network.
        with torch.no_grad():
            # forward pass
            y, feature = self.net(x)

            # make score and link map
            score_text = y[0, :, :, 0].detach().cpu().numpy()
            score_link = y[0, :, :, 1].detach().cpu().numpy()

            # refine link
            if self.refine_net is not None:
                y_refiner = self.refine_net(y, feature)
                score_link = y_refiner[0, :, :, 0].detach().cpu().numpy()
        t0 = time.time() - t0
        t1 = time.time()

        # Post-processing
        boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                               self.text_threshold,
                                               self.link_threshold,
                                               self.low_text, self.poly)

        # coordinate adjustment
        boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
        polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
        for k in range(len(polys)):
            if polys[k] is None:
                polys[k] = boxes[k]
        t1 = time.time() - t1

        if self.show_time:
            print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))
        return boxes, polys
Esempio n. 12
0
def get_prediction(net,
                   image,
                   text_threshold,
                   link_threshold,
                   low_text,
                   cuda,
                   poly,
                   refine_net=None):
    """Detect text in ``image`` with a fixed 1280 canvas and 1.5 magnification.

    Args:
        net: callable returning ``(y, feature)`` for a batched input tensor.
        image: image passed to ``imgproc.resize_aspect_ratio``.
        text_threshold, link_threshold, low_text, poly: forwarded unchanged
            to ``craft_utils.getDetBoxes``.
        cuda: when truthy, the input tensor is moved to the GPU.
        refine_net: optional callable ``(y, feature) -> tensor``; when given,
            index 0 of its output's last axis replaces the link score.

    Returns:
        ``(boxes, polys)`` rescaled to the original image; ``None`` polygon
        entries are replaced by the box at the same index.
    """
    # Resize to the working canvas.
    resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, 1280, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)
    inv_ratio = 1 / target_ratio  # same factor for width and height

    # Normalize, then [h, w, c] -> [b, c, h, w].
    batch = torch.from_numpy(imgproc.normalizeMeanVariance(resized))
    batch = Variable(batch.permute(2, 0, 1).unsqueeze(0))
    if cuda:
        batch = batch.cuda()

    with torch.no_grad():
        y, feature = net(batch)

    # Index 0 of the last axis is the text score, index 1 the link score.
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # Optional link refinement overrides the raw link map.
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # Map detections back to the original image scale.
    boxes = craft_utils.adjustResultCoordinates(boxes, inv_ratio, inv_ratio)
    polys = craft_utils.adjustResultCoordinates(polys, inv_ratio, inv_ratio)
    for idx, candidate in enumerate(polys):
        if candidate is None:
            polys[idx] = boxes[idx]

    return boxes, polys
    def pull_item(self, index):
        """Load, augment and convert one training sample to torch tensors.

        Returns:
            ``(image, region_scores, affinity_scores, confidence_mask)`` as
            float tensors. The image is in [c, h, w] layout; the two score
            maps are divided by 255 and the confidence mask is not.
        """
        # The fifth loaded value (confidences) is not used in this variant.
        (image, character_bboxes, words, confidence_mask,
         _confidences) = self.load_image_gt_and_confidence_mask(index)

        # Empty score maps matching the image's height and width.
        map_shape = (image.shape[0], image.shape[1])
        region_scores = np.zeros(map_shape, dtype=np.float32)
        affinity_scores = np.zeros(map_shape, dtype=np.float32)
        affinity_bboxes = []

        if len(character_bboxes) > 0:
            transformer = self.gaussianTransformer
            region_scores = transformer.generate_region(
                region_scores.shape, character_bboxes)
            affinity_scores, affinity_bboxes = transformer.generate_affinity(
                region_scores.shape, character_bboxes, words)

        if self.viz:
            self.save_image(self.get_image_name(index), image.copy(),
                            character_bboxes, affinity_bboxes, region_scores,
                            affinity_scores, confidence_mask)

        # Apply the same random crop / flip / rotate to image and all maps.
        bundle = [image, region_scores, affinity_scores, confidence_mask]
        bundle = random_crop(bundle, (self.target_size, self.target_size),
                             character_bboxes)
        bundle = random_horizontal_flip(bundle)
        bundle = random_rotate(bundle)
        cvimage, region_scores, affinity_scores, confidence_mask = bundle

        region_scores = self.resizeGt(region_scores)
        affinity_scores = self.resizeGt(affinity_scores)
        confidence_mask = self.resizeGt(confidence_mask)

        if self.viz:
            self.save(self.get_image_name(index), cvimage, region_scores,
                      affinity_scores, confidence_mask)

        # Colour augmentation on the RGB image, then normalization.
        rgb_image = Image.fromarray(cvimage).convert('RGB')
        jitter = transforms.ColorJitter(brightness=32.0 / 255, saturation=0.5)
        normalized = imgproc.normalizeMeanVariance(np.array(jitter(rgb_image)))

        image = torch.from_numpy(normalized).float().permute(2, 0, 1)
        region_scores_torch = torch.from_numpy(region_scores / 255).float()
        affinity_scores_torch = torch.from_numpy(affinity_scores / 255).float()
        confidence_mask_torch = torch.from_numpy(confidence_mask).float()
        return (image, region_scores_torch, affinity_scores_torch,
                confidence_mask_torch)
Esempio n. 14
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda):
    """Detect character-, word- and line-level polygons in ``image``.

    Reads ``opt.MAXIMUM_IMAGE_SIZE`` and ``opt.MAG_RATIO`` from module scope.

    Args:
        net: callable returning a pair whose first item is the score tensor.
        image: image passed to ``imgproc.resize_aspect_ratio``.
        text_threshold, link_threshold, low_text: forwarded unchanged to
            ``ltd_utils.getDetBoxes``.
        cuda: when truthy, the input tensor is moved to the GPU.

    Returns:
        ``(polys, word_polys, line_polys, score_text)``. Each polygon list is
        rescaled to the original image, with ``None`` entries replaced by
        the box at the same index.
    """
    # resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image,
        opt.MAXIMUM_IMAGE_SIZE,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=opt.MAG_RATIO)
    ratio_h = ratio_w = 1 / target_ratio

    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]

    if cuda:
        x = x.cuda()

    # predict (inference only: no autograd graph is recorded)
    with torch.no_grad():
        y, _ = net(x)

    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    # post-process : get shape of bounding box
    (boxes, polys, word_boxes, word_polys, line_boxes,
     line_polys) = ltd_utils.getDetBoxes(score_text, score_link,
                                         text_threshold, link_threshold,
                                         low_text)

    boxes = ltd_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = ltd_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)

    word_boxes = ltd_utils.adjustResultCoordinates(word_boxes, ratio_w,
                                                   ratio_h)
    word_polys = ltd_utils.adjustResultCoordinates(word_polys, ratio_w,
                                                   ratio_h)

    line_boxes = ltd_utils.adjustResultCoordinates(line_boxes, ratio_w,
                                                   ratio_h)
    line_polys = ltd_utils.adjustResultCoordinates(line_polys, ratio_w,
                                                   ratio_h)

    # Fall back to the box wherever no polygon was produced.
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]
    for k in range(len(word_polys)):
        if word_polys[k] is None:
            word_polys[k] = word_boxes[k]
    for k in range(len(line_polys)):
        if line_polys[k] is None:
            line_polys[k] = line_boxes[k]

    return polys, word_polys, line_polys, score_text
def representative_data_gen():
    """Yield preprocessed sample batches from the first ten directory entries.

    Reads the module-level ``dataset_path``. Each entry is loaded, resized to
    800x1280 (width x height), run through ``imgproc.resize_aspect_ratio``
    and normalized.

    Yields:
        A one-element list holding a NumPy array in [b, c, h, w] layout
        (presumably consumed as converter calibration data -- confirm).
    """
    for file_name in os.listdir(dataset_path)[:10]:
        # os.path.join also works when dataset_path has no trailing separator.
        file_path = os.path.join(dataset_path, file_name)
        image = imgproc.loadImage(file_path)
        image = cv2.resize(image,
                           dsize=(800, 1280),
                           interpolation=cv2.INTER_LINEAR)
        img_resized, _, _ = imgproc.resize_aspect_ratio(
            image, 1280, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)

        # preprocessing
        x = imgproc.normalizeMeanVariance(img_resized)
        x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
        x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
        x = x.cpu().detach().numpy()
        yield [x]
Esempio n. 16
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly,filename,result_folder=result_folder):
    """Run the TensorFlow detector on one image and write its score heatmap.

    Reads ``canvas_size`` and ``mag_ratio`` from module scope. Prints a
    marker line and the batched input shape.

    Args:
        net: callable returning a pair whose first item is the score tensor.
        image: image passed to ``imgproc.resize_aspect_ratio``.
        text_threshold, link_threshold, low_text, poly: forwarded unchanged
            to ``craft_utils.getDetBoxes``.
        cuda: not used in this variant.
        filename: stem of the written ``<filename>_mask.jpg`` file.
        result_folder: prefix string concatenated directly with ``filename``.

    Returns:
        ``(boxes, polys, ret_score_text)``.
    """
    t0 = time.time()
    resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image, canvas_size, interpolation=cv2.INTER_LINEAR,
        mag_ratio=mag_ratio)
    inv_ratio = 1 / target_ratio  # same factor for width and height

    # Normalize, then add a leading batch axis (layout stays channels-last).
    normalized = imgproc.normalizeMeanVariance(resized)
    print("###")
    batch = tf.expand_dims(normalized, 0)
    print(batch.shape)

    y, _ = net(batch)

    # Index 0 of the last axis is the text score, index 1 the link score.
    score_text = y[0, :, :, 0].numpy()
    score_link = y[0, :, :, 1].numpy()

    # Timings are computed but only consumed by a disabled print.
    t0 = time.time() - t0
    t1 = time.time()

    boxes, polys = craft_utils.getDetBoxes(
        score_text, score_link, text_threshold, link_threshold, low_text,
        poly)

    # Map detections back to the original image scale.
    boxes = craft_utils.adjustResultCoordinates(boxes, inv_ratio, inv_ratio)
    polys = craft_utils.adjustResultCoordinates(polys, inv_ratio, inv_ratio)
    for idx, candidate in enumerate(polys):
        if candidate is None:
            polys[idx] = boxes[idx]

    t1 = time.time() - t1

    # Text and link maps side by side, rendered and written to disk.
    side_by_side = np.hstack((score_text.copy(), score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(side_by_side)
    cv2.imwrite(result_folder + filename + "_mask.jpg", ret_score_text)

    return boxes, polys, ret_score_text
Esempio n. 17
0
    def __init__(self, args):
        """Load every training image that has ground truth on disk.

        Images are listed from ``./data/train/data``. For each one, the
        region, link and confidence maps are read from
        ``./data/train/ground_truth/<dataset>/gt_<name>/`` and resized to
        half the resolution of the resized image. Images with no
        ground-truth directory are skipped.

        Args:
            args: object providing ``canvas_size`` and ``mag_ratio``.
        """
        filelist, _, _ = file_utils.list_files('./data/train/data')
        self.images = []
        self.confmaps = []
        self.scores_region = []
        self.scores_link = []
        for filename in filelist:
            # get datapath
            dataset = os.path.dirname(filename).split(os.sep)[-1]
            # splitext returns (stem, ext); only the stem belongs in the path.
            filenum = os.path.splitext(os.path.basename(filename))[0]
            label_dir = './data/train/ground_truth/{}/gt_{}/'.format(
                dataset, filenum)

            # Skip images whose ground truth has not been generated.
            if not os.path.exists(label_dir):
                continue

            image = imgproc.loadImage(filename)
            # NOTE(review): cv2.resize below needs NumPy arrays -- confirm
            # the .pt files hold arrays rather than torch tensors.
            score_region = torch.load(label_dir + 'region.pt')
            score_link = torch.load(label_dir + 'link.pt')
            conf_map = torch.load(label_dir + 'conf.pt')

            # resize
            img_resized, _, _ = imgproc.resize_aspect_ratio(
                image,
                args.canvas_size,
                interpolation=cv2.INTER_LINEAR,
                mag_ratio=args.mag_ratio)

            # Image Preprocess
            x = imgproc.normalizeMeanVariance(img_resized)
            x = x.transpose((2, 0, 1))  # [h, w, c] to [c, h, w]

            h, w, _ = img_resized.shape

            # GT reshape: cv2 takes dsize as integer (width, height).
            half_size = (w // 2, h // 2)
            score_region = cv2.resize(score_region, dsize=half_size)
            score_link = cv2.resize(score_link, dsize=half_size)
            conf_map = cv2.resize(conf_map, dsize=half_size)

            self.scores_region.append(score_region)
            self.scores_link.append(score_link)
            self.confmaps.append(conf_map)
            self.images.append(x)
Esempio n. 18
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    """Run the detector on one image; return boxes, polygons and a heatmap.

    Reads ``args.canvas_size``, ``args.mag_ratio`` and ``args.show_time``
    from module scope.

    Args:
        net: callable returning a pair whose first item is the score tensor.
        image: image passed to ``imgproc.resize_aspect_ratio``.
        text_threshold, link_threshold, low_text, poly: forwarded unchanged
            to ``craft_utils.getDetBoxes``.
        cuda: when truthy, the input tensor is moved to the GPU.

    Returns:
        ``(boxes, polys, ret_score_text)``.
    """
    t0 = time.time()

    # resize
    img_resized, target_ratio, _ = imgproc.resize_aspect_ratio(
        image, args.canvas_size, interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass (inference only: no autograd graph is recorded)
    with torch.no_grad():
        y, _ = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].detach().cpu().numpy()
    score_link = y[0, :, :, 1].detach().cpu().numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = np.hstack((score_text.copy(), score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
Esempio n. 19
0
def gt_net(net, image, args):
    """Return the text score map for ``image`` plus the resize ratio used.

    Args:
        net: callable returning ``(y, feature)`` for a batched input tensor.
        image: image passed to ``imgproc.resize_aspect_ratio``.
        args: object providing ``canvas_size``, ``mag_ratio`` and ``cuda``.

    Returns:
        ``(score_text, target_ratio)``: index 0 of the output's last axis as
        a NumPy array, and the ratio reported by the resize step.
    """
    # Resize to the working canvas.
    resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)

    # Normalize, then [h, w, c] -> [b, c, h, w].
    normalized = imgproc.normalizeMeanVariance(resized)
    batch = torch.tensor(normalized).permute(2, 0, 1).unsqueeze(0)
    if args.cuda:
        batch = batch.cuda()

    with torch.no_grad():
        y, feature = net(batch)

    # Only the text score channel is needed here.
    return y[0, :, :, 0].cpu().data.numpy(), target_ratio
Esempio n. 20
0
    def detect(self, image):
        """Detect text in ``image`` and return one ``box2xyxy`` result per box.

        Uses ``self.canvas_size``, ``self.mag_ratio``, ``self.cuda``,
        ``self.net``, ``self.refine_net`` and the threshold attributes.

        Returns:
            A list with ``box2xyxy(box, image.shape[0:2])`` for every
            detected box, in detection order.
        """
        # Resize to the working canvas.
        resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
            image,
            self.canvas_size,
            interpolation=cv2.INTER_LINEAR,
            mag_ratio=self.mag_ratio)
        inv_ratio = 1 / target_ratio  # same factor for width and height

        # Normalize, then [h, w, c] -> [b, c, h, w].
        batch = torch.from_numpy(imgproc.normalizeMeanVariance(resized))
        batch = Variable(batch.permute(2, 0, 1).unsqueeze(0))
        if self.cuda:
            batch = batch.cuda()

        with torch.no_grad():
            y, feature = self.net(batch)

        # Index 0 of the last axis is the text score, index 1 the link score.
        score_text = y[0, :, :, 0].cpu().data.numpy()
        score_link = y[0, :, :, 1].cpu().data.numpy()

        # Optional link refinement overrides the raw link map.
        if self.refine_net is not None:
            with torch.no_grad():
                y_refiner = self.refine_net(y, feature)
            score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

        # Polygons are discarded; only boxes are converted and returned.
        boxes, _ = craft_utils.getDetBoxes(score_text, score_link,
                                           self.text_threshold,
                                           self.link_threshold,
                                           self.low_text, self.poly)
        boxes = craft_utils.adjustResultCoordinates(boxes, inv_ratio, inv_ratio)

        return [box2xyxy(box, image.shape[0: 2]) for box in boxes]
Esempio n. 21
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    """Return the text (region) score map predicted by ``net`` for ``image``.

    The image is resized with ``imgproc.resize_aspect_ratio`` using
    ``args.canvas_size`` and ``args.mag_ratio``, normalised and run through
    ``net``. Only channel 0 of the network output is returned; no box
    post-processing is performed here.

    Args:
        net: Callable model returning a ``(y, feature)`` pair.
        image: Input image array in [h, w, c] layout.
        text_threshold: Unused; kept for signature compatibility.
        link_threshold: Unused; kept for signature compatibility.
        low_text: Unused; kept for signature compatibility.
        cuda: When truthy, the input tensor is moved to the GPU.
        poly: Unused; kept for signature compatibility.

    Returns:
        NumPy array holding ``y[0, :, :, 0]``.
    """
    # resize
    img_resized, _target_ratio, _size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)    # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))                # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # Forward pass. This is inference only, so autograd bookkeeping is
    # disabled to avoid holding the graph in memory.
    with torch.no_grad():
        y, _ = net(x)

    # Only the text score map is needed by callers of this variant.
    return y[0, :, :, 0].cpu().data.numpy()
    def test_net(self,
                 net,
                 image,
                 text_threshold,
                 link_threshold,
                 low_text,
                 poly,
                 refine_net=None):
        """Run ``net`` on ``image`` and return boxes, polygons and a heatmap.

        The image is resized with a fixed square size of 1280 and a
        magnification ratio of 1.5, normalised, and fed to ``net`` on
        whatever device the tensor is created on (no ``.cuda()`` call here).

        Args:
            net: Callable model returning a ``(y, feature)`` pair.
            image: Input image array in [h, w, c] layout.
            text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
            link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
            low_text: Forwarded to ``craft_utils.getDetBoxes``.
            poly: Forwarded to ``craft_utils.getDetBoxes``.
            refine_net: Accepted but not used by this implementation.

        Returns:
            ``(boxes, polys, heatmap)`` where missing polygons have been
            replaced by the corresponding box and ``heatmap`` is the result
            of ``imgproc.cvt2HeatmapImg`` on the two score maps side by side.
        """
        resized, scale, _ = imgproc.resize_aspect_ratio(
            image, 1280, interpolation=cv.INTER_LINEAR, mag_ratio=1.5)
        inv_scale = 1 / scale

        # [h, w, c] -> [c, h, w] -> [b, c, h, w]
        net_input = torch.from_numpy(imgproc.normalizeMeanVariance(resized))
        net_input = Variable(net_input.permute(2, 0, 1).unsqueeze(0))

        with torch.no_grad():
            y, _ = net(net_input)

        # Channel 0: text score, channel 1: link score.
        region_map = y[0, :, :, 0].cpu().data.numpy()
        affinity_map = y[0, :, :, 1].cpu().data.numpy()

        # Post-processing followed by scaling back with the inverse ratio.
        boxes, polys = craft_utils.getDetBoxes(region_map, affinity_map,
                                               text_threshold, link_threshold,
                                               low_text, poly)
        boxes = craft_utils.adjustResultCoordinates(boxes, inv_scale, inv_scale)
        polys = craft_utils.adjustResultCoordinates(polys, inv_scale, inv_scale)

        # Fall back to the box wherever no polygon was produced.
        for idx, candidate in enumerate(polys):
            if candidate is None:
                polys[idx] = boxes[idx]

        # Render both score maps next to each other (optional output).
        side_by_side = np.hstack((region_map.copy(), affinity_map))
        heatmap = imgproc.cvt2HeatmapImg(side_by_side)

        return boxes, polys, heatmap
Esempio n. 23
0
    def __getitem__(self, i):
        """Return the preprocessed image tensor and its three label tensors.

        Args:
            i: Index into ``self.images`` and ``self.labels``.

        Returns:
            ``(x, region, link, conf)``: ``x`` is the normalised image in
            [c, h, w] layout; the other three are float64 tensors loaded from
            ``region.pt``, ``link.pt`` and ``conf.pt`` under the label prefix.
        """
        # Load, resize, place on the canvas and normalise the image.
        raw = imgproc.loadImage(self.images[i])
        resized, _ratio, _heatmap_size = imgproc.resize_aspect_ratio(
            raw,
            self.args.canvas_size,
            interpolation=cv2.INTER_LINEAR,
            mag_ratio=self.args.mag_ratio)
        canvas = imgproc.fill_canvas(resized, self.args.canvas_size)
        normalized = imgproc.normalizeMeanVariance(canvas)
        x = torch.tensor(normalized).permute(2, 0, 1)  # [h, w, c] to [c, h, w]

        # Labels are stored as separate files sharing the same path prefix.
        prefix = self.labels[i]
        region, link, conf = [
            torch.tensor(torch.load(prefix + name), dtype=torch.float64)
            for name in ('region.pt', 'link.pt', 'conf.pt')
        ]

        return x, region, link, conf
Esempio n. 24
0
    def inference_pursedo_bboxes(self, net, image, word_bbox, word, viz=False):
        """Estimate per-character boxes for one annotated word.

        The word region is cropped with ``self.crop_image_by_bbox``, rescaled
        to a height of 64 pixels and passed through ``net``. Character
        candidates are extracted from the region score map with
        ``watershed``. If ``self.get_confidence`` rates the result at 0.5 or
        lower, the crop is instead split into equal-width boxes, one per
        non-space character, and the confidence is set to 0.5. Boxes are
        finally mapped back through the inverse of the crop transform and
        clipped to the bounds of ``image``.

        Args:
            net: Callable model returning a ``(scores, _)`` pair.
            image: Full source image array.
            word_bbox: Word box forwarded to ``self.crop_image_by_bbox``.
            word: Transcription of the word.
            viz: Accepted for interface compatibility; not used.

        Returns:
            ``(bboxes, region_scores, confidence)``.
        """
        word_image, MM = self.crop_image_by_bbox(image, word_bbox)

        # Count characters ignoring whitespace. The earlier
        # word.replace('\s', '') only removed a literal backslash followed
        # by "s", so whitespace was never actually stripped.
        real_char_nums = len(''.join(word.split()))

        # Rescale the crop so that its height becomes 64 pixels.
        crop = word_image.copy()
        scale = 64.0 / crop.shape[0]
        crop = cv2.resize(crop, None, fx=scale, fy=scale)
        crop_h, crop_w = crop.shape[0], crop.shape[1]

        img_torch = torch.from_numpy(
            imgproc.normalizeMeanVariance(crop,
                                          mean=(0.485, 0.456, 0.406),
                                          variance=(0.229, 0.224, 0.225)))
        img_torch = img_torch.permute(2, 0, 1).unsqueeze(0)
        img_torch = img_torch.type(torch.FloatTensor).cuda()
        scores, _ = net(img_torch)
        region_scores = scores[0, :, :, 0].cpu().data.numpy()

        # Bring the score map to the crop size and give it three channels
        # before handing it to the watershed step.
        bgr_region_scores = cv2.resize(region_scores, (crop_w, crop_h))
        bgr_region_scores = cv2.cvtColor(bgr_region_scores, cv2.COLOR_GRAY2BGR)
        pursedo_bboxes = watershed(crop, bgr_region_scores, False)

        # Drop candidates whose mean coordinate value is 2 or less.
        kept = []
        for candidate in pursedo_bboxes:
            if np.mean(candidate.ravel()) > 2:
                kept.append(candidate)
            else:
                print("filter bboxes", candidate)
        pursedo_bboxes = np.array(kept, np.float32)

        # Order the remaining boxes by the x coordinate of their first point.
        if pursedo_bboxes.shape[0] > 1:
            order = np.argsort(pursedo_bboxes[:, 0, 0])
            pursedo_bboxes = pursedo_bboxes[order]

        confidence = self.get_confidence(real_char_nums, len(pursedo_bboxes))

        if confidence <= 0.5:
            # Low confidence: split the crop evenly over the characters of
            # the word, skipping the slots that belong to spaces.
            width_per_char = crop_w / len(word)
            bboxes = np.array(
                [[[i * width_per_char, 0],
                  [(i + 1) * width_per_char, 0],
                  [(i + 1) * width_per_char, crop_h],
                  [i * width_per_char, crop_h]]
                 for i, char in enumerate(word) if char != ' '],
                np.float32)
            confidence = 0.5
        else:
            bboxes = pursedo_bboxes

        # Undo the height-64 rescaling, then map through the inverse of MM.
        bboxes /= scale
        try:
            # The inverse is loop-invariant, so it is computed once.
            inverse_MM = np.matrix(MM).I
            ones = np.ones((4, 1))
            for j in range(len(bboxes)):
                homogeneous = np.concatenate([bboxes[j], ones], axis=-1)
                ori = np.matmul(inverse_MM,
                                homogeneous.transpose(1, 0)).transpose(1, 0)
                bboxes[j] = ori[:, :2]
        except Exception as e:
            # Best effort: keep going with whatever boxes were mapped. The
            # handler used to print ``gt_path`` as well, which is not defined
            # in this method and would raise NameError inside the handler
            # unless it exists as a module-level global.
            print(e)

        bboxes[:, :, 1] = np.clip(bboxes[:, :, 1], 0., image.shape[0] - 1)
        bboxes[:, :, 0] = np.clip(bboxes[:, :, 0], 0., image.shape[1] - 1)

        return bboxes, region_scores, confidence
Esempio n. 25
0
def test_net(net, image, text_threshold, link_threshold, low_text, cuda, poly):
    """Run ``net`` on ``image`` and post-process every output channel.

    Each channel of the network output is treated as its own text score map
    and passed to ``craft_utils.getDetBoxes``. The resulting boxes and
    polygons are NOT rescaled to the original image size in this variant.

    Args:
        net: Callable model returning a ``(y, _)`` pair.
        image: Input image array in [h, w, c] layout.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Unused; kept for signature compatibility.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: When truthy, the input tensor is moved to the GPU.
        poly: Forwarded to ``craft_utils.getDetBoxes``.

    Returns:
        ``(gh_pred, boxes_pred, polys_pred, size_heatmap)`` where ``gh_pred``
        is the output in [c, h, w] layout, ``boxes_pred``/``polys_pred`` are
        tuples with one entry per channel, and ``size_heatmap`` comes from
        ``imgproc.resize_aspect_ratio``.
    """
    t0 = time.time()

    # resize
    img_resized, _target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # Forward pass; inference only, so gradient tracking is disabled.
    with torch.no_grad():
        y, _ = net(x)

    # [h, w, c] to [c, h, w] so that iterating yields one map per channel.
    gh_pred = y[0, :, :, :].permute((2, 0, 1)).cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing, once per channel.
    postproc = [
        craft_utils.getDetBoxes(score_text, text_threshold, low_text, poly)
        for score_text in gh_pred
    ]
    boxes_pred, polys_pred = zip(*postproc)

    # Fall back to the box wherever no polygon was produced.
    for boxes, polys in zip(boxes_pred, polys_pred):
        for k in range(len(polys)):
            if polys[k] is None:
                polys[k] = boxes[k]

    t1 = time.time() - t1

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return gh_pred, boxes_pred, polys_pred, size_heatmap
Esempio n. 26
0
def test_net(net,
             image,
             text_threshold,
             link_threshold,
             low_text,
             cuda,
             poly,
             refine_net=None,
             overlap=0.0):
    """Detect text in ``image`` at native size, optionally tile by tile.

    The image is not resized. When ``0 < overlap < 1`` the normalised image
    is cut into overlapping tiles with ``splitOverlap``, the tiles are run
    through the network as one batch and the score maps are stitched back
    with ``joinOverlap``. For any other ``overlap`` (including the default
    ``0.0``) the image is processed as a single-item batch.

    Args:
        net: Callable model returning a ``(y, feature)`` pair.
        image: Input image array in [h, w, c] layout.
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        low_text: Forwarded to ``craft_utils.getDetBoxes``.
        cuda: When truthy, the input tensor is moved to the GPU.
        poly: Forwarded to ``craft_utils.getDetBoxes``.
        refine_net: Optional refiner whose output replaces the link map.
        overlap: Tile overlap; tiling is enabled only for values strictly
            between 0 and 1.

    Returns:
        ``(boxes, polys, ret_score_text)`` where missing polygons have been
        replaced by the corresponding box and ``ret_score_text`` is the
        heatmap rendering of the text and link maps side by side.
    """
    t0 = time.time()

    # No resizing in this variant, so coordinates need no rescaling.
    img_resized = image
    ratio_h = ratio_w = 1

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)

    split_coord = []
    use_tiles = 0.0 < overlap < 1.0
    if use_tiles:
        x, split_coord = splitOverlap(x, overlap)
    else:
        # Without tiling x is still [h, w, c]; add the batch axis so that
        # the 4-D permute below is valid (it used to fail on this path).
        x = x[np.newaxis, ...]

    x = torch.from_numpy(x).permute(0, 3, 1, 2)  # [b, h, w, c] to [b, c, h, w]
    x = Variable(x)

    if cuda:
        x = x.cuda()

    def _to_score_map(batch_scores):
        """Convert a [b, h, w] score tensor into one 2-D NumPy score map."""
        scores = batch_scores.cpu().data.numpy()
        if use_tiles:
            return joinOverlap(scores, split_coord)
        return scores[0]

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = _to_score_map(y[:, :, :, 0])

    if refine_net is None:
        score_link = _to_score_map(y[:, :, :, 1])
    else:
        # refine link
        with torch.no_grad():
            y_refiner = refine_net(y, feature)

        score_link = _to_score_map(y_refiner[:, :, :, 0])

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]

    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text
Esempio n. 27
0
def main():
    """Run the whole pipeline on ``args.image`` and write results to ``result/``.

    Steps, in order: load the OCR and detection models on CPU, detect text
    lines, merge them, run OCR, group text lines into regions, build and
    refine a text mask, inpaint the image, translate the region texts with
    ``baidu_translator`` and render the translations onto the inpainted
    image. Intermediate and final images are saved as PNG files.
    """
    import os
    os.makedirs('result', exist_ok=True)
    text_render.prepare_renderer()

    # One dictionary entry per line; s[:-1] drops the last character of each
    # line (presumably the trailing newline - a final line without one would
    # lose a real character; TODO confirm the file always ends with '\n').
    with open('alphabet-all-v5.txt', 'r') as fp:
        dictionary = [s[:-1] for s in fp.readlines()]
    model_ocr = OCR(dictionary, 768)
    model_ocr.load_state_dict(torch.load('ocr.ckpt', map_location='cpu'),
                              strict=False)
    model_ocr.eval()

    # Detection model, loaded from the 'model' entry of the checkpoint.
    model = CRAFT_net()
    sd = torch.load('detect.ckpt', map_location='cpu')
    model.load_state_dict(sd['model'])
    model = model.cpu()
    model.eval()
    # NOTE(review): cv2.imread returns None for an unreadable path; that case
    # is not checked here.
    img = cv2.imread(args.image)
    # Two copies taken before the colour conversion, used for drawing boxes.
    img_bbox = np.copy(img)
    img_bbox_all = np.copy(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # This resize_aspect_ratio variant also returns the padding it added.
    img_resized, target_ratio, _, pad_w, pad_h = imgproc.resize_aspect_ratio(
        img, args.size, cv2.INTER_LINEAR, mag_ratio=1)
    # Keep the un-normalised resized image for the overlay written at the end.
    img_to_overlay = np.copy(img_resized)
    ratio_h = ratio_w = 1 / target_ratio
    img_resized = imgproc.normalizeMeanVariance(img_resized)
    print(img_resized.shape)
    # test() returns two score maps and a mask for the normalised image.
    rscore, ascore, mask = test(model, img_resized)
    overlay = imgproc.cvt2HeatmapImg(rscore + ascore)
    boxes, polys = craft_utils.getDetBoxes(rscore, ascore, args.text_threshold,
                                           args.link_threshold, args.low_text,
                                           False)
    boxes = craft_utils.adjustResultCoordinates(boxes,
                                                ratio_w,
                                                ratio_h,
                                                ratio_net=2)
    polys = craft_utils.adjustResultCoordinates(polys,
                                                ratio_w,
                                                ratio_h,
                                                ratio_net=2)
    # Fall back to the box wherever no polygon was produced.
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]
    # merge textlines
    polys = merge_bboxes(polys, can_merge_textline)
    # Draw every merged text line on the "unfiltered" debug image.
    for [tl, tr, br, bl] in polys:
        x = int(tl[0])
        y = int(tl[1])
        width = int(tr[0] - tl[0])
        height = int(br[1] - tr[1])
        cv2.rectangle(img_bbox_all, (x, y), (x + width, y + height),
                      color=(255, 0, 0),
                      thickness=2)
    # run OCR for each textline
    textlines = run_ocr(img_bbox, polys, dictionary, model_ocr, 32)
    # merge textline to text region, filter textlines without characters
    text_regions: List[BBox] = []
    new_textlines = []
    for (poly_regions, textline_indices,
         majority_dir) in merge_bboxes_text_region(textlines):
        [tl, tr, br, bl] = poly_regions
        # Region rectangle, grown by 5 pixels on every side.
        x = int(tl[0]) - 5
        y = int(tl[1]) - 5
        width = int(tr[0] - tl[0]) + 10
        height = int(br[1] - tr[1]) + 10
        text = ''
        logprob_lengths = []
        for textline_idx in textline_indices:
            if not text:
                text = textlines[textline_idx].text
            else:
                # Join without a space only when both neighbouring characters
                # have a code point above 255; otherwise insert a space.
                last_ch = text[-1]
                cur_ch = textlines[textline_idx].text[0]
                if ord(last_ch) > 255 and ord(cur_ch) > 255:
                    text += textlines[textline_idx].text
                else:
                    text += ' ' + textlines[textline_idx].text
            logprob_lengths.append((np.log(textlines[textline_idx].prob),
                                    len(textlines[textline_idx].text)))
        vc = count_valuable_text(text)
        # Length-weighted mean of the per-line log probabilities.
        total_logprobs = 0.0
        for (logprob, length) in logprob_lengths:
            total_logprobs += logprob * length
        total_logprobs /= sum([x[1] for x in logprob_lengths])
        # filter text region without characters
        if vc > 1:
            region = BBox(x, y, width, height, text, np.exp(total_logprobs))
            region.textline_indices = []
            region.majority_dir = majority_dir
            text_regions.append(region)
            # Re-index the kept text lines into new_textlines.
            for textline_idx in textline_indices:
                region.textline_indices.append(len(new_textlines))
                new_textlines.append(textlines[textline_idx])
    textlines = new_textlines
    # create mask
    from text_mask_utils import filter_masks, main_process
    # Upscale the mask by a factor of two in both dimensions.
    mask_resized = cv2.resize(mask, (mask.shape[1] * 2, mask.shape[0] * 2),
                              interpolation=cv2.INTER_LINEAR)
    # Crop away the padding; only one of the two paddings is removed
    # (NOTE(review): assumes pad_h and pad_w are never both positive).
    if pad_h > 0:
        mask_resized = mask_resized[:-pad_h, :]
    elif pad_w > 0:
        mask_resized = mask_resized[:, :-pad_w]
    # Mask refinement works at half of the original image resolution.
    mask_resized = cv2.resize(mask_resized,
                              (img.shape[1] // 2, img.shape[0] // 2),
                              interpolation=cv2.INTER_LINEAR)
    img_resized_2 = cv2.resize(img, (img.shape[1] // 2, img.shape[0] // 2),
                               interpolation=cv2.INTER_LINEAR)
    mask_resized[mask_resized > 250] = 255
    # Text line rectangles halved to match the half-resolution mask.
    text_lines = [(a.x // 2, a.y // 2, a.w // 2, a.h // 2) for a in textlines]
    mask_ccs, cc2textline_assignment = filter_masks(mask_resized, text_lines)
    cv2.imwrite('result/mask_filtered.png', reduce(cv2.bitwise_or, mask_ccs))
    final_mask, textline_colors = main_process(img_resized_2, mask_ccs,
                                               text_lines,
                                               cc2textline_assignment)
    # Bring the final mask back to the original image size.
    final_mask = cv2.resize(final_mask, (img.shape[1], img.shape[0]),
                            interpolation=cv2.INTER_LINEAR)
    # run inpainting
    img_inpainted = run_inpainting(img, final_mask)
    # translate text region texts
    texts = '\n'.join([r.text for r in text_regions])
    trans_ret = baidu_translator.translate('ja', 'zh-CN', texts)
    # Pad with empty strings or truncate so that there is exactly one
    # translated sentence per text region.
    translated_sentences = []
    batch = len(text_regions)
    if len(trans_ret) < batch:
        translated_sentences.extend(trans_ret)
        translated_sentences.extend([''] * (batch - len(trans_ret)))
    elif len(trans_ret) > batch:
        translated_sentences.extend(trans_ret[:batch])
    else:
        translated_sentences.extend(trans_ret)
    # render translated texts
    img_canvas = np.copy(img_inpainted)
    for trans_text, region in zip(translated_sentences, text_regions):
        print(region.text)
        print(trans_text)
        print(region.majority_dir, region.x, region.y, region.w, region.h)
        img_bbox = cv2.rectangle(img_bbox, (region.x, region.y),
                                 (region.x + region.w, region.y + region.h),
                                 color=(0, 0, 255),
                                 thickness=2)
        for idx in region.textline_indices:
            txtln = textlines[idx]
            img_bbox = cv2.rectangle(img_bbox, (txtln.x, txtln.y),
                                     (txtln.x + txtln.w, txtln.y + txtln.h),
                                     color=textline_colors[idx],
                                     thickness=2)
        # NOTE(review): idx below is whatever the loop above left behind, i.e.
        # the last text line of the region; it relies on every region having
        # at least one text line.
        if region.majority_dir == 'h':
            text_render.put_text_horizontal(img_canvas, trans_text,
                                            len(region.textline_indices),
                                            region.x, region.y, region.w,
                                            region.h, textline_colors[idx],
                                            None)
        else:
            text_render.put_text_vertical(img_canvas, trans_text,
                                          len(region.textline_indices),
                                          region.x, region.y, region.w,
                                          region.h, textline_colors[idx], None)

    # Write all debug and result images.
    cv2.imwrite('result/rs.png', imgproc.cvt2HeatmapImg(rscore))
    cv2.imwrite('result/as.png', imgproc.cvt2HeatmapImg(ascore))
    cv2.imwrite('result/textline.png', overlay)
    cv2.imwrite('result/bbox.png', img_bbox)
    cv2.imwrite('result/bbox_unfiltered.png', img_bbox_all)
    cv2.imwrite(
        'result/overlay.png',
        cv2.cvtColor(
            overlay_image(
                img_to_overlay,
                cv2.resize(overlay,
                           (img_resized.shape[1], img_resized.shape[0]),
                           interpolation=cv2.INTER_LINEAR)),
            cv2.COLOR_RGB2BGR))
    cv2.imwrite('result/mask.png', final_mask)
    cv2.imwrite('result/masked.png',
                cv2.cvtColor(img_inpainted, cv2.COLOR_RGB2BGR))
    cv2.imwrite('result/final.png', cv2.cvtColor(img_canvas,
                                                 cv2.COLOR_RGB2BGR))
Esempio n. 28
0
# Export script: load CRAFT weights, build one example input and trace the
# network to ONNX. `net` must already exist when this fragment runs.
net = net.cuda()
#net = torch.nn.DataParallel(net)

# copyStateDict is applied to the loaded checkpoint before it is handed to
# load_state_dict; eval() switches the network to inference mode.
net.load_state_dict(copyStateDict(torch.load('./weights/craft_mlt_25k.pth')))
net.eval()

# load data
image = imgproc.loadImage('./test_data/chi/0021_crop.jpg')

# resize (fixed square size 384, magnification 1.5)
img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
    image, 384, interpolation=cv2.INTER_LINEAR, mag_ratio=1.5)
ratio_h = ratio_w = 1 / target_ratio

# preprocessing
x = imgproc.normalizeMeanVariance(img_resized)
x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
# NumPy form of the input, taken while the tensor is still on the CPU.
onnx_input = x.data.numpy()
x = x.cuda()

# trace export: x serves as the example input, written with opset 10
torch.onnx.export(net,
                  x,
                  './craft_opset10.onnx',
                  export_params=True,
                  verbose=True,
                  opset_version=10)

# test the inference process
def test_net(net,
             image,
             text_threshold,
             link_threshold,
             low_text,
             cuda,
             poly,
             image_path,
             refine_net=None):
    """Detect text in ``image`` and save auxiliary images under ``./output``.

    Besides returning the detection result, the function writes three files
    whose names are derived from the base name of ``image_path``:

    * ``./output/og_bri/og_<name>.jpg``: Otsu-binarized resized input,
    * ``./output/score/score_<name>.jpg``: image from ``imgproc.cvMakeScores``,
    * ``./output/resize/resize_<name>_mask.jpg``: colour-converted resized
      input scaled back to the size of ``image``.

    Args:
        net: Callable model returning a ``(y, feature)`` pair.
        image: Input image array with three dimensions [h, w, c].
        text_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        link_threshold: Forwarded to ``craft_utils.getDetBoxes``.
        low_text: Not used; a fixed value of 0.4 is passed instead
            (NOTE(review): confirm that ignoring the argument is intended).
        cuda: When truthy, the input tensor is moved to the GPU.
        poly: Forwarded to ``craft_utils.getDetBoxes``.
        image_path: Path of the source image; only its base name is used.
        refine_net: Optional refiner whose output replaces the link map.

    Returns:
        ``(boxes, polys, ret_score_text)`` where missing polygons have been
        replaced by the corresponding box and ``ret_score_text`` is the
        heatmap rendering of the region score map.
    """
    t0 = time.time()
    img_h, img_w, _ = image.shape
    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio
    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # Forward pass; inference only, so gradient tracking is disabled.
    with torch.no_grad():
        y, feature = net(x)
    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()  # region score
    score_link = y[0, :, :, 1].cpu().data.numpy()  # affinity score
    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing: this is where CRAFT produces the boxes.
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold, 0.4,
                                           poly)

    # coordinate adjustment
    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)
    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)

    # Fall back to the box wherever no polygon was produced.
    for k in range(len(polys)):
        if polys[k] is None:
            polys[k] = boxes[k]
    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)
    Plus_score_text = imgproc.cvMakeScores(render_img)

    filename, file_ext = os.path.splitext(os.path.basename(image_path))

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    # Folder for the resized original image.
    resize_folder = './output/resize'
    os.makedirs(resize_folder, exist_ok=True)
    resize_file = resize_folder + "/resize_" + filename + '_mask.jpg'

    # Channel-swap the image that CRAFT resized (BGR2RGB conversion code).
    IMG_RGB2 = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)

    # Build the binarized image that is later used for compositing.
    pil_image = Image.fromarray((IMG_RGB2 * 255).astype(np.uint8))
    images = np.array(pil_image)
    images = cv2.cvtColor(images, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(images, 0, 255,
                                cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Enlarge the score image by a factor of two before the final resize.
    text_score = cv2.resize(Plus_score_text,
                            None,
                            fx=2,
                            fy=2,
                            interpolation=cv2.INTER_LINEAR)

    # Bring both images to the size of the original input.
    thresh = cv2.resize(thresh, (img_w, img_h))  # binarized original
    text_score = cv2.resize(text_score, (img_w, img_h))  # region score image

    text_score = Image.fromarray((text_score).astype(np.uint8))
    text_score = np.array(text_score)

    # Output folders for the binarized original and the score image.
    os.makedirs('./output/og_bri', exist_ok=True)
    os.makedirs('./output/score', exist_ok=True)

    cv2.imwrite('./output/og_bri' + "/og_" + filename + '.jpg', thresh)
    cv2.imwrite('./output/score' + "/score_" + filename + '.jpg', text_score)

    # Scale the colour-converted image back to the original size and save it.
    IMG_RGB2 = cv2.resize(IMG_RGB2, (img_w, img_h))
    cv2.imwrite(resize_file, IMG_RGB2)

    return boxes, polys, ret_score_text
Esempio n. 30
0
def test_net(net,
             image,
             text_threshold,
             link_threshold,
             low_text,
             cuda,
             poly,
             refine_net=None):
    t0 = time.time()

    # resize
    img_resized, target_ratio, size_heatmap = imgproc.resize_aspect_ratio(
        image,
        args.canvas_size,
        interpolation=cv2.INTER_LINEAR,
        mag_ratio=args.mag_ratio)
    ratio_h = ratio_w = 1 / target_ratio

    # preprocessing
    x = imgproc.normalizeMeanVariance(img_resized)
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))  # [c, h, w] to [b, c, h, w]
    if cuda:
        x = x.cuda()

    # forward pass
    with torch.no_grad():
        y, feature = net(x)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y[0, :, :, 1].cpu().data.numpy()

    # refine link
    if refine_net is not None:
        with torch.no_grad():
            y_refiner = refine_net(y, feature)
        score_link = y_refiner[0, :, :, 0].cpu().data.numpy()

    t0 = time.time() - t0
    t1 = time.time()

    # Post-processing
    boxes, polys = craft_utils.getDetBoxes(score_text, score_link,
                                           text_threshold, link_threshold,
                                           low_text, poly)

    # coordinate adjustment

    boxes = craft_utils.adjustResultCoordinates(boxes, ratio_w, ratio_h)

    '处理裂开的box,相邻的放在同一组'
    # 广度优先合并相邻的框
    # 距离矩阵构建
    all_rect_cx_cy = np.zeros((len(boxes), 2))
    for i in range(len(boxes)):
        box = boxes[i]
        left = min(box[0][0], box[1][0], box[2][0], box[3][0])
        right = max(box[0][0], box[1][0], box[2][0], box[3][0])
        top = min(box[0][1], box[1][1], box[2][1], box[3][1])
        bottom = max(box[0][1], box[1][1], box[2][1], box[3][1])
        top = int(top)
        bottom = int(bottom)
        left = int(left)
        right = int(right)

        all_rect_cx_cy[i][0] = ((left + right) / 2) / 4
        #减少x轴的影响
        #还需调整
        all_rect_cx_cy[i][1] = ((top + bottom) / 2)
    mat_distance = []
    for i in range(len(all_rect_cx_cy)):
        mat_distance.append(
            np.sqrt(np.sum((all_rect_cx_cy - all_rect_cx_cy[i])**2, axis=-1)))
    print("generate distance mat;len:", len(mat_distance))

    segment_group = []
    ind_group = -1
    search_queue = deque()
    cnt_processed = 0
    processed = set()
    #广度优先
    while cnt_processed < len(all_rect_cx_cy):  # 只要搜索队列中有数据就一直遍历下去
        if (len(search_queue) == 0):
            for i in range(len(all_rect_cx_cy)):
                if (i not in processed):
                    search_queue.append(i)
                    segment_group.append([])
                    ind_group += 1
                    break
        current_node = search_queue.popleft()  # 从队列前边获取节点,即先进先出,这是BFS的核心
        if current_node not in processed:  # 当前节点是否被访问过
            cnt_processed += 1
            processed.add(current_node)
            inds = np.argsort(mat_distance[current_node])
            segment_group[ind_group].append(boxes[current_node])
            cnt_company = 0
            distance_threshold = 20  #max(all_rect[current_node][2],all_rect[current_node][3])
            # print(distance_threshold)
            for index in inds:  # 遍历相邻节点,判断相邻节点是否已经在搜索队列
                if mat_distance[current_node][index] > distance_threshold:
                    break
                cnt_company += 1
                if cnt_company > 200:
                    print("error")
                    exit()
                if index not in search_queue:  # 如果相邻节点不在搜索队列则进行添加
                    search_queue.append(index)

    '合并在同一组的框'
    merge_boxes = []
    for segment in segment_group:
        left_s = []
        right_s = []
        top_s = []
        bottom_s = []
        for box in segment:
            left = min(box[0][0], box[1][0], box[2][0], box[3][0])
            right = max(box[0][0], box[1][0], box[2][0], box[3][0])
            top = min(box[0][1], box[1][1], box[2][1], box[3][1])
            bottom = max(box[0][1], box[1][1], box[2][1], box[3][1])
            top = math.floor(top)
            bottom = math.floor(bottom)
            left = math.floor(left)
            right = math.floor(right)

            left_s.append(left)
            right_s.append(right)
            top_s.append(top)
            bottom_s.append(bottom)
        merge_boxes.append(
            [min(left_s), min(top_s),
             max(right_s),
             max(bottom_s)])

    for rect in merge_boxes:
        threshold_hw = min(rect[3] - rect[1], rect[2] - rect[0]) * 0.2
        crop = i_image[rect[1]:rect[3], rect[0]:rect[2]]
        ret, binary_img = cv2.threshold(
            crop, 175, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        _, contours, _ = cv2.findContours(binary_img, cv2.RETR_EXTERNAL,
                                          cv2.CHAIN_APPROX_SIMPLE)

        group = []
        for i in range(len(contours)):
            rect_char = cv2.boundingRect(contours[i])
            group.append(rect_char)
        group.sort(key=lambda rect: rect[0])

        last_x_start = group[0][0]
        last_x_end = group[0][0] + group[0][2]
        last = group[0]
        i = 1

        '合并=/等符号'
        while i < len(group) and i >= 1:
            now = group[i]
            cx = now[0] + now[2] / 2
            cy = now[1] + now[3] / 2
            last_cy = last[1] + last[3] / 2
            y_near = abs(last_cy - cy) < (last_x_end - last_x_start) * 0.6
            if (last_x_start < cx and cx < last_x_end and y_near):
                group.pop(i)
                i -= 1
                x1 = min(now[0], group[i][0])
                y1 = min(now[1], group[i][1])
                x2 = max(now[0] + now[2], group[i][0] + group[i][2])
                y2 = max(now[1] + now[3], group[i][1] + group[i][3])
                group[i] = (x1, y1, x2 - x1, y2 - y1)
            else:
                last_x_start = group[i][0]
                last_x_end = group[i][0] + group[i][2]
                last = group[i]
            i += 1
        if (len(group) < 4 or len(group) > 16):
            continue
        '检测每个框及其结果'
        rect_set = []
        res_set = []

        def detect_rect(rect_char, binary_img):
            """Classify the character inside one bounding rectangle.

            Args:
                rect_char: Indexable rectangle laid out as (x, y, w, h),
                    relative to ``binary_img``.
                binary_img: 2-D image array the rectangle is cut from.

            Returns:
                The class index produced by ``classifer_box.eval`` as a
                Python int.

            Side effects:
                Calls ``debug_write`` with the first slice of the prepared
                tensor (scaled by 255, as uint8) and the class string
                looked up in ``config.CLASS_toString``.
            """
            # Turn the (x, y, w, h) rectangle into slice bounds.
            left, top = rect_char[0], rect_char[1]
            right, bottom = left + rect_char[2], top + rect_char[3]
            patch = binary_img[top:bottom, left:right]

            # presumably adapt_size reshapes the patch to the classifier's
            # expected input size — TODO confirm against its definition
            prepared = adapt_size(torch.tensor(patch, dtype=torch.int))
            prepared = prepared.float().cuda()

            # The classifier is fed a batch of one sample.
            batch = prepared.unsqueeze(0)
            label = classifer_box.eval(batch).squeeze().int().item()

            # Dump what the classifier saw, tagged with the predicted class.
            preview = prepared[0].cpu().int().numpy().astype(np.uint8) * 255
            debug_write(preview, config.CLASS_toString[label])
            return label

        for i in range(len(group)):
            rect_char = group[i]
            if max(rect_char[2], rect_char[3]) < threshold_hw:
                continue
            res = detect_rect(rect_char, binary_img)
            res_set.append(res)
            rect_set.append(rect_char)

        res_str = ''
        for i in range(len(res_set)):
            res = res_set[i]
            res_str += config.CLASS_toString[res]
            # print('left',res)
            '等号右边颜色浅 针对右边进行二值化后重新检测'
            if (config.CLASS_is_eq(res)):
                rect_char = rect_set[i]

                crop = i_image[rect[1]:rect[3],
                               rect[0]:rect[2]][:,
                                                rect_char[0] + rect_char[2]:]
                if (crop.shape[0] * crop.shape[1] < 4):
                    break
                crop = convert_to_binary_inv(crop)
                debug_write(crop, '')
                _, contours_right, _ = cv2.findContours(
                    crop, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

                group_right = []
                for i in range(len(contours_right)):
                    rect_char_right = cv2.boundingRect(contours_right[i])
                    group_right.append(rect_char_right)
                group_right.sort(key=lambda rect: rect[0])
                for rect_char in group_right:
                    if (max(rect_char[2], rect_char[3]) < crop.shape[0] * 0.3):
                        continue
                    res_right = detect_rect(rect_char, crop)
                    res_str += config.CLASS_toString[res_right]
                break
        eq = res_str.split('=')
        if (len(eq) == 2):
            global i_image_3_color
            res_str = res_str.replace("/", "d")
            print(res_str)
            if str_to_num(eq[0]) == str_to_num(eq[1]):
                cv2.rectangle(i_image_3_color, (rect[0], rect[1]),
                              (rect[2], rect[3]), (46, 255, 87), 2)
                cv2.imwrite('./res/' + res_str + '.png',
                            i_image[rect[1]:rect[3], rect[0]:rect[2]])
            elif eq[1] == "":
                cv2.rectangle(i_image_3_color, (rect[0], rect[1]),
                              (rect[2], rect[3]), (46, 87, 255), 2)
                cv2.imwrite('./res/' + res_str + '.png',
                            i_image[rect[1]:rect[3], rect[0]:rect[2]])
            else:
                cv2.rectangle(i_image_3_color, (rect[0], rect[1]),
                              (rect[2], rect[3]), (255, 46, 87), 2)
                cv2.imwrite('./res/x_' + res_str + '.png',
                            i_image[rect[1]:rect[3], rect[0]:rect[2]])
            # print(str_to_num(eq[0])
            # print(str_to_num(eq[1])

        # cv2.imwrite('./res/'+res_str+'.png', binary_img)

    polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h)
    for k in range(len(polys)):
        if polys[k] is None: polys[k] = boxes[k]

    cv2.imshow('', i_image_3_color)
    cv2.waitKey()
    t1 = time.time() - t1

    # render results (optional)
    render_img = score_text.copy()
    render_img = np.hstack((render_img, score_link))
    ret_score_text = imgproc.cvt2HeatmapImg(render_img)

    if args.show_time:
        print("\ninfer/postproc time : {:.3f}/{:.3f}".format(t0, t1))

    return boxes, polys, ret_score_text