Ejemplo n.º 1
0
def evaluate():
    """Run text detection on every image and write one result file per image.

    Relies on names defined outside this function: ``image_paths``,
    ``result_label_dir``, ``model``, ``anchors`` and ``inference_mode``.

    For each path in ``image_paths`` the image is resized/padded with
    ``utils.resize_and_pad_image(image, 512)``, passed through
    ``model.keras_model`` and the returned proposals are turned into text
    boxes by ``TextDetector``. The boxes are then mapped back onto the
    original image (padding removed, scale undone, clipped to the image
    bounds) and written to ``res_<image name without extension>.txt`` inside
    ``result_label_dir``, one comma-separated box per line.

    Raises:
        IOError: if ``cv2.imread`` cannot read one of the images.
    """
    for image_path in image_paths:
        image = cv2.imread(image_path)
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of an opaque TypeError.
        if image is None:
            raise IOError('Could not read image: {}'.format(image_path))
        # Reverse the channel axis (OpenCV loads images as BGR).
        image = image[:, :, ::-1]

        image_fname = osp.split(image_path)[-1]
        image_fname_noext = osp.splitext(image_fname)[0]
        result_label_fname = 'res_' + image_fname_noext + '.txt'
        result_label_path = osp.join(result_label_dir, result_label_fname)

        # Remember the original size: final boxes are clipped against it.
        h, w = image.shape[:2]

        image, scale, pad, window = utils.resize_and_pad_image(image, 512)
        image = utils.mold_image(image)
        image = np.expand_dims(image, axis=0)

        # Run object detection
        batch_rpn_proposals, batch_rpn_probs = model.keras_model.predict(
            [image, anchors], verbose=0)
        rpn_proposals = batch_rpn_proposals[0]
        rpn_probs = batch_rpn_probs[0]
        boxes = utils.denorm_boxes(rpn_proposals, (512, 512))
        scores = rpn_probs[..., np.newaxis]

        # Swap each coordinate pair: columns (0, 1, 2, 3) -> (1, 0, 3, 2).
        detector_boxes = boxes[:, [1, 0, 3, 2]].astype(np.float32)
        detector = TextDetector()
        if inference_mode == 'rpn':
            text_boxes = detector.detect(detector_boxes, scores, (512, 512))
        # inference_mode == 'text'
        else:
            text_boxes = detector.detect2(detector_boxes, scores, (512, 512))

        # Keep four columns; below they are treated as (x, y, x, y).
        text_boxes = text_boxes[:, [0, 1, 4, 5]]
        # -left_pad
        text_boxes[:, [0, 2]] -= pad[1][0]
        # -top_pad
        text_boxes[:, [1, 3]] -= pad[0][0]
        # Undo the resize and clip to the original image bounds.
        text_boxes = np.round(text_boxes / scale).astype(np.int32)
        text_boxes[:, [0, 2]] = np.clip(text_boxes[:, [0, 2]], 0, w - 1)
        text_boxes[:, [1, 3]] = np.clip(text_boxes[:, [1, 3]], 0, h - 1)

        with open(result_label_path, 'w') as f:
            for text_box in text_boxes:
                f.write(','.join(map(str, text_box.tolist())) + '\n')
Ejemplo n.º 2
0
# Demo loop: for every image matched by the glob below, run the model and the
# text detector, recording timestamps before prediction and after detection.
# Alternative inputs, kept for reference:
# image_paths = ['/home/adam/Public/test.jpg']
# image_paths = glob.glob('datasets/art/train_images/*.jpg')
# image_paths = glob.glob('/home/adam/.keras/datasets/text/ctpn/VOCdevkit/VOC2007/JPEGImages/*.jpg')
image_paths = glob.glob('/home/adam/.keras/datasets/icdar2013/focused_scene_text/task12_images/*.jpg')
for image_path in image_paths:
    # [:, :, ::-1] reverses the channel axis (OpenCV loads images as BGR).
    # NOTE(review): cv2.imread returns None for unreadable files, in which
    # case this slice raises TypeError.
    image = cv2.imread(image_path)[:, :, ::-1]
    image, scale, pad, window = utils.resize_and_pad_image(image, 512)
    # Copy of the resized/padded image taken before molding; the commented-out
    # debug drawing below uses it.
    src_image = image.copy()
    image = utils.mold_image(image)
    # Add a leading batch axis of size 1.
    image = np.expand_dims(image, axis=0)
    # Run object detection
    start = time.time()
    batch_rpn_proposals, batch_rpn_probs = model.keras_model.predict([image, anchors], verbose=0)
    # Only one image was fed, so take the first entry of each batch output.
    rpn_proposals = batch_rpn_proposals[0]
    rpn_probs = batch_rpn_probs[0]
    boxes = utils.denorm_boxes(rpn_proposals, (512, 512))
    # Append a trailing axis of length 1 to the probabilities.
    scores = rpn_probs[..., np.newaxis]
    # Optional debugging aid: keep only proposals scoring above 0.7 and draw
    # them on src_image.
    # keep_ix = np.where(rpn_probs > 0.7)[0]
    # boxes = boxes[keep_ix]
    # scores = rpn_probs[keep_ix]
    # for box in boxes:
    #     cv2.rectangle(src_image, (box[1], box[0]), (box[3], box[2]), (0, 255, 0), 1)
    # show_image(src_image, 'image')
    # cv2.waitKey(0)
    detector = TextDetector()
    # The detector receives the boxes with each coordinate pair swapped:
    # columns (0, 1, 2, 3) -> (1, 0, 3, 2).
    if inference_mode == 'rpn':
        text_boxes = detector.detect(boxes[:, [1, 0, 3, 2]].astype(np.float32), scores, (512, 512))
    # inference_mode == 'text'
    else:
        text_boxes = detector.detect2(boxes[:, [1, 0, 3, 2]].astype(np.float32), scores, (512, 512))
    # The start..end interval covers model prediction plus text detection.
    end = time.time()
Ejemplo n.º 3
0
def unmold_detections(detections, mrcnn_mask, original_image_shape,
                      image_shape, window):
    """Reformat the detections of one image from the format of the neural
    network output to a format suitable for use in the rest of the
    application.

    Args:
        detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized
            coordinates.
        mrcnn_mask: [N, height, width, num_classes]
        original_image_shape: [H, W, C] Original image shape before resizing.
        image_shape: [H, W, C] Shape of the image after resizing and padding.
        window: [y1, x1, y2, x2] Pixel coordinates of the box in the image
            where the real image is, excluding the padding.

    Returns:
        boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels.
        class_ids: [N] Integer class IDs for each bounding box.
        scores: [N] Float probability scores of the class_id.
        masks: [height, width, num_instances] Instance masks at the original
            image size. When no detection survives, an empty array of shape
            [H, W, 0] taken from ``original_image_shape``.
    """
    # How many detections do we have?
    # Detections array is padded with zeros. Find the first class_id == 0.
    zero_ix = np.where(detections[:, 4] == 0)[0]
    N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0]

    # Extract boxes, class_ids, scores, and class-specific masks
    boxes = detections[:N, :4]
    class_ids = detections[:N, 4].astype(np.int32)
    scores = detections[:N, 5]
    # For each detection pick the mask channel of its predicted class.
    masks = mrcnn_mask[np.arange(N), :, :, class_ids]

    # Translate normalized coordinates in the resized image to pixel
    # coordinates in the original image before resizing
    window = utils.norm_boxes(window, image_shape[:2])
    wy1, wx1, wy2, wx2 = window
    shift = np.array([wy1, wx1, wy1, wx1])
    wh = wy2 - wy1  # window height
    ww = wx2 - wx1  # window width
    scale = np.array([wh, ww, wh, ww])
    # Convert boxes to normalized coordinates on the window
    boxes = np.divide(boxes - shift, scale)
    # Convert boxes to pixel coordinates on the original image
    boxes = utils.denorm_boxes(boxes, original_image_shape[:2])

    # Filter out detections with zero area. Happens in early training when
    # network weights are still random
    exclude_ix = np.where((boxes[:, 2] - boxes[:, 0]) *
                          (boxes[:, 3] - boxes[:, 1]) <= 0)[0]
    if exclude_ix.shape[0] > 0:
        boxes = np.delete(boxes, exclude_ix, axis=0)
        class_ids = np.delete(class_ids, exclude_ix, axis=0)
        scores = np.delete(scores, exclude_ix, axis=0)
        masks = np.delete(masks, exclude_ix, axis=0)

    # Resize masks to original image size and set boundary threshold.
    # boxes and masks have the same number of rows at this point.
    full_masks = [
        utils.unmold_mask(mask, box, original_image_shape)
        for mask, box in zip(masks, boxes)
    ]
    if full_masks:
        full_masks = np.stack(full_masks, axis=-1)
    else:
        # No detections: keep the documented [H, W, num_instances] layout by
        # using the original image size, not the network's mask resolution.
        # tuple() lets original_image_shape be a list or an array as well.
        full_masks = np.empty(tuple(original_image_shape[:2]) + (0,))

    return boxes, class_ids, scores, full_masks