Example 1
import cv2
import numpy as np
from PIL import Image
from utils import compute_iou, compute_regression, plot_boxes_on_image, wandhG


# Relies on module-level constants (grid_width, grid_height, pos_thresh, neg_thresh,
# image_width, image_height) set up as in the later examples.
def encode_label(image, gt_boxes):
    target_scores = np.zeros(shape=[45, 60, 9, 2])  # 0: background, 1: foreground
    target_bboxes = np.zeros(shape=[45, 60, 9, 4])  # t_x, t_y, t_w, t_h
    target_masks = np.zeros(shape=[45, 60, 9])  # negative_samples: -1, positive_samples: 1
    for i in range(45):  # y: height
        for j in range(60):  # x: width
            for k in range(9):
                center_x = j * grid_width + grid_width * 0.5
                center_y = i * grid_height + grid_height * 0.5
                xmin = center_x - wandhG[k][0] * 0.5
                ymin = center_y - wandhG[k][1] * 0.5
                xmax = center_x + wandhG[k][0] * 0.5
                ymax = center_y + wandhG[k][1] * 0.5
                # print(xmin, ymin, xmax, ymax)
                # ignore cross-boundary anchors
                if xmin > -5 and ymin > -5 and xmax < image_width + 5 and ymax < image_height + 5:
                    anchor_boxes = np.array([xmin, ymin, xmax, ymax])
                    anchor_boxes = np.expand_dims(anchor_boxes, axis=0)
                    # compute iou between this anchor and all ground-truth boxes in image.
                    ious = compute_iou(anchor_boxes, gt_boxes)
                    positive_masks = ious > pos_thresh
                    negative_masks = ious < neg_thresh

                    if np.any(positive_masks):
                        plot_boxes_on_image(image, anchor_boxes, thickness=1)
                        print("=> encode: %d, %d, %d" % (i, j, k))
                        cv2.circle(image,
                                   center=(int(0.5 * (xmin + xmax)),
                                           int(0.5 * (ymin + ymax))),
                                   radius=1,
                                   color=[255, 0, 0],
                                   thickness=4)

                        target_scores[i, j, k, 1] = 1.
                        target_masks[i, j, k] = 1  # labeled as a positive sample
                        # find out which ground-truth box matches this anchor
                        max_iou_idx = np.argmax(ious)
                        selected_gt_boxes = gt_boxes[max_iou_idx]
                        target_bboxes[i, j, k] = compute_regression(
                            selected_gt_boxes, anchor_boxes[0])

                    if np.all(negative_masks):
                        target_scores[i, j, k, 0] = 1.
                        target_masks[i, j, k] = -1  # labeled as a negative sample
                        cv2.circle(image,
                                   center=(int(0.5 * (xmin + xmax)),
                                           int(0.5 * (ymin + ymax))),
                                   radius=1,
                                   color=[0, 0, 0],
                                   thickness=4)
    Image.fromarray(image).show()
    return target_scores, target_bboxes, target_masks
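
The encoder above leans on two helpers from utils that are not shown in this listing. The following is a minimal sketch of how compute_iou and compute_regression are commonly written for corner-format boxes [xmin, ymin, xmax, ymax]; the actual utils implementations may differ in detail.

def compute_iou(boxes1, boxes2):
    # IoU between boxes in [xmin, ymin, xmax, ymax] format; broadcasting handles
    # a (1, 4) or (4,) box against an (N, 4) array, as used in Examples 1 and 5.
    boxes1 = np.asarray(boxes1, dtype=np.float32)
    boxes2 = np.asarray(boxes2, dtype=np.float32)
    left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
    inter_wh = np.maximum(right_down - left_up, 0.0)
    inter_area = inter_wh[..., 0] * inter_wh[..., 1]
    area1 = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    area2 = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
    return inter_area / np.maximum(area1 + area2 - inter_area, 1e-10)


def compute_regression(gt_box, anchor_box):
    # Standard RPN regression targets: ground-truth center offsets relative to the
    # anchor center (normalized by the anchor size) plus log-scale width/height ratios.
    anchor_w = anchor_box[2] - anchor_box[0]
    anchor_h = anchor_box[3] - anchor_box[1]
    t_x = (0.5 * (gt_box[0] + gt_box[2]) - 0.5 * (anchor_box[0] + anchor_box[2])) / anchor_w
    t_y = (0.5 * (gt_box[1] + gt_box[3]) - 0.5 * (anchor_box[1] + anchor_box[3])) / anchor_h
    t_w = np.log((gt_box[2] - gt_box[0]) / anchor_w)
    t_h = np.log((gt_box[3] - gt_box[1]) / anchor_h)
    return np.array([t_x, t_y, t_w, t_h], dtype=np.float32)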
Example 2
import os
import cv2
import numpy as np
import tensorflow as tf
from PIL import Image
from rpn import RPNplus
from utils import decode_output, plot_boxes_on_image, nms

synthetic_dataset_path = "./synthetic_dataset/synthetic_dataset"
prediction_result_path = "./prediction"
if not os.path.exists(prediction_result_path): os.mkdir(prediction_result_path)

model = RPNplus()
fake_data = np.ones(shape=[1, 720, 960, 3]).astype(np.float32)
model(fake_data)  # run one dummy forward pass so the variables are built before loading weights
model.load_weights("./RPN.h5")

for idx in range(8000, 8200):
    image_path = os.path.join(synthetic_dataset_path, "image/%d.jpg" % (idx + 1))
    raw_image = cv2.imread(image_path)
    image_data = np.expand_dims(raw_image / 255., 0)
    pred_scores, pred_bboxes = model(image_data)
    pred_scores = tf.nn.softmax(pred_scores, axis=-1)
    pred_scores, pred_bboxes = decode_output(pred_bboxes, pred_scores, 0.9)
    pred_bboxes = nms(pred_bboxes, pred_scores, 0.5)
    plot_boxes_on_image(raw_image, pred_bboxes)
    save_path = os.path.join(prediction_result_path, str(idx) + ".jpg")
    print("=> saving prediction results into %s" % save_path)
    Image.fromarray(raw_image).save(save_path)
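
The nms helper imported above is not shown here, but Example 5 below contains essentially the same greedy loop inline. This is a minimal sketch of a function wrapping that loop, assuming corner-format boxes, one foreground score per box, and the compute_iou helper from utils; the real utils.nms may differ.

def nms(pred_boxes, pred_scores, iou_thresh):
    # Greedy non-maximum suppression: repeatedly keep the highest-scoring box and
    # drop every remaining box whose IoU with it exceeds iou_thresh.
    selected_boxes = []
    while len(pred_boxes) > 0:
        max_idx = np.argmax(pred_scores)
        selected_boxes.append(pred_boxes[max_idx])
        pred_boxes = np.delete(pred_boxes, max_idx, axis=0)
        pred_scores = np.delete(pred_scores, max_idx)
        if len(pred_boxes) == 0:
            break
        keep = compute_iou(selected_boxes[-1], pred_boxes) <= iou_thresh
        pred_boxes, pred_scores = pred_boxes[keep], pred_scores[keep]
    return np.array(selected_boxes)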
Example 3
import cv2
import numpy as np
from PIL import Image
from rpn import RPNplus
from utils import compute_iou, plot_boxes_on_image, wandhG, load_gt_boxes, compute_regression

pos_thresh = 0.5
neg_thresh = 0.1
iou_thresh = 0.5
grid_width = grid_height = 16
image_height, image_width = 720, 960
wandhG = np.array(wandhG)

image_path = "/Users/yangyun/synthetic_dataset/image/1.jpg"
gt_boxes = load_gt_boxes("/Users/yangyun/synthetic_dataset/imageAno/1.txt")
raw_image = cv2.imread(image_path)
image_with_gt_boxes = np.copy(raw_image)
plot_boxes_on_image(image_with_gt_boxes, gt_boxes)
Image.fromarray(image_with_gt_boxes).show()
encoded_image = np.copy(raw_image)

target_scores = np.zeros(shape=[45, 60, 9, 2])  # 0: background, 1: foreground
target_bboxes = np.zeros(shape=[45, 60, 9, 4])  # t_x, t_y, t_w, t_h
target_masks = np.zeros(shape=[45, 60, 9])  # negative_samples: -1, positive_samples: 1

################################### ENCODE INPUT #################################

for i in range(45):
    for j in range(60):
        for k in range(9):
            center_x = j * grid_width + grid_width * 0.5
Example 4
File: demo.py Project: xsd1221/tf20
import cv2
import numpy as np
from PIL import Image
from utils import compute_iou, plot_boxes_on_image, wandhG, load_gt_boxes, compute_regression, decode_output

pos_thresh = 0.5
neg_thresh = 0.1
iou_thresh = 0.5
grid_width = 16
grid_height = 16
image_height = 720
image_width = 960

image_path = "./synthetic_dataset/image/1.jpg"
label_path = "./synthetic_dataset/imageAno/1.txt"
gt_boxes = load_gt_boxes(label_path)
raw_image = cv2.imread(image_path)
image_with_gt_boxes = np.copy(raw_image)
plot_boxes_on_image(image_with_gt_boxes, gt_boxes)
Image.fromarray(image_with_gt_boxes).show()
encoded_image = np.copy(raw_image)

target_scores = np.zeros(shape=[45, 60, 9, 2])  # 0: background, 1: foreground
target_bboxes = np.zeros(shape=[45, 60, 9, 4])  # t_x, t_y, t_w, t_h
target_masks = np.zeros(shape=[45, 60, 9])  # negative_samples: -1, positive_samples: 1

################################### ENCODE INPUT #################################

for i in range(45):
    for j in range(60):
        for k in range(9):
            center_x = j * grid_width + grid_width * 0.5
Example 5
import numpy as np
import tensorflow as tf
from PIL import Image
from utils import compute_iou, plot_boxes_on_image, wandhG

# Greedy NMS over the decoded predictions: pred_boxes holds the candidate boxes and
# pred_score their foreground scores, both produced earlier in the script.
# selected_boxes = pred_boxes
selected_boxes = []
while len(pred_boxes) > 0:
    max_idx = np.argmax(pred_score)
    selected_box = pred_boxes[max_idx]
    selected_boxes.append(selected_box)
    pred_boxes = np.concatenate(
        [pred_boxes[:max_idx], pred_boxes[max_idx + 1:]])
    pred_score = np.concatenate(
        [pred_score[:max_idx], pred_score[max_idx + 1:]])
    ious = compute_iou(selected_box, pred_boxes)
    iou_mask = ious <= 0.1
    pred_boxes = pred_boxes[iou_mask]
    pred_score = pred_score[iou_mask]

selected_boxes = np.array(selected_boxes)
plot_boxes_on_image(raw_image, selected_boxes)
Image.fromarray(np.uint8(raw_image)).show()

grid_size = [45, 60]

grid_x = tf.range(grid_size[0], dtype=tf.int32)
grid_y = tf.range(grid_size[1], dtype=tf.int32)
a, b = tf.meshgrid(grid_x, grid_y)
x_offset = tf.reshape(a, (-1, 1))
y_offset = tf.reshape(b, (-1, 1))
xy_offset = tf.concat([x_offset, y_offset], axis=-1)
xy_offset = tf.reshape(xy_offset, [grid_size[0], grid_size[1], 1, 2])
xy_offset = tf.cast(xy_offset, tf.float32)
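
Given the grid offsets above and the anchor sizes in wandhG, the predicted (t_x, t_y, t_w, t_h) values can be decoded back into absolute corner boxes by inverting the encoding from Example 1. The snippet below is a hypothetical sketch of that step: pred_t (shape [45, 60, 9, 4]) is an assumed name for the raw regression output, and it presumes xy_offset[..., 0] holds the column index and xy_offset[..., 1] the row index, matching the encoder's center computation.

# Hypothetical decoding sketch; pred_t is an assumed name for the [45, 60, 9, 4] regression output.
anchor_wh = np.array(wandhG, dtype=np.float32)                # [9, 2] anchor (width, height)
centers = xy_offset * 16.0 + 8.0                              # [45, 60, 1, 2] cell centers in pixels
pred_cxcy = pred_t[..., 0:2] * anchor_wh + centers            # undo t_x, t_y: recover box centers
pred_wh = tf.exp(pred_t[..., 2:4]) * anchor_wh                # undo t_w, t_h: recover box sizes
pred_xymin = pred_cxcy - 0.5 * pred_wh
pred_xymax = pred_cxcy + 0.5 * pred_wh
decoded_boxes = tf.concat([pred_xymin, pred_xymax], axis=-1)  # [45, 60, 9, 4] corner format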
Example 6
import math

import cv2
import numpy as np
from PIL import Image
from utils import load_gt_boxes, plot_boxes_on_image, wandhG

pos_thresh = 0.5
neg_thresh = 0.1
iou_thresh = 0.5
grid_width = 16  # each grid cell is 16x16 pixels because the feature map is a 16x downscaling of the original image
grid_height = 16
image_height = 900
image_width = 900
wnum, hnum = math.floor(image_width / grid_width), math.floor(image_height / grid_height)
image_path = "IMG_1278.jpg"
label_path = "test.txt"
gt_boxes = load_gt_boxes(label_path)  # read out the ground-truth box coordinates
raw_image = cv2.imread(image_path)  # read the image as (height, width, channels)
image_with_gt_boxes = np.copy(raw_image)  # copy the original image
plot_boxes_on_image(image_with_gt_boxes, gt_boxes)  # draw the ground-truth boxes on the image
Image.fromarray(image_with_gt_boxes).show()  # show the image with the ground-truth boxes drawn
## The feature map is 1/16 the size of the original image in each dimension, so wnum = image_width / 16 and hnum = image_height / 16.
target_scores = np.zeros(shape=[wnum, hnum, 9, 2])  # 0: background, 1: foreground
target_bboxes = np.zeros(shape=[wnum, hnum, 9, 4])  # t_x, t_y, t_w, t_h
target_masks = np.zeros(shape=[wnum, hnum, 9])  # negative_samples: -1, positive_samples: 1
################################### ENCODE INPUT #################################
## Split the feature map into wnum * hnum cells
for i in range(wnum):
    for j in range(hnum):
        for k in range(9):
            center_x = j * grid_width + grid_width * 0.5  # x coordinate of this cell's center
            center_y = i * grid_height + grid_height * 0.5  # y coordinate of this cell's center
            xmin = center_x - wandhG[k][