def encode_label(image, gt_boxes):
    """Label each in-bounds anchor as foreground/background and visualise it.

    A fixed 45 x 60 grid of cells is scanned with 9 anchors per cell. The
    anchor sizes come from the module-level ``wandhG`` table and the cell
    size from ``grid_width`` / ``grid_height``. Anchors that stick out of
    the image by 5 pixels or more on any side are left unlabeled.

    Side effects: positive anchors are passed to ``plot_boxes_on_image``
    and reported with ``print``; a dot is drawn at the centre of every
    labeled anchor with ``cv2.circle`` (color ``[255, 0, 0]`` for positive,
    ``[0, 0, 0]`` for negative); the annotated image is finally displayed
    through ``Image.fromarray(image).show()``.

    Args:
        image: Array that is drawn on and then displayed.
        gt_boxes: Ground-truth boxes, indexable by the position returned
            from ``np.argmax`` over the IoU result.
            # assumes one [xmin, ymin, xmax, ymax] row per box - TODO confirm

    Returns:
        Tuple ``(target_scores, target_bboxes, target_masks)``:
        * ``target_scores`` - shape (45, 60, 9, 2); index 0 marks
          background, index 1 marks foreground.
        * ``target_bboxes`` - shape (45, 60, 9, 4); output of
          ``compute_regression`` for positive anchors (t_x, t_y, t_w, t_h).
        * ``target_masks`` - shape (45, 60, 9); 1 for positive samples,
          -1 for negative samples, 0 for anchors left unlabeled.
    """
    grid_rows, grid_cols, anchors_per_cell = 45, 60, 9

    target_scores = np.zeros(shape=[grid_rows, grid_cols, anchors_per_cell, 2])
    target_bboxes = np.zeros(shape=[grid_rows, grid_cols, anchors_per_cell, 4])
    target_masks = np.zeros(shape=[grid_rows, grid_cols, anchors_per_cell])

    for row in range(grid_rows):  # y: height
        center_y = row * grid_height + grid_height * 0.5
        for col in range(grid_cols):  # x: width
            center_x = col * grid_width + grid_width * 0.5
            for anchor_idx in range(anchors_per_cell):
                half_w = wandhG[anchor_idx][0] * 0.5
                half_h = wandhG[anchor_idx][1] * 0.5
                xmin, xmax = center_x - half_w, center_x + half_w
                ymin, ymax = center_y - half_h, center_y + half_h

                # Ignore cross-boundary anchors (5 px tolerance per side).
                in_bounds = (
                    xmin > -5
                    and ymin > -5
                    and xmax < (image_width + 5)
                    and ymax < (image_height + 5)
                )
                if not in_bounds:
                    continue

                # Single anchor wrapped as a (1, 4) batch for the helpers.
                anchor_boxes = np.array([[xmin, ymin, xmax, ymax]])

                # IoU between this anchor and all ground-truth boxes.
                ious = compute_iou(anchor_boxes, gt_boxes)
                dot_center = (int(0.5 * (xmin + xmax)),
                              int(0.5 * (ymin + ymax)))

                # Positive: at least one ground-truth box overlaps enough.
                if np.any(ious > pos_thresh):
                    plot_boxes_on_image(image, anchor_boxes, thickness=1)
                    print("=> encode: %d, %d, %d" % (row, col, anchor_idx))
                    cv2.circle(image, center=dot_center, radius=1,
                               color=[255, 0, 0], thickness=4)

                    target_scores[row, col, anchor_idx, 1] = 1.
                    target_masks[row, col, anchor_idx] = 1

                    # Regress towards the ground-truth box with highest IoU.
                    best_gt_box = gt_boxes[np.argmax(ious)]
                    target_bboxes[row, col, anchor_idx] = compute_regression(
                        best_gt_box, anchor_boxes[0])

                # Negative: every ground-truth box is below the low threshold.
                if np.all(ious < neg_thresh):
                    target_scores[row, col, anchor_idx, 0] = 1.
                    target_masks[row, col, anchor_idx] = -1
                    cv2.circle(image, center=dot_center, radius=1,
                               color=[0, 0, 0], thickness=4)

    Image.fromarray(image).show()
    return target_scores, target_bboxes, target_masks
def encode_label(gt_boxes):
    """Build per-anchor classification and regression targets.

    A ``wnum`` x ``hnum`` grid of cells is scanned with 9 anchors per cell.
    The anchor sizes come from the module-level ``wandhG`` table and the
    cell size from ``grid_width`` / ``grid_height``. Anchors that stick out
    of the image by 5 pixels or more on any side are left unlabeled.

    An anchor is positive when its IoU with any ground-truth box is
    ``>= pos_thresh`` and negative when its IoU with every ground-truth box
    is ``<= neg_thresh``.

    NOTE(review): the first array axis has extent ``wnum`` but is used as
    the row (y) index, and ``hnum`` as the column (x) index - confirm that
    these module-level names are not swapped.

    Args:
        gt_boxes: Ground-truth boxes, indexable by the position returned
            from ``np.argmax`` over the IoU result.
            # assumes one [xmin, ymin, xmax, ymax] row per box - TODO confirm

    Returns:
        Tuple ``(target_scores, target_bboxes, target_masks)``:
        * ``target_scores`` - shape (wnum, hnum, 9, 2); index 0 marks
          background, index 1 marks foreground.
        * ``target_bboxes`` - shape (wnum, hnum, 9, 4); output of
          ``compute_regression`` for positive anchors (t_x, t_y, t_w, t_h).
        * ``target_masks`` - shape (wnum, hnum, 9); 1 for positive samples,
          -1 for negative samples, 0 for anchors left unlabeled.
    """
    anchors_per_cell = 9

    target_scores = np.zeros(shape=[wnum, hnum, anchors_per_cell, 2])
    target_bboxes = np.zeros(shape=[wnum, hnum, anchors_per_cell, 4])
    target_masks = np.zeros(shape=[wnum, hnum, anchors_per_cell])

    for row in range(wnum):  # y: height
        center_y = row * grid_height + grid_height * 0.5
        for col in range(hnum):  # x: width
            center_x = col * grid_width + grid_width * 0.5
            for anchor_idx in range(anchors_per_cell):
                half_w = wandhG[anchor_idx][0] * 0.5
                half_h = wandhG[anchor_idx][1] * 0.5
                xmin, xmax = center_x - half_w, center_x + half_w
                ymin, ymax = center_y - half_h, center_y + half_h

                # Ignore cross-boundary anchors (5 px tolerance per side).
                in_bounds = (
                    xmin > -5
                    and ymin > -5
                    and xmax < (image_width + 5)
                    and ymax < (image_height + 5)
                )
                if not in_bounds:
                    continue

                # Single anchor wrapped as a (1, 4) batch for the helpers.
                anchor_boxes = np.array([[xmin, ymin, xmax, ymax]])

                # IoU between this anchor and all ground-truth boxes.
                ious = compute_iou(anchor_boxes, gt_boxes)

                # Positive: at least one ground-truth box overlaps enough.
                if np.any(ious >= pos_thresh):
                    target_scores[row, col, anchor_idx, 1] = 1.
                    target_masks[row, col, anchor_idx] = 1

                    # Regress towards the ground-truth box with highest IoU.
                    best_gt_box = gt_boxes[np.argmax(ious)]
                    target_bboxes[row, col, anchor_idx] = compute_regression(
                        best_gt_box, anchor_boxes[0])

                # Negative: every ground-truth box is at or below the low
                # threshold.
                if np.all(ious <= neg_thresh):
                    target_scores[row, col, anchor_idx, 0] = 1.
                    target_masks[row, col, anchor_idx] = -1

    return target_scores, target_bboxes, target_masks
thickness=1) print("=> Encoding positive sample: %d, %d, %d" % (i, j, k)) cv2.circle(encoded_image, center=(int(0.5 * (xmin + xmax)), int(0.5 * (ymin + ymax))), radius=1, color=[255, 0, 0], thickness=4) target_scores[i, j, k, 1] = 1. target_masks[i, j, k] = 1 # labeled as a positive sample # find out which ground-truth box matches this anchor max_iou_idx = np.argmax(ious) selected_gt_boxes = gt_boxes[max_iou_idx] target_bboxes[i, j, k] = compute_regression( selected_gt_boxes, anchor_boxes[0]) if np.all(negative_masks): target_scores[i, j, k, 0] = 1. target_masks[i, j, k] = -1 # labeled as a negative sample cv2.circle(encoded_image, center=(int(0.5 * (xmin + xmax)), int(0.5 * (ymin + ymax))), radius=1, color=[0, 0, 0], thickness=4) Image.fromarray(encoded_image).show() ################################### DECODE OUTPUT #################################