def decode(boxes, scores, all_anchors, ih, iw):
    """Decode outputs into boxes clipped to the image.

    Parameters
    ---------
    boxes: an array of shape (1, h, w, Ax4); predicted box encodings that
      bbox_transform_inv decodes against the anchors
    scores: an array of shape (1, h, w, Ax2); reshaped to (-1, 2) here
    all_anchors: an array of shape (1, h, w, Ax4), [x1, y1, x2, y2], or None
      to build the anchors with anchors_plane from the feature-map size
    ih, iw: image height and width, passed to clip_boxes as (ih, iw)

    Returns
    --------
    final_boxes: of shape (R x 4)
    classes: of shape (R) in {0, 1}, the argmax over the two score columns
    scores: of shape (R) in [0 ~ 1], column 1 of the reshaped scores
    """
    # `is None` is required: `== None` on an ndarray compares elementwise and
    # the resulting array has no unambiguous truth value.
    if all_anchors is None:
        # The feature-map size is only needed to synthesise anchors, so it is
        # read here; callers that supply anchors may pass flattened arrays.
        h, w = boxes.shape[1], boxes.shape[2]
        stride = 2 ** int(round(np.log2((iw + 0.0) / w)))
        all_anchors = anchors_plane(h, w, stride=stride)

    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 2))
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])

    boxes = bbox_transform_inv(all_anchors, boxes)
    classes = np.argmax(scores, axis=1)
    scores = scores[:, 1]

    # After the (-1, 4) reshape every row already holds exactly one box, so
    # there is no per-class column block to select (indexing columns
    # classes[i]*4 : classes[i]*4+4 would fall outside the 4 columns for
    # class 1).
    final_boxes = clip_boxes(boxes, (ih, iw))
    return final_boxes, classes, scores
def decode(boxes, scores, all_anchors, ih, iw):
    """Decode outputs into boxes clipped to the image.

    Parameters
    ---------
    boxes: an array of shape (1, h, w, Ax4); predicted box encodings that
      bbox_transform_inv decodes against the anchors
    scores: an array of shape (1, h, w, Ax2); reshaped to (-1, 2) here
    all_anchors: an array of shape (1, h, w, Ax4), [x1, y1, x2, y2]
      (per the original note, pyramid_network.py already passes these
      flattened to (-1, 4)); required, None is not handled
    ih, iw: image height and width, passed to clip_boxes as (ih, iw)

    Returns
    --------
    final_boxes: of shape (R x 4)
    classes: of shape (R) in {0, 1}, int32
    scores: of shape (R) in [0 ~ 1], column 1 of the reshaped scores
    """
    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 2))
    # The message must use `.shape[0]`; subscripting the `reshape` method
    # would raise TypeError and hide the real shape mismatch.
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])

    boxes = bbox_transform_inv(all_anchors, boxes)
    classes = np.argmax(scores, axis=1).astype(np.int32)
    # Column 0 is background, column 1 is foreground: keep the foreground
    # probability as the score.
    scores = scores[:, 1]
    # Clipping is expected to keep the number of rois unchanged.
    final_boxes = clip_boxes(boxes, (ih, iw))
    return final_boxes, classes, scores
def decode(boxes, scores, all_anchors, ih, iw, num_classes=None):
    """Decode outputs into boxes clipped to the image.

    Parameters
    ---------
    boxes: an array reshaped to (-1, 4); predicted box encodings decoded
      against the anchors according to cfg.rpn_box_encoding
    scores: an array reshaped to (-1, num_classes)
    all_anchors: an array of shape (1, h, w, Ax4), [x1, y1, x2, y2]
    ih, iw: image height and width, passed to clip_boxes as (ih, iw)
    num_classes: number of score columns per row; defaults to
      cfg.num_classes when None

    Returns
    --------
    final_boxes: of shape (R x 4)
    classes: of shape (R) in {0,1,2,3... K-1}, int32
    scores: of shape (R, K), the reshaped scores (not reduced per class)
    """
    num_classes = cfg.num_classes if num_classes is None else num_classes

    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, num_classes))
    # The message must use `.shape[0]`; subscripting the `reshape` method
    # would raise TypeError and hide the real shape mismatch.
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])

    if cfg.rpn_box_encoding == 'fastrcnn':
        boxes = bbox_transform.bbox_transform_inv(all_anchors, boxes)
    elif cfg.rpn_box_encoding == 'linear':
        boxes = bbox_transform.bbox_transform_inv_linear(all_anchors, boxes)
    # NOTE(review): any other cfg.rpn_box_encoding value leaves `boxes`
    # undecoded and it is clipped as-is below — confirm this is intended.

    classes = np.argmax(scores, axis=1).astype(np.int32)
    final_boxes = bbox_transform.clip_boxes(boxes, (ih, iw))
    return final_boxes, classes, scores
def decode(boxes, scores, all_anchors, image_height, image_width):
    """Decode outputs into boxes clipped to the image.

    Parameters
    ---------
    boxes: an array of shape (1, h, w, Ax4); predicted box encodings that
      bbox_transform_inv decodes against the anchors
    scores: an array of shape (1, h, w, Ax2); reshaped to (-1, 2) here
    all_anchors: an array of shape (1, h, w, Ax4), [x1, y1, x2, y2]
    image_height, image_width: passed to clip_boxes as
      (image_height, image_width)

    Returns
    --------
    final_boxes: of shape (R x 4)
    classes: of shape (R) in {0, 1}, int32
    scores: of shape (R) in [0 ~ 1], column 1 of the reshaped scores
    """
    all_anchors = all_anchors.reshape((-1, 4))
    boxes = boxes.reshape((-1, 4))
    scores = scores.reshape((-1, 2))
    # The message must use `.shape[0]`; subscripting the `reshape` method
    # would raise TypeError and hide the real shape mismatch.
    assert scores.shape[0] == boxes.shape[0] == all_anchors.shape[0], \
        'Anchor layer shape error %d vs %d vs %d' % (
            scores.shape[0], boxes.shape[0], all_anchors.shape[0])

    boxes = bbox_transform_inv(all_anchors, boxes)
    boxes = clip_boxes(boxes, (image_height, image_width))
    classes = np.argmax(scores, axis=1).astype(np.int32)
    scores = scores[:, 1]
    return boxes, classes, scores
def decode(boxes, scores, rois, ih, iw):
    """Decode prediction targets into boxes and keep only the box of the
    highest-scoring class for each roi.

    Parameters
    ---------
    boxes: an array of shape (R, Kx4), four columns per class; passed to
      bbox_transform_inv as `deltas`
    scores: an array of shape (R, K)
    rois: an array of shape (R, 4), [x1, y1, x2, y2]
    ih, iw: image height and width, passed to clip_boxes as (ih, iw)

    Returns
    --------
    final_boxes: of shape (R x 4)
    classes: of shape (R) in {0,1,2,3... K-1}
    scores: of shape (R) in [0 ~ 1], the maximum score of each row
    """
    boxes = bbox_transform_inv(rois, deltas=boxes)
    classes = np.argmax(scores, axis=1)
    scores = np.max(scores, axis=1)

    # Gather, for every roi, the four columns belonging to its best class
    # (columns classes[i]*4 .. classes[i]*4+3) in one vectorized lookup
    # instead of a Python-level loop over rows.
    num_rois = boxes.shape[0]
    row_idx = np.arange(num_rois)[:, np.newaxis]
    col_idx = classes[:, np.newaxis] * 4 + np.arange(4)
    # Assigning into a zeros array keeps the default float64 output dtype.
    final_boxes = np.zeros((num_rois, 4))
    final_boxes[:] = boxes[row_idx, col_idx]

    final_boxes = clip_boxes(final_boxes, (ih, iw))
    return final_boxes, classes, scores
def decode(boxes, scores, rois, ih, iw):
    """Decode prediction targets into boxes and keep only the box of the
    highest-scoring class for each roi.

    Parameters
    ---------
    boxes: an array of shape (R, Kx4), four columns per class; passed to
      bbox_transform_inv as `deltas`
    scores: an array of shape (R, K)
    rois: an array of shape (R, 4), [x1, y1, x2, y2]
    ih, iw: image height and width, passed to clip_boxes as (ih, iw)

    Returns
    --------
    final_boxes: of shape (R x 4), built as float32 before clipping
    classes: of shape (R) in {0,1,2,3... K-1}, int32
    scores: of shape (R) in [0 ~ 1], the maximum score of each row
    """
    boxes = bbox_transform_inv(rois, deltas=boxes)
    classes = np.argmax(scores, axis=1).astype(np.int32)
    scores = np.max(scores, axis=1)

    # Gather, for every roi, the four columns belonging to its best class
    # (columns classes[i]*4 .. classes[i]*4+3) in one vectorized lookup
    # instead of a Python-level loop over rows.
    num_rois = boxes.shape[0]
    row_idx = np.arange(num_rois)[:, np.newaxis]
    col_idx = classes[:, np.newaxis] * 4 + np.arange(4)
    # Assigning into a float32 zeros array keeps the float32 output dtype.
    final_boxes = np.zeros((num_rois, 4), dtype=np.float32)
    final_boxes[:] = boxes[row_idx, col_idx]

    final_boxes = clip_boxes(final_boxes, (ih, iw))
    return final_boxes, classes, scores