def __call__(self, x, im_info):
    """Run the detector forward pass on ``x``.

    The trunk output feeds the two RPN heads, the proposal layer turns
    their outputs into RoIs, and the RoIs are pooled from
    ``self.trunk.relu5_3_out`` and pushed through the fully connected
    classification / box-regression head.

    Args:
        x: Input batch. Only ``x.data.shape[0]`` (the batch size) is read
            here before ``x`` is handed to ``self.trunk``.
        im_info: Indexable image metadata. ``im_info[0][:2]`` is passed to
            ``clip_boxes`` and ``im_info[0][2]`` divides the RoI
            coordinates (presumably height, width and resize scale --
            TODO confirm against the data loader).

    Returns:
        ``None`` when ``self.train`` is truthy (no training loss is
        computed by this method); otherwise the tuple
        ``(self.scores, self.pred_boxes)``.

    Side effects:
        Stores the class probabilities in ``self.scores`` and the clipped
        box predictions in ``self.pred_boxes``.
    """
    features = self.trunk(x)
    batch_size = x.data.shape[0]

    # RPN heads: objectness scores and box regression deltas.
    rpn_cls_score = self.rpn_cls_score(features)
    channels, feat_h, feat_w = rpn_cls_score.data.shape[1:]
    rpn_bbox_pred = self.rpn_bbox_pred(features)

    # Softmax is applied on a (batch, 2, -1) view of the scores, then the
    # result is folded back to the original (batch, C, H, W) layout.
    two_way_score = F.reshape(rpn_cls_score, (batch_size, 2, -1))
    rpn_cls_prob = F.softmax(two_way_score)
    rpn_cls_prob_reshape = F.reshape(
        rpn_cls_prob, (batch_size, channels, feat_h, feat_w))

    # RoI proposal
    rois = self.proposal_layer(
        rpn_cls_prob_reshape, rpn_bbox_pred, im_info, self.train)
    # Columns 1..4 of each RoI row are divided by im_info[0][2]; column 0
    # is left out of the box coordinates.
    boxes = rois[:, 1:5] / im_info[0][2]
    rois = chainer.Variable(rois, volatile=not self.train)

    # RCNN head
    pool5 = F.roi_pooling_2d(
        self.trunk.relu5_3_out, rois, 7, 7, 0.0625)
    fc6 = F.relu(self.fc6(pool5))
    fc7 = F.relu(self.fc7(fc6))
    self.scores = F.softmax(self.cls_score(fc7))
    box_deltas = self.bbox_pred(fc7).data
    decoded = bbox_transform_inv(boxes, box_deltas)
    self.pred_boxes = clip_boxes(decoded, im_info[0][:2])

    if not self.train:
        return self.scores, self.pred_boxes

    # Training path is a stub: no loss is computed yet. The original
    # notes point at softmax cross entropy for the class loss and a
    # SmoothL1 loss (huber loss with delta=1) for the boxes.
    return None
def __call__(self, x, im_info):
    """Run the detector forward pass on ``x``, optionally on a GPU.

    The trunk output feeds the two RPN heads, the proposal layer turns
    their outputs into RoIs, and the RoIs are pooled from
    ``self.trunk.feature`` and pushed through the fully connected
    classification / box-regression head.

    Args:
        x: Input batch. Only ``x.data.shape[0]`` (the batch size) is read
            here before ``x`` is handed to ``self.trunk``.
        im_info: Indexable image metadata. ``im_info[0][:2]`` is passed to
            ``clip_boxes`` and ``im_info[0][2]`` divides the RoI
            coordinates (presumably height, width and resize scale --
            TODO confirm against the data loader).

    Returns:
        ``None`` when ``self.train`` is truthy (no training loss is
        computed by this method); otherwise the tuple
        ``(self.scores, self.pred_boxes)``.

    Side effects:
        Stores the class probabilities in ``self.scores`` and the clipped
        box predictions in ``self.pred_boxes``.
    """
    features = self.trunk(x)
    batch_size = x.data.shape[0]

    # RPN heads: objectness scores and box regression deltas.
    rpn_cls_score = self.rpn_cls_score(features)
    channels, feat_h, feat_w = rpn_cls_score.data.shape[1:]
    rpn_bbox_pred = self.rpn_bbox_pred(features)

    # Softmax is applied on a (batch, 2, -1) view of the scores, then the
    # result is folded back to the original (batch, C, H, W) layout.
    two_way_score = F.reshape(rpn_cls_score, (batch_size, 2, -1))
    rpn_cls_prob = F.softmax(two_way_score)
    rpn_cls_prob_reshape = F.reshape(
        rpn_cls_prob, (batch_size, channels, feat_h, feat_w))

    # RoI proposal
    rois = self.proposal_layer(
        rpn_cls_prob_reshape, rpn_bbox_pred, im_info, self.train)

    if self.gpu < 0:
        # CPU path: plain array arithmetic.
        boxes = rois[:, 1:5] / im_info[0][2]
    else:
        # Move both operands to the selected device and do the division
        # with that device made current.
        rois = to_gpu(rois, device=self.gpu)
        im_info = to_gpu(im_info, device=self.gpu)
        with chainer.cuda.Device(self.gpu):
            boxes = rois[:, 1:5] / im_info[0][2]
    rois = chainer.Variable(rois, volatile=not self.train)

    # RCNN head
    pool5 = roi_pooling_2d(self.trunk.feature, rois, 7, 7, 0.0625)
    fc6 = F.relu(self.fc6(pool5))
    fc7 = F.relu(self.fc7(fc6))
    self.scores = F.softmax(self.cls_score(fc7))
    box_deltas = self.bbox_pred(fc7).data
    decoded = bbox_transform_inv(boxes, box_deltas, self.gpu)
    self.pred_boxes = clip_boxes(decoded, im_info[0][:2], self.gpu)

    if not self.train:
        return self.scores, self.pred_boxes

    # Training path is a stub: no loss is computed yet. The original
    # notes point at softmax cross entropy for the class loss and a
    # SmoothL1 loss (huber loss with delta=1) for the boxes.
    return None
def __call__(self, rpn_cls_prob, rpn_bbox_pred, im_info, train):
    """Turn RPN outputs into a ranked, NMS-filtered array of RoIs.

    Steps, in order:
      1. place the ``self._num_anchors`` base anchors on every feature
         map cell and apply the predicted box deltas to them;
      2. clip the resulting boxes to the image;
      3. drop boxes rejected by ``_filter_boxes`` (minimum side length);
      4. sort by score, highest first;
      5. keep at most the pre-NMS top N;
      6. apply NMS with ``self.RPN_NMS_THRESH``;
      7. keep at most the post-NMS top N.

    Args:
        rpn_cls_prob: Object whose ``.data`` holds the RPN class
            probabilities; channels from ``self._num_anchors`` onward are
            taken as the foreground scores.
        rpn_bbox_pred: Object whose ``.data`` holds the RPN box deltas.
        im_info: 2-D indexable array; row 0 is used. Its first two entries
            go to ``clip_boxes`` and its third scales ``RPN_MIN_SIZE``.
        train: Truthy selects the ``TRAIN_RPN_*_TOP_N`` limits, falsy the
            ``TEST_RPN_*_TOP_N`` limits.

    Returns:
        A float32 array with one row per surviving proposal: column 0 is
        the batch index (always 0, only a single input image is
        supported) followed by the proposal box coordinates.
    """
    if train:
        pre_nms_limit = self.TRAIN_RPN_PRE_NMS_TOP_N
        post_nms_limit = self.TRAIN_RPN_POST_NMS_TOP_N
    else:
        pre_nms_limit = self.TEST_RPN_PRE_NMS_TOP_N
        post_nms_limit = self.TEST_RPN_POST_NMS_TOP_N
    overlap_thresh = self.RPN_NMS_THRESH
    smallest_side = self.RPN_MIN_SIZE

    # The first block of _num_anchors channels holds the background
    # probabilities; the second block holds the foreground ones we want.
    fg_scores = to_cpu(rpn_cls_prob.data[:, self._num_anchors:, :, :])
    deltas = to_cpu(rpn_bbox_pred.data)
    info = im_info[0, :]

    # 1. Generate proposals from bbox deltas and shifted anchors.
    feat_h, feat_w = fg_scores.shape[-2:]

    # Enumerate the (x, y, x, y) offset of every feature map cell.
    xs = np.arange(0, feat_w) * self._feat_stride
    ys = np.arange(0, feat_h) * self._feat_stride
    grid_x, grid_y = np.asarray(np.meshgrid(xs, ys))
    shifts = np.vstack((grid_x.ravel(), grid_y.ravel(),
                        grid_x.ravel(), grid_y.ravel())).transpose()

    # Broadcast (1, A, 4) base anchors against (K, 1, 4) cell shifts to
    # get (K, A, 4) shifted anchors, then flatten to (K * A, 4).
    A = self._num_anchors
    K = shifts.shape[0]
    base_anchors = self._anchors.reshape((1, A, 4))
    cell_shifts = shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = (base_anchors + cell_shifts).reshape((K * A, 4))

    # Bring the deltas into the same row order as the anchors:
    # (1, 4 * A, H, W) -> (1, H, W, 4 * A) -> (H * W * A, 4), rows ordered
    # by (h, w, a) from slowest to fastest.
    deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same for the scores: (1, A, H, W) -> (1, H, W, A) -> (H * W * A, 1).
    fg_scores = fg_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via the bbox transformations.
    proposals = bbox_transform_inv(anchors, deltas, -1)

    # 2. Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, info[:2])

    # 3. Remove boxes whose height or width is below the threshold; the
    #    minimum size is converted to the input image scale in info[2].
    big_enough = _filter_boxes(proposals, smallest_side * info[2])
    proposals = proposals[big_enough, :]
    fg_scores = fg_scores[big_enough]

    # 4. Sort all (proposal, score) pairs by score, highest first.
    # 5. Take the top pre-NMS N (a non-positive limit means "keep all").
    ranking = fg_scores.ravel().argsort()[::-1]
    if pre_nms_limit > 0:
        ranking = ranking[:pre_nms_limit]
    proposals = proposals[ranking, :]
    fg_scores = fg_scores[ranking]

    # 6. Apply NMS.
    # 7. Take the top post-NMS N (a non-positive limit means "keep all").
    # 8. Return the top proposals.
    survivors = nms(np.hstack((proposals, fg_scores)), overlap_thresh)
    if post_nms_limit > 0:
        survivors = survivors[:post_nms_limit]
    proposals = proposals[survivors, :]
    # Kept in step with the proposals although it is not returned.
    fg_scores = fg_scores[survivors]

    # Output rois blob. Only a single input image is supported, so every
    # batch index is 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    stacked = np.hstack((batch_inds, proposals))
    return np.asarray(stacked, dtype=np.float32)
def __call__(self, rpn_cls_prob, rpn_bbox_pred, im_info, train):
    """Turn RPN outputs into a ranked, NMS-filtered array of RoIs.

    Steps, in order:
      1. place the ``self._num_anchors`` base anchors on every feature
         map cell and apply the predicted box deltas to them;
      2. clip the resulting boxes to the image;
      3. drop boxes rejected by ``_filter_boxes`` (minimum side length);
      4. sort by score, highest first;
      5. keep at most the pre-NMS top N;
      6. apply NMS with ``self.RPN_NMS_THRESH``;
      7. keep at most the post-NMS top N.

    Args:
        rpn_cls_prob: Object whose ``.data`` holds the RPN class
            probabilities; channels from ``self._num_anchors`` onward are
            taken as the foreground scores.
        rpn_bbox_pred: Object whose ``.data`` holds the RPN box deltas.
        im_info: 2-D indexable array; row 0 is used. Its first two entries
            go to ``clip_boxes`` and its third scales ``RPN_MIN_SIZE``.
        train: Truthy selects ``self.RPN_PRE_NMS_TOP_N`` /
            ``self.RPN_POST_NMS_TOP_N``; falsy uses the fixed limits
            6000 (pre-NMS) and 300 (post-NMS).

    Returns:
        A float32 array with one row per surviving proposal: column 0 is
        the batch index (always 0, only a single input image is
        supported) followed by the proposal box coordinates.
    """
    if train:
        pre_nms_limit = self.RPN_PRE_NMS_TOP_N
        post_nms_limit = self.RPN_POST_NMS_TOP_N
    else:
        # Fixed limits used outside of training.
        pre_nms_limit = 6000
        post_nms_limit = 300
    overlap_thresh = self.RPN_NMS_THRESH
    smallest_side = self.RPN_MIN_SIZE

    # The first block of _num_anchors channels holds the background
    # probabilities; the second block holds the foreground ones we want.
    fg_scores = to_cpu(rpn_cls_prob.data[:, self._num_anchors:, :, :])
    deltas = to_cpu(rpn_bbox_pred.data)
    info = im_info[0, :]

    # 1. Generate proposals from bbox deltas and shifted anchors.
    feat_h, feat_w = fg_scores.shape[-2:]

    # Enumerate the (x, y, x, y) offset of every feature map cell.
    xs = np.arange(0, feat_w) * self._feat_stride
    ys = np.arange(0, feat_h) * self._feat_stride
    grid_x, grid_y = np.asarray(np.meshgrid(xs, ys))
    shifts = np.vstack((grid_x.ravel(), grid_y.ravel(),
                        grid_x.ravel(), grid_y.ravel())).transpose()

    # Broadcast (1, A, 4) base anchors against (K, 1, 4) cell shifts to
    # get (K, A, 4) shifted anchors, then flatten to (K * A, 4).
    A = self._num_anchors
    K = shifts.shape[0]
    base_anchors = self._anchors.reshape((1, A, 4))
    cell_shifts = shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = (base_anchors + cell_shifts).reshape((K * A, 4))

    # Bring the deltas into the same row order as the anchors:
    # (1, 4 * A, H, W) -> (1, H, W, 4 * A) -> (H * W * A, 4), rows ordered
    # by (h, w, a) from slowest to fastest.
    deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same for the scores: (1, A, H, W) -> (1, H, W, A) -> (H * W * A, 1).
    fg_scores = fg_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via the bbox transformations.
    proposals = bbox_transform_inv(anchors, deltas, -1)

    # 2. Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, info[:2])

    # 3. Remove boxes whose height or width is below the threshold; the
    #    minimum size is converted to the input image scale in info[2].
    big_enough = _filter_boxes(proposals, smallest_side * info[2])
    proposals = proposals[big_enough, :]
    fg_scores = fg_scores[big_enough]

    # 4. Sort all (proposal, score) pairs by score, highest first.
    # 5. Take the top pre-NMS N (a non-positive limit means "keep all").
    ranking = fg_scores.ravel().argsort()[::-1]
    if pre_nms_limit > 0:
        ranking = ranking[:pre_nms_limit]
    proposals = proposals[ranking, :]
    fg_scores = fg_scores[ranking]

    # 6. Apply NMS.
    # 7. Take the top post-NMS N (a non-positive limit means "keep all").
    # 8. Return the top proposals.
    survivors = nms(np.hstack((proposals, fg_scores)), overlap_thresh)
    if post_nms_limit > 0:
        survivors = survivors[:post_nms_limit]
    proposals = proposals[survivors, :]
    # Kept in step with the proposals although it is not returned.
    fg_scores = fg_scores[survivors]

    # Output rois blob. Only a single input image is supported, so every
    # batch index is 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    stacked = np.hstack((batch_inds, proposals))
    return np.asarray(stacked, dtype=np.float32)
def __call__(self, x, im_info, gt_boxes=None):
    """Run the detector forward pass; compute losses when training.

    Args:
        x: Input batch, handed to ``self.trunk``.
        im_info: Image metadata. When ``self.train`` is truthy it must
            expose ``.data`` (its payload is unwrapped here); otherwise it
            is used as an array directly. A CuPy array is copied to the
            host before it is passed to ``self.RPN``. ``im_info[0][2]``
            divides the RoI coordinates and ``im_info[0][:2]`` is passed
            to ``clip_boxes`` (presumably scale and height/width -- TODO
            confirm against the data loader).
        gt_boxes: Ground-truth boxes exposing ``.data``. Must be provided
            when ``self.train`` is truthy; only forwarded to ``self.RPN``
            otherwise.

    Returns:
        Training: ``(rpn_cls_loss, rpn_loss_bbox, loss_bbox, loss_cls)``.
        Otherwise: ``(cls_prob, pred_boxes)``.

    Side effects:
        When training, reports the four losses through
        ``reporter.report`` with ``self`` as the observer.
    """
    h = self.trunk(x)

    # chainer.cuda.cupy is only usable when CUDA support was resolved, so
    # every CuPy type check is guarded by chainer.cuda.available to keep
    # CPU-only installs from failing on the attribute lookup.
    cuda = chainer.cuda
    if cuda.available and isinstance(im_info, cuda.cupy.ndarray):
        im_info = cuda.cupy.asnumpy(im_info)

    if self.train:
        im_info = im_info.data
        gt_boxes = gt_boxes.data
        if cuda.available and isinstance(gt_boxes, cuda.cupy.ndarray):
            im_info = cuda.cupy.asnumpy(im_info)
            gt_boxes = cuda.cupy.asnumpy(gt_boxes)
        rpn_cls_loss, rpn_loss_bbox, rois = self.RPN(
            h, im_info, self.gpu, gt_boxes)
        rois, labels, bbox_targets, bbox_inside_weights, \
            bbox_outside_weights = self.proposal_target_layer(
                rois, gt_boxes)
    else:
        rois = self.RPN(h, im_info, self.gpu, gt_boxes)

    # Convert rois: columns 1..4 of each row are divided by im_info[0][2].
    if self.gpu >= 0:
        rois = to_gpu(rois, device=self.gpu)
        im_info = to_gpu(im_info, device=self.gpu)
        with chainer.cuda.Device(self.gpu):
            boxes = rois[:, 1:5] / im_info[0][2]
    else:
        boxes = rois[:, 1:5] / im_info[0][2]

    # RCNN head
    pool5 = roi_pooling_2d(
        self.trunk.feature, rois, 7, 7, self.spatial_scale)
    fc6 = F.dropout(F.relu(self.fc6(pool5)), train=self.train)
    fc7 = F.dropout(F.relu(self.fc7(fc6)), train=self.train)

    # Per class probability
    cls_score = self.cls_score(fc7)
    cls_prob = F.softmax(cls_score)

    # BBox predictions
    bbox_pred = self.bbox_pred(fc7)
    box_deltas = bbox_pred.data

    if self.train:
        if self.gpu >= 0:
            # Move all regression/classification targets to the device.
            labels, bbox_targets, bbox_inside_weights, \
                bbox_outside_weights = [
                    to_gpu(target, device=self.gpu)
                    for target in (labels, bbox_targets,
                                   bbox_inside_weights,
                                   bbox_outside_weights)]
        loss_cls = F.softmax_cross_entropy(cls_score, labels)
        # NOTE: this wrapped ``labels`` is not read again in this method.
        labels = Variable(labels, volatile='off')
        bbox_targets = Variable(bbox_targets, volatile='off')
        loss_bbox = smooth_l1_loss(
            bbox_pred, bbox_targets, bbox_inside_weights,
            bbox_outside_weights, self.sigma)

        reporter.report({'rpn_loss_cls': rpn_cls_loss,
                         'rpn_loss_bbox': rpn_loss_bbox,
                         'loss_bbox': loss_bbox,
                         'loss_cls': loss_cls}, self)

        return rpn_cls_loss, rpn_loss_bbox, loss_bbox, loss_cls

    pred_boxes = bbox_transform_inv(boxes, box_deltas, self.gpu)
    pred_boxes = clip_boxes(pred_boxes, im_info[0][:2], self.gpu)
    return cls_prob, pred_boxes
def __call__(self, x, im_info, gt_boxes=None):
    """Run the detector forward pass; compute losses when training.

    Args:
        x: Input batch, handed to ``self.trunk``.
        im_info: Image metadata. When ``self.train`` is truthy it must
            expose ``.data`` (its payload is unwrapped here); otherwise it
            is used as an array directly. A CuPy array is copied to the
            host before it is passed to ``self.RPN``. ``im_info[0][2]``
            divides the RoI coordinates and ``im_info[0][:2]`` is passed
            to ``clip_boxes`` (presumably scale and height/width -- TODO
            confirm against the data loader).
        gt_boxes: Ground-truth boxes exposing ``.data``. Must be provided
            when ``self.train`` is truthy; only forwarded to ``self.RPN``
            otherwise.

    Returns:
        Training: ``(rpn_cls_loss, rpn_loss_bbox, loss_bbox, loss_cls)``.
        Otherwise: ``(cls_prob, pred_boxes)``.

    Side effects:
        When training, reports the four losses through
        ``reporter.report`` with ``self`` as the observer.
    """
    h = self.trunk(x)

    # chainer.cuda.cupy is only usable when CUDA support was resolved, so
    # every CuPy type check is guarded by chainer.cuda.available to keep
    # CPU-only installs from failing on the attribute lookup.
    cuda = chainer.cuda
    if cuda.available and isinstance(im_info, cuda.cupy.ndarray):
        im_info = cuda.cupy.asnumpy(im_info)

    if self.train:
        im_info = im_info.data
        gt_boxes = gt_boxes.data
        if cuda.available and isinstance(gt_boxes, cuda.cupy.ndarray):
            im_info = cuda.cupy.asnumpy(im_info)
            gt_boxes = cuda.cupy.asnumpy(gt_boxes)
        rpn_cls_loss, rpn_loss_bbox, rois = self.RPN(
            h, im_info, self.gpu, gt_boxes)
        rois, labels, bbox_targets, bbox_inside_weights, \
            bbox_outside_weights = self.proposal_target_layer(
                rois, gt_boxes)
    else:
        rois = self.RPN(h, im_info, self.gpu, gt_boxes)

    # Convert rois: columns 1..4 of each row are divided by im_info[0][2].
    if self.gpu >= 0:
        rois = to_gpu(rois, device=self.gpu)
        im_info = to_gpu(im_info, device=self.gpu)
        with chainer.cuda.Device(self.gpu):
            boxes = rois[:, 1:5] / im_info[0][2]
    else:
        boxes = rois[:, 1:5] / im_info[0][2]

    # RCNN head
    pool5 = roi_pooling_2d(self.trunk.feature, rois, 7, 7,
                           self.spatial_scale)
    fc6 = F.dropout(F.relu(self.fc6(pool5)), train=self.train)
    fc7 = F.dropout(F.relu(self.fc7(fc6)), train=self.train)

    # Per class probability
    cls_score = self.cls_score(fc7)
    cls_prob = F.softmax(cls_score)

    # BBox predictions
    bbox_pred = self.bbox_pred(fc7)
    box_deltas = bbox_pred.data

    if self.train:
        if self.gpu >= 0:
            # Move all regression/classification targets to the device.
            labels, bbox_targets, bbox_inside_weights, \
                bbox_outside_weights = [
                    to_gpu(target, device=self.gpu)
                    for target in (labels, bbox_targets,
                                   bbox_inside_weights,
                                   bbox_outside_weights)]
        loss_cls = F.softmax_cross_entropy(cls_score, labels)
        # NOTE: this wrapped ``labels`` is not read again in this method.
        labels = Variable(labels, volatile='auto')
        bbox_targets = Variable(bbox_targets, volatile='auto')
        loss_bbox = smooth_l1_loss(bbox_pred, bbox_targets,
                                   bbox_inside_weights,
                                   bbox_outside_weights, self.sigma)

        reporter.report(
            {
                'rpn_loss_cls': rpn_cls_loss,
                'rpn_loss_bbox': rpn_loss_bbox,
                'loss_bbox': loss_bbox,
                'loss_cls': loss_cls
            }, self)

        return rpn_cls_loss, rpn_loss_bbox, loss_bbox, loss_cls

    pred_boxes = bbox_transform_inv(boxes, box_deltas, self.gpu)
    pred_boxes = clip_boxes(pred_boxes, im_info[0][:2], self.gpu)
    return cls_prob, pred_boxes