def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors, is_training):
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])
    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if is_training:
        order = order[:train_rpn_pre_nms_topN]
    else:
        order = order[:test_rpn_pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    keep = nms(np.hstack((proposals, scores)), rpn_nms_thresh)
    if is_training:
        keep = keep[:train_rpn_nms_post_topN]
    else:
        keep = keep[:test_rpn_nms_post_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Only supports a single image as input
    batch_indx = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_indx, proposals.astype(np.float32, copy=False)))
    return blob, scores
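# The functions in this file all assume helpers named bbox_transform_inv and clip_boxes
# are importable. A minimal NumPy sketch of those helpers, following the standard
# py-faster-rcnn definitions (an assumption about the exact variants used here):
import numpy as np

def bbox_transform_inv(boxes, deltas):
    # Apply predicted (dx, dy, dw, dh) deltas to anchor/proposal boxes (x1, y1, x2, y2).
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx, dy = deltas[:, 0::4], deltas[:, 1::4]
    dw, dh = deltas[:, 2::4], deltas[:, 3::4]
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]
    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
    return pred_boxes

def clip_boxes(boxes, im_shape):
    # Clip (x1, y1, x2, y2) boxes to lie inside an image of shape (height, width).
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes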
def slpn_pred(ROIs, P_cls, P_regr, C, bbox_thresh=0.1, nms_thresh=0.3, roi_stride=8):
    # The classifier outputs boxes as (x, y, w, h) at the downscaled feature resolution.
    scores = np.squeeze(P_cls[:, :, 0], axis=0)
    regr = np.squeeze(P_regr, axis=0)
    rois = np.squeeze(ROIs, axis=0)
    keep = np.where(scores >= bbox_thresh)[0]
    if len(keep) == 0:
        return [], []
    rois[:, 2] += rois[:, 0]
    rois[:, 3] += rois[:, 1]
    rois = rois[keep] * roi_stride
    scores = scores[keep]
    regr = regr[keep] * np.array(C.classifier_regr_std).astype(dtype=np.float32)
    pred_boxes = bbox_transform_inv(rois, regr)
    pred_boxes = clip_boxes(pred_boxes, [C.random_crop[0], C.random_crop[1]])
    keep = np.where((pred_boxes[:, 2] - pred_boxes[:, 0] >= 3) &
                    (pred_boxes[:, 3] - pred_boxes[:, 1] >= 3))[0]
    pred_boxes = pred_boxes[keep]
    scores = scores[keep].reshape((-1, 1))
    keep = nms(np.hstack((pred_boxes, scores)), nms_thresh, usegpu=False, gpu_id=0)
    pred_boxes = pred_boxes[keep]
    scores = scores[keep]
    return pred_boxes, scores
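# Several functions above and below call an nms() helper that is not defined in this file.
# A minimal CPU sketch of greedy non-maximum suppression over rows of
# (x1, y1, x2, y2, score), in the style of py-faster-rcnn's py_cpu_nms; the extra
# usegpu/gpu_id arguments seen at some call sites are assumed to select a GPU variant
# and are ignored in this sketch:
def py_cpu_nms(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current highest-scoring box with the remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep only boxes whose overlap with the kept box is at or below the threshold
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]
    return keep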
def pred_det(anchors, cls_pred, regr_pred, C, step=1):
    if step == 1:
        scores = cls_pred[0, :, :]
    elif step == 2:
        scores = anchors[:, -1:] * cls_pred[0, :, :]
    elif step == 3:
        scores = anchors[:, -2:-1] * anchors[:, -1:] * cls_pred[0, :, :]
    A = np.copy(anchors[:, :4])
    bbox_deltas = regr_pred.reshape((-1, 4))
    bbox_deltas = bbox_deltas * np.array(C.classifier_regr_std).astype(dtype=np.float32)
    proposals = bbox_transform_inv(A, bbox_deltas)
    proposals = clip_boxes(proposals, [C.random_crop[0], C.random_crop[1]])
    keep = filter_boxes(proposals, C.roi_stride)
    proposals = proposals[keep, :]
    scores = scores[keep]
    order = scores.ravel().argsort()[::-1]
    order = order[:C.pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    keep = np.where(scores > C.scorethre)[0]
    proposals = proposals[keep, :]
    scores = scores[keep]
    keep = nms(np.hstack((proposals, scores)), C.overlap_thresh, usegpu=False, gpu_id=0)
    keep = keep[:C.post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    return proposals, scores
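# pred_det and get_proposal also rely on a filter_boxes() helper that is not defined here.
# A plausible minimal version, assuming it simply drops proposals whose width or height
# falls below a minimum size (tied to the RoI stride, as the call sites suggest):
def filter_boxes(boxes, min_size):
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep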
def pred_pp_2nd(anchors, cls_pred, regr_pred, C):
    scores = cls_pred[0, :, :]
    bbox_deltas = regr_pred.reshape((-1, 4))
    bbox_deltas = bbox_deltas * np.array(C.classifier_regr_std).astype(dtype=np.float32)
    anchors[:, :4] = bbox_transform_inv(anchors[:, :4], bbox_deltas)
    anchors[:, :4] = clip_boxes(anchors[:, :4], [C.random_crop[0], C.random_crop[1]])
    proposals = np.concatenate((anchors, scores), axis=-1)
    return proposals
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """A layer that just selects the top region proposals without using
    non-maximal suppression. For details please see the technical report.

    Typical inputs: im_info is a length-3 placeholder (height, width, scale),
    _feat_stride is 16, anchors holds the image-space coordinates of every
    anchor on the feature map, and num_anchors is 9.
    """
    rpn_top_n = cfg.TEST.RPN_TOP_N  # e.g. cfg.TEST.RPN_TOP_N = 5000
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    length = scores.shape[0]
    if length < rpn_top_n:
        # Random selection, maybe unnecessary and loses good proposals,
        # but such a case rarely happens: npr.choice draws rpn_top_n random indices.
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)
    else:
        # argsort returns indices that sort the scores ascending, so reverse it
        top_inds = scores.argsort(0)[::-1]
        top_inds = top_inds[:rpn_top_n]  # keep the top rpn_top_n
        top_inds = top_inds.reshape(rpn_top_n, )
    # Do the selection here: top_inds picks rpn_top_n rows of image-space anchor coordinates
    anchors = anchors[top_inds, :]
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    scores = scores[top_inds]
    # Convert the selected anchors into proposals via bbox transformations,
    # using their predicted deltas
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Clip predicted boxes to the image
    proposals = clip_boxes(proposals, im_info[:2])
    # Output rois blob. Our RPN implementation only supports a single input image,
    # so all batch inds are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def im_detect(net, im, boxes):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)
    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]
    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)
    # use softmax estimated probabilities
    scores = blobs_out['cls_prob']
    print(scores)
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    if cfg.DEDUP_BOXES > 0:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
    return scores, pred_boxes
def generate_pp_2nd(all_anchors, regr_layer, C):
    A = np.copy(all_anchors[:, :4])
    proposals_batch = []
    for i in range(regr_layer.shape[0]):
        proposals = np.ones_like(all_anchors)
        bbox_deltas = regr_layer[i, :, :]
        bbox_deltas = bbox_deltas * np.array(C.classifier_regr_std).astype(dtype=np.float32)
        proposals[:, :4] = bbox_transform_inv(A, bbox_deltas)
        proposals = clip_boxes(proposals, [C.random_crop[0], C.random_crop[1]])
        proposals_batch.append(np.expand_dims(proposals, axis=0))
    return np.concatenate(proposals_batch, axis=0)
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN.
    For details please see the technical report.
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N    # e.g. RPN_PRE_NMS_TOP_N = 6000
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # e.g. TEST.RPN_POST_NMS_TOP_N = 300; max proposals kept after NMS
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH         # e.g. TEST.RPN_NMS_THRESH = 0.7

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    # proposals, shape (length, 4): the image-space anchors combined with the
    # predicted deltas rpn_bbox_pred give the regressed box coordinates
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Clip the predicted coordinates to the original image
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only supports a single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def get_proposal(all_anchors, cls_layer, regr_layer, C, overlap_thresh=0.7,
                 pre_nms_topN=1000, post_nms_topN=300, roi_stride=8):
    A = np.copy(all_anchors[:, :4])
    scores = cls_layer.reshape((-1, 1))
    bbox_deltas = regr_layer.reshape((-1, 4))
    proposals = bbox_transform_inv(A, bbox_deltas)
    proposals = clip_boxes(proposals, [C.random_crop[0], C.random_crop[1]])
    keep = filter_boxes(proposals, roi_stride)
    proposals = proposals[keep, :]
    scores = scores[keep]
    order = scores.ravel().argsort()[::-1]
    order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    keep = nms(np.hstack((proposals, scores)), overlap_thresh, usegpu=False, gpu_id=0)
    keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    return proposals
def proposal_layer(self, rpn_cls_prob, rpn_bbox_pred, rpn_trans_param, im_info):
    if self.is_train:
        pre_nms_top_n = self.config['train_rpn_pre_nms_top_n']
        post_nms_top_n = self.config['train_rpn_post_nms_top_n']
        nms_thresh = self.config['train_rpn_nms_thresh']
    else:
        pre_nms_top_n = self.config['test_rpn_pre_nms_top_n']
        post_nms_top_n = self.config['test_rpn_post_nms_top_n']
        nms_thresh = self.config['test_rpn_nms_thresh']

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, self.num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)
    rpn_trans_param = rpn_trans_param.view((-1, 6))
    proposals = bbox_transform_inv(self.anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_top_n > 0:
        order = order[:pre_nms_top_n]
        scores = scores[:pre_nms_top_n].view(-1, 1)
    proposals = proposals[order.data, :]
    trans_param = rpn_trans_param[order.data, :]

    # Non-maximal suppression
    keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_top_n > 0:
        keep = keep[:post_nms_top_n]
    proposals = proposals[keep, :]
    scores = scores[keep, ]
    trans_param = trans_param[keep, :]

    # Only supports a single image as input
    batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_())
    blob = torch.cat((batch_inds, proposals), 1)
    return blob, scores, trans_param
def test_single(self, image, rois, image_size, image_resize_ratio):
    """Test a single image on the net.

    Args:
        image: A preprocessed image or precomputed features of the image, as ndarray.
        rois: RoIs sized for the image. Ndarray: (image_index, x1, y1, x2, y2).
        image_size: The original image size.
        image_resize_ratio: The ratio by which this image was resized.
    """
    rois_np, dedup_inv_index = self.dedup_boxes(rois.numpy())
    image_var = Variable(image.cuda(), volatile=True)
    rois_var = Variable(torch.Tensor(rois_np).cuda(), volatile=True)
    # Run the image through the network
    out = self.model(image_var, rois_var)
    # Predicted deltas
    deltas = out[1].data.cpu().numpy()
    deltas = self.unnormalize_deltas(deltas, self._targets_mean, self._targets_std)
    # Transform rois using predicted deltas
    boxes = rois_np[:, 1:] / image_resize_ratio
    bboxes_inv_transformed = bbox_transform_inv(boxes, deltas)
    class_probas, class_indexes = torch.max(out[0], 1)
    indexes_np = np.squeeze(class_indexes.data.cpu().numpy())
    # print('Total FG RoIs Detected: ', np.sum(indexes_np > 0))
    scores = out[0].data.cpu().numpy()
    scores = np.exp(scores)
    # Clip rois to image size
    bboxes_inv_transformed = clip_boxes(bboxes_inv_transformed, image_size)
    scores = scores[dedup_inv_index, :]
    bboxes_inv_transformed = bboxes_inv_transformed[dedup_inv_index, :]
    # Non-maximum suppression of similar boxes
    all_boxes = self._nms_boxes(bboxes_inv_transformed, scores)
    return all_boxes
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors, mode='train'):
    """A simplified version compared to fast/er RCNN.
    For details please see the technical report.
    """
    pre_nms_topN = 12000
    post_nms_topN = 2000
    nms_thresh = 0.7
    if mode == 'test':
        pre_nms_topN = 3000
        post_nms_topN = 300

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only supports a single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def forward(self, epoch, speech_data, act_lens, gt_boxes, num_boxes):
    cfg_key = 'TRAIN' if self.training else 'TEST'
    batch_size = speech_data.size(0)
    # Feature extraction
    base_feat = self.feature_extractor(speech_data, act_lens)
    # RPN: get the proposals, anchors and predicted scores.
    # The scores have not gone through the softmax yet;
    # shape is batch_size * num_anchors_per_utt * 2 (box_dim or score_dim).
    anchors_per_utt, proposals, rpn_cls_score, rpn_bbox_pred = self.rpn_nnet(base_feat)
    rois = clip_boxes(proposals, act_lens, batch_size)
    rpn_label = None
    # First calculate the RPN loss, then the KWS loss
    if self.training:
        # Calculate the RPN loss. The RPN training targets are: labels,
        # bbox regression targets, bbox_inside_weights, bbox_outside_weights.
        rpn_data = self.anchor_target_layer(anchors_per_utt, gt_boxes, act_lens)
        rpn_label = rpn_data[0].long().view(-1)
        rpn_keep = rpn_label.ne(-1).nonzero().view(-1)
        rpn_label = torch.index_select(rpn_label, 0, rpn_keep)
        rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, self.num_class), 0, rpn_keep)
        self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
        rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
        self.rpn_loss_bbox = smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets,
                                            rpn_bbox_inside_weights,
                                            rpn_bbox_outside_weights,
                                            sigma=10, dim=[0, 1])
        return rois, rpn_cls_score, rpn_label, self.rpn_loss_cls, self.rpn_loss_bbox
    return rois, rpn_cls_score, anchors_per_utt
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """A layer that just selects the top region proposals without using
    non-maximal suppression. For details please see the technical report.
    """
    rpn_top_n = cfg.TEST.RPN_TOP_N
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    length = scores.shape[0]
    if length < rpn_top_n:
        # Random selection, maybe unnecessary and loses good proposals,
        # but such a case rarely happens
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)
    else:
        top_inds = scores.argsort(0)[::-1]
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.reshape(rpn_top_n, )
    # Do the selection here
    anchors = anchors[top_inds, :]
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    scores = scores[top_inds]
    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])
    # Output rois blob. Our RPN implementation only supports a single input image,
    # so all batch inds are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def forward(self, input):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # the first set of _num_anchors channels are bg probs,
    # the second set are the fg probs
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    batch_size = bbox_deltas.size(0)

    feat_height, feat_width = scores.size(2), scores.size(3)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                         shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    self._anchors = self._anchors.type_as(scores)
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

    # Same story for the scores:
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info, batch_size)

    scores_keep = scores
    proposals_keep = proposals
    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        # 3. remove predicted boxes with either height or width < threshold
        #    (NOTE: convert min_size to input image scale stored in im_info[2])
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order_single = order[i]
        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]
        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output
def __call__(self, rpn_cls_prob, rpn_bbox_pred, im_info, train):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    pre_nms_topN = self.RPN_PRE_NMS_TOP_N if train else 6000
    post_nms_topN = self.RPN_POST_NMS_TOP_N if train else 300
    nms_thresh = self.RPN_NMS_THRESH
    min_size = self.RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs,
    # the second set are the fg probs, which we want
    scores = to_cpu(rpn_cls_prob.data[:, self._num_anchors:, :, :])
    bbox_deltas = to_cpu(rpn_bbox_pred.data)
    im_info = im_info[0, :]

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.asarray(np.meshgrid(shift_x, shift_y))
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #   add A anchors (1, A, 4) to
    #   cell K shifts (K, 1, 4) to get
    #   shift anchors (K, A, 4)
    #   reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #   bbox deltas will be (1, 4 * A, H, W) format
    #   transpose to (1, H, W, 4 * A)
    #   reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    #   in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #   scores are (1, A, H, W) format
    #   transpose to (1, H, W, A)
    #   reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, -1)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    #    (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob. Our RPN implementation only supports a single input image,
    # so all batch inds are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    rois = np.asarray(np.hstack((batch_inds, proposals)), dtype=np.float32)
    return rois
def forward(self, input):
    # Algorithm:
    #
    # for each (L, H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # the first set of _num_anchors channels are bg probs,
    # the second set are the fg probs
    scores = input[0][:, :, 1]   # batch_size x num_rois x 1
    bbox_deltas = input[1]       # batch_size x num_rois x 6
    im_info = input[2]
    cfg_key = input[3]
    feat_shapes = input[4]

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    batch_size = bbox_deltas.size(0)

    anchors = torch.from_numpy(
        generate_anchors_all_pyramids(self._fpn_scales, self._anchor_ratios, l_ratios,
                                      feat_shapes, self._fpn_feature_strides,
                                      self._fpn_anchor_stride)).type_as(scores)
    num_anchors = anchors.size(0)
    anchors = anchors.view(1, num_anchors, 6).expand(batch_size, num_anchors, 6)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info, batch_size)

    scores_keep = scores
    proposals_keep = proposals
    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_topN, 7).zero_()
    for i in range(batch_size):
        # 3. remove predicted boxes with either height or width < threshold
        #    (NOTE: convert min_size to input image scale stored in im_info[2])
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order_single = order[i]
        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]
        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score_box'].data  # CHANGED from 'cls_score'
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred_box']  # CHANGED from 'bbox_pred'
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
def _proposal_layer_py(rpn_bbox_cls_prob, rpn_bbox_pred, im_dims, cfg_key, _feat_stride, anchor_scales):
    '''
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    #
    # rpn_bbox_cls_prob shape: (1, h, w, 2*9)
    # rpn_bbox_pred shape:     (1, h, w, 4*9)
    '''
    _anchors = generate_anchor.generate_anchors(scales=np.array(anchor_scales))  # (9, 4)
    _num_anchors = _anchors.shape[0]  # 9

    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2])  # (1, 18, h, w)
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])          # (1, 36, h, w)

    # Only minibatch of 1 supported
    assert rpn_bbox_cls_prob.shape[0] == 1, 'Only single item batches are supported'

    if cfg_key:  # train
        pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N    # 12000
        post_nms_topN = cfg.TRAIN.RPN_POST_NMS_TOP_N  # 2000
        nms_thresh = cfg.TRAIN.RPN_NMS_THRESH         # 0.1
        min_size = cfg.TRAIN.RPN_MIN_SIZE             # 16
    else:  # cfg_key == 'TEST'
        pre_nms_topN = cfg.TEST.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.TEST.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.TEST.RPN_NMS_THRESH
        min_size = cfg.TEST.RPN_MIN_SIZE

    # The first set of _num_anchors channels are bg probs, the second set are the fg probs.
    # 1. Generate proposals from bbox deltas and shifted anchors
    n, ch, height, width = rpn_bbox_cls_prob.shape
    scores = rpn_bbox_cls_prob.reshape([1, 2, ch // 2 * height, width])
    scores = scores.transpose([0, 2, 3, 1])
    scores = scores.reshape([-1, 2])
    scores = scores[:, 1]
    scores = scores.reshape([-1, 1])
    scores_ori = scores

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #   add A anchors to the K cell shifts to get shifted anchors, reshaped to (K*A, 4)
    A = _num_anchors
    K = shifts.shape[0]
    anchors = np.array([])
    for i in range(len(_anchors)):
        if i == 0:
            anchors = np.add(shifts, _anchors[i])
        else:
            anchors = np.concatenate((anchors, np.add(shifts, _anchors[i])), axis=0)
    anchors = anchors.reshape((K * A, 4))

    # Bbox transpose: (1, 4*A, H, W) --> (A*H*W, 4)
    shape = rpn_bbox_pred.shape
    rpn_bbox_pred = rpn_bbox_pred.reshape([1, 4, (shape[1] // 4) * shape[2], shape[3]])
    rpn_bbox_pred = rpn_bbox_pred.transpose([0, 2, 3, 1])
    rpn_bbox_pred = rpn_bbox_pred.reshape([-1, 4])
    bbox_deltas = rpn_bbox_pred

    # Apply the deltas to the anchors
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    proposals_ori = proposals
    # Shrink proposals that extend beyond the image size
    proposals = clip_boxes(proposals, im_dims)

    # Only boxes larger than min_size (16) survive
    keep = _filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    # Reverse the sort order so the largest score comes first
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    scores = scores[order]
    proposals = proposals[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    # NMS is skipped in this version; the usual step would be:
    # keep = nms(np.hstack((proposals, scores)), nms_thresh)  # hstack --> axis=1
    # if post_nms_topN > 0:
    #     keep = keep[:post_nms_topN]
    # proposals = proposals[keep, :]
    # scores = scores[keep]

    # Output rois blob. Our RPN implementation only supports a single input image,
    # so all batch inds are 0. Rows are [batch_ind, x_start, y_start, x_end, y_end].
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))  # (N, 5)
    return blob, scores, proposals_ori, scores_ori
def get_target_1st(all_anchors, regr_layer, img_data, C, roi_stride=10, igthre=0.5, posthre=0.7, negthre=0.5):
    A = np.copy(all_anchors[:, :4])
    y_cls_batch, y_regr_batch = [], []
    for i in range(regr_layer.shape[0]):
        gta = np.copy(img_data[i]['bboxes'])
        num_bboxes = len(gta)
        ignoreareas = img_data[i]['ignoreareas']
        proposals = np.ones_like(all_anchors)
        bbox_deltas = regr_layer[i, :, :]
        bbox_deltas = bbox_deltas * np.array(C.classifier_regr_std).astype(dtype=np.float32)
        proposals[:, :4] = bbox_transform_inv(A, bbox_deltas)
        proposals = clip_boxes(proposals, [C.random_crop[0], C.random_crop[1]])
        if len(ignoreareas) > 0:
            ignore_overlap = box_op(
                np.ascontiguousarray(proposals[:, :4], dtype=np.float),
                np.ascontiguousarray(ignoreareas, dtype=np.float))
            ignore_sum = np.sum(ignore_overlap, axis=1)
            proposals[ignore_sum > igthre, -1] = 0
        keep = filter_negboxes(proposals, roi_stride)
        proposals[keep, -1] = 0
        valid_idxs = np.where(proposals[:, -1] == 1)[0]

        # initialise empty output objectives
        y_alf_overlap = np.zeros((all_anchors.shape[0], 1))
        y_alf_negindex = np.zeros((all_anchors.shape[0], 1))
        y_is_box_valid = np.zeros((all_anchors.shape[0], 1))
        y_alf_regr = np.zeros((all_anchors.shape[0], 4))

        valid_anchors = proposals[valid_idxs, :]
        valid_alf_overlap = np.zeros((valid_anchors.shape[0], 1))
        valid_is_box_valid = np.zeros((valid_anchors.shape[0], 1))
        valid_rpn_regr = np.zeros((valid_anchors.shape[0], 4))

        if num_bboxes > 0:
            valid_overlap = bbox_overlaps(
                np.ascontiguousarray(valid_anchors, dtype=np.float),
                np.ascontiguousarray(gta, dtype=np.float))
            # find which bbox each anchor overlaps most
            argmax_overlaps = valid_overlap.argmax(axis=1)
            max_overlaps = valid_overlap[np.arange(len(valid_idxs)), argmax_overlaps]
            # find which anchor overlaps each bbox most
            gt_argmax_overlaps = valid_overlap.argmax(axis=0)
            gt_max_overlaps = valid_overlap[gt_argmax_overlaps, np.arange(num_bboxes)]
            gt_argmax_overlaps = np.where(valid_overlap == gt_max_overlaps)[0]
            valid_alf_overlap[gt_argmax_overlaps] = 1
            valid_alf_overlap[max_overlaps >= posthre] = 1
            for j in range(len(gta)):
                inds = valid_overlap[:, j].ravel().argsort()[-3:]
                valid_alf_overlap[inds] = 1
            # get positive labels
            fg_inds = np.where(valid_alf_overlap == 1)[0]
            valid_is_box_valid[fg_inds] = 1
            anchor_box = valid_anchors[fg_inds, :4]
            gt_box = gta[argmax_overlaps[fg_inds], :]
            # compute regression targets
            valid_rpn_regr[fg_inds, :] = compute_targets(anchor_box, gt_box,
                                                         C.classifier_regr_std, std=True)
            # get negative labels
            bg_inds = np.where((max_overlaps < negthre) &
                               (valid_is_box_valid.reshape((-1)) == 0))[0]
            valid_is_box_valid[bg_inds] = 1

        # map back to the full set of anchors and valid-box masks
        y_alf_overlap[valid_idxs, :] = valid_alf_overlap
        y_is_box_valid[valid_idxs, :] = valid_is_box_valid
        y_alf_regr[valid_idxs, :] = valid_rpn_regr
        y_alf_negindex = y_is_box_valid - y_alf_overlap
        y_alf_cls = np.expand_dims(np.concatenate([y_alf_overlap, y_alf_negindex], axis=1), axis=0)
        y_alf_regr = np.expand_dims(np.concatenate([y_alf_overlap, y_alf_regr], axis=1), axis=0)
        y_cls_batch.append(y_alf_cls)
        y_regr_batch.append(y_alf_regr)

    y_cls_batch = np.concatenate(np.array(y_cls_batch), axis=0)
    y_regr_batch = np.concatenate(np.array(y_regr_batch), axis=0)
    return [y_cls_batch, y_regr_batch]
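# get_target_1st calls compute_targets() to turn matched (anchor, ground-truth) pairs into
# regression targets. A sketch of the standard (dx, dy, dw, dh) target encoding such a helper
# typically computes; the classifier_regr_std scaling applied by the real helper is omitted,
# and this is an assumed reference implementation, not the original:
def bbox_transform(ex_rois, gt_rois):
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
    # Targets are the inverse of bbox_transform_inv: center offsets normalized by
    # anchor size, plus log width/height ratios.
    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)
    return np.vstack((targets_dx, targets_dy, targets_dw, targets_dh)).transpose()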
def test():
    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPU
    roidata = Roidata(is_train=False, with_keypoints=False)
    img_ph = tf.placeholder(tf.float32,
                            shape=[cfg.batch_size, cfg.image_size, cfg.image_size, 1])
    logits = small_net(img_ph, 1.0, is_training=False)
    ckpt_file = 'samller_output_calibration/refine-31970'
    saver = tf.train.Saver()
    tfconfig = tf.ConfigProto()
    tfconfig.gpu_options.allow_growth = True
    tfconfig.gpu_options.per_process_gpu_memory_fraction = 1.0
    sess = tf.Session(config=tfconfig)
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, ckpt_file)
    result_file = open('zebrish_yolo_143000_refine_smaller_with_gap_calibration.txt', 'w')
    test_timer = Timer()

    img, im_path, proposal, gt_boxes, score = roidata.get()
    feed_dict = {img_ph: img}
    for i in range(len(roidata.data)):
        img, im_path, proposal, gt_boxes, score = roidata.get()
        feed_dict = {img_ph: img}
        test_timer.tic()
        logits_val = sess.run(logits, feed_dict=feed_dict)
        test_timer.toc()
        logits_val = logits_val * np.array(cfg.BBOX_NORMALIZE_STDS)
        proposalnp = np.array([proposal], dtype=np.float32)
        pred_gt = bbox_transform_inv(proposalnp, logits_val)
        print('Average detecting time: {:.4f}s'.format(test_timer.average_time))
        origin_img = cv2.imread(im_path)
        im_index = im_path.split('/')[-1][:-4]
        print(pred_gt)
        pred_gt = clip_boxes(pred_gt, [origin_img.shape[0], origin_img.shape[1]])[0].astype(np.int32)
        result_file.write('{:s} {:.4f} {:d} {:d} {:d} {:d}\n'.format(
            im_index, score, pred_gt[0], pred_gt[1], pred_gt[2], pred_gt[3]))
        cv2.rectangle(origin_img, (proposal[0], proposal[1]), (proposal[2], proposal[3]), (255, 0, 0))
        cv2.rectangle(origin_img, (gt_boxes[0], gt_boxes[1]), (gt_boxes[2], gt_boxes[3]), (0, 255, 0))
        cv2.rectangle(origin_img, (pred_gt[0], pred_gt[1]), (pred_gt[2], pred_gt[3]), (0, 0, 255))
        cv2.imshow("test", origin_img)
        cv2.waitKey(0)
def forward(self, input):
    # input[0]: class scores, (batch_size, channels, H, W) = (1, 24, 19, 20)
    # input[1]: bbox deltas,  (batch_size, channels, H, W) = (1, 12*4, 19, 20)
    # input[2]: image shape,  (batch_size, H, W) = (1, 240, 320)
    # input[3]: "TEST" or "TRAIN"
    all_anchors = self.all_anchors.cuda()
    # class score (binary) for each feature map pixel
    scores = input[0][:, self._num_anchors_type:, :, :]
    # bbox deltas for each feature map pixel, size (batch_size, 48, 19, 20)
    bbox_deltas = input[1]
    # image shape; for JHMDB it is [[240, 320]]  TODO1: change this to [240, 320]
    im_info = input[2]
    cfg_key = input[3]  # TRAIN or TEST
    im_info = np.array(im_info)

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N    # train: 12000, test: 6000
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # train: 2000, test: 300
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH         # train: 0.7, test: 0.7
    min_size = cfg[cfg_key].RPN_MIN_SIZE             # train: 8, test: 16

    batch_size = bbox_deltas.size(0)  # mostly 1

    # Since the anchors are obtained from the dataset, we can just use them;
    # expand to (batch_size, all_num_anchors, 4). TODO: this differs from the original.
    all_anchors = all_anchors.contiguous()
    all_anchors = all_anchors.view(1, self.all_num_anchors, 4).expand(batch_size, self.all_num_anchors, 4)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors: change to (batch_size, 19, 20, 48)
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

    # Same story for the scores (contiguous() is required before view on a permuted tensor)
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # Convert anchors into proposals via bbox transformations: slide the anchors over
    # each feature map pixel and apply the predicted deltas.
    # all_anchors.shape = (1, 3600, 4), bbox_deltas.shape = (1, 3600, 4)
    proposals = bbox_transform_inv2(all_anchors, bbox_deltas, batch_size)

    # 2. clip predicted boxes to image. The input anchors already lie inside the image,
    # but the regressed boxes may not; im_info = [[240, 320]]
    proposals = clip_boxes(proposals, im_info, batch_size)

    scores_keep = scores
    proposals_keep = proposals
    # sort the predicted scores, highest first
    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        # 3. remove predicted boxes with either height or width < threshold
        #    (NOTE: convert min_size to input image scale stored in im_info[2])
        proposals_single = proposals_keep[i]  # all anchors for this batch element
        scores_single = scores_keep[i]        # binary class score per anchor

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order_single = order[i]
        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]
        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                         nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
        keep_idx_i = keep_idx_i.long().view(-1)
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output
def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False):
    proposals_list = []
    scores_list = []
    landmarks_list = []
    strides_list = []
    timea = datetime.datetime.now()
    flips = [0]
    if do_flip:
        flips = [0, 1]
    imgs = [img]
    if isinstance(img, list):
        imgs = img
    for img in imgs:
        for im_scale in scales:
            for flip in flips:
                if im_scale != 1.0:
                    im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
                                    interpolation=cv2.INTER_LINEAR)
                else:
                    im = img.copy()
                if flip:
                    im = im[:, ::-1, :]
                if self.nocrop:
                    # pad height and width up to multiples of 32
                    if im.shape[0] % 32 == 0:
                        h = im.shape[0]
                    else:
                        h = (im.shape[0] // 32 + 1) * 32
                    if im.shape[1] % 32 == 0:
                        w = im.shape[1]
                    else:
                        w = (im.shape[1] // 32 + 1) * 32
                    _im = np.zeros((h, w, 3), dtype=np.float32)
                    _im[0:im.shape[0], 0:im.shape[1], :] = im
                    im = _im
                else:
                    im = im.astype(np.float32)
                if self.debug:
                    timeb = datetime.datetime.now()
                    print('X1 uses', (timeb - timea).total_seconds(), 'seconds')
                im_info = [im.shape[0], im.shape[1]]
                # BGR -> RGB, normalize, and pack into NCHW
                im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
                for i in range(3):
                    im_tensor[0, i, :, :] = (im[:, :, 2 - i] / self.pixel_scale -
                                             self.pixel_means[2 - i]) / self.pixel_stds[2 - i]
                if self.debug:
                    timeb = datetime.datetime.now()
                    print('X2 uses', (timeb - timea).total_seconds(), 'seconds')
                data = nd.array(im_tensor)
                db = mx.io.DataBatch(data=(data, ), provide_data=[('data', data.shape)])
                if self.debug:
                    timeb = datetime.datetime.now()
                    print('X3 uses', (timeb - timea).total_seconds(), 'seconds')
                self.model.forward(db, is_train=False)
                net_out = self.model.get_outputs()

                sym_idx = 0
                for _idx, s in enumerate(self._feat_stride_fpn):
                    _key = 'stride%s' % s
                    stride = int(s)
                    is_cascade = False
                    if self.cascade:
                        is_cascade = True
                    scores = net_out[sym_idx].asnumpy()
                    if self.debug:
                        timeb = datetime.datetime.now()
                        print('A uses', (timeb - timea).total_seconds(), 'seconds')
                    scores = scores[:, self._num_anchors['stride%s' % s]:, :, :]
                    bbox_deltas = net_out[sym_idx + 1].asnumpy()

                    height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
                    A = self._num_anchors['stride%s' % s]
                    K = height * width
                    anchors_fpn = self._anchors_fpn['stride%s' % s]
                    anchors = anchors_plane(height, width, stride, anchors_fpn)
                    anchors = anchors.reshape((K * A, 4))

                    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
                    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
                    bbox_pred_len = bbox_deltas.shape[3] // A
                    bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))
                    # un-normalize the regression output before applying it
                    bbox_deltas[:, 0::4] = bbox_deltas[:, 0::4] * self.bbox_stds[0]
                    bbox_deltas[:, 1::4] = bbox_deltas[:, 1::4] * self.bbox_stds[1]
                    bbox_deltas[:, 2::4] = bbox_deltas[:, 2::4] * self.bbox_stds[2]
                    bbox_deltas[:, 3::4] = bbox_deltas[:, 3::4] * self.bbox_stds[3]
                    proposals = self.bbox_pred(anchors, bbox_deltas)

                    if is_cascade:
                        cascade_sym_num = 0
                        cls_cascade = False
                        bbox_cascade = False
                        __idx = [3, 4]
                        if not self.use_landmarks:
                            __idx = [2, 3]
                        for diff_idx in __idx:
                            if sym_idx + diff_idx >= len(net_out):
                                break
                            body = net_out[sym_idx + diff_idx].asnumpy()
                            if body.shape[1] // A == 2:  # cls branch
                                if cls_cascade or bbox_cascade:
                                    break
                                else:
                                    cascade_scores = body[:, self._num_anchors['stride%s' % s]:, :, :]
                                    cascade_scores = cascade_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
                                    scores = cascade_scores
                                    cascade_sym_num += 1
                                    cls_cascade = True
                            elif body.shape[1] // A == 4:  # bbox branch
                                cascade_deltas = body.transpose((0, 2, 3, 1)).reshape((-1, bbox_pred_len))
                                cascade_deltas[:, 0::4] = cascade_deltas[:, 0::4] * self.bbox_stds[0]
                                cascade_deltas[:, 1::4] = cascade_deltas[:, 1::4] * self.bbox_stds[1]
                                cascade_deltas[:, 2::4] = cascade_deltas[:, 2::4] * self.bbox_stds[2]
                                cascade_deltas[:, 3::4] = cascade_deltas[:, 3::4] * self.bbox_stds[3]
                                proposals = self.bbox_pred(proposals, cascade_deltas)
                                cascade_sym_num += 1
                                bbox_cascade = True

                    proposals = clip_boxes(proposals, im_info[:2])

                    if stride == 4 and self.decay4 < 1.0:
                        scores *= self.decay4

                    scores_ravel = scores.ravel()
                    order = np.where(scores_ravel >= threshold)[0]
                    proposals = proposals[order, :]
                    scores = scores[order]
                    if flip:
                        oldx1 = proposals[:, 0].copy()
                        oldx2 = proposals[:, 2].copy()
                        proposals[:, 0] = im.shape[1] - oldx2 - 1
                        proposals[:, 2] = im.shape[1] - oldx1 - 1
                    proposals[:, 0:4] /= im_scale
                    proposals_list.append(proposals)
                    scores_list.append(scores)
                    if self.nms_threshold < 0.0:
                        _strides = np.empty(shape=(scores.shape), dtype=np.float32)
                        _strides.fill(stride)
                        strides_list.append(_strides)

                    if not self.vote and self.use_landmarks:
                        landmark_deltas = net_out[sym_idx + 2].asnumpy()
                        landmark_pred_len = landmark_deltas.shape[1] // A
                        landmark_deltas = landmark_deltas.transpose((0, 2, 3, 1)).reshape(
                            (-1, 5, landmark_pred_len // 5))
                        landmark_deltas *= self.landmark_std
                        landmarks = self.landmark_pred(anchors, landmark_deltas)
                        landmarks = landmarks[order, :]
                        if flip:
                            landmarks[:, :, 0] = im.shape[1] - landmarks[:, :, 0] - 1
                            order = [1, 0, 2, 4, 3]
                            flandmarks = landmarks.copy()
                            for idx, a in enumerate(order):
                                flandmarks[:, idx, :] = landmarks[:, a, :]
                            landmarks = flandmarks
                        landmarks[:, :, 0:2] /= im_scale
                        landmarks_list.append(landmarks)

                    if self.use_landmarks:
                        sym_idx += 3
                    else:
                        sym_idx += 2
                    if is_cascade:
                        sym_idx += cascade_sym_num

    if self.debug:
        timeb = datetime.datetime.now()
        print('B uses', (timeb - timea).total_seconds(), 'seconds')

    proposals = np.vstack(proposals_list)
    landmarks = None
    if proposals.shape[0] == 0:
        if self.use_landmarks:
            landmarks = np.zeros((0, 5, 2))
        if self.nms_threshold < 0.0:
            return np.zeros((0, 6)), landmarks
        else:
            return np.zeros((0, 5)), landmarks
    scores = np.vstack(scores_list)
    scores_ravel = scores.ravel()
    order = scores_ravel.argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    if self.nms_threshold < 0.0:
        strides = np.vstack(strides_list)
        strides = strides[order]
    if not self.vote and self.use_landmarks:
        landmarks = np.vstack(landmarks_list)
        landmarks = landmarks[order].astype(np.float32, copy=False)

    if self.nms_threshold > 0.0:
        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32, copy=False)
        if not self.vote:
            keep = self.nms(pre_det)
            det = np.hstack((pre_det, proposals[:, 4:]))
            det = det[keep, :]
            if self.use_landmarks:
                landmarks = landmarks[keep]
        else:
            det = np.hstack((pre_det, proposals[:, 4:]))
            det = self.bbox_vote(det)
    elif self.nms_threshold < 0.0:
        det = np.hstack((proposals[:, 0:4], scores, strides)).astype(np.float32, copy=False)
    else:
        det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32, copy=False)
    if self.debug:
        timeb = datetime.datetime.now()
        print('C uses', (timeb - timea).total_seconds(), 'seconds')
    return det, landmarks
def proposal_layer_py(rpn_bbox_cls_prob, rpn_bbox_pred, im_dims, mode, feat_strides, anchor_scales):
    """
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before non-maximal suppression
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    """
    anchors = generate_anchors.generate_anchors(base_size=16, ratios=[0.5, 1, 2], scales=anchor_scales)
    num_anchors = anchors.shape[0]
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2])  # [1, 9*2, height, width]
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])          # [1, 9*4, height, width]

    if mode == 'train':
        pre_nms_topN = 12000
        post_nms_topN = 2000
        nms_thresh = 0.7
        min_size = 16
    else:
        pre_nms_topN = 6000
        post_nms_topN = 300
        nms_thresh = 0.7
        min_size = 16

    # The first set of num_anchors channels are bg probabilities,
    # the second set are the fg probabilities.
    scores = rpn_bbox_cls_prob[:, :num_anchors, :, :]  # fg probabilities, [1, 9, height, width]
    bbox_deltas = rpn_bbox_pred                        # [1, 9*4, height, width]

    # step 1: generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]
    shift_x = np.arange(0, width) * feat_strides
    shift_y = np.arange(0, height) * feat_strides
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel()))
    shifts = shifts.transpose()

    A = num_anchors      # number of anchors per shift = 9
    K = shifts.shape[0]  # number of shifts
    aaa = anchors.reshape((1, A, 4))
    bbb = shifts.reshape(1, K, 4).transpose((1, 0, 2))
    anchors = aaa + bbb
    anchors = anchors.reshape((K * A), 4)  # [K*A, 4]

    # Transpose and reshape predicted bbox transformations to get the same order as the
    # anchors; bbox_deltas is [1, 4*A, H, W]
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))  # [A*K, 4]
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))            # [A*K, 1]

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform.bbox_transform_inv(anchors, bbox_deltas)  # [K*A, 4]

    # step 2: clip predicted boxes according to the image size
    proposals = bbox_transform.clip_boxes(proposals, im_dims)

    # step 3: remove predicted boxes with either height or width < threshold
    keep = filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # step 4: sort all (proposal, score) pairs by score from highest to lowest
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]

    # step 5: take top pre_nms_topN
    proposals = proposals[order, :]
    scores = scores[order]

    # step 6: apply nms (e.g. threshold = 0.7)
    keep = cpu_nms.cpu_nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]

    # step 7: take after_nms_topN
    proposals = proposals[keep, :]
    scores = scores[keep]
    print("proposals.shape after nms", proposals.shape)
    print("scores.shape", scores.shape)

    # step 8: return the top proposals; each row is [0, x1, y1, x2, y2]
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)  # [len(keep), 1]
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    print("blob.shape", blob.shape)
    return blob