def _rcnn_forward(self, im, thres=0.5):
    im, im_scale = resize(im, self.target_size, self.max_size)
    im_tensor = transform(im, self.input_mean, self.input_scale)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                       dtype=np.float32)
    data = [mx.nd.array(im_tensor), mx.nd.array(im_info)]
    data_shapes = [('data', im_tensor.shape), ('im_info', im_info.shape)]
    data_batch = mx.io.DataBatch(data=data, label=None,
                                 provide_data=data_shapes, provide_label=None)

    output = self.predict(data_batch)
    rois = output['rois_output'].asnumpy()[:, 1:]

    # save output
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    # post processing
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_tensor.shape[-2:])

    # we used scaled image & roi to train, so it is necessary to transform them back
    pred_boxes = pred_boxes / im_scale
    return self._post_process(scores, pred_boxes, thres)
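# bbox_pred and clip_boxes above are the usual Faster R-CNN box decoding and
# clipping helpers. A minimal NumPy sketch of what they are assumed to do
# (illustrative only; bbox_pred_sketch / clip_boxes_sketch are not part of this
# codebase and the project's own utils may differ in detail):
import numpy as np

def bbox_pred_sketch(boxes, box_deltas):
    # decode (dx, dy, dw, dh) deltas relative to each (x1, y1, x2, y2) box
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
    dx, dy = box_deltas[:, 0::4], box_deltas[:, 1::4]
    dw, dh = box_deltas[:, 2::4], box_deltas[:, 3::4]
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]
    pred = np.zeros(box_deltas.shape)
    pred[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0)  # x1
    pred[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0)  # y1
    pred[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0)  # x2
    pred[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0)  # y2
    return pred

def clip_boxes_sketch(boxes, im_shape):
    # clamp x coordinates to [0, W - 1] and y to [0, H - 1],
    # where im_shape is (H, W) as passed via im_tensor.shape[-2:]
    boxes[:, 0::4] = np.clip(boxes[:, 0::4], 0, im_shape[1] - 1)
    boxes[:, 1::4] = np.clip(boxes[:, 1::4], 0, im_shape[0] - 1)
    boxes[:, 2::4] = np.clip(boxes[:, 2::4], 0, im_shape[1] - 1)
    boxes[:, 3::4] = np.clip(boxes[:, 3::4], 0, im_shape[0] - 1)
    return boxes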
def im_detect(predictor, data_batch, data_names, scale):
    output = predictor.predict(data_batch)

    data_dict = dict(zip(data_names, data_batch.data))
    if config.TEST.HAS_RPN:
        rois = output['rois_output'].asnumpy()[:, 1:]
    else:
        rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
    im_shape = data_dict['data'].shape

    # save output
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    # post processing
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    # we used scaled image & roi to train, so it is necessary to transform them back
    pred_boxes = pred_boxes / scale

    return scores, pred_boxes, data_dict
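# im_detect returns raw per-class scores (num_rois, num_classes) and per-class
# boxes (num_rois, 4 * num_classes); callers typically threshold and NMS-filter
# them per class. A hypothetical sketch of such a step (post_process_sketch is
# an illustration, not this codebase's actual _post_process):
import numpy as np

def post_process_sketch(scores, pred_boxes, thresh, nms_func, num_classes):
    all_dets = []
    for j in range(1, num_classes):  # class 0 is background
        cls_scores = scores[:, j]
        cls_boxes = pred_boxes[:, 4 * j:4 * (j + 1)]
        inds = np.where(cls_scores > thresh)[0]
        dets = np.hstack((cls_boxes[inds, :],
                          cls_scores[inds, np.newaxis])).astype(np.float32)
        keep = nms_func(dets)  # e.g. the callable returned by py_nms_wrapper(0.3)
        all_dets.append(dets[keep, :])
    return all_dets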
def _rcnn_forward(self, im, rois=None, thres=0.5):
    debug = False
    im, im_scale = resize(im, self.target_size, self.max_size, self.image_stride)
    im_tensor = transform(im, self.input_mean, self.input_scale)

    if self.proposal_type == 'rpn':
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                           dtype=np.float32)
        if len(self.feat_sym) == 0:
            # single-level RPN: only the image tensor and im_info are needed
            data_shapes = [('data', im_tensor.shape), ('im_info', im_info.shape)]
            data = [mx.nd.array(im_tensor), mx.nd.array(im_info)]
        else:
            # FPN: infer the shape of every pyramid feature map and pass them
            # to the proposal operator as an extra 'feat_shape' input
            data_shape = {'data': im_tensor.shape}
            feat_shapes = []
            for feat in self.feat_sym:
                # get the size of the feature map for rpn; there are 4 feat_sym in fpn
                _, feat_shape, _ = feat.infer_shape(**data_shape)
                feat_shape = [int(i) for i in feat_shape[0]]
                feat_shapes.append(feat_shape)
            feat_shape_ = np.array(feat_shapes[0])
            for i in range(1, len(feat_shapes)):
                a = np.array(feat_shapes[i])
                feat_shape_ = np.vstack((feat_shape_, a))      # [5, 4]
            final_feat_shape = feat_shape_[np.newaxis, :, :]   # [1, 5, 4]
            data_shapes = [('data', im_tensor.shape), ('im_info', im_info.shape),
                           ('feat_shape', final_feat_shape.shape)]
            data = [mx.nd.array(im_tensor), mx.nd.array(im_info),
                    mx.nd.array(final_feat_shape)]
    elif self.proposal_type == 'existed_roi':
        assert rois is not None
        rois = rois.reshape(1, -1, 5)
        data = [mx.nd.array(im_tensor), mx.nd.array(rois)]
        data_shapes = [('data', im_tensor.shape), ('rois', rois.shape)]

    data_batch = mx.io.DataBatch(data=data, label=None,
                                 provide_data=data_shapes, provide_label=None)
    t = time.time()
    output = self.predict(data_batch)

    if self.proposal_type == 'rpn':
        rois = output['rois_output'].asnumpy()[:, 1:]
        if debug:
            # dump the raw RPN proposals and draw a filtered subset on the image
            f2 = open('predict.txt', 'w')
            roid_rpn = output['rois_output'].asnumpy()   # (1200, 5)
            roid_pred = roid_rpn
            for i in range(roid_pred.shape[0]):
                w = max(0, int(roid_pred[i, 3] - roid_pred[i, 1]))
                h = max(0, int(roid_pred[i, 4] - roid_pred[i, 2]))
                print(im.shape)
                if w < 50 or h < 300 or w > im.shape[1] or h > im.shape[0] or w > h:
                    continue
                cv2.rectangle(im, (int(roid_pred[i, 1]), int(roid_pred[i, 2])),
                              (int(roid_pred[i, 3]), int(roid_pred[i, 4])), (255, 0, 0), 1)
                f2.write(str(roid_rpn[i, :]) + '\n')
            cv2.imwrite('rpn_result.jpg', im)
    elif self.proposal_type == 'existed_roi':
        rois = rois[0][:, 1:]

    # save output
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    t1 = time.time() - t

    # post processing
    print('predict: {:.4f}s'.format(t1))
    if 0:
        # disabled debug dump of per-roi deltas, scores, and the best person score
        for i in range(rois.shape[0]):
            f2.write('rois: ' + str(i) + ' : ' + str(rois[i, :]) +
                     '#############bbox_deltas: ' + str(bbox_deltas[i, 4:]) +
                     '#################cls_pred: ' + str(scores[i, 1]) + '\n')
        person_score = scores[:, 1]
        max_score = max(person_score)
        print(max_score)

    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_tensor.shape[-2:])

    # we used scaled image & roi to train, so it is necessary to transform them back
    pred_boxes = pred_boxes / im_scale
    return self._post_process(scores, pred_boxes, thres)
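# In the 'existed_roi' branch above, rois are expected as (N, 5) rows of
# [batch_index, x1, y1, x2, y2] in the coordinate frame of the *resized*
# image (the final pred_boxes / im_scale maps results back to the original
# image). A hypothetical way a caller might build them; make_rois_sketch,
# boxes_xyxy, im_scale and detector are illustrative assumptions:
import numpy as np

def make_rois_sketch(boxes_xyxy, im_scale):
    boxes = np.asarray(boxes_xyxy, dtype=np.float32) * im_scale   # scale boxes to network input size
    batch_inds = np.zeros((boxes.shape[0], 1), dtype=np.float32)  # single image per batch -> index 0
    return np.hstack((batch_inds, boxes))                         # (N, 5)

# e.g. rois = make_rois_sketch([[10, 20, 200, 380]], im_scale)
#      detector._rcnn_forward(im, rois=rois, thres=0.5)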
def forward(self, is_train, req, in_data, out_data, aux):
    nms = gpu_nms_wrapper(self._threshold, 0)

    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")

    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    # the first set of anchors are background probabilities, keep the second part
    scores_list = in_data[0].asnumpy()       # [1, n]
    bbox_deltas_list = in_data[1].asnumpy()  # [1, n*2]
    im_info = in_data[2].asnumpy()[0, :]
    feat_shape = in_data[3].asnumpy()        # [1, 5, 4]

    num_feat = feat_shape.shape[1]
    score_index_start = 0
    bbox_index_start = 0
    keep_proposal = []
    keep_scores = []

    # 1.-3. generate, clip and filter proposals separately for every pyramid level
    for i in range(num_feat):
        feat_stride = int(self._feat_stride[i])   # 4, 8, 16, 32, 64
        anchor = generate_anchors(feat_stride, scales=self._scales, ratios=self._ratios)
        num_anchors = anchor.shape[0]             # 3
        height = feat_shape[0, i, 2]
        width = feat_shape[0, i, 3]

        # enumerate all shifts of the base anchors over this feature map
        shift_x = np.arange(0, width) * feat_stride
        shift_y = np.arange(0, height) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        A = num_anchors       # 3
        K = shifts.shape[0]   # height * width
        anchors = anchor.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))     # (3 * height * width, 4)

        # slice this level's scores and deltas out of the flattened inputs
        scores = (scores_list[0, int(score_index_start):int(score_index_start + K * A * 2)]
                  ).reshape((1, int(2 * num_anchors), -1, int(width)))   # (1, 2*3, h, w)
        scores = scores[:, num_anchors:, :, :]                           # (1, 3, h, w)
        bbox_deltas = (bbox_deltas_list[0, int(bbox_index_start):int(bbox_index_start + K * A * 4)]
                       ).reshape((1, int(4 * num_anchors), -1, int(width)))  # (1, 4*3, h, w)
        score_index_start += K * A * 2
        bbox_index_start += K * A * 4

        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))  # (1,h,w,12) -> (1*h*w*3, 4)
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))            # (1,h,w,3) -> (1*h*w*3, 1)

        proposals = bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])

        keep = self._filter_boxes(proposals, min_size[i] * im_info[2])
        keep_proposal.append(proposals[keep, :])
        keep_scores.append(scores[keep])

    # concatenate the per-level proposals and scores
    proposals = keep_proposal[0]
    scores = keep_scores[0]
    for i in range(1, num_feat):
        proposals = np.vstack((proposals, keep_proposal[i]))
        scores = np.vstack((scores, keep_scores[i]))

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]

    # pad to ensure the output size remains unchanged
    if len(keep) < post_nms_topN:
        try:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
        except ValueError:
            # nothing survived NMS: output dummy 16x16 boxes anchored at the origin
            proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
            proposals[:, 2] = 16
            proposals[:, 3] = 16
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
            self.assign(out_data[0], req[0], blob)
            if self._output_score:
                self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
            return
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
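# Both proposal operators call _filter_boxes and _clip_pad, which are not shown
# in this section. Sketches of the conventional implementations they are assumed
# to follow (written as free functions here for brevity; in the operator they
# are methods on the class):
import numpy as np

def filter_boxes_sketch(boxes, min_size):
    # keep indices of boxes whose width and height are both >= min_size
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep

def clip_pad_sketch(tensor, pad_shape):
    # crop a (N, C, H_pad, W_pad) score/delta map back to the unpadded (h, w)
    H, W = tensor.shape[2:]
    h, w = pad_shape
    if h < H or w < W:
        tensor = tensor[:, :, :int(h), :int(w)].copy()
    return tensor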
def forward(self, is_train, req, in_data, out_data, aux):
    # nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
    nms = py_nms_wrapper(self._threshold)

    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")

    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    # the first set of anchors are background probabilities
    # keep the second part
    scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # feat_shape = in_data[3].asnumpy()

    # 1. Generate proposals from bbox_deltas and shifted anchors
    # use real image size instead of padded feature map sizes
    # height = feat_shape[0, i, 2]
    # width = feat_shape[0, i, 3]
    height, width = int(im_info[0] / self._feat_stride), int(im_info[1] / self._feat_stride)

    if DEBUG:
        print('score map size: {}'.format(scores.shape))
        print('residual: {}'.format((scores.shape[2] - height, scores.shape[3] - width)))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = self._clip_pad(scores, (height, width))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_pred(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = self._filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure the output size remains unchanged
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
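# py_nms_wrapper above wraps a plain greedy IoU-based NMS over rows of
# (x1, y1, x2, y2, score). A self-contained sketch of that classic CPU
# implementation (py_nms_sketch / py_nms_wrapper_sketch are illustrative;
# the codebase's actual wrapper may also dispatch to a GPU kernel):
import numpy as np

def py_nms_sketch(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # process boxes from highest score down
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current best box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(ovr <= thresh)[0] + 1]  # drop boxes that overlap too much
    return keep

def py_nms_wrapper_sketch(thresh):
    # returns a callable with the same one-argument signature as nms(det) above
    return lambda dets: py_nms_sketch(dets, thresh)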