def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors, is_training):
    """Turn RPN outputs into a score-ranked, NMS-filtered ROI blob.

    Args:
        rpn_cls_prob: 4-D score array; the last-axis channels from index
            ``num_anchors`` onward are used as the proposal scores.
        rpn_bbox_pred: box regression output, flattened here to ``(-1, 4)``.
        im_info: sequence whose first two entries are passed to ``clip_boxes``.
        _feat_stride: not used by this function.
        anchors: anchor boxes handed to ``bbox_transform_inv``.
        num_anchors: channel offset at which the used scores start.
        is_training: selects the train or test pre/post-NMS limits.

    Returns:
        ``(blob, scores)`` where ``blob`` has a leading all-zero batch-index
        column followed by the kept boxes, and ``scores`` are the matching
        scores as a column.
    """
    box_scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    box_deltas = rpn_bbox_pred.reshape((-1, 4))

    boxes = bbox_transform_inv(anchors, box_deltas)
    boxes = clip_boxes(boxes, im_info[:2])

    # Rank by descending score and keep the best candidates before NMS.
    pre_nms_limit = train_rpn_pre_nms_topN if is_training else test_rpn_pre_nms_topN
    ranking = box_scores.ravel().argsort()[::-1][:pre_nms_limit]
    boxes, box_scores = boxes[ranking, :], box_scores[ranking]

    survivors = nms(np.hstack((boxes, box_scores)), rpn_nms_thresh)
    post_nms_limit = train_rpn_nms_post_topN if is_training else test_rpn_nms_post_topN
    survivors = survivors[:post_nms_limit]
    boxes, box_scores = boxes[survivors, :], box_scores[survivors]

    # Only a single input image is supported, so every batch index is 0.
    image_index = np.zeros((boxes.shape[0], 1), dtype=np.float32)
    rois = np.hstack((image_index, boxes.astype(np.float32, copy=False)))
    return rois, box_scores
def pred_det(anchors, cls_pred, regr_pred, C, step=1):
    """Decode, filter and NMS the detections of one cascade step.

    Args:
        anchors: 2-D array; columns 0-3 are the boxes to refine. For
            ``step`` 2 the last column, and for ``step`` 3 the last two
            columns, are multiplied into the classification scores.
        cls_pred: classification output; ``cls_pred[0, :, :]`` is used.
        regr_pred: regression output, flattened to ``(-1, 4)``.
        C: config object providing ``classifier_regr_std``, ``random_crop``,
            ``roi_stride``, ``pre_nms_topN``, ``scorethre``,
            ``overlap_thresh`` and ``post_nms_topN``.
        step: which score combination to use (1, 2 or 3).

    Returns:
        ``(proposals, scores)`` after size filtering, top-N selection,
        score thresholding and NMS.

    Raises:
        ValueError: if ``step`` is not 1, 2 or 3. Previously such a value
            left ``scores`` unbound and failed later with an obscure
            UnboundLocalError.
    """
    if step == 1:
        scores = cls_pred[0, :, :]
    elif step == 2:
        scores = anchors[:, -1:] * cls_pred[0, :, :]
    elif step == 3:
        scores = anchors[:, -2:-1] * anchors[:, -1:] * cls_pred[0, :, :]
    else:
        raise ValueError('step must be 1, 2 or 3, got {!r}'.format(step))

    # Work on a copy so the caller's anchors are never touched.
    A = np.copy(anchors[:, :4])
    bbox_deltas = regr_pred.reshape((-1, 4))
    # Undo the normalisation applied to the regression targets.
    bbox_deltas = bbox_deltas * np.array(C.classifier_regr_std).astype(dtype=np.float32)

    proposals = bbox_transform_inv(A, bbox_deltas)
    proposals = clip_boxes(proposals, [C.random_crop[0], C.random_crop[1]])

    keep = filter_boxes(proposals, C.roi_stride)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Highest scores first, limited to the pre-NMS budget.
    order = scores.ravel().argsort()[::-1]
    order = order[:C.pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # np.where on the (N, 1) score column returns (rows, cols); rows suffice.
    keep = np.where(scores > C.scorethre)[0]
    proposals = proposals[keep, :]
    scores = scores[keep]

    keep = nms(np.hstack((proposals, scores)), C.overlap_thresh, usegpu=False, gpu_id=0)
    keep = keep[:C.post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    return proposals, scores
def slpn_pred(ROIs, P_cls, P_regr, C, bbox_thresh=0.1, nms_thresh=0.3,roi_stride=8):
    """Decode second-stage predictions into final boxes and scores.

    The ROIs arrive as ``(x, y, w, h)`` on a downscaled grid (per the
    original source comment); they are converted to corner form and
    multiplied by ``roi_stride`` before the regression is applied.

    Args:
        ROIs: array with a leading singleton axis; rows hold the ROIs.
        P_cls: classifier output; ``P_cls[:, :, 0]`` is used as the score.
        P_regr: regression output with a leading singleton axis.
        C: config object providing ``classifier_regr_std`` and ``random_crop``.
        bbox_thresh: minimum score for a ROI to be considered.
        nms_thresh: overlap threshold forwarded to ``nms``.
        roi_stride: factor applied to the ROI coordinates.

    Returns:
        ``(pred_boxes, scores)`` arrays, or ``([], [])`` when no ROI reaches
        ``bbox_thresh``.
    """
    scores = np.squeeze(P_cls[:, :, 0], axis=0)
    regr = np.squeeze(P_regr, axis=0)

    keep = np.where(scores >= bbox_thresh)[0]
    if len(keep) == 0:
        return [], []

    # np.squeeze returns a view of ROIs; copy first so that the in-place
    # width/height -> x2/y2 conversion below does not corrupt the caller's
    # array (repeated calls used to accumulate the offsets).
    rois = np.array(np.squeeze(ROIs, axis=0), copy=True)
    rois[:, 2] += rois[:, 0]
    rois[:, 3] += rois[:, 1]
    rois = rois[keep] * roi_stride
    scores = scores[keep]
    # Undo the normalisation applied to the regression targets.
    regr = regr[keep] * np.array(C.classifier_regr_std).astype(dtype=np.float32)

    pred_boxes = bbox_transform_inv(rois, regr)
    pred_boxes = clip_boxes(pred_boxes, [C.random_crop[0], C.random_crop[1]])

    # Drop boxes narrower or shorter than 3 units.
    keep = np.where((pred_boxes[:, 2] - pred_boxes[:, 0] >= 3) &
                    (pred_boxes[:, 3] - pred_boxes[:, 1] >= 3))[0]
    pred_boxes = pred_boxes[keep]
    scores = scores[keep].reshape((-1, 1))

    keep = nms(np.hstack((pred_boxes, scores)), nms_thresh, usegpu=False, gpu_id=0)
    pred_boxes = pred_boxes[keep]
    scores = scores[keep]
    return pred_boxes, scores
def _inv_transform_layer_py(rpn_bbox_pred, is_training, _feat_stride, anchor_scales, indices):
    """Apply RPN box deltas to every shifted anchor.

    Args:
        rpn_bbox_pred: 4-D regression output indexed as
            ``(batch, height, width, channels)``; the channel count is
            divided by 4 when regrouping the deltas.
        is_training: unused. It previously only selected config values
            that were never read afterwards; kept so callers need not change.
        _feat_stride: step between neighbouring anchor positions.
        anchor_scales: scales forwarded to ``generate_anchors``.
        indices: index expression selecting the proposals of interest.

    Returns:
        ``(proposals, target_proposals)`` where ``target_proposals`` is
        ``proposals[indices]``. Proposals are not clipped to the image.
    """
    _anchors = generate_anchor.generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    shape = np.shape(rpn_bbox_pred)
    # (N, H, W, C) -> (N, C, H, W) -> (N, 4, C // 4 * H, W) -> (N, C // 4 * H, W, 4)
    # so that flattening yields one 4-vector of deltas per anchor.
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])
    rpn_bbox_pred = np.reshape(
        rpn_bbox_pred, [shape[0], 4, shape[3] // 4 * shape[1], shape[2]])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, (0, 2, 3, 1))
    bbox_deltas = rpn_bbox_pred.reshape((-1, 4))

    # 1. Generate proposals from bbox deltas and shifted anchors.
    height, width = shape[1], shape[2]

    # Enumerate all shifts, one row (x, y, x, y) per feature-map cell.
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Add every base anchor (A, 1, 4) to every shift (1, K, 4). The result is
    # anchor-major, (A, K, 4) -> (A * K, 4): exactly the row order the former
    # per-anchor concatenate loop produced, without repeated re-allocation.
    A = _num_anchors
    K = shifts.shape[0]
    anchors = (_anchors[:, np.newaxis, :] + shifts[np.newaxis, :, :]).reshape((K * A, 4))

    # anchors and bbox_deltas must have the same shape here.
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    target_proposals = proposals[indices]
    return proposals, target_proposals
def pred_pp_2nd(anchors, cls_pred, regr_pred, C):
    """Refine the boxes in ``anchors`` and append the new scores as columns.

    Args:
        anchors: 2-D array whose first four columns are boxes; any further
            columns are carried through unchanged.
        cls_pred: classification output; ``cls_pred[0, :, :]`` is appended.
        regr_pred: regression output, flattened to ``(-1, 4)``.
        C: config object providing ``classifier_regr_std`` and ``random_crop``.

    Returns:
        New array ``[refined anchors | scores]`` concatenated on the last
        axis. The caller's ``anchors`` array is left unmodified.
    """
    scores = cls_pred[0, :, :]
    bbox_deltas = regr_pred.reshape((-1, 4))
    # Undo the normalisation applied to the regression targets.
    bbox_deltas = bbox_deltas * np.array(C.classifier_regr_std).astype(dtype=np.float32)

    # Refine a private copy: writing into ``anchors`` itself silently changed
    # the caller's array, unlike pred_det/generate_pp_2nd which copy first.
    refined = np.array(anchors, copy=True)
    refined[:, :4] = bbox_transform_inv(refined[:, :4], bbox_deltas)
    refined[:, :4] = clip_boxes(refined[:, :4], [C.random_crop[0], C.random_crop[1]])

    proposals = np.concatenate((refined, scores), axis=-1)
    return proposals
def forward(self, img_path, i):
    """Run the detector on a random 500x500 crop of an image and draw the result.

    A random crop (at most 500x500) of the image is pasted at a random
    position into a zero-filled 500x500 buffer, written to ``input.jpg``,
    pushed through the executor, and the surviving boxes are drawn under
    the name ``res_<i>``. Nothing is returned.

    Args:
        img_path: path of the image to load with ``cv2.imread``.
        i: value formatted into the name passed to ``draw_boxes``.

    Raises:
        IOError: if the image cannot be read.
    """
    im = cv2.imread(img_path)
    if im is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with a clear message instead of on ``im.shape`` below.
        raise IOError('Could not read image: {}'.format(img_path))

    input_size = 500
    imageBuffer = np.zeros([input_size, input_size, 3])

    # Source window inside the image (inclusive corner coordinates).
    crop_y1 = random.randint(0, max(0, im.shape[0] - input_size))
    crop_x1 = random.randint(0, max(0, im.shape[1] - input_size))
    crop_y2 = min(im.shape[0] - 1, crop_y1 + input_size - 1)
    crop_x2 = min(im.shape[1] - 1, crop_x1 + input_size - 1)
    crop_h = crop_y2 - crop_y1 + 1
    crop_w = crop_x2 - crop_x1 + 1

    # Destination window inside the buffer; random when the crop is smaller.
    paste_y1 = random.randint(0, input_size - crop_h)
    paste_x1 = random.randint(0, input_size - crop_w)
    paste_y2 = paste_y1 + crop_h - 1
    paste_x2 = paste_x1 + crop_w - 1

    imageBuffer[paste_y1:paste_y2 + 1, paste_x1:paste_x2 + 1, :] = \
        im[crop_y1:crop_y2 + 1, crop_x1:crop_x2 + 1, :]
    cv2.imwrite('input.jpg', imageBuffer)

    # Reverse the channel axis and move it first, then add a batch axis.
    blob = imageBuffer[:, :, ::-1].transpose(2, 0, 1)
    blob = mx.nd.array(blob[np.newaxis, :, :, :])
    blob.copyto(self.exec_.arg_dict['data'])
    self.exec_.forward(is_train=False)
    outputs = [output.asnumpy() for output in self.exec_._get_outputs()]
    cls_map = outputs[0]
    reg_map = outputs[1]

    bbox_deltas = reg_map.transpose((0, 2, 3, 1)).reshape((-1, 4))
    # NOTE(review): the (1, 25, 63, 63) layout is hard-coded, so this only
    # works for a network whose score map has exactly 25 * 63 * 63 entries.
    scores = cls_map[0, 1:2, :, :].reshape((1, 25, 63, 63))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
    proposals = bbox_transform_inv(self.anchors, bbox_deltas)

    # Keep the 6000 best-scoring boxes before NMS.
    order = scores.ravel().argsort()[::-1]
    order = order[:6000]
    scores = scores[order]
    proposals = proposals[order, :]

    keep = nms(np.hstack((proposals, scores)), 0.05)
    keep = keep[:300]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Final confidence cut.
    keep = np.where(scores > 0.4)[0]
    proposals = proposals[keep, :]
    scores = scores[keep]
    draw_boxes(imageBuffer, proposals, 'res_{}'.format(i))
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """Select the top-scoring region proposals without running NMS.

    Args:
        rpn_cls_prob: 4-D score array; last-axis channels from index
            ``num_anchors`` onward are used as the scores.
        rpn_bbox_pred: box regression output, flattened to ``(-1, 4)``.
        im_info: sequence whose first two entries are passed to ``clip_boxes``.
        _feat_stride: not used by this function.
        anchors: anchor boxes, one row per score.
        num_anchors: channel offset at which the used scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a zero batch-index column followed by
        the clipped boxes; ``scores`` are the matching scores.
    """
    top_n = cfg.TEST.RPN_TOP_N

    candidate_scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    candidate_deltas = rpn_bbox_pred.reshape((-1, 4))
    num_candidates = candidate_scores.shape[0]

    if num_candidates >= top_n:
        # argsort is ascending, so reverse it to get the best scores first.
        picked = candidate_scores.argsort(0)[::-1][:top_n].reshape(top_n, )
    else:
        # Fewer candidates than requested: sample with replacement. This may
        # be unnecessary and can lose good proposals, but rarely happens.
        picked = npr.choice(num_candidates, size=top_n, replace=True)

    # Select first, then decode only the chosen anchors.
    chosen_anchors = anchors[picked, :]
    chosen_deltas = candidate_deltas[picked, :]
    chosen_scores = candidate_scores[picked]

    boxes = bbox_transform_inv(chosen_anchors, chosen_deltas)
    boxes = clip_boxes(boxes, im_info[:2])

    # Only a single input image is supported, so every batch index is 0.
    batch_column = np.zeros((boxes.shape[0], 1), dtype=np.float32)
    rois = np.hstack((batch_column, boxes.astype(np.float32, copy=False)))
    return rois, chosen_scores
def predict(self, img):
    """Run the network on ``img`` and return predicted classes and boxes.

    Args:
        img: input fed to ``self.input_img``; ``img.shape[1:-1]`` is used as
            the clipping extent.

    Returns:
        ``(pred_class, pred_boxes)`` where ``pred_class`` is the argmax of
        the class probabilities and ``pred_boxes`` are the boxes returned
        by ``one_hot_box_transform``, cast to integers.
    """
    session = tf.get_default_session()
    feed_dict = {self.input_img: img}
    rois, cls_prob, bbox_pred = session.run(
        [self.rois, self.cls_prob, self.bbox_pred], feed_dict=feed_dict)

    pred_class = np.argmax(cls_prob, axis=-1)
    # Column 0 of each ROI is skipped; columns 1-4 are the box.
    boxes = rois[:, 1:5]
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    pred_boxes = bbox_transform_inv(boxes, bbox_pred)
    pred_boxes = _clip_boxes(pred_boxes, img.shape[1:-1])
    pred_boxes = one_hot_box_transform(pred_class, pred_boxes)
    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24; the builtin gives the same dtype on every NumPy version.
    return pred_class, pred_boxes.astype(int)
def im_detect(net, im, boxes):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0:
        # Hash each 5-value ROI row into one number so np.unique can find
        # duplicate rows.
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    # use softmax estimated probabilities
    scores = blobs_out['cls_prob']
    # Function-call form: prints the same text on Python 2 and also parses
    # on Python 3 (the bare ``print scores`` statement was a SyntaxError).
    print(scores)

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
def generate_pp_2nd(all_anchors, regr_layer, C):
    """Build refined proposals for every sample of a batch.

    Args:
        all_anchors: 2-D array whose first four columns are the boxes.
        regr_layer: regression output indexed as ``[sample, anchor, 4]``.
        C: config object providing ``classifier_regr_std`` and ``random_crop``.

    Returns:
        Array stacking, per sample, an array shaped like ``all_anchors``
        whose first four columns are the refined boxes (remaining columns
        start as ones) after being passed through ``clip_boxes``.
    """
    base_boxes = np.copy(all_anchors[:, :4])

    def refine(sample_idx):
        """Return the clipped proposals of one sample with a leading axis."""
        # Undo the normalisation applied to the regression targets.
        regr_std = np.array(C.classifier_regr_std).astype(dtype=np.float32)
        deltas = regr_layer[sample_idx, :, :] * regr_std
        sample_props = np.ones_like(all_anchors)
        sample_props[:, :4] = bbox_transform_inv(base_boxes, deltas)
        sample_props = clip_boxes(sample_props, [C.random_crop[0], C.random_crop[1]])
        return np.expand_dims(sample_props, axis=0)

    per_sample = [refine(sample_idx) for sample_idx in range(regr_layer.shape[0])]
    return np.concatenate(per_sample, axis=0)
def boxes_assemble_filter(all_pred_class_val, all_pred_box_val, all_xyz, all_gt_box , thresh = 0.05):
    """Decode per-sample box predictions, threshold them by score and run 3-D NMS.

    Args:
        all_pred_class_val: sequence with one class-score array per batch;
            each array is indexed as ``[sample, point, channel]``.
        all_pred_box_val: sequence of per-batch box regression arrays,
            indexed by batch then sample.
        all_xyz: sequence of per-batch arrays passed to
            ``bbox_transform_inv`` together with the box regression values.
        all_gt_box: ground-truth boxes, indexed as ``all_gt_box[batch][sample]``.
        thresh: minimum value of column 7 (the appended class score) for a
            box to be kept.

    Returns:
        ``(all_pred_boxes, gt_box_)``: two lists with one entry per sample.

    NOTE(review): the source was whitespace-collapsed, so whether the two
    ``append`` calls at the end of the inner loop sit inside or after the
    ``if`` cannot be recovered from here. They are placed after it (one
    entry per sample, possibly empty) — confirm against the upstream file.
    """
    #all_pred_boxes = np.zeros([1,8]) #l, w, h, theta, x, y, z, score
    all_pred_boxes = [] # saved in list
    num_batch = len(all_pred_class_val)
    batch_size = all_pred_class_val[0].shape[0]
    gt_box_ = []
    num_anchors = cfg.TRAIN.NUM_ANCHORS
    num_class = cfg.TRAIN.NUM_CLASSES
    num_regression = cfg.TRAIN.NUM_REGRESSION
    # generate, (num_samples x num_point) x 8
    for i in range(num_batch):
        for j in range(batch_size):
            # Flat sample index; computed but not used below.
            index = i*batch_size + j
            # For every anchor take class channels 1 .. num_class-1 of its
            # group (the first channel of each group is skipped).
            temp_pred_class = np.array([all_pred_class_val[i][j,:,(x*num_class+1):((x+1)*num_class)] for x in range(num_anchors)]).transpose(1, 0, 2) ##shape: 512 x num_anchors x 1
            temp_pred_class = temp_pred_class.reshape(-1, 1) # shape: n x 1
            # The bare string below is disabled legacy decoding code kept by
            # the original author; it is evaluated as a no-op expression.
            '''
            # l, w, h, alpha, x, y ,z
            temp_pred_box_l = np.array([ np.exp(all_pred_box_val[i][j,:,(x*num_regression)])*anchor_length for x in range(num_anchors)])
            temp_pred_box_l = temp_pred_box_l.reshape(-1,1)
            temp_pred_box_w = np.array([ np. exp(all_pred_box_val[i][j,:,(x*num_regression+1)])*anchor_width for x in range(num_anchors)])
            temp_pred_box_w = temp_pred_box_w.reshape(-1,1)
            temp_pred_box_h = np.array([ np.exp(all_pred_box_val[i][j,:,(x*num_regression+2)])*anchor_height for x in range(num_anchors)])
            temp_pred_box_h = temp_pred_box_h.reshape(-1,1)
            temp_pred_box_alpha = np.array([ all_pred_box_val[i][j,:,(x*num_regression+3)]*np.pi/4+anchor_alpha[x,0] for x in range(num_anchors)])
            temp_pred_box_alpha = temp_pred_box_alpha.reshape(-1,1)
            temp_pred_box_x = np.array([ all_pred_box_val[i][j,:,(x*num_regression+4)]*anchor_length + all_xyz[i][j,:,0] for x in range(num_anchors) ])
            temp_pred_box_x = temp_pred_box_x.reshape(-1,1)
            temp_pred_box_y = np.array([ all_pred_box_val[i][j,:,(x*num_regression+5)]*anchor_width + all_xyz[i][j,:,1] for x in range(num_anchors) ])
            temp_pred_box_y = temp_pred_box_y.reshape(-1,1)
            temp_pred_box_z = np.array([ all_pred_box_val[i][j,:,(x*num_regression+6)]*anchor_height + all_xyz[i][j,:,3] for x in range(num_anchors) ])
            temp_pred_box_z = temp_pred_box_z.reshape(-1,1)
            '''
            # temp_pred_box = np.array([all_pred_box_val[i][j,:,(x*num_regression):((x+1)*num_regression)] for x in range(num_anchors)]).transpose(1,0,2) ## shape: 512 x num_anchors x 7
            # temp_pred_box = temp_pred_box.reshape(-1, num_regression) # shape: n x 7
            ## transform the predictions into real-valued boxes
            temp_all_box = bbox_transform_inv(all_pred_box_val[i][j,:,:], all_xyz[i][j,:,:])
            #temp_index = np.full((temp_pred_class.shape[0],1), index) # shape: n x 1
            # temp_all_ = np.concatenate((temp_index, temp_pred_box_l, temp_pred_box_w, temp_pred_box_h, temp_pred_box_alpha, temp_pred_box_x, temp_pred_box_y, temp_pred_box_z, temp_pred_class),axis=1) # shape: n x 9
            temp_all_ = np.concatenate(( temp_all_box,temp_pred_class), axis=1)
            ## keep the boxes whose confidence (column 7) reaches thresh
            temp_all_ = temp_all_[ np.where( temp_all_[:,7] >= thresh)[0], :] ## temp_all_ shape: n x 8
            ## apply NMS
            if temp_all_.shape[0] > 0: ## skipped when no prediction box reaches thresh
                temp_all_ = nms_3d(temp_all_, cfg.TEST.NMS)
            all_pred_boxes.append(temp_all_)
            gt_box_.append(all_gt_box[i][j])
    # all_pred_boxes = np.delete(all_pred_boxes, 0, 0)
    # all_pred_boxes = all_pred_boxes[ np.where( all_pred_boxes[:,8] >= thresh)[0], :]
    return all_pred_boxes, gt_box_
def __call__(self, bbox_deltas, scores, anchors, im_size, scale=1.):
    """Turn anchors and predicted deltas into the final proposals.

    All inputs are moved to the CPU, decoded, clipped, size-filtered,
    ranked by score and passed through non-maximum suppression. The result
    is moved back to the GPU when the inputs came from it.

    Args:
        bbox_deltas: regression output, one row per anchor.
        scores: scores aligned with ``bbox_deltas``.
        anchors: anchor boxes aligned with ``bbox_deltas``.
        im_size: two-element size; entry 0 bounds columns 0 and 2, entry 1
            bounds columns 1 and 3.
        scale: multiplier applied to ``self.min_size`` for the size filter.

    Returns:
        The kept proposals (batch size 1 is assumed; no index column).
    """
    xp = cuda.get_array_module(bbox_deltas)
    on_gpu = xp != np

    host_deltas = cuda.to_cpu(bbox_deltas)
    host_scores = cuda.to_cpu(scores)
    host_anchors = cuda.to_cpu(anchors)

    # NOTE(review): even columns are clipped with im_size[0] and odd columns
    # with im_size[1]; that is only right if the box layout matches the
    # order of im_size — confirm against bbox_transform_inv.
    even_limit, odd_limit = im_size[0], im_size[1]

    boxes = bbox_transform_inv(host_anchors, host_deltas)
    boxes[:, 0:4:2] = np.clip(boxes[:, 0:4:2], 0, even_limit)
    boxes[:, 1:4:2] = np.clip(boxes[:, 1:4:2], 0, odd_limit)

    # Drop boxes whose height or width is below the threshold.
    large_enough = _filter_boxes(boxes, self.min_size * scale)
    boxes = boxes[large_enough, :]
    host_scores = host_scores[large_enough]

    # Best scores first, optionally truncated before NMS.
    ranking = host_scores.ravel().argsort()[::-1]
    if self.pre_nms_topN > 0:
        ranking = ranking[:self.pre_nms_topN]
    boxes = boxes[ranking, :]
    host_scores = host_scores[ranking]

    if on_gpu and not self.force_cpu_nms:
        survivors = non_maximum_suppression(cuda.to_gpu(boxes), thresh=self.nms_thresh)
        survivors = cuda.to_cpu(survivors)
    else:
        survivors = non_maximum_suppression(boxes, thresh=self.nms_thresh)

    if self.post_nms_topN > 0:
        survivors = survivors[:self.post_nms_topN]
    boxes = boxes[survivors]

    # Hand the result back on the device the inputs came from.
    if on_gpu:
        boxes = cuda.to_gpu(boxes)
    return boxes
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Produce NMS-filtered region proposals from RPN outputs.

    A simplified version compared to fast/er RCNN.

    Args:
        rpn_cls_prob: 4-D score array; last-axis channels from index
            ``num_anchors`` onward are used as the scores.
        rpn_bbox_pred: box regression output, flattened to ``(-1, 4)``.
        im_info: sequence whose first two entries are passed to ``clip_boxes``.
        cfg_key: key into ``cfg`` (``str`` or ``bytes``) selecting the
            pre/post-NMS limits and the NMS threshold.
        _feat_stride: not used by this function.
        anchors: anchor boxes, one row per score.
        num_anchors: channel offset at which the used scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a zero batch-index column followed by
        the kept boxes; ``scores`` are the matching scores.
    """
    # isinstance also accepts bytes subclasses such as numpy.bytes_, which
    # an exact ``type(...) == bytes`` comparison let through undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # maximum number of boxes kept after NMS
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    # Combine the anchors with the predicted deltas to get the predicted boxes.
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Restrict the predicted coordinates to the image.
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def get_proposal(all_anchors, cls_layer, regr_layer, C, overlap_thresh=0.7, pre_nms_topN=1000, post_nms_topN=300, roi_stride=8):
    """Decode anchors into proposals and keep the best ones after NMS.

    Args:
        all_anchors: 2-D array whose first four columns are the boxes.
        cls_layer: score output, flattened to a column.
        regr_layer: regression output, flattened to ``(-1, 4)``.
        C: config object providing ``random_crop``.
        overlap_thresh: overlap threshold forwarded to ``nms``.
        pre_nms_topN: number of top-scoring boxes passed to NMS.
        post_nms_topN: number of boxes kept after NMS.
        roi_stride: argument forwarded to ``filter_boxes``.

    Returns:
        The surviving proposals (scores are not returned).
    """
    anchor_boxes = np.copy(all_anchors[:, :4])
    box_scores = cls_layer.reshape((-1, 1))
    box_deltas = regr_layer.reshape((-1, 4))

    boxes = clip_boxes(bbox_transform_inv(anchor_boxes, box_deltas),
                       [C.random_crop[0], C.random_crop[1]])

    valid = filter_boxes(boxes, roi_stride)
    boxes, box_scores = boxes[valid, :], box_scores[valid]

    # Descending score order, truncated to the pre-NMS budget.
    ranking = box_scores.ravel().argsort()[::-1][:pre_nms_topN]
    boxes, box_scores = boxes[ranking, :], box_scores[ranking]

    survivors = nms(np.hstack((boxes, box_scores)), overlap_thresh, usegpu=False, gpu_id=0)
    return boxes[survivors[:post_nms_topN], :]
def forward(self, bbox_deltas):
    """Apply predicted deltas to the anchors of a batch of utterances.

    Args:
        bbox_deltas: tensor whose first dimension is the batch size and
            whose second dimension is
            ``num_anchors_per_frame * feature_len``.

    Returns:
        ``(anchors_per_utt, proposals)`` where ``anchors_per_utt`` is viewed
        as ``(num_anchors_per_frame * feature_len, 2)`` and ``proposals`` is
        the output of ``bbox_transform_inv`` for the whole batch.
    """
    batch_size = bbox_deltas.size(0)
    # Floor division keeps this an int on Python 3; true division produced
    # a float that ``view``/``expand`` below reject.
    feature_len = bbox_deltas.size(1) // self.num_anchors_per_frame
    num_anchors = self.num_anchors_per_frame * feature_len

    # Anchors of one utterance, broadcast to every item of the batch.
    anchors_per_utt = self.anchor_generator.get_anchors_per_utt(feature_len)
    anchors = anchors_per_utt.view(1, num_anchors, 2).expand(
        batch_size, num_anchors, 2)

    # ``reshape`` returns a new tensor; the result used to be discarded,
    # leaving ``bbox_deltas`` in whatever shape it arrived with.
    bbox_deltas = bbox_deltas.reshape(batch_size, num_anchors, 2)
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    anchors_per_utt = anchors_per_utt.view(num_anchors, 2)
    return anchors_per_utt, proposals
def proposal_layer(self, rpn_cls_prob, rpn_bbox_pred, rpn_trans_param, im_info):
    """Produce NMS-filtered proposals with their scores and transform params.

    Args:
        rpn_cls_prob: 4-D score tensor; last-axis channels from index
            ``self.num_anchors`` onward are used as the scores.
        rpn_bbox_pred: box regression output, viewed as ``(-1, 4)``.
        rpn_trans_param: per-anchor parameters, viewed as ``(-1, 6)`` and
            selected alongside the boxes.
        im_info: sequence whose first two entries are passed to ``clip_boxes``.

    Returns:
        ``(blob, scores, trans_param)``: ``blob`` is a zero batch-index
        column followed by the kept boxes; the other two are the matching
        rows of scores and transform parameters.
    """
    if self.is_train:
        pre_nms_top_n = self.config['train_rpn_pre_nms_top_n']
        post_nms_top_n = self.config['train_rpn_post_nms_top_n']
        nms_thresh = self.config['train_rpn_nms_thresh']
    else:
        pre_nms_top_n = self.config['test_rpn_pre_nms_top_n']
        post_nms_top_n = self.config['test_rpn_post_nms_top_n']
        nms_thresh = self.config['test_rpn_nms_thresh']

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, self.num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)
    rpn_trans_param = rpn_trans_param.view((-1, 6))

    proposals = bbox_transform_inv(self.anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_top_n > 0:
        order = order[:pre_nms_top_n]
        scores = scores[:pre_nms_top_n]
    # Always restore the column shape: with a non-positive limit the scores
    # used to stay 1-D and the torch.cat below failed on mismatched ranks.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]
    trans_param = rpn_trans_param[order.data, :]

    # Non-maximal suppression
    keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_top_n > 0:
        keep = keep[:post_nms_top_n]
    proposals = proposals[keep, :]
    scores = scores[keep,]
    trans_param = trans_param[keep, :]

    # Only support single image as input
    batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_())
    blob = torch.cat((batch_inds, proposals), 1)
    return blob, scores, trans_param
def repulsion(rois, box_deltas, gt_rois, rois_inside_ws, rois_outside_ws):
    """Compute the two repulsion loss terms for the predicted boxes.

    The deltas are de-normalised with the configured training means and
    standard deviations, decoded against ``rois[:, :, 1:5]`` and passed to
    ``repgt`` and ``repbox``.

    Args:
        rois: 3-D ROI tensor; the first dimension is the number of images.
        box_deltas: regression output, viewed as ``(num_images, 256, 4)``.
        gt_rois: ground-truth ROIs forwarded to both loss functions.
        rois_inside_ws: weights, viewed as ``(num_images, 256, 4)`` and
            forwarded to both loss functions.
        rois_outside_ws: weights, only used by the sanity check below.

    Returns:
        ``(loss_repgt, loss_repbox)``.
    """
    num_images = rois.shape[0]
    # NOTE(review): 256 ROIs per image is hard-coded here.
    per_image_shape = (num_images, 256, 4)

    deltas = Variable(box_deltas.view(*per_image_shape))
    rois_inside_ws = Variable(rois_inside_ws.view(*per_image_shape))
    rois_outside_ws = Variable(rois_outside_ws.view(*per_image_shape))

    # NOTE(review): leftover debugging aid — drops into pdb unless exactly
    # 1024 weight entries agree, which equals 256 * 4, i.e. a full match for
    # a single image only. Confirm whether this should remain.
    matching_weights = int(torch.sum(rois_outside_ws == rois_inside_ws))
    if matching_weights != 1024:
        import pdb
        pdb.set_trace()

    # Undo the target normalisation image by image.
    for img_idx in range(num_images):
        stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda()
        means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        deltas[img_idx] = deltas[img_idx].view(-1, 4) * stds + means

    # NOTE(review): the third argument is a literal 2 rather than
    # num_images — presumably a batch size; verify against the helper.
    pred_boxes = bbox_transform_inv(rois[:, :, 1:5], deltas, 2)

    return (repgt(pred_boxes, gt_rois, rois_inside_ws),
            repbox(pred_boxes, gt_rois, rois_inside_ws))
def test_single(self, image, rois, image_size, image_resize_ratio):
    """Test a single image on the net.

    Args:
        image: A preprocessed image or precomputed features of the image.
        rois: Rois sized for the image; a tensor with rows
            (image_index, x1, y1, x2, y2).
        image_size: The original image size, used for clipping.
        image_resize_ratio: The ratio the image was resized by; the ROI
            coordinates are divided by it.

    Returns:
        Whatever ``self._nms_boxes`` produces for the decoded boxes and
        their scores.
    """
    unique_rois, restore_index = self.dedup_boxes(rois.numpy())

    net_image = Variable(image.cuda(), volatile=True)
    net_rois = Variable(torch.Tensor(unique_rois).cuda(), volatile=True)

    # Run the image through the network: out[0] scores, out[1] deltas.
    out = self.model(net_image, net_rois)

    raw_deltas = out[1].data.cpu().numpy()
    deltas = self.unnormalize_deltas(raw_deltas, self._targets_mean, self._targets_std)

    # Map the ROIs back to the original scale (column 0 is skipped), then
    # apply the predicted deltas.
    source_boxes = unique_rois[:, 1:] / image_resize_ratio
    decoded = bbox_transform_inv(source_boxes, deltas)

    # Per-ROI best class; computed as in the original but not used further.
    _best_value, best_class = torch.max(out[0], 1)
    _best_class_np = np.squeeze(best_class.data.cpu().numpy())

    # The network output is exponentiated to obtain the scores.
    scores = np.exp(out[0].data.cpu().numpy())

    # Clip to the image, then expand back to the original ROI set.
    decoded = clip_boxes(decoded, image_size)
    scores = scores[restore_index, :]
    decoded = decoded[restore_index, :]

    # Non-maximum suppression of similar boxes.
    return self._nms_boxes(decoded, scores)
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors, mode='train'):
    """Produce NMS-filtered region proposals with fixed train/test limits.

    A simplified version compared to fast/er RCNN.

    Args:
        rpn_cls_prob: 4-D score array; last-axis channels from index
            ``num_anchors`` onward are used as the scores.
        rpn_bbox_pred: box regression output, flattened to ``(-1, 4)``.
        im_info: sequence whose first two entries are passed to ``clip_boxes``.
        _feat_stride: not used by this function.
        anchors: anchor boxes, one row per score.
        num_anchors: channel offset at which the used scores start.
        mode: ``'test'`` selects limits 3000/300; any other value uses
            12000/2000.

    Returns:
        ``(blob, scores)``: ``blob`` is a zero batch-index column followed by
        the kept boxes; ``scores`` are the matching scores.
    """
    if mode == 'test':
        pre_limit, post_limit = 3000, 300
    else:
        pre_limit, post_limit = 12000, 2000
    overlap_limit = 0.7

    # Scores and decoded, clipped boxes.
    box_scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    box_deltas = rpn_bbox_pred.reshape((-1, 4))
    boxes = clip_boxes(bbox_transform_inv(anchors, box_deltas), im_info[:2])

    # Best scores first, truncated before NMS.
    ranking = box_scores.ravel().argsort()[::-1]
    if pre_limit > 0:
        ranking = ranking[:pre_limit]
    boxes = boxes[ranking, :]
    box_scores = box_scores[ranking]

    survivors = nms(np.hstack((boxes, box_scores)), overlap_limit)
    if post_limit > 0:
        survivors = survivors[:post_limit]
    boxes = boxes[survivors, :]
    box_scores = box_scores[survivors]

    # Only a single input image is supported, so every batch index is 0.
    batch_column = np.zeros((boxes.shape[0], 1), dtype=np.float32)
    return np.hstack((batch_column, boxes.astype(np.float32, copy=False))), box_scores
def im_detect(sess, net, im):
    """Run the network on one image and return class scores and boxes.

    Args:
        sess: session object forwarded to ``net.test_image``.
        net: network exposing ``test_image(sess, data, im_info)``.
        im: image array; ``im.shape`` is used for clipping.

    Returns:
        ``(scores, pred_boxes)``, both with one row per ROI. Without box
        regression the ROI boxes are repeated once per score column.
    """
    blobs, im_scales = _get_blobs(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    data_blob = blobs['data']
    blobs['im_info'] = np.array(
        [data_blob.shape[1], data_blob.shape[2], im_scales[0]], dtype=np.float32)

    _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'], blobs['im_info'])

    # ROIs live in the scaled image; map them back to the input image.
    boxes = rois[:, 1:5] / im_scales[0]
    scores = np.reshape(scores, (scores.shape[0], -1))
    bbox_pred = np.reshape(bbox_pred, (bbox_pred.shape[0], -1))

    if not cfg.TEST.BBOX_REG:
        # Simply repeat the boxes, once for each class.
        return scores, np.tile(boxes, (1, scores.shape[1]))

    # Apply the bounding-box regression deltas.
    pred_boxes = bbox_transform_inv(boxes, bbox_pred)
    pred_boxes = _clip_boxes(pred_boxes, im.shape)
    return scores, pred_boxes
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """Pick the ``cfg.TEST.RPN_TOP_N`` best proposals, skipping NMS entirely.

    Args:
        rpn_cls_prob: 4-D score array; last-axis channels from index
            ``num_anchors`` onward are used as the scores.
        rpn_bbox_pred: box regression output, flattened to ``(-1, 4)``.
        im_info: sequence whose first two entries are passed to ``clip_boxes``.
        _feat_stride: not used by this function.
        anchors: anchor boxes, one row per score.
        num_anchors: channel offset at which the used scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a zero batch-index column followed by
        the clipped boxes; ``scores`` are the matching scores.
    """
    wanted = cfg.TEST.RPN_TOP_N

    flat_scores = rpn_cls_prob[:, :, :, num_anchors:]
    flat_deltas = rpn_bbox_pred.reshape((-1, 4))
    flat_scores = flat_scores.reshape((-1, 1))
    available = flat_scores.shape[0]

    if available < wanted:
        # Random selection with replacement; maybe unnecessary and loses
        # good proposals, but such a case rarely happens.
        selection = npr.choice(available, size=wanted, replace=True)
    else:
        descending = flat_scores.argsort(0)[::-1]
        selection = descending[:wanted].reshape(wanted, )

    # Restrict anchors, deltas and scores to the selection before decoding.
    top_anchors = anchors[selection, :]
    top_deltas = flat_deltas[selection, :]
    top_scores = flat_scores[selection]

    # Convert anchors into proposals and clip them to the image.
    top_boxes = clip_boxes(bbox_transform_inv(top_anchors, top_deltas), im_info[:2])

    # A single input image is assumed, hence an all-zero batch column.
    zeros_column = np.zeros((top_boxes.shape[0], 1), dtype=np.float32)
    roi_blob = np.hstack((zeros_column, top_boxes.astype(np.float32, copy=False)))
    return roi_blob, top_scores
def forward(self, input):
    """Generate per-image region proposals from RPN scores and deltas.

    Steps: build anchors for every feature-map cell, apply the predicted
    deltas, clip to the image, rank by score, keep the top ``pre_nms_topN``,
    run NMS, keep the top ``post_nms_topN`` and zero-pad the result.

    Args:
        input: sequence ``(cls_prob, bbox_deltas, im_info, cfg_key)``. The
            channels of ``cls_prob`` from ``self._num_anchors`` onward are
            the scores used (the first set of channels are bg probs, the
            second set fg probs, per the original comment).

    Returns:
        Tensor of shape ``(batch_size, post_nms_topN, 5)``; column 0 is the
        image index, columns 1-4 the box, unused rows are zero.
    """
    cls_prob, bbox_deltas, im_info, cfg_key = input[0], input[1], input[2], input[3]
    scores = cls_prob[:, self._num_anchors:, :, :]

    rpn_cfg = cfg[cfg_key]
    pre_nms_topN = rpn_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = rpn_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = rpn_cfg.RPN_NMS_THRESH
    min_size = rpn_cfg.RPN_MIN_SIZE  # read as in the original; size filtering is disabled

    batch_size = bbox_deltas.size(0)
    feat_height, feat_width = scores.size(2), scores.size(3)

    # One (x, y, x, y) shift per feature-map cell.
    xs = np.arange(0, feat_width) * self._feat_stride
    ys = np.arange(0, feat_height) * self._feat_stride
    grid_x, grid_y = np.meshgrid(xs, ys)
    flat_x, flat_y = grid_x.ravel(), grid_y.ravel()
    shifts = torch.from_numpy(np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    # (1, A, 4) + (K, 1, 4) -> (K, A, 4), flattened and shared by the batch.
    self._anchors = self._anchors.type_as(scores)
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Bring deltas and scores into the same order as the anchors.
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 4)
    scores = scores.permute(0, 2, 3, 1).contiguous().view(batch_size, -1)

    # Decode and clip.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
    proposals = clip_boxes(proposals, im_info, batch_size)

    # Descending score order for every image of the batch.
    _, order = torch.sort(scores, 1, True)

    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for b in range(batch_size):
        ranking = order[b]
        # Note: the comparison uses the element count of the whole batch.
        if 0 < pre_nms_topN < scores.numel():
            ranking = ranking[:pre_nms_topN]

        boxes_b = proposals[b][ranking, :]
        scores_b = scores[b][ranking].view(-1, 1)

        kept = nms(torch.cat((boxes_b, scores_b), 1), nms_thresh)
        kept = kept.long().view(-1)
        if post_nms_topN > 0:
            kept = kept[:post_nms_topN]
        boxes_b = boxes_b[kept, :]
        scores_b = scores_b[kept, :]

        # Rows past the kept proposals stay zero (padding).
        output[b, :, 0] = b
        output[b, :boxes_b.size(0), 1:] = boxes_b

    return output
def forward(self, input):
    """Produce 6-coordinate proposals from pyramid-level RPN outputs.

    Anchors for all pyramid levels are generated in one call, decoded with
    the predicted deltas, clipped, ranked by score, cut to ``pre_nms_topN``,
    passed through NMS and cut to ``post_nms_topN``.

    Args:
        input: indexable with five entries --
            ``input[0]`` class probabilities; index 1 of the last axis is
            taken as the score of each roi,
            ``input[1]`` box deltas (the original comment gives
            batch_size x num_rois x 6),
            ``input[2]`` ``im_info`` forwarded to ``clip_boxes``,
            ``input[3]`` key into ``cfg`` selecting the RPN settings,
            ``input[4]`` feature-map shapes passed to the anchor generator.

    Returns:
        Tensor of shape ``(batch_size, post_nms_topN, 7)``. Column 0 is the
        image index within the batch and columns 1-6 are the kept
        proposals; rows beyond the number of survivors stay zero.
    """
    cls_prob, bbox_deltas, im_info, cfg_key, feat_shapes = (
        input[0], input[1], input[2], input[3], input[4])

    # presumably (batch_size, num_rois) after selecting index 1 -- TODO confirm
    scores = cls_prob[:, :, 1]

    rpn_cfg = cfg[cfg_key]
    pre_nms_topN = rpn_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = rpn_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = rpn_cfg.RPN_NMS_THRESH
    # Read but unused: the min-size filtering step is disabled in this layer.
    min_size = rpn_cfg.RPN_MIN_SIZE

    batch_size = bbox_deltas.size(0)

    # NOTE(review): `l_ratios` is not defined in this method; it must come
    # from the enclosing module scope -- confirm it exists there.
    pyramid_anchors = generate_anchors_all_pyramids(
        self._fpn_scales, self._anchor_ratios, l_ratios, feat_shapes,
        self._fpn_feature_strides, self._fpn_anchor_stride)
    anchors = torch.from_numpy(pyramid_anchors).type_as(scores)
    num_anchors = anchors.size(0)

    # The same anchor set is shared by every image in the batch.
    anchors = anchors.view(1, num_anchors, 6)
    anchors = anchors.expand(batch_size, num_anchors, 6)

    # 1. anchors + deltas -> proposals; 2. clip them to the image.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
    proposals = clip_boxes(proposals, im_info, batch_size)

    # 4. rank every proposal of every image by score, best first.
    _, order = torch.sort(scores, dim=1, descending=True)

    output = scores.new(batch_size, post_nms_topN, 7).zero_()
    for i in range(batch_size):
        ranked = order[i]

        # 5. keep the top pre_nms_topN (e.g. 6000).
        # NOTE(review): the bound is the element count of the whole batch,
        # not of this image; the slice is a no-op whenever it is too large.
        if 0 < pre_nms_topN < scores.numel():
            ranked = ranked[:pre_nms_topN]

        proposals_single = proposals[i][ranked, :]
        scores_single = scores[i][ranked].view(-1, 1)

        # 6. apply NMS (e.g. threshold = 0.7).
        dets = torch.cat((proposals_single, scores_single), 1)
        keep_idx_i = nms(dets, nms_thresh).long().view(-1)

        # 7. keep the top post_nms_topN (e.g. 300).
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # 8. write the survivors; the remaining rows keep their zero padding.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output
def im_detect(net, im, boxes=None):
    """Run the detection network on one image and return scores and boxes.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals, or None when the
            network produces its own proposals (``cfg.TEST.HAS_RPN``)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    has_rpn = cfg.TEST.HAS_RPN
    dedup = cfg.DEDUP_BOXES > 0 and not has_rpn

    # Mapping image ROIs to feature-map ROIs aliases some distinct image
    # ROIs onto the same feature ROI. Hash the quantised ROIs so features
    # are only computed on the unique subset.
    if dedup:
        weights = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(weights)
        _, index, inv_index = np.unique(
            hashes, return_index=True, return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if has_rpn:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # Besides 'data', the network takes 'im_info' (RPN) or 'rois' (external
    # proposals) as its second input.
    extra_key = 'im_info' if has_rpn else 'rois'

    # Reshape the network inputs to match this image.
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs[extra_key].reshape(*(blobs[extra_key].shape))

    # Forward pass.
    forward_kwargs = {
        'data': blobs['data'].astype(np.float32, copy=False),
        extra_key: blobs[extra_key].astype(np.float32, copy=False),
    }
    blobs_out = net.forward(**forward_kwargs)

    if has_rpn:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # Unscale back to raw image space.
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # Use the raw scores before softmax under the assumption they were
        # trained as linear SVMs. (CHANGED: blob 'cls_score_box' replaces
        # the former 'cls_score'.)
        scores = net.blobs['cls_score_box'].data
    else:
        # Use softmax estimated probabilities.
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas. (CHANGED: output
        # 'bbox_pred_box' replaces the former 'bbox_pred'.)
        box_deltas = blobs_out['bbox_pred_box']
        pred_boxes = clip_boxes(
            bbox_transform_inv(boxes, box_deltas), im.shape)
    else:
        # Simply repeat the boxes, once for each class.
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if dedup:
        # Map scores and predictions back to the original set of boxes.
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
def _proposal_layer_py(rpn_bbox_cls_prob, rpn_bbox_pred, im_dims, cfg_key,
                       _feat_stride, anchor_scales):
    """Decode RPN outputs into score-ranked proposals for a single image.

    Algorithm:
      * generate A anchor boxes for every (H, W) feature-map cell
      * apply the predicted bbox deltas to the anchors
      * clip the predicted boxes to the image
      * remove boxes that fail the ``_filter_boxes`` min-size test
      * sort (proposal, score) pairs by score, highest first
      * keep the top ``pre_nms_topN``

    NMS and the ``post_nms_topN`` cut are currently disabled (see the
    commented-out code near the end).

    Args:
        rpn_bbox_cls_prob: class probabilities; per the original notes the
            layout is (1, h, w, 2*A) -- TODO confirm against the caller.
        rpn_bbox_pred: box deltas; per the original notes (1, h, w, 4*A).
        im_dims: image dimensions forwarded to ``clip_boxes``.
        cfg_key: truthy selects ``cfg.TRAIN`` settings, falsy ``cfg.TEST``.
        _feat_stride: multiplier turning cell indices into anchor offsets.
        anchor_scales: scales handed to ``generate_anchors``.

    Returns:
        Tuple ``(blob, scores, proposals_ori, scores_ori)``: ``blob`` is
        (N, 5) float32 with a zero batch index in column 0 followed by the
        kept proposals, ``scores`` holds the matching (N, 1) scores,
        ``proposals_ori`` is the array returned by ``bbox_transform_inv``
        and ``scores_ori`` the foreground scores before any filtering.
    """
    _anchors = generate_anchor.generate_anchors(scales=np.array(anchor_scales))
    A = _anchors.shape[0]

    # Move channels first: (1, h, w, C) -> (1, C, h, w).
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    # Only minibatch of 1 supported
    assert rpn_bbox_cls_prob.shape[0] == 1, \
        'Only single item batches are supported'

    phase_cfg = cfg.TRAIN if cfg_key else cfg.TEST
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    # Read but unused while NMS is disabled below.
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    # 1. Generate proposals from bbox deltas and shifted anchors.
    # Split the channels into (bg, fg) halves and keep the fg column.
    _, ch, height, width = rpn_bbox_cls_prob.shape
    scores = (rpn_bbox_cls_prob
              .reshape([1, 2, ch // 2 * height, width])
              .transpose([0, 2, 3, 1])
              .reshape([-1, 2])[:, 1]
              .reshape([-1, 1]))
    scores_ori = scores

    # Enumerate all shifts: one (x, y, x, y) offset per feature-map cell.
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * _feat_stride,
                                 np.arange(0, height) * _feat_stride)
    flat_x, flat_y = grid_x.ravel(), grid_y.ravel()
    shifts = np.stack([flat_x, flat_y, flat_x, flat_y], axis=1)
    K = shifts.shape[0]

    # Enumerate all shifted anchors, anchor-major: every cell for anchor 0,
    # then every cell for anchor 1, ... -> (A, K, 4) -> (K * A, 4).
    anchors = _anchors[:, np.newaxis, :] + shifts[np.newaxis, :, :]
    anchors = anchors.reshape((K * A, 4))

    # Bbox deltas: (1, 4*A, H, W) -> (A*H*W, 4), same order as the anchors.
    _, pred_ch, pred_h, pred_w = rpn_bbox_pred.shape
    bbox_deltas = (rpn_bbox_pred
                   .reshape([1, 4, (pred_ch // 4) * pred_h, pred_w])
                   .transpose([0, 2, 3, 1])
                   .reshape([-1, 4]))

    # Apply the deltas to the anchors.
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    proposals_ori = proposals

    # Shrink proposals that extend beyond the image.
    proposals = clip_boxes(proposals, im_dims)

    # Only boxes that pass the min-size test (min size = 16) survive.
    keep = _filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    # Reverse the ascending argsort so the largest score comes first.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    scores = scores[order]
    proposals = proposals[order]

    # 6-8. NMS and the post-NMS cut are disabled; the original code was:
    #   # dets are [x_start, y_start, x_end, y_end, score]; proposals and
    #   # scores must have the same ndim for hstack
    #   keep = nms(np.hstack((proposals, scores)), nms_thresh)
    #   if post_nms_topN > 0:
    #       keep = keep[:post_nms_topN]
    #   proposals = proposals[keep, :]
    #   scores = scores[keep]

    # Output rois blob. Our RPN implementation only supports a single input
    # image, so all batch inds are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores, proposals_ori, scores_ori
def _inv_transform_layer_fastrcnn_py(rois, fast_rcnn_bbox):
    """Apply Fast R-CNN box deltas to ``rois`` and return the decoded boxes.

    A 3-D or 4-D ``fast_rcnn_bbox`` is first reshaped to its last two
    dimensions; any other rank must already be 2-D, otherwise the
    assertion fails. The decoding itself is done by ``bbox_transform_inv``.
    """
    if np.ndim(fast_rcnn_bbox) in (3, 4):
        trailing_shape = np.shape(fast_rcnn_bbox)[-2:]
        fast_rcnn_bbox = np.reshape(fast_rcnn_bbox, trailing_shape)
    assert np.ndim(fast_rcnn_bbox) == 2
    return bbox_transform_inv(rois, fast_rcnn_bbox)
def test_gallery(net, dataset, use_cuda, output_dir, thresh=0.):
    """Detect boxes and extract features for every gallery image.

    Runs ``net`` on each image from ``dataset``, optionally applies the
    bounding-box regression deltas, keeps class-1 detections whose score
    exceeds ``thresh``, applies NMS and pickles the results.

    Args:
        net: network whose ``forward(im, None, im_info)`` returns
            ``(scores, bbox_pred, rois, features)``.
        dataset: object supporting ``len()`` and ``next()``; ``next()``
            yields ``(im, im_info, orig_shape)``.
        use_cuda: when true, the image tensor is moved to the GPU.
        output_dir: directory receiving ``gboxes.pkl`` and ``gfeatures.pkl``.
        thresh: minimum class score for a detection to be kept.

    Returns:
        ``(all_boxes, all_features)``: per-image detections (rows of four
        box coordinates plus score, float32) and the matching features.
    """
    # safe_load: yaml.load() without an explicit Loader raises TypeError on
    # PyYAML >= 6 and can construct arbitrary objects on older versions.
    with open('config.yml', 'r') as f:
        config = yaml.safe_load(f)

    num_images = len(dataset)
    all_boxes = [0 for _ in range(num_images)]
    all_features = [0 for _ in range(num_images)]

    start = time.time()
    for i in range(num_images):
        im, im_info, orig_shape = dataset.next()
        # Reorder axes (0, 3, 1, 2): presumably NHWC -> NCHW -- TODO confirm.
        im = im.transpose([0, 3, 1, 2])

        with torch.no_grad():
            if use_cuda:
                im = Variable(torch.from_numpy(im).cuda())
            else:
                im = Variable(torch.from_numpy(im))

            scores, bbox_pred, rois, features = net.forward(im, None, im_info)

        # Undo the image scaling stored in im_info[2].
        boxes = rois[:, 1:5] / im_info[2]
        scores = np.reshape(scores, [scores.shape[0], -1])
        bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

        if config['test_bbox_reg']:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred
            pred_boxes = bbox_transform_inv(
                torch.from_numpy(boxes), torch.from_numpy(box_deltas)).numpy()
            pred_boxes = clip_boxes(pred_boxes, orig_shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        boxes = pred_boxes

        # skip j = 0, because it's the background class
        j = 1
        inds = np.where(scores[:, j] > thresh)[0]
        cls_scores = scores[inds, j]
        cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
        cls_dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(
                np.float32, copy=False)
        # NMS is skipped when nothing passed the threshold.
        keep = nms(torch.from_numpy(cls_dets),
                   config['test_nms']).numpy() if cls_dets.size > 0 else []
        cls_dets = cls_dets[keep, :]
        all_boxes[i] = cls_dets
        all_features[i] = features[inds][keep]

        end = time.time()
        print('im_detect: {:d}/{:d} {:.3f}s'.format(
            i + 1, num_images, (end - start) / (i + 1)))

    det_file = os.path.join(output_dir, 'gboxes.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    feature_file = os.path.join(output_dir, 'gfeatures.pkl')
    with open(feature_file, 'wb') as f:
        pickle.dump(all_features, f, pickle.HIGHEST_PROTOCOL)

    return all_boxes, all_features
single_data['score'] = float(score) self.data.append(single_data) np.random.shuffle(self.data) print('Saving data to: ', self.cache_file) with open(self.cache_file, 'wb') as f: pickle.dump(self.data, f) print('Done prepare data') return if __name__ == '__main__': roidata = Roidata() data = roidata.data[0] print(data) proposalnp = np.array([data['proposal']], dtype=np.float32) gt_boxesnp = np.array([data['gt_boxes']], dtype=np.float32) # print(proposalnp, gt_boxesnp) targets = bbox_transform(proposalnp, gt_boxesnp) print(targets) # targets = targets / np.array(cfg.BBOX_NORMALIZE_STDS) targets = data['targets'] * np.array(cfg.BBOX_NORMALIZE_STDS) pred_gt = bbox_transform_inv(proposalnp, targets) print(pred_gt) # print(targets) print(len(roidata.data)) # for i in range(roidata.data_size): # img, labels = roidata.get() # print(img.shape, labels.shape)
def get_target_1st(all_anchors, regr_layer, img_data, C, roi_stride=10,
                   igthre=0.5, posthre=0.7, negthre=0.5):
    """Build classification and regression targets from refined anchors.

    For every image in the batch the anchors are decoded with the predicted
    deltas, clipped, and invalidated where they overlap ignore areas or are
    flagged by ``filter_negboxes``. The remaining boxes are matched against
    the ground-truth boxes to label positives and negatives and to compute
    regression targets.

    Args:
        all_anchors: anchor array; the first four columns are the box and
            the last column of the decoded copy is used as a validity flag.
        regr_layer: predicted deltas, indexed ``[image, anchor, :]``.
        img_data: per-image dicts with keys ``'bboxes'`` and
            ``'ignoreareas'``.
        C: config providing ``classifier_regr_std`` and ``random_crop``.
        roi_stride: forwarded to ``filter_negboxes``.
        igthre: summed ignore-area overlap above which a box is invalidated.
        posthre: overlap at or above which a box is positive.
        negthre: overlap below which a non-positive box is negative.

    Returns:
        ``[y_cls_batch, y_regr_batch]``. Per image, the class target has two
        columns (positive flag, negative flag) and the regression target
        five (positive flag followed by the four regression values).
    """
    A = np.copy(all_anchors[:, :4])
    num_anchors = all_anchors.shape[0]
    y_cls_batch, y_regr_batch = [], []
    for i in range(regr_layer.shape[0]):
        gta = np.copy(img_data[i]['bboxes'])
        num_bboxes = len(gta)
        ignoreareas = img_data[i]['ignoreareas']

        # Decode the anchors; the trailing column starts at 1 (= valid).
        proposals = np.ones_like(all_anchors)
        bbox_deltas = regr_layer[i, :, :]
        bbox_deltas = bbox_deltas * np.array(
            C.classifier_regr_std).astype(dtype=np.float32)
        proposals[:, :4] = bbox_transform_inv(A, bbox_deltas)
        proposals = clip_boxes(proposals,
                               [C.random_crop[0], C.random_crop[1]])

        # np.float64 replaces the np.float alias removed in NumPy 1.24; it
        # is the same dtype the alias resolved to.
        if len(ignoreareas) > 0:
            ignore_overlap = box_op(
                np.ascontiguousarray(proposals[:, :4], dtype=np.float64),
                np.ascontiguousarray(ignoreareas, dtype=np.float64))
            ignore_sum = np.sum(ignore_overlap, axis=1)
            proposals[ignore_sum > igthre, -1] = 0
        keep = filter_negboxes(proposals, roi_stride)
        proposals[keep, -1] = 0
        valid_idxs = np.where(proposals[:, -1] == 1)[0]

        # initialise empty output objectives
        y_alf_overlap = np.zeros((num_anchors, 1))
        y_is_box_valid = np.zeros((num_anchors, 1))
        y_alf_regr = np.zeros((num_anchors, 4))

        valid_anchors = proposals[valid_idxs, :]
        valid_alf_overlap = np.zeros((valid_anchors.shape[0], 1))
        valid_is_box_valid = np.zeros((valid_anchors.shape[0], 1))
        valid_rpn_regr = np.zeros((valid_anchors.shape[0], 4))
        if num_bboxes > 0:
            valid_overlap = bbox_overlaps(
                np.ascontiguousarray(valid_anchors, dtype=np.float64),
                np.ascontiguousarray(gta, dtype=np.float64))
            # find every anchor close to which bbox
            argmax_overlaps = valid_overlap.argmax(axis=1)
            max_overlaps = valid_overlap[np.arange(len(valid_idxs)),
                                         argmax_overlaps]
            # find which anchor closest to every bbox
            gt_argmax_overlaps = valid_overlap.argmax(axis=0)
            gt_max_overlaps = valid_overlap[gt_argmax_overlaps,
                                            np.arange(num_bboxes)]
            gt_argmax_overlaps = np.where(
                valid_overlap == gt_max_overlaps)[0]
            valid_alf_overlap[gt_argmax_overlaps] = 1
            valid_alf_overlap[max_overlaps >= posthre] = 1
            # The three best-overlapping boxes of every ground-truth box are
            # always positive.
            for j in range(len(gta)):
                inds = valid_overlap[:, j].ravel().argsort()[-3:]
                valid_alf_overlap[inds] = 1
            # get positives labels
            fg_inds = np.where(valid_alf_overlap == 1)[0]
            valid_is_box_valid[fg_inds] = 1
            anchor_box = valid_anchors[fg_inds, :4]
            gt_box = gta[argmax_overlaps[fg_inds], :]

            # compute regression targets
            valid_rpn_regr[fg_inds, :] = compute_targets(
                anchor_box, gt_box, C.classifier_regr_std, std=True)
            # get negatives labels
            bg_inds = np.where(
                (max_overlaps < negthre)
                & (valid_is_box_valid.reshape((-1)) == 0))[0]
            valid_is_box_valid[bg_inds] = 1

        # transform to the original overlap and validbox
        y_alf_overlap[valid_idxs, :] = valid_alf_overlap
        y_is_box_valid[valid_idxs, :] = valid_is_box_valid
        y_alf_regr[valid_idxs, :] = valid_rpn_regr
        # Valid but not positive -> negative.
        y_alf_negindex = y_is_box_valid - y_alf_overlap

        y_alf_cls = np.expand_dims(
            np.concatenate([y_alf_overlap, y_alf_negindex], axis=1), axis=0)
        y_alf_regr = np.expand_dims(
            np.concatenate([y_alf_overlap, y_alf_regr], axis=1), axis=0)

        y_cls_batch.append(y_alf_cls)
        y_regr_batch.append(y_alf_regr)

    y_cls_batch = np.concatenate(y_cls_batch, axis=0)
    y_regr_batch = np.concatenate(y_regr_batch, axis=0)

    return [y_cls_batch, y_regr_batch]
def forward(self, input):
    """Generate the region proposals of an RPN head for a whole batch.

    The anchors of every feature-map cell are shifted into place, refined
    with the predicted deltas and clipped to the image. Per image, the
    proposals are then ranked by foreground score, cut to ``pre_nms_topN``,
    filtered with NMS and cut to ``post_nms_topN``.

    Args:
        input: indexable with four entries --
            ``input[0]`` class probabilities (the channels from
            ``self._num_anchors`` onward are used as foreground scores),
            ``input[1]`` predicted box deltas,
            ``input[2]`` ``im_info`` forwarded to ``clip_boxes``,
            ``input[3]`` key into ``cfg`` selecting the RPN settings.

    Returns:
        Tensor of shape ``(batch_size, post_nms_topN, 5)``: column 0 holds
        the image index within the batch, columns 1-4 the kept proposals.
        Rows past the number of kept proposals remain zero.
    """
    cls_prob = input[0]
    bbox_deltas = input[1]
    im_info = input[2]
    settings = cfg[input[3]]

    # The first _num_anchors channels are bg probs, the rest are fg probs.
    scores = cls_prob[:, self._num_anchors:, :, :]

    pre_nms_topN = settings.RPN_PRE_NMS_TOP_N
    post_nms_topN = settings.RPN_POST_NMS_TOP_N
    nms_thresh = settings.RPN_NMS_THRESH
    # Read but unused: the min-size filtering step is disabled in this layer.
    min_size = settings.RPN_MIN_SIZE

    batch_size = bbox_deltas.size(0)
    feat_height, feat_width = scores.size(2), scores.size(3)

    # Offsets of every feature-map cell as (x, y, x, y) rows.
    xs = np.arange(0, feat_width) * self._feat_stride
    ys = np.arange(0, feat_height) * self._feat_stride
    xs, ys = np.meshgrid(xs, ys)
    xs, ys = xs.ravel(), ys.ravel()
    shifts = torch.from_numpy(np.stack([xs, ys, xs, ys], axis=1))
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    # Broadcast (1, A, 4) base anchors over (K, 1, 4) offsets, then share
    # the flattened K * A anchors across the batch.
    self._anchors = self._anchors.type_as(scores)
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Reorder deltas and scores so they line up with the anchors.
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # Decode the anchors, then clip the predicted boxes to the image.
    proposals = clip_boxes(
        bbox_transform_inv(anchors, bbox_deltas, batch_size),
        im_info, batch_size)

    # Descending score order for every image.
    _, order = torch.sort(scores, 1, True)

    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        order_single = order[i]

        # Take the top pre_nms_topN (e.g. 6000).
        # NOTE(review): numel() counts the scores of the whole batch, so
        # this bound is looser than the per-image count.
        limit_pre_nms = pre_nms_topN > 0 and pre_nms_topN < scores.numel()
        if limit_pre_nms:
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals[i][order_single, :]
        scores_single = scores[i][order_single].view(-1, 1)

        # Apply NMS (e.g. threshold = 0.7).
        keep_idx_i = nms(
            torch.cat((proposals_single, scores_single), 1), nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)

        # Take the top post_nms_topN (e.g. 300).
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # Rows that are not overwritten keep their zero padding.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output
def eval_one_epoch(sess, ops, test_writer, epoch, eval_feed_buf_q):
    """Run one evaluation pass and export point clouds and boxes as .ply.

    For each batch the network is run, and three files are written under
    ``ROOT_DIR/data/visulization/``: the raw cloud with the network's xyz
    points, the ground-truth boxes, and the predicted boxes after score
    thresholding and 3-D NMS.

    Args:
        sess: TensorFlow session used to run ``ops``.
        ops: dict mapping from string to tf ops.
        test_writer: unused here; kept for interface compatibility.
        epoch: unused here; kept for interface compatibility.
        eval_feed_buf_q: optional queue supplying batches. When ``None``,
            batches are read through ``data_provider``.

    Returns:
        Always ``1``.
    """
    is_training = False
    log_string('----')

    num_batches = NUM_BATCH

    batch_idx = -1
    # Test for None first: `num_batches - 1` raises TypeError on None.
    while num_batches is None or batch_idx < num_batches - 1:
        t0 = time.time()
        batch_idx += 1
        start_idx = batch_idx * BATCH_SIZE
        end_idx = (batch_idx + 1) * BATCH_SIZE

        # Only the data_provider path yields ground-truth boxes.
        # NOTE(review): the queue path never supplied gt_box, so the
        # ground-truth export below used to fail with NameError; it is now
        # skipped for such batches -- confirm this is acceptable.
        gt_box = None
        if eval_feed_buf_q is None:
            point_cloud_data, label_data, gt_box = \
                data_provider._get_evaluation_minibatch(start_idx, end_idx)
        else:
            if eval_feed_buf_q.qsize() == 0:
                print('eval_feed_buf_q.qsize == 0')
                break
            point_cloud_data, label_data, epoch_buf = eval_feed_buf_q.get()

        t1 = time.time()
        print('time of reading is {}'.format(t1-t0))

        # Check for end-of-data before touching `.shape`; the original read
        # the shape first and would fail on a None batch.
        if point_cloud_data is None:
            print('batch_idx:%d, get None, reading finished'%(batch_idx))
            break  # all data reading finished

        cur_smp_weights = np.ones(
            (point_cloud_data.shape[0], point_cloud_data.shape[1]))

        feed_dict = {ops['pointclouds_pl']: point_cloud_data,
                     ops['labels_pl']: label_data,
                     ops['is_training_pl']: is_training,
                     ops['smpws_pl']: cur_smp_weights}

        (summary, step, loss_val, pred_class_val, pred_prob_val,
         pred_box_val, xyz_pl, classification_loss_val,
         regression_loss_val) = sess.run(
            [ops['merged'], ops['step'], ops['loss'], ops['pred_class'],
             ops['pred_prob'], ops['pred_box'], ops['xyz_pl'],
             ops['classification_loss'], ops['regression_loss']],
            feed_dict=feed_dict)

        ## generating the raw point cloud and downsampled point cloud
        # Raw points are coloured black, network xyz points white.
        color_1 = np.tile(np.array([[0, 0, 0]]),
                          (point_cloud_data.shape[1], 1))
        raw_point_cloud_color = np.concatenate(
            (point_cloud_data[0], color_1), 1)
        color_2 = np.tile(np.array([[255, 255, 255]]), (xyz_pl.shape[1], 1))
        # The 0.05 offset is written back into xyz_pl, so the box decoding
        # below also sees the shifted points.
        xyz_pl[0] = xyz_pl[0] + np.array([[0.05, 0.05, 0.05]])
        xyz_pl_color = np.concatenate((xyz_pl[0], color_2), 1)
        xyz_color = np.concatenate((raw_point_cloud_color, xyz_pl_color), 0)
        path_vis = os.path.join(ROOT_DIR, 'data/visulization/',
                                'raw_xyz_'+str(batch_idx)+'.ply')
        create_ply(xyz_color, path_vis)
        t2 = time.time()
        print('time of generating is {}'.format(t2 - t1))

        ## generating the raw point cloud and ground truth bounding boxes
        if gt_box is not None:
            gt_box_ = get_box_coordinate(gt_box[0][:, 1:8])
            path_vis = os.path.join(ROOT_DIR, 'data/visulization/',
                                    'raw_gt_box_'+str(batch_idx)+'.ply')
            gen_box_pl(path_vis, gt_box_, point_cloud_data[0])
        t3 = time.time()
        print('time of ground truth box is {}'.format(t3 - t2))

        ## generating the raw point cloud and predicted bounding boxes
        num_anchors = cfg.TRAIN.NUM_ANCHORS
        num_class = cfg.TRAIN.NUM_CLASSES
        pred_box_ = bbox_transform_inv(pred_box_val[0], xyz_pl[0])
        # For each anchor, take the class columns after its first one
        # (presumably skipping background -- TODO confirm), then flatten to
        # a single score column.
        pred_class = np.array(
            [pred_class_val[0, :, (x*num_class+1):((x+1)*num_class)]
             for x in range(num_anchors)]).transpose(1, 0, 2)
        pred_class = pred_class.reshape(-1, 1)
        pred_box_ = np.concatenate((pred_box_, pred_class), axis=1)
        # Column 7 is the appended score; keep boxes scoring at least 0.2.
        pred_box_ = pred_box_[np.where(pred_box_[:, 7] >= 0.2)[0], :]
        if pred_box_.shape[0] > 0:
            pred_box_ = nms_3d(pred_box_, cfg.TEST.NMS)
        pred_box_ = get_box_coordinate(pred_box_[:, 0:7])
        path_vis = os.path.join(ROOT_DIR, 'data/visulization/',
                                'raw_pred_box_'+str(batch_idx)+'.ply')
        gen_box_pl(path_vis, pred_box_, point_cloud_data[0])
        t4 = time.time()
        print('time of predicting box is {}'.format(t4 - t3))

        if batch_idx % 40 == 0:
            print('the test batch is {}, the loss value is {}'.format(batch_idx, loss_val))
            print('the classificaiton loss is {}, the regression loss is {}'.format(classification_loss_val, regression_loss_val))

    print('Done!!')
    return 1