def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # blobs['data'] now holds the image and blobs['rois'] the proposals.
    # This block was a "YOUR CODE GOES HERE" placeholder; it is filled in
    # here following the reference implementation shown in the next snippet:
    # reshape the network inputs to the blob shapes, run the forward pass,
    # and read the softmax-estimated probabilities from the 'cls_prob'
    # output.
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['rois'].reshape(*(blobs['rois'].shape))
    blobs_out = net.forward(
        data=blobs['data'].astype(np.float32, copy=False),
        rois=blobs['rois'].astype(np.float32, copy=False))
    # use softmax estimated probabilities (net output)
    scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
def im_detect(net, im, boxes):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    # use softmax estimated probabilities
    scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
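# A minimal, self-contained sketch of the DEDUP_BOXES hashing trick used in
# im_detect above: each ROI row is scaled (1/16 here, mirroring a typical
# feature stride), rounded, and dotted with powers of 1e3 so every box
# collapses to a single scalar hash; np.unique then yields the unique subset
# plus the inverse mapping used to scatter scores back. Values are
# illustrative only.
import numpy as np

def dedup_rois_demo():
    rois = np.array([[0,  10,  10, 100, 100],
                     [0,  12,  11, 102,  98],   # aliases to the same feature ROI
                     [0, 200, 200, 300, 300]], dtype=np.float32)
    dedup_scale = 1.0 / 16
    v = np.array([1, 1e3, 1e6, 1e9, 1e12])
    hashes = np.round(rois * dedup_scale).dot(v)
    _, index, inv_index = np.unique(hashes, return_index=True,
                                    return_inverse=True)
    unique_rois = rois[index, :]          # forward the net on these only
    restored = unique_rois[inv_index, :]  # maps per-ROI results back to all inputs
    assert restored.shape == rois.shape
    return unique_rois, inv_index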
def compute_rois_offset(rois, offset, im_info=None):
    """Compute bounding-box offsets for regions of interest."""
    assert rois.shape[1] == 4
    assert offset.shape[1] == 4

    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Targets were normalized by a precomputed mean and stdev during
        # training -- reverse that transformation here.
        offset_unnorm = offset * np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS) + \
            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
    else:
        offset_unnorm = offset.copy()

    rois_offset = bbox_transform_inv(rois, offset_unnorm)
    if im_info is not None:
        rois_offset = clip_boxes(rois_offset, im_info[:2])
    return rois_offset
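# bbox_transform_inv and clip_boxes are called throughout these snippets but
# never defined in this section. Below is a pure-NumPy sketch following the
# standard Fast R-CNN delta parameterization (dx, dy applied to the box
# center, dw, dh applied to log width/height); reference implementations may
# differ in minor details.
import numpy as np

def bbox_transform_inv_sketch(boxes, deltas):
    """Apply (dx, dy, dw, dh) deltas to N x 4 boxes; deltas is N x (4*K)."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy = deltas[:, 0::4], deltas[:, 1::4]
    dw, dh = deltas[:, 2::4], deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred = np.zeros_like(deltas)
    pred[:, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred[:, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred[:, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred[:, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred

def clip_boxes_sketch(boxes, im_shape):
    """Clip boxes (N x 4K) to lie inside an image of shape (H, W, ...)."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes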
def forward(self, bottom, top):
    # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
    # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
    all_rois = bottom[0].data
    all_rois_orig = all_rois[:]
    # GT boxes (x1, y1, x2, y2, label)
    # TODO(rbg): it's annoying that sometimes I have extra info before
    # and other times after box coordinates -- normalize to one format
    gt_boxes = bottom[1].data
    im = bottom[2].data

    # Include ground-truth boxes in the set of candidate rois
    zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1]))))

    num_images = 1
    rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
    fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)

    rois, labels, bbox_targets, bbox_weights, layer_indexs = _sample_rois(
        all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
        self._num_classes, sample_type='fpn', k0=4)

    vis = False
    if vis:
        ind = np.where(labels != 0)[0]
        im_shape = im.shape
        # Undo the precomputed target normalization before visualizing
        means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
                        (21, 1)).ravel()
        stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS),
                       (21, 1)).ravel()
        bbox_targets = bbox_targets * stds + means
        pred_boxes = bbox_transform_inv(rois[:, 1:], bbox_targets)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        l = labels[ind]
        ro = rois[ind, 1:]
        b = bbox_targets[ind, :]
        p = pred_boxes[ind, :] * bbox_weights[ind, :]
        r = []
        for i in range(p.shape[0]):
            r.append(p[i, l[i] * 4:l[i] * 4 + 4])
        r_ = np.vstack(r)
        vis_all_detection(im, all_rois_orig[:, 1:], l, 1)

    # Split the sampled ROIs across the four pyramid levels (P2..P5).
    labels_all = []
    bbox_targets_all = []
    bbox_weights_all = []
    rois_all = []
    for i in range(4):
        index = (layer_indexs == (i + 2))
        num_index = sum(index)
        if num_index == 0:
            # No ROI was assigned to this level: emit a small dummy batch
            # with label -1 so the level still gets a non-empty blob.
            rois_ = np.zeros((1 * 4, 5), dtype=rois.dtype)
            labels_ = np.ones((1 * 4, ), dtype=labels.dtype) * -1
            bbox_targets_ = np.zeros((1 * 4, self._num_classes * 4),
                                     dtype=bbox_targets.dtype)
            bbox_weights_ = np.zeros((1 * 4, self._num_classes * 4),
                                     dtype=bbox_weights.dtype)
        else:
            rois_ = rois[index, :]
            labels_ = labels[index]
            bbox_weights_ = bbox_weights[index, :]
            bbox_targets_ = bbox_targets[index, :]
        rois_all.append(rois_)
        labels_all.append(labels_)
        bbox_targets_all.append(bbox_targets_)
        bbox_weights_all.append(bbox_weights_)

    rois_p2 = rois_all[0]
    rois_p3 = rois_all[1]
    rois_p4 = rois_all[2]
    rois_p5 = rois_all[3]
    labels_all = np.concatenate(labels_all)
    bbox_targets_all = np.concatenate(bbox_targets_all, axis=0)
    bbox_weights_all = np.concatenate(bbox_weights_all, axis=0)
    # print bbox_targets_all.shape, bbox_weights_all.shape, rois_p2.shape, \
    #     rois_p3.shape, rois_p4.shape, rois_p5.shape, labels_all.shape

    top[0].reshape(*rois_p2.shape)
    top[0].data[...] = rois_p2
    top[1].reshape(*rois_p3.shape)
    top[1].data[...] = rois_p3
    top[2].reshape(*rois_p4.shape)
    top[2].data[...] = rois_p4
    top[3].reshape(*rois_p5.shape)
    top[3].data[...] = rois_p5
    # classification labels
    top[4].reshape(*labels_all.shape)
    top[4].data[...] = labels_all
    # bbox_targets
    top[5].reshape(*bbox_targets_all.shape)
    top[5].data[...] = bbox_targets_all
    # bbox_inside_weights
    top[6].reshape(*bbox_weights_all.shape)
    top[6].data[...] = bbox_weights_all
    # bbox_outside_weights
    top[7].reshape(*bbox_weights_all.shape)
    top[7].data[...] = np.array(bbox_weights_all > 0).astype(np.float32)
def run_batch(sess, net, inputs, outputs, ims, batch_boxes, batch_relations,
              bbox_reg, multi_iter):
    mi = multi_iter[-1]
    results = list()
    # t_start = time.time()

    # Convert each image and the RoIs within it into network inputs.
    im_scaled_list, rois_list = list(), list()
    for im, boxes in zip(ims, batch_boxes):
        im_scaled, im_scale_factors = _get_image_blob(im)
        rois = _get_rois_blob(boxes, im_scale_factors)
        im_scaled_list.append(im_scaled[0])
        rois_list.append(rois)

    conv_outs = sess.run(net.layers['conv_out'],
                         feed_dict={
                             inputs['ims']: np.stack(im_scaled_list, axis=0),
                             net.keep_prob: 1,
                         })
    # print 'VGG takes', time.time() - t_start
    # t_start = time.time()

    for i in range(len(rois_list)):
        conv_out = np.expand_dims(conv_outs[i], axis=0)
        rois = rois_list[i]
        # all possible subject/object combinations
        relations = np.array(batch_relations[i], dtype=np.int32)
        num_roi = rois.shape[0]
        num_rel = relations.shape[0]

        feed_dict = {
            net.layers['conv_out']: conv_out,
            inputs['rois']: rois,
            inputs['relations']: relations,
            inputs['rel_rois']: data_utils.compute_rel_rois(num_rel, rois,
                                                            relations),
            net.keep_prob: 1
        }
        inputs_feed = data_utils.create_graph_data(num_roi, num_rel,
                                                   relations)
        for k in inputs_feed:
            feed_dict[inputs[k]] = inputs_feed[k]

        ops_value = sess.run(outputs, feed_dict=feed_dict)

        rel_probs_flat = ops_value['rel_probs'][mi]
        rel_probs = np.zeros([num_roi, num_roi, rel_probs_flat.shape[1]])
        for j, rel in enumerate(relations):
            rel_probs[rel[0], rel[1], :] = rel_probs_flat[j, :]

        cls_probs = ops_value['cls_probs'][mi]

        if bbox_reg:
            # Apply bounding-box regression deltas to this image's proposals.
            # (The original referenced the loop variables `boxes` and `im`
            # left over from the conversion loop above, which always pointed
            # at the *last* image in the batch; index explicitly instead.)
            pred_boxes = bbox_transform_inv(batch_boxes[i],
                                            ops_value['bbox_deltas'][mi])
            pred_boxes = clip_boxes(pred_boxes, ims[i].shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(batch_boxes[i], (1, cls_probs.shape[1]))

        results.append({
            'scores': cls_probs,
            'boxes': pred_boxes,
            'relations': rel_probs
        })
    # print 'Scene takes', time.time() - t_start
    return results
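# data_utils.compute_rel_rois above is external to this section. In scene-
# graph pipelines the RoI for a relation is commonly taken as the union box
# of the subject and object boxes; the sketch below makes that assumption
# explicit and is hypothetical, not the verified implementation.
import numpy as np

def compute_rel_rois_sketch(num_rel, rois, relations):
    """rois: R x 5 (batch_idx, x1, y1, x2, y2); relations: num_rel x 2."""
    rel_rois = np.zeros((num_rel, 5), dtype=rois.dtype)
    for k in range(num_rel):
        sub = rois[relations[k, 0], 1:5]
        obj = rois[relations[k, 1], 1:5]
        rel_rois[k, 1] = min(sub[0], obj[0])  # union x1
        rel_rois[k, 2] = min(sub[1], obj[1])  # union y1
        rel_rois[k, 3] = max(sub[2], obj[2])  # union x2
        rel_rois[k, 4] = max(sub[3], obj[3])  # union y2
    return rel_rois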
def visualize(self, net, filename):
    blobs_out = net.forward()

    try:
        im = net.blobs['data'].data[0].copy()
    except KeyError:
        im = net.blobs['image'].data[0].copy()
    im = im.transpose((1, 2, 0))  # ch x h x w -> h x w x ch
    im += cfg.PIXEL_MEANS
    im = im[:, :, (2, 1, 0)]
    im_scale = float(cfg.TEST.SCALES[0]) / float(min(im.shape[:2]))

    if cfg.TEST.HAS_RPN:
        # assert len(im_scale) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scale
    elif cfg.DEDUP_BOXES > 0:
        raise NotImplementedError
        # When mapping from image ROIs to feature map ROIs, there's some
        # aliasing (some distinct image ROIs get mapped to the same feature
        # ROI). Here, we identify duplicate feature ROIs, so we only compute
        # features on the unique subset.
        # v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        # hashes = np.round(net.blobs['rois'].data.copy() *
        #                   cfg.DEDUP_BOXES).dot(v)
        # _, index, inv_index = np.unique(hashes, return_index=True,
        #                                 return_inverse=True)
        # rois = net.blobs['rois'][index, :]
        # boxes = boxes[index, :]

    # use softmax estimated probabilities
    scores = net.blobs['cls_score'].data.copy()
    scores = np.exp(scores)
    scores_sum = np.sum(scores, axis=1)[:, np.newaxis]
    scores /= scores_sum
    # scores = scores.max(axis=1)
    # scores = blobs_out['cls_score']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        try:
            box_deltas = net.blobs['bbox_pred'].data.copy()
        except KeyError:
            box_deltas = net.blobs['bbox_pred_depth'].data.copy()
        box_deltas = box_deltas * self.bbox_stds + self.bbox_means
        # box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        print '[Warning] Bounding-box regression is not applied at test phase.'
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    # if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
    #     # Map scores and predictions back to the original set of boxes
    #     scores = scores[inv_index, :]
    #     pred_boxes = pred_boxes[inv_index, :]

    # Post-processing
    imdb = self.imdb
    thresh = 0.8
    clrs = sns.color_palette("Set2", imdb.num_classes)
    plt.figure(1, figsize=(15, 10))
    plt.clf()
    plt.imshow(im.astype(np.uint8))
    plt.gca().axis('off')

    # skip j = 0, because it's the background class
    n_det = 0
    for j in xrange(1, imdb.num_classes - 1):
        inds = np.where(scores[:, j] > thresh)[0]
        cls_scores = scores[inds, j]
        cls_boxes = pred_boxes[inds, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
            .astype(np.float32, copy=False)
        # CPU NMS is much faster than GPU NMS when the number of boxes
        # is relatively small (e.g., < 10k)
        keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
        cls_dets = cls_dets[keep, :]
        n_det += len(inds)
        self.vis_detections(imdb.classes[j], cls_dets, clrs[j])

    plt.title('%d objects are detected.' % n_det)
    plt.gca().legend()
    plt.savefig(filename)
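# The manual softmax in visualize() exponentiates raw cls_score logits
# directly, which can overflow for large scores. A numerically stable variant
# (a standard trick, not taken from this codebase) subtracts the per-row
# maximum before exponentiating; the resulting probabilities are unchanged.
import numpy as np

def stable_softmax(scores):
    """Row-wise softmax over an R x K score matrix."""
    shifted = scores - scores.max(axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=1, keepdims=True)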
def im_detect_ori(sess, net, im, boxes=None):
    """Detect object classes in an image given object proposals, along with
    the stroke orientation and facial area.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
        strokes (ndarray): R x 3 array of stroke orientation class probs
        areas (ndarray): R x 9 array of facial area probs
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    # forward pass
    if cfg.TEST.HAS_RPN:
        feed_dict = {
            net.data: blobs['data'],
            net.im_info: blobs['im_info'],
            net.keep_prob: 1.0
        }
    else:
        feed_dict = {
            net.data: blobs['data'],
            net.rois: blobs['rois'],
            net.keep_prob: 1.0
        }

    cls_score, cls_prob, bbox_pred, rois, eye, smile = sess.run(
        [
            net.get_output('cls_score'),
            net.get_output('cls_prob'),
            net.get_output('bbox_pred'),
            net.get_output('rois'),
            net.get_output('eye_prob'),
            net.get_output('smile_prob')
        ],
        feed_dict=feed_dict)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = cls_score
    else:
        # use softmax estimated probabilities
        scores = cls_prob

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes, eye, smile
def forward(self, bottom, top):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    cfg_key = self.phase  # either 'TRAIN' or 'TEST'
    if cfg_key == 0:
        cfg_ = cfg.TRAIN
    else:
        cfg_ = cfg.TEST
    pre_nms_topN = cfg_.RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg_.RPN_POST_NMS_TOP_N
    nms_thresh = cfg_.RPN_NMS_THRESH
    min_size = cfg_.RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0].data[:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1].data
    im_info = bottom[2].data[0, :]

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob

    # [Optional] output scores blob
    if len(top) > 1:
        top[1].reshape(*(scores.shape))
        top[1].data[...] = scores
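# _filter_boxes is referenced by the proposal layers here but not defined in
# this section. A minimal sketch consistent with how it is called (drop
# proposals whose width or height is below min_size, expressed in input-image
# scale); the reference py-faster-rcnn helper behaves the same way.
import numpy as np

def _filter_boxes_sketch(boxes, min_size):
    """Keep indices of boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep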
def im_detect(feature_net, embed_net, recurrent_net, im, boxes=None,
              use_box_at=-1):
    """Detect object classes in an image given object proposals.

    Arguments:
        feature_net (caffe.Net): CNN model for extracting features
        embed_net (caffe.Net): A word embedding layer
        recurrent_net (caffe.Net): Recurrent model for generating captions
            and locations
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        use_box_at (int32): Use the predicted box at a given timestep;
            defaults to the last one (use_box_at=-1)

    Returns:
        scores (ndarray): R x 1 array of object class scores
        pred_boxes (ndarray): R x 4 array of predicted bounding boxes
        captions (list): length-R list of lists of word tokens (captions)
    """
    # for bbox unnormalization
    bbox_mean = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape((1, 4))
    bbox_stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS).reshape((1, 4))

    blobs, im_scales = _get_blobs(im, boxes)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
        dtype=np.float32)

    # reshape network inputs
    feature_net.blobs['data'].reshape(*(blobs['data'].shape))
    feature_net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    feature_net.forward(data=im_blob, im_info=blobs['im_info'])

    region_features = feature_net.blobs['region_features'].data.copy()
    rois = feature_net.blobs['rois'].data.copy()
    # detection scores
    scores = feature_net.blobs['cls_probs'].data[:, 1].copy()
    # proposal boxes
    boxes = rois[:, 1:5] / im_scales[0]
    proposal_n = rois.shape[0]

    feat_args = {'input_features': region_features}
    opt_args = {}
    # global feature as an optional input: context
    if 'global_features' in feature_net.blobs and \
            'global_features' in recurrent_net.blobs:
        # tiled according to the global feature shape
        opt_args['global_features'] = np.tile(
            feature_net.blobs['global_features'].data, (1, proposal_n, 1))

    bbox_pred_direct = ('bbox_pred' in feature_net.blobs)
    if bbox_pred_direct:
        # do greedy search
        captions, _, logprobs = _greedy_search(embed_net, recurrent_net,
                                               feat_args, opt_args,
                                               proposal_n, pred_bbox=False)
        box_offsets = feature_net.blobs['bbox_pred'].data
    else:
        captions, box_offsets, logprobs = _greedy_search(
            embed_net, recurrent_net, feat_args, opt_args, proposal_n,
            pred_bbox=True, use_box_at=use_box_at)

    # bbox target unnormalization
    box_deltas = box_offsets * bbox_stds + bbox_mean
    # do the transformation
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.shape)

    return scores, pred_boxes, captions
def __call__(self, x, bbox_deltas, im_info):
    if isinstance(bbox_deltas.data, chainer.cuda.ndarray):
        bbox_deltas = chainer.cuda.to_cpu(bbox_deltas.data)
    if isinstance(x.data, chainer.cuda.ndarray):
        x = chainer.cuda.to_cpu(x.data)

    assert x.shape[0] == 1, 'Only single item batches are supported'

    if self.train:
        pre_nms_topN = self.TRAIN_RPN_PRE_NMS_TOP_N
        post_nms_topN = self.TRAIN_RPN_POST_NMS_TOP_N
        nms_thresh = self.TRAIN_RPN_NMS_THRESH
        min_size = self.TRAIN_RPN_MIN_SIZE
    else:
        pre_nms_topN = self.TEST_RPN_PRE_NMS_TOP_N
        post_nms_topN = self.TEST_RPN_POST_NMS_TOP_N
        nms_thresh = self.TEST_RPN_NMS_THRESH
        min_size = self.TEST_RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = x[:, self.num_anchors:, :, :]

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self.feat_stride
    shift_y = np.arange(0, height) * self.feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self.num_anchors
    K = shifts.shape[0]
    anchors = self.anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack(
        (batch_inds, proposals.astype(np.float32, copy=False)))
    blob = chainer.cuda.cupy.asarray(blob, np.float32)
    rois = chainer.Variable(blob, volatile=not self.train)
    return rois
def _im_detect(net, im, roidb, blob_names=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        roidb (an roidb item): provides gt_boxes if necessary
        blob_names (list of str): list of feature blob names to be extracted

    Returns:
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        features (dict of ndarray): {blob name: R x D array of features}
    """
    im_blob, im_scales = get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    blobs = {
        'data': im_blob,
        'im_info': np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32),
    }
    if 'gt_boxes' in net.blobs:
        # Supply gt_boxes as input. Used to get pid_labels for proposals.
        blobs['gt_boxes'] = get_gt_boxes_blob(
            roidb['boxes'], roidb['gt_classes'], roidb['gt_pids'], im_scales)

    # reshape network inputs
    for k, v in blobs.iteritems():
        net.blobs[k].reshape(*(v.shape))

    # do forward
    forward_kwargs = {k: v.astype(np.float32, copy=False)
                      for k, v in blobs.iteritems()}
    blobs_out = net.forward(**forward_kwargs)

    # unscale rois back to raw image space
    rois = net.blobs['rois'].data.copy()
    boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # the first column of the pid_prob is the non-person box score
        scores = blobs_out['pid_prob'][:, 0]
        scores = scores[:, np.newaxis]
        scores = np.hstack([scores, 1. - scores])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        # As we no longer scale and shift the bbox_pred weights when
        # snapshotting, we need to do this manually at test time.
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS and \
                cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            num_classes = box_deltas.shape[1] // 4
            stds = np.tile(cfg.TRAIN.BBOX_NORMALIZE_STDS, num_classes)
            means = np.tile(cfg.TRAIN.BBOX_NORMALIZE_MEANS, num_classes)
            box_deltas = box_deltas * stds + means
        boxes = bbox_transform_inv(boxes, box_deltas)
        boxes = clip_boxes(boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        boxes = np.tile(boxes, (1, scores.shape[1]))

    features = {blob: net.blobs[blob].data.copy() for blob in blob_names} \
        if blob_names is not None else {}

    return boxes, scores, features
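# Several snippets unnormalize bbox_pred by precomputed target statistics
# (BBOX_NORMALIZE_STDS / MEANS), tiled once per class, as in _im_detect
# above. A tiny self-contained demonstration of that tiling; the std/mean
# values are the usual Fast R-CNN defaults and are illustrative only.
import numpy as np

def unnormalize_deltas_demo():
    num_classes = 3
    stds = np.tile(np.array([0.1, 0.1, 0.2, 0.2]), num_classes)   # (4*K,)
    means = np.tile(np.array([0.0, 0.0, 0.0, 0.0]), num_classes)  # (4*K,)
    box_deltas = np.random.randn(5, 4 * num_classes)  # R x (4*K) net output
    # broadcasting applies the per-column scale and shift to every row
    return box_deltas * stds + means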
def im_detect(net, im1, im2, boxes=None):
    """Detect object classes in a visible/LWIR image pair given object
    proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im1, im2 (ndarray): color images to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im1, im2, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data_visible']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data_visible'].reshape(*(blobs['data_visible'].shape))
    net.blobs['data_lwir'].reshape(*(blobs['data_lwir'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {
        'data_visible': blobs['data_visible'].astype(np.float32, copy=False),
        'data_lwir': blobs['data_lwir'].astype(np.float32, copy=False)
    }
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    ################################ Draw RPN ################################
    scores = net.blobs['rpn_scores'].data.copy()
    rois = net.blobs['rois'].data.copy()
    idx = np.where(scores > 0.9)
    if len(idx[0]) == 0:
        # (the original tested len(idx) on the tuple returned by np.where,
        # which is never 0, and called the typo'd pld.set_trace())
        pdb.set_trace()
    proposals = rois[idx[0], 1:]

    img = net.blobs['data_visible'].data.copy()[0]
    img = img.transpose((1, 2, 0))
    img += cfg.PIXEL_MEANS[:, :, :3]
    img = img[:, :, (2, 1, 0)]

    plt.figure(11)
    plt.clf()
    plt.title('Proposals, score >= 0.9')
    ax = plt.gca()
    ax.imshow(img.astype(np.uint8))
    for pr in proposals:
        ax.add_patch(
            plt.Rectangle((pr[0], pr[1]), pr[2] - pr[0], pr[3] - pr[1],
                          fill=False, edgecolor='r'))
    ###########################################################################

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im1.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride=[16, ], anchor_scales=[8, 16, 32],
                   anchor_ratios=[0.5, 1, 2]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(ratios=anchor_ratios,
                                scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    # print('rpn_bbox_pred 1', rpn_bbox_pred)
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])
    # print('rpn_bbox_pred 2', rpn_bbox_pred)
    # rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,
    #     [1, rpn_cls_prob_reshape.shape[0], rpn_cls_prob_reshape.shape[1],
    #      rpn_cls_prob_reshape.shape[2]]), [0, 3, 2, 1])
    # rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 2, 1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    # cfg_key = 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    # print('bbox1', bbox_deltas)
    # im_info = bottom[2].data[0, :]

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    # print('anchors', anchors)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    # print('bbox_deltas', bbox_deltas)

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    # print('proposals1', proposals)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])
    # print('proposals2', proposals)

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # remove_option = 1
    # if ('TEST' == cfg_key and remove_option in [1, 2]):
    #     # get rid of boxes that are completely inside other boxes,
    #     # with options as to which one to get rid of:
    #     # 1. always the one with lower scores, 2. always the one inside
    #     new_proposals = []
    #     removed_indices = set()
    #     num_props = proposals.shape[0]
    #     for i in range(num_props):
    #         if (i in removed_indices):
    #             continue
    #         bxA = proposals[i, :]
    #         for j in range(num_props):
    #             if ((j == i) or (j in removed_indices)):
    #                 continue
    #             bxB = proposals[j, :]
    #             if (bbox_contains(bxA, bxB)):
    #                 if ((1 == remove_option) and (scores[i] != scores[j])):
    #                     if (scores[i] > scores[j]):
    #                         removed_indices.add(j)
    #                     else:
    #                         removed_indices.add(i)
    #                 else:  # remove_option == 2 or scores[i] == scores[j]
    #                     removed_indices.add(j)
    #     nr = len(removed_indices)
    #     if (nr > 0):
    #         new_proposals = sorted(set(range(num_props)) - removed_indices)
    #         proposals = proposals[new_proposals, :]
    #         scores = scores[new_proposals]
    #         # padding to make the total number of proposals == post_nms_topN
    #         proposals = np.vstack((proposals, [proposals[-1, :]] * nr))
    #         scores = np.vstack((scores, [scores[-1]] * nr))

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0.
    # batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    # BUT we NOW (18-Sep-2017) abuse batch inds and use them to carry scores
    if ('TEST' == cfg_key):
        batch_inds = np.reshape(scores, [proposals.shape[0], 1])
    else:
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    if (DEBUG):
        print('blob shape: {0}'.format(blob.shape))
        print('proposal shape: {0}'.format(proposals.shape))
    return blob
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride=[16, ], anchor_scales=[8, 16, 32]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print "Proposal Layer number of anchors: {}".format(_num_anchors)

    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    if DEBUG:
        print "Proposal Layer rpn_cls_prob_reshape shape: {}".format(
            rpn_cls_prob_reshape.shape)
        print "Proposal Layer rpn_bbox_pred shape: {}".format(
            rpn_bbox_pred.shape)

    # rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,
    #     [1, rpn_cls_prob_reshape.shape[0], rpn_cls_prob_reshape.shape[1],
    #      rpn_cls_prob_reshape.shape[2]]), [0, 3, 2, 1])
    # rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 2, 1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    # cfg_key = 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    # im_info = bottom[2].data[0, :]

    if DEBUG:
        print 'Proposal Layer im_size: ({}, {})'.format(im_info[0],
                                                        im_info[1])
        print 'Proposal Layer scale: {}'.format(im_info[2])

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print 'Proposal Layer score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations.
    # Applied to all 9*h*w anchors (h and w are the feature map height and
    # width).
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
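# A shape walkthrough of the anchor enumeration used by every proposal layer
# above: A base anchors broadcast against K spatial shifts yield K*A anchors,
# row-ordered by (h, w, a) to match the transposed deltas and scores. The
# sizes and base anchors here are toy values for illustration.
import numpy as np

def enumerate_anchors_demo(height=2, width=3, feat_stride=16):
    base_anchors = np.array([[-8, -8, 8, 8],
                             [-16, -16, 16, 16]])  # A = 2 toy anchors
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # K x 4
    A, K = base_anchors.shape[0], shifts.shape[0]
    # (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4)
    anchors = base_anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    assert anchors.shape == (height * width * A, 4)
    return anchors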
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)
    # blobs['foreverone'] = 0.0000001 * np.ones(
    #     (blobs['rois'].shape[0], cfg.TRAIN.mask_num, cfg.TRAIN.hidden_size),
    #     dtype=np.float32)
    # blobs['sample'] = np.random.normal(
    #     size=(blobs['rois'].shape[0], cfg.TRAIN.mask_num,
    #           cfg.TRAIN.hidden_size))

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))
    # net.blobs['foreverone'].reshape(*(blobs['foreverone'].shape))
    # net.blobs['sample'].reshape(*(blobs['sample'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    # forward_kwargs['foreverone'] = blobs['foreverone'].astype(np.float32,
    #                                                           copy=False)
    # forward_kwargs['sample'] = blobs['sample'].astype(np.float32,
    #                                                   copy=False)
    blobs_out = net.forward(**forward_kwargs)

    # saveto = 'test.mat'
    # netdata = dict()
    # netdata['gpu'] = net.blobs['proposal'].data
    # sio.savemat(saveto, netdata)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas, first undoing the
        # precomputed target normalization (means/stds hard-coded here for a
        # two-class, 8-column bbox_pred layout).
        box_deltas = blobs_out['bbox_pred']
        bbox_means = np.array([
            0.0, 0.0, 0.0, 0.0,
            1.03960042775271e-10, 0.00622199373803706,
            0.0207805908339361, 0.0524860248101128
        ])
        bbox_stds = np.array([
            0.0, 0.0, 0.0, 0.0,
            0.131444678954748, 0.125309184804088,
            0.249703604170591, 0.216150527133179
        ])
        # Broadcast the per-column statistics across all rows. (The
        # original used np.repeat(...).reshape(N, 8), which scrambles the
        # statistics across rows whenever N > 1.)
        box_deltas = box_deltas * bbox_stds + bbox_means
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    # if net.blobs.has_key('deterministic_prob') and \
    #         cfg.TEST.deterministic_prob:
    #     objectness = net.blobs['deterministic_prob'].data
    #     objectness = objectness[inv_index, :]
    #     for i in xrange(objectness.shape[0]):
    #         if objectness[i, 0] > 0.5:
    #             scores[i, 0] = 0.0

    return scores, pred_boxes
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride=[16, ], anchor_scales=[8, 16, 32]):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # layer_params = yaml.load(self.param_str_)
    # Anchors are generated so that the offsets predicted by the RPN (the
    # deltas in this code) can be inverse-transformed into box coordinates.
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])
    # rpn_cls_prob_reshape = np.transpose(np.reshape(rpn_cls_prob_reshape,
    #     [1, rpn_cls_prob_reshape.shape[0], rpn_cls_prob_reshape.shape[1],
    #      rpn_cls_prob_reshape.shape[2]]), [0, 3, 2, 1])
    # rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 2, 1])
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    # cfg_key = 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    # im_info = bottom[2].data[0, :]

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # Note: anchor_target_layer samples a total of 256 positive and negative
    # anchors, but only for training the RPN itself (only labels 0 and 1
    # contribute to the loss); anchors labeled -1 still produce proposals,
    # so roughly 20000 proposals reach this point.
    # The overall pipeline is anchor -> proposal -> (mapped back to the
    # original image) RoI.
    # Proposals are not necessarily all foreground boxes: the top 6000 by
    # score go into NMS, which keeps 2000, so if fewer than 2000 foreground
    # proposals exist, background boxes will be among them. Fast R-CNN
    # filters them further downstream.

    # Clip proposals to the image boundary.
    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # min_size * im_info[2], e.g. 16 * (1/16): proposals must be larger than
    # the configured minimum size.
    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # NMS filtering: ~20000 -> 2000
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    # Take the top 6000 proposals by score (foreground probability) and feed
    # them to NMS.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    # Keep the top post_nms_topN (e.g. 2000) after NMS.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0.
    # In the returned blob the first column is all zeros (the proposal's
    # image index within the batch); columns 1-4 are x1, y1, x2, y2. The
    # score is used only as the ranking criterion for NMS and nowhere else.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
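# nms() is imported from the CPU/GPU extension modules in the original
# repositories and not shown here. A plain-NumPy sketch of greedy IoU-based
# NMS matching the (x1, y1, x2, y2, score) layout used above; the reference
# implementations are Cython/CUDA but compute the same keep list.
import numpy as np

def nms_sketch(dets, thresh):
    x1, y1 = dets[:, 0], dets[:, 1]
    x2, y2 = dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top-scoring box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes that overlap the winner by at most thresh
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep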
def im_detect(sess, net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        sess (tf.Session): TensorFlow session to run the network in
        net: Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        score (ndarray): R array of top object class scores
        label (ndarray): R array of predicted class indices
        pred_box (ndarray): R x 4 array of predicted bounding boxes
        mask (ndarray): R x H x W array of mask probabilities for the
            predicted class
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    # forward pass
    if cfg.TEST.HAS_RPN:
        feed_dict = {net.data: blobs['data'],
                     net.im_info: blobs['im_info'],
                     net.keep_prob: 1.0}
    else:
        feed_dict = {net.data: blobs['data'],
                     net.rois: blobs['rois'],
                     net.keep_prob: 1.0}

    run_options = None
    run_metadata = None
    if cfg.TEST.DEBUG_TIMELINE:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

    cls_score, cls_prob, bbox_pred, rois, mask_prob = sess.run(
        [net.get_output('cls_score'), net.get_output('cls_prob'),
         net.get_output('bbox_pred'), net.get_output('rois'),
         net.get_output('mask_prob')],
        feed_dict=feed_dict,
        options=run_options, run_metadata=run_metadata)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5]  # / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = cls_score
    else:
        # use softmax estimated probabilities
        scores = cls_prob

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        if cfg.DEBUG:
            # print box_deltas[0, :]
            # print 'cls_prob'
            # print cls_prob[0:10, :]
            # print 'boxes:'
            # print boxes[0]
            # print 'mask'
            # print mask_prob[0, :, :, 1]
            pass
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes /= im_scales[0]
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.DEBUG_TIMELINE:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        trace_file = open(str(long(time.time() * 1000)) +
                          '-test-timeline.ctf.json', 'w')
        trace_file.write(trace.generate_chrome_trace_format(show_memory=False))
        trace_file.close()

    # Keep, for each proposal, only the top-scoring class with its box and mask
    score = np.amax(scores, axis=1)
    label = np.argmax(scores, axis=1)
    pred_box = np.zeros((pred_boxes.shape[0], 4))
    mask = np.zeros(mask_prob.shape[0:3])
    for i in range(len(label)):
        l = label[i]
        pred_box[i, :] = pred_boxes[i, 4 * l:4 * (l + 1)]
        mask[i, :, :] = mask_prob[i, :, :, l]

    if cfg.DEBUG:
        print 'scores shape: '
        print scores.shape
        print 'mask shape: '
        print mask.shape

    return score, label, pred_box, mask


def vis_detections(im, class_name, dets, thresh=0.8):
    """Visual debugging of detections."""
    import matplotlib.pyplot as plt
    # im = im[:, :, (2, 1, 0)]
    for i in xrange(np.minimum(10, dets.shape[0])):
        bbox = dets[i, :4]
        score = dets[i, -1]
        if score > thresh:
            # plt.cla()
            # plt.imshow(im)
            plt.gca().add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1], fill=False,
                              edgecolor='g', linewidth=3))
            plt.gca().text(bbox[0], bbox[1] - 2,
                           '{:s} {:.3f}'.format(class_name, score),
                           bbox=dict(facecolor='blue', alpha=0.5),
                           fontsize=14, color='white')
            plt.title('{} {:.3f}'.format(class_name, score))
    # plt.show()


def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(num_classes)]
    for cls_ind in xrange(num_classes):
        for im_ind in xrange(num_images):
            dets = all_boxes[cls_ind][im_ind]
            if len(dets) == 0:
                continue
            x1 = dets[:, 0]
            y1 = dets[:, 1]
            x2 = dets[:, 2]
            y2 = dets[:, 3]
            scores = dets[:, 4]
            inds = np.where((x2 > x1) & (y2 > y1) &
                            (scores > cfg.TEST.DET_THRESHOLD))[0]
            dets = dets[inds, :]
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes


def test_net(sess, net, imdb, weights_filename, output_dir,
             max_per_image=300, thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    # output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the
            # roidb comes from the training or val split). We only want to
            # evaluate detection on the *non*-ground-truth rois. We select
            # the rois that have the gt_classes field set to 0, which means
            # there's no ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(sess, net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        if vis:
            image = im[:, :, (2, 1, 0)]
            plt.cla()
            plt.imshow(image)

        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(image, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets
        if vis:
            plt.show()

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
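# ---------------------------------------------------------------------------
# Standalone sketch of the DEDUP_BOXES trick used by the im_detect variants
# above: scaled ROI coordinates are hashed into single numbers so np.unique
# can find distinct image ROIs that map to the same feature-map ROI. The toy
# values below are illustrative only; 1/16 is the usual cfg.DEDUP_BOXES for a
# stride-16 network.
import numpy as np

rois = np.array([[0, 10.0, 20.0, 40.0, 60.0],
                 [0, 10.2, 20.1, 39.9, 59.9],    # same feature ROI as row 0
                 [0, 80.0, 90.0, 120.0, 150.0]])
dedup_scale = 1. / 16
v = np.array([1, 1e3, 1e6, 1e9, 1e12])
hashes = np.round(rois * dedup_scale).dot(v)
_, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
print(index)      # unique subset to run through the net: [0 2]
print(inv_index)  # maps results back to all three input ROIs: [0 0 1]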
def forward(self, bottom, top):
    # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
    # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
    all_rois = bottom[0].data
    aaa = all_rois[:]
    # GT boxes (x1, y1, x2, y2, label)
    # TODO(rbg): it's annoying that sometimes I have extra info before
    # and other times after box coordinates -- normalize to one format
    gt_boxes = bottom[1].data
    im = bottom[2].data

    # Include ground-truth boxes in the set of candidate rois
    zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    all_rois = np.vstack(
        (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
    )

    num_images = 1
    rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
    fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)

    rois, labels, bbox_targets, bbox_weights, layer_indexs = _sample_rois(
        all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
        self._num_classes, sample_type='fpn', k0=4)

    vis = False
    if vis:
        ind = np.where(labels != 0)[0]
        im_shape = im.shape
        # Optionally normalize targets by a precomputed mean and stdev
        means = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (21, 1)).ravel()
        stds = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (21, 1)).ravel()
        bbox_targets = bbox_targets * stds + means
        pred_boxes = bbox_transform_inv(rois[:, 1:], bbox_targets)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        l = labels[ind]
        ro = rois[ind, 1:]
        b = bbox_targets[ind, :]
        p = pred_boxes[ind, :] * bbox_weights[ind, :]
        r = []
        for i in range(p.shape[0]):
            r.append(p[i, l[i] * 4:l[i] * 4 + 4])
        r_ = np.vstack(r)
        vis_all_detection(im, aaa[:, 1:], l, 1)

    # Distribute the sampled rois over the four FPN levels (P2-P5), padding
    # each level's batch to a fixed size
    rois_ = np.zeros((self._batch_rois * 4, 5), dtype=rois.dtype)
    labels_all = np.ones((self._batch_rois * 4, ), dtype=labels.dtype) * -1
    bbox_targets_all = np.zeros(
        (self._batch_rois * 4, self._num_classes * 4),
        dtype=bbox_targets.dtype)
    bbox_weights_all = np.zeros(
        (self._batch_rois * 4, self._num_classes * 4),
        dtype=bbox_weights.dtype)
    rois_all = []
    for i in range(4):
        index = (layer_indexs == (i + 2))
        num_index = sum(index)
        start = self._batch_rois * i
        end = start + num_index
        index_range = range(start, end)
        rois_[index_range, :] = rois[index, :]
        rois_all.append(rois_[range(start, start + self._batch_rois), :])
        labels_all[index_range] = labels[index]
        bbox_targets_all[index_range, :] = bbox_targets[index, :]
        bbox_weights_all[index_range, :] = bbox_weights[index, :]

    rois_p2, rois_p3, rois_p4, rois_p5 = rois_all

    top[0].reshape(*rois_p2.shape)
    top[0].data[...] = rois_p2
    top[1].reshape(*rois_p3.shape)
    top[1].data[...] = rois_p3
    top[2].reshape(*rois_p4.shape)
    top[2].data[...] = rois_p4
    top[3].reshape(*rois_p5.shape)
    top[3].data[...] = rois_p5
    # classification labels
    top[4].reshape(*labels_all.shape)
    top[4].data[...] = labels_all
    # bbox_targets
    top[5].reshape(*bbox_targets_all.shape)
    top[5].data[...] = bbox_targets_all
    # bbox_inside_weights
    top[6].reshape(*bbox_weights_all.shape)
    top[6].data[...] = bbox_weights_all
    # bbox_outside_weights
    top[7].reshape(*bbox_weights_all.shape)
    top[7].data[...] = np.array(bbox_weights_all > 0).astype(np.float32)
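# ---------------------------------------------------------------------------
# A minimal sketch (not part of the layer above) of how `_sample_rois` with
# sample_type='fpn' presumably computes `layer_indexs`: each RoI is assigned
# to pyramid level k = floor(k0 + log2(sqrt(w*h) / 224)), clipped to [2, 5],
# following the FPN paper. The helper name `assign_pyramid_level` is
# hypothetical; the actual implementation in this codebase may differ.
import numpy as np

def assign_pyramid_level(rois, k0=4):
    """rois: R x 4 array of (x1, y1, x2, y2). Returns R level indices in [2, 5]."""
    w = rois[:, 2] - rois[:, 0] + 1
    h = rois[:, 3] - rois[:, 1] + 1
    k = np.floor(k0 + np.log2(np.sqrt(w * h) / 224.0))
    return np.clip(k, 2, 5).astype(np.int32)

rois = np.array([[50.0, 50.0, 100.0, 120.0],   # small box -> low level
                 [0.0, 0.0, 400.0, 500.0]])    # large box -> high level
print(assign_pyramid_level(rois))               # [2 5]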
def im_detect(net, im, feat_blob, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        feat_blob (str): name of the feature blob to be extracted
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
        features (ndarray): R x D array of features
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # the last column of the pid_prob is the non-person box score
        scores = blobs_out['pid_prob'][:, -1]
        scores = scores[:, np.newaxis]
        scores = np.hstack([scores, 1. - scores])

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    features = net.blobs[feat_blob].data.copy()

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
        features = features[inv_index, :]

    return scores, pred_boxes, features
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    ### my notes:
    # blobs: dict of {data, rois}; we only have 'data' here, which is the
    # preprocessed network input, in BGR order and N*C*H*W layout, with h
    # and w already meeting the input standard.
    # im_scales: by how much the input image was scaled; the output bounding
    # boxes must be scaled back. im_scale = 600 / min_side or 1000 / max_side
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    ### ignore this part, as we use the RPN when testing.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    ### im_info is [h, w, scale]; h and w are in the scaled image.
    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    ### the forward pass has finished here; we can collect the results now.
    # rois: the 300 proposals output by the RPN
    # box_deltas: box refinements output by the network's regression branch
    # pred_boxes: the shifted boxes, i.e. the final bounding boxes
    # scores: score of each box for each class, e.g. 21 for PASCAL VOC
    #         (20 object classes plus background)
    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # print "Num of proposals is %d" % rois.shape[0]
        # rois = net.blobs['rois_1X'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
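# ---------------------------------------------------------------------------
# Reference sketch of the `bbox_transform_inv` used throughout these
# snippets, following the standard py-faster-rcnn implementation; shown here
# because it is called everywhere but never defined in this collection.
import numpy as np

def bbox_transform_inv(boxes, deltas):
    """boxes: R x 4 anchors/proposals; deltas: R x (4*K) regression outputs."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w   # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h   # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w   # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h   # y2
    return pred_boxes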
def my_im_detect(net, im):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs = {'data': None, 'rois': None}

    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS

    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    processed_ims = []
    im_scale_factors = []

    # here all input images are the same size, so the multi-scale resize
    # loop is skipped
    # for target_size in cfg.TEST.SCALES:
    #     im_scale = float(target_size) / float(im_size_min)
    #     # Prevent the biggest axis from being more than MAX_SIZE
    #     if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
    #         im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
    #     im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
    #                     interpolation=cv2.INTER_LINEAR)
    #     im_scale_factors.append(im_scale)
    #     processed_ims.append(im)
    im_scale = 1.0
    im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)
    im_scale_factors.append(im_scale)
    processed_ims.append(im)

    max_shape = np.array([imn.shape for imn in processed_ims]).max(axis=0)
    num_images = len(processed_ims)
    blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    for i in xrange(num_images):
        imn = processed_ims[i]
        blob[i, 0:imn.shape[0], 0:imn.shape[1], :] = imn
    # Move channels (axis 3) to axis 1
    # Axis order will become: (batch elem, channel, height, width)
    channel_swap = (0, 3, 1, 2)
    blob = blob.transpose(channel_swap)

    blobs['data'] = blob
    im_scales = np.array(im_scale_factors)

    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
        dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['im_info'].reshape(*(blobs['im_info'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                        copy=False)
    blobs_out = net.forward(**forward_kwargs)

    assert len(im_scales) == 1, "Only single-image batch implemented"
    rois = net.blobs['rois'].data.copy()
    # unscale back to raw image space
    boxes = rois[:, 1:5] / im_scales[0]

    scores = blobs_out['cls_prob']

    box_deltas = blobs_out['bbox_pred']
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.shape)

    return scores, pred_boxes
def forward(self, bottom, top):
    # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
    # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
    rois = bottom[0].data
    # bbox_pred
    box_deltas = bottom[1].data
    # class scores
    scores = bottom[2].data
    # image info
    im_info = bottom[3].data
    im_scale = im_info[0][2]

    # unscale back to raw image space
    boxes_0 = rois[:, 1:5] / im_scale
    pred_boxes = bbox_transform_inv(boxes_0, box_deltas)
    # original size of the input image
    im_shape = np.array([im_info[0][0], im_info[0][1]]) / im_scale
    # clip predicted boxes using the original input size
    boxes = clip_boxes(pred_boxes, im_shape)

    max_per_image = self._max_per_image
    thresh = self._thresh
    num_classes = scores.shape[1]
    i = 0  # only a single image is supported
    num_images = 1
    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(num_classes)]

    # for each class (ignoring the background class)
    for j in xrange(1, num_classes):
        inds = np.where(scores[:, j] > thresh)[0]
        cls_scores = scores[inds, j]
        # get boxes corresponding to class j
        cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
        cls_dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32,
                                                           copy=False)
        # cfg.TEST.NMS = 0.3
        keep = nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        all_boxes[j][i] = cls_dets

    # Limit to max_per_image detections *over all classes*
    if max_per_image > 0:
        image_scores = np.hstack(
            [all_boxes[j][i][:, -1] for j in xrange(1, num_classes)])
        if len(image_scores) > max_per_image:
            image_thresh = np.sort(image_scores)[-max_per_image]
            for j in xrange(1, num_classes):
                keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                all_boxes[j][i] = all_boxes[j][i][keep, :]

    num_boxes = 0
    for j in xrange(1, num_classes):
        num_boxes = num_boxes + all_boxes[j][i].shape[0]
    # avoid a 'Floating point exception (core dumped)' on empty outputs
    num_boxes = max(num_boxes, 1)

    rois_for_mask = np.zeros((num_boxes, 5), dtype=np.float32)
    rois_class_score = -1 * np.ones((num_boxes, 1), dtype=np.float32)
    rois_class_ind = -1 * np.ones((num_boxes, 1), dtype=np.float32)
    rois_final = np.zeros((num_boxes, 5), dtype=np.float32)

    count = 0
    for j in xrange(1, num_classes):
        all_boxes_j = all_boxes[j][i]  # boxes corresponding to class j
        c = all_boxes_j.shape[0]
        if c > 0:
            coors = all_boxes_j[:, 0:4]
            cl_scores = all_boxes_j[:, 4:5]
            # w.r.t. the network input size, e.g., 600x1000
            rois_for_mask[count:count + c, 1:5] = coors * im_scale
            # w.r.t. the original image size; rois_final is the same as
            # rois_for_mask but at a different scale
            rois_final[count:count + c, 1:5] = coors
            rois_class_score[count:count + c, 0:1] = cl_scores
            rois_class_ind[count:count + c, 0:1] = np.tile(j, [c, 1])
            count = count + c

    # rois_for_mask
    top[0].reshape(*rois_for_mask.shape)
    top[0].data[...] = rois_for_mask
    # classification score
    top[1].reshape(*rois_class_score.shape)
    top[1].data[...] = rois_class_score
    # class index
    top[2].reshape(*rois_class_ind.shape)
    top[2].data[...] = rois_class_ind
    # rois_final
    top[3].reshape(*rois_final.shape)
    top[3].data[...] = rois_final
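# ---------------------------------------------------------------------------
# Reference sketch of the `clip_boxes` helper assumed by the layers and
# detectors in this collection, following the standard py-faster-rcnn
# implementation: every (x1, y1, x2, y2) group is clamped to the image.
import numpy as np

def clip_boxes(boxes, im_shape):
    """Clip boxes (R x 4*K) to image boundaries; im_shape is (height, width)."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes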
def forward(self, bottom, top):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    # either 'TRAIN' or 'TEST'
    cfg_key = 'TRAIN' if self.phase == caffe.TRAIN else 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0].data[:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1].data
    im_info = bottom[2].data[0, :]

    # This is for the extra RoIs: if not used, everything should be
    # the same as Faster R-CNN with RPN
    if len(bottom) > 3:
        extra_rois = bottom[3].data
        n_extra_rois = extra_rois.shape[0]
        if n_extra_rois == 1 and np.all(extra_rois[0, :] == 0):
            n_extra_rois = 0
            extra_rois = np.empty((0, 4), dtype=np.float32)
        n_dontcare_rois = 0
        dontcare_rois = np.empty((0, 4), dtype=np.float32)
        if len(bottom) > 4:
            dontcare_rois = bottom[4].data
            n_dontcare_rois = dontcare_rois.shape[0]
            if n_dontcare_rois == 1 and np.all(dontcare_rois[0, :] == 0):
                n_dontcare_rois = 0
                dontcare_rois = np.empty((0, 4), dtype=np.float32)
    else:
        n_extra_rois = 0
        extra_rois = np.empty((0, 4), dtype=np.float32)
        n_dontcare_rois = 0
        dontcare_rois = np.empty((0, 4), dtype=np.float32)

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])
        print 'external_rois: {}'.format(n_extra_rois)

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # External DontCare miniboxes: drop proposals that are mostly covered
    # by dontcare regions
    if len(dontcare_rois) > 0:
        dontcare_rois = clip_boxes(dontcare_rois, im_info[:2])
        dc_overlaps = np.empty((len(proposals), len(dontcare_rois)),
                               dtype=np.float)
        bbox_overlaps(np.ascontiguousarray(proposals, dtype=np.float),
                      np.ascontiguousarray(dontcare_rois, dtype=np.float),
                      dc_overlaps, 1)
        n_squares = dc_overlaps.sum(axis=1)
        dc_roi_area = (cfg.TRAIN.DONTCARE_BOX_SIDE *
                       cfg.TRAIN.DONTCARE_BOX_SIDE *
                       im_info[2] * im_info[2])
        proposal_areas = (proposals[:, 2] - proposals[:, 0] + 1) * \
                         (proposals[:, 3] - proposals[:, 1] + 1)
        overlapped_area = np.divide(np.multiply(n_squares, dc_roi_area),
                                    proposal_areas)
        keep = np.where(
            overlapped_area <= cfg.TRAIN.MIN_DONTCARE_OVERLAP)[0]
        if DEBUG:
            print 'Proposal layer saving:', len(keep), \
                'proposals out of', len(proposals)
        proposals = proposals[keep, :]
        scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:(post_nms_topN - n_extra_rois)]
    elif post_nms_topN == 0 and n_extra_rois > 0:
        keep = []
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    if DEBUG:
        print 'n_extra_rois', n_extra_rois
    if n_extra_rois > 0:
        batch_inds = np.zeros((n_extra_rois, 1), dtype=np.float32)
        a_extra_rois = np.hstack(
            (batch_inds, extra_rois.astype(np.float32, copy=False)))
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    a_proposals = np.hstack(
        (batch_inds, proposals.astype(np.float32, copy=False)))
    if n_extra_rois > 0:
        blob = np.vstack((a_extra_rois, a_proposals))
    else:
        blob = a_proposals
    # TODO: ablation experiments
    # if n_extra_rois > 0:
    #     blob = a_extra_rois

    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob

    # [Optional] output scores blob
    if len(top) > 1:
        top[1].reshape(*(scores.shape))
        top[1].data[...] = scores
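# ---------------------------------------------------------------------------
# Reference sketch of the `_filter_boxes` helper used by the proposal layers
# above, matching the standard py-faster-rcnn definition; the actual helper
# in this codebase is assumed to be equivalent.
import numpy as np

def _filter_boxes(boxes, min_size):
    """Remove all boxes with any side smaller than min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep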
def im_detect(net, im, boxes=None, svm=False, layer_name='cls_prob'):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # zwang context code
    # rois_image = [0, 0, 0, im.shape[1], im.shape[0]] * im_scales
    # blobs['rois'] = np.vstack((rois_image, blobs['rois']))

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM or svm:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = net.blobs[layer_name].data

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred'].copy()
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.TEST.LR_FLIP:
        # Run the horizontally flipped image through the net and average the
        # two sets of scores and (sign-corrected) box deltas
        flip_im = np.fliplr(im)
        im_height, im_width, _ = im.shape
        flip_boxes = boxes.copy()
        flip_boxes[:, 2] = im_width - 1 - boxes[:, 0]
        flip_boxes[:, 0] = im_width - 1 - boxes[:, 2]
        flip_blobs, im_scales = _get_blobs(flip_im, flip_boxes)
        # reshape network inputs
        net.blobs['data'].data[...] = flip_blobs['data']
        net.blobs['rois'].data[...] = flip_blobs['rois']
        flip_blobs_out = net.forward()
        flip_scores = flip_blobs_out['cls_prob']
        flip_box_deltas = flip_blobs_out['bbox_pred']
        # flipping the image negates the predicted x-offsets
        flip_box_deltas[:, 0::4] = -flip_box_deltas[:, 0::4]
        scores = (scores + flip_scores) / 2
        box_deltas = (box_deltas + flip_box_deltas) / 2
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return net, scores, pred_boxes
def im_detect(sess, net, inputs, im, boxes, relations, bbox_reg, multi_iter):
    blobs, im_scales = _get_blobs(im, boxes)
    # all possible combinations
    relations = np.array(relations, dtype=np.int32)
    num_roi = blobs['rois'].shape[0]
    num_rel = relations.shape[0]

    inputs_feed = data_utils.create_graph_data(num_roi, num_rel, relations)

    feed_dict = {inputs['ims']: blobs['data'],
                 inputs['rois']: blobs['rois'],
                 inputs['relations']: relations,
                 net.keep_prob: 1}

    for k in inputs_feed:
        feed_dict[inputs[k]] = inputs_feed[k]

    # compute relation rois
    feed_dict[inputs['rel_rois']] = \
        data_utils.compute_rel_rois(num_rel, blobs['rois'], relations)

    # Stage 1: VGG feature extraction (kept for reference; the staged version
    # runs the convolutional trunk separately and feeds its output back in):
    # ops_vgg = {'conv_out': net.layers['conv_out']}
    # ops_vgg_value = sess.run(ops_vgg, feed_dict={
    #     inputs['ims']: blobs['data'], net.keep_prob: 1})
    # del feed_dict[inputs['ims']]
    # feed_dict[ops_vgg['conv_out']] = ops_vgg_value['conv_out']

    # Stage 2.
    ops = {
        'bbox_deltas': net.bbox_pred_output(multi_iter),
        'rel_probs': net.rel_pred_output(multi_iter),
        'cls_probs': net.cls_pred_output(multi_iter)
    }

    ops_value = sess.run(ops, feed_dict=feed_dict)

    out_dict = {}
    for mi in multi_iter:
        rel_probs_flat = ops_value['rel_probs'][mi]
        rel_probs = np.zeros([num_roi, num_roi, rel_probs_flat.shape[1]])
        for i, rel in enumerate(relations):
            rel_probs[rel[0], rel[1], :] = rel_probs_flat[i, :]

        cls_probs = ops_value['cls_probs'][mi]

        if bbox_reg:
            # Apply bounding-box regression deltas
            pred_boxes = bbox_transform_inv(boxes,
                                            ops_value['bbox_deltas'][mi])
            pred_boxes = clip_boxes(pred_boxes, im.shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, cls_probs.shape[1]))

        out_dict[mi] = {'scores': cls_probs.copy(),
                        'boxes': pred_boxes.copy(),
                        'relations': rel_probs.copy()}
    return out_dict
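# ---------------------------------------------------------------------------
# `data_utils.compute_rel_rois` is never shown in this collection. A plausible
# sketch, under the assumption that the relation ROI is the union box of the
# subject and object ROIs (a common choice for relation features); the name
# `compute_rel_rois_sketch` and this behavior are assumptions, not the
# confirmed implementation.
import numpy as np

def compute_rel_rois_sketch(num_rel, rois, relations):
    """rois: R x 5 (batch_ind, x1, y1, x2, y2); relations: M x 2 index pairs."""
    rel_rois = np.zeros((num_rel, 5), dtype=np.float32)
    for i, (sub, obj) in enumerate(relations):
        rel_rois[i, 1] = min(rois[sub, 1], rois[obj, 1])  # union x1
        rel_rois[i, 2] = min(rois[sub, 2], rois[obj, 2])  # union y1
        rel_rois[i, 3] = max(rois[sub, 3], rois[obj, 3])  # union x2
        rel_rois[i, 4] = max(rois[sub, 4], rois[obj, 4])  # union y2
    return rel_rois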
def im_detect2(net, im, boxes=None):
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        boxes = rois[:, 1:5] / im_scales[0]

    #################### VISUALIZATION BLOB #######################
    # import matplotlib.pyplot as plt
    # conv5_2 = net.blobs['conv5_2'].data.copy()
    # fig = plt.figure(figsize=(20, 20))
    # columns, rows = 10, 10
    # for i in range(1, columns * rows + 1):
    #     img = conv5_2[0, i, :, :]
    #     fig.add_subplot(rows, columns, i)
    #     plt.imshow(img)
    # plt.savefig('gazebo2UMD_gazebo_conv5_2_100.png')
    #
    # fc7 = net.blobs['fc7'].data.copy()
    # fig = plt.figure(figsize=(20, 20))
    # img = fc7[0:100, 0:500]
    # plt.imshow(img)
    # plt.savefig('gazebo_gazebo_fc7_100X500.png')
    #################### VISUALIZATION BLOB #######################

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        # scores = blobs_out['cls_prob']
        scores = net.blobs['cls_prob'].data.copy()

    if cfg.TEST.MASK_REG:
        rois_class_score = blobs_out['rois_class_score']
        rois_class_ind = blobs_out['rois_class_ind']
        rois_final = blobs_out['rois_final']

    if cfg.TEST.BBOX_REG:
        box_deltas = net.blobs['bbox_pred'].data.copy()
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
        if cfg.TEST.MASK_REG:
            # Nx2x14x14 where N is the number of boxes
            masks_out = blobs_out['mask_prob']
            # masks_out = masks_out[:, 1, :, :]  # masks = Nx14x14
            # DO NOT remove the channel class
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.MASK_REG:
        # return scores, pred_boxes, pred_boxes_before_clip, masks
        return rois_final, rois_class_score, rois_class_ind, masks_out, \
            scores, pred_boxes
    else:
        return scores, pred_boxes
def forward(self, bottom, top):
    cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = self._min_sizes

    im_info = bottom[0].data[0, :]

    batch_size = bottom[1].data.shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images per device is not implemented")

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    cls_prob_dict = {
        'stride64': bottom[10].data,
        'stride32': bottom[9].data,
        'stride16': bottom[8].data,
        'stride8': bottom[7].data,
        'stride4': bottom[6].data,
    }
    bbox_pred_dict = {
        'stride64': bottom[5].data,
        'stride32': bottom[4].data,
        'stride16': bottom[3].data,
        'stride8': bottom[2].data,
        'stride4': bottom[1].data,
    }

    proposal_list = []
    score_list = []
    for s in self._feat_stride:
        stride = int(s)
        sub_anchors = generate_anchors(base_size=stride,
                                       scales=self._scales,
                                       ratios=self._ratios)
        scores = cls_prob_dict['stride' + str(s)][:, self._num_anchors:, :, :]
        bbox_deltas = bbox_pred_dict['stride' + str(s)]

        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use the real image size instead of the padded feature map sizes
        height, width = int(im_info[0] / stride), int(im_info[1] / stride)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = _clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = _clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        proposal_list.append(proposals)
        score_list.append(scores)

    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det, nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure the output size remains unchanged
    if len(keep) < post_nms_topN:
        try:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
        except ValueError:
            # no proposals survived NMS: emit dummy 16x16 boxes instead
            proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
            proposals[:, 2] = 16
            proposals[:, 3] = 16
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack(
                (batch_inds, proposals.astype(np.float32, copy=False)))
            top[0].reshape(*(blob.shape))
            top[0].data[...] = blob
            return
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob
class ProposalLayer(caffe.Layer):
    """
    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").
    """

    def setup(self, bottom, top):
        # parse the layer parameter string, which must be valid YAML
        layer_params = yaml.load(self.param_str_)

        self._feat_stride = layer_params['feat_stride']
        anchor_scales = layer_params.get('scales', (8, 16, 32))
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        self._num_anchors = self._anchors.shape[0]

        if DEBUG:
            print 'feat_stride: {}'.format(self._feat_stride)
            print 'anchors:'
            print self._anchors

        # rois blob: holds R regions of interest, each is a 5-tuple
        # (n, x1, y1, x2, y2) specifying an image batch index n and a
        # rectangle (x1, y1, x2, y2)
        top[0].reshape(1, 5)

        # scores blob: holds scores for R regions of interest
        if len(top) > 1:
            top[1].reshape(1, 1, 1, 1)

    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds,
                          proposals.astype(np.float32, copy=False)))
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
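# ---------------------------------------------------------------------------
# Standalone demo of the anchor enumeration performed inside the proposal
# layers above: base anchors are shifted to every feature-map cell via a
# meshgrid of stride offsets. Toy 2x2 feature map, stride 16, one base anchor.
import numpy as np

feat_stride = 16
height, width = 2, 2
base_anchors = np.array([[-8, -8, 7, 7]])        # one toy anchor, so A = 1
shift_x = np.arange(0, width) * feat_stride
shift_y = np.arange(0, height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()
A = base_anchors.shape[0]
K = shifts.shape[0]
anchors = (base_anchors.reshape((1, A, 4)) +
           shifts.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))
print(anchors)   # 4 anchors, one centered on each of the 2x2 cells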
def im_detect(sess, net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        sess (tf.Session): TensorFlow session to run the network in
        net: Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    # forward pass
    if cfg.TEST.HAS_RPN:
        feed_dict = {net.data: blobs['data'],
                     net.im_info: blobs['im_info'],
                     net.keep_prob: 1.0}
    else:
        feed_dict = {net.data: blobs['data'],
                     net.rois: blobs['rois'],
                     net.keep_prob: 1.0}

    run_options = None
    run_metadata = None
    if cfg.TEST.DEBUG_TIMELINE:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

    cls_score, cls_prob, bbox_pred, rois = sess.run(
        [net.get_output('cls_score'), net.get_output('cls_prob'),
         net.get_output('bbox_pred'), net.get_output('rois')],
        feed_dict=feed_dict,
        options=run_options, run_metadata=run_metadata)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = cls_score
    else:
        # use softmax estimated probabilities
        scores = cls_prob

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.DEBUG_TIMELINE:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        trace_file = open(str(long(time.time() * 1000)) +
                          '-test-timeline.ctf.json', 'w')
        trace_file.write(trace.generate_chrome_trace_format(show_memory=False))
        trace_file.close()

    return scores, pred_boxes
def im_detect(net, im, _t, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    _t['im_preproc'].tic()
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [np.hstack((im_blob.shape[2], im_blob.shape[3], im_scales[0]))],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))

    # do forward
    net.blobs['data'].data[...] = blobs['data']
    # forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].data[...] = blobs['im_info']
        # forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
        #                                                     copy=False)
    _t['im_preproc'].toc()

    _t['im_net'].tic()
    blobs_out = net.forward()
    _t['im_net'].toc()
    # blobs_out = net.forward(**forward_kwargs)

    _t['im_postproc'].tic()
    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    # use softmax estimated probabilities
    scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
        # the three part branches regress their own boxes off the same rois
        part_boxes = []
        for i in range(3):
            part_deltas = blobs_out['part' + str(i + 1) + '_pred']
            parts = bbox_transform_inv(boxes, part_deltas)
            parts = clip_boxes(parts, im.shape)
            part_boxes.append(parts)
    _t['im_postproc'].toc()

    return scores, pred_boxes, part_boxes
def im_detect(sess, net, im, boxes=None, save_vis_dir=None,
              img_name='', include_rpn_score=False):
    """Detect object classes in an image given object proposals.

    Arguments:
        sess (tf.Session): TensorFlow session to run the network in
        net: Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    # forward pass
    if cfg.TEST.HAS_RPN:
        feed_dict = {net.data: blobs['data'],
                     net.im_info: blobs['im_info'],
                     net.keep_prob: 1.0}
    else:
        feed_dict = {net.data: blobs['data'],
                     net.rois: blobs['rois'],
                     net.keep_prob: 1.0}

    run_options = None
    run_metadata = None
    if cfg.TEST.DEBUG_TIMELINE:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

    # theta_tensor = tf.get_default_graph().get_tensor_by_name('spt_trans_theta')
    cls_score, cls_prob, bbox_pred, rois = sess.run(
        [net.get_output('cls_score'), net.get_output('cls_prob'),
         net.get_output('bbox_pred'), net.get_output('rois')],
        feed_dict=feed_dict,
        options=run_options, run_metadata=run_metadata)

    if save_vis_dir is not None and os.path.exists(save_vis_dir):
        # first get the weights out
        with tf.variable_scope('conv5_3', reuse=True) as scope:
            conv5_3_weights = tf.get_variable("weights")

        conv5_3_weights_np, conv5_3_features, st_pool_features = \
            sess.run([conv5_3_weights, net.get_output('conv5_3'),
                      net.get_output('pool_5')],
                     feed_dict=feed_dict,
                     options=run_options, run_metadata=run_metadata)
        np.save(os.path.join(save_vis_dir, '%s_conv5_3_w.npy' % img_name),
                conv5_3_weights_np)
        np.save(os.path.join(save_vis_dir, '%s_conv5_3_f.npy' % img_name),
                conv5_3_features)
        np.save(os.path.join(save_vis_dir, '%s_st_pool_f.npy' % img_name),
                st_pool_features)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = cls_score
    else:
        # use softmax estimated probabilities
        scores = cls_prob

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        # project_bbox_inv(pred_boxes, theta)  # project spatially
        # transformed box back
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.DEBUG_TIMELINE:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        trace_file = open(str(int(time.time() * 1000)) +
                          '-test-timeline.ctf.json', 'w')
        trace_file.write(trace.generate_chrome_trace_format(show_memory=False))
        trace_file.close()

    if include_rpn_score:
        # score is a joint prob instead of a conditional prob
        scores *= np.reshape(rois[:, 0], [-1, 1])

    return scores, pred_boxes
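# ---------------------------------------------------------------------------
# Tiny numpy illustration of the include_rpn_score rescoring above: class
# probabilities conditioned on a proposal are multiplied by that proposal's
# objectness score (stored in rois[:, 0] here) to form a joint probability.
# Toy values only.
import numpy as np

cls_prob = np.array([[0.1, 0.9],         # P(class | proposal)
                     [0.5, 0.5]])
rpn_score = np.array([0.8, 0.2])         # P(proposal) from the RPN
joint = cls_prob * np.reshape(rpn_score, [-1, 1])
print(joint)                             # [[0.08 0.72], [0.1 0.1]]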
samp_idx = random.sample(range(n_all_samples), min(n_samp, n_all_samples))
print 'sampled {} images.'.format(len(samp_idx))

body_boxes = np.vstack([roidb[i]['body_boxes'] for i in samp_idx])
head_boxes = np.vstack([roidb[i]['head_boxes'] for i in samp_idx])
print 'load {} boxes.'.format(len(body_boxes))

# keep only valid box pairs
valid_inds = where_valid(head_boxes, body_boxes)
body_boxes = body_boxes[valid_inds]
head_boxes = head_boxes[valid_inds]

# transform
trans_params = bbox_transform(body_boxes, head_boxes)
print '> trans_params:\n', trans_params
trans_params = trans_params.mean(axis=0)
print '> mean:\n', trans_params

# show a transformed example
show_idx = 30
body_boxes = roidb[show_idx]['body_boxes']
head_trans_boxes = bbox_transform_inv(body_boxes,
                                      np.tile(trans_params,
                                              (len(body_boxes), 1)))
image_path = imdb.image_path_at(show_idx)
im = cv2.imread(image_path)[:, :, [2, 1, 0]]
plt.imshow(im)
ax = plt.gca()
show_box(ax, body_boxes, head_trans_boxes)
plt.title(image_path)
plt.show()
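# ---------------------------------------------------------------------------
# Reference sketch of the forward `bbox_transform` used in the snippet above
# (the inverse of bbox_transform_inv), following the standard py-faster-rcnn
# definition: it encodes how to move/scale an example box onto a target box.
import numpy as np

def bbox_transform(ex_rois, gt_rois):
    """ex_rois, gt_rois: R x 4 boxes. Returns R x 4 (dx, dy, dw, dh) targets."""
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)

    return np.vstack((targets_dx, targets_dy,
                      targets_dw, targets_dh)).transpose()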
def forward(self, bottom, top):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    cfg_key = 'TEST'  # str(self.phase)  # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0].data[:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1].data
    im_info = bottom[2].data[0, :]

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
              shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob

    # [Optional] output scores blob
    if len(top) > 1:
        top[1].reshape(*(scores.shape))
        top[1].data[...] = scores
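# ---------------------------------------------------------------------------
# Pure-numpy reference for the Cython `nms` these layers call: greedily keep
# the highest-scoring box and suppress boxes whose IoU with it exceeds the
# threshold. Semantically equivalent to the standard py_cpu_nms, shown here
# only as a sketch of what `nms` is assumed to compute.
import numpy as np

def py_nms(dets, thresh):
    """dets: N x 5 array of (x1, y1, x2, y2, score). Returns kept indices."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes below the IoU threshold for the next round
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]
    return keep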
def im_detect(feature_net, embed_net, recurrent_net, im, boxes=None,
              use_box_at=-1):
    """Detect object classes in an image given object proposals.

    Arguments:
        feature_net (caffe.Net): CNN model for extracting features
        embed_net (caffe.Net): A word embedding layer
        recurrent_net (caffe.Net): Recurrent model for generating captions
            and locations
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        use_box_at (int32): Use the predicted box at a given timestep;
            defaults to the last one (use_box_at=-1)

    Returns:
        scores (ndarray): R x 1 array of object class scores
        pred_boxes (ndarray): R x 4 array of predicted bounding boxes
        captions (list): length-R list of lists of word tokens (captions)
    """
    # for bbox unnormalization
    bbox_mean = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape((1, 4))
    bbox_stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS).reshape((1, 4))

    blobs, im_scales = _get_blobs(im, boxes)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
        dtype=np.float32)

    # reshape network inputs
    feature_net.blobs['data'].reshape(*(blobs['data'].shape))
    feature_net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    feature_net.forward(data=im_blob, im_info=blobs['im_info'])

    region_features = feature_net.blobs['region_features'].data.copy()
    rois = feature_net.blobs['rois'].data.copy()
    # detection scores
    scores = feature_net.blobs['cls_probs'].data[:, 1].copy()
    # proposal boxes
    boxes = rois[:, 1:5] / im_scales[0]

    proposal_n = rois.shape[0]
    feat_args = {'input_features': region_features}
    opt_args = {}
    # global feature as an optional input: context
    if 'global_features' in feature_net.blobs and \
            'global_features' in recurrent_net.blobs:
        # tiled according to the global feature shape
        opt_args['global_features'] = np.tile(
            feature_net.blobs['global_features'].data, (1, proposal_n, 1))

    bbox_pred_direct = ('bbox_pred' in feature_net.blobs)
    if bbox_pred_direct:
        # do greedy search
        captions, _, logprobs = _greedy_search(embed_net, recurrent_net,
                                               feat_args, opt_args,
                                               proposal_n, pred_bbox=False)
        box_offsets = feature_net.blobs['bbox_pred'].data
    else:
        captions, box_offsets, logprobs = _greedy_search(
            embed_net, recurrent_net, feat_args, opt_args, proposal_n,
            pred_bbox=True, use_box_at=use_box_at)

    # bbox target unnormalization
    box_deltas = box_offsets * bbox_stds + bbox_mean
    # do the transformation
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.shape)

    return scores, pred_boxes, captions
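# ---------------------------------------------------------------------------
# Tiny numpy illustration of the bbox target unnormalization used above: the
# network regresses normalized deltas, so raw predictions are scaled by the
# training stds and shifted by the means before bbox_transform_inv. The
# values below mirror the common py-faster-rcnn defaults and are for
# illustration only.
import numpy as np

bbox_mean = np.array([0.0, 0.0, 0.0, 0.0]).reshape((1, 4))
bbox_stds = np.array([0.1, 0.1, 0.2, 0.2]).reshape((1, 4))
box_offsets = np.array([[1.0, -1.0, 0.5, 0.5]])    # raw network output
box_deltas = box_offsets * bbox_stds + bbox_mean
print(box_deltas)                                   # [[ 0.1 -0.1  0.1  0.1]]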
def im_detect(sess, net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        sess (tf.Session): TensorFlow session to run the network in
        net: Fast R-CNN network to use (TensorFlow graph wrapper)
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

    # forward pass
    if cfg.TEST.HAS_RPN:
        feed_dict = {net.data: blobs['data'],
                     net.im_info: blobs['im_info'],
                     net.keep_prob: 1.0}
    else:
        feed_dict = {net.data: blobs['data'],
                     net.rois: blobs['rois'],
                     net.keep_prob: 1.0}

    run_options = None
    run_metadata = None
    if cfg.TEST.DEBUG_TIMELINE:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

    cls_score, cls_prob, bbox_pred, rois = sess.run(
        [net.get_output('cls_score'), net.get_output('cls_prob'),
         net.get_output('bbox_pred'), net.get_output('rois')],
        feed_dict=feed_dict,
        options=run_options,
        run_metadata=run_metadata)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = cls_score
    else:
        # use softmax estimated probabilities
        scores = cls_prob

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = _clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.DEBUG_TIMELINE:
        trace = timeline.Timeline(step_stats=run_metadata.step_stats)
        trace_file = open(str(long(time.time() * 1000)) + '-test-timeline.ctf.json', 'w')
        trace_file.write(trace.generate_chrome_trace_format(show_memory=False))
        trace_file.close()

    return scores, pred_boxes
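# Hypothetical driver for the TensorFlow variant above; get_network and the
# checkpoint path are assumptions, not part of this module.
import tensorflow as tf

net = get_network('VGGnet_test')  # assumed network factory
saver = tf.train.Saver()
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    saver.restore(sess, 'VGGnet_fast_rcnn.ckpt')  # assumed checkpoint
    scores, pred_boxes = im_detect(sess, net, im, boxes)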
def im_detect(sess, net, inputs, im, boxes, bbox_reg, multi_iter):
    blobs, im_scales = _get_blobs(im, boxes)

    im_orig = im.astype(np.float32, copy=True)
    im_orig -= cfg.PIXEL_MEANS
    im_shape = im_orig.shape
    im_size_min = np.min(im_shape[0:2])

    # build candidate relation pairs: within each of the 10 activity groups,
    # pair up boxes whose top-left corners are closer than half the short
    # image side, skipping the last (padding) box of each group
    relations = []
    num_box_per_image = int(boxes.shape[0] / 10)
    for act_i in range(10):
        curr_box = boxes[act_i * num_box_per_image:(act_i + 1) * num_box_per_image]
        for i in range(num_box_per_image):
            for j in range(num_box_per_image):
                # if i != j:  # and i < j
                x1 = curr_box[i][0]
                y1 = curr_box[i][1]
                x2 = curr_box[j][0]
                y2 = curr_box[j][1]
                if i < j and math.sqrt((x1 - x2)**2 + (y1 - y2)**2) < im_size_min / 2 \
                        and i != num_box_per_image - 1 and j != num_box_per_image - 1:
                    relations.append([i + act_i * num_box_per_image,
                                      j + act_i * num_box_per_image])
    if len(relations) == 0:
        relations.append([0, 1])
    print(len(relations))

    relations = np.array(relations, dtype=np.int32)  # all possible combinations
    spa_relations = relations.copy()

    num_roi = blobs['rois'].shape[0]
    num_rel = relations.shape[0]
    num_spa_rel = spa_relations.shape[0]

    inputs_feed = data_utils.create_graph_data(num_roi, num_rel, relations)

    feed_dict = {inputs['ims']: blobs['data'],
                 inputs['rois']: blobs['rois'],
                 inputs['relations']: relations,
                 net.keep_prob: 1}
    for k in inputs_feed:
        feed_dict[inputs[k]] = inputs_feed[k]

    # compute relation rois
    feed_dict[inputs['rel_rois']] = \
        data_utils.compute_rel_rois(num_spa_rel, blobs['rois'], spa_relations)

    ops = {}
    if bbox_reg:
        # fetched here because the regression branch below reads
        # ops_value['bbox_deltas']
        ops['bbox_deltas'] = net.bbox_pred_output(multi_iter)
    ops['rel_probs'] = net.rel_pred_output(multi_iter)
    ops['cls_probs'] = net.cls_pred_output(multi_iter)
    ops['vert'] = net.getver()

    ops_value = sess.run(ops, feed_dict=feed_dict)

    out_dict = {}
    for mi in multi_iter:
        # scatter the flat per-pair relation scores into a dense
        # (num_roi, num_roi, num_predicates) tensor
        rel_probs_flat = ops_value['rel_probs'][mi]
        rel_probs = np.zeros([num_roi, num_roi, rel_probs_flat.shape[1]])
        for i, rel in enumerate(relations):
            rel_probs[rel[0], rel[1], :] = rel_probs_flat[i, :]

        cls_probs = ops_value['cls_probs'][mi]

        if bbox_reg:
            # Apply bounding-box regression deltas
            pred_boxes = bbox_transform_inv(boxes, ops_value['bbox_deltas'][mi])
            pred_boxes = clip_boxes(pred_boxes, im.shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, cls_probs.shape[1]))

        out_dict[mi] = {'scores': cls_probs.copy(),
                        'boxes': pred_boxes.copy(),
                        'relations': rel_probs.copy(),
                        'vert': ops_value['vert'].copy()}

    return out_dict
def forward(self, bottom, top):
    cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = self._min_sizes

    im_info = bottom[0].data[0, :]

    batch_size = bottom[1].data.shape[0]
    if batch_size > 1:
        raise ValueError("Sorry, multiple images per device are not implemented")

    cls_prob_dict = {
        'stride64': bottom[10].data,
        'stride32': bottom[9].data,
        'stride16': bottom[8].data,
        'stride8': bottom[7].data,
        'stride4': bottom[6].data,
    }
    bbox_pred_dict = {
        'stride64': bottom[5].data,
        'stride32': bottom[4].data,
        'stride16': bottom[3].data,
        'stride8': bottom[2].data,
        'stride4': bottom[1].data,
    }

    proposal_list = []
    score_list = []
    for s in self._feat_stride:
        stride = int(s)
        sub_anchors = generate_anchors(base_size=stride, scales=self._scales,
                                       ratios=self._ratios)
        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = cls_prob_dict['stride' + str(s)][:, self._num_anchors:, :, :]
        bbox_deltas = bbox_pred_dict['stride' + str(s)]

        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height, width = int(im_info[0] / stride), int(im_info[1] / stride)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = _clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = _clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        proposal_list.append(proposals)
        score_list.append(scores)

    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det, nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]

    # pad to ensure the output size remains unchanged; npr.choice raises a
    # ValueError when `keep` is empty, in which case we fall back to dummy
    # 16x16 boxes so the blob shape stays fixed
    if len(keep) < post_nms_topN:
        try:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
        except ValueError:
            proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
            proposals[:, 2] = 16
            proposals[:, 3] = 16
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
            top[0].reshape(*(blob.shape))
            top[0].data[...] = blob
            return
        keep = np.hstack((keep, pad))

    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    # if is_train:
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob
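# `_clip_pad` is referenced above but not defined in this file. A minimal
# sketch, assuming the usual FPN convention that score/delta maps may be
# computed on padded inputs and must be cropped back to the real h x w:
def _clip_pad(tensor, pad_shape):
    """Crop a (1, C, H, W) map back to (1, C, h, w), dropping the pad area."""
    H, W = tensor.shape[2:]
    h, w = pad_shape
    if h < H or w < W:
        tensor = tensor[:, :, :h, :w].copy()
    return tensor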
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    ########################### used for DEBUG, ADDED BY LJ ########################
    for blob_name, blob in net.blobs.iteritems():
        print blob_name + '\t' + str(blob.data.shape)
    for para_name, para in net.params.iteritems():
        print para_name + '\t' + str(para[0].data.shape) + str(para[1].data.shape)

    def vis_square(data, show_type):
        '''
        Take an array of shape (n, height, width) ('gray') and visualize
        each (height, width) thing in a grid of size approx. sqrt(n) by sqrt(n)
        '''
        import matplotlib.pyplot as plt
        # normalize data for display
        data = (data - data.min()) / (data.max() - data.min())
        # force the number of filters to be square
        n = int(np.ceil(np.sqrt(data.shape[0])))
        if show_type == 'feature':
            padding = ((0, n**2 - data.shape[0]), (0, 5), (0, 5))
        else:
            padding = ((0, n**2 - data.shape[0]), (0, 1), (0, 1))
        data = np.pad(data, padding, mode='constant', constant_values=0)
        data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3))
        data = data.reshape((n * data.shape[1], n * data.shape[3]))
        plt.imshow(data)
        plt.axis('off')

    for blob_name, blob in net.blobs.iteritems():
        print blob_name + '\t' + str(blob.data.shape)
        try:
            shape_val = blob.data.shape
            vis_square(blob.data.reshape(shape_val[0] * shape_val[1],
                                         shape_val[2], shape_val[3])[:64],
                       show_type='feature')
        except:
            # skip blobs that are not 4D feature maps (e.g. fc outputs)
            pass
    for para_name, para in net.params.iteritems():
        print para_name + '\t' + str(para[0].data.shape) + str(para[1].data.shape)
        try:
            shape_val = para[0].data.shape
            vis_square(para[0].data.reshape(shape_val[0] * shape_val[1],
                                            shape_val[2], shape_val[3])[:256],
                       show_type='params')
        except:
            # skip parameters that are not 4D convolution weights
            pass
    #################################################################################

    return scores, pred_boxes
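# vis_square only draws into the current matplotlib figure; a call such as
# the following (hypothetical, assuming an interactive backend) is needed to
# actually display the grids:
import matplotlib.pyplot as plt
scores, pred_boxes = im_detect(net, im)  # triggers the debug dumps above
plt.show()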
def im_detect_array(net, imgs):
    """Detect object classes in a batch of images (RPN proposals).

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        imgs (list of ndarray): color images to test (in BGR order)

    Returns:
        scores (list of ndarray): per-image R x K arrays of object class
            scores (K includes background as object category 0)
        pred_boxes (list of ndarray): per-image R x (4*K) arrays of
            predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(imgs)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    # (NOTE: this branch assumes precomputed proposals in blobs['rois'] and
    # is not exercised in the RPN configuration this function targets.)
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        images = []
        for i in xrange(blobs['data'].shape[0]):
            images.append(np.array(
                [[im_blob.shape[2], im_blob.shape[3], im_scales[i]]],
                dtype=np.float32))
        blobs['im_info'] = np.array(images)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    dividers = []
    boxes = []
    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == len(imgs), "Only one scale per image implemented"
        rois = net.blobs['rois'].data.copy()
        # rois[:, 0] holds the batch index, sorted by image; find where each
        # image's rois begin so the stacked outputs can be split per image
        for idx in xrange(im_scales.shape[0] - 1):
            dividers.append(np.searchsorted(rois[:, 0], idx + 1, 'left'))
        boxes = np.split(rois[:, 1:5], dividers)
        # unscale back to raw image space (each image has its own scale;
        # the original divided by im_scales[0] for every image, a bug)
        for idx in xrange(len(imgs)):
            boxes[idx] = boxes[idx] / im_scales[idx]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = np.split(net.blobs['cls_score'].data, dividers)
    else:
        # use softmax estimated probabilities
        scores = np.split(blobs_out['cls_prob'], dividers)

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = np.split(blobs_out['bbox_pred'], dividers)
        pred_boxes = []
        for idx in xrange(im_scales.shape[0]):
            pred_boxes.append(bbox_transform_inv(boxes[idx], box_deltas[idx]))
            pred_boxes[idx] = clip_boxes(pred_boxes[idx], imgs[idx].shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
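# A tiny worked example of the divider logic above, assuming two images with
# illustrative rois; not tied to a real network:
import numpy as np
rois = np.array([[0, 1, 1, 5, 5],
                 [0, 2, 2, 6, 6],
                 [1, 3, 3, 7, 7]], dtype=np.float32)
# first column is the batch index -> first row of image 1 is index 2
dividers = [np.searchsorted(rois[:, 0], idx + 1, 'left') for idx in xrange(1)]
print np.split(rois[:, 1:5], dividers)  # two groups: rows 0-1 and row 2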
def forward(self, bottom, top):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    cfg_key = str('TRAIN' if self.phase == 0 else 'TEST')  # either 'TRAIN' or 'TEST'
    if cfg.TRAIN.FrozenTraing:
        cfg_key = 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    if cfg.TRAIN.MULTI_SCALE_RPN == 1:
        rpn_proposals = []
        rpn_scores = []
        for i, ANCHOR_SCALES in enumerate(cfg.TRAIN.MULTI_SCALE_RPN_SCALE):
            pre_nms_topN = cfg[cfg_key].MULTI_SCALE_RPN_PRE_NMS_TOP_Ns[i]
            post_nms_topN = cfg[cfg_key].MULTI_SCALE_RPN_POST_NMS_TOP_Ns[i]

            # the first set of _num_anchors channels are bg probs
            # the second set are the fg probs, which we want
            scores = bottom[0 + i * 2].data[:, self._num_anchors[i]:, :, :]
            bbox_deltas = bottom[1 + i * 2].data
            im_info = bottom[-1].data[0, :]

            # 1. Generate proposals from bbox deltas and shifted anchors
            height, width = scores.shape[-2:]

            # Enumerate all shifts
            shift_x = np.arange(0, width) * self._feat_stride[i]
            shift_y = np.arange(0, height) * self._feat_stride[i]
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors[i]
            K = shifts.shape[0]
            anchors = self._anchors[i].reshape((1, A, 4)) + \
                      shifts.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_transform_inv(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = _filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            # 4. sort all (proposal, score) pairs by score from highest to lowest
            # 5. take top pre_nms_topN (e.g. 6000)
            order = scores.ravel().argsort()[::-1]
            if pre_nms_topN > 0:
                order = order[:pre_nms_topN]
            proposals = proposals[order, :]
            scores = scores[order]

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
            nms_keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                nms_keep = nms_keep[:post_nms_topN]
            proposals = proposals[nms_keep, :]
            scores = scores[nms_keep]

            rpn_proposals.append(proposals)
            rpn_scores.append(scores)

        # concat several groups of proposals from other rpn maps
        # concat all proposals
        proposals = np.vstack(rpn_proposals)
        scores = np.vstack(rpn_scores)

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        # print blob.shape
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        if cfg.TRAIN.RPN_KP_REGRESSION:
            # timer = Timer()
            # timer.tic()
            keyPoint_deltas = bottom[3].data
            keyPoints_num = cfg.TRAIN.ATTRIBUTES[0]['gt_keyPoints']
            keyPoint_deltas = keyPoint_deltas.transpose((0, 2, 3, 1)).reshape((-1, keyPoints_num))
            # m1
            # keyPoint_proposals = keyPoints_transform_inv(anchors, keyPoint_deltas)
            # keyPoint_proposals = keyPoint_proposals[keep, :]
            # keyPoint_proposals = keyPoint_proposals[order, :]
            # keyPoint_proposals = keyPoint_proposals[nms_keep, :]
            # m2 (note: reuses anchors/keep/order/nms_keep from the last loop iteration)
            anchors_t = anchors[keep, :]
            anchors_t = anchors_t[order, :]
            anchors_t = anchors_t[nms_keep, :]
            keyPoint_deltas_t = keyPoint_deltas[keep, :]
            keyPoint_deltas_t = keyPoint_deltas_t[order, :]
            keyPoint_deltas_t = keyPoint_deltas_t[nms_keep, :]
            keyPoint_proposals = keyPoints_transform_inv(anchors_t, keyPoint_deltas_t)

            blob = np.hstack((batch_inds, keyPoint_proposals.astype(np.float32, copy=False)))
            # print blob.shape
            top[1].reshape(*(blob.shape))
            top[1].data[...] = blob

            # [Optional] output scores blob
            if len(top) > 2:
                top[2].reshape(*(scores.shape))
                top[2].data[...] = scores
            # timer.toc()
            # print ('proposal took {:.3f}s').format(timer.total_time)
        else:
            # [Optional] output scores blob
            if len(top) > 1:
                top[1].reshape(*(scores.shape))
                top[1].data[...] = scores
    else:
        # single-scale RPN path
        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        if len(bottom) > 30:
            # forward anchors from conv4_3 and combine all anchors
            # the first set of _num_anchors channels are bg probs
            # the second set are the fg probs, which we want
            scores_from_conv4_3 = bottom[3].data[:, self._num_anchors_from_conv4_3:, :, :]
            bbox_deltas_from_conv4_3 = bottom[4].data
            im_info = bottom[2].data[0, :]

            if DEBUG:
                print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
                print 'scale: {}'.format(im_info[2])

            # 1. Generate proposals from bbox deltas and shifted anchors
            height_from_conv4_3, width_from_conv4_3 = scores_from_conv4_3.shape[-2:]

            if DEBUG:
                print 'score map size: {}'.format(scores_from_conv4_3.shape)

            # Enumerate all shifts
            shift_x = np.arange(0, width_from_conv4_3) * self._feat_stride_from_conv4_3
            shift_y = np.arange(0, height_from_conv4_3) * self._feat_stride_from_conv4_3
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts_from_conv4_3 = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                             shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors_from_conv4_3
            K = shifts_from_conv4_3.shape[0]
            anchors_from_conv4_3 = self._anchors_from_conv4_3.reshape((1, A, 4)) + \
                                   shifts_from_conv4_3.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors_from_conv4_3 = anchors_from_conv4_3.reshape((K * A, 4))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas_from_conv4_3 = bbox_deltas_from_conv4_3.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores_from_conv4_3 = scores_from_conv4_3.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals_from_conv4_3 = bbox_transform_inv(anchors_from_conv4_3,
                                                        bbox_deltas_from_conv4_3)

            # 2. clip predicted boxes to image
            proposals_from_conv4_3 = clip_boxes(proposals_from_conv4_3, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = _filter_boxes(proposals_from_conv4_3, min_size * im_info[2])
            proposals_from_conv4_3 = proposals_from_conv4_3[keep, :]
            scores_from_conv4_3 = scores_from_conv4_3[keep]

            # 3.1 combine all anchors from conv4_3 and conv5_3
            # strategy 1
            # proposals = np.vstack((proposals, proposals_from_conv4_3))
            # scores = np.vstack((scores, scores_from_conv4_3))

            # strategy 2
            # sort all anchors from conv5_3 and conv4_3 and use nms before combining them
            # order = scores.ravel().argsort()[::-1]
            # if pre_nms_topN > 0:
            #     order = order[:pre_nms_topN]
            # proposals = proposals[order, :]
            # scores = scores[order]
            # keep = nms(np.hstack((proposals, scores)), nms_thresh)
            # if post_nms_topN > 0:
            #     keep = keep[:post_nms_topN]
            # proposals = proposals[keep, :]
            # scores = scores[keep]
            #
            # order = scores_from_conv4_3.ravel().argsort()[::-1]
            # if pre_nms_topN > 0:
            #     order = order[:pre_nms_topN]
            # proposals_from_conv4_3 = proposals_from_conv4_3[order, :]
            # scores_from_conv4_3 = scores_from_conv4_3[order]
            # keep = nms(np.hstack((proposals_from_conv4_3, scores_from_conv4_3)), nms_thresh)
            # if post_nms_topN > 0:
            #     keep = keep[:post_nms_topN]
            # proposals_from_conv4_3 = proposals_from_conv4_3[keep, :]
            # scores_from_conv4_3 = scores_from_conv4_3[keep]
            #
            # proposals = np.vstack((proposals, proposals_from_conv4_3))
            # scores = np.vstack((scores, scores_from_conv4_3))

            # strategy 3
            # proposals = proposals_from_conv4_3
            # scores = scores_from_conv4_3
            # ------------------------------

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        nms_keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            nms_keep = nms_keep[:post_nms_topN]
        proposals = proposals[nms_keep, :]
        scores = scores[nms_keep]

        # if cfg.RPN_FILTER:
        #     scores_i = np.where(scores[:, 0] > cfg.RPN_FILTER_thresh)
        #     if len(scores_i[0]) == 0:
        #         proposals = proposals[:5]
        #         scores = scores[:5]
        #     else:
        #         proposals = proposals[scores_i]
        #         scores = scores[scores_i]
        #         areas = (proposals[:, 2] - proposals[:, 0]) * (proposals[:, 3] - proposals[:, 1])
        #         argmax_area_i = np.argmax(areas)
        #         proposals = proposals[argmax_area_i, np.newaxis]
        #         scores = scores[argmax_area_i, np.newaxis]

        # concat several groups of proposals from other rpn maps
        if cfg.RPN_PYRAMID_MORE:
            RPN_PYRAMID_NUM = cfg.RPN_PYRAMID_NUM
            for j in range(1, RPN_PYRAMID_NUM):
                # the first set of _num_anchors channels are bg probs
                # the second set are the fg probs, which we want
                scores_extend = bottom[1 + 2 * j].data[:, self._num_anchors:, :, :]
                bbox_deltas = bottom[2 + 2 * j].data

                if DEBUG:
                    print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
                    print 'scale: {}'.format(im_info[2])

                # 1. Generate proposals from bbox deltas and shifted anchors
                height_extend, width_extend = scores_extend.shape[-2:]

                if DEBUG:
                    print 'score map size: {}'.format(scores_extend.shape)

                # Enumerate all shifts
                shift_x = np.arange(0, width_extend) * self._feat_stride
                shift_y = np.arange(0, height_extend) * self._feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                    shift_x.ravel(), shift_y.ravel())).transpose()

                # Enumerate all shifted anchors:
                #
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = self._num_anchors
                K = shifts.shape[0]
                anchors = self._anchors.reshape((1, A, 4)) + \
                          shifts.reshape((1, K, 4)).transpose((1, 0, 2))
                anchors = anchors.reshape((K * A, 4))

                # Transpose and reshape predicted bbox transformations to get them
                # into the same order as the anchors:
                #
                # bbox deltas will be (1, 4 * A, H, W) format
                # transpose to (1, H, W, 4 * A)
                # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
                # in slowest to fastest order
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

                # Same story for the scores:
                #
                # scores are (1, A, H, W) format
                # transpose to (1, H, W, A)
                # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
                scores_extend = scores_extend.transpose((0, 2, 3, 1)).reshape((-1, 1))

                # Convert anchors into proposals via bbox transformations
                proposals_extend = bbox_transform_inv(anchors, bbox_deltas)

                # 2. clip predicted boxes to image
                proposals_extend = clip_boxes(proposals_extend, im_info[:2])

                # 3. remove predicted boxes with either height or width < threshold
                # (NOTE: convert min_size to input image scale stored in im_info[2])
                keep = _filter_boxes(proposals_extend, min_size * im_info[2])
                proposals_extend = proposals_extend[keep, :]
                scores_extend = scores_extend[keep]

                # 4. sort all (proposal, score) pairs by score from highest to lowest
                # 5. take top pre_nms_topN (e.g. 6000)
                order = scores_extend.ravel().argsort()[::-1]
                if pre_nms_topN > 0:
                    order = order[:pre_nms_topN]
                proposals_extend = proposals_extend[order, :]
                scores_extend = scores_extend[order]

                # 6. apply nms (e.g. threshold = 0.7)
                # 7. take after_nms_topN (e.g. 300)
                # 8. return the top proposals (-> RoIs top)
                keep = nms(np.hstack((proposals_extend, scores_extend)), nms_thresh)
                if post_nms_topN > 0:
                    keep = keep[:post_nms_topN]
                proposals_extend = proposals_extend[keep, :]
                scores_extend = scores_extend[keep]

                # 9. concat all proposals
                proposals = np.vstack((proposals, proposals_extend))
                scores = np.vstack((scores, scores_extend))

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        # print blob.shape
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        if cfg.TRAIN.RPN_KP_REGRESSION:
            # timer = Timer()
            # timer.tic()
            keyPoint_deltas = bottom[3].data
            keyPoints_num = cfg.TRAIN.ATTRIBUTES[0]['gt_keyPoints']
            keyPoint_deltas = keyPoint_deltas.transpose((0, 2, 3, 1)).reshape((-1, keyPoints_num))
            # m1
            # keyPoint_proposals = keyPoints_transform_inv(anchors, keyPoint_deltas)
            # keyPoint_proposals = keyPoint_proposals[keep, :]
            # keyPoint_proposals = keyPoint_proposals[order, :]
            # keyPoint_proposals = keyPoint_proposals[nms_keep, :]
            # m2
            anchors_t = anchors[keep, :]
            anchors_t = anchors_t[order, :]
            anchors_t = anchors_t[nms_keep, :]
            keyPoint_deltas_t = keyPoint_deltas[keep, :]
            keyPoint_deltas_t = keyPoint_deltas_t[order, :]
            keyPoint_deltas_t = keyPoint_deltas_t[nms_keep, :]
            keyPoint_proposals = keyPoints_transform_inv(anchors_t, keyPoint_deltas_t)

            blob = np.hstack((batch_inds, keyPoint_proposals.astype(np.float32, copy=False)))
            # print blob.shape
            top[1].reshape(*(blob.shape))
            top[1].data[...] = blob

            # [Optional] output scores blob
            if len(top) > 2:
                top[2].reshape(*(scores.shape))
                top[2].data[...] = scores
            # timer.toc()
            # print ('proposal took {:.3f}s').format(timer.total_time)
        else:
            # [Optional] output scores blob
            if len(top) > 1:
                top[1].reshape(*(scores.shape))
                top[1].data[...] = scores

        if cfg.PYRAMID_MORE:
            PYRAMID_NUM = len(cfg.PYRAMID_MORE_ANCHORS)
            for i in range(1, PYRAMID_NUM):
                # the first set of _num_anchors channels are bg probs
                # the second set are the fg probs, which we want
                scores = bottom[2 + 2 * i - 1].data[:, self._num_anchors_from_extends[i - 1]:, :, :]
                bbox_deltas = bottom[2 + 2 * i].data

                # 1. Generate proposals from bbox deltas and shifted anchors
                height, width = scores.shape[-2:]

                if DEBUG:
                    print 'score map size: {}'.format(scores.shape)

                # Enumerate all shifts
                shift_x = np.arange(0, width) * self._feat_stride_from_extends[i - 1]
                shift_y = np.arange(0, height) * self._feat_stride_from_extends[i - 1]
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                    shift_x.ravel(), shift_y.ravel())).transpose()

                # Enumerate all shifted anchors:
                #
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = self._num_anchors_from_extends[i - 1]
                K = shifts.shape[0]
                anchors = self._anchors_from_extends[i - 1].reshape((1, A, 4)) + \
                          shifts.reshape((1, K, 4)).transpose((1, 0, 2))
                anchors = anchors.reshape((K * A, 4))

                # Transpose and reshape predicted bbox transformations to get them
                # into the same order as the anchors:
                #
                # bbox deltas will be (1, 4 * A, H, W) format
                # transpose to (1, H, W, 4 * A)
                # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
                # in slowest to fastest order
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

                # Same story for the scores:
                #
                # scores are (1, A, H, W) format
                # transpose to (1, H, W, A)
                # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                # Convert anchors into proposals via bbox transformations
                proposals = bbox_transform_inv(anchors, bbox_deltas)

                # 2. clip predicted boxes to image
                proposals = clip_boxes(proposals, im_info[:2])

                # 3. remove predicted boxes with either height or width < threshold
                # (NOTE: convert min_size to input image scale stored in im_info[2])
                keep = _filter_boxes(proposals, min_size * im_info[2])
                proposals = proposals[keep, :]
                scores = scores[keep]

                # 4. sort all (proposal, score) pairs by score from highest to lowest
                # 5. take top pre_nms_topN (e.g. 6000)
                order = scores.ravel().argsort()[::-1]
                if pre_nms_topN > 0:
                    order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                scores = scores[order]

                # 6. apply nms (e.g. threshold = 0.7)
                # 7. take after_nms_topN (e.g. 300)
                # 8. return the top proposals (-> RoIs top)
                keep = nms(np.hstack((proposals, scores)), nms_thresh)
                if post_nms_topN > 0:
                    keep = keep[:post_nms_topN]
                proposals = proposals[keep, :]
                scores = scores[keep]

                # Output rois blob
                # Our RPN implementation only supports a single input image, so all
                # batch inds are 0
                batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
                blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
                # print blob.shape
                top[i].reshape(*(blob.shape))
                top[i].data[...] = blob
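# A tiny worked example of the shift enumeration this layer repeats for each
# pyramid level (feat_stride = 16, a 2 x 2 score map):
import numpy as np
feat_stride = 16
shift_x, shift_y = np.meshgrid(np.arange(2) * feat_stride,
                               np.arange(2) * feat_stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()
# shifts -> [[ 0  0  0  0]
#            [16  0 16  0]
#            [ 0 16  0 16]
#            [16 16 16 16]]
# adding these (K, 1, 4) shifts to the (1, A, 4) base anchors yields all
# K * A anchors, ordered by (h, w, a) as the reshaped deltas expect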
def forward(self, bottom, top):
    # conv5_3 = np.copy(bottom[0].data)
    assert (bottom[0].data.shape[0] == 1)
    box_deltas = np.copy(bottom[1].data)
    scores = np.copy(bottom[2].data)
    gt_mask_fg = np.copy(bottom[3].data)
    # print(np.mean(gt_mask_fg))
    onlyface = np.copy(bottom[0].data)
    rois = np.copy(bottom[4].data)
    im_info = np.copy(bottom[5].data)

    boxes = rois[:, 1:5]
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    # boxes = clip_boxes(pred_boxes, gt_mask_fg[0, 0, :, :].shape[::-1])
    boxes = clip_boxes(pred_boxes, (int(im_info[0][0]), int(im_info[0][1])))

    if np.all(np.unique(gt_mask_fg) == 1):
        # masks for images other than occluded ones are set to all ones
        onlyface = np.zeros(onlyface.shape)
        gt_mask_fg = np.zeros(gt_mask_fg.shape)
        # print(np.sum(gt_mask_fg))
    else:
        # print('nonzero input !!!')
        CONF_THRESH = 0.6
        NMS_THRESH = 0.25
        zoom = 16

        # find face areas
        cls_ind = 1
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        keep = np.where(dets[:, 4] > CONF_THRESH)
        dets = dets[keep]
        # shape (n, 5): n predicted boxes, each with top-left and
        # bottom-right coords plus a score

        # enlarge boxes
        # dets[:, :4] *= 1.1
        # print(dets)
        # print(dets.shape)
        # print(bottom[3].data.shape)
        # print(bottom[0].data.shape)

        # generate a mask for the gt mask (slice indices must be ints;
        # rows index y and columns index x)
        mask4gt = np.zeros(bottom[3].data.shape)
        for each in dets:
            x1, y1, x2, y2 = each[:4].astype(int)
            mask4gt[:, :, y1:y2 + 1, x1:x2 + 1] = 1
        # gt_mask_fg *= mask4gt

        # map to conv5_3
        dets[:, :4] //= zoom
        # generate a mask for conv5_3
        mask4conv = np.zeros(bottom[0].data.shape)
        for each in dets:
            x1, y1, x2, y2 = each[:4].astype(int)
            mask4conv[:, :, y1:y2 + 1, x1:x2 + 1] = 1
        # onlyface *= mask4conv
        self.onlyface_mask = mask4conv
        # print(np.sum(onlyface))

    top_ind = self._name_to_top_map['onlyface']
    top[top_ind].reshape(*(onlyface.shape))
    top[top_ind].data[...] = onlyface.astype(np.float32, copy=False)

    top_ind = self._name_to_top_map['gt_mask_fg']
    top[top_ind].reshape(*(gt_mask_fg.shape))
    top[top_ind].data[...] = gt_mask_fg.astype(np.float32, copy=False)
def forward(self, bottom, top):
    # conv5_3 = np.copy(bottom[0].data)
    assert (bottom[0].data.shape[0] == 1)
    box_deltas = np.copy(bottom[1].data)
    scores = np.copy(bottom[2].data)
    onlyface = np.copy(bottom[0].data)
    rois = np.copy(bottom[3].data)
    im_info = np.copy(bottom[4].data)
    # print('layer rois: ', rois)

    boxes = rois[:, 1:5]
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    # boxes = clip_boxes(pred_boxes, gt_mask_fg[0, 0, :, :].shape[::-1])
    boxes = clip_boxes(pred_boxes, (int(im_info[0][0]), int(im_info[0][1])))
    # print('im_info', (int(im_info[0][0]), int(im_info[0][1]), int(im_info[0][2])))

    CONF_THRESH = 0.65
    NMS_THRESH = 0.15
    zoom = 16
    # print('layerbox:', boxes)

    # find face areas
    cls_ind = 1
    cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
    cls_scores = scores[:, cls_ind]
    dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    dets = dets[keep, :]
    keep = np.where(dets[:, 4] > CONF_THRESH)
    dets = dets[keep]
    # shape (n, 5): n predicted boxes, each with top-left and bottom-right
    # coords plus a score

    # enlarge boxes
    # print('dddets: ', dets)
    # dets[:, :4] *= 1
    # print(dets)
    # print(dets.shape)
    # print(bottom[3].data.shape)
    # print(bottom[0].data.shape)

    # generate a mask for the gt mask
    # mask4gt = np.zeros(bottom[3].data.shape)
    # for each in dets:
    #     mask4gt[:, :, each[0]:each[2]+1, each[1]:each[3]+1] = 1
    # gt_mask_fg *= mask4gt

    # map to conv5_3
    dets[:, :4] //= zoom
    # print('conv53:', bottom[0].data.shape)
    # print('premask: ', dets.shape)

    # generate a mask for conv5_3 (slice indices must be ints;
    # rows index y and columns index x)
    mask4conv = np.zeros(bottom[0].data.shape)
    for each in dets:
        x1, y1, x2, y2 = each[:4].astype(int)
        mask4conv[:, :, y1:y2 + 1, x1:x2 + 1] = 1
    # pickle.dump(mask4conv, open("vis.txt", "w"))
    onlyface *= mask4conv
    self.onlyface_mask = mask4conv
    # print(np.sum(onlyface))

    top_ind = self._name_to_top_map['onlyface']
    top[top_ind].reshape(*(onlyface.shape))
    top[top_ind].data[...] = onlyface.astype(np.float32, copy=False)
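# Standalone illustration of the box -> mask rasterization both mask layers
# use; the shapes and the single det are hypothetical, not tied to a real
# network. Rows index y and columns index x, hence the (y1:y2, x1:x2) order.
import numpy as np
feat = np.random.rand(1, 512, 38, 50).astype(np.float32)  # (N, C, H, W)
dets = np.array([[4., 6., 20., 18., 0.9]])  # x1, y1, x2, y2, score (feature-map scale)
mask = np.zeros(feat.shape, dtype=np.float32)
for x1, y1, x2, y2 in dets[:, :4].astype(int):
    mask[:, :, y1:y2 + 1, x1:x2 + 1] = 1
masked = feat * mask  # zeros out everything outside the detected face boxes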
def im_detect(net, im, boxes=None, num_classes=21):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        boxes = rois[:, 1:5]
        # drop all-zero padding rois; unscaling is deferred to the return
        index = np.where(np.sum(boxes, axis=1) != 0)[0]
        boxes = boxes[index, :]  # / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']
        scores = scores[index]
        # print scores[0:10]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        box_deltas = box_deltas[index, :]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # targets were normalized by a precomputed mean and stdev --
            # reverse the transformation
            means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
                            (num_classes, 1)).ravel()
            stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS),
                           (num_classes, 1)).ravel()
            box_deltas = box_deltas * stds + means
        # print boxes.shape, box_deltas.shape
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        # clip against the network input size, given as (H, W, C)
        data_shape = blobs['data'].astype(np.float32, copy=False).shape
        s = (data_shape[2], data_shape[3], data_shape[1])
        pred_boxes = clip_boxes(pred_boxes, s)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    vis = False
    if vis:
        vis_rois_detection(blobs['data'].astype(np.float32, copy=False),
                           pred_boxes / im_scales[0])

    return scores, pred_boxes / im_scales[0]
def forward(self, bottom, top):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    self._num_anchors = bottom[0].shape[0] / 2
    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[1].data[:, 1:, :, :]
    sio.savemat('scores', {'scores': scores})  # debug dump of the score map
    bbox_deltas = bottom[2].data
    im_info = bottom[3].data[0, :]
    anchors = bottom[0].data

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas[0, :, :, 0]
    bbox_deltas = bbox_deltas.reshape((-1, 4))
    # bbox_deltas = bbox_deltas.transpose(1, 0)

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob

    # [Optional] output scores blob
    if len(top) > 1:
        top[1].reshape(*(scores.shape))
        top[1].data[...] = scores
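# For reference, minimal sketches of the two helpers every function above
# depends on, matching the standard py-faster-rcnn definitions; consult the
# bbox_transform module of your checkout for the authoritative code.
def bbox_transform_inv(boxes, deltas):
    """Apply (dx, dy, dw, dh) regression deltas to N x 4 boxes."""
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes


def clip_boxes(boxes, im_shape):
    """Clip boxes to image boundaries; im_shape is (height, width, ...)."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
    return boxes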