def forward(self, is_train, req, in_data, out_data, aux):
    """Turn per-RoI regression deltas into refined boxes.

    Inputs are read from ``in_data`` in this order: RoIs, box deltas, class
    probabilities, image info. For every RoI the four deltas of one class
    are selected, de-normalized with ``self._bbox_stds`` and
    ``self._bbox_means``, and applied to columns 1.. of the RoI through
    ``bbox_pred``. The refined boxes overwrite those columns (column 0 is
    left as it came in) and the array is assigned to ``out_data[0]``.
    """
    rois = in_data[0].asnumpy()
    deltas = in_data[1].asnumpy()
    cls_prob = in_data[2].asnumpy()
    im_info = in_data[3].asnumpy()
    num_rois = rois.shape[0]

    # Steps 1-2: decide which class slot supplies the deltas of each RoI.
    if self._bbox_class_agnostic:
        # Class-agnostic regression: every RoI uses slot 1 (an array of ones).
        class_idx = np.ones((num_rois))
    else:
        # Highest-scoring class among columns 1..; the +1 undoes the offset
        # introduced by slicing away column 0.
        class_idx = np.argmax(cls_prob[:, 1:], axis=1) + 1

    # Step 3: slot c owns delta columns 4c .. 4c+3; gather them row by row.
    first_col = class_idx[:, np.newaxis] * 4
    delta_cols = np.hstack([first_col + offset for offset in range(4)])
    row_idx = np.arange(num_rois, dtype=np.intp)
    deltas = deltas[row_idx[:, np.newaxis], delta_cols.astype(np.intp)]

    # Step 4: undo the target normalization, delta * std + mean per coordinate.
    means = np.array(self._bbox_means)
    stds = np.array(self._bbox_stds)
    deltas = np.hstack([
        (deltas[:, k] * stds[k] + means[k])[:, np.newaxis] for k in range(4)
    ])

    # Step 5: apply the deltas to the RoI boxes.
    refined = bbox_pred(rois[:, 1:], deltas)

    # Step 6: optionally clip with the first two entries of im_info row 0.
    if self._b_clip_boxes:
        refined = clip_boxes(refined, im_info[0, :2])

    # `rois` is the host copy made above, so it can be overwritten in place.
    rois[:, 1:] = refined
    self.assign(out_data[0], req[0], rois)
def coco_results_one_category_kernel(data_pack):
    """Build the COCO-style result records of a single category.

    :param data_pack: dict with the keys
        ``cat_id`` (value stored as ``category_id``),
        ``ann_type`` (``'bbox'`` or ``'segm'``),
        ``binary_thresh`` (forwarded to ``mask_voc2coco`` for ``'segm'``),
        ``all_im_info`` (one dict per image; ``index`` is always read,
        ``width`` and ``height`` only for ``'segm'``),
        ``boxes`` (one array-like per image; the first four columns are used
        as x1, y1, x2, y2 and the last column as the score) and
        ``masks`` (only read for ``'segm'``).
    :return: flat list of result dicts over all images; images without
        detections contribute nothing.
    :raises ValueError: if ``ann_type`` is neither ``'bbox'`` nor ``'segm'``.
    """
    cat_id = data_pack['cat_id']
    ann_type = data_pack['ann_type']
    binary_thresh = data_pack['binary_thresh']
    all_im_info = data_pack['all_im_info']
    boxes = data_pack['boxes']
    if ann_type == 'bbox':
        masks = []
    elif ann_type == 'segm':
        masks = data_pack['masks']
    else:
        # Previously this only printed and then failed later with a NameError
        # on the first non-empty image; fail early with a clear message.
        raise ValueError('unimplemented ann_type: {}'.format(ann_type))

    cat_results = []
    for im_ind, im_info in enumerate(all_im_info):
        index = im_info['index']
        # np.float was removed in NumPy 1.24; float64 is what it aliased.
        # np.array always copies, so the in-place clipping below never
        # touches the caller's data, and plain lists are accepted as well.
        dets = np.array(boxes[im_ind], dtype=np.float64)
        if len(dets) == 0:
            continue
        scores = dets[:, -1]
        if ann_type == 'bbox':
            # Convert corner boxes to x, y, width, height (inclusive pixels).
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'bbox': [xs[k], ys[k], ws[k], hs[k]],
                'score': scores[k]
            } for k in range(dets.shape[0])]
        else:  # 'segm'
            width = im_info['width']
            height = im_info['height']
            dets[:, :4] = clip_boxes(dets[:, :4], [height, width])
            mask_encode = mask_voc2coco(masks[im_ind], dets[:, :4], height,
                                        width, binary_thresh)
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'segmentation': mask_encode[k],
                'score': scores[k]
            } for k in range(len(mask_encode))]
        cat_results.extend(result)
    return cat_results
def get_image(roidb, config):
    """
    preprocess images and return them together with the processed roidb
    :param roidb: a list of roidb records; each record must provide
        'image' (path of the image file), 'flipped' and 'boxes'
    :param config: configuration object; ``SCALES``,
        ``network.IMAGE_STRIDE`` and ``network.PIXEL_MEANS`` are read
    :return: (processed_ims, processed_roidb)
        processed_ims is the list of tensors returned by ``transform``;
        processed_roidb holds shallow copies of the records whose 'boxes'
        are rescaled, rounded and clipped and which gain a new item
        'im_info' = [tensor.shape[2], tensor.shape[3], im_scale]
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    processed_ims = []
    processed_roidb = []
    for roi_rec in roidb:
        image_path = roi_rec['image']
        # The old message mixed '%s' with str.format, so the path was never
        # interpolated into the assertion text.
        assert os.path.exists(image_path), '{} does not exist'.format(
            image_path)
        im = cv2.imread(image_path,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            # Mirror along the second (width) axis.
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()

        # Pick one of the configured (target_size, max_size) pairs at random.
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)

        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        # Boxes follow the image: scale, round, then clip to the new extent.
        new_rec['boxes'] = clip_boxes(
            np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
def main():
    """Run single-class ('nuclei') inference on a directory of PNG images.

    Builds the network symbol named by ``config.symbol``, loads the trained
    parameters, pre-processes every ``*.png`` below the hard-coded image
    root, runs ``im_detect`` on each image and hands the detections scoring
    above 0.7 to ``show_masks``.

    :raises IOError: if no image is found or an image cannot be decoded.
    """
    score_thresh = 0.7

    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    # ctx_id = [mx.cpu()]
    print(ctx_id)
    pprint.pprint(config)
    # NOTE(security): eval executes whatever string config.symbol holds;
    # only run this with a trusted configuration.
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names (index 0 is skipped by the per-class loops below)
    num_classes = 2
    classes = ['nuclei']

    # load demo data
    # image_root = '/home/daiab/machine_disk/work/kaggle_nuclei/data/stage1_test_images'
    image_root = '/home/daiab/machine_disk/work/kaggle_nuclei/data/LikeVOC/img'
    image_names = glob.glob(os.path.join(image_root, '*.png'))
    if not image_names:
        # The warm-up below indexes data[0]; fail with a readable message.
        raise IOError('no *.png images found in ' + image_root)

    raw_data = []
    for im_name in image_names:
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if im is None:
            raise IOError('failed to read image: ' + im_name)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        raw_data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names]
            for item in raw_data]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None] * len(data)
    arg_params, aux_params = load_param(
        '/home/daiab/machine_disk/projects/FCIS/'
        'output/fcis/nuclei/nuclei_end2end/SDS_train/e2e',
        2, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    def make_batch(index):
        # One-image batch wrapping the pre-processed tensors of image `index`.
        return mx.io.DataBatch(
            data=[data[index]], label=[], pad=0, index=index,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[index])]],
            provide_label=[None])

    def batch_scales(batch):
        # The resize factor is the third entry of each im_info row.
        return [item[1].asnumpy()[0, 2] for item in batch.data]

    # Boxes are class-agnostic only when the config says so; a config without
    # the attribute falls back to per-class box columns.
    try:
        class_agnostic = bool(config.CLASS_AGNOSTIC)
    except (AttributeError, KeyError):
        class_agnostic = False

    # warm up
    for _ in range(2):
        data_batch = make_batch(0)
        scales = batch_scales(data_batch)
        # print('-----------', data_batch)
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = make_batch(idx)
        scales = batch_scales(data_batch)

        tic()
        scores, boxes, masks, data_dict = im_detect(
            predictor, data_batch, data_names, scales, config)
        im_shapes = [item[0].shape[2:4] for item in data_batch.data]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in range(num_classes)]
            all_masks = [[] for _ in range(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > score_thresh)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = all_boxes[1:]
            masks = all_masks[1:]
        else:
            masks = masks[0][:, 1:, :, :]
            # Recover the original image extent from the resized one.
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width,
                im_height, config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])
            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [result_masks[j][:, 0, :, :]
                     for j in range(1, num_classes)]
        # print(dets)
        # print(masks)
        print('testing {} {:.4f}s'.format(im_name, toc()))

        # visualize: keep only confident detections of every class
        for i in range(len(dets)):
            keep = np.where(dets[i][:, -1] > score_thresh)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config)

    print('done')
def main():
    """Run the 80-class COCO demo on the bundled demo images.

    Builds the network symbol named by ``config.symbol``, loads the
    ``fcis_coco`` parameters, pre-processes the four demo images found in
    ``<cur_path>/../demo``, runs ``im_detect`` on each and hands the
    detections scoring above 0.7 to ``show_masks``.
    """
    score_thresh = 0.7

    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    # ctx_id = [mx.cpu()]
    print(ctx_id)
    pprint.pprint(config)
    # NOTE(security): eval executes whatever string config.symbol holds;
    # only run this with a trusted configuration.
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names (index 0 is skipped by the per-class loops below)
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
               'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
               'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
               'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
               'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
               'kite', 'baseball bat', 'baseball glove', 'skateboard',
               'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog',
               'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
               'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
               'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
               'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    # load demo data
    image_names = ['COCO_test2015_000000000275.jpg',
                   'COCO_test2015_000000001412.jpg',
                   'COCO_test2015_000000073428.jpg',
                   'COCO_test2015_000000393281.jpg']
    raw_data = []
    for im_name in image_names:
        im_path = cur_path + '/../demo/' + im_name
        # The old message mixed '%s' with str.format, so the file name was
        # never interpolated into the assertion text.
        assert os.path.exists(im_path), (
            '{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(im_path,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        raw_data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names]
            for item in raw_data]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None] * len(data)
    arg_params, aux_params = load_param(cur_path + '/../model/fcis_coco', 0,
                                        process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    def make_batch(index):
        # One-image batch wrapping the pre-processed tensors of image `index`.
        return mx.io.DataBatch(
            data=[data[index]], label=[], pad=0, index=index,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[index])]],
            provide_label=[None])

    def batch_scales(batch):
        # The resize factor is the third entry of each im_info row.
        return [item[1].asnumpy()[0, 2] for item in batch.data]

    # Boxes are class-agnostic only when the config says so; a config without
    # the attribute falls back to per-class box columns.
    try:
        class_agnostic = bool(config.CLASS_AGNOSTIC)
    except (AttributeError, KeyError):
        class_agnostic = False

    # warm up
    for _ in range(2):
        data_batch = make_batch(0)
        scales = batch_scales(data_batch)
        # print('-----------', data_batch)
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = make_batch(idx)
        scales = batch_scales(data_batch)

        tic()
        scores, boxes, masks, data_dict = im_detect(
            predictor, data_batch, data_names, scales, config)
        im_shapes = [item[0].shape[2:4] for item in data_batch.data]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in range(num_classes)]
            all_masks = [[] for _ in range(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > score_thresh)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = all_boxes[1:]
            masks = all_masks[1:]
        else:
            print('=============')
            masks = masks[0][:, 1:, :, :]
            # Recover the original image extent from the resized one.
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width,
                im_height, config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])
            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [result_masks[j][:, 0, :, :]
                     for j in range(1, num_classes)]
        # print(dets)
        # print(masks)
        print('testing {} {:.4f}s'.format(im_name, toc()))

        # visualize: keep only confident detections of every class
        for i in range(len(dets)):
            keep = np.where(dets[i][:, -1] > score_thresh)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config)

    print('done')
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate region proposals from RPN scores and box deltas.

    ``in_data`` holds, in order, the anchor class scores, the anchor box
    deltas and the image info. Anchors are tiled over the feature grid,
    shifted by the predicted deltas, clipped, size-filtered, ranked by
    score, reduced by NMS and padded to a fixed count. The resulting RoIs
    (with a leading all-zero column) go to ``out_data[0]``; the scores go to
    ``out_data[1]`` when ``self._output_score`` is set.

    :raises ValueError: if the first input has a batch dimension above 1.
    """
    nms = gpu_nms_wrapper(self._nms_thresh, in_data[0].context.device_id)

    if in_data[0].shape[0] > 1:
        raise ValueError('Sorry, multiple images each device is not implemented')

    pre_nms_top_n = self._rpn_pre_nms_top_n
    post_nms_top_n = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size
    feat_stride = self._feat_stride
    num_anchors = self._num_anchors

    # The first `num_anchors` channels are background probabilities; only
    # the remaining (foreground) channels are used as scores.
    scores = in_data[0].asnumpy()[:, num_anchors:, :, :]
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]

    # Feature-grid extent derived from the first two im_info entries.
    feat_h = int(im_info[0] / feat_stride)
    feat_w = int(im_info[1] / feat_stride)

    # One (x, y, x, y) shift per grid cell, in row-major cell order.
    grid_x, grid_y = np.meshgrid(np.arange(0, feat_w) * feat_stride,
                                 np.arange(0, feat_h) * feat_stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.stack([flat_x, flat_y, flat_x, flat_y], axis=1)

    # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts -> (K * A, 4).
    num_cells = shifts.shape[0]
    anchors = (self._anchors.reshape((1, num_anchors, 4)) +
               shifts.reshape((num_cells, 1, 4)))
    anchors = anchors.reshape((num_cells * num_anchors, 4))

    # Bring deltas and scores into the same (h, w, a) row order as anchors:
    # channels are moved last before flattening.
    grid_size = (feat_h, feat_w)
    bbox_deltas = self._clip_pad(bbox_deltas, grid_size)
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    scores = self._clip_pad(scores, grid_size)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # 1. anchors + deltas -> proposals
    proposals = bbox_pred(anchors, bbox_deltas)

    # 2. clip proposals using the first two im_info entries
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. drop proposals rejected by the size filter
    # NOTE(review): min_size is passed unscaled here (im_info[2] is not
    # applied) -- confirm this is intended.
    valid = self._filter_boxes(proposals, min_size)
    proposals = proposals[valid, :]
    scores = scores[valid]

    # 4./5. rank by score, best first, and keep the top pre-NMS candidates
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_top_n > 0:
        ranking = ranking[:pre_nms_top_n]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # 6./7. NMS, then keep the top post-NMS candidates
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_top_n > 0:
        keep = keep[:post_nms_top_n]

    # Pad with randomly repeated survivors so the output size stays fixed.
    if len(keep) < post_nms_top_n:
        filler = np.random.choice(keep, size=post_nms_top_n - len(keep))
        keep = np.hstack((keep, filler))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 8. emit RoIs; only one image is supported, so the leading column is 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], rois)

    if self._output_score:
        self.assign(out_data[1], req[1],
                    scores.astype(np.float32, copy=False))
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate region proposals from five feature levels.

    ``in_data[0:5]`` are the class scores and ``in_data[5:10]`` the box
    deltas of the levels named stride4 .. stride64; the last input is the
    image info. Per level, anchors are tiled, decoded, clipped and
    size-filtered. The pooled proposals are then ranked by score, reduced by
    NMS and padded to a fixed count. RoIs (with a leading all-zero column)
    go to ``out_data[0]``; scores go to ``out_data[1]`` when
    ``self._output_score`` is set.

    :raises ValueError: if the first input has a batch dimension above 1.
    """
    if in_data[0].shape[0] > 1:
        raise ValueError('sorry, only support single image')

    level_names = ('stride4', 'stride8', 'stride16', 'stride32', 'stride64')
    num_levels = len(level_names)
    cls_prob_dict = {name: in_data[pos]
                     for pos, name in enumerate(level_names)}
    bbox_pred_dict = {name: in_data[pos + num_levels]
                      for pos, name in enumerate(level_names)}

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    level_proposals = []
    level_scores = []
    for level, s in enumerate(self._feat_stride):
        stride = int(s)
        level_key = 'stride' + str(s)
        # Each level uses a single anchor scale, selected by level position.
        base_anchors = generate_anchors(base_size=stride,
                                        scales=[self._scales[level]],
                                        ratios=self._ratios)
        # Channels from self._num_anchors onwards are used as scores.
        fg_scores = cls_prob_dict[level_key].asnumpy()[:, self._num_anchors:, :, :]
        deltas = bbox_pred_dict[level_key].asnumpy()
        im_info = in_data[-1].asnumpy()[0, :]

        # step 1: tile the anchors over the grid and decode the deltas
        feat_h = int(im_info[0] / stride)
        feat_w = int(im_info[1] / stride)
        grid_x, grid_y = np.meshgrid(np.arange(feat_w) * stride,
                                     np.arange(feat_h) * stride)
        flat_x = grid_x.ravel()
        flat_y = grid_y.ravel()
        # shift: (K, 4), one (x, y, x, y) row per grid cell
        shift = np.vstack([flat_x, flat_y, flat_x, flat_y]).transpose()
        anchors = base_anchors[np.newaxis, :, :] + shift[:, np.newaxis, :]
        anchors = anchors.reshape((-1, 4))

        # Channels last, then flatten, so rows follow the anchor order.
        grid_size = (feat_h, feat_w)
        deltas = self._clip_pad(deltas, grid_size)
        deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        fg_scores = self._clip_pad(fg_scores, grid_size)
        fg_scores = fg_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        decoded = bbox_pred(anchors, deltas)

        # step 2: clip using the first two im_info entries
        decoded = clip_boxes(decoded, im_info[:2])

        # step 3: size filter, threshold scaled by im_info[2]
        valid = self._filter_boxes(decoded, min_size * im_info[2])
        level_proposals.append(decoded[valid, :])
        level_scores.append(fg_scores[valid])

    proposals = np.vstack(level_proposals)
    scores = np.vstack(level_scores)

    # step 4: rank by score, best first
    ranking = scores.ravel().argsort()[::-1]

    # step 5: keep the top pre-NMS candidates
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # step 6: NMS
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det, self._threshold, in_data[0].context.device_id)

    # step 7: keep the top post-NMS candidates
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]

    # step 8: pad with randomly repeated survivors to a fixed output size
    if len(keep) < post_nms_topN:
        filler = np.random.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, filler))
    proposals = proposals[keep, :]
    scores = scores[keep]

    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    rois = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], rois)

    if self._output_score:
        self.assign(out_data[1], req[1],
                    scores.astype(np.float32, copy=False))
def _proposal_layer_py(rpn_bbox_cls_prob, rpn_bbox_pred, im_dims, cfg_key,
                       _feat_stride, anchor_scales):
    """Turn RPN outputs into a ranked, NMS-filtered set of RoIs.

    Algorithm:
      1. tile the base anchors over every (H, W) grid cell
      2. apply the predicted deltas to the anchors
      3. clip the boxes to ``im_dims``
      4. drop boxes rejected by ``_filter_boxes`` with the minimum size
      5. sort by score, highest first, and keep the top pre-NMS boxes
      6. apply NMS and keep the top post-NMS boxes

    Both network outputs arrive channels-last and are transposed to
    channels-first here. ``cfg_key == 'TRAIN'`` selects ``cfg.TRAIN``; any
    other value selects ``cfg.TEST``.

    :return: array with one row per kept proposal: a zero followed by the
        four box coordinates.
    """
    base_anchors = generate_anchors(scales=np.array(anchor_scales))
    num_anchors = base_anchors.shape[0]

    # Channels-last -> channels-first.
    cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2])
    bbox_deltas = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    # Only minibatch of 1 supported
    assert cls_prob.shape[0] == 1, \
        'Only single item batches are supported'

    phase_cfg = cfg.TRAIN if cfg_key == 'TRAIN' else cfg.TEST
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    # The first num_anchors channels are background probabilities; the rest
    # are the foreground probabilities used as scores.
    scores = cls_prob[:, num_anchors:, :, :]
    grid_h, grid_w = scores.shape[-2:]

    # One (x, y, x, y) shift per grid cell, in row-major cell order.
    grid_x, grid_y = np.meshgrid(np.arange(0, grid_w) * _feat_stride,
                                 np.arange(0, grid_h) * _feat_stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts -> (K * A, 4).
    num_cells = shifts.shape[0]
    anchors = (base_anchors.reshape((1, num_anchors, 4)) +
               shifts.reshape((num_cells, 1, 4)))
    anchors = anchors.reshape((num_cells * num_anchors, 4))

    # Move channels last and flatten so the rows are ordered (h, w, a),
    # matching the anchors: deltas become (N, 4), scores (N, 1).
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Decode, clip, and size-filter.
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    proposals = clip_boxes(proposals, im_dims)
    valid = _filter_boxes(proposals, min_size)
    proposals = proposals[valid, :]
    scores = scores[valid]

    # Rank by score, best first, and keep the top pre-NMS candidates.
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # NMS, then keep the top post-NMS candidates.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only a single input image is supported, so the leading column is 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    return np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
def pred_eval(predictor, test_data, imdb, cfg, vis=True, thresh=1e-3,
              logger=None, ignore_cache=False):
    """Run detection over a test set, cache the results and evaluate them.

    :param predictor: predictor handed to ``im_detect``
    :param test_data: data iterator; wrapped in a ``PrefetchingIter`` if it
        is not one already
    :param imdb: dataset object; ``result_path``, ``name``, ``num_images``,
        ``num_classes``, ``classes`` and ``evaluate_sds`` are used
    :param cfg: configuration object
    :param vis: when true, every processed image is shown through
        ``vis_all_mask``
    :param thresh: minimum class score kept when mask merging is disabled
    :param logger: optional logger that receives progress and the
        evaluation summary
    :param ignore_cache: recompute even if cached result files exist
    """
    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    seg_file = os.path.join(imdb.result_path, imdb.name + '_masks.pkl')

    have_cache = os.path.exists(det_file) and os.path.exists(seg_file)
    if have_cache and not ignore_cache:
        # NOTE(security): unpickling executes code embedded in the file;
        # the cache directory must be trusted.
        with open(det_file, 'rb') as f:
            all_boxes = cPickle.load(f)
        with open(seg_file, 'rb') as f:
            all_masks = cPickle.load(f)
    else:
        assert vis or not test_data.shuffle
        data_names = [k[0] for k in test_data.provide_data[0]]

        if not isinstance(test_data, PrefetchingIter):
            test_data = PrefetchingIter(test_data)

        # function pointers
        nms = py_nms_wrapper(cfg.TEST.NMS)
        mask_voting = (gpu_mask_voting if cfg.TEST.USE_GPU_MASK_MERGE
                       else cpu_mask_voting)

        max_per_image = 100 if cfg.TEST.USE_MASK_MERGE else -1
        num_images = imdb.num_images
        # Indexed as all_boxes[class][image]; class 0 is never filled.
        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(imdb.num_classes)]
        all_masks = [[[] for _ in range(num_images)]
                     for _ in range(imdb.num_classes)]

        # Boxes are class-agnostic only when the config says so; a config
        # without the attribute falls back to per-class box columns. This
        # replaces a bare `except:` that was used as control flow.
        try:
            class_agnostic = bool(cfg.CLASS_AGNOSTIC)
        except (AttributeError, KeyError):
            class_agnostic = False

        progress_fmt = 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'
        idx = 0
        t = time.time()
        for data_batch in test_data:
            t1 = time.time() - t  # time spent fetching the batch
            t = time.time()

            # The resize factor is the third entry of each im_info row.
            scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]
            scores_all, boxes_all, masks_all, data_dict_all = im_detect(
                predictor, data_batch, data_names, scales, cfg)
            im_shapes = [item[0].shape[2:4] for item in data_batch.data]

            t2 = time.time() - t  # time spent in the network
            t = time.time()

            # post processing
            for delta, (scores, boxes, masks, data_dict) in enumerate(
                    zip(scores_all, boxes_all, masks_all, data_dict_all)):
                image_index = idx + delta
                if not cfg.TEST.USE_MASK_MERGE:
                    for j in range(1, imdb.num_classes):
                        indexes = np.where(scores[:, j] > thresh)[0]
                        cls_scores = scores[indexes, j, np.newaxis]
                        cls_masks = masks[indexes, 1, :, :]
                        if class_agnostic:
                            cls_boxes = boxes[indexes, :]
                        else:
                            cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]
                        cls_dets = np.hstack((cls_boxes, cls_scores))
                        keep = nms(cls_dets)
                        all_boxes[j][image_index] = cls_dets[keep, :]
                        all_masks[j][image_index] = cls_masks[keep, :]
                else:
                    masks = masks[:, 1:, :, :]
                    # Recover the original image extent from the resized one.
                    im_height = np.round(
                        im_shapes[delta][0] / scales[delta]).astype('int')
                    im_width = np.round(
                        im_shapes[delta][1] / scales[delta]).astype('int')
                    boxes = clip_boxes(boxes, (im_height, im_width))
                    result_mask, result_box = mask_voting(
                        masks, boxes, scores, imdb.num_classes,
                        max_per_image, im_width, im_height, cfg.TEST.NMS,
                        cfg.TEST.MASK_MERGE_THRESH, cfg.BINARY_THRESH)
                    for j in range(1, imdb.num_classes):
                        all_boxes[j][image_index] = result_box[j]
                        all_masks[j][image_index] = result_mask[j][:, 0, :, :]

                # Honour the `vis` argument (this used to be `if True:`).
                if vis:
                    boxes_this_image = [[]] + [
                        all_boxes[j][image_index]
                        for j in range(1, imdb.num_classes)
                    ]
                    masks_this_image = [[]] + [
                        all_masks[j][image_index]
                        for j in range(1, imdb.num_classes)
                    ]
                    vis_all_mask(data_dict['data'].asnumpy(),
                                 boxes_this_image, masks_this_image,
                                 imdb.classes, scales[delta], cfg)

            idx += test_data.batch_size
            t3 = time.time() - t  # time spent post-processing
            t = time.time()

            progress = progress_fmt.format(idx, imdb.num_images, t1, t2, t3)
            print(progress)
            if logger:
                logger.info(progress)

        with open(det_file, 'wb') as f:
            cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)
        with open(seg_file, 'wb') as f:
            cPickle.dump(all_masks, f, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_sds(all_boxes, all_masks)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
def forward(self, is_train, req, in_data, out_data, aux):
    """Decode anchors of four feature levels and keep the best-scoring ones.

    ``in_data[0:4]`` are the box deltas and ``in_data[4:8]`` the class
    scores of the levels named stride16 .. stride128; ``in_data[8]`` is the
    image info and ``in_data[9]`` the image (only read by the debug
    visualisation). Every anchor of every level is decoded and clipped, its
    per-class scores are gathered into one row, and the
    ``self._keep_num`` rows with the highest maximum class score are
    written to ``out_data[0]`` (boxes) and ``out_data[1]`` (class scores).
    """
    bbox_pred_dict = {
        'stride128': in_data[3],
        'stride64': in_data[2],
        'stride32': in_data[1],
        'stride16': in_data[0],
    }
    cls_prob_dict = {
        'stride128': in_data[7],
        'stride64': in_data[6],
        'stride32': in_data[5],
        'stride16': in_data[4],
    }
    im_info = in_data[8].asnumpy()[0, :]

    destore_rois_list = []
    destore_cls_list = []
    for s in self._feat_stride:
        stride = int(s)
        level_key = 'stride' + str(s)
        sub_anchors = generate_anchors(base_size=stride,
                                       scales=self._scales,
                                       ratios=self._ratios)
        bbox_deltas = bbox_pred_dict[level_key].asnumpy()

        # 1. Generate proposals from bbox_deltas and shifted anchors.
        # The grid extent is taken from the delta tensor itself.
        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

        # One (x, y, x, y) shift per grid cell, in row-major cell order.
        shift_x, shift_y = np.meshgrid(np.arange(0, width) * stride,
                                       np.arange(0, height) * stride)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts -> (K * A, 4).
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Channels last, then flatten, so rows are ordered (h, w, a) like
        # the anchors.
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])

        # The score tensor stores A channels per class, class after class.
        # Flatten each class block in the same (h, w, a) order and place the
        # classes side by side, giving one row of class scores per anchor.
        scores = cls_prob_dict[level_key].asnumpy()
        per_class = []
        for cls_idx in range(self._num_classes):
            start = cls_idx * A
            # A dedicated name here: the old code reused `s`, silently
            # overwriting the stride loop variable.
            cls_scores = scores[:, start:start + A, :, :]
            cls_scores = self._clip_pad(cls_scores, (height, width))
            per_class.append(
                cls_scores.transpose((0, 2, 3, 1)).reshape((-1, 1)))
        scores = np.concatenate(per_class, axis=1)

        destore_rois_list.append(proposals)
        destore_cls_list.append(scores)

    destore_rois = np.concatenate(destore_rois_list, axis=0)
    destore_cls = np.concatenate(destore_cls_list, axis=0)

    # Rank anchors by their best class score and keep the top ones.
    best_score = np.max(destore_cls, axis=1)
    order = best_score.ravel().argsort()[::-1]
    order = order[:self._keep_num]
    destore_cls = destore_cls[order, :]
    destore_rois = destore_rois[order, :]

    # Debug switch; the image is only copied to the host when it is enabled.
    vis = False
    if vis:
        vis_all_detection(in_data[9].asnumpy(), destore_rois[:, :])

    self.assign(out_data[0], req[0], mx.nd.array(destore_rois))
    self.assign(out_data[1], req[1], mx.nd.array(destore_cls))