def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Detect on batches whose network outputs stack a current and a reference frame.

    :param predictor: object whose ``predict(data_batch)`` returns one output
        dict per entry of ``data_batch.data``
    :param data_batch: batch object exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: per-entry image scale; predicted boxes are divided by it
    :param cfg: config; only ``cfg.TEST.HAS_RPN`` is read
    :return: ``(scores_all, pred_boxes_all, data_dict_all)`` for the current frame

    NOTE(review): the reference-frame scores/boxes are computed and collected
    but are not part of the return value; the return is left unchanged so that
    existing callers keep working.
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    ref_scores_all = []
    ref_pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            concat_rois = output['concat_rois_output'].asnumpy()[:, 1:]
        else:
            # Fix: this branch used to bind `rois`, leaving `concat_rois`
            # undefined for the split below (NameError).
            # Assumes the supplied rois stack both frames — TODO confirm.
            concat_rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # first half: current-frame rois, second half: reference-frame rois
        rois, ref_rois = np.split(concat_rois, 2)

        cls_prob = output['cls_prob_reshape_output'].asnumpy()
        box_deltas = output['bbox_pred_reshape_output'].asnumpy()
        scores, ref_scores = cls_prob[0], cls_prob[1]
        bbox_deltas, ref_bbox_deltas = box_deltas[0], box_deltas[1]

        # post processing: decode deltas, clip to the image, undo the scaling
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        pred_boxes /= scale

        ref_pred_boxes = bbox_pred(ref_rois, ref_bbox_deltas)
        ref_pred_boxes = clip_boxes(ref_pred_boxes, im_shape[-2:])
        ref_pred_boxes /= scale

        pred_boxes_all.append(pred_boxes)
        scores_all.append(scores)
        ref_pred_boxes_all.append(ref_pred_boxes)
        ref_scores_all.append(ref_scores)
    return scores_all, pred_boxes_all, data_dict_all
def check_movements(ims, bef_ims, aft_ims, processed_roidb, delta_bef_roi,
                    delta_aft_roi,
                    save_dir='/home/wangshiyao/Documents/testdata/'):
    """Draw ground-truth boxes and their moved versions, then save three images.

    :param ims: current-frame image blob; squeezed and transposed ``(1, 2, 0)``
        before drawing (assumes a single CHW image — TODO confirm)
    :param bef_ims: "before" frame blob, same layout as ``ims``
    :param aft_ims: "after" frame blob, same layout as ``ims``
    :param processed_roidb: sequence whose first entry provides ``'image'``
        (path) and ``'boxes'`` (one box per row)
    :param delta_bef_roi: deltas applied to the boxes for the "before" frame
    :param delta_aft_roi: deltas applied to the boxes for the "after" frame
    :param save_dir: output directory; defaults to the previously hard-coded path
    """
    import os

    def _to_drawable(blob):
        # The original evaluated this conversion but discarded the result, so
        # it never took effect. A contiguous copy is what OpenCV draws on.
        # NOTE(review): int8 is kept from the original; uint8 may be intended.
        return np.ascontiguousarray(
            blob.squeeze().transpose(1, 2, 0).astype(np.int8))

    save_name = os.path.join(
        save_dir, processed_roidb[0]['image'].split('/')[-1])
    print('saving images to ' + save_name)
    boxes = processed_roidb[0]['boxes']

    ims = _to_drawable(ims)
    bef_ims = _to_drawable(bef_ims)
    aft_ims = _to_drawable(aft_ims)

    delta_bef_roi = np.array(delta_bef_roi).transpose(1, 0, 2)
    delta_aft_roi = np.array(delta_aft_roi).transpose(1, 0, 2)

    color, thickness = (55, 255, 155), 5
    for i in range(boxes.shape[0]):
        box = boxes[i]
        cv2.rectangle(ims, (int(box[0]), int(box[1])),
                      (int(box[2]), int(box[3])), color, thickness)

        bef_box = bbox_pred(box.reshape(1, -1), delta_bef_roi[i])[0]
        cv2.rectangle(bef_ims, (int(bef_box[0]), int(bef_box[1])),
                      (int(bef_box[2]), int(bef_box[3])), color, thickness)

        aft_box = bbox_pred(box.reshape(1, -1), delta_aft_roi[i])[0]
        cv2.rectangle(aft_ims, (int(aft_box[0]), int(aft_box[1])),
                      (int(aft_box[2]), int(aft_box[3])), color, thickness)

    # splitext strips only the final extension; `split('.')[-2]` returned the
    # wrong piece for names with extra dots and raised for names with none.
    stem = os.path.splitext(save_name)[0]
    imageio.imsave(save_name, ims)
    imageio.imsave(stem + '_bef' + '.JPEG', bef_ims)
    imageio.imsave(stem + '_aft' + '.JPEG', aft_ims)
def detect(self, batch, scales):
    """Forward a batch and decode per-image boxes and scores.

    :param batch: batch whose ``data`` is zipped with ``self.data_names``
    :param scales: per-device sequences of per-image scales
    :return: ``(scores, preds, data, im_ids)`` — per-image score arrays,
        per-image rescaled boxes, the name->data dict, and the concatenated
        image ids reported by the network
    """
    data = dict(zip(self.data_names, batch.data))
    outputs = self.forward(batch)

    scores = []
    preds = []
    # (num_devices, batch_size, 2): the last two dims of every input image
    shapes = np.array([im.shape[-2:] for im in data['data']])
    shapes = shapes.reshape(-1, self.batch_size, 2)
    im_ids = np.array([], dtype=int)

    for dev_out, dev_scales, dev_shapes in zip(outputs, scales, shapes):
        dev_rois = dev_out[self.rpn_output_names['rois']].asnumpy()
        rois_per_image = dev_rois.shape[0] / self.batch_size
        dev_scores = dev_out[self.rcnn_output_names['cls']].asnumpy()
        dev_deltas = dev_out[self.rcnn_output_names['bbox']].asnumpy()
        dev_ids = dev_out[self.rcnn_output_names['im_ids']].asnumpy()
        im_ids = np.hstack((im_ids, dev_ids.astype(int)))

        for idx in range(self.batch_size):
            # column 0 of each roi row is compared against the image index
            rows = np.flatnonzero(dev_rois[:, 0] == idx)
            assert len(rows) == rois_per_image, 'The number of rois per GPU should be fixed!'

            # decode deltas, clip to this image, then undo the scaling
            boxes = bbox_pred(dev_rois[rows, 1:], dev_deltas[idx])
            boxes = clip_boxes(boxes, dev_shapes[idx])
            boxes = boxes / dev_scales[idx]

            scores.append(dev_scores[idx])
            preds.append(boxes)
    return scores, preds, data, im_ids
def forward(self, is_train, req, in_data, out_data, aux):
    """Turn rois plus two-class box deltas into object proposals.

    :param is_train: unused here
    :param req: write requests; ``req[0]`` is used for the single output
    :param in_data:
        in_data[0] rois: (128, 5); first column is all zeros, the rest are
            true coordinates
        in_data[1] bbox_deltas: (128, 8)
        in_data[2] im_info: im.shape = (im_info[0], im_info[1]);
            im_info[2] is printed as the scale
    :param out_data: ``out_data[0]`` receives the (N, 5) proposals
    :param aux: unused here
    :return: None
    """
    rois = in_data[0].asnumpy()[:, 1:]  # (128, 4): drop the zeros column
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]

    if DEBUG:
        # print() with one argument behaves the same on Python 2 and 3
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Convert rois into proposals via bbox transformations
    proposals = bbox_pred(rois, bbox_deltas)

    # 2. Clip predicted boxes to the image
    proposals = clip_boxes(proposals, im_info[:2])

    # (128, 8): first 4 cols are background, last 4 cols are object
    proposals = proposals[:, 4:]  # (128, 4)

    # Put a zeros column back in front so the output has the rois layout
    zeros = np.zeros((proposals.shape[0], 1), dtype=proposals.dtype)
    proposals = np.hstack((zeros, proposals))
    self.assign(out_data[0], req[0], proposals)

    if DEBUG:
        print(proposals)
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Run the predictor and decode boxes for every entry of the batch.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: per-entry image scale; boxes are divided by it
    :param cfg: config; only ``cfg.TEST.HAS_RPN`` is read
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    net_outputs = predictor.predict(data_batch)
    data_dicts = [dict(zip(data_names, blobs)) for blobs in data_batch.data]

    all_scores, all_boxes = [], []
    for net_out, blobs, im_scale in zip(net_outputs, data_dicts, scales):
        # rois come from the network when an RPN is present, otherwise from
        # the input batch; the first column is dropped either way
        if cfg.TEST.HAS_RPN:
            rois = net_out['rois_output'].asnumpy()[:, 1:]
        else:
            rois = blobs['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        hw = blobs['data'].shape[-2:]

        cls_scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

        # decode, clip, then map back to the unscaled image
        boxes = clip_boxes(bbox_pred(rois, deltas), hw)
        boxes = boxes / im_scale

        all_scores.append(cls_scores)
        all_boxes.append(boxes)
    return all_scores, all_boxes, data_dicts
def forward(self, is_train, req, in_data, out_data, aux):
    """Decode rois with predicted deltas and emit clipped proposals.

    :param is_train: unused here
    :param req: write requests; ``req[0]`` is used
    :param in_data: ``[rois, cls_prob, bbox_deltas, im_info]``; the first roi
        column is dropped and ``im_info[0, :2]`` is used for clipping
    :param out_data: ``out_data[0]`` receives ``[0, x1, y1, x2, y2]`` rows
    :param aux: unused here
    :raises AssertionError: when ``self._cfg.CLASS_AGNOSTIC`` is falsy
    """
    rois = in_data[0].asnumpy()[:, 1:]

    assert self._cfg.CLASS_AGNOSTIC, 'Currently only support class agnostic'
    if self._cfg.CLASS_AGNOSTIC:
        # class-agnostic layout: columns 4:8 hold the deltas that are used
        bbox_deltas = in_data[2].asnumpy()[:, 4:8]
    else:
        # Only reachable when asserts are stripped (python -O).
        # NOTE(review): the index is taken over cls_prob[:, 1:] without a +1
        # offset, so it may select the wrong 4-column group — confirm.
        cls_prob = in_data[1].asnumpy()
        # `np.int` was removed from NumPy; the builtin int is the same dtype
        fg_cls_idx = np.argmax(cls_prob[:, 1:], axis=1).astype(int)
        rows = np.arange(fg_cls_idx.shape[0], dtype=int)
        all_deltas = in_data[2].asnumpy()
        # gather the 4 delta columns of each row's chosen class at once
        cols = fg_cls_idx[:, np.newaxis] * 4 + np.arange(4)
        bbox_deltas = all_deltas[rows[:, np.newaxis], cols]

    im_info = in_data[3].asnumpy()[0, :]

    # NOTE: deltas are used as-is; no BBOX_STDS / BBOX_MEANS de-normalisation
    # is applied in this operator.
    proposals = bbox_pred(rois, bbox_deltas)
    proposals = clip_boxes(proposals, im_info[:2])

    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Run the predictor and collect scores/boxes, optionally from learned NMS.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: per-entry image scale; boxes are divided by it
    :param cfg: config; reads ``TEST.HAS_RPN``, ``network.ROIDispatch`` and
        ``TEST.LEARN_NMS``
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    net_outputs = predictor.predict(data_batch)
    data_dicts = [dict(zip(data_names, blobs)) for blobs in data_batch.data]

    all_scores, all_boxes = [], []
    for net_out, blobs, im_scale in zip(net_outputs, data_dicts, scales):
        if cfg.TEST.HAS_RPN or cfg.network.ROIDispatch:
            rois = net_out['rois_output'].asnumpy()[:, 1:]
        else:
            rois = blobs['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = blobs['data'].shape

        if cfg.TEST.LEARN_NMS:
            # the network already emits final boxes and scores
            boxes = net_out['learn_nms_sorted_bbox'].asnumpy()
            cls_scores = net_out['nms_final_score_output'].asnumpy()
        else:
            cls_scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
            deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]
            # decode the deltas against the rois and clip to the image
            boxes = clip_boxes(bbox_pred(rois, deltas), im_shape[-2:])

        # both paths are mapped back to the unscaled image
        boxes = boxes / im_scale

        all_scores.append(cls_scores)
        all_boxes.append(boxes)
    return all_scores, all_boxes, data_dicts
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Run the predictor, decode boxes and also return a feature output.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: per-entry image scale; boxes are divided by it
    :param cfg: config; only ``cfg.TEST.HAS_RPN`` is read
    :return: ``(scores_all, pred_boxes_all, data_dict_all, feat)`` where
        ``feat`` is the first output's ``'feat_conv_3x3_relu_output'`` entry,
        or ``None`` when that key (or any output) is missing
    """
    output_all = predictor.predict(data_batch)
    # iterate directly instead of xrange(len(...)), which is Python-2-only
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to
        # transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)

    # `in` replaces dict.has_key (Python 2 only); also guards an empty result
    feat_key = 'feat_conv_3x3_relu_output'
    if output_all and feat_key in output_all[0]:
        feat = output_all[0][feat_key]
    else:
        feat = None
    return scores_all, pred_boxes_all, data_dict_all, feat
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Run the predictor, log batch sizes, and decode boxes per batch entry.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: per-entry image scale; boxes are divided by it
    :param cfg: config; only ``cfg.TEST.HAS_RPN`` is read
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    net_outputs = predictor.predict(data_batch)
    print('output length: {}'.format(len(net_outputs)))
    print('data batch length: {}'.format(len(data_batch.data)))
    data_dicts = [dict(zip(data_names, blobs)) for blobs in data_batch.data]

    all_scores = []
    all_boxes = []
    for net_out, blobs, im_scale in zip(net_outputs, data_dicts, scales):
        if cfg.TEST.HAS_RPN:
            roi_rows = net_out['rois_output'].asnumpy()
        else:
            roi_rows = blobs['rois'].asnumpy().reshape((-1, 5))
        rois = roi_rows[:, 1:]  # the first column is not a coordinate
        im_shape = blobs['data'].shape

        cls_scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

        # decode -> clip -> undo the test-time image scaling
        decoded = bbox_pred(rois, deltas)
        clipped = clip_boxes(decoded, im_shape[-2:])
        all_boxes.append(clipped / im_scale)
        all_scores.append(cls_scores)
    return all_scores, all_boxes, data_dicts
def detect(self, batch, scales):
    """Forward a batch and decode per-image boxes and scores.

    :param batch: batch whose ``data`` is zipped with ``self.data_names``
    :param scales: per-device sequences of per-image scales
    :return: ``(scores, preds, data, im_ids)`` — per-image score arrays,
        per-image rescaled boxes, the name->data dict, and the concatenated
        image ids reported by the network
    """
    data = dict(zip(self.data_names, batch.data))
    outputs = self.forward(batch)

    scores = []
    preds = []
    # (num_devices, batch_size, 2): the last two dims of every input image
    shapes = np.array([im.shape[-2:] for im in data['data']])
    shapes = shapes.reshape(-1, self.batch_size, 2)
    im_ids = np.array([], dtype=int)

    for dev_out, dev_scales, dev_shapes in zip(outputs, scales, shapes):
        dev_rois = dev_out[self.rpn_output_names['rois']].asnumpy()
        rois_per_image = dev_rois.shape[0] / self.batch_size
        dev_scores = dev_out[self.rcnn_output_names['cls']].asnumpy()
        dev_deltas = dev_out[self.rcnn_output_names['bbox']].asnumpy()
        dev_ids = dev_out[self.rcnn_output_names['im_ids']].asnumpy()
        im_ids = np.hstack((im_ids, dev_ids.astype(int)))

        for idx in range(self.batch_size):
            # column 0 of each roi row is compared against the image index
            rows = np.flatnonzero(dev_rois[:, 0] == idx)
            assert len(rows) == rois_per_image, 'The number of rois per GPU should be fixed!'

            # decode deltas, clip to this image, then undo the scaling
            boxes = bbox_pred(dev_rois[rows, 1:], dev_deltas[idx])
            boxes = clip_boxes(boxes, dev_shapes[idx])
            boxes = boxes / dev_scales[idx]

            scores.append(dev_scores[idx])
            preds.append(boxes)
    return scores, preds, data, im_ids
def im_batch_detect(predictor, data_batch, data_names, scales, cfg):
    """Detect on multi-image batches, splitting the results per image.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: per-entry sequence of per-image scales
    :param cfg: unused here
    :return: ``(scores_all, pred_boxes_all, data_dict_all)`` with one
        scores/boxes item per image
    """
    output_all = predictor.predict(data_batch)
    # direct iteration replaces xrange(len(...)), which is Python-2-only
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        im_infos = data_dict['im_info'].asnumpy()

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        rois = output['rois_output'].asnumpy()

        for im_idx in range(im_infos.shape[0]):
            # rows whose first column equals this image's index
            bb_idxs = np.where(rois[:, 0] == im_idx)[0]
            # `np.int` was removed from NumPy; builtin int is the same dtype
            im_shape = im_infos[im_idx, :2].astype(int)

            # post processing
            pred_boxes = bbox_pred(rois[bb_idxs, 1:], bbox_deltas[bb_idxs, :])
            pred_boxes = clip_boxes(pred_boxes, im_shape)

            # we used scaled image & roi to train, so it is necessary to
            # transform them back
            pred_boxes = pred_boxes / scale[im_idx]

            scores_all.append(scores[bb_idxs, :])
            pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
def im_batch_detect(predictor, data_batch, data_names, scales, cfg):
    """Detect on multi-image batches, splitting the results per image.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: per-entry sequence of per-image scales
    :param cfg: unused here
    :return: ``(scores_all, pred_boxes_all, data_dict_all)`` with one
        scores/boxes item per image
    """
    output_all = predictor.predict(data_batch)
    # direct iteration replaces xrange(len(...)), which is Python-2-only
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        im_infos = data_dict['im_info'].asnumpy()

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        rois = output['rois_output'].asnumpy()

        for im_idx in range(im_infos.shape[0]):
            # rows whose first column equals this image's index
            bb_idxs = np.where(rois[:, 0] == im_idx)[0]
            # `np.int` was removed from NumPy; builtin int is the same dtype
            im_shape = im_infos[im_idx, :2].astype(int)

            # post processing
            pred_boxes = bbox_pred(rois[bb_idxs, 1:], bbox_deltas[bb_idxs, :])
            pred_boxes = clip_boxes(pred_boxes, im_shape)

            # we used scaled image & roi to train, so it is necessary to
            # transform them back
            pred_boxes = pred_boxes / scale[im_idx]

            scores_all.append(scores[bb_idxs, :])
            pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
def det(mod, fn):
    """Run single-image detection with ``mod`` and show the detections.

    :param mod: module exposing ``forward``, ``output_names`` and
        ``get_outputs``
    :param fn: path of the image to read with OpenCV
    :raises IOError: when the image cannot be read
    """
    raw_img = cv2.imread(fn)
    if raw_img is None:
        # cv2.imread signals failure by returning None instead of raising
        raise IOError('cannot read image: {}'.format(fn))

    # pad the bottom so the image is at least as tall as it is wide
    if raw_img.shape[0] < raw_img.shape[1]:
        raw_img = cv2.copyMakeBorder(
            raw_img, 0, raw_img.shape[1] - raw_img.shape[0], 0, 0,
            cv2.BORDER_CONSTANT)

    im_shape = [IMG_H, IMG_W]  # reverse order
    img = cv2.resize(raw_img, (IMG_H, IMG_W))

    im_tensor = image.transform(img, [124, 117, 104], 0.0167)
    im_info = np.array([[IMG_H, IMG_W, 4.18300658e-01]])
    batch = mx.io.DataBatch([mx.nd.array(im_tensor), mx.nd.array(im_info)])

    start = time.time()
    mod.forward(batch)
    output_names = mod.output_names
    output_tensor = mod.get_outputs()
    # block until the first output is ready so the timing below is meaningful
    mod.get_outputs()[0].wait_to_read()
    print("time", time.time() - start, "secs.")

    output = dict(zip(output_names, output_tensor))
    rois = output['rois_output'].asnumpy()[:, 1:]
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    num_classes = 2
    nms = py_nms_wrapper(0.3)  # built once instead of once per class
    all_cls_dets = [[] for _ in range(num_classes)]
    for j in range(1, num_classes):
        # keep boxes scoring above 0.1 for class j, then suppress overlaps
        indexes = np.where(scores[:, j] > 0.1)[0]
        cls_scores = scores[indexes, j, np.newaxis]
        cls_boxes = pred_boxes[indexes, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores)).copy()
        keep = nms(cls_dets)
        all_cls_dets[j] = cls_dets[keep, :]

    for det_row in all_cls_dets[1]:
        cv2.rectangle(img,
                      (int(det_row[0]), int(det_row[1])),
                      (int(det_row[2]), int(det_row[3])),
                      (0, 0, 255), 1)
    cv2.imshow("w", img)
    cv2.waitKey()
def double_im_detect(predictor, data_batch, data_names, cfg):
    """Detect on a current and a reference frame carried by each batch entry.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param cfg: unused here
    :return: ``(scores_all, pred_boxes_all, ref_scores_all,
        ref_pred_boxes_all, data_dict_all)``
    """
    output_all = predictor.predict(data_batch)
    # direct iteration replaces xrange(len(...)), which is Python-2-only
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    ref_scores_all = []
    ref_pred_boxes_all = []
    for output, data_dict in zip(output_all, data_dict_all):
        # Fix: convert to NumPy first so `scale` is a plain scalar for the
        # divisions below, as the other detectors in this file do.
        scale = data_dict['im_info'].asnumpy()[0, 2]
        rois = output['rois_output'].asnumpy()[:, 1:]
        ref_rois = output['ref_rois_output'].asnumpy()[:, 1:]
        im_shape = data_dict['data'].shape
        # Fix: `.shape` was missing, so the clip below sliced the image data
        # itself rather than its dimensions.
        ref_im_shape = data_dict['ref_data'].shape

        # save output: index 0 is the current frame, index 1 the reference
        cls_prob = output['cls_prob_output'].asnumpy()
        box_deltas = output['bbox_pred_output'].asnumpy()
        scores, ref_scores = cls_prob[0], cls_prob[1]
        bbox_deltas, ref_bbox_deltas = box_deltas[0], box_deltas[1]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        ref_pred_boxes = bbox_pred(ref_rois, ref_bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        ref_pred_boxes = clip_boxes(ref_pred_boxes, ref_im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to
        # transform them back
        pred_boxes = pred_boxes / scale
        ref_pred_boxes = ref_pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
        ref_scores_all.append(ref_scores)
        ref_pred_boxes_all.append(ref_pred_boxes)
    return scores_all, pred_boxes_all, ref_scores_all, ref_pred_boxes_all, data_dict_all
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Detect with three size-specific output groups stacked along the roi axis.

    Rois are bucketed by ``sqrt(w * h)`` into three overlapping ranges
    (<= 90, 30..160, >= 90). Each range reads its scores and deltas from its
    own block of rows: range k uses rows offset by ``k * num_rois``.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: per-entry image scale; boxes are divided by it
    :param cfg: config; only ``cfg.TEST.HAS_RPN`` is read
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    net_outputs = predictor.predict(data_batch)
    data_dicts = [dict(zip(data_names, blobs)) for blobs in data_batch.data]

    all_scores, all_boxes = [], []
    for net_out, blobs, im_scale in zip(net_outputs, data_dicts, scales):
        if cfg.TEST.HAS_RPN:
            rois = net_out['rois_output'].asnumpy()[:, 1:]
        else:
            rois = blobs['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = blobs['data'].shape

        cls_scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

        # side length of the square with the same area as each roi
        roi_w = rois[:, 2] - rois[:, 0] + 1
        roi_h = rois[:, 3] - rois[:, 1] + 1
        roi_size = np.sqrt(roi_w * roi_h)
        num_rois = rois.shape[0]

        small = np.flatnonzero(roi_size <= 90)
        medium = np.flatnonzero((roi_size >= 30) & (roi_size <= 160))
        large = np.flatnonzero(roi_size >= 90)

        # a roi falling in two ranges is kept twice, once per output group
        roi_rows = np.hstack((small, medium, large))
        out_rows = np.hstack((small, medium + num_rois, large + num_rois * 2))

        rois = rois[roi_rows, :]
        cls_scores = cls_scores[out_rows, :]
        deltas = deltas[out_rows, :]

        # decode, clip, then map back to the unscaled image
        boxes = bbox_pred(rois, deltas)
        boxes = clip_boxes(boxes, im_shape[-2:])
        boxes = boxes / im_scale

        all_scores.append(cls_scores)
        all_boxes.append(boxes)
    return all_scores, all_boxes, data_dicts
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Detect on a single-entry batch; when DEBUG is set, dump a visualisation.

    :param predictor: object whose ``predict(data_batch)`` yields output dicts
    :param data_batch: batch whose ``data`` is zipped with ``data_names`` as
        one dict
    :param data_names: names of the entries of ``data_batch.data``
    :param scales: image scales; boxes are divided by the matching scale
    :param cfg: config; reads ``TRAIN.BBOX_STDS`` and ``dataset.NUM_CLASSES``
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    net_outputs = predictor.predict(data_batch)
    data_dicts = [dict(zip(data_names, data_batch.data))]

    all_scores = []
    all_boxes = []
    for net_out, blobs, im_scale in zip(net_outputs, data_dicts, scales):
        rois = net_out['rois_output'].asnumpy()[:, 1:]
        im_shape = blobs['data'].shape
        cls_scores = net_out['cls_prob_reshape_output'].asnumpy()[0]

        # multiply the deltas by the stds, repeated once per class
        std_row = np.tile(np.array(cfg.TRAIN.BBOX_STDS), cfg.dataset.NUM_CLASSES)
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0] * std_row

        boxes = bbox_pred(rois, deltas)
        boxes = clip_boxes(boxes, im_shape[-2:])
        boxes = boxes / im_scale

        all_scores.append(cls_scores)
        all_boxes.append(boxes)

        if not DEBUG:
            continue

        print("im shape: ", im_shape)
        print(boxes.shape)
        print(cls_scores.shape)

        # best class and its score for each roi; class 0 is dropped
        best_cls = cls_scores.argmax(axis=1)
        best_val = cls_scores[np.arange(boxes.shape[0]), best_cls]
        fg_rows = np.where(best_cls > 0)[0]
        best_cls = best_cls[fg_rows]
        print(boxes)
        # back to the coordinates of the network input image
        vis_boxes = boxes[fg_rows] * im_scale
        best_val = best_val[fg_rows]

        # undo the input normalisation to recover an 8-bit image
        img = blobs['data'].asnumpy().transpose((0, 2, 3, 1))[0]
        chan_std = np.array([[[0.229, 0.224, 0.225]]])
        chan_mean = np.array([[[0.485, 0.456, 0.406]]])
        img = (img * chan_std + chan_mean) * 255
        img = np.clip(img, 0, 255)
        img = img.astype(np.uint8)
        print(type(img))
        canvas = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        print(img.shape)
        print(best_val)

        # draw only the (up to) 30 highest-scoring detections
        top_rows = best_val.argsort()[-30:]
        for row, row_boxes in enumerate(vis_boxes):
            if row not in top_rows:
                continue
            cls_id = best_cls[row]
            box = row_boxes[cls_id * 4:(cls_id + 1) * 4].astype(np.int64)
            print(box)
            top_left = tuple(box[:2])
            cv2.rectangle(canvas, top_left, tuple(box[2:]), (255, 0, 0), 1)
            cv2.putText(canvas, names[cls_id] + " " + str(best_val[row]),
                        top_left, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 1)
        cv2.imwrite("./det_images/det_img_{:3f}.png".format(np.random.randn()), canvas)
    return all_scores, all_boxes, data_dicts
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Detect on multi-image batches, optionally predicting keypoints.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: unused; each image's scale is read from ``im_info[i, 2]``
    :param cfg: config; reads ``TEST.HAS_RPN``, ``TEST.BATCH_IMAGES`` and
        ``network.PREDICT_KEYPOINTS``
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``, or
        ``(scores_all, pred_boxes_all, pred_kps_all, data_dict_all)`` when
        keypoint prediction is enabled
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    pred_kps_all = []
    predict_kps = cfg.network.PREDICT_KEYPOINTS
    for output, data_dict in zip(output_all, data_dict_all):
        if cfg.TEST.HAS_RPN:
            batch_rois = output['rois_output'].asnumpy()
        else:
            # Fix: this branch used to bind `rois` with the first column
            # stripped, leaving `batch_rois` undefined below (NameError).
            # The first column is kept because it is matched against the
            # image index in the loop.
            batch_rois = data_dict['rois'].asnumpy().reshape((-1, 5))
        im_shape = data_dict['data'].shape

        # save output
        batch_scores = output['cls_prob_reshape_output'].asnumpy()
        batch_bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()
        batch_im_info = data_dict['im_info'].asnumpy()

        for i in range(cfg.TEST.BATCH_IMAGES):
            scale = batch_im_info[i, 2]
            if scale < 1e-6:
                # a near-zero scale ends processing of this batch entry
                break
            indices = np.where(batch_rois[:, 0] == i)[0]
            rois = batch_rois[indices, 1:]
            scores = batch_scores[i]
            bbox_deltas = batch_bbox_deltas[i]

            # post processing
            pred_boxes = bbox_pred(rois, bbox_deltas)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

            # we used scaled image & roi to train, so it is necessary to
            # transform them back
            pred_boxes = pred_boxes / scale

            scores_all.append(scores)
            pred_boxes_all.append(pred_boxes)

            # NOTE(review): kept inside the per-image loop so that `rois` and
            # `scale` always belong to the image being processed; with the
            # asserted batch size of 1 this runs once per batch entry.
            if predict_kps:
                assert cfg.TEST.BATCH_IMAGES == 1, "only support batch_size=1"
                kps_deltas = output['kps_pos_pred_reshape_output'].asnumpy()  # [N, 2*K, G, G]
                kps_probs = output['kps_prob_output'].asnumpy()  # [N*K, G*G]
                pred_kps = predict_keypoints(rois, kps_probs, kps_deltas,
                                             scale=scale)
                pred_kps_all.append(pred_kps)

    if predict_kps:
        return scores_all, pred_boxes_all, pred_kps_all, data_dict_all
    return scores_all, pred_boxes_all, data_dict_all
def im_detect(predictor, data_batch, data_names, scales, cfg, aggr_feats=False):
    """Detect per batch entry, optionally also returning aggregated features.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: per-entry image scale; boxes are divided by it
    :param cfg: config; reads ``TEST.HAS_RPN`` and ``TEST.KEY_FRAME_INTERVAL``
    :param aggr_feats: when true, also collect the
        ``'_plus{2 * KEY_FRAME_INTERVAL - 1}_output'`` entry of every output
    :return: a list of ``(scores, pred_boxes, data_dict)`` tuples, or that
        list plus ``aggr_feats_all`` when ``aggr_feats`` is true
    """
    output_all = predictor.predict(data_batch)
    # direct iteration replaces xrange(len(...)), which is Python-2-only
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    aggr_feats_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if 'blockgrad0_output' in output:
            # alias blockgrad{i}_output under the names used below
            aliases = [
                '_',
                'rois_output',
                'cls_prob_reshape_output',
                'bbox_pred_reshape_output',
                '_plus{}_output'.format(cfg.TEST.KEY_FRAME_INTERVAL * 2 - 1),
            ]
            for i, key in enumerate(aliases):
                output[key] = output['blockgrad{}_output'.format(i)]

        if aggr_feats:
            aggr_feats_all.append(
                output['_plus{}_output'.format(
                    cfg.TEST.KEY_FRAME_INTERVAL * 2 - 1)])

        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to
        # transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)

    # list(...) keeps the Python 2 return type; a bare zip() on Python 3 is a
    # one-shot iterator that cannot be indexed or re-iterated.
    detections = list(zip(scores_all, pred_boxes_all, data_dict_all))
    if aggr_feats:
        return detections, aggr_feats_all
    return detections
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Detect on a single-entry batch and return unscaled boxes and scores.

    :param predictor: object whose ``predict(data_batch)`` yields output dicts
    :param data_batch: batch whose ``data`` is zipped with ``data_names`` as
        one dict
    :param data_names: names of the entries of ``data_batch.data``
    :param scales: image scales; boxes are divided by the matching scale
    :param cfg: unused here
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    output_all = predictor.predict(data_batch)
    # Fix: dict() takes at most one positional argument; the names and data
    # have to be zipped into pairs first.
    data_dict_all = [dict(zip(data_names, data_batch.data))]

    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        rois = output['rois_output'].asnumpy()[:, 1:]
        # Fix: the attribute is `shape`; `.im_shape` raised AttributeError
        im_shape = data_dict['data'].shape

        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # decode, clip to the image, then undo the input scaling
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
def im_detect_feats_stats(predictor, data_batch, data_names, scales, cfg,
                          stat_type, scores_field='cls_prob_reshape'):
    """Detect and additionally collect roi embeddings and an optional statistic.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: per-entry image scale; boxes are divided by it
    :param cfg: config; only ``cfg.TEST.HAS_RPN`` is read
    :param stat_type: ``'ratio_val'`` collects ``fc_val_reg_2_output``,
        ``'feat_pred'`` collects ``fc_score_hist_3_output``; any other value
        collects nothing
    :param scores_field: output name (without the ``'_output'`` suffix) that
        the scores are read from
    :return: ``(scores_all, pred_boxes_all, data_dict_all, rois_feats_all,
        stats_all)``
    """
    net_outputs = predictor.predict(data_batch)
    data_dicts = [dict(zip(data_names, blobs)) for blobs in data_batch.data]

    all_scores, all_boxes = [], []
    all_feats, all_stats = [], []
    for net_out, blobs, im_scale in zip(net_outputs, data_dicts, scales):
        if cfg.TEST.HAS_RPN:
            rois = net_out['rois_output'].asnumpy()[:, 1:]
        else:
            rois = blobs['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = blobs['data'].shape

        cls_scores = net_out[scores_field + '_output'].asnumpy()[0]
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

        # per-roi embeddings, shape: [#rois, Embed_dim]
        all_feats.append(net_out['psp_final_embed_output'].asnumpy())

        if stat_type == 'ratio_val':
            all_stats.append(net_out['fc_val_reg_2_output'].asnumpy())
        if stat_type == 'feat_pred':
            all_stats.append(net_out['fc_score_hist_3_output'].asnumpy())

        # decode, clip, then map back to the unscaled image
        boxes = clip_boxes(bbox_pred(rois, deltas), im_shape[-2:])
        boxes = boxes / im_scale

        all_scores.append(cls_scores)
        all_boxes.append(boxes)
    return all_scores, all_boxes, data_dicts, all_feats, all_stats
def detect(self, batch, scales):
    """Forward a batch and decode per-image boxes, scores and focus maps.

    :param batch: batch whose ``data`` is zipped with ``self.data_names``
    :param scales: per-device sequences of per-image scales
    :return: ``(scores, preds, data, im_ids, maps, chip_ids)``; ``maps`` stays
        empty unless the first output contains the ``'scale_map'`` entry
    """
    data = dict(zip(self.data_names, batch.data))
    outputs = self.forward(batch)

    scores = []
    preds = []
    maps = []
    im_ids = np.array([], dtype=int)
    chip_ids = np.array([], dtype=int)
    has_focus_maps = self.rcnn_output_names['scale_map'] in outputs[0]

    for dev_out, dev_scales in zip(outputs, scales):
        dev_rois = dev_out[self.rpn_output_names['rois']].asnumpy()
        rois_per_image = dev_rois.shape[0] / self.batch_size
        if has_focus_maps:
            scale_prob = dev_out[self.rcnn_output_names['scale_map']].asnumpy()
        dev_scores = dev_out[self.rcnn_output_names['cls']].asnumpy()
        dev_deltas = dev_out[self.rcnn_output_names['bbox']].asnumpy()
        dev_infos = dev_out[self.rcnn_output_names['im_info']].asnumpy()
        dev_shapes = dev_infos[:, :2]  # first two im_info columns per image

        dev_im_ids = dev_out[self.rcnn_output_names['im_ids']].asnumpy()
        im_ids = np.hstack((im_ids, dev_im_ids.astype(int)))
        dev_chip_ids = dev_out[self.rcnn_output_names['chip_ids']].asnumpy()
        chip_ids = np.hstack((chip_ids, dev_chip_ids.astype(int)))

        for idx in range(self.batch_size):
            # column 0 of each roi row is compared against the image index
            rows = np.flatnonzero(dev_rois[:, 0] == idx)
            assert len(rows) == rois_per_image, 'The number of rois per GPU should be fixed!'

            # decode deltas, clip to this image, then undo the scaling
            boxes = bbox_pred(dev_rois[rows, 1:], dev_deltas[idx])
            boxes = clip_boxes(boxes, dev_shapes[idx])
            boxes = boxes / dev_scales[idx]

            scores.append(dev_scores[idx])
            preds.append(boxes)
            if has_focus_maps:
                maps.append(scale_prob[idx])
    return scores, preds, data, im_ids, maps, chip_ids
def forward(self, is_train, req, in_data, out_data, aux):
    """Refine rois with the deltas of their selected class.

    :param is_train: unused here
    :param req: write requests; ``req[0]`` is used
    :param in_data: ``[rois, bbox_delta, cls_prob, im_info]``
    :param out_data: ``out_data[0]`` receives the rois with columns 1: replaced
        by the refined boxes
    :param aux: unused here
    """
    bottom_rois = in_data[0].asnumpy()
    bbox_delta = in_data[1].asnumpy()
    cls_prob = in_data[2].asnumpy()
    im_info = in_data[3].asnumpy()
    num_rois = bottom_rois.shape[0]

    # Pick the class whose deltas are used for each roi: class 1 everywhere
    # when class-agnostic, otherwise the best-scoring non-background class.
    if self._bbox_class_agnostic:
        class_idx = np.ones((num_rois))
    else:
        class_idx = np.argmax(cls_prob[:, 1:], axis=1) + 1

    # Columns [4c, 4c + 1, 4c + 2, 4c + 3] hold the deltas of class c
    col_idx = class_idx[:, np.newaxis] * 4 + np.arange(4)
    row_idx = np.arange(num_rois, dtype=np.intp)[:, np.newaxis]
    bbox_delta = bbox_delta[row_idx, col_idx.astype(np.intp)]

    # De-normalise each delta component: delta[i] * std[i] + mean[i]
    means = np.array(self._bbox_means)
    stds = np.array(self._bbox_stds)
    bbox_delta = np.hstack([
        (bbox_delta[:, k] * stds[k] + means[k])[:, np.newaxis]
        for k in range(4)
    ])

    # Decode against the roi coordinates and optionally clip to the image
    proposal = bbox_pred(bottom_rois[:, 1:], bbox_delta)
    if self._b_clip_boxes:
        proposal = clip_boxes(proposal, im_info[0, :2])

    # Keep column 0 of the input rois and overwrite the coordinates in place
    bottom_rois[:, 1:] = proposal
    self.assign(out_data[0], req[0], bottom_rois)
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Detect and also return roi scores, rois and roi features.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: per-entry image scale; boxes and rois are divided by it
    :param cfg: config; only ``cfg.TEST.HAS_RPN`` is read
    :return: ``(scores_all, pred_boxes_all, roi_score_all, rois_all,
        roi_feat_all, data_dict_all)``
    """
    net_outputs = predictor.predict(data_batch)
    data_dicts = [dict(list(zip(data_names, blobs))) for blobs in data_batch.data]

    all_scores, all_boxes = [], []
    all_roi_scores, all_rois, all_roi_feats = [], [], []
    for net_out, blobs, im_scale in zip(net_outputs, data_dicts, scales):
        if cfg.TEST.HAS_RPN:
            net_rois = net_out['rois_output'].asnumpy()
            # every roi must carry 0 in its first column
            assert np.all(net_rois[:, 0] == 0.)
            rois = net_rois[:, 1:]
        else:
            rois = blobs['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = blobs['data'].shape

        cls_scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

        # order rois, their scores and their features by descending roi score
        roi_score = net_out['rois_score'].asnumpy()
        order = roi_score.flatten().argsort()[::-1]
        roi_score = roi_score[order]
        roi_feat = net_out['roi_feat_output_output'].asnumpy()[order]
        rois = rois[order]

        # NOTE(review): the rois are reordered but `deltas` and `cls_scores`
        # are not — confirm they are expected to line up after the sort.
        boxes = bbox_pred(rois, deltas)
        boxes = clip_boxes(boxes, im_shape[-2:])

        # map both the predictions and the rois back to the unscaled image
        boxes = boxes / im_scale
        rois = rois / im_scale

        all_scores.append(cls_scores)
        all_boxes.append(boxes)
        all_roi_scores.append(roi_score)
        all_rois.append(rois)
        all_roi_feats.append(roi_feat)
    return all_scores, all_boxes, all_roi_scores, all_rois, all_roi_feats, data_dicts
def forward(self, is_train, req, in_data, out_data, aux):
    """Decode class-agnostic proposals and keep the top-scoring fraction.

    :param is_train: unused here
    :param req: write requests; ``req[0]`` and ``req[1]`` are used
    :param in_data: ``[rois, bbox_deltas, im_info, cls_prob]``
    :param out_data: ``out_data[0]`` receives the kept ``[0, x1, y1, x2, y2]``
        rows, ``out_data[1]`` the indices of the kept rois
    :param aux: unused here
    """
    rois = in_data[0].asnumpy()[:, 1:]
    deltas = in_data[1].asnumpy()[:, 4:8]
    im_info = in_data[2].asnumpy()[0, :]
    fg_prob = in_data[3].asnumpy()[:, 1:]  # ignore bg

    # rank rois by their best foreground score and keep a `self._top` fraction
    num_keep = int(rois.shape[0] * self._top)
    best_score = np.amax(fg_prob, axis=1)
    keep_index = np.argsort(-best_score)[:num_keep]

    boxes = clip_boxes(bbox_pred(rois, deltas), im_info[:2])

    # prepend a zeros column to every proposal
    zeros_col = np.zeros((boxes.shape[0], 1), dtype=np.float32)
    blob = np.hstack((zeros_col, boxes.astype(np.float32, copy=False)))

    self.assign(out_data[0], req[0], blob[keep_index, :])
    self.assign(out_data[1], req[1], keep_index)
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Detect per batch entry, re-weighting class scores with DCR scores.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per batch entry
    :param data_batch: batch exposing a ``data`` list
    :param data_names: names zipped with each ``data_batch.data`` entry
    :param scales: per-entry image scale; boxes are divided by it
    :param cfg: config; reads ``TEST.HAS_RPN`` and ``DCR.top``
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        if cfg.DCR.top == 1:
            # DCR scored every roi: multiply element-wise
            dcr_scores = output['dcr_prob_reshape_output'].asnumpy()[0]
            scores = scores * dcr_scores
        elif cfg.DCR.top > 0:
            # DCR scored only the kept rois: the others are multiplied by 1
            dcr_scores = output['dcr_prob_reshape_output'].asnumpy()[0]
            # `np.int` was removed from NumPy; builtin int is the same dtype
            keep_index = output['keep_index_reshape_output'].asnumpy().astype(int)[0]
            dcr_final_scores = np.ones_like(scores)
            dcr_final_scores[keep_index, :] = dcr_scores
            scores = scores * dcr_final_scores
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to
        # transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
def forward(self, is_train, req, in_data, out_data, aux):
    """Turn RPN scores and box deltas into a fixed-size set of RoIs.

    Steps, in order: lay anchors on a grid spaced ``self._feat_stride``
    apart, decode the predicted deltas with ``bbox_pred``, clip to the
    image, drop boxes rejected by ``self._filter_boxes``, keep the
    ``pre_nms_topN`` best scores, run NMS, then keep (or randomly pad up
    to) ``post_nms_topN`` survivors.

    Inputs read:
        in_data[0]: scores; channels before ``self._num_anchors`` are
            skipped as background probabilities.
        in_data[1]: box deltas.
        in_data[2]: image info; row 0 is used, entries 0/1 as image
            height/width and entry 2 as the scale applied to ``min_size``.

    Outputs written:
        out_data[0]: RoIs with a leading zero batch-index column.
        out_data[1]: their scores, only when ``self._output_score`` is set.

    Raises:
        ValueError: if the first input holds more than one image.
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

    if in_data[0].shape[0] > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size
    stride = self._feat_stride
    num_anchors = self._num_anchors

    # The first ``num_anchors`` channels are background probabilities;
    # only the second part is kept.
    scores = in_data[0].asnumpy()[:, num_anchors:, :, :]
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors, using
    # the real image size instead of the padded feature-map size.
    grid_h = int(im_info[0] / stride)
    grid_w = int(im_info[1] / stride)

    if DEBUG:
        print('score map size: {}'.format(scores.shape))
        print("resudial: {}".format(
            (scores.shape[2] - grid_h, scores.shape[3] - grid_w)))

    # One (x, y, x, y) shift per grid cell, in row-major cell order.
    grid_x, grid_y = np.meshgrid(np.arange(0, grid_w) * stride,
                                 np.arange(0, grid_h) * stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.column_stack((flat_x, flat_y, flat_x, flat_y))

    # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts, then flatten
    # to (K * A, 4) so rows are ordered by cell first, anchor second.
    num_cells = shifts.shape[0]
    anchors = (self._anchors.reshape((1, num_anchors, 4)) +
               shifts.reshape((num_cells, 1, 4)))
    anchors = anchors.reshape((num_cells * num_anchors, 4))

    # Bring the deltas and scores into the same (h, w, a) row order as
    # the anchors: crop to the grid, move channels last, flatten.
    bbox_deltas = self._clip_pad(bbox_deltas, (grid_h, grid_w))
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    scores = self._clip_pad(scores, (grid_h, grid_w))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals, then
    # 2. clip predicted boxes to the image.
    proposals = clip_boxes(bbox_pred(anchors, bbox_deltas), im_info[:2])

    # 3. Remove boxes whose height or width is below the threshold
    # (min_size is converted to the input image scale in im_info[2]).
    valid = self._filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[valid, :]
    scores = scores[valid]

    # 4./5. Sort by score, highest first, and keep the top pre_nms_topN.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6./7. Apply NMS and keep the top post_nms_topN survivors.
    keep = nms(np.hstack((proposals, scores)).astype(np.float32))
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # Pad with random repeats so the output size remains unchanged.
    missing = post_nms_topN - len(keep)
    if missing > 0:
        keep = np.hstack((keep, npr.choice(keep, size=missing)))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 8. Output the RoIs. Only a single input image is supported, so
    # every batch index is 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1],
                    scores.astype(np.float32, copy=False))
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate RoIs from per-stride RPN outputs.

    For every stride in ``self._feat_stride`` anchors are generated,
    decoded with ``bbox_pred``, clipped and filtered; the results of all
    strides are merged, ranked by score, passed through NMS and trimmed
    (or randomly padded) to ``post_nms_topN`` rows.

    The last entry of ``in_data`` is read as image info (row 0; entries
    0/1 as image height/width, entry 2 as the scale applied to
    ``min_size``). Which other entries hold class probabilities and box
    deltas is selected by ``LAYER_NUM`` (see the note below).

    Outputs written:
        out_data[0]: RoIs with a leading zero batch-index column.
        out_data[1]: their scores, only when ``self._output_score`` is set.

    Raises:
        ValueError: if the first input holds more than one image.
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")

    # NOTE(review): the layer count used to be computed as
    # ``len(in_data) / 2`` and was then immediately overwritten with the
    # constant 11, so only the 11 layout is ever used. The constant is
    # kept to preserve behaviour; the other layouts are the alternatives
    # the original if/elif chain listed.
    LAYER_NUM = 11

    # LAYER_NUM -> (stride key, class-prob index, bbox-delta index) into
    # ``in_data``.
    # NOTE(review): the layout for 2 indexes entries 3/4 and 8/9, which
    # looks copied from the 5-level layout -- confirm before enabling it.
    input_layouts = {
        7: (('stride64', 6, 13), ('stride32', 5, 12), ('stride16', 4, 11),
            ('stride8', 3, 10), ('stride4', 2, 9), ('stride2', 1, 8),
            ('stride1', 0, 7)),
        6: (('stride64', 5, 11), ('stride32', 4, 10), ('stride16', 3, 9),
            ('stride8', 2, 8), ('stride4', 1, 7), ('stride2', 0, 6)),
        5: (('stride64', 4, 9), ('stride32', 3, 8), ('stride16', 2, 7),
            ('stride8', 1, 6), ('stride4', 0, 5)),
        2: (('stride64', 4, 9), ('stride32', 3, 8)),
        11: (('stride64', 0, 1),),
        1: (('stride1', 0, 1),),
        3: (('stride64', 2, 5), ('stride32', 1, 4), ('stride1', 0, 3)),
    }
    layout = input_layouts[LAYER_NUM]
    cls_prob_dict = {key: in_data[cls_idx] for key, cls_idx, _ in layout}
    bbox_pred_dict = {key: in_data[box_idx] for key, _, box_idx in layout}

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    # The image info does not depend on the stride, so it is copied to
    # the host once instead of once per pyramid level.
    im_info = in_data[-1].asnumpy()[0, :]

    proposal_list = []
    score_list = []
    channel_list = []
    for s in self._feat_stride:
        stride = int(s)
        level_key = 'stride' + str(s)
        sub_anchors = generate_anchors(base_size=stride,
                                       scales=self._scales,
                                       ratios=self._ratios)
        # The first ``_num_anchors`` channels are skipped.
        scores = cls_prob_dict[level_key].asnumpy()[:, self._num_anchors:, :, :]
        if DEBUG:
            scores1 = cls_prob_dict[level_key].asnumpy()
            print("scores.shape:" + str(scores.shape))
            print("scores1.shape:" + str(scores1.shape))
        bbox_deltas = bbox_pred_dict[level_key].asnumpy()

        # 1. Generate proposals from bbox_deltas and shifted anchors,
        # using the real image size instead of padded feature map sizes.
        height, width = int(im_info[0] / stride), int(im_info[1] / stride)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors: add A anchors (1, A, 4) to
        # K cell shifts (K, 1, 4) and reshape to (K * A, 4).
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Bring deltas and scores into the same (h, w, a) row order as
        # the anchors: crop, move channels last, flatten.
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
        if DEBUG:
            print("scores[:100]:" + str(scores[:50]))
        # Remember which stride every proposal came from.
        channels = np.ones((scores.shape)) * stride

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width below
        # the threshold (min_size converted to the scale in im_info[2])
        if DEBUG:
            print(str(min_size))
            print(str(im_info[2]))
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        if DEBUG:
            print("proposals3:" + str(proposals[0:10]))
        scores = scores[keep]
        channels = channels[keep]

        proposal_list.append(proposals)
        score_list.append(scores)
        channel_list.append(channels)

    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)
    channels = np.vstack(channel_list)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    channels = channels[order]
    if DEBUG:
        print('-------1-------')
        print(channels.shape)
        for s in self._feat_stride:
            print("stride:" + str(s))
            print(len(np.where(channels == float(s))[0]))
        print("proposals:" + str(proposals[0:20]))

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure output size remains unchanged
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]
    channels = channels[keep]
    if DEBUG:
        print('-------2-------')
        print(channels.shape)
        for s in self._feat_stride:
            print("stride:" + str(s))
            print(len(np.where(channels == float(s))[0]))
        print("proposals:" + str(proposals[0:20]))
        print("scores:" + str(scores[0:20]))
        # NOTE(review): the original indentation was lost, so it is
        # assumed this dump belongs to the DEBUG block -- confirm that
        # nothing reads channels.txt outside of debugging.
        with open('channels.txt', 'w') as f_chan:
            for ii in range(channels.shape[0]):
                f_chan.write(str(channels[ii][0]) + ' ')

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1],
                    scores.astype(np.float32, copy=False))
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Run the predictor on a batch and collect boxes and scores per item.

    Two modes, chosen by ``cfg.TEST.LEARN_NMS``:

    * learned NMS: boxes and scores come directly from the network
      outputs; the concatenated arrays are split in two halves (current
      and reference). Rows flagged in ``custom0_nms_multi_target`` are
      stored in the item's data dict under ``nms_multi_target`` and
      ``ref_nms_multi_target`` as ``[box..., class index, nms score,
      pre-nms score]``.
    * otherwise: boxes are decoded from ``bbox_pred_reshape_output`` with
      ``bbox_pred``, clipped to the input ``data`` shape and divided by
      ``scale``.

    Returns:
        (scores_all, pred_boxes_all, data_dict_all). The reference-half
        boxes and scores are collected locally but are not part of the
        return value.
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    # These two lists were appended to without ever being created, which
    # raised NameError as soon as the learned-NMS branch ran.
    ref_scores_all = []
    ref_pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            concat_rois = output['concat_rois_output'].asnumpy()[:, 1:]
            rois, ref_rois = np.split(concat_rois, 2)
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        if cfg.TEST.LEARN_NMS:
            concat_pred_boxes = output['concat_sorted_bbox_output'].asnumpy()
            concat_nms_scores = output['nms_final_score_output'].asnumpy()
            concat_pre_nms_scores = output['pre_nms_score_output'].asnumpy()
            # we used scaled image & roi to train, so it is necessary to
            # transform them back
            concat_pred_boxes = concat_pred_boxes / scale
            concat_multi_scores = np.dstack(
                (concat_nms_scores, concat_pre_nms_scores))

            pred_boxes, ref_pred_boxes = np.split(concat_pred_boxes, 2)
            scores, ref_scores = np.split(concat_multi_scores, 2)
            pred_boxes_all.append(pred_boxes)
            ref_pred_boxes_all.append(ref_pred_boxes)
            scores_all.append(scores)
            ref_scores_all.append(ref_scores)

            nms_multi_target = output['custom0_nms_multi_target'].asnumpy()
            # Locate the flagged entries once; the first two index arrays
            # select rows in the box/score arrays and the second one is
            # stored as the class column.
            target_idx = np.where(nms_multi_target)
            hit = target_idx[:2]
            # construct gt style nms_multi_target, 0:30 classes
            concat_target_boxes = np.hstack((
                concat_pred_boxes[hit],
                target_idx[1][:, np.newaxis],
                concat_nms_scores[hit][:, np.newaxis],
                concat_pre_nms_scores[hit][:, np.newaxis],
            ))
            target_boxes, ref_target_boxes = np.split(concat_target_boxes, 2)
            data_dict['nms_multi_target'] = target_boxes
            data_dict['ref_nms_multi_target'] = ref_target_boxes
        else:
            scores = output['cls_prob_reshape_output'].asnumpy()[0]
            bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

            # post processing
            pred_boxes = bbox_pred(rois, bbox_deltas)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

            # we used scaled image & roi to train, so it is necessary to
            # transform them back
            pred_boxes = pred_boxes / scale

            scores_all.append(scores)
            pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
def forward(self, is_train, req, in_data, out_data, aux):
    """Decode multi-class boxes on four pyramid levels and keep the best.

    Inputs read:
        in_data[0..3]: box deltas for strides 16, 32, 64 and 128.
        in_data[4..7]: class scores for strides 16, 32, 64 and 128; the
            channels are consumed in ``self._num_classes`` consecutive
            groups of ``self._num_anchors``.
        in_data[8]: image info; the first two entries of row 0 are passed
            to ``clip_boxes``.
        in_data[9]: image, read only by the disabled visualisation.

    For every stride in ``self._feat_stride`` the anchors are decoded and
    clipped. All levels are concatenated, ranked by their highest class
    score and the top ``self._keep_num`` rows are written to
    ``out_data[0]`` (boxes) and ``out_data[1]`` (per-class scores).
    """
    bbox_pred_dict = {
        'stride128': in_data[3],
        'stride64': in_data[2],
        'stride32': in_data[1],
        'stride16': in_data[0],
    }
    cls_prob_dict = {
        'stride128': in_data[7],
        'stride64': in_data[6],
        'stride32': in_data[5],
        'stride16': in_data[4],
    }
    im_info = in_data[8].asnumpy()[0, :]

    destore_rois_list = []
    destore_cls_list = []
    for s in self._feat_stride:
        stride = int(s)
        level_key = 'stride' + str(s)
        sub_anchors = generate_anchors(base_size=stride,
                                       scales=self._scales,
                                       ratios=self._ratios)
        bbox_deltas = bbox_pred_dict[level_key].asnumpy()

        # 1. Generate proposals from bbox_deltas and shifted anchors.
        # The grid size is taken from the delta map itself.
        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

        # Enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors: add A anchors (1, A, 4) to
        # K cell shifts (K, 1, 4) and reshape to (K * A, 4).
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Bring the deltas into the same (h, w, a) row order as the
        # anchors: crop, move channels last, flatten.
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])

        # Flatten each class's group of A channels into one column. A
        # dedicated name is used here because the original reused ``s``
        # and thereby clobbered the stride loop variable.
        scores = cls_prob_dict[level_key].asnumpy()
        per_class = []
        for cls_idx in range(self._num_classes):
            start = cls_idx * A
            cls_scores = scores[:, start:start + A, :, :]
            cls_scores = self._clip_pad(cls_scores, (height, width))
            cls_scores = cls_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
            per_class.append(cls_scores)

        destore_rois_list.append(proposals)
        destore_cls_list.append(np.concatenate(per_class, axis=1))

    destore_rois = np.concatenate(destore_rois_list, axis=0)
    destore_cls = np.concatenate(destore_cls_list, axis=0)

    # Rank rows by their best class score, highest first.
    best_scores = np.max(destore_cls, axis=1)
    order = best_scores.ravel().argsort()[::-1]
    order = order[:self._keep_num]
    destore_cls = destore_cls[order, :]
    destore_rois = destore_rois[order, :]

    vis = False  # flip to True to draw the kept boxes while debugging
    if vis:
        # The image is only needed for drawing, so it is copied to the
        # host here rather than on every forward pass.
        vis_all_detection(in_data[9].asnumpy(), destore_rois[:, :])

    self.assign(out_data[0], req[0], mx.nd.array(destore_rois))
    self.assign(out_data[1], req[1], mx.nd.array(destore_cls))
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate RoIs from five pyramid levels of RPN output.

    Inputs read:
        in_data[0..4]: class probabilities for strides 4, 8, 16, 32, 64;
            channels before ``self._num_anchors`` are skipped.
        in_data[5..9]: box deltas for strides 4, 8, 16, 32, 64.
        in_data[-1]: image info; row 0 is used, entries 0/1 as image
            height/width and entry 2 as the scale applied to ``min_size``.

    For every stride in ``self._feat_stride`` anchors are generated,
    decoded with ``bbox_pred``, clipped and filtered. The merged result is
    ranked by score, passed through NMS and trimmed (or randomly padded)
    to ``post_nms_topN`` rows.

    Outputs written:
        out_data[0]: RoIs with a leading zero batch-index column.
        out_data[1]: their scores, only when ``self._output_score`` is set.

    Raises:
        ValueError: if the first input holds more than one image.
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")

    cls_prob_dict = {
        'stride64': in_data[4],
        'stride32': in_data[3],
        'stride16': in_data[2],
        'stride8': in_data[1],
        'stride4': in_data[0],
    }
    bbox_pred_dict = {
        'stride64': in_data[9],
        'stride32': in_data[8],
        'stride16': in_data[7],
        'stride8': in_data[6],
        'stride4': in_data[5],
    }

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    # The image info does not depend on the stride, so it is copied to
    # the host once instead of once per pyramid level.
    im_info = in_data[-1].asnumpy()[0, :]

    proposal_list = []
    score_list = []
    for s in self._feat_stride:
        stride = int(s)
        level_key = 'stride' + str(s)
        sub_anchors = generate_anchors(base_size=stride,
                                       scales=self._scales,
                                       ratios=self._ratios)
        scores = cls_prob_dict[level_key].asnumpy()[:, self._num_anchors:, :, :]
        bbox_deltas = bbox_pred_dict[level_key].asnumpy()

        # 1. Generate proposals from bbox_deltas and shifted anchors,
        # using the real image size instead of padded feature map sizes.
        height, width = int(im_info[0] / stride), int(im_info[1] / stride)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors: add A anchors (1, A, 4) to
        # K cell shifts (K, 1, 4) and reshape to (K * A, 4).
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Bring deltas and scores into the same (h, w, a) row order as
        # the anchors: crop, move channels last, flatten.
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width below
        # the threshold (min_size converted to the scale in im_info[2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposal_list.append(proposals[keep, :])
        score_list.append(scores[keep])

    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure output size remains unchanged
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1],
                    scores.astype(np.float32, copy=False))
def forward(self, is_train, req, in_data, out_data, aux):
    """Sample training RoIs and lay them out in four per-level segments.

    Inputs read:
        in_data[0]: candidate RoIs; column 0 must be 0 for every row.
        in_data[1]: ground-truth boxes; the last column is dropped when
            the boxes are appended to the candidates.
        in_data[2]: image, read only by the disabled visualisation.

    ``sample_rois`` returns the sampled RoIs, labels, box targets, box
    weights and a per-RoI layer index. RoIs with layer index ``i + 2``
    (``i`` in 0..3) are written to the front of segment ``i`` of each
    output; every segment is ``self._batch_rois`` rows long. Unused rows
    keep label -1 and zero RoI/targets/weights.

    Outputs written (in order): RoIs, labels, box targets, box weights.

    NOTE(review): with ``self._batch_rois == -1`` the output allocation
    below asks for a negative number of rows -- confirm whether that mode
    is meant to be supported here.
    """
    assert self._batch_rois == -1 or self._batch_rois % self._batch_images == 0, \
        'batchimages {} must devide batch_rois {}'.format(self._batch_images, self._batch_rois)
    all_rois = in_data[0].asnumpy()
    gt_boxes = in_data[1].asnumpy()

    if self._batch_rois == -1:
        rois_per_image = all_rois.shape[0] + gt_boxes.shape[0]
        fg_rois_per_image = rois_per_image
    else:
        # Floor division keeps the integer result that ``/`` only gave
        # under Python 2; the assert above guarantees it is exact.
        rois_per_image = self._batch_rois // self._batch_images
        fg_rois_per_image = np.round(
            self._fg_fraction * rois_per_image).astype(int)

    # Include ground-truth boxes in the set of candidate rois
    zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1]))))
    # Sanity check: single batch only
    assert np.all(
        all_rois[:, 0] == 0), 'Only single item batches are supported'

    rois, labels, bbox_targets, bbox_weights, layer_indexs = \
        sample_rois(all_rois, fg_rois_per_image, rois_per_image,
                    self._num_classes, self._cfg, gt_boxes=gt_boxes,
                    sample_type='fpn', k0=4)

    vis = False  # flip to True to draw the sampled foreground targets
    if vis:
        # The image is only needed for drawing, so it is copied to the
        # host here rather than on every forward pass.
        im = in_data[2].asnumpy()
        ind = np.where(labels != 0)[0]
        pred_boxes = bbox_pred(rois[:, 1:], bbox_targets)
        pred_boxes = clip_boxes(pred_boxes, im.shape[-2:])
        l = labels[ind]
        p = pred_boxes[ind, :] * bbox_weights[ind, :]
        r = [p[i, l[i] * 4:l[i] * 4 + 4] for i in range(p.shape[0])]
        r_ = np.vstack(r)
        vis_all_detection(im, r_, l, 1)

    total_rows = self._batch_rois * 4
    rois_all = np.zeros((total_rows, 5), dtype=rois.dtype)
    labels_all = np.ones((total_rows, ), dtype=labels.dtype) * -1
    bbox_targets_all = np.zeros(
        (total_rows, self._num_classes * 4), dtype=bbox_targets.dtype)
    bbox_weights_all = np.zeros(
        (total_rows, self._num_classes * 4), dtype=bbox_weights.dtype)
    for i in range(4):
        index = (layer_indexs == (i + 2))
        num_index = int(np.count_nonzero(index))
        start = self._batch_rois * i
        end = start + num_index
        rois_all[start:end, :] = rois[index, :]
        labels_all[start:end] = labels[index]
        bbox_targets_all[start:end, :] = bbox_targets[index, :]
        bbox_weights_all[start:end, :] = bbox_weights[index, :]

    if DEBUG:
        print('labels= {}'.format(labels))
        print('num fg: {}'.format((labels > 0).sum()))
        print('num bg: {}'.format((labels == 0).sum()))
        self._count += 1
        self._fg_num += (labels > 0).sum()
        self._bg_num += (labels == 0).sum()
        print('self._count= {}'.format(self._count))
        print('num fg avg: {}'.format(self._fg_num / self._count))
        print('num bg avg: {}'.format(self._bg_num / self._count))
        print('ratio: {:.3f}'.format(
            float(self._fg_num) / float(self._bg_num)))

    # Each output is written with its own request type; the original
    # passed ``req[0]`` for all four.
    for ind, val in enumerate(
            [rois_all, labels_all, bbox_targets_all, bbox_weights_all]):
        self.assign(out_data[ind], req[ind], val)
def im_detect(predictor, data_batch, data_names, scales, cfg, count):
    """Run the predictor on a batch and return boxes, scores and raw heads.

    Boxes are decoded from ``bbox_pred_reshape_output`` with ``bbox_pred``,
    clipped and divided by ``scale``. The clipping shape is kept in the
    module-level ``im_shape``: it is refreshed only when the item's
    ``data`` input is 4-dimensional, otherwise the value left by an
    earlier item or call is reused.

    Args:
        count: not used by the code; kept so existing callers keep working.

    Returns:
        (scores_all, pred_boxes_all, data_dict_all, feat,
        bbox_pred1_output, bbox_pred2_output, bbox_pred_output). ``feat``
        is the first output's ``feat_conv_3x3_relu_output`` when present,
        else None. The last three are the first output's
        ``ave_bbox_pred_rois_output``, ``bbox_pred_reshape2_output`` and
        ``bbox_pred_reshape_output`` entries.
    """
    global im_shape
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]

        # Only a 4-D data blob updates the shared clipping shape.
        im_shape_tmp = data_dict['data'].shape
        if len(im_shape_tmp) == 4:
            im_shape = im_shape_tmp

        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to
        # transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)

    first_output = output_all[0]
    # ``in`` replaces ``dict.has_key``, which only exists on Python 2.
    if 'feat_conv_3x3_relu_output' in first_output:
        feat = first_output['feat_conv_3x3_relu_output']
    else:
        feat = None

    bbox_pred1_output = first_output['ave_bbox_pred_rois_output']
    bbox_pred2_output = first_output['bbox_pred_reshape2_output']
    bbox_pred_output = first_output['bbox_pred_reshape_output']
    return (scores_all, pred_boxes_all, data_dict_all, feat,
            bbox_pred1_output, bbox_pred2_output, bbox_pred_output)
def im_double_detect(predictor, data_batch, data_names, scales, cfg):
    """Run the predictor on current/reference pairs and decode both halves.

    Two modes, chosen by ``cfg.TEST.LEARN_NMS``:

    * learned NMS: boxes and scores come directly from the network
      outputs and are split into a current and a reference half. Rows
      flagged in ``stable_nms_multi_target`` are stored in the item's data
      dict under ``nms_multi_target`` / ``ref_nms_multi_target`` as
      ``[box..., class index, nms score, pre-nms score]``. Cosine
      distances between the embedding features of those rows and of rows
      whose first score exceeds 0.2 are computed and printed.
    * otherwise: the concatenated RoIs are split in two and each half is
      decoded with ``bbox_pred``, clipped to the input ``data`` shape and
      divided by ``scale``.

    Returns:
        (scores_all, pred_boxes_all, ref_scores_all, ref_pred_boxes_all,
        data_dict_all)
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]

    scores_all = []
    pred_boxes_all = []
    ref_scores_all = []
    ref_pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN or cfg.network.ROIDispatch:
            concat_rois = output['concat_rois_output'].asnumpy()[:, 1:]
        else:
            # NOTE(review): this path never defines ``concat_rois``, so
            # the non-learned-NMS branch below fails with NameError when
            # both flags are off -- confirm the intended input here.
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        if cfg.TEST.LEARN_NMS:
            concat_pred_boxes = output['concat_sorted_bbox_output'].asnumpy()
            concat_nms_scores = output['nms_final_score_output'].asnumpy()
            concat_pre_nms_scores = output['pre_nms_score_output'].asnumpy()
            # we used scaled image & roi to train, so it is necessary to
            # transform them back
            concat_pred_boxes = concat_pred_boxes / scale
            concat_multi_scores = np.dstack(
                (concat_nms_scores, concat_pre_nms_scores))

            pred_boxes, ref_pred_boxes = np.split(concat_pred_boxes, 2)
            scores, ref_scores = np.split(concat_multi_scores, 2)
            pred_boxes_all.append(pred_boxes)
            ref_pred_boxes_all.append(ref_pred_boxes)
            scores_all.append(scores)
            ref_scores_all.append(ref_scores)

            nms_multi_target = output['stable_nms_multi_target'].asnumpy()
            # Locate the flagged entries once; the first two index arrays
            # select rows in the box/score/feature arrays and the second
            # one is stored as the class column.
            target_idx = np.where(nms_multi_target)
            hit = target_idx[:2]
            # construct gt style nms_multi_target, 0:30 classes
            concat_target_boxes = np.hstack((
                concat_pred_boxes[hit],
                target_idx[1][:, np.newaxis],
                concat_nms_scores[hit][:, np.newaxis],
                concat_pre_nms_scores[hit][:, np.newaxis],
            ))
            target_boxes, ref_target_boxes = np.split(concat_target_boxes, 2)
            data_dict['nms_multi_target'] = target_boxes
            data_dict['ref_nms_multi_target'] = ref_target_boxes

            concat_nms_feats = output[
                'concat_nms_embedding_feat_output'].asnumpy()
            concat_target_feats = concat_nms_feats[hit]
            nms_feats, ref_nms_feats = np.split(concat_nms_feats, 2)
            target_nms_feats, ref_target_nms_feats = np.split(
                concat_target_feats, 2)
            high_score_feats = nms_feats[np.where(scores[:, :, 0] > 0.2)[:2]]
            ref_high_score_feats = ref_nms_feats[np.where(
                ref_scores[:, :, 0] > 0.2)[:2]]

            from scipy.spatial.distance import cosine
            # The builtin ``float`` is what the removed ``np.float``
            # alias pointed at, so the resulting dtype is unchanged.
            dist_mat = np.zeros(
                (target_nms_feats.shape[0], high_score_feats.shape[0]),
                dtype=float)
            ref_dist_mat = np.zeros(
                (ref_target_nms_feats.shape[0],
                 ref_high_score_feats.shape[0]),
                dtype=float)
            for i, nms_feat in enumerate(target_nms_feats):
                for j, high_score_feat in enumerate(high_score_feats):
                    dist_mat[i, j] = cosine(nms_feat, high_score_feat)
            for i, nms_feat in enumerate(ref_target_nms_feats):
                for j, high_score_feat in enumerate(ref_high_score_feats):
                    ref_dist_mat[i, j] = cosine(nms_feat, high_score_feat)
            # A hard-coded ``pdb.set_trace()`` used to sit here and
            # stopped every unattended run at this point.
            print(dist_mat)
            print(ref_dist_mat)
        else:
            rois, ref_rois = np.split(concat_rois, 2)
            scores = output['cls_prob_reshape_output'].asnumpy()[0]
            bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
            ref_scores = output['cls_prob_reshape_output'].asnumpy()[1]
            ref_bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[1]

            # post processing
            pred_boxes = bbox_pred(rois, bbox_deltas)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
            pred_boxes /= scale
            ref_pred_boxes = bbox_pred(ref_rois, ref_bbox_deltas)
            ref_pred_boxes = clip_boxes(ref_pred_boxes, im_shape[-2:])
            ref_pred_boxes /= scale

            pred_boxes_all.append(pred_boxes)
            scores_all.append(scores)
            ref_pred_boxes_all.append(ref_pred_boxes)
            ref_scores_all.append(ref_scores)
    return (scores_all, pred_boxes_all, ref_scores_all, ref_pred_boxes_all,
            data_dict_all)
def _flip_input(data, flip_h, flip_v):
    """Return `data` flipped along its last axis (flip_h) and/or axis 2 (flip_v)."""
    if flip_h and flip_v:
        return data[:, :, ::-1, ::-1]
    if flip_h:
        return data[:, :, :, ::-1]
    if flip_v:
        return data[:, :, ::-1]
    return data


def _unflip_boxes(bbox, height, width, flip_h, flip_v):
    """Mirror boxes predicted on a flipped input back to the unflipped frame.

    `bbox` is modified in place and also returned. Columns are read in
    groups of four as (x0, y0, x1, y1), one group per class.
    """
    if flip_v:
        # Mirroring swaps the roles of the two edges:
        # y0' = h - y1 - 1 and y1' = h - y0 - 1.
        y0 = bbox[:, 1::4].copy()
        bbox[:, 1::4] = height - bbox[:, 3::4] - 1
        bbox[:, 3::4] = height - y0 - 1
    if flip_h:
        # x0' = w - x1 - 1 and x1' = w - x0 - 1.
        x0 = bbox[:, 0::4].copy()
        bbox[:, 0::4] = width - bbox[:, 2::4] - 1
        bbox[:, 2::4] = width - x0 - 1
    return bbox


def im_detect_bbox_aug(net, nms_wrapper, img_path, scales, pixel_means,
                       bbox_stds, ctx, threshold=1e-3, viz=False):
    """Detect objects in one image using multi-scale and flip augmentation.

    For every entry of `scales` the image is resized, zero-padded so that
    both sides are multiples of 32, and passed through `net` four times:
    unflipped, horizontally flipped, vertically flipped and flipped along
    both axes. Boxes from the flipped passes are mirrored back, every box is
    divided by the resize factor, and the pooled detections are filtered per
    class with `nms_wrapper` and `threshold`.

    Args:
        net: callable invoked as `net(data, im_info)` that returns
            `(rois, scores, bbox_deltas)`.
        nms_wrapper: callable that receives a float32 `(N, 5)` array of
            `[x0, y0, x1, y1, score]` rows and returns the indices to keep.
        img_path: path of the image to read with `cv2.imread`.
        scales: iterable of `(scale_min, scale_max)` pairs. The shorter image
            side is resized to `scale_min`; `scale_max` is unpacked but not
            used by this function.
        pixel_means: forwarded to `transform`.
        bbox_stds: forwarded to `pre_compute_deltas`.
        ctx: sequence of devices; only `ctx[0]` is used.
        threshold: detections whose score is not strictly greater than this
            value are dropped after NMS.
        viz: when true, plot the detections with gluoncv / matplotlib.

    Returns:
        Tuple `(pred_bboxes, pred_scores, pred_clsid)`: the kept boxes
        (four columns), their scores, and their integer class indices,
        concatenated over all classes.

    Raises:
        IOError: if the image at `img_path` cannot be read.
    """
    # Only encode non-`str` input (Python 2 `unicode`); a native `str` is
    # handed to OpenCV untouched.
    path = img_path if isinstance(img_path, str) else img_path.encode("utf-8")
    img_ori = cv2.imread(path)
    if img_ori is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('failed to read image: %s' % img_path)

    # (flip_h, flip_v) for each inference pass; this order fixes the order
    # in which detections are pooled.
    flip_modes = ((False, False), (True, False), (False, True), (True, True))

    all_bboxes = []
    all_scores = []
    for scale_min, scale_max in scales:
        fscale = 1.0 * scale_min / min(img_ori.shape[:2])
        img_resized = cv2.resize(img_ori, (0, 0), fx=fscale, fy=fscale)
        h, w, c = img_resized.shape

        # Zero-pad at the bottom/right up to the next multiple of 32.
        h_padded = h if h % 32 == 0 else h + 32 - h % 32
        w_padded = w if w % 32 == 0 else w + 32 - w % 32
        img_padded = np.zeros(shape=(h_padded, w_padded, c),
                              dtype=img_resized.dtype)
        img_padded[:h, :w, :] = img_resized

        img = transform(img_padded, pixel_means=pixel_means)
        im_info = nd.array([[h_padded, w_padded, 1.0]], ctx=ctx[0])
        data = nd.array(img, ctx=ctx[0])
        data_h, data_w = data.shape[2], data.shape[3]

        for flip_h, flip_v in flip_modes:
            rois, scores, bbox_deltas = net(
                _flip_input(data, flip_h, flip_v), im_info)
            # The first roi column is dropped (presumably a batch index --
            # TODO confirm against the network definition).
            rois = rois[:, 1:].asnumpy()
            bbox_deltas = pre_compute_deltas(bbox_deltas[0].asnumpy(),
                                             bbox_stds=bbox_stds)
            bbox = bbox_pred(rois, bbox_deltas)
            bbox = clip_boxes(bbox, data.shape[2:4])
            bbox = _unflip_boxes(bbox, data_h, data_w, flip_h, flip_v)
            # Undo the resize so boxes are in original-image coordinates.
            bbox /= fscale
            all_bboxes.append(bbox)
            all_scores.append(scores[0].asnumpy())

    all_bboxes = np.concatenate(all_bboxes, axis=0)
    all_scores = np.concatenate(all_scores, axis=0)

    pred_bboxes = []
    pred_scores = []
    pred_clsid = []
    # Score column 0 is skipped (presumably the background class).
    for j in range(1, all_scores.shape[1]):
        cls_scores = all_scores[:, j, np.newaxis]
        if config.CLASS_AGNOSTIC:
            cls_boxes = all_bboxes[:, 4:8]
        else:
            cls_boxes = all_bboxes[:, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores))
        keep = nms_wrapper(cls_dets.astype('f'))
        cls_dets = cls_dets[keep, :]
        cls_dets = cls_dets[cls_dets[:, -1] > threshold, :]
        pred_bboxes.append(cls_dets[:, :4])
        pred_scores.append(cls_dets[:, 4])
        # Builtin `int` replaces the `np.int` alias, which newer NumPy
        # releases no longer provide.
        pred_clsid.append(
            j * np.ones(shape=(cls_dets.shape[0],), dtype=int))

    pred_bboxes = np.concatenate(pred_bboxes, axis=0)
    pred_scores = np.concatenate(pred_scores, axis=0)
    pred_clsid = np.concatenate(pred_clsid, axis=0)

    if viz:
        # Imported lazily so the plotting stack is only needed when asked for.
        import gluoncv
        import matplotlib.pyplot as plt
        # The channel axis is reversed before plotting.
        gluoncv.utils.viz.plot_bbox(img_ori[:, :, ::-1], bboxes=pred_bboxes,
                                    scores=pred_scores, labels=pred_clsid,
                                    thresh=.5)
        plt.show()
    return pred_bboxes, pred_scores, pred_clsid