def __init__(self, network, prefix, epoch, ctx_id=0, mask_nms=True):
    """Load a mask-test model onto one GPU behind a MutableModule.

    network : str, resolved to a `get_<network>_mask_test` symbol builder.
    prefix, epoch : checkpoint location passed to `load_param`.
    ctx_id : int, GPU device id.
    mask_nms : bool, selects which NMS threshold from config is used.
    """
    self.ctx_id = ctx_id
    self.ctx = mx.gpu(self.ctx_id)
    self.mask_nms = mask_nms
    # mask_nms=True uses the (typically looser) RPN threshold.
    if not self.mask_nms:
        self.nms = gpu_nms_wrapper(config.TEST.NMS, self.ctx_id)
    else:
        self.nms = gpu_nms_wrapper(config.TEST.RPN_NMS_THRESH, self.ctx_id)
    # NOTE(review): eval() on a constructed name is safe only because
    # `network` comes from trusted configuration; an explicit dict of
    # builder functions would be more robust.
    sym = eval('get_' + network + '_mask_test')(
        num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    arg_params, aux_params = load_param(prefix, epoch, convert=True,
                                        ctx=self.ctx, process=True)
    # Upper bound on input size; MutableModule reshapes per image below it.
    max_image_shape = (1, 3, 1600, 1600)
    max_data_shapes = [("data", max_image_shape), ("im_info", (1, 3))]
    mod = MutableModule(symbol=sym, data_names=["data", "im_info"],
                        label_names=None, max_data_shapes=max_data_shapes,
                        context=self.ctx)
    mod.bind(data_shapes=max_data_shapes, label_shapes=None,
             for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)
    self.model = mod
def __init__(self, args, nms=0.4, verbose=False):
    """Build a RetinaFace MXNet detector from CLI-style `args`.

    args : namespace with threshold, scales, gpu, face_margin, retina_model.
    nms : float, IoU threshold for non-maximum suppression.
    verbose : unused here, kept for interface compatibility.
    """
    self.threshold = args.threshold
    self.scales = args.scales
    self.nms_threshold = nms
    self.ctx_id = args.gpu
    # (x1, y1, x2, y2) offsets used to widen face crops by face_margin px.
    self.margin = np.array([-args.face_margin, -args.face_margin,
                            args.face_margin, args.face_margin])
    self._feat_stride_fpn = [int(k) for k in anchor_shape.keys()]
    self.anchor_cfg = anchor_shape
    self.fpn_keys = [f'stride{s}' for s in self._feat_stride_fpn]
    anchors_fpn_list = generate_anchors_fpn(cfg=self.anchor_cfg)
    # FIX: the per-stride anchor arrays may have different row counts, so
    # np.asarray(list, dtype=np.float32) over the whole list raises on
    # ragged input (NumPy >= 1.24). Cast each array individually instead;
    # for uniform shapes the result is identical.
    self._anchors_fpn = dict(
        zip(self.fpn_keys, [a.astype(np.float32) for a in anchors_fpn_list]))
    self._num_anchors = dict(
        zip(self.fpn_keys,
            [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    if self.ctx_id < 0:
        self.ctx = mx.cpu()
        self.nms = cpu_nms_wrapper(self.nms_threshold)
    else:
        self.ctx = mx.gpu(self.ctx_id)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    sym, arg_params, aux_params = mx.model.load_checkpoint(
        args.retina_model, 0)
    self.model = mx.mod.Module(sym, context=self.ctx, label_names=None)
    self.model.bind(
        data_shapes=[('data', (1, 3, 640, 640))], for_training=False)
    self.model.set_params(arg_params, aux_params)
def __init__(self, prefix, epoch, ctx_id=0, test_mode=False):
    """Load an FPN face-RPN detector from an MXNet checkpoint.

    prefix, epoch : checkpoint location for mx.model.load_checkpoint.
    ctx_id : GPU id; negative selects CPU execution and CPU NMS.
    test_mode : when True, bind a MutableModule allowing inputs up to
        2400x2400 instead of a fixed 640x640 Module.
    """
    self.ctx_id = ctx_id
    self.test_mode = test_mode
    self.fpn_keys = []
    fpn_stride = []
    fpn_base_size = []
    # Three FPN levels, coarsest first.
    self._feat_stride_fpn = [32, 16, 8]
    for s in self._feat_stride_fpn:
        self.fpn_keys.append('stride%s' % s)
        fpn_stride.append(int(s))
        fpn_base_size.append(16)
    # Six scales shared across the three levels (two per level).
    self._scales = np.array([32, 16, 8, 4, 2, 1])
    self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
    # Per-stride reference anchors, keyed 'stride32'/'stride16'/'stride8'.
    self._anchors_fpn = dict(
        zip(
            self.fpn_keys,
            generate_anchors_fpn(base_size=fpn_base_size,
                                 scales=self._scales,
                                 ratios=self._ratios)))
    self._num_anchors = dict(
        zip(self.fpn_keys,
            [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    self._rpn_pre_nms_top_n = 1000
    self.nms_threshold = 0.3
    self._bbox_pred = nonlinear_pred
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    if self.ctx_id >= 0:
        self.ctx = mx.gpu(self.ctx_id)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    else:
        self.ctx = mx.cpu()
        self.nms = cpu_nms_wrapper(self.nms_threshold)
    # Zero means: this R50 variant expects raw BGR pixels (no subtraction).
    self.pixel_means = np.array([0.0, 0.0, 0.0])  #BGR,R50
    if not self.test_mode:
        image_size = (640, 640)
        self.model = mx.mod.Module(symbol=sym, context=self.ctx,
                                   label_names=None)
        self.model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                               image_size[1]))],
                        for_training=False)
        self.model.set_params(arg_params, aux_params)
    else:
        # MutableModule tolerates variable input shapes up to the max.
        from rcnn.core.module import MutableModule
        image_size = (2400, 2400)
        data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
        self.model = MutableModule(symbol=sym, data_names=['data'],
                                   label_names=None, context=self.ctx,
                                   max_data_shapes=data_shape)
        self.model.bind(data_shape, None, for_training=False)
        self.model.set_params(arg_params, aux_params)
def __init__(self, gpu=0, test_mode=False):
    """SSH-style face detector loading the bundled checkpoint on one GPU.

    gpu : GPU device id.
    test_mode : when True, use a MutableModule (same 640x640 max shape).
    """
    self.ctx_id = gpu
    self.ctx = mx.gpu(self.ctx_id)
    self._feat_stride_fpn = [32, 16, 8]
    self.fpn_keys = ['stride%s' % s for s in self._feat_stride_fpn]
    fpn_stride = [int(s) for s in self._feat_stride_fpn]
    fpn_base_size = [16 for _ in self._feat_stride_fpn]
    self._scales = np.array([32, 16, 8, 4, 2, 1])
    self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
    anchor_sets = generate_anchors_fpn(base_size=fpn_base_size,
                                       scales=self._scales,
                                       ratios=self._ratios)
    self._anchors_fpn = dict(zip(self.fpn_keys, anchor_sets))
    self._num_anchors = {key: a.shape[0]
                         for key, a in self._anchors_fpn.items()}
    self._rpn_pre_nms_top_n = 1000
    self.nms_threshold = config.TEST.NMS
    self._bbox_pred = nonlinear_pred
    base_path = os.path.dirname(__file__)
    sym, arg_params, aux_params = mx.model.load_checkpoint(
        base_path + '/model/model-v2.0/e2e', 1)
    self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    self.pixel_means = np.array([103.939, 116.779, 123.68])  # BGR
    if test_mode:
        from rcnn.core.module import MutableModule
        image_size = (640, 640)
        data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
        self.model = MutableModule(symbol=sym, data_names=['data'],
                                   label_names=None, context=self.ctx,
                                   max_data_shapes=data_shape)
        self.model.bind(data_shape, None, for_training=False)
        self.model.set_params(arg_params, aux_params)
    else:
        image_size = (640, 640)
        self.model = mx.mod.Module(symbol=sym, context=self.ctx,
                                   label_names=None)
        self.model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                               image_size[1]))],
                        for_training=False)
        self.model.set_params(arg_params, aux_params)
    print('init ssh success')
def __init__(self, model_weights, use_gpu_nms=True, nms=0.4, decay4=0.5):
    """TensorFlow RetinaFace wrapper.

    Precomputes the 3-level FPN anchors and wraps the network in a
    tf.function accepting any HxW BGR float32 batch.
    """
    self.decay4 = decay4
    self.nms_threshold = nms
    self.preprocess = False
    _ratio = (1., )
    self._feat_stride_fpn = [32, 16, 8]
    # Two anchor scales per stride; borders effectively unrestricted.
    self.anchor_cfg = {
        '32': {
            'SCALES': (32, 16),
            'BASE_SIZE': 16,
            'RATIOS': _ratio,
            'ALLOWED_BORDER': 9999
        },
        '16': {
            'SCALES': (8, 4),
            'BASE_SIZE': 16,
            'RATIOS': _ratio,
            'ALLOWED_BORDER': 9999
        },
        '8': {
            'SCALES': (2, 1),
            'BASE_SIZE': 16,
            'RATIOS': _ratio,
            'ALLOWED_BORDER': 9999
        },
    }
    self.fpn_keys = ['stride%s' % s for s in self._feat_stride_fpn]
    raw_anchors = generate_anchors_fpn(dense_anchor=False,
                                       cfg=self.anchor_cfg)
    self._anchors_fpn = {key: a.astype(np.float32)
                         for key, a in zip(self.fpn_keys, raw_anchors)}
    self._num_anchors = {key: a.shape[0]
                         for key, a in self._anchors_fpn.items()}
    if use_gpu_nms:
        self.nms = gpu_nms_wrapper(self.nms_threshold, 0)
    else:
        self.nms = cpu_nms_wrapper(self.nms_threshold)
    # Identity normalization: raw pixels go straight into the network.
    self.pixel_means = np.array([0.0, 0.0, 0.0], dtype=np.float32)
    self.pixel_stds = np.array([1.0, 1.0, 1.0], dtype=np.float32)
    self.pixel_scale = float(1.0)
    self.bbox_stds = [1.0, 1.0, 1.0, 1.0]
    self.scales = [1024, 1980]
    signature = (tf.TensorSpec(shape=[None, None, None, 3],
                               dtype=np.float32), )
    self.model = tf.function(RetinaFaceNetwork(model_weights).model,
                             input_signature=signature)
def __init__(self, prefix, epoch, ctx_id=0):
    """Face RPN detector bound to a fixed 640x640 input on one GPU.

    prefix, epoch : MXNet checkpoint location.
    ctx_id : GPU device id.
    """
    self.ctx_id = ctx_id
    self.ctx = mx.gpu(self.ctx_id)
    self.fpn_keys = []
    fpn_stride = []
    fpn_base_size = []
    # Three FPN levels, coarsest first.
    self._feat_stride_fpn = [32, 16, 8]
    for s in self._feat_stride_fpn:
        self.fpn_keys.append('stride%s' % s)
        fpn_stride.append(int(s))
        fpn_base_size.append(16)
    # Six scales spread across the three levels (two per level).
    self._scales = np.array([32, 16, 8, 4, 2, 1])
    self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
    self._anchors_fpn = dict(
        zip(self.fpn_keys,
            generate_anchors_fpn(base_size=fpn_base_size,
                                 scales=self._scales,
                                 ratios=self._ratios)))
    self._num_anchors = dict(
        zip(self.fpn_keys,
            [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    self._rpn_pre_nms_top_n = 1000
    self.nms_threshold = 0.3
    self._bbox_pred = nonlinear_pred
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    # (A large commented-out block that regrouped the symbol's cls/bbox
    # outputs was removed here; the checkpoint symbol is used as-is.)
    self.model = mx.mod.Module(symbol=sym, context=self.ctx,
                               label_names=None)
    image_size = (640, 640)
    self.model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                           image_size[1]))],
                    for_training=False)
    self.model.set_params(arg_params, aux_params)
    self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
def __init__(self, prefix, epoch, ctx_id=1, test_mode=False):
    """Config-driven FPN face detector loaded from an MXNet checkpoint.

    prefix, epoch : checkpoint location.
    ctx_id : GPU device id.
    test_mode : when True, bind a MutableModule allowing inputs up to
        2400x2400 instead of a fixed 640x640 Module.
    """
    self.ctx_id = ctx_id
    self.ctx = mx.gpu(self.ctx_id)
    self.keys = []
    strides = []
    base_size = []
    scales = []
    # Anchor layout comes entirely from the project config.
    self.feat_strides = config.RPN_FEAT_STRIDE
    for s in self.feat_strides:
        self.keys.append('stride%s' % s)
        strides.append(int(s))
        base_size.append(config.RPN_ANCHOR_CFG[str(s)]['BASE_SIZE'])
        scales += config.RPN_ANCHOR_CFG[str(s)]['SCALES']
    self._scales = np.array(scales)
    self._ratios = np.array([1.0] * len(self.feat_strides))
    self._anchors_fpn = dict(
        list(zip(self.keys,
                 generate_anchors_fpn(base_size=base_size,
                                      scales=self._scales,
                                      ratios=self._ratios))))
    self._num_anchors = dict(
        zip(self.keys,
            [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    self._rpn_pre_nms_top_n = 1000
    # Higher threshold keeps more overlapping boxes per face.
    self.nms_threshold = config.test_nms_threshold
    self._bbox_pred = nonlinear_pred
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    # FIX: the original assigned the VGG BGR means and immediately
    # overwrote them with the constant below; the dead first assignment
    # was removed.
    self.pixel_means = np.array([127., 127., 127.])
    if not test_mode:
        image_size = (640, 640)
        self.model = mx.mod.Module(symbol=sym, context=self.ctx,
                                   label_names=None)
        self.model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                               image_size[1]))],
                        for_training=False)
        self.model.set_params(arg_params, aux_params)
    else:
        from rcnn.core.module import MutableModule
        image_size = (2400, 2400)
        data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
        self.model = MutableModule(symbol=sym, data_names=['data'],
                                   label_names=None, context=self.ctx,
                                   max_data_shapes=data_shape)
        self.model.bind(data_shape, None, for_training=False)
        self.model.set_params(arg_params, aux_params)
def __init__(self, prefix, epoch, ctx_id=0, test_mode=False):
    """RetinaFace-style RPN detector loaded from an MXNet checkpoint.

    prefix, epoch : checkpoint location.
    ctx_id : GPU device id (GPU always used here).
    test_mode : when True, a MutableModule accepts inputs up to 2400x2400;
        otherwise a Module is bound to a fixed 640x640 input.
    """
    self.ctx_id = ctx_id
    self.ctx = mx.gpu(self.ctx_id)
    self._feat_stride_fpn = [32, 16, 8]
    self.fpn_keys = ['stride%s' % s for s in self._feat_stride_fpn]
    fpn_stride = [int(s) for s in self._feat_stride_fpn]
    fpn_base_size = [16] * len(self._feat_stride_fpn)
    self._scales = np.array([32, 16, 8, 4, 2, 1])
    self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
    anchor_sets = generate_anchors_fpn(base_size=fpn_base_size,
                                       scales=self._scales,
                                       ratios=self._ratios)
    self._anchors_fpn = dict(zip(self.fpn_keys, anchor_sets))
    self._num_anchors = {key: a.shape[0]
                         for key, a in self._anchors_fpn.items()}
    self._rpn_pre_nms_top_n = 1000
    self.nms_threshold = 0.3
    self._bbox_pred = nonlinear_pred
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    self.pixel_means = np.array([103.939, 116.779, 123.68])  # BGR
    if test_mode:
        from rcnn.core.module import MutableModule
        image_size = (2400, 2400)
        data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
        self.model = MutableModule(symbol=sym, data_names=['data'],
                                   label_names=None, context=self.ctx,
                                   max_data_shapes=data_shape)
        self.model.bind(data_shape, None, for_training=False)
    else:
        image_size = (640, 640)
        self.model = mx.mod.Module(symbol=sym, context=self.ctx,
                                   label_names=None)
        self.model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                               image_size[1]))],
                        for_training=False)
    self.model.set_params(arg_params, aux_params)
def forward(self, is_train, req, in_data, out_data, aux):
    """Custom-op forward: turn per-stride FPN scores/deltas into RoIs.

    in_data layout: fg/bg class probs for each FPN key, then bbox deltas
    for each key, then im_info (h, w, scale) last. Writes the RoI blob
    (batch_idx, x1, y1, x2, y2) to out_data[0] and, optionally, scores
    to out_data[1].
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0][0].context.device_id)
    cls_prob_dict = dict(zip(self.fpn_keys, in_data[0:len(self.fpn_keys)]))
    bbox_pred_dict = dict(
        zip(self.fpn_keys,
            in_data[len(self.fpn_keys):2 * len(self.fpn_keys)]))
    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size_dict = self._rpn_min_size_fpn
    proposals_list = []
    scores_list = []
    for s in self._feat_stride_fpn:
        stride = int(s)
        # Keep only the foreground half of the class probabilities.
        scores = cls_prob_dict['stride%s' % s].asnumpy()[
            :, self._num_anchors['stride%s' % s]:, :, :]
        bbox_deltas = bbox_pred_dict['stride%s' % s].asnumpy()
        im_info = in_data[-1].asnumpy()[0, :]
        if DEBUG:
            # FIX: converted Python 2 print statements to print() calls.
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
        # Use the actual feature-map size rather than im_info/stride.
        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
        A = self._num_anchors['stride%s' % s]
        K = height * width
        anchors = anchors_plane(
            height, width, stride,
            self._anchors_fpn['stride%s' % s].astype(np.float32))
        anchors = anchors.reshape((K * A, 4))
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
        # Apply regression deltas, then clip boxes to the image.
        proposals = self._bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])
        # Drop boxes smaller than the per-stride minimum (input scale).
        keep = self._filter_boxes(
            proposals, min_size_dict['stride%s' % s] * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]
        proposals_list.append(proposals)
        scores_list.append(scores)
    proposals = np.vstack(proposals_list)
    scores = np.vstack(scores_list)
    # Rank all strides jointly by score, keep the top pre_nms_topN.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    det = np.hstack((proposals, scores)).astype(np.float32)
    if np.shape(det)[0] == 0:
        # FIX: Python 2 print statement converted to a function call.
        print("Something wrong with the input image(resolution is too "
              "low?), generate fake proposals for it.")
        proposals = np.array([[1.0, 1.0, 2.0, 2.0]] * post_nms_topN,
                             dtype=np.float32)
        scores = np.array([[0.9]] * post_nms_topN, dtype=np.float32)
        det = np.array([[1.0, 1.0, 2.0, 2.0, 0.9]] * post_nms_topN,
                       dtype=np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # Pad with resampled indices so the output size stays fixed.
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Single-image batch: all batch indices are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)
    if self._output_score:
        self.assign(out_data[1], req[1],
                    scores.astype(np.float32, copy=False))
def __init__(self, prefix, epoch, ctx_id=0, network='net3', nms=0.4,
             nocrop=False, decay4=0.5, vote=False):
    """RetinaFace detector with a selectable FPN layout.

    prefix, epoch : MXNet checkpoint location.
    ctx_id : GPU id; negative selects CPU execution and CPU NMS.
    network : FPN layout key — 'ssh'/'vgg' (3 levels + preprocessing),
        'net3'/'net3a' (3 levels), 'net4'/'net4a' (4), 'net5'/'net5a' (5,
        retinaface), 'net6' (6, pyramidbox/s3fd style). '*a' variants add
        a 1.5 aspect ratio.
    nms : IoU threshold for NMS.
    nocrop, decay4, vote : stored for use by the detection routine.
    """
    self.ctx_id = ctx_id
    self.network = network
    self.decay4 = decay4
    self.nms_threshold = nms
    self.vote = vote
    self.nocrop = nocrop
    self.debug = False
    self.fpn_keys = []
    self.anchor_cfg = None
    self.preprocess = False
    _ratio = (1., )
    fmc = 3  # number of FPN feature maps
    if network == 'ssh' or network == 'vgg':
        self.preprocess = True
    elif network == 'net3':
        _ratio = (1., )
    elif network == 'net3a':
        _ratio = (1., 1.5)
    elif network == 'net6':  #like pyramidbox or s3fd
        fmc = 6
    elif network == 'net5':  #retinaface
        fmc = 5
    elif network == 'net5a':
        fmc = 5
        _ratio = (1., 1.5)
    elif network == 'net4':
        fmc = 4
    elif network == 'net4a':
        fmc = 4
        _ratio = (1., 1.5)
    else:
        assert False, 'network setting error %s' % network
    if fmc == 3:
        self._feat_stride_fpn = [32, 16, 8]
        self.anchor_cfg = {
            '32': {
                'SCALES': (32, 16),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '16': {
                'SCALES': (8, 4),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '8': {
                'SCALES': (2, 1),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
        }
    elif fmc == 4:
        self._feat_stride_fpn = [32, 16, 8, 4]
        self.anchor_cfg = {
            '32': {
                'SCALES': (32, 16),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '16': {
                'SCALES': (8, 4),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '8': {
                'SCALES': (2, 1),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '4': {
                'SCALES': (2, 1),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
        }
    elif fmc == 6:
        # One scale per stride across six levels.
        self._feat_stride_fpn = [128, 64, 32, 16, 8, 4]
        self.anchor_cfg = {
            '128': {
                'SCALES': (32, ),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '64': {
                'SCALES': (16, ),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '32': {
                'SCALES': (8, ),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '16': {
                'SCALES': (4, ),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '8': {
                'SCALES': (2, ),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '4': {
                'SCALES': (1, ),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
        }
    elif fmc == 5:
        self._feat_stride_fpn = [64, 32, 16, 8, 4]
        self.anchor_cfg = {}
        # Scales grow geometrically by 2^(1/3); _basescale deliberately
        # carries over between strides so each level continues the series.
        _ass = 2.0**(1.0 / 3)
        _basescale = 1.0
        for _stride in [4, 8, 16, 32, 64]:
            key = str(_stride)
            value = {
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            }
            scales = []
            for _ in range(3):
                scales.append(_basescale)
                _basescale *= _ass
            value['SCALES'] = tuple(scales)
            self.anchor_cfg[key] = value
    print(self._feat_stride_fpn, self.anchor_cfg)
    for s in self._feat_stride_fpn:
        self.fpn_keys.append('stride%s' % s)
    dense_anchor = False
    self._anchors_fpn = dict(
        zip(
            self.fpn_keys,
            generate_anchors_fpn(dense_anchor=dense_anchor,
                                 cfg=self.anchor_cfg)))
    for k in self._anchors_fpn:
        v = self._anchors_fpn[k].astype(np.float32)
        self._anchors_fpn[k] = v
    self._num_anchors = dict(
        zip(self.fpn_keys,
            [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    if self.ctx_id >= 0:
        self.ctx = mx.gpu(self.ctx_id)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    else:
        self.ctx = mx.cpu()
        self.nms = cpu_nms_wrapper(self.nms_threshold)
    # Three output groups per stride (cls, bbox, landmark) means the
    # checkpoint predicts landmarks as well.
    self.use_landmarks = False
    if len(sym) // len(self._feat_stride_fpn) == 3:
        self.use_landmarks = True
    print('use_landmarks', self.use_landmarks)
    image_size = (640, 640)
    self.model = mx.mod.Module(symbol=sym, context=self.ctx,
                               label_names=None)
    self.model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                           image_size[1]))],
                    for_training=False)
    self.model.set_params(arg_params, aux_params)
def forward(self, is_train, req, in_data, out_data, aux):
    """Implements forward computation.

    is_train : bool, whether forwarding for training or testing.
    req : list of {'null', 'write', 'inplace', 'add'}, how to assign to
        out_data. 'null' means skip assignment, etc.
    in_data : list of NDArray, input data.
    out_data : list of NDArray, pre-allocated output buffers.
    aux : list of NDArray, mutable auxiliary states. Usually not used.

    Pipeline: for each feature-map cell, generate A anchors, apply the
    predicted bbox deltas, clip to the image, drop tiny boxes, rank by
    score, take the top pre_nms_topN, run NMS, then keep (and pad to)
    post_nms_topN RoIs.
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size
    # The first _num_anchors channels are background probabilities;
    # keep only the foreground half.
    scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]
    logger.debug('im_info: %s' % im_info)
    # 1. Generate proposals from bbox_deltas and shifted anchors.
    # Use the real image size (im_info) instead of padded feature sizes.
    height, width = int(im_info[0] / self._feat_stride), int(
        im_info[1] / self._feat_stride)
    logger.debug('score map size: (%d, %d)' %
                 (scores.shape[2], scores.shape[3]))
    logger.debug('resudial: (%d, %d)' %
                 (scores.shape[2] - height, scores.shape[3] - width))
    # Enumerate all shifts: every feature-map cell becomes an (x, y, x, y)
    # offset in input-image coordinates; each shift is later added to all
    # A reference anchors to place anchors at that cell.
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # Enumerate all shifted anchors:
    # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get (K, A, 4),
    # then reshape to (K*A, 4) shifted anchors.
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    # Transpose/reshape predicted deltas into the same (h, w, a) order as
    # the anchors: (1, 4*A, H, W) -> (1, H, W, 4*A) -> (H*W*A, 4).
    bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    # Same reordering for scores: (1, A, H, W) -> (H*W*A, 1).
    scores = self._clip_pad(scores, (height, width))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
    # Convert anchors into proposals via the regression deltas.
    proposals = bbox_pred(anchors, bbox_deltas)
    # 2. Clip predicted boxes to the image boundary.
    proposals = clip_boxes(proposals, im_info[:2])
    # 3. Remove boxes with height or width below min_size (converted to
    # the input image scale stored in im_info[2]).
    keep = self._filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]
    # 4./5. Sort by score descending and take the top pre_nms_topN.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # 6./7./8. Apply NMS, keep post_nms_topN, return the top RoIs.
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # Pad with randomly re-sampled kept indices so the output size is
    # always exactly post_nms_topN.
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Output RoIs for the downstream head. Only a single input image is
    # supported, so all batch indices are 0; rows are
    # (batch_idx, x1, y1, x2, y2).
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                   copy=False)))
    self.assign(out_data[0], req[0], blob)
    if self._output_score:
        self.assign(out_data[1], req[1],
                    scores.astype(np.float32, copy=False))
def postprocess(net_out, threshold, ctx_id, im_scale, im_info):
    """Decode raw RetinaFace outputs into detections and landmarks.

    net_out : flat sequence of per-stride outputs, 3 per stride when
        landmarks are predicted (scores, bbox deltas, landmark deltas).
    threshold : float, score threshold for keeping proposals.
    ctx_id : GPU id for NMS; negative selects CPU NMS.
    im_scale : per-axis scale factors; boxes are divided by
        (im_scale[0], im_scale[1]) to map back to original coordinates.
        # assumes im_scale is indexable with at least 2 entries — TODO confirm
    im_info : (height, width) of the network input, used for clipping.

    Returns (det, landmarks): det is (N, 5) [x1, y1, x2, y2, score]
    (or (N, 6) with a stride column when nms_threshold < 0), landmarks is
    (N, 5, 2) or None.
    """
    flip = False      # NOTE(review): the flip branches below reference an
                      # undefined `im`; they are dead while flip stays False.
    decay4 = 0.5
    vote = False
    fpn_keys = []
    anchor_cfg = None
    bbox_stds = [1.0, 1.0, 1.0, 1.0]
    landmark_std = 1.0
    nms_threshold = 0.4
    proposals_list = []
    scores_list = []
    landmarks_list = []
    strides_list = []
    use_landmarks = True
    if ctx_id >= 0:
        nms = gpu_nms_wrapper(nms_threshold, ctx_id)
    else:
        nms = cpu_nms_wrapper(nms_threshold)
        use_landmarks = True
    # Fixed 3-level RetinaFace anchor configuration.
    _ratio = (1., )
    _feat_stride_fpn = [32, 16, 8]
    anchor_cfg = {
        '32': {
            'SCALES': (32, 16),
            'BASE_SIZE': 16,
            'RATIOS': _ratio,
            'ALLOWED_BORDER': 9999
        },
        '16': {
            'SCALES': (8, 4),
            'BASE_SIZE': 16,
            'RATIOS': _ratio,
            'ALLOWED_BORDER': 9999
        },
        '8': {
            'SCALES': (2, 1),
            'BASE_SIZE': 16,
            'RATIOS': _ratio,
            'ALLOWED_BORDER': 9999
        },
    }
    for s in _feat_stride_fpn:
        fpn_keys.append('stride%s' % s)
    dense_anchor = False
    _anchors_fpn = dict(
        zip(fpn_keys,
            generate_anchors_fpn(dense_anchor=dense_anchor,
                                 cfg=anchor_cfg)))
    for k in _anchors_fpn:
        v = _anchors_fpn[k].astype(np.float32)
        _anchors_fpn[k] = v
    _num_anchors = dict(
        zip(fpn_keys,
            [anchors.shape[0] for anchors in _anchors_fpn.values()]))
    # sym_idx walks net_out in groups of 3 (or 2 without landmarks).
    sym_idx = 0
    for _idx, s in enumerate(_feat_stride_fpn):
        _key = 'stride%s' % s
        stride = int(s)
        scores = net_out[sym_idx]
        # Drop the background half of the class channels.
        scores = scores[:, _num_anchors['stride%s' % s]:, :, :]
        bbox_deltas = net_out[sym_idx + 1]
        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
        A = _num_anchors['stride%s' % s]
        K = height * width
        anchors_fpn = _anchors_fpn['stride%s' % s]
        anchors = anchors_plane(height, width, stride, anchors_fpn)
        anchors = anchors.reshape((K * A, 4))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
        bbox_pred_len = bbox_deltas.shape[3] // A
        bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))
        # Un-normalize deltas by the training-time stds (all 1.0 here).
        bbox_deltas[:, 0::4] = bbox_deltas[:, 0::4] * bbox_stds[0]
        bbox_deltas[:, 1::4] = bbox_deltas[:, 1::4] * bbox_stds[1]
        bbox_deltas[:, 2::4] = bbox_deltas[:, 2::4] * bbox_stds[2]
        bbox_deltas[:, 3::4] = bbox_deltas[:, 3::4] * bbox_stds[3]
        proposals = bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])
        # Down-weight the densest level (unreachable with strides 32/16/8).
        if stride == 4 and decay4 < 1.0:
            scores *= decay4
        scores_ravel = scores.ravel()
        order = np.where(scores_ravel >= threshold)[0]
        proposals = proposals[order, :]
        scores = scores[order]
        if flip:
            # Dead while flip is False; `im` is not defined in this scope.
            oldx1 = proposals[:, 0].copy()
            oldx2 = proposals[:, 2].copy()
            proposals[:, 0] = im.shape[1] - oldx2 - 1
            proposals[:, 2] = im.shape[1] - oldx1 - 1
        # Map back to original-image coordinates (x by scale[0], y by [1]).
        proposals[:, 0] /= im_scale[0]
        proposals[:, 1] /= im_scale[1]
        proposals[:, 2] /= im_scale[0]
        proposals[:, 3] /= im_scale[1]
        proposals_list.append(proposals)
        scores_list.append(scores)
        if nms_threshold < 0.0:
            # Negative threshold mode: record each proposal's stride.
            _strides = np.empty(shape=(scores.shape), dtype=np.float32)
            _strides.fill(stride)
            strides_list.append(_strides)
        if not vote and use_landmarks:
            landmark_deltas = net_out[sym_idx + 2]
            landmark_pred_len = landmark_deltas.shape[1] // A
            landmark_deltas = landmark_deltas.transpose(
                (0, 2, 3, 1)).reshape((-1, 5, landmark_pred_len // 5))
            landmark_deltas *= landmark_std
            landmarks = landmark_pred(anchors, landmark_deltas)
            # Apply the same score-threshold selection as the boxes.
            landmarks = landmarks[order, :]
            if flip:
                # Dead while flip is False (see note above); also swaps
                # left/right landmark indices for the mirrored image.
                landmarks[:, :, 0] = im.shape[1] - landmarks[:, :, 0] - 1
                order = [1, 0, 2, 4, 3]
                flandmarks = landmarks.copy()
                for idx, a in enumerate(order):
                    flandmarks[:, idx, :] = landmarks[:, a, :]
                landmarks = flandmarks
            landmarks[:, :, 0:2] /= im_scale
            landmarks_list.append(landmarks)
        if use_landmarks:
            sym_idx += 3
        else:
            sym_idx += 2
    proposals = np.vstack(proposals_list)
    landmarks = None
    if proposals.shape[0] == 0:
        # Nothing above threshold anywhere: return empty results.
        if use_landmarks:
            landmarks = np.zeros((0, 5, 2))
        if nms_threshold < 0.0:
            return np.zeros((0, 6)), landmarks
        else:
            return np.zeros((0, 5)), landmarks
    scores = np.vstack(scores_list)
    scores_ravel = scores.ravel()
    order = scores_ravel.argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    if nms_threshold < 0.0:
        strides = np.vstack(strides_list)
        strides = strides[order]
    if not vote and use_landmarks:
        landmarks = np.vstack(landmarks_list)
        landmarks = landmarks[order].astype(np.float32, copy=False)
    if nms_threshold > 0.0:
        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)
        if not vote:
            keep = nms(pre_det)
            det = np.hstack((pre_det, proposals[:, 4:]))
            det = det[keep, :]
            if use_landmarks:
                landmarks = landmarks[keep]
        else:
            det = np.hstack((pre_det, proposals[:, 4:]))
            det = bbox_vote(det, nms_threshold)
    elif nms_threshold < 0.0:
        det = np.hstack((proposals[:, 0:4], scores,
                         strides)).astype(np.float32, copy=False)
    else:
        det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                            copy=False)
    return det, landmarks
def __init__(self, network, gpu_id=0, nms=0.3, nocrop=False, decay4=0.5,
             vote=False):
    """Config-driven RetinaFace wrapper that builds its own network graph.

    network : stored network key; the graph itself is created by
        self.build_net() at the end of __init__.
    gpu_id : GPU id; negative selects CPU NMS.
    nms : IoU threshold for NMS.
    nocrop, decay4, vote : stored for use by the detection routine.
    """
    self.network = network
    self.gpu_id = gpu_id
    self.decay4 = decay4
    self.nms_threshold = nms
    self.vote = vote
    self.nocrop = nocrop
    self.debug = False
    self.fpn_keys = []
    self.anchor_cfg = None
    self.use_landmarks = True
    self.sess = None
    self.net = None
    pixel_means = config.PIXEL_MEANS
    pixel_stds = [1.0, 1.0, 1.0]
    pixel_scale = 1.0
    self.preprocess = False
    dense_anchor = False
    _ratio = (1., )
    image_size = (640, 640)
    # Anchor layout comes entirely from the project config.
    self._feat_stride_fpn = config.RPN_FEAT_STRIDE
    self.anchor_cfg = config.RPN_ANCHOR_CFG
    for s in self._feat_stride_fpn:
        self.fpn_keys.append('stride%s' % s)
    self._anchors_fpn = dict(
        zip(
            self.fpn_keys,
            generate_anchors_fpn(dense_anchor=dense_anchor,
                                 cfg=self.anchor_cfg)))
    for k in self._anchors_fpn:
        v = self._anchors_fpn[k].astype(np.float32)
        self._anchors_fpn[k] = v
    '''
    Example of the resulting anchor table:
    {'stride32': array([[-248., -248., 263., 263.],[-120., -120., 135., 135.]], dtype=float32),
     'stride16': array([[-56., -56., 71., 71.],[-24., -24., 39., 39.]], dtype=float32),
     'stride8': array([[-8., -8., 23., 23.],[ 0., 0., 15., 15.]], dtype=float32)}
    '''
    # e.g. {'stride32': 2, 'stride16': 2, 'stride8': 2}
    self._num_anchors = dict(
        zip(self.fpn_keys,
            [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    if self.gpu_id >= 0:
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.gpu_id)
    else:
        self.nms = cpu_nms_wrapper(self.nms_threshold)
    self.pixel_means = np.array(pixel_means, dtype=np.float32)
    self.pixel_stds = np.array(pixel_stds, dtype=np.float32)
    self.pixel_scale = float(pixel_scale)
    # Construct the inference graph (defined elsewhere on this class).
    self.build_net()
def forward(self, is_train, req, in_data, out_data, aux):
    """FPN proposal custom-op forward.

    in_data layout: class probs for each FPN key, then bbox deltas for
    each key, then im_info last. Unlike the single-stride variant, boxes
    are NOT size-filtered; scores of padded entries are set to -1 so the
    consumer can ignore them. Writes (batch_idx, x1, y1, x2, y2) rows to
    out_data[0] and optionally scores to out_data[1].
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0][0].context.device_id)
    cls_prob_dict = dict(zip(self.fpn_keys, in_data[0:len(self.fpn_keys)]))
    bbox_pred_dict = dict(
        zip(self.fpn_keys,
            in_data[len(self.fpn_keys):2 * len(self.fpn_keys)]))
    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size_dict = self._rpn_min_size_fpn
    proposals_list = []
    scores_list = []
    for s in self._feat_stride_fpn:
        _key = 'stride%s' % s
        stride = int(s)
        scores = cls_prob_dict[_key].asnumpy()
        # Keep only the foreground half of the class probabilities.
        scores = scores[:, self._num_anchors['stride%s' % s]:, :, :]
        bbox_deltas = bbox_pred_dict['stride%s' % s].asnumpy()
        im_info = in_data[-1].asnumpy()[0, :]
        # Expected map size from im_info; actual size taken from deltas.
        _height, _width = int(im_info[0] / stride), int(im_info[1] /
                                                        stride)
        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
        A = self._num_anchors['stride%s' % s]
        K = height * width
        anchors = anchors_plane(
            height, width, stride,
            self._anchors_fpn['stride%s' % s].astype(np.float32))
        # Debug trace of shapes per stride (stderr).
        print((height, width), (_height, _width), anchors.shape,
              bbox_deltas.shape, scores.shape, file=sys.stderr)
        anchors = anchors.reshape((K * A, 4))
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
        # Apply regression deltas, then clip boxes to the image.
        proposals = self._bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])
        # Per-stride pre-NMS ranking.
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        proposals_list.append(proposals)
        scores_list.append(scores)
    proposals = np.vstack(proposals_list)
    scores = np.vstack(scores_list)
    # Global re-ranking across all strides.
    scores_ravel = scores.ravel()
    order = scores_ravel.argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    det = np.hstack((proposals, scores)).astype(np.float32)
    # Threshold >= 1.0 disables NMS entirely.
    if self._threshold < 1.0:
        keep = nms(det)
    else:
        keep = range(det.shape[0])
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # Remember how many entries are genuine before padding.
    num_keep = len(keep)
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Mark padded rows with score -1 so downstream code can skip them.
    scores[num_keep:, :] = -1.0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                   copy=False)))
    self.assign(out_data[0], req[0], blob)
    if self._output_score:
        self.assign(out_data[1], req[1],
                    scores.astype(np.float32, copy=False))
def __init__(self, prefix, epoch, ctx_id=0, network='net3', nms=0.4, nocrop=False, decay4=0.5, vote=False):
    """Load an MXNet face-detector checkpoint and set up FPN anchor tables.

    Parameters
    ----------
    prefix, epoch : checkpoint identifier passed to mx.model.load_checkpoint.
    ctx_id : GPU id; a negative value selects CPU execution and CPU NMS.
    network : stored as-is; this variant always uses the three-stride
        (32/16/8) anchor configuration regardless of the value.
    nms : IoU threshold for the NMS wrapper.
    nocrop, decay4, vote : stored for use by the detection methods.
    """
    self.ctx_id = ctx_id
    self.network = network
    self.decay4 = decay4
    self.nms_threshold = nms
    self.vote = vote
    self.nocrop = nocrop
    self.debug = False
    self.fpn_keys = []
    self.anchor_cfg = None
    # Identity preprocessing by default: zero means, unit stds and scale.
    pixel_means = [0.0, 0.0, 0.0]
    pixel_stds = [1.0, 1.0, 1.0]
    pixel_scale = 1.0
    self.preprocess = False
    _ratio = (1., )
    fmc = 3  # number of FPN levels; fixed to 3 in this variant
    if fmc == 3:
        self._feat_stride_fpn = [32, 16, 8]
        # Per-stride anchor spec; SCALES are multiples of BASE_SIZE.
        self.anchor_cfg = {
            '32': {
                'SCALES': (32, 16),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '16': {
                'SCALES': (8, 4),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '8': {
                'SCALES': (2, 1),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
        }
    if self.debug:
        print(self._feat_stride_fpn, self.anchor_cfg)
    for s in self._feat_stride_fpn:
        self.fpn_keys.append('stride%s' % s)
    dense_anchor = False
    # Base anchors per stride, cast to float32 for the bbox decoder.
    self._anchors_fpn = dict(
        zip(
            self.fpn_keys,
            generate_anchors_fpn(dense_anchor=dense_anchor,
                                 cfg=self.anchor_cfg)))
    for k in self._anchors_fpn:
        v = self._anchors_fpn[k].astype(np.float32)
        self._anchors_fpn[k] = v
    # Anchors per feature-map cell, keyed by stride.
    self._num_anchors = dict(
        zip(self.fpn_keys,
            [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    # Device choice also decides which NMS implementation is used.
    if self.ctx_id >= 0:
        self.ctx = mx.gpu(self.ctx_id)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    else:
        self.ctx = mx.cpu()
        self.nms = cpu_nms_wrapper(self.nms_threshold)
    self.pixel_means = np.array(pixel_means, dtype=np.float32)
    self.pixel_stds = np.array(pixel_stds, dtype=np.float32)
    self.pixel_scale = float(pixel_scale)
    if self.debug:
        print('means', self.pixel_means)
    # NOTE(review): exactly 3 outputs per stride apparently indicates a
    # landmark branch in the symbol -- confirm against the network definition.
    self.use_landmarks = False
    if len(sym) // len(self._feat_stride_fpn) == 3:
        self.use_landmarks = True
    if self.debug:
        print('use_landmarks', self.use_landmarks)
    c = len(sym) // len(self._feat_stride_fpn)
    sym = sym[(c * 0):]  # NOTE(review): slicing from index 0 looks like a no-op / debug leftover
    self._feat_stride_fpn = [32, 16, 8]
    print('sym size:', len(sym))
    # Bind with a fixed 1x3x640x640 input shape, inference only.
    image_size = (640, 640)
    self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)
    self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
    self.model.set_params(arg_params, aux_params)
def __init__(self, prefix, epoch, ctx_id=0, network='net3', nms=0.4, nocrop=False, decay4=0.5, vote=False):
    """Load a RetinaFace checkpoint and build anchors for the chosen network.

    Parameters
    ----------
    prefix, epoch : checkpoint identifier for mx.model.load_checkpoint.
    ctx_id : GPU id; a negative value selects CPU execution and CPU NMS.
    network : preset name selecting the FPN depth (fmc), anchor ratios and
        pixel normalization, e.g. 'net3' (3 strides), 'net5' (retinaface,
        5 strides), 'net6' (pyramidbox/s3fd-like), 'x3'/'x5' (mean/std norm).
    nms : IoU threshold for non-maximum suppression.
    nocrop, decay4, vote : stored for the detection stage.
    """
    self.ctx_id = ctx_id
    self.network = network
    self.decay4 = decay4
    self.nms_threshold = nms
    self.vote = vote
    self.nocrop = nocrop
    self.debug = False
    self.fpn_keys = []
    self.anchor_cfg = None
    # Defaults: identity preprocessing; presets below may override.
    pixel_means = [0.0, 0.0, 0.0]
    pixel_stds = [1.0, 1.0, 1.0]
    pixel_scale = 1.0
    self.preprocess = False
    _ratio = (1., )
    fmc = 3  # number of FPN levels; overridden by the preset chain below
    if network == 'ssh' or network == 'vgg':
        pixel_means = [103.939, 116.779, 123.68]
        self.preprocess = True
    elif network == 'net3':
        _ratio = (1., )
    elif network == 'net3a':
        _ratio = (1., 1.5)
    elif network == 'net6':  #like pyramidbox or s3fd
        fmc = 6
    elif network == 'net5':  #retinaface
        fmc = 5
    elif network == 'net5a':
        fmc = 5
        _ratio = (1., 1.5)
    elif network == 'net4':
        fmc = 4
    elif network == 'net4a':
        fmc = 4
        _ratio = (1., 1.5)
    elif network == 'x5':
        fmc = 5
        pixel_means = [103.52, 116.28, 123.675]
        pixel_stds = [57.375, 57.12, 58.395]
    elif network == 'x3':
        fmc = 3
        pixel_means = [103.52, 116.28, 123.675]
        pixel_stds = [57.375, 57.12, 58.395]
    elif network == 'x3a':
        fmc = 3
        _ratio = (1., 1.5)
        pixel_means = [103.52, 116.28, 123.675]
        pixel_stds = [57.375, 57.12, 58.395]
    else:
        assert False, 'network setting error %s' % network
    # Map the level count to concrete strides and per-stride anchor specs.
    if fmc == 3:
        self._feat_stride_fpn = [32, 16, 8]
        self.anchor_cfg = {
            '32': {
                'SCALES': (32, 16),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '16': {
                'SCALES': (8, 4),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '8': {
                'SCALES': (2, 1),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
        }
    elif fmc == 4:
        self._feat_stride_fpn = [32, 16, 8, 4]
        self.anchor_cfg = {
            '32': {
                'SCALES': (32, 16),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '16': {
                'SCALES': (8, 4),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '8': {
                'SCALES': (2, 1),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '4': {
                'SCALES': (2, 1),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
        }
    elif fmc == 6:
        # One anchor scale per level, halving with the stride.
        self._feat_stride_fpn = [128, 64, 32, 16, 8, 4]
        self.anchor_cfg = {
            '128': {
                'SCALES': (32, ),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '64': {
                'SCALES': (16, ),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '32': {
                'SCALES': (8, ),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '16': {
                'SCALES': (4, ),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '8': {
                'SCALES': (2, ),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
            '4': {
                'SCALES': (1, ),
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            },
        }
    elif fmc == 5:
        # Three scales per stride in a geometric progression of ratio
        # 2^(1/3), continuing across strides (smallest stride first).
        self._feat_stride_fpn = [64, 32, 16, 8, 4]
        self.anchor_cfg = {}
        _ass = 2.0**(1.0 / 3)
        _basescale = 1.0
        for _stride in [4, 8, 16, 32, 64]:
            key = str(_stride)
            value = {
                'BASE_SIZE': 16,
                'RATIOS': _ratio,
                'ALLOWED_BORDER': 9999
            }
            scales = []
            for _ in range(3):
                scales.append(_basescale)
                _basescale *= _ass
            value['SCALES'] = tuple(scales)
            self.anchor_cfg[key] = value
    print(self._feat_stride_fpn, self.anchor_cfg)
    for s in self._feat_stride_fpn:
        self.fpn_keys.append('stride%s' % s)
    dense_anchor = False
    #self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(base_size=fpn_base_size, scales=self._scales, ratios=self._ratios)))
    # Base anchors per stride, cast to float32 for the bbox decoder.
    self._anchors_fpn = dict(
        zip(
            self.fpn_keys,
            generate_anchors_fpn(dense_anchor=dense_anchor,
                                 cfg=self.anchor_cfg)))
    for k in self._anchors_fpn:
        v = self._anchors_fpn[k].astype(np.float32)
        self._anchors_fpn[k] = v
    # Anchors per feature-map cell, keyed by stride.
    self._num_anchors = dict(
        zip(self.fpn_keys,
            [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    #self._bbox_pred = nonlinear_pred
    #self._landmark_pred = landmark_pred
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    # Device choice also decides which NMS implementation is used.
    if self.ctx_id >= 0:
        self.ctx = mx.gpu(self.ctx_id)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    else:
        self.ctx = mx.cpu()
        self.nms = cpu_nms_wrapper(self.nms_threshold)
    self.pixel_means = np.array(pixel_means, dtype=np.float32)
    self.pixel_stds = np.array(pixel_stds, dtype=np.float32)
    self.pixel_scale = float(pixel_scale)
    print('means', self.pixel_means)
    # NOTE(review): >=3 outputs per stride apparently indicates a landmark
    # branch, and more than 3 a cascade refinement head -- confirm against
    # the network symbol definition.
    self.use_landmarks = False
    if len(sym) // len(self._feat_stride_fpn) >= 3:
        self.use_landmarks = True
    print('use_landmarks', self.use_landmarks)
    self.cascade = 0
    if float(len(sym)) // len(self._feat_stride_fpn) > 3.0:
        self.cascade = 1
    print('cascade', self.cascade)
    #self.bbox_stds = [0.1, 0.1, 0.2, 0.2]
    #self.landmark_std = 0.1
    # Unit stds: deltas/landmarks are used as predicted, no unwhitening.
    self.bbox_stds = [1.0, 1.0, 1.0, 1.0]
    self.landmark_std = 1.0
    if self.debug:
        c = len(sym) // len(self._feat_stride_fpn)
        sym = sym[(c * 0):]
        self._feat_stride_fpn = [32, 16, 8]
    print('sym size:', len(sym))
    # Bind with a fixed 1x3x640x640 input shape, inference only.
    image_size = (640, 640)
    self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)
    self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
    self.model.set_params(arg_params, aux_params)
def forward(self, is_train, req, in_data, out_data, aux):
    """Turn one RPN score/delta map pair into a fixed-size batch of proposals.

    Pipeline: tile the base anchors over every feature-map cell, decode the
    predicted bbox deltas, clip to the image, drop boxes below min_size,
    rank by objectness, apply NMS, then pad the survivors back up to
    post_nms_topN rows so the output shape is static. Only a batch size of
    one is supported.
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
    if in_data[0].shape[0] > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    # The first A channels are background probabilities -- keep the rest.
    scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]
    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # Derive the valid feature-map extent from the real (unpadded) image size.
    height = int(im_info[0] / self._feat_stride)
    width = int(im_info[1] / self._feat_stride)
    if DEBUG:
        print('score map size: {}'.format(scores.shape))
        print("resudial: {}".format((scores.shape[2] - height, scores.shape[3] - width)))

    # Tile the A base anchors across all K cells: (K, A, 4) -> (K*A, 4).
    xs = np.arange(0, width) * self._feat_stride
    ys = np.arange(0, height) * self._feat_stride
    grid_x, grid_y = np.meshgrid(xs, ys)
    shifts = np.stack(
        (grid_x.ravel(), grid_y.ravel(), grid_x.ravel(), grid_y.ravel()),
        axis=1)
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = (shifts[:, np.newaxis, :] +
               self._anchors[np.newaxis, :, :]).reshape((K * A, 4))

    # Reorder predictions to match the anchors, rows ordered by (h, w, a):
    # deltas (1, 4*A, H, W) -> (H*W*A, 4); scores (1, A, H, W) -> (H*W*A, 1).
    bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    scores = self._clip_pad(scores, (height, width))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Decode deltas into boxes, then clip to the image boundary.
    proposals = clip_boxes(bbox_pred(anchors, bbox_deltas), im_info[:2])

    # Drop boxes smaller than min_size, scaled to the input resolution.
    keep = self._filter_boxes(proposals, min_size * im_info[2])
    proposals, scores = proposals[keep, :], scores[keep]

    # Rank by objectness and keep the pre-NMS top-N.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals, scores = proposals[order, :], scores[order]

    # Non-maximum suppression, then cap at post_nms_topN.
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # Pad by resampling kept indices so the row count never changes.
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals, scores = proposals[keep, :], scores[keep]

    # Single-image batch: every roi carries batch index 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)
    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def gpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height, nms_thresh, merge_thresh, binary_thresh=0.4, device_id=0):
    """Merge per-class boxes and masks on the GPU via score-weighted mask voting.

    For every foreground class (index 0 is skipped as background), boxes are
    NMS-filtered and thresholded so at most max_per_image detections survive
    globally; each surviving box then aggregates ("votes") the masks of all
    input boxes overlapping it by at least merge_thresh, weighted by their
    class scores, via mask_voting_kernel.

    Returns (list_result_mask, list_result_box): per-class lists indexed by
    class id, each box row carrying its score in the last column; entry 0
    stays empty.
    """
    nms = gpu_nms_wrapper(nms_thresh, device_id)
    # Intermediate per-class results after NMS.
    t_boxes = [[] for _ in xrange(num_classes)]
    t_scores = [[] for _ in xrange(num_classes)]
    t_all_scores = []
    for i in xrange(1, num_classes):
        # NMS on (box, class-i score) pairs; cap at max_per_image per class.
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])
    # Global score threshold keeping at most max_per_image detections overall.
    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)
    # inds array to record which mask should be aggregated together
    candidate_inds = []
    # weight for each element in the candidate inds
    candidate_weights = []
    # start position for candidate array
    candidate_start = []
    candidate_scores = []
    class_bar = [[] for _ in xrange(num_classes)]
    for i in xrange(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]
    # organize helper variable for gpu mask voting
    for c in xrange(1, num_classes):
        num_boxes = len(t_boxes[c])
        for i in xrange(num_boxes):
            # All input boxes overlapping this kept box vote on its mask,
            # weighted by their (normalized) class-c scores.
            cur_ov = bbox_overlaps(boxes.astype(np.float), t_boxes[c][i, np.newaxis].astype(np.float))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            candidate_inds.extend(cur_inds)
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            candidate_weights.extend(cur_weights)
            # Exclusive end offset of this box's voters in candidate_inds.
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(t_scores[c])
        # class_bar[c] marks where class c's results end in the flat output.
        class_bar[c] = len(candidate_scores)
    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)
    # the input masks/boxes are relatively large;
    # select only the subset of them useful for mask merge and remap the
    # voter indices into that compacted array.
    unique_inds = np.unique(candidate_inds)
    unique_inds_order = unique_inds.argsort()
    unique_map = {}
    for i in xrange(len(unique_inds)):
        unique_map[unique_inds[i]] = unique_inds_order[i]
    for i in xrange(len(candidate_inds)):
        candidate_inds[i] = unique_map[candidate_inds[i]]
    boxes = boxes[unique_inds, ...]
    masks = masks[unique_inds, ...]
    boxes = np.round(boxes)
    result_mask, result_box = mask_voting_kernel(boxes, masks, candidate_inds, candidate_start, candidate_weights, binary_thresh, im_height, im_width, device_id)
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))
    # Split the flat kernel output back into per-class lists using class_bar,
    # dropping degenerate boxes (non-positive width or height).
    list_result_box = [[] for _ in xrange(num_classes)]
    list_result_mask = [[] for _ in xrange(num_classes)]
    cls_start = 0
    for i in xrange(1, num_classes):
        cls_end = class_bar[i]
        cls_box = result_box[cls_start:cls_end, :]
        cls_mask = result_mask[cls_start:cls_end, :]
        valid_ind = np.where((cls_box[:, 2] > cls_box[:, 0]) & (cls_box[:, 3] > cls_box[:, 1]))[0]
        list_result_box[i] = cls_box[valid_ind, :]
        list_result_mask[i] = cls_mask[valid_ind, :]
        cls_start = cls_end
    return list_result_mask, list_result_box