def get_image(roidb, config):
    """
    Load, optionally flip, resize and normalise every image listed in roidb.

    For each record a (target_size, max_size) pair is drawn at random from
    ``config.SCALES``; the image is resized with ``resize`` and converted
    with ``transform``. The record's boxes are scaled by the same factor,
    rounded and clipped to the resized image.

    :param roidb: list of roidb records; each is read for the keys
        'image' (path), 'flipped' and 'boxes'
    :param config: configuration object read for ``SCALES``,
        ``network.IMAGE_STRIDE`` and ``network.PIXEL_MEANS``
    :return: (processed_ims, processed_roidb) -- the list of image tensors
        and a list of shallow-copied records with 'boxes' replaced and a new
        'im_info' entry ``[im_tensor.shape[2], im_tensor.shape[3], im_scale]``
        (presumably height, width, scale -- confirm against ``transform``)
    :raises AssertionError: if an image path does not exist
    """
    processed_ims = []
    processed_roidb = []
    for roi_rec in roidb:
        # The message used to be '%s does not exist'.format(...), which never
        # interpolated the path; use a real str.format placeholder.
        assert os.path.exists(roi_rec['image']), \
            '{} does not exist'.format(roi_rec['image'])
        im = cv2.imread(roi_rec['image'],
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            # reverse the second axis of the image array (horizontal mirror)
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()

        # pick one configured scale at random for this image
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)

        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        # scale a copy of the boxes so the input record is left untouched
        new_rec['boxes'] = clip_boxes(
            np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
def im_detect(predictor, data_batch, data_names, scale):
    """
    Run the predictor on one batch and decode scores and boxes, printing the
    elapsed time after the forward pass and again after post-processing.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch whose ``data`` entries align with ``data_names``
    :param data_names: names zipped with ``data_batch.data``
    :param scale: factor the predicted boxes are divided by
    :return: (scores, pred_boxes, data_dict)
    """
    t_start = time.time()
    output = predictor.predict(data_batch)
    print('predict{:.4f}s'.format(time.time() - t_start))

    data_dict = dict(zip(data_names, data_batch.data))
    if not config.TEST.HAS_RPN:
        # externally supplied RoIs are reshaped to rows of 5 values
        rois = data_dict['rois'].asnumpy().reshape((-1, 5))
    else:
        rois = output['rois_output'].asnumpy()
    # the first column is dropped, keeping the remaining box columns
    rois = rois[:, 1:]
    im_shape = data_dict['data'].shape

    # network outputs for the first batch entry
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    # decode, clip to the network input size, then undo the image scaling
    decoded = bbox_pred(rois, bbox_deltas)
    clipped = clip_boxes(decoded, im_shape[-2:])
    pred_boxes = clipped / scale

    print('im_detect{:.4f}s'.format(time.time() - t_start))
    return scores, pred_boxes, data_dict
def im_detect(predictor, data_batch, data_names, scale):
    """
    Run the predictor on one batch and decode scores and boxes; when
    ``config.HAS_PART`` is set, also decode head boxes and joint locations.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch whose ``data`` entries align with ``data_names``
    :param data_names: names zipped with ``data_batch.data``
    :param scale: factor every predicted coordinate is divided by
    :return: ``(scores, pred_boxes, head_boxes, joints, data_dict)`` when
        ``config.HAS_PART`` is truthy, otherwise
        ``(scores, pred_boxes, data_dict)``
    """
    output = predictor.predict(data_batch)

    data_dict = dict(zip(data_names, data_batch.data))
    if config.TEST.HAS_RPN:
        rois = output['rois_output'].asnumpy()[:, 1:]
    else:
        rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
    im_shape = data_dict['data'].shape

    # save output
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    # NOTE: a leftover `import ipdb; ipdb.set_trace()` breakpoint used to sit
    # here and stopped every call in the debugger; it has been removed.

    # post processing
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    # we used scaled image & roi to train, so it is necessary to transform them back
    pred_boxes = pred_boxes / scale

    if config.HAS_PART:
        head_scores = output['head_prob_reshape_output'].asnumpy()[0]
        head_gids = np.argmax(head_scores, axis=1)
        head_deltas = output['head_pred_reshape_output'].asnumpy()[0]
        # only the stds are applied here (no means); they are tiled once per
        # column of head_scores
        stds = np.reshape(np.array(config.TRAIN.BBOX_STDS), (-1, 4))
        head_deltas *= np.tile(stds, (1, head_scores.shape[1]))
        head_boxes = pred_head(rois, head_deltas, head_gids,
                               config.PART_GRID_HW)
        head_boxes /= scale

        # four joint heads, each with its own score and delta output
        joints_scores = [
            output['joint_prob{}_reshape_output'.format(i)].asnumpy()[0]
            for i in range(4)
        ]
        joints_gids = [np.argmax(j, axis=1) for j in joints_scores]
        joints_deltas = [
            output['joint_pred{}_reshape_output'.format(i)].asnumpy()[0]
            for i in range(4)
        ]
        # joints use only the first two std columns
        joints_deltas = [
            j * np.tile(stds[:, :2], (1, head_scores.shape[1]))
            for j in joints_deltas
        ]
        joints = [pred_joint(rois, jd, jid, config.PART_GRID_HW)
                  for (jd, jid) in zip(joints_deltas, joints_gids)]
        joints = np.hstack(joints)
        joints /= scale
        return scores, pred_boxes, head_boxes, joints, data_dict

    return scores, pred_boxes, data_dict
def im_detect(self, im_array, im_info=None, roi_array=None):
    """
    Detect objects in the given image; inputs must follow the
    minibatch.get_testbatch format.

    :param im_array: numpy.ndarray [b c h w]
    :param im_info: numpy.ndarray [b 3], used when config.TEST.HAS_RPN is set
    :param roi_array: numpy.ndarray [roi_num 5], used otherwise
    :return: scores, pred_boxes
    """
    # fill in data and collect the input shapes needed for shape inference
    self.arg_params['data'] = mx.nd.array(im_array, self.ctx)
    if config.TEST.HAS_RPN:
        self.arg_params['im_info'] = mx.nd.array(im_info, self.ctx)
        input_shapes = {
            'data': self.arg_params['data'].shape,
            'im_info': self.arg_params['im_info'].shape,
        }
    else:
        self.arg_params['rois'] = mx.nd.array(roi_array, self.ctx)
        input_shapes = {
            'data': self.arg_params['data'].shape,
            'rois': self.arg_params['rois'].shape,
        }
    arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(**input_shapes)

    # fill in a zero label of the inferred shape
    arg_shapes_dict = dict(zip(self.symbol.list_arguments(), arg_shapes))
    self.arg_params['cls_prob_label'] = mx.nd.zeros(
        arg_shapes_dict['cls_prob_label'], self.ctx)

    # bind without gradients, map output names to output arrays, then run
    self.executor = self.symbol.bind(self.ctx, self.arg_params,
                                     args_grad=None, grad_req='null',
                                     aux_states=self.aux_params)
    output_dict = dict(zip(self.symbol.list_outputs(), self.executor.outputs))
    self.executor.forward(is_train=False)

    # read the outputs for the first batch entry
    scores = output_dict['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output_dict['bbox_pred_reshape_output'].asnumpy()[0]
    if config.TEST.HAS_RPN:
        roi_rows = output_dict['rois_output'].asnumpy()
    else:
        roi_rows = roi_array
    rois = roi_rows[:, 1:]

    # decode the deltas and clip to the input image size
    pred_boxes = clip_boxes(bbox_pred(rois, bbox_deltas),
                            im_array[0].shape[-2:])
    return scores, pred_boxes
def coco_results_one_category_kernel(data_pack):
    """
    Convert the detections of one category into COCO-style result dicts.

    :param data_pack: dict read for the keys 'cat_id', 'ann_type'
        ('bbox' or 'segm'), 'binary_thresh', 'all_im_info' (per-image dicts
        with 'index' and, for 'segm', 'width' and 'height'), 'boxes'
        (per-image detections whose last column is the score) and,
        for 'segm', 'masks'
    :return: list of result dicts; each has 'image_id', 'category_id',
        'score' and either 'bbox' ([x, y, w, h]) or 'segmentation'.
        An unsupported ann_type prints a message and returns an empty list.
    """
    cat_id = data_pack['cat_id']
    ann_type = data_pack['ann_type']
    binary_thresh = data_pack['binary_thresh']
    all_im_info = data_pack['all_im_info']
    boxes = data_pack['boxes']
    if ann_type == 'bbox':
        masks = []
    elif ann_type == 'segm':
        masks = data_pack['masks']
    else:
        # Previously execution continued and failed later with a NameError
        # on the first non-empty image; stop here with no results instead.
        print('unimplemented ann_type: ' + ann_type)
        return []

    cat_results = []
    for im_ind, im_info in enumerate(all_im_info):
        index = im_info['index']
        try:
            # `np.float` no longer exists in current NumPy, and the old bare
            # `except:` silently hid that, skipping the cast. The builtin
            # `float` is the same dtype.
            dets = boxes[im_ind].astype(float)
        except (AttributeError, TypeError, ValueError):
            # entry is not a castable array (e.g. an empty list placeholder)
            dets = boxes[im_ind]
        if len(dets) == 0:
            continue
        scores = dets[:, -1]
        if ann_type == 'bbox':
            # convert inclusive (x1, y1, x2, y2) corners to (x, y, w, h)
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'bbox': [xs[k], ys[k], ws[k], hs[k]],
                'score': scores[k]
            } for k in range(dets.shape[0])]
        else:  # ann_type == 'segm'
            width = im_info['width']
            height = im_info['height']
            dets[:, :4] = clip_boxes(dets[:, :4], [height, width])
            mask_encode = mask_voc2coco(masks[im_ind], dets[:, :4],
                                        height, width, binary_thresh)
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'segmentation': mask_encode[k],
                'score': scores[k]
            } for k in range(len(mask_encode))]
        cat_results.extend(result)
    return cat_results
def im_detect(predictor, data_batch, data_names):
    """
    Run the predictor on one batch and return class scores together with
    decoded boxes clipped to the network input size (no rescaling).

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch whose ``data`` entries align with ``data_names``
    :param data_names: names zipped with ``data_batch.data``
    :return: (scores, pred_boxes, data_dict)
    """
    output = predictor.predict(data_batch)
    data_dict = dict(zip(data_names, data_batch.data))

    # RoIs come from the network when it has an RPN, else from the inputs;
    # the first column is dropped in both cases
    roi_source = output['rois_output'] if config.TEST.HAS_RPN else data_dict['rois']
    rois = roi_source.asnumpy()[:, 1:]
    im_shape = data_dict['data'].shape

    # outputs of the first batch entry
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    # decode and clip
    pred_boxes = clip_boxes(bbox_pred(rois, bbox_deltas), im_shape[-2:])
    return scores, pred_boxes, data_dict
def im_detect(predictor, data_batch, data_names, scale):
    """
    Run the predictor on one batch and return class scores and decoded boxes
    mapped back by dividing by ``scale``.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch whose ``data`` entries align with ``data_names``
    :param data_names: names zipped with ``data_batch.data``
    :param scale: factor the predicted boxes are divided by
    :return: (scores, pred_boxes, data_dict)
    """
    output = predictor.predict(data_batch)
    data_dict = dict(zip(data_names, data_batch.data))

    if not config.TEST.HAS_RPN:
        # externally supplied RoIs are flattened to rows of 5 values first
        roi_rows = data_dict['rois'].asnumpy().reshape((-1, 5))
    else:
        roi_rows = output['rois_output'].asnumpy()
    rois = roi_rows[:, 1:]
    input_shape = data_dict['data'].shape

    # outputs of the first batch entry
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    # decode -> clip to the network input -> undo the image scaling
    boxes = bbox_pred(rois, deltas)
    boxes = clip_boxes(boxes, input_shape[-2:])
    return scores, boxes / scale, data_dict
def im_rpn_detect(predictor, data_batch, data_names, scale):
    """
    Return the RoIs and their scores for one batch; no box regression is
    applied, the RoIs are only clipped and rescaled.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch whose ``data`` entries align with ``data_names``
    :param data_names: names zipped with ``data_batch.data``
    :param scale: factor the boxes are divided by
    :return: (scores, pred_boxes, data_dict)
    """
    output = predictor.predict(data_batch)
    print(output.keys())

    data_dict = dict(zip(data_names, data_batch.data))
    if not config.TEST.HAS_RPN:
        roi_rows = data_dict['rois'].asnumpy().reshape((-1, 5))
    else:
        roi_rows = output['rois_output'].asnumpy()
    rois = roi_rows[:, 1:]
    input_shape = data_dict['data'].shape

    scores = output['rois_score'].asnumpy()

    # the RoIs are used as boxes directly: clip, then undo the image scaling
    pred_boxes = clip_boxes(rois, input_shape[-2:]) / scale
    return scores, pred_boxes, data_dict
def im_detect(predictor, data_batch, data_names, scales):
    """
    Run the predictor on a multi-image batch and decode, per image, the
    class scores, RoIs, boxes and mask predictions.

    :param predictor: object exposing ``predict(data_batch)``; its result is
        iterated alongside the per-image data and scales
    :param data_batch: batch whose ``data`` is a sequence of per-image inputs
    :param data_names: names zipped with each per-image input list
    :param scales: one scale factor per image
    :return: (scores_all, rois_all, pred_boxes_all, pred_masks_all,
        data_dict_all)
    :raises NotImplementedError: if config.TEST.HAS_RPN is falsy
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, per_image))
                     for per_image in data_batch.data]

    scores_all, rois_all = [], []
    pred_boxes_all, pred_masks_all = [], []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if not config.TEST.HAS_RPN:
            raise NotImplementedError
        rois = output['rois_output'].asnumpy()[:, 1:]
        input_shape = data_dict['data'].shape

        # network outputs for this image
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        pred_masks = output['mask_pred_output'].asnumpy()

        # decode and clip while still in network-input coordinates
        pred_boxes = clip_boxes(bbox_pred(rois, deltas), input_shape[-2:])

        # undo the image scaling for both RoIs and predicted boxes
        scores_all.append(scores)
        rois_all.append(rois / scale)
        pred_boxes_all.append(pred_boxes / scale)
        pred_masks_all.append(pred_masks)
    return scores_all, rois_all, pred_boxes_all, pred_masks_all, data_dict_all
def im_detect_mask(predictor, data_batch, data_names, scale=1):
    """
    Run the predictor on one batch and return class scores, decoded boxes
    and the raw mask output.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch whose ``data`` entries align with ``data_names``
    :param data_names: names zipped with ``data_batch.data``
    :param scale: factor the predicted boxes are divided by
    :return: (scores, pred_boxes, data_dict, mask_output)
    :raises NotImplementedError: if config.TEST.HAS_RPN is falsy
    """
    output = predictor.predict(data_batch)
    data_dict = dict(zip(data_names, data_batch.data))

    # only the RPN path is supported
    if not config.TEST.HAS_RPN:
        raise NotImplementedError

    rois = output['rois_output'].asnumpy()[:, 1:]
    input_shape = data_dict['data'].shape

    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    mask_output = output['mask_prob_output'].asnumpy()

    # decode, clip to the network input size, then undo the image scaling
    boxes = bbox_pred(rois, deltas)
    boxes = clip_boxes(boxes, input_shape[-2:])
    pred_boxes = boxes / scale
    return scores, pred_boxes, data_dict, mask_output
def demo_net(predictor, data, image_names, im_scales):
    """
    Run the demo: warm the predictor up, then for every image compute and
    visualise instance masks and per-class bounding boxes.

    :param predictor: predictor handed to ``im_detect``
    :param data: per-image dicts indexed by the names in DATA_NAMES
    :param image_names: file names, read from '../data/demo/' for display
    :param im_scales: per-image scale passed to ``vis_all_detection``
    :return: None; results are shown via ``show_masks`` and
        ``vis_all_detection`` and timings are printed
    """
    # convert every input of every image to an mx.nd array, ordered by DATA_NAMES
    data = [[mx.nd.array(data[i][name]) for name in DATA_NAMES] for i in xrange(len(data))]

    # warm up: run the first image twice and discard the results
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(DATA_NAMES, data[0])]],
                                     provide_label=[None])
        # element [0, 2] of the second input is used as the scale
        # (presumably im_info -- confirm against DATA_NAMES)
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        _, _, _, _, _= im_detect(predictor, data_batch, DATA_NAMES, scales)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(DATA_NAMES, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, boxes2, masks, data_dict = im_detect(predictor, data_batch, DATA_NAMES, scales)
        # spatial dimensions (axes 2 and 3) of each image tensor
        im_shapes = [data_batch.data[i][0].shape[2:4] for i in xrange(len(data_batch.data))]

        # mask output
        if not config.TEST.USE_MASK_MERGE:
            # plain per-class NMS on detections scoring above 0.7
            all_boxes = [[] for _ in xrange(config.NUM_CLASSES)]
            all_masks = [[] for _ in xrange(config.NUM_CLASSES)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, config.NUM_CLASSES):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                # class-agnostic boxes are used when config.CLASS_AGNOSTIC is
                # truthy; a missing attribute or a falsy value both fall
                # through to the per-class slice via the bare except
                try:
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[0][indexes, :]
                    else:
                        raise Exception()
                except:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            # index 0 is skipped from here on
            dets = [all_boxes[j] for j in range(1, config.NUM_CLASSES)]
            masks = [all_masks[j] for j in range(1, config.NUM_CLASSES)]
        else:
            # mask-voting path: drop mask channel 0 and work at the
            # un-scaled image size
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print (im_height, im_width)
            boxes_ = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(masks, boxes_, scores[0], config.NUM_CLASSES,
                                                        100, im_width, im_height,
                                                        config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                                                        config.BINARY_THRESH, 0)

            dets = [result_dets[j] for j in range(1, config.NUM_CLASSES)]
            masks = [result_masks[j][:, 0, :, :] for j in range(1, config.NUM_CLASSES)]
        print 'testing {} {:.4f}s'.format(im_name, toc())

        # visualize: keep detections (and their masks) whose last column,
        # the score, exceeds 0.7
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:,-1]>0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread('../data/demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, CLASSES)

        # debug (disabled snippet kept as an inert string literal)
        ''' for ii in range(scores[0].shape[0]): for jj in range(1, scores[0].shape[1]): if scores[0][ii][jj]>0.7: print ii, jj, scores[0][ii][jj] '''

        # bounding box output: per-class score threshold followed by NMS,
        # using the second box output (boxes2) of im_detect
        all_boxes = [[] for _ in CLASSES]
        nms = py_nms_wrapper(NMS_THRESH)
        for cls in CLASSES:
            # class columns are offset by one relative to CLASSES
            cls_ind = CLASSES.index(cls)+1
            cls_boxes = boxes2[0][:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[0][:, cls_ind, np.newaxis]
            keep = np.where(cls_scores >= CONF_THRESH)[0]
            dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
            keep = nms(dets)
            all_boxes[cls_ind-1] = dets[keep, :]

        boxes_this_image = [all_boxes[j] for j in range(len(CLASSES))]
        vis_all_detection(data_dict[0]['data'].asnumpy(), boxes_this_image, CLASSES, im_scales[idx])

    print 'done'
def forward(self, is_train, req, in_data, out_data, aux):
    """Implements forward computation: turn RPN scores and box deltas into RoIs.

    is_train : bool, whether forwarding for training or testing.
    req : list of {'null', 'write', 'inplace', 'add'}, how to assign to out_data. 'null' means skip assignment, etc.
    in_data : list of NDArray, input data. Read here as in_data[0] = scores,
        in_data[1] = bbox deltas, in_data[2] = im_info.
    out_data : list of NDArray, pre-allocated output buffers. out_data[0]
        receives the RoIs; out_data[1] receives the scores when
        self._output_score is set.
    aux : list of NDArray, mutable auxiliary states. Usually not used.

    Raises ValueError when the batch holds more than one image.
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")

    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    # the first set of anchors are background probabilities
    # keep the second part
    scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]

    logger.debug('im_info: %s' % im_info)

    # 1. Generate proposals from bbox_deltas and shifted anchors
    # use real image size instead of padded feature map sizes
    height, width = int(im_info[0] / self._feat_stride), int(
        im_info[1] / self._feat_stride)

    logger.debug('score map size: (%d, %d)' %
                 (scores.shape[2], scores.shape[3]))
    logger.debug('resudial: (%d, %d)' %
                 (scores.shape[2] - height, scores.shape[3] - width))

    # Enumerate all shifts
    # The idea: build one (x, y) shift per feature-map cell, then add every
    # shift to every base anchor to obtain the anchor boxes at each position.
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # After meshgrid, shift_x repeats the x offsets along every row and
    # shift_y repeats the y offsets along every column, e.g. with stride 16:
    #   shift_x = [[0 16 32 ... 592], [0 16 32 ... 592], ...]
    #   shift_y = [[0 0 0 ... 0], [16 16 16 ... 16], ..., [592 592 ... 592]]
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # after the transpose each row of `shifts` is one (x, y, x, y) offset

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    # every anchor in self._anchors is added to every shift
    anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    # K shifts, A boxes per shift
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = self._clip_pad(scores, (height, width))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    # (refine the anchor positions with the regressed offsets)
    proposals = bbox_pred(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    # (cut off the parts of boxes that extend beyond the image)
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = self._filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad to ensure output size remains unchanged:
    # if too few survive, randomly re-pick kept indices to fill the gap
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array (the RoIs handed to the downstream Fast R-CNN stage)
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    # each row becomes a 5-tuple (0, x1, y1, x2, y2)
    blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                   copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1],
                    scores.astype(np.float32, copy=False))
def forward(self, is_train, req, in_data, out_data, aux):
    """
    Generate RoIs from RPN outputs for a single image.

    Reads scores from in_data[0], box deltas from in_data[1] and im_info
    from in_data[2]; writes a (batch_index, box) array to out_data[0] and,
    when self._output_score is set, the matching scores to out_data[1].

    :raises ValueError: if the batch contains more than one image
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

    if in_data[0].shape[0] > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")

    # Pipeline: shifted anchors -> apply deltas -> clip -> drop small boxes
    # -> sort by score -> pre-NMS top-N -> NMS -> post-NMS top-N (padded).
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size
    num_anchors = self._num_anchors
    stride = self._feat_stride

    # the first num_anchors channels are background probabilities;
    # only the second part is kept
    scores = in_data[0].asnumpy()[:, num_anchors:, :, :]
    bbox_deltas = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. proposals from deltas and shifted anchors, sized from the real
    #    image rather than the padded feature map
    height = int(im_info[0] / stride)
    width = int(im_info[1] / stride)

    if DEBUG:
        print('score map size: {}'.format(scores.shape))
        print("resudial: {}".format((scores.shape[2] - height, scores.shape[3] - width)))

    # one (x, y, x, y) shift per feature-map cell
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * stride,
                                 np.arange(0, height) * stride)
    shifts = np.vstack((grid_x.ravel(), grid_y.ravel(),
                        grid_x.ravel(), grid_y.ravel())).transpose()

    # (1, A, 4) anchors + (K, 1, 4) shifts -> (K, A, 4) -> (K*A, 4)
    num_cells = shifts.shape[0]
    base_anchors = self._anchors.reshape((1, num_anchors, 4))
    cell_shifts = shifts.reshape((1, num_cells, 4)).transpose((1, 0, 2))
    anchors = (base_anchors + cell_shifts).reshape((num_cells * num_anchors, 4))

    # bring deltas into the same (h, w, a) row order as the anchors:
    # (1, 4*A, H, W) -> (1, H, W, 4*A) -> (H*W*A, 4)
    bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # same for scores: (1, A, H, W) -> (1, H, W, A) -> (H*W*A, 1)
    scores = self._clip_pad(scores, (height, width))
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # anchors -> proposals, then 2. clip to the image
    proposals = clip_boxes(bbox_pred(anchors, bbox_deltas), im_info[:2])

    # 3. drop boxes smaller than min_size expressed at the input image scale
    keep = self._filter_boxes(proposals, min_size * im_info[2])
    proposals, scores = proposals[keep, :], scores[keep]

    # 4./5. highest scores first, optionally truncated before NMS
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals, scores = proposals[order, :], scores[order]

    # 6./7. NMS, optionally truncated afterwards
    keep = nms(np.hstack((proposals, scores)).astype(np.float32))
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # pad with randomly re-drawn kept indices so the output size is fixed
    shortfall = post_nms_topN - len(keep)
    if shortfall > 0:
        keep = np.hstack((keep, npr.choice(keep, size=shortfall)))
    proposals, scores = proposals[keep, :], scores[keep]

    # 8. only a single image is supported, so every batch index is 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False):
    """
    Detect boxes (and optionally landmarks) in one image or a list of images,
    aggregating over every requested scale and, optionally, a horizontal flip.

    :param img: an image array, or a list of image arrays
    :param threshold: minimum score a proposal must reach to be kept
    :param scales: resize factors to run the network at.
        NOTE(review): mutable default argument; it is only read here.
    :param do_flip: when true, each image is also run mirrored
    :return: (det, landmarks). ``det`` rows are [x1, y1, x2, y2, score, ...]
        (a stride column is appended when self.nms_threshold < 0);
        ``landmarks`` is None unless landmarks are enabled (and voting is off)
    """
    proposals_list = []
    scores_list = []
    landmarks_list = []
    strides_list = []
    timea = datetime.datetime.now()
    flips = [0]
    if do_flip:
        flips = [0, 1]

    imgs = [img]
    if isinstance(img, list):
        imgs = img
    # NOTE: the loop variable re-binds the `img` parameter
    for img in imgs:
        for im_scale in scales:
            for flip in flips:
                if im_scale != 1.0:
                    im = cv2.resize(img,
                                    None,
                                    None,
                                    fx=im_scale,
                                    fy=im_scale,
                                    interpolation=cv2.INTER_LINEAR)
                else:
                    im = img.copy()
                if flip:
                    im = im[:, ::-1, :]
                if self.nocrop:
                    # zero-pad height and width up to the next multiple of 32
                    if im.shape[0] % 32 == 0:
                        h = im.shape[0]
                    else:
                        h = (im.shape[0] // 32 + 1) * 32
                    if im.shape[1] % 32 == 0:
                        w = im.shape[1]
                    else:
                        w = (im.shape[1] // 32 + 1) * 32
                    _im = np.zeros((h, w, 3), dtype=np.float32)
                    _im[0:im.shape[0], 0:im.shape[1], :] = im
                    im = _im
                else:
                    im = im.astype(np.float32)
                if self.debug:
                    timeb = datetime.datetime.now()
                    diff = timeb - timea
                    print('X1 uses', diff.total_seconds(), 'seconds')
                # im_info holds only (height, width); the scale is applied
                # explicitly further below
                im_info = [im.shape[0], im.shape[1]]
                im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
                # channel order is reversed (2 - i) while normalising
                # (presumably BGR -> RGB -- confirm against the caller)
                for i in range(3):
                    im_tensor[0, i, :, :] = (
                        im[:, :, 2 - i] / self.pixel_scale -
                        self.pixel_means[2 - i]) / self.pixel_stds[2 - i]
                if self.debug:
                    timeb = datetime.datetime.now()
                    diff = timeb - timea
                    print('X2 uses', diff.total_seconds(), 'seconds')
                data = nd.array(im_tensor)
                db = mx.io.DataBatch(data=(data, ),
                                     provide_data=[('data', data.shape)])
                if self.debug:
                    timeb = datetime.datetime.now()
                    diff = timeb - timea
                    print('X3 uses', diff.total_seconds(), 'seconds')
                self.model.forward(db, is_train=False)
                net_out = self.model.get_outputs()

                # sym_idx walks through net_out: each stride contributes
                # scores, bbox deltas, optionally landmarks, and optionally
                # cascade outputs
                sym_idx = 0

                for _idx, s in enumerate(self._feat_stride_fpn):
                    _key = 'stride%s' % s
                    stride = int(s)
                    is_cascade = False
                    if self.cascade:
                        is_cascade = True
                    scores = net_out[sym_idx].asnumpy()
                    if self.debug:
                        timeb = datetime.datetime.now()
                        diff = timeb - timea
                        print('A uses', diff.total_seconds(), 'seconds')
                    # keep only the channels after the first num_anchors ones
                    scores = scores[:, self._num_anchors['stride%s' % s]:, :, :]

                    bbox_deltas = net_out[sym_idx + 1].asnumpy()

                    # the feature-map size is taken from the deltas themselves
                    height, width = bbox_deltas.shape[
                        2], bbox_deltas.shape[3]

                    A = self._num_anchors['stride%s' % s]
                    K = height * width
                    anchors_fpn = self._anchors_fpn['stride%s' % s]
                    anchors = anchors_plane(height, width, stride,
                                            anchors_fpn)
                    anchors = anchors.reshape((K * A, 4))

                    scores = scores.transpose((0, 2, 3, 1)).reshape(
                        (-1, 1))

                    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
                    bbox_pred_len = bbox_deltas.shape[3] // A
                    bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))
                    # multiply every delta component by its configured std
                    bbox_deltas[:, 0::4] = bbox_deltas[:, 0::
                                                       4] * self.bbox_stds[0]
                    bbox_deltas[:, 1::4] = bbox_deltas[:, 1::
                                                       4] * self.bbox_stds[1]
                    bbox_deltas[:, 2::4] = bbox_deltas[:, 2::
                                                       4] * self.bbox_stds[2]
                    bbox_deltas[:, 3::4] = bbox_deltas[:, 3::
                                                       4] * self.bbox_stds[3]
                    proposals = self.bbox_pred(anchors, bbox_deltas)

                    if is_cascade:
                        # look at the outputs following this stride's regular
                        # ones; their channel count tells a cascade
                        # classification branch from a cascade bbox branch
                        cascade_sym_num = 0
                        cls_cascade = False
                        bbox_cascade = False
                        __idx = [3, 4]
                        if not self.use_landmarks:
                            __idx = [2, 3]
                        for diff_idx in __idx:
                            if sym_idx + diff_idx >= len(net_out):
                                break
                            body = net_out[sym_idx + diff_idx].asnumpy()
                            if body.shape[1] // A == 2:  #cls branch
                                if cls_cascade or bbox_cascade:
                                    break
                                else:
                                    cascade_scores = body[:, self.
                                                          _num_anchors[
                                                              'stride%s' %
                                                              s]:, :, :]
                                    cascade_scores = cascade_scores.transpose(
                                        (0, 2, 3, 1)).reshape((-1, 1))
                                    # the cascade scores replace (rather than
                                    # average with) the first-stage scores
                                    scores = cascade_scores  #TODO?
                                    cascade_sym_num += 1
                                    cls_cascade = True
                            elif body.shape[1] // A == 4:  #bbox branch
                                cascade_deltas = body.transpose(
                                    (0, 2, 3, 1)).reshape(
                                        (-1, bbox_pred_len))
                                cascade_deltas[:, 0::
                                               4] = cascade_deltas[:, 0::
                                                                   4] * self.bbox_stds[
                                                                       0]
                                cascade_deltas[:, 1::
                                               4] = cascade_deltas[:, 1::
                                                                   4] * self.bbox_stds[
                                                                       1]
                                cascade_deltas[:, 2::
                                               4] = cascade_deltas[:, 2::
                                                                   4] * self.bbox_stds[
                                                                       2]
                                cascade_deltas[:, 3::
                                               4] = cascade_deltas[:, 3::
                                                                   4] * self.bbox_stds[
                                                                       3]
                                # refine the first-stage proposals
                                proposals = self.bbox_pred(
                                    proposals, cascade_deltas)
                                cascade_sym_num += 1
                                bbox_cascade = True

                    proposals = clip_boxes(proposals, im_info[:2])

                    # optionally down-weight the stride-4 scores
                    if stride == 4 and self.decay4 < 1.0:
                        scores *= self.decay4

                    scores_ravel = scores.ravel()
                    # `order` here is an unsorted mask of proposals reaching
                    # the threshold; sorting happens after aggregation
                    order = np.where(scores_ravel >= threshold)[0]
                    proposals = proposals[order, :]
                    scores = scores[order]
                    if flip:
                        # mirror x coordinates back to the un-flipped image
                        oldx1 = proposals[:, 0].copy()
                        oldx2 = proposals[:, 2].copy()
                        proposals[:, 0] = im.shape[1] - oldx2 - 1
                        proposals[:, 2] = im.shape[1] - oldx1 - 1

                    # map boxes back to the original image scale
                    proposals[:, 0:4] /= im_scale

                    proposals_list.append(proposals)
                    scores_list.append(scores)
                    if self.nms_threshold < 0.0:
                        # record which stride produced each proposal
                        _strides = np.empty(shape=(scores.shape),
                                            dtype=np.float32)
                        _strides.fill(stride)
                        strides_list.append(_strides)

                    if not self.vote and self.use_landmarks:
                        landmark_deltas = net_out[sym_idx + 2].asnumpy()
                        landmark_pred_len = landmark_deltas.shape[1] // A
                        landmark_deltas = landmark_deltas.transpose(
                            (0, 2, 3, 1)).reshape(
                                (-1, 5, landmark_pred_len // 5))
                        landmark_deltas *= self.landmark_std
                        landmarks = self.landmark_pred(
                            anchors, landmark_deltas)
                        landmarks = landmarks[order, :]

                        if flip:
                            landmarks[:, :,
                                      0] = im.shape[1] - landmarks[:, :,
                                                                   0] - 1
                            # NOTE: `order` is re-bound here to a landmark
                            # permutation that swaps points 0<->1 and 3<->4
                            # (presumably left/right pairs -- confirm)
                            order = [1, 0, 2, 4, 3]
                            flandmarks = landmarks.copy()
                            for idx, a in enumerate(order):
                                flandmarks[:, idx, :] = landmarks[:, a, :]
                            landmarks = flandmarks
                        landmarks[:, :, 0:2] /= im_scale
                        landmarks_list.append(landmarks)
                    # advance past this stride's outputs
                    if self.use_landmarks:
                        sym_idx += 3
                    else:
                        sym_idx += 2
                    if is_cascade:
                        sym_idx += cascade_sym_num

    if self.debug:
        timeb = datetime.datetime.now()
        diff = timeb - timea
        print('B uses', diff.total_seconds(), 'seconds')
    proposals = np.vstack(proposals_list)
    landmarks = None
    if proposals.shape[0] == 0:
        # nothing detected: return empty arrays of the matching width
        if self.use_landmarks:
            landmarks = np.zeros((0, 5, 2))
        if self.nms_threshold < 0.0:
            return np.zeros((0, 6)), landmarks
        else:
            return np.zeros((0, 5)), landmarks
    scores = np.vstack(scores_list)
    scores_ravel = scores.ravel()
    # global sort, highest score first
    order = scores_ravel.argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    if self.nms_threshold < 0.0:
        strides = np.vstack(strides_list)
        strides = strides[order]
    if not self.vote and self.use_landmarks:
        landmarks = np.vstack(landmarks_list)
        landmarks = landmarks[order].astype(np.float32, copy=False)

    if self.nms_threshold > 0.0:
        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)
        if not self.vote:
            keep = self.nms(pre_det)
            det = np.hstack((pre_det, proposals[:, 4:]))
            det = det[keep, :]
            if self.use_landmarks:
                landmarks = landmarks[keep]
        else:
            det = np.hstack((pre_det, proposals[:, 4:]))
            det = self.bbox_vote(det)
    elif self.nms_threshold < 0.0:
        # negative threshold: skip NMS and append the stride column
        det = np.hstack(
            (proposals[:, 0:4], scores, strides)).astype(np.float32,
                                                         copy=False)
    else:
        # threshold of exactly zero: return sorted detections without NMS
        det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                            copy=False)

    if self.debug:
        timeb = datetime.datetime.now()
        diff = timeb - timea
        print('C uses', diff.total_seconds(), 'seconds')
    return det, landmarks
def forward(self, is_train, req, in_data, out_data, aux):
    """Decode per-stride RPN outputs into a fixed-size proposal blob.

    For every stride in ``self._feat_stride_fpn`` the anchors are laid out
    with ``anchors_plane``, decoded with ``self._bbox_pred``, clipped with
    ``clip_boxes`` and cut down to the ``pre_nms_topN`` highest scores.
    The per-stride results are stacked, sorted by score, optionally passed
    through NMS, truncated / padded to ``post_nms_topN`` rows and written
    to ``out_data[0]``; the scores go to ``out_data[1]`` when
    ``self._output_score`` is set.

    :param is_train: not used by this method
    :param req: write requests, forwarded to ``self.assign``
    :param in_data: one class-probability array per entry of
        ``self.fpn_keys``, followed by the matching bbox-delta arrays;
        the last element is the image-info array
    :param out_data: output arrays (proposals, optionally scores)
    :param aux: not used by this method
    :raises ValueError: when the batch contains more than one image
    """
    nms = gpu_nms_wrapper(self._threshold, in_data[0][0].context.device_id)

    num_levels = len(self.fpn_keys)
    cls_prob_dict = dict(zip(self.fpn_keys, in_data[0:num_levels]))
    bbox_pred_dict = dict(
        zip(self.fpn_keys, in_data[num_levels:2 * num_levels]))

    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n

    # The image info does not depend on the stride, so copy it to the host
    # once instead of once per pyramid level.
    im_info = in_data[-1].asnumpy()[0, :]

    proposals_list = []
    scores_list = []
    for s in self._feat_stride_fpn:
        _key = 'stride%s' % s
        stride = int(s)
        A = self._num_anchors[_key]

        scores = cls_prob_dict[_key].asnumpy()
        # Keep only the channels from index A onwards.
        scores = scores[:, A:, :, :]
        bbox_deltas = bbox_pred_dict[_key].asnumpy()

        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
        K = height * width
        anchors = anchors_plane(
            height, width, stride,
            self._anchors_fpn[_key].astype(np.float32))
        anchors = anchors.reshape((K * A, 4))

        # Bring both maps to (height, width) and flatten them so that each
        # row corresponds to one anchor.
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        proposals = self._bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])

        # Highest scores first, limited to pre_nms_topN per stride.
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals_list.append(proposals[order, :])
        scores_list.append(scores[order])

    proposals = np.vstack(proposals_list)
    scores = np.vstack(scores_list)
    order = scores.ravel().argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    det = np.hstack((proposals, scores)).astype(np.float32)

    if self._threshold < 1.0:
        keep = nms(det)
    else:
        keep = range(det.shape[0])
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]

    num_keep = len(keep)
    if num_keep < post_nms_topN:
        # Pad with randomly repeated kept indices so the output always has
        # post_nms_topN rows.
        # NOTE(review): npr.choice raises when ``keep`` is empty (no
        # proposals at all) - confirm how callers want that case handled.
        pad = npr.choice(keep, size=post_nms_topN - num_keep)
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Padded rows are marked with a score of -1.
    scores[num_keep:, :] = -1.0

    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1],
                    scores.astype(np.float32, copy=False))
def detect(self, img, threshold=0.5, im_scale=1.0):
    """Run the detector on an already prepared input and return detections.

    ``img`` is wrapped in an ``nd.array`` and fed to ``self.model`` as is;
    its axes 2 and 3 are used as the clipping bounds for the boxes.

    :param img: network input array
    :param threshold: minimum score an anchor needs to be kept
    :param im_scale: factor the box / landmark coordinates are divided by
    :return: ``(det, landmarks)``; ``landmarks`` is ``None`` unless
        landmarks are produced (``self.use_landmarks`` and not
        ``self.vote``), and an empty ``(0, 5, 2)`` array when nothing was
        detected and ``self.use_landmarks`` is set
    """
    box_chunks = []
    score_chunks = []
    landmark_chunks = []

    data = nd.array(img)
    batch = mx.io.DataBatch(data=(data, ),
                            provide_data=[('data', data.shape)])
    self.model.forward(batch, is_train=False)
    net_out = self.model.get_outputs()

    for level, s in enumerate(self._feat_stride_fpn):
        stride_key = 'stride%s' % s
        stride = int(s)
        # Each stride owns 3 consecutive outputs with landmarks, else 2.
        idx = level * 3 if self.use_landmarks else level * 2

        scores = net_out[idx].asnumpy()
        scores = scores[:, self._num_anchors[stride_key]:, :, :]
        idx += 1
        bbox_deltas = net_out[idx].asnumpy()

        height = bbox_deltas.shape[2]
        width = bbox_deltas.shape[3]
        A = self._num_anchors[stride_key]
        K = height * width
        anchors = anchors_plane(height, width, stride,
                                self._anchors_fpn[stride_key])
        anchors = anchors.reshape((K * A, 4))

        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
        bbox_pred_len = bbox_deltas.shape[3] // A
        bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))

        proposals = self.bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, (img.shape[2], img.shape[3]))

        # Indices of the anchors whose score reaches the threshold.
        selected = np.where(scores.ravel() >= threshold)[0]
        proposals = proposals[selected, :]
        scores = scores[selected]
        if stride == 4 and self.decay4 < 1.0:
            scores *= self.decay4

        proposals[:, 0:4] /= im_scale
        box_chunks.append(proposals)
        score_chunks.append(scores)

        if not self.vote and self.use_landmarks:
            idx += 1
            landmark_deltas = net_out[idx].asnumpy()
            landmark_deltas = self._clip_pad(landmark_deltas,
                                             (height, width))
            landmark_pred_len = landmark_deltas.shape[1] // A
            landmark_deltas = landmark_deltas.transpose(
                (0, 2, 3, 1)).reshape((-1, 5, landmark_pred_len // 5))
            points = self.landmark_pred(anchors, landmark_deltas)
            points = points[selected, :]
            points[:, :, 0:2] /= im_scale
            landmark_chunks.append(points)

    proposals = np.vstack(box_chunks)
    landmarks = None
    if proposals.shape[0] == 0:
        if self.use_landmarks:
            landmarks = np.zeros((0, 5, 2))
        return np.zeros((0, 5)), landmarks

    # Sort everything by descending score.
    scores = np.vstack(score_chunks)
    ranking = scores.ravel().argsort()[::-1]
    proposals = proposals[ranking, :]
    scores = scores[ranking]
    if not self.vote and self.use_landmarks:
        landmarks = np.vstack(landmark_chunks)
        landmarks = landmarks[ranking].astype(np.float32, copy=False)

    pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                            copy=False)
    if self.vote:
        det = np.hstack((pre_det, proposals[:, 4:]))
        det = self.bbox_vote(det)
        return det, landmarks

    keep = self.nms(pre_det)
    det = np.hstack((pre_det, proposals[:, 4:]))
    det = det[keep, :]
    if self.use_landmarks:
        landmarks = landmarks[keep]
    return det, landmarks
def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False):
    """Detect on ``img`` at several scales, optionally with a mirrored pass.

    For every scale (and, when ``do_flip`` is set, for the image and its
    horizontal mirror) the image is normalised, run through ``self.model``
    and decoded per stride.  All candidates are merged, sorted by score and
    reduced with ``self.nms`` or, when ``self.vote`` is set,
    ``self.bbox_vote``.

    :param img: input image, indexed as (row, column, channel)
    :param threshold: minimum score an anchor needs to be kept
    :param scales: resize factors to run at (the default list is only read,
        never modified)
    :param do_flip: also run on the horizontally mirrored image
    :return: ``(det, landmarks)``; ``landmarks`` is ``None`` unless
        landmarks are produced (``self.use_landmarks`` and not
        ``self.vote``), and an empty ``(0, 5, 2)`` array when nothing was
        detected and ``self.use_landmarks`` is set
    """
    proposals_list = []
    scores_list = []
    landmarks_list = []
    timea = datetime.datetime.now()

    def _log_elapsed(tag):
        # Debug-only timing output, relative to the start of the call.
        if self.debug:
            timeb = datetime.datetime.now()
            diff = timeb - timea
            print(tag, diff.total_seconds(), 'seconds')

    flips = [0, 1] if do_flip else [0]

    for im_scale in scales:
        for flip in flips:
            if im_scale != 1.0:
                im = cv2.resize(img,
                                None,
                                None,
                                fx=im_scale,
                                fy=im_scale,
                                interpolation=cv2.INTER_LINEAR)
            else:
                im = img.copy()
            if flip:
                im = im[:, ::-1, :]

            # Width of the (possibly mirrored) picture before padding.
            # Padding is added on the right, after mirroring, so
            # coordinates must be mirrored back about THIS width and not
            # about the padded one.
            flip_width = im.shape[1]

            if self.nocrop:
                # Pad bottom/right with zeros up to a multiple of 32.
                h = ((im.shape[0] + 31) // 32) * 32
                w = ((im.shape[1] + 31) // 32) * 32
                _im = np.zeros((h, w, 3), dtype=np.float32)
                _im[0:im.shape[0], 0:im.shape[1], :] = im
                im = _im
            else:
                im = im.astype(np.float32)
            _log_elapsed('X1 uses')

            im_info = [im.shape[0], im.shape[1]]
            im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
            # Reverse the channel order while normalising.
            for i in range(3):
                im_tensor[0, i, :, :] = (
                    im[:, :, 2 - i] / self.pixel_scale -
                    self.pixel_means[2 - i]) / self.pixel_stds[2 - i]
            _log_elapsed('X2 uses')

            data = nd.array(im_tensor)
            db = mx.io.DataBatch(data=(data, ),
                                 provide_data=[('data', data.shape)])
            _log_elapsed('X3 uses')
            self.model.forward(db, is_train=False)
            net_out = self.model.get_outputs()

            for _idx, s in enumerate(self._feat_stride_fpn):
                stride_key = 'stride%s' % s
                stride = int(s)
                # Each stride owns 3 consecutive outputs with landmarks,
                # otherwise 2.
                idx = _idx * 3 if self.use_landmarks else _idx * 2

                scores = net_out[idx].asnumpy()
                _log_elapsed('A uses')
                scores = scores[:, self._num_anchors[stride_key]:, :, :]

                idx += 1
                bbox_deltas = net_out[idx].asnumpy()

                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
                A = self._num_anchors[stride_key]
                K = height * width
                anchors_fpn = self._anchors_fpn[stride_key]
                anchors = anchors_plane(height, width, stride, anchors_fpn)
                anchors = anchors.reshape((K * A, 4))

                scores = self._clip_pad(scores, (height, width))
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
                bbox_pred_len = bbox_deltas.shape[3] // A
                bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))

                proposals = self.bbox_pred(anchors, bbox_deltas)
                proposals = clip_boxes(proposals, im_info[:2])

                order = np.where(scores.ravel() >= threshold)[0]
                proposals = proposals[order, :]
                scores = scores[order]
                if stride == 4 and self.decay4 < 1.0:
                    scores *= self.decay4

                if flip:
                    # Mirror x1/x2 back into un-flipped coordinates.
                    oldx1 = proposals[:, 0].copy()
                    oldx2 = proposals[:, 2].copy()
                    proposals[:, 0] = flip_width - oldx2 - 1
                    proposals[:, 2] = flip_width - oldx1 - 1

                proposals[:, 0:4] /= im_scale
                proposals_list.append(proposals)
                scores_list.append(scores)

                if not self.vote and self.use_landmarks:
                    idx += 1
                    landmark_deltas = net_out[idx].asnumpy()
                    landmark_deltas = self._clip_pad(
                        landmark_deltas, (height, width))
                    landmark_pred_len = landmark_deltas.shape[1] // A
                    landmark_deltas = landmark_deltas.transpose(
                        (0, 2, 3, 1)).reshape(
                            (-1, 5, landmark_pred_len // 5))
                    landmarks = self.landmark_pred(anchors, landmark_deltas)
                    landmarks = landmarks[order, :]

                    if flip:
                        landmarks[:, :, 0] = (flip_width -
                                              landmarks[:, :, 0] - 1)
                        # Swap point pairs (0, 1) and (3, 4) after
                        # mirroring; point 2 stays in place.
                        landmarks = landmarks[:, [1, 0, 2, 4, 3], :]

                    landmarks[:, :, 0:2] /= im_scale
                    landmarks_list.append(landmarks)

    _log_elapsed('B uses')

    proposals = np.vstack(proposals_list)
    landmarks = None
    if proposals.shape[0] == 0:
        if self.use_landmarks:
            landmarks = np.zeros((0, 5, 2))
        return np.zeros((0, 5)), landmarks

    # Sort all candidates by descending score.
    scores = np.vstack(scores_list)
    order = scores.ravel().argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    if not self.vote and self.use_landmarks:
        landmarks = np.vstack(landmarks_list)
        landmarks = landmarks[order].astype(np.float32, copy=False)

    pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                            copy=False)
    if not self.vote:
        keep = self.nms(pre_det)
        det = np.hstack((pre_det, proposals[:, 4:]))
        det = det[keep, :]
        if self.use_landmarks:
            landmarks = landmarks[keep]
    else:
        det = np.hstack((pre_det, proposals[:, 4:]))
        det = self.bbox_vote(det)

    _log_elapsed('C uses')
    return det, landmarks
def detect(self, img, threshold=0.5):
    """Detect on a single image using a fixed 640x640 network input.

    A non-square image is first padded to a square with a constant border
    (added on the right or at the bottom), then resized to 640x640,
    normalised and run through ``self.model``.  Boxes and landmarks are
    divided by the resize factor before being returned.

    :param img: input image, indexed as (row, column, channel)
    :param threshold: minimum score an anchor needs to be kept
    :return: ``(det, landmarks)`` after ``self.nms``; empty arrays of shape
        ``(0, 5)`` and ``(0, 5, 2)`` when nothing passes the threshold
    """
    box_chunks = []
    score_chunks = []
    point_chunks = []
    im_info = [640, 640]

    rows, cols = img.shape[0], img.shape[1]
    if rows != cols:
        BLUE = (255, 0, 0)
        if rows > cols:
            # Taller than wide: extend on the right.
            img = cv2.copyMakeBorder(img, 0, 0, 0, rows - cols,
                                     cv2.BORDER_CONSTANT, value=BLUE)
        else:
            # Wider than tall: extend at the bottom.
            img = cv2.copyMakeBorder(img, 0, cols - rows, 0, 0,
                                     cv2.BORDER_CONSTANT, value=BLUE)

    re_scale = float(im_info[0]) / float(img.shape[0])
    img = cv2.resize(img, (im_info[0], im_info[1]))
    img = img.astype(np.float32)

    im_tensor = np.zeros((1, 3, img.shape[0], img.shape[1]))
    # Reverse the channel order while normalising.
    for dst, src in enumerate((2, 1, 0)):
        channel = img[:, :, src] / self.pixel_scale - self.pixel_means[src]
        im_tensor[0, dst, :, :] = channel / self.pixel_stds[src]

    data = nd.array(im_tensor)
    batch = mx.io.DataBatch(data=(data, ),
                            provide_data=[('data', data.shape)])
    self.model.forward(batch, is_train=False)
    net_out = self.model.get_outputs()

    for level, s in enumerate(self._feat_stride_fpn):
        stride = int(s)
        stride_key = 'stride%s' % s
        # Three consecutive outputs per stride: scores, boxes, landmarks.
        first = level * 3
        A = self._num_anchors[stride_key]

        scores = net_out[first].asnumpy()
        scores = scores[:, A:, :, :]
        bbox_deltas = net_out[first + 1].asnumpy()
        landmark_deltas = net_out[first + 2].asnumpy()

        height = bbox_deltas.shape[2]
        width = bbox_deltas.shape[3]
        K = height * width
        anchors = anchors_plane(height, width, stride,
                                self._anchors_fpn[stride_key])
        anchors = anchors.reshape((K * A, 4))

        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
        bbox_pred_len = bbox_deltas.shape[3] // A
        bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))
        proposals = self.bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])

        landmark_deltas = self._clip_pad(landmark_deltas, (height, width))
        landmark_pred_len = landmark_deltas.shape[1] // A
        landmark_deltas = landmark_deltas.transpose((0, 2, 3, 1)).reshape(
            (-1, 5, landmark_pred_len // 5))
        landmarks = self.landmark_pred(anchors, landmark_deltas)

        # Keep only the anchors whose score reaches the threshold.
        selected = np.where(scores.ravel() >= threshold)[0]
        scores = scores[selected]
        proposals = proposals[selected, :]
        landmarks = landmarks[selected, :]

        if stride == 4 and self.decay4 < 1.0:
            scores *= self.decay4

        proposals[:, 0:4] /= re_scale
        landmarks[:, :, 0:2] /= re_scale

        score_chunks.append(scores)
        box_chunks.append(proposals)
        point_chunks.append(landmarks)

    scores = np.vstack(score_chunks)
    proposals = np.vstack(box_chunks)
    landmarks = np.vstack(point_chunks)
    if proposals.shape[0] == 0:
        return np.zeros((0, 5)), np.zeros((0, 5, 2))

    # Sort by descending score before NMS.
    ranking = scores.ravel().argsort()[::-1]
    scores = scores[ranking]
    proposals = proposals[ranking, :]
    landmarks = landmarks[ranking].astype(np.float32, copy=False)

    pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                            copy=False)
    keep = self.nms(pre_det)
    det = np.hstack((pre_det, proposals[:, 4:]))
    return det[keep, :], landmarks[keep]
def detect(self, img, thresh, scales=[1.0], do_flip=False):
    """Detect on ``img`` at several scales, optionally with a mirrored pass.

    Each (scale, flip) combination is derived from the ORIGINAL ``img``,
    preprocessed with ``self.preprocess``, run through ``self.model`` and
    decoded per entry of ``self.fpn_keys`` (three consecutive network
    outputs per entry: scores, box deltas, landmark deltas).  All
    candidates are merged, sorted by score and reduced with ``self.nms``.

    :param img: input image, indexed as (row, column, channel)
    :param thresh: minimum score an anchor needs to be kept
    :param scales: resize factors to run at (the default list is only read,
        never modified)
    :param do_flip: also run on the horizontally mirrored image
    :return: ``(det, landmarks)``; empty arrays of shape ``(0, 5)`` and
        ``(0, 5, 2)`` when nothing passes the threshold
    """
    proposal_list = []
    scores_list = []
    landmarks_list = []
    flips = [0, 1] if do_flip else [0]

    for im_scale in scales:
        for flip in flips:
            # Work on a per-pass copy: ``img`` itself must stay untouched
            # so that later scales / flips start from the original image.
            if im_scale != 1.0:
                im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
                                interpolation=cv2.INTER_LINEAR)
            else:
                im = img.copy()
            if flip:
                # Materialise the mirrored view; torch.from_numpy cannot
                # wrap arrays with negative strides.
                im = np.ascontiguousarray(im[:, ::-1, :])

            imgshape = [im.shape[0], im.shape[1]]
            tensor = torch.from_numpy(self.preprocess(im))
            if self.use_gpu:
                tensor = tensor.cuda()
            net_out = self.model(tensor)

            for _idx, s in enumerate(self.fpn_keys):
                idx = _idx * 3
                A = self._num_anchors[s]

                scores = net_out[idx].detach().cpu().numpy()
                scores = scores[:, A:]

                idx += 1
                bbox_deltas = net_out[idx].detach().cpu().numpy()

                h, w = bbox_deltas.shape[2], bbox_deltas.shape[3]
                K = h * w
                anchors_fpn = np.float32(self._anchors_fpn[s])
                anchors = anchors_plane(h, w, s, anchors_fpn)
                anchors = anchors.reshape((K * A, 4))

                scores = self._clip_pad(scores, (h, w))
                scores = scores.transpose([0, 2, 3, 1]).reshape([-1, 1])

                bbox_deltas = self._clip_pad(bbox_deltas, (h, w))
                bbox_deltas = bbox_deltas.transpose([0, 2, 3, 1])
                bbox_pred_len = bbox_deltas.shape[3] // A
                bbox_deltas = bbox_deltas.reshape([-1, bbox_pred_len])

                proposals = self.bbox_pred(anchors, bbox_deltas)
                proposals = clip_boxes(proposals, imgshape)

                order = np.where(scores.ravel() >= thresh)[0]
                proposals = proposals[order]
                scores = scores[order]

                if flip:
                    # Mirror x1/x2 back into un-flipped coordinates.
                    oldx1 = proposals[:, 0].copy()
                    oldx2 = proposals[:, 2].copy()
                    proposals[:, 0] = imgshape[1] - oldx2 - 1
                    proposals[:, 2] = imgshape[1] - oldx1 - 1

                proposals[:, :4] /= im_scale
                proposal_list.append(proposals)
                scores_list.append(scores)

                # landmarks
                idx += 1
                landmark_deltas = net_out[idx].detach().cpu().numpy()
                landmark_deltas = self._clip_pad(landmark_deltas, (h, w))
                landmark_pred_len = landmark_deltas.shape[1] // A
                landmark_deltas = landmark_deltas.transpose(
                    (0, 2, 3, 1)).reshape([-1, 5, landmark_pred_len // 5])
                landmarks = self.landmark_pred(anchors, landmark_deltas)
                landmarks = landmarks[order, :]

                if flip:
                    landmarks[:, :, 0] = imgshape[1] - landmarks[:, :, 0] - 1
                    # Swap point pairs (0, 1) and (3, 4) after mirroring;
                    # the result must replace ``landmarks``.
                    landmarks = landmarks[:, [1, 0, 2, 4, 3]]

                landmarks[:, :, :2] /= im_scale
                landmarks_list.append(landmarks)

    proposals = np.vstack(proposal_list)
    if proposals.shape[0] == 0:
        return np.zeros([0, 5]), np.zeros([0, 5, 2])

    # Sort all candidates by descending score before NMS.
    scores = np.vstack(scores_list)
    order = scores.ravel().argsort()[::-1]
    proposals = proposals[order]
    scores = scores[order]
    landmarks = np.vstack(landmarks_list)
    landmarks = np.float32(landmarks[order])

    pre_det = np.float32(np.hstack([proposals[:, 0:4], scores]))
    keep = self.nms(pre_det)
    det = np.hstack([pre_det, proposals[:, 4:]])
    det = det[keep]
    landmarks = landmarks[keep]
    return det, landmarks
def detect(self, img, threshold=0.05, scales=[1.0]):
    """Detect on ``img`` at several scales and return the scored boxes.

    For each scale the image is resized, mean-subtracted (with the channel
    order reversed) and run through ``self.model``.  Per stride, anchors
    are decoded with ``self._bbox_pred``, clipped, and cut to the
    ``self._rpn_pre_nms_top_n`` best scores.  The merged candidates are
    sorted, optionally passed through ``self.nms`` and filtered by
    ``threshold``.

    :param img: input image, indexed as (row, column, channel)
    :param threshold: minimum final score; values <= 0 disable the filter
    :param scales: resize factors to run at (the default list is only read,
        never modified)
    :return: float32 array with one row per detection; columns 0-3 are the
        box, column 4 the score.  An empty ``(0, 5)`` array when no
        proposals were collected at all.
    """
    proposals_list = []
    scores_list = []
    pre_nms_topN = self._rpn_pre_nms_top_n

    for im_scale in scales:
        if im_scale != 1.0:
            im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)
        else:
            im = img
        im = im.astype(np.float32)

        im_info = [im.shape[0], im.shape[1], im_scale]
        im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
        # Reverse the channel order while subtracting the pixel means.
        for i in range(3):
            im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]

        data = nd.array(im_tensor)
        db = mx.io.DataBatch(data=(data,),
                             provide_data=[('data', data.shape)])
        self.model.forward(db, is_train=False)
        net_out = self.model.get_outputs()

        for s in self._feat_stride_fpn:
            # With several scales, stride 32 is skipped for the last one.
            if len(scales) > 1 and s == 32 and im_scale == scales[-1]:
                continue

            stride_key = 'stride%s' % s
            stride = int(s)
            # Output offset per stride: 16 -> 2, 8 -> 4, anything else -> 0.
            idx = 0
            if s == 16:
                idx = 2
            elif s == 8:
                idx = 4

            A = self._num_anchors[stride_key]
            scores = net_out[idx].asnumpy()
            scores = scores[:, A:, :, :]
            idx += 1
            bbox_deltas = net_out[idx].asnumpy()

            height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
            K = height * width
            anchors = anchors_plane(
                height, width, stride,
                self._anchors_fpn[stride_key].astype(np.float32))
            anchors = anchors.reshape((K * A, 4))

            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape(
                (-1, 4))
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            proposals = self._bbox_pred(anchors, bbox_deltas)
            proposals = clip_boxes(proposals, im_info[:2])

            # Highest scores first, limited to pre_nms_topN per stride.
            order = scores.ravel().argsort()[::-1]
            if pre_nms_topN > 0:
                order = order[:pre_nms_topN]
            proposals = proposals[order, :]
            scores = scores[order]

            proposals /= im_scale
            proposals_list.append(proposals)
            scores_list.append(scores)

    if not proposals_list:
        # Nothing was collected (e.g. empty ``scales``); np.vstack would
        # raise on an empty list.
        return np.zeros((0, 5), dtype=np.float32)

    proposals = np.vstack(proposals_list)
    scores = np.vstack(scores_list)
    order = scores.ravel().argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    det = np.hstack((proposals, scores)).astype(np.float32)

    if self.nms_threshold < 1.0:
        keep = self.nms(det)
        det = det[keep, :]
    if threshold > 0.0:
        keep = np.where(det[:, 4] >= threshold)[0]
        det = det[keep, :]
    return det
def detect(self, img, threshold=0.5, scales=[1.0]):
    """Detect boxes and keypoints on ``img`` at several scales.

    For each scale the image is resized, mean-subtracted (with the channel
    order reversed) and run through ``self.model``.  The network outputs
    are consumed in groups of three per stride, in the order of
    ``self.feat_strides``: scores, box deltas, keypoint deltas.

    :param img: input image, indexed as (row, column, channel)
    :param threshold: minimum final score; values <= 0 disable the filter
    :param scales: resize factors to run at (the default list is only read,
        never modified)
    :return: float32 array with one row per detection: the box columns,
        the score (column 4), then the keypoint columns
    """
    proposals_list = []
    proposals_kp_list = []
    scores_list = []
    pre_nms_topN = self._rpn_pre_nms_top_n

    for im_scale in scales:
        if im_scale != 1.0:
            im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)
        else:
            im = img
        im = im.astype(np.float32)

        im_info = [im.shape[0], im.shape[1], im_scale]
        im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
        # Reverse the channel order (original note: OpenCV is BGR, the
        # network expects RGB) while subtracting the pixel means.
        for i in range(3):
            im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]

        data = nd.array(im_tensor)
        db = mx.io.DataBatch(data=(data,),
                             provide_data=[('data', data.shape)])

        timea = datetime.datetime.now()
        self.model.forward(db, is_train=False)
        timeb = datetime.datetime.now()
        diff = timeb - timea
        print('forward uses', diff.total_seconds(), 'seconds')

        # Original note: the network output is a list of length 9, split
        # into three groups (one per stride); each group holds the score,
        # bbox and keypoint results.
        net_out = self.model.get_outputs()

        for level, s in enumerate(self.feat_strides):
            stride_key = 'stride%s' % s
            stride = int(s)
            # Three consecutive outputs per stride, in feat_strides order.
            # Deriving the offset from the position replaces the former
            # if/if/elif chain, which only handled exactly three strides
            # and left ``idx`` stale or unbound for any other.
            idx = 3 * level
            A = self._num_anchors[stride_key]

            # Classification scores for this stride.
            scores = net_out[idx].asnumpy()
            idx += 1
            # Keep only the channels from index A onwards (the original
            # author was unsure what the dropped channels hold).
            scores = scores[:, A:, :, :]

            bbox_deltas = net_out[idx].asnumpy()
            idx += 1
            height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

            # Keypoint deltas.
            kpoint_deltas = net_out[idx].asnumpy()

            K = height * width
            # Original note: maps the region proposals back to coordinates
            # in the input image.
            anchors = anchors_plane(
                height, width, stride,
                self._anchors_fpn[stride_key].astype(np.float32))
            anchors = anchors.reshape((K * A, 4))

            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape(
                (-1, 4))

            kpoint_deltas = self._clip_pad(kpoint_deltas, (height, width))
            kpoint_deltas = kpoint_deltas.transpose((0, 2, 3, 1)).reshape(
                (-1, 10))

            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            proposals = self._bbox_pred(anchors, bbox_deltas)
            # Original note: removes coordinates that fall outside the
            # image.
            proposals = clip_boxes(proposals, im_info[:2])

            proposals_kp = kpoint_pred(anchors, kpoint_deltas)
            proposals_kp = clip_points(proposals_kp, im_info[:2])

            # Take the top N entries by score.
            order = scores.ravel().argsort()[::-1]
            if pre_nms_topN > 0:
                order = order[:pre_nms_topN]
            proposals = proposals[order, :]
            proposals_kp = proposals_kp[order, :]
            scores = scores[order]

            proposals /= im_scale
            proposals_kp /= im_scale

            proposals_list.append(proposals)
            proposals_kp_list.append(proposals_kp)
            scores_list.append(scores)

    proposals = np.vstack(proposals_list)
    proposals_kp = np.vstack(proposals_kp_list)
    scores = np.vstack(scores_list)

    # Sort all candidates by descending score.
    order = scores.ravel().argsort()[::-1]
    proposals = proposals[order, :]
    proposals_kp = proposals_kp[order, :]
    scores = scores[order]

    det = np.hstack((proposals, scores, proposals_kp)).astype(np.float32)

    if self.nms_threshold < 1.0:
        keep = self.nms(det)
        det = det[keep, :]
    if threshold > 0.0:
        keep = np.where(det[:, 4] >= threshold)[0]
        det = det[keep, :]
    return det
def detect(self, img, scales_index=0):
    """Detect on a border-padded copy of ``img`` and map boxes back.

    The image is surrounded by a black border of ``config.TEST.CONSTANT``
    pixels on every side, then processed at each scale returned by
    ``self.get_boxes``.  After NMS and score filtering
    (``config.TEST.SCORE_THRESH``) the border offset is subtracted from
    the boxes and they are clamped to the original image.

    :param img: input image, indexed as (row, column, channel)
    :param scales_index: forwarded to ``self.get_boxes``
    :return: float32 array with one row per detection; columns 0-3 are the
        box, column 4 the score.  An empty ``(0, 5)`` array when no
        proposals were collected at all.
    """
    proposals_list = []
    scores_list = []

    # Only the original size is needed later, so remember the shape
    # instead of copying the whole image.
    src_height, src_width = img.shape[0], img.shape[1]

    CONSTANT = config.TEST.CONSTANT
    BLACK = [0, 0, 0]
    img = cv2.copyMakeBorder(img, CONSTANT, CONSTANT, CONSTANT, CONSTANT,
                             cv2.BORDER_CONSTANT, value=BLACK)
    scales = self.get_boxes(img, scales_index)
    pre_nms_topN = self._rpn_pre_nms_top_n

    for im_scale in scales:
        if im_scale != 1.0:
            im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)
        else:
            im = img
        im = im.astype(np.float32)

        im_info = [im.shape[0], im.shape[1], im_scale]
        im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
        # Reverse the channel order while subtracting the pixel means.
        for i in range(3):
            im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]

        data = nd.array(im_tensor)
        db = mx.io.DataBatch(data=(data,),
                             provide_data=[('data', data.shape)])
        self.model.forward(db, is_train=False)
        net_out = self.model.get_outputs()

        for s in self._feat_stride_fpn:
            # With several scales, stride 32 is skipped for the last one.
            if len(scales) > 1 and s == 32 and im_scale == scales[-1]:
                continue

            stride_key = 'stride%s' % s
            stride = int(s)
            # Output offset per stride: 16 -> 2, 8 -> 4, anything else -> 0.
            idx = 0
            if s == 16:
                idx = 2
            elif s == 8:
                idx = 4

            A = self._num_anchors[stride_key]
            scores = net_out[idx].asnumpy()
            idx += 1
            scores = scores[:, A:, :, :]
            bbox_deltas = net_out[idx].asnumpy()

            height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
            K = height * width
            anchors = anchors_plane(
                height, width, stride,
                self._anchors_fpn[stride_key].astype(np.float32))
            anchors = anchors.reshape((K * A, 4))

            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape(
                (-1, 4))
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            proposals = self._bbox_pred(anchors, bbox_deltas)
            proposals = clip_boxes(proposals, im_info[:2])

            # Highest scores first, limited to pre_nms_topN per stride.
            order = scores.ravel().argsort()[::-1]
            if pre_nms_topN > 0:
                order = order[:pre_nms_topN]
            proposals = proposals[order, :]
            scores = scores[order]

            proposals /= im_scale
            proposals_list.append(proposals)
            scores_list.append(scores)

    if not proposals_list:
        # Nothing was collected (e.g. no scales); np.vstack would raise on
        # an empty list.
        return np.zeros((0, 5), dtype=np.float32)

    proposals = np.vstack(proposals_list)
    scores = np.vstack(scores_list)
    order = scores.ravel().argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    det = np.hstack((proposals, scores)).astype(np.float32)

    if self.nms_threshold < 1.0:
        keep = self.nms(det)
        det = det[keep, :]

    threshold = config.TEST.SCORE_THRESH
    if threshold > 0.0:
        keep = np.where(det[:, 4] >= threshold)[0]
        det = det[keep, :]

    if det.shape[0] != 0:
        # Undo the border offset, then clamp to the original image:
        # x1/y1 are limited from below by 0, x2/y2 from above by the
        # original width/height.
        det[:, 0:4] -= CONSTANT
        det[:, 0] = np.maximum(det[:, 0], 0)
        det[:, 1] = np.maximum(det[:, 1], 0)
        det[:, 2] = np.minimum(det[:, 2], src_width)
        det[:, 3] = np.minimum(det[:, 3], src_height)
    return det
def demo_maskrcnn(network, ctx, prefix, epoch, vis=True, has_rpn=True, thresh=0.001):
    """Run a Mask R-CNN text model over the test image list and write results.

    For every entry of ``<dataset_path>/imglists/test.lst`` the image
    ``ch4_test_images/img_<index>.jpg`` is read, run through the network,
    and for every kept 'text' detection a four-point rectangle is appended
    to ``submit/res_img_<index>.txt``; an annotated image is written to
    ``test_result_img/result_<index>.jpg``. Afterwards the submit directory
    is zipped and an external evaluation script is launched via
    ``os.system``.

    :param network: name spliced into ``get_<network>_mask_test`` to look
        up the symbol factory (via ``eval``).
    :param ctx: context passed to ``load_param`` and ``MutableModule``.
    :param prefix: checkpoint prefix passed to ``load_param``.
    :param epoch: checkpoint epoch passed to ``load_param``.
    :param vis: not read anywhere in this function.
    :param has_rpn: must be truthy; asserted on entry.
    :param thresh: minimum class score for a detection to be kept.

    NOTE(review): the body relies on Python 2 semantics (``xrange``, and
    indexing/assigning into the result of ``map``) -- confirm the target
    interpreter before porting.
    """
    assert has_rpn, "Only has_rpn==True has been supported."
    # NOTE(review): eval() on a caller-supplied name; fine for a trusted CLI
    # argument, unsafe for untrusted input.
    sym = eval('get_' + network + '_mask_test')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)
    # Never changed below, so every `split == True` branch is dead code.
    split = False
    max_image_shape = (1, 3, 1024, 1024)
    max_data_shapes = [("data", max_image_shape), ("im_info", (1, 3))]
    mod = MutableModule(symbol=sym, data_names=["data", "im_info"], label_names=None, max_data_shapes=max_data_shapes, context=ctx)
    mod.bind(data_shapes=max_data_shapes, label_shapes=None, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    class OneDataBatch():
        """Minimal single-image batch exposing data/label/provide_* fields."""

        def __init__(self, img):
            # im_info row is (height, width, scale=1.0).
            im_info = mx.nd.array([[img.shape[0], img.shape[1], 1.0]])
            # HWC -> CHW, then add a batch axis while reversing channel order.
            img = np.transpose(img, (2, 0, 1))
            img = img[np.newaxis, (2, 1, 0)]
            self.data = [mx.nd.array(img), im_info]
            self.label = None
            self.provide_label = None
            self.provide_data = [("data", (1, 3, img.shape[2], img.shape[3])), ("im_info", (1, 3))]

    imglist_file = os.path.join(default.dataset_path, 'imglists', 'test.lst')
    #print(default.dataset_path)
    assert os.path.exists(imglist_file), 'Path does not exist: {}'.format(
        imglist_file)
    # Only the number of list entries is used below; images are addressed
    # by running index, not by the path stored here.
    imgfiles_list = []
    with open(imglist_file, 'r') as f:
        for line in f:
            file_list = dict()
            label = line.strip().split('\t')
            file_list['img_path'] = label[1]
            imgfiles_list.append(file_list)
    roidb = []
    index = 0
    submit_dir = os.path.join(default.dataset_path, 'submit')
    if not os.path.exists(submit_dir):
        os.makedirs(submit_dir)
    img_dir = os.path.join(default.dataset_path, 'test_result_img')
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)
    for im in range(len(imgfiles_list)):
        # Image files are numbered from 1.
        index = im + 1
        img_path = os.path.join(default.dataset_path, 'ch4_test_images', 'img_' + str(index) + '.jpg')
        img_ori = cv2.imread(img_path)
        batch = OneDataBatch(img_ori)
        mod.forward(batch, False)
        results = mod.get_outputs()
        output = dict(zip(mod.output_names, results))
        # Drop the first column of each roi row (presumably the batch index).
        rois = output['rois_output'].asnumpy()[:, 1:]
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        mask_output = output['mask_prob_output'].asnumpy()
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, [img_ori.shape[0], img_ori.shape[1]])
        nms = py_nms_wrapper(config.TEST.NMS)
        boxes = pred_boxes
        CLASSES = ('__background__', 'text')
        all_boxes = [[[] for _ in xrange(1)] for _ in xrange(len(CLASSES))]
        all_masks = [[[] for _ in xrange(1)] for _ in xrange(len(CLASSES))]
        # Arg-max class per roi, as a column so it broadcasts against scores.
        label = np.argmax(scores, axis=1)
        label = label[:, np.newaxis]
        for cls in CLASSES:
            cls_ind = CLASSES.index(cls)
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_masks = mask_output[:, cls_ind, :, :]
            cls_scores = scores[:, cls_ind, np.newaxis]
            #print cls_scores.shape, label.shape
            # Keep rois whose best class is this one and whose score passes.
            keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0]
            cls_masks = cls_masks[keep, :, :]
            dets = np.hstack(
                (cls_boxes, cls_scores)).astype(np.float32)[keep, :]
            keep_la = nms(dets)
            print('------------------------keep_la', keep_la)
            all_boxes[cls_ind] = dets[keep_la, :]
            all_masks[cls_ind] = cls_masks[keep_la, :, :]
        boxes_this_image = [[]
                            ] + [all_boxes[j] for j in range(1, len(CLASSES))]
        masks_this_image = [[]
                            ] + [all_masks[j] for j in range(1, len(CLASSES))]
        import copy
        import random
        class_names = CLASSES
        color_white = (255, 255, 255)
        scale = 1.0
        # From here on `im` is the drawing canvas, no longer the loop index.
        im = copy.copy(img_ori)
        num_box = 1
        num_boxes = 0
        mini_box = np.zeros((4, 2))
        mini_box = np.int32(mini_box)
        # `dets` is still the pre-NMS array of the last class from the loop
        # above; when it is empty an all-zero rectangle line is written.
        if (len(dets) == 0):
            submit_path = os.path.join(submit_dir,
                                       'res_img_{}.txt'.format(index))
            result_txt = open(submit_path, 'a')
            for i in range(0, 4):
                result_txt.write(str(mini_box[i][0]))
                result_txt.write(',')
                result_txt.write(str(mini_box[i][1]))
                if i < 3:
                    result_txt.write(',')
            result_txt.write('\r\n')
            result_txt.close()
        for k, name in enumerate(class_names):
            if name == '__background__':
                continue
            # NOTE(review): randint is inclusive, so 256 can be drawn here;
            # `color` is only read inside the dead `split` branch.
            color = (random.randint(0, 256), random.randint(0, 256),
                     random.randint(0, 256))  # generate a random color
            dets = boxes_this_image[k]
            masks = masks_this_image[k]
            #im_binary_merge = np.zeros(im[:,:,0].shape)
            print('------------------------len(dets)', len(dets))
            for i in range(len(dets)):
                bbox_i = dets[i, :4] * scale
                #if bbox[2] == bbox[0] or bbox[3] == bbox[1] or bbox[0] == bbox[1] or bbox[2] == bbox[3]  :
                # Skip zero-width / zero-height boxes.
                if bbox_i[2] == bbox_i[0] or bbox_i[3] == bbox_i[1]:
                    continue
                score_i = dets[i, -1]
                bbox_i = map(int, bbox_i)
                mask_i = masks[i, :, :]
                mask_i = masks[i, :, :]
                # Resize the mask to the box size, then binarise at 0.3.
                mask_i = cv2.resize(mask_i, (bbox_i[2] - bbox_i[0],
                                             (bbox_i[3] - bbox_i[1])),
                                    interpolation=cv2.INTER_LINEAR)
                mask_i[mask_i > 0.3] = 1
                mask_i[mask_i <= 0.3] = 0
                # Full-image binary canvas holding only instance i.
                im_binary_i = np.zeros(im[:, :, 0].shape)
                im_binary_i[bbox_i[1]:bbox_i[3],
                            bbox_i[0]:bbox_i[2]] = im_binary_i[
                                bbox_i[1]:bbox_i[3],
                                bbox_i[0]:bbox_i[2]] + mask_i
                #print("len(dets is )-------------------------",len(dets))
                # overlap: mask IoU of i with each other instance j;
                # overlap_other: intersection divided by the area of i.
                overlap = []
                overlap_other = []
                for j in range(len(dets)):
                    if i == j:
                        continue
                    bbox_j = dets[j, :4] * scale
                    #if bbox[2] == bbox[0] or bbox[3] == bbox[1] or bbox[0] == bbox[1] or bbox[2] == bbox[3]  :
                    if bbox_j[2] == bbox_j[0] or bbox_j[3] == bbox_j[1]:
                        continue
                    num_box += 1
                    score_j = dets[j, -1]
                    bbox_j = map(int, bbox_j)
                    mask_j = masks[j, :, :]
                    mask_j = masks[j, :, :]
                    mask_j = cv2.resize(mask_j, (bbox_j[2] - bbox_j[0],
                                                 (bbox_j[3] - bbox_j[1])),
                                        interpolation=cv2.INTER_LINEAR)
                    #print("mask_j,score_j,img_path------------------------",mask_j,score_j,img_path)
                    mask_j[mask_j > 0.3] = 1
                    mask_j[mask_j <= 0.3] = 0
                    im_binary_j = np.zeros(im[:, :, 0].shape)
                    im_binary_j[bbox_j[1]:bbox_j[3],
                                bbox_j[0]:bbox_j[2]] = im_binary_j[
                                    bbox_j[1]:bbox_j[3],
                                    bbox_j[0]:bbox_j[2]] + mask_j
                    # Pixels equal to 2 belong to both instances.
                    im_binary = im_binary_i + im_binary_j
                    #mask_inter = mask_i+mask_j
                    ni = np.sum(im_binary_i == 1)
                    nj = np.sum(im_binary_j == 1)
                    nij = np.sum(im_binary == 2)
                    IOU_ratio = float(nij) / (ni + nj - nij)
                    overlap.append(IOU_ratio)
                    #if  np.sum(im_binary_i == 1) == 0:
                    #    continue
                    #if  np.sum(im_binary_j == 1) == 0:
                    #    continue
                    IOU_ratio_self = float(
                        np.sum(im_binary == 2)) / np.sum(im_binary_i == 1)
                    overlap_other.append(IOU_ratio_self)
                    #IOU_ratio_other = float(np.sum(im_binary == 2)) / np.sum(im_binary_j == 1)
                    #overlap_other.append(IOU_ratio_other)
                # num_box is still 1 only if no other instance was compared
                # so far for this image; seed the lists so np.max works.
                # NOTE(review): num_box is not reset per i, so a later i with
                # no valid j would leave both lists empty -- confirm.
                if num_box == 1:
                    overlap.append(0)
                    overlap_other.append(0)
                # Keep instance i only if it does not overlap others much.
                if np.max(overlap) < 0.6 and split == False and np.max(
                        overlap_other) < 0.9:
                    num_boxes += 1
                    #cv2.rectangle(im, (bbox_i[0], bbox_i[1]), (bbox_i[2], bbox_i[3]), color=color, thickness=2)
                    cv2.putText(im,
                                '%s %.3f' % (class_names[k], score_i),
                                (bbox_i[0], bbox_i[1] + 10),
                                color=color_white,
                                fontFace=cv2.FONT_HERSHEY_COMPLEX,
                                fontScale=0.5)
                    px = np.where(mask_i == 1)
                    x_min = np.min(px[1])
                    y_min = np.min(px[0])
                    x_max = np.max(px[1])
                    y_max = np.max(px[0])
                    # Ignore masks spanning at most one pixel in either axis.
                    if x_max - x_min <= 1 or y_max - y_min <= 1:
                        continue
                    # Tint one randomly chosen channel inside the box by the
                    # mask, saturating at 255.
                    mask_color = random.randint(0, 255)
                    c = random.randint(0, 2)
                    mini_boxt = np.zeros((4, 2))
                    target = im[bbox_i[1]:bbox_i[3], bbox_i[0]:bbox_i[2],
                                c] + mask_color * mask_i
                    target[target >= 255] = 255
                    im[bbox_i[1]:bbox_i[3], bbox_i[0]:bbox_i[2], c] = target
                    mini_box = minimum_bounding_rectangle(im_binary_i)
                    # Swap the two coordinates of each corner (presumably
                    # (row, col) -> (x, y); confirm against the helper).
                    mini_boxt[0][0] = mini_box[0][1]
                    mini_boxt[0][1] = mini_box[0][0]
                    mini_boxt[1][0] = mini_box[1][1]
                    mini_boxt[1][1] = mini_box[1][0]
                    mini_boxt[2][0] = mini_box[2][1]
                    mini_boxt[2][1] = mini_box[2][0]
                    mini_boxt[3][0] = mini_box[3][1]
                    mini_boxt[3][1] = mini_box[3][0]
                    mini_box = mini_boxt
                    mini_box = np.int32(mini_box)
                    #print("---------------",mini_box)
                    cv2.polylines(im, [mini_box], 1, (255, 255, 255))
                    # Append "x0,y0,x1,y1,x2,y2,x3,y3\r\n" to the submit file.
                    submit_path = os.path.join(submit_dir,
                                               'res_img_{}.txt'.format(index))
                    result_txt = open(submit_path, 'a')
                    for i in range(0, 4):
                        result_txt.write(str(mini_box[i][0]))
                        result_txt.write(',')
                        result_txt.write(str(mini_box[i][1]))
                        if i < 3:
                            result_txt.write(',')
                    result_txt.write('\r\n')
                    result_txt.close()
                # Dead while `split` stays False (see top of function).
                # NOTE(review): nesting of this branch was reconstructed from
                # whitespace-collapsed text -- verify against the original.
                if split == True:
                    if np.max(overlap_other) > 0.6:
                        W = bbox_j[2] - bbox_j[0]
                        H = bbox_j[3] - bbox_j[1]
                        bbox_i[2] = bbox_i[2] - W
                        bbox_i[3] = bbox_i[3] - H
                    num_boxes += 1
                    cv2.rectangle(im, (bbox_i[0], bbox_i[1]),
                                  (bbox_i[2], bbox_i[3]),
                                  color=color,
                                  thickness=2)
                    cv2.putText(im,
                                '%s %.3f' % (class_names[k], score_i),
                                (bbox_i[0], bbox_i[1] + 10),
                                color=color_white,
                                fontFace=cv2.FONT_HERSHEY_COMPLEX,
                                fontScale=0.5)
                    px = np.where(mask_i == 1)
                    x_min = np.min(px[1])
                    y_min = np.min(px[0])
                    x_max = np.max(px[1])
                    y_max = np.max(px[0])
                    if x_max - x_min <= 1 or y_max - y_min <= 1:
                        continue
                    mask_color = random.randint(0, 255)
                    c = random.randint(0, 2)
                    target = im[bbox_i[1]:bbox_i[3], bbox_i[0]:bbox_i[2],
                                c] + mask_color * mask_i
                    target[target >= 255] = 255
                    im[bbox_i[1]:bbox_i[3], bbox_i[0]:bbox_i[2], c] = target
                    #inst_path = os.path.join(inst_dir,'result_{}_{}.mat'.format(index,num_boxes))
                    #io.savemat(inst_path, {'Segmentation': im_binary_i})
        #numbox = open('data/boxnum.txt','a')
        #numbox.write(str(num_boxes)+'\n')
        #numbox.close()
        result_img_path = os.path.join(img_dir, 'result_{}.jpg'.format(index))
        cv2.imwrite(result_img_path, im)
    #zip_submit_dir = 'script_test_ch4'
    # Package the per-image result files and run the evaluation script.
    zip_file = os.path.join('script_test_ch4', 'submit.zip')
    createZip(submit_dir, zip_file)
    os.system(
        "python ./script_test_ch4/script.py -g=./script_test_ch4/gt.zip -s=./script_test_ch4/submit.zip"
    )
def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False):
    """Detect boxes (and optionally landmarks) on a single-channel image.

    The image is evaluated once per entry of ``scales`` and, when
    ``do_flip`` is set, once more horizontally mirrored. Per-stride
    proposals with a score of at least ``threshold`` are collected, mapped
    back to input coordinates, sorted by score and reduced either by
    ``self.nms`` or, when ``self.vote`` is set, by ``self.bbox_vote``.

    :param img: 2-D image array; a trailing channel axis is added here.
    :param threshold: minimum score for a proposal to be kept.
    :param scales: resize factors to evaluate (the default list is only
        iterated, never mutated).
    :param do_flip: also evaluate the mirrored image.
    :return: ``(det, scores)`` where ``scores`` is the score column sorted
        descending *before* NMS/voting. With no proposals at all the
        result is ``(zeros((0, 5)), landmarks)`` with ``landmarks`` being
        ``zeros((0, 5, 2))`` or ``None``.
        NOTE(review): the two return paths disagree on the second element;
        the original trailing ``#landmarks`` comment suggests landmarks
        used to be returned -- confirm with callers.
    """
    started = datetime.datetime.now()

    def _report(tag):
        # Debug-only stopwatch: time elapsed since entry to detect().
        if self.debug:
            elapsed = datetime.datetime.now() - started
            print(tag, elapsed.total_seconds(), 'seconds')

    proposals_list = []
    scores_list = []
    landmarks_list = []
    flips = [0, 1] if do_flip else [0]

    for im_scale in scales:
        for flip in flips:
            if im_scale == 1.0:
                im = img.copy()
            else:
                im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
                                interpolation=cv2.INTER_LINEAR)
            im = im[:, :, np.newaxis]
            if flip:
                im = im[:, ::-1, :]

            if self.nocrop:
                # Zero-pad bottom/right up to the next multiple of 32.
                padded_h = ((im.shape[0] + 31) // 32) * 32
                padded_w = ((im.shape[1] + 31) // 32) * 32
                canvas = np.zeros((padded_h, padded_w, 1), dtype=np.float32)
                canvas[0:im.shape[0], 0:im.shape[1], :] = im
                im = canvas
            else:
                im = im.astype(np.float32)
            _report('X1 uses')

            im_info = [im.shape[0], im.shape[1]]
            im_tensor = np.zeros((1, 1, im.shape[0], im.shape[1]))
            # Single channel: scale, subtract mean, divide by std.
            im_tensor[0, 0, :, :] = (
                im[:, :, 0] / self.pixel_scale -
                self.pixel_means[0]) / self.pixel_stds[0]
            _report('X2 uses')

            data = nd.array(im_tensor)
            db = mx.io.DataBatch(data=(data, ),
                                 provide_data=[('data', data.shape)])
            _report('X3 uses')
            self.model.forward(db, is_train=False)
            net_out = self.model.get_outputs()

            # Each stride owns 2 consecutive outputs (score, bbox) or 3
            # when landmarks are enabled (score, bbox, landmark).
            per_level = 3 if self.use_landmarks else 2
            for level, s in enumerate(self._feat_stride_fpn):
                key = 'stride%s' % s
                stride = int(s)
                out_idx = level * per_level
                A = self._num_anchors[key]

                scores = net_out[out_idx].asnumpy()
                _report('A uses')
                # Keep only the score channels from index A onwards.
                scores = scores[:, A:, :, :]
                bbox_deltas = net_out[out_idx + 1].asnumpy()

                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
                K = height * width
                anchors = anchors_plane(height, width, stride,
                                        self._anchors_fpn[key])
                anchors = anchors.reshape((K * A, 4))

                scores = self._clip_pad(scores, (height, width))
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
                bbox_pred_len = bbox_deltas.shape[3] // A
                bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))

                proposals = self.bbox_pred(anchors, bbox_deltas)
                proposals = clip_boxes(proposals, im_info[:2])

                order = np.where(scores.ravel() >= threshold)[0]
                proposals = proposals[order, :]
                scores = scores[order]
                if stride == 4 and self.decay4 < 1.0:
                    scores *= self.decay4

                if flip:
                    # Mirror x1/x2 back; both are computed before either
                    # column is overwritten.
                    mirrored_x1 = im.shape[1] - proposals[:, 2] - 1
                    mirrored_x2 = im.shape[1] - proposals[:, 0] - 1
                    proposals[:, 0] = mirrored_x1
                    proposals[:, 2] = mirrored_x2
                proposals[:, 0:4] /= im_scale

                proposals_list.append(proposals)
                scores_list.append(scores)

                if self.use_landmarks and not self.vote:
                    landmark_deltas = net_out[out_idx + 2].asnumpy()
                    landmark_deltas = self._clip_pad(landmark_deltas,
                                                     (height, width))
                    landmark_pred_len = landmark_deltas.shape[1] // A
                    landmark_deltas = landmark_deltas.transpose(
                        (0, 2, 3, 1)).reshape(
                            (-1, 5, landmark_pred_len // 5))
                    landmarks = self.landmark_pred(anchors, landmark_deltas)
                    landmarks = landmarks[order, :]
                    if flip:
                        landmarks[:, :, 0] = (im.shape[1] -
                                              landmarks[:, :, 0] - 1)
                        # Mirroring swaps points 0<->1 and 3<->4.
                        swapped = landmarks.copy()
                        for dst, src in enumerate([1, 0, 2, 4, 3]):
                            swapped[:, dst, :] = landmarks[:, src, :]
                        landmarks = swapped
                    landmarks[:, :, 0:2] /= im_scale
                    landmarks_list.append(landmarks)

    _report('B uses')

    proposals = np.vstack(proposals_list)
    landmarks = None
    if proposals.shape[0] == 0:
        if self.use_landmarks:
            landmarks = np.zeros((0, 5, 2))
        return np.zeros((0, 5)), landmarks

    scores = np.vstack(scores_list)
    order = scores.ravel().argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    if self.use_landmarks and not self.vote:
        landmarks = np.vstack(landmarks_list)
        landmarks = landmarks[order].astype(np.float32, copy=False)

    pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                            copy=False)
    det = np.hstack((pre_det, proposals[:, 4:]))
    if self.vote:
        det = self.bbox_vote(det)
    else:
        keep = self.nms(pre_det)
        det = det[keep, :]
        if self.use_landmarks:
            landmarks = landmarks[keep]

    _report('C uses')
    return det, scores
def detect(self, img, threshold=0.5):
    """Detect boxes and five-point landmarks in one image.

    The image is prepared by ``self._preprocess_image`` and passed through
    ``self.model``. The outputs are read as consecutive
    (score, bbox, landmark) triples, one per entry of
    ``self._feat_stride_fpn``, with the channel axis last.

    :param img: input image, forwarded to ``self._preprocess_image``.
    :param threshold: minimum score for a proposal to be kept.
    :return: ``(det, landmarks)``; ``det`` holds ``x1, y1, x2, y2, score``
        plus any extra proposal columns, ``landmarks`` the matching
        float32 landmark array. With no proposals the result is
        ``(zeros((0, 5)), zeros((0, 5, 2)))``.
    """
    im_tensor, im_info, im_scale = self._preprocess_image(img)
    net_out = [elt.numpy() for elt in self.model(im_tensor)]

    proposals_list, scores_list, landmarks_list = [], [], []
    for level, s in enumerate(self._feat_stride_fpn):
        key = 'stride%s' % s
        base = 3 * level
        A = self._num_anchors[key]

        # Keep only the score channels from index A onwards (last axis).
        scores = net_out[base][:, :, :, A:]
        bbox_deltas = net_out[base + 1]
        height, width = bbox_deltas.shape[1], bbox_deltas.shape[2]
        K = height * width

        anchors = anchors_plane(height, width, s, self._anchors_fpn[key])
        anchors = anchors.reshape((K * A, 4))
        scores = scores.reshape((-1, 1))

        bbox_pred_len = bbox_deltas.shape[3] // A
        bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))
        # Scale each of the four delta components by its std.
        for comp in range(4):
            bbox_deltas[:, comp::4] = (bbox_deltas[:, comp::4] *
                                       self.bbox_stds[comp])

        proposals = clip_boxes(self.bbox_pred(anchors, bbox_deltas),
                               im_info[:2])

        # Stride-4 scores are damped before thresholding.
        if s == 4 and self.decay4 < 1.0:
            scores *= self.decay4

        order = np.where(scores.ravel() >= threshold)[0]
        proposals = proposals[order, :]
        scores = scores[order]
        proposals[:, 0:4] /= im_scale
        proposals_list.append(proposals)
        scores_list.append(scores)

        landmark_deltas = net_out[base + 2]
        landmark_pred_len = landmark_deltas.shape[3] // A
        landmark_deltas = landmark_deltas.reshape(
            (-1, 5, landmark_pred_len // 5))
        landmarks = self.landmark_pred(anchors, landmark_deltas)
        landmarks = landmarks[order, :]
        landmarks[:, :, 0:2] /= im_scale
        landmarks_list.append(landmarks)

    proposals = np.vstack(proposals_list)
    if proposals.shape[0] == 0:
        return np.zeros((0, 5)), np.zeros((0, 5, 2))

    # Sort everything by descending score before NMS.
    scores = np.vstack(scores_list)
    order = scores.ravel().argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    landmarks = np.vstack(landmarks_list)[order].astype(np.float32,
                                                        copy=False)

    pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                            copy=False)
    keep = self.nms(pre_det)
    det = np.hstack((pre_det, proposals[:, 4:]))[keep, :]
    return det, landmarks[keep]
def forward(self, is_train, req, in_data, out_data, aux):
    """Generate a fixed number of proposals from per-stride RPN outputs.

    ``in_data`` is read as: one class-probability array per key of
    ``self.fpn_keys``, then one bbox-delta array per key, and finally
    ``im_info`` as the last element (row 0 holds height, width, scale).
    Boxes from all strides are decoded, clipped, size-filtered, merged,
    truncated to the pre-NMS top-N, NMS-filtered and then truncated or
    padded (by random re-sampling) to ``self._rpn_post_nms_top_n`` rows.

    Writes ``[batch_index=0, x1, y1, x2, y2]`` rows to ``out_data[0]`` and,
    when ``self._output_score`` is set, the scores to ``out_data[1]``.

    :raises ValueError: if the batch holds more than one image.
    """
    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")

    nms = gpu_nms_wrapper(self._threshold, in_data[0][0].context.device_id)
    #nms = cpu_nms_wrapper(self._threshold)

    num_keys = len(self.fpn_keys)
    cls_prob_dict = dict(zip(self.fpn_keys, in_data[0:num_keys]))
    bbox_pred_dict = dict(zip(self.fpn_keys, in_data[num_keys:2 * num_keys]))

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size_dict = self._rpn_min_size_fpn

    # im_info does not depend on the stride; fetch it once instead of doing
    # a blocking asnumpy() copy for every pyramid level.
    im_info = in_data[-1].asnumpy()[0, :]
    if DEBUG:
        for _ in self._feat_stride_fpn:
            # One report per stride, as before; single-argument print()
            # calls behave the same on Python 2 and 3.
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))

    proposals_list = []
    scores_list = []
    for s in self._feat_stride_fpn:
        key = 'stride%s' % s
        stride = int(s)
        A = self._num_anchors[key]
        # Keep only the score channels from index A onwards.
        scores = cls_prob_dict[key].asnumpy()[:, A:, :, :]
        bbox_deltas = bbox_pred_dict[key].asnumpy()

        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
        K = height * width
        anchors = anchors_plane(height, width, stride,
                                self._anchors_fpn[key].astype(np.float32))
        anchors = anchors.reshape((K * A, 4))

        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        proposals = self._bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])

        # Minimum box size is given per stride and scaled by the image scale.
        keep = self._filter_boxes(proposals, min_size_dict[key] * im_info[2])
        proposals_list.append(proposals[keep, :])
        scores_list.append(scores[keep])

    proposals = np.vstack(proposals_list)
    scores = np.vstack(scores_list)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    det = np.hstack((proposals, scores)).astype(np.float32)

    if np.shape(det)[0] == 0:
        # Nothing survived filtering: substitute identical dummy boxes so
        # the output keeps its fixed size.
        print("Something wrong with the input image(resolution is too low?), generate fake proposals for it.")
        proposals = np.array([[1.0, 1.0, 2.0, 2.0]] * post_nms_topN, dtype=np.float32)
        scores = np.array([[0.9]] * post_nms_topN, dtype=np.float32)
        det = np.array([[1.0, 1.0, 2.0, 2.0, 0.9]] * post_nms_topN, dtype=np.float32)

    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    if len(keep) < post_nms_topN:
        # Pad by re-sampling kept indices at random (with replacement).
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]

    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)
    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def postprocess(net_out, threshold, ctx_id, im_scale, im_info):
    """Decode raw detector outputs into boxes and five-point landmarks.

    ``net_out`` is read as consecutive (score, bbox, landmark) triples for
    strides 32, 16 and 8, each laid out channels-first; the entries must
    already support ``transpose``/``reshape`` (presumably numpy arrays --
    the original ``.asnumpy()`` calls were commented out).

    :param net_out: sequence of network outputs as described above.
    :param threshold: minimum score for a proposal to be kept.
    :param ctx_id: GPU id for NMS; a negative value selects the CPU NMS.
    :param im_scale: pair ``(x_scale, y_scale)``; box x/y coordinates and
        landmark x/y coordinates are divided by the matching entry.
    :param im_info: network-input size; ``im_info[:2]`` is used as the
        (height, width) clipping bounds.
    :return: ``(det, landmarks)``. With the configuration below ``det``
        has columns ``x1, y1, x2, y2, score`` and ``landmarks`` is the
        float32 landmark array of the kept boxes; with no proposals the
        result is ``(zeros((0, 5)), zeros((0, 5, 2)))``.
    """
    # ---- fixed configuration (edit here to change behaviour) ----
    flip = False
    decay4 = 0.5
    vote = False
    use_landmarks = True
    bbox_stds = [1.0, 1.0, 1.0, 1.0]
    landmark_std = 1.0
    nms_threshold = 0.4
    dense_anchor = False
    _ratio = (1., )
    _feat_stride_fpn = [32, 16, 8]
    anchor_cfg = {
        '32': {
            'SCALES': (32, 16),
            'BASE_SIZE': 16,
            'RATIOS': _ratio,
            'ALLOWED_BORDER': 9999
        },
        '16': {
            'SCALES': (8, 4),
            'BASE_SIZE': 16,
            'RATIOS': _ratio,
            'ALLOWED_BORDER': 9999
        },
        '8': {
            'SCALES': (2, 1),
            'BASE_SIZE': 16,
            'RATIOS': _ratio,
            'ALLOWED_BORDER': 9999
        },
    }

    if ctx_id >= 0:
        nms = gpu_nms_wrapper(nms_threshold, ctx_id)
    else:
        nms = cpu_nms_wrapper(nms_threshold)

    fpn_keys = ['stride%s' % s for s in _feat_stride_fpn]
    _anchors_fpn = dict(
        zip(fpn_keys,
            generate_anchors_fpn(dense_anchor=dense_anchor, cfg=anchor_cfg)))
    for k in _anchors_fpn:
        _anchors_fpn[k] = _anchors_fpn[k].astype(np.float32)
    # Derive the count from each key's own anchors so the pairing cannot
    # depend on dict iteration order.
    _num_anchors = dict((k, v.shape[0]) for k, v in _anchors_fpn.items())

    proposals_list = []
    scores_list = []
    landmarks_list = []
    strides_list = []

    sym_idx = 0
    for s in _feat_stride_fpn:
        key = 'stride%s' % s
        stride = int(s)
        A = _num_anchors[key]

        # Keep only the score channels from index A onwards.
        scores = net_out[sym_idx][:, A:, :, :]
        bbox_deltas = net_out[sym_idx + 1]
        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
        K = height * width

        anchors = anchors_plane(height, width, stride, _anchors_fpn[key])
        anchors = anchors.reshape((K * A, 4))

        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
        bbox_pred_len = bbox_deltas.shape[3] // A
        bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))
        for comp in range(4):
            bbox_deltas[:, comp::4] = bbox_deltas[:, comp::4] * bbox_stds[comp]

        proposals = bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])

        if stride == 4 and decay4 < 1.0:
            scores *= decay4

        order = np.where(scores.ravel() >= threshold)[0]
        proposals = proposals[order, :]
        scores = scores[order]

        if flip:
            # Mirror in network-input coordinates. The input width is
            # im_info[1], the same bound clip_boxes received above; the
            # previous code read an undefined `im` here.
            oldx1 = proposals[:, 0].copy()
            oldx2 = proposals[:, 2].copy()
            proposals[:, 0] = im_info[1] - oldx2 - 1
            proposals[:, 2] = im_info[1] - oldx1 - 1

        # x coordinates use im_scale[0], y coordinates im_scale[1].
        proposals[:, 0] /= im_scale[0]
        proposals[:, 1] /= im_scale[1]
        proposals[:, 2] /= im_scale[0]
        proposals[:, 3] /= im_scale[1]

        proposals_list.append(proposals)
        scores_list.append(scores)
        if nms_threshold < 0.0:
            # NMS disabled: remember which stride produced each box.
            _strides = np.empty(shape=(scores.shape), dtype=np.float32)
            _strides.fill(stride)
            strides_list.append(_strides)

        if not vote and use_landmarks:
            landmark_deltas = net_out[sym_idx + 2]
            landmark_pred_len = landmark_deltas.shape[1] // A
            landmark_deltas = landmark_deltas.transpose((0, 2, 3, 1)).reshape(
                (-1, 5, landmark_pred_len // 5))
            landmark_deltas *= landmark_std
            landmarks = landmark_pred(anchors, landmark_deltas)
            landmarks = landmarks[order, :]

            if flip:
                landmarks[:, :, 0] = im_info[1] - landmarks[:, :, 0] - 1
                # Mirroring swaps points 0<->1 and 3<->4.
                flandmarks = landmarks.copy()
                for dst, src in enumerate([1, 0, 2, 4, 3]):
                    flandmarks[:, dst, :] = landmarks[:, src, :]
                landmarks = flandmarks
            # Broadcasts (x_scale, y_scale) over the last axis.
            landmarks[:, :, 0:2] /= im_scale
            landmarks_list.append(landmarks)

        sym_idx += 3 if use_landmarks else 2

    proposals = np.vstack(proposals_list)
    landmarks = None
    if proposals.shape[0] == 0:
        if use_landmarks:
            landmarks = np.zeros((0, 5, 2))
        if nms_threshold < 0.0:
            return np.zeros((0, 6)), landmarks
        return np.zeros((0, 5)), landmarks

    # Sort everything by descending score.
    scores = np.vstack(scores_list)
    order = scores.ravel().argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    if nms_threshold < 0.0:
        strides = np.vstack(strides_list)
        strides = strides[order]
    if not vote and use_landmarks:
        landmarks = np.vstack(landmarks_list)
        landmarks = landmarks[order].astype(np.float32, copy=False)

    if nms_threshold > 0.0:
        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)
        det = np.hstack((pre_det, proposals[:, 4:]))
        if not vote:
            keep = nms(pre_det)
            det = det[keep, :]
            if use_landmarks:
                landmarks = landmarks[keep]
        else:
            det = bbox_vote(det, nms_threshold)
    elif nms_threshold < 0.0:
        det = np.hstack((proposals[:, 0:4], scores,
                         strides)).astype(np.float32, copy=False)
    else:
        det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                            copy=False)
    return det, landmarks
def detect(self, img, threshold=0.05, scales=[1.0]):
    """Run multi-scale detection on ``img`` and return scored boxes.

    For each scale the image is resized, mean-subtracted with reversed
    channel order and passed through ``self.model``. Proposals of every
    stride are decoded, clipped, truncated to the pre-NMS top-N and mapped
    back to input coordinates; the merged set is sorted by score, optionally
    NMS-filtered and thresholded.

    :param img: three-channel image array (indexed as ``img[:, :, c]``).
    :param threshold: detections scoring below this are dropped when the
        value is positive.
    :param scales: resize factors to evaluate (only iterated, not mutated).
    :return: float32 array with columns ``x1, y1, x2, y2, score``.
    """
    gathered_boxes = []
    gathered_scores = []

    for im_scale in scales:
        if im_scale == 1.0:
            im = img
        else:
            im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)
        im = im.astype(np.float32)
        rows, cols = im.shape[0], im.shape[1]

        # HWC -> 1xCxHxW, reversing the channel order while subtracting
        # the matching per-channel mean.
        im_tensor = np.zeros((1, 3, rows, cols))
        for dst in range(3):
            src = 2 - dst
            im_tensor[0, dst, :, :] = im[:, :, src] - self.pixel_means[src]

        data = nd.array(im_tensor)
        db = mx.io.DataBatch(data=(data, ),
                             provide_data=[('data', data.shape)])
        self.model.forward(db, is_train=False)
        net_out = self.model.get_outputs()
        top_n = self._rpn_pre_nms_top_n

        for s in self._feat_stride_fpn:
            # With several scales, stride 32 is skipped on the last scale.
            if len(scales) > 1 and s == 32 and im_scale == scales[-1]:
                continue
            key = 'stride%s' % s
            stride = int(s)
            # Output pairs: 2/3 for stride 16, 4/5 for stride 8, else 0/1.
            out_idx = 2 if s == 16 else (4 if s == 8 else 0)
            print('getting',
                  im_scale,
                  stride,
                  out_idx,
                  len(net_out),
                  data.shape,
                  file=sys.stderr)
            A = self._num_anchors[key]
            # Keep only the score channels from index A onwards.
            scores = net_out[out_idx].asnumpy()[:, A:, :, :]
            bbox_deltas = net_out[out_idx + 1].asnumpy()

            height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
            K = height * width
            anchors = anchors_plane(
                height, width, stride,
                self._anchors_fpn[key].astype(np.float32)).reshape(
                    (K * A, 4))

            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape(
                (-1, 4))
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            proposals = self._bbox_pred(anchors, bbox_deltas)
            proposals = clip_boxes(proposals, [rows, cols])

            order = scores.ravel().argsort()[::-1]
            if top_n > 0:
                order = order[:top_n]
            proposals = proposals[order, :]
            scores = scores[order]

            # Back to the coordinates of the unscaled input.
            proposals /= im_scale
            gathered_boxes.append(proposals)
            gathered_scores.append(scores)

    proposals = np.vstack(gathered_boxes)
    scores = np.vstack(gathered_scores)
    order = scores.ravel().argsort()[::-1]
    det = np.hstack((proposals[order, :], scores[order])).astype(np.float32)

    if self.nms_threshold < 1.0:
        det = det[self.nms(det), :]
    if threshold > 0.0:
        det = det[np.where(det[:, 4] >= threshold)[0], :]
    return det
def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False):
    """Detect boxes (and optionally landmarks) in ``img``.

    The image is evaluated at every scale in ``scales`` and, when
    ``do_flip`` is true, additionally on its horizontal mirror. For each
    stride in ``self._feat_stride_fpn`` the outputs returned by
    ``self.get_pred`` are decoded against the stride's anchors, filtered by
    ``threshold`` and mapped back to the coordinates of ``img``. Results of
    all passes are merged, sorted by descending score and reduced either by
    ``self.nms`` or, when ``self.vote`` is set, by ``self.bbox_vote``.

    Progress messages are printed unconditionally; more detail is printed
    when ``self.debug`` is set.

    :param img: H x W x 3 image array.
    :param threshold: minimum score for a proposal to be kept.
    :param scales: resize factors to evaluate. The list is never mutated.
    :param do_flip: also evaluate the horizontally mirrored image.
    :return: ``(det, landmarks)``. ``det`` has box columns followed by the
        score. ``landmarks`` is ``None`` unless landmarks were produced;
        when nothing is detected the result is an empty (0, 5) array
        together with an empty (0, 5, 2) array (or ``None`` when
        ``self.use_landmarks`` is false).
    """
    print(
        'get into detect, confi thresold={}, scales={}, do_flip={}'.format(
            threshold, scales, do_flip))
    proposals_list = []
    scores_list = []
    landmarks_list = []
    flips = [0, 1] if do_flip else [0]

    for im_scale in scales:
        for flip in flips:
            # Resize the input according to the current scale.
            if im_scale != 1.0:
                im = cv2.resize(img,
                                None,
                                None,
                                fx=im_scale,
                                fy=im_scale,
                                interpolation=cv2.INTER_LINEAR)
            else:
                im = img.copy()

            # Mirror the image horizontally.
            if flip:
                im = im[:, ::-1, :]

            if self.nocrop:
                # Zero-pad height and width up to the next multiple of 32.
                h = ((im.shape[0] + 31) // 32) * 32
                w = ((im.shape[1] + 31) // 32) * 32
                _im = np.zeros((h, w, 3), dtype=np.float32)
                _im[0:im.shape[0], 0:im.shape[1], :] = im
                im = _im
            else:
                im = im.astype(np.float32)

            im_info = [im.shape[0], im.shape[1]]  # h, w
            im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
            # The channel order is reversed while normalising (the original
            # note says this is meant to match image.py).
            for i in range(3):
                im_tensor[0, i, :, :] = (
                    im[:, :, 2 - i] / self.pixel_scale -
                    self.pixel_means[2 - i]) / self.pixel_stds[2 - i]
            data = np.array(im_tensor)

            # Run the model and collect its named outputs.
            net_out = self.get_pred(data)
            if self.debug:
                for key in net_out.keys():
                    print('{} = {}\n'.format(key, net_out[key].shape))

            for s in self._feat_stride_fpn:
                stride = int(s)
                # NOTE(review): the original author notes this output is
                # NHWC rather than NCHW -- confirm against get_pred.
                scores = net_out['rpn_cls_prob_stride%s' % s]
                if self.debug:
                    print('get score:', scores.shape)
                # Index 1 is taken as the positive-class probability
                # (per the original comment); result is one column.
                scores = scores[:, 1].reshape((-1, 1))
                if self.debug:
                    print('AAAAstride{}: scores after shape={}'.format(
                        stride, scores.shape))

                bbox_deltas = net_out['rpn_bbox_pred_stride%s' % s]
                height, width = bbox_deltas.shape[1], bbox_deltas.shape[2]
                A = self._num_anchors['stride%s' % s]
                K = height * width
                anchors_fpn = self._anchors_fpn['stride%s' % s]
                # All anchors of this feature map.
                anchors = anchors_plane(height, width, stride, anchors_fpn)
                anchors = anchors.reshape((K * A, 4))
                if self.debug:
                    print('HW', (height, width))
                    print('anchors_fpn', anchors_fpn)
                    print('anchors', anchors.shape, '\n')
                    print('before bbox_deltas', bbox_deltas.shape)

                bbox_deltas = self._clip_pad_NHWC(bbox_deltas,
                                                  (height, width))
                bbox_pred_len = bbox_deltas.shape[3] // A
                bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))
                if self.debug:
                    print('after bbox_deltas', bbox_deltas.shape, height,
                          width, '\n')

                # Apply the predicted deltas to the anchors.
                proposals = self.bbox_pred(anchors, bbox_deltas)
                proposals = clip_boxes(proposals, im_info[:2])

                scores_ravel = scores.ravel()
                max_score = np.max(scores_ravel)
                print('proposals.shape={}, score_ravel.shape={}'.format(
                    proposals.shape, scores_ravel.shape))
                print('max score', max_score)

                order = np.where(scores_ravel >= threshold)[0]
                proposals = proposals[order, :]
                scores = scores[order]
                if flip:
                    # Mirror the x coordinates back to the unflipped image.
                    oldx1 = proposals[:, 0].copy()
                    oldx2 = proposals[:, 2].copy()
                    proposals[:, 0] = im.shape[1] - oldx2 - 1
                    proposals[:, 2] = im.shape[1] - oldx1 - 1

                # Map the proposals back to the original image coordinates.
                proposals[:, 0:4] /= im_scale

                proposals_list.append(proposals)
                scores_list.append(scores)

                if not self.vote and self.use_landmarks:
                    landmark_deltas = net_out['rpn_landmark_pred_stride%s'
                                              % s]
                    if self.debug:
                        print('before landmark_deltas',
                              landmark_deltas.shape)
                    landmark_deltas = self._clip_pad_NCHW(
                        landmark_deltas, (height, width))
                    landmark_pred_len = landmark_deltas.shape[1] // A
                    landmark_deltas = landmark_deltas.transpose(
                        (0, 2, 3, 1)).reshape(
                            (-1, 5, landmark_pred_len // 5))
                    if self.debug:
                        print('after landmark_deltas',
                              landmark_deltas.shape, landmark_deltas)
                    landmarks = self.landmark_pred(anchors, landmark_deltas)
                    landmarks = landmarks[order, :]

                    if flip:
                        landmarks[:, :, 0] = (im.shape[1] -
                                              landmarks[:, :, 0] - 1)
                        # Reorder the five points with this permutation after
                        # mirroring (kept in its own name so ``order`` is
                        # not shadowed).
                        flip_perm = [1, 0, 2, 4, 3]
                        flandmarks = landmarks.copy()
                        for idx, a in enumerate(flip_perm):
                            flandmarks[:, idx, :] = landmarks[:, a, :]
                        landmarks = flandmarks
                    landmarks[:, :, 0:2] /= im_scale
                    landmarks_list.append(landmarks)

                if self.debug:
                    print(
                        'end stride{}-------------------------------------------------\n'
                        .format(s))

    # np.vstack raises on an empty list (e.g. empty ``scales``); fall through
    # to the regular "nothing detected" return instead.
    if proposals_list:
        proposals = np.vstack(proposals_list)
    else:
        proposals = np.zeros((0, 4))
    landmarks = None
    if proposals.shape[0] == 0:
        if self.use_landmarks:
            landmarks = np.zeros((0, 5, 2))
        return np.zeros((0, 5)), landmarks

    scores = np.vstack(scores_list)
    print('finally!!! proposals.shape={}, score.shape={}'.format(
        proposals.shape, scores.shape))
    # Sort everything by descending score.
    order = scores.ravel().argsort()[::-1]
    proposals = proposals[order, :]
    scores = scores[order]
    if self.debug:
        print('sort score=', scores)
    if not self.vote and self.use_landmarks:
        landmarks = np.vstack(landmarks_list)
        landmarks = landmarks[order].astype(np.float32, copy=False)

    pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                            copy=False)
    if not self.vote:
        print('begin to NMS!!\n')
        keep = self.nms(pre_det)
        det = np.hstack((pre_det, proposals[:, 4:]))
        det = det[keep, :]
        if self.use_landmarks:
            landmarks = landmarks[keep]
    else:
        det = np.hstack((pre_det, proposals[:, 4:]))
        det = self.bbox_vote(det)
    return det, landmarks
def demo_maskrcnn(network,
                  ctx,
                  prefix,
                  epoch,
                  img_path,
                  vis=True,
                  has_rpn=True,
                  thresh=0.001):
    """Run the mask test network on one image and save a visualisation.

    The network symbol ``get_<network>_mask_test`` is built, parameters are
    loaded from ``prefix``/``epoch`` (their names and shapes are printed),
    and the image at ``img_path`` is forwarded once. Detections of the
    single foreground class ``'text'`` are filtered by ``thresh`` and NMS,
    drawn with their masks onto a copy of the image, and the result is
    written to ``figures/test_result.jpg``.

    :param network: network name used to look up the symbol factory.
    :param ctx: device context passed to the parameter loader and module.
    :param prefix: checkpoint prefix.
    :param epoch: checkpoint epoch.
    :param img_path: path of the image to process.
    :param vis: accepted for interface compatibility; not used here.
    :param has_rpn: must be true; other modes are not supported.
    :param thresh: minimum class score for a detection to be kept.
    """
    import copy
    import random

    assert has_rpn, "Only has_rpn==True has been supported."
    # NOTE(review): eval() on a name built from ``network`` -- only safe
    # while ``network`` comes from a trusted source.
    sym = eval('get_' + network +
               '_mask_test')(num_classes=config.NUM_CLASSES)
    arg_params, aux_params = load_param(prefix,
                                        epoch,
                                        convert=True,
                                        ctx=ctx,
                                        process=True)
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for k, v in arg_params.items():
        print(k, v.shape)

    max_image_shape = (1, 3, 1024, 1024)
    max_data_shapes = [("data", max_image_shape), ("im_info", (1, 3))]
    mod = MutableModule(symbol=sym,
                        data_names=["data", "im_info"],
                        label_names=None,
                        max_data_shapes=max_data_shapes,
                        context=ctx)
    mod.bind(data_shapes=max_data_shapes,
             label_shapes=None,
             for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    class OneDataBatch():
        """Minimal single-image batch exposing the fields the module reads."""

        def __init__(self, img):
            im_info = mx.nd.array([[img.shape[0], img.shape[1], 1.0]])
            # HWC -> CHW, then add a batch axis and reverse the channels.
            img = np.transpose(img, (2, 0, 1))
            img = img[np.newaxis, (2, 1, 0)]
            self.data = [mx.nd.array(img), im_info]
            self.label = None
            self.provide_label = None
            self.provide_data = [("data", (1, 3, img.shape[2],
                                           img.shape[3])),
                                 ("im_info", (1, 3))]

    img_ori = cv2.imread(img_path)
    batch = OneDataBatch(img_ori)
    mod.forward(batch, False)
    results = mod.get_outputs()
    output = dict(zip(mod.output_names, results))
    rois = output['rois_output'].asnumpy()[:, 1:]
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    mask_output = output['mask_prob_output'].asnumpy()

    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes,
                            [img_ori.shape[0], img_ori.shape[1]])

    nms = py_nms_wrapper(config.TEST.NMS)
    boxes = pred_boxes

    CLASSES = ('__background__', 'text')
    all_boxes = [[] for _ in CLASSES]
    all_masks = [[] for _ in CLASSES]
    label = np.argmax(scores, axis=1)
    label = label[:, np.newaxis]

    for cls_ind in range(len(CLASSES)):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_masks = mask_output[:, cls_ind, :, :]
        cls_scores = scores[:, cls_ind, np.newaxis]
        # Keep rows whose best class is this one and whose score is high.
        keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0]
        cls_masks = cls_masks[keep, :, :]
        dets = np.hstack(
            (cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        keep = nms(dets)
        all_boxes[cls_ind] = dets[keep, :]
        all_masks[cls_ind] = cls_masks[keep, :, :]

    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]
    masks_this_image = [[]] + [all_masks[j] for j in range(1, len(CLASSES))]

    class_names = CLASSES
    color_white = (255, 255, 255)
    scale = 1.0
    im = copy.copy(img_ori)

    for j, name in enumerate(class_names):
        if name == '__background__':
            continue
        # random.randint is inclusive at both ends; 255 is the largest
        # valid 8-bit channel value.
        color = (random.randint(0, 255), random.randint(0, 255),
                 random.randint(0, 255))
        dets = boxes_this_image[j]
        masks = masks_this_image[j]
        for i in range(len(dets)):
            x1, y1, x2, y2 = [int(v) for v in dets[i, :4] * scale]
            # Skip boxes that are empty after truncation to pixels; a zero
            # target size cannot be passed to cv2.resize.
            if x2 <= x1 or y2 <= y1:
                continue
            score = dets[i, -1]
            cv2.rectangle(im, (x1, y1), (x2, y2), color=color, thickness=2)
            cv2.putText(im,
                        '%s %.3f' % (class_names[j], score), (x1, y1 + 10),
                        color=color_white,
                        fontFace=cv2.FONT_HERSHEY_COMPLEX,
                        fontScale=0.5)
            mask = masks[i, :, :]
            mask = cv2.resize(mask, (x2 - x1, y2 - y1),
                              interpolation=cv2.INTER_LINEAR)
            mask[mask > 0.5] = 1
            mask[mask <= 0.5] = 0
            # Brighten one random channel under the mask, saturating at 255.
            mask_color = random.randint(0, 255)
            c = random.randint(0, 2)
            target = im[y1:y2, x1:x2, c] + mask_color * mask
            target[target >= 255] = 255
            im[y1:y2, x1:x2, c] = target

    cv2.imwrite("figures/test_result.jpg", im)
def demo_maskrcnn(network,
                  ctx,
                  prefix,
                  epoch,
                  vis=True,
                  has_rpn=True,
                  thresh=0.001):
    """Run the mask test network over the test image list and save results.

    The list ``<dataset_path>/imglists/test.lst`` is read only to determine
    how many images to process; image ``k`` (1-based) is loaded from
    ``<dataset_path>/ch4_test_images/img_<k>.jpg``. For every image the
    ``'text'`` detections are filtered by ``thresh`` and NMS and drawn with
    their masks. Outputs written:

    * one ``test_mat/result_<k>_<n>.mat`` per drawn box, holding that box's
      binary mask placed in a full-size image under key ``'Segmentation'``;
    * one line with the number of drawn boxes appended to
      ``data/boxnum.txt`` per image;
    * ``test_result_img/result_<k>.jpg`` with the visualisation.

    NOTE(review): the source of this function was whitespace-mangled; the
    per-box placement of the ``.mat`` write was inferred from the
    ``result_<k>_<n>`` file name -- confirm against the intended behaviour.

    :param network: network name used to look up the symbol factory.
    :param ctx: device context passed to the parameter loader and module.
    :param prefix: checkpoint prefix.
    :param epoch: checkpoint epoch.
    :param vis: accepted for interface compatibility; not used here.
    :param has_rpn: must be true; other modes are not supported.
    :param thresh: minimum class score for a detection to be kept.
    """
    import copy
    import random

    assert has_rpn, "Only has_rpn==True has been supported."
    # NOTE(review): eval() on a name built from ``network`` -- only safe
    # while ``network`` comes from a trusted source.
    sym = eval('get_' + network +
               '_mask_test')(num_classes=config.NUM_CLASSES)
    arg_params, aux_params = load_param(prefix,
                                        epoch,
                                        convert=True,
                                        ctx=ctx,
                                        process=True)

    max_image_shape = (1, 3, 1024, 1024)
    max_data_shapes = [("data", max_image_shape), ("im_info", (1, 3))]
    mod = MutableModule(symbol=sym,
                        data_names=["data", "im_info"],
                        label_names=None,
                        max_data_shapes=max_data_shapes,
                        context=ctx)
    mod.bind(data_shapes=max_data_shapes,
             label_shapes=None,
             for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    class OneDataBatch():
        """Minimal single-image batch exposing the fields the module reads."""

        def __init__(self, img):
            im_info = mx.nd.array([[img.shape[0], img.shape[1], 1.0]])
            # HWC -> CHW, then add a batch axis and reverse the channels.
            img = np.transpose(img, (2, 0, 1))
            img = img[np.newaxis, (2, 1, 0)]
            self.data = [mx.nd.array(img), im_info]
            self.label = None
            self.provide_label = None
            self.provide_data = [("data", (1, 3, img.shape[2],
                                           img.shape[3])),
                                 ("im_info", (1, 3))]

    imglist_file = os.path.join(default.dataset_path, 'imglists',
                                'test.lst')
    assert os.path.exists(imglist_file), 'Path does not exist: {}'.format(
        imglist_file)
    imgfiles_list = []
    with open(imglist_file, 'r') as f:
        for line in f:
            fields = line.strip().split('\t')
            # Only the second tab-separated field is recorded.
            imgfiles_list.append({'img_path': fields[1]})

    CLASSES = ('__background__', 'text')
    class_names = CLASSES
    color_white = (255, 255, 255)
    scale = 1.0
    inst_dir = os.path.join(default.dataset_path, 'test_mat')
    img_dir = os.path.join(default.dataset_path, 'test_result_img')

    for im_idx in range(len(imgfiles_list)):
        # Test images are numbered from 1.
        index = im_idx + 1
        img_path = os.path.join(default.dataset_path, 'ch4_test_images',
                                'img_' + str(index) + '.jpg')
        img_ori = cv2.imread(img_path)
        batch = OneDataBatch(img_ori)
        mod.forward(batch, False)
        results = mod.get_outputs()
        output = dict(zip(mod.output_names, results))
        rois = output['rois_output'].asnumpy()[:, 1:]
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        mask_output = output['mask_prob_output'].asnumpy()

        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes,
                                [img_ori.shape[0], img_ori.shape[1]])

        nms = processing_nms_wrapper(config.TEST.NMS, 0.8)
        boxes = pred_boxes

        all_boxes = [[] for _ in CLASSES]
        all_masks = [[] for _ in CLASSES]
        label = np.argmax(scores, axis=1)
        label = label[:, np.newaxis]

        for cls_ind in range(len(CLASSES)):
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_masks = mask_output[:, cls_ind, :, :]
            cls_scores = scores[:, cls_ind, np.newaxis]
            # Keep rows whose best class is this one and whose score is high.
            keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0]
            cls_masks = cls_masks[keep, :, :]
            dets = np.hstack(
                (cls_boxes, cls_scores)).astype(np.float32)[keep, :]
            keep = nms(dets)
            all_boxes[cls_ind] = dets[keep, :]
            all_masks[cls_ind] = cls_masks[keep, :, :]

        boxes_this_image = [[]] + [
            all_boxes[j] for j in range(1, len(CLASSES))
        ]
        masks_this_image = [[]] + [
            all_masks[j] for j in range(1, len(CLASSES))
        ]

        im = copy.copy(img_ori)
        num_boxes = 0
        for j, name in enumerate(class_names):
            if name == '__background__':
                continue
            # random.randint is inclusive at both ends; 255 is the largest
            # valid 8-bit channel value.
            color = (random.randint(0, 255), random.randint(0, 255),
                     random.randint(0, 255))
            dets = boxes_this_image[j]
            masks = masks_this_image[j]
            for i in range(len(dets)):
                x1, y1, x2, y2 = [int(v) for v in dets[i, :4] * scale]
                # Skip boxes that are empty after truncation to pixels; a
                # zero target size cannot be passed to cv2.resize.
                if x2 <= x1 or y2 <= y1:
                    continue
                num_boxes += 1
                score = dets[i, -1]
                cv2.rectangle(im, (x1, y1), (x2, y2),
                              color=color,
                              thickness=2)
                cv2.putText(im,
                            '%s %.3f' % (class_names[j], score),
                            (x1, y1 + 10),
                            color=color_white,
                            fontFace=cv2.FONT_HERSHEY_COMPLEX,
                            fontScale=0.5)
                mask = masks[i, :, :]
                mask = cv2.resize(mask, (x2 - x1, y2 - y1),
                                  interpolation=cv2.INTER_LINEAR)
                mask[mask > 0.5] = 1
                mask[mask <= 0.5] = 0

                # Full-size binary image containing only this instance.
                im_binary = np.zeros(im[:, :, 0].shape)
                im_binary[y1:y2, x1:x2] += mask

                # Brighten one random channel under the mask, saturating
                # at 255.
                mask_color = random.randint(0, 255)
                c = random.randint(0, 2)
                target = im[y1:y2, x1:x2, c] + mask_color * mask
                target[target >= 255] = 255
                im[y1:y2, x1:x2, c] = target

                if not os.path.exists(inst_dir):
                    os.makedirs(inst_dir)
                inst_path = os.path.join(
                    inst_dir, 'result_{}_{}.mat'.format(index, num_boxes))
                io.savemat(inst_path, {'Segmentation': im_binary})

        # Record how many boxes were drawn for this image.
        with open('data/boxnum.txt', 'a') as numbox:
            numbox.write(str(num_boxes) + '\n')

        if not os.path.exists(img_dir):
            os.makedirs(img_dir)
        img_path = os.path.join(img_dir, 'result_{}.jpg'.format(index))
        cv2.imwrite(img_path, im)
def demo_maskrcnn(network,
                  ctx,
                  prefix,
                  epoch,
                  img_path,
                  vis=True,
                  has_rpn=True,
                  thresh=0.001):
    """Run the mask test network on one image and show or save the result.

    The network symbol ``get_<network>_mask_test`` is built, parameters are
    loaded from ``prefix``/``epoch`` and the image at ``img_path`` is
    forwarded once. Detections of every non-background class are filtered
    by ``thresh`` and NMS and drawn with their masks onto a copy of the
    image. The picture is then displayed with matplotlib when ``vis`` is
    true, otherwise saved to ``figures/test_result.jpg``.

    :param network: network name used to look up the symbol factory.
    :param ctx: device context passed to the parameter loader and module.
    :param prefix: checkpoint prefix.
    :param epoch: checkpoint epoch.
    :param img_path: path of the image to process.
    :param vis: show the figure interactively instead of saving it.
    :param has_rpn: must be true; other modes are not supported.
    :param thresh: minimum class score for a detection to be kept.
    """
    import copy
    import random

    assert has_rpn, "Only has_rpn==True has been supported."
    # NOTE(review): eval() on a name built from ``network`` -- only safe
    # while ``network`` comes from a trusted source.
    sym = eval('get_' + network + '_mask_test')(
        num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    arg_params, aux_params = load_param(prefix,
                                        epoch,
                                        convert=True,
                                        ctx=ctx,
                                        process=True)

    max_image_shape = (1, 3, 1024, 1024)
    max_data_shapes = [("data", max_image_shape), ("im_info", (1, 3))]
    mod = MutableModule(symbol=sym,
                        data_names=["data", "im_info"],
                        label_names=None,
                        max_data_shapes=max_data_shapes,
                        context=ctx)
    mod.bind(data_shapes=max_data_shapes,
             label_shapes=None,
             for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    class OneDataBatch():
        """Minimal single-image batch exposing the fields the module reads."""

        def __init__(self, img):
            im_info = mx.nd.array([[img.shape[0], img.shape[1], 1.0]])
            # HWC -> CHW, then add a batch axis and reverse the channels.
            img = np.transpose(img, (2, 0, 1))
            img = img[np.newaxis, (2, 1, 0)]
            self.data = [mx.nd.array(img), im_info]
            self.label = None
            self.provide_label = None
            self.provide_data = [("data", (1, 3, img.shape[2],
                                           img.shape[3])),
                                 ("im_info", (1, 3))]

    img_ori = cv2.imread(img_path)
    batch = OneDataBatch(img_ori)
    mod.forward(batch, False)
    results = mod.get_outputs()
    output = dict(zip(mod.output_names, results))
    rois = output['rois_output'].asnumpy()[:, 1:]
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    mask_output = output['mask_prob_output'].asnumpy()

    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes,
                            [img_ori.shape[0], img_ori.shape[1]])

    nms = py_nms_wrapper(config.TEST.NMS)
    boxes = pred_boxes

    CLASSES = ('__background__', 'person', 'rider', 'car', 'truck', 'bus',
               'train', 'mcycle', 'bicycle')
    all_boxes = [[] for _ in CLASSES]
    all_masks = [[] for _ in CLASSES]
    label = np.argmax(scores, axis=1)
    label = label[:, np.newaxis]

    for cls_ind in range(len(CLASSES)):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_masks = mask_output[:, cls_ind, :, :]
        cls_scores = scores[:, cls_ind, np.newaxis]
        # Keep rows whose best class is this one and whose score is high.
        keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0]
        cls_masks = cls_masks[keep, :, :]
        dets = np.hstack(
            (cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        keep = nms(dets)
        all_boxes[cls_ind] = dets[keep, :]
        all_masks[cls_ind] = cls_masks[keep, :, :]

    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]
    masks_this_image = [[]] + [all_masks[j] for j in range(1, len(CLASSES))]

    class_names = CLASSES
    color_white = (255, 255, 255)
    scale = 1.0
    im = copy.copy(img_ori)

    for j, name in enumerate(class_names):
        if name == '__background__':
            continue
        # random.randint is inclusive at both ends; 255 is the largest
        # valid 8-bit channel value.
        color = (random.randint(0, 255), random.randint(0, 255),
                 random.randint(0, 255))
        dets = boxes_this_image[j]
        masks = masks_this_image[j]
        for i in range(len(dets)):
            x1, y1, x2, y2 = [int(v) for v in dets[i, :4] * scale]
            # Skip boxes that are empty after truncation to pixels; a zero
            # target size cannot be passed to cv2.resize.
            if x2 <= x1 or y2 <= y1:
                continue
            score = dets[i, -1]
            cv2.rectangle(im, (x1, y1), (x2, y2), color=color, thickness=2)
            cv2.putText(im,
                        '%s %.3f' % (class_names[j], score), (x1, y1 + 10),
                        color=color_white,
                        fontFace=cv2.FONT_HERSHEY_COMPLEX,
                        fontScale=0.5)
            mask = masks[i, :, :]
            mask = cv2.resize(mask, (x2 - x1, y2 - y1),
                              interpolation=cv2.INTER_LINEAR)
            mask[mask > 0.5] = 1
            mask[mask <= 0.5] = 0
            # Brighten one random channel under the mask, saturating at 255.
            mask_color = random.randint(0, 255)
            c = random.randint(0, 2)
            target = im[y1:y2, x1:x2, c] + mask_color * mask
            target[target >= 255] = 255
            im[y1:y2, x1:x2, c] = target

    # Reverse the channel order before handing the image to matplotlib.
    im = im[:, :, (2, 1, 0)]
    plt.imshow(im)
    if vis:
        plt.show()
    else:
        plt.savefig("figures/test_result.jpg")
def detect(self, img, scales=[1.], thresh=0.5):
    """Detect class-1 ('text') instances and return one quadrilateral each.

    The image is forwarded through ``self.model`` at every scale. Rows whose
    best class is 1 and whose class-1 score reaches ``thresh`` are collected
    (boxes divided by the scale), merged across scales and reduced with
    ``self.nms``. Each surviving mask is resized into its box and binarised
    at 0.5. When ``self.mask_nms`` is set, later detections are suppressed
    or merged into earlier ones based on mask overlap. Finally
    ``minimum_bounding_rectangle`` is applied to every remaining mask.

    :param img: H x W x 3 image array.
    :param scales: resize factors to evaluate. The list is never mutated.
    :param thresh: minimum class-1 score for a detection to be kept.
    :return: list of (4, 2) int32 arrays, one per kept instance, holding the
        rectangle points with their two coordinates swapped relative to what
        ``minimum_bounding_rectangle`` returns. When no detection passes the
        score filter an empty array of shape (0, 2) is returned instead.
    """
    ret = []
    dets_all = None
    masks_all = None

    for scale in scales:
        if scale != 1.0:
            nimg = cv2.resize(img,
                              None,
                              None,
                              fx=scale,
                              fy=scale,
                              interpolation=cv2.INTER_LINEAR)
        else:
            nimg = img
        im_size = nimg.shape[0:2]

        db = OneDataBatch(nimg)
        self.model.forward(db, is_train=False)
        results = self.model.get_outputs()
        output = dict(zip(self.model.output_names, results))
        rois = output['rois_output'].asnumpy()[:, 1:]
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        mask_output = output['mask_prob_output'].asnumpy()

        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, [im_size[0], im_size[1]])

        label = np.argmax(scores, axis=1)
        label = label[:, np.newaxis]

        cls_ind = 1  # text class
        # Boxes are mapped back to the coordinates of the input image.
        cls_boxes = pred_boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] / scale
        cls_masks = mask_output[:, cls_ind, :, :]
        cls_scores = scores[:, cls_ind, np.newaxis]
        keep = np.where((cls_scores >= thresh) & (label == cls_ind))[0]
        dets = np.hstack(
            (cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        masks = cls_masks[keep, :, :]
        if dets.shape[0] == 0:
            continue
        if dets_all is None:
            dets_all = dets
            masks_all = masks
        else:
            dets_all = np.vstack((dets_all, dets))
            masks_all = np.vstack((masks_all, masks))

    if dets_all is None:
        return np.zeros((0, 2))

    keep = self.nms(dets_all)
    dets = dets_all[keep, :]
    masks = masks_all[keep, :, :]
    num_dets = dets.shape[0]

    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin gives the same dtype.
    det_mask = np.zeros((num_dets, ) + img.shape[0:2], dtype=int)
    mask_n = np.zeros((num_dets, ), dtype=int)
    invalid = np.zeros((num_dets, ), dtype=int)

    for i in range(num_dets):
        x1, y1, x2, y2 = [int(v) for v in dets[i, :4]]
        # Boxes that are empty after truncation to pixels cannot be used as
        # a cv2.resize target size; mark them invalid.
        if x2 <= x1 or y2 <= y1:
            invalid[i] = 1
            continue
        mask_i = cv2.resize(masks[i, :, :], (x2 - x1, y2 - y1),
                            interpolation=cv2.INTER_LINEAR)
        binary = (mask_i > 0.5).astype(int)
        det_mask[i, y1:y2, x1:x2] += binary
        mask_n[i] = np.sum(binary)

    if self.mask_nms:
        for i in range(num_dets):
            if invalid[i] > 0:
                continue
            mask_i = det_mask[i]
            ni = mask_n[i]
            merge_list = []
            for j in range(i + 1, num_dets):
                if invalid[j] > 0:
                    continue
                nj = mask_n[j]
                # Pixels set in both masks sum to 2.
                nij = np.sum((mask_i + det_mask[j]) == 2)
                iou = float(nij) / (ni + nj - nij)
                iou_j = float(nij) / nj
                # NOTE(review): the nesting of these three checks was lost
                # in the source formatting; they are kept as independent
                # conditions -- confirm against the intended logic.
                if iou_j > 0.7:
                    invalid[j] = 1
                if iou >= config.TEST.NMS:
                    invalid[j] = 1
                if iou >= MERGE_THRESH:
                    merge_list.append(j)
            if merge_list:
                merged = mask_i
                for mm in merge_list:
                    merged = np.logical_or(merged, det_mask[mm])
                det_mask[i] = merged.astype(int)

    for i in range(num_dets):
        if invalid[i] > 0:
            continue
        mini_box = minimum_bounding_rectangle(det_mask[i])
        # Swap the two coordinates of each of the four points.
        swapped = np.zeros((4, 2))
        for k in range(4):
            swapped[k][0] = mini_box[k][1]
            swapped[k][1] = mini_box[k][0]
        ret.append(np.int32(swapped))
    return ret