def nms_detections(pred_boxes, scores, nms_thresh, inds=None):
    """Run NMS over scored boxes and keep only the surviving entries.

    The boxes and scores are stacked column-wise into one float32 matrix
    (score appended as the last column) which is handed to ``nms`` together
    with ``nms_thresh``.  Whatever ``nms`` returns is then used to index
    ``pred_boxes``, ``scores`` and, when supplied, ``inds``.

    Args:
        pred_boxes: 2-D array of boxes, one row per box.
        scores: 1-D array with one score per row of ``pred_boxes``.
        nms_thresh: threshold forwarded unchanged to ``nms``.
        inds: optional array indexed the same way as the boxes.

    Returns:
        ``(boxes, scores)`` when ``inds`` is None, otherwise
        ``(boxes, scores, inds)``; every element is restricted to the
        entries selected by ``nms``.
    """
    score_column = scores[:, np.newaxis]
    stacked = np.hstack((pred_boxes, score_column)).astype(np.float32)
    survivors = nms(stacked, nms_thresh)

    kept = (pred_boxes[survivors], scores[survivors])
    if inds is not None:
        kept += (inds[survivors],)
    return kept
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals.

    The image is read from ``<cfg.DATA_DIR>/demo/<image_name>``, passed
    through ``im_detect`` and, for every non-background class, the
    NMS-filtered detections are handed to ``vis_detections``.
    """
    # Load the demo image.
    image_path = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(image_path)

    # Run the trained network on the image to obtain all predicted boxes
    # and their scores, timing the call.
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    summary = 'Detection took {:.3f}s for {:d} object proposals'
    print(summary.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class.
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    # For every class: pick that class's box coordinates and scores, thin
    # out near-duplicate boxes with NMS, then pass the remaining boxes to
    # vis_detections with CONF_THRESH as its score threshold.
    # Numbering starts at 1 because index 0 (background) is skipped.
    for cls_ind, cls in enumerate(CLASSES[1:], start=1):
        first_col = 4 * cls_ind
        class_boxes = boxes[:, first_col:first_col + 4]
        class_scores = scores[:, cls_ind]
        stacked = np.hstack((class_boxes, class_scores[:, np.newaxis]))
        stacked = stacked.astype(np.float32)
        survivors = nms(stacked, NMS_THRESH)
        vis_detections(im, cls, stacked[survivors, :], thresh=CONF_THRESH)
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals.

    ``image_name`` is used directly as the path given to ``cv2.imread``.
    Each non-background class is filtered with NMS and then forwarded to
    ``vis_detections``.
    """
    # Load the demo image; the argument already is the file path.
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds (timed).
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    report = 'Detection took {:.3f}s for {:d} object proposals'
    print(report.format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class.
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    foreground = CLASSES[1:]
    for offset, cls in enumerate(foreground):
        cls_ind = offset + 1  # background occupies index 0
        box_cols = slice(4 * cls_ind, 4 * (cls_ind + 1))
        per_class = np.hstack(
            (boxes[:, box_cols], scores[:, cls_ind][:, np.newaxis]))
        per_class = per_class.astype(np.float32)
        kept_rows = nms(per_class, NMS_THRESH)
        per_class = per_class[kept_rows, :]
        vis_detections(im, cls, per_class, thresh=CONF_THRESH)
def set_all_boxes(all_boxes, index, scores, boxes, CLASSES, max_per_image=100, thresh=0.):
    """Store the NMS-filtered detections of one image into ``all_boxes``.

    For every class index from 1 up to ``len(CLASSES) - 1`` the rows whose
    class score exceeds ``thresh`` are selected, stacked as
    ``(box columns..., score)`` in float32, filtered with
    ``nms(..., cfg.TEST.NMS)`` and written to ``all_boxes[j][index]``.
    When ``max_per_image`` is positive and the image holds more detections
    than that over all classes, only rows scoring at least the
    ``max_per_image``-th highest score are retained.

    Returns:
        The same ``all_boxes`` object, modified in place.
    """
    foreground = range(1, len(CLASSES))

    for cls_id in foreground:
        selected = np.where(scores[:, cls_id] > thresh)[0]
        picked_scores = scores[selected, cls_id]
        picked_boxes = boxes[selected, cls_id * 4:(cls_id + 1) * 4]
        stacked = np.hstack((picked_boxes, picked_scores[:, np.newaxis]))
        stacked = stacked.astype(np.float32, copy=False)
        survivors = nms(stacked, cfg.TEST.NMS)
        all_boxes[cls_id][index] = stacked[survivors, :]

    # Limit to max_per_image detections *over all classes*.
    if max_per_image <= 0:
        return all_boxes

    per_class_scores = [all_boxes[cls_id][index][:, -1] for cls_id in foreground]
    image_scores = np.hstack(per_class_scores)
    if len(image_scores) > max_per_image:
        # Too many detections: keep the ones with the highest confidence.
        cutoff = np.sort(image_scores)[-max_per_image]
        for cls_id in foreground:
            current = all_boxes[cls_id][index]
            strong = np.where(current[:, -1] >= cutoff)[0]
            all_boxes[cls_id][index] = current[strong, :]
    return all_boxes
def video_demo(sess, net, image):
    """Detect boxes in one frame and return the NMS-filtered detections.

    ``im_detect_bbox_kpoints`` supplies scores and boxes (its third return
    value is ignored).  Rows whose first score column exceeds 0.6 are
    stacked as ``(box columns..., score)`` in float32 and filtered with
    ``nms`` at threshold 0.3.

    Returns:
        The detection matrix restricted to the rows selected by ``nms``.
    """
    # Detect all object classes and regress object bounds (timed).
    timer = Timer()
    timer.tic()
    scores, boxes, _ = im_detect_bbox_kpoints(sess, net, image)
    timer.toc()
    report = 'Detection took {:.3f}s for {:d} object proposals'
    print(report.format(timer.total_time, boxes.shape[0]))

    CONF_THRESH = 0.6
    NMS_THRESH = 0.3

    # Confidence filter on the first score column, then NMS.
    confident = np.where(scores[:, 0] > CONF_THRESH)[0]
    kept_scores = scores[confident, 0]
    kept_boxes = boxes[confident, :]
    dets = np.hstack((kept_boxes, kept_scores[:, np.newaxis]))
    dets = dets.astype(np.float32, copy=False)
    survivors = nms(dets, NMS_THRESH)
    return dets[survivors, :]
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.

    Args:
        all_boxes: nested sequence indexed as ``all_boxes[cls][image]``.
            Each entry is either empty or a 2-D array whose columns 0-3 are
            read here as x1, y1, x2, y2; the whole row is passed to ``nms``.
        thresh: threshold forwarded unchanged to ``nms``.

    Returns:
        A nested list with the same ``[cls][image]`` layout.  An entry is a
        copy of the rows kept by ``nms``, or ``[]`` when the input entry was
        empty, contained no box with ``x2 > x1`` and ``y2 > y1``, or ``nms``
        kept nothing.
    """
    num_classes = len(all_boxes)
    # An empty outer list has no all_boxes[0] to measure.
    num_images = len(all_boxes[0]) if num_classes else 0
    nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            # len() handles both the [] placeholder and ndarrays.  Comparing
            # an ndarray to [] with == is an elementwise operation that
            # warns or raises depending on the NumPy version.
            if len(dets) == 0:
                continue

            x1, y1, x2, y2 = (dets[:, col] for col in range(4))
            # Drop degenerate boxes with zero or negative width/height.
            inds = np.where((x2 > x1) & (y2 > y1))[0]
            dets = dets[inds, :]
            if len(dets) == 0:
                continue

            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
def test_net(net, imdb, weights_filename, max_per_image=100, thresh=0.):
    """Test a Fast R-CNN network on an image database.

    Every image of ``imdb`` is read with OpenCV and run through
    ``im_detect``.  Per class (index 0, the background, is skipped) the
    rows scoring above ``thresh`` are filtered with the torch-based ``nms``
    and stored.  Optionally the detections of an image are capped at
    ``max_per_image`` over all classes.  The collected detections are
    pickled to ``detections.pkl`` in the output directory and passed to
    ``imdb.evaluate_detections``.
    """
    np.random.seed(cfg.RNG_SEED)
    num_images = len(imdb.image_index)
    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    progress = 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s'

    for i in range(num_images):
        im = cv2.imread(imdb.image_path_at(i))

        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # Class 0 is the background, so start at 1.
        for j in range(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            picked_scores = scores[inds, j]
            picked_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((picked_boxes, picked_scores[:, np.newaxis]))
            cls_dets = cls_dets.astype(np.float32, copy=False)
            # nms is only called when there is at least one candidate.
            if cls_dets.size > 0:
                keep = nms(torch.from_numpy(cls_dets), cfg.TEST.NMS).numpy()
            else:
                keep = []
            all_boxes[j][i] = cls_dets[keep, :]

        # Limit to max_per_image detections *over all classes*.
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    current = all_boxes[j][i]
                    keep = np.where(current[:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = current[keep, :]
        _t['misc'].toc()

        print(progress.format(i + 1, num_images,
                              _t['im_detect'].average_time(),
                              _t['misc'].average_time()))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, kpoints_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into scored box proposals with landmark points.

    A simplified version compared to fast/er RCNN; for details please see
    the technical report.

    The settings are read from ``cfg[cfg_key]`` (a ``bytes`` key is decoded
    as UTF-8 first).  Boxes and landmark points are decoded against
    ``anchors``, clipped to ``im_info[:2]``, size-filtered, sorted by score,
    truncated, passed through ``nms`` and truncated again.

    Returns:
        ``(blob, scores, points)`` where ``blob`` is the kept proposals
        prefixed with a zero batch-index column.
    """
    if type(cfg_key) is bytes:
        cfg_key = cfg_key.decode('utf-8')
    stage_cfg = cfg[cfg_key]
    pre_nms_topN = stage_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = stage_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = stage_cfg.RPN_NMS_THRESH
    min_size = stage_cfg.ANCHOR_MIN_SIZE

    # Scores come from the channels starting at num_anchors; boxes are
    # decoded from the anchors plus the predicted deltas.
    scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    box_deltas = rpn_bbox_pred.reshape((-1, 4))
    proposals = clip_boxes(bbox_transform_inv(anchors, box_deltas),
                           im_info[:2])

    # Facial landmarks: ten values per anchor.
    landmark_deltas = kpoints_pred.reshape(-1, 10)
    points = clip_kpoints(kpoints_transform_inv(anchors, landmark_deltas),
                          im_info[:2])

    # Remove predicted boxes with either height or width below the
    # threshold (min_size is converted to the input image scale stored in
    # im_info[2]).
    big_enough = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[big_enough, :]
    scores = scores[big_enough]
    points = points[big_enough]

    # Highest score first, optionally truncated before NMS.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    points = points[order]

    # Non-maximal suppression, optionally truncated afterwards.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    points = points[keep]

    # Only a single image per batch is supported, hence index 0 everywhere.
    batch_column = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_column, proposals.astype(np.float32, copy=False)))
    return blob, scores, points
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Convert RPN outputs (torch tensors) into scored proposals.

    rpn_cls_prob  = [n, h, w, c=a*2]
    rpn_bbox_pred = [n, h, w, c=4*a]

    Settings are read from ``cfg[cfg_key]`` (a ``bytes`` key is decoded as
    UTF-8 first).  Min-size filtering of the boxes is not performed in this
    version.

    Returns:
        ``(blob, scores)`` where ``blob`` is the kept proposals prefixed
        with a zero batch-index column.
    """
    if type(cfg_key) is bytes:
        cfg_key = cfg_key.decode('utf-8')
    stage_cfg = cfg[cfg_key]
    pre_nms_topN = stage_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = stage_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = stage_cfg.RPN_NMS_THRESH

    # Channels 0:num_anchors hold the background probability; the rest are
    # used as the proposal scores.
    fg_prob = rpn_cls_prob[:, :, :, num_anchors:]
    bbox_deltas = rpn_bbox_pred.view((-1, 4))
    scores = fg_prob.contiguous().view((-1, 1))

    # 1. Convert anchors into proposals via bbox transformations.
    # 2. Clip predicted boxes to the image.
    proposals = clip_boxes(bbox_transform_inv(anchors, bbox_deltas),
                           im_info[:2])

    # 4. Sort all (proposal, score) pairs by score from highest to lowest.
    # 5. Take the top pre_nms_topN (e.g. train 12000, test 6000).
    _, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    ranked = order.data
    proposals = proposals[ranked, :]
    scores = scores[ranked, :]

    # 6. Apply non-maximal suppression (e.g. threshold = 0.7).
    candidates = torch.cat((proposals, scores), 1).data
    keep = nms(candidates, nms_thresh)

    # 7. Pick the top region proposals after NMS (e.g. train 2000, test 300).
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep, :]

    # 8. Only a single image per batch is supported: batch index is zero.
    batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_())
    blob = torch.cat((batch_inds, proposals), 1)
    del batch_inds
    return blob, scores
def test_net(sess, net, imdb, weights_filename, max_per_image=100, thresh=0.):
    """Test a Fast R-CNN network on an image database.

    Each image of ``imdb`` is run through ``im_detect`` (the raw scores are
    printed), the detections of every non-background class are filtered
    with ``nms`` and stored in a local ``all_boxes[cls][image]`` table, and
    the per-image total is optionally capped at ``max_per_image``.  The
    visible code neither returns nor saves that table.
    """
    np.random.seed(cfg.RNG_SEED)
    num_images = len(imdb.image_index)
    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    for i in range(num_images):
        im = cv2.imread(imdb.image_path_at(i))

        detect_timer = _t['im_detect']
        detect_timer.tic()
        scores, boxes = im_detect(sess, net, im)
        print(scores)
        detect_timer.toc()

        _t['misc'].tic()
        # Index 0 is the background class and is skipped.
        for j in range(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            stacked = np.hstack((boxes[inds, j * 4:(j + 1) * 4],
                                 scores[inds, j][:, np.newaxis]))
            stacked = stacked.astype(np.float32, copy=False)
            survivors = nms(stacked, cfg.TEST.NMS)
            all_boxes[j][i] = stacked[survivors, :]

        # Limit to max_per_image detections *over all classes*.
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    current = all_boxes[j][i]
                    strong = np.where(current[:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = current[strong, :]
        _t['misc'].toc()
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Convert RPN outputs (NumPy arrays) into scored proposals.

    A simplified version compared to fast/er RCNN; for details please see
    the technical report.

    Settings are read from ``cfg[cfg_key]`` (a ``bytes`` key is decoded as
    UTF-8 first).

    Returns:
        ``(blob, scores)`` where ``blob`` is the kept proposals prefixed
        with a zero batch-index column.
    """
    if type(cfg_key) is bytes:
        cfg_key = cfg_key.decode('utf-8')
    stage_cfg = cfg[cfg_key]
    pre_nms_topN = stage_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = stage_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = stage_cfg.RPN_NMS_THRESH

    # Every anchor has a score, taken from the channels starting at
    # num_anchors.
    scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    box_deltas = rpn_bbox_pred.reshape((-1, 4))

    # Turn each anchor plus its predicted deltas into a proposal box, then
    # clip the boxes to the image.
    proposals = bbox_transform_inv(anchors, box_deltas)
    proposals = clip_boxes(proposals, im_info[:2])

    # Rank by score (descending) and keep the top pre_nms_topN proposals.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals, scores = proposals[order, :], scores[order]

    # Non-maximal suppression further prunes the proposals; nms_thresh is
    # the NMS threshold configured for RPN proposals.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Keep the best-ranked proposals that survived NMS.
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals, scores = proposals[keep, :], scores[keep]

    # Only a single image per batch is supported: batch index is zero.
    batch_column = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_column, proposals.astype(np.float32, copy=False)))
    return blob, scores
def create_plot(scores, boxes, im_file):
    """Draw the detections for an image and return the plot base64-encoded.

    Resets the module-level ``fig``, ``ax`` and ``data`` globals, runs
    per-class NMS on the given ``scores``/``boxes`` and passes the result
    to ``vis_detections``.  The ``data`` global is then dumped as JSON to
    ``var/www/downloads/json/<image stem>.json`` next to this module, and
    the current matplotlib figure is rendered in 'jpg' format.

    Returns:
        The rendered figure as base64-encoded bytes.
    """
    global fig
    global ax
    global data
    fig, ax = plt.subplots()
    data = {'results': list()}

    # Load the demo image; it must be readable.
    im = cv2.imread(im_file)
    assert (im is not None)

    # Visualize detections for each class.
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:], start=1):  # 0 is background
        class_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        class_scores = scores[:, cls_ind]
        dets = np.hstack((class_boxes, class_scores[:, np.newaxis]))
        dets = dets.astype(np.float32)
        survivors = nms(torch.from_numpy(dets), NMS_THRESH)
        dets = dets[survivors.numpy(), :]
        vis_detections(im, cls, cls_ind, dets, thresh=CONF_THRESH)

    figure = plt.gcf()

    # Persist the collected results as JSON, named after the image.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    image_stem = os.path.splitext(os.path.basename(im_file))[0]
    json_path = os.path.join(
        module_dir, 'var/www/downloads/json/{}.json'.format(image_stem))
    with open(json_path, 'w') as outfile:
        json.dump(data, outfile, indent=4)

    # Render the figure into memory and encode it.
    buffer = BytesIO()
    figure.savefig(buffer, format='jpg')
    buffer.seek(0)
    import base64
    return base64.b64encode(buffer.getvalue())
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Convert RPN outputs (torch tensors) into scored proposals.

    A simplified version compared to fast/er RCNN; for details please see
    the technical report.

    Settings are read from ``cfg[cfg_key]`` (a ``bytes`` key is decoded as
    UTF-8 first).

    Returns:
        ``(blob, scores)`` where ``blob`` is the kept proposals prefixed
        with a zero batch-index column.
    """
    if type(cfg_key) is bytes:
        cfg_key = cfg_key.decode('utf-8')
    stage_cfg = cfg[cfg_key]
    pre_nms_topN = stage_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = stage_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = stage_cfg.RPN_NMS_THRESH

    # Scores come from the channels starting at num_anchors; boxes are
    # decoded from the anchors and clipped to the image.
    fg_prob = rpn_cls_prob[:, :, :, num_anchors:]
    box_deltas = rpn_bbox_pred.view((-1, 4))
    scores = fg_prob.contiguous().view(-1, 1)
    proposals = clip_boxes(bbox_transform_inv(anchors, box_deltas),
                           im_info[:2])

    # Pick the top region proposals.  Scores are reshaped back into a
    # column only when a truncation limit is configured.
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN].view(-1, 1)
    proposals = proposals[order.data, :]

    # Non-maximal suppression.
    candidates = torch.cat((proposals, scores), 1).data
    keep = nms(candidates, nms_thresh)

    # Pick the top region proposals after NMS.
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep, ]

    # Only a single image per batch is supported: batch index is zero.
    batch_column = proposals.new_zeros(proposals.size(0), 1)
    blob = torch.cat((batch_column, proposals), 1)
    return blob, scores
def demo(sess, net, img_path):
    """Detect boxes and keypoints in one image and visualise them.

    The image at ``img_path`` is run through ``im_detect_bbox_kpoints``.
    Rows whose first score column exceeds 0.85 are filtered with ``nms`` at
    threshold 0.3; the number of kept detections and the integer
    width/height of each are printed, and everything is handed to
    ``cv2_vis`` labelled with ``CLASSES[1]``.

    Returns:
        ``timer.total_time`` as read right after the detection call.
    """
    # Load the demo image.
    im = cv2.imread(img_path)

    # Detect all object classes and regress object bounds (timed).
    timer = Timer()
    timer.tic()
    scores, boxes, kpoints = im_detect_bbox_kpoints(sess, net, im)
    timer.toc()
    once_time = timer.total_time
    report = 'Detection took {:.3f}s for {:d} object proposals'
    print(report.format(timer.total_time, boxes.shape[0]))

    CONF_THRESH = 0.85
    NMS_THRESH = 0.3

    # Confidence filter on the first score column.
    confident = np.where(scores[:, 0] > CONF_THRESH)[0]
    kept_scores = scores[confident, 0]
    kept_boxes = boxes[confident, :]
    kpoints = kpoints[confident, :]

    # NMS on (box columns..., score) rows; keypoints follow the same rows.
    dets = np.hstack((kept_boxes, kept_scores[:, np.newaxis]))
    dets = dets.astype(np.float32, copy=False)
    survivors = nms(dets, NMS_THRESH)
    dets = dets[survivors, :]
    kpoints = kpoints[survivors, :]

    print('>>>>>num_faces:', dets.shape[0])
    for det in dets:
        width = int(det[2]) - int(det[0])
        height = int(det[3]) - int(det[1])
        print('>>>>>face width{}, height{}'.format(width, height))

    cv2_vis(im, CLASSES[1], dets, kpoints)
    return once_time
def demo(sess, net, image_name, path_list):
    """Detect object classes in an image using pre-computed object proposals.

    The file is read via ``np.fromfile`` + ``cv2.imdecode`` with flag -1.
    For each non-background class the NMS-filtered detections are passed to
    ``vis_detections`` together with the running ``result``, ``outout`` and
    ``color`` values, which that call returns updated.

    Returns:
        ``(result, outout, color)`` as left by the last ``vis_detections``
        call.  The initial values are a 46x147x3 and a 224x224x3 uint8 zero
        array and the string 'blue plate'.
    """
    # Load the demo image from raw bytes.
    raw_bytes = np.fromfile(image_name, dtype=np.uint8)
    im = cv2.imdecode(raw_bytes, -1)

    # Detect all object classes and regress object bounds (timed).
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()

    # Visualize detections for each class.
    CONF_THRESH = 0.5
    NMS_THRESH = 0.3
    result = np.zeros((46, 147, 3), dtype=np.uint8)
    outout = np.zeros((224, 224, 3), dtype=np.uint8)
    color = 'blue plate'

    for cls_ind, cls in enumerate(CLASSES[1:], start=1):  # 0 is background
        class_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        class_scores = scores[:, cls_ind]
        dets = np.hstack((class_boxes, class_scores[:, np.newaxis]))
        dets = dets.astype(np.float32)
        survivors = nms(dets, NMS_THRESH)
        dets = dets[survivors, :]
        result, outout, color = vis_detections(
            im, cls, dets, path_list, result, outout, color,
            thresh=CONF_THRESH)
    return result, outout, color
scores, boxes = im_detect(sess, net, frame) timer.toc() print('Detection took {:.3f}s for {:d} object proposals'.format( timer.total_time, boxes.shape[0])) # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] vis_detections(frame, cls, dets, thresh=CONF_THRESH) out.write(frame) cv2.imshow('stair detection', frame) if cv2.waitKey(1) & 0xFF == ord('q'): break i += 1 cap.release() out.release() cv2.destroyAllWindows()
def test_net(sess, net, imdb, weights_filename, max_per_image=100, thresh=0.):
    """Test a Fast R-CNN network on an image database.

    Each image of ``imdb`` is run through ``im_detect``.  The predictions
    are grouped by class and image, filtered with ``nms`` and optionally
    capped at ``max_per_image`` per image.  The result is pickled to
    ``detections.pkl`` in the output directory and passed to
    ``imdb.evaluate_detections``.
    """
    np.random.seed(cfg.RNG_SEED)
    num_images = len(imdb.image_index)
    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    progress = 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s'

    for i in range(num_images):
        im = cv2.imread(imdb.image_path_at(i))

        # Predict all boxes of the current image together with their
        # class scores.
        _t['im_detect'].tic()
        scores, boxes = im_detect(sess, net, im)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # Skip j = 0 because it is the background class.  Predictions are
        # organised by predicted class and image: all_boxes[1][2], for
        # example, holds every box of class 1 found in image 2.
        for j in range(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            stacked = np.hstack((boxes[inds, j * 4:(j + 1) * 4],
                                 scores[inds, j][:, np.newaxis]))
            stacked = stacked.astype(np.float32, copy=False)
            # NMS thins out near-duplicate boxes.
            survivors = nms(stacked, cfg.TEST.NMS)
            all_boxes[j][i] = stacked[survivors, :]

        # Limit to max_per_image detections *over all classes*: keep only
        # the highest-scoring boxes of the image.
        if max_per_image > 0:
            # Scores of every stored box of the current image.
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    current = all_boxes[j][i]
                    strong = np.where(current[:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = current[strong, :]
        _t['misc'].toc()

        print(progress.format(i + 1, num_images,
                              _t['im_detect'].average_time,
                              _t['misc'].average_time))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
def test_model(self, resume_iter, max_per_image=100, thresh=0.):
    """Test a Fast R-CNN network on an image database.

    After ``self.prepare_construct(resume_iter)``, every image of
    ``self.imdb`` is run through ``im_detect(self.net, ...)``.  The
    detections of each non-background class are filtered with ``nms``
    (skipped when there are no candidates) and optionally capped at
    ``max_per_image`` per image.  The result is pickled to
    ``detections.pkl`` under
    ``<self.output_dir>/fasterRcnn_iter_<resume_iter>`` (created if
    missing) and passed to ``self.imdb.evaluate_detections``.
    """
    self.prepare_construct(resume_iter)
    imdb = self.imdb
    num_images = len(imdb.image_index)
    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = os.path.join(self.output_dir,
                              'fasterRcnn_iter_{}'.format(resume_iter))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    progress = 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s'

    for i in range(num_images):
        im = cv2.imread(imdb.image_path_at(i))

        _t['im_detect'].tic()
        scores, boxes = im_detect(self.net, im)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # Index 0 is the background class and is skipped.
        for j in range(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            stacked = np.hstack((boxes[inds, j * 4:(j + 1) * 4],
                                 scores[inds, j][:, np.newaxis]))
            stacked = stacked.astype(np.float32, copy=False)
            if stacked.size > 0:
                survivors = nms(stacked, cfg.TEST.NMS)
            else:
                survivors = []
            all_boxes[j][i] = stacked[survivors, :]

        # Limit to max_per_image detections *over all classes*.
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    current = all_boxes[j][i]
                    strong = np.where(current[:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = current[strong, :]
        _t['misc'].toc()

        # The timers are queried through toc(average=False) for reporting.
        print(progress.format(i + 1, num_images,
                              _t['im_detect'].toc(average=False),
                              _t['misc'].toc(average=False)))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
def eval_model(self, resume_iter, max_per_image=100, thresh=0.):
    """Test a Fast R-CNN network on an image database.

    Every image of ``self.imdb`` is run through ``im_detect(self.net, ...)``
    which here yields scores, boxes and ROI boxes.  For each non-background
    class the rows scoring above ``thresh`` are filtered with ``nms`` and
    stored as ``[all-class scores, detections, roi boxes]``.  Images are
    then sorted into three records keyed by the image name with its last
    ``_``-separated token removed:

    * ``pred_objects``: images with at least one non-empty class entry;
    * ``ignore_pred_list``: images without any detection;
    * ``confuse_pred_dict``: images with more than one detection before the
      ``max_per_image`` cap is applied (only filled when
      ``max_per_image > 0``).

    The three records are pickled into
    ``<self.output_dir>/fasterRcnn_iter_<resume_iter>`` and the predictions
    are compared against the ground truth loaded by ``self.load_GT_labels``.

    Args:
        resume_iter: value used only to name the output directory here.
        max_per_image: cap on detections per image over all classes; a
            value <= 0 disables the cap.
        thresh: minimum class score for a row to be considered.

    Returns:
        ``(metrics_cls, metrics_reg)`` as produced by
        ``self.evaluate_classifications`` and ``self.evaluate_regressions``.
    """
    imdb = self.imdb
    num_images = len(imdb.image_index)
    # all_boxes[cls][image] = [scores, N x 5 detections in
    # (x1, y1, x2, y2, score), roi boxes]
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = os.path.join(self.output_dir,
                              'fasterRcnn_iter_{}'.format(resume_iter))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    # Example name = image name without its last '_'-separated token.
    image_example_list = [
        '_'.join(img.split('_')[:-1]) for img in imdb.image_index
    ]
    pred_objects = dict()
    # Record how many images can not be bounded by a bbox.
    images_can_bound_count = 0
    images_all_count = 0
    ignore_pred_list = list()
    confuse_pred_dict = dict()

    def detected_entries(image_idx):
        """Return the per-class entries of an image that hold detections."""
        # `range` replaces the Python-2-only `xrange`, which raises
        # NameError on Python 3.
        return [
            all_boxes[j][image_idx]
            for j in range(1, imdb.num_classes)
            if len(all_boxes[j][image_idx][1]) != 0
        ]

    for i in range(num_images):
        imgs = cv2.imread(imdb.image_path_at(i))

        _t['im_detect'].tic()
        scores, boxes, rois_boxes = im_detect(self.net, imgs)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in range(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            or_cls_scores = scores[inds, :]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_rois_boxes = rois_boxes[inds, :]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS) if cls_dets.size > 0 else []
            # Store the scores of *all* classes for the kept rows.
            all_boxes[j][i] = [
                or_cls_scores[keep, :],
                cls_dets[keep, :],
                cls_rois_boxes[keep, :],
            ]
        # NOTE(review): counted once per image, as the name and the final
        # report suggest - confirm against the original layout.
        images_all_count += 1

        # Limit to max_per_image detections *over all classes*.
        if max_per_image > 0:
            image_scores = np.hstack([
                all_boxes[j][i][1][:, -1]
                for j in range(1, imdb.num_classes)
            ])
            # Log images that produced more than one detection; this
            # snapshot is taken before the cap below is applied.
            if len(image_scores) > 1:
                confuse_pred_dict[image_example_list[i]] = detected_entries(i)
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    entry = all_boxes[j][i]
                    keep = np.where(entry[1][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = [
                        entry[0][keep, :],
                        entry[1][keep, :],
                        entry[2][keep, :],
                    ]

        # The bookkeeping is the same whether or not the cap is enabled.
        cls_dets = detected_entries(i)
        if len(cls_dets) != 0:
            images_can_bound_count += 1
            pred_objects[image_example_list[i]] = cls_dets
        else:
            ignore_pred_list.append(image_example_list[i])
        _t['misc'].toc()

        if self.model_dir is not None:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
                  .format(i + 1, num_images, _t['im_detect'].toc(average=False),
                          _t['misc'].toc(average=False)))

    # write prediction result
    with open(os.path.join(output_dir, 'predict_objects.pkl'), 'wb') as f:
        pickle.dump(pred_objects, f)
    with open(os.path.join(output_dir, 'ignore_objects.pkl'), 'wb') as f:
        pickle.dump(ignore_pred_list, f)
    with open(os.path.join(output_dir, 'confuse_objects.pkl'), 'wb') as f:
        pickle.dump(confuse_pred_dict, f)

    # NOTE(review): all five report lines are assumed to be gated by
    # model_dir - confirm against the original layout.
    if self.model_dir is not None:
        print('Evaluating Classification')
        print('Save in: ' + output_dir)
        print('Have predict Images {} / {} !!'.format(
            images_can_bound_count, images_all_count))
        print('Confused predicted images {} / {} !!'.format(
            len(confuse_pred_dict), images_all_count))
        print('Ignored predicted images {} / {} !!'.format(
            len(ignore_pred_list), images_all_count))

    annopath = os.path.join(imdb._data_path, 'Xmls', '{:s}.xml')
    image_example_set = pred_objects.keys()
    cachedir = os.path.join(cfg.DATA_DIR, 'annotations_cache',
                            imdb._data_type,
                            imdb._image_set + imdb._image_type)
    gt_objects = self.load_GT_labels(
        annopath, [img for img in imdb.image_index], cachedir)
    metrics_cls = self.evaluate_classifications(pred_objects, gt_objects,
                                                image_example_set,
                                                imdb._image_type,
                                                imdb, output_dir)
    metrics_reg = self.evaluate_regressions(pred_objects, gt_objects,
                                            image_example_set, imdb,
                                            output_dir, 0.5)
    return metrics_cls, metrics_reg