def detect(self, text_proposals, scores, size):
    """Reduce raw text proposals to the final set of text lines.

    Pipeline: score threshold -> sort by descending score -> proposal NMS ->
    score normalisation -> connect proposals into lines -> filter lines ->
    line NMS.

    :param text_proposals: proposal boxes, one row per proposal
    :param scores: proposal scores, row-aligned with ``text_proposals``
        (presumably an (N, 1) column, since it is hstack-ed next to the
        boxes -- confirm against the caller)
    :param size: passed through unchanged to the proposal connector
    :return: the connector rows that survive ``filter_boxes`` and line NMS
    """
    # Drop low-scoring proposals.
    confident = np.where(scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
    text_proposals = text_proposals[confident]
    scores = scores[confident]

    # Sort by score, highest first.
    order = np.argsort(scores.ravel())[::-1]
    text_proposals = text_proposals[order]
    scores = scores[order]

    # NMS over the proposals.
    survivors = nms(np.hstack((text_proposals, scores)),
                    TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
    text_proposals = text_proposals[survivors]
    scores = scores[survivors]

    # Build the detection result from the normalised scores.
    scores = normalize(scores)
    text_recs = self.text_proposal_connector.get_text_lines(
        text_proposals, scores, size)

    # Filter the connected boxes.
    text_lines = text_recs[self.filter_boxes(text_recs)]

    # NMS over the text lines; skipped when nothing is left.
    if text_lines.shape[0] != 0:
        line_keep = nms(text_lines, TextLineCfg.TEXT_LINE_NMS_THRESH)
        text_lines = text_lines[line_keep]

    return text_lines
def detect(self, text_proposals, scores, size):
    """
    Detect texts from pre-computed text proposals.

    Proposals are thresholded by score, ordered from best to worst,
    de-duplicated with NMS, connected into text lines, filtered, and the
    resulting lines are de-duplicated with a second NMS pass.

    :param text_proposals: proposal boxes, one row per proposal
    :param scores: proposal scores, row-aligned with ``text_proposals``
    :param size: passed through unchanged to the proposal connector
    :return: the bounding boxes of the detected texts
    """
    # Keep only proposals above the minimum score.
    above_min = np.where(scores > cfg.TEXT_PROPOSALS_MIN_SCORE)[0]
    text_proposals = text_proposals[above_min]
    scores = scores[above_min]

    # Best score first.
    descending = np.argsort(scores.ravel())[::-1]
    text_proposals = text_proposals[descending]
    scores = scores[descending]

    # nms for text proposals
    proposal_keep = nms(np.hstack((text_proposals, scores)),
                        cfg.TEXT_PROPOSALS_NMS_THRESH)
    text_proposals = text_proposals[proposal_keep]
    scores = normalize(scores[proposal_keep])

    # Connect proposals into lines and drop the lines the filter rejects.
    text_lines = self.text_proposal_connector.get_text_lines(
        text_proposals, scores, size)
    text_lines = text_lines[self.filter_boxes(text_lines)]

    # nms for text lines (nothing to do when no line survived)
    if text_lines.shape[0] != 0:
        text_lines = text_lines[nms(text_lines, cfg.TEXT_LINE_NMS_THRESH)]

    return text_lines
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals.

    Runs ``im_detect`` on the image, then for every non-background class
    applies NMS to that class's boxes and hands the survivors to
    ``vis_detections`` for drawing on a shared matplotlib axis.

    :param sess: session object forwarded to ``im_detect``
    :param net: network object forwarded to ``im_detect``
    :param image_name: path of the image to load with ``cv2.imread``
    """
    # Load the demo image
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    # Format first, then print: on Python 3 print() returns None, so calling
    # .format() on its result raises AttributeError. This form works on both
    # Python 2 and Python 3.
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]  # reverse the channel order before plotting
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
def demo(sess, net, image_name, thresh=0.05):
    """Detect object classes in an image using pre-computed object proposals.

    Runs ``im_detect``, applies per-class NMS, keeps detections whose score
    exceeds ``thresh``, passes them to ``vis_detections`` together with the
    PIL copy of the image, and finally saves that PIL image under
    ``./data/detection_result``.

    :param sess: session object forwarded to ``im_detect``
    :param net: network object forwarded to ``im_detect``
    :param image_name: path of the image to process
    :param thresh: minimum score for a detection to be recorded and drawn
    """
    # The image is loaded twice: the PIL copy goes to vis_detections and is
    # saved at the end; the OpenCV copy is what the detector consumes.
    image = PIL.Image.open(image_name)
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im, save_feature=True,
                              feature_path='./data/conv.npy')
    timer.toc()
    # Format first, then print: on Python 3 print() returns None, so calling
    # .format() on its result raises AttributeError.
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))

    CONF_THRESH = 0.7
    NMS_THRESH = 0.3
    # NOTE(review): `results` is filled below but never returned or stored;
    # confirm whether callers were meant to receive it.
    results = []
    name = str(image_name.split('/')[-1])
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        cls_lables = np.full_like(cls_scores, cls_ind)
        # Row layout: x1, y1, x2, y2, score, class index.
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis],
                          cls_lables[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        inds = np.where(dets[:, -2] > thresh)[0]
        dets = dets[inds]
        for i in range(dets.shape[0]):
            dic = collections.OrderedDict()
            dic['name'] = name
            dic['category'] = int(dets[i, -1])
            dic['bbox'] = [round(float(b), 2) for b in dets[i, :4]]
            dic['score'] = float(dets[i, -2])
            results.append(dic)
        im = vis_detections(image, cls, dets, ax=None, thresh=CONF_THRESH)

    out_path = './data/detection_result'
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    out_path = os.path.join(out_path, os.path.split(image_name)[-1])
    image.save(out_path)
def draw_densecap(image, scores, rois, im_info, cap_probs, bbox_pred):
    """Draw captioned boxes on ``image`` after NMS over the regressed boxes.

    bbox_pred: [None, 4]
    rois: [None, 5]

    The bbox predictions are un-normalised with the training means/stds,
    applied to the rescaled rois, clipped to the image and run through NMS.
    The kept indices then select the boxes and caption ids that are drawn.

    Side effect: ``im_info[2]`` is overwritten with ``1.`` in place.

    :return: whatever ``draw_bounding_boxes`` returns for the kept boxes
    """
    # Constants used to undo the bbox target normalisation.
    means = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape((1, 4))
    stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS).reshape((1, 4))

    # Columns 1..4 of each roi, divided by the scale stored in im_info[2].
    boxes = rois[:, 1:5] / im_info[2]
    # One row of TIME_STEPS caption ids per box.
    cap_ids = np.argmax(cap_probs, axis=1).reshape((-1, cfg.TIME_STEPS))

    # Un-normalise the deltas, apply them, and clip to the image.
    box_deltas = bbox_pred * stds + means
    pred_boxes = clip_boxes(bbox_transform_inv(boxes, box_deltas),
                            image.shape)

    score_column = scores[:, 1][:, np.newaxis]
    pos_dets = np.hstack((pred_boxes, score_column)).astype(np.float32,
                                                            copy=False)
    keep = nms(pos_dets, cfg.TEST.NMS)

    # NOTE(review): NMS runs on the regressed `pred_boxes`, but the boxes
    # drawn are the un-regressed `boxes` -- confirm this is intended.
    kept_boxes = boxes[keep, :]
    kept_cap_ids = cap_ids[keep, :]

    # NOTE(review): this mutates the caller's im_info -- confirm that no
    # caller reads the scale afterwards.
    im_info[2] = 1.
    return draw_bounding_boxes(image, kept_boxes, im_info, kept_cap_ids)
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals.

    Runs ``im_detect`` on the image, then for every non-background class
    applies NMS to that class's boxes and passes the survivors to
    ``vis_detections`` for drawing on one matplotlib axis.

    :param sess: session object forwarded to ``im_detect``
    :param net: network object forwarded to ``im_detect``
    :param image_name: path of the image to load with ``cv2.imread``
    """
    # Load the demo image
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    # print() returns None on Python 3, so the message must be formatted
    # before it is printed rather than calling .format() on print's result.
    message = ('Detection took {:.3f}s for '
               '{:d} object proposals').format(timer.total_time,
                                               boxes.shape[0])
    print(message)

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]  # reverse the channel order before plotting
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
def ctpn(sess, net, image_name):
    """Save one figure per surviving text proposal, titled with its score.

    The image is resized, run through ``test_ctpn``, and the proposals are
    thresholded, sorted by descending score and de-duplicated with NMS.
    Each remaining box is drawn on a fresh copy of the image and written to
    ``./data/fig/fig_<index>.jpg``.

    :param sess: session object forwarded to ``test_ctpn``
    :param net: network object forwarded to ``test_ctpn``
    :param image_name: path of the image to load with ``cv2.imread``
    """
    timer = Timer()
    timer.tic()

    img = cv2.imread(image_name)
    img, scale = resize_im(img, scale=TextLineCfg.SCALE,
                           max_scale=TextLineCfg.MAX_SCALE)
    scores, boxes = test_ctpn(sess, net, img)

    # Work on a column view of the scores so they can be hstack-ed to boxes.
    new_scores = scores[:, np.newaxis]

    keep_inds = np.where(new_scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
    boxes, new_scores = boxes[keep_inds], new_scores[keep_inds]

    sorted_indices = np.argsort(new_scores.ravel())[::-1]
    boxes, new_scores = boxes[sorted_indices], new_scores[sorted_indices]

    keep_inds = nms(np.hstack((boxes, new_scores)),
                    TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
    boxes, new_scores = boxes[keep_inds], new_scores[keep_inds]

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(10, 14))
    for key, box in enumerate(boxes):
        img_inside = img.copy()
        img_inside = cv2.rectangle(img_inside, (box[0], box[1]),
                                   (box[2], box[3]),
                                   color=(255, 0, 0), thickness=2)
        plt.imshow(img_inside)
        # `key` indexes the filtered/sorted/NMS-ed arrays, so the score has
        # to come from `new_scores`; the raw `scores` array is still in the
        # detector's original order and would label the box with an
        # unrelated proposal's score.
        plt.title('Scores: {0}'.format(new_scores[key, 0]))
        plt.savefig('./data/fig/fig_{0}.jpg'.format(key))
def process_frame(self, video_name, im_name, CLASSES, CONF_THRESH):
    """Detect objects in one frame, draw them, and write image + XML output.

    :param video_name: video file name; the part before the first ``.``
        names the per-video output folder
    :param im_name: path of the frame image to process
    :param CLASSES: class names; index 0 is skipped as background
    :param CONF_THRESH: score threshold forwarded to ``self.draw``
    """
    # Output frame path
    im_path_ = os.path.join(api_config.upload_folder,
                            video_name.split(".")[0],
                            "annotated-frames",
                            os.path.basename(im_name))

    im = np.array(Image.open(im_name))
    im = im[:, :, ::-1]  # reverse the channel order for the detector

    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(self.sess, self.net, im)
    timer.toc()
    # Format first, then print: on Python 3 print() returns None, so calling
    # .format() on its result raises AttributeError.
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))

    NMS_THRESH = 0.3
    im = im[:, :, (2, 1, 0)]  # reverse the channels again for plotting
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    self.annotation = xml_setup(im_name, im.shape)

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        self.draw(im_path_, cls, dets, ax, thresh=CONF_THRESH)

    xml_write(video_name, os.path.basename(im_name), self.annotation)
    plt.savefig(im_path_, bbox_inches='tight')
    plt.close()
def detect(self, text_proposals, scores, size):
    """Filter text proposals and connect them into text-line boxes.

    Steps: score threshold, descending-score sort, proposal NMS, text-line
    construction, and a final ``filter_boxes`` pass.

    :param text_proposals: proposal boxes, one row per proposal
    :param scores: proposal scores, row-aligned with ``text_proposals``
    :param size: passed through unchanged to the proposal connector
    :return: the connector rows that pass ``filter_boxes``
    """
    # Drop low-scoring proposals before running NMS on the remainder.
    strong = np.where(scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
    text_proposals = text_proposals[strong]
    scores = scores[strong]

    # Sort by score (descending).
    ranking = np.argsort(scores.ravel())[::-1]
    text_proposals = text_proposals[ranking]
    scores = scores[ranking]

    # NMS over the proposals; the threshold comes from TextLineCfg.
    kept = nms(np.hstack((text_proposals, scores)),
               TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
    text_proposals = text_proposals[kept]
    scores = scores[kept]

    # Text-line construction: per the original author's note, several thin
    # proposals are merged into one text line.
    text_recs = self.text_proposal_connector.get_text_lines(
        text_proposals, scores, size)

    # Indexes of the boxes that pass the final filter.
    final_inds = self.filter_boxes(text_recs)
    return text_recs[final_inds]
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report

    Turns RPN outputs into a ranked, NMS-filtered set of proposals.

    :param rpn_cls_prob: 4-D RPN class probabilities; the channels from
        ``num_anchors`` onward are used as the scores
    :param rpn_bbox_pred: RPN box deltas, reshaped here to (-1, 4)
    :param im_info: ``im_info[:2]`` is used for clipping, ``im_info[2]``
        scales the minimum box size
    :param cfg_key: key into ``cfg`` selecting the RPN settings; ``bytes``
        values are decoded as UTF-8
    :param _feat_stride: unused in this function
    :param anchors: anchor boxes aligned row-wise with the reshaped deltas
    :param num_anchors: number of anchors per location
    :return: ``(blob, scores)`` where each blob row is a zero batch index
        followed by the proposal's four coordinates
    """
    # isinstance (rather than an exact type comparison) also accepts bytes
    # subclasses.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    if cfg.DEBUG_ALL:
        print('number of proposals before clip boxes to image board: {}'.format(
            proposals.shape[0]
        ))
    proposals = clip_boxes(proposals, im_info[:2])

    # remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    if cfg.FILTER_SMALL_BOX:
        min_size = cfg[cfg_key].RPN_MIN_SIZE
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    if cfg.DEBUG_ALL:
        print("number of proposals before nms: {}".format(proposals.shape[0]))
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if cfg.DEBUG_ALL:
        print("number of proposals after nms: {}".format(len(keep)))

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
def nms_detections(pred_boxes, scores, nms_thresh, inds=None):
    """Sort detections by score, run NMS, and keep at most 100 of them.

    :param pred_boxes: boxes, one row per detection
    :param scores: 2-D scores, row-aligned with ``pred_boxes``; column 0 is
        the ranking key and the score handed to ``nms``
    :param nms_thresh: threshold forwarded to ``nms``
    :param inds: optional array indexed by original detection position
    :return: ``(boxes, scores, keep)`` when ``inds`` is None, otherwise
        ``(boxes, scores, inds[keep], keep)``. ``keep`` holds the original
        positions of the surviving detections; the returned boxes and
        scores are rows of the score-sorted arrays selected by the NMS
        result.
    """
    scores = np.asarray(scores)
    pred_boxes = np.asarray(pred_boxes)

    # A stable ascending sort that is then reversed reproduces the order of
    # the previous `sorted(zip(...))[::-1]`, without Python-level tuples.
    # It also avoids the ValueError that unpacking `zip(*[])` raised for
    # empty input.
    order = np.argsort(scores[:, 0], kind='mergesort')[::-1]
    keep = order
    scores = scores[order]
    pred_boxes = pred_boxes[order]

    dets = np.hstack((pred_boxes,
                      scores[:, 0][:, np.newaxis])).astype(np.float32)
    keep_keep = nms(dets, nms_thresh)
    keep_keep = keep_keep[:100]  # cap the number of detections returned
    keep = keep[keep_keep]

    if inds is None:
        return pred_boxes[keep_keep], scores[keep_keep], keep
    return pred_boxes[keep_keep], scores[keep_keep], inds[keep], keep
def test_im(sess, net, im_path, vocab, vis=True):
    """Caption one image and optionally visualise the kept detections.

    :param sess: session object forwarded to ``im_detect``
    :param net: network object forwarded to ``im_detect``
    :param im_path: path of the image to load with ``cv2.imread``
    :param vocab: vocabulary forwarded to ``sentence`` to decode captions
    :param vis: when true, call ``vis_detections`` with ``./demo`` as the
        save path
    """
    im = cv2.imread(im_path)
    scores, boxes, captions = im_detect(sess, net, im, None, use_box_at=-1)

    # Rows of [box coordinates..., score], de-duplicated with NMS.
    pos_dets = np.hstack((boxes, scores[:, np.newaxis])) \
        .astype(np.float32, copy=False)
    keep = nms(pos_dets, cfg.TEST.NMS)
    pos_dets = pos_dets[keep, :]
    pos_captions = [sentence(vocab, captions[idx]) for idx in keep]
    # The separately sliced scores and boxes were never read (pos_dets
    # already carries both), so they are no longer computed.

    if vis:
        vis_detections(im_path, im, pos_captions, pos_dets, save_path='./demo')
def caption(sess, inp):
    """Caption the image in ``inp['image']`` and return the NMS survivors.

    Relies on the module-level ``net`` and ``vocab``.

    :param sess: session object forwarded to ``im_detect``
    :param inp: mapping with an ``'image'`` entry convertible by ``np.array``
    :return: dict with ``captions``, ``scores`` and ``boxes`` arrays for the
        detections kept by NMS
    """
    img = np.array(inp['image'])
    scores, boxes, captions = im_detect(sess, net, img, None, use_box_at=-1)

    # NMS on rows of [box coordinates..., score].
    score_column = scores[:, np.newaxis]
    pos_dets = np.hstack((boxes, score_column)).astype(np.float32, copy=False)
    keep = nms(pos_dets, cfg.TEST.NMS)
    pos_dets = pos_dets[keep, :]

    kept_scores = scores[keep]
    kept_captions = [sentence(vocab, captions[idx]) for idx in keep]
    kept_boxes = boxes[keep, :]

    return {
        'captions': np.array(kept_captions),
        'scores': np.array(kept_scores),
        'boxes': np.array(kept_boxes),
    }
def interpret_objects(cls_prob, bbox_pred, rois, im_info, nms_thres=-1.,
                      min_score=0.00001, use_gt_boxes=False,
                      max_per_image=2000):
    """Convert per-class probabilities and box deltas into detections.

    :param cls_prob: tensor-like with ``.data.cpu().numpy()`` giving an
        (N, num_classes) probability array; column 0 is skipped
    :param bbox_pred: tensor-like giving the box deltas, four columns per
        class
    :param rois: tensor-like whose columns 1..4 hold the roi boxes
    :param im_info: ``im_info[0][:2]`` is the size used for clipping and
        ``im_info[0][2]`` the scale the boxes are divided by
    :param nms_thres: NMS threshold; NMS is skipped unless it is > 0
    :param min_score: a detection is kept only if its score exceeds this
    :param use_gt_boxes: when true the rois are used as boxes directly,
        without applying the deltas
    :param max_per_image: cap on the total number of detections over all
        classes; values <= 0 disable the cap
    :return: list indexed by class; each entry is an array of detection
        rows ``[box coordinates..., score]`` or an empty list
    """
    box_deltas = bbox_pred.data.cpu().numpy()
    cls_prob = cls_prob.data.cpu().numpy()
    num_classes = cls_prob.shape[1]

    all_boxes = [[] for _ in range(num_classes)]

    for j in range(1, num_classes):  # skip the background
        inds = np.where(cls_prob[:, j] > min_score)[0]
        if len(inds) == 0:
            continue

        cls_scores = cls_prob[inds, j]
        roi_boxes = rois.data.cpu().numpy()[inds, 1:5]
        if use_gt_boxes:
            cls_boxes = roi_boxes / im_info[0][2]
        else:
            # One fancy-index slice replaces the per-row Python loop. The
            # builtin `float` replaces the `np.float` alias, which modern
            # NumPy no longer provides.
            t_box_deltas = box_deltas[inds, (j * 4):(j * 4 + 4)].astype(float)
            cls_boxes = bbox_transform_inv_hdn(
                roi_boxes, t_box_deltas) / im_info[0][2]
            cls_boxes = clip_boxes(cls_boxes, im_info[0][:2] / im_info[0][2])

        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
            .astype(np.float32, copy=False)

        if nms_thres > 0.:
            keep = nms(cls_dets, nms_thres)
            cls_dets = cls_dets[keep, :]

        all_boxes[j] = cls_dets

    if max_per_image > 0:
        per_class_scores = [all_boxes[j][:, -1]
                            for j in range(1, num_classes)
                            if len(all_boxes[j]) > 0]
        # np.hstack raises on an empty list, so only apply the cap when at
        # least one class produced detections.
        if per_class_scores:
            image_scores = np.hstack(per_class_scores)
            if len(image_scores) > max_per_image:
                # Score of the max_per_image-th best detection overall.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    if len(all_boxes[j]) == 0:
                        continue
                    keep = np.where(all_boxes[j][:, -1] >= image_thresh)[0]
                    all_boxes[j] = all_boxes[j][keep, :]

    return all_boxes
def detect(self, img, ret=None, net='VGGnet_test',
           model=os.path.join(local_dir, 'models/VGGnet_fast_rcnn_iter_150000.ckpt')):
    """Detect object classes in an image using pre-computed object proposals.

    Builds a session, loads the network and checkpoint, runs detection on a
    copy of the image resized to 400 pixels high, and draws the per-class
    detections with ``self.vis_detections``.

    :param img: path of the image to load with ``cv2.imread``
    :param ret: optional dict that receives ``'result'`` (a
        ``(True, elapsed-seconds-string)`` tuple) and ``'drawImg'`` (the
        annotated image); a fresh dict is used when omitted
    :param net: network name passed to ``get_network``
    :param model: checkpoint path passed to ``saver.restore``
    :return: the annotated image
    """
    # A dict default would be created once and then shared (and mutated) by
    # every call that omits `ret`.
    if ret is None:
        ret = {}

    self.sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True))
    # load network
    self.net = get_network(net)
    # load model
    # The trailing comma suppresses the newline under Python 2's print
    # statement and is a harmless tuple expression on Python 3.
    print('Loading network {:s}... '.format(net)),
    self.saver = tf.train.Saver()
    self.saver.restore(self.sess, model)
    print(' done.')

    starttime = time.time()
    # Load the demo image
    im = cv2.imread(img)
    print(im.shape)
    # Resize to a height of 400, scaling the width by the same factor.
    im = cv2.resize(im, (int((400.0 / im.shape[0]) * im.shape[1]), 400))
    print(im.shape)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(self.sess, self.net, im)
    timer.toc()
    # Format first, then print, so this also works where print is a
    # function returning None.
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))

    # Draw detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        im = self.vis_detections(im, cls, dets, CONF_THRESH)

    endtime = time.time()
    ret['result'] = (True, "%.3f" % (endtime - starttime))
    ret['drawImg'] = im
    print("finish detecting")
    return im
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals.

    Unlike a per-class pipeline, this variant flattens the non-background
    (box, score, class) triples of all classes into one list, thresholds
    them, runs a single NMS over the whole list, and only then splits the
    survivors by class for drawing.

    :param sess: session object forwarded to ``im_detect``
    :param net: network object forwarded to ``im_detect``
    :param image_name: path of the image to load with ``cv2.imread``
    """
    # Load the demo image
    im = cv2.imread(image_name)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    # Format first, then print: on Python 3 print() returns None, so calling
    # .format() on its result raises AttributeError.
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    im = im[:, :, (2, 1, 0)]  # reverse the channel order before plotting
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    CONF_THRESH = 0.2
    NMS_THRESH = 0.2
    num_classes = len(CLASSES)

    # cls_matrix[r, c] == c: the class index belonging to every score cell.
    cls_matrix = np.arange(num_classes).reshape([1, -1]) + np.zeros(
        [boxes.shape[0], 1])

    # Drop the background column, then flatten to one row per (roi, class).
    cls_scores_fg = scores[:, 1:num_classes].reshape([-1, 1])
    cls_boxes_fg = boxes[:, 4:4 * num_classes].reshape([-1, 4])
    cls_matrix_fg = cls_matrix[:, 1:num_classes].reshape([-1, 1])

    keeps = np.where(cls_scores_fg >= CONF_THRESH)[0]
    cls_scores_fg = cls_scores_fg[keeps]
    cls_boxes_fg = cls_boxes_fg[keeps]
    cls_matrix_fg = cls_matrix_fg[keeps]

    # One NMS pass across all classes together.
    dets = np.hstack((cls_boxes_fg, cls_scores_fg)).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    print(len(keep))
    dets = dets[keep, :]
    cls_matrix_fg = cls_matrix_fg[keep, :]

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_fg_ind = np.where(cls_matrix_fg == cls_ind)[0]
        detses = dets[cls_fg_ind, :]
        vis_detections(im, cls, detses, ax, thresh=CONF_THRESH)
def find_objects(self, input_image):
    """Detect the objects of interest and tag each box with an object id.

    Class index 11 is handled first and tagged with id 3; every class from
    index 15 onward in ``self.class_list`` is tagged with id 2. For each
    class the boxes go through NMS and a confidence threshold, after which
    the score column is overwritten with the object id.

    :param input_image: image forwarded to ``im_detect``
    :return: list of arrays, one per processed class, with rows
        ``[x1, y1, x2, y2, obj_id]``
    """
    caffe.set_mode_gpu()
    caffe.set_device(0)
    scores, boxes = im_detect(self.net, input_image)

    CONF_THRESH = 0.7
    NMS_THRESH = 0.2

    def tagged_detections(class_index, object_id):
        # NMS + confidence filter for one class, then replace the score
        # column with the object id.
        class_boxes = boxes[:, 4 * class_index:4 * (class_index + 1)]
        class_scores = scores[:, class_index]
        found = np.hstack(
            (class_boxes, class_scores[:, np.newaxis])).astype(np.float32)
        survivors = nms(found, NMS_THRESH)
        found = found[survivors, :]
        found = found[found[:, -1] >= CONF_THRESH]
        found[:, -1] = object_id
        return found

    # TODO: Hack so that TV (class 11) returns as 3 which is tablet in the
    # filter...
    objects_detected = [tagged_detections(11, 3)]

    # TODO: Hack so that all blocks (classes 15 and up) have object ID 2
    for cls_ind in range(15, len(self.class_list)):
        objects_detected.append(tagged_detections(cls_ind, 2))

    return objects_detected
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals.

    Runs detection on the image, draws the per-class detections via
    ``vis_detections`` (which also collects the boxes in ``new_bboxes``),
    and saves the annotated image and an XML file through ``bb.IMGBBox``.
    Paths and thresholds come from the module-level ``args`` and ``cfg``.

    :param net: network object forwarded to ``im_detect``
    :param image_name: file name relative to ``cfg.DATA_DIR/args.IMAGE_PATH``
    """
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, args.IMAGE_PATH, image_name)
    im__ = cv2.imread(im_file)
    # NOTE(review): cv2.imread returns None for an unreadable file and the
    # code below would then fail inside im_detect; consider an explicit
    # `is None` check here.
    result = im__

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im__)
    timer.toc()
    # Format first, then print: on Python 3 print() returns None, so calling
    # .format() on its result raises AttributeError.
    print(('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    im__ = im__[:, :, (2, 1, 0)]

    new_bboxes = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, args.NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(new_bboxes, result, cls, dets, thresh=args.CONF_THRESH)

    # Save result
    img_obj = bb.IMGBBox()
    img_obj.img_name = image_name
    img_obj.xml_name = image_name.strip().split('.')[0] + '.xml'
    img_obj.setIMG(result)
    img_obj.saveIMG(args.save_img_dir, args.save_img_dir)
    img_obj.bboxes = new_bboxes
    img_obj.saveXML(args.save_xml_dir, args.save_xml_dir)
def ctpn(sess, net, image_name):
    """Detect text in one image and save the connected text lines.

    Proposals from ``test_ctpn`` are de-duplicated with NMS, thresholded at
    a score of 0.7, connected into lines with ``connect_proposal`` and
    handed to ``save_results`` with a threshold of 0.9.

    :param sess: session object forwarded to ``test_ctpn``
    :param net: network object forwarded to ``test_ctpn``
    :param image_name: path of the image to load with ``cv2.imread``
    """
    im = check_img(cv2.imread(image_name))

    timer = Timer()
    timer.tic()
    scores, boxes = test_ctpn(sess, net, im)
    timer.toc()

    CONF_THRESH = 0.9
    NMS_THRESH = 0.3

    # Rows of [x1, y1, x2, y2, score].
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32)
    dets = dets[nms(dets, NMS_THRESH), :]

    # Keep proposals scoring at least 0.7 before connecting them.
    confident = np.where(dets[:, 4] >= 0.7)[0]
    dets = dets[confident, :]

    line = connect_proposal(dets[:, 0:4], dets[:, 4], im.shape)
    save_results(image_name, im, line, thresh=0.9)
def demo(self, image_name, is_init=True):
    """Detect object classes in an image using pre-computed object proposals.

    With ``is_init`` true, a full detection is run and ``self.objects`` is
    rebuilt as a list of ``{'name', 'score', 'boxes'}`` dicts, where
    ``boxes`` is ``[center_x, center_y, width, height]``. Otherwise
    ``im_detect`` is called with ``is_part=True`` and only the cached
    feature map / RPN outputs / image scales on ``self`` are refreshed.

    :param image_name: forwarded to ``im_detect``
    :param is_init: selects the full run (True) or the partial run (False)
    """
    timer = Timer()
    timer.tic()

    if not is_init:
        # Partial run: the detection outputs are discarded.
        _, _, self.feature_map, self.rpn_boxes, self.rpn_scores, self.im_scales = im_detect(
            self.sess, self.net, image_name, is_part=True)
    else:
        # Detect all object classes and regress object bounds
        raw_scores, raw_boxes, self.feature_map, self.rpn_boxes, self.rpn_scores, self.im_scales = im_detect(
            self.sess, self.net, image_name, is_part=False)

        score_floor = self.score_thresh
        nms_overlap = self.nms_thresh
        self.objects = []

        # Class indices start at 1 because index 0 (background) is skipped.
        for cls_ind, cls in enumerate(CLASSES[1:], 1):
            cls_boxes = raw_boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = raw_scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            dets = dets[nms(dets, nms_overlap), :]

            # Report the class under the corrected spelling.
            label = 'strawberry' if cls == 'stawberry' else cls

            for i in np.where(dets[:, -1] >= score_floor)[0]:
                bbox = dets[i, :4]
                score = dets[i, -1]
                box_width = bbox[2] - bbox[0]
                box_height = bbox[3] - bbox[1]
                # Box centre, rounded to whole pixels.
                c_x = np.round(bbox[0] + box_width / 2.0)
                c_y = np.round(bbox[1] + box_height / 2.0)
                self.objects.append({
                    'name': label,
                    'score': score,
                    'boxes': list([c_x, c_y, box_width, box_height]),
                })

    timer.toc()
def caption(sess, inp):
    """Caption ``inp['image']`` and return up to ``max_detections`` results.

    Relies on the module-level ``net`` and ``vocab``.

    :param sess: session object forwarded to ``im_detect``
    :param inp: mapping with ``'image'`` (convertible by ``np.array``) and
        ``'max_detections'`` (upper bound on the number of results)
    :return: dict with parallel lists ``bboxes`` (from ``convert_rect``),
        ``classes`` (caption strings) and ``scores`` (Python floats)
    """
    img = np.array(inp['image'])
    height, width = img.shape[0], img.shape[1]

    scores, boxes, captions = im_detect(sess, net, img, None, use_box_at=-1)

    # NMS on rows of [box coordinates..., score].
    pos_dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = nms(pos_dets, cfg.TEST.NMS)
    pos_dets = pos_dets[keep, :]
    pos_scores = scores[keep]
    pos_captions = [sentence(vocab, captions[idx]) for idx in keep]
    pos_boxes = boxes[keep, :]

    # Emit at most `max_detections` entries, in NMS order.
    count = min(inp['max_detections'], len(pos_captions))
    picked = range(count)
    return dict(
        bboxes=[convert_rect(pos_boxes[i], width, height) for i in picked],
        classes=[pos_captions[i] for i in picked],
        scores=[float(pos_scores[i]) for i in picked],
    )
def detect(self, text_proposals, scores, size):
    """Filter text proposals and connect them into text-line boxes.

    :param text_proposals: proposal boxes, one row per proposal
    :param scores: proposal scores, row-aligned with ``text_proposals``
    :param size: passed through unchanged to the proposal connector
    :return: the connector rows that pass ``filter_boxes``
    """
    # Drop low-scoring proposals (the original author's note gives the
    # cut-off as probability 0.7 -- see TextLineCfg for the actual value).
    passing = np.where(scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
    text_proposals = text_proposals[passing]
    scores = scores[passing]

    # Sort by score, highest first.
    by_score = np.argsort(scores.ravel())[::-1]
    text_proposals = text_proposals[by_score]
    scores = scores[by_score]

    # NMS over the proposals (the original note lists
    # TEXT_PROPOSALS_NMS_THRESH as 0.2 -- confirm in TextLineCfg).
    stacked = np.hstack((text_proposals, scores))
    unique = nms(stacked, TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
    text_proposals = text_proposals[unique]
    scores = scores[unique]

    # Build the detection result.
    text_recs = self.text_proposal_connector.get_text_lines(
        text_proposals, scores, size)

    # Final check on each box. Per the original author's note the filter
    # requires height/width ratio > 0.5, probability > 0.9 and width > 16;
    # the authoritative values live in filter_boxes.
    return text_recs[self.filter_boxes(text_recs)]
def detect(self, text_proposals, scores, size):
    """Filter text proposals and connect them into text-line boxes.

    Unlike a variant that returns only the text lines, this one also hands
    back the proposals and scores that survived NMS.

    :param text_proposals: proposal boxes, one row per proposal
    :param scores: proposal scores, row-aligned with ``text_proposals``
    :param size: passed through unchanged to the proposal connector
    :return: ``(text_proposals, scores, text_lines)`` -- the NMS-surviving
        proposals and scores, plus the connector rows that pass
        ``filter_boxes``
    """
    # Drop low-scoring proposals.
    good = np.where(scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
    text_proposals = text_proposals[good]
    scores = scores[good]

    # Sort by score, highest first.
    rank = np.argsort(scores.ravel())[::-1]
    text_proposals = text_proposals[rank]
    scores = scores[rank]

    # NMS over the proposals.
    # keep_inds = soft_nms(np.hstack((text_proposals, scores)),threshold=TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
    remaining = nms(np.hstack((text_proposals, scores)),
                    TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
    text_proposals = text_proposals[remaining]
    scores = scores[remaining]

    # Build the detection result and keep the boxes that pass the filter.
    text_recs = self.text_proposal_connector.get_text_lines(
        text_proposals, scores, size)
    accepted = self.filter_boxes(text_recs)

    return text_proposals, scores, text_recs[accepted]
def ctpn(sess, net, image_name):
    """Run text detection on one image and save the connected text lines.

    :param sess: session object forwarded to ``test_ctpn``
    :param net: network object forwarded to ``test_ctpn``
    :param image_name: path of the image to load with ``cv2.imread``
    """
    img = cv2.imread(image_name)
    im = check_img(img)

    # Time only the network forward pass.
    timer = Timer()
    timer.tic()
    scores, boxes = test_ctpn(sess, net, im)
    timer.toc()

    CONF_THRESH = 0.9
    NMS_THRESH = 0.3

    # Stack boxes and scores into rows of [x1, y1, x2, y2, score] for NMS.
    score_column = scores[:, np.newaxis]
    dets = np.hstack((boxes, score_column)).astype(np.float32)
    nms_keep = nms(dets, NMS_THRESH)
    dets = dets[nms_keep, :]

    # Discard proposals scoring below 0.7.
    score_keep = np.where(dets[:, 4] >= 0.7)[0]
    dets = dets[score_keep, :]

    proposal_boxes = dets[:, 0:4]
    proposal_scores = dets[:, 4]
    line = connect_proposal(proposal_boxes, proposal_scores, im.shape)
    save_results(image_name, im, line, thresh=0.9)
parser.add_argument('--net', dest='demo_net', choices=['VGGnet_test', 'MSnet_test'], default='VGGnet_test') parser.add_argument('--model', dest='model', help='Model path', required=True) parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default=None, type=str) args = parser.parse_args() return args if __name__ == '__main__': args = parse_args() if args.cfg_file is not None: cfg_from_file(args.cfg_file) pprint.pprint(cfg) sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) net = get_network(args.demo_net) print ('Loading network {:s}... '.format(args.demo_net)), saver = tf.train.Saver() saver.restore(sess, args.model) print (' done.') im_names = glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.png')) + \ glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.jpg')) timer = Timer() for im_name in im_names: print 'Demo for {:s}'.format(im_name) im = cv2.imread(im_name) timer.tic() scores, boxes = im_detect_rpn(sess, net, im) timer.toc() print('Detection took {:.3f}s for {:d} object proposals').format(timer.total_time, boxes.shape[0]) dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, 0.5) dets = dets[keep, :] vis_detections(im, dets, thresh=0.5)
def proposal_layer(rpn_cls_prob_reshape,
                   rpn_bbox_pred,
                   im_info,
                   cfg_key,
                   _feat_stride=[16, ],
                   anchor_scales=[16, ]):
    """Turn RPN outputs into scored, NMS-filtered proposals.

    Parameters
    ----------
    rpn_cls_prob_reshape: (1, H, W, Ax2) outputs of RPN, prob of bg or fg
                         NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
    rpn_bbox_pred: (1, H, W, Ax4), rgs boxes output of RPN
    im_info: a list of [image_height, image_width, scale_ratios]
    cfg_key: 'TRAIN' or 'TEST', as ``str`` or ASCII ``bytes``
    _feat_stride: the downsampling ratio of feature map to the original
        input image (only read, never mutated)
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
        (only read, never mutated)

    Returns
    -------
    blob : (num_kept, 5) float32 rows of [score, x1, y1, x2, y2]
        (note: the first column is the score, not a batch index)
    bbox_deltas : (num_kept, 4) deltas belonging to the kept proposals

    Algorithm
    ---------
    for each (H, W) location i
      generate A anchor boxes centered on cell i
      apply predicted bbox deltas at cell i to each of the A anchors
    clip predicted boxes to image
    remove predicted boxes with either height or width < threshold
    sort all (proposal, score) pairs by score from highest to lowest
    take top pre_nms_topN proposals before NMS
    apply NMS to remaining proposals
    take after_nms_topN proposals after NMS
    return the top proposals
    """
    # Decode only when given bytes: calling .decode() on a Python 3 str
    # raises AttributeError.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('ascii')

    # TODO: the anchor scales may need revisiting later, because text is
    # densely packed.
    # _anchors value, as recorded by the original author:
    # [[0   2 15  13]
    #  [0   0 15  15]
    #  [0  -4 15  19]
    #  [0  -9 15  24]
    #  [0 -16 15  31]
    #  [0 -26 15  41]
    #  [0 -41 15  56]
    #  [0 -62 15  77]
    #  [0 -91 15 106]
    #  [0 -134 15 149]]
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # base anchors
    _num_anchors = _anchors.shape[0]

    im_info = im_info[0]  # height, width and scale of the (single) image

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N  # max boxes fed into NMS
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # max boxes kept after NMS
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH  # NMS overlap threshold
    min_size = cfg[cfg_key].RPN_MIN_SIZE  # minimum box size

    height, width = rpn_cls_prob_reshape.shape[1:3]  # feature-map size

    # Split the last axis into (anchor, bg/fg) and keep index 1 of each
    # pair, i.e. the fg probability, giving shape (1, H, W, A).
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])

    # The predictions are relative deltas; they are converted to image
    # coordinates below.
    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts: one (x, y, x, y) offset per feature-map cell.
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # shifts shape = [height*width, 4]
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))  # every anchor over the whole image

    # Reshape predicted bbox transformations to (1 * H * W * A, 4) so the
    # rows line up with the anchors (ordered by h, w, a from slowest to
    # fastest).
    bbox_deltas = bbox_deltas.reshape((-1, 4))  # (HxWxA, 4)

    # Same story for the scores:
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    bbox_deltas = bbox_deltas[order, :]

    # 6. apply nms
    # 7. take after_nms_topN
    # 8. return the top proposals
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]

    # Output blob: the score takes the place of the usual batch-index
    # column, followed by the four box coordinates.
    blob = np.hstack((scores.astype(np.float32, copy=False),
                      proposals.astype(np.float32, copy=False)))

    return blob, bbox_deltas
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride = [16,], anchor_scales = [16,]):
    """
    Convert raw RPN outputs into scored, NMS-filtered box proposals.

    Parameters
    ----------
    rpn_cls_prob_reshape: (1, H, W, Ax2) RPN class probabilities. The last
        axis is read as A (bg, fg) pairs; only the fg probability is used.
    rpn_bbox_pred: (1, H, W, Ax4) box regression output of the RPN
    im_info: indexable whose first item is
        [image_height, image_width, scale_ratio]
    cfg_key: 'TRAIN' or 'TEST'; selects which section of ``cfg`` is read
    _feat_stride: downsampling ratio between the input image and the
        feature map
    anchor_scales: scales forwarded to ``generate_anchors``
    ----------
    Returns
    ----------
    blob: (N, 5) float32 array whose first column is the score and whose
        remaining columns are the proposal coordinates
    bbox_deltas: the rows of the regression output that belong to the kept
        proposals, in the same order as ``blob``

    Algorithm:
      for each (H, W) location, shift the A base anchors onto that cell
      apply the predicted bbox deltas to every shifted anchor
      clip the predicted boxes to the image
      drop boxes whose height or width is below the threshold
      sort (proposal, score) pairs by score, highest first
      keep the top pre_nms_topN proposals
      apply NMS
      keep the top post_nms_topN proposals
    """
    base_anchors = generate_anchors(scales=np.array(anchor_scales))
    num_anchors = base_anchors.shape[0]

    # Only the first entry of im_info is used: this layer is single-image.
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    phase_cfg = cfg[cfg_key]
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N    # max proposals fed to NMS
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N  # max proposals kept after NMS
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE             # minimum box side, pre-scaling
    # TODO: the minimum size may need to be lowered later (to 8?).

    # Spatial size of the feature map.
    height, width = rpn_cls_prob_reshape.shape[1:3]

    # Split the channel axis into (anchor, bg/fg) and keep the fg
    # probability only -> (1, H, W, A).
    paired_probs = np.reshape(rpn_cls_prob_reshape,
                              [1, height, width, num_anchors, 2])
    scores = np.reshape(paired_probs[:, :, :, :, 1],
                        [1, height, width, num_anchors])

    # The regression output is relative to the anchors; it is converted to
    # image coordinates further down.
    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('score map size: {}'.format(scores.shape))

    # 1. Generate proposals from bbox deltas and shifted anchors.
    # One (x, y, x, y) offset per feature-map cell, in image pixels.
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * _feat_stride,
                                 np.arange(0, height) * _feat_stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Broadcast the A base anchors (1, A, 4) against the K cell shifts
    # (K, 1, 4) to get (K, A, 4), then flatten to (K*A, 4).
    num_cells = shifts.shape[0]
    anchors = base_anchors.reshape((1, num_anchors, 4)) + \
        shifts[:, np.newaxis, :]
    anchors = anchors.reshape((num_cells * num_anchors, 4))

    # Flatten deltas and scores so their rows line up with the anchors
    # (ordered by h, w, a from slowest to fastest).
    bbox_deltas = bbox_deltas.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    # Apply the predicted deltas to the anchors to get boxes in image space.
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. Remove boxes whose height or width is below the threshold
    #    (min_size is converted to the input image scale in im_info[2]).
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]

    # 4./5. Sort by score, highest first, and keep the top pre_nms_topN.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    bbox_deltas = bbox_deltas[order, :]

    # 6./7. Apply NMS and keep the top post_nms_topN survivors.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    bbox_deltas = bbox_deltas[keep, :]

    # 8. Output blob: score first, then the proposal coordinates.
    blob = np.hstack((scores.astype(np.float32, copy=False),
                      proposals.astype(np.float32, copy=False)))

    return blob, bbox_deltas
def proposal_layer(rpn_cls_prob_reshape,rpn_bbox_pred,im_info,cfg_key,_feat_stride = [16,],anchor_scales = [8, 16, 32]):
    '''
    Convert raw RPN outputs into NMS-filtered region proposals.

    :param rpn_cls_prob_reshape: background/foreground probabilities for
        every anchor. It is transposed with [0, 3, 1, 2] on entry, so it is
        presumably channels-last (1, H, W, 2*A) -- confirm against the caller.
        After the transpose the first A channels are read as background and
        the last A channels as foreground.
    :param rpn_bbox_pred: predicted box deltas (not ground truth), transposed
        the same way; presumably (1, H, W, 4*A).
    :param im_info: indexable whose first item holds the image size in its
        first two entries and the image scale in its third.
    :param cfg_key: key into ``cfg`` selecting the section to read
        (presumably 'TRAIN' or 'TEST').
    :param _feat_stride: stride of the feature map relative to the image.
    :param anchor_scales: scales forwarded to ``generate_anchors``.
    :return: (N, 5) float32 array; column 0 is the batch index (always 0),
        the remaining columns are the proposal coordinates.
    '''
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    # Move the channel axis to position 1 so the rest of the function can
    # use the (1, C, H, W) layout.
    rpn_cls_prob_reshape = np.transpose(rpn_cls_prob_reshape, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # Maximum number of proposals kept before NMS.
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    # Maximum number of proposals kept after NMS.
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    # Overlap threshold used by NMS.
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # Proposal height and width both need to be at least RPN_MIN_SIZE
    # (at original image scale).
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # The first set of _num_anchors channels are bg probs, the second set
    # are the fg probs, which are the ones we want.
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred

    # print(...) with a single argument behaves the same on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    # This maps feature-map cells back to positions in the input image.
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas are (1, 4 * A, H, W) here
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) here
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals: the predicted deltas adjust each
    # anchor into a proposal box.
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms
    # 7. take after_nms_topN
    # 8. return the top proposals
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob.
    # This implementation only supports a single input image, so all
    # batch inds are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
timer.tic() scores, boxes = im_detect(sess, net, im) timer.toc() print('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) CONF_THRESH = 0.7 NMS_THRESH = 0.1 for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] class_name = cls thresh = 0.5 # vis_detections(im, class_name, dets, ax, thresh=0.5):"""Draw detected bounding boxes.""" inds = np.where(dets[:, -1] >= thresh)[0] if len(inds) == 0: k = k + 1 else: f.write('video_' + str(cate) + ',' + im_name.split('/')[-1] + ',' + class_name + ',' + str(len(inds)) + ',') for i in inds: bbox = dets[i, :4]
def runForUI(imgWillBeDetected, imgOrigin, usegpu=0):
    """Detect persons in a gallery image and annotate them with their
    similarity to a query image.

    :param imgWillBeDetected: gallery image. It is passed to ``_im_detect``
        and also to ``plt.imread``.
        NOTE(review): those two calls usually expect different things
        (pixel data vs. a path) -- confirm what callers pass.
    :param imgOrigin: query image handed to ``_im_exfeat`` / ``_im_detect``.
    :param usegpu: values >= 0 select caffe GPU mode on ``cfg.GPU_ID``;
        negative values select CPU mode.
    :return: ``(None, None)`` when no detection survives the score threshold
        and NMS; otherwise the rendered figure converted with ``asarray``.
    """
    # Setup caffe
    if usegpu >= 0:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.GPU_ID)
    else:
        caffe.set_mode_cpu()

    # Hard-coded model locations; ideally these would live in a config file.
    gallery_def = 'models/psdb/resnet50/eval_gallery.prototxt'  # gallery network prototxt
    probe_def = 'models/psdb/resnet50/eval_probe.prototxt'  # probe network prototxt
    caffemodel = 'output/psdb_train/resnet50/resnet50_iter_50000.caffemodel'  # trained weights
    det_thresh = 0.75  # minimum detection score

    # Get query image and roi
    query_img = imgOrigin
    query_roi = [0, 0, 1292, 3008]  # [x1, y1, x2, y2]

    # Extract feature of the query person
    net = caffe.Net(probe_def, caffemodel, caffe.TEST)
    roi = np.asarray(query_roi).astype(np.float32).reshape(1, 4)
    feature = _im_exfeat(net, query_img, roi, ['feat'])
    query_feat = feature['feat'].squeeze()
    del net  # Necessary to release cuDNN conv static workspace

    # Detect and extract feature of persons in the gallery image
    net = caffe.Net(gallery_def, caffemodel, caffe.TEST)

    # Necessary to warm-up the net, otherwise the first image results are
    # wrong. Possibly a bug in caffe's memory optimization; the results are
    # correct after this warm-up.
    _im_detect(net, query_img)

    gallery_img = imgWillBeDetected
    boxes, scores, feat_dic = _im_detect(net, gallery_img, None, ['feat'])

    j = 1  # only consider j = 1 (foreground class)
    inds = np.where(scores[:, j] > det_thresh)[0]
    cls_scores = scores[inds, j]
    cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
    boxes = np.hstack((cls_boxes,
                       cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(boxes, cfg.TEST.NMS)
    boxes = boxes[keep]
    features = feat_dic['feat'][inds][keep]

    # No detection left. (A later ``boxes is None`` check in the original
    # could never fire, since ``boxes`` is always an array here, and was
    # removed.)
    if boxes.shape[0] == 0:
        return None, None

    features = features.reshape(features.shape[0], -1)

    # Compute pairwise cosine similarities,
    # equals to inner-products, as features are already L2-normed
    similarities = features.dot(query_feat)

    # Visualize the results
    fig, ax = plt.subplots(figsize=(16, 9))
    buffer_ = BytesIO()
    try:
        ax.imshow(plt.imread(gallery_img))
        plt.axis('off')
        for box, sim in zip(boxes, similarities):
            x1, y1, x2, y2, _ = box
            ax.add_patch(
                plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                              fill=False, edgecolor='#4CAF50', linewidth=3.5))
            ax.add_patch(
                plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                              fill=False, edgecolor='white', linewidth=1))
            ax.text(x1 + 5, y1 - 18, '{:.2f}'.format(sim),
                    bbox=dict(facecolor='#4CAF50', linewidth=0),
                    fontsize=20, color='white')
        plt.tight_layout()

        # Render the figure to an in-memory PNG instead of a file so the
        # result can be handed back as an array.
        fig.savefig(buffer_, format='png')
        buffer_.seek(0)
        # Convert while the buffer is still open: the image opener is
        # presumably PIL's lazy Image.open, which reads pixel data only on
        # first access, so closing the buffer first would break asarray.
        rendered = asarray(PILImageOpen(buffer_))
    finally:
        buffer_.close()
        # Release the figure; pyplot otherwise keeps every figure alive.
        plt.close(fig)

    del net
    return rendered
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_infos, _feat_stride, opts, anchor_scales, anchor_ratios, mappings):
    """Generate NMS-filtered proposals for every image in a batch.

    :param rpn_cls_prob_reshape: per-image class probabilities, indexed as
        [image, channel, row, col]; the first A channels are treated as
        background and the last A as foreground.
    :param rpn_bbox_pred: per-image box deltas, indexed the same way.
    :param im_infos: one entry per image; entries 0 and 1 are the image
        height and width, entry 2 the image scale.
    :param _feat_stride: stride of the feature map relative to the image.
    :param opts: dict read for 'num_box_pre_NMS', 'num_box_post_NMS',
        'nms_thres', 'min_size' and 'dropout_box_runoff_image'.
    :param anchor_scales: scales forwarded to ``generate_anchors``.
    :param anchor_ratios: ratios forwarded to ``generate_anchors``.
    :param mappings: lookup from an (integer) image height/width to the
        matching feature-map height/width.
    :return: float32 array with one row per kept proposal:
        [batch_index, proposal coordinates..., score], all images
        concatenated along axis 0.
    """
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals
    batch_size = rpn_cls_prob_reshape.shape[0]
    _anchors = generate_anchors.generate_anchors(scales=anchor_scales,
                                                 ratios=anchor_ratios)
    _num_anchors = _anchors.shape[0]

    pre_nms_topN = opts['num_box_pre_NMS']
    post_nms_topN = opts['num_box_post_NMS']
    nms_thres = opts['nms_thres']
    min_size = opts['min_size']

    blob = []

    for i in range(batch_size):
        im_info = im_infos[i]

        # Only the top-left height x width window of the feature map is
        # used for this image.
        height = mappings[int(im_info[0])]
        width = mappings[int(im_info[1])]

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = rpn_cls_prob_reshape[i, _num_anchors:, :height, :width]
        bbox_deltas = rpn_bbox_pred[i, :, :height, :width]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
            print('score map size: {}'.format(scores.shape))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * _feat_stride
        shift_y = np.arange(0, height) * _feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = _num_anchors
        K = shifts.shape[0]
        anchors = _anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas are (4 * A, H, W) here
        # transpose to (H, W, 4 * A)
        # reshape to (H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (A, H, W) here
        # transpose to (H, W, A)
        # reshape to (H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        if opts['dropout_box_runoff_image']:
            # Optionally discard proposals that run more than
            # _allowed_border pixels outside the image before clipping.
            _allowed_border = 16
            inds_inside = np.where(
                (proposals[:, 0] >= -_allowed_border) &
                (proposals[:, 1] >= -_allowed_border) &
                (proposals[:, 2] < im_info[1] + _allowed_border) &  # width
                (proposals[:, 3] < im_info[0] + _allowed_border)  # height
            )[0]
            proposals = proposals[inds_inside, :]
            # Keep scores row-aligned with proposals; without this every
            # later index would pair boxes with the wrong scores.
            scores = scores[inds_inside]
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms
        # 7. take after_nms_topN
        # 8. return the top proposals
        keep = nms(np.hstack((proposals, scores)).astype(np.float32),
                   nms_thres)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob: the first column is the index of the image
        # within the batch.
        batch_inds = np.ones((proposals.shape[0], 1), dtype=np.float32) * i
        blob.append(np.hstack((batch_inds,
                               proposals.astype(np.float32, copy=False),
                               scores.astype(np.float32, copy=False))))

    return np.concatenate(blob, axis=0)
def getRes_Img(sess, net, image):
    """Detect equipment in a base64-encoded image and nest the detections.

    :param sess: session object forwarded to ``im_detect``.
    :param net: network object forwarded to ``im_detect``.
    :param image: request object; ``imgcontent`` (base64 image bytes),
        ``imgID`` and ``equiptype`` are read from it.
    :return: a ``Res_Image`` whose ``imgID`` is copied from ``image`` and
        whose ``equipments`` holds the detected equipment, each with a
        ``children`` list of the detections nested inside it. When
        ``image.equiptype`` is non-empty only equipment whose name contains
        the lower-cased ``equiptype`` is returned.
    """
    img_bytes = base64.b64decode(image.imgcontent)
    # frombuffer replaces fromstring, whose binary mode is deprecated.
    nparr = np.frombuffer(img_bytes, np.uint8)
    im = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()

    # Reverse the channel order (cv2 decodes as BGR).
    im = im[:, :, (2, 1, 0)]

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3

    res_img = Res_Image()  # result container: equipment list + image id
    equiAllArr = []  # every detection in the image, all classes together
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        # EquiArr yields the candidate boxes of a single class only.
        equiarr = EquiArr(im, cls, dets, thresh=CONF_THRESH)
        if equiarr is not None:
            equiAllArr.extend(equiarr)

    # Ascending by area.
    equiAllArr.sort(key=lambda equip: equip.acreage)

    equiAllArrNew = []  # nested result
    visited = set()  # indices already placed, as a parent or as a child

    # Detections arrive as a flat list; build the nesting by hand. Only
    # equipment whose name has no '_' may act as a parent.
    for i, parent in enumerate(equiAllArr):
        if '_' in parent.equiName or i in visited:
            continue
        visited.add(i)
        xmin = parent.area.xmin
        ymin = parent.area.ymin
        xmax = parent.area.xmax
        ymax = parent.area.ymax

        equChilds = []  # detections nested inside this parent
        for m, cand in enumerate(equiAllArr):
            if m in visited:
                continue
            xx1 = np.maximum(xmin, cand.area.xmin)
            yy1 = np.maximum(ymin, cand.area.ymin)
            xx2 = np.minimum(xmax, cand.area.xmax)
            yy2 = np.minimum(ymax, cand.area.ymax)
            w = np.maximum(0, xx2 - xx1 + 1)
            h = np.maximum(0, yy2 - yy1 + 1)
            inter = float(w * h)  # overlap area

            # A candidate is a child when at least 80% of it lies inside
            # the parent.
            if not (inter / cand.acreage >= 0.8):
                continue

            if parent.equiName == 'DLQ' and endwith(cand.equiName, '_CT'):
                # Compare the gap between the top edges with the gap
                # between the bottom edges to choose the suffix.
                h1 = np.maximum(ymin, cand.area.ymin) - np.minimum(
                    ymin, cand.area.ymin)
                h2 = np.maximum(ymax, cand.area.ymax) - np.minimum(
                    ymax, cand.area.ymax)
                if h1 <= h2:
                    child_name = parent.equiName + '_MHCT'
                else:
                    child_name = parent.equiName + '_ZZCT'
            elif endwith(cand.equiName, '_CT'):
                child_name = parent.equiName + '_CT'
            elif endwith(cand.equiName, '_JT'):
                child_name = parent.equiName + '_JT'
            else:
                child_name = cand.equiName

            child = Reco_Equipment_child()
            child.equiName = child_name
            child.area = cand.area
            equChilds.append(child)
            visited.add(m)

        parent.children = equChilds
        equiAllArrNew.append(parent)

    # Whatever was neither a parent nor a child is emitted without children.
    for i, equip in enumerate(equiAllArr):
        if i in visited:
            continue
        equip.children = []
        equiAllArrNew.append(equip)

    res_img.imgID = image.imgID
    # Optionally restrict the result to the requested equipment type.
    if image.equiptype == '':
        res_img.equipments = equiAllArrNew
    else:
        res_equip = str.lower(image.equiptype)
        res_img.equipments = [equip for equip in equiAllArrNew
                              if res_equip in equip.equiName]
    return res_img