def rpn_generate(queue=None, imdb_name=None, rpn_model_path=None, cfg=None,
                 rpn_test_prototxt=None):
    """Use a trained RPN to generate proposals.
    """
    cfg.TEST.RPN_PRE_NMS_TOP_N = -1     # no pre-NMS filtering
    cfg.TEST.RPN_POST_NMS_TOP_N = 2000  # limit top boxes after NMS
    print 'RPN model: {}'.format(rpn_model_path)
    print('Using config:')
    pprint.pprint(cfg)

    import caffe
    _init_caffe(cfg)

    if '+' in imdb_name:
        imdbs_list = imdb_name.split('+')
        imdb = datasets.imdb.imdb(imdb_name)
        output_dir = get_output_dir(imdb, None)
        print 'Output will be saved to `{:s}`'.format(output_dir)
    else:
        imdbs_list = [imdb_name]
        output_dir = None  # gets set later for the single-database case

    rpn_proposals_path = [None] * len(imdbs_list)
    for i, imdb_name in enumerate(imdbs_list):
        # NOTE: the matlab implementation computes proposals on flipped images, too.
        # We compute them on the image once and then flip the already computed
        # proposals. This might cause a minor loss in mAP (less proposal jittering).
        imdb = get_imdb(imdb_name)
        print 'Loaded dataset `{:s}` for proposal generation'.format(imdb.name)
        if output_dir is None:
            output_dir = get_output_dir(imdb, None)
            print 'Output will be saved to `{:s}`'.format(output_dir)

        # Load RPN and configure output directory
        rpn_net = caffe.Net(rpn_test_prototxt, rpn_model_path, caffe.TEST)
        rpn_net_name = os.path.splitext(os.path.basename(rpn_model_path))[0]
        rpn_proposals_path[i] = os.path.join(
            output_dir, rpn_net_name + '_' + imdb_name + '_proposals.pkl')

        # Check if RPN proposals have already been computed; if so, don't recompute
        if not os.path.isfile(rpn_proposals_path[i]):
            # Generate proposals on the imdb
            rpn_proposals = imdb_proposals(rpn_net, imdb)
            # Write proposals to disk
            with open(rpn_proposals_path[i], 'wb') as f:
                cPickle.dump(rpn_proposals, f, cPickle.HIGHEST_PROTOCOL)
            print 'Wrote RPN proposals to {}'.format(rpn_proposals_path[i])
        else:
            print "Proposals for " + imdb_name + " exist already."

    if len(rpn_proposals_path) == 1:
        rpn_proposals_path = rpn_proposals_path[0]

    # Send the proposal file path(s) through the multiprocessing queue
    queue.put({'proposal_path': rpn_proposals_path})
def test_rpn_msr_net(net, imdb):
    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Reuse cached detections if they have already been computed
    det_file = os.path.join(output_dir, 'detections.pkl')
    if os.path.exists(det_file):
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        print 'Detections loaded from {}'.format(det_file)
        print 'Evaluating detections'
        imdb.evaluate_proposals_msr(all_boxes, output_dir)
        return

    # Generate proposals on the imdb and cache them to disk
    all_boxes = imdb_proposals_det(net, imdb)
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_proposals_msr(all_boxes, output_dir)
def main(args_list):
    args = parse_args(args_list)

    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    cfg.GPU_ID = args.GPU_ID

    print('Using config:')
    pprint.pprint(cfg)

    if not args.randomize:
        # fix the random seeds (numpy and caffe) for reproducibility
        np.random.seed(cfg.RNG_SEED)
        caffe.set_random_seed(cfg.RNG_SEED)

    print 'Setting GPU device %d for training' % cfg.GPU_ID
    caffe.set_mode_gpu()
    caffe.set_device(cfg.GPU_ID)

    imdb, roidb = combined_roidb(args.imdb_name)
    print '{:d} roidb entries'.format(len(roidb))

    output_dir = get_output_dir(imdb)
    print 'Output will be saved to `{:s}`'.format(output_dir)

    train_net(args.solver, roidb, output_dir,
              pretrained_model=args.pretrained_model,
              max_iters=args.max_iters)
def train_rpn(queue=None, imdb_name=None, init_model=None, solver=None,
              max_iters=None, cfg=None):
    """Train a Region Proposal Network in a separate training process.
    """
    # Not using any proposals, just ground-truth boxes
    cfg.TRAIN.HAS_RPN = True
    cfg.TRAIN.BBOX_REG = False  # applies only to Fast R-CNN bbox regression
    cfg.TRAIN.PROPOSAL_METHOD = 'gt'
    cfg.TRAIN.IMS_PER_BATCH = 1
    print 'Init model: {}'.format(init_model)
    print('Using config:')
    pprint.pprint(cfg)

    import caffe
    _init_caffe(cfg)

    roidb, imdb = get_roidb(imdb_name)
    print 'roidb len: {}'.format(len(roidb))
    output_dir = get_output_dir(imdb, None)
    print 'Output will be saved to `{:s}`'.format(output_dir)

    model_paths = train_net(solver, roidb, output_dir,
                            pretrained_model=init_model,
                            max_iters=max_iters)
    # Cleanup all but the final model
    for i in model_paths[:-1]:
        os.remove(i)
    rpn_model_path = model_paths[-1]
    # Return the final model path (the caller sends it through the
    # multiprocessing queue)
    return {'model_path': rpn_model_path}
def train_fast_rcnn(queue=None, imdb_name=None, init_model=None, solver=None,
                    max_iters=None, cfg=None, rpn_file=None):
    """Train a Fast R-CNN using proposals generated by an RPN.
    """
    cfg.TRAIN.HAS_RPN = False          # not generating proposals on-the-fly
    cfg.TRAIN.PROPOSAL_METHOD = 'rpn'  # use pre-computed RPN proposals instead
    cfg.TRAIN.IMS_PER_BATCH = 2
    print 'Init model: {}'.format(init_model)
    print 'RPN proposals: {}'.format(rpn_file)
    print('Using config:')
    pprint.pprint(cfg)

    import caffe
    _init_caffe(cfg)

    roidb, imdb = get_roidb(imdb_name, rpn_file=rpn_file)
    output_dir = get_output_dir(imdb, None)
    print 'Output will be saved to `{:s}`'.format(output_dir)

    # Train Fast R-CNN
    model_paths = train_net(solver, roidb, output_dir,
                            pretrained_model=init_model,
                            max_iters=max_iters)
    # Cleanup all but the final model
    for i in model_paths[:-1]:
        os.remove(i)
    fast_rcnn_model_path = model_paths[-1]
    # Return the Fast R-CNN model path (the caller sends it over the
    # multiprocessing queue)
    return {'model_path': fast_rcnn_model_path}
def rpn_generate(queue=None, imdb_name=None, rpn_model_path=None, cfg=None,
                 rpn_test_prototxt=None):
    """Use a trained RPN to generate proposals.
    """
    cfg.TEST.RPN_PRE_NMS_TOP_N = -1     # no pre-NMS filtering
    cfg.TEST.RPN_POST_NMS_TOP_N = 2000  # limit top boxes after NMS
    print 'RPN model: {}'.format(rpn_model_path)
    print('Using config:')
    pprint.pprint(cfg)

    import caffe
    _init_caffe(cfg)

    # NOTE: the matlab implementation computes proposals on flipped images, too.
    # We compute them on the image once and then flip the already computed
    # proposals. This might cause a minor loss in mAP (less proposal jittering).
    imdb = get_imdb(imdb_name)
    print 'Loaded dataset `{:s}` for proposal generation'.format(imdb.name)

    # Load RPN and configure output directory
    rpn_net = caffe.Net(rpn_test_prototxt, rpn_model_path, caffe.TEST)
    output_dir = get_output_dir(imdb, None)
    print 'Output will be saved to `{:s}`'.format(output_dir)

    # Generate proposals on the imdb
    rpn_proposals = imdb_proposals(rpn_net, imdb)

    # Write proposals to disk; the caller sends the proposal file path through
    # the multiprocessing queue
    rpn_net_name = os.path.splitext(os.path.basename(rpn_model_path))[0]
    rpn_proposals_path = os.path.join(
        output_dir, rpn_net_name + '_proposals.pkl')
    with open(rpn_proposals_path, 'wb') as f:
        cPickle.dump(rpn_proposals, f, cPickle.HIGHEST_PROTOCOL)
    print 'Wrote RPN proposals to {}'.format(rpn_proposals_path)
    return {'proposal_path': rpn_proposals_path}
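# The NOTE above mentions flipping already-computed proposals instead of
# re-running the RPN on flipped images. A minimal sketch of how such a flip
# could look for (x1, y1, x2, y2) boxes; `flip_boxes` is a hypothetical helper
# for illustration, not part of this codebase.
import numpy as np

def flip_boxes(boxes, im_width):
    """Horizontally mirror boxes given the image width (0-indexed pixel coords)."""
    flipped = boxes.copy()
    flipped[:, 0] = im_width - 1 - boxes[:, 2]  # new x1 from old x2
    flipped[:, 2] = im_width - 1 - boxes[:, 0]  # new x2 from old x1
    return flipped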
def test_net(net, imdb): """Test a R*CNN network on an image database.""" num_images = len(imdb.image_index) num_classes = imdb.num_classes all_scores = {} all_labels = {} for a in xrange(num_classes): all_scores[imdb.classes[a]] = np.zeros((num_images,1), dtype = np.float32) all_labels[imdb.classes[a]] = -np.ones((num_images,1), dtype = np.int16) output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect' : Timer()} roidb = imdb.roidb for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) gt = np.where(roidb[i]['gt_classes']>-1)[0] gt_boxes = roidb[i]['boxes'][gt] gt_class = roidb[i]['gt_classes'][gt] assert (gt_boxes.shape[0]==1) _t['im_detect'].tic() scores, secondary_scores, selected_boxes = im_detect(net, im, roidb[i]['boxes'], roidb[i]['gt_classes']) _t['im_detect'].toc() # Visualize detections # vis_detections(im, gt_boxes, scores, imdb.classes) for a in xrange(num_classes): all_scores[imdb.classes[a]][i] = scores[0,a] all_labels[imdb.classes[a]][i] = gt_class print 'im_detect: {:d}/{:d} {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time) #f = {'images': imdb.image_index, 'scores': all_boxes, 'classes': imdb.classes, 'context_boxes': all_selected_boxes} #output_dir = os.path.join(os.path.dirname(__file__), 'output', # 'Action_MIL_wContextBoxes_'+ imdb.name) #if not os.path.exists(output_dir): # os.makedirs(output_dir) #det_file = os.path.join(output_dir, 'res.mat') #sio.savemat(det_file, {'data': f}) #print 'Saving in', det_file imdb._ap(all_scores, all_labels)
def test_net(net, imdb): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = 40 * num_images # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 100 # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} roidb = imdb.roidb for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, roidb[i]['boxes']) _t['im_detect'].toc() _t['misc'].tic() inds = np.where(roidb[i]['gt_classes'] == 0)[0] cls_scores = scores[inds, 1] cls_boxes = boxes[inds, 4:8] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32, copy=False) sio.savemat('%s/%d.mat' % (output_dir, i), {'dets': dets}) if 0: keep = nms(dets, 0.3) vis_detections(im, imdb.classes[1], dets[keep, :]) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) """
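# Several of these test loops describe a "max_per_set" heuristic in their
# comments: keep one minheap of scores per class and raise that class's
# detection threshold once the heap exceeds its budget. A small standalone
# sketch of that mechanism (illustration only, not code from this repo):
import heapq

def update_class_threshold(top_scores, new_scores, max_per_set, cur_thresh):
    """Push new scores onto the class minheap; once it overflows, raise the threshold."""
    for val in new_scores:
        heapq.heappush(top_scores, val)
    if len(top_scores) > max_per_set:
        # Pop the lowest scores until the budget is respected; the heap root
        # is then the smallest score still worth keeping for this class.
        while len(top_scores) > max_per_set:
            heapq.heappop(top_scores)
        cur_thresh = top_scores[0]
    return cur_thresh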
def test_net(net, imdb): """Test a R*CNN network on an image database.""" num_images = len(imdb.image_index) num_classes = imdb.num_classes all_boxes = np.zeros((0, 2+num_classes), dtype = np.float32) all_selected_boxes = np.zeros((num_classes, 4, 0)) output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect' : Timer()} roidb = imdb.roidb for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) gt = np.where(roidb[i]['gt_classes']>-1)[0] gt_boxes = roidb[i]['boxes'][gt] _t['im_detect'].tic() scores, secondary_scores, selected_boxes = im_detect(net, im, roidb[i]['boxes'], roidb[i]['gt_classes']) _t['im_detect'].toc() # Visualize detections # vis_detections(im, gt_boxes, scores, imdb.classes) for j in xrange(gt_boxes.shape[0]): # store image id and voc_id (1-indexed) temp = np.array([i+1, j+1], ndmin=2) temp = np.concatenate((temp, np.array(scores[j,:],ndmin=2)), axis=1) all_boxes = np.concatenate((all_boxes, temp), axis=0) all_selected_boxes = np.concatenate((all_selected_boxes, selected_boxes), axis = 2) print 'im_detect: {:d}/{:d} {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time) print 'Writing VOC results' imdb._write_voc_results_file(all_boxes)
def single_scale_test_net(net, imdb, targe_size=(320, 320), vis=False):
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    output_dir = get_output_dir(imdb, net)
    net_time = 0
    for i in xrange(num_images):
        t = time.time()
        im = cv2.imread(imdb.image_path_at(i))
        det = im_detect_ratio(net, im, targe_size[0], targe_size[1])
        t2 = time.time() - t
        net_time += t2
        for j in xrange(1, imdb.num_classes):
            inds = np.where(det[:, -1] == j)[0]
            if inds.shape[0] > 0:
                cls_dets = det[inds, :-1].astype(np.float32)
                if 'coco' in imdb.name:
                    keep = soft_nms(cls_dets, sigma=0.5, Nt=0.30,
                                    threshold=cfg.confidence_threshold, method=1)
                    cls_dets = cls_dets[keep, :]
                all_boxes[j][i] = cls_dets
                if vis:
                    vis_detections(im, imdb.classes[j], cls_dets)
        print 'im_detect: {:d}/{:d} {:.4f}s'.format(i + 1, num_images,
                                                    net_time / (i + 1))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    if imdb.name == 'voc_2012_test':
        print 'Saving detections'
        imdb.config['use_salt'] = False
        imdb._write_voc_results_file(all_boxes)
    else:
        print 'Evaluating detections'
        imdb.evaluate_detections(all_boxes, output_dir)
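# single_scale_test_net applies soft-NMS on COCO via the repo's `soft_nms`
# (whose `method` flag selects the decay form). A rough pure-NumPy sketch of
# the soft-NMS idea, shown here with Gaussian decay; this is an illustration
# only, not the implementation used above.
import numpy as np

def soft_nms_sketch(dets, sigma=0.5, score_thresh=0.001):
    """Soft-NMS on dets = [[x1, y1, x2, y2, score], ...]; decays instead of discarding."""
    dets = dets.copy()
    keep = []
    while dets.shape[0] > 0:
        top = np.argmax(dets[:, 4])
        best = dets[top].copy()
        keep.append(best)
        dets = np.delete(dets, top, axis=0)
        if dets.shape[0] == 0:
            break
        # IoU of the remaining boxes with the current best box
        xx1 = np.maximum(best[0], dets[:, 0])
        yy1 = np.maximum(best[1], dets[:, 1])
        xx2 = np.minimum(best[2], dets[:, 2])
        yy2 = np.minimum(best[3], dets[:, 3])
        inter = np.maximum(0, xx2 - xx1 + 1) * np.maximum(0, yy2 - yy1 + 1)
        area_b = (best[2] - best[0] + 1) * (best[3] - best[1] + 1)
        area_d = (dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1)
        iou = inter / (area_b + area_d - inter)
        # Gaussian decay of overlapping scores, then drop near-zero scores
        dets[:, 4] *= np.exp(-(iou ** 2) / sigma)
        dets = dets[dets[:, 4] > score_thresh]
    return np.array(keep)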
def train_rfcn(queue=None, imdb_name=None, init_model=None, solver=None,
               max_iters=None, cfg=None, rpn_file=None, output_cache=None):
    """Train an R-FCN using proposals generated by an RPN.
    """
    cfg.TRAIN.HAS_RPN = False          # not generating proposals on-the-fly
    cfg.TRAIN.PROPOSAL_METHOD = 'rpn'  # use pre-computed RPN proposals instead
    cfg.TRAIN.IMS_PER_BATCH = 1
    print 'Init model: {}'.format(init_model)
    print 'RPN proposals: {}'.format(rpn_file)
    print('Using config:')
    pprint.pprint(cfg)

    import caffe
    _init_caffe(cfg)

    roidb, imdb = get_roidb(imdb_name, rpn_file=rpn_file)
    output_dir = get_output_dir(imdb)
    print 'Output will be saved to `{:s}`'.format(output_dir)

    # Train R-FCN and send the final model path over the multiprocessing queue;
    # if a cached final model already exists, reuse it instead of retraining.
    final_caffemodel = os.path.join(output_dir, output_cache)
    if os.path.exists(final_caffemodel):
        queue.put({'model_path': final_caffemodel})
    else:
        model_paths = train_net(solver, roidb, output_dir,
                                pretrained_model=init_model,
                                max_iters=max_iters)
        # Cleanup all but the final model
        for i in model_paths[:-1]:
            os.remove(i)
        rfcn_model_path = model_paths[-1]
        # Send final model path through the multiprocessing queue
        shutil.copyfile(rfcn_model_path, final_caffemodel)
        queue.put({'model_path': final_caffemodel})
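# These stage functions take a `queue` because the alternating-optimization
# driver runs each stage in its own process, letting Caffe release GPU memory
# between stages. A minimal sketch of that driver pattern, assuming a stage
# function that calls queue.put({'model_path': ...}) like train_rfcn above
# (`run_stage` is a hypothetical helper, not part of this repo):
import multiprocessing as mp

def run_stage(stage_fn, **kwargs):
    """Run one training/generation stage in a child process and collect its output."""
    queue = mp.Queue()
    proc = mp.Process(target=stage_fn, kwargs=dict(queue=queue, **kwargs))
    proc.start()
    result = queue.get()  # e.g. {'model_path': ...} or {'proposal_path': ...}
    proc.join()
    return result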
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all attributes are collected into: # all_probs[image] = 40 x 2 array of attributes in # (score1, score2) all_probs = [[] for _ in xrange(num_images)] output_dir = get_output_dir(imdb, net) # timers _t = {'im_attr': Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb # dataset eval # imdb.dataset_eval() for i in xrange(1): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] k = 2001 im = cv2.imread(imdb.image_path_at(k)) # resize to 178*218 # im_resized = cv2.resize(im, (178, 218)) _t['im_attr'].tic() probs, select_rois = im_attr(net, im, box_proposals) _t['im_attr'].toc() # print probs # print select_rois all_probs[k] = probs # draw rois detected on the image '''for j in xrange(select_rois.shape[0]-1): source_im = Image.open(imdb.image_path_at(k)) source_draw = ImageDraw.Draw(source_im) source_draw.rectangle(select_rois[j,1:] / 3.37, outline=0x2c2cee) source_im.show() ''' # write attributes attributes_present = np.zeros(probs.shape[0]) for j in xrange(probs.shape[0]): if probs[j, 1] >= probs[j, 0]: attributes_present[j] = 1 attributes_gt = imdb.load_celeba_annotation( imdb.image_index[k])['face_attrs'] with open(os.path.join('./results', str(k) + '.txt'), 'w') as file: file.write('{}\n'.format(imdb.image_path_at(k))) file.write('\\hline\n') file.write( '{\\bf Attribute} &{\\bf Prediction} & {\\bf Ground-truth} \ &{\\bf Attribute} &{\\bf Prediction} &{\\bf Ground-truth} \\\\ \n' ) print '{}'.format(imdb.image_path_at(k)) for j in xrange(len(imdb.face_attributes_name())): if attributes_present[j] == 1: present = 'Yes' else: present = 'No' if attributes_gt[j] == 1: gt = 'Yes' else: gt = 'No' if j % 2 == 0: file.write('\\hline\n') file.write('{} & {} & {} &'.format( imdb.face_attributes_name()[j], present, gt)) else: file.write('{} & {} & {} \\\\ \n'.format( imdb.face_attributes_name()[j], present, gt)) print '{} {}'.format(imdb.face_attributes_name()[j], present) file.write('\\hline') print 'im_attr: {:d}/{:d} {:.3f}s' \ .format(i + 1, num_images, _t['im_attr'].average_time) attr_file = os.path.join(output_dir, 'attributes.pkl') with open(attr_file, 'wb') as f: cPickle.dump(all_probs, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating attributes' imdb.evaluate_attributes(all_probs, output_dir)
def test_net(net, imdb ,args): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = 40 * num_images #changed from 40 # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 100 # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) flip_str='' if args.use_flip: flip_str='flip' #print "--------",args.reusedet itr= args.caffemodel.split('_')[-1].split('.')[0] try: if not(args.reusedet): generate_error det_file = os.path.join(output_dir, 'detections%s%s.pkl'%(flip_str,itr)) with open(det_file, 'rb') as f: all_boxes = cPickle.load(f) except: # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} roidb = imdb.roidb lfeat = [] for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) if roidb[i]["flipped"]: im = im[:,::-1,:] _t['im_detect'].tic() scores, boxes = im_detect(net, im, roidb[i]['boxes'],args.feat_file,args.eval_segm) _t['im_detect'].toc() #lfeat.append(feat) if args.visdet: import pylab pylab.figure(1) pylab.clf() pylab.imshow(im) _t['misc'].tic() for j in xrange(1, imdb.num_classes): inds = np.where((scores[:, j] > thresh[j]) & (roidb[i]['gt_classes'] == 0))[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] cls_boxes = cls_boxes[top_inds, :] # push new scores onto the minheap for val in cls_scores: heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detection, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) if args.imdb_name!='voc_2007_trainval':#test thresh[j] = top_scores[j][0] all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) if args.visdet: keep = nms(all_boxes[j][i], 0.3) #import pylab #pylab.figure(1) vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :],0.3) #pylab.draw() #pylab.show() #raw_input() _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) if args.visdet: pylab.draw() pylab.show() raw_input() for j in xrange(1, imdb.num_classes): for i in xrange(num_images): inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0] all_boxes[j][i] = all_boxes[j][i][inds, :] if args.feat_file!=None: print 'Saving the features in ',os.path.join(output_dir,args.feat_file) with open(os.path.join(output_dir,args.feat_file), 'wb') as f: cPickle.dump(lfeat, f, cPickle.HIGHEST_PROTOCOL) det_file = os.path.join(output_dir, 'detections%s%s.pkl'%(flip_str,itr)) with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) if args.use_flip: print "Merging Left Right Detections" all_boxes2 = all_boxes #all_boxes=merge_detections(all_boxes2,imdb) 
all_boxes=merge_detections(all_boxes2,imdb) from datasets.factory import get_imdb imdb = get_imdb(args.imdb_name) print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) if args.imdb_name=='voc_2007_trainval': print 'Evaluate CorLoc' corloc=evalCorLoc(imdb,nms_dets) print "CorLoc",corloc print "Mean",corloc.mean() corlocover=[] for l in range(11): corlocover.append(evalCorLoc(imdb,nms_dets,overlap=float(l)/10.0).mean()) print "Overlap",float(l)/10.0, print "CorLoc",corlocover[-1] if 1: import pylab pylab.figure() pylab.plot(corlocover) pylab.plot([0,10],[corlocover[0],corlocover[-1]]) pylab.show() else: print 'Evaluating detections' imdb.evaluate_detections(nms_dets, output_dir, args.overlap)
def test_net(net, imdb, image_id="003202"): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = 40 * num_images # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 100 # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb if cfg.TEST.HAS_RPN: box_proposals = None else: box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] ## # code below added by chaitu ## image_index_chaitu = image_id box_proposals = _load_pascal_annotation(image_index_chaitu) #box_proposals['boxes'] = np.array([box_proposals['boxes'][0]]) #box_proposals['gt_classes'] = np.array([box_proposals['gt_classes'][0]]) box_proposals['boxes'] = box_proposals['boxes'] box_proposals['gt_classes'] = box_proposals['gt_classes'] gt_new_image = box_proposals['gt_classes'][0] ## # code above added by chaitu ## im = cv2.imread('/var/services/homes/kchakka/py-faster-rcnn/data/VOCdevkit2007/VOC2007/JPEGImages/'+image_index_chaitu+'.jpg') import pdb pdb.set_trace() feature_vector_, clas_prob = im_detect(net, im, box_proposals['boxes']) import pdb pdb.set_trace() feature_vector_ = feature_vector_.squeeze() feature_vector_ = feature_vector_/np.linalg.norm(feature_vector_) max_fv_ = 0 output = [] dot_prod_values = [] #num_images = 10 m = {} for i in xrange(num_images): print i # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] ## # code below added by chaitu ## image_index_chaitu = imdb.image_path_at(i).split('/')[-1].split('.')[0] m[i] = {} box_proposals = _load_pascal_annotation(image_index_chaitu) ## # code above added by chaitu ## im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() fv_, class_prob = im_detect(net, im, box_proposals['boxes']) import pdb pdb.set_trace() counter = 0 similar_label = [] non_similar_label = [] import sys max_int = sys.maxint for fv_i in range(len(fv_)): #for f_ in fv_: f_ = fv_[fv_i] box = box_proposals['boxes'][fv_i] f_ = f_.squeeze() f_ = f_/np.linalg.norm(f_) dot_prod = np.dot(feature_vector_, f_) l2_distance = np.linalg.norm(feature_vector_ - f_) similar_image_index = [dot_prod, i, fv_i,box_proposals['gt_classes'][counter]] m[i][fv_i] = [box[0], box[1], box[2], box[3]] output.append(similar_image_index) _t['im_detect'].toc() output = sorted(output, reverse=True) #output = output[:20] train_txt = "/var/services/homes/kchakka/py-faster-rcnn/VOCdevkit/VOC2007/ImageSets/Main/train.txt" f = open(train_txt,'r') train_images = [] similar_image_indices = [] for line in f.readlines(): train_images.append(line.strip()) box_info = [] for boxes in output: similar_image_indices.append(train_images[boxes[1]]) box_info.append((train_images[boxes[1]], m[boxes[1]][boxes[2]])) boxes.append(int(train_images[boxes[1]])) 
print output[:20] print "Similar Image Indices : " , similar_image_indices[:20] import pdb pdb.set_trace() print "Images with box information : " , box_info for boxes in output: if boxes[3] == gt_new_image: if len(similar_label) <= 10: similar_label.append(boxes) else: if len(non_similar_label) <= 10: non_similar_label.append(boxes) if len(similar_label) > 10 and len(non_similar_label) > 10: break pdb.set_trace() fine_tune_data = similar_label + non_similar_label print fine_tune_data f = open('finetune.txt', 'w') for label in fine_tune_data: id_length = len(str(label[4])) zeros = "0"*(6-id_length) f.write(zeros + str(label[4]) + "\n") f.close()
def test_net(net, imdb, image_id="003202"): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = 40 * num_images # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 100 # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect': Timer(), 'misc': Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb if cfg.TEST.HAS_RPN: box_proposals = None else: box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] ## # code below added by chaitu ## image_index_chaitu = image_id box_proposals = _load_pascal_annotation(image_index_chaitu) #box_proposals['boxes'] = np.array([box_proposals['boxes'][0]]) #box_proposals['gt_classes'] = np.array([box_proposals['gt_classes'][0]]) box_proposals['boxes'] = box_proposals['boxes'] box_proposals['gt_classes'] = box_proposals['gt_classes'] gt_new_image = box_proposals['gt_classes'][0] ## # code above added by chaitu ## im = cv2.imread( '/var/services/homes/kchakka/py-faster-rcnn/data/VOCdevkit2007/VOC2007/JPEGImages/' + image_index_chaitu + '.jpg') import pdb pdb.set_trace() feature_vector_, clas_prob = im_detect(net, im, box_proposals['boxes']) import pdb pdb.set_trace() feature_vector_ = feature_vector_.squeeze() feature_vector_ = feature_vector_ / np.linalg.norm(feature_vector_) max_fv_ = 0 output = [] dot_prod_values = [] #num_images = 10 m = {} for i in xrange(num_images): print i # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] ## # code below added by chaitu ## image_index_chaitu = imdb.image_path_at(i).split('/')[-1].split('.')[0] m[i] = {} box_proposals = _load_pascal_annotation(image_index_chaitu) ## # code above added by chaitu ## im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() fv_, class_prob = im_detect(net, im, box_proposals['boxes']) import pdb pdb.set_trace() counter = 0 similar_label = [] non_similar_label = [] import sys max_int = sys.maxint for fv_i in range(len(fv_)): #for f_ in fv_: f_ = fv_[fv_i] box = box_proposals['boxes'][fv_i] f_ = f_.squeeze() f_ = f_ / np.linalg.norm(f_) dot_prod = np.dot(feature_vector_, f_) l2_distance = np.linalg.norm(feature_vector_ - f_) similar_image_index = [ dot_prod, i, fv_i, box_proposals['gt_classes'][counter] ] m[i][fv_i] = [box[0], box[1], box[2], box[3]] output.append(similar_image_index) _t['im_detect'].toc() output = sorted(output, reverse=True) #output = output[:20] train_txt = "/var/services/homes/kchakka/py-faster-rcnn/VOCdevkit/VOC2007/ImageSets/Main/train.txt" f = open(train_txt, 'r') train_images = [] similar_image_indices = [] for line in f.readlines(): train_images.append(line.strip()) box_info = [] for boxes in output: similar_image_indices.append(train_images[boxes[1]]) box_info.append((train_images[boxes[1]], m[boxes[1]][boxes[2]])) 
boxes.append(int(train_images[boxes[1]])) print output[:20] print "Similar Image Indices : ", similar_image_indices[:20] import pdb pdb.set_trace() print "Images with box information : ", box_info for boxes in output: if boxes[3] == gt_new_image: if len(similar_label) <= 10: similar_label.append(boxes) else: if len(non_similar_label) <= 10: non_similar_label.append(boxes) if len(similar_label) > 10 and len(non_similar_label) > 10: break pdb.set_trace() fine_tune_data = similar_label + non_similar_label print fine_tune_data f = open('finetune.txt', 'w') for label in fine_tune_data: id_length = len(str(label[4])) zeros = "0" * (6 - id_length) f.write(zeros + str(label[4]) + "\n") f.close()
def gen_recall(): cfg_from_file('experiments/cfgs/rfcn_end2end_iccv_eb.yml') #cfg_from_file('experiments/cfgs/rfcn_end2end_iccv_eb.yml') imdb, roidb = combined_roidb('sg_vrd_2016_test') m = h5py.File('/home/zawlin/Dropbox/proj/sg_vrd_meta.h5', 'r', 'core') import cv2 h5path = 'data/sg_vrd_2016/EB/eb.h5' h5f = h5py.File(h5path,driver='core') h5_rois = {} for i in h5f['test/']: data=h5f['test/%s'%i][...].astype(np.float32) idx = np.argsort(data[:,-1],axis=0) data_sorted = data[idx][::-1] data_sorted_idx = np.where((data_sorted[:,2]-data_sorted[:,0]>20) & (data_sorted[:,3]-data_sorted[:,1]>20)) data_sorted = data_sorted[data_sorted_idx] #print data_sorted h5_rois[i] = data_sorted[:4000,:4] #cfg.TEST.HAS_RPN=False net =None #prototxt = 'models/pascal_voc/ResNet-50/rfcn_end2end/test_iccv_eb_sigmoid.prototxt' prototxt = 'models/sg_vrd/wsd/test_eb_wsddn_s.prototxt' #model = 'data/rfcn_models/resnet50_rfcn_iter_1200.caffemodel' #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_iter_16000.caffemodel' #model = 'output/rfcn_end2end/voc_0712_train/eb_wsddn_s_iter_5000.caffemodel' model = 'output/rfcn_end2end/sg_vrd_2016_train/eb_wsddn_s_iter_11000.caffemodel' #model = 'data/rfcn_models/resnet50_rfcn_final.caffemodel' #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_eb_sigx_iter_100000.caffemodel' #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_600.caffemodel' caffe.set_mode_gpu() caffe.set_device(0) net = caffe.Net(prototxt, model, caffe.TEST) #prototxt = 'models/pascal_voc/ResNet-50/rfcn_end2end/test_iccv_rpn.prototxt' #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_800.caffemodel' #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_iter_1600.caffemodel' #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_800.caffemodel' #net2 = caffe.Net(prototxt, model, caffe.TEST) #net.params['conv_new_1_zl'][0].data[...] = net2.params['conv_new_1_zl'][0].data[...] #net.params['conv_new_1_zl'][1].data[...] = net2.params['conv_new_1_zl'][1].data[...] 
#net2 = None net.name = 'resnet50_rfcn_iter_1200' num_images = len(imdb.image_index) #num_images = 100 #del imdb.image_index[num_images:] all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) zl.tic() # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} max_per_image =20 thresh = 0.00001 cv2.namedWindow('im',0) cnt = 0 mat_pred_label = [] mat_pred_conf = [] mat_pred_bb = [] mat_gt_label = [] mat_gt_bb = [] for i in xrange(num_images): cnt+=1 # filter out any ground truth boxes im_path = imdb.image_path_at(i) im_name = im_path.split('/')[-1] imid = im_name[:-4] eb_roi = h5_rois[im_name] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect_iccv(net, im, eb_roi) _t['im_detect'].toc() _t['misc'].tic() boxes_tosort = [] for j in xrange(1, 101): inds = np.where(scores[:, j-1] > 0.00001)[0] cls_scores = scores[inds, j-1] cls_boxes = boxes[inds, 1:] #cls_boxes = boxes[inds, j * 4:(j + 1) * 4] # cls_boxes = boxes[inds] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, .7, force_cpu=True) # nms threshold # keep = nms_fast(cls_dets,.3) cls_dets = cls_dets[keep, :] boxes_tosort.append(cls_dets) mat_pred_label_i = [] mat_pred_conf_i = [] mat_pred_bb_i = [] for j in xrange(len(boxes_tosort)): cls_dets = boxes_tosort[j] idx = np.argsort(cls_dets[:,-1],axis=0)[::-1] cls_dets = cls_dets[idx] if cls_dets.shape[0]>max_per_image: cls_dets = cls_dets[:max_per_image,:] for di in xrange(cls_dets.shape[0]): # print 'here' di = cls_dets[di] score = di[-1] cls_idx = j + 1 cls_name = zl.idx2name_cls(m,cls_idx) #cls_name = str(m['meta/cls/idx2name/' + str(cls_idx)][...]) if score > 1: score = 1 if score < thresh: continue cv2.rectangle(im,(di[0],di[1]),(di[2],di[3]),(255,0,0),2) x, y = int(di[0]), int(di[1]) if x < 10: x = 15 if y < 10: y = 15 mat_pred_label_i.append(cls_idx) mat_pred_conf_i.append(score) mat_pred_bb_i.append([di[0],di[1],di[2],di[3]]) cv2.putText(im,cls_name,(x,y),cv2.FONT_HERSHEY_SIMPLEX,1.0,(0,0,255),2) res_line = '%s %d %f %d %d %d %d'%(imid,cls_idx,score,di[0],di[1],di[2],di[3]) mat_pred_label.append(mat_pred_label_i) mat_pred_conf.append(mat_pred_conf_i) mat_pred_bb.append(mat_pred_bb_i) obj_boxes = m['gt/test/%s/obj_boxes'%imid][...] sub_boxes = m['gt/test/%s/sub_boxes'%imid][...] rlp_labels = m['gt/test/%s/rlp_labels'%imid][...] mat_gt_label_i = [] mat_gt_bb_i = [] mat_gt_i = [] for gti in xrange(obj_boxes.shape[0]): mat_gt_i.append([rlp_labels[gti,0],sub_boxes[gti,0],sub_boxes[gti,1],sub_boxes[gti,2],sub_boxes[gti,3]]) mat_gt_i.append([rlp_labels[gti,2],obj_boxes[gti,0],obj_boxes[gti,1],obj_boxes[gti,2],obj_boxes[gti,3]]) if len(mat_gt_i)>0: mat_gt_i = np.array(mat_gt_i) mat_gt_i=zl.unique_arr(mat_gt_i) for gti in xrange(mat_gt_i.shape[0]): mat_gt_bb_i.append(mat_gt_i[gti,1:]) mat_gt_label_i.append(mat_gt_i[gti,0]) mat_gt_label.append(mat_gt_label_i) mat_gt_bb.append(mat_gt_bb_i) #matlab_gt.append(matlab_gt_i) #now get gt cv2.imshow('im',im) if cv2.waitKey(0) == 27: exit(0) _t['misc'].toc() print 'im_detect: {:d} {:.3f}s {:.3f}s' \ .format(cnt, _t['im_detect'].average_time, _t['misc'].average_time) sio.savemat('output/sg_vrd_objs.mat', {'pred_bb': mat_pred_bb, 'pred_conf':mat_pred_conf, 'pred_label':mat_pred_label, 'gt_bb':mat_gt_bb, 'gt_label':mat_gt_label })
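# gen_recall above loads EdgeBoxes proposals from HDF5, sorts them by score,
# drops boxes narrower or shorter than 20 px and keeps the top 4000. A small
# standalone sketch of that filtering step on an (N, 5) array of
# [x1, y1, x2, y2, score] proposals (illustration only):
import numpy as np

def filter_eb_proposals(data, min_size=20, top_k=4000):
    """Sort proposals by score, drop tiny boxes, keep the top_k (x1, y1, x2, y2)."""
    order = np.argsort(data[:, -1])[::-1]  # highest score first
    data = data[order]
    big_enough = (data[:, 2] - data[:, 0] > min_size) & \
                 (data[:, 3] - data[:, 1] > min_size)
    return data[big_enough][:top_k, :4]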
def test_net(sess, net, imdb, weights_filename): output_dir = get_output_dir(imdb, weights_filename) if not os.path.exists(output_dir): os.makedirs(output_dir) det_file = os.path.join(output_dir, 'detections.pkl') print imdb.name if os.path.exists(det_file): with open(det_file, 'rb') as fid: all_boxes = cPickle.load(fid) print 'Detections loaded from {}'.format(det_file) if cfg.IS_RPN: print 'Evaluating detections' imdb.evaluate_proposals(all_boxes, output_dir) else: print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) print 'Evaluating detections' print imdb.name if not 'imagenet3d' in imdb.name: imdb.evaluate_detections(nms_dets, output_dir) imdb.evaluate_detections_one_file(nms_dets, output_dir) return """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS if ('voc' in imdb.name or 'pascal' in imdb.name or 'imagenet3d' in imdb.name) and cfg.IS_RPN == False: max_per_set = 40 * num_images max_per_image = 100 else: max_per_set = np.inf # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 10000 # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) if cfg.IS_RPN: thresh = -np.inf * np.ones(imdb.num_classes) else: thresh = cfg.TEST.DET_THRESHOLD * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] # timers _t = {'im_detect': Timer(), 'misc': Timer()} if cfg.IS_RPN == False: roidb = imdb.roidb for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() if cfg.IS_RPN: boxes_grid, _, _ = get_boxes_grid(im.shape[0], im.shape[1]) scores, boxes, scores_subcls, labels, views, conv5 = im_detect_proposal( net, im, boxes_grid, imdb.num_classes, imdb.num_subclasses, imdb.subclass_mapping) # save conv5 features index = imdb._image_index[i] filename = os.path.join(output_dir, index[5:] + '_conv5.pkl') with open(filename, 'wb') as f: cPickle.dump(conv5, f, cPickle.HIGHEST_PROTOCOL) else: scores, boxes, scores_subcls, views = im_detect( sess, net, im, roidb[i]['boxes'], imdb.num_classes, imdb.num_subclasses) _t['im_detect'].toc() _t['misc'].tic() count = 0 for j in xrange(1, imdb.num_classes): if cfg.IS_RPN: # inds = np.where(scores[:, j] > thresh[j])[0] inds = np.where(labels == j)[0] else: inds = np.where((scores[:, j] > thresh[j]) & (roidb[i]['gt_classes'] == 0))[0] cls_scores = scores[inds, j] subcls_scores = scores_subcls[inds, :] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] cls_views = views[inds, j * 3:(j + 1) * 3] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] subcls_scores = subcls_scores[top_inds, :] cls_boxes = cls_boxes[top_inds, :] cls_views = cls_views[top_inds, :] if cfg.IS_RPN == False: # push new scores onto the minheap for val in cls_scores: heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detection, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] # select the maximum score subclass in this class 
if cfg.TEST.SUBCLS and cfg.IS_RPN == False: index = np.where(imdb.subclass_mapping == j)[0] max_indexes = subcls_scores[:, index].argmax(axis=1) sub_classes = index[max_indexes] else: if subcls_scores.shape[0] == 0: sub_classes = cls_scores else: sub_classes = subcls_scores.argmax(axis=1).ravel() all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis], sub_classes[:, np.newaxis], cls_views)) \ .astype(np.float32, copy=False) count = count + len(cls_scores) if 0: keep = nms(all_boxes[j][i], cfg.TEST.NMS) vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :]) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:d} object detected {:.3f}s {:.3f}s' \ .format(i + 1, num_images, count, _t['im_detect'].average_time, _t['misc'].average_time) for j in xrange(1, imdb.num_classes): for i in xrange(num_images): inds = np.where(all_boxes[j][i][:, 4] > thresh[j])[0] all_boxes[j][i] = all_boxes[j][i][inds, :] det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) if cfg.IS_RPN: print 'Evaluating detections' imdb.evaluate_proposals(all_boxes, output_dir) else: print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) print 'Evaluating detections' if not 'imagenet3d' in imdb.name: imdb.evaluate_detections(nms_dets, output_dir) imdb.evaluate_detections_one_file(nms_dets, output_dir)
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [ [[] for _ in range(num_images)] # python3 # xrange for _ in range(imdb.num_classes) ] # python3 # xrange output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect': Timer(), 'misc': Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb for i in range(num_images): # python3 # xrange # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] if cfg.IS_COLOR_IMG: # loki # cv2.imread # (rgb or bgr) color image im = cv2.imread(imdb.image_path_at(i), cv2.IMREAD_COLOR) else: # loki # cv2.imread # gray image im = cv2.imread(imdb.image_path_at(i), cv2.IMREAD_GRAYSCALE) if cfg.IS_3C_IMG: # loki # (3 channels) gray image im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) _t['im_detect'].tic() scores, boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class for j in range(1, imdb.num_classes): # python3 # xrange inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([ all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes) ]) # python3 # xrange if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in range(1, imdb.num_classes): # python3 # xrange keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print ('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time)) # python3 # print det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print('Evaluating detections') # python3 # print imdb.evaluate_detections(all_boxes, output_dir)
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if cfg.TEST.SEG: n_seg_classes = cfg.SEG_CLASSES confcounts = np.zeros((n_seg_classes, n_seg_classes)) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() if cfg.TEST.SEG: seg_gt = cv2.imread(get_seg_path(imdb._data_path, imdb.image_path_at(i)), -1) if seg_gt is None: print 'Could not read ', get_seg_path(imdb._data_path, imdb.image_path_at(i)) scores, boxes, seg_scores = im_detect(net, im, box_proposals) else: scores, boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() if cfg.TEST.SEG: # evaluate the segmentation seg_labels = np.argmax(seg_scores, axis=2).astype(int) seg_labels = cv2.resize(seg_labels, (seg_gt.shape[1], seg_gt.shape[0]), interpolation=cv2.INTER_NEAREST) sumim = seg_gt + seg_labels * n_seg_classes hs = np.bincount(sumim.flatten(), minlength=n_seg_classes*n_seg_classes) confcounts += hs.reshape((n_seg_classes, n_seg_classes)) print 'Segmentation evaluation' conf = 100.0 * np.divide(confcounts, 1e-20 + confcounts.sum(axis=1)) np.save(output_dir + '/seg_confusion.npy', conf) acc = np.zeros(n_seg_classes) for j in xrange(n_seg_classes): gtj = sum(confcounts[j, :]) resj = sum(confcounts[:, j]) gtresj = confcounts[j, j] acc[j] = 100.0 * gtresj / (gtj + resj - gtresj) print 'Accuracies', acc print 'Mean accuracy', np.mean(acc) print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
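# The `nms(cls_dets, cfg.TEST.NMS)` calls throughout these test loops use the
# repo's compiled (Cython/GPU) NMS. For reference, a pure-NumPy sketch of the
# same greedy algorithm:
import numpy as np

def greedy_nms(dets, iou_thresh):
    """Return indices kept by greedy NMS; dets is [[x1, y1, x2, y2, score], ...]."""
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the highest-scoring remaining box with the rest
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        iou = (w * h) / (areas[i] + areas[order[1:]] - w * h)
        # keep only boxes that overlap the selected one by less than the threshold
        order = order[1:][iou <= iou_thresh]
    return keep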
def test_net(net, imdb, max_per_image=100, thresh=-np.inf, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] if cfg.TEST.MASK_ON: all_masks = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect' : Timer(), 'misc' : Timer(),'im_detect1' : Timer(),'im_detect2' : Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb for i in xrange(num_images): #for i in xrange(1): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) scores = np.empty(shape=[0,cfg.DATASET_CATEGORY_NUMBER]) if cfg.TEST.MASK_ON: masks = np.empty( shape=[0,cfg.DATASET_CATEGORY_NUMBER,cfg.TEST.MASK_RESOLUTION,cfg.TEST.MASK_RESOLUTION] ) _t['im_detect'].tic() for test_scale in np.array([600]): if cfg.TEST.MASK_ON: scores, boxes,masks = im_detect(net, im, box_proposals) else: scores, boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] if cfg.TEST.MASK_ON: cls_masks = masks[inds,j,:,:] if cfg.TEST.AGNOSTIC: cls_boxes = boxes[inds, 4:8] else: cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if cfg.TEST.MASK_ON: cls_masks = cls_masks[keep,:,:] if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets if cfg.TEST.MASK_ON: all_masks[j][i] = cls_masks # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] if cfg.TEST.MASK_ON: all_masks[j][i] = all_masks[j][i][keep,:,:] if cfg.TEST.MASK_ON: for j in xrange(1, imdb.num_classes): cls_masks = all_masks[j][i] cls_dets = all_boxes[j][i] cls_masks_encode = [] for k in range(cls_dets.shape[0]): per_box_height = int(cls_dets[k,3]) - int(cls_dets[k,1]) per_box_width = int(cls_dets[k,2]) - int(cls_dets[k,0]) per_box_masks = cls_masks[k,:,:] per_box_masks = cv2.resize(per_box_masks, dsize=(per_box_width, per_box_height),interpolation=cv2.INTER_LINEAR) per_box_masks_in_image = np.zeros((im.shape[0],im.shape[1])) per_box_masks_in_image[int(cls_dets[k,1]):int(cls_dets[k,3]),int(cls_dets[k,0]):int(cls_dets[k,2])] = np.round(per_box_masks) rle = maskUtils.encode(np.array(per_box_masks_in_image[:, :, np.newaxis], order="F").astype(np.uint8))[0] rle["counts"] = rle["counts"].decode("utf-8") cls_masks_encode.append(rle) cls_masks_encode.append(cls_dets[k,4]) all_masks[j][i] = 
cls_masks_encode _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) if cfg.TEST.MASK_ON: mask_file = os.path.join(output_dir, 'segmentations.pkl') with open(mask_file, 'wb') as ff: cPickle.dump(all_masks, ff, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir) if cfg.TEST.MASK_ON: imdb.evaluate_masks(all_masks, output_dir)
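# The mask branch above resizes each predicted mask into its box, pastes it
# into a full-image binary mask and encodes it with pycocotools RLE. A small
# sketch of just the paste-and-encode step (assumes pycocotools is installed;
# `encode_box_mask` is an illustrative helper, not part of this repo):
import numpy as np
import cv2
from pycocotools import mask as maskUtils

def encode_box_mask(mask_prob, box, im_h, im_w):
    """Paste a soft mask (e.g. 21x21) into an (im_h, im_w) image and RLE-encode it."""
    x1, y1, x2, y2 = [int(v) for v in box[:4]]
    resized = cv2.resize(mask_prob, (x2 - x1, y2 - y1), interpolation=cv2.INTER_LINEAR)
    full = np.zeros((im_h, im_w), dtype=np.uint8)
    full[y1:y2, x1:x2] = np.round(resized).astype(np.uint8)
    # pycocotools expects Fortran-ordered uint8 masks of shape (h, w, n)
    rle = maskUtils.encode(np.asfortranarray(full[:, :, np.newaxis]))[0]
    rle['counts'] = rle['counts'].decode('utf-8')  # make it JSON-serializable
    return rle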
def test_net(sess, net, imdb, weights_filename, max_per_image=300, thresh=0.05, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) #num_images = 10 # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, weights_filename) #print ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>res:" #print output_dir # timers _t = {'im_detect': Timer(), 'misc': Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(sess, net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() if vis: image = im[:, :, (2, 1, 0)] plt.cla() plt.imshow(image) # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): #inds = [] #for x in range(len(scores)): # it = scores[x] # print it # if j == list(it).index(max(it)): # inds.append(x) inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if vis: vis_detections(image, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets if vis: plt.show() # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) #print all_boxes det_file = os.path.join(output_dir, 'detections.pkl') #print det_file with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)
if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

print('Using config:')
pprint.pprint(cfg)

if not args.randomize:
    # fix the random seeds (numpy and caffe) for reproducibility
    np.random.seed(cfg.RNG_SEED)

imdb = get_imdb(args.imdb_name)
print('Loaded dataset `{:s}` for training'.format(imdb.name))
roidb = get_training_roidb(imdb)

output_dir = get_output_dir(imdb, None)
print('Output will be saved to `{:s}`'.format(output_dir))

device_name = '/{}:{:d}'.format(args.device, args.device_id)
print(device_name)

network = get_network(args.network_name)
print('Use network `{:s}` in training'.format(args.network_name))

train_net(network, imdb, roidb, output_dir,
          pretrained_model=args.pretrained_model,
          max_iters=args.max_iters)
def test_net(net, imdb, max_per_image=400, thresh=0.0001, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in range(num_images)] for _ in range(imdb.num_classes)] output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb p = Pool(27) for i in range(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() commands = [] # skip j = 0, because it's the background class for j in range(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) commands.append(cls_dets) # keep = nms(cls_dets, cfg.TEST.NMS) # cls_dets = cls_dets[keep, :] # print cls_scores nms_dets = p.map(psoft, commands) for j in xrange(1, imdb.num_classes): if vis: # vis_detections(im, imdb.classes[j], nms_dets[j-1]) cla = imdb.classes[j] save_root = '/home/yangshun/PycharmProjects/roi_roi/output/roi_kitti_txt_375/data/' if not (os.path.exists(save_root)): os.makedirs(save_root) save_dir = save_root + os.path.split(imdb.image_path_at(i))[1].split('.')[0] + '.txt' result_file = open(save_dir, 'w') for k in range(np.minimum(10, nms_dets[j-1].shape[0])): bbox = nms_dets[j-1][k, :4] score = nms_dets[j-1][k, -1] if score > 0.5: cv2.rectangle(im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 255), 2) text = str(cla) + ": " + str(format(score * 100, '.2f')) font = cv2.FONT_HERSHEY_SIMPLEX cv2.putText(im, text, (bbox[0], bbox[1]), font, .4, (0, 0, 255), 1) result_file.write("%s -1 -1 -10 %.3f %.3f %.3f %.3f -1 -1 -1 -1000 -1000 -1000 -10 %.8f\n" % (cla.capitalize(), bbox[0], bbox[1], bbox[2], bbox[3], score)) all_boxes[j][i] = nms_dets[j-1] # if vis: # cv2.imshow('t', im) # if cv2.waitKey(0) & 0xFF == ord('q'): # cv2.destroyAllWindows() # exit() # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) p.close() det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
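# test_net above parallelizes per-class suppression with multiprocessing.Pool
# (`p.map(psoft, commands)`, where psoft is the repo's soft-NMS wrapper). A
# minimal sketch of the same pattern with a hypothetical worker; the worker
# must be a module-level function so the pool can pickle it:
from multiprocessing import Pool

def _suppress(cls_dets):
    """Hypothetical per-class worker: apply some NMS variant, return the survivors."""
    keep = greedy_nms(cls_dets, 0.3)  # see the greedy_nms sketch above
    return cls_dets[keep, :]

def suppress_all_classes(per_class_dets, workers=8):
    pool = Pool(workers)
    try:
        return pool.map(_suppress, per_class_dets)  # one (N, 5) array per class
    finally:
        pool.close()
        pool.join()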
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect': Timer(), 'misc': Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select only the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() imj = im name = 'output/bads/' + str(i) + '.jpg' for jj in xrange(1, imdb.num_classes): indsj = np.where(scores[:, jj] > thresh)[0] cls_scoresj = scores[indsj, jj] cls_boxesj = boxes[indsj, jj * 4:(jj + 1) * 4] cls_detsj = np.hstack((cls_boxesj, cls_scoresj[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_detsj, cfg.TEST.NMS) cls_detsj = cls_detsj[keep, :] detsj = cls_detsj for ii in xrange(np.minimum(10, detsj.shape[0])): bboxj = detsj[ii, :4] scorej = detsj[ii, -1] if bboxj.size > 0: # boxes are (x1, y1, x2, y2); im.shape is (height, width, channels) x1 = int(bboxj[0]) y1 = int(bboxj[1]) x2 = int(bboxj[2]) y2 = int(bboxj[3]) if x1 < 0: x1 = 0 if y1 < 0: y1 = 0 if x2 > imj.shape[1] - 1: x2 = imj.shape[1] - 1 if y2 > imj.shape[0] - 1: y2 = imj.shape[0] - 1 if scorej > thresh: cv2.rectangle(imj, (x1, y1), (x2, y2), (0, 255, 0), 4) text = str(jj) + ": " + str(scorej) font = cv2.FONT_HERSHEY_SIMPLEX cv2.putText(imj, text, (x1, y1), font, 1, (0, 0, 255), 4) cv2.imwrite(name, imj) # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] # print cls_scores if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
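Since OpenCV images are indexed (row, column), i.e. im.shape is (height, width, channels), the clamping in the visualization loop above is easy to get wrong. A minimal helper that clips an (x1, y1, x2, y2) box to image bounds, assuming the same box layout:

import numpy as np

def clip_box_to_image(box, im_shape):
    """Clamp an (x1, y1, x2, y2) box to image bounds; im_shape is (H, W, C)."""
    h, w = im_shape[:2]
    x1 = np.clip(box[0], 0, w - 1)
    y1 = np.clip(box[1], 0, h - 1)
    x2 = np.clip(box[2], 0, w - 1)
    y2 = np.clip(box[3], 0, h - 1)
    return np.array([x1, y1, x2, y2])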
def test_net(net, imdb): output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) if 'nissan' in imdb.name: output_dir_center = os.path.join(output_dir, 'imagesCenter') if not os.path.exists(output_dir_center): os.makedirs(output_dir_center) output_dir_left = os.path.join(output_dir, 'imagesLeft') if not os.path.exists(output_dir_left): os.makedirs(output_dir_left) output_dir_right = os.path.join(output_dir, 'imagesRight') if not os.path.exists(output_dir_right): os.makedirs(output_dir_right) det_file = os.path.join(output_dir, 'detections.pkl') print imdb.name if os.path.exists(det_file): with open(det_file, 'rb') as fid: all_boxes = cPickle.load(fid) print 'Detections loaded from {}'.format(det_file) if cfg.IS_RPN: print 'Evaluating detections' imdb.evaluate_proposals(all_boxes, output_dir) else: print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) print 'Evaluating detections' print imdb.name if not 'imagenet3d' in imdb.name: imdb.evaluate_detections(nms_dets, output_dir) imdb.evaluate_detections_one_file(nms_dets, output_dir) return """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS if ('voc' in imdb.name or 'pascal' in imdb.name or 'imagenet3d' in imdb.name) and cfg.IS_RPN == False: max_per_set = 40 * num_images max_per_image = 100 else: max_per_set = np.inf # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 10000 # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) if cfg.IS_RPN: thresh = -np.inf * np.ones(imdb.num_classes) else: thresh = cfg.TEST.DET_THRESHOLD * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} if cfg.IS_RPN == False: roidb = imdb.roidb for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() if cfg.IS_RPN: boxes_grid, _, _ = get_boxes_grid(im.shape[0], im.shape[1]) scores, boxes, scores_subcls, labels, views = im_detect_proposal(net, im, boxes_grid, imdb.num_classes, imdb.num_subclasses, imdb.subclass_mapping) # save conv5 features # index = imdb._image_index[i] # filename = os.path.join(output_dir, index[5:] + '_conv5.pkl') # with open(filename, 'wb') as f: # cPickle.dump(conv5, f, cPickle.HIGHEST_PROTOCOL) else: if cfg.TEST.IS_PATCH: scores, boxes, scores_subcls, views = im_detect_patch(net, im, roidb[i]['boxes'], imdb.num_classes, imdb.num_subclasses) else: scores, boxes, scores_subcls, views = im_detect(net, im, roidb[i]['boxes'], imdb.num_classes, imdb.num_subclasses) _t['im_detect'].toc() _t['misc'].tic() count = 0 for j in xrange(1, imdb.num_classes): if cfg.IS_RPN: # inds = np.where(scores[:, j] > thresh[j])[0] inds = np.where(labels == j)[0] else: inds = np.where((scores[:, j] > thresh[j]) & (roidb[i]['gt_classes'] == 0))[0] cls_scores = scores[inds, j] subcls_scores = scores_subcls[inds, :] cls_boxes = boxes[inds, j*4:(j+1)*4] cls_views = views[inds, j*3:(j+1)*3] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] subcls_scores = 
subcls_scores[top_inds, :] cls_boxes = cls_boxes[top_inds, :] cls_views = cls_views[top_inds, :] if cfg.IS_RPN == False: # push new scores onto the minheap for val in cls_scores: heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detection, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] # select the maximum score subclass in this class if cfg.TEST.SUBCLS and cfg.IS_RPN == False: index = np.where(imdb.subclass_mapping == j)[0] max_indexes = subcls_scores[:,index].argmax(axis = 1) sub_classes = index[max_indexes] else: if subcls_scores.shape[0] == 0: sub_classes = cls_scores else: sub_classes = subcls_scores.argmax(axis = 1).ravel() all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis], sub_classes[:, np.newaxis], cls_views)) \ .astype(np.float32, copy=False) count = count + len(cls_scores) if 0: keep = nms(all_boxes[j][i], cfg.TEST.NMS) vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :]) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:d} object detected {:.3f}s {:.3f}s' \ .format(i + 1, num_images, count, _t['im_detect'].average_time, _t['misc'].average_time) for j in xrange(1, imdb.num_classes): for i in xrange(num_images): inds = np.where(all_boxes[j][i][:, 4] > thresh[j])[0] all_boxes[j][i] = all_boxes[j][i][inds, :] det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) if cfg.IS_RPN: print 'Evaluating detections' imdb.evaluate_proposals(all_boxes, output_dir) if 'mot' in imdb.name: imdb.evaluate_proposals_one_file(all_boxes, output_dir) else: print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) print 'Evaluating detections' if not 'imagenet3d' in imdb.name: imdb.evaluate_detections(nms_dets, output_dir) imdb.evaluate_detections_one_file(nms_dets, output_dir)
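The max_per_set bookkeeping above (push every new score onto a per-class minheap, pop until the heap is back within the budget, and use the heap root as the new detection threshold) recurs in several of the test_net variants in this file. A stripped-down sketch of just that mechanism, with hypothetical argument names:

import heapq

def update_class_threshold(top_scores, new_scores, max_per_set, cur_thresh):
    """Minheap-based adaptive threshold for one class.

    top_scores is the per-class minheap (a plain list managed by heapq);
    once it holds more than max_per_set entries, the smallest retained
    score becomes the new detection threshold for that class.
    """
    for val in new_scores:
        heapq.heappush(top_scores, float(val))
    if len(top_scores) > max_per_set:
        while len(top_scores) > max_per_set:
            heapq.heappop(top_scores)
        cur_thresh = top_scores[0]
    return cur_thresh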
def test_net(net, imdb, max_per_image=400, thresh=0.0001, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb p = Pool(27) for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() commands = [] # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] if cfg.TEST.AGNOSTIC: cls_boxes = boxes[inds, 4:8] else: cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) commands.append(cls_dets) nms_dets = p.map(psoft, commands) for j in xrange(1, imdb.num_classes): if vis: vis_detections(im, imdb.classes[j], nms_dets[j-1]) all_boxes[j][i] = nms_dets[j-1] # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) p.close() det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
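The variant above offloads per-class NMS to a multiprocessing.Pool via psoft, a soft-NMS worker defined elsewhere in the repo. A minimal sketch of the same pattern with a plain greedy-NMS worker standing in for psoft (the 0.3 IoU threshold, the pool size, and the fake detections are illustrative, not the repo's values):

import numpy as np
from multiprocessing import Pool

def greedy_nms(dets, thresh=0.3):
    """Plain greedy NMS over an N x 5 array of (x1, y1, x2, y2, score)."""
    if dets.shape[0] == 0:
        return dets
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        iou = w * h / (areas[i] + areas[order[1:]] - w * h)
        order = order[np.where(iou <= thresh)[0] + 1]
    return dets[keep, :]

if __name__ == '__main__':
    def fake_dets(n=50):
        # random but well-formed (x1, y1, x2, y2, score) rows for the demo
        xy = np.random.rand(n, 2) * 200
        wh = np.random.rand(n, 2) * 50 + 5
        s = np.random.rand(n, 1)
        return np.hstack((xy, xy + wh, s)).astype(np.float32)

    pool = Pool(4)
    nms_dets = pool.map(greedy_nms, [fake_dets() for _ in range(20)])
    pool.close()
    pool.join()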
def test_net(nets, imdb, proposal, proposal_file, classification_file, output_dir): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # DJDJ #num_images = 100 base_thresh = 0.01 # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = 40 * num_images # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 100 # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] if len(output_dir) == 0: output_dir = get_output_dir(imdb, nets[0]) else: output_dir = osp.abspath(output_dir) if not os.path.exists(output_dir): os.makedirs(output_dir) if proposal_file != '': candidiate_db = leveldb.LevelDB(proposal_file) if classification_file != None: classification_db = leveldb.LevelDB(classification_file) else: classification_db = None # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} roidb = imdb.roidb for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() if roidb != None: data_id = roidb[i]['label_file'].split('.')[0] else: # Test dataset data_id = imdb.image_index[i] proposals = candidiate_db.Get(data_id) proposals = cPickle.loads(proposals) proposals = proposals[:cfg.MAX_PROPOSAL_NO] if classification_db != None: classifications = classification_db.Get(data_id + '.JPEG') classifications = cPickle.loads(classifications) classifications = np.tile(classifications, (len(proposals), 1)) else: classifications = None if 'voc' in imdb.name: total_scores = np.zeros((len(nets), len(proposals), 21)) total_boxes = np.zeros((len(nets), len(proposals), 84)) elif 'imagenet' in imdb.name: total_scores = np.zeros((len(nets), len(proposals), 201)) total_boxes = np.zeros((len(nets), len(proposals), 804)) if len(proposals) > 0: net_no = 0 for net in nets: scores, boxes = im_detect(net, im, proposals, classifications) total_scores[net_no, :, :] = scores total_boxes[net_no, :, :] = boxes net_no += 1 scores = np.average(total_scores, axis=0) boxes = np.average(total_boxes, axis=0) _t['im_detect'].toc() _t['misc'].tic() for j in xrange(1, imdb.num_classes): if cfg.TEST.SVM == True: #inds = np.where((scores[:, j] > thresh[j]) & # (roidb[i]['gt_classes'] == 0))[0] inds = np.where((scores[:, j] > thresh[j]))[0] else: inds = np.where((scores[:, j] >= base_thresh))[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] #cls_scores = scores[:, j] #cls_boxes = boxes[:, j*4:(j+1)*4] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] cls_boxes = cls_boxes[top_inds, :] # push new scores onto the minheap for val in cls_scores: heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detection, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) if 0: keep = nms(all_boxes[j][i], 0.3) vis_detections(im, imdb.classes[j], 
all_boxes[j][i][keep, :]) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time) else: all_boxes[j][i] = np.zeros((1, 5)) \ .astype(np.float32, copy=False) for j in xrange(1, imdb.num_classes): for i in xrange(num_images): inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0] all_boxes[j][i] = all_boxes[j][i][inds, :] det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) all_boxes = None print 'Evaluating detections' imdb.evaluate_detections(nms_dets, output_dir)
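The ensemble loop above runs every net on the same proposals and then averages the class scores and the box regressions elementwise over the nets. The same averaging in isolation, with illustrative shapes:

import numpy as np

num_nets, num_proposals, num_classes = 3, 300, 21
total_scores = np.random.rand(num_nets, num_proposals, num_classes)
total_boxes = np.random.rand(num_nets, num_proposals, num_classes * 4)

# elementwise mean over the ensemble axis, as in the loop above
scores = np.average(total_scores, axis=0)  # (num_proposals, num_classes)
boxes = np.average(total_boxes, axis=0)    # (num_proposals, num_classes * 4)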
# Also available for further evaluation (From MATLAB, not used yet): miss[iter] = cfg.miss #roc[iter] = cfg.roc #gt[iter] = cfg.gt #dt[iter] = cfg.dt # Just to check data structure: #print("Miss : " + str( miss.get(miss.keys()[0]) )) #print("ROCs : " + str( roc.get(roc.keys()[0]) )) #print("GTs : " + str( gt.get(gt.keys()[0]) )) #print("DTs : " + str( dt.get(dt.keys()[0]) )) keys = mAP.keys() keys.sort() templine = [] mAP_outFile = os.path.join(get_output_dir(imdb, forKaist=True), 'mAP_mPrec_mRec_laMiss.txt') print( "########################################################################" ) print( "########################################################################" ) print("KEY \tmAP \tmPrec \tmRec \tlaMiss:") for key in keys: value_mAP = mAP.get(key) value_mPrec = mPrec.get(key) value_mRec = mRec.get(key) value_miss = miss.get(key) print("%d\t%.4f\t%.4f\t%.4f\t%.4f" % (key, value_mAP, value_mPrec, value_mRec, value_miss))
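mAP_outFile above is constructed but, at least in this excerpt, nothing is written to it. A hedged sketch of dumping the same table that the loop prints, assuming the mAP/mPrec/mRec/miss dicts shown above:

def write_map_summary(path, keys, mAP, mPrec, mRec, miss):
    """Write the per-iteration mAP/mPrec/mRec/laMiss table to a text file."""
    with open(path, 'w') as f:
        f.write("KEY\tmAP\tmPrec\tmRec\tlaMiss\n")
        for key in keys:
            f.write("%d\t%.4f\t%.4f\t%.4f\t%.4f\n" %
                    (key, mAP[key], mPrec[key], mRec[key], miss[key]))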
def test_net(feature_net, embed_net, recurrent_net, imdb, vis=True, use_box_at = -1): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) if DEBUG: print 'number of images: %d' % num_images # all detections are collected into: # all_regions[image] = list of {'image_id', caption', 'location', 'location_seq'} all_regions = [None] * num_images results = {} output_dir = get_output_dir(imdb, feature_net) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb #read vocabulary & add <eos> tag vocab = list(imdb.get_vocabulary()) vocab.insert(0, '<EOS>') for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes, captions = im_detect(feature_net, embed_net, recurrent_net, im, box_proposals, use_box_at=use_box_at) _t['im_detect'].toc() _t['misc'].tic() # only one positive class if DEBUG: print 'shape of scores' print scores.shape pos_dets = np.hstack((boxes, scores[:,np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(pos_dets, cfg.TEST.NMS) pos_dets = pos_dets[keep, :] pos_scores = scores[keep] pos_captions = [sentence(vocab, captions[idx]) for idx in keep] pos_boxes = boxes[keep,:] if vis: vis_detections(imdb.image_path_at(i), im, pos_captions, pos_dets, save_path = os.path.join(output_dir,'vis')) all_regions[i] = [] #follow the format of baseline models routine for cap, box, prob in zip(pos_captions, pos_boxes, pos_scores): anno = {'image_id':i, 'prob': format(prob,'.3f'), 'caption':cap, \ 'location': box.tolist()} all_regions[i].append(anno) key = imdb.image_path_at(i).split('/')[-1] results[key] = {} results[key]['boxes'] = pos_boxes.tolist() results[key]['logprobs'] = np.log(pos_scores + eps).tolist() results[key]['captions'] = pos_captions _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) # write file for evaluation with Torch code from Justin print 'write to result.json' det_file = os.path.join(output_dir, 'results.json') with open(det_file, 'w') as f: json.dump(results, f) print 'Evaluating detections' #gt_regions = imdb.get_gt_regions() # is a list gt_regions_merged = [None] * num_images #transform gt_regions into the baseline model routine for i, image_index in enumerate(imdb.image_index): new_gt_regions = [] regions = imdb.get_gt_regions_index(image_index) for reg in regions['regions']: loc = np.array([reg['x'], reg['y'], reg['x'] + reg['width'], reg['y'] + reg['height']]) anno = {'image_id':i, 'caption': reg['phrase'].encode('ascii','ignore'), 'location': loc} new_gt_regions.append(anno) #merge regions with large overlapped areas assert(len(new_gt_regions) > 0) gt_regions_merged[i] = region_merge(new_gt_regions) image_ids = range(num_images) vg_evaluator = VgEvalCap(gt_regions_merged, all_regions) vg_evaluator.params['image_id'] = image_ids vg_evaluator.evaluate()
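The captioning test above prepends '<EOS>' at vocabulary index 0 and decodes each index sequence with a sentence() helper defined elsewhere in the repo. A hypothetical stand-in for that helper, assuming index 0 marks end-of-sentence as set up above:

def decode_caption(vocab, token_ids):
    """Map word indices to strings, stopping at the <EOS> token (index 0)."""
    words = []
    for idx in token_ids:
        if idx == 0:  # vocab[0] == '<EOS>'
            break
        words.append(vocab[int(idx)])
    return ' '.join(words)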
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False, output_dir=None): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] if output_dir is None: output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) det_file = os.path.join(output_dir, 'detections.pkl') if not os.path.exists(det_file): # timers _t = {'im_detect': Timer(), 'misc': Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select only the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) if 'depth' in cfg.INPUT: height, width = im.shape[:2] dp = np.load(imdb.depth_path_at(i)) dp[dp == -1] = 0 #dp = np.memmap(imdb.depth_path_at(i), dtype=np.float32, shape=(height, width)) #dp = np.asarray(dp) ims = [im, dp] _t['im_detect'].tic() scores, boxes = im_detect_depth(net, ims, box_proposals) _t['im_detect'].toc() else: _t['im_detect'].tic() scores, boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes - 1): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([ all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes - 1) ]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] # use the same class range as above; the last class is never filled for j in xrange(1, imdb.num_classes - 1): keep = np.where( all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) # det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) else: print 'The "detections.pkl" file exists; loading detection boxes from file.' with open(det_file, 'rb') as f: all_boxes = cPickle.load(f) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
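Several test_net variants in this file share the same max_per_image cap: gather the scores of every class for one image, find the score of the max_per_image-th best detection, and drop everything below it. The same logic in isolation (class index 0 is background and is skipped, as above):

import numpy as np

def cap_detections_per_image(dets_per_class, max_per_image):
    """dets_per_class[j] is the N_j x 5 detection array for class j (0 = background)."""
    cls_dets = [d for d in dets_per_class[1:] if len(d) > 0]
    if not cls_dets:
        return dets_per_class
    scores = np.hstack([d[:, -1] for d in cls_dets])
    if scores.size <= max_per_image:
        return dets_per_class
    image_thresh = np.sort(scores)[-max_per_image]
    return [dets_per_class[0]] + [d[d[:, -1] >= image_thresh] if len(d) > 0 else d
                                  for d in dets_per_class[1:]]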
def test_net_with_gt_boxes(net, imdb, max_per_image=400, thresh=-np.inf, vis=False, load_cache=False): """Test a Fast R-CNN network on an image database, evaluating attribute and relation detections given ground truth boxes.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in range(num_images)] for _ in range(imdb.num_attributes)] rel_boxes = [[[] for _ in range(num_images)] for _ in range(imdb.num_relations)] output_dir = get_output_dir(imdb, net, attributes=True) det_file = os.path.join(output_dir, 'attribute_detections.pkl') rel_file = os.path.join(output_dir, 'relation_detections.pkl') if load_cache and os.path.exists(det_file): print('Loading pickled detections from %s' % det_file) with open(det_file, 'rb') as f: all_boxes = pickle.load(f) with open(rel_file, 'rb') as f: rel_boxes = pickle.load(f) else: # timers _t = {'im_detect': Timer(), 'misc': Timer()} roidb = imdb.gt_roidb() for i in range(num_images): box_proposals = roidb[i]['boxes'] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes, attr_scores, rel_scores = im_detect( net, im, box_proposals, force_boxes=True) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the no attribute class if attr_scores.shape[1] < imdb.num_attributes: attr_scores = np.hstack((np.zeros( (attr_scores.shape[0], 1)), attr_scores)) if rel_scores and rel_scores.shape[1] < imdb.num_relations: rel_scores = np.hstack((np.zeros( (rel_scores.shape[0], 1)), rel_scores)) for j in range(1, imdb.num_attributes): inds = np.where(attr_scores[:, j] > thresh)[0] cls_scores = attr_scores[inds, j] cls_boxes = box_proposals[inds, :] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all attributes* if max_per_image > 0: image_scores = np.hstack([ all_boxes[j][i][:, 4] for j in range(1, imdb.num_attributes) ]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in range(1, imdb.num_attributes): keep = np.where( all_boxes[j][i][:, 4] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] if vis: im_boxes = [ all_boxes[j][i] for j in range(imdb.num_attributes) ] vis_multiple(im, imdb.attributes, im_boxes, filename='attr_%d.png' % i) if rel_scores: vis_relations(im, imdb.relations, box_proposals, rel_scores, filename='rel_%d.png' % i) _t['misc'].toc() print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time)) with open(det_file, 'wb') as f: pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) print('Evaluating attribute and / or relation detections') imdb.evaluate_attributes(all_boxes, output_dir)
def multi_scale_test_net_512(net, imdb, vis=False, redoInference=True): targe_size = 512 num_images = len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) det_file = os.path.join(output_dir, 'detections.pkl') # Only redo inference when redo flag is set or detections file does not exist if redoInference or not os.path.isfile(det_file): print 'Detection files not existing OR redo parameter set --> Re-executing inference...' for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) # ori and flip det0 = im_detect(net, im, targe_size) det0_f = flip_im_detect(net, im, targe_size) det0 = np.row_stack((det0, det0_f)) det_r = im_detect_ratio(net, im, targe_size, int(0.75*targe_size)) det_r_f = flip_im_detect_ratio(net, im, targe_size, int(0.75*targe_size)) det_r = np.row_stack((det_r, det_r_f)) # shrink: only detect big object det_s1 = im_detect(net, im, int(0.5*targe_size)) det_s1_f = flip_im_detect(net, im, int(0.5*targe_size)) det_s1 = np.row_stack((det_s1, det_s1_f)) det_s2 = im_detect(net, im, int(0.75*targe_size)) det_s2_f = flip_im_detect(net, im, int(0.75*targe_size)) det_s2 = np.row_stack((det_s2, det_s2_f)) # #enlarge: only detect small object det3 = im_detect(net, im, int(1.75*targe_size)) det3_f = flip_im_detect(net, im, int(1.75*targe_size)) det3 = np.row_stack((det3, det3_f)) index = np.where(np.minimum(det3[:, 2] - det3[:, 0] + 1, det3[:, 3] - det3[:, 1] + 1) < 128)[0] det3 = det3[index, :] det4 = im_detect(net, im, int(1.5*targe_size)) det4_f = flip_im_detect(net, im, int(1.5*targe_size)) det4 = np.row_stack((det4, det4_f)) index = np.where(np.minimum(det4[:, 2] - det4[:, 0] + 1, det4[:, 3] - det4[:, 1] + 1) < 192)[0] det4 = det4[index, :] # More scales make coco get better performance if 'coco' in imdb.name: det5 = im_detect(net, im, int(1.25*targe_size)) det5_f = flip_im_detect(net, im, int(1.25*targe_size)) det5 = np.row_stack((det5, det5_f)) index = np.where(np.minimum(det5[:, 2] - det5[:, 0] + 1, det5[:, 3] - det5[:, 1] + 1) < 224)[0] det5 = det5[index, :] det6 = im_detect(net, im, int(2*targe_size)) det6_f = flip_im_detect(net, im, int(2*targe_size)) det6 = np.row_stack((det6, det6_f)) index = np.where(np.minimum(det6[:, 2] - det6[:, 0] + 1, det6[:, 3] - det6[:, 1] + 1) < 96)[0] det6 = det6[index, :] det7 = im_detect(net, im, int(2.25*targe_size)) det7_f = flip_im_detect(net, im, int(2.25*targe_size)) det7 = np.row_stack((det7, det7_f)) index = np.where(np.minimum(det7[:, 2] - det7[:, 0] + 1, det7[:, 3] - det7[:, 1] + 1) < 64)[0] det7 = det7[index, :] det = np.row_stack((det0, det_r, det_s1, det_s2, det3, det4, det5, det6, det7)) else: det = np.row_stack((det0, det_r, det_s1, det_s2, det3, det4)) for j in xrange(1, imdb.num_classes): inds = np.where(det[:, -1] == j)[0] if inds.shape[0] > 0: cls_dets = det[inds, :-1].astype(np.float32) if 'coco' in imdb.name: cls_dets = soft_bbox_vote(cls_dets) else: cls_dets = bbox_vote(cls_dets) all_boxes[j][i] = cls_dets if vis: vis_detections(im, imdb.classes[j], cls_dets) print 'im_detect: {:d}/{:d}'.format(i + 1, num_images) with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) # Else: Load dumped detections file and proceed evaluation else: print 'Detection files already existing --> Loading detections from file...' 
with open(det_file, 'rb') as f: all_boxes = cPickle.load(f) if imdb.name == 'voc_2012_test': print 'Saving detections' imdb.config['use_salt'] = False imdb._write_voc_results_file(all_boxes) else: print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
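When merging scales, multi_scale_test_net_512 above only trusts enlarged inputs for small objects: boxes whose shorter side exceeds a scale-dependent limit (128 px at 1.75x, 192 px at 1.5x, and so on) are dropped before the vote. That filter in isolation, assuming detection rows start with (x1, y1, x2, y2):

import numpy as np

def keep_small_boxes(dets, max_min_side):
    """Keep detections whose shorter side is below max_min_side pixels."""
    widths = dets[:, 2] - dets[:, 0] + 1
    heights = dets[:, 3] - dets[:, 1] + 1
    return dets[np.minimum(widths, heights) < max_min_side]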
# parse gpus gpus = map(int, args.gpu.split(',')) assert len(gpus) >= mpi_size, "Number of GPUs must be >= MPI size" cfg.GPU_ID = gpus[mpi_rank] print('Using config:') pprint.pprint(cfg) # set up caffe caffe.mpi_init() caffe.set_mode_gpu() caffe.set_device(cfg.GPU_ID) if not args.randomize: # fix the random seeds (numpy and caffe) for reproducibility np.random.seed(cfg.RNG_SEED) caffe.set_random_seed(cfg.RNG_SEED) imdb, roidb = combined_roidb(args.imdb_name) print '{:d} roidb entries'.format(len(roidb)) output_dir = get_output_dir(imdb.name) print 'Output will be saved to `{:s}`'.format(output_dir) train_net(args.solver, roidb, output_dir, previous_state=args.previous_state, pretrained_model=args.pretrained_model, max_iters=args.max_iters) caffe.mpi_finalize()
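The MPI launcher above assigns one GPU per rank by indexing the comma-separated --gpu argument with mpi_rank. The same mapping in isolation, with illustrative values for the rank and world size:

# e.g. args.gpu == '0,1,2,3'
gpus = [int(g) for g in '0,1,2,3'.split(',')]
mpi_rank, mpi_size = 1, 4
assert len(gpus) >= mpi_size, "Number of GPUs must be >= MPI size"
gpu_id = gpus[mpi_rank]  # rank 1 -> GPU 1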
def eval(): cfg_from_file('experiments/cfgs/rfcn_end2end.yml') #cfg_from_file('experiments/cfgs/rfcn_end2end_iccv_eb.yml') imdb, roidb = combined_roidb('voc_0712_test') import cv2 net =None prototxt = 'models/pascal_voc/ResNet-50/rfcn_end2end/test_iccv_rpn.prototxt' #model = 'data/rfcn_models/resnet50_rfcn_iter_1200.caffemodel' model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_iter_1600.caffemodel' #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_600.caffemodel' caffe.set_mode_gpu() caffe.set_device(0) net = caffe.Net(prototxt, model, caffe.TEST) #prototxt = 'models/pascal_voc/ResNet-50/rfcn_end2end/test_iccv_rpn.prototxt' ##model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_800.caffemodel' #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_iter_1600.caffemodel' #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_800.caffemodel' #net2 = caffe.Net(prototxt, model, caffe.TEST) #net.params['conv_new_1_zl'][0].data[...] = net2.params['conv_new_1_zl'][0].data[...] #net.params['conv_new_1_zl'][1].data[...] = net2.params['conv_new_1_zl'][1].data[...] #net2 = None net.name = 'resnet50_rfcn_iter_1200' num_images = len(imdb.image_index) num_images = 100 del imdb.image_index[num_images:] #num_images = 10#len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb max_per_image = 300 thresh = 0.0 for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. 
box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, box_proposals) attention = net.blobs['attention'].data.squeeze() #net.blobs['attention'].data #scores = np.multiply(scores, attention) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] if cfg.TEST.AGNOSTIC: cls_boxes = boxes[inds, 1:] else: cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True) cls_dets = cls_dets[keep, :] all_boxes[j][i] = cls_dets cls_str = imdb.classes[j] for roi in all_boxes[j][i]: # cast to int: OpenCV drawing functions expect integer pixel coordinates cv2.putText(im, cls_str, (int(roi[0]), int(roi[1])), cv2.FONT_HERSHEY_COMPLEX, 1.0, (255, 0, 0), 1) cv2.rectangle(im, (int(roi[0]), int(roi[1])), (int(roi[2]), int(roi[3])), (0, 0, 255), 1) # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] #cv2.imshow('vis', im) #cv2.waitKey(0) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
def test_net(net, imdb): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = cfg.TEST.MAX_PER_SET_F * num_images # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = cfg.TEST.MAX_PER_IMAGE # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} roidb = imdb.roidb for i in xrange(num_images): image_paths = imdb.image_path_at(i); im = []; for image_path in image_paths: image_path2 = image_path + '_hha.png' im1 = cv2.imread(image_path) im2 = cv2.imread(image_path2) ims = np.zeros((im1.shape[0], im1.shape[1], 6)) # TODO: to test on lua pre-trained model use: im1 = im1[:, :, ::-1] im2 = im2[:, :, ::-1] ims[:,:,0:3] = im1 ims[:,:,3:6] = im2 im.append(ims) _t['im_detect'].tic() scores, boxes = im_detect(net, im, roidb[i]['boxes']) _t['im_detect'].toc() _t['misc'].tic() for j in xrange(1, imdb.num_classes): inds = np.where((scores[:, j] > thresh[j]) & (roidb[i]['gt_classes'] == 0))[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] cls_boxes = cls_boxes[top_inds, :] # push new scores onto the minheap for val in cls_scores: heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detection, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) if 0: keep = nms(all_boxes[j][i], 0.3) vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :]) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) for j in xrange(1, imdb.num_classes): for i in xrange(num_images): inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0] all_boxes[j][i] = all_boxes[j][i][inds, :] det_file = os.path.join(output_dir, 'detections' + cfg.TEST.DET_SALT + '.pkl') g_utils.save_variables(det_file, [all_boxes], ['all_boxes'], overwrite = True) det_file = os.path.join(output_dir, 'detections' + cfg.TEST.DET_SALT + '.pkl') g_utils.scio.savemat(det_file, {'all_boxes': all_boxes}, do_compression = True) print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) print 'Evaluating detections' ap, prec, rec, classes, class_to_ind = imdb.evaluate_detections(nms_dets, output_dir, cfg.TEST.DET_SALT, cfg.TEST.EVAL_SALT)
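The loop above feeds the network a 6-channel array per image: channels 0-2 hold the color frame and channels 3-5 the HHA encoding of depth, loaded from a sibling '_hha.png' file. A sketch of just that packing, following the suffix convention used above; set bgr_to_rgb=True to reorder channels as the TODO in the original loop suggests:

import cv2
import numpy as np

def load_rgb_hha(image_path, bgr_to_rgb=False):
    """Stack an image and its HHA depth encoding into one 6-channel array."""
    im_rgb = cv2.imread(image_path)
    im_hha = cv2.imread(image_path + '_hha.png')
    if bgr_to_rgb:
        im_rgb = im_rgb[:, :, ::-1]
        im_hha = im_hha[:, :, ::-1]
    stacked = np.zeros(im_rgb.shape[:2] + (6,), dtype=im_rgb.dtype)
    stacked[:, :, 0:3] = im_rgb
    stacked[:, :, 3:6] = im_hha
    return stacked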
def test_net(sess, net, imdb, weights_filename, max_per_image=300, thresh=0.05, vis=False, force=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in range(num_images)] for _ in range(imdb.num_classes)] output_dir = get_output_dir(imdb, weights_filename) det_file = os.path.join(output_dir, 'detections.pkl') if (force and os.path.exists(det_file)): os.remove(det_file) if (not os.path.exists(det_file)): # timers _t = {'im_detect': Timer(), 'misc': Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb for i in range(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(sess, net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() if vis: image = im[:, :, (2, 1, 0)] plt.cla() plt.imshow(image) # skip j = 0, because it's the background class ttt = 0 bbox_img = [] bscore_img = [] bbc = 0 #bbox count index_map = dict() for j in range(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] ttt += len(inds) cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if vis: vis_detections(image, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets #cls_dets.shape == [nb_detections_for_cls_j, 5] # we need to get all bboxes in a image regardless of classes # if (cls_dets.shape[0] > 0): # bbox_img.append(cls_dets[:, 0:-1]) # bscore_img.append(np.reshape(cls_dets[:, -1], [-1, 1])) # # remember the mapping # for bc in range(cls_dets.shape[0]): # index_map[bbc] = (j, bc) # bbc += 1 removed = 0 # if (len(bbox_img) > 0): # boxes = np.vstack(bbox_img) # scores = np.vstack(bscore_img) # keep_indices = remove_embedded(boxes, scores, remove_option=1) # removed = bbc - len(keep_indices) # # need to find out which j, and which k correspond to which index # cls_keep = defaultdict(list) # for ki in keep_indices: # j, bc = index_map[ki] # cls_keep[j].append(bc) # # for j in xrange(1, imdb.num_classes): # if (j in cls_keep): # all_boxes[j][i] = all_boxes[j][i][cls_keep[j], :] if vis: plt.show() # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([ all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes) ]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in range(1, imdb.num_classes): keep = np.where( all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print('im_detect: {:d}/{:d} {:d} detection {:d} removed {:.3f}s' \ .format(i + 1, num_images, ttt, removed, _t['im_detect'].average_time)) with open(det_file, 'wb') as f: six.moves.cPickle.dump(all_boxes, f, six.moves.cPickle.HIGHEST_PROTOCOL) else: with open(det_file, 'r') as fin: all_boxes = six.moves.cPickle.load(fin) print('Evaluating detections') 
imdb.evaluate_detections(all_boxes, output_dir)
def multi_scale_test_net_320(net, imdb, vis=False): targe_size = 320 num_images = len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) # ori and flip det0 = im_detect(net, im, targe_size) det0_f = flip_im_detect(net, im, targe_size) det0 = np.row_stack((det0, det0_f)) det_r = im_detect_ratio(net, im, targe_size, int(0.6*targe_size)) det_r_f = flip_im_detect_ratio(net, im, targe_size, int(0.6*targe_size)) det_r = np.row_stack((det_r, det_r_f)) # shrink: only detect big object det1 = im_detect(net, im, int(0.6*targe_size)) det1_f = flip_im_detect(net, im, int(0.6*targe_size)) det1 = np.row_stack((det1, det1_f)) index = np.where(np.maximum(det1[:, 2] - det1[:, 0] + 1, det1[:, 3] - det1[:, 1] + 1) > 32)[0] det1 = det1[index, :] #enlarge: only detect small object det2 = im_detect(net, im, int(1.2*targe_size)) det2_f = flip_im_detect(net, im, int(1.2*targe_size)) det2 = np.row_stack((det2, det2_f)) index = np.where(np.minimum(det2[:, 2] - det2[:, 0] + 1, det2[:, 3] - det2[:, 1] + 1) < 160)[0] det2 = det2[index, :] det3 = im_detect(net, im, int(1.4*targe_size)) det3_f = flip_im_detect(net, im, int(1.4*targe_size)) det3 = np.row_stack((det3, det3_f)) index = np.where(np.minimum(det3[:, 2] - det3[:, 0] + 1, det3[:, 3] - det3[:, 1] + 1) < 128)[0] det3 = det3[index, :] det4 = im_detect(net, im, int(1.6*targe_size)) det4_f = flip_im_detect(net, im, int(1.6*targe_size)) det4 = np.row_stack((det4, det4_f)) index = np.where(np.minimum(det4[:, 2] - det4[:, 0] + 1, det4[:, 3] - det4[:, 1] + 1) < 96)[0] det4 = det4[index, :] det5 = im_detect(net, im, int(1.8*targe_size)) det5_f = flip_im_detect(net, im, int(1.8*targe_size)) det5 = np.row_stack((det5, det5_f)) index = np.where(np.minimum(det5[:, 2] - det5[:, 0] + 1, det5[:, 3] - det5[:, 1] + 1) < 64)[0] det5 = det5[index, :] det7 = im_detect(net, im, int(2.2*targe_size)) det7_f = flip_im_detect(net, im, int(2.2*targe_size)) det7 = np.row_stack((det7, det7_f)) index = np.where(np.minimum(det7[:, 2] - det7[:, 0] + 1, det7[:, 3] - det7[:, 1] + 1) < 32)[0] det7 = det7[index, :] # More scales make coco get better performance if 'coco' in imdb.name: det6 = im_detect(net, im, int(2.0*targe_size)) det6_f = flip_im_detect(net, im, int(2.0*targe_size)) det6 = np.row_stack((det6, det6_f)) index = np.where(np.minimum(det6[:, 2] - det6[:, 0] + 1, det6[:, 3] - det6[:, 1] + 1) < 48)[0] det6 = det6[index, :] det = np.row_stack((det0, det_r, det1, det2, det3, det4, det5, det7, det6)) else: det = np.row_stack((det0, det_r, det1, det2, det3, det4, det5, det7)) for j in xrange(1, imdb.num_classes): inds = np.where(det[:, -1] == j)[0] if inds.shape[0] > 0: cls_dets = det[inds, :-1].astype(np.float32) if 'coco' in imdb.name: cls_dets = soft_bbox_vote(cls_dets) else: cls_dets = bbox_vote(cls_dets) all_boxes[j][i] = cls_dets if vis: vis_detections(im, imdb.classes[j], cls_dets) print 'im_detect: {:d}/{:d}'.format(i + 1, num_images) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) if imdb.name == 'voc_2012_test': print 'Saving detections' imdb.config['use_salt'] = False imdb._write_voc_results_file(all_boxes) else: print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
np.random.seed(cfg.RNG_SEED) caffe.set_random_seed(cfg.RNG_SEED) # set up caffe caffe.set_mode_gpu() caffe.set_device(cfg.GPU_ID) if __name__ == "__main__": # config cfg.TRAIN.HAS_RPN = True cfg.TRAIN.PROPOSAL_METHOD = 'gt' cfg.TRAIN.BBOX_REG = False cfg.TRAIN.IMS_PER_BATCH = 1 # get_imdb() imdb = FtBody('train') # imdb = pascal_voc('train', '2007') imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) roidb = get_training_roidb(imdb) output_dir = get_output_dir(imdb) print output_dir solver = 'models/ft_body/ZF/faster_rcnn_alt_opt/stage1_rpn_solver60k80k.pt' max_iters = 80000 rpn_test_prototxt = 'models/ft_body/ZF/faster_rcnn_alt_opt/rpn_test.pt' init_model = 'data/imagenet_models/ZF.v2.caffemodel' _init_caffe(cfg) model_paths = train_net(solver, roidb, output_dir, pretrained_model=init_model, max_iters=max_iters) print model_paths
def multi_scale_test_net_512(net, imdb, vis=False): targe_size = 512 num_images = len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) # ori and flip det0 = im_detect(net, im, targe_size) det0_f = flip_im_detect(net, im, targe_size) det0 = np.row_stack((det0, det0_f)) det_r = im_detect_ratio(net, im, targe_size, int(0.75*targe_size)) det_r_f = flip_im_detect_ratio(net, im, targe_size, int(0.75*targe_size)) det_r = np.row_stack((det_r, det_r_f)) # shrink: only detect big object det_s1 = im_detect(net, im, int(0.5*targe_size)) det_s1_f = flip_im_detect(net, im, int(0.5*targe_size)) det_s1 = np.row_stack((det_s1, det_s1_f)) det_s2 = im_detect(net, im, int(0.75*targe_size)) det_s2_f = flip_im_detect(net, im, int(0.75*targe_size)) det_s2 = np.row_stack((det_s2, det_s2_f)) # #enlarge: only detect small object det3 = im_detect(net, im, int(1.75*targe_size)) det3_f = flip_im_detect(net, im, int(1.75*targe_size)) det3 = np.row_stack((det3, det3_f)) index = np.where(np.minimum(det3[:, 2] - det3[:, 0] + 1, det3[:, 3] - det3[:, 1] + 1) < 128)[0] det3 = det3[index, :] det4 = im_detect(net, im, int(1.5*targe_size)) det4_f = flip_im_detect(net, im, int(1.5*targe_size)) det4 = np.row_stack((det4, det4_f)) index = np.where(np.minimum(det4[:, 2] - det4[:, 0] + 1, det4[:, 3] - det4[:, 1] + 1) < 192)[0] det4 = det4[index, :] # More scales make coco get better performance if 'coco' in imdb.name: det5 = im_detect(net, im, int(1.25*targe_size)) det5_f = flip_im_detect(net, im, int(1.25*targe_size)) det5 = np.row_stack((det5, det5_f)) index = np.where(np.minimum(det5[:, 2] - det5[:, 0] + 1, det5[:, 3] - det5[:, 1] + 1) < 224)[0] det5 = det5[index, :] det6 = im_detect(net, im, int(2*targe_size)) det6_f = flip_im_detect(net, im, int(2*targe_size)) det6 = np.row_stack((det6, det6_f)) index = np.where(np.minimum(det6[:, 2] - det6[:, 0] + 1, det6[:, 3] - det6[:, 1] + 1) < 96)[0] det6 = det6[index, :] det7 = im_detect(net, im, int(2.25*targe_size)) det7_f = flip_im_detect(net, im, int(2.25*targe_size)) det7 = np.row_stack((det7, det7_f)) index = np.where(np.minimum(det7[:, 2] - det7[:, 0] + 1, det7[:, 3] - det7[:, 1] + 1) < 64)[0] det7 = det7[index, :] det = np.row_stack((det0, det_r, det_s1, det_s2, det3, det4, det5, det6, det7)) else: det = np.row_stack((det0, det_r, det_s1, det_s2, det3, det4)) for j in xrange(1, imdb.num_classes): inds = np.where(det[:, -1] == j)[0] if inds.shape[0] > 0: cls_dets = det[inds, :-1].astype(np.float32) if 'coco' in imdb.name: cls_dets = soft_bbox_vote(cls_dets) else: cls_dets = bbox_vote(cls_dets) all_boxes[j][i] = cls_dets if vis: vis_detections(im, imdb.classes[j], cls_dets) print 'im_detect: {:d}/{:d}'.format(i + 1, num_images) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) if imdb.name == 'voc_2012_test': print 'Saving detections' imdb.config['use_salt'] = False imdb._write_voc_results_file(all_boxes) else: print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
def test_net(net, imdb): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = cfg.TEST.MAX_PER_SET_F * num_images # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = cfg.TEST.MAX_PER_IMAGE # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect': Timer(), 'misc': Timer()} roidb = imdb.roidb for i in xrange(num_images): image_paths = imdb.image_path_at(i) im = [] for image_path in image_paths: im.append(cv2.imread(image_path)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, roidb[i]['boxes']) _t['im_detect'].toc() _t['misc'].tic() for j in xrange(1, imdb.num_classes): inds = np.where((scores[:, j] > thresh[j]) & (roidb[i]['gt_classes'] == 0))[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] cls_boxes = cls_boxes[top_inds, :] # push new scores onto the minheap for val in cls_scores: heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detection, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) if 0: keep = nms(all_boxes[j][i], 0.3) vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :]) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) for j in xrange(1, imdb.num_classes): for i in xrange(num_images): inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0] all_boxes[j][i] = all_boxes[j][i][inds, :] det_file = os.path.join(output_dir, 'detections' + cfg.TEST.DET_SALT + '.pkl') g_utils.save_variables(det_file, [all_boxes], ['all_boxes'], overwrite=True) det_file = os.path.join(output_dir, 'detections' + cfg.TEST.DET_SALT + '.pkl') g_utils.scio.savemat(det_file, {'all_boxes': all_boxes}, do_compression=True) print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) print 'Evaluating detections' ap, prec, rec, classes, class_to_ind = imdb.evaluate_detections( nms_dets, output_dir, cfg.TEST.DET_SALT, cfg.TEST.EVAL_SALT)
def test_net(sess, net, imdb, weights_filename, max_per_image=300, thresh=0.05, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) # all_boxes_cnr[cls][image] = N x 25 array of detections in # (x0-x7, y0-y7, z0-z7, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] all_boxes_img = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] all_boxes_cnr = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] all_calib = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] all_score = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, weights_filename) # timers _t = {'im_detect': Timer(), 'misc': Timer()} # conv1_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv1_1") # conv1_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv1_2") # conv2_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv2_1") # conv2_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv2_2") # conv3_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv3_1") # conv3_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv3_2") # conv3_3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv3_3") # conv4_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv4_1") # conv4_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv4_2") # conv4_3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv4_3") # conv5_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv5_1") # conv5_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv5_2") # conv5_3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv5_3") # rpn_w = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_conv/3x3")[0] # rpn_b = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_conv/3x3")[1] # rpn_w2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_cls_score")[0] # rpn_b2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_cls_score")[1] # rpn_w3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_bbox_pred")[0] # rpn_b3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_bbox_pred")[1] # weights = { # 'conv1_1' : {"weights" : conv1_1[0].eval(session=sess), "biases": conv1_1[1].eval(session=sess)}, # 'conv1_2' : {"weights" : conv1_2[0].eval(session=sess), "biases": conv1_2[1].eval(session=sess)}, # 'conv2_1' : {"weights" : conv2_1[0].eval(session=sess), "biases": conv2_1[1].eval(session=sess)}, # 'conv2_2' : {"weights" : conv2_2[0].eval(session=sess), "biases": conv2_2[1].eval(session=sess)}, # 'conv3_1' : {"weights" : conv3_1[0].eval(session=sess), "biases": conv3_1[1].eval(session=sess)}, # 'conv3_2' : {"weights" : conv3_2[0].eval(session=sess), "biases": conv3_2[1].eval(session=sess)}, # 'conv3_3' : {"weights" : conv3_3[0].eval(session=sess), "biases": conv3_3[1].eval(session=sess)}, # 'conv4_1' : {"weights" : conv4_1[0].eval(session=sess), "biases": conv4_1[1].eval(session=sess)}, # 'conv4_2' : {"weights" : conv4_2[0].eval(session=sess), "biases": conv4_2[1].eval(session=sess)}, # 'conv4_3' : {"weights" : conv4_3[0].eval(session=sess), "biases": conv4_3[1].eval(session=sess)}, # 'conv5_1' : {"weights" : conv5_1[0].eval(session=sess), "biases": conv5_1[1].eval(session=sess)}, # 
'conv5_2' : {"weights" : conv5_2[0].eval(session=sess), "biases": conv5_2[1].eval(session=sess)}, # 'conv5_3' : {"weights" : conv5_3[0].eval(session=sess), "biases": conv5_3[1].eval(session=sess)}, # 'rpn_conv/3x3' : {"weights" : rpn_w.eval(session=sess), "biases": rpn_b.eval(session=sess)}, # 'rpn_cls_score' : {"weights" : rpn_w2.eval(session=sess), "biases": rpn_b2.eval(session=sess)}, # 'rpn_bbox_pred' : {"weights" : rpn_w3.eval(session=sess), "biases": rpn_b3.eval(session=sess)}, # } # # print rpn_w.eval(session=sess) # np.save('rpn_data.npy', weights) # deconv2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="deconv_4x_1")[0] # shape_conv5_3 = conv5_3.get_shape().as_list() # shape1 = deconv1.get_shape().as_list() # shape2 = deconv2.get_shape().as_list() # print 'conv5_3 shape', shape_conv5_3 # print 'deconv_2x_1 shape', shape1 # print 'deconv_4x_1 shape', shape2 for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None im = cv2.imread(imdb.image_path_at(i)) bv = np.load(imdb.lidar_path_at(i)) lidar3D = imdb.lidar3D_path_at(i) GT_boxes3D_corners = imdb.GT_annotation_at(i)["boxes_corners"] GT_boxes3D_camera_corners = imdb.GT_annotation_at( i)["boxes3D_cam_corners"] print "GT_boxes3D_corners", GT_boxes3D_corners # print "GT_boxes3D_camera_corners:",GT_boxes3D_camera_corners calib = imdb.calib_at(i) print "Inference: ", imdb.lidar_path_at(i) _t['im_detect'].tic() scores, boxes_bv, boxes_cnr, boxes_cnr_r = box_detect( sess, net, im, bv, calib, box_proposals) _t['im_detect'].toc() _t['misc'].tic() if vis: image = im[:, :, (2, 1, 0)] plt.cla() plt.imshow(image) thresh = 0.05 #thresh = 0.8 # skip j = 0, because it's the background class #for j in xrange(1, imdb.num_classes): for j in xrange(1, 2): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] #cls_boxes = boxes_bv[inds, j*4:(j+1)*4] #cls_boxes_cnr = boxes_cnr[inds, j * 24:(j + 1) * 24] cls_boxes = boxes_bv[inds, 0:4] cls_boxes_cnr = boxes_cnr[inds, 0:24] cls_boxes_cnr_r = boxes_cnr_r[inds, j * 24:(j + 1) * 24] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) cls_dets_cnr = np.hstack((cls_boxes_cnr, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) cls_dets_cnr_r = np.hstack((cls_boxes_cnr_r, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) # print "scores: ", scores.shape # print "cls_scores: ",cls_scores.shape # print "boxes_bv: ", boxes_bv.shape # print "cls_dets: ", cls_dets.shape # print "inds: ",inds.shape # print "boxes_cnr: ", boxes_cnr.shape # print "cls_dets_cnr: ",cls_dets_cnr.shape keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] cls_dets_cnr = cls_dets_cnr[keep, :] cls_dets_cnr_r = cls_dets_cnr_r[keep, :] cls_scores = cls_scores[keep] #img_boxes = cls_dets_cnr_r[:,4] # project to image if np.any(cls_dets_cnr): plt.rcParams['figure.figsize'] = (10, 10) img_boxes = lidar_cnr_to_img(cls_dets_cnr_r[:, :24], calib[3], calib[2], calib[0]) img = show_image_boxes(im, img_boxes) # plt.imshow(img) # plt.show() all_boxes[j][i] = img_boxes image_bv = show_image_boxes( scale_to_255(bv[:, :, 8], min=0, max=2), cls_dets[:, :4]) image_cnr = show_lidar_corners(im, cls_dets_cnr_r[:, :24], calib) if 1: import mayavi.mlab as mlab #filename = os.path.join(imdb.lidar_path_at(i)[:-19], 'velodyne', str(3).zfill(6)+'.bin') filename = lidar3D print filename scan = np.fromfile(filename, dtype=np.float32) scan = scan.reshape((-1, 4)) corners = cls_dets_cnr[:, :24].reshape( (-1, 3, 
8)).transpose((0, 2, 1)) corners_r = cls_dets_cnr_r[:, :24].reshape( (-1, 3, 8)).transpose((0, 2, 1)) GT_corners = GT_boxes3D_corners[:, :24].reshape( (-1, 3, 8)).transpose((0, 2, 1)) # print corners_r # print GT_corners #print GT_corners #camera_cors_r = lidar_cnr_to_camera(corners_r,calib[3]) fig = mlab.figure(figure=None, bgcolor=(0, 0, 0), fgcolor=None, engine=None, size=(1000, 500)) draw_lidar(scan, fig=fig) draw_gt_boxes3d(corners, fig=fig) draw_gt_boxes3d(corners_r, color=(1, 0, 0), fig=fig) draw_gt_boxes3d(GT_corners, color=(0, 1, 0), fig=fig) mlab.show() # plt.subplot(211) # plt.title('bv proposal') # plt.imshow(image_bv, cmap='jet') # plt.subplot(212) # plt.imshow(image_cnr) # plt.show() all_boxes_cnr[j][i] = cls_dets_cnr_r[:, :24] all_calib[j][i] = calib[3] all_score[j][i] = cls_scores # if vis: # plt.show() # # Limit to max_per_image detections *over all classes* # if max_per_image > 0: # image_scores = np.hstack([all_boxes[j][i][:, -1] # for j in xrange(1, imdb.num_classes)]) # if len(image_scores) > max_per_image: # image_thresh = np.sort(image_scores)[-max_per_image] # for j in xrange(1, imdb.num_classes): # keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] # all_boxes[j][i] = all_boxes[j][i][keep, :] # # all_boxes_img[j][i] = all_boxes_img[j][i][keep, :] # all_boxes_cnr[j][i] = all_boxes_cnr[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) det_cnr_file = os.path.join(output_dir, 'detections_cnr.pkl') with open(det_cnr_file, 'wb') as f: cPickle.dump(all_boxes_cnr, f, cPickle.HIGHEST_PROTOCOL) #print 'Evaluating detections' #imdb.evaluate_detections(all_boxes, all_boxes_cnr, output_dir) imdb.evaluate_detections3D(all_boxes, all_boxes_cnr, all_calib, all_score, output_dir)
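# The per-class loop above follows the standard Fast R-CNN pattern: threshold the
# class scores, stack [x1, y1, x2, y2, score] rows, and prune them with
# nms(cls_dets, cfg.TEST.NMS). A self-contained numpy sketch of that greedy
# IoU-based suppression; nms_py below is illustrative only -- the code above uses
# the repository's compiled nms().
import numpy as np

def nms_py(dets, iou_thresh):
    # dets: (N, 5) array of [x1, y1, x2, y2, score]
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(iou <= iou_thresh)[0] + 1]
    return keep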
while not osp.exists(args.caffemodel) and args.wait:
    print('Waiting for {} to exist...'.format(args.caffemodel))
    time.sleep(10)

caffe.set_mode_gpu()
caffe.set_device(args.gpu_id)

# Detect and store re-id features for all the images in the test images pool
net = caffe.Net(args.gallery_def, args.caffemodel, caffe.TEST)
net.name = osp.splitext(osp.basename(args.caffemodel))[0]
imdb = get_imdb(args.imdb_name)
imdb.competition_mode(args.comp_mode)
if not cfg.TEST.HAS_RPN:
    imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)
test_net_on_gallery_set(net, imdb, args.feat_blob,
                        max_per_image=args.max_per_image, vis=args.vis)

root_dir = imdb._root_dir
images_dir = imdb._data_path
output_dir = get_output_dir(imdb, net)

# Extract features for probe people
net = caffe.Net(args.probe_def, args.caffemodel, caffe.TEST)
net.name = osp.splitext(osp.basename(args.caffemodel))[0]
protoc, probe_images, probe_rois = load_probe(
    root_dir, images_dir, args.gallery_size)
test_net_on_probe_set(net, probe_images, probe_rois, args.feat_blob,
                      output_dir)

# Evaluate
evaluate(protoc, imdb.image_index, output_dir)
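# The evaluate() call above ranks gallery detections against each probe's re-id
# feature. A minimal sketch of that kind of matching, assuming L2-normalizable
# feature vectors; rank_gallery() below is a hypothetical helper, not the
# repository's evaluate().
import numpy as np

def rank_gallery(probe_feat, gallery_feats):
    # probe_feat: (D,), gallery_feats: (N, D); higher cosine similarity = better match
    p = probe_feat / (np.linalg.norm(probe_feat) + 1e-12)
    g = gallery_feats / (np.linalg.norm(gallery_feats, axis=1, keepdims=True) + 1e-12)
    sims = g.dot(p)
    return np.argsort(-sims), sims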
def eval(): cfg_from_file('experiments/cfgs/rfcn_end2end_iccv_eb.yml') #cfg_from_file('experiments/cfgs/rfcn_end2end_iccv_eb.yml') imdb, roidb = combined_roidb('sg_vrd_2016_test') import cv2 #h5f = h5py.File('/media/zawlin/ssd/iccv2017/data/voc/gen_eb.h5',driver='core') h5path = 'data/sg_vrd_2016/EB/eb.h5' h5f = h5py.File(h5path,driver='core') h5_rois = {} for i in h5f['test/']: data=h5f['test/%s'%i][...].astype(np.float32) idx = np.argsort(data[:,-1],axis=0) data_sorted = data[idx][::-1] data_sorted_idx = np.where((data_sorted[:,2]-data_sorted[:,0]>20) & (data_sorted[:,3]-data_sorted[:,1]>20)) data_sorted = data_sorted[data_sorted_idx] #print data_sorted h5_rois[i] = data_sorted[:1000,:4] #cfg.TEST.HAS_RPN=False net =None #prototxt = 'models/pascal_voc/ResNet-50/rfcn_end2end/test_iccv_eb_sigmoid.prototxt' prototxt = 'models/sg_vrd/wsd/test_eb_wsddn_s.prototxt' #model = 'data/rfcn_models/resnet50_rfcn_iter_1200.caffemodel' #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_iter_16000.caffemodel' #model = 'output/rfcn_end2end/voc_0712_train/eb_wsddn_s_iter_5000.caffemodel' model = 'output/rfcn_end2end/sg_vrd_2016_train/eb_wsddn_s_iter_9400.caffemodel' #model = 'data/rfcn_models/resnet50_rfcn_final.caffemodel' #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_eb_sigx_iter_100000.caffemodel' #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_600.caffemodel' caffe.set_mode_gpu() caffe.set_device(0) net = caffe.Net(prototxt, model, caffe.TEST) #prototxt = 'models/pascal_voc/ResNet-50/rfcn_end2end/test_iccv_rpn.prototxt' #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_800.caffemodel' #model = 'output/rfcn_end2end/voc_0712_train/resnet50_rfcn_iter_1600.caffemodel' #model = 'data/rfcn_models_iccv/eb_resnet50_rfcn_iter_800.caffemodel' #net2 = caffe.Net(prototxt, model, caffe.TEST) #net.params['conv_new_1_zl'][0].data[...] = net2.params['conv_new_1_zl'][0].data[...] #net.params['conv_new_1_zl'][1].data[...] = net2.params['conv_new_1_zl'][1].data[...] 
#net2 = None net.name = 'resnet50_rfcn_iter_1200' num_images = len(imdb.image_index) num_images = 100 del imdb.image_index[num_images:] all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) zl.tic() # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} max_per_image =200 thresh = 0.00001 cv2.namedWindow('im',0) cnt = 0 for i in xrange(num_images): # filter out any ground truth boxes im_path = imdb.image_path_at(i) im_name = im_path.split('/')[-1] eb_roi = h5_rois[im_name] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() #scores, boxes = im_detect(net, im, box_proposals) scores, boxes = im_detect_iccv(net, im, eb_roi) #attention = net.blobs['attention'].data.squeeze() #net.blobs['attention'].data #scores = np.multiply(scores,attention) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): if j == 15: dfdfd=1 dfdfd += 1 inds = np.where(scores[:, j-1] > thresh)[0] cls_scores = scores[inds, j-1] if cfg.TEST.AGNOSTIC: cls_boxes = boxes[inds, 1:] else: cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS,force_cpu=True) cls_dets = cls_dets[keep, :] all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] for j in xrange(1, imdb.num_classes): cls_str = imdb.classes[j] for roi in all_boxes[j][i]: cv2.putText(im,cls_str,(roi[0],roi[1]),cv2.FONT_HERSHEY_COMPLEX,1.0,(255,0,0),1) cv2.rectangle(im,(roi[0],roi[1]),(roi[2],roi[3]),(0,0,255),1) cnt += 1 cv2.imwrite('/home/zawlin/%d.jpg'%cnt,im) cv2.imshow('vis',im) cv2.waitKey(0) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir) print zl.toc()
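# The h5 pre-processing at the top of eval() keeps, per test image, the 1000
# highest-scoring EdgeBox proposals whose width and height both exceed 20 px.
# A standalone sketch of that filter (data is an (N, 5) array of
# [x1, y1, x2, y2, score]):
import numpy as np

def filter_eb_proposals(data, min_size=20, top_k=1000):
    order = np.argsort(data[:, -1])[::-1]          # sort by score, descending
    data = data[order]
    wh_ok = (data[:, 2] - data[:, 0] > min_size) & (data[:, 3] - data[:, 1] > min_size)
    return data[wh_ok][:top_k, :4]                 # drop the score column, keep boxes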
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} roidb = imdb.roidb for i in xrange(num_images): # filter out any ground truth boxes # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
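# The max_per_image step above keeps only the highest-scoring detections per
# image across all foreground classes. A standalone sketch of that cap, where
# dets_per_class is a list (one entry per foreground class) of (N, 5) arrays:
import numpy as np

def cap_per_image(dets_per_class, max_per_image):
    nonempty = [d[:, -1] for d in dets_per_class if len(d)]
    if not nonempty:
        return dets_per_class
    scores = np.hstack(nonempty)
    if len(scores) <= max_per_image:
        return dets_per_class
    thresh = np.sort(scores)[-max_per_image]
    return [d[d[:, -1] >= thresh] if len(d) else d for d in dets_per_class]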
def test_net(net, imdb): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = 40 * num_images # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 100 # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {"im_detect": Timer(), "misc": Timer()} roidb = imdb.roidb for i in xrange(num_images): # im = cv2.imread(imdb.image_path_at(i)) imname = imdb.image_path_at(i) imnames = imname.split("/") imname2 = imnames[-1] imid = int(imname2) srcdir = imname[0 : -len(imname2)] im_scale = 1 im = 0 for j in range(10): nowimid = imid + j nowname = "{0:06d}".format(nowimid) nowname = srcdir + nowname xname = nowname + "_x.jpg" yname = nowname + "_y.jpg" imx = cv2.imread(xname, cv2.CV_LOAD_IMAGE_GRAYSCALE) imy = cv2.imread(yname, cv2.CV_LOAD_IMAGE_GRAYSCALE) if j == 0: im = np.zeros((imx.shape[0], imx.shape[1], 20)) im = im.astype("uint8") im[:, :, j * 2] = imx im[:, :, j * 2 + 1] = imy _t["im_detect"].tic() scores, boxes = im_detect(net, im, roidb[i]["boxes"]) _t["im_detect"].toc() _t["misc"].tic() for j in xrange(1, imdb.num_classes): inds = np.where((scores[:, j] > thresh[j]) & (roidb[i]["gt_classes"] == 0))[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4 : (j + 1) * 4] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] cls_boxes = cls_boxes[top_inds, :] # push new scores onto the minheap for val in cls_scores: heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detection, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] all_boxes[j][i] = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32, copy=False) if 0: keep = nms(all_boxes[j][i], 0.3) vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :]) _t["misc"].toc() print "im_detect: {:d}/{:d} {:.3f}s {:.3f}s".format( i + 1, num_images, _t["im_detect"].average_time, _t["misc"].average_time ) for j in xrange(1, imdb.num_classes): for i in xrange(num_images): inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0] all_boxes[j][i] = all_boxes[j][i][inds, :] det_file = os.path.join(output_dir, "detections.pkl") with open(det_file, "wb") as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print "Applying NMS to all detections" nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) print "Evaluating detections" imdb.evaluate_detections(nms_dets, output_dir)
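# The inner loop above stacks 10 consecutive optical-flow frames (x and y
# components stored as "<id>_x.jpg" / "<id>_y.jpg") into a 20-channel uint8
# input. A minimal sketch of the same stacking, assuming the modern
# cv2.IMREAD_GRAYSCALE flag in place of the OpenCV 2.x cv2.CV_LOAD_IMAGE_GRAYSCALE
# used above:
import cv2
import numpy as np

def load_flow_stack(src_dir, first_id, length=10):
    stack = None
    for j in range(length):
        name = '{0}/{1:06d}'.format(src_dir, first_id + j)
        imx = cv2.imread(name + '_x.jpg', cv2.IMREAD_GRAYSCALE)
        imy = cv2.imread(name + '_y.jpg', cv2.IMREAD_GRAYSCALE)
        if stack is None:
            stack = np.zeros((imx.shape[0], imx.shape[1], 2 * length), dtype=np.uint8)
        stack[:, :, 2 * j] = imx
        stack[:, :, 2 * j + 1] = imy
    return stack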
def test_net(net, imdb): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = 40 * num_images # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 100 # detection threshold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} roidb = imdb.roidb for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, roidb[i]['boxes']) _t['im_detect'].toc() _t['misc'].tic() for j in xrange(1, imdb.num_classes): inds = np.where((scores[:, j] > thresh[j]) & (roidb[i]['gt_classes'] == 0))[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] cls_boxes = cls_boxes[top_inds, :] # push new scores onto the minheap for val in cls_scores: heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detection, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) if 0: keep = nms(all_boxes[j][i], 0.3) vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :]) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) for j in xrange(1, imdb.num_classes): for i in xrange(num_images): inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0] all_boxes[j][i] = all_boxes[j][i][inds, :] det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) print 'Evaluating detections' imdb.evaluate_detections(nms_dets, output_dir)
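# The top_scores minheap above enforces the max_per_set budget: once more than
# max_per_set scores have been collected for a class, the smallest ones are
# popped and the class threshold is raised to the heap root. A standalone sketch
# of that bookkeeping:
import heapq

def update_class_threshold(top_scores, new_scores, max_per_set, thresh):
    for val in new_scores:
        heapq.heappush(top_scores, val)
    if len(top_scores) > max_per_set:
        while len(top_scores) > max_per_set:
            heapq.heappop(top_scores)
        thresh = top_scores[0]      # smallest retained score becomes the cutoff
    return thresh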
cache_path = os.path.abspath(os.path.join(ROOT_DIR, 'data', 'cache'))
cache_file = os.path.join(cache_path,
                          args.imdb_name + '_3CL=' + str(cfg.ThreeClass) +
                          '_MULTI_LABEL=' + str(cfg.MULTI_LABEL) +
                          '_SOFTMAX=' + str(cfg.MULTI_LABEL_SOFTMAX) +
                          '_BLC=' + str(cfg.BALANCED) +
                          '_COF=' + str(cfg.BALANCED_COF) +
                          '_TT1000=' + str(cfg.TESTTYPE1000) +
                          '_solver_roidb.pkl')
if os.path.exists(cache_file):
    with open(cache_file, 'rb') as fid:
        roidb = cPickle.load(fid)
    print('The precomputed roidb loaded')
    output_dir = get_output_dir(args.imdb_name, None)
else:
    imdb = get_imdb(args.imdb_name)
    print 'Loaded dataset `{:s}` for training'.format(imdb.name)
    roidb = get_training_roidb(imdb)
    output_dir = get_output_dir(imdb.name, None)
    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'The precomputed roidb saved to {}'.format(cache_file)

# output_dir = output_dir + '_test'
print 'Output will be saved to `{:s}`'.format(output_dir)

train_net(args.solver, roidb, output_dir,
          pretrained_model=args.pretrained_model,
          max_iters=args.max_iters)
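# The cache above avoids recomputing the training roidb: load the pickle if it
# exists, otherwise build the roidb and dump it. The same pattern as a small
# helper (compute_fn is any zero-argument callable; cPickle matches the
# Python 2 usage elsewhere in these scripts):
import os
import cPickle

def load_or_compute(cache_file, compute_fn):
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            return cPickle.load(fid)
    result = compute_fn()
    with open(cache_file, 'wb') as fid:
        cPickle.dump(result, fid, cPickle.HIGHEST_PROTOCOL)
    return result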
    imdb = get_imdb(imdb_names)
    return imdb, roidb


if __name__ == '__main__':
    args = parse_args()

    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    gpu_id = args.gpu_id
    gpu_list = gpu_id.split(',')
    gpus = [int(i) for i in gpu_list]

    print('Using config:')
    pprint.pprint(cfg)

    imdb, roidb = combined_roidb(args.imdb_name)
    print '{:d} roidb entries'.format(len(roidb))

    snap_pre = get_snapshot_prefix(args.solver)
    output_dir = get_output_dir(imdb, postfix=snap_pre)
    print 'Output will be saved to `{:s}`'.format(output_dir)

    train_net_multi_gpu(args.solver, roidb, output_dir,
                        pretrained_model=args.pretrained_model,
                        max_iter=args.max_iters, reload=args.reload,
                        gpus=gpus)
def test_net_localsearch(net, imdb): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = 40 * num_images # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 100 # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} roidb = imdb.roidb for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, roidb[i]['boxes']) _t['im_detect'].toc() _t['misc'].tic() input_proposals=np.zeros([0,4]).astype(np.float32) for j in xrange(1, imdb.num_classes): inds = np.where((scores[:, j] > thresh[j]) & (roidb[i]['gt_classes'] == 0))[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] cls_boxes = cls_boxes[top_inds, :] # push new scores onto the minheap for val in cls_scores: heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detection, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) input_proposals=np.vstack((input_proposals,roidb[i]['boxes'][top_inds,:])) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) if input_proposals.shape[0]==0: continue BOXSIZE_THRESH=2500 BOXSIZE_TRANSLATE_LIM=10 for ib in range(input_proposals.shape[0]): bb=input_proposals[ib,:] new_box_list=np.zeros([0,4]).astype(np.float32) #select the small proposals , and use local search if (bb[3]-bb[1])*(bb[2]-bb[0])<=BOXSIZE_THRESH: translate_v=[0,random.randint(1, BOXSIZE_TRANSLATE_LIM), -1*random.randint(1, BOXSIZE_TRANSLATE_LIM)] scale_v=[1,random.uniform(0.5,0.99), random.uniform(1.1, 1.5)] w=bb[2]-bb[0] h=bb[3]-bb[1] x=bb[0]+w/2 y=bb[1]+h/2 for _x in translate_v: for _y in translate_v: for _w in scale_v: for _h in scale_v: n_x=x+_x n_y=y+_y n_w=w*_w n_h=h*_h if (n_x-n_w/2)<0 or (n_y-n_h/2)<0 or (n_x+n_w/2)>im.shape[1] or (n_x+n_w/2)>im.shape[0]: continue new_box_list=np.vstack((new_box_list, np.array([n_x-n_w/2, n_y-n_h/2, n_x+n_w/2, n_y+n_h/2]))) input_proposals=np.vstack((input_proposals, new_box_list[1:,:])) _t['im_detect'].tic() scores, boxes = im_detect(net, im, input_proposals) _t['im_detect'].toc() _t['misc'].tic() for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh[j])[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] cls_boxes = cls_boxes[top_inds, :] # push new scores onto the minheap for val in cls_scores: 
heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detection, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) if 0: keep = nms(all_boxes[j][i], 0.3) vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :]) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s finetune' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) for j in xrange(1, imdb.num_classes): for i in xrange(num_images): inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0] all_boxes[j][i] = all_boxes[j][i][inds, :] det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) print 'Evaluating detections' imdb.evaluate_detections(nms_dets, output_dir)
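# The local search above augments small proposals with translated and rescaled
# copies before re-running detection. A standalone sketch of the jitter grid
# (box is [x1, y1, x2, y2]; the offsets and scales below are illustrative
# values, and the in-image check here tests x against the image width and y
# against the image height):
import numpy as np

def jitter_box(box, im_shape, offsets=(0, 5, -5), scales=(1.0, 0.8, 1.2)):
    x1, y1, x2, y2 = box
    w, h = x2 - x1, y2 - y1
    cx, cy = x1 + w / 2.0, y1 + h / 2.0
    out = []
    for dx in offsets:
        for dy in offsets:
            for sw in scales:
                for sh in scales:
                    nw, nh = w * sw, h * sh
                    nx1, ny1 = cx + dx - nw / 2.0, cy + dy - nh / 2.0
                    nx2, ny2 = cx + dx + nw / 2.0, cy + dy + nh / 2.0
                    if nx1 < 0 or ny1 < 0 or nx2 > im_shape[1] or ny2 > im_shape[0]:
                        continue
                    out.append([nx1, ny1, nx2, ny2])
    return np.array(out, dtype=np.float32)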
def test_net(name, net, imdb, max_per_image=300, thresh=0.05, visualize=False, logger=None, step=None): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes + 1)] output_dir = get_output_dir(imdb, name) # timers _t = {'im_detect': Timer(), 'misc': Timer()} det_file = os.path.join(output_dir, 'detections.pkl') roidb = imdb.roidb skip_num = 100 # for i in range(0, num_images,skip_num): for i in range(0, num_images): im = cv2.imread(imdb.image_path_at(i)) rois = imdb.roidb[i]['boxes'] _t['im_detect'].tic() scores, boxes = im_detect(net, im, rois) detect_time = _t['im_detect'].toc(average=False) _t['misc'].tic() if visualize: # im2show = np.copy(im[:, :, (2, 1, 0)]) im2show = np.copy(im) # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes + 1): newj = j - 1 inds = np.where(scores[:, newj] > thresh)[0] cls_scores = scores[inds, newj] cls_boxes = boxes[inds, newj * 4:(newj + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if visualize: im2show = vis_detections(im2show, imdb.classes[newj], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] nms_time = _t['misc'].toc(average=False) print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format( i + 1, num_images, detect_time, nms_time)) if visualize and np.random.rand() < 0.01: # TODO: Visualize here using tensorboard # TODO: use the logger that is an argument to this function print('Visualizing') #cv2.imshow('test', im2show) #cv2.waitKey(1) with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print('Evaluating detections') aps = imdb.evaluate_detections(all_boxes, output_dir) return aps
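# vis_detections above overlays the surviving detections on the image (the
# tensorboard logging is still a TODO). A minimal OpenCV sketch of that kind of
# visualization; draw_detections is a hypothetical stand-in, not the
# repository's vis_detections (dets is (N, 5) [x1, y1, x2, y2, score]):
import cv2

def draw_detections(im, class_name, dets, score_thresh=0.5):
    for x1, y1, x2, y2, score in dets:
        if score < score_thresh:
            continue
        p1, p2 = (int(x1), int(y1)), (int(x2), int(y2))
        cv2.rectangle(im, p1, p2, (0, 255, 0), 2)
        cv2.putText(im, '{:s} {:.2f}'.format(class_name, score), p1,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1)
    return im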
gpus = map(int, args.gpu.split(','))
assert len(gpus) >= mpi_size, "Number of GPUs must be >= MPI size"
cfg.GPU_ID = gpus[mpi_rank]

print('Using config:')
pprint.pprint(cfg)

# set up caffe
caffe.mpi_init()
caffe.set_mode_gpu()
caffe.set_device(cfg.GPU_ID)

if not args.randomize:
    # fix the random seeds (numpy and caffe) for reproducibility
    np.random.seed(cfg.RNG_SEED)
    caffe.set_random_seed(cfg.RNG_SEED)

imdb, roidb = combined_roidb(args.imdb_name)
print '{:d} roidb entries'.format(len(roidb))

output_dir = get_output_dir(imdb.name)
print 'Output will be saved to `{:s}`'.format(output_dir)

train_net(args.solver, roidb, output_dir,
          previous_state=args.previous_state,
          pretrained_model=args.pretrained_model,
          max_iters=args.max_iters)

caffe.mpi_finalize()
def test_net(net, imdb, vis=0): """Test RON network on an image database.""" num_images = len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect': Timer(), 'misc': Timer()} for i in xrange(0, num_images, cfg.TEST.BATCH_SIZE): _t['misc'].tic() ims = [] for im_i in xrange(cfg.TEST.BATCH_SIZE): im = cv2.imread(imdb.image_path_at(i + im_i)) ims.append(im) _t['im_detect'].tic() batch_scores, batch_boxes = im_detect(net, ims) _t['im_detect'].toc() for im_i in xrange(cfg.TEST.BATCH_SIZE): im = ims[im_i] scores = batch_scores[im_i] boxes = batch_boxes[im_i] # filter boxes according to prob scores keeps = np.where(scores[:, 0] > cfg.TEST.PROB)[0] scores = scores[keeps, :] boxes = boxes[keeps, :] # change boxes according to input size and the original image size im_shape = im.shape[0:2] im_scales = float(cfg.TEST.SCALES[0]) / np.array(im_shape) boxes[:, 0::2] = boxes[:, 0::2] / im_scales[1] boxes[:, 1::2] = boxes[:, 1::2] / im_scales[0] # filter boxes with small sizes boxes = clip_boxes(boxes, im_shape) keep = filter_boxes(boxes, cfg.TEST.RON_MIN_SIZE) scores = scores[keep, :] boxes = boxes[keep, :] scores = np.tile(scores[:, 0], (imdb.num_classes, 1)).transpose() * scores for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > cfg.TEST.DET_MIN_PROB)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, :] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if len(keep) > cfg.TEST.BOXES_PER_CLASS: cls_dets = cls_dets[:cfg.TEST.BOXES_PER_CLASS, :] all_boxes[j][i + im_i] = cls_dets if vis: vis_detections(im, imdb.classes[j], cls_dets) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
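# RON predicts boxes at the fixed network input resolution (cfg.TEST.SCALES[0]);
# the code above maps them back to the original image by dividing x coordinates
# by the width scale and y coordinates by the height scale, then clips them to
# the image. A standalone sketch of that rescale-and-clip step:
import numpy as np

def rescale_and_clip(boxes, input_size, im_shape):
    scale_y = float(input_size) / im_shape[0]
    scale_x = float(input_size) / im_shape[1]
    boxes = boxes.copy()
    boxes[:, 0::2] /= scale_x          # x1, x2
    boxes[:, 1::2] /= scale_y          # y1, y2
    boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, im_shape[1] - 1)
    boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, im_shape[0] - 1)
    return boxes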
##
cfg.TRAIN.HAS_RPN = True
cfg.TRAIN.IMS_PER_BATCH = 1
cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True
cfg.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
cfg.TRAIN.RPN_BATCHSIZE = 256
cfg.TRAIN.PROPOSAL_METHOD = 'gt'
cfg.TRAIN.BG_THRESH_LO = 0.0

print('Using config:')
pprint.pprint(cfg)

if not args.randomize:
    # fix the random seeds (numpy and caffe) for reproducibility
    np.random.seed(cfg.RNG_SEED)
    caffe.set_random_seed(cfg.RNG_SEED)

# set up caffe
caffe.set_mode_gpu()
caffe.set_device(args.gpu_id)

imdb, roidb = combined_roidb(args.imdb_name)
print '{:d} roidb entries'.format(len(roidb))
print "printing config file", cfg

output_dir = get_output_dir(imdb, None)
# output_dir = get_output_dir(imdb)
print 'Output will be saved to `{:s}`'.format(output_dir)
print "HAS RPN: ", cfg.TRAIN.HAS_RPN

train_net(args.solver, roidb, output_dir,
          pretrained_model=args.pretrained_model,
          max_iters=args.max_iters)
net.name = osp.splitext(osp.basename(args.caffemodel))[0]
imdb = get_imdb(args.imdb_name)
imdb.competition_mode(args.comp_mode)
test_net_on_gallery_set(net, imdb, args.feat_blob,
                        max_per_image=args.max_per_image, vis=True)
if not cfg.TEST.HAS_RPN:
    imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)
test_net_on_gallery_set(net, imdb, args.feat_blob,
                        max_per_image=args.max_per_image, vis=args.vis)

root_dir = imdb._root_dir
images_dir = imdb._data_path
output_dir = get_output_dir(imdb, net)

# Extract features for probe people
net = caffe.Net(args.probe_def, args.caffemodel, caffe.TEST)
net.name = osp.splitext(osp.basename(args.caffemodel))[0]
protoc, probe_images, probe_rois, probe_labels = load_probe(
    root_dir, images_dir, args.gallery_size)
test_net_on_probe_set(net, probe_images, probe_rois, args.feat_blob,
                      output_dir)

# Evaluate
evaluate(protoc, images_dir, imdb.image_index, output_dir,
         args.gallery_size)
def main(args): if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) # parse gpus gpus = map(int, args.gpus.split(',')) assert len(gpus) >= mpi_size, "Number of GPUs must be >= MPI size" cfg.GPU_ID = gpus[mpi_rank] # parse feature blob names blob_names = args.blob_names.split(',') print('Using config:') pprint.pprint(cfg) while not osp.exists(args.caffemodel) and args.wait: print('Waiting for {} to exist...'.format(args.caffemodel)) time.sleep(10) # load imdb imdb = get_imdb(args.imdb_name) root_dir = imdb._root_dir images_dir = imdb._data_path output_dir = get_output_dir(imdb.name, osp.splitext(osp.basename(args.caffemodel))[0]) if args.eval_only: def _load(fname): fpath = osp.join(output_dir, fname) assert osp.isfile(fpath), "Must have extracted detections and " \ "features first before evaluation" return unpickle(fpath) if mpi_rank == 0: gboxes = _load('gallery_detections.pkl') gfeatures = _load('gallery_features.pkl') pfeatures = _load('probe_features.pkl') else: # setup caffe caffe.mpi_init() caffe.set_mode_gpu() caffe.set_device(cfg.GPU_ID) # 1. Detect and extract features from all the gallery images in the imdb start, end = mpi_dispatch(len(imdb.image_index), mpi_size, mpi_rank) if args.use_gt: net = caffe.Net(args.probe_def, args.caffemodel, caffe.TEST) gboxes, gfeatures = usegt_and_exfeat(net, imdb, start=start, end=end, blob_names=blob_names) else: net = caffe.Net(args.gallery_def, args.caffemodel, caffe.TEST) gboxes, gfeatures = detect_and_exfeat(net, imdb, start=start, end=end, blob_names=blob_names) gboxes = mpi_collect(mpi_comm, mpi_rank, gboxes) gfeatures = mpi_collect(mpi_comm, mpi_rank, gfeatures) del net # to release the cudnn conv static workspace # 2. Only extract features from given probe rois start, end = mpi_dispatch(len(imdb.probes), mpi_size, mpi_rank) net = caffe.Net(args.probe_def, args.caffemodel, caffe.TEST) pfeatures = exfeat(net, imdb.probes, start=start, end=end, blob_names=blob_names) pfeatures = mpi_collect(mpi_comm, mpi_rank, pfeatures) del net # Save if mpi_rank == 0: pickle(gboxes, osp.join(output_dir, 'gallery_detections.pkl')) pickle(gfeatures, osp.join(output_dir, 'gallery_features.pkl')) pickle(pfeatures, osp.join(output_dir, 'probe_features.pkl')) # Evaluate if mpi_rank == 0: imdb.evaluate_detections(gboxes, det_thresh=args.det_thresh) imdb.evaluate_detections(gboxes, det_thresh=args.det_thresh, labeled_only=True) imdb.evaluate_search(gboxes, gfeatures['feat'], pfeatures['feat'], det_thresh=args.det_thresh, gallery_size=args.gallery_size, dump_json=osp.join(output_dir, 'results.json'))
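# mpi_dispatch above assigns each MPI rank a contiguous slice of the image index
# so gallery detection and probe feature extraction run in parallel and are
# gathered with mpi_collect. A minimal sketch of such a dispatch; the even split
# below is an assumption about the helper's behavior, not its actual code:
def dispatch(total, size, rank):
    per_rank = (total + size - 1) // size          # ceiling division
    start = min(rank * per_rank, total)
    end = min(start + per_rank, total)
    return start, end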
#val_roidb = get_training_roidb(val_imdb) val_imdb = None val_roidb = None else: val_imdb = get_imdb(args.val_imdb_name) val_imdb.set_object(object_name) val_roidb = get_training_roidb(val_imdb) print('Loaded dataset `{:s}` for training'.format(imdb.name)) if not args.randomize: np.random.seed(cfg.RNG_SEED) #labels roidb = get_training_roidb(imdb) #output dir output_dir = get_output_dir( imdb, 'voxelnet_eval_' + object_name + '_' + args_in.exp_name) print('Output will be saved to `{:s}`'.format(output_dir)) #network load, rest every time #input data from roi_data_layer.layer import RoIDataLayer data_layer = RoIDataLayer(roidb, imdb.num_classes) blobs = data_layer.forward() for key in blobs: print(key) data_layer = RoIDataLayer(roidb, imdb.num_classes) blobs = data_layer.forward() print(blobs['im_info']) print(cfg_voxels.INPUT_WIDTH, cfg_voxels.INPUT_HEIGHT) print(blobs['gt_rys'].shape) print(blobs['voxel_data']['coordinate_buffer'].shape)
    imdb = get_imdb(imdb_names)
    return imdb, roidb


if __name__ == '__main__':
    if CFG_FILE is not None:
        cfg_from_file(CFG_FILE)
    if SET_CFGS is not None:
        cfg_from_list(SET_CFGS)

    print('Using config:')
    pprint.pprint(cfg)

    if not RANDOMIZE:
        # fix the random seeds (numpy and caffe) for reproducibility
        np.random.seed(cfg.RNG_SEED)
        caffe.set_random_seed(cfg.RNG_SEED)

    # set up caffe
    caffe.set_mode_cpu()

    imdb, roidb = combined_roidb(IMDB_NAME)
    print '{:d} roidb entries'.format(len(roidb))

    output_dir = get_output_dir(imdb)
    print 'Output will be saved to `{:s}`'.format(output_dir)

    # train_net(SOLVER, roidb, output_dir,
    #           pretrained_model=PRETRAINED_WEIGHTS,
    #           max_iters=MAX_ITERS)
def test_net(net, imdb): """Test an OICR network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = 40 * num_images # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 100 # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # thresh = 0.1 * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect': Timer(), 'misc': Timer()} scores_all = [] boxes_all = [] roidb = imdb.roidb for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, roidb[i]['boxes']) _t['im_detect'].toc() scores_all.append(scores) boxes_all.append(boxes) _t['misc'].tic() for j in xrange(0, imdb.num_classes): inds = np.where((scores[:, j] > thresh[j]))[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] cls_boxes = cls_boxes[top_inds, :] # push new scores onto the minheap for val in cls_scores: heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detection, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) if 0: keep = nms(all_boxes[j][i], 0.3) vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :]) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) for j in xrange(imdb.num_classes): for i in xrange(num_images): inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0] all_boxes[j][i] = all_boxes[j][i][inds, :] det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) det_file_all = os.path.join(output_dir, 'detections_all.pkl') results_all = {'scores_all': scores_all, 'boxes_all': boxes_all} with open(det_file_all, 'wb') as f: cPickle.dump(results_all, f, cPickle.HIGHEST_PROTOCOL) print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) print 'Evaluating detections' imdb.evaluate_detections(nms_dets, output_dir)
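# apply_nms above runs NMS over every (class, image) cell of all_boxes once the
# adaptive thresholds are finalized. A standalone sketch of that loop;
# apply_nms_all is a hypothetical stand-in that reuses any dets -> keep-indices
# NMS function (nms_fn):
def apply_nms_all(all_boxes, nms_fn, thresh):
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            if len(dets) == 0:
                continue
            keep = nms_fn(dets, thresh)
            nms_boxes[cls_ind][im_ind] = dets[keep, :]
    return nms_boxes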
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, box_proposals,imdb.num_classes) _t['im_detect'].toc() _t['misc'].tic() vis = False if vis: imj =im name = 'output/bads/'+ str(i) + '.jpg' for jj in xrange(1, imdb.num_classes): indsj = np.where(scores[:, jj] > thresh)[0] cls_scoresj = scores[indsj, jj] cls_boxesj = boxes[indsj, jj*4:(jj+1)*4] cls_detsj = np.hstack((cls_boxesj, cls_scoresj[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_detsj, cfg.TEST.NMS) cls_detsj = cls_detsj[keep, :] detsj = cls_detsj for ii in xrange(np.minimum(10, detsj.shape[0])): bboxj = detsj[ii, :4] scorej = detsj[ii, -1] if bboxj != []: x1 = bboxj[0] y1 = bboxj[3] x2 = bboxj[2] y2 = bboxj[1] # if x1 < 0: # x1=0 # if y1> imj.shape[1]: # y1=imj.shape[1]-1 # if x2 > imj.shape[0]: # x2 = imj.shape[0]-1 # if y2 < 0: # y2 = 0 if scorej > 0.1: cv2.rectangle(imj, (x1, y1), (x2,y2),(0,255,0), 4) text = str(jj) + ": " + str(scorej) font = cv2.FONT_HERSHEY_SIMPLEX cv2.putText(imj, text, (x1, y1),font , 1, (0,0,255), 4) cv2.imwrite(name, imj) #aaa # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) dets_NMSed = cls_dets[keep, :] cls_dets = bbox_vote(dets_NMSed, cls_dets) # print cls_scores if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
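# bbox_vote above refines each NMS survivor using the pre-NMS detections that
# overlap it. A common formulation is a score-weighted average of the
# overlapping boxes; the sketch below assumes an IoU threshold of 0.5 and is
# not the repository's bbox_vote implementation.
import numpy as np

def iou_matrix(a, b):
    # a: (N, 4), b: (M, 4) -> (N, M) IoU
    area_a = (a[:, 2] - a[:, 0] + 1) * (a[:, 3] - a[:, 1] + 1)
    area_b = (b[:, 2] - b[:, 0] + 1) * (b[:, 3] - b[:, 1] + 1)
    xx1 = np.maximum(a[:, None, 0], b[None, :, 0])
    yy1 = np.maximum(a[:, None, 1], b[None, :, 1])
    xx2 = np.minimum(a[:, None, 2], b[None, :, 2])
    yy2 = np.minimum(a[:, None, 3], b[None, :, 3])
    inter = np.maximum(0, xx2 - xx1 + 1) * np.maximum(0, yy2 - yy1 + 1)
    return inter / (area_a[:, None] + area_b[None, :] - inter)

def box_vote(kept_dets, all_dets, iou_thresh=0.5):
    # kept_dets: NMS survivors (K, 5); all_dets: pre-NMS detections (N, 5)
    ious = iou_matrix(kept_dets[:, :4], all_dets[:, :4])
    voted = kept_dets.copy()
    for k in range(kept_dets.shape[0]):
        members = all_dets[ious[k] >= iou_thresh]
        if members.shape[0] == 0:
            continue
        weights = members[:, 4:5]
        voted[k, :4] = (members[:, :4] * weights).sum(axis=0) / (weights.sum() + 1e-12)
    return voted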
def test_net(net, imdb, max_per_image=400, thresh=-np.inf, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[] for _ in xrange(num_images)] output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb #test_preds = np.ndarray((num_images,imdb.num_classes), dtype='float') for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class scores_max = np.amax(scores[:,1:], axis=1, keepdims=True) index = np.argmax(scores[:,1:], axis=1) boxes_max = np.zeros((scores.shape[0],4)) for k in xrange(scores.shape[0]): if cfg.TEST.AGNOSTIC: boxes_max[k] = boxes[k, 4:8] else: boxes_max[k] = boxes[k, (index[k]+1)*4:(index[k]+2)*4] dets = np.hstack((boxes_max, scores_max)).astype(np.float32, copy=False) keep = nms(dets, cfg.TEST.NMS) dets = dets[keep, :] dets_full = np.hstack((boxes_max, scores)).astype(np.float32, copy=False) dets_full = dets_full[keep, :] # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = dets[:, 4] if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] keep = np.where(dets[:, 4] >= image_thresh)[0] dets_full = dets_full[keep, :] all_boxes[i] = dets_full _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) #index_wobg = np.argmax(np.amax(scores[:,1:], axis=1)) #test_preds[i,:] = scores[index_wobg,:] #test_preds_file = os.path.join(output_dir, 'test_preds.pkl') #with open(test_preds_file, 'wb') as f: # cPickle.dump(test_preds, f, cPickle.HIGHEST_PROTOCOL) det_file = os.path.join(output_dir, 'detections_full_AGNOSTICnms.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections'
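# The loop above keeps, for each proposal, its best non-background score and the
# box regressed for that class (or the single shared box when
# cfg.TEST.AGNOSTIC is set). A vectorized sketch of the non-agnostic case:
import numpy as np

def best_class_dets(scores, boxes):
    # scores: (N, C) with column 0 = background; boxes: (N, 4*C)
    best = scores[:, 1:].argmax(axis=1) + 1          # best foreground class per row
    best_scores = scores[np.arange(scores.shape[0]), best]
    cols = np.stack([4 * best, 4 * best + 1, 4 * best + 2, 4 * best + 3], axis=1)
    best_boxes = np.take_along_axis(boxes, cols, axis=1)
    return np.hstack((best_boxes, best_scores[:, np.newaxis])).astype(np.float32)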