def eval_net(val_dataset,
             val_loader,
             net,
             detector,
             cfg,
             transform,
             max_per_image=300,
             thresh=0.01,
             batch_size=1):
    net.eval()
    num_images = len(val_dataset)
    num_classes = cfg['num_classes']
    eval_save_folder = "./eval/"
    if not os.path.exists(eval_save_folder):
        os.mkdir(eval_save_folder)
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    det_file = os.path.join(eval_save_folder, 'detections.pkl')
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if args.retest:
        with open(det_file, 'rb') as f:
            all_boxes = pickle.load(f)
        print('Evaluating detections')
        val_dataset.evaluate_detections(all_boxes, eval_save_folder)
        return

    st = time.time()  # start time; referenced by the final "detect time" print
    for idx, (imgs, _, img_info) in enumerate(val_loader):
        with torch.no_grad():
            t1 = time.time()
            x = imgs.cuda()
            output = net(x)
            t4 = time.time()
            boxes, scores = detector.forward(output)
            t2 = time.time()
            for k in range(boxes.size(0)):
                i = idx * batch_size + k
                boxes_ = boxes[k].cpu().numpy()
                scores_ = scores[k].cpu().numpy()
                img_wh = img_info[k]
                # scale normalized boxes back to the original image size
                scale = np.array([img_wh[0], img_wh[1], img_wh[0], img_wh[1]])
                boxes_ *= scale
                for j in range(1, num_classes):
                    inds = np.where(scores_[:, j] > thresh)[0]
                    if len(inds) == 0:
                        all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                        continue
                    c_bboxes = boxes_[inds]
                    c_scores = scores_[inds, j]
                    c_dets = np.hstack(
                        (c_bboxes, c_scores[:, np.newaxis])).astype(
                            np.float32, copy=False)
                    keep = nms(c_dets, 0.45, force_cpu=True)
                    keep = keep[:50]
                    all_boxes[j][i] = c_dets[keep, :]
            t3 = time.time()
            detect_time = t2 - t1
            nms_time = t3 - t2
            forward_time = t4 - t1
            if idx % 10 == 0:
                print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s {:.3f}s'.format(
                    i + 1, num_images, forward_time, detect_time, nms_time))

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    val_dataset.evaluate_detections(all_boxes, eval_save_folder)
    print("detect time: ", time.time() - st)
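# A minimal pure-NumPy sketch of the greedy NMS applied per class above,
# assuming dets is an (N, 5) array of (x1, y1, x2, y2, score). The imported
# `nms(dets, iou_thresh, force_cpu=...)` is the project's own implementation;
# this is only the reference logic it computes.
import numpy as np

def nms_sketch(dets, iou_thresh=0.45):
    x1, y1, x2, y2, scores = (dets[:, 0], dets[:, 1], dets[:, 2],
                              dets[:, 3], dets[:, 4])
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top-scoring box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes that do not overlap the winner too heavily
        order = order[1:][iou <= iou_thresh]
    return keep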
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # List used to store entities like image_name, object_class, bbox and score.
    lst = []
    # Counter for classes absent from this image; if it reaches four, the
    # image contains none of the target objects.
    count_target = 0

    # Load the demo image.
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)
    # Reduce the full image path to the bare filename needed in the output CSV.
    fname = im_file.split('/')[-1]

    # Detect all object classes and regress object bounds.
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time(), boxes.shape[0]))

    # Visualize detections for each class.
    thresh = 0.8  # CONF_THRESH
    NMS_THRESH = 0.3
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')
    cntr = -1
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(torch.from_numpy(cls_boxes), torch.from_numpy(cls_scores),
                   NMS_THRESH)
        dets = dets[keep.numpy(), :]
        inds = np.where(dets[:, -1] >= thresh)[0]
        if len(inds) == 0:
            # No detection of this class above the threshold; increment the
            # counter so the number of absent classes can be tracked, then
            # skip the drawing step for this class.
            count_target += 1
            if count_target == 4:
                # fname is passed separately to writer() for images that have
                # no target object; for images with targets, lst is filled
                # below before writer() is called.
                writer(lst, count_target, fname)
            continue
        else:
            cntr += 1
        for i in inds:
            bbox = dets[i, :4]
            score = dets[i, -1]
            ax.add_patch(
                plt.Rectangle((bbox[0], bbox[1]),
                              bbox[2] - bbox[0],
                              bbox[3] - bbox[1],
                              fill=False,
                              edgecolor=COLORS[cntr % len(COLORS)],
                              linewidth=3.5))
            ax.text(bbox[0], bbox[1] - 2,
                    '{:s} {:.3f}'.format(cls, score),
                    bbox=dict(facecolor='blue', alpha=0.5),
                    fontsize=14,
                    color='white')
            # Parameters of the detection: filename, class, bounding-box
            # corner coordinates (x1, y1, x2, y2) and the score. Whether a
            # target is present is filled in separately inside writer().
            lst = [fname, cls, bbox[0], bbox[1], bbox[2], bbox[3], score]
            writer(lst, count_target, fname)
    ax.set_title('All detections with threshold >= {:.1f}'.format(thresh),
                 fontsize=14)
    plt.axis('off')
    plt.tight_layout()
    plt.savefig(os.path.join('img_results', 'demo_' + image_name))
    if count_target == 4:
        print('No target objects present')
    print('Saved to `{}`'.format(
        os.path.join(os.getcwd(), 'img_results', 'demo_' + image_name)))
def test_net(sess, net, imdb, weights_filename, max_per_image=300,
             thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            # The roidb may contain ground-truth rois (for example, if the
            # roidb comes from the training or val split). We only want to
            # evaluate detection on the *non*-ground-truth rois. We select
            # the rois that have the gt_classes field set to 0, which means
            # there's no ground truth.
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(sess, net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        if vis:
            image = im[:, :, (2, 1, 0)]
            plt.cla()
            plt.imshow(image)

        # skip j = 0, because it's the background class
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(image, imdb.classes[j], cls_dets)
            all_boxes[j][i] = cls_dets

        if vis:
            save_path = os.path.join(output_dir, 'image_{:05d}.png'.format(i))
            print save_path
            plt.savefig(save_path)
            plt.close()

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)
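# A short sketch of the cross-class per-image cap used above: gather every
# class's scores for image i, find the max_per_image-th highest score, and
# keep only detections at or above it. Dummy two-class values for illustration.
import numpy as np

scores_per_class = [np.array([0.9, 0.2, 0.6]), np.array([0.8, 0.1])]
max_per_image = 3
image_scores = np.hstack(scores_per_class)
if len(image_scores) > max_per_image:
    image_thresh = np.sort(image_scores)[-max_per_image]  # 3rd highest = 0.6
    kept = [s[s >= image_thresh] for s in scores_per_class]
# kept -> [array([0.9, 0.6]), array([0.8])], i.e. exactly 3 detections remain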
def test_net(sess, net, imdb, weights_filename, max_per_image=300,
             thresh=0.05, vis=False):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    #   all_boxes_cnr[cls][image] = N x 25 array of detections in
    #   (x0-x7, y0-y7, z0-z7, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_boxes_img = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]
    all_boxes_cnr = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]
    all_calib = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_score = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, weights_filename)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    # Disabled debug code: dump VGG conv / RPN weights to 'rpn_data.npy'.
    # conv1_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv1_1")
    # conv1_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv1_2")
    # conv2_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv2_1")
    # conv2_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv2_2")
    # conv3_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv3_1")
    # conv3_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv3_2")
    # conv3_3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv3_3")
    # conv4_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv4_1")
    # conv4_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv4_2")
    # conv4_3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv4_3")
    # conv5_1 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv5_1")
    # conv5_2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv5_2")
    # conv5_3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="conv5_3")
    # rpn_w = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_conv/3x3")[0]
    # rpn_b = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_conv/3x3")[1]
    # rpn_w2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_cls_score")[0]
    # rpn_b2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_cls_score")[1]
    # rpn_w3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_bbox_pred")[0]
    # rpn_b3 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_bbox_pred")[1]
    # weights = {
    #     'conv1_1': {"weights": conv1_1[0].eval(session=sess), "biases": conv1_1[1].eval(session=sess)},
    #     'conv1_2': {"weights": conv1_2[0].eval(session=sess), "biases": conv1_2[1].eval(session=sess)},
    #     'conv2_1': {"weights": conv2_1[0].eval(session=sess), "biases": conv2_1[1].eval(session=sess)},
    #     'conv2_2': {"weights": conv2_2[0].eval(session=sess), "biases": conv2_2[1].eval(session=sess)},
    #     'conv3_1': {"weights": conv3_1[0].eval(session=sess), "biases": conv3_1[1].eval(session=sess)},
    #     'conv3_2': {"weights": conv3_2[0].eval(session=sess), "biases": conv3_2[1].eval(session=sess)},
    #     'conv3_3': {"weights": conv3_3[0].eval(session=sess), "biases": conv3_3[1].eval(session=sess)},
    #     'conv4_1': {"weights": conv4_1[0].eval(session=sess), "biases": conv4_1[1].eval(session=sess)},
    #     'conv4_2': {"weights": conv4_2[0].eval(session=sess), "biases": conv4_2[1].eval(session=sess)},
    #     'conv4_3': {"weights": conv4_3[0].eval(session=sess), "biases": conv4_3[1].eval(session=sess)},
    #     'conv5_1': {"weights": conv5_1[0].eval(session=sess), "biases": conv5_1[1].eval(session=sess)},
    #     'conv5_2': {"weights": conv5_2[0].eval(session=sess), "biases": conv5_2[1].eval(session=sess)},
    #     'conv5_3': {"weights": conv5_3[0].eval(session=sess), "biases": conv5_3[1].eval(session=sess)},
    #     'rpn_conv/3x3': {"weights": rpn_w.eval(session=sess), "biases": rpn_b.eval(session=sess)},
    #     'rpn_cls_score': {"weights": rpn_w2.eval(session=sess), "biases": rpn_b2.eval(session=sess)},
    #     'rpn_bbox_pred': {"weights": rpn_w3.eval(session=sess), "biases": rpn_b3.eval(session=sess)},
    # }
    # print rpn_w.eval(session=sess)
    # np.save('rpn_data.npy', weights)

    # Disabled debug code: print deconv layer shapes.
    # deconv2 = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="deconv_4x_1")[0]
    # shape_conv5_3 = conv5_3.get_shape().as_list()
    # shape1 = deconv1.get_shape().as_list()
    # shape2 = deconv2.get_shape().as_list()
    # print 'conv5_3 shape', shape_conv5_3
    # print 'deconv_2x_1 shape', shape1
    # print 'deconv_4x_1 shape', shape2

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None

        im = cv2.imread(imdb.image_path_at(i))
        bv = np.load(imdb.lidar_path_at(i))
        lidar3D = imdb.lidar3D_path_at(i)
        GT_boxes3D_corners = imdb.GT_annotation_at(i)["boxes_corners"]
        GT_boxes3D_camera_corners = imdb.GT_annotation_at(i)["boxes3D_cam_corners"]
        print "GT_boxes3D_corners", GT_boxes3D_corners
        # print "GT_boxes3D_camera_corners:", GT_boxes3D_camera_corners
        calib = imdb.calib_at(i)
        print "Inference: ", imdb.lidar_path_at(i)

        _t['im_detect'].tic()
        scores, boxes_bv, boxes_cnr, boxes_cnr_r = box_detect(
            sess, net, im, bv, calib, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        if vis:
            image = im[:, :, (2, 1, 0)]
            plt.cla()
            plt.imshow(image)

        thresh = 0.05
        # thresh = 0.8
        # skip j = 0, because it's the background class
        # for j in xrange(1, imdb.num_classes):
        for j in xrange(1, 2):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            # cls_boxes = boxes_bv[inds, j * 4:(j + 1) * 4]
            # cls_boxes_cnr = boxes_cnr[inds, j * 24:(j + 1) * 24]
            cls_boxes = boxes_bv[inds, 0:4]
            cls_boxes_cnr = boxes_cnr[inds, 0:24]
            cls_boxes_cnr_r = boxes_cnr_r[inds, j * 24:(j + 1) * 24]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            cls_dets_cnr = np.hstack((cls_boxes_cnr, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            cls_dets_cnr_r = np.hstack((cls_boxes_cnr_r, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            # print "scores: ", scores.shape
            # print "cls_scores: ", cls_scores.shape
            # print "boxes_bv: ", boxes_bv.shape
            # print "cls_dets: ", cls_dets.shape
            # print "inds: ", inds.shape
            # print "boxes_cnr: ", boxes_cnr.shape
            # print "cls_dets_cnr: ", cls_dets_cnr.shape

            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            cls_dets_cnr = cls_dets_cnr[keep, :]
            cls_dets_cnr_r = cls_dets_cnr_r[keep, :]
            cls_scores = cls_scores[keep]
            # img_boxes = cls_dets_cnr_r[:, 4]

            # project to image
            if np.any(cls_dets_cnr):
                plt.rcParams['figure.figsize'] = (10, 10)
                img_boxes = lidar_cnr_to_img(cls_dets_cnr_r[:, :24],
                                             calib[3], calib[2], calib[0])
                img = show_image_boxes(im, img_boxes)
                # plt.imshow(img)
                # plt.show()
                all_boxes[j][i] = img_boxes
                image_bv = show_image_boxes(
                    scale_to_255(bv[:, :, 8], min=0, max=2), cls_dets[:, :4])
                image_cnr = show_lidar_corners(im, cls_dets_cnr_r[:, :24], calib)

                if 1:
                    import mayavi.mlab as mlab
                    # filename = os.path.join(imdb.lidar_path_at(i)[:-19], 'velodyne', str(3).zfill(6) + '.bin')
                    filename = lidar3D
                    print filename
                    scan = np.fromfile(filename, dtype=np.float32)
                    scan = scan.reshape((-1, 4))
                    corners = cls_dets_cnr[:, :24].reshape(
                        (-1, 3, 8)).transpose((0, 2, 1))
                    corners_r = cls_dets_cnr_r[:, :24].reshape(
                        (-1, 3, 8)).transpose((0, 2, 1))
                    GT_corners = GT_boxes3D_corners[:, :24].reshape(
                        (-1, 3, 8)).transpose((0, 2, 1))
                    # print corners_r
                    # print GT_corners
                    # camera_cors_r = lidar_cnr_to_camera(corners_r, calib[3])
                    fig = mlab.figure(figure=None, bgcolor=(0, 0, 0),
                                      fgcolor=None, engine=None,
                                      size=(1000, 500))
                    draw_lidar(scan, fig=fig)
                    draw_gt_boxes3d(corners, fig=fig)
                    draw_gt_boxes3d(corners_r, color=(1, 0, 0), fig=fig)
                    draw_gt_boxes3d(GT_corners, color=(0, 1, 0), fig=fig)
                    mlab.show()

                # plt.subplot(211)
                # plt.title('bv proposal')
                # plt.imshow(image_bv, cmap='jet')
                # plt.subplot(212)
                # plt.imshow(image_cnr)
                # plt.show()

                all_boxes_cnr[j][i] = cls_dets_cnr_r[:, :24]
                all_calib[j][i] = calib[3]
                all_score[j][i] = cls_scores

        # if vis:
        #     plt.show()

        # Limit to max_per_image detections *over all classes*
        # if max_per_image > 0:
        #     image_scores = np.hstack([all_boxes[j][i][:, -1]
        #                               for j in xrange(1, imdb.num_classes)])
        #     if len(image_scores) > max_per_image:
        #         image_thresh = np.sort(image_scores)[-max_per_image]
        #         for j in xrange(1, imdb.num_classes):
        #             keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
        #             all_boxes[j][i] = all_boxes[j][i][keep, :]
        #             # all_boxes_img[j][i] = all_boxes_img[j][i][keep, :]
        #             all_boxes_cnr[j][i] = all_boxes_cnr[j][i][keep, :]

        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)
    det_cnr_file = os.path.join(output_dir, 'detections_cnr.pkl')
    with open(det_cnr_file, 'wb') as f:
        cPickle.dump(all_boxes_cnr, f, cPickle.HIGHEST_PROTOCOL)

    # print 'Evaluating detections'
    # imdb.evaluate_detections(all_boxes, all_boxes_cnr, output_dir)
    imdb.evaluate_detections3D(all_boxes, all_boxes_cnr, all_calib,
                               all_score, output_dir)
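# A minimal sketch of the corner-layout conversion used above, assuming each
# row stores 24 values as (x0..x7, y0..y7, z0..z7): reshape to (N, 3, 8) so
# axis 1 is the coordinate, then transpose to (N, 8, 3) so each of the 8 box
# corners becomes an (x, y, z) point for the mayavi drawing helpers.
import numpy as np

cnr = np.arange(48, dtype=np.float32).reshape(2, 24)  # two dummy boxes
corners = cnr.reshape((-1, 3, 8)).transpose((0, 2, 1))
assert corners.shape == (2, 8, 3)
# corners[n, k] is (x_k, y_k, z_k) of box n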
def demo(sess, net, image_name, CONF_THRESHES):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', 'Images', image_name)
    im = cv2.imread(im_file, cv2.IMREAD_UNCHANGED)
    scene_name = image_name[:10]     # e.g. 'scene_0021'
    # scene_index = scene_name[-4:]
    image_index = image_name[11:15]  # e.g. '0003'
    theta, true_polygon_list = get_true_grasps(scene_name, image_index)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s'.format(timer.total_time))

    NMS_THRESH = 0.3
    num_conf_threshes = len(CONF_THRESHES)
    # Count the TP num and total num of the image:
    # number of true-positive proposed grasps in the image, per CONF_THRESH
    image_true_positive_num = np.zeros(num_conf_threshes)
    # number of proposed grasps in the image, per CONF_THRESH
    image_total_num = np.zeros(num_conf_threshes)
    # number of ground-truth grasps in the image (a scalar, not an array)
    image_total_gt_num = theta.shape[0]
    # list of sets of indexes of the detected ground-truth grasps
    detected_gt_grasp_index = [set() for i in range(num_conf_threshes)]
    # number of detected ground-truth grasps in the image, per CONF_THRESH
    detected_gt_grasp_num = np.zeros(num_conf_threshes)

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        class_true_positive_num = np.zeros(num_conf_threshes)
        class_total_num = np.zeros(num_conf_threshes)
        for i in range(num_conf_threshes):
            CONF_THRESH = CONF_THRESHES[i]
            class_true_positive_num[i], class_total_num[i] = \
                count_true_positive(cls, dets, theta, true_polygon_list,
                                    detected_gt_grasp_index[i],
                                    thresh=CONF_THRESH)
        image_true_positive_num += class_true_positive_num
        image_total_num += class_total_num

    for j in range(num_conf_threshes):
        detected_gt_grasp_num[j] = len(detected_gt_grasp_index[j])
    return (image_true_positive_num, image_total_num, image_total_gt_num,
            detected_gt_grasp_num)
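# A hedged sketch (not part of the original file) of how the per-image counts
# returned by demo() above could be aggregated into precision/recall values
# over a test split. `evaluate_split` and `image_names` are hypothetical;
# everything else mirrors the return values of demo().
import numpy as np

def evaluate_split(sess, net, image_names, conf_threshes):
    tp = np.zeros(len(conf_threshes))     # true positives per threshold
    total = np.zeros(len(conf_threshes))  # proposed grasps per threshold
    gt = 0                                # ground-truth grasps overall
    detected = np.zeros(len(conf_threshes))
    for name in image_names:
        itp, itot, igt, idet = demo(sess, net, name, conf_threshes)
        tp += itp
        total += itot
        gt += igt
        detected += idet
    precision = tp / np.maximum(total, 1)  # guard against division by zero
    recall = detected / max(gt, 1)
    return precision, recall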
        num_devices=cfg.NUM_DEVICES,
    )

    # do validation by default
    if True:
        val_model, _, _ = \
            model_builder_rel.create(cfg.MODEL.MODEL_NAME, train=False,
                                     split='val')
        logger.info('Validation model built.')
        total_val_iters = int(
            math.ceil(
                float(len(val_model.roi_data_loader._roidb)) /
                float(cfg.NUM_DEVICES))) + 5
        val_evaluator = evaluator_rel.Evaluator(
            split=cfg.VAL.DATA_TYPE,
            roidb_size=len(val_model.roi_data_loader._roidb))
        val_timer = Timer()
        logger.info('Val epoch iters: {}'.format(total_val_iters))
        accumulated_accs = {}
        for key in val_evaluator.__dict__.keys():
            if key.find('acc') >= 0:
                accumulated_accs[key] = []
        # wins are for showing different plots
        wins = {}
        for key in val_evaluator.__dict__.keys():
            if key.find('acc') >= 0:
                wins[key] = None

    prev_checkpointed_lr = None
    lr_iters = model_builder_rel.get_lr_steps()
def train(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    drop_last = True

    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If using the sync batch norm strategy, drop the last batch if the
        # number of samples in batch_data is less than cfg.BATCH_SIZE to
        # avoid NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPUs
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE is divisible by the number of GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisible by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # In multi-GPU training mode, batch data is allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    data_loader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    build_model(test_prog, fluid.Program(), phase=ModelPhase.EVAL)
    data_loader.set_sample_generator(data_generator,
                                     batch_size=batch_size_per_dev,
                                     drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iterations
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
        exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info("Sync BatchNorm strategy will not be effective if GPU"
                       " device count <= 1")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        load_pretrained_weights(exe, train_prog,
                                cfg.TRAIN.PRETRAINED_MODEL_DIR)
    else:
        print_info(
            'Pretrained model dir {} not exists, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variables and use a streaming confusion matrix to
        # compute IoU results when in debug mode
        np.set_printoptions(precision=4,
                            suppress=True,
                            linewidth=160,
                            floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_vdl:
        if not args.vdl_log_dir:
            print_info("Please specify the log directory by --vdl_log_dir.")
            exit(1)
        from visualdl import LogWriter
        log_writer = LogWriter(args.vdl_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)
    avg_loss = 0.0
    best_mIoU = 0.0

    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # training process matches expectations
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    step += 1
                    if step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()
                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_vdl:
                            log_writer.add_scalar('Train/mean_iou', mean_iou, step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc, step)
                            log_writer.add_scalar('Train/loss', avg_loss, step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/step/sec', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnecessary logging and
                    # computation
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    step += 1
                    if step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - step, speed)))
                        if args.use_vdl:
                            log_writer.add_scalar('Train/loss', avg_loss, step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/speed', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

                # NOTE: used for benchmark and profiler tools
                if args.is_profiler and epoch == 1 and step == args.log_steps:
                    profiler.start_profiler("All")
                elif args.is_profiler and epoch == 1 and step == args.log_steps + 5:
                    profiler.stop_profiler("total", args.profiler_path)
                    return

            except fluid.core.EOFException:
                data_loader.reset()
                break
            except Exception as e:
                print(e)

        if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
                or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(train_prog, epoch)
            save_infer_program(test_prog, ckpt_dir)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(cfg=cfg,
                                                    ckpt_dir=ckpt_dir,
                                                    use_gpu=args.use_gpu,
                                                    use_mpio=args.use_mpio)
                if args.use_vdl:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)

                if mean_iou > best_mIoU:
                    best_mIoU = mean_iou
                    update_best_model(ckpt_dir)
                    print_info("Save best model {} to {}, mIoU = {:.4f}".format(
                        ckpt_dir,
                        os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, 'best_model'),
                        mean_iou))

            # Use VisualDL to visualize results
            if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(cfg=cfg,
                          use_gpu=args.use_gpu,
                          vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                          vis_dir="visual",
                          ckpt_dir=ckpt_dir,
                          log_writer=log_writer)

    # save final model
    if cfg.TRAINER_ID == 0:
        ckpt_dir = save_checkpoint(train_prog, 'final')
        save_infer_program(test_prog, ckpt_dir)
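# A hedged sketch of what an ETA helper like `calculate_eta(remaining, speed)`
# used above typically computes (the real helper lives elsewhere in the repo,
# so this is only an assumption about its behavior): remaining steps divided
# by steps-per-second, formatted as h:mm:ss.
def calculate_eta_sketch(remaining_step, speed):
    if speed <= 0:
        return '--:--:--'
    seconds = int(remaining_step / speed)
    h, rem = divmod(seconds, 3600)
    m, s = divmod(rem, 60)
    return '{}:{:02d}:{:02d}'.format(h, m, s)

# calculate_eta_sketch(7200, 2.0) -> '1:00:00'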
def testxx(self):
    print('Testing...', self.base_folder)
    video_frames = glob.glob(self.base_folder + '*.tif')
    video_frames = sorted([os.path.splitext(os.path.basename(frame))[0][1:]
                           for frame in video_frames])
    # video_frames = video_frames[100:]
    # coord_factors = 0.001
    # min_cluster_size = 100
    # min_samples = 100
    # min_label_size_per_stack = 100
    tracker = EmbeddingTracker(coord_factors=self.coord_factors,
                               stack_neighboring_slices=2,
                               min_cluster_size=self.min_samples,
                               min_samples=self.min_samples,
                               min_label_size_per_stack=self.min_samples / 2,
                               save_label_stack=True,
                               image_ignore_border=self.border_size,
                               parent_search_dilation_size=self.parent_dilation,
                               max_parent_search_frames=self.parent_frame_search)
    first = True
    current_predictions = []
    current_predictions_2 = []
    current_images = []
    # reset_every_frames = 20
    for i, video_frame in enumerate(video_frames):
        # if int(video_frame) < 150 or int(video_frame) > 250:
        #     continue
        with Timer('processing video frame ' + str(video_frame)):
            dataset_entry = self.dataset_val.get({'image_id': video_frame})
            datasources = dataset_entry['datasources']
            generators = dataset_entry['generators']
            feed_dict = {self.data_val: np.expand_dims(generators['image'],
                                                       axis=0)}
            # Feed the previous LSTM output states back in for every frame
            # after the first (loop variable renamed to j to avoid shadowing
            # the frame index i).
            if not first:
                for j in range(len(self.lstm_input_states_val)):
                    feed_dict[self.lstm_input_states_val[j]] = \
                        current_lstm_states[j]
            run_tuple = self.sess.run(
                [self.embeddings_normalized_val,
                 self.embeddings_normalized_2_val]
                + list(self.lstm_output_states_val),
                feed_dict=feed_dict)
            # print(iv[0].decode())
            embeddings_softmax = np.squeeze(run_tuple[0], axis=0)
            embeddings_softmax_2 = np.squeeze(run_tuple[1], axis=0)
            current_lstm_states = run_tuple[2:]
            # current_predictions.append(embeddings_softmax)
            # current_predictions_2.append(embeddings_softmax_2)
            current_images.append(generators['image'])
            # current_instances.append(instance_segmentation_test.get_instances_cosine_kmeans_2d(embeddings_softmax))
            first = False
            datasources = dataset_entry['datasources']
            input_image = datasources['image']
            transformations = dataset_entry['transformations']
            transformation = transformations['image']
            # embeddings_original = utils.sitk_image.transform_np_output_to_sitk_input(
            #     embeddings_softmax_2, output_spacing=None, channel_axis=2,
            #     input_image_sitk=input_image, transform=transformation,
            #     interpolator='linear', output_pixel_type=sitk.sitkFloat32)
            # embeddings_softmax_2 = utils.sitk_np.sitk_list_to_np(embeddings_original, axis=2)
            current_predictions_2.append(embeddings_softmax_2)
            # if not first and i % reset_every_frames != 0:
            #     run_tuple = self.sess.run([self.embeddings_normalized_val, self.embeddings_normalized_2_val] + list(self.lstm_output_states_val), feed_dict=feed_dict)
            #     embeddings_softmax_2 = np.squeeze(run_tuple[1], axis=0)
            #     tracker.add_reset_slice(np.transpose(embeddings_softmax_2, [2, 0, 1]))

    # prediction = np.stack(current_predictions, axis=self.time_stack_axis)
    # del current_predictions
    # utils.io.image.write_np(prediction, os.path.join(self.output_folder, 'embeddings.mha'), self.data_format)
    # del prediction
    prediction_2 = np.stack(current_predictions_2, axis=self.time_stack_axis)
    del current_predictions_2
    utils.io.image.write_np(prediction_2,
                            os.path.join(self.output_folder, 'embeddings_2.mha'),
                            self.data_format)
    del prediction_2
    images = np.stack(current_images, axis=self.time_stack_axis)
    del current_images
    utils.io.image.write_np(images,
                            os.path.join(self.output_folder, 'image.mha'),
                            self.data_format)
    del images
    transformations = dataset_entry['transformations']
    transformation = transformations['image']
    sitk.WriteTransform(transformation,
                        os.path.join(self.output_folder, 'transform.txt'))
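# A stripped-down sketch of the recurrent-state plumbing in testxx()/test()
# above: the session returns the LSTM output states along with the embeddings,
# and on every subsequent frame those states are fed back through the
# input-state placeholders, so the network sees the video as one continuous
# sequence. Attribute names mirror the code above; `frames` is hypothetical.
current_states = None
for frame in frames:
    feed_dict = {self.data_val: np.expand_dims(frame, axis=0)}
    if current_states is not None:
        for placeholder, state in zip(self.lstm_input_states_val,
                                      current_states):
            feed_dict[placeholder] = state
    outputs = self.sess.run([self.embeddings_normalized_2_val]
                            + list(self.lstm_output_states_val),
                            feed_dict=feed_dict)
    embedding, current_states = outputs[0], outputs[1:]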
def test(self):
    print('Testing...', self.base_folder)
    video_frames = glob.glob(self.base_folder + '*.tif')
    video_frames = sorted([os.path.splitext(os.path.basename(frame))[0][1:]
                           for frame in video_frames])
    video_frames = video_frames[:5]
    # coord_factors = 0.001
    # min_cluster_size = 100
    # min_samples = 100
    # min_label_size_per_stack = 100
    tracker = EmbeddingTracker(coord_factors=self.coord_factors,
                               stack_neighboring_slices=2,
                               min_cluster_size=self.min_samples,
                               min_samples=self.min_samples,
                               min_label_size_per_stack=self.min_samples / 2,
                               save_label_stack=True,
                               image_ignore_border=self.border_size,
                               parent_search_dilation_size=self.parent_dilation,
                               max_parent_search_frames=self.parent_frame_search)
    first = True
    current_predictions = []
    current_predictions_2 = []
    current_images = []
    # reset_every_frames = 20
    for i, video_frame in enumerate(video_frames):
        # if int(video_frame) < 150 or int(video_frame) > 250:
        #     continue
        with Timer('processing video frame ' + str(video_frame)):
            dataset_entry = self.dataset_val.get({'image_id': video_frame})
            datasources = dataset_entry['datasources']
            generators = dataset_entry['generators']
            feed_dict = {self.data_val: np.expand_dims(generators['image'],
                                                       axis=0)}
            # Feed the previous LSTM output states back in for every frame
            # after the first (loop variable renamed to j to avoid shadowing
            # the frame index i).
            if not first:
                for j in range(len(self.lstm_input_states_val)):
                    feed_dict[self.lstm_input_states_val[j]] = \
                        current_lstm_states[j]
            run_tuple = self.sess.run(
                [self.embeddings_normalized_val,
                 self.embeddings_normalized_2_val]
                + list(self.lstm_output_states_val),
                feed_dict=feed_dict)
            # print(iv[0].decode())
            embeddings_softmax = np.squeeze(run_tuple[0], axis=0)
            embeddings_softmax_2 = np.squeeze(run_tuple[1], axis=0)
            current_lstm_states = run_tuple[2:]
            # current_predictions.append(embeddings_softmax)
            # current_predictions_2.append(embeddings_softmax_2)
            current_images.append(generators['image'])
            # current_instances.append(instance_segmentation_test.get_instances_cosine_kmeans_2d(embeddings_softmax))
            first = False
            datasources = dataset_entry['datasources']
            input_image = datasources['image']
            transformations = dataset_entry['transformations']
            transformation = transformations['image']
            # embeddings_original = utils.sitk_image.transform_np_output_to_sitk_input(
            #     embeddings_softmax_2, output_spacing=None, channel_axis=2,
            #     input_image_sitk=input_image, transform=transformation,
            #     interpolator='linear', output_pixel_type=sitk.sitkFloat32)
            # embeddings_softmax_2 = utils.sitk_np.sitk_list_to_np(embeddings_original, axis=2)
            current_predictions_2.append(embeddings_softmax_2)
            tracker.add_slice(np.transpose(embeddings_softmax_2, [2, 0, 1]))
            if tracker.stacked_label_image is not None:
                utils.io.image.write_np(
                    tracker.stacked_label_image,
                    os.path.join(self.output_folder, 'merged.mha'))
            # if not first and i % reset_every_frames != 0:
            #     run_tuple = self.sess.run([self.embeddings_normalized_val, self.embeddings_normalized_2_val] + list(self.lstm_output_states_val), feed_dict=feed_dict)
            #     embeddings_softmax_2 = np.squeeze(run_tuple[1], axis=0)
            #     tracker.add_reset_slice(np.transpose(embeddings_softmax_2, [2, 0, 1]))

    # prediction = np.stack(current_predictions, axis=self.time_stack_axis)
    # del current_predictions
    # utils.io.image.write_np(prediction, os.path.join(self.output_folder, 'embeddings.mha'), self.data_format)
    # del prediction
    prediction_2 = np.stack(current_predictions_2, axis=self.time_stack_axis)
    del current_predictions_2
    utils.io.image.write_np(prediction_2,
                            os.path.join(self.output_folder, 'embeddings_2.mha'),
                            self.data_format)
    del prediction_2
    images = np.stack(current_images, axis=self.time_stack_axis)
    del current_images
    utils.io.image.write_np(images,
                            os.path.join(self.output_folder, 'image.mha'),
                            self.data_format)
    del images
    transformations = dataset_entry['transformations']
    transformation = transformations['image']
    sitk.WriteTransform(transformation,
                        os.path.join(self.output_folder, 'transform.txt'))
    # if self.data_format == 'channels_last':
    #     prediction_2 = np.transpose(prediction_2, [3, 0, 1, 2])
    # two_slices = tracker.get_instances_cosine_dbscan_slice_by_slice(prediction_2)
    # utils.io.image.write_np(two_slices, os.path.join(self.output_folder, 'two_slices.mha'))
    # merged = tracker.merge_consecutive_slices(two_slices, slice_neighbour_size=2)
    # utils.io.image.write_np(merged, os.path.join(self.output_folder, 'merged.mha'), self.data_format)
    datasources = dataset_entry['datasources']
    input_image = datasources['image']
    if self.sigma == 1:
        interpolator = 'label_gaussian'
    else:
        interpolator = 'nearest'
    merged = tracker.stacked_label_image
    final_predictions = utils.sitk_image.transform_np_output_to_sitk_input(
        merged,
        output_spacing=None,
        channel_axis=0,
        input_image_sitk=input_image,
        transform=transformation,
        interpolator=interpolator,
        output_pixel_type=sitk.sitkUInt16)
    tracker.stacked_label_image = np.stack(
        [utils.sitk_np.sitk_to_np(sitk_im) for sitk_im in final_predictions],
        axis=0)
    tracker.finalize()
    final_predictions = [utils.sitk_np.np_to_sitk(sitk_im)
                         for sitk_im in tracker.stacked_label_image]
    track_tuples = tracker.track_tuples
    # final_predictions = [utils.sitk_np.np_to_sitk(np.squeeze(im), type=np.uint16) for im in np.split(merged, merged.shape[0], axis=0)]
    # final_predictions_smoothed_2 = [utils.sitk_image.apply_np_image_function(im, lambda x: self.label_smooth(x, sigma=2)) for im in final_predictions]
    if self.sigma > 1:
        final_predictions = [
            utils.sitk_image.apply_np_image_function(
                im, lambda x: self.label_smooth(x, sigma=self.sigma))
            for im in final_predictions]
    for video_frame, final_prediction in zip(video_frames, final_predictions):
        utils.io.image.write(
            final_prediction,
            os.path.join(self.output_folder,
                         self.image_prefix + video_frame + '.tif'))
    utils.io.image.write_np(np.stack(tracker.label_stack_list, axis=1),
                            os.path.join(self.output_folder, 'label_stack.mha'))
    final_predictions_stacked = utils.sitk_image.accumulate(final_predictions)
    utils.io.image.write(final_predictions_stacked,
                         os.path.join(self.output_folder, 'stacked.mha'))
    # utils.io.image.write(utils.sitk_image.accumulate(final_predictions_smoothed_2), os.path.join(self.output_folder, 'stacked_2.mha'))
    # utils.io.image.write(utils.sitk_image.accumulate(final_predictions_smoothed_4), os.path.join(self.output_folder, 'stacked_4.mha'))
    print(track_tuples)
    utils.io.text.save_list_csv(track_tuples,
                                os.path.join(self.output_folder,
                                             self.track_file_name),
                                delimiter=' ')
def demo(sess, net, im_file, vis_file, fits_fn, conf_thresh=0.8,
         eval_class=True):
    """
    Detect object classes in an image using pre-computed object proposals.

    im_file:    the "fused" image file path
    vis_file:   the background image file on which detections are laid.
                Normally, this is just the IR image file path
    fits_fn:    the FITS file path
    eval_class: True  - use traditional per-class-based evaluation style
                False - use per-RoI-based evaluation
    """
    show_img_size = cfg.TEST.SCALES[0]
    if not os.path.exists(im_file):
        print('%s cannot be found' % (im_file))
        return -1
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    image_name = osp.basename(im_file)
    scores, boxes = im_detect(sess, net, im, save_vis_dir=None,
                              img_name=os.path.splitext(image_name)[0])
    boxes *= float(show_img_size) / float(im.shape[0])
    timer.toc()
    sys.stdout.write('Done in {:.3f} secs'.format(timer.total_time))
    sys.stdout.flush()
    print(scores)

    im = cv2.imread(vis_file)
    my_dpi = 100
    fig = plt.figure()
    fig.set_size_inches(show_img_size / my_dpi, show_img_size / my_dpi)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    ax.set_xlim([0, show_img_size])
    ax.set_ylim([show_img_size, 0])
    # ax.set_aspect('equal')
    im = cv2.resize(im, (show_img_size, show_img_size))
    im = im[:, :, (2, 1, 0)]
    ax.imshow(im, aspect='equal')
    if fits_fn is not None:
        patch_contour = fuse(fits_fn, im, None, sigma_level=4, mask_ir=False,
                             get_path_patch_only=True)
        ax.add_patch(patch_contour)

    NMS_THRESH = cfg.TEST.NMS  # cfg.TEST.RPN_NMS_THRESH  # 0.3
    tt_vis = 0
    bbox_img = []
    bscore_img = []
    num_sources = 0
    # if (eval_class):
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis]))  # .astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        num_sources += vis_detections(im, cls, dets, ax, thresh=conf_thresh)
        # dets = np.hstack((dets, np.ones([dets.shape[0], 1]) * cls_ind))
        # if (dets.shape[0] > 0):
        #     bbox_img.append(dets)
        #     bscore_img.append(np.reshape(dets[:, -2], [-1, 1]))
    # else:
    #     for eoi_ind, eoi in enumerate(boxes):
    #         eoi_scores = scores[eoi_ind, 1:]  # skip background
    #         cls_ind = np.argmax(eoi_scores) + 1  # add the background index back
    #         cls_boxes = boxes[eoi_ind, 4 * cls_ind:4 * (cls_ind + 1)]
    #         cls_scores = scores[eoi_ind, cls_ind]
    #         dets = np.hstack((np.reshape(cls_boxes, [1, -1]),
    #                           np.reshape(cls_scores, [-1, 1])))  # .astype(np.float32)
    #         dets = np.hstack((dets, np.ones([dets.shape[0], 1]) * cls_ind))
    #         bbox_img.append(dets)
    #         bscore_img.append(np.reshape(dets[:, -2], [-1, 1]))
    #
    #     boxes_im = np.vstack(bbox_img)
    #     scores_im = np.vstack(bscore_img)
    #
    #     # if (not eval_class):
    #     # a numpy float is a C double, so need to use float32
    #     keep = nms(boxes_im[:, :-1].astype(np.float32), NMS_THRESH)
    #     boxes_im = boxes_im[keep, :]
    #     scores_im = scores_im[keep, :]
    #
    #     keep_indices = range(boxes_im.shape[0])
    #     num_sources = vis_detections(im, None, boxes_im[keep_indices, :], ax, thresh=conf_thresh)

    print(', found %d sources' % num_sources)
    return 0
def test_net3(net, imdb, all_boxes2_name):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # heuristic: keep an average of 40 detections per class per image prior
    # to NMS
    max_per_set = cfg.TEST.MAX_PER_SET_F * num_images
    # heuristic: keep at most 100 detections per class per image prior to NMS
    max_per_image = cfg.TEST.MAX_PER_IMAGE
    # detection threshold for each class (this is adaptively set based on the
    # max_per_set constraint)
    thresh = -np.inf * np.ones(imdb.num_classes)
    # top_scores will hold one minheap of scores per class (used to enforce
    # the max_per_set constraint)
    top_scores = [[] for _ in xrange(imdb.num_classes)]
    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb

    # a second set of scores and boxes to average with
    d = g_utils.load_variables(all_boxes2_name)
    all_boxes2 = d['all_boxes']

    for i in xrange(num_images):
        image_paths = imdb.image_path_at(i)
        im = []
        for image_path in image_paths:
            image_path2 = image_path + '_norm.png'
            im1 = cv2.imread(image_path)
            im2 = cv2.imread(image_path2)
            ims = np.zeros((im1.shape[0], im1.shape[1], 6))
            # TODO: to test on lua pre-trained model use:
            im1 = im1[:, :, ::-1]
            im2 = im2[:, :, ::-1]
            ims[:, :, 0:3] = im1
            ims[:, :, 3:6] = im2
            im.append(ims)

        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, roidb[i]['boxes'])
        _t['im_detect'].toc()

        _t['misc'].tic()
        for j in xrange(1, imdb.num_classes):
            # average in the second set of scores and boxes
            scores[:, j] = (scores[:, j] + all_boxes2[j][i][:, 4]) / 2
            boxes[:, j * 4:(j + 1) * 4] = (boxes[:, j * 4:(j + 1) * 4] +
                                           all_boxes2[j][i][:, 0:4]) / 2
            inds = np.where((scores[:, j] > thresh[j]) &
                            (roidb[i]['gt_classes'] == 0))[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            cls_boxes = cls_boxes[top_inds, :]
            # push new scores onto the minheap
            for val in cls_scores:
                heapq.heappush(top_scores[j], val)
            # if we've collected more than the max number of detections,
            # then pop items off the minheap and update the class threshold
            if len(top_scores[j]) > max_per_set:
                while len(top_scores[j]) > max_per_set:
                    heapq.heappop(top_scores[j])
                thresh[j] = top_scores[j][0]

            all_boxes[j][i] = \
                np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)

            if 0:
                keep = nms(all_boxes[j][i], 0.3)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    det_file = os.path.join(output_dir,
                            'detections' + cfg.TEST.DET_SALT + '.pkl')
    g_utils.save_variables(det_file, [all_boxes], ['all_boxes'],
                           overwrite=True)
    det_file = os.path.join(output_dir,
                            'detections' + cfg.TEST.DET_SALT + '.pkl')
    g_utils.scio.savemat(det_file, {'all_boxes': all_boxes},
                         do_compression=True)

    print 'Applying NMS to all detections'
    nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)

    print 'Evaluating detections'
    ap, prec, rec, classes, class_to_ind = imdb.evaluate_detections(
        nms_dets, output_dir, cfg.TEST.DET_SALT, cfg.TEST.EVAL_SALT)
def train_model(self, sess, max_iters):
    """Network training loop."""
    data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

    # classification loss
    cls_score = self.net.get_output('cls_score')
    label = tf.placeholder(tf.int32, shape=[None])
    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(cls_score, label))

    # subcategory classification loss
    if cfg.TRAIN.SUBCLS:
        subcls_score = self.net.get_output('subcls_score')
        sublabel = tf.placeholder(tf.int32, shape=[None])
        subcls_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(subcls_score,
                                                           sublabel))

    # bounding box regression L1 loss
    bbox_pred = self.net.get_output('bbox_pred')
    bbox_targets = tf.placeholder(tf.float32,
                                  shape=[None, 4 * self.imdb.num_classes])
    bbox_weights = tf.placeholder(tf.float32,
                                  shape=[None, 4 * self.imdb.num_classes])
    loss_box = tf.reduce_mean(
        tf.reduce_sum(tf.mul(bbox_weights,
                             tf.abs(tf.sub(bbox_pred, bbox_targets))),
                      reduction_indices=[1]))

    # multi-task loss
    if cfg.TRAIN.SUBCLS:
        loss = cross_entropy + subcls_cross_entropy + loss_box
    else:
        loss = cross_entropy + loss_box

    # optimizer
    lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
    momentum = cfg.TRAIN.MOMENTUM
    train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(loss)

    # initialize variables
    sess.run(tf.initialize_all_variables())
    if self.pretrained_model is not None:
        print ('Loading pretrained model '
               'weights from {:s}').format(self.pretrained_model)
        self.net.load(self.pretrained_model, sess, True)

    last_snapshot_iter = -1
    timer = Timer()
    for iter in range(max_iters):
        # learning rate
        if iter >= cfg.TRAIN.STEPSIZE:
            sess.run(tf.assign(lr, cfg.TRAIN.LEARNING_RATE * cfg.TRAIN.GAMMA))
        else:
            sess.run(tf.assign(lr, cfg.TRAIN.LEARNING_RATE))

        # get one batch
        blobs = data_layer.forward()

        # Make one SGD update
        if cfg.TRAIN.SUBCLS:
            feed_dict = {self.net.data: blobs['data'],
                         self.net.rois: blobs['rois'],
                         self.net.keep_prob: 0.5,
                         label: blobs['labels'],
                         sublabel: blobs['sublabels'],
                         bbox_targets: blobs['bbox_targets'],
                         bbox_weights: blobs['bbox_inside_weights']}
        else:
            feed_dict = {self.net.data: blobs['data'],
                         self.net.rois: blobs['rois'],
                         self.net.keep_prob: 0.5,
                         label: blobs['labels'],
                         bbox_targets: blobs['bbox_targets'],
                         bbox_weights: blobs['bbox_inside_weights']}

        timer.tic()
        if cfg.TRAIN.SUBCLS:
            loss_cls_value, loss_subcls_value, loss_box_value, _ = sess.run(
                [cross_entropy, subcls_cross_entropy, loss_box, train_op],
                feed_dict=feed_dict)
        else:
            loss_cls_value, loss_box_value, _ = sess.run(
                [cross_entropy, loss_box, train_op], feed_dict=feed_dict)
        timer.toc()

        if cfg.TRAIN.SUBCLS:
            print 'iter: %d / %d, loss_cls: %.4f, loss_subcls: %.4f, loss_box: %.4f, lr: %f, time: %f' % \
                (iter + 1, max_iters, loss_cls_value, loss_subcls_value,
                 loss_box_value, lr.eval(), timer.diff)
        else:
            print 'iter: %d / %d, loss_cls: %.4f, loss_box: %.4f, lr: %f' % \
                (iter + 1, max_iters, loss_cls_value, loss_box_value,
                 lr.eval())

        if (iter + 1) % (10 * cfg.TRAIN.DISPLAY) == 0:
            print 'speed: {:.3f}s / iter'.format(timer.average_time)

        if (iter + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            last_snapshot_iter = iter
            self.snapshot(sess, iter)

    if last_snapshot_iter != iter:
        self.snapshot(sess, iter)
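# A small NumPy sketch of the weighted L1 box loss built above with tf.mul /
# tf.sub: the per-RoI sum of |pred - target| masked by bbox_weights, averaged
# over the batch. Array names mirror the placeholders; the values are dummies.
import numpy as np

bbox_pred = np.array([[0.5, 0.2, 0.1, 0.0]])     # (num_rois, 4 * num_classes)
bbox_targets = np.array([[0.4, 0.0, 0.1, 0.0]])
bbox_weights = np.array([[1.0, 1.0, 1.0, 1.0]])  # zero for non-target classes

loss_box = np.mean(
    np.sum(bbox_weights * np.abs(bbox_pred - bbox_targets), axis=1))
# -> 0.3 for this single RoI (|0.5-0.4| + |0.2-0.0| = 0.1 + 0.2)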
def test_net(save_folder, net, detector, cuda, testset, transform,
             max_per_image=300, thresh=0.005):
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    # dump predictions and assoc. ground truth to text file for now
    num_images = len(testset)
    num_classes = (21, 81)[args.dataset == 'COCO']
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(save_folder, 'detections.pkl')

    if args.retest:
        with open(det_file, 'rb') as f:
            all_boxes = pickle.load(f)
        print('Evaluating detections')
        testset.evaluate_detections(all_boxes, save_folder)
        return

    for i in range(num_images):
        img = testset.pull_image(i)
        x = Variable(transform(img).unsqueeze(0), volatile=True)
        if cuda:
            x = x.cuda()
def train(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    drop_last = True

    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If using the sync batch norm strategy, drop the last batch if the
        # number of samples in batch_data is less than cfg.BATCH_SIZE to
        # avoid NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPUs
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE is divisible by the number of GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisible by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # In multi-GPU training mode, batch data is allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    config_info = {'input_size': 769, 'output_size': 1, 'block_num': 7}
    config = ([(cfg.SLIM.NAS_SPACE_NAME, config_info)])
    factory = SearchSpaceFactory()
    space = factory.get_search_space(config)

    port = cfg.SLIM.NAS_PORT
    server_address = (cfg.SLIM.NAS_ADDRESS, port)
    sa_nas = SANAS(config,
                   server_addr=server_address,
                   search_steps=cfg.SLIM.NAS_SEARCH_STEPS,
                   is_server=cfg.SLIM.NAS_IS_SERVER)
    for step in range(cfg.SLIM.NAS_SEARCH_STEPS):
        arch = sa_nas.next_archs()[0]

        start_prog = fluid.Program()
        train_prog = fluid.Program()

        data_loader, avg_loss, lr, pred, grts, masks = build_model(
            train_prog, start_prog, arch=arch, phase=ModelPhase.TRAIN)

        cur_flops = flops(train_prog)
        print('current step:', step, 'flops:', cur_flops)

        data_loader.set_sample_generator(data_generator,
                                         batch_size=batch_size_per_dev,
                                         drop_last=drop_last)

        exe = fluid.Executor(place)
        exe.run(start_prog)

        exec_strategy = fluid.ExecutionStrategy()
        # Clear temporary variables every 100 iterations
        if args.use_gpu:
            exec_strategy.num_threads = fluid.core.get_cuda_device_count()
            exec_strategy.num_iteration_per_drop_scope = 100
        build_strategy = fluid.BuildStrategy()

        if cfg.NUM_TRAINERS > 1 and args.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy,
                                                 train_prog)
            exec_strategy.num_threads = 1

        if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
            if dev_count > 1:
                # Apply sync batch norm strategy
                print_info("Sync BatchNorm strategy is effective.")
                build_strategy.sync_batch_norm = True
            else:
                print_info("Sync BatchNorm strategy will not be effective if"
                           " GPU device count <= 1")
        compiled_train_prog = fluid.CompiledProgram(
            train_prog).with_data_parallel(loss_name=avg_loss.name,
                                           exec_strategy=exec_strategy,
                                           build_strategy=build_strategy)

        # Resume training
        begin_epoch = cfg.SOLVER.BEGIN_EPOCH
        if cfg.TRAIN.RESUME_MODEL_DIR:
            begin_epoch = load_checkpoint(exe, train_prog)
        # Load pretrained model
        elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
            load_pretrained_weights(exe, train_prog,
                                    cfg.TRAIN.PRETRAINED_MODEL_DIR)
        else:
            print_info(
                'Pretrained model dir {} not exists, training from scratch...'.
                format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

        fetch_list = [avg_loss.name, lr.name]

        global_step = 0
        all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
        if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True:
            all_step += 1
        all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)
        avg_loss = 0.0
        timer = Timer()
        timer.start()
        if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
            raise ValueError(
                ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]"
                 ).format(begin_epoch, cfg.SOLVER.NUM_EPOCHS))

        if args.use_mpio:
            print_info("Use multiprocess reader")
        else:
            print_info("Use multi-thread reader")

        best_miou = 0.0
        for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
            data_loader.start()
            while True:
                try:
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1
                    if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()
                except fluid.core.EOFException:
                    data_loader.reset()
                    break
                except Exception as e:
                    print(e)

            if epoch > cfg.SLIM.NAS_START_EVAL_EPOCH:
                ckpt_dir = save_checkpoint(train_prog, '{}_tmp'.format(port))
                _, mean_iou, _, mean_acc = evaluate(cfg=cfg,
                                                    arch=arch,
                                                    ckpt_dir=ckpt_dir,
                                                    use_gpu=args.use_gpu,
                                                    use_mpio=args.use_mpio)
                if best_miou < mean_iou:
                    print('search step {}, epoch {} best iou {}'.format(
                        step, epoch, mean_iou))
                    best_miou = mean_iou

        sa_nas.reward(float(best_miou))
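# A condensed sketch of the SANAS search pattern used above: each search step
# asks the controller for a candidate architecture, trains and evaluates it,
# and feeds the score back as the reward driving the simulated-annealing
# search. `build_and_evaluate` is a hypothetical stand-in for the build/train/
# eval code inside train(); the SANAS calls mirror the usage above.
sa_nas = SANAS(config, server_addr=server_address,
               search_steps=search_steps, is_server=True)
for step in range(search_steps):
    arch = sa_nas.next_archs()[0]    # sample a candidate architecture
    miou = build_and_evaluate(arch)  # train briefly, measure validation mIoU
    sa_nas.reward(float(miou))       # higher reward steers the next samples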
def test_net(net, imdb):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(imdb.image_index)
    # heuristic: keep an average of 40 detections per class per image prior
    # to NMS
    max_per_set = 40 * num_images
    # heuristic: keep at most 100 detections per class per image prior to NMS
    max_per_image = 100
    # detection threshold for each class (this is adaptively set based on the
    # max_per_set constraint)
    thresh = -np.inf * np.ones(imdb.num_classes)
    # top_scores will hold one minheap of scores per class (used to enforce
    # the max_per_set constraint)
    top_scores = [[] for _ in xrange(imdb.num_classes)]
    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    if not cfg.TEST.HAS_RPN:
        roidb = imdb.roidb

    for i in xrange(num_images):
        # filter out any ground truth boxes
        if cfg.TEST.HAS_RPN:
            box_proposals = None
        else:
            box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im, box_proposals)
        _t['im_detect'].toc()

        _t['misc'].tic()
        for j in xrange(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh[j])[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            top_inds = np.argsort(-cls_scores)[:max_per_image]
            cls_scores = cls_scores[top_inds]
            cls_boxes = cls_boxes[top_inds, :]
            # push new scores onto the minheap
            for val in cls_scores:
                heapq.heappush(top_scores[j], val)
            # if we've collected more than the max number of detections,
            # then pop items off the minheap and update the class threshold
            if len(top_scores[j]) > max_per_set:
                while len(top_scores[j]) > max_per_set:
                    heapq.heappop(top_scores[j])
                thresh[j] = top_scores[j][0]

            all_boxes[j][i] = \
                np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)

            if 0:
                keep = nms(all_boxes[j][i], 0.3)
                vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :])
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    for j in xrange(1, imdb.num_classes):
        for i in xrange(num_images):
            inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0]
            all_boxes[j][i] = all_boxes[j][i][inds, :]

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Applying NMS to all detections'
    nms_dets = apply_nms(all_boxes, cfg.TEST.NMS)

    print 'Evaluating detections'
    imdb.evaluate_detections(nms_dets, output_dir)
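# A self-contained sketch of the min-heap trick above, assuming scores arrive
# per class across many images: the heap keeps only the top `max_per_set`
# scores seen so far, so its smallest element serves as the adaptive
# detection threshold once the budget is exceeded. `update_threshold` is a
# hypothetical helper distilled from the loop body above.
import heapq

def update_threshold(top_scores, new_scores, max_per_set):
    for val in new_scores:
        heapq.heappush(top_scores, val)
    if len(top_scores) > max_per_set:
        while len(top_scores) > max_per_set:
            heapq.heappop(top_scores)  # discard the lowest-scoring detections
        # smallest retained score keeps exactly max_per_set detections
        return top_scores[0]
    return float('-inf')  # budget not yet exceeded; accept everything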
def test_net(net, imdb, max_per_image=100, thresh=1e-9, vis=False):
    """Test a network on an image database."""
    if 'coco' in imdb.name:
        max_per_image = 100
    print 'max_per_image: ', max_per_image
    print 'thresh: ', thresh
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_scores = [[[] for _ in xrange(num_images)]
                  for _ in xrange(imdb.num_classes)]
    all_boxes_o = [[[] for _ in xrange(num_images)]
                   for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if cfg.OPG_DEBUG:
        vis_dir = get_vis_dir(imdb, net)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb
    test_scales = cfg.TEST.SCALES
    save_id = 0
    for i in xrange(num_images):
        if vis:
            import matplotlib.pyplot as plt
            # close all open figure windows
            # plt.close('all')

        box_proposals = roidb[i]['boxes']
        rois_per_this_image = min(cfg.TEST.ROIS_PER_IM, len(box_proposals))
        box_proposals = box_proposals[0:rois_per_this_image, :]

        if cfg.USE_ROI_SCORE:
            box_scores = roidb[i]['box_scores']
        else:
            box_scores = None

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores = None
        boxes = None
        for target_size in test_scales:
            if cfg.OPG_DEBUG:
                save_path = os.path.join(vis_dir, str(save_id) + '_.png')
                save_debug_im(im, target_size, save_path)
                save_id += 1
            cfg.TEST.SCALES = (target_size, )
            scores_scale, boxes_scale = im_detect(net, im, box_proposals,
                                                  box_scores)
            if scores is None:
                scores = scores_scale
                boxes = boxes_scale
            else:
                # multi-scale fusion: sum class scores across scales
                # TODO(YH): revisit whether a plain sum is the right fusion
                scores += scores_scale
                assert np.array_equal(boxes, boxes_scale), \
                    'boxes at each scale should be the same'
            if cfg.OPG_DEBUG:
                os.remove(save_path)

        if cfg.TEST.USE_FLIPPED:
            im_flip = im[:, ::-1, :]
            box_proposals_flip = box_proposals.copy()
            oldx1 = box_proposals_flip[:, 0].copy()
            oldx2 = box_proposals_flip[:, 2].copy()
            box_proposals_flip[:, 0] = im.shape[1] - oldx2 - 1
            box_proposals_flip[:, 2] = im.shape[1] - oldx1 - 1
            for target_size in test_scales:
                if cfg.OPG_DEBUG:
                    save_path = os.path.join(vis_dir, str(save_id) + '_.png')
                    save_debug_im(im_flip, target_size, save_path)
                    save_id += 1
                cfg.TEST.SCALES = (target_size, )
                scores_scale, boxes_scale = im_detect(net, im_flip,
                                                      box_proposals_flip,
                                                      box_scores)
                scores += scores_scale
                if cfg.OPG_DEBUG:
                    os.remove(save_path)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # NOTE: unlike the fully supervised testers, this loop deliberately
        # starts at j = 0; all columns are real classes here, there is no
        # separate background column to skip.
        for j in xrange(0, imdb.num_classes):
            if 'trainval' in imdb.name:
                if imdb.image_classes_at(i)[j] == 0:
                    all_boxes[j][i] = np.zeros((0, 5), dtype=np.float32)
                    all_boxes_o[j][i] = np.zeros((0, 5), dtype=np.float32)
                    continue
            all_scores[j][i] = sum(scores[:, j])

            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            if vis:
                vis_detections(im, imdb.classes[j], cls_dets, thresh=0.03)
            all_boxes[j][i] = cls_dets

            # keep the raw (pre-threshold, pre-NMS) detections as well
            cls_scores_o = scores[:, j]
            cls_boxes_o = boxes[:, j * 4:(j + 1) * 4]
            cls_dets_o = np.hstack((cls_boxes_o,
                                    cls_scores_o[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            all_boxes_o[j][i] = cls_dets_o

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(0, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(0, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    if cfg.OPG_DEBUG:
        return

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    det_file_o = os.path.join(output_dir, 'detections_o.pkl')
    with open(det_file_o, 'wb') as f:
        cPickle.dump(all_boxes_o, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir, all_scores=all_scores)
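# ---------------------------------------------------------------------------
# The max_per_image block above caps detections per image *across all
# classes*: it pools every class's scores, takes the score of the
# max_per_image-th best detection as a single threshold, and re-filters each
# class list with it. A stand-alone version (argument layout assumed):
import numpy as np


def limit_dets_per_image(dets_per_class, max_per_image):
    """dets_per_class: list of (N_j, 5) arrays; the last column is the score."""
    pooled = [d[:, -1] for d in dets_per_class if len(d)]
    if not pooled:
        return dets_per_class
    image_scores = np.hstack(pooled)
    if len(image_scores) <= max_per_image:
        return dets_per_class
    # score of the max_per_image-th highest detection over all classes
    image_thresh = np.sort(image_scores)[-max_per_image]
    return [d[d[:, -1] >= image_thresh, :] if len(d) else d
            for d in dets_per_class]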
def train_model(self, sess, max_iters):
    # Build data layers for both training and validation set
    self.data_layer = RoIDataLayer(self.roidb, self.imdb.num_classes)
    self.data_layer_val = RoIDataLayer(self.valroidb, self.imdb.num_classes,
                                       random=True)

    # Construct the computation graph
    lr, train_op = self.construct_graph(sess)

    # Find previous snapshots if there are any to restore from
    lsf, nfiles, sfiles = self.find_previous()

    # Initialize the variables or restore them from the last snapshot
    if lsf == 0:
        rate, last_snapshot_iter, stepsizes, np_paths, ss_paths = \
            self.initialize(sess)
    else:
        rate, last_snapshot_iter, stepsizes, np_paths, ss_paths = \
            self.restore(sess, str(sfiles[-1]), str(nfiles[-1]))
    timer = Timer()
    iter = last_snapshot_iter + 1
    last_summary_time = time.time()
    # Make sure the lists are not empty
    stepsizes.append(max_iters)
    stepsizes.reverse()
    next_stepsize = stepsizes.pop()
    while iter < max_iters + 1:
        # Learning rate
        if iter == next_stepsize + 1:
            # Add a snapshot here before reducing the learning rate
            self.snapshot(sess, iter)
            rate *= cfg.TRAIN.GAMMA
            sess.run(tf.assign(lr, rate))
            next_stepsize = stepsizes.pop()

        timer.tic()
        # Get training data, one batch at a time
        blobs = self.data_layer.forward()

        now = time.time()
        if iter == 1 or now - last_summary_time > cfg.TRAIN.SUMMARY_INTERVAL:
            # Compute the graph with summary
            rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss, summary = \
                self.net.train_step_with_summary(sess, blobs, train_op)
            self.writer.add_summary(summary, float(iter))
            # Also check the summary on the validation set
            blobs_val = self.data_layer_val.forward()
            summary_val = self.net.get_summary(sess, blobs_val)
            self.valwriter.add_summary(summary_val, float(iter))
            last_summary_time = now
        else:
            # Compute the graph without summary
            rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss = \
                self.net.train_step(sess, blobs, train_op)
        timer.toc()

        # Display training information
        if iter % (cfg.TRAIN.DISPLAY) == 0:
            print('iter: %d / %d, total loss: %.6f\n >>> rpn_loss_cls: %.6f\n '
                  '>>> rpn_loss_box: %.6f\n >>> loss_cls: %.6f\n '
                  '>>> loss_box: %.6f\n >>> lr: %f' %
                  (iter, max_iters, total_loss, rpn_loss_cls, rpn_loss_box,
                   loss_cls, loss_box, lr.eval()))
            print('speed: {:.3f}s / iter'.format(timer.average_time))

        # Snapshotting
        if iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            last_snapshot_iter = iter
            ss_path, np_path = self.snapshot(sess, iter)
            np_paths.append(np_path)
            ss_paths.append(ss_path)

            # Remove old snapshots if there are too many
            if len(np_paths) > cfg.TRAIN.SNAPSHOT_KEPT:
                self.remove_snapshot(np_paths, ss_paths)

        iter += 1

    if last_snapshot_iter != iter - 1:
        self.snapshot(sess, iter - 1)

    self.writer.close()
    self.valwriter.close()
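# ---------------------------------------------------------------------------
# The learning-rate handling in train_model pops the next boundary off the
# reversed stepsizes list and multiplies the rate by cfg.TRAIN.GAMMA each
# time a boundary is crossed. The same step-decay schedule written as a pure
# function for clarity (a sketch; base_lr, gamma and stepsizes are assumed
# inputs, not names from this codebase):
def lr_at_iter(base_lr, gamma, stepsizes, it):
    """Step decay: the rate is multiplied by gamma after each stepsize."""
    rate = base_lr
    for boundary in sorted(stepsizes):
        if it > boundary:
            rate *= gamma
    return rate

# e.g. lr_at_iter(0.001, 0.1, [50000, 75000], 60000) -> 1e-4 (one decay)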
def test_net_ensemble(det_dirs, imdb, max_per_image=100, thresh=1e-9):
    print 'max_per_image: ', max_per_image
    print 'thresh: ', thresh
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_scores = [[[] for _ in xrange(num_images)]
                  for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, None)

    # load all the cached detection results and sum their scores
    all_boxes_cache = None
    for det_dir in det_dirs:
        det_path = os.path.join(det_dir, 'detections_o.pkl')
        print 'load det: ', det_path
        assert os.path.isfile(det_path), 'no det file: ' + det_path
        with open(det_path, 'rb') as f:
            all_boxes_cache_this = cPickle.load(f)
        print 'all_boxes_cache_this: ', len(all_boxes_cache_this), \
            len(all_boxes_cache_this[0])
        print 'all_boxes_cache_this[0][0]: ', all_boxes_cache_this[0][0].shape

        if all_boxes_cache is None:
            all_boxes_cache = all_boxes_cache_this
        else:
            print 'Summing up all results'
            print 'If an error happens here, the dimensions probably mismatch.'
            for c in xrange(imdb.num_classes):
                for n in xrange(num_images):
                    all_boxes_cache[c][n][:, 4] += \
                        all_boxes_cache_this[c][n][:, 4]

    print 'all_boxes_cache: ', len(all_boxes_cache), len(all_boxes_cache[0])
    print 'all_boxes_cache[0][0]: ', all_boxes_cache[0][0].shape

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb
    for i in xrange(num_images):
        _t['im_detect'].tic()
        _t['im_detect'].toc()
        _t['misc'].tic()
        # NOTE: the loop deliberately starts at j = 0; all columns are real
        # classes here, there is no background column to skip.
        for j in xrange(0, imdb.num_classes):
            all_scores[j][i] = sum(all_boxes_cache[j][i][:, -1])

            inds = np.where(all_boxes_cache[j][i][:, -1] > thresh)[0]
            cls_scores = all_boxes_cache[j][i][inds, -1]
            cls_boxes = all_boxes_cache[j][i][inds, 0:4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(0, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(0, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir, all_scores=all_scores)
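# ---------------------------------------------------------------------------
# test_net_ensemble merges detectors by summing per-box scores from several
# detections_o.pkl caches and re-running NMS once on the summed scores. The
# merge step in isolation (a sketch; it assumes every cache holds the same
# boxes in the same order, which the code above effectively relies on):
def sum_detection_caches(caches):
    """caches: list of all_boxes[cls][img] structures of (N, 5) arrays."""
    merged = caches[0]
    for other in caches[1:]:
        for c in xrange(len(merged)):
            for n in xrange(len(merged[c])):
                # boxes are assumed aligned; only the score column is summed
                merged[c][n][:, 4] += other[c][n][:, 4]
    return merged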
import os
import json

import caffe
import numpy as np
import numpy.random as npr

from core.config import cfg
from core.bbox_transform import width_height_transform
from utils.timer import Timer
from utils.cython_bbox_maps import (get_bbox_coverage,
                                    get_objects_size_regression_matrix,
                                    get_bbox_levels)

DEBUG = False

t = Timer()


class BboxSegmentationLayer(caffe.Layer):
    """Assign anchors to ground-truth targets.

    Produces anchor classification labels and bounding-box regression
    targets.
    """

    def setup(self, bottom, top):
        layer_params = json.loads(self.param_str)
        self._feat_stride = layer_params['feat_stride']
        self._iters = 0
        self._batchsize = layer_params['batchsize']
        self._fg_fraction = layer_params['fg_fraction']
def test_net_ensemble2(det_dirs, imdb, max_per_image=100, thresh=1e-9):
    print 'max_per_image: ', max_per_image
    print 'thresh: ', thresh
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_scores = [[[] for _ in xrange(num_images)]
                  for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, None)

    # load all the detection results from per-class result files
    all_boxes_cache = [[[] for _ in xrange(num_images)]
                       for _ in xrange(imdb.num_classes)]
    image_index = imdb.image_index
    for det_dir in det_dirs:
        # detectors whose directory name contains '2' get a 10x score weight
        p = 1.0
        if '2' in det_dir:
            p = 10.0
        for dirpath, dirnames, filenames in os.walk(det_dir):
            for filename in filenames:
                print 'load res: ', os.path.join(dirpath, filename)
                c = -1
                for c_i, cls in enumerate(imdb.classes):
                    if cls + '.txt' in filename:
                        c = c_i
                        break
                assert c > -1
                with open(os.path.join(dirpath, filename), 'r') as f:
                    for line in f.readlines():
                        line = line.strip()
                        im_id, score, xmin, ymin, xmax, ymax = line.split(' ')
                        im_i = image_index.index(im_id)
                        # convert 1-based to 0-based coordinates
                        all_boxes_cache[c][im_i].append([
                            float(xmin) - 1,
                            float(ymin) - 1,
                            float(xmax) - 1,
                            float(ymax) - 1,
                            float(score) * p
                        ])

    for n in xrange(num_images):
        for c in xrange(imdb.num_classes):
            if len(all_boxes_cache[c][n]) == 0:
                all_boxes_cache[c][n] = np.zeros((0, 5), dtype=np.float32)
            else:
                all_boxes_cache[c][n] = np.array(all_boxes_cache[c][n],
                                                 dtype=np.float32)

    print 'all_boxes_cache: ', len(all_boxes_cache), len(all_boxes_cache[0])
    print 'all_boxes_cache[0][0]: ', all_boxes_cache[0][0].shape

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb
    for i in xrange(num_images):
        _t['im_detect'].tic()
        _t['im_detect'].toc()
        _t['misc'].tic()
        # NOTE: the loop deliberately starts at j = 0; all columns are real
        # classes here, there is no background column to skip.
        for j in xrange(0, imdb.num_classes):
            all_scores[j][i] = sum(all_boxes_cache[j][i][:, -1])

            inds = np.where(all_boxes_cache[j][i][:, -1] > thresh)[0]
            cls_scores = all_boxes_cache[j][i][inds, -1]
            cls_boxes = all_boxes_cache[j][i][inds, 0:4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(0, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(0, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir, all_scores=all_scores)
predict_image_paths = my_utils.get_all_file_paths(predict_folder)
extensions = args.ext.split(",")
predict_image_paths = my_utils.get_files_with_extension(
    predict_image_paths, extensions)
total_pred_images = len(predict_image_paths)

# testing scale
# if args.dataset == "FDDB":
#     resize = 3
# elif args.dataset == "PASCAL":
#     resize = 2.5
# elif args.dataset == "AFW":
#     resize = 1
resize = 1

_t = {'forward_pass': Timer(), 'misc': Timer()}

# prediction begins
id2det = {}
test_time, total_detect_time, total_nms_time = 0, 0, 0
total_boxes = 0
num_pred_images = 0
error_image_paths = []
for i, image_path in enumerate(predict_image_paths):
    image_path = os.path.abspath(image_path)
    image_name = os.path.basename(image_path)
    try:
        img = np.float32(cv2.imread(image_path, cv2.IMREAD_COLOR))
def test_net_cache(net, imdb, max_per_image=100, thresh=1e-9, vis=False,
                   scale=1.0):
    """Test a network on an image database, reusing cached detections."""
    print 'max_per_image: ', max_per_image
    print 'thresh: ', thresh
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_scores = [[[] for _ in xrange(num_images)]
                  for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if cfg.OPG_DEBUG:
        vis_dir = get_vis_dir(imdb, net)

    det_file = os.path.join(output_dir, 'detections.pkl')
    if not os.path.isfile(det_file):
        print 'file does not exist: ', det_file
        # run the full test once with NMS disabled (an IoU threshold above
        # 1.0 suppresses nothing), so the cache keeps every region
        origin_NMS = cfg.TEST.NMS
        cfg.TEST.NMS = 1.1
        test_net(net, imdb, max_per_image=99999, thresh=0.0000, vis=False)
        cfg.TEST.NMS = origin_NMS

    with open(det_file, 'rb') as f:
        all_boxes_cache = cPickle.load(f)
    print 'all_boxes_cache: ', len(all_boxes_cache), len(all_boxes_cache[0])
    print 'all_boxes_cache: ', all_boxes_cache[0][0].shape
    print 'all_boxes_cache: ', all_boxes_cache[14][0].shape

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    roidb = imdb.roidb
    test_scales = cfg.TEST.SCALES
    save_id = 0
    for i in xrange(num_images):
        if vis:
            # the image is only needed for visualization
            im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        _t['im_detect'].toc()
        _t['misc'].tic()
        # NOTE: the loop deliberately starts at j = 0; all columns are real
        # classes here, there is no background column to skip.
        for j in xrange(0, imdb.num_classes):
            all_scores[j][i] = sum(all_boxes_cache[j][i][:, -1])

            inds = np.where(all_boxes_cache[j][i][:, -1] > thresh)[0]
            cls_scores = all_boxes_cache[j][i][inds, -1]
            cls_boxes = all_boxes_cache[j][i][inds, 0:4]
            cls_boxes = resize_boxes(cls_boxes, scale)
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            if vis:
                vis_heatmap(im, i, imdb.classes[j], cls_dets, thresh=0.3)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i] = cls_dets

        if vis:
            import matplotlib.pyplot as plt
            plt.close('all')

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(0, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(0, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect'].average_time,
                    _t['misc'].average_time)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir, all_scores=all_scores)
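# ---------------------------------------------------------------------------
# test_net_cache leans on a small trick: greedy NMS with an IoU threshold
# above 1.0 suppresses nothing (no pair of boxes can overlap more than IoU
# 1.0), so the cached detections.pkl keeps every region and different NMS or
# scaling settings can be replayed without re-running the network. The same
# idea as a reusable helper (a sketch; run_test_fn is an assumed callable):
def dump_raw_detections(run_test_fn, cfg):
    origin_nms = cfg.TEST.NMS
    cfg.TEST.NMS = 1.1  # IoU can never exceed 1.0, so NMS becomes a no-op
    try:
        run_test_fn()  # writes detections.pkl with all boxes kept
    finally:
        cfg.TEST.NMS = origin_nms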
for filename in os.listdir(fromDir):
    if not 'mp4' in filename:  # skip non-video files such as .DS_Store
        continue
    print(filename)
    if filename == "20180627_momo_0007.mp4":
        continue
    if filename == "20180627_momo_0023.mp4":
        continue
    video = cv2.VideoCapture(fromDir + filename)
    numberVideo += 1
    success, im = video.read()
    numFrame = 0
    while success:
        numFrame += 1
        savename = filename.split('.')[0] + '_f' + str(numFrame) + '.jpg'
        timer = Timer()
        timer.tic()
        scores, boxes = im_detect(net, im)
        timer.toc()
        print('No.{:d} - {:d} took {:.3f}s for '
              '{:d} object proposals'.format(numberVideo, numFrame,
                                             timer.total_time,
                                             boxes.shape[0]))
        timeUsed = timeUsed + timer.total_time
        CONF_THRESH = 0.9
        NMS_THRESH = 0.01
        numGesture = 0
        gestureboxes = {}
        for cls_ind, cls in enumerate(CLASSES[1:]):
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]  # 300x4 matrix
def test_net_bbox(net, imdb, max_per_image=100, thresh=1e-8, vis=False):
    """Test a network on an image database."""
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_scores = [[[] for _ in xrange(num_images)]
                  for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if cfg.OPG_DEBUG:
        vis_dir = get_vis_dir(imdb, net)

    # timers
    _t = {'im_detect_bbox': Timer(), 'misc': Timer()}

    roidb = imdb.roidb
    test_scales = cfg.TEST.SCALES
    save_id = 0
    for i in xrange(num_images):
        # The roidb may contain ground-truth rois (for example, if the roidb
        # comes from the training or val split). To evaluate detection on
        # the *non*-ground-truth rois only, one would select the rois whose
        # gt_classes field is 0:
        #   box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0]
        box_proposals = roidb[i]['boxes']
        rois_per_this_image = min(cfg.TEST.ROIS_PER_IM, len(box_proposals))
        box_proposals = box_proposals[0:rois_per_this_image, :]

        if cfg.USE_ROI_SCORE:
            box_scores = roidb[i]['box_scores']
        else:
            box_scores = None

        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect_bbox'].tic()
        scores = None
        boxes = None
        for target_size in test_scales:
            if cfg.OPG_DEBUG:
                cv2.imwrite(os.path.join(vis_dir, str(save_id) + '_.png'), im)
                save_id += 1
            cfg.TEST.SCALES = (target_size, )
            scores_scale, boxes_scale = im_detect_bbox(net, im, box_proposals,
                                                       box_scores)
            if scores is None:
                scores = scores_scale
                boxes = boxes_scale
            else:
                scores = np.vstack((scores, scores_scale))
                boxes = np.vstack((boxes, boxes_scale))

        if cfg.TEST.USE_FLIPPED:
            im_flip = im[:, ::-1, :]
            box_proposals_flip = box_proposals.copy()
            oldx1 = box_proposals_flip[:, 0].copy()
            oldx2 = box_proposals_flip[:, 2].copy()
            box_proposals_flip[:, 0] = im.shape[1] - oldx2 - 1
            box_proposals_flip[:, 2] = im.shape[1] - oldx1 - 1
            for target_size in test_scales:
                if cfg.OPG_DEBUG:
                    cv2.imwrite(os.path.join(vis_dir, str(save_id) + '_.png'),
                                im_flip)
                    save_id += 1
                cfg.TEST.SCALES = (target_size, )
                scores_scale, boxes_scale = im_detect_bbox(
                    net, im_flip, box_proposals_flip, box_scores)
                # NOTE: the flipped results are currently discarded; the
                # stacking below is intentionally left disabled.
                # scores = np.vstack((scores, scores_scale))
                # boxes = np.vstack((boxes, boxes_scale))
        _t['im_detect_bbox'].toc()

        _t['misc'].tic()
        # NOTE: the loop deliberately starts at j = 0; all columns are real
        # classes here, there is no background column to skip.
        for j in xrange(0, imdb.num_classes):
            all_scores[j][i] = sum(scores[:, j])

            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            if vis:
                vis_heatmap(im, i, imdb.classes[j], cls_dets, thresh=0.3)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i] = cls_dets

        if vis:
            import matplotlib.pyplot as plt
            plt.close('all')

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(0, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(0, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print 'im_detect_bbox: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_detect_bbox'].average_time,
                    _t['misc'].average_time)

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir, all_scores=all_scores)
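# ---------------------------------------------------------------------------
# Both multi-scale testers above flip proposals with the same convention: in
# a width-W image with inclusive pixel coordinates, a box (x1, y1, x2, y2)
# maps to (W - x2 - 1, y1, W - x1 - 1, y2). Stand-alone:
def flip_boxes_horizontally(boxes, im_width):
    """boxes: (N, 4) array of x1, y1, x2, y2 in pixel coordinates."""
    flipped = boxes.copy()
    flipped[:, 0] = im_width - boxes[:, 2] - 1
    flipped[:, 2] = im_width - boxes[:, 0] - 1
    return flipped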
def test_net(save_folder, net, detector, cuda, testset, transform,
             max_per_image=300, thresh=0.005):
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    # dump predictions and assoc. ground truth to text file for now
    num_images = len(testset)
    num_classes = (21, 81)[args.dataset == 'COCO']
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(save_folder, 'detections.pkl')

    if args.retest:
        f = open(det_file, 'rb')
        all_boxes = pickle.load(f)
        print('Evaluating detections')
        testset.evaluate_detections(all_boxes, save_folder)
        return

    for i in range(num_images):
        img = testset.pull_image(i)
        x = Variable(transform(img).unsqueeze(0), volatile=True)
        if cuda:
            x = x.cuda()

        _t['im_detect'].tic()
        out = net(x=x, test=True)  # forward pass
        boxes, scores = detector.forward(out, priors)
        detect_time = _t['im_detect'].toc()
        boxes = boxes[0]
        scores = scores[0]

        boxes = boxes.cpu().numpy()
        scores = scores.cpu().numpy()
        # scale each detection back up to the image
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1],
             img.shape[0]]).cpu().numpy()
        boxes *= scale

        _t['misc'].tic()
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes,
                 c_scores[:, np.newaxis])).astype(np.float32, copy=False)
            # GPU NMS is used for both VOC and COCO
            keep = nms(c_dets, 0.45, force_cpu=False)
            keep = keep[:50]
            c_dets = c_dets[keep, :]
            all_boxes[j][i] = c_dets
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))
            _t['im_detect'].clear()
            _t['misc'].clear()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    if args.dataset == 'VOC':
        APs, mAP = testset.evaluate_detections(all_boxes, save_folder)
        return APs, mAP
    else:
        testset.evaluate_detections(all_boxes, save_folder)
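# ---------------------------------------------------------------------------
# The nms(...) calls in these testers implement standard greedy non-maximum
# suppression. For reference, a pure-NumPy version of the usual algorithm
# (equivalent in spirit to the py_cpu_nms found in these codebases; a sketch,
# not the exact kernel used here):
import numpy as np


def greedy_nms(dets, iou_thresh):
    """dets: (N, 5) array of x1, y1, x2, y2, score. Returns kept indices."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current best box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        iou = w * h / (areas[i] + areas[order[1:]] - w * h)
        # keep only boxes whose overlap with the best box is small enough
        order = order[1:][iou <= iou_thresh]
    return keep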
    model)  # loading pretrained weights into the network
if cfg.use_cuda:
    model = model.cuda()
model.eval()
print("Model loaded successfully.")
print("Setting Model to Evaluation Mode")
# pretrained_model = os.path.join(cfg.train_output_dir,
#                                 'darknet19_voc07trainval_exp1_63.h5')
# pretrained_model = cfg.trained_model
# net_utils.load_net(pretrained_model, net)
# model.load_from_npz(cfg.pretrained_model, num_conv=18)
t_det = Timer()
t_total = Timer()
t_cap = Timer()
cap = cv2.VideoCapture("/dev/video1")
i = 0
while True:
    t_cap.tic()
    # Capture frame by frame
    ret, frame = cap.read()
    cap_time = t_cap.toc()
    # Our operations on the frame come here
    # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    t_total.tic()
def face_extract(retinaface, cfg, image: np.ndarray):
    cudnn.benchmark = True
    # inference is forced to CPU here; change to "cuda" for GPU inference
    device = torch.device("cpu")
    retinaface = retinaface.to(device)

    # resize very small or very large images to a workable range
    if image.shape[0] < 300 or image.shape[1] < 300:
        dim = (500, 500)
        image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
    elif image.shape[0] > 2000 or image.shape[1] > 2000:
        dim = (1500, 1500)
        image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)

    img_raw = image
    image, scale, im_height, im_width, resize = image_preprocessing(image)
    image = image.to(device)
    scale = scale.to(device)

    _t = {'forward_pass': Timer(), 'misc': Timer()}
    _t['forward_pass'].tic()
    loc, conf, landms = retinaface(image)  # forward pass
    _t['forward_pass'].toc()
    _t['misc'].tic()
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([
        image.shape[3], image.shape[2], image.shape[3], image.shape[2],
        image.shape[3], image.shape[2], image.shape[3], image.shape[2],
        image.shape[3], image.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > 0.02)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1]
    # order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, 0.4)
    # keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    dets = np.concatenate((dets, landms), axis=1)
    _t['misc'].toc()

    return dets, img_raw
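# ---------------------------------------------------------------------------
# decode(...) above converts the network's location offsets back into corner
# boxes using the SSD variance encoding. The usual implementation looks like
# this (a sketch matching the common RetinaFace/SSD box utility, assumed
# rather than copied from this repo):
import torch


def decode(loc, priors, variances):
    """loc: (N, 4) predicted offsets; priors: (N, 4) as (cx, cy, w, h)."""
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    boxes[:, :2] -= boxes[:, 2:] / 2   # (cx, cy) -> (x1, y1)
    boxes[:, 2:] += boxes[:, :2]       # (w, h)   -> (x2, y2)
    return boxes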
def train_model(self, sess, max_iters):
    """Network training loop."""
    data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

    # one sub-network per view; each contributes pool_5 region features
    nets = [self.net, self.net1, self.net2, self.net3, self.net4, self.net5,
            self.net6, self.net7, self.net8, self.net9, self.net10,
            self.net11]
    part_feature_list = [
        n.get_output('pool_5')[:self.proposal_number, :] for n in nets
    ]

    # learning matrix 1 (part-level attention)
    Matrix_L1_S1 = tf.get_variable(
        'L1_S1', [self.feature_size, self.feature_size],
        initializer=tf.random_normal_initializer(
            stddev=1 / math.sqrt(self.feature_size * self.feature_size)))
    # learning matrix 2 (view-level attention)
    Matrix_L1_S2 = tf.get_variable(
        'L1_S2', [self.feature_size, self.feature_size],
        initializer=tf.random_normal_initializer(
            stddev=1 / math.sqrt(self.feature_size * self.feature_size)))

    ################################
    #### get the region feature ####
    ######### max pooling ##########
    ################################
    # max-pool each proposal's 7x7x512 pool_5 map over the spatial grid
    part_feature_list = [
        tf.reduce_max(tf.reshape(p, [self.proposal_number, 49, 512]), axis=1)
        for p in part_feature_list
    ]

    ##############################
    ######### L1_S1 ##############
    ##############################
    # Part attention, applied identically to every view:
    #   S = softmax(F * M * F^T), w = column mean of S,
    #   part_sum = sum_k w_k * f_k.
    # (A disabled "no part attention" variant instead used uniform weights
    # 1 / proposal_number for every part.)
    part_sums = []
    for feats in part_feature_list:
        similarity = tf.nn.softmax(
            tf.matmul(tf.matmul(feats, Matrix_L1_S1), tf.transpose(feats)))
        similarity = tf.reduce_sum(
            similarity, axis=0, keep_dims=True) / self.proposal_number
        similarity = tf.transpose(similarity)
        part_sums.append(
            tf.reduce_sum(tf.multiply(similarity, feats),
                          axis=0, keep_dims=True))

    # concat views
    view_parts = tf.concat(part_sums, axis=0)
    view_parts = tf.nn.l2_normalize(view_parts, 1)

    '''L1_S2'''
    # view attention (a disabled variant used uniform weights 1 / self.views)
    L1_S2_Similarity = tf.nn.softmax(
        tf.matmul(tf.matmul(view_parts, Matrix_L1_S2),
                  tf.transpose(view_parts)))
    view_similarity = tf.reduce_sum(
        L1_S2_Similarity, axis=0, keep_dims=True) / self.views
    view_similarity = tf.transpose(view_similarity)
    view_sums = tf.reduce_sum(tf.multiply(view_similarity, view_parts),
                              axis=0, keep_dims=True)
    view_sums = tf.nn.l2_normalize(view_sums, 1)

    view_sums_extend = tf.tile(view_sums, [self.views, 1])
    views_input = tf.add(view_parts, view_sums_extend)
    view_extend = [views_input]
    view_sequence = tf.unstack(view_extend, self.rnn_steps, 1)

    ########################
    ###### RNN part ########
    ########################
    outputs, states = self.build_RNN(view_sequence)
    # use outputs
    outputs = tf.reshape(outputs, [-1, self.views, self.hidden_size])
    model_feature = tf.reduce_max(outputs, 1)
    # (A disabled second branch pooled all view*proposal part features with a
    # third learned matrix L2_S1 and concatenated the result with the RNN
    # state before classification.)

    # classification layer; the second attention part is related to the
    # actual classes
    w_init = tf.truncated_normal_initializer(stddev=0.1)
    b_init = tf.constant_initializer(0.1)
    fc2_w = tf.get_variable('fc2_w', [self.hidden_size, self.classes],
                            dtype=tf.float32, initializer=w_init)
    fc2_b = tf.get_variable('fc2_b', [self.classes], dtype=tf.float32,
                            initializer=b_init)
    cls_logits = tf.matmul(model_feature, fc2_w) + fc2_b
    cls_prob = tf.nn.softmax(cls_logits)
    cls_output = tf.placeholder(tf.float32, [self.classes],
                                name='cls_output')

    # cross entropy (a Euclidean-distance loss was also tried:
    #   loss = tf.reduce_sum((cls_prob - cls_output) ** 2))
    loss = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(labels=cls_output,
                                                logits=cls_logits))

    # Adam optimizer (an SGD-with-momentum variant with exponential learning
    # rate decay is left disabled)
    train_op = tf.train.AdamOptimizer(cfg.TRAIN.LEARNING_RATE).minimize(loss)

    # initialize variables
    sess.run(tf.global_variables_initializer())
    self.net.load(self.pretrained_model, sess, self.saver, True)
    print('loaded: %s' % (self.pretrained_model))

    # model saver
    saver1 = tf.train.Saver(max_to_keep=150)
    self.saver = saver1

    last_snapshot_iter = -1
    timer = Timer()
    # training steps
    for iter in range(max_iters):
        # get the model label and a random view offset
        train_target = data_layer.model_target()
        randnum = data_layer.rand_target()
        # get one blob (image batch) per view
        # (a disabled variant rotated the blob list by randnum before feeding)
        blob_list = [data_layer.forward() for _ in nets]

        # build the feed_dict batch
        feed_dict = {cls_output: train_target}
        for net_i, blob in zip(nets, blob_list):
            feed_dict[net_i.data] = blob['data']
            feed_dict[net_i.im_info] = blob['im_info']
            feed_dict[net_i.keep_prob] = 1.0

        run_options = None
        run_metadata = None
        if cfg.TRAIN.DEBUG_TIMELINE:
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

        timer.tic()
        # training
        loss_value, _ = sess.run([loss, train_op], feed_dict=feed_dict,
                                 options=run_options,
                                 run_metadata=run_metadata)
        timer.toc()

        if cfg.TRAIN.DEBUG_TIMELINE:
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            trace_file = open(
                str(int(time.time() * 1000)) + '-train-timeline.ctf.json',
                'w')
            trace_file.write(
                trace.generate_chrome_trace_format(show_memory=False))
            trace_file.close()

        # print debug information
        if (iter + 1) % (cfg.TRAIN.DISPLAY) == 0:
            print('iter: %d / %d, loss: %.4f, lr: %.8f' %
                  (iter + 1, max_iters, loss_value, cfg.TRAIN.LEARNING_RATE))

        if (iter + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            last_snapshot_iter = iter
            self.snapshot(sess, iter)

    if last_snapshot_iter != iter:
        self.snapshot(sess, iter)
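# ---------------------------------------------------------------------------
# Sanity check for the attention pooling above, in plain NumPy: with
# S = softmax(F M F^T) taken row-wise, the column means of S form a weight
# vector that sums to 1, so the pooled feature is a convex combination of
# the part features. (Illustrative only; shapes mirror the TF code.)
import numpy as np


def np_attention_pool(feats, m):
    logits = feats @ m @ feats.T
    s = np.exp(logits - logits.max(axis=1, keepdims=True))
    s /= s.sum(axis=1, keepdims=True)       # row-wise softmax
    w = s.mean(axis=0, keepdims=True).T     # (n_parts, 1), sums to 1
    return (w * feats).sum(axis=0, keepdims=True)


feats = np.random.rand(8, 512).astype(np.float32)
m = np.random.rand(512, 512).astype(np.float32) / 512
pooled = np_attention_pool(feats, m)
assert pooled.shape == (1, 512)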
    torch.cuda.synchronize()
    return counts


if __name__ == "__main__":
    N_points = 1024 * 16 * 16
    cube_edge = 10
    points = np.random.rand(3, N_points) * cube_edge - cube_edge / 3
    points = torch.from_numpy(points.astype(np.float32)).cuda()
    points[:, 1] = points[:, 0]  # make point 1 a duplicate of point 0

    tree = generate_octree(points)  # untimed warm-up build
    with Timer(message="Octree creation"):
        tree = generate_octree(points)

    with Timer(message="Chamfer calculation"):
        chamfer(points, tree, own_tree=True)

    # expected value here:
    count_radius = 0.5
    expected_neighbours_per_point = \
        4 / 3 * np.pi * count_radius**3 * N_points / cube_edge**3
    print("Radius count: expected roughly %f neighbours per point" %
          expected_neighbours_per_point)
    with Timer(message="Radius count"):
        point_counts = radius_count(tree, radius=count_radius)
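# ---------------------------------------------------------------------------
# The `with Timer(message=...)` blocks above imply a context-manager Timer
# that prints the elapsed wall time on exit. A minimal compatible sketch,
# named BlockTimer to avoid clashing with the tic/toc Timer used in the
# other snippets (the actual utils.timer.Timer may differ):
import time


class BlockTimer(object):
    def __init__(self, message=""):
        self.message = message

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        print("%s: %.3f s" % (self.message, time.time() - self.start))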
def test_net(net, imdb, weights_filename, max_per_image=100, thresh=0.):
    """Test a Fast R-CNN network on an image database."""
    vis = False
    np.random.seed(cfg.RNG_SEED)
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    ##
    original_all_boxes = [[[] for _ in range(num_images)]
                          for _ in range(imdb.num_classes)]
    ##
    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    # extract gt objects for this class
    class_recs = {}
    npos = 0

    for i in range(num_images):
        im = cv2.imread(imdb.image_path_at(i))
        _t['im_detect'].tic()
        scores, boxes = im_detect(net, im)
        _t['im_detect'].toc()

        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in range(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(torch.from_numpy(cls_dets),
                       cfg.TEST.NMS).numpy() if cls_dets.size > 0 else []
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i] = cls_dets

            ##
            # keep the raw objectness detections as well
            obj_scores = net.roi_scores.cpu().data.numpy()
            inds = np.where(obj_scores[:] > thresh)[0]
            cls_scores = obj_scores[inds]
            cls_boxes = boxes[inds, 4:8]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            original_all_boxes[j][i] = cls_dets
            ##

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i + 1, num_images, _t['im_detect'].average_time(),
                      _t['misc'].average_time()))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
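# ---------------------------------------------------------------------------
# Every test_net variant above serializes the same structure:
# all_boxes[cls][img] is an (N, 5) float32 array of x1, y1, x2, y2, score.
# A small reader for the dumped detections.pkl (the file layout is assumed
# from the dump calls above):
import pickle


def summarize_detections(det_file):
    with open(det_file, 'rb') as f:
        all_boxes = pickle.load(f)
    num_classes, num_images = len(all_boxes), len(all_boxes[0])
    total = sum(len(all_boxes[j][i])
                for j in range(num_classes) for i in range(num_images))
    print('%d classes, %d images, %d detections' %
          (num_classes, num_images, total))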