def imdb_rpn_compute_stats(net, imdb, anchor_scales=(8, 16, 32), feature_stride=16):
    """Compute the mean and std of RPN bbox regression targets over a dataset.

    For every image in the filtered roidb, anchors are tiled over the RPN
    output grid, the foreground anchors are selected the same way RPN
    training does (IoU above ``cfg.TRAIN.RPN_POSITIVE_OVERLAP`` or best match
    for some ground-truth box), and the regression targets returned by
    ``bbox_transform`` are accumulated.

    Args:
        net: network exposing ``blobs`` and ``forward``; it is run on dummy
            inputs to learn the size of the ``'rpn/output'`` blob for each
            input size.
        imdb: dataset object; ``imdb.roidb`` is filtered with
            ``filter_roidb`` and ``imdb.num_images`` is used only in the
            progress message.
        anchor_scales: scales forwarded to ``generate_anchors``.
        feature_stride: pixel distance between neighbouring cells of the RPN
            output grid, used to shift the base anchors.

    Returns:
        Tuple ``(means, stds)``: the column-wise mean and standard deviation
        of all accumulated targets.
    """
    raw_anchors = generate_anchors(scales=np.array(anchor_scales))
    print(raw_anchors.shape)

    sums = 0
    squared_sums = 0
    counts = 0
    roidb = filter_roidb(imdb.roidb)

    # Compute a map of input image size and output feature map blob.
    # The net is probed with square dummy inputs of every side length that
    # may occur, and the spatial size of 'rpn/output' is recorded for each.
    map_w = {}
    map_h = {}
    for size in range(50, cfg.TRAIN.MAX_SIZE + 10):
        blobs = {
            'data': np.zeros((1, 3, size, size)),
            'im_info': np.asarray([[size, size, 1.0]]),
        }
        net.blobs['data'].reshape(*(blobs['data'].shape))
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        # Only the side effect on net.blobs is needed; the return value of
        # forward() is not used.
        net.forward(data=blobs['data'].astype(np.float32, copy=False),
                    im_info=blobs['im_info'].astype(np.float32, copy=False))
        height, width = net.blobs['rpn/output'].data.shape[-2:]
        map_w[size] = width
        map_h[size] = height

    for i, entry in enumerate(roidb):
        if not i % 5000:
            print('computing %d/%d' % (i, imdb.num_images))
        im = cv2.imread(entry['image'])
        im_data, im_info = _get_image_blob(im)
        # Bring the ground-truth boxes to the scale of the resized image.
        gt_boxes = entry['boxes']
        gt_boxes = gt_boxes * im_info[0, 2]
        # Height and width are looked up independently in the square-input
        # tables (assumes each output dimension depends only on the matching
        # input dimension -- TODO confirm for the network in use).
        height = map_h[im_data.shape[2]]
        width = map_w[im_data.shape[3]]

        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * feature_stride
        shift_y = np.arange(0, height) * feature_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = raw_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = (raw_anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= 0) &
            (all_anchors[:, 1] >= 0) &
            (all_anchors[:, 2] < im_info[0, 1]) &  # width
            (all_anchors[:, 3] < im_info[0, 0])    # height
        )[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))

        # There are 2 types of bbox targets
        # 1. anchor whose overlaps with gt is greater than RPN_POSITIVE_OVERLAP
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0]
        # 2. anchors which best match certain gt
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # Re-select with equality so that ties for the best match are kept.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        fg_inds = np.unique(np.hstack((fg_inds, gt_argmax_overlaps)))

        gt_rois = gt_boxes[argmax_overlaps, :]
        anchors = anchors[fg_inds, :]
        gt_rois = gt_rois[fg_inds, :]
        targets = bbox_transform(anchors, gt_rois[:, :4]).astype(np.float32,
                                                                 copy=False)

        sums += targets.sum(axis=0)
        squared_sums += (targets ** 2).sum(axis=0)
        counts += targets.shape[0]

    # std = sqrt(E[x^2] - E[x]^2), computed column-wise.
    means = old_div(sums, counts)
    stds = np.sqrt(old_div(squared_sums, counts) - means ** 2)
    print(means)
    print(stds)
    return means, stds
source_imdb = get_imdb(args.source_imdb_name) source_imdb.competition_mode(args.comp_mode) print 'Loaded dataset `{:s}` for training'.format(source_imdb.name) source_roidb = get_training_roidb(source_imdb) target_network = get_network(args.target_network_name) print 'Use network `{:s}` in training'.format(args.target_network_name) target_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target') source_network = get_network(args.source_network_name) print 'Use network `{:s}` in training'.format(args.source_network_name) source_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='source') target_roidb = filter_roidb(target_roidb) source_roidb = filter_roidb(source_roidb) target_restorer = tf.train.Saver(var_list=target_vars, write_version=tf.train.SaverDef.V1) source_restorer = tf.train.Saver(var_list=source_vars, write_version=tf.train.SaverDef.V1) gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)) as sess: target_restorer.restore(sess, args.target_pretrained_model) source_restorer.restore(sess, args.source_pretrained_model) sw = SolverWrapper(sess, target_vars, source_vars, target_restorer, source_restorer, target_network, source_network, target_imdb, source_imdb, target_roidb,
def imdb_rpn_compute_stats(net, imdb, anchor_scales=(8,16,32), feature_stride=16):
    """Compute the mean and std of RPN bbox regression targets over a dataset.

    For every image in the filtered roidb, anchors are tiled over the RPN
    output grid, the foreground anchors are selected the same way RPN
    training does (IoU above ``cfg.TRAIN.RPN_POSITIVE_OVERLAP`` or best match
    for some ground-truth box), and the regression targets returned by
    ``bbox_transform`` are accumulated.

    Images are loaded with ``cPickle`` when ``cfg.TRAIN.FORMAT == 'pickle'``
    and with ``cv2.imread`` otherwise.

    Args:
        net: network exposing ``blobs`` and ``forward``; it is run on dummy
            inputs to learn the size of the ``'rpn/output'`` blob for each
            input size.
        imdb: dataset object; ``imdb.roidb`` is filtered with
            ``filter_roidb`` and ``imdb.num_images`` is used only in the
            progress message.
        anchor_scales: scales forwarded to ``generate_anchors``.
        feature_stride: pixel distance between neighbouring cells of the RPN
            output grid, used to shift the base anchors.

    Returns:
        Tuple ``(means, stds)``: the column-wise mean and standard deviation
        of all accumulated targets.
    """
    raw_anchors = generate_anchors(scales=np.array(anchor_scales))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(raw_anchors.shape)

    sums = 0
    squared_sums = 0
    counts = 0
    roidb = filter_roidb(imdb.roidb)

    # Compute a map of input image size and output feature map blob.
    # The net is probed with square dummy inputs of every side length that
    # may occur, and the spatial size of 'rpn/output' is recorded for each.
    map_w = {}
    map_h = {}
    for size in range(50, cfg.TRAIN.MAX_SIZE + 10):
        blobs = {
            'data': np.zeros((1, 3, size, size)),
            'im_info': np.asarray([[size, size, 1.0]]),
        }
        net.blobs['data'].reshape(*(blobs['data'].shape))
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        # Only the side effect on net.blobs is needed; the return value of
        # forward() is not used.
        net.forward(
            data=blobs['data'].astype(np.float32, copy=False),
            im_info=blobs['im_info'].astype(np.float32, copy=False))
        height, width = net.blobs['rpn/output'].data.shape[-2:]
        map_w[size] = width
        map_h[size] = height

    for i, entry in enumerate(roidb):
        if not i % 5000:
            print('computing %d/%d' % (i, imdb.num_images))

        if cfg.TRAIN.FORMAT == 'pickle':
            # NOTE(review): unpickling can execute arbitrary code; this is
            # only safe if the roidb image paths point at trusted files.
            with open(entry['image'], 'rb') as f:
                im = cPickle.load(f)
        else:
            im = cv2.imread(entry['image'])

        im_data, im_info = _get_image_blob(im)
        # Bring the ground-truth boxes to the scale of the resized image.
        gt_boxes = entry['boxes']
        gt_boxes = gt_boxes * im_info[0, 2]
        # Height and width are looked up independently in the square-input
        # tables (assumes each output dimension depends only on the matching
        # input dimension -- TODO confirm for the network in use).
        height = map_h[im_data.shape[2]]
        width = map_w[im_data.shape[3]]

        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * feature_stride
        shift_y = np.arange(0, height) * feature_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = raw_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = (raw_anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= 0) &
            (all_anchors[:, 1] >= 0) &
            (all_anchors[:, 2] < im_info[0, 1]) &  # width
            (all_anchors[:, 3] < im_info[0, 0])    # height
        )[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))

        # There are 2 types of bbox targets
        # 1. anchor whose overlaps with gt is greater than RPN_POSITIVE_OVERLAP
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0]
        # 2. anchors which best match certain gt
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # Re-select with equality so that ties for the best match are kept.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        fg_inds = np.unique(np.hstack((fg_inds, gt_argmax_overlaps)))

        gt_rois = gt_boxes[argmax_overlaps, :]
        anchors = anchors[fg_inds, :]
        gt_rois = gt_rois[fg_inds, :]
        targets = bbox_transform(anchors, gt_rois[:, :4]).astype(np.float32,
                                                                 copy=False)

        sums += targets.sum(axis=0)
        squared_sums += (targets ** 2).sum(axis=0)
        counts += targets.shape[0]

    # std = sqrt(E[x^2] - E[x]^2), computed column-wise.
    means = sums / counts
    stds = np.sqrt(squared_sums / counts - means ** 2)
    print(means)
    print(stds)
    return means, stds
#solver = '/net/wujial/py-R-FCN/models/pascal_voc/vgg16/soft_rfcn_alt_opt_5step_ohem/stage1_rfcn_ohem_solver80k120k.pt' imdb_name = 'voc_2007_trainval' solver = '/net/wujial/py-R-FCN/models/pascal_voc/ResNet-50/soft_rfcn_alt_opt_5step_ohem/stage1_rfcn_ohem_solver80k120k.pt' print 'Init model: {}'.format(init_model) print 'RPN proposals: {}'.format(rpn_file) print('Using config:') pprint.pprint(cfg) _init_caffe(cfg) roidb, imdb = get_roidb(imdb_name, rpn_file=rpn_file) output_dir = get_output_dir(imdb) print 'Output will be saved to `{:s}`'.format(output_dir) # Train R-FCN # Send R-FCN model path over the multiprocessing queue final_caffemodel = os.path.join(output_dir, output_cache) roidb = filter_roidb(roidb) sw = SolverWrapper(solver, roidb, output_dir, init_model) sw.solver.step(1) net = sw.solver.net netdata = dict() saveto = 'test.mat' netdata['data'] = net.blobs['data'].data netdata['res5c'] = net.blobs['res5c'].data sio.savemat(saveto,netdata) if os.path.exists(final_caffemodel): print 'has done' else: model_paths = train_net(solver, roidb, output_dir,