Example #1
import numpy as np
import cv2

# Repo-local helpers (generate_anchors, filter_roidb, bbox_overlaps,
# bbox_transform, _get_image_blob, cfg) are assumed to be importable
# from the surrounding Faster R-CNN codebase.
def imdb_rpn_compute_stats(net,
                           imdb,
                           anchor_scales=(8, 16, 32),
                           feature_stride=16):
    raw_anchors = generate_anchors(scales=np.array(anchor_scales))
    print(raw_anchors.shape)
    sums = 0
    squared_sums = 0
    counts = 0
    roidb = filter_roidb(imdb.roidb)
    # Build a map from input image size to output feature-map size
    map_w = {}
    map_h = {}
    for i in range(50, cfg.TRAIN.MAX_SIZE + 10):
        blobs = {
            'data': np.zeros((1, 3, i, i)),
            'im_info': np.asarray([[i, i, 1.0]])
        }
        net.blobs['data'].reshape(*(blobs['data'].shape))
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        blobs_out = net.forward(
            data=blobs['data'].astype(np.float32, copy=False),
            im_info=blobs['im_info'].astype(np.float32, copy=False))
        height, width = net.blobs['rpn/output'].data.shape[-2:]
        map_w[i] = width
        map_h[i] = height

    for i in range(len(roidb)):
        if not i % 5000:
            print('computing %d/%d' % (i, imdb.num_images))
        im = cv2.imread(roidb[i]['image'])
        im_data, im_info = _get_image_blob(im)
        gt_boxes = roidb[i]['boxes']
        gt_boxes = gt_boxes * im_info[0, 2]
        height = map_h[im_data.shape[2]]
        width = map_w[im_data.shape[3]]
        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * feature_stride
        shift_y = np.arange(0, height) * feature_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = raw_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = (raw_anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= 0) &
            (all_anchors[:, 1] >= 0) &
            (all_anchors[:, 2] < im_info[0, 1]) &  # width
            (all_anchors[:, 3] < im_info[0, 0])    # height
        )[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))

        # There are two types of positive anchors:
        # 1. anchors whose overlap with some gt box is at least RPN_POSITIVE_OVERLAP
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0]
        # 2. anchors that best match each gt box
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        fg_inds = np.unique(np.hstack((fg_inds, gt_argmax_overlaps)))
        gt_rois = gt_boxes[argmax_overlaps, :]

        anchors = anchors[fg_inds, :]
        gt_rois = gt_rois[fg_inds, :]
        targets = bbox_transform(anchors, gt_rois[:, :4]).astype(np.float32,
                                                                 copy=False)
        sums += targets.sum(axis=0)
        squared_sums += (targets ** 2).sum(axis=0)
        counts += targets.shape[0]

    means = sums / counts
    stds = np.sqrt(squared_sums / counts - means ** 2)
    print(means)
    print(stds)
    return means, stds
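
The function recovers the per-coordinate mean and standard deviation of the RPN bbox-regression targets from running sums, using the identity Var[X] = E[X^2] - E[X]^2. A short self-contained check of that bookkeeping (synthetic stand-in data, not from the example):

import numpy as np

fake_targets = np.random.randn(1000, 4)  # stand-in for (dx, dy, dw, dh) targets
sums = fake_targets.sum(axis=0)
squared_sums = (fake_targets ** 2).sum(axis=0)
counts = fake_targets.shape[0]
means = sums / counts
stds = np.sqrt(squared_sums / counts - means ** 2)
# Agrees with the direct computation up to floating-point error.
assert np.allclose(means, fake_targets.mean(axis=0))
assert np.allclose(stds, fake_targets.std(axis=0))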
Example #2
    # (This snippet starts mid-function: target_imdb and target_roidb are
    # assumed to have been loaded earlier, analogously to the source dataset.)
    source_imdb = get_imdb(args.source_imdb_name)
    source_imdb.competition_mode(args.comp_mode)
    print 'Loaded dataset `{:s}` for training'.format(source_imdb.name)
    source_roidb = get_training_roidb(source_imdb)

    target_network = get_network(args.target_network_name)
    print 'Use network `{:s}` in training'.format(args.target_network_name)
    target_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    scope='target')

    source_network = get_network(args.source_network_name)
    print 'Use network `{:s}` in training'.format(args.source_network_name)
    source_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    scope='source')

    target_roidb = filter_roidb(target_roidb)
    source_roidb = filter_roidb(source_roidb)

    target_restorer = tf.train.Saver(var_list=target_vars,
                                     write_version=tf.train.SaverDef.V1)
    source_restorer = tf.train.Saver(var_list=source_vars,
                                     write_version=tf.train.SaverDef.V1)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          gpu_options=gpu_options)) as sess:
        target_restorer.restore(sess, args.target_pretrained_model)
        source_restorer.restore(sess, args.source_pretrained_model)
        sw = SolverWrapper(sess, target_vars, source_vars, target_restorer,
                           source_restorer, target_network, source_network,
                           target_imdb, source_imdb, target_roidb,
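
The snippet keeps the two networks restorable independently by building them under distinct TensorFlow 1.x variable scopes and giving each scope its own Saver. A minimal self-contained sketch of the pattern (toy variables; the names here are hypothetical, not from the snippet):

import tensorflow as tf  # TF 1.x API

# Variables created under a scope carry the scope name as a prefix,
# so they can be collected and restored separately later.
with tf.variable_scope('source'):
    w_source = tf.get_variable('w', shape=[3, 3])
with tf.variable_scope('target'):
    w_target = tf.get_variable('w', shape=[3, 3])

source_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='source')
target_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target')
source_saver = tf.train.Saver(var_list=source_vars)
target_saver = tf.train.Saver(var_list=target_vars)
# Each saver can now restore its scope's weights from its own checkpoint.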
Example #3
import cPickle
import numpy as np
import cv2

# Repo-local helpers (generate_anchors, filter_roidb, bbox_overlaps,
# bbox_transform, _get_image_blob, cfg) are assumed to be importable
# from the surrounding Faster R-CNN codebase.
def imdb_rpn_compute_stats(net, imdb, anchor_scales=(8, 16, 32),
                           feature_stride=16):
    raw_anchors = generate_anchors(scales=np.array(anchor_scales))
    print raw_anchors.shape
    sums = 0
    squared_sums = 0
    counts = 0
    roidb = filter_roidb(imdb.roidb)
    # Build a map from input image size to output feature-map size
    map_w = {}
    map_h = {}
    for i in xrange(50, cfg.TRAIN.MAX_SIZE + 10):
        blobs = {
            'data': np.zeros((1, 3, i, i)),
            'im_info': np.asarray([[i, i, 1.0]])
        }
        net.blobs['data'].reshape(*(blobs['data'].shape))
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        blobs_out = net.forward(
            data=blobs['data'].astype(np.float32, copy=False),
            im_info=blobs['im_info'].astype(np.float32, copy=False))
        height, width = net.blobs['rpn/output'].data.shape[-2:]
        map_w[i] = width
        map_h[i] = height

    for i in xrange(len(roidb)):
        if not i % 5000:
            print 'computing %d/%d' % (i, imdb.num_images)
        im = None
        if cfg.TRAIN.FORMAT == 'pickle':
            with open(roidb[i]['image'], 'rb') as f:
                im = cPickle.load(f)
        else:
            im = cv2.imread(roidb[i]['image'])

        im_data, im_info = _get_image_blob(im)
        gt_boxes = roidb[i]['boxes']
        gt_boxes = gt_boxes * im_info[0, 2]
        height = map_h[im_data.shape[2]]
        width = map_w[im_data.shape[3]]
        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * feature_stride
        shift_y = np.arange(0, height) * feature_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = raw_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = (raw_anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= 0) &
            (all_anchors[:, 1] >= 0) &
            (all_anchors[:, 2] < im_info[0, 1]) &  # width
            (all_anchors[:, 3] < im_info[0, 0])  # height
        )[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float),
            np.ascontiguousarray(gt_boxes, dtype=np.float))

        # There are two types of positive anchors:
        # 1. anchors whose overlap with some gt box is at least RPN_POSITIVE_OVERLAP
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0]
        # 2. anchors that best match each gt box
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        fg_inds = np.unique(np.hstack((fg_inds, gt_argmax_overlaps)))
        gt_rois = gt_boxes[argmax_overlaps, :]

        anchors = anchors[fg_inds, :]
        gt_rois = gt_rois[fg_inds, :]
        targets = bbox_transform(anchors, gt_rois[:, :4]).astype(np.float32,
                                                                 copy=False)
        sums += targets.sum(axis=0)
        squared_sums += (targets ** 2).sum(axis=0)
        counts += targets.shape[0]

    means = sums / counts
    stds = np.sqrt(squared_sums / counts - means ** 2)
    print means
    print stds
    return means, stds
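
The anchor-shifting step shared by the two stats examples relies on NumPy broadcasting: (1, A, 4) anchors plus (K, 1, 4) shifts produce a (K, A, 4) grid. A tiny standalone illustration with made-up numbers (A = 2 anchors, a 2x2 feature map, stride 16):

import numpy as np

raw_anchors = np.array([[-8, -8, 8, 8],
                        [-16, -16, 16, 16]])  # (A, 4) boxes around the origin
shift_x, shift_y = np.meshgrid(np.arange(2) * 16, np.arange(2) * 16)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # (K, 4)
A, K = raw_anchors.shape[0], shifts.shape[0]
# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4); flatten to (K*A, 4).
# shifts.reshape((K, 1, 4)) is equivalent to the examples'
# shifts.reshape((1, K, 4)).transpose((1, 0, 2)).
all_anchors = (raw_anchors.reshape((1, A, 4)) +
               shifts.reshape((K, 1, 4))).reshape((K * A, 4))
print(all_anchors.shape)  # (8, 4): every anchor translated to every cell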
Example #4
import os
import pprint
import scipy.io as sio

# Repo-local helpers (_init_caffe, get_roidb, get_output_dir, filter_roidb,
# SolverWrapper, train_net, cfg) and the variables init_model, rpn_file and
# output_cache are assumed to be defined elsewhere in this script.
#solver = '/net/wujial/py-R-FCN/models/pascal_voc/vgg16/soft_rfcn_alt_opt_5step_ohem/stage1_rfcn_ohem_solver80k120k.pt'
imdb_name = 'voc_2007_trainval'
solver = '/net/wujial/py-R-FCN/models/pascal_voc/ResNet-50/soft_rfcn_alt_opt_5step_ohem/stage1_rfcn_ohem_solver80k120k.pt'

print 'Init model: {}'.format(init_model)
print 'RPN proposals: {}'.format(rpn_file)
print('Using config:')
pprint.pprint(cfg)
_init_caffe(cfg)
roidb, imdb = get_roidb(imdb_name, rpn_file=rpn_file)
output_dir = get_output_dir(imdb)
print 'Output will be saved to `{:s}`'.format(output_dir)
# Train R-FCN
# Send R-FCN model path over the multiprocessing queue
final_caffemodel = os.path.join(output_dir, output_cache)
roidb = filter_roidb(roidb)
sw = SolverWrapper(solver, roidb, output_dir, init_model)
sw.solver.step(1)
net = sw.solver.net
netdata = dict()
saveto = 'test.mat'
netdata['data'] = net.blobs['data'].data
netdata['res5c'] = net.blobs['res5c'].data

sio.savemat(saveto, netdata)
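
# Optional sanity check (not part of the original snippet): read the .mat
# file back and confirm the blobs round-tripped with their shapes intact.
check = sio.loadmat(saveto)
assert check['data'].shape == netdata['data'].shape
assert check['res5c'].shape == netdata['res5c'].shape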


if os.path.exists(final_caffemodel):
    print 'Final model already exists'
else:
    model_paths = train_net(solver, roidb, output_dir,