def main():
    """Parse arguments, configure apollocaffe, build the roidbs and train.

    The training and test roidbs are only built when the corresponding
    ``args.train_imdb`` / ``args.test_imdb`` argument is given; otherwise
    ``None`` is passed to ``FasterRCNN``.

    Returns:
        0 once ``FasterRCNN.train()`` has returned.
    """
    network_generators = dict(ZF=ZFGenerator)
    args = parse_args(network_generators.keys())

    # Load the optional config file first, because cfg is read just below.
    if args.cfg is not None:
        cfg_from_file(args.cfg)

    # Seed apollocaffe and numpy with the same configured value.
    seed = cfg.RNG_SEED
    apollocaffe.set_random_seed(seed)
    np.random.seed(seed)

    # A negative gpu_id leaves the device selection untouched.
    if args.gpu_id >= 0:
        apollocaffe.set_device(args.gpu_id)

    apollocaffe.set_cpp_loglevel(3)

    train_roidb = test_roidb = None

    if args.train_imdb is not None:
        training_set = get_imdb(args.train_imdb)
        training_set.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
        train_roidb = get_training_roidb(training_set)

    if args.test_imdb is not None:
        # The test set always uses the 'gt' proposal method.
        testing_set = get_imdb(args.test_imdb)
        testing_set.set_proposal_method('gt')
        prepare_roidb(testing_set)
        test_roidb = testing_set.roidb

    trainer = FasterRCNN(args, network_generators,
                         train_roidb=train_roidb, test_roidb=test_roidb)
    trainer.train()
    return 0
Exemple #2
0
def get_training_roidb(imdb):
    """Build and return the roidb (Region of Interest database) for training.

    When ``cfg.TRAIN.USE_FLIPPED`` is truthy, ``imdb.append_flipped_images()``
    is called first. ``rdl_roidb.prepare_roidb(imdb)`` is then always run.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    flip_requested = cfg.TRAIN.USE_FLIPPED
    if flip_requested:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    roidb = imdb.roidb
    return roidb
Exemple #3
0
def get_training_roidb(imdb, model_to_use, proposal_file):
    """Returns a roidb (Region of Interest database) for use in training.

    Args:
        imdb: dataset object; must provide ``append_flipped_images()`` and a
            ``roidb`` attribute.
        model_to_use: forwarded unchanged to ``rdl_roidb.prepare_roidb``.
        proposal_file: forwarded unchanged to ``rdl_roidb.prepare_roidb``.

    Returns:
        ``imdb.roidb`` after optional flipping and preparation.
    """
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the print statement is a SyntaxError on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb, model_to_use, proposal_file)
    print('done')

    return imdb.roidb
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Optionally appends horizontally-flipped images (when
    ``cfg.TRAIN.USE_FLIPPED`` is truthy), then runs
    ``rdl_roidb.prepare_roidb(imdb)``.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # Single-argument print() calls produce identical output on Python 2
    # and also parse on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #5
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Optionally appends horizontally-flipped images (when
    ``cfg.TRAIN.USE_FLIPPED`` is truthy), then prepares the roidb.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')

    # Original author's note: this is the data actually used for training;
    # the call goes to prepare_roidb() in lib/roi_data_layer/roidb.py.
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #6
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Optionally appends horizontally-flipped images (when
    ``cfg.TRAIN.USE_FLIPPED`` is truthy), then runs
    ``rdl_roidb.prepare_roidb(imdb)``.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        # Original author's note: this creates the roidb, a list of dicts
        # with keys ('bbox', 'gt_classes', 'gt_overlaps', 'flipped',
        # 'seg_areas').
        imdb.append_flipped_images()
        print('done')

    # Original author's note: this enriches the imdb's roidb with more keys:
    # ('image' (image path), 'width', 'height', 'max_classes',
    # 'max_overlaps').
    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #7
0
    def _get_next_minibatch(self, db_inds):
        """Build the blobs for the minibatch selected by ``db_inds``.

        The ``self._roidb`` entries at the given indices are passed through
        ``prepare_roidb`` and ``add_bbox_regression_targets`` (with
        ``self.bbox_means`` / ``self.bbox_stds``) and then handed to
        ``get_minibatch`` together with ``self._num_classes``.

        Returns:
            Whatever ``get_minibatch`` returns. When that is not ``None``,
            its ``'db_inds'`` key is set to ``db_inds`` first.
        """
        selected_entries = []
        for index in db_inds:
            selected_entries.append(self._roidb[index])

        # Disabled by the original author:
        #   if cfg.TRAIN.USE_RPN_DB:
        #       selected_entries = self.imdb.add_rpn_rois(selected_entries)
        prepare_roidb(selected_entries)
        add_bbox_regression_targets(
            selected_entries, self.bbox_means, self.bbox_stds)

        blobs = get_minibatch(selected_entries, self._num_classes)
        if blobs is None:
            return None

        blobs['db_inds'] = db_inds
        return blobs
Exemple #8
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    When ``cfg.TRAIN.USE_FLIPPED`` is truthy the roidb is prepared once
    before flipping and again afterwards; otherwise it is prepared once.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        # HACK (original author): prepare before flipping "to remove empty
        # frames". The second prepare_roidb() call below is still executed.
        rdl_roidb.prepare_roidb(imdb)
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #9
0
def get_training_roidb(imdb):
    """Return the roidb (Region of Interest database) used for training.

    Flipped images are appended when ``cfg.TRAIN.USE_FLIPPED`` is truthy,
    after which ``rdl_roidb.prepare_roidb(imdb)`` is always called.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    want_flipped = cfg.TRAIN.USE_FLIPPED
    if want_flipped:
        print('Appending horizontally-flipped training examples...')
        # Add the flipped images.
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    # Original author's note: rdl_roidb comes from the roi_data_layer module.
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    prepared = imdb.roidb
    return prepared
Exemple #10
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Optionally appends horizontally-flipped images (when
    ``cfg.TRAIN.USE_FLIPPED`` is truthy), then runs
    ``rdl_roidb.prepare_roidb(imdb)``.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    # The previous code branched on cfg.TRAIN.HAS_RPN, but both branches made
    # this exact call, so the conditional was redundant.
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #11
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Note that this variant overwrites ``cfg.TRAIN.USE_FLIPPED`` with ``True``
    on every call, so flipping via ``imdb.append_flipped_roidb()`` always
    runs. It also prints ``cfg`` and ``imdb`` for inspection.

    Returns:
        ``imdb.roidb`` after flipping and preparation.
    """
    # NOTE(review): this mutates the shared config and forces flipping on
    # regardless of what was configured. Kept as-is to preserve behaviour.
    cfg.TRAIN.USE_FLIPPED = True
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    print(cfg)
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_roidb()
        print('done')

    print('Preparing training data...')
    print(imdb)
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #12
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Optionally appends horizontally-flipped images (when
    ``cfg.TRAIN.USE_FLIPPED`` is truthy), then runs
    ``rdl_roidb.prepare_roidb(imdb)``.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #13
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Optionally appends horizontally-flipped images (when
    ``cfg.TRAIN.USE_FLIPPED`` is truthy). ``rdl_roidb.prepare_roidb(imdb)``
    is only run when the first roidb entry is not a string. Start and end
    markers are printed and stdout is flushed before returning.

    Returns:
        ``imdb.roidb`` after the steps above.

    Raises:
        IndexError: if ``imdb.roidb`` is empty (the first entry is inspected).
    """
    print('===> Start get_training_roidb in train.py')
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()

    # String entries are left unprepared. NOTE(review): presumably those are
    # path-style entries loaded lazily elsewhere - confirm against the imdb.
    if not isinstance(imdb.roidb[0], str):
        print('Preparing training data...')
        rdl_roidb.prepare_roidb(imdb)

    # The closing marker used to repeat the "Start" text, which made the two
    # log lines indistinguishable.
    print('===> End get_training_roidb in train.py')
    sys.stdout.flush()
    return imdb.roidb
Exemple #14
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Augmentation steps, each gated by a config flag:
      * ``cfg.TRAIN.USE_FLIPPED`` -> ``imdb.append_flipped_images()``
      * ``cfg.TRAIN.RANDOM_CROP_SCALE`` ->
        ``imdb.append_cropped_scale_images()``

    ``rdl_roidb.prepare_roidb(imdb)`` is then always run.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    if cfg.TRAIN.RANDOM_CROP_SCALE:
        print('Appending pyramid cropped images...')
        imdb.append_cropped_scale_images()

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #15
0
    def get_training_roidb(self, imdb):
        """Returns a roidb (Region of Interest database) for use in training.

        Flipped images are appended only when ``cfg.TRAIN.USE_FLIPPED`` is
        truthy and ``self.mode`` is not ``'test'``.

        Args:
            imdb: A sequence of samples.
        Returns:
            The roidb for the given samples.
        """
        # Compare the mode by value: ``is not 'test'`` tested object identity,
        # which depends on string interning and raises a SyntaxWarning on
        # Python 3.8+.
        if cfg.TRAIN.USE_FLIPPED and self.mode != 'test':
            print('Appending horizontally-flipped training examples...')
            imdb.append_flipped_images()
            print('done')

        print('Preparing training data...')
        rdl_roidb.prepare_roidb(imdb)
        print('done')

        return imdb.roidb
Exemple #16
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Horizontal flipping is disabled in this variant. The roidb is prepared
    with ``gdl_roidb`` when both ``cfg.TRAIN.HAS_RPN`` and
    ``cfg.IS_MULTISCALE`` are truthy, and with ``rdl_roidb`` otherwise.

    Returns:
        ``imdb.roidb`` after preparation.
    """
    # NOTE: the cfg.TRAIN.USE_FLIPPED / imdb.append_flipped_images() step is
    # deliberately left out here. Original author's note: think about
    # including this flipping operation again.

    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    print('Preparing training data...')
    if cfg.TRAIN.HAS_RPN and cfg.IS_MULTISCALE:
        gdl_roidb.prepare_roidb(imdb)
    else:
        rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #17
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Optionally appends horizontally-flipped images (when
    ``cfg.TRAIN.USE_FLIPPED`` is truthy). The roidb is then prepared with
    ``gdl_roidb`` when both ``cfg.TRAIN.HAS_RPN`` and ``cfg.IS_MULTISCALE``
    are truthy, and with ``rdl_roidb`` otherwise.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    # IS_MULTISCALE is only consulted when HAS_RPN is truthy (short-circuit),
    # exactly as in the nested form.
    if cfg.TRAIN.HAS_RPN and cfg.IS_MULTISCALE:
        gdl_roidb.prepare_roidb(imdb)
    else:
        rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #18
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Horizontal flipping is disabled in this variant. The roidb is prepared
    with ``rdl_roidb`` unless both ``cfg.TRAIN.HAS_RPN`` and
    ``cfg.IS_MULTISCALE`` are truthy, in which case no preparation is done
    (the ``gdl_roidb`` call was disabled by the original author).

    Returns:
        ``imdb.roidb``.
    """
    # NOTE: the cfg.TRAIN.USE_FLIPPED / imdb.append_flipped_images() step is
    # deliberately left out here.

    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    print('Preparing training data...')
    multiscale_rpn = cfg.TRAIN.HAS_RPN and cfg.IS_MULTISCALE
    if not multiscale_rpn:
        rdl_roidb.prepare_roidb(imdb)
    # else: intentionally nothing - gdl_roidb.prepare_roidb(imdb) is disabled.
    print('done')

    return imdb.roidb
def rcnn_cache_pool5_features(imdb, crop_mode, crop_padding, net_file, cache_name, start = 0, end = 0):
    """Dispatch ``multi_wrapper`` over a range of images using a process pool.

    Args:
        imdb: dataset object; ``name``, ``roidb``, ``image_index`` and
            ``image_path_at(i)`` are read.
        crop_mode: stored on ``rcnn_model.detectors.crop_mode``.
        crop_padding: stored on ``rcnn_model.detectors.crop_padding``.
        net_file: passed to ``RcnnModel`` together with the net definition.
        cache_name: sub-directory name used to build ``opts.output_dir``.
        start: first image index to process (inclusive).
        end: one past the last image index; ``0`` means "all images".

    Returns:
        None. Worker results are not collected.
    """
    opts = Opts()
    opts.net_def_file = './model-defs/rcnn_batch_256_output_pool5.prototxt'
    opts.output_dir = "".join(['./feat_cache/', cache_name, '/', imdb.name, '/'])
    rdl_roidb.prepare_roidb(imdb)
    roidb = imdb.roidb
    rcnn_model = RcnnModel(opts.net_def_file, net_file)
    rcnn_model.detectors.crop_mode = crop_mode
    rcnn_model.detectors.crop_padding = crop_padding
    image_ids = imdb.image_index
    if end == 0:
        end = len(image_ids)

    pool = multiprocessing.Pool(12)
    try:
        # range() works on both Python 2 and 3 (xrange is Python 2 only).
        for i in range(start, end):
            # NOTE(review): the AsyncResult is discarded, so an exception
            # raised inside multi_wrapper is never reported to the caller.
            pool.apply_async(multi_wrapper, (opts, image_ids[i], roidb[i], imdb.image_path_at(i), i, rcnn_model))
    finally:
        # Always release the workers, even if submitting a task fails.
        pool.close()
        pool.join()
Exemple #20
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Augmentation steps, each gated by a config flag, with the roidb length
    printed after each one:
      * ``cfg.TRAIN.USE_FLIPPED`` -> ``imdb.append_flipped_images()``
      * ``cfg.TRAIN.MULTI_SCALE`` -> ``imdb.append_multiScaled_images()``

    ``rdl_roidb.prepare_roidb(imdb)`` is then always run.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    print('orgirnal num of image: %d' % len(imdb.roidb))
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        # Original author's note: including gt_roidb and flipped_roidb.
        imdb.append_flipped_images()
        print('the num of image after flipped: %d' % len(imdb.roidb))

    if cfg.TRAIN.MULTI_SCALE:
        print('Appending training examples with multi-scale %s...' % str(
            cfg.TRAIN.MULTI_SCALE_LIST))
        # Original author's note: including gt_roidb and flipped_roidb.
        imdb.append_multiScaled_images()
        print('the num of image after multiscale: %d' % len(imdb.roidb))

    print('Preparing training data...')
    # Original author's note: extends the roidb with more attributes.
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #21
0
def get_training_roidb(imdb):
    """Build and return the roidb (Region of Interest database) for training.

    Three optional augmentation steps run in order, each gated by a flag on
    ``cfg.TRAIN``: flipping, noise, and JPEG compression. Afterwards
    ``rdl_roidb.prepare_roidb(imdb)`` is always called.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # (cfg.TRAIN flag, progress message, imdb method) in execution order.
    augmentation_steps = (
        ('USE_FLIPPED',
         'Appending horizontally-flipped training examples...',
         'append_flipped_images'),
        ('USE_NOISE_AUG',
         'Appending noise to training examples...',
         'append_noise_images'),
        ('USE_JPG_AUG',
         'Appending jpg compression to training examples...',
         'append_jpg_images'),
    )
    for flag_name, message, method_name in augmentation_steps:
        # Flags and methods are looked up lazily, one step at a time.
        if getattr(cfg.TRAIN, flag_name):
            print(message)
            getattr(imdb, method_name)()
            print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #22
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Optionally appends horizontally-flipped images (when
    ``cfg.TRAIN.USE_FLIPPED`` is truthy). The roidb is then prepared with
    ``gdl_roidb`` when both ``cfg.TRAIN.HAS_RPN`` and ``cfg.IS_MULTISCALE``
    are truthy, and with ``rdl_roidb`` otherwise.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        # Original author's note: the XML annotations are parsed here;
        # `box = imdb.roidb[i]` performs the parsing, because
        # imdb.roidb = imdb._roidb_handler = pascal_voc.gt_roidb.
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    if cfg.TRAIN.HAS_RPN and cfg.IS_MULTISCALE:
        gdl_roidb.prepare_roidb(imdb)
    else:
        rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #23
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Augmentation steps, each gated by a config flag:
      * ``cfg.TRAIN.USE_FLIPPED`` -> ``imdb.append_flipped_images()``
      * ``cfg.TRAIN.USE_NOISY``   -> ``imdb.append_noisy_images()``
      * ``cfg.TRAIN.USE_CROPPED`` -> ``imdb.append_cropped_images()``

    ``rdl_roidb.prepare_roidb(imdb)`` is then always run.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')
    if cfg.TRAIN.USE_NOISY:
        print('Appending noisy images...')
        imdb.append_noisy_images()
        print('done')
    if cfg.TRAIN.USE_CROPPED:
        print('Appending cropped images...')
        imdb.append_cropped_images()
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #24
0
def get_training_roidb(imdb):
    """Build and return the roidb (Region of Interest database) for training.

    The roidb length is captured once up front and passed to every enabled
    augmentation step (horizontal/vertical flip, brightness, rotation, shift,
    zoom, random crop). Each step is gated by its ``cfg.TRAIN`` switch and,
    where present, by a check on the step's parameters.
    ``rdl_roidb.prepare_roidb(imdb)`` is always run at the end.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    train_cfg = cfg.TRAIN
    base_count = len(imdb.roidb)

    if train_cfg.USE_HOR_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_hor_flipped_images(base_count)
        print('done')

    if train_cfg.USE_VER_FLIPPED:
        print('Appending vertical-flipped training examples...')
        imdb.append_ver_flipped_images(base_count)
        print('done')

    # The parameter checks below are only evaluated when the switch is on.
    if train_cfg.BRIGHT_ADJUEST and len(train_cfg.BRIGHT_ADJUEST_SCALE) != 0:
        print('Appending bright-adjuest training examples...')
        imdb.append_bright_adjuest_images(base_count)
        print('done')

    if train_cfg.ROTATE_ADJUEST and len(train_cfg.ROTATE_ADJUEST_ANGLE) != 0:
        print('Appending rotate-adjuest training examples...')
        imdb.append_rotate_adjuest_images(base_count)
        print('done')

    if train_cfg.SHIFT_ADJUEST and (train_cfg.SHIFT_ADJUEST_X != 0
                                    or train_cfg.SHIFT_ADJUEST_Y != 0):
        print('Appending shift-adjuest training examples...')
        imdb.append_shift_adjuest_images(base_count)
        print('done')

    if train_cfg.ZOOM_ADJUEST and len(train_cfg.ZOOM_ADJUEST_SCALE) != 0:
        print('Appending zoom-adjuest training examples...')
        imdb.append_zoom_adjuest_images(base_count)
        print('done')

    if train_cfg.CROP_IMAGES and (len(train_cfg.CROP_SIZE) != 0
                                  and train_cfg.RESIZE_SCALE >= 800):
        print('Appending random-crop training examples...')
        imdb.append_random_crop_images(base_count)
        print('done')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #25
0
def get_clean_nodes_helper(im_set, data_set, sess, net, fetch_list, nodes_dir,
                           sub_dirs):
    """Run the network over each image in ``im_set`` and save per-node arrays.

    For every image the network is evaluated through ``sess.run`` and, for
    each ``sub_dirs`` entry and each predicted node, one ``.npy`` file is
    written under ``nodes_dir.format(<class name>)/<sub_dir>/``.

    Args:
        im_set: indexable, sized collection of image identifiers; position
            ``idx`` is paired with ``imdb.roidb[idx]``.
        data_set: name passed to ``get_imdb``.
        sess: session object providing ``run(fetch_list, feed_dict=...)``.
        net: network exposing ``data``, ``im_info`` and
            ``appearance_drop_rate`` as feed keys.
        fetch_list: fetches for ``sess.run``; four results are expected.
        nodes_dir: format string taking the predicted class name.
        sub_dirs: sub-directory names; entry ``k`` receives slices of
            ``e_matrix[k]``.
    """
    print('[prepare the dataset...]')
    # The trailing notes on the next lines are the original author's.
    imdb = get_imdb(data_set)  # gt boxes, absolute 0-based pixel values
    imdb.competition_mode(True)  # use_salt: False; cleanup: False
    imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)  # ground-truth
    rdl_roidb.prepare_roidb(imdb)  # get gt rois for training

    assert len(cfg.TEST.SCALES) == 1
    target_size = cfg.TEST.SCALES[0]
    timer = Timer()
    num_images = len(im_set)

    for idx in range(num_images):
        i = im_set[idx]
        timer.tic()
        _, im, im_info, _ = get_image_prepared(
            cfg, imdb.roidb[idx], target_size)
        feed_dict = {
            net.data: np.expand_dims(im, axis=0),
            net.im_info: np.expand_dims(im_info, axis=0),
            net.appearance_drop_rate: 0.0
        }

        e_matrix, _, cls_pred, _ = sess.run(fetch_list, feed_dict=feed_dict)
        pred_cls_ids = np.argmax(cls_pred, 1)

        for matrix_idx, sub_dir in enumerate(sub_dirs):
            for node_idx, cls_id in enumerate(pred_cls_ids):
                file_name = 'im{:s}_node{:d}.npy'.format(str(i), node_idx)
                class_dir = nodes_dir.format(classes[cls_id])
                np_name = '/'.join([class_dir, sub_dir, file_name])
                np.save(np_name, e_matrix[matrix_idx][node_idx])

        timer.toc()
        progress = 'im_detect: {:d}/{:d} {:.3f}s'.format(
            idx + 1, num_images, timer.average_time)
        print(progress)
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Optionally appends horizontally-flipped images (when
    ``cfg.TRAIN.USE_FLIPPED`` is truthy). The roidb is then prepared with
    ``gdl_roidb`` when both ``cfg.TRAIN.HAS_RPN`` (use RPN during training)
    and ``cfg.IS_MULTISCALE`` (multi-scale) are truthy, and with
    ``rdl_roidb`` otherwise.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    if cfg.TRAIN.HAS_RPN and cfg.IS_MULTISCALE:
        gdl_roidb.prepare_roidb(imdb)
    else:
        rdl_roidb.prepare_roidb(imdb)
    print('done')

    # Original author's note: roidb is the key attribute of imdb; at this
    # point each roidb element carries more information, depending on
    # whether gdl or rdl prepared it.
    return imdb.roidb
Exemple #27
0
def get_training_roidb(imdb):
    """Build and return the roidb (Region of Interest database) for training.

    Flipped images are appended when ``cfg.TRAIN.USE_FLIPPED`` is truthy.
    The roidb is then prepared by ``gdl_roidb`` if both
    ``cfg.TRAIN.HAS_RPN`` and ``cfg.IS_MULTISCALE`` are truthy, otherwise by
    ``rdl_roidb``.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # Original author's note: cfg.TRAIN.USE_FLIPPED is already defined as
    # True, meaning horizontally flipped images are used (data augmentation)
    # to prevent overfitting.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    # Original author's notes: HAS_RPN is False in their setup; the rdl path
    # further processes the roidb, adding image, width, height, max_classes
    # and max_overlaps.
    use_gdl = cfg.TRAIN.HAS_RPN and cfg.IS_MULTISCALE
    roidb_module = gdl_roidb if use_gdl else rdl_roidb
    roidb_module.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
def get_training_roidb(imdb, roidbs_cache_path=None):
    """Returns a roidb (Region of Interest database) for use in training.

    Args:
        imdb: dataset object.
        roidbs_cache_path: when non-empty, it is stored in
            ``imdb.config['roidbs_cache_path']`` and ``imdb.cache_rpn_roidb()``
            is called instead of building a roidb.

    Returns:
        ``None`` when ``roidbs_cache_path`` is given; otherwise ``imdb.roidb``
        after optional flipping and ``rdl_roidb.prepare_roidb(imdb)``.
    """
    # A single pre-formatted argument prints the same text on Python 2 and 3
    # (a multi-argument print() call would print a tuple on Python 2).
    print('roidbs_cache_path: %s' % (roidbs_cache_path,))

    if roidbs_cache_path:
        imdb.config['roidbs_cache_path'] = roidbs_cache_path
        imdb.cache_rpn_roidb()
        return None

    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        print('In lib/fast_rcnn/train.py -- get_training_roidb func...')
        imdb.append_flipped_images()

        print('Appending horizontally-flipped training examples done...')
        # Blank separator line; a bare `print` does nothing on Python 3.
        print('')

    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #29
0
def get_training_roidb(imdb):
    """Build and return the roidb (Region of Interest database) for training.

    Steps, in order:
      1. Touch ``imdb.roidb`` so it is loaded.
      2. If ``cfg.TRAIN.CLIP_SIZE`` is truthy, call
         ``imdb.resizeRoidbByAnnoSize`` with it.
      3. If ``cfg.TRAIN.USE_FLIPPED`` is truthy, append flipped images.
      4. Prepare the roidb with the module matching ``cfg.TASK``
         ("object_detection", "classification" or "regeneration"); any other
         task value skips preparation.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    _ = imdb.roidb  # initially load the roidb

    clip_size = cfg.TRAIN.CLIP_SIZE
    if clip_size:
        imdb.resizeRoidbByAnnoSize(clip_size)

    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    task = cfg.TASK
    if task == "object_detection":
        # Original author's note: gets image sizes.
        rdl_roidb.prepare_roidb(imdb)
    elif task == "classification":
        cls_roidb.prepare_roidb(imdb)
    elif task == "regeneration":
        vae_rdl_roidb.prepare_roidb(imdb)

    return imdb.roidb
Exemple #30
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Optionally appends horizontally-flipped images (when
    ``cfg.TRAIN.USE_FLIPPED`` is truthy). The roidb is then prepared with
    ``gdl_roidb`` when both ``cfg.TRAIN.HAS_RPN`` and ``cfg.IS_MULTISCALE``
    are truthy, and with ``rdl_roidb`` otherwise.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        # Original author's note: use horizontally flipped images during
        # training; loading the flipped data in addition to the originals
        # doubles the amount of data.
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    if cfg.TRAIN.HAS_RPN and cfg.IS_MULTISCALE:
        gdl_roidb.prepare_roidb(imdb)
    else:
        # Original author's note: prepares the data by post-processing the
        # roidb (giving the earlier results more regular names).
        rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #31
0
def get_training_roidb(imdb):
    """Build and return the roidb (Region of Interest database) for training.

    Flipped images are appended when ``cfg.TRAIN.USE_FLIPPED`` is truthy.
    When both ``cfg.TRAIN.HAS_RPN`` and ``cfg.IS_MULTISCALE`` are truthy, two
    warnings are printed and ``gdl_roidb`` prepares the roidb; in every other
    case ``rdl_roidb`` does.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    single_scale_path = not (cfg.TRAIN.HAS_RPN and cfg.IS_MULTISCALE)
    if single_scale_path:
        rdl_roidb.prepare_roidb(imdb)
    else:
        # TODO: fix multiscale training (single scale is already a good
        # trade-off)
        for warning in (
                '#### warning: multi-scale has not been tested.',
                '#### warning: using single scale by setting IS_MULTISCALE: False.',
        ):
            print(warning)
        gdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #32
0
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Horizontal flipping is disabled in this variant. The roidb is prepared
    with ``gdl_roidb`` when both ``cfg.TRAIN.HAS_RPN`` and
    ``cfg.IS_MULTISCALE`` are truthy, and with ``rdl_roidb`` otherwise.

    Returns:
        ``imdb.roidb`` after preparation.
    """
    # NOTE: the cfg.TRAIN.USE_FLIPPED / imdb.append_flipped_images() step is
    # deliberately left out here. Original author's notes: think about
    # including this flipping operation again; with flipping enabled,
    # len(imdb.image_index) was observed to be twice as long as expected.

    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    print('Preparing training data...')
    if cfg.TRAIN.HAS_RPN and cfg.IS_MULTISCALE:
        gdl_roidb.prepare_roidb(imdb)
    else:
        rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    Optionally appends horizontally-flipped images (when
    ``cfg.TRAIN.USE_FLIPPED`` is truthy). The roidb is then prepared with
    ``gdl_roidb`` when both ``cfg.TRAIN.HAS_RPN`` and ``cfg.IS_MULTISCALE``
    are truthy, and with ``rdl_roidb`` otherwise.

    Returns:
        ``imdb.roidb`` after the steps above.
    """
    # Original author's note: cfg.TRAIN.USE_FLIPPED is already defined as
    # True, meaning horizontally flipped images are used (data augmentation)
    # to prevent overfitting.
    if cfg.TRAIN.USE_FLIPPED:
        # The print statement here was mixed with print() calls below; the
        # call form is used throughout so the block also parses on Python 3.
        print('Appending horizontally-flipped training examples...')
        # Original author's notes: append_flipped_images mirrors the images
        # horizontally to increase the number of samples. roidb is a member
        # of imdb: a list with one dict per image holding 5 keys (boxes, the
        # class of each box, the flipped flag, gt_overlaps and seg_areas).
        # Box values are mirrored horizontally; in their data the roidb grew
        # from 5011 to 5011*2=10022 elements.
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    # Original author's notes: cfg.TRAIN.HAS_RPN is false in their setup;
    # the rdl path further processes the roidb, adding image, width, height,
    # max_classes and max_overlaps.
    if cfg.TRAIN.HAS_RPN and cfg.IS_MULTISCALE:
        gdl_roidb.prepare_roidb(imdb)
    else:
        rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
Exemple #34
0
def load_roidb(imdb_name, proposal_method, has_gt_bbox=True, filter_no_gt=True):
    """Load an imdb by name and return its prepared (optionally filtered) roidb.

    Args:
        imdb_name: name passed to ``get_imdb``.
        proposal_method: passed to ``imdb.set_proposal_method``.
        has_gt_bbox: when False, each entry's ``'boxes'`` is reduced to the
            rows whose ``'gt_classes'`` value is 0.
        filter_no_gt: when True, entries whose ``imdb.gt_roidb()`` counterpart
            has zero boxes are dropped (and reported on stdout).

    Returns:
        The roidb; a new list when ``filter_no_gt`` is True, otherwise
        ``imdb.roidb`` itself.
    """
    imdb = get_imdb(imdb_name)
    imdb.set_proposal_method(proposal_method)
    prepare_roidb(imdb)
    roidb = imdb.roidb

    if not has_gt_bbox:
        # range() works on both Python 2 and 3 (xrange is Python 2 only).
        for i in range(len(imdb.image_index)):
            entry = roidb[i]
            entry['boxes'] = entry['boxes'][entry['gt_classes'] == 0]

    # filter roidb
    if filter_no_gt:
        print('filtering out images with no gt...')
        gt_roidb = imdb.gt_roidb()
        assert len(roidb) == len(gt_roidb)
        # A set gives O(1) membership tests in the filter below; the list
        # used before made the filtering quadratic in the worst case.
        remove_indices = set()
        for i, gt_entry in enumerate(gt_roidb):
            if gt_entry['boxes'].shape[0] == 0:
                print('{} has no gt bbox!'.format(roidb[i]['image']))
                remove_indices.add(i)
        roidb = [entry for j, entry in enumerate(roidb)
                 if j not in remove_indices]
        print('{} images are filtered'.format(len(remove_indices)))
    return roidb
Exemple #35
0
def get_training_roidb_quantized(imdb):
    """Prepare ``imdb`` with ``rdl_roidb.prepare_roidb`` and return its roidb.

    Unlike the sibling ``get_training_roidb`` variants, no flipping step is
    performed here.

    Returns:
        ``imdb.roidb`` after preparation.
    """
    # print() form keeps Python 2 output unchanged and is valid on Python 3.
    print('Preparing training data...')
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
# ------------

if rand_seed is not None:
    np.random.seed(rand_seed)

# Load the config file and read the training hyperparameters from cfg.
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# Load the imdb and create the data layer.
imdb = get_imdb(imdb_name)
rdl_roidb.prepare_roidb(imdb)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# Create network and initialize
net = WSDDN(classes=imdb.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.001)

# The pretrained AlexNet weights are cached locally as a pickle so the
# download only happens once.
if os.path.exists('pretrained_alexnet.pkl'):
    # Pickle data is binary, so the cache must be read with 'rb': text mode
    # fails on Python 3 and can corrupt the data on Windows. The cache was
    # written by this script; never point it at untrusted files, because
    # unpickling can execute arbitrary code.
    with open('pretrained_alexnet.pkl', 'rb') as cache_file:
        pret_net = pkl.load(cache_file)
else:
    pret_net = model_zoo.load_url(
        'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth')
    # The context manager flushes and closes the cache file.
    with open('pretrained_alexnet.pkl', 'wb') as cache_file:
        pkl.dump(pret_net, cache_file, pkl.HIGHEST_PROTOCOL)
Exemple #37
0
def test_net(net_name, weight_name, imdb, mode, max_per_image=100):
    """Restore a network and run scene-graph evaluation over every image.

    Args:
        net_name: name passed to ``get_network`` to select the network class.
        weight_name: checkpoint path handed to ``tf.train.Saver.restore``.
        imdb: dataset object; ``roidb``, ``image_index``, ``num_classes``,
            ``num_predicates`` and ``im_getter`` are read, plus
            ``add_rpn_rois`` when ``cfg.TEST.USE_RPN_DB`` is truthy.
        mode: ``'all'`` to evaluate ``'pred_cls'``, ``'sg_cls'`` and
            ``'sg_det'``; any other value is used as the single mode.
        max_per_image: accepted for interface compatibility; not used in
            this function.

    Returns:
        None. Progress and the evaluators' statistics are printed.
    """
    sess = tf.Session()

    # set up testing mode
    rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rois')
    rel_rois = tf.placeholder(dtype=tf.float32, shape=[None, 5], name='rel_rois')
    ims = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3], name='ims')
    relations = tf.placeholder(dtype=tf.int32, shape=[None, 2], name='relations')
    inputs = {'rois': rois,
              'rel_rois': rel_rois,
              'ims': ims,
              'relations': relations,
              'num_roi': tf.placeholder(dtype=tf.int32, shape=[]),
              'num_rel': tf.placeholder(dtype=tf.int32, shape=[]),
              'num_classes': imdb.num_classes,
              'num_predicates': imdb.num_predicates,
              'rel_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
              'rel_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
              'rel_pair_mask_inds': tf.placeholder(dtype=tf.int32, shape=[None, 2]),
              'rel_pair_segment_inds': tf.placeholder(dtype=tf.int32, shape=[None]),
              'n_iter': cfg.TEST.INFERENCE_ITER}

    net = get_network(net_name)(inputs)
    net.setup()
    # Format first, then print: `print (...).format(...)` only worked with
    # the Python 2 print statement and raises AttributeError on Python 3.
    print('Loading model weights from {:s}'.format(weight_name))
    saver = tf.train.Saver()
    saver.restore(sess, weight_name)

    roidb = imdb.roidb
    if cfg.TEST.USE_RPN_DB:
        imdb.add_rpn_rois(roidb, make_copy=False)
    prepare_roidb(roidb)

    num_images = len(imdb.image_index)

    # timers
    _t = {'im_detect' : Timer(), 'evaluate' : Timer()}

    if mode == 'all':
        eval_modes = ['pred_cls', 'sg_cls', 'sg_det']
    else:
        eval_modes = [mode]
    multi_iter = [net.n_iter - 1] if net.iterable else [0]
    # Each label and its value are printed on one line, matching what the
    # Python 2 trailing-comma prints produced.
    print('Graph Inference Iteration = {}'.format(multi_iter))
    print('EVAL MODES = {}'.format(eval_modes))

    # initialize evaluator for each task
    evaluators = {}
    for m in eval_modes:
        evaluators[m] = {}
        for it in multi_iter:
            evaluators[m][it] = SceneGraphEvaluator(imdb, mode=m)

    # range() works on both Python 2 and 3 (xrange is Python 2 only).
    for im_i in range(num_images):

        im = imdb.im_getter(im_i)

        # A distinct loop name avoids shadowing the `mode` parameter.
        for eval_mode in eval_modes:
            bbox_reg = True
            if eval_mode == 'pred_cls' or eval_mode == 'sg_cls':
                # use ground truth object locations
                bbox_reg = False
                box_proposals = gt_rois(roidb[im_i])
            else:
                # use RPN-proposed object locations
                box_proposals, roi_scores = non_gt_rois(roidb[im_i])
                roi_scores = np.expand_dims(roi_scores, axis=1)
                nms_keep = cpu_nms(np.hstack((box_proposals, roi_scores)).astype(np.float32),
                            cfg.TEST.PROPOSAL_NMS)
                nms_keep = np.array(nms_keep)
                num_proposal = min(cfg.TEST.NUM_PROPOSALS, nms_keep.shape[0])
                keep = nms_keep[:num_proposal]
                box_proposals = box_proposals[keep, :]

            if box_proposals.size == 0 or box_proposals.shape[0] < 2:
                # continue if no graph
                continue

            _t['im_detect'].tic()
            out_dict = im_detect(sess, net, inputs, im, box_proposals,
                                 bbox_reg, multi_iter)
            _t['im_detect'].toc()
            _t['evaluate'].tic()
            for iter_n in multi_iter:
                sg_entry = out_dict[iter_n]
                evaluators[eval_mode][iter_n].evaluate_scene_graph_entry(sg_entry, im_i, iou_thresh=0.5)
            _t['evaluate'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(im_i + 1, num_images, _t['im_detect'].average_time,
                      _t['evaluate'].average_time))

    # print out evaluation results
    for eval_mode in eval_modes:
        for iter_n in multi_iter:
            evaluators[eval_mode][iter_n].print_stats()