コード例 #1
0
def alternate_train(args, ctx, pretrained, epoch, rpn_epoch, rpn_lr,
                    rpn_lr_step):
    """Stage 1 of alternating optimization: train an RPN from ImageNet init."""
    begin_epoch = 0
    # Relax the lower background-overlap threshold for this training run.
    config.TRAIN.BG_THRESH_LO = 0.0

    logger.info('########## TRAIN RPN WITH IMAGENET INIT')
    train_rpn(args.network, args.dataset, args.image_set, args.root_path,
              args.dataset_path, args.frequent, args.kvstore,
              args.work_load_list, args.no_flip, args.no_shuffle,
              args.resume, ctx, pretrained, epoch, 'model/rpn1',
              begin_epoch, rpn_epoch,
              train_shared=False, lr=rpn_lr, lr_step=rpn_lr_step)
コード例 #2
0
def main():
    """Parse CLI arguments, build the GPU context list and run training."""
    args = parse_args()
    logger.info('Called with argument: %s' % args)
    gpu_ids = args.gpus.split(',')
    ctx = [mx.gpu(int(gpu_id)) for gpu_id in gpu_ids]
    alternate_train(args, ctx, args.pretrained, args.pretrained_epoch,
                    args.rpn_epoch, args.rpn_lr, args.rpn_lr_step,
                    args.rcnn_epoch, args.rcnn_lr, args.rcnn_lr_step)
コード例 #3
0
def main():
    """Entry point: dispatch alternating RPN/RCNN training on the given GPUs."""
    args = parse_args()
    logger.info('Called with argument: %s' % args)
    ctx = [mx.gpu(int(device)) for device in args.gpus.split(',')]
    alternate_train(args, ctx,
                    args.pretrained, args.pretrained_epoch,
                    args.rpn_epoch, args.rpn_lr, args.rpn_lr_step,
                    args.rcnn_epoch, args.rcnn_lr, args.rcnn_lr_step)
コード例 #4
0
ファイル: test.py プロジェクト: xiaogang00/Learn-Mxnet
def main():
    """Entry point: run R-CNN testing on a single GPU."""
    args = parse_args()
    logger.info('Called with argument: %s' % args)
    ctx = mx.gpu(args.gpu)
    test_rcnn(args.network, args.dataset, args.image_set,
              args.root_path, args.dataset_path, ctx,
              args.prefix, args.epoch, args.vis, args.shuffle,
              args.has_rpn, args.proposal, args.thresh)
コード例 #5
0
def main():
    """Entry point: configure test-time augmentation flags and run the test."""
    global args
    args = parse_args()
    # Mode 1 enables both pyramid testing and bounding-box voting;
    # any other mode disables both.
    heavy_test = (args.mode == 1)
    args.pyramid = heavy_test
    args.bbox_vote = heavy_test
    logger.info('Called with argument: %s' % args)
    test(args)
コード例 #6
0
def main():
    """Entry point: parse arguments, build GPU contexts and launch training."""
    args = parse_args()
    logger.info('Called with argument: %s' % args)
    ctx = [mx.gpu(int(gpu)) for gpu in args.gpus.split(',')]
    train_net(args, ctx, args.pretrained, args.pretrained_epoch,
              args.prefix, args.begin_epoch, args.end_epoch,
              lr=args.lr, lr_step=args.lr_step)
コード例 #7
0
def test_net(prefix, iter_no):
    """Test the checkpoint `prefix`/`iter_no` and dump proposals to a .mat file.

    The proposal dictionary accumulated in `default.res_dict` is written to
    a file named after the test image set and the experiment name.
    """
    logger.info('Testing ...')
    default.testing = True
    # Run the test pass on the first GPU of the configured list.
    first_gpu_id = int(default.gpus.split(',')[0])
    ctx = mx.gpu(first_gpu_id)
    acc = test_rcnn(default.network, default.dataset, default.test_image_set,
                    default.dataset_path, ctx, prefix, iter_no,
                    default.val_vis, default.val_shuffle, default.val_has_rpn,
                    default.proposal, default.val_max_box, default.val_thresh)

    prop_file = 'proposals_%s_%s.mat' % (default.test_image_set,
                                         default.exp_name)
    savemat(prop_file, default.res_dict)
    default.testing = False
コード例 #8
0
ファイル: train.py プロジェクト: xiyou1024/CompreFace
def main():
    """Entry point: pick devices from CUDA_VISIBLE_DEVICES and launch training.

    Falls back to CPU when no GPUs are visible.
    """
    args = parse_args()
    logger.info('Called with argument: %s' % args)
    #ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    ctx = []
    # Use .get() so a missing CUDA_VISIBLE_DEVICES falls through to the CPU
    # path below instead of raising KeyError.
    cvd = os.environ.get('CUDA_VISIBLE_DEVICES', '').strip()
    if len(cvd) > 0:
      # MXNet device ids are relative to the visible set, so enumerate 0..n-1.
      for i in range(len(cvd.split(','))):
        ctx.append(mx.gpu(i))
    if len(ctx) == 0:
      ctx = [mx.cpu()]
      print('use cpu')
    else:
      print('gpu num:', len(ctx))
    train_net(args, ctx, args.pretrained, args.pretrained_epoch, args.prefix, args.begin_epoch, args.end_epoch,
              lr=args.lr, lr_step=args.lr_step)
コード例 #9
0
ファイル: det_batch.py プロジェクト: chenzx921020/faster-rcnn
def demo_net(predictor, image_name, result_txt, vis=False):
    """
    generate data_batch -> im_detect -> post process
    :param predictor: Predictor
    :param image_name: image name
    :param result_txt: open file handle that detections are appended to
    :param vis: will save as a new image if not visualized
    :return: None
    """
    assert os.path.exists(image_name), image_name + ' not found'
    # print() works under both Python 2 and 3; the original bare
    # `print image_name` statement is a SyntaxError on Python 3.
    print(image_name)
    im = cv2.imread(image_name)
    data_batch, data_names, im_scale = generate_batch(im)
    scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                         im_scale)

    # Record "<parent_dir>/<file_name>" as the image identifier.
    result_txt.write(
        image_name.split('/')[-2] + '/' + image_name.split('/')[-1] + '\n')

    # Per-class thresholding + NMS.
    all_boxes = [[] for _ in CLASSES]
    for cls in CLASSES:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind, np.newaxis]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        keep = nms(dets)
        all_boxes[cls_ind] = dets[keep, :]

    # Index 0 (background) is kept empty.
    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]

    # print results
    #logger.info('---class---')
    #logger.info('[[x1, x2, y1, y2, confidence]]')
    for ind, boxes in enumerate(boxes_this_image):
        if len(boxes) > 0:
            logger.info('---%s---' % CLASSES[ind])
            logger.info('%s' % boxes)
            # Flatten each detection to "x1 y1 x2 y2 score " on one line.
            for iii in range(0, len(boxes)):
                result_txt.write(
                    str(boxes[iii][0]) + ' ' + str(boxes[iii][1]) + ' ' +
                    str(boxes[iii][2]) + ' ' + str(boxes[iii][3]) + ' ' +
                    str(boxes[iii][4]) + ' ')

    result_txt.write('\n')
コード例 #10
0
def demo_net(predictor, image_name, vis=False):
    """
    generate data_batch -> im_detect -> post process
    :param predictor: Predictor
    :param image_name: image name
    :param vis: will save as a new image if not visualized
    :return: None
    """
    assert os.path.exists(image_name), image_name + ' not found'
    im = cv2.imread(image_name)
    data_batch, data_names, im_scale = generate_batch(im)
    scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, im_scale)

    # Per-class thresholding + NMS.
    all_boxes = [[] for _ in CLASSES]
    for cls in CLASSES:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind, np.newaxis]
        # (debug print of cls_scores.shape removed)
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        keep = nms(dets)
        all_boxes[cls_ind] = dets[keep, :]

    # Index 0 (background) is kept empty.
    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]

    # print results
    logger.info('---class---')
    logger.info('[[x1, x2, y1, y2, confidence]]')
    for ind, boxes in enumerate(boxes_this_image):
        if len(boxes) > 0:
            logger.info('---%s---' % CLASSES[ind])
            logger.info('%s' % boxes)

    if vis:
        vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image, CLASSES, im_scale)
    else:
        # Save the annotated image under the VOC results directory.
        # os.path.basename replaces the original manual last-'/' search and
        # also works when the path contains no separator at all.
        result_file = "data/VOCdevkit/results/test/" + os.path.basename(image_name)
        result_file = result_file.replace('.', '_result.')
        logger.info('results saved to %s' % result_file)
        im = draw_all_detection(data_dict['data'].asnumpy(), boxes_this_image, CLASSES, im_scale)
        cv2.imwrite(result_file, im)
コード例 #11
0
ファイル: train.py プロジェクト: LXYTSOS/lesion_det_dual_att
def get_optimizer(args, arg_names, num_iter_per_epoch, iter_size):
    """Build an SGD optimizer with a multi-step learning-rate schedule.

    Biases get a 2x learning-rate multiplier and zero weight decay; all
    other parameters use the base rate with multiplier 1.
    """
    # --- learning-rate schedule ---
    base_lr = args.e2e_lr
    lr_factor = args.lr_factor
    lr_epoch = [float(ep) for ep in args.e2e_lr_step.split(',')]
    lr_epoch_diff = [ep - args.begin_epoch for ep in lr_epoch
                     if ep > args.begin_epoch]
    # When resuming, apply the decay steps that were already passed.
    lr = base_lr * lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))
    lr_iters = [int(ep * num_iter_per_epoch / iter_size)
                for ep in lr_epoch_diff]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' %
                (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)

    # --- per-parameter multipliers ---
    lr_dict = {}
    wd_dict = {}
    param_idx2name = {}
    for idx, name in enumerate(arg_names):
        param_idx2name[idx] = name
        lr_dict[name] = 1.
        if name.endswith('_bias'):
            # Biases: learning rate x2, no weight decay.
            wd_dict[name] = 0
            lr_dict[name] = 2

    # rescale_grad is done in the loss functions, so it is omitted here.
    opt = mx.optimizer.SGD(momentum=0.9,
                           wd=args.weight_decay,
                           learning_rate=lr,
                           lr_scheduler=lr_scheduler,
                           param_idx2name=param_idx2name,
                           clip_gradient=5)
    opt.set_wd_mult(wd_dict)
    opt.set_lr_mult(lr_dict)

    return opt
コード例 #12
0
def validate(prefix, iter_no):
    """Evaluate checkpoint epoch iter_no + 1 and track the best model.

    Updates `default.best_model` / `default.best_acc` / `default.best_epoch`,
    records the accuracy in `default.accs`, and (when
    `default.keep_best_model` is set) deletes the non-best checkpoint file.
    Returns the accuracy of the most recent epoch.
    """
    logger.info('Validating ...')
    default.testing = True
    ctx = mx.gpu(int(default.val_gpu))
    # ctx = mx.gpu(int(default.gpus.split(',')[0]))
    epoch = iter_no + 1
    acc = test_rcnn(default.network, default.dataset, default.val_image_set,
                    default.dataset_path, ctx, prefix, epoch, default.val_vis,
                    default.val_shuffle, default.val_has_rpn, default.proposal,
                    default.val_max_box, default.val_thresh)

    fn = '%s-%04d.params' % (prefix, epoch)
    fn_to_del = None
    if len(default.accs.keys()) == 0:
        # First validation: the current model is the best by definition.
        default.best_model = fn
        default.best_acc = acc
        default.best_epoch = epoch
    else:
        if acc > default.best_acc:
            # New best: schedule the previous best checkpoint for deletion.
            fn_to_del = default.best_model
            default.best_model = fn
            default.best_acc = acc
            default.best_epoch = epoch
        else:
            fn_to_del = fn

    default.accs[str(epoch)] = acc
    epochs = np.sort([int(a) for a in default.accs.keys()]).tolist()
    acc = 0
    for e in epochs:
        # print() works under both Python 2 and 3; the original bare print
        # statement is a SyntaxError on Python 3.
        print('Iter %s: %.4f' % (e, default.accs[str(e)]))
        # After the loop, `acc` holds the accuracy of the latest epoch.
        acc = default.accs[str(e)]
    sys.stdout.flush()

    if default.keep_best_model and fn_to_del:
        os.remove(fn_to_del)
        print(fn_to_del, 'deleted to keep only the best model')
        sys.stdout.flush()

    default.testing = False
    return acc
コード例 #13
0
ファイル: demo.py プロジェクト: CoderHHX/incubator-mxnet
def demo_net(predictor, image_name, vis=False):
    """
    Run detection on a single image: build a batch, forward it through the
    predictor, apply per-class score thresholding and NMS, then either
    visualize the detections or save an annotated copy of the image.
    :param predictor: Predictor
    :param image_name: image name
    :param vis: will save as a new image if not visualized
    :return: None
    """
    assert os.path.exists(image_name), image_name + ' not found'
    im = cv2.imread(image_name)
    data_batch, data_names, im_scale = generate_batch(im)
    scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, im_scale)

    # Per-class thresholding followed by NMS.
    all_boxes = [[] for _ in CLASSES]
    for cls_ind, cls in enumerate(CLASSES):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind, np.newaxis]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        keep = nms(dets)
        all_boxes[cls_ind] = dets[keep, :]

    # Index 0 (background) stays empty.
    boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]

    # print results
    logger.info('---class---')
    logger.info('[[x1, x2, y1, y2, confidence]]')
    for ind, dets in enumerate(boxes_this_image):
        if len(dets) > 0:
            logger.info('---%s---' % CLASSES[ind])
            logger.info('%s' % dets)

    if vis:
        vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image, CLASSES, im_scale)
    else:
        result_file = image_name.replace('.', '_result.')
        logger.info('results saved to %s' % result_file)
        im = draw_all_detection(data_dict['data'].asnumpy(), boxes_this_image, CLASSES, im_scale)
        cv2.imwrite(result_file, im)
コード例 #14
0
ファイル: demo.py プロジェクト: zhangxiaoqiang7/rcnn
def main():
    """Entry point: run the demo on one image or on every .jpg in a folder."""
    args = parse_args()
    ctx = mx.gpu(args.gpu)
    symbol = get_vgg_test(num_classes=config.NUM_CLASSES,
                          num_anchors=config.NUM_ANCHORS)
    predictor = get_net(symbol, args.prefix, args.epoch, ctx)
    if args.image:
        # single test image
        demo_net(predictor, args.image, args.vis)
        return
    # Directory mode: process every .jpg file in args.dir.
    entries = os.listdir(args.dir)
    remaining = len(entries)
    for entry in entries:
        img_path = os.path.join(args.dir, entry)
        extension = os.path.splitext(img_path)[-1]
        if os.path.isfile(img_path) and extension in ['.jpg']:
            # Count down so the log shows how many entries are left.
            logger.info('%s' % remaining)
            demo_net(predictor, img_path, args.vis)
            remaining = remaining - 1
コード例 #15
0
def load_checkpoint(prefix, epoch):
    """
    Load model checkpoint from file.
    :param prefix: Prefix of model name.
    :param epoch: Epoch number of model we would like to load.
    :return: (arg_params, aux_params)
    arg_params : dict of str to NDArray
        Model parameter, dict of name to NDArray of net's weights.
    aux_params : dict of str to NDArray
        Model parameter, dict of name to NDArray of net's auxiliary states.
    """
    fn = '%s-%04d.params' % (prefix, epoch)
    logger.info('loading parameters from %s', fn)
    save_dict = mx.nd.load(fn)
    arg_params = {}
    aux_params = {}
    # Keys are stored as '<kind>:<name>' where kind is 'arg' or 'aux'.
    for key, value in save_dict.items():
        kind, name = key.split(':', 1)
        if kind == 'arg':
            arg_params[name] = value
        elif kind == 'aux':
            aux_params[name] = value
    return arg_params, aux_params
コード例 #16
0
ファイル: result.py プロジェクト: luwei001/MXNet-Faster-RCNN
def demo_net(predictor, image_name, vis=True):
    """
    generate data_batch -> im_detect -> post process
    :param predictor: Predictor
    :param image_name: image name
    :param vis: will save as a new image if not visualized
    :return: (CLASS, SCORE, BOOL) where BOOL is 1 iff any box was detected
    """
    BOOL = 0
    assert os.path.exists(image_name), image_name + ' not found'
    im = cv2.imread(image_name)
    data_batch, data_names, im_scale = generate_batch(im)
    scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                         im_scale)
    # Per-class thresholding + NMS using the globally configured thresholds.
    all_boxes = [[] for _ in Global.CLASSES]
    for cls in Global.CLASSES:
        cls_ind = Global.CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind, np.newaxis]
        keep = np.where(cls_scores >= Global.conf_thresh_value)[0]
        dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
        keep = py_nms_wrapper(Global.nms_thresh_value)(dets)
        all_boxes[cls_ind] = dets[keep, :]

    # Index 0 (background) stays empty.
    boxes_this_image = [[]] + [
        all_boxes[j] for j in range(1, len(Global.CLASSES))
    ]

    for ind, boxes in enumerate(boxes_this_image):
        if len(boxes) > 0:
            BOOL = 1
            logger.info('---%s---' % Global.CLASSES[ind])
            logger.info('%s' % boxes)

    # Mirror the input path from the open directory into the save directory.
    result_file = image_name.replace(str(Global.open_img_dir),
                                     str(Global.save_path))
    # print() works under both Python 2 and 3; the original bare
    # `print result_file` statement is a SyntaxError on Python 3.
    print(result_file)
    logger.info('results saved to %s' % result_file)
    im, CLASS, SCORE = draw_all_detection(data_dict['data'].asnumpy(),
                                          boxes_this_image, Global.CLASSES,
                                          im_scale)
    cv2.imwrite(result_file, im)
    Global.PICTURE_INFO[0].append(result_file)
    Global.PICTURE_INFO[1].append(CLASS)
    Global.PICTURE_INFO[2].append(SCORE)
    return CLASS, SCORE, BOOL
コード例 #17
0
ファイル: train.py プロジェクト: danigunawan/AIfuture
def train_net(args,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              lr=0.001,
              lr_step='5'):
    """Train the multi-stride face RPN network.

    :param args: parsed command-line namespace; dataset, loader, kvstore and
        checkpoint options are read from it
    :param ctx: list of MXNet devices to train on
    :param pretrained: checkpoint prefix to initialize from ('' = empty init)
    :param epoch: epoch number of the pretrained checkpoint
    :param prefix: checkpoint prefix used by save_model
    :param begin_epoch: epoch to resume from (affects the LR schedule)
    :param end_epoch: nominal final epoch — NOTE(review): overwritten to
        10000 below; training actually terminates via sys.exit(0) inside
        _batch_callback when the last LR step is reached
    :param lr: base learning rate
    :param lr_step: comma-separated epoch numbers at which the LR is scaled
    """
    # setup config
    #init_config()
    #print(config)
    # setup multi-gpu

    # Effective batch size scales with the number of devices.
    input_batch_size = config.TRAIN.BATCH_IMAGES * len(ctx)

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [
        load_gt_roidb(args.dataset,
                      image_set,
                      args.root_path,
                      args.dataset_path,
                      flip=not args.no_flip) for image_set in image_sets
    ]
    #roidb = merge_roidb(roidbs)
    #roidb = filter_roidb(roidb)
    # Only the first image set is actually used for training.
    roidb = roidbs[0]

    # load symbol
    #sym = eval('get_' + args.network + '_train')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    #feat_sym = sym.get_internals()['rpn_cls_score_output']
    #train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
    #                          ctx=ctx, work_load_list=args.work_load_list,
    #                          feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES,
    #                          anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # load and initialize params
    sym = None
    if len(pretrained) == 0:
        # No pretrained prefix: start from empty parameter dictionaries.
        arg_params = {}
        aux_params = {}
    else:
        logger.info('loading %s,%d' % (pretrained, epoch))
        sym, arg_params, aux_params = mx.model.load_checkpoint(
            pretrained, epoch)
        #arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        #for k in ['rpn_conv_3x3', 'rpn_cls_score', 'rpn_bbox_pred', 'cls_score', 'bbox_pred']:
        #  _k = k+"_weight"
        #  if _k in arg_shape_dict:
        #    v = 0.001 if _k.startswith('bbox_') else 0.01
        #    arg_params[_k] = mx.random.normal(0, v, shape=arg_shape_dict[_k])
        #    print('init %s with normal %.5f'%(_k,v))
        #  _k = k+"_bias"
        #  if _k in arg_shape_dict:
        #    arg_params[_k] = mx.nd.zeros(shape=arg_shape_dict[_k])
        #    print('init %s with zero'%(_k))

    # Build the training symbol; the loaded symbol (or None) is passed in.
    sym = eval('get_' + args.network + '_train')(sym)
    # Collect the per-stride RPN classification score outputs.
    feat_sym = []
    for stride in config.RPN_FEAT_STRIDE:
        feat_sym.append(
            sym.get_internals()['face_rpn_cls_score_stride%s_output' % stride])

    train_data = CropLoader(feat_sym,
                            roidb,
                            batch_size=input_batch_size,
                            shuffle=not args.no_shuffle,
                            ctx=ctx,
                            work_load_list=args.work_load_list)

    # infer max shape
    # Square maximum data shape from the largest configured scale.
    max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    #max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (1, roidb[0]['max_num_boxes'], 5)))
    logger.info('providing maximum shape %s %s' %
                (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # Upsampling layers get a fixed bilinear-interpolation initialization.
    for k, v in arg_shape_dict.items():
        if k.find('upsampling') >= 0:
            print('initializing upsampling_weight', k)
            arg_params[k] = mx.nd.zeros(shape=v)
            init = mx.init.Initializer()
            init._init_bilinear(k, arg_params[k])
            #print(args[k])

    # check parameter shapes
    #for k in sym.list_arguments():
    #    if k in data_shape_dict:
    #        continue
    #    assert k in arg_params, k + ' not initialized'
    #    assert arg_params[k].shape == arg_shape_dict[k], \
    #        'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    #for k in sym.list_auxiliary_states():
    #    assert k in aux_params, k + ' not initialized'
    #    assert aux_params[k].shape == aux_shape_dict[k], \
    #        'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    fixed_param_prefix = config.FIXED_PARAMS
    # create solver
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    fixed_param_names = get_fixed_params(sym, fixed_param_prefix)
    print('fixed', fixed_param_names, file=sys.stderr)
    mod = Module(sym,
                 data_names=data_names,
                 label_names=label_names,
                 logger=logger,
                 context=ctx,
                 work_load_list=args.work_load_list,
                 fixed_param_names=fixed_param_names)

    # metric
    # The module outputs are laid out per stride as consecutive
    # (pred, label) / (loss, weight) pairs; `mid` walks that layout.
    eval_metrics = mx.metric.CompositeEvalMetric()
    mid = 0
    for m in range(len(config.RPN_FEAT_STRIDE)):
        stride = config.RPN_FEAT_STRIDE[m]
        #mid = m*MSTEP
        _metric = metric.RPNAccMetric(pred_idx=mid,
                                      label_idx=mid + 1,
                                      name='RPNAcc_s%s' % stride)
        eval_metrics.add(_metric)
        mid += 2
        #_metric = metric.RPNLogLossMetric(pred_idx=mid, label_idx=mid+1)
        #eval_metrics.add(_metric)

        _metric = metric.RPNL1LossMetric(loss_idx=mid,
                                         weight_idx=mid + 1,
                                         name='RPNL1Loss_s%s' % stride)
        eval_metrics.add(_metric)
        mid += 2
        if config.FACE_LANDMARK:
            # Extra landmark-regression loss output pair per stride.
            _metric = metric.RPNL1LossMetric(loss_idx=mid,
                                             weight_idx=mid + 1,
                                             name='RPNLandMarkL1Loss_s%s' %
                                             stride)
            eval_metrics.add(_metric)
            mid += 2
        if config.HEAD_BOX:
            # Extra head-box classification and regression output pairs.
            _metric = metric.RPNAccMetric(pred_idx=mid,
                                          label_idx=mid + 1,
                                          name='RPNAcc_head_s%s' % stride)
            eval_metrics.add(_metric)
            mid += 2
            #_metric = metric.RPNLogLossMetric(pred_idx=mid, label_idx=mid+1)
            #eval_metrics.add(_metric)

            _metric = metric.RPNL1LossMetric(loss_idx=mid,
                                             weight_idx=mid + 1,
                                             name='RPNL1Loss_head_s%s' %
                                             stride)
            eval_metrics.add(_metric)
            mid += 2

    # callback
    #means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    #stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    #epoch_end_callback = callback.do_checkpoint(prefix)
    # Checkpointing is handled by save_model in the batch callback instead.
    epoch_end_callback = None
    # decide learning rate
    #base_lr = lr
    #lr_factor = 0.1
    #lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))

    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    # Convert remaining step epochs to absolute iteration counts.
    lr_iters = [
        int(epoch * len(roidb) / input_batch_size) for epoch in lr_epoch_diff
    ]

    # Each entry is (iteration, multiplicative LR factor).
    lr_steps = []
    if len(lr_iters) == 5:
        factors = [0.5, 0.5, 0.4, 0.1, 0.1]
        for i in range(5):
            lr_steps.append((lr_iters[i], factors[i]))
    elif len(lr_iters) == 8:  #warmup
        # First five steps grow the LR (factor > 1), the rest decay it.
        for li in lr_iters[0:5]:
            lr_steps.append((li, 1.5849))
        for li in lr_iters[5:]:
            lr_steps.append((li, 0.1))
    else:
        for li in lr_iters:
            lr_steps.append((li, 0.1))
    #lr_steps = [ (20,0.1), (40, 0.1) ] #XXX

    # NOTE(review): this overrides the caller's end_epoch; the run is ended
    # by sys.exit(0) in _batch_callback at the last LR step.
    end_epoch = 10000
    logger.info('lr %f lr_epoch_diff %s lr_steps %s' %
                (lr, lr_epoch_diff, lr_steps))
    # optimizer
    opt = optimizer.SGD(learning_rate=lr,
                        momentum=0.9,
                        wd=0.0005,
                        rescale_grad=1.0 / len(ctx),
                        clip_gradient=None)
    initializer = mx.init.Xavier()
    #initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style

    train_data = mx.io.PrefetchingIter(train_data)

    _cb = mx.callback.Speedometer(train_data.batch_size,
                                  frequent=args.frequent,
                                  auto_reset=False)
    # Mutable one-element list so the nested callback can update the counter.
    global_step = [0]

    def save_model(epoch):
        # Export a deploy symbol: per-stride softmax class probabilities plus
        # bbox (and optionally landmark) regression outputs.
        arg, aux = mod.get_params()
        all_layers = mod.symbol.get_internals()
        outs = []
        for stride in config.RPN_FEAT_STRIDE:
            num_anchors = config.RPN_ANCHOR_CFG[str(stride)]['NUM_ANCHORS']
            _name = 'face_rpn_cls_score_stride%d_output' % stride
            rpn_cls_score = all_layers[_name]

            # prepare rpn data
            rpn_cls_score_reshape = mx.symbol.Reshape(
                data=rpn_cls_score,
                shape=(0, 2, -1, 0),
                name="face_rpn_cls_score_reshape_stride%d" % stride)

            rpn_cls_prob = mx.symbol.SoftmaxActivation(
                data=rpn_cls_score_reshape,
                mode="channel",
                name="face_rpn_cls_prob_stride%d" % stride)
            rpn_cls_prob_reshape = mx.symbol.Reshape(
                data=rpn_cls_prob,
                shape=(0, 2 * num_anchors, -1, 0),
                name='face_rpn_cls_prob_reshape_stride%d' % stride)
            _name = 'face_rpn_bbox_pred_stride%d_output' % stride
            rpn_bbox_pred = all_layers[_name]
            outs.append(rpn_cls_prob_reshape)
            outs.append(rpn_bbox_pred)
            if config.FACE_LANDMARK:
                _name = 'face_rpn_landmark_pred_stride%d_output' % stride
                rpn_landmark_pred = all_layers[_name]
                outs.append(rpn_landmark_pred)
        _sym = mx.sym.Group(outs)
        mx.model.save_checkpoint(prefix, epoch, _sym, arg, aux)

    def _batch_callback(param):
        # Per-batch hook: log speed, apply scheduled LR changes, and stop
        # the whole process once the final LR step is reached.
        #global global_step
        _cb(param)
        global_step[0] += 1
        mbatch = global_step[0]
        for step in lr_steps:
            if mbatch == step[0]:
                opt.lr *= step[1]
                print('lr change to',
                      opt.lr,
                      ' in batch',
                      mbatch,
                      file=sys.stderr)
                break

        if mbatch == lr_steps[-1][0]:
            print('saving final checkpoint', mbatch, file=sys.stderr)
            save_model(0)
            #arg, aux = mod.get_params()
            #mx.model.save_checkpoint(prefix, 99, mod.symbol, arg, aux)
            sys.exit(0)

    # Optionally override the initialization from an explicit checkpoint.
    if args.checkpoint is not None:
        _, arg_params, aux_params = mx.model.load_checkpoint(
            args.checkpoint, 0)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=checkpoint_callback('model/testR50'),
            batch_end_callback=_batch_callback,
            kvstore=args.kvstore,
            optimizer=opt,
            initializer=initializer,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
コード例 #18
0
ファイル: tester.py プロジェクト: chenzx921020/faster-rcnn
def pred_eval(predictor, test_data, imdb, vis=False, thresh=1e-3):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return:
    """
    # NOTE: Python 2 code (xrange, cPickle).
    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data]

    nms = py_nms_wrapper(config.TEST.NMS)

    # limit detections to max_per_image over all classes
    # -1 disables the per-image cap (the block below is skipped).
    max_per_image = -1

    num_images = imdb.num_images
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    i = 0
    t = time.time()
    for im_info, data_batch in test_data:
        # t1 = time spent fetching this batch from the iterator.
        t1 = time.time() - t
        t = time.time()

        scale = im_info[0, 2]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scale)

        # t2 = time spent in the network forward pass.
        t2 = time.time() - t
        t = time.time()

        # Per-class thresholding + NMS; class 0 (background) is skipped.
        for j in range(1, imdb.num_classes):
            indexes = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[indexes, j, np.newaxis]
            cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            all_boxes[j][i] = cls_dets[keep, :]

        if max_per_image > 0:
            # Keep only the max_per_image highest-scoring boxes per image.
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        if vis:
            boxes_this_image = [[]] + [
                all_boxes[j][i] for j in range(1, imdb.num_classes)
            ]
            vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image,
                              imdb.classes, scale)

        # t3 = time spent in post-processing.
        t3 = time.time() - t
        t = time.time()
        logger.info('testing %d/%d data %.4fs net %.4fs post %.4fs' %
                    (i, imdb.num_images, t1, t2, t3))
        i += 1

    # Cache all detections, then run the dataset's own evaluation.
    det_file = os.path.join(imdb.cache_path, imdb.name + '_detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    imdb.evaluate_detections(all_boxes)
コード例 #19
0
ファイル: tester.py プロジェクト: chenzx921020/faster-rcnn
def generate_proposals(predictor, test_data, imdb, vis=False, thresh=0.):
    """
    Generate detections results using RPN.
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffled
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: thresh for valid detections
    :return: list of detected boxes
    """
    # NOTE: Python 2 code (cPickle).
    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data]

    i = 0
    t = time.time()
    imdb_boxes = list()
    original_boxes = list()
    for im_info, data_batch in test_data:
        # t1 = time spent fetching this batch from the iterator.
        t1 = time.time() - t
        t = time.time()

        scale = im_info[0, 2]
        scores, boxes, data_dict = im_proposal(predictor, data_batch,
                                               data_names, scale)
        # t2 = time spent in the network forward pass.
        t2 = time.time() - t
        t = time.time()

        # assemble proposals
        # Each row is (x1, y1, x2, y2, score).
        dets = np.hstack((boxes, scores))
        original_boxes.append(dets)

        # filter proposals
        keep = np.where(dets[:, 4:] > thresh)[0]
        dets = dets[keep, :]
        imdb_boxes.append(dets)

        if vis:
            vis_all_detection(data_dict['data'].asnumpy(), [dets], ['obj'],
                              scale)

        logger.info('generating %d/%d ' % (i + 1, imdb.num_images) +
                    'proposal %d ' % (dets.shape[0]) + 'data %.4fs net %.4fs' %
                    (t1, t2))
        i += 1

    assert len(imdb_boxes) == imdb.num_images, 'calculations not complete'

    # save results
    rpn_folder = os.path.join(imdb.root_path, 'rpn_data')
    if not os.path.exists(rpn_folder):
        os.mkdir(rpn_folder)

    # Thresholded proposals always get written.
    rpn_file = os.path.join(rpn_folder, imdb.name + '_rpn.pkl')
    with open(rpn_file, 'wb') as f:
        cPickle.dump(imdb_boxes, f, cPickle.HIGHEST_PROTOCOL)

    # When a threshold was applied, also keep the unfiltered proposals.
    if thresh > 0:
        full_rpn_file = os.path.join(rpn_folder, imdb.name + '_full_rpn.pkl')
        with open(full_rpn_file, 'wb') as f:
            cPickle.dump(original_boxes, f, cPickle.HIGHEST_PROTOCOL)

    logger.info('wrote rpn proposals to %s' % rpn_file)
    return imdb_boxes
コード例 #20
0
def alternate_train(args, ctx, pretrained, epoch,
                    rpn_epoch, rpn_lr, rpn_lr_step,
                    rcnn_epoch, rcnn_lr, rcnn_lr_step):
    """Run the 4-step alternating Faster R-CNN training schedule.

    Steps: (1) train RPN from the ImageNet-pretrained model, (2) generate
    proposals with it, (3) train RCNN on those proposals, (4) retrain RPN
    from the RCNN weights with shared layers fixed, regenerate proposals,
    retrain RCNN, and finally combine RPN + RCNN into 'model/final'.

    :param args: parsed command-line arguments (network, dataset, paths, ...)
    :param ctx: list of mx.context devices; ctx[0] is used for testing
    :param pretrained: prefix of the ImageNet-pretrained checkpoint
    :param epoch: epoch number of the pretrained checkpoint to load
    :param rpn_epoch: number of epochs for each RPN training stage
    :param rpn_lr: base learning rate for RPN stages
    :param rpn_lr_step: lr decay schedule (comma-separated epochs) for RPN
    :param rcnn_epoch: number of epochs for each RCNN training stage
    :param rcnn_lr: base learning rate for RCNN stages
    :param rcnn_lr_step: lr decay schedule for RCNN
    """
    # basic config
    begin_epoch = 0
    # allow background ROIs down to zero overlap during alternate training
    config.TRAIN.BG_THRESH_LO = 0.0

    # str.split already returns a list; compute once, reused by both
    # proposal-generation passes below
    image_sets = args.image_set.split('+')

    logger.info('########## TRAIN RPN WITH IMAGENET INIT')
    train_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path,
              args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume,
              ctx, pretrained, epoch, 'model/rpn1', begin_epoch, rpn_epoch,
              train_shared=False, lr=rpn_lr, lr_step=rpn_lr_step)

    logger.info('########## GENERATE RPN DETECTION')
    for image_set in image_sets:
        test_rpn(args.network, args.dataset, image_set, args.root_path, args.dataset_path,
                 ctx[0], 'model/rpn1', rpn_epoch,
                 vis=False, shuffle=False, thresh=0)

    logger.info('########## TRAIN RCNN WITH IMAGENET INIT AND RPN DETECTION')
    train_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path,
               args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume,
               ctx, pretrained, epoch, 'model/rcnn1', begin_epoch, rcnn_epoch,
               train_shared=False, lr=rcnn_lr, lr_step=rcnn_lr_step, proposal='rpn')

    logger.info('########## TRAIN RPN WITH RCNN INIT')
    train_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path,
              args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume,
              ctx, 'model/rcnn1', rcnn_epoch, 'model/rpn2', begin_epoch, rpn_epoch,
              train_shared=True, lr=rpn_lr, lr_step=rpn_lr_step)

    logger.info('########## GENERATE RPN DETECTION')
    for image_set in image_sets:
        test_rpn(args.network, args.dataset, image_set, args.root_path, args.dataset_path,
                 ctx[0], 'model/rpn2', rpn_epoch,
                 vis=False, shuffle=False, thresh=0)

    logger.info('########## COMBINE RPN2 WITH RCNN1')
    combine_model('model/rpn2', rpn_epoch, 'model/rcnn1', rcnn_epoch, 'model/rcnn2', 0)

    logger.info('########## TRAIN RCNN WITH RPN INIT AND DETECTION')
    train_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path,
               args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume,
               ctx, 'model/rcnn2', 0, 'model/rcnn2', begin_epoch, rcnn_epoch,
               train_shared=True, lr=rcnn_lr, lr_step=rcnn_lr_step, proposal='rpn')

    logger.info('########## COMBINE RPN2 WITH RCNN2')
    combine_model('model/rpn2', rpn_epoch, 'model/rcnn2', rcnn_epoch, 'model/final', 0)
コード例 #21
0
def alternate_train(args, ctx, pretrained, epoch,
                    rpn_epoch, rpn_lr, rpn_lr_step,
                    rcnn_epoch, rcnn_lr, rcnn_lr_step):
    """Variant of the alternating schedule with step 1 (RPN training) disabled.

    Instead of training an RPN from the ImageNet init, this version reuses an
    existing checkpoint ('model/blouse', epoch 0) with a high score threshold
    (0.97) and visualization enabled to generate proposals, then continues
    with the usual RCNN / RPN alternation and final model combination.

    Parameters are the same as the standard ``alternate_train``; ``pretrained``
    and ``epoch`` are still used to initialize the first RCNN stage.
    """
    # basic config
    begin_epoch = 0
    config.TRAIN.BG_THRESH_LO = 0.0

    # str.split already returns a list; compute once, reused below
    image_sets = args.image_set.split('+')

    logger.info('########## TRAIN RPN WITH IMAGENET INIT')
    # Step 1 intentionally disabled -- proposals come from the pre-trained
    # 'model/blouse' checkpoint instead of a freshly trained RPN.
    # train_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path,
    #           args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume,
    #           ctx, pretrained, epoch, 'model/rpn1', begin_epoch, rpn_epoch,
    #           train_shared=False, lr=rpn_lr, lr_step=rpn_lr_step)

    logger.info('########## GENERATE RPN DETECTION')
    for image_set in image_sets:
        test_rpn(args.network, args.dataset, image_set, args.root_path, args.dataset_path,
                 ctx[0], 'model/blouse', 0,
                 vis=True, shuffle=False, thresh=0.97)

    logger.info('########## TRAIN RCNN WITH IMAGENET INIT AND RPN DETECTION')
    train_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path,
               args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume,
               ctx, pretrained, epoch, 'model/rcnn1', begin_epoch, rcnn_epoch,
               train_shared=False, lr=rcnn_lr, lr_step=rcnn_lr_step, proposal='rpn')

    logger.info('########## TRAIN RPN WITH RCNN INIT')
    train_rpn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path,
              args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume,
              ctx, 'model/rcnn1', rcnn_epoch, 'model/rpn2', begin_epoch, rpn_epoch,
              train_shared=True, lr=rpn_lr, lr_step=rpn_lr_step)

    logger.info('########## GENERATE RPN DETECTION')
    for image_set in image_sets:
        test_rpn(args.network, args.dataset, image_set, args.root_path, args.dataset_path,
                 ctx[0], 'model/rpn2', rpn_epoch,
                 vis=False, shuffle=False, thresh=0)

    logger.info('########## COMBINE RPN2 WITH RCNN1')
    combine_model('model/rpn2', rpn_epoch, 'model/rcnn1', rcnn_epoch, 'model/rcnn2', 0)

    logger.info('########## TRAIN RCNN WITH RPN INIT AND DETECTION')
    train_rcnn(args.network, args.dataset, args.image_set, args.root_path, args.dataset_path,
               args.frequent, args.kvstore, args.work_load_list, args.no_flip, args.no_shuffle, args.resume,
               ctx, 'model/rcnn2', 0, 'model/rcnn2', begin_epoch, rcnn_epoch,
               train_shared=True, lr=rcnn_lr, lr_step=rcnn_lr_step, proposal='rpn')

    logger.info('########## COMBINE RPN2 WITH RCNN2')
    combine_model('model/rpn2', rpn_epoch, 'model/rcnn2', rcnn_epoch, 'model/final', 0)
コード例 #22
0
def test_proposals(predictor, test_data, imdb, roidb, vis=False):
    """
    Evaluate RPN proposals against ground truth and dump per-image results.
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffled
    :param imdb: image database
    :param roidb: ground-truth roidb aligned with test_data iteration order
    :param vis: controls visualization
    :return: None; prints per-image and running recall to stderr and writes
             one WIDER-style detection .txt per image under <root>/output
    """
    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data]

    i = 0
    t = time.time()
    output_folder = os.path.join(imdb.root_path, 'output')
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    imdb_boxes = list()
    original_boxes = list()
    # overall = [number of GT boxes found, total GT boxes] across the dataset
    overall = [0.0, 0.0]
    num_pos = 0
    for im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        oscale = im_info[0, 2]
        # The SSH detector runs at the network input resolution, so proposal
        # extraction uses scale 1.0; GT boxes are rescaled to match below.
        scale = 1.0
        scores, boxes, data_dict = im_proposal(predictor, data_batch,
                                               data_names, scale)
        t2 = time.time() - t
        t = time.time()

        # assemble proposals as (x1, y1, x2, y2, score)
        dets = np.hstack((boxes, scores))
        original_boxes.append(dets)

        # keep proposals above the configured score threshold
        keep = np.where(dets[:, 4:] > config.TEST.SCORE_THRESH)[0]
        dets = dets[keep, :]
        imdb_boxes.append(dets)

        logger.info('generating %d/%d ' % (i + 1, imdb.num_images) +
                    'proposal %d ' % (dets.shape[0]) + 'data %.4fs net %.4fs' %
                    (t1, t2))

        if vis:
            vis_all_detection(data_dict['data'].asnumpy(), [dets], ['obj'],
                              scale)
        boxes = dets
        # roidb holds original-resolution GT; scale it up to the network
        # resolution the proposals were produced at
        gt_boxes = roidb[i]['boxes'].copy(
        ) * oscale
        gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] -
                                                            gt_boxes[:, 1] + 1)
        num_pos += gt_boxes.shape[0]

        # np.float was removed in NumPy 1.20; the builtin float is equivalent
        overlaps = bbox_overlaps(boxes.astype(float),
                                 gt_boxes.astype(float))

        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        if boxes.shape[0] > 0:
            # best IoU achieved by any proposal for each GT box
            _gt_overlaps = overlaps.max(axis=0)
            for j in range(len(_gt_overlaps)):
                if _gt_overlaps[j] > config.TEST.IOU_THRESH:
                    continue
                # report GT boxes no proposal covered well enough
                print(j,
                      'failed',
                      gt_boxes[j],
                      'max_overlap:',
                      _gt_overlaps[j],
                      file=sys.stderr)

            found = (_gt_overlaps > config.TEST.IOU_THRESH).sum()
            _recall = found / float(gt_boxes.shape[0])
            print('recall',
                  _recall,
                  gt_boxes.shape[0],
                  boxes.shape[0],
                  gt_areas,
                  file=sys.stderr)
            overall[0] += found
            overall[1] += gt_boxes.shape[0]
            _recall = float(overall[0]) / overall[1]
            print('recall_all', _recall, file=sys.stderr)

        # write boxes back in original image coordinates, WIDER eval format:
        # image name, box count, then "x y w h score" per line
        boxes[:, 0:4] /= oscale
        _vec = roidb[i]['image'].split('/')
        out_dir = os.path.join(output_folder, _vec[-2])
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        out_file = os.path.join(out_dir, _vec[-1].replace('jpg', 'txt'))
        with open(out_file, 'w') as f:
            name = '/'.join(roidb[i]['image'].split('/')[-2:])
            f.write("%s\n" % (name))
            f.write("%d\n" % (boxes.shape[0]))
            for b in range(boxes.shape[0]):
                box = boxes[b]
                f.write(
                    "%d %d %d %d %g \n" %
                    (box[0], box[1], box[2] - box[0], box[3] - box[1], box[4]))
        i += 1

    # NOTE(review): the original ended with unreachable recall-summary and
    # pickle-dump code after this bare return, referencing undefined names
    # (thresholds, area_name, rpn_folder); that dead code has been removed.
    return
コード例 #23
0
def demo_net(predictor, image_name, vis=False):
    """
    Run detection over every image in the global ``listfile`` and append the
    results to 'fasterrcnn.txt' (one line per image) and 'fasterrcnnrh.txt'
    (one line per detection).
    :param predictor: Predictor
    :param image_name: initial value only; immediately overwritten by the
                       loop over the global ``listfile``
    :param vis: unused here; kept for interface compatibility
    :return: None
    """
    for image_name in listfile:

        ll = []

        ll.append(image_name.split('/')[-1] + ',')
        assert os.path.exists(image_name), image_name + ' not found'
        im = cv2.imread(image_name)
        # BUGFIX: cv2 images are (rows, cols, channels) == (height, width, _).
        # The original unpacked them swapped, clamping boxes to wrong bounds.
        height, width, _ = im.shape
        data_batch, data_names, im_scale = generate_batch(im)
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             im_scale)

        # per-class detections above CONF_THRESH, after NMS
        all_boxes = [[] for _ in CLASSES]
        for cls in CLASSES:
            cls_ind = CLASSES.index(cls)
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind, np.newaxis]
            keep = np.where(cls_scores >= CONF_THRESH)[0]
            dets = np.hstack(
                (cls_boxes, cls_scores)).astype(np.float32)[keep, :]
            keep = nms(dets)
            all_boxes[cls_ind] = dets[keep, :]

        # index 0 is the background placeholder
        boxes_this_image = [[]
                            ] + [all_boxes[j] for j in range(1, len(CLASSES))]
        # NOTE(review): this branch can never fire once the loop above ran
        # (CLASSES was indexed); the fallback constants look like a stale
        # hack -- confirm before relying on it
        if len(CLASSES) == 0:
            ll.extend([2, 0, 183, 272, 517])

        # print results
        logger.info('---class---')
        logger.info('[[x1, x2, y1, y2, confidence]]')
        for ind, boxes in enumerate(boxes_this_image):
            if len(boxes) > 0:
                logger.info('---%s---' % CLASSES[ind])
                logger.info('%s' % boxes)
                print(boxes)
                # only the top detection per class is written out
                item = boxes[0]
                xmin = int(round(item[0]))
                ymin = int(round(item[1]))
                xmax = int(round(item[2]))
                ymax = int(round(item[3]))
                # clamp the box to the image bounds
                if xmin < 0:
                    xmin = 0
                if ymin < 0:
                    ymin = 0
                if xmax > width:
                    xmax = width
                if ymax > height:
                    ymax = height
                ll.extend([bq[CLASSES[ind]], xmin, ymin, xmax, ymax])
                gl = [
                    str(i) for i in [
                        image_name.split('/')[-1], bq[CLASSES[ind]], item[4],
                        xmin, ymin, xmax, ymax
                    ]
                ]
                file2 = 'fasterrcnnrh.txt'
                with open(file2, 'a+') as f1:
                    f1.write(','.join(gl))
                    f1.write('\n')
                    print(ll)
        ll = [str(i) for i in ll]
        file = 'fasterrcnn.txt'
        with open(file, 'a+') as f:
            f.write(' '.join(ll))
            f.write('\n')
コード例 #24
0
ファイル: train_ssh.py プロジェクト: hariag/mxnet-SSH
def train_net(args,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              lr=0.001,
              lr_step='5'):
    """Train the SSH face-detection RPN end to end.

    Builds the multi-stride RPN symbol, loads (or resumes) parameters,
    sets up metrics / SGD with step decay, and drives ``mod.fit``.
    Checkpoints a deploy symbol (cls prob + bbox pred per stride) every
    1000 batches and as epoch 0 at the final lr step, then exits.

    :param args: parsed command-line arguments
    :param ctx: list of mx.context devices to train on
    :param pretrained: prefix of the pretrained checkpoint
    :param epoch: epoch of the pretrained checkpoint to load
    :param prefix: prefix for saved checkpoints
    :param begin_epoch: epoch to resume from when args.resume is set
    :param end_epoch: nominal final epoch (overridden below; training is
                      actually stopped by the batch callback)
    :param lr: base learning rate
    :param lr_step: comma-separated epochs at which lr decays by 10x
    """
    input_batch_size = config.TRAIN.BATCH_IMAGES * len(ctx)

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    # str.split already returns a list; no comprehension needed
    image_sets = args.image_set.split('+')
    roidbs = [
        load_gt_roidb(args.dataset,
                      image_set,
                      args.root_path,
                      args.dataset_path,
                      flip=not args.no_flip) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # load symbol; one rpn_cls_score output per feature stride
    sym = eval('get_' + args.network + '_train')()
    feat_sym = []
    for stride in config.RPN_FEAT_STRIDE:
        feat_sym.append(sym.get_internals()['rpn_cls_score_stride%s_output' %
                                            stride])

    train_data = CropLoader(feat_sym,
                            roidb,
                            batch_size=input_batch_size,
                            shuffle=not args.no_shuffle,
                            ctx=ctx,
                            work_load_list=args.work_load_list)

    # infer max shape from the largest configured scale
    max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (1, roidb[0]['max_num_boxes'], 5)))
    logger.info('providing maximum shape %s %s' %
                (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # BUGFIX: dict.iteritems() is Python-2-only and raises AttributeError
        # on Python 3 (which this file otherwise targets); .items() works on
        # both.  Upsampling layers get a bilinear initialization.
        for k, v in arg_shape_dict.items():
            if k.find('upsampling') >= 0:
                print('initializing upsampling_weight', k)
                arg_params[k] = mx.nd.zeros(shape=v)
                init = mx.init.Initializer()
                init._init_bilinear(k, arg_params[k])

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    fixed_param_names = get_fixed_params(sym, fixed_param_prefix)
    print('fixed', fixed_param_names, file=sys.stderr)
    mod = Module(sym,
                 data_names=data_names,
                 label_names=label_names,
                 logger=logger,
                 context=ctx,
                 work_load_list=args.work_load_list,
                 fixed_param_names=fixed_param_names)

    # metrics: acc + smooth-L1 per stride; outputs are laid out in groups of
    # 4 (pred, label, loss, weight), hence the offsets 0, 4, 8
    eval_metrics = mx.metric.CompositeEvalMetric()
    mids = [0, 4, 8]
    for mid in mids:
        _metric = metric.RPNAccMetric(pred_idx=mid, label_idx=mid + 1)
        eval_metrics.add(_metric)
        _metric = metric.RPNL1LossMetric(loss_idx=mid + 2, weight_idx=mid + 3)
        eval_metrics.add(_metric)

    # bbox normalization constants (used by the checkpoint callback, which is
    # currently disabled; checkpointing is done by the batch callback instead)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = None

    # decide learning rate: decay by lr_factor at each epoch in lr_step,
    # translated to iteration counts
    base_lr = lr
    lr_factor = 0.1
    # NOTE: loop variable renamed from 'epoch' to avoid shadowing the
    # 'epoch' parameter above
    lr_epoch = [int(e) for e in lr_step.split(',')]
    lr_epoch_diff = [
        e - begin_epoch for e in lr_epoch if e > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(e * len(roidb) / input_batch_size) for e in lr_epoch_diff
    ]

    # training is terminated by _batch_callback at the last lr step, so the
    # epoch limit is effectively unbounded here
    end_epoch = 10000
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' %
                (lr, lr_epoch_diff, lr_iters))

    # optimizer: plain SGD with momentum; gradients averaged over devices
    opt = optimizer.SGD(learning_rate=lr,
                        momentum=0.9,
                        wd=0.0005,
                        rescale_grad=1.0 / len(ctx),
                        clip_gradient=None)
    initializer = mx.init.Xavier()

    if len(ctx) > 1:
        train_data = mx.io.PrefetchingIter(train_data)

    _cb = mx.callback.Speedometer(train_data.batch_size,
                                  frequent=args.frequent,
                                  auto_reset=False)
    # mutable cell so the nested callback can count batches
    global_step = [0]

    def save_model(epoch):
        """Save a deploy checkpoint: per-stride softmax cls prob + bbox pred."""
        arg, aux = mod.get_params()
        all_layers = mod.symbol.get_internals()
        outs = []
        for stride in config.RPN_FEAT_STRIDE:
            num_anchors = config.RPN_ANCHOR_CFG[str(stride)]['NUM_ANCHORS']
            _name = 'rpn_cls_score_stride%d_output' % stride
            rpn_cls_score = all_layers[_name]

            # reshape to (N, 2, A*H, W) so softmax runs over fg/bg channel
            rpn_cls_score_reshape = mx.symbol.Reshape(
                data=rpn_cls_score,
                shape=(0, 2, -1, 0),
                name="rpn_cls_score_reshape_stride%d" % stride)

            rpn_cls_prob = mx.symbol.SoftmaxActivation(
                data=rpn_cls_score_reshape,
                mode="channel",
                name="rpn_cls_prob_stride%d" % stride)
            rpn_cls_prob_reshape = mx.symbol.Reshape(
                data=rpn_cls_prob,
                shape=(0, 2 * num_anchors, -1, 0),
                name='rpn_cls_prob_reshape_stride%d' % stride)
            _name = 'rpn_bbox_pred_stride%d_output' % stride
            rpn_bbox_pred = all_layers[_name]
            outs.append(rpn_cls_prob_reshape)
            outs.append(rpn_bbox_pred)
        _sym = mx.sym.Group(outs)
        mx.model.save_checkpoint(prefix, epoch, _sym, arg, aux)

    def _batch_callback(param):
        """Per-batch hook: lr decay, periodic checkpoints, final stop."""
        _cb(param)
        global_step[0] += 1
        mbatch = global_step[0]
        for _iter in lr_iters:
            if mbatch == _iter:
                opt.lr *= 0.1
                print('lr change to',
                      opt.lr,
                      ' in batch',
                      mbatch,
                      file=sys.stderr)
                break

        if mbatch % 1000 == 0:
            print('saving final checkpoint', mbatch, file=sys.stderr)
            save_model(mbatch)

        # at the last lr step, save as epoch 0 and stop the whole process
        if mbatch == lr_iters[-1]:
            print('saving final checkpoint', mbatch, file=sys.stderr)
            save_model(0)
            sys.exit(0)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=_batch_callback,
            kvstore=args.kvstore,
            optimizer=opt,
            initializer=initializer,
            allow_missing=True,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
コード例 #25
0
ファイル: tester.py プロジェクト: CoderHHX/incubator-mxnet
def pred_eval(predictor, test_data, imdb, vis=False, thresh=1e-3):
    """
    Run offline validation over a test set and hand the detections to the
    imdb for evaluation.
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return: None; detections are pickled to the imdb cache and evaluated
    """
    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data]

    nms_fn = py_nms_wrapper(config.TEST.NMS)

    # cap on detections per image across all classes; <= 0 disables the cap
    max_per_image = -1

    num_images = imdb.num_images
    num_classes = imdb.num_classes
    # all_boxes[cls][image] = N x 5 array of (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    im_idx = 0
    tic = time.time()
    for im_info, data_batch in test_data:
        t1 = time.time() - tic
        tic = time.time()

        scale = im_info[0, 2]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scale)

        t2 = time.time() - tic
        tic = time.time()

        # per-class thresholding + NMS (class 0 is background, skipped)
        for cls in range(1, num_classes):
            picked = np.where(scores[:, cls] > thresh)[0]
            cls_dets = np.hstack((boxes[picked, cls * 4:(cls + 1) * 4],
                                  scores[picked, cls, np.newaxis]))
            survivors = nms_fn(cls_dets)
            all_boxes[cls][im_idx] = cls_dets[survivors, :]

        if max_per_image > 0:
            # keep only the max_per_image highest-scoring boxes overall
            image_scores = np.hstack([all_boxes[cls][im_idx][:, -1]
                                      for cls in range(1, num_classes)])
            if len(image_scores) > max_per_image:
                cutoff = np.sort(image_scores)[-max_per_image]
                for cls in range(1, num_classes):
                    above = np.where(all_boxes[cls][im_idx][:, -1] >= cutoff)[0]
                    all_boxes[cls][im_idx] = all_boxes[cls][im_idx][above, :]

        if vis:
            per_class = [all_boxes[cls][im_idx] for cls in range(1, num_classes)]
            vis_all_detection(data_dict['data'].asnumpy(), [[]] + per_class,
                              imdb.classes, scale)

        t3 = time.time() - tic
        tic = time.time()
        logger.info('testing %d/%d data %.4fs net %.4fs post %.4fs' % (im_idx, imdb.num_images, t1, t2, t3))
        im_idx += 1

    # persist raw detections, then let the imdb compute the metrics
    det_file = os.path.join(imdb.cache_path, imdb.name + '_detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, protocol=pickle.HIGHEST_PROTOCOL)

    imdb.evaluate_detections(all_boxes)
コード例 #26
0
ファイル: train.py プロジェクト: jpulidojr/3DCE_new
def train_net(args):
    """Train the detector on a sampled subset of the ground-truth roidb.

    Pipeline: seed the RNGs, load/merge/filter the gt roidb, keep
    ``args.begin_sample`` percent of it, build the training symbol and
    AnchorLoader, initialize parameters via ``init_params``, then fit a
    MutableModule with RPN/RCNN loss metrics.

    :param args: configuration namespace; fields consumed below include
        rand_seed, image_set, dataset, dataset_path, flip, begin_sample,
        network, gpus, shuffle, work_load_list, frequent, e2e_prefix,
        iter_size, kvstore, resume, begin_epoch and e2e_epoch.
    """

    if args.rand_seed > 0:
        # seed all three RNG sources so sampling/augmentation/init are reproducible
        np.random.seed(args.rand_seed)
        mx.random.seed(args.rand_seed)
        random.seed(args.rand_seed)

    # print config
    logger.info(pprint.pformat(config))
    logger.info(pprint.pformat(args))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [load_gt_roidb(args.dataset, image_set, args.dataset_path,
                            flip=args.flip)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # percentage of all training slices to keep for this run
    samplepcnt = args.begin_sample

    if samplepcnt == 100:
        sroidb = roidb
    else:
        sroidb = sample_roidb(roidb, samplepcnt)  # Sample by percentage of all images
    logger.info('Sampling %d pcnt : %d training slices' % (samplepcnt, len(sroidb)))

    # Debug to see if we can concatenate ROIDB's
    #print(sroidb)
    #dir(sroidb)
    #newroidb = sroidb + roidb
    #newroidb = append_roidb(sroidb, roidb)
    #print( "--Append test: " + str(len(sroidb)) +" " + str(len(roidb)) + " = " + str(len(newroidb)) ) 

    # load symbol
    sym = eval('get_' + args.network)(is_train=True, num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.SAMPLES_PER_BATCH * batch_size

    # load training data
    train_data = AnchorLoader(feat_sym, sroidb, batch_size=input_batch_size, shuffle=args.shuffle,
                              ctx=ctx, work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING,
                              nThreads=default.prefetch_thread_num)

    # infer max shape
    max_data_shape = [('data', (input_batch_size*config.NUM_IMAGES_3DCE, config.NUM_SLICES, config.MAX_SIZE, config.MAX_SIZE))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size*config.NUM_IMAGES_3DCE, 5, 5)))
    logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape))

    # load and initialize and check params
    arg_params, aux_params = init_params(args, sym, train_data)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    # rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    # eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_cls_metric, rpn_bbox_metric,  cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = (callback.do_checkpoint(args.e2e_prefix, means, stds),
                          callback.do_validate(args.e2e_prefix))

    # NOTE(review): len(sroidb) / input_batch_size is a float under Python 3
    # true division -- confirm get_optimizer expects steps-per-epoch that way
    arg_names = [x for x in sym.list_arguments() if x not in data_names+label_names]
    opt = get_optimizer(args, arg_names, len(sroidb) / input_batch_size, args.iter_size)

    # train
    default.testing = False
    mod.fit(train_data, roidb, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kvstore,
            optimizer=opt, iter_size=args.iter_size,
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=args.begin_epoch, num_epoch=args.e2e_epoch)
コード例 #27
0
ファイル: train.py プロジェクト: jpulidojr/3DCE_new
def init_params(args, sym, train_data):
    """Load and initialize network parameters, then shape-check them.

    If ``args.resume`` is set, parameters are restored from the checkpoint
    ``args.e2e_prefix`` at ``args.begin_epoch``; otherwise the pretrained
    backbone is loaded and the detection-specific layers are freshly
    initialized (normal weights, zero biases) according to
    ``config.FRAMEWORK`` ('3DCE', 'RFCN' or 'Faster').

    :param args: namespace with resume, e2e_prefix, begin_epoch,
        pretrained and pretrained_epoch fields
    :param sym: full training symbol, used for shape inference
    :param train_data: iterator supplying provide_data / provide_label
    :return: (arg_params, aux_params) dicts of NDArrays, verified against
        the symbol's inferred shapes
    """
    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    if args.resume:  # load params from previously trained model
        arg_params, aux_params = load_param(args.e2e_prefix, args.begin_epoch, convert=True)
    else:  # initialize weights from pretrained model and random numbers
        arg_params, aux_params = load_param(args.pretrained, args.pretrained_epoch, convert=True)

        # deal with multiple input CT slices, see 3DCE paper.
        # if NUM_SLICES = 3, pretrained weights won't be changed
        # if NUM_SLICES > 3, extra input channels in conv1_1 will be initialized to 0
        nCh = config.NUM_SLICES
        w1 = arg_params['conv1_1_weight'].asnumpy()
        w1_new = np.zeros((64, nCh, 3, 3), dtype=float)
        # FIX: use floor division so the slice bounds are ints; plain '/'
        # yields a float on Python 3 and raises TypeError when indexing
        # (identical result for int operands on Python 2)
        w1_new[:, (nCh - 3) // 2:(nCh - 3) // 2 + 3, :, :] = w1

        arg_params['conv1_1_new_weight'] = mx.nd.array(w1_new)
        arg_params['conv1_1_new_bias'] = arg_params['conv1_1_bias']
        del arg_params['conv1_1_weight']

        # RPN head is always freshly initialized
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])

        if config.FRAMEWORK == '3DCE':
            arg_params['conv_new_1_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['conv_new_1_weight'])
            arg_params['conv_new_1_bias'] = mx.nd.zeros(shape=arg_shape_dict['conv_new_1_bias'])
            arg_params['fc6_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['fc6_weight'])
            arg_params['fc6_bias'] = mx.nd.zeros(shape=arg_shape_dict['fc6_bias'])

            arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
            arg_params['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
            arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
            arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])

        elif config.FRAMEWORK == 'RFCN':
            arg_params['conv_new_1_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['conv_new_1_weight'])
            arg_params['conv_new_1_bias'] = mx.nd.zeros(shape=arg_shape_dict['conv_new_1_bias'])
            arg_params['rfcn_cls_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rfcn_cls_weight'])
            arg_params['rfcn_cls_bias'] = mx.nd.zeros(shape=arg_shape_dict['rfcn_cls_bias'])
            arg_params['rfcn_bbox_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rfcn_bbox_weight'])
            arg_params['rfcn_bbox_bias'] = mx.nd.zeros(shape=arg_shape_dict['rfcn_bbox_bias'])

        elif config.FRAMEWORK == 'Faster':
            arg_params['fc6_small_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['fc6_small_weight'])
            arg_params['fc6_small_bias'] = mx.nd.zeros(shape=arg_shape_dict['fc6_small_bias'])
            arg_params['fc7_small_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['fc7_small_weight'])
            arg_params['fc7_small_bias'] = mx.nd.zeros(shape=arg_shape_dict['fc7_small_bias'])

            arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
            arg_params['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
            arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
            arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    logger.info('load param done')
    return arg_params, aux_params
コード例 #28
0
ファイル: train_end2end.py プロジェクト: Piyush3dB/mxnet
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    """Train Faster R-CNN end-to-end.

    Loads the gt roidb, builds the training symbol and AnchorLoader,
    initializes parameters (from a checkpoint when ``args.resume``,
    otherwise from the pretrained backbone plus fresh RPN/RCNN heads),
    then fits a MutableModule with SGD and a multi-step lr schedule.

    :param args: command-line namespace (network, dataset, paths, flags)
    :param ctx: list of mx contexts, one per GPU
    :param pretrained: prefix of the pretrained model to initialize from
    :param epoch: epoch of the pretrained model to load
    :param prefix: checkpoint prefix for saving / resuming
    :param begin_epoch: epoch to resume from (used with args.resume)
    :param end_epoch: final training epoch
    :param lr: base learning rate
    :param lr_step: comma-separated epochs at which lr is multiplied by 0.1
    """
    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    sym = eval('get_' + args.network + '_train')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [load_gt_roidb(args.dataset, image_set, args.root_path, args.dataset_path,
                            flip=not args.no_flip)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # load training data
    train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
                              ctx=ctx, work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        # pretrained backbone + freshly initialized RPN and RCNN heads
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate: drop by lr_factor at each epoch listed in lr_step;
    # when resuming, drops already passed are applied to the base lr up front
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
コード例 #29
0
ファイル: train_end2end.py プロジェクト: Piyush3dB/mxnet
def main():
    """Parse CLI arguments, build one context per requested GPU and launch training."""
    args = parse_args()
    logger.info('Called with argument: %s' % args)
    gpu_ids = args.gpus.split(',')
    ctx = [mx.gpu(int(gpu_id)) for gpu_id in gpu_ids]
    train_net(args, ctx, args.pretrained, args.pretrained_epoch, args.prefix,
              args.begin_epoch, args.end_epoch, lr=args.lr, lr_step=args.lr_step)
コード例 #30
0
def main():
    """Entry point: parse CLI args into the module-level ``args`` and run the test."""
    # args is stored globally so other helpers in this script can read it
    global args
    args = parse_args()
    logger.info('Called with argument: %s' % args)
    test(args)
コード例 #31
0
def train_net(args,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              lr=0.001,
              lr_step='5'):
    """Train Faster R-CNN end-to-end.

    Loads the gt roidb, builds the training symbol and AnchorLoader,
    initializes parameters (from a checkpoint when ``args.resume``,
    otherwise from the pretrained backbone plus fresh RPN/RCNN heads),
    then fits a MutableModule with SGD and a multi-step lr schedule.

    :param args: command-line namespace (network, dataset, paths, flags)
    :param ctx: list of mx contexts, one per GPU
    :param pretrained: prefix of the pretrained model to initialize from
    :param epoch: epoch of the pretrained model to load
    :param prefix: checkpoint prefix for saving / resuming
    :param begin_epoch: epoch to resume from (used with args.resume)
    :param end_epoch: final training epoch
    :param lr: base learning rate
    :param lr_step: comma-separated epochs at which lr is multiplied by 0.1
    """
    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    sym = eval('get_' + args.network + '_train')(
        num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [
        load_gt_roidb(args.dataset,
                      image_set,
                      args.root_path,
                      args.dataset_path,
                      flip=not args.no_flip) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # load training data
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              batch_size=input_batch_size,
                              shuffle=not args.no_shuffle,
                              ctx=ctx,
                              work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE,
                              anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    logger.info('providing maximum shape %s %s' %
                (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        # pretrained backbone + freshly initialized RPN and RCNN heads
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(
            0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['bbox_pred_bias'])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = mx.callback.Speedometer(train_data.batch_size,
                                                 frequent=args.frequent,
                                                 auto_reset=False)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate: drop by lr_factor at each epoch in lr_step;
    # when resuming, drops already passed are applied to the base lr up front
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' %
                (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
コード例 #32
0
ファイル: tester.py プロジェクト: CoderHHX/incubator-mxnet
def generate_proposals(predictor, test_data, imdb, vis=False, thresh=0.):
    """
    Produce region proposals with the RPN and persist them to disk.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffled
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: thresh for valid detections
    :return: list of detected boxes
    """
    assert vis or not test_data.shuffle
    data_names = [name for name, _ in test_data.provide_data]

    imdb_boxes = []
    original_boxes = []
    idx = 0
    last = time.time()
    for im_info, data_batch in test_data:
        t1 = time.time() - last
        last = time.time()

        scale = im_info[0, 2]
        scores, boxes, data_dict = im_proposal(predictor, data_batch, data_names, scale)
        t2 = time.time() - last
        last = time.time()

        # assemble proposals as (x1, y1, x2, y2, score) rows
        dets = np.hstack((boxes, scores))
        original_boxes.append(dets)

        # keep only proposals whose score exceeds the threshold
        keep = np.where(dets[:, 4:] > thresh)[0]
        dets = dets[keep, :]
        imdb_boxes.append(dets)

        if vis:
            vis_all_detection(data_dict['data'].asnumpy(), [dets], ['obj'], scale)

        logger.info('generating %d/%d ' % (idx + 1, imdb.num_images) +
                    'proposal %d ' % (dets.shape[0]) +
                    'data %.4fs net %.4fs' % (t1, t2))
        idx += 1

    assert len(imdb_boxes) == imdb.num_images, 'calculations not complete'

    # persist filtered proposals under <root>/rpn_data
    rpn_folder = os.path.join(imdb.root_path, 'rpn_data')
    if not os.path.exists(rpn_folder):
        os.mkdir(rpn_folder)

    rpn_file = os.path.join(rpn_folder, imdb.name + '_rpn.pkl')
    with open(rpn_file, 'wb') as f:
        pickle.dump(imdb_boxes, f, pickle.HIGHEST_PROTOCOL)

    # when thresholding was applied, also save the unfiltered proposals
    if thresh > 0:
        full_rpn_file = os.path.join(rpn_folder, imdb.name + '_full_rpn.pkl')
        with open(full_rpn_file, 'wb') as f:
            pickle.dump(original_boxes, f, pickle.HIGHEST_PROTOCOL)

    logger.info('wrote rpn proposals to %s' % rpn_file)
    return imdb_boxes
コード例 #33
0
ファイル: train.py プロジェクト: jpulidojr/3DCE_new
    merge_a_into_b(config_file, config)
    config.NUM_ANCHORS = len(config.ANCHOR_SCALES) * len(config.ANCHOR_RATIOS)

    if config.FRAMEWORK != '3DCE':
        assert config.NUM_IMAGES_3DCE == 1, "Combining multiple images is only possible in 3DCE"

    default_file = cfg_from_file('default.yml')
    merge_a_into_b(default_file, default)
    default.e2e_prefix = 'model/' + default.exp_name
    if default.begin_epoch != 0:
        default.resume = True
    default.accs = dict()

    if default.gpus == '':  # auto select GPU
        import GPUtil
        deviceIDs = GPUtil.getAvailable(order='lowest', limit=1, maxMemory=.2)
        if len(deviceIDs) == 0:
            deviceIDs = GPUtil.getAvailable(order='lowest', limit=1, maxMemory=.9, maxLoad=1)

        GPUs = GPUtil.getGPUs()
        default.gpus = str(len(GPUs)-1-deviceIDs[0])
        logger.info('using gpu '+default.gpus)
    default.val_gpu = default.gpus[0]
    # default.prefetch_thread_num = min(default.prefetch_thread_num, config.TRAIN.SAMPLES_PER_BATCH)

    train_net(default)

    # test the best model on the test set
    from test import test_net
    test_net(default.e2e_prefix, default.best_epoch)
コード例 #34
0
def pred_eval(predictor, test_data, imdb, vis=False, max_box=-1, thresh=1e-3):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand

    Runs the detector over every batch of `test_data`, applies NMS per
    class, maps boxes back to original-image coordinates, optionally
    limits/visualizes detections, pickles the kept boxes, stores the raw
    results on the module-global `default.res_dict`, and returns the
    evaluation result of `my_evaluate_detections`.

    NOTE: this file is Python 2 (`print i,`, `xrange`, `cPickle`).

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle; yields
        (im_info, imname, crop, data_batch) tuples
    :param imdb: image database
    :param vis: controls visualization
    :param max_box: maximum number of boxes detected in each image
        (<= 0 disables the cap; note kept_boxes is then never filled,
        so the pickle and the vis overlay will be empty — TODO confirm
        that is intended)
    :param thresh: valid detection threshold
    :return: whatever my_evaluate_detections produces (presumably an
        accuracy metric; verify against caller)
    """
    # assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data]

    # NMS function configured once from the global TEST config
    nms = py_nms_wrapper(config.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = max_box

    num_images = imdb.num_images
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    # kept_boxes: subset of all_boxes after the max_per_image cap
    kept_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    # all_gts: ground-truth boxes, remapped like the detections
    all_gts = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    all_iminfos = []
    all_imnames = []
    all_crops = []

    i = 0
    # timers: 'data' measures iterator latency, 'im_detect' the forward
    # pass, 'misc' the NMS/bookkeeping. tic at loop end / toc at loop
    # start keeps 'data' covering only the time spent in the iterator.
    _t = {'data': Timer(), 'im_detect' : Timer(), 'misc' : Timer()}
    _t['data'].tic()
    num_image = config.NUM_IMAGES_3DCE
    # Python 2 integer division: index of the central slice of the
    # NUM_IMAGES_3DCE image stack (labels belong to the key slice)
    key_idx = (num_image - 1) / 2  # adjust image for 3DCE
    for im_info, imname, crop, data_batch in test_data:
        _t['data'].toc()
        _t['im_detect'].tic()

        all_iminfos.append(im_info)
        all_imnames.append(imname)
        all_crops.append(crop)

        # scale = im_info[0, 2]
        scale = 1.  # we have scaled the label in get_image(), so no need to scale the pred_box
        # ground truth of the key slice only
        gt_boxes = data_batch.label[0].asnumpy()[key_idx, :, :]
        # drop labels so im_detect treats this as a pure inference batch
        data_batch.label = None
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scale)
        _t['im_detect'].toc()
        _t['misc'].tic()

        # per-class NMS; class 0 is background and is skipped
        for j in range(1, imdb.num_classes):
            indexes = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[indexes, j, np.newaxis]
            cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]
            # undo the crop offset and resize so boxes are in
            # original-image coordinates
            cls_boxes = map_box_back(cls_boxes, crop[2], crop[0], im_info[0,2])
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            all_boxes[j][i] = cls_dets[keep, :]
            all_gts[j][i] = map_box_back(gt_boxes, crop[2], crop[0], im_info[0,2])

        # cap total detections per image by score across all classes
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    kept_boxes[j][i] = all_boxes[j][i][keep, :]

        if vis:
            # leading [] is the (empty) background-class slot expected
            # by vis_all_detection
            boxes_this_image = [[]] + [kept_boxes[j][i] for j in range(1, imdb.num_classes)]
            vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image, imdb.classes, scale)

        _t['misc'].toc()

        # verbose progress for the first 400 images, then a terse
        # counter every 200 images
        if i % 200 == 0:
            if i <= 400:
                logger.info('im_detect: {:d}/{:d} data {:.3f}s im_detect {:.3f}s misc {:.3f}s'
                            .format(i, imdb.num_images, _t['data'].average_time, _t['im_detect'].average_time,
                                    _t['misc'].average_time))
            else:
                print i,
                sys.stdout.flush()
        # logger.info('testing %d/%d data %.4fs net %.4fs post %.4fs' % (i, imdb.num_images, t1, t2, t3))
        i += 1
        _t['data'].tic()

    print
    sys.stdout.flush()
    # persist the capped detections for later offline analysis
    det_file = os.path.join(imdb.cache_path, imdb.name + '_detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(kept_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # side effect: stash raw per-image results on the module-global
    # `default` config object for downstream consumers
    default.res_dict = {'imname': all_imnames, 'boxes': all_boxes[1], 'gts': all_gts[1]}
    # default.res_dict = {'imname': all_imnames, 'im_info': all_iminfos, 'crops': all_crops, 'boxes': all_boxes[1], 'gts': all_gts[1]}
    
    acc = my_evaluate_detections(all_boxes, all_gts)
    sys.stdout.flush()
    return acc