Esempio n. 1
0
    def __init__(self, prefix, epoch, ctx_id=0, test_mode=False):
        self.ctx_id = ctx_id
        self.test_mode = test_mode
        self.fpn_keys = []
        fpn_stride = []
        fpn_base_size = []
        self._feat_stride_fpn = [32, 16, 8]

        for s in self._feat_stride_fpn:
            self.fpn_keys.append('stride%s' % s)
            fpn_stride.append(int(s))
            fpn_base_size.append(16)

        self._scales = np.array([32, 16, 8, 4, 2, 1])
        self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
        self._anchors_fpn = dict(
            zip(
                self.fpn_keys,
                generate_anchors_fpn(base_size=fpn_base_size,
                                     scales=self._scales,
                                     ratios=self._ratios)))
        self._num_anchors = dict(
            zip(self.fpn_keys,
                [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
        self._rpn_pre_nms_top_n = 1000
        # self._rpn_post_nms_top_n = rpn_post_nms_top_n
        # self.score_threshold = 0.05
        self.nms_threshold = 0.3
        self._bbox_pred = nonlinear_pred
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        if self.ctx_id >= 0:
            self.ctx = mx.gpu(self.ctx_id)
            self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
        else:
            self.ctx = mx.cpu()
            self.nms = cpu_nms_wrapper(self.nms_threshold)
        # self.nms = py_nms_wrapper(self.nms_threshold)
        # self.pixel_means = np.array([103.939, 116.779, 123.68]) #BGR,VGG16
        self.pixel_means = np.array([0.0, 0.0, 0.0])  #BGR,R50

        if not self.test_mode:
            image_size = (640, 640)
            self.model = mx.mod.Module(symbol=sym,
                                       context=self.ctx,
                                       label_names=None)
            self.model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                                   image_size[1]))],
                            for_training=False)
            self.model.set_params(arg_params, aux_params)
        else:
            from rcnn.core.module import MutableModule
            image_size = (2400, 2400)
            data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
            self.model = MutableModule(symbol=sym,
                                       data_names=['data'],
                                       label_names=None,
                                       context=self.ctx,
                                       max_data_shapes=data_shape)
            self.model.bind(data_shape, None, for_training=False)
            self.model.set_params(arg_params, aux_params)
Esempio n. 2
0
 def __init__(self, symbol, data_names, label_names,
              context=mx.cpu(), max_data_shapes=None,
              provide_data=None, provide_label=None,
              arg_params=None, aux_params=None):
     self._mod = MutableModule(symbol, data_names, label_names,
                               context=context, max_data_shapes=max_data_shapes)
     self._mod.bind(provide_data, provide_label, for_training=False)
     self._mod.init_params(arg_params=arg_params, aux_params=aux_params)
    def __init__(self, gpu=0, test_mode=False):
        self.ctx_id = gpu
        self.ctx = mx.gpu(self.ctx_id)
        self.fpn_keys = []
        fpn_stride = []
        fpn_base_size = []
        self._feat_stride_fpn = [32, 16, 8]

        for s in self._feat_stride_fpn:
            self.fpn_keys.append('stride%s' % s)
            fpn_stride.append(int(s))
            fpn_base_size.append(16)

        self._scales = np.array([32, 16, 8, 4, 2, 1])
        self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
        self._anchors_fpn = dict(
            zip(
                self.fpn_keys,
                generate_anchors_fpn(base_size=fpn_base_size,
                                     scales=self._scales,
                                     ratios=self._ratios)))
        self._num_anchors = dict(
            zip(self.fpn_keys,
                [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
        self._rpn_pre_nms_top_n = 1000
        # self._rpn_post_nms_top_n = rpn_post_nms_top_n
        # self.score_threshold = 0.05
        self.nms_threshold = config.TEST.NMS
        self._bbox_pred = nonlinear_pred

        base_path = os.path.dirname(__file__)
        sym, arg_params, aux_params = mx.model.load_checkpoint(
            base_path + '/model/model-v2.0/e2e', 1)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
        self.pixel_means = np.array([103.939, 116.779, 123.68])  # BGR

        if not test_mode:
            image_size = (640, 640)
            self.model = mx.mod.Module(symbol=sym,
                                       context=self.ctx,
                                       label_names=None)
            self.model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                                   image_size[1]))],
                            for_training=False)
            self.model.set_params(arg_params, aux_params)
        else:
            from rcnn.core.module import MutableModule
            image_size = (640, 640)
            data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
            self.model = MutableModule(symbol=sym,
                                       data_names=['data'],
                                       label_names=None,
                                       context=self.ctx,
                                       max_data_shapes=data_shape)
            self.model.bind(data_shape, None, for_training=False)
            self.model.set_params(arg_params, aux_params)

        print('init ssh success')
Esempio n. 4
0
    def __init__(self, prefix, epoch, ctx_id=1, test_mode=False):
        self.ctx_id = ctx_id
        self.ctx = mx.gpu(self.ctx_id)
        self.keys = []
        strides = []
        base_size = []
        scales = []
        self.feat_strides = config.RPN_FEAT_STRIDE

        for s in self.feat_strides:
            self.keys.append('stride%s' % s)
            strides.append(int(s))
            base_size.append(config.RPN_ANCHOR_CFG[str(s)]['BASE_SIZE'])
            scales += config.RPN_ANCHOR_CFG[str(s)]['SCALES']

        # self._scales = np.array([32, 16, 8, 4, 2, 1])
        self._scales = np.array(scales)
        self._ratios = np.array([1.0]*len(self.feat_strides))
        # self._anchors_fpn = dict(list(zip(self.keys, generate_anchors_fpn())))
        self._anchors_fpn = dict(list(zip(self.keys, generate_anchors_fpn(base_size=base_size, scales=self._scales, ratios=self._ratios))))

        self._num_anchors = dict(zip(self.keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
        self._rpn_pre_nms_top_n = 1000
        #self._rpn_post_nms_top_n = rpn_post_nms_top_n
        self.nms_threshold = config.test_nms_threshold      #值越大,同一个人脸产生的预测框越多
        self._bbox_pred = nonlinear_pred
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
        self.pixel_means = np.array([103.939, 116.779, 123.68]) #BGR
        self.pixel_means = np.array([127., 127., 127.])

        if not test_mode:
            image_size = (640, 640)
            self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)
            self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
            self.model.set_params(arg_params, aux_params)
        else:
            from rcnn.core.module import MutableModule
            image_size = (2400, 2400)
            data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
            self.model = MutableModule(symbol=sym, data_names=['data'], label_names=None,
                                       context=self.ctx, max_data_shapes=data_shape)
            self.model.bind(data_shape, None, for_training=False)
            self.model.set_params(arg_params, aux_params)
    def __init__(self, network, prefix, epoch, ctx_id=0, mask_nms=True):
        self.ctx_id = ctx_id
        self.ctx = mx.gpu(self.ctx_id)
        self.mask_nms = mask_nms
        #self.nms_threshold = 0.3
        #self._bbox_pred = nonlinear_pred
        if not self.mask_nms:
            self.nms = gpu_nms_wrapper(config.TEST.NMS, self.ctx_id)
        else:
            self.nms = gpu_nms_wrapper(config.TEST.RPN_NMS_THRESH, self.ctx_id)
        #self.nms = py_nms_wrapper(config.TEST.NMS)

        sym = eval('get_' + network + '_mask_test')(
            num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
        #arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        arg_params, aux_params = load_param(prefix,
                                            epoch,
                                            convert=True,
                                            ctx=self.ctx,
                                            process=True)
        split = False
        max_image_shape = (1, 3, 1600, 1600)
        #max_image_shape = (1,3,1200,2200)
        max_data_shapes = [("data", max_image_shape), ("im_info", (1, 3))]
        mod = MutableModule(symbol=sym,
                            data_names=["data", "im_info"],
                            label_names=None,
                            max_data_shapes=max_data_shapes,
                            context=self.ctx)
        mod.bind(data_shapes=max_data_shapes,
                 label_shapes=None,
                 for_training=False)
        mod.init_params(arg_params=arg_params, aux_params=aux_params)
        self.model = mod
        pass
Esempio n. 6
0
class Predictor(object):
    def __init__(self, symbol, data_names, label_names,
                 context=mx.cpu(), max_data_shapes=None,
                 provide_data=None, provide_label=None,
                 arg_params=None, aux_params=None):
        self._mod = MutableModule(symbol, data_names, label_names,
                                  context=context, max_data_shapes=max_data_shapes)
        self._mod.bind(provide_data, provide_label, for_training=False)
        self._mod.init_params(arg_params=arg_params, aux_params=aux_params)

    def predict(self, data_batch):
        self._mod.forward(data_batch)
        return dict(zip(self._mod.output_names, self._mod.get_outputs()))
Esempio n. 7
0
  def __init__(self, prefix, epoch, ctx_id=0, test_mode=False):
    self.ctx_id = ctx_id
    self.ctx = mx.gpu(self.ctx_id)
    self.fpn_keys = []
    fpn_stride = []
    fpn_base_size = []
    self._feat_stride_fpn = [32, 16, 8]

    for s in self._feat_stride_fpn:
        self.fpn_keys.append('stride%s'%s)
        fpn_stride.append(int(s))
        fpn_base_size.append(16)

    self._scales = np.array([32,16,8,4,2,1])
    self._ratios = np.array([1.0]*len(self._feat_stride_fpn))
    self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(base_size=fpn_base_size, scales=self._scales, ratios=self._ratios)))
    self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    self._rpn_pre_nms_top_n = 1000
    #self._rpn_post_nms_top_n = rpn_post_nms_top_n
    #self.score_threshold = 0.05
    self.nms_threshold = 0.3
    self._bbox_pred = nonlinear_pred
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    self.pixel_means = np.array([103.939, 116.779, 123.68]) #BGR

    if not test_mode:
      image_size = (640, 640)
      self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names = None)
      self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
      self.model.set_params(arg_params, aux_params)
    else:
      from rcnn.core.module import MutableModule
      image_size = (2400, 2400)
      data_shape = [('data', (1,3,image_size[0], image_size[1]))]
      self.model = MutableModule(symbol=sym, data_names=['data'], label_names=None,
                                context=self.ctx, max_data_shapes=data_shape)
      self.model.bind(data_shape, None, for_training=False)
      self.model.set_params(arg_params, aux_params)
Esempio n. 8
0
def train_net(args,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              lr=0.001,
              lr_step='5'):
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol

    if args.use_global_context or args.use_roi_align:
        sym = eval('get_' + args.network + '_train')(
            num_classes=config.NUM_CLASSES,
            num_anchors=config.NUM_ANCHORS,
            use_global_context=args.use_global_context,
            use_roi_align=args.use_roi_align)
    else:
        sym = eval('get_' + args.network + '_train')(
            num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)

    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)

    if not args.use_ava_recordio:
        # load dataset and prepare imdb for training
        image_sets = [iset for iset in args.image_set.split('+')]
        roidbs = [
            load_gt_roidb(args.dataset,
                          image_set,
                          args.root_path,
                          args.dataset_path,
                          flip=not args.no_flip) for image_set in image_sets
        ]
        roidb = merge_roidb(roidbs)
        roidb = filter_roidb(roidb)

        # load training data
        train_data = AnchorLoader(
            feat_sym,
            roidb,
            batch_size=input_batch_size,
            shuffle=not args.no_shuffle,
            ctx=ctx,
            work_load_list=args.work_load_list,
            feat_stride=config.RPN_FEAT_STRIDE,
            anchor_scales=config.ANCHOR_SCALES,
            anchor_ratios=config.ANCHOR_RATIOS,
            aspect_grouping=config.TRAIN.ASPECT_GROUPING,
            use_data_augmentation=args.use_data_augmentation)
    else:
        f = open(args.classes_names)
        classes = ['__background__']
        for line in f.readlines():
            classes.append(line.strip().split(' ')[0])

        path_imgidx = args.ava_recordio_name + '.idx'
        path_imgrec = args.ava_recordio_name + '.rec'

        record = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')  # pylint: disable=redefined-variable-type

        train_data = AnchorLoaderAvaRecordIO(
            feat_sym,
            record,
            classes,
            batch_size=input_batch_size,
            shuffle=not args.no_shuffle,
            ctx=ctx,
            work_load_list=args.work_load_list,
            feat_stride=config.RPN_FEAT_STRIDE,
            anchor_scales=config.ANCHOR_SCALES,
            anchor_ratios=config.ANCHOR_RATIOS,
            aspect_grouping=config.TRAIN.ASPECT_GROUPING,
            use_data_augmentation=args.use_data_augmentation)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape')
    pprint.pprint(out_shape_dict)
    print('arg shape')
    #  pprint.pprint(arg_shape_dict)
    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(
            0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['bbox_pred_bias'])

    if args.use_global_context:
        # additional params for using global context
        """
        for arg_param_name in sym.list_arguments():
            if 'stage5' in arg_param_name:
                # print(arg_param_name, arg_param_name.replace('stage5', 'stage4'))
                arg_params[arg_param_name] = arg_params[arg_param_name.replace('stage5', 'stage4')].copy()  # params of stage5 is initialized from stage4
        arg_params['bn2_gamma'] = arg_params['bn1_gamma'].copy()
        arg_params['bn2_beta'] = arg_params['bn1_beta'].copy()
        """
        for aux_param_name in sym.list_auxiliary_states():
            if 'stage5' in aux_param_name:
                # print(aux_param_name, aux_param_name.replace('stage5', 'stage4'))
                aux_params[aux_param_name] = aux_params[aux_param_name.replace(
                    'stage5', 'stage4')].copy(
                    )  # params of stage5 is initialized from stage4
        aux_params['bn2_moving_mean'] = aux_params['bn1_moving_mean'].copy()
        aux_params['bn2_moving_var'] = aux_params['bn1_moving_var'].copy()

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    if not args.use_ava_recordio:
        lr_iters = [
            int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
        ]
    else:
        lr_iters = [
            int(epoch * train_data.provide_size() / batch_size)
            for epoch in lr_epoch_diff
        ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
Esempio n. 9
0
class SSHDetector:
    def __init__(self, prefix, epoch, ctx_id=0, test_mode=False):
        self.ctx_id = -1
        if ctx_id >= 0:
            self.ctx = mx.gpu(ctx_id)
        else:
            self.ctx = mx.cpu()
        self.fpn_keys = []
        fpn_stride = []
        fpn_base_size = []
        self._feat_stride_fpn = [32, 16, 8]

        for s in self._feat_stride_fpn:
            self.fpn_keys.append('stride%s' % s)
            fpn_stride.append(int(s))
            fpn_base_size.append(16)

        self._scales = np.array([32, 16, 8, 4, 2, 1])
        self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
        # self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(base_size=fpn_base_size, scales=self._scales, ratios=self._ratios)))
        self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn()))
        self._num_anchors = dict(
            zip(self.fpn_keys,
                [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
        self._rpn_pre_nms_top_n = 1000
        # self._rpn_post_nms_top_n = rpn_post_nms_top_n
        # self.score_threshold = 0.05
        self.nms_threshold = 0.3
        self._bbox_pred = nonlinear_pred
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
        self.pixel_means = np.array([103.939, 116.779, 123.68])  # BGR
        self.pixel_means = config.PIXEL_MEANS
        print('means', self.pixel_means)

        if not test_mode:
            image_size = (640, 640)
            self.model = mx.mod.Module(symbol=sym,
                                       context=self.ctx,
                                       label_names=None)
            self.model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                                   image_size[1]))],
                            for_training=False)
            self.model.set_params(arg_params, aux_params)
        else:
            from rcnn.core.module import MutableModule
            image_size = (2400, 2400)
            data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
            self.model = MutableModule(symbol=sym,
                                       data_names=['data'],
                                       label_names=None,
                                       context=self.ctx,
                                       max_data_shapes=data_shape)
            self.model.bind(data_shape, None, for_training=False)
            self.model.set_params(arg_params, aux_params)

    def detect(self, img, threshold=0.05, scales=[1.0]):
        proposals_list = []
        scores_list = []

        for im_scale in scales:

            if im_scale != 1.0:
                im = cv2.resize(img,
                                None,
                                None,
                                fx=im_scale,
                                fy=im_scale,
                                interpolation=cv2.INTER_LINEAR)
            else:
                im = img
            im = im.astype(np.float32)
            # self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
            im_info = [im.shape[0], im.shape[1], im_scale]
            im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
            for i in range(3):
                im_tensor[0,
                          i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]
            data = nd.array(im_tensor)
            db = mx.io.DataBatch(data=(data, ),
                                 provide_data=[('data', data.shape)])
            self.model.forward(db, is_train=False)
            net_out = self.model.get_outputs()
            pre_nms_topN = self._rpn_pre_nms_top_n
            # post_nms_topN = self._rpn_post_nms_top_n
            # min_size_dict = self._rpn_min_size_fpn

            for s in self._feat_stride_fpn:
                if len(scales) > 1 and s == 32 and im_scale == scales[-1]:
                    continue
                _key = 'stride%s' % s
                stride = int(s)
                idx = 0
                if s == 16:
                    idx = 2
                elif s == 8:
                    idx = 4
                print('getting',
                      im_scale,
                      stride,
                      idx,
                      len(net_out),
                      data.shape,
                      file=sys.stderr)
                scores = net_out[idx].asnumpy()
                # print(scores.shape)
                idx += 1
                # print('scores',stride, scores.shape, file=sys.stderr)
                scores = scores[:, self._num_anchors['stride%s' % s]:, :, :]
                bbox_deltas = net_out[idx].asnumpy()

                # if DEBUG:
                #    print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
                #    print 'scale: {}'.format(im_info[2])

                _height, _width = int(im_info[0] / stride), int(im_info[1] /
                                                                stride)
                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

                A = self._num_anchors['stride%s' % s]
                K = height * width

                anchors = anchors_plane(
                    height, width, stride,
                    self._anchors_fpn['stride%s' % s].astype(np.float32))
                # print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr)
                anchors = anchors.reshape((K * A, 4))

                # print('pre', bbox_deltas.shape, height, width)
                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                # print('after', bbox_deltas.shape, height, width)
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape(
                    (-1, 4))

                scores = self._clip_pad(scores, (height, width))
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                # print(anchors.shape, bbox_deltas.shape, A, K, file=sys.stderr)
                proposals = self._bbox_pred(anchors, bbox_deltas)
                # proposals = anchors

                proposals = clip_boxes(proposals, im_info[:2])

                # keep = self._filter_boxes(proposals, min_size_dict['stride%s'%s] * im_info[2])
                # proposals = proposals[keep, :]
                # scores = scores[keep]
                # print('333', proposals.shape)

                scores_ravel = scores.ravel()
                order = scores_ravel.argsort()[::-1]
                if pre_nms_topN > 0:
                    order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                scores = scores[order]

                proposals /= im_scale

                proposals_list.append(proposals)
                scores_list.append(scores)

        proposals = np.vstack(proposals_list)
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        # if config.TEST.SCORE_THRESH>0.0:
        #  _count = np.sum(scores_ravel>config.TEST.SCORE_THRESH)
        #  order = order[:_count]
        # if pre_nms_topN > 0:
        #    order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        det = np.hstack((proposals, scores)).astype(np.float32)

        # if np.shape(det)[0] == 0:
        #    print("Something wrong with the input image(resolution is too low?), generate fake proposals for it.")
        #    proposals = np.array([[1.0, 1.0, 2.0, 2.0]]*post_nms_topN, dtype=np.float32)
        #    scores = np.array([[0.9]]*post_nms_topN, dtype=np.float32)
        #    det = np.array([[1.0, 1.0, 2.0, 2.0, 0.9]]*post_nms_topN, dtype=np.float32)

        if self.nms_threshold < 1.0:
            keep = self.nms(det)
            det = det[keep, :]
        if threshold > 0.0:
            keep = np.where(det[:, 4] >= threshold)[0]
            det = det[keep, :]
        return det

    @staticmethod
    def _filter_boxes(boxes, min_size):
        """ Remove all boxes with any side smaller than min_size """
        ws = boxes[:, 2] - boxes[:, 0] + 1
        hs = boxes[:, 3] - boxes[:, 1] + 1
        keep = np.where((ws >= min_size) & (hs >= min_size))[0]
        return keep

    @staticmethod
    def _clip_pad(tensor, pad_shape):
        """
        Clip boxes of the pad area.
        :param tensor: [n, c, H, W]
        :param pad_shape: [h, w]
        :return: [n, c, h, w]
        """
        H, W = tensor.shape[2:]
        h, w = pad_shape

        if h < H or w < W:
            tensor = tensor[:, :, :h, :w].copy()

        return tensor
Esempio n. 10
0
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    sym_instance = eval('symbol_' + args.network)()
    sym_gen = sym_instance.get_symbol
    sym = sym_gen(46,config,is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)

    # load dataset and prepare imdb for training
    dataset = Dataset(args.root_path,args.dataset,args.subset,split = args.split)
    roidb = dataset.gt_roidb()
    W = dataset.W

    # load training data
    train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
                              ctx=ctx, work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    #get a new symbol
    bucket_key = train_data.bucket_key
    print(train_data.provide_data)
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    sym_instance.infer_shape(data_shape_dict)
    #arg_shape, out_shape, aux_shape = curr_sym.infer_shape(**data_shape_dict)
    #arg_shape_dict = dict(zip(curr_sym.list_arguments(), arg_shape))
    #out_shape_dict = dict(zip(curr_sym.list_outputs(), out_shape))
    #aux_shape_dict = dict(zip(curr_sym.list_auxiliary_states(), aux_shape))
    #del arg_shape_dict['lstm_parameters']
    #print(curr_sym.list_arguments())
    #print(aux_shape_dict)

    # load and initialize params
    if args.resume:
        print("continue training from epoch {}".format(begin_epoch))
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        if config.RNN.USE_W2V:
            arg_params['embed_weight'] = mx.nd.array(W)
        else:
            arg_params['embed_weight'] = mx.random.uniform(0,0.01,shape=arg_shape_dict['embed_weight'])
        sym_instance.init_weight(config,arg_params,aux_params)
    #no checking
    #for k in arg_shape_dict.iterkeys():
     #   if k in data_shape_dict:
      #      continue
       # assert k in arg_params, k + ' not initialized'
        #assert arg_params[k].shape == arg_shape_dict[k], \
         #   'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    #for k in sym.list_auxiliary_states():
     #   assert k in aux_params, k + ' not initialized'
      #  assert aux_params[k].shape == aux_shape_dict[k], \
       #     'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym_gen,config, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(config.ENCODER_CELL,prefix, means, stds)
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}
    #initializer for fused RNN
    #TODO:not successfully added,try ask it on github issues.
    initializer = mx.initializer.FusedRNN(init=mx.init.Xavier(factor_type='in', magnitude=2.34),
                                          num_hidden = 1024,num_layers=2,mode='lstm')
    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,allow_missing=True,initializer=mx.init.Xavier(factor_type='in', magnitude=2.34),
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
class SSHDetector:
    def __init__(self, gpu=0, test_mode=False):
        self.ctx_id = gpu
        self.ctx = mx.gpu(self.ctx_id)
        self.fpn_keys = []
        fpn_stride = []
        fpn_base_size = []
        self._feat_stride_fpn = [32, 16, 8]

        for s in self._feat_stride_fpn:
            self.fpn_keys.append('stride%s' % s)
            fpn_stride.append(int(s))
            fpn_base_size.append(16)

        self._scales = np.array([32, 16, 8, 4, 2, 1])
        self._ratios = np.array([1.0] * len(self._feat_stride_fpn))
        self._anchors_fpn = dict(
            zip(self.fpn_keys, generate_anchors_fpn(base_size=fpn_base_size, scales=self._scales, ratios=self._ratios)))
        self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
        self._rpn_pre_nms_top_n = 1000
        # self._rpn_post_nms_top_n = rpn_post_nms_top_n
        # self.score_threshold = 0.05
        self.nms_threshold = config.TEST.NMS
        self._bbox_pred = nonlinear_pred

        base_path = os.path.dirname(__file__)
        sym, arg_params, aux_params = mx.model.load_checkpoint(base_path + '/model/e2e', 0)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
        self.pixel_means = np.array([103.939, 116.779, 123.68])  # BGR

        if not test_mode:
            image_size = (640, 640)
            self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)
            self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
            self.model.set_params(arg_params, aux_params)
        else:
            from rcnn.core.module import MutableModule
            image_size = (640, 640)
            data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
            self.model = MutableModule(symbol=sym, data_names=['data'], label_names=None,
                                       context=self.ctx, max_data_shapes=data_shape)
            self.model.bind(data_shape, None, for_training=False)
            self.model.set_params(arg_params, aux_params)

        print('init ssh success')

    def get_boxes(self, im, scales_index):
        # prevent bigger axis from being more than max_size:
        if scales_index == 0:
            scales = [1.0]  # 使用原始图片
        elif scales_index == 1:
            scales = config.TEST.PYRAMID_SCALES  # 切换到pyramidbox的scale方法
        else:
            im_shape = im.shape
            target_h = 800.
            target_w = 1200.
            src_h = im_shape[0] + 20 * 2
            src_w = im_shape[1] + 20 * 2
            im_scale = min(target_w / src_w, target_h / src_h)
            if im_shape[0] < 800 and im_shape[1] < 1200:
                scales = [1.0]
            else:
                scales = [im_scale]
        return scales

    def detect(self, img, scales_index=0):
        proposals_list = []
        scores_list = []

        im_src = img.copy()

        CONSTANT = config.TEST.CONSTANT
        BLACK = [0, 0, 0]
        img = cv2.copyMakeBorder(img, CONSTANT, CONSTANT, CONSTANT, CONSTANT, cv2.BORDER_CONSTANT, value=BLACK)

        scales = self.get_boxes(img, scales_index)

        for im_scale in scales:
            if im_scale != 1.0:
                im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
            else:
                im = img
            im = im.astype(np.float32)
            # self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
            im_info = [im.shape[0], im.shape[1], im_scale]
            im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
            for i in range(3):
                im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]
            data = nd.array(im_tensor)
            db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])
            self.model.forward(db, is_train=False)
            net_out = self.model.get_outputs()
            pre_nms_topN = self._rpn_pre_nms_top_n

            for s in self._feat_stride_fpn:
                if len(scales) > 1 and s == 32 and im_scale == scales[-1]:
                    continue
                _key = 'stride%s' % s
                stride = int(s)
                idx = 0
                if s == 16:
                    idx = 2
                elif s == 8:
                    idx = 4
                # print('getting', im_scale, stride, idx, len(net_out), data.shape, file=sys.stderr)
                scores = net_out[idx].asnumpy()
                # print(scores.shape)
                idx += 1
                # print('scores',stride, scores.shape, file=sys.stderr)
                scores = scores[:, self._num_anchors['stride%s' % s]:, :, :]
                bbox_deltas = net_out[idx].asnumpy()

                _height, _width = int(im_info[0] / stride), int(im_info[1] / stride)
                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

                A = self._num_anchors['stride%s' % s]
                K = height * width

                anchors = anchors_plane(height, width, stride, self._anchors_fpn['stride%s' % s].astype(np.float32))
                # print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr)
                anchors = anchors.reshape((K * A, 4))

                # print('pre', bbox_deltas.shape, height, width)
                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                # print('after', bbox_deltas.shape, height, width)
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

                scores = self._clip_pad(scores, (height, width))
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                # print(anchors.shape, bbox_deltas.shape, A, K, file=sys.stderr)
                proposals = self._bbox_pred(anchors, bbox_deltas)
                # proposals = anchors

                proposals = clip_boxes(proposals, im_info[:2])

                scores_ravel = scores.ravel()
                order = scores_ravel.argsort()[::-1]
                if pre_nms_topN > 0:
                    order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                scores = scores[order]

                proposals /= im_scale

                # #add by sai with pyramidbox to filt scale face
                # if im_scale > 1:
                #     index = np.where(
                #         np.minimum(proposals[:, 2] - proposals[:, 0] + 1,
                #                    proposals[:, 3] - proposals[:, 1] + 1) < 50)[0]
                #     proposals = proposals[index, :]
                #     scores = scores[index, :]
                # else:
                #     index = np.where(
                #         np.maximum(proposals[:, 2] - proposals[:, 0] + 1,
                #                    proposals[:, 3] - proposals[:, 1] + 1) > 20)[0]
                #     proposals = proposals[index, :]
                #     scores = scores[index, :]

                proposals_list.append(proposals)
                scores_list.append(scores)
        proposals = np.vstack(proposals_list)
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]

        proposals = proposals[order, :]
        scores = scores[order]

        det = np.hstack((proposals, scores)).astype(np.float32)

        if self.nms_threshold < 1.0:
            keep = self.nms(det)
            det = det[keep, :]
        threshold = config.TEST.SCORE_THRESH
        if threshold > 0.0:
            keep = np.where(det[:, 4] >= threshold)[0]
            det = det[keep, :]

            # add by sai
        if det.shape[0] != 0:
            for i in range(det.shape[0]):
                det[i, :][0] = det[i, :][0] - CONSTANT
                det[i, :][1] = det[i, :][1] - CONSTANT
                det[i, :][2] = det[i, :][2] - CONSTANT
                det[i, :][3] = det[i, :][3] - CONSTANT
                if det[i, :][0] < 0:
                    det[i, :][0] = 0
                if det[i, :][2] > im_src.shape[1]:
                    det[i, :][2] = im_src.shape[1]
                if det[i, :][1] < 0:
                    det[i, :][1] = 0
                if det[i, :][3] > im_src.shape[0]:
                    det[i, :][3] = im_src.shape[0]
        return det

    @staticmethod
    def _filter_boxes(boxes, min_size):
        """ Remove all boxes with any side smaller than min_size """
        ws = boxes[:, 2] - boxes[:, 0] + 1
        hs = boxes[:, 3] - boxes[:, 1] + 1
        keep = np.where((ws >= min_size) & (hs >= min_size))[0]
        return keep

    @staticmethod
    def _clip_pad(tensor, pad_shape):
        """
        Clip boxes of the pad area.
        :param tensor: [n, c, H, W]
        :param pad_shape: [h, w]
        :return: [n, c, h, w]
        """
        H, W = tensor.shape[2:]
        h, w = pad_shape

        if h < H or w < W:
            tensor = tensor[:, :, :h, :w].copy()

        return tensor
Esempio n. 12
0
def train_net(args):

    if args.rand_seed > 0:
        np.random.seed(args.rand_seed)
        mx.random.seed(args.rand_seed)
        random.seed(args.rand_seed)

    # print config
    logger.info(pprint.pformat(config))
    logger.info(pprint.pformat(args))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [load_gt_roidb(args.dataset, image_set, args.dataset_path,
                            flip=args.flip)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    samplepcnt = args.begin_sample

    if samplepcnt == 100:
        sroidb = roidb
    else:
        sroidb = sample_roidb(roidb, samplepcnt)  # Sample by percentage of all images
    logger.info('Sampling %d pcnt : %d training slices' % (samplepcnt, len(sroidb)))

    # Debug to see if we can concatenate ROIDB's
    #print(sroidb)
    #dir(sroidb)
    #newroidb = sroidb + roidb
    #newroidb = append_roidb(sroidb, roidb)
    #print( "--Append test: " + str(len(sroidb)) +" " + str(len(roidb)) + " = " + str(len(newroidb)) ) 

    # load symbol
    sym = eval('get_' + args.network)(is_train=True, num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.SAMPLES_PER_BATCH * batch_size

    # load training data
    train_data = AnchorLoader(feat_sym, sroidb, batch_size=input_batch_size, shuffle=args.shuffle,
                              ctx=ctx, work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING,
                              nThreads=default.prefetch_thread_num)

    # infer max shape
    max_data_shape = [('data', (input_batch_size*config.NUM_IMAGES_3DCE, config.NUM_SLICES, config.MAX_SIZE, config.MAX_SIZE))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size*config.NUM_IMAGES_3DCE, 5, 5)))
    logger.info('providing maximum shape %s %s' % (max_data_shape, max_label_shape))

    # load and initialize and check params
    arg_params, aux_params = init_params(args, sym, train_data)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    # rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    # eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_cls_metric, rpn_bbox_metric,  cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = (callback.do_checkpoint(args.e2e_prefix, means, stds),
                          callback.do_validate(args.e2e_prefix))

    arg_names = [x for x in sym.list_arguments() if x not in data_names+label_names]
    opt = get_optimizer(args, arg_names, len(sroidb) / input_batch_size, args.iter_size)

    # train
    default.testing = False
    mod.fit(train_data, roidb, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kvstore,
            optimizer=opt, iter_size=args.iter_size,
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=args.begin_epoch, num_epoch=args.e2e_epoch)
Esempio n. 13
0
def train_net(network,
              dataset,
              image_set,
              root_path,
              dataset_path,
              frequent,
              kvstore,
              work_load_list,
              no_flip,
              no_shuffle,
              resume,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              train_shared,
              lr,
              lr_step,
              proposal,
              maskrcnn_stage=None):

    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    sym = eval('get_' + network + '_train')(num_classes=config.NUM_CLASSES,
                                            num_anchors=config.NUM_ANCHORS)
    feat_sym = []
    for stride in config.RPN_FEAT_STRIDE:
        feat_sym.append(sym.get_internals()['rpn_cls_score_stride%s_output' %
                                            stride])

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    logger.info(pprint.pformat(config))

    roidb_file = root_path + '/cache/' + dataset + '_roidb_with_mask.pkl'
    mean_file = root_path + '/cache/' + dataset + '_roidb_mean.pkl'
    std_file = root_path + '/cache/' + dataset + '_roidb_std.pkl'
    if maskrcnn_stage is not None:
        roidb_file = root_path + '/cache/' + dataset + '_roidb_with_mask_' + maskrcnn_stage + '.pkl'
        mean_file = root_path + '/cache/' + dataset + '_roidb_mean_' + maskrcnn_stage + '.pkl'
        std_file = root_path + '/cache/' + dataset + '_roidb_std_' + maskrcnn_stage + '.pkl'

    if osp.exists(roidb_file) and osp.exists(mean_file) and osp.exists(
            std_file):
        print('Load ' + roidb_file)
        with open(roidb_file, 'r') as f:
            roidb = pkl.load(f)
        print('Load ' + mean_file)
        with open(mean_file, 'r') as f:
            means = pkl.load(f)
        print('Load ' + std_file)
        with open(std_file, 'r') as f:
            stds = pkl.load(f)
    else:
        # load dataset and prepare imdb for training
        image_sets = [iset for iset in image_set.split('+')]
        roidbs = [
            load_proposal_roidb(dataset,
                                image_set,
                                root_path,
                                dataset_path,
                                proposal=proposal,
                                append_gt=True,
                                flip=not no_flip) for image_set in image_sets
        ]
        roidb = merge_roidb(roidbs)

        def filter_roidb(roidb):
            """ remove roidb entries without usable rois """
            def is_valid(entry):
                """ valid images have at least 1 fg or bg roi """
                overlaps = entry['max_overlaps']
                fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0]
                bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) &
                                   (overlaps >= config.TRAIN.BG_THRESH_LO))[0]
                valid = len(fg_inds) > 0 and len(bg_inds) > 0
                return valid

            num = len(roidb)
            filtered_roidb = [entry for entry in roidb if is_valid(entry)]
            num_after = len(filtered_roidb)
            print('filtered %d roidb entries: %d -> %d' %
                  (num - num_after, num, num_after))

            return filtered_roidb

        roidb = filter_roidb(roidb)
        means, stds = add_bbox_regression_targets(roidb)
        add_assign_targets(roidb)
        add_mask_targets(roidb)
        for file, obj in zip([roidb_file, mean_file, std_file],
                             [roidb, means, stds]):
            with open(file, 'w') as f:
                pkl.dump(obj, f, -1)

    # load training data
    train_data = MaskROIIter(roidb,
                             batch_size=input_batch_size,
                             shuffle=not no_shuffle,
                             ctx=ctx,
                             work_load_list=work_load_list,
                             aspect_grouping=config.TRAIN.ASPECT_GROUPING)
    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_label_shape = []
    for s in config.RCNN_FEAT_STRIDE:
        max_data_shape.append(('rois_stride%s' % s,
                               (input_batch_size, config.TRAIN.BATCH_ROIS, 5)))
        max_label_shape.append(('label_stride%s' % s,
                                (input_batch_size, config.TRAIN.BATCH_ROIS)))
        max_label_shape.append(
            ('bbox_target_stride%s' % s,
             (input_batch_size,
              config.TRAIN.BATCH_ROIS * config.NUM_CLASSES * 4)))
        max_label_shape.append(
            ('bbox_weight_stride%s' % s,
             (input_batch_size,
              config.TRAIN.BATCH_ROIS * config.NUM_CLASSES * 4)))
        max_label_shape.append(('mask_target_stride%s' % s,
                                (input_batch_size, config.TRAIN.BATCH_ROIS,
                                 config.NUM_CLASSES, 28, 28)))
        max_label_shape.append(('mask_weight_stride%s' % s,
                                (input_batch_size, config.TRAIN.BATCH_ROIS,
                                 config.NUM_CLASSES, 1, 1)))
    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)

    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = zip(sym.list_outputs(), out_shape)
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape:')
    pprint.pprint(out_shape_dict)

    # load and initialize params
    if resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        init_bbox_pred = mx.init.Normal(sigma=0.001)
        init_internal = mx.init.Normal(sigma=0.01)
        init = mx.init.Xavier(factor_type="in",
                              rnd_type='gaussian',
                              magnitude=2)
        for k in sym.list_arguments():
            if k in data_shape_dict:
                continue
            if k not in arg_params:
                print('init', k)
                arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
                init_internal(k, arg_params[k])
                if k in ['rcnn_fc_bbox_weight', 'bbox_pred_weight']:
                    init_bbox_pred(k, arg_params[k])
                if k.endswith('bias'):
                    arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
                if 'ctx_red_weight' in k:
                    ctx_shape = np.array(arg_shape_dict[k])
                    ctx_shape[1] /= 2
                    arg_params[k][:] = np.concatenate((np.eye(
                        ctx_shape[1]).reshape(ctx_shape), np.zeros(ctx_shape)),
                                                      axis=1)

        for k in sym.list_auxiliary_states():
            if k not in aux_params:
                print('init', k)
                aux_params[k] = mx.nd.zeros(shape=aux_shape_dict[k])
                init(k, aux_params[k])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    if train_shared:
        fixed_param_prefix = config.FIXED_PARAMS_SHARED
    else:
        fixed_param_prefix = config.FIXED_PARAMS
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNRegLossMetric()
    mask_acc_metric = metric.MaskAccMetric()
    mask_log_metric = metric.MaskLogLossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            eval_metric, cls_metric, bbox_metric, mask_acc_metric,
            mask_log_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=frequent)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0001,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
Esempio n. 14
0
class SSHDetector:
    def __init__(self, prefix, epoch, ctx_id=1, test_mode=False):
        self.ctx_id = ctx_id
        self.ctx = mx.gpu(self.ctx_id)
        self.keys = []
        strides = []
        base_size = []
        scales = []
        self.feat_strides = config.RPN_FEAT_STRIDE

        for s in self.feat_strides:
            self.keys.append('stride%s' % s)
            strides.append(int(s))
            base_size.append(config.RPN_ANCHOR_CFG[str(s)]['BASE_SIZE'])
            scales += config.RPN_ANCHOR_CFG[str(s)]['SCALES']

        # self._scales = np.array([32, 16, 8, 4, 2, 1])
        self._scales = np.array(scales)
        self._ratios = np.array([1.0]*len(self.feat_strides))
        # self._anchors_fpn = dict(list(zip(self.keys, generate_anchors_fpn())))
        self._anchors_fpn = dict(list(zip(self.keys, generate_anchors_fpn(base_size=base_size, scales=self._scales, ratios=self._ratios))))

        self._num_anchors = dict(zip(self.keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
        self._rpn_pre_nms_top_n = 1000
        #self._rpn_post_nms_top_n = rpn_post_nms_top_n
        self.nms_threshold = config.test_nms_threshold      #值越大,同一个人脸产生的预测框越多
        self._bbox_pred = nonlinear_pred
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
        self.pixel_means = np.array([103.939, 116.779, 123.68]) #BGR
        self.pixel_means = np.array([127., 127., 127.])

        if not test_mode:
            image_size = (640, 640)
            self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names=None)
            self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
            self.model.set_params(arg_params, aux_params)
        else:
            from rcnn.core.module import MutableModule
            image_size = (2400, 2400)
            data_shape = [('data', (1, 3, image_size[0], image_size[1]))]
            self.model = MutableModule(symbol=sym, data_names=['data'], label_names=None,
                                       context=self.ctx, max_data_shapes=data_shape)
            self.model.bind(data_shape, None, for_training=False)
            self.model.set_params(arg_params, aux_params)


    def detect(self, img, threshold=0.5, scales=[1.0]):
        proposals_list = []
        proposals_kp_list = []
        scores_list = []

        for im_scale in scales:
            if im_scale != 1.0:
                im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
            else:
                im = img
            im = im.astype(np.float32)
            # im_shape = im.shape
            # self.model.bind(data_shapes=[('data', (1, 3, im_shape[0], im_shape[1]))], for_training=False)
            im_info = [im.shape[0], im.shape[1], im_scale]
            im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
            for i in range(3):
                im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i] #bgr2rgb  mxnet rgb  opencv bgr
            data = nd.array(im_tensor)
            db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])
            
            timea = datetime.datetime.now()
            self.model.forward(db, is_train=False)
            timeb = datetime.datetime.now()
            diff = timeb - timea
            print('forward uses', diff.total_seconds(), 'seconds')

            net_out = self.model.get_outputs()      #网络的输出为len=9的list,针对三个不同的stride,分为三大块的list,其中每个list分别代表score,bbox,kpoint三个维度的结果,
            pre_nms_topN = self._rpn_pre_nms_top_n
            #post_nms_topN = self._rpn_post_nms_top_n
            #min_size_dict = self._rpn_min_size_fpn

            for s in self.feat_strides:
                _key = 'stride%s' % s
                # print(_key)
                stride = int(s)
                if s == self.feat_strides[0]:
                    idx = 0
                if s == self.feat_strides[1]:
                    idx = 3
                elif s == self.feat_strides[2]:
                    idx = 6
                # print('getting', im_scale, stride, idx, len(net_out), data.shape, file=sys.stderr)
                scores = net_out[idx].asnumpy()     #获取每个stride下的分类得分

                idx += 1
                # print('scores',stride, scores.shape, file=sys.stderr)
                scores = scores[:, self._num_anchors['stride%s'%s]:, :, :]    #去掉了其中lable的值???
                bbox_deltas = net_out[idx].asnumpy()
                idx += 1
                _height, _width = int(im_info[0] / stride), int(im_info[1] / stride)
                height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

                # kpoint
                kpoint_deltas = net_out[idx].asnumpy()

                A = self._num_anchors['stride%s' % s]
                K = height * width
                anchors = anchors_plane(height, width, stride, self._anchors_fpn['stride%s' % s].astype(np.float32))       #RP映射回原图中的坐标位置
                # print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr)
                anchors = anchors.reshape((K * A, 4))

                # print('predict bbox_deltas', bbox_deltas.shape, height, width)
                bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                # print('after clip pad', bbox_deltas.shape, height, width)
                bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

                kpoint_deltas = self._clip_pad(kpoint_deltas, (height, width))
                kpoint_deltas = kpoint_deltas.transpose((0, 2, 3, 1)).reshape((-1, 10))

                scores = self._clip_pad(scores, (height, width))
                scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                # print(anchors.shape, bbox_deltas.shape, A, K, file=sys.stderr)
                proposals = self._bbox_pred(anchors, bbox_deltas)
                proposals = clip_boxes(proposals, im_info[:2])  #将超出图像的坐标去除掉

                proposals_kp = kpoint_pred(anchors, kpoint_deltas)
                proposals_kp = clip_points(proposals_kp, im_info[:2])
                #取出score的top N
                scores_ravel = scores.ravel()
                order = scores_ravel.argsort()[::-1]
                if pre_nms_topN > 0:
                    order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                proposals_kp = proposals_kp[order, :]
                scores = scores[order]

                proposals /= im_scale
                proposals_kp /= im_scale

                proposals_list.append(proposals)
                proposals_kp_list.append(proposals_kp)
                scores_list.append(scores)

        proposals = np.vstack(proposals_list)
        proposals_kp = np.vstack(proposals_kp_list)
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        #if config.TEST.SCORE_THRESH>0.0:
        #  _count = np.sum(scores_ravel>config.TEST.SCORE_THRESH)
        #  order = order[:_count]
        #if pre_nms_topN > 0:
        #    order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        proposals_kp = proposals_kp[order, :]
        scores = scores[order]

        det = np.hstack((proposals, scores, proposals_kp)).astype(np.float32)

        #if np.shape(det)[0] == 0:
        #    print("Something wrong with the input image(resolution is too low?), generate fake proposals for it.")
        #    proposals = np.array([[1.0, 1.0, 2.0, 2.0]]*post_nms_topN, dtype=np.float32)
        #    scores = np.array([[0.9]]*post_nms_topN, dtype=np.float32)
        #    det = np.array([[1.0, 1.0, 2.0, 2.0, 0.9]]*post_nms_topN, dtype=np.float32)


        if self.nms_threshold < 1.0:
            keep = self.nms(det)
            det = det[keep, :]
        if threshold > 0.0:
            keep = np.where(det[:, 4] >= threshold)[0]
            det = det[keep, :]
        return det

    @staticmethod
    def _filter_boxes(boxes, min_size):
        """ Remove all boxes with any side smaller than min_size """
        ws = boxes[:, 2] - boxes[:, 0] + 1
        hs = boxes[:, 3] - boxes[:, 1] + 1
        keep = np.where((ws >= min_size) & (hs >= min_size))[0]
        return keep

    @staticmethod
    def _clip_pad(tensor, pad_shape):
        """
        Clip boxes of the pad area.
        :param tensor: [n, c, H, W]
        :param pad_shape: [h, w]
        :return: [n, c, h, w]
        """
        H, W = tensor.shape[2:]
        h, w = pad_shape

        if h < H or w < W:
            tensor = tensor[:, :, :h, :w].copy()

        return tensor
Esempio n. 15
0
class SSHDetector:
  def __init__(self, prefix, epoch, ctx_id=0, test_mode=False):
    self.ctx_id = ctx_id
    self.ctx = mx.gpu(self.ctx_id)
    self.fpn_keys = []
    fpn_stride = []
    fpn_base_size = []
    self._feat_stride_fpn = [32, 16, 8]

    for s in self._feat_stride_fpn:
        self.fpn_keys.append('stride%s'%s)
        fpn_stride.append(int(s))
        fpn_base_size.append(16)

    self._scales = np.array([32,16,8,4,2,1])
    self._ratios = np.array([1.0]*len(self._feat_stride_fpn))
    self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(base_size=fpn_base_size, scales=self._scales, ratios=self._ratios)))
    self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
    self._rpn_pre_nms_top_n = 1000
    #self._rpn_post_nms_top_n = rpn_post_nms_top_n
    #self.score_threshold = 0.05
    self.nms_threshold = 0.3
    self._bbox_pred = nonlinear_pred
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
    self.pixel_means = np.array([103.939, 116.779, 123.68]) #BGR

    if not test_mode:
      image_size = (640, 640)
      self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names = None)
      self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
      self.model.set_params(arg_params, aux_params)
    else:
      from rcnn.core.module import MutableModule
      image_size = (2400, 2400)
      data_shape = [('data', (1,3,image_size[0], image_size[1]))]
      self.model = MutableModule(symbol=sym, data_names=['data'], label_names=None,
                                context=self.ctx, max_data_shapes=data_shape)
      self.model.bind(data_shape, None, for_training=False)
      self.model.set_params(arg_params, aux_params)


  def detect(self, img, threshold=0.05, scales=[1.0]):
    proposals_list = []
    scores_list = []

    for im_scale in scales:

      if im_scale!=1.0:
        im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
      else:
        im = img
      im = im.astype(np.float32)
      #self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
      im_info = [im.shape[0], im.shape[1], im_scale]
      im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
      for i in range(3):
          im_tensor[0, i, :, :] = im[:, :, 2 - i] - self.pixel_means[2 - i]
      data = nd.array(im_tensor)
      db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])
      self.model.forward(db, is_train=False)
      net_out = self.model.get_outputs()
      pre_nms_topN = self._rpn_pre_nms_top_n
      #post_nms_topN = self._rpn_post_nms_top_n
      #min_size_dict = self._rpn_min_size_fpn

      for s in self._feat_stride_fpn:
          if len(scales)>1 and s==32 and im_scale==scales[-1]:
            continue
          _key = 'stride%s'%s
          stride = int(s)
          idx = 0
          if s==16:
            idx=2
          elif s==8:
            idx=4
          print('getting', im_scale, stride, idx, len(net_out), data.shape, file=sys.stderr)
          scores = net_out[idx].asnumpy()
          #print(scores.shape)
          idx+=1
          #print('scores',stride, scores.shape, file=sys.stderr)
          scores = scores[:, self._num_anchors['stride%s'%s]:, :, :]
          bbox_deltas = net_out[idx].asnumpy()

          #if DEBUG:
          #    print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
          #    print 'scale: {}'.format(im_info[2])

          _height, _width = int(im_info[0] / stride), int(im_info[1] / stride)
          height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

          A = self._num_anchors['stride%s'%s]
          K = height * width

          anchors = anchors_plane(height, width, stride, self._anchors_fpn['stride%s'%s].astype(np.float32))
          #print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr)
          anchors = anchors.reshape((K * A, 4))

          #print('pre', bbox_deltas.shape, height, width)
          bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
          #print('after', bbox_deltas.shape, height, width)
          bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

          scores = self._clip_pad(scores, (height, width))
          scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

          #print(anchors.shape, bbox_deltas.shape, A, K, file=sys.stderr)
          proposals = self._bbox_pred(anchors, bbox_deltas)
          #proposals = anchors

          proposals = clip_boxes(proposals, im_info[:2])

          #keep = self._filter_boxes(proposals, min_size_dict['stride%s'%s] * im_info[2])
          #proposals = proposals[keep, :]
          #scores = scores[keep]
          #print('333', proposals.shape)

          scores_ravel = scores.ravel()
          order = scores_ravel.argsort()[::-1]
          if pre_nms_topN > 0:
              order = order[:pre_nms_topN]
          proposals = proposals[order, :]
          scores = scores[order]

          proposals /= im_scale

          proposals_list.append(proposals)
          scores_list.append(scores)

    proposals = np.vstack(proposals_list)
    scores = np.vstack(scores_list)
    scores_ravel = scores.ravel()
    order = scores_ravel.argsort()[::-1]
    #if config.TEST.SCORE_THRESH>0.0:
    #  _count = np.sum(scores_ravel>config.TEST.SCORE_THRESH)
    #  order = order[:_count]
    #if pre_nms_topN > 0:
    #    order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    det = np.hstack((proposals, scores)).astype(np.float32)

    #if np.shape(det)[0] == 0:
    #    print("Something wrong with the input image(resolution is too low?), generate fake proposals for it.")
    #    proposals = np.array([[1.0, 1.0, 2.0, 2.0]]*post_nms_topN, dtype=np.float32)
    #    scores = np.array([[0.9]]*post_nms_topN, dtype=np.float32)
    #    det = np.array([[1.0, 1.0, 2.0, 2.0, 0.9]]*post_nms_topN, dtype=np.float32)

    
    if self.nms_threshold<1.0:
      keep = self.nms(det)
      det = det[keep, :]
    if threshold>0.0:
      keep = np.where(det[:, 4] >= threshold)[0]
      det = det[keep, :]
    return det

  @staticmethod
  def _filter_boxes(boxes, min_size):
      """ Remove all boxes with any side smaller than min_size """
      ws = boxes[:, 2] - boxes[:, 0] + 1
      hs = boxes[:, 3] - boxes[:, 1] + 1
      keep = np.where((ws >= min_size) & (hs >= min_size))[0]
      return keep

  @staticmethod
  def _clip_pad(tensor, pad_shape):
      """
      Clip boxes of the pad area.
      :param tensor: [n, c, H, W]
      :param pad_shape: [h, w]
      :return: [n, c, h, w]
      """
      H, W = tensor.shape[2:]
      h, w = pad_shape

      if h < H or w < W:
        tensor = tensor[:, :, :h, :w].copy()

      return tensor
Esempio n. 16
0
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              lr=0.001, lr_step='5'):
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    sym = eval('get_' + args.network + '_train')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [load_gt_roidb(args.dataset, image_set, args.root_path, args.dataset_path,
                            flip=not args.no_flip)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # load training data
    train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
                              ctx=ctx, work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print('output shape')
    pprint.pprint(out_shape_dict)

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
Esempio n. 17
0
def train_net(args,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              lr=0.001,
              lr_step='5'):
    # setup config
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True

    # load symbol
    sym = eval('get_' + args.network + '_train')(
        num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [
        load_gt_roidb(args.dataset,
                      image_set,
                      args.root_path,
                      args.dataset_path,
                      flip=not args.no_flip) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # load training data
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              batch_size=input_batch_size,
                              shuffle=not args.no_shuffle,
                              ctx=ctx,
                              work_load_list=args.work_load_list,
                              feat_stride=config.RPN_FEAT_STRIDE,
                              anchor_scales=config.ANCHOR_SCALES,
                              anchor_ratios=config.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (input_batch_size, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (input_batch_size, 100, 5)))
    logger.info('providing maximum shape %s %s' %
                (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(
            0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['bbox_pred_bias'])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = mx.callback.Speedometer(train_data.batch_size,
                                                 frequent=args.frequent,
                                                 auto_reset=False)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' %
                (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
Esempio n. 18
0
def train_net(args,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              lr=0.001,
              lr_step=50000):
    # set up logger
    logging.basicConfig()
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # setup config
    config.TRAIN.HAS_RPN = True
    config.TRAIN.BATCH_SIZE = 1
    config.TRAIN.BATCH_IMAGES = 1
    config.TRAIN.BATCH_ROIS = 128
    config.TRAIN.END2END = True
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True
    config.TRAIN.BG_THRESH_LO = 0.0

    # load symbol
    sym = eval('get_' + args.network + '_train')()
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)

    # print config
    pprint.pprint(config)

    # load dataset and prepare imdb for training
    imdb = eval(args.dataset)(args.image_set, args.root_path,
                              args.dataset_path)
    roidb = imdb.gt_roidb()
    if args.flip:
        roidb = imdb.append_flipped_images(roidb)

    # load training data
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              batch_size=config.TRAIN.BATCH_SIZE,
                              shuffle=True,
                              ctx=ctx,
                              work_load_list=args.work_load_list)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_SIZE, 3, 1000, 1000))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_SIZE, 100, 5)))
    print 'providing maximum shape', max_data_shape, max_label_shape

    # load pretrained
    arg_params, aux_params = load_param(pretrained, epoch, convert=True)

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    print 'output shape'
    pprint.pprint(out_shape_dict)

    # initialize params
    if not args.resume:
        arg_params['rpn_conv_3x3_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        arg_params['rpn_conv_3x3_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_conv_3x3_bias'])
        arg_params['rpn_cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        arg_params['rpn_cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_cls_score_bias'])
        arg_params['rpn_bbox_pred_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        arg_params['rpn_bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_bbox_pred_bias'])
        arg_params['cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        arg_params['cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['cls_score_bias'])
        arg_params['bbox_pred_weight'] = mx.random.normal(
            0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        arg_params['bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['bbox_pred_bias'])

    # check parameter shapes
    for k in sym.list_arguments():
        if k in data_shape_dict:
            continue
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = ['conv1', 'conv2']
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=args.work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric()
    cls_metric = metric.RCNNLogLossMetric()
    bbox_metric = metric.RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), imdb.num_classes)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), imdb.num_classes)
    epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': mx.lr_scheduler.FactorScheduler(lr_step, 0.1),
        'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE)
    }

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=args.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)