import logging

import mxnet as mx
import numpy as np

# NOTE: these imports assume the layout of the surrounding Faster R-CNN example
# package (rcnn/, utils/); adjust the module paths to match your repository.
from rcnn.config import config
from rcnn.loader import AnchorLoader
from rcnn.metric import AccuracyMetric, LogLossMetric, SmoothL1LossMetric
from rcnn.minibatch import assign_anchor
from rcnn.module import MutableModule
from rcnn.symbol import get_vgg_rpn
from utils.load_data import load_gt_roidb
from utils.load_model import load_param
from mxnet.callback import Speedometer


def train_rpn(image_set, year, root_path, devkit_path, pretrained, epoch,
              prefix, ctx, begin_epoch, end_epoch, frequent, kv_store,
              work_load_list=None, resume=False):
    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # load symbol; feat_sym exposes the RPN score map so the loader can
    # infer the feature-map size for anchor assignment
    sym = get_vgg_rpn()
    feat_sym = get_vgg_rpn().get_internals()['rpn_cls_score_output']

    # setup multi-gpu: scale the batch by the number of devices
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)

    # load training data (ground-truth roidb with horizontal flipping)
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=True)
    train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.BATCH_SIZE,
                              shuffle=True, mode='train', ctx=ctx,
                              work_load_list=work_load_list)

    # infer max shape: run anchor assignment on an empty ground-truth array at
    # the largest input size (1000x1000) to obtain the maximal label shapes,
    # so executors can be allocated once up front
    max_data_shape = [('data', (config.TRAIN.BATCH_SIZE, 3, 1000, 1000))]
    max_data_shape_dict = {k: v for k, v in max_data_shape}
    _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict)
    label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]])
    max_label_shape = [('label', label['label'].shape),
                       ('bbox_target', label['bbox_target'].shape),
                       ('bbox_inside_weight', label['bbox_inside_weight'].shape),
                       ('bbox_outside_weight', label['bbox_outside_weight'].shape)]
    logger.info('providing maximum shape %s %s', max_data_shape, max_label_shape)

    # load pretrained weights
    args, auxs = load_param(pretrained, epoch, convert=True)

    # initialize the RPN-specific params (the VGG backbone comes pretrained)
    if not resume:
        input_shapes = {k: v for k, v in train_data.provide_data + train_data.provide_label}
        arg_shape, _, _ = sym.infer_shape(**input_shapes)
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        args['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        args['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        args['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        args['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        args['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        args['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])

    # prepare training: freeze early conv blocks (all of them when fine-tuning)
    if config.TRAIN.FINETUNE:
        fixed_param_prefix = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']
    else:
        fixed_param_prefix = ['conv1', 'conv2']
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent)
    epoch_end_callback = mx.callback.do_checkpoint(prefix)
    if config.TRAIN.HAS_RPN:
        eval_metric = AccuracyMetric(use_ignore=True, ignore=-1)
        cls_metric = LogLossMetric(use_ignore=True, ignore=-1)
    else:
        eval_metric = AccuracyMetric()
        cls_metric = LogLossMetric()
    bbox_metric = SmoothL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': 0.001,
                        'lr_scheduler': mx.lr_scheduler.FactorScheduler(60000, 0.1),
                        'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE)}

    # train
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=kv_store,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=args, aux_params=auxs,
            begin_epoch=begin_epoch, num_epoch=end_epoch)
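

# A minimal invocation sketch. The dataset paths, pretrained prefix, and
# hyperparameter values below are illustrative assumptions (a PASCAL VOC 2007
# layout and a vgg16-0001.params checkpoint), not part of this module.
if __name__ == '__main__':
    train_rpn(image_set='trainval', year='2007',
              root_path='data', devkit_path='data/VOCdevkit',
              pretrained='model/vgg16', epoch=1, prefix='model/rpn',
              ctx=[mx.gpu(0)], begin_epoch=0, end_epoch=8, frequent=20,
              kv_store='device')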