def test_rcnn(imageset, year, root_path, devkit_path, prefix, epoch, ctx, vis=False, has_rpn=True, proposal='rpn', end2end=False): # load symbol and testing data if has_rpn: # sym = get_vgg_test() config.TRAIN.AGNOSTIC = True config.END2END = 1 config.PIXEL_MEANS = np.array([[[0,0,0]]]) sym = resnext_101(num_class=21) config.TEST.HAS_RPN = True config.TEST.RPN_PRE_NMS_TOP_N = 6000 config.TEST.RPN_POST_NMS_TOP_N = 300 voc, roidb = load_gt_roidb(imageset, year, root_path, devkit_path) else: sym = get_vgg_rcnn_test() voc, roidb = eval('load_test_' + proposal + '_roidb')(imageset, year, root_path, devkit_path) # get test data iter test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test') # load model args, auxs, _ = load_param(prefix, epoch, convert=True, ctx=ctx) # detect detector = Detector(sym, ctx, args, auxs) pred_eval(detector, test_data, voc, vis=vis)
def test_rcnn(imageset, year, root_path, devkit_path, prefix, epoch, ctx, vis=False, has_rpn=True, proposal='rpn'): # load symbol and testing data if has_rpn: sym = get_vgg_test() config.TEST.HAS_RPN = True config.TEST.RPN_PRE_NMS_TOP_N = 6000 config.TEST.RPN_POST_NMS_TOP_N = 300 voc, roidb = load_gt_roidb(imageset, year, root_path, devkit_path) else: sym = get_vgg_rcnn_test() voc, roidb = eval('load_test_' + proposal + '_roidb')(imageset, year, root_path, devkit_path) # get test data iter test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test') # load model args, auxs, _ = load_param(prefix, epoch, convert=True, ctx=ctx) # detect detector = Detector(sym, ctx, args, auxs) pred_eval(detector, test_data, voc, vis=vis)
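# Note on test_rcnn above: eval('load_test_' + proposal + '_roidb') builds the loader name from
# the proposal string, so the default proposal='rpn' resolves to load_test_rpn_roidb. Below is a
# minimal sketch of an equivalent explicit dispatch; load_test_rpn_roidb is the name implied by
# that call, any other entries would follow the same naming and are assumptions:
PROPOSAL_ROIDB_LOADERS = {
    'rpn': load_test_rpn_roidb,  # implied by proposal='rpn' in test_rcnn above
}

def load_test_proposal_roidb(proposal, imageset, year, root_path, devkit_path):
    # raises KeyError for an unknown proposal type instead of eval-ing an arbitrary string
    return PROPOSAL_ROIDB_LOADERS[proposal](imageset, year, root_path, devkit_path)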
def main(): logging.info('########## TRAIN FASTER-RCNN WITH APPROXIMATE JOINT END2END #############') init_config() if "resnet" in args.pretrained: sym = resnet_50(num_class=args.num_classes, bn_mom=args.bn_mom, bn_global=True, is_train=True) # consider background else: sym = get_faster_rcnn(num_classes=args.num_classes) # consider background feat_sym = sym.get_internals()['rpn_cls_score_output'] # setup for multi-gpu ctx = [mx.gpu(int(i)) for i in args.gpu_ids.split(',')] config.TRAIN.IMS_PER_BATCH *= len(ctx) max_data_shape, max_label_shape = get_max_shape(feat_sym) # data # voc, roidb = load_gt_roidb_from_list(args.dataset_name, args.lst, args.dataset_root, # args.outdata_path, flip=not args.no_flip) voc, roidb = load_gt_roidb(args.image_set, args.year, args.root_path, args.devkit_path, flip=not args.no_flip) train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.IMS_PER_BATCH, anchor_scales=(4, 8, 16, 32), shuffle=not args.no_shuffle, mode='train', ctx=ctx, need_mean=args.need_mean) # model args_params, auxs_params, _ = load_param(args.pretrained, args.load_epoch, convert=True) if not args.resume: args_params, auxs_params= init_model(args_params, auxs_params, train_data, sym, args.pretrained) data_names = [k[0] for k in train_data.provide_data] label_names = [k[0] for k in train_data.provide_label] batch_end_callback = Speedometer(train_data.batch_size, frequent=args.frequent) epoch_end_callback = do_checkpoint(args.prefix) optimizer_params = {'momentum': args.mom, 'wd': args.wd, 'learning_rate': args.lr, # 'lr_scheduler': WarmupScheduler(args.factor_step, 0.1, warmup_lr=0.1*args.lr, warmup_step=200) \ # if not args.resume else mx.lr_scheduler.FactorScheduler(args.factor_step, 0.1), 'lr_scheduler': mx.lr_scheduler.FactorScheduler(args.factor_step, 0.1), # seems no need warm up 'clip_gradient': 1.0, 'rescale_grad': 1.0} if "resnet" in args.pretrained: # only consider resnet-50 here fixed_param_prefix = ['conv0', 'stage1', 'stage2', 'bn_data', 'bn0'] else: fixed_param_prefix = ['conv1', 'conv2', 'conv3'] # train mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, max_data_shapes=max_data_shape, max_label_shapes=max_label_shape, fixed_param_prefix=fixed_param_prefix) mod.fit(train_data, eval_metric=metric(), epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=args.kv_store, optimizer='sgd', optimizer_params=optimizer_params, arg_params=args_params, aux_params=auxs_params, begin_epoch=args.load_epoch, num_epoch=args.num_epoch)
def train_net(image_set, year, root_path, devkit_path, pretrained, epoch, prefix, ctx, begin_epoch, end_epoch, frequent, kv_store, work_load_list=None, resume=False): # set up logger logger = logging.getLogger() logger.setLevel(logging.INFO) # load symbol sym = get_vgg_rpn() feat_sym = get_vgg_rpn().get_internals()['rpn_cls_score_output'] # setup multi-gpu config.TRAIN.BATCH_IMAGES *= len(ctx) config.TRAIN.BATCH_SIZE *= len(ctx) # load training data voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=True) train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.BATCH_SIZE, shuffle=True, mode='train', ctx=ctx, work_load_list=work_load_list) # infer max shape max_data_shape = [('data', (1, 3, 1000, 1000))] max_data_shape_dict = {k: v for k, v in max_data_shape} _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict) from rcnn.minibatch import assign_anchor import numpy as np label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]]) max_label_shape = [('label', label['label'].shape), ('bbox_target', label['bbox_target'].shape), ('bbox_inside_weight', label['bbox_inside_weight'].shape), ('bbox_outside_weight', label['bbox_outside_weight'].shape)] print 'providing maximum shape', max_data_shape, max_label_shape # load pretrained args, auxs = load_param(pretrained, epoch, convert=True) # initialize params if not resume: arg_shape, _, _ = sym.infer_shape(data=(1, 3, 224, 224)) arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) args['rpn_conv_3x3_weight'] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict['rpn_conv_3x3_weight']) args['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias']) args['rpn_cls_score_weight'] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict['rpn_cls_score_weight']) args['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias']) args['rpn_bbox_pred_weight'] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict['rpn_bbox_pred_weight']) args['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias']) # train solver = Solver(prefix, sym, ctx, begin_epoch, end_epoch, kv_store, args, auxs, momentum=0.9, wd=0.0005, learning_rate=1e-3, lr_scheduler=mx.lr_scheduler.FactorScheduler(60000, 0.1), mutable_data_shape=True, max_data_shape=max_data_shape, max_label_shape=max_label_shape) solver.fit(train_data, frequent=frequent)
def test_rpn(image_set, year, root_path, devkit_path, prefix, epoch, ctx, vis=False): # load symbol sym = get_vgg_rpn_test() # load testing data voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path) test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test') # load model args, auxs, _ = load_param(prefix, epoch, convert=True, ctx=ctx) # start testing detector = Detector(sym, ctx, args, auxs) imdb_boxes = generate_detections(detector, test_data, voc, vis=vis) voc.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
def test_rpn(image_set, year, root_path, devkit_path, trained, epoch, ctx): from rcnn.rpn.generate import Detector, generate_detections # load symbol sym = get_vgg_rpn_test() # load testing data voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path) test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test') # load trained args, auxs = load_param(trained, epoch, convert=True, ctx=ctx[0]) # start testing detector = Detector(sym, ctx[0], args, auxs) imdb_boxes = generate_detections(detector, test_data, voc, vis=False) voc.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
def train_net(config): General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, ModelParam, \ OptimizeParam, TestParam, transform, data_name, label_name, metric_list = config.generate_config(is_train=True) pGen = patch_config_as_nothrow(General) pKv = patch_config_as_nothrow(KvstoreParam) pRpn = patch_config_as_nothrow(RpnParam) pRoi = patch_config_as_nothrow(RoiParam) pBbox = patch_config_as_nothrow(BboxParam) pDataset = patch_config_as_nothrow(DatasetParam) pModel = patch_config_as_nothrow(ModelParam) pOpt = patch_config_as_nothrow(OptimizeParam) pTest = patch_config_as_nothrow(TestParam) gpus = pKv.gpus if len(gpus) == 0: ctx = [mx.cpu()] else: ctx = [mx.gpu(i) for i in gpus] input_batch_size = pKv.batch_image * len(ctx) pretrain_prefix = pModel.pretrain.prefix pretrain_epoch = pModel.pretrain.epoch save_path = os.path.join('experiments', pGen.name) model_prefix = os.path.join(save_path, 'checkpoint') begin_epoch = pOpt.schedule.begin_epoch end_epoch = pOpt.schedule.end_epoch lr_steps = pOpt.schedule.lr_steps ## load dataset if pDataset.Dataset == 'widerface': image_set = pDataset.image_set roidb = load_gt_roidb(pDataset.Dataset, image_set, root_path='data', dataset_path='data/widerface', flip=True) net = pModel.train_network if pOpt.schedule.begin_epoch != 0: net.load_model(model_prefix, pOpt.schedule.begin_epoch) else: net.load_model(pretrain_prefix) print('hello github!')
def test_rpn(image_set, year, root_path, devkit_path, prefix, epoch, ctx, vis): # set config config.TEST.HAS_RPN = True config.TEST.RPN_PRE_NMS_TOP_N = -1 config.TEST.RPN_POST_NMS_TOP_N = 2000 # load symbol sym = get_vgg_rpn_test() # load testing data voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path) test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test') # load model args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx) # start testing detector = Detector(sym, ctx, args, auxs) imdb_boxes = generate_detections(detector, test_data, voc, vis=vis) voc.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
def test_net(imageset, year, root_path, devkit_path, prefix, epoch, ctx, vis): # set config config.TEST.HAS_RPN = True # set up logger logger = logging.getLogger() logger.setLevel(logging.INFO) # load testing data voc, roidb = load_gt_roidb(imageset, year, root_path, devkit_path) test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test') # load model args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx) # load symbol sym = get_vgg_test() # detect detector = Detector(sym, ctx, args, auxs) pred_eval(detector, test_data, voc, vis=vis)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): mx.random.seed(3) np.random.seed(3) logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) prefix = os.path.join(final_output_path, prefix) config['final_output_path'] = final_output_path # load symbol shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_symbol(config, is_train=True) feat_pyramid_level = np.log2(config.network.RPN_FEAT_STRIDE).astype(int) feat_sym = [ sym.get_internals()['rpn_cls_score_p' + str(x) + '_output'] for x in feat_pyramid_level ] # setup multi-gpu batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config pprint.pprint(config) logger.info('training config:{}\n'.format(pprint.pformat(config))) #leonid: adding semicolumn ";" support to allow several different datasets to be merged datasets = config.dataset.dataset.split(';') image_sets = config.dataset.image_set.split(';') data_paths = config.dataset.dataset_path.split(';') if type(config.dataset.per_category_epoch_max) is str: per_category_epoch_max = [ float(x) for x in config.dataset.per_category_epoch_max.split(';') ] else: per_category_epoch_max = [float(config.dataset.per_category_epoch_max)] roidbs = [] categ_index_offs = 0 if 'classes_list_fname' not in config.dataset: classes_list_fname = '' else: classes_list_fname = config.dataset.classes_list_fname if 'num_ex_per_class' not in config.dataset: num_ex_per_class = '' else: num_ex_per_class = config.dataset.num_ex_per_class for iD, dataset in enumerate(datasets): # load dataset and prepare imdb for training image_sets_cur = [iset for iset in image_sets[iD].split('+')] for image_set in image_sets_cur: cur_roidb, cur_num_classes = load_gt_roidb( dataset, image_set, config.dataset.root_path, data_paths[iD], flip=config.TRAIN.FLIP, per_category_epoch_max=per_category_epoch_max[iD], return_num_classes=True, categ_index_offs=categ_index_offs, classes_list_fname=classes_list_fname, num_ex_per_class=num_ex_per_class) roidbs.append(cur_roidb) categ_index_offs += cur_num_classes # roidbs.extend([ # load_gt_roidb( # dataset, # image_set, # config.dataset.root_path, # data_paths[iD], # flip=config.TRAIN.FLIP, # per_category_epoch_max=per_category_epoch_max[iD]) # for image_set in image_sets]) roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, config) # load training data train_data = PyramidAnchorIterator( feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, feat_strides=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES, anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING, allowed_border=np.inf) # infer max shape max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5))) print 'providing maximum shape', max_data_shape, max_label_shape if not config.network.base_net_lock: data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) else: data_shape_dict = dict(train_data.provide_data_single) pprint.pprint(data_shape_dict) sym_instance.infer_shape(data_shape_dict) # load and initialize params if config.TRAIN.RESUME: print('continue training 
from ', begin_epoch) arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) sym_instance.init_weight(config, arg_params, aux_params) if config.TRAIN.LOAD_EMBEDDING: import cPickle with open(config.TRAIN.EMBEDDING_FNAME, 'rb') as fid: model_data = cPickle.load(fid) for fcn in ['1', '2', '3']: layer = model_data['dense_' + fcn] weight = ListList2ndarray(layer[0]) bias = mx.nd.array(layer[1]) arg_params['embed_dense_' + fcn + '_weight'] = weight arg_params['embed_dense_' + fcn + '_bias'] = bias # check parameter shapes sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # create solver fixed_param_prefix = config.network.FIXED_PARAMS alt_fixed_param_prefix = config.network.ALT_FIXED_PARAMS data_names = [k[0] for k in train_data.provide_data_single] if not config.network.base_net_lock: label_names = [k[0] for k in train_data.provide_label_single] else: label_names = [] mod = MutableModule( sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)], max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix, alt_fixed_param_prefix=alt_fixed_param_prefix) # Leonid: Comment out the following two lines if switching to smaller number of GPUs and resuming training, then after it starts running un-comment back # if config.TRAIN.RESUME: # mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch) #TODO: release this. # decide training params # metric if not config.network.base_net_lock: rpn_eval_metric = metric.RPNAccMetric() rpn_cls_metric = metric.RPNLogLossMetric() rpn_bbox_metric = metric.RPNL1LossMetric() rpn_fg_metric = metric.RPNFGFraction(config) eval_metric = metric.RCNNAccMetric(config) eval_fg_metric = metric.RCNNFGAccuracy(config) cls_metric = metric.RCNNLogLossMetric(config) bbox_metric = metric.RCNNL1LossMetric(config) eval_metrics = mx.metric.CompositeEvalMetric() # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric if not config.network.base_net_lock: all_child_metrics = [ rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, rpn_fg_metric, eval_fg_metric, eval_metric, cls_metric, bbox_metric ] else: all_child_metrics = [ rpn_fg_metric, eval_fg_metric, eval_metric, cls_metric, bbox_metric ] # all_child_metrics = [rpn_eval_metric, rpn_bbox_metric, rpn_fg_metric, eval_fg_metric, eval_metric, cls_metric, bbox_metric] ################################################ ### added / updated by Leonid to support oneshot ################################################ if config.network.EMBEDDING_DIM != 0: if config.network.EMBED_LOSS_ENABLED: all_child_metrics += [ metric.RepresentativesMetric(config, final_output_path) ] # moved from above. JS. 
all_child_metrics += [metric.EmbedMetric(config)] if config.network.BG_REPS: all_child_metrics += [metric.BGModelMetric(config)] if config.network.REPS_CLS_LOSS: all_child_metrics += [metric.RepsCLSMetric(config)] if config.network.ADDITIONAL_LINEAR_CLS_LOSS: all_child_metrics += [metric.RCNNLinLogLossMetric(config)] if config.network.VAL_FILTER_REGRESS: all_child_metrics += [metric.ValRegMetric(config)] if config.network.SCORE_HIST_REGRESS: all_child_metrics += [metric.ScoreHistMetric(config)] ################################################ for child_metric in all_child_metrics: eval_metrics.add(child_metric) # callback batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent) means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) epoch_end_callback = [ mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds) ] # decide learning rate base_lr = lr lr_factor = config.TRAIN.lr_factor lr_epoch = [float(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [ epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch ] lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [ int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff ] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) # optimizer optimizer_params = { 'momentum': config.TRAIN.momentum, 'wd': config.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'clip_gradient': None } # if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) if args.debug == 1: import copy arg_params_ = copy.deepcopy(arg_params) aux_params_ = copy.deepcopy(aux_params) # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=config.default.kvstore, optimizer='sgd', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch, config=config) if args.debug == 1: t = dictCompare(aux_params_, aux_params) t = dictCompare(arg_params_, arg_params)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): np.random.seed(0) mx.random.seed(0) logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) prefix = os.path.join(final_output_path, prefix) # load symbol shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_symbol(config, is_train=True) feat_sym = sym.get_internals()['rpn_cls_score_output'] # setup multi-gpu batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config pprint.pprint(config) logger.info('training config:{}\n'.format(pprint.pformat(config))) # load dataset and prepare imdb for training image_sets = [iset for iset in config.dataset.image_set.split('+')] roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, flip=config.TRAIN.FLIP) for image_set in image_sets] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, config) # load training data train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES, anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING) # infer max shape # max_dats_shape=['data', (1,3,600,1000)] max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] # max_data_shape=[], max_lable_shape=[] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5))) logger.info('providing maximum shape'+str(max_data_shape)+" "+str(max_label_shape)) data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) # add by chaojie logger.info("data_sahpe_dict:\n{}".format(pprint.pformat(data_shape_dict))) pprint.pprint(data_shape_dict) sym_instance.infer_shape(data_shape_dict) pprint.pprint(sym_instance.arg_shape_dict) logger.info("sym_instance.arg_shape_dict\n") logging.info(pprint.pformat(sym_instance.arg_shape_dict)) #dot = mx.viz.plot_network(sym, node_attrs={'shape': 'rect', 'fixedsize': 'false'}) #dot.render(os.path.join('./output/rcnn/network_vis', config.symbol + '_rcnn')) # load and initialize params if config.TRAIN.RESUME: print('continue training from ', begin_epoch) arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) sym_instance.init_weight(config, arg_params, aux_params) # check parameter shapes sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # create solver fixed_param_prefix = config.network.FIXED_PARAMS data_names = [k[0] for k in train_data.provide_data_single] label_names = [k[0] for k in train_data.provide_label_single] mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)], max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix) if config.TRAIN.RESUME: mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch) # decide training params # metric eval_metric = metric.RCNNAccMetric(config) cls_metric = metric.RCNNLogLossMetric(config) bbox_metric = metric.RCNNL1LossMetric(config) 
eval_metrics = mx.metric.CompositeEvalMetric() # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric if config.TRAIN.JOINT_TRAINING or (not config.TRAIN.LEARN_NMS): rpn_eval_metric = metric.RPNAccMetric() rpn_cls_metric = metric.RPNLogLossMetric() rpn_bbox_metric = metric.RPNL1LossMetric() for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric]: eval_metrics.add(child_metric) for child_metric in [eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) if config.TRAIN.LEARN_NMS: eval_metrics.add(metric.NMSLossMetric(config, 'pos')) eval_metrics.add(metric.NMSLossMetric(config, 'neg')) eval_metrics.add(metric.NMSAccMetric(config)) # callback batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent) means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds)] # decide learning rate base_lr = lr lr_factor = config.TRAIN.lr_factor lr_epoch = [float(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) # optimizer optimizer_params = {'momentum': config.TRAIN.momentum, 'wd': config.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'rescale_grad': 1.0, 'clip_gradient': None} if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=config.default.kvstore, optimizer='sgd', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step):
    # create the logger and the corresponding output path
    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)
    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    # feature symbol: take rpn_cls_score_output from the network sym
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    # setup multi-gpu
    # enable multi-GPU training; each card processes one batch
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size
    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))
    # load dataset and prepare imdb for training
    # image sets are separated by '+', e.g. 2007_trainval+2012_trainval
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    # load the gt roidb given the dataset type, image set, dataset root and dataset path;
    # TRAIN.FLIP augments the data with flipped copies
    roidbs = [
        load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path,
                      config.dataset.dataset_path, flip=config.TRAIN.FLIP)
        for image_set in image_sets
    ]
    # merge the individual roidbs
    roidb = merge_roidb(roidbs)
    # filter rois according to the rules in the config
    roidb = filter_roidb(roidb, config)
    # load training data
    # AnchorLoader builds the classification/regression anchor targets (positive/negative anchors)
    # from the roidb; it needs the anchor scales, ratios and the feature stride
    train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size,
                              shuffle=config.TRAIN.SHUFFLE, ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE,
                              anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)
    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)
    data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)
    # load and initialize params; if RESUME is set, continue from the previous run
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        # load the resumed arg/aux params from prefix and begin_epoch, converting as needed
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)
    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)
    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger,
                        context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)],
                        max_label_shapes=[max_label_shape for _ in range(batch_size)],
                        fixed_param_prefix=fixed_param_prefix)
    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)
    # decide training params
    # metric
    # RPN- and RCNN-related evaluation metrics
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    # composite metric that aggregates all of the metrics above
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric,
                         eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    # callbacks fired after each batch and after each epoch;
    # batch_end_callback fires every `frequent` batches
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    # bbox means/stds: tiled twice if bbox regression is class-agnostic, otherwise NUM_CLASSES times
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    # callbacks run at the end of each epoch
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate from the schedule; e.g. the default initial lr for voc is 0.0005
    base_lr = lr
    # lr decay factor
    lr_factor = config.TRAIN.lr_factor
    # lr decay epochs; lr_step looks like '3, 5', meaning the lr is decayed at epochs 3 and 5
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    # keep only the decay epochs after begin_epoch, shifted by begin_epoch
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    print('lr_epoch', lr_epoch, 'begin_epoch', begin_epoch)
    # compute the lr the current epoch should already have
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    # multi-factor lr scheduler with warmup
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr, config.TRAIN.warmup_step)
    # optimizer params: momentum, wd, lr, lr_scheduler, rescale_grad and clip_gradient
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }
    if not isinstance(train_data, PrefetchingIter):
        print('!!!train_data is not PrefetchingIter!!!')
        train_data = PrefetchingIter(train_data)
    # train
    # fit on train_data with eval_metrics; epoch_end_callback fires at the end of each epoch,
    # batch_end_callback after each batch; the optimizer is sgd with the params above,
    # running from begin_epoch to end_epoch
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=config.default.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params,
            begin_epoch=begin_epoch, num_epoch=end_epoch)
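# A small worked example of the learning-rate schedule arithmetic used above; every number here
# is illustrative, not taken from any real config or roidb:
base_lr, lr_factor, begin_epoch, batch_size = 0.0005, 0.1, 0, 2
lr_step = '4,6'
num_images = 10000  # stand-in for len(roidb)

lr_epoch = [float(e) for e in lr_step.split(',')]                       # [4.0, 6.0]
lr_epoch_diff = [e - begin_epoch for e in lr_epoch if e > begin_epoch]  # [4.0, 6.0]
lr = base_lr * lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))        # 0.0005, no step passed yet
lr_iters = [int(e * num_images / batch_size) for e in lr_epoch_diff]    # [20000, 30000]
# WarmupMultiFactorScheduler then multiplies the learning rate by lr_factor at iterations
# 20000 and 30000; resuming at begin_epoch=5 would instead start from 0.0005 * 0.1, since one
# decay step has already passed.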
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): mx.random.seed(3) np.random.seed(3) logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) prefix = os.path.join(final_output_path, prefix) shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) sym_instance = detnet.detnet() sym = sym_instance.get_symbol(config, is_train=True) feat_pyramid_level = np.log2(config.network.RPN_FEAT_STRIDE).astype(int) feat_sym = [ sym.get_internals()['rpn_cls_score_p' + str(x) + '_output'] for x in feat_pyramid_level ] batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size pprint.pprint(config) logger.info('training config:{}\n'.format(pprint.pformat(config))) image_sets = [iset for iset in config.dataset.image_set.split('+')] roidbs = [ load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, flip=config.TRAIN.FLIP) for image_set in image_sets ] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, config) train_data = PyramidAnchorIterator( feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, feat_strides=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES, anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING, allowed_border=np.inf) max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([int(v[1] // 16 * 16) for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5))) print('providing maximum shape', max_data_shape, max_label_shape) data_shape_dict = dict(train_data.provide_data + train_data.provide_label) pprint.pprint(data_shape_dict) sym_instance.infer_shape(data_shape_dict) if config.TRAIN.RESUME: print('continue training from ', begin_epoch) arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = None, None #sym_instance.init_weight(config, arg_params, aux_params) #sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) data_names = [k[0] for k in train_data.provide_data] label_names = [k[0] for k in train_data.provide_label] mod = mx.mod.Module(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx) rpn_eval_metric = metric.RPNAccMetric() rpn_cls_metric = metric.RPNLogLossMetric() rpn_bbox_metric = metric.RPNL1LossMetric() rpn_fg_metric = metric.RPNFGFraction(config) eval_metric = metric.RCNNAccMetric(config) eval_fg_metric = metric.RCNNFGAccuracy(config) cls_metric = metric.RCNNLogLossMetric(config) bbox_metric = metric.RCNNL1LossMetric(config) eval_metrics = mx.metric.CompositeEvalMetric() for child_metric in [ rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, rpn_fg_metric, eval_fg_metric, eval_metric, cls_metric, bbox_metric ]: eval_metrics.add(child_metric) batch_end_callback = [ mx.callback.Speedometer(train_data.batch_size, frequent=1, auto_reset=False) ] epoch_end_callback = [mx.callback.do_checkpoint(prefix, period=1)] base_lr = lr lr_factor = config.TRAIN.lr_factor lr_epoch = [float(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [ epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch ] lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [ int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff ] print('lr', lr, 
'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(step=lr_iters, factor=lr_factor) optimizer_params = { "momentum": config.TRAIN.momentum, 'wd': config.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'clip_gradient': None } if not isinstance(train_data, mx.io.PrefetchingIter): train_data = mx.io.PrefetchingIter(train_data) if DEBUG: train_data.reset() it = train_data.next() mod.bind(data_shapes=train_data.provide_data, label_shapes=train_data.provide_label, for_training=True, force_rebind=False) mod.init_params(arg_params=arg_params, aux_params=aux_params, allow_missing=True) mod.init_optimizer(optimizer_params=optimizer_params) eval_metrics.reset() next_data_batch = train_data.next() for i in range(100): print(i) mod.forward_backward(next_data_batch) mod.update() mod.update_metric(eval_metrics, next_data_batch.label) print(eval_metrics) mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, optimizer='sgd', optimizer_params=optimizer_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
def train_rpn(image_set, year, root_path, devkit_path, pretrained, epoch, prefix, ctx, begin_epoch, end_epoch, frequent, kv_store, work_load_list=None, resume=False): # set up logger logger = logging.getLogger() logger.setLevel(logging.INFO) # load symbol sym = get_vgg_rpn() feat_sym = get_vgg_rpn().get_internals()['rpn_cls_score_output'] # setup multi-gpu config.TRAIN.BATCH_IMAGES *= len(ctx) config.TRAIN.BATCH_SIZE *= len(ctx) # load training data voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=True) train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.BATCH_SIZE, shuffle=True, mode='train', ctx=ctx, work_load_list=work_load_list) # infer max shape max_data_shape = [('data', (config.TRAIN.BATCH_SIZE, 3, 1000, 1000))] max_data_shape_dict = {k: v for k, v in max_data_shape} _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict) from rcnn.minibatch import assign_anchor import numpy as np label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]]) max_label_shape = [('label', label['label'].shape), ('bbox_target', label['bbox_target'].shape), ('bbox_inside_weight', label['bbox_inside_weight'].shape), ('bbox_outside_weight', label['bbox_outside_weight'].shape)] print 'providing maximum shape', max_data_shape, max_label_shape # load pretrained args, auxs = load_param(pretrained, epoch, convert=True) # initialize params if not resume: input_shapes = {k: v for k, v in train_data.provide_data + train_data.provide_label} arg_shape, _, _ = sym.infer_shape(**input_shapes) arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) args['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight']) args['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias']) args['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight']) args['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias']) args['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight']) args['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias']) # prepare training if config.TRAIN.FINETUNE: fixed_param_prefix = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5'] else: fixed_param_prefix = ['conv1', 'conv2'] data_names = [k[0] for k in train_data.provide_data] label_names = [k[0] for k in train_data.provide_label] batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent) epoch_end_callback = mx.callback.do_checkpoint(prefix) if config.TRAIN.HAS_RPN is True: eval_metric = AccuracyMetric(use_ignore=True, ignore=-1) cls_metric = LogLossMetric(use_ignore=True, ignore=-1) else: eval_metric = AccuracyMetric() cls_metric = LogLossMetric() bbox_metric = SmoothL1LossMetric() eval_metrics = mx.metric.CompositeEvalMetric() for child_metric in [eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) optimizer_params = {'momentum': 0.9, 'wd': 0.0005, 'learning_rate': 0.001, 'lr_scheduler': mx.lr_scheduler.FactorScheduler(60000, 0.1), 'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE)} # train mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, work_load_list=work_load_list, max_data_shapes=max_data_shape, max_label_shapes=max_label_shape, fixed_param_prefix=fixed_param_prefix) mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=kv_store, optimizer='sgd', 
optimizer_params=optimizer_params, arg_params=args, aux_params=auxs, begin_epoch=begin_epoch, num_epoch=end_epoch)
def train_rpn(cfg, dataset, image_set, root_path, dataset_path, frequent, kvstore, flip, shuffle, resume, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, train_shared, lr, lr_step, logger=None, output_path=None): # set up logger if not logger: logging.basicConfig() logger = logging.getLogger() logger.setLevel(logging.INFO) # set up config cfg.TRAIN.BATCH_IMAGES = cfg.TRAIN.ALTERNATE.RPN_BATCH_IMAGES # load symbol sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() sym = sym_instance.get_symbol_rpn(cfg, is_train=True) feat_sym = sym.get_internals()['rpn_cls_score_output'] # setup multi-gpu batch_size = len(ctx) input_batch_size = cfg.TRAIN.BATCH_IMAGES * batch_size # print cfg pprint.pprint(cfg) logger.info('training rpn cfg:{}\n'.format(pprint.pformat(cfg))) # load dataset and prepare imdb for training image_sets = [iset for iset in image_set.split('+')] roidbs = [ load_gt_roidb(dataset, image_set, root_path, dataset_path, result_path=output_path, flip=flip) for image_set in image_sets ] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, cfg) # load training data train_data = AnchorLoader(feat_sym, roidb, cfg, batch_size=input_batch_size, shuffle=shuffle, ctx=ctx, feat_stride=cfg.network.RPN_FEAT_STRIDE, anchor_scales=cfg.network.ANCHOR_SCALES, anchor_ratios=cfg.network.ANCHOR_RATIOS, aspect_grouping=cfg.TRAIN.ASPECT_GROUPING) # infer max shape max_data_shape = [('data', (cfg.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) print('providing maximum shape', max_data_shape, max_label_shape) # infer shape data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) sym_instance.infer_shape(data_shape_dict) # load and initialize params if resume: print('continue training from ', begin_epoch) arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) sym_instance.init_weight_rpn(cfg, arg_params, aux_params) # check parameter shapes sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # create solver data_names = [k[0] for k in train_data.provide_data_single] label_names = [k[0] for k in train_data.provide_label_single] if train_shared: fixed_param_prefix = cfg.network.FIXED_PARAMS_SHARED else: fixed_param_prefix = cfg.network.FIXED_PARAMS mod = MutableModule( sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in xrange(batch_size)], max_label_shapes=[max_label_shape for _ in xrange(batch_size)], fixed_param_prefix=fixed_param_prefix) # decide training params # metric eval_metric = metric.RPNAccMetric() cls_metric = metric.RPNLogLossMetric() bbox_metric = metric.RPNL1LossMetric() eval_metrics = mx.metric.CompositeEvalMetric() for child_metric in [eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) # callback batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=frequent) # epoch_end_callback = mx.callback.do_checkpoint(prefix) epoch_end_callback = mx.callback.module_checkpoint( mod, prefix, period=1, save_optimizer_states=True) # decide learning rate base_lr = lr lr_factor = cfg.TRAIN.lr_factor lr_epoch = [int(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [ epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch ] lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [ int(epoch * 
len(roidb) / batch_size) for epoch in lr_epoch_diff ] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, cfg.TRAIN.warmup, cfg.TRAIN.warmup_lr, cfg.TRAIN.warmup_step) # optimizer optimizer_params = { 'momentum': cfg.TRAIN.momentum, 'wd': cfg.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'rescale_grad': 1.0, 'clip_gradient': None } if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=kvstore, optimizer='sgd', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): mx.random.seed(3) np.random.seed(3) logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) prefix = os.path.join(final_output_path, prefix) # load symbol shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_symbol(config, is_train=True) feat_pyramid_level = np.log2(config.network.RPN_FEAT_STRIDE).astype(int) feat_sym = [ sym.get_internals()['rpn_cls_score_p' + str(x) + '_output'] for x in feat_pyramid_level ] # setup multi-gpu batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config pprint.pprint(config) logger.info('training config:{}\n'.format(pprint.pformat(config))) # load dataset and prepare imdb for training image_sets = [iset for iset in config.dataset.image_set.split('+')] roidbs = [ load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, flip=config.TRAIN.FLIP) for image_set in image_sets ] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, config) # load training data train_data = PyramidAnchorIterator( feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, feat_strides=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES, anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING, allowed_border=np.inf) # infer max shape max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5))) print 'providing maximum shape', max_data_shape, max_label_shape data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) pprint.pprint(data_shape_dict) sym_instance.infer_shape(data_shape_dict) # load and initialize params if config.TRAIN.RESUME: print('continue training from ', begin_epoch) arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) # sym_instance.init_weight(config, arg_params, aux_params) # check parameter shapes # sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # decide training params # metric rpn_eval_metric = metric.RPNAccMetric() rpn_cls_metric = metric.RPNLogLossMetric() rpn_bbox_metric = metric.RPNL1LossMetric() rpn_fg_metric = metric.RPNFGFraction(config) eval_metric = metric.RCNNAccMetric(config) eval_fg_metric = metric.RCNNFGAccuracy(config) cls_metric = metric.RCNNLogLossMetric(config) bbox_metric = metric.RCNNL1LossMetric(config) eval_metrics = mx.metric.CompositeEvalMetric() # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric for child_metric in [ rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, rpn_fg_metric, eval_fg_metric, eval_metric, cls_metric, bbox_metric ]: eval_metrics.add(child_metric) # callback # batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent) means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) # epoch_end_callback = 
[mx.callback.module_checkpoint(mod, prefix, period=1, # save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds)] # decide learning rate base_lr = lr lr_factor = config.TRAIN.lr_factor lr_epoch = [float(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [ epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch ] lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [ int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff ] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) # optimizer optimizer_params = { 'momentum': config.TRAIN.momentum, 'wd': config.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'clip_gradient': None } if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) net = FPNNet(sym, args_pretrained=arg_params, auxes_pretrained=aux_params) # create multi-threaded DataParallel Model. net_parallel = DataParallelModel(net, ctx_list=ctx) # create trainer, # !Important: A trainer can be only created after the function `resnet_ctx` is called. # Please Note that DataParallelModel will call reset_ctx to initialize parameters on gpus. trainer = mx.gluon.Trainer(net.collect_params(), 'sgd', optimizer_params) for epoch in range(begin_epoch, config.TRAIN.end_epoch): train_data.reset() net.hybridize(static_alloc=True, static_shape=False) progress_bar = tqdm.tqdm(total=len(roidb)) for nbatch, data_batch in enumerate(train_data): inputs = [[ x.astype('f').as_in_context(c) for x in d + l ] for c, d, l in zip(ctx, data_batch.data, data_batch.label)] with ag.record(): outputs = net_parallel(*inputs) ag.backward(sum(outputs, ())) trainer.step(1) eval_metrics.update(data_batch.label[0], outputs[0]) if nbatch % 100 == 0: msg = ','.join([ '{}={:.3f}'.format(w, v) for w, v in zip(*eval_metrics.get()) ]) msg += ",lr={}".format(trainer.learning_rate) logger.info(msg) print(msg) eval_metrics.reset() progress_bar.update(len(inputs)) progress_bar.close() net.hybridize(static_alloc=True, static_shape=False) re = ("mAP", 0.0) logger.info(re) save_path = "{}-{}-{}.params".format(prefix, epoch, re[1]) net.collect_params().save(save_path) logger.info("Saved checkpoint to {}.".format(save_path))
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step):
    if config.dataset.dataset != 'JSONList':
        logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)
        prefix = os.path.join(final_output_path, prefix)
        shutil.copy2(args.cfg, prefix+'.yaml')
    else:
        import datetime
        import logging
        final_output_path = config.output_path
        prefix = prefix + '_' + datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
        prefix = os.path.join(final_output_path, prefix)
        shutil.copy2(args.cfg, prefix+'.yaml')
        log_file = prefix + '.log'
        head = '%(asctime)-15s %(message)s'
        logging.basicConfig(filename=log_file, format=head)
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        logger.info('prefix: %s' % prefix)
        print('prefix: %s' % prefix)
    # load symbol
    #shutil.copy2(os.path.join(curr_path, '..', 'symbols', config.symbol + '.py'), final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    # setup multi-gpu
    batch_size = config.TRAIN.IMAGES_PER_GPU * len(ctx)
    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))
    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path,
                            config.dataset.dataset_path, flip=config.TRAIN.FLIP)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)
    # load training data
    if config.network.MULTI_RPN:
        assert False, 'still developing'  ###
        num_layers = len(config.network.MULTI_RPN_STRIDES)
        rpn_syms = [sym.get_internals()['rpn%d_cls_score_output' % l] for l in range(num_layers)]
        train_data = PyramidAnchorLoader(rpn_syms, roidb, config, batch_size=batch_size,
                                         shuffle=config.TRAIN.SHUFFLE, ctx=ctx,
                                         feat_strides=config.network.MULTI_RPN_STRIDES,
                                         anchor_scales=config.network.ANCHOR_SCALES,
                                         anchor_ratios=config.network.ANCHOR_RATIOS,
                                         aspect_grouping=config.TRAIN.ASPECT_GROUPING,
                                         allowed_border=np.inf)
    else:
        feat_sym = sym.get_internals()['rpn_cls_score_output']
        train_data = AnchorLoader(feat_sym, roidb, config, batch_size=batch_size,
                                  shuffle=config.TRAIN.SHUFFLE, ctx=ctx,
                                  feat_stride=config.network.RPN_FEAT_STRIDE,
                                  anchor_scales=config.network.ANCHOR_SCALES,
                                  anchor_ratios=config.network.ANCHOR_RATIOS,
                                  aspect_grouping=config.TRAIN.ASPECT_GROUPING)
    # infer max shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)
    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)
    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)
    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    mod = MutableModule(sym, train_data.data_names, train_data.label_names, context=ctx,
                        logger=logger, fixed_param_prefix=fixed_param_prefix)
    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)
    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
bbox_metric = metric.RCNNL1LossMetric(config) eval_metrics = mx.metric.CompositeEvalMetric() # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) if config.network.PREDICT_KEYPOINTS: kps_cls_acc = metric.KeypointAccMetric(config) kps_cls_loss = metric.KeypointLogLossMetric(config) kps_pos_loss = metric.KeypointL1LossMetric(config) eval_metrics.add(kps_cls_acc) eval_metrics.add(kps_cls_loss) eval_metrics.add(kps_pos_loss) # callback batch_end_callback = callback.Speedometer(batch_size, frequent=args.frequent) means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) epoch_end_callback = [mx.callback.do_checkpoint(prefix)] # decide learning rate base_lr = lr lr_factor = config.TRAIN.lr_factor lr_epoch = [float(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) # optimizer optimizer_params = {'momentum': config.TRAIN.momentum, 'wd': config.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'rescale_grad': 1.0, 'clip_gradient': None} if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=config.TRAIN.kvstore, optimizer='sgd', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) time.sleep(10) train_data.iters[0].terminate()
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): # 创建logger和对应的输出路径 logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) prefix = os.path.join(final_output_path, prefix) # load symbol shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_symbol(config, is_train=True) # 特征symbol,从网络sym中获取rpn_cls_score_output feat_sym = sym.get_internals()['rpn_cls_score_output'] # setup multi-gpu # 使能多GPU训练,每一张卡训练一个batch batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config pprint.pprint(config) logger.info('training config:{}\n'.format(pprint.pformat(config))) # load dataset and prepare imdb for training # 加载数据集同时准备训练的imdb,使用+分割不同的图像数据集,比如2007_trainval+2012_trainval image_sets = [iset for iset in config.dataset.image_set.split('+')] # load gt roidb加载gt roidb,根据数据集类型,图像集具体子类,数据集根目录和数据集路径,同时配置相关TRAIN为FLIP来增广数据 roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, flip=config.TRAIN.FLIP) for image_set in image_sets] # 合并不同的roidb roidb = merge_roidb(roidbs) # 根据配置文件中对应的过滤规则来滤出roi roidb = filter_roidb(roidb, config) # load training data # 加载训练数据,anchor Loader为对应分类和回归的锚点加载,通过对应的roidb,查找对应的正负样本的锚点,该生成器需要参数锚点尺度,ratios和对应的feature的stride train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES, anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING) # infer max shape max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5))) print('providing maximum shape', max_data_shape, max_label_shape) data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) pprint.pprint(data_shape_dict) sym_instance.infer_shape(data_shape_dict) # load and initialize params if config.TRAIN.RESUME: print('continue training from ', begin_epoch) arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) sym_instance.init_weight(config, arg_params, aux_params) # check parameter shapes sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # create solver fixed_param_prefix = config.network.FIXED_PARAMS data_names = [k[0] for k in train_data.provide_data_single] label_names = [k[0] for k in train_data.provide_label_single] mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)], max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix) if config.TRAIN.RESUME: mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch) # decide training params # metric rpn_eval_metric = metric.RPNAccMetric() rpn_cls_metric = metric.RPNLogLossMetric() rpn_bbox_metric = metric.RPNL1LossMetric() eval_metric = metric.RCNNAccMetric(config) cls_metric = metric.RCNNLogLossMetric(config) bbox_metric = metric.RCNNL1LossMetric(config) eval_metrics = mx.metric.CompositeEvalMetric() # 
rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) # callback batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent) means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds)] # decide learning rate base_lr = lr lr_factor = config.TRAIN.lr_factor lr_epoch = [float(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) # optimizer optimizer_params = {'momentum': config.TRAIN.momentum, 'wd': config.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'rescale_grad': 1.0, 'clip_gradient': None} if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=config.default.kvstore, optimizer='sgd', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
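# The means/stds handed to callback.do_checkpoint above are per-target bbox
# normalization constants tiled per class.  A small numpy sketch, assuming the
# common (0,0,0,0)/(0.1,0.1,0.2,0.2) values and a hypothetical 21-class dataset;
# with CLASS_AGNOSTIC only two sets (background + foreground) are tiled.
# Checkpoint callbacks of this kind typically fold the stds/means back into the
# saved bbox_pred weights (w * std, b * std + mean); that folding is only
# described here, not reimplemented.
import numpy as np

BBOX_MEANS = (0.0, 0.0, 0.0, 0.0)
BBOX_STDS = (0.1, 0.1, 0.2, 0.2)
NUM_CLASSES = 21                                              # hypothetical class count

means_agnostic = np.tile(np.array(BBOX_MEANS), 2)             # shape (8,)
stds_agnostic = np.tile(np.array(BBOX_STDS), 2)
means_per_class = np.tile(np.array(BBOX_MEANS), NUM_CLASSES)  # shape (84,)
stds_per_class = np.tile(np.array(BBOX_STDS), NUM_CLASSES)
print(means_agnostic.shape, means_per_class.shape)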
def train_feature_distance_net(args, ctx, pretrained, pretrained_flow, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): # ==============prepare logger============== logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) prefix = os.path.join(final_output_path, prefix) # ==============load symbol============== shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) sym_instance = eval(config.symbol + '.' + config.symbol)() if config.TRAIN.G_type == 0: sym = sym_instance.get_train_feature_distance_symbol(config) elif config.TRAIN.G_type == 1: sym = sym_instance.get_train_feature_distance_symbol_res(config) # ==============setup multi-gpu============== batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # ==============print config============== pprint.pprint(config) logger.info('training config:{}\n'.format(pprint.pformat(config))) # ==============load dataset and prepare imdb for training============== image_sets = [iset for iset in config.dataset.image_set.split('+')] roidbs = [ load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, flip=config.TRAIN.FLIP) for image_set in image_sets ] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, config) train_iter = ImagenetVIDIter(roidb, input_batch_size, config, config.TRAIN.SHUFFLE, ctx) # infer max shape max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))), ('data_ref', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] print 'providing maximum shape', max_data_shape data_shape_dict = dict(train_iter.provide_data_single) pprint.pprint(data_shape_dict) sym_instance.infer_shape(data_shape_dict) # ==============init params============== if config.TRAIN.RESUME: print('continue training from ', begin_epoch) arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) # arg_params_flow, aux_params_flow = load_param(pretrained_flow, epoch, convert=True) # arg_params.update(arg_params_flow) # aux_params.update(aux_params_flow) sym_instance.init_weight(config, arg_params, aux_params) # ==============check parameter shapes============== # sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # ==============create solver============== fixed_param_prefix = config.network.FIXED_PARAMS data_names = train_iter.data_name label_names = train_iter.label_name mod = MutableModule( sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)], max_label_shapes=None, fixed_param_prefix=fixed_param_prefix) if config.TRAIN.RESUME: mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch) # ==============optimizer============== optimizer_params = { 'learning_rate': 0.00005, } if not isinstance(train_iter, PrefetchingIter): train_iter = PrefetchingIter(train_iter) batch_end_callback = callback.Speedometer(train_iter.batch_size, frequent=args.frequent) epoch_end_callback = [ mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix) ] feature_L2_loss = metric.FeatureL2LossMetric(config) eval_metrics = mx.metric.CompositeEvalMetric() eval_metrics.add(feature_L2_loss) mod.fit(train_iter, eval_metric=eval_metrics, 
epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=config.default.kvstore, optimizer='RMSprop', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch, initializer=mx.init.Normal(0.02), allow_missing=True)
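# The feature-distance training above is scored by a single L2 metric added to
# a CompositeEvalMetric.  A minimal illustrative EvalMetric subclass, assuming
# the network's first output already holds the element-wise squared feature
# difference; this is a sketch, not the actual metric.FeatureL2LossMetric.
import mxnet as mx

class SimpleL2LossMetric(mx.metric.EvalMetric):
    def __init__(self):
        super(SimpleL2LossMetric, self).__init__('SimpleL2Loss')

    def update(self, labels, preds):
        # preds[0] is assumed to be the squared difference between the two
        # feature maps being matched (one entry per example in the batch)
        loss = preds[0].asnumpy()
        self.sum_metric += float(loss.sum())
        self.num_inst += loss.shape[0]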
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): mx.random.seed(3) np.random.seed(3) if not os.path.exists(config.output_path): os.mkdir(config.output_path) logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) prefix = os.path.join(final_output_path, prefix) # load symbol shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_symbol(config, is_train=True) feat_pyramid_level = np.log2(config.network.RPN_FEAT_STRIDE).astype(int) feat_sym = [ sym.get_internals()['rpn_cls_score_p' + str(x) + '_output'] for x in feat_pyramid_level ] # setup multi-gpu batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config #pprint.pprint(config) #logger.info('training config:{}\n'.format(pprint.pformat(config))) # load dataset and prepare imdb for training image_sets = [iset for iset in config.dataset.image_set.split('+')] roidbs = [ load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, flip=config.TRAIN.FLIP) for image_set in image_sets ] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, config) # load training data train_data = PyramidAnchorIterator( feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, feat_strides=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES, anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING, allowed_border=np.inf) # infer max shape max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3 * config.CROP_NUM * config.CROP_NUM, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 6))) #change gt_boxes to 1,100,6 print 'providing maximum shape', max_data_shape, max_label_shape data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) pprint.pprint(data_shape_dict) sym_instance.infer_shape(data_shape_dict) # load and initialize params if config.TRAIN.RESUME: print('continue training from ', begin_epoch) arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) single_shape = arg_params['conv1_weight'].shape temp_conv1_weight = nd.empty( (single_shape[0], single_shape[1] * config.CROP_NUM * config.CROP_NUM, single_shape[2], single_shape[3]), dtype=arg_params['conv1_weight'].dtype) for i in range(config.CROP_NUM * config.CROP_NUM): temp_conv1_weight[:, i * single_shape[1]:(i + 1) * single_shape[1], :, :] = arg_params[ 'conv1_weight'][:, :, :, :] del arg_params['conv1_weight'] arg_params['conv1_weight'] = temp_conv1_weight sym_instance.init_weight(config, arg_params, aux_params) # check parameter shapes sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # create solver fixed_param_prefix = config.network.FIXED_PARAMS data_names = [k[0] for k in train_data.provide_data_single] label_names = [k[0] for k in train_data.provide_label_single] mod = MutableModule( sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)], max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix) if 
config.TRAIN.RESUME: mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch) # decide training params # metric rpn_eval_metric = metric.RPNAccMetric() rpn_cls_metric = metric.RPNLogLossMetric() rpn_bbox_metric = metric.RPNL1LossMetric() rpn_fg_metric = metric.RPNFGFraction(config) eval_metric = metric.RCNNAccMetric(config) eval_fg_metric = metric.RCNNFGAccuracy(config) cls_metric = metric.RCNNLogLossMetric(config) bbox_metric = metric.RCNNL1LossMetric(config) #fl_metric = metric.FocalLoss() eval_metrics = mx.metric.CompositeEvalMetric() # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric for child_metric in [ rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, rpn_fg_metric, eval_fg_metric, eval_metric, cls_metric, bbox_metric ]: eval_metrics.add(child_metric) # callback batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent) means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) epoch_end_callback = [ mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds) ] # decide learning rate base_lr = lr lr_factor = config.TRAIN.lr_factor lr_epoch = [float(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [ epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch ] lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [ int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff ] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) # optimizer optimizer_params = { 'momentum': config.TRAIN.momentum, 'wd': config.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'clip_gradient': None } # if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=config.default.kvstore, optimizer='sgd', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
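# The FPN variant above recovers the pyramid level of each RPN output from its
# feature stride via log2 and then pulls the matching internal symbol by name.
# A tiny standalone sketch with an assumed stride list; only the
# 'rpn_cls_score_p<level>_output' naming is taken from the code above.
import numpy as np

RPN_FEAT_STRIDE = [4, 8, 16, 32, 64]                        # assumed example strides
feat_pyramid_level = np.log2(RPN_FEAT_STRIDE).astype(int)   # -> [2, 3, 4, 5, 6]
names = ['rpn_cls_score_p' + str(x) + '_output' for x in feat_pyramid_level]
print(names)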
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) prefix = os.path.join(final_output_path, prefix) # load symbol shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_retina_symbol(config, is_train=True) feat_sym = [] feat_sym_p4 = sym.get_internals()['box_pred/p4_output'] feat_sym_p5 = sym.get_internals()['box_pred/p5_output'] feat_sym_p6 = sym.get_internals()['box_pred/p6_output'] feat_sym_p7 = sym.get_internals()['box_pred/p7_output'] feat_sym.append(feat_sym_p4) feat_sym.append(feat_sym_p5) feat_sym.append(feat_sym_p6) feat_sym.append(feat_sym_p7) ####### feat_stride = [] feat_stride.append(config.network.p4_RPN_FEAT_STRIDE) feat_stride.append(config.network.p5_RPN_FEAT_STRIDE) feat_stride.append(config.network.p6_RPN_FEAT_STRIDE) feat_stride.append(config.network.p7_RPN_FEAT_STRIDE) anchor_scales = [] anchor_scales.append(config.network.p4_ANCHOR_SCALES) anchor_scales.append(config.network.p5_ANCHOR_SCALES) anchor_scales.append(config.network.p6_ANCHOR_SCALES) anchor_scales.append(config.network.p7_ANCHOR_SCALES) anchor_ratios = [] anchor_ratios.append(config.network.p4_ANCHOR_RATIOS) anchor_ratios.append(config.network.p5_ANCHOR_RATIOS) anchor_ratios.append(config.network.p6_ANCHOR_RATIOS) anchor_ratios.append(config.network.p7_ANCHOR_RATIOS) ############# # setup multi-gpu batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config pprint.pprint(config) logger.info('training config:{}\n'.format(pprint.pformat(config))) # load dataset and prepare imdb for training image_sets = [iset for iset in config.dataset.image_set.split('+')] roidbs = [ load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, flip=config.TRAIN.FLIP) for image_set in image_sets ] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, config) # load training data train_data = AnchorLoader(feat_sym, feat_stride, anchor_scales, anchor_ratios, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, aspect_grouping=config.TRAIN.ASPECT_GROUPING) # infer max shape max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5))) print 'providing maximum shape', max_data_shape, max_label_shape # infer max shape data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) pprint.pprint(data_shape_dict) sym_instance.infer_shape(data_shape_dict) # load and initialize params if config.TRAIN.RESUME: print('continue training from ', begin_epoch) arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) sym_instance.init_weight(config, arg_params, aux_params) # check parameter shapes sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # create solver fixed_param_prefix = config.network.FIXED_PARAMS data_names = [k[0] for k in train_data.provide_data_single] label_names = [k[0] for k in train_data.provide_label_single] mod = MutableModule( sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, 
max_data_shapes=[max_data_shape for _ in range(batch_size)], max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix) if config.TRAIN.RESUME: mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch) # decide training params # metric Retina_toal_eval_metric = metric.RetinaToalAccMetric() Retina_cls_metric = metric.RetinaFocalLossMetric() Retina_bbox_metric = metric.RetinaL1LossMetric() eval_metrics = mx.metric.CompositeEvalMetric() # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric for child_metric in [ Retina_toal_eval_metric, Retina_cls_metric, Retina_bbox_metric ]: eval_metrics.add(child_metric) # callback batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent) means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) epoch_end_callback = [ mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds) ] # decide learning rate base_lr = lr lr_factor = config.TRAIN.lr_factor lr_epoch = [float(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [ epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch ] lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [ int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff ] print lr_step.split(',') print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) # optimizer optimizer_params = { 'learning_rate': lr, 'wd': 0.0001, } if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) # train initializer = mx.init.MSRAPrelu(factor_type='out', slope=0) # adam = mx.optimizer.AdaDelta(rho=0.09, epsilon=1e-14) #optimizer_params=optimizer_params, print "-----------------------train--------------------------------" mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=config.default.kvstore, optimizer='adam', optimizer_params=optimizer_params, initializer=initializer, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
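# WarmupMultiFactorScheduler drives most runs in this file.  A pure-Python
# sketch of the schedule shape it implies: a warmup learning rate until
# warmup_step, then the base lr decayed by factor at each step boundary.  The
# warmup is simplified to a constant value here and all numbers are made up;
# this is an illustration of the schedule, not the scheduler class itself.
def lr_at_iter(t, base_lr, steps, factor, warmup_lr, warmup_step):
    if t < warmup_step:
        return warmup_lr
    lr = base_lr
    for s in steps:
        if t >= s:
            lr *= factor
    return lr

for t in (0, 5000, 45000, 60000):
    print(t, lr_at_iter(t, 0.001, steps=[40000, 55000], factor=0.1,
                        warmup_lr=0.0001, warmup_step=1000))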
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) prefix = os.path.join(final_output_path, prefix) # load symbol shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_symbol(config, is_train=True) feat_sym = sym.get_internals()['rpn_cls_score_output'] # setup multi-gpu batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config pprint.pprint(config) logger.info('training config:{}\n'.format(pprint.pformat(config))) # load dataset and prepare imdb for training image_sets = [iset for iset in config.dataset.image_set.split('+')] roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, flip=config.TRAIN.FLIP) for image_set in image_sets] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, config) # load training data train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES, anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING) # infer max shape max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5))) print 'providing maximum shape', max_data_shape, max_label_shape data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) pprint.pprint(data_shape_dict) sym_instance.infer_shape(data_shape_dict) # load and initialize params if config.TRAIN.RESUME: print('continue training from ', begin_epoch) arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) sym_instance.init_weight(config, arg_params, aux_params) # check parameter shapes sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # create solver fixed_param_prefix = config.network.FIXED_PARAMS data_names = [k[0] for k in train_data.provide_data_single] label_names = [k[0] for k in train_data.provide_label_single] mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)], max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix) if config.TRAIN.RESUME: mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch) # decide training params # metric rpn_eval_metric = metric.RPNAccMetric() rpn_cls_metric = metric.RPNLogLossMetric() rpn_bbox_metric = metric.RPNL1LossMetric() eval_metric = metric.RCNNAccMetric(config) cls_metric = metric.RCNNLogLossMetric(config) bbox_metric = metric.RCNNL1LossMetric(config) eval_metrics = mx.metric.CompositeEvalMetric() # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) # callback batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent) means = 
np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds)] # decide learning rate base_lr = lr lr_factor = config.TRAIN.lr_factor lr_epoch = [float(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) # optimizer optimizer_params = {'momentum': config.TRAIN.momentum, 'wd': config.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'rescale_grad': 1.0, 'clip_gradient': None} if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=config.default.kvstore, optimizer='sgd', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
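# The max_data_shape plumbing above sizes buffers for the largest configured
# image scale, then appends a padded gt_boxes slot.  A dependency-free sketch
# with assumed SCALES and batch size; the 100-box / 5-number layout mirrors the
# ('gt_boxes', (BATCH_IMAGES, 100, 5)) entry used above.
SCALES = [(600, 1000)]     # assumed (short side, long side) training scales
BATCH_IMAGES = 1           # assumed per-GPU image count

max_h = max(v[0] for v in SCALES)
max_w = max(v[1] for v in SCALES)
max_data_shape = [('data', (BATCH_IMAGES, 3, max_h, max_w)),
                  ('gt_boxes', (BATCH_IMAGES, 100, 5))]
print(max_data_shape)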
def train_rpn( image_set, year, root_path, devkit_path, pretrained, epoch, prefix, ctx, begin_epoch, end_epoch, frequent, kv_store, work_load_list=None, ): # load symbol sym = get_vgg_rpn() feat_sym = get_vgg_rpn().get_internals()["rpn_cls_score_output"] # setup multi-gpu config.TRAIN.BATCH_IMAGES *= len(ctx) config.TRAIN.BATCH_SIZE *= len(ctx) # load training data voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=True) train_data = AnchorLoader( feat_sym, roidb, batch_size=config.TRAIN.BATCH_SIZE, shuffle=True, mode="train", ctx=ctx, work_load_list=work_load_list, ) # infer max shape max_data_shape = [("data", (1, 3, 1000, 1000))] max_data_shape_dict = {k: v for k, v in max_data_shape} _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict) from rcnn.minibatch import assign_anchor import numpy as np label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]]) max_label_shape = [ ("label", label["label"].shape), ("bbox_target", label["bbox_target"].shape), ("bbox_inside_weight", label["bbox_inside_weight"].shape), ("bbox_outside_weight", label["bbox_outside_weight"].shape), ] print "providing maximum shape", max_data_shape, max_label_shape # load pretrained args, auxs = load_param(pretrained, epoch, convert=True) # initialize params arg_shape, _, _ = sym.infer_shape(data=(1, 3, 224, 224)) arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) args["rpn_conv_3x3_weight"] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict["rpn_conv_3x3_weight"]) args["rpn_conv_3x3_bias"] = mx.nd.zeros(shape=arg_shape_dict["rpn_conv_3x3_bias"]) args["rpn_cls_score_weight"] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict["rpn_cls_score_weight"]) args["rpn_cls_score_bias"] = mx.nd.zeros(shape=arg_shape_dict["rpn_cls_score_bias"]) args["rpn_bbox_pred_weight"] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict["rpn_bbox_pred_weight"]) args["rpn_bbox_pred_bias"] = mx.nd.zeros(shape=arg_shape_dict["rpn_bbox_pred_bias"]) # train solver = Solver( prefix, sym, ctx, begin_epoch, end_epoch, kv_store, args, auxs, momentum=0.9, wd=0.0005, learning_rate=1e-3, lr_scheduler=mx.lr_scheduler.FactorScheduler(60000, 0.1), mutable_data_shape=True, max_data_shape=max_data_shape, max_label_shape=max_label_shape, ) solver.fit(train_data, frequent=frequent)
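# The RPN-head initialization above reads parameter shapes off the symbol with
# infer_shape and fills the new layers by name.  A standalone sketch on a toy
# two-layer symbol; the layer names ('toy_conv', 'toy_score') are invented for
# the example, and the gaussian/zero initialization mirrors the pattern used in
# the functions in this file.
import mxnet as mx

data = mx.sym.Variable('data')
conv = mx.sym.Convolution(data=data, kernel=(3, 3), pad=(1, 1),
                          num_filter=8, name='toy_conv')
score = mx.sym.Convolution(data=conv, kernel=(1, 1),
                           num_filter=2, name='toy_score')

arg_shape, _, _ = score.infer_shape(data=(1, 3, 224, 224))
arg_shape_dict = dict(zip(score.list_arguments(), arg_shape))

args = {
    'toy_score_weight': mx.random.normal(0, 0.01,
                                         shape=arg_shape_dict['toy_score_weight']),
    'toy_score_bias': mx.nd.zeros(shape=arg_shape_dict['toy_score_bias']),
}
print({k: v.shape for k, v in args.items()})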
def train_net(args, ctx, pretrained_dir, pretrained_resnet, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) prefix = os.path.join(final_output_path, prefix) # load symbol shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_symbol(config, is_train=True) feat_sym = sym.get_internals()['rpn_cls_score_output'] # setup multi-gpu batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config pprint.pprint(config) logger.info('training config:{}\n'.format(pprint.pformat(config))) git_commit_id = commands.getoutput('git rev-parse HEAD') print("Git commit id:", git_commit_id) logger.info('Git commit id: {}'.format(git_commit_id)) # load dataset and prepare imdb for training image_sets = [iset for iset in config.dataset.image_set.split('+')] roidbs = [ load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, motion_iou_path=config.dataset.motion_iou_path, flip=config.TRAIN.FLIP, use_philly=args.usePhilly) for image_set in image_sets ] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, config) # load training data train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES, anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING, normalize_target=config.network.NORMALIZE_RPN, bbox_mean=config.network.ANCHOR_MEANS, bbox_std=config.network.ANCHOR_STDS) # infer max shape max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5))) print('providing maximum shape', max_data_shape, max_label_shape) data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) pprint.pprint(data_shape_dict) sym_instance.infer_shape(data_shape_dict) # create solver fixed_param_prefix = config.network.FIXED_PARAMS data_names = [k[0] for k in train_data.provide_data_single] label_names = [k[0] for k in train_data.provide_label_single] mod = MutableModule( sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)], max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix) # load and initialize params params_loaded = False if config.TRAIN.RESUME: arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch) print('continue training from ', begin_epoch) logger.info('continue training from ', begin_epoch) params_loaded = True elif config.TRAIN.AUTO_RESUME: for cur_epoch in range(end_epoch - 1, begin_epoch, -1): params_filename = '{}-{:04d}.params'.format(prefix, cur_epoch) states_filename = '{}-{:04d}.states'.format(prefix, cur_epoch) if os.path.exists(params_filename) and os.path.exists( states_filename): begin_epoch = cur_epoch arg_params, aux_params = load_param(prefix, cur_epoch, convert=True) mod._preload_opt_states = states_filename print('auto continue training from {}, {}'.format( params_filename, 
states_filename)) logger.info('auto continue training from {}, {}'.format( params_filename, states_filename)) params_loaded = True break if not params_loaded: arg_params, aux_params = load_param(os.path.join( pretrained_dir, pretrained_resnet), epoch, convert=True) sym_instance.init_weight(config, arg_params, aux_params) # check parameter shapes sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # decide training params # metric eval_metric = metric.RCNNAccMetric(config) cls_metric = metric.RCNNLogLossMetric(config) bbox_metric = metric.RCNNL1LossMetric(config) eval_metrics = mx.metric.CompositeEvalMetric() for child_metric in [eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) if config.TRAIN.JOINT_TRAINING or (not config.TRAIN.LEARN_NMS): rpn_eval_metric = metric.RPNAccMetric() rpn_cls_metric = metric.RPNLogLossMetric() rpn_bbox_metric = metric.RPNL1LossMetric() for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric]: eval_metrics.add(child_metric) if config.TRAIN.LEARN_NMS: eval_metrics.add(metric.NMSLossMetric(config, 'pos')) eval_metrics.add(metric.NMSLossMetric(config, 'neg')) eval_metrics.add(metric.NMSAccMetric(config)) # callback batch_end_callback = [ callback.Speedometer(train_data.batch_size, frequent=args.frequent) ] if config.USE_PHILLY: total_iter = (end_epoch - begin_epoch) * len(roidb) / input_batch_size progress_frequent = min(args.frequent * 10, 100) batch_end_callback.append( callback.PhillyProgressCallback(total_iter, progress_frequent)) means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) epoch_end_callback = [ mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds) ] # decide learning rate # base_lr = lr * len(ctx) * config.TRAIN.BATCH_IMAGES base_lr = lr lr_factor = config.TRAIN.lr_factor lr_epoch = [float(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [ epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch ] lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [ int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff ] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) # optimizer optimizer_params = { 'momentum': config.TRAIN.momentum, 'wd': config.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'rescale_grad': 1.0, 'clip_gradient': None } if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=config.default.kvstore, optimizer='sgd', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
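# The function above records the current git commit with the Python-2-only
# `commands` module.  A small alternative sketch using subprocess, which exists
# on both Python 2.7 and 3; this is only an illustration of the same idea, not
# a change to the training code.
import subprocess

def git_commit_id():
    try:
        out = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
        return out.strip().decode('utf-8')
    except (OSError, subprocess.CalledProcessError):
        return 'unknown'

print('Git commit id:', git_commit_id())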
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) prefix = os.path.join(final_output_path, prefix) # load symbol shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_symbol(config, is_train=True) feat_sym = sym.get_internals()['rpn_cls_score_output'] # setup multi-gpu batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config pprint.pprint(config) logger.info('training config:{}\n'.format(pprint.pformat(config))) # load dataset and prepare imdb for training image_sets = [iset for iset in config.dataset.image_set.split('+')] roidbs = [ load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, flip=config.TRAIN.FLIP) for image_set in image_sets ] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, config) # load training data train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES, anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING) # infer max shape max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5))) print('providing maximum shape', max_data_shape, max_label_shape) data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) pprint.pprint(data_shape_dict) sym_instance.infer_shape(data_shape_dict) # load and initialize params #if config.TRAIN.RESUME: # print('continue training from ', begin_epoch) # arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) #else: # arg_params, aux_params = load_param(pretrained, epoch, convert=True) # sym_instance.init_weight(config, arg_params, aux_params) print('transfer learning...') # Choose the initialization weights (COCO or UADETRAC or pretrained) #arg_params, aux_params = load_param('/raid10/home_ext/Deformable-ConvNets/output/rfcn_dcn_Shuo_UADTRAC/resnet_v1_101_voc0712_rfcn_dcn_Shuo_UADETRAC/trainlist_full/rfcn_UADTRAC', 5, convert=True) #arg_params, aux_params = load_param('/raid10/home_ext/Deformable-ConvNets/model/rfcn_dcn_coco', 0, convert=True) arg_params, aux_params = load_param( '/raid10/home_ext/Deformable-ConvNets/output/rfcn_dcn_Shuo_AICity/resnet_v1_101_voc0712_rfcn_dcn_Shuo_AICityVOC1080_FreezeCOCO_rpnOnly_all/1080_all/rfcn_AICityVOC1080_FreezeCOCO_rpnOnly_all', 4, convert=True) sym_instance.init_weight_Shuo(config, arg_params, aux_params) # check parameter shapes sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # create solver fixed_param_prefix = config.network.FIXED_PARAMS data_names = [k[0] for k in train_data.provide_data_single] label_names = [k[0] for k in train_data.provide_label_single] mod = MutableModule( sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)], max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix) #freeze parameters using fixed_param_names:list of str para_file = open( 
'/raid10/home_ext/Deformable-ConvNets/rfcn/symbols/arg_params.txt') para_list = [line.split('<')[0] for line in para_file.readlines()] # para_list.remove('rfcn_cls_weight') # para_list.remove('rfcn_cls_bias') # para_list.remove('rfcn_cls_offset_t_weight') # para_list.remove('rfcn_cls_offset_t_bias') # para_list.remove('res5a_branch2b_offset_weight') para_list.remove('res5a_branch2b_offset_bias') para_list.remove('res5b_branch2b_offset_weight') para_list.remove('res5b_branch2b_offset_bias') para_list.remove('res5c_branch2b_offset_weight') para_list.remove('res5c_branch2b_offset_bias') para_list.remove('conv_new_1_weight') para_list.remove('conv_new_1_bias') para_list.remove('rfcn_bbox_weight') para_list.remove('rfcn_bbox_bias') para_list.remove('rfcn_bbox_offset_t_weight') para_list.remove('rfcn_bbox_offset_t_bias') mod = MutableModule_Shuo( sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)], max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix, fixed_param_names=para_list) if config.TRAIN.RESUME: mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch) # decide training params # metric rpn_eval_metric = metric.RPNAccMetric() rpn_cls_metric = metric.RPNLogLossMetric() rpn_bbox_metric = metric.RPNL1LossMetric() eval_metric = metric.RCNNAccMetric(config) cls_metric = metric.RCNNLogLossMetric(config) bbox_metric = metric.RCNNL1LossMetric(config) eval_metrics = mx.metric.CompositeEvalMetric() # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric for child_metric in [ rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric ]: eval_metrics.add(child_metric) # callback batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent) means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) epoch_end_callback = [ mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds) ] # decide learning rate base_lr = lr lr_factor = config.TRAIN.lr_factor lr_epoch = [float(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [ epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch ] lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [ int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff ] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) # optimizer optimizer_params = { 'momentum': config.TRAIN.momentum, 'wd': config.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'rescale_grad': 1.0, 'clip_gradient': None } if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=config.default.kvstore, optimizer='sgd', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
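# The transfer-learning variant above freezes most of the network by passing an
# explicit fixed_param_names list, built from a text dump where each line looks
# like '<param_name><shape...>' and the freshly trained heads are removed from
# that list.  A standalone sketch of the bookkeeping using an in-memory example
# instead of the hard-coded arg_params.txt path; the parameter names below are
# illustrative.
dump_lines = [
    'conv_new_1_weight<(256, 2048, 1, 1)>',
    'conv_new_1_bias<(256,)>',
    'rfcn_bbox_weight<(392, 256, 1, 1)>',
    'res2a_branch2a_weight<(64, 64, 1, 1)>',
]

para_list = [line.split('<')[0] for line in dump_lines]
trainable = {'conv_new_1_weight', 'conv_new_1_bias', 'rfcn_bbox_weight'}
fixed_param_names = [p for p in para_list if p not in trainable]
print(fixed_param_names)   # only the backbone weight stays frozen in this toy case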
def end2end_train(image_set, test_image_set, year, root_path, devkit_path, pretrained, epoch, prefix, ctx, begin_epoch, num_epoch, frequent, kv_store, mom, wd, lr, num_classes, monitor, work_load_list=None, resume=False, use_flip=True, factor_step=50000): # set up logger logger = logging.getLogger() logger.setLevel(logging.INFO) mon = None config.TRAIN.BG_THRESH_HI = 0.5 # TODO(verify) config.TRAIN.BG_THRESH_LO = 0.0 # TODO(verify) config.TRAIN.RPN_MIN_SIZE = 16 logging.info('########## TRAIN FASTER-RCNN WITH APPROXIMATE JOINT END2END #############') config.TRAIN.HAS_RPN = True config.END2END = 1 config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True sym = get_faster_rcnn(num_classes=num_classes) feat_sym = sym.get_internals()['rpn_cls_score_output'] # setup multi-gpu config.TRAIN.IMS_PER_BATCH *= len(ctx) config.TRAIN.BATCH_SIZE *= len(ctx) # no used here # infer max shape max_data_shape = [('data', (config.TRAIN.IMS_PER_BATCH, 3, 1000, 1000))] max_data_shape_dict = {k: v for k, v in max_data_shape} _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict) from rcnn.minibatch import assign_anchor import numpy as np label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]]) max_label_shape = [('label', label['label'].shape), ('bbox_target', label['bbox_target'].shape), ('bbox_inside_weight', label['bbox_inside_weight'].shape), ('bbox_outside_weight', label['bbox_outside_weight'].shape), ('gt_boxes', (config.TRAIN.IMS_PER_BATCH, 5*100))] # assume at most 100 object in image print 'providing maximum shape', max_data_shape, max_label_shape # load training data voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=use_flip) train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.IMS_PER_BATCH, shuffle=True, mode='train', ctx=ctx, work_load_list=work_load_list) # load pretrained args, auxs, _ = load_param(pretrained, epoch, convert=True) # initialize params if not resume: del args['fc8_weight'] del args['fc8_bias'] input_shapes = {k: (1,)+ v[1::] for k, v in train_data.provide_data + train_data.provide_label} arg_shape, _, _ = sym.infer_shape(**input_shapes) arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) args['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight']) args['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias']) args['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight']) args['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias']) args['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['rpn_bbox_pred_weight']) # guarantee not likely explode with bbox_delta args['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias']) args['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight']) args['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias']) args['bbox_pred_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['bbox_pred_weight']) args['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias']) # prepare training if config.TRAIN.FINETUNE: fixed_param_prefix = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5'] else: fixed_param_prefix = ['conv1', 'conv2'] data_names = [k[0] for k in train_data.provide_data] label_names = [k[0] for k in train_data.provide_label] batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent) epoch_end_callback = do_checkpoint(prefix) rpn_eval_metric = 
AccuracyMetric(use_ignore=True, ignore=-1, ex_rpn=True) rpn_cls_metric = LogLossMetric(use_ignore=True, ignore=-1, ex_rpn=True) rpn_bbox_metric = SmoothL1LossMetric(ex_rpn=True) eval_metric = AccuracyMetric() cls_metric = LogLossMetric() bbox_metric = SmoothL1LossMetric() eval_metrics = mx.metric.CompositeEvalMetric() for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) optimizer_params = {'momentum': mom, 'wd': wd, 'learning_rate': lr, 'lr_scheduler': mx.lr_scheduler.FactorScheduler(factor_step, 0.1), 'clip_gradient': 1.0, 'rescale_grad': 1.0 } # 'rescale_grad': (1.0 / config.TRAIN.RPN_BATCH_SIZE)} # train mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, work_load_list=work_load_list, max_data_shapes=max_data_shape, max_label_shapes=max_label_shape, fixed_param_prefix=fixed_param_prefix) if monitor: def norm_stat(d): return mx.nd.norm(d)/np.sqrt(d.size) mon = mx.mon.Monitor(100, norm_stat) mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=kv_store, optimizer='sgd', optimizer_params=optimizer_params, monitor=mon, arg_params=args, aux_params=auxs, begin_epoch=begin_epoch, num_epoch=num_epoch)
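# The optional monitor above logs, every 100 batches, norm(d)/sqrt(d.size) for
# each array it inspects, i.e. the mean magnitude of a weight or gradient
# tensor.  A numpy-only sketch of that statistic on a fake gradient, just to
# make the monitored quantity concrete.
import numpy as np

def norm_stat(d):
    return np.linalg.norm(d) / np.sqrt(d.size)

fake_grad = np.random.normal(0.0, 0.01, size=(64, 3, 3, 3)).astype(np.float32)
print('mean magnitude:', norm_stat(fake_grad))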
def train_rpn(cfg, dataset, image_set, root_path, dataset_path, frequent, kvstore, flip, shuffle, resume, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, train_shared, lr, lr_step, logger=None, output_path=None): # set up logger if not logger: logging.basicConfig() logger = logging.getLogger() logger.setLevel(logging.INFO) # set up config cfg.TRAIN.BATCH_IMAGES = cfg.TRAIN.ALTERNATE.RPN_BATCH_IMAGES # load symbol sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() sym = sym_instance.get_symbol_rpn(cfg, is_train=True) feat_sym = sym.get_internals()['rpn_cls_score_output'] # setup multi-gpu batch_size = len(ctx) input_batch_size = cfg.TRAIN.BATCH_IMAGES * batch_size # print cfg pprint.pprint(cfg) logger.info('training rpn cfg:{}\n'.format(pprint.pformat(cfg))) # load dataset and prepare imdb for training image_sets = [iset for iset in image_set.split('+')] roidbs = [load_gt_roidb(dataset, image_set, root_path, dataset_path, result_path=output_path, flip=flip) for image_set in image_sets] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, cfg) # load training data train_data = AnchorLoader(feat_sym, roidb, cfg, batch_size=input_batch_size, shuffle=shuffle, ctx=ctx, feat_stride=cfg.network.RPN_FEAT_STRIDE, anchor_scales=cfg.network.ANCHOR_SCALES, anchor_ratios=cfg.network.ANCHOR_RATIOS, aspect_grouping=cfg.TRAIN.ASPECT_GROUPING) # infer max shape max_data_shape = [('data', (cfg.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) print('providing maximum shape', max_data_shape, max_label_shape) # infer shape data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) sym_instance.infer_shape(data_shape_dict) # load and initialize params if resume: print('continue training from ', begin_epoch) arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) sym_instance.init_weight_rpn(cfg, arg_params, aux_params) # check parameter shapes sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # create solver data_names = [k[0] for k in train_data.provide_data_single] label_names = [k[0] for k in train_data.provide_label_single] if train_shared: fixed_param_prefix = cfg.network.FIXED_PARAMS_SHARED else: fixed_param_prefix = cfg.network.FIXED_PARAMS mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in xrange(batch_size)], max_label_shapes=[max_label_shape for _ in xrange(batch_size)], fixed_param_prefix=fixed_param_prefix) # decide training params # metric eval_metric = metric.RPNAccMetric() cls_metric = metric.RPNLogLossMetric() bbox_metric = metric.RPNL1LossMetric() eval_metrics = mx.metric.CompositeEvalMetric() for child_metric in [eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) # callback batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=frequent) # epoch_end_callback = mx.callback.do_checkpoint(prefix) epoch_end_callback = mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True) # decide learning rate base_lr = lr lr_factor = cfg.TRAIN.lr_factor lr_epoch = [int(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [int(epoch * len(roidb) / 
batch_size) for epoch in lr_epoch_diff] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, cfg.TRAIN.warmup, cfg.TRAIN.warmup_lr, cfg.TRAIN.warmup_step) # optimizer optimizer_params = {'momentum': cfg.TRAIN.momentum, 'wd': cfg.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'rescale_grad': 1.0, 'clip_gradient': None} if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=kvstore, optimizer='sgd', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
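# Checkpoints throughout this file follow the standard MXNet naming scheme
# written by module_checkpoint: '<prefix>-<epoch, 4 digits>.params' plus a
# matching '.states' file when optimizer states are saved, which is what the
# various resume branches look for.  A tiny sketch with a made-up prefix and
# epoch, only to show the file names involved.
prefix = './output/rpn/example_prefix'   # made-up path
epoch = 7

params_filename = '{}-{:04d}.params'.format(prefix, epoch)
states_filename = '{}-{:04d}.states'.format(prefix, epoch)
print(params_filename, states_filename)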
def train_net(args, ctx, pretrained, pretrained_flow, epoch, prefix, begin_epoch, end_epoch, lr, lr_step): logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set) prefix = os.path.join(final_output_path, prefix) # load symbol shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path) sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_train_symbol(config) feat_sym = sym.get_internals()['rpn_cls_score_output'] feat_conv_3x3_relu = sym.get_internals()['feat_conv_3x3_relu_output'] # setup multi-gpu batch_size = len(ctx) input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size # print config pprint.pprint(config) logger.info('training config:{}\n'.format(pprint.pformat(config))) # load dataset and prepare imdb for training image_sets = [iset for iset in config.dataset.image_set.split('+')] roidbs = [ load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path, flip=config.TRAIN.FLIP) for image_set in image_sets ] roidb = merge_roidb(roidbs) roidb = filter_roidb(roidb, config) # load training data train_data = AnchorLoader(feat_sym, feat_conv_3x3_relu, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx, feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES, anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING, normalize_target=config.network.NORMALIZE_RPN, bbox_mean=config.network.ANCHOR_MEANS, bbox_std=config.network.ANCHOR_STDS) # infer max shape #max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))), # ('data_ref', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))), # ('eq_flag', (1,))] data_shape1 = { 'data_ref': (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])), } _, feat_shape111, _ = feat_conv_3x3_relu.infer_shape(**data_shape1) max_data_shape = [('data_ref', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))), ('eq_flag', (1, )), ('motion_vector', (config.TRAIN.BATCH_IMAGES, 2, int(feat_shape111[0][2]), int(feat_shape111[0][3])))] max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape) max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5))) print 'providing maximum shape', max_data_shape, max_label_shape data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) pprint.pprint(data_shape_dict) sym_instance.infer_shape(data_shape_dict) # load and initialize params if config.TRAIN.RESUME: print('continue training from ', begin_epoch) arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) else: arg_params, aux_params = load_param(pretrained, epoch, convert=True) #arg_params_flow, aux_params_flow = load_param(pretrained_flow, epoch, convert=True) #arg_params.update(arg_params_flow) #aux_params.update(aux_params_flow) sym_instance.init_weight(config, arg_params, aux_params) # check parameter shapes sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) # create solver fixed_param_prefix = config.network.FIXED_PARAMS data_names = [k[0] for k in train_data.provide_data_single] label_names = [k[0] for k in train_data.provide_label_single] mod = MutableModule( sym, data_names=data_names, label_names=label_names, 
logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)], max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix) if config.TRAIN.RESUME: mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch) # decide training params # metric rpn_eval_metric = metric.RPNAccMetric() rpn_cls_metric = metric.RPNLogLossMetric() rpn_bbox_metric = metric.RPNL1LossMetric() eval_metric = metric.RCNNAccMetric(config) cls_metric = metric.RCNNLogLossMetric(config) bbox_metric = metric.RCNNL1LossMetric(config) eval_metrics = mx.metric.CompositeEvalMetric() # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric for child_metric in [ rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric ]: eval_metrics.add(child_metric) # callback batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent) means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES) epoch_end_callback = [ mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds) ] # decide learning rate base_lr = lr lr_factor = config.TRAIN.lr_factor lr_epoch = [float(epoch) for epoch in lr_step.split(',')] lr_epoch_diff = [ epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch ] lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff))) lr_iters = [ int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff ] print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step) # optimizer optimizer_params = { 'momentum': config.TRAIN.momentum, 'wd': config.TRAIN.wd, 'learning_rate': lr, 'lr_scheduler': lr_scheduler, 'rescale_grad': 1.0, 'clip_gradient': None } if not isinstance(train_data, PrefetchingIter): train_data = PrefetchingIter(train_data) print('Start to train model') # train mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=config.default.kvstore, optimizer='sgd', optimizer_params=optimizer_params, arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
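# The flow-guided variant above sizes its 'motion_vector' input from the spatial
# shape of the backbone feature map (feat_conv_3x3_relu), which is roughly the
# image size divided by the feature stride.  A standalone arithmetic sketch with
# an assumed 600x1000 maximum scale and stride 16; the real code obtains the
# exact numbers via feat_conv_3x3_relu.infer_shape.
BATCH_IMAGES = 1
img_h, img_w = 600, 1000     # assumed maximum training scale
feat_stride = 16             # assumed backbone stride

feat_h, feat_w = img_h // feat_stride, img_w // feat_stride
motion_vector_shape = ('motion_vector', (BATCH_IMAGES, 2, feat_h, feat_w))
print(motion_vector_shape)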
def train_rpn(image_set, year, root_path, devkit_path, pretrained, epoch, prefix, ctx, begin_epoch, end_epoch, frequent, kv_store, work_load_list=None, resume=False): # set up logger logger = logging.getLogger() logger.setLevel(logging.INFO) # load symbol sym = get_vgg_rpn() feat_sym = get_vgg_rpn().get_internals()['rpn_cls_score_output'] # setup multi-gpu config.TRAIN.BATCH_IMAGES *= len(ctx) config.TRAIN.BATCH_SIZE *= len(ctx) # load training data voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=True) train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.BATCH_SIZE, shuffle=True, mode='train', ctx=ctx, work_load_list=work_load_list) # infer max shape max_data_shape = [('data', (config.TRAIN.BATCH_SIZE, 3, 1000, 1000))] max_data_shape_dict = {k: v for k, v in max_data_shape} _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict) from rcnn.minibatch import assign_anchor import numpy as np label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]]) max_label_shape = [ ('label', label['label'].shape), ('bbox_target', label['bbox_target'].shape), ('bbox_inside_weight', label['bbox_inside_weight'].shape), ('bbox_outside_weight', label['bbox_outside_weight'].shape) ] print 'providing maximum shape', max_data_shape, max_label_shape # load pretrained args, auxs = load_param(pretrained, epoch, convert=True) # initialize params if not resume: input_shapes = { k: v for k, v in train_data.provide_data + train_data.provide_label } arg_shape, _, _ = sym.infer_shape(**input_shapes) arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape)) args['rpn_conv_3x3_weight'] = mx.random.normal( 0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight']) args['rpn_conv_3x3_bias'] = mx.nd.zeros( shape=arg_shape_dict['rpn_conv_3x3_bias']) args['rpn_cls_score_weight'] = mx.random.normal( 0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight']) args['rpn_cls_score_bias'] = mx.nd.zeros( shape=arg_shape_dict['rpn_cls_score_bias']) args['rpn_bbox_pred_weight'] = mx.random.normal( 0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight']) args['rpn_bbox_pred_bias'] = mx.nd.zeros( shape=arg_shape_dict['rpn_bbox_pred_bias']) # prepare training if config.TRAIN.FINETUNE: fixed_param_prefix = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5'] else: fixed_param_prefix = ['conv1', 'conv2'] data_names = [k[0] for k in train_data.provide_data] label_names = [k[0] for k in train_data.provide_label] batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent) epoch_end_callback = mx.callback.do_checkpoint(prefix) if config.TRAIN.HAS_RPN is True: eval_metric = AccuracyMetric(use_ignore=True, ignore=-1) cls_metric = LogLossMetric(use_ignore=True, ignore=-1) else: eval_metric = AccuracyMetric() cls_metric = LogLossMetric() bbox_metric = SmoothL1LossMetric() eval_metrics = mx.metric.CompositeEvalMetric() for child_metric in [eval_metric, cls_metric, bbox_metric]: eval_metrics.add(child_metric) optimizer_params = { 'momentum': 0.9, 'wd': 0.0005, 'learning_rate': 0.001, 'lr_scheduler': mx.lr_scheduler.FactorScheduler(60000, 0.1), 'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE) } # train mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx, work_load_list=work_load_list, max_data_shapes=max_data_shape, max_label_shapes=max_label_shape, fixed_param_prefix=fixed_param_prefix) mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback, kvstore=kv_store, optimizer='sgd', 
optimizer_params=optimizer_params, arg_params=args, aux_params=auxs, begin_epoch=begin_epoch, num_epoch=end_epoch)
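# The optimizer above sets rescale_grad = 1 / config.TRAIN.BATCH_SIZE so the
# summed per-example gradients become an average before the SGD step.  A tiny
# numpy sketch of the implied update; the batch size and learning rate are
# stand-in values.
import numpy as np

batch_size = 256                        # stand-in for config.TRAIN.BATCH_SIZE
summed_grad = np.ones(4) * batch_size   # pretend each example contributed 1.0
rescaled = summed_grad * (1.0 / batch_size)

lr = 0.001
weight = np.zeros(4)
weight -= lr * rescaled                 # plain SGD step on the averaged gradient
print(weight)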