Example #1
def test_rcnn(imageset, year, root_path, devkit_path, prefix, epoch, ctx, vis=False, has_rpn=True, proposal='rpn',
              end2end=False):
    # load symbol and testing data
    if has_rpn:
        # sym = get_vgg_test()
        config.TRAIN.AGNOSTIC = True
        config.END2END = 1
        config.PIXEL_MEANS = np.array([[[0, 0, 0]]])
        sym = resnext_101(num_class=21)
        config.TEST.HAS_RPN = True
        config.TEST.RPN_PRE_NMS_TOP_N = 6000
        config.TEST.RPN_POST_NMS_TOP_N = 300
        voc, roidb = load_gt_roidb(imageset, year, root_path, devkit_path)
    else:
        sym = get_vgg_rcnn_test()
        voc, roidb = eval('load_test_' + proposal + '_roidb')(imageset, year, root_path, devkit_path)

    # get test data iter
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')

    # load model
    args, auxs, _ = load_param(prefix, epoch, convert=True, ctx=ctx)

    # detect
    detector = Detector(sym, ctx, args, auxs)
    pred_eval(detector, test_data, voc, vis=vis)
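A minimal invocation sketch for the function above; the image set, paths, checkpoint prefix and epoch are illustrative placeholders, not values taken from the source.

import mxnet as mx

# hypothetical arguments: dataset paths, prefix and epoch are placeholders
test_rcnn('test', '2007', 'data', 'data/VOCdevkit',
          prefix='model/rcnn', epoch=10, ctx=mx.gpu(0),
          vis=False, has_rpn=True, end2end=True)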
Example #2
def test_rcnn(imageset,
              year,
              root_path,
              devkit_path,
              prefix,
              epoch,
              ctx,
              vis=False,
              has_rpn=True,
              proposal='rpn'):
    # load symbol and testing data
    if has_rpn:
        sym = get_vgg_test()
        config.TEST.HAS_RPN = True
        config.TEST.RPN_PRE_NMS_TOP_N = 6000
        config.TEST.RPN_POST_NMS_TOP_N = 300
        voc, roidb = load_gt_roidb(imageset, year, root_path, devkit_path)
    else:
        sym = get_vgg_rcnn_test()
        voc, roidb = eval('load_test_' + proposal + '_roidb')(imageset, year,
                                                              root_path,
                                                              devkit_path)

    # get test data iter
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')

    # load model
    args, auxs, _ = load_param(prefix, epoch, convert=True, ctx=ctx)

    # detect
    detector = Detector(sym, ctx, args, auxs)
    pred_eval(detector, test_data, voc, vis=vis)
Example #3
def main():
    logging.info('########## TRAIN FASTER-RCNN WITH APPROXIMATE JOINT END2END #############')
    init_config()
    if "resnet" in args.pretrained:
        sym = resnet_50(num_class=args.num_classes, bn_mom=args.bn_mom, bn_global=True, is_train=True)  # consider background
    else:
        sym = get_faster_rcnn(num_classes=args.num_classes)  # consider background

    feat_sym = sym.get_internals()['rpn_cls_score_output']
    # setup for multi-gpu
    ctx = [mx.gpu(int(i)) for i in args.gpu_ids.split(',')]
    config.TRAIN.IMS_PER_BATCH *= len(ctx)
    max_data_shape, max_label_shape = get_max_shape(feat_sym)

    # data
    # voc, roidb = load_gt_roidb_from_list(args.dataset_name, args.lst, args.dataset_root,
    #                                      args.outdata_path, flip=not args.no_flip)
    voc, roidb = load_gt_roidb(args.image_set, args.year, args.root_path, args.devkit_path, flip=not args.no_flip)
    train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.IMS_PER_BATCH, anchor_scales=(4, 8, 16, 32),
                              shuffle=not args.no_shuffle, mode='train', ctx=ctx, need_mean=args.need_mean)
    # model
    args_params, auxs_params, _ = load_param(args.pretrained, args.load_epoch, convert=True)
    if not args.resume:
        args_params, auxs_params = init_model(args_params, auxs_params, train_data, sym, args.pretrained)
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    batch_end_callback = Speedometer(train_data.batch_size, frequent=args.frequent)
    epoch_end_callback = do_checkpoint(args.prefix)

    optimizer_params = {'momentum':         args.mom,
                        'wd':               args.wd,
                        'learning_rate':    args.lr,
                        # 'lr_scheduler':     WarmupScheduler(args.factor_step, 0.1, warmup_lr=0.1*args.lr, warmup_step=200) \
                        #                     if not args.resume else mx.lr_scheduler.FactorScheduler(args.factor_step, 0.1),
                        'lr_scheduler':     mx.lr_scheduler.FactorScheduler(args.factor_step, 0.1), # seems no need warm up
                        'clip_gradient':    1.0,
                        'rescale_grad':     1.0}

    if "resnet" in args.pretrained:
        # only consider resnet-50 here
        fixed_param_prefix = ['conv0', 'stage1', 'stage2', 'bn_data', 'bn0']
    else:
        fixed_param_prefix = ['conv1', 'conv2', 'conv3']
    # train
    mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger, context=ctx,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    mod.fit(train_data, eval_metric=metric(), epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kv_store,
            optimizer='sgd', optimizer_params=optimizer_params, arg_params=args_params, aux_params=auxs_params,
            begin_epoch=args.load_epoch, num_epoch=args.num_epoch)
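The multi-GPU setup above derives the context list from a comma-separated GPU id string and scales the effective batch size with it; a small standalone sketch, assuming an illustrative gpu_ids value.

import mxnet as mx

gpu_ids = '0,1'                                     # assumed command-line value
ctx = [mx.gpu(int(i)) for i in gpu_ids.split(',')]  # one context per listed GPU
print(ctx)                                          # [gpu(0), gpu(1)]
ims_per_batch = 1 * len(ctx)                        # e.g. config.TRAIN.IMS_PER_BATCH *= len(ctx)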
Example #4
def train_net(image_set, year, root_path, devkit_path, pretrained, epoch,
              prefix, ctx, begin_epoch, end_epoch, frequent, kv_store, work_load_list=None, resume=False):
    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # load symbol
    sym = get_vgg_rpn()
    feat_sym = get_vgg_rpn().get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)

    # load training data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=True)
    train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.BATCH_SIZE, shuffle=True, mode='train',
                              ctx=ctx, work_load_list=work_load_list)

    # infer max shape
    max_data_shape = [('data', (1, 3, 1000, 1000))]
    max_data_shape_dict = {k: v for k, v in max_data_shape}
    _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict)
    from rcnn.minibatch import assign_anchor
    import numpy as np
    label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]])
    max_label_shape = [('label', label['label'].shape),
                       ('bbox_target', label['bbox_target'].shape),
                       ('bbox_inside_weight', label['bbox_inside_weight'].shape),
                       ('bbox_outside_weight', label['bbox_outside_weight'].shape)]
    print('providing maximum shape', max_data_shape, max_label_shape)

    # load pretrained
    args, auxs = load_param(pretrained, epoch, convert=True)

    # initialize params
    if not resume:
        arg_shape, _, _ = sym.infer_shape(data=(1, 3, 224, 224))
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        args['rpn_conv_3x3_weight'] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        args['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        args['rpn_cls_score_weight'] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        args['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        args['rpn_bbox_pred_weight'] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        args['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])

    # train
    solver = Solver(prefix, sym, ctx, begin_epoch, end_epoch, kv_store, args, auxs, momentum=0.9, wd=0.0005,
                    learning_rate=1e-3, lr_scheduler=mx.lr_scheduler.FactorScheduler(60000, 0.1),
                    mutable_data_shape=True, max_data_shape=max_data_shape, max_label_shape=max_label_shape)
    solver.fit(train_data, frequent=frequent)
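A hypothetical call to the trainer above; every argument (dataset, paths, pretrained prefix, epochs, context, kvstore) is a placeholder for illustration, not a value from the source.

import mxnet as mx

train_net('trainval', '2007', 'data', 'data/VOCdevkit',
          pretrained='model/vgg16', epoch=1, prefix='model/rpn',
          ctx=[mx.gpu(0)], begin_epoch=0, end_epoch=8,
          frequent=20, kv_store='local')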
Example #5
def test_rpn(image_set, year, root_path, devkit_path, prefix, epoch, ctx, vis=False):
    # load symbol
    sym = get_vgg_rpn_test()

    # load testing data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path)
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')

    # load model
    args, auxs, _ = load_param(prefix, epoch, convert=True, ctx=ctx)

    # start testing
    detector = Detector(sym, ctx, args, auxs)
    imdb_boxes = generate_detections(detector, test_data, voc, vis=vis)
    voc.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
Example #6
def test_rpn(image_set, year, root_path, devkit_path, trained, epoch, ctx):
    from rcnn.rpn.generate import Detector, generate_detections

    # load symbol
    sym = get_vgg_rpn_test()

    # load testing data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path)
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')

    # load trained
    args, auxs = load_param(trained, epoch, convert=True, ctx=ctx[0])

    # start testing
    detector = Detector(sym, ctx[0], args, auxs)
    imdb_boxes = generate_detections(detector, test_data, voc, vis=False)
    voc.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
Example #7
def test_rpn(image_set, year, root_path, devkit_path, trained, epoch, ctx):
    from rcnn.rpn.generate import Detector, generate_detections

    # load symbol
    sym = get_vgg_rpn_test()

    # load testing data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path)
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode="test")

    # load trained
    args, auxs = load_param(trained, epoch, convert=True, ctx=ctx[0])

    # start testing
    detector = Detector(sym, ctx[0], args, auxs)
    imdb_boxes = generate_detections(detector, test_data, voc, vis=False)
    voc.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
Example #8
def train_net(config):
    General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, ModelParam, \
    OptimizeParam, TestParam, transform, data_name, label_name, metric_list = config.generate_config(is_train=True)
    pGen = patch_config_as_nothrow(General)
    pKv = patch_config_as_nothrow(KvstoreParam)
    pRpn = patch_config_as_nothrow(RpnParam)
    pRoi = patch_config_as_nothrow(RoiParam)
    pBbox = patch_config_as_nothrow(BboxParam)
    pDataset = patch_config_as_nothrow(DatasetParam)
    pModel = patch_config_as_nothrow(ModelParam)
    pOpt = patch_config_as_nothrow(OptimizeParam)
    pTest = patch_config_as_nothrow(TestParam)

    gpus = pKv.gpus
    if len(gpus) == 0:
        ctx = [mx.cpu()]
    else:
        ctx = [mx.gpu(i) for i in gpus]

    input_batch_size = pKv.batch_image * len(ctx)
    pretrain_prefix = pModel.pretrain.prefix
    pretrain_epoch = pModel.pretrain.epoch
    save_path = os.path.join('experiments', pGen.name)
    model_prefix = os.path.join(save_path, 'checkpoint')
    begin_epoch = pOpt.schedule.begin_epoch
    end_epoch = pOpt.schedule.end_epoch
    lr_steps = pOpt.schedule.lr_steps

    ## load dataset
    if pDataset.Dataset == 'widerface':
        image_set = pDataset.image_set
        roidb = load_gt_roidb(pDataset.Dataset,
                              image_set,
                              root_path='data',
                              dataset_path='data/widerface',
                              flip=True)

    net = pModel.train_network

    if pOpt.schedule.begin_epoch != 0:
        net.load_model(model_prefix, pOpt.schedule.begin_epoch)
    else:
        net.load_model(pretrain_prefix)

    print('hello github!')
Example #9
def test_rpn(image_set, year, root_path, devkit_path, prefix, epoch, ctx, vis):
    # set config
    config.TEST.HAS_RPN = True
    config.TEST.RPN_PRE_NMS_TOP_N = -1
    config.TEST.RPN_POST_NMS_TOP_N = 2000

    # load symbol
    sym = get_vgg_rpn_test()

    # load testing data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path)
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')

    # load model
    args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx)

    # start testing
    detector = Detector(sym, ctx, args, auxs)
    imdb_boxes = generate_detections(detector, test_data, voc, vis=vis)
    voc.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
Example #10
def test_net(imageset, year, root_path, devkit_path, prefix, epoch, ctx, vis):
    # set config
    config.TEST.HAS_RPN = True

    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # load testing data
    voc, roidb = load_gt_roidb(imageset, year, root_path, devkit_path)
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')

    # load model
    args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx)

    # load symbol
    sym = get_vgg_test()

    # detect
    detector = Detector(sym, ctx, args, auxs)
    pred_eval(detector, test_data, voc, vis=vis)
Example #11
def test_net(imageset, year, root_path, devkit_path, prefix, epoch, ctx, vis):
    # set config
    config.TEST.HAS_RPN = True

    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # load testing data
    voc, roidb = load_gt_roidb(imageset, year, root_path, devkit_path)
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')

    # load model
    args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx)

    # load symbol
    sym = get_vgg_test()

    # detect
    detector = Detector(sym, ctx, args, auxs)
    pred_eval(detector, test_data, voc, vis=vis)
Example #12
def test_rcnn(imageset, year, root_path, devkit_path, prefix, epoch, ctx, vis=False, has_rpn=True, proposal='rpn'):
    # load symbol and testing data
    if has_rpn:
        sym = get_vgg_test()
        config.TEST.HAS_RPN = True
        config.TEST.RPN_PRE_NMS_TOP_N = 6000
        config.TEST.RPN_POST_NMS_TOP_N = 300
        voc, roidb = load_gt_roidb(imageset, year, root_path, devkit_path)
    else:
        sym = get_vgg_rcnn_test()
        voc, roidb = eval('load_test_' + proposal + '_roidb')(imageset, year, root_path, devkit_path)

    # get test data iter
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')

    # load model
    args, auxs, _ = load_param(prefix, epoch, convert=True, ctx=ctx)

    # detect
    detector = Detector(sym, ctx, args, auxs)
    pred_eval(detector, test_data, voc, vis=vis)
Example #13
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr,
              lr_step):
    mx.random.seed(3)
    np.random.seed(3)
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    config['final_output_path'] = final_output_path

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)

    feat_pyramid_level = np.log2(config.network.RPN_FEAT_STRIDE).astype(int)
    feat_sym = [
        sym.get_internals()['rpn_cls_score_p' + str(x) + '_output']
        for x in feat_pyramid_level
    ]

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # leonid: add semicolon ';' support so that several different datasets can be merged
    datasets = config.dataset.dataset.split(';')
    image_sets = config.dataset.image_set.split(';')
    data_paths = config.dataset.dataset_path.split(';')
    if type(config.dataset.per_category_epoch_max) is str:
        per_category_epoch_max = [
            float(x) for x in config.dataset.per_category_epoch_max.split(';')
        ]
    else:
        per_category_epoch_max = [float(config.dataset.per_category_epoch_max)]
    roidbs = []
    categ_index_offs = 0
    if 'classes_list_fname' not in config.dataset:
        classes_list_fname = ''
    else:
        classes_list_fname = config.dataset.classes_list_fname

    if 'num_ex_per_class' not in config.dataset:
        num_ex_per_class = ''
    else:
        num_ex_per_class = config.dataset.num_ex_per_class

    for iD, dataset in enumerate(datasets):
        # load dataset and prepare imdb for training
        image_sets_cur = [iset for iset in image_sets[iD].split('+')]
        for image_set in image_sets_cur:
            cur_roidb, cur_num_classes = load_gt_roidb(
                dataset,
                image_set,
                config.dataset.root_path,
                data_paths[iD],
                flip=config.TRAIN.FLIP,
                per_category_epoch_max=per_category_epoch_max[iD],
                return_num_classes=True,
                categ_index_offs=categ_index_offs,
                classes_list_fname=classes_list_fname,
                num_ex_per_class=num_ex_per_class)

            roidbs.append(cur_roidb)
        categ_index_offs += cur_num_classes
        # roidbs.extend([
        #     load_gt_roidb(
        #         dataset,
        #         image_set,
        #         config.dataset.root_path,
        #         data_paths[iD],
        #         flip=config.TRAIN.FLIP,
        #         per_category_epoch_max=per_category_epoch_max[iD])
        #     for image_set in image_sets])
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)

    # load training data

    train_data = PyramidAnchorIterator(
        feat_sym,
        roidb,
        config,
        batch_size=input_batch_size,
        shuffle=config.TRAIN.SHUFFLE,
        ctx=ctx,
        feat_strides=config.network.RPN_FEAT_STRIDE,
        anchor_scales=config.network.ANCHOR_SCALES,
        anchor_ratios=config.network.ANCHOR_RATIOS,
        aspect_grouping=config.TRAIN.ASPECT_GROUPING,
        allowed_border=np.inf)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    if not config.network.base_net_lock:
        data_shape_dict = dict(train_data.provide_data_single +
                               train_data.provide_label_single)
    else:
        data_shape_dict = dict(train_data.provide_data_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)

    if config.TRAIN.LOAD_EMBEDDING:
        import cPickle
        with open(config.TRAIN.EMBEDDING_FNAME, 'rb') as fid:
            model_data = cPickle.load(fid)
        for fcn in ['1', '2', '3']:
            layer = model_data['dense_' + fcn]
            weight = ListList2ndarray(layer[0])
            bias = mx.nd.array(layer[1])
            arg_params['embed_dense_' + fcn + '_weight'] = weight
            arg_params['embed_dense_' + fcn + '_bias'] = bias

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    alt_fixed_param_prefix = config.network.ALT_FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    if not config.network.base_net_lock:
        label_names = [k[0] for k in train_data.provide_label_single]
    else:
        label_names = []

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix,
        alt_fixed_param_prefix=alt_fixed_param_prefix)

    # Leonid: comment out the following two lines when resuming training with fewer GPUs; once it starts running, uncomment them again
    # if config.TRAIN.RESUME:
    #     mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch)
    #TODO: release this.
    # decide training params
    # metric
    if not config.network.base_net_lock:
        rpn_eval_metric = metric.RPNAccMetric()
        rpn_cls_metric = metric.RPNLogLossMetric()
        rpn_bbox_metric = metric.RPNL1LossMetric()
    rpn_fg_metric = metric.RPNFGFraction(config)
    eval_metric = metric.RCNNAccMetric(config)
    eval_fg_metric = metric.RCNNFGAccuracy(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()

    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    if not config.network.base_net_lock:
        all_child_metrics = [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, rpn_fg_metric,
            eval_fg_metric, eval_metric, cls_metric, bbox_metric
        ]
    else:
        all_child_metrics = [
            rpn_fg_metric, eval_fg_metric, eval_metric, cls_metric, bbox_metric
        ]
    # all_child_metrics = [rpn_eval_metric, rpn_bbox_metric, rpn_fg_metric, eval_fg_metric, eval_metric, cls_metric, bbox_metric]

    ################################################
    ### added / updated by Leonid to support oneshot
    ################################################
    if config.network.EMBEDDING_DIM != 0:
        if config.network.EMBED_LOSS_ENABLED:
            all_child_metrics += [
                metric.RepresentativesMetric(config, final_output_path)
            ]  # moved from above. JS.
            all_child_metrics += [metric.EmbedMetric(config)]
            if config.network.BG_REPS:
                all_child_metrics += [metric.BGModelMetric(config)]
        if config.network.REPS_CLS_LOSS:
            all_child_metrics += [metric.RepsCLSMetric(config)]
        if config.network.ADDITIONAL_LINEAR_CLS_LOSS:
            all_child_metrics += [metric.RCNNLinLogLossMetric(config)]
        if config.network.VAL_FILTER_REGRESS:
            all_child_metrics += [metric.ValRegMetric(config)]
        if config.network.SCORE_HIST_REGRESS:
            all_child_metrics += [metric.ScoreHistMetric(config)]
    ################################################

    for child_metric in all_child_metrics:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'clip_gradient': None
    }
    #
    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    if args.debug == 1:
        import copy
        arg_params_ = copy.deepcopy(arg_params)
        aux_params_ = copy.deepcopy(aux_params)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch,
            config=config)

    if args.debug == 1:
        t = dictCompare(aux_params_, aux_params)
        t = dictCompare(arg_params_, arg_params)
Example #14
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step):
    np.random.seed(0)
    mx.random.seed(0)
    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path,
                            flip=config.TRAIN.FLIP)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)

    # load training data
    train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape

    # max_data_shape = [('data', (1, 3, 600, 1000))]
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    # max_data_shape=[], max_label_shape=[]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    logger.info('providing maximum shape'+str(max_data_shape)+"  "+str(max_label_shape))

    data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single)

    # add by chaojie
    logger.info("data_shape_dict:\n{}".format(pprint.pformat(data_shape_dict)))

    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)
    pprint.pprint(sym_instance.arg_shape_dict)

    logger.info("sym_instance.arg_shape_dict\n")
    logger.info(pprint.pformat(sym_instance.arg_shape_dict))
    #dot = mx.viz.plot_network(sym, node_attrs={'shape': 'rect', 'fixedsize': 'false'})
    #dot.render(os.path.join('./output/rcnn/network_vis', config.symbol + '_rcnn'))

    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)],
                        max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch)

    # decide training params
    # metric
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    if config.TRAIN.JOINT_TRAINING or (not config.TRAIN.LEARN_NMS):
        rpn_eval_metric = metric.RPNAccMetric()
        rpn_cls_metric = metric.RPNLogLossMetric()
        rpn_bbox_metric = metric.RPNL1LossMetric()
        for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric]:
            eval_metrics.add(child_metric)
    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    if config.TRAIN.LEARN_NMS:
        eval_metrics.add(metric.NMSLossMetric(config, 'pos'))
        eval_metrics.add(metric.NMSLossMetric(config, 'neg'))
        eval_metrics.add(metric.NMSAccMetric(config))

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True),
                          callback.do_checkpoint(prefix, means, stds)]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {'momentum': config.TRAIN.momentum,
                        'wd': config.TRAIN.wd,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0,
                        'clip_gradient': None}

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=config.default.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
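The learning-rate block above turns epoch boundaries into iteration counts before handing them to the scheduler; a standalone sketch of that arithmetic, with assumed numbers for the base lr, step string, dataset size and GPU count.

base_lr, lr_factor, begin_epoch = 0.001, 0.1, 0
lr_step = '4,6'                     # decay at epochs 4 and 6
num_images, num_gpus = 10000, 2     # stand-ins for len(roidb) and len(ctx)

lr_epoch = [float(e) for e in lr_step.split(',')]
lr_epoch_diff = [e - begin_epoch for e in lr_epoch if e > begin_epoch]
lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))   # 0.001: no boundary passed yet
lr_iters = [int(e * num_images / num_gpus) for e in lr_epoch_diff]   # [20000, 30000]
print(lr, lr_epoch_diff, lr_iters)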
Example #15
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr,
              lr_step):
    # create the logger and its output path
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    # feature symbol: take rpn_cls_score_output from the internals of sym
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    # enable multi-GPU training: each card processes one batch
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    # load the dataset and prepare the training imdb; multiple image sets are joined with '+', e.g. 2007_trainval+2012_trainval
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    # load the gt roidb for each image set from the dataset type, root path and dataset path; TRAIN.FLIP augments the data with flipped copies
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]
    # merge the individual roidbs
    roidb = merge_roidb(roidbs)
    # filter the roidb according to the rules in the config
    roidb = filter_roidb(roidb, config)
    # load training data
    # AnchorLoader builds anchor classification/regression targets (positive/negative anchors) from the roidb; it needs the anchor scales, ratios and the feature stride
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              config,
                              batch_size=input_batch_size,
                              shuffle=config.TRAIN.SHUFFLE,
                              ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE,
                              anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    # load and initialize the parameters; if RESUME is True, continue from the previous run
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        # load the arg and aux params saved at begin_epoch under prefix, converted to GPU NDArrays
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    # verify that the loaded parameter shapes match the inferred data shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # create solver
    # build the training module
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)

    # decide training params
    # metric
    # RPN- and RCNN-related evaluation metrics
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    # MXNet composite metric that aggregates all of the metrics above (RPN acc / log-loss / L1 and RCNN acc / log-loss / L1)
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)

    # callback
    # callbacks fired after each batch and after each epoch
    # batch_end_callback runs once every 'frequent' batches during training
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    # bbox means and stds are tiled twice if class-agnostic, otherwise NUM_CLASSES times
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    # callbacks at the end of every epoch
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate
    # derive the learning rate from the schedule; e.g. the default initial lr for voc is 0.0005
    base_lr = lr
    # learning rate decay factor
    lr_factor = config.TRAIN.lr_factor
    # lr_step has the form '3, 5', meaning the lr decays at epochs 3 and 5
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    # keep only the boundaries after begin_epoch, expressed as offsets epoch - begin_epoch
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    print('lr_epoch', lr_epoch, 'begin_epoch', begin_epoch)
    # compute the lr that the current epoch should already have after earlier decays
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    # lr schedule: multi-factor scheduler with warmup
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    # optimizer parameters: momentum, wd, lr, lr_scheduler, rescale_grad and clip_gradient
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }

    if not isinstance(train_data, PrefetchingIter):
        print('!!!train_data is not PrefetchingIter!!!')
        train_data = PrefetchingIter(train_data)

    # train
    # fit the module on train_data with the composite eval_metrics; epoch_end_callback fires after every epoch and batch_end_callback after every batch; SGD with optimizer_params, arg/aux params, from begin_epoch to end_epoch
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
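The tiling of BBOX_MEANS/BBOX_STDS above repeats the 4 per-coordinate statistics once per class (or twice in the class-agnostic case) before they are folded into the checkpoint; a small sketch with assumed config values.

import numpy as np

BBOX_MEANS = [0.0, 0.0, 0.0, 0.0]        # assumed config.TRAIN.BBOX_MEANS
BBOX_STDS = [0.1, 0.1, 0.2, 0.2]         # assumed config.TRAIN.BBOX_STDS
num_classes, class_agnostic = 21, False  # assumed dataset settings

means = np.tile(np.array(BBOX_MEANS), 2 if class_agnostic else num_classes)
stds = np.tile(np.array(BBOX_STDS), 2 if class_agnostic else num_classes)
print(means.shape, stds.shape)           # (84,) (84,): 4 coordinates x 21 classes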
Example #16
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr,
              lr_step):
    mx.random.seed(3)
    np.random.seed(3)
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = detnet.detnet()
    sym = sym_instance.get_symbol(config, is_train=True)

    feat_pyramid_level = np.log2(config.network.RPN_FEAT_STRIDE).astype(int)
    feat_sym = [
        sym.get_internals()['rpn_cls_score_p' + str(x) + '_output']
        for x in feat_pyramid_level
    ]

    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]

    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)

    train_data = PyramidAnchorIterator(
        feat_sym,
        roidb,
        config,
        batch_size=input_batch_size,
        shuffle=config.TRAIN.SHUFFLE,
        ctx=ctx,
        feat_strides=config.network.RPN_FEAT_STRIDE,
        anchor_scales=config.network.ANCHOR_SCALES,
        anchor_ratios=config.network.ANCHOR_RATIOS,
        aspect_grouping=config.TRAIN.ASPECT_GROUPING,
        allowed_border=np.inf)

    max_data_shape = [('data',
                       (config.TRAIN.BATCH_IMAGES, 3,
                        max([v[0] for v in config.SCALES]),
                        max([int(v[1] // 16 * 16) for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = None, None
        #sym_instance.init_weight(config, arg_params, aux_params)

    #sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = mx.mod.Module(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx)

    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    rpn_fg_metric = metric.RPNFGFraction(config)
    eval_metric = metric.RCNNAccMetric(config)
    eval_fg_metric = metric.RCNNFGAccuracy(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, rpn_fg_metric,
            eval_fg_metric, eval_metric, cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    batch_end_callback = [
        mx.callback.Speedometer(train_data.batch_size,
                                frequent=1,
                                auto_reset=False)
    ]
    epoch_end_callback = [mx.callback.do_checkpoint(prefix, period=1)]
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(step=lr_iters,
                                                        factor=lr_factor)
    optimizer_params = {
        "momentum": config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'clip_gradient': None
    }

    if not isinstance(train_data, mx.io.PrefetchingIter):
        train_data = mx.io.PrefetchingIter(train_data)
    if DEBUG:
        train_data.reset()
        it = train_data.next()
        mod.bind(data_shapes=train_data.provide_data,
                 label_shapes=train_data.provide_label,
                 for_training=True,
                 force_rebind=False)
        mod.init_params(arg_params=arg_params,
                        aux_params=aux_params,
                        allow_missing=True)
        mod.init_optimizer(optimizer_params=optimizer_params)
        eval_metrics.reset()
        next_data_batch = train_data.next()

        for i in range(100):
            print(i)
            mod.forward_backward(next_data_batch)
            mod.update()
            mod.update_metric(eval_metrics, next_data_batch.label)
            print(eval_metrics)

    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
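In the max-shape expression of the example above, the configured width is rounded down to a multiple of 16 so it divides evenly by the coarsest feature stride; a one-line check with an assumed SCALES value.

SCALES = [(800, 1333)]                              # assumed config.SCALES
print(max(int(v[1] // 16 * 16) for v in SCALES))    # 1328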
Example #17
def train_rpn(image_set, year, root_path, devkit_path, pretrained, epoch,
              prefix, ctx, begin_epoch, end_epoch, frequent, kv_store, work_load_list=None, resume=False):
    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # load symbol
    sym = get_vgg_rpn()
    feat_sym = get_vgg_rpn().get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)

    # load training data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=True)
    train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.BATCH_SIZE, shuffle=True, mode='train',
                              ctx=ctx, work_load_list=work_load_list)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_SIZE, 3, 1000, 1000))]
    max_data_shape_dict = {k: v for k, v in max_data_shape}
    _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict)
    from rcnn.minibatch import assign_anchor
    import numpy as np
    label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]])
    max_label_shape = [('label', label['label'].shape),
                       ('bbox_target', label['bbox_target'].shape),
                       ('bbox_inside_weight', label['bbox_inside_weight'].shape),
                       ('bbox_outside_weight', label['bbox_outside_weight'].shape)]
    print('providing maximum shape', max_data_shape, max_label_shape)

    # load pretrained
    args, auxs = load_param(pretrained, epoch, convert=True)

    # initialize params
    if not resume:
        input_shapes = {k: v for k, v in train_data.provide_data + train_data.provide_label}
        arg_shape, _, _ = sym.infer_shape(**input_shapes)
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        args['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        args['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        args['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        args['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        args['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        args['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])

    # prepare training
    if config.TRAIN.FINETUNE:
        fixed_param_prefix = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']
    else:
        fixed_param_prefix = ['conv1', 'conv2']
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent)
    epoch_end_callback = mx.callback.do_checkpoint(prefix)
    if config.TRAIN.HAS_RPN is True:
        eval_metric = AccuracyMetric(use_ignore=True, ignore=-1)
        cls_metric = LogLossMetric(use_ignore=True, ignore=-1)
    else:
        eval_metric = AccuracyMetric()
        cls_metric = LogLossMetric()
    bbox_metric = SmoothL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': 0.001,
                        'lr_scheduler': mx.lr_scheduler.FactorScheduler(60000, 0.1),
                        'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE)}

    # train
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=kv_store,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=args, aux_params=auxs, begin_epoch=begin_epoch, num_epoch=end_epoch)
Example #18
def train_rpn(cfg,
              dataset,
              image_set,
              root_path,
              dataset_path,
              frequent,
              kvstore,
              flip,
              shuffle,
              resume,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              train_shared,
              lr,
              lr_step,
              logger=None,
              output_path=None):
    # set up logger
    if not logger:
        logging.basicConfig()
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)

    # set up config
    cfg.TRAIN.BATCH_IMAGES = cfg.TRAIN.ALTERNATE.RPN_BATCH_IMAGES

    # load symbol
    sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    sym = sym_instance.get_symbol_rpn(cfg, is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = cfg.TRAIN.BATCH_IMAGES * batch_size

    # print cfg
    pprint.pprint(cfg)
    logger.info('training rpn cfg:{}\n'.format(pprint.pformat(cfg)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in image_set.split('+')]
    roidbs = [
        load_gt_roidb(dataset,
                      image_set,
                      root_path,
                      dataset_path,
                      result_path=output_path,
                      flip=flip) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, cfg)

    # load training data
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              cfg,
                              batch_size=input_batch_size,
                              shuffle=shuffle,
                              ctx=ctx,
                              feat_stride=cfg.network.RPN_FEAT_STRIDE,
                              anchor_scales=cfg.network.ANCHOR_SCALES,
                              anchor_ratios=cfg.network.ANCHOR_RATIOS,
                              aspect_grouping=cfg.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (cfg.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in cfg.SCALES]),
                                max([v[1] for v in cfg.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if resume:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight_rpn(cfg, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # create solver
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]
    if train_shared:
        fixed_param_prefix = cfg.network.FIXED_PARAMS_SHARED
    else:
        fixed_param_prefix = cfg.network.FIXED_PARAMS
    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    eval_metric = metric.RPNAccMetric()
    cls_metric = metric.RPNLogLossMetric()
    bbox_metric = metric.RPNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=frequent)
    # epoch_end_callback = mx.callback.do_checkpoint(prefix)
    epoch_end_callback = mx.callback.module_checkpoint(
        mod, prefix, period=1, save_optimizer_states=True)
    # decide learning rate
    base_lr = lr
    lr_factor = cfg.TRAIN.lr_factor
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              cfg.TRAIN.warmup,
                                              cfg.TRAIN.warmup_lr,
                                              cfg.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'momentum': cfg.TRAIN.momentum,
        'wd': cfg.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
Example #19
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr,
              lr_step):
    mx.random.seed(3)
    np.random.seed(3)
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)

    feat_pyramid_level = np.log2(config.network.RPN_FEAT_STRIDE).astype(int)
    feat_sym = [
        sym.get_internals()['rpn_cls_score_p' + str(x) + '_output']
        for x in feat_pyramid_level
    ]

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)

    # load training data

    train_data = PyramidAnchorIterator(
        feat_sym,
        roidb,
        config,
        batch_size=input_batch_size,
        shuffle=config.TRAIN.SHUFFLE,
        ctx=ctx,
        feat_strides=config.network.RPN_FEAT_STRIDE,
        anchor_scales=config.network.ANCHOR_SCALES,
        anchor_ratios=config.network.ANCHOR_RATIOS,
        aspect_grouping=config.TRAIN.ASPECT_GROUPING,
        allowed_border=np.inf)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print 'providing maximum shape', max_data_shape, max_label_shape

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    # sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    rpn_fg_metric = metric.RPNFGFraction(config)
    eval_metric = metric.RCNNAccMetric(config)
    eval_fg_metric = metric.RCNNFGAccuracy(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, rpn_fg_metric,
            eval_fg_metric, eval_metric, cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    # batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    # epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1,
    # save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds)]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'clip_gradient': None
    }
    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    net = FPNNet(sym, args_pretrained=arg_params, auxes_pretrained=aux_params)

    # create multi-threaded DataParallel Model.
    net_parallel = DataParallelModel(net, ctx_list=ctx)

    # create trainer.
    # Important: the trainer can only be created after `reset_ctx` has been called;
    # DataParallelModel calls reset_ctx to initialize the parameters on the GPUs.
    trainer = mx.gluon.Trainer(net.collect_params(), 'sgd', optimizer_params)

    for epoch in range(begin_epoch, config.TRAIN.end_epoch):
        train_data.reset()
        net.hybridize(static_alloc=True, static_shape=False)
        progress_bar = tqdm.tqdm(total=len(roidb))
        for nbatch, data_batch in enumerate(train_data):
            inputs = [[
                x.astype('f').as_in_context(c) for x in d + l
            ] for c, d, l in zip(ctx, data_batch.data, data_batch.label)]
            with ag.record():
                outputs = net_parallel(*inputs)
                ag.backward(sum(outputs, ()))
            trainer.step(1)
            eval_metrics.update(data_batch.label[0], outputs[0])
            if nbatch % 100 == 0:
                msg = ','.join([
                    '{}={:.3f}'.format(w, v)
                    for w, v in zip(*eval_metrics.get())
                ])
                msg += ",lr={}".format(trainer.learning_rate)
                logger.info(msg)
                print(msg)
                eval_metrics.reset()
            progress_bar.update(len(inputs))
        progress_bar.close()
        net.hybridize(static_alloc=True, static_shape=False)
        val_result = ("mAP", 0.0)  # placeholder: no validation pass is run here
        logger.info(val_result)
        save_path = "{}-{}-{}.params".format(prefix, epoch, val_result[1])
        net.collect_params().save(save_path)
        logger.info("Saved checkpoint to {}.".format(save_path))
Ejemplo n.º 20
0
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step):
    if config.dataset.dataset != 'JSONList':
        logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)
        prefix = os.path.join(final_output_path, prefix)
        shutil.copy2(args.cfg, prefix+'.yaml')
    else:
        import datetime
        import logging
        final_output_path = config.output_path
        prefix = prefix + '_' + datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
        prefix = os.path.join(final_output_path, prefix)
        shutil.copy2(args.cfg, prefix+'.yaml')
        log_file = prefix + '.log'
        head = '%(asctime)-15s %(message)s'
        logging.basicConfig(filename=log_file, format=head)
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        logger.info('prefix: %s' % prefix)
        print('prefix: %s' % prefix)

    # load symbol
    #shutil.copy2(os.path.join(curr_path, '..', 'symbols', config.symbol + '.py'), final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)

    # setup multi-gpu
    batch_size = config.TRAIN.IMAGES_PER_GPU * len(ctx)

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path,
                            flip=config.TRAIN.FLIP)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)
    # load training data
    if config.network.MULTI_RPN:
        assert False, 'still developing' ###
        num_layers = len(config.network.MULTI_RPN_STRIDES)
        rpn_syms = [sym.get_internals()['rpn%d_cls_score_output'% l] for l in range(num_layers)]
        train_data = PyramidAnchorLoader(rpn_syms, roidb, config, batch_size=batch_size, shuffle=config.TRAIN.SHUFFLE,
                                         ctx=ctx, feat_strides=config.network.MULTI_RPN_STRIDES, anchor_scales=config.network.ANCHOR_SCALES,
                                         anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING,
                                         allowed_border=np.inf)
    else:
        feat_sym = sym.get_internals()['rpn_cls_score_output']
        train_data = AnchorLoader(feat_sym, roidb, config, batch_size=batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx,
                                  feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES,
                                  anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    mod = MutableModule(sym,
                        train_data.data_names,
                        train_data.label_names,
                        context=ctx,
                        logger=logger,
                        fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    if config.network.PREDICT_KEYPOINTS:
        kps_cls_acc = metric.KeypointAccMetric(config)
        kps_cls_loss = metric.KeypointLogLossMetric(config)
        kps_pos_loss = metric.KeypointL1LossMetric(config)
        eval_metrics.add(kps_cls_acc)
        eval_metrics.add(kps_cls_loss)
        eval_metrics.add(kps_pos_loss)

    # callback
    batch_end_callback = callback.Speedometer(batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [mx.callback.do_checkpoint(prefix)]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {'momentum': config.TRAIN.momentum,
                        'wd': config.TRAIN.wd,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0,
                        'clip_gradient': None}

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=config.TRAIN.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
    time.sleep(10)
    train_data.iters[0].terminate()
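All of these training functions report their losses through the same composition pattern: child metrics added to one mx.metric.CompositeEvalMetric. A small sketch with two built-in metrics standing in for the custom RPN/RCNN ones:

import mxnet as mx

composite = mx.metric.CompositeEvalMetric()
for child in [mx.metric.Accuracy(), mx.metric.CrossEntropy()]:
    composite.add(child)

labels = [mx.nd.array([0, 1, 1])]
preds = [mx.nd.array([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6]])]
composite.update(labels, preds)
print(dict(zip(*composite.get())))  # {'accuracy': 1.0, 'cross-entropy': ...}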
Ejemplo n.º 21
0
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step):
    # create the logger and its output path
    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    # feature symbol: take rpn_cls_score_output from the network sym
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    # enable multi-GPU training; each card processes one batch
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    # load the dataset and prepare the imdb for training; image sets are separated by '+', e.g. 2007_trainval+2012_trainval
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    # load the gt roidb from the dataset type, image set, root path and dataset path; TRAIN.FLIP adds flipped copies to augment the data
    roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path,
                            flip=config.TRAIN.FLIP)
              for image_set in image_sets]
    # merge the individual roidbs
    roidb = merge_roidb(roidbs)
    # filter rois according to the rules in the config
    roidb = filter_roidb(roidb, config)
    # load training data
    # AnchorLoader builds the anchor classification and regression targets from the roidb (sampling positive/negative anchors); it needs the anchor scales, ratios and the feature stride
    train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)],
                        max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds)]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {'momentum': config.TRAIN.momentum,
                        'wd': config.TRAIN.wd,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0,
                        'clip_gradient': None}

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=config.default.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
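The means/stds tiling passed to callback.do_checkpoint above un-normalizes the bbox regression weights at save time; the tiling itself is plain numpy. A standalone sketch with typical default values (the exact numbers come from the config, so treat them as assumptions):

import numpy as np

BBOX_MEANS = [0.0, 0.0, 0.0, 0.0]   # per-coordinate target means (assumed defaults)
BBOX_STDS = [0.1, 0.1, 0.2, 0.2]    # per-coordinate target stds (assumed defaults)

def tiled_bbox_stats(class_agnostic, num_classes):
    # one group of 4 values per class, or just 2 groups (bg/fg) when class-agnostic
    reps = 2 if class_agnostic else num_classes
    return np.tile(np.array(BBOX_MEANS), reps), np.tile(np.array(BBOX_STDS), reps)

means, stds = tiled_bbox_stats(class_agnostic=False, num_classes=21)
assert means.shape == (84,) and stds.shape == (84,)  # 21 classes * 4 coordinates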
Ejemplo n.º 22
0
def train_feature_distance_net(args, ctx, pretrained, pretrained_flow, epoch,
                               prefix, begin_epoch, end_epoch, lr, lr_step):
    # ==============prepare logger==============
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # ==============load symbol==============
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    if config.TRAIN.G_type == 0:
        sym = sym_instance.get_train_feature_distance_symbol(config)
    elif config.TRAIN.G_type == 1:
        sym = sym_instance.get_train_feature_distance_symbol_res(config)

    # ==============setup multi-gpu==============
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # ==============print config==============
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # ==============load dataset and prepare imdb for training==============
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)
    train_iter = ImagenetVIDIter(roidb, input_batch_size, config,
                                 config.TRAIN.SHUFFLE, ctx)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES]))),
                      ('data_ref', (config.TRAIN.BATCH_IMAGES, 3,
                                    max([v[0] for v in config.SCALES]),
                                    max([v[1] for v in config.SCALES])))]
    print('providing maximum shape', max_data_shape)

    data_shape_dict = dict(train_iter.provide_data_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # ==============init params==============
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # arg_params_flow, aux_params_flow = load_param(pretrained_flow, epoch, convert=True)
        # arg_params.update(arg_params_flow)
        # aux_params.update(aux_params_flow)
        sym_instance.init_weight(config, arg_params, aux_params)

    # ==============check parameter shapes==============
    # sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # ==============create solver==============
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = train_iter.data_name
    label_names = train_iter.label_name

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=None,
        fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)

    # ==============optimizer==============
    optimizer_params = {
        'learning_rate': 0.00005,
    }

    if not isinstance(train_iter, PrefetchingIter):
        train_iter = PrefetchingIter(train_iter)

    batch_end_callback = callback.Speedometer(train_iter.batch_size,
                                              frequent=args.frequent)
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix)
    ]

    feature_L2_loss = metric.FeatureL2LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    eval_metrics.add(feature_L2_loss)

    mod.fit(train_iter,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='RMSprop',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch,
            initializer=mx.init.Normal(0.02),
            allow_missing=True)
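Every example wraps its loader in PrefetchingIter just before fitting so batch preparation overlaps with GPU compute. A self-contained sketch using mx.io.PrefetchingIter (assumed here to play the same role as the repo-local class of the same name):

import mxnet as mx
import numpy as np

base_iter = mx.io.NDArrayIter(np.zeros((8, 3, 32, 32)), np.zeros((8,)), batch_size=2)
train_iter = base_iter
if not isinstance(train_iter, mx.io.PrefetchingIter):
    train_iter = mx.io.PrefetchingIter(train_iter)  # batches produced in a background thread
for batch in train_iter:
    pass  # consume prefetched batches exactly like the plain iterator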
Ejemplo n.º 23
0
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr,
              lr_step):
    mx.random.seed(3)
    np.random.seed(3)
    if not os.path.exists(config.output_path):
        os.mkdir(config.output_path)
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)

    feat_pyramid_level = np.log2(config.network.RPN_FEAT_STRIDE).astype(int)
    feat_sym = [
        sym.get_internals()['rpn_cls_score_p' + str(x) + '_output']
        for x in feat_pyramid_level
    ]

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    #pprint.pprint(config)
    #logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)

    # load training data

    train_data = PyramidAnchorIterator(
        feat_sym,
        roidb,
        config,
        batch_size=input_batch_size,
        shuffle=config.TRAIN.SHUFFLE,
        ctx=ctx,
        feat_strides=config.network.RPN_FEAT_STRIDE,
        anchor_scales=config.network.ANCHOR_SCALES,
        anchor_ratios=config.network.ANCHOR_RATIOS,
        aspect_grouping=config.TRAIN.ASPECT_GROUPING,
        allowed_border=np.inf)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES,
                                3 * config.CROP_NUM * config.CROP_NUM,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100,
                                        6)))  # gt_boxes widened to (1, 100, 6)
    print('providing maximum shape', max_data_shape, max_label_shape)

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        single_shape = arg_params['conv1_weight'].shape
        temp_conv1_weight = nd.empty(
            (single_shape[0], single_shape[1] * config.CROP_NUM *
             config.CROP_NUM, single_shape[2], single_shape[3]),
            dtype=arg_params['conv1_weight'].dtype)
        for i in range(config.CROP_NUM * config.CROP_NUM):
            temp_conv1_weight[:, i * single_shape[1]:(i + 1) *
                              single_shape[1], :, :] = arg_params[
                                  'conv1_weight'][:, :, :, :]
        del arg_params['conv1_weight']
        arg_params['conv1_weight'] = temp_conv1_weight
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    rpn_fg_metric = metric.RPNFGFraction(config)
    eval_metric = metric.RCNNAccMetric(config)
    eval_fg_metric = metric.RCNNFGAccuracy(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    #fl_metric = metric.FocalLoss()
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, rpn_fg_metric,
            eval_fg_metric, eval_metric, cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'clip_gradient': None
    }
    #
    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
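The conv1_weight block above widens a pretrained 3-channel first convolution so it accepts CROP_NUM*CROP_NUM stacked crops. The same replication in a standalone numpy sketch (shapes are hypothetical):

import numpy as np

CROP_NUM = 2
conv1_weight = np.random.randn(64, 3, 7, 7)  # pretrained (out, in, kH, kW)
reps = CROP_NUM * CROP_NUM
# repeat the 3 input channels once per crop so every crop starts from the same filters
wide_weight = np.tile(conv1_weight, (1, reps, 1, 1))
assert wide_weight.shape == (64, 3 * reps, 7, 7)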
Ejemplo n.º 24
0
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr,
              lr_step):
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)
    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()

    sym = sym_instance.get_retina_symbol(config, is_train=True)
    feat_sym = []

    feat_sym_p4 = sym.get_internals()['box_pred/p4_output']
    feat_sym_p5 = sym.get_internals()['box_pred/p5_output']
    feat_sym_p6 = sym.get_internals()['box_pred/p6_output']
    feat_sym_p7 = sym.get_internals()['box_pred/p7_output']

    feat_sym.append(feat_sym_p4)
    feat_sym.append(feat_sym_p5)
    feat_sym.append(feat_sym_p6)
    feat_sym.append(feat_sym_p7)
    #######
    feat_stride = []
    feat_stride.append(config.network.p4_RPN_FEAT_STRIDE)
    feat_stride.append(config.network.p5_RPN_FEAT_STRIDE)
    feat_stride.append(config.network.p6_RPN_FEAT_STRIDE)
    feat_stride.append(config.network.p7_RPN_FEAT_STRIDE)
    anchor_scales = []

    anchor_scales.append(config.network.p4_ANCHOR_SCALES)
    anchor_scales.append(config.network.p5_ANCHOR_SCALES)
    anchor_scales.append(config.network.p6_ANCHOR_SCALES)
    anchor_scales.append(config.network.p7_ANCHOR_SCALES)
    anchor_ratios = []

    anchor_ratios.append(config.network.p4_ANCHOR_RATIOS)
    anchor_ratios.append(config.network.p5_ANCHOR_RATIOS)
    anchor_ratios.append(config.network.p6_ANCHOR_RATIOS)
    anchor_ratios.append(config.network.p7_ANCHOR_RATIOS)
    #############

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)

    roidb = filter_roidb(roidb, config)

    # load training data
    train_data = AnchorLoader(feat_sym,
                              feat_stride,
                              anchor_scales,
                              anchor_ratios,
                              roidb,
                              config,
                              batch_size=input_batch_size,
                              shuffle=config.TRAIN.SHUFFLE,
                              ctx=ctx,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)
    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)
    # infer max shape

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)
    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)

    # decide training params
    # metric
    Retina_toal_eval_metric = metric.RetinaToalAccMetric()
    Retina_cls_metric = metric.RetinaFocalLossMetric()
    Retina_bbox_metric = metric.RetinaL1LossMetric()

    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [
            Retina_toal_eval_metric, Retina_cls_metric, Retina_bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print(lr_step.split(','))
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'learning_rate': lr,
        'wd': 0.0001,
    }

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)
    # train
    initializer = mx.init.MSRAPrelu(factor_type='out', slope=0)
    # adam = mx.optimizer.AdaDelta(rho=0.09,  epsilon=1e-14)
    #optimizer_params=optimizer_params,

    print "-----------------------train--------------------------------"
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='adam',
            optimizer_params=optimizer_params,
            initializer=initializer,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
Ejemplo n.º 25
0
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step):
    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path,
                            flip=config.TRAIN.FLIP)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)

    # load training data
    train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)],
                        max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds)]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {'momentum': config.TRAIN.momentum,
                        'wd': config.TRAIN.wd,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0,
                        'clip_gradient': None}

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=config.default.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
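WarmupMultiFactorScheduler is a repo-local class, but the step-decay part of its behaviour matches MXNet's built-in MultiFactorScheduler (the warmup variant additionally ramps the rate up from warmup_lr over the first warmup_step iterations). A small sketch of the built-in scheduler:

import mxnet as mx

scheduler = mx.lr_scheduler.MultiFactorScheduler(step=[5000, 8000], factor=0.1)
scheduler.base_lr = 0.001
print(scheduler(1000))  # ~1e-3, before the first step
print(scheduler(6000))  # ~1e-4, after the first step
print(scheduler(9000))  # ~1e-5, after the second step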
Ejemplo n.º 26
0
def train_rpn(
    image_set,
    year,
    root_path,
    devkit_path,
    pretrained,
    epoch,
    prefix,
    ctx,
    begin_epoch,
    end_epoch,
    frequent,
    kv_store,
    work_load_list=None,
):
    # load symbol
    sym = get_vgg_rpn()
    feat_sym = get_vgg_rpn().get_internals()["rpn_cls_score_output"]

    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)

    # load training data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=True)
    train_data = AnchorLoader(
        feat_sym,
        roidb,
        batch_size=config.TRAIN.BATCH_SIZE,
        shuffle=True,
        mode="train",
        ctx=ctx,
        work_load_list=work_load_list,
    )

    # infer max shape
    max_data_shape = [("data", (1, 3, 1000, 1000))]
    max_data_shape_dict = {k: v for k, v in max_data_shape}
    _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict)
    from rcnn.minibatch import assign_anchor
    import numpy as np

    label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]])
    max_label_shape = [
        ("label", label["label"].shape),
        ("bbox_target", label["bbox_target"].shape),
        ("bbox_inside_weight", label["bbox_inside_weight"].shape),
        ("bbox_outside_weight", label["bbox_outside_weight"].shape),
    ]
    print "providing maximum shape", max_data_shape, max_label_shape

    # load pretrained
    args, auxs = load_param(pretrained, epoch, convert=True)

    # initialize params
    arg_shape, _, _ = sym.infer_shape(data=(1, 3, 224, 224))
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    args["rpn_conv_3x3_weight"] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict["rpn_conv_3x3_weight"])
    args["rpn_conv_3x3_bias"] = mx.nd.zeros(shape=arg_shape_dict["rpn_conv_3x3_bias"])
    args["rpn_cls_score_weight"] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict["rpn_cls_score_weight"])
    args["rpn_cls_score_bias"] = mx.nd.zeros(shape=arg_shape_dict["rpn_cls_score_bias"])
    args["rpn_bbox_pred_weight"] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict["rpn_bbox_pred_weight"])
    args["rpn_bbox_pred_bias"] = mx.nd.zeros(shape=arg_shape_dict["rpn_bbox_pred_bias"])

    # train
    solver = Solver(
        prefix,
        sym,
        ctx,
        begin_epoch,
        end_epoch,
        kv_store,
        args,
        auxs,
        momentum=0.9,
        wd=0.0005,
        learning_rate=1e-3,
        lr_scheduler=mx.lr_scheduler.FactorScheduler(60000, 0.1),
        mutable_data_shape=True,
        max_data_shape=max_data_shape,
        max_label_shape=max_label_shape,
    )
    solver.fit(train_data, frequent=frequent)
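The manual initialization above gives the new RPN head small Gaussian weights and zero biases while the backbone keeps its pretrained values. A minimal sketch of the same idea on a single layer, using the current mx.random.normal signature:

import mxnet as mx

data = mx.sym.Variable('data')
conv = mx.sym.Convolution(data, name='rpn_conv_3x3', kernel=(3, 3), pad=(1, 1), num_filter=512)
arg_shapes, _, _ = conv.infer_shape(data=(1, 3, 224, 224))
arg_shape_dict = dict(zip(conv.list_arguments(), arg_shapes))

args = {
    'rpn_conv_3x3_weight': mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight']),
    'rpn_conv_3x3_bias': mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias']),
}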
Ejemplo n.º 27
0
def train_net(args, ctx, pretrained_dir, pretrained_resnet, epoch, prefix,
              begin_epoch, end_epoch, lr, lr_step):
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    git_commit_id = commands.getoutput('git rev-parse HEAD')
    print("Git commit id:", git_commit_id)
    logger.info('Git commit id: {}'.format(git_commit_id))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      motion_iou_path=config.dataset.motion_iou_path,
                      flip=config.TRAIN.FLIP,
                      use_philly=args.usePhilly) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)
    # load training data
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              config,
                              batch_size=input_batch_size,
                              shuffle=config.TRAIN.SHUFFLE,
                              ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE,
                              anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING,
                              normalize_target=config.network.NORMALIZE_RPN,
                              bbox_mean=config.network.ANCHOR_MEANS,
                              bbox_std=config.network.ANCHOR_STDS)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix)

    # load and initialize params
    params_loaded = False
    if config.TRAIN.RESUME:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)
        print('continue training from ', begin_epoch)
        logger.info('continue training from {}'.format(begin_epoch))
        params_loaded = True
    elif config.TRAIN.AUTO_RESUME:
        for cur_epoch in range(end_epoch - 1, begin_epoch, -1):
            params_filename = '{}-{:04d}.params'.format(prefix, cur_epoch)
            states_filename = '{}-{:04d}.states'.format(prefix, cur_epoch)
            if os.path.exists(params_filename) and os.path.exists(
                    states_filename):
                begin_epoch = cur_epoch
                arg_params, aux_params = load_param(prefix,
                                                    cur_epoch,
                                                    convert=True)
                mod._preload_opt_states = states_filename
                print('auto continue training from {}, {}'.format(
                    params_filename, states_filename))
                logger.info('auto continue training from {}, {}'.format(
                    params_filename, states_filename))
                params_loaded = True
                break
    if not params_loaded:
        arg_params, aux_params = load_param(os.path.join(
            pretrained_dir, pretrained_resnet),
                                            epoch,
                                            convert=True)

    sym_instance.init_weight(config, arg_params, aux_params)
    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # decide training params
    # metric
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()

    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    if config.TRAIN.JOINT_TRAINING or (not config.TRAIN.LEARN_NMS):
        rpn_eval_metric = metric.RPNAccMetric()
        rpn_cls_metric = metric.RPNLogLossMetric()
        rpn_bbox_metric = metric.RPNL1LossMetric()
        for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric]:
            eval_metrics.add(child_metric)
    if config.TRAIN.LEARN_NMS:
        eval_metrics.add(metric.NMSLossMetric(config, 'pos'))
        eval_metrics.add(metric.NMSLossMetric(config, 'neg'))
        eval_metrics.add(metric.NMSAccMetric(config))

    # callback
    batch_end_callback = [
        callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    ]

    if config.USE_PHILLY:
        total_iter = (end_epoch - begin_epoch) * len(roidb) / input_batch_size
        progress_frequent = min(args.frequent * 10, 100)
        batch_end_callback.append(
            callback.PhillyProgressCallback(total_iter, progress_frequent))

    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate
    # base_lr = lr * len(ctx) * config.TRAIN.BATCH_IMAGES
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)
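        # the prefetching wrapper prepares the next batch in a background thread so
        # the GPUs are not left waiting on data loading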

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)

def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr,
              lr_step):
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']
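    # feat_sym is the RPN score map; AnchorLoader infers its spatial size per image
    # to generate anchors and RPN labels on the data-loading side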

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)
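    # merge_roidb concatenates the per-image-set roidbs; filter_roidb drops images
    # without usable ROIs so every training sample has valid targets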
    # load training data
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              config,
                              batch_size=input_batch_size,
                              shuffle=config.TRAIN.SHUFFLE,
                              ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE,
                              anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
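    # reserve room for at most 100 ground-truth boxes per image (4 coords + class id)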
    print('providing maximum shape', max_data_shape, max_label_shape)

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    #if config.TRAIN.RESUME:
    #    print('continue training from ', begin_epoch)
    #    arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    #else:
    #    arg_params, aux_params = load_param(pretrained, epoch, convert=True)
    #    sym_instance.init_weight(config, arg_params, aux_params)

    print('transfer learning...')

    # Choose the initialization weights (COCO or UADETRAC or pretrained)
    #arg_params, aux_params = load_param('/raid10/home_ext/Deformable-ConvNets/output/rfcn_dcn_Shuo_UADTRAC/resnet_v1_101_voc0712_rfcn_dcn_Shuo_UADETRAC/trainlist_full/rfcn_UADTRAC', 5, convert=True)
    #arg_params, aux_params = load_param('/raid10/home_ext/Deformable-ConvNets/model/rfcn_dcn_coco', 0, convert=True)
    arg_params, aux_params = load_param(
        '/raid10/home_ext/Deformable-ConvNets/output/rfcn_dcn_Shuo_AICity/resnet_v1_101_voc0712_rfcn_dcn_Shuo_AICityVOC1080_FreezeCOCO_rpnOnly_all/1080_all/rfcn_AICityVOC1080_FreezeCOCO_rpnOnly_all',
        4,
        convert=True)

    sym_instance.init_weight_Shuo(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix)

    # freeze parameters via fixed_param_names (list of str)
    with open(
            '/raid10/home_ext/Deformable-ConvNets/rfcn/symbols/arg_params.txt') as para_file:
        para_list = [line.split('<')[0] for line in para_file.readlines()]
    #    para_list.remove('rfcn_cls_weight')
    #    para_list.remove('rfcn_cls_bias')
    #    para_list.remove('rfcn_cls_offset_t_weight')
    #    para_list.remove('rfcn_cls_offset_t_bias')
    #
    para_list.remove('res5a_branch2b_offset_weight')
    para_list.remove('res5a_branch2b_offset_bias')
    para_list.remove('res5b_branch2b_offset_weight')
    para_list.remove('res5b_branch2b_offset_bias')
    para_list.remove('res5c_branch2b_offset_weight')
    para_list.remove('res5c_branch2b_offset_bias')
    para_list.remove('conv_new_1_weight')
    para_list.remove('conv_new_1_bias')
    para_list.remove('rfcn_bbox_weight')
    para_list.remove('rfcn_bbox_bias')
    para_list.remove('rfcn_bbox_offset_t_weight')
    para_list.remove('rfcn_bbox_offset_t_bias')
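    # everything left in para_list is passed as fixed_param_names and frozen; only
    # the parameters removed above (the res5 offset layers, conv_new_1, rfcn_bbox and
    # its offsets) remain trainable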

    mod = MutableModule_Shuo(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix,
        fixed_param_names=para_list)

    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)
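        # resuming also reloads the optimizer (momentum) state saved by module_checkpoint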

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
Ejemplo n.º 29
0
def end2end_train(image_set, test_image_set, year, root_path, devkit_path, pretrained, epoch, prefix,
                  ctx, begin_epoch, num_epoch, frequent, kv_store, mom, wd, lr, num_classes, monitor,
                  work_load_list=None, resume=False, use_flip=True, factor_step=50000):
    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    mon = None
    config.TRAIN.BG_THRESH_HI = 0.5  # TODO(verify)
    config.TRAIN.BG_THRESH_LO = 0.0  # TODO(verify)
    config.TRAIN.RPN_MIN_SIZE = 16

    logging.info('########## TRAIN FASTER-RCNN WITH APPROXIMATE JOINT END2END #############')
    config.TRAIN.HAS_RPN = True
    config.END2END = 1
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True
    sym = get_faster_rcnn(num_classes=num_classes)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    config.TRAIN.IMS_PER_BATCH *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)  # not used here

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.IMS_PER_BATCH, 3, 1000, 1000))]
    max_data_shape_dict = {k: v for k, v in max_data_shape}
    _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict)
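    # run assign_anchor once on an empty ground-truth array at the largest image size
    # to obtain the maximum RPN label shapes for buffer pre-allocation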
    from rcnn.minibatch import assign_anchor
    import numpy as np
    label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]])
    max_label_shape = [('label', label['label'].shape),
                       ('bbox_target', label['bbox_target'].shape),
                       ('bbox_inside_weight', label['bbox_inside_weight'].shape),
                       ('bbox_outside_weight', label['bbox_outside_weight'].shape),
                       ('gt_boxes', (config.TRAIN.IMS_PER_BATCH, 5*100))]  # assume at most 100 objects per image
    print('providing maximum shape', max_data_shape, max_label_shape)

    # load training data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=use_flip)
    train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.IMS_PER_BATCH, shuffle=True, mode='train',
                              ctx=ctx, work_load_list=work_load_list)
    # load pretrained
    args, auxs, _ = load_param(pretrained, epoch, convert=True)

    # initialize params
    if not resume:
        del args['fc8_weight']
        del args['fc8_bias']
        input_shapes = {k: (1,) + v[1:] for k, v in train_data.provide_data + train_data.provide_label}
        arg_shape, _, _ = sym.infer_shape(**input_shapes)
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))

        args['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        args['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        args['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        args['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        args['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['rpn_bbox_pred_weight'])  # small std so the predicted bbox deltas do not explode early in training
        args['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])
        args['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        args['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
        args['bbox_pred_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['bbox_pred_weight'])
        args['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])
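        # the new RPN and detection heads are initialized with Gaussian(0, 0.01)
        # weights (0.001 for rpn_bbox_pred) and zero biases; the ImageNet fc8
        # classifier weights were dropped above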

    # prepare training
    if config.TRAIN.FINETUNE:
        fixed_param_prefix = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']
    else:
        fixed_param_prefix = ['conv1', 'conv2']
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent)
    epoch_end_callback = do_checkpoint(prefix)
    rpn_eval_metric = AccuracyMetric(use_ignore=True, ignore=-1, ex_rpn=True)
    rpn_cls_metric = LogLossMetric(use_ignore=True, ignore=-1, ex_rpn=True)
    rpn_bbox_metric = SmoothL1LossMetric(ex_rpn=True)
    eval_metric = AccuracyMetric()
    cls_metric = LogLossMetric()
    bbox_metric = SmoothL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    optimizer_params = {'momentum': mom,
                        'wd': wd,
                        'learning_rate': lr,
                        'lr_scheduler': mx.lr_scheduler.FactorScheduler(factor_step, 0.1),
                        'clip_gradient': 1.0,
                        'rescale_grad': 1.0 }
                        # 'rescale_grad': (1.0 / config.TRAIN.RPN_BATCH_SIZE)}
    # train
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    if monitor:
        def norm_stat(d):
            return mx.nd.norm(d)/np.sqrt(d.size)
        mon = mx.mon.Monitor(100, norm_stat)
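        # the monitor logs norm_stat (RMS of each array) every 100 batches, which
        # helps spot exploding activations or gradients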

    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=kv_store,
            optimizer='sgd', optimizer_params=optimizer_params, monitor=mon,
            arg_params=args, aux_params=auxs, begin_epoch=begin_epoch, num_epoch=num_epoch)

def train_rpn(cfg, dataset, image_set, root_path, dataset_path,
              frequent, kvstore, flip, shuffle, resume,
              ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
              train_shared, lr, lr_step, logger=None, output_path=None):
    # set up logger
    if not logger:
        logging.basicConfig()
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)

    # set up config
    cfg.TRAIN.BATCH_IMAGES = cfg.TRAIN.ALTERNATE.RPN_BATCH_IMAGES

    # load symbol
    sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    sym = sym_instance.get_symbol_rpn(cfg, is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = cfg.TRAIN.BATCH_IMAGES * batch_size

    # print cfg
    pprint.pprint(cfg)
    logger.info('training rpn cfg:{}\n'.format(pprint.pformat(cfg)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in image_set.split('+')]
    roidbs = [load_gt_roidb(dataset, image_set, root_path, dataset_path, result_path=output_path,
                            flip=flip)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, cfg)

    # load training data
    train_data = AnchorLoader(feat_sym, roidb, cfg, batch_size=input_batch_size, shuffle=shuffle,
                              ctx=ctx, feat_stride=cfg.network.RPN_FEAT_STRIDE, anchor_scales=cfg.network.ANCHOR_SCALES,
                              anchor_ratios=cfg.network.ANCHOR_RATIOS, aspect_grouping=cfg.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (cfg.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    print('providing maximum shape', max_data_shape, max_label_shape)

    # infer shape
    data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if resume:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight_rpn(cfg, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # create solver
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]
    if train_shared:
        fixed_param_prefix = cfg.network.FIXED_PARAMS_SHARED
    else:
        fixed_param_prefix = cfg.network.FIXED_PARAMS
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)],
                        max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    eval_metric = metric.RPNAccMetric()
    cls_metric = metric.RPNLogLossMetric()
    bbox_metric = metric.RPNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=frequent)
    # epoch_end_callback = mx.callback.do_checkpoint(prefix)
    epoch_end_callback = mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True)
    # decide learning rate
    base_lr = lr
    lr_factor = cfg.TRAIN.lr_factor
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, cfg.TRAIN.warmup, cfg.TRAIN.warmup_lr, cfg.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {'momentum': cfg.TRAIN.momentum,
                        'wd': cfg.TRAIN.wd,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0,
                        'clip_gradient': None}

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)

def train_net(args, ctx, pretrained, pretrained_flow, epoch, prefix,
              begin_epoch, end_epoch, lr, lr_step):
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_train_symbol(config)
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    feat_conv_3x3_relu = sym.get_internals()['feat_conv_3x3_relu_output']
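    # feat_conv_3x3_relu is the shared feature map; its inferred spatial size is used
    # below to shape the motion_vector input (presumably for flow-guided feature warping)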

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)
    # load training data
    train_data = AnchorLoader(feat_sym,
                              feat_conv_3x3_relu,
                              roidb,
                              config,
                              batch_size=input_batch_size,
                              shuffle=config.TRAIN.SHUFFLE,
                              ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE,
                              anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING,
                              normalize_target=config.network.NORMALIZE_RPN,
                              bbox_mean=config.network.ANCHOR_MEANS,
                              bbox_std=config.network.ANCHOR_STDS)

    # infer max shape
    #max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),
    #                  ('data_ref', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),
    #                  ('eq_flag', (1,))]
    data_shape1 = {
        'data_ref':
        (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]),
         max([v[1] for v in config.SCALES])),
    }
    _, feat_shape111, _ = feat_conv_3x3_relu.infer_shape(**data_shape1)

    max_data_shape = [('data_ref', (config.TRAIN.BATCH_IMAGES, 3,
                                    max([v[0] for v in config.SCALES]),
                                    max([v[1] for v in config.SCALES]))),
                      ('eq_flag', (1, )),
                      ('motion_vector', (config.TRAIN.BATCH_IMAGES, 2,
                                         int(feat_shape111[0][2]),
                                         int(feat_shape111[0][3])))]

    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        #arg_params_flow, aux_params_flow = load_param(pretrained_flow, epoch, convert=True)
        #arg_params.update(arg_params_flow)
        #aux_params.update(aux_params_flow)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    print('Start to train model')
    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)

def train_rpn(image_set,
              year,
              root_path,
              devkit_path,
              pretrained,
              epoch,
              prefix,
              ctx,
              begin_epoch,
              end_epoch,
              frequent,
              kv_store,
              work_load_list=None,
              resume=False):
    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # load symbol
    sym = get_vgg_rpn()
    feat_sym = get_vgg_rpn().get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)

    # load training data
    voc, roidb = load_gt_roidb(image_set,
                               year,
                               root_path,
                               devkit_path,
                               flip=True)
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              batch_size=config.TRAIN.BATCH_SIZE,
                              shuffle=True,
                              mode='train',
                              ctx=ctx,
                              work_load_list=work_load_list)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_SIZE, 3, 1000, 1000))]
    max_data_shape_dict = {k: v for k, v in max_data_shape}
    _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict)
    from rcnn.minibatch import assign_anchor
    import numpy as np
    label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]])
    max_label_shape = [
        ('label', label['label'].shape),
        ('bbox_target', label['bbox_target'].shape),
        ('bbox_inside_weight', label['bbox_inside_weight'].shape),
        ('bbox_outside_weight', label['bbox_outside_weight'].shape)
    ]
    print('providing maximum shape', max_data_shape, max_label_shape)

    # load pretrained
    args, auxs = load_param(pretrained, epoch, convert=True)

    # initialize params
    if not resume:
        input_shapes = {
            k: v
            for k, v in train_data.provide_data + train_data.provide_label
        }
        arg_shape, _, _ = sym.infer_shape(**input_shapes)
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        args['rpn_conv_3x3_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        args['rpn_conv_3x3_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_conv_3x3_bias'])
        args['rpn_cls_score_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        args['rpn_cls_score_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_cls_score_bias'])
        args['rpn_bbox_pred_weight'] = mx.random.normal(
            0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        args['rpn_bbox_pred_bias'] = mx.nd.zeros(
            shape=arg_shape_dict['rpn_bbox_pred_bias'])

    # prepare training
    if config.TRAIN.FINETUNE:
        fixed_param_prefix = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']
    else:
        fixed_param_prefix = ['conv1', 'conv2']
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent)
    epoch_end_callback = mx.callback.do_checkpoint(prefix)
    if config.TRAIN.HAS_RPN:
        eval_metric = AccuracyMetric(use_ignore=True, ignore=-1)
        cls_metric = LogLossMetric(use_ignore=True, ignore=-1)
    else:
        eval_metric = AccuracyMetric()
        cls_metric = LogLossMetric()
    bbox_metric = SmoothL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': 0.001,
        'lr_scheduler': mx.lr_scheduler.FactorScheduler(60000, 0.1),
        'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE)
    }
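    # note: here gradients are averaged over the batch via rescale_grad, whereas the
    # training setups above leave rescale_grad at 1.0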

    # train
    mod = MutableModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        work_load_list=work_load_list,
                        max_data_shapes=max_data_shape,
                        max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=kv_store,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=args,
            aux_params=auxs,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)