def main():
    """Train the RPN stage (non end-to-end pipeline) using the module-level
    ``config`` and parsed command-line ``args``."""
    print('Called with argument:', args)
    # One MXNet GPU context per comma-separated id in config.gpus.
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    logger, output_path = create_logger(config.output_path, args.cfg,
                                        config.dataset.image_set)
    # Snapshot the network symbol definition next to the run output so the
    # experiment can be reproduced later.
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 output_path)

    # This staged entry point must not be used with the end-to-end pipeline.
    assert config.TRAIN.END2END == False
    prefix = os.path.join(output_path, config.TRAIN.model_prefix)
    # NOTE(review): the banner says "TRAIN rcnn" but the call below trains the
    # RPN stage (train_rpn) -- looks copy-pasted; confirm intent.
    logging.info('########## TRAIN rcnn WITH IMAGENET INIT AND RPN DETECTION')
    train_rpn(config,
              config.dataset.dataset,
              config.dataset.image_set,
              config.dataset.root_path,
              config.dataset.dataset_path,
              args.frequent,
              config.default.kvstore,
              config.TRAIN.FLIP,
              config.TRAIN.SHUFFLE,
              config.TRAIN.RESUME,
              ctx,
              config.network.pretrained,
              config.network.pretrained_epoch,
              prefix,
              config.TRAIN.begin_epoch,
              config.TRAIN.end_epoch,
              train_shared=False,
              lr=config.TRAIN.lr,
              lr_step=config.TRAIN.lr_step,
              logger=logger,
              output_path=output_path)
def main():
    """Visually debug RCNN detections on the hard-coded small validation set
    'VID_val_videos_small' (uses module-level ``config`` and ``args``)."""
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    print args

    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.test_image_set)

    # test_rcnn(config, config.dataset.dataset, config.dataset.test_image_set, config.dataset.root_path, config.dataset.dataset_path,
    #           ctx, os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix), config.TEST.test_epoch,
    #           args.vis, args.ignore_cache, args.shuffle, config.TEST.HAS_RPN, config.dataset.proposal, args.thresh, logger=logger, output_path=final_output_path)
    # Visualization is single-process, so restrict to one GPU.
    if args.vis:
        assert len(ctx) == 1, "debugger must use 1 gpu"
    debug_rcnn(config,
               config.dataset.dataset,
               # NOTE(review): image set is hard-coded here rather than taken
               # from config.dataset.test_image_set -- confirm this is intended.
               'VID_val_videos_small',
               config.dataset.root_path,
               config.dataset.dataset_path,
               ctx,
               # Checkpoint prefix: sibling directory named after the joined
               # training image sets.
               os.path.join(
                   final_output_path, '..', '_'.join(
                       [iset for iset in config.dataset.image_set.split('+')]),
                   config.TRAIN.model_prefix),
               config.TEST.test_epoch,
               args.vis,
               args.show_gt,
               args.ignore_cache,
               args.shuffle,
               config.TEST.HAS_RPN,
               config.dataset.proposal,
               args.thresh,
               logger=logger,
               output_path=final_output_path)
Example #3
0
def main():
    """Evaluate the trained RCNN model on the configured test image set."""
    args = parse_args()
    print args

    # One MXNet GPU context per comma-separated id in config.gpus.
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]

    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.test_image_set)

    test_rcnn(config,
              config.dataset.dataset,
              config.dataset.test_image_set,
              config.dataset.root_path,
              config.dataset.dataset_path,
              ctx,
              # Checkpoint prefix: sibling directory named after the joined
              # training image sets.
              os.path.join(
                  final_output_path, '..', '_'.join(
                      [iset for iset in config.dataset.image_set.split('+')]),
                  config.TRAIN.model_prefix),
              config.TEST.test_epoch,
              args.vis,
              args.ignore_cache,
              args.shuffle,
              config.TEST.HAS_RPN,
              config.dataset.proposal,
              args.thresh,
              logger=logger,
              output_path=final_output_path)
Example #4
0
def cnr_to_file(infile, mask_file, outdir=None, force=False):
    """Compute per-slice contrast-to-noise ratio (CNR) values and save them
    to a tab-separated text file named after the mask file.

    Parameters
    ----------
    infile : path of the image file to extract ROI values from
    mask_file : path of the mask file; its basename names the output/log files
    outdir : output directory; defaults to the directory containing infile
    force : overwrite an existing output file when True
    """
    if outdir is None:
        outdir, _ = os.path.split(infile)
    # Name outfile based on mask file
    fname = os.path.basename(mask_file).split('.')[0] + '.txt'
    exists, outfile = make_outfile(outdir, fname)
    if exists and not force:
        print("{} exists, delete before running or use --force flag.".format(outfile))
        return
    # Create a dedicated logger per mask file.
    logname = 'calc_cnr_' + os.path.basename(mask_file).split('.')[0] + '.log'
    logger = create_logger(outdir, name=logname)
    logger.info("Image file: {}".format(infile))
    logger.info("Mask file: {}".format(mask_file))
    # Run error checks (called for its side effects; the return value was
    # never used by this function).
    run_error_checks(mask_file)
    # Get ROI values put into dataframe
    resultsdf = get_roi_vals(infile, mask_file)
    # Change slices from 0-based index to 1-based
    resultsdf.index = resultsdf.index + 1
    # Calculate contrast to noise ratio
    resultsdf['CNR'] = get_cnr(resultsdf['Left_LC'], resultsdf['Right_LC'], resultsdf['PT'])
    # Save results to file; only log success when the write actually succeeded
    # (previously the "Results saved" line was logged even after an IOError).
    try:
        resultsdf.to_csv(outfile, index_label="Slice", sep="\t")
    except IOError:
        print('File could not be saved')
    else:
        logger.info("Results saved to: {}".format(outfile))
    # Close and detach log handlers so repeated calls don't duplicate output.
    for hndlr in logger.handlers[:]:
        logger.removeHandler(hndlr)
        hndlr.close()
Example #5
0
def main():
    """Visually debug detections of a fixed checkpoint
    ('./model/double_drfcn_vid_learn_nms', epoch 2) on the test image set."""
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    print args

    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.test_image_set)

    # test_rcnn(config, config.dataset.dataset, config.dataset.test_image_set, config.dataset.root_path, config.dataset.dataset_path,
    #           ctx, os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix), config.TEST.test_epoch,
    #           args.vis, args.ignore_cache, args.shuffle, config.TEST.HAS_RPN, config.dataset.proposal, args.thresh, logger=logger, output_path=final_output_path)
    debug_rcnn(config,
               config.dataset.dataset,
               config.dataset.test_image_set,
               config.dataset.root_path,
               config.dataset.dataset_path,
               ctx,
               # NOTE(review): checkpoint prefix and epoch (2) are hard-coded
               # here instead of coming from config -- confirm intended.
               os.path.join('./model', 'double_drfcn_vid_learn_nms'),
               2,
               args.vis,
               args.show_gt,
               args.ignore_cache,
               args.shuffle,
               config.TEST.HAS_RPN,
               config.dataset.proposal,
               args.thresh,
               logger=logger,
               output_path=final_output_path)
def main():
    """Evaluate RCNN on the test set, allowing sample stride, key-frame
    interval, and video shuffle to be overridden from the command line."""
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    print(args)

    # -1 means "not supplied on the command line": keep the config value.
    if args.sample_stride != -1:
        config.TEST.sample_stride = args.sample_stride
    if args.key_frame_interval != -1:
        config.TEST.KEY_FRAME_INTERVAL = args.key_frame_interval
    if args.video_shuffle:
        config.TEST.video_shuffle = args.video_shuffle


    # This create_logger variant also returns a tensorboard log path.
    logger, final_output_path, tb_log_path = create_logger(config.output_path, config.log_path, args.cfg,
                                                           config.dataset.test_image_set)

    # Default: checkpoint produced by training on the joined image sets.
    trained_model = os.path.join(final_output_path, '..', '_'.join(
        [iset for iset in config.dataset.image_set.split('+')]),
                                 config.TRAIN.model_prefix)
    test_epoch = config.TEST.test_epoch
    # Optionally test an explicitly supplied pretrained checkpoint (epoch 0).
    if args.test_pretrained:
        trained_model = args.test_pretrained
        test_epoch = 0

    test_rcnn(config, config.dataset.dataset, config.dataset.test_image_set, config.dataset.root_path,
              config.dataset.dataset_path, config.dataset.motion_iou_path,
              ctx,
              trained_model,
              test_epoch,
              args.vis, args.ignore_cache, args.shuffle, config.TEST.HAS_RPN, config.dataset.proposal, args.thresh,
              logger=logger, output_path=final_output_path,
              enable_detailed_eval=config.dataset.enable_detailed_eval)
def main():
    """Evaluate the trained RCNN (with motion-IoU detailed eval) on the
    configured test image set."""
    # ctx is the list of GPU contexts; the gpu ids come from the yaml config
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    # print('ctx:', ctx)
    print args

    # config.output_path is defined in the yaml file; args.cfg is the path of
    # the corresponding yaml file
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.test_image_set)

    # config.dataset.dataset=ImageNetVID
    test_rcnn(config,
              config.dataset.dataset,
              config.dataset.test_image_set,
              config.dataset.root_path,
              config.dataset.dataset_path,
              config.dataset.motion_iou_path,
              ctx,
              # Checkpoint prefix: sibling directory named after the joined
              # training image sets.
              os.path.join(
                  final_output_path, '..', '_'.join(
                      [iset for iset in config.dataset.image_set.split('+')]),
                  config.TRAIN.model_prefix),
              config.TEST.test_epoch,
              args.vis,
              args.ignore_cache,
              args.shuffle,
              config.TEST.HAS_RPN,
              config.dataset.proposal,
              args.thresh,
              logger=logger,
              output_path=final_output_path,
              enable_detailed_eval=config.dataset.enable_detailed_eval)
Example #8
0
def main():
    """Evaluate RCNN using GPU-accelerated NMS on four hard-coded GPUs."""
    #ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    # NOTE(review): GPU ids are hard-coded and override config.gpus -- confirm.
    ctx = [mx.gpu(0), mx.gpu(1), mx.gpu(2), mx.gpu(3)]
    print args
    #gpu_nums = [int(i) for i in config.gpus.split(',')]
    gpu_nums = [0, 1, 2, 3]
    # GPU NMS kernel bound to the first GPU.
    nms_dets = gpu_nms_wrapper(config.TEST.NMS, gpu_nums[0])
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.test_image_set)
    # NOTE(review): joins with '+' while the other variants in this file use
    # '_' for the checkpoint directory name -- confirm which is correct here.
    output_path = os.path.join(
        final_output_path, '..',
        '+'.join([iset for iset in config.dataset.image_set.split('+')]),
        config.TRAIN.model_prefix)
    test_rcnn(config,
              config.dataset.dataset,
              config.dataset.test_image_set,
              config.dataset.root_path,
              config.dataset.dataset_path,
              ctx,
              output_path,
              config.TEST.test_epoch,
              args.vis,
              args.ignore_cache,
              args.shuffle,
              config.TEST.HAS_RPN,
              config.dataset.proposal,
              args.thresh,
              logger=logger,
              output_path=final_output_path,
              nms_dets=nms_dets,
              is_docker=args.is_docker)
Example #9
0
File: test.py Project: lyj96/STSN
def main():
    """Evaluate the trained RCNN (with motion-IoU detailed eval) on the
    configured test image set."""
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    print args

    # This create_logger variant also returns a tensorboard path (ignored).
    logger, final_output_path, _ = create_logger(config.output_path,
                                                 config.log_path, args.cfg,
                                                 config.dataset.test_image_set)

    test_rcnn(config,
              config.dataset.dataset,
              config.dataset.test_image_set,
              config.dataset.root_path,
              config.dataset.dataset_path,
              config.dataset.motion_iou_path,
              ctx,
              # Checkpoint prefix: sibling directory named after the joined
              # training image sets.
              os.path.join(
                  final_output_path, '..', '_'.join(
                      [iset for iset in config.dataset.image_set.split('+')]),
                  config.TRAIN.model_prefix),
              config.TEST.test_epoch,
              args.vis,
              args.ignore_cache,
              args.shuffle,
              config.TEST.HAS_RPN,
              config.dataset.proposal,
              args.thresh,
              logger=logger,
              output_path=final_output_path,
              enable_detailed_eval=config.dataset.enable_detailed_eval)
Example #10
0
def main():
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    print args

    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set)

    test_rcnn(config, config.dataset.dataset, config.dataset.test_image_set, config.dataset.root_path, config.dataset.dataset_path,
              ctx, os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix), config.TEST.test_epoch,
              args.vis, args.ignore_cache, args.shuffle, config.TEST.HAS_RPN, config.dataset.proposal, args.thresh, logger=logger, output_path=final_output_path)
Example #11
0
def main():
    """Evaluate the DOTA quadrangle RCNN variant on the test image set."""
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    print args

    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set)

    # Model prefix comes directly from config.TRAIN.model_path here (unlike
    # the other variants, which derive it from the output path).
    test_rcnn_dota_quadrangle(config, config.dataset.dataset, config.dataset.test_image_set, config.dataset.root_path, config.dataset.dataset_path,
              ctx, config.TRAIN.model_path, config.TEST.test_epoch,
              args.vis, args.ignore_cache, args.shuffle, config.TEST.HAS_RPN, config.dataset.proposal, args.thresh, logger=logger, output_path=final_output_path)
def main():
    """Run the R-FCN demo on the configured test image set."""
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    print args

    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set)

    # Demo uses the checkpoint trained on the '_'-joined training image sets.
    demo_rfcn(config, config.dataset.dataset, config.dataset.test_image_set, config.dataset.root_path, config.dataset.dataset_path,
              ctx, os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix), config.TEST.test_epoch,
              args.vis, config.TEST.HAS_RPN, args.thresh, args.use_box_voting)
def main():
    """Run the R-FCN demo on a single GPU chosen via --gpu at --epoch."""
    ctx = [mx.gpu(int(args.gpu))]
    print args

    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set)

    #arg_params, aux_params = load_param(prefix, epoch, process=False)
    # Unlike the other demo variant, epoch and input/output file names are
    # taken from the command line (args.epoch, args.test_file, args.out_prefix).
    demo_rfcn(config, config.dataset.dataset, config.dataset.test_image_set, config.dataset.root_path, config.dataset.dataset_path,
              ctx, os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix), args.epoch,
              args.vis, config.TEST.HAS_RPN, args.thresh, args.use_box_voting, args.test_file, args.out_prefix)
def main():
    """Train R-FCN from ImageNet-pretrained weights using RPN proposals."""
    print ('Called with argument:', args)
    ctx = [mx.gpu(int(gpu_id)) for gpu_id in config.gpus.split(',')]
    logger, output_path = create_logger(config.output_path, args.cfg,
                                        config.dataset.image_set)
    # Keep a copy of the symbol definition with the training artifacts.
    symbol_file = os.path.join(curr_path, 'symbols', config.symbol + '.py')
    shutil.copy2(symbol_file, output_path)

    prefix = os.path.join(output_path, 'rfcn')
    logging.info('########## TRAIN rfcn WITH IMAGENET INIT AND RPN DETECTION')
    train_rcnn(config,
               config.dataset.dataset,
               config.dataset.image_set,
               config.dataset.root_path,
               config.dataset.dataset_path,
               args.frequent,
               config.default.kvstore,
               config.TRAIN.FLIP,
               config.TRAIN.SHUFFLE,
               config.TRAIN.RESUME,
               ctx,
               config.network.pretrained,
               config.network.pretrained_epoch,
               prefix,
               config.TRAIN.begin_epoch,
               config.TRAIN.end_epoch,
               train_shared=False,
               lr=config.TRAIN.lr,
               lr_step=config.TRAIN.lr_step,
               proposal=config.dataset.proposal,
               logger=logger)
Example #15
0
def main():
    """Smoke-test GPU availability, then evaluate the checkpoint under
    ./model on the configured test image set (single GPU)."""
    import mxnet as mx
    import mxnet.ndarray as nd
    # Allocate a dummy tensor on GPU 0 to fail fast if the GPU is unusable.
    nd.zeros((1, 3, 600, 1000), mx.gpu(0), dtype=float)
    print('GPU ok')

    ctx = [mx.gpu(0)]
    print(args)

    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set)

    # Model prefix is fixed to 'model/<model_prefix>' rather than derived
    # from the output path as in the other variants.
    test_rcnn(config, config.dataset.dataset, config.dataset.test_image_set,
              config.dataset.root_path, config.dataset.dataset_path,
              ctx, os.path.join('model', config.TRAIN.model_prefix), config.TEST.test_epoch,
              args.vis, args.ignore_cache, args.shuffle, config.TEST.HAS_RPN,
              config.dataset.proposal, args.thresh, logger=logger, output_path=final_output_path)
Example #16
0
def test_deeplab():
    """Evaluate a trained DeepLab segmentation model on the test image set."""
    epoch = config.TEST.test_epoch
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    image_set = config.dataset.test_image_set
    root_path = config.dataset.root_path
    dataset = config.dataset.dataset
    dataset_path = config.dataset.dataset_path

    logger, final_output_path = create_logger(config.output_path, args.cfg, image_set)
    # Checkpoint prefix: sibling directory named after the joined training sets.
    prefix = os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix)

    # print config
    pprint.pprint(config)
    logger.info('testing config:{}\n'.format(pprint.pformat(config)))

    # load symbol and testing data
    # NOTE: the symbol class is resolved by name via eval(); config values are
    # trusted here.
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # Dataset class is also resolved by name; use its ground-truth seg db.
    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=final_output_path)
    segdb = imdb.gt_segdb()

    # get test data iter (one image per GPU context per batch)
    test_data = TestDataLoader(segdb, config=config, batch_size=len(ctx))

    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)

    # load model and check parameters
    arg_params, aux_params = load_param(prefix, epoch, process=True)

    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape from the largest configured test scale
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = ['softmax_label']
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]

    # create predictor
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # start detection
    pred_eval(predictor, test_data, imdb, vis=args.vis, ignore_cache=args.ignore_cache, logger=logger)
def test_deeplab():
    """Evaluate a trained DeepLab segmentation model on the test image set.

    NOTE(review): this is a byte-identical duplicate of another test_deeplab
    definition earlier in this file; at import time the later definition wins.
    """
    epoch = config.TEST.test_epoch
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    image_set = config.dataset.test_image_set
    root_path = config.dataset.root_path
    dataset = config.dataset.dataset
    dataset_path = config.dataset.dataset_path

    logger, final_output_path = create_logger(config.output_path, args.cfg, image_set)
    # Checkpoint prefix: sibling directory named after the joined training sets.
    prefix = os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix)

    # print config
    pprint.pprint(config)
    logger.info('testing config:{}\n'.format(pprint.pformat(config)))

    # load symbol and testing data (classes resolved by name via eval())
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=final_output_path)
    segdb = imdb.gt_segdb()

    # get test data iter (one image per GPU context per batch)
    test_data = TestDataLoader(segdb, config=config, batch_size=len(ctx))

    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)

    # load model and check parameters
    arg_params, aux_params = load_param(prefix, epoch, process=True)

    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape from the largest configured test scale
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = ['softmax_label']
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]

    # create predictor
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # start detection
    pred_eval(predictor, test_data, imdb, vis=args.vis, ignore_cache=args.ignore_cache, logger=logger)
Example #18
0
def main():
    """Parse arguments, load the yaml config, and launch training."""
    args = parse_args()
    print 'Called with argument:', args
    cfg_path = args.cfg
    # Populate the global `config` from the yaml file before anything reads it.
    update_config(cfg_path)

    # create logger
    logger, output_path = create_logger(config.output_path, cfg_path,
                                        config.dataset.image_set)

    # print config
    pprint.pprint(config)
    logger.info('training config: {}\n'.format(pprint.pformat(config)))

    # train_net(cfg_path, ctx, config.network.pretrained, config.network.pretrained_epoch,
    #           config.TRAIN.model_prefix, config.TRAIN.begin_epoch, config.TRAIN.end_epoch,
    #           config.TRAIN.lr, config.TRAIN.lr_step)
    train_net(config, output_path, logger)
Example #19
0
def main():
    """Evaluate the impression-network RCNN, choosing the online or offline
    variant based on config.TRAIN.online. Both variants share the same
    arguments; only the entry point differs."""
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    print args

    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.test_image_set)

    if config.TRAIN.online:
        test_rcnn_impression_online(config,
                                    config.dataset.dataset,
                                    config.dataset.test_image_set,
                                    config.dataset.root_path,
                                    config.dataset.dataset_path,
                                    ctx,
                                    # Model prefix lives directly under the
                                    # configured output path here.
                                    os.path.join(config.output_path,
                                                 config.TRAIN.model_prefix),
                                    config.TEST.test_epoch,
                                    args.vis,
                                    args.ignore_cache,
                                    args.shuffle,
                                    config.TEST.HAS_RPN,
                                    config.dataset.proposal,
                                    args.thresh,
                                    logger=logger,
                                    output_path=final_output_path)

    else:
        test_rcnn_impression_offline(config,
                                     config.dataset.dataset,
                                     config.dataset.test_image_set,
                                     config.dataset.root_path,
                                     config.dataset.dataset_path,
                                     ctx,
                                     os.path.join(config.output_path,
                                                  config.TRAIN.model_prefix),
                                     config.TEST.test_epoch,
                                     args.vis,
                                     args.ignore_cache,
                                     args.shuffle,
                                     config.TEST.HAS_RPN,
                                     config.dataset.proposal,
                                     args.thresh,
                                     logger=logger,
                                     output_path=final_output_path)
Example #20
0
def main():
    """Load a trained checkpoint, bind it at a fixed input shape, and re-save
    it under the prefix 'test_traffic' at epoch 0."""
    args = parse_args()
    pprint.pprint(config)

    # Pick the symbol variant based on whether the model has an internal RPN.
    # NOTE(review): both branches build the same instance; only the
    # get_symbol / get_symbol_rcnn call differs.
    if config.TEST.HAS_RPN:
        sym_instance = eval(config.symbol + '.' + config.symbol)()
        sym = sym_instance.get_symbol(config, is_train=False)
    else:
        sym_instance = eval(config.symbol + '.' + config.symbol)()
        sym = sym_instance.get_symbol_rcnn(config, is_train=False)

    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set)
    # Checkpoint prefix: sibling directory named after the joined training sets.
    prefix = os.path.join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix)
    arg_params, aux_params = load_param(prefix, config.TEST.test_epoch, process=True)

    data_names = ['data', 'im_info']
    label_names = None

    # Bind at a fixed 1024x1024 input so the saved checkpoint has shapes.
    mod = mx.mod.Module(symbol=sym, context=mx.gpu(0), data_names=data_names, label_names=label_names)
    mod.bind(for_training=False, data_shapes=[('data', (1, 3, 1024, 1024)), ('im_info', (1, 3))], label_shapes=None, force_rebind=False)
    mod.set_params(arg_params=arg_params, aux_params=aux_params, force_init=False)

    mod.save_checkpoint('test_traffic',0)
Example #21
0
def main():
    """Run test_rcnn over every image-set file in a directory, appending each
    result (keyed by epoch) to a per-set file under /tmp/res."""
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    output_dir = "/tmp/res"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    print(args)

    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set)

    # Directory holding the image-set .txt files to evaluate. (An earlier
    # local-disk path, /media/indoordesk/.../test_sets/ImageSets, was dead
    # code -- immediately overwritten by this assignment.)
    sets = "/data2/test_sets/ImageSets"

    # Bug fix: `epoc` was never defined (the `for epoc in range(1, 30)` loop
    # was commented out), so every iteration raised a NameError that was
    # silently swallowed by the except block below. Test the configured epoch.
    epoc = config.TEST.test_epoch

    maps = []

    for file_name in os.listdir(sets):
        # Skip evaluation-artifact files.
        if "_eval" in file_name:
            continue

        logger.info("About to test with images:" + file_name)
        print("About to test with images:" + file_name)
        try:
            res = test_rcnn(config, config.dataset.dataset, file_name.replace(".txt", ""), config.dataset.root_path, config.dataset.dataset_path, config.dataset.motion_iou_path,
                      ctx, join(final_output_path, '..', '_'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix), epoc,
                      args.vis, args.ignore_cache, args.shuffle, config.TEST.HAS_RPN, config.dataset.proposal, args.thresh, logger=logger, output_path=final_output_path,
                      enable_detailed_eval=config.dataset.enable_detailed_eval)
            # Bug fix: `maps` was declared and printed but never filled.
            maps.append(res)
            with open(join(output_dir, file_name), "a") as f:
                f.write('epoc: %s res: %s \n' % (epoc, res))
        except Exception as e:
            # Best-effort: log the failure for this image set and continue.
            logger.error(e)
            print(e)

    print(maps)
Example #22
0
def main():
    """Train R-FCN from ImageNet-pretrained weights using RPN proposals."""
    print('Called with argument:', args)
    # GPU ids to use, taken from the config file
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    # Create the logger
    logger, output_path = create_logger(config.output_path, args.cfg,
                                        config.dataset.image_set)
    # Copy the corresponding symbols source file into the output path
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 output_path)

    prefix = os.path.join(output_path, 'rfcn')
    logging.info('########## TRAIN rfcn WITH IMAGENET INIT AND RPN DETECTION')
    # Train R-FCN; the inputs include the config dict, dataset name, image
    # set, data root directory, dataset path, and the training-log frequency
    train_rcnn(config,
               config.dataset.dataset,
               config.dataset.image_set,
               config.dataset.root_path,
               config.dataset.dataset_path,
               args.frequent,
               config.default.kvstore,
               config.TRAIN.FLIP,
               config.TRAIN.SHUFFLE,
               config.TRAIN.RESUME,
               ctx,
               config.network.pretrained,
               config.network.pretrained_epoch,
               prefix,
               config.TRAIN.begin_epoch,
               config.TRAIN.end_epoch,
               train_shared=False,
               lr=config.TRAIN.lr,
               lr_step=config.TRAIN.lr_step,
               proposal=config.dataset.proposal,
               logger=logger)
Example #23
0
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr,
              lr_step):
    """Train an FPN detector with Gluon's DataParallelModel/Trainer.

    Parameters
    ----------
    args : parsed command-line arguments (args.cfg is used for the logger)
    ctx : list of mx.Context; one batch shard per context
    pretrained : prefix of the pretrained checkpoint to initialize from
    epoch : epoch of the pretrained checkpoint
    pretrained/epoch are ignored when config.TRAIN.RESUME is set
    prefix : checkpoint prefix (joined under the final output path)
    begin_epoch : first epoch index (resume point when RESUME)
    end_epoch : NOTE(review) unused -- the loop runs to config.TRAIN.end_epoch
    lr : base learning rate
    lr_step : comma-separated epochs at which lr is scaled by lr_factor
    """
    # Fixed seeds for reproducibility.
    mx.random.seed(3)
    np.random.seed(3)
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol; keep a copy of the symbol file with the run output
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)

    # One RPN classification-score output per pyramid level; levels are
    # derived from the configured feature strides (log2 of stride).
    feat_pyramid_level = np.log2(config.network.RPN_FEAT_STRIDE).astype(int)
    feat_sym = [
        sym.get_internals()['rpn_cls_score_p' + str(x) + '_output']
        for x in feat_pyramid_level
    ]

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training ('+' joins multiple sets)
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)

    # load training data

    train_data = PyramidAnchorIterator(
        feat_sym,
        roidb,
        config,
        batch_size=input_batch_size,
        shuffle=config.TRAIN.SHUFFLE,
        ctx=ctx,
        feat_strides=config.network.RPN_FEAT_STRIDE,
        anchor_scales=config.network.ANCHOR_SCALES,
        anchor_ratios=config.network.ANCHOR_RATIOS,
        aspect_grouping=config.TRAIN.ASPECT_GROUPING,
        allowed_border=np.inf)

    # infer max shape (largest configured training scale; up to 100 gt boxes)
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print 'providing maximum shape', max_data_shape, max_label_shape

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params: resume from our own checkpoint, or start
    # from the pretrained backbone
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    # sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # decide training params
    # metric: RPN + RCNN accuracy/loss metrics, combined into one composite
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    rpn_fg_metric = metric.RPNFGFraction(config)
    eval_metric = metric.RCNNAccMetric(config)
    eval_fg_metric = metric.RCNNFGAccuracy(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, rpn_fg_metric,
            eval_fg_metric, eval_metric, cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    # batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    # bbox normalization stats; class-agnostic regression uses 2 "classes"
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    # epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1,
    # save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds)]
    # decide learning rate: step decay at the epochs in lr_step, shifted
    # relative to begin_epoch when resuming mid-schedule
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'clip_gradient': None
    }
    # Overlap data loading with compute.
    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    net = FPNNet(sym, args_pretrained=arg_params, auxes_pretrained=aux_params)

    # create multi-threaded DataParallel Model.
    net_parallel = DataParallelModel(net, ctx_list=ctx)

    # create trainer,
    # !Important: A trainer can be only created after the function `resnet_ctx` is called.
    # Please Note that DataParallelModel will call reset_ctx to initialize parameters on gpus.
    trainer = mx.gluon.Trainer(net.collect_params(), 'sgd', optimizer_params)

    for epoch in range(begin_epoch, config.TRAIN.end_epoch):
        train_data.reset()
        net.hybridize(static_alloc=True, static_shape=False)
        progress_bar = tqdm.tqdm(total=len(roidb))
        for nbatch, data_batch in enumerate(train_data):
            # Cast each context's data+label to float32 on its device.
            inputs = [[
                x.astype('f').as_in_context(c) for x in d + l
            ] for c, d, l in zip(ctx, data_batch.data, data_batch.label)]
            with ag.record():
                outputs = net_parallel(*inputs)
                # Flatten the per-device output tuples and backprop them all.
                ag.backward(sum(outputs, ()))
            trainer.step(1)
            # NOTE(review): metrics are updated from device 0 only.
            eval_metrics.update(data_batch.label[0], outputs[0])
            if nbatch % 100 == 0:
                msg = ','.join([
                    '{}={:.3f}'.format(w, v)
                    for w, v in zip(*eval_metrics.get())
                ])
                msg += ",lr={}".format(trainer.learning_rate)
                logger.info(msg)
                print(msg)
                eval_metrics.reset()
            # NOTE(review): len(inputs) is the number of devices, but the bar
            # total is len(roidb) images -- progress likely undercounts.
            progress_bar.update(len(inputs))
        progress_bar.close()
        net.hybridize(static_alloc=True, static_shape=False)
        # Placeholder metric -- no validation mAP is actually computed here.
        re = ("mAP", 0.0)
        logger.info(re)
        save_path = "{}-{}-{}.params".format(prefix, epoch, re[1])
        net.collect_params().save(save_path)
        logger.info("Saved checkpoint to {}.".format(save_path))
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step):
    """Train a Faster R-CNN style detector end to end.

    Builds the symbol named by ``config.symbol``, loads the ground-truth
    roidb for every '+'-separated image set in ``config.dataset.image_set``,
    and fits a ``MutableModule`` with SGD plus a warmup multi-factor learning
    rate schedule.  Checkpoints (params and optimizer states) are written
    under the run's output path after every epoch.

    Args:
        args: parsed CLI namespace; ``args.cfg`` and ``args.frequent`` are read.
        ctx: list of MXNet contexts; one image batch is placed on each.
        pretrained: checkpoint prefix used for weight initialisation.
        epoch: epoch number of the pretrained checkpoint.
        prefix: checkpoint prefix, joined under the run's output path.
        begin_epoch: first training epoch (resume point when TRAIN.RESUME).
        end_epoch: last training epoch passed to ``mod.fit``.
        lr: base learning rate.
        lr_step: comma-separated epochs at which lr is scaled by
            ``config.TRAIN.lr_factor``.
    """
    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    # Keep a copy of the symbol definition next to the run for reproducibility.
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    # The RPN classification score output is used below to infer label shapes.
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu: one image batch per context
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training ('+' separates image sets)
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path,
                            flip=config.TRAIN.FLIP)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)

    # load training data
    train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape (largest configured scale bounds the input tensor)
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    # Up to 100 ground-truth boxes per image, each row (x1, y1, x2, y2, cls).
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print 'providing maximum shape', max_data_shape, max_label_shape

    data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params (resume from our own checkpoint, or start
    # from the pretrained backbone and re-init the task-specific layers)
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)],
                        max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        # also restore the optimizer (momentum) state when resuming
        mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch)

    # decide training params
    # metric: RPN accuracy/log-loss/L1 plus the RCNN counterparts
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    # bbox target (un)normalisation constants baked into the saved checkpoint
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), callback.do_checkpoint(prefix, means, stds)]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    # keep only decay points still ahead of begin_epoch; decays already
    # passed are folded into the starting lr on the next line
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {'momentum': config.TRAIN.momentum,
                        'wd': config.TRAIN.wd,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0,
                        'clip_gradient': None}

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=config.default.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step):
    """Train a fully-convolutional semantic segmentation network.

    Loads the ground-truth segdb for every '+'-separated image set, feeds
    fixed-size crops (``config.TRAIN.CROP_HEIGHT`` x ``config.TRAIN.CROP_WIDTH``)
    through a ``TrainDataLoader`` and fits a ``MutableModule`` with SGD plus a
    warmup multi-factor learning rate schedule.

    Args:
        args: parsed CLI namespace; ``args.cfg`` and ``args.frequent`` are read.
        ctx: list of MXNet contexts; one crop batch is placed on each.
        pretrained: checkpoint prefix used for weight initialisation.
        epoch: epoch number of the pretrained checkpoint.
        prefix: checkpoint prefix, joined under the run's output path.
        begin_epoch: first training epoch (resume point when TRAIN.RESUME).
        end_epoch: last training epoch passed to ``mod.fit``.
        lr: base learning rate.
        lr_step: comma-separated epochs at which lr is scaled by 0.1
            (note: hard-coded here, unlike the detection trainer which reads
            ``config.TRAIN.lr_factor``).
    """
    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    # Keep a copy of the symbol definition next to the run for reproducibility.
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    #sym = eval('get_' + args.network + '_train')(num_classes=config.dataset.NUM_CLASSES)

    # setup multi-gpu: one crop batch per context
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training ('+' separates image sets)
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    segdbs = [load_gt_segdb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path,
                            result_path=final_output_path, flip=config.TRAIN.FLIP)
              for image_set in image_sets]
    segdb = merge_segdb(segdbs)

    # load training data
    train_data = TrainDataLoader(sym, segdb, config, batch_size=input_batch_size, crop_height=config.TRAIN.CROP_HEIGHT, crop_width=config.TRAIN.CROP_WIDTH,
                                 shuffle=config.TRAIN.SHUFFLE, ctx=ctx)

    # infer max shape: data is a 3-channel crop, label a 1-channel mask of the same spatial size
    max_scale = [(config.TRAIN.CROP_HEIGHT, config.TRAIN.CROP_WIDTH)]
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in max_scale]), max([v[1] for v in max_scale])))]
    max_label_shape = [('label', (config.TRAIN.BATCH_IMAGES, 1, max([v[0] for v in max_scale]), max([v[1] for v in max_scale])))]
    # max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape, max_label_shape)
    print 'providing maximum shape', max_data_shape, max_label_shape

    # infer shape
    data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params (resume from our own checkpoint, or start
    # from the pretrained backbone and re-init the task-specific layers)
    if config.TRAIN.RESUME:
        print 'continue training from ', begin_epoch
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        print pretrained
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weights(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in xrange(batch_size)],
                        max_label_shapes=[max_label_shape for _ in xrange(batch_size)], fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric: a single FCN log-loss metric for segmentation
    fcn_loss_metric = metric.FCNLogLossMetric(config.default.frequent * batch_size)
    eval_metrics = mx.metric.CompositeEvalMetric()

    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [fcn_loss_metric]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    epoch_end_callback = mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True)

    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    # keep only decay points still ahead of begin_epoch; decays already
    # passed are folded into the starting lr on the next line
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(segdb) / batch_size) for epoch in lr_epoch_diff]
    print 'lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters

    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step)

    # optimizer
    optimizer_params = {'momentum': config.TRAIN.momentum,
                        'wd': config.TRAIN.wd,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0,
                        'clip_gradient': None}

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=config.default.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
# Load the raw time series and cast to the project's configured float precision.
X, y = load_data(args.name)
x_train, y_train = X.astype(gfs.settings.float_type), y.astype(
    gfs.settings.float_type)
print('# Training Data:', x_train.shape[0])
# Standardise inputs and targets to zero mean / unit variance.
mean_x, std_x = np.mean(x_train), np.std(x_train)
mean_y, std_y = np.mean(y_train), np.std(y_train)

normalized_x_train = (x_train - mean_x) / std_x
normalized_y_train = (y_train - mean_y) / std_y
inputs, targets = tf.constant(normalized_x_train), tf.constant(
    normalized_y_train)
# Append a shifted copy of the inputs translated past the training maximum;
# presumably used to evaluate/plot extrapolation beyond the training range
# -- TODO confirm against the plotting code that consumes x_train_extended.
x_train_extended = tf.concat(
    [inputs, inputs - np.min(normalized_x_train) + np.max(normalized_x_train)],
    axis=0)

logger = create_logger('results/time-series/', args.name, __file__)
# NOTE: this intentionally shadows the builtin print -- every print() call
# after this line is redirected to the logger.
print = logger.info

############################## setup parameters ##############################
epochs = 20000
plot_interval = 5000
print_interval = 100

############################## build NKN ##############################
# Initialise lengthscales from the local median pairwise distance; replace
# (near-)zero values that would make the kernel degenerate.
ls = median_distance_local(normalized_x_train).astype('float32')
ls[abs(ls) < 1e-6] = 1.
input_dim = 1

kernel = dict(nkn=[{
    'name': 'Linear',
    'params': {
# Example #27
# 0
def train_net(args, ctx, pretrained_dir, pretrained_resnet, epoch, prefix,
              begin_epoch, end_epoch, lr, lr_step):
    """Train a Philly/video-aware Faster R-CNN variant.

    Compared with the plain trainer, this version supports motion-IoU
    roidbs, RPN target normalisation, optional learnable-NMS metrics,
    Philly cluster progress reporting and automatic resume from the newest
    complete checkpoint (params + optimizer states).

    Args:
        args: parsed CLI namespace; reads ``args.cfg``, ``args.frequent``
            and ``args.usePhilly``.
        ctx: list of MXNet contexts; one image batch is placed on each.
        pretrained_dir: directory holding the pretrained backbone checkpoint.
        pretrained_resnet: checkpoint prefix inside ``pretrained_dir``.
        epoch: epoch number of the pretrained checkpoint.
        prefix: checkpoint prefix, joined under the run's output path.
        begin_epoch: first training epoch (may be advanced by auto-resume).
        end_epoch: last training epoch passed to ``mod.fit``.
        lr: base learning rate.
        lr_step: comma-separated epochs at which lr is scaled by
            ``config.TRAIN.lr_factor``.
    """
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol; keep a copy of the definition with the run for reproducibility
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    # The RPN classification score output is used below to infer label shapes.
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu: one image batch per context
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # record the exact code revision in the log
    # NOTE(review): `commands` is Python 2 only; use subprocess on Python 3.
    git_commit_id = commands.getoutput('git rev-parse HEAD')
    print("Git commit id:", git_commit_id)
    logger.info('Git commit id: {}'.format(git_commit_id))

    # load dataset and prepare imdb for training ('+' separates image sets)
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      motion_iou_path=config.dataset.motion_iou_path,
                      flip=config.TRAIN.FLIP,
                      use_philly=args.usePhilly) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)
    # load training data
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              config,
                              batch_size=input_batch_size,
                              shuffle=config.TRAIN.SHUFFLE,
                              ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE,
                              anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING,
                              normalize_target=config.network.NORMALIZE_RPN,
                              bbox_mean=config.network.ANCHOR_MEANS,
                              bbox_std=config.network.ANCHOR_STDS)

    # infer max shape (largest configured scale bounds the input tensor)
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    # Up to 100 ground-truth boxes per image, each row (x1, y1, x2, y2, cls).
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix)

    # load and initialize params; three cases: explicit resume, auto-resume
    # from the newest complete checkpoint, or pretrained backbone init
    params_loaded = False
    if config.TRAIN.RESUME:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)
        print('continue training from ', begin_epoch)
        # BUG FIX: logging treats extra positional args as %-format arguments;
        # with no placeholder in the message this raised a formatting error at
        # log time, so format the epoch into the message instead.
        logger.info('continue training from {}'.format(begin_epoch))
        params_loaded = True
    elif config.TRAIN.AUTO_RESUME:
        # scan backwards for the newest epoch with both params and states files
        for cur_epoch in range(end_epoch - 1, begin_epoch, -1):
            params_filename = '{}-{:04d}.params'.format(prefix, cur_epoch)
            states_filename = '{}-{:04d}.states'.format(prefix, cur_epoch)
            if os.path.exists(params_filename) and os.path.exists(
                    states_filename):
                begin_epoch = cur_epoch
                arg_params, aux_params = load_param(prefix,
                                                    cur_epoch,
                                                    convert=True)
                mod._preload_opt_states = states_filename
                print('auto continue training from {}, {}'.format(
                    params_filename, states_filename))
                logger.info('auto continue training from {}, {}'.format(
                    params_filename, states_filename))
                params_loaded = True
                break
    if not params_loaded:
        arg_params, aux_params = load_param(os.path.join(
            pretrained_dir, pretrained_resnet),
                                            epoch,
                                            convert=True)

    # (re-)initialise the task-specific weights on top of the loaded params
    sym_instance.init_weight(config, arg_params, aux_params)
    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # decide training params
    # metric: always track the RCNN head metrics ...
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()

    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # ... RPN metrics when training jointly or when NMS is not learned ...
    if config.TRAIN.JOINT_TRAINING or (not config.TRAIN.LEARN_NMS):
        rpn_eval_metric = metric.RPNAccMetric()
        rpn_cls_metric = metric.RPNLogLossMetric()
        rpn_bbox_metric = metric.RPNL1LossMetric()
        for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric]:
            eval_metrics.add(child_metric)
    # ... and the learnable-NMS metrics when that feature is enabled
    if config.TRAIN.LEARN_NMS:
        eval_metrics.add(metric.NMSLossMetric(config, 'pos'))
        eval_metrics.add(metric.NMSLossMetric(config, 'neg'))
        eval_metrics.add(metric.NMSAccMetric(config))

    # callback
    batch_end_callback = [
        callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    ]

    if config.USE_PHILLY:
        # report coarse progress to the Philly cluster scheduler
        total_iter = (end_epoch - begin_epoch) * len(roidb) / input_batch_size
        progress_frequent = min(args.frequent * 10, 100)
        batch_end_callback.append(
            callback.PhillyProgressCallback(total_iter, progress_frequent))

    # bbox target (un)normalisation constants baked into the saved checkpoint
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate
    # base_lr = lr * len(ctx) * config.TRAIN.BATCH_IMAGES
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    # keep only decay points still ahead of begin_epoch; decays already
    # passed are folded into the starting lr below
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr,
              lr_step):
    """Train a Faster R-CNN style detector end to end.

    Builds the symbol named by ``config.symbol``, loads the ground-truth
    roidb for every '+'-separated image set and fits a ``MutableModule``
    with SGD plus a warmup multi-factor learning rate schedule.

    Args:
        args: parsed CLI namespace; ``args.cfg`` and ``args.frequent`` are read.
        ctx: list of MXNet contexts; one image batch is placed on each.
        pretrained: checkpoint prefix used for weight initialisation.
        epoch: epoch number of the pretrained checkpoint.
        prefix: checkpoint prefix, joined under the run's output path.
        begin_epoch: first training epoch (resume point when TRAIN.RESUME).
        end_epoch: last training epoch passed to ``mod.fit``.
        lr: base learning rate.
        lr_step: comma-separated epochs at which lr is scaled by
            ``config.TRAIN.lr_factor``.
    """
    # create the logger and its output path
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    # feature symbol: fetch rpn_cls_score_output from the network sym
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    # enable multi-GPU training; each card processes one image batch
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    # '+' separates different image sets, e.g. 2007_trainval+2012_trainval
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    # load the gt roidb for each image set given the dataset type, image set
    # name, root path and dataset path; TRAIN.FLIP augments the data by flipping
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]
    # merge the individual roidbs
    roidb = merge_roidb(roidbs)
    # filter rois according to the rules in the config
    roidb = filter_roidb(roidb, config)
    # load training data
    # AnchorLoader produces the anchors for classification and regression; it
    # matches positive/negative anchors against the roidb and needs the anchor
    # scales, ratios and the feature stride
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              config,
                              batch_size=input_batch_size,
                              shuffle=config.TRAIN.SHUFFLE,
                              ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE,
                              anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    # RESUME means continuing a previous training run
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        # load the arg/aux params of the resumed run from prefix and
        # begin_epoch, converting to GPU NDArray
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)

    # decide training params
    # metric
    # RPN and RCNN evaluation metrics
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    # composite MXNet metric that aggregates all of the metrics above, both
    # the RPN acc/cls/bbox metrics and the RCNN acc/cls/bbox metrics
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)

    # callback
    # callbacks run after each batch and after each epoch
    # batch_end_callback fires every `frequent` batches during training
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    # means/stds: when the bbox regression is class-agnostic the means are
    # tiled twice, otherwise NUM_CLASSES times
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    # callbacks run at the end of each epoch
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate
    # derive the lr from the configured schedule; e.g. the default initial
    # lr for voc is 0.0005
    base_lr = lr
    # lr decay factor
    lr_factor = config.TRAIN.lr_factor
    # lr schedule epochs; lr_step like "3, 5" adjusts the lr at epochs 3 and 5
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    # for every scheduled epoch past begin_epoch keep epoch - begin_epoch
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    print('lr_epoch', lr_epoch, 'begin_epoch', begin_epoch)
    # compute the lr that the current epoch should have
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    # lr adjustment mechanism: warmup multi-factor scheduler
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    # optimizer params: momentum, wd, lr, lr_scheduler, rescale_grad and clip_gradient
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }

    if not isinstance(train_data, PrefetchingIter):
        print('!!!train_data is not PrefetchingIter!!!')
        train_data = PrefetchingIter(train_data)

    # train
    # the training loop: feed train_data, evaluate with eval_metrics, run
    # epoch_end_callback after every epoch and batch_end_callback after every
    # batch, optimise with sgd using optimizer_params, starting from the given
    # arg/aux params between begin_epoch and end_epoch
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr,
              lr_step):
    """Train an R-FCN detector by transfer learning from a fixed checkpoint.

    Builds the training symbol and roidb, restores weights from a
    hard-coded AICity checkpoint, freezes every parameter listed in
    ``arg_params.txt`` except the DCN offset / R-FCN head layers removed
    from the freeze list below, and fits the module with SGD using a
    warmup multi-factor learning-rate schedule.

    Args:
        args: parsed CLI args; needs ``args.cfg`` and ``args.frequent``.
        ctx: list of mx.Context objects, one per GPU; len(ctx) is the
            number of devices.
        pretrained: UNUSED here — the init checkpoint path is hard-coded.
        epoch: UNUSED here — the init checkpoint epoch is hard-coded.
        prefix: checkpoint file prefix (placed under the output path).
        begin_epoch: first training epoch (supports resume of optimizer state).
        end_epoch: last training epoch passed to ``mod.fit``.
        lr: base learning rate.
        lr_step: comma-separated epochs at which lr decays by
            ``config.TRAIN.lr_factor``.
    """
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol; archive the symbol source next to the outputs for provenance
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu: one BATCH_IMAGES-sized slice per context
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training ('+' joins multiple image sets)
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)
    # load training data
    train_data = AnchorLoader(feat_sym,
                              roidb,
                              config,
                              batch_size=input_batch_size,
                              shuffle=config.TRAIN.SHUFFLE,
                              ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE,
                              anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape (largest configured scale; up to 100 gt boxes per image)
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print('providing maximum shape', max_data_shape, max_label_shape)

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    print('transfer learning...')

    # Initialization weights.
    # NOTE(review): the checkpoint path and epoch are hard-coded, so the
    # `pretrained`/`epoch` arguments (and config.TRAIN.RESUME for weight
    # loading) are effectively ignored here. Historically COCO and
    # UA-DETRAC checkpoints were used as alternatives.
    arg_params, aux_params = load_param(
        '/raid10/home_ext/Deformable-ConvNets/output/rfcn_dcn_Shuo_AICity/resnet_v1_101_voc0712_rfcn_dcn_Shuo_AICityVOC1080_FreezeCOCO_rpnOnly_all/1080_all/rfcn_AICityVOC1080_FreezeCOCO_rpnOnly_all',
        4,
        convert=True)

    sym_instance.init_weight_Shuo(config, arg_params, aux_params)

    # check parameter shapes against the inferred data shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    # Freeze parameters by exact name: start from every parameter listed in
    # arg_params.txt, then re-enable training for the res5 DCN offset layers
    # and the conv_new_1 / rfcn_bbox heads by removing them from the list.
    # The with-statement guarantees the file handle is closed.
    with open('/raid10/home_ext/Deformable-ConvNets/rfcn/symbols/arg_params.txt'
              ) as para_file:
        para_list = [line.split('<')[0] for line in para_file.readlines()]
    para_list.remove('res5a_branch2b_offset_weight')
    para_list.remove('res5a_branch2b_offset_bias')
    para_list.remove('res5b_branch2b_offset_weight')
    para_list.remove('res5b_branch2b_offset_bias')
    para_list.remove('res5c_branch2b_offset_weight')
    para_list.remove('res5c_branch2b_offset_bias')
    para_list.remove('conv_new_1_weight')
    para_list.remove('conv_new_1_bias')
    para_list.remove('rfcn_bbox_weight')
    para_list.remove('rfcn_bbox_bias')
    para_list.remove('rfcn_bbox_offset_t_weight')
    para_list.remove('rfcn_bbox_offset_t_bias')

    mod = MutableModule_Shuo(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix,
        fixed_param_names=para_list)

    if config.TRAIN.RESUME:
        # resume only restores optimizer state; weights come from the
        # hard-coded checkpoint above
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)

    # decide training params
    # metric: RPN + RCNN accuracy / log-loss / smooth-L1
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback: throughput logging + per-epoch checkpoints (raw and
    # bbox-denormalized via do_checkpoint)
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate: keep only the decay epochs still ahead of
    # begin_epoch, and pre-apply the decays already passed
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }

    # wrap in a prefetching iterator so data loading overlaps computation
    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
# NOTE(review): a separate script starts here (Bayesian optimization with
# neural kernel networks); it is unrelated to the MXNet training code above.
import tensorflow as tf
import argparse
import numpy as np

from bo_functions import Michalewicz, Stybtang, Stybtang_transform
from utils.create_logger import create_logger
from bayesianOpt import BayesianOptimization
from kernels import KernelWrapper

# Training settings
# CLI flags: experiment name, kernel family, and a run index
# (default -1 — presumably means "all runs"; confirm against how
# args.run is consumed downstream).
parser = argparse.ArgumentParser(description='Neural-Kernel-Network')
parser.add_argument('--name', type=str, default='sty')
parser.add_argument('--kern', type=str, default='rbf')
parser.add_argument('--run', type=int, default=-1, help='indx of run')
args = parser.parse_args()
# Log under results/bo/<name>; record the parsed arguments for reproducibility.
logger = create_logger('results/bo/' + args.name, 'bo', __file__)
logger.info(args)

# Experiment-wide constants: BO iterations, number of repeated runs,
# objective dimensionality, candidate-grid size, and inner optimizer steps.
num_iters = 200
num_runs = 10
input_dim = 10
grid_size = 10000
iterations = 5000
all_dim_groups = []


def NKNInfo(dimGroups=None):
    ls = 0.3
    kernel = dict(oracle=[{
        'name': 'RBF',
        'params': {
Example #31
0
def train_net(args, ctx, pretrained, pretrained_base, pretrained_ec, epoch,
              prefix, begin_epoch, end_epoch, lr, lr_step):
    """Train a flow-guided segmentation network on key + reference frames.

    Merges three pretrained parameter sets (main model, base feature net,
    and the embedding/flow net loaded under ``config.TRAIN.arg_prefix``),
    then fits the module with SGD and a warmup multi-factor lr schedule.

    Args:
        args: parsed CLI args; needs ``args.cfg`` and ``args.frequent``.
        ctx: list of mx.Context objects, one per GPU.
        pretrained: checkpoint prefix of the main pretrained model.
        pretrained_base: checkpoint prefix of the base feature network.
        pretrained_ec: checkpoint prefix of the embedding/flow network.
        epoch: checkpoint epoch used for all three pretrained prefixes.
        prefix: output checkpoint prefix (placed under the output path).
        begin_epoch: first training epoch (supports resume).
        end_epoch: last training epoch passed to ``mod.fit``.
        lr: base learning rate.
        lr_step: comma-separated epochs where lr decays by a factor of 0.1.
    """
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol; archive the symbol source next to the outputs
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_train_symbol(config)

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare segdb for training ('+' joins image sets)
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    segdbs = [
        load_gt_segdb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      result_path=final_output_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]
    segdb = merge_segdb(segdbs)

    # load training data
    train_data = TrainDataLoader(sym,
                                 segdb,
                                 config,
                                 batch_size=input_batch_size,
                                 crop_height=config.TRAIN.CROP_HEIGHT,
                                 crop_width=config.TRAIN.CROP_WIDTH,
                                 shuffle=config.TRAIN.SHUFFLE,
                                 ctx=ctx)

    # infer max shape: key frame, KEY_INTERVAL-1 reference frames, and a
    # scalar flag (eq_flag — presumably marks key frame == current frame;
    # confirm against the data loader)
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES]))),
                      ('data_ref', (config.TRAIN.KEY_INTERVAL - 1, 3,
                                    max([v[0] for v in config.SCALES]),
                                    max([v[1] for v in config.SCALES]))),
                      ('eq_flag', (1, ))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    # print() call (not the py2 print statement) for py3 compatibility and
    # consistency with the rest of this file
    print('providing maximum shape', max_data_shape, max_label_shape)

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        print(pretrained)
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # overlay the base feature-extractor weights ...
        arg_params_base, aux_params_base = load_param(pretrained_base,
                                                      epoch,
                                                      convert=True)
        arg_params.update(arg_params_base)
        aux_params.update(aux_params_base)
        # ... then the embedding/flow weights, renamed via arg_prefix
        arg_params_ec, aux_params_ec = load_param(
            pretrained_ec,
            epoch,
            convert=True,
            argprefix=config.TRAIN.arg_prefix)
        arg_params.update(arg_params_ec)
        aux_params.update(aux_params_ec)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes against the inferred data shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        # restore optimizer state alongside the resumed weights
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)

    # decide training params
    # metric: single FCN log-loss
    fcn_loss_metric = metric.FCNLogLossMetric(config.default.frequent *
                                              batch_size)
    eval_metrics = mx.metric.CompositeEvalMetric()

    for child_metric in [fcn_loss_metric]:
        eval_metrics.add(child_metric)

    # callback: throughput logging + per-epoch checkpoints
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    epoch_end_callback = mx.callback.module_checkpoint(
        mod, prefix, period=1, save_optimizer_states=True)

    # decide learning rate: keep only decay epochs ahead of begin_epoch,
    # pre-applying the decays already passed
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(segdb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)

    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)

    # optimizer
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }

    # wrap in a prefetching iterator so data loading overlaps computation
    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
Example #32
0
def test_net(args):
    """Run FCIS detection/segmentation evaluation on a test image set.

    Loads the model named by ``args.model``, builds the test dataset and
    data iterator, creates a Predictor, and runs ``pred_eval``.

    Args:
        args: parsed CLI args; reads ``args.cfg``, ``args.model``,
            ``args.shuffle``, ``args.vis``, ``args.ignore_cache`` and
            ``args.thresh``.

    Raises:
        NotImplementedError: if ``config.TEST.HAS_RPN`` is false.
    """
    # init config
    cfg_path = args.cfg
    update_config(cfg_path)

    # test parameters
    has_rpn = config.TEST.HAS_RPN
    if not has_rpn:
        # py3-compatible raise form (the py2 `raise E, msg` statement is a
        # SyntaxError under Python 3; this call form behaves identically)
        raise NotImplementedError("Network without RPN is not implemented")

    # load model (accept the prefix with or without the .params suffix)
    model_path = args.model
    if '.params' not in model_path:
        model_path += ".params"
    assert osp.exists(model_path), ("Could not find model path %s" %
                                    (model_path))
    arg_params, aux_params = load_param_file(model_path, process=True)
    print("\nLoaded model %s\n" % (model_path))

    # gpu stuff
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]

    # load test dataset
    cfg_ds = config.dataset
    ds_name = cfg_ds.dataset
    ds_path = cfg_ds.dataset_path
    test_image_set = cfg_ds.test_image_set

    # logger
    logger, output_path = create_logger(config.output_path, args.cfg,
                                        config.dataset.test_image_set)
    logger.info('testing config:{}\n'.format(pprint.pformat(config)))

    # labelme gets an explicit constructor; any other dataset name is
    # resolved dynamically via eval
    if ds_name.lower() == "labelme":
        # from utils.load_data import load_labelme_gt_sdsdb
        imdb = labelme(test_image_set,
                       ds_path,
                       cfg_ds.root_path,
                       mask_size=config.MASK_SIZE,
                       binary_thresh=config.BINARY_THRESH,
                       classes=cfg_ds.CLASSES)
    else:
        imdb = eval(ds_name)(test_image_set,
                             cfg_ds.root_path,
                             ds_path,
                             result_path=output_path,
                             binary_thresh=config.BINARY_THRESH,
                             mask_size=config.MASK_SIZE)
    sdsdb = imdb.gt_sdsdb()

    # load network
    network = resnet_v1_101_fcis()
    sym = network.get_symbol(config, is_train=False)

    # get test data iter
    test_data = TestLoader(sdsdb,
                           config,
                           batch_size=len(ctx),
                           shuffle=args.shuffle,
                           has_rpn=has_rpn)

    # infer shape and validate the loaded parameters against it
    data_shape_dict = dict(test_data.provide_data_single)
    network.infer_shape(data_shape_dict)

    network.check_parameter_shapes(arg_params,
                                   aux_params,
                                   data_shape_dict,
                                   is_train=False)

    # decide maximum shape (largest configured scale, batch of one)
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = []
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]

    # create predictor
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # start detection
    pred_eval(predictor,
              test_data,
              imdb,
              config,
              vis=args.vis,
              ignore_cache=args.ignore_cache,
              thresh=args.thresh,
              logger=logger)
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step):
    """Train a detector (optionally with learned NMS) from a pretrained model.

    Seeds numpy/mxnet for reproducibility, builds the anchor-based training
    loader, optionally resumes from `prefix`, and fits the module with SGD
    under a warmup multi-factor learning-rate schedule.

    Args:
        args: parsed CLI args; needs ``args.cfg`` and ``args.frequent``.
        ctx: list of mx.Context objects, one per GPU.
        pretrained: pretrained checkpoint prefix (used when not resuming).
        epoch: epoch of the pretrained checkpoint.
        prefix: output checkpoint prefix (placed under the output path).
        begin_epoch: first training epoch (supports resume).
        end_epoch: last training epoch passed to ``mod.fit``.
        lr: base learning rate.
        lr_step: comma-separated epochs where lr decays by
            ``config.TRAIN.lr_factor``.
    """
    # fixed seeds so runs are reproducible
    np.random.seed(0)
    mx.random.seed(0)
    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol; also archive the symbol source next to the outputs
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu: one BATCH_IMAGES-sized slice per context
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training ('+' joins image sets)
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path,
                            flip=config.TRAIN.FLIP)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)

    # load training data
    train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size, shuffle=config.TRAIN.SHUFFLE, ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE, anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape

    # max_dats_shape=['data', (1,3,600,1000)]
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    # max_data_shape=[], max_lable_shape=[]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    # up to 100 gt boxes per image, 5 values each
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    logger.info('providing maximum shape'+str(max_data_shape)+"  "+str(max_label_shape))

    data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single)

    # add by chaojie
    logger.info("data_sahpe_dict:\n{}".format(pprint.pformat(data_shape_dict)))

    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)
    pprint.pprint(sym_instance.arg_shape_dict)

    logger.info("sym_instance.arg_shape_dict\n")
    # NOTE(review): this uses the root `logging` module rather than the
    # file-specific `logger` used elsewhere in this function
    logging.info(pprint.pformat(sym_instance.arg_shape_dict))
    #dot = mx.viz.plot_network(sym, node_attrs={'shape': 'rect', 'fixedsize': 'false'})
    #dot.render(os.path.join('./output/rcnn/network_vis', config.symbol + '_rcnn'))

    # load and initialize params (resume from prefix, or init from pretrained)
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes against the inferred data shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)],
                        max_label_shapes=[max_label_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix)

    if config.TRAIN.RESUME:
        # also restore optimizer state on resume
        mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch)

    # decide training params
    # metric
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    # RPN metrics only apply when the RPN is being trained (joint training
    # or when NMS is not learned)
    if config.TRAIN.JOINT_TRAINING or (not config.TRAIN.LEARN_NMS):
        rpn_eval_metric = metric.RPNAccMetric()
        rpn_cls_metric = metric.RPNLogLossMetric()
        rpn_bbox_metric = metric.RPNL1LossMetric()
        for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric]:
            eval_metrics.add(child_metric)
    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # extra metrics for the learned-NMS branch
    if config.TRAIN.LEARN_NMS:
        eval_metrics.add(metric.NMSLossMetric(config, 'pos'))
        eval_metrics.add(metric.NMSLossMetric(config, 'neg'))
        eval_metrics.add(metric.NMSAccMetric(config))

    # callback: throughput logging + per-epoch checkpoints (raw and
    # bbox-denormalized via do_checkpoint)
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True),
                          callback.do_checkpoint(prefix, means, stds)]
    # decide learning rate: keep only decay epochs ahead of begin_epoch,
    # pre-applying the decays already passed
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup, config.TRAIN.warmup_lr, config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {'momentum': config.TRAIN.momentum,
                        'wd': config.TRAIN.wd,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0,
                        'clip_gradient': None}

    # wrap in a prefetching iterator so data loading overlaps computation
    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=config.default.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
def alternate_train(args, ctx, pretrained, epoch):
    """Run Faster-R-CNN-style alternating training of an RPN and an R-FCN.

    Stages: (1) train RPN 1 from the pretrained model; (2) dump its
    proposals; (3) train R-FCN 1 on them; (4) retrain RPN 2 with shared
    features from R-FCN 1; (5) dump RPN 2 proposals; (6) combine RPN 2 +
    R-FCN 1 weights, fine-tune R-FCN 2 with shared features; (7) combine
    RPN 2 + R-FCN 2 into the final model.

    Args:
        args: parsed CLI args; needs ``args.cfg`` and ``args.frequent``.
        ctx: list of mx.Context objects, one per GPU.
        pretrained: prefix of the initial pretrained checkpoint.
        epoch: epoch of the initial pretrained checkpoint.
    """
    # set up logger
    logger, output_path = create_logger(config.output_path, args.cfg,
                                        config.dataset.image_set)

    # basic config: every stage starts from epoch 0
    begin_epoch = 0

    # logging.info('########## TRAIN RPN WITH IMAGENET INIT')
    rpn1_prefix = os.path.join(output_path, 'rpn1')

    if not os.path.exists(rpn1_prefix):
        os.makedirs(rpn1_prefix)

    # stage 1: RPN from the pretrained init, unshared features
    config.TRAIN.BATCH_IMAGES = config.TRAIN.ALTERNATE.RPN_BATCH_IMAGES
    train_rpn(config,
              config.dataset.dataset,
              config.dataset.image_set,
              config.dataset.root_path,
              config.dataset.dataset_path,
              args.frequent,
              config.default.kvstore,
              config.TRAIN.FLIP,
              config.TRAIN.SHUFFLE,
              config.TRAIN.RESUME,
              ctx,
              pretrained,
              epoch,
              rpn1_prefix,
              begin_epoch,
              config.TRAIN.ALTERNATE.rpn1_epoch,
              train_shared=False,
              lr=config.TRAIN.ALTERNATE.rpn1_lr,
              lr_step=config.TRAIN.ALTERNATE.rpn1_lr_step,
              logger=logger,
              output_path=output_path)

    # stage 2: dump RPN 1 proposals for both train and test image sets
    logging.info('########## GENERATE RPN DETECTION')
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    image_sets.extend(
        [iset for iset in config.dataset.test_image_set.split('+')])
    for image_set in image_sets:
        test_rpn(config,
                 config.dataset.dataset,
                 image_set,
                 config.dataset.root_path,
                 config.dataset.dataset_path,
                 ctx,
                 rpn1_prefix,
                 config.TRAIN.ALTERNATE.rpn1_epoch,
                 vis=False,
                 shuffle=False,
                 thresh=0,
                 logger=logger,
                 output_path=rpn1_prefix)

    # stage 3: R-FCN 1 on RPN 1 proposals, unshared features
    logging.info('########## TRAIN rfcn WITH IMAGENET INIT AND RPN DETECTION')
    # NOTE(review): unlike rpn1/rpn2, rfcn1_prefix is not created with
    # os.makedirs here — confirm train_rcnn creates it if missing
    rfcn1_prefix = os.path.join(output_path, 'rfcn1')
    config.TRAIN.BATCH_IMAGES = config.TRAIN.ALTERNATE.RCNN_BATCH_IMAGES
    train_rcnn(config,
               config.dataset.dataset,
               config.dataset.image_set,
               config.dataset.root_path,
               config.dataset.dataset_path,
               args.frequent,
               config.default.kvstore,
               config.TRAIN.FLIP,
               config.TRAIN.SHUFFLE,
               config.TRAIN.RESUME,
               ctx,
               pretrained,
               epoch,
               rfcn1_prefix,
               begin_epoch,
               config.TRAIN.ALTERNATE.rfcn1_epoch,
               train_shared=False,
               lr=config.TRAIN.ALTERNATE.rfcn1_lr,
               lr_step=config.TRAIN.ALTERNATE.rfcn1_lr_step,
               proposal='rpn',
               logger=logger,
               output_path=rpn1_prefix)

    # stage 4: RPN 2 initialized from R-FCN 1, with shared features frozen
    logging.info('########## TRAIN RPN WITH rfcn INIT')
    rpn2_prefix = os.path.join(output_path, 'rpn2')

    if not os.path.exists(rpn2_prefix):
        os.makedirs(rpn2_prefix)

    config.TRAIN.BATCH_IMAGES = config.TRAIN.ALTERNATE.RPN_BATCH_IMAGES
    train_rpn(config,
              config.dataset.dataset,
              config.dataset.image_set,
              config.dataset.root_path,
              config.dataset.dataset_path,
              args.frequent,
              config.default.kvstore,
              config.TRAIN.FLIP,
              config.TRAIN.SHUFFLE,
              config.TRAIN.RESUME,
              ctx,
              rfcn1_prefix,
              # NOTE(review): rpn2_epoch is passed as the epoch of the
              # rfcn1 init checkpoint; rfcn1_epoch looks intended — confirm
              config.TRAIN.ALTERNATE.rpn2_epoch,
              rpn2_prefix,
              begin_epoch,
              config.TRAIN.ALTERNATE.rpn2_epoch,
              train_shared=True,
              lr=config.TRAIN.ALTERNATE.rpn2_lr,
              lr_step=config.TRAIN.ALTERNATE.rpn2_lr_step,
              logger=logger,
              output_path=output_path)

    # stage 5: dump RPN 2 proposals (train image sets only)
    logging.info('########## GENERATE RPN  FIXED_PARAMS_SHAREDDETECTION')
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    for image_set in image_sets:
        test_rpn(config,
                 config.dataset.dataset,
                 image_set,
                 config.dataset.root_path,
                 config.dataset.dataset_path,
                 ctx,
                 rpn2_prefix,
                 config.TRAIN.ALTERNATE.rpn2_epoch,
                 vis=False,
                 shuffle=False,
                 thresh=0,
                 logger=logger,
                 output_path=rpn2_prefix)

    # stage 6a: merge RPN 2 and R-FCN 1 parameters into one checkpoint
    logger.info('########## COMBINE RPN2 WITH rfcn1')
    rfcn2_prefix = os.path.join(output_path, 'rfcn2')
    combine_model(rpn2_prefix, config.TRAIN.ALTERNATE.rpn2_epoch, rfcn1_prefix,
                  config.TRAIN.ALTERNATE.rfcn1_epoch, rfcn2_prefix, 0)

    # stage 6b: fine-tune R-FCN 2 from the combined model, shared features
    logger.info('########## TRAIN rfcn WITH RPN INIT AND DETECTION')
    config.TRAIN.BATCH_IMAGES = config.TRAIN.ALTERNATE.RCNN_BATCH_IMAGES
    train_rcnn(config,
               config.dataset.dataset,
               config.dataset.image_set,
               config.dataset.root_path,
               config.dataset.dataset_path,
               args.frequent,
               config.default.kvstore,
               config.TRAIN.FLIP,
               config.TRAIN.SHUFFLE,
               config.TRAIN.RESUME,
               ctx,
               rfcn2_prefix,
               0,
               rfcn2_prefix,
               begin_epoch,
               config.TRAIN.ALTERNATE.rfcn2_epoch,
               train_shared=True,
               lr=config.TRAIN.ALTERNATE.rfcn2_lr,
               lr_step=config.TRAIN.ALTERNATE.rfcn2_lr_step,
               proposal='rpn',
               logger=logger,
               output_path=rpn2_prefix)

    # stage 7: merge RPN 2 and R-FCN 2 into the final model
    logger.info('########## COMBINE RPN2 WITH rfcn2')
    final_prefix = os.path.join(output_path, 'final')
    combine_model(rpn2_prefix, config.TRAIN.ALTERNATE.rpn2_epoch, rfcn2_prefix,
                  config.TRAIN.ALTERNATE.rfcn2_epoch, final_prefix, 0)
Example #35
0
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr,
              lr_step):
    """Train an FPN-style detector end-to-end (one-shot/embedding aware).

    Builds the training symbol named by ``config.symbol``, merges one or more
    ';'-separated datasets into a single roidb, and fits a ``MutableModule``
    with SGD under a warmup multi-factor learning-rate schedule.

    :param args: command-line namespace; ``.cfg``, ``.frequent`` and ``.debug``
        are read here.
    :param ctx: list of MXNet devices; the effective input batch is
        ``config.TRAIN.BATCH_IMAGES`` images per device.
    :param pretrained: checkpoint prefix used to initialize the weights.
    :param epoch: epoch number of the pretrained checkpoint.
    :param prefix: checkpoint prefix for this run (placed inside the logger's
        output directory).
    :param begin_epoch: first epoch (resume point when ``config.TRAIN.RESUME``).
    :param end_epoch: epoch at which training stops.
    :param lr: base learning rate.
    :param lr_step: comma-separated epoch numbers at which the learning rate is
        decayed by ``config.TRAIN.lr_factor``.
    """
    # fixed seeds for reproducible runs
    mx.random.seed(3)
    np.random.seed(3)
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # expose the output path to code that only receives the config object
    config['final_output_path'] = final_output_path

    # load symbol; archive the symbol definition file next to the logs so the
    # run is reproducible even if the source changes later
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)

    # one RPN classification score map per pyramid level (level = log2(stride))
    feat_pyramid_level = np.log2(config.network.RPN_FEAT_STRIDE).astype(int)
    feat_sym = [
        sym.get_internals()['rpn_cls_score_p' + str(x) + '_output']
        for x in feat_pyramid_level
    ]

    # setup multi-gpu: one image shard per device
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    #leonid: adding semicolumn ";" support to allow several different datasets to be merged
    datasets = config.dataset.dataset.split(';')
    image_sets = config.dataset.image_set.split(';')
    data_paths = config.dataset.dataset_path.split(';')
    # per_category_epoch_max is either one ';'-separated value per dataset or a
    # single scalar applied to all of them
    if type(config.dataset.per_category_epoch_max) is str:
        per_category_epoch_max = [
            float(x) for x in config.dataset.per_category_epoch_max.split(';')
        ]
    else:
        per_category_epoch_max = [float(config.dataset.per_category_epoch_max)]
    roidbs = []
    categ_index_offs = 0
    # optional dataset extras; default to '' when absent from the config
    if 'classes_list_fname' not in config.dataset:
        classes_list_fname = ''
    else:
        classes_list_fname = config.dataset.classes_list_fname

    if 'num_ex_per_class' not in config.dataset:
        num_ex_per_class = ''
    else:
        num_ex_per_class = config.dataset.num_ex_per_class

    for iD, dataset in enumerate(datasets):
        # load dataset and prepare imdb for training; image sets within one
        # dataset are '+'-separated
        image_sets_cur = [iset for iset in image_sets[iD].split('+')]
        for image_set in image_sets_cur:
            cur_roidb, cur_num_classes = load_gt_roidb(
                dataset,
                image_set,
                config.dataset.root_path,
                data_paths[iD],
                flip=config.TRAIN.FLIP,
                per_category_epoch_max=per_category_epoch_max[iD],
                return_num_classes=True,
                categ_index_offs=categ_index_offs,
                classes_list_fname=classes_list_fname,
                num_ex_per_class=num_ex_per_class)

            roidbs.append(cur_roidb)
        # shift class indices of the next dataset past the ones seen so far;
        # NOTE: uses cur_num_classes from the last iteration of the inner loop
        categ_index_offs += cur_num_classes
        # roidbs.extend([
        #     load_gt_roidb(
        #         dataset,
        #         image_set,
        #         config.dataset.root_path,
        #         data_paths[iD],
        #         flip=config.TRAIN.FLIP,
        #         per_category_epoch_max=per_category_epoch_max[iD])
        #     for image_set in image_sets])
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)

    # load training data

    train_data = PyramidAnchorIterator(
        feat_sym,
        roidb,
        config,
        batch_size=input_batch_size,
        shuffle=config.TRAIN.SHUFFLE,
        ctx=ctx,
        feat_strides=config.network.RPN_FEAT_STRIDE,
        anchor_scales=config.network.ANCHOR_SCALES,
        anchor_ratios=config.network.ANCHOR_RATIOS,
        aspect_grouping=config.TRAIN.ASPECT_GROUPING,
        allowed_border=np.inf)

    # infer max shape from the largest configured image scale
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3,
                                max([v[0] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    # up to 100 ground-truth boxes per image, 5 values each (coords + class)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print 'providing maximum shape', max_data_shape, max_label_shape

    # when the base net is locked there are no label blobs to provide
    if not config.network.base_net_lock:
        data_shape_dict = dict(train_data.provide_data_single +
                               train_data.provide_label_single)
    else:
        data_shape_dict = dict(train_data.provide_data_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params: resume from this run's prefix, or start from
    # the pretrained checkpoint and re-init the new layers
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)

    # optionally overwrite the embedding dense layers from a pickled model
    if config.TRAIN.LOAD_EMBEDDING:
        import cPickle
        with open(config.TRAIN.EMBEDDING_FNAME, 'rb') as fid:
            model_data = cPickle.load(fid)
        for fcn in ['1', '2', '3']:
            layer = model_data['dense_' + fcn]
            weight = ListList2ndarray(layer[0])
            bias = mx.nd.array(layer[1])
            arg_params['embed_dense_' + fcn + '_weight'] = weight
            arg_params['embed_dense_' + fcn + '_bias'] = bias

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    alt_fixed_param_prefix = config.network.ALT_FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    if not config.network.base_net_lock:
        label_names = [k[0] for k in train_data.provide_label_single]
    else:
        label_names = []

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix,
        alt_fixed_param_prefix=alt_fixed_param_prefix)

    # Leonid: Comment out the following two lines if switching to smaller number of GPUs and resuming training, then after it starts running un-comment back
    # if config.TRAIN.RESUME:
    #     mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch)
    #TODO: release this.
    # decide training params
    # metric: RPN metrics only exist when the base net is trainable
    if not config.network.base_net_lock:
        rpn_eval_metric = metric.RPNAccMetric()
        rpn_cls_metric = metric.RPNLogLossMetric()
        rpn_bbox_metric = metric.RPNL1LossMetric()
    rpn_fg_metric = metric.RPNFGFraction(config)
    eval_metric = metric.RCNNAccMetric(config)
    eval_fg_metric = metric.RCNNFGAccuracy(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()

    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    if not config.network.base_net_lock:
        all_child_metrics = [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, rpn_fg_metric,
            eval_fg_metric, eval_metric, cls_metric, bbox_metric
        ]
    else:
        all_child_metrics = [
            rpn_fg_metric, eval_fg_metric, eval_metric, cls_metric, bbox_metric
        ]
    # all_child_metrics = [rpn_eval_metric, rpn_bbox_metric, rpn_fg_metric, eval_fg_metric, eval_metric, cls_metric, bbox_metric]

    ################################################
    ### added / updated by Leonid to support oneshot
    ################################################
    if config.network.EMBEDDING_DIM != 0:
        if config.network.EMBED_LOSS_ENABLED:
            all_child_metrics += [
                metric.RepresentativesMetric(config, final_output_path)
            ]  # moved from above. JS.
            all_child_metrics += [metric.EmbedMetric(config)]
            if config.network.BG_REPS:
                all_child_metrics += [metric.BGModelMetric(config)]
        if config.network.REPS_CLS_LOSS:
            all_child_metrics += [metric.RepsCLSMetric(config)]
        if config.network.ADDITIONAL_LINEAR_CLS_LOSS:
            all_child_metrics += [metric.RCNNLinLogLossMetric(config)]
        if config.network.VAL_FILTER_REGRESS:
            all_child_metrics += [metric.ValRegMetric(config)]
        if config.network.SCORE_HIST_REGRESS:
            all_child_metrics += [metric.ScoreHistMetric(config)]
    ################################################

    for child_metric in all_child_metrics:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    # bbox target normalization constants, tiled per class (or 2x when
    # class-agnostic regression is used)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate: skip decay steps that fall before begin_epoch and
    # pre-apply their factors to the base lr
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'clip_gradient': None
    }
    #
    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # in debug mode, snapshot the initial params to compare after training
    if args.debug == 1:
        import copy
        arg_params_ = copy.deepcopy(arg_params)
        aux_params_ = copy.deepcopy(aux_params)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch,
            config=config)

    # debug: report which params changed during training (results unused;
    # presumably dictCompare prints its findings — TODO confirm)
    if args.debug == 1:
        t = dictCompare(aux_params_, aux_params)
        t = dictCompare(arg_params_, arg_params)
def train_net(args, ctx, pretrained, pretrained_flow, epoch, prefix,
              begin_epoch, end_epoch, lr, lr_step):
    """Train a flow-guided detection network.

    Builds the training symbol for ``config.symbol`` (which additionally takes
    a reference frame and a motion vector), loads the '+'-separated image sets
    into one roidb, and fits a ``MutableModule`` with SGD under a warmup
    multi-factor learning-rate schedule.

    :param args: command-line namespace; ``.cfg`` and ``.frequent`` are read.
    :param ctx: list of MXNet devices; the effective input batch is
        ``config.TRAIN.BATCH_IMAGES`` images per device.
    :param pretrained: checkpoint prefix used to initialize the weights.
    :param pretrained_flow: prefix of a pretrained flow-network checkpoint;
        currently unused — its loading code is commented out below.
    :param epoch: epoch number of the pretrained checkpoint.
    :param prefix: checkpoint prefix for this run (placed inside the logger's
        output directory).
    :param begin_epoch: first epoch (resume point when ``config.TRAIN.RESUME``).
    :param end_epoch: epoch at which training stops.
    :param lr: base learning rate.
    :param lr_step: comma-separated epoch numbers at which the learning rate is
        decayed by ``config.TRAIN.lr_factor``.
    """
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol; archive the symbol definition file next to the logs
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'),
                 final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_train_symbol(config)
    # internal outputs used for anchor generation and feature-shape inference
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    feat_conv_3x3_relu = sym.get_internals()['feat_conv_3x3_relu_output']

    # setup multi-gpu: one image shard per device
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training ('+'-separated image sets)
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [
        load_gt_roidb(config.dataset.dataset,
                      image_set,
                      config.dataset.root_path,
                      config.dataset.dataset_path,
                      flip=config.TRAIN.FLIP) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)
    # load training data
    train_data = AnchorLoader(feat_sym,
                              feat_conv_3x3_relu,
                              roidb,
                              config,
                              batch_size=input_batch_size,
                              shuffle=config.TRAIN.SHUFFLE,
                              ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE,
                              anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING,
                              normalize_target=config.network.NORMALIZE_RPN,
                              bbox_mean=config.network.ANCHOR_MEANS,
                              bbox_std=config.network.ANCHOR_STDS)

    # infer max shape
    #max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),
    #                  ('data_ref', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),
    #                  ('eq_flag', (1,))]
    # infer the conv-feature spatial size at the largest input scale so the
    # motion_vector blob can be sized to match
    data_shape1 = {
        'data_ref':
        (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]),
         max([v[1] for v in config.SCALES])),
    }
    _, feat_shape111, _ = feat_conv_3x3_relu.infer_shape(**data_shape1)

    max_data_shape = [('data_ref', (config.TRAIN.BATCH_IMAGES, 3,
                                    max([v[0] for v in config.SCALES]),
                                    max([v[1] for v in config.SCALES]))),
                      ('eq_flag', (1, )),
                      ('motion_vector', (config.TRAIN.BATCH_IMAGES, 2,
                                         int(feat_shape111[0][2]),
                                         int(feat_shape111[0][3])))]

    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    # up to 100 ground-truth boxes per image, 5 values each (coords + class)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print 'providing maximum shape', max_data_shape, max_label_shape

    data_shape_dict = dict(train_data.provide_data_single +
                           train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params: resume from this run's prefix, or start from
    # the pretrained checkpoint and re-init the new layers
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        #arg_params_flow, aux_params_flow = load_param(pretrained_flow, epoch, convert=True)
        #arg_params.update(arg_params_flow)
        #aux_params.update(aux_params_flow)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params,
                                        data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]

    mod = MutableModule(
        sym,
        data_names=data_names,
        label_names=label_names,
        logger=logger,
        context=ctx,
        max_data_shapes=[max_data_shape for _ in range(batch_size)],
        max_label_shapes=[max_label_shape for _ in range(batch_size)],
        fixed_param_prefix=fixed_param_prefix)

    # when resuming, also restore the optimizer state saved with the checkpoint
    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    # rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=args.frequent)
    # bbox target normalization constants, tiled per class (or 2x when
    # class-agnostic regression is used)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS),
                    2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS),
                   2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [
        mx.callback.module_checkpoint(mod,
                                      prefix,
                                      period=1,
                                      save_optimizer_states=True),
        callback.do_checkpoint(prefix, means, stds)
    ]
    # decide learning rate: skip decay steps that fall before begin_epoch and
    # pre-apply their factors to the base lr
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor,
                                              config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr,
                                              config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {
        'momentum': config.TRAIN.momentum,
        'wd': config.TRAIN.wd,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': 1.0,
        'clip_gradient': None
    }

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    print('Start to train model')
    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=config.default.kvstore,
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
def alternate_train(args, ctx, pretrained, epoch):
    """Run the 4-stage alternating optimization of RPN and R-FCN.

    Stages: (1) train RPN from the ImageNet-pretrained model, (2) generate RPN
    proposals and train R-FCN on them, (3) retrain RPN initialized from the
    R-FCN weights with the shared layers fixed, (4) regenerate proposals and
    retrain R-FCN, then combine the RPN and R-FCN parameters into one 'final'
    checkpoint.

    :param args: command-line namespace; ``.cfg`` and ``.frequent`` are used.
    :param ctx: list of MXNet devices to train/test on.
    :param pretrained: ImageNet-pretrained checkpoint prefix.
    :param epoch: epoch number of the pretrained checkpoint.
    """
    # set up logger; every intermediate model lives under this experiment dir
    logger, output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)

    # every stage starts its epoch counting from 0
    begin_epoch = 0

    logger.info('########## TRAIN RPN WITH IMAGENET INIT')
    rpn1_prefix = os.path.join(output_path, 'rpn1')

    if not os.path.exists(rpn1_prefix):
        os.makedirs(rpn1_prefix)

    config.TRAIN.BATCH_IMAGES = config.TRAIN.ALTERNATE.RPN_BATCH_IMAGES
    train_rpn(config, config.dataset.dataset, config.dataset.image_set, config.dataset.root_path, config.dataset.dataset_path,
              args.frequent, config.default.kvstore, config.TRAIN.FLIP, config.TRAIN.SHUFFLE, config.TRAIN.RESUME,
              ctx, pretrained, epoch, rpn1_prefix, begin_epoch, config.TRAIN.ALTERNATE.rpn1_epoch, train_shared=False,
              lr=config.TRAIN.ALTERNATE.rpn1_lr, lr_step=config.TRAIN.ALTERNATE.rpn1_lr_step, logger=logger, output_path=output_path)

    logger.info('########## GENERATE RPN DETECTION')
    # proposals are needed for both the train and the test image sets
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    image_sets.extend([iset for iset in config.dataset.test_image_set.split('+')])
    for image_set in image_sets:
        test_rpn(config, config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path,
                 ctx, rpn1_prefix, config.TRAIN.ALTERNATE.rpn1_epoch, vis=False, shuffle=False, thresh=0, logger=logger,
                 output_path=rpn1_prefix)

    logger.info('########## TRAIN rfcn WITH IMAGENET INIT AND RPN DETECTION')
    rfcn1_prefix = os.path.join(output_path, 'rfcn1')
    config.TRAIN.BATCH_IMAGES = config.TRAIN.ALTERNATE.RCNN_BATCH_IMAGES
    # output_path points at rpn1's directory, where the cached proposals live
    train_rcnn(config, config.dataset.dataset, config.dataset.image_set, config.dataset.root_path, config.dataset.dataset_path,
               args.frequent, config.default.kvstore, config.TRAIN.FLIP, config.TRAIN.SHUFFLE, config.TRAIN.RESUME,
               ctx, pretrained, epoch, rfcn1_prefix, begin_epoch, config.TRAIN.ALTERNATE.rfcn1_epoch, train_shared=False,
               lr=config.TRAIN.ALTERNATE.rfcn1_lr, lr_step=config.TRAIN.ALTERNATE.rfcn1_lr_step, proposal='rpn', logger=logger,
               output_path=rpn1_prefix)

    logger.info('########## TRAIN RPN WITH rfcn INIT')
    rpn2_prefix = os.path.join(output_path, 'rpn2')

    if not os.path.exists(rpn2_prefix):
        os.makedirs(rpn2_prefix)

    config.TRAIN.BATCH_IMAGES = config.TRAIN.ALTERNATE.RPN_BATCH_IMAGES
    # BUGFIX: load the rfcn1 checkpoint at the epoch it was trained to
    # (rfcn1_epoch). Previously rpn2_epoch was passed as the pretrained epoch,
    # which loads a wrong/nonexistent rfcn1 checkpoint whenever the two epoch
    # counts differ.
    train_rpn(config, config.dataset.dataset, config.dataset.image_set, config.dataset.root_path, config.dataset.dataset_path,
              args.frequent, config.default.kvstore, config.TRAIN.FLIP, config.TRAIN.SHUFFLE, config.TRAIN.RESUME,
              ctx, rfcn1_prefix, config.TRAIN.ALTERNATE.rfcn1_epoch, rpn2_prefix, begin_epoch, config.TRAIN.ALTERNATE.rpn2_epoch,
              train_shared=True, lr=config.TRAIN.ALTERNATE.rpn2_lr, lr_step=config.TRAIN.ALTERNATE.rpn2_lr_step, logger=logger,
              output_path=output_path)

    logger.info('########## GENERATE RPN DETECTION')
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    for image_set in image_sets:
        test_rpn(config, config.dataset.dataset, image_set, config.dataset.root_path, config.dataset.dataset_path,
                 ctx, rpn2_prefix, config.TRAIN.ALTERNATE.rpn2_epoch, vis=False, shuffle=False, thresh=0, logger=logger,
                 output_path=rpn2_prefix)

    logger.info('########## COMBINE RPN2 WITH rfcn1')
    rfcn2_prefix = os.path.join(output_path, 'rfcn2')
    combine_model(rpn2_prefix, config.TRAIN.ALTERNATE.rpn2_epoch, rfcn1_prefix, config.TRAIN.ALTERNATE.rfcn1_epoch, rfcn2_prefix, 0)

    logger.info('########## TRAIN rfcn WITH RPN INIT AND DETECTION')
    config.TRAIN.BATCH_IMAGES = config.TRAIN.ALTERNATE.RCNN_BATCH_IMAGES
    # init from the combined model saved at epoch 0; proposals are under rpn2
    train_rcnn(config, config.dataset.dataset, config.dataset.image_set, config.dataset.root_path, config.dataset.dataset_path,
               args.frequent, config.default.kvstore, config.TRAIN.FLIP, config.TRAIN.SHUFFLE, config.TRAIN.RESUME,
               ctx, rfcn2_prefix, 0, rfcn2_prefix, begin_epoch, config.TRAIN.ALTERNATE.rfcn2_epoch, train_shared=True,
               lr=config.TRAIN.ALTERNATE.rfcn2_lr, lr_step=config.TRAIN.ALTERNATE.rfcn2_lr_step, proposal='rpn', logger=logger,
               output_path=rpn2_prefix)

    logger.info('########## COMBINE RPN2 WITH rfcn2')
    final_prefix = os.path.join(output_path, 'final')
    combine_model(rpn2_prefix, config.TRAIN.ALTERNATE.rpn2_epoch, rfcn2_prefix, config.TRAIN.ALTERNATE.rfcn2_epoch, final_prefix, 0)