def __init__(self, symbol, prefix, epoch, provide_data, provide_label=[], ctx=mx.cpu(),
             arg_params=None, aux_params=None, has_json_symbol=False):
    if has_json_symbol:
        symbol, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    elif arg_params is None:
        arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)
    self.symbol = symbol
    data_names = [k[0] for k in provide_data]
    label_names = [k[0] for k in provide_label]
    self._mod = mx.module.Module(symbol, data_names=data_names, label_names=label_names, context=ctx)
    self._mod.bind(provide_data, for_training=False)
    self._mod.set_params(arg_params, aux_params)
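# A minimal usage sketch for the wrapper above, assuming the enclosing class is
# named CheckpointPredictor (hypothetical) and that '<prefix>-symbol.json' and
# '<prefix>-0000.params' exist on disk; the prefix and input shape below are
# illustrative, not from the source.
import mxnet as mx
import numpy as np

provide_data = [('data', (1, 3, 224, 224))]
net = CheckpointPredictor(None, 'model/example', 0, provide_data, ctx=mx.cpu(), has_json_symbol=True)
batch = mx.io.DataBatch(data=[mx.nd.array(np.zeros((1, 3, 224, 224), dtype=np.float32))],
                        provide_data=provide_data)
net._mod.forward(batch)
print(net._mod.get_outputs()[0].shape)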
def test_rcnn(imageset, year, root_path, devkit_path, prefix, epoch, ctx,
              vis=False, has_rpn=True, proposal='rpn', end2end=False):
    # load symbol and testing data
    if has_rpn:
        # sym = get_vgg_test()
        config.TRAIN.AGNOSTIC = True
        config.END2END = 1
        config.PIXEL_MEANS = np.array([[[0, 0, 0]]])
        sym = resnext_101(num_class=21)
        config.TEST.HAS_RPN = True
        config.TEST.RPN_PRE_NMS_TOP_N = 6000
        config.TEST.RPN_POST_NMS_TOP_N = 300
        voc, roidb = load_gt_roidb(imageset, year, root_path, devkit_path)
    else:
        sym = get_vgg_rcnn_test()
        voc, roidb = eval('load_test_' + proposal + '_roidb')(imageset, year, root_path, devkit_path)
    # get test data iter
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')
    # load model
    args, auxs, _ = load_param(prefix, epoch, convert=True, ctx=ctx)
    # detect
    detector = Detector(sym, ctx, args, auxs)
    pred_eval(detector, test_data, voc, vis=vis)
def __init__(self):
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    self.sym = sym_instance.get_symbol(config, is_train=False)
    self.arg_params, self.aux_params = load_param(
        cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        0, process=True)
    # set up class names
    self.num_classes = 81
    self.classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
                    'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
                    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
                    'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
                    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
                    'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
                    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
                    'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
                    'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
                    'toothbrush']
    # self.need_classes = ['person', 'car', 'motorbike', 'bus', 'truck']
    self.need_classes = ['car', 'motorbike', 'bus', 'truck']
    self.colors = {'car': (rand(), rand(), rand()),
                   'motorbike': (rand(), rand(), rand()),
                   'bus': (rand(), rand(), rand()),
                   'truck': (rand(), rand(), rand())}
    # self.warm_up()
def get_net(prefix, epoch, ctx):
    config.TRAIN.AGNOSTIC = True
    args, auxs, num_class = load_param(prefix, epoch, convert=True, ctx=ctx)
    sym = resnext_101(num_class=num_class)
    # sym = resnet_50(num_class=num_class)
    detector = Detector(sym, ctx, args, auxs)
    return detector
def loadModel(self):
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    # pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[max_data_shape[0][0], ('im_info', (1L, 3L))]]
    provide_label = [None]
    arg_params, aux_params = load_param(cur_path + config.PREMODEL, 0, process=True)
    predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape, provide_data=provide_data,
                          provide_label=provide_label, arg_params=arg_params, aux_params=aux_params)
    return ctx_id, data_names, predictor
def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch,
              vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
        roidb = imdb.get_roidb()
    else:
        assert False, 'do not support'
    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn)
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    data_shape_dict = dict(test_data.provide_data)
    sym_instance.infer_shape(data_shape_dict)
    data_names = [k[0] for k in test_data.provide_data]
    label_names = None
    max_data_shape = [('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                max([int(v[1] // 16 * 16) for v in cfg.SCALES])))]
    predictor = Predictor(sym, data_names, label_names, context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
def test_rcnn(imageset, year, root_path, devkit_path, prefix, epoch, ctx,
              vis=False, has_rpn=True, proposal='rpn'):
    # load symbol and testing data
    if has_rpn:
        sym = get_vgg_test()
        config.TEST.HAS_RPN = True
        config.TEST.RPN_PRE_NMS_TOP_N = 6000
        config.TEST.RPN_POST_NMS_TOP_N = 300
        voc, roidb = load_gt_roidb(imageset, year, root_path, devkit_path)
    else:
        sym = get_vgg_rcnn_test()
        voc, roidb = eval('load_test_' + proposal + '_roidb')(imageset, year, root_path, devkit_path)
    # get test data iter
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')
    # load model
    args, auxs, _ = load_param(prefix, epoch, convert=True, ctx=ctx)
    # detect
    detector = Detector(sym, ctx, args, auxs)
    pred_eval(detector, test_data, voc, vis=vis)
def __init__(self):
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    self.symbol = sym_instance.get_symbol(config, is_train=False)
    self.classes = ['box', 'robot']
    logging.debug("Classes: {}".format(self.classes))
    self.scales = config.SCALES[0]
    logging.debug("Scales: {}".format(self.scales))
    self.data_shape_conf = [[('data', (1, 3, self.scales[0], self.scales[1])), ('im_info', (1, 3))]]
    self.arg_params, self.aux_params = load_param(
        os.path.join(cur_path, '..', 'models', "rfcn_voc"), 0, process=True)
    self.data_names = ['data', 'im_info']
    self.predictor = Predictor(self.symbol, ['data', 'im_info'], [], context=[mx.gpu(0)],
                               max_data_shapes=self.data_shape_conf,
                               provide_data=self.data_shape_conf, provide_label=[None],
                               arg_params=self.arg_params, aux_params=self.aux_params)
    self.nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    logging.info("Deformable detector initialized")
def train_net(image_set, year, root_path, devkit_path, pretrained, epoch, prefix, ctx,
              begin_epoch, end_epoch, frequent, kv_store, work_load_list=None, resume=False):
    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # load symbol
    sym = get_vgg_rcnn()
    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)
    # load training data
    voc, roidb, means, stds = load_ss_roidb(image_set, year, root_path, devkit_path, flip=True)
    train_data = ROIIter(roidb, batch_size=config.TRAIN.BATCH_IMAGES, shuffle=True,
                         mode='train', ctx=ctx, work_load_list=work_load_list)
    # infer max shape
    max_data_shape = [('data', (1, 3, 1000, 1000))]
    # load pretrained
    args, auxs = load_param(pretrained, epoch, convert=True)
    # initialize params
    if not resume:
        arg_shape, _, _ = sym.infer_shape(data=(1, 3, 224, 224), rois=(1, 5))
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        args['cls_score_weight'] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict['cls_score_weight'])
        args['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
        args['bbox_pred_weight'] = mx.random.normal(mean=0, stdvar=0.001, shape=arg_shape_dict['bbox_pred_weight'])
        args['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])
    # train
    solver = Solver(prefix, sym, ctx, begin_epoch, end_epoch, kv_store, args, auxs,
                    momentum=0.9, wd=0.0005, learning_rate=1e-3,
                    lr_scheduler=mx.lr_scheduler.FactorScheduler(30000, 0.1),
                    mutable_data_shape=True, max_data_shape=max_data_shape)
    solver.fit(train_data, frequent=frequent)
    # edit params and save
    for epoch in range(begin_epoch + 1, end_epoch + 1):
        # reload the checkpoint saved for this epoch under `prefix`
        arg_params, aux_params = load_param(prefix, epoch, convert=True)
        arg_params['bbox_pred_weight'] = (arg_params['bbox_pred_weight'].T * mx.nd.array(stds)).T
        arg_params['bbox_pred_bias'] = arg_params['bbox_pred_bias'] * mx.nd.array(stds) + \
                                       mx.nd.array(means)
        save_checkpoint(prefix, epoch, arg_params, aux_params)
def init_detect_model():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    arg_params, aux_params = load_param(
        os.path.join(RFCN_DCN_CONFIG['modelParam']['modelBasePath'], 'rfcn_voc'),
        RFCN_DCN_CONFIG['modelParam']['epoch'], process=True)
    return [sym, arg_params, aux_params]
def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch,
              vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    if not logger:
        assert False, 'require a logger'
    # print cfg
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))
    # load symbol and testing data
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
        roidb = imdb.gt_roidb()
    else:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
        gt_roidb = imdb.gt_roidb()
        roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb)
    # get test data iter
    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn)
    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)
    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = None
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))
    # create predictor
    predictor = Predictor(sym, data_names, label_names, context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    # start detection
    pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
def init_detect_model():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    arg_params, aux_params = load_param(cur_path + '/demo/models/' + 'rfcn_voc',
                                        int(args.epoch), process=True)
    return [sym, arg_params, aux_params]
def main():
    # get symbol
    pprint.pprint(config)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol_rfcn(config, is_train=False)
    # load demo data
    image_names = ['000057.jpg', '000149.jpg', '000351.jpg', '002535.jpg']
    image_all = []
    # ground truth boxes
    gt_boxes_all = [np.array([[132, 52, 384, 357]]),
                    np.array([[113, 1, 350, 360]]),
                    np.array([[0, 27, 329, 155]]),
                    np.array([[8, 40, 499, 289]])]
    gt_classes_all = [np.array([3]), np.array([16]), np.array([7]), np.array([12])]
    data = []
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(cur_path + '/../demo/deform_psroi/' + im_name), \
            '{} does not exist'.format('../demo/deform_psroi/' + im_name)
        im = cv2.imread(cur_path + '/../demo/deform_psroi/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        image_all.append(im)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        gt_boxes = gt_boxes_all[idx]
        gt_boxes = np.round(gt_boxes * im_scale)
        data.append({'data': im_tensor,
                     'rois': np.hstack((np.zeros((gt_boxes.shape[0], 1)), gt_boxes))})
    # get predictor
    data_names = ['data', 'rois']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/deform_psroi', 0, process=True)
    predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape, provide_data=provide_data,
                          provide_label=provide_label, arg_params=arg_params, aux_params=aux_params)
    # test
    for idx, _ in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        output = predictor.predict(data_batch)
        cls_offset = output[0]['rfcn_cls_offset_output'].asnumpy()
        im = image_all[idx]
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        boxes = gt_boxes_all[idx]
        show_dpsroi_offset(im, boxes, cls_offset, gt_classes_all[idx])
def test_deeplab(network, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch,
                 vis, logger=None, output_path=None):
    if not logger:
        assert False, 'require a logger'
    # print config
    pprint.pprint(config)
    logger.info('testing config:{}\n'.format(pprint.pformat(config)))
    # load symbol and testing data
    sym = eval('get_' + network + '_test')(num_classes=config.dataset.NUM_CLASSES)
    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
    segdb = imdb.gt_segdb()
    # get test data iter
    test_data = TestDataLoader(segdb, batch_size=len(ctx))
    # load model
    # arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    # check parameters
    for k in sym.list_arguments():
        if k in data_shape_dict or k in ['softmax_label']:
            continue
        assert k in arg_params, k + ' not initialized'
        assert arg_params[k].shape == arg_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + \
            ' provided ' + str(arg_params[k].shape)
    for k in sym.list_auxiliary_states():
        assert k in aux_params, k + ' not initialized'
        assert aux_params[k].shape == aux_shape_dict[k], \
            'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + \
            ' provided ' + str(aux_params[k].shape)
    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = ['softmax_label']
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    # create predictor
    predictor = Predictor(sym, data_names, label_names, context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    # start detection
    pred_eval(predictor, test_data, imdb, vis=vis, logger=logger)
def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, motion_iou_path, ctx, prefix, epoch,
              vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None,
              enable_detailed_eval=True):
    if not logger:
        assert False, 'require a logger'
    # print cfg
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))
    # load symbol and testing data
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)
    imdb = eval(dataset)(image_set, root_path, dataset_path, motion_iou_path,
                         result_path=output_path, enable_detailed_eval=enable_detailed_eval)
    roidb = imdb.gt_roidb()
    # split roidbs across gpus, balancing total sequence length
    gpu_num = len(ctx)
    roidbs = [[] for x in range(gpu_num)]
    roidbs_seg_lens = np.zeros(gpu_num, dtype=np.int)
    for x in roidb:
        gpu_id = np.argmin(roidbs_seg_lens)
        roidbs[gpu_id].append(x)
        if 'frame_seg_len' in x:
            roidbs_seg_lens[gpu_id] += x['frame_seg_len']
        elif 'video_len' in x:
            roidbs_seg_lens[gpu_id] += x['video_len']
    # get test data iter
    test_datas = [TestLoader(x, cfg, batch_size=1, shuffle=shuffle,
                             video_shuffle=cfg.TEST.video_shuffle, has_rpn=has_rpn) for x in roidbs]
    # load model
    print('load param from', prefix, epoch)
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    # create predictor
    feat_predictors = [get_predictor(feat_sym, feat_sym_instance, cfg, arg_params, aux_params,
                                     test_datas[i], [ctx[i]]) for i in range(gpu_num)]
    aggr_predictors = [get_predictor(aggr_sym, aggr_sym_instance, cfg, arg_params, aux_params,
                                     test_datas[i], [ctx[i]]) for i in range(gpu_num)]
    # start detection
    pred_eval_multiprocess(gpu_num, feat_predictors, aggr_predictors, test_datas, imdb, cfg,
                           vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
def main():
    logging.info('########## TRAIN FASTER-RCNN WITH APPROXIMATE JOINT END2END #############')
    init_config()
    if "resnet" in args.pretrained:
        sym = resnet_50(num_class=args.num_classes, bn_mom=args.bn_mom,
                        bn_global=True, is_train=True)  # consider background
    else:
        sym = get_faster_rcnn(num_classes=args.num_classes)  # consider background
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    # setup for multi-gpu
    ctx = [mx.gpu(int(i)) for i in args.gpu_ids.split(',')]
    config.TRAIN.IMS_PER_BATCH *= len(ctx)
    max_data_shape, max_label_shape = get_max_shape(feat_sym)
    # data
    # voc, roidb = load_gt_roidb_from_list(args.dataset_name, args.lst, args.dataset_root,
    #                                      args.outdata_path, flip=not args.no_flip)
    voc, roidb = load_gt_roidb(args.image_set, args.year, args.root_path, args.devkit_path,
                               flip=not args.no_flip)
    train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.IMS_PER_BATCH,
                              anchor_scales=(4, 8, 16, 32), shuffle=not args.no_shuffle,
                              mode='train', ctx=ctx, need_mean=args.need_mean)
    # model
    args_params, auxs_params, _ = load_param(args.pretrained, args.load_epoch, convert=True)
    if not args.resume:
        args_params, auxs_params = init_model(args_params, auxs_params, train_data, sym, args.pretrained)
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    batch_end_callback = Speedometer(train_data.batch_size, frequent=args.frequent)
    epoch_end_callback = do_checkpoint(args.prefix)
    optimizer_params = {'momentum': args.mom,
                        'wd': args.wd,
                        'learning_rate': args.lr,
                        # 'lr_scheduler': WarmupScheduler(args.factor_step, 0.1, warmup_lr=0.1*args.lr, warmup_step=200) \
                        #     if not args.resume else mx.lr_scheduler.FactorScheduler(args.factor_step, 0.1),
                        'lr_scheduler': mx.lr_scheduler.FactorScheduler(args.factor_step, 0.1),  # seems no need to warm up
                        'clip_gradient': 1.0,
                        'rescale_grad': 1.0}
    if "resnet" in args.pretrained:
        # only consider resnet-50 here
        fixed_param_prefix = ['conv0', 'stage1', 'stage2', 'bn_data', 'bn0']
    else:
        fixed_param_prefix = ['conv1', 'conv2', 'conv3']
    # train
    mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger,
                        context=ctx, max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    mod.fit(train_data, eval_metric=metric(), epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kv_store,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=args_params, aux_params=auxs_params,
            begin_epoch=args.load_epoch, num_epoch=args.num_epoch)
def test_rpn(cfg, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch,
             vis, shuffle, thresh, logger=None, output_path=None):
    # set up logger
    if not logger:
        logging.basicConfig()
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
    # rpn generate proposal cfg
    cfg.TEST.HAS_RPN = True
    # print cfg
    pprint.pprint(cfg)
    logger.info('testing rpn cfg:{}\n'.format(pprint.pformat(cfg)))
    # load symbol
    sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    sym = sym_instance.get_symbol_rpn(cfg, is_train=False)
    # load dataset and prepare imdb for testing
    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
    roidb = imdb.gt_roidb()
    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=True)
    # load model
    arg_params, aux_params = load_param(prefix, epoch)
    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)
    # check parameters
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)
    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data[0]]
    label_names = None if test_data.provide_label[0] is None else [k[0] for k in test_data.provide_label[0]]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]]
    # create predictor
    predictor = Predictor(sym, data_names, label_names, context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    # start testing
    imdb_boxes = generate_proposals(predictor, test_data, imdb, cfg, vis=vis, thresh=thresh)
    all_log_info = imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
    logger.info(all_log_info)
def train_net(image_set, year, root_path, devkit_path, pretrained, epoch, prefix, ctx,
              begin_epoch, end_epoch, frequent, kv_store, work_load_list=None, resume=False):
    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # load symbol
    sym = get_vgg_rpn()
    feat_sym = get_vgg_rpn().get_internals()['rpn_cls_score_output']
    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)
    # load training data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=True)
    train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.BATCH_SIZE, shuffle=True,
                              mode='train', ctx=ctx, work_load_list=work_load_list)
    # infer max shape
    max_data_shape = [('data', (1, 3, 1000, 1000))]
    max_data_shape_dict = {k: v for k, v in max_data_shape}
    _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict)
    from rcnn.minibatch import assign_anchor
    import numpy as np
    label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]])
    max_label_shape = [('label', label['label'].shape),
                       ('bbox_target', label['bbox_target'].shape),
                       ('bbox_inside_weight', label['bbox_inside_weight'].shape),
                       ('bbox_outside_weight', label['bbox_outside_weight'].shape)]
    print 'providing maximum shape', max_data_shape, max_label_shape
    # load pretrained
    args, auxs = load_param(pretrained, epoch, convert=True)
    # initialize params
    if not resume:
        arg_shape, _, _ = sym.infer_shape(data=(1, 3, 224, 224))
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        args['rpn_conv_3x3_weight'] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        args['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        args['rpn_cls_score_weight'] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        args['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        args['rpn_bbox_pred_weight'] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        args['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])
    # train
    solver = Solver(prefix, sym, ctx, begin_epoch, end_epoch, kv_store, args, auxs,
                    momentum=0.9, wd=0.0005, learning_rate=1e-3,
                    lr_scheduler=mx.lr_scheduler.FactorScheduler(60000, 0.1),
                    mutable_data_shape=True, max_data_shape=max_data_shape,
                    max_label_shape=max_label_shape)
    solver.fit(train_data, frequent=frequent)
def test_deeplab():
    epoch = config.TEST.test_epoch
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    image_set = config.dataset.test_image_set
    root_path = config.dataset.root_path
    dataset = config.dataset.dataset
    dataset_path = config.dataset.dataset_path
    logger, final_output_path, experiments_path, _ = create_env(config.output_path, args.cfg, image_set)
    prefix = os.path.join(final_output_path, '..',
                          '_'.join([iset for iset in config.dataset.image_set.split('+')]),
                          config.TRAIN.model_prefix)
    # print config
    logger.info('testing config:{}\n'.format(pprint.pformat(config)))
    # load symbol and testing data
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=experiments_path)
    segdb = imdb.gt_segdb()
    # get test data iter
    batch_size = config.TEST.BATCH_IMAGES * len(ctx)
    test_data = TestDataLoader(segdb, config=config, batch_size=batch_size, shuffle=False,
                               ctx=ctx, has_label=imdb.has_label)
    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)
    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = ['label']
    max_data_shape = [[('data', (config.TEST.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    # create predictor
    predictor = Predictor(sym, data_names, label_names, context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    # start detection
    args.ignore_cache = True
    pred_eval(predictor, test_data, imdb, vis=args.vis, ignore_cache=args.ignore_cache, logger=logger)
def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch,
              vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    if not logger:
        assert False, 'require a logger'
    # print cfg
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))
    # load symbol and testing data
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
        roidb = imdb.gt_roidb()
    else:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol_rfcn(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
        gt_roidb = imdb.gt_roidb()
        roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb)
    # get test data iter
    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn)
    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)
    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = None
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))
    # create predictor
    predictor = Predictor(sym, data_names, label_names, context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    # start detection
    pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
def main():
    logging.info('########## TRAIN FASTER-RCNN WITH APPROXIMATE JOINT END2END #############')
    init_config()
    if "resnet" in args.pretrained:
        sym = resnet_50(num_class=args.num_classes, bn_mom=args.bn_mom,
                        bn_global=True, is_train=True)  # consider background
    else:
        sym = get_faster_rcnn(num_classes=args.num_classes)  # consider background
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    # setup for multi-gpu
    ctx = [mx.gpu(int(i)) for i in args.gpu_ids.split(',')]
    config.TRAIN.IMS_PER_BATCH *= len(ctx)
    max_data_shape, max_label_shape = get_max_shape(feat_sym)
    # data
    voc, roidb = load_gt_roidb_from_list(args.dataset_name, args.lst, args.dataset_root,
                                         args.outdata_path, flip=not args.no_flip)
    train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.IMS_PER_BATCH,
                              anchor_scales=(4, 8, 16, 32), shuffle=not args.no_shuffle,
                              mode='train', ctx=ctx, need_mean=args.need_mean)
    # model
    args_params, auxs_params, _ = load_param(args.pretrained, args.load_epoch, convert=True)
    if not args.resume:
        args_params, auxs_params = init_model(args_params, auxs_params, train_data, sym, args.pretrained)
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    batch_end_callback = Speedometer(train_data.batch_size, frequent=args.frequent)
    epoch_end_callback = do_checkpoint(args.prefix)
    optimizer_params = {'momentum': args.mom,
                        'wd': args.wd,
                        'learning_rate': args.lr,
                        # 'lr_scheduler': WarmupScheduler(args.factor_step, 0.1, warmup_lr=0.1*args.lr, warmup_step=200) \
                        #     if not args.resume else mx.lr_scheduler.FactorScheduler(args.factor_step, 0.1),
                        'lr_scheduler': mx.lr_scheduler.FactorScheduler(args.factor_step, 0.1),  # seems no need to warm up
                        'clip_gradient': 1.0,
                        'rescale_grad': 1.0}
    if "resnet" in args.pretrained:
        # only consider resnet-50 here
        fixed_param_prefix = ['conv0', 'stage1', 'stage2', 'bn_data', 'bn0']
    else:
        fixed_param_prefix = ['conv1', 'conv2', 'conv3']
    # train
    mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger,
                        context=ctx, max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    mod.fit(train_data, eval_metric=metric(), epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=args.kv_store,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=args_params, aux_params=auxs_params,
            begin_epoch=args.load_epoch, num_epoch=args.num_epoch)
def main():
    # get symbol
    pprint.pprint(config)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    # load demo data
    image_names = ['000240.jpg', '000437.jpg', '004072.jpg', '007912.jpg']
    image_all = []
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/deform_conv/' + im_name), \
            '{} does not exist'.format('../demo/deform_conv/' + im_name)
        im = cv2.imread(cur_path + '/../demo/deform_conv/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        image_all.append(im)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})
    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/deform_conv', 0, process=True)
    predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape, provide_data=provide_data,
                          provide_label=provide_label, arg_params=arg_params, aux_params=aux_params)
    # test
    for idx, _ in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        output = predictor.predict(data_batch)
        res5a_offset = output[0]['res5a_branch2b_offset_output'].asnumpy()
        res5b_offset = output[0]['res5b_branch2b_offset_output'].asnumpy()
        res5c_offset = output[0]['res5c_branch2b_offset_output'].asnumpy()
        im = image_all[idx]
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_dconv_offset(im, [res5c_offset, res5b_offset, res5a_offset])
def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch,
              vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    if not logger:
        assert False, 'require a logger'
    # print cfg
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))
    # load symbol and testing data
    key_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    cur_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    key_sym = key_sym_instance.get_key_test_symbol(cfg)
    cur_sym = cur_sym_instance.get_cur_test_symbol(cfg)
    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
    roidb = imdb.gt_roidb()
    feat_conv_3x3_relu = key_sym.get_internals()['feat_conv_3x3_relu_output']
    # split roidbs across gpus, balancing total frame count
    gpu_num = len(ctx)
    roidbs = [[] for x in range(gpu_num)]
    roidbs_seg_lens = np.zeros(gpu_num, dtype=np.int)
    for x in roidb:
        gpu_id = np.argmin(roidbs_seg_lens)
        roidbs[gpu_id].append(x)
        roidbs_seg_lens[gpu_id] += x['frame_seg_len']
    # get test data iter
    test_datas = [TestLoader(x, feat_conv_3x3_relu, cfg, batch_size=1, shuffle=shuffle, has_rpn=has_rpn)
                  for x in roidbs]
    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    # print('arg_params: ', arg_params)
    # print('aux_params: ', aux_params)
    # create predictor
    key_predictors = [get_predictor(key_sym, key_sym_instance, cfg, arg_params, aux_params,
                                    test_datas[i], [ctx[i]], True) for i in range(gpu_num)]
    print('Got key_predictors')
    cur_predictors = [get_predictor(cur_sym, cur_sym_instance, cfg, arg_params, aux_params,
                                    test_datas[i], [ctx[i]], False) for i in range(gpu_num)]
    print('Got cur_predictors')
    # start detection
    # pred_eval(0, key_predictors[0], cur_predictors[0], test_datas[0], imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
    pred_eval_multiprocess(gpu_num, key_predictors, cur_predictors, test_datas, imdb, cfg,
                           vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
def test_rpn(image_set, year, root_path, devkit_path, prefix, epoch, ctx, vis=False):
    # load symbol
    sym = get_vgg_rpn_test()
    # load testing data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path)
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')
    # load model
    args, auxs, _ = load_param(prefix, epoch, convert=True, ctx=ctx)
    # start testing
    detector = Detector(sym, ctx, args, auxs)
    imdb_boxes = generate_detections(detector, test_data, voc, vis=vis)
    voc.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
def demo_rcnn(cfg, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch,
              vis, has_rpn, thresh, use_box_voting):
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
    else:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)
    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    # infer shape
    SHORT_SIDE = config.SCALES[0][0]
    LONG_SIDE = config.SCALES[0][1]
    DATA_NAMES = ['data', 'im_info']
    LABEL_NAMES = None
    DATA_SHAPES = [('data', (1, 3, LONG_SIDE, SHORT_SIDE)), ('im_info', (1, 3))]
    LABEL_SHAPES = None
    data_shape_dict = dict(DATA_SHAPES)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)
    # decide maximum shape
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))
    # create predictor
    predictor = Predictor(sym, DATA_NAMES, LABEL_NAMES, context=ctx, max_data_shapes=max_data_shape,
                          provide_data=[DATA_SHAPES], provide_label=[LABEL_SHAPES],
                          arg_params=arg_params, aux_params=aux_params)
    demo_net(predictor, dataset, image_set, root_path, dataset_path, thresh, vis, use_box_voting)
def test_fcis(config, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch,
              vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    if not logger:
        assert False, 'require a logger'
    # print config
    pprint.pprint(config)
    logger.info('testing config:{}\n'.format(pprint.pformat(config)))
    # load symbol and testing data
    if has_rpn:
        sym_instance = eval(config.symbol)()
        sym = sym_instance.get_symbol(config, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path,
                             binary_thresh=config.BINARY_THRESH, mask_size=config.MASK_SIZE)
        sdsdb = imdb.gt_sdsdb()
    else:
        raise NotImplementedError
    # get test data iter
    test_data = TestLoader(sdsdb, config, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn)
    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)
    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = []
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    if not has_rpn:
        raise NotImplementedError()
    # create predictor
    predictor = Predictor(sym, data_names, label_names, context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    # start detection
    pred_eval(predictor, test_data, imdb, config, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
def train_net(image_set, year, root_path, devkit_path, pretrained, epoch, prefix, ctx,
              begin_epoch, end_epoch, frequent, kv_store, work_load_list=None, resume=False):
    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # load symbol
    sym = get_vgg_rcnn()
    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)
    # load training data
    voc, roidb, means, stds = load_rpn_roidb(image_set, year, root_path, devkit_path, flip=True)
    train_data = ROIIter(roidb, batch_size=config.TRAIN.BATCH_IMAGES, shuffle=True,
                         mode='train', ctx=ctx, work_load_list=work_load_list)
    # infer max shape
    max_data_shape = [('data', (1, 3, 1000, 1000))]
    # load pretrained
    args, auxs = load_param(pretrained, epoch, convert=True)
    # initialize params
    if not resume:
        input_shapes = {k: v for k, v in train_data.provide_data + train_data.provide_label}
        arg_shape, _, _ = sym.infer_shape(**input_shapes)
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        args['cls_score_weight'] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict['cls_score_weight'])
        args['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
        args['bbox_pred_weight'] = mx.random.normal(mean=0, stdvar=0.001, shape=arg_shape_dict['bbox_pred_weight'])
        args['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])
    # train
    solver = Solver(prefix, sym, ctx, begin_epoch, end_epoch, kv_store, args, auxs,
                    momentum=0.9, wd=0.0005, learning_rate=1e-3,
                    lr_scheduler=mx.lr_scheduler.FactorScheduler(30000, 0.1),
                    mutable_data_shape=True, max_data_shape=max_data_shape)
    solver.fit(train_data, frequent=frequent)
    # edit params and save
    for epoch in range(begin_epoch + 1, end_epoch + 1):
        arg_params, aux_params = load_checkpoint(prefix, epoch)
        arg_params['bbox_pred_weight'] = (arg_params['bbox_pred_weight'].T * mx.nd.array(stds)).T
        arg_params['bbox_pred_bias'] = arg_params['bbox_pred_bias'] * mx.nd.array(stds) + \
                                       mx.nd.array(means)
        save_checkpoint(prefix, epoch, arg_params, aux_params)
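# Why the save loop above rescales bbox_pred: training regresses box deltas that
# were normalized by (means, stds), and folding that normalization into the
# weights lets test-time code consume raw network outputs. A numpy sketch of the
# identity being exploited (shapes illustrative only, not from the source):
import numpy as np

W = np.random.randn(8, 16)                # stands in for bbox_pred_weight
b = np.random.randn(8)                    # stands in for bbox_pred_bias
stds = np.tile([0.1, 0.1, 0.2, 0.2], 2)
means = np.zeros(8)
x = np.random.randn(16)
folded = (W.T * stds).T.dot(x) + (b * stds + means)   # folded weights, one pass
plain = (W.dot(x) + b) * stds + means                 # raw output, then unnormalize
assert np.allclose(folded, plain)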
def test_deeplab():
    epoch = config.TEST.test_epoch
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    image_set = config.dataset.test_image_set
    root_path = config.dataset.root_path
    dataset = config.dataset.dataset
    dataset_path = config.dataset.dataset_path
    logger, final_output_path = create_logger(config.output_path, args.cfg, image_set)
    prefix = os.path.join(final_output_path, '..',
                          '_'.join([iset for iset in config.dataset.image_set.split('+')]),
                          config.TRAIN.model_prefix)
    # print config
    pprint.pprint(config)
    logger.info('testing config:{}\n'.format(pprint.pformat(config)))
    # load symbol and testing data
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=final_output_path)
    segdb = imdb.gt_segdb()
    # get test data iter
    test_data = TestDataLoader(segdb, config=config, batch_size=len(ctx))
    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)
    # load model and check parameters
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)
    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = ['softmax_label']
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    # create predictor
    predictor = Predictor(sym, data_names, label_names, context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    # start detection
    pred_eval(predictor, test_data, imdb, vis=args.vis, ignore_cache=args.ignore_cache, logger=logger)
def test_rpn(image_set, year, root_path, devkit_path, trained, epoch, ctx):
    from rcnn.rpn.generate import Detector, generate_detections
    # load symbol
    sym = get_vgg_rpn_test()
    # load testing data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path)
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')
    # load trained
    args, auxs = load_param(trained, epoch, convert=True, ctx=ctx[0])
    # start testing
    detector = Detector(sym, ctx[0], args, auxs)
    imdb_boxes = generate_detections(detector, test_data, voc, vis=False)
    voc.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
def training_process(num, Acc_dic):
    dataset = "mnist"
    load_data = False
    viz_on = False
    path = './trained_models/' + dataset + '/'
    model_path = './trained_models/' + dataset + '/model' + str(num) + '.pt'
    spec, img_size = load_param(path)
    print(spec)
    print("Training Start!!! :{}".format(num))
    batch_size = spec['batch_size']
    lr = spec['lr'][0]
    epochs = spec['epochs'][0]
    # Check for cuda
    use_cuda = torch.cuda.is_available()
    print("Use_cuda:%s" % use_cuda)
    # Load data
    data_loader, _ = get_mnist_dataloaders(batch_size=batch_size)
    # Define latent spec and model (a plain local variable replaces the original
    # locals()['model_' + str(i)] pattern, which referenced an undefined `i` and
    # does not reliably create local variables in CPython)
    latent_spec = spec['latent_spec']
    model = VAE(img_size=img_size, latent_spec=latent_spec, use_cuda=use_cuda)
    if use_cuda:
        model.cuda()
    # Define optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # Define trainer
    trainer = Trainer(model, optimizer,
                      cont_capacity=spec['cont_capacity'],
                      disc_capacity=spec['disc_capacity'],
                      spec=spec, viz_on=viz_on, use_cuda=use_cuda, num=num)
    # Train model for the number of epochs given by the spec
    acc = trainer.train(data_loader, epochs)
    Acc_dic[num] = acc
    # Save trained model
    torch.save(trainer.model.state_dict(), model_path)
    print("Training finished!!! :{}".format(num))
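# A hedged sketch of how training_process appears to be driven: one worker per
# model index, with Acc_dic as a multiprocessing.Manager dict collecting each
# run's accuracy. The worker count and entry point are assumptions, not from
# the source.
import multiprocessing as mp

if __name__ == '__main__':
    manager = mp.Manager()
    acc_dic = manager.dict()   # shared across processes
    jobs = [mp.Process(target=training_process, args=(num, acc_dic)) for num in range(4)]
    for p in jobs:
        p.start()
    for p in jobs:
        p.join()
    print(dict(acc_dic))       # accuracy per model index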
def test_net(imageset, year, root_path, devkit_path, prefix, epoch, ctx, vis):
    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # load testing data
    voc, roidb = load_test_rpn_roidb(imageset, year, root_path, devkit_path)
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')
    # load model
    args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx)
    # load symbol
    sym = get_vgg_rcnn_test()
    # detect
    detector = Detector(sym, ctx, args, auxs)
    pred_eval(detector, test_data, voc, vis=vis)
def test_rpn(image_set, year, root_path, devkit_path, prefix, epoch, ctx, vis):
    # set config
    config.TEST.HAS_RPN = True
    config.TEST.RPN_PRE_NMS_TOP_N = -1
    config.TEST.RPN_POST_NMS_TOP_N = 2000
    # load symbol
    sym = get_vgg_rpn_test()
    # load testing data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path)
    test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')
    # load model
    args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx)
    # start testing
    detector = Detector(sym, ctx, args, auxs)
    imdb_boxes = generate_detections(detector, test_data, voc, vis=vis)
    voc.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch,
              vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    if not logger:
        assert False, 'require a logger'
    # print cfg
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))
    # load symbol and testing data
    key_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    cur_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    key_sym = key_sym_instance.get_key_test_symbol(cfg)
    cur_sym = cur_sym_instance.get_cur_test_symbol(cfg)
    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
    roidb = imdb.gt_roidb()
    # split roidbs across gpus, balancing total frame count
    gpu_num = len(ctx)
    roidbs = [[] for x in range(gpu_num)]
    roidbs_seg_lens = np.zeros(gpu_num, dtype=np.int)
    for x in roidb:
        gpu_id = np.argmin(roidbs_seg_lens)
        roidbs[gpu_id].append(x)
        roidbs_seg_lens[gpu_id] += x['frame_seg_len']
    # get test data iter
    test_datas = [TestLoader(x, cfg, batch_size=1, shuffle=shuffle, has_rpn=has_rpn) for x in roidbs]
    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    # create predictor
    key_predictors = [get_predictor(key_sym, key_sym_instance, cfg, arg_params, aux_params,
                                    test_datas[i], [ctx[i]]) for i in range(gpu_num)]
    cur_predictors = [get_predictor(cur_sym, cur_sym_instance, cfg, arg_params, aux_params,
                                    test_datas[i], [ctx[i]]) for i in range(gpu_num)]
    # start detection
    # pred_eval(0, key_predictors[0], cur_predictors[0], test_datas[0], imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
    pred_eval_multiprocess(gpu_num, key_predictors, cur_predictors, test_datas, imdb, cfg,
                           vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
def main():
    args = parse_args()
    pprint.pprint(config)
    if config.TEST.HAS_RPN:
        sym_instance = eval(config.symbol + '.' + config.symbol)()
        sym = sym_instance.get_symbol(config, is_train=False)
    else:
        sym_instance = eval(config.symbol + '.' + config.symbol)()
        sym = sym_instance.get_symbol_rcnn(config, is_train=False)
    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set)
    prefix = os.path.join(final_output_path, '..',
                          '_'.join([iset for iset in config.dataset.image_set.split('+')]),
                          config.TRAIN.model_prefix)
    arg_params, aux_params = load_param(prefix, config.TEST.test_epoch, process=True)
    data_names = ['data', 'im_info']
    label_names = None
    mod = mx.mod.Module(symbol=sym, context=mx.gpu(0), data_names=data_names, label_names=label_names)
    mod.bind(for_training=False,
             data_shapes=[('data', (1, 3, 1024, 1024)), ('im_info', (1, 3))],
             label_shapes=None, force_rebind=False)
    mod.set_params(arg_params=arg_params, aux_params=aux_params, force_init=False)
    mod.save_checkpoint('test_traffic', 0)
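# The checkpoint written above can be reloaded later without any of the original
# symbol-building code; a minimal sketch using the prefix and epoch saved by main():
import mxnet as mx

sym, arg_params, aux_params = mx.model.load_checkpoint('test_traffic', 0)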
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset')
    args = parser.parse_args()
    if args.dataset == 'coco':
        n_class = 81
        model = fcis.models.FCISResNet101(n_class)
        prefix = filepath + '/../model/fcis_coco'
        epoch = 0
    elif args.dataset == 'voc':
        n_class = 21
        model = fcis.models.FCISResNet101(n_class, ratios=(0.5, 1.0, 2.0),
                                          anchor_scales=(8, 16, 32), rpn_min_size=16)
        prefix = filepath + '/../model/e2e'
        epoch = 21
    else:
        print('dataset must be coco or voc')
        return  # bail out: prefix and epoch are undefined for any other value
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    model = convert(model, arg_params, aux_params)
    chainer.serializers.save_npz('./fcis_{}.npz'.format(args.dataset), model)
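# Example invocation (the script filename is an assumption; --dataset must be
# 'coco' or 'voc'):
#   python convert_model.py --dataset voc
# which writes ./fcis_voc.npz, loadable via chainer.serializers.load_npz.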
def __init__(self, symbol, prefix, epoch, provide_data, provide_label=[], ctx=mx.cpu(),
             arg_params=None, aux_params=None):
    data_names = [k[0] for k in provide_data]
    label_names = [k[0] for k in provide_label]
    self._mod = MutableModule(symbol, data_names, label_names, context=ctx)
    self._mod.bind(provide_data, for_training=False)
    if arg_params is None:
        arg_params, aux_params = load_param(prefix, epoch, convert=True, ctx=ctx, process=True)
    self._mod.set_params(arg_params, aux_params)
    self.symbol = symbol
    self.ctx = ctx
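# Unlike the fixed-shape mx.module.Module wrapper earlier, this variant uses
# MutableModule, which can rebind as input shapes change, so one predictor can
# serve images of different sizes. A hedged sketch, assuming the enclosing class
# is named MutablePredictor (hypothetical) and a symbol `sym` built elsewhere:
import mxnet as mx

pred = MutablePredictor(sym, 'model/example', 0,
                        provide_data=[('data', (1, 3, 600, 1000))], ctx=mx.gpu(0))
# later batches may arrive with a different 'data' shape, e.g. (1, 3, 800, 1200)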
def load_rfcn_model(cfg, has_rpn, prefix, epoch, ctx):
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
    else:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)
    # sym.save('1.json')
    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    num_gpu = len(ctx)
    # infer shape
    SHORT_SIDE = config.SCALES[0][0]
    LONG_SIDE = config.SCALES[0][1]
    DATA_NAMES = ['data', 'im_info']
    LABEL_NAMES = None
    DATA_SHAPES = [('data', (num_gpu, 3, LONG_SIDE, SHORT_SIDE)), ('im_info', (num_gpu, 3))]
    LABEL_SHAPES = None
    data_shape_dict = dict(DATA_SHAPES)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)
    # decide maximum shape
    max_data_shape = [[('data', (num_gpu, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))
    # create predictor
    predictor = Predictor(sym, DATA_NAMES, LABEL_NAMES, context=ctx, max_data_shapes=max_data_shape,
                          provide_data=[DATA_SHAPES], provide_label=[LABEL_SHAPES],
                          arg_params=arg_params, aux_params=aux_params)
    return predictor
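# Hedged usage sketch for load_rfcn_model; the prefix and epoch values are
# assumptions for illustration, not from the source:
import mxnet as mx

ctx = [mx.gpu(0)]
predictor = load_rfcn_model(config, has_rpn=True, prefix='model/rfcn_voc', epoch=0, ctx=ctx)
# feed predictor.predict(...) a DataBatch matching DATA_SHAPES above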
def end2end_train(image_set, test_image_set, year, root_path, devkit_path, pretrained, epoch,
                  prefix, ctx, begin_epoch, num_epoch, frequent, kv_store, mom, wd, lr,
                  num_classes, monitor, work_load_list=None, resume=False, use_flip=True,
                  factor_step=50000):
    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    mon = None
    config.TRAIN.BG_THRESH_HI = 0.5  # TODO(verify)
    config.TRAIN.BG_THRESH_LO = 0.0  # TODO(verify)
    config.TRAIN.RPN_MIN_SIZE = 16
    logging.info('########## TRAIN FASTER-RCNN WITH APPROXIMATE JOINT END2END #############')
    config.TRAIN.HAS_RPN = True
    config.END2END = 1
    config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = True
    sym = get_faster_rcnn(num_classes=num_classes)
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    # setup multi-gpu
    config.TRAIN.IMS_PER_BATCH *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)  # not used here
    # infer max shape
    max_data_shape = [('data', (config.TRAIN.IMS_PER_BATCH, 3, 1000, 1000))]
    max_data_shape_dict = {k: v for k, v in max_data_shape}
    _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict)
    from rcnn.minibatch import assign_anchor
    import numpy as np
    label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]])
    max_label_shape = [('label', label['label'].shape),
                       ('bbox_target', label['bbox_target'].shape),
                       ('bbox_inside_weight', label['bbox_inside_weight'].shape),
                       ('bbox_outside_weight', label['bbox_outside_weight'].shape),
                       ('gt_boxes', (config.TRAIN.IMS_PER_BATCH, 5 * 100))]  # assume at most 100 objects per image
    print 'providing maximum shape', max_data_shape, max_label_shape
    # load training data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=use_flip)
    train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.IMS_PER_BATCH, shuffle=True,
                              mode='train', ctx=ctx, work_load_list=work_load_list)
    # load pretrained
    args, auxs, _ = load_param(pretrained, epoch, convert=True)
    # initialize params
    if not resume:
        del args['fc8_weight']
        del args['fc8_bias']
        input_shapes = {k: (1,) + v[1::] for k, v in train_data.provide_data + train_data.provide_label}
        arg_shape, _, _ = sym.infer_shape(**input_shapes)
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        args['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        args['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        args['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        args['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        args['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['rpn_bbox_pred_weight'])  # keep bbox_delta from exploding
        args['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])
        args['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        args['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
        args['bbox_pred_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['bbox_pred_weight'])
        args['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])
    # prepare training
    if config.TRAIN.FINETUNE:
        fixed_param_prefix = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']
    else:
        fixed_param_prefix = ['conv1', 'conv2']
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent)
    epoch_end_callback = do_checkpoint(prefix)
    rpn_eval_metric = AccuracyMetric(use_ignore=True, ignore=-1, ex_rpn=True)
    rpn_cls_metric = LogLossMetric(use_ignore=True, ignore=-1, ex_rpn=True)
    rpn_bbox_metric = SmoothL1LossMetric(ex_rpn=True)
    eval_metric = AccuracyMetric()
    cls_metric = LogLossMetric()
    bbox_metric = SmoothL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric,
                         eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    optimizer_params = {'momentum': mom,
                        'wd': wd,
                        'learning_rate': lr,
                        'lr_scheduler': mx.lr_scheduler.FactorScheduler(factor_step, 0.1),
                        'clip_gradient': 1.0,
                        'rescale_grad': 1.0}
                        # 'rescale_grad': (1.0 / config.TRAIN.RPN_BATCH_SIZE)}
    # train
    mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger,
                        context=ctx, work_load_list=work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    if monitor:
        def norm_stat(d):
            return mx.nd.norm(d) / np.sqrt(d.size)
        mon = mx.mon.Monitor(100, norm_stat)
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=kv_store,
            optimizer='sgd', optimizer_params=optimizer_params, monitor=mon,
            arg_params=args, aux_params=auxs, begin_epoch=begin_epoch, num_epoch=num_epoch)
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_deeplab_dcn' if not args.deeplab_only else 'resnet_v1_101_deeplab'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 19

    # load demo data
    image_names = ['frankfurt_000001_073088_leftImg8bit.png', 'lindau_000024_000019_leftImg8bit.png']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), \
            '{} does not exist'.format('../demo/' + im_name)
        im = cv2.imread(cur_path + '/../demo/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data']
    label_names = ['softmax_label']
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/' + ('deeplab_dcn_cityscapes' if not args.deeplab_only else 'deeplab_cityscapes'), 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        output_all = predictor.predict(data_batch)
        output_all = [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy() for output in output_all]

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        tic()
        output_all = predictor.predict(data_batch)
        output_all = [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy() for output in output_all]
        pallete = getpallete(256)
        segmentation_result = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(segmentation_result)
        segmentation_result.putpalette(pallete)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        pure_im_name, ext_im_name = os.path.splitext(im_name)
        segmentation_result.save(cur_path + '/../demo/seg_' + pure_im_name + '.png')
        # visualize
        im_raw = cv2.imread(cur_path + '/../demo/' + im_name)
        seg_res = cv2.imread(cur_path + '/../demo/seg_' + pure_im_name + '.png')
        cv2.imshow('Raw Image', im_raw)
        cv2.imshow('segmentation_result', seg_res)
        cv2.waitKey(0)
    print 'done'
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
               'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
               'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
               'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
               'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
               'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
               'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
               'toothbrush']

    # load demo data
    image_names = ['COCO_test2015_000000000891.jpg', 'COCO_test2015_000000001669.jpg']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), \
            '{} does not exist'.format('../demo/' + im_name)
        im = cv2.imread(cur_path + '/../demo/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'), 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):  # skip background class 0
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)
    print 'done'
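
# For reference, a minimal CPU sketch of the greedy IoU suppression that
# gpu_nms_wrapper performs in the demo above; `py_nms` is a hypothetical
# helper, not part of this codebase. `dets` is (N, 5) as [x1, y1, x2, y2, score].
import numpy as np

def py_nms(dets, thresh):
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # overlap of the kept box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes whose IoU with the kept box exceeds the threshold
        order = order[1:][iou <= thresh]
    return keep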
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step):
    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    feat_sym = sym.get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path,
                            config.dataset.dataset_path, flip=config.TRAIN.FLIP)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)

    # load training data
    train_data = AnchorLoader(feat_sym, roidb, config, batch_size=input_batch_size,
                              shuffle=config.TRAIN.SHUFFLE, ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE,
                              anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print 'providing maximum shape', max_data_shape, max_label_shape

    data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger,
                        context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)],
                        max_label_shapes=[max_label_shape for _ in range(batch_size)],
                        fixed_param_prefix=fixed_param_prefix)
    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True),
                          callback.do_checkpoint(prefix, means, stds)]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr, config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {'momentum': config.TRAIN.momentum,
                        'wd': config.TRAIN.wd,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0,
                        'clip_gradient': None}

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=config.default.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
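
# A worked example of the learning-rate bookkeeping in train_net above, under
# assumed values (80000 roidb entries, 8 images per batch, lr_step '4,6');
# these numbers are illustrative, not taken from any config in this repo.
def _lr_schedule_example(begin_epoch=0, base_lr=0.0005, lr_factor=0.1,
                         lr_step='4,6', num_images=80000, batch_size=8):
    lr_epoch = [float(e) for e in lr_step.split(',')]                    # [4.0, 6.0]
    lr_epoch_diff = [e - begin_epoch for e in lr_epoch if e > begin_epoch]
    # steps already passed before begin_epoch are folded into the starting lr
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(e * num_images / batch_size) for e in lr_epoch_diff]
    return lr, lr_iters

# _lr_schedule_example()              -> (0.0005, [40000, 60000])
# _lr_schedule_example(begin_epoch=5) -> (5e-05, [10000])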
def train_rcnn(cfg, dataset, image_set, root_path, dataset_path,
               frequent, kvstore, flip, shuffle, resume,
               ctx, pretrained, epoch, prefix, begin_epoch, end_epoch,
               train_shared, lr, lr_step, proposal, logger=None, output_path=None):
    mx.random.seed(3)
    np.random.seed(3)
    # set up logger
    if not logger:
        logging.basicConfig()
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)

    # load symbol
    sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    sym = sym_instance.get_symbol_rcnn(cfg, is_train=True)

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = cfg.TRAIN.BATCH_IMAGES * batch_size

    # print cfg
    pprint.pprint(cfg)
    logger.info('training rcnn cfg:{}\n'.format(pprint.pformat(cfg)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in image_set.split('+')]
    roidbs = [load_proposal_roidb(dataset, image_set, root_path, dataset_path,
                                  proposal=proposal, append_gt=True, flip=flip, result_path=output_path)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, cfg)
    means, stds = add_bbox_regression_targets(roidb, cfg)

    # load training data
    train_data = ROIIter(roidb, cfg, batch_size=input_batch_size, shuffle=shuffle,
                         ctx=ctx, aspect_grouping=cfg.TRAIN.ASPECT_GROUPING)

    # infer max shape
    max_data_shape = [('data', (cfg.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]

    # infer shape
    data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if resume:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weight_rcnn(cfg, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # prepare training
    # create solver
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]
    if train_shared:
        fixed_param_prefix = cfg.network.FIXED_PARAMS_SHARED
    else:
        fixed_param_prefix = cfg.network.FIXED_PARAMS
    mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger,
                        context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)],
                        fixed_param_prefix=fixed_param_prefix)
    if cfg.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)

    # decide training params
    # metric
    eval_metric = metric.RCNNAccMetric(cfg)
    cls_metric = metric.RCNNLogLossMetric(cfg)
    bbox_metric = metric.RCNNL1LossMetric(cfg)
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=frequent)
    epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True),
                          callback.do_checkpoint(prefix, means, stds)]
    # decide learning rate
    base_lr = lr
    lr_factor = cfg.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, cfg.TRAIN.warmup, cfg.TRAIN.warmup_lr,
                                              cfg.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {'momentum': cfg.TRAIN.momentum,
                        'wd': cfg.TRAIN.wd,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0,
                        'clip_gradient': None}

    # train
    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
def get_net(prefix, epoch, ctx):
    args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx)
    sym = get_vgg_rcnn_test()
    detector = Detector(sym, ctx, args, auxs)
    return detector
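
# Hypothetical usage of get_net above; 'model/rcnn' and epoch 8 are placeholder
# values for a trained Fast R-CNN checkpoint, not paths shipped with this code:
#
# detector = get_net('model/rcnn', 8, mx.gpu(0))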
def train_net(args, ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, lr, lr_step):
    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=True)
    # sym = eval('get_' + args.network + '_train')(num_classes=config.dataset.NUM_CLASSES)

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    segdbs = [load_gt_segdb(config.dataset.dataset, image_set, config.dataset.root_path,
                            config.dataset.dataset_path, result_path=final_output_path,
                            flip=config.TRAIN.FLIP)
              for image_set in image_sets]
    segdb = merge_segdb(segdbs)

    # load training data
    train_data = TrainDataLoader(sym, segdb, config, batch_size=input_batch_size,
                                 crop_height=config.TRAIN.CROP_HEIGHT, crop_width=config.TRAIN.CROP_WIDTH,
                                 shuffle=config.TRAIN.SHUFFLE, ctx=ctx)

    # infer max shape
    max_scale = [(config.TRAIN.CROP_HEIGHT, config.TRAIN.CROP_WIDTH)]
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in max_scale]), max([v[1] for v in max_scale])))]
    max_label_shape = [('label', (config.TRAIN.BATCH_IMAGES, 1, max([v[0] for v in max_scale]), max([v[1] for v in max_scale])))]
    # max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape, max_label_shape)
    print 'providing maximum shape', max_data_shape, max_label_shape

    # infer shape
    data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if config.TRAIN.RESUME:
        print 'continue training from ', begin_epoch
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        print pretrained
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        sym_instance.init_weights(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger,
                        context=ctx, max_data_shapes=[max_data_shape for _ in xrange(batch_size)],
                        max_label_shapes=[max_label_shape for _ in xrange(batch_size)],
                        fixed_param_prefix=fixed_param_prefix)

    # decide training params
    # metric
    fcn_loss_metric = metric.FCNLogLossMetric(config.default.frequent * batch_size)
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [fcn_loss_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    epoch_end_callback = mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True)
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(segdb) / batch_size) for epoch in lr_epoch_diff]
    print 'lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr, config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {'momentum': config.TRAIN.momentum,
                        'wd': config.TRAIN.wd,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0,
                        'clip_gradient': None}

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=config.default.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
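
# A sketch of the policy WarmupMultiFactorScheduler is used for above, assuming
# constant-warmup semantics (hold warmup_lr for warmup_step updates, then drop
# the base rate by lr_factor at each iteration in lr_iters). This is an
# illustrative reimplementation, not the scheduler class itself.
def _scheduled_lr(num_update, base_lr, lr_factor, lr_iters, warmup_lr, warmup_step):
    if num_update < warmup_step:
        return warmup_lr
    return base_lr * lr_factor ** sum(1 for it in lr_iters if num_update >= it)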
def train_rcnn(image_set, year, root_path, devkit_path, pretrained, epoch,
               prefix, ctx, begin_epoch, end_epoch, frequent, kv_store,
               work_load_list=None, resume=False, proposal='rpn'):
    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # load symbol
    sym = get_vgg_rcnn()

    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)

    # load training data
    voc, roidb, means, stds = eval('load_' + proposal + '_roidb')(image_set, year, root_path, devkit_path, flip=True)
    train_data = ROIIter(roidb, batch_size=config.TRAIN.BATCH_IMAGES, shuffle=True,
                         mode='train', ctx=ctx, work_load_list=work_load_list)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, 1000, 1000))]

    # load pretrained
    args, auxs = load_param(pretrained, epoch, convert=True)

    # initialize params
    if not resume:
        input_shapes = {k: v for k, v in train_data.provide_data + train_data.provide_label}
        arg_shape, _, _ = sym.infer_shape(**input_shapes)
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        args['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
        args['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
        args['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
        args['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])

    # prepare training
    if config.TRAIN.FINETUNE:
        fixed_param_prefix = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']
    else:
        fixed_param_prefix = ['conv1', 'conv2']
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent)
    epoch_end_callback = mx.callback.do_checkpoint(prefix)
    if config.TRAIN.HAS_RPN is True:
        eval_metric = AccuracyMetric(use_ignore=True, ignore=-1)
        cls_metric = LogLossMetric(use_ignore=True, ignore=-1)
    else:
        eval_metric = AccuracyMetric()
        cls_metric = LogLossMetric()
    bbox_metric = SmoothL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': 0.001,
                        'lr_scheduler': mx.lr_scheduler.FactorScheduler(30000, 0.1),
                        'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE)}

    # train
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=work_load_list,
                        max_data_shapes=max_data_shape, fixed_param_prefix=fixed_param_prefix)
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=kv_store,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=args, aux_params=auxs, begin_epoch=begin_epoch, num_epoch=end_epoch)

    # edit params and save
    for epoch in range(begin_epoch + 1, end_epoch + 1):
        arg_params, aux_params = load_checkpoint(prefix, epoch)
        arg_params['bbox_pred_weight'] = (arg_params['bbox_pred_weight'].T * mx.nd.array(stds)).T
        arg_params['bbox_pred_bias'] = arg_params['bbox_pred_bias'] * mx.nd.array(stds) + \
                                       mx.nd.array(means)
        save_checkpoint(prefix, epoch, arg_params, aux_params)
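
# Why the final weight/bias edit in train_rcnn works: training regresses
# normalized targets t_norm = (t - means) / stds, so scaling the last layer's
# rows by stds and shifting its bias recovers raw deltas at test time. A numpy
# sanity check under assumed shapes (16-dim feature, 8 regression outputs):
import numpy as np
feat = np.random.randn(16)
W, b = np.random.randn(8, 16), np.random.randn(8)
means, stds = np.zeros(8), np.full(8, 0.2)
W_folded, b_folded = (W.T * stds).T, b * stds + means
assert np.allclose((W.dot(feat) + b) * stds + means, W_folded.dot(feat) + b_folded)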
def train_rpn(image_set, year, root_path, devkit_path, pretrained, epoch,
              prefix, ctx, begin_epoch, end_epoch, frequent, kv_store,
              work_load_list=None, resume=False):
    # set up logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # load symbol
    sym = get_vgg_rpn()
    feat_sym = get_vgg_rpn().get_internals()['rpn_cls_score_output']

    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)

    # load training data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=True)
    train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.BATCH_SIZE, shuffle=True,
                              mode='train', ctx=ctx, work_load_list=work_load_list)

    # infer max shape
    max_data_shape = [('data', (config.TRAIN.BATCH_SIZE, 3, 1000, 1000))]
    max_data_shape_dict = {k: v for k, v in max_data_shape}
    _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict)
    from rcnn.minibatch import assign_anchor
    import numpy as np
    label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]])
    max_label_shape = [('label', label['label'].shape),
                       ('bbox_target', label['bbox_target'].shape),
                       ('bbox_inside_weight', label['bbox_inside_weight'].shape),
                       ('bbox_outside_weight', label['bbox_outside_weight'].shape)]
    print 'providing maximum shape', max_data_shape, max_label_shape

    # load pretrained
    args, auxs = load_param(pretrained, epoch, convert=True)

    # initialize params
    if not resume:
        input_shapes = {k: v for k, v in train_data.provide_data + train_data.provide_label}
        arg_shape, _, _ = sym.infer_shape(**input_shapes)
        arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
        args['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
        args['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
        args['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
        args['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
        args['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
        args['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])

    # prepare training
    if config.TRAIN.FINETUNE:
        fixed_param_prefix = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']
    else:
        fixed_param_prefix = ['conv1', 'conv2']
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent)
    epoch_end_callback = mx.callback.do_checkpoint(prefix)
    if config.TRAIN.HAS_RPN is True:
        eval_metric = AccuracyMetric(use_ignore=True, ignore=-1)
        cls_metric = LogLossMetric(use_ignore=True, ignore=-1)
    else:
        eval_metric = AccuracyMetric()
        cls_metric = LogLossMetric()
    bbox_metric = SmoothL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': 0.001,
                        'lr_scheduler': mx.lr_scheduler.FactorScheduler(60000, 0.1),
                        'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE)}

    # train
    mod = MutableModule(sym, data_names=data_names, label_names=label_names,
                        logger=logger, context=ctx, work_load_list=work_load_list,
                        max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
                        fixed_param_prefix=fixed_param_prefix)
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=kv_store, optimizer='sgd',
            optimizer_params=optimizer_params, arg_params=args, aux_params=auxs,
            begin_epoch=begin_epoch, num_epoch=end_epoch)
def train_rpn(image_set, year, root_path, devkit_path, pretrained, epoch,
              prefix, ctx, begin_epoch, end_epoch, frequent, kv_store,
              work_load_list=None):
    # load symbol
    sym = get_vgg_rpn()
    feat_sym = get_vgg_rpn().get_internals()["rpn_cls_score_output"]

    # setup multi-gpu
    config.TRAIN.BATCH_IMAGES *= len(ctx)
    config.TRAIN.BATCH_SIZE *= len(ctx)

    # load training data
    voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=True)
    train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.BATCH_SIZE, shuffle=True,
                              mode="train", ctx=ctx, work_load_list=work_load_list)

    # infer max shape
    max_data_shape = [("data", (1, 3, 1000, 1000))]
    max_data_shape_dict = {k: v for k, v in max_data_shape}
    _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict)
    from rcnn.minibatch import assign_anchor
    import numpy as np
    label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]])
    max_label_shape = [("label", label["label"].shape),
                       ("bbox_target", label["bbox_target"].shape),
                       ("bbox_inside_weight", label["bbox_inside_weight"].shape),
                       ("bbox_outside_weight", label["bbox_outside_weight"].shape)]
    print "providing maximum shape", max_data_shape, max_label_shape

    # load pretrained
    args, auxs = load_param(pretrained, epoch, convert=True)

    # initialize params
    arg_shape, _, _ = sym.infer_shape(data=(1, 3, 224, 224))
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    args["rpn_conv_3x3_weight"] = mx.random.normal(0, 0.01, shape=arg_shape_dict["rpn_conv_3x3_weight"])
    args["rpn_conv_3x3_bias"] = mx.nd.zeros(shape=arg_shape_dict["rpn_conv_3x3_bias"])
    args["rpn_cls_score_weight"] = mx.random.normal(0, 0.01, shape=arg_shape_dict["rpn_cls_score_weight"])
    args["rpn_cls_score_bias"] = mx.nd.zeros(shape=arg_shape_dict["rpn_cls_score_bias"])
    args["rpn_bbox_pred_weight"] = mx.random.normal(0, 0.01, shape=arg_shape_dict["rpn_bbox_pred_weight"])
    args["rpn_bbox_pred_bias"] = mx.nd.zeros(shape=arg_shape_dict["rpn_bbox_pred_bias"])

    # train
    solver = Solver(prefix, sym, ctx, begin_epoch, end_epoch, kv_store, args, auxs,
                    momentum=0.9, wd=0.0005, learning_rate=1e-3,
                    lr_scheduler=mx.lr_scheduler.FactorScheduler(60000, 0.1),
                    mutable_data_shape=True, max_data_shape=max_data_shape,
                    max_label_shape=max_label_shape)
    solver.fit(train_data, frequent=frequent)
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    model = '/../model/rfcn_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_test_symbol(config)

    # set up class names
    num_classes = 31
    classes = ['airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda', 'hamster', 'horse',
               'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit', 'red_panda', 'sheep',
               'snake', 'squirrel', 'tiger', 'train', 'turtle', 'watercraft', 'whale', 'zebra']

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        time += toc()
        count += 1
        print 'testing {} {:.4f}s'.format(im_name, time / count)  # running average per frame
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)
    print 'done'
def train_net(args, ctx, pretrained, pretrained_flow, epoch, prefix, begin_epoch, end_epoch, lr, lr_step):
    logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.image_set)
    prefix = os.path.join(final_output_path, prefix)

    # load symbol
    shutil.copy2(os.path.join(curr_path, 'symbols', config.symbol + '.py'), final_output_path)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_train_symbol(config)
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    feat_conv_3x3_relu = sym.get_internals()['feat_conv_3x3_relu_output']

    # setup multi-gpu
    batch_size = len(ctx)
    input_batch_size = config.TRAIN.BATCH_IMAGES * batch_size

    # print config
    pprint.pprint(config)
    logger.info('training config:{}\n'.format(pprint.pformat(config)))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in config.dataset.image_set.split('+')]
    roidbs = [load_gt_roidb(config.dataset.dataset, image_set, config.dataset.root_path,
                            config.dataset.dataset_path, flip=config.TRAIN.FLIP)
              for image_set in image_sets]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb, config)

    # load training data
    train_data = AnchorLoader(feat_sym, feat_conv_3x3_relu, roidb, config, batch_size=input_batch_size,
                              shuffle=config.TRAIN.SHUFFLE, ctx=ctx,
                              feat_stride=config.network.RPN_FEAT_STRIDE,
                              anchor_scales=config.network.ANCHOR_SCALES,
                              anchor_ratios=config.network.ANCHOR_RATIOS,
                              aspect_grouping=config.TRAIN.ASPECT_GROUPING,
                              normalize_target=config.network.NORMALIZE_RPN,
                              bbox_mean=config.network.ANCHOR_MEANS,
                              bbox_std=config.network.ANCHOR_STDS)

    # infer max shape
    # max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),
    #                   ('data_ref', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),
    #                   ('eq_flag', (1,))]
    data_shape1 = {'data_ref': (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))}
    _, feat_shape111, _ = feat_conv_3x3_relu.infer_shape(**data_shape1)
    max_data_shape = [('data_ref', (config.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),
                      ('eq_flag', (1,)),
                      ('motion_vector', (config.TRAIN.BATCH_IMAGES, 2, int(feat_shape111[0][2]), int(feat_shape111[0][3])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (config.TRAIN.BATCH_IMAGES, 100, 5)))
    print 'providing maximum shape', max_data_shape, max_label_shape

    data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single)
    pprint.pprint(data_shape_dict)
    sym_instance.infer_shape(data_shape_dict)

    # load and initialize params
    if config.TRAIN.RESUME:
        print('continue training from ', begin_epoch)
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        # arg_params_flow, aux_params_flow = load_param(pretrained_flow, epoch, convert=True)
        # arg_params.update(arg_params_flow)
        # aux_params.update(aux_params_flow)
        sym_instance.init_weight(config, arg_params, aux_params)

    # check parameter shapes
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict)

    # create solver
    fixed_param_prefix = config.network.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data_single]
    label_names = [k[0] for k in train_data.provide_label_single]
    mod = MutableModule(sym, data_names=data_names, label_names=label_names, logger=logger,
                        context=ctx, max_data_shapes=[max_data_shape for _ in range(batch_size)],
                        max_label_shapes=[max_label_shape for _ in range(batch_size)],
                        fixed_param_prefix=fixed_param_prefix)
    if config.TRAIN.RESUME:
        mod._preload_opt_states = '%s-%04d.states' % (prefix, begin_epoch)

    # decide training params
    # metric
    rpn_eval_metric = metric.RPNAccMetric()
    rpn_cls_metric = metric.RPNLogLossMetric()
    rpn_bbox_metric = metric.RPNL1LossMetric()
    eval_metric = metric.RCNNAccMetric(config)
    cls_metric = metric.RCNNLogLossMetric(config)
    bbox_metric = metric.RCNNL1LossMetric(config)
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=args.frequent)
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), 2 if config.CLASS_AGNOSTIC else config.dataset.NUM_CLASSES)
    epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True),
                          callback.do_checkpoint(prefix, means, stds)]
    # decide learning rate
    base_lr = lr
    lr_factor = config.TRAIN.lr_factor
    lr_epoch = [float(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters)
    lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, config.TRAIN.warmup,
                                              config.TRAIN.warmup_lr, config.TRAIN.warmup_step)
    # optimizer
    optimizer_params = {'momentum': config.TRAIN.momentum,
                        'wd': config.TRAIN.wd,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': 1.0,
                        'clip_gradient': None}

    if not isinstance(train_data, PrefetchingIter):
        train_data = PrefetchingIter(train_data)

    print('Start to train model')
    # train
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore=config.default.kvstore,
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch)
def get_net(prefix, epoch, ctx):
    args, auxs, num_class = load_param(prefix, epoch, convert=True, ctx=ctx)
    sym = get_vgg_test(num_classes=num_class)
    detector = Detector(sym, ctx, args, auxs)
    return detector