def __init__(self, configFn, ctx, outFolder, threshold):
    # disable cuDNN autotune for deterministic, fast start-up
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    config = importlib.import_module(configFn.replace('.py', '').replace('/', '.'))
    _, _, _, _, _, _, self.__pModel, _, self.__pTest, self.transform, _, _, _ = \
        config.get_config(is_train=False)
    self.__pModel = patch_config_as_nothrow(self.__pModel)
    self.__pTest = patch_config_as_nothrow(self.__pTest)
    self.resizeParam = (800, 1200)

    # select the NMS implementation declared in the test config
    if callable(self.__pTest.nms.type):
        self.__nms = self.__pTest.nms.type(self.__pTest.nms.thr)
    else:
        from operator_py.nms import py_nms_wrapper
        self.__nms = py_nms_wrapper(self.__pTest.nms.thr)

    # load the checkpoint and fold BatchNorm into the preceding convolutions
    arg_params, aux_params = load_checkpoint(self.__pTest.model.prefix, self.__pTest.model.epoch)
    sym = self.__pModel.test_symbol
    from utils.graph_optimize import merge_bn
    sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)

    # build and bind the detection module for single-image inference
    self.__mod = DetModule(sym, data_names=['data', 'im_info', 'im_id', 'rec_id'], context=ctx)
    self.__mod.bind(data_shapes=[('data', (1, 3, self.resizeParam[0], self.resizeParam[1])),
                                 ('im_info', (1, 3)),
                                 ('im_id', (1,)),
                                 ('rec_id', (1,))],
                    for_training=False)
    self.__mod.set_params(arg_params, aux_params, allow_extra=False)

    self.__saveSymbol(sym, outFolder, self.__pTest.model.prefix.split('/')[-1])
    self.__threshold = threshold
    self.outFolder = outFolder
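# Hedged aside (not from the original source): a standalone illustration of how the
# `configFn` path above is mapped to an importable module name; the example path is
# hypothetical.
configFn_example = "config/faster_rcnn_r50v1_fpn_1x.py"  # assumed example path
print(configFn_example.replace('.py', '').replace('/', '.'))
# -> config.faster_rcnn_r50v1_fpn_1x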
def __init__(self, pBbox):
    self.p = patch_config_as_nothrow(pBbox)

    # declare weight and bias
    xavier_init = mx.init.Xavier(factor_type="in", rnd_type="uniform", magnitude=3)
    self.fc3_weight = X.var("bbox_fc3_weight", init=xavier_init)
    self.fc3_bias = X.var("bbox_fc3_bias")

    self._head_feat = None
def train_net(config):
    General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, ModelParam, \
        OptimizeParam, TestParam, transform, data_name, label_name, metric_list = \
        config.generate_config(is_train=True)
    pGen = patch_config_as_nothrow(General)
    pKv = patch_config_as_nothrow(KvstoreParam)
    pRpn = patch_config_as_nothrow(RpnParam)
    pRoi = patch_config_as_nothrow(RoiParam)
    pBbox = patch_config_as_nothrow(BboxParam)
    pDataset = patch_config_as_nothrow(DatasetParam)
    pModel = patch_config_as_nothrow(ModelParam)
    pOpt = patch_config_as_nothrow(OptimizeParam)
    pTest = patch_config_as_nothrow(TestParam)

    # choose devices: fall back to CPU when no GPU is configured
    gpus = pKv.gpus
    if len(gpus) == 0:
        ctx = [mx.cpu()]
    else:
        ctx = [mx.gpu(i) for i in gpus]
    input_batch_size = pKv.batch_image * len(ctx)

    pretrain_prefix = pModel.pretrain.prefix
    pretrain_epoch = pModel.pretrain.epoch
    save_path = os.path.join('experiments', pGen.name)
    model_prefix = os.path.join(save_path, 'checkpoint')
    begin_epoch = pOpt.schedule.begin_epoch
    end_epoch = pOpt.schedule.end_epoch
    lr_steps = pOpt.schedule.lr_steps

    ## load dataset
    if pDataset.Dataset == 'widerface':
        image_set = pDataset.image_set
        roidb = load_gt_roidb(pDataset.Dataset, image_set, root_path='data',
                              dataset_path='data/widerface', flip=True)

    net = pModel.train_network
    if pOpt.schedule.begin_epoch != 0:
        net.load_model(model_prefix, pOpt.schedule.begin_epoch)
    else:
        net.load_model(pretrain_prefix)

    print('hello github!')
def __init__(self, pNeck, pSEPC):
    super().__init__(pNeck)
    self.psepc = patch_config_as_nothrow(pSEPC)
    self.neck_with_sepc = None

    stride, pad_sizes = pSEPC.stride, pSEPC.pad_sizes
    for i in range(len(stride)):
        if pad_sizes[0] % stride[i] != 0 or pad_sizes[1] % stride[i] != 0:
            print('Warning: the iBN used in this SEPC implementation works best when the '
                  '(padded) input sizes {} are divisible by the stride {}. When this is not '
                  'satisfied, manually check that the feature sizes at stride s satisfy '
                  'ceil(pad_sizes[0]/s) == feature_sizes[0] and '
                  'ceil(pad_sizes[1]/s) == feature_sizes[1].'.format(pad_sizes, stride[i]))
    self.feat_sizes = [[math.ceil(pad_sizes[0] / stride[i]), math.ceil(pad_sizes[1] / stride[i])]
                       for i in range(len(stride))]
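# Hedged aside (not from the original source): a quick numeric illustration of the
# divisibility check above. The pad sizes and strides are assumed example values;
# when every stride divides both padded sides, ceil() reduces to exact division.
import math
example_pad_sizes, example_strides = (768, 1280), (4, 8, 16, 32, 64)
print([[math.ceil(example_pad_sizes[0] / s), math.ceil(example_pad_sizes[1] / s)]
       for s in example_strides])
# -> [[192, 320], [96, 160], [48, 80], [24, 40], [12, 20]]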
def train_net(config):
    pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
        transform, data_name, label_name, metric_list = config.get_config(is_train=True)
    pGen = patch_config_as_nothrow(pGen)
    pKv = patch_config_as_nothrow(pKv)
    pRpn = patch_config_as_nothrow(pRpn)
    pRoi = patch_config_as_nothrow(pRoi)
    pBbox = patch_config_as_nothrow(pBbox)
    pDataset = patch_config_as_nothrow(pDataset)
    pModel = patch_config_as_nothrow(pModel)
    pOpt = patch_config_as_nothrow(pOpt)
    pTest = patch_config_as_nothrow(pTest)

    ctx = [mx.gpu(int(i)) for i in pKv.gpus]
    pretrain_prefix = pModel.pretrain.prefix
    pretrain_epoch = pModel.pretrain.epoch
    prefix = pGen.name
    save_path = os.path.join("experiments", prefix)
    begin_epoch = pOpt.schedule.begin_epoch
    end_epoch = pOpt.schedule.end_epoch
    lr_iter = pOpt.schedule.lr_iter

    # only rank == 0 prints all debug info
    kvstore_type = "dist_sync" if os.environ.get("DMLC_ROLE") == "worker" else pKv.kvstore
    kv = mx.kvstore.create(kvstore_type)
    rank = kv.rank

    # for distributed training using a shared file system
    os.makedirs(save_path, exist_ok=True)

    from utils.logger import config_logger
    config_logger(os.path.join(save_path, "log.txt"))

    model_prefix = os.path.join(save_path, "checkpoint")

    # set up logger
    logger = logging.getLogger()

    sym = pModel.train_symbol

    # set up multi-gpu batch size
    input_batch_size = pKv.batch_image * len(ctx)

    # print config
    # if rank == 0:
    #     logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    image_sets = pDataset.image_set
    roidbs = [pkl.load(open("data/cache/{}.roidb".format(i), "rb"), encoding="latin1")
              for i in image_sets]
    roidb = reduce(lambda x, y: x + y, roidbs)
    # filter empty images
    roidb = [rec for rec in roidb if rec["gt_bbox"].shape[0] > 0]
    # add flipped roi records
    flipped_roidb = []
    for rec in roidb:
        new_rec = rec.copy()
        new_rec["flipped"] = True
        flipped_roidb.append(new_rec)
    roidb = roidb + flipped_roidb

    from core.detection_input import AnchorLoader
    train_data = AnchorLoader(roidb=roidb,
                              transform=transform,
                              data_name=data_name,
                              label_name=label_name,
                              batch_size=input_batch_size,
                              shuffle=True,
                              kv=kv,
                              num_worker=pGen.loader_worker or 12,
                              num_collector=pGen.loader_collector or 1,
                              worker_queue_depth=2,
                              collector_queue_depth=2)

    # infer shapes
    worker_data_shape = dict(train_data.provide_data + train_data.provide_label)
    for key in worker_data_shape:
        worker_data_shape[key] = (pKv.batch_image,) + worker_data_shape[key][1:]
    arg_shape, _, aux_shape = sym.infer_shape(**worker_data_shape)

    _, out_shape, _ = sym.get_internals().infer_shape(**worker_data_shape)
    out_shape_dict = list(zip(sym.get_internals().list_outputs(), out_shape))
    _, out_shape, _ = sym.infer_shape(**worker_data_shape)
    terminal_out_shape_dict = zip(sym.list_outputs(), out_shape)

    if rank == 0:
        logger.info('parameter shape')
        logger.info(pprint.pformat([i for i in out_shape_dict if not i[0].endswith('output')]))
        logger.info('intermediate output shape')
        logger.info(pprint.pformat([i for i in out_shape_dict if i[0].endswith('output')]))
        logger.info('terminal output shape')
        logger.info(pprint.pformat([i for i in terminal_out_shape_dict]))

    # memonger
    if pModel.memonger:
        last_block = pModel.memonger_until or ""
        if rank == 0:
            logger.info("do memonger up to {}".format(last_block))
        type_dict = {k: np.float32 for k in worker_data_shape}
        sym = search_plan_to_layer(sym, last_block, 1000, type_dict=type_dict, **worker_data_shape)

    # load and initialize params
    if pOpt.schedule.begin_epoch != 0:
        arg_params, aux_params = load_checkpoint(model_prefix, begin_epoch)
    elif pModel.from_scratch:
        arg_params, aux_params = dict(), dict()
    else:
        arg_params, aux_params = load_checkpoint(pretrain_prefix, pretrain_epoch)

    if pModel.process_weight is not None:
        pModel.process_weight(sym, arg_params, aux_params)

    '''
    There are some conflicts between `merge_bn` and `attach_quantize_node` in
    graph_optimize.py when `merge_bn` runs ahead of `attach_quantize_node`,
    e.g. `Symbol.ComposeKeyword`.
    '''
    if pModel.QuantizeTrainingParam is not None and pModel.QuantizeTrainingParam.quantize_flag:
        pQuant = pModel.QuantizeTrainingParam
        assert pGen.fp16 == False, "current quantize training only supports fp32 mode."
        from utils.graph_optimize import attach_quantize_node
        _, out_shape, _ = sym.get_internals().infer_shape(**worker_data_shape)
        out_shape_dictionary = dict(zip(sym.get_internals().list_outputs(), out_shape))
        sym = attach_quantize_node(sym, out_shape_dictionary,
                                   pQuant.WeightQuantizeParam,
                                   pQuant.ActQuantizeParam,
                                   pQuant.quantized_op)

    # merge batch normalization to save memory in fixed-bn training
    from utils.graph_optimize import merge_bn
    sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)

    if pModel.random:
        import time
        mx.random.seed(int(time.time()))
        np.random.seed(int(time.time()))

    init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2)
    init.set_verbosity(verbose=True)

    # create solver
    fixed_param = pModel.pretrain.fixed_param
    excluded_param = pModel.pretrain.excluded_param
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]

    if pModel.teacher_param:
        from models.KD.utils import create_teacher_module
        from models.KD.detection_module import KDDetModule
        t_mod, t_label_name, t_label_shape = create_teacher_module(
            pModel.teacher_param, worker_data_shape, input_batch_size, ctx, rank, logger)
        mod = KDDetModule(sym,
                          teacher_module=t_mod,
                          teacher_label_names=t_label_name,
                          teacher_label_shapes=t_label_shape,
                          data_names=data_names,
                          label_names=label_names,
                          logger=logger,
                          context=ctx,
                          fixed_param=fixed_param,
                          excluded_param=excluded_param)
    else:
        mod = DetModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        fixed_param=fixed_param,
                        excluded_param=excluded_param)

    eval_metrics = mx.metric.CompositeEvalMetric(metric_list)

    # callbacks
    batch_end_callback = [callback.Speedometer(train_data.batch_size,
                                               frequent=pGen.log_frequency)]
    batch_end_callback += pModel.batch_end_callbacks or []
    epoch_end_callback = callback.do_checkpoint(model_prefix)
    sym.save(model_prefix + ".json")

    # decide learning rate
    lr_mode = pOpt.optimizer.lr_mode or 'step'
    base_lr = pOpt.optimizer.lr * kv.num_workers
    lr_factor = pOpt.schedule.lr_factor or 0.1

    iter_per_epoch = len(train_data) // input_batch_size
    total_iter = iter_per_epoch * (end_epoch - begin_epoch)
    # negative lr_iter entries count back from the end of training
    lr_iter = [total_iter + it if it < 0 else it for it in lr_iter]
    lr_iter = [it // kv.num_workers for it in lr_iter]
    lr_iter = [it - iter_per_epoch * begin_epoch for it in lr_iter]
    lr_iter_discount = [it for it in lr_iter if it > 0]
    current_lr = base_lr * (lr_factor ** (len(lr_iter) - len(lr_iter_discount)))
    if rank == 0:
        logging.info('total iter {}'.format(total_iter))
        logging.info('lr {}, lr_iters {}'.format(current_lr, lr_iter_discount))
        logging.info('lr mode: {}'.format(lr_mode))

    if pOpt.warmup and pOpt.schedule.begin_epoch == 0:
        if rank == 0:
            logging.info('warmup lr {}, warmup step {}'.format(pOpt.warmup.lr, pOpt.warmup.iter))
        if lr_mode == 'step':
            lr_scheduler = WarmupMultiFactorScheduler(step=lr_iter_discount,
                                                      factor=lr_factor,
                                                      warmup=True,
                                                      warmup_type=pOpt.warmup.type,
                                                      warmup_lr=pOpt.warmup.lr,
                                                      warmup_step=pOpt.warmup.iter)
        elif lr_mode == 'cosine':
            warmup_lr_scheduler = AdvancedLRScheduler(mode='linear',
                                                      base_lr=pOpt.warmup.lr,
                                                      target_lr=base_lr,
                                                      niters=pOpt.warmup.iter)
            cosine_lr_scheduler = AdvancedLRScheduler(
                mode='cosine',
                base_lr=base_lr,
                target_lr=0,
                niters=(iter_per_epoch * (end_epoch - begin_epoch)) - pOpt.warmup.iter)
            lr_scheduler = LRSequential([warmup_lr_scheduler, cosine_lr_scheduler])
        else:
            raise NotImplementedError
    else:
        if lr_mode == 'step':
            lr_scheduler = WarmupMultiFactorScheduler(step=lr_iter_discount, factor=lr_factor)
        elif lr_mode == 'cosine':
            lr_scheduler = AdvancedLRScheduler(mode='cosine',
                                               base_lr=base_lr,
                                               target_lr=0,
                                               niters=iter_per_epoch * (end_epoch - begin_epoch))
        else:
            lr_scheduler = None

    # optimizer
    optimizer_params = dict(momentum=pOpt.optimizer.momentum,
                            wd=pOpt.optimizer.wd,
                            learning_rate=current_lr,
                            lr_scheduler=lr_scheduler,
                            rescale_grad=1.0 / (len(ctx) * kv.num_workers),
                            clip_gradient=pOpt.optimizer.clip_gradient)
    if pKv.fp16:
        optimizer_params['multi_precision'] = True
        optimizer_params['rescale_grad'] /= 128.0

    profile = pGen.profile or False
    if profile:
        mx.profiler.set_config(profile_all=True,
                               filename=os.path.join(save_path, "profile.json"))

    # train
    mod.fit(train_data=train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=kv,
            optimizer=pOpt.optimizer.type,
            optimizer_params=optimizer_params,
            initializer=init,
            allow_missing=True,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch,
            profile=profile)

    logging.info("Training is done")
    time.sleep(10)
    logging.info("Exiting")
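# Hedged usage sketch (not part of the original file): train_net expects a config
# module exposing get_config(is_train=True), as used above. The config module path
# below is an assumed example.
if __name__ == "__main__":
    import importlib
    config = importlib.import_module("config.some_experiment")  # hypothetical config module
    train_net(config)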
# create a dummy data batch
data = mx.nd.ones(shape=[1, 3] + shape)
im_info = mx.nd.array([x / 2.0 for x in shape] + [2.0]).reshape(1, 3)
im_id = mx.nd.array([1])
rec_id = mx.nd.array([1])
data_names = ["data", "im_info", "im_id", "rec_id"]
data_shape = [[1, 3] + shape, [1, 3], [1], [1]]
data_shape = [(name, shape) for name, shape in zip(data_names, data_shape)]
data_batch = mx.io.DataBatch(data=[data, im_info, im_id, rec_id])

'''
There are some conflicts between `merge_bn` and `attach_quantize_node` in
graph_optimize.py when `merge_bn` runs ahead of `attach_quantize_node`,
e.g. `Symbol.ComposeKeyword`.
'''
pModel = patch_config_as_nothrow(pModel)
if pModel.QuantizeTrainingParam is not None and pModel.QuantizeTrainingParam.quantize_flag:
    pQuant = pModel.QuantizeTrainingParam
    assert pGen.fp16 == False, "current quantize training only supports fp32 mode."
    from utils.graph_optimize import attach_quantize_node
    worker_data_shape = dict([(name, tuple(shape)) for name, shape in data_shape])
    # print(worker_data_shape)
    # raise NotImplementedError
    _, out_shape, _ = sym.get_internals().infer_shape(**worker_data_shape)
    out_shape_dictionary = dict(zip(sym.get_internals().list_outputs(), out_shape))
    sym = attach_quantize_node(sym, out_shape_dictionary,
                               pQuant.WeightQuantizeParam,
                               pQuant.ActQuantizeParam,
                               pQuant.quantized_op)
sym.save(pTest.model.prefix + "_infer_speed.json")

ctx = mx.gpu(gpu)
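# Hedged sketch (not from the original file) of one plausible continuation: feed the
# dummy batch above through a bound DetModule to estimate forward latency. It assumes
# `arg_params`/`aux_params` were loaded earlier (e.g. via load_checkpoint, as in the
# detector __init__ above); the iteration count is an arbitrary choice.
import time

count = 500  # assumed number of timed iterations
mod = DetModule(sym, data_names=data_names, context=ctx)
mod.bind(data_shapes=data_shape, for_training=False)
mod.set_params(arg_params, aux_params, allow_extra=False)

# warm-up so lazy initialization does not distort the measurement
mod.forward(data_batch, is_train=False)
mx.nd.waitall()

tic = time.time()
for _ in range(count):
    mod.forward(data_batch, is_train=False)
    mx.nd.waitall()  # block until the asynchronous GPU work finishes
print("average forward time: {:.2f} ms".format((time.time() - tic) * 1000.0 / count))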
def __init__(self, pRoi):
    self.p = patch_config_as_nothrow(pRoi)
def __init__(self, pNeck):
    self.p = patch_config_as_nothrow(pNeck)
def __init__(self, pBackbone):
    self.p = patch_config_as_nothrow(pBackbone)
def __init__(self, pBbox):
    self.p = patch_config_as_nothrow(pBbox)
    self._head_feat = None
def __init__(self, pTest):
    self.p = patch_config_as_nothrow(pTest)
def __init__(self, pBbox, pMask, pMaskRoi):
    self.pBbox = patch_config_as_nothrow(pBbox)
    self.pMask = patch_config_as_nothrow(pMask)
    self.pMaskRoi = patch_config_as_nothrow(pMaskRoi)
    self._head_feat = None
def __init__(self, pNeck):
    self.p = patch_config_as_nothrow(pNeck)
    self.fpn_feat = None
def __init__(self, pHead):
    self.p = patch_config_as_nothrow(pHead)

    num_points = self.p.point_generate.num_points
    self.dcn_kernel = int(math.sqrt(num_points))
    self.dcn_pad = int((self.dcn_kernel - 1) / 2)
    assert self.dcn_kernel * self.dcn_kernel == num_points, \
        "The number of points should be a perfect square."
    assert self.dcn_kernel % 2 == 1, "The dcn kernel size should be odd."

    # init moment method
    dtype = "float16" if self.p.fp16 else "float32"
    self.moment_transfer = X.var(name="moment_transfer",
                                 shape=(2,),
                                 init=X.zero_init(),
                                 lr_mult=0.01,
                                 dtype=dtype)

    # init bias for cls so that the initial foreground probability equals prior_prob
    prior_prob = 0.01
    pi = -math.log((1 - prior_prob) / prior_prob)

    # shared classification weight and bias
    self.cls_conv1_weight = X.var("cls_conv1_weight", init=X.gauss(std=0.01))
    self.cls_conv1_bias = X.var("cls_conv1_bias", init=X.zero_init())
    self.cls_conv2_weight = X.var("cls_conv2_weight", init=X.gauss(std=0.01))
    self.cls_conv2_bias = X.var("cls_conv2_bias", init=X.zero_init())
    self.cls_conv3_weight = X.var("cls_conv3_weight", init=X.gauss(std=0.01))
    self.cls_conv3_bias = X.var("cls_conv3_bias", init=X.zero_init())
    self.cls_conv_weight = X.var("cls_conv_weight", init=X.gauss(std=0.01))
    self.cls_conv_bias = X.var("cls_conv_bias", init=X.zero_init())
    self.cls_out_weight = X.var("cls_out_weight", init=X.gauss(std=0.01))
    self.cls_out_bias = X.var("cls_out_bias", init=X.constant(pi))

    # shared regression weight and bias
    self.reg_conv1_weight = X.var("reg_conv1_weight", init=X.gauss(std=0.01))
    self.reg_conv1_bias = X.var("reg_conv1_bias", init=X.zero_init())
    self.reg_conv2_weight = X.var("reg_conv2_weight", init=X.gauss(std=0.01))
    self.reg_conv2_bias = X.var("reg_conv2_bias", init=X.zero_init())
    self.reg_conv3_weight = X.var("reg_conv3_weight", init=X.gauss(std=0.01))
    self.reg_conv3_bias = X.var("reg_conv3_bias", init=X.zero_init())
    self.pts_init_conv_weight = X.var("pts_init_conv_weight", init=X.gauss(std=0.01))
    self.pts_init_conv_bias = X.var("pts_init_conv_bias", init=X.zero_init())
    self.pts_init_out_weight = X.var("pts_init_out_weight", init=X.gauss(std=0.01))
    self.pts_init_out_bias = X.var("pts_init_out_bias", init=X.zero_init())
    self.pts_refine_conv_weight = X.var("pts_refine_conv_weight", init=X.gauss(std=0.01))
    self.pts_refine_conv_bias = X.var("pts_refine_conv_bias", init=X.zero_init())
    self.pts_refine_out_weight = X.var("pts_refine_out_weight", init=X.gauss(std=0.01))
    self.pts_refine_out_bias = X.var("pts_refine_out_bias", init=X.zero_init())

    self._pts_out_inits = None
    self._pts_out_refines = None
    self._cls_outs = None
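# Hedged aside (not from the original source): why cls_out_bias is initialized to
# pi = -log((1 - prior_prob) / prior_prob). With the conv weights drawn from a
# near-zero Gaussian, the classification logit starts close to pi, so the initial
# foreground probability is sigmoid(pi) = prior_prob. Quick standalone check:
import math
prior_prob = 0.01
pi = -math.log((1 - prior_prob) / prior_prob)
print(1.0 / (1.0 + math.exp(-pi)))  # ~0.01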
                        default=None)
    args = parser.parse_args()
    config = importlib.import_module(args.config.replace('.py', '').replace('/', '.'))
    return config, args


if __name__ == "__main__":
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    config, args = parse_args()

    pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
        transform, data_name, label_name, metric_list = config.get_config(is_train=False)
    pGen = patch_config_as_nothrow(pGen)
    pKv = patch_config_as_nothrow(pKv)
    pRpn = patch_config_as_nothrow(pRpn)
    pRoi = patch_config_as_nothrow(pRoi)
    pBbox = patch_config_as_nothrow(pBbox)
    pDataset = patch_config_as_nothrow(pDataset)
    pModel = patch_config_as_nothrow(pModel)
    pOpt = patch_config_as_nothrow(pOpt)
    pTest = patch_config_as_nothrow(pTest)

    sym = pModel.test_symbol

    image_sets = pDataset.image_set
    roidbs_all = [pkl.load(open("data/cache/{}.roidb".format(i), "rb"), encoding="latin1")
                  for i in image_sets
def __init__(self, pRpn):
    self.p = patch_config_as_nothrow(pRpn)
    self._cls_logit = None
    self._bbox_delta = None
    self._proposal = None
def __init__(self, pRpn, pMask):
    super().__init__(pRpn)
    self.pMask = patch_config_as_nothrow(pMask)
def __init__(self, pRpn):
    self.p = patch_config_as_nothrow(pRpn)
    self.centerness_logit_dict = None
    self.cls_logit_dict = None
    self.offset_logit_dict = None