def __init__(self, configFn, ctx, outFolder, threshold):
    """Build a single-image detector from a simpledet-style config module.

    Args:
        configFn: path to the config .py file; converted to a module path
            and imported dynamically.
        ctx: mxnet context (e.g. mx.gpu(0)) to bind the model on.
        outFolder: folder where the merged test symbol json is saved.
        threshold: score threshold kept on self for later filtering.
    """
    # Disable cuDNN autotune so startup is deterministic and fast.
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
    # 'config/foo.py' -> 'config.foo' so it can be imported as a module.
    config = importlib.import_module(
        configFn.replace('.py', '').replace('/', '.'))
    # Only the model, test and transform slots of the 13-tuple are needed here.
    _, _, _, _, _, _, self.__pModel, _, self.__pTest, self.transform, _, _, _ = config.get_config(
        is_train=False)
    self.__pModel = patch_config_as_nothrow(self.__pModel)
    self.__pTest = patch_config_as_nothrow(self.__pTest)
    # Fixed (short, long) input size used below when binding the module.
    self.resizeParam = (800, 1200)
    # NMS: the config may supply a factory; otherwise use the python wrapper.
    if callable(self.__pTest.nms.type):
        self.__nms = self.__pTest.nms.type(self.__pTest.nms.thr)
    else:
        from operator_py.nms import py_nms_wrapper
        self.__nms = py_nms_wrapper(self.__pTest.nms.thr)
    arg_params, aux_params = load_checkpoint(self.__pTest.model.prefix,
                                             self.__pTest.model.epoch)
    sym = self.__pModel.test_symbol
    # Fold BatchNorm statistics into adjacent weights for faster inference.
    from utils.graph_optimize import merge_bn
    sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)
    self.__mod = DetModule(
        sym, data_names=['data', 'im_info', 'im_id', 'rec_id'], context=ctx)
    # Batch size is fixed to one image per forward call.
    self.__mod.bind(data_shapes=[('data', (1, 3, self.resizeParam[0],
                                           self.resizeParam[1])),
                                 ('im_info', (1, 3)), ('im_id', (1, )),
                                 ('rec_id', (1, ))],
                    for_training=False)
    self.__mod.set_params(arg_params, aux_params, allow_extra=False)
    # Dump the merged test symbol for reproducibility/debugging.
    self.__saveSymbol(sym, outFolder, self.__pTest.model.prefix.split('/')[-1])
    self.__threshold = threshold
    self.outFolder = outFolder
def __init__(self, config, batch_size, gpu_id, thresh):
    """Build an inference module from an already-imported config object.

    Args:
        config: config object exposing get_config(is_train=False).
        batch_size: images per forward pass (used for the 'data' shape).
        gpu_id: index of the GPU to bind on.
        thresh: score threshold used later when filtering detections.
    """
    self.config = config
    self.batch_size = batch_size
    self.thresh = thresh
    # Parse the parameter file of model
    pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
        transform, data_name, label_name, metric_list = config.get_config(is_train=False)
    self.data_name = data_name
    self.label_name = label_name
    # assumes transform[1] is the resize transform carrying p.long/p.short
    # -- TODO confirm against the config's transform list
    self.p_long, self.p_short = transform[1].p.long, transform[1].p.short
    # Define NMS type
    if callable(pTest.nms.type):
        self.do_nms = pTest.nms.type(pTest.nms.thr)
    else:
        from operator_py.nms import py_nms_wrapper
        self.do_nms = py_nms_wrapper(pTest.nms.thr)
    sym = pModel.test_symbol
    # Persist the test symbol next to the checkpoint prefix.
    sym.save(pTest.model.prefix + "_test.json")
    ctx = mx.gpu(gpu_id)
    # im_info/im_id/rec_id keep batch 1; only 'data' scales with batch_size.
    data_shape = [
        ('data', (batch_size, 3, 800, 1200)),
        ("im_info", (1, 3)),
        ("im_id", (1, )),
        ("rec_id", (1, )),
    ]
    # Load network
    arg_params, aux_params = load_checkpoint(pTest.model.prefix,
                                             pTest.model.epoch)
    self.mod = DetModule(sym, data_names=data_name, context=ctx)
    self.mod.bind(data_shapes=data_shape, for_training=False)
    self.mod.set_params(arg_params, aux_params, allow_extra=False)
def create_teacher_module(pTeacherModel, worker_data_shape, input_batch_size,
                          ctx, rank, logger):
    """Build a frozen teacher DetModule for knowledge distillation.

    Loads the teacher symbol from ``<prefix>-symbol.json``, regroups it so
    that the configured internal endpoints become its outputs, infers their
    shapes from the per-worker data shapes, and binds an inference-only
    module whose outputs serve as extra "labels" for the student.

    Args:
        pTeacherModel: config node with prefix, epoch, endpoint,
            data_name and label_name attributes.
        worker_data_shape: dict name -> per-worker shape tuple.
        input_batch_size: total batch size to re-batch shapes to.
        ctx: mxnet context(s) to bind on.
        rank: distributed rank; only rank 0 logs.
        logger: logger instance.

    Returns:
        (t_mod, t_label_name, t_label_shape): the bound teacher module,
        its label names, and the matching (name, shape) list.
    """
    t_prefix = pTeacherModel.prefix
    t_epoch = pTeacherModel.epoch
    t_endpoint = pTeacherModel.endpoint
    t_data_name = pTeacherModel.data_name
    t_label_name = pTeacherModel.label_name
    if rank == 0:
        logger.info(
            'Building teacher module with endpoint: {}'.format(t_endpoint))
    # Load the teacher graph and expose only the requested internal nodes.
    t_sym = mx.sym.load(pTeacherModel.prefix + '-symbol.json')
    t_sym = mx.sym.Group([t_sym.get_internals()[out] for out in t_endpoint])
    # Infer endpoint output shapes from the per-worker input shapes.
    t_worker_data_shape = {key: worker_data_shape[key] for key in t_data_name}
    _, t_out_shape, _ = t_sym.infer_shape(**t_worker_data_shape)
    t_terminal_out_shape_dict = zip(t_sym.list_outputs(), t_out_shape)
    # Re-batch the data shapes to the full input batch size.
    # (plain iteration: the enumerate index was unused here)
    t_data_shape = []
    for data_name in t_data_name:
        data_shape = t_worker_data_shape[data_name]
        data_shape = (input_batch_size, ) + data_shape[1:]
        t_data_shape.append((data_name, data_shape))
    # Teacher outputs act as the student's labels; re-batch them likewise.
    t_label_shape = []
    for idx, label_name in enumerate(t_label_name):
        label_shape = t_out_shape[idx]
        label_shape = (input_batch_size, ) + label_shape[1:]
        t_label_shape.append((label_name, label_shape))
    if rank == 0:
        logger.info('Teacher data_name: {}'.format(t_data_name))
        logger.info('Teacher data_shape: {}'.format(t_data_shape))
        logger.info('Teacher label_name: {}'.format(t_label_name))
        logger.info('Teacher label_shape: {}'.format(t_label_shape))
        logger.info('Teacher terminal output shape')
        logger.info(pprint.pformat([i for i in t_terminal_out_shape_dict]))
    t_arg_params, t_aux_params = load_checkpoint(t_prefix, t_epoch)
    t_mod = DetModule(t_sym,
                      data_names=t_data_name,
                      label_names=None,
                      logger=logger,
                      context=ctx)
    # Inference only: no gradients ever flow through the teacher.
    t_mod.bind(data_shapes=t_data_shape, for_training=False, grad_req='null')
    t_mod.set_params(t_arg_params, t_aux_params)
    if rank == 0:
        logger.info('Finish teacher module build')
    return t_mod, t_label_name, t_label_shape
terminal_out_shape_dict = zip(sym.list_outputs(), out_shape) print('parameter shape') print( pprint.pformat([ i for i in out_shape_dict if not i[0].endswith('output') ])) print('intermediate output shape') print( pprint.pformat( [i for i in out_shape_dict if i[0].endswith('output')])) print('terminal output shape') print(pprint.pformat([i for i in terminal_out_shape_dict])) for i in pKv.gpus: ctx = mx.gpu(i) mod = DetModule(sym, data_names=data_names, context=ctx) mod.bind(data_shapes=loader.provide_data, for_training=False) mod.set_params(arg_params, aux_params, allow_extra=False) execs.append(mod) all_outputs = [] if index_split == 0: def eval_worker(exe, data_queue, result_queue): while True: batch = data_queue.get() exe.forward(batch, is_train=False) out = [x.asnumpy() for x in exe.get_outputs()] result_queue.put(out)
def train_net(config):
    """End-to-end training driver (KD- and quantization-aware variant).

    Parses the config, builds the data loader, optionally applies memonger /
    quantization-node attachment / BN merging, constructs the (KD)DetModule,
    derives the LR schedule and runs mod.fit. Supports distributed training
    via a dist_sync kvstore when launched under DMLC.
    """
    pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
        transform, data_name, label_name, metric_list = config.get_config(is_train=True)
    # Wrap every config section so missing attributes read as None
    # instead of raising AttributeError.
    pGen = patch_config_as_nothrow(pGen)
    pKv = patch_config_as_nothrow(pKv)
    pRpn = patch_config_as_nothrow(pRpn)
    pRoi = patch_config_as_nothrow(pRoi)
    pBbox = patch_config_as_nothrow(pBbox)
    pDataset = patch_config_as_nothrow(pDataset)
    pModel = patch_config_as_nothrow(pModel)
    pOpt = patch_config_as_nothrow(pOpt)
    pTest = patch_config_as_nothrow(pTest)
    ctx = [mx.gpu(int(i)) for i in pKv.gpus]
    pretrain_prefix = pModel.pretrain.prefix
    pretrain_epoch = pModel.pretrain.epoch
    prefix = pGen.name
    save_path = os.path.join("experiments", prefix)
    begin_epoch = pOpt.schedule.begin_epoch
    end_epoch = pOpt.schedule.end_epoch
    lr_iter = pOpt.schedule.lr_iter
    # only rank==0 print all debug infos
    kvstore_type = "dist_sync" if os.environ.get(
        "DMLC_ROLE") == "worker" else pKv.kvstore
    kv = mx.kvstore.create(kvstore_type)
    rank = kv.rank
    # for distributed training using shared file system
    os.makedirs(save_path, exist_ok=True)
    from utils.logger import config_logger
    config_logger(os.path.join(save_path, "log.txt"))
    model_prefix = os.path.join(save_path, "checkpoint")
    # set up logger
    logger = logging.getLogger()
    sym = pModel.train_symbol
    # setup multi-gpu
    input_batch_size = pKv.batch_image * len(ctx)
    # print config
    # if rank == 0:
    #     logger.info(pprint.pformat(config))
    # load dataset and prepare imdb for training
    image_sets = pDataset.image_set
    # roidbs are pre-built caches; latin1 keeps py2-pickled data readable.
    roidbs = [
        pkl.load(open("data/cache/{}.roidb".format(i), "rb"),
                 encoding="latin1") for i in image_sets
    ]
    roidb = reduce(lambda x, y: x + y, roidbs)
    # filter empty image
    roidb = [rec for rec in roidb if rec["gt_bbox"].shape[0] > 0]
    # add flip roi record
    flipped_roidb = []
    for rec in roidb:
        new_rec = rec.copy()
        new_rec["flipped"] = True
        flipped_roidb.append(new_rec)
    roidb = roidb + flipped_roidb
    from core.detection_input import AnchorLoader
    train_data = AnchorLoader(roidb=roidb,
                              transform=transform,
                              data_name=data_name,
                              label_name=label_name,
                              batch_size=input_batch_size,
                              shuffle=True,
                              kv=kv,
                              num_worker=pGen.loader_worker or 12,
                              num_collector=pGen.loader_collector or 1,
                              worker_queue_depth=2,
                              collector_queue_depth=2)
    # infer shape
    worker_data_shape = dict(train_data.provide_data +
                             train_data.provide_label)
    # Shapes are per-worker (per-GPU), hence pKv.batch_image not input_batch_size.
    for key in worker_data_shape:
        worker_data_shape[key] = (
            pKv.batch_image, ) + worker_data_shape[key][1:]
    arg_shape, _, aux_shape = sym.infer_shape(**worker_data_shape)
    _, out_shape, _ = sym.get_internals().infer_shape(**worker_data_shape)
    out_shape_dict = list(zip(sym.get_internals().list_outputs(), out_shape))
    _, out_shape, _ = sym.infer_shape(**worker_data_shape)
    terminal_out_shape_dict = zip(sym.list_outputs(), out_shape)
    if rank == 0:
        logger.info('parameter shape')
        logger.info(
            pprint.pformat(
                [i for i in out_shape_dict if not i[0].endswith('output')]))
        logger.info('intermediate output shape')
        logger.info(
            pprint.pformat(
                [i for i in out_shape_dict if i[0].endswith('output')]))
        logger.info('terminal output shape')
        logger.info(pprint.pformat([i for i in terminal_out_shape_dict]))
    # memonger: trade recompute for memory up to the named block.
    if pModel.memonger:
        last_block = pModel.memonger_until or ""
        if rank == 0:
            logger.info("do memonger up to {}".format(last_block))
        type_dict = {k: np.float32 for k in worker_data_shape}
        sym = search_plan_to_layer(sym,
                                   last_block,
                                   1000,
                                   type_dict=type_dict,
                                   **worker_data_shape)
    # load and initialize params: resume > scratch > pretrained backbone.
    if pOpt.schedule.begin_epoch != 0:
        arg_params, aux_params = load_checkpoint(model_prefix, begin_epoch)
    elif pModel.from_scratch:
        arg_params, aux_params = dict(), dict()
    else:
        arg_params, aux_params = load_checkpoint(pretrain_prefix,
                                                 pretrain_epoch)
    if pModel.process_weight is not None:
        pModel.process_weight(sym, arg_params, aux_params)
    ''' there are some conflicts between `mergebn` and `attach_quantized_node` in graph_optimize.py
    when mergebn ahead of attach_quantized_node
    such as `Symbol.ComposeKeyword`
    '''
    if pModel.QuantizeTrainingParam is not None and pModel.QuantizeTrainingParam.quantize_flag:
        pQuant = pModel.QuantizeTrainingParam
        assert pGen.fp16 == False, "current quantize training only support fp32 mode."
        from utils.graph_optimize import attach_quantize_node
        _, out_shape, _ = sym.get_internals().infer_shape(**worker_data_shape)
        out_shape_dictoinary = dict(
            zip(sym.get_internals().list_outputs(), out_shape))
        sym = attach_quantize_node(sym, out_shape_dictoinary,
                                   pQuant.WeightQuantizeParam,
                                   pQuant.ActQuantizeParam,
                                   pQuant.quantized_op)
    # merge batch normalization to save memory in fix bn training
    from utils.graph_optimize import merge_bn
    sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)
    # pModel.random: non-deterministic run; seed from wall clock.
    if pModel.random:
        import time
        mx.random.seed(int(time.time()))
        np.random.seed(int(time.time()))
    init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2)
    init.set_verbosity(verbose=True)
    # create solver
    fixed_param = pModel.pretrain.fixed_param
    excluded_param = pModel.pretrain.excluded_param
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    # Knowledge distillation: wrap the student in a KDDetModule fed by a
    # frozen teacher module; otherwise use a plain DetModule.
    if pModel.teacher_param:
        from models.KD.utils import create_teacher_module
        from models.KD.detection_module import KDDetModule
        t_mod, t_label_name, t_label_shape = create_teacher_module(
            pModel.teacher_param, worker_data_shape, input_batch_size, ctx,
            rank, logger)
        mod = KDDetModule(sym,
                          teacher_module=t_mod,
                          teacher_label_names=t_label_name,
                          teacher_label_shapes=t_label_shape,
                          data_names=data_names,
                          label_names=label_names,
                          logger=logger,
                          context=ctx,
                          fixed_param=fixed_param,
                          excluded_param=excluded_param)
    else:
        mod = DetModule(sym,
                        data_names=data_names,
                        label_names=label_names,
                        logger=logger,
                        context=ctx,
                        fixed_param=fixed_param,
                        excluded_param=excluded_param)
    eval_metrics = mx.metric.CompositeEvalMetric(metric_list)
    # callback
    batch_end_callback = [
        callback.Speedometer(train_data.batch_size,
                             frequent=pGen.log_frequency)
    ]
    batch_end_callback += pModel.batch_end_callbacks or []
    epoch_end_callback = callback.do_checkpoint(model_prefix)
    sym.save(model_prefix + ".json")
    # decide learning rate
    lr_mode = pOpt.optimizer.lr_mode or 'step'
    # Linear-scaling rule: lr grows with the number of distributed workers.
    base_lr = pOpt.optimizer.lr * kv.num_workers
    lr_factor = pOpt.schedule.lr_factor or 0.1
    iter_per_epoch = len(train_data) // input_batch_size
    total_iter = iter_per_epoch * (end_epoch - begin_epoch)
    # Negative lr_iter entries count back from the end of training.
    lr_iter = [total_iter + it if it < 0 else it for it in lr_iter]
    lr_iter = [it // kv.num_workers for it in lr_iter]
    lr_iter = [it - iter_per_epoch * begin_epoch for it in lr_iter]
    # Steps already passed (<= 0) are discounted by pre-decaying the lr.
    lr_iter_discount = [it for it in lr_iter if it > 0]
    current_lr = base_lr * (lr_factor**(len(lr_iter) - len(lr_iter_discount)))
    if rank == 0:
        logging.info('total iter {}'.format(total_iter))
        logging.info('lr {}, lr_iters {}'.format(current_lr,
                                                 lr_iter_discount))
        logging.info('lr mode: {}'.format(lr_mode))
    # Warmup only applies to fresh runs (begin_epoch == 0).
    if pOpt.warmup and pOpt.schedule.begin_epoch == 0:
        if rank == 0:
            logging.info('warmup lr {}, warmup step {}'.format(
                pOpt.warmup.lr, pOpt.warmup.iter))
        if lr_mode == 'step':
            lr_scheduler = WarmupMultiFactorScheduler(
                step=lr_iter_discount,
                factor=lr_factor,
                warmup=True,
                warmup_type=pOpt.warmup.type,
                warmup_lr=pOpt.warmup.lr,
                warmup_step=pOpt.warmup.iter)
        elif lr_mode == 'cosine':
            warmup_lr_scheduler = AdvancedLRScheduler(mode='linear',
                                                      base_lr=pOpt.warmup.lr,
                                                      target_lr=base_lr,
                                                      niters=pOpt.warmup.iter)
            cosine_lr_scheduler = AdvancedLRScheduler(
                mode='cosine',
                base_lr=base_lr,
                target_lr=0,
                niters=(iter_per_epoch *
                        (end_epoch - begin_epoch)) - pOpt.warmup.iter)
            lr_scheduler = LRSequential(
                [warmup_lr_scheduler, cosine_lr_scheduler])
        else:
            raise NotImplementedError
    else:
        if lr_mode == 'step':
            lr_scheduler = WarmupMultiFactorScheduler(step=lr_iter_discount,
                                                      factor=lr_factor)
        elif lr_mode == 'cosine':
            lr_scheduler = AdvancedLRScheduler(mode='cosine',
                                               base_lr=base_lr,
                                               target_lr=0,
                                               niters=iter_per_epoch *
                                               (end_epoch - begin_epoch))
        else:
            lr_scheduler = None
    # optimizer
    optimizer_params = dict(momentum=pOpt.optimizer.momentum,
                            wd=pOpt.optimizer.wd,
                            learning_rate=current_lr,
                            lr_scheduler=lr_scheduler,
                            rescale_grad=1.0 / (len(ctx) * kv.num_workers),
                            clip_gradient=pOpt.optimizer.clip_gradient)
    if pKv.fp16:
        # Loss-scaling by 128 for fp16; undone here via rescale_grad.
        optimizer_params['multi_precision'] = True
        optimizer_params['rescale_grad'] /= 128.0
    profile = pGen.profile or False
    if profile:
        mx.profiler.set_config(profile_all=True,
                               filename=os.path.join(save_path,
                                                     "profile.json"))
    # train
    mod.fit(train_data=train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=kv,
            optimizer=pOpt.optimizer.type,
            optimizer_params=optimizer_params,
            initializer=init,
            allow_missing=True,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch,
            profile=profile)
    logging.info("Training has done")
    # NOTE(review): 'time' is only imported above under pModel.random;
    # presumably it is also imported at module top -- verify.
    time.sleep(10)
    logging.info("Exiting")
if pModel.QuantizeTrainingParam is not None and pModel.QuantizeTrainingParam.quantize_flag: pQuant = pModel.QuantizeTrainingParam assert pGen.fp16 == False, "current quantize training only support fp32 mode." from utils.graph_optimize import attach_quantize_node worker_data_shape = dict([(name, tuple(shape)) for name, shape in data_shape]) # print(worker_data_shape) # raise NotImplementedError _, out_shape, _ = sym.get_internals().infer_shape(**worker_data_shape) out_shape_dictoinary = dict(zip(sym.get_internals().list_outputs(), out_shape)) sym = attach_quantize_node(sym, out_shape_dictoinary, pQuant.WeightQuantizeParam, pQuant.ActQuantizeParam, pQuant.quantized_op) sym.save(pTest.model.prefix + "_infer_speed.json") ctx = mx.gpu(gpu) mod = DetModule(sym, data_names=data_names, context=ctx) mod.bind(data_shapes=data_shape, for_training=False) mod.set_params({}, {}, True) # let AUTOTUNE run for once mod.forward(data_batch, is_train=False) for output in mod.get_outputs(): output.wait_to_read() tic = time.time() for _ in range(count): mod.forward(data_batch, is_train=False) for output in mod.get_outputs(): output.wait_to_read() toc = time.time()
if __name__ == "__main__": # os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0" args, config = parse_args() pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \ transform, data_name, label_name, metric_list = config.get_config(is_train=False) nms = py_nms_wrapper(pTest.nms.thr) sym = pModel.test_symbol pshort = 800 plong = 2000 arg_params, aux_params = load_checkpoint(pTest.model.prefix, args.epoch) mod = DetModule(sym, data_names=["data", "im_info", "im_id", "rec_id"], context=mx.gpu(args.gpu_id)) provide_data = [("data", (1, 3, pshort, plong)), ("im_info", (1, 3)), ("im_id", (1, )), ("rec_id", (1, ))] mod.bind(data_shapes=provide_data, for_training=False) mod.set_params(arg_params, aux_params, allow_extra=False) image_list = [] if os.path.isfile(args.path): if ".txt" in args.path: list_file = open(args.path, 'r') list_lines = list_file.readlines() list_file.close() (fpath, fname) = os.path.split(args.path) for aline in list_lines: uints = aline.split(' ')
class predictor(object):
    """Single-process detection predictor built from a config object.

    Binds a DetModule once at construction, then serves per-image inference
    through run_image(); detections are thresholded and NMS-ed per class.
    """

    def __init__(self, config, batch_size, gpu_id, thresh):
        """Parse config, build NMS, and bind the inference module.

        Args:
            config: config object exposing get_config(is_train=False).
            batch_size: images per forward pass for the 'data' shape.
            gpu_id: GPU index to bind on.
            thresh: score threshold used in run_image.
        """
        self.config = config
        self.batch_size = batch_size
        self.thresh = thresh
        # Parse the parameter file of model
        pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
            transform, data_name, label_name, metric_list = config.get_config(is_train=False)
        self.data_name = data_name
        self.label_name = label_name
        # assumes transform[1] is the resize transform carrying p.long/p.short
        # -- TODO confirm against the config's transform list
        self.p_long, self.p_short = transform[1].p.long, transform[1].p.short
        # Define NMS type
        if callable(pTest.nms.type):
            self.do_nms = pTest.nms.type(pTest.nms.thr)
        else:
            from operator_py.nms import py_nms_wrapper
            self.do_nms = py_nms_wrapper(pTest.nms.thr)
        sym = pModel.test_symbol
        sym.save(pTest.model.prefix + "_test.json")
        ctx = mx.gpu(gpu_id)
        data_shape = [
            ('data', (batch_size, 3, 800, 1200)),
            ("im_info", (1, 3)),
            ("im_id", (1, )),
            ("rec_id", (1, )),
        ]
        # Load network
        arg_params, aux_params = load_checkpoint(pTest.model.prefix,
                                                 pTest.model.epoch)
        self.mod = DetModule(sym, data_names=data_name, context=ctx)
        self.mod.bind(data_shapes=data_shape, for_training=False)
        self.mod.set_params(arg_params, aux_params, allow_extra=False)

    def preprocess_image(self, input_img):
        """Convert a BGR image to a CHW RGB array resized to fit the model.

        Returns (image, im_info) where im_info = (resized_h, resized_w, scale).
        """
        image = input_img[:, :, ::-1]  # BGR -> RGB
        short = min(image.shape[:2])
        long = max(image.shape[:2])
        # Scale so that neither the short nor the long side exceeds its budget.
        scale = min(self.p_short / short, self.p_long / long)
        h, w = image.shape[:2]
        im_info = (round(h * scale), round(w * scale), scale)
        image = cv2.resize(image,
                           None,
                           None,
                           scale,
                           scale,
                           interpolation=cv2.INTER_LINEAR)
        image = image.transpose((2, 0, 1))  # HWC -> CHW
        return image, im_info

    def run_image(self, img_path):
        """Run detection on one image file.

        Returns a dict mapping category name -> ndarray of NMS-kept
        detections [x1, y1, x2, y2, score] in original-image coordinates.
        """
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        image, im_info = self.preprocess_image(image)
        input_data = {
            'data': [image],
            'im_info': [im_info],
            'im_id': [0],
            'rec_id': [0],
        }
        data = [mx.nd.array(input_data[name]) for name in self.data_name]
        label = []
        provide_data = [(k, v.shape) for k, v in zip(self.data_name, data)]
        provide_label = [(k, v.shape) for k, v in zip(self.label_name, label)]
        data_batch = mx.io.DataBatch(data=data,
                                     label=label,
                                     provide_data=provide_data,
                                     provide_label=provide_label)
        self.mod.forward(data_batch, is_train=False)
        out = [x.asnumpy() for x in self.mod.get_outputs()]
        # assumes output ordering puts cls_score at index 3 and bboxes at
        # index 4 -- TODO confirm against the test symbol's output list
        cls_score = out[3]
        bboxes = out[4]
        result = {}
        for cid in range(cls_score.shape[1]):
            if cid == 0:  # Ignore the background
                continue
            score = cls_score[:, cid]
            # Class-specific regression gives 4 coords per class; otherwise
            # a single shared box per proposal.
            if bboxes.shape[1] != 4:
                cls_box = bboxes[:, cid * 4:(cid + 1) * 4]
            else:
                cls_box = bboxes
            valid_inds = np.where(score >= self.thresh)[0]
            box = cls_box[valid_inds]
            score = score[valid_inds]
            det = np.concatenate((box, score.reshape(-1, 1)),
                                 axis=1).astype(np.float32)
            det = self.do_nms(det)
            if len(det) > 0:
                det[:, :4] = det[:, :4] / im_info[
                    2]  # Restore to the original size
            # CATEGORIES is presumably a module-level id->name table; verify.
            result[CATEGORIES[cid]] = det
        return result
class TDNDetector:
    """Single-image detector driven by a config file path.

    Loads the config module, merges BN into the test symbol, binds a
    batch-1 DetModule, and returns per-class thresholded+NMS-ed detections
    from __call__.
    """

    def __init__(self, configFn, ctx, outFolder, threshold):
        """Import the config, load the checkpoint and bind the module.

        Args:
            configFn: path to the config .py file (converted to module path).
            ctx: mxnet context to bind on.
            outFolder: folder where the merged test symbol json is saved.
            threshold: score threshold used for filtering and NMS.
        """
        # Disable cuDNN autotune so startup is deterministic and fast.
        os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
        # 'config/foo.py' -> 'config.foo'
        config = importlib.import_module(configFn.replace('.py', '').replace('/', '.'))
        # Only the model, test and transform slots of the config are needed.
        _,_,_,_,_,_, self.__pModel,_, self.__pTest, self.transform,_,_,_ = config.get_config(is_train=False)
        self.__pModel = patch_config_as_nothrow(self.__pModel)
        self.__pTest = patch_config_as_nothrow(self.__pTest)
        # Fixed (short, long) input budget used by __readImg's scale.
        self.resizeParam = (800, 1200)
        if callable(self.__pTest.nms.type):
            self.__nms = self.__pTest.nms.type(self.__pTest.nms.thr)
        else:
            from operator_py.nms import py_nms_wrapper
            self.__nms = py_nms_wrapper(self.__pTest.nms.thr)
        arg_params, aux_params = load_checkpoint(self.__pTest.model.prefix,
                                                 self.__pTest.model.epoch)
        sym = self.__pModel.test_symbol
        # Fold BatchNorm into adjacent weights for faster inference.
        from utils.graph_optimize import merge_bn
        sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)
        self.__mod = DetModule(sym, data_names=['data','im_info','im_id','rec_id'], context=ctx)
        # Batch size is fixed to one image per forward call.
        self.__mod.bind(data_shapes=[('data', (1, 3, self.resizeParam[0], self.resizeParam[1])),
                                     ('im_info', (1, 3)),
                                     ('im_id', (1,)),
                                     ('rec_id', (1,))],
                        for_training=False)
        self.__mod.set_params(arg_params, aux_params, allow_extra=False)
        self.__saveSymbol(sym, outFolder, self.__pTest.model.prefix.split('/')[-1])
        self.__threshold = threshold

    def __call__(self, imgFilename):  # detect onto image
        """Detect objects in one image file.

        Returns (detections, None) where detections is a list of dicts
        with 'cls', 'box' (xyxy, original-image coords) and 'score'.
        """
        roi_record, scale = self.__readImg(imgFilename)
        h, w = roi_record['data'][0].shape
        # Re-stack the three transformed channel planes into NCHW.
        im_c1 = roi_record['data'][0].reshape(1,1,h,w)
        im_c2 = roi_record['data'][1].reshape(1,1,h,w)
        im_c3 = roi_record['data'][2].reshape(1,1,h,w)
        im_data = np.concatenate((im_c1, im_c2, im_c3), axis=1)
        im_info, im_id, rec_id = [(h, w, scale)], [1], [1]
        data = mx.io.DataBatch(data=[mx.nd.array(im_data),
                                     mx.nd.array(im_info),
                                     mx.nd.array(im_id),
                                     mx.nd.array(rec_id)])
        self.__mod.forward(data, is_train=False)
        # extract results
        outputs = self.__mod.get_outputs(merge_multi_context=False)
        # assumes output order (rec_id, im_id, im_info, cls_score, bbox)
        # -- TODO confirm against the test symbol
        rid, id, info, cls, box = [x[0].asnumpy() for x in outputs]
        rid, id, info, cls, box = rid.squeeze(), id.squeeze(), info.squeeze(), cls.squeeze(), box.squeeze()
        cls = cls[:, 1:]  # remove background
        # Map boxes back to original-image coordinates.
        box = box / scale
        output_record = dict(rec_id=rid, im_id=id, im_info=info,
                             bbox_xyxy=box, cls_score=cls)
        output_record = self.__pTest.process_output([output_record], None)[0]
        final_result = self.__do_nms(output_record)
        # obtain representable output
        detections = []
        for cid, bbox in final_result.items():
            idx = np.where(bbox[:,-1] > self.__threshold)[0]
            for i in idx:
                final_box = bbox[i][:4]
                score = bbox[i][-1]
                detections.append({'cls':cid, 'box':final_box, 'score':score})
        return detections, None

    def __do_nms(self, all_output):
        """Apply per-class score filtering and NMS.

        Returns a dict mapping category name -> kept detections
        [x1, y1, x2, y2, score].
        """
        box = all_output['bbox_xyxy']
        score = all_output['cls_score']
        final_dets = {}
        for cid in range(score.shape[1]):
            score_cls = score[:, cid]
            valid_inds = np.where(score_cls > self.__threshold)[0]
            box_cls = box[valid_inds]
            score_cls = score_cls[valid_inds]
            if valid_inds.shape[0] == 0:
                continue
            det = np.concatenate((box_cls, score_cls.reshape(-1, 1)),
                                 axis=1).astype(np.float32)
            det = self.__nms(det)
            # 'coco' is presumably a module-level id->name table; verify.
            cls = coco[cid]
            final_dets[cls] = det
        return final_dets

    def __readImg(self, imgFilename):
        """Read an image and run the config transform chain over a stub
        roidb record.

        Returns (roi_record, scale) where scale maps transformed coords
        back to the original image when divided out.
        """
        img = cv2.imread(imgFilename, cv2.IMREAD_COLOR)
        height, width, channels = img.shape
        # Dummy gt fields so the training-style transforms can run.
        roi_record = {'gt_bbox': np.array([[0., 0., 0., 0.]]),
                      'gt_class': np.array([0])}
        roi_record['image_url'] = imgFilename
        roi_record['h'] = height
        roi_record['w'] = width
        for trans in self.transform:
            trans.apply(roi_record)
        img_shape = [roi_record['h'], roi_record['w']]
        shorts, longs = min(img_shape), max(img_shape)
        scale = min(self.resizeParam[0] / shorts, self.resizeParam[1] / longs)
        return roi_record, scale

    def __saveSymbol(self, sym, outFolder, fnPrefix):
        """Save the (BN-merged) test symbol json under outFolder."""
        if not os.path.exists(outFolder):
            os.makedirs(outFolder)
        resFilename = os.path.join(outFolder, fnPrefix + "_symbol_test.json")
        sym.save(resFilename)
def train_net(config):
    """End-to-end training driver (earlier, step-schedule-only variant).

    Parses the config, builds the AnchorLoader, optionally applies memonger,
    constructs the DetModule, derives a step LR schedule and runs mod.fit.
    Supports distributed training via a dist_sync kvstore under DMLC.
    """
    pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
        transform, data_name, label_name, metric_list = config.get_config(is_train=True)
    ctx = [mx.gpu(int(i)) for i in pKv.gpus]
    pretrain_prefix = pModel.pretrain.prefix
    pretrain_epoch = pModel.pretrain.epoch
    prefix = pGen.name
    save_path = os.path.join("experiments", prefix)
    begin_epoch = pOpt.schedule.begin_epoch
    end_epoch = pOpt.schedule.end_epoch
    lr_iter = pOpt.schedule.lr_iter
    # only rank==0 print all debug infos
    kvstore_type = "dist_sync" if os.environ.get(
        "DMLC_ROLE") == "worker" else pKv.kvstore
    kv = mx.kvstore.create(kvstore_type)
    rank = kv.rank
    # for distributed training using shared file system
    if rank == 0:
        if not os.path.exists(save_path):
            os.makedirs(save_path)
    from utils.logger import config_logger
    config_logger(os.path.join(save_path, "log.txt"))
    model_prefix = os.path.join(save_path, "checkpoint")
    # set up logger
    logger = logging.getLogger()
    sym = pModel.train_symbol
    # setup multi-gpu
    input_batch_size = pKv.batch_image * len(ctx)
    # print config
    # if rank == 0:
    #     logger.info(pprint.pformat(config))
    # load dataset and prepare imdb for training
    image_sets = pDataset.image_set
    # roidbs are pre-built caches; latin1 keeps py2-pickled data readable.
    roidbs = [
        pkl.load(open("data/cache/{}.roidb".format(i), "rb"),
                 encoding="latin1") for i in image_sets
    ]
    roidb = reduce(lambda x, y: x + y, roidbs)
    # filter empty image
    roidb = [rec for rec in roidb if rec["gt_bbox"].shape[0] > 0]
    # add flip roi record
    flipped_roidb = []
    for rec in roidb:
        new_rec = rec.copy()
        new_rec["flipped"] = True
        flipped_roidb.append(new_rec)
    roidb = roidb + flipped_roidb
    from core.detection_input import AnchorLoader
    train_data = AnchorLoader(roidb=roidb,
                              transform=transform,
                              data_name=data_name,
                              label_name=label_name,
                              batch_size=input_batch_size,
                              shuffle=True,
                              kv=kv)
    # infer shape
    worker_data_shape = dict(train_data.provide_data +
                             train_data.provide_label)
    # Shapes are per-worker (per-GPU), hence pKv.batch_image not input_batch_size.
    for key in worker_data_shape:
        worker_data_shape[key] = (
            pKv.batch_image, ) + worker_data_shape[key][1:]
    arg_shape, _, aux_shape = sym.infer_shape(**worker_data_shape)
    _, out_shape, _ = sym.get_internals().infer_shape(**worker_data_shape)
    out_shape_dict = list(zip(sym.get_internals().list_outputs(), out_shape))
    _, out_shape, _ = sym.infer_shape(**worker_data_shape)
    terminal_out_shape_dict = zip(sym.list_outputs(), out_shape)
    if rank == 0:
        logger.info('parameter shape')
        logger.info(
            pprint.pformat(
                [i for i in out_shape_dict if not i[0].endswith('output')]))
        logger.info('intermediate output shape')
        logger.info(
            pprint.pformat(
                [i for i in out_shape_dict if i[0].endswith('output')]))
        logger.info('terminal output shape')
        logger.info(pprint.pformat([i for i in terminal_out_shape_dict]))
    # memonger: trade recompute for memory up to the named block.
    if pModel.memonger:
        last_block = pModel.memonger_until or ""
        if rank == 0:
            logger.info("do memonger up to {}".format(last_block))
        type_dict = {k: np.float32 for k in worker_data_shape}
        sym = search_plan_to_layer(sym,
                                   last_block,
                                   1000,
                                   type_dict=type_dict,
                                   **worker_data_shape)
    # load and initialize params: resume > scratch > pretrained backbone.
    if pOpt.schedule.begin_epoch != 0:
        arg_params, aux_params = load_checkpoint(model_prefix, begin_epoch)
    elif pModel.from_scratch:
        arg_params, aux_params = dict(), dict()
    else:
        arg_params, aux_params = load_checkpoint(pretrain_prefix,
                                                 pretrain_epoch)
    # process_weight is optional on the config; absence is tolerated.
    try:
        pModel.process_weight(sym, arg_params, aux_params)
    except AttributeError:
        pass
    # pModel.random: non-deterministic run; seed from wall clock.
    if pModel.random:
        import time
        mx.random.seed(int(time.time()))
        np.random.seed(int(time.time()))
    init = mx.init.Xavier(factor_type="in", rnd_type='gaussian', magnitude=2)
    init.set_verbosity(verbose=True)
    # create solver
    fixed_param_prefix = pModel.pretrain.fixed_param
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    mod = DetModule(sym,
                    data_names=data_names,
                    label_names=label_names,
                    logger=logger,
                    context=ctx,
                    fixed_param_prefix=fixed_param_prefix)
    eval_metrics = mx.metric.CompositeEvalMetric(metric_list)
    # callback
    batch_end_callback = callback.Speedometer(train_data.batch_size,
                                              frequent=pGen.log_frequency)
    epoch_end_callback = callback.do_checkpoint(model_prefix)
    sym.save(model_prefix + ".json")
    # decide learning rate
    # Linear-scaling rule: lr grows with the number of distributed workers.
    base_lr = pOpt.optimizer.lr * kv.num_workers
    lr_factor = 0.1
    iter_per_epoch = len(train_data) // input_batch_size
    lr_iter = [it // kv.num_workers for it in lr_iter]
    lr_iter = [it - iter_per_epoch * begin_epoch for it in lr_iter]
    # Steps already passed (<= 0) are discounted by pre-decaying the lr.
    lr_iter_discount = [it for it in lr_iter if it > 0]
    current_lr = base_lr * (lr_factor**(len(lr_iter) - len(lr_iter_discount)))
    if rank == 0:
        logging.info('total iter {}'.format(iter_per_epoch *
                                            (end_epoch - begin_epoch)))
        logging.info('lr {}, lr_iters {}'.format(current_lr,
                                                 lr_iter_discount))
    # Warmup only applies to fresh runs (begin_epoch == 0).
    if pOpt.warmup is not None and pOpt.schedule.begin_epoch == 0:
        if rank == 0:
            logging.info('warmup lr {}, warmup step {}'.format(
                pOpt.warmup.lr, pOpt.warmup.iter))
        lr_scheduler = WarmupMultiFactorScheduler(step=lr_iter_discount,
                                                  factor=lr_factor,
                                                  warmup=True,
                                                  warmup_type=pOpt.warmup.type,
                                                  warmup_lr=pOpt.warmup.lr,
                                                  warmup_step=pOpt.warmup.iter)
    else:
        if len(lr_iter_discount) > 0:
            lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(
                lr_iter_discount, lr_factor)
        else:
            lr_scheduler = None
    # optimizer
    optimizer_params = dict(momentum=pOpt.optimizer.momentum,
                            wd=pOpt.optimizer.wd,
                            learning_rate=current_lr,
                            lr_scheduler=lr_scheduler,
                            rescale_grad=1.0 /
                            (len(pKv.gpus) * kv.num_workers),
                            clip_gradient=pOpt.optimizer.clip_gradient)
    if pKv.fp16:
        # Loss-scaling by 128 for fp16; undone here via rescale_grad.
        optimizer_params['multi_precision'] = True
        optimizer_params['rescale_grad'] /= 128.0
    # train
    mod.fit(train_data=train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore=kv,
            optimizer=pOpt.optimizer.type,
            optimizer_params=optimizer_params,
            initializer=init,
            allow_missing=True,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
    logging.info("Training has done")
class TDNDetector:
    """Single-image detector (visualizing variant).

    Same pipeline as the non-visualizing TDNDetector but also draws the
    detections on the image and writes the result into outFolder.
    """

    def __init__(self, configFn, ctx, outFolder, threshold):
        """Import the config, load the checkpoint and bind the module.

        Args:
            configFn: path to the config .py file (converted to module path).
            ctx: mxnet context to bind on.
            outFolder: folder for the symbol json and visualized outputs.
            threshold: score threshold used for filtering and NMS.
        """
        # Disable cuDNN autotune so startup is deterministic and fast.
        os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
        # 'config/foo.py' -> 'config.foo'
        config = importlib.import_module(
            configFn.replace('.py', '').replace('/', '.'))
        # Only the model, test and transform slots of the config are needed.
        _, _, _, _, _, _, self.__pModel, _, self.__pTest, self.transform, _, _, _ = config.get_config(
            is_train=False)
        self.__pModel = patch_config_as_nothrow(self.__pModel)
        self.__pTest = patch_config_as_nothrow(self.__pTest)
        # Fixed (short, long) input budget used by __readImg's scale.
        self.resizeParam = (800, 1200)
        if callable(self.__pTest.nms.type):
            self.__nms = self.__pTest.nms.type(self.__pTest.nms.thr)
        else:
            from operator_py.nms import py_nms_wrapper
            self.__nms = py_nms_wrapper(self.__pTest.nms.thr)
        arg_params, aux_params = load_checkpoint(self.__pTest.model.prefix,
                                                 self.__pTest.model.epoch)
        sym = self.__pModel.test_symbol
        # Fold BatchNorm into adjacent weights for faster inference.
        from utils.graph_optimize import merge_bn
        sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)
        self.__mod = DetModule(
            sym, data_names=['data', 'im_info', 'im_id', 'rec_id'],
            context=ctx)
        # Batch size is fixed to one image per forward call.
        self.__mod.bind(data_shapes=[('data', (1, 3, self.resizeParam[0],
                                               self.resizeParam[1])),
                                     ('im_info', (1, 3)), ('im_id', (1, )),
                                     ('rec_id', (1, ))],
                        for_training=False)
        self.__mod.set_params(arg_params, aux_params, allow_extra=False)
        self.__saveSymbol(sym, outFolder,
                          self.__pTest.model.prefix.split('/')[-1])
        self.__threshold = threshold
        self.outFolder = outFolder

    def __call__(self, imgFilename):  # detect onto image
        """Detect objects in one image, draw and save the visualization.

        Returns (detections, None) where detections is a list of dicts
        with 'cls', 'box' (xyxy, original-image coords) and 'score'.
        """
        roi_record, scale, img = self.__readImg(imgFilename)
        h, w = roi_record['data'][0].shape
        # Re-stack the three transformed channel planes into NCHW.
        im_c1 = roi_record['data'][0].reshape(1, 1, h, w)
        im_c2 = roi_record['data'][1].reshape(1, 1, h, w)
        im_c3 = roi_record['data'][2].reshape(1, 1, h, w)
        im_data = np.concatenate((im_c1, im_c2, im_c3), axis=1)
        im_info, im_id, rec_id = [(h, w, scale)], [1], [1]
        data = mx.io.DataBatch(data=[
            mx.nd.array(im_data),
            mx.nd.array(im_info),
            mx.nd.array(im_id),
            mx.nd.array(rec_id)
        ])
        self.__mod.forward(data, is_train=False)
        # extract results
        outputs = self.__mod.get_outputs(merge_multi_context=False)
        # assumes output order (rec_id, im_id, im_info, cls_score, bbox)
        # -- TODO confirm against the test symbol
        rid, id, info, cls, box = [x[0].asnumpy() for x in outputs]
        rid, id, info, cls, box = rid.squeeze(), id.squeeze(), info.squeeze(
        ), cls.squeeze(), box.squeeze()
        cls = cls[:, 1:]  # remove background
        # Map boxes back to original-image coordinates.
        box = box / scale
        output_record = dict(rec_id=rid,
                             im_id=id,
                             im_info=info,
                             bbox_xyxy=box,
                             cls_score=cls)
        output_record = self.__pTest.process_output([output_record], None)[0]
        final_result = self.__do_nms(output_record)
        # obtain representable output
        detections = []
        for cid, bbox in final_result.items():
            idx = np.where(bbox[:, -1] > self.__threshold)[0]
            for i in idx:
                final_box = bbox[i][:4]
                score = bbox[i][-1]
                detections.append({
                    'cls': cid,
                    'box': final_box,
                    'score': score
                })
        img_vis = self.__vis_detections(detections, img)
        # NOTE(review): joining outFolder with the full imgFilename assumes
        # a bare filename; an absolute path would escape outFolder -- verify.
        cv2.imwrite(os.path.join(self.outFolder, imgFilename), img_vis)
        #print(os.path.join(self.outFolder,imgFilename))
        return detections, None

    def __vis_detections(self, dets, img):
        """Draw boxes and class:score labels on img and return it."""
        font = cv2.FONT_HERSHEY_SIMPLEX
        for d in dets:
            box = d['box']
            clsID = d['cls']
            score = d['score']
            img = cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                                (255, 0, 0), 4)
            img = cv2.putText(img, str(clsID) + ': ' + str(round(score, 2)),
                              (box[0], box[1]), font, 1, (255, 0, 0), 2,
                              cv2.LINE_AA)
        return img

    def __do_nms(self, all_output):
        """Apply per-class score filtering and NMS.

        Returns a dict mapping class id -> kept detections
        [x1, y1, x2, y2, score].
        """
        box = all_output['bbox_xyxy']
        score = all_output['cls_score']
        final_dets = {}
        for cid in range(score.shape[1]):
            score_cls = score[:, cid]
            valid_inds = np.where(score_cls > self.__threshold)[0]
            box_cls = box[valid_inds]
            score_cls = score_cls[valid_inds]
            if valid_inds.shape[0] == 0:
                continue
            det = np.concatenate((box_cls, score_cls.reshape(-1, 1)),
                                 axis=1).astype(np.float32)
            det = self.__nms(det)
            #cls = coco[cid]
            final_dets[cid] = det
        return final_dets

    def __readImg(self, imgFilename):
        """Read an image and run the config transform chain over a stub
        roidb record.

        Returns (roi_record, scale, img): the transformed record, the
        resize scale, and the original BGR image for visualization.
        """
        img = cv2.imread(imgFilename, cv2.IMREAD_COLOR)
        height, width, channels = img.shape
        # Dummy gt fields so the training-style transforms can run.
        roi_record = {
            'gt_bbox': np.array([[0., 0., 0., 0.]]),
            'gt_class': np.array([0])
        }
        roi_record['image_url'] = imgFilename
        roi_record['h'] = height
        roi_record['w'] = width
        for trans in self.transform:
            trans.apply(roi_record)
        img_shape = [roi_record['h'], roi_record['w']]
        shorts, longs = min(img_shape), max(img_shape)
        scale = min(self.resizeParam[0] / shorts, self.resizeParam[1] / longs)
        return roi_record, scale, img

    def __saveSymbol(self, sym, outFolder, fnPrefix):
        """Save the (BN-merged) test symbol json under outFolder."""
        if not os.path.exists(outFolder):
            os.makedirs(outFolder)
        resFilename = os.path.join(outFolder, fnPrefix + "_symbol_test.json")
        sym.save(resFilename)


# Usage example (kept for reference):
#import mxnet as mx
#import argparse
#from infer import TDNDetector
#def parse_args():
#    parser = argparse.ArgumentParser(description='Test Detection')
#    parser.add_argument('--config', type=str, default='config/faster_r101v2c4_c5_256roi_1x.py', help='config file path')
#    parser.add_argument('--ctx', type=int, default=0, help='GPU index. Set negative value to use CPU')
#    #parser.add_argument('--inputs', type=str, nargs='+', required=True, default='', help='File(-s) to test')
#    parser.add_argument('--output', type=str, default='results', help='Where to store results')
#    parser.add_argument('--threshold', type=float, default=0.5, help='Detector threshold')
#    return parser.parse_args()
#if __name__ == "__main__":
#    args = parse_args()
#    ctx = mx.gpu(args.ctx) if args.ctx>=0 else args.cpu()
#    #imgFilenames = args.inputs
#    imgFilenames = ['car.jpg', 'COCO_val2014_000000581929.jpg']
#    detector = TDNDetector(args.config, ctx, args.output, args.threshold)
#    for i, imgFilename in enumerate(imgFilenames):
#        print(imgFilename)
#        dets,_= detector(imgFilename)
#        print(dets)