def build_quant_network(network):
    """Convert ``network`` with a ``QuantizationAwareTraining`` quantizer.

    Args:
        network: The network handed to ``QuantizationAwareTraining.quantize``.

    Returns:
        Whatever ``quantize`` returns for ``network``.
    """
    # Two-element lists are passed through unchanged to the quantizer.
    qat_settings = {
        "bn_fold": True,
        "per_channel": [True, False],
        "symmetric": [True, False],
        "one_conv_fold": False,
    }
    return QuantizationAwareTraining(**qat_settings).quantize(network)
def export_lenet(optim_option="QAT"):
    """Quantize a ``LeNet5Fusion`` network and export it as ``lenet_quant``.

    Args:
        optim_option: ``"LEARNED_SCALE"`` selects the learned-scale quantizer
            settings; any other value (default ``"QAT"``) selects the
            default quantizer settings.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target=device_target)
    cfg = quant_cfg

    # Fusion network that will be converted below.
    network = LeNet5Fusion(cfg.num_classes)

    # Pick the quantizer configuration for the requested optimize option.
    if optim_option != "LEARNED_SCALE":
        quantizer = QuantizationAwareTraining(
            quant_delay=0,
            bn_fold=False,
            freeze_bn=10000,
            per_channel=[True, False],
            symmetric=[True, False],
        )
    else:
        quantizer = QuantizationAwareTraining(
            bn_fold=False,
            per_channel=[True, False],
            symmetric=[True, True],
            narrow_range=[True, True],
            freeze_bn=0,
            quant_delay=0,
            one_conv_fold=True,
            optimize_option=OptimizeOption.LEARNED_SCALE,
        )
    network = quantizer.quantize(network)

    # Export with an all-ones float32 input of shape
    # [1, 1, image_height, image_width].
    input_dims = [1, 1, cfg.image_height, cfg.image_width]
    dummy_input = Tensor(np.ones(input_dims), mstype.float32)
    export(network, dummy_input, file_name="lenet_quant",
           file_format='MINDIR', quant_mode='AUTO')
def test_mobilenetv2_quant():
    """Train quantization-aware MobileNetV2 and check step time and loss.

    Runs in graph mode on the ``"Ascend"`` device target, then asserts that
    ``monitor.step_mseconds`` is below 650 and that the mean of the recorded
    step losses is below 2.32.
    """
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_ascend_quant
    print("training configure: {}".format(config))
    epoch_size = config.epoch_size

    # Network under test.
    network = mobilenetV2(num_classes=config.num_classes)

    # Loss: label smoothing only when a positive factor is configured.
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(
            smooth_factor=config.label_smooth,
            num_classes=config.num_classes,
        )
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    # Dataset; its size is used as the number of steps per epoch.
    dataset = create_dataset(
        dataset_path=dataset_path,
        config=config,
        repeat_num=1,
        batch_size=config.batch_size,
    )
    step_size = dataset.get_dataset_size()

    # Convert the fusion network into a quantization-aware network.
    quantizer = QuantizationAwareTraining(
        bn_fold=True,
        per_channel=[True, False],
        symmetric=[True, False],
    )
    network = quantizer.quantize(network)

    # Learning-rate schedule.
    lr = Tensor(get_lr(
        global_step=config.start_epoch * step_size,
        lr_init=0,
        lr_end=0,
        lr_max=config.lr,
        warmup_epochs=config.warmup_epochs,
        total_epochs=epoch_size + config.start_epoch,
        steps_per_epoch=step_size,
    ))

    # Optimizer over the parameters that require gradients.
    trainable = filter(lambda param: param.requires_grad,
                       network.get_parameters())
    opt = nn.Momentum(trainable, lr, config.momentum, config.weight_decay)

    # Model and training run.
    model = Model(network, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    model.train(epoch_size, dataset, callbacks=[monitor],
                dataset_sink_mode=False)
    print("============== End Training ==============")

    # Step-time check (presumably milliseconds, going by the attribute
    # name -- confirm against Monitor).
    max_step_time = 650
    step_time = monitor.step_mseconds
    print('train_time_used:{}'.format(step_time))
    assert step_time < max_step_time

    # Loss check on the mean of all recorded step losses.
    max_avg_step_loss = 2.32
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < max_avg_step_loss
def train_lenet_quant(optim_option="QAT"):
    """Train a quantization-aware ``LeNet5Fusion`` from a saved checkpoint.

    Args:
        optim_option: ``"LEARNED_SCALE"`` selects the learned-scale quantizer
            settings; any other value (default ``"QAT"``) selects the default
            settings. The value is also appended to the checkpoint prefix.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target=device_target)
    cfg = quant_cfg
    ckpt_path = './ckpt_lenet_noquant-10_1875.ckpt'
    ds_train = create_dataset(os.path.join(data_path, "train"),
                              cfg.batch_size, 1)
    step_size = ds_train.get_dataset_size()

    # Fusion network.
    network = LeNet5Fusion(cfg.num_classes)

    # Load the checkpoint at ckpt_path into the fusion network before it is
    # converted.
    param_dict = load_checkpoint(ckpt_path)
    load_nonquant_param_into_quant_net(network, param_dict)

    # Convert the fusion network into a quantization-aware network.
    if optim_option != "LEARNED_SCALE":
        quantizer = QuantizationAwareTraining(
            quant_delay=900,
            bn_fold=False,
            per_channel=[True, False],
            symmetric=[True, False],
        )
    else:
        quantizer = QuantizationAwareTraining(
            bn_fold=False,
            per_channel=[True, False],
            symmetric=[True, True],
            narrow_range=[True, True],
            freeze_bn=0,
            quant_delay=0,
            one_conv_fold=True,
            optimize_option=OptimizeOption.LEARNED_SCALE,
        )
    network = quantizer.quantize(network)

    # Loss and optimizer.
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)

    # Checkpoint callback; the save interval is epoch_size * step_size steps.
    config_ckpt = CheckpointConfig(
        save_checkpoint_steps=cfg.epoch_size * step_size,
        keep_checkpoint_max=cfg.keep_checkpoint_max,
    )
    ckpt_callback = ModelCheckpoint(prefix="ckpt_lenet_quant" + optim_option,
                                    config=config_ckpt)

    # Model and training run.
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

    print("============== Starting Training ==============")
    training_callbacks = [ckpt_callback, LossMonitor()]
    model.train(cfg['epoch_size'], ds_train, callbacks=training_callbacks,
                dataset_sink_mode=True)
    print("============== End Training ==============")
def test():
    """Evaluate a (optionally quantization-aware) YOLOv3-DarkNet53 checkpoint.

    Parses the command-line arguments, builds the network, loads the
    checkpoint given by ``args.pretrained``, runs inference over the dataset
    and logs the evaluation result returned by the detection engine.

    Raises:
        FileNotFoundError: If ``args.pretrained`` is not an existing file.
    """
    start_time = time.time()
    args = parse_args()

    # Logger writes into a time-stamped directory below args.log_path.
    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    # RANK_ID may be unset for a stand-alone run; int(None) would raise
    # TypeError, so fall back to rank 0.
    rank_id = int(os.environ.get('RANK_ID', '0'))
    args.logger = get_logger(args.outputs_dir, rank_id)

    context.reset_auto_parallel_context()
    parallel_mode = ParallelMode.STAND_ALONE
    context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                      gradients_mean=True, device_num=1)

    args.logger.info('Creating Network....')
    network = YOLOV3DarkNet53(is_training=False)

    config = ConfigYOLOV3DarkNet53()
    if args.testing_shape:
        config.test_img_shape = conver_testing_shape(args)

    # Convert the fusion network into a quantization-aware network.
    if config.quantization_aware:
        quantizer = QuantizationAwareTraining(bn_fold=True,
                                              per_channel=[True, False],
                                              symmetric=[True, False])
        network = quantizer.quantize(network)

    args.logger.info(args.pretrained)
    if not os.path.isfile(args.pretrained):
        message = '{} not exists or not a pre-trained file'.format(
            args.pretrained)
        args.logger.info(message)
        # Actually raise: asserting on an exception instance is always true
        # and therefore never signals the error.
        raise FileNotFoundError(message)

    # Drop optimizer 'moments.' entries and strip the 'yolo_network.' prefix
    # so the remaining keys can be loaded into the bare network.
    wrapper_prefix = 'yolo_network.'
    param_dict = load_checkpoint(args.pretrained)
    param_dict_new = {}
    for key, values in param_dict.items():
        if key.startswith('moments.'):
            continue
        if key.startswith(wrapper_prefix):
            param_dict_new[key[len(wrapper_prefix):]] = values
        else:
            param_dict_new[key] = values
    load_param_into_net(network, param_dict_new)
    args.logger.info('load_model {} success'.format(args.pretrained))

    data_root = args.data_root
    ann_file = args.annFile

    ds, data_size = create_yolo_dataset(data_root, ann_file,
                                        is_training=False,
                                        batch_size=args.per_batch_size,
                                        max_epoch=1, device_num=1,
                                        rank=rank_id, shuffle=False,
                                        config=config)

    args.logger.info('testing shape : {}'.format(config.test_img_shape))
    args.logger.info('totol {} images to eval'.format(data_size))

    network.set_train(False)

    # init detection engine
    detection = DetectionEngine(args)

    input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
    args.logger.info('Start inference....')
    for i, data in enumerate(ds.create_dict_iterator(num_epochs=1)):
        image = data["image"]
        image_shape = data["image_shape"]
        image_id = data["img_id"]

        prediction = network(image, input_shape)
        output_big, output_me, output_small = prediction
        output_big = output_big.asnumpy()
        output_me = output_me.asnumpy()
        output_small = output_small.asnumpy()
        image_id = image_id.asnumpy()
        image_shape = image_shape.asnumpy()

        # The engine receives the outputs in small, medium, big order.
        detection.detect([output_small, output_me, output_big],
                         args.per_batch_size, image_shape, image_id)
        if i % 1000 == 0:
            args.logger.info('Processing... {:.2f}% '.format(
                i * args.per_batch_size / data_size * 100))

    args.logger.info('Calculating mAP...')
    detection.do_nms_for_results()
    result_file_path = detection.write_result()
    args.logger.info('result file path: {}'.format(result_file_path))
    eval_result = detection.get_eval_result()

    cost_time = time.time() - start_time
    args.logger.info('\n=============coco eval reulst=========\n' + eval_result)
    args.logger.info('testing cost time {:.2f}h'.format(cost_time / 3600.))
def train():
    """Run the YOLOv3-DarkNet53 (optionally quantization-aware) training loop.

    Parses the command-line arguments, builds the network with its loss cell
    and optimizer, then iterates once over the dataset iterator, logging
    throughput every ``args.log_interval`` steps and driving the checkpoint
    callback manually when ``args.rank_save_ckpt_flag`` is set.
    """
    args = parse_args()
    args.logger.save_args(args)

    if args.need_profiler:
        from mindspore.profiler.profiling import Profiler
        profiler = Profiler(output_path=args.outputs_dir, is_detail=True,
                            is_show_op_path=True)

    loss_meter = AverageMeter('loss')

    # Parallel context: data parallel across the group when distributed,
    # otherwise a single stand-alone device.
    context.reset_auto_parallel_context()
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        degree = get_group_size()
    else:
        parallel_mode = ParallelMode.STAND_ALONE
        degree = 1
    context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                      gradients_mean=True, device_num=degree)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)
    load_yolov3_quant_params(args, network)

    config = ConfigYOLOV3DarkNet53()
    # Convert the fusion network into a quantization-aware network.
    if config.quantization_aware:
        quantizer = QuantizationAwareTraining(bn_fold=True,
                                              per_channel=[True, False],
                                              symmetric=[True, False])
        network = quantizer.quantize(network)

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    # Command-line overrides for the network config.
    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor
    if args.training_shape:
        config.multi_scale = [conver_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    ds, data_size = create_yolo_dataset(image_dir=args.data_root,
                                        anno_path=args.annFile,
                                        is_training=True,
                                        batch_size=args.per_batch_size,
                                        max_epoch=args.max_epoch,
                                        device_num=args.group_size,
                                        rank=args.rank,
                                        config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(
        data_size / args.per_batch_size / args.group_size)

    # Without an explicit interval, checkpoint once per epoch.
    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch

    lr = get_lr(args)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)

    network = TrainingWrapper(network, opt)
    network.set_train()

    if args.rank_save_ckpt_flag:
        # Checkpoint callback, driven by hand from the loop below.
        ckpt_max_num = (args.max_epoch * args.steps_per_epoch
                        // args.ckpt_interval)
        ckpt_config = CheckpointConfig(
            save_checkpoint_steps=args.ckpt_interval,
            keep_checkpoint_max=ckpt_max_num)
        save_ckpt_path = os.path.join(args.outputs_dir,
                                      'ckpt_' + str(args.rank) + '/')
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=save_ckpt_path,
                                  prefix='{}'.format(args.rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = network
        cb_params.epoch_num = ckpt_max_num
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)

    shape_record = ShapeRecord()
    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(i, input_shape[0]))
        shape_record.set(input_shape)

        images = Tensor.from_numpy(images)
        annos = data["annotation"]

        # A group size of one uses the batch preprocessor, otherwise the
        # "single" variant; both are unpacked into the same six arrays.
        if args.group_size == 1:
            preprocess = batch_preprocess_true_box
        else:
            preprocess = batch_preprocess_true_box_single
        y_true_0, y_true_1, y_true_2, gt_box0, gt_box1, gt_box2 = \
            preprocess(annos, config, input_shape)

        targets = [
            Tensor.from_numpy(array)
            for array in (y_true_0, y_true_1, y_true_2,
                          gt_box0, gt_box1, gt_box2)
        ]

        # The network receives the spatial shape in reversed order.
        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)
        loss = network(images, *targets, input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
            # Report progress to the checkpoint callback.
            cb_params.cur_step_num = i + 1  # current step number
            cb_params.batch_num = i + 2
            ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            steps_done = i - old_progress
            fps = (args.per_batch_size * steps_done
                   * args.group_size / time_used)
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(
                        epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0 and args.rank_save_ckpt_flag:
            cb_params.cur_epoch_num += 1

        # When profiling, analyse and stop once step index 10 is reached.
        if args.need_profiler and i == 10:
            profiler.analyse()
            break

    args.logger.info('==========end training===============')
def test_resnet50_quant():
    """Train quantization-aware ResNet-50 and check the average step loss.

    Runs in graph mode on the ``"Ascend"`` device target and asserts that the
    mean of the step losses recorded by the monitor is below 2.40.
    """
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_quant
    print("training configure: {}".format(config))
    epoch_size = config.epoch_size

    # Network under test.
    net = resnet50_quant(class_num=config.class_num)
    net.set_train(True)

    # Loss; the smoothing factor is forced to zero when smoothing is off.
    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=config.label_smooth_factor,
                        num_classes=config.class_num)

    # Dataset; its size is used as the number of steps per epoch.
    dataset = create_dataset(
        dataset_path=dataset_path,
        config=config,
        repeat_num=1,
        batch_size=config.batch_size,
    )
    step_size = dataset.get_dataset_size()

    # Convert the fusion network into a quantization-aware network.
    quantizer = QuantizationAwareTraining(
        bn_fold=True,
        per_channel=[True, False],
        symmetric=[True, False],
    )
    net = quantizer.quantize(net)

    # Cosine learning-rate schedule.
    lr = Tensor(get_lr(
        lr_init=config.lr_init,
        lr_end=0.0,
        lr_max=config.lr_max,
        warmup_epochs=config.warmup_epochs,
        total_epochs=config.epoch_size,
        steps_per_epoch=step_size,
        lr_decay_mode='cosine',
    ))

    # Optimizer over the parameters that require gradients.
    trainable = filter(lambda param: param.requires_grad,
                       net.get_parameters())
    opt = Momentum(trainable, lr, config.momentum, config.weight_decay,
                   config.loss_scale)

    # NOTE: no loss_scale_manager or metrics are passed to Model here;
    # config.loss_scale is only given to the optimizer above.
    model = Model(net, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    model.train(epoch_size, dataset, callbacks=[monitor],
                dataset_sink_mode=False)
    print("============== End Training ==============")

    # Loss check on the mean of all recorded step losses.
    max_avg_step_loss = 2.40
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < max_avg_step_loss
loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) # define dataset dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True, repeat_num=1, batch_size=config.batch_size, target=args_opt.device_target) step_size = dataset.get_dataset_size() # convert fusion network to quantization aware network quantizer = QuantizationAwareTraining(bn_fold=True, per_channel=[True, False], symmetric=[True, False], one_conv_fold=False) net = quantizer.quantize(net) # get learning rate lr = get_lr(lr_init=config.lr_init, lr_end=0.0, lr_max=config.lr_max, warmup_epochs=config.warmup_epochs, total_epochs=config.epoch_size, steps_per_epoch=step_size, lr_decay_mode='cosine') if args_opt.pre_trained: lr = lr[config.pretrained_epoch_size * step_size:] lr = Tensor(lr)