def test_dp_model():
    """Smoke-test differentially private training of LeNet-5 with a Gaussian mechanism."""
    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
    l2_norm_bound = 1.0
    initial_noise_multiplier = 0.01
    net = LeNet5()
    batch_size = 32
    batches = 128
    epochs = 1
    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    optim = SGD(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
    # build the Gaussian noise mechanism used to perturb clipped gradients
    gaussian_mech = DPOptimizerClassFactory()
    gaussian_mech.set_mechanisms('Gaussian',
                                 norm_bound=l2_norm_bound,
                                 initial_noise_multiplier=initial_noise_multiplier)
    model = DPModel(micro_batches=2,
                    norm_clip=l2_norm_bound,
                    dp_mech=gaussian_mech.mech,
                    network=net,
                    loss_fn=loss,
                    optimizer=optim,
                    metrics=None)
    ms_ds = ds.GeneratorDataset(dataset_generator(batch_size, batches),
                                ['data', 'label'])
    ms_ds.set_dataset_size(batch_size * batches)
    model.train(epochs, ms_ds)
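# NOTE: `dataset_generator` is referenced above but not defined in this snippet.
# The sketch below is an assumption, not the original helper: it yields random
# (data, label) batches shaped for LeNet-5 (1x32x32 inputs, 10 classes), which is
# enough to drive DPModel.train() as a smoke test.
import numpy as np

def dataset_generator(batch_size, batches):
    """Yield `batches` random (data, label) pairs (hypothetical stand-in)."""
    for _ in range(batches):
        data = np.random.randn(batch_size, 1, 32, 32).astype(np.float32)
        label = np.random.randint(0, 10, size=batch_size).astype(np.int32)
        yield data, label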
def test_1d_train():
    """Run two training steps of a BatchNorm1d network and check the input gradients,
    updated affine parameters and moving statistics against expected values."""
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    bn_net = BatchNorm1d_Net(use_batch_statistics=None)
    grad_net = GradByListNet(bn_net)
    optimizer = SGD(bn_net.trainable_params(), learning_rate=0.01, momentum=0.9)
    bn_net.set_train(True)

    # two input batches and the corresponding output sensitivities
    x1 = np.array([[1.6243454, -0.6117564],
                   [-0.5281718, -1.0729686],
                   [0.86540765, -2.3015387],
                   [1.7448118, -0.7612069],
                   [0.3190391, -0.24937038]]).astype(np.float32)
    dy1 = np.array([[1.4621079, -2.0601406],
                    [-0.3224172, -0.38405436],
                    [1.1337694, -1.0998913],
                    [-0.1724282, -0.8778584],
                    [0.04221375, 0.58281523]]).astype(np.float32)
    x2 = np.array([[-0.19183555, -0.887629],
                   [-0.7471583, 1.6924546],
                   [0.05080776, -0.6369957],
                   [0.19091548, 2.1002553],
                   [0.12015896, 0.6172031]]).astype(np.float32)
    dy2 = np.array([[0.30017033, -0.35224986],
                    [-1.1425182, -0.34934273],
                    [-0.20889424, 0.5866232],
                    [0.8389834, 0.9311021],
                    [0.2855873, 0.8851412]]).astype(np.float32)
    x_train = [x1, x2]
    dy_train = [dy1, dy2]

    # expected input gradients and expected parameter/statistic values after each step
    dx1 = np.array([[0.8120, -2.0371],
                    [-0.2202, 0.5837],
                    [0.8040, 0.1950],
                    [-1.1823, -0.2786],
                    [-0.2135, 1.5371]]).astype(np.float32)
    gamma1 = np.array([0.9821, 0.9873]).astype(np.float32)
    beta1 = np.array([-0.0214, 0.0384]).astype(np.float32)
    mean1 = np.array([0.7246, -0.8994]).astype(np.float32)
    variance1 = np.array([0.9036, 0.6559]).astype(np.float32)
    dx2 = np.array([[1.1955, -0.4247],
                    [-0.2425, -0.6789],
                    [-1.4563, 0.3237],
                    [0.8752, 0.3351],
                    [-0.3719, 0.4448]]).astype(np.float32)
    gamma2 = np.array([0.9370, 0.9687]).astype(np.float32)
    beta2 = np.array([-0.0415, 0.0559]).astype(np.float32)
    mean2 = np.array([-0.0314, 0.4294]).astype(np.float32)
    variance2 = np.array([0.2213, 1.6822]).astype(np.float32)
    exp_dx = [dx1, dx2]
    exp_gamma = [gamma1, gamma2]
    exp_beta = [beta1, beta2]
    exp_mean = [mean1, mean2]
    exp_variance = [variance1, variance2]

    for data in zip(x_train, dy_train, exp_dx, exp_gamma, exp_beta, exp_mean, exp_variance):
        output = grad_net(Tensor(data[0]), Tensor(data[1]))
        assert np.allclose(output[0][0].asnumpy(), data[2], atol=1.0e-4)
        optimizer(output[1])
        assert np.allclose(bn_net.bn1.gamma.asnumpy(), data[3], atol=1.0e-4)
        assert np.allclose(bn_net.bn1.beta.asnumpy(), data[4], atol=1.0e-4)
        assert np.allclose(bn_net.bn1.moving_mean.asnumpy(), data[5], atol=1.0e-4)
        assert np.allclose(bn_net.bn1.moving_variance.asnumpy(), data[6], atol=1.0e-4)
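# NOTE: `BatchNorm1d_Net` and `GradByListNet` are helpers assumed by the test above and
# are not part of this snippet. The sketch below shows one common way such a gradient
# wrapper is written in MindSpore (an assumption, not the original definition): with
# get_all/get_by_list/sens_param enabled, calling it returns the gradients w.r.t. the
# inputs and w.r.t. the trainable parameters, using `dy` as the output sensitivity,
# which matches how `output[0][0]` and `output[1]` are consumed in the test.
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import ParameterTuple


class GradByListNet(nn.Cell):
    def __init__(self, network):
        super(GradByListNet, self).__init__()
        self.grad = ops.GradOperation(get_all=True, get_by_list=True, sens_param=True)
        self.network = network
        self.params = ParameterTuple(network.trainable_params())

    def construct(self, x, dy):
        grad_fn = self.grad(self.network, self.params)
        return grad_fn(x, dy)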
def dpn_train(args):
    # init context
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                        save_graphs=False, device_id=device_id)
    # init distributed
    if args.is_distributed:
        init()
        args.rank = get_rank()
        args.group_size = get_group_size()
        context.set_auto_parallel_context(device_num=args.group_size,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True)
    # decide whether only the master rank or every rank saves checkpoints
    # (also compatible with model parallel)
    args.rank_save_ckpt_flag = 0
    if args.is_save_on_master:
        if args.rank == 0:
            args.rank_save_ckpt_flag = 1
    else:
        args.rank_save_ckpt_flag = 1
    # create dataset
    args.train_dir = os.path.join(args.data_dir, 'train')
    args.eval_dir = os.path.join(args.data_dir, 'val')
    train_dataset = classification_dataset(args.train_dir,
                                           image_size=args.image_size,
                                           per_batch_size=args.batch_size,
                                           max_epoch=1,
                                           num_parallel_workers=args.num_parallel_workers,
                                           shuffle=True,
                                           rank=args.rank,
                                           group_size=args.group_size)
    if args.eval_each_epoch:
        print("create eval_dataset")
        eval_dataset = classification_dataset(args.eval_dir,
                                              image_size=args.image_size,
                                              per_batch_size=args.batch_size,
                                              max_epoch=1,
                                              num_parallel_workers=args.num_parallel_workers,
                                              shuffle=False,
                                              rank=args.rank,
                                              group_size=args.group_size,
                                              mode='eval')
    train_step_size = train_dataset.get_dataset_size()
    # choose net
    net = dpns[args.backbone](num_classes=args.num_classes)
    # load checkpoint
    if os.path.isfile(args.pretrained):
        print("load ckpt")
        load_param_into_net(net, load_checkpoint(args.pretrained))
    # learning rate schedule
    if args.lr_schedule == 'drop':
        print("lr_schedule:drop")
        lr = Tensor(get_lr_drop(global_step=args.global_step,
                                total_epochs=args.epoch_size,
                                steps_per_epoch=train_step_size,
                                lr_init=args.lr_init,
                                factor=args.factor))
    elif args.lr_schedule == 'warmup':
        print("lr_schedule:warmup")
        lr = Tensor(get_lr_warmup(global_step=args.global_step,
                                  total_epochs=args.epoch_size,
                                  steps_per_epoch=train_step_size,
                                  lr_init=args.lr_init,
                                  lr_max=args.lr_max,
                                  warmup_epochs=args.warmup_epochs))
    # optimizer
    opt = SGD(net.trainable_params(),
              lr,
              momentum=args.momentum,
              weight_decay=args.weight_decay,
              loss_scale=args.loss_scale_num)
    # loss scale
    loss_scale = FixedLossScaleManager(args.loss_scale_num, False)
    # loss function
    if args.dataset == "imagenet-1K":
        print("Use SoftmaxCrossEntropyWithLogits")
        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    else:
        if not args.label_smooth:
            args.label_smooth_factor = 0.0
        print("Use Label_smooth CrossEntropy")
        loss = CrossEntropy(smooth_factor=args.label_smooth_factor,
                            num_classes=args.num_classes)
    # create model
    model = Model(net,
                  amp_level="O2",
                  keep_batchnorm_fp32=False,
                  loss_fn=loss,
                  optimizer=opt,
                  loss_scale_manager=loss_scale,
                  metrics={'top_1_accuracy', 'top_5_accuracy'})
    # loss/time monitor & checkpoint callbacks
    loss_cb = LossMonitor()
    time_cb = TimeMonitor(data_size=train_step_size)
    cb = [loss_cb, time_cb]
    if args.rank_save_ckpt_flag:
        if args.eval_each_epoch:
            save_cb = SaveCallback(model, eval_dataset, args.ckpt_path)
            cb += [save_cb]
        else:
            config_ck = CheckpointConfig(save_checkpoint_steps=train_step_size,
                                         keep_checkpoint_max=args.keep_checkpoint_max)
            ckpoint_cb = ModelCheckpoint(prefix="dpn",
                                         directory=args.ckpt_path,
                                         config=config_ck)
            cb.append(ckpoint_cb)
    # train model
    model.train(args.epoch_size, train_dataset, callbacks=cb)
def main():
    """Train EfficientNet-B0 on the configured dataset."""
    args, _ = parser.parse_known_args()
    rank_id, rank_size = 0, 1
    context.set_context(mode=context.GRAPH_MODE)

    if args.distributed:
        if args.GPU:
            init("nccl")
            context.set_context(device_target='GPU')
        else:
            raise ValueError("Only GPU training is supported.")
        context.reset_auto_parallel_context()
        rank_id = get_rank()
        rank_size = get_group_size()
        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True,
                                          device_num=rank_size)
    else:
        if args.GPU:
            context.set_context(device_target='GPU')
        else:
            raise ValueError("Only GPU training is supported.")

    net = efficientnet_b0(num_classes=cfg.num_classes,
                          drop_rate=cfg.drop,
                          drop_connect_rate=cfg.drop_connect,
                          global_pool=cfg.gp,
                          bn_tf=cfg.bn_tf)

    train_data_url = args.data_path
    train_dataset = create_dataset(cfg.batch_size, train_data_url,
                                   workers=cfg.workers, distributed=args.distributed)
    batches_per_epoch = train_dataset.get_dataset_size()

    loss_cb = LossMonitor(per_print_times=batches_per_epoch)
    loss = LabelSmoothingCrossEntropy(smooth_factor=cfg.smoothing)
    time_cb = TimeMonitor(data_size=batches_per_epoch)
    loss_scale_manager = FixedLossScaleManager(cfg.loss_scale, drop_overflow_update=False)

    callbacks = [time_cb, loss_cb]
    if cfg.save_checkpoint:
        config_ck = CheckpointConfig(save_checkpoint_steps=batches_per_epoch,
                                     keep_checkpoint_max=cfg.keep_checkpoint_max)
        ckpoint_cb = ModelCheckpoint(prefix=cfg.model,
                                     directory='./ckpt_' + str(rank_id) + '/',
                                     config=config_ck)
        callbacks += [ckpoint_cb]

    lr = Tensor(get_lr(base_lr=cfg.lr,
                       total_epochs=cfg.epochs,
                       steps_per_epoch=batches_per_epoch,
                       decay_steps=cfg.decay_epochs,
                       decay_rate=cfg.decay_rate,
                       warmup_steps=cfg.warmup_epochs,
                       warmup_lr_init=cfg.warmup_lr_init,
                       global_epoch=cfg.resume_start_epoch))

    if cfg.opt == 'sgd':
        optimizer = SGD(net.trainable_params(),
                        learning_rate=lr,
                        momentum=cfg.momentum,
                        weight_decay=cfg.weight_decay,
                        loss_scale=cfg.loss_scale)
    elif cfg.opt == 'rmsprop':
        optimizer = RMSProp(net.trainable_params(),
                            learning_rate=lr,
                            decay=0.9,
                            weight_decay=cfg.weight_decay,
                            momentum=cfg.momentum,
                            epsilon=cfg.opt_eps,
                            loss_scale=cfg.loss_scale)

    loss.add_flags_recursive(fp32=True, fp16=False)

    if args.resume:
        ckpt = load_checkpoint(args.resume)
        load_param_into_net(net, ckpt)

    model = Model(net, loss, optimizer,
                  loss_scale_manager=loss_scale_manager,
                  amp_level=cfg.amp_level)

    # callbacks = callbacks if is_master else []
    if args.resume:
        real_epoch = cfg.epochs - cfg.resume_start_epoch
        model.train(real_epoch, train_dataset,
                    callbacks=callbacks, dataset_sink_mode=True)
    else:
        model.train(cfg.epochs, train_dataset,
                    callbacks=callbacks, dataset_sink_mode=True)
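# NOTE: `get_lr` above is an external helper whose exact schedule is not shown in this
# snippet. The sketch below is one plausible implementation (an assumption, not the
# original): linear warmup from `warmup_lr_init` to `base_lr`, then staircase
# exponential decay by `decay_rate` every `decay_steps` epochs, flattened into one
# learning-rate value per step and truncated so training can resume from `global_epoch`.
import numpy as np

def get_lr(base_lr, total_epochs, steps_per_epoch, decay_steps, decay_rate,
           warmup_steps, warmup_lr_init, global_epoch=0):
    lr_each_step = []
    for epoch in range(total_epochs):
        if epoch < warmup_steps:
            lr = warmup_lr_init + (base_lr - warmup_lr_init) * (epoch + 1) / warmup_steps
        else:
            lr = base_lr * decay_rate ** ((epoch - warmup_steps) // decay_steps)
        lr_each_step.extend([lr] * steps_per_epoch)
    return np.array(lr_each_step, dtype=np.float32)[global_epoch * steps_per_epoch:]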
def main(): """Main entrance for training""" args = parser.parse_args() print(sys.argv) devid, args.rank_id, args.rank_size = 0, 0, 1 context.set_context(mode=context.GRAPH_MODE) if args.distributed: if args.GPU: init("nccl") context.set_context(device_target='GPU') else: init() devid = int(os.getenv('DEVICE_ID')) context.set_context(device_target='Ascend', device_id=devid, reserve_class_name_in_scope=False) context.reset_auto_parallel_context() args.rank_id = get_rank() args.rank_size = get_group_size() context.set_auto_parallel_context( parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True, device_num=args.rank_size) else: if args.GPU: context.set_context(device_target='GPU') is_master = not args.distributed or (args.rank_id == 0) # parse model argument assert args.model.startswith( "tinynet"), "Only Tinynet models are supported." _, sub_name = args.model.split("_") net = tinynet(sub_model=sub_name, num_classes=args.num_classes, drop_rate=args.drop, drop_connect_rate=args.drop_connect, global_pool="avg", bn_tf=args.bn_tf, bn_momentum=args.bn_momentum, bn_eps=args.bn_eps) if is_master: print("Total number of parameters:", count_params(net)) # input image size of the network input_size = net.default_cfg['input_size'][1] train_dataset = val_dataset = None train_data_url = os.path.join(args.data_path, 'train') val_data_url = os.path.join(args.data_path, 'val') val_dataset = create_dataset_val(args.batch_size, val_data_url, workers=args.workers, distributed=False, input_size=input_size) if args.train: train_dataset = create_dataset(args.batch_size, train_data_url, workers=args.workers, distributed=args.distributed, input_size=input_size) batches_per_epoch = train_dataset.get_dataset_size() loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing, num_classes=args.num_classes) time_cb = TimeMonitor(data_size=batches_per_epoch) loss_scale_manager = FixedLossScaleManager(args.loss_scale, drop_overflow_update=False) lr_array = get_lr(base_lr=args.lr, total_epochs=args.epochs, steps_per_epoch=batches_per_epoch, decay_epochs=args.decay_epochs, decay_rate=args.decay_rate, warmup_epochs=args.warmup_epochs, warmup_lr_init=args.warmup_lr, global_epoch=0) lr = Tensor(lr_array) loss_cb = LossMonitor(lr_array, args.epochs, per_print_times=args.per_print_times, start_epoch=0) param_group = add_weight_decay(net, weight_decay=args.weight_decay) if args.opt == 'sgd': if is_master: print('Using SGD optimizer') optimizer = SGD(param_group, learning_rate=lr, momentum=args.momentum, weight_decay=args.weight_decay, loss_scale=args.loss_scale) elif args.opt == 'rmsprop': if is_master: print('Using rmsprop optimizer') optimizer = RMSProp(param_group, learning_rate=lr, decay=0.9, weight_decay=args.weight_decay, momentum=args.momentum, epsilon=args.opt_eps, loss_scale=args.loss_scale) loss.add_flags_recursive(fp32=True, fp16=False) eval_metrics = { 'Validation-Loss': Loss(), 'Top1-Acc': Top1CategoricalAccuracy(), 'Top5-Acc': Top5CategoricalAccuracy() } if args.ckpt: ckpt = load_checkpoint(args.ckpt) load_param_into_net(net, ckpt) net.set_train(False) model = Model(net, loss, optimizer, metrics=eval_metrics, loss_scale_manager=loss_scale_manager, amp_level=args.amp_level) net_ema = copy.deepcopy(net) net_ema.set_train(False) assert args.ema_decay > 0, "EMA should be used in tinynet training." 
    ema_cb = EmaEvalCallBack(network=net,
                             ema_network=net_ema,
                             loss_fn=loss,
                             eval_dataset=val_dataset,
                             decay=args.ema_decay,
                             save_epoch=args.ckpt_save_epoch,
                             dataset_sink_mode=args.dataset_sink,
                             start_epoch=0)

    callbacks = [loss_cb, ema_cb, time_cb] if is_master else []

    if is_master:
        print("Training on " + args.model + " with " + str(args.num_classes) + " classes")

    model.train(args.epochs, train_dataset,
                callbacks=callbacks,
                dataset_sink_mode=args.dataset_sink)
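# NOTE: `add_weight_decay` used in main() above is an assumed helper. The sketch below
# shows the usual pattern (an assumption, not the original implementation): biases and
# 1-D parameters such as BatchNorm gamma/beta go into a group with zero weight decay,
# while all other weights keep the configured value. MindSpore optimizers accept such
# a list of parameter-group dicts directly.
def add_weight_decay(net, weight_decay=1e-5, skip_list=()):
    decay_params, no_decay_params = [], []
    for param in net.trainable_params():
        if len(param.shape) == 1 or param.name.endswith(".bias") or param.name in skip_list:
            no_decay_params.append(param)
        else:
            decay_params.append(param)
    return [{'params': no_decay_params, 'weight_decay': 0.0},
            {'params': decay_params, 'weight_decay': weight_decay}]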
# keep only the backbone weights from the pretrained checkpoint
for item in list(param_dict.keys()):
    if not item.startswith('backbone'):
        param_dict.pop(item)
# cast the remaining checkpoint tensors to float32 before loading
for key, value in param_dict.items():
    tensor = value.asnumpy().astype(np.float32)
    param_dict[key] = Parameter(tensor, key)
load_param_into_net(net, param_dict)

loss = LossNet()
lr = Tensor(dynamic_lr(config, dataset_size), mstype.float32)
opt = SGD(params=net.trainable_params(),
          learning_rate=lr,
          momentum=config.momentum,
          weight_decay=config.weight_decay,
          loss_scale=config.loss_scale)
net_with_loss = WithLossCell(net, loss)
if args_opt.run_distribute:
    net = TrainOneStepCell(net_with_loss, opt,
                           sens=config.loss_scale,
                           reduce_flag=True,
                           mean=True,
                           degree=device_num)
else:
    net = TrainOneStepCell(net_with_loss, opt, sens=config.loss_scale)

time_cb = TimeMonitor(data_size=dataset_size)
loss_cb = LossCallBack(rank_id=rank)
    # (fragment: the line below is the tail of the statement that presumably builds `base_params`)
        net.get_parameters())
    optimizer_P = SGD([{'params': base_params, 'lr': 0.1 * args.lr},
                       {'params': net.bottleneck.get_parameters(), 'lr': args.lr},
                       {'params': net.classifier.get_parameters(), 'lr': args.lr},
                       {'params': net.wpa.get_parameters(), 'lr': args.lr},
                       # {'params': net.attention_0.parameters(), 'lr': args.lr},
                       # {'params': net.attention_1.parameters(), 'lr': args.lr},
                       # {'params': net.attention_2.parameters(), 'lr': args.lr},
                       # {'params': net.attention_3.parameters(), 'lr': args.lr},
                       # {'params': net.out_att.parameters(), 'lr': args.lr},
                       ],
                      learning_rate=args.lr,
                      weight_decay=5e-4,
                      nesterov=True,
                      momentum=0.9)
elif args.optim == 'adam':