def _valid_epoch(self):
    if self.config.mixup and self.config.loss.type == 'CrossEntropyLoss':
        from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
        loss_fn = SoftmaxCrossEntropyWithLogits(sparse=True)
        self.ms_model = MsModel(network=self.model,
                                loss_fn=loss_fn,
                                optimizer=self.optimizer,
                                metrics={self.metric_name: self.valid_metrics()})
    self.callbacks.before_valid()
    try:
        eval_metrics = self.ms_model.eval(valid_dataset=self.valid_loader,
                                          dataset_sink_mode=self.dataset_sink_mode)
        self.valid_metrics.update(eval_metrics)
        valid_logs = dict()
        valid_logs['cur_valid_perfs'] = self.valid_metrics.results
        self.callbacks.after_valid(valid_logs)
    except RuntimeError as exc:
        logging.warning("RuntimeError occurred when evaluating the model; skipping evaluation for this model.")
        logging.warning("The RuntimeError message is: {}.".format(exc))
def test_batchnorm_batch_parallel():
    num_classes = 1001
    batch_size = 32
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2
    rank_size = 0

    predict = Tensor(np.ones([batch_size, 3, 224, 224]), dtype=ms.float32)
    label = Tensor(np.ones([batch_size]), dtype=ms.int32)
    dataset = DatasetLenet(predict, label, 2)
    net = batchnorm_net(num_classes)

    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   learning_rate, momentum)

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL,
                                      device_num=dev_num)
    context.set_context(mode=context.GRAPH_MODE)

    model = Model(net, loss, opt)
    model.train(epoch_size, dataset, dataset_sink_mode=False)
def test_net(network, data_path, ckpt):
    """Define the evaluation method."""
    print("============== Starting Testing ==============")
    # load the saved model for evaluation
    load_checkpoint(ckpt, net=network)
    # load testing dataset
    ds_eval = create_dataset(False, data_path)
    net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # evaluate the network the checkpoint was loaded into
    model = Model(network, net_loss, metrics={"Accuracy": Accuracy()})
    # model = Model(network, net_loss, metrics={"Accuracy": Accuracy()}, amp_level="O3")
    acc = model.eval(ds_eval, dataset_sink_mode=False)
    print("============== Accuracy:{} ==============".format(acc))
def test_resnet_model_parallel():
    num_classes = 1024
    batch_size = 32
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(device_num=dev_num, global_rank=0)
    context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL,
                                      device_num=dev_num)
    context.set_context(mode=context.GRAPH_MODE)

    predict = Tensor(np.ones([batch_size, 64, 112, 112]), dtype=ms.float32)
    label = Tensor(np.ones([batch_size]), dtype=ms.int32)
    dataset = DatasetLenet(predict, label, 2)
    net = resnet_model_parallel_net(num_classes)

    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss.softmax_cross_entropy.shard(((dev_num, 1), (dev_num, 1)))
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   learning_rate, momentum)

    model = Model(net, loss, opt)
    model.train(epoch_size, dataset, dataset_sink_mode=False)
def __init__(self, model=None):
    """Initialize the trainer with the provided model.

    Arguments:
        model: The model to train (optional; fetched from the registry if omitted).
    """
    super().__init__()

    if hasattr(Config().trainer, 'cpuonly') and Config().trainer.cpuonly:
        mindspore.context.set_context(mode=mindspore.context.PYNATIVE_MODE,
                                      device_target='CPU')
    else:
        mindspore.context.set_context(mode=mindspore.context.PYNATIVE_MODE,
                                      device_target='GPU')

    if model is None:
        self.model = models_registry.get()
    else:
        # use the provided model
        self.model = model

    # Initialize the loss criterion
    loss_criterion = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    # Initialize the optimizer
    optimizer = nn.Momentum(self.model.trainable_params(),
                            Config().trainer.learning_rate,
                            Config().trainer.momentum)

    self.mindspore_model = mindspore.Model(self.model,
                                           loss_criterion,
                                           optimizer,
                                           metrics={"Accuracy": Accuracy()})
def train_common(net):
    batch_size = 32
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2
    device_num = 4

    context.reset_auto_parallel_context()
    auto_parallel_context().set_enable_all_reduce_fusion(enable_all_reduce_fusion=True)
    context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL,
                                      device_num=device_num,
                                      parameter_broadcast=False)
    context.set_context(mode=context.GRAPH_MODE)

    predict = Tensor(np.ones([batch_size, 128]), dtype=ms.float32)
    label = Tensor(np.ones([batch_size]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)

    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, opt)
    model.train(epoch_size, dataset, dataset_sink_mode=False)

    allreduce_fusion_dict = _executor._get_allreduce_fusion(model._train_network)
    print(allreduce_fusion_dict)
    return allreduce_fusion_dict
def run(args):
    ms.context.set_context(mode=ms.context.GRAPH_MODE, device_target=args.device)
    dataset_sink_mode = False
    download_dataset(args.data_dir)

    # define the loss function
    net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # create the network
    network = LeNet5()
    # define the optimizer
    net_opt = build_optimizer(args, network)

    config_ck = CheckpointConfig(save_checkpoint_steps=1875, keep_checkpoint_max=10)
    # save the network model and parameters for subsequent fine-tuning
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck)

    # group layers into an object with training and evaluation features
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
    if args.init_ckpt:
        load_ckpt(network, args.init_ckpt)
    train_net(network, model, args, ckpoint_cb, dataset_sink_mode)
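# The build_optimizer helper called in run() above is not defined in this
# snippet. A minimal sketch of what it might look like, assuming args exposes
# `lr` and `momentum` attributes (assumed names, not confirmed by the source):
import mindspore.nn as nn

def build_optimizer(args, network):
    # Plain momentum SGD over all trainable parameters.
    return nn.Momentum(network.trainable_params(), args.lr, args.momentum)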
def bn_common(parallel_mode, train_flag, strategy_loss=None):
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8)
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2
    rank_size = 8

    predict = Tensor(np.ones([32, 512]), dtype=ms.float32)
    label = Tensor(np.ones([32]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = bn_net()

    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss.softmax_cross_entropy.shard(strategy_loss)
    opt = Momentum(net.trainable_params(), learning_rate, momentum, 0.0001,
                   1024 * rank_size)

    if not train_flag:
        net = WithLossCell(net, loss)
        net.set_train()

    if parallel_mode == ParallelMode.DATA_PARALLEL:
        context.set_auto_parallel_context(parameter_broadcast=True)

    model = Model(net, loss, opt)
    if train_flag:
        model.train(epoch_size, dataset, dataset_sink_mode=False)
    else:
        model._predict(predict, label)
def loss_scale_manager_common(strategy1):
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL,
                                      device_num=8)

    predict = Tensor(np.ones([32, 128]), dtype=ms.float32)
    label = Tensor(np.ones([32]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = all_to_all_net(strategy1)

    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    scale_manager = DynamicLossScaleManager(32, 2, 2000)
    model = Model(net, loss, opt, loss_scale_manager=scale_manager)

    # When no GE backend exists, self._train_network(*next_element) returns a
    # tensor instead of consuming the inputs, so training should raise TypeError.
    try:
        model.train(epoch_size, dataset, dataset_sink_mode=False)
    except TypeError:
        pass
    else:
        assert False
def main():
    """Main function."""
    os.environ["DEVICE_NUM"] = "1"
    os.environ["RANK_ID"] = "0"
    target = 'Ascend'
    context.set_context(mode=context.GRAPH_MODE, device_target=target)

    # Step 1: create the dataset for evaluation, prepare the input data,
    # initialize the network, and load the pretrained checkpoint into it.
    # Ensure the network is functioning properly before quant_resnet50.
    dataset = create_dataset(dataset_path=ARGS_OPT.dataset_path,
                             do_train=False,
                             batch_size=32,
                             target=target)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    dataset = dataset.take(1)

    input_shape = [32, 3, 224, 224]
    class_num = 10
    input_data = np.random.uniform(0.0, 1.0, size=input_shape).astype(np.float32)

    network = resnet50(class_num)
    param_dict = load_checkpoint(ARGS_OPT.checkpoint_path)
    load_param_into_net(network, param_dict)
    network.set_train(False)

    quant_resnet50(network, dataset, loss, input_data)
def calibration():
    """Do the calibration to get the scale/offset record file."""
    dataset = create_dataset(dataset_path=ARGS_OPT.eval_dataset,
                             do_train=False,
                             batch_size=config.batch_size,  # pylint: disable=no-member
                             target=ARGS_OPT.device_target)
    dataset = dataset.take(1)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    network = resnet(10)
    network.set_train(False)
    param_dict = load_checkpoint(ARGS_OPT.pre_trained)
    load_param_into_net(network, param_dict)

    input_data = np.random.uniform(0.0, 1.0, size=[32, 3, 224, 224]).astype(np.float32)
    config_file = os.path.join(CUR_DIR, './config.json')
    amct.create_quant_config(config_file, network, input_data)
    calibration_network = amct.quantize_model(config_file, network, input_data)

    model = Model(calibration_network,
                  loss_fn=loss,
                  metrics={'top_1_accuracy', 'top_5_accuracy'})
    _ = model.eval(dataset)
    amct.save_model('./resnet50_quant_calibration', calibration_network, input_data)
def dpn_evaluate(args):
    # create the evaluation dataset
    eval_path = os.path.join(args.data_dir, 'val')
    eval_dataset = classification_dataset(eval_path,
                                          image_size=args.image_size,
                                          num_parallel_workers=args.num_parallel_workers,
                                          per_batch_size=args.batch_size,
                                          max_epoch=1,
                                          rank=args.rank,
                                          shuffle=False,
                                          group_size=args.group_size,
                                          mode='eval')
    # create the network
    net = dpns[args.backbone](num_classes=args.num_classes)
    # load the checkpoint
    if os.path.isfile(args.pretrained):
        load_param_into_net(net, load_checkpoint(args.pretrained))

    # loss
    if args.dataset == "imagenet-1K":
        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    else:
        if not args.label_smooth:
            args.label_smooth_factor = 0.0
        loss = CrossEntropy(smooth_factor=args.label_smooth_factor,
                            num_classes=args.num_classes)

    # create the model
    model = Model(net,
                  amp_level="O2",
                  keep_batchnorm_fp32=False,
                  loss_fn=loss,
                  metrics={'top_1_accuracy', 'top_5_accuracy'})
    # evaluate
    output = model.eval(eval_dataset)
    print(f'Evaluation result: {output}.')
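# The custom CrossEntropy loss with label smoothing used in dpn_evaluate() is
# not defined in this snippet. A sketch following the common MindSpore
# model-zoo pattern (an assumption, not the verified DPN implementation):
# one-hot encode labels with smoothed on/off values, apply softmax cross
# entropy, and average over the batch.
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import Tensor
from mindspore.common import dtype as mstype

class CrossEntropy(nn.Cell):
    def __init__(self, smooth_factor=0.0, num_classes=1000):
        super(CrossEntropy, self).__init__()
        self.num_classes = num_classes
        self.onehot = ops.OneHot()
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(smooth_factor / (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = ops.ReduceMean(False)

    def construct(self, logits, label):
        # Smooth the one-hot targets, then reduce the per-sample losses.
        one_hot_label = self.onehot(label, self.num_classes,
                                    self.on_value, self.off_value)
        loss = self.ce(logits, one_hot_label)
        return self.mean(loss, 0)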
def __init__(self, network, is_train=True):
    super(NetWithLossClass, self).__init__(auto_prefix=False)
    self.loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    self.l1_loss = L1Loss()
    self.network = network
    self.is_train = is_train
    self.concat = P.Concat(axis=1)
def resnet50_train(args_opt):
    device_id = 0
    device_num = 1
    epoch_size = args_opt.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/home/share/dataset/cifar-10-batches-bin/'  # your cifar10 path

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
    context.set_context(device_id=device_id)
    if device_num > 1:
        context.set_auto_parallel_context(device_num=device_num,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True)
        init()
        local_data_path = os.path.join(local_data_path, str(device_id))

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=1, batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                  repeat_num=1, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num=class_num)
    # reduction='mean' applies mean reduction to the loss
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size,
                       steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9,
                   weight_decay=1e-4, loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    # amp_level="O2" enables O2 mixed precision: the network is cast to float16
    # (batchnorm normally stays in float32) and dynamic loss scale is used;
    # keep_batchnorm_fp32=False casts batchnorm to float16 as well
    model = Model(net, amp_level="O2", keep_batchnorm_fp32=False, loss_fn=loss,
                  optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})

    # performance callback shows ips; loss callback shows loss for every epoch
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)

    if device_num == 1 or device_id == 0:
        print('================================= Start run evaluation. =================================')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
def compile_net(net):
    context.set_context(save_graphs=True)
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2
    dataset = Dataset(_x, _b)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, optimizer=opt, amp_level="O2")
    model.train(epoch_size, dataset, dataset_sink_mode=False)
    context.reset_auto_parallel_context()
def __init__(self):
    context.set_context(reserve_class_name_in_scope=False)
    net = resnet50(batch_size, num_classes)
    ls = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9)
    model = Model(net, loss_fn=ls, optimizer=opt, metrics={'acc'})
    self.model = model
    self.model.train(1, create_dataset(list(range(32))), dataset_sink_mode=False)
def resnet50_train(args_opt):
    epoch_size = args_opt.epoch_size
    batch_size = cfg.batch_size
    class_num = cfg.class_num
    loss_scale_num = cfg.loss_scale
    local_data_path = '/cache/data'
    local_ckpt_path = '/cache/ckpt_file'

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=epoch_size, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num=class_num)
    # reduction='mean' applies mean reduction to the loss
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size,
                       steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9,
                   weight_decay=1e-4, loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    # amp_level="O2" enables O2 mixed precision: the network is cast to float16
    # (batchnorm normally stays in float32) and dynamic loss scale is used;
    # keep_batchnorm_fp32=False casts batchnorm to float16 as well
    model = Model(net, amp_level="O2", keep_batchnorm_fp32=False, loss_fn=loss,
                  optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})

    # time/performance callbacks show ips; loss callback shows loss for every epoch
    time_cb = TimeMonitor(data_size=train_step_size)
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [time_cb, performance_cb, loss_cb]
    config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_epochs * train_step_size,
                                 keep_checkpoint_max=cfg.keep_checkpoint_max)
    ckpt_cb = ModelCheckpoint(prefix="resnet", directory=local_ckpt_path, config=config_ck)
    cb += [ckpt_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)

    # upload checkpoint files
    print('Upload checkpoint.')
    mox.file.copy_parallel(src_url=local_ckpt_path, dst_url=args_opt.train_url)
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
    opt = nn.Momentum(Tensor(np.array([0.1])), Tensor(np.array([0.9])),
                      filter(lambda x: x.requires_grad, net.get_parameters()))
    context.set_context(mode=context.GRAPH_MODE)
    Model(net, loss, opt)
    _network = nn.WithLossCell(net, loss)
    _train_net = MsWrapper(nn.TrainOneStepCell(_network, opt))
    _train_net.set_train()
    for epoch in range(epoch_size):
        print(f"epoch {epoch}")
        output = _train_net(Tensor(input_np), Tensor(label_np))
        print(output.asnumpy())
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    """me_train_tensor"""
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   lr_gen(lambda i: 0.1, epoch_size), 0.9, 0.01, 1024)
    Model(net, loss, opt)
    _network = nn.WithLossCell(net, loss)
    _train_net = nn.TrainOneStepCell(_network, opt)
    _train_net.set_train()
    label_np = np.argmax(label_np, axis=-1).astype(np.int32)
    for epoch in range(epoch_size):
        print(f"epoch {epoch}")
        _train_net(Tensor(input_np), Tensor(label_np))
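# lr_gen is not defined in this snippet; it is called above as
# lr_gen(lambda i: 0.1, epoch_size) where Momentum expects a learning rate.
# A plausible sketch (an assumption: one rate per index, returned as a float32
# Tensor, which Momentum accepts as a dynamic learning-rate sequence):
import numpy as np
from mindspore import Tensor

def lr_gen(fn, epoch_size):
    # Evaluate fn at each index to build the learning-rate schedule.
    return Tensor(np.array([fn(i) for i in range(epoch_size)]).astype(np.float32))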
def resnet50_train(args_opt):
    epoch_size = args_opt.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/cache/data'

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
    context.set_context(enable_task_sink=True, device_id=device_id)
    context.set_context(enable_loop_sink=True)
    context.set_context(enable_mem_reuse=True)
    if device_num > 1:
        context.set_auto_parallel_context(device_num=device_num,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          mirror_mean=True)
        local_data_path = os.path.join(local_data_path, str(device_id))

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=epoch_size, batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                  repeat_num=1, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num=class_num)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size,
                       steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9,
                   weight_decay=1e-4, loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)
    model = Model(net, loss_fn=loss, optimizer=opt,
                  loss_scale_manager=loss_scale, metrics={'acc'})

    # performance callback shows ips; loss callback shows loss for every epoch
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)

    if device_num == 1 or device_id == 0:
        print('Start run evaluation.')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    """me_train_tensor"""
    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    # reorder the net parameters, leaving the parameters that need to be
    # passed into LARS at the end
    opt = Momentum(get_net_trainable_reordered_params(net)[2],
                   lr_gen(lambda i: 0.1, epoch_size), 0.9, 0.01, 1024)
    Model(net, loss, opt)
    _network = nn.WithLossCell(net, loss)
    TrainOneStepWithLarsCell(_network, opt)
    data = Tensor(input_np)
    label = Tensor(label_np)
    net(data, label)
def train_lenet():
    context.set_context(mode=context.GRAPH_MODE, save_graphs=True, device_target="CPU")
    dataset_sink_mode = False

    # download the mnist dataset
    download_dataset()

    # learning rate setting
    lr = 0.01
    momentum = 0.9
    epoch_size = 1
    mnist_path = "../MNIST_Data"

    # define the loss function
    net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
    repeat_size = epoch_size
    # create the network
    network = LeNet5()
    # define the optimizer
    net_opt = nn.Momentum(network.trainable_params(), lr, momentum)
    config_ck = CheckpointConfig(save_checkpoint_steps=1875, keep_checkpoint_max=10)
    # save the network model and parameters for subsequent fine-tuning
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck)
    # group layers into an object with training and evaluation features
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

    summary_writer = SummaryRecord(log_dir="../../summary", network=network)
    summary_callback = SummaryStep(summary_writer, flush_step=10)

    # init TrainLineage to record the training information
    train_callback = TrainLineage(summary_writer)

    train_net(model, epoch_size, mnist_path, repeat_size, ckpoint_cb,
              dataset_sink_mode, callbacks=[summary_callback, train_callback])
    test_net(network, model, mnist_path)
    summary_writer.close()
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    context.set_context(mode=context.GRAPH_MODE)
    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    opt = ApplyMomentum(Tensor(np.array([0.1])), Tensor(np.array([0.9])),
                        filter(lambda x: x.requires_grad, net.get_parameters()))
    Model(net, loss, opt)
    _network = wrap.WithLossCell(net, loss)
    _train_net = MsWrapper(wrap.TrainOneStepCell(_network, opt))
    _train_net.set_train()
    with SummaryRecord(SUMMARY_DIR, file_suffix="_MS_GRAPH", network=_train_net) as summary_writer:
        for epoch in range(epoch_size):
            print(f"epoch {epoch}")
            output = _train_net(Tensor(input_np), Tensor(label_np))
            # record at the current epoch (the original referenced an undefined `i`)
            summary_writer.record(epoch)
            print("********output***********")
            print(output.asnumpy())
def reshape_common(parallel_mode):
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8)

    predict = Tensor(np.ones([32, 256]), dtype=ms.float32)
    label = Tensor(np.ones([32]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = prelu_net()

    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, opt)
    model.train(epoch_size, dataset, dataset_sink_mode=False)
def test_original_resnet50():
    """Evaluate the original resnet50."""
    dataset = create_dataset(dataset_path=ARGS_OPT.eval_dataset,
                             do_train=False,
                             batch_size=config.batch_size,  # pylint: disable=no-member
                             target=ARGS_OPT.device_target)
    network = resnet(10)
    network.set_train(False)
    param_dict = load_checkpoint(ARGS_OPT.pre_trained)
    load_param_into_net(network, param_dict)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    model = Model(network, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'})
    res = model.eval(dataset)
    print("result for original resnet50:", res, "ckpt=", ARGS_OPT.pre_trained)
def reshape_common(parallel_mode, strategy0, strategy1, strategy2, strategy_loss):
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8)

    predict = Tensor(np.ones([32, 512, 7, 7]), dtype=ms.float32)
    label = Tensor(np.ones([32]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = reshape_net(strategy0, strategy1, strategy2)

    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss.softmax_cross_entropy.shard(strategy_loss)
    loss.one_hot.shard(((8, 1), (), ()))
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, opt)
    model.train(epoch_size, dataset, dataset_sink_mode=False)
def test_data_parallel_mode():
    _reset_op_id()
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2

    context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                      full_batch=True)

    predict = Tensor(np.ones([256, 128]), dtype=ms.float32)
    label = Tensor(np.ones([256]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = all_to_all_net(None)

    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, opt)

    with pytest.raises(RuntimeError):
        model.train(epoch_size, dataset, dataset_sink_mode=False)
def all_to_all_common():
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL,
                                      device_num=1, global_rank=0)

    predict = Tensor(np.ones([32, 128]), dtype=ms.float32)
    label = Tensor(np.ones([32]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = all_to_all_net()

    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, opt)
    model.train(epoch_size, dataset, dataset_sink_mode=False)

    strategys = _executor._get_strategy(model._train_network)
    return strategys
def test_pynative_resnet50():
    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")

    batch_size = 32
    num_classes = 10
    loss_scale = 128
    total_step = 50
    net = resnet50(batch_size, num_classes)
    optimizer = Momentum(learning_rate=0.01, momentum=0.9,
                         params=filter(lambda x: x.requires_grad, net.get_parameters()))
    data_set = create_dataset(repeat_num=1, training=True, batch_size=batch_size,
                              num_samples=total_step * batch_size)

    # define callbacks
    time_cb = MyTimeMonitor(data_size=data_set.get_dataset_size())
    loss_cb = LossMonitor()
    cb = [time_cb, loss_cb]

    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss_scale = FixedLossScaleManager(loss_scale=loss_scale, drop_overflow_update=False)
    model = Model(net, loss_fn=loss, optimizer=optimizer,
                  loss_scale_manager=loss_scale, metrics={'acc'},
                  amp_level="O2", keep_batchnorm_fp32=False)

    # train the model
    model.train(1, data_set, callbacks=cb,
                sink_size=data_set.get_dataset_size(), dataset_sink_mode=True)
    assert time_cb.good_step() > 10
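# MyTimeMonitor is a custom callback not shown here; the test only relies on
# good_step(). A hypothetical sketch, assuming a "good" step is one whose
# wall-clock duration stays under a budget (the 10 ms threshold below is an
# illustrative assumption, not from the source):
import time
from mindspore.train.callback import Callback

class MyTimeMonitor(Callback):
    def __init__(self, data_size, step_budget_ms=10.0):
        super(MyTimeMonitor, self).__init__()
        self.data_size = data_size
        self.step_budget_ms = step_budget_ms
        self._good_steps = 0
        self._start = 0.0

    def step_begin(self, run_context):
        self._start = time.time()

    def step_end(self, run_context):
        # Count steps that finished within the time budget.
        if (time.time() - self._start) * 1000.0 < self.step_budget_ms:
            self._good_steps += 1

    def good_step(self):
        return self._good_steps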
def get_tensor_from_training(
        indices,
        ckpt_file="/tmp/pycharm_project_589/summary_dir-202010191622/weights/-1_350.ckpt",
        node_name="conv1.weight",
        data_type="gradient"):
    context.set_context(reserve_class_name_in_scope=False)
    net = resnet50(batch_size, num_classes)
    load_checkpoint(ckpt_file, net=net)
    ls = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9)
    model = Model(net, loss_fn=ls, optimizer=opt, metrics={'acc'})
    dataset = create_dataset(indices)
    data_inception_callback = DataInterceptionCallback(node_name=node_name,
                                                       data_type=data_type)
    model.train(1, dataset,
                callbacks=[LossMonitor(), data_inception_callback],
                dataset_sink_mode=False)
    return data_inception_callback.result
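# DataInterceptionCallback is not defined in this snippet. A hypothetical
# sketch of the weight-capturing case: at each step end, look up the parameter
# named node_name and store a copy. Intercepting true gradients (the
# data_type="gradient" case above) would need a hook around the optimizer and
# is not attempted here.
from mindspore.train.callback import Callback

class DataInterceptionCallback(Callback):
    def __init__(self, node_name, data_type="weight"):
        super(DataInterceptionCallback, self).__init__()
        self.node_name = node_name
        self.data_type = data_type
        self.result = []

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        # cb_params.train_network wraps the user network and its parameters.
        for param in cb_params.train_network.get_parameters():
            if param.name == self.node_name:
                self.result.append(param.data.asnumpy().copy())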