def epoch_end(self, run_context):
    """Evaluate both the raw model and the EMA model at the end of each epoch.

    Copies the EMA shadow weights into ``self.ema_network``, evaluates both
    networks on ``self.eval_dataset``, tracks best accuracies, and saves
    checkpoints ("ema_best.ckpt" on a new EMA best; periodic / last
    checkpoints every ``self.save_epoch`` epochs).

    Fix: the per-epoch summary line previously printed a stray ``)`` after
    the Top-1 value ("Top-1: %s)").
    """
    cb_params = run_context.original_args()
    # cur_epoch_num restarts from 1 on resume; offset by the recorded start epoch.
    cur_epoch = cb_params.cur_epoch_num + self._start_epoch - 1
    save_ckpt = (cur_epoch % self.save_epoch == 0)

    # Load the EMA shadow weights into the EMA network before evaluating it.
    load_nparray_into_net(self.ema_network, self.shadow)

    model = Model(self.network, loss_fn=self.loss_fn, metrics=self.eval_metrics)
    model_ema = Model(self.ema_network, loss_fn=self.loss_fn, metrics=self.eval_metrics)
    acc = model.eval(self.eval_dataset, dataset_sink_mode=self.dataset_sink_mode)
    ema_acc = model_ema.eval(self.eval_dataset, dataset_sink_mode=self.dataset_sink_mode)
    print("Model Accuracy:", acc)
    print("EMA-Model Accuracy:", ema_acc)

    # Checkpoint payload built from the EMA shadow parameters.
    output = [{"name": k, "data": Tensor(v)} for k, v in self.shadow.items()]

    self.ema_accuracy[cur_epoch] = ema_acc["Top1-Acc"]
    if self.best_ema_accuracy < ema_acc["Top1-Acc"]:
        self.best_ema_accuracy = ema_acc["Top1-Acc"]
        self.best_ema_epoch = cur_epoch
        save_checkpoint(output, "ema_best.ckpt")
    if self.best_accuracy < acc["Top1-Acc"]:
        self.best_accuracy = acc["Top1-Acc"]
        self.best_epoch = cur_epoch

    print("Best Model Accuracy: %s, at epoch %s" % (self.best_accuracy, self.best_epoch))
    print("Best EMA-Model Accuracy: %s, at epoch %s" % (self.best_ema_accuracy, self.best_ema_epoch))

    if save_ckpt:
        # Save the ema_model checkpoints
        ckpt = "{}-{}.ckpt".format("ema", cur_epoch)
        save_checkpoint(output, ckpt)
        save_checkpoint(output, "ema_last.ckpt")
        # Save the model checkpoints
        save_checkpoint(cb_params.train_network, "last.ckpt")

    print("Top 10 EMA-Model Accuracies: ")
    count = 0
    for epoch in sorted(self.ema_accuracy, key=self.ema_accuracy.get, reverse=True):
        if count == 10:
            break
        # fixed: removed stray ')' that used to trail the Top-1 value
        print("epoch: %s, Top-1: %s" % (epoch, self.ema_accuracy[epoch]))
        count += 1
def dpn_evaluate(args):
    """Evaluate a DPN classifier on the 'val' split described by `args`."""
    # Validation dataset.
    eval_path = os.path.join(args.data_dir, 'val')
    eval_dataset = classification_dataset(eval_path,
                                          image_size=args.image_size,
                                          num_parallel_workers=args.num_parallel_workers,
                                          per_batch_size=args.batch_size,
                                          max_epoch=1,
                                          rank=args.rank,
                                          shuffle=False,
                                          group_size=args.group_size,
                                          mode='eval')

    # Backbone selected by name from the dpns registry.
    net = dpns[args.backbone](num_classes=args.num_classes)

    # Restore pretrained weights when the checkpoint file exists.
    if os.path.isfile(args.pretrained):
        load_param_into_net(net, load_checkpoint(args.pretrained))

    # Plain cross-entropy for imagenet-1K, label-smoothed loss otherwise.
    if args.dataset == "imagenet-1K":
        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    else:
        if not args.label_smooth:
            args.label_smooth_factor = 0.0
        loss = CrossEntropy(smooth_factor=args.label_smooth_factor,
                            num_classes=args.num_classes)

    # O2 mixed-precision wrapper, then evaluate.
    model = Model(net, amp_level="O2", keep_batchnorm_fp32=False, loss_fn=loss,
                  metrics={'top_1_accuracy', 'top_5_accuracy'})
    output = model.eval(eval_dataset)
    print(f'Evaluation result: {output}.')
def eval_net():
    '''Evaluate TextCNN on the test split of the configured dataset.'''
    # Pick the dataset wrapper; note: an unknown config.dataset leaves
    # `instance` unbound and fails below (behavior kept as-is).
    if config.dataset == 'MR':
        instance = MovieReview(root_dir=config.data_path, maxlen=config.word_len, split=0.9)
    elif config.dataset == 'SUBJ':
        instance = Subjectivity(root_dir=config.data_path, maxlen=config.word_len, split=0.9)
    elif config.dataset == 'SST2':
        instance = SST2(root_dir=config.data_path, maxlen=config.word_len, split=0.9)

    device_target = config.device_target
    context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
    if device_target == "Ascend":
        context.set_context(device_id=get_device_id())

    dataset = instance.create_test_dataset(batch_size=config.batch_size)
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
    net = TextCNN(vocab_len=instance.get_dict_len(),
                  word_len=config.word_len,
                  num_classes=config.num_classes,
                  vec_length=config.vec_length)
    opt = nn.Adam(filter(lambda x: x.requires_grad, net.get_parameters()),
                  learning_rate=0.001,
                  weight_decay=float(config.weight_decay))

    # Restore weights and switch to inference mode.
    param_dict = load_checkpoint(config.checkpoint_file_path)
    print("load checkpoint from [{}].".format(config.checkpoint_file_path))
    load_param_into_net(net, param_dict)
    net.set_train(False)

    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc': Accuracy()})
    acc = model.eval(dataset)
    print("accuracy: ", acc)
def test(cloud_args=None):
    """Evaluate VGG16 on the CIFAR-100-style test dataset from args.data_path."""
    args = parse_args(cloud_args)
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.device_target,
                        save_graphs=False)
    # Honor DEVICE_ID from the environment when it is a plain integer.
    if os.getenv('DEVICE_ID', "not_set").isdigit():
        context.set_context(device_id=int(os.getenv('DEVICE_ID')))

    # Timestamped output directory + logger at INFO level (20).
    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = LogUtil.get_instance()
    args.logger.set_level(20)

    net = vgg16(num_classes=args.num_classes, args=args)
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   0.01, args.momentum, weight_decay=args.weight_decay)
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

    # Load the pretrained checkpoint and switch to inference mode.
    param_dict = load_checkpoint(args.pre_trained)
    load_param_into_net(net, param_dict)
    net.set_train(False)

    dataset_test = vgg_create_dataset100(args.data_path, args.image_size,
                                         args.per_batch_size, training=False)
    res = model.eval(dataset_test)
    print("result: ", res)
def resnet50_train(args_opt):
    """Train ResNet-50 on CIFAR-10 (Ascend), then evaluate on device 0."""
    device_id = 0
    device_num = 1
    epoch_size = args_opt.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/home/share/dataset/cifar-10-batches-bin/'  # your cifar10 path

    # Graph mode; enable data parallelism only when more than one device is used.
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
    context.set_context(device_id=device_id)
    if device_num > 1:
        context.set_auto_parallel_context(device_num=device_num,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True)
        init()
        local_data_path = os.path.join(local_data_path, str(device_id))

    # Pull the dataset to the local path.
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # Build train / eval pipelines.
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=1, batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                  repeat_num=1, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # Network, loss (mean-reduced), LR schedule, optimizer, fixed loss scale.
    net = resnet50(class_num=class_num)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size,
                       steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9,
                   weight_decay=1e-4, loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    # O2 mixed precision: the whole network runs in float16 except batchnorm
    # stays fp32=False here, with a fixed loss scale.
    model = Model(net, amp_level="O2", keep_batchnorm_fp32=False, loss_fn=loss,
                  optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})

    # Callbacks: throughput (ips) and per-epoch loss reporting.
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)

    # Only rank 0 (or single-device runs) performs the evaluation.
    if device_num == 1 or device_id == 0:
        print(f'=================================Start run evaluation.=================================')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
def MLM_eval():
    '''Run masked-LM evaluation and print the metric value(s).'''
    _, dataset, net_for_pretraining = bert_predict()
    # The pretraining net is reused as its own eval network.
    net = Model(net_for_pretraining,
                eval_network=net_for_pretraining,
                eval_indexes=[0, 1, 2],
                metrics={'name': myMetric()})
    res = net.eval(dataset, dataset_sink_mode=False)
    for _, v in res.items():
        print("Accuracy is: ", v)
def main():
    """Evaluate a HourNAS model on CIFAR-10 and print the metrics."""
    args = parser.parse_args()
    print(sys.argv)

    # PyNative mode; GPU target is opt-in via --GPU.
    context.set_context(mode=context.PYNATIVE_MODE)
    if args.GPU:
        context.set_context(device_target='GPU')

    # Only HourNAS model names are accepted.
    assert args.model.startswith("hournas"), "Only Tinynet models are supported."

    net = hournasnet(args.model,
                     num_classes=args.num_classes,
                     drop_rate=0.0,
                     drop_connect_rate=0.0,
                     global_pool="avg",
                     bn_tf=False,
                     bn_momentum=None,
                     bn_eps=None)
    print(net)
    print("Total number of parameters:", count_params(net))

    cfg = edict({'image_height': args.image_size, 'image_width': args.image_size})
    cfg.batch_size = args.batch_size
    print(cfg)

    val_data_url = args.data_path
    val_dataset = create_dataset_cifar10(val_data_url, repeat_num=1,
                                         training=False, cifar_cfg=cfg)

    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    eval_metrics = {'Validation-Loss': Loss(),
                    'Top1-Acc': Top1CategoricalAccuracy(),
                    'Top5-Acc': Top5CategoricalAccuracy()}

    # Restore the checkpoint and evaluate in inference mode.
    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)
    model = Model(net, loss, metrics=eval_metrics)
    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
def resnet50_train(args_opt):
    """Train ResNet-50 on CIFAR-10 from /cache/data, then evaluate on rank 0.

    NOTE(review): `device_id` and `device_num` are not defined in this
    function — they appear to be module-level globals; confirm in the caller.
    """
    epoch_size = args_opt.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/cache/data'

    # Graph mode with task/loop sink and memory reuse enabled.
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
    context.set_context(enable_task_sink=True, device_id=device_id)
    context.set_context(enable_loop_sink=True)
    context.set_context(enable_mem_reuse=True)
    if device_num > 1:
        context.set_auto_parallel_context(device_num=device_num,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          mirror_mean=True)
        local_data_path = os.path.join(local_data_path, str(device_id))

    # Pull the dataset to local cache.
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # Train pipeline repeats epoch_size times; eval pipeline runs once.
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=epoch_size, batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                  repeat_num=1, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # Network, loss, LR schedule, optimizer, fixed loss scale.
    net = resnet50(class_num=class_num)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size,
                       steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9,
                   weight_decay=1e-4, loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)
    model = Model(net, loss_fn=loss, optimizer=opt,
                  loss_scale_manager=loss_scale, metrics={'acc'})

    # define performance callback to show ips and loss callback to show loss for every epoch
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)

    if device_num == 1 or device_id == 0:
        print(f'Start run evaluation.')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
def main():
    """Evaluate a TinyNet model on the ImageNet-style validation split."""
    args = parser.parse_args()
    print(sys.argv)

    context.set_context(mode=context.GRAPH_MODE)
    if args.GPU:
        context.set_context(device_target='GPU')

    # The model name encodes the sub-variant after the underscore.
    assert args.model.startswith("tinynet"), "Only Tinynet models are supported."
    _, sub_name = args.model.split("_")

    net = tinynet(sub_model=sub_name,
                  num_classes=args.num_classes,
                  drop_rate=0.0,
                  drop_connect_rate=0.0,
                  global_pool="avg",
                  bn_tf=False,
                  bn_momentum=None,
                  bn_eps=None)
    print("Total number of parameters:", count_params(net))

    # Input resolution comes from the model's default config.
    input_size = net.default_cfg['input_size'][1]
    val_data_url = os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_val(args.batch_size,
                                     val_data_url,
                                     workers=args.workers,
                                     distributed=False,
                                     input_size=input_size)

    # Label-smoothed loss computed in fp32.
    loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing,
                                      num_classes=args.num_classes)
    loss.add_flags_recursive(fp32=True, fp16=False)

    eval_metrics = {'Validation-Loss': Loss(),
                    'Top1-Acc': Top1CategoricalAccuracy(),
                    'Top5-Acc': Top5CategoricalAccuracy()}

    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)
    model = Model(net, loss, metrics=eval_metrics)
    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
def train_process(epoch_size, num_classes, batch_size):
    """Train ResNet-50 for `epoch_size` epochs, evaluate, and return the metrics."""
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

    net = resnet50(batch_size, num_classes)
    loss = CrossEntropyLoss()
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9)
    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

    # Training phase, recording loss via the LossGet callback.
    dataset = create_dataset(epoch_size, training=True, batch_size=batch_size)
    loss_cb = LossGet()
    model.train(epoch_size, dataset, callbacks=[loss_cb])

    # Evaluation phase in inference mode.
    net.set_train(False)
    eval_dataset = create_dataset(1, training=False)
    res = model.eval(eval_dataset)
    print("result: ", res)
    return res
def resnet50_eval(args_opt):
    """Download data + checkpoint from OBS, then evaluate ResNet-50 on Ascend."""
    class_num = cfg.class_num
    local_data_path = '/cache/data'

    # Derive a local cache path from the checkpoint file name.
    ckpt_file_slice = args_opt.checkpoint_path.split('/')
    ckpt_file = ckpt_file_slice[len(ckpt_file_slice) - 1]
    local_ckpt_path = '/cache/' + ckpt_file

    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)

    # Copy dataset and checkpoint to local cache.
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)
    mox.file.copy_parallel(src_url=args_opt.checkpoint_path, dst_url=local_ckpt_path)

    dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                             batch_size=cfg.batch_size)

    # Restore weights and switch to inference mode.
    net = resnet50(class_num=class_num)
    param_dict = load_checkpoint(local_ckpt_path)
    load_param_into_net(net, param_dict)
    net.set_train(False)

    # Label smoothing only when enabled in the config.
    if not cfg.use_label_smooth:
        cfg.label_smooth_factor = 0.0
    loss = CrossEntropySmooth(sparse=True, reduction='mean',
                              smooth_factor=cfg.label_smooth_factor,
                              num_classes=cfg.class_num)
    model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'})

    res = model.eval(dataset)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)
def eval(batch_size, num_classes):  # noqa: A001 — shadows builtin `eval`; name kept for existing callers
    """Restore ResNet-50 from a fixed checkpoint and evaluate on the test set."""
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    context.set_context(device_id=0)
    context.set_context(enable_loop_sink=True)
    context.set_context(enable_mem_reuse=True)

    net = resnet50(batch_size, num_classes)
    loss = CrossEntropyLoss()
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9)
    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

    # Hard-coded checkpoint produced by the companion training run.
    checkpoint_path = "./train_resnet_cifar10_device_id_0-1_1562.ckpt"
    param_dict = load_checkpoint(checkpoint_path)
    load_param_into_net(net, param_dict)
    net.set_train(False)

    eval_dataset = create_dataset(1, training=False)
    res = model.eval(eval_dataset)
    print("result: ", res)
    return res
def main():
    """Evaluate the NAS-Bench network on CIFAR-10 and print the metrics."""
    args = parser.parse_args()
    print(sys.argv)

    # PyNative mode; GPU target (with explicit device id) is opt-in.
    context.set_context(mode=context.PYNATIVE_MODE)
    if args.GPU:
        context.set_context(device_target='GPU', device_id=args.device_id)

    assert args.model.startswith("hournas"), "Only Tinynet models are supported."

    net = nasbenchnet()
    cfg = edict({'image_height': args.image_size, 'image_width': args.image_size})
    cfg.batch_size = args.batch_size

    val_data_url = args.data_path
    val_dataset = create_dataset_cifar10(val_data_url, repeat_num=1,
                                         training=False, cifar_cfg=cfg)

    # Label-smoothed loss computed in fp32.
    loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing,
                                      num_classes=args.num_classes)
    loss.add_flags_recursive(fp32=True, fp16=False)

    eval_metrics = {'Validation-Loss': Loss(),
                    'Top1-Acc': Top1CategoricalAccuracy(),
                    'Top5-Acc': Top5CategoricalAccuracy()}

    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)
    model = Model(net, loss, metrics=eval_metrics)
    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
def eval_():
    """Evaluate ResNet-50 (1001 classes) on GPU with the configured dtype."""
    # Runtime knobs from the module-level args_opt.
    dev = "GPU"
    compute_type = str(args_opt.dtype).lower()
    ckpt_dir = str(args_opt.ckpt_path)
    total_batch = int(args_opt.batch_size)

    # Graph vs PyNative execution mode.
    mode = context.GRAPH_MODE if args_opt.mode == "GRAPH" else context.PYNATIVE_MODE
    context.set_context(mode=mode, device_target=dev, save_graphs=False)

    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False,
                             repeat_num=1, batch_size=total_batch,
                             target=dev, dtype=compute_type)

    # Build the network and restore weights.
    net = resnet(class_num=1001, dtype=compute_type)
    param_dict = load_checkpoint(ckpt_dir)
    load_param_into_net(net, param_dict)
    net.set_train(False)

    # Label-smoothed loss and top-1/top-5 metrics.
    loss = CrossEntropySmooth(sparse=True, reduction='mean',
                              smooth_factor=0.1, num_classes=1001)
    model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'})

    print("========START EVAL RESNET50 ON GPU ========")
    res = model.eval(dataset)
    print("result:", res, "ckpt=", ckpt_dir)
# Ascend graph-mode setup with task/loop sink and memory reuse.
# NOTE(review): `device_id` is defined earlier in this script — confirm upstream.
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
context.set_context(enable_task_sink=True, device_id=device_id)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)

if __name__ == '__main__':
    net = resnet50(class_num=config.class_num)

    # Label smoothing only when enabled in the config.
    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=config.label_smooth_factor,
                        num_classes=config.class_num)

    if args_opt.do_eval:
        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False,
                                 batch_size=config.batch_size)
        step_size = dataset.get_dataset_size()

        # Restore the checkpoint when one is given.
        if args_opt.checkpoint_path:
            param_dict = load_checkpoint(args_opt.checkpoint_path)
            load_param_into_net(net, param_dict)
        net.set_train(False)

        model = Model(net, loss_fn=loss, metrics={'acc'})
        res = model.eval(dataset)
        print("result:", res, "ckpt=", args_opt.checkpoint_path)
# NOTE(review): incomplete fragment — the leading `else:` belongs to a device-target
# `if`/`elif` chain that is outside this view; left byte-identical rather than reformatted.
else: raise ValueError("Unsupported device_target.") context.set_context(device_id=args_opt.device_id) context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, save_graphs=False) # create dataset dataset = create_dataset(args_opt.dataset_path, do_train=False, batch_size=config.batch_size, device_num=1, rank=0) step_size = dataset.get_dataset_size() # define net net = xception(class_num=config.class_num) # load checkpoint param_dict = load_checkpoint(args_opt.checkpoint_path) load_param_into_net(net, param_dict) net.set_train(False) # define loss, model loss = CrossEntropySmooth(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) # define model eval_metrics = {'Loss': nn.Loss(), 'Top_1_Acc': nn.Top1CategoricalAccuracy(), 'Top_5_Acc': nn.Top5CategoricalAccuracy()} model = Model(net, loss_fn=loss, metrics=eval_metrics) # eval model res = model.eval(dataset, dataset_sink_mode=True) print("result:", res, "ckpt=", args_opt.checkpoint_path)
# Build the upright (lr) and 180-degree-rotated (rl) evaluation datasets.
dataset_lr, dataset_rl = create_dataset_eval(
    args_opt.dataset_path + "/" + dataset_name + ".mindrecord0",
    config=config, dataset_name=dataset_name)
step_size = dataset_lr.get_dataset_size()
print("step_size ", step_size)

# CNN direction classifier.
net = CNNDirectionModel([3, 64, 48, 48, 64], [64, 48, 48, 64, 64], [256, 64], [64, 512])

# Restore weights and switch to inference mode.
param_dict = load_checkpoint(args_opt.checkpoint_path)
load_param_into_net(net, param_dict)
net.set_train(False)

# Sum-reduced cross-entropy; top-1 accuracy metric.
loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="sum")
model = Model(net, loss_fn=loss, metrics={'top_1_accuracy'})

# Evaluate both orientations.
res_lr = model.eval(dataset_lr, dataset_sink_mode=False)
res_rl = model.eval(dataset_rl, dataset_sink_mode=False)
print("result on upright images:", res_lr, "ckpt=", args_opt.checkpoint_path)
print("result on 180 degrees rotated images:", res_rl, "ckpt=", args_opt.checkpoint_path)
# NOTE(review): incomplete fragment — the `else: raise ValueError("dataset is not support.")`
# pairs with a dataset-selection `if` outside this view; left byte-identical rather than reformatted.
if not cfg.use_label_smooth: cfg.label_smooth_factor = 0.0 loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=cfg.label_smooth_factor, num_classes=cfg.num_classes) net = TinyDarkNet(num_classes=cfg.num_classes) model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'}) else: raise ValueError("dataset is not support.") device_target = cfg.device_target context.set_context(mode=context.GRAPH_MODE, device_target=cfg.device_target) if args_opt.checkpoint_path is not None: param_dict = load_checkpoint(args_opt.checkpoint_path) print("load checkpoint from [{}].".format(args_opt.checkpoint_path)) else: param_dict = load_checkpoint(cfg.checkpoint_path) print("load checkpoint from [{}].".format(cfg.checkpoint_path)) load_param_into_net(net, param_dict) net.set_train(False) acc = model.eval(dataset) print("accuracy: ", acc)
def train_process(q, device_id, epoch_size, device_num, enable_hccl):
    """Per-device worker: train ResNet-50 with interleaved eval, push results to `q`.

    Runs in its own working directory named after `device_id`; when
    `enable_hccl` is set it configures data-parallel training across
    `device_num` devices. Puts {'acc': ..., 'cost': ...} on the queue.
    """
    # Isolate this worker in its own directory.
    os.system("mkdir " + str(device_id))
    os.chdir(str(device_id))

    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
    context.set_context(device_id=device_id)
    os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = MINDSPORE_HCCL_CONFIG_PATH
    os.environ['RANK_ID'] = str(device_id)
    os.environ['RANK_SIZE'] = str(device_num)
    if enable_hccl:
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            mirror_mean=True,
            parameter_broadcast=True)
        auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
        init()

    # Network plus a distributed-accuracy eval wrapper.
    net = resnet50(class_num=config.class_num)
    dist_eval_network = ClassifyCorrectCell(net)

    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0
    loss = nn.SoftmaxCrossEntropyWithLogits(
        sparse=True, reduction="mean",
        smooth_factor=config.label_smooth_factor,
        num_classes=config.class_num)

    # Train pipeline; loop size is stretched so one model.train(1, ...) call
    # covers eval_interval epochs of steps.
    dataset = create_dataset(dataset_path=dataset_path, do_train=True,
                             repeat_num=epoch_size, batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()
    eval_interval = config.eval_interval
    dataset.__loop_size__ = step_size * eval_interval

    # Evaluation pipeline.
    eval_dataset = create_dataset(dataset_path=eval_path, do_train=False,
                                  repeat_num=epoch_size,
                                  batch_size=config.eval_batch_size)

    loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)

    lr = Tensor(get_learning_rate(lr_init=config.lr_init, lr_end=0.0,
                                  lr_max=config.lr_max,
                                  warmup_epochs=config.warmup_epochs,
                                  total_epochs=config.epoch_size,
                                  steps_per_epoch=step_size,
                                  lr_decay_mode=config.lr_decay_mode))

    # Weight decay is applied only to non-BN weights (no beta/gamma/bias).
    decayed_params = list(filter(
        lambda x: 'beta' not in x.name and 'gamma' not in x.name and 'bias' not in x.name,
        net.trainable_params()))
    no_decayed_params = [param for param in net.trainable_params()
                         if param not in decayed_params]
    group_params = [{'params': decayed_params, 'weight_decay': config.weight_decay},
                    {'params': no_decayed_params, 'weight_decay': 0.0},
                    {'order_params': net.trainable_params()}]

    if config.use_lars:
        momentum = nn.Momentum(group_params, lr, config.momentum,
                               loss_scale=config.loss_scale,
                               use_nesterov=config.use_nesterov)
        opt = nn.LARS(momentum, epsilon=config.lars_epsilon,
                      coefficient=config.lars_coefficient,
                      lars_filter=lambda x: 'beta' not in x.name
                      and 'gamma' not in x.name and 'bias' not in x.name)
    else:
        opt = nn.Momentum(group_params, lr, config.momentum,
                          loss_scale=config.loss_scale,
                          use_nesterov=config.use_nesterov)

    model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale,
                  amp_level="O2", keep_batchnorm_fp32=False,
                  metrics={'acc': DistAccuracy(batch_size=config.eval_batch_size,
                                               device_num=device_num)},
                  eval_network=dist_eval_network)

    # Pre-compile both graphs before the timed loop.
    print("init_start", device_id)
    model.init(dataset, eval_dataset)
    print("init_stop", device_id)

    loss_cb = LossGet(1, step_size)

    # Alternate training (eval_interval epochs per call) with evaluation.
    print("run_start", device_id)
    acc = 0.0
    time_cost = 0.0
    for epoch_idx in range(0, int(epoch_size / eval_interval)):
        model.train(1, dataset, callbacks=loss_cb)
        eval_start = time.time()
        output = model.eval(eval_dataset)
        eval_cost = (time.time() - eval_start) * 1000
        acc = float(output["acc"])
        time_cost = loss_cb.get_per_step_time()
        loss = loss_cb.get_loss()
        print("the {} epoch's resnet result:\n "
              "device{}, training loss {}, acc {}, "
              "training per step cost {:.2f} ms, eval cost {:.2f} ms, total_cost {:.2f} ms"
              .format(epoch_idx, device_id, loss, acc, time_cost, eval_cost,
                      time_cost * step_size + eval_cost))
    q.put({'acc': acc, 'cost': time_cost})
# Evaluation dataset (single device).
dataset = create_dataset(args_opt.dataset_path, do_train=False,
                         batch_size=config.batch_size, device_num=1, rank=0)
step_size = dataset.get_dataset_size()

# Xception classifier.
net = xception(class_num=config.class_num)

# Restore weights and switch to inference mode.
param_dict = load_checkpoint(args_opt.checkpoint_path)
load_param_into_net(net, param_dict)
net.set_train(False)

# Label-smoothed loss; loss + top-1/top-5 metrics.
loss = CrossEntropySmooth(smooth_factor=config.label_smooth_factor,
                          num_classes=config.class_num)
eval_metrics = {'Loss': nn.Loss(),
                'Top_1_Acc': nn.Top1CategoricalAccuracy(),
                'Top_5_Acc': nn.Top5CategoricalAccuracy()}
model = Model(net, loss_fn=loss, metrics=eval_metrics)

# Evaluate without dataset sink.
res = model.eval(dataset, dataset_sink_mode=False)
print("result:", res, "ckpt=", args_opt.checkpoint_path)
# On Ascend, bind to the device given by the DEVICE_ID environment variable.
if args_opt.platform == 'Ascend':
    device_id = int(os.getenv('DEVICE_ID'))
    context.set_context(device_id=device_id)

if __name__ == '__main__':
    # Evaluation uses batch size 1.
    config.batch_size = 1
    max_text_length = config.max_text_length
    input_size = config.input_size

    dataset = create_dataset(name=args_opt.dataset,
                             dataset_path=args_opt.dataset_path,
                             batch_size=config.batch_size,
                             is_training=False,
                             config=config)
    step_size = dataset.get_dataset_size()

    loss = CTCLoss(max_sequence_length=config.num_step,
                   max_label_length=max_text_length,
                   batch_size=config.batch_size)
    net = CRNN(config)

    # Restore weights and switch to inference mode.
    param_dict = load_checkpoint(args_opt.checkpoint_path)
    load_param_into_net(net, param_dict)
    net.set_train(False)

    model = Model(net, loss_fn=loss, metrics={'CRNNAccuracy': CRNNAccuracy(config)})

    # Dataset sink mode is only used on Ascend.
    res = model.eval(dataset, dataset_sink_mode=args_opt.platform == 'Ascend')
    print("result:", res, flush=True)
def test(cloud_args=None):
    """Evaluate VGG16: Model.eval path for cifar10, manual top-1/top-5 loop otherwise.

    For non-cifar10 datasets, every checkpoint under args.pretrained (or the
    single given file) is evaluated with a hand-rolled fp16 inference loop.
    """
    args = parse_args(cloud_args)
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.device_target,
                        save_graphs=False)
    if os.getenv('DEVICE_ID', "not_set").isdigit():
        context.set_context(device_id=int(os.getenv('DEVICE_ID')))

    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    if args.dataset == "cifar10":
        # Simple path: Model.eval with the built-in accuracy metric.
        net = vgg16(num_classes=args.num_classes)
        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                       0.01, cfg.momentum, weight_decay=args.weight_decay)
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean',
                                                is_grad=False)
        model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
        param_dict = load_checkpoint(args.checkpoint_path)
        load_param_into_net(net, param_dict)
        net.set_train(False)
        dataset = vgg_create_dataset(args.data_path, 1, False)
        res = model.eval(dataset)
        print("result: ", res)
    else:
        args.logger.important_info('start create network')
        if os.path.isdir(args.pretrained):
            # Evaluate every checkpoint in the directory, newest first,
            # sorting by the step/epoch number encoded in the file name.
            models = list(glob.glob(os.path.join(args.pretrained, '*.ckpt')))
            print(models)
            if args.graph_ckpt:
                f = lambda x: -1 * int(os.path.splitext(
                    os.path.split(x)[-1])[0].split('-')[-1].split('_')[0])
            else:
                f = lambda x: -1 * int(os.path.splitext(
                    os.path.split(x)[-1])[0].split('_')[-1])
            args.models = sorted(models, key=f)
        else:
            args.models = [args.pretrained,]

        for model in args.models:
            if args.dataset == "cifar10":
                dataset = vgg_create_dataset(args.data_path, args.image_size,
                                             args.per_batch_size, training=False)
            else:
                dataset = classification_dataset(args.data_path, args.image_size,
                                                 args.per_batch_size)
            eval_dataloader = dataset.create_tuple_iterator()
            network = vgg16(args.num_classes, args, phase="test")

            # Restore weights, run the network in fp16 inference mode.
            load_param_into_net(network, load_checkpoint(model))
            network.add_flags_recursive(fp16=True)
            img_tot = 0
            top1_correct = 0
            top5_correct = 0
            network.set_train(False)
            t_end = time.time()
            it = 0
            for data, gt_classes in eval_dataloader:
                output = network(Tensor(data, mstype.float32))
                output = output.asnumpy()
                top1_output = np.argmax(output, (-1))
                top5_output = np.argsort(output)[:, -5:]
                t1_correct = np.equal(top1_output, gt_classes).sum()
                top1_correct += t1_correct
                top5_correct += get_top5_acc(top5_output, gt_classes)
                img_tot += args.per_batch_size
                # Restart the timer after the first (warm-up) batch on rank 0.
                if args.rank == 0 and it == 0:
                    t_end = time.time()
                    it = 1
            if args.rank == 0:
                time_used = time.time() - t_end
                fps = (img_tot - args.per_batch_size) * args.group_size / time_used
                args.logger.info('Inference Performance: {:.2f} img/sec'.format(fps))
            results = [[top1_correct], [top5_correct], [img_tot]]
            args.logger.info('before results={}'.format(results))
            results = np.array(results)
            args.logger.info('after results={}'.format(results))
            top1_correct = results[0, 0]
            top5_correct = results[1, 0]
            img_tot = results[2, 0]
            acc1 = 100.0 * top1_correct / img_tot
            acc5 = 100.0 * top5_correct / img_tot
            args.logger.info('after allreduce eval: top1_correct={}, tot={},'
                             'acc={:.2f}%(TOP1)'.format(top1_correct, img_tot, acc1))
            args.logger.info('after allreduce eval: top5_correct={}, tot={},'
                             'acc={:.2f}%(TOP5)'.format(top5_correct, img_tot, acc5))
parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'),
                    help='run platform')
args_opt = parser.parse_args()

# This evaluation script only runs on GPU.
if args_opt.platform != 'GPU':
    raise ValueError("Only supported GPU training.")

context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.platform)

# NASNet-A Mobile in inference configuration, weights from the checkpoint.
net = NASNetAMobile(num_classes=cfg.num_classes, is_training=False)
ckpt = load_checkpoint(args_opt.checkpoint)
load_param_into_net(net, ckpt)
net.set_train(False)

dataset = create_dataset(args_opt.dataset_path, cfg, False)
loss = CrossEntropy_Val(smooth_factor=0.1, num_classes=cfg.num_classes)
eval_metrics = {'Loss': nn.Loss(),
                'Top1-Acc': nn.Top1CategoricalAccuracy(),
                'Top5-Acc': nn.Top5CategoricalAccuracy()}
model = Model(net, loss, optimizer=None, metrics=eval_metrics)
metrics = model.eval(dataset)
print("metric: ", metrics)
# NOTE(review): incomplete fragment — opens mid-way through a parser.add_argument(...)
# call whose beginning is outside this view; left byte-identical rather than reformatted.
default='GPU', choices=('Ascend', 'GPU', 'CPU'), help='run platform') args_opt = parser.parse_args() if args_opt.platform == 'Ascend': device_id = int(os.getenv('DEVICE_ID')) context.set_context(device_id=device_id) cfg = CFG_DICT[args_opt.platform] create_dataset = DS_DICT[cfg.ds_type] context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.platform) net = InceptionV3(num_classes=cfg.num_classes, is_training=False) ckpt = load_checkpoint(args_opt.checkpoint) load_param_into_net(net, ckpt) net.set_train(False) cfg.rank = 0 cfg.group_size = 1 dataset = create_dataset(args_opt.dataset_path, False, cfg) loss = CrossEntropy_Val(smooth_factor=0.1, num_classes=cfg.num_classes) eval_metrics = {'Loss': nn.Loss(), 'Top1-Acc': nn.Top1CategoricalAccuracy(), 'Top5-Acc': nn.Top5CategoricalAccuracy()} model = Model(net, loss, optimizer=None, metrics=eval_metrics) metrics = model.eval(dataset, dataset_sink_mode=cfg.ds_sink_mode) print("metric: ", metrics)
parser.add_argument('--device_target', type=str, default='GPU', choices=("GPU"),
                    help="Device target, support GPU.")
args, _ = parser.parse_known_args()

# Only GPU is supported by this evaluation script.
if args.device_target == "GPU":
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args.device_target,
                        save_graphs=False)
else:
    raise ValueError("Unsupported device target.")

eval_ds = create_eval_dataset(args.dataset_path)

# SRCNN with MSE loss; quality measured via PSNR.
net = SRCNN()
lr = Tensor(config.lr, ms.float32)
opt = nn.Adam(params=net.trainable_params(), learning_rate=lr, eps=1e-07)
loss = nn.MSELoss(reduction='mean')

# Restore weights and switch to inference mode.
param_dict = load_checkpoint(args.checkpoint_path)
load_param_into_net(net, param_dict)
net.set_train(False)

model = Model(net, loss_fn=loss, optimizer=opt, metrics={'PSNR': SRCNNpsnr()})
res = model.eval(eval_ds, dataset_sink_mode=False)
print("result ", res)
if __name__ == '__main__':
    data_config = DataConfig()
    model_config = ModelConfig()
    train_config = TrainConfig()

    # Single-epoch evaluation pipeline.
    ds_eval = create_dataset(args_opt.dataset_path, train_mode=False, epochs=1,
                             batch_size=train_config.batch_size,
                             data_type=DataType(data_config.data_format))

    # Build train/eval networks; only the eval network gets the checkpoint.
    model_builder = ModelBuilder(ModelConfig, TrainConfig)
    train_net, eval_net = model_builder.get_train_eval_net()
    train_net.set_train()
    eval_net.set_train(False)

    auc_metric = AUCMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric})
    param_dict = load_checkpoint(args_opt.checkpoint_path)
    load_param_into_net(eval_net, param_dict)

    # Time the evaluation and append the AUC to the log file.
    start = time.time()
    res = model.eval(ds_eval)
    eval_time = time.time() - start
    time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    out_str = f'{time_str} AUC: {list(res.values())[0]}, eval time: {eval_time}s.'
    print(out_str)
    add_write('./auc.log', str(out_str))
# NOTE(review): incomplete fragment — the leading statements and the `else:` belong to
# an `if` outside this view; left byte-identical rather than reformatted.
# NOTE(review): `assert (checker, True)` asserts a non-empty tuple and is therefore
# always true — it never actually checks that ./memreuse.ir exists; should be
# `assert checker` (not fixable here without touching code).
context.set_auto_parallel_context(all_reduce_fusion_split_indices=[140]) init() else: context.set_context(enable_hccl=False) context.set_context(mode=context.GRAPH_MODE) epoch_size = args_opt.epoch_size net = resnet50(args_opt.batch_size, args_opt.num_classes) loss = CrossEntropyLoss() opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9) model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}) if args_opt.do_train: dataset = create_dataset(epoch_size) batch_num = dataset.get_dataset_size() config_ck = CheckpointConfig(save_checkpoint_steps=batch_num * 5, keep_checkpoint_max=10) ckpoint_cb = ModelCheckpoint(prefix="train_resnet_cifar10", directory="./", config=config_ck) loss_cb = LossMonitor() model.train(epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb]) if args_opt.do_eval: # if args_opt.checkpoint_path: # param_dict = load_checkpoint(args_opt.checkpoint_path) # load_param_into_net(net, param_dict) eval_dataset = create_dataset(1, training=False) res = model.eval(eval_dataset) print("result: ", res) checker = os.path.exists("./memreuse.ir") assert (checker, True)
# Loss and optimizer; the explicit WithEvalCell (+ eval_indexes) lets metrics
# work when the network itself runs under O2/O3 mixed precision.
loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, momentum)
eval_net = nn.WithEvalCell(net, loss, AMP_LEVEL in ["O2", "O3"])
model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},
              amp_level=AMP_LEVEL, eval_network=eval_net,
              eval_indexes=[0, 1, 2], keep_batchnorm_fp32=False)

# Callbacks: step timing, loss monitoring, periodic checkpoints.
time_cb = TimeMonitor(data_size=step_size)
loss_cb = LossMonitor()
cb = [time_cb, loss_cb]
save_checkpoint = 5
if save_checkpoint:
    save_checkpoint_epochs = 5
    keep_checkpoint_max = 10
    config_ck = CheckpointConfig(save_checkpoint_steps=save_checkpoint_epochs * step_size,
                                 keep_checkpoint_max=keep_checkpoint_max)
    ckpt_cb = ModelCheckpoint(prefix="resnet", directory=ckpt_save_dir, config=config_ck)
    cb += [ckpt_cb]

# Train with dataset sink enabled.
model.train(epoch_size, dataset, callbacks=cb, dataset_sink_mode=True)

# Evaluate on the CIFAR-10 test split.
eval_dataset_path = "./datasets/cifar10/test"
eval_data = create_dataset(eval_dataset_path, do_train=False)
acc = model.eval(eval_data, dataset_sink_mode=True)
print("Accuracy:", acc)