def eval_alexnet():
    print("============== Starting Testing ==============")

    device_num = get_device_num()
    if device_num > 1:
        # 'Davinci' was a legacy alias for Ascend; use the configured target so GPU works too.
        context.set_context(mode=context.GRAPH_MODE,
                            device_target=config.device_target,
                            save_graphs=False)
        if config.device_target == "Ascend":
            context.set_context(device_id=get_device_id())
            init()
        elif config.device_target == "GPU":
            init()

    if config.dataset_name == 'cifar10':
        network = AlexNet(config.num_classes, phase='test')
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        opt = nn.Momentum(network.trainable_params(), config.learning_rate,
                          config.momentum)
        ds_eval = create_dataset_cifar10(config.data_path, config.batch_size,
                                         status="test",
                                         target=config.device_target)
        param_dict = load_checkpoint(load_path)
        print("load checkpoint from [{}].".format(load_path))
        load_param_into_net(network, param_dict)
        network.set_train(False)
        model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
    elif config.dataset_name == 'imagenet':
        network = AlexNet(config.num_classes, phase='test')
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        ds_eval = create_dataset_imagenet(config.data_path, config.batch_size,
                                          training=False)
        param_dict = load_checkpoint(load_path)
        print("load checkpoint from [{}].".format(load_path))
        load_param_into_net(network, param_dict)
        network.set_train(False)
        model = Model(network, loss_fn=loss,
                      metrics={'top_1_accuracy', 'top_5_accuracy'})
    else:
        raise ValueError("Unsupported dataset.")

    if ds_eval.get_dataset_size() == 0:
        raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")

    result = model.eval(ds_eval, dataset_sink_mode=config.dataset_sink_mode)
    print("result : {}".format(result))
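# Hedged usage sketch: driving the evaluation entry point above. `load_path`
# is assumed to be set by the surrounding script (e.g. derived from the parsed
# config). model.eval returns a dict keyed by metric name, so expect something
# like {'Accuracy': 0.88} for cifar10, or {'top_1_accuracy': ...,
# 'top_5_accuracy': ...} for imagenet.
if __name__ == "__main__":
    eval_alexnet()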
from mindspore.nn.metrics import Accuracy

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='MindSpore AlexNet Example')
    parser.add_argument('--device_target', type=str, default="Ascend",
                        choices=['Ascend', 'GPU'],
                        help='device where the code will be implemented (default: Ascend)')
    parser.add_argument('--data_path', type=str, default="./",
                        help='path where the dataset is saved')
    parser.add_argument('--ckpt_path', type=str, default="./ckpt",
                        help='for testing, must provide the path where the trained ckpt file is saved')
    parser.add_argument('--dataset_sink_mode', type=ast.literal_eval, default=True,
                        help='dataset_sink_mode is False or True')
    args = parser.parse_args()

    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)

    network = AlexNet(cfg.num_classes)
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
    model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})

    print("============== Starting Testing ==============")
    param_dict = load_checkpoint(args.ckpt_path)
    load_param_into_net(network, param_dict)
    ds_eval = create_dataset_cifar10(args.data_path, cfg.batch_size, status="test")
    acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode)
    print("============== {} ==============".format(acc))
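# Hedged usage note (assumed invocation matching the argparse flags above;
# the script and checkpoint names are illustrative, not taken from the snippet):
#   python eval.py --device_target Ascend \
#       --data_path ./cifar-10-batches-bin \
#       --ckpt_path ./ckpt/your_trained_checkpoint.ckpt \
#       --dataset_sink_mode True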
                        default=True,
                        help='dataset_sink_mode is False or True')
    parser.add_argument('--device_id', type=int, default=0,
                        help='device id of GPU or Ascend. (Default: 0)')
    args = parser.parse_args()

    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)

    print("============== Starting Testing ==============")

    if args.dataset_name == 'cifar10':
        cfg = alexnet_cifar10_cfg
        network = AlexNet(cfg.num_classes, phase='test')
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
        ds_eval = create_dataset_cifar10(args.data_path, cfg.batch_size,
                                         status="test", target=args.device_target)
        param_dict = load_checkpoint(args.ckpt_path)
        print("load checkpoint from [{}].".format(args.ckpt_path))
        load_param_into_net(network, param_dict)
        network.set_train(False)
        model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
default="Ascend", choices=['Ascend', 'GPU'], help='device where the code will be implemented (default: Ascend)') parser.add_argument('--ckpt_path', type=str, default="./ckpt", help='if is test, must provide\ path where the trained ckpt file') args_opt = parser.parse_args() context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) if args_opt.dataset_name == 'cifar10': cfg = alexnet_cifar10_cfg elif args_opt.dataset_name == 'imagenet': cfg = alexnet_imagenet_cfg else: raise ValueError("dataset is not support.") net = AlexNet(num_classes=cfg.num_classes) param_dict = load_checkpoint(args_opt.ckpt_path) load_param_into_net(net, param_dict) input_arr = Tensor( np.random.uniform(0.0, 1.0, size=[1, 3, cfg.image_height, cfg.image_width]), ms.float32) export(net, input_arr, file_name=cfg.air_name, file_format="AIR")
raise ValueError("Unsupported platform.") if args.dataset_name == "cifar10": ds_train = create_dataset_cifar10(args.data_path, cfg.batch_size, target=args.device_target) elif args.dataset_name == "imagenet": ds_train = create_dataset_imagenet(args.data_path, cfg.batch_size) else: raise ValueError("Unsupport dataset.") if ds_train.get_dataset_size() == 0: raise ValueError( "Please check dataset size > 0 and batch_size <= dataset size") network = AlexNet(cfg.num_classes, phase='train') loss_scale_manager = None metrics = None step_per_epoch = ds_train.get_dataset_size( ) if args.sink_size == -1 else args.sink_size if args.dataset_name == 'cifar10': loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") lr = Tensor( get_lr_cifar10(0, cfg.learning_rate, cfg.epoch_size, step_per_epoch)) opt = nn.Momentum(network.trainable_params(), lr, cfg.momentum) metrics = {"Accuracy": Accuracy()} elif args.dataset_name == 'imagenet': loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
# Load training and validation data with the ImageDataGenerator helper.
tr_data = ImageDataGenerator(train_file,
                             mode='training',
                             batch_size=batch_size,
                             num_classes=num_classes,
                             shuffle=True)
# The validation generator should read the validation list; the original
# snippet reused train_file here, which looks like a bug.
val_data = ImageDataGenerator(val_file,
                              mode='inference',
                              batch_size=batch_size,
                              num_classes=num_classes,
                              shuffle=False)

# Create a reinitializable iterator so one pipeline serves both splits.
iterator = Iterator.from_structure(tr_data.data.output_types,
                                   tr_data.data.output_shapes)
next_batch = iterator.get_next()
training_init_op = iterator.make_initializer(tr_data.data)
validation_init_op = iterator.make_initializer(val_data.data)

# AlexNet expects 227x227 RGB inputs (the 277 in the original snippet was a typo).
x = tf.placeholder(tf.float32, [batch_size, 227, 227, 3])
y = tf.placeholder(tf.float32, [batch_size, num_classes])
keep_prob = tf.placeholder(tf.float32)

model = AlexNet(x, keep_prob, num_classes, train_layers)
score = model.fc8

# Only fine-tune the variables of the layers listed in train_layers.
var_list = [v for v in tf.trainable_variables()
            if v.name.split('/')[0] in train_layers]

with tf.name_scope('cross_ent'):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=score,
                                                                  labels=y))

with tf.name_scope('train'):
    gradients = tf.gradients(loss, var_list)
    gradients = list(zip(gradients, var_list))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.apply_gradients(grads_and_vars=gradients)
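# Hedged sketch of the TF1 session loop that would consume the graph built
# above; num_epochs, train_batches_per_epoch, and model.load_initial_weights
# are assumed names not shown in the snippet.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    model.load_initial_weights(sess)  # assumed: loads pretrained AlexNet weights
    for epoch in range(num_epochs):
        sess.run(training_init_op)  # point the shared iterator at the training split
        for _ in range(train_batches_per_epoch):
            img_batch, label_batch = sess.run(next_batch)
            sess.run(train_op, feed_dict={x: img_batch,
                                          y: label_batch,
                                          keep_prob: 0.5})
        sess.run(validation_init_op)  # switch the same iterator to validation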
""" import os # import sys # sys.path.append(os.path.join(os.getcwd(), 'utils')) from utils.config import config import numpy as np import mindspore as ms from mindspore import context, Tensor, load_checkpoint, load_param_into_net, export from src.alexnet import AlexNet if os.path.exists(config.data_path_local): ckpt_path = config.ckpt_path_local else: ckpt_path = os.path.join(config.data_path, 'checkpoint_alexnet-30_1562.ckpt') context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target) if config.device_target == "Ascend": context.set_context(device_id=config.device_id) if __name__ == '__main__': net = AlexNet(num_classes=config.num_classes) param_dict = load_checkpoint(ckpt_path) load_param_into_net(net, param_dict) input_arr = Tensor(np.zeros([config.batch_size, 3, config.image_height, config.image_width]), ms.float32) export(net, input_arr, file_name=config.file_name, file_format=config.file_format)
        save_model_scalar(MODEL_SCALAR_TYPE.TrainLoss, loss)
        return loss


class MyCallback(Callback):
    def __init__(self):
        super(MyCallback, self).__init__()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        set_iteration(cb_params.cur_step_num)


context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")

network = AlexNet(num_classes=10)
ds_train = create_dataset_mnist("./dataset/10-batches-bin", cfg.batch_size, cfg.epoch_size)
loss = MyLoss(is_grad=False, sparse=True, reduction="mean")
lr = Tensor(get_lr(0, cfg.learning_rate, cfg.epoch_size, ds_train.get_dataset_size()))
opt = MyOptimizer(network.trainable_params(), lr, cfg.momentum)
model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})

time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
                             keep_checkpoint_max=cfg.keep_checkpoint_max)
ckpoint_cb = ModelCheckpoint(prefix="checkpoint_alexnet", directory="./ckpt",
                             config=config_ck)
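# Hedged completion: the snippet above stops before training starts. The
# callbacks it builds would typically be handed to model.train like this,
# with MyCallback() included so set_iteration fires at every step.
model.train(cfg.epoch_size, ds_train,
            callbacks=[time_cb, ckpoint_cb, MyCallback()])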
def train_alexnet():
    print(config)
    print('device id:', get_device_id())
    print('device num:', get_device_num())
    print('rank id:', get_rank_id())
    print('job id:', get_job_id())

    device_target = config.device_target
    context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
    context.set_context(save_graphs=False)

    device_num = get_device_num()
    if config.dataset_name == "cifar10":
        if device_num > 1:
            config.learning_rate = config.learning_rate * device_num
            config.epoch_size = config.epoch_size * 2
    elif config.dataset_name == "imagenet":
        pass
    else:
        raise ValueError("Unsupported dataset.")

    if device_num > 1:
        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(device_num=device_num,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True)
        if device_target == "Ascend":
            context.set_context(device_id=get_device_id())
            init()
        elif device_target == "GPU":
            init()
    else:
        context.set_context(device_id=get_device_id())

    if config.dataset_name == "cifar10":
        ds_train = create_dataset_cifar10(config.data_path, config.batch_size,
                                          target=config.device_target)
    elif config.dataset_name == "imagenet":
        ds_train = create_dataset_imagenet(config.data_path, config.batch_size)
    else:
        raise ValueError("Unsupported dataset.")

    if ds_train.get_dataset_size() == 0:
        raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")

    network = AlexNet(config.num_classes, phase='train')

    loss_scale_manager = None
    metrics = None
    step_per_epoch = ds_train.get_dataset_size() if config.sink_size == -1 else config.sink_size
    if config.dataset_name == 'cifar10':
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        lr = Tensor(get_lr_cifar10(0, config.learning_rate, config.epoch_size, step_per_epoch))
        opt = nn.Momentum(network.trainable_params(), lr, config.momentum)
        metrics = {"Accuracy": Accuracy()}
    elif config.dataset_name == 'imagenet':
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        lr = Tensor(get_lr_imagenet(config.learning_rate, config.epoch_size, step_per_epoch))
        opt = nn.Momentum(params=get_param_groups(network),
                          learning_rate=lr,
                          momentum=config.momentum,
                          weight_decay=config.weight_decay,
                          loss_scale=config.loss_scale)

        from mindspore.train.loss_scale_manager import DynamicLossScaleManager, FixedLossScaleManager
        if config.is_dynamic_loss_scale == 1:
            loss_scale_manager = DynamicLossScaleManager(init_loss_scale=65536,
                                                         scale_factor=2,
                                                         scale_window=2000)
        else:
            loss_scale_manager = FixedLossScaleManager(config.loss_scale,
                                                       drop_overflow_update=False)
    else:
        raise ValueError("Unsupported dataset.")

    if device_target == "Ascend":
        model = Model(network, loss_fn=loss, optimizer=opt, metrics=metrics,
                      amp_level="O2", keep_batchnorm_fp32=False,
                      loss_scale_manager=loss_scale_manager)
    elif device_target == "GPU":
        model = Model(network, loss_fn=loss, optimizer=opt, metrics=metrics,
                      loss_scale_manager=loss_scale_manager)
    else:
        raise ValueError("Unsupported platform.")

    if device_num > 1:
        ckpt_save_dir = os.path.join(config.checkpoint_path + "_" + str(get_rank()))
    else:
        ckpt_save_dir = config.checkpoint_path

    time_cb = TimeMonitor(data_size=step_per_epoch)
    config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,
                                 keep_checkpoint_max=config.keep_checkpoint_max)
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_alexnet",
                                 directory=ckpt_save_dir,
                                 config=config_ck)

    print("============== Starting Training ==============")
    model.train(config.epoch_size, ds_train,
                callbacks=[time_cb, ckpoint_cb, LossMonitor()],
                dataset_sink_mode=config.dataset_sink_mode,
                sink_size=config.sink_size)
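# Hedged launch note: when device_num > 1, the data-parallel path above
# expects a distributed launcher. On GPU, MindSpore data parallelism is
# typically started with OpenMPI, e.g.:
#   mpirun -n 8 python train.py --device_target GPU
# On Ascend, a RANK_TABLE_FILE-based launch script is the usual route.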
def alexnet(*args, **kwargs):
    return AlexNet(*args, **kwargs)
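# Hedged usage sketch of the factory above: it just forwards to AlexNet's
# constructor, so it accepts whatever that constructor accepts (num_classes
# and phase are assumed from the snippets earlier in this section).
net = alexnet(num_classes=10, phase='train')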