Example #1
def eval_alexnet():
    print("============== Starting Testing ==============")

    device_num = get_device_num()
    if device_num > 1:
        context.set_context(mode=context.GRAPH_MODE,
                            device_target=config.device_target,
                            save_graphs=False)
        if config.device_target == "Ascend":
            context.set_context(device_id=get_device_id())
            init()
        elif config.device_target == "GPU":
            init()

    if config.dataset_name == 'cifar10':
        network = AlexNet(config.num_classes, phase='test')
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        opt = nn.Momentum(network.trainable_params(), config.learning_rate,
                          config.momentum)
        ds_eval = create_dataset_cifar10(config.data_path, config.batch_size,
                                         status="test",
                                         target=config.device_target)
        param_dict = load_checkpoint(load_path)
        print("load checkpoint from [{}].".format(load_path))
        load_param_into_net(network, param_dict)
        network.set_train(False)
        model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})

    elif config.dataset_name == 'imagenet':
        network = AlexNet(config.num_classes, phase='test')
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        ds_eval = create_dataset_imagenet(config.data_path,
                                          config.batch_size,
                                          training=False)
        param_dict = load_checkpoint(load_path)
        print("load checkpoint from [{}].".format(load_path))
        load_param_into_net(network, param_dict)
        network.set_train(False)
        model = Model(network,
                      loss_fn=loss,
                      metrics={'top_1_accuracy', 'top_5_accuracy'})

    else:
        raise ValueError("Unsupported dataset.")

    if ds_eval.get_dataset_size() == 0:
        raise ValueError(
            "Please check dataset size > 0 and batch_size <= dataset size")

    result = model.eval(ds_eval, dataset_sink_mode=config.dataset_sink_mode)
    print("result : {}".format(result))
Example #2
from mindspore.nn.metrics import Accuracy


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='MindSpore AlexNet Example')
    parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'],
                        help='device where the code will be implemented (default: Ascend)')
    parser.add_argument('--data_path', type=str, default="./", help='path where the dataset is saved')
    parser.add_argument('--ckpt_path', type=str, default="./ckpt",
                        help='path to the trained checkpoint file (required for testing)')
    parser.add_argument('--dataset_sink_mode', type=ast.literal_eval,
                        default=True, help='dataset_sink_mode is False or True')
    args = parser.parse_args()

    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)

    network = AlexNet(cfg.num_classes)
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    repeat_size = cfg.epoch_size
    opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
    model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})

    print("============== Starting Testing ==============")
    param_dict = load_checkpoint(args.ckpt_path)
    load_param_into_net(network, param_dict)
    ds_eval = create_dataset_cifar10(args.data_path,
                                     cfg.batch_size,
                                     status="test")
    acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode)
    print("============== {} ==============".format(acc))
Example #3
                        help='dataset_sink_mode is False or True')
    args = parser.parse_args()

    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args.device_target)

    ds_train = create_dataset_cifar10(args.data_path, cfg.batch_size,
                                      cfg.epoch_size)
    network = AlexNet(cfg.num_classes)
    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False,
                                            sparse=True,
                                            reduction="mean")
    lr = Tensor(
        get_lr(0, cfg.learning_rate, cfg.epoch_size,
               ds_train.get_dataset_size()))
    opt = nn.Momentum(network.trainable_params(), lr, cfg.momentum)
    model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})
    time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
    config_ck = CheckpointConfig(
        save_checkpoint_steps=cfg.save_checkpoint_steps,
        keep_checkpoint_max=cfg.keep_checkpoint_max)
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_alexnet",
                                 directory=args.ckpt_path,
                                 config=config_ck)

    print("============== Starting Training ==============")
    model.train(cfg.epoch_size,
                ds_train,
                callbacks=[time_cb, ckpoint_cb,
                           LossMonitor()],
                dataset_sink_mode=args.dataset_sink_mode)
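
The `lr` passed to `nn.Momentum` is a `Tensor` that appears to hold one learning-rate value per training step, produced by the repository's `get_lr` (its exact schedule is not shown here). A hypothetical flat schedule with the same shape, using illustrative values only, would look like:

import numpy as np
from mindspore import Tensor

def flat_lr(lr_init, epoch_size, steps_per_epoch):
    """Hypothetical stand-in for get_lr: a constant learning rate per step."""
    return np.full(epoch_size * steps_per_epoch, lr_init, dtype=np.float32)

# Illustrative values only; the real schedule comes from the repository's get_lr.
lr = Tensor(flat_lr(0.002, 30, 1562))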
Example #4
        super(MyCallback, self).__init__()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        set_iteration(cb_params.cur_step_num)


context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")

network = AlexNet(num_classes=10)
ds_train = create_dataset_mnist("./dataset/10-batches-bin", cfg.batch_size,
                                cfg.epoch_size)
loss = MyLoss(is_grad=False, sparse=True, reduction="mean")
lr = Tensor(
    get_lr(0, cfg.learning_rate, cfg.epoch_size, ds_train.get_dataset_size()))
opt = MyOptimizer(network.trainable_params(), lr, cfg.momentum)

model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})

time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
                             keep_checkpoint_max=cfg.keep_checkpoint_max)
ckpoint_cb = ModelCheckpoint(prefix="checkpoint_alexnet",
                             directory="./ckpt",
                             config=config_ck)
summary_collector = SummaryCollector(
    summary_dir='./summary_dir/alex-202007281532', collect_freq=1)

model.train(cfg.epoch_size,
            ds_train,
            callbacks=[MyCallback(), LossMonitor(), summary_collector],
            # Assumed completion: the snippet is cut off here; sink mode is
            # disabled to match the PYNATIVE_MODE context set above.
            dataset_sink_mode=False)
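
The snippet opens inside `MyCallback`, so its class header is cut off; a minimal reconstruction, assuming the standard `mindspore.train.callback.Callback` base class and treating `set_iteration` as an externally defined helper, could be:

from mindspore.train.callback import Callback

class MyCallback(Callback):
    """Minimal sketch of the callback whose tail appears above."""

    def __init__(self):
        super(MyCallback, self).__init__()

    def step_end(self, run_context):
        # Forward the current global step to the externally defined set_iteration().
        cb_params = run_context.original_args()
        set_iteration(cb_params.cur_step_num)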
Example #5
def train_alexnet():
    print(config)
    print('device id:', get_device_id())
    print('device num:', get_device_num())
    print('rank id:', get_rank_id())
    print('job id:', get_job_id())

    device_target = config.device_target
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=device_target,
                        save_graphs=False)

    device_num = get_device_num()
    if config.dataset_name == "cifar10":
        if device_num > 1:
            config.learning_rate = config.learning_rate * device_num
            config.epoch_size = config.epoch_size * 2
    elif config.dataset_name == "imagenet":
        pass
    else:
        raise ValueError("Unsupported dataset.")

    if device_num > 1:
        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(device_num=device_num,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True)
        if device_target == "Ascend":
            context.set_context(device_id=get_device_id())
            init()
        elif device_target == "GPU":
            init()
    else:
        context.set_context(device_id=get_device_id())

    if config.dataset_name == "cifar10":
        ds_train = create_dataset_cifar10(config.data_path,
                                          config.batch_size,
                                          target=config.device_target)
    elif config.dataset_name == "imagenet":
        ds_train = create_dataset_imagenet(config.data_path, config.batch_size)
    else:
        raise ValueError("Unsupported dataset.")

    if ds_train.get_dataset_size() == 0:
        raise ValueError(
            "Please check dataset size > 0 and batch_size <= dataset size")

    network = AlexNet(config.num_classes, phase='train')

    loss_scale_manager = None
    metrics = None
    step_per_epoch = (ds_train.get_dataset_size()
                      if config.sink_size == -1 else config.sink_size)
    if config.dataset_name == 'cifar10':
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        lr = Tensor(
            get_lr_cifar10(0, config.learning_rate, config.epoch_size,
                           step_per_epoch))
        opt = nn.Momentum(network.trainable_params(), lr, config.momentum)
        metrics = {"Accuracy": Accuracy()}

    elif config.dataset_name == 'imagenet':
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        lr = Tensor(
            get_lr_imagenet(config.learning_rate, config.epoch_size,
                            step_per_epoch))
        opt = nn.Momentum(params=get_param_groups(network),
                          learning_rate=lr,
                          momentum=config.momentum,
                          weight_decay=config.weight_decay,
                          loss_scale=config.loss_scale)

        from mindspore.train.loss_scale_manager import DynamicLossScaleManager, FixedLossScaleManager
        if config.is_dynamic_loss_scale == 1:
            loss_scale_manager = DynamicLossScaleManager(init_loss_scale=65536,
                                                         scale_factor=2,
                                                         scale_window=2000)
        else:
            loss_scale_manager = FixedLossScaleManager(
                config.loss_scale, drop_overflow_update=False)

    else:
        raise ValueError("Unsupported dataset.")

    if device_target == "Ascend":
        model = Model(network,
                      loss_fn=loss,
                      optimizer=opt,
                      metrics=metrics,
                      amp_level="O2",
                      keep_batchnorm_fp32=False,
                      loss_scale_manager=loss_scale_manager)
    elif device_target == "GPU":
        model = Model(network,
                      loss_fn=loss,
                      optimizer=opt,
                      metrics=metrics,
                      loss_scale_manager=loss_scale_manager)
    else:
        raise ValueError("Unsupported platform.")

    if device_num > 1:
        ckpt_save_dir = os.path.join(config.checkpoint_path + "_" +
                                     str(get_rank()))
    else:
        ckpt_save_dir = config.checkpoint_path

    time_cb = TimeMonitor(data_size=step_per_epoch)
    config_ck = CheckpointConfig(
        save_checkpoint_steps=config.save_checkpoint_steps,
        keep_checkpoint_max=config.keep_checkpoint_max)
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_alexnet",
                                 directory=ckpt_save_dir,
                                 config=config_ck)

    print("============== Starting Training ==============")
    model.train(config.epoch_size,
                ds_train,
                callbacks=[time_cb, ckpoint_cb,
                           LossMonitor()],
                dataset_sink_mode=config.dataset_sink_mode,
                sink_size=config.sink_size)
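
On the ImageNet branch, `get_param_groups(network)` is imported from the repository's utilities and is not shown here; a common, hypothetical grouping that skips weight decay for biases and other 1-D parameters looks like this:

def get_param_groups_sketch(network):
    """Hypothetical parameter grouping: no weight decay on biases/1-D params."""
    decay_params, no_decay_params = [], []
    for param in network.trainable_params():
        if param.name.endswith(".bias") or len(param.shape) == 1:
            no_decay_params.append(param)
        else:
            decay_params.append(param)
    return [{'params': no_decay_params, 'weight_decay': 0.0},
            {'params': decay_params}]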