Example 1
    def _valid_epoch(self):
        if self.config.mixup and self.config.loss.type == 'CrossEntropyLoss':
            from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
            loss_fn = SoftmaxCrossEntropyWithLogits(sparse=True)
            self.ms_model = MsModel(
                network=self.model,
                loss_fn=loss_fn,
                optimizer=self.optimizer,
                metrics={self.metric_name: self.valid_metrics()})
        self.callbacks.before_valid()

        try:
            eval_metrics = self.ms_model.eval(
                valid_dataset=self.valid_loader,
                dataset_sink_mode=self.dataset_sink_mode)
            self.valid_metrics.update(eval_metrics)
            valid_logs = dict()
            valid_logs['cur_valid_perfs'] = self.valid_metrics.results

            self.callbacks.after_valid(valid_logs)
        except RuntimeError as exc:
            logging.warning(
                "RuntimeError occurred while evaluating the model; skipping evaluation."
            )
            logging.warning("The RuntimeError message is: {}.".format(exc))
Example 2
def test_batchnorm_batch_parallel():
    num_classes = 1001
    batch_size = 32
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2
    rank_size = 0

    predict = Tensor(np.ones([batch_size, 3, 224, 224]), dtype=ms.float32)
    label = Tensor(np.ones([batch_size]), dtype=ms.int32)

    dataset = DatasetLenet(predict, label, 2)
    net = batchnorm_net(num_classes)

    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    loss.softmax_cross_entropy.set_strategy(((dev_num, 1), (dev_num, 1)))
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   learning_rate, momentum)

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(
        parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
    context.set_context(mode=context.GRAPH_MODE)
    model = Model(net, loss, opt)
    model.train(epoch_size, dataset, dataset_sink_mode=False)
Example 3
def test_net(network, data_path, ckpt):
    """define the evaluation method"""
    print("============== Starting Testing ==============")
    # load the saved model for evaluation
    load_checkpoint(ckpt, net=network)
    # load the testing dataset
    ds_eval = create_dataset(False, data_path)

    # config = GPTConfig(batch_size=4,
    #                    seq_length=1024,
    #                    vocab_size=50257,
    #                    embedding_size=1024,
    #                    num_layers=24,
    #                    num_heads=16,
    #                    expand_ratio=4,
    #                    post_layernorm_residual=False,
    #                    dropout_rate=0.1,
    #                    compute_dtype=mstype.float16,
    #                    use_past=False)
    # loss = CrossEntropyLoss(config)

    net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    model = Model(network, net_loss, metrics={"Accuracy": Accuracy()})
    # model = Model(network, net_loss, metrics={"Accuracy": Accuracy()}, amp_level="O3")
    acc = model.eval(ds_eval, dataset_sink_mode=False)
    print("============== Accuracy:{} ==============".format(acc))
Example 4
def test_resnet_model_parallel():
    num_classes = 1024
    batch_size = 32
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(device_num=dev_num, global_rank=0)
    context.set_auto_parallel_context(
        parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=dev_num)
    context.set_context(mode=context.GRAPH_MODE)
    predict = Tensor(np.ones([batch_size, 64, 112, 112]), dtype=ms.float32)
    label = Tensor(np.ones([batch_size]), dtype=ms.int32)

    dataset = DatasetLenet(predict, label, 2)
    net = resnet_model_parallel_net(num_classes)

    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss.softmax_cross_entropy.shard(((dev_num, 1), (dev_num, 1)))
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   learning_rate, momentum)

    model = Model(net, loss, opt)
    model.train(epoch_size, dataset, dataset_sink_mode=False)
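
A note on the strategy tuples passed to shard above: each inner tuple gives the number of slices per dimension of one operator input. A minimal sketch, assuming dev_num=8; calling shard only annotates the primitive, so this runs without a distributed setup:

import mindspore.nn as nn

dev_num = 8  # illustrative device count
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
# (logits strategy, labels strategy): slice dimension 0 (the batch axis)
# across dev_num devices and keep dimension 1 whole.
loss.softmax_cross_entropy.shard(((dev_num, 1), (dev_num, 1)))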
Example 5
    def __init__(self, model=None):
        """Initializing the trainer with the provided model.

        Arguments:
        client_id: The ID of the client using this trainer (optional).
        model: The model to train.
        """
        super().__init__()

        if hasattr(Config().trainer, 'cpuonly') and Config().trainer.cpuonly:
            mindspore.context.set_context(mode=mindspore.context.PYNATIVE_MODE,
                                          device_target='CPU')
        else:
            mindspore.context.set_context(mode=mindspore.context.PYNATIVE_MODE,
                                          device_target='GPU')

        if model is None:
            self.model = models_registry.get()
        else:
            self.model = model

        # Initializing the loss criterion
        loss_criterion = SoftmaxCrossEntropyWithLogits(sparse=True,
                                                       reduction='mean')

        # Initializing the optimizer
        optimizer = nn.Momentum(self.model.trainable_params(),
                                Config().trainer.learning_rate,
                                Config().trainer.momentum)

        self.mindspore_model = mindspore.Model(
            self.model,
            loss_criterion,
            optimizer,
            metrics={"Accuracy": Accuracy()})
Example 6
def train_common(net):
    batch_size = 32
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2
    device_num = 4
    context.reset_auto_parallel_context()
    auto_parallel_context().set_enable_all_reduce_fusion(
        enable_all_reduce_fusion=True)
    context.set_auto_parallel_context(
        parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL,
        device_num=device_num,
        parameter_broadcast=False)
    context.set_context(mode=context.GRAPH_MODE)

    predict = Tensor(np.ones([batch_size, 128]), dtype=ms.float32)
    label = Tensor(np.ones([batch_size]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)

    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, opt)

    model.train(epoch_size, dataset, dataset_sink_mode=False)
    allreduce_fusion_dict = _executor._get_allreduce_fusion(
        model._train_network)

    print(allreduce_fusion_dict)
    return allreduce_fusion_dict
Example 7
def run(args):
    ms.context.set_context(mode=ms.context.GRAPH_MODE,
                           device_target=args.device)
    dataset_sink_mode = False

    download_dataset(args.data_dir)

    # define the loss function
    net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    # create the network
    network = LeNet5()
    # define the optimizer
    net_opt = build_optimizer(args, network)
    config_ck = CheckpointConfig(save_checkpoint_steps=1875,
                                 keep_checkpoint_max=10)
    # save the network model and parameters for subsequent fine-tuning
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck)
    # group layers into an object with training and evaluation features
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

    if args.init_ckpt:
        load_ckpt(network, args.init_ckpt)

    train_net(network, model, args, ckpoint_cb, dataset_sink_mode)
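
A self-contained sketch of the checkpoint-callback wiring used here (the stand-in nn.Dense network and synthetic data are assumptions; the real script trains LeNet5 on MNIST):

import numpy as np
import mindspore.nn as nn
import mindspore.dataset as ds
from mindspore import Model
from mindspore.train.callback import (ModelCheckpoint, CheckpointConfig,
                                      LossMonitor)

net = nn.Dense(32, 10)
loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
opt = nn.Momentum(net.trainable_params(), learning_rate=0.01, momentum=0.9)

# Synthetic training data (an assumption, just to make the sketch runnable).
data = np.random.randn(64, 32).astype(np.float32)
labels = np.random.randint(0, 10, size=(64,)).astype(np.int32)
train_set = ds.NumpySlicesDataset({"data": data, "label": labels}).batch(16)

# Save a checkpoint every 4 steps, keeping at most 10 files on disk.
config_ck = CheckpointConfig(save_checkpoint_steps=4, keep_checkpoint_max=10)
ckpt_cb = ModelCheckpoint(prefix="checkpoint_demo", config=config_ck)

model = Model(net, loss_fn, opt)
model.train(1, train_set, callbacks=[ckpt_cb, LossMonitor()],
            dataset_sink_mode=False)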
Example 8
def bn_common(parallel_mode, train_flag, strategy_loss=None):
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                      device_num=8)
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2
    rank_size = 8

    predict = Tensor(np.ones([32, 512]), dtype=ms.float32)
    label = Tensor(np.ones([32]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = bn_net()

    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss.softmax_cross_entropy.shard(strategy_loss)
    opt = Momentum(net.trainable_params(), learning_rate, momentum, 0.0001,
                   1024 * rank_size)

    if not train_flag:
        net = WithLossCell(net, loss)
        net.set_train()

    if parallel_mode == ParallelMode.DATA_PARALLEL:
        context.set_auto_parallel_context(parameter_broadcast=True)
    model = Model(net, loss, opt)
    if train_flag:
        model.train(epoch_size, dataset, dataset_sink_mode=False)
    else:
        model._predict(predict, label)
Example 9
def loss_scale_manager_common(strategy1):
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL,
                                      device_num=8)
    predict = Tensor(np.ones([32, 128]), dtype=ms.float32)
    label = Tensor(np.ones([32]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = all_to_all_net(strategy1)

    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    loss.softmax_cross_entropy.set_strategy(((8, 1), (8, 1)))
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    scale_manager = DynamicLossScaleManager(32, 2, 2000)
    model = Model(net, loss, opt, loss_scale_manager=scale_manager)
    # if no GE backend exists, outputs = self._train_network(*next_element) returns the input tensors.
    try:
        model.train(epoch_size, dataset, dataset_sink_mode=False)
    except TypeError:
        pass
    else:
        assert False
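
For contrast, both built-in loss-scale managers, construction only; the dynamic arguments mirror the test above, while the fixed ones are illustrative:

from mindspore.train.loss_scale_manager import (FixedLossScaleManager,
                                                DynamicLossScaleManager)

# Fixed: a constant scale. With drop_overflow_update=False the update is not
# skipped on overflow, and the optimizer is expected to apply the scale itself.
fixed = FixedLossScaleManager(loss_scale=1024, drop_overflow_update=False)

# Dynamic: start at 32, multiply by 2 after 2000 consecutive clean steps,
# divide by 2 on overflow (init_loss_scale, scale_factor, scale_window).
dynamic = DynamicLossScaleManager(32, 2, 2000)

# Either attaches to the high-level API via:
#   model = Model(net, loss, opt, loss_scale_manager=dynamic)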
Example 10
def main():
    """ main function """
    os.environ["DEVICE_NUM"] = "1"
    os.environ["RANK_ID"] = "0"
    target = 'Ascend'
    context.set_context(mode=context.GRAPH_MODE, device_target=target)

    # Step 1: create the dataset for evaluation, prepare the input data,
    # initialize the network, and load the pretrained checkpoint into it.
    # Make sure the network works properly before running quant_resnet50.
    dataset = create_dataset(dataset_path=ARGS_OPT.dataset_path,
                             do_train=False,
                             batch_size=32,
                             target=target)

    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    dataset = dataset.take(1)
    input_shape = [32, 3, 224, 224]

    class_num = 10
    input_data = np.random.uniform(0.0, 1.0,
                                   size=input_shape).astype(np.float32)
    network = resnet50(class_num)
    param_dict = load_checkpoint(ARGS_OPT.checkpoint_path)
    load_param_into_net(network, param_dict)
    network.set_train(False)

    quant_resnet50(network, dataset, loss, input_data)
Example 11
def calibration():
    """ do the calibration to get the scale offset record file"""
    dataset = create_dataset(
        dataset_path=ARGS_OPT.eval_dataset,
        do_train=False,
        batch_size=config.batch_size,  # pylint: disable=no-member
        target=ARGS_OPT.device_target)
    dataset = dataset.take(1)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    network = resnet(10)
    network.set_train(False)
    param_dict = load_checkpoint(ARGS_OPT.pre_trained)
    load_param_into_net(network, param_dict)
    input_data = np.random.uniform(0.0, 1.0, size=[32, 3, 224,
                                                   224]).astype(np.float32)
    config_file = os.path.join(CUR_DIR, './config.json')
    amct.create_quant_config(config_file, network, input_data)
    calibration_network = amct.quantize_model(config_file, network, input_data)

    model = Model(calibration_network,
                  loss_fn=loss,
                  metrics={'top_1_accuracy', 'top_5_accuracy'})
    _ = model.eval(dataset)
    amct.save_model('./resnet50_quant_calibration', calibration_network,
                    input_data)
Example 12
def dpn_evaluate(args):
    # create evaluate dataset
    eval_path = os.path.join(args.data_dir, 'val')
    eval_dataset = classification_dataset(eval_path,
                                          image_size=args.image_size,
                                          num_parallel_workers=args.num_parallel_workers,
                                          per_batch_size=args.batch_size,
                                          max_epoch=1,
                                          rank=args.rank,
                                          shuffle=False,
                                          group_size=args.group_size,
                                          mode='eval')

    # create network
    net = dpns[args.backbone](num_classes=args.num_classes)
    # load checkpoint
    if os.path.isfile(args.pretrained):
        load_param_into_net(net, load_checkpoint(args.pretrained))
    # loss
    if args.dataset == "imagenet-1K":
        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    else:
        if not args.label_smooth:
            args.label_smooth_factor = 0.0
        loss = CrossEntropy(smooth_factor=args.label_smooth_factor, num_classes=args.num_classes)

    # create model
    model = Model(net, amp_level="O2", keep_batchnorm_fp32=False, loss_fn=loss,
                  metrics={'top_1_accuracy', 'top_5_accuracy'})
    # evaluate
    output = model.eval(eval_dataset)
    print(f'Evaluation result: {output}.')
Example 13
    def __init__(self, network, is_train=True):
        super(NetWithLossClass, self).__init__(auto_prefix=False)
        self.loss = SoftmaxCrossEntropyWithLogits(sparse=True,
                                                  reduction='mean')
        self.l1_loss = L1Loss()
        self.network = network
        self.is_train = is_train
        self.concat = P.Concat(axis=1)
Example 14
def resnet50_train(args_opt):
    device_id = 0
    device_num = 1
    epoch_size = args_opt.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/home/share/dataset/cifar-10-batches-bin/' # your cifar10 path

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
    context.set_context(device_id=device_id)
    if device_num > 1:
        context.set_auto_parallel_context(device_num=device_num,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True)
        init()
        local_data_path = os.path.join(local_data_path, str(device_id))

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=1, batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                   repeat_num=1, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num=class_num)
    # reduction='mean' averages the loss over the batch
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size, steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9, weight_decay=1e-4, loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    # amp_level="O2" means that the hybrid precision of O2 mode is used for training
    # the whole network except that batchnoram will be cast into float16 format and dynamic loss scale will be used
    # 'keep_batchnorm_fp32 = False' means that use the float16 format
    model = Model(net, amp_level="O2", keep_batchnorm_fp32=False, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})

    # define a performance callback to report throughput (ips) and a loss callback to report the loss each epoch
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)
    if device_num == 1 or device_id == 0:
        print('=================================Start run evaluation.=================================')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
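
Distilling the Model call above, a sketch of the mixed-precision wiring under the same options (the toy nn.Dense network and scale value are assumptions):

import mindspore.nn as nn
from mindspore import Model
from mindspore.train.loss_scale_manager import FixedLossScaleManager

net = nn.Dense(32, 10)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
# The optimizer receives the same scale so gradients are un-scaled correctly.
opt = nn.Momentum(net.trainable_params(), 0.01, 0.9, weight_decay=1e-4,
                  loss_scale=1024)
model = Model(net,
              loss_fn=loss,
              optimizer=opt,
              loss_scale_manager=FixedLossScaleManager(1024, False),
              amp_level="O2",             # cast the network to float16
              keep_batchnorm_fp32=False,  # cast batchnorm to float16 too
              metrics={'acc'})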
Example 15
def compile_net(net):
    context.set_context(save_graphs=True)
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2
    dataset = Dataset(_x, _b)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, optimizer=opt, amp_level="O2")
    model.train(epoch_size, dataset, dataset_sink_mode=False)
    context.reset_auto_parallel_context()
Example 16
    def __init__(self):
        context.set_context(reserve_class_name_in_scope=False)
        net = resnet50(batch_size, num_classes)
        ls = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                       0.01, 0.9)
        model = Model(net, loss_fn=ls, optimizer=opt, metrics={'acc'})
        self.model = model
        self.model.train(1,
                         create_dataset(list(range(32))),
                         dataset_sink_mode=False)
Example 17
def resnet50_train(args_opt):
    epoch_size = args_opt.epoch_size
    batch_size = cfg.batch_size
    class_num = cfg.class_num
    loss_scale_num = cfg.loss_scale
    local_data_path = '/cache/data'
    local_ckpt_path = '/cache/ckpt_file'

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=epoch_size, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num=class_num)
    # reduction='mean' averages the loss over the batch
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size, steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9, weight_decay=1e-4, loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    # amp_level="O2" means that the hybrid precision of O2 mode is used for training
    # the whole network except that batchnorm will be cast into float16 format and dynamic loss scale will be used
    # 'keep_batchnorm_fp32 = False' means that use the float16 format
    model = Model(net, amp_level="O2", keep_batchnorm_fp32=False, loss_fn=loss,
                  optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})

    # define a performance callback to report throughput (ips) and a loss callback to report the loss each epoch
    time_cb = TimeMonitor(data_size=train_step_size)
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [time_cb, performance_cb, loss_cb]
    config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_epochs * train_step_size,
                                 keep_checkpoint_max=cfg.keep_checkpoint_max)
    ckpt_cb = ModelCheckpoint(prefix="resnet", directory=local_ckpt_path, config=config_ck)
    cb += [ckpt_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)

    # upload checkpoint files
    print('Upload checkpoint.')
    mox.file.copy_parallel(src_url=local_ckpt_path, dst_url=args_opt.train_url)
Example 18
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
    # nn.Momentum takes the parameters first, then learning_rate and momentum
    opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                      learning_rate=0.1, momentum=0.9)
    context.set_context(mode=context.GRAPH_MODE)
    Model(net, loss, opt)
    _network = nn.WithLossCell(net, loss)
    _train_net = MsWrapper(nn.TrainOneStepCell(_network, opt))
    _train_net.set_train()
    for epoch in range(0, epoch_size):
        print(f"epoch %d" % (epoch))
        output = _train_net(Tensor(input_np), Tensor(label_np))
        print(output.asnumpy())
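
The WithLossCell/TrainOneStepCell idiom above, reduced to a self-contained sketch (the stand-in network and random data are assumptions; MsWrapper from the example is omitted):

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

net = nn.Dense(16, 4)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
opt = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)

net_with_loss = nn.WithLossCell(net, loss)           # (data, label) -> loss
train_net = nn.TrainOneStepCell(net_with_loss, opt)  # forward + backward + update
train_net.set_train()

data = Tensor(np.random.randn(8, 16).astype(np.float32))
label = Tensor(np.random.randint(0, 4, size=(8,)).astype(np.int32))
for epoch in range(2):
    step_loss = train_net(data, label)
    print("epoch %d, loss %.4f" % (epoch, float(step_loss.asnumpy())))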
Example 19
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    """me_train_tensor"""
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr_gen(lambda i: 0.1, epoch_size), 0.9,
                   0.01, 1024)
    Model(net, loss, opt)
    _network = nn.WithLossCell(net, loss)
    _train_net = nn.TrainOneStepCell(_network, opt)
    _train_net.set_train()
    label_np = np.argmax(label_np, axis=-1).astype(np.int32)
    for epoch in range(0, epoch_size):
        print(f"epoch %d" % (epoch))
        _train_net(Tensor(input_np), Tensor(label_np))
Example 20
def resnet50_train(args_opt):
    epoch_size = args_opt.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/cache/data'

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
    context.set_context(enable_task_sink=True, device_id=device_id)
    context.set_context(enable_loop_sink=True)
    context.set_context(enable_mem_reuse=True)
    if device_num > 1:
        context.set_auto_parallel_context(device_num=device_num,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          mirror_mean=True)
        local_data_path = os.path.join(local_data_path, str(device_id))

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=epoch_size, batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                   repeat_num=1, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num=class_num)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size, steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9, weight_decay=1e-4, loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})

    # define a performance callback to report throughput (ips) and a loss callback to report the loss each epoch
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)
    if device_num == 1 or device_id == 0:
        print('Start run evaluation.')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
Example 21
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    """me_train_tensor"""
    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    # reorder the net parameters, leaving those that must be passed into LARS at the end

    opt = Momentum(
        get_net_trainable_reordered_params(net)[2],
        lr_gen(lambda i: 0.1, epoch_size), 0.9, 0.01, 1024)
    Model(net, loss, opt)
    _network = nn.WithLossCell(net, loss)
    TrainOneStepWithLarsCell(_network, opt)
    data = Tensor(input_np)
    label = Tensor(label_np)
    net(data, label)
Example 22
def train_lenet():
    context.set_context(mode=context.GRAPH_MODE, save_graphs=True, device_target="CPU")
    dataset_sink_mode = False

    # download mnist dataset
    download_dataset()

    # learning rate setting
    lr = 0.01
    momentum = 0.9
    epoch_size = 1
    mnist_path = "../MNIST_Data"

    # define the loss function
    net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
    repeat_size = epoch_size

    # create the network
    network = LeNet5()

    # define the optimizer
    net_opt = nn.Momentum(network.trainable_params(), lr, momentum)
    config_ck = CheckpointConfig(save_checkpoint_steps=1875, keep_checkpoint_max=10)

    # save the network model and parameters for subsequent fine-tuning
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet", config=config_ck)

    # group layers into an object with training and evaluation features
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

    summary_writer = SummaryRecord(log_dir="../../summary", network=network)
    summary_callback = SummaryStep(summary_writer, flush_step=10)

    # Init TrainLineage to record the training information
    train_callback = TrainLineage(summary_writer)

    train_net(
        model,
        epoch_size,
        mnist_path,
        repeat_size,
        ckpoint_cb,
        dataset_sink_mode,
        callbacks=[summary_callback, train_callback],
    )

    test_net(network, model, mnist_path)

    summary_writer.close()
Example 23
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    context.set_context(mode=context.GRAPH_MODE)
    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    opt = ApplyMomentum(Tensor(np.array([0.1])), Tensor(np.array([0.9])),
                        filter(lambda x: x.requires_grad, net.get_parameters()))
    Model(net, loss, opt)
    _network = wrap.WithLossCell(net, loss)
    _train_net = MsWrapper(wrap.TrainOneStepCell(_network, opt))
    _train_net.set_train()
    with SummaryRecord(SUMMARY_DIR, file_suffix="_MS_GRAPH", network=_train_net) as summary_writer:
        for epoch in range(0, epoch_size):
            print(f"epoch %d" % (epoch))
            output = _train_net(Tensor(input_np), Tensor(label_np))
            summary_writer.record(i)
            print("********output***********")
            print(output.asnumpy())
Example 24
def reshape_common(parallel_mode):
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                      device_num=8)
    predict = Tensor(np.ones([32, 256]), dtype=ms.float32)
    label = Tensor(np.ones([32]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = prelu_net()

    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, opt)
    model.train(epoch_size, dataset, dataset_sink_mode=False)
Example 25
def test_original_resnet50():
    """ evaluate the original resnet50"""
    dataset = create_dataset(
        dataset_path=ARGS_OPT.eval_dataset,
        do_train=False,
        batch_size=config.batch_size,  # pylint: disable=no-member
        target=ARGS_OPT.device_target)
    network = resnet(10)
    network.set_train(False)
    param_dict = load_checkpoint(ARGS_OPT.pre_trained)
    load_param_into_net(network, param_dict)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    model = Model(network,
                  loss_fn=loss,
                  metrics={'top_1_accuracy', 'top_5_accuracy'})
    res = model.eval(dataset)
    print("result for original resnet50:", res, "ckpt=", ARGS_OPT.pre_trained)
Example 26
def reshape_common(parallel_mode, strategy0, strategy1, strategy2, strategy_loss):
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=8)
    predict = Tensor(np.ones([32, 512, 7, 7]), dtype=ms.float32)
    label = Tensor(np.ones([32]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = reshape_net(strategy0, strategy1, strategy2)

    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss.softmax_cross_entropy.shard(strategy_loss)
    loss.one_hot.shard(((8, 1), (), ()))
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, opt)
    model.train(epoch_size, dataset, dataset_sink_mode=False)
Example 27
def test_data_parallel_mode():
    _reset_op_id()
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2
    context.set_context(mode=context.GRAPH_MODE, save_graphs=False)
    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                      full_batch=True)
    predict = Tensor(np.ones([256, 128]), dtype=ms.float32)
    label = Tensor(np.ones([256]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = all_to_all_net(None)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, opt)

    with pytest.raises(RuntimeError):
        model.train(epoch_size, dataset, dataset_sink_mode=False)
Example 28
def all_to_all_common():
    learning_rate = 0.1
    momentum = 0.9
    epoch_size = 2

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, device_num=1, global_rank=0)
    predict = Tensor(np.ones([32, 128]), dtype=ms.float32)
    label = Tensor(np.ones([32]), dtype=ms.int32)
    dataset = Dataset(predict, label, 2)
    net = all_to_all_net()

    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    opt = Momentum(net.trainable_params(), learning_rate, momentum)
    model = Model(net, loss, opt)

    model.train(epoch_size, dataset, dataset_sink_mode=False)
    strategys = _executor._get_strategy(model._train_network)
    return strategys
Example 29
def test_pynative_resnet50():
    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")

    batch_size = 32
    num_classes = 10
    loss_scale = 128
    total_step = 50
    net = resnet50(batch_size, num_classes)
    optimizer = Momentum(learning_rate=0.01,
                         momentum=0.9,
                         params=filter(lambda x: x.requires_grad,
                                       net.get_parameters()))
    data_set = create_dataset(repeat_num=1,
                              training=True,
                              batch_size=batch_size,
                              num_samples=total_step * batch_size)

    # define callbacks
    time_cb = MyTimeMonitor(data_size=data_set.get_dataset_size())
    loss_cb = LossMonitor()
    cb = [time_cb, loss_cb]

    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss_scale = FixedLossScaleManager(loss_scale=loss_scale,
                                       drop_overflow_update=False)
    model = Model(net,
                  loss_fn=loss,
                  optimizer=optimizer,
                  loss_scale_manager=loss_scale,
                  metrics={'acc'},
                  amp_level="O2",
                  keep_batchnorm_fp32=False)

    # train model
    model.train(1,
                data_set,
                callbacks=cb,
                sink_size=data_set.get_dataset_size(),
                dataset_sink_mode=True)

    assert time_cb.good_step() > 10
Example 30
def get_tensor_from_training(
        indices,
        ckpt_file="/tmp/pycharm_project_589/summary_dir-202010191622/weights/-1_350.ckpt",
        node_name="conv1.weight",
        data_type="gradient"):
    context.set_context(reserve_class_name_in_scope=False)
    net = resnet50(batch_size, num_classes)
    load_checkpoint(ckpt_file, net=net)
    ls = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   0.01, 0.9)
    model = Model(net, loss_fn=ls, optimizer=opt, metrics={'acc'})
    dataset = create_dataset(indices)

    data_interception_callback = DataInterceptionCallback(node_name=node_name,
                                                          data_type=data_type)
    model.train(1,
                dataset,
                callbacks=[LossMonitor(), data_interception_callback],
                dataset_sink_mode=False)
    return data_interception_callback.result
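
DataInterceptionCallback itself is not shown in the source. A plausible minimal shape for such a callback on MindSpore's Callback API (the TensorCaptureCallback class is an assumption, not the real implementation):

from mindspore.train.callback import Callback

class TensorCaptureCallback(Callback):
    """Collects training state after every step via the run context."""

    def __init__(self):
        super().__init__()
        self.result = []

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        # cb_params exposes cur_epoch_num, cur_step_num, net_outputs, etc.
        self.result.append(cb_params.net_outputs)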