Example #1
    def epoch_end(self, run_context):
        """evaluate the model and ema-model at the end of each epoch"""
        cb_params = run_context.original_args()
        cur_epoch = cb_params.cur_epoch_num + self._start_epoch - 1

        save_ckpt = (cur_epoch % self.save_epoch == 0)
        load_nparray_into_net(self.ema_network, self.shadow)
        model = Model(self.network,
                      loss_fn=self.loss_fn,
                      metrics=self.eval_metrics)
        model_ema = Model(self.ema_network,
                          loss_fn=self.loss_fn,
                          metrics=self.eval_metrics)
        acc = model.eval(self.eval_dataset,
                         dataset_sink_mode=self.dataset_sink_mode)
        ema_acc = model_ema.eval(self.eval_dataset,
                                 dataset_sink_mode=self.dataset_sink_mode)
        print("Model Accuracy:", acc)
        print("EMA-Model Accuracy:", ema_acc)

        output = [{
            "name": k,
            "data": Tensor(v)
        } for k, v in self.shadow.items()]
        self.ema_accuracy[cur_epoch] = ema_acc["Top1-Acc"]
        if self.best_ema_accuracy < ema_acc["Top1-Acc"]:
            self.best_ema_accuracy = ema_acc["Top1-Acc"]
            self.best_ema_epoch = cur_epoch
            save_checkpoint(output, "ema_best.ckpt")

        if self.best_accuracy < acc["Top1-Acc"]:
            self.best_accuracy = acc["Top1-Acc"]
            self.best_epoch = cur_epoch

        print("Best Model Accuracy: %s, at epoch %s" %
              (self.best_accuracy, self.best_epoch))
        print("Best EMA-Model Accuracy: %s, at epoch %s" %
              (self.best_ema_accuracy, self.best_ema_epoch))

        if save_ckpt:
            # Save the ema_model checkpoints
            ckpt = "{}-{}.ckpt".format("ema", cur_epoch)
            save_checkpoint(output, ckpt)
            save_checkpoint(output, "ema_last.ckpt")

            # Save the model checkpoints
            save_checkpoint(cb_params.train_network, "last.ckpt")

        print("Top 10 EMA-Model Accuracies: ")
        count = 0
        for epoch in sorted(self.ema_accuracy,
                            key=self.ema_accuracy.get,
                            reverse=True):
            if count == 10:
                break
            print("epoch: %s, Top-1: %s)" % (epoch, self.ema_accuracy[epoch]))
            count += 1
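Example #1 shows only the epoch_end hook; a minimal sketch of how such an EMA evaluation callback might be declared and registered with model.train follows. The EmaEvalCallback name, its constructor arguments, and the surrounding variables (net, ema_net, loss, train_dataset, val_dataset, eval_metrics) are assumptions for illustration, not part of the original example.

# Hypothetical wiring for the epoch_end hook above; all names here are assumptions.
from mindspore.train.callback import Callback

class EmaEvalCallback(Callback):
    def __init__(self, network, ema_network, loss_fn, eval_dataset, eval_metrics,
                 save_epoch=1, start_epoch=1, dataset_sink_mode=True):
        super().__init__()
        self.network, self.ema_network, self.loss_fn = network, ema_network, loss_fn
        self.eval_dataset, self.eval_metrics = eval_dataset, eval_metrics
        self.save_epoch, self._start_epoch = save_epoch, start_epoch
        self.dataset_sink_mode = dataset_sink_mode
        self.shadow = {}          # EMA shadow parameters, updated during training
        self.ema_accuracy = {}
        self.best_accuracy = self.best_ema_accuracy = 0.0
        self.best_epoch = self.best_ema_epoch = 0

    # epoch_end(self, run_context) would be the method shown in Example #1

# ema_cb = EmaEvalCallback(net, ema_net, loss, val_dataset, eval_metrics)
# model.train(epoch_size, train_dataset, callbacks=[ema_cb])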
Example #2
def dpn_evaluate(args):
    # create evaluate dataset
    eval_path = os.path.join(args.data_dir, 'val')
    eval_dataset = classification_dataset(eval_path,
                                          image_size=args.image_size,
                                          num_parallel_workers=args.num_parallel_workers,
                                          per_batch_size=args.batch_size,
                                          max_epoch=1,
                                          rank=args.rank,
                                          shuffle=False,
                                          group_size=args.group_size,
                                          mode='eval')

    # create network
    net = dpns[args.backbone](num_classes=args.num_classes)
    # load checkpoint
    if os.path.isfile(args.pretrained):
        load_param_into_net(net, load_checkpoint(args.pretrained))
    # loss
    if args.dataset == "imagenet-1K":
        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    else:
        if not args.label_smooth:
            args.label_smooth_factor = 0.0
        loss = CrossEntropy(smooth_factor=args.label_smooth_factor, num_classes=args.num_classes)

    # create model
    model = Model(net, amp_level="O2", keep_batchnorm_fp32=False, loss_fn=loss,
                  metrics={'top_1_accuracy', 'top_5_accuracy'})
    # evaluate
    output = model.eval(eval_dataset)
    print(f'Evaluation result: {output}.')
Example #3
def eval_net():
    '''eval net'''
    if config.dataset == 'MR':
        instance = MovieReview(root_dir=config.data_path, maxlen=config.word_len, split=0.9)
    elif config.dataset == 'SUBJ':
        instance = Subjectivity(root_dir=config.data_path, maxlen=config.word_len, split=0.9)
    elif config.dataset == 'SST2':
        instance = SST2(root_dir=config.data_path, maxlen=config.word_len, split=0.9)
    device_target = config.device_target
    context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
    if device_target == "Ascend":
        context.set_context(device_id=get_device_id())
    dataset = instance.create_test_dataset(batch_size=config.batch_size)
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
    net = TextCNN(vocab_len=instance.get_dict_len(), word_len=config.word_len,
                  num_classes=config.num_classes, vec_length=config.vec_length)
    opt = nn.Adam(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate=0.001,
                  weight_decay=float(config.weight_decay))

    param_dict = load_checkpoint(config.checkpoint_file_path)
    print("load checkpoint from [{}].".format(config.checkpoint_file_path))

    load_param_into_net(net, param_dict)
    net.set_train(False)
    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc': Accuracy()})

    acc = model.eval(dataset)
    print("accuracy: ", acc)
Example #4
def test(cloud_args=None):
    """test"""
    args = parse_args(cloud_args)
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.device_target,
                        save_graphs=False)
    if os.getenv('DEVICE_ID', "not_set").isdigit():
        context.set_context(device_id=int(os.getenv('DEVICE_ID')))

    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))

    args.logger = LogUtil.get_instance()
    args.logger.set_level(20)

    net = vgg16(num_classes=args.num_classes, args=args)
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   0.01,
                   args.momentum,
                   weight_decay=args.weight_decay)
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

    param_dict = load_checkpoint(args.pre_trained)
    load_param_into_net(net, param_dict)
    net.set_train(False)

    dataset_test = vgg_create_dataset100(args.data_path,
                                         args.image_size,
                                         args.per_batch_size,
                                         training=False)
    res = model.eval(dataset_test)
    print("result: ", res)
Example #5
def resnet50_train(args_opt):
    device_id = 0
    device_num = 1
    epoch_size = args_opt.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/home/share/dataset/cifar-10-batches-bin/' # your cifar10 path

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
    context.set_context(device_id=device_id)
    if device_num > 1:
        context.set_auto_parallel_context(device_num=device_num,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True)
        init()
        local_data_path = os.path.join(local_data_path, str(device_id))

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=1, batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                  repeat_num=1, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num=class_num)
    # reduction='mean' applies mean reduction to the loss
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size, steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9, weight_decay=1e-4, loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    # amp_level="O2" means that the hybrid precision of O2 mode is used for training
    # the whole network except that batchnoram will be cast into float16 format and dynamic loss scale will be used
    # 'keep_batchnorm_fp32 = False' means that use the float16 format
    model = Model(net, amp_level="O2", keep_batchnorm_fp32=False, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})

    # define performance callback to show ips and loss callback to show loss for every epoch
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)
    if device_num == 1 or device_id == 0:
        print(f'=================================Start run evaluation.=================================')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
Example #6
def MLM_eval():
    '''
    Evaluate function
    '''
    _, dataset, net_for_pretraining = bert_predict()
    net = Model(net_for_pretraining, eval_network=net_for_pretraining, eval_indexes=[0, 1, 2],
                metrics={'name': myMetric()})
    res = net.eval(dataset, dataset_sink_mode=False)
    for _, v in res.items():
        print("Accuracy is: ", v)
Example #7
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)

    #context.set_context(mode=context.GRAPH_MODE)
    context.set_context(mode=context.PYNATIVE_MODE)

    if args.GPU:
        context.set_context(device_target='GPU')

    # parse model argument
    assert args.model.startswith(
        "hournas"), "Only Tinynet models are supported."
    #_, sub_name = args.model.split("_")
    net = hournasnet(args.model,
                     num_classes=args.num_classes,
                     drop_rate=0.0,
                     drop_connect_rate=0.0,
                     global_pool="avg",
                     bn_tf=False,
                     bn_momentum=None,
                     bn_eps=None)
    print(net)
    print("Total number of parameters:", count_params(net))
    cfg = edict({
        'image_height': args.image_size,
        'image_width': args.image_size,
    })
    cfg.batch_size = args.batch_size
    print(cfg)

    #input_size = net.default_cfg['input_size'][1]
    val_data_url = args.data_path  #os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_cifar10(val_data_url,
                                         repeat_num=1,
                                         training=False,
                                         cifar_cfg=cfg)

    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    eval_metrics = {
        'Validation-Loss': Loss(),
        'Top1-Acc': Top1CategoricalAccuracy(),
        'Top5-Acc': Top5CategoricalAccuracy()
    }

    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)

    model = Model(net, loss, metrics=eval_metrics)

    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
Example #8
def resnet50_train(args_opt):
    # device_id / device_num are not defined in this snippet; assume env-provided
    # values with single-device defaults
    device_id = int(os.getenv('DEVICE_ID', '0'))
    device_num = int(os.getenv('RANK_SIZE', '1'))
    epoch_size = args_opt.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/cache/data'

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
    context.set_context(enable_task_sink=True, device_id=device_id)
    context.set_context(enable_loop_sink=True)
    context.set_context(enable_mem_reuse=True)
    if device_num > 1:
        context.set_auto_parallel_context(device_num=device_num,
                                          parallel_mode=ParallelMode.DATA_PARALLEL,
                                          mirror_mean=True)
        local_data_path = os.path.join(local_data_path, str(device_id))

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=epoch_size, batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                  repeat_num=1, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num=class_num)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size, steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9, weight_decay=1e-4, loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})

    # define performance callback to show ips and loss callback to show loss for every epoch
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)
    if device_num == 1 or device_id == 0:
        print(f'Start run evaluation.')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
Example #9
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)

    context.set_context(mode=context.GRAPH_MODE)

    if args.GPU:
        context.set_context(device_target='GPU')

    # parse model argument
    assert args.model.startswith(
        "tinynet"), "Only Tinynet models are supported."
    _, sub_name = args.model.split("_")
    net = tinynet(sub_model=sub_name,
                  num_classes=args.num_classes,
                  drop_rate=0.0,
                  drop_connect_rate=0.0,
                  global_pool="avg",
                  bn_tf=False,
                  bn_momentum=None,
                  bn_eps=None)
    print("Total number of parameters:", count_params(net))

    input_size = net.default_cfg['input_size'][1]
    val_data_url = os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_val(args.batch_size,
                                     val_data_url,
                                     workers=args.workers,
                                     distributed=False,
                                     input_size=input_size)

    loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing,
                                      num_classes=args.num_classes)

    loss.add_flags_recursive(fp32=True, fp16=False)
    eval_metrics = {
        'Validation-Loss': Loss(),
        'Top1-Acc': Top1CategoricalAccuracy(),
        'Top5-Acc': Top5CategoricalAccuracy()
    }

    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)

    model = Model(net, loss, metrics=eval_metrics)

    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
def train_process(epoch_size, num_classes, batch_size):
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    net = resnet50(batch_size, num_classes)
    loss = CrossEntropyLoss()
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   0.01, 0.9)

    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

    dataset = create_dataset(epoch_size, training=True, batch_size=batch_size)
    loss_cb = LossGet()
    model.train(epoch_size, dataset, callbacks=[loss_cb])

    net.set_train(False)
    eval_dataset = create_dataset(1, training=False)
    res = model.eval(eval_dataset)
    print("result: ", res)
    return res
def resnet50_eval(args_opt):
    class_num = cfg.class_num
    local_data_path = '/cache/data'
    ckpt_file_slice = args_opt.checkpoint_path.split('/')
    ckpt_file = ckpt_file_slice[len(ckpt_file_slice) - 1]
    local_ckpt_path = '/cache/' + ckpt_file

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        save_graphs=False)

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)
    mox.file.copy_parallel(src_url=args_opt.checkpoint_path,
                           dst_url=local_ckpt_path)

    # create dataset
    dataset = create_dataset(dataset_path=local_data_path,
                             do_train=False,
                             batch_size=cfg.batch_size)

    # load checkpoint into net
    net = resnet50(class_num=class_num)
    param_dict = load_checkpoint(local_ckpt_path)
    load_param_into_net(net, param_dict)
    net.set_train(False)

    # define loss and model
    if not cfg.use_label_smooth:
        cfg.label_smooth_factor = 0.0
    loss = CrossEntropySmooth(sparse=True,
                              reduction='mean',
                              smooth_factor=cfg.label_smooth_factor,
                              num_classes=cfg.class_num)
    model = Model(net,
                  loss_fn=loss,
                  metrics={'top_1_accuracy', 'top_5_accuracy'})

    # eval model
    res = model.eval(dataset)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)
def eval(batch_size, num_classes):
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    context.set_context(device_id=0)
    context.set_context(enable_loop_sink=True)
    context.set_context(enable_mem_reuse=True)

    net = resnet50(batch_size, num_classes)
    loss = CrossEntropyLoss()
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   0.01, 0.9)

    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
    checkpoint_path = "./train_resnet_cifar10_device_id_0-1_1562.ckpt"
    param_dict = load_checkpoint(checkpoint_path)
    load_param_into_net(net, param_dict)
    net.set_train(False)
    eval_dataset = create_dataset(1, training=False)
    res = model.eval(eval_dataset)
    print("result: ", res)
    return res
Example #13
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)

    #context.set_context(mode=context.GRAPH_MODE)
    context.set_context(mode=context.PYNATIVE_MODE)

    if args.GPU:
        context.set_context(device_target='GPU', device_id=args.device_id)

    # parse model argument
    assert args.model.startswith(
        "hournas"), "Only Tinynet models are supported."
    net = nasbenchnet()
    cfg = edict({
        'image_height': args.image_size,
        'image_width': args.image_size,
    })
    cfg.batch_size = args.batch_size
    val_data_url = args.data_path
    val_dataset = create_dataset_cifar10(val_data_url,
                                         repeat_num=1,
                                         training=False,
                                         cifar_cfg=cfg)
    loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing,
                                      num_classes=args.num_classes)
    loss.add_flags_recursive(fp32=True, fp16=False)
    eval_metrics = {
        'Validation-Loss': Loss(),
        'Top1-Acc': Top1CategoricalAccuracy(),
        'Top5-Acc': Top5CategoricalAccuracy()
    }
    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)
    model = Model(net, loss, metrics=eval_metrics)
    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
Example #14
def eval_():
    # set args
    dev = "GPU"
    compute_type = str(args_opt.dtype).lower()
    ckpt_dir = str(args_opt.ckpt_path)
    total_batch = int(args_opt.batch_size)
    # init context
    if args_opt.mode == "GRAPH":
        mode = context.GRAPH_MODE
    else:
        mode = context.PYNATIVE_MODE
    context.set_context(mode=mode, device_target=dev, save_graphs=False)
    # create dataset
    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=False,
                             repeat_num=1,
                             batch_size=total_batch,
                             target=dev,
                             dtype=compute_type)
    # define net
    net = resnet(class_num=1001, dtype=compute_type)
    # load checkpoint
    param_dict = load_checkpoint(ckpt_dir)
    load_param_into_net(net, param_dict)
    net.set_train(False)
    # define loss, model
    loss = CrossEntropySmooth(sparse=True,
                              reduction='mean',
                              smooth_factor=0.1,
                              num_classes=1001)
    # define model
    model = Model(net,
                  loss_fn=loss,
                  metrics={'top_1_accuracy', 'top_5_accuracy'})
    # eval model
    print("========START EVAL RESNET50 ON GPU ========")
    res = model.eval(dataset)
    print("result:", res, "ckpt=", ckpt_dir)
Example #15
context.set_context(mode=context.GRAPH_MODE,
                    device_target="Ascend",
                    save_graphs=False)
context.set_context(enable_task_sink=True, device_id=device_id)
context.set_context(enable_loop_sink=True)
context.set_context(enable_mem_reuse=True)

if __name__ == '__main__':

    net = resnet50(class_num=config.class_num)
    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=config.label_smooth_factor,
                        num_classes=config.class_num)

    if args_opt.do_eval:
        dataset = create_dataset(dataset_path=args_opt.dataset_path,
                                 do_train=False,
                                 batch_size=config.batch_size)
        step_size = dataset.get_dataset_size()

        if args_opt.checkpoint_path:
            param_dict = load_checkpoint(args_opt.checkpoint_path)
            load_param_into_net(net, param_dict)
        net.set_train(False)

        model = Model(net, loss_fn=loss, metrics={'acc'})
        res = model.eval(dataset)
        print("result:", res, "ckpt=", args_opt.checkpoint_path)
Example #16
    else:
        raise ValueError("Unsupported device_target.")

    context.set_context(device_id=args_opt.device_id)
    context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, save_graphs=False)

    # create dataset
    dataset = create_dataset(args_opt.dataset_path, do_train=False, batch_size=config.batch_size, device_num=1, rank=0)
    step_size = dataset.get_dataset_size()

    # define net
    net = xception(class_num=config.class_num)

    # load checkpoint
    param_dict = load_checkpoint(args_opt.checkpoint_path)
    load_param_into_net(net, param_dict)
    net.set_train(False)

    # define loss, model
    loss = CrossEntropySmooth(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)

    # define model
    eval_metrics = {'Loss': nn.Loss(),
                    'Top_1_Acc': nn.Top1CategoricalAccuracy(),
                    'Top_5_Acc': nn.Top5CategoricalAccuracy()}
    model = Model(net, loss_fn=loss, metrics=eval_metrics)

    # eval model
    res = model.eval(dataset, dataset_sink_mode=True)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)
Example #17
    dataset_lr, dataset_rl = create_dataset_eval(args_opt.dataset_path + "/" +
                                                 dataset_name + ".mindrecord0",
                                                 config=config,
                                                 dataset_name=dataset_name)
    step_size = dataset_lr.get_dataset_size()

    print("step_size ", step_size)

    # define net
    net = CNNDirectionModel([3, 64, 48, 48, 64], [64, 48, 48, 64, 64],
                            [256, 64], [64, 512])

    # load checkpoint
    param_dict = load_checkpoint(args_opt.checkpoint_path)
    load_param_into_net(net, param_dict)
    net.set_train(False)

    # define loss, model
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="sum")

    # define model
    model = Model(net, loss_fn=loss, metrics={'top_1_accuracy'})

    # eval model
    res_lr = model.eval(dataset_lr, dataset_sink_mode=False)
    res_rl = model.eval(dataset_rl, dataset_sink_mode=False)
    print("result on upright images:", res_lr, "ckpt=",
          args_opt.checkpoint_path)
    print("result on 180 degrees rotated images:", res_rl, "ckpt=",
          args_opt.checkpoint_path)
Example #18
        if not cfg.use_label_smooth:
            cfg.label_smooth_factor = 0.0
        loss = CrossEntropySmooth(sparse=True,
                                  reduction="mean",
                                  smooth_factor=cfg.label_smooth_factor,
                                  num_classes=cfg.num_classes)
        net = TinyDarkNet(num_classes=cfg.num_classes)
        model = Model(net,
                      loss_fn=loss,
                      metrics={'top_1_accuracy', 'top_5_accuracy'})

    else:
        raise ValueError("dataset is not support.")

    device_target = cfg.device_target
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=cfg.device_target)

    if args_opt.checkpoint_path is not None:
        param_dict = load_checkpoint(args_opt.checkpoint_path)
        print("load checkpoint from [{}].".format(args_opt.checkpoint_path))
    else:
        param_dict = load_checkpoint(cfg.checkpoint_path)
        print("load checkpoint from [{}].".format(cfg.checkpoint_path))

    load_param_into_net(net, param_dict)
    net.set_train(False)

    acc = model.eval(dataset)
    print("accuracy: ", acc)
Example #19
def train_process(q, device_id, epoch_size, device_num, enable_hccl):
    os.system("mkdir " + str(device_id))
    os.chdir(str(device_id))
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        save_graphs=False)
    context.set_context(device_id=device_id)
    os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = MINDSPORE_HCCL_CONFIG_PATH
    os.environ['RANK_ID'] = str(device_id)
    os.environ['RANK_SIZE'] = str(device_num)
    if enable_hccl:
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            mirror_mean=True,
            parameter_broadcast=True)
        auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
        init()

    # network
    net = resnet50(class_num=config.class_num)

    # evaluation network
    dist_eval_network = ClassifyCorrectCell(net)

    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0

    # loss
    loss = nn.SoftmaxCrossEntropyWithLogits(
        sparse=True,
        reduction="mean",
        smooth_factor=config.label_smooth_factor,
        num_classes=config.class_num)

    # train dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             do_train=True,
                             repeat_num=epoch_size,
                             batch_size=config.batch_size)

    step_size = dataset.get_dataset_size()
    eval_interval = config.eval_interval
    dataset.__loop_size__ = step_size * eval_interval

    # evaluation dataset
    eval_dataset = create_dataset(dataset_path=eval_path,
                                  do_train=False,
                                  repeat_num=epoch_size,
                                  batch_size=config.eval_batch_size)

    # loss scale
    loss_scale = FixedLossScaleManager(config.loss_scale,
                                       drop_overflow_update=False)

    # learning rate
    lr = Tensor(
        get_learning_rate(lr_init=config.lr_init,
                          lr_end=0.0,
                          lr_max=config.lr_max,
                          warmup_epochs=config.warmup_epochs,
                          total_epochs=config.epoch_size,
                          steps_per_epoch=step_size,
                          lr_decay_mode=config.lr_decay_mode))

    # optimizer
    decayed_params = list(
        filter(
            lambda x: 'beta' not in x.name and 'gamma' not in x.name and 'bias'
            not in x.name, net.trainable_params()))
    no_decayed_params = [
        param for param in net.trainable_params()
        if param not in decayed_params
    ]
    group_params = [{
        'params': decayed_params,
        'weight_decay': config.weight_decay
    }, {
        'params': no_decayed_params,
        'weight_decay': 0.0
    }, {
        'order_params': net.trainable_params()
    }]

    if config.use_lars:
        momentum = nn.Momentum(group_params,
                               lr,
                               config.momentum,
                               loss_scale=config.loss_scale,
                               use_nesterov=config.use_nesterov)
        opt = nn.LARS(momentum,
                      epsilon=config.lars_epsilon,
                      coefficient=config.lars_coefficient,
                      lars_filter=lambda x: 'beta' not in x.name and 'gamma'
                      not in x.name and 'bias' not in x.name)

    else:
        opt = nn.Momentum(group_params,
                          lr,
                          config.momentum,
                          loss_scale=config.loss_scale,
                          use_nesterov=config.use_nesterov)

    # model
    model = Model(net,
                  loss_fn=loss,
                  optimizer=opt,
                  loss_scale_manager=loss_scale,
                  amp_level="O2",
                  keep_batchnorm_fp32=False,
                  metrics={
                      'acc':
                      DistAccuracy(batch_size=config.eval_batch_size,
                                   device_num=device_num)
                  },
                  eval_network=dist_eval_network)

    # model init
    print("init_start", device_id)
    model.init(dataset, eval_dataset)
    print("init_stop", device_id)

    # callbacks
    loss_cb = LossGet(1, step_size)

    # train and eval
    print("run_start", device_id)
    acc = 0.0
    time_cost = 0.0
    for epoch_idx in range(0, int(epoch_size / eval_interval)):
        model.train(1, dataset, callbacks=loss_cb)
        eval_start = time.time()
        output = model.eval(eval_dataset)
        eval_cost = (time.time() - eval_start) * 1000
        acc = float(output["acc"])
        time_cost = loss_cb.get_per_step_time()
        loss = loss_cb.get_loss()
        print(
            "the {} epoch's resnet result:\n "
            "device{}, training loss {}, acc {}, "
            "training per step cost {:.2f} ms, eval cost {:.2f} ms, total_cost {:.2f} ms"
            .format(epoch_idx, device_id, loss, acc, time_cost, eval_cost,
                    time_cost * step_size + eval_cost))
    q.put({'acc': acc, 'cost': time_cost})
Example #20
    dataset = create_dataset(args_opt.dataset_path,
                             do_train=False,
                             batch_size=config.batch_size,
                             device_num=1,
                             rank=0)
    step_size = dataset.get_dataset_size()

    # define net
    net = xception(class_num=config.class_num)

    # load checkpoint
    param_dict = load_checkpoint(args_opt.checkpoint_path)
    load_param_into_net(net, param_dict)
    net.set_train(False)

    # define loss, model
    loss = CrossEntropySmooth(smooth_factor=config.label_smooth_factor,
                              num_classes=config.class_num)

    # define model
    eval_metrics = {
        'Loss': nn.Loss(),
        'Top_1_Acc': nn.Top1CategoricalAccuracy(),
        'Top_5_Acc': nn.Top5CategoricalAccuracy()
    }
    model = Model(net, loss_fn=loss, metrics=eval_metrics)

    # eval model
    res = model.eval(dataset, dataset_sink_mode=False)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)
Example #21
if args_opt.platform == 'Ascend':
    device_id = int(os.getenv('DEVICE_ID'))
    context.set_context(device_id=device_id)

if __name__ == '__main__':
    config.batch_size = 1
    max_text_length = config.max_text_length
    input_size = config.input_size
    # create dataset
    dataset = create_dataset(name=args_opt.dataset,
                             dataset_path=args_opt.dataset_path,
                             batch_size=config.batch_size,
                             is_training=False,
                             config=config)
    step_size = dataset.get_dataset_size()
    loss = CTCLoss(max_sequence_length=config.num_step,
                   max_label_length=max_text_length,
                   batch_size=config.batch_size)
    net = CRNN(config)
    # load checkpoint
    param_dict = load_checkpoint(args_opt.checkpoint_path)
    load_param_into_net(net, param_dict)
    net.set_train(False)
    # define model
    model = Model(net,
                  loss_fn=loss,
                  metrics={'CRNNAccuracy': CRNNAccuracy(config)})
    # start evaluation
    res = model.eval(dataset, dataset_sink_mode=args_opt.platform == 'Ascend')
    print("result:", res, flush=True)
Example #22
def test(cloud_args=None):
    """test"""
    args = parse_args(cloud_args)
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.device_target,
                        save_graphs=False)
    if os.getenv('DEVICE_ID', "not_set").isdigit():
        context.set_context(device_id=int(os.getenv('DEVICE_ID')))

    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))

    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    if args.dataset == "cifar10":
        net = vgg16(num_classes=args.num_classes)
        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                       0.01,
                       cfg.momentum,
                       weight_decay=args.weight_decay)
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True,
                                                reduction='mean',
                                                is_grad=False)
        model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

        param_dict = load_checkpoint(args.checkpoint_path)
        load_param_into_net(net, param_dict)
        net.set_train(False)
        dataset = vgg_create_dataset(args.data_path, 1, False)
        res = model.eval(dataset)
        print("result: ", res)
    else:
        # network
        args.logger.important_info('start create network')
        if os.path.isdir(args.pretrained):
            models = list(glob.glob(os.path.join(args.pretrained, '*.ckpt')))
            print(models)
            if args.graph_ckpt:
                f = lambda x: -1 * int(
                    os.path.splitext(os.path.split(x)[-1])[0].split('-')[-1].
                    split('_')[0])
            else:
                f = lambda x: -1 * int(
                    os.path.splitext(os.path.split(x)[-1])[0].split('_')[-1])
            args.models = sorted(models, key=f)
        else:
            args.models = [
                args.pretrained,
            ]

        for model in args.models:
            if args.dataset == "cifar10":
                dataset = vgg_create_dataset(args.data_path,
                                             args.image_size,
                                             args.per_batch_size,
                                             training=False)
            else:
                dataset = classification_dataset(args.data_path,
                                                 args.image_size,
                                                 args.per_batch_size)

            eval_dataloader = dataset.create_tuple_iterator()
            network = vgg16(args.num_classes, args, phase="test")

            # pre_trained
            load_param_into_net(network, load_checkpoint(model))
            network.add_flags_recursive(fp16=True)

            img_tot = 0
            top1_correct = 0
            top5_correct = 0

            network.set_train(False)
            t_end = time.time()
            it = 0
            for data, gt_classes in eval_dataloader:
                output = network(Tensor(data, mstype.float32))
                output = output.asnumpy()

                top1_output = np.argmax(output, (-1))
                top5_output = np.argsort(output)[:, -5:]

                t1_correct = np.equal(top1_output, gt_classes).sum()
                top1_correct += t1_correct
                top5_correct += get_top5_acc(top5_output, gt_classes)
                img_tot += args.per_batch_size

                if args.rank == 0 and it == 0:
                    t_end = time.time()
                    it = 1
            if args.rank == 0:
                time_used = time.time() - t_end
                fps = (img_tot -
                       args.per_batch_size) * args.group_size / time_used
                args.logger.info(
                    'Inference Performance: {:.2f} img/sec'.format(fps))
            results = [[top1_correct], [top5_correct], [img_tot]]
            args.logger.info('before results={}'.format(results))
            results = np.array(results)

            args.logger.info('after results={}'.format(results))
            top1_correct = results[0, 0]
            top5_correct = results[1, 0]
            img_tot = results[2, 0]
            acc1 = 100.0 * top1_correct / img_tot
            acc5 = 100.0 * top5_correct / img_tot
            args.logger.info('after allreduce eval: top1_correct={}, tot={},'
                             'acc={:.2f}%(TOP1)'.format(
                                 top1_correct, img_tot, acc1))
            args.logger.info('after allreduce eval: top5_correct={}, tot={},'
                             'acc={:.2f}%(TOP5)'.format(
                                 top5_correct, img_tot, acc5))
Example #23
    parser.add_argument('--dataset_path',
                        type=str,
                        default='',
                        help='Dataset path')
    parser.add_argument('--platform',
                        type=str,
                        default='GPU',
                        choices=('Ascend', 'GPU'),
                        help='run platform')
    args_opt = parser.parse_args()

    if args_opt.platform != 'GPU':
        raise ValueError("Only supported GPU training.")

    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args_opt.platform)
    net = NASNetAMobile(num_classes=cfg.num_classes, is_training=False)
    ckpt = load_checkpoint(args_opt.checkpoint)
    load_param_into_net(net, ckpt)
    net.set_train(False)
    dataset = create_dataset(args_opt.dataset_path, cfg, False)
    loss = CrossEntropy_Val(smooth_factor=0.1, num_classes=cfg.num_classes)
    eval_metrics = {
        'Loss': nn.Loss(),
        'Top1-Acc': nn.Top1CategoricalAccuracy(),
        'Top5-Acc': nn.Top5CategoricalAccuracy()
    }
    model = Model(net, loss, optimizer=None, metrics=eval_metrics)
    metrics = model.eval(dataset)
    print("metric: ", metrics)
Example #24
                        default='GPU',
                        choices=('Ascend', 'GPU', 'CPU'),
                        help='run platform')
    args_opt = parser.parse_args()

    if args_opt.platform == 'Ascend':
        device_id = int(os.getenv('DEVICE_ID'))
        context.set_context(device_id=device_id)

    cfg = CFG_DICT[args_opt.platform]
    create_dataset = DS_DICT[cfg.ds_type]

    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args_opt.platform)
    net = InceptionV3(num_classes=cfg.num_classes, is_training=False)
    ckpt = load_checkpoint(args_opt.checkpoint)
    load_param_into_net(net, ckpt)
    net.set_train(False)
    cfg.rank = 0
    cfg.group_size = 1
    dataset = create_dataset(args_opt.dataset_path, False, cfg)
    loss = CrossEntropy_Val(smooth_factor=0.1, num_classes=cfg.num_classes)
    eval_metrics = {
        'Loss': nn.Loss(),
        'Top1-Acc': nn.Top1CategoricalAccuracy(),
        'Top5-Acc': nn.Top5CategoricalAccuracy()
    }
    model = Model(net, loss, optimizer=None, metrics=eval_metrics)
    metrics = model.eval(dataset, dataset_sink_mode=cfg.ds_sink_mode)
    print("metric: ", metrics)
Example #25
    parser.add_argument('--device_target',
                        type=str,
                        default='GPU',
                        choices=("GPU"),
                        help="Device target, support GPU.")
    args, _ = parser.parse_known_args()

    if args.device_target == "GPU":
        context.set_context(mode=context.GRAPH_MODE,
                            device_target=args.device_target,
                            save_graphs=False)
    else:
        raise ValueError("Unsupported device target.")

    eval_ds = create_eval_dataset(args.dataset_path)

    net = SRCNN()
    lr = Tensor(config.lr, ms.float32)
    opt = nn.Adam(params=net.trainable_params(), learning_rate=lr, eps=1e-07)
    loss = nn.MSELoss(reduction='mean')
    param_dict = load_checkpoint(args.checkpoint_path)
    load_param_into_net(net, param_dict)
    net.set_train(False)
    model = Model(net,
                  loss_fn=loss,
                  optimizer=opt,
                  metrics={'PSNR': SRCNNpsnr()})

    res = model.eval(eval_ds, dataset_sink_mode=False)
    print("result ", res)
Example #26

if __name__ == '__main__':
    data_config = DataConfig()
    model_config = ModelConfig()
    train_config = TrainConfig()

    ds_eval = create_dataset(args_opt.dataset_path,
                             train_mode=False,
                             epochs=1,
                             batch_size=train_config.batch_size,
                             data_type=DataType(data_config.data_format))
    model_builder = ModelBuilder(ModelConfig, TrainConfig)
    train_net, eval_net = model_builder.get_train_eval_net()
    train_net.set_train()
    eval_net.set_train(False)
    auc_metric = AUCMetric()
    model = Model(train_net,
                  eval_network=eval_net,
                  metrics={"auc": auc_metric})
    param_dict = load_checkpoint(args_opt.checkpoint_path)
    load_param_into_net(eval_net, param_dict)

    start = time.time()
    res = model.eval(ds_eval)
    eval_time = time.time() - start
    time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    out_str = f'{time_str} AUC: {list(res.values())[0]}, eval time: {eval_time}s.'
    print(out_str)
    add_write('./auc.log', str(out_str))
Example #27
            context.set_auto_parallel_context(all_reduce_fusion_split_indices=[140])
            init()
        else:
            context.set_context(enable_hccl=False)

    context.set_context(mode=context.GRAPH_MODE)
    epoch_size = args_opt.epoch_size
    net = resnet50(args_opt.batch_size, args_opt.num_classes)
    loss = CrossEntropyLoss()
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, 0.9)

    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

    if args_opt.do_train:
        dataset = create_dataset(epoch_size)
        batch_num = dataset.get_dataset_size()
        config_ck = CheckpointConfig(save_checkpoint_steps=batch_num * 5, keep_checkpoint_max=10)
        ckpoint_cb = ModelCheckpoint(prefix="train_resnet_cifar10", directory="./", config=config_ck)
        loss_cb = LossMonitor()
        model.train(epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb])

    if args_opt.do_eval:
        # if args_opt.checkpoint_path:
        #     param_dict = load_checkpoint(args_opt.checkpoint_path)
        #     load_param_into_net(net, param_dict)
        eval_dataset = create_dataset(1, training=False)
        res = model.eval(eval_dataset)
        print("result: ", res)
    checker = os.path.exists("./memreuse.ir")
    assert checker
Example #28
    # define loss, model
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, momentum)
    
    eval_net = nn.WithEvalCell(net, loss, AMP_LEVEL in ["O2", "O3"])
    # eval_indexes=[0, 1, 2] marks the WithEvalCell outputs as (loss, logits, label)
    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'}, amp_level=AMP_LEVEL,
                  eval_network=eval_net, eval_indexes=[0, 1, 2], keep_batchnorm_fp32=False)

    # define callbacks
    time_cb = TimeMonitor(data_size=step_size)
    loss_cb = LossMonitor()

    cb = [time_cb, loss_cb]
    save_checkpoint = 5
    if save_checkpoint:
        save_checkpoint_epochs = 5
        keep_checkpoint_max = 10
        config_ck = CheckpointConfig(save_checkpoint_steps=save_checkpoint_epochs * step_size,
                                     keep_checkpoint_max=keep_checkpoint_max)
        ckpt_cb = ModelCheckpoint(prefix="resnet", directory=ckpt_save_dir, config=config_ck)
        cb += [ckpt_cb]

    # train model
    model.train(epoch_size, dataset, callbacks=cb, dataset_sink_mode=True)

    # Eval model
    eval_dataset_path = "./datasets/cifar10/test"
    eval_data = create_dataset(eval_dataset_path, do_train=False)
    acc = model.eval(eval_data, dataset_sink_mode=True)
    print("Accuracy:", acc)