Example #1
class EmaEvalCallBack(Callback):  # class statement missing from the original snippet; name taken from Example #5, Callback base assumed
    def __init__(self,
                 model,
                 ema_network,
                 eval_dataset,
                 loss_fn,
                 decay=0.999,
                 save_epoch=1,
                 dataset_sink_mode=True,
                 start_epoch=0):
        self.model = model
        self.ema_network = ema_network
        self.eval_dataset = eval_dataset
        self.loss_fn = loss_fn
        self.decay = decay
        self.save_epoch = save_epoch
        self.shadow = {}
        self.ema_accuracy = {}

        self.best_ema_accuracy = 0
        self.best_accuracy = 0
        self.best_ema_epoch = 0
        self.best_epoch = 0
        self._start_epoch = start_epoch
        self.eval_metrics = {
            'Validation-Loss': Loss(),
            'Top1-Acc': Top1CategoricalAccuracy(),
            'Top5-Acc': Top5CategoricalAccuracy()
        }
        self.dataset_sink_mode = dataset_sink_mode
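
A minimal sketch (not part of the original snippet) of how the shadow dictionary initialized above is typically refreshed each training step with an exponential moving average; the method name ema_update and the use of parameters_and_names()/asnumpy() are assumptions made here for illustration:

    def ema_update(self):
        # Assumed sketch: blend the current weights of self.model (taken to be a
        # MindSpore Cell) into the shadow copy using the configured decay.
        for name, param in self.model.parameters_and_names():
            value = param.asnumpy()
            if name in self.shadow:
                # shadow = decay * shadow + (1 - decay) * current weight
                self.shadow[name] = self.decay * self.shadow[name] + (1.0 - self.decay) * value
            else:
                self.shadow[name] = value.copy()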
Example #2
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)

    #context.set_context(mode=context.GRAPH_MODE)
    context.set_context(mode=context.PYNATIVE_MODE)

    if args.GPU:
        context.set_context(device_target='GPU')

    # parse model argument
    assert args.model.startswith(
        "hournas"), "Only HourNAS models are supported."
    #_, sub_name = args.model.split("_")
    net = hournasnet(args.model,
                     num_classes=args.num_classes,
                     drop_rate=0.0,
                     drop_connect_rate=0.0,
                     global_pool="avg",
                     bn_tf=False,
                     bn_momentum=None,
                     bn_eps=None)
    print(net)
    print("Total number of parameters:", count_params(net))
    cfg = edict({
        'image_height': args.image_size,
        'image_width': args.image_size,
    })
    cfg.batch_size = args.batch_size
    print(cfg)

    #input_size = net.default_cfg['input_size'][1]
    val_data_url = args.data_path  #os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_cifar10(val_data_url,
                                         repeat_num=1,
                                         training=False,
                                         cifar_cfg=cfg)

    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    eval_metrics = {
        'Validation-Loss': Loss(),
        'Top1-Acc': Top1CategoricalAccuracy(),
        'Top5-Acc': Top5CategoricalAccuracy()
    }

    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)

    model = Model(net, loss, metrics=eval_metrics)

    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
Example #3
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)

    context.set_context(mode=context.GRAPH_MODE)

    if args.GPU:
        context.set_context(device_target='GPU')

    # parse model argument
    assert args.model.startswith(
        "tinynet"), "Only Tinynet models are supported."
    _, sub_name = args.model.split("_")
    net = tinynet(sub_model=sub_name,
                  num_classes=args.num_classes,
                  drop_rate=0.0,
                  drop_connect_rate=0.0,
                  global_pool="avg",
                  bn_tf=False,
                  bn_momentum=None,
                  bn_eps=None)
    print("Total number of parameters:", count_params(net))

    input_size = net.default_cfg['input_size'][1]
    val_data_url = os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_val(args.batch_size,
                                     val_data_url,
                                     workers=args.workers,
                                     distributed=False,
                                     input_size=input_size)

    loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing,
                                      num_classes=args.num_classes)

    loss.add_flags_recursive(fp32=True, fp16=False)
    eval_metrics = {
        'Validation-Loss': Loss(),
        'Top1-Acc': Top1CategoricalAccuracy(),
        'Top5-Acc': Top5CategoricalAccuracy()
    }

    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)

    model = Model(net, loss, metrics=eval_metrics)

    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
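
Examples #3 to #5 rely on a LabelSmoothingCrossEntropy loss that is not shown in these snippets. The following self-contained sketch shows what such a loss typically looks like in MindSpore; the class body below is an illustrative assumption, not code copied from these examples:

import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import Tensor
from mindspore.common import dtype as mstype

class LabelSmoothingCrossEntropy(nn.Cell):
    """Assumed sketch: softmax cross entropy over label-smoothed one-hot targets."""

    def __init__(self, smooth_factor=0.1, num_classes=1000):
        super().__init__()
        self.onehot = ops.OneHot()
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(smooth_factor / (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction='mean')
        self.num_classes = num_classes

    def construct(self, logits, label):
        # Turn sparse integer labels into smoothed one-hot targets, then apply
        # standard softmax cross entropy.
        one_hot_label = self.onehot(label, self.num_classes, self.on_value, self.off_value)
        return self.ce(logits, one_hot_label)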
Example #4
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)

    #context.set_context(mode=context.GRAPH_MODE)
    context.set_context(mode=context.PYNATIVE_MODE)

    if args.GPU:
        context.set_context(device_target='GPU', device_id=args.device_id)

    # parse model argument
    assert args.model.startswith(
        "hournas"), "Only HourNAS models are supported."
    net = nasbenchnet()
    cfg = edict({
        'image_height': args.image_size,
        'image_width': args.image_size,
    })
    cfg.batch_size = args.batch_size
    val_data_url = args.data_path
    val_dataset = create_dataset_cifar10(val_data_url,
                                         repeat_num=1,
                                         training=False,
                                         cifar_cfg=cfg)
    loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing,
                                      num_classes=args.num_classes)
    loss.add_flags_recursive(fp32=True, fp16=False)
    eval_metrics = {
        'Validation-Loss': Loss(),
        'Top1-Acc': Top1CategoricalAccuracy(),
        'Top5-Acc': Top5CategoricalAccuracy()
    }
    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)
    model = Model(net, loss, metrics=eval_metrics)
    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
Example #5
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)
    devid, args.rank_id, args.rank_size = 0, 0, 1

    context.set_context(mode=context.GRAPH_MODE)

    if args.distributed:
        if args.GPU:
            init("nccl")
            context.set_context(device_target='GPU')
        else:
            init()
            devid = int(os.getenv('DEVICE_ID'))
            context.set_context(device_target='Ascend',
                                device_id=devid,
                                reserve_class_name_in_scope=False)
        context.reset_auto_parallel_context()
        args.rank_id = get_rank()
        args.rank_size = get_group_size()
        context.set_auto_parallel_context(
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True,
            device_num=args.rank_size)
    else:
        if args.GPU:
            context.set_context(device_target='GPU')

    is_master = not args.distributed or (args.rank_id == 0)

    # parse model argument
    assert args.model.startswith(
        "tinynet"), "Only Tinynet models are supported."
    _, sub_name = args.model.split("_")
    net = tinynet(sub_model=sub_name,
                  num_classes=args.num_classes,
                  drop_rate=args.drop,
                  drop_connect_rate=args.drop_connect,
                  global_pool="avg",
                  bn_tf=args.bn_tf,
                  bn_momentum=args.bn_momentum,
                  bn_eps=args.bn_eps)

    if is_master:
        print("Total number of parameters:", count_params(net))
    # input image size of the network
    input_size = net.default_cfg['input_size'][1]

    train_dataset = val_dataset = None
    train_data_url = os.path.join(args.data_path, 'train')
    val_data_url = os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_val(args.batch_size,
                                     val_data_url,
                                     workers=args.workers,
                                     distributed=False,
                                     input_size=input_size)

    # Everything below (TimeMonitor, the LR schedule, and model.train) assumes
    # args.train is set: batches_per_epoch is only defined on this path.
    if args.train:
        train_dataset = create_dataset(args.batch_size,
                                       train_data_url,
                                       workers=args.workers,
                                       distributed=args.distributed,
                                       input_size=input_size)
        batches_per_epoch = train_dataset.get_dataset_size()

    loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing,
                                      num_classes=args.num_classes)
    time_cb = TimeMonitor(data_size=batches_per_epoch)
    loss_scale_manager = FixedLossScaleManager(args.loss_scale,
                                               drop_overflow_update=False)

    lr_array = get_lr(base_lr=args.lr,
                      total_epochs=args.epochs,
                      steps_per_epoch=batches_per_epoch,
                      decay_epochs=args.decay_epochs,
                      decay_rate=args.decay_rate,
                      warmup_epochs=args.warmup_epochs,
                      warmup_lr_init=args.warmup_lr,
                      global_epoch=0)
    lr = Tensor(lr_array)

    loss_cb = LossMonitor(lr_array,
                          args.epochs,
                          per_print_times=args.per_print_times,
                          start_epoch=0)

    param_group = add_weight_decay(net, weight_decay=args.weight_decay)

    if args.opt == 'sgd':
        if is_master:
            print('Using SGD optimizer')
        optimizer = SGD(param_group,
                        learning_rate=lr,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay,
                        loss_scale=args.loss_scale)

    elif args.opt == 'rmsprop':
        if is_master:
            print('Using rmsprop optimizer')
        optimizer = RMSProp(param_group,
                            learning_rate=lr,
                            decay=0.9,
                            weight_decay=args.weight_decay,
                            momentum=args.momentum,
                            epsilon=args.opt_eps,
                            loss_scale=args.loss_scale)
    else:
        raise ValueError("Unsupported optimizer: {}".format(args.opt))

    loss.add_flags_recursive(fp32=True, fp16=False)
    eval_metrics = {
        'Validation-Loss': Loss(),
        'Top1-Acc': Top1CategoricalAccuracy(),
        'Top5-Acc': Top5CategoricalAccuracy()
    }

    if args.ckpt:
        ckpt = load_checkpoint(args.ckpt)
        load_param_into_net(net, ckpt)
        net.set_train(False)

    model = Model(net,
                  loss,
                  optimizer,
                  metrics=eval_metrics,
                  loss_scale_manager=loss_scale_manager,
                  amp_level=args.amp_level)

    net_ema = copy.deepcopy(net)
    net_ema.set_train(False)
    assert args.ema_decay > 0, "EMA should be used in tinynet training."

    ema_cb = EmaEvalCallBack(network=net,
                             ema_network=net_ema,
                             loss_fn=loss,
                             eval_dataset=val_dataset,
                             decay=args.ema_decay,
                             save_epoch=args.ckpt_save_epoch,
                             dataset_sink_mode=args.dataset_sink,
                             start_epoch=0)

    callbacks = [loss_cb, ema_cb, time_cb] if is_master else []

    if is_master:
        print("Training on " + args.model + " with " + str(args.num_classes) +
              " classes")

    model.train(args.epochs,
                train_dataset,
                callbacks=callbacks,
                dataset_sink_mode=args.dataset_sink)
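
Example #5 builds its optimizer from parameter groups returned by an add_weight_decay helper that is not shown above. Below is a plausible sketch of such a helper, which exempts batch-norm scales and biases from weight decay; the keyword list and function body are assumptions, not the original implementation:

def add_weight_decay(net, weight_decay=1e-5, skip_keywords=('beta', 'gamma', 'bias')):
    """Assumed sketch: split trainable parameters into decayed and non-decayed groups."""
    decay_params, no_decay_params = [], []
    for param in net.trainable_params():
        if any(keyword in param.name for keyword in skip_keywords):
            no_decay_params.append(param)
        else:
            decay_params.append(param)
    # MindSpore optimizers accept a list of parameter groups with per-group settings.
    return [{'params': no_decay_params, 'weight_decay': 0.0},
            {'params': decay_params, 'weight_decay': weight_decay}]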