Beispiel #1
0
        'params': decayed_params,
        'weight_decay': cfg.weight_decay
    }, {
        'params': no_decayed_params
    }, {
        'order_params': net.trainable_params()
    }]
    optimizer = RMSProp(group_params,
                        lr,
                        decay=0.9,
                        weight_decay=cfg.weight_decay,
                        momentum=cfg.momentum,
                        epsilon=cfg.opt_eps,
                        loss_scale=cfg.loss_scale)
    eval_metrics = {
        'Loss': nn.Loss(),
        'Top1-Acc': nn.Top1CategoricalAccuracy(),
        'Top5-Acc': nn.Top5CategoricalAccuracy()
    }

    if args_opt.resume:
        ckpt = load_checkpoint(args_opt.resume)
        load_param_into_net(net, ckpt)
    model = Model(net, loss_fn=loss, optimizer=optimizer, metrics={'acc'})

    print("============== Starting Training ==============")
    loss_cb = LossMonitor(per_print_times=batches_per_epoch)
    time_cb = TimeMonitor(data_size=batches_per_epoch)
    callbacks = [loss_cb, time_cb]
    config_ck = CheckpointConfig(save_checkpoint_steps=batches_per_epoch,
                                 keep_checkpoint_max=cfg.keep_checkpoint_max)
Beispiel #2
0
    # optimizer
    decayed_params = []
    no_decayed_params = []
    for param in net.trainable_params():
        if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
            decayed_params.append(param)
        else:
            no_decayed_params.append(param)

    group_params = [{'params': decayed_params, 'weight_decay': cfg.weight_decay},
                    {'params': no_decayed_params},
                    {'order_params': net.trainable_params()}]
    optimizer = Momentum(params=net.trainable_params(), learning_rate=Tensor(lr), momentum=cfg.momentum,
                         weight_decay=cfg.weight_decay)
    eval_metrics = {'Loss': nn.Loss(),
                    'Top1-Acc': nn.Top1CategoricalAccuracy(),
                    'Top5-Acc': nn.Top5CategoricalAccuracy()}

    if args_opt.resume:
        ckpt = load_checkpoint(args_opt.resume)
        load_param_into_net(net, ckpt)
    model = Model(net, loss_fn=loss, optimizer=optimizer, metrics={'acc'})

    print("============== Starting Training ==============")
    loss_cb = LossMonitor(per_print_times=batches_per_epoch)
    time_cb = TimeMonitor(data_size=batches_per_epoch)
    callbacks = [loss_cb, time_cb]
    config_ck = CheckpointConfig(save_checkpoint_steps=batches_per_epoch, keep_checkpoint_max=cfg.keep_checkpoint_max)
    ckpoint_cb = ModelCheckpoint(prefix=f"shufflenet-rank{cfg.rank}", directory=cfg.ckpt_path, config=config_ck)
    if args_opt.is_distributed & cfg.is_save_on_master: