Example #1
def test_get_lr_parameter_with_group():
    net = LeNet5()
    conv_lr = 0.1
    default_lr = 0.3
    conv_params = list(
        filter(lambda x: 'conv' in x.name, net.trainable_params()))
    no_conv_params = list(
        filter(lambda x: 'conv' not in x.name, net.trainable_params()))
    group_params = [{
        'params': conv_params,
        'lr': conv_lr
    }, {
        'params': no_conv_params,
        'lr': default_lr
    }]
    opt = SGD(group_params)
    assert opt.is_group_lr is True
    for param in opt.parameters:
        lr = opt.get_lr_parameter(param)
        if 'conv' in param.name:
            cur_name = 'learning_rate_group_' + '0'
        else:
            cur_name = 'learning_rate_group_' + '1'
        assert lr.name == cur_name

    lr_list = opt.get_lr_parameter(conv_params)
    for lr, param in zip(lr_list, conv_params):
        assert lr.name == 'learning_rate_group_' + '0'
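The test snippets on this page omit their imports and the shared LeNet5 fixture. A minimal, hypothetical setup sketch (this LeNet5 is only a stand-in for the test utility of the same name; its parameter names still contain 'conv', 'fc1' and 'bias' as the assertions expect) could look like:

# Hypothetical setup assumed by the snippets on this page (not part of the original tests).
import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common import dtype as mstype
from mindspore.nn.optim import SGD

class LeNet5(nn.Cell):
    """Minimal stand-in for the LeNet5 test utility."""
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5, pad_mode='valid')
        self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid')
        self.fc1 = nn.Dense(16 * 5 * 5, 120)
        self.fc2 = nn.Dense(120, 84)
        self.fc3 = nn.Dense(84, 10)
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()

    def construct(self, x):
        x = self.max_pool2d(self.relu(self.conv1(x)))
        x = self.max_pool2d(self.relu(self.conv2(x)))
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc3(x)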
Example #2
def test_get_lr_parameter_with_order_group():
    net = LeNet5()
    conv_lr = 0.1
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'lr': conv_lr},
                    {'order_params': net.trainable_params()}]
    opt = SGD(group_params)
    assert opt.is_group_lr is True
    for param in opt.parameters:
        lr = opt.get_lr_parameter(param)
        assert lr.name == 'lr_' + param.name
Example #3
def test_order_params_2():
    net = LeNet5()
    conv_weight_decay = 0.01
    fc1_lr = (0.5, 0.4, 0.3)
    default_lr = 0.1
    default_wd = 0.0
    conv_params = list(
        filter(lambda x: 'conv' in x.name, net.trainable_params()))
    fc1_params = list(filter(lambda x: 'fc1' in x.name,
                             net.trainable_params()))
    group_params = [{
        'params': fc1_params,
        'lr': fc1_lr
    }, {
        'params': conv_params,
        'weight_decay': conv_weight_decay
    }, {
        'order_params': fc1_params + conv_params
    }]
    opt = SGD(group_params, learning_rate=default_lr, weight_decay=default_wd)
    assert opt.is_group is True
    assert opt.is_group_lr is True
    assert opt.is_group_params_ordered is True
    all_lr = opt.get_lr_parameter(fc1_params + conv_params)
    for weight_decay, decay_flags, lr, param, order_param in zip(
            opt.weight_decay, opt.decay_flags, all_lr, opt.parameters,
            fc1_params + conv_params):
        if 'conv' in param.name:
            assert np.all(lr.data.asnumpy() == Tensor(
                np.array([default_lr] * 3), mstype.float32).asnumpy())
            assert weight_decay == conv_weight_decay
            assert decay_flags is True
        elif 'fc1' in param.name:
            assert np.all(
                lr.data.asnumpy() == Tensor(fc1_lr, mstype.float32).asnumpy())
            assert weight_decay == default_wd
            assert decay_flags is False
        else:
            assert np.all(lr.data.asnumpy() == Tensor(
                np.array([default_lr] * 3), mstype.float32).asnumpy())
            assert weight_decay == default_wd
            assert decay_flags is False

        assert param.name == order_param.name
        if 'conv' in param.name:
            assert lr.name == 'learning_rate'
        elif 'fc1' in param.name:
            assert lr.name == 'learning_rate_group_' + '0'
Example #4
 def test_init(self):
     with pytest.raises(ValueError):
         SGD(params,
             learning_rate=0.1,
             momentum=-0.1,
             dampening=0,
             weight_decay=0,
             nesterov=False)
     with pytest.raises(ValueError):
         SGD(params,
             learning_rate=0.12,
             momentum=-0.1,
             dampening=0,
             weight_decay=0,
             nesterov=False)
     SGD(params)
Example #5
def test_SGD():
    epoch = 3
    net = NetSGD()
    learning_rate = 0.1
    momentum = 0.9
    dampening = 0.0
    weight_decay = 0.0
    nesterov = True
    loss_scale = 1.0

    optimizer = SGD(filter(lambda x: x.requires_grad,
                           net.get_parameters()), learning_rate, momentum,
                    dampening, weight_decay, nesterov, loss_scale)
    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion,
                                     optimizer)  # optimizer
    train_network.set_train()
    losses = []
    for _ in range(epoch):
        data = Tensor(
            np.arange(0, 16).reshape(1, 1, 4, 4).astype(np.float32) * 0.01)
        label = Tensor(np.array([0]).astype(np.int32))
        loss = train_network(data, label)
        losses.append(loss.asnumpy())

    last_loss = 100.0
    for loss in losses:
        assert last_loss > loss
        last_loss = loss
    return losses
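The positional arguments above (learning_rate, momentum, dampening, weight_decay, nesterov, loss_scale) configure the conventional SGD-with-momentum update; loss_scale only rescales the gradients before the step. A hedged NumPy sketch of that update rule (the textbook formulation, not MindSpore's fused optimizer kernel):

# Sketch of the SGD-with-momentum update the arguments above configure
# (assumption: conventional formulation, applied per parameter).
import numpy as np

def sgd_step(param, grad, buf, lr=0.1, momentum=0.9, dampening=0.0,
             weight_decay=0.0, nesterov=True):
    if weight_decay:
        grad = grad + weight_decay * param  # L2 weight decay folded into the gradient
    # momentum buffer: first step uses the raw gradient
    buf = grad.copy() if buf is None else momentum * buf + (1.0 - dampening) * grad
    # Nesterov look-ahead vs. plain momentum update
    update = grad + momentum * buf if nesterov else buf
    return param - lr * update, buf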
Example #6
def test_order_params_all_1():
    net = LeNet5()
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'weight_decay': 0.01},
                    {'params': bias_params, 'lr': 0.01},
                    {'order_params': net.trainable_params()}]
    opt = SGD(group_params, learning_rate=0.1, weight_decay=0.0)
    assert opt.is_group is True
    assert opt.is_group_lr is True
    assert opt.is_group_params_ordered is True
    for weight_decay, decay_flags, lr, param, order_param in zip(
            opt.weight_decay, opt.decay_flags, opt.learning_rate, opt.parameters, net.trainable_params()):
        if param in conv_params:
            assert np.all(lr.data.asnumpy() == Tensor(0.1, mstype.float32).asnumpy())
            assert weight_decay == 0.01
            assert decay_flags is True
        elif param in bias_params:
            assert np.all(lr.data.asnumpy() == Tensor(0.01, mstype.float32).asnumpy())
            assert weight_decay == 0.0
            assert decay_flags is False
        else:
            assert np.all(lr.data.asnumpy() == Tensor(0.1, mstype.float32).asnumpy())
            assert weight_decay == 0.0
            assert decay_flags is False

        assert param.name == order_param.name
        assert lr.name == 'lr_' + param.name
Example #7
def test_weight_decay():
    inputs = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01)
    label = Tensor(np.ones([1, 10]).astype(np.float32))

    net = LeNet5()
    conv_weight_decay = 0.8
    default_weight_decay = 0.0
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
    group_params = [{'params': no_conv_params},
                    {'params': conv_params, 'weight_decay': conv_weight_decay},
                    {'order_params': net.trainable_params()}]
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()

    opt = SGD(group_params, learning_rate=0.1, weight_decay=default_weight_decay)
    assert opt.is_group is True
    assert opt.is_group_lr is False
    assert opt.is_group_params_ordered is True
    for weight_decay, decay_flags, param, order_param in zip(
            opt.weight_decay, opt.decay_flags, opt.parameters, net.trainable_params()):
        if param in conv_params:
            assert weight_decay == conv_weight_decay
            assert decay_flags is True
        else:
            assert weight_decay == default_weight_decay
            assert decay_flags is False

        assert param.name == order_param.name

    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, opt)
    _executor.compile(train_network, inputs, label)
Example #8
def test_get_lr_parameter_with_no_group():
    net = LeNet5()
    conv_weight_decay = 0.8

    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'weight_decay': conv_weight_decay},
                    {'params': no_conv_params}]
    opt = SGD(group_params)
    assert opt.is_group_lr is False
    for param in opt.parameters:
        lr = opt.get_lr_parameter(param)
        assert lr.name == opt.learning_rate.name

    params_error = [1, 2, 3]
    with pytest.raises(TypeError):
        opt.get_lr_parameter(params_error)
Example #9
def test_get_order_params_with_not_include():
    net = LeNet5()
    conv_weight_decay = 0.8

    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'weight_decay': conv_weight_decay},
                    {'order_params': no_conv_params}]
    with pytest.raises(ValueError):
        SGD(group_params)
Example #10
def test_order_params_lr():
    net = LeNet5()
    conv_lr = 0.01
    default_lr = 0.1
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'lr': conv_lr},
                    {'order_params': net.trainable_params()}]
    opt = SGD(group_params, learning_rate=default_lr)
    assert opt.is_group is True
    assert opt.is_group_lr is True
    assert opt.is_group_params_ordered is True
    for lr, param, order_param in zip(opt.learning_rate, opt.parameters, net.trainable_params()):
        if param in conv_params:
            assert np.all(lr.data.asnumpy() == Tensor(conv_lr, mstype.float32).asnumpy())
        else:
            assert np.all(lr.data.asnumpy() == Tensor(default_lr, mstype.float32).asnumpy())

        assert param.name == order_param.name
        assert lr.name == 'lr_' + param.name
Example #11
def test_order_params_weight_decay():
    net = LeNet5()
    conv_weight_decay = 0.01
    default_wd = 0.0
    default_lr = 0.1
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'weight_decay': conv_weight_decay},
                    {'order_params': net.trainable_params()}]
    opt = SGD(group_params, learning_rate=default_lr, weight_decay=default_wd)
    assert opt.is_group is True
    assert opt.is_group_lr is False
    assert opt.is_group_params_ordered is True
    assert opt.learning_rate.name == "learning_rate"
    assert np.all(opt.learning_rate.data.asnumpy() == Tensor(default_lr, mstype.float32).asnumpy())
    for weight_decay, decay_flags, param, order_param in zip(
            opt.weight_decay, opt.decay_flags, opt.parameters, net.trainable_params()):
        if param in conv_params:
            assert weight_decay == conv_weight_decay
            assert decay_flags is True
        else:
            assert weight_decay == default_wd
            assert decay_flags is False
        assert param.name == order_param.name
Example #12
 def test_Sgd_init(self):
     with pytest.raises(TypeError):
         paramsTensor = Tensor(np.zeros([1, 2, 3]))
         SGD(paramsTensor)
Example #13
 def test_Sgd_init(self):
     with pytest.raises(ValueError):
         SGD(None)
Example #14
 def test_Sgd_init(self):
     with pytest.raises(TypeError):
         SGD(paramsTensor)
Example #15
def main():
    cfg, args = init_argument()
    loss_meter = AverageMeter('loss')
    # dataloader
    cfg.logger.info('start create dataloader')
    de_dataset, steps_per_epoch, class_num = get_de_dataset(cfg)
    cfg.steps_per_epoch = steps_per_epoch
    cfg.logger.info('step per epoch: %s', cfg.steps_per_epoch)
    de_dataloader = de_dataset.create_tuple_iterator()
    cfg.logger.info('class num original: %s', class_num)
    if class_num % 16 != 0:
        class_num = (class_num // 16 + 1) * 16
    cfg.class_num = class_num
    cfg.logger.info('change the class num to: %s', cfg.class_num)
    cfg.logger.info('end create dataloader')

    # backbone and loss
    cfg.logger.important_info('start create network')
    create_network_start = time.time()

    network = SphereNet(num_layers=cfg.net_depth,
                        feature_dim=cfg.embedding_size,
                        shape=cfg.input_size)
    if args.device_target == 'CPU':
        head = CombineMarginFC(embbeding_size=cfg.embedding_size,
                               classnum=cfg.class_num)
    else:
        head = CombineMarginFCFp16(embbeding_size=cfg.embedding_size,
                                   classnum=cfg.class_num)
    criterion = CrossEntropy()

    # load the pretrained model
    if os.path.isfile(cfg.pretrained):
        param_dict = load_checkpoint(cfg.pretrained)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)
        cfg.logger.info('load model %s success', cfg.pretrained)

    # mixed precision training
    if args.device_target == 'CPU':
        network.add_flags_recursive(fp32=True)
        head.add_flags_recursive(fp32=True)
    else:
        network.add_flags_recursive(fp16=True)
        head.add_flags_recursive(fp16=True)
    criterion.add_flags_recursive(fp32=True)

    train_net = BuildTrainNetworkWithHead(network, head, criterion)

    # optimizer and lr scheduler
    lr = step_lr(lr=cfg.lr,
                 epoch_size=cfg.epoch_size,
                 steps_per_epoch=cfg.steps_per_epoch,
                 max_epoch=cfg.max_epoch,
                 gamma=cfg.lr_gamma)
    opt = SGD(params=train_net.trainable_params(),
              learning_rate=lr,
              momentum=cfg.momentum,
              weight_decay=cfg.weight_decay,
              loss_scale=cfg.loss_scale)

    # package training process, adjust lr + forward + backward + optimizer
    train_net = TrainOneStepCell(train_net, opt, sens=cfg.loss_scale)

    # checkpoint save
    if cfg.local_rank == 0:
        ckpt_max_num = cfg.max_epoch * cfg.steps_per_epoch // cfg.ckpt_interval
        train_config = CheckpointConfig(
            save_checkpoint_steps=cfg.ckpt_interval,
            keep_checkpoint_max=ckpt_max_num)
        ckpt_cb = ModelCheckpoint(config=train_config,
                                  directory=cfg.outputs_dir,
                                  prefix='{}'.format(cfg.local_rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = train_net
        cb_params.epoch_num = ckpt_max_num
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    train_net.set_train()
    t_end = time.time()
    t_epoch = time.time()
    old_progress = -1

    cfg.logger.important_info('====start train====')
    for i, total_data in enumerate(de_dataloader):
        data, gt = total_data
        data = Tensor(data)
        gt = Tensor(gt)

        loss = train_net(data, gt)
        loss_meter.update(loss.asnumpy())

        # ckpt
        if cfg.local_rank == 0:
            cb_params.cur_step_num = i + 1  # current step number
            cb_params.batch_num = i + 2
            ckpt_cb.step_end(run_context)

        # logging loss, fps, ...
        if i == 0:
            time_for_graph_compile = time.time() - create_network_start
            cfg.logger.important_info('{}, graph compile time={:.2f}s'.format(
                cfg.task, time_for_graph_compile))

        if i % cfg.log_interval == 0 and cfg.local_rank == 0:
            time_used = time.time() - t_end
            epoch = int(i / cfg.steps_per_epoch)
            fps = cfg.per_batch_size * (
                i - old_progress) * cfg.world_size / time_used
            cfg.logger.info(
                'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr={}'.format(
                    epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if i % cfg.steps_per_epoch == 0 and cfg.local_rank == 0:
            epoch_time_used = time.time() - t_epoch
            epoch = int(i / cfg.steps_per_epoch)
            fps = cfg.per_batch_size * cfg.world_size * cfg.steps_per_epoch / epoch_time_used
            cfg.logger.info(
                '=================================================')
            cfg.logger.info(
                'epoch time: epoch[{}], iter[{}], {:.2f} imgs/sec'.format(
                    epoch, i, fps))
            cfg.logger.info(
                '=================================================')
            t_epoch = time.time()

    cfg.logger.important_info('====train end====')
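The step_lr used above is project-specific: it returns one learning rate per training step, and SGD accepts that list (or array) directly as learning_rate. A hypothetical sketch of such a step-decay schedule, under the assumption that the rate is multiplied by gamma every epoch_size epochs (the real step_lr may differ):

# Hypothetical step-decay schedule; step_lr_sketch is not the project's step_lr.
import numpy as np

def step_lr_sketch(lr, epoch_size, steps_per_epoch, max_epoch, gamma):
    total_steps = int(max_epoch * steps_per_epoch)
    decay_every = int(epoch_size * steps_per_epoch)
    lr_each_step = [lr * gamma ** (step // decay_every) for step in range(total_steps)]
    return np.array(lr_each_step, dtype=np.float32)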