Example 1
import os

import torch
import torch.nn as nn

from model import SCNN  # project-specific import; adjust to wherever SCNN is defined

# model1 is the source model (loaded earlier in the script, with sub-modules
# exposed through nested .modules lists) and model2 is the state dict being
# assembled for the PyTorch SCNN. Copy the remaining layer2/fc parameters by index.
model2['layer2.1.weight'] = model1.modules[42].modules[8].weight
model2['layer2.1.bias'] = model1.modules[42].modules[8].bias
model2['fc.0.weight'] = model1.modules[43].modules[1].modules[3].weight
model2['fc.0.bias'] = model1.modules[43].modules[1].modules[3].bias
model2['fc.2.weight'] = model1.modules[43].modules[1].modules[5].weight
model2['fc.2.bias'] = model1.modules[43].modules[1].modules[5].bias
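
# Illustrative aside (not part of the original script): one way to work out the
# index -> key mapping above is to print the nested .modules tree of the source
# model and the key names/shapes that the PyTorch SCNN expects.
print(model1)  # shows which sub-module sits at modules[42], modules[43], ...
for key, tensor in SCNN(pretrained=False).state_dict().items():
    print(key, tuple(tensor.shape))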

save_dir = os.path.join('experiments', 'vgg_SCNN_DULR_w9')
os.makedirs(save_dir, exist_ok=True)  # make sure the target directory exists before saving
save_name = os.path.join(save_dir, 'vgg_SCNN_DULR_w9.pth')
torch.save(model2, save_name)

# Reload the converted weights into an SCNN instance (strict=False because the
# converted dict only covers part of the model) and zero the backbone conv biases.
net = SCNN(pretrained=False)
d = torch.load(save_name)
net.load_state_dict(d, strict=False)
for m in net.backbone.modules():
    if isinstance(m, nn.Conv2d) and m.bias is not None:
        m.bias.data.zero_()

# Re-save in the checkpoint format expected by the training code.
save_dict = {
    "epoch": 0,
    "net": net.state_dict(),
    "optim": None,
    "lr_scheduler": None
}

torch.save(save_dict, save_name)  # overwrite the intermediate file with the full checkpoint
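
A minimal sketch of how a training script would restore this checkpoint; it assumes the same SCNN class and save_name path, and since the optimizer and scheduler entries are None here, only the weights and epoch are recovered:

ckpt = torch.load(save_name, map_location='cpu')
net = SCNN(pretrained=False)
net.load_state_dict(ckpt['net'])
start_epoch = ckpt['epoch']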
Example 2
import torch
import torch.optim as optim
import horovod.torch as hvd

# net, exp_cfg and the PolyLR scheduler are assumed to be defined/imported
# earlier in the script.
net = torch.nn.DataParallel(net)

lr = exp_cfg['optim']['lr']
momentum = exp_cfg['optim']['momentum']
weight_decay = exp_cfg['optim']['weight_decay']
nesterov = exp_cfg['optim']['nesterov']

# Horovod: scale the learning rate by lr_scaler. The original snippet does not
# show where lr_scaler is defined; the usual Horovod convention of scaling by
# the number of workers is assumed here.
lr_scaler = hvd.size()
optimizer = optim.SGD(net.parameters(),
                      lr=lr * lr_scaler,
                      momentum=momentum,
                      weight_decay=weight_decay,
                      nesterov=nesterov)

# Horovod: broadcast parameters & optimizer state.
hvd.broadcast_parameters(net.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)

# Horovod: (optional) compression algorithm.
#compression = hvd.Compression.fp16

# Horovod: wrap optimizer with DistributedOptimizer.
gradient_predivide_factor = 1.0
optimizer = hvd.DistributedOptimizer(
    optimizer,
    named_parameters=net.named_parameters(),
    #compression=compression,
    op=hvd.Average,
    gradient_predivide_factor=gradient_predivide_factor)

# PolyLR is the project's polynomial-decay learning-rate scheduler (power 0.9).
lr_scheduler = PolyLR(optimizer, 0.9, **exp_cfg['lr_scheduler'])
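
A minimal sketch of how the wrapped optimizer and scheduler are typically driven inside the training loop; train_loader, criterion, num_epochs and the device handling are assumptions, not part of the original snippet:

for epoch in range(num_epochs):
    for imgs, labels in train_loader:
        optimizer.zero_grad()
        outputs = net(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()      # hvd.DistributedOptimizer averages gradients across workers here
    lr_scheduler.step()       # advance the polynomial learning-rate decay once per epoch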