Beispiel #1
0
                      'max_rank': 20,
                      'pca_rank': args.rank,
                  },
                  *model_cfg.args,
                  **model_cfg.kwargs)
# Restore a SWAG checkpoint, report the SWA metrics on the training split,
# then build the flow-based VI model over the subspace exported by SWAG.
swag_model.to(args.device)

print('Loading: %s' % args.checkpoint)
# map_location remaps every stored tensor onto args.device while it is being
# deserialized. Without it torch.load tries to restore each tensor onto the
# device it was saved from, which fails when that device is not present on
# this machine (e.g. a checkpoint written on cuda:1 read on a one-GPU host).
ckpt = torch.load(args.checkpoint, map_location=args.device)
# strict=False: keys missing from / unexpected in the checkpoint are tolerated.
swag_model.load_state_dict(ckpt['state_dict'], strict=False)

swag_model.set_swa()
print("SWA:",
      utils.eval(loaders["train"], swag_model, criterion=losses.cross_entropy))

# The second returned value (var) is not used in the visible part of the script.
mean, var, cov_factor = swag_model.get_space()
subspace = Subspace(mean, cov_factor)

# Diagnostic output: norm of each row of the covariance factor.
print(torch.norm(cov_factor, dim=1))

# The flow gets one dimension per row of cov_factor.
# NOTE(review): the flow is placed on the *current CUDA device* while the
# model above uses args.device -- confirm the two always agree.
nvp_flow = construct_flow(cov_factor.shape[0],
                          device=torch.cuda.current_device())

# log(prior_std) + log(temperature) / 2 == log(prior_std * sqrt(temperature)).
# The positional arguments are unpacked first in the call; Python binds them
# before any keyword argument regardless of where they are written, so this
# is the same call as listing them after the keywords.
vi_model = VINFModel(*model_cfg.args,
                     base=model_cfg.base,
                     subspace=subspace,
                     flow=nvp_flow,
                     prior_log_sigma=math.log(args.prior_std) +
                     math.log(args.temperature) / 2,
                     num_classes=num_classes,
                     **model_cfg.kwargs)
Beispiel #2
0
# Feed every saved checkpoint numbered above 160 into the SWAG model.
#
# os.listdir() returns directory entries in arbitrary order, so the files are
# filtered first and then sorted by checkpoint number (file name as a
# tie-break). This makes the order in which checkpoints reach collect_model()
# deterministic across platforms and runs.
# NOTE(review): collect_model() presumably keeps running statistics that
# depend on collection order -- confirm against its implementation.
checkpoint_files = sorted(
    (name for name in os.listdir(args.dir)
     if "checkpoint" in name and checkpoint_num(name) > 160),
    key=lambda name: (checkpoint_num(name), name),
)

for file in checkpoint_files:
    path = os.path.join(args.dir, file)
    print('Loading %s' % path)
    checkpoint = torch.load(path)
    # Load the stored weights into `model`, then hand it to the SWAG model.
    model.load_state_dict(checkpoint['state_dict'])
    swag_model.collect_model(model)

#print('Loading: %s' % args.checkpoint)
#ckpt = torch.load(args.checkpoint)
#swag_model.load_state_dict(ckpt['state_dict'], strict=False)

# Build a ProjectedModel around a GPU copy of `model`, using the mean and the
# projection exported by the SWAG model.
swag_model.set_swa()

# The third value returned by get_space() is used as the projection below;
# both tensors involved in the projection are moved to the GPU.
mean, var, subspace = swag_model.get_space()
mean, subspace = mean.cuda(), subspace.cuda()

# Trainable coefficients: one per row of the projection, kept as a column
# vector with the projection's dtype and device.
proj_params = torch.zeros((subspace.shape[0], 1),
                          dtype=subspace.dtype,
                          device=subspace.device,
                          requires_grad=True)
print(proj_params.device, subspace.device)

# Work on a deep copy so that `model` itself is not moved or modified here.
model_copy = copy.deepcopy(model).cuda()
proj_model = ProjectedModel(
    model=model_copy,
    mean=mean.unsqueeze(1),  # column vector, like proj_params
    projection=subspace,
    proj_params=proj_params,
)

Beispiel #3
0
                                momentum=0.9,
                                weight_decay=1e-4)
    loader = generate_dataloaders(N=10)

    state_dict = None

    for epoch in range(num_epochs):
        model.train()

        for x, y in loader:
            model.zero_grad()
            pred = model(x)
            loss = ((pred - y)**2.0).sum()
            loss.backward()
            optimizer.step()
        small_swag_model.collect_model(model)

        if epoch == 4:
            state_dict = small_swag_model.state_dict()

    small_swag_model.fit()
    with torch.no_grad():
        x = torch.arange(-6., 6., 1.0).unsqueeze(1)
        for i in range(10):
            small_swag_model.sample(0.5)
            small_swag_model(x)

    _, _ = small_swag_model.get_space(export_cov_factor=False)
    _, _, _ = small_swag_model.get_space(export_cov_factor=True)
    small_swag_model.load_state_dict(state_dict)