def compute_hessian_eigenthings(model,
                                dataloader,
                                loss,
                                num_eigenthings=10,
                                full_dataset=True,
                                mode='power_iter',
                                use_gpu=True,
                                max_samples=512,
                                hvp_operator_class=HVPOperatorParams,
                                **kwargs):
    """
    Computes the top `num_eigenthings` eigenvalues and eigenvecs
    for the hessian of the given model by using subsampled power iteration
    with deflation (or lanczos) and the hessian-vector product

    Parameters
    ---------------

    model : Module
        pytorch model for this network
    dataloader : torch.data.DataLoader
        dataloader with x,y pairs for which we compute the loss.
    loss : torch.nn.modules.Loss | torch.nn.functional criterion
        loss function to differentiate through
    num_eigenthings : int
        number of eigenvalues/eigenvecs to compute. computed in order of
        decreasing eigenvalue magnitude.
    full_dataset : boolean
        if true, each power iteration call evaluates the gradient over the
        whole dataset.
    mode : str ['power_iter', 'lanczos']
        which backend to use to compute the top eigenvalues.
    use_gpu:
        if true, attempt to use cuda for all lin alg computations
    max_samples:
        the maximum number of samples that can fit on-memory. used
        to accumulate gradients for large batches.
    hvp_operator_class:
        callable used to build the hessian-vector-product operator. it is
        invoked as ``hvp_operator_class(model, dataloader, loss,
        use_gpu=..., full_dataset=..., max_samples=...)``.
    **kwargs:
        contains additional parameters passed onto lanczos or power_iter.

    Returns
    ---------------
    (eigenvals, eigenvecs) exactly as produced by the selected backend.

    Raises
    ---------------
    ValueError
        if `mode` is neither 'power_iter' nor 'lanczos'. The check runs
        before the operator is built so a bad `mode` fails immediately.
    """
    # Fail fast: constructing the HVP operator may be expensive, so reject an
    # unsupported mode before doing any of that work.
    if mode not in ('power_iter', 'lanczos'):
        raise ValueError(
            "Unsupported mode %s (must be power_iter or lanczos)" % mode)

    hvp_operator = hvp_operator_class(model,
                                      dataloader,
                                      loss,
                                      use_gpu=use_gpu,
                                      full_dataset=full_dataset,
                                      max_samples=max_samples)
    if mode == 'power_iter':
        eigenvals, eigenvecs = deflated_power_iteration(hvp_operator,
                                                        num_eigenthings,
                                                        use_gpu=use_gpu,
                                                        **kwargs)
    else:  # mode == 'lanczos', guaranteed by the check above
        eigenvals, eigenvecs = lanczos(hvp_operator,
                                       num_eigenthings,
                                       use_gpu=use_gpu,
                                       **kwargs)
    return eigenvals, eigenvecs
# Choose which rows of `codes_all` to process. When `args.idx_rg` holds two
# entries they are taken as the (start, end) of the range, with the end
# clamped to the number of rows; otherwise every row is processed.
if len(args.idx_rg) == 2:
    id_str, id_end = args.idx_rg[0], args.idx_rg[1]
    id_end = min(id_end, codes_all.shape[0])
else:
    print("doing it all! ")
    id_str, id_end = 0, codes_all.shape[0]

t0 = time()
for imgi in range(id_str, id_end):  #range(pasu_codes.shape[0] - 1, 0, -1):
    # Take one row of `codes_all` and add a leading axis before handing it to
    # torch; gradient tracking on the code itself is switched off.
    code = codes_all[imgi, :]
    feat = torch.from_numpy(code[np.newaxis, :])
    feat.requires_grad_(False)
    if hessian_method == "BackwardIter":
        # Lanczos on the HVP operator built from G, the code and `model_squ`.
        metricHVP = GANHVPOperator(G, feat, model_squ)
        eigvals, eigvects = lanczos(
            metricHVP, num_eigenthings=800,
            use_gpu=True)  # takes 113 sec on K20x cluster,
        eigvects = eigvects.T  # note the output shape from lanczos is different from that of linalg.eigh, row is eigvec
        # the spectrum has a close correspondence with the full Hessian. since they use the same graph.
    elif hessian_method == "ForwardIter":
        # Same eigen-solve, but with the forward-metric operator, which takes
        # an `EPS` setting from the arguments and an identity `preprocess`.
        metricHVP = GANForwardMetricHVPOperator(G,
                                                feat,
                                                model_squ,
                                                preprocess=lambda img: img,
                                                EPS=args.EPS)  #1E-3,)
        # NOTE(review): this snippet is cut off inside the call below; the
        # remaining arguments and the rest of the loop body are not visible.
        eigvals, eigvects = lanczos(
            metricHVP,
            num_eigenthings=800,
            use_gpu=True,
            max_steps=200,
            tol=1e-6,
예제 #3
0
def compute_hessian_eigenthings(model,
                                dataloader,
                                loss,
                                num_eigenthings=10,
                                full_dataset=True,
                                mode="power_iter",
                                use_gpu=True,
                                fp16=False,
                                max_possible_gpu_samples=2**16,
                                **kwargs):
    """
    Computes the top `num_eigenthings` eigenvalues and eigenvecs
    for the hessian of the given model by using subsampled power iteration
    with deflation (or lanczos) and the hessian-vector product

    Parameters
    ---------------

    model : Module
        pytorch model for this network
    dataloader : torch.data.DataLoader
        dataloader with x,y pairs for which we compute the loss.
    loss : torch.nn.modules.Loss | torch.nn.functional criterion
        loss function to differentiate through
    num_eigenthings : int
        number of eigenvalues/eigenvecs to compute. computed in order of
        decreasing eigenvalue magnitude.
    full_dataset : boolean
        if true, each power iteration call evaluates the gradient over the
        whole dataset.
        (if False, you might want to check if the eigenvalue estimate variance
         depends on batch size)
    mode : str ['power_iter', 'lanczos']
        which backend algorithm to use to compute the top eigenvalues.
    use_gpu:
        if true, attempt to use cuda for all lin alg computations
    fp16: bool
        if true, store and do math with eigenvectors, gradients, etc. in fp16.
        (you should test if this is numerically stable for your application)
    max_possible_gpu_samples:
        the maximum number of samples that can fit on-memory. used
        to accumulate gradients for large batches.
        (note: if smaller than dataloader batch size, this can have odd
         interactions with batch norm statistics)
    **kwargs:
        contains additional parameters passed onto lanczos or power_iter.

    Returns
    ---------------
    (eigenvals, eigenvecs) exactly as produced by the selected backend.

    Raises
    ---------------
    ValueError
        if `mode` is neither 'power_iter' nor 'lanczos'. The check runs
        before the operator is built so a bad `mode` fails immediately.
    """
    # Fail fast: constructing the HVP operator may be expensive, so reject an
    # unsupported mode before doing any of that work.
    if mode not in ("power_iter", "lanczos"):
        raise ValueError(
            "Unsupported mode %s (must be power_iter or lanczos)" % mode)

    hvp_operator = HVPOperator(
        model,
        dataloader,
        loss,
        use_gpu=use_gpu,
        full_dataset=full_dataset,
        max_possible_gpu_samples=max_possible_gpu_samples,
    )
    if mode == "power_iter":
        eigenvals, eigenvecs = deflated_power_iteration(hvp_operator,
                                                        num_eigenthings,
                                                        use_gpu=use_gpu,
                                                        fp16=fp16,
                                                        **kwargs)
    else:  # mode == "lanczos", guaranteed by the check above
        eigenvals, eigenvecs = lanczos(hvp_operator,
                                       num_eigenthings,
                                       use_gpu=use_gpu,
                                       fp16=fp16,
                                       **kwargs)
    return eigenvals, eigenvecs
예제 #4
0
# Sweep the finite-difference step `EPS` of the forward HVP operator: for each
# of 10 trials and each EPS value, draw a random latent, run Lanczos on the
# resulting operator, and save the spectrum under `savedir`.
savedir = r"E:\Cluster_Backup\StyleGAN2\Cats_forw"
for triali in range(10):
    for HVP_eps in [1E-1, 5E-2, 2E-2, 1E-2, 5E-3, 2E-3]:
        # Random integer tag used in the output file name.
        RND = np.random.randint(10000)
        T0 = time()
        # NOTE(review): the tensor is already created with device="cuda", so
        # the trailing .cuda() call looks redundant.
        ref_z = torch.randn(1, latent, device="cuda").cuda()
        SGhvp = GANForwardMetricHVPOperator(
            G,
            ref_z,
            ImDist,
            preprocess=lambda img: img,
            EPS=HVP_eps,
        )
        eigenvals, eigenvecs = lanczos(
            SGhvp,
            num_eigenthings=250,
            max_steps=200,
            tol=1e-5,
        )
        print(time() - T0, " sec")  # 10 eigvect takes 12 sec
        # 50 eigvect takes 40 sec 40.1 sec
        # 200 eigvect, 100 steps takes 163 sec
        #%
        # Transpose, then reorder the eigenpairs by ascending |eigenvalue|.
        # NOTE(review): despite its name, `eigabs_sort` holds the signed
        # eigenvalues in that order, not their absolute values.
        eigenvecs = eigenvecs.T
        sort_idx = np.argsort(np.abs(eigenvals))
        eigabs_sort = eigenvals[sort_idx]
        eigvect_sort = eigenvecs[:, sort_idx]
        #%
        # File name encodes `truncation`, the EPS value and the random tag.
        # NOTE(review): this snippet is cut off inside the call below; only
        # the unsorted `eigenvals` is visibly saved.
        np.savez(
            join(savedir, "Hess_trunc%.1f_eps%.E_%03d.npz" %
                 (truncation, HVP_eps, RND)),
            eigvals=eigenvals,
예제 #5
0
# for param in alexnet.parameters():
#     param.requires_grad_(False)

#%% Load the pasupathy codes
from scipy.io import loadmat
code_path = r"E:\OneDrive - Washington University in St. Louis\ref_img_fit\Pasupathy\pasu_fit_code.mat"
out_dir = r"E:\OneDrive - Washington University in St. Louis\ref_img_fit\Pasupathy\Nullspace"
data = loadmat(code_path)
pasu_codes = data['pasu_code']
#%% Compute the Hessian around a certain Pasupathy image.
# For every row of `pasu_codes`: run Lanczos on the HVP operator built at that
# code, report timing and a few spectrum statistics, and save the result to
# one .npz file per code.
t0 = time()
for imgi, code in enumerate(pasu_codes[:, :]):
    # Add a leading axis to the code and keep gradient tracking off.
    feat = torch.from_numpy(code[np.newaxis, :]).requires_grad_(False)
    metricHVP = GANHVPOperator(G, feat, model_squ)
    eigvals, eigvects = lanczos(metricHVP, num_eigenthings=800, use_gpu=True)
    _eig_magnitudes = np.abs(eigvals)
    _progress_fields = (imgi, time() - t0,
                        max(_eig_magnitudes), min(_eig_magnitudes),
                        eigvals[-10], eigvals[-50], eigvals[-100])
    print("Finish computing img %d %.2f sec passed, max %.2e min %.2e 10th %.1e 50th %.e 100th %.1e" % _progress_fields)
    np.savez(join(out_dir, "pasu_%03d.npz" % imgi),
             eigvals=eigvals,
             eigvects=eigvects,
             code=code)
#%%
# Reload the saved eigenvectors, eigenvalues and code for two of the images.
imgi, imgj = 0, 1
with np.load(join(out_dir, "pasu_%03d.npz" % imgi)) as data:
    basisi, eigvi, codei = data["eigvects"], data["eigvals"], data["code"]

with np.load(join(out_dir, "pasu_%03d.npz" % imgj)) as data:
    basisj, eigvj, codej = data["eigvects"], data["eigvals"], data["code"]
예제 #6
0
def hessian_compute(G,
                    feat,
                    ImDist,
                    hessian_method="BackwardIter",
                    cutoff=None,
                    preprocess=lambda img: img,
                    EPS=1E-2,
                    device="cuda"):
    """Higher level API for GAN hessian compute

    Parameters:
        G: GAN, usually wrapped up by a custom class. Equipped with a `visualize` function that takes a torch vector and
           output a torch image
        feat: a latent code as input to the GAN.
        ImDist: the image distance function. Support dsim = ImDist(img1, img2). takes in 2 torch images and output a
           scalar distance. Pass gradient. If it has a `to` attribute it is moved to `device`.
        hessian_method: Currently, "BP" "ForwardIter" "BackwardIter" are supported
        cutoff: For iterative methods, "ForwardIter" "BackwardIter" this specify how many eigenvectors it's going to
            compute. Defaults to `feat.numel() // 2 - 1`.
        preprocess: or post processing is the operation on the image generated by GAN. Default to be an identity map.
            `lambda img: F.interpolate(img, (256, 256), mode='bilinear', align_corners=True)` is a common choice.
        EPS: passed to the forward operator as `EPS`; only used by "ForwardIter".
        device: where `ImDist` and (for "BP") the latent vectors are placed. Note the iterative methods always call
            lanczos with `use_gpu=True` regardless of this value.

    Returns:
        eigvals, eigvects, H
        For the iterative methods `eigvects` is the transposed lanczos output (one eigenvector per column) and `H`
        is rebuilt from the computed eigenpairs as `eigvects @ diag(eigvals) @ eigvects.T`.
        For "BP" `H` comes from `get_full_hessian` and the eigenpairs from `np.linalg.eigh(H)`.

    Raises:
        NotImplementedError: if `hessian_method` is not one of the supported names. Checked before any other work.
    """
    # Fail fast with an informative message instead of a bare exception raised
    # only after ImDist has already been moved to the device.
    if hessian_method not in ("BackwardIter", "ForwardIter", "BP"):
        raise NotImplementedError(
            'Unsupported hessian_method %r (must be "BP", "ForwardIter" or "BackwardIter")'
            % (hessian_method, ))
    if cutoff is None:
        cutoff = feat.numel() // 2 - 1
    if hasattr(ImDist, 'to'):
        ImDist.to(device)

    if hessian_method == "BP":  # 240 sec on cluster
        ref_vect = feat.detach().clone().float().to(device)
        mov_vect = ref_vect.float().detach().clone().requires_grad_(True)
        imgs1 = G.visualize(ref_vect)
        imgs2 = G.visualize(mov_vect)
        dsim = ImDist(preprocess(imgs1), preprocess(imgs2))
        H = get_full_hessian(
            dsim, mov_vect
        )  # 122 sec for a 256d hessian, # 240 sec on cluster for 4096d hessian
        eigvals, eigvects = np.linalg.eigh(H)
        return eigvals, eigvects, H

    if hessian_method == "BackwardIter":
        # takes 113 sec on K20x cluster,
        # the spectrum has a close correspondence with the full Hessian. since they use the same graph.
        metricHVP = GANHVPOperator(G, feat, ImDist, preprocess=preprocess)
        eigvals, eigvects = lanczos(metricHVP,
                                    num_eigenthings=cutoff,
                                    use_gpu=True)
    else:  # "ForwardIter"
        # EPS=1E-2, max_steps=20 takes 84 sec on K20x cluster.
        # The hessian is not so close
        metricHVP = GANForwardMetricHVPOperator(G,
                                                feat,
                                                ImDist,
                                                preprocess=preprocess,
                                                EPS=EPS)  # 1E-3,)
        eigvals, eigvects = lanczos(
            metricHVP,
            num_eigenthings=cutoff,
            use_gpu=True,
            max_steps=200,
            tol=1e-6,
        )
    eigvects = eigvects.T  # note the output shape from lanczos is different from that of linalg.eigh, row is eigvec
    # Scaling column j by eigvals[j] equals `eigvects @ np.diag(eigvals)` but
    # avoids materialising and multiplying by a dense diagonal matrix.
    H = (eigvects * eigvals) @ eigvects.T
    return eigvals, eigvects, H
예제 #7
0
# Optimise the latent code `feat` with Adam for 200 steps on `objective`,
# printing the objective value every 10th step.
feat.requires_grad_(True)
optimizer = optim.Adam([feat], lr=5e-2)
for step in range(200):
    optimizer.zero_grad()
    obj = objective(preprocess(G.visualize(feat)))
    obj.backward()
    optimizer.step()
    if (step + 1) % 10 == 0:
        print("step %d: %.2f" % (step, obj.item()))
#%%
# Freeze the code, build the forward HVP operator for the objective at that
# code, and apply it once to a random 4096-element vector.
feat.requires_grad_(False)
activHVP = GANForwardHVPOperator(G, feat, objective, preprocess=preprocess)
activHVP.apply(1 * torch.randn(4096).requires_grad_(False).cuda())
#%%
t0 = time()
eigvals, eigvects = lanczos(activHVP, num_eigenthings=500, use_gpu=True)
print(time() - t0)  # 40 sec
# Reverse the order of the eigenvalues and of the matching eigenvector rows.
eigvals = np.flip(eigvals)
eigvects = np.flip(eigvects, axis=0)
#%%
# Keep this result under separate names before `eigvals`/`eigvects` are reused.
eigvals_u, eigvects_u = eigvals, eigvects
#%%
# Generalized eigenproblem: the objective HVP operator against the operator
# built from `model_squ`, passed as `metric_operator`.
feat.requires_grad_(False)
metricHVP = GANHVPOperator(G, feat, model_squ)
t0 = time()
eigvals, eigvects = lanczos_generalized(
    activHVP,
    metric_operator=metricHVP,
    num_eigenthings=2,
    use_gpu=True,
    tol=1e-2,
)