def woodbury_inv(A_diag, U, V, k):
    """This matrix inversion is O(k^3) rather than O(p^3), where p is the
    dimensionality of the data and k is the latent dimension.
    """
    # Helps with numerics. If A_diag[i] == 0, then 1 / 0 == inf.
    SMALL = 1e-12
    A_inv_diag = 1. / (A_diag + SMALL)
    I = torch.eye(k, device=cuda.device())
    B_inv = inv(I + ((V * A_inv_diag) @ U))
    # We want to perform the operation `U @ B_inv @ V` but need to optimize it:
    # - Computing `tmp1` is fast because it is (p, k) x (k, k).
    # - Computing `tmp2` is slow because it is (p, k) x (k, p).
    tmp1 = U @ B_inv
    tmp2 = torch.einsum('ab,bc->ac', (tmp1, V))
    # Use `view` rather than `reshape`. The former guarantees that no data is
    # copied (the result is always a view); `reshape` may silently copy.
    tmp3 = A_inv_diag.view(-1, 1) * tmp2
    right = tmp3 * A_inv_diag
    # This is a fast version of `diag(A_inv_diag) - right`.
    right = -1 * right
    idx = torch.arange(0, A_diag.size(0), device=cuda.device())
    right[idx, idx] = A_inv_diag + right[idx, idx]
    return right
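# A hedged sanity-check sketch (not part of the original code; the helper name
# `_check_woodbury_inv` is hypothetical). `woodbury_inv` implements the Woodbury
# identity (A + UV)^{-1} = A^{-1} - A^{-1} U (I + V A^{-1} U)^{-1} V A^{-1} with
# A = diag(A_diag), so on a small problem it should match a direct inverse. It
# assumes the `cuda.device()` helper used above is available.
def _check_woodbury_inv():
    torch.manual_seed(0)
    p, k = 6, 2
    device = cuda.device()
    A_diag = torch.rand(p, device=device) + 0.5  # strictly positive diagonal
    U = torch.randn(p, k, device=device)
    V = torch.randn(k, p, device=device)
    direct = torch.inverse(torch.diag(A_diag) + U @ V)
    fast = woodbury_inv(A_diag, U, V, k)
    assert torch.allclose(direct, fast, atol=1e-4)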
def test_untile_params(self):
    device = cuda.device()

    L1_before = torch.ones(self.p1, self.k, device=device)
    L2_before = torch.ones(self.p2, self.k, device=device) * 2
    B1_before = torch.ones(self.p1, self.k, device=device) * 3
    B2_before = torch.ones(self.p2, self.k, device=device) * 4
    B12_before = torch.zeros(self.p1, self.k, device=device)
    B21_before = torch.zeros(self.p2, self.k, device=device)
    P1_before = torch.ones(self.p1, device=device) * 5
    P2_before = torch.ones(self.p2, device=device) * 6

    L = torch.cat([
        torch.cat([L1_before, B1_before, B12_before], dim=1),
        torch.cat([L2_before, B21_before, B2_before], dim=1)
    ], dim=0)
    P = torch.cat([P1_before, P2_before])

    L1_after, L2_after, B1_after, B2_after, P1_after, P2_after = \
        self.pcca.untile_params(L, P)

    self.assertTrue((L1_after == L1_before).all())
    self.assertTrue((L2_after == L2_before).all())
    self.assertTrue((B1_after == B1_before).all())
    self.assertTrue((B2_after == B2_before).all())
    self.assertTrue((P1_after == P1_before).all())
    self.assertTrue((P2_after == P2_before).all())
def to_positive(A_diag, eps=0.00001):
    """Convert an n-vector into an n-vector with strictly positive entries.
    """
    A_diag[A_diag < 0] = eps
    inds = torch.isclose(A_diag, torch.zeros(1, device=cuda.device()))
    A_diag[inds] = eps
    return A_diag
def tile_params(self):
    """Tile parameters so we can use factor analysis updates for PCCA.

    :return: Model parameters concatenated appropriately.
    """
    p1 = self.p1
    p2 = self.p2
    k = self.latent_dim
    device = cuda.device()

    B12 = torch.zeros(p1, k).to(device)
    B21 = torch.zeros(p2, k).to(device)

    if self.private_z:
        Lambda = torch.cat([
            torch.cat([self.Lambda1, self.B1, B12], dim=1),
            torch.cat([self.Lambda2, B21, self.B2], dim=1)
        ], dim=0)
    else:
        Lambda = torch.cat([self.Lambda1, self.Lambda2], dim=0)
    Psi_diag = torch.cat([self.Psi1_diag, self.Psi2_diag])

    return Lambda, Psi_diag
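# Hedged shape-check sketch (not part of the original code; `model` is a
# placeholder for a constructed PCCA instance with `private_z=True`). With
# private latents, `tile_params` returns the block loading matrix
#
#     Lambda = [ Lambda1  B1   0  ]
#              [ Lambda2  0    B2 ]
#
# of shape (p1 + p2, 3 * k), and Psi_diag = cat([Psi1_diag, Psi2_diag]) of
# length p1 + p2.
Lambda, Psi_diag = model.tile_params()
assert Lambda.shape == (model.p1 + model.p2, 3 * model.latent_dim)
assert Psi_diag.shape == (model.p1 + model.p2,)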
def benchmark2(m, n, k, device=0, precision='float32', iteration=10):
    """Benchmark GEMM: C(m, n) = A(m, k) x B(k, n).

    :param device: CUDA device to be used.
    :param precision: GPU precision, 'float32' or 'float64'.
    :param iteration: times to repeat the GEMM for averaging.
    """
    # Set up the requested device and its cuBLAS handle.
    device = cuda.device(device)
    cublas_handle = device.get_cublas_handle()

    # Create a GPU timer (backed by cudaEvent).
    gtimer = cuda.timer()

    # Set up the matrices.
    matrix_A = cuda.curand.gaussian(size=(m, k), dtype=precision)
    matrix_B = cuda.curand.gaussian(size=(k, n), dtype=precision)
    matrix_C = cuda.matrix(shape=(m, n), dtype=precision)

    # Record start/stop times around `iteration` repetitions of the
    # multiplication.
    elapsedtime = gtimer.profile(mat_mul, matrix_A, matrix_B, matrix_C,
                                 cublas_handle, iteration)

    # Average computation time in seconds (the timer reports milliseconds).
    time = elapsedtime / iteration / 1e3

    # All done.
    return time
def sample_x1_from_x2(self, x2):
    """Sample images based on gene expression data.
    """
    device = cuda.device()
    y1 = torch.zeros(x2.shape[0], self.cfg.IMG_EMBED_DIM, device=device)
    y2 = self.genes_net.encode(x2)
    x1r, _ = self._sample(y1, y2, n_samples=None, sample_across=True)
    return x1r
def sample_x2_from_x1(self, x1):
    """Sample gene expression data from images.
    """
    device = cuda.device()
    y1 = self.image_net.encode(x1)
    y2 = torch.zeros(x1.shape[0], self.cfg.GENE_EMBED_DIM, device=device)
    _, x2r = self._sample(y1, y2, n_samples=None, sample_across=True)
    return x2r
def test():
    """Test random numbers.
    """
    samples = 4096 * 16
    parameters = 2
    dtype = 'float32'
    if dtype == 'float32':
        maxerr = 1e-6
    else:
        maxerr = 1e-12

    device = cuda.device(0)

    # Vector.
    gvector = cuda.curand.gaussian(size=parameters, dtype=dtype)
    gmean = gvector.mean()
    gstd = gvector.std(mean=gmean)
    vector = gvector.copy_to_host(type='gsl')
    mean = vector.mean()
    std = vector.sdev(mean=mean)
    print("vector mean/std difference between cpu/gpu", gmean - mean, gstd - std)
    assert (abs(gmean - mean) < maxerr)

    # Matrix.
    gmatrix = cuda.curand.gaussian(loc=2.5, scale=2.0,
                                   size=(samples, parameters), dtype=dtype)
    # CUDA results.
    gmean, gsd = gmatrix.mean_sd(axis=0)
    # GSL results.
    gslmatrix = gmatrix.copy_to_host(type='gsl')
    gslmean, gslsd = gslmatrix.mean_sd(axis=0)
    print("matrix mean/std max difference between cpu/gpu",
          cuda.stats.max_diff(gmean, cuda.vector(source=gslmean, dtype=dtype)),
          cuda.stats.max_diff(gsd, cuda.vector(source=gslsd, dtype=dtype)))
    print("max value difference between gpu/cpu:", gmatrix.amax() - gslmatrix.max())
    print("min value difference between gpu/cpu:", gmatrix.amin() - gslmatrix.min())
    return
def test_parameters_simple(self):
    device = cuda.device()
    params = nn.Parameter(torch.randn(3, 5, device=device))
    optimizer = optim.Adam([params], lr=0.1)
    params_saved = deepcopy(params.data)

    x = torch.ones(5, device=device)
    y = params @ x
    t = torch.zeros(3, device=device)
    loss = F.mse_loss(t, y)
    loss.backward()
    optimizer.step()

    # We expect the parameters to change.
    self.assertFalse(bool((params.data == params_saved).all()))
def init_params(self):
    """Create model parameters and move them to the appropriate device.

    :return: Model parameters.
    """
    p1 = self.p1
    p2 = self.p2
    k = self.latent_dim
    device = cuda.device()

    Lambda1 = torch.randn(p1, k).to(device)
    Lambda2 = torch.randn(p2, k).to(device)
    Psi1_diag = torch.ones(p1).to(device)
    Psi2_diag = torch.ones(p2).to(device)

    return Lambda1, Lambda2, Psi1_diag, Psi2_diag
def test_tiled_params(self):
    device = cuda.device()
    model = Model()
    optimizer = optim.Adam(model.parameters(), lr=0.1)
    params1_saved = deepcopy(model.params1.data)
    params2_saved = deepcopy(model.params2.data)

    x = torch.ones(5, device=device)
    y = model.forward(x)
    t = torch.zeros(6, device=device)
    loss = F.mse_loss(t, y)
    loss.backward()
    optimizer.step()

    # We expect the parameters to change.
    self.assertFalse(bool((model.params1.data == params1_saved).all()))
    self.assertFalse(bool((model.params2.data == params2_saved).all()))
def reparameterize(self, Lambda, Psi_diag, z):
    """Performs the reparameterization trick for a Gaussian random variable.

    For details, see:
    http://blog.shakirm.com/2015/10/machine-learning-trick-of-the-day-4-reparameterisation-tricks/

    :param Lambda:   Current value for Lambda parameter.
    :param Psi_diag: Current value for Psi parameter.
    :param z:        Latent variable.
    :return:         Samples of y from estimated parameters Lambda and Psi.
    """
    n = z.shape[1]
    p = Psi_diag.shape[0]
    eps = torch.randn(p, n, device=cuda.device())
    # For numerical stability. For Psi to be PD, all elements must be
    # positive: https://math.stackexchange.com/a/927916/159872.
    Psi_diag = LA.to_positive(Psi_diag)
    R = torch.cholesky(diag(Psi_diag), upper=False)
    return Lambda @ z + R @ eps
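# Hedged sanity-check sketch (not part of the original code; `model` stands in
# for any object exposing `reparameterize`). Because y = Lambda @ z + R @ eps
# with R R^T = diag(Psi_diag), samples for a fixed z should have empirical mean
# near Lambda @ z and per-dimension variance near Psi_diag.
device = cuda.device()
p, k, n = 5, 3, 50000
z = torch.randn(k, 1, device=device).repeat(1, n)  # the same z in every column
Lambda = torch.randn(p, k, device=device)
Psi_diag = torch.rand(p, device=device) + 0.5

y = model.reparameterize(Lambda, Psi_diag, z)  # shape (p, n)
assert torch.allclose(y.mean(dim=1), (Lambda @ z[:, :1]).squeeze(1), atol=0.05)
assert torch.allclose(y.var(dim=1), Psi_diag, atol=0.05)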
def init_params(self):
    """Create model parameters and move them to the appropriate device.

    :return: Model parameters.
    """
    p1 = self.p1
    p2 = self.p2
    k = self.latent_dim
    device = cuda.device()

    Lambda1 = torch.randn(p1, k).to(device)
    Lambda2 = torch.randn(p2, k).to(device)
    # Create these modality-specific parameters regardless. We will discard
    # them if the user did not set `private_z=True`.
    B1 = torch.randn(p1, k).to(device)
    B2 = torch.randn(p2, k).to(device)
    Psi1_diag = torch.ones(p1).to(device)
    Psi2_diag = torch.ones(p2).to(device)

    return Lambda1, Lambda2, B1, B2, Psi1_diag, Psi2_diag
def test():
    """Test random numbers.
    """
    samples = 2**12
    parameters = 4096
    maxerr = 1e-12

    device = cuda.device(0)

    # Vector correlation on the GPU.
    gv1 = cuda.curand.gaussian(loc=1, scale=2, size=samples)
    gv2 = cuda.curand.gaussian(size=samples)
    gcorr = cuda.stats.correlation(gv1, gv2)

    # Vector correlation on the CPU.
    v1 = gv1.copy_to_host()
    v2 = gv2.copy_to_host()
    corr = gsl.stats.correlation(v1, v2)

    # Compare the results.
    diff = gcorr - corr
    print("vector correlation, cpu/gpu difference", diff)
    assert (abs(diff) < maxerr)

    # Create two random matrices.
    gm1 = cuda.curand.gaussian(size=(samples, parameters))
    gm2 = cuda.curand.gaussian(size=(samples, parameters))
    # Copy to the CPU.
    m1 = gm1.copy_to_host()
    m2 = gm2.copy_to_host()

    # Correlation along axis 0: one correlation per column.
    gcorr = cuda.stats.correlation(gm1, gm2, axis=0)
    corr = gsl.vector(shape=m1.shape[1])
    for col in range(m1.shape[1]):
        v1 = m1.getColumn(col)
        v2 = m2.getColumn(col)
        corr[col] = gsl.stats.correlation(v1, v2)
    # Check the difference.
    diff = cuda.stats.max_diff(gcorr, cuda.vector(source=corr, dtype=gcorr.dtype))
    print("matrix correlation along row, cpu/gpu max difference", diff)
    assert (diff < maxerr)

    # Correlation along axis 1: one correlation per row.
    gcorr = cuda.stats.correlation(gm1, gm2, axis=1)
    corr = gsl.vector(shape=m1.shape[0])
    for row in range(m1.shape[0]):
        v1 = m1.getRow(row)
        v2 = m2.getRow(row)
        corr[row] = gsl.stats.correlation(v1, v2)
    # Check the difference.
    diff = cuda.stats.max_diff(gcorr, cuda.vector(source=corr, dtype=gcorr.dtype))
    print("matrix correlation along column, cpu/gpu max difference", diff)
    assert (diff < maxerr)
    return
def test_untile_params_grad(self):
    device = cuda.device()

    # These are the parameters we want to be updated even if we manipulate
    # them after tiling and then untiling.
    L1_before = nn.Parameter(torch.ones(self.p1, self.k, device=device))
    L2_before = nn.Parameter(
        torch.ones(self.p2, self.k, device=device) * 2)
    B1_before = nn.Parameter(
        torch.ones(self.p1, self.k, device=device) * 3)
    B2_before = nn.Parameter(
        torch.ones(self.p2, self.k, device=device) * 4)
    B12_before = nn.Parameter(torch.zeros(self.p1, self.k, device=device))
    B21_before = nn.Parameter(torch.zeros(self.p2, self.k, device=device))
    P1_before = nn.Parameter(torch.ones(self.p1, device=device) * 5)
    P2_before = nn.Parameter(torch.ones(self.p2, device=device) * 6)

    L = torch.cat([
        torch.cat([L1_before, B1_before, B12_before], dim=1),
        torch.cat([L2_before, B21_before, B2_before], dim=1)
    ], dim=0)
    P = torch.cat([P1_before, P2_before])

    L1_after, L2_after, B1_after, B2_after, P1_after, P2_after = \
        self.pcca.untile_params(L, P)

    # ----------------------------------------------------------------------
    # The gradient is None because we have never called backward().
    self.assertTrue(L1_before.grad is None)

    x = torch.ones(self.p1)
    x.requires_grad_(True)
    y = L1_after.t() @ x
    y.backward(torch.ones(self.k))

    # The gradient is now populated because manipulating L1_after affects
    # L1_before.
    self.assertTrue(L1_before.grad is not None)
    self.assertFalse((L1_before.grad == 0).all())

    # L2_before.grad exists because backward() went through L, but it is all
    # zeros since y only depended on the L1 block.
    self.assertTrue((L2_before.grad == 0).all())

    x = torch.ones(self.p2)
    x.requires_grad_(True)
    y = L2_after.t() @ x
    y.backward(torch.ones(self.k))

    # After backpropagating through L2_after, the gradient is no longer all
    # zeros.
    self.assertTrue(L2_before.grad is not None)
    self.assertFalse((L2_before.grad == 0).all())

    # ----------------------------------------------------------------------
    # B1_before.grad exists because the earlier backward() calls went through
    # L, but it is still all zeros.
    self.assertTrue((B1_before.grad == 0).all())

    x = torch.ones(self.p1)
    x.requires_grad_(True)
    y = B1_after.t() @ x
    y.backward(torch.ones(self.k))

    # After backpropagating through B1_after, the gradient is no longer all
    # zeros.
    self.assertTrue(B1_before.grad is not None)
    self.assertFalse((B1_before.grad == 0).all())

    # Likewise, B2_before.grad exists but is all zeros until we backpropagate
    # through B2_after.
    self.assertTrue((B2_before.grad == 0).all())

    x = torch.ones(self.p2)
    x.requires_grad_(True)
    y = B2_after.t() @ x
    y.backward(torch.ones(self.k))

    self.assertTrue(B2_before.grad is not None)
    self.assertFalse((B2_before.grad == 0).all())

    # ----------------------------------------------------------------------
    # The gradient is None because we have not touched Psi in any way while
    # manipulating the Lambdas and Bs.
    self.assertTrue(P1_before.grad is None)

    x = torch.ones(self.p1)
    x.requires_grad_(True)
    y = P1_after @ x
    y.backward()

    self.assertTrue(P1_before.grad is not None)
    self.assertFalse((P1_before.grad == 0).all())

    # P2_before.grad exists because backward() went through P, but it is all
    # zeros until we backpropagate through P2_after.
    self.assertTrue((P2_before.grad == 0).all())

    x = torch.ones(self.p2)
    x.requires_grad_(True)
    y = P2_after @ x
    y.backward()

    self.assertTrue(P2_before.grad is not None)
    self.assertFalse((P2_before.grad == 0).all())
def __init__(self):
    super(Model, self).__init__()
    device = cuda.device()
    self.params1 = nn.Parameter(torch.randn(3, 5, device=device))
    self.params2 = nn.Parameter(torch.randn(3, 5, device=device))
import os
import time

import torch
import torch.utils.data
from torch import optim
from torch import nn
from torchvision.models import inception_v3, resnet18

import cuda
from data import get_data_loaders, split_across

# ------------------------------------------------------------------------------

SAVE_MODEL_EVERY = 5
device = cuda.device()

# ------------------------------------------------------------------------------

def main(args):
    """Train fear extinction CNN.
    """
    start_time = time.time()
    print('-' * 80)
    log_args(args)
    print('-' * 80)

    model = load_model(args.model, args.pretrained)
    model = model.to(device)
    print('Model loaded.')