def model(self):
    """Pyro model: prior over the inducing outputs ``u`` plus the likelihood.

    Samples ``u`` from a zero-mean multivariate normal whose scale is the
    identity (whitened case) or the Cholesky factor of the jittered ``Kuu``,
    computes the latent mean/variance at ``self.X`` via ``conditional`` and
    either returns ``(f_loc, f_var)`` when there are no observations or
    the result of the likelihood, rescaled by ``num_data / X.shape[0]``.
    """
    self.set_mode("model")

    inducing = self.get_param("Xu")
    u_loc = self.get_param("u_loc")
    u_scale_tril = self.get_param("u_scale_tril")

    num_inducing = inducing.shape[0]
    Kuu = self.kernel(inducing)
    Kuu = Kuu + torch.eye(num_inducing,
                          out=inducing.new_empty(num_inducing, num_inducing)) * self.jitter
    Luu = Kuu.potrf(upper=False)

    zero_loc = inducing.new_zeros(u_loc.shape)
    u_name = param_with_module_name(self.name, "u")
    # The whitened prior is a standard normal; otherwise it is N(0, Kuu).
    if self.whiten:
        prior_scale_tril = torch.eye(num_inducing,
                                     out=inducing.new_empty(num_inducing, num_inducing))
    else:
        prior_scale_tril = Luu
    u_prior = dist.MultivariateNormal(zero_loc, scale_tril=prior_scale_tril)
    pyro.sample(u_name, u_prior.independent(zero_loc.dim() - 1))

    f_loc, f_var = conditional(self.X, inducing, self.kernel, u_loc, u_scale_tril,
                               Luu, full_cov=False, whiten=self.whiten,
                               jitter=self.jitter)
    f_loc = f_loc + self.mean_function(self.X)

    if self.y is None:
        return f_loc, f_var

    with poutine.scale(None, self.num_data / self.X.shape[0]):
        return self.likelihood(f_loc, f_var, self.y)
def model(self):
    """Pyro model: prior over the latent function values ``f`` plus likelihood.

    Samples ``f`` from a zero-mean multivariate normal (identity scale when
    whitened, otherwise the Cholesky factor of the jittered ``Kff``), maps the
    variational parameters back through ``Lff`` in the whitened case, adds the
    mean function and returns ``(f_loc, f_var)`` when ``self.y`` is ``None``,
    otherwise the result of the likelihood.
    """
    self.set_mode("model")

    f_loc = self.get_param("f_loc")
    f_scale_tril = self.get_param("f_scale_tril")

    num_points = self.X.shape[0]
    Kff = self.kernel(self.X)
    Kff = Kff + (torch.eye(num_points,
                           out=self.X.new_empty(num_points, num_points)) * self.jitter)
    Lff = Kff.potrf(upper=False)

    zero_loc = self.X.new_zeros(f_loc.shape)
    f_name = param_with_module_name(self.name, "f")
    if self.whiten:
        prior_scale_tril = torch.eye(num_points,
                                     out=self.X.new_empty(num_points, num_points))
    else:
        prior_scale_tril = Lff
    f_prior = dist.MultivariateNormal(zero_loc, scale_tril=prior_scale_tril)
    pyro.sample(f_name, f_prior.independent(zero_loc.dim() - 1))

    if self.whiten:
        # Un-whiten both the scale and the location with the same factor.
        f_scale_tril = Lff.matmul(f_scale_tril)
        f_loc = Lff.matmul(f_loc.unsqueeze(-1)).squeeze(-1)

    f_var = f_scale_tril.pow(2).sum(dim=-1)
    f_loc = f_loc + self.mean_function(self.X)

    if self.y is None:
        return f_loc, f_var
    return self.likelihood(f_loc, f_var, self.y)
def __init__(self):
    """Create three square vocabulary-sized linear layers with identity weights."""
    super(Tune, self).__init__()
    vocab_size = len(TEXT.vocab)
    for layer_name in ("linear1", "linear2", "linear3"):
        layer = nn.Linear(vocab_size, vocab_size)
        # Start every layer as the identity map (the bias keeps its default init).
        layer.weight.data.copy_(torch.eye(vocab_size))
        setattr(self, layer_name, layer)
def enumerate_support(self):
    """Return every one-hot vector of the event, expanded over the batch shape.

    The result has shape ``(n,) + batch_shape + (n,)`` where ``n`` is the
    event size; slice ``i`` along the first axis is the ``i``-th one-hot row.
    """
    probs = self._categorical.probs
    n = self.event_shape[0]

    wrapped = isinstance(probs, Variable)
    # Allocate the identity with the same tensor type as ``probs``.
    storage_like = probs.data if wrapped else probs
    one_hots = torch.eye(n, out=storage_like.new(n, n))
    if wrapped:
        one_hots = Variable(one_hots)

    singleton_batch = (1,) * len(self.batch_shape)
    one_hots = one_hots.view((n,) + singleton_batch + (n,))
    return one_hots.expand((n,) + self.batch_shape + (n,))
def test_forward(self):
    """With identity projections, each head's output is a plain dot product."""
    # pylint: disable=protected-access
    similarity = MultiHeadedSimilarity(num_heads=3, tensor_1_dim=6)
    similarity._tensor_1_projection = Parameter(torch.eye(6))
    similarity._tensor_2_projection = Parameter(torch.eye(6))

    a_rows = [[1, 1, -1, -1, 0, 1], [-2, 5, 9, -1, 3, 4]]
    b_rows = [[1, 1, 1, 0, 2, 5], [0, 1, -1, -7, 1, 2]]
    a_vectors = Variable(torch.FloatTensor([[a_rows]]))
    b_vectors = Variable(torch.FloatTensor([[b_rows]]))

    result = similarity(a_vectors, b_vectors).data.numpy()

    expected = [[[[2, -1, 5], [5, -2, 11]]]]
    assert result.shape == (1, 1, 2, 3)
    assert_almost_equal(result, expected)
def btriunpack(LU_data, LU_pivots, unpack_data=True, unpack_pivots=True):
    r"""Unpacks the data and pivots from a batched LU factorization (btrifact) of a tensor.

    Returns a tuple indexed by:
      0: The pivots (as permutation matrices), or ``None`` if ``unpack_pivots`` is false.
      1: The L tensor, or ``None`` if ``unpack_data`` is false.
      2: The U tensor, or ``None`` if ``unpack_data`` is false.

    Arguments:
        LU_data (Tensor): the packed LU factorization data, sized (batch, n, n)
        LU_pivots (Tensor): the packed LU factorization pivots (1-based row indices)
        unpack_data (bool): flag indicating if the data should be unpacked
        unpack_pivots (bool): flag indicating if the pivots should be unpacked

    Example::

        >>> A = torch.randn(2, 3, 3)
        >>> A_LU, pivots = A.btrifact()
        >>> P, A_L, A_U = torch.btriunpack(A_LU, pivots)
        >>>
        >>> # test that (P, A_L, A_U) gives LU factorization
        >>> A_ = torch.bmm(P, torch.bmm(A_L, A_U))
        >>> assert torch.equal(A_, A) == True  # can recover A
    """
    nBatch, sz, _ = LU_data.size()

    if unpack_data:
        ones = torch.ones(sz, sz).type_as(LU_data)
        # Build the masks with a comparison so they come out in the native mask
        # dtype of the running torch version instead of being forced to bytes.
        I_U = (torch.triu(ones) != 0).unsqueeze(0).expand(nBatch, sz, sz)
        I_L = (torch.tril(ones, -1) != 0).unsqueeze(0).expand(nBatch, sz, sz)
        I_diag = (torch.eye(sz).type_as(LU_data) != 0).unsqueeze(0).expand(nBatch, sz, sz)

        L = LU_data.new(LU_data.size()).zero_()
        U = LU_data.new(LU_data.size()).zero_()
        L[I_diag] = 1.0  # L has an implicit unit diagonal
        L[I_L] = LU_data[I_L]
        U[I_U] = LU_data[I_U]
    else:
        L = U = None

    if unpack_pivots:
        P = torch.eye(sz).type_as(LU_data).unsqueeze(0).repeat(nBatch, 1, 1)
        for i in range(nBatch):
            for j in range(sz):
                # Pivots are 1-based; use a plain int as the column index.
                k = int(LU_pivots[i, j]) - 1
                t = P[i, :, j].clone()
                P[i, :, j] = P[i, :, k]
                P[i, :, k] = t
    else:
        P = None

    return P, L, U
def _compute_logdet_and_mahalanobis(self, D, W, y, trace_term=0):
    """
    Computes the log determinant and the squared Mahalanobis term for the
    covariance ``(D + Wt.W)`` with diagonal ``D``, without forming the full
    matrix.  Uses the Woodbury identity and the matrix determinant lemma::

        inv(D + Wt.W) = inv(D) - inv(D).Wt.inv(I + W.inv(D).Wt).W.inv(D)
        log|D + Wt.W| = log|I + W.inv(D).Wt| + log|D|

    ``y`` may be 1- or 2-dimensional; ``trace_term`` is added to the
    Mahalanobis result.  Returns ``(logdet, mahalanobis_squared)``.
    """
    scaled_W = W / D
    rank = W.shape[0]
    identity = torch.eye(rank, rank, out=W.new_empty(rank, rank))
    chol = (identity + scaled_W.matmul(W.t())).potrf(upper=False)

    y_ndim = y.dim()
    if y_ndim == 1:
        rhs = scaled_W.matmul(y)
    elif y_ndim == 2:
        rhs = scaled_W.matmul(y.t())
    else:
        raise NotImplementedError("SparseMultivariateNormal distribution does not support "
                                  "computing log_prob for a tensor with more than 2 dimensionals.")

    solved = matrix_triangular_solve_compat(rhs, chol, upper=False)
    if y_ndim == 2:
        # Put the batch dimension first again.
        solved = solved.t()

    logdet = 2 * chol.diag().log().sum() + D.log().sum()

    diagonal_part = (y * y / D).sum(-1)
    low_rank_part = (solved * solved).sum(-1)
    mahalanobis_squared = diagonal_part - low_rank_part + trace_term
    return logdet, mahalanobis_squared
def create_input(points, sigma2):
    """Build the four-channel graph operator tensor and inputs for a point set.

    Channels of ``OP`` (last axis): identity, Gaussian affinity
    ``exp(-dist^2 / sigma2)``, degree matrix of that affinity, and the
    uniform averaging matrix ``1/N``.  Reads the module-level ``dtype``,
    ``dim`` and ``normalize`` settings.

    Returns ``((OP, x, Y), dists)`` where ``Y`` is a copy of the affinity and
    ``dists`` the pairwise Euclidean distances.
    """
    bs, N, _ = points.size()  # points has size bs,N,2

    identity = torch.eye(N).type(dtype).unsqueeze(0).expand(bs, N, N)

    pairwise_diff = (points.unsqueeze(1).expand(bs, N, N, dim)
                     - points.unsqueeze(2).expand(bs, N, N, dim))
    dists2 = (pairwise_diff * pairwise_diff).sum(3)
    dists = torch.sqrt(dists2)
    affinity = torch.exp(-dists2 / sigma2)

    degree = identity * affinity.sum(2, True).expand(bs, N, N)
    uniform = (torch.ones(N, N).type(dtype) / N).unsqueeze(0).expand(bs, N, N)

    OP = torch.zeros(bs, N, N, 4).type(dtype)
    for channel, operator in enumerate((identity, affinity, degree, uniform)):
        OP[:, :, :, channel] = operator

    OP = Variable(OP)
    x = Variable(points)
    Y = Variable(affinity.clone())

    # Normalize inputs
    if normalize:
        mu_ext = (x.sum(1) / N).unsqueeze(1).expand_as(x)
        centered = x - mu_ext
        var_ext = ((centered * centered).sum(1) / N).unsqueeze(1).expand_as(x)
        x = centered / (10 * var_ext)

    return (OP, x, Y), dists
def test_forward_backward(self):
    """OIM loss with an identity lookup table must match plain softmax cross-entropy.

    Checks both the summed loss value and the gradient with respect to the
    input (``softmax(x) - one_hot(y)``).
    """
    import torch
    import torch.nn.functional as F
    from torch.autograd import Variable
    from reid.loss import OIMLoss

    criterion = OIMLoss(3, 3, scalar=1.0, size_average=False)
    criterion.lut = torch.eye(3)
    x = Variable(torch.randn(3, 3), requires_grad=True)
    # torch.range is deprecated; arange(0, 3) yields the same labels 0, 1, 2.
    y = Variable(torch.arange(0, 3).long())
    loss = criterion(x, y)
    loss.backward()

    # Be explicit about the class dimension instead of relying on the
    # deprecated implicit-dim behaviour.
    probs = F.softmax(x, dim=1)
    grads = probs.data - torch.eye(3)
    abs_diff = torch.abs(grads - x.grad.data)

    # assertEquals is a deprecated alias that no longer exists on Python 3.12+.
    self.assertEqual(torch.log(probs).diag().sum(), -loss)
    self.assertTrue(torch.max(abs_diff) < 1e-6)
def __init__(self, X, y, kernel, Xu, likelihood, mean_function=None, latent_shape=None,
             num_data=None, whiten=False, jitter=1e-6, name="SVGP"):
    """Set up the sparse variational GP and its variational parameters.

    :param X: training inputs (forwarded to the parent constructor).
    :param y: training outputs, or ``None``.
    :param kernel: kernel module (forwarded to the parent constructor).
    :param Xu: initial inducing-point locations; registered as a parameter.
    :param likelihood: likelihood module stored on the instance.
    :param mean_function: optional mean function (forwarded to the parent).
    :param latent_shape: batch shape of the latent function; defaults to
        ``y.shape[:-1]`` (or an empty shape when ``y`` is ``None``).
    :param num_data: total dataset size; defaults to ``X.shape[0]``.
    :param whiten: whether the whitened parameterization is used.
    :param jitter: diagonal jitter (forwarded to the parent constructor).
    :param name: module name (forwarded to the parent constructor).
    """
    super(VariationalSparseGP, self).__init__(X, y, kernel, mean_function, jitter, name)
    self.likelihood = likelihood

    self.num_data = num_data if num_data is not None else self.X.shape[0]
    self.whiten = whiten

    self.Xu = Parameter(Xu)

    y_batch_shape = self.y.shape[:-1] if self.y is not None else torch.Size([])
    self.latent_shape = latent_shape if latent_shape is not None else y_batch_shape

    M = self.Xu.shape[0]
    u_loc_shape = self.latent_shape + (M,)
    u_loc = self.Xu.new_zeros(u_loc_shape)
    self.u_loc = Parameter(u_loc)

    Id = torch.eye(M, out=self.Xu.new_empty(M, M))
    # Use repeat rather than expand: an expanded tensor is a stride-0 view in
    # which every batch entry aliases the same memory, which is not a valid
    # backing store for an independently trainable parameter.
    u_scale_tril = Id.repeat(self.latent_shape + (1, 1))
    self.u_scale_tril = Parameter(u_scale_tril)
    self.set_constraint("u_scale_tril", constraints.lower_cholesky)

    self._sample_latent = True
def read_test(memory):
    """Print the result of reading ``memory`` with an all-ones key.

    The second argument passed to ``memory.read`` is the first row of a
    ``Kr x Kr`` identity matrix, reshaped to a single row.
    """
    print("Memory Reading Test: ")
    key = T.ones(1, M_DIM * Kr)
    first_unit_row = T.eye(Kr)[0].view(1, -1)
    for label, tensor in (("k tensor: ", key), ("b tensor: ", first_unit_row)):
        print(label, tensor)
    print(memory.read(key, first_unit_row))
def calculate_distance_term(means, n_objects, delta_d, norm=2, usegpu=True):
    """Hinge penalty pushing instance means at least ``2 * delta_d`` apart.

    means: bs, n_instances, n_filters
    n_objects: per-sample number of valid instances (only the first
        ``n_objects[i]`` means of sample ``i`` are used)

    Samples with at most one object contribute nothing.  The per-sample terms
    are normalised by the number of ordered pairs and the sum is divided by
    the batch size.
    """
    bs, n_instances, n_filters = means.size()

    dist_term = 0.0
    for sample_idx in range(bs):
        n_obj = n_objects[sample_idx]
        if n_obj > 1:
            centers = means[sample_idx, :n_obj, :]  # n_objects, n_filters
            rows = centers.unsqueeze(1).expand(n_obj, n_obj, n_filters)
            cols = rows.permute(1, 0, 2)
            pairwise = torch.norm(rows - cols, norm, 2)  # n_objects, n_objects

            # No margin on the diagonal: a mean is never pushed from itself.
            margin = 2 * delta_d * (1.0 - torch.eye(n_obj))
            if usegpu:
                margin = margin.cuda()
            margin = Variable(margin)

            hinge = torch.clamp(margin - pairwise, min=0.0) ** 2
            dist_term += torch.sum(hinge) / (n_obj * (n_obj - 1))

    return dist_term / bs
def check(self, value):
    """Return a mask that is true where ``value`` is lower triangular with a positive diagonal.

    The check is applied over the last two dimensions of ``value``.
    """
    n = value.size(-1)

    # Lower triangular: taking the lower triangle changes nothing.
    unchanged = batch_tril(value) == value
    lower_triangular = unchanged.view(value.shape[:-2] + (-1,)).min(-1)[0]

    # Off-diagonal entries compare 0 > -1 (always true); diagonal entries
    # compare value > 0.
    diag_mask = torch.eye(n, n, out=value.new(n, n))
    diag_is_positive = value * diag_mask > (diag_mask - 1)
    positive_diagonal = diag_is_positive.min(-1)[0].min(-1)[0]

    return lower_triangular & positive_diagonal
def get_cat_mapping(model: infogan.InfoGAN, data_loader: DataLoader,
                    num_categories: int = 10):
    """Map each predicted categorical code to the label it most often co-occurs with.

    Accumulates a ``label x predicted-code`` confusion matrix over
    ``data_loader`` and returns, for every predicted code, the index of the
    label with the highest count.

    Args:
        model: model exposing ``device``, ``dis`` and ``rec``.
        data_loader: yields ``(data, labels)`` batches; ``data`` is scaled
            by ``1 / 255`` after a channel axis is inserted.
        num_categories: number of labels / categorical codes (default 10).

    Returns:
        numpy array of length ``num_categories``.
    """
    eye = torch.eye(num_categories)
    confusion = torch.zeros(num_categories, num_categories)
    # Pure inference: do not build an autograd graph for every batch.
    with torch.no_grad():
        for data, labels in data_loader:
            real_data = data.to(model.device).unsqueeze(1).float() / 255.
            cat_logits = model.rec(model.dis(real_data)[1])[0]
            label_one_hot = eye[labels.long().cpu()]
            pred_one_hot = eye[cat_logits.cpu().argmax(1)]
            confusion += label_one_hot.t() @ pred_one_hot
    return confusion.argmax(0).numpy()
def torch_eye(n, m=None, out=None):
    """
    Like `torch.eye()`, but works with cuda tensors.

    :param int n: number of rows.
    :param int m: number of columns; defaults to ``n``.
    :param out: optional output tensor.
    :return: an ``n x m`` matrix with ones on the diagonal.
    """
    if m is None:
        m = n
    try:
        return torch.eye(n, m, out=out)
    except TypeError:
        # Only catch errors due to torch.eye() not being available for cuda tensors.
        module = torch.Tensor.__module__ if out is None else type(out).__module__
        if module != 'torch.cuda':
            raise
    Tensor = getattr(torch, torch.Tensor.__name__)
    cpu_out = Tensor(n, m)
    # Keep the (rows, cols) order: the buffer is n x m, so the identity must be too.
    cuda_out = torch.eye(n, m, out=cpu_out).cuda()
    return cuda_out if out is None else out.copy_(cuda_out)
def eye_(tensor):
    r"""Overwrite a 2-dimensional `Tensor` in place with the identity matrix.

    When used on the weight of a `Linear` layer this preserves as many of
    the inputs as the shape allows.

    Args:
        tensor: a 2-dimensional `torch.Tensor`

    Returns:
        the same tensor object, now holding the identity pattern

    Raises:
        ValueError: if ``tensor`` is not 2-dimensional

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.eye_(w)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    n_rows, n_cols = tensor.shape
    with torch.no_grad():
        torch.eye(n_rows, n_cols, out=tensor, requires_grad=tensor.requires_grad)
    return tensor
def addOrthoRegularizer(loss, model, regParam, targetLayers):
    """Add an orthogonality penalty for the selected layers to ``loss``.

    For every non-bias parameter ``P`` of each target layer the penalty
    ``regParam * 0.5 * sum(|P @ P.T - I|)`` is added.

    Args:
        loss: running loss; tensors are still updated in place as before.
        model: indexable container of modules (``model[key]`` must provide
            ``named_parameters()``).
        regParam: regularization strength.
        targetLayers: iterable of keys/indices into ``model``.

    Returns:
        The updated loss.  Use the return value when ``loss`` is not a tensor,
        because in that case it cannot be updated in place.
    """
    for layer_key in targetLayers:
        for name, param in model[layer_key].named_parameters():
            # dont regularize bias params
            if 'bias' in name:
                continue
            W = param.t()
            WTW = torch.mm(W.t(), W)
            # Build the identity next to the weights so GPU / non-default
            # dtype models do not hit a device or dtype mismatch.
            identity = torch.eye(WTW.shape[0], dtype=WTW.dtype, device=WTW.device)
            C = (regParam * 0.5) * torch.sum(torch.abs(WTW - identity))
            loss += C
    return loss
def eye(tensor):
    """Overwrite a 2-dimensional Tensor or Variable in place with the identity matrix.

    When used on the weight of a Linear layer this preserves as many of the
    inputs as the shape allows.

    Args:
        tensor: a 2-dimensional torch.Tensor or autograd.Variable

    Returns:
        the same object that was passed in

    Raises:
        ValueError: if ``tensor`` is not 2-dimensional

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.eye(w)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    n_rows, n_cols = tensor.shape
    with torch.no_grad():
        torch.eye(n_rows, n_cols, out=tensor)
    return tensor
def test_forward(model_class, X, y, kernel, likelihood):
    """Check the predictive ``forward`` of a GP model for shapes and simple cases.

    The sparse model classes are constructed with ``X`` itself as the
    inducing points.  The test then checks output shapes, agreement between
    full-covariance and diagonal predictions, prediction at the training
    inputs, prediction at repeated inputs, and prediction after the kernel
    is replaced by a white-noise kernel.
    """
    if model_class is SparseGPRegression or model_class is VariationalSparseGP:
        gp = model_class(X, y, kernel, X, likelihood)
    else:
        gp = model_class(X, y, kernel, likelihood)

    # test shape
    Xnew = torch.tensor([[2.0, 3.0, 1.0]])
    loc0, cov0 = gp(Xnew, full_cov=True)
    loc1, var1 = gp(Xnew, full_cov=False)
    assert loc0.dim() == y.dim()
    assert loc0.shape[-1] == Xnew.shape[0]
    # test latent shape
    assert loc0.shape[:-1] == y.shape[:-1]
    assert cov0.shape[:-2] == y.shape[:-1]
    assert cov0.shape[-1] == cov0.shape[-2]
    assert cov0.shape[-1] == Xnew.shape[0]
    assert_equal(loc0, loc1)
    n = Xnew.shape[0]
    # The diagonal of the full covariance must equal the variance-only output.
    cov0_diag = torch.stack([mat.diag() for mat in cov0.view(-1, n, n)]).reshape(var1.shape)
    assert_equal(cov0_diag, var1)

    # test trivial forward: Xnew = X
    loc, cov = gp(X, full_cov=True)
    if model_class is VariationalGP or model_class is VariationalSparseGP:
        # Variational models are expected to return a zero mean and identity covariance here.
        assert_equal(loc.norm().item(), 0)
        assert_equal(cov, torch.eye(cov.shape[-1]).expand(cov.shape))
    else:
        # The other models are expected to reproduce y with zero covariance.
        assert_equal(loc, y)
        assert_equal(cov.norm().item(), 0)

    # test same input forward: Xnew[0,:] = Xnew[1,:] = ...
    Xnew = torch.tensor([[2.0, 3.0, 1.0]]).expand(10, 3)
    loc, cov = gp(Xnew, full_cov=True)
    loc_diff = loc - loc[..., :1].expand(y.shape[:-1] + (10,))
    assert_equal(loc_diff.norm().item(), 0)
    cov_diff = cov - cov[..., :1, :1].expand(y.shape[:-1] + (10, 10))
    assert_equal(cov_diff.norm().item(), 0)

    # test noise kernel forward: kernel = WhiteNoise
    gp.kernel = WhiteNoise(input_dim=3, variance=torch.tensor(10.))
    loc, cov = gp(X, full_cov=True)
    assert_equal(loc.norm().item(), 0)
    assert_equal(cov, torch.eye(cov.shape[-1]).expand(cov.shape) * 10)
def test_categorical_accuracy(self):
    """Identity predictions score 100; always predicting class 0 scores 55."""
    metric = CategoricalAccuracy()
    expected = Variable(torch.LongTensor(list(range(10))))

    perfect = Variable(torch.eye(10))
    self.assertEqual(metric(perfect, expected), 100.0)

    # Set 1st column to ones
    first_class_only = Variable(torch.zeros(10, 10))
    first_class_only.data[:, 0] = 1
    self.assertEqual(metric(first_class_only, expected), 55.0)
def reset_parameters(self):
    """
    Initialize parameters following the way proposed in the paper.

    Input-to-hidden weights are orthogonal, hidden-to-hidden weights are
    identity blocks (three side by side for ``weight_hh``, one for
    ``alpha_weight_hh``) and biases, when used, are zero.
    """
    init.orthogonal(self.weight_ih.data)
    init.orthogonal(self.alpha_weight_ih.data)

    # copy_ writes into the parameter's existing storage.  set_ on the tensor
    # returned by ``.data`` only rebinds that temporary on current torch
    # versions, which leaves the parameter itself uninitialised.
    weight_hh_data = torch.eye(self.hidden_size).repeat(1, 3)
    self.weight_hh.data.copy_(weight_hh_data)

    alpha_weight_hh_data = torch.eye(self.hidden_size)
    self.alpha_weight_hh.data.copy_(alpha_weight_hh_data)

    # The bias is just set to zero vectors.
    if self.use_bias:
        init.constant(self.bias.data, val=0)
        init.constant(self.alpha_bias.data, val=0)
def fwd_merge(self, Inputs_N, target, Phis, Bs, lp, batch, depth, mode='train', epoch=0):
    # Runs the merge module over `depth` scales, starting from the last entry of
    # the per-scale lists and moving to the first, and returns
    # (loss, pg_loss, Perms), where Perms holds the permutation after each scale.
    # `Bs`, `batch` and `epoch` are accepted but not used in this body.
    # NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
    # source; in particular confirm that the loss computation belongs to the
    # coarsest-scale branch and that the two appends sit inside the loop.

    # Flow backwards
    Phis, Bs, Inputs_N = Phis[::-1], Bs[::-1], Inputs_N[::-1]
    length = self.merge.n
    # torch.range is inclusive, so this yields length + 1 positions (index 0
    # presumably belongs to the pad token concatenated below — TODO confirm).
    perm = (torch.range(0.0, length)
            .unsqueeze(0).expand(self.batch_size, length + 1))
    perm = Variable(perm, requires_grad=False).type(dtype_l)
    ind = perm[:, :-1].clone()
    # Start from the identity assignment for every batch element.
    prob_matrix = Variable(torch.eye(length + 1)).type(dtype)
    prob_matrix = prob_matrix.unsqueeze(0).expand(self.batch_size, length + 1, length + 1)
    # concatenate pad_token to input
    pad_token = (self.merge.pad_token[:-1].unsqueeze(0)
                 .expand(self.batch_size, 1, self.input_size))
    input = torch.cat((pad_token, Inputs_N[0]), 1)
    phis = Phis[0]
    input_target = torch.cat((pad_token, Inputs_N[-1]), 1)
    input_scale = input
    input_norm = input_scale
    Perms = [perm]
    Points = [input_scale]
    for i, scale in enumerate(range(depth)):
        if scale < depth - 1:
            # fine scales
            prob_sc = self.merge(input_scale, phis)
            input_norm = torch.cat((pad_token, Inputs_N[scale + 1]), 1)
            phis = Phis[scale + 1]
            prob_sc, ind, phis, _ = self.eliminate_rows(prob_sc, ind, phis)
            comb = self.combine_matrices(prob_matrix, prob_sc, perm, last=False)
            prob_matrix, _, perm = comb
            # postprocess before feeding to next scale
            hard_out, soft_out = self.outputs(input_norm, prob_matrix, perm)
            input_scale = hard_out
        else:
            # coarsest scale
            if mode == 'test':
                # No supervision is passed to the merge module in test mode.
                prob_sc = self.merge(input_scale, phis, input_target=None, target=None)
            else:
                prob_sc = self.merge(input_scale, phis, input_target=input_target, target=target)
            comb = self.combine_matrices(prob_matrix, prob_sc, perm, last=True)
            prob_matrix, prob_sc, perm = comb
            hard_out, soft_out = self.outputs(input, prob_matrix, perm)
            loss, pg_loss = self.merge.compute_loss(prob_matrix, target, lp=lp)
        Perms.append(perm)
        Points.append(input_norm)
    return loss, pg_loss, Perms
def test_GPyTorchPosterior(self, cuda=False):
    # Exercises GPyTorchPosterior for float and double inputs: basic
    # properties, rsample output shapes, determinism when base samples are
    # supplied, and a batched posterior with broadcast base samples.
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.rand(3, dtype=dtype, device=device)
        variance = 1 + torch.rand(3, dtype=dtype, device=device)
        covar = variance.diag()
        mvn = MultivariateNormal(mean, lazify(covar))
        posterior = GPyTorchPosterior(mvn=mvn)
        # basics
        self.assertEqual(posterior.device.type, device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 1]))
        self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
        self.assertTrue(torch.equal(posterior.variance, variance.unsqueeze(-1)))
        # rsample
        samples = posterior.rsample()
        self.assertEqual(samples.shape, torch.Size([1, 3, 1]))
        samples = posterior.rsample(sample_shape=torch.Size([4]))
        self.assertEqual(samples.shape, torch.Size([4, 3, 1]))
        samples2 = posterior.rsample(sample_shape=torch.Size([4, 2]))
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 1]))
        # rsample w/ base samples
        base_samples = torch.randn(4, 3, 1, device=device, dtype=dtype)
        # incompatible shapes
        with self.assertRaises(RuntimeError):
            posterior.rsample(
                sample_shape=torch.Size([3]), base_samples=base_samples
            )
        # The same base samples must give the same draws.
        samples_b1 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        samples_b2 = posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=base_samples
        )
        self.assertTrue(torch.allclose(samples_b1, samples_b2))
        base_samples2 = torch.randn(4, 2, 3, 1, device=device, dtype=dtype)
        samples2_b1 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        samples2_b2 = posterior.rsample(
            sample_shape=torch.Size([4, 2]), base_samples=base_samples2
        )
        self.assertTrue(torch.allclose(samples2_b1, samples2_b2))
        # collapse_batch_dims
        b_mean = torch.rand(2, 3, dtype=dtype, device=device)
        b_variance = 1 + torch.rand(2, 3, dtype=dtype, device=device)
        # Batched diagonal covariance: scale an identity by each variance row.
        b_covar = b_variance.unsqueeze(-1) * torch.eye(3).type_as(b_variance)
        b_mvn = MultivariateNormal(b_mean, lazify(b_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        # Base samples carry a singleton batch dimension (size 1 where the
        # posterior batch has size 2).
        b_base_samples = torch.randn(4, 1, 3, 1, device=device, dtype=dtype)
        b_samples = b_posterior.rsample(
            sample_shape=torch.Size([4]), base_samples=b_base_samples
        )
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
def test_MockPosterior(self):
    """MockPosterior must echo its mean and variance and tile its samples."""
    mean, variance, samples = torch.rand(2), torch.eye(2), torch.rand(1, 2)
    mp = MockPosterior(mean=mean, variance=variance, samples=samples)

    self.assertTrue(torch.equal(mp.mean, mean))
    self.assertTrue(torch.equal(mp.variance, variance))

    single_draw = mp.sample()
    self.assertTrue(torch.all(single_draw == samples.unsqueeze(0)))

    two_draws = mp.sample(torch.Size([2]))
    self.assertTrue(torch.all(two_draws == samples.repeat(2, 1, 1)))
def one_hot_embedding(labels, num_classes):
    '''Embedding labels to one-hot form.

    Args:
      labels: (LongTensor) class labels, sized [N,].
      num_classes: (int) number of classes.

    Returns:
      (tensor) encoded labels, sized [N,#classes], on the same device as
      ``labels`` when ``labels`` is a tensor.
    '''
    # Build the lookup table next to the labels so labels that live on an
    # accelerator can index it directly.
    device = labels.device if torch.is_tensor(labels) else None
    y = torch.eye(num_classes, device=device)  # [D,D]
    return y[labels]  # [N,D]
def sample_latent(self, *args, **kwargs):
    """
    Draws the single auxiliary multivariate normal latent of the auto guide.

    The location (initialised to zeros) and the lower-Cholesky scale
    (initialised to the identity) are pyro parameters named after
    ``self.prefix``.
    """
    loc_name = "{}_loc".format(self.prefix)
    scale_name = "{}_scale_tril".format(self.prefix)
    site_name = "_{}_latent".format(self.prefix)

    loc = pyro.param(loc_name, lambda: torch.zeros(self.latent_dim))
    scale_tril = pyro.param(scale_name,
                            lambda: torch.eye(self.latent_dim),
                            constraint=constraints.lower_cholesky)

    posterior = dist.MultivariateNormal(loc, scale_tril=scale_tril)
    return pyro.sample(site_name, posterior, infer={"is_auxiliary": True})
def scale_tril(self):
    """Lower-triangular factor of the covariance ``D + W.T @ W``.

    For numerical stability the Cholesky decomposition is applied to the
    rescaled matrix (see GPML section 3.4.3)::

        D + W.T @ W = D1/2 @ (I + D-1/2 @ W.T @ W @ D-1/2) @ D1/2
    """
    sqrt_D = self.covariance_matrix_D_term.sqrt()
    scaled_W = self.covariance_matrix_W_term / sqrt_D
    gram = scaled_W.t().matmul(scaled_W)

    size = scaled_W.shape[1]
    identity = torch.eye(size, size, out=scaled_W.new_empty(size, size))
    chol = (identity + gram).potrf(upper=False)

    # Undo the rescaling row-wise.
    return sqrt_D.unsqueeze(1) * chol
def cross_correlation(X, remove_diagonal=False):
    """Cross-correlation matrix between the columns of ``X``.

    Columns are divided by their standard deviation and then centred; the
    result is the ``(dim, dim)`` matrix of averaged products over the batch.

    Args:
        X: 2-D tensor of shape ``(batch, dim)``.
        remove_diagonal: when true, the identity matrix is subtracted from
            the result.

    Returns:
        ``(dim, dim)`` tensor on the same device as ``X``.
    """
    X_s = X / X.std(0)
    X_m = X_s - X_s.mean(0)
    b, dim = X_m.size()
    # Sum of outer products over the batch, written as one matrix product so
    # no (batch, dim, dim) intermediate is materialised.
    correlations = X_m.t().mm(X_m) / float(b)
    if remove_diagonal:
        # Create the identity where the data lives instead of forcing CUDA,
        # so the function also works on CPU-only machines.
        Id = torch.eye(dim, dtype=correlations.dtype, device=correlations.device)
        correlations = correlations - Id
    return correlations
def model(self):
    """Sample the latent inputs ``X`` from a unit normal prior and run the base model.

    The sampled ``X`` replaces the base model's inputs (with ``self.y`` as
    targets) before ``base_model.model()`` is called.
    """
    self.set_mode("model", recursive=False)

    # sample X from unit multivariate normal distribution
    latent_dim = self.X_loc.shape[1]
    zero_loc = self.X_loc.new_zeros(self.X_loc.shape)
    unit_scale_tril = torch.eye(latent_dim,
                                out=self.X_loc.new_empty(latent_dim, latent_dim))
    X_prior = dist.MultivariateNormal(zero_loc, scale_tril=unit_scale_tril)

    X_name = param_with_module_name(self.name, "X")
    X = pyro.sample(X_name, X_prior.independent(zero_loc.dim() - 1))

    self.base_model.set_data(X, self.y)
    self.base_model.model()
def merge(tbl):
    """Collate a batch of stroke-based characters into a sparse input batch.

    Every character gets a random 2x2 transform (a shear along one of the two
    axes or a small rotation, chosen uniformly) and a random offset around the
    centre of ``spatial_size`` before its strokes are drawn into the batch.

    Args:
        tbl: dict with ``'input'`` (iterable of characters, each an iterable
            of stroke tensors) and ``'target'`` (1-based class labels).

    Returns:
        dict with ``'input'`` (the filled ``scn.InputBatch``) and ``'target'``
        (0-based ``LongTensor`` labels).
    """
    inp = scn.InputBatch(2, spatial_size)
    center = spatial_size.float().view(1, 2) / 2
    for char in tbl['input']:
        inp.addSample()
        m = torch.eye(2)
        r = random.randint(1, 3)
        alpha = random.uniform(-0.2, 0.2)
        # Select the transform with the discrete draw ``r``.  Comparing
        # ``alpha`` (which lies in [-0.2, 0.2]) against 1 or 2 can never
        # succeed, which disabled both shear branches.
        if r == 1:
            m[0][1] = alpha
        elif r == 2:
            m[1][0] = alpha
        else:
            m = torch.mm(m, torch.FloatTensor(
                [[math.cos(alpha), math.sin(alpha)],
                 [-math.sin(alpha), math.cos(alpha)]]))
        c = center + torch.FloatTensor(1, 2).uniform_(-8, 8)
        for stroke in char:
            stroke = stroke.float() / 255 - 0.5
            stroke = c.expand_as(stroke) + \
                torch.mm(stroke, m * (Scale - 0.01))
            ###############################################################
            # To avoid GIL problems use a helper function:
            scn.dim_fn(
                2,
                'drawCurve')(
                inp.metadata.ffi,
                inp.features,
                stroke)
            ###############################################################
            # Above is equivalent to (with p = torch.LongTensor(2) and
            # v = torch.FloatTensor([1, 0, 0])):
            # x1,x2,y1,y2,l=0,stroke[0][0],0,stroke[0][1],0
            # for i in range(1,stroke.size(0)):
            #     x1=x2
            #     y1=y2
            #     x2=stroke[i][0]
            #     y2=stroke[i][1]
            #     l=1e-10+((x2-x1)**2+(y2-y1)**2)**0.5
            #     v[1]=(x2-x1)/l
            #     v[2]=(y2-y1)/l
            #     l=max(x2-x1,y2-y1,x1-x2,y1-y2,0.9)
            #     for j in numpy.arange(0,1,1/l):
            #         p[0]=math.floor(x1*j+x2*(1-j))
            #         p[1]=math.floor(y1*j+y2*(1-j))
            #         inp.setLocation(p,v,False)
            ###############################################################
    inp.precomputeMetadata(precomputeStride)
    return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1}
def ransac_voting_layer(cloud, pred_t, round_hyp_num=128, inlier_thresh=0.99, confidence=0.99, max_iter=20,
                        min_num=5, max_num=30000):
    """
    RANSAC-style voting for a single 3D point per batch element.

    Hypotheses are generated and scored by the ``ransac_voting_3d`` extension;
    the final point is obtained by solving a 3x3 linear system built from the
    inliers of the best hypothesis.

    Args:
        cloud: [b, pn, 3] - x, y, z
        pred_t: [b, pn, 3] - dx, dy, dz
        round_hyp_num: number of hypothesis per round
        inlier_thresh: voting threshold, cosine angle
        confidence: stop once the estimated success probability exceeds this
        max_iter: stop once more than this many rounds have been run
        min_num: batch elements with fewer points than this are skipped
        max_num: above this many points the points are randomly subsampled
    Returns:
        batch_win_pts: [b, 3] - x, y, z
        batch_inliers: [b, pn]
    """
    # NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    # source — confirm against the upstream file.
    b, pn, _ = pred_t.shape
    vn = 1  # only voting for center
    batch_win_pts = []
    batch_inliers = []
    for bi in range(b):
        hyp_num = 0
        # Every point is treated as foreground: the mask starts as all ones.
        foreground_num = torch.tensor(pn, device=pred_t.device)
        cur_mask = torch.ones([pn, 1], dtype=torch.uint8, device=pred_t.device)
        # if too few points, just skip it
        if foreground_num < min_num:
            win_pts = torch.zeros([1, 3], dtype=torch.float32, device=pred_t.device)
            inliers = torch.zeros([1, pn], dtype=torch.uint8, device=pred_t.device)
            batch_win_pts.append(win_pts)
            batch_inliers.append(inliers)
            continue
        # if too many inliers, randomly downsample
        if foreground_num > max_num:
            selection = torch.zeros(cur_mask.shape, dtype=torch.float32, device=pred_t.device).uniform_(0, 1)
            selected_mask = (selection < (max_num / foreground_num.float()))
            cur_mask *= selected_mask
        tn = torch.sum(cur_mask)
        coords = cloud[bi, :, :].masked_select(cur_mask).view([tn, 3])  # [tn, 3]
        direct = pred_t[bi, :, :].masked_select(cur_mask).view(
            [tn, vn, 3])  # [tn, vn, 3]
        # RANSAC
        # Random pairs of point indices; drawn once and reused by every round.
        idxs = torch.zeros([round_hyp_num, vn, 2], dtype=torch.int32,
                           device=pred_t.device).random_(0, direct.shape[0])
        all_win_ratio = torch.zeros([vn], dtype=torch.float32, device=pred_t.device)
        all_win_pts = torch.zeros([vn, 3], dtype=torch.float32, device=pred_t.device)
        cur_iter = 0
        while True:
            # generate hypothesis
            cur_hyp_pts = ransac_voting_3d.generate_hypothesis(
                direct, coords, idxs)  # [hn, vn, 3]
            # voting for hypothesis
            # cur_inlier is passed in as a buffer; presumably the extension
            # fills it in place — verify against the extension source.
            cur_inlier = torch.zeros([round_hyp_num, vn, tn], dtype=torch.uint8,
                                     device=pred_t.device)  # [hn, vn, tn]
            ransac_voting_3d.voting_for_hypothesis(direct, coords, cur_hyp_pts, cur_inlier, inlier_thresh)
            # find max
            cur_inlier_counts = torch.sum(cur_inlier, 2)  # [hn, vn]
            cur_win_counts, cur_win_idx = torch.max(cur_inlier_counts, 0)  # [vn]
            cur_win_pts = cur_hyp_pts[cur_win_idx, torch.arange(vn)]
            cur_win_ratio = cur_win_counts.float() / tn
            # update best point
            larger_mask = all_win_ratio < cur_win_ratio
            all_win_pts[larger_mask, :] = cur_win_pts[larger_mask, :]
            all_win_ratio[larger_mask] = cur_win_ratio[larger_mask]
            # check confidence
            hyp_num += round_hyp_num
            cur_iter += 1
            cur_min_ratio = torch.min(all_win_ratio)
            if (1 - (1 - cur_min_ratio**2)** hyp_num) > confidence or cur_iter > max_iter:
                break
        # compute mean intersection
        all_inlier = torch.zeros([1, vn, tn], dtype=torch.uint8, device=pred_t.device)
        all_win_pts = torch.unsqueeze(all_win_pts, 0)  # [1, vn, 3]
        ransac_voting_3d.voting_for_hypothesis(direct, coords, all_win_pts, all_inlier, inlier_thresh)
        all_inlier = all_inlier.view([tn, 1])  # because of vn = 1
        all_inlier_count = torch.sum(all_inlier)
        inlier_coords = coords.masked_select(all_inlier).view(
            [all_inlier_count, 3, 1])
        # normalize directions
        inlier_direct = torch.squeeze(direct, 1)  # [tn, 3]
        inlier_direct = inlier_direct / torch.norm(
            inlier_direct, dim=1, keepdim=True)
        inlier_direct = inlier_direct.masked_select(all_inlier).view(
            [all_inlier_count, 3, 1])
        # Per-inlier matrix d d^T - I, summed into a single 3x3 system A x = b.
        S = torch.bmm(inlier_direct, inlier_direct.permute(0, 2, 1)) - \
            torch.unsqueeze(torch.eye(3, device=pred_t.device), 0).repeat(all_inlier_count, 1, 1)
        A = torch.sum(S, 0)  # [3, 3]
        # Note: this rebinds ``b`` (the batch size unpacked at the top); the
        # ``range(b)`` of the outer loop was already evaluated.
        b = torch.sum(torch.bmm(S, inlier_coords), 0)  # [3, 1]
        # voting result
        win_pts = torch.matmul(torch.inverse(A), b).permute(1, 0)  # [1, 3]
        batch_win_pts.append(win_pts)
        # mask
        # Write the inlier flags back to the positions selected by cur_mask.
        inliers = torch.squeeze(cur_mask, 1).repeat(vn, 1)  # [vn, pn]
        index = torch.squeeze(cur_mask, 1).nonzero().view([tn]).repeat(vn, 1)  # [vn, tn]
        inliers.scatter_(1, index, all_inlier.permute(1, 0))
        batch_inliers.append(inliers)
    batch_win_pts = torch.cat(batch_win_pts)
    batch_inliers = torch.squeeze(torch.cat(batch_inliers), 1)
    return batch_win_pts, batch_inliers
def Learning_to_learn_global_training(opt, hand_optimizee, optimizee, train_loader):
    """Meta-train ``optimizee`` using a replay buffer of optimisation states.

    The function runs in two phases:

    1. Observation: ``hand_optimizee`` is stepped on batches from
       ``train_loader`` and every visited ``(state, M, iteration)`` triple is
       pushed to a ``ReplayBuffer`` until ``opt.Observe`` entries were seen.
    2. Global training: for ``opt.Epochs`` epochs a triple is sampled from the
       buffer, ``optimizee`` is unrolled for ``opt.train_steps`` steps, the
       result is retracted, the loss on the full data set is back-propagated
       and an Adamax step is applied to ``optimizee``.

    Args:
        opt: options object; the attributes read here are ``DIM``,
            ``outputDIM``, ``batchsize_para``, ``Observe``, ``Epochs``,
            ``Optimizee_Train_Steps``, ``optimizer_lr``, ``Decay``,
            ``Decay_rate``, ``Imcrement``, ``Imcrementflag``, ``modelsave``,
            ``Pretrain``, ``train_steps``, ``batchsize_data`` and
            ``hand_optimizer_lr``.
        hand_optimizee: callable used as ``hand_optimizee(M.grad, M, state)``
            returning ``(M, state)``.
        optimizee: module used as ``optimizee(P, state, inputs)`` returning
            ``(lr, update, state)``; its parameters are what gets trained.
        train_loader: iterable yielding ``(inputs, labels)`` batches.

    Returns:
        None. Progress is printed and snapshots are written with
        ``torch.save`` under ``STATE/inner_epoch20_5/``.

    Notes:
        * Requires CUDA and the file
          ``data/dim784_training_images_bool.npy``.
        * NOTE(review): the nesting of a few statements was reconstructed from
          a whitespace-collapsed source; the places where more than one
          reading was possible are marked below.
    """
    import copy  # local import: only needed to snapshot the state dict

    DIM = opt.DIM
    outputDIM = opt.outputDIM
    batchsize_para = opt.batchsize_para
    Observe = opt.Observe
    Epochs = opt.Epochs
    Optimizee_Train_Steps = opt.Optimizee_Train_Steps
    optimizer_lr = opt.optimizer_lr
    Decay = opt.Decay
    Decay_rate = opt.Decay_rate
    Imcrement = opt.Imcrement

    # Full data set, reshaped to (batchsize_para, features, samples).
    data1 = np.load('data/dim784_training_images_bool.npy')
    data2 = torch.from_numpy(data1)
    data2 = data2.float()
    data3 = data2.to(torch.device("cuda:0"))
    data_all = data3.view(batchsize_para, data3.shape[0] // batchsize_para, -1)
    data_all = data_all.permute(0, 2, 1)

    adam_global_optimizer = torch.optim.Adamax(optimizee.parameters(),
                                               lr=optimizer_lr)

    RB = ReplayBuffer(500 * batchsize_para)
    Square = torch.eye(DIM)

    # ---------------------------------------------------------------- observe
    for i in range(Observe):
        RB.shuffle()
        if i == 0:
            M = torch.randn(batchsize_para, DIM, outputDIM).cuda()
            for k in range(batchsize_para):
                nn.init.orthogonal_(M[k])

            state = (
                torch.zeros(batchsize_para, DIM, outputDIM).cuda(),
                torch.zeros(batchsize_para, DIM, outputDIM).cuda(),
                torch.zeros(batchsize_para, DIM, outputDIM).cuda(),
                torch.zeros(batchsize_para, DIM, outputDIM).cuda(),
            )
            iteration = torch.zeros(batchsize_para)
            M.requires_grad = True

            RB.push(state, M, iteration)
            count = 1
            print('observe finish', count)

        break_flag = False
        for j, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()
            inputs = inputs.view(batchsize_para,
                                 inputs.shape[0] // batchsize_para, -1)
            labels = labels.view(batchsize_para,
                                 labels.shape[0] // batchsize_para)
            inputs = inputs.permute(0, 2, 1)

            loss = f(inputs, M)
            loss.backward()
            M, state = hand_optimizee(M.grad, M, state)

            print('-------------------------')

            iteration = iteration + 1
            # Restart every trajectory that has run long enough.
            for k in range(batchsize_para):
                if iteration[k] >= Optimizee_Train_Steps - opt.train_steps:
                    M[k] = Square[:, 0:outputDIM]
                    state[0][k] = torch.zeros(DIM, outputDIM).cuda()
                    state[1][k] = torch.zeros(DIM, outputDIM).cuda()
                    state[2][k] = torch.zeros(DIM, outputDIM).cuda()
                    state[3][k] = torch.zeros(DIM, outputDIM).cuda()
                    iteration[k] = 0

            # Cut the graph so the next step starts from a fresh leaf.
            state = (state[0].detach(), state[1].detach(), state[2].detach(),
                     state[3].detach())
            M = M.detach()
            M.requires_grad = True
            M.retain_grad()

            RB.push(state, M, iteration)
            count = count + 1
            print('loss', loss.item() / opt.batchsize_data)
            print('observe finish', count)

            if count >= Observe:
                break_flag = True
                break
        if break_flag == True:
            break

    RB.shuffle()

    # state_dict() returns tensors that share storage with the live
    # parameters, so it must be deep-copied to be usable for a rollback.
    # The snapshots are never modified in place, hence sharing one copy
    # between the three initial slots is safe.
    check_point = copy.deepcopy(optimizee.state_dict())
    check_point2 = check_point
    check_point3 = check_point

    train_svd = False

    # ------------------------------------------------------------ meta-train
    for i in range(Epochs):
        print('\n=======> global training steps: {}'.format(i))

        if (i + 1) % Decay == 0 and (i + 1) != 0:
            count = count + 1
            adjust_learning_rate(adam_global_optimizer, Decay_rate)

        if opt.Imcrementflag == True:
            if (i + 1) % Imcrement == 0 and (i + 1) != 0:
                Optimizee_Train_Steps = Optimizee_Train_Steps + 50

        if (i + 1) % opt.modelsave == 0 and (i + 1) != 0:
            print(
                '-------------------------------SAVE----------------------------------------------'
            )
            print(opt.modelsave)
            # Both variants share the same stem; only the non-pretrained run
            # carries an extra tag before the extension.
            save_stem = ('STATE/inner_epoch20_5/' + str(i) + '_' +
                         str(opt.optimizer_lr * 1000) + '_Decay' +
                         str(opt.Decay) + '_Observe' + str(opt.Observe) +
                         '_Epochs' + str(opt.Epochs) +
                         '_Optimizee_Train_Steps' +
                         str(opt.Optimizee_Train_Steps) + '_train_steps' +
                         str(opt.train_steps) + '_hand_optimizer_lr' +
                         str(opt.hand_optimizer_lr))
            if opt.Pretrain == True:
                torch.save(optimizee.state_dict(), save_stem + '.pth')
            else:
                torch.save(
                    optimizee.state_dict(), save_stem +
                    'nopretrain_newlr_meanvar_devide2' + '.pth')

        # Drop the previous epoch's loss (and its graph) and clear the
        # "retraction failed" marker.
        global_loss_graph = 0
        train_svd = False

        state_read, M_read, iteration_read = RB.sample(batchsize_para)
        state = (state_read[0].detach(), state_read[1].detach(),
                 state_read[2].detach(), state_read[3].detach())
        M = M_read.detach()
        iteration = iteration_read.detach()
        M.requires_grad = True
        M.retain_grad()

        flag = False  # set when the optimizee state blows up
        break_flag = False
        count = 0
        adam_global_optimizer.zero_grad()

        # Unroll the optimizee for opt.train_steps steps, cycling through the
        # loader as often as necessary.
        while (1):
            for j, data in enumerate(train_loader, 0):
                inputs, labels = data
                inputs = Variable(inputs.cuda())
                labels = Variable(labels).cuda()
                inputs = inputs.view(batchsize_para,
                                     inputs.shape[0] // batchsize_para, -1)
                labels = labels.view(batchsize_para,
                                     labels.shape[0] // batchsize_para)
                inputs = inputs.permute(0, 2, 1)

                if count == 0:
                    loss = f(inputs, M)
                    loss.backward(retain_graph=True)
                    M_grad = M.grad.data
                    # Remove the component M M^T grad from the gradient.
                    P = M_grad - torch.matmul(
                        torch.matmul(M, M.permute(0, 2, 1)), M_grad)
                    P = P * 1e-4
                    print('EPOCHES:{},loss:{}'.format(i, loss.item() / 640))
                    # NOTE(review): this diagnostic was placed inside the
                    # `count == 0` branch; the collapsed source would also
                    # allow it to run on every step - confirm.
                    try:
                        M_csgd = retraction(M, P, 1)
                        loss_csgd = f(data_all, M_csgd)
                        print('EPOCHES:{},loss_csgd:{}'.format(
                            i,
                            loss_csgd.item() / 60000))
                    except Exception:
                        # Best-effort diagnostic only; keep training.
                        print('svd')

                lr, update, state = optimizee(P, state, inputs)
                lr = torch.abs(lr)

                s = torch.sum(state[0]) + torch.sum(state[1]) + torch.sum(
                    state[2]) + torch.sum(state[3])
                if s > 100000:
                    break_flag = True
                    flag = True
                    break

                # projection
                M_update = update - torch.matmul(
                    torch.matmul(M, M.permute(0, 2, 1)), update)
                P = P - lr * M_update

                update.retain_grad()
                P.retain_grad()
                M_update.retain_grad()
                lr.retain_grad()

                count = count + 1
                if count == opt.train_steps:
                    break_flag = True
                    break
            if break_flag == True:
                break

        iteration = iteration + 1

        try:
            M = retraction(M, P, 1)
        except Exception:
            print('svd')
            train_svd = True
            continue
        train_svd = False

        M.retain_grad()

        global_loss_graph = f(data_all, M)
        global_loss_graph.backward()

        # Mean absolute gradient w.r.t. M, used as a NaN guard.
        number_M = M.grad.numel()
        M_grad_after = M.grad.data
        M_grad_mean = torch.sum(torch.norm(
            M_grad_after, p=1, dim=(0, 1))).detach().cpu().numpy().tolist()
        M_grad_mean = M_grad_mean / number_M
        print('M_after', M_grad_mean)
        if np.isnan(M_grad_mean):
            print('ERROR NAN!!!')
            continue

        # Same guard for the gradient w.r.t. P.
        number_P = P.grad.numel()
        P_grad_data = P.grad.data
        P_grad_mean = torch.sum(torch.norm(
            P_grad_data, p=1, dim=(0, 1))).detach().cpu().numpy().tolist()
        P_grad_mean = P_grad_mean / number_P
        print('P_gradient', P_grad_mean)
        if np.isnan(P_grad_mean):
            print('ERROR NAN!!!')
            continue
        P.grad.data.zero_()

        # Monitor one fixed optimizee parameter (index 37 of
        # named_parameters()) before and after the step.
        params = list(optimizee.named_parameters())
        (name, network_weight) = params[37]
        network_weight_copy = network_weight.clone()
        network_weight_grad_copy = network_weight.grad.data
        network_weight_size = network_weight_grad_copy.numel()

        grad_mean = torch.sum(
            torch.norm(network_weight_grad_copy, p=1,
                       dim=(0))).detach().cpu().numpy().tolist()
        grad_mean = grad_mean / network_weight_size
        print('network_grad_mean', grad_mean)
        if np.isnan(grad_mean):
            print('ERROR NAN!!!')
            continue

        # NOTE(review): everything up to the checkpoint rotation was placed
        # under `flag == False`, pairing the `else` below with this `if`.
        if flag == False:
            adam_global_optimizer.step()

            params = list(optimizee.named_parameters())
            (name, network_weight_after) = params[37]
            contrast = network_weight_after - network_weight_copy
            loss_con = torch.sum(torch.norm(
                contrast, p=1, dim=(0))).detach().cpu().numpy().tolist()
            loss_con = loss_con / network_weight_size
            print('EPOCHES:{},Parameters_update:{},loss_contrast:{}'.format(
                i, flag, loss_con))

            # Restart trajectories that have run long enough.
            for k in range(batchsize_para):
                if iteration[k] >= Optimizee_Train_Steps - opt.train_steps:
                    nn.init.orthogonal_(M[k])
                    state[0][k] = torch.zeros(DIM, outputDIM).cuda()
                    state[1][k] = torch.zeros(DIM, outputDIM).cuda()
                    state[2][k] = torch.zeros(DIM, outputDIM).cuda()
                    state[3][k] = torch.zeros(DIM, outputDIM).cuda()
                    iteration[k] = 0

            RB.push((state[0].detach(), state[1].detach(), state[2].detach(),
                     state[3].detach()), M.detach(), iteration.detach())

            # Keep the three most recent snapshots; the oldest one is the
            # rollback target.
            check_point = check_point2
            check_point2 = check_point3
            check_point3 = copy.deepcopy(optimizee.state_dict())
        else:
            print('=====>eigenvalue break, reloading check_point')
            optimizee.load_state_dict(check_point)

        print('==========>EPOCHES<-=========', i)
        print('=======>global_loss_graph', global_loss_graph.item() / 60000)
return out.view(-1, self.n_outputs) # 最终输出大小 : batch_size X n_output # -------------------- # Device configuration # -------------------- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # ------------------------------------ # Test the model(输入一张图片查看输出) # ------------------------------------ # 定义模型 model = ImageRNN(batch_size, N_INPUTS, N_NEURONS, N_OUTPUT, N_LAYERS).to(device) # 初始化模型的weight model.basic_rnn.weight_hh_l0.data = torch.eye( n=N_NEURONS, m=N_NEURONS, out=None).to(device) model.basic_rnn.weight_hh_l1.data = torch.eye( n=N_NEURONS, m=N_NEURONS, out=None).to(device) model.basic_rnn.weight_hh_l2.data = torch.eye( n=N_NEURONS, m=N_NEURONS, out=None).to(device) # 定义数据 dataiter = iter(train_loader) images, labels = dataiter.next() model.hidden = model.init_hidden() #logits = model(images.view(-1, 32, 32).to(device)) # print(logits[0:2]) """ tensor([[-0.2846, -0.1503, -0.1593, 0.5478, 0.6827, 0.3489, -0.2989, 0.4575, -0.2426, -0.0464], [-0.6708, -0.3025, -0.0205, 0.2242, 0.8470, 0.2654, -0.0381, 0.6646, -0.4479, 0.2523]], device='cuda:0', grad_fn=<SliceBackward>)
def to_onehot(targets, n_classes):
    """Return the one-hot encoding of ``targets`` as a float tensor.

    Args:
        targets: integer class indices; a tensor of any shape, or anything
            else accepted as a tensor index (e.g. a list of ints).
        n_classes: number of classes, i.e. the size of the added last axis.

    Returns:
        Float tensor of shape ``targets.shape + (n_classes,)``. When
        ``targets`` is a tensor the result lives on the same device.
    """
    # Build the identity on the device of the indices: indexing a CPU tensor
    # with CUDA indices is rejected by current PyTorch releases.
    device = targets.device if torch.is_tensor(targets) else None
    return torch.eye(n_classes, device=device)[targets]
def __init__(self, Nc, Ns, Nt, c2v, n2v, Ms, Ac, As, cmask, smask, d_m=10,
             d_ms=9, d_v=64, d_h=56, d_f=256, d_p=8, heads=1):
    """Store the configuration and build every sub-module and parameter.

    Args:
        Nc, Ns, Nt: sizes used to shape the parameters and attention blocks.
        c2v, n2v, Ms, Ac, As: stored unchanged as attributes.
        cmask, smask: stored inverted (``1 - mask``).
        d_m, d_ms, d_v, d_p: input widths of the corresponding encoders.
        d_h: hidden width shared by most layers.
        d_f: width of the wider feed-forward layers.
        heads: number of heads of the multi-head attention.

    The construction order below is significant: it fixes both the order in
    which random initial values are drawn and the order in which parameters
    and sub-modules are registered.
    """
    super(NH1GFCSAttnR, self).__init__()

    # --- plain attributes -------------------------------------------------
    self.Ac, self.As = Ac, As
    self.Nc, self.Ns, self.Nt = Nc, Ns, Nt
    self.n2v = n2v
    self.c2v = c2v
    self.Ms = Ms
    self.cmask = 1 - cmask
    self.smask = 1 - smask
    self.d_m = d_m
    self.d_ms = d_ms
    self.d_v = d_v
    self.d_h = d_h
    self.d_f = d_f
    self.d_p = d_p
    self.heads = heads

    n_st = Ns * Nt  # recurring products of the size arguments
    n_ct = Nc * Nt

    # --- small builders for the recurring layer patterns -------------------
    def linear_relu(n_in, n_out):
        return nn.Sequential(nn.Linear(n_in, n_out), nn.ReLU())

    def mlp2(n_in, n_mid, n_out):
        return nn.Sequential(nn.Linear(n_in, n_mid), nn.ReLU(),
                             nn.Linear(n_mid, n_out), nn.ReLU())

    def randn_param(rows, cols):
        return Parameter(torch.randn(rows, cols), requires_grad=True)

    def block_mask(rows_per_block, cols_per_block):
        # Identity over Nt, blown up into constant blocks; not trained.
        blocks = torch.eye(Nt).repeat_interleave(rows_per_block, dim=0)
        blocks = blocks.repeat_interleave(cols_per_block, dim=1)
        return Parameter(blocks, requires_grad=False)

    def square_linear():
        return nn.Linear(d_h, d_h, bias=False)

    # --- encoders -----------------------------------------------------------
    self.model_m = mlp2(d_m, d_h, d_h)
    self.model_ms = mlp2(d_ms, d_h, d_h)
    self.model_c = linear_relu(2, d_h)
    self.model_cs = mlp2(n_ct * 2, d_f, n_st * d_h)

    # --- weight / bias / projection triples ---------------------------------
    self.GCW = randn_param(n_st, n_ct)
    self.GCB = randn_param(n_st, d_h)
    self.GCL = square_linear()

    self.GSW1 = randn_param(n_st, n_st)
    self.GSB1 = randn_param(n_st, d_h)
    self.GSL1 = square_linear()

    self.GSW2 = randn_param(n_st, n_st)
    self.GSB2 = randn_param(n_st, d_h)
    self.GSL2 = square_linear()

    self.GSW3 = randn_param(n_st, n_st)
    self.GSB3 = randn_param(n_st, d_h)
    self.GSL3 = square_linear()

    # --- fixed block masks --------------------------------------------------
    self.Gmask = block_mask(Ns, Nc)
    self.Smask = block_mask(Ns, Ns)

    # --- learned embeddings -------------------------------------------------
    self.embC = randn_param(Nc, d_h)
    self.embT = randn_param(Nt, d_h)

    # --- embedding heads ----------------------------------------------------
    self.model_embMM = linear_relu(d_h * 2, d_h // 4)
    self.model_embS = mlp2(d_v, d_h * 2, d_h // 2)
    self.model_embC = linear_relu(d_h, d_h // 2)
    self.model_embP = linear_relu(d_p, d_h // 4)
    self.model_embT = linear_relu(d_h, d_h // 4)

    self.model_final = nn.Sequential(
        nn.Linear(d_h * 2, d_f),
        nn.Dropout(0.1),
        nn.ReLU(),
        nn.Linear(d_f, 1),
        nn.ReLU(),
    )

    # --- self-attention blocks (last argument is 0 for the S blocks and 1
    # for the T blocks) --------------------------------------------------------
    self.attnTC = SelfAttnBlock(Nc, Nt, d_h, 1, 1)
    self.attnTS = SelfAttnBlock(Ns, Nt, d_h, 1, 1)

    self.attnS1 = SelfAttnBlock(Ns, Nt, d_h, 1, 0)
    self.attnT1 = SelfAttnBlock(Ns, Nt, d_h, 1, 1)
    self.attnS2 = SelfAttnBlock(Ns, Nt, d_h, 1, 0)
    self.attnT2 = SelfAttnBlock(Ns, Nt, d_h, 1, 1)
    self.attnS3 = SelfAttnBlock(Ns, Nt, d_h, 1, 0)
    self.attnT3 = SelfAttnBlock(Ns, Nt, d_h, 1, 1)

    # --- fusion layers ------------------------------------------------------
    self.model_ST1 = linear_relu(d_h * 2, d_h)
    self.model_ST2 = linear_relu(d_h * 2, d_h)
    self.model_ST3 = linear_relu(d_h * 2, d_h)

    self.model_mss = mlp2(d_h * 4, d_f, d_h)

    # --- attention followed by norm and feed-forward ------------------------
    self.attn_cs = nn.MultiheadAttention(d_h * heads, heads)
    self.norm_cs = Norm(d_h)
    self.ffn_cs = nn.Sequential(nn.Linear(d_h, d_f), nn.ReLU(),
                                nn.Dropout(0.1), nn.Linear(d_f, d_h),
                                nn.ReLU())
def dense_mincut_pool(x, adj, s, mask=None):
    r"""MinCut pooling on dense batched graphs, as introduced in
    `"Spectral Clustering in Graph Neural Networks for Graph Pooling"
    <https://arxiv.org/abs/1907.00481>`_.

    With dense learned assignments
    :math:`\mathbf{S} \in \mathbb{R}^{B \times N \times C}` the operator
    computes

    .. math::
        \mathbf{X}^{\prime} &= {\mathrm{softmax}(\mathbf{S})}^{\top} \cdot
        \mathbf{X}

        \mathbf{A}^{\prime} &= {\mathrm{softmax}(\mathbf{S})}^{\top} \cdot
        \mathbf{A} \cdot \mathrm{softmax}(\mathbf{S})

    and returns the pooled node features, the coarsened and symmetrically
    normalized adjacency matrix, and two auxiliary objectives:

    (1) the MinCut loss

    .. math::
        \mathcal{L}_c = - \frac{\mathrm{Tr}(\mathbf{S}^{\top} \mathbf{A}
        \mathbf{S})} {\mathrm{Tr}(\mathbf{S}^{\top} \mathbf{D}
        \mathbf{S})}

    where :math:`\mathbf{D}` is the degree matrix, and (2) the orthogonality
    loss

    .. math::
        \mathcal{L}_o = {\left\| \frac{\mathbf{S}^{\top} \mathbf{S}}
        {{\|\mathbf{S}^{\top} \mathbf{S}\|}_F} -\frac{\mathbf{I}_C}{\sqrt{C}}
        \right\|}_F.

    Both losses are averaged over the batch.

    Args:
        x (Tensor): Node features
            :math:`\mathbf{X} \in \mathbb{R}^{B \times N \times F}`
            (batch size :math:`B`, maximum number of nodes :math:`N`,
            feature dimension :math:`F`). A 2-D input is treated as a batch
            of one.
        adj (Tensor): Symmetrically normalized adjacency
            :math:`\mathbf{A} \in \mathbb{R}^{B \times N \times N}`.
        s (Tensor): Assignment scores
            :math:`\mathbf{S} \in \mathbb{R}^{B \times N \times C}` for
            :math:`C` clusters. Pass raw scores: the softmax is applied
            inside this function.
        mask (BoolTensor, optional): Valid-node indicator
            :math:`\mathbf{M} \in {\{ 0, 1 \}}^{B \times N}`.
            (default: :obj:`None`)

    :rtype: (:class:`Tensor`, :class:`Tensor`, :class:`Tensor`,
        :class:`Tensor`)
    """
    # Promote single graphs to a batch of one.
    if x.dim() == 2:
        x = x.unsqueeze(0)
    if adj.dim() == 2:
        adj = adj.unsqueeze(0)
    if s.dim() == 2:
        s = s.unsqueeze(0)

    batch_size, num_nodes, _ = x.size()
    k = s.size(-1)

    s = torch.softmax(s, dim=-1)

    if mask is not None:
        # Zero out features and assignments of padded nodes.
        mask = mask.view(batch_size, num_nodes, 1).to(x.dtype)
        x = x * mask
        s = s * mask

    s_t = s.transpose(1, 2)
    out = torch.matmul(s_t, x)
    out_adj = torch.matmul(torch.matmul(s_t, adj), s)

    # MinCut regularization: -Tr(S^T A S) / Tr(S^T D S).
    mincut_num = _rank3_trace(out_adj)
    degree = _rank3_diag(torch.einsum('ijk->ij', adj))
    mincut_den = _rank3_trace(torch.matmul(torch.matmul(s_t, degree), s))
    mincut_loss = torch.mean(-(mincut_num / mincut_den))

    # Orthogonality regularization.
    ss = torch.matmul(s_t, s)
    i_s = torch.eye(k).type_as(ss)
    ss_normed = ss / torch.norm(ss, dim=(-1, -2), keepdim=True)
    eye_normed = i_s / torch.norm(i_s)
    ortho_loss = torch.mean(torch.norm(ss_normed - eye_normed, dim=(-1, -2)))

    # Remove self-loops from the coarsened adjacency, then normalize it
    # symmetrically by the square roots of its row sums.
    diag_idx = torch.arange(k, device=out_adj.device)
    out_adj[:, diag_idx, diag_idx] = 0
    row_scale = torch.einsum('ijk->ij', out_adj)
    row_scale = torch.sqrt(row_scale)[:, None] + EPS
    out_adj = (out_adj / row_scale) / row_scale.transpose(1, 2)

    return out, out_adj, mincut_loss, ortho_loss
def _rank3_diag(x): eye = torch.eye(x.size(1)).type_as(x) out = eye * x.unsqueeze(2).expand(*x.size(), x.size(1)) return out
def test_returns_diag_matrix_if_equal_dimensions(self):
    """Evaluating the kernel on the test tensor against itself must give
    ``scaling`` times the 10x10 identity."""
    kernel = self.kernel
    gram = kernel(self.test_tensor, self.test_tensor)

    # After dividing out the scaling only the identity may remain ...
    expected_identity = torch.eye(10)
    torch.testing.assert_allclose(gram / kernel.scaling, expected_identity)

    # ... and the raw diagonal must equal the scaling itself.
    diagonal = torch.diag(gram)
    torch.testing.assert_allclose(diagonal, kernel.scaling)
def generate_cartesian_target_joint_min_jerk(
    joint_pos_start: torch.Tensor,
    ee_pose_goal: T.TransformationObj,
    time_to_go: float,
    hz: float,
    robot_model: torch.nn.Module,
) -> List[Dict]:
    """
    Cartesian space minimum jerk trajectory planner, but outputs plan in joint space.
    Assumes zero velocity & acceleration at start & goal.

    The Cartesian plan is produced by ``generate_cartesian_space_min_jerk``
    and converted step by step with the pseudo-inverse of the Jacobian at the
    current joint position; a clamped null-space correction is added to each
    new joint position.

    Args:
        joint_pos_start: Joint positions at the start of the trajectory (1-D)
        ee_pose_goal: Goal pose of the end effector
        time_to_go: Trajectory duration in seconds
        hz: Frequency of output trajectory
        robot_model: Robot model providing ``forward_kinematics`` and
            ``compute_jacobian``

    Returns:
        One waypoint dict per step with the keys ``"time_from_start"``,
        ``"position"``, ``"velocity"`` and ``"acceleration"``; the last three
        hold the joint-space values for that step.
    """
    steps = _compute_num_steps(time_to_go, hz)
    dt = 1.0 / hz
    num_joints = joint_pos_start.shape[0]

    # Compute start pose
    ee_pos_start, ee_quat_start = robot_model.forward_kinematics(joint_pos_start)
    ee_pose_start = T.from_rot_xyz(
        rotation=R.from_quat(ee_quat_start), translation=ee_pos_start
    )
    cartesian_waypoints = generate_cartesian_space_min_jerk(
        ee_pose_start, ee_pose_goal, time_to_go, hz
    )

    # Extract plan & convert to joint space
    q_traj = torch.zeros(steps, num_joints)
    qd_traj = torch.zeros(steps, num_joints)
    qdd_traj = torch.zeros(steps, num_joints)

    q_traj[0, :] = joint_pos_start
    identity = torch.eye(num_joints)  # loop invariant, built once

    for i in range(0, steps - 1):
        # Get current joint state & jacobian
        joint_pos_current = q_traj[i, :]
        jacobian = robot_model.compute_jacobian(joint_pos_current)
        jacobian_pinv = torch.pinverse(jacobian)

        # Query Cartesian plan for next step
        ee_twist_desired = cartesian_waypoints[i + 1]["twist"]
        ee_accel_desired = cartesian_waypoints[i + 1]["acceleration"]

        # Convert next step to joint plan
        qdd_traj[i + 1, :] = jacobian_pinv @ ee_accel_desired
        qd_traj[i + 1, :] = jacobian_pinv @ ee_twist_desired
        q_delta = qd_traj[i + 1, :] * dt
        q_traj[i + 1, :] = joint_pos_current + q_delta

        # Null space correction
        null_space_proj = identity - jacobian_pinv @ jacobian
        q_null_err = -null_space_proj @ q_traj[i + 1, :]
        q_null_err_norm = q_null_err.norm() + 1e-27  # prevent zero division
        q_null_err_clamped = (
            q_null_err / q_null_err_norm * min(q_null_err_norm, q_delta.norm())
        )  # norm of correction clamped to norm of current action
        q_traj[i + 1, :] = q_traj[i + 1, :] + q_null_err_clamped

    waypoints = [
        {
            "time_from_start": i * dt,
            "position": q_traj[i, :],
            "velocity": qd_traj[i, :],
            "acceleration": qdd_traj[i, :],
        }
        for i in range(steps)
    ]

    return waypoints
def loss(self, input_h, input_c, heads, types, mask=None, lengths=None):
    '''
    Negative log-likelihood of the given heads and types: the log-partition
    (log-determinant of the Laplacian built from the exponentiated energies)
    minus the energy of the given arcs.

    Args:
        input_h: Tensor
            the head input tensor with shape = [batch, length, input_size]
        input_c: Tensor
            the child input tensor with shape = [batch, length, input_size]
        heads: Tensor
            head indices, used (transposed) to index the energy tensor;
            presumably shape [batch, length] -- confirm against callers
        types: Tensor
            label indices, used the same way as heads
        mask: Tensor or None
            the mask tensor with shape = [batch, length]
        lengths: tensor or list of int
            the length of each input shape = [batch]; derived from mask (or
            set to the full length) when omitted

    Returns: Tensor
        A 1D tensor for minus log likelihood loss
    '''
    batch, length, _ = input_h.size()

    # [batch, num_labels, length, length]
    energy = self.forward(input_h, input_c, mask=mask)
    potentials = torch.exp(energy)

    # mask out invalid positions along both length axes
    if mask is not None:
        mask_dim2 = mask.unsqueeze(1).unsqueeze(3)
        mask_dim3 = mask.unsqueeze(1).unsqueeze(2)
        potentials = potentials * mask_dim2 * mask_dim3

    # sum along the label axis [batch, length, length]
    A = potentials.sum(dim=1)

    # column sums [batch, 1, length]
    col_sums = A.sum(dim=1, keepdim=True)

    # make sure L is positive-defined
    rtol = 1e-4
    atol = 1e-6
    col_sums = col_sums + (col_sums * rtol + atol)

    # broadcast to [batch, length, length] and keep only the diagonal
    D = Variable(A.data.new(A.size()).zero_()) + col_sums
    D = D * Variable(torch.eye(length)).type_as(D)

    # laplacian matrix [batch, length, length]
    L = D - A

    # sentence lengths
    if lengths is None:
        if mask is not None:
            lengths = mask.data.sum(dim=1).long()
        else:
            lengths = [length] * batch

    # partition Z(x) [batch]: log-determinant of the Laplacian with the first
    # row and column removed, restricted to the sentence length
    z = Variable(energy.data.new(batch))
    for b in range(batch):
        end = lengths[b]
        z[b] = logdet(L[b, 1:end, 1:end])

    # position index [length, batch] and batch index [batch]
    position_index = torch.arange(0, length).view(length, 1)
    position_index = position_index.expand(length, batch)
    position_index = position_index.type_as(energy.data).long()
    batch_index = torch.arange(0, batch).type_as(energy.data).long()

    # target energy [length-1, batch] (position 0 is dropped), summed over
    # positions -> [batch]
    picked = energy[batch_index, types.data.t(), heads.data.t(),
                    position_index]
    tgt_energy = picked[1:].sum(dim=0)

    return z - tgt_energy
def identity_matrix(batch_size):
    r"""Return ``batch_size`` stacked copies of the 4x4 identity, i.e. a
    batch of homogeneous identity transforms of shape
    ``(batch_size, 4, 4)``."""
    single = torch.eye(4).unsqueeze(0)  # 1x4x4
    return single.repeat(batch_size, 1, 1)  # Nx4x4
def attack(model, model_name, loader, start_eps, end_eps, max_eps, norm, logger, verbose, method, **kwargs):
    """Run the ``Shadow`` attack against ``model`` over every batch of ``loader``.

    For each batch a ``Shadow`` attacker is created. For 9 target-label
    rounds it is optimised for 300 steps; the objective handed to the
    attacker is the negated training loss, whose bound computation is taken
    from the CROWN-IBP training code. A sample counts as a success when, at
    the last step of some round, its margin lower bounds are not flagged by
    ``(lb < 0).any(dim=1)``.

    Args:
        model: network under attack; it is called on the data, and
            ``interval_range`` / ``backward_range`` are used for the bounds.
        model_name: only used to build ``exp_name``.
        loader: data loader; ``dataset``, ``batch_size`` and ``std`` are read.
        start_eps, end_eps: end points of the per-batch epsilon schedule.
        max_eps: normaliser for the mixing factors (beta / kappa).
        norm: perturbation norm (``np.inf`` or 2 in the branches below).
        logger: object with a ``log(str)`` method.
        verbose: when true the bounds are computed even for ``"natural"``.
        method: one of ``"robust"``, ``"robust_activity"``, ``"natural"``,
            ``"robust_natural"``.
        **kwargs: keys read here are ``"bounded_input"``, ``"bound_type"``,
            ``"convex-proj"``, ``"final-beta"``, ``"runnerup_only"``,
            ``"activity_reg"``, ``"final-kappa"`` and (optional) ``"l1_reg"``.

    Returns:
        ``(errors.avg, errors.avg)`` when ``method`` is ``"natural"``,
        otherwise ``(robust_errors.avg, errors.avg)``.

    Raises:
        ValueError: unsupported norm for ``"convex-adv"`` or unknown method.
        RuntimeError: unknown ``bound_type``.

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-collapsed source; ambiguous placements are marked below.
    """
    # Fixed seed so repeated runs draw the same random numbers.
    torch.manual_seed(6247423)
    num_class = 10
    # Running statistics.
    losses = AverageMeter()
    l1_losses = AverageMeter()
    errors = AverageMeter()
    robust_errors = AverageMeter()
    regular_ce_losses = AverageMeter()
    robust_ce_losses = AverageMeter()
    relu_activities = AverageMeter()
    bound_bias = AverageMeter()
    bound_diff = AverageMeter()
    unstable_neurons = AverageMeter()
    dead_neurons = AverageMeter()
    alive_neurons = AverageMeter()
    batch_time = AverageMeter()
    # initial
    model.eval()
    duplicate_rgb = True
    # pregenerate the array for specifications, will be used for scatter
    # Row i lists every class index except i, in increasing order.
    sa = np.zeros((num_class, num_class - 1), dtype=np.int32)
    for i in range(sa.shape[0]):
        for j in range(sa.shape[1]):
            if j < i:
                sa[i][j] = j
            else:
                sa[i][j] = j + 1
    sa = torch.LongTensor(sa)
    total = len(loader.dataset)
    batch_size = loader.batch_size
    print(batch_size)
    # Per-channel std reshaped so it broadcasts against image batches.
    std = torch.tensor(loader.std).unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
    total_steps = 300
    # One epsilon per batch, linearly spaced from start_eps to end_eps.
    batch_eps = np.linspace(start_eps, end_eps, (total // batch_size) + 1)
    if end_eps < 1e-6:
        logger.log('eps {} close to 0, using natural training'.format(end_eps))
        method = "natural"

    exp_name = 'outputs/[{}:{}]'.format(get_exp_name(), model_name)
    # real_i = 0
    for i, (init_data, init_labels) in enumerate(loader):
        # labels = torch.zeros_like(init_labels)
        init_data = init_data.cuda()
        tv_eps, tv_lam, reg_lam = get_args(duplicate_rgb=duplicate_rgb)
        attacker = Shadow(init_data, init_labels, tv_lam, reg_lam, tv_eps)
        # Per-sample success counter for this batch.
        success = np.zeros(len(init_data))
        # saved_advs = torch.zeros_like(init_data).cuda()
        for t_i in range(9):
            attacker.iterate_labels_not_equal_to(init_labels)
            attacker.renew_t()
            labels = attacker.labels
            for rep in range(total_steps):
                # Current perturbation added to the clean batch.
                ct = attacker.get_ct()
                data = init_data + ct
                data.data = get_normal(get_unit01(data))

                # ========================== The rest of code is taken from CROWN-IBP REPO
                start = time.time()
                eps = batch_eps[i]
                # Specification matrix: one-hot(label) minus one-hot(every class).
                c = torch.eye(num_class).type_as(data)[labels].unsqueeze(
                    1) - torch.eye(num_class).type_as(data).unsqueeze(0)
                # remove specifications to self
                eye = (~(labels.data.unsqueeze(1) == torch.arange(num_class).type_as(
                    labels.data).unsqueeze(0)))
                c = (c[eye].view(data.size(0), num_class - 1, num_class))
                # scatter matrix to avoid compute margin to self
                sa_labels = sa[labels]
                # storing computed lower bounds after scatter
                lb_s = torch.zeros(data.size(0), num_class)

                # FIXME: Assume data is from range 0 - 1
                if kwargs["bounded_input"]:
                    assert loader.std == [1, 1, 1] or loader.std == [1]
                    # bounded input only makes sense for Linf perturbation
                    assert norm == np.inf
                    data_ub = (data + eps).clamp(max=1.0)
                    data_lb = (data - eps).clamp(min=0.0)
                else:
                    if norm == np.inf:
                        data_ub = data.cpu() + (eps / std)
                        data_lb = data.cpu() - (eps / std)
                    else:
                        data_ub = data_lb = data

                # Move everything to the GPU when the model lives there.
                if list(model.parameters())[0].is_cuda:
                    data = data.cuda()
                    data_ub = data_ub.cuda()
                    data_lb = data_lb.cuda()
                    labels = labels.cuda()
                    c = c.cuda()
                    sa_labels = sa_labels.cuda()
                    lb_s = lb_s.cuda()
                # convert epsilon to a tensor
                eps_tensor = data.new(1)
                eps_tensor[0] = eps

                # omit the regular cross entropy, since we use robust error
                output = model(data)
                regular_ce = torch.nn.CrossEntropyLoss()(output, labels)
                regular_ce_losses.update(regular_ce.cpu().detach().numpy(),
                                         data.size(0))
                errors.update(
                    torch.sum(torch.argmax(output, dim=1) != labels).cpu().
                    detach().numpy() / data.size(0), data.size(0))
                # get range statistic

                if verbose or method != "natural":
                    if kwargs["bound_type"] == "convex-adv":
                        # Wong and Kolter's bound, or equivalently Fast-Lin
                        if kwargs["convex-proj"] is not None:
                            proj = kwargs["convex-proj"]
                            if norm == np.inf:
                                norm_type = "l1_median"
                            elif norm == 2:
                                norm_type = "l2_normal"
                            else:
                                raise (ValueError(
                                    "Unsupported norm {} for convex-adv".
                                    format(norm)))
                        else:
                            proj = None
                            if norm == np.inf:
                                norm_type = "l1"
                            elif norm == 2:
                                norm_type = "l2"
                            else:
                                raise (ValueError(
                                    "Unsupported norm {} for convex-adv".
                                    format(norm)))
                        if loader.std == [1] or loader.std == [1, 1, 1]:
                            convex_eps = eps
                        else:
                            convex_eps = eps / np.mean(loader.std)
                            # for CIFAR we are roughly / 0.2
                            # FIXME this is due to a bug in convex_adversarial, we cannot use per-channel eps
                        if norm == np.inf:
                            # bounded input is only for Linf
                            if kwargs["bounded_input"]:
                                # FIXME the bounded projection in convex_adversarial has a bug, data range must be positive
                                data_l = 0.0
                                data_u = 1.0
                            else:
                                data_l = -np.inf
                                data_u = np.inf
                        else:
                            data_l = data_u = None
                        f = DualNetwork(model,
                                        data,
                                        convex_eps,
                                        proj=proj,
                                        norm_type=norm_type,
                                        bounded_input=kwargs["bounded_input"],
                                        data_l=data_l,
                                        data_u=data_u)
                        lb = f(c)
                    elif kwargs["bound_type"] == "interval":
                        ub, lb, relu_activity, unstable, dead, alive = model.interval_range(
                            norm=norm, x_U=data_ub, x_L=data_lb, eps=eps, C=c)
                    elif kwargs["bound_type"] == "crown-interval":
                        ub, ilb, relu_activity, unstable, dead, alive = model.interval_range(
                            norm=norm, x_U=data_ub, x_L=data_lb, eps=eps, C=c)
                        crown_final_factor = kwargs['final-beta']
                        # Weight of the backward (CROWN) bound in the mix below.
                        factor = (max_eps - eps *
                                  (1.0 - crown_final_factor)) / max_eps
                        if factor < 1e-5:
                            # Backward bound would get (almost) no weight: skip it.
                            lb = ilb
                        else:
                            if kwargs["runnerup_only"]:
                                masked_output = output.detach().scatter(
                                    1, labels.unsqueeze(-1), -100)
                                runner_up = masked_output.max(1)[1]
                                runnerup_c = torch.eye(num_class).type_as(
                                    data)[labels]
                                runnerup_c.scatter_(1, runner_up.unsqueeze(-1),
                                                    -1)
                                runnerup_c = runnerup_c.unsqueeze(1).detach()
                                # NOTE(review): runnerup_c is built but C=c is
                                # what gets passed below -- confirm intent.
                                clb, bias = model.backward_range(norm=norm,
                                                                 x_U=data_ub,
                                                                 x_L=data_lb,
                                                                 eps=eps,
                                                                 C=c)
                                clb = clb.expand(clb.size(0), num_class - 1)
                            else:
                                clb, bias = model.backward_range(norm=norm,
                                                                 x_U=data_ub,
                                                                 x_L=data_lb,
                                                                 eps=eps,
                                                                 C=c)
                                bound_bias.update(bias.sum() / data.size(0))
                            diff = (clb - ilb).sum().item()
                            bound_diff.update(diff / data.size(0),
                                              data.size(0))
                            lb = clb * factor + ilb * (1 - factor)
                    else:
                        raise RuntimeError("Unknown bound_type " +
                                           kwargs["bound_type"])
                    # Scatter the num_class-1 margins into num_class columns
                    # (the label's own column keeps the zero from lb_s).
                    lb = lb_s.scatter(1, sa_labels, lb)
                    robust_ce = torch.nn.CrossEntropyLoss()(-lb, labels)
                    if kwargs["bound_type"] != "convex-adv":
                        relu_activities.update(
                            relu_activity.detach().cpu().item() / data.size(0),
                            data.size(0))
                        unstable_neurons.update(unstable / data.size(0),
                                                data.size(0))
                        dead_neurons.update(dead / data.size(0), data.size(0))
                        alive_neurons.update(alive / data.size(0),
                                             data.size(0))

                # Select the loss according to the requested method.
                if method == "robust":
                    loss = robust_ce
                elif method == "robust_activity":
                    loss = robust_ce + kwargs["activity_reg"] * relu_activity
                elif method == "natural":
                    loss = regular_ce
                elif method == "robust_natural":
                    natural_final_factor = kwargs["final-kappa"]
                    kappa = (max_eps - eps *
                             (1.0 - natural_final_factor)) / max_eps
                    loss = (1 - kappa) * robust_ce + kappa * regular_ce
                else:
                    raise ValueError("Unknown method " + method)

                if "l1_reg" in kwargs:
                    reg = kwargs["l1_reg"]
                    l1_loss = 0.0
                    # L1 penalty over every parameter whose name lacks 'bias'.
                    for name, param in model.named_parameters():
                        if 'bias' not in name:
                            l1_loss = l1_loss + (reg *
                                                 torch.sum(torch.abs(param)))
                    loss = loss + l1_loss
                    l1_losses.update(l1_loss.cpu().detach().numpy(),
                                     data.size(0))

                # =========================================== The rest is from breaking paper not from CROWN-IBP Repo
                # The attacker is handed the negated loss.
                c_loss = -loss
                attacker.back_prop(c_loss, rep)

                batch_time.update(time.time() - start)
                losses.update(loss.cpu().detach().numpy(), data.size(0))

                # Only the final step of a round updates the success counter.
                if (verbose
                        or method != "natural") and rep == total_steps - 1:
                    robust_ce_losses.update(robust_ce.cpu().detach().numpy(),
                                            data.size(0))
                    certified = (lb < 0).any(dim=1).cpu().numpy()
                    success = success + np.ones(len(success)) - certified
                    # saved_advs[certified == False] = data[certified == False].data
                # NOTE(review): placed at the end of every step; the collapsed
                # source would also allow once per round -- confirm.
                torch.cuda.empty_cache()
            to_print = '{}\t{}\t{}'.format((success > 0).sum(), t_i,
                                           attacker.log)
            print(to_print, flush=True)
            attacker.labels = attacker.labels + 1
        # save_images(get_unit01(torch.cat((saved_advs, init_data), dim=-1)), success.astype(np.bool), real_i, exp_name)
        # real_i += len(saved_advs)
        # Fraction of the batch for which at least one round succeeded.
        robust_errors.update((success > 0).sum() / len(success), len(success))
        print('====', robust_errors.avg, '===', flush=True)
    # Log the largest absolute row sum of the flattened weight of every
    # bound-aware linear / conv layer.  Note that this rebinds `norm`.
    for i, l in enumerate(model):
        if isinstance(l, BoundLinear) or isinstance(l, BoundConv2d):
            norm = l.weight.data.detach().view(l.weight.size(0),
                                               -1).abs().sum(1).max().cpu()
            logger.log('layer {} norm {}'.format(i, norm))
    if method == "natural":
        return errors.avg, errors.avg
    else:
        return robust_errors.avg, errors.avg
def train_mnist():
    """Train a three-agent classifier on MNIST, validate it and plot the curves.

    Three ``Agent`` objects sharing a single ``ModelsUnion`` are optimised
    jointly with Adam (lr=1e-3) for 10 epochs on the first 10000 training
    images.  After every epoch the accuracy on the validation split is
    measured.  When training is done the accuracy and loss curves are plotted
    and ``viz`` is called on one randomly chosen test image.

    All hyper-parameters are hard-coded below; the function takes no
    arguments and returns nothing.
    """
    ag = []

    nb_class = 10
    img_size = 28
    n = 64
    f = 7
    n_m = 12
    d = 2
    nb_action = 4
    batch_size = 64
    t = 7
    nr = 1

    cuda = True

    #m = ModelsUnion(n, f, n_m, d, nb_action, nb_class, test_mnist())
    m = ModelsUnion(n, f, n_m, d, nb_action, nb_class)

    # The agents receive the (initially empty) list ``ag`` itself, so each of
    # them ends up holding a reference to the full list once it is filled.
    a1 = Agent(ag, m, n, f, n_m, img_size, nb_action, batch_size,
               obs_MNIST, trans_MNIST)
    a2 = Agent(ag, m, n, f, n_m, img_size, nb_action, batch_size,
               obs_MNIST, trans_MNIST)
    a3 = Agent(ag, m, n, f, n_m, img_size, nb_action, batch_size,
               obs_MNIST, trans_MNIST)

    ag.append(a1)
    ag.append(a2)
    ag.append(a3)

    if cuda:
        for a in ag:
            a.cuda()

    sm = Softmax(dim=-1)

    criterion = MSELoss()
    if cuda:
        criterion.cuda()

    # Collect the parameters of every sub-network into one optimiser.
    params = []
    for net in m.get_networks():
        if cuda:
            net.cuda()
        params += list(net.parameters())

    optim = th.optim.Adam(params, lr=1e-3)

    nb_epoch = 10

    (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = load_mnist()
    x_train, y_train = x_train[:10000], y_train[:10000]

    nb_batch = ceil(x_train.size(0) / batch_size)

    loss_v = []
    acc = []

    for e in range(nb_epoch):
        sum_loss = 0

        for net in m.get_networks():
            net.train()

        grad_norm_cnn = []
        grad_norm_pred = []

        # The first five epochs run ``step`` with random_walk=True.
        random_walk = e < 5

        for i in tqdm(range(nb_batch)):
            i_min = i * batch_size
            # The last batch may be shorter than ``batch_size``.
            i_max = min((i + 1) * batch_size, x_train.size(0))

            losses = []

            for _ in range(nr):
                x, y = x_train[i_min:i_max, :, :], y_train[i_min:i_max]

                if cuda:
                    x, y = x.cuda(), y.cuda()

                pred, log_probas = step(ag, x, t, sm, cuda,
                                        random_walk, nb_class)

                # Sum on agent dimension
                proba_per_image = log_probas.sum(dim=0)

                # Build the one-hot targets directly on the device of ``y``:
                # indexing a CPU identity matrix with a CUDA index tensor
                # raises a device-mismatch error.
                y_eye = th.eye(nb_class, device=y.device)[y]

                # Reward is the negated MSE between prediction and target.
                r = -criterion(pred, y_eye)

                # Mean on image batch
                l = (proba_per_image * r.detach() + r).mean(dim=0).view(-1)

                losses.append(l)

            loss = -th.cat(losses).sum() / nr

            optim.zero_grad()
            loss.backward()
            optim.step()

            sum_loss += loss.item()

            grad_norm_cnn.append(
                m.get_networks()[0].seq_lin[0].weight.grad.norm())
            grad_norm_pred.append(
                m.get_networks()[-1].seq_lin[0].weight.grad.norm())

        sum_loss /= nb_batch

        print("Epoch %d, loss = %f" % (e, sum_loss))
        print("grad_cnn_norm_mean = %f, grad_pred_norm_mean = %f" %
              (sum(grad_norm_cnn) / len(grad_norm_cnn),
               sum(grad_norm_pred) / len(grad_norm_pred)))
        print("CNN_el = %d, Pred_el = %d" %
              (m.get_networks()[0].seq_lin[0].weight.grad.nelement(),
               m.get_networks()[-1].seq_lin[0].weight.grad.nelement()))

        nb_correct = 0

        nb_batch_valid = ceil(x_valid.size(0) / batch_size)

        for net in m.get_networks():
            net.eval()

        with th.no_grad():
            for i in tqdm(range(nb_batch_valid)):
                i_min = i * batch_size
                i_max = min((i + 1) * batch_size, x_valid.size(0))

                x, y = x_valid[i_min:i_max, :, :], y_valid[i_min:i_max]

                # Honour the ``cuda`` flag here as well (the data used to be
                # moved to the GPU unconditionally).
                if cuda:
                    x, y = x.cuda(), y.cuda()

                pred, proba = step(ag, x, t, sm, cuda, random_walk, nb_class)

                nb_correct += (pred.argmax(dim=1) == y).sum().item()

            nb_correct /= x_valid.size(0)

            acc.append(nb_correct)
            loss_v.append(sum_loss)
            print("Epoch %d, accuracy = %f" % (e, nb_correct))

    plt.plot(acc, "b", label="accuracy")
    plt.plot(loss_v, "r", label="criterion value")
    plt.xlabel("Epoch")
    plt.title("MARL Classification f=%d, n=%d, n_m=%d, d=%d, T=%d" %
              (f, n, n_m, d, t))
    plt.legend()
    plt.show()

    viz(ag, x_test[randint(0, x_test.size(0) - 1)], t, sm, f)
from model import GCN
from graph_builder import build_karate_club_graph, draw
import warnings

# Silence every warning for the rest of the script.
warnings.filterwarnings("ignore")

# Model
# NOTE(review): the meaning of the three GCN arguments is defined in
# model.py; presumably (input size, hidden size, number of classes) — confirm.
net = GCN(34, 5, 2)
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
all_logits = []  # detached logits of every epoch, collected in the loop below
print("Model structure:", net)

# Data
G = build_karate_club_graph()
inputs = torch.eye(34)  # 34 x 34 identity matrix used as the input features
labeled_nodes = torch.tensor([0, 33])
labels = torch.tensor([0, 1])

# Train
net.train()
for epoch in range(30):
    optimizer.zero_grad()
    # Forward pass
    logits = net(G, inputs)
    all_logits.append(logits.detach())
    # Softmax
def forward(self, feats, meta, epoch, step=0, fname_vis=None):
    """Compute the correspondence loss for a batch of (source, deformed) pairs.

    ``feats`` interleaves the two members of every pair along dim 0: even
    indices are the source features, odd indices the deformed ones.  For each
    pair the source features are first reconstructed from an auxiliary sample
    (the next source in the batch, wrapping around) and then matched against
    the deformed features; matches are penalised by the point-to-point
    distance in ``meta['pc0']``.

    Args:
        feats: tensor unpacked as (2 * B, N, C) after de-interleaving.
        meta: dict; ``meta['pc0']`` holds the point coordinates, indexed as
            (B, N, >=3).  Only the first three channels are used.
        epoch: unused in this method.
        step: unused in this method.
        fname_vis: optional filename prefix; when given, the correlation
            matrices of ``num_vis`` randomly chosen samples are written with
            ``save_data_as_image``.

    Returns:
        (output_loss, output_info): two dicts with the batch-averaged
        ``total_loss`` / ``cycle_loss`` / ``perm_loss`` and
        ``correct_match`` / ``smcorr_to_I`` respectively.
    """
    device = feats.device
    X1 = meta['pc0'].to(device)
    if X1.shape[2] > 3:
        # Extra channels beyond xyz are not used by this loss.
        X1 = X1[:, :, :3]

    feats1 = feats[0::2]
    feats2 = feats[1::2]  # deformed
    B, N, C = feats1.shape

    num_vis = 1 if B > 3 else B
    if fname_vis is not None:
        vis_idx = np.random.choice(B, num_vis, replace=False)

    # The identity target is the same for every sample: build it once.
    identity = torch.eye(N, device=device)

    # parameters
    loss = 0.
    correct_match = 0.
    Lc = 0.0
    perm_to_I = 0.0

    for b in range(B):
        # C X N
        f1 = feats1[b].permute(1, 0)  # source to B, C, N
        f2 = feats2[b].permute(1, 0)  # target
        fa = feats1[(b + 1) % B].permute(1, 0)  # auxiliary

        if self.normalize_vectors:
            f1 = F.normalize(f1, p=2, dim=0) * 20
            f2 = F.normalize(f2, p=2, dim=0) * 20
            fa = F.normalize(fa, p=2, dim=0) * 20

        ## f1 && fa correlation
        corr_1a = torch.matmul(f1.t(), fa) / self.temperature  ## [C, M]T X [C, N] = [M, N]
        smcorr_1a = F.softmax(corr_1a, dim=1)

        ## f1 reconstructed by fa
        ## [M, 1, N] X [1, C, N] --> [M, C, N] --> [M, C]
        f1_via_fa_t = torch.sum(smcorr_1a[:, None, :] * fa[None, :, :], dim=-1)

        corr_1a2 = torch.matmul(f1_via_fa_t, f2) / self.temperature  ## [M, C] X [C, K] = [M, K]
        smcorr_1a2 = F.softmax(corr_1a2, dim=1)

        # The Sinkhorn output is only used as a fixed target, so it is
        # computed without tracking gradients.
        with torch.no_grad():
            smcorr_1a2_sink, _ = sinkFunc.gumbel_sinkhorn(
                corr_1a2, temp=self.sink_tau[1], n_iters=self.sink_iters[1])
            del corr_1a2
            if smcorr_1a2_sink.shape[0] == 1:
                smcorr_1a2_sink = smcorr_1a2_sink.squeeze(0)
            else:
                smcorr_1a2_sink = torch.mean(smcorr_1a2_sink, dim=0)

        # Pairwise Euclidean distances between the points of sample b.
        diff = X1[b, :, None, :] - X1[b, None, :, :]
        dist = (diff * diff).sum(2).sqrt()
        dist = dist.pow(self.pow)  # make distance more sharper

        # C1*C2
        L = dist * smcorr_1a2

        ## rotational invariance
        corr_12 = torch.matmul(f1.t(), f2)
        smcorr_12 = F.softmax(corr_12 / self.temperature, dim=1)
        del corr_12
        L12 = dist * smcorr_12

        ## for reference
        perm_to_I += 3.0 * F.l1_loss(
            identity, smcorr_1a2_sink, reduction='sum') / N

        ## Sinkhorn regularization
        ## ablation
        ## 1) constraint to permutation
        constraint_to_perm = "1a2_perm"
        # constraint_to_perm = "1a_perm"
        if constraint_to_perm == "1a2_perm":
            Lc_b = F.l1_loss(smcorr_1a2_sink, smcorr_1a2,
                             reduction='sum') / N
        ## 2) constraint to identity
        elif constraint_to_perm == "1a2_identity":
            Lc_b = F.l1_loss(identity, smcorr_1a2, reduction='sum') / N
            print("constraint smcorr_1a2_sink to identity")
        ## 3) constraint on 1-a correspondence
        elif constraint_to_perm == "1a_perm":
            # NOTE(review): `smcorr_1a_sink` is never computed in this
            # method, so enabling this ablation raises NameError until the
            # Sinkhorn normalisation of `corr_1a` is added.
            Lc_b = F.l1_loss(smcorr_1a_sink, smcorr_1a,
                             reduction='sum') / N
            del smcorr_1a
            print("constraint smcorr_1a_sink to perm")

        Lc += 3.0 * Lc_b

        ## finall loss
        L += 1.0 * L12
        loss += (L.sum() / N)
        print(
            f"Loss: {L.sum():.6f}, Loss 12: {L12.sum():.6f}, smcorr1a2 to perm: {Lc_b:.6f}"
        )

        ## record & check
        with torch.no_grad():
            # Count the rows whose arg-max lies on the diagonal.  One
            # vectorised comparison replaces the per-element Python loop.
            max_idx = torch.argmax(smcorr_1a2, dim=1)
            diagonal = torch.arange(max_idx.numel(), device=max_idx.device)
            correct_match += float((max_idx == diagonal).sum().item())

        if fname_vis is not None and np.sum(vis_idx == b) == 1:
            txt_fname = fname_vis + str(b) + "smcorr_1a2_sink.png"
            npdata = smcorr_1a2_sink.cpu().detach().numpy()
            save_data_as_image(txt_fname, npdata)

            txt_fname = fname_vis + str(b) + "smcorr_1a2.png"
            npdata = smcorr_1a2.cpu().detach().numpy()
            save_data_as_image(txt_fname, npdata)
            print("saved files")

        del diff

    print("--------LOSS with DVE: {}--------".format(loss / B))

    total_loss = loss + self.lambda_lc * Lc
    output_loss = {
        'total_loss': total_loss / B,
        'cycle_loss': loss / B,
        'perm_loss': Lc / B,
    }
    output_info = {
        'correct_match': correct_match / B,
        'smcorr_to_I': perm_to_I / B,
    }
    return output_loss, output_info
def __init__(
    self,
    batch_size,
    nz,
    n_iter,
    save_dir,
    *,
    load=False,
    lr=2e-4,
    beta1=0,
    beta2=0.9,
    sch_iter_rate=10000,
    gamma=10,
    ncritic=5,
    log_rate=100,
    ckpt_rate=1000,
):
    """Set up model, optimisers, schedulers, data and prior for WGAN training.

    Args:
        batch_size: batch size handed to ``get_data``.
        nz: dimensionality of the latent prior.
        n_iter: stored as ``self.niter``.
        save_dir: directory handed to ``TorchLogger``.
        load: when false, a fresh ``TorchLogger`` is created together with
            the run's meta data.
        lr: learning rate of both Adam optimisers.
        beta1, beta2: Adam ``betas``.
        sch_iter_rate: ``step_size`` of both ``StepLR`` schedulers.
        gamma: stored as ``self.gamma``.
        ncritic: stored as ``self.ncritic``.
        log_rate: stored as ``self.log_rate``.
        ckpt_rate: stored as ``self.ckpt_rate``.
    """
    self.batch_size = batch_size
    self.niter = n_iter
    self.gamma = gamma
    self.ncritic = ncritic
    self.log_rate = log_rate
    self.ckpt_rate = ckpt_rate

    # Always a ``torch.device`` (it used to be a plain ``str`` on CPU-only
    # machines, which made the attribute's type depend on the hardware).
    self.device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')

    if not load:
        meta_data = dict(batch_size=batch_size,
                         n_iter=n_iter,
                         ncritic=ncritic,
                         nz=nz,
                         lr=lr,
                         beta1=beta1,
                         beta2=beta2,
                         gamma=gamma)
        # NOTE(review): with load=True no logger is created here; it is
        # presumably restored elsewhere before `self.log` is used — confirm.
        self.logger = TorchLogger(save_dir, meta_data=meta_data)

    self.model = WGANModel(nz)
    self.model.to(self.device)

    self.opt_gen = optim.Adam(self.model.gen.parameters(),
                              lr=lr,
                              betas=(beta1, beta2))
    self.opt_critic = optim.Adam(self.model.disc.parameters(),
                                 lr=lr,
                                 betas=(beta1, beta2))

    # Step schedulers: every `sch_iter_rate` scheduler steps the learning
    # rate is multiplied by 0.3.
    self.sch_gen = optim.lr_scheduler.StepLR(self.opt_gen, sch_iter_rate,
                                             0.3)
    self.sch_critic = optim.lr_scheduler.StepLR(self.opt_critic,
                                                sch_iter_rate, 0.3)

    # real data generator
    self.train_loader, self.test_loader, _ = get_data('./data', batch_size)
    self.gen_train = self.inf_train_gen()

    # prior on z ~ p(z)
    self.prior = dist.MultivariateNormal(torch.zeros(nz), torch.eye(nz))

    # inception score
    self.ins_score = None

    # Header row of the training log table.
    self.log(f'{"iter":<10} | {"gen_loss":<15} | {"critic_loss":<15} | '
             f'{"time":<10}')
def forward(self, predictions, wrapper, wrapper_mask):
    """Multibox Loss

    Args:
        predictions (dict): Network outputs.  The keys read here are
            'loc', 'conf', 'mask' and 'priors', plus 'proto' when
            cfg.mask_type == mask_type.lincomb, 'inst' when
            cfg.use_instance_coeff, 'classes' when
            cfg.use_class_existence_loss and 'segm' when
            cfg.use_semantic_segmentation_loss.
            loc shape: torch.size(batch_size,num_priors,4)
            conf shape: torch.size(batch_size,num_priors,num_classes)
            masks shape: torch.size(batch_size,num_priors,mask_dim)
            priors shape: torch.size(num_priors,4)
            proto* shape: torch.size(batch_size,mask_h,mask_w,mask_dim)

        wrapper, wrapper_mask: ``wrapper.get_args(wrapper_mask)`` must return
            the tuple ``(targets, masks, num_crowds)`` described below.

            targets (list<tensor>): Ground truth boxes and labels for a batch,
                shape: [batch_size][num_objs,5] (last idx is the label).

            masks (list<tensor>): Ground truth masks for each object in each
                image, shape: [batch_size][num_objs,im_height,im_width]

            num_crowds (list<int>): Number of crowd annotations per batch.
                The crowd annotations should be the last num_crowds elements
                of targets and masks.

        * Only if mask_type == lincomb

    Returns:
        dict: loss name -> loss tensor (see the "Loss Key" at the end of the
        method).
    """
    loc_data = predictions['loc']
    conf_data = predictions['conf']
    mask_data = predictions['mask']
    priors = predictions['priors']

    if cfg.mask_type == mask_type.lincomb:
        proto_data = predictions['proto']

    if cfg.use_instance_coeff:
        inst_data = predictions['inst']
    else:
        inst_data = None

    targets, masks, num_crowds = wrapper.get_args(wrapper_mask)
    labels = [None] * len(targets)  # Used in sem segm loss

    batch_size = loc_data.size(0)
    # This is necessary for training on multiple GPUs because
    # DataParallel will cat the priors from each GPU together
    priors = priors[:loc_data.size(1), :]
    num_priors = (priors.size(0))
    num_classes = self.num_classes

    # Match priors (default boxes) and ground truth boxes
    # These tensors will be created with the same device as loc_data
    loc_t = loc_data.new(batch_size, num_priors, 4)
    gt_box_t = loc_data.new(batch_size, num_priors, 4)
    conf_t = loc_data.new(batch_size, num_priors).long()
    idx_t = loc_data.new(batch_size, num_priors).long()

    defaults = priors.data

    if cfg.use_class_existence_loss:
        class_existence_t = loc_data.new(batch_size, num_classes - 1)

    for idx in range(batch_size):
        truths = targets[idx][:, :-1].data
        labels[idx] = targets[idx][:, -1].data.long()

        if cfg.use_class_existence_loss:
            # Construct a one-hot vector for each object and collapse it into an existence vector with max
            # Also it's fine to include the crowd annotations here
            # `.device` is used instead of `.get_device()`: the latter
            # returns -1 for CPU tensors, which is not a valid `device=`
            # argument.
            class_existence_t[idx, :] = torch.eye(
                num_classes - 1,
                device=conf_t.device)[labels[idx]].max(dim=0)[0]

        # Split the crowd annotations because they come bundled in
        cur_crowds = num_crowds[idx]
        if cur_crowds > 0:
            # The crowd entries are the last `cur_crowds` rows.
            crowd_boxes = truths[-cur_crowds:]
            truths = truths[:-cur_crowds]

            # We don't use the crowd labels or masks
            labels[idx] = labels[idx][:-cur_crowds]
            masks[idx] = masks[idx][:-cur_crowds]
        else:
            crowd_boxes = None

        # `match` fills loc_t / conf_t / idx_t for sample `idx` in place.
        match(self.pos_threshold, self.neg_threshold, truths, defaults,
              labels[idx], crowd_boxes, loc_t, conf_t, idx_t, idx,
              loc_data[idx])

        gt_box_t[idx, :, :] = truths[idx_t[idx]]

    # wrap targets
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)
    idx_t = Variable(idx_t, requires_grad=False)

    pos = conf_t > 0
    num_pos = pos.sum(dim=1, keepdim=True)

    # Shape: [batch,num_priors,4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)

    losses = {}

    # Localization Loss (Smooth L1)
    if cfg.train_boxes:
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        losses['B'] = F.smooth_l1_loss(loc_p, loc_t,
                                       reduction='sum') * cfg.bbox_alpha

    if cfg.train_masks:
        if cfg.mask_type == mask_type.direct:
            if cfg.use_gt_bboxes:
                pos_masks = []
                for idx in range(batch_size):
                    pos_masks.append(masks[idx][idx_t[idx, pos[idx]]])
                masks_t = torch.cat(pos_masks, 0)
                masks_p = mask_data[pos, :].view(-1, cfg.mask_dim)
                losses['M'] = F.binary_cross_entropy(
                    torch.clamp(masks_p, 0, 1), masks_t,
                    reduction='sum') * cfg.mask_alpha
            else:
                losses['M'] = self.direct_mask_loss(
                    pos_idx, idx_t, loc_data, mask_data, priors, masks)
        elif cfg.mask_type == mask_type.lincomb:
            losses.update(
                self.lincomb_mask_loss(pos, idx_t, loc_data, mask_data,
                                       priors, proto_data, masks, gt_box_t,
                                       inst_data))

            if cfg.mask_proto_loss is not None:
                if cfg.mask_proto_loss == 'l1':
                    losses['P'] = torch.mean(
                        torch.abs(proto_data)
                    ) / self.l1_expected_area * self.l1_alpha
                elif cfg.mask_proto_loss == 'disj':
                    losses['P'] = -torch.mean(
                        torch.max(F.log_softmax(proto_data, dim=-1),
                                  dim=-1)[0])

    # Confidence loss
    if cfg.use_focal_loss:
        if cfg.use_sigmoid_focal_loss:
            losses['C'] = self.focal_conf_sigmoid_loss(conf_data, conf_t)
        elif cfg.use_objectness_score:
            losses['C'] = self.focal_conf_objectness_loss(
                conf_data, conf_t)
        else:
            losses['C'] = self.focal_conf_loss(conf_data, conf_t)
    else:
        losses['C'] = self.ohem_conf_loss(conf_data, conf_t, pos,
                                          batch_size)

    # These losses also don't depend on anchors
    if cfg.use_class_existence_loss:
        losses['E'] = self.class_existence_loss(predictions['classes'],
                                                class_existence_t)
    if cfg.use_semantic_segmentation_loss:
        losses['S'] = self.semantic_segmentation_loss(
            predictions['segm'], masks, labels)

    # Divide all losses by the number of positives.
    # Don't do it for loss[P] because that doesn't depend on the anchors.
    total_num_pos = num_pos.data.sum().float()
    for k in losses:
        if k not in ('P', 'E', 'S'):
            losses[k] /= total_num_pos
        else:
            losses[k] /= batch_size

    # Loss Key:
    #  - B: Box Localization Loss
    #  - C: Class Confidence Loss
    #  - M: Mask Loss
    #  - P: Prototype Loss
    #  - D: Coefficient Diversity Loss
    #  - E: Class Existence Loss
    #  - S: Semantic Segmentation Loss
    return losses
def back_sub(self, true_label, order=None):
    """
    Implements backsubstitution

    Starting from the affine map ``logit_true - logit_j`` the method walks
    backwards through the last ``order - 1`` layers, accumulating one weight
    matrix and one bias vector for the lower bound and one pair for the upper
    bound.  The accumulated maps are finally evaluated on the box
    ``[self.lows[-order], self.highs[-order]]``.

    Args:
        true_label (int): index (0 to 9) of the right label - used in the
            last step of backsubstitution
        order (int): defines number of layers to backsubstitute starting
            from the output.  Defaults to ``len(self.activations)``.

    Returns:
        (low_out, high_out): lower bounds taken from the ``W_low`` map and
        upper bounds taken from the ``W_high`` map.

    Raises:
        Exception: if a layer is neither ``AbstractLinear`` nor
            ``AbstractRelu``.
    """
    if order is None:
        # example: 10 layers, 9 actual lows and highs, 1 for the input,
        # 8 for the rest of the layers
        order = len(self.activations)

    low = self.lows[-order]
    high = self.highs[-order]

    num_classes = 10

    # we will start from the output
    bias_high = torch.zeros(num_classes - 1)
    bias_low = torch.zeros(num_classes - 1)

    # First, we insert the affine layer corresponding to the substractions
    # employed by the verifier to check the correctness of the prediction
    # output_j = logit_i - logit_j, where i is the true_label
    W_substract = torch.eye(num_classes - 1, num_classes - 1) * (-1)
    # inserting the column of ones for the true label
    W_substract = torch.cat([
        W_substract[:, 0:true_label],
        torch.ones(num_classes - 1, 1),
        W_substract[:, true_label:num_classes]
    ], 1)

    # now cumulating the last operation
    W_low = W_substract.clone()
    W_high = W_substract.clone()

    # order = layers -1 --> order -1 = layers -2 --> skipping first two layers
    # `self.layers[-0:]` would be the WHOLE list, so the "no layer to walk
    # through" case (order == 1) has to be handled explicitly.
    n_back = order - 1
    trailing_layers = self.layers[-n_back:] if n_back > 0 else []

    for layer in reversed(trailing_layers):
        if type(layer) == AbstractLinear:
            # An affine layer is exact: the same weight / bias is used for
            # both bounds.
            W_prime_high = layer.layer.weight
            b_prime_high = layer.layer.bias

            W_prime_low = layer.layer.weight
            b_prime_low = layer.layer.bias

            # The biases must be updated with the *current* W matrices,
            # i.e. before W_high / W_low are overwritten below.
            bias_high += torch.matmul(W_high, b_prime_high)
            bias_low += torch.matmul(W_low, b_prime_low)

            W_high = torch.matmul(W_high, W_prime_high)
            W_low = torch.matmul(W_low, W_prime_low)

        elif type(layer) == AbstractRelu:
            W_prime_low = layer.weight_low
            W_prime_high = layer.weight_high
            b_prime_high = layer.bias_high

            W_high, delta_bias_high = self.back_sub_relu(
                W_high, W_prime_high, W_prime_low, bias_high=b_prime_high)
            W_low, delta_bias_low = self.back_sub_relu(
                W_low, W_prime_high, W_prime_low, bias_high=b_prime_high,
                high=False)

            bias_high += delta_bias_high
            bias_low += delta_bias_low

        else:
            raise Exception("Unknown layer in the forward pass ")

    # finally computing the forward pass on the input ranges
    # note: no bias here (all the biases were already included in W)
    low_out, _ = AbstractLinear.forward_boxes(W_low, bias_low, low, high)
    _, high_out = AbstractLinear.forward_boxes(W_high, bias_high, low, high)

    return low_out, high_out
def main(args):
    """Train a (conditional) VAE on MNIST and write figures and checkpoints.

    Args:
        args: namespace providing ``seed``, ``root``, ``batch_size``,
            ``loss`` ('bce' or 'mse'), ``encoder_layer_sizes``,
            ``latent_size``, ``decoder_layer_sizes``, ``conditional``,
            ``learning_rate``, ``epochs``, ``print_every``, ``fig_root`` and
            ``vae_name``.

    Side effects:
        Everything is written below ``<fig_root>/<vae_name>``: sample grids
        (``E<epoch>I<iteration>.png``), latent scatter plots
        (``E<epoch>-Dist.png``), with the 'mse' loss the residual covariance
        (``Sigma<epoch>.png``), the final inverse covariance
        (``inSigma.npy``) and the model weights (``final_model.pt``).

    Raises:
        NameError: if ``args.loss`` is neither 'bce' nor 'mse'.
    """
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    dataset = MNIST(root=args.root,
                    train=True,
                    transform=transforms.ToTensor(),
                    download=True)
    data_loader = DataLoader(dataset=dataset,
                             batch_size=args.batch_size,
                             shuffle=True)

    if args.loss == 'bce':

        def loss_fn(recon_x, x, mean, log_var, invSigma=None):
            # `invSigma` is accepted only so both losses share a signature.
            BCE = torch.nn.functional.binary_cross_entropy(
                recon_x.view(-1, 28 * 28),
                x.view(-1, 28 * 28),
                reduction='sum')
            KLD = -0.5 * torch.sum(1 + log_var - mean.pow(2) - log_var.exp())
            return (BCE + KLD) / x.size(0)

    elif args.loss == 'mse':

        def loss_fn(recon_x, x, mean, log_var, invSigma=None):
            xdiff = x.view(-1, 28 * 28) - recon_x.view(-1, 28 * 28)
            # 0.5 * trace(invSigma @ xdiff^T @ xdiff): quadratic form of the
            # residuals under the current inverse covariance.
            MSE = 0.5 * torch.trace(invSigma.mm(torch.t(xdiff).mm(xdiff)))
            KLD = -0.5 * torch.sum(1 + log_var - mean.pow(2) - log_var.exp())
            return (MSE + KLD) / x.size(0)

    else:
        raise NameError('Wrong loss name. Choose either bce or mse.')

    vae = VAE(encoder_layer_sizes=args.encoder_layer_sizes,
              latent_size=args.latent_size,
              decoder_layer_sizes=args.decoder_layer_sizes,
              device=device,
              conditional=args.conditional,
              num_labels=10 if args.conditional else 0).to(device)

    optimizer = torch.optim.Adam(vae.parameters(), lr=args.learning_rate)

    logs = defaultdict(list)

    # Every output file lives in this directory.  It is created once, up
    # front, together with any missing parents.
    out_dir = os.path.join(args.fig_root, args.vae_name)
    os.makedirs(out_dir, exist_ok=True)

    # The inverse covariance starts as the identity and is re-estimated after
    # every epoch when the 'mse' loss is used.
    invSigma = torch.eye(28 * 28).to(device)

    for epoch in range(args.epochs):

        tracker_epoch = defaultdict(lambda: defaultdict(dict))

        vae.train()
        for iteration, (x, y) in enumerate(data_loader):

            x, y = x.to(device), y.to(device)

            if args.conditional:
                recon_x, mean, log_var, z = vae(x, y)
            else:
                recon_x, mean, log_var, z = vae(x)

            # Record the first two latent coordinates of every sample.
            for i, yi in enumerate(y):
                sample_id = len(tracker_epoch)
                tracker_epoch[sample_id]['x'] = z[i, 0].item()
                tracker_epoch[sample_id]['y'] = z[i, 1].item()
                tracker_epoch[sample_id]['label'] = yi.item()

            loss = loss_fn(recon_x, x, mean, log_var, invSigma)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            logs['loss'].append(loss.item())

            if (iteration % args.print_every == 0
                    or iteration == len(data_loader) - 1):
                print("Epoch {:02d}/{:02d} Batch {:04d}/{:d}, Loss {:9.4f}".
                      format(epoch, args.epochs, iteration,
                             len(data_loader) - 1, loss.item()))

                if args.conditional:
                    c = torch.arange(0, 10).long().unsqueeze(1)
                    x = vae.inference(n=c.size(0), c=c)
                else:
                    x = vae.inference(n=10)

                plt.figure(figsize=(5, 10))
                for p in range(10):
                    plt.subplot(5, 2, p + 1)
                    if args.conditional:
                        plt.text(0,
                                 0,
                                 "c={:d}".format(c[p].item()),
                                 color='black',
                                 backgroundcolor='white',
                                 fontsize=8)
                    plt.imshow(x[p].view(28, 28).cpu().data.numpy())
                    plt.axis('off')

                plt.savefig(os.path.join(
                    out_dir, "E{:d}I{:d}.png".format(epoch, iteration)),
                            dpi=300)
                plt.clf()
                plt.close('all')

        df = pd.DataFrame.from_dict(tracker_epoch, orient='index')
        g = sns.lmplot(x='x',
                       y='y',
                       hue='label',
                       data=df.groupby('label').head(100),
                       fit_reg=False,
                       legend=True)
        g.savefig(os.path.join(out_dir, "E{:d}-Dist.png".format(epoch)),
                  dpi=300)

        # Update the (inverse) covariance matrix:
        if args.loss == 'mse':
            with torch.no_grad():
                Sigma = torch.zeros(28 * 28, 28 * 28).to(device)
                vae.eval()
                for x, y in data_loader:
                    x, y = x.to(device), y.to(device)

                    if args.conditional:
                        recon_x, mean, log_var, z = vae(x, y)
                    else:
                        recon_x, mean, log_var, z = vae(x)

                    xdiff = x.view(-1, 28 * 28) - recon_x.view(-1, 28 * 28)
                    Sigma += torch.t(xdiff).mm(xdiff)

                # The small diagonal term keeps Sigma invertible.
                Sigma = Sigma / len(data_loader.dataset) + 1e-3 * torch.eye(
                    28 * 28).to(device)
                invSigma = torch.inverse(Sigma)

            plt.figure()
            plt.imshow(Sigma.cpu().data.numpy())
            plt.axis('off')
            plt.savefig(os.path.join(out_dir,
                                     "Sigma{:d}.png".format(epoch)),
                        dpi=300)
            plt.clf()
            plt.close('all')
            Sigma = None  # drop the reference to the 784 x 784 matrix

    # save invSigma:
    np.save(os.path.join(out_dir, "inSigma.npy"),
            invSigma.cpu().data.numpy())

    # save final model
    save_file = os.path.join(out_dir, 'final_model.pt')
    torch.save(vae.state_dict(), save_file)
def test_l2_norm():
    """Check ``u.l2_norm`` on a 2 x 2 matrix and on a 5 x 5 identity."""
    # Expected value for [[1, 1], [0, 1]] is (1 + sqrt(5)) / 2.
    expected = (1 + math.sqrt(5)) / 2
    upper_triangular = torch.tensor([[1, 1], [0, 1]], dtype=torch.float32)
    u.check_equal(u.l2_norm(upper_triangular), expected)

    # Expected value for an identity matrix is 1.
    identity = torch.eye(5)
    u.check_equal(u.l2_norm(identity), 1)
def discriminative_loss(self, embedding, seg_gt):
    """Return the variance, distance and regularisation terms of the loss.

    Args:
        embedding: per-pixel embeddings; ``embedding[b]`` is
            (embed_dim, H, W).
        seg_gt: instance labels; ``seg_gt[b]`` is used as a boolean mask over
            the trailing dimensions of ``embedding[b]``.  Label 0 is ignored.

    Returns:
        (var_loss, dist_loss, reg_loss), each averaged over the batch.
        ``reg_loss`` is never accumulated and therefore stays zero.
    """
    n_samples = embedding.shape[0]

    def _zero():
        # Scalar zero matching the embedding's dtype and device.
        return torch.tensor(0, dtype=embedding.dtype,
                            device=embedding.device)

    var_loss, dist_loss, reg_loss = _zero(), _zero(), _zero()

    for b in range(n_samples):
        pixels = embedding[b]  # (embed_dim, H, W)
        instance_map = seg_gt[b]

        lane_ids = torch.unique(instance_map)
        lane_ids = lane_ids[lane_ids != 0]
        lane_count = len(lane_ids)

        if lane_count == 0:
            # please refer to issue here: https://github.com/harryhan618/LaneNet/issues/12
            # Add a zero that still depends on `embedding`.
            dummy = embedding.sum()
            null_term = dummy * torch.zeros_like(dummy)
            var_loss = var_loss + null_term
            dist_loss = dist_loss + null_term
            reg_loss = reg_loss + null_term
            continue

        centroids = []
        for lane_id in lane_ids:
            lane_mask = instance_map == lane_id
            if not lane_mask.any():
                continue

            lane_pixels = pixels[:, lane_mask]
            centre = lane_pixels.mean(dim=1)
            centroids.append(centre)

            # ---------- var_loss -------------
            # Pull every pixel to within delta_v of its lane centroid.
            spread = torch.norm(
                lane_pixels - centre.reshape(self.embed_dim, 1), dim=0)
            hinge = F.relu(spread - self.delta_v)
            var_loss = var_loss + torch.mean(hinge ** 2) / lane_count

        centroids = torch.stack(centroids)  # (n_lane, embed_dim)

        if lane_count > 1:
            as_rows = centroids.reshape(-1, 1, self.embed_dim)
            as_cols = centroids.reshape(1, -1, self.embed_dim)
            # shape (num_lanes, num_lanes)
            gaps = torch.norm(as_rows - as_cols, dim=2)

            # diagonal elements are 0, now mask above delta_d
            diagonal_offset = torch.eye(
                lane_count, dtype=gaps.dtype,
                device=gaps.device) * self.delta_d
            gaps = gaps + diagonal_offset

            # divided by two for double calculated loss above, for implementation convenience
            push = torch.sum(F.relu(-gaps + self.delta_d) ** 2)
            dist_loss = dist_loss + push / (lane_count *
                                            (lane_count - 1)) / 2

        # reg_loss is not used in original paper
        # reg_loss = reg_loss + torch.mean(torch.norm(centroid_mean, dim=1))

    var_loss = var_loss / n_samples
    dist_loss = dist_loss / n_samples
    reg_loss = reg_loss / n_samples
    return var_loss, dist_loss, reg_loss
for angle in np.linspace(-180, 180, args.num_views + 1)[:-1] ], 0, ) # (NV, 4, 4) render_rays = util.gen_rays(render_poses, W, H, focal, z_near, z_far).to(device=device) # Params #num_images = 11 #num_images = 11 num_images = 6 output_name = 'eval_real_ct_out' cam_poses = [] cam_pose = torch.eye(4, device=device) cam_pose[2, -1] = args.radius cam_poses.append(cam_pose) for i in range(1, num_images): cam_pose = torch.eye(4, device=device) #angle = (math.pi / 6.0) * i # 30 degs angle = (math.pi / 3.0) * i # 30 degs # R_x #cam_pose_2[1, 1] = math.cos(angle) #cam_pose_2[1, 2] = -math.sin(angle) #cam_pose_2[2, 1] = math.sin(angle) #cam_pose_2[2, 2] = math.cos(angle) # R_y
) # new cell state; note that tanh(x)=2*sigmoid(2*x) - 1 h = o * torch.tanh(c) # new hidden state return h @ self.W2[:-1] + self.W2[-1] lstm_net = LSTM_net() @torch.jit.script def train_loss(xy_pair): # logistic loss # type: (Tuple[Tensor, Tensor]) -> Tensor return -torch.mean( torch.log(torch.sigmoid(xy_pair[1] * lstm_net(xy_pair[0])))) Qs = [[torch.eye(W.shape[0]), torch.eye(W.shape[1])] for W in lstm_net.parameters()] lr = 0.02 grad_norm_clip_thr = 1.0 Losses = [] for num_iter in range(100000): loss = train_loss(generate_train_data()) grads = torch.autograd.grad(loss, lstm_net.parameters(), create_graph=True) vs = [torch.randn_like(W) for W in lstm_net.parameters()] Hvs = torch.autograd.grad(grads, lstm_net.parameters(), vs) with torch.no_grad(): Qs = [ psgd.update_precond_kron(Qlr[0], Qlr[1], v, Hv) for (Qlr, v, Hv) in zip(Qs, vs, Hvs) ] pre_grads = [
def _get_ortho(self, U, V):
    """Return B-orthonormal U with columns are B-orthogonal to V.

    .. note:: When `bparams["ortho_use_drop"] == False` then
              `_get_ortho` is based on the Algorithm 3 from
              [DuerschPhD2015] that is a slight modification of
              the corresponding algorithm introduced in
              [StathopolousWu2002]. Otherwise, the method
              implements Algorithm 6 from [DuerschPhD2015]

    .. note:: If all U columns are B-collinear to V then the
              returned tensor U will be empty.

    Arguments:

      U (Tensor) : initial approximation, size is (m, n)
      V (Tensor) : B-orthogonal external basis, size is (m, k)

    Returns:

      U (Tensor) : B-orthonormal columns (:math:`U^T B U = I`)
                   such that :math:`V^T B U=0`, size is (m, n1),
                   where `n1 = n` if `drop` is `False`, otherwise
                   `n1 <= n`.

    Side effects:

      The relative errors of every iteration are stored in
      `self.fvars` under `ortho_*_rerr` keys, and the final
      loop indices in `self.ivars['ortho_i']` and
      `self.ivars['ortho_j']`.
    """
    mm = torch.matmul
    mm_B = _utils.matmul
    m = self.iparams['m']
    tau_ortho = self.fparams['ortho_tol']
    tau_drop = self.fparams['ortho_tol_drop']
    tau_replace = self.fparams['ortho_tol_replace']
    i_max = self.iparams['ortho_i_max']
    j_max = self.iparams['ortho_j_max']
    # when use_drop==True, enable dropping U columns that have
    # small contribution to the `span([U, V])`.
    use_drop = self.bparams['ortho_use_drop']

    # clean up variables from the previous call
    for vkey in list(self.fvars.keys()):
        if vkey.startswith('ortho_') and vkey.endswith('_rerr'):
            self.fvars.pop(vkey)
    self.ivars.pop('ortho_i', 0)
    self.ivars.pop('ortho_j', 0)

    BV_norm = torch.norm(mm_B(self.B, V))
    BU = mm_B(self.B, U)
    VBU = mm(_utils.transpose(V), BU)
    # Pre-initialise the loop indices so they are defined even when a loop
    # body never runs.
    i = j = 0
    stats = ''
    # Outer loop: remove the V component from U.
    for i in range(i_max):
        U = U - mm(V, VBU)
        drop = False
        tau_svqb = tau_drop
        # Inner loop: re-orthonormalise U via `_get_svqb` until
        # U^T B U is close enough to the identity.
        for j in range(j_max):
            if use_drop:
                # The first pass is called with drop=False and tau_drop; all
                # later passes use drop=True and tau_replace.
                U = self._get_svqb(U, drop, tau_svqb)
                drop = True
                tau_svqb = tau_replace
            else:
                U = self._get_svqb(U, False, tau_replace)
            if torch.numel(U) == 0:
                # all initial U columns are B-collinear to V
                self.ivars['ortho_i'] = i
                self.ivars['ortho_j'] = j
                return U
            BU = mm_B(self.B, U)
            UBU = mm(_utils.transpose(U), BU)
            U_norm = torch.norm(U)
            BU_norm = torch.norm(BU)
            # Residual of the B-orthonormality condition U^T B U = I.
            R = UBU - torch.eye(UBU.shape[-1],
                                device=UBU.device,
                                dtype=UBU.dtype)
            R_norm = torch.norm(R)
            # https://github.com/pytorch/pytorch/issues/33810 workaround:
            rerr = float(R_norm) * float(BU_norm * U_norm)**-1
            vkey = 'ortho_UBUmI_rerr[{}, {}]'.format(i, j)
            self.fvars[vkey] = rerr
            if rerr < tau_ortho:
                break
        # Relative error of the B-orthogonality condition V^T B U = 0.
        VBU = mm(_utils.transpose(V), BU)
        VBU_norm = torch.norm(VBU)
        U_norm = torch.norm(U)
        rerr = float(VBU_norm) * float(BV_norm * U_norm)**-1
        vkey = 'ortho_VBU_rerr[{}]'.format(i)
        self.fvars[vkey] = rerr
        if rerr < tau_ortho:
            break
        # Not converged yet: U and V together must not have more columns
        # than m.
        if m < U.shape[-1] + V.shape[-1]:
            # TorchScript needs the class var to be assigned to a local to
            # do optional type refinement
            B = self.B
            assert B is not None
            raise ValueError(
                'Overdetermined shape of U:'
                ' #B-cols(={}) >= #U-cols(={}) + #V-cols(={}) must hold'.
                format(B.shape[-1], U.shape[-1], V.shape[-1]))
    self.ivars['ortho_i'] = i
    self.ivars['ortho_j'] = j
    return U
def get_body_parameters_from_urdf(self, i, link):
    """Collect the joint and inertial parameters of one link as a dict.

    Args:
        i: index of the body; ``0`` is treated as the fixed base, for which
            no joint is looked up.
        link: link object providing ``name`` and ``inertial``.

    Returns:
        dict with the keys ``joint_id``, ``link_name``, ``rot_angles``,
        ``trans``, ``joint_name``, ``joint_type``, ``joint_limits``,
        ``joint_damping``, ``joint_axis``, ``mass``, ``com`` and
        ``inertia_mat``.  Tensors are created on ``self._device``.
    """
    device = self._device

    if i != 0:
        # find joint that is the "child" of this body according to urdf
        joint = self.robot.joints[self.find_joint_of_body(link.name)]
        joint_name = joint.name
        rot_angles = torch.tensor(joint.origin.rotation,
                                  dtype=torch.float32,
                                  device=device)
        trans = torch.tensor(joint.origin.position,
                             dtype=torch.float32,
                             device=device)
        joint_type = joint.type

        # Values used for fixed joints; overwritten below otherwise.
        joint_limits = None
        joint_damping = torch.zeros(1, device=device)
        joint_axis = torch.zeros((1, 3), device=device)

        if joint_type != "fixed":
            limit = joint.limit
            joint_limits = {
                "effort": limit.effort,
                "lower": limit.lower,
                "upper": limit.upper,
                "velocity": limit.velocity,
            }
            try:
                joint_damping = torch.tensor([joint.dynamics.damping],
                                             dtype=torch.float32,
                                             device=device)
            except AttributeError:
                # No damping information available: fall back to zero.
                joint_damping = torch.zeros(1, device=device)
            joint_axis = torch.tensor(joint.axis,
                                      dtype=torch.float32,
                                      device=device).reshape(1, 3)
    else:
        # The base body has no parent joint.
        rot_angles = torch.zeros(3, device=device)
        trans = torch.zeros(3, device=device)
        joint_name = "base_joint"
        joint_type = "fixed"
        joint_limits = None
        joint_damping = None
        joint_axis = torch.zeros((1, 3), device=device)

    body_params = {
        "joint_id": i,
        "link_name": link.name,
        "rot_angles": rot_angles,
        "trans": trans,
        "joint_name": joint_name,
        "joint_type": joint_type,
        "joint_limits": joint_limits,
        "joint_damping": joint_damping,
        "joint_axis": joint_axis,
    }

    inertial = link.inertial
    if inertial is None:
        body_params["mass"] = torch.ones((1, ), device=device)
        body_params["com"] = torch.zeros((1, 3), device=device)
        body_params["inertia_mat"] = torch.eye(3, 3,
                                               device=device).unsqueeze(0)
        print(
            "Warning: No dynamics information for link: {}, setting all inertial properties to 1."
            .format(link.name))
        return body_params

    mass = torch.tensor([inertial.mass], dtype=torch.float32, device=device)
    com = torch.tensor(inertial.origin.position,
                       dtype=torch.float32,
                       device=device).reshape((1, 3))

    # Fill the symmetric 3 x 3 inertia matrix from its six distinct entries.
    inertia = inertial.inertia
    rows = (
        (inertia.ixx, inertia.ixy, inertia.ixz),
        (inertia.ixy, inertia.iyy, inertia.iyz),
        (inertia.ixz, inertia.iyz, inertia.izz),
    )
    inert_mat = torch.zeros((3, 3), device=device)
    for r, row in enumerate(rows):
        for c, value in enumerate(row):
            inert_mat[r, c] = value

    body_params["mass"] = mass
    body_params["com"] = com
    body_params["inertia_mat"] = inert_mat.unsqueeze(0)
    return body_params
def init_pose6d(self):
    """Return a 4 x 4 float32 identity matrix placed on ``self.device``."""
    identity_pose = torch.eye(4, dtype=torch.float32)
    return identity_pose.to(self.device)
def one_hot(indice, num_classes):
    """Return the rows of a ``num_classes`` identity matrix selected by ``indice``.

    The result lives on the device of ``indice`` and has one trailing
    dimension of size ``num_classes`` more than ``indice``.
    """
    # Indexing the identity matrix picks one one-hot row per index.
    return torch.eye(num_classes, device=indice.device)[indice]
def _apply_to_world(vertices, normals, to_world):
    """Apply a 4x4 homogeneous transform to row-vector vertices and normals.

    Vertices are lifted to homogeneous coordinates, multiplied by the
    transpose of ``to_world`` (row-vector convention) and de-homogenized.
    ``normals`` may be ``None`` and is then returned unchanged.
    """
    to_world = to_world.to(vertices.device)
    ones = vertices.new_ones((vertices.shape[0], 1))
    homogeneous = torch.cat((vertices, ones), dim=1)
    homogeneous = homogeneous @ torch.transpose(to_world, 0, 1)
    homogeneous = homogeneous / homogeneous[:, 3:4]
    vertices = homogeneous[:, 0:3].contiguous()
    if normals is not None:
        # Normals transform with the inverse transpose: n' = M^{-T} n for
        # column vectors, which is n_row @ M^{-1} for the row vectors stored
        # here. (Right-multiplying by inverse(M^T) would apply M^{-1}
        # instead and rotate normals the wrong way.)
        normals = (normals @ torch.inverse(to_world)[:3, :3]).contiguous()
    return vertices, normals


def _parse_emitter_radiance(emitter, current):
    """Return the radiance declared under ``emitter``, or ``current`` if none.

    A single-channel radiance is replicated into three channels.
    """
    for grandchild in emitter:
        if grandchild.attrib['name'] == 'radiance':
            current = parse_vector(grandchild.attrib['value'])
            if current.shape[0] == 1:
                current = torch.tensor(
                    [current[0], current[0], current[0]])
    return current


def _to_render_device(tensor):
    """Copy ``tensor`` to pyredner's CUDA device; ``None`` passes through."""
    if tensor is None:
        return None
    return tensor.cuda(device=pyredner.get_device())


def parse_shape(node, material_dict, shape_id, shape_group_dict=None):
    """Build a ``pyredner.Shape`` (and optional area light) from a shape node.

    Supported ``node.attrib['type']`` values are ``'obj'``, ``'serialized'``,
    ``'rectangle'`` and ``'instance'``.

    Args:
        node: Element with an ``attrib`` mapping whose children expose
            ``tag`` and ``attrib`` (ElementTree-style).
        material_dict: Maps the ``id`` of a ``<ref>`` child to a material id.
        shape_id: Shape index handed to ``pyredner.AreaLight`` when the node
            declares an emitter.
        shape_group_dict: Maps the ``id`` of a ``<ref>`` child to an already
            parsed shape; only used for ``'instance'`` nodes.

    Returns:
        Tuple ``(shape, light)`` where ``light`` is a ``pyredner.AreaLight``
        or ``None``.

    Raises:
        AssertionError: If the shape type is not supported.
    """
    shape_type = node.attrib['type']

    if shape_type == 'obj' or shape_type == 'serialized':
        to_world = torch.eye(4)
        serialized_shape_id = 0
        mat_id = -1
        light_intensity = None
        filename = ''
        max_smooth_angle = -1
        for child in node:
            if 'name' in child.attrib:
                name = child.attrib['name']
                if name == 'filename':
                    filename = child.attrib['value']
                elif name == 'toWorld':
                    to_world = parse_transform(child)
                elif name == 'shapeIndex':
                    serialized_shape_id = int(child.attrib['value'])
                elif name == 'maxSmoothAngle':
                    max_smooth_angle = float(child.attrib['value'])
            if child.tag == 'ref':
                mat_id = material_dict[child.attrib['id']]
            elif child.tag == 'emitter':
                light_intensity = _parse_emitter_radiance(
                    child, light_intensity)

        if shape_type == 'obj':
            _, mesh_list, _ = pyredner.load_obj(filename)
            mesh = mesh_list[0][1]
            # Convert to CPU for rebuild_topology
            vertices = mesh.vertices.cpu()
            indices = mesh.indices.cpu()
            uvs = mesh.uvs
            normals = mesh.normals
            uv_indices = mesh.uv_indices
            normal_indices = mesh.normal_indices
            if uvs is not None:
                uvs = uvs.cpu()
            if normals is not None:
                normals = normals.cpu()
            if uv_indices is not None:
                uv_indices = uv_indices.cpu()
        else:
            mitsuba_tri_mesh = redner.load_serialized(filename,
                                                      serialized_shape_id)
            vertices = torch.from_numpy(mitsuba_tri_mesh.vertices)
            indices = torch.from_numpy(mitsuba_tri_mesh.indices)
            uvs = torch.from_numpy(mitsuba_tri_mesh.uvs)
            normals = torch.from_numpy(mitsuba_tri_mesh.normals)
            if uvs.shape[0] == 0:
                uvs = None
            if normals.shape[0] == 0:
                normals = None
            # Serialized doesn't use different indices for UV & normal
            uv_indices = None
            normal_indices = None

        # Transform the vertices and normals
        vertices, normals = _apply_to_world(vertices, normals, to_world)

        if max_smooth_angle >= 0:
            if normals is None:
                normals = torch.zeros_like(vertices)
            new_num_vertices = redner.rebuild_topology(
                redner.float_ptr(vertices.data_ptr()),
                redner.int_ptr(indices.data_ptr()),
                redner.float_ptr(uvs.data_ptr() if uvs is not None else 0),
                redner.float_ptr(
                    normals.data_ptr() if normals is not None else 0),
                redner.int_ptr(
                    uv_indices.data_ptr() if uv_indices is not None else 0),
                int(vertices.shape[0]),
                int(indices.shape[0]),
                max_smooth_angle)
            print('Rebuilt topology, original vertices size: {}, new vertices size: {}'.format(
                int(vertices.shape[0]), new_num_vertices))
            vertices.resize_(new_num_vertices, 3)
            if uvs is not None:
                uvs.resize_(new_num_vertices, 2)
            if normals is not None:
                normals.resize_(new_num_vertices, 3)

        lgt = None
        if light_intensity is not None:
            lgt = pyredner.AreaLight(shape_id, light_intensity)

        if pyredner.get_use_gpu():
            # Copy to GPU
            vertices = _to_render_device(vertices)
            indices = _to_render_device(indices)
            uvs = _to_render_device(uvs)
            normals = _to_render_device(normals)
            uv_indices = _to_render_device(uv_indices)
            normal_indices = _to_render_device(normal_indices)
        return pyredner.Shape(vertices,
                              indices,
                              uvs=uvs,
                              normals=normals,
                              uv_indices=uv_indices,
                              normal_indices=normal_indices,
                              material_id=mat_id), lgt

    elif shape_type == 'rectangle':
        # Two triangles spanning the square [-1, 1]^2 in the z = 0 plane.
        indices = torch.tensor([[0, 2, 1], [1, 2, 3]], dtype=torch.int32)
        vertices = torch.tensor([[-1.0, -1.0, 0.0],
                                 [-1.0, 1.0, 0.0],
                                 [1.0, -1.0, 0.0],
                                 [1.0, 1.0, 0.0]])
        uvs = None
        normals = None
        to_world = torch.eye(4)
        mat_id = -1
        light_intensity = None
        for child in node:
            if 'name' in child.attrib:
                if child.attrib['name'] == 'toWorld':
                    to_world = parse_transform(child)
            if child.tag == 'ref':
                mat_id = material_dict[child.attrib['id']]
            elif child.tag == 'emitter':
                light_intensity = _parse_emitter_radiance(
                    child, light_intensity)

        # Transform the vertices and normals
        vertices, normals = _apply_to_world(vertices, normals, to_world)

        lgt = None
        if light_intensity is not None:
            lgt = pyredner.AreaLight(shape_id, light_intensity)

        if pyredner.get_use_gpu():
            # Copy to GPU
            vertices = _to_render_device(vertices)
            indices = _to_render_device(indices)
            uvs = _to_render_device(uvs)
            normals = _to_render_device(normals)
        return pyredner.Shape(vertices,
                              indices,
                              uvs=uvs,
                              normals=normals,
                              material_id=mat_id), lgt

    # Add instance support
    # TODO (simply transform & create a new shape now)
    elif shape_type == 'instance':
        shape = None
        # Default to the identity so an instance without a toWorld child
        # does not hit an unbound local.
        to_world = torch.eye(4)
        for child in node:
            if 'name' in child.attrib:
                if child.attrib['name'] == 'toWorld':
                    to_world = parse_transform(child)
            if child.tag == 'ref':
                shape = shape_group_dict[child.attrib['id']]

        # transform instance (the helper moves to_world onto the device of
        # the referenced shape's vertices)
        vertices, normals = _apply_to_world(shape.vertices, shape.normals,
                                            to_world)
        return pyredner.Shape(vertices,
                              shape.indices,
                              uvs=shape.uvs,
                              normals=normals,
                              material_id=shape.material_id), None

    else:
        print('Shape type {} is not supported!'.format(node.attrib['type']))
        # Raised explicitly so the failure is not stripped under `python -O`.
        raise AssertionError(
            'Shape type {} is not supported!'.format(node.attrib['type']))
def __init__(self, args):
    """Set up the SVD head.

    Args:
        args: Configuration object; only ``args.emb_dims`` is read here.
    """
    super(SVDHead, self).__init__()
    self.emb_dims = args.emb_dims

    # Constant (non-trainable) 3x3 identity with the last diagonal entry
    # negated, i.e. diag(1, 1, -1).
    reflection = torch.eye(3)
    reflection[2, 2] = -1
    self.reflect = nn.Parameter(reflection, requires_grad=False)
def solve_lqr_subproblem(self, x_init, C, c, F, f, cost, dynamics, x, u,
                         verbose, no_op_forward=False):
    """Run one ``LQRStep``, optionally augmented with a slew-rate penalty.

    When ``self.slew_rate_penalty`` is ``None`` or ``cost`` is a ``Module``,
    the step is solved directly on ``(C, c, F, f)``. Otherwise the state is
    augmented with the previous control, a quadratic penalty on the
    difference between previous and current control is added to the cost,
    and the augmented problem is solved; the extra state entries are
    stripped from the returned trajectory.

    Args:
        x_init: Initial state.
        C, c: Quadratic and linear cost terms. ``C.shape[1]`` is used as the
            batch size (presumably ``C`` is ``(T, n_batch, nsc, nsc)``;
            confirm against the caller).
        F, f: Dynamics terms; ``f`` may be ``None``.
        cost: Cost object (``QuadCost``, a ``Module`` or another type).
        dynamics: Dynamics object (``LinDx`` or another type).
        x, u: Current state and control trajectories.
        verbose: Verbosity forwarded to ``LQRStep`` in the direct case.
        no_op_forward: Forwarded to ``LQRStep``.

    Returns:
        Tuple ``(x, u, _lqr)``: the new trajectories and the ``LQRStep``
        that produced them.
    """
    if self.slew_rate_penalty is None or isinstance(cost, Module):
        _lqr = LQRStep(
            n_state=self.n_state,
            n_ctrl=self.n_ctrl,
            T=self.T,
            verbose=verbose,
            u_lower=self.u_lower,
            u_upper=self.u_upper,
            u_zero_I=self.u_zero_I,
            true_cost=cost,
            true_dynamics=dynamics,
            delta_u=self.delta_u,
            linesearch_decay=self.linesearch_decay,
            max_linesearch_iter=self.max_linesearch_iter,
            delta_space=True,
            current_x=x,
            current_u=u,
            back_eps=self.back_eps,
            no_op_forward=no_op_forward,
        )
        # Empty tensor stands in for a missing f, as in the slew-rate path.
        e = Variable(torch.Tensor())
        x, u = _lqr(x_init, C, c, F, f if f is not None else e)
        return x, u, _lqr

    n_ctrl = self.n_ctrl
    # Augmented state = [previous control, original state].
    _n_state = self.n_state + n_ctrl
    _nsc = _n_state + n_ctrl
    n_batch = C.shape[1]

    # Quadratic slew-rate term coupling the previous control (first n_ctrl
    # entries) with the current control (last n_ctrl entries). Built as a
    # torch tensor matching C so it can be combined with C below.
    _C = torch.zeros(self.T, n_batch, _nsc, _nsc).type_as(C)
    half_gamI = self.slew_rate_penalty * torch.eye(n_ctrl).type_as(C)
    _C[:, :, :n_ctrl, :n_ctrl] = half_gamI
    _C[:, :, -n_ctrl:, :n_ctrl] = -half_gamI
    _C[:, :, :n_ctrl, -n_ctrl:] = -half_gamI
    _C[:, :, -n_ctrl:, -n_ctrl:] = half_gamI
    slew_C = _C.clone()
    # Embed the original C in the lower-right block of the augmented cost.
    _C = _C + torch.nn.ZeroPad2d((n_ctrl, 0, n_ctrl, 0))(C)
    _c = torch.cat((
        torch.zeros(self.T, n_batch, n_ctrl).type_as(c), c), 2)

    # Augmented dynamics: the first n_ctrl rows copy the current control
    # into the "previous control" slot of the next state.
    _F0 = torch.cat((
        torch.zeros(n_ctrl, self.n_state + n_ctrl),
        torch.eye(n_ctrl),
    ), 1).type_as(F).unsqueeze(0).unsqueeze(0).repeat(
        self.T - 1, n_batch, 1, 1
    )
    _F1 = torch.cat((
        torch.zeros(
            self.T - 1, n_batch, self.n_state, n_ctrl
        ).type_as(F), F), 3)
    _F = torch.cat((_F0, _F1), 2)

    if f is not None:
        _f = torch.cat((
            torch.zeros(self.T - 1, n_batch, n_ctrl).type_as(f), f), 2)
    else:
        _f = Variable(torch.Tensor())

    u_data = util.detach_maybe(u)
    if self.prev_ctrl is not None:
        prev_u = self.prev_ctrl
        # Promote to three dimensions so it can be concatenated with u.
        if prev_u.ndimension() == 1:
            prev_u = prev_u.unsqueeze(0)
        if prev_u.ndimension() == 2:
            prev_u = prev_u.unsqueeze(0)
        prev_u = prev_u.data
    else:
        prev_u = torch.zeros(1, n_batch, n_ctrl).type_as(u)

    # Controls shifted by one step: u_{t-1} for every t.
    utm1s = torch.cat((prev_u, u_data[:-1])).clone()
    _x = torch.cat((utm1s, x), 2)
    _x_init = torch.cat((Variable(prev_u[0]), x_init), 1)

    if not isinstance(dynamics, LinDx):
        _dynamics = CtrlPassthroughDynamics(dynamics)
    else:
        _dynamics = None

    if isinstance(cost, QuadCost):
        _true_cost = QuadCost(_C, _c)
    else:
        _true_cost = SlewRateCost(
            cost, slew_C, self.n_state, n_ctrl
        )

    _lqr = LQRStep(
        n_state=_n_state,
        n_ctrl=n_ctrl,
        T=self.T,
        u_lower=self.u_lower,
        u_upper=self.u_upper,
        u_zero_I=self.u_zero_I,
        true_cost=_true_cost,
        true_dynamics=_dynamics,
        delta_u=self.delta_u,
        linesearch_decay=self.linesearch_decay,
        max_linesearch_iter=self.max_linesearch_iter,
        delta_space=True,
        current_x=_x,
        current_u=u,
        back_eps=self.back_eps,
        no_op_forward=no_op_forward,
    )
    x, u = _lqr(_x_init, _C, _c, _F, _f)
    # Drop the previous-control entries prepended to the state.
    x = x[:, :, n_ctrl:]

    return x, u, _lqr