def adpW(self, x):
    """Compute pairwise attention weights and return them as diagonal matrices.

    The input is detached from the autograd graph, passed through
    ``adp_metric_embedding1..3`` (each followed by its ``_bn`` module and a
    ReLU), and finally through ``adp_metric_embedding4`` with a sigmoid.
    The resulting per-sample vector is split in two at
    ``x.size(1) // 2`` (``x`` being the output of the third stage) and each
    half is placed on the diagonal of its own matrix.

    Args:
        x: input batch; assumed 2-D ``(batch, features)`` -- TODO confirm
            against the embedding modules.

    Returns:
        ``(pairwise_att1, pairwise_att2)``: two batches of diagonal matrices
        built from the first and the second half of the attention vector.
    """
    x = x.detach()
    x = F.relu(self.adp_metric_embedding1_bn(self.adp_metric_embedding1(x)))
    x = F.relu(self.adp_metric_embedding2_bn(self.adp_metric_embedding2(x)))
    x = F.relu(self.adp_metric_embedding3_bn(self.adp_metric_embedding3(x)))
    pairwise_att = torch.sigmoid(self.adp_metric_embedding4(x))

    # Integer division: a float split point is not a valid slice index.
    half = x.size(1) // 2
    # diag_embed builds one diagonal matrix per sample without a Python loop.
    pairwise_att1 = torch.diag_embed(pairwise_att[:, :half])
    # The second output must come from the second half of the vector.
    pairwise_att2 = torch.diag_embed(pairwise_att[:, half:])
    return pairwise_att1, pairwise_att2
def _test_jacobian(self, input_dim, hidden_dim):
    """Compare the flow's analytic log-det-Jacobian with a finite-difference one.

    A Jacobian is estimated numerically by perturbing one input coordinate
    at a time by ``self.epsilon``. After re-indexing rows and columns with
    the permutation returned by ``iaf.arn.get_permutation()``, the test
    asserts that:

    * the analytic and numeric log-determinants agree within ``self.epsilon``;
    * every diagonal entry of the permuted Jacobian is non-zero;
    * the strictly lower triangle of the permuted Jacobian is all zeros.

    Args:
        input_dim: dimensionality of the flow input.
        hidden_dim: hidden size passed to ``InverseAutoregressiveFlow``.
    """
    iaf = InverseAutoregressiveFlow(input_dim, hidden_dim, sigmoid_bias=0.5)

    def nonzero(x):
        # 1 where x != 0, 0 elsewhere.
        return torch.sign(torch.abs(x))

    x = torch.randn(1, input_dim)
    iaf_x = iaf(x)
    analytic_ldt = iaf.log_abs_det_jacobian(x, iaf_x).data.sum()

    jacobian = torch.zeros(input_dim, input_dim)
    for j in range(input_dim):
        # The perturbed forward pass depends only on j, so run it once per
        # row rather than once per (j, k) pair.
        epsilon_vector = torch.zeros(1, input_dim)
        epsilon_vector[0, j] = self.epsilon
        delta = (iaf(x + epsilon_vector) - iaf_x) / self.epsilon
        for k in range(input_dim):
            jacobian[j, k] = float(delta[0, k].data.sum())

    permutation = iaf.arn.get_permutation()
    permuted_jacobian = jacobian.clone()
    for j in range(input_dim):
        for k in range(input_dim):
            permuted_jacobian[j, k] = jacobian[permutation[j], permutation[k]]

    numeric_ldt = torch.sum(torch.log(torch.diag(permuted_jacobian)))
    ldt_discrepancy = np.fabs(analytic_ldt - numeric_ldt)

    diag_sum = torch.sum(torch.diag(nonzero(permuted_jacobian)))
    lower_sum = torch.sum(torch.tril(nonzero(permuted_jacobian), diagonal=-1))

    assert ldt_discrepancy < self.epsilon
    assert diag_sum == float(input_dim)
    assert lower_sum == float(0.0)
def _mmd2(K_XX, K_XY, K_YY, const_diagonal=False, biased=False): m = K_XX.size(0) # assume X, Y are same shape # Get the various sums of kernels that we'll use # Kts drop the diagonal, but we don't need to compute them explicitly if const_diagonal is not False: diag_X = diag_Y = const_diagonal sum_diag_X = sum_diag_Y = m * const_diagonal else: diag_X = torch.diag(K_XX) # (m,) diag_Y = torch.diag(K_YY) # (m,) sum_diag_X = torch.sum(diag_X) sum_diag_Y = torch.sum(diag_Y) Kt_XX_sums = K_XX.sum(dim=1) - diag_X # \tilde{K}_XX * e = K_XX * e - diag_X Kt_YY_sums = K_YY.sum(dim=1) - diag_Y # \tilde{K}_YY * e = K_YY * e - diag_Y K_XY_sums_0 = K_XY.sum(dim=0) # K_{XY}^T * e Kt_XX_sum = Kt_XX_sums.sum() # e^T * \tilde{K}_XX * e Kt_YY_sum = Kt_YY_sums.sum() # e^T * \tilde{K}_YY * e K_XY_sum = K_XY_sums_0.sum() # e^T * K_{XY} * e if biased: mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m) + (Kt_YY_sum + sum_diag_Y) / (m * m) - 2.0 * K_XY_sum / (m * m)) else: mmd2 = (Kt_XX_sum / (m * (m - 1)) + Kt_YY_sum / (m * (m - 1)) - 2.0 * K_XY_sum / (m * m)) return mmd2
def __init__(self, hidden_size, num_inputs, action_space):
    """Create the layers and constant masks of the policy network.

    Args:
        hidden_size: width of the two hidden linear layers.
        num_inputs: number of input features.
        action_space: object whose ``shape[0]`` gives the number of outputs.
    """
    super(Policy, self).__init__()
    self.action_space = action_space
    num_outputs = action_space.shape[0]

    def unit_batch_norm(width):
        # BatchNorm1d with weight forced to 1 and bias forced to 0.
        norm = nn.BatchNorm1d(width)
        norm.weight.data.fill_(1)
        norm.bias.data.fill_(0)
        return norm

    def damped_linear(fan_in, fan_out):
        # Linear layer whose initial weight and bias are scaled by 0.1.
        layer = nn.Linear(fan_in, fan_out)
        layer.weight.data.mul_(0.1)
        layer.bias.data.mul_(0.1)
        return layer

    # Layers are created in the original order so that parameter
    # registration order and random initialisation draws are unchanged.
    self.bn0 = unit_batch_norm(num_inputs)

    self.linear1 = nn.Linear(num_inputs, hidden_size)
    self.bn1 = unit_batch_norm(hidden_size)

    self.linear2 = nn.Linear(hidden_size, hidden_size)
    self.bn2 = unit_batch_norm(hidden_size)

    self.V = damped_linear(hidden_size, 1)
    self.mu = damped_linear(hidden_size, num_outputs)
    self.L = damped_linear(hidden_size, num_outputs ** 2)

    # (1, n, n) masks: strictly-lower-triangular ones and the identity.
    ones = torch.ones(num_outputs, num_outputs)
    self.tril_mask = Variable(torch.tril(ones, diagonal=-1).unsqueeze(0))
    self.diag_mask = Variable(torch.eye(num_outputs).unsqueeze(0))
def forward(self, pred, labels, targets):
    """Smooth-L1 loss on the predictions belonging to each sample's class.

    For sample ``i`` the two values ``pred[i, labels[i] - 1, 0:2]`` are
    selected and compared against ``targets``.

    Args:
        pred: tensor indexed as ``(sample, class, component)``; only
            components 0 and 1 are used.
        labels: 1-based class label per sample (one label per row of
            ``pred`` is assumed).
        targets: regression targets; flattened before the comparison.

    Returns:
        ``2 * self.smooth_l1_loss(selected_predictions, targets)``.
    """
    indexer = labels.data - 1
    # Pick (i, labels[i] - 1) directly instead of materialising the full
    # N x N x C selection and reading only its diagonal.
    rows = torch.arange(pred.size(0), device=pred.device)
    class_pred = pred[rows, indexer, :2]
    loss = self.smooth_l1_loss(class_pred.reshape(-1), targets.view(-1)) * 2
    return loss
def phi(A):
    """
    Keep the lower triangle of A (diagonal included) and scale the
    diagonal by one half.
    """
    lower = A.tril()
    half_diagonal = 0.5 * torch.diag(torch.diag(lower))
    return lower - half_diagonal
def _mmd2_and_variance(K_XX, K_XY, K_YY, const_diagonal=False, biased=False): m = K_XX.size(0) # assume X, Y are same shape # Get the various sums of kernels that we'll use # Kts drop the diagonal, but we don't need to compute them explicitly if const_diagonal is not False: diag_X = diag_Y = const_diagonal sum_diag_X = sum_diag_Y = m * const_diagonal sum_diag2_X = sum_diag2_Y = m * const_diagonal**2 else: diag_X = torch.diag(K_XX) # (m,) diag_Y = torch.diag(K_YY) # (m,) sum_diag_X = torch.sum(diag_X) sum_diag_Y = torch.sum(diag_Y) sum_diag2_X = diag_X.dot(diag_X) sum_diag2_Y = diag_Y.dot(diag_Y) Kt_XX_sums = K_XX.sum(dim=1) - diag_X # \tilde{K}_XX * e = K_XX * e - diag_X Kt_YY_sums = K_YY.sum(dim=1) - diag_Y # \tilde{K}_YY * e = K_YY * e - diag_Y K_XY_sums_0 = K_XY.sum(dim=0) # K_{XY}^T * e K_XY_sums_1 = K_XY.sum(dim=1) # K_{XY} * e Kt_XX_sum = Kt_XX_sums.sum() # e^T * \tilde{K}_XX * e Kt_YY_sum = Kt_YY_sums.sum() # e^T * \tilde{K}_YY * e K_XY_sum = K_XY_sums_0.sum() # e^T * K_{XY} * e Kt_XX_2_sum = (K_XX ** 2).sum() - sum_diag2_X # \| \tilde{K}_XX \|_F^2 Kt_YY_2_sum = (K_YY ** 2).sum() - sum_diag2_Y # \| \tilde{K}_YY \|_F^2 K_XY_2_sum = (K_XY ** 2).sum() # \| K_{XY} \|_F^2 if biased: mmd2 = ((Kt_XX_sum + sum_diag_X) / (m * m) + (Kt_YY_sum + sum_diag_Y) / (m * m) - 2.0 * K_XY_sum / (m * m)) else: mmd2 = (Kt_XX_sum / (m * (m - 1)) + Kt_YY_sum / (m * (m - 1)) - 2.0 * K_XY_sum / (m * m)) var_est = ( 2.0 / (m**2 * (m - 1.0)**2) * (2 * Kt_XX_sums.dot(Kt_XX_sums) - Kt_XX_2_sum + 2 * Kt_YY_sums.dot(Kt_YY_sums) - Kt_YY_2_sum) - (4.0*m - 6.0) / (m**3 * (m - 1.0)**3) * (Kt_XX_sum**2 + Kt_YY_sum**2) + 4.0*(m - 2.0) / (m**3 * (m - 1.0)**2) * (K_XY_sums_1.dot(K_XY_sums_1) + K_XY_sums_0.dot(K_XY_sums_0)) - 4.0*(m - 3.0) / (m**3 * (m - 1.0)**2) * (K_XY_2_sum) - (8 * m - 12) / (m**5 * (m - 1)) * K_XY_sum**2 + 8.0 / (m**3 * (m - 1.0)) * ( 1.0 / m * (Kt_XX_sum + Kt_YY_sum) * K_XY_sum - Kt_XX_sums.dot(K_XY_sums_1) - Kt_YY_sums.dot(K_XY_sums_0)) ) return mmd2, var_est
def fov(self, value):
    """Store a new field of view and rebuild the projection matrices.

    Updates ``self._fov``, then recomputes ``self._cam_to_ndc`` as the
    diagonal matrix ``diag(f, f, 1)`` with ``f = 1 / tan(radians(fov / 2))``
    and ``self.ndc_to_cam`` as its inverse.

    Args:
        value: the new field of view; presumably a one-element float32
            tensor in degrees -- verify against the constructor.
    """
    self._fov = value
    half_angle = transform.radians(0.5 * self._fov)
    scale = 1.0 / torch.tan(half_angle)
    unit = torch.ones([1], dtype=torch.float32)
    self._cam_to_ndc = torch.diag(torch.cat([scale, scale, unit], 0))
    self.ndc_to_cam = torch.inverse(self._cam_to_ndc)
def knn(Mxx, Mxy, Myy, k, sqrt):
    """Leave-one-out k-NN classification scores between two sample sets.

    The three distance blocks are assembled into one symmetric matrix over
    all ``n0 + n1`` samples. Samples of the first set are labelled 1 and
    samples of the second set 0. Each sample is classified by the labels of
    its ``k`` nearest neighbours, itself excluded.

    Args:
        Mxx: pairwise distances within the first set, ``(n0, n0)``.
        Mxy: distances between the first and second set, ``(n0, n1)``.
        Myy: pairwise distances within the second set, ``(n1, n1)``.
        k: number of neighbours.
        sqrt: if true, use ``sqrt(|M|)`` as the distance.

    Returns:
        A ``Score_knn`` object with ``tp``, ``fp``, ``fn``, ``tn``,
        ``precision``, ``recall``, ``acc_t``, ``acc_f``, ``acc`` and ``k``.
    """
    n0 = Mxx.size(0)
    n1 = Myy.size(0)
    total = n0 + n1
    label = torch.cat((torch.ones(n0), torch.zeros(n1)))

    upper = torch.cat((Mxx, Mxy), 1)
    lower = torch.cat((Mxy.transpose(0, 1), Myy), 1)
    M = torch.cat((upper, lower), 0)
    if sqrt:
        M = M.abs().sqrt()

    # An infinite diagonal prevents a sample from being its own neighbour.
    self_exclusion = torch.diag(torch.full((total,), float('inf')))
    _, idx = (M + self_exclusion).topk(k, 0, False)

    # Count, per sample, how many of its k neighbours carry label 1.
    count = torch.zeros(total)
    for neighbour_row in idx:
        count = count + label.index_select(0, neighbour_row)
    pred = torch.ge(count, float(k) / 2).float()

    s = Score_knn()
    negative_label = 1 - label
    negative_pred = 1 - pred
    s.tp = (pred * label).sum()
    s.fp = (pred * negative_label).sum()
    s.fn = (negative_pred * label).sum()
    s.tn = (negative_pred * negative_label).sum()
    s.precision = s.tp / (s.tp + s.fp)
    s.recall = s.tp / (s.tp + s.fn)
    s.acc_t = s.tp / (s.tp + s.fn)
    s.acc_f = s.tn / (s.tn + s.fp)
    s.acc = torch.eq(label, pred).float().mean()
    s.k = k
    return s
def setUp(self, size=(2, 5), batch=3, dtype=torch.float64, device=None,
          seed=None, mu=None, cov=None, A=None, b=None):
    '''Prepare the fixtures for the batched tests.

    Stores ``A`` and ``b`` on the test case and builds batched inputs by
    stacking ``[1 * v, 2 * v, ..., batch * v]`` for the mean, the
    covariance matrix and its diagonal (the variance).

    Args:
        size: Tuple size of matrix A.
        batch: The batch size > 0.
        dtype: data type.
        device: In which device.
        seed: Seed for the random number generator.
        mu: To test a specific mean mu.
        cov: To test a specific covariance matrix.
        A: To test a specific A matrix.
        b: To test a specific bias b.
    '''
    if seed is not None:
        torch.manual_seed(seed)

    # Defaults are drawn in a fixed order (A, b, mu, cov) so that a given
    # seed always yields the same fixtures.
    if A is None:
        A = torch.rand(size, dtype=dtype, device=device)
    if b is None:
        b = torch.rand(size[0], dtype=dtype, device=device)
    if mu is None:
        mu = torch.rand(size[1], dtype=dtype, device=device)
    if cov is None:
        cov = rand.definite(size[1], dtype=dtype, device=device,
                            positive=True, semi=False, norm=10**2)

    self.A = A
    self.b = b

    var = torch.diag(cov)
    scales = range(1, batch + 1)
    self.batch_mean = torch.stack([scale * mu for scale in scales])
    self.batch_cov = torch.stack([scale * cov for scale in scales])
    self.batch_var = torch.stack([scale * var for scale in scales])
def __wct_core(self, cont_feat, styl_feat): cFSize = cont_feat.size() c_mean = torch.mean(cont_feat, 1) # c x (h x w) c_mean = c_mean.unsqueeze(1).expand_as(cont_feat) cont_feat = cont_feat - c_mean iden = torch.eye(cFSize[0]) # .double() if self.is_cuda: iden = iden.cuda() contentConv = torch.mm(cont_feat, cont_feat.t()).div(cFSize[1] - 1) + iden # del iden c_u, c_e, c_v = torch.svd(contentConv, some=False) # c_e2, c_v = torch.eig(contentConv, True) # c_e = c_e2[:,0] k_c = cFSize[0] for i in range(cFSize[0] - 1, -1, -1): if c_e[i] >= 0.00001: k_c = i + 1 break sFSize = styl_feat.size() s_mean = torch.mean(styl_feat, 1) styl_feat = styl_feat - s_mean.unsqueeze(1).expand_as(styl_feat) styleConv = torch.mm(styl_feat, styl_feat.t()).div(sFSize[1] - 1) s_u, s_e, s_v = torch.svd(styleConv, some=False) k_s = sFSize[0] for i in range(sFSize[0] - 1, -1, -1): if s_e[i] >= 0.00001: k_s = i + 1 break c_d = (c_e[0:k_c]).pow(-0.5) step1 = torch.mm(c_v[:, 0:k_c], torch.diag(c_d)) step2 = torch.mm(step1, (c_v[:, 0:k_c].t())) whiten_cF = torch.mm(step2, cont_feat) s_d = (s_e[0:k_s]).pow(0.5) targetFeature = torch.mm(torch.mm(torch.mm(s_v[:, 0:k_s], torch.diag(s_d)), (s_v[:, 0:k_s].t())), whiten_cF) targetFeature = targetFeature + s_mean.unsqueeze(1).expand_as(targetFeature) return targetFeature
def _greedy_decode(self, head_tag_representation: torch.Tensor, child_tag_representation: torch.Tensor, attended_arcs: torch.Tensor, mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: """ Decodes the head and head tag predictions by decoding the unlabeled arcs independently for each word and then again, predicting the head tags of these greedily chosen arcs indpendently. Note that this method of decoding is not guaranteed to produce trees (i.e. there maybe be multiple roots, or cycles when children are attached to their parents). Parameters ---------- head_tag_representation : ``torch.Tensor``, required. A tensor of shape (batch_size, sequence_length, tag_representation_dim), which will be used to generate predictions for the dependency tags for the given arcs. child_tag_representation : ``torch.Tensor``, required A tensor of shape (batch_size, sequence_length, tag_representation_dim), which will be used to generate predictions for the dependency tags for the given arcs. attended_arcs : ``torch.Tensor``, required. A tensor of shape (batch_size, sequence_length, sequence_length) used to generate a distribution over attachements of a given word to all other words. Returns ------- heads : ``torch.Tensor`` A tensor of shape (batch_size, sequence_length) representing the greedily decoded heads of each word. head_tags : ``torch.Tensor`` A tensor of shape (batch_size, sequence_length) representing the dependency tags of the greedily decoded heads of each word. """ # Mask the diagonal, because the head of a word can't be itself. attended_arcs = attended_arcs + torch.diag(attended_arcs.new(mask.size(1)).fill_(-numpy.inf)) # Mask padded tokens, because we only want to consider actual words as heads. if mask is not None: minus_mask = (1 - mask).byte().unsqueeze(2) attended_arcs.masked_fill_(minus_mask, -numpy.inf) # Compute the heads greedily. 
# shape (batch_size, sequence_length) _, heads = attended_arcs.max(dim=2) # Given the greedily predicted heads, decode their dependency tags. # shape (batch_size, sequence_length, num_head_tags) head_tag_logits = self._get_head_tags(head_tag_representation, child_tag_representation, heads) _, head_tags = head_tag_logits.max(dim=2) return heads, head_tags
def adpW(self, x):
    """Build a per-sample metric matrix ``T @ diag(f(x)) @ T``.

    ``x`` is passed through ``adp_metric_embedding1``, a PReLU, and
    ``adp_metric_embedding2``; each resulting vector is placed on the
    diagonal of a matrix that is multiplied on both sides by
    ``self.transform_matrix``.

    Args:
        x: input batch; assumed 2-D ``(batch, features)`` -- TODO confirm.

    Returns:
        A batch of matrices, one per row of ``x``.
    """
    x = self.adp_metric_embedding1(x)
    # F.prelu requires an explicit slope tensor; calling it with the input
    # alone raises a TypeError.
    # NOTE(review): 0.25 mirrors the default initial slope of nn.PReLU --
    # replace with a learned parameter if that was the intent.
    prelu_slope = x.new_full((1,), 0.25)
    x = F.prelu(x, prelu_slope)
    x = self.adp_metric_embedding2(x)

    # One diagonal matrix per sample, built without a Python loop.
    x = torch.diag_embed(x)
    W = torch.matmul(self.transform_matrix,
                     torch.matmul(x, self.transform_matrix))
    return W
def forward(self, input):
    """Turn a batch of square score matrices into matrices of the same shape.

    For each batch element a Laplacian-style matrix is built from
    ``exp(input) + self.eps`` with the diagonal removed; its first row is
    replaced by the exponentiated diagonal of the input (the root scores).
    The inverse of that matrix is combined with the exponentiated scores to
    fill the off-diagonal output entries, and the root terms are written to
    the output diagonal.

    Args:
        input: tensor of shape ``(batch, n, n)``.

    Returns:
        A tensor with the same shape as ``input``.
    """
    laplacian = input.exp() + self.eps
    output = input.clone()
    size = input.size(1)
    on_diagonal = torch.eye(size, device=input.device).ne(0)

    for b in range(input.size(0)):
        scores = input[b]
        root_scores = scores.diag().exp()

        lap = laplacian[b].masked_fill(on_diagonal, 0)
        lap = -lap + torch.diag(lap.sum(0))
        # store roots on diagonal
        lap[0] = root_scores

        inv_laplacian = lap.inverse()
        inv_t = inv_laplacian.transpose(0, 1)

        # factor[i, j] == inv_laplacian[j, j]
        factor = (inv_laplacian.diag().unsqueeze(1)
                  .expand_as(scores).transpose(0, 1))
        term1 = scores.exp().mul(factor).clone()
        term2 = scores.exp().mul(inv_t).clone()
        term1[:, 0] = 0
        term2[0] = 0

        output[b] = term1 - term2
        roots_output = root_scores.mul(inv_t[0])
        output[b] = output[b] + torch.diag(roots_output)
    return output
def _mix_rbf_kernel(X, Y, sigma_list): assert(X.size(0) == Y.size(0)) m = X.size(0) Z = torch.cat((X, Y), 0) ZZT = torch.mm(Z, Z.t()) diag_ZZT = torch.diag(ZZT).unsqueeze(1) Z_norm_sqr = diag_ZZT.expand_as(ZZT) exponent = Z_norm_sqr - 2 * ZZT + Z_norm_sqr.t() K = 0.0 for sigma in sigma_list: gamma = 1.0 / (2 * sigma**2) K += torch.exp(-gamma * exponent) return K[:m, :m], K[:m, m:], K[m:, m:], len(sigma_list)
def test_constant(self):
    """A constant created while tracing must be captured by value.

    Traces ``x.matmul(y)`` where ``y`` is built inside the traced region,
    then overwrites ``y`` and checks that replaying the trace still uses
    the original constant.
    """
    traced_input = Variable(torch.randn(2, 2), requires_grad=True)

    trace = torch._C._tracer_enter((traced_input,), 0)
    constant = Variable(torch.diag(torch.Tensor([2, 2])))
    traced_output = traced_input.matmul(constant)
    torch._C._tracer_exit((traced_output,))
    function = torch._C._jit_createAutogradClosure(trace)

    replayed = function()(traced_input)
    self.assertEqual(traced_output, replayed)

    constant.data.fill_(1000)  # make sure the data has been cloned

    fresh_input = Variable(torch.ones(2, 2) * 2, requires_grad=True)
    fresh_output = function()(fresh_input)
    self.assertEqual(fresh_output.data, torch.ones(2, 2) * 4)
def orthogonal(tensor, gain=1):
    """Fills the input Tensor or Variable with a (semi) orthogonal matrix, as
    described in "Exact solutions to the nonlinear dynamics of learning in
    deep linear neural networks" - Saxe, A. et al. (2013). The input tensor
    must have at least 2 dimensions, and for tensors with more than 2
    dimensions the trailing dimensions are flattened.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable, where n >= 2
        gain: optional scaling factor

    Returns:
        The input ``tensor``, filled in place.

    Raises:
        ValueError: if ``tensor`` has fewer than 2 dimensions.

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.orthogonal(w)
    """
    if tensor.ndimension() < 2:
        raise ValueError("Only tensors with 2 or more dimensions are supported")

    rows = tensor.size(0)
    cols = tensor[0].numel()
    flattened = tensor.new(rows, cols).normal_(0, 1)

    # For a wide matrix, factorise the transpose so that Q has orthonormal
    # columns of full length. Zero-padding a square Q instead would leave
    # the trailing columns of the result empty.
    if rows < cols:
        flattened.t_()

    # Compute the qr factorization
    q, r = torch.qr(flattened)
    # Make Q uniform according to https://arxiv.org/pdf/math-ph/0609050.pdf
    d = torch.diag(r, 0)
    ph = d.sign()
    q *= ph

    if rows < cols:
        q.t_()

    # Write under no_grad rather than recursing on ``tensor.data``: tensors
    # and Variables are the same type on current PyTorch, so an isinstance
    # check against Variable would recurse without end.
    with torch.no_grad():
        tensor.view_as(q).copy_(q)
        tensor.mul_(gain)
    return tensor
def _greedy_decode(arc_scores: torch.Tensor, arc_tag_logits: torch.Tensor, mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: """ Decodes the head and head tag predictions by decoding the unlabeled arcs independently for each word and then again, predicting the head tags of these greedily chosen arcs independently. Parameters ---------- arc_scores : ``torch.Tensor``, required. A tensor of shape (batch_size, sequence_length, sequence_length) used to generate a distribution over attachments of a given word to all other words. arc_tag_logits : ``torch.Tensor``, required. A tensor of shape (batch_size, sequence_length, sequence_length, num_tags) used to generate a distribution over tags for each arc. mask : ``torch.Tensor``, required. A mask of shape (batch_size, sequence_length). Returns ------- arc_probs : ``torch.Tensor`` A tensor of shape (batch_size, sequence_length, sequence_length) representing the probability of an arc being present for this edge. arc_tag_probs : ``torch.Tensor`` A tensor of shape (batch_size, sequence_length, sequence_length, sequence_length) representing the distribution over edge tags for a given edge. """ # Mask the diagonal, because we don't self edges. inf_diagonal_mask = torch.diag(arc_scores.new(mask.size(1)).fill_(-numpy.inf)) arc_scores = arc_scores + inf_diagonal_mask # shape (batch_size, sequence_length, sequence_length, num_tags) arc_tag_logits = arc_tag_logits + inf_diagonal_mask.unsqueeze(0).unsqueeze(-1) # Mask padded tokens, because we only want to consider actual word -> word edges. minus_mask = (1 - mask).byte().unsqueeze(2) arc_scores.masked_fill_(minus_mask, -numpy.inf) arc_tag_logits.masked_fill_(minus_mask.unsqueeze(-1), -numpy.inf) # shape (batch_size, sequence_length, sequence_length) arc_probs = arc_scores.sigmoid() # shape (batch_size, sequence_length, sequence_length, num_tags) arc_tag_probs = torch.nn.functional.softmax(arc_tag_logits, dim=-1) return arc_probs, arc_tag_probs
def build_model(params, with_dis):
    """
    Build all components of the model: source and (optional) target
    embeddings, the linear mapping and, when `with_dis` is set, the
    discriminator. Returns `(src_emb, tgt_emb, mapping, discriminator)`;
    `tgt_emb` / `discriminator` are None when not built.
    """
    def embedding_from(dico, vectors):
        # Sparse embedding layer initialised with the loaded vectors.
        layer = nn.Embedding(len(dico), params.emb_dim, sparse=True)
        layer.weight.data.copy_(vectors)
        return layer

    # source embeddings
    src_dico, _src_emb = load_external_embeddings(params, source=True)
    params.src_dico = src_dico
    src_emb = embedding_from(src_dico, _src_emb)

    # target embeddings
    tgt_emb = None
    if params.tgt_lang:
        tgt_dico, _tgt_emb = load_external_embeddings(params, source=False)
        params.tgt_dico = tgt_dico
        tgt_emb = embedding_from(tgt_dico, _tgt_emb)

    # mapping, optionally started at the identity
    mapping = nn.Linear(params.emb_dim, params.emb_dim, bias=False)
    if getattr(params, 'map_id_init', True):
        mapping.weight.data.copy_(torch.eye(params.emb_dim))

    # discriminator
    discriminator = Discriminator(params) if with_dis else None

    # cuda
    if params.cuda:
        src_emb.cuda()
        if params.tgt_lang:
            tgt_emb.cuda()
        mapping.cuda()
        if with_dis:
            discriminator.cuda()

    # normalize embeddings
    normalize_embeddings(src_emb.weight.data, params.normalize_embeddings)
    if params.tgt_lang:
        normalize_embeddings(tgt_emb.weight.data, params.normalize_embeddings)

    return src_emb, tgt_emb, mapping, discriminator
def th_corrcoef(x):
    """
    mimics np.corrcoef

    Args:
        x: 2-D tensor; each row is a variable, each column an observation.

    Returns:
        The (rows x rows) matrix of correlation coefficients, clamped to
        [-1, 1].
    """
    # calculate covariance matrix of rows
    # keepdim=True keeps the means as a column so that they broadcast
    # per row; without it the subtraction fails for non-square x and
    # subtracts the wrong means for square x.
    mean_x = th.mean(x, 1, keepdim=True)
    xm = x - mean_x
    c = xm.mm(xm.t())
    c = c / (x.size(1) - 1)

    # normalize covariance matrix
    stddev = th.pow(th.diag(c), 0.5)
    c = c / stddev.unsqueeze(0)  # divide column j by stddev[j]
    c = c / stddev.unsqueeze(1)  # divide row i by stddev[i]

    # clamp between -1 and 1
    return th.clamp(c, -1.0, 1.0)
def orthogonal_(tensor, gain=1):
    r"""Fills the input `Tensor` in place with a (semi) orthogonal matrix,
    following "Exact solutions to the nonlinear dynamics of learning in deep
    linear neural networks" - Saxe, A. et al. (2013). Tensors with more than
    2 dimensions are treated as matrices whose trailing dimensions are
    flattened.

    Args:
        tensor: an n-dimensional `torch.Tensor`, where :math:`n \geq 2`
        gain: optional scaling factor

    Returns:
        The input `tensor`.

    Raises:
        ValueError: if `tensor` has fewer than 2 dimensions.

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.orthogonal_(w)
    """
    if tensor.ndimension() < 2:
        raise ValueError("Only tensors with 2 or more dimensions are supported")

    n_rows = tensor.size(0)
    n_cols = tensor[0].numel()
    is_wide = n_rows < n_cols

    sample = tensor.new(n_rows, n_cols).normal_(0, 1)
    # A wide matrix is factorised in transposed form and flipped back below.
    if is_wide:
        sample.t_()

    # QR factorisation of the Gaussian sample.
    q, r = torch.qr(sample)
    # Sign correction that makes Q uniformly distributed, see
    # https://arxiv.org/pdf/math-ph/0609050.pdf
    q *= torch.diag(r, 0).sign()

    if is_wide:
        q.t_()

    with torch.no_grad():
        tensor.view_as(q).copy_(q)
        tensor.mul_(gain)
    return tensor
def __init__(self, position, look_at, up, fov, clip_near, resolution,
             cam_to_ndc=None, fisheye=False):
    """Validate and store the camera parameters.

    Args:
        position: float32 tensor of shape (3,).
        look_at: float32 tensor of shape (3,).
        up: float32 tensor of shape (3,).
        fov: float32 tensor of shape (1,), or None. It is needed whenever
            ``cam_to_ndc`` is not supplied.
        clip_near: Python float.
        resolution: stored as given.
        cam_to_ndc: optional camera-to-NDC matrix; when None it is built as
            ``diag(f, f, 1)`` with ``f = 1 / tan(radians(fov / 2))``.
        fisheye: stored as given.
    """
    def check_float32(tensor, length):
        # Must be a 1-D float32 tensor with exactly `length` entries.
        assert(tensor.dtype == torch.float32)
        assert(len(tensor.shape) == 1 and tensor.shape[0] == length)

    check_float32(position, 3)
    check_float32(look_at, 3)
    check_float32(up, 3)
    if fov is not None:
        check_float32(fov, 1)
    assert(isinstance(clip_near, float))

    self.position = position
    self.look_at = look_at
    self.up = up
    self._fov = fov

    if cam_to_ndc is not None:
        self._cam_to_ndc = cam_to_ndc
    else:
        scale = 1.0 / torch.tan(transform.radians(0.5 * fov))
        unit = torch.ones([1], dtype=torch.float32)
        self._cam_to_ndc = torch.diag(torch.cat([scale, scale, unit], 0))

    # Reads the matrix back through the ``cam_to_ndc`` attribute, which is
    # defined outside this method.
    self.ndc_to_cam = torch.inverse(self.cam_to_ndc)
    self.clip_near = clip_near
    self.resolution = resolution
    self.fisheye = fisheye
def get_probs(self, x, y):
    """Return the softmax probability the model assigns to each target class.

    ``x`` is moved to the GPU, normalised with ``self.normalize`` and fed to
    ``self.model``; the logits are moved back to the CPU before the softmax.

    Args:
        x: input batch.
        y: 1-D tensor of class indices, one per sample, on the CPU.

    Returns:
        1-D tensor whose ``i``-th entry is ``softmax(model(x))[i, y[i]]``,
        detached from the autograd graph.
    """
    output = self.model(self.normalize(x.cuda())).cpu()
    probs = F.softmax(output, dim=-1).data
    # Gather one probability per row instead of building the full
    # (batch x batch) selection and reading only its diagonal.
    return probs.gather(1, y.long().view(-1, 1)).squeeze(1)
def vector_to_diag(not_informed_fraction, beta):
    """Expand a batch of vectors into a batch of diagonal matrices.

    Args:
        not_informed_fraction: batch of vectors; entry ``s`` is squeezed to
            1-D before being placed on a diagonal.
        beta: tensor whose shape and dtype the result takes; its first
            dimension gives the batch size.

    Returns:
        A tensor like ``beta`` whose ``s``-th matrix is
        ``diag(not_informed_fraction[s])``.
    """
    diagonals = torch.zeros_like(beta)
    batch_size = beta.shape[0]
    for sample in range(batch_size):
        entries = not_informed_fraction[sample].squeeze()
        diagonals[sample] = torch.diag(entries)
    return diagonals
def forward(self, state):
    """Return the Gaussian action distribution for ``state``.

    The mean is ``self.model(state)``; the covariance matrix is diagonal
    with entries ``exp(self.log_std)``.
    """
    # NOTE(review): exp(log_std) is passed as the covariance (not squared);
    # confirm that this is the intended parameterisation.
    covariance = torch.diag(torch.exp(self.log_std))
    mean = self.model(state)
    return MultivariateNormal(mean, covariance)
def quadratic_gaussian(x, mu, S):
    """Return ``0.5 * (x_i - mu) S (x_i - mu)^T`` for every row ``x_i``.

    Args:
        x: 2-D tensor, one sample per row.
        mu: mean; must broadcast against ``x`` to a 2-D result.
        S: square matrix of the quadratic form.

    Returns:
        1-D tensor with one value per row of ``x``.
    """
    diff = x - mu
    # Only the per-row quadratic forms are needed, so contract each row
    # directly instead of forming the full N x N product and taking its
    # diagonal.
    return 0.5 * (torch.mm(diff, S) * diff).sum(dim=1)
def run_seis_information_new(T, G, S, I, migration_I, migration_E, nu, mu, d,
                             beta, N, alpha_fast, alpha_slow, E,
                             beta_information, nu_max):
    '''
    Runs the linearized SEIS model with information spread for T time steps.

    Returns the tuple (all_I, all_E, all_F): the infected, latent and
    informed states recorded at every time step. Step 0 holds I[0], E[0]
    and the initial informed vector (taken from nu); steps 1..T-1 hold the
    states produced by the update loop.
    '''
    # Duplicate these variables along an additional axis to match the batch size.
    beta = beta.expand_as(G)
    informed = nu.view(len(nu), 1)
    informed = informed.expand(beta.shape[0], *informed.shape)
    nu = torch.diag(1 - nu).expand_as(beta)
    num_samples = G.shape[0]

    # Histories of infected, latent and informed at each time step.
    all_I = torch.zeros(T, num_samples, beta.shape[1], 1).double()
    all_E = torch.zeros(T, num_samples, E.shape[1], E.shape[2]).double()
    all_F = torch.zeros_like(all_I).double()
    all_I[0] = I[0]
    all_E[0] = E[0]
    all_F[0] = informed

    # Main loop of the linearized disease dynamics.
    for step in range(1, T):
        # Information spreads from informed to not-yet-informed groups.
        uninformed = 1 - informed
        uninformed_diag = vector_to_diag(uninformed, beta)
        informed = uninformed_diag @ beta_information @ informed + informed

        # nu is rebuilt from the updated information level.
        nu = nu_max * informed
        nu = vector_to_diag(1 - nu, beta)

        # New infections, split into immediately active and latent ones.
        new_infections = S[step - 1] @ mu @ beta @ N[step - 1] @ I
        new_infections_active = alpha_fast @ new_infections
        new_infections_latent = new_infections - new_infections_active

        # Latent pool: scale by mu, activate a share, add new latent cases,
        # then mix with G and add migration.
        E = mu @ E
        activations = alpha_slow @ E
        E = E - activations
        E += new_infections_latent
        E = G @ E + migration_E[step]

        # Infected pool: carried-over plus newly active plus activated
        # latent cases, then mix with G and add migration.
        old_infections = nu @ d @ I
        I = new_infections_active + old_infections + activations
        I = G @ I + migration_I[step]

        all_I[step] = I
        all_E[step] = E
        all_F[step] = informed

    return all_I, all_E, all_F
def decoder(self, z):
    """Decode ``z`` into a Gaussian distribution.

    The mean is ``self.decoder_fc(z)`` and the covariance is the diagonal
    matrix of ``self.std ** 2``.
    """
    variances = self.std**2
    return MultivariateNormal(self.decoder_fc(z), torch.diag(variances))
def loss_HardNet(anchor, positive, anchor_swap = False, anchor_ave = False,\
        margin = 2.0, batch_reduce = 'min', loss_type = "triplet_margin"):
    """HardNet margin loss - calculates loss based on distance matrix based on
    positive distance and closest negative distance.

    Args:
        anchor: 2-D tensor of anchor descriptors, one per row.
        positive: 2-D tensor of matching descriptors, same size as ``anchor``.
        anchor_swap: also consider the distance matrix column-wise when
            selecting the negative distance.
        anchor_ave: unused; kept for interface compatibility.
        margin: margin for the triplet / contrastive losses.
        batch_reduce: ``'min'`` (hardest negative per anchor), ``'average'``
            (all pairs) or ``'random'`` (one random column per anchor).
        loss_type: ``'triplet_margin'``, ``'softmax'`` or ``'contrastive'``.

    Returns:
        The mean loss over the batch.

    Raises:
        ValueError: if ``loss_type`` is ``'triplet_margin'`` with a
            ``batch_reduce`` other than ``'min'``; only ``'min'`` selects
            explicit negative descriptors.

    An unknown ``batch_reduce`` or ``loss_type`` prints a message and exits
    the process with status 1.
    """
    assert anchor.size() == positive.size(
    ), "Input sizes between positive and negative must be equal."
    assert anchor.dim() == 2, "Inputd must be a 2D matrix."
    eps = 1e-8
    dist_matrix = distance_matrix_vector(anchor, positive) + eps
    # Build helper tensors on the same device as the distances rather than
    # unconditionally on CUDA.
    device = dist_matrix.device
    eye = torch.eye(dist_matrix.size(1), device=device)

    # steps to filter out same patches that occur in distance matrix as negatives
    pos1 = torch.diag(dist_matrix)
    dist_without_min_on_diag = dist_matrix + eye * 10
    # Entries below 0.008 are treated as the same patch and pushed away by 10.
    mask = (dist_without_min_on_diag.ge(0.008).float() - 1.0) * (-1)
    mask = mask.type_as(dist_without_min_on_diag) * 10
    dist_without_min_on_diag = dist_without_min_on_diag + mask

    neg = None  # explicit negative descriptors; only set by 'min'
    if batch_reduce == 'min':
        # Keep both the hardest-negative distance and its index. The
        # distance is needed by anchor_swap and by the softmax and
        # contrastive losses.
        min_neg, idx_neg = torch.min(dist_without_min_on_diag, 1)
        # Index the descriptors directly; works for any descriptor width.
        neg = positive[idx_neg]
        if anchor_swap:
            min_neg2 = torch.min(dist_without_min_on_diag, 0)[0]
            min_neg = torch.min(min_neg, min_neg2)
        pos = pos1
    elif batch_reduce == 'average':
        pos = pos1.repeat(anchor.size(0)).view(-1, 1).squeeze(0)
        min_neg = dist_without_min_on_diag.view(-1, 1)
        if anchor_swap:
            min_neg2 = torch.t(dist_without_min_on_diag).contiguous().view(
                -1, 1)
            min_neg = torch.min(min_neg, min_neg2)
        min_neg = min_neg.squeeze(0)
    elif batch_reduce == 'random':
        idxs = torch.randperm(anchor.size()[0]).long().to(device)
        min_neg = dist_without_min_on_diag.gather(1, idxs.view(-1, 1))
        if anchor_swap:
            min_neg2 = torch.t(dist_without_min_on_diag).gather(
                1, idxs.view(-1, 1))
            min_neg = torch.min(min_neg, min_neg2)
        min_neg = torch.t(min_neg).squeeze(0)
        pos = pos1
    else:
        print('Unknown batch reduce mode. Try min, average or random')
        sys.exit(1)

    if loss_type == "triplet_margin":
        if neg is None:
            raise ValueError(
                "loss_type 'triplet_margin' requires batch_reduce='min'")
        # NOTE(review): the fifth positional argument of
        # F.triplet_margin_loss is ``p``, so ``True`` selects p=1 here;
        # confirm whether ``swap=True`` was intended instead.
        loss = F.triplet_margin_loss(anchor, positive, neg, margin, True)
    elif loss_type == 'softmax':
        exp_pos = torch.exp(2.0 - pos)
        exp_den = exp_pos + torch.exp(2.0 - min_neg) + eps
        loss = -torch.log(exp_pos / exp_den)
    elif loss_type == 'contrastive':
        loss = torch.clamp(margin - min_neg, min=0.0) + pos
    else:
        print('Unknown loss type. Try triplet_margin, softmax or contrastive')
        sys.exit(1)
    loss = torch.mean(loss)
    return loss
def gen_adj(A):
    """Normalise a square matrix with its row sums.

    With ``D = diag(rowsum(A) ** -0.5)`` the result is ``(A @ D).T @ D``.

    Args:
        A: square 2-D tensor.

    Returns:
        The normalised matrix.
    """
    inv_sqrt_degree = torch.diag(A.sum(1).float().pow(-0.5))
    scaled = torch.matmul(A, inv_sqrt_degree)
    return torch.matmul(scaled.t(), inv_sqrt_degree)
e = A[r, c] - U[r, :] @ V[c, :].t() U[r, :] = U[r, :] + lr * e * V[c, :] V[c, :] = V[c, :] + lr * e * U[r, :] return U, V if __name__ == '__main__': test = torch.tensor([[0.3374, 0.6005, 0.1735], [3.3359, 0.0492, 1.8374], [2.9407, 0.5301, 2.2620]]) U, V = sgd_factorise(test, 2) loss = torch.nn.functional.mse_loss(U @ V.t(), test, reduction='sum') print(f"Approximation {[email protected]()}") print(f'Loss is {loss}')\ U, S, V = truncatedSVD(test) reconstruction = U @ torch.diag(S) @ V.t() loss = torch.nn.functional.mse_loss(reconstruction, test, reduction='sum') print(f"Approximation \n {reconstruction}") print(f'Loss is {loss}') test_2 = torch.tensor([[0.3374, 0.6005, 0.1735], [0, 0.0492, 1.8374], [2.9407, 0, 2.2620]]) mask = torch.tensor([[1, 1, 1], [0, 1, 1], [1, 0, 1]]) U, V = sgd_factorise_masked(test_2, mask, 2) loss = torch.nn.functional.mse_loss(U @ V.t(), test, reduction='sum') print(f"Approximation \n {U @ V.t()}") print(f'Loss is {loss}')
def grassmanGeodesic(X, Y, t):
    """Evaluate the Grassmann geodesic from span(X) towards span(Y) at ``t``.

    With ``U S V^T`` the SVD of ``Y (X^T Y)^{-1} - X`` and
    ``theta = atan(S)``, the returned matrix is
    ``X V cos(theta * t) + U sin(theta * t)``.

    Args:
        X: matrix whose columns span the start subspace (assumed to have
            orthonormal columns -- TODO confirm with callers).
        Y: matrix of the same shape spanning the end subspace; ``X^T Y``
            must be invertible.
        t: position along the geodesic (0 gives span(X), 1 gives span(Y)).

    Returns:
        A matrix of the same shape as ``X``. The name ``qr_term`` suggests
        that callers orthonormalise it with a QR step afterwards.
    """
    # Component of Y (X^T Y)^{-1} orthogonal to span(X).
    svd_term = Y @ to.inverse(X.t() @ Y) - X
    U, s, V = to.svd(svd_term)
    theta = to.atan(s).float()
    qr_term = (X @ V @ to.diag(to.cos(theta * t))
               + U @ to.diag(to.sin(theta * t)))
    return qr_term
def main(cfg):
    """Train a MinkUNet34C model and periodically validate it with Hough voting.

    The function sets the module-level BN-momentum / learning-rate globals
    from ``cfg.opt``, builds train/val ``ScanNetXYZProbMultiDataset`` loaders,
    and trains with Adam for epochs ``cfg.start_epoch .. cfg.max_epoch``.
    Every 10th epoch it saves ``epoch{N}.pth`` and runs validation: per-point
    predictions are voted into a grid by ``HoughVoting``, box candidates are
    extracted greedily, filtered, passed through ``nms`` per class and scored
    against ground-truth boxes read from ``cfg.data.gt_path`` with
    ``compute_map`` at IoU thresholds 0.25 and 0.5.

    NOTE(review): ``thresh_high``, ``thresh_low``, ``elimination``,
    ``valid_ratio`` and ``idx2name`` are not defined inside this function;
    they are presumably module-level names -- confirm.

    Args:
        cfg: configuration object; the attributes read here are ``opt.*``,
            ``augment``, ``batch_size``, ``num_workers``, ``use_xyz``,
            ``weight_decay``, ``start_epoch``, ``max_epoch``, ``scannet_res``,
            ``xyz_component_weights``, ``log_scale``, ``xyz_factor``,
            ``scale_factor``, ``data.gt_path`` and ``category``.
    """
    global BN_MOMENTUM_INIT
    global BN_MOMENTUM_MAX
    global BN_DECAY_STEP
    global BN_DECAY_RATE
    global BASE_LEARNING_RATE
    global LR_DECAY_STEPS
    global LR_DECAY_RATES
    BN_MOMENTUM_INIT = 0.5
    BN_MOMENTUM_MAX = 0.001
    BN_DECAY_STEP = cfg.opt.bn_decay_step
    BN_DECAY_RATE = cfg.opt.bn_decay_rate
    BASE_LEARNING_RATE = cfg.opt.learning_rate
    LR_DECAY_STEPS = [int(x) for x in cfg.opt.lr_decay_steps.split(',')]
    LR_DECAY_RATES = [float(x) for x in cfg.opt.lr_decay_rates.split(',')]

    train_dataset = ScanNetXYZProbMultiDataset(cfg, training=True, augment=cfg.augment)
    val_dataset = ScanNetXYZProbMultiDataset(cfg, training=False, augment=False)
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True, collate_fn=collate_fn, num_workers=cfg.num_workers, drop_last=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, collate_fn=collate_fn, shuffle=True, batch_size=1, num_workers=cfg.num_workers)

    logger.info('Start training...')

    nclasses = 9
    # each class predict xyz and scale independently
    # Output channels: 3*nclasses xyz + 3*nclasses scale + (nclasses + 1) class logits.
    model = MinkUNet34C(6 if cfg.use_xyz else 3, 6 * nclasses + nclasses + 1)

    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=cfg.opt.learning_rate,
        weight_decay=cfg.weight_decay
    )

    # BN momentum decays geometrically every BN_DECAY_STEP and is floored at BN_MOMENTUM_MAX.
    bn_lbmd = lambda it: max(BN_MOMENTUM_INIT * BN_DECAY_RATE**(int(it / BN_DECAY_STEP)), BN_MOMENTUM_MAX)
    bnm_scheduler = BNMomentumScheduler(model, bn_lambda=bn_lbmd, last_epoch=cfg.start_epoch-1)

    hv = HoughVoting(cfg.scannet_res)
    obj_criterion = torch.nn.CrossEntropyLoss()
    model = model.cuda()
    xyz_weights = torch.tensor([float(x) for x in cfg.xyz_component_weights.split(',')]).cuda()

    meter = AverageMeter()
    losses = {}

    for epoch in range(cfg.start_epoch, cfg.max_epoch + 1):
        # Training
        adjust_learning_rate(optimizer, epoch)
        bnm_scheduler.step()  # decay BN momentum
        model.train()
        meter.reset()
        with tqdm(enumerate(train_dataloader)) as t:
            for i, data in t:
                optimizer.zero_grad()
                _, scan_points, scan_feats, scan_xyz_labels, scan_scale_labels, scan_class_labels = data

                feats = scan_feats.reshape(-1, 6 if cfg.use_xyz else 3)  # recenter to [-1, 1] ?
                feats[:, -3:] = feats[:, -3:] * 2. - 1.
                scan_input = ME.SparseTensor(feats, scan_points, device='cuda')
                scan_output = model(scan_input)

                # Index used to gather, per point, the xyz/scale triplet of its labelled class.
                class_label_idx = scan_class_labels.cuda().unsqueeze(-1).unsqueeze(-1).expand(-1, -1, 3)
                class_label_idx[class_label_idx < 0] = 0  # since we have mask to filter out, just set to zero here
                class_label_idx[class_label_idx == nclasses] = 0
                scan_output_xyz = torch.gather(scan_output.F[:, :3 * nclasses].reshape(-1, nclasses, 3), 1, class_label_idx)[:, 0]
                scan_output_scale = torch.gather(scan_output.F[:, 3 * nclasses:6 * nclasses].reshape(-1, nclasses, 3), 1, class_label_idx)[:, 0]
                scan_output_class = scan_output.F[:, 6 * nclasses:]

                # Points whose label is a real object class (0 <= label < nclasses).
                mask = (scan_class_labels < nclasses) & (0 <= scan_class_labels)

                loss_xyz = torch.zeros(()).cuda()
                loss_scale = torch.zeros(()).cuda()
                loss_class = torch.zeros(()).cuda()
                if torch.any(mask):
                    if cfg.log_scale:
                        scan_scale_target = torch.log(scan_scale_labels[mask].cuda())
                    else:
                        scan_scale_target = scan_scale_labels[mask].cuda()
                    loss_scale = torch.mean((scan_output_scale[mask] - scan_scale_target) ** 2 * xyz_weights)
                    loss_xyz = torch.mean((scan_output_xyz[mask] - scan_xyz_labels[mask].cuda()) ** 2 * xyz_weights)  # only optimize xyz when there are objects

                loss_class = obj_criterion(scan_output_class, scan_class_labels.cuda())

                loss_xyz *= cfg.xyz_factor
                loss_scale *= cfg.scale_factor

                losses['loss_xyz'] = loss_xyz
                losses['loss_scale'] = loss_scale
                losses['loss_class'] = loss_class

                loss = torch.sum(torch.stack(list(losses.values())))
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                meter.update(loss.item())
                t.set_postfix(loss=meter.avg, **dict([(k, v.item()) for (k, v) in losses.items()]))
                optimizer.step()

        if epoch % 10 == 0:
            torch.save(model.state_dict(), 'epoch{}.pth'.format(epoch))

        if epoch % 10 == 0:
            # validation
            model.eval()
            meter.reset()
            logger.info('epoch {} validation'.format(epoch))
            pred_map_cls = {}
            gt_map_cls = {}
            cnt = 0
            for scan_ids, scan_points, scan_feats, scan_xyz_labels, scan_scale_labels, scan_class_labels in tqdm(val_dataloader):
                cnt += 1
                id_scan = scan_ids[0]
                feats = scan_feats.reshape(-1, 6 if cfg.use_xyz else 3)  # recenter to [-1, 1]?
                feats[:, -3:] = feats[:, -3:] * 2. - 1.
                scan_input = ME.SparseTensor(feats, scan_points, device='cuda')

                with torch.no_grad():
                    scan_output = model(scan_input)

                scan_output_xyz = scan_output.F[:, :3 * nclasses]
                scan_output_scale = scan_output.F[:, 3 * nclasses:6 * nclasses]
                scan_output_class = scan_output.F[:, 6 * nclasses:]

                # At validation time the gather index comes from the predicted class, not the label.
                class_label_idx = scan_output_class.argmax(-1).unsqueeze(-1).unsqueeze(-1).expand(-1, -1, 3)
                class_label_idx[class_label_idx == nclasses] = 0
                scan_output_xyz = torch.gather(scan_output_xyz.reshape(-1, nclasses, 3), 1, class_label_idx)[:, 0]
                scan_output_scale = torch.gather(scan_output_scale.reshape(-1, nclasses, 3), 1, class_label_idx)[:, 0]

                mask = (scan_class_labels < nclasses) & (0 <= scan_class_labels)

                loss_xyz = torch.zeros(()).cuda()
                loss_scale = torch.zeros(()).cuda()
                loss_class = torch.zeros(()).cuda()

                # NOTE(review): unlike the training loop there is no torch.any(mask) guard here.
                if cfg.log_scale:
                    scan_scale_target = torch.log(scan_scale_labels[mask].cuda())
                else:
                    scan_scale_target = scan_scale_labels[mask].cuda()
                loss_scale = torch.mean((scan_output_scale[mask] - scan_scale_target) ** 2 * xyz_weights)
                loss_xyz = torch.mean((scan_output_xyz[mask] - scan_xyz_labels[mask].cuda()) ** 2 * xyz_weights)  # only optimize xyz when there are objects

                loss_class = obj_criterion(scan_output_class, scan_class_labels.cuda())

                loss_xyz *= cfg.xyz_factor
                loss_scale *= cfg.scale_factor

                losses['loss_xyz'] = loss_xyz
                losses['loss_scale'] = loss_scale
                losses['loss_class'] = loss_class

                # Column 0 of scan_points is dropped; the remaining columns are used as coordinates.
                curr_points = scan_points[:, 1:]
                xyz_pred = scan_output_xyz
                if cfg.log_scale:
                    scale_pred = torch.exp(scan_output_scale)
                else:
                    scale_pred = scan_output_scale
                # The last class logit is excluded when picking the class and its probability.
                class_pred = torch.argmax(scan_output_class[..., :-1], dim=-1)
                prob_pred = torch.max(torch.softmax(scan_output_class, dim=-1)[..., :-1], dim=-1)[0]

                with torch.no_grad():
                    grid_obj, grid_rot, grid_scale = hv(curr_points.to('cuda') * cfg.scannet_res, xyz_pred.contiguous(), scale_pred.contiguous(), prob_pred.contiguous())

                map_scene = []
                boxes = []
                scores = []
                probs = []
                classes = []
                scan_points = curr_points.to('cuda') * cfg.scannet_res
                corners = torch.stack([torch.min(scan_points, 0)[0], torch.max(scan_points, 0)[0]])
                l, h, w = 2, 2, 2
                # Eight corners of an axis-aligned cube with side 2, one corner per row.
                bbox_raw = torch.from_numpy(np.array([[l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2], [h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2], [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2]]).T).float()
                # Greedy extraction: take the grid maximum, suppress around it, repeat
                # until the maximum drops below thresh_high.
                while True:
                    cand = torch.stack(unravel_index(torch.argmax(grid_obj), grid_obj.shape))
                    cand_world = torch.tensor([corners[0, 0] + cfg.scannet_res * cand[0], corners[0, 1] + cfg.scannet_res * cand[1], corners[0, 2] + cfg.scannet_res * cand[2]]).cuda()

                    if grid_obj[cand[0], cand[1], cand[2]].item() < thresh_high:
                        break

                    grid_obj[max(cand[0]-elimination,0):cand[0]+elimination+1, max(cand[1]-elimination,0):cand[1]+elimination+1, max(cand[2]-elimination,0):cand[2]+elimination+1] = 0

                    rot_vec = grid_rot[cand[0], cand[1], cand[2]]
                    rot = torch.atan2(rot_vec[1], rot_vec[0])
                    # Rotation about the second (y) axis by angle rot.
                    rot_mat_full = torch.tensor([[torch.cos(rot), 0, -torch.sin(rot)], [0, 1, 0], [torch.sin(rot), 0, torch.cos(rot)]]).cuda()
                    scale_full = grid_scale[cand[0], cand[1], cand[2]]

                    # fast filtering
                    bbox = (rot_mat_full @ torch.diag(scale_full) @ bbox_raw.cuda().T).T
                    bounding_vol = (torch.stack([torch.min(bbox, 0)[0], torch.max(bbox, 0)[0]]) / cfg.scannet_res).int()
                    cand_coords = torch.stack(torch.meshgrid(torch.arange(bounding_vol[0, 0], bounding_vol[1, 0] + 1), torch.arange(bounding_vol[0, 1], bounding_vol[1, 1] + 1), torch.arange(bounding_vol[0, 2], bounding_vol[1, 2] + 1)), -1).reshape(-1, 3).cuda()
                    cand_coords = cand_coords + cand
                    # Clamp candidate cells to the grid bounds.
                    cand_coords = torch.max(torch.min(cand_coords, torch.tensor(grid_obj.shape).cuda() - 1), torch.tensor([0, 0, 0]).cuda())

                    # Express cells in the box's canonical frame; inside the box means every coordinate in (-1, 1).
                    coords_inv = (((cand_coords - cand) * cfg.scannet_res) @ rot_mat_full) / scale_full
                    bbox_mask = (-1 < coords_inv[:, 0]) & (coords_inv[:, 0] < 1) \
                        & (-1 < coords_inv[:, 1]) & (coords_inv[:, 1] < 1) \
                        & (-1 < coords_inv[:, 2]) & (coords_inv[:, 2] < 1)
                    bbox_coords = cand_coords[bbox_mask]

                    coords_inv_world = ((scan_points - cand_world) @ rot_mat_full) / scale_full
                    bbox_mask_world = (-1 < coords_inv_world[:, 0]) & (coords_inv_world[:, 0] < 1) \
                        & (-1 < coords_inv_world[:, 1]) & (coords_inv_world[:, 1] < 1) \
                        & (-1 < coords_inv_world[:, 2]) & (coords_inv_world[:, 2] < 1)

                    # back project elimination: current off
                    # prob_delta = torch.zeros_like(prob_pred)
                    # prob_delta[bbox_mask_world] = prob_pred[bbox_mask_world]
                    # if not torch.all(prob_delta == 0):
                    #     grid_obj_delta, _, _ = hv(scan_points.cuda(), xyz_pred.contiguous(), scale_pred.contiguous(), prob_delta.contiguous())
                    #     grid_obj -= grid_obj_delta

                    grid_obj[bbox_coords[:, 0], bbox_coords[:, 1], bbox_coords[:, 2]] = 0

                    # Reject candidates with too few confident points inside the box.
                    mask = prob_pred[bbox_mask_world] > 0.3
                    if torch.sum(mask) < valid_ratio * torch.sum(bbox_mask_world) or torch.sum(bbox_mask_world) < thresh_low:
                        continue

                    # Reject candidates whose predicted coordinates disagree with the box frame.
                    gt_coords = coords_inv_world[bbox_mask_world][mask]
                    error = torch.mean(torch.norm(xyz_pred[bbox_mask_world][mask] - gt_coords, dim=-1) * prob_pred[bbox_mask_world][mask]).item()

                    if error > 0.3:
                        continue

                    # Majority vote over the predicted classes of the confident points.
                    elems, counts = torch.unique(class_pred[bbox_mask_world][mask], return_counts=True)
                    best_class_idx = elems[torch.argmax(counts)].item()
                    best_class = idx2name[best_class_idx]

                    probmax = torch.max(prob_pred[bbox_mask_world])
                    bbox = (rot_mat_full @ torch.diag(scale_full) @ bbox_raw.cuda().T).T + cand_world
                    boxes.append(bbox.cpu().numpy())
                    scores.append(probmax.item())
                    probs.append(probmax.item())
                    classes.append(best_class_idx)

                boxes = np.array(boxes)
                scores = np.array(scores)
                probs = np.array(probs)
                classes = np.array(classes)

                # Per-class non-maximum suppression with threshold 0.3.
                if len(classes) > 0:
                    for i in range(nclasses):
                        if (classes == i).sum() > 0:
                            boxes_cls = boxes[classes == i]
                            scores_cls = scores[classes == i]
                            probs_cls = probs[classes == i]
                            pick = nms(boxes_cls, scores_cls, 0.3)
                            for j in pick:
                                map_scene.append((idx2name[i], boxes_cls[j], probs_cls[j]))

                pred_map_cls[id_scan] = map_scene

                # read ground truth
                # Each line: tx ty tz ry sx sy sz ... category (space separated).
                lines = open(os.path.join(cfg.data.gt_path, '{}.txt'.format(id_scan))).read().splitlines()
                map_scene = []
                for line in lines:
                    tx, ty, tz, ry, sx, sy, sz = [float(v) for v in line.split(' ')[:7]]
                    category = line.split(' ')[-1]
                    bbox = (np.array([[np.cos(ry), 0, -np.sin(ry)], [0, 1, 0], [np.sin(ry), 0, np.cos(ry)]]) @ np.diag([sx, sy, sz]) @ bbox_raw.numpy().T).T + np.array([tx, ty, tz])
                    # bbox_mat is built here but not used afterwards in this function.
                    bbox_mat = np.eye(4)
                    bbox_mat[:3, :3] = np.array([[np.cos(ry), 0, -np.sin(ry)], [0, 1, 0], [np.sin(ry), 0, np.cos(ry)]]) @ np.diag([sx, sy, sz])
                    bbox_mat[:3, 3] = np.array([tx, ty, tz])
                    map_scene.append((category, bbox))

                gt_map_cls[id_scan] = map_scene

                loss = torch.sum(torch.stack(list(losses.values())))
                meter.update(loss.item())
                losses_numeral = dict([(k, v.item()) for (k, v) in losses.items()])
                logger.info(', '.join([k + ': {' + k + '}' for k in losses_numeral.keys()]).format(**losses_numeral))

            for thresh in [0.25, 0.5]:
                print(thresh)
                ret_dict = compute_map(pred_map_cls, gt_map_cls, thresh)
                if cfg.category != 'all':
                    logger.info('{} Recall: {}'.format(cfg.category, ret_dict['{} Recall'.format(cfg.category)]))
                    logger.info('{} Average Precision: {}'.format(cfg.category, ret_dict['{} Average Precision'.format(cfg.category)]))
                else:
                    for k in range(nclasses):
                        logger.info('{} Recall: {}'.format(idx2name[k], ret_dict['{} Recall'.format(idx2name[k])]))
                        logger.info('{} Average Precision: {}'.format(idx2name[k], ret_dict['{} Average Precision'.format(idx2name[k])]))
                    logger.info('mean Average Precision: {}'.format(ret_dict['mAP']))
def _init_G(self, factor, module):
    """Allocate the per-module state kept for factor G and its eigendecomp.

    ``m_G[module]`` starts as an identity matrix of side ``factor.shape[0]``;
    ``m_dG[module]`` (length ``factor.shape[0]``) and ``m_QG[module]`` (same
    shape as ``factor``) start as zeros. All tensors are created from
    ``factor`` so they share its dtype and device.
    """
    side = factor.shape[0]
    self.m_G[module] = torch.diag(factor.new_ones(side))
    self.m_dG[module] = factor.new_zeros(side)
    self.m_QG[module] = factor.new_zeros(factor.shape)
def add_value_to_diagonal(X, value):
    """Add ``value`` to each diagonal entry of ``X`` in place.

    Args:
        X: tensor to modify; a ``shape[0]`` x ``shape[0]`` diagonal matrix is
            added to it.
        value: scalar added on the diagonal.

    Returns:
        ``X`` itself (the addition is in place).
    """
    diagonal_entries = X.new_empty(X.shape[0]).fill_(value)
    shift = torch.diag(diagonal_entries)
    return X.add_(shift)
def _setweights(self):
    """Rebuild the wrapped module's ``weight_hh_l0`` from diagonal blocks.

    Rows 0, 1 and 2 of ``self.weight_hh_l0`` are each expanded into a diagonal
    matrix; the three matrices are concatenated along ``dim=1`` and stored on
    ``self.module`` as ``weight_hh_l0``.
    """
    w_hh = self.weight_hh_l0
    # One diagonal block per row, in the order the original names them: r, z, n.
    w_hr, w_hz, w_hn = (diag(w_hh[row, :]) for row in range(3))
    self.module.weight_hh_l0 = cat([w_hr, w_hz, w_hn], dim=1)
def forward(self, text_inputs, mask_input, len_seq, len_sents, tid, len_para=None, mode=""):
    """Score a batch of documents for coherence.

    Each sentence position is encoded with ``self.lstm`` (hidden/cell state is
    carried from one sentence to the next). For each pair of adjacent
    sentences the two most similar LSTM states are averaged, adjacent averages
    are compared by a scaled dot product, and the resulting sequence of
    similarities is passed through ``self.conv`` and two linear layers.

    Args:
        text_inputs: token ids; indexed as ``[:, sentence, :]`` -- presumably
            (batch_size, num_sents, len_sent), confirm against caller.
        mask_input: mask indexed the same way as ``text_inputs``.
        len_seq: lengths used only to obtain a descending sort order.
        len_sents, tid, len_para, mode: accepted but not read in this method.

    Returns:
        A one-element list containing the coherence score tensor (passed
        through a sigmoid when ``self.loss_type`` is ``"mseloss"``).
    """
    # init hidden layers (teacher forcing)
    last_hid = self.init_hidden_layers(batch_size=self.batch_size, num_layers=self.num_layers, rnn_cell_size=self.rnn_cell_size)
    last_cell = self.init_hidden_layers(batch_size=self.batch_size, num_layers=self.num_layers, rnn_cell_size=self.rnn_cell_size)

    ## Stage 1: Embedding representation
    #flat_sent_input = text_inputs.view(-1, self.max_len_sent) #
    #print(text_inputs.shape)
    x_input = self.x_embed(text_inputs)  # 4dim, (batch_size, num_sents, len_sent, embed_size)

    ## Stage 2: Encoding context via LSTM from input sent
    hid_encoded = []  # list of LSTM states from sentences, denoted "H" in the paper
    hid_mask = []  # list for actual length of hidden layer (never filled in this method)
    mask = mask_input.view(text_inputs.shape)  # not used below
    for ind_sent in range(self.max_num_sents):
        # embedding representation
        sent_text = text_inputs[:, ind_sent, :].contiguous()  # not used below
        sent_x_inputs = x_input[:, ind_sent, :].contiguous()  # e.g., 1st sentence of all docs in batch; (batch_size, len_sent, embed_size)
        cur_sents_mask = mask_input[:, ind_sent, :]

        len_seq_sorted, ind_len_sorted = torch.sort(len_seq, descending=True)  # ind_len_sorted: (batch_size, num_sents)

        # x_input_sorted = self.sort_by_2d(sent_x_inputs, ind_len_sorted)
        sent_x_input_sorted = sent_x_inputs[ind_len_sorted]

        ## encoding context via LSTM from input sentences
        # sorting last_hid and last_cell to feed to next training (not needed in nightly version)
        last_hid = self.sort_hid(last_hid, ind_len_sorted)  # sort for RNN training
        last_cell = self.sort_hid(last_cell, ind_len_sorted)  # sort for RNN training

        # encoding sentence via RNN
        self.lstm.flatten_parameters()
        sent_lstm_out, (last_hid, last_cell) = self.lstm(sent_x_input_sorted, (last_hid, last_cell))  # out: (batch_size, len_sent, cell_size)

        # # Note that, in the nightly version, instead, "enforce_sorted=False" for the automatic sorting ()
        # x_input_packed = pack_padded_sequence(sent_x_input_sorted, lengths=len_seq_sorted, batch_first=True)
        # # x_input_packed = pack_padded_sequence(sent_x_input_sorted, cur_len_sent_sorted, batch_first=True, enforce_sorted=False)
        # out_packed, (last_hid, last_cell) = self.lstm(x_input_packed, (last_hid, last_cell))
        # sent_lstm_out, out_len = pad_packed_sequence(out_packed, batch_first=True)

        # applying mask to last_hid and last_cell by only actual length of sentence
        # Sorting the sort indices yields the inverse permutation, restoring batch order.
        _, ind_origin = torch.sort(ind_len_sorted)
        sent_lstm_out = sent_lstm_out[ind_origin]
        last_hid = self.sort_hid(last_hid, ind_origin)  # return to origin index for masking
        last_cell = self.sort_hid(last_cell, ind_origin)  # return to origin index for masking

        # Zero the LSTM outputs at masked positions.
        sent_lstm_out = sent_lstm_out * cur_sents_mask.unsqueeze(2)

        # store encoded sentence
        hid_encoded.append(sent_lstm_out)
    # end for ind_sent

    ## Stage 3: get the most similar hidden states
    vec_close_states = []  # (max_num_sents, batch_size, cell_size)
    for i in range(len(hid_encoded)-1):
        encoded_state_i = hid_encoded[i]  # encoded from current sentence
        encoded_state_j = hid_encoded[i+1]  # encoded from next sentence

        # get similarity (actually, original paper describes that it is just matrix multiplication)
        sim_states = torch.bmm(encoded_state_i, encoded_state_j.transpose(2,1))  # matmul corresponding to the paper
        sim_states = sim_states.clamp(min=0)  # filter for positive (relu), because it is similarity
        sim_states = self.dropout_layer(sim_states)

        # select two states with maximum similarity
        vec_H = []
        ind_max_sim = torch.argmax(sim_states.view(sim_states.shape[0], -1), dim=1)  # sim index matrix between all states
        for b_id in range(sim_states.shape[0]):
            val_ind = ind_max_sim[b_id]
            # Convert the flat argmax index back into (row, column) of the similarity matrix.
            max_ind_i = math.floor(val_ind / sim_states.shape[2])
            max_ind_j = val_ind % sim_states.shape[2]

            max_state_i = encoded_state_i[b_id, max_ind_i, :]
            max_state_j = encoded_state_j[b_id, max_ind_j, :]

            vec_ij = (max_state_i + max_state_j) / 2
            vec_H.append(vec_ij)
        # end for batch_id
        vec_H = torch.stack(vec_H)  # convert to torch
        vec_close_states.append(vec_H)
    # end for range(len(hid_encoded)), stage3

    ## Stage 4: produce coherence vector
    vec_coh = []  # final output vector represents coherence
    for i in range(len(vec_close_states)-1):  # (max_num_sents, batch_size, cell_size)
        vec_u = vec_close_states[i]  # (batch_size, cell_size)
        vec_v = vec_close_states[i+1]

        # get similarity (d) between vectors; paper describes that matrix multiplication with division by vector size
        dist_vec_states = torch.mm(vec_u, vec_v.transpose(1, 0))
        dist_states = torch.diag(dist_vec_states)  # they are already the most related states, thus only extract the sim value between them
        dist_states = dist_states / vec_u.shape[1]  # divide by vector size, i.e., cell size
        dist_states = dist_states.clamp(min=0)  # again, only positivity for similarity
        vec_coh.append(dist_states)
    # end for vec_close_states
    vec_coh = torch.stack(vec_coh)  # convert to torch variable from list, (num_sents-2, batch_size)
    vec_coh = vec_coh.permute(1, 0)  # transpose to shape for linear layer (batch * num_sents-2)
    vec_coh = self.dropout_layer(vec_coh)

    # applying convolutional layer and max_pooling
    vec_coh = vec_coh.unsqueeze(1)  # nn.conv requires (mini_batch x in_channel x w)
    vec_coh = self.conv(vec_coh)
    vec_coh = self.leak_relu(vec_coh)
    vec_coh = vec_coh.squeeze(1)

    ##
    linear_out_coh = self.linear_1(vec_coh)
    linear_out_coh = self.tanh(linear_out_coh)
    #linear_out_coh = self.dropout_layer(linear_out_coh)
    coh_score = self.linear_out(linear_out_coh)

    if self.loss_type.lower() == "mseloss":
        coh_score = self.sigmoid(coh_score)

    outputs = []
    outputs.append(coh_score)

    # return coh_score
    return outputs
def compute_mmd(self, unbiaised=False, approximation='standard', shared_anchors=True, name=None, verbose=0, anchors_basis=None):
    """Compute an MMD-type statistic and store it in ``self.dict_mmd[name]``.

    Args:
        unbiaised: only used with ``approximation='standard'``; when true the
            diagonal of the Gram matrix is zeroed (in place) before use.
        approximation: one of ``'standard'``, ``'nystrom'`` or
            ``'quantization'``.
        shared_anchors: for ``'nystrom'``, whether one anchor set
            (``self.spev['xy']``) or separate ``'x'`` / ``'y'`` sets are used.
        name: key under which the value is stored. When ``None`` it defaults
            to ``approximation``, with ``'shared'`` / ``'diff'`` appended for
            ``'nystrom'``.
        verbose: forwarded to ``self.verbosity``.
        anchors_basis: key selecting the anchor eigendecomposition in
            ``self.spev[...]['anchors']``.

    Returns:
        None. The scalar result is written to ``self.dict_mmd``.
    """
    self.verbosity(function_name='compute_mmd',
                   dict_of_variables={
                       'unbiaised': unbiaised,
                       'approximation': approximation,
                       'shared_anchors': shared_anchors,
                       'name': name
                   },
                   start=True,
                   verbose=verbose)

    if approximation == 'standard':
        m = self.compute_omega(sample='xy', quantization=False)
        K = self.compute_gram()
        if unbiaised:
            # Boolean mask on K's device: uint8 masks are deprecated for
            # masked_fill_ and a CPU mask cannot index a CUDA Gram matrix.
            diagonal_mask = torch.eye(K.shape[0], K.shape[0],
                                      dtype=torch.bool, device=K.device)
            K.masked_fill_(diagonal_mask, 0)
        mmd = torch.dot(mv(K, m), m)**2

    if approximation == 'nystrom' and shared_anchors:
        m = self.compute_omega(sample='xy', quantization=False)
        # Fixed: the eigenvectors live at [anchors_basis]['ev'], exactly like
        # the eigenvalues at [anchors_basis]['sp']; the index was doubled.
        Up = self.spev['xy']['anchors'][anchors_basis]['ev']
        Lp_inv2 = torch.diag(
            self.spev['xy']['anchors'][anchors_basis]['sp']**-(1 / 2))
        Pm = self.compute_centering_matrix(sample='xy', landmarks=True)
        Kmn = self.compute_kmn(sample='xy')
        psi_m = mv(Lp_inv2, mv(Up.T, mv(Pm, mv(Kmn, m))))
        mmd = torch.dot(psi_m, psi_m)**2

    if approximation == 'nystrom' and not shared_anchors:
        mx = self.compute_omega(sample='x', quantization=False)
        my = self.compute_omega(sample='y', quantization=False)
        Upx = self.spev['x']['anchors'][anchors_basis]['ev']
        Upy = self.spev['y']['anchors'][anchors_basis]['ev']
        Lpx_inv2 = torch.diag(
            self.spev['x']['anchors'][anchors_basis]['sp']**-(1 / 2))
        Lpy_inv2 = torch.diag(
            self.spev['y']['anchors'][anchors_basis]['sp']**-(1 / 2))
        Lpy_inv = torch.diag(
            self.spev['y']['anchors'][anchors_basis]['sp']**-1)
        Pmx = self.compute_centering_matrix(sample='x', landmarks=True)
        Pmy = self.compute_centering_matrix(sample='y', landmarks=True)
        Kmnx = self.compute_kmn(sample='x')
        Kmny = self.compute_kmn(sample='y')

        Km = self.compute_gram(sample='xy', landmarks=True)
        m1 = Kmnx.shape[0]
        m2 = Kmny.shape[0]
        # NOTE(review): the column slice starts at m2, not m1 -- confirm this
        # is the intended x-anchors / y-anchors cross block.
        Kmxmy = Km[:m1, m2:]

        psix_mx = mv(Lpx_inv2, mv(Upx.T, mv(Pmx, mv(Kmnx, mx))))
        psiy_my = mv(Lpy_inv2, mv(Upy.T, mv(Pmy, mv(Kmny, my))))
        Cpsiy_my = mv(Lpx_inv2,mv(Upx.T,mv(Pmx,mv(Kmxmy,\
            mv(Pmy,mv(Upy,mv(Lpy_inv,mv(Upy.T,mv(Pmy,mv(Kmny,my))))))))))
        mmd = torch.dot(psix_mx, psix_mx)**2 + torch.dot(
            psiy_my, psiy_my)**2 - 2 * torch.dot(psix_mx, Cpsiy_my)

    if approximation == 'quantization':
        mq = self.compute_omega(sample='xy', quantization=True)
        Km = self.compute_gram(sample='xy', landmarks=True)
        mmd = torch.dot(mv(Km, mq), mq)**2

    if name is None:
        name = f'{approximation}'
        if approximation == 'nystrom':
            name += 'shared' if shared_anchors else 'diff'

    self.dict_mmd[name] = mmd.item()

    self.verbosity(function_name='compute_mmd',
                   dict_of_variables={
                       'unbiaised': unbiaised,
                       'approximation': approximation,
                       'shared_anchors': shared_anchors,
                       'name': name
                   },
                   start=False,
                   verbose=verbose)
def forward_with_log_data(
        self, maximizer_outputs: torch.Tensor,
        correspondence_outputs: torch.Tensor, inlier_labels: torch.Tensor,
        outlier_labels: torch.Tensor
) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
    """Compute the total loss and a dictionary of its detached components.

    Four terms are accumulated with ``self._add_if_not_none``: an outlier
    correspondence loss, an outlier maximizer loss, an inlier maximizer loss
    (all negative-log terms clamped below by ``self._epsilon``) and the sum of
    "unaligned" maximizer scores.

    Args:
        maximizer_outputs: 4-D scores; ``shape[0]`` must be a multiple of
            ``shape[1]`` and the last two dims must be equal. If they are not
            1 the centre pixel is used.
        correspondence_outputs: 4-D scores, centre pixel taken the same way.
        inlier_labels: per-channel labels; bool is converted to uint8.
        outlier_labels: per-channel labels; bool is converted to uint8.

    Returns:
        ``(total_loss, log_data)`` where ``log_data`` maps the loss names to
        detached values (via ``self._detach_if_not_none`` for the components).
    """
    # maximizer_outputs: BxCx1x1 where B == C
    # correspondence_outputs: BxCx1x1 where B == C
    # If h and w are not 1 w.r.t. maximizer_outputs and correspondence_outputs,
    # the center values will be extracted.
    # NOTE(review): the assert below only requires B to be a multiple of C.

    assert (maximizer_outputs.shape[0] % maximizer_outputs.shape[1] == 0)
    assert (maximizer_outputs.shape[2] == maximizer_outputs.shape[3])

    if maximizer_outputs.shape[2] != 1:
        center_px = (maximizer_outputs.shape[2] - 1) // 2
        maximizer_outputs = maximizer_outputs[:, :, center_px, center_px]

    if correspondence_outputs.shape[2] != 1:
        center_px = (correspondence_outputs.shape[2] - 1) // 2
        correspondence_outputs = correspondence_outputs[:, :, center_px, center_px]

    maximizer_outputs = torch.sigmoid(
        maximizer_outputs.squeeze())  # BxCx1x1 -> BxC
    correspondence_outputs = torch.sigmoid(
        correspondence_outputs.squeeze())  # BxCx1x1 -> BxC

    # convert the label types so we can use torch.diag() on the labels
    if inlier_labels.dtype == torch.bool:
        inlier_labels = inlier_labels.to(torch.uint8)

    if outlier_labels.dtype == torch.bool:
        outlier_labels = outlier_labels.to(torch.uint8)

    # Diagonal index: channel i scored on its own (outlier-labelled) patch.
    corr_outlier_index_2d = torch.diag(outlier_labels)

    aligned_outlier_corr_outputs = correspondence_outputs[
        corr_outlier_index_2d]
    if aligned_outlier_corr_outputs.numel() == 0:
        outlier_correspondence_loss = None
    else:
        outlier_correspondence_loss = torch.sum(-1 * torch.log(
            torch.max(aligned_outlier_corr_outputs, self._epsilon)))

    # expand inlier_labels by num_patches_per_channel
    # inlier should begin every segment of (num patches per channel)
    num_patches_per_channel = maximizer_outputs.shape[
        0] // maximizer_outputs.shape[1]
    expanded_inlier_labels = torch.zeros(inlier_labels.shape[0] *
                                         num_patches_per_channel,
                                         dtype=torch.uint8,
                                         device=inlier_labels.device)
    # Index of the first patch of every channel's segment.
    maximum_patch_index = torch.arange(0, maximizer_outputs.shape[0],
                                       num_patches_per_channel)
    expanded_inlier_labels[maximum_patch_index] = inlier_labels

    has_data_labels = inlier_labels | outlier_labels  # B
    # Every labelled patch that is not an inlier counts as an outlier.
    expanded_outlier_labels = (
        has_data_labels.repeat_interleave(num_patches_per_channel)
        & torch.logical_not(expanded_inlier_labels).to(dtype=torch.uint8))

    maxima_inlier_index_2d = expanded_inlier_labels[:, None] * torch.repeat_interleave(
        torch.eye(maximizer_outputs.shape[1],
                  device=maximizer_outputs.device,
                  dtype=torch.uint8),
        num_patches_per_channel,
        dim=0)
    maxima_outlier_index_2d = expanded_outlier_labels[:, None] * torch.repeat_interleave(
        torch.eye(maximizer_outputs.shape[1],
                  device=maximizer_outputs.device,
                  dtype=torch.uint8),
        num_patches_per_channel,
        dim=0)

    aligned_outlier_maximizer_scores = maximizer_outputs[
        maxima_outlier_index_2d]
    if aligned_outlier_maximizer_scores.numel() == 0:
        outlier_maximizer_loss = None
    else:
        # -log(1 - score), clamped below by epsilon.
        outlier_maximizer_loss = torch.sum(-1 * torch.log(
            torch.max(-1 * aligned_outlier_maximizer_scores + 1,
                      self._epsilon)))

    aligned_inlier_maximizer_scores = maximizer_outputs[
        maxima_inlier_index_2d]
    if aligned_inlier_maximizer_scores.numel() == 0:
        inlier_loss = None
    else:
        inlier_loss = torch.sum(-1 * torch.log(
            torch.max(aligned_inlier_maximizer_scores, self._epsilon)))

    # Finally, if a channel attains its maximum response inside of a given radius
    # about its target correspondence site, the responses of all the other channels
    # to its maximizing patch are minimized.
    maximizer_outputs = maximizer_outputs[
        maximum_patch_index]  # BxC where B==C
    # equivalent: inlier_labels.unsqueeze(1).repeat(1, inlier_labels.shape[0]) - inlier_labels.diag()
    unaligned_inlier_index = torch.diag(
        inlier_labels) ^ inlier_labels.unsqueeze(1)
    unaligned_inlier_maximizer_scores = maximizer_outputs[
        unaligned_inlier_index]
    unaligned_maximizer_loss = torch.sum(unaligned_inlier_maximizer_scores)
    if unaligned_inlier_maximizer_scores.nelement() == 0:
        unaligned_maximizer_loss = torch.zeros(
            [1], requires_grad=True, device=unaligned_maximizer_loss.device)

    total_loss = torch.zeros(1,
                             device=maximizer_outputs.device,
                             dtype=maximizer_outputs.dtype,
                             requires_grad=True)

    # imips just adds the unaligned scores to the loss directly
    total_loss = self._add_if_not_none(total_loss,
                                       outlier_correspondence_loss)
    total_loss = self._add_if_not_none(total_loss, outlier_maximizer_loss)
    total_loss = self._add_if_not_none(total_loss, inlier_loss)
    total_loss = self._add_if_not_none(total_loss,
                                       unaligned_maximizer_loss)

    return total_loss, {
        "loss":
        total_loss.detach(),
        "outlier_correspondence_loss":
        self._detach_if_not_none(outlier_correspondence_loss),
        "outlier_maximizer_loss":
        self._detach_if_not_none(outlier_maximizer_loss),
        "inlier_maximizer_loss":
        self._detach_if_not_none(inlier_loss),
        "unaligned_maximizer_loss":
        self._detach_if_not_none(unaligned_maximizer_loss),
    }
def forward(self, states_prev, controls, noisy=False):
    """Predict the next states from previous states and controls.

    The network outputs an update direction and a sigmoid gate; their product
    is added to ``states_prev``. ``self.Q`` is refreshed as
    ``diag(self.Q_l ** 2)`` on every call and, when ``noisy`` is true, zero-mean
    Gaussian noise with covariance ``self.Q`` is added.

    Args:
        states_prev: (N, M, state_dim) when ``self.use_particles`` is set,
            otherwise (N, state_dim) or a 3-D (N, X, state_dim) tensor (the
            latter sets ``self.jacobian = True``).
        controls: 2-D tensor, (N, control_dim).
        noisy: whether to add process noise.

    Returns:
        Tensor with the same shape as ``states_prev``.
    """
    # states_prev: (N, M, state_dim)
    # controls: (N, control_dim)
    self.jacobian = False
    if self.use_particles:
        assert len(states_prev.shape) == 3  # (N, M, state_dim)
        N, M, state_dim = states_prev.shape
        dimensions = (N, M)
    else:
        if len(states_prev.shape) > 2:
            #this is due to mask for jacobian
            N, X, state_dim = states_prev.shape
            dimensions = (N, X)
            self.jacobian = True
        else:
            assert len(states_prev.shape) == 2  # (N, state_dim)
            N, state_dim = states_prev.shape
            dimensions = (N, )
    assert len(controls.shape) == 2  # (N, control_dim,)

    # N := distinct trajectory count
    # M := particle count

    # (N, control_dim) => (N, units), per the shape assert below
    control_features = self.control_layers(controls)

    # (N, units) => (N, M, units)
    if self.use_particles:
        control_features = control_features[:, np.newaxis, :].expand(
            N, M, self.units)
        assert control_features.shape == (N, M, self.units)
    # NOTE(review): in the jacobian case control_features stays 2-D while
    # state_features is 3-D -- confirm how callers make the cat below valid.

    # (..., state_dim) => (..., units)
    state_features = self.state_layers(states_prev)
    assert state_features.shape == dimensions + (self.units, )

    # (..., units * 2)
    merged_features = torch.cat((control_features, state_features), dim=-1)
    assert merged_features.shape == dimensions + (self.units * 2, )

    # (..., units * 2) => (..., state_dim + 1)
    output_features = self.shared_layers(merged_features)

    # We separately compute a direction for our network and a "gate"
    # These are multiplied to produce our final state output
    if self.use_particles or self.jacobian:
        state_update_direction = output_features[:, :, :state_dim]
        state_update_gate = torch.sigmoid(output_features[:, :, -1:])
    else:
        state_update_direction = output_features[:, :state_dim]
        state_update_gate = torch.sigmoid(output_features[:, -1:])
    state_update = state_update_direction * state_update_gate
    assert state_update.shape == dimensions + (state_dim, )

    # Compute new states
    ## TODO: fix this? should be simple -- currently breaks Jacobians
    # update_dims = tuple(slice(None) for _ in dimensions)
    # update_dims += (tuple(i for i in range(state_dim)
    #                       if i not in self.identity_prediction_dims), )
    #
    # states_new = states_prev.clone()
    # states_new[update_dims] += state_update
    states_new = states_prev + state_update
    assert states_new.shape == dimensions + (state_dim, )

    # Diagonal process-noise covariance built from the per-dimension parameters Q_l.
    self.Q = torch.diag(self.Q_l**2)
    # print("q: ", self.Q)

    # Add noise if desired
    if noisy:
        dist = torch.distributions.MultivariateNormal(
            torch.zeros(self.state_dim, dtype=torch.float32).to(states_new.device),
            self.Q,
        )
        # MultivariateNormal receives the covariance matrix self.Q directly.
        # rsample keeps the draw differentiable w.r.t. Q when Q is learnable.
        if self.learnable_Q:
            noise = dist.rsample(dimensions)
        else:
            noise = dist.sample(dimensions)
        assert noise.shape == dimensions + (state_dim, )
        states_new = states_new + noise

    # Return (N, M, state_dim)
    return states_new
def recover_full_adj_from_lower(self, lower):
    """Build a symmetric matrix from a 2-D ``lower`` matrix.

    Returns ``lower + lower^T - diag(lower)``, so the diagonal is counted once.
    """
    diagonal_part = torch.diag(lower.diagonal())
    mirrored = lower.t()
    return lower + mirrored - diagonal_part
def compute(self):
    """Derive accuracy and IoU figures from the matrix ``self.mat``.

    Returns:
        A tuple ``(acc_global, acc, iu)``:
        ``acc_global`` is the diagonal sum over the total sum,
        ``acc`` is the diagonal divided element-wise by the row sums, and
        ``iu`` is the diagonal divided by (row sum + column sum - diagonal).
    """
    confusion = self.mat.float()
    hits = torch.diag(confusion)
    row_totals = confusion.sum(1)
    col_totals = confusion.sum(0)

    acc_global = hits.sum() / confusion.sum()
    acc = hits / row_totals
    iu = hits / (row_totals + col_totals - hits)
    return acc_global, acc, iu
def compute_pkm(self):
    """
    pkm is an alias for the product PKomega in the standard KFDA statistic.
    This function computes the corresponding block with respect to the model parameters.

    The branch taken depends on ``self.approximation_cov`` (``'standard'``,
    ``'nystrom1'``, ``'nystrom2'``, ``'nystrom3'`` or ``'quantization'``) and
    ``self.approximation_mmd`` (``'standard'``, ``'nystrom'`` or
    ``'quantization'``).

    NOTE(review): ``cov_anchors`` is hard-coded to ``'shared'``, so the
    ``'separated'`` branch is unreachable; it also never assigns ``pkm`` and
    calls ``self.kerne`` (possibly a typo for a kernel method -- confirm).

    Returns:
        The vector ``pkm`` computed by the selected branch.
    """
    cov, mmd = self.approximation_cov, self.approximation_mmd
    anchors_basis = self.anchors_basis
    cov_anchors = 'shared'  # not finished

    # r is only read by the nystrom formulas below.
    if 'nystrom' in cov or 'nystrom' in mmd:
        r = self.r

    omega = self.compute_omega(quantization=(mmd == 'quantization'))
    Pbi = self.compute_centering_matrix(sample='xy',
                                        quantization=(cov == 'quantization'))

    if any([
            ny in [mmd, cov]
            for ny in ['nystrom1', 'nystrom2', 'nystrom3', 'nystrom']
    ]):
        # Anchor eigenvectors and the diagonal matrix of inverse eigenvalues.
        Uz = self.spev['xy']['anchors'][anchors_basis]['ev']
        Lz = torch.diag(self.spev['xy']['anchors'][anchors_basis]['sp']**-1)

    if not (mmd == cov) or mmd == 'nystrom':
        Kzx = self.compute_kmn(sample='xy')

    if cov == 'standard':
        if mmd == 'standard':
            Kx = self.compute_gram()
            pkm = mv(Pbi, mv(Kx, omega))

        elif mmd == 'nystrom':
            Pi = self.compute_centering_matrix(sample='xy', landmarks=True)
            pkm = 1 / r * mv(
                Pbi,
                mv(Kzx.T,
                   mv(Pi, mv(Uz, mv(Lz, mv(Uz.T, mv(Pi, mv(Kzx, omega))))))))
            # pkm = mv(Pbi,mv(Kzx.T,mv(Pi,mv(Uz,mv(Lz,mv(Uz.T,mv(Pi,mv(Kzx,omega))))))))

        elif mmd == 'quantization':
            pkm = mv(Pbi, mv(Kzx.T, omega))

    if cov == 'nystrom1' and cov_anchors == 'shared':
        if mmd in ['standard', 'nystrom']:  # this is exactly the same statistic
            Pi = self.compute_centering_matrix(sample='xy', landmarks=True)
            pkm = 1 / r**2 * mv(
                Pbi,
                mv(Kzx.T,
                   mv(Pi, mv(Uz, mv(Lz, mv(Uz.T, mv(Pi, mv(Kzx, omega))))))))
            # pkm = mv(Pbi,mv(Kzx.T,mv(Pi,mv(Uz,mv(Lz,mv(Uz.T,mv(Pi,mv(Kzx,omega))))))))

        elif mmd == 'quantization':
            Kz = self.compute_gram(landmarks=True)
            pkm = 1 / r**2 * mv(
                Pbi, mv(Kzx.T, mv(Uz, mv(Lz, mv(Uz.T, mv(Kz, omega))))))
            # pkm = mv(Pbi,mv(Kzx.T,mv(Uz,mv(Lz,mv(Uz.T,mv(Kz,omega))))))

    if cov == 'nystrom2' and cov_anchors == 'shared':
        # Diagonal matrix of inverse square roots of the anchor eigenvalues.
        Lz12 = torch.diag(
            self.spev['xy']['anchors'][anchors_basis]['sp']**-(1 / 2))
        if mmd in ['standard', 'nystrom']:  # this is exactly the same statistic
            Pi = self.compute_centering_matrix(sample='xy', landmarks=True)
            pkm = 1 / r**3 * mv(
                Lz12,
                mv(
                    Uz.T,
                    mv(
                        Pi,
                        mv(
                            Kzx,
                            mv(
                                Pbi,
                                mv(
                                    Kzx.T,
                                    mv(
                                        Pi,
                                        mv(
                                            Uz,
                                            mv(
                                                Lz,
                                                mv(Uz.T,
                                                   mv(
                                                       Pi,
                                                       mv(Kzx,
                                                          omega))))))))))))
            # pkm = mv(Lz12,mv(Uz.T,mv(Pi,mv(Kzx,mv(Pbi,mv(Kzx.T,mv(Pi,mv(Uz,mv(Lz,mv(Uz.T,mv(Pi,mv(Kzx,omega))))))))))))

        elif mmd == 'quantization':  # not up to date
            # there could be a centered vs. non-centered anchors distinction here.
            Kz = self.compute_gram(landmarks=True)
            pkm = 1 / r**3 * mv(
                Lz12,
                mv(
                    Uz.T,
                    mv(
                        Kzx,
                        mv(Pbi,
                           mv(Kzx.T,
                              mv(Uz, mv(Lz, mv(Uz.T, mv(Kz, omega)))))))))
            # pkm = mv(Lz12,mv(Uz.T,mv(Kzx,mv(Pbi,mv(Kzx.T,mv(Uz,mv(Lz,mv(Uz.T,mv(Kz,omega)))))))))

    if cov == 'nystrom3' and cov_anchors == 'shared':
        Lz12 = torch.diag(
            self.spev['xy']['anchors'][anchors_basis]['sp']**-(1 / 2))
        Pi = self.compute_centering_matrix(sample='xy', landmarks=True)
        if mmd in ['standard', 'nystrom']:  # this is exactly the same statistic
            pkm = 1 / r * mv(Lz12, mv(Uz.T, mv(Pi, mv(Kzx, omega))))
            # pkm = mv(Lz12,mv(Uz.T,mv(Pi,mv(Kzx,omega))))

        elif mmd == 'quantization':  # not up to date
            # Pi must be added here.
            Kz = self.compute_gram(landmarks=True)
            pkm = 1 / r**2 * mv(
                Lz12,
                mv(
                    Uz.T,
                    mv(
                        Pi,
                        mv(
                            Kzx,
                            mv(
                                Pbi,
                                mv(
                                    Kzx.T,
                                    mv(Pi,
                                       mv(Uz,
                                          mv(Lz, mv(Uz.T, mv(
                                              Kz, omega)))))))))))
            # pkm = mv(Lz12,mv(Uz.T,mv(Kzx,mv(Pbi,mv(Kzx.T,mv(Uz,mv(Lz,mv(Uz.T,mv(Kz,omega)))))))))

    if cov == 'nystrom1' and cov_anchors == 'separated':
        if mmd == 'standard':
            x, y = self.get_xy()
            z1, z2 = self.get_xy(landmarks=True)
            Kz1x = self.kerne(z1, x)
            Kz1y = self.kerne(z1, y)
            Kz2x = self.kerne(z2, x)
            Kz2y = self.kerne(z2, y)
            Uz1 = self.spev['x']['anchors'][anchors_basis]['ev']
            Lz1 = torch.diag(
                self.spev['x']['anchors'][anchors_basis]['sp']**-1)
            Uz2 = self.spev['y']['anchors'][anchors_basis]['ev']
            Lz2 = torch.diag(
                self.spev['y']['anchors'][anchors_basis]['sp']**-1)
            omega1 = self.compute_omega(sample='x', quantization=False)
            omega2 = self.compute_omega(sample='y', quantization=False)
            Pn1 = self.compute_centering_matrix(sample='x')
            Pn2 = self.compute_centering_matrix(sample='y')
            # "haut" / "bas" (French: top / bottom) are computed but not
            # combined into pkm in this branch.
            haut = mv(
                Lz1,
                mv(
                    Uz1,
                    mv(
                        Kz1x,
                        mv(
                            Pn1,
                            mv(
                                Kz1x,
                                mv(
                                    Uz1,
                                    mv(
                                        Lz1,
                                        mv(Uz1.T,
                                           mv(Kz1y, omega2) -
                                           mv(Kz1x, omega1)))))))))
            bas = mv(
                Lz2,
                mv(
                    Uz2,
                    mv(
                        Kz2y,
                        mv(
                            Pn2,
                            mv(
                                Kz2y,
                                mv(
                                    Uz2,
                                    mv(
                                        Lz2,
                                        mv(Uz2.T,
                                           mv(Kz2y, omega2) -
                                           mv(Kz2x, omega1)))))))))

    if cov == 'quantization':  # not up to date
        A = self.compute_quantization_weights(power=1 / 2, sample='xy')
        if mmd == 'standard':
            pkm = mv(Pbi, mv(A, mv(Kzx, omega)))

        elif mmd == 'nystrom':
            Pi = self.compute_centering_matrix(sample='xy', landmarks=True)
            Kz = self.compute_gram(landmarks=True)
            pkm = 1 / r * mv(
                Pbi,
                mv(A,
                   mv(Kz, mv(Uz, mv(Lz, mv(Uz.T, mv(Pi, mv(Kzx, omega))))))))

        elif mmd == 'quantization':
            Kz = self.compute_gram(landmarks=True)
            pkm = mv(Pbi, mv(A, mv(Kz, omega)))
    return (pkm)
def _init_A(self, factor, module):
    """Allocate the stored factor A and its inverse for ``module``.

    ``self.m_A[module]`` is seeded with an identity matrix whose side equals
    the first dimension of ``factor``; ``self.m_inv_A[module]`` is a zero
    tensor with the same shape as ``factor``. Both are created through
    ``factor.new_*`` so they inherit ``factor``'s dtype and device.
    """
    side = factor.shape[0]
    unit_diagonal = factor.new_ones(side)
    self.m_A[module] = torch.diag(unit_diagonal)

    inverse_placeholder = factor.new_zeros(factor.shape)
    self.m_inv_A[module] = inverse_placeholder
def _repulsion_loss(pred_box, gt_box, pred_label, pred_box_gt_index, num_gt_box, sigma_gt=1, sigma_box=0):
    """Compute the two repulsion-loss terms (RepGT and RepBox) for a batch.

    Only classification with 2 categories (foreground / background) is
    supported.

    Args:
        pred_box: [bs, num_rois, 4], each box is [x1, y1, x2, y2].
        gt_box: [bs, num_gts, 4], each box is [x1, y1, x2, y2].
        pred_label: [bs, num_rois], label of each roi, 1 (fg) or 0 (bg).
        pred_box_gt_index: [bs, num_rois], index of the gt box assigned to
            each roi (background rois carry -1).
        num_gt_box: number of valid gt boxes, indexed per batch element.
        sigma_gt: currently unused; reserved for the smoothed-ln RepGT
            variant of https://arxiv.org/abs/1711.07752.
        sigma_box: currently unused; reserved for the smoothed-ln RepBox
            variant of the same paper.

    Returns:
        Tuple ``(rep_gt_loss, rep_box_loss)`` of one-element tensors. Each
        term is averaged over the number of contributions and is zero when
        nothing contributed.
    """
    # TODO: support multi-categories classification
    bs = pred_box.size(0)
    pred_box_gt_index = pred_box_gt_index.data.long()
    pred_label = pred_label.data.long()
    num_gt_box = num_gt_box.data.long()

    # ------------------ rep gt loss ----------------------------
    rep_gt_loss = Variable(pred_box.data.new([0]), requires_grad=True)
    count = 0
    for b in range(bs):
        n_gt = int(num_gt_box[b])
        gt_index = torch.arange(n_gt, dtype=torch.long,
                                device=pred_box_gt_index.device)
        is_true_pred = (pred_label[b] > 0) & (pred_box_gt_index[b] < n_gt)
        # reshape(-1) keeps the index 1-D even when there is exactly one hit;
        # squeeze() would yield a 0-d tensor that cannot be iterated.
        for i in torch.nonzero(is_true_pred).reshape(-1):
            # every gt box except the one this prediction is assigned to
            other_gt = torch.nonzero(
                gt_index != pred_box_gt_index[b][i]).reshape(-1)
            if other_gt.numel() == 0:
                continue
            one_pred_box = pred_box[b][i].unsqueeze(dim=0)
            iog = _IoG(one_pred_box, gt_box[b][other_gt]).view(-1)
            # filter out 1: a proposal may coincide with a gt box
            iog = iog[iog < 1]
            if iog.numel() == 0:
                continue
            # the repulsion loss in
            # https://github.com/bailvwangzi/repulsion_loss_ssd
            rep_gt_loss = rep_gt_loss + iog.max()
            count += 1
    if count > 0:
        rep_gt_loss = rep_gt_loss / count

    # ------------------- rep box loss -------------------------------
    rep_box_loss = Variable(pred_box.data.new([0]), requires_grad=True)
    count = 0
    for b in range(bs):
        n_gt = int(num_gt_box[b])
        roi_gt_index = pred_box_gt_index[b]
        assigned_gt_index = np.unique(roi_gt_index.cpu().numpy())

        # one randomly chosen roi per assigned gt box
        chosen_rois_index = []
        for gt_i in assigned_gt_index:
            # bg rois carry -1; indices >= n_gt do not refer to a valid gt
            if gt_i < 0 or gt_i >= n_gt:
                continue
            # each assigned gt box has at least 1 roi
            candidates = torch.nonzero(roi_gt_index == int(gt_i)).reshape(-1)
            pick = int(np.random.choice(candidates.numel()))
            chosen_rois_index.append(candidates[pick])

        # With fewer than two rois there is no off-diagonal pair, so the
        # term contributes nothing.
        if len(chosen_rois_index) < 2:
            continue

        chosen_rois = pred_box[b][torch.stack(chosen_rois_index)]
        iou = _IoU(chosen_rois, chosen_rois)
        # drop the self-overlap on the diagonal
        iou = (iou - torch.diag(iou.diag())).view(-1)
        iou = iou[iou > 0]
        if iou.numel() > 0:
            count = count + iou.numel()
            # we use the iou directly, without smoothing
            # (fixed: the original summed the stale `iog` of the RepGT loop)
            rep_box_loss = rep_box_loss + iou.sum()
    if count > 0:
        rep_box_loss = rep_box_loss / count

    return rep_gt_loss, rep_box_loss
def test_diag():
    """Check ``utils.diag`` and ``utils.batch_diag`` against ``torch.diag``."""
    # Single 2x2 matrix: the extracted diagonal must match torch.diag.
    matrix = torch.tensor([[1, -4], [-2, 3]])
    expected_diag = torch.diag(matrix)
    assert (expected_diag == utils.diag(matrix)).all()

    # Batch of four 5x5 matrices: compare with torch.diag applied per matrix.
    batch = torch.randn(4, 5, 5)
    batched = utils.batch_diag(batch)
    reference = torch.stack([torch.diag(single) for single in batch])
    assert (batched == reference).all()
def INF(B, H, W, device=None):
    """Return a stack of ``B * W`` matrices of size ``H x H`` with ``-inf`` on the diagonal.

    Off-diagonal entries are zero, so adding the result to a tensor changes
    only the diagonal positions.

    Args:
        B: first factor of the number of stacked matrices.
        H: side length of each square matrix.
        W: second factor of the number of stacked matrices.
        device: device for the result. Defaults to CUDA when it is available
            (the original behaviour) and falls back to CPU otherwise, so the
            function no longer crashes on CPU-only hosts.

    Returns:
        Tensor of shape ``(B * W, H, H)``.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    diagonal = torch.full((H,), float("inf"), device=device)
    mask = -torch.diag(diagonal, 0)
    return mask.unsqueeze(0).repeat(B * W, 1, 1)
def gen_scale_matrix(scale):
    """Return ``diag(scale_0, ..., scale_{n-1}, 1)``.

    A single float32 ``1`` is appended to the 1-D ``scale`` tensor and the
    result is placed on the diagonal of a square matrix.
    """
    trailing_one = torch.ones([1], dtype=torch.float32)
    diagonal_entries = torch.cat([scale, trailing_one], 0)
    return torch.diag(diagonal_entries)
else: config['cf_fair']['type'] = 'regression' # ------------------------------------------------------------------------- # Fit assumed model A via cross validation # ------------------------------------------------------------------------- logger.info("Fit model A via CV, compute phi and residuals...") model_a = models.ModelA(g_noy) _, phi, vareps = model_a.fit(data, config['cf_fair']) logger.info(f"Best parameters: {model_a.best_parameters}") # Refit as torch with weighted ridge logger.info("Refit model analytically...") targets = utils.data_to_tensor(data, list(model_a.targets.keys()), numpy=True) phi, a, targets = [torch.tensor(_) for _ in (phi, a, targets)] sigma = torch.diag(torch.tensor(vareps.std(axis=0)**2)) wdagger, vareps = utils.weighted_ridge(phi, targets, sigma, model_a.alpha) model_a.model.regressor_.coef_ = wdagger.clone().numpy().squeeze() vareps = vareps.clone().numpy().squeeze() if debug > 0: logger.info("Plot conditional histograms...") for i, target in enumerate(g_noy.vertices()[1:]): plotters.plot_conditional_histograms( { f'resid_{target}': vareps[:, i], 'A': a }, f'resid_{target}', 'A', fig_dir) # -------------------------------------------------------------------------
def gen_adj(self, A):
    """Return the degree-normalised adjacency of ``A`` with self-loops added.

    With ``A_hat = A + I`` and ``D = diag(rowsum(A_hat) ** -0.5)``, the result
    is ``(A_hat @ D).T @ D``.

    The input is no longer modified: the original ``A += eye`` added another
    identity to the caller's tensor on every call. The identity is created on
    ``A``'s own device instead of unconditionally on CUDA.

    Args:
        A: square adjacency matrix (floating point).

    Returns:
        Tensor with the same shape as ``A``.
    """
    identity = torch.eye(A.size(0), dtype=A.dtype, device=A.device)
    A_hat = A + identity
    D = torch.diag(torch.pow(A_hat.sum(1).float(), -0.5))
    return torch.matmul(torch.matmul(A_hat, D).t(), D)
def __init__(
    self,
    n_inpt: int,
    n_neurons: int = 100,
    exc: float = 22.5,
    inh: float = 17.5,
    dt: float = 1.0,
    nu: Optional[Union[float, Sequence[float]]] = (1e-4, 1e-2),
    reduction: Optional[callable] = None,
    wmin: float = 0.0,
    wmax: float = 1.0,
    norm: float = 78.4,
    theta_plus: float = 0.05,
    tc_theta_decay: float = 1e7,
    inpt_shape: Optional[Iterable[int]] = None,
) -> None:
    # language=rst
    """
    Build the ``DiehlAndCook2015`` network: an input layer ``X``, an
    excitatory layer ``Ae`` and an inhibitory layer ``Ai``, wired
    ``X -> Ae``, ``Ae -> Ai`` and ``Ai -> Ae``.

    :param n_inpt: Number of input neurons. Matches the 1D size of the input
        data.
    :param n_neurons: Number of excitatory, inhibitory neurons.
    :param exc: Strength of synapse weights from excitatory to inhibitory
        layer.
    :param inh: Strength of synapse weights from inhibitory to excitatory
        layer.
    :param dt: Simulation time step.
    :param nu: Single or pair of learning rates for pre- and post-synaptic
        events, respectively.
    :param reduction: Method for reducing parameter updates along the
        minibatch dimension.
    :param wmin: Minimum allowed weight on input to excitatory synapses.
    :param wmax: Maximum allowed weight on input to excitatory synapses.
    :param norm: Input to excitatory layer connection weights normalization
        constant.
    :param theta_plus: On-spike increment of ``DiehlAndCookNodes`` membrane
        threshold potential.
    :param tc_theta_decay: Time constant of ``DiehlAndCookNodes`` threshold
        potential decay.
    :param inpt_shape: The dimensionality of the input layer.
    """
    super().__init__(dt=dt)

    self.n_inpt = n_inpt
    self.inpt_shape = inpt_shape
    self.n_neurons = n_neurons
    self.exc = exc
    self.inh = inh
    self.dt = dt

    # Layers
    input_layer = Input(
        n=self.n_inpt, shape=self.inpt_shape, traces=True, tc_trace=20.0
    )
    exc_layer = DiehlAndCookNodes(
        n=self.n_neurons,
        traces=True,
        rest=-65.0,
        reset=-60.0,
        thresh=-52.0,
        refrac=5,
        tc_decay=100.0,
        tc_trace=20.0,
        theta_plus=theta_plus,
        tc_theta_decay=tc_theta_decay,
    )
    inh_layer = LIFNodes(
        n=self.n_neurons,
        traces=False,
        rest=-60.0,
        reset=-45.0,
        thresh=-40.0,
        tc_decay=10.0,
        refrac=2,
        tc_trace=20.0,
    )

    # Connections
    # Input -> excitatory: random initial weights, learned with PostPre.
    input_weights = 0.3 * torch.rand(self.n_inpt, self.n_neurons)
    input_exc_conn = Connection(
        source=input_layer,
        target=exc_layer,
        w=input_weights,
        update_rule=PostPre,
        nu=nu,
        reduction=reduction,
        wmin=wmin,
        wmax=wmax,
        norm=norm,
    )

    # Excitatory -> inhibitory: one-to-one (diagonal) weights of strength exc.
    identity = torch.eye(self.n_neurons)
    exc_inh_conn = Connection(
        source=exc_layer,
        target=inh_layer,
        w=self.exc * identity,
        wmin=0,
        wmax=self.exc,
    )

    # Inhibitory -> excitatory: every neuron except the matching one.
    all_but_self = torch.ones(self.n_neurons, self.n_neurons) - identity
    inh_exc_conn = Connection(
        source=inh_layer,
        target=exc_layer,
        w=-self.inh * all_but_self,
        wmin=-self.inh,
        wmax=0,
    )

    # Add to network
    for layer_name, layer in (
        ("X", input_layer),
        ("Ae", exc_layer),
        ("Ai", inh_layer),
    ):
        self.add_layer(layer, name=layer_name)

    for connection, source_name, target_name in (
        (input_exc_conn, "X", "Ae"),
        (exc_inh_conn, "Ae", "Ai"),
        (inh_exc_conn, "Ai", "Ae"),
    ):
        self.add_connection(connection, source=source_name, target=target_name)
def calc_weight(self):
    """Assemble the weight as ``w_p @ L @ U`` and add two trailing unit dims.

    ``L`` is ``w_l * l_mask + l_eye``; ``U`` is ``w_u * u_mask`` plus a
    diagonal holding ``s_sign * exp(w_s)``. The product is returned with
    singleton dimensions inserted at positions 2 and 3.
    """
    lower = self.w_l * self.l_mask + self.l_eye
    diagonal = torch.diag(self.s_sign * torch.exp(self.w_s))
    upper = self.w_u * self.u_mask + diagonal
    product = torch.matmul(torch.matmul(self.w_p, lower), upper)
    return product.unsqueeze(2).unsqueeze(3)
import torch
import matplotlib.pyplot as plt

# Quadratic objective f(X) = 0.5 * X^T Q X with a diagonal Q.
a = torch.tensor([3, 2, -0.1])
Q = torch.diag(a)
alpha = 0.6  # constant step size


def count(iteration_time, X):
    """Run ``iteration_time`` gradient-descent steps on ``0.5 * X^T Q X``.

    At every step the objective is evaluated at the current ``X``, then ``X``
    moves by ``-alpha * Q X``. Returns the list of ``log10`` objective values,
    one per step, each computed before that step's update.
    """
    function_error = []
    for _ in range(iteration_time):
        objective = (X.t() @ Q @ X) * 0.5
        gradient = torch.matmul(Q, X).view(-1, 1)
        X = X - alpha * gradient
        function_error.append(torch.log10(objective))
    return function_error


f1 = count(1000, torch.ones(3, 1))
itr_time1 = range(0, 1000)

plt.subplot(1, 2, 1)
plt.plot(itr_time1, f1, label='GD with constant stepsize')
plt.title('Function_error v.s. iteration in 1000 iterations')
plt.ylabel('Log of function_error')
plt.legend()
def __init__(self, device, num_nodes, dropout=0.3, supports=None,
             gcn_bool=True, addaptadj=True, aptinit=None, in_dim=2,
             out_dim=12, residual_channels=32, dilation_channels=32,
             skip_channels=256, end_channels=512, kernel_size=2, blocks=4,
             layers=2):
    """Build the convolution stacks and (optionally) the adaptive adjacency.

    Args:
        device: device on which the adaptive node embeddings are created.
        num_nodes: number of nodes; sizes the randomly initialised
            ``nodevec1`` (num_nodes x 10) and ``nodevec2`` (10 x num_nodes).
        dropout: stored on the instance and forwarded to each ``gcn`` layer.
        supports: optional sequence; its length is counted into
            ``supports_len`` (presumably fixed adjacency matrices - confirm
            against the caller).
        gcn_bool: when true, one ``gcn`` layer is added per temporal layer.
        addaptadj: when true (together with ``gcn_bool``), learnable node
            embeddings are created and counted as one extra support.
        aptinit: optional matrix; when given, its rank-10 truncated SVD
            initialises the node embeddings instead of random values.
        in_dim: input channels of ``start_conv``.
        out_dim: output channels of ``end_conv_2``.
        residual_channels: channels of the residual path.
        dilation_channels: channels produced by the filter/gate convolutions.
        skip_channels: channels of the skip path.
        end_channels: hidden channels between the two end convolutions.
        kernel_size: temporal kernel width of the filter/gate convolutions.
        blocks: number of blocks; dilation restarts at 1 in each block.
        layers: layers per block; dilation doubles after each layer.
    """
    super(gwnet, self).__init__()
    self.dropout = dropout
    self.blocks = blocks
    self.layers = layers
    self.gcn_bool = gcn_bool
    self.addaptadj = addaptadj

    self.filter_convs = nn.ModuleList()
    self.gate_convs = nn.ModuleList()
    self.residual_convs = nn.ModuleList()
    self.skip_convs = nn.ModuleList()
    self.bn = nn.ModuleList()
    self.gconv = nn.ModuleList()

    self.start_conv = nn.Conv2d(in_channels=in_dim,
                                out_channels=residual_channels,
                                kernel_size=(1, 1))
    self.supports = supports

    receptive_field = 1

    self.supports_len = 0
    if supports is not None:
        self.supports_len += len(supports)

    if gcn_bool and addaptadj:
        if supports is None:
            self.supports = []
        if aptinit is None:
            initemb1 = torch.randn(num_nodes, 10).to(device)
            initemb2 = torch.randn(10, num_nodes).to(device)
        else:
            # Rank-10 factorisation of aptinit: U * sqrt(S) and sqrt(S) * V^T.
            m, p, n = torch.svd(aptinit)
            initemb1 = torch.mm(m[:, :10], torch.diag(p[:10] ** 0.5)).to(device)
            initemb2 = torch.mm(torch.diag(p[:10] ** 0.5), n[:, :10].t()).to(device)
        # Move the data first and wrap it afterwards: calling .to(device) on
        # an nn.Parameter can return a plain tensor copy, which the module
        # would not register as a parameter.
        self.nodevec1 = nn.Parameter(initemb1, requires_grad=True)
        self.nodevec2 = nn.Parameter(initemb2, requires_grad=True)
        self.supports_len += 1

    for b in range(blocks):
        additional_scope = kernel_size - 1
        new_dilation = 1
        for i in range(layers):
            # dilated convolutions
            self.filter_convs.append(nn.Conv2d(in_channels=residual_channels,
                                               out_channels=dilation_channels,
                                               kernel_size=(1, kernel_size),
                                               dilation=new_dilation))

            # The gate/residual/skip convolutions use 2-D kernels, so they
            # are Conv2d like filter_convs; weight shapes are the same as for
            # the Conv1d modules used before.
            self.gate_convs.append(nn.Conv2d(in_channels=residual_channels,
                                             out_channels=dilation_channels,
                                             kernel_size=(1, kernel_size),
                                             dilation=new_dilation))

            # 1x1 convolution for residual connection
            self.residual_convs.append(nn.Conv2d(in_channels=dilation_channels,
                                                 out_channels=residual_channels,
                                                 kernel_size=(1, 1)))

            # 1x1 convolution for skip connection
            self.skip_convs.append(nn.Conv2d(in_channels=dilation_channels,
                                             out_channels=skip_channels,
                                             kernel_size=(1, 1)))
            self.bn.append(nn.BatchNorm2d(residual_channels))

            new_dilation *= 2
            receptive_field += additional_scope
            additional_scope *= 2
            if self.gcn_bool:
                self.gconv.append(gcn(dilation_channels, residual_channels,
                                      dropout, support_len=self.supports_len))

    self.end_conv_1 = nn.Conv2d(in_channels=skip_channels,
                                out_channels=end_channels,
                                kernel_size=(1, 1),
                                bias=True)

    self.end_conv_2 = nn.Conv2d(in_channels=end_channels,
                                out_channels=out_dim,
                                kernel_size=(1, 1),
                                bias=True)

    self.receptive_field = receptive_field