def forward(self, X: SparseTensor, ppr_scores: SparseTensor):
    """
    Parameters:
        X: torch_sparse.SparseTensor of shape (num_ppr_nodes, num_features)
            The node features of all neighbors of the batch (training)
            nodes that appear in the personalized PageRank matrix.
        ppr_scores: torch_sparse.SparseTensor of shape (batch_size, num_ppr_nodes)
            The personalized PageRank scores weighting each neighbor's
            contribution to its batch node.

    Returns:
        propagated_logits: torch.Tensor of shape (batch_size, num_classes)
    """
    # embedding of shape (num_ppr_nodes, hidden_size)
    embedding = self.mlp(X)

    if (self._mean.__name__ == 'soft_median' and ppr_scores.size(0) == 1
            and 'temperature' in self._mean_kwargs):
        c = embedding.shape[1]
        weights = ppr_scores.storage.value()
        with torch.no_grad():
            sort_idx = embedding.argsort(0)
            weights_cumsum = weights[sort_idx].cumsum(0)
            # per dimension: first index at which the cumulative weight
            # reaches half the total weight, i.e. the weighted median
            median_idx = sort_idx[
                (weights_cumsum < weights_cumsum[-1][None, :] / 2).sum(0),
                torch.arange(c)]
        median = embedding[median_idx, torch.arange(c)]
        # re-weight neighbors by their distance to the weighted median
        # (scaled by sqrt(c)); closer rows receive larger soft weights
        distances = torch.norm(embedding - median[None, :], dim=1) / pow(c, 1 / 2)
        soft_weights = weights * F.softmax(
            -distances / self._mean_kwargs['temperature'], dim=-1)
        soft_weights /= soft_weights.sum()
        new_embedding = (soft_weights[:, None] * weights.sum() * embedding).sum(0)
        diffused_embedding = new_embedding[None, :]
    elif ("k" in self._mean_kwargs and "with_weight_correction" in self._mean_kwargs
            and self._mean_kwargs["k"] > X.size(0)):
        # `n` less than `k` with `with_weight_correction` is not implemented,
        # so we must disable `with_weight_correction` whenever n < k
        print("no with_weight_correction")
        diffused_embedding = self._mean(
            ppr_scores,
            embedding,
            # we cannot manipulate self._mean_kwargs in place because that
            # would affect the next call to forward, so we override per call
            with_weight_correction=False,
            **{k: v for k, v in self._mean_kwargs.items()
               if k != "with_weight_correction"})
    else:
        diffused_embedding = self._mean(ppr_scores, embedding,
                                        **self._mean_kwargs)
    return self.mlp_logits(diffused_embedding)

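# A minimal, self-contained sketch of the dimension-wise weighted median and
# the soft re-weighting used in the `soft_median` branch above, assuming
# uniform PPR weights. All names here (`x`, `w`, `temperature`) are
# illustrative and not part of the model's API.
import torch
import torch.nn.functional as F

x = torch.randn(5, 3)      # 5 PPR neighbors, 3 embedding dimensions
w = torch.full((5,), 0.2)  # PPR weights, here uniform
temperature = 1.0

sort_idx = x.argsort(0)
w_cumsum = w[sort_idx].cumsum(0)
# per dimension: first index at which the cumulative weight reaches half
# of the total weight, i.e. the weighted median entry
median_idx = sort_idx[(w_cumsum < w_cumsum[-1][None, :] / 2).sum(0),
                      torch.arange(3)]
median = x[median_idx, torch.arange(3)]

# neighbors close to the weighted median receive larger soft weights
dist = torch.norm(x - median[None, :], dim=1) / 3 ** 0.5
soft_w = w * F.softmax(-dist / temperature, dim=-1)
soft_w /= soft_w.sum()
robust_aggregate = (soft_w[:, None] * w.sum() * x).sum(0)  # shape (3,)
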
def panentropy(self, adj_t: SparseTensor,
               dtype: Optional[int] = None) -> SparseTensor:
    # start from the weighted identity: weight[0] * A^0
    tmp = SparseTensor.eye(adj_t.size(0), adj_t.size(1), has_value=True,
                           dtype=dtype, device=adj_t.device())
    tmp = tmp.mul_nnz(self.weight[0])

    outs = [tmp]
    for i in range(1, self.filter_size + 1):
        # multiply in the next power of the adjacency, rescaled by weight[i]
        tmp = tmp @ adj_t
        tmp = tmp.mul_nnz(self.weight[i])
        outs += [tmp]

    # stack all powers into one COO tensor and merge duplicate entries
    row = torch.cat([out.storage.row() for out in outs], dim=0)
    col = torch.cat([out.storage.col() for out in outs], dim=0)
    value = torch.cat([out.storage.value() for out in outs], dim=0)

    out = SparseTensor(row=row, col=col, value=value,
                       sparse_sizes=adj_t.sparse_sizes()).coalesce()
    return out

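# A usage sketch for `panentropy`, assuming it is attached to a module that
# provides `weight` (length `filter_size + 1`) and `filter_size`. The
# `PanHost` wrapper below is hypothetical and only exists to satisfy those
# two attributes.
import torch
from torch_sparse import SparseTensor

class PanHost(torch.nn.Module):
    def __init__(self, filter_size: int):
        super().__init__()
        self.filter_size = filter_size
        self.weight = torch.nn.Parameter(torch.ones(filter_size + 1))

    panentropy = panentropy  # reuse the function above as a bound method

host = PanHost(filter_size=3)
adj_t = SparseTensor.from_dense(torch.tensor([[0., 1.], [1., 0.]]))
m = host.panentropy(adj_t)  # weighted combination of A^0 ... A^3
print(m.to_dense())
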
def forward(self, X: SparseTensor, ppr_scores: SparseTensor):
    """
    Parameters:
        X: torch_sparse.SparseTensor of shape (num_ppr_nodes, num_features)
            The node features of all neighbors of the batch (training)
            nodes that appear in the personalized PageRank matrix.
        ppr_scores: torch_sparse.SparseTensor of shape (batch_size, num_ppr_nodes)
            The personalized PageRank scores weighting each neighbor's
            contribution to its batch node.

    Returns:
        propagated_logits: torch.Tensor of shape (batch_size, num_classes)
    """
    # logits of shape (num_ppr_nodes, num_classes)
    logits = self.mlp(X)

    if "k" in self._mean_kwargs and "with_weight_correction" in self._mean_kwargs:
        # `n` less than `k` with `with_weight_correction` is not implemented,
        # so we must disable `with_weight_correction` whenever n < k
        if self._mean_kwargs["k"] > X.size(0):
            print("no with_weight_correction")
            return self._mean(
                ppr_scores,
                logits,
                # we cannot manipulate self._mean_kwargs in place because that
                # would affect the next call to forward, so we override per call
                with_weight_correction=False,
                **{k: v for k, v in self._mean_kwargs.items()
                   if k != "with_weight_correction"})
    return self._mean(ppr_scores, logits, **self._mean_kwargs)

def degree_matrix(adj: SparseTensor, indeg=True):
    N = adj.size(-1)
    deg = adj.sum(0) if indeg else adj.sum(1)
    row = col = torch.arange(N, device=adj.device())
    degs = torch.as_tensor(deg, device=adj.device())
    return SparseTensor(row=row, col=col, value=degs,
                        sparse_sizes=(N, N), is_sorted=True)

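# A quick sanity check for `degree_matrix` (toy graph, illustrative only):
# the diagonal of the result holds the column sums (in-degrees) of `adj`.
import torch
from torch_sparse import SparseTensor

adj = SparseTensor.from_dense(torch.tensor([[0., 1., 1.],
                                            [0., 0., 1.],
                                            [0., 0., 0.]]))
print(degree_matrix(adj, indeg=True).to_dense())  # diag(0., 1., 2.)
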
def cheby_op(x: torch.Tensor, L: SparseTensor, coeff: torch.Tensor,
             lam_max: float = 2.0):
    r"""Chebyshev approximation of graph filtering

    Parameters
    ----------
    x: Tensor
        The input graph signal. Its shape can be either :obj:`(N,)`,
        :obj:`(N,Ci)` or :obj:`(Co,N,Ci)`, wherein :obj:`N`, :obj:`Ci` and
        :obj:`Co` are the numbers of nodes, input channels, and output
        channels, respectively.
    L: SparseTensor
        The :obj:`(N,N)` Laplacian matrix.
    coeff: Tensor
        The :obj:`(Co,Ci,K+1)` Chebyshev coefficients for :obj:`Ci*Co`
        kernels, wherein :obj:`K` is the order of approximation.
    lam_max: float, optional
        The maximal graph frequency, i.e., :math:`\lambda_{max}`.

    Returns
    -------
    Tensor
        The filtered signals of shape :obj:`(Co,N,Ci)`.
    """
    Co, Ci, K = coeff.shape
    N = L.size(-1)
    if x.dim() == 1:
        assert x.size() == (N, )
        x = x[None, ..., None]  # (N,) --> 1 x N x 1
    elif x.dim() == 2:  # N x Ci --> Co x N x Ci
        assert x.size() == (N, Ci)
        x = x.unsqueeze(0)
    elif x.dim() == 3:  # Co x N x Ci
        assert x.size() in [(Co, N, Ci), (1, N, Ci)]
    else:
        raise RuntimeError(
            "The input signal has mismatched dimensions: {}".format(x.size()))

    K = K - 1
    c = coeff.unsqueeze(1)  # Co x Ci x (K+1) --> Co x 1 x Ci x (K+1)
    L_norm = normalize_laplace(L, lam_max)

    # Chebyshev recurrence: T_0(x) = x, T_1(x) = L_norm @ x,
    # T_k(x) = 2 * L_norm @ T_{k-1}(x) - T_{k-2}(x)
    twf_old = x
    twf_cur = L_norm @ x  # Co x N x Ci
    result = 0.5 * c[..., 0] * twf_old + c[..., 1] * twf_cur
    for k in range(2, K + 1):
        twf_new = 2 * (L_norm @ twf_cur) - twf_old
        result = result + c[..., k] * twf_new
        twf_old = twf_cur
        twf_cur = twf_new
    return result

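# A usage sketch for `cheby_op`. `normalize_laplace` is not shown in this
# section; Chebyshev filters conventionally rescale the Laplacian to
# 2 * L / lam_max - I so its spectrum lies in [-1, 1], and the helper below
# is an illustrative stand-in under that assumption, not the library's
# implementation.
import torch
from torch_sparse import SparseTensor

def normalize_laplace(L: SparseTensor, lam_max: float = 2.0) -> SparseTensor:
    N = L.size(-1)
    L_norm = 2.0 * L.to_dense() / lam_max - torch.eye(N)
    return SparseTensor.from_dense(L_norm)

# path graph on 4 nodes with the symmetric normalized Laplacian, whose
# eigenvalues are bounded by lam_max = 2
N = 4
A = torch.zeros(N, N)
A[[0, 1, 1, 2, 2, 3], [1, 0, 2, 1, 3, 2]] = 1.
d_inv_sqrt = A.sum(1).pow(-0.5)
L = SparseTensor.from_dense(
    torch.eye(N) - d_inv_sqrt[:, None] * A * d_inv_sqrt[None, :])

coeff = torch.randn(2, 1, 4)        # Co=2 kernels, Ci=1 channel, order K=3
x = torch.randn(N, 1)
print(cheby_op(x, L, coeff).shape)  # torch.Size([2, 4, 1])
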
def process(self):
    with open(self.raw_paths[0], 'r') as f:
        data = [x.split('\t') for x in f.read().split('\n')[1:-1]]

    rows, cols = [], []
    for n_id, col, _ in data:
        col = [int(x) for x in col.split(',')]
        rows += [int(n_id)] * len(col)
        cols += col
    x = SparseTensor(row=torch.tensor(rows), col=torch.tensor(cols))
    x = x.to_dense()

    y = torch.empty(len(data), dtype=torch.long)
    for n_id, _, label in data:
        y[int(n_id)] = int(label)

    with open(self.raw_paths[1], 'r') as f:
        data = f.read().split('\n')[1:-1]
        data = [[int(v) for v in r.split('\t')] for r in data]
        edge_index = torch.tensor(data, dtype=torch.long).t().contiguous()
        edge_index, _ = coalesce(edge_index, None, x.size(0), x.size(0))

    train_masks, val_masks, test_masks = [], [], []
    for f in self.raw_paths[2:]:
        tmp = np.load(f)
        train_masks += [torch.from_numpy(tmp['train_mask']).to(torch.bool)]
        val_masks += [torch.from_numpy(tmp['val_mask']).to(torch.bool)]
        test_masks += [torch.from_numpy(tmp['test_mask']).to(torch.bool)]
    train_mask = torch.stack(train_masks, dim=1)
    val_mask = torch.stack(val_masks, dim=1)
    test_mask = torch.stack(test_masks, dim=1)

    data = Data(x=x, edge_index=edge_index, y=y, train_mask=train_mask,
                val_mask=val_mask, test_mask=test_mask)
    data = data if self.pre_transform is None else self.pre_transform(data)
    torch.save(self.collate([data]), self.processed_paths[0])

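# The feature matrix above is built by treating each "n_id<TAB>f1,f2,..."
# line as the nonzero pattern of one row: a SparseTensor constructed without
# values densifies to a 0/1 matrix. A toy illustration:
import torch
from torch_sparse import SparseTensor

rows = [0, 0, 1]  # node 0 has features {2, 5}, node 1 has feature {2}
cols = [2, 5, 2]
x = SparseTensor(row=torch.tensor(rows), col=torch.tensor(cols)).to_dense()
# x -> tensor([[0., 0., 1., 0., 0., 1.],
#              [0., 0., 1., 0., 0., 0.]])
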
def test_padded_index_select_runtime():
    return  # benchmark only; skipped by default (needs CUDA + Planetoid data)
    from torch_geometric.datasets import Planetoid

    device = torch.device('cuda')
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    dataset = Planetoid('/tmp/Planetoid', name='PubMed')
    data = dataset[0]
    row, col = data.edge_index.to(device)

    adj = SparseTensor(row=row, col=col)
    rowcount = adj.storage.rowcount().to(device)
    rowptr = adj.storage.rowptr().to(device)
    binptr = torch.tensor([0, 4, 11, 30, 50, 80, 120, 140, 2000]).to(device)

    x = torch.randn(adj.size(0), 512).to(device)

    data = torch.ops.torch_sparse.padded_index(rowptr, col, rowcount, binptr)
    node_perm, row_perm, col_perm, mask, node_sizes, edge_sizes = data

    out = torch.ops.torch_sparse.padded_index_select(x, col_perm,
                                                     torch.tensor(0.))
    outs = out.split(edge_sizes)
    for out, size in zip(outs, node_sizes):
        print(out.view(size, -1, x.size(-1)).shape)

    # warm up for 10 iterations, then time the remaining 100
    for i in range(110):
        if i == 10:
            start.record()
        torch.ops.torch_sparse.padded_index(rowptr, col, rowcount, binptr)
    end.record()
    torch.cuda.synchronize()
    print('padded index', start.elapsed_time(end))

    for i in range(110):
        if i == 10:
            start.record()
        out = torch.ops.torch_sparse.padded_index_select(
            x, col_perm, torch.tensor(0.))
        out.split(edge_sizes)
    end.record()
    torch.cuda.synchronize()
    print('padded index select', start.elapsed_time(end))

    for i in range(110):
        if i == 10:
            start.record()
        x.index_select(0, col)
    end.record()
    torch.cuda.synchronize()
    print('index_select', start.elapsed_time(end))

def in_degree(adj: SparseTensor, bunch=None):
    if bunch is None:
        in_deg = adj.sum(0)
    else:
        N = adj.size(0)
        if len(bunch) > int(0.2 * N):
            in_deg = adj.sum(0)[bunch]
        else:
            ptr, idx, val = adj.csc()
            in_deg = val.new_zeros(len(bunch))
            for i, v in enumerate(bunch):
                in_deg[i] = val[ptr[v]:ptr[v + 1]].sum()
    return in_deg

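# Both branches of `in_degree` agree; the CSC path just avoids materializing
# all column sums when only a few nodes are requested. Toy check:
import torch
from torch_sparse import SparseTensor

adj = SparseTensor.from_dense(torch.tensor([[0., 1., 1.],
                                            [0., 0., 1.],
                                            [1., 0., 0.]]))
print(in_degree(adj))             # tensor([1., 1., 2.])
print(in_degree(adj, bunch=[2]))  # tensor([2.])
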
def __dropout_adj__(self, sparse_adj: SparseTensor, dropout_adj_prob: float):
    # number of nodes
    N = sparse_adj.size(0)

    # sparse adjacency matrix to edge_index representation
    row, col, edge_attr = sparse_adj.coo()
    edge_index = torch.stack([row, col], dim=0)

    # drop out edges of the adjacency matrix -> generalization
    edge_index, edge_attr = dropout_adj(edge_index, edge_attr=edge_attr,
                                        p=dropout_adj_prob,
                                        force_undirected=True,
                                        training=self.training)

    # because dropout removes self-loops (due to force_undirected=True),
    # make sure to add them back again
    edge_index, edge_attr = add_remaining_self_loops(edge_index,
                                                     edge_weight=edge_attr,
                                                     fill_value=0.00,
                                                     num_nodes=N)

    # edge_index representation back to sparse adjacency matrix
    sparse_adj = SparseTensor.from_edge_index(edge_index,
                                              edge_attr=edge_attr,
                                              sparse_sizes=(N, N))
    return sparse_adj

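# The same round trip outside the module (drop edges, restore self-loops,
# rebuild the SparseTensor); the toy graph and p=0.5 are illustrative.
import torch
from torch_sparse import SparseTensor
from torch_geometric.utils import add_remaining_self_loops, dropout_adj

N = 4
adj = SparseTensor.from_dense((torch.rand(N, N) > 0.5).float())
row, col, val = adj.coo()
edge_index = torch.stack([row, col], dim=0)
edge_index, val = dropout_adj(edge_index, edge_attr=val, p=0.5,
                              force_undirected=True, training=True)
edge_index, val = add_remaining_self_loops(edge_index, edge_weight=val,
                                           fill_value=0., num_nodes=N)
adj = SparseTensor.from_edge_index(edge_index, edge_attr=val,
                                   sparse_sizes=(N, N))
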
def amfs(
    A: SparseTensor,
    Sigma=None,
    level=None,
    delta=0.1,
    thresh_kld=1e-6,
    priority=True,
    verbose=False,
) -> Tuple[List[lil_matrix], np.ndarray]:
    r"""AMFS bipartite approximation for graph wavelet signal processing [3]_.

    Parameters
    ----------
    A: SparseTensor
        The adjacency matrix.
    Sigma: scipy.spmatrix, optional
        The covariance matrix specified by the Laplacian matrix L.
        If None, :math:`\Sigma^{-1} = L + \delta I`.
    level: int, optional
        The number of bipartite subgraphs, i.e., the decomposition level.
        If None, :math:`level = \lceil \log_2 \mathcal{X} \rceil`, where
        :math:`\mathcal{X}` is the chromatic number of :obj:`A`.
    delta: float, optional
        :math:`1/\delta` is interpreted as the variance of the DC component.
        Refer to [4]_ for more details.
    thresh_kld: float, optional
        Threshold of Kullback-Leibler divergence to perform `AMFS`
        decomposition.
    priority: bool, optional
        If True, KLD holds priority.
    verbose: bool, optional

    Returns
    -------
    bptG: List[lil_matrix]
        The bipartite subgraphs.
    beta: np.ndarray of shape (N, level)
        The indicator of the bipartite sets.

    References
    ----------
    .. [3] Jing Zen, et al., "Bipartite Subgraph Decomposition for Critically
           Sampled Wavelet Filterbanks on Arbitrary Graphs," IEEE Trans. on
           Signal Processing, 2016.
    .. [4] A. Gadde, et al., "A probabilistic interpretation of sampling
           theory of graph signals," ICASSP, 2015.
    """
    N = A.size(-1)
    # compute_sigma builds on the Laplacian matrix, which prefers "coo"
    A = A.to_scipy(layout="coo").astype("d")
    if Sigma is None:
        Sigma = compute_sigma(A, delta)
    else:
        assert Sigma.shape == (N, N)

    if level is None:
        chromatic = dsatur(A).n_color
        # np.ceil returns a float; range() and np.zeros() below need an int
        level = int(np.ceil(np.log2(chromatic)))

    A = A.tolil()
    beta = np.zeros((N, level), dtype=bool)
    bptG = [lil_matrix((N, N), dtype=A.dtype) for _ in range(level)]

    for i in range(level):
        if verbose:
            print(f"\n|----decomposition in level: {i:4d} ----|")
        s1, s2 = amfs1level(A, Sigma, delta, thresh_kld, priority, verbose)
        bt = beta[:, i]
        bt[s1] = 1  # mark s1 as one side of the bipartition
        mask = bipartite_mask(bt)
        # move the cross edges into the i-th bipartite subgraph
        bptG[i][mask] = A[mask]
        A[mask] = 0
    return bptG, beta

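# `bipartite_mask` is not defined in this section; from its use above it
# must select exactly the entries that cross the bipartition. A plausible
# stand-in (an assumption, not the library's implementation):
import numpy as np

def bipartite_mask(bt: np.ndarray) -> np.ndarray:
    # True at (i, j) iff i and j lie on opposite sides of the partition
    return np.logical_xor.outer(bt, bt)
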
def sample_adj(self, adj: SparseTensor) -> SparseTensor:
    row, col, _ = adj.coo()
    deg = degree(row, num_nodes=adj.size(0))
    prob = (self.max_sample * (1. / deg))[row]
    mask = torch.rand_like(prob) < prob
    return adj.masked_select_nnz(mask, layout='coo')

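# Degree-proportional edge sampling: an edge leaving node u survives with
# probability max_sample / deg(u), so rows with degree above `max_sample`
# keep roughly `max_sample` neighbors in expectation. A minimal harness
# (`Sampler` is hypothetical):
import torch
from torch_geometric.utils import degree
from torch_sparse import SparseTensor

class Sampler:
    max_sample = 2
    sample_adj = sample_adj  # reuse the function above as a bound method

adj = SparseTensor.from_dense((torch.rand(8, 8) > 0.3).float())
sub = Sampler().sample_adj(adj)
print(adj.nnz(), '->', sub.nnz())
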