import pytest
import torch
from torch_sparse import SparseTensor

from torch_geometric.data.graph_store import EdgeLayout


def test_graph_store():
    graph_store = MyGraphStore()
    edge_index = torch.LongTensor([[0, 1], [1, 2]])
    adj = SparseTensor(row=edge_index[0], col=edge_index[1])

    def assert_equal_tensor_tuple(expected, actual):
        assert len(expected) == len(actual)
        for i in range(len(expected)):
            assert torch.equal(expected[i], actual[i])

    # We put all three tensor types: COO, CSR, and CSC, and we get them back
    # to confirm that `GraphStore` works as intended.
    coo = adj.coo()[:-1]
    csr = adj.csr()[:-1]
    csc = adj.csc()[-2::-1]  # (row, colptr)

    # Put:
    graph_store['edge', EdgeLayout.COO] = coo
    graph_store['edge', 'csr'] = csr
    graph_store['edge', 'csc'] = csc

    # Get:
    assert_equal_tensor_tuple(coo, graph_store['edge', 'coo'])
    assert_equal_tensor_tuple(csr, graph_store['edge', 'csr'])
    assert_equal_tensor_tuple(csc, graph_store['edge', 'csc'])

    # Get attrs:
    edge_attrs = graph_store.get_all_edge_attrs()
    assert len(edge_attrs) == 3

    with pytest.raises(KeyError):
        _ = graph_store['edge_2', 'coo']
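# `MyGraphStore` is not defined in this snippet. Below is a minimal sketch of
# what the test assumes, following the abstract `GraphStore` interface from
# `torch_geometric.data.graph_store` (`_put_edge_index`, `_get_edge_index`,
# and `get_all_edge_attrs` are that interface's hooks; the key scheme and
# storage layout here are illustrative assumptions, not the library's):
from typing import List

from torch_geometric.data.graph_store import EdgeAttr, GraphStore


class MyGraphStore(GraphStore):
    def __init__(self):
        super().__init__()
        self.store = {}  # key -> (EdgeAttr, edge_index)

    @staticmethod
    def key(attr: EdgeAttr) -> str:
        return f'{attr.edge_type}__{attr.layout.value}'

    def _put_edge_index(self, edge_index, edge_attr: EdgeAttr) -> bool:
        self.store[MyGraphStore.key(edge_attr)] = (edge_attr, edge_index)
        return True

    def _get_edge_index(self, edge_attr: EdgeAttr):
        # Returning `None` makes the base class raise the `KeyError` that the
        # test expects for unknown edge types.
        row = self.store.get(MyGraphStore.key(edge_attr), None)
        return row[1] if row is not None else None

    def get_all_edge_attrs(self) -> List[EdgeAttr]:
        return [attr for attr, _ in self.store.values()]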
import os
import pickle

import torch
from torch_sparse import SparseTensor

from torch_geometric.utils import subgraph


def pos_sample(self, batch):
    # Cache positive samples on disk, keyed by dataset and walk
    # hyperparameters. Note that the batch itself is not part of the key, so
    # the cache is only valid when sampling over the same node set.
    name_of_samples = (f'{self.datasetname}_{self.walk_length}_'
                       f'{self.walks_per_node}_{self.context_size}_'
                       f'{self.p}_{self.q}.pickle')
    if os.path.exists(name_of_samples):
        with open(name_of_samples, 'rb') as f:
            pos_samples = pickle.load(f)
    else:
        len_batch = len(batch)
        # Restrict the graph to the batch nodes and relabel them to
        # [0, len_batch), so the sparse sizes below are consistent. The
        # returned walks therefore use local indices into `batch`.
        edge_index, _ = subgraph(batch, self.data.edge_index,
                                 relabel_nodes=True)
        row, col = edge_index
        adj = SparseTensor(row=row, col=col,
                           sparse_sizes=(len_batch, len_batch))
        rowptr, col, _ = adj.csr()

        # Start `walks_per_node` walks from every (relabeled) batch node.
        # `RW` is the module-level random-walk routine (e.g. torch-cluster's
        # `random_walk`).
        start = torch.arange(len_batch).repeat(
            self.walks_per_node).to(self.device)
        rw = RW(rowptr, col, start, self.walk_length, self.p, self.q)
        if not isinstance(rw, torch.Tensor):
            rw = rw[0]

        # Slide a window of size `context_size` over each walk:
        walks = []
        num_walks_per_rw = 1 + self.walk_length + 1 - self.context_size
        for j in range(num_walks_per_rw):
            walks.append(rw[:, j:j + self.context_size])
        # At this point `walks` holds one matrix per window offset.
        pos_samples = torch.cat(walks, dim=0)

        with open(name_of_samples, 'wb') as f:
            pickle.dump(pos_samples, f)
    return pos_samples
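# The sliding-window arithmetic above can be sanity-checked standalone,
# without torch-cluster or a cached pickle. A fake walk tensor stands in for
# the output of the random-walk kernel (all values below are illustrative):
import torch

walk_length, context_size = 5, 3
rw = torch.arange(2 * (walk_length + 1)).view(2, -1)  # two fake 6-node walks

walks = []
num_walks_per_rw = 1 + walk_length + 1 - context_size  # 4 windows per walk
for j in range(num_walks_per_rw):
    walks.append(rw[:, j:j + context_size])

pos_samples = torch.cat(walks, dim=0)
print(pos_samples.shape)  # torch.Size([8, 3]): 2 walks x 4 windows, length 3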
def binary_out_degree(adj: SparseTensor, bunch=None):
    # Unweighted out-degree: the number of stored entries per row, read
    # directly from the CSR row pointer.
    ptr, _, _ = adj.csr()
    ptr = ptr.cpu().numpy()
    deg = ptr[1:] - ptr[:-1]
    if bunch is not None:
        deg = deg[bunch]
    return deg
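# Quick sanity check for `binary_out_degree` on a toy graph (assumes torch
# and torch_sparse are installed; the tiny graph is illustrative):
import torch
from torch_sparse import SparseTensor

edge_index = torch.tensor([[0, 0, 1], [1, 2, 2]])
adj = SparseTensor(row=edge_index[0], col=edge_index[1], sparse_sizes=(3, 3))
print(binary_out_degree(adj))       # [2 1 0]
print(binary_out_degree(adj, [1]))  # [1]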
def out_degree(adj: SparseTensor, bunch=None):
    # Weighted out-degree; assumes `adj` carries edge values.
    if bunch is None:
        out_deg = adj.sum(1)
    else:
        N = adj.size(0)
        if len(bunch) > int(0.2 * N):
            # For large bunches it is cheaper to reduce all rows at once.
            out_deg = adj.sum(1)[bunch]
        else:
            # For small bunches, sum only the requested CSR row slices.
            ptr, _, val = adj.csr()
            out_deg = val.new_zeros(len(bunch))
            for i, v in enumerate(bunch):
                out_deg[i] = val[ptr[v]:ptr[v + 1]].sum()
    return out_deg
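# Sanity check for `out_degree`, exercising the per-row CSR branch with a
# bunch smaller than 20% of the node count (toy graph, illustrative values):
import torch
from torch_sparse import SparseTensor

row = torch.tensor([0, 0, 0, 5])
col = torch.tensor([1, 2, 3, 6])
val = torch.tensor([1., 2., 3., 4.])
adj = SparseTensor(row=row, col=col, value=val, sparse_sizes=(10, 10))

print(out_degree(adj, torch.tensor([0])))  # tensor([6.]) via the CSR branch
print(out_degree(adj)[5])                  # tensor(4.) via the full reduction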
import torch
from torch.utils.data import DataLoader
from torch_sparse import SparseTensor

from torch_geometric.utils.num_nodes import maybe_num_nodes

try:
    from torch_cluster import random_walk
except ImportError:
    random_walk = None


class RandomWalk():
    def __init__(self, edge_index, walk_length, context_size,
                 walks_per_node=1, p=1, q=1, num_negative_samples=1,
                 num_nodes=None, sparse=False):
        if random_walk is None:
            raise ImportError('`RandomWalk` requires `torch-cluster`.')

        N = maybe_num_nodes(edge_index, num_nodes)
        row, col = edge_index
        self.adj = SparseTensor(row=row, col=col, sparse_sizes=(N, N))
        self.adj = self.adj.to('cpu')

        assert walk_length >= context_size

        self.walk_length = walk_length - 1
        self.context_size = context_size
        self.walks_per_node = walks_per_node
        self.p = p
        self.q = q
        self.num_negative_samples = num_negative_samples

    def loader(self, **kwargs):
        return DataLoader(range(self.adj.sparse_size(0)),
                          collate_fn=self.sample, **kwargs)

    def sample(self, batch):
        if not isinstance(batch, torch.Tensor):
            batch = torch.tensor(batch)
        batch = batch.repeat(self.walks_per_node)
        rowptr, col, _ = self.adj.csr()
        rw = random_walk(rowptr, col, batch, self.walk_length, self.p, self.q)
        if not isinstance(rw, torch.Tensor):
            rw = rw[0]

        # Emit (source, context) node pairs for every window position `j`
        # and every in-window offset `i`:
        walks = []
        num_walks_per_rw = 1 + self.walk_length + 1 - self.context_size
        for j in range(num_walks_per_rw):
            for i in range(1, self.context_size):
                walks.append(rw[:, [j, j + i]])
        return torch.cat(walks, dim=0)
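# Hedged usage sketch for `RandomWalk` (requires torch-cluster; the toy
# `edge_index` is illustrative). Each yielded tensor holds (source, context)
# node pairs:
import torch

edge_index = torch.tensor([[0, 1, 2, 3], [1, 2, 3, 0]])
rw_sampler = RandomWalk(edge_index, walk_length=4, context_size=2)
loader = rw_sampler.loader(batch_size=2, shuffle=True)
for pairs in loader:
    print(pairs.shape)  # [num_pairs, 2]
    break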
import torch
from torch.nn import Embedding
from torch.utils.data import DataLoader
from torch_sparse import SparseTensor

from torch_geometric.utils.num_nodes import maybe_num_nodes

try:
    from torch_cluster import random_walk
except ImportError:
    random_walk = None

EPS = 1e-15


class Node2Vec(torch.nn.Module):
    r"""The Node2Vec model from the
    `"node2vec: Scalable Feature Learning for Networks"
    <https://arxiv.org/abs/1607.00653>`_ paper where random walks of
    length :obj:`walk_length` are sampled in a given graph, and node
    embeddings are learned via negative sampling optimization.

    .. note::

        For an example of using Node2Vec, see `examples/node2vec.py
        <https://github.com/pyg-team/pytorch_geometric/blob/master/examples/
        node2vec.py>`_.

    Args:
        edge_index (LongTensor): The edge indices.
        embedding_dim (int): The size of each embedding vector.
        walk_length (int): The walk length.
        context_size (int): The actual context size which is considered for
            positive samples. This parameter increases the effective sampling
            rate by reusing samples across different source nodes.
        walks_per_node (int, optional): The number of walks to sample for
            each node. (default: :obj:`1`)
        p (float, optional): Likelihood of immediately revisiting a node in
            the walk. (default: :obj:`1`)
        q (float, optional): Control parameter to interpolate between
            breadth-first strategy and depth-first strategy (default:
            :obj:`1`)
        num_negative_samples (int, optional): The number of negative samples
            to use for each positive sample. (default: :obj:`1`)
        num_nodes (int, optional): The number of nodes. (default:
            :obj:`None`)
        sparse (bool, optional): If set to :obj:`True`, gradients w.r.t. to
            the weight matrix will be sparse. (default: :obj:`False`)
    """
    def __init__(self, edge_index, embedding_dim, walk_length, context_size,
                 walks_per_node=1, p=1, q=1, num_negative_samples=1,
                 num_nodes=None, sparse=False):
        super().__init__()

        if random_walk is None:
            raise ImportError('`Node2Vec` requires `torch-cluster`.')

        N = maybe_num_nodes(edge_index, num_nodes)
        row, col = edge_index
        self.adj = SparseTensor(row=row, col=col, sparse_sizes=(N, N))
        self.adj = self.adj.to('cpu')

        assert walk_length >= context_size

        self.embedding_dim = embedding_dim
        self.walk_length = walk_length - 1
        self.context_size = context_size
        self.walks_per_node = walks_per_node
        self.p = p
        self.q = q
        self.num_negative_samples = num_negative_samples

        self.embedding = Embedding(N, embedding_dim, sparse=sparse)

        self.reset_parameters()

    def reset_parameters(self):
        self.embedding.reset_parameters()

    def forward(self, batch=None):
        """Returns the embeddings for the nodes in :obj:`batch`."""
        emb = self.embedding.weight
        return emb if batch is None else emb.index_select(0, batch)

    def loader(self, **kwargs):
        return DataLoader(range(self.adj.sparse_size(0)),
                          collate_fn=self.sample, **kwargs)

    def pos_sample(self, batch):
        batch = batch.repeat(self.walks_per_node)
        rowptr, col, _ = self.adj.csr()
        rw = random_walk(rowptr, col, batch, self.walk_length, self.p, self.q)
        if not isinstance(rw, torch.Tensor):
            rw = rw[0]

        walks = []
        num_walks_per_rw = 1 + self.walk_length + 1 - self.context_size
        for j in range(num_walks_per_rw):
            walks.append(rw[:, j:j + self.context_size])
        return torch.cat(walks, dim=0)

    def neg_sample(self, batch):
        batch = batch.repeat(self.walks_per_node * self.num_negative_samples)

        rw = torch.randint(self.adj.sparse_size(0),
                           (batch.size(0), self.walk_length))
        rw = torch.cat([batch.view(-1, 1), rw], dim=-1)

        walks = []
        num_walks_per_rw = 1 + self.walk_length + 1 - self.context_size
        for j in range(num_walks_per_rw):
            walks.append(rw[:, j:j + self.context_size])
        return torch.cat(walks, dim=0)

    def sample(self, batch):
        if not isinstance(batch, torch.Tensor):
            batch = torch.tensor(batch)
        return self.pos_sample(batch), self.neg_sample(batch)

    def loss(self, pos_rw, neg_rw):
        r"""Computes the loss given positive and negative random walks."""

        # Positive loss.
        start, rest = pos_rw[:, 0], pos_rw[:, 1:].contiguous()

        h_start = self.embedding(start).view(pos_rw.size(0), 1,
                                             self.embedding_dim)
        h_rest = self.embedding(rest.view(-1)).view(pos_rw.size(0), -1,
                                                    self.embedding_dim)

        out = (h_start * h_rest).sum(dim=-1).view(-1)
        pos_loss = -torch.log(torch.sigmoid(out) + EPS).mean()

        # Negative loss.
        start, rest = neg_rw[:, 0], neg_rw[:, 1:].contiguous()

        h_start = self.embedding(start).view(neg_rw.size(0), 1,
                                             self.embedding_dim)
        h_rest = self.embedding(rest.view(-1)).view(neg_rw.size(0), -1,
                                                    self.embedding_dim)

        out = (h_start * h_rest).sum(dim=-1).view(-1)
        neg_loss = -torch.log(1 - torch.sigmoid(out) + EPS).mean()

        return pos_loss + neg_loss

    def test(self, train_z, train_y, test_z, test_y, solver='lbfgs',
             multi_class='auto', *args, **kwargs):
        r"""Evaluates latent space quality via a logistic regression
        downstream task."""
        from sklearn.linear_model import LogisticRegression
        clf = LogisticRegression(solver=solver, multi_class=multi_class,
                                 *args, **kwargs).fit(
                                     train_z.detach().cpu().numpy(),
                                     train_y.detach().cpu().numpy())
        return clf.score(test_z.detach().cpu().numpy(),
                         test_y.detach().cpu().numpy())

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}({self.embedding.weight.size(0)}, '
                f'{self.embedding.weight.size(1)})')
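# A minimal training sketch for the `Node2Vec` module above, mirroring the
# referenced examples/node2vec.py (requires torch-cluster; the toy
# `edge_index` is illustrative). `sparse=True` pairs the embedding with
# `torch.optim.SparseAdam`:
import torch

edge_index = torch.tensor([[0, 1, 2, 3], [1, 2, 3, 0]])
model = Node2Vec(edge_index, embedding_dim=16, walk_length=4, context_size=2,
                 walks_per_node=2, sparse=True)
loader = model.loader(batch_size=4, shuffle=True)
optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)

model.train()
for pos_rw, neg_rw in loader:
    optimizer.zero_grad()
    loss = model.loss(pos_rw, neg_rw)
    loss.backward()
    optimizer.step()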
import copy
from typing import Optional

import torch
from torch import Tensor
from torch_sparse import SparseTensor

from torch_geometric.data import Batch, Data


class ShaDowKHopSampler(torch.utils.data.DataLoader):
    r"""The ShaDow :math:`k`-hop sampler from the `"Deep Graph Neural
    Networks with Shallow Subgraph Samplers"
    <https://arxiv.org/abs/2012.01380>`_ paper.
    Given a graph in a :obj:`data` object, the sampler will create shallow,
    localized subgraphs.
    A deep GNN on this local graph then smooths the informative local
    signals.

    Args:
        data (torch_geometric.data.Data): The graph data object.
        depth (int): The depth/number of hops of the localized subgraph.
        num_neighbors (int): The number of neighbors to sample for each node
            in each hop.
        node_idx (LongTensor or BoolTensor, optional): The nodes that should
            be considered for creating mini-batches.
            If set to :obj:`None`, all nodes will be considered.
        replace (bool, optional): If set to :obj:`True`, will sample
            neighbors with replacement. (default: :obj:`False`)
        **kwargs (optional): Additional arguments of
            :class:`torch.utils.data.DataLoader`, such as :obj:`batch_size`
            or :obj:`num_workers`.
    """
    def __init__(self, data: Data, depth: int, num_neighbors: int,
                 node_idx: Optional[Tensor] = None, replace: bool = False,
                 **kwargs):
        self.data = copy.copy(data)
        self.depth = depth
        self.num_neighbors = num_neighbors
        self.replace = replace

        if data.edge_index is not None:
            self.is_sparse_tensor = False
            row, col = data.edge_index.cpu()
            self.adj_t = SparseTensor(
                row=row, col=col, value=torch.arange(col.size(0)),
                sparse_sizes=(data.num_nodes, data.num_nodes)).t()
        else:
            self.is_sparse_tensor = True
            self.adj_t = data.adj_t.cpu()

        if node_idx is None:
            node_idx = torch.arange(self.adj_t.sparse_size(0))
        elif node_idx.dtype == torch.bool:
            node_idx = node_idx.nonzero(as_tuple=False).view(-1)
        self.node_idx = node_idx

        super().__init__(node_idx.tolist(), collate_fn=self.__collate__,
                         **kwargs)

    def __collate__(self, n_id):
        n_id = torch.tensor(n_id)

        rowptr, col, value = self.adj_t.csr()
        out = torch.ops.torch_sparse.ego_k_hop_sample_adj(
            rowptr, col, n_id, self.depth, self.num_neighbors, self.replace)
        rowptr, col, n_id, e_id, ptr, root_n_id = out

        adj_t = SparseTensor(
            rowptr=rowptr, col=col,
            value=value[e_id] if value is not None else None,
            sparse_sizes=(n_id.numel(), n_id.numel()), is_sorted=True)

        batch = Batch(batch=torch.ops.torch_sparse.ptr2ind(ptr, n_id.numel()),
                      ptr=ptr)
        batch.root_n_id = root_n_id

        if self.is_sparse_tensor:
            batch.adj_t = adj_t
        else:
            row, col, e_id = adj_t.t().coo()
            batch.edge_index = torch.stack([row, col], dim=0)

        for k, v in self.data:
            if k in ['edge_index', 'adj_t', 'num_nodes']:
                continue
            if k == 'y' and v.size(0) == self.data.num_nodes:
                batch[k] = v[n_id][root_n_id]
            elif isinstance(v, Tensor) and v.size(0) == self.data.num_nodes:
                batch[k] = v[n_id]
            elif isinstance(v, Tensor) and v.size(0) == self.data.num_edges:
                batch[k] = v[e_id]
            else:
                batch[k] = v

        return batch
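# Hedged usage sketch: run the sampler on a toy `Data` object (illustrative
# values; requires a torch-sparse build that ships the
# `ego_k_hop_sample_adj` operator):
import torch
from torch_geometric.data import Data

edge_index = torch.tensor([[0, 1, 2, 3], [1, 2, 3, 0]])
data = Data(x=torch.randn(4, 8), edge_index=edge_index)

loader = ShaDowKHopSampler(data, depth=2, num_neighbors=2, batch_size=2)
for batch in loader:
    # Each element is a batch of localized subgraphs; `batch.root_n_id`
    # indexes the seed node inside each subgraph.
    print(batch.num_nodes, batch.root_n_id.tolist())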