def degree_matrix(adj: SparseTensor, indeg=True):
    # Build a diagonal matrix holding in-degrees (column sums) or
    # out-degrees (row sums) of `adj`.
    N = adj.size(-1)
    deg = adj.sum(0) if indeg else adj.sum(1)
    row = col = torch.arange(N, device=adj.device())
    degs = torch.as_tensor(deg, device=adj.device())
    return SparseTensor(row=row, col=col, value=degs, sparse_sizes=(N, N),
                        is_sorted=True)

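# Usage sketch (not from the original source): exercises `degree_matrix` on a
# tiny made-up directed graph. Assumes `torch` and `torch_sparse` are
# available, as the function above already does.
def _demo_degree_matrix():
    import torch
    from torch_sparse import SparseTensor

    edge_index = torch.tensor([[0, 1, 1], [1, 0, 2]])
    adj = SparseTensor(row=edge_index[0], col=edge_index[1],
                       sparse_sizes=(3, 3))
    d_in = degree_matrix(adj, indeg=True)    # diagonal of column sums
    d_out = degree_matrix(adj, indeg=False)  # diagonal of row sums
    assert d_in.sparse_sizes() == (3, 3) and d_out.sparse_sizes() == (3, 3)
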
def test_get_data_size():
    x = torch.randn(10, 128)
    row, col = torch.randint(0, 10, (2, 100), dtype=torch.long)
    adj_t = SparseTensor(row=row, col=col, value=None, sparse_sizes=(10, 10))
    data = Data(x=x, y=x, adj_t=adj_t)

    data_size = get_data_size(data)
    # `x` is counted once (`y` aliases the same storage): 10 * 128 float32
    # values, plus the CSR buffers of `adj_t`: rowptr (11) and col (100)
    # int64 entries.
    assert data_size == 10 * 128 * 4 + 11 * 8 + 100 * 8

def radius(x, r=0.5, loop=False, dtype=None, device=None):
    N, D = x.shape
    # `radius_graph` expects `batch` on the same device as `x`.
    batch = torch.zeros(N, dtype=torch.long, device=x.device)
    edge_index = radius_graph(x, r, batch=batch, loop=loop).to(device)
    edge_val = torch.ones(edge_index.shape[-1], dtype=dtype, device=device)
    return SparseTensor(row=edge_index[0], col=edge_index[1], value=edge_val,
                        sparse_sizes=(N, N))

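# Usage sketch (illustrative): a radius-graph adjacency over five random 2-D
# points. `radius_graph` from torch_cluster is assumed to be in scope, as in
# the function above.
def _demo_radius():
    import torch

    x = torch.rand(5, 2)
    adj = radius(x, r=0.7, loop=True, dtype=torch.float)
    assert adj.sparse_sizes() == (5, 5)
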
def setup_geom(self):
    edge_file = self.root / self.name / 'out1_graph_edges.txt'
    feature_label_file = self.root / self.name / 'out1_node_feature_label.txt'

    self.metric = 'Accuracy'

    # Skip the header line; each remaining line is "<src>\t<dst>".
    edges = edge_file.open('r').readlines()[1:]
    edges = torch.LongTensor([[int(v) for v in edge.strip().split('\t')]
                              for edge in edges])
    self.num_nodes = torch.max(edges).item() + 1
    self.adj_t = SparseTensor(row=edges[:, 0], col=edges[:, 1],
                              sparse_sizes=(self.num_nodes, self.num_nodes))
    # self.adj_t = self.adj_t.to_symmetric()
    if self.make_edge_index:
        self.edge_index = edges.t()

    idx, x, y = [], [], []
    xy = feature_label_file.open('r').readlines()[1:]
    for line in xy:
        node_id, feature, label = line.strip().split('\t')
        idx.append(int(node_id))
        if self.name == 'actor':
            # Features are given as the non-zero positions of a
            # 932-dimensional multi-hot vector.
            one_hot = torch.zeros(932)
            pos_with_ones = list(map(int, feature.split(',')))
            one_hot[pos_with_ones] = 1
            x.append(one_hot.int().tolist())
        else:
            x.append(list(map(int, feature.split(','))))
        y.append(int(label))

    # Sort features and labels by node id.
    _, indices = torch.sort(torch.LongTensor(idx))
    self.x = torch.LongTensor(x)[indices]
    self.y = torch.LongTensor(y).view(-1, 1)[indices]
    self.num_classes = torch.max(self.y).item() + 1

    # Stratified 60/20/20 train/valid/test split.
    idx = torch.arange(self.y.shape[0]).view(-1, 1)
    train_idx, val_test_idx = train_test_split(idx, test_size=0.4,
                                               stratify=self.y)
    val_idx, test_idx = train_test_split(
        val_test_idx, test_size=0.5, stratify=self.y[val_test_idx.squeeze()])
    self.split_idx = {
        'train': train_idx.view(-1),
        'valid': val_idx.view(-1),
        'test': test_idx.view(-1),
    }
    self.criterion = torch.nn.CrossEntropyLoss()

def __init__(self, data, batch_size, num_steps=1, sample_coverage=50,
             save_dir=None, num_workers=0, log=True):
    assert data.edge_index is not None
    assert 'node_norm' not in data
    assert 'edge_norm' not in data

    self.N = N = data.num_nodes
    self.E = data.num_edges

    self.adj = SparseTensor(row=data.edge_index[0], col=data.edge_index[1],
                            value=data.edge_attr, sparse_sizes=(N, N))

    # Work on a shallow copy with edge information stripped; sampled
    # subgraphs carry their own edge indices.
    self.data = copy.copy(data)
    self.data.edge_index = None
    self.data.edge_attr = None

    self.batch_size = batch_size
    self.num_steps = num_steps
    self.sample_coverage = sample_coverage
    self.num_workers = num_workers
    self.log = log
    self.__count__ = 0

    if self.num_workers > 0:
        self.__sample_queue__ = Queue()
        self.__sample_workers__ = []
        for _ in range(self.num_workers):
            worker = Process(target=self.__put_sample__,
                             args=(self.__sample_queue__, ))
            worker.daemon = True
            worker.start()
            self.__sample_workers__.append(worker)

    path = osp.join(save_dir or '', self.__filename__)
    if save_dir is not None and osp.exists(path):  # pragma: no cover
        self.node_norm, self.edge_norm = torch.load(path)
    else:
        self.node_norm, self.edge_norm = self.__compute_norm__()
        if save_dir is not None:  # pragma: no cover
            torch.save((self.node_norm, self.edge_norm), path)

    if self.num_workers > 0:
        self.__data_queue__ = Queue()
        self.__data_workers__ = []
        for _ in range(self.num_workers):
            worker = Process(target=self.__put_data__,
                             args=(self.__data_queue__, ))
            worker.daemon = True
            worker.start()
            self.__data_workers__.append(worker)

def test_nn_conv():
    x1 = torch.randn(4, 8)
    x2 = torch.randn(2, 16)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    value = torch.rand(row.size(0), 3)
    adj = SparseTensor(row=row, col=col, value=value, sparse_sizes=(4, 4))

    nn = Seq(Lin(3, 32), ReLU(), Lin(32, 8 * 32))
    conv = NNConv(8, 32, nn=nn)
    assert conv.__repr__() == (
        'NNConv(8, 32, aggr="add", nn=Sequential(\n'
        '  (0): Linear(in_features=3, out_features=32, bias=True)\n'
        '  (1): ReLU()\n'
        '  (2): Linear(in_features=32, out_features=256, bias=True)\n'
        '))')
    out = conv(x1, edge_index, value)
    assert out.size() == (4, 32)
    assert conv(x1, edge_index, value, size=(4, 4)).tolist() == out.tolist()
    assert conv(x1, adj.t()).tolist() == out.tolist()

    t = '(Tensor, Tensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit(x1, edge_index, value).tolist() == out.tolist()
    assert jit(x1, edge_index, value, size=(4, 4)).tolist() == out.tolist()

    t = '(Tensor, SparseTensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit(x1, adj.t()).tolist() == out.tolist()

    adj = adj.sparse_resize((4, 2))
    conv = NNConv((8, 16), 32, nn=nn)
    assert conv.__repr__() == (
        'NNConv((8, 16), 32, aggr="add", nn=Sequential(\n'
        '  (0): Linear(in_features=3, out_features=32, bias=True)\n'
        '  (1): ReLU()\n'
        '  (2): Linear(in_features=32, out_features=256, bias=True)\n'
        '))')
    out1 = conv((x1, x2), edge_index, value)
    out2 = conv((x1, None), edge_index, value, (4, 2))
    assert out1.size() == (2, 32)
    assert out2.size() == (2, 32)
    assert conv((x1, x2), edge_index, value, (4, 2)).tolist() == out1.tolist()
    assert conv((x1, x2), adj.t()).tolist() == out1.tolist()
    assert conv((x1, None), adj.t()).tolist() == out2.tolist()

    t = '(OptPairTensor, Tensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit((x1, x2), edge_index, value).tolist() == out1.tolist()
    assert jit((x1, x2), edge_index, value,
               size=(4, 2)).tolist() == out1.tolist()
    assert jit((x1, None), edge_index, value,
               size=(4, 2)).tolist() == out2.tolist()

    t = '(OptPairTensor, SparseTensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit((x1, x2), adj.t()).tolist() == out1.tolist()
    assert jit((x1, None), adj.t()).tolist() == out2.tolist()

def preprocess(data, preprocess="diffusion", num_propagations=10, p=None,
               alpha=None, use_cache=True, post_fix=""):
    if use_cache:
        try:
            x = torch.load(f'embeddings/{preprocess}{post_fix}.pt')
            print('Using cache')
            return x
        except Exception:
            print(f'embeddings/{preprocess}{post_fix}.pt not found or not '
                  f'enough iterations! Regenerating it now')
            # Create a new (empty) placeholder file.
            with open(f'embeddings/{preprocess}{post_fix}.pt', 'w') as fp:
                pass

    if preprocess == "community":
        return community(data, post_fix)

    if preprocess == "spectral":
        return spectral(data, post_fix)

    print('Computing adj...')
    N = data.num_nodes
    data.edge_index = to_undirected(data.edge_index, data.num_nodes)

    row, col = data.edge_index
    adj = SparseTensor(row=row, col=col, sparse_sizes=(N, N))
    adj = adj.set_diag()
    deg = adj.sum(dim=1).to(torch.float)
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
    # Symmetric normalization: D^{-1/2} (A + I) D^{-1/2}.
    adj = deg_inv_sqrt.view(-1, 1) * adj * deg_inv_sqrt.view(1, -1)

    adj = adj.to_scipy(layout='csr')

    print(f'Start {preprocess} processing')

    if preprocess == "sgc":
        result = sgc(data.x.numpy(), adj, num_propagations)
    # if preprocess == "lp":
    #     result = lp(adj, data.y.data, num_propagations, p=p, alpha=alpha,
    #                 preprocess=preprocess)
    if preprocess == "diffusion":
        result = diffusion(data.x.numpy(), adj, num_propagations, p=p,
                           alpha=alpha)

    torch.save(result, f'embeddings/{preprocess}{post_fix}.pt')

    return result

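# Usage sketch (illustrative): diffusion preprocessing on a made-up 4-node
# graph. Assumes the `diffusion` helper referenced above is in scope and that
# an 'embeddings/' directory exists for the cache files.
def _demo_preprocess():
    import torch
    from torch_geometric.data import Data

    data = Data(x=torch.randn(4, 8),
                edge_index=torch.tensor([[0, 1, 2, 3], [1, 0, 3, 2]]))
    emb = preprocess(data, preprocess="diffusion", num_propagations=2,
                     alpha=0.5, use_cache=False)
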
def from_cpx(mat):
    # Convert a CuPy CSR matrix to a SparseTensor via DLPack; the value
    # buffer is shared, while the index arrays are cast to int64.
    wgt = from_dlpack(mat.data.toDlpack())
    rowptr = from_dlpack(mat.indptr.toDlpack()).to(torch.long)
    col = from_dlpack(mat.indices.toDlpack()).to(torch.long)
    return SparseTensor(rowptr=rowptr, col=col, value=wgt,
                        sparse_sizes=mat.shape, is_sorted=True)

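# Usage sketch (assumes a CUDA-enabled CuPy install; the random CSR matrix is
# made up for illustration):
def _demo_from_cpx():
    import cupy as cp
    import cupyx.scipy.sparse as cpx

    mat = cpx.random(100, 100, density=0.05, format='csr', dtype=cp.float32)
    adj = from_cpx(mat)
    assert adj.size(0) == 100 and adj.size(1) == 100
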
def test_my_default_arg_conv():
    x = torch.randn(4, 1)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    adj = SparseTensor(row=row, col=col, sparse_sizes=(4, 4))

    conv = MyDefaultArgConv()
    assert conv(x, edge_index).view(-1).tolist() == [0, 0, 0, 0]
    assert conv(x, adj.t()).view(-1).tolist() == [0, 0, 0, 0]

def test_my_conv():
    x1 = torch.randn(4, 8)
    x2 = torch.randn(2, 16)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    value = torch.randn(row.size(0))
    adj = SparseTensor(row=row, col=col, value=value, sparse_sizes=(4, 4))

    conv = MyConv(8, 32)
    out = conv(x1, edge_index, value)
    assert out.size() == (4, 32)
    assert conv(x1, edge_index, value, (4, 4)).tolist() == out.tolist()
    assert conv(x1, adj.t()).tolist() == out.tolist()
    conv.fuse = False
    assert conv(x1, adj.t()).tolist() == out.tolist()
    conv.fuse = True

    t = '(Tensor, Tensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit(x1, edge_index, value).tolist() == out.tolist()
    assert jit(x1, edge_index, value, (4, 4)).tolist() == out.tolist()

    t = '(Tensor, SparseTensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit(x1, adj.t()).tolist() == out.tolist()
    jit.fuse = False
    assert jit(x1, adj.t()).tolist() == out.tolist()
    jit.fuse = True

    adj = adj.sparse_resize((4, 2))
    conv = MyConv((8, 16), 32)
    out1 = conv((x1, x2), edge_index, value)
    out2 = conv((x1, None), edge_index, value, (4, 2))
    assert out1.size() == (2, 32)
    assert out2.size() == (2, 32)
    assert conv((x1, x2), edge_index, value, (4, 2)).tolist() == out1.tolist()
    assert conv((x1, x2), adj.t()).tolist() == out1.tolist()
    assert conv((x1, None), adj.t()).tolist() == out2.tolist()
    conv.fuse = False
    assert conv((x1, x2), adj.t()).tolist() == out1.tolist()
    assert conv((x1, None), adj.t()).tolist() == out2.tolist()
    conv.fuse = True

    t = '(OptPairTensor, Tensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit((x1, x2), edge_index, value).tolist() == out1.tolist()
    assert jit((x1, x2), edge_index, value, (4, 2)).tolist() == out1.tolist()
    assert jit((x1, None), edge_index, value,
               (4, 2)).tolist() == out2.tolist()

    t = '(OptPairTensor, SparseTensor, OptTensor, Size) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit((x1, x2), adj.t()).tolist() == out1.tolist()
    assert jit((x1, None), adj.t()).tolist() == out2.tolist()
    jit.fuse = False
    assert jit((x1, x2), adj.t()).tolist() == out1.tolist()
    assert jit((x1, None), adj.t()).tolist() == out2.tolist()
    jit.fuse = True

def prune(self):
    self.mask = self.pruner.prune(self.edge_index)
    self.adj = SparseTensor(
        row=self.edge_index[0, self.mask],
        col=self.edge_index[1, self.mask],
        value=torch.arange(self.E, device=self.edge_index.device)[self.mask],
        sparse_sizes=(self.N, self.N))

def build_batch(data: torch_geometric.data.Batch,
                id2graphlet: Dict[int, Subgraph],
                common_file=None) -> "Batch":
    # Check if graphlet_id == 0 exists in x because of how remap works.
    graphlet_id_zero = (data.x == 0).any().item()

    # Remap graphlet ids to (0..len(data.x.unique())).
    remapped_graphlet_ids, mapping = renumber(data.x.numpy(),
                                              start=0 + int(graphlet_id_zero),
                                              in_place=False,
                                              preserve_zero=graphlet_id_zero)
    batch_graphlet_indices = torch.tensor(remapped_graphlet_ids.flatten(),
                                          dtype=torch.int64)

    # Keys of the (key, value) mapping entries, sorted by value (the new id).
    graphlet_ids_sorted_by_new_id = [
        e[0] for e in sorted(mapping.items(), key=lambda e: e[1])
    ]

    # Create lists of xs and edge_indices where xs[i] are the features of the
    # graphlet that was mapped to new_id == i etc., with node indices shifted
    # so the graphlets form one disjoint union graph.
    xs = []
    edge_indices = []
    for i, graphlet_id in enumerate(graphlet_ids_sorted_by_new_id):
        graphlet = id2graphlet[graphlet_id]
        xs.append(graphlet.x)
        edge_indices.append(graphlet.edge_index + i * graphlet.x.size(0))

    if common_file is not None:
        common = np.loadtxt(str(common_file), dtype=np.int64)
        common = torch.tensor([mapping.get(x, -100) for x in common])
        common = (batch_graphlet_indices == common.reshape(-1, 1)).any(0)
        data.estimates[~common] = 0

    # Sparse matrix where each row represents a graph and each column a
    # graphlet, i.e. m[graph][graphlet] == count of graphlet in graph.
    graph_has_graphlet = SparseTensor(row=data.batch,
                                      col=batch_graphlet_indices,
                                      value=data.estimates)

    # FIXME: update parameter if necessary.
    if graph_has_graphlet.density() > .75:
        graph_has_graphlet = graph_has_graphlet.to_dense()

    return Batch(x=torch.cat(xs, dim=0),
                 edge_index=torch.cat(edge_indices, dim=1),
                 graph_has_graphlet=graph_has_graphlet,
                 graphlet_ids=graphlet_ids_sorted_by_new_id,
                 y=data.y)

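# Side sketch (not from the source): the density()-based branch above falls
# back to dense storage once more than 75% of the entries are populated; a
# tiny standalone illustration with a fully dense 2x2 matrix:
def _demo_density_switch():
    import torch
    from torch_sparse import SparseTensor

    row = torch.tensor([0, 0, 1, 1])
    col = torch.tensor([0, 1, 0, 1])
    m = SparseTensor(row=row, col=col, value=torch.ones(4),
                     sparse_sizes=(2, 2))
    assert m.density() == 1.0
    m = m.to_dense() if m.density() > .75 else m
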
def dgl_to_pyg_graph(g):
    eidx = g.edges()
    N = g.number_of_nodes()
    E = g.number_of_edges()
    # Transposed adjacency (dst, src), as expected by PyG message passing.
    adj_t = SparseTensor(row=eidx[0], col=eidx[1], value=th.ones(E).float(),
                         sparse_sizes=(N, N)).t()
    return eidx, adj_t

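# Usage sketch (assumes DGL is installed; `th` is the torch alias used above,
# and the 3-node cycle is made up):
def _demo_dgl_to_pyg_graph():
    import dgl

    g = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 0])))
    eidx, adj_t = dgl_to_pyg_graph(g)
    assert adj_t.sparse_sizes() == (3, 3)
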
def test_gmm_conv(separate_gaussians):
    x1 = torch.randn(4, 8)
    x2 = torch.randn(2, 16)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    value = torch.rand(row.size(0), 3)
    adj = SparseTensor(row=row, col=col, value=value, sparse_sizes=(4, 4))

    conv = GMMConv(8, 32, dim=3, kernel_size=25,
                   separate_gaussians=separate_gaussians)
    assert conv.__repr__() == 'GMMConv(8, 32, dim=3)'
    out = conv(x1, edge_index, value)
    assert out.size() == (4, 32)
    assert torch.allclose(conv(x1, edge_index, value, size=(4, 4)), out)
    assert torch.allclose(conv(x1, adj.t()), out)

    if is_full_test():
        t = '(Tensor, Tensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert torch.allclose(jit(x1, edge_index, value), out)
        assert torch.allclose(jit(x1, edge_index, value, size=(4, 4)), out)

        t = '(Tensor, SparseTensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert torch.allclose(jit(x1, adj.t()), out)

    adj = adj.sparse_resize((4, 2))
    conv = GMMConv((8, 16), 32, dim=3, kernel_size=5,
                   separate_gaussians=separate_gaussians)
    assert conv.__repr__() == 'GMMConv((8, 16), 32, dim=3)'
    out1 = conv((x1, x2), edge_index, value)
    out2 = conv((x1, None), edge_index, value, (4, 2))
    assert out1.size() == (2, 32)
    assert out2.size() == (2, 32)
    assert torch.allclose(conv((x1, x2), edge_index, value, (4, 2)), out1)
    assert torch.allclose(conv((x1, x2), adj.t()), out1)
    assert torch.allclose(conv((x1, None), adj.t()), out2)

    if is_full_test():
        t = '(OptPairTensor, Tensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert torch.allclose(jit((x1, x2), edge_index, value), out1)
        assert torch.allclose(jit((x1, x2), edge_index, value, size=(4, 2)),
                              out1)
        assert torch.allclose(jit((x1, None), edge_index, value, size=(4, 2)),
                              out2)

        t = '(OptPairTensor, SparseTensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert torch.allclose(jit((x1, x2), adj.t()), out1)
        assert torch.allclose(jit((x1, None), adj.t()), out2)

def drop_edges(mat, p=0.3):
    # Keep each edge independently with probability 1 - p.
    mask = torch.rand((mat.storage.row().shape[0], )) > p
    matr = SparseTensor(
        row=mat.storage.row()[mask],
        col=mat.storage.col()[mask],
        value=mat.storage.value()[mask],
        sparse_sizes=mat.storage.sparse_sizes(),
    )
    return matr, mask

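# Usage sketch: drop roughly 30% of the edges of a small weighted adjacency
# and keep the survival mask (the example graph is made up):
def _demo_drop_edges():
    import torch
    from torch_sparse import SparseTensor

    row = torch.tensor([0, 0, 1, 2, 2])
    col = torch.tensor([1, 2, 0, 0, 1])
    adj = SparseTensor(row=row, col=col, value=torch.ones(5),
                       sparse_sizes=(3, 3))
    pruned, mask = drop_edges(adj, p=0.3)
    assert pruned.nnz() == int(mask.sum())
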
def __init__(self, edge_index_dict, embedding_dim, metapath, walk_length,
             context_size, walks_per_node=1, num_negative_samples=1,
             num_nodes_dict=None, sparse=False):
    super(MetaPath2Vec, self).__init__()

    if num_nodes_dict is None:
        num_nodes_dict = {}
        for keys, edge_index in edge_index_dict.items():
            key = keys[0]
            N = int(edge_index[0].max() + 1)
            num_nodes_dict[key] = max(N, num_nodes_dict.get(key, N))

            key = keys[-1]
            N = int(edge_index[1].max() + 1)
            num_nodes_dict[key] = max(N, num_nodes_dict.get(key, N))

    adj_dict = {}
    for keys, edge_index in edge_index_dict.items():
        sizes = (num_nodes_dict[keys[0]], num_nodes_dict[keys[-1]])
        row, col = edge_index
        adj = SparseTensor(row=row, col=col, sparse_sizes=sizes)
        adj = adj.to('cpu')
        adj_dict[keys] = adj

    assert metapath[0][0] == metapath[-1][-1]
    assert walk_length >= context_size

    self.adj_dict = adj_dict
    self.embedding_dim = embedding_dim
    self.metapath = metapath
    self.walk_length = walk_length
    self.context_size = context_size
    self.walks_per_node = walks_per_node
    self.num_negative_samples = num_negative_samples
    self.num_nodes_dict = num_nodes_dict

    types = set([x[0] for x in metapath]) | set([x[-1] for x in metapath])
    types = sorted(list(types))

    count = 0
    self.start, self.end = {}, {}
    for key in types:
        self.start[key] = count
        count += num_nodes_dict[key]
        self.end[key] = count

    offset = [self.start[metapath[0][0]]]
    offset += [self.start[keys[-1]] for keys in metapath
               ] * int((walk_length / len(metapath)) + 1)
    offset = offset[:walk_length + 1]
    assert len(offset) == walk_length + 1
    self.offset = torch.tensor(offset)

    self.embedding = Embedding(count, embedding_dim, sparse=sparse)

    self.reset_parameters()

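# Worked sketch of the `offset` construction above, pulled out of the class
# (the two-relation metapath and per-type start offsets are made up). Node ids
# sampled at step t get shifted by the start offset of the node type visited
# at that step:
def _demo_metapath_offset():
    start = {'author': 0, 'paper': 100}
    metapath = [('author', 'writes', 'paper'),
                ('paper', 'written_by', 'author')]
    walk_length = 5

    offset = [start[metapath[0][0]]]
    offset += [start[keys[-1]] for keys in metapath
               ] * int((walk_length / len(metapath)) + 1)
    offset = offset[:walk_length + 1]
    assert offset == [0, 100, 0, 100, 0, 100]
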
def forward(self, data):
    N = data.graph['num_nodes']
    edge_index = data.graph['edge_index']
    if isinstance(edge_index, torch.Tensor):
        row, col = edge_index
        A = SparseTensor(row=row, col=col,
                         sparse_sizes=(N, N)).to_torch_sparse_coo_tensor()
    elif isinstance(edge_index, SparseTensor):
        A = edge_index.to_torch_sparse_coo_tensor()
    logits = self.W(A)
    return logits

def __init__(self, edge_index: Union[Tensor, SparseTensor], sizes: List[int],
             node_idx: Optional[Tensor] = None,
             num_nodes: Optional[int] = None, return_e_id: bool = True,
             transform: Callable = None, **kwargs):

    edge_index = edge_index.to('cpu')

    if 'collate_fn' in kwargs:
        del kwargs['collate_fn']
    if 'dataset' in kwargs:  # Save for PyTorch Lightning...
        del kwargs['dataset']

    self.edge_index = edge_index
    self.node_idx = node_idx
    self.num_nodes = num_nodes

    self.sizes = sizes
    self.return_e_id = return_e_id
    self.transform = transform
    self.is_sparse_tensor = isinstance(edge_index, SparseTensor)
    self.__val__ = None

    # Obtain a *transposed* `SparseTensor` instance.
    if not self.is_sparse_tensor:
        if (num_nodes is None and node_idx is not None
                and node_idx.dtype == torch.bool):
            num_nodes = node_idx.size(0)
        if (num_nodes is None and node_idx is not None
                and node_idx.dtype == torch.long):
            num_nodes = max(int(edge_index.max()), int(node_idx.max())) + 1
        if num_nodes is None:
            num_nodes = int(edge_index.max()) + 1

        value = torch.arange(edge_index.size(1)) if return_e_id else None
        self.adj_t = SparseTensor(row=edge_index[0], col=edge_index[1],
                                  value=value,
                                  sparse_sizes=(num_nodes, num_nodes)).t()
    else:
        adj_t = edge_index
        if return_e_id:
            self.__val__ = adj_t.storage.value()
            value = torch.arange(adj_t.nnz())
            adj_t = adj_t.set_value(value, layout='coo')
        self.adj_t = adj_t

    self.adj_t.storage.rowptr()

    if node_idx is None:
        node_idx = torch.arange(self.adj_t.sparse_size(0))
    elif node_idx.dtype == torch.bool:
        node_idx = node_idx.nonzero(as_tuple=False).view(-1)

    super().__init__(node_idx.view(-1).tolist(), collate_fn=self.sample,
                     **kwargs)

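# Standalone sketch of the transposed-adjacency trick above: edge ids are
# stored as the sparse values, so sampled sub-adjacencies can map their edges
# back to the original `edge_index` (the 3-node cycle is made up):
def _demo_transposed_adj():
    import torch
    from torch_sparse import SparseTensor

    edge_index = torch.tensor([[0, 1, 2], [1, 2, 0]])
    value = torch.arange(edge_index.size(1))  # edge ids 0, 1, 2
    adj_t = SparseTensor(row=edge_index[0], col=edge_index[1], value=value,
                         sparse_sizes=(3, 3)).t()
    row, col, e_id = adj_t.coo()
    # Entries are now (dst, src) pairs sorted by dst; the values recover the
    # original edge ids:
    assert e_id.tolist() == [2, 0, 1]
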
def test_shadow_k_hop_sampler():
    row = torch.tensor([0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 4, 4, 5, 5])
    col = torch.tensor([1, 2, 3, 0, 2, 0, 1, 4, 5, 0, 2, 5, 2, 4])
    edge_index = torch.stack([row, col], dim=0)
    edge_weight = torch.arange(row.size(0))
    x = torch.randn(6, 16)
    y = torch.randint(3, (6, ), dtype=torch.long)
    data = Data(edge_index=edge_index, edge_weight=edge_weight, x=x, y=y)

    train_mask = torch.tensor([1, 1, 0, 0, 0, 0], dtype=torch.bool)
    loader = ShaDowKHopSampler(data, depth=1, num_neighbors=3,
                               node_idx=train_mask, batch_size=2)
    assert len(loader) == 1

    batch1 = next(iter(loader))
    assert len(batch1) == 7
    assert batch1.batch.tolist() == [0, 0, 0, 0, 1, 1, 1]
    assert batch1.ptr.tolist() == [0, 4, 7]
    assert batch1.root_n_id.tolist() == [0, 5]
    assert batch1.x.tolist() == x[torch.tensor([0, 1, 2, 3, 0, 1, 2])].tolist()
    assert batch1.y.tolist() == y[train_mask].tolist()
    row, col = batch1.edge_index
    assert row.tolist() == [0, 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 5, 6, 6]
    assert col.tolist() == [1, 2, 3, 0, 2, 0, 1, 0, 5, 6, 4, 6, 4, 5]
    e_id = torch.tensor([0, 1, 2, 3, 4, 5, 6, 9, 0, 1, 3, 4, 5, 6])
    assert batch1.edge_weight.tolist() == edge_weight[e_id].tolist()

    adj_t = SparseTensor(row=edge_index[0], col=edge_index[1],
                         value=edge_weight).t()
    data = Data(adj_t=adj_t, x=x, y=y)

    loader = ShaDowKHopSampler(data, depth=1, num_neighbors=3,
                               node_idx=train_mask, batch_size=2)
    assert len(loader) == 1

    batch2 = next(iter(loader))
    assert len(batch2) == 6
    assert batch1.batch.tolist() == batch2.batch.tolist()
    assert batch1.ptr.tolist() == batch2.ptr.tolist()
    assert batch1.root_n_id.tolist() == batch2.root_n_id.tolist()
    assert batch1.x.tolist() == batch2.x.tolist()
    assert batch1.y.tolist() == batch2.y.tolist()
    row, col, value = batch2.adj_t.t().coo()
    assert batch1.edge_index[0].tolist() == row.tolist()
    assert batch1.edge_index[1].tolist() == col.tolist()
    assert batch1.edge_weight.tolist() == value.tolist()

def test_ppf_conv():
    x1 = torch.randn(4, 16)
    pos1 = torch.randn(4, 3)
    pos2 = torch.randn(2, 3)
    n1 = F.normalize(torch.rand(4, 3), dim=-1)
    n2 = F.normalize(torch.rand(2, 3), dim=-1)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    adj = SparseTensor(row=row, col=col, sparse_sizes=(4, 4))

    local_nn = Seq(Lin(16 + 4, 32), ReLU(), Lin(32, 32))
    global_nn = Seq(Lin(32, 32))
    conv = PPFConv(local_nn, global_nn)
    assert conv.__repr__() == (
        'PPFConv(local_nn=Sequential(\n'
        '  (0): Linear(in_features=20, out_features=32, bias=True)\n'
        '  (1): ReLU()\n'
        '  (2): Linear(in_features=32, out_features=32, bias=True)\n'
        '), global_nn=Sequential(\n'
        '  (0): Linear(in_features=32, out_features=32, bias=True)\n'
        '))')
    out = conv(x1, pos1, n1, edge_index)
    assert out.size() == (4, 32)
    assert torch.allclose(conv(x1, pos1, n1, adj.t()), out, atol=1e-6)

    t = '(OptTensor, Tensor, Tensor, Tensor) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit(x1, pos1, n1, edge_index).tolist() == out.tolist()

    t = '(OptTensor, Tensor, Tensor, SparseTensor) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert torch.allclose(jit(x1, pos1, n1, adj.t()), out, atol=1e-6)

    adj = adj.sparse_resize((4, 2))
    out = conv(x1, (pos1, pos2), (n1, n2), edge_index)
    assert out.size() == (2, 32)
    assert conv((x1, None), (pos1, pos2), (n1, n2),
                edge_index).tolist() == out.tolist()
    assert torch.allclose(conv(x1, (pos1, pos2), (n1, n2), adj.t()), out,
                          atol=1e-6)
    assert torch.allclose(conv((x1, None), (pos1, pos2), (n1, n2), adj.t()),
                          out, atol=1e-6)

    t = '(PairOptTensor, PairTensor, PairTensor, Tensor) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert jit((x1, None), (pos1, pos2), (n1, n2),
               edge_index).tolist() == out.tolist()

    t = '(PairOptTensor, PairTensor, PairTensor, SparseTensor) -> Tensor'
    jit = torch.jit.script(conv.jittable(t))
    assert torch.allclose(jit((x1, None), (pos1, pos2), (n1, n2), adj.t()),
                          out, atol=1e-6)

def test_message_passing_with_aggr_module(aggr_module):
    x = torch.randn(4, 8)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    adj = SparseTensor(row=row, col=col, sparse_sizes=(4, 4))

    conv = MyAggregatorConv(aggr=aggr_module)
    assert isinstance(conv.aggr_module, aggr.Aggregation)
    out = conv(x, edge_index)
    assert out.size(0) == 4 and out.size(1) in {8, 16}
    assert torch.allclose(conv(x, adj.t()), out)

def get_sparse_buffer(module, name):
    # Reassemble a SparseTensor from the four "<name>_*" tensors stored on
    # `module` (e.g. as registered buffers).
    row = getattr(module, f"{name}_row")
    col = getattr(module, f"{name}_col")
    val = getattr(module, f"{name}_val")
    siz = getattr(module, f"{name}_size")
    return SparseTensor(
        row=row,
        col=col,
        value=val,
        sparse_sizes=siz.tolist(),
    )

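# Usage sketch (hypothetical buffer layout): a module that registered the four
# "<name>_*" buffers can be round-tripped through `get_sparse_buffer`:
def _demo_get_sparse_buffer():
    import torch

    mod = torch.nn.Module()
    mod.register_buffer('adj_row', torch.tensor([0, 1]))
    mod.register_buffer('adj_col', torch.tensor([1, 0]))
    mod.register_buffer('adj_val', torch.ones(2))
    mod.register_buffer('adj_size', torch.tensor([2, 2]))
    adj = get_sparse_buffer(mod, 'adj')
    assert adj.nnz() == 2
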
def process_adj(data):
    N = data.num_nodes
    data.edge_index = to_undirected(data.edge_index, data.num_nodes)

    row, col = data.edge_index
    adj = SparseTensor(row=row, col=col, sparse_sizes=(N, N))
    deg = adj.sum(dim=1).to(torch.float)
    deg_inv_sqrt = deg.pow(-0.5)
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
    return adj, deg_inv_sqrt

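# Usage sketch: the returned pieces compose the usual symmetric normalization
# D^{-1/2} A D^{-1/2}, mirroring the pattern in `preprocess` above (the tiny
# graph is made up):
def _demo_process_adj():
    import torch
    from torch_geometric.data import Data

    data = Data(edge_index=torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]]),
                num_nodes=3)
    adj, deg_inv_sqrt = process_adj(data)
    adj = deg_inv_sqrt.view(-1, 1) * adj * deg_inv_sqrt.view(1, -1)
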
def edge_tensor_type_to_adj_type(
    attr: EdgeAttr,
    tensor_tuple: EdgeTensorType,
) -> Adj:
    r"""Converts an EdgeTensorType tensor tuple to a PyG Adj tensor."""
    src, dst = tensor_tuple

    if attr.layout == EdgeLayout.COO:  # COO: (row, col)
        assert src.dim() == 1 and dst.dim() == 1 and src.numel() == dst.numel()

        if src.numel() == 0:
            return torch.empty((2, 0), dtype=torch.long, device=src.device)

        if (src[0].storage().data_ptr() == dst[1].storage().data_ptr()
                and src.storage_offset() < dst.storage_offset()):
            # Do not copy if the tensor tuple is constructed from the same
            # storage (instead, return a view):
            out = torch.empty(0, dtype=src.dtype)
            out.set_(src.storage(), storage_offset=src.storage_offset(),
                     size=(src.size()[0] + dst.size()[0], ))
            return out.view(2, -1)

        return torch.stack([src, dst], dim=0)

    elif attr.layout == EdgeLayout.CSR:  # CSR: (rowptr, col)
        return SparseTensor(rowptr=src, col=dst, is_sorted=True,
                            sparse_sizes=attr.size)

    elif attr.layout == EdgeLayout.CSC:  # CSC: (row, colptr)
        # CSC is a transposed adjacency matrix, so rowptr is the compressed
        # column and col is the uncompressed row.
        sparse_sizes = None if attr.size is None else (attr.size[1],
                                                       attr.size[0])
        return SparseTensor(rowptr=dst, col=src, is_sorted=True,
                            sparse_sizes=sparse_sizes)

    raise ValueError(f"Bad edge layout (got '{attr.layout}')")

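# Usage sketch of the COO branch. `EdgeAttr` and `EdgeLayout` are assumed to
# come from torch_geometric's graph-store module, as in the function above;
# the edge type and sizes are made up:
def _demo_edge_tensor_type_to_adj_type():
    import torch

    attr = EdgeAttr(('src', 'to', 'dst'), EdgeLayout.COO, size=(3, 3))
    row, col = torch.tensor([0, 1, 2]), torch.tensor([1, 2, 0])
    # Separate storages, so the function falls through to torch.stack:
    out = edge_tensor_type_to_adj_type(attr, (row, col))
    assert out.size() == (2, 3)
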
def test_padded_index_select_runtime():
    return  # Disabled benchmark; remove this line to run it on CUDA.

    from torch_geometric.datasets import Planetoid

    device = torch.device('cuda')
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    dataset = Planetoid('/tmp/Planetoid', name='PubMed')
    data = dataset[0]
    row, col = data.edge_index.to(device)
    adj = SparseTensor(row=row, col=col)
    rowcount = adj.storage.rowcount().to(device)
    rowptr = adj.storage.rowptr().to(device)

    binptr = torch.tensor([0, 4, 11, 30, 50, 80, 120, 140, 2000]).to(device)

    x = torch.randn(adj.size(0), 512).to(device)

    data = torch.ops.torch_sparse.padded_index(rowptr, col, rowcount, binptr)
    node_perm, row_perm, col_perm, mask, node_sizes, edge_sizes = data

    out = torch.ops.torch_sparse.padded_index_select(x, col_perm,
                                                     torch.tensor(0.))
    outs = out.split(edge_sizes)
    for out, size in zip(outs, node_sizes):
        print(out.view(size, -1, x.size(-1)).shape)

    # Warm up for 10 iterations, then time 100 iterations of each kernel.
    for i in range(110):
        if i == 10:
            start.record()
        torch.ops.torch_sparse.padded_index(rowptr, col, rowcount, binptr)
    end.record()
    torch.cuda.synchronize()
    print('padded index', start.elapsed_time(end))

    for i in range(110):
        if i == 10:
            start.record()
        out = torch.ops.torch_sparse.padded_index_select(
            x, col_perm, torch.tensor(0.))
        out.split(edge_sizes)
    end.record()
    torch.cuda.synchronize()
    print('padded index select', start.elapsed_time(end))

    for i in range(110):
        if i == 10:
            start.record()
        x.index_select(0, col)
    end.record()
    torch.cuda.synchronize()
    print('index_select', start.elapsed_time(end))

def main(args):
    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    path = osp.join('..', 'data', 'Reddit')
    dataset = Reddit(path)
    data = dataset[0]

    features = data.x.to(device)
    labels = data.y.to(device)
    edge_index = data.edge_index.to(device)
    adj = SparseTensor(row=edge_index[0], col=edge_index[1])

    train_mask = torch.BoolTensor(data.train_mask).to(device)
    val_mask = torch.BoolTensor(data.val_mask).to(device)
    test_mask = torch.BoolTensor(data.test_mask).to(device)

    model = GraphSAGE(dataset.num_features, args.n_hidden,
                      dataset.num_classes, args.aggr, F.relu,
                      args.dropout).to(device)

    loss_fcn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    dur = []
    for epoch in range(1, args.epochs + 1):
        model.train()
        if epoch >= 3:
            t0 = time.time()

        # Forward pass.
        logits = model(features, adj)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        if args.eval:
            acc = evaluate(model, adj, features, labels, val_mask)
        else:
            acc = 0

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | "
              "Accuracy {:.4f}".format(epoch, np.mean(dur), loss.item(), acc))

    if args.eval:
        print()
        acc = evaluate(model, adj, features, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))

def test_gine_conv():
    x1 = torch.randn(4, 16)
    x2 = torch.randn(2, 16)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    value = torch.randn(row.size(0), 16)
    adj = SparseTensor(row=row, col=col, value=value, sparse_sizes=(4, 4))

    nn = Seq(Lin(16, 32), ReLU(), Lin(32, 32))
    conv = GINEConv(nn, train_eps=True)
    assert conv.__repr__() == (
        'GINEConv(nn=Sequential(\n'
        '  (0): Linear(in_features=16, out_features=32, bias=True)\n'
        '  (1): ReLU()\n'
        '  (2): Linear(in_features=32, out_features=32, bias=True)\n'
        '))')
    out = conv(x1, edge_index, value)
    assert out.size() == (4, 32)
    assert conv(x1, edge_index, value, size=(4, 4)).tolist() == out.tolist()
    assert conv(x1, adj.t()).tolist() == out.tolist()

    if is_full_test():
        t = '(Tensor, Tensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert jit(x1, edge_index, value).tolist() == out.tolist()
        assert jit(x1, edge_index, value,
                   size=(4, 4)).tolist() == out.tolist()

        t = '(Tensor, SparseTensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert jit(x1, adj.t()).tolist() == out.tolist()

    adj = adj.sparse_resize((4, 2))
    out1 = conv((x1, x2), edge_index, value)
    out2 = conv((x1, None), edge_index, value, (4, 2))
    assert out1.size() == (2, 32)
    assert out2.size() == (2, 32)
    assert conv((x1, x2), edge_index, value, (4, 2)).tolist() == out1.tolist()
    assert conv((x1, x2), adj.t()).tolist() == out1.tolist()
    assert conv((x1, None), adj.t()).tolist() == out2.tolist()

    if is_full_test():
        t = '(OptPairTensor, Tensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert jit((x1, x2), edge_index, value).tolist() == out1.tolist()
        assert jit((x1, x2), edge_index, value,
                   size=(4, 2)).tolist() == out1.tolist()
        assert jit((x1, None), edge_index, value,
                   size=(4, 2)).tolist() == out2.tolist()

        t = '(OptPairTensor, SparseTensor, OptTensor, Size) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert jit((x1, x2), adj.t()).tolist() == out1.tolist()
        assert jit((x1, None), adj.t()).tolist() == out2.tolist()

def __init__(self, edge_index_dict, embedding_dim, metapath, walk_length,
             context_size, num_nodes_dict, types, type_accs,
             walks_per_node=1, num_negative_samples=1, sparse=False):
    super(MetaPath2Vec, self).__init__()

    adj_dict = {}
    for keys, edge_index in edge_index_dict.items():
        sizes = (num_nodes_dict[keys[0]], num_nodes_dict[keys[-1]])
        row, col = edge_index
        # Shift global node ids into per-type local ids.
        row, col = row - type_accs[keys[0]], col - type_accs[keys[-1]]
        adj = SparseTensor(row=row, col=col, sparse_sizes=sizes)
        adj = adj.to('cpu')
        adj_dict[keys] = adj

    assert metapath[0][0] == metapath[-1][-1]
    assert walk_length >= context_size

    self.adj_dict = adj_dict
    self.embedding_dim = embedding_dim
    self.metapath = metapath
    self.walk_length = walk_length
    self.context_size = context_size
    self.walks_per_node = walks_per_node
    self.num_negative_samples = num_negative_samples
    self.num_nodes_dict = num_nodes_dict

    count = 0
    self.start, self.end = {}, {}
    for key in types:
        self.start[key] = count
        count += num_nodes_dict[key]
        self.end[key] = count

    offset = [self.start[metapath[0][0]]]
    offset += [self.start[keys[-1]] for keys in metapath
               ] * int((walk_length / len(metapath)) + 1)
    offset = offset[:walk_length + 1]
    assert len(offset) == walk_length + 1
    self.offset = torch.tensor(offset)

    self.embedding = Embedding(count, embedding_dim, sparse=sparse)

    self.reset_parameters()

def test_point_transformer_conv():
    x1 = torch.rand(4, 16)
    x2 = torch.randn(2, 8)
    pos1 = torch.rand(4, 3)
    pos2 = torch.randn(2, 3)
    edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]])
    row, col = edge_index
    adj = SparseTensor(row=row, col=col, sparse_sizes=(4, 4))

    conv = PointTransformerConv(in_channels=16, out_channels=32)
    assert str(conv) == 'PointTransformerConv(16, 32)'

    out = conv(x1, pos1, edge_index)
    assert out.size() == (4, 32)
    assert torch.allclose(conv(x1, pos1, adj.t()), out, atol=1e-6)

    if is_full_test():
        t = '(Tensor, Tensor, Tensor) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert jit(x1, pos1, edge_index).tolist() == out.tolist()

        t = '(Tensor, Tensor, SparseTensor) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert torch.allclose(jit(x1, pos1, adj.t()), out, atol=1e-6)

    pos_nn = Sequential(Linear(3, 16), ReLU(), Linear(16, 32))
    attn_nn = Sequential(Linear(32, 32), ReLU(), Linear(32, 32))
    conv = PointTransformerConv(16, 32, pos_nn, attn_nn)

    out = conv(x1, pos1, edge_index)
    assert out.size() == (4, 32)
    assert torch.allclose(conv(x1, pos1, adj.t()), out, atol=1e-6)

    conv = PointTransformerConv((16, 8), 32)
    adj = adj.sparse_resize((4, 2))

    out = conv((x1, x2), (pos1, pos2), edge_index)
    assert out.size() == (2, 32)
    assert torch.allclose(conv((x1, x2), (pos1, pos2), adj.t()), out,
                          atol=1e-6)

    if is_full_test():
        t = '(PairTensor, PairTensor, Tensor) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert jit((x1, x2), (pos1, pos2),
                   edge_index).tolist() == out.tolist()

        t = '(PairTensor, PairTensor, SparseTensor) -> Tensor'
        jit = torch.jit.script(conv.jittable(t))
        assert torch.allclose(jit((x1, x2), (pos1, pos2), adj.t()), out,
                              atol=1e-6)

def test_han_conv():
    x_dict = {
        'author': torch.randn(6, 16),
        'paper': torch.randn(5, 12),
        'term': torch.randn(4, 3),
    }
    edge1 = torch.randint(0, 6, (2, 7), dtype=torch.long)
    edge2 = torch.randint(0, 5, (2, 4), dtype=torch.long)
    edge3 = torch.randint(0, 3, (2, 5), dtype=torch.long)
    edge_index_dict = {
        ('author', 'metapath0', 'author'): edge1,
        ('paper', 'metapath1', 'paper'): edge2,
        ('paper', 'metapath2', 'paper'): edge3,
    }

    adj_t_dict = {}
    for edge_type, edge_index in edge_index_dict.items():
        src_type, _, dst_type = edge_type
        adj_t_dict[edge_type] = SparseTensor(
            row=edge_index[0], col=edge_index[1],
            sparse_sizes=(x_dict[src_type].size(0),
                          x_dict[dst_type].size(0))).t()

    metadata = (list(x_dict.keys()), list(edge_index_dict.keys()))
    in_channels = {'author': 16, 'paper': 12, 'term': 3}

    conv = HANConv(in_channels, 16, metadata, heads=2)
    assert str(conv) == 'HANConv(16, heads=2)'
    out_dict1 = conv(x_dict, edge_index_dict)
    assert len(out_dict1) == 3
    assert out_dict1['author'].size() == (6, 16)
    assert out_dict1['paper'].size() == (5, 16)
    # 'term' nodes are not the destination of any edge type:
    assert out_dict1['term'] is None

    del out_dict1['term']
    del x_dict['term']
    out_dict2 = conv(x_dict, adj_t_dict)
    assert len(out_dict1) == len(out_dict2)
    for node_type in out_dict1.keys():
        assert torch.allclose(out_dict1[node_type], out_dict2[node_type],
                              atol=1e-6)

    # Non-zero dropout:
    conv = HANConv(in_channels, 16, metadata, heads=2, dropout=0.1)
    assert str(conv) == 'HANConv(16, heads=2)'
    out_dict1 = conv(x_dict, edge_index_dict)
    assert len(out_dict1) == 2
    assert out_dict1['author'].size() == (6, 16)
    assert out_dict1['paper'].size() == (5, 16)