def __call__(self, data: HeteroData) -> HeteroData:
    """Add one new edge type per metapath in ``self.metapaths`` by chaining
    sparse adjacency multiplications, and optionally drop the original
    edge types afterwards.

    Fix: dropped the unused ``enumerate`` index in the inner loop and
    stopped reusing single-letter names for edge-type tuples.
    """
    edge_types = data.edge_types  # save original edge types
    data.metapath_dict = {}

    for j, metapath in enumerate(self.metapaths):
        # Every edge type referenced by the metapath must exist:
        for edge_type in metapath:
            assert data._to_canonical(
                edge_type) in edge_types, f"'{edge_type}' not present"

        # Chain adjacency matrices along the metapath:
        edge_type = metapath[0]
        adj1 = SparseTensor.from_edge_index(
            edge_index=data[edge_type].edge_index,
            sparse_sizes=data[edge_type].size())

        for edge_type in metapath[1:]:
            adj2 = SparseTensor.from_edge_index(
                edge_index=data[edge_type].edge_index,
                sparse_sizes=data[edge_type].size())
            adj1 = adj1 @ adj2

        row, col, _ = adj1.coo()
        # New relation connects the metapath's source and destination types:
        new_edge_type = (metapath[0][0], f'metapath_{j}', metapath[-1][-1])
        data[new_edge_type].edge_index = torch.vstack([row, col])
        data.metapath_dict[new_edge_type] = metapath

    if self.drop_orig_edges:
        for edge_type in edge_types:
            # Optionally keep relations between nodes of the same type:
            if self.keep_same_node_type and edge_type[0] == edge_type[-1]:
                continue
            else:
                del data[edge_type]

    return data
def __call__(self, data: HeteroData) -> HeteroData:
    """Weighted/sampled metapath variant: adds one new edge type per
    metapath, optionally propagating edge weights, sub-sampling dense
    intermediate adjacencies, dropping the original edge types, and
    removing node types left without any incident edges.

    Fix: dropped the unused ``enumerate`` index in the inner loop and
    stopped reusing single-letter names for edge-type tuples.
    """
    edge_types = data.edge_types  # save original edge types
    data.metapath_dict = {}

    for j, metapath in enumerate(self.metapaths):
        # Every edge type referenced by the metapath must exist:
        for edge_type in metapath:
            assert data._to_canonical(
                edge_type) in edge_types, f"'{edge_type}' not present"

        edge_type = metapath[0]
        edge_weight = self._get_edge_weight(data, edge_type)
        adj1 = SparseTensor.from_edge_index(
            edge_index=data[edge_type].edge_index,
            sparse_sizes=data[edge_type].size(), edge_attr=edge_weight)
        if self.max_sample is not None:
            # Keep intermediate products sparse by row-wise sampling:
            adj1 = self.sample_adj(adj1)

        for edge_type in metapath[1:]:
            edge_weight = self._get_edge_weight(data, edge_type)
            adj2 = SparseTensor.from_edge_index(
                edge_index=data[edge_type].edge_index,
                sparse_sizes=data[edge_type].size(), edge_attr=edge_weight)
            adj1 = adj1 @ adj2
            if self.max_sample is not None:
                adj1 = self.sample_adj(adj1)

        row, col, edge_weight = adj1.coo()
        new_edge_type = (metapath[0][0], f'metapath_{j}', metapath[-1][-1])
        data[new_edge_type].edge_index = torch.vstack([row, col])
        if self.weighted:
            data[new_edge_type].edge_weight = edge_weight
        data.metapath_dict[new_edge_type] = metapath

    if self.drop_orig_edges:
        for edge_type in edge_types:
            if self.keep_same_node_type and edge_type[0] == edge_type[-1]:
                continue
            else:
                del data[edge_type]

    # Remove node types not connected by any remaining edge type:
    if self.drop_unconnected_nodes:
        new_edge_types = data.edge_types
        node_types = data.node_types
        connected_nodes = set()
        for edge_type in new_edge_types:
            connected_nodes.add(edge_type[0])
            connected_nodes.add(edge_type[-1])
        for node in node_types:
            if node not in connected_nodes:
                del data[node]

    return data
def __init__(self, edge_index: torch.Tensor, sizes: List[int],
             node_idx: Optional[torch.Tensor] = None,
             num_nodes: Optional[int] = None,
             flow: str = "source_to_target", **kwargs):
    """Layer-wise neighbor sampler over a static graph.

    Args:
        edge_index: Graph connectivity of shape ``[2, num_edges]``.
        sizes: Number of neighbors to sample per layer (``-1`` for all).
        node_idx: Seed nodes to sample from (boolean mask or indices);
            defaults to all nodes.
        num_nodes: Node count; inferred from ``edge_index`` if omitted.
        flow: Message-passing direction; determines whether the
            adjacency is transposed.
        **kwargs: Forwarded to ``torch.utils.data.DataLoader``.
    """
    N = int(edge_index.max() + 1) if num_nodes is None else num_nodes
    # Edge positions are stored as values so sampled sub-adjacencies can
    # be mapped back to original edge ids:
    edge_attr = torch.arange(edge_index.size(1))
    adj = SparseTensor.from_edge_index(edge_index, edge_attr, (N, N),
                                       is_sorted=False)
    adj = adj.t() if flow == 'source_to_target' else adj
    # Keep the adjacency on CPU; sampling happens in DataLoader workers.
    self.adj = adj.to('cpu')

    if node_idx is None:
        node_idx = torch.arange(N)
    elif node_idx.dtype == torch.bool:
        # Convert a boolean mask into explicit node indices:
        node_idx = node_idx.nonzero().view(-1)

    self.sizes = sizes
    self.flow = flow
    assert self.flow in ['source_to_target', 'target_to_source']

    super(NeighborSampler, self).__init__(node_idx.tolist(),
                                          collate_fn=self.sample, **kwargs)
def train(epoch):
    """Run one training epoch over ``train_loader`` and return the
    mask-weighted mean cross-entropy loss.

    Relies on module-level globals: ``model``, ``train_loader``,
    ``optimizer``, ``device``.
    """
    model.train()

    pbar = tqdm(total=len(train_loader))
    pbar.set_description(f'Training epoch: {epoch:03d}')

    total_loss = total_examples = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()

        # Memory-efficient aggregations:
        adj_t = SparseTensor.from_edge_index(data.edge_index).t()
        out = model(data.x, adj_t)[data.train_mask]
        loss = F.cross_entropy(out, data.y[data.train_mask].view(-1))
        loss.backward()
        optimizer.step()

        # Weight each batch's loss by its number of training nodes:
        total_loss += float(loss) * int(data.train_mask.sum())
        total_examples += int(data.train_mask.sum())
        pbar.update(1)

    pbar.close()

    return total_loss / total_examples
def test_linkx(num_edge_layers):
    """Check that LINKX produces identical outputs for ``edge_index`` and
    the equivalent (transposed) ``SparseTensor`` inputs, with and without
    node features / edge weights."""
    x = torch.randn(4, 16)
    edge_index = torch.tensor([[0, 1, 2], [1, 2, 3]])
    edge_weight = torch.rand(edge_index.size(1))
    # adj2 carries edge weights; adj1 is the same sparsity pattern without:
    adj2 = SparseTensor.from_edge_index(edge_index, edge_weight)
    adj1 = adj2.set_value(None)

    model = LINKX(num_nodes=4, in_channels=16, hidden_channels=32,
                  out_channels=8, num_layers=2,
                  num_edge_layers=num_edge_layers)
    assert str(model) == 'LINKX(num_nodes=4, in_channels=16, out_channels=8)'

    out = model(x, edge_index)
    assert out.size() == (4, 8)
    assert torch.allclose(out, model(x, adj1.t()), atol=1e-4)

    # LINKX also works without node features:
    out = model(None, edge_index)
    assert out.size() == (4, 8)
    assert torch.allclose(out, model(None, adj1.t()), atol=1e-4)

    out = model(x, edge_index, edge_weight)
    assert out.size() == (4, 8)
    assert torch.allclose(out, model(x, adj2.t()), atol=1e-4)

    out = model(None, edge_index, edge_weight)
    assert out.size() == (4, 8)
    assert torch.allclose(out, model(None, adj2.t()), atol=1e-4)
def test_to_hetero_with_bases_and_rgcn_equal_output(): torch.manual_seed(1234) # Run `RGCN` with basis decomposition: x = torch.randn(10, 16) # 6 paper nodes, 4 author nodes adj = (torch.rand(10, 10) > 0.5) adj[6:, 6:] = False edge_index = adj.nonzero(as_tuple=False).t().contiguous() row, col = edge_index # # 0 = paper<->paper, 1 = author->paper, 2 = paper->author edge_type = torch.full((edge_index.size(1), ), -1, dtype=torch.long) edge_type[(row < 6) & (col < 6)] = 0 edge_type[(row < 6) & (col >= 6)] = 1 edge_type[(row >= 6) & (col < 6)] = 2 assert edge_type.min() == 0 num_bases = 4 conv = RGCNConv(16, 32, num_relations=3, num_bases=num_bases, aggr='add') out1 = conv(x, edge_index, edge_type) # Run `to_hetero_with_bases`: x_dict = { 'paper': x[:6], 'author': x[6:], } edge_index_dict = { ('paper', '_', 'paper'): edge_index[:, edge_type == 0], ('paper', '_', 'author'): edge_index[:, edge_type == 1] - torch.tensor([[0], [6]]), ('author', '_', 'paper'): edge_index[:, edge_type == 2] - torch.tensor([[6], [0]]), } adj_t_dict = { key: SparseTensor.from_edge_index(edge_index).t() for key, edge_index in edge_index_dict.items() } metadata = (list(x_dict.keys()), list(edge_index_dict.keys())) model = to_hetero_with_bases(RGCN(16, 32), metadata, num_bases=num_bases, debug=False) # Set model weights: for i in range(num_bases): model.conv.convs[i].lin.weight.data = conv.weight[i].data.t() model.conv.convs[i].edge_type_weight.data = conv.comp[:, i].data.t() model.lin.weight.data = conv.root.data.t() model.lin.bias.data = conv.bias.data out2 = model(x_dict, edge_index_dict) out2 = torch.cat([out2['paper'], out2['author']], dim=0) assert torch.allclose(out1, out2, atol=1e-6) out3 = model(x_dict, adj_t_dict) out3 = torch.cat([out3['paper'], out3['author']], dim=0) assert torch.allclose(out1, out3, atol=1e-6)
def test_to_hetero_and_rgcn_equal_output(): torch.manual_seed(1234) # Run `RGCN`: x = torch.randn(10, 16) # 6 paper nodes, 4 author nodes adj = (torch.rand(10, 10) > 0.5) adj[6:, 6:] = False edge_index = adj.nonzero(as_tuple=False).t().contiguous() row, col = edge_index # # 0 = paper<->paper, 1 = paper->author, 2 = author->paper edge_type = torch.full((edge_index.size(1), ), -1, dtype=torch.long) edge_type[(row < 6) & (col < 6)] = 0 edge_type[(row < 6) & (col >= 6)] = 1 edge_type[(row >= 6) & (col < 6)] = 2 assert edge_type.min() == 0 conv = RGCNConv(16, 32, num_relations=3) out1 = conv(x, edge_index, edge_type) # Run `to_hetero`: x_dict = { 'paper': x[:6], 'author': x[6:], } edge_index_dict = { ('paper', '_', 'paper'): edge_index[:, edge_type == 0], ('paper', '_', 'author'): edge_index[:, edge_type == 1] - torch.tensor([[0], [6]]), ('author', '_', 'paper'): edge_index[:, edge_type == 2] - torch.tensor([[6], [0]]), } node_types, edge_types = list(x_dict.keys()), list(edge_index_dict.keys()) adj_t_dict = { key: SparseTensor.from_edge_index(edge_index).t() for key, edge_index in edge_index_dict.items() } model = to_hetero(RGCN(16, 32), (node_types, edge_types)) # Set model weights: for i, edge_type in enumerate(edge_types): weight = model.conv['__'.join(edge_type)].lin.weight weight.data = conv.weight[i].data.t() for i, node_type in enumerate(node_types): model.lin[node_type].weight.data = conv.root.data.t() model.lin[node_type].bias.data = conv.bias.data out2 = model(x_dict, edge_index_dict) out2 = torch.cat([out2['paper'], out2['author']], dim=0) assert torch.allclose(out1, out2, atol=1e-6) out3 = model(x_dict, adj_t_dict) out3 = torch.cat([out3['paper'], out3['author']], dim=0) assert torch.allclose(out1, out3, atol=1e-6)
def test_lists_of_SparseTensors():
    """Check that lists of ``SparseTensor`` attributes survive dataset
    collation with their sparse sizes intact."""
    e1 = torch.tensor([[4, 1, 3, 2, 2, 3], [1, 3, 2, 3, 3, 2]])
    e2 = torch.tensor([[0, 1, 4, 7, 2, 9], [7, 2, 2, 1, 4, 7]])
    e3 = torch.tensor([[3, 5, 1, 2, 3, 3], [5, 0, 2, 1, 3, 7]])
    e4 = torch.tensor([[0, 1, 9, 2, 0, 3], [1, 1, 2, 1, 3, 2]])
    adj1 = SparseTensor.from_edge_index(e1, sparse_sizes=(11, 11))
    adj2 = SparseTensor.from_edge_index(e2, sparse_sizes=(22, 22))
    adj3 = SparseTensor.from_edge_index(e3, sparse_sizes=(12, 12))
    adj4 = SparseTensor.from_edge_index(e4, sparse_sizes=(15, 15))

    # Each Data object stores a *list* of adjacencies:
    d1 = Data(adj_test=[adj1, adj2])
    d2 = Data(adj_test=[adj3, adj4])

    data_list = [d1, d2]
    dataset = MyTestDataset3(data_list)
    assert len(dataset) == 2
    # Sparse sizes must round-trip through collation/separation:
    assert dataset[0].adj_test[0].sparse_sizes() == (11, 11)
    assert dataset[0].adj_test[1].sparse_sizes() == (22, 22)
    assert dataset[1].adj_test[0].sparse_sizes() == (12, 12)
    assert dataset[1].adj_test[1].sparse_sizes() == (15, 15)
def __init__(self, edge_index_dict, embedding_dim, metapath, walk_length,
             context_size, walks_per_node=1, num_negative_samples=1,
             num_nodes_dict=None, sparse=False):
    """MetaPath2Vec: skip-gram embeddings learned from metapath-guided
    random walks over a heterogeneous graph.

    Args:
        edge_index_dict: Maps ``(src, rel, dst)`` tuples to edge indices.
        embedding_dim: Size of each embedding vector.
        metapath: Sequence of ``(src, rel, dst)`` tuples describing the
            walk pattern; must form a closed loop (see assert below).
        walk_length: Length of each random walk.
        context_size: Window size considered for positive samples.
        walks_per_node: Walks started per node.
        num_negative_samples: Negative samples per positive sample.
        num_nodes_dict: Optional node counts per type; inferred otherwise.
        sparse: Whether to use sparse gradients for the embedding.
    """
    super(MetaPath2Vec, self).__init__()

    # Infer node counts per type from the maximum index seen on either
    # endpoint of every relation:
    if num_nodes_dict is None:
        num_nodes_dict = {}
        for keys, edge_index in edge_index_dict.items():
            key = keys[0]
            N = int(edge_index[0].max() + 1)
            num_nodes_dict[key] = max(N, num_nodes_dict.get(key, N))

            key = keys[-1]
            N = int(edge_index[1].max() + 1)
            num_nodes_dict[key] = max(N, num_nodes_dict.get(key, N))

    # One sparse adjacency per relation, used for walk sampling:
    adj_dict = {}
    for keys, edge_index in edge_index_dict.items():
        sizes = (num_nodes_dict[keys[0]], num_nodes_dict[keys[-1]])
        adj = SparseTensor.from_edge_index(edge_index, sparse_sizes=sizes)
        adj_dict[keys] = adj

    self.adj_dict = adj_dict
    self.embedding_dim = embedding_dim
    self.metapath = metapath
    # The metapath must end where it starts so walks can repeat it:
    assert metapath[0][0] == metapath[-1][-1]
    self.walk_length = walk_length
    self.context_size = context_size
    self.walks_per_node = walks_per_node
    self.num_negative_samples = num_negative_samples
    self.num_nodes_dict = num_nodes_dict

    types = set([x[0] for x in metapath]) | set([x[-1] for x in metapath])
    types = sorted(list(types))

    # Assign each node type a contiguous id range inside one shared
    # embedding table:
    count = 0
    self.start, self.end = {}, {}
    for key in types:
        self.start[key] = count
        count += num_nodes_dict[key]
        self.end[key] = count

    # Per-walk-step offsets translating local node ids into global
    # embedding ids; repeat the metapath pattern to cover walk_length:
    offset = [self.start[metapath[0][0]]]
    offset += [self.start[keys[-1]] for keys in metapath
               ] * int((walk_length / len(metapath)) + 1)
    offset = offset[:walk_length + 1]
    assert len(offset) == walk_length + 1
    self.register_buffer('offset', torch.tensor(offset))

    self.embedding = Embedding(count, embedding_dim, sparse=sparse)

    self.reset_parameters()
def extract(
    self,
    data: Data,
) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
    """Compute the ``self.num_hops``-hop reachability mask of every node
    and delegate to ``self.map`` with the resulting boolean matrix."""
    num_nodes = data.num_nodes
    adj_t = SparseTensor.from_edge_index(
        data.edge_index,
        sparse_sizes=(num_nodes, num_nodes),
    ).t()

    # Start from the identity (each node reaches itself) and grow the
    # reachable set by one hop per iteration.
    reach = torch.eye(num_nodes, device=data.edge_index.device)
    for _hop in range(self.num_hops):
        reach = reach + adj_t @ reach

    return self.map(data, reach > 0)
def test_wl_conv():
    """Check Weisfeiler-Lehman coloring: integer and one-hot inputs, dense
    and sparse adjacencies all yield identical colorings, and histograms
    count/normalize colors correctly."""
    x1 = torch.tensor([1, 0, 0, 1])
    x2 = F.one_hot(x1).to(torch.float)  # same labels, one-hot encoded
    edge_index = torch.tensor([[0, 1, 1, 2, 2, 3], [1, 0, 2, 1, 3, 2]])
    adj_t = SparseTensor.from_edge_index(edge_index).t()

    conv = WLConv()
    assert str(conv) == 'WLConv()'

    out = conv(x1, edge_index)
    assert out.tolist() == [0, 1, 1, 0]
    # All input encodings must produce the same coloring:
    assert conv(x2, edge_index).tolist() == out.tolist()
    assert conv(x1, adj_t).tolist() == out.tolist()
    assert conv(x2, adj_t).tolist() == out.tolist()

    assert conv.histogram(out).tolist() == [[2, 2]]
    # Normalized histogram has unit L2 norm: [2, 2] / sqrt(8).
    assert torch.allclose(conv.histogram(out, norm=True),
                          torch.tensor([[0.7071, 0.7071]]))
def forward(self, x, edge_index, edge_attr=None, batch=None):
    """Score nodes with an attention-like kernel, keep the top
    ``self.ratio`` per graph, and return the pooled graph.

    Fix: removed the unused ``I = SparseTensor.eye(...)`` local, which
    allocated an N x N identity matrix that was never read.

    Returns:
        Tuple of ``(x, edge_index, edge_attr, batch, perm)``.
    """
    if batch is None:
        batch = edge_index.new_zeros(x.size(0))
    num_node = x.size(0)

    # Per-node gating/key vector:
    k = F.relu(self.lin_2(x))
    A = SparseTensor.from_edge_index(edge_index=edge_index,
                                     edge_attr=edge_attr,
                                     sparse_sizes=(num_node, num_node))
    # Self-loops so each node contributes to its own score:
    A_wave = fill_diag(A, 1)
    s = A_wave @ k
    score = s.squeeze()
    perm = topk(score, self.ratio, batch)

    A = self.norm(A)
    K_neighbor = A * k.T
    x_neighbor = K_neighbor @ x

    # Degree-normalize the aggregated neighbor features:
    deg = sum(A, dim=1)
    deg_inv = deg.pow_(-1)
    deg_inv.masked_fill_(deg_inv == float('inf'), 0.)
    x_neighbor = x_neighbor * deg_inv.view(1, -1).T

    # Blend self and neighbor information:
    x_self = x * k
    x = x_neighbor * (
        1 - self.args.combine_ratio) + x_self * self.args.combine_ratio

    x = x[perm]
    batch = batch[perm]
    edge_index, edge_attr = filter_adj(edge_index, edge_attr, perm,
                                       num_nodes=s.size(0))

    return x, edge_index, edge_attr, batch, perm
def test(epoch):
    """Evaluate the model on all splits and return
    ``(train_acc, valid_acc, test_acc)``.

    Relies on module-level globals: ``model``, ``test_loader``,
    ``evaluator``, ``device``.
    """
    model.eval()

    y_true = {"train": [], "valid": [], "test": []}
    y_pred = {"train": [], "valid": [], "test": []}

    pbar = tqdm(total=len(test_loader))
    pbar.set_description(f'Evaluating epoch: {epoch:03d}')

    for data in test_loader:
        data = data.to(device)

        # Memory-efficient aggregations
        adj_t = SparseTensor.from_edge_index(data.edge_index).t()
        out = model(data.x, adj_t).argmax(dim=-1, keepdim=True)

        # Collect predictions per split on CPU to free GPU memory:
        for split in ['train', 'valid', 'test']:
            mask = data[f'{split}_mask']
            y_true[split].append(data.y[mask].cpu())
            y_pred[split].append(out[mask].cpu())

        pbar.update(1)

    pbar.close()

    train_acc = evaluator.eval({
        'y_true': torch.cat(y_true['train'], dim=0),
        'y_pred': torch.cat(y_pred['train'], dim=0),
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': torch.cat(y_true['valid'], dim=0),
        'y_pred': torch.cat(y_pred['valid'], dim=0),
    })['acc']
    test_acc = evaluator.eval({
        'y_true': torch.cat(y_true['test'], dim=0),
        'y_pred': torch.cat(y_pred['test'], dim=0),
    })['acc']

    return train_acc, valid_acc, test_acc
def __init__(self, edge_index: torch.Tensor, node_idx: torch.Tensor,
             sizes: List[int], num_nodes: Optional[int] = None,
             flow: str = 'source_to_target', **kwargs):
    """Neighbor sampler over ``edge_index`` seeded at ``node_idx``.

    Args:
        edge_index: Graph connectivity of shape ``[2, num_edges]``.
        node_idx: Seed node indices to iterate over.
        sizes: Neighbors to sample per layer (``-1`` for all).
        num_nodes: Node count; inferred from ``edge_index`` if omitted.
        flow: Message-passing direction; controls adjacency transpose.
        **kwargs: Forwarded to ``torch.utils.data.DataLoader``.
    """
    N = int(edge_index.max() + 1) if num_nodes is None else num_nodes
    # Store original edge positions as values for later lookup:
    edge_attr = torch.arange(edge_index.size(1))
    adj = SparseTensor.from_edge_index(edge_index, edge_attr, (N, N),
                                       is_sorted=False)
    self.adj = adj.t() if flow == 'source_to_target' else adj

    self.sizes = sizes
    self.flow = flow
    assert self.flow in ['source_to_target', 'target_to_source']

    super(NeighborSampler, self).__init__(node_idx.tolist(),
                                          collate_fn=self.__gen_batch__,
                                          **kwargs)
def __init__(self, edge_index, edge_weight, embedding_dim, walk_length,
             context_size, walks_per_node=1, p=1, q=1,
             num_negative_samples=1, num_nodes=None, sparse=False):
    """Node2Vec variant whose random walks are guided by ``edge_weight``.

    All other arguments are forwarded unchanged to the parent Node2Vec
    constructor; afterwards the unweighted adjacency built by the parent
    is replaced by a weighted one.
    """
    super(GuidedNode2Vec, self).__init__(edge_index, embedding_dim,
                                         walk_length, context_size,
                                         walks_per_node, p, q,
                                         num_negative_samples, num_nodes,
                                         sparse)
    N = maybe_num_nodes(edge_index, num_nodes)
    # Override the parent's adjacency with edge weights attached, so
    # walks can be biased by weight:
    self.adj = SparseTensor.from_edge_index(edge_index,
                                            edge_attr=edge_weight,
                                            sparse_sizes=(N, N))
    self.adj = self.adj.to('cpu')
def __dropout_adj__(self, sparse_adj: SparseTensor, dropout_adj_prob: float):
    """Apply (undirected) edge dropout to ``sparse_adj`` for
    regularization and return a new ``SparseTensor`` of the same size,
    with self-loops restored."""
    num_nodes = sparse_adj.size(0)

    # Unpack the sparse matrix into COO form:
    row, col, values = sparse_adj.coo()
    ei = torch.stack([row, col], dim=0)

    # Randomly drop edges -> better generalization:
    ei, values = dropout_adj(ei, edge_attr=values, p=dropout_adj_prob,
                             force_undirected=True, training=self.training)

    # `force_undirected=True` strips self-loops, so re-insert them with a
    # zero fill value:
    ei, values = add_remaining_self_loops(ei, edge_weight=values,
                                          fill_value=0.00,
                                          num_nodes=num_nodes)

    # Re-assemble the sparse adjacency matrix:
    return SparseTensor.from_edge_index(ei, edge_attr=values,
                                        sparse_sizes=(num_nodes, num_nodes))
def __call__(self, data: Data) -> Data:
    """Attach random-walk positional encodings: for each node, the
    return probability after 1..walk_length steps of a D^{-1}A walk."""
    num_nodes = data.num_nodes
    edge_index, edge_weight = data.edge_index, data.edge_weight
    adj = SparseTensor.from_edge_index(edge_index, edge_weight,
                                       sparse_sizes=(num_nodes, num_nodes))

    # Compute D^{-1} A:
    deg_inv = 1.0 / adj.sum(dim=1)
    deg_inv[deg_inv == float('inf')] = 0  # guard isolated nodes
    adj = adj * deg_inv.view(-1, 1)

    out = adj
    # Diagonal of each power of the walk matrix = return probabilities:
    row, col, value = out.coo()
    pe_list = [get_self_loop_attr((row, col), value, num_nodes)]
    for _ in range(self.walk_length - 1):
        out = out @ adj
        row, col, value = out.coo()
        pe_list.append(get_self_loop_attr((row, col), value, num_nodes))
    pe = torch.stack(pe_list, dim=-1)

    data = add_node_attr(data, pe, attr_name=self.attr_name)
    return data
def test_batch():
    """End-to-end check of ``Batch.from_data_list`` / ``to_data_list``
    round-trips for tensors, SparseTensors, strings and lists.

    Fix: the function previously ended with ``set_debug(True)`` again,
    leaving global debug mode enabled for every subsequent test; it now
    restores ``set_debug(False)``.
    """
    torch_geometric.set_debug(True)

    x1 = torch.tensor([1, 2, 3], dtype=torch.float)
    x1_sp = SparseTensor.from_dense(x1.view(-1, 1))
    e1 = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
    adj1 = SparseTensor.from_edge_index(e1)
    s1 = '1'
    array1 = ['1', '2']
    x2 = torch.tensor([1, 2], dtype=torch.float)
    x2_sp = SparseTensor.from_dense(x2.view(-1, 1))
    e2 = torch.tensor([[0, 1], [1, 0]])
    adj2 = SparseTensor.from_edge_index(e2)
    s2 = '2'
    array2 = ['3', '4', '5']
    x3 = torch.tensor([1, 2, 3, 4], dtype=torch.float)
    x3_sp = SparseTensor.from_dense(x3.view(-1, 1))
    e3 = torch.tensor([[0, 1, 1, 2, 2, 3], [1, 0, 2, 1, 3, 2]])
    adj3 = SparseTensor.from_edge_index(e3)
    s3 = '3'
    array3 = ['6', '7', '8', '9']

    data1 = Data(x=x1, x_sp=x1_sp, edge_index=e1, adj=adj1, s=s1,
                 array=array1, num_nodes=3)
    data2 = Data(x=x2, x_sp=x2_sp, edge_index=e2, adj=adj2, s=s2,
                 array=array2, num_nodes=2)
    data3 = Data(x=x3, x_sp=x3_sp, edge_index=e3, adj=adj3, s=s3,
                 array=array3, num_nodes=4)

    # Batching a single graph:
    batch = Batch.from_data_list([data1])
    assert str(batch) == ('Batch(x=[3], edge_index=[2, 4], '
                          'x_sp=[3, 1, nnz=3], adj=[3, 3, nnz=4], s=[1], '
                          'array=[1], num_nodes=3, batch=[3], ptr=[2])')
    assert batch.num_graphs == 1
    assert len(batch) == 9
    assert batch.x.tolist() == [1, 2, 3]
    assert batch.x_sp.to_dense().view(-1).tolist() == batch.x.tolist()
    assert batch.edge_index.tolist() == [[0, 1, 1, 2], [1, 0, 2, 1]]
    edge_index = torch.stack(batch.adj.coo()[:2], dim=0)
    assert edge_index.tolist() == batch.edge_index.tolist()
    assert batch.s == ['1']
    assert batch.array == [['1', '2']]
    assert batch.num_nodes == 3
    assert batch.batch.tolist() == [0, 0, 0]
    assert batch.ptr.tolist() == [0, 3]

    # Batching three graphs, following batch assignment for `s`:
    batch = Batch.from_data_list([data1, data2, data3], follow_batch=['s'])
    assert str(batch) == ('Batch(x=[9], edge_index=[2, 12], '
                          'x_sp=[9, 1, nnz=9], adj=[9, 9, nnz=12], s=[3], '
                          's_batch=[3], array=[3], num_nodes=9, batch=[9], '
                          'ptr=[4])')
    assert batch.num_graphs == 3
    assert len(batch) == 10
    assert batch.x.tolist() == [1, 2, 3, 1, 2, 1, 2, 3, 4]
    assert batch.x_sp.to_dense().view(-1).tolist() == batch.x.tolist()
    assert batch.edge_index.tolist() == [[0, 1, 1, 2, 3, 4, 5, 6, 6, 7, 7, 8],
                                         [1, 0, 2, 1, 4, 3, 6, 5, 7, 6, 8, 7]]
    edge_index = torch.stack(batch.adj.coo()[:2], dim=0)
    assert edge_index.tolist() == batch.edge_index.tolist()
    assert batch.s == ['1', '2', '3']
    assert batch.s_batch.tolist() == [0, 1, 2]
    assert batch.array == [['1', '2'], ['3', '4', '5'], ['6', '7', '8', '9']]
    assert batch.num_nodes == 9
    assert batch.batch.tolist() == [0, 0, 0, 1, 1, 2, 2, 2, 2]
    assert batch.ptr.tolist() == [0, 3, 5, 9]

    # Indexing a batch reconstructs individual graphs:
    data = batch[0]
    assert str(data) == ("Data(x=[3], edge_index=[2, 4], x_sp=[3, 1, nnz=3], "
                         "adj=[3, 3, nnz=4], s='1', array=[2], num_nodes=3)")
    data = batch[1]
    assert str(data) == ("Data(x=[2], edge_index=[2, 2], x_sp=[2, 1, nnz=2], "
                         "adj=[2, 2, nnz=2], s='2', array=[3], num_nodes=2)")
    data = batch[2]
    assert str(data) == ("Data(x=[4], edge_index=[2, 6], x_sp=[4, 1, nnz=4], "
                         "adj=[4, 4, nnz=6], s='3', array=[4], num_nodes=4)")

    # Various index_select flavors:
    assert len(batch.index_select([1, 0])) == 2
    assert len(batch.index_select(torch.tensor([1, 0]))) == 2
    assert len(batch.index_select(torch.tensor([True, False]))) == 1
    assert len(batch.index_select(np.array([1, 0], dtype=np.int64))) == 2
    assert len(batch.index_select(np.array([True, False]))) == 1
    assert len(batch[:2]) == 2

    # Full round-trip back to a list of Data objects:
    data_list = batch.to_data_list()
    assert len(data_list) == 3

    assert len(data_list[0]) == 7
    assert data_list[0].x.tolist() == [1, 2, 3]
    assert data_list[0].x_sp.to_dense().view(-1).tolist() == [1, 2, 3]
    assert data_list[0].edge_index.tolist() == [[0, 1, 1, 2], [1, 0, 2, 1]]
    edge_index = torch.stack(data_list[0].adj.coo()[:2], dim=0)
    assert edge_index.tolist() == data_list[0].edge_index.tolist()
    assert data_list[0].s == '1'
    assert data_list[0].array == ['1', '2']
    assert data_list[0].num_nodes == 3

    assert len(data_list[1]) == 7
    assert data_list[1].x.tolist() == [1, 2]
    assert data_list[1].x_sp.to_dense().view(-1).tolist() == [1, 2]
    assert data_list[1].edge_index.tolist() == [[0, 1], [1, 0]]
    edge_index = torch.stack(data_list[1].adj.coo()[:2], dim=0)
    assert edge_index.tolist() == data_list[1].edge_index.tolist()
    assert data_list[1].s == '2'
    assert data_list[1].array == ['3', '4', '5']
    assert data_list[1].num_nodes == 2

    assert len(data_list[2]) == 7
    assert data_list[2].x.tolist() == [1, 2, 3, 4]
    assert data_list[2].x_sp.to_dense().view(-1).tolist() == [1, 2, 3, 4]
    assert data_list[2].edge_index.tolist() == [[0, 1, 1, 2, 2, 3],
                                                [1, 0, 2, 1, 3, 2]]
    edge_index = torch.stack(data_list[2].adj.coo()[:2], dim=0)
    assert edge_index.tolist() == data_list[2].edge_index.tolist()
    assert data_list[2].s == '3'
    assert data_list[2].array == ['6', '7', '8', '9']
    assert data_list[2].num_nodes == 4

    # Restore global debug mode (was erroneously re-enabled here).
    torch_geometric.set_debug(False)
def test_graph_store_conversion():
    """Check that edge indices stored in COO/CSR/CSC layouts convert
    correctly between layouts via the graph store.

    Fix: the nested ``_tensor_eq`` helper returned inside the first loop
    iteration (both branches contained ``return``), so only the first
    tensor pair was ever compared; it now checks every pair.
    """
    graph_store = MyGraphStore()
    edge_index = get_edge_index(100, 100, 300)
    edge_index = sort_edge_index(edge_index, sort_by_row=False)
    adj = SparseTensor.from_edge_index(edge_index, sparse_sizes=(100, 100))

    coo = (edge_index[0], edge_index[1])
    csr = adj.csr()[:2]
    csc = adj.csc()[-2::-1]

    # Put all edge indices:
    graph_store.put_edge_index(edge_index=coo, edge_type=('v', '1', 'v'),
                               layout='coo', size=(100, 100), is_sorted=True)
    assert graph_store.num_src_nodes(edge_type=('v', '1', 'v')) == 100
    assert graph_store.num_dst_nodes(edge_type=('v', '1', 'v')) == 100
    graph_store.put_edge_index(edge_index=csr, edge_type=('v', '2', 'v'),
                               layout='csr', size=(100, 100))
    assert graph_store.num_src_nodes(edge_type=('v', '2', 'v')) == 100
    assert graph_store.num_dst_nodes(edge_type=('v', '2', 'v')) == 100
    graph_store.put_edge_index(edge_index=csc, edge_type=('v', '3', 'v'),
                               layout='csc', size=(100, 100))
    assert graph_store.num_src_nodes(edge_type=('v', '3', 'v')) == 100
    assert graph_store.num_dst_nodes(edge_type=('v', '3', 'v')) == 100

    def assert_edge_index_equal(expected: torch.Tensor, actual: torch.Tensor):
        # Row order may differ between layouts; compare in sorted order.
        assert torch.equal(sort_edge_index(expected), sort_edge_index(actual))

    # Convert to COO:
    row_dict, col_dict, perm_dict = graph_store.coo()
    assert len(row_dict) == len(col_dict) == len(perm_dict) == 3
    for key in row_dict.keys():
        actual = torch.stack((row_dict[key], col_dict[key]))
        assert_edge_index_equal(actual, edge_index)
        assert perm_dict[key] is None

    # Convert to CSR:
    rowptr_dict, col_dict, perm_dict = graph_store.csr()
    assert len(rowptr_dict) == len(col_dict) == len(perm_dict) == 3
    for key in rowptr_dict:
        assert torch.equal(rowptr_dict[key], csr[0])
        assert torch.equal(col_dict[key], csr[1])
        if key == ('v', '1', 'v'):
            # COO input requires a permutation to reach CSR order:
            assert perm_dict[key] is not None

    # Convert to CSC:
    row_dict, colptr_dict, perm_dict = graph_store.csc()
    assert len(row_dict) == len(colptr_dict) == len(perm_dict) == 3
    for key in row_dict:
        assert torch.equal(row_dict[key], csc[0])
        assert torch.equal(colptr_dict[key], csc[1])
        assert perm_dict[key] is None

    # Ensure that 'edge_types' parameters work as intended:
    def _tensor_eq(expected: List[OptTensor], actual: List[OptTensor]):
        # Fix: compare ALL pairs; the original returned after the first.
        for tensor_expected, tensor_actual in zip(expected, actual):
            if tensor_expected is None or tensor_actual is None:
                if tensor_expected is not tensor_actual:
                    return False
            elif not torch.equal(tensor_expected, tensor_actual):
                return False
        return True

    edge_types = [('v', '1', 'v'), ('v', '2', 'v')]
    assert _tensor_eq(
        list(graph_store.coo()[0].values())[:-1],
        graph_store.coo(edge_types=edge_types)[0].values())
    assert _tensor_eq(
        list(graph_store.csr()[0].values())[:-1],
        graph_store.csr(edge_types=edge_types)[0].values())
    assert _tensor_eq(
        list(graph_store.csc()[0].values())[:-1],
        graph_store.csc(edge_types=edge_types)[0].values())
def main():
    """Train and evaluate a GCN/GraphSAGE link predictor on OGBL-COLLAB,
    running ``--runs`` seeded repetitions and logging Hits@K metrics."""
    parser = argparse.ArgumentParser(description="OGBL-COLLAB (GNN)")
    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--log_steps", type=int, default=1)
    parser.add_argument("--use_sage", action="store_true")
    parser.add_argument("--use_valedges_as_input", action="store_true")
    parser.add_argument("--num_layers", type=int, default=3)
    parser.add_argument("--hidden_channels", type=int, default=256)
    parser.add_argument("--dropout", type=float, default=0.0)
    parser.add_argument("--batch_size", type=int, default=64 * 1024)
    parser.add_argument("--lr", type=float, default=0.001)
    parser.add_argument("--epochs", type=int, default=400)
    parser.add_argument("--eval_steps", type=int, default=1)
    parser.add_argument("--runs", type=int, default=1)
    parser.add_argument("--seed", type=int, default=1)
    args = parser.parse_args()
    print(args)

    device = f"cuda:{args.device}" if torch.cuda.is_available() else "cpu"
    device = torch.device(device)

    dataset = PygLinkPropPredDataset(name="ogbl-collab")
    data = dataset[0]
    edge_index = data.edge_index
    data.edge_weight = data.edge_weight.view(-1).to(torch.float)
    data = T.ToSparseTensor()(data)

    split_edge = dataset.get_edge_split()

    # Use training + validation edges for inference on test set.
    if args.use_valedges_as_input:
        val_edge_index = split_edge["valid"]["edge"].t()
        full_edge_index = torch.cat([edge_index, val_edge_index], dim=-1)
        data.full_adj_t = SparseTensor.from_edge_index(full_edge_index).t()
        data.full_adj_t = data.full_adj_t.to_symmetric()
    else:
        data.full_adj_t = data.adj_t

    data = data.to(device)

    if args.use_sage:
        model = SAGE(
            data.num_features,
            args.hidden_channels,
            args.hidden_channels,
            args.num_layers,
            args.dropout,
        ).to(device)
    else:
        model = GCN(
            data.num_features,
            args.hidden_channels,
            args.hidden_channels,
            args.num_layers,
            args.dropout,
        ).to(device)

    predictor = LinkPredictor(
        args.hidden_channels, args.hidden_channels, 1, args.num_layers,
        args.dropout
    ).to(device)

    evaluator = Evaluator(name="ogbl-collab")
    loggers = {
        "Hits@10": Logger(args.runs, args),
        "Hits@50": Logger(args.runs, args),
        "Hits@100": Logger(args.runs, args),
    }

    for run in tqdm(range(args.runs)):
        # Different but reproducible seed per run:
        torch.manual_seed(args.seed + run)
        np.random.seed(args.seed + run)
        model.reset_parameters()
        predictor.reset_parameters()
        optimizer = torch.optim.Adam(
            list(model.parameters()) + list(predictor.parameters()),
            lr=args.lr
        )
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, predictor, data, split_edge, optimizer,
                         args.batch_size)

            if epoch % args.eval_steps == 0:
                results = test(
                    model, predictor, data, split_edge, evaluator,
                    args.batch_size
                )
                for key, result in results.items():
                    loggers[key].add_result(run, result)

                if epoch % args.log_steps == 0:
                    for key, result in results.items():
                        train_hits, valid_hits, test_hits = result
                        print(key)
                        print(
                            f"Run: {run + 1:02d}, "
                            f"Epoch: {epoch:02d}, "
                            f"Loss: {loss:.4f}, "
                            f"Train: {100 * train_hits:.2f}%, "
                            f"Valid: {100 * valid_hits:.2f}%, "
                            f"Test: {100 * test_hits:.2f}%"
                        )
                    print("---")

        for key in loggers.keys():
            print(key)
            loggers[key].print_statistics(run)

    for key in loggers.keys():
        print(key)
        loggers[key].print_statistics()
def test_custom_neighbor_loader(FeatureStore, GraphStore):
    """Check that a NeighborLoader over a (FeatureStore, GraphStore) pair
    matches a NeighborLoader over the reference ``HeteroData``.

    Fix: the edge-index size assertions compared ``batch1`` against
    itself (vacuously true); they now compare ``batch1`` to ``batch2``.
    """
    # Initialize feature store, graph store, and reference:
    feature_store = FeatureStore()
    graph_store = GraphStore()
    data = HeteroData()

    # Set up node features:
    x = torch.arange(100)
    data['paper'].x = x
    feature_store.put_tensor(x, group_name='paper', attr_name='x', index=None)

    x = torch.arange(100, 300)
    data['author'].x = x
    feature_store.put_tensor(x, group_name='author', attr_name='x',
                             index=None)

    # Set up edge indices:

    # COO:
    edge_index = get_edge_index(100, 100, 500)
    data['paper', 'to', 'paper'].edge_index = edge_index
    coo = (edge_index[0], edge_index[1])
    graph_store.put_edge_index(edge_index=coo,
                               edge_type=('paper', 'to', 'paper'),
                               layout='coo', size=(100, 100))

    # CSR:
    edge_index = get_edge_index(100, 200, 1000)
    data['paper', 'to', 'author'].edge_index = edge_index
    csr = SparseTensor.from_edge_index(edge_index).csr()[:2]
    graph_store.put_edge_index(edge_index=csr,
                               edge_type=('paper', 'to', 'author'),
                               layout='csr', size=(100, 200))

    # CSC:
    edge_index = get_edge_index(200, 100, 1000)
    data['author', 'to', 'paper'].edge_index = edge_index
    csc = SparseTensor(row=edge_index[1], col=edge_index[0]).csr()[-2::-1]
    graph_store.put_edge_index(edge_index=csc,
                               edge_type=('author', 'to', 'paper'),
                               layout='csc', size=(200, 100))

    # COO (sorted):
    edge_index = get_edge_index(200, 200, 100)
    edge_index = edge_index[:, edge_index[1].argsort()]
    data['author', 'to', 'author'].edge_index = edge_index
    coo = (edge_index[0], edge_index[1])
    graph_store.put_edge_index(edge_index=coo,
                               edge_type=('author', 'to', 'author'),
                               layout='coo', size=(200, 200), is_sorted=True)

    # Construct neighbor loaders:
    loader1 = NeighborLoader(data, batch_size=20,
                             input_nodes=('paper', range(100)),
                             num_neighbors=[-1] * 2)
    loader2 = NeighborLoader((feature_store, graph_store), batch_size=20,
                             input_nodes=('paper', range(100)),
                             num_neighbors=[-1] * 2)

    assert str(loader1) == str(loader2)
    assert len(loader1) == len(loader2)

    for batch1, batch2 in zip(loader1, loader2):
        assert len(batch1) == len(batch2)
        assert batch1['paper'].batch_size == batch2['paper'].batch_size

        # Mapped indices of neighbors may be differently sorted:
        assert torch.allclose(batch1['paper'].x.sort()[0],
                              batch2['paper'].x.sort()[0])
        assert torch.allclose(batch1['author'].x.sort()[0],
                              batch2['author'].x.sort()[0])

        # Compare against `batch2` (the original compared batch1 to
        # itself, making these assertions vacuous):
        assert (batch1['paper', 'to', 'paper'].edge_index.size() == batch2[
            'paper', 'to', 'paper'].edge_index.size())
        assert (batch1['paper', 'to', 'author'].edge_index.size() == batch2[
            'paper', 'to', 'author'].edge_index.size())
        assert (batch1['author', 'to', 'paper'].edge_index.size() == batch2[
            'author', 'to', 'paper'].edge_index.size())
def __create_edge_index_for_cluster__(self, cluster_info: list,
                                      modeling_type: str = "fine"):
    """Build (and memoize in ``self.__conn_dict__``) a typed adjacency
    matrix plus batch vector for a cluster description.

    Args:
        cluster_info: List of tuples; first entry of each is a metric
            count, the rest identify (component, group, node).
        modeling_type: ``"coarse"`` uses one node per entry; ``"fine"``
            replicates each entry by its metric count.
    """
    my_key = f"{str(cluster_info)}_{modeling_type}"
    if my_key in self.__conn_dict__:
        return  # no need for further processing

    cluster_info_list = []
    custom_batch = None
    if modeling_type == "coarse":
        # first: metric count, second: component name, third: group name, fourth: node name
        cluster_info_list = [el[1:] for el in cluster_info]
        custom_batch = list(range(len(cluster_info_list)))
    else:
        custom_batch = []
        for i, el in enumerate(cluster_info):
            # first: metric count, second: component name, third: group name, fourth: node name
            # Replicate the entry once per metric; all replicas share
            # the same batch id `i`.
            cluster_info_list += (el[0] * [el[1:]])
            custom_batch += el[0] * [i]
    custom_batch = torch.tensor(custom_batch, dtype=torch.long)

    edge_index_list = []
    edge_attr_list = []
    # each tuple has following structure: (component name, group name, node name)
    # All-pairs relation matching; the elif-cascade means only the first
    # matching relation per (i, j) pair creates an edge.
    for i, tuple_i in enumerate(cluster_info_list):
        for j, tuple_j in enumerate(cluster_info_list):
            # identity (self-loop)
            if i == j:
                edge_index_list.append((i, j))
                edge_attr_list.append(self.__get_edge_type__("identity"))
            # system of tuple_i is hosted on tuple_j
            elif tuple_i[2] == tuple_j[1]:
                edge_index_list.append((i, j))
                edge_attr_list.append(self.__get_edge_type__("guest-host"))
            # system of tuple_i is hosting tuple_j
            elif tuple_i[1] == tuple_j[2]:
                edge_index_list.append((i, j))
                edge_attr_list.append(self.__get_edge_type__("host-guest"))
            # both systems are from the same group
            elif tuple_i[1] == tuple_j[1]:
                edge_index_list.append((i, j))
                edge_attr_list.append(self.__get_edge_type__(tuple_j[1]))
            # both systems are from distinct groups, but there is communication
            elif tuple_j[1] in __node_relation_dict__.get(tuple_i[1], []):
                edge_index_list.append((i, j))
                edge_attr_list.append(
                    self.__get_edge_type__(f"{tuple_i[1]}->{tuple_j[1]}"))
            # both systems are from distinct groups, but there is communication
            elif tuple_i[1] in __node_relation_dict__.get(tuple_j[1], []):
                edge_index_list.append((i, j))
                edge_attr_list.append(
                    self.__get_edge_type__(f"{tuple_j[1]}->{tuple_i[1]}"))

    edge_index = torch.tensor(edge_index_list,
                              dtype=torch.long).t().contiguous()
    edge_attr = torch.tensor(edge_attr_list, dtype=torch.double).contiguous()
    N = len(cluster_info_list)
    adj = SparseTensor.from_edge_index(edge_index, edge_attr=edge_attr,
                                       sparse_sizes=(N, N))
    # Cache the result so repeated calls with the same cluster are free.
    self.__conn_dict__[my_key] = (adj, custom_batch)
def main():
    """Train and evaluate a GCN/GraphSAGE link predictor on OGBL-COLLAB
    over multiple runs, logging Hits@K metrics."""
    parser = argparse.ArgumentParser(description='OGBL-COLLAB (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--use_valedges_as_input', action='store_true')
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--batch_size', type=int, default=64 * 1024)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=400)
    parser.add_argument('--eval_steps', type=int, default=1)
    parser.add_argument('--runs', type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygLinkPropPredDataset(name='ogbl-collab')
    data = dataset[0]
    edge_index = data.edge_index
    data.edge_weight = data.edge_weight.view(-1).to(torch.float)
    data = T.ToSparseTensor()(data)

    split_edge = dataset.get_edge_split()

    # Use training + validation edges for inference on test set.
    if args.use_valedges_as_input:
        val_edge_index = split_edge['valid']['edge'].t()
        full_edge_index = torch.cat([edge_index, val_edge_index], dim=-1)
        data.full_adj_t = SparseTensor.from_edge_index(full_edge_index).t()
        data.full_adj_t = data.full_adj_t.to_symmetric()
    else:
        data.full_adj_t = data.adj_t

    data = data.to(device)

    if args.use_sage:
        model = SAGE(data.num_features, args.hidden_channels,
                     args.hidden_channels, args.num_layers,
                     args.dropout).to(device)
    else:
        model = GCN(data.num_features, args.hidden_channels,
                    args.hidden_channels, args.num_layers,
                    args.dropout).to(device)

    predictor = LinkPredictor(args.hidden_channels, args.hidden_channels, 1,
                              args.num_layers, args.dropout).to(device)

    evaluator = Evaluator(name='ogbl-collab')
    loggers = {
        'Hits@10': Logger(args.runs, args),
        'Hits@50': Logger(args.runs, args),
        'Hits@100': Logger(args.runs, args),
    }

    for run in range(args.runs):
        model.reset_parameters()
        predictor.reset_parameters()
        optimizer = torch.optim.Adam(list(model.parameters()) +
                                     list(predictor.parameters()),
                                     lr=args.lr)

        for epoch in range(1, 1 + args.epochs):
            loss = train(model, predictor, data, split_edge, optimizer,
                         args.batch_size)

            if epoch % args.eval_steps == 0:
                results = test(model, predictor, data, split_edge, evaluator,
                               args.batch_size)
                for key, result in results.items():
                    loggers[key].add_result(run, result)

                if epoch % args.log_steps == 0:
                    for key, result in results.items():
                        train_hits, valid_hits, test_hits = result
                        print(key)
                        print(f'Run: {run + 1:02d}, '
                              f'Epoch: {epoch:02d}, '
                              f'Loss: {loss:.4f}, '
                              f'Train: {100 * train_hits:.2f}%, '
                              f'Valid: {100 * valid_hits:.2f}%, '
                              f'Test: {100 * test_hits:.2f}%')
                    print('---')

        for key in loggers.keys():
            print(key)
            loggers[key].print_statistics(run)

    for key in loggers.keys():
        print(key)
        loggers[key].print_statistics()