def process(self):
    """Convert the raw per-split files into collated datasets on disk.

    For each of the ``train``, ``valid`` and ``test`` splits this reads
    ``<split>_graph.json``, ``<split>_feats.npy``, ``<split>_labels.npy`` and
    ``<split>_graph_id.npy`` from ``self.raw_dir``, builds one ``Data``
    object per graph id, applies ``self.pre_filter`` / ``self.pre_transform``
    when they are set, and saves the collated list to the entry of
    ``self.processed_paths`` with the same position as the split.
    """
    for split_pos, split_name in enumerate(['train', 'valid', 'test']):
        # The split name is substituted into the already-joined path.
        graph_path = osp.join(self.raw_dir,
                              '{}_graph.json').format(split_name)
        with open(graph_path, 'r') as handle:
            raw_graph = json.load(handle)
        G = nx.DiGraph(json_graph.node_link_graph(raw_graph))

        feats_path = osp.join(self.raw_dir,
                              '{}_feats.npy').format(split_name)
        x = torch.from_numpy(np.load(feats_path)).to(torch.float)

        labels_path = osp.join(self.raw_dir,
                               '{}_labels.npy').format(split_name)
        y = torch.from_numpy(np.load(labels_path)).to(torch.float)

        ids_path = osp.join(self.raw_dir,
                            '{}_graph_id.npy').format(split_name)
        graph_ids = torch.from_numpy(np.load(ids_path)).to(torch.long)
        # Shift the ids so that the smallest one becomes 0.
        graph_ids = graph_ids - graph_ids.min()

        data_list = []
        for graph_id in range(graph_ids.max().item() + 1):
            mask = graph_ids == graph_id
            members = mask.nonzero().view(-1).tolist()
            subgraph = G.subgraph(members)

            edge_index = torch.tensor(list(subgraph.edges)).t().contiguous()
            # Re-base the node indices so the smallest one in this graph
            # becomes 0.
            edge_index = edge_index - edge_index.min()
            edge_index, _ = remove_self_loops(edge_index)

            data = Data(edge_index=edge_index, x=x[mask], y=y[mask])

            keep = self.pre_filter is None or self.pre_filter(data)
            if not keep:
                continue
            if self.pre_transform is not None:
                data = self.pre_transform(data)
            data_list.append(data)

        torch.save(self.collate(data_list), self.processed_paths[split_pos])
def edge_index_from_dict(graph_dict, num_nodes=None):
    """Build a symmetric, duplicate-free edge index from an adjacency dict.

    Args:
        graph_dict: Mapping from a node id to a sequence of neighbour ids.
        num_nodes: Unused by this implementation; kept so existing callers
            that pass it keep working.

    Returns:
        A ``torch.long`` tensor with two rows. Every remaining edge appears
        twice, once per direction; self loops and duplicates are removed.
        An empty ``graph_dict`` yields a ``(2, 0)`` tensor.
    """
    if not graph_dict:
        # ``np.concatenate`` cannot handle an empty list of pieces.
        return torch.empty((2, 0), dtype=torch.long)

    sources, targets = [], []
    for node, neighbours in graph_dict.items():
        sources.append(np.repeat(node, len(neighbours)))
        targets.append(neighbours)

    # Concatenate the per-node pieces directly. Wrapping the (usually
    # ragged) list in ``np.array`` first raises ``ValueError`` on recent
    # NumPy releases and gains nothing on older ones.
    _row = np.concatenate(sources)
    _col = np.concatenate(targets)
    edge_index = np.stack([_row, _col], axis=0)

    # Orient every edge as (larger id, smaller id). Entries with
    # row == col match neither mask and are dropped here.
    row_dom = edge_index[:, _row > _col]
    col_dom = edge_index[:, _col > _row][[1, 0]]
    edge_index = np.concatenate([row_dom, col_dom], axis=1)

    # Sort by row first, then by column.
    order = np.lexsort((edge_index[1], edge_index[0]))
    edge_index = edge_index[:, order]
    edge_index = torch.tensor(edge_index, dtype=torch.long)

    # There may be duplicated edges and self loops in the datasets.
    row, col, _ = coalesce(edge_index[0], edge_index[1])
    edge_index = torch.stack([row, col], dim=0)
    edge_index, _ = remove_self_loops(edge_index)

    # Emit each edge in both directions.
    row = torch.cat([edge_index[0], edge_index[1]])
    col = torch.cat([edge_index[1], edge_index[0]])
    edge_index = torch.stack([row, col])
    return edge_index
def __call__(self, data):
    """Extend ``data.edge_index`` with the edges of its sparse self-product.

    The edge index is multiplied with itself via ``spspmm`` (presumably
    yielding the two-hop connections), self loops are removed from the
    product, and the result is merged with the existing edges. When
    ``data.edge_attr`` is present, existing edges keep their attributes and
    the added edges receive an attribute of 0.

    Returns:
        The same ``data`` object, modified in place.
    """
    edge_index, edge_attr = data.edge_index, data.edge_attr
    num_nodes = data.num_nodes

    # Large sentinel used as the weight of every edge. After the product
    # and the ``min`` coalescing below, any attribute still at or above
    # this value is reset to 0.
    sentinel = 1e16
    weights = edge_index.new_full((edge_index.size(1), ), sentinel,
                                  dtype=torch.float)

    hop_index, hop_value = spspmm(edge_index, weights, edge_index, weights,
                                  num_nodes, num_nodes, num_nodes)
    hop_index, hop_value = remove_self_loops(hop_index, hop_value)

    merged_index = torch.cat([edge_index, hop_index], dim=1)

    if edge_attr is None:
        data.edge_index, _ = coalesce(merged_index, None, num_nodes,
                                      num_nodes)
        return data

    # Broadcast the per-edge sentinel values to the trailing shape of the
    # existing attributes so both can be concatenated.
    trailing_ones = [1] * (edge_attr.dim() - 1)
    hop_value = hop_value.view(-1, *trailing_ones)
    hop_value = hop_value.expand(-1, *edge_attr.size()[1:])
    merged_attr = torch.cat([edge_attr, hop_value], dim=0)

    data.edge_index, merged_attr = coalesce(merged_index, merged_attr,
                                            num_nodes, num_nodes, op='min',
                                            fill_value=sentinel)
    merged_attr[merged_attr >= sentinel] = 0
    data.edge_attr = merged_attr
    return data
def __init__(self, root, name):
    """Load a ``NodePropPredDataset`` and wrap its first graph in a ``Graph``.

    Edges are coalesced, stripped of self loops and then emitted in both
    directions (edge attributes, when present, are duplicated to match).
    Boolean ``train_mask`` / ``test_mask`` / ``val_mask`` attributes are
    filled from the dataset's index split.

    Args:
        root: Passed to the parent constructor and to
            ``NodePropPredDataset``.
        name: Dataset name passed to ``NodePropPredDataset``.
    """
    super(OGBNDataset, self).__init__(root)
    dataset = NodePropPredDataset(name, root)
    graph, y = dataset[0]
    x = torch.tensor(graph["node_feat"])
    y = torch.tensor(y.squeeze())

    src, dst, edge_attr = coalesce(
        graph["edge_index"][0], graph["edge_index"][1], graph["edge_feat"]
    )
    edge_index = torch.stack([src, dst], dim=0)
    edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)

    # Add the reverse of every edge.
    forward_row = torch.cat([edge_index[0], edge_index[1]])
    forward_col = torch.cat([edge_index[1], edge_index[0]])
    edge_index = torch.stack([forward_row, forward_col], dim=0)
    if edge_attr is not None:
        edge_attr = torch.cat([edge_attr, edge_attr], dim=0)

    self.data = Graph(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
    self.data.num_nodes = graph["num_nodes"]
    assert self.data.num_nodes == self.data.x.shape[0]

    # split
    split_index = dataset.get_idx_split()
    mask_to_split = (
        ("train_mask", "train"),
        ("test_mask", "test"),
        ("val_mask", "valid"),
    )
    # Create all masks first, then switch on the indices of each split.
    for mask_name, _ in mask_to_split:
        setattr(
            self.data,
            mask_name,
            torch.zeros(self.data.num_nodes, dtype=torch.bool),
        )
    for mask_name, split_key in mask_to_split:
        getattr(self.data, mask_name)[split_index[split_key]] = True

    self.transform = None
def preprocess(self, n_cluster):
    """Partition the graph's nodes into ``n_cluster`` groups, with caching.

    The result is cached in a file named
    ``"<dataset_name>-<n_cluster>.cluster"`` (relative to the current
    working directory). If that file exists it is loaded and returned
    without recomputation.

    Args:
        n_cluster: Number of parts requested from the partition tool.

    Returns:
        A list of ``n_cluster`` integer NumPy arrays; entry ``k`` holds the
        node indices assigned to part ``k``.
    """
    save_name = f"{self.dataset_name}-{n_cluster}.cluster"
    if os.path.exists(save_name):
        # NOTE: ``torch.load`` unpickles the file; only cache files written
        # by this method should ever be found at this path.
        return torch.load(save_name)

    print("Preprocessing...")
    edges, _ = remove_self_loops(self.data.edge_index)
    # ``.cpu()`` is a no-op for tensors that already live on the CPU.
    edges = edges.cpu().numpy()

    # The node count is inferred from the largest index that has an edge.
    num_nodes = np.max(edges) + 1
    adj = sp.csr_matrix(
        (np.ones(edges.shape[1]), (edges[0], edges[1])),
        shape=(num_nodes, num_nodes),
    )
    # Split the CSR column indices at the row boundaries to obtain one
    # neighbour array per node.
    adjacency_lists = np.split(adj.indices, adj.indptr[1:])[:-1]
    _, parts = ClusteredDataset.partition_tool.part_graph(
        adjacency_lists, n_cluster, seed=1
    )

    division = [[] for _ in range(n_cluster)]
    for node, part in enumerate(parts):
        division[part].append(node)
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy, so the builtin is used directly.
    division = [np.array(members, dtype=int) for members in division]

    torch.save(division, save_name)
    print("Graph clustering done")
    return division
def forward(self, x, edge_index):
    """Project the node features per head and propagate them over the graph.

    Existing self loops are stripped from ``edge_index`` before
    ``add_self_loops`` is called for ``x.size(0)`` nodes. The features are
    multiplied by ``self.weight`` and reshaped to
    ``(-1, self.heads, self.out_channels)`` before propagation.
    """
    loop_free, _ = remove_self_loops(edge_index)
    with_loops = add_self_loops(loop_free, num_nodes=x.size(0))

    projected = torch.mm(x, self.weight)
    per_head = projected.view(-1, self.heads, self.out_channels)

    return self.propagate(with_loops, x=per_head,
                          num_nodes=per_head.size(0))
def forward(self, x, edge_index):
    """Run the stacked layers on a self-loop-free graph and apply the readout.

    Each layer in ``self.layers`` is followed by dropout with probability
    ``self.dropout`` (active only in training mode). The final hidden state
    is mapped through ``self.weight`` and shifted by ``self.bias``.
    """
    edge_index, _ = remove_self_loops(edge_index)

    hidden = x
    for conv in self.layers:
        hidden = conv(hidden, edge_index)
        # No activation is applied between layers; a leaky-ReLU call was
        # present here but is disabled:
        # h = F.leaky_relu(h)
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)

    return torch.matmul(hidden, self.weight) + self.bias
def forward(self, x, edge_index, edge_weight=None):
    """Aggregate neighbour features and combine them with the node's own.

    Computes ``(1 + self.eps) * x + A @ x`` where ``A`` is the sparse
    adjacency built from ``edge_index`` (self loops removed) and
    ``edge_weight``, then applies ``self.apply_func`` when it is set.

    Args:
        x: Node feature matrix; its first dimension is used as the number
            of nodes.
        edge_index: Edge index tensor with two rows.
        edge_weight: Optional per-edge weights aligned with ``edge_index``.
            Defaults to a weight of one for every edge.

    Returns:
        The aggregated (and optionally transformed) node features.
    """
    # Filter the weights together with the edges. Dropping self loops from
    # ``edge_index`` alone would leave caller-supplied weights longer than
    # the edge list and make the sparse tensor construction fail.
    edge_index, edge_weight = remove_self_loops(edge_index, edge_weight)
    if edge_weight is None:
        edge_weight = torch.ones(edge_index.shape[1], device=x.device)

    num_nodes = x.shape[0]
    adj = torch.sparse_coo_tensor(edge_index, edge_weight,
                                  (num_nodes, num_nodes))
    adj = adj.to(x.device)

    out = (1 + self.eps) * x + torch.spmm(adj, x)
    if self.apply_func is not None:
        out = self.apply_func(out)
    return out
def normalization(self, H):
    """Normalise the first ``self.num_channels`` ``(edge, value)`` pairs of ``H``.

    For every channel the self loops are removed, ``self.norm`` is called
    on detached copies of the edges and values, and the values are scaled
    by the second factor that ``self.norm`` returns.

    Returns:
        A list of ``(edge, value)`` tuples, one per channel.
    """

    def _normalize_channel(channel):
        # Normalise a single channel of ``H``.
        edge, value = H[channel]
        edge, value = remove_self_loops(edge, value)
        # Only the second factor is used; the first one is discarded.
        _, deg_col = self.norm(edge.detach(), self.num_nodes,
                               value.detach())
        return edge, deg_col * value

    return [_normalize_channel(i) for i in range(self.num_channels)]
def edge_index_from_dict(graph_dict, num_nodes=None):
    """Turn an adjacency dict into a coalesced edge index without self loops.

    Args:
        graph_dict: Mapping from a node id to a sequence of neighbour ids.
        num_nodes: Forwarded to ``coalesce`` as both matrix dimensions.

    Returns:
        The edge index tensor returned by ``coalesce``.
    """
    sources, targets = [], []
    for node, neighbours in graph_dict.items():
        # One source entry per neighbour of ``node``.
        sources.extend([node] * len(neighbours))
        targets.extend(neighbours)

    edge_index = torch.stack(
        [torch.tensor(sources), torch.tensor(targets)], dim=0)

    # NOTE: There are duplicated edges and self loops in the datasets. Other
    # implementations do not remove them!
    edge_index, _ = remove_self_loops(edge_index)
    edge_index, _ = coalesce(edge_index, None, num_nodes, num_nodes)
    return edge_index
def get_adj(row, col, asymm_norm=False, set_diag=True, remove_diag=False):
    """Build a normalised, unit-weighted adjacency from ``row`` / ``col``.

    Args:
        row: 1-D tensor of source node indices.
        col: 1-D tensor of target node indices, aligned with ``row``.
        asymm_norm: When false, ``row_normalization`` is applied; when
            true, ``symmetric_normalization`` is applied.
            NOTE(review): the flag name reads as the opposite of the
            helper it selects — confirm against callers before renaming.
        set_diag: Add the missing self loops. Takes precedence over
            ``remove_diag``.
        remove_diag: Remove existing self loops (only when ``set_diag`` is
            false).

    Returns:
        ``(edge_index, edge_attr)`` where ``edge_attr`` holds the
        normalised weight of each edge in ``edge_index``.
    """
    edge_index = torch.stack([row, col])
    edge_attr = torch.ones(edge_index.shape[1], device=edge_index.device)

    if set_diag:
        edge_index, edge_attr = add_remaining_self_loops(edge_index,
                                                         edge_attr)
    elif remove_diag:
        # Filter the weights together with the edges; discarding the
        # filtered weights would leave ``edge_attr`` longer than
        # ``edge_index`` whenever a self loop was removed.
        edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)

    # The node count is inferred from the largest index present.
    num_nodes = int(torch.max(edge_index)) + 1

    if asymm_norm:
        edge_attr = symmetric_normalization(num_nodes, edge_index, edge_attr)
    else:
        edge_attr = row_normalization(num_nodes, edge_index, edge_attr)
    return edge_index, edge_attr
def __call__(self, data):
    """Connect every pair of points in ``data.pos`` lying within ``self.r``.

    Neighbourhoods are found with a k-d tree queried against itself; the
    resulting pairs are stored in ``data.edge_index`` after self loops are
    removed. ``data.pos`` must not be a CUDA tensor.

    Returns:
        The same ``data`` object, modified in place.
    """
    pos = data.pos
    assert not pos.is_cuda

    tree = scipy.spatial.cKDTree(pos)
    neighbourhoods = tree.query_ball_tree(tree, self.r)

    # One (source, target) entry per neighbour found for each point.
    row = [
        point for point, nearby in enumerate(neighbourhoods) for _ in nearby
    ]
    col = [other for nearby in neighbourhoods for other in nearby]

    edge_index, _ = remove_self_loops(torch.tensor([row, col]))
    data.edge_index = edge_index
    return data
def norm(edge_index, num_nodes, edge_weight, gcn=False, dtype=None):
    """Replace self loops and scale each edge weight by its row's inverse degree.

    Args:
        edge_index: Edge index tensor with two rows.
        num_nodes: Number of nodes; one self loop is appended per node.
        edge_weight: Per-edge weights, or ``None`` for a weight of one on
            every edge.
        gcn: Weight given to the appended self loops: 1 when true, 0
            otherwise.
        dtype: dtype of the default weights created when ``edge_weight``
            is ``None``.

    Returns:
        ``(edge_index, edge_weight)`` where ``edge_index`` has its original
        self loops removed and new ones appended, and every weight is
        divided by the summed weight of its source row.
    """
    if edge_weight is None:
        edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
                                 device=edge_index.device)
    edge_weight = edge_weight.view(-1)
    assert edge_weight.size(0) == edge_index.size(1)

    # Drop the weights of removed self loops together with the edges.
    # Otherwise the weights stop lining up with ``edge_index`` once the
    # fresh self loops are appended below.
    edge_index, edge_weight = remove_self_loops(edge_index, edge_weight)
    edge_index = add_self_loops(edge_index, num_nodes)
    loop_weight = torch.full((num_nodes, ), 1 if gcn else 0,
                             dtype=edge_weight.dtype,
                             device=edge_weight.device)
    edge_weight = torch.cat([edge_weight, loop_weight], dim=0)

    row = edge_index[0]
    deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
    deg_inv = deg.pow(-1)
    # NOTE(review): rows whose weights sum to zero give ``inf`` here, and
    # ``inf * 0`` is ``nan`` for their zero-weight self loops. Masking of
    # infinite entries was disabled in the original and is left disabled;
    # confirm whether callers rely on that.
    return edge_index, deg_inv[row] * edge_weight
def remove_self_loops(self):
    """Remove self-loop edges from ``self.row`` / ``self.col`` / ``self.weight``.

    The stored row and column tensors are stacked, passed together with the
    weights to the module-level ``remove_self_loops`` helper, and the
    filtered results are written back to the instance.
    """
    stacked = torch.stack([self.row, self.col])
    stacked, self.weight = remove_self_loops(stacked, self.weight)
    self.row, self.col = stacked
    # NOTE(review): ``indicator`` is not assigned anywhere in this method,
    # so it must resolve from an outer scope — confirm where it comes from.
    if indicator is True:
        self._to_csr()