def pyg_to_graphs(dataset, verbose: bool = False, fixed_split: bool = False) -> List[Graph]:
    r"""
    Convert the items of a torch_geometric.data.Dataset into
    :class:`deepsnap.graph.Graph` objects.

    Args:
        dataset: a torch_geometric.data.Dataset object.
        verbose: forwarded to :meth:`Graph.pyg_to_graph`
            (whether to print verbose warnings).
        fixed_split: if True, every item is converted with
            ``fixed_split=True`` so the fixed data split of the PyG
            dataset is loaded.

    Returns:
        list: Without ``fixed_split``, one converted graph per dataset
        item. With ``fixed_split``, a list of single-element lists, one
        per element of the conversion result of the first dataset item.
    """
    if not fixed_split:
        return [Graph.pyg_to_graph(data, verbose=verbose) for data in dataset]

    converted = [
        Graph.pyg_to_graph(data, verbose=verbose, fixed_split=True)
        for data in dataset
    ]
    # Every item is converted, but only the result for the first item is
    # returned; each of its elements is wrapped in its own list.
    return [[graph] for graph in converted[0]]
def concatenate_citeseer_cora(cora_pyg, citeseer_pyg):
    """
    Build one graph containing both the Cora and the CiteSeer graph.

    Both PyG inputs are converted with :meth:`Graph.pyg_to_graph`. Every
    node and edge is tagged with a ``node_type`` / ``edge_type`` attribute
    naming the dataset it came from. The CiteSeer nodes and edges are then
    added to a deep copy of the Cora graph, with node ids shifted by the
    number of Cora nodes.

    Args:
        cora_pyg: PyG data object for Cora.
        citeseer_pyg: PyG data object for CiteSeer.

    Returns:
        The combined graph (a deep copy of the tagged Cora graph with the
        shifted CiteSeer nodes and edges added).

    Note:
        Node ids must support integer addition. The shifted CiteSeer ids
        are assumed not to collide with Cora ids (true when Cora ids are
        0..n-1 -- TODO confirm for other inputs).
    """
    cora_g = Graph.pyg_to_graph(cora_pyg).G
    citeseer_g = Graph.pyg_to_graph(citeseer_pyg).G

    # Tag the origin of every node / edge before merging.
    nx.set_node_attributes(cora_g, 'cora_node', name='node_type')
    nx.set_edge_attributes(cora_g, 'cora_edge', name='edge_type')
    nx.set_node_attributes(citeseer_g, 'citeseer_node', name='node_type')
    nx.set_edge_attributes(citeseer_g, 'citeseer_edge', name='edge_type')

    G = deepcopy(cora_g)
    offset = cora_g.number_of_nodes()

    for node_id, attrs in citeseer_g.nodes(data=True):
        shifted = node_id + offset
        G.add_node(shifted, **attrs)
        # Compare against the actual source node id rather than its
        # enumeration position, so the check is valid for any id order.
        assert (G.nodes[shifted]['node_label']
                == citeseer_g.nodes[node_id]['node_label'])
        assert (G.nodes[shifted]['node_type']
                == citeseer_g.nodes[node_id]['node_type'])
    assert (G.number_of_nodes()
            == cora_g.number_of_nodes() + citeseer_g.number_of_nodes())

    for src, dst, attrs in citeseer_g.edges(data=True):
        u = src + offset
        v = dst + offset
        G.add_edge(u, v, **attrs)
        assert (G.edges[(u, v)]['edge_type']
                == citeseer_g.edges[(src, dst)]['edge_type'])
    assert (G.number_of_edges()
            == cora_g.number_of_edges() + citeseer_g.number_of_edges())

    return G
def test_pyg_to_graph_global(self):
    """Graph.pyg_to_graph builds ``G`` with the backend chosen via deepsnap.use."""
    import deepsnap

    deepsnap.use(nx)
    # deepsnap.use() changes what later Graph.pyg_to_graph calls produce,
    # so re-select `nx` when the test ends (pass or fail); otherwise the
    # `sx` selection made below would leak into tests that run afterwards.
    self.addCleanup(deepsnap.use, nx)

    pyg_dataset = Planetoid('./planetoid', "Cora")
    pyg_data = pyg_dataset[0]

    graph = Graph.pyg_to_graph(pyg_data)
    self.assertIsInstance(graph.G, nx.Graph)

    deepsnap.use(sx)
    graph = Graph.pyg_to_graph(pyg_data)
    self.assertIsInstance(graph.G, sx.Graph)
def test_ensemble_generator(self):
    """A GraphDataset backed by an EnsembleGenerator yields graphs of the requested size."""
    pyg_dataset = Planetoid("./cora", "Cora")
    dg = Graph.pyg_to_graph(pyg_dataset[0])
    num_nodes = 500
    sizes = [2, 3]

    class NeighborGenerator1(Generator):
        def __len__(self):
            # __len__ must return an int; returning the list itself would
            # make len() raise TypeError.
            # NOTE(review): len(sizes) is a best guess at the intended
            # length -- confirm against Generator's contract.
            return len(sizes)

        def generate(self):
            return Graph(gen_graph(num_nodes, dg.G))

    class NeighborGenerator2(Generator):
        def __len__(self):
            # Same int-returning contract as NeighborGenerator1.__len__.
            return len(sizes)

        def generate(self):
            return Graph(gen_graph(num_nodes, dg.G))

    ensemble_generator = EnsembleGenerator(
        [
            NeighborGenerator1(sizes),
            NeighborGenerator2(sizes),
        ]
    )
    dataset = GraphDataset(None, generator=ensemble_generator)

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(dataset[0].node_feature.shape[0], num_nodes)
def test_split(self):
    """Check the sizes of the parts returned by Graph.split for node and link_pred tasks."""
    pyg_dataset = Planetoid("./cora", "Cora")
    dg = Graph.pyg_to_graph(pyg_dataset[0])

    # Default split: the expectations below use ratios 0.8 / 0.1 / rest.
    node_parts = dg.split()
    total_nodes = dg.num_nodes
    train_nodes = int(0.8 * total_nodes)
    val_nodes = int(0.1 * total_nodes)
    expected_nodes = [
        train_nodes,
        val_nodes,
        total_nodes - train_nodes - val_nodes,
    ]
    for idx, expected in enumerate(expected_nodes):
        self.assertEqual(node_parts[idx].node_label_index.shape[0], expected)

    # Custom ratios for link prediction; each expected column count of
    # edge_label_index is twice the number of edges assigned to the part.
    for split_ratio in [[0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]]:
        link_parts = dg.split(task="link_pred", split_ratio=split_ratio)
        total_edges = dg.num_edges
        train_edges = int(split_ratio[0] * total_edges)
        val_edges = int(split_ratio[1] * total_edges)
        expected_edges = [
            2 * train_edges,
            2 * val_edges,
            2 * (total_edges - train_edges - val_edges),
        ]
        for idx, expected in enumerate(expected_edges):
            self.assertEqual(
                link_parts[idx].edge_label_index.shape[1],
                expected,
            )
def test_split(self):
    """Check Graph.split part sizes for node, edge and link_pred tasks, plus resample_disjoint."""
    pyg_dataset = Planetoid('./cora', 'Cora')
    dg = Graph.pyg_to_graph(pyg_dataset[0])

    # The expectations subtract 3 from the total and add 1 back per part;
    # presumably each of the three parts is guaranteed one element before
    # the ratios are applied -- TODO confirm against Graph.split.
    node_parts = dg.split()
    reduced_nodes = dg.num_nodes - 3
    train_nodes = int(reduced_nodes * 0.8)
    val_nodes = int(reduced_nodes * 0.1)
    self.assertEqual(node_parts[0].node_label_index.shape[0],
                     1 + train_nodes)
    self.assertEqual(node_parts[1].node_label_index.shape[0],
                     1 + val_nodes)
    self.assertEqual(node_parts[2].node_label_index.shape[0],
                     dg.num_nodes - 2 - train_nodes - val_nodes)

    def check_default_edge_parts(parts):
        # Expected edge_label_index widths for the default split.
        reduced_edges = dg.num_edges - 3
        expected_train = 2 * (1 + int(reduced_edges * 0.8))
        expected_val = 2 * (1 + int(reduced_edges * 0.1))
        expected_test = dg.num_edges * 2 - expected_train - expected_val
        self.assertEqual(parts[0].edge_label_index.shape[1], expected_train)
        self.assertEqual(parts[1].edge_label_index.shape[1], expected_val)
        self.assertEqual(parts[2].edge_label_index.shape[1], expected_test)

    check_default_edge_parts(dg.split(task='edge'))

    dg_link = dg.split(task='link_pred')
    check_default_edge_parts(dg_link)

    # Resampling the first link_pred part with different message ratios.
    for message_ratio in [0.1, 0.2, 0.4, 0.8]:
        resampled = dg_link[0].clone().resample_disjoint(
            message_ratio=message_ratio)
        positive_edge_num = int(
            0.5 * dg_link[0].clone().edge_label_index.shape[1])
        expected_width = 2 * (
            positive_edge_num - 1
            - int(message_ratio * (positive_edge_num - 2))
        )
        self.assertEqual(resampled.edge_label_index.shape[1], expected_width)

    # link_pred split with custom ratios.
    for split_ratio in [[0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]]:
        custom_parts = dg.split(task='link_pred', split_ratio=split_ratio)
        reduced_edges = dg.num_edges - 3

        edge_0 = 2 * (1 + int(reduced_edges * split_ratio[0]))
        self.assertEqual(custom_parts[0].edge_label_index.shape[1], edge_0)

        edge_1 = (
            1 + int(split_ratio[0] * reduced_edges)
            + 1 + int(split_ratio[1] * reduced_edges)
        ) * 2 - edge_0
        self.assertEqual(custom_parts[1].edge_label_index.shape[1], edge_1)

        edge_2 = dg.num_edges * 2 - edge_0 - edge_1
        self.assertEqual(custom_parts[2].edge_label_index.shape[1], edge_2)
def test_pyg_to_graph(self):
    """Graph.pyg_to_graph preserves the basic statistics of the Cora PyG data."""
    pyg_dataset = Planetoid('./cora', 'Cora')
    dg = Graph.pyg_to_graph(pyg_dataset[0])
    pyg_data = pyg_dataset[0]

    self.assertEqual(pyg_data.num_nodes, dg.num_nodes)
    self.assertEqual(pyg_data.is_directed(), dg.is_directed())
    # The expectation halves the PyG edge count relative to dg.num_edges.
    self.assertEqual(pyg_data.num_edges / 2, dg.num_edges)

    # assertEqual (instead of assertTrue(a == b)) reports both operands
    # on failure and matches the assertions above.
    self.assertEqual(dg.num_node_features, pyg_data.x.shape[1])
    self.assertEqual(dg.num_node_labels, torch.max(pyg_data.y).item() + 1)
    self.assertEqual(dg.edge_index.shape, pyg_data.edge_index.shape)

    keys = ['G', 'node_feature', 'node_label', 'edge_index',
            'edge_label_index', 'node_label_index']
    self.assertEqual(tuple(dg.keys), tuple(keys))
def test_pyg_to_graph(self):
    """Graph.pyg_to_graph preserves the basic statistics of the Cora PyG data."""
    pyg_dataset = Planetoid("./cora", "Cora")
    dg = Graph.pyg_to_graph(pyg_dataset[0])
    pyg_data = pyg_dataset[0]

    self.assertEqual(pyg_data.num_nodes, dg.num_nodes)
    self.assertEqual(pyg_data.is_directed(), dg.is_directed())
    # The expectation halves the PyG edge count relative to dg.num_edges.
    self.assertEqual(pyg_data.num_edges / 2, dg.num_edges)

    # assertEqual (instead of assertTrue(a == b)) reports both operands
    # on failure and matches the assertions above.
    self.assertEqual(dg.num_node_features, pyg_data.x.shape[1])
    self.assertEqual(dg.num_node_labels, torch.max(pyg_data.y).item() + 1)
    self.assertEqual(dg.edge_index.shape, pyg_data.edge_index.shape)

    keys = [
        "G",
        "node_feature",
        "node_label",
        "edge_index",
        "edge_label_index",
        "node_label_index",
        "is_train",
    ]
    self.assertEqual(tuple(dg.keys), tuple(keys))
def test_generator(self):
    """A GraphDataset backed by a Generator yields graphs of the requested size."""
    pyg_dataset = Planetoid('./cora', 'Cora')
    dg = Graph.pyg_to_graph(pyg_dataset[0])
    num_nodes = 500
    sizes = [2, 3]

    class NeighborGenerator(Generator):
        def __len__(self):
            # __len__ must return an int; returning the list itself would
            # make len() raise TypeError.
            # NOTE(review): len(sizes) is a best guess at the intended
            # length -- confirm against Generator's contract.
            return len(sizes)

        def generate(self):
            return Graph(gen_graph(num_nodes, dg.G))

    dataset = GraphDataset(None, generator=NeighborGenerator(sizes))

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(dataset[0].node_feature.shape[0], num_nodes)