Example #1
0
    def pyg_to_graphs(dataset,
                      verbose: bool = False,
                      fixed_split: bool = False) -> List[Graph]:
        r"""
        Convert the items of a torch_geometric dataset into DeepSNAP graphs.

        Args:
            dataset: an iterable of PyG data objects (a
                torch_geometric.data.Dataset object).
            verbose: forwarded to :meth:`Graph.pyg_to_graph`.
            fixed_split: if `True`, every item is converted with
                ``fixed_split=True`` and only the conversion result of the
                first item is used to build the return value.

        Returns:
            list: Without `fixed_split`, one :class:`deepsnap.graph.Graph`
            per dataset item. With `fixed_split`, one single-element list
            per entry of the first item's conversion result.
        """
        if not fixed_split:
            return [
                Graph.pyg_to_graph(item, verbose=verbose) for item in dataset
            ]

        # Every item is converted, but only the first result is used below.
        converted = []
        for item in dataset:
            converted.append(
                Graph.pyg_to_graph(item, verbose=verbose, fixed_split=True)
            )
        first_item_parts = converted[0]
        return [[part] for part in first_item_parts]
Example #2
0
def concatenate_citeseer_cora(cora_pyg, citeseer_pyg):
    """Merge the Cora and CiteSeer graphs into one typed networkx graph.

    Both inputs are converted with ``Graph.pyg_to_graph``. Every node and
    edge is tagged with a ``node_type`` / ``edge_type`` attribute naming its
    origin, and the CiteSeer nodes are appended to a deep copy of the Cora
    graph with their ids shifted by the number of Cora nodes.

    Args:
        cora_pyg: PyG data object for Cora.
        citeseer_pyg: PyG data object for CiteSeer.

    Returns:
        The merged graph (a deep copy of the Cora graph plus the shifted
        CiteSeer nodes and edges).

    Raises:
        AssertionError: if a copied attribute or the final node/edge count
            does not match the source graphs.
    """
    cora_g = Graph.pyg_to_graph(cora_pyg).G
    citeseer_g = Graph.pyg_to_graph(citeseer_pyg).G
    nx.set_node_attributes(cora_g, 'cora_node', name='node_type')
    nx.set_edge_attributes(cora_g, 'cora_edge', name='edge_type')
    nx.set_node_attributes(citeseer_g, 'citeseer_node', name='node_type')
    nx.set_edge_attributes(citeseer_g, 'citeseer_edge', name='edge_type')

    G = deepcopy(cora_g)
    # NOTE(review): shifting by the Cora node count only avoids id clashes
    # if Cora's node ids are 0..n-1 -- confirm against Graph.pyg_to_graph.
    offset = cora_g.number_of_nodes()

    for node_id, attrs in citeseer_g.nodes(data=True):
        shifted = node_id + offset
        G.add_node(shifted, **attrs)
        # Compare against the source node by its own id rather than by its
        # position in the iteration, so the check does not depend on ids
        # being consecutive and ordered.
        source_attrs = citeseer_g.nodes[node_id]
        assert G.nodes[shifted]['node_label'] == source_attrs['node_label']
        assert G.nodes[shifted]['node_type'] == source_attrs['node_type']
    assert G.number_of_nodes(
    ) == cora_g.number_of_nodes() + citeseer_g.number_of_nodes()

    for src, dst, attrs in citeseer_g.edges(data=True):
        u = src + offset
        v = dst + offset
        G.add_edge(u, v, **attrs)
        assert G.edges[(u, v)]['edge_type'] == citeseer_g.edges[(
            src, dst)]['edge_type']
    assert G.number_of_edges(
    ) == cora_g.number_of_edges() + citeseer_g.number_of_edges()
    return G
Example #3
0
    def test_pyg_to_graph_global(self):
        """Check that ``deepsnap.use`` selects the backend of ``Graph.G``.

        The same PyG data object is converted once per backend, and the
        resulting ``graph.G`` must be an instance of that backend's
        ``Graph`` class.
        """
        import deepsnap

        # ``deepsnap.use`` changes process-wide state; switch back once the
        # test finishes so the last backend selected here does not leak into
        # tests that run afterwards.
        # NOTE(review): assumes networkx is the backend other tests expect.
        self.addCleanup(deepsnap.use, nx)

        deepsnap.use(nx)
        pyg_dataset = Planetoid('./planetoid', "Cora")
        pyg_data = pyg_dataset[0]
        graph = Graph.pyg_to_graph(pyg_data)
        self.assertIsInstance(graph.G, nx.Graph)

        deepsnap.use(sx)
        graph = Graph.pyg_to_graph(pyg_data)
        self.assertIsInstance(graph.G, sx.Graph)
Example #4
0
    def test_ensemble_generator(self):
        """Check that a dataset backed by an EnsembleGenerator yields graphs.

        Two generator classes produce graphs via ``gen_graph(num_nodes,
        dg.G)``; the first item of the resulting dataset must have
        ``num_nodes`` rows in ``node_feature``.
        """
        pyg_dataset = Planetoid("./cora", "Cora")
        dg = Graph.pyg_to_graph(pyg_dataset[0])

        num_nodes = 500
        sizes = [2, 3]

        class NeighborGenerator1(Generator):
            def __len__(self):
                # Must be an int: returning the ``sizes`` list itself would
                # make ``len(generator)`` raise TypeError.
                # NOTE(review): len(sizes) is an assumed length -- confirm
                # what Generator/GraphDataset expect here.
                return len(sizes)

            def generate(self):
                return Graph(gen_graph(num_nodes, dg.G))

        class NeighborGenerator2(Generator):
            def __len__(self):
                # Same contract as NeighborGenerator1.__len__.
                return len(sizes)

            def generate(self):
                return Graph(gen_graph(num_nodes, dg.G))

        ensemble_generator = EnsembleGenerator(
            [
                NeighborGenerator1(sizes),
                NeighborGenerator2(sizes),
            ]
        )
        dataset = GraphDataset(None, generator=ensemble_generator)
        # assertEqual reports both values when the check fails.
        self.assertEqual(dataset[0].node_feature.shape[0], num_nodes)
Example #5
0
    def test_split(self):
        """Check the sizes of the splits produced by ``Graph.split``.

        The default split is checked against 80% / 10% / remainder of the
        node count. Link-prediction splits with custom ratios are checked
        against twice the corresponding share of the edge count.
        """
        dg = Graph.pyg_to_graph(Planetoid("./cora", "Cora")[0])

        node_splits = dg.split()
        total_nodes = dg.num_nodes
        train_nodes = int(0.8 * total_nodes)
        val_nodes = int(0.1 * total_nodes)
        expected_nodes = (
            train_nodes,
            val_nodes,
            total_nodes - train_nodes - val_nodes,
        )
        for idx, expected in enumerate(expected_nodes):
            self.assertEqual(
                node_splits[idx].node_label_index.shape[0], expected
            )

        custom_ratios = ([0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1])
        for ratios in custom_ratios:
            link_splits = dg.split(task="link_pred", split_ratio=ratios)
            total_edges = dg.num_edges
            train_edges = int(ratios[0] * total_edges)
            val_edges = int(ratios[1] * total_edges)
            # The last split receives whatever the first two leave over.
            expected_edges = (
                2 * train_edges,
                2 * val_edges,
                2 * (total_edges - train_edges - val_edges),
            )
            for idx, expected in enumerate(expected_edges):
                self.assertEqual(
                    link_splits[idx].edge_label_index.shape[1], expected
                )
Example #6
0
    def test_split(self):
        """Check split sizes for node, edge and link_pred tasks.

        Expected sizes follow the pattern ``1 + int((total - 3) * ratio)``
        for the first two splits, with the last split taking the remainder;
        edge counts are doubled. Also checks the size of ``edge_label_index``
        after ``resample_disjoint`` for several message ratios.
        """
        dg = Graph.pyg_to_graph(Planetoid('./cora', 'Cora')[0])

        def expected_edge_counts(first_ratio, second_ratio):
            # Expected ``edge_label_index`` widths of the three splits.
            spare_edges = dg.num_edges - 3
            first = 2 * (1 + int(spare_edges * first_ratio))
            second = 2 * (1 + int(spare_edges * second_ratio))
            return first, second, dg.num_edges * 2 - first - second

        def check_edge_splits(splits, expected):
            for idx, count in enumerate(expected):
                self.assertEqual(
                    splits[idx].edge_label_index.shape[1], count
                )

        # Node task with the default split ratio.
        node_splits = dg.split()
        spare_nodes = dg.num_nodes - 3
        first_nodes = 1 + int(spare_nodes * 0.8)
        second_nodes = 1 + int(spare_nodes * 0.1)
        expected_nodes = (
            first_nodes,
            second_nodes,
            dg.num_nodes - first_nodes - second_nodes,
        )
        for idx, count in enumerate(expected_nodes):
            self.assertEqual(node_splits[idx].node_label_index.shape[0], count)

        # Edge task and link prediction share the same expected sizes.
        edge_splits = dg.split(task='edge')
        check_edge_splits(edge_splits, expected_edge_counts(0.8, 0.1))

        link_splits = dg.split(task='link_pred')
        check_edge_splits(link_splits, expected_edge_counts(0.8, 0.1))

        # Resampling works on clones so the first link split stays untouched.
        first_link_split = link_splits[0]
        for message_ratio in (0.1, 0.2, 0.4, 0.8):
            resampled = first_link_split.clone().resample_disjoint(
                message_ratio=message_ratio
            )
            positive_edges = int(
                0.5 * first_link_split.clone().edge_label_index.shape[1]
            )
            remaining = (
                positive_edges - 1 - int(message_ratio * (positive_edges - 2))
            )
            self.assertEqual(resampled.edge_label_index.shape[1],
                             2 * remaining)

        # Link prediction with custom split ratios.
        for ratios in ([0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]):
            custom_splits = dg.split(task='link_pred', split_ratio=ratios)
            check_edge_splits(
                custom_splits, expected_edge_counts(ratios[0], ratios[1])
            )
Example #7
0
 def test_pyg_to_graph(self):
     """Check that ``Graph.pyg_to_graph`` preserves the PyG data's properties.

     Compares node count, directedness, edge count, feature and label
     dimensions, the ``edge_index`` shape, and the exact order of the
     graph's keys.
     """
     pyg_dataset = Planetoid('./cora', 'Cora')
     dg = Graph.pyg_to_graph(pyg_dataset[0])
     pyg_data = pyg_dataset[0]
     self.assertEqual(pyg_data.num_nodes, dg.num_nodes)
     self.assertEqual(pyg_data.is_directed(), dg.is_directed())
     # The converted graph is expected to report half as many edges as the
     # PyG data object.
     self.assertEqual(pyg_data.num_edges / 2, dg.num_edges)
     # assertEqual (instead of assertTrue(a == b)) reports both values when
     # a check fails.
     self.assertEqual(dg.num_node_features, pyg_data.x.shape[1])
     self.assertEqual(dg.num_node_labels, torch.max(pyg_data.y).item() + 1)
     self.assertEqual(dg.edge_index.shape, pyg_data.edge_index.shape)
     keys = ['G', 'node_feature', 'node_label', 'edge_index',
             'edge_label_index', 'node_label_index']
     self.assertEqual(tuple(dg.keys), tuple(keys))
Example #8
0
    def test_pyg_to_graph(self):
        """Check that ``Graph.pyg_to_graph`` preserves the PyG data's properties.

        Compares node count, directedness, edge count, feature and label
        dimensions, the ``edge_index`` shape, and the exact order of the
        graph's keys (including ``is_train``).
        """
        pyg_dataset = Planetoid("./cora", "Cora")

        dg = Graph.pyg_to_graph(pyg_dataset[0])
        pyg_data = pyg_dataset[0]
        self.assertEqual(pyg_data.num_nodes, dg.num_nodes)
        self.assertEqual(pyg_data.is_directed(), dg.is_directed())
        # The converted graph is expected to report half as many edges as
        # the PyG data object.
        self.assertEqual(pyg_data.num_edges / 2, dg.num_edges)
        # assertEqual (instead of assertTrue(a == b)) reports both values
        # when a check fails.
        self.assertEqual(dg.num_node_features, pyg_data.x.shape[1])
        self.assertEqual(dg.num_node_labels, torch.max(pyg_data.y).item() + 1)
        self.assertEqual(dg.edge_index.shape, pyg_data.edge_index.shape)
        keys = [
            "G", "node_feature", "node_label", "edge_index",
            "edge_label_index", "node_label_index", "is_train"
        ]
        self.assertEqual(tuple(dg.keys), tuple(keys))
Example #9
0
    def test_generator(self):
        """Check that a generator-backed GraphDataset yields generated graphs.

        The generator builds graphs via ``gen_graph(num_nodes, dg.G)``; the
        first dataset item must have ``num_nodes`` rows in ``node_feature``.
        """
        pyg_dataset = Planetoid('./cora', 'Cora')
        dg = Graph.pyg_to_graph(pyg_dataset[0])

        num_nodes = 500
        sizes = [2, 3]

        class NeighborGenerator(Generator):
            def __len__(self):
                # Must be an int: returning the ``sizes`` list itself would
                # make ``len(generator)`` raise TypeError.
                # NOTE(review): len(sizes) is an assumed length -- confirm
                # what Generator/GraphDataset expect here.
                return len(sizes)

            def generate(self):
                return Graph(gen_graph(num_nodes, dg.G))

        dataset = GraphDataset(None, generator=NeighborGenerator(sizes))
        # assertEqual reports both values when the check fails.
        self.assertEqual(dataset[0].node_feature.shape[0], num_nodes)