Ejemplo n.º 1
0
    def test_node_types(self):
        """Check that .node_types() reports exactly one node type for the first graph."""
        epgm = EPGM(self.input_dir)
        first_graph = epgm.G["graphs"][0]

        # cora has a single 'paper' node type:
        found_types = epgm.node_types(first_graph["id"])

        assert len(found_types) == 1
        assert self.node_type in found_types

        # Asking for the node types of an unknown graph id must raise.
        with pytest.raises(Exception):
            epgm.node_types("invalid_graph_id")
Ejemplo n.º 2
0
    def test_node_types(self):
        """Check that .node_types() reports the three node types of the first graph."""
        epgm = EPGM(self.input_dir)
        first_graph = epgm.G["graphs"][0]

        # dataset has multiple node types:
        found_types = epgm.node_types(first_graph["id"])

        assert len(found_types) == 3
        for expected_type in ("person", "paper", "venue"):
            assert expected_type in found_types

        # Asking for the node types of an unknown graph id must raise.
        with pytest.raises(Exception):
            epgm.node_types("invalid_graph_id")
Ejemplo n.º 3
0
def load_data(path, dataset_name=None, node_type=None, target_attribute=None):
    """
    Loads the node data

     :param path: Input filename or directory where graph in EPGM format is stored
     :param dataset_name: For EPGM format, the ``meta.label`` of the graph to use. Only
        needed when ``node_type`` or ``target_attribute`` has to be inferred from the graph.
     :param node_type: For HINs, the node type to consider. If None, it is inferred from
        the selected graph, which must then contain exactly one node type.
     :param target_attribute: For EPGM format, the target node attribute. If None, it is
        inferred from the selected graph, whose nodes of ``node_type`` must then have
        exactly one attribute.
     :return: N x 2 numpy arrays where the first column is the node id and the second column is the node label.
        For a plain file, rows are the file's rows (read as strings) sorted by the first column.
     :raises ValueError: If ``path`` is a directory, ``node_type`` or ``target_attribute``
        must be inferred, and no graph is labelled ``dataset_name``.
     :raises Exception: If inference finds more than one (or no) node type / node attribute.
    """
    if not os.path.isdir(path):
        # Plain text file: space-separated columns, no header, everything kept as str.
        y_df = pd.read_csv(path, delimiter=" ", header=None, dtype=str)
        y_df.sort_values(by=[0], inplace=True)
        return y_df.values

    g_epgm = EPGM(path)
    graphs = g_epgm.G["graphs"]
    matching_ids = [g["id"] for g in graphs if g["meta"]["label"] == dataset_name]

    # Vertices of ALL graphs in the EPGM store; g_id is not passed to get_nodes below.
    g_vertices = g_epgm.G["vertices"]

    g_id = None
    if matching_ids:
        # Keep the historical behaviour: when several graphs share the label, the last one wins.
        g_id = matching_ids[-1]
    elif node_type is None or target_attribute is None:
        # The graph id is required for inference; fail with a clear message instead of
        # the UnboundLocalError the missing g_id used to trigger further down.
        raise ValueError(
            "No graph labelled {!r} found in {}; available labels: {}.".format(
                dataset_name, path, [g["meta"]["label"] for g in graphs]
            )
        )

    if node_type is None:
        node_type = g_epgm.node_types(g_id)
        if len(node_type) == 1:
            node_type = node_type[0]
        else:
            raise Exception(
                "Multiple node types detected in graph {}: {}.".format(
                    g_id, node_type
                )
            )

    if target_attribute is None:
        target_attribute = g_epgm.node_attributes(g_id, node_type)
        if len(target_attribute) == 1:
            target_attribute = target_attribute[0]
        else:
            raise Exception(
                "Multiple node attributes detected for nodes of type {} in graph {}: {}.".format(
                    node_type, g_id, target_attribute
                )
            )

    return np.array(
        get_nodes(g_vertices, node_type=node_type, target_attribute=target_attribute)
    )