def test_node_types(self):
    """Verify .node_types() for a graph with one node type, and that an unknown graph id raises."""
    epgm = EPGM(self.input_dir)
    first_graph_id = epgm.G["graphs"][0]["id"]

    # The fixture (cora) has a single 'paper' node type, so exactly one
    # entry is expected and it must be the type configured on the test class.
    found_types = epgm.node_types(first_graph_id)
    assert len(found_types) == 1
    assert self.node_type in found_types

    # Asking for a graph id that does not exist must raise.
    with pytest.raises(Exception):
        epgm.node_types("invalid_graph_id")
def test_node_types(self):
    """Verify .node_types() for a graph with several node types, and that an unknown graph id raises."""
    epgm = EPGM(self.input_dir)
    first_graph_id = epgm.G["graphs"][0]["id"]

    # The fixture dataset has multiple node types: all three must be
    # reported, and nothing else.
    found_types = epgm.node_types(first_graph_id)
    assert len(found_types) == 3
    for expected_type in ("person", "paper", "venue"):
        assert expected_type in found_types

    # Asking for a graph id that does not exist must raise.
    with pytest.raises(Exception):
        epgm.node_types("invalid_graph_id")
def load_data(path, dataset_name=None, node_type=None, target_attribute=None):
    """
    Loads the node data

    :param path: Input filename or directory where graph in EPGM format is stored.
        A directory is read as an EPGM graph; anything else is read as a
        space-delimited, header-less text file.
    :param dataset_name: For EPGM format, the ``meta.label`` of the graph to use;
        only needed when ``node_type`` or ``target_attribute`` has to be inferred
    :param node_type: For HINs, the node type to consider
    :param target_attribute: For EPGM format, the target node attribute
    :return: N x 2 numpy arrays where the first column is the node id and the
        second column is the node label.
    :raises ValueError: if ``node_type`` or ``target_attribute`` must be inferred
        but no graph in the EPGM directory is labelled ``dataset_name``
    :raises Exception: if the node type or target attribute cannot be inferred
        unambiguously (i.e. the graph does not report exactly one candidate)
    """
    if not os.path.isdir(path):
        # Plain-text input: read every field as a string and order rows by the
        # first column (the node id).
        y_df = pd.read_csv(path, delimiter=" ", header=None, dtype=str)
        return y_df.sort_values(by=[0]).values

    g_epgm = EPGM(path)

    if node_type is None or target_attribute is None:
        # The graph id is only needed to infer missing arguments. Fail with a
        # clear message instead of an UnboundLocalError when no graph matches.
        matching_ids = [
            g["id"] for g in g_epgm.G["graphs"] if g["meta"]["label"] == dataset_name
        ]
        if not matching_ids:
            raise ValueError(
                "No graph with label {!r} found in {}.".format(dataset_name, path)
            )
        # If several graphs share the label, the last one wins (same as the
        # original loop, which kept overwriting the id).
        g_id = matching_ids[-1]

    g_vertices = g_epgm.G["vertices"]  # retrieve all graph vertices

    if node_type is None:
        node_type = g_epgm.node_types(g_id)
        if len(node_type) == 1:
            node_type = node_type[0]
        else:
            raise Exception(
                "Multiple node types detected in graph {}: {}.".format(
                    g_id, node_type
                )
            )

    if target_attribute is None:
        target_attribute = g_epgm.node_attributes(g_id, node_type)
        if len(target_attribute) == 1:
            target_attribute = target_attribute[0]
        else:
            raise Exception(
                "Multiple node attributes detected for nodes of type {} in graph {}: {}.".format(
                    node_type, g_id, target_attribute
                )
            )

    return np.array(
        get_nodes(g_vertices, node_type=node_type, target_attribute=target_attribute)
    )