def __init__(self, num_nodes, edges, node_types=None, node_feat=None,
             edge_feat=None):
    """Create the wrapped ``hpg.HeterGraph`` and keep handles to it.

    Every argument is forwarded positionally, in the order given here, to
    ``hpg.HeterGraph``; this constructor does not inspect or validate them.

    Args:
        num_nodes: First positional argument for ``hpg.HeterGraph``.
        edges: Second positional argument for ``hpg.HeterGraph``.
        node_types: Third positional argument; defaults to ``None``.
        node_feat: Fourth positional argument; defaults to ``None``.
        edge_feat: Fifth positional argument; defaults to ``None``.
    """
    graph = hpg.HeterGraph(num_nodes, edges, node_types, node_feat, edge_feat)
    self._g = graph
    # Expose the wrapped graph's ``_multi_graph`` attribute directly on self.
    self._multi_graph = graph._multi_graph
def build_graph(self):
    """Build the pgl heterogeneous graph and store it in ``self.graph``.

    Three node files are loaded through ``self.remapping_id`` in the order
    venues ('conf'), authors, papers. Each call receives as ``start_index``
    the number of ids loaded by the previous calls (presumably so that all
    nodes share one index space -- confirm against ``remapping_id``).

    Two edge files are loaded through ``self.load_edges`` and registered
    under four edge types: 'p2c' and 'p2a' as loaded, 'c2p' and 'a2p' as
    the same pairs with source and destination swapped.

    Reads:
        self.config['data_path']: prefix that is concatenated verbatim
            with each file name (no path separator is inserted).

    Sets:
        self.conf_id2index, self.conf_name2index,
        self.author_id2index, self.author_name2index,
        self.paper_id2index, self.paper_name2index,
        self.graph.
    """
    data_path = self.config['data_path']

    self.conf_id2index, self.conf_name2index, conf_node_type = \
        self.remapping_id(
            data_path + 'id_conf.txt', start_index=0, node_type='conf')
    logging.info('%d venues have been loaded.', len(self.conf_id2index))

    self.author_id2index, self.author_name2index, author_node_type = \
        self.remapping_id(
            data_path + 'id_author.txt',
            start_index=len(self.conf_id2index),
            node_type='author')
    logging.info('%d authors have been loaded.', len(self.author_id2index))

    self.paper_id2index, self.paper_name2index, paper_node_type = \
        self.remapping_id(
            data_path + 'paper.txt',
            start_index=(len(self.conf_id2index) +
                         len(self.author_id2index)),
            node_type='paper',
            separator='\t')
    logging.info('%d papers have been loaded.', len(self.paper_id2index))

    # Same concatenation order as the start_index offsets used above.
    node_types = conf_node_type + author_node_type + paper_node_type
    num_nodes = len(node_types)

    paper_author_edges = self.load_edges(
        data_path + 'paper_author.txt',
        self.paper_id2index, self.author_id2index)
    paper_conf_edges = self.load_edges(
        data_path + 'paper_conf.txt',
        self.paper_id2index, self.conf_id2index)

    # Each loaded edge list is registered in both directions; the reverse
    # direction is the same pairs with the two endpoints swapped.
    edges_by_types = {
        'p2c': paper_conf_edges,
        'c2p': [(dst, src) for src, dst in paper_conf_edges],
        'p2a': paper_author_edges,
        'a2p': [(dst, src) for src, dst in paper_author_edges],
    }

    # One int64 feature per node holding the node's own index, shape (N, 1).
    node_features = {
        'index': np.arange(num_nodes, dtype=np.int64).reshape(-1, 1)
    }

    self.graph = heter_graph.HeterGraph(
        num_nodes=num_nodes,
        edges=edges_by_types,
        node_types=node_types,
        node_feat=node_features)
def build_graph(self):
    """Build the pgl heterogeneous graph and store it in ``self.graph``.

    Edges are read per entry of ``self.edge_file_list``; each entry is
    indexed as ``entry[0]`` (edge type name) and ``entry[1]`` (file path).
    If ``<path>.npy`` exists for the FIRST entry, every entry is loaded with
    ``np.load(<path>.npy)``; otherwise every entry is parsed with
    ``self.load_edges(<path>)``.

    When ``self.symmetry`` is truthy, a reversed edge set is added for each
    edge type: the type name is split on '2' and its first two parts are
    swapped, and the two edge columns are swapped.

    Reads:
        self.edge_file_list, self.symmetry, self.node_types_file,
        self.num_nodes.

    Sets:
        self.graph.

    Raises:
        AssertionError: if the number of loaded node types differs from
            ``self.num_nodes``.
    """
    edges_by_types = {}

    # Only the first edge file is probed to choose the loading mode.
    first_npy = self.edge_file_list[0][1] + ".npy"
    if os.path.exists(first_npy):
        log.info("load data from numpy file")
        for pair in self.edge_file_list:
            edges_by_types[pair[0]] = np.load(pair[1] + ".npy")
    else:
        log.info("load data from txt file")
        # NOTE: the parsed edges are not written back to "<path>.npy" here.
        for pair in self.edge_file_list:
            edges_by_types[pair[0]] = self.load_edges(pair[1])

    for e_type, edges in edges_by_types.items():
        log.info(["number of %s edges: " % e_type, len(edges)])

    if self.symmetry:
        # Collect reversed edges separately so the dict is not mutated
        # while it is being iterated.
        reversed_edges = {}
        for key, edges in edges_by_types.items():
            parts = key.split('2')
            re_key = parts[1] + '2' + parts[0]
            # Assumes each edge set is a 2-D array whose columns are
            # (src, dst) -- TODO confirm for the load_edges path.
            reversed_edges[re_key] = edges[:, [1, 0]]
        edges_by_types.update(reversed_edges)

        log.info(["finished loadding symmetry edges."])

    node_types = self.load_node_types(self.node_types_file)

    assert len(node_types) == self.num_nodes, \
        "num_nodes should be equal to the length of node_types"
    log.info(["number of nodes: ", len(node_types)])

    # One int64 feature per node holding the node's own index, shape (N, 1).
    node_features = {
        'index': np.arange(self.num_nodes, dtype=np.int64).reshape(-1, 1)
    }

    self.graph = heter_graph.HeterGraph(
        num_nodes=self.num_nodes,
        edges=edges_by_types,
        node_types=node_types,
        node_feat=node_features)
def test_dump():
    """Build a 15-node heterogeneous graph and dump it to disk.

    Nodes 0-3 have type 'c', nodes 4-9 type 'p' and nodes 10-14 type 'a'.
    The graph is written to "./hetergraph_mmap" with ``outdegree=True``.
    """
    np.random.seed(1)

    # for test no successor
    c2p = [(1, 4), (0, 5), (1, 9), (1, 8), (2, 8), (2, 5), (3, 6), (3, 7),
           (3, 4), (3, 8)]
    p2a = [(4, 10), (4, 11), (4, 12), (4, 14), (4, 13), (6, 12), (6, 11),
           (6, 14), (7, 12), (7, 11), (8, 14), (9, 10)]

    # Each reverse edge type holds the same pairs with endpoints swapped.
    edges = {
        'c2p': c2p,
        'p2c': [(dst, src) for src, dst in c2p],
        'p2a': p2a,
        'a2p': [(dst, src) for src, dst in p2a],
    }

    type_labels = ['c'] * 4 + ['p'] * 6 + ['a'] * 5
    # (node index, type label) pairs.
    node_types = list(enumerate(type_labels))

    graph = heter_graph.HeterGraph(
        num_nodes=len(node_types), edges=edges, node_types=node_types)
    graph.dump("./hetergraph_mmap", outdegree=True)
def build_graph(self):
    """Build the pgl heterogeneous graph and store it in ``self.graph``.

    Training edges are loaded with ``self.load_training_data``, which is
    given ``self.train_edges_file`` plus the 'slf_loop' and 'symmetry_edge'
    entries of ``self.config``. The graph is created without node types
    (``node_types=None``) and with a single 'index' node feature.

    Sets:
        self.graph: the constructed ``heter_graph.HeterGraph``.
        self.edge_types: sorted result of ``self.graph.edge_types_info()``.
    """
    # The second returned value (all edges) is not needed here.
    edge_data_by_type, _, all_nodes = self.load_training_data(
        self.train_edges_file,
        slf_loop=self.config['slf_loop'],
        symmetry_edge=self.config['symmetry_edge'])

    num_nodes = len(all_nodes)
    # One int64 feature per node holding the node's own index, shape (N, 1).
    node_features = {
        'index': np.arange(num_nodes, dtype=np.int64).reshape(-1, 1)
    }

    self.graph = heter_graph.HeterGraph(
        num_nodes=num_nodes,
        edges=edge_data_by_type,
        node_types=None,
        node_feat=node_features)

    self.edge_types = sorted(self.graph.edge_types_info())
    logging.info('total %d nodes are loaded', self.graph.num_nodes)
def setUpClass(cls):
    """Create the shared 15-node heterogeneous graph as ``cls.graph``.

    Nodes 0-3 have type 'c', nodes 4-9 type 'p' and nodes 10-14 type 'a'.
    """
    np.random.seed(1)

    # for test no successor
    c2p = [(1, 4), (0, 5), (1, 9), (1, 8), (2, 8), (2, 5), (3, 6), (3, 7),
           (3, 4), (3, 8)]
    p2a = [(4, 10), (4, 11), (4, 12), (4, 14), (4, 13), (6, 12), (6, 11),
           (6, 14), (7, 12), (7, 11), (8, 14), (9, 10)]

    # Each reverse edge type holds the same pairs with endpoints swapped.
    edges = {
        'c2p': c2p,
        'p2c': [(dst, src) for src, dst in c2p],
        'p2a': p2a,
        'a2p': [(dst, src) for src, dst in p2a],
    }

    # for test speed
    # edges['c2p'] = [(0, 4), (0, 5), (1, 9), (1,8), (2,8), (2,5), (3,6), (3,7), (3,4), (3,8)]
    # edges['p2c'] = [(v,u) for u, v in edges['c2p']]
    # edges['p2a'] = [(4,10), (4,11), (4,12), (4,14), (5,13), (6,13), (6,11), (6,14), (7,12), (7,11), (8,14), (9,13)]
    # edges['a2p'] = [(v,u) for u, v in edges['p2a']]

    type_labels = ['c'] * 4 + ['p'] * 6 + ['a'] * 5
    # (node index, type label) pairs.
    node_types = list(enumerate(type_labels))

    cls.graph = heter_graph.HeterGraph(
        num_nodes=len(node_types), edges=edges, node_types=node_types)