def test_hetero_partition():
    """Run ``check_partition`` on a random heterograph for every setting.

    A 10000-sized random graph is converted with ``dgl.as_heterograph`` and
    checked with both the 'metis' and 'random' methods, each with the
    boolean flag set to ``True`` and then ``False``.
    """
    hetero_graph = dgl.as_heterograph(create_random_graph(10000))
    for method in ('metis', 'random'):
        for flag in (True, False):
            check_partition(hetero_graph, method, flag)
def test_cast():
    """Check round-trip conversion DGLGraph -> heterograph -> immutable graph.

    Builds a 4-node graph with the two edges 0->1 and 1->2, converts it to a
    heterograph with node type 'A' and edge type 'AA', and converts that back
    with ``dgl.as_immutable_graph``. Node/edge counts and the edge lists (in
    edge-id order) must be preserved by each conversion.
    """
    m = spsp.coo_matrix(([1, 1], ([0, 1], [1, 2])), (4, 4))
    g = dgl.DGLGraph(m, readonly=True)
    gsrc, gdst = g.edges(order='eid')
    ndata = F.randn((4, 5))
    edata = F.randn((2, 4))
    g.ndata['x'] = ndata
    g.edata['y'] = edata

    # DGLGraph -> heterograph
    hg = dgl.as_heterograph(g, 'A', 'AA')
    assert hg.ntypes == ['A']
    assert hg.etypes == ['AA']
    assert hg.canonical_etypes == [('A', 'AA', 'A')]
    assert hg.number_of_nodes() == 4
    assert hg.number_of_edges() == 2
    hgsrc, hgdst = hg.edges(order='eid')
    assert F.array_equal(gsrc, hgsrc)
    assert F.array_equal(gdst, hgdst)

    # heterograph -> immutable graph
    g2 = dgl.as_immutable_graph(hg)
    assert g2.number_of_nodes() == 4
    assert g2.number_of_edges() == 2
    # Read the edges from the converted graph g2 (not from hg), otherwise the
    # round-trip result is never actually compared against the original.
    g2src, g2dst = g2.edges(order='eid')
    assert F.array_equal(g2src, gsrc)
    assert F.array_equal(g2dst, gdst)
def test_hetero_metis_partition():
    """Run the metis partition checks on a large read-only heterograph.

    ``check_metis_partition`` is invoked with 0, 1 and 2 as its second
    argument, followed by ``check_metis_partition_with_constraint``.
    """
    # TODO(zhengda) Metis fails to partition a small graph.
    graph_index = create_large_graph_index(1000)
    hetero_graph = dgl.as_heterograph(dgl.DGLGraph(graph_index, readonly=True))
    for arg in (0, 1, 2):
        check_metis_partition(hetero_graph, arg)
    check_metis_partition_with_constraint(hetero_graph)
def load(cls, filepath, device=None, faiss_gpu=None):
    """Rebuild an instance of this class from files saved under ``filepath``.

    Args
    ----
    filepath : str
        directory on disk to load from
    device : str
        if given, replaces the pytorch device stored with the instance
    faiss_gpu : str
        if given, replaces the stored setting for whether faiss uses gpu

    Returns
    -------
    The restored instance, with graph, node ids, embedding module, network
    weights and final embeddings loaded.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point this at directories written by a trusted process.
    with open(f'{filepath}/initargs.pkl', 'rb') as args_file:
        (embedding_dim, feature_dim, hidden_dim, hidden_layers, dropout,
         agg_type, distance, saved_device, saved_faiss_gpu,
         inference_batch_size, p_train, train_faiss_index) = pickle.load(args_file)

    # Caller-supplied overrides win over the pickled values.
    target_device = saved_device if device is None else device
    use_faiss_gpu = saved_faiss_gpu if faiss_gpu is None else faiss_gpu

    instance = cls(embedding_dim, feature_dim, hidden_dim, hidden_layers,
                   dropout, agg_type, distance, target_device, use_faiss_gpu,
                   inference_batch_size, p_train, train_faiss_index)

    # load_graphs returns (graphs, labels); only the first graph is kept.
    instance.G, _ = dgl.data.utils.load_graphs(f'{filepath}/dgl.bin')
    instance.G = instance.G[0]
    instance.G.readonly()
    instance.G = dgl.as_heterograph(instance.G)

    instance.node_ids = pd.read_csv(f'{filepath}/node_ids.csv')

    location = th.device(target_device)
    instance.embed = th.load(f'{filepath}/embed.torch', map_location=location)
    weights = th.load(f'{filepath}/model_weights.torch', map_location=location)
    instance.net.load_state_dict(weights)

    instance._embeddings = np.load(f'{filepath}/final_embed.npy', allow_pickle=False)
    return instance
def check_rpc_in_subgraph(tmpdir, num_server):
    """Compare a distributed in-subgraph query against ``dgl.in_subgraph``.

    Writes ``rpc_ip_config.txt`` with one line per server, partitions the
    cora citation graph into ``num_server`` parts under ``tmpdir``, starts
    one server process per part, fetches the in-subgraph of a fixed node
    list through ``start_in_subgraph_client`` and asserts that it has the
    same nodes count, edges and edge ids as the locally computed subgraph.

    Args
    ----
    tmpdir : directory passed to ``partition_graph`` and to the servers/client
    num_server : int
        number of server processes (and graph partitions) to create
    """
    # Use a context manager so the config file is closed even if resolving
    # the address or writing a line raises.
    with open("rpc_ip_config.txt", "w") as ip_config:
        for _ in range(num_server):
            ip_config.write('{} 1\n'.format(get_local_usable_addr()))

    g = CitationGraphDataset("cora")[0]
    g.readonly()
    num_parts = num_server

    partition_graph(g, 'test_in_subgraph', num_parts, tmpdir,
                    num_hops=1, part_method='metis', reshuffle=False)

    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        p = ctx.Process(target=start_server,
                        args=(i, tmpdir, num_server > 1, 'test_in_subgraph'))
        p.start()
        # Pause between launches before starting the next server.
        time.sleep(1)
        pserver_list.append(p)

    nodes = [0, 10, 99, 66, 1024, 2008]
    time.sleep(3)
    sampled_graph = start_in_subgraph_client(0, tmpdir, num_server > 1, nodes)
    for p in pserver_list:
        p.join()

    src, dst = sampled_graph.edges()
    g = dgl.as_heterograph(g)
    assert sampled_graph.number_of_nodes() == g.number_of_nodes()

    # Reference result computed locally on the full graph.
    subg1 = dgl.in_subgraph(g, nodes)
    src1, dst1 = subg1.edges()
    assert np.all(np.sort(F.asnumpy(src)) == np.sort(F.asnumpy(src1)))
    assert np.all(np.sort(F.asnumpy(dst)) == np.sort(F.asnumpy(dst1)))

    # The edge ids stored on the sampled graph must match the full graph's ids.
    eids = g.edge_ids(src, dst)
    assert np.array_equal(
        F.asnumpy(sampled_graph.edata[dgl.EID]), F.asnumpy(eids))
def set_masks(self):
    """Set the train, test, and relevance masks.

    Needs to be called once after data has been added to the graph.
    self.train and self.evaluate automatically check if this needs to be
    called and will call it, but it can also be called manually. Calling it
    a second time rerolls the random generation of the train and test sets.

    Side effects: sorts ``self.node_ids`` by 'intID'; sets ``self.labels``,
    ``self.is_relevant_mask``, ``self.entity_mask``, ``self.train_mask``,
    ``self.test_mask``, ``self.features`` and ``self._masks_set``. If the
    graph is not yet read-only, a fresh ``nn.Embedding`` is created, the
    graph is made read-only and converted to a heterograph.
    """
    self.node_ids = self.node_ids.sort_values('intID')
    self.labels = self.node_ids.classid.to_numpy()

    # is_relevant_mask indicates the nodes which we know the class of
    self.is_relevant_mask = np.logical_not(
        pd.isna(self.node_ids.classid).to_numpy())

    # entity_mask indicates the nodes which we want to include in the
    # faiss index. The builtin ``bool`` is used because the ``np.bool``
    # alias was deprecated in NumPy 1.20 and removed in 1.24.
    self.entity_mask = np.logical_not(
        self.node_ids.feature_flag.to_numpy().astype(bool))

    self.train_mask = np.random.choice(
        a=[False, True], size=(len(self.node_ids)),
        p=[1 - self.p_train, self.p_train])

    # test set is all nodes other than the train set unless train set is
    # all nodes and then test set is the same as train set.
    if self.p_train != 1:
        self.test_mask = np.logical_not(self.train_mask)
    else:
        self.test_mask = self.train_mask

    # do not include any node without a classid in either set, and keep
    # only the nodes selected by entity_mask
    self.train_mask = np.logical_and(self.train_mask, self.is_relevant_mask)
    self.train_mask = np.logical_and(self.train_mask, self.entity_mask)
    self.test_mask = np.logical_and(self.test_mask, self.is_relevant_mask)
    self.test_mask = np.logical_and(self.test_mask, self.entity_mask)

    # NOTE(review): the extent of this branch was reconstructed from a
    # flattened source line - confirm against the original layout.
    if not self.G.is_readonly:
        self.embed = nn.Embedding(len(self.node_ids), self.feature_dim)
        self.G.readonly()
        self.G = dgl.as_heterograph(self.G)
        self.G.ndata['features'] = self.embed.weight

    self.features = self.embed.weight
    # The former bare ``self.features.to(self.device)`` call discarded its
    # result and therefore did nothing; moving the module below is what
    # places the weights on the device.
    self.embed.to(self.device)

    self._masks_set = True
def load_graph_data(self, filepath):
    """Restore graph data from disk, but not network parameters or trained
    embeddings.

    Useful for changing network parameters if you don't want to reconstruct
    the graph. Resets ``_masks_set``, ``_embeddings`` and ``_index`` so
    that state derived from the previous graph is not reused.

    Args
    ----
    filepath : str
        path to where you previously saved the GraphRecommender
    """
    # load_graphs returns (graphs, labels); only the first graph is kept.
    # The graph is taken from this instance: the name ``restored_self``
    # used here before does not exist in this method and raised NameError.
    graphs, _ = dgl.data.utils.load_graphs(f'{filepath}/dgl.bin')
    self.G = graphs[0]
    self.G.readonly()
    self.G = dgl.as_heterograph(self.G)

    self.node_ids = pd.read_csv(f'{filepath}/node_ids.csv')

    self._masks_set = False
    self._embeddings = None
    self._index = None
# Remaining command-line options; `argparser` itself is created earlier,
# outside this excerpt.
argparser.add_argument(
    '--num-workers', type=int, default=0,
    help="Number of sampling processes. Use 0 for no extra process.")
argparser.add_argument('--inductive', action='store_true',
                       help="Inductive learning setting")
args = argparser.parse_args()

# args.gpu is parsed as a comma-separated list of integer device ids.
devices = list(map(int, args.gpu.split(',')))
n_gpus = len(devices)
g, n_classes = load_reddit()
# Construct graph
g = dgl.as_heterograph(g)
# Input feature size is the second dimension of the 'features' node data.
in_feats = g.ndata['features'].shape[1]

if args.inductive:
    train_g, val_g, test_g = inductive_split(g)
else:
    # Without --inductive all three names refer to the same graph object.
    train_g = val_g = test_g = g

prepare_mp(train_g)
prepare_mp(val_g)
prepare_mp(test_g)
# Pack data
data = in_feats, n_classes, train_g, val_g, test_g

# Single-device case: call run directly with 0 as its first argument.
# NOTE(review): handling for n_gpus != 1 is not visible in this excerpt.
if n_gpus == 1:
    run(0, n_gpus, args, devices, data)
# Keep only the first column of the label array.
labels = labels[:, 0]
# presumably switches the graph to mutable so edges can be edited below -
# TODO confirm against the DGL version in use
graph.readonly(False)
# Query the edges that go from every node to itself; self_e holds their ids.
_, _, self_e = graph.edge_ids(th.arange(graph.number_of_nodes()),
                              th.arange(graph.number_of_nodes()),
                              return_uv=True)
print('Total edges before adding self-loop {}'.format(
    graph.number_of_edges()))
# clean partial self-loop edges
graph.remove_edges(self_e)
# Add all self-loop edges
graph.add_edges(th.arange(graph.number_of_nodes()),
                th.arange(graph.number_of_nodes()))
print('Total edges after adding self-loop {}'.format(
    graph.number_of_edges()))
graph.readonly(True)
graph = dgl.as_heterograph(graph)
# Input feature size is the second dimension of the 'feat' node data.
in_feats = graph.ndata['feat'].shape[1]
# assumes labels are 0-based integer class ids - TODO confirm
n_classes = (labels.max() + 1).item()
prepare_mp(graph)
# Pack data
data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph, args.head

# Run 10 times
test_accs = []
for i in range(10):
    test_accs.append(run(args, device, data))
    # NOTE(review): line structure was reconstructed from a flattened
    # source; confirm this running summary belongs inside the loop.
    print('Average test accuracy:', np.mean(test_accs), '±', np.std(test_accs))