def test_cast():
    """Round-trip a DGLGraph through heterograph and immutable-graph casts.

    Builds a 4-node, 2-edge read-only graph, converts it with
    ``dgl.as_heterograph`` and back with ``dgl.as_immutable_graph``, and
    checks that type names, node/edge counts and the edge list (in edge-id
    order) survive each conversion.
    """
    m = spsp.coo_matrix(([1, 1], ([0, 1], [1, 2])), (4, 4))
    g = dgl.DGLGraph(m, readonly=True)
    gsrc, gdst = g.edges(order='eid')
    ndata = F.randn((4, 5))
    edata = F.randn((2, 4))
    g.ndata['x'] = ndata
    g.edata['y'] = edata

    # DGLGraph -> heterograph with a single node type and edge type.
    hg = dgl.as_heterograph(g, 'A', 'AA')
    assert hg.ntypes == ['A']
    assert hg.etypes == ['AA']
    assert hg.canonical_etypes == [('A', 'AA', 'A')]
    assert hg.number_of_nodes() == 4
    assert hg.number_of_edges() == 2
    hgsrc, hgdst = hg.edges(order='eid')
    assert F.array_equal(gsrc, hgsrc)
    assert F.array_equal(gdst, hgdst)

    # Heterograph -> immutable graph.
    g2 = dgl.as_immutable_graph(hg)
    assert g2.number_of_nodes() == 4
    assert g2.number_of_edges() == 2
    # Read the edges from g2 itself; reading them from hg would only repeat
    # the heterograph check above and leave the second cast unverified.
    g2src, g2dst = g2.edges(order='eid')
    assert F.array_equal(g2src, gsrc)
    assert F.array_equal(g2dst, gdst)
def add_nodes(self, nodearray, skip_duplicates=False):
    """Define nodes by passing an array (or array like object).

    Nodes can be identified by any data type (even mixed data types), but
    each node must be unique. Each new node is mapped to a unique integer
    id based on the order in which it is added, is recorded in
    ``self.node_ids`` and is appended to the graph ``self.G``.

    Args
    ----
    nodearray : numpy array (or array-like object)
        array containing the identifiers of each node to be added
    skip_duplicates : bool
        if True, ignore nodes which have already been added.
        If False, raise an error when any provided node already exists.

    Raises
    ------
    ValueError
        if nodearray contains repeated identifiers, or if an identifier
        has already been added and skip_duplicates is False.
    """
    nodedf = pd.DataFrame(nodearray, columns=['id'])

    if nodedf['id'].duplicated().any():
        raise ValueError('Provided nodeids are not unique. Please pass an array of unique identifiers.')

    # Membership test rather than a merge: merging a numeric key column
    # against a string-valued one raises inside pandas, which made it
    # impossible to add ids of a different type than those already stored.
    already_present = nodedf['id'].isin(self.node_ids['id'])
    if already_present.any():
        if not skip_duplicates:
            raise ValueError(
                'Some provided nodes have already been added to the graph. See node_ids.ids.')
        # Keep only the identifiers that are genuinely new.
        nodedf = nodedf.loc[~already_present, ['id']].copy()

    num_new_nodes = len(nodedf)
    current_maximum_id = self.node_ids['intID'].max()
    # max() of an empty table is NaN, in which case numbering starts at 0.
    # int() guards against a float-typed column, which range() rejects.
    start = 0 if pd.isna(current_maximum_id) else int(current_maximum_id) + 1
    nodedf['intID'] = range(start, start + num_new_nodes)
    nodedf['classid'] = None
    nodedf['feature_flag'] = False
    self.node_ids = pd.concat([self.node_ids, nodedf])
    self._masks_set = False

    # A read-only graph cannot be extended; switch it to mutable first.
    if self.G.is_readonly:
        self.G = dgl.as_immutable_graph(self.G)
        self.G.readonly(False)
    self.G.add_nodes(num_new_nodes)

    # Cached results no longer match the graph.
    self._embeddings = None
    self._index = None
def add_edges(self, n1, n2):
    """Adds edges to the DGL graph.

    Nodes must be previously defined by add_nodes or an exception is
    raised. Edges are directed. To define an undirected graph, include both
    n1->n2 and n2->n1 in the graph. Large inputs are added in chunks sized
    by MAX_ADD_EDGES, with a progress bar when there is more than one chunk.

    Args
    ----
    n1 : numpy array (or array-like object)
        first node in the edge (n1->n2)
    n2 : numpy array (or array-like object)
        second node in the edge (n1->n2)

    Raises
    ------
    ValueError
        if any endpoint does not correspond to a known node.
    """
    unknown_node_msg = 'Some edges do not correspond to any known node. Please add with add_nodes method first.'

    edgedf_all = pd.DataFrame(n1, columns=['n1'])
    edgedf_all['n2'] = n2

    # Validate every endpoint before touching the graph, so a bad edge in a
    # later chunk cannot leave earlier chunks already added.
    known_ids = self.node_ids['id']
    if not (edgedf_all['n1'].isin(known_ids).all()
            and edgedf_all['n2'].isin(known_ids).all()):
        raise ValueError(unknown_node_msg)

    total = len(edgedf_all)
    chunks = int(max(total // MAX_ADD_EDGES, 1))
    # Row boundaries of `chunks` near-equal pieces (the first `extra` pieces
    # hold one more row). Sliced with iloc rather than np.array_split, which
    # relies on the deprecated DataFrame.swapaxes.
    base, extra = divmod(total, chunks)
    sizes = [base + 1] * extra + [base] * (chunks - extra)
    bounds = np.cumsum([0] + sizes)

    pbar = tqdm.tqdm(total=chunks) if chunks > 1 else None
    try:
        # Chunks are consumed from the last to the first, so edges enter the
        # graph in chunk-reversed order; kept because edge ids depend on it.
        for i in reversed(range(chunks)):
            edgedf = edgedf_all.iloc[bounds[i]:bounds[i + 1]]
            # Translate both endpoints to their integer node ids.
            edgedf = edgedf.merge(self.node_ids, left_on='n1', right_on='id', how='left')
            edgedf = edgedf.merge(self.node_ids, left_on='n2', right_on='id', how='left', suffixes=('', '2'))
            edgedf = edgedf[['intID', 'intID2']]

            # Safety net: an unmatched endpoint shows up as NaN after the
            # left merges.
            if edgedf.isna().values.any():
                raise ValueError(unknown_node_msg)

            # A read-only graph cannot be extended; switch it to mutable.
            if self.G.is_readonly:
                self.G = dgl.as_immutable_graph(self.G)
                self.G.readonly(False)
            self.G.add_edges(edgedf.intID, edgedf.intID2)

            if pbar is not None:
                pbar.update(1)
    finally:
        # Close the progress bar even when a chunk fails.
        if pbar is not None:
            pbar.close()

    # Cached results no longer match the graph.
    self._masks_set = False
    self._embeddings = None
    self._index = None
def save(self, filepath):
    """Save all information necessary to recover the current state of this
    object to a folder.

    Initialization args, graph data, node ids, current trained embedding,
    and current torch parameters are all saved. The folder is created if it
    does not exist yet. Files written inside it: dgl.bin, node_ids.csv,
    embed.torch, model_weights.torch, final_embed.npy and initargs.pkl.

    Args
    ----
    filepath : str
        path on disk to save files
    """
    # Local import so this method does not depend on a module-level import.
    import os

    # Without this, the first write fails when the folder is missing.
    os.makedirs(filepath, exist_ok=True)

    outg = dgl.as_immutable_graph(self.G)
    dgl.data.utils.save_graphs(f'{filepath}/dgl.bin', outg)
    self.node_ids.to_csv(f'{filepath}/node_ids.csv', index=False)
    th.save(self.embed, f'{filepath}/embed.torch')
    th.save(self.net.state_dict(), f'{filepath}/model_weights.torch')
    embeddings = self.embeddings
    np.save(f'{filepath}/final_embed.npy', embeddings, allow_pickle=False)
    with open(f'{filepath}/initargs.pkl', 'wb') as pklf:
        pickle.dump(self.initargs, pklf)