def __getitem__(self, idx):
    """Build a pair of graphs for the node addressed by ``idx``.

    ``idx`` is resolved to ``(graph_idx, node_idx)`` by
    ``self._convert_idx``.  A second seed is chosen by drawing a walk
    length from ``self.step_dist``: a length of 0 reuses ``node_idx``;
    otherwise the last entry of the single trace returned by
    ``dgl.contrib.sampling.random_walk`` is used.  Both seeds are then
    expanded with random walk with restart, and each resulting trace is
    handed to ``data_util._rwr_trace_to_dgl_graph``.

    Args:
        idx: Index understood by ``self._convert_idx``.

    Returns:
        A tuple ``(graph_q, graph_k)``: the helper's output for
        ``node_idx`` and for the second seed, respectively.
    """
    graph_idx, node_idx = self._convert_idx(idx)
    graph = self.graphs[graph_idx]

    # Draw how many hops away the second seed should be.
    num_steps = np.random.choice(len(self.step_dist), 1, p=self.step_dist)[0]
    if step_is_zero := (num_steps == 0):
        other_node_idx = node_idx
    if not step_is_zero:
        walk = dgl.contrib.sampling.random_walk(
            g=graph, seeds=[node_idx], num_traces=1, num_hops=num_steps
        )
        # First seed, first trace, last visited entry.
        other_node_idx = walk[0][0][-1].item()

    # Node budget per seed: never below rw_hops, otherwise the out-degree
    # scaled by e / (e - 1) / restart_prob and rounded to nearest.
    scaled_degree = (
        graph.out_degree(node_idx) * math.e / (math.e - 1) / self.restart_prob
    )
    node_budget = max(self.rw_hops, int(scaled_degree + 0.5))

    rwr_traces = dgl.contrib.sampling.random_walk_with_restart(
        graph,
        seeds=[node_idx, other_node_idx],
        restart_prob=self.restart_prob,
        max_nodes_per_seed=node_budget,
    )

    # Instances without an ``entire_graph`` attribute behave as if it
    # were False.
    use_entire_graph = getattr(self, "entire_graph", False)

    graph_q = data_util._rwr_trace_to_dgl_graph(
        g=graph,
        graph_idx=graph_idx,
        seed=node_idx,
        trace=rwr_traces[0],
        positional_embedding_size=self.positional_embedding_size,
        entire_graph=use_entire_graph,
    )
    graph_k = data_util._rwr_trace_to_dgl_graph(
        g=graph,
        graph_idx=graph_idx,
        seed=other_node_idx,
        trace=rwr_traces[1],
        positional_embedding_size=self.positional_embedding_size,
        entire_graph=use_entire_graph,
    )
    return graph_q, graph_k
def __getitem__(self, idx):
    """Return one sampled graph and the index of its source graph.

    ``idx`` is a flat node index over the concatenation of all graphs in
    ``self.graphs``: the nodes of ``self.graphs[0]`` come first, then
    those of ``self.graphs[1]``, and so on.  It is converted into a
    ``(graph_idx, node_idx)`` pair, a random walk with restart is run
    from that node (at most ``self.rw_hops`` nodes per seed), and the
    trace is passed to ``data_util._rwr_trace_to_dgl_graph``.

    Args:
        idx: Non-negative flat node index.

    Returns:
        A tuple ``(graph_q, graph_idx)`` where ``graph_q`` is the
        helper's output for the selected node and ``graph_idx`` is the
        position of the containing graph in ``self.graphs``.

    Raises:
        IndexError: If ``idx`` is negative or not smaller than the total
            number of nodes across ``self.graphs``.
    """
    if idx < 0:
        raise IndexError("node index {} is out of range".format(idx))

    # Walk the graphs in order, subtracting each graph's node count until
    # the remaining offset falls inside the current graph.
    node_idx = idx
    for graph_idx, graph in enumerate(self.graphs):
        num_nodes = graph.number_of_nodes()
        if node_idx < num_nodes:
            break
        node_idx -= num_nodes
    else:
        # No graph contains this offset; previously this silently fell
        # back to graph 0 with a meaningless node id.
        raise IndexError("node index {} is out of range".format(idx))

    traces = dgl.contrib.sampling.random_walk_with_restart(
        graph,
        seeds=[node_idx],
        restart_prob=self.restart_prob,
        max_nodes_per_seed=self.rw_hops,
    )

    graph_q = data_util._rwr_trace_to_dgl_graph(
        g=graph,
        graph_idx=graph_idx,
        seed=node_idx,
        trace=traces[0],
        positional_embedding_size=self.positional_embedding_size,
    )
    # Original author's note: "with label finetune".
    return graph_q, graph_idx
def getitem(self, idx):
    """Return a sampled graph and the label of graph ``idx``.

    The seed is the node index given by ``out_degrees().argmax()`` on
    ``self.graphs[idx]``.  A random walk with restart is run from that
    seed (at most ``self.rw_hops`` nodes per seed) and the trace is
    passed to ``data_util._rwr_trace_to_dgl_graph`` with
    ``entire_graph=True``.

    Args:
        idx: Position of the graph in ``self.graphs``; also used to index
            ``self.dataset.graph_labels``.

    Returns:
        A tuple ``(graph_q, label)`` where ``label`` is
        ``self.dataset.graph_labels[idx].item()``.
    """
    source_graph = self.graphs[idx]
    seed_node = source_graph.out_degrees().argmax().item()

    rwr_traces = dgl.contrib.sampling.random_walk_with_restart(
        source_graph,
        seeds=[seed_node],
        restart_prob=self.restart_prob,
        max_nodes_per_seed=self.rw_hops,
    )

    # NOTE(review): the __getitem__ variants in this file pass a
    # ``graph_idx`` keyword to this helper; this call does not -- confirm
    # the helper's signature allows omitting it.
    helper_kwargs = dict(
        g=source_graph,
        seed=seed_node,
        trace=rwr_traces[0],
        positional_embedding_size=self.positional_embedding_size,
        entire_graph=True,
    )
    graph_q = data_util._rwr_trace_to_dgl_graph(**helper_kwargs)

    return graph_q, self.dataset.graph_labels[idx].item()
def __getitem__(self, idx):
    """Build a pair of graphs for the node addressed by flat index ``idx``.

    ``idx`` is a flat node index over the concatenation of all graphs in
    ``self.graphs``; it is converted into ``(graph_idx, node_idx)``.  A
    second seed is chosen by drawing a walk length from
    ``self.step_dist``: a length of 0 reuses ``node_idx``; otherwise the
    last entry of the single trace returned by
    ``dgl.contrib.sampling.random_walk`` is used.

    Both seeds are then expanded according to ``self.aug``:

    * ``"rwr"``: random walk with restart, with a per-seed node budget of
      ``max(rw_hops, round(in_degree ** 0.75 * e / (e - 1) / restart_prob))``.
    * ``"ns"``: neighbour sampling via DGL's internal
      ``_CAPI_NeighborSampling``; the trace is the list of parent node
      ids of every ``NodeFlow`` layer.

    Each trace is passed to ``data_util._rwr_trace_to_dgl_graph``; if
    ``self.graph_transform`` is truthy it is applied to both results.

    Args:
        idx: Non-negative flat node index.

    Returns:
        A tuple ``(graph_q, graph_k)`` for ``node_idx`` and the second
        seed, respectively.

    Raises:
        IndexError: If ``idx`` is negative or not smaller than the total
            number of nodes across ``self.graphs``.
        ValueError: If ``self.aug`` is neither ``"rwr"`` nor ``"ns"``.
    """
    if idx < 0:
        raise IndexError("node index {} is out of range".format(idx))

    # Walk the graphs in order, subtracting each graph's node count until
    # the remaining offset falls inside the current graph.
    node_idx = idx
    for graph_idx, graph in enumerate(self.graphs):
        num_nodes = graph.number_of_nodes()
        if node_idx < num_nodes:
            break
        node_idx -= num_nodes
    else:
        # No graph contains this offset; previously this silently fell
        # back to graph 0 with a meaningless node id.
        raise IndexError("node index {} is out of range".format(idx))

    # Draw how many hops away the second seed should be.
    step = np.random.choice(len(self.step_dist), 1, p=self.step_dist)[0]
    if step == 0:
        other_node_idx = node_idx
    else:
        other_node_idx = dgl.contrib.sampling.random_walk(
            g=graph, seeds=[node_idx], num_traces=1, num_hops=step
        )[0][0][-1].item()

    if self.aug == "rwr":
        max_nodes_per_seed = max(
            self.rw_hops,
            int(
                (
                    (graph.in_degree(node_idx) ** 0.75)
                    * math.e
                    / (math.e - 1)
                    / self.restart_prob
                )
                + 0.5
            ),
        )
        traces = dgl.contrib.sampling.random_walk_with_restart(
            graph,
            seeds=[node_idx, other_node_idx],
            restart_prob=self.restart_prob,
            max_nodes_per_seed=max_nodes_per_seed,
        )
    elif self.aug == "ns":
        # Empty float32 tensor handed to the sampler as its last argument.
        prob = dgl.backend.tensor([], dgl.backend.float32)
        prob = dgl.backend.zerocopy_to_dgl_ndarray(prob)

        def _neighbor_trace(seed):
            # Sample a NodeFlow around one seed and list each layer's
            # parent node ids.
            flow = dgl.contrib.sampling.sampler._CAPI_NeighborSampling(
                graph._graph,
                dgl.utils.toindex([seed]).todgltensor(),
                0,  # batch_start_id
                1,  # batch_size
                1,  # workers
                self.num_neighbors,  # expand_factor
                self.rw_hops,  # num_hops
                "out",
                False,
                prob,
            )[0]
            flow = NodeFlow(graph, flow)
            return [flow.layer_parent_nid(i) for i in range(flow.num_layers)]

        traces = [_neighbor_trace(node_idx), _neighbor_trace(other_node_idx)]
    else:
        # Without this branch ``traces`` would be unbound below.
        raise ValueError(
            "unsupported augmentation {!r}; expected 'rwr' or 'ns'".format(
                self.aug
            )
        )

    graph_q = data_util._rwr_trace_to_dgl_graph(
        g=graph,
        graph_idx=graph_idx,
        seed=node_idx,
        trace=traces[0],
        positional_embedding_size=self.positional_embedding_size,
    )
    graph_k = data_util._rwr_trace_to_dgl_graph(
        g=graph,
        graph_idx=graph_idx,
        seed=other_node_idx,
        trace=traces[1],
        positional_embedding_size=self.positional_embedding_size,
    )

    if self.graph_transform:
        graph_q = self.graph_transform(graph_q)
        graph_k = self.graph_transform(graph_k)

    # Original author's note: "pretrain".
    return graph_q, graph_k