def forward(self, g, inputs, factor_key):
    g = g.local_var()
    # first graph conv on the factor graph
    feat = self.linear1(inputs)
    norm = torch.pow(g.in_degrees().float().clamp(min=1), -0.5)
    shp = norm.shape + (1, ) * (feat.dim() - 1)
    norm = torch.reshape(norm, shp).to(feat.device)
    feat = feat * norm
    g.ndata['h'] = feat
    g.update_all(fn.u_mul_e('h', factor_key, 'm'),
                 fn.sum(msg='m', out='h'))
    g.ndata['h'] = torch.tanh(g.ndata['h'])
    # second graph conv on the factor graph
    feat = self.linear2(g.ndata['h'])
    feat = feat * norm
    g.ndata['h'] = feat
    g.update_all(fn.u_mul_e('h', factor_key, 'm'),
                 fn.sum(msg='m', out='h'))
    g.ndata['h'] = torch.tanh(g.ndata['h'])
    h = dgl.mean_nodes(g, 'h').unsqueeze(-1)
    h = torch.tanh(h)
    return h
def forward(self, graph, feat, e_feat):
    r"""Compute GraphSAGE layer.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : torch.Tensor
        The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
        is size of input feature, :math:`N` is the number of nodes.
    e_feat : torch.Tensor
        Edge weights; multiplied into the messages along each edge.

    Returns
    -------
    torch.Tensor
        The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
        is size of output feature.
    """
    graph = graph.local_var()
    feat = self.feat_drop(feat)
    h_self = feat
    graph.edata['e'] = e_feat
    if self._aggre_type == 'sum':
        graph.ndata['h'] = feat
        graph.update_all(fn.u_mul_e('h', 'e', 'm'), fn.sum('m', 'neigh'))
        h_neigh = graph.ndata['neigh']
    elif self._aggre_type == 'mean':
        graph.ndata['h'] = feat
        graph.update_all(fn.u_mul_e('h', 'e', 'm'), fn.mean('m', 'neigh'))
        h_neigh = graph.ndata['neigh']
    elif self._aggre_type == 'gcn':
        graph.ndata['h'] = feat
        graph.update_all(fn.u_mul_e('h', 'e', 'm'), fn.sum('m', 'neigh'))
        # divide by in_degrees
        degs = graph.in_degrees().float()
        degs = degs.to(feat.device)
        h_neigh = (graph.ndata['neigh'] + graph.ndata['h']) / (degs.unsqueeze(-1) + 1)
    elif self._aggre_type == 'pool':
        graph.ndata['h'] = F.relu(self.fc_pool(feat))
        graph.update_all(fn.u_mul_e('h', 'e', 'm'), fn.max('m', 'neigh'))
        h_neigh = graph.ndata['neigh']
    elif self._aggre_type == 'lstm':
        graph.ndata['h'] = feat
        graph.update_all(fn.u_mul_e('h', 'e', 'm'), self._lstm_reducer)
        h_neigh = graph.ndata['neigh']
    else:
        raise KeyError('Aggregator type {} not recognized.'.format(
            self._aggre_type))

    # GraphSAGE GCN does not require fc_self.
    if self._aggre_type == 'gcn':
        rst = self.fc_neigh(h_neigh)
    else:
        rst = self.fc_self(h_self) + self.fc_neigh(h_neigh)
    # activation
    if self.activation is not None:
        rst = self.activation(rst)
    # normalization
    if self.norm is not None:
        rst = self.norm(rst)
    return rst
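# Illustrative sketch (not part of the layer above): the edge-weighted aggregation
# primitive this forward pass relies on. fn.u_mul_e scales each source node's
# feature by its edge weight and fn.mean averages the messages at the destination.
# The toy graph, feature sizes, and field names below are made up for the example.
import dgl
import dgl.function as fn
import torch

g = dgl.graph(([0, 1, 2], [1, 2, 0]))   # toy 3-node directed cycle
g.ndata['h'] = torch.randn(3, 4)         # node features
g.edata['e'] = torch.rand(3, 1)          # scalar edge weights, broadcast over the feature dim
g.update_all(fn.u_mul_e('h', 'e', 'm'), fn.mean('m', 'neigh'))
print(g.ndata['neigh'].shape)            # torch.Size([3, 4])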
def infer(self, g, nids_eq_pos, eids_eq_pos, nids_eq_pos_leaf, g_inter,
          readout_ids):
    # Part I: self-attention
    h = g.nodes[nids_eq_pos].data['h']
    if self.rel_pos:
        g.edges[eids_eq_pos].data['ak'] = self.embed_ak(
            g.edges[eids_eq_pos].data['etype'])
    g.nodes[nids_eq_pos].data['q'] = self.proj_q[0](h).view(
        -1, self.h, self.d_k)
    g.nodes[nids_eq_pos].data['k'] = self.proj_k[0](h).view(
        -1, self.h, self.d_k)
    g.nodes[nids_eq_pos].data['v'] = self.proj_v[0](h).view(
        -1, self.h, self.d_k)
    g.apply_edges(
        lambda edges: {'e': (edges.src['k'] * edges.dst['q']).sum(dim=-1, keepdim=True)},
        eids_eq_pos)
    e = g.edges[eids_eq_pos].data['e']
    # relative positional encoding
    if self.rel_pos:
        g.apply_edges(
            lambda edges: {
                'e_rel': (edges.data['ak'].unsqueeze(1) * edges.dst['q']).sum(
                    dim=-1, keepdim=True)
            }, eids_eq_pos)
        e = e + g.edges[eids_eq_pos].data['e_rel']
    # softmax
    g.edges[eids_eq_pos].data['a'] = self.drop_att[0](edge_softmax(
        g, e / np.sqrt(self.d_k), eids_eq_pos))
    # spmm
    g.send_and_recv(eids_eq_pos, fn.u_mul_e('v', 'a', 'm'),
                    fn.sum('m', 'o'))
    o = g.nodes[nids_eq_pos].data['o'].view(-1, self.d_k * self.h)
    o = self.drop_h[0](self.proj_o[0](o))
    g.nodes[nids_eq_pos].data['h'] = self.norm_in[0](h + o)

    # Part II: attend to memory
    h = g.nodes[nids_eq_pos_leaf].data['h']
    q = self.proj_q[1](h).view(-1, self.h, self.d_k)
    g_inter.nodes[readout_ids].data['q'] = q
    g_inter.apply_edges(
        lambda edges: {'e': (edges.src['k'] * edges.dst['q']).sum(dim=-1, keepdim=True)})
    # softmax
    g_inter.edata['a'] = self.drop_att[1](edge_softmax(
        g_inter, g_inter.edata['e'] / np.sqrt(self.d_k)))
    # spmm
    g_inter.update_all(fn.u_mul_e('v', 'a', 'm'), fn.sum('m', 'o'))
    o = g_inter.nodes[readout_ids].data['o'].view(-1, self.d_k * self.h)
    o = self.drop_h[1](self.proj_o[1](o))
    g.nodes[nids_eq_pos_leaf].data['h'] = h + o
    h = self.norm_in[1](g.nodes[nids_eq_pos].data['h'])
    # FFN
    h = self.norm_inter(h + self.ffn(h))
    g.nodes[nids_eq_pos].data['h'] = h
def propagate_attention(self, g):
    # Compute attention score
    g.apply_edges(src_dot_dst('K_h', 'Q_h', 'score'))  # , edges)
    g.apply_edges(scaled_exp('score', np.sqrt(self.out_dim)))
    # Send weighted values to target nodes
    eids = g.edges()
    g.send_and_recv(eids, fn.u_mul_e('V_h', 'score', 'V_h'),
                    fn.sum('V_h', 'V_h'))
    g.send_and_recv(eids, fn.u_mul_e('V_h', 'w', 'V_h'),
                    fn.sum('V_h', 'wV'))
    g.send_and_recv(eids, fn.copy_edge('score', 'score'),
                    fn.sum('score', 'z'))
def forward(self, graph, feat):
    r"""Compute graph convolution.

    Notes
    -----
    * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of
      additional dimensions, :math:`N` is the number of nodes.
    * Output shape: :math:`(N, *, \text{out_feats})` where all but the last
      dimension are the same shape as the input.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : torch.Tensor
        The input feature

    Returns
    -------
    torch.Tensor
        The output feature
    """
    graph = graph.local_var()
    if self._norm:
        norm = th.pow(graph.in_degrees().float().clamp(min=1), -0.5)
        shp = norm.shape + (1, ) * (feat.dim() - 1)
        norm = th.reshape(norm, shp).to(feat.device)
        feat = feat * norm

    if self._in_feats > self._out_feats:
        # mult W first to reduce the feature size for aggregation.
        feat = th.matmul(feat, self.weight)
        graph.ndata['h'] = feat
        graph.update_all(fn.u_mul_e('h', 'weight', 'm'),
                         fn.sum(msg='m', out='h'))
        rst = graph.ndata['h']
    else:
        # aggregate first then mult W
        graph.ndata['h'] = feat
        graph.update_all(fn.u_mul_e('h', 'weight', 'm'),
                         fn.sum(msg='m', out='h'))
        rst = graph.ndata['h']
        rst = th.matmul(rst, self.weight)

    if self._norm:
        rst = rst * norm

    if self.bias is not None:
        rst = rst + self.bias

    if self._activation is not None:
        rst = self._activation(rst)

    return rst
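# Side note (illustrative, not part of the module above): the in_feats/out_feats
# branch works because projecting before or after aggregation gives the same
# result; only the cost differs. A quick dense-matrix check of that identity:
import torch as th

A = th.rand(5, 5)    # stand-in for the normalized, edge-weighted adjacency
X = th.rand(5, 16)   # node features (in_feats = 16)
W = th.rand(16, 4)   # projection to out_feats = 4
assert th.allclose(A @ (X @ W), (A @ X) @ W, atol=1e-5)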
def GRANDConv(graph, feats, order):
    '''
    Parameters
    -----------
    graph: dgl.Graph
        The input graph
    feats: Tensor (n_nodes * feat_dim)
        Node features
    order: int
        Propagation Steps
    '''
    with graph.local_scope():
        ''' Calculate Symmetric normalized adjacency matrix \hat{A} '''
        degs = graph.in_degrees().float().clamp(min=1)
        norm = th.pow(degs, -0.5).to(feats.device).unsqueeze(1)

        graph.ndata['norm'] = norm
        graph.apply_edges(fn.u_mul_v('norm', 'norm', 'weight'))

        ''' Graph Conv '''
        x = feats
        y = 0 + feats

        for i in range(order):
            graph.ndata['h'] = x
            graph.update_all(fn.u_mul_e('h', 'weight', 'm'),
                             fn.sum('m', 'h'))
            x = graph.ndata.pop('h')
            y.add_(x)

    return y / (order + 1)
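# Minimal usage sketch for GRANDConv above (illustrative only; the toy graph and
# feature size are made up). Random-propagation models typically expect an
# undirected graph with self-loops.
import dgl
import torch as th

g = dgl.add_self_loop(dgl.to_bidirected(dgl.graph(([0, 1, 2], [1, 2, 0]))))
feats = th.rand(g.num_nodes(), 8)
out = GRANDConv(g, feats, order=3)
print(out.shape)   # same shape as feats: torch.Size([3, 8])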
def calc_author_citation(g):
    """Compute author citation counts as a weighted sum of paper citation counts.

    :param g: DGLGraph author-paper bipartite graph
    :return: tensor(N_author) author citation counts
    """
    import dgl.function as fn
    from dgl.ops import edge_softmax
    with g.local_scope():
        # The k-th author gets weight 1/k; the last author is treated as the
        # corresponding author and gets weight 1/2.
        g.edges['writes'].data['w'] = 1.0 / g.edges['writes'].data['order']
        g.update_all(fn.copy_e('w', 'w'), fn.min('w', 'mw'), etype='writes')
        g.apply_edges(fn.copy_u('mw', 'mw'), etype='writes_rev')
        w, mw = g.edges['writes'].data.pop('w'), g.edges['writes_rev'].data.pop('mw')
        w[w == mw] = 0.5

        # Normalize the weights over all authors of each paper, then compute each
        # author's citation count as the weighted sum over their papers.
        p = edge_softmax(g['author', 'writes', 'paper'],
                         torch.log(w).unsqueeze(dim=1))
        g.edges['writes_rev'].data['p'] = p.squeeze(dim=1)
        g.update_all(fn.u_mul_e('citation', 'p', 'c'), fn.sum('c', 'c'),
                     etype='writes_rev')
        return g.nodes['author'].data['c']
def forward(self, g, feat):
    """
    :param g: DGLGraph, a bipartite graph containing a single relation
    :param feat: tensor(N_src, d_in) or (tensor(N_src, d_in), tensor(N_dst, d_in)) input features
    :return: tensor(N_dst, d_out) representation of the destination nodes for this relation
    """
    with g.local_scope():
        feat_src, feat_dst = expand_as_pair(feat, g)
        # (N_src, d_in) -> (N_src, d_out) -> (N_src, K, d_out/K)
        k = self.k_linear(feat_src).view(-1, self.num_heads, self.d_k)
        v = self.v_linear(feat_src).view(-1, self.num_heads, self.d_k)
        q = self.q_linear(feat_dst).view(-1, self.num_heads, self.d_k)

        # k[:, h] @= w_att[h] => k[n, h, j] = ∑(i) k[n, h, i] * w_att[h, i, j]
        k = torch.einsum('nhi,hij->nhj', k, self.w_att)
        v = torch.einsum('nhi,hij->nhj', v, self.w_msg)

        g.srcdata.update({'k': k, 'v': v})
        g.dstdata['q'] = q
        g.apply_edges(fn.v_dot_u('q', 'k', 't'))  # g.edata['t']: (E, K, 1)
        attn = g.edata.pop('t').squeeze(dim=-1) * self.mu / math.sqrt(self.d_k)
        attn = edge_softmax(g, attn)  # (E, K)
        self.attn = attn.detach()
        g.edata['t'] = attn.unsqueeze(dim=-1)  # (E, K, 1)

        g.update_all(fn.u_mul_e('v', 't', 'm'), fn.sum('m', 'h'))
        out = g.dstdata['h'].view(-1, self.out_dim)  # (N_dst, d_out)
        return out
def forward(self, graph, features):
    g = graph.local_var()
    h_in = self.dropout(features)
    h_hop = [h_in]

    D_norm = g.ndata['train_D_norm'] if 'train_D_norm' in g.ndata else g.ndata['full_D_norm']
    for _ in range(self.order):
        g.ndata['h'] = h_hop[-1]
        if 'w' not in g.edata:
            g.edata['w'] = th.ones((g.num_edges(), )).to(features.device)
        g.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'h'))
        h = g.ndata.pop('h')
        h = h * D_norm
        h_hop.append(h)

    h_part = [self.feat_trans(ft, idx) for idx, ft in enumerate(h_hop)]
    if self.aggr == "mean":
        h_out = h_part[0]
        for i in range(len(h_part) - 1):
            h_out = h_out + h_part[i + 1]
    elif self.aggr == "concat":
        h_out = th.cat(h_part, 1)
    else:
        raise NotImplementedError
    return h_out
def forward(self, g, h, weights):
    """
    g : graph
    h : node features
    weights : scalar edge weights
    """
    h_src, h_dst = h
    with g.local_scope():
        # Project the raw src-node features to hidden_dims and store them in field 'n'.
        g.srcdata['n'] = self.act(self.Q(self.dropout(h_src)))
        g.edata['w'] = weights.float()
        # Multiply each src feature 'n' by its edge weight to form message 'm';
        # each dst node sums the received messages 'm' into its field 'n'.
        g.update_all(fn.u_mul_e('n', 'w', 'm'), fn.sum('m', 'n'))
        # Copy the edge weight 'w' into message 'm';
        # each dst node sums the received messages 'm' into its field 'ws'.
        g.update_all(fn.copy_e('w', 'm'), fn.sum('m', 'ws'))
        n = g.dstdata['n']                              # weighted sum of neighbor embeddings
        ws = g.dstdata['ws'].unsqueeze(1).clamp(min=1)  # sum of edge weights
        # Take the weighted average of the neighbor embeddings,
        # concatenate it with the dst node's embedding from the previous layer,
        # then apply a linear transform and nonlinearity to get the new dst embeddings.
        z = self.act(self.W(self.dropout(torch.cat([n / ws, h_dst], 1))))
        z_norm = z.norm(2, 1, keepdim=True)
        z_norm = torch.where(z_norm == 0, torch.tensor(1.).to(z_norm), z_norm)
        z = z / z_norm
        return z
def update_all_p_norm(self, graph):
    """
    Attempt at a robust p-norm, along the lines of:

        def robust_norm(x, p):
            a = np.abs(x).max()
            return a * norm1(x / a, p)

        def norm1(x, p):
            "First-pass implementation of p-norm."
            return (np.abs(x)**p).sum() ** (1./p)
    """
    p = torch.clamp(self.P, 1, 100)
    graph.apply_edges(fn.u_add_v('Dh', 'Eh', 'DEh'))
    graph.edata['e'] = graph.edata['DEh'] + graph.edata['Ce']
    graph.edata['sigma'] = torch.sigmoid(graph.edata['e'])  # n_{ij}
    alpha = torch.max(torch.abs(torch.cat((graph.ndata['Bh'], graph.edata['sigma']), dim=0)))
    graph.ndata['Bh_pow'] = (torch.abs(graph.ndata['Bh']) / alpha).pow(p)
    graph.edata['sig_pow'] = (torch.abs(graph.edata['sigma']) / alpha).pow(p)
    # u_mul_e: elementwise product of source 'Bh_pow' and edge 'sig_pow' as message 'm',
    # summed at each destination node.
    graph.update_all(fn.u_mul_e('Bh_pow', 'sig_pow', 'm'), fn.sum('m', 'sum_sigma_h'))
    # copy_e: send each edge's 'sig_pow' as the message and sum at each destination
    # node to obtain the normalizer.
    graph.update_all(fn.copy_e('sig_pow', 'm'), fn.sum('m', 'sum_sigma'))
    graph.ndata['h'] = graph.ndata['Ah'] + (
        (graph.ndata['sum_sigma_h'] / (graph.ndata['sum_sigma'] + 1e-6)) * alpha
    ).pow(torch.div(1, p))
    # graph.update_all(self.message_func, self.reduce_func)
    h = graph.ndata['h']  # result of graph convolution
    e = graph.edata['e']  # result of graph convolution
    # Call update function outside of update_all
    return h, e
def track_time(graph_name, format, feat_size, msg_type, reduce_type):
    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format)
    graph = graph.to(device)
    graph.ndata['h'] = torch.randn((graph.num_nodes(), feat_size), device=device)
    graph.edata['e'] = torch.randn((graph.num_edges(), 1), device=device)

    msg_builtin_dict = {
        'copy_u': fn.copy_u('h', 'x'),
        'u_mul_e': fn.u_mul_e('h', 'e', 'x'),
    }

    reduce_builtin_dict = {
        'sum': fn.sum('x', 'h_new'),
        'mean': fn.mean('x', 'h_new'),
        'max': fn.max('x', 'h_new'),
    }

    # dry run
    graph.update_all(msg_builtin_dict[msg_type], reduce_builtin_dict[reduce_type])

    # timing
    with utils.Timer() as t:
        for i in range(3):
            graph.update_all(msg_builtin_dict[msg_type], reduce_builtin_dict[reduce_type])

    return t.elapsed_secs / 3
def forward(self, g, feat):
    """
    :param g: DGLGraph, a homogeneous graph
    :param feat: tensor(N_src, d_in) input node features
    :return: tensor(N_dst, K, d_out) output node features
    """
    with g.local_scope():
        feat_src = self.fc(self.feat_drop(feat)).view(-1, self.num_heads, self.out_dim)
        feat_dst = feat_src[:g.num_dst_nodes()] if g.is_block else feat_src
        e = self.leaky_relu(self.attn(g, feat_src, feat_dst))  # (E, K, 1)
        g.edata['a'] = self.attn_drop(edge_softmax(g, e))  # (E, K, 1)
        g.srcdata['ft'] = feat_src
        # message passing
        g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
        out = g.dstdata['ft']  # (N_dst, K, d_out)

        if self.training:
            # negative sampling
            neg_g = dgl.graph(
                self.neg_sampler(g, list(range(g.num_edges()))),
                num_nodes=g.num_nodes(), device=g.device)
            neg_e = self.attn(neg_g, feat_src, feat_src)  # (E', K, 1)
            self.attn_x = torch.cat([e, neg_e]).squeeze(dim=-1).mean(dim=1)  # (E+E',)
            self.attn_y = torch.cat([torch.ones(e.shape[0]), torch.zeros(neg_e.shape[0])]) \
                .to(self.attn_x.device)

        if self.activation:
            out = self.activation(out)
        return out
def forward(self, graph, feat):
    graph = graph.local_var()
    feat_c = feat.clone().detach().requires_grad_(False)
    q, k, v = self.q_proj(feat), self.k_proj(feat_c), self.v_proj(feat_c)
    q = q.view(-1, self._num_heads, self._out_feats)
    k = k.view(-1, self._num_heads, self._out_feats)
    v = v.view(-1, self._num_heads, self._out_feats)
    # k, q instead of q, k; the edge_softmax is applied on incoming edges
    graph.ndata.update({'ft': v, 'el': k, 'er': q})
    # compute edge attention
    graph.apply_edges(fn.u_dot_v('el', 'er', 'e'))
    e = graph.edata.pop('e') / math.sqrt(self._out_feats * self._num_heads)
    graph.edata['a'] = edge_softmax(graph, e).unsqueeze(-1)
    # message passing
    graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft2'))
    rst = graph.ndata['ft2']
    # residual
    rst = rst.view(feat.shape) + feat
    if self._trans:
        rst = self.ln1(rst)
        # use the same layer norm, see the author's code
        rst = self.ln1(rst + self.FFN(rst))
    return rst
def forward(self, graph, feat):
    '''
    :param graph: DGLGraph
    :param feat: <N, b, F>
    :return:
    '''
    with graph.local_scope():
        N, b, _ = feat.size()
        graph = graph.local_var()
        graph = graph.to(feat.device)
        feat = torch.cat([self.fc1(feat[:get_Parameter('taxi_size')]),
                          self.fc2(feat[get_Parameter('taxi_size'):])], dim=0)
        feat_src = feat_dst = feat.view(N, b, self._num_heads, self._out_feats)
        # feat_src = feat_dst = self.fc(feat).view(N, b, self._num_heads, self._out_feats)
        el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
        er = (feat_dst * self.attn_l).sum(dim=-1).unsqueeze(-1)
        graph.srcdata.update({'ft': feat_src, 'el': el})
        graph.dstdata.update({'er': er})
        graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
        # graph.apply_edges(fn.u_mul_e('e', 'w', 'e'))
        e = self.leaky_relu(graph.edata.pop('e'))
        graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
        # print(graph.edata['a'].size())
        graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
        rst = graph.dstdata['ft']
        rst = rst.reshape(N, -1, self._num_heads * self._out_feats)
        return rst, graph.edata['a']
def forward(self, g, node_feats, edge_feats):
    """Performs message passing and updates node representations.

    Parameters
    ----------
    g : DGLGraph
        DGLGraph for a batch of graphs
    node_feats : float32 tensor of shape (V, node_in_feats)
        Input node features. V for the number of nodes.
    edge_feats : float32 tensor of shape (E, edge_in_feats)
        Input edge features. E for the number of edges.

    Returns
    -------
    float32 tensor of shape (V, node_out_feats)
        Updated node representations.
    """
    node_feats = self.project_node_in_feats(node_feats)

    for _ in range(self.n_layers):
        g = g.local_var()
        g.ndata['hv'] = node_feats
        g.apply_edges(fn.copy_src('hv', 'he_src'))
        concat_edge_feats = torch.cat([g.edata['he_src'], edge_feats], dim=1)
        g.edata['he'] = self.project_concatenated_messages(concat_edge_feats)
        g.update_all(fn.copy_edge('he', 'm'), fn.sum('m', 'hv_new'))

    g = g.local_var()
    g.ndata['hv'] = self.project_node_messages(node_feats)
    g.edata['he'] = self.project_edge_messages(edge_feats)
    g.update_all(fn.u_mul_e('hv', 'he', 'm'), fn.sum('m', 'h_nbr'))
    h_self = self.project_self(node_feats)  # (V, node_out_feats)
    return g.ndata['h_nbr'] * h_self
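# Illustrative sketch (toy graph and field names made up): apply_edges with a
# copy-source builtin, as used above, materializes each edge's source-node feature
# on the edge, which is what the concatenation step consumes. fn.copy_u is the
# current name of the deprecated fn.copy_src.
import dgl
import dgl.function as fn
import torch

g = dgl.graph(([0, 0, 1], [1, 2, 2]))
g.ndata['hv'] = torch.arange(3, dtype=torch.float32).view(3, 1)
g.apply_edges(fn.copy_u('hv', 'he_src'))
print(g.edata['he_src'])   # source feature of each edge: [[0.], [0.], [1.]]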
def forward(self, graph, feat):
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                assert False

        if isinstance(feat, tuple):
            h_src = self.feat_drop(feat[0])
            h_dst = self.feat_drop(feat[1])
            if not hasattr(self, "fc_src"):
                self.fc_src, self.fc_dst = self.fc, self.fc
            feat_src, feat_dst = h_src, h_dst
            feat_src = self.fc_src(h_src).view(-1, self._num_heads, self._out_feats)
            feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads, self._out_feats)
        else:
            h_src = h_dst = self.feat_drop(feat)
            feat_src, feat_dst = h_src, h_dst
            feat_src = feat_dst = self.fc(h_src).view(-1, self._num_heads, self._out_feats)
            if graph.is_block:
                feat_dst = feat_src[:graph.number_of_dst_nodes()]

        if self._norm == "both":
            degs = graph.out_degrees().float().clamp(min=1)
            norm = torch.pow(degs, -0.5)
            shp = norm.shape + (1,) * (feat_src.dim() - 1)
            norm = torch.reshape(norm, shp)
            feat_src = feat_src * norm

        # Implement GeniePath adaptive-breadth function only
        graph.srcdata.update({"ft": feat_src})
        graph.dstdata.update({"ft_dst": feat_dst})
        graph.apply_edges(fn.u_add_v("ft", "ft_dst", "e"))
        e = graph.edata.pop("e")
        e = self.attn * torch.tanh(e)
        # GeniePath paper doesn't use LeakyReLU
        # e = self.leaky_relu(graph.edata.pop("e"))

        # compute softmax
        graph.edata["a"] = self.attn_drop(edge_softmax(graph, e))
        # message passing
        graph.srcdata.update({"ft": feat_src})
        graph.update_all(fn.u_mul_e("ft", "a", "m"), fn.sum("m", "ft"))
        rst = graph.dstdata["ft"]

        if self._norm == "both":
            degs = graph.in_degrees().float().clamp(min=1)
            norm = torch.pow(degs, 0.5)
            shp = norm.shape + (1,) * (feat_dst.dim() - 1)
            norm = torch.reshape(norm, shp)
            rst = rst * norm

        # residual
        if self.res_fc is not None:
            resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats)
            rst = rst + resval
        # activation
        if self._activation is not None:
            rst = self._activation(rst)
        return rst
def forward(self, graph, feat, device):
    graph = graph.to(device).local_var()
    feat_c = feat.clone().detach().requires_grad_(False)
    q, k, v = self.query_proj(feat), self.key_proj(feat_c), self.value_proj(feat_c)
    q = q.view(-1, self.num_heads, self.embedding_size // self.num_heads)
    k = k.view(-1, self.num_heads, self.embedding_size // self.num_heads)
    v = v.view(-1, self.num_heads, self.embedding_size // self.num_heads)
    # k, q instead of q, k; the edge_softmax is applied on incoming edges
    graph.ndata.update({'ft': v, 'el': k, 'er': q})
    # compute edge attention
    graph.apply_edges(fn.u_dot_v('el', 'er', 'e'))
    e = graph.edata.pop('e') / math.sqrt(self.embedding_size)
    graph.edata['a'] = edge_softmax(graph, e)
    # message passing
    graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft2'))
    rst = graph.ndata['ft2']
    # residual
    rst = rst.view(feat.shape) + feat
    rst = self.ln1(rst)
    rst = self.ln1(rst + self.out_proj(rst))
    return rst
def forward(self, graph):
    node_num = graph.ndata['h'].size(0)
    Q = self.query(graph.ndata['h'])
    K = self.key(graph.ndata['h'])
    V = self.value(graph.ndata['h'])
    Q = self.transpose_for_scores(Q)
    K = self.transpose_for_scores(K)
    V = self.transpose_for_scores(V)
    graph.ndata['Q'] = Q
    graph.ndata['K'] = K
    graph.ndata['V'] = V
    graph.apply_edges(fn.u_mul_v('K', 'Q', 'attn_probs'))
    graph.edata['attn_probs'] = graph.edata['attn_probs'].sum(-1, keepdim=True)
    graph.edata['attn_probs'] = edge_softmax(graph, graph.edata['attn_probs'])
    graph.edata['attn_probs'] = self.dropout(graph.edata['attn_probs'])
    graph.apply_edges(fn.u_mul_e('V', 'attn_probs', 'attn_values'))
    # Legacy DGL (< 0.5) API: register the message/reduce functions, then call
    # update_all with no arguments to run them.
    graph.register_message_func(fn.copy_e('attn_values', 'm'))
    graph.register_reduce_func(fn.sum('m', 'h'))
    graph.update_all()
    graph.ndata['h'] = graph.ndata['h'].view([node_num, -1])
    return graph
def forward(self, graph, features):
    h_pre = features
    g = graph.local_var()
    h = self.dropout(features)

    if self.graph_norm:
        degs = g.in_degrees().float().clamp(min=1)
        norm = th.pow(degs, -0.5)
        norm = norm.to(features.device).unsqueeze(1)
        h = h * norm

    g.ndata['h'] = h
    w = th.ones(g.number_of_edges(), 1).to(features.device)
    g.edata['w'] = self.dropedge(w)
    g.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'h'))
    h = g.ndata.pop('h')

    if self.graph_norm:
        h = h * norm
    h = self.linear(h)
    if self.batch_norm:
        h = self.bn(h)
    if self.pair_norm:
        h = self.pn(h)
    if self.activation is not None:
        h = self.activation(h)
    if self.residual:
        h = h + self.res_fc(h_pre)
    return h
def forward(self, g, h, e):
    h_in = h  # for residual connection

    g.ndata['h'] = h
    g.ndata['Ah'] = self.A(h)
    g.ndata['Bh'] = self.B(h)
    g.ndata['Dh'] = self.D(h)
    g.ndata['Eh'] = self.E(h)
    # g.update_all(self.message_func, self.reduce_func)
    g.apply_edges(fn.u_add_v('Dh', 'Eh', 'e'))
    g.edata['sigma'] = torch.sigmoid(g.edata['e'])
    g.update_all(fn.u_mul_e('Bh', 'sigma', 'm'), fn.sum('m', 'sum_sigma_h'))
    g.update_all(fn.copy_e('sigma', 'm'), fn.sum('m', 'sum_sigma'))
    g.ndata['h'] = g.ndata['Ah'] + g.ndata['sum_sigma_h'] / (g.ndata['sum_sigma'] + 1e-6)
    h = g.ndata['h']  # result of graph convolution

    if self.batch_norm:
        h = self.bn_node_h(h)  # batch normalization
    h = F.relu(h)  # non-linear activation
    if self.residual:
        h = h_in + h  # residual connection
    h = F.dropout(h, self.dropout, training=self.training)
    return h, e
def forward(self, g, features):
    h_pre = features
    g = g.local_var()
    g.ndata['h'] = features

    g.ndata['norm_h'] = F.normalize(features, p=2, dim=-1)
    g.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
    cos = g.edata.pop('cos')
    e = self.beta * cos

    if self.graph_cut > 0:
        k = int(e.size()[0] * self.graph_cut)
        _, indices = e.topk(k, largest=False, sorted=False)
        e[indices] = 0

    g.edata['p'] = edge_softmax(g, e)
    g.update_all(fn.u_mul_e('h', 'p', 'm'), fn.sum('m', 'h'))
    h = g.ndata['h']

    if self.project:
        h = self.linear(h)
    if self.activation:
        h = self.activation(h)
    if self.residual:
        h = h + self.res_fc(h_pre)
    h = self.dropout(h)
    return h
def forward(self, g, feat_src, feat_dst):
    """
    :param g: DGLGraph, bipartite graph from neighbor nodes to target nodes
    :param feat_src: tensor(N_src, d) input features of the neighbor nodes
    :param feat_dst: tensor(N_dst, d) input features of the target nodes
    :return: tensor(N_dst, d) output features of the target nodes
    """
    with g.local_scope():
        # The HeCo authors apply attn_drop differently from the original GAT;
        # strictly speaking this is not correct, yet it improves node-clustering performance...
        attn_l = self.attn_drop(self.attn_l)
        attn_r = self.attn_drop(self.attn_r)
        el = (feat_src * attn_l).sum(dim=-1).unsqueeze(dim=-1)  # (N_src, 1)
        er = (feat_dst * attn_r).sum(dim=-1).unsqueeze(dim=-1)  # (N_dst, 1)
        g.srcdata.update({'ft': feat_src, 'el': el})
        g.dstdata['er'] = er
        g.apply_edges(fn.u_add_v('el', 'er', 'e'))
        e = self.leaky_relu(g.edata.pop('e'))
        g.edata['a'] = edge_softmax(g, e)  # (E, 1)

        # message passing
        g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
        ret = g.dstdata['ft']
        if self.activation:
            ret = self.activation(ret)
        return ret
def forward(self, g, h, pseudo, snorm_n):
    h_in = h  # for residual connection
    g = g.local_var()

    g.ndata['h'] = self.fc(h).view(-1, self.kernel, self.out_dim)
    E = g.number_of_edges()
    # compute gaussian weight
    gaussian = -0.5 * ((pseudo.view(E, 1, self.dim) -
                        self.mu.view(1, self.kernel, self.dim)) ** 2)
    gaussian = gaussian * (self.inv_sigma.view(1, self.kernel, self.dim) ** 2)
    gaussian = torch.exp(gaussian.sum(dim=-1, keepdim=True))  # (E, K, 1)
    g.edata['w'] = gaussian
    g.update_all(fn.u_mul_e('h', 'w', 'm'), self._reducer('m', 'h'))
    h = g.ndata['h'].sum(1)

    if self.graph_norm:
        h = h * snorm_n  # normalize activation w.r.t. graph size
    if self.batch_norm:
        h = self.bn_node_h(h)  # batch normalization
    h = F.relu(h)  # non-linear activation
    if self.residual:
        h = h_in + h  # residual connection
    if self.bias is not None:
        h = h + self.bias
    h = F.dropout(h, self.dropout, training=self.training)
    return h
def forward(self, g, feat):
    """
    :param g: DGLGraph, a bipartite graph containing a single relation
    :param feat: tensor(N_src, d_in) or (tensor(N_src, d_in), tensor(N_dst, d_in)) input features
    :return: tensor(N_dst, K*d_out) representation of the destination nodes for this relation
    """
    with g.local_scope():
        feat_src, feat_dst = expand_as_pair(feat, g)
        feat_src = self.fc_src(self.feat_drop(feat_src)).view(-1, self.num_heads, self.out_dim)
        feat_dst = self.fc_dst(self.feat_drop(feat_dst)).view(-1, self.num_heads, self.out_dim)

        # a^T (z_u || z_v) = (a_l^T || a_r^T) (z_u || z_v) = a_l^T z_u + a_r^T z_v = el + er
        el = (feat_src * self.attn_src[:, :self.out_dim]).sum(dim=-1, keepdim=True)  # (N_src, K, 1)
        er = (feat_dst * self.attn_src[:, self.out_dim:]).sum(dim=-1, keepdim=True)  # (N_dst, K, 1)
        g.srcdata.update({'ft': feat_src, 'el': el})
        g.dstdata['er'] = er
        g.apply_edges(fn.u_add_v('el', 'er', 'e'))
        e = self.leaky_relu(g.edata.pop('e'))
        g.edata['a'] = edge_softmax(g, e)  # (E, K, 1)

        # message passing
        g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
        ret = g.dstdata['ft'].view(-1, self.num_heads * self.out_dim)
        if self.activation:
            ret = self.activation(ret)
        return ret
def appnp(graph, key_in, key_out, alpha=0.2, k_hop=1):
    feat_in = graph.ndata[key_in]
    for i in range(k_hop):
        graph.update_all(fn.u_mul_e(key_in if i == 0 else 'tmp', 'attn', 'm'),
                         fn.sum('m', 'tmp'))
        graph.ndata['tmp'] = graph.ndata['tmp'] * (1 - alpha) + feat_in * alpha
    graph.ndata[key_out] = graph.ndata.pop('tmp')
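# Minimal usage sketch for appnp above (illustrative only). It assumes the graph
# already carries per-edge propagation weights in edata['attn']; the toy graph,
# field names, and sizes below are made up.
import dgl
import dgl.function as fn
import torch

g = dgl.add_self_loop(dgl.graph(([0, 1, 2], [1, 2, 0])))
g.ndata['feat'] = torch.rand(g.num_nodes(), 8)
g.edata['attn'] = torch.full((g.num_edges(), 1), 0.5)   # toy edge weights
appnp(g, 'feat', 'out', alpha=0.2, k_hop=2)
print(g.ndata['out'].shape)   # torch.Size([3, 8])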
def forward(self, g, feat):
    with g.local_scope():
        if self.aggre_type == 'attention':
            if isinstance(feat, tuple):
                h_src = self.feat_drop(feat[0]).view(-1, self.num_heads, self.in_size)
                h_dst = self.feat_drop(feat[1]).view(-1, self.num_heads, self.in_size)
            el = (h_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            g.srcdata.update({'ft': h_src, 'el': el})
            g.apply_edges(fn.copy_u('el', 'e'))
            e = self.leaky_relu(g.edata.pop('e'))
            g.edata['a'] = self.attn_drop(edge_softmax(g, e))
            g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
            rst = g.dstdata['ft'].flatten(1)
            if self.residual:
                rst = rst + h_dst
            if self.activation:
                rst = self.activation(rst)
        elif self.aggre_type == 'mean':
            h_src = self.feat_drop(feat[0]).view(-1, self.in_size * self.num_heads)
            g.srcdata['ft'] = h_src
            g.update_all(fn.copy_u('ft', 'm'), fn.mean('m', 'ft'))
            rst = g.dstdata['ft']
        elif self.aggre_type == 'pool':
            h_src = self.feat_drop(feat[0]).view(-1, self.in_size * self.num_heads)
            g.srcdata['ft'] = F.relu(self.fc_pool(h_src))
            g.update_all(fn.copy_u('ft', 'm'), fn.mean('m', 'ft'))
            rst = g.dstdata['ft']
        return rst
def forward(self, g, node_feats, edge_feats, expanded_dists):
    """Performs message passing and updates node and edge representations.

    Parameters
    ----------
    g : DGLGraph
        DGLGraph for a batch of graphs.
    node_feats : float32 tensor of shape (V, feats)
        Input node features.
    edge_feats : float32 tensor of shape (E, feats)
        Input edge features.
    expanded_dists : float32 tensor of shape (E, dist_feats)
        Expanded distances, i.e. the output of RBFExpansion.

    Returns
    -------
    node_feats : float32 tensor of shape (V, feats)
        Updated node representations.
    edge_feats : float32 tensor of shape (E, feats)
        Edge representations, updated if ``update_edge == True`` in initialization.
    """
    expanded_dists = self.update_dists(expanded_dists)
    if self.update_edge_feats is not None:
        edge_feats = self.update_edge_feats(edge_feats)

    g = g.local_var()
    g.ndata.update({'hv': node_feats})
    g.edata.update({'dist': expanded_dists, 'he': edge_feats})
    g.update_all(message_func=[fn.u_mul_e('hv', 'dist', 'm_0'),
                               fn.copy_e('he', 'm_1')],
                 reduce_func=[fn.sum('m_0', 'hv_0'),
                              fn.sum('m_1', 'hv_1')])
    node_feats = g.ndata.pop('hv_0') + g.ndata.pop('hv_1')

    return node_feats, edge_feats
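# Illustrative equivalent (toy graph; field names reused only for readability):
# the fused two-message update_all above amounts to two separate aggregations,
# a distance-gated sum of neighbor features plus a plain sum of edge features.
import dgl
import dgl.function as fn
import torch

g = dgl.graph(([0, 1, 2], [1, 2, 0]))
g.ndata['hv'] = torch.rand(3, 4)
g.edata['dist'] = torch.rand(3, 4)
g.edata['he'] = torch.rand(3, 4)
g.update_all(fn.u_mul_e('hv', 'dist', 'm_0'), fn.sum('m_0', 'hv_0'))
g.update_all(fn.copy_e('he', 'm_1'), fn.sum('m_1', 'hv_1'))
node_feats = g.ndata['hv_0'] + g.ndata['hv_1']
print(node_feats.shape)   # torch.Size([3, 4])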
def forward(self, graph, feat):
    graph = graph.local_var()
    if isinstance(feat, tuple):
        h_src = self.feat_drop(feat[0])
        h_dst = self.feat_drop(feat[1])
        feat_src = self.fc_src(h_src).view(-1, self._num_heads, self._out_feats)
        feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads, self._out_feats)
    else:
        h_src = h_dst = self.feat_drop(feat)
        feat_src = feat_dst = self.fc(h_src).view(-1, self._num_heads, self._out_feats)

    el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
    er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
    graph.srcdata.update({'ft': feat_src, 'el': el})
    graph.dstdata.update({'er': er})
    # compute edge attention; el and er are a_l Wh_i and a_r Wh_j respectively.
    graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
    e = self.leaky_relu(graph.edata.pop('e'))
    # compute softmax
    graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
    # message passing
    graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
    rst = graph.dstdata['ft']
    # residual
    if self.res_fc is not None:
        resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats)
        rst = rst + resval
    # activation
    if self.activation:
        rst = self.activation(rst)
    return rst
def forward(self, graph, feat):
    graph = graph.local_var()
    h = self.feat_drop(feat)
    feat = self.fc(h).view(-1, self._num_heads, self._out_feats)
    el = (feat * self.attn_l).sum(dim=-1).unsqueeze(-1)
    er = (feat * self.attn_r).sum(dim=-1).unsqueeze(-1)
    graph.ndata.update({"ft": feat, "el": el, "er": er})
    # compute edge attention
    graph.apply_edges(fn.u_add_v("el", "er", "e"))
    # apply leaky relu
    graph.apply_edges(self.relu_udf)
    # compute softmax/sparsemax
    if self.sparsemax:
        graph.apply_edges(self.sparsemax_udf)
    else:
        graph.edata["a"] = edge_softmax(graph, graph.edata.pop("e"))
    # attention dropout
    graph.apply_edges(self.attn_drop_udf)
    # message passing
    graph.update_all(fn.u_mul_e("ft", "a", "m"), fn.sum("m", "ft"))
    rst = graph.ndata["ft"]
    # residual
    if self.res_fc is not None:
        resval = self.res_fc(h).view(h.shape[0], -1, self._out_feats)
        rst = rst + resval
    # activation
    if self.activation:
        rst = self.activation(rst)
    return rst