def calc_weight(g): """计算行归一化的D^(-1/2)AD(-1/2)""" with g.local_scope(): g.ndata['in_degree'] = g.in_degrees().float().pow(-0.5) g.ndata['out_degree'] = g.out_degrees().float().pow(-0.5) g.apply_edges(fn.u_mul_v('out_degree', 'in_degree', 'weight')) g.update_all(fn.copy_e('weight', 'msg'), fn.sum('msg', 'norm')) g.apply_edges(fn.e_div_v('weight', 'norm', 'weight')) return g.edata['weight']
def normalize_edge_weights(graph, device, num_ew_channels):
    # Symmetrically normalize the multi-channel edge feature "feat" by the
    # square root of the in-degrees of both endpoints.
    degs = graph.in_degrees().float()
    degs = torch.clamp(degs, min=1)
    norm = torch.pow(degs, 0.5)
    norm = norm.to(device)
    graph.ndata["norm"] = norm.unsqueeze(1)
    graph.apply_edges(fn.e_div_u("feat", "norm", "feat"))
    graph.apply_edges(fn.e_div_v("feat", "norm", "feat"))
    # Split the normalized edge feature into one field per channel.
    for channel in range(num_ew_channels):
        graph.edata["feat_" + str(channel)] = graph.edata["feat"][:, channel:channel + 1]
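# A minimal usage sketch, not from the original source: the tiny graph, the
# 3-channel edge feature, and the "cpu" device below are illustrative assumptions.
# It shows the expected input of normalize_edge_weights(): an edge feature
# "feat" of shape (E, num_ew_channels) that gets normalized and split per channel.
import dgl
import dgl.function as fn
import torch

src = torch.tensor([0, 1, 2, 3])
dst = torch.tensor([1, 2, 3, 0])
g = dgl.graph((src, dst))
g.edata["feat"] = torch.rand(g.num_edges(), 3)    # (E, num_ew_channels)

normalize_edge_weights(g, device="cpu", num_ew_channels=3)
print(g.edata["feat_0"].shape)                    # torch.Size([4, 1])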
def edge_softmax_fix(graph, score):
    # Edge softmax: normalize each edge score by the sum of exp(score)
    # over the in-edges of its destination node.
    def reduce_sum(nodes):
        accum = torch.sum(nodes.mailbox['temp'], 1)
        return {'out_sum': accum}

    graph = graph.local_var()
    graph.edata['out'] = score
    graph.edata['out'] = torch.exp(graph.edata['out'])
    graph.update_all(fn.copy_e('out', 'temp'), reduce_sum)
    graph.apply_edges(fn.e_div_v('out', 'out_sum', 'out'))
    out = graph.edata['out']
    return out
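# A hedged sanity check, not from the original source: for scores of moderate
# magnitude, edge_softmax_fix() should agree with DGL's built-in edge_softmax,
# which also normalizes over the in-edges of each destination node but subtracts
# the per-node max before exponentiating. The toy graph below is an assumption.
import dgl
import torch
from dgl.nn.functional import edge_softmax

g = dgl.graph((torch.tensor([0, 1, 2, 2]), torch.tensor([1, 2, 0, 1])))
score = torch.randn(g.num_edges(), 1)
assert torch.allclose(edge_softmax_fix(g, score), edge_softmax(g, score), atol=1e-6)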
def calc_weight(g): """ Compute row_normalized(D^(-1/2)AD^(-1/2)) """ with g.local_scope(): # compute D^(-0.5)*D(-1/2), assuming A is Identity g.ndata["in_deg"] = g.in_degrees().float().pow(-0.5) g.ndata["out_deg"] = g.out_degrees().float().pow(-0.5) g.apply_edges(fn.u_mul_v("out_deg", "in_deg", "weight")) # row-normalize weight g.update_all(fn.copy_e("weight", "msg"), fn.sum("msg", "norm")) g.apply_edges(fn.e_div_v("weight", "norm", "weight")) return g.edata["weight"]
def expected_output():
    # Reference GAT attention computed with DGL built-in message functions.
    # Uses g, feat_src, el, er, and leaky_relu from the enclosing scope.
    g.srcdata.update({'ft': feat_src, 'el': el})
    g.dstdata.update({'er': er})
    g.apply_edges(fn.u_add_v('el', 'er', 'e'))
    e = leaky_relu(g.edata.pop('e'))
    g.edata['out'] = th.exp(e)
    g.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum'))
    g.apply_edges(fn.e_div_v('out', 'out_sum', 'out1'))
    # Omit attn_drop for deterministic execution
    g.edata['a'] = g.edata['out1']
    # message passing
    g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
    rst = g.dstdata['ft']
    return rst
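# A hedged sketch of how the free variables used by expected_output() -- g,
# feat_src, el, er, leaky_relu -- could be prepared. The graph, dimensions, and
# parameter initialization below are illustrative assumptions mirroring the GAT
# layer further below.
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn

num_heads, out_feats, in_feats = 2, 4, 8
g = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 0])))
fc = nn.Linear(in_feats, num_heads * out_feats, bias=False)
attn_l = nn.Parameter(th.randn(1, num_heads, out_feats))
attn_r = nn.Parameter(th.randn(1, num_heads, out_feats))
leaky_relu = nn.LeakyReLU(0.2)

h = th.randn(g.num_nodes(), in_feats)
feat_src = fc(h).view(-1, num_heads, out_feats)
el = (attn_l * feat_src).sum(dim=-1, keepdim=True)    # (N, num_heads, 1)
er = (attn_r * feat_src).sum(dim=-1, keepdim=True)

rst = expected_output()                               # (N, num_heads, out_feats)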
def forward(self, g, feat_dict):
    funcs = {}
    for srctype, etype, dsttype in g.canonical_etypes:
        # nodes' original features
        g.nodes[dsttype].data['h'] = feat_dict[dsttype]
        g.nodes[srctype].data['h'] = feat_dict[srctype]
        # src nodes' transformed features
        g.nodes[srctype].data['t_h'] = self.W_T[etype](feat_dict[srctype])
        # compute the attention numerator (exp)
        g.apply_edges(fn.u_mul_v('t_h', 'h', 'x'), etype=etype)
        g.edges[etype].data['x'] = torch.exp(self.W_A[etype](g.edges[etype].data['x']))
        # first update to compute the attention denominator (\sum exp)
        funcs[etype] = (fn.copy_e('x', 'm'), fn.sum('m', 'att'))
    g.multi_update_all(funcs, 'sum')

    funcs = {}
    for srctype, etype, dsttype in g.canonical_etypes:
        # compute attention weights (numerator / denominator)
        g.apply_edges(fn.e_div_v('x', 'att', 'att'), etype=etype)
        # second update to obtain h1 = \sum(h0 * att)
        funcs[etype] = (fn.u_mul_e('h', 'att', 'm'), fn.sum('m', 'h'))
    g.multi_update_all(funcs, 'sum')

    # apply activation, layernorm, and dropout
    feat_dict = {}
    for ntype in g.ntypes:
        feat_dict[ntype] = self.dropout(self.layernorm(F.relu_(g.nodes[ntype].data['h'])))
    return feat_dict
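# A hedged, self-contained sketch of a constructor that could back the forward()
# above (the original __init__ is not shown, so W_T, W_A, layernorm, and dropout
# here are assumptions): per-edge-type linear maps W_T (feature transform,
# hidden -> hidden) and W_A (attention score, hidden -> 1). The heterograph,
# node counts, and etype names in the usage below are illustrative.
import dgl
import dgl.function as fn
import torch
import torch.nn as nn
import torch.nn.functional as F

class HeteroAttnLayer(nn.Module):
    def __init__(self, etypes, hidden):
        super().__init__()
        self.W_T = nn.ModuleDict({e: nn.Linear(hidden, hidden) for e in etypes})
        self.W_A = nn.ModuleDict({e: nn.Linear(hidden, 1) for e in etypes})
        self.layernorm = nn.LayerNorm(hidden)
        self.dropout = nn.Dropout(0.1)

    forward = forward   # reuse the forward(self, g, feat_dict) defined directly above

hg = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ('user', 'clicks', 'item'): (torch.tensor([0, 2]), torch.tensor([0, 1])),
})
feats = {nt: torch.randn(hg.num_nodes(nt), 16) for nt in hg.ntypes}
layer = HeteroAttnLayer([e for _, e, _ in hg.canonical_etypes], hidden=16)
out = layer(hg, feats)    # {'item': (2, 16), 'user': (3, 16)}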
def heterograph_edge_softmax(graph, edge_types, edge_data):
    r"""Edge softmax for heterograph.

    For a node :math:`i`, edge softmax is an operation of computing

    .. math::
        a_{ij} = \frac{\exp(z_{ij})}{\sum_{j\in\mathcal{N}(i)}\exp(z_{ij})}

    where :math:`z_{ij}` is a signal of edge :math:`j\rightarrow i`, also
    called logits in the context of softmax. :math:`\mathcal{N}(i)` is the set
    of nodes that have an edge to :math:`i`. The type of :math:`j` is ignored,
    i.e. the sum runs over all :math:`j` directed to :math:`i`, no matter what
    the node type of :math:`j` is.

    .. code:: python

        score = dgl.EData(g, score)
        score_max = score.dst_max()  # of type dgl.NData
        score = score - score_max    # edge_sub_dst, ret dgl.EData
        score_sum = score.dst_sum()  # of type dgl.NData
        out = score / score_sum      # edge_div_dst, ret dgl.EData
        return out.data

    Returns:
        list of torch.Tensor: the softmax-normalized scores, one tensor per
        edge type, in the order of ``edge_types``.
    """
    g = graph.local_var()

    #########################################################################
    # The softmax trick, making the exponential numerically stable; see
    # https://stackoverflow.com/questions/42599498/numercially-stable-softmax
    # max_e > 64 would overflow; min_e < -64 would underflow.
    #
    # The trick can be applied all the time, but an earlier version applied it
    # only in those conditions to save some time, since multi_update_all is
    # really expensive:
    #
    # max_e = []
    # min_e = []
    # for etype, edata in zip(edge_types, edge_data):
    #     g.edges[etype].data["e"] = edata
    #     max_e.append(torch.max(edata))
    #     min_e.append(torch.min(edata))
    # max_e = max(max_e)
    # min_e = min(min_e)
    # if max_e > 64.0 or min_e < -64.0:
    #     # e max (fn.max operates on the axis of features from different nodes)
    #     g.multi_update_all(
    #         {etype: (fn.copy_e("e", "m"), fn.max("m", "emax")) for etype in edge_types},
    #         "max",
    #     )
    #     # subtract max and compute exponential
    #     for etype in edge_types:
    #         g.apply_edges(fn.e_sub_v("e", "emax", "e"), etype=etype)
    #########################################################################

    # assign data and compute the per-destination max (the trick is always applied here)
    for etype, edata in zip(edge_types, edge_data):
        g.edges[etype].data["e"] = edata
    g.multi_update_all(
        {etype: (fn.copy_e("e", "m"), fn.max("m", "emax")) for etype in edge_types},
        "max",
    )
    # subtract max and compute exponential
    for etype in edge_types:
        g.apply_edges(fn.e_sub_v("e", "emax", "e"), etype=etype)
        g.edges[etype].data["out"] = torch.exp(g.edges[etype].data["e"])

    # e sum
    g.multi_update_all(
        {etype: (fn.copy_e("out", "m"), fn.sum("m", "out_sum")) for etype in edge_types},
        "sum",
    )

    a = []
    for etype in edge_types:
        g.apply_edges(fn.e_div_v("out", "out_sum", "a"), etype=etype)
        a.append(g.edges[etype].data["a"])
    return a
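# A small standalone illustration, not from the original source, of the
# max-subtraction trick referenced in the comments above: shifting the logits
# by their per-row max leaves the softmax unchanged but keeps exp() from
# overflowing.
import torch

z = torch.tensor([[1000.0, 1001.0, 1002.0]])
naive = torch.exp(z) / torch.exp(z).sum(dim=-1, keepdim=True)      # inf / inf -> nan
shifted = z - z.max(dim=-1, keepdim=True).values
stable = torch.exp(shifted) / torch.exp(shifted).sum(dim=-1, keepdim=True)
print(naive)                                                       # tensor([[nan, nan, nan]])
print(torch.allclose(stable, torch.softmax(z, dim=-1)))            # True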
def forward(self, graph, feat):
    graph = graph.local_var()
    h_src = h_dst = self.feat_drop(feat)
    feat = self.fc(h_src).view(-1, self._num_heads, self._out_feats)
    ell = (self.attn_l * feat).sum(dim=-1, keepdim=True)
    err = (self.attn_r * feat).sum(dim=-1, keepdim=True)

    # Reference GAT forward using DGL built-in message functions.
    g = graph
    g.srcdata.update({'ft': feat, 'el': ell})
    g.dstdata.update({'er': err})
    g.apply_edges(fn.u_add_v('el', 'er', 'e'))
    e = self.leaky_relu(g.edata.pop('e'))
    g.edata['out'] = th.exp(e)
    g.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum'))
    g.apply_edges(fn.e_div_v('out', 'out_sum', 'out1'))
    # Omit attn_drop for deterministic execution
    g.edata['a'] = g.edata['out1']
    # message passing
    g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
    dglrst = g.dstdata['ft']

    # Fused-kernel GAT forward.
    fusedrst = B.fused_gat(g, feat, ell, err, self.negative_slope)

    # EGL (zoomIn/zoomOut) GAT forward.
    dgl_context = utils.to_dgl_context(feat.device)
    graph = graph._graph.get_immutable_gidx(dgl_context)
    with self.cm.zoomIn(namespace=[self, th], graph=graph,
                        node_feats={'f': h_src}, edge_feats={}) as v:
        feat_src = [
            self.fc(n.f).view(self._num_heads, self._out_feats)
            for n in v.innbs
        ]
        el = [(nf * self.attn_l).sum(dim=-1, keepdim=True) for nf in feat_src]
        er = (self.fc(v.f).view(self._num_heads, self._out_feats) *
              self.attn_r).sum(dim=-1, keepdim=True)
        coeff = [th.exp(self.leaky_relu(l + er)) for l in el]
        s = sum(coeff)
        alpha = [c / s for c in coeff]
        rst = sum([ef[0] * ef[1] for ef in zip(alpha, feat_src)])
        self.cm.collect_output(rst)
    rst = self.cm.zoomOut()

    # Compare gradients of the three implementations w.r.t. the attention logits.
    grad_out = th.ones_like(rst)
    egl_graer = grad(outputs=rst, inputs=self.cm._executor.ts.tensor_map['V7'],
                     grad_outputs=grad_out, retain_graph=True)
    egl_grael = grad(outputs=rst, inputs=self.cm._executor.ts.tensor_map['V3'],
                     grad_outputs=grad_out, retain_graph=True)
    dgl_graer = grad(outputs=dglrst, inputs=err, grad_outputs=grad_out,
                     retain_graph=True)
    dgl_grael = grad(outputs=dglrst, inputs=ell, grad_outputs=grad_out,
                     retain_graph=True)
    fused_graer = grad(outputs=fusedrst, inputs=err, grad_outputs=grad_out,
                       retain_graph=True)
    fused_grael = grad(outputs=fusedrst, inputs=ell, grad_outputs=grad_out,
                       retain_graph=True)

    print('rst close?', th.allclose(rst, dglrst), rst)
    # print('exp', g.edata['out'], 'div', g.edata['a'], 'rst', dglrst,
    #       'feat', feat, 'ell', ell, 'err', err)
    print('\negl_graer', egl_graer, '\ndgl_graer', dgl_graer,
          '\nfused_graer', fused_graer, 'egl close with dgl?',
          th.allclose(egl_graer[0], dgl_graer[0]))
    print('\negl_grael', egl_grael, '\ndgl_grael', dgl_grael,
          '\nfused_grael', fused_grael, 'egl close with dgl?',
          th.allclose(egl_grael[0], dgl_grael[0]))

    # residual
    if self.res_fc is not None:
        resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats)
        rst = rst + resval
    # activation
    if self.activation:
        rst = self.activation(rst)
    return rst, dglrst
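# A compact, self-contained sketch of the comparison pattern used in forward()
# above: compute the same quantity two ways, then use torch.autograd.grad to
# check that outputs and gradients with respect to shared inputs agree. The toy
# softmax functions below are assumptions standing in for the GAT kernels.
import torch as th
from torch.autograd import grad

x = th.randn(5, requires_grad=True)

out_a = th.exp(x) / th.exp(x).sum()    # hand-written softmax
out_b = th.softmax(x, dim=0)           # library implementation

grad_out = th.ones_like(out_a)
grad_a = grad(outputs=out_a, inputs=x, grad_outputs=grad_out, retain_graph=True)
grad_b = grad(outputs=out_b, inputs=x, grad_outputs=grad_out, retain_graph=True)

print('rst close?', th.allclose(out_a, out_b))
print('grad close?', th.allclose(grad_a[0], grad_b[0], atol=1e-6))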