def forward(self, out, pos_graph, neg_graph, cuda):
    pos_graph.ndata['h'] = out
    pos_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
    pos_score = pos_graph.edata['score']
    neg_graph.ndata['h'] = out
    neg_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
    neg_score = neg_graph.edata['score']
    score = torch.cat([pos_score, neg_score])
    label = torch.cat([torch.ones(pos_score.shape[0]),
                       torch.zeros(neg_score.shape[0])]).long()
    if cuda:
        label = label.cuda()
    loss = F.binary_cross_entropy_with_logits(score, label.float())
    return loss
def forward(self, block_outputs, pos_graph, neg_graph):
    with pos_graph.local_scope():
        pos_graph.ndata['h'] = block_outputs
        pos_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
        pos_score = pos_graph.edata['score']
    with neg_graph.local_scope():
        neg_graph.ndata['h'] = block_outputs
        neg_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
        neg_score = neg_graph.edata['score']
    score = th.cat([pos_score, neg_score])
    label = th.cat([th.ones_like(pos_score),
                    th.zeros_like(neg_score)]).long()
    loss = F.binary_cross_entropy_with_logits(score, label.float())
    return loss
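# --- Usage sketch (not from the original snippets): how a dot-product link
# prediction loss like the two forward() functions above is typically driven.
# The function name dot_product_loss, the graph construction, and the uniform
# tail corruption below are assumptions for illustration only.
import dgl
import dgl.function as fn
import torch
import torch.nn.functional as F

def dot_product_loss(out, pos_graph, neg_graph):
    # Dot-product scores on the positive and negative graphs, then BCE,
    # mirroring the pattern above.
    with pos_graph.local_scope():
        pos_graph.ndata['h'] = out
        pos_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
        pos_score = pos_graph.edata['score']
    with neg_graph.local_scope():
        neg_graph.ndata['h'] = out
        neg_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
        neg_score = neg_graph.edata['score']
    score = torch.cat([pos_score, neg_score]).squeeze(1)
    label = torch.cat([torch.ones_like(pos_score),
                       torch.zeros_like(neg_score)]).squeeze(1)
    return F.binary_cross_entropy_with_logits(score, label)

num_nodes, dim, num_edges = 100, 16, 500
out = torch.randn(num_nodes, dim)                       # embeddings from any encoder
src = torch.randint(0, num_nodes, (num_edges,))
dst = torch.randint(0, num_nodes, (num_edges,))
pos_graph = dgl.graph((src, dst), num_nodes=num_nodes)  # observed edges
neg_graph = dgl.graph((src, torch.randint(0, num_nodes, (num_edges,))),
                      num_nodes=num_nodes)              # corrupted tails
loss = dot_product_loss(out, pos_graph, neg_graph)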
def forward(self, item_item_graph, h):
    with item_item_graph.local_scope():
        item_item_graph.ndata['h'] = h
        item_item_graph.apply_edges(fn.u_dot_v('h', 'h', 's'))
        item_item_graph.apply_edges(self._add_bias)
        pair_score = item_item_graph.edata['s']
    return pair_score
def forward(self, g, features):
    h_pre = features
    g = g.local_var()
    g.ndata['h'] = features
    g.ndata['norm_h'] = F.normalize(features, p=2, dim=-1)
    g.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
    cos = g.edata.pop('cos')
    e = self.beta * cos
    if self.graph_cut > 0:
        k = int(e.size()[0] * self.graph_cut)
        _, indices = e.topk(k, largest=False, sorted=False)
        e[indices] = 0
    g.edata['p'] = edge_softmax(g, e)
    g.update_all(fn.u_mul_e('h', 'p', 'm'), fn.sum('m', 'h'))
    h = g.ndata['h']
    if self.project:
        h = self.linear(h)
    if self.activation:
        h = self.activation(h)
    if self.residual:
        h = h + self.res_fc(h_pre)
    h = self.dropout(h)
    return h
def forward(self, graph, ufeat, ifeat):
    """Forward function.

    Parameters
    ----------
    graph : DGLHeteroGraph
        "Flattened" user-movie graph with only one edge type.
    ufeat : th.Tensor
        User embeddings. Shape: (|V_u|, D)
    ifeat : th.Tensor
        Movie embeddings. Shape: (|V_m|, D)

    Returns
    -------
    th.Tensor
        Predicted scores for each user-movie edge.
    """
    with graph.local_scope():
        ufeat = self.dropout(ufeat)
        ifeat = self.dropout(ifeat)
        graph.nodes['movie'].data['h'] = ifeat
        basis_out = []
        for i in range(self._num_basis):
            graph.nodes['user'].data['h'] = ufeat @ self.Ps[i]
            graph.apply_edges(fn.u_dot_v('h', 'h', 'sr'))
            basis_out.append(graph.edata['sr'].unsqueeze(1))
        out = th.cat(basis_out, dim=1)
        out = self.combine_basis(out)
    return out
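# --- Illustration (shapes invented): the decoder above scores each edge with
# a set of basis bilinear forms, s_i(u, v) = u^T P_i v, and then mixes the
# per-basis scores with a learned linear layer (combine_basis). A standalone
# sketch of the same computation without the graph machinery:
import torch as th
import torch.nn as nn

D, B, E = 8, 2, 5                           # embedding dim, num basis, num edges
u = th.randn(E, D)                          # user embedding per edge endpoint
v = th.randn(E, D)                          # movie embedding per edge endpoint
Ps = [th.randn(D, D) for _ in range(B)]     # basis parameter matrices
basis = th.stack([((u @ P) * v).sum(1) for P in Ps], dim=1)  # (E, B)
combine_basis = nn.Linear(B, 1)
score = combine_basis(basis)                # (E, 1): one score per edge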
def forward(self, graph, feat):
    graph = graph.local_var()
    feat_c = feat.clone().detach().requires_grad_(False)
    q, k, v = self.q_proj(feat), self.k_proj(feat_c), self.v_proj(feat_c)
    q = q.view(-1, self._num_heads, self._out_feats)
    k = k.view(-1, self._num_heads, self._out_feats)
    v = v.view(-1, self._num_heads, self._out_feats)
    # k, q instead of q, k: edge_softmax is applied on incoming edges
    graph.ndata.update({'ft': v, 'el': k, 'er': q})
    # compute edge attention
    graph.apply_edges(fn.u_dot_v('el', 'er', 'e'))
    e = graph.edata.pop('e') / math.sqrt(self._out_feats * self._num_heads)
    graph.edata['a'] = edge_softmax(graph, e).unsqueeze(-1)
    # message passing
    graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft2'))
    rst = graph.ndata['ft2']
    # residual
    rst = rst.view(feat.shape) + feat
    if self._trans:
        rst = self.ln1(rst)
        rst = self.ln1(rst + self.FFN(rst))  # the same LayerNorm is reused; see the author's code
    return rst
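# --- Note on the 'k, q instead of q, k' comment above: edge_softmax
# normalizes scores over the incoming edges of each destination node, which is
# why keys live on source nodes ('el') and queries on destinations ('er').
# A tiny demonstration on an invented toy graph:
import dgl
import torch as th
from dgl.nn.functional import edge_softmax

g = dgl.graph(([0, 1, 2], [2, 2, 3]))   # node 2 has two in-edges, node 3 has one
e = th.ones(g.num_edges(), 1)
print(edge_softmax(g, e))               # [[0.5], [0.5], [1.0]]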
def forward(self, graph, h):
    # h contains the node representations computed from the GNN defined
    # in the node classification section (Section 5.1).
    with graph.local_scope():
        graph.ndata['h'] = h
        graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
        return graph.edata['score']
def forward(self, graph, h, etype):
    # h contains the node representations for each node type computed from
    # the GNN defined in the previous section (Section 5.1).
    with graph.local_scope():
        graph.ndata['h'] = h
        graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype)
        return graph.edges[etype].data['score']
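# --- Usage sketch for the heterogeneous predictor above. The schema
# ('user', 'clicks', 'item') and the feature dict are invented for
# illustration; any etype present in the graph works the same way.
import dgl
import dgl.function as fn
import torch

g = dgl.heterograph({
    ('user', 'clicks', 'item'): (torch.tensor([0, 1, 2]),
                                 torch.tensor([1, 0, 2])),
})
h = {'user': torch.randn(3, 8), 'item': torch.randn(3, 8)}

def hetero_dot_scores(graph, h, etype):
    with graph.local_scope():
        graph.ndata['h'] = h   # sets 'h' for every node type at once
        graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype)
        return graph.edges[etype].data['score']

scores = hetero_dot_scores(g, h, 'clicks')   # shape (3, 1), one per 'clicks' edge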
def forward(self, graph, feat, device):
    graph = graph.to(device).local_var()
    feat_c = feat.clone().detach().requires_grad_(False)
    q, k, v = (self.query_proj(feat), self.key_proj(feat_c),
               self.value_proj(feat_c))
    q = q.view(-1, self.num_heads, self.embedding_size // self.num_heads)
    k = k.view(-1, self.num_heads, self.embedding_size // self.num_heads)
    v = v.view(-1, self.num_heads, self.embedding_size // self.num_heads)
    # k, q instead of q, k: edge_softmax is applied on incoming edges
    graph.ndata.update({'ft': v, 'el': k, 'er': q})
    # compute edge attention
    graph.apply_edges(fn.u_dot_v('el', 'er', 'e'))
    e = graph.edata.pop('e') / math.sqrt(self.embedding_size)
    graph.edata['a'] = edge_softmax(graph, e)
    # message passing
    graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft2'))
    rst = graph.ndata['ft2']
    # residual
    rst = rst.view(feat.shape) + feat
    rst = self.ln1(rst)
    rst = self.ln1(rst + self.out_proj(rst))
    return rst
def forward(self, graph, ufeat, ifeat):
    """Forward function.

    Parameters
    ----------
    graph : DGLHeteroGraph
        "Flattened" user-movie graph with only one edge type.
    ufeat : mx.nd.NDArray
        User embeddings. Shape: (|V_u|, D)
    ifeat : mx.nd.NDArray
        Movie embeddings. Shape: (|V_m|, D)

    Returns
    -------
    mx.nd.NDArray
        Predicted scores for each user-movie edge.
    """
    graph = graph.local_var()
    ufeat = self.dropout(ufeat)
    ifeat = self.dropout(ifeat)
    graph.nodes['movie'].data['h'] = ifeat
    basis_out = []
    for i in range(self._num_basis_functions):
        graph.nodes['user'].data['h'] = F.dot(ufeat, self.Ps[i].data())
        graph.apply_edges(fn.u_dot_v('h', 'h', 'sr'))
        basis_out.append(graph.edata['sr'])
    out = F.concat(*basis_out, dim=1)
    out = self.rate_out(out)
    return out
def forward(self, g, h):
    with g.local_scope():
        g.ndata['h'] = h
        # Compute a new edge feature named 'score' by a dot product between
        # the source node feature 'h' and the destination node feature 'h'.
        g.apply_edges(fn.u_dot_v('h', 'h', 'score'))
        # u_dot_v returns a 1-element vector for each edge, so squeeze it.
        return g.edata['score'][:, 0]
def forward(self, dec_graph, ufeat, ifeat):
    with dec_graph.local_scope():
        dec_graph.nodes['item'].data['h'] = ifeat
        dec_graph.nodes['user'].data['h'] = ufeat
        dec_graph.apply_edges(fn.u_dot_v('h', 'h', 'sr'))
        out = dec_graph.edata['sr']
    return out
def forward(self, item_item_graph, h): """ item_item_graph : graph consists of edges connecting the pairs h : hidden state of every node """ with item_item_graph.local_scope(): item_item_graph.ndata['h'] = h item_item_graph.apply_edges(fn.u_dot_v('h', 'h', 's')) item_item_graph.apply_edges(self._add_bias) pair_score = item_item_graph.edata['s'] return pair_score
def calc_score(self, g, h):
    """Compute the score s(u, v) = h(u)^T h(v) for every edge in the graph.

    :param g: DGLGraph, heterogeneous graph
    :param h: Dict[str, tensor(N_i, d)], mapping from node type to node embeddings
    :return: tensor(A*E), scores of all edges
    """
    with g.local_scope():
        g.ndata['h'] = h
        for etype in g.etypes:
            g.apply_edges(fn.u_dot_v('h', 'h', 's'), etype=etype)
        return torch.cat(list(g.edata['s'].values())).squeeze(dim=-1)  # (A*E,)
def forward(self, g, features):
    g = g.local_var()
    h_pre = features
    if self.graph_norm:
        norm = th.pow(g.in_degrees().float().clamp(min=1), -0.5)
        shp = norm.shape + (1,) * (features.dim() - 1)
        norm = th.reshape(norm, shp).to(features.device)
        features = features * norm
    g.ndata['h'] = features
    w = th.ones(g.number_of_edges(), 1).to(features.device)
    w = self.edge_drop(w)
    if self.graph_cut > 0:
        # zero out the fraction of edges with the lowest cosine similarity
        g.ndata['norm_h'] = F.normalize(features, p=2, dim=-1)
        g.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
        e = g.edata.pop('cos')
        k = int(e.size()[0] * self.graph_cut)
        _, cut_indices = e.topk(k, largest=False, sorted=False)
        w[cut_indices] = 0
    g.edata['w'] = w
    g.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'h'))
    g.apply_nodes(func=self.apply_mod)
    h = g.ndata['h']
    if self.graph_norm:
        h = h * norm
    if self.batch_norm:
        h = self.bn(h)
    if self.pair_norm:
        h = self.pn(h)
    if self.activation is not None:
        h = self.activation(h)
    if self.res_fc is not None:
        h = h + self.beta * self.res_fc(h_pre)
    h = self.dropout(h)
    return h
def forward(self, graph, h):
    with graph.local_scope():
        for etype in graph.canonical_etypes:
            try:
                graph.nodes[etype[0]].data['norm_h'] = F.normalize(
                    h[etype[0]], p=2, dim=-1)
                graph.nodes[etype[2]].data['norm_h'] = F.normalize(
                    h[etype[2]], p=2, dim=-1)
                graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'),
                                  etype=etype)
            except KeyError:
                # Skip etypes that are not among the training eids and thus
                # have no 'h'.
                pass
        ratings = graph.edata['cos']
    return ratings
def get_sddmm_flops(g, num_hid):
    g.ndata['h'] = th.rand(g.number_of_nodes(), num_hid).cuda()
    g.ndata['x'] = th.rand(g.number_of_nodes(), num_hid).cuda()
    accum_time = 0
    accum_FLOPs = 0
    for t in range(100):
        with th_op_time() as timer:
            # apply_edges2 is a project-local variant of apply_edges used for
            # benchmarking; each edge dot product costs num_hid multiply-adds.
            g.apply_edges2(fn.u_dot_v('h', 'x', 'h'))
        if t >= 30:  # skip the first 30 iterations as warm-up
            accum_time += timer.time
            accum_FLOPs += g.number_of_edges() * num_hid
    g.ndata.clear()
    g.edata.clear()
    th.cuda.empty_cache()
    return accum_FLOPs / accum_time
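# --- th_op_time above is a project-local timing helper, not a DGL or torch
# API. A common stand-in based on CUDA events might look like this (a sketch
# under that assumption, requiring a CUDA-enabled build):
import torch as th

class th_op_time:
    def __enter__(self):
        self.start_event = th.cuda.Event(enable_timing=True)
        self.end_event = th.cuda.Event(enable_timing=True)
        self.start_event.record()
        return self

    def __exit__(self, *exc):
        self.end_event.record()
        th.cuda.synchronize()
        # elapsed_time returns milliseconds; convert to seconds
        self.time = self.start_event.elapsed_time(self.end_event) / 1e3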
def recall_paper(g, field_ids, num_recall):
    """Precompute paper recall.

    :param g: DGLGraph, heterogeneous graph
    :param field_ids: List[int], ids of the target fields
    :param num_recall: number of papers to recall for each field
    :return: Dict[int, List[int]] {field_id: [paper_id]}
    """
    similarity = torch.zeros(len(field_ids), g.num_nodes('paper'))
    sg = dgl.out_subgraph(g['has_field_rev'], {'field': field_ids},
                          relabel_nodes=True)
    sg.apply_edges(fn.u_dot_v('feat', 'feat', 's'))
    f, p = sg.edges()
    similarity[f, sg.nodes['paper'].data[dgl.NID][p]] = \
        sg.edata['s'].squeeze(dim=1)
    _, pid = similarity.topk(num_recall, dim=1)
    return {f: pid[i].tolist() for i, f in enumerate(field_ids)}
def forward(self, edge_subgraph: dgl.DGLHeteroGraph,
            nodes_representation: dict, etype: str):
    """
    :param edge_subgraph: sampled subgraph
    :param nodes_representation: input node features, dict
    :param etype: edge type to predict, str
    :return:
    """
    edge_subgraph = edge_subgraph.local_var()
    edge_type_subgraph = edge_subgraph[etype]
    for ntype in nodes_representation:
        edge_type_subgraph.nodes[ntype].data['h'] = self.projection_layer(
            nodes_representation[ntype])
    edge_type_subgraph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype)
    return self.sigmoid(edge_type_subgraph.edata['score'])
def forward(self, graph, ufeat, ifeat):
    """Forward function.

    Parameters
    ----------
    graph : DGLHeteroGraph
        "Flattened" user-movie graph with only one edge type.
    ufeat : torch.Tensor
        User embeddings. Shape: (|V_u|, D)
    ifeat : torch.Tensor
        Movie embeddings. Shape: (|V_m|, D)

    Returns
    -------
    torch.Tensor
        Predicted scores for each user-movie edge.
    """
    graph = graph.local_var()
    ufeat = self.dropout(ufeat)
    ifeat = self.dropout(ifeat)
    graph.nodes['item'].data['h'] = ifeat
    basis_out = []
    for i in range(len(self.rating_vals)):
        graph.nodes['user'].data['h'] = torch.mm(ufeat, self.Ps[i])
        graph.apply_edges(fn.u_dot_v('h', 'h', 'sr'))
        basis_out.append(torch.unsqueeze(graph.edata['sr'], 1))
    out = torch.cat(basis_out, dim=1)
    out = F.softmax(out, dim=1)
    possible_ratings = torch.Tensor(self.rating_vals)
    ratings = torch.sum(out * possible_ratings, dim=1)
    return ratings
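# --- The decoder above turns per-rating dot products into an expected rating:
# softmax over the per-rating scores of each edge, then a weighted sum of the
# possible rating values. A standalone illustration with invented numbers:
import torch
import torch.nn.functional as F

rating_vals = [1.0, 2.0, 3.0, 4.0, 5.0]
scores = torch.randn(4, len(rating_vals))        # 4 edges, one score per rating
probs = F.softmax(scores, dim=1)                 # P(rating | edge)
expected = (probs * torch.tensor(rating_vals)).sum(dim=1)
print(expected)                                  # one predicted rating per edge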
def forward(self, graph, feat, soft_label):
    graph = graph.local_var()
    if not self._allow_zero_in_degree:
        if (graph.in_degrees() == 0).any():
            raise DGLError('There are 0-in-degree nodes in the graph, '
                           'output for those nodes will be invalid. '
                           'This is harmful for some applications, '
                           'causing silent performance regression. '
                           'Adding self-loop on the input graph by '
                           'calling `g = dgl.add_self_loop(g)` will resolve '
                           'the issue. Setting ``allow_zero_in_degree`` '
                           'to be `True` when constructing this module will '
                           'suppress the check and let the code run.')
    h_src = feat
    feat_src = feat_dst = self.fc(h_src)
    if graph.is_block:
        feat_dst = feat_src[:graph.number_of_dst_nodes()]
    # Assign features to nodes
    graph.srcdata.update({'ft': feat_src})
    graph.dstdata.update({'ft': feat_dst})
    # Step 1. dot product
    graph.apply_edges(fn.u_dot_v('ft', 'ft', 'a'))
    # Step 2. edge softmax to compute attention scores
    graph.edata['sa'] = edge_softmax(graph, graph.edata['a'])
    att = graph.edata['sa'].squeeze()
    cog_label = soft_label
    graph.srcdata.update({'ft': cog_label})
    graph.dstdata.update({'ft': cog_label})
    # Step 3. Broadcast the softmax value to each edge and aggregate into
    # the destination nodes
    graph.update_all(fn.u_mul_e('ft', 'sa', 'attn'), fn.sum('attn', 'agg_u'))
    # output the results on the destination nodes
    rst = graph.dstdata['agg_u']
    return rst, att, th.sigmoid(self.lr_alpha).squeeze()
def forward(self, graph, feat): """Compute graph attention network layer. Parameters ---------- graph : DGLGraph The graph. feat : torch.Tensor The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. Returns ------- torch.Tensor The output feature of shape :math:`(N, H, D_{out})` where :math:`H` is the number of heads, and :math:`D_{out}` is size of output feature. """ graph = graph.local_var() #feat_c = feat.clone().detach().requires_grad_(False) feat_c = feat q, k, v = self.q_proj(feat), self.k_proj(feat_c), self.v_proj(feat_c) q = q.view(-1, self._num_heads, self._out_feats) k = k.view(-1, self._num_heads, self._out_feats) v = v.view(-1, self._num_heads, self._out_feats) graph.ndata.update({'ft': v, 'el': k, 'er': q}) # compute edge attention graph.apply_edges(fn.u_dot_v('el', 'er', 'e')) e = graph.edata.pop('e') / math.sqrt(self._out_feats) # compute softmax graph.edata['a'] = self.attn_drop(edge_softmax(graph, e)).unsqueeze(-1) # message passing graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft')) rst = graph.ndata['ft'] # residual rst = rst.view(feat.shape) + feat if self._trans: rst = self.ln(rst) rst = self.ln(rst + self.FFN(rst)) return rst
def graph_nn(self, g, h, ctx, c, graph_membership):
    g = g.local_var()
    c_broadcast = F.embedding(graph_membership, c)
    fuse = self.W4(self.read_drop(h)) * self.W5(self.read_drop(ctx))
    cat = th.cat([h, ctx, fuse], dim=1)
    src_ctx = self.W7(cat) * self.W8(c_broadcast)
    dst_ctx = self.W6(cat)
    g.srcdata.update({"s_e": src_ctx})
    g.dstdata.update({"d_e": dst_ctx})
    g.apply_edges(fn.u_dot_v("s_e", "d_e", "e"))
    e = g.edata.pop('e')
    g.edata['a'] = edge_softmax(g, e)
    g.ndata['ft'] = self.W9(cat) * self.W10(c_broadcast)
    g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 's'))
    ctx = self.W11(ctx) + self.W11b(g.ndata['s'])
    rst = ctx
    return rst
def forward(self, g, features):
    g = g.local_var()
    if self._cached_h is not None:
        features = self._cached_h
    else:
        # compute normalization
        if self.graph_norm:
            degs = g.in_degrees().float().clamp(min=1)
            norm = th.pow(degs, -0.5)
            norm = norm.to(features.device).unsqueeze(1)
        if self.pair_norm:
            self.pn(features)
        # compute (D^{-1/2} A D^{-1/2})^k X
        for i in range(self._k):
            w = th.ones(g.number_of_edges(), 1).to(features.device)
            w = self.edge_drop(w)
            if self.graph_cut > 0:
                # zero out the fraction of edges with the lowest cosine similarity
                g.ndata['norm_h'] = F.normalize(features, p=2, dim=-1)
                g.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
                e = g.edata.pop('cos')
                k = int(e.size()[0] * self.graph_cut)
                _, cut_indices = e.topk(k, largest=False, sorted=False)
                w[cut_indices] = 0
            g.edata['w'] = w
            if self.graph_norm:
                features = features * norm
            g.ndata['h'] = features
            g.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'h'))
            features = g.ndata.pop('h')
            if self.graph_norm:
                features = features * norm
            if self.pair_norm:
                self.pn(features)
        # cache feature
        if self._cached:
            self._cached_h = features
    return self.fc(features)
def predict2(g, h):
    with g.local_scope():
        g.ndata['h'] = h
        g.apply_edges(fn.u_dot_v('h', 'h', 'score'))
        return g.edata['score']
def link_pre(model, sampler, n_train, head_t, tail_t, batch_size_test,
             features_u, features_v, features_e, t, n_users, n_items,
             in_feats_s, out_feats, inductive=False, new_id=None,
             use_cuda=False, gpu=-1, advanced=False):
    val_ap, val_auc = [], []
    print('Start Link Prediction...')
    model.eval()
    with torch.no_grad():
        si, sj = torch.zeros(n_users, in_feats_s), torch.zeros(n_items, in_feats_s)
        zi, zj = torch.zeros(n_users, out_feats), torch.zeros(n_items, out_feats)
        # cuda
        if use_cuda:
            si, sj = si.cuda(), sj.cuda()
            zi, zj = zi.cuda(), zj.cuda()
        for start in tqdm.trange(0, head_t.shape[0], batch_size_test):
            end = min(start + batch_size_test, head_t.shape[0])
            head_b = head_t[start:end]
            tail_b = tail_t[start:end]
            # sample
            (pos_graph, pos_graph_r, neg_graph,
             pos_graph_v, neg_graph_v,
             extra_v_u_id, extra_u_v_id, extra_neg_id) = \
                sampler.obtain_Bs(head_b, tail_b, start)
            # cuda; note graph.to is not in-place, so reassign
            if use_cuda:
                device = torch.device('cuda:{}'.format(gpu))
                pos_graph = pos_graph.to(device)
                pos_graph_r = pos_graph_r.to(device)
                neg_graph = neg_graph.to(device)
                pos_graph_v = pos_graph_v.to(device)
                neg_graph_v = neg_graph_v.to(device)
            # id
            head_id = pos_graph.srcdata[dgl.NID]
            tail_id = pos_graph.dstdata[dgl.NID]
            head_id_r = pos_graph_r.srcdata[dgl.NID]
            tail_id_r = pos_graph_r.dstdata[dgl.NID]
            head_id_neg = neg_graph.srcdata[dgl.NID]
            tail_id_neg = neg_graph.dstdata[dgl.NID]
            head_id_out = pos_graph_v.srcdata[dgl.NID]
            tail_id_out = pos_graph_v.dstdata[dgl.NID]
            # input
            si_b, sj_b = si[head_id], sj[tail_id]
            si_b_r, sj_b_r = sj[head_id_r], si[tail_id_r]
            si_b_n, sj_b_n = si[head_id_neg], sj[tail_id_neg]
            vi_b, vj_b = features_u[head_id], features_v[tail_id]
            vi_b_r, vj_b_r = features_v[head_id_r], features_u[tail_id_r]
            vi_b_n, vj_b_n = features_u[head_id_neg], features_v[tail_id_neg]
            e_b = torch.cat([features_e[extra_u_v_id],
                             features_e[start + n_train:end + n_train]], dim=0)
            e_b_r = torch.cat([features_e[extra_v_u_id],
                               features_e[start + n_train:end + n_train]], dim=0)
            e_b_n = torch.cat([features_e[extra_neg_id],
                               features_e[start + n_train:end + n_train]], dim=0)
            t_b = torch.cat([t[extra_u_v_id], t[start + n_train:end + n_train]])
            t_b_r = torch.cat([t[extra_v_u_id], t[start + n_train:end + n_train]])
            t_b_n = torch.cat([t[extra_neg_id], t[start + n_train:end + n_train]])
            # forward
            if advanced:
                zi_b, zj_b, zn_b, si_b2, sj_b2 = model.infer(
                    pos_graph, pos_graph_r, neg_graph,
                    si_b, sj_b, si_b_r, sj_b_r, si_b_n, sj_b_n,
                    e_b, t_b, vi_b, vj_b, vi_b_r, vj_b_r, vi_b_n, vj_b_n)
            else:
                zi_b, zj_b, zn_b = model.forward(
                    pos_graph, pos_graph_r, neg_graph,
                    si_b, sj_b, si_b_r, sj_b_r, si_b_n, sj_b_n,
                    e_b, e_b_r, e_b_n, t_b, t_b_r, t_b_n,
                    vi_b, vj_b, vi_b_r, vj_b_r, vi_b_n, vj_b_n)
                si_b2, sj_b2 = model.evolve(pos_graph, pos_graph_r,
                                            si_b, sj_b, si_b_r, sj_b_r,
                                            t_b, t_b_r, e_b, e_b_r)
            # output
            si[head_id_out], sj[tail_id_out] = si_b2, sj_b2
            zi[head_id_out], zj[tail_id_out] = zi_b, zj_b
            # eval
            pos_graph_v.srcdata['z'] = zi_b
            pos_graph_v.dstdata['z'] = zj_b
            pos_graph_v.apply_edges(fn.u_dot_v('z', 'z', 'score'))
            pos_score = pos_graph_v.edata['score']
            neg_graph_v.srcdata['z'] = zi_b
            neg_graph_v.dstdata['z'] = zn_b
            neg_graph_v.apply_edges(fn.u_dot_v('z', 'z', 'score'))
            neg_score = neg_graph_v.edata['score']
            # inductive
            if inductive:
                id_tmp = new_id[start:end]
                pos_score = pos_score[np.where(id_tmp == 1)]
                neg_score = neg_score[np.where(id_tmp == 1)]
            # metrics
            score = torch.cat([pos_score, neg_score]).view(-1, 1).cpu().numpy()
            target = torch.cat([torch.ones(pos_score.shape[0]),
                                torch.zeros(neg_score.shape[0])]).cpu().numpy()
            if len(pos_score) > 0:
                val_ap.append(average_precision_score(target, score))
                val_auc.append(roc_auc_score(target, score))
    model.train()
    res = {'AP': np.mean(val_ap), 'AUC': np.mean(val_auc)}
    return res
def forward(self, graph, feat): r"""Compute graph attention network layer. Parameters ---------- graph : DGLGraph The graph. feat : torch.Tensor or pair of torch.Tensor If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. If a pair of torch.Tensor is given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. Returns ------- torch.Tensor The output feature of shape :math:`(N, H, D_{out})` where :math:`H` is the number of heads, and :math:`D_{out}` is size of output feature. """ graph = graph.local_var() if isinstance(feat, tuple): h_src = self.feat_drop(feat[0]) h_dst = self.feat_drop(feat[1]) feat_src = self.fc_src(h_src).view(-1, self._num_heads, self._out_feats) feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads, self._out_feats) else: h_src = h_dst = self.feat_drop(feat) feat_src = feat_dst = self.fc(h_src).view(-1, self._num_heads, self._out_feats) if self.opt['att_type'] == "GAT": # NOTE: GAT paper uses "first concatenation then linear projection" # to compute attention scores, while ours is "first projection then # addition", the two approaches are mathematically equivalent: # We decompose the weight vector a mentioned in the paper into # [a_l || a_r], then # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j # Our implementation is much efficient because we do not need to # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus, # addition could be optimized with DGL's built-in function u_add_v, # which further speeds up computation and saves memory footprint. el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1) er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1) graph.srcdata.update({'ft': feat_src, 'el': el}) graph.dstdata.update({'er': er}) # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively. 
graph.apply_edges(fn.u_add_v('el', 'er', 'e')) e = self.leaky_relu(graph.edata.pop('e')) elif self.opt['att_type'] == "cosine": el = feat_src * self.attn_l er = feat_dst * self.attn_r graph.srcdata.update({'ft': feat_src, 'el': el}) graph.dstdata.update({'er': er}) graph.srcdata['norm_h'] = F.normalize(el, p=2, dim=-1) graph.dstdata['norm_h'] = F.normalize(er, p=2, dim=-1) # compute cosine distance graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos')) e = graph.edata.pop('cos') elif self.opt['att_type'] == "scaled_dot": el = feat_src * self.attn_l er = feat_dst * self.attn_r / th.sqrt( th.tensor(self.opt['num_hidden'] / self.opt['num_heads'])) graph.srcdata.update({'ft': feat_src, 'el': el}) graph.dstdata.update({'er': er}) # compute dot graph.apply_edges(fn.u_dot_v('el', 'er', 'dot')) e = graph.edata.pop('dot') elif self.opt['att_type'] == "pearson": el = feat_src * self.attn_l er = feat_dst * self.attn_r graph.srcdata.update({'ft': feat_src, 'el': el}) graph.dstdata.update({'er': er}) src_mu = th.mean(el, dim=1, keepdim=True) graph.srcdata['norm_h'] = F.normalize(el - src_mu, p=2, dim=-1) dst_mu = th.mean(er, dim=1, keepdim=True) graph.dstdata['norm_h'] = F.normalize(er - dst_mu, p=2, dim=-1) # compute cosine distance graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos')) e = graph.edata.pop('cos') elif self.opt['att_type'] == "spearman": #todo check all these operations el = feat_src * self.attn_l er = feat_dst * self.attn_r graph.srcdata.update({'ft': feat_src, 'el': el}) graph.dstdata.update({'er': er}) el = el.view(-1, self._out_feats) er = er.view(-1, self._out_feats) el = soft_rank(el, regularization_strength=1.0) er = soft_rank(er, regularization_strength=1.0) ranked_src = soft_rank( 1000 * F.normalize(el, p=2, dim=-1)) #, regularization_strength=0.1) ranked_dst = soft_rank(1000 * F.normalize(er, p=2, dim=-1), regularization_strength=0.1) src_mu = th.mean(ranked_src, dim=1, keepdim=True) dst_mu = th.mean(ranked_dst, dim=1, keepdim=True) el = F.normalize(ranked_src - src_mu, p=2, dim=-1) er = F.normalize(ranked_dst - dst_mu, p=2, dim=-1) el = el.view(-1, self._num_heads, self._out_feats) er = er.view(-1, self._num_heads, self._out_feats) graph.srcdata['norm_h'] = F.normalize(el, p=2, dim=-1) graph.dstdata['norm_h'] = F.normalize(er, p=2, dim=-1) # compute cosine distance graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos')) e = graph.edata.pop('cos') # compute softmax graph.edata['a'] = self.attn_drop(edge_softmax(graph, e)) # message passing graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft')) rst = graph.dstdata['ft'] # residual if self.res_fc is not None: resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats) rst = rst + resval # activation if self.activation: rst = self.activation(rst) return rst
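# --- Summary of the attention-score variants implemented above (notation
# read off the code: el = attn_l-weighted source features, er = attn_r-weighted
# destination features):
#   GAT:        e_ij = LeakyReLU(a_l Wh_i + a_r Wh_j), via u_add_v
#   cosine:     e_ij = <el/||el||, er/||er||>, via u_dot_v on normalized features
#   scaled_dot: e_ij = <el, er> / sqrt(num_hidden / num_heads)
#   pearson:    cosine similarity after centering el and er by their means
#   spearman:   cosine similarity of centered soft ranks (soft_rank) of el, er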
def forward(self, g, h):
    with g.local_scope():
        g.ndata['h'] = h
        g.apply_edges(fn.u_dot_v('h', 'h', 'score'))
        return g.edata['score']
def forward(self, graph, h): with graph.local_scope(): graph.ndata["h"] = h graph.apply_edges(fn.u_dot_v("h", "h", "score")) return graph.edata["score"]
def forward(self, graph, h, etype):
    # h contains the node representations for each node type computed from
    # node_clf_hetero.py.
    with graph.local_scope():
        graph.ndata['h'] = h  # assigns 'h' to all node types in one shot
        graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype)
        return graph.edges[etype].data['score']