def fused_gat(graph, feat_src, el, er, slope):
    g = graph._graph.get_immutable_gidx(utils.to_dgl_context(context(feat_src)))
    # Pre-allocate the buffers the fused kernel writes into: per-edge
    # exponentiated scores, per-node softmax sums, and the output features.
    exp = el.new_empty([g.number_of_edges()] + list(el.size()[1:]))
    s = th.empty_like(el)
    ret = th.empty_like(feat_src)
    return FusedGat.apply(g, feat_src, el, er, s, exp, ret, slope)
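# Hypothetical usage of the wrapper above inside a single GAT layer. This
# mirrors the B.fused_gat call in the layer forward() later in this section;
# the names gat_layer_fused, fc, attn_l, attn_r, num_heads and out_feats are
# illustrative assumptions (fc an nn.Linear, attn_l/attn_r parameters of shape
# (1, num_heads, out_feats)), not part of the original code.
def gat_layer_fused(graph, h, fc, attn_l, attn_r, num_heads, out_feats, slope=0.2):
    feat_src = fc(h).view(-1, num_heads, out_feats)      # projected node features
    el = (feat_src * attn_l).sum(dim=-1, keepdim=True)   # per-node "left" logits
    er = (feat_src * attn_r).sum(dim=-1, keepdim=True)   # per-node "right" logits
    # fused_gat is intended to compute leaky_relu(el_u + er_v), the edge
    # softmax and the weighted aggregation in a single fused pass.
    return fused_gat(graph, feat_src, el, er, slope)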
def forward(ctx, g, score, eids):
    # remember to save the graph to backward cache before making it
    # a local variable
    if not is_all(eids):
        g = g.edge_subgraph(eids.long())
    n_nodes = g.number_of_nodes()
    n_edges = g.number_of_edges()
    gidx = g._graph.get_immutable_gidx(utils.to_dgl_context(score.device))
    ctx.backward_cache = n_nodes, n_edges, gidx
    # g.update_all(fn.copy_e('s', 'm'), fn.max('m', 'smax'))
    smax = F.copy_reduce("max", gidx, TargetCode.EDGE, score, n_nodes)
    # g.apply_edges(fn.e_sub_v('s', 'smax', 'out'))
    out = F.binary_reduce("none", "sub", gidx, TargetCode.EDGE,
                          TargetCode.DST, score, smax, n_edges)
    # g.edata['out'] = th.exp(g.edata['out'])
    out = th.exp(out)
    # g.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum'))
    out_sum = F.copy_reduce("sum", gidx, TargetCode.EDGE, out, n_nodes)
    # g.apply_edges(fn.e_div_v('out', 'out_sum', 'out'))
    out = F.binary_reduce("none", "div", gidx, TargetCode.EDGE,
                          TargetCode.DST, out, out_sum, n_edges)
    ctx.save_for_backward(out)
    return out
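# The forward pass above is a numerically stable softmax of edge scores over
# the incoming edges of each destination node. A minimal pure-PyTorch sketch
# of the same computation, for reference only: `dst` is an assumed (E,) tensor
# of destination node ids (the DGL graph index provides this implicitly), and
# Tensor.scatter_reduce requires a reasonably recent PyTorch.
import torch

def edge_softmax_reference(score, dst, n_nodes):
    # max over incoming edges of each destination node (numerical stability)
    smax = torch.full((n_nodes,) + score.shape[1:], float('-inf'),
                      dtype=score.dtype, device=score.device)
    idx = dst.view(-1, *([1] * (score.dim() - 1))).expand_as(score)
    smax = smax.scatter_reduce(0, idx, score, reduce='amax')
    out = torch.exp(score - smax[dst])
    # sum of exponentials per destination node, then normalize each edge
    out_sum = torch.zeros_like(smax).index_add_(0, dst, out)
    return out / out_sum[dst]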
def forward(ctx, g, score, eids): """Forward function. Pseudo-code: .. code:: python score = dgl.EData(g, score) score_max = score.dst_max() # of type dgl.NData score = score - score_max # edge_sub_dst, ret dgl.EData score_sum = score.dst_sum() # of type dgl.NData out = score / score_sum # edge_div_dst, ret dgl.EData return out.data """ # remember to save the graph to backward cache before making it # a local variable if not is_all(eids): g = g.edge_subgraph(eids.long()) n_nodes = g.number_of_dst_nodes() n_edges = g.number_of_edges() # TODO(BarclayII): this is a temporary fix of memory leakage in PyTorch # in PR #1139. We should investigate further on what was actually happening # when implementing EdgeSoftmax with message passing API instead of # operators. score_context = utils.to_dgl_context(score.device) if isinstance(g, DGLGraph): gidx = g._graph.get_immutable_gidx(score_context) elif isinstance(g, DGLHeteroGraph): assert g._graph.number_of_etypes() == 1, \ "EdgeSoftmax only support one edge type" gidx = g._graph.get_unitgraph(0, score_context) ctx.backward_cache = n_nodes, n_edges, gidx #g.update_all(fn.copy_e('s', 'm'), fn.max('m', 'smax')) smax = F.copy_reduce('max', gidx, TargetCode.EDGE, score, n_nodes) #g.apply_edges(fn.e_sub_v('s', 'smax', 'out')) out = F.binary_reduce('none', 'sub', gidx, TargetCode.EDGE, TargetCode.DST, score, smax, n_edges) #g.edata['out'] = th.exp(g.edata['out']) out = th.exp(out) #g.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum')) out_sum = F.copy_reduce('sum', gidx, TargetCode.EDGE, out, n_nodes) #g.apply_edges(fn.e_div_v('out', 'out_sum', 'out')) out = F.binary_reduce('none', 'div', gidx, TargetCode.EDGE, TargetCode.DST, out, out_sum, n_edges) ctx.save_for_backward(out) return out
def gat_layer_dgl(feat, weight, attn_l, attn_r, in_feat_len, out_feat_len):
    # Note: `g`, `num_v` and `n_edges` come from the enclosing (benchmark) scope.
    feat2 = torch.mm(feat, weight)
    att_l = torch.mm(feat2, attn_l)
    att_r = torch.mm(feat2, attn_r)
    g.srcdata.update({'ft': feat2, 'el': att_l})
    g.dstdata.update({'er': att_r})
    g.apply_edges(fn.u_add_v('el', 'er', 'e'))
    e = torch.exp(F.leaky_relu(g.edata.pop('e'), 0.1))
    cont = utils.to_dgl_context(e.device)
    gidx = g._graph.get_immutable_gidx(cont)
    e_sum = backend.copy_reduce("sum", gidx, TargetCode.EDGE, e, num_v)
    att = backend.binary_reduce('none', 'div', gidx, TargetCode.EDGE,
                                TargetCode.DST, e, e_sum, n_edges)
    g.edata['a'] = att
    g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
    output = g.dstdata['ft']
    torch.cuda.synchronize()  # wait for GPU kernels to finish before returning
    return output
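# The same attention normalization can be written with DGL's public
# edge_softmax operator instead of the low-level copy_reduce / binary_reduce
# kernels. A sketch, assuming a DGL version that ships
# dgl.nn.functional.edge_softmax; the function name is illustrative.
import torch
import dgl.function as fn
from dgl.nn.functional import edge_softmax

def gat_layer_dgl_public(g, feat, weight, attn_l, attn_r):
    with g.local_scope():
        feat2 = torch.mm(feat, weight)
        g.srcdata.update({'ft': feat2, 'el': torch.mm(feat2, attn_l)})
        g.dstdata.update({'er': torch.mm(feat2, attn_r)})
        g.apply_edges(fn.u_add_v('el', 'er', 'e'))
        e = torch.nn.functional.leaky_relu(g.edata.pop('e'), 0.1)
        g.edata['a'] = edge_softmax(g, e)  # normalize over incoming edges
        g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
        return g.dstdata['ft']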
def nb_access_bench(graph, feat, node_map, deg_inc_node_map):
    g = graph._graph.get_immutable_gidx(utils.to_dgl_context(context(feat)))
    return NbAccess.apply(g, feat, node_map, deg_inc_node_map)
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    g, author_rank, field_ids, true_relevance = load_rank_data(device)
    out_dim = g.nodes['field'].data['feat'].shape[1]
    add_node_feat(g, 'pretrained', args.node_embed_path, use_raw_id=True)
    field_paper = recall_paper(g.cpu(), field_ids, args.num_recall)  # {field_id: [paper_id]}

    sampler = MultiLayerNeighborSampler([args.neighbor_size] * args.num_layers)
    sampler.set_output_context(to_dgl_context(device))
    triplet_collator = TripletNodeCollator(g, sampler)

    model = RHGNN(
        {ntype: g.nodes[ntype].data['feat'].shape[1] for ntype in g.ntypes},
        args.num_hidden, out_dim, args.num_rel_hidden, args.num_rel_hidden,
        args.num_heads, g.ntypes, g.canonical_etypes, 'author',
        args.num_layers, args.dropout).to(device)
    if args.load_path:
        model.load_state_dict(torch.load(args.load_path, map_location=device))
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=len(field_ids) * args.epochs, eta_min=args.lr / 100)
    warnings.filterwarnings(
        'ignore', 'Setting attributes on ParameterDict is not supported')
    for epoch in range(args.epochs):
        model.train()
        losses = []
        for f in tqdm(field_ids):
            false_author_ids = list(
                set(g.in_edges(field_paper[f], etype='writes')[0].tolist())
                - set(author_rank[f]))
            triplets = sample_triplets(
                f, author_rank[f], false_author_ids, args.num_triplets).to(device)
            aid, blocks = triplet_collator.collate(triplets)
            author_embeds = model(blocks, blocks[0].srcdata['feat'])
            author_embeds = author_embeds / author_embeds.norm(dim=1, keepdim=True)
            aid_map = {a: i for i, a in enumerate(aid.tolist())}
            anchor = g.nodes['field'].data['feat'][triplets[:, 0]]
            positive = author_embeds[[aid_map[a] for a in triplets[:, 1].tolist()]]
            negative = author_embeds[[aid_map[a] for a in triplets[:, 2].tolist()]]
            loss = F.triplet_margin_loss(anchor, positive, negative, args.margin)
            losses.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            torch.cuda.empty_cache()
        print('Epoch {:d} | Loss {:.4f}'.format(epoch, sum(losses) / len(losses)))
        torch.save(model.state_dict(), args.model_save_path)
        if epoch % args.eval_every == 0 or epoch == args.epochs - 1:
            print(METRICS_STR.format(*evaluate(
                model, g, out_dim, sampler, args.batch_size, device,
                field_ids, field_paper, author_rank, true_relevance)))
    torch.save(model.state_dict(), args.model_save_path)
    print('Model saved to', args.model_save_path)

    embeds = infer(model, g, 'author', out_dim, sampler, args.batch_size, device)
    author_embed_save_path = DATA_DIR / 'rank/author_embed.pkl'
    torch.save(embeds.cpu(), author_embed_save_path)
    print('Author embeddings saved to', author_embed_save_path)
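# F.triplet_margin_loss used above is PyTorch's standard triplet loss,
#   mean(max(d(anchor, positive) - d(anchor, negative) + margin, 0)),
# with d the (eps-smoothed) L2 distance. A minimal sketch of the equivalence;
# random tensors stand in for the real embeddings and are illustrative only.
import torch
import torch.nn.functional as F

a, p, n = torch.randn(4, 8), torch.randn(4, 8), torch.randn(4, 8)
manual = torch.clamp(F.pairwise_distance(a, p) - F.pairwise_distance(a, n) + 1.0,
                     min=0).mean()
print(manual, F.triplet_margin_loss(a, p, n, margin=1.0))  # the two should agree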
def forward(self, graph, feat):
    graph = graph.local_var()
    h_src = h_dst = self.feat_drop(feat)
    feat = self.fc(h_src).view(-1, self._num_heads, self._out_feats)
    ell = (self.attn_l * feat).sum(dim=-1, keepdim=True)
    err = (self.attn_r * feat).sum(dim=-1, keepdim=True)

    # Reference result computed with DGL's message passing API
    g = graph
    g.srcdata.update({'ft': feat, 'el': ell})
    g.dstdata.update({'er': err})
    g.apply_edges(fn.u_add_v('el', 'er', 'e'))
    e = self.leaky_relu(g.edata.pop('e'))
    g.edata['out'] = th.exp(e)
    g.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum'))
    g.apply_edges(fn.e_div_v('out', 'out_sum', 'out1'))
    # Omit attn_drop for deterministic execution
    g.edata['a'] = g.edata['out1']
    # message passing
    g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
    dglrst = g.dstdata['ft']

    fusedrst = B.fused_gat(g, feat, ell, err, self.negative_slope)

    dgl_context = utils.to_dgl_context(feat.device)
    graph = graph._graph.get_immutable_gidx(dgl_context)
    with self.cm.zoomIn(namespace=[self, th], graph=graph,
                        node_feats={'f': h_src}, edge_feats={}) as v:
        feat_src = [self.fc(n.f).view(self._num_heads, self._out_feats)
                    for n in v.innbs]
        el = [(nf * self.attn_l).sum(dim=-1, keepdim=True) for nf in feat_src]
        er = (self.fc(v.f).view(self._num_heads, self._out_feats)
              * self.attn_r).sum(dim=-1, keepdim=True)
        coeff = [th.exp(self.leaky_relu(l + er)) for l in el]
        s = sum(coeff)
        alpha = [c / s for c in coeff]
        rst = sum([ef[0] * ef[1] for ef in zip(alpha, feat_src)])
        self.cm.collect_output(rst)
    rst = self.cm.zoomOut()

    grad_out = th.ones_like(rst)
    egl_graer = grad(outputs=rst, inputs=self.cm._executor.ts.tensor_map['V7'],
                     grad_outputs=grad_out, retain_graph=True)
    egl_grael = grad(outputs=rst, inputs=self.cm._executor.ts.tensor_map['V3'],
                     grad_outputs=grad_out, retain_graph=True)
    dgl_graer = grad(outputs=dglrst, inputs=err, grad_outputs=grad_out,
                     retain_graph=True)
    dgl_grael = grad(outputs=dglrst, inputs=ell, grad_outputs=grad_out,
                     retain_graph=True)
    fused_graer = grad(outputs=fusedrst, inputs=err, grad_outputs=grad_out,
                       retain_graph=True)
    fused_grael = grad(outputs=fusedrst, inputs=ell, grad_outputs=grad_out,
                       retain_graph=True)
    print('rst close?', th.allclose(rst, dglrst), rst)
    # print('exp', g.edata['out'], 'div', g.edata['a'], 'rst', dglrst, 'feat', feat, 'ell', ell, 'err', err)
    print('\negl_graer', egl_graer, '\ndgl_graer', dgl_graer,
          '\nfused_graer', fused_graer,
          'egl close with dgl?', th.allclose(egl_graer[0], dgl_graer[0]))
    print('\negl_grael', egl_grael, '\ndgl_grael', dgl_grael,
          '\nfused_grael', fused_grael,
          'egl close with dgl?', th.allclose(egl_grael[0], dgl_grael[0]))
    # residual
    if self.res_fc is not None:
        resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats)
        rst = rst + resval
    # activation
    if self.activation:
        rst = self.activation(rst)
    return rst, dglrst