def inference(self, mode='validation', verbose=False): assert mode in ['validation', 'testing'], "got mode {}".format(mode) from dgl.dataloading import NodeDataLoader, MultiLayerNeighborSampler self.eval() if mode == 'testing': sampler = MultiLayerNeighborSampler([None]) else: sampler = MultiLayerNeighborSampler(self.fans) g = self.cpu_graph kwargs = { 'batch_size': 64, 'shuffle': True, 'drop_last': False, 'num_workers': 6, } dataloader = NodeDataLoader(g, th.arange(g.number_of_nodes()), sampler, **kwargs) if verbose: dataloader = tqdm(dataloader) x = self.embedding.weight x = th.cat((self.W1(x[:self.num_users]), self.W2(x[self.num_users:])), dim=0) # Within a layer, iterate over nodes in batches for input_nodes, output_nodes, blocks in dataloader: block = blocks[0].to(commons.device) h = self.forward_block(block, x[input_nodes]) self.check_point[output_nodes] = h if verbose: print('Inference Done Successfully')
def inference(self, mode='validation', verbose=False): assert mode in ['validation', 'testing'], "got mode {}".format(mode) from dgl.dataloading import NodeDataLoader, MultiLayerNeighborSampler self.eval() if mode == 'testing': sampler = MultiLayerNeighborSampler([None] * self.num_layers) else: sampler = MultiLayerNeighborSampler(self.fans) g = self.cpu_graph kwargs = { 'batch_size': 1024, 'shuffle': True, 'drop_last': False, 'num_workers': commons.workers, } dataloader = NodeDataLoader(g, th.arange(g.number_of_nodes()), sampler, **kwargs) # Within a layer, iterate over nodes in batches if verbose: dataloader = tqdm(dataloader) for input_nodes, output_nodes, blocks in dataloader: blocks = [x.to(commons.device) for x in blocks] users = th.arange(output_nodes.shape[0]).long().to(self.device) d1 = th.zeros((0, )).long().to(self.device) d2 = th.zeros((0, )).long().to(self.device) h = self.forward_blocks(blocks, users, d1, d2)[0] self.check_point[output_nodes] = h if verbose: print('Inference Done Successfully')
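# Both inference variants above cache the final per-node embeddings in `self.check_point`.
# A minimal, hypothetical consumer of that cache (the function name and the dot-product
# scoring below are assumptions for illustration, not part of the original code):
def score_all_pairs(model):
    """Sketch: run layer-wise inference, then score every user-item pair
    from the cached embeddings in ``model.check_point``."""
    model.inference(mode='validation', verbose=True)
    user_embeds = model.check_point[:model.num_users]   # (num_users, d)
    item_embeds = model.check_point[model.num_users:]   # (num_items, d)
    return user_embeds @ item_embeds.t()                # (num_users, num_items) score matrix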
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    g, author_rank, field_ids, true_relevance = load_rank_data(device)
    field_paper = recall_paper(g.cpu(), field_ids, args.num_recall)
    data = RatingKnowledgeGraphDataset()
    user_item_graph = data.user_item_graph
    knowledge_graph = dgl.sampling.sample_neighbors(
        data.knowledge_graph, data.knowledge_graph.nodes(), args.neighbor_size, replace=True
    )
    sampler = MultiLayerNeighborSampler([args.neighbor_size] * args.num_hops)
    train_loader = KGCNEdgeDataLoader(
        user_item_graph, torch.arange(user_item_graph.num_edges()), sampler, knowledge_graph,
        device=device, batch_size=args.batch_size
    )
    model = KGCN(args.num_hidden, args.neighbor_size, 'sum', args.num_hops, *data.get_num()).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    for epoch in range(args.epochs):
        model.train()
        losses = []
        for _, pair_graph, blocks in train_loader:
            scores = model(pair_graph, blocks)
            loss = F.binary_cross_entropy(scores, pair_graph.edata['label'])
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('Epoch {:d} | Loss {:.4f}'.format(epoch, sum(losses) / len(losses)))
        print(METRICS_STR.format(*evaluate(
            model, g, knowledge_graph, sampler, field_ids, author_rank, true_relevance, field_paper
        )))
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    data, g, _, labels, predict_ntype, train_idx, val_idx, test_idx, evaluator = \
        load_data(args.dataset, device)
    add_node_feat(g, 'pretrained', args.node_embed_path, True)
    sampler = MultiLayerNeighborSampler(
        list(range(args.neighbor_size, args.neighbor_size + args.num_layers)))
    train_loader = NodeDataLoader(g, {predict_ntype: train_idx}, sampler, device=device,
                                  batch_size=args.batch_size)
    loader = NodeDataLoader(g, {predict_ntype: g.nodes(predict_ntype)}, sampler, device=device,
                            batch_size=args.batch_size)
    model = RHGNN(
        {ntype: g.nodes[ntype].data['feat'].shape[1] for ntype in g.ntypes},
        args.num_hidden, data.num_classes, args.num_rel_hidden, args.num_rel_hidden,
        args.num_heads, g.ntypes, g.canonical_etypes, predict_ntype, args.num_layers,
        args.dropout).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=len(train_loader) * args.epochs, eta_min=args.lr / 100)
    warnings.filterwarnings('ignore', 'Setting attributes on ParameterDict is not supported')
    for epoch in range(args.epochs):
        model.train()
        losses = []
        for input_nodes, output_nodes, blocks in tqdm(train_loader):
            batch_logits = model(blocks, blocks[0].srcdata['feat'])
            batch_labels = labels[output_nodes[predict_ntype]]
            loss = F.cross_entropy(batch_logits, batch_labels)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            torch.cuda.empty_cache()
        print('Epoch {:d} | Loss {:.4f}'.format(epoch, sum(losses) / len(losses)))
        if epoch % args.eval_every == 0 or epoch == args.epochs - 1:
            print(METRICS_STR.format(*evaluate(
                model, loader, g, labels, data.num_classes, predict_ntype,
                train_idx, val_idx, test_idx, evaluator)))
    if args.save_path:
        torch.save(model.cpu().state_dict(), args.save_path)
        print('Model saved to', args.save_path)
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    data, g, _, labels, predict_ntype, train_idx, val_idx, test_idx, evaluator = \
        load_data(args.dataset, device)
    add_node_feat(g, args.node_feat, args.node_embed_path)
    sampler = MultiLayerNeighborSampler([args.neighbor_size] * args.num_layers)
    train_loader = NodeDataLoader(g, {predict_ntype: train_idx}, sampler, device=device,
                                  batch_size=args.batch_size)
    loader = NodeDataLoader(g, {predict_ntype: g.nodes(predict_ntype)}, sampler, device=device,
                            batch_size=args.batch_size)
    model = HGT(
        {ntype: g.nodes[ntype].data['feat'].shape[1] for ntype in g.ntypes},
        args.num_hidden, data.num_classes, args.num_heads, g.ntypes, g.canonical_etypes,
        predict_ntype, args.num_layers, args.dropout).to(device)
    optimizer = optim.AdamW(model.parameters(), eps=1e-6)
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer, args.max_lr, epochs=args.epochs, steps_per_epoch=len(train_loader),
        pct_start=0.05, anneal_strategy='linear', final_div_factor=10.0)
    warnings.filterwarnings('ignore', 'Setting attributes on ParameterDict is not supported')
    for epoch in range(args.epochs):
        model.train()
        losses = []
        for input_nodes, output_nodes, blocks in tqdm(train_loader):
            batch_logits = model(blocks, blocks[0].srcdata['feat'])
            batch_labels = labels[output_nodes[predict_ntype]]
            loss = F.cross_entropy(batch_logits, batch_labels)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            torch.cuda.empty_cache()
        print('Epoch {:d} | Loss {:.4f}'.format(epoch, sum(losses) / len(losses)))
        if epoch % args.eval_every == 0 or epoch == args.epochs - 1:
            print(METRICS_STR.format(*evaluate(
                model, loader, g, labels, data.num_classes, predict_ntype,
                train_idx, val_idx, test_idx, evaluator)))
    if args.save_path:
        torch.save(model.cpu().state_dict(), args.save_path)
        print('Model saved to', args.save_path)
def calc_attn_pos(g, num_classes, predict_ntype, num_samples, device, args):
    """Select positive samples for target nodes using attention weights computed by a pre-trained HGT model."""
    # Layer 1 keeps only A-B edges and layer 2 keeps only B-A edges,
    # where A is the target node type and B is an intermediate node type
    num_neighbors = [{}, {}]
    # Metapaths of the form A-B-A, where A is the target node type
    metapaths = []
    rev_etype = {
        e: next(re for rs, re, rd in g.canonical_etypes if rs == d and rd == s and re != e)
        for s, e, d in g.canonical_etypes
    }
    for s, e, d in g.canonical_etypes:
        if d == predict_ntype:
            re = rev_etype[e]
            num_neighbors[0][re] = num_neighbors[1][e] = 10
            metapaths.append((re, e))
    for i in range(len(num_neighbors)):
        d = dict.fromkeys(g.etypes, 0)
        d.update(num_neighbors[i])
        num_neighbors[i] = d
    sampler = MultiLayerNeighborSampler(num_neighbors)
    loader = NodeDataLoader(g, {predict_ntype: g.nodes(predict_ntype)}, sampler,
                            device=device, batch_size=args.batch_size)
    model = HGT(
        {ntype: g.nodes[ntype].data['feat'].shape[1] for ntype in g.ntypes},
        args.num_hidden, num_classes, args.num_heads, g.ntypes, g.canonical_etypes,
        predict_ntype, 2, args.dropout).to(device)
    model.load_state_dict(torch.load(args.hgt_model_path, map_location=device))
    # One positive-sample graph G_ABA per metapath A-B-A, plus an overall positive-sample graph G_pos
    pos = [
        torch.zeros(g.num_nodes(predict_ntype), num_samples, dtype=torch.long, device=device)
        for _ in range(len(metapaths) + 1)
    ]
    with torch.no_grad():
        for input_nodes, output_nodes, blocks in tqdm(loader):
            _ = model(blocks, blocks[0].srcdata['feat'])
            # List[tensor(N_src, N_dst)]
            attn = [calc_attn(mp, model, blocks, device).t() for mp in metapaths]
            for i in range(len(attn)):
                _, nid = torch.topk(attn[i], num_samples)  # (N_dst, T_pos)
                # nid are source node ids in blocks[0]; convert them to node ids in the original heterogeneous graph
                pos[i][output_nodes[predict_ntype]] = input_nodes[predict_ntype][nid]
            _, nid = torch.topk(sum(attn), num_samples)
            pos[-1][output_nodes[predict_ntype]] = input_nodes[predict_ntype][nid]
    return [p.cpu() for p in pos]
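# Each tensor returned by calc_attn_pos stores, per target node, the ids of its
# top-k attention neighbors. A minimal, hypothetical sketch of turning one such
# tensor into a homogeneous DGL positive-sample graph (the helper name is an
# assumption; `torch` and `dgl` are assumed imported as in the surrounding code):
def build_pos_graph(pos_ids):
    # pos_ids: (num_target_nodes, num_samples) long tensor of positive-sample node ids
    num_nodes, num_samples = pos_ids.shape
    dst = torch.arange(num_nodes).repeat_interleave(num_samples)
    src = pos_ids.reshape(-1)
    # Each edge (src, dst) links a target node to one of its positive samples
    return dgl.graph((src, dst), num_nodes=num_nodes)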
def init_dataloaders(args, g, train_idx, test_idx, target_idx, device, use_ddp=False):
    fanouts = [int(fanout) for fanout in args.fanout.split(',')]
    sampler = MultiLayerNeighborSampler(fanouts)
    train_loader = DataLoader(g, target_idx[train_idx], sampler, use_ddp=use_ddp, device=device,
                              batch_size=args.batch_size, shuffle=True, drop_last=False)
    # The datasets do not have a validation subset, use the train subset
    val_loader = DataLoader(g, target_idx[train_idx], sampler, use_ddp=use_ddp, device=device,
                            batch_size=args.batch_size, shuffle=False, drop_last=False)
    # -1 for sampling all neighbors
    test_sampler = MultiLayerNeighborSampler([-1] * len(fanouts))
    test_loader = DataLoader(g, target_idx[test_idx], test_sampler, use_ddp=use_ddp, device=device,
                             batch_size=32, shuffle=False, drop_last=False)
    return train_loader, val_loader, test_loader
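# Hypothetical usage sketch for the loaders above (the training-loop shape, the
# label lookup and the model's (blocks, features) call signature are assumptions,
# not part of the original code):
def run_one_epoch(model, train_loader, labels, optimizer):
    model.train()
    for input_nodes, output_nodes, blocks in train_loader:
        logits = model(blocks, blocks[0].srcdata['feat'])
        loss = F.cross_entropy(logits, labels[output_nodes])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()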
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    g, labels, num_classes, train_idx, val_idx, test_idx, evaluator = \
        load_data(args.ogb_path, device)
    load_pretrained_node_embed(g, args.node_embed_path)
    g = g.to(device)
    sampler = MultiLayerNeighborSampler(
        list(range(args.neighbor_size, args.neighbor_size + args.num_layers))
    )
    train_loader = NodeDataLoader(g, {'paper': train_idx}, sampler, device=device, batch_size=args.batch_size)
    val_loader = NodeDataLoader(g, {'paper': val_idx}, sampler, device=device, batch_size=args.batch_size)
    test_loader = NodeDataLoader(g, {'paper': test_idx}, sampler, device=device, batch_size=args.batch_size)
    model = RHGNN(
        {ntype: g.nodes[ntype].data['feat'].shape[1] for ntype in g.ntypes},
        args.num_hidden, num_classes, args.num_rel_hidden, args.num_rel_hidden, args.num_heads,
        g.ntypes, g.canonical_etypes, 'paper', args.num_layers, args.dropout, residual=args.residual
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=len(train_loader) * args.epochs, eta_min=args.lr / 100
    )
    warnings.filterwarnings('ignore', 'Setting attributes on ParameterDict is not supported')
    for epoch in range(args.epochs):
        model.train()
        logits, train_labels, losses = [], [], []
        for input_nodes, output_nodes, blocks in tqdm(train_loader):
            batch_labels = labels[output_nodes['paper']]
            batch_logits = model(blocks, blocks[0].srcdata['feat'])
            loss = F.cross_entropy(batch_logits, batch_labels.squeeze(dim=1))
            logits.append(batch_logits.detach().cpu())
            train_labels.append(batch_labels.detach().cpu())
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            torch.cuda.empty_cache()
        train_acc = accuracy(torch.cat(logits, dim=0), torch.cat(train_labels, dim=0), evaluator)
        val_acc = evaluate(val_loader, device, model, labels, evaluator)
        test_acc = evaluate(test_loader, device, model, labels, evaluator)
        print('Epoch {:d} | Train Loss {:.4f} | Train Acc {:.4f} | Val Acc {:.4f} | Test Acc {:.4f}'.format(
            epoch, torch.tensor(losses).mean().item(), train_acc, val_acc, test_acc
        ))
    # embed = model.inference(g, g.ndata['feat'], device, args.batch_size)
    # test_acc = accuracy(embed[test_idx], labels[test_idx], evaluator)
    test_acc = evaluate(test_loader, device, model, labels, evaluator)
    print('Test Acc {:.4f}'.format(test_acc))
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    data = RatingKnowledgeGraphDataset(args.dataset)
    user_item_graph = data.user_item_graph
    knowledge_graph = dgl.sampling.sample_neighbors(
        data.knowledge_graph, data.knowledge_graph.nodes(), args.neighbor_size, replace=True
    )
    train_eids, test_eids = train_test_split(
        torch.arange(user_item_graph.num_edges()), train_size=args.train_size, random_state=args.seed
    )
    sampler = MultiLayerNeighborSampler([args.neighbor_size] * args.num_hops)
    train_loader = KGCNEdgeDataLoader(
        user_item_graph, train_eids, sampler, knowledge_graph,
        device=device, batch_size=args.batch_size
    )
    test_loader = KGCNEdgeDataLoader(
        user_item_graph, test_eids, sampler, knowledge_graph,
        device=device, batch_size=args.batch_size
    )
    model = KGCN(args.num_hidden, args.neighbor_size, args.aggregator, args.num_hops, *data.get_num()).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    for epoch in range(args.epochs):
        model.train()
        losses = []
        for _, pair_graph, blocks in train_loader:
            scores = model(pair_graph, blocks)
            loss = F.binary_cross_entropy(scores, pair_graph.edata['label'])
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print('Epoch {:d} | Train Loss {:.4f} | Train AUC {:.4f} | Train F1 {:.4f} | Test AUC {:.4f} | Test F1 {:.4f}'.format(
            epoch, sum(losses) / len(losses), *evaluate(model, train_loader), *evaluate(model, test_loader)
        ))
def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running):
    evaluator_wrapper = lambda pred, labels: evaluator.eval({
        "y_pred": pred, "y_true": labels
    })["rocauc"]

    train_batch_size = (len(train_idx) + 9) // 10
    # batch_size = len(train_idx)
    train_sampler = MultiLayerNeighborSampler([16 for _ in range(args.n_layers)])
    # sampler = MultiLayerFullNeighborSampler(args.n_layers)
    train_dataloader = DataLoaderWrapper(
        NodeDataLoader(
            graph.cpu(),
            train_idx.cpu(),
            train_sampler,
            batch_sampler=BatchSampler(len(train_idx), batch_size=train_batch_size),
            num_workers=4,
        ))
    eval_sampler = MultiLayerNeighborSampler([60 for _ in range(args.n_layers)])
    # sampler = MultiLayerFullNeighborSampler(args.n_layers)
    eval_dataloader = DataLoaderWrapper(
        NodeDataLoader(
            graph.cpu(),
            torch.cat([train_idx.cpu(), val_idx.cpu(), test_idx.cpu()]),
            eval_sampler,
            batch_sampler=BatchSampler(graph.number_of_nodes(), batch_size=32768),
            num_workers=4,
        ))

    criterion = nn.BCEWithLogitsLoss()
    model = gen_model(args).to(device)
    optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.wd)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="max", factor=0.75, patience=50, verbose=True)

    total_time = 0
    val_score, best_val_score, final_test_score = 0, 0, 0
    train_scores, val_scores, test_scores = [], [], []
    losses, train_losses, val_losses, test_losses = [], [], [], []
    final_pred = None

    for epoch in range(1, args.n_epochs + 1):
        tic = time.time()
        loss = train(args, model, train_dataloader, labels, train_idx, criterion, optimizer, evaluator_wrapper)
        toc = time.time()
        total_time += toc - tic

        if epoch == args.n_epochs or epoch % args.eval_every == 0 or epoch % args.log_every == 0:
            train_score, val_score, test_score, train_loss, val_loss, test_loss, pred = evaluate(
                args, model, eval_dataloader, labels, train_idx, val_idx, test_idx, criterion, evaluator_wrapper)

            if val_score > best_val_score:
                best_val_score = val_score
                final_test_score = test_score
                final_pred = pred

            if epoch % args.log_every == 0:
                print(
                    f"Run: {n_running}/{args.n_runs}, Epoch: {epoch}/{args.n_epochs}, Average epoch time: {total_time / epoch:.2f}s"
                )
                print(
                    f"Loss: {loss:.4f}\n"
                    f"Train/Val/Test loss: {train_loss:.4f}/{val_loss:.4f}/{test_loss:.4f}\n"
                    f"Train/Val/Test/Best val/Final test score: {train_score:.4f}/{val_score:.4f}/{test_score:.4f}/{best_val_score:.4f}/{final_test_score:.4f}"
                )

            for l, e in zip(
                [train_scores, val_scores, test_scores, losses, train_losses, val_losses, test_losses],
                [train_score, val_score, test_score, loss, train_loss, val_loss, test_loss],
            ):
                l.append(e)

        lr_scheduler.step(val_score)

    print("*" * 50)
    print(f"Best val score: {best_val_score}, Final test score: {final_test_score}")
    print("*" * 50)

    if args.plot_curves:
        fig = plt.figure(figsize=(24, 24))
        ax = fig.gca()
        ax.set_xticks(np.arange(0, args.n_epochs, 100))
        ax.set_yticks(np.linspace(0, 1.0, 101))
        ax.tick_params(labeltop=True, labelright=True)
        for y, label in zip([train_scores, val_scores, test_scores],
                            ["train score", "val score", "test score"]):
            plt.plot(range(1, args.n_epochs + 1, args.log_every), y, label=label, linewidth=1)
        ax.xaxis.set_major_locator(MultipleLocator(100))
        ax.xaxis.set_minor_locator(AutoMinorLocator(1))
        ax.yaxis.set_major_locator(MultipleLocator(0.01))
        ax.yaxis.set_minor_locator(AutoMinorLocator(2))
        plt.grid(which="major", color="red", linestyle="dotted")
        plt.grid(which="minor", color="orange", linestyle="dotted")
        plt.legend()
        plt.tight_layout()
        plt.savefig(f"gat_score_{n_running}.png")

        fig = plt.figure(figsize=(24, 24))
        ax = fig.gca()
        ax.set_xticks(np.arange(0, args.n_epochs, 100))
        ax.tick_params(labeltop=True, labelright=True)
        for y, label in zip([losses, train_losses, val_losses, test_losses],
                            ["loss", "train loss", "val loss", "test loss"]):
            plt.plot(range(1, args.n_epochs + 1, args.log_every), y, label=label, linewidth=1)
        ax.xaxis.set_major_locator(MultipleLocator(100))
        ax.xaxis.set_minor_locator(AutoMinorLocator(1))
        ax.yaxis.set_major_locator(MultipleLocator(0.1))
        ax.yaxis.set_minor_locator(AutoMinorLocator(5))
        plt.grid(which="major", color="red", linestyle="dotted")
        plt.grid(which="minor", color="orange", linestyle="dotted")
        plt.legend()
        plt.tight_layout()
        plt.savefig(f"gat_loss_{n_running}.png")

    if args.save_pred:
        os.makedirs("./output", exist_ok=True)
        torch.save(F.softmax(final_pred, dim=1), f"./output/{n_running}.pt")

    return best_val_score, final_test_score
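# `BatchSampler` and `DataLoaderWrapper` used above come from the surrounding
# repository. A minimal, hypothetical sketch of the idea behind such a batch
# sampler (an illustration, not the repository's actual implementation): it
# yields lists of indices into the node-id tensor passed to NodeDataLoader.
class RandomBatchSampler:
    def __init__(self, num_items, batch_size, shuffle=True):
        self.num_items = num_items
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __iter__(self):
        # Random permutation per epoch, then fixed-size chunks
        order = torch.randperm(self.num_items) if self.shuffle else torch.arange(self.num_items)
        for start in range(0, self.num_items, self.batch_size):
            yield order[start:start + self.batch_size].tolist()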
def train(args):
    set_random_seed(args.seed)
    data = DBLPFourAreaDataset()
    g = data[0]
    metapaths = data.metapaths
    predict_ntype = data.predict_ntype
    generate_one_hot_id(g)
    features = g.ndata['feat']  # Dict[str, tensor(N_i, d_i)]
    labels = g.nodes[predict_ntype].data['label']
    train_idx = g.nodes[predict_ntype].data['train_mask'].nonzero(as_tuple=True)[0]
    val_idx = g.nodes[predict_ntype].data['val_mask'].nonzero(as_tuple=True)[0]
    test_idx = g.nodes[predict_ntype].data['test_mask'].nonzero(as_tuple=True)[0]
    out_shape = (g.num_nodes(predict_ntype), data.num_classes)

    print('Generating metapath-based graphs (this is a bit slow)...')
    mgs = [metapath_based_graph(g, metapath) for metapath in metapaths]
    mgs[0].ndata['feat'] = features[predict_ntype]
    sampler = MultiLayerNeighborSampler([args.neighbor_size])
    collators = [NodeCollator(mg, None, sampler) for mg in mgs]
    train_dataloader = DataLoader(train_idx, batch_size=args.batch_size)
    val_dataloader = DataLoader(val_idx, batch_size=args.batch_size)
    test_dataloader = DataLoader(test_idx, batch_size=args.batch_size)

    metapaths_ntype = [to_ntype_list(g, metapath) for metapath in metapaths]
    model = MAGNNMinibatch(
        predict_ntype, metapaths_ntype,
        {ntype: feat.shape[1] for ntype, feat in features.items()},
        args.num_hidden, data.num_classes, args.num_heads, args.encoder, args.dropout)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    for epoch in range(args.epochs):
        model.train()
        losses = []
        train_logits = torch.zeros(out_shape)
        for batch in train_dataloader:
            gs = [collator.collate(batch)[2][0] for collator in collators]
            train_logits[batch] = logits = model(gs, features)
            loss = F.cross_entropy(logits, labels[batch])
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_metrics = micro_macro_f1_score(train_logits[train_idx], labels[train_idx])
        print('Epoch {:d} | Train Loss {:.4f} | Train Micro-F1 {:.4f} | Train Macro-F1 {:.4f}'
              .format(epoch, torch.tensor(losses).mean().item(), *train_metrics))
        if (epoch + 1) % 10 == 0:
            val_metrics = evaluate(out_shape, collators, val_dataloader, model, features, labels)
            print('Val Micro-F1 {:.4f} | Val Macro-F1 {:.4f}'.format(*val_metrics))
    test_metrics = evaluate(out_shape, collators, test_dataloader, model, features, labels)
    print('Test Micro-F1 {:.4f} | Test Macro-F1 {:.4f}'.format(*test_metrics))
def run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running):
    evaluator_wrapper = lambda pred, labels: evaluator.eval({
        "y_pred": pred.argmax(dim=-1, keepdim=True), "y_true": labels
    })["acc"]
    criterion = custom_loss_function

    n_train_samples = train_idx.shape[0]
    train_batch_size = (n_train_samples + 29) // 30
    train_sampler = MultiLayerNeighborSampler([10 for _ in range(args.n_layers)])
    train_dataloader = DataLoaderWrapper(
        DataLoader(
            graph.cpu(),
            train_idx.cpu(),
            train_sampler,
            batch_sampler=BatchSampler(len(train_idx), batch_size=train_batch_size, shuffle=True),
            num_workers=4,
        ))

    eval_batch_size = 32768
    eval_sampler = MultiLayerNeighborSampler([15 for _ in range(args.n_layers)])
    if args.estimation_mode:
        test_idx_during_training = test_idx[torch.arange(start=0, end=len(test_idx), step=45)]
    else:
        test_idx_during_training = test_idx
    eval_idx = torch.cat([train_idx.cpu(), val_idx.cpu(), test_idx_during_training.cpu()])
    eval_dataloader = DataLoaderWrapper(
        DataLoader(
            graph.cpu(),
            eval_idx,
            eval_sampler,
            batch_sampler=BatchSampler(len(eval_idx), batch_size=eval_batch_size, shuffle=False),
            num_workers=4,
        ))

    model = gen_model(args).to(device)
    optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.wd)
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="max", factor=0.7, patience=20, verbose=True, min_lr=1e-4)

    best_model_state_dict = None
    total_time = 0
    val_score, best_val_score, final_test_score = 0, 0, 0
    scores, train_scores, val_scores, test_scores = [], [], [], []
    losses, train_losses, val_losses, test_losses = [], [], [], []

    for epoch in range(1, args.n_epochs + 1):
        tic = time.time()
        score, loss = train(args, model, train_dataloader, labels, train_idx, criterion, optimizer, evaluator_wrapper)
        toc = time.time()
        total_time += toc - tic

        if epoch == args.n_epochs or epoch % args.eval_every == 0 or epoch % args.log_every == 0:
            train_score, val_score, test_score, train_loss, val_loss, test_loss = evaluate(
                args, model, eval_dataloader, labels, train_idx, val_idx, test_idx_during_training,
                criterion, evaluator_wrapper,
            )

            if val_score > best_val_score:
                best_val_score = val_score
                final_test_score = test_score
                if args.estimation_mode:
                    best_model_state_dict = {k: v.to("cpu") for k, v in model.state_dict().items()}

            if epoch == args.n_epochs or epoch % args.log_every == 0:
                print(
                    f"Run: {n_running}/{args.n_runs}, Epoch: {epoch}/{args.n_epochs}, Average epoch time: {total_time / epoch:.2f}s\n"
                    f"Loss: {loss:.4f}, Score: {score:.4f}\n"
                    f"Train/Val/Test loss: {train_loss:.4f}/{val_loss:.4f}/{test_loss:.4f}\n"
                    f"Train/Val/Test/Best val/Final test score: {train_score:.4f}/{val_score:.4f}/{test_score:.4f}/{best_val_score:.4f}/{final_test_score:.4f}"
                )

            for l, e in zip(
                [scores, train_scores, val_scores, test_scores, losses, train_losses, val_losses, test_losses],
                [score, train_score, val_score, test_score, loss, train_loss, val_loss, test_loss],
            ):
                l.append(e)

        lr_scheduler.step(val_score)

    if args.estimation_mode:
        model.load_state_dict(best_model_state_dict)
        eval_dataloader = DataLoaderWrapper(
            DataLoader(
                graph.cpu(),
                test_idx.cpu(),
                eval_sampler,
                batch_sampler=BatchSampler(len(test_idx), batch_size=eval_batch_size, shuffle=False),
                num_workers=4,
            ))
        final_test_score = evaluate(args, model, eval_dataloader, labels, train_idx, val_idx, test_idx,
                                    criterion, evaluator_wrapper)[2]

    print("*" * 50)
    print(f"Best val score: {best_val_score}, Final test score: {final_test_score}")
    print("*" * 50)

    if args.plot_curves:
        fig = plt.figure(figsize=(24, 24))
        ax = fig.gca()
        ax.set_xticks(np.arange(0, args.n_epochs, 100))
        ax.set_yticks(np.linspace(0, 1.0, 101))
        ax.tick_params(labeltop=True, labelright=True)
        for y, label in zip([train_scores, val_scores, test_scores],
                            ["train score", "val score", "test score"]):
            plt.plot(range(1, args.n_epochs + 1, args.log_every), y, label=label, linewidth=1)
        ax.xaxis.set_major_locator(MultipleLocator(10))
        ax.xaxis.set_minor_locator(AutoMinorLocator(1))
        ax.yaxis.set_major_locator(MultipleLocator(0.01))
        ax.yaxis.set_minor_locator(AutoMinorLocator(2))
        plt.grid(which="major", color="red", linestyle="dotted")
        plt.grid(which="minor", color="orange", linestyle="dotted")
        plt.legend()
        plt.tight_layout()
        plt.savefig(f"gat_score_{n_running}.png")

        fig = plt.figure(figsize=(24, 24))
        ax = fig.gca()
        ax.set_xticks(np.arange(0, args.n_epochs, 100))
        ax.tick_params(labeltop=True, labelright=True)
        for y, label in zip([losses, train_losses, val_losses, test_losses],
                            ["loss", "train loss", "val loss", "test loss"]):
            plt.plot(range(1, args.n_epochs + 1, args.log_every), y, label=label, linewidth=1)
        ax.xaxis.set_major_locator(MultipleLocator(10))
        ax.xaxis.set_minor_locator(AutoMinorLocator(1))
        ax.yaxis.set_major_locator(MultipleLocator(0.1))
        ax.yaxis.set_minor_locator(AutoMinorLocator(5))
        plt.grid(which="major", color="red", linestyle="dotted")
        plt.grid(which="minor", color="orange", linestyle="dotted")
        plt.legend()
        plt.tight_layout()
        plt.savefig(f"gat_loss_{n_running}.png")

    return best_val_score, final_test_score
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    g, author_rank, field_ids, true_relevance = load_rank_data(device)
    out_dim = g.nodes['field'].data['feat'].shape[1]
    add_node_feat(g, 'pretrained', args.node_embed_path, use_raw_id=True)
    field_paper = recall_paper(g.cpu(), field_ids, args.num_recall)  # {field_id: [paper_id]}

    sampler = MultiLayerNeighborSampler([args.neighbor_size] * args.num_layers)
    sampler.set_output_context(to_dgl_context(device))
    triplet_collator = TripletNodeCollator(g, sampler)

    model = RHGNN(
        {ntype: g.nodes[ntype].data['feat'].shape[1] for ntype in g.ntypes},
        args.num_hidden, out_dim, args.num_rel_hidden, args.num_rel_hidden, args.num_heads,
        g.ntypes, g.canonical_etypes, 'author', args.num_layers, args.dropout).to(device)
    if args.load_path:
        model.load_state_dict(torch.load(args.load_path, map_location=device))
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=len(field_ids) * args.epochs, eta_min=args.lr / 100)
    warnings.filterwarnings('ignore', 'Setting attributes on ParameterDict is not supported')

    for epoch in range(args.epochs):
        model.train()
        losses = []
        for f in tqdm(field_ids):
            false_author_ids = list(
                set(g.in_edges(field_paper[f], etype='writes')[0].tolist()) - set(author_rank[f]))
            triplets = sample_triplets(f, author_rank[f], false_author_ids, args.num_triplets).to(device)
            aid, blocks = triplet_collator.collate(triplets)
            author_embeds = model(blocks, blocks[0].srcdata['feat'])
            author_embeds = author_embeds / author_embeds.norm(dim=1, keepdim=True)
            aid_map = {a: i for i, a in enumerate(aid.tolist())}
            anchor = g.nodes['field'].data['feat'][triplets[:, 0]]
            positive = author_embeds[[aid_map[a] for a in triplets[:, 1].tolist()]]
            negative = author_embeds[[aid_map[a] for a in triplets[:, 2].tolist()]]
            loss = F.triplet_margin_loss(anchor, positive, negative, args.margin)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            torch.cuda.empty_cache()
        print('Epoch {:d} | Loss {:.4f}'.format(epoch, sum(losses) / len(losses)))
        torch.save(model.state_dict(), args.model_save_path)
        if epoch % args.eval_every == 0 or epoch == args.epochs - 1:
            print(METRICS_STR.format(*evaluate(
                model, g, out_dim, sampler, args.batch_size, device,
                field_ids, field_paper, author_rank, true_relevance)))
    torch.save(model.state_dict(), args.model_save_path)
    print('Model saved to', args.model_save_path)

    embeds = infer(model, g, 'author', out_dim, sampler, args.batch_size, device)
    author_embed_save_path = DATA_DIR / 'rank/author_embed.pkl'
    torch.save(embeds.cpu(), author_embed_save_path)
    print('Author embeddings saved to', author_embed_save_path)
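# Hypothetical downstream sketch (the function name and the cosine-similarity
# ranking are assumptions, not part of the original code): load the saved author
# embeddings and rank authors for one field against its feature vector, which has
# the same dimension as the author embeddings (out_dim above).
def rank_authors_for_field(g, field_id, k=100):
    author_embeds = torch.load(DATA_DIR / 'rank/author_embed.pkl')           # (num_authors, out_dim)
    field_feat = g.nodes['field'].data['feat'][field_id].cpu()               # (out_dim,)
    scores = author_embeds @ field_feat / author_embeds.norm(dim=1).clamp(min=1e-12)
    return scores.topk(k).indices  # ids of the k most similar authors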