def pipeline_GC(top_k):
    dataset = get_dataset(data_args.dataset_dir, data_args.dataset_name)
    if data_args.dataset_name == 'mutag':
        data_indices = list(range(len(dataset)))
        pgexplainer_trainset = dataset
    else:
        loader = get_dataloader(dataset,
                                batch_size=train_args.batch_size,
                                random_split_flag=data_args.random_split,
                                data_split_ratio=data_args.data_split_ratio,
                                seed=data_args.seed)
        data_indices = loader['test'].dataset.indices
        pgexplainer_trainset = loader['train'].dataset

    input_dim = dataset.num_node_features
    output_dim = dataset.num_classes

    # load the trained GNN to be explained
    gnnNets = GnnNets(input_dim, output_dim, model_args)
    checkpoint = torch.load(model_args.model_path)
    gnnNets.update_state_dict(checkpoint['net'])
    gnnNets.to_device()
    gnnNets.eval()

    save_dir = os.path.join('./results',
                            f"{data_args.dataset_name}_"
                            f"{model_args.model_name}_"
                            f"pgexplainer")
    # os.mkdir fails when ./results does not exist yet; makedirs handles both levels
    os.makedirs(save_dir, exist_ok=True)

    # train the parameterized explainer network (timed, with CUDA sync for accuracy)
    pgexplainer = PGExplainer(gnnNets)
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    tic = time.perf_counter()
    pgexplainer.get_explanation_network(pgexplainer_trainset)
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    toc = time.perf_counter()
    training_duration = toc - tic
    print(f"training time is {training_duration:.4f}s")

    explain_duration = 0.0
    plotutils = PlotUtils(dataset_name=data_args.dataset_name)
    fidelity_score_list = []
    sparsity_score_list = []
    for data_idx in tqdm(data_indices):
        data = dataset[data_idx]
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        tic = time.perf_counter()
        prob = pgexplainer.eval_probs(data.x, data.edge_index)
        pred_label = prob.argmax(-1).item()

        # reuse a cached edge mask if this example has been explained before
        mask_files = glob.glob(os.path.join(save_dir, f"example_{data_idx}.pt"))
        if mask_files:
            edge_mask = torch.from_numpy(torch.load(mask_files[0]))
        else:
            edge_mask = pgexplainer.explain_edge_mask(data.x, data.edge_index)
            save_path = os.path.join(save_dir, f"example_{data_idx}.pt")
            edge_mask = edge_mask.cpu()
            torch.save(edge_mask.detach().numpy(), save_path)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        toc = time.perf_counter()
        explain_duration += (toc - tic)

        graph = to_networkx(data)
        fidelity_score = top_k_fidelity(data, edge_mask, top_k, gnnNets, pred_label)
        sparsity_score = top_k_sparsity(data, edge_mask, top_k)
        fidelity_score_list.append(fidelity_score)
        sparsity_score_list.append(sparsity_score)

        # visualization (text datasets carry sentence tokens in `supplement`)
        if hasattr(dataset, 'supplement'):
            words = dataset.supplement['sentence_tokens'][str(data_idx)]
            plotutils.plot_soft_edge_mask(graph, edge_mask, top_k,
                                          x=data.x,
                                          words=words,
                                          un_directed=True,
                                          figname=os.path.join(save_dir, f"example_{data_idx}.png"))
        else:
            plotutils.plot_soft_edge_mask(graph, edge_mask, top_k,
                                          x=data.x,
                                          un_directed=True,
                                          figname=os.path.join(save_dir, f"example_{data_idx}.png"))

    fidelity_scores = torch.tensor(fidelity_score_list)
    sparsity_scores = torch.tensor(sparsity_score_list)
    return fidelity_scores, sparsity_scores
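

# --- illustrative sketch (hypothetical, not the repo's implementation) ---
# `top_k_fidelity` and `top_k_sparsity` are imported metric helpers whose
# bodies are not shown in this file. The sketches below show one common way
# such metrics are defined: fidelity as the drop in the predicted-class
# probability once the top-k scored edges are removed, and sparsity as the
# fraction of edges excluded from the explanation. `predict_fn` is an
# assumed callable mapping (x, edge_index) -> a 1-D tensor of class
# probabilities, and `edge_mask` is assumed to hold one score per edge.

def sketch_top_k_fidelity(data, edge_mask, top_k, predict_fn, pred_label):
    k = min(top_k, edge_mask.numel())
    keep = edge_mask.topk(k).indices                 # the k most important edges
    drop = torch.ones(edge_mask.numel(), dtype=torch.bool)
    drop[keep] = False                               # True for edges that survive deletion
    probs_full = predict_fn(data.x, data.edge_index)
    probs_masked = predict_fn(data.x, data.edge_index[:, drop])
    return (probs_full[pred_label] - probs_masked[pred_label]).item()


def sketch_top_k_sparsity(data, edge_mask, top_k):
    # fraction of edges left out of the top-k explanation
    return 1.0 - min(top_k, edge_mask.numel()) / data.edge_index.shape[1]

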
def pipeline(max_nodes):
    dataset = get_dataset(data_args.dataset_dir, data_args.dataset_name)
    plotutils = PlotUtils(dataset_name=data_args.dataset_name)
    input_dim = dataset.num_node_features
    output_dim = dataset.num_classes
    if data_args.dataset_name == 'mutag':
        data_indices = list(range(len(dataset)))
    else:
        loader = get_dataloader(dataset,
                                batch_size=train_args.batch_size,
                                random_split_flag=data_args.random_split,
                                data_split_ratio=data_args.data_split_ratio,
                                seed=data_args.seed)
        data_indices = loader['test'].dataset.indices

    # load the trained GNN to be explained
    gnnNets = GnnNets(input_dim, output_dim, model_args)
    checkpoint = torch.load(mcts_args.explain_model_path)
    gnnNets.update_state_dict(checkpoint['net'])
    gnnNets.to_device()
    gnnNets.eval()

    save_dir = os.path.join('./results',
                            f"{mcts_args.dataset_name}_"
                            f"{model_args.model_name}_"
                            f"{reward_args.reward_method}")
    os.makedirs(save_dir, exist_ok=True)

    fidelity_score_list = []
    sparsity_score_list = []
    for i in tqdm(data_indices):
        # get data and prediction
        data = dataset[i]
        with torch.no_grad():
            _, probs, _ = gnnNets(Batch.from_data_list([data.clone()]))
        prediction = probs.squeeze().argmax(-1).item()
        original_score = probs.squeeze()[prediction]

        # get the reward function (e.g. L-Shapley, chosen via reward_args)
        value_func = GnnNets_GC2value_func(gnnNets, target_class=prediction)
        payoff_func = reward_func(reward_args, value_func)

        # MCTS search for the explanatory subgraph, cached per example
        result_path = os.path.join(save_dir, f"example_{i}.pt")
        mcts_state_map = MCTS(data.x, data.edge_index,
                              score_func=payoff_func,
                              n_rollout=mcts_args.rollout,
                              min_atoms=mcts_args.min_atoms,
                              c_puct=mcts_args.c_puct,
                              expand_atoms=mcts_args.expand_atoms)
        if os.path.isfile(result_path):
            results = torch.load(result_path)
        else:
            results = mcts_state_map.mcts(verbose=True)
            torch.save(results, result_path)

        # pick the best subgraph within the node budget and score it
        graph_node_x = find_closest_node_result(results, max_nodes=max_nodes)
        masked_node_list = [node for node in range(graph_node_x.data.x.shape[0])
                            if node not in graph_node_x.coalition]
        fidelity_score = original_score - gnn_score(masked_node_list, data, value_func,
                                                    subgraph_building_method='zero_filling')
        sparsity_score = 1 - len(graph_node_x.coalition) / graph_node_x.ori_graph.number_of_nodes()
        fidelity_score_list.append(fidelity_score)
        sparsity_score_list.append(sparsity_score)

        # visualization (text datasets carry sentence tokens in `supplement`)
        if hasattr(dataset, 'supplement'):
            words = dataset.supplement['sentence_tokens'][str(i)]
            plotutils.plot(graph_node_x.ori_graph, graph_node_x.coalition,
                           words=words,
                           figname=os.path.join(save_dir, f"example_{i}.png"))
        else:
            plotutils.plot(graph_node_x.ori_graph, graph_node_x.coalition,
                           x=graph_node_x.data.x,
                           figname=os.path.join(save_dir, f"example_{i}.png"))

    fidelity_scores = torch.tensor(fidelity_score_list)
    sparsity_scores = torch.tensor(sparsity_score_list)
    return fidelity_scores, sparsity_scores
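

# --- illustrative sketch (hypothetical, not the original implementation) ---
# `gnn_score` is imported; it evaluates `value_func` on a perturbed copy of
# the graph. With 'zero_filling', nodes outside the explanatory coalition
# keep their edges but have their features zeroed, so the graph structure
# is preserved while their information is erased. Assuming `value_func`
# accepts a batched graph and returns the target-class probability as a
# scalar tensor, a minimal version could look like this:

def sketch_gnn_score_zero_filling(masked_node_list, data, value_func):
    masked = data.clone()
    masked.x = masked.x.clone()
    masked.x[masked_node_list] = 0.0   # erase features of nodes outside the coalition
    with torch.no_grad():
        score = value_func(Batch.from_data_list([masked]))
    return score.item()

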
def train_GC():
    # note: multi-class graph classification
    print('start loading data====================')
    dataset = get_dataset(data_args)
    input_dim = dataset.num_node_features
    output_dim = int(dataset.num_classes)
    dataloader = get_dataloader(dataset, data_args, train_args)

    print('start training model==================')
    gnnNets = GnnNets(input_dim, output_dim, model_args)
    gnnNets.to_device()
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(gnnNets.parameters(),
                     lr=train_args.learning_rate,
                     weight_decay=train_args.weight_decay)

    # dataset statistics; edge_index stores each undirected edge twice, hence /2 below
    avg_nodes = 0.0
    avg_edge_index = 0.0
    for i in range(len(dataset)):
        avg_nodes += dataset[i].x.shape[0]
        avg_edge_index += dataset[i].edge_index.shape[1]
    avg_nodes /= len(dataset)
    avg_edge_index /= len(dataset)
    print(f"graphs {len(dataset)}, avg_nodes {avg_nodes:.4f}, "
          f"avg_edges {avg_edge_index / 2:.4f}")

    best_acc = 0.0
    data_size = len(dataset)
    print(f'The total num of dataset is {data_size}')

    # save path for model checkpoints
    os.makedirs(os.path.join('checkpoint', data_args.dataset_name), exist_ok=True)
    ckpt_dir = f"./checkpoint/{data_args.dataset_name}/"

    early_stop_count = 0
    for epoch in range(train_args.max_epochs):
        acc = []
        loss_list = []
        gnnNets.train()
        for batch in dataloader['train']:
            logits, probs, _ = gnnNets(batch)
            loss = criterion(logits, batch.y)

            # optimization
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_value_(gnnNets.parameters(), clip_value=2.0)
            optimizer.step()

            # record
            _, prediction = torch.max(logits, -1)
            loss_list.append(loss.item())
            acc.append(prediction.eq(batch.y).cpu().numpy())

        # report train msg
        print(f"Train Epoch: {epoch} | Loss: {np.average(loss_list):.3f} | "
              f"Acc: {np.concatenate(acc, axis=0).mean():.3f}")

        # report eval msg
        eval_state = evaluate_GC(dataloader['eval'], gnnNets, criterion)
        print(f"Eval Epoch: {epoch} | Loss: {eval_state['loss']:.3f} | "
              f"Acc: {eval_state['acc']:.3f}")

        # keep the best model; early-stop on stalled validation accuracy
        is_best = eval_state['acc'] > best_acc
        if is_best:
            best_acc = eval_state['acc']
            early_stop_count = 0
        else:
            early_stop_count += 1
            if early_stop_count > train_args.early_stopping:
                break

        if is_best or epoch % train_args.save_epoch == 0:
            save_best(ckpt_dir, epoch, gnnNets, model_args.model_name,
                      eval_state['acc'], is_best)

    print(f"The best validation accuracy is {best_acc}.")

    # report test msg using the best checkpoint
    checkpoint = torch.load(os.path.join(ckpt_dir, f'{model_args.model_name}_best.pth'))
    gnnNets.update_state_dict(checkpoint['net'])
    test_state, _, _ = test_GC(dataloader['test'], gnnNets, criterion)
    print(f"Test | Loss: {test_state['loss']:.3f} | Acc: {test_state['acc']:.3f}")
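

# --- illustrative sketch (hypothetical, not the repo's implementation) ---
# `evaluate_GC` is defined elsewhere in the repo; it mirrors the training
# loop without gradient updates. A minimal version consistent with how it
# is called above (returning a dict with 'loss' and 'acc' keys) could be:

def sketch_evaluate_GC(eval_dataloader, model, criterion):
    model.eval()
    losses, correct = [], []
    with torch.no_grad():
        for batch in eval_dataloader:
            logits, probs, _ = model(batch)
            losses.append(criterion(logits, batch.y).item())
            correct.append(logits.argmax(-1).eq(batch.y).cpu().numpy())
    return {'loss': np.average(losses),
            'acc': np.concatenate(correct).mean()}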