def filter_useless_nodes(args_dataset, args_model, args_explainers, args_hops,
                         args_num_samples, args_test_samples, args_K,
                         args_prop_noise_nodes, args_connectedness, node_indices,
                         info, args_hv, args_feat, args_coal, args_g,
                         args_multiclass, args_regu, args_gpu, args_fullempty,
                         args_S, seed):
    """Add noisy neighbours to the dataset and count how many end up in explanations.

    For each explainer in ``args_explainers``, noisy nodes (random-feature
    neighbours attached to the test nodes) are injected into the graph, the
    model is retrained, and each test node is explained. The fewer noisy
    neighbours an explainer ranks among its top-K most important nodes, the
    better the explainer.

    Args:
        Arguments defined in argument parser of script_eval.py.

    Returns:
        List of noisy-neighbour counts produced by the random-explainer
        baseline (one entry per test sample).

    Side effects:
        Retrains the model, prints evaluation statistics, and saves a kernel
        density plot under ``results/``.
    """
    # Define dataset
    data = prepare_data(args_dataset, seed=seed)

    # Select a random subset of nodes to eval the explainer on.
    if not node_indices:
        node_indices = extract_test_nodes(data, args_test_samples, seed)

    # Define number of noisy nodes according to dataset size
    args_num_noise_nodes = int(args_prop_noise_nodes * data.x.size(0))
    # Noise-generation settings are looked up from a per-dataset constant dict
    # named 'EVAL1_<dataset>' (must exist in this module's scope).
    args_c = eval('EVAL1_' + data.name)['args_c']
    args_p = eval('EVAL1_' + data.name)['args_p']
    args_binary = eval('EVAL1_' + data.name)['args_binary']

    # Add noisy neighbours to the graph, with random features
    data = add_noise_neighbours(data, args_num_noise_nodes, node_indices,
                                binary=args_binary, p=args_p,
                                connectedness=args_connectedness, c=args_c)

    # Define training parameters depending on (model-dataset) couple
    hyperparam = ''.join(['hparams_', args_dataset, '_', args_model])
    param = ''.join(['params_', args_dataset, '_', args_model])

    # Define the model
    if args_model == 'GCN':
        model = GCN(input_dim=data.x.size(
            1), output_dim=data.num_classes, **eval(hyperparam))
    else:
        model = GAT(input_dim=data.x.size(
            1), output_dim=data.num_classes, **eval(hyperparam))

    # Re-train the model on dataset with noisy features
    train_and_val(model, data, **eval(param))

    # Evaluate model
    model.eval()
    with torch.no_grad():
        log_logits = model(x=data.x, edge_index=data.edge_index)  # [2708, 7]
    test_acc = accuracy(log_logits[data.test_mask], data.y[data.test_mask])
    print('Test accuracy is {:.4f}'.format(test_acc))

    # Derive predicted class for each test node
    # true_confs is indexed per test node (same order as node_indices).
    with torch.no_grad():
        true_confs, predicted_classes = log_logits.exp()[node_indices].max(dim=1)
    del log_logits

    # Node-importance study: disable feature regularisation if it was on.
    if args_regu == 1:
        args_regu = 0

    # Study attention weights of noisy nodes in GAT model - compare attention with explanations
    if str(type(model)) == "<class 'src.models.GAT'>":
        study_attention_weights(data, model, args_test_samples)

    # Do for several explainers
    for c, explainer_name in enumerate(args_explainers):

        print('EXPLAINER: ', explainer_name)

        # Define the explainer
        explainer = eval(explainer_name)(data, model, args_gpu)

        # Loop on each test sample and store how many times do noisy nodes appear among
        # K most influential features in our explanations
        # count number of noisy nodes in explanations
        pred_class_num_noise_neis = []
        # count number of noisy nodes in subgraph
        total_num_noisy_nei = []
        # Number of neigbours of v in subgraph
        total_neigbours = []
        # Stores number of most important neighbours we look at, for each node
        K = []
        # To retrieve the predicted class
        j = 0
        for node_idx in tqdm(node_indices, desc='explain node', leave=False):

            # Look only at coefficients for nodes (not node features)
            if explainer_name == 'Greedy':
                coefs = explainer.explain_nei(node_idx,
                                              args_hops,
                                              args_num_samples)

            elif explainer_name == 'GNNExplainer':
                _ = explainer.explain(node_idx,
                                      args_hops,
                                      args_num_samples)
                coefs = explainer.coefs

            else:
                # Explanations via GraphSVX
                coefs = explainer.explain([node_idx],
                                          args_hops,
                                          args_num_samples,
                                          info,
                                          args_multiclass,
                                          args_fullempty,
                                          args_S,
                                          args_hv,
                                          args_feat,
                                          args_coal,
                                          args_g,
                                          args_regu)
                # Drop the first F rows (feature coefficients); keep node part.
                coefs = coefs[0].T[explainer.F:]

            # Number of noisy nodes in the subgraph of node_idx
            # (noisy nodes were appended last, so they occupy the highest ids).
            num_noisy_nodes = len(
                [n_idx for n_idx in explainer.neighbours if n_idx >= data.x.size(0)-args_num_noise_nodes])

            # Number of neighbours in the subgraph
            total_neigbours.append(len(explainer.neighbours))

            # Adaptable K - vary according to number of nodes in the subgraph
            if len(explainer.neighbours) > 100:
                K.append(int(args_K * 100))
            else:
                K.append(max(1, int(args_K * len(explainer.neighbours))))

            # Store indexes of K most important features, for each class
            nei_indices = coefs.argsort()[-K[j]:].tolist()

            # Number of noisy features that appear in explanations - use index to spot them
            # NOTE(review): strict '>' here while the counting above uses '>=';
            # possible off-by-one in which positions count as noisy — confirm.
            noise_nei = [idx for idx in nei_indices if idx >
                         (explainer.neighbours.shape[0] - num_noisy_nodes)]

            # If node importance of top K neighbours is unsignificant, discard
            # Possible as we have importance informative measure, unlike others.
            if explainer_name == 'GraphSVX':
                # NOTE(review): 'c' is the explainer-loop index, but true_confs
                # is indexed per test node — 'j' looks intended here; verify
                # base_values indexing as well.
                explainable_part = true_confs[c] - \
                    explainer.base_values[c]
                noise_nei = [idx for idx in noise_nei if np.abs(
                    coefs[idx]) > 0.05*np.abs(explainable_part)]

            num_noise_nei = len(noise_nei)
            pred_class_num_noise_neis.append(num_noise_nei)

            # Return number of noisy nodes adjacent to node of interest
            total_num_noisy_nei.append(num_noisy_nodes)

            j += 1

        print('Noisy neighbours included in explanations: ',
              pred_class_num_noise_neis)
        print('There are {} noise neighbours found in the explanations of {} test samples, an average of {} per sample'
              .format(sum(pred_class_num_noise_neis), args_test_samples, sum(pred_class_num_noise_neis)/args_test_samples))

        print('Proportion of explanations showing noisy neighbours: {:.2f}%'.format(
            100 * sum(pred_class_num_noise_neis) / sum(K)))

        perc = 100 * sum(pred_class_num_noise_neis) / (sum(total_num_noisy_nei))
        perc2 = 100 * (sum(K) - sum(pred_class_num_noise_neis)) \
            / (sum(total_neigbours) - sum(total_num_noisy_nei))
        print('Proportion of noisy neighbours found in explanations vs normal neighbours (in subgraph): {:.2f}% vs {:.2f}'.format(
            perc, perc2))

        print('Proportion of nodes in subgraph that are noisy: {:.2f}%'.format(
            100 * sum(total_num_noisy_nei) / sum(total_neigbours)))

        print('Proportion of noisy neighbours found in explanations (entire graph): {:.2f}%'.format(
            100 * sum(pred_class_num_noise_neis) / (args_test_samples * args_num_noise_nodes)))

        print('------------------------------------')

        # Plot of kernel density estimates of number of noisy features included in explanation
        # Do for all benchmarks (with diff colors) and plt.show() to get on the same graph
        plot_dist(pred_class_num_noise_neis,
                  label=explainer_name, color=COLOURS[c])

    # Random explainer - plot estimated kernel density
    # NOTE(review): K / total_num_noisy_nei / total_neigbours carry values from
    # the LAST explainer iteration only — confirm this is intended.
    total_num_noise_neis = noise_nodes_for_random(
        data, model, K, node_indices, total_num_noisy_nei, total_neigbours)
    plot_dist(total_num_noise_neis, label='Random', color='y')

    # Store graph - with key params and time
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    plt.savefig('results/eval1_node_{}_{}_{}_{}_{}.pdf'.format(data.name,
                                                               args_coal,
                                                               args_feat,
                                                               args_hv,
                                                               current_time))
    plt.close()
    # plt.show()

    return total_num_noise_neis
def eval_shap(args_dataset, args_model, args_test_samples, args_hops, args_K, args_num_samples, node_indices, info, args_hv, args_feat, args_coal, args_g, args_multiclass, args_regu, args_gpu, args_fullempty, args_S, seed): """ Compares SHAP and GraphSVX on graph based datasets Check if they agree on features'contribution towards prediction for several test samples """ # Define dataset data = prepare_data(args_dataset, seed=10) # Select a random subset of nodes to eval the explainer on. if not node_indices: node_indices = extract_test_nodes(data, args_test_samples, seed) # Define training parameters depending on (model-dataset) couple hyperparam = ''.join(['hparams_', args_dataset, '_', args_model]) param = ''.join(['params_', args_dataset, '_', args_model]) # Define the model if args_model == 'GCN': model = GCN(input_dim=data.x.size( 1), output_dim=data.num_classes, **eval(hyperparam)) else: model = GAT(input_dim=data.x.size( 1), output_dim=data.num_classes, **eval(hyperparam)) # Re-train the model on dataset with noisy features train_and_val(model, data, **eval(param)) # Store metrics iou = [] prop_contrib_diff = [] # Iterate over test samples for node_idx in tqdm(node_indices, desc='explain node', leave=False): # Define explainers we would like to compare graphshap = GraphSVX(data, model, args_gpu) shap = SHAP(data, model, args_gpu) # Explanations via GraphSVX graphshap_coefs = graphshap.explain([node_idx], args_hops, args_num_samples, info, args_multiclass, args_fullempty, args_S, args_hv, args_feat, # All args_coal, # Random or SmarerSoftRegu args_g, # WLS args_regu) # 1 graphshap_coefs = graphshap_coefs[0].T[:graphshap.F] shap_coefs = shap.explain(node_idx, args_hops, args_num_samples, info=False, multiclass=False )[:shap.F] # Consider node features only - for predicted class only true_conf, predicted_class = model(x=data.x, edge_index=data.edge_index).exp()[ node_idx].max(dim=0) # Need to apply regularisation # Proportional contribution 
prop_contrib_diff.append(np.abs( graphshap_coefs.sum( ) / np.abs(graphshap_coefs).sum() - shap_coefs.sum() / np.abs(shap_coefs).sum())) #print('GraphSVX proportional contribution to pred: {:.2f}'.format(graphshap_coefs.sum() / np.abs(graphshap_coefs).sum() )) #print('SHAP proportional contribution to pred: {:.2f}'.format(shap_coefs.sum() / np.abs(shap_coefs).sum() )) # Important features graphshap_feat_indices = np.abs(graphshap_coefs).argsort()[-10:].tolist() shap_feat_indices = np.abs(shap_coefs).argsort()[-10:].tolist() iou.append(len(set(graphshap_feat_indices).intersection(set(shap_feat_indices)) ) / len(set(graphshap_feat_indices).union(set(shap_feat_indices)))) #print('Iou important features: ', iou) print('iou av:', np.mean(iou)) print('difference in contibutions towards pred: ', np.mean(prop_contrib_diff))
# NOTE(review): orphaned fragment — this text begins mid-call (the head of the
# add_noise_features(...) call and its enclosing 'def' are missing from this
# chunk) and its trailing 'for' loop body is cut off by the next definition.
# Preserved verbatim below; it is NOT valid Python on its own and its parent
# function must be recovered before this can run.
num_noise=args_num_noise_feat, binary=args_binary, p=args_p)

# Define training parameters depending on (model-dataset) couple
hyperparam = ''.join(['hparams_', args_dataset, '_', args_model])
param = ''.join(['params_', args_dataset, '_', args_model])

# Define the model
if args_model == 'GCN':
    model = GCN(input_dim=data.x.size(1),
                output_dim=data.num_classes,
                **eval(hyperparam))
else:
    model = GAT(input_dim=data.x.size(1),
                output_dim=data.num_classes,
                **eval(hyperparam))

# Re-train the model on dataset with noisy features
train_and_val(model, data, **eval(param))

# Define explainer
graphshap = GraphSHAP(data, model)

for node_indices in node_indices_list:
    # Select random subset of nodes to eval the explainer on.
    # node_indices = extract_test_nodes(data, args_test_samples)

    total_num_noise_feats = []  # count noisy features found in explanations for each test sample (for each class)
def filter_useless_features(args_dataset, args_model, args_explainers,
                            args_hops, args_num_samples, args_test_samples,
                            args_K, args_prop_noise_feat, node_indices,
                            info, args_hv, args_feat, args_coal, args_g,
                            args_multiclass, args_regu, args_gpu,
                            args_fullempty, args_S, seed):
    """Add noisy features to the dataset and count how many enter explanations.

    Appends ``args_prop_noise_feat * num_features`` random features to every
    node, retrains the model, and for each explainer checks how many of the
    top-K most important features of each test node are noisy ones. The
    fewest, the better the explainer.

    Args:
        Arguments defined in argument parser of script_eval.py.

    Side effects:
        Retrains the model, prints evaluation statistics, and saves a kernel
        density plot under ``results/``. Returns None.
    """
    # Define dataset
    data = prepare_data(args_dataset, seed=seed)

    # Number of noisy features is proportional to the original feature count.
    args_num_noise_feat = int(data.x.size(1) * args_prop_noise_feat)
    # Noise-generation settings from the per-dataset 'EVAL1_<dataset>' dict.
    args_p = eval('EVAL1_' + data.name)['args_p']
    args_binary = eval('EVAL1_' + data.name)['args_binary']

    # Include noisy neighbours
    data, noise_feat = add_noise_features(
        data, num_noise=args_num_noise_feat, binary=args_binary, p=args_p)

    # Define training parameters depending on (model-dataset) couple
    hyperparam = ''.join(['hparams_', args_dataset, '_', args_model])
    param = ''.join(['params_', args_dataset, '_', args_model])

    # Define the model
    if args_model == 'GCN':
        model = GCN(input_dim=data.x.size(
            1), output_dim=data.num_classes, **eval(hyperparam))
    else:
        model = GAT(input_dim=data.x.size(
            1), output_dim=data.num_classes, **eval(hyperparam))

    # Re-train the model on dataset with noisy features
    train_and_val(model, data, **eval(param))

    # Select random subset of nodes to eval the explainer on.
    if not node_indices:
        node_indices = extract_test_nodes(data, args_test_samples, seed)

    # Evaluate the model on test set
    model.eval()
    with torch.no_grad():
        log_logits = model(x=data.x, edge_index=data.edge_index)
    test_acc = accuracy(log_logits[data.test_mask], data.y[data.test_mask])
    print('Test accuracy is {:.4f}'.format(test_acc))

    # Derive predicted class for each test sample
    with torch.no_grad():
        true_confs, predicted_classes = log_logits.exp()[node_indices].max(dim=1)
    del log_logits

    # Adaptable K - top k explanations we look at for each node
    # Depends on number of existing features considered for GraphSVX
    # NOTE(review): K is shared across the explainer loop below; when GraphSVX
    # is in args_explainers it starts empty and is only appended in the
    # GraphSVX branch, so other explainers index it via K[j] — fragile if a
    # non-GraphSVX explainer runs first; confirm intended ordering.
    if 'GraphSVX' in args_explainers:
        K = []
    else:
        K = [10]*len(node_indices)
    #for node_idx in node_indices:
    #    K.append(int(data.x[node_idx].nonzero().shape[0] * args_K))

    # Feature-importance study: enable feature regularisation if it was off.
    if args_regu == 0:
        args_regu = 1

    # Loop on the different explainers selected
    for c, explainer_name in enumerate(args_explainers):

        # Define explainer
        explainer = eval(explainer_name)(data, model, args_gpu)
        print('EXPLAINER: ', explainer_name)

        # count noisy features found in explanations
        pred_class_num_noise_feats = []
        # count number of noisy features considered
        total_num_noise_feat_considered = []
        # count number of features
        F = []

        # Loop on each test sample and store how many times do noise features appear among
        # K most influential features in our explanations
        j = 0
        for node_idx in tqdm(node_indices, desc='explain node', leave=False):

            # Explanations via GraphSVX
            if explainer_name == 'GraphSVX':
                coefs = explainer.explain(
                    [node_idx],
                    args_hops,
                    args_num_samples,
                    info,
                    args_multiclass,
                    args_fullempty,
                    args_S,
                    args_hv,
                    args_feat,
                    args_coal,
                    args_g,
                    args_regu,
                )
                # Look only at features coefficients
                # Neighbours are irrelevant here
                coefs = coefs[0][:explainer.F]

                # Adaptable K
                if explainer.F > 100:
                    K.append(int(args_K * 100))
                else:
                    K.append(max(1, int(explainer.F * args_K)))

                # Num_features_considered
                if args_feat == 'Null':
                    # Only features that are non-zero somewhere in the subgraph.
                    feat_idx = noise_feat[explainer.neighbours, :].mean(axis=0).nonzero()
                    num_noise_feat_considered = feat_idx.size()[0]

                # Consider all features (+ use expectation like below)
                elif args_feat == 'All':
                    num_noise_feat_considered = args_num_noise_feat

                # Consider only features whose aggregated value is different from expected one
                else:
                    # Stats dataset
                    var = noise_feat.std(axis=0)
                    mean = noise_feat.mean(axis=0)
                    # Feature intermediate rep
                    mean_subgraph = noise_feat[explainer.neighbours, :].mean(axis=0)
                    # Select relevant features only - (E-e,E+e)
                    # Out-of-band features are marked with sentinel value 100.
                    mean_subgraph = torch.where(mean_subgraph > mean - 0.25*var,
                                                mean_subgraph,
                                                torch.ones_like(mean_subgraph)*100)
                    mean_subgraph = torch.where(mean_subgraph < mean + 0.25*var,
                                                mean_subgraph,
                                                torch.ones_like(mean_subgraph)*100)
                    feat_idx = (mean_subgraph == 100).nonzero()
                    num_noise_feat_considered = feat_idx.shape[0]
                    del mean, mean_subgraph, var

            else:
                coefs = explainer.explain(node_idx,
                                          args_hops,
                                          args_num_samples,
                                          info=False,
                                          multiclass=False
                                          )[:explainer.F]
                # All features are considered
                num_noise_feat_considered = args_num_noise_feat

            # Features considered
            F.append(explainer.F)

            # Store indexes of K most important node features, for each class
            feat_indices = coefs.argsort()[-K[j]:].tolist()

            # Number of noisy features that appear in explanations - use index to spot them
            # NOTE(review): strict '>' — possible off-by-one versus the count
            # above; confirm which index is the first noisy feature.
            num_noise_feat = [idx for idx in feat_indices
                              if idx > (explainer.F - num_noise_feat_considered)]

            # If node importance of top K features is unsignificant, discard
            # Possible as we have importance informative measure, unlike others.
            if explainer_name == 'GraphSVX':
                # NOTE(review): 'c' is the explainer-loop index, but true_confs
                # is per test node — 'j' looks intended here; verify
                # base_values indexing too.
                explainable_part = true_confs[c] - \
                    explainer.base_values[c]
                num_noise_feat = [idx for idx in num_noise_feat if np.abs(
                    coefs[idx]) > 0.05*np.abs(explainable_part)]

            # Count number of noisy that appear in explanations
            num_noise_feat = len(num_noise_feat)
            pred_class_num_noise_feats.append(num_noise_feat)

            # Return number of noisy features considered in this test sample
            total_num_noise_feat_considered.append(num_noise_feat_considered)

            j += 1

        print('Noisy features included in explanations: ',
              sum(pred_class_num_noise_feats))
        print('For the predicted class, there are {} noise features found in the explanations of {} test samples, an average of {} per sample'
              .format(sum(pred_class_num_noise_feats), args_test_samples, sum(pred_class_num_noise_feats)/args_test_samples))
        print(pred_class_num_noise_feats)

        if sum(F) != 0:
            perc = 100 * sum(total_num_noise_feat_considered) / sum(F)
            print(
                'Proportion of considered noisy features among features: {:.2f}%'.format(perc))
        if sum(K) != 0:
            perc = 100 * sum(pred_class_num_noise_feats) / sum(K)
            print('Proportion of explanations showing noisy features: {:.2f}%'.format(perc))
        if sum(total_num_noise_feat_considered) != 0:
            perc = 100 * sum(pred_class_num_noise_feats) / \
                (sum(total_num_noise_feat_considered))
            perc2 = 100 * (sum(K) - sum(pred_class_num_noise_feats)) / \
                (sum(F) - sum(total_num_noise_feat_considered))
            print('Proportion of noisy features found in explanations vs proportion of normal features (among considered ones): {:.2f}% vs {:.2f}%, over considered features only'.format(
                perc, perc2))
        print('------------------------------------')

        # Plot of kernel density estimates of number of noisy features included in explanation
        # Do for all benchmarks (with diff colors) and plt.show() to get on the same graph
        plot_dist(pred_class_num_noise_feats,
                  label=explainer_name, color=COLOURS[c])

    # Random explainer - plot estimated kernel density
    total_num_noise_feats = noise_feats_for_random(
        data, model, K, args_num_noise_feat, node_indices)
    save_path = 'results/eval1_feat'
    plot_dist(total_num_noise_feats, label='Random', color='y')

    # Store graph - with key params and time
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    plt.savefig('results/eval1_feat_{}_{}_{}_{}_{}.pdf'.format(data.name,
                                                               args_coal,
                                                               args_feat,
                                                               args_hv,
                                                               current_time))
    plt.close()
# Script fragment: build logger, GAT/SpGAT model and optimizer, then train.
# Relies on 'args', 'Data' and 'log_dir' being defined earlier (outside this
# view) — presumably an argparse namespace, a dataset object and a path.
logger = myLogger(name='exp', log_path=os.path.join(log_dir, 'log.txt'))
print_config(args, logger)
logger.setLevel(args.log_level)

# Model and optimizer
# SpGAT is the sparse-attention variant; both take identical hyperparameters.
if args.sparse:
    model = SpGAT(device=args.device,
                  nfeat=args.feature_len,
                  nhid=args.hidden,
                  output_dim=args.output_dim,
                  dropout=args.dropout,
                  nheads=args.nb_heads,
                  alpha=args.alpha)
else:
    model = GAT(device=args.device,
                nfeat=args.feature_len,
                nhid=args.hidden,
                output_dim=args.output_dim,
                dropout=args.dropout,
                nheads=args.nb_heads,
                alpha=args.alpha)
model.to(args.device)
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr,
                       weight_decay=args.weight_decay)

# Train model
t_total = time.time()
train(args, model, Data, args.log_dir, logger, optimizer)
logger.info("Optimization Finished!")
logger.info("Total time elapsed: {:.4f}s".format(time.time() - t_total))
def filter_useless_features(args_model, args_dataset, args_explainers,
                            args_hops, args_num_samples, args_test_samples,
                            args_K, args_num_noise_feat, args_p, args_binary,
                            node_indices, info=True):
    """Add noisy features to the dataset and count how many enter explanations.

    Older multiclass variant: for every class (not just the predicted one),
    counts how many of the top-K features in each explanation are noisy.
    The fewest, the better the explainer.

    Args:
        Arguments defined in argument parser of script_eval.py.

    Returns:
        Total number of noisy-feature occurrences across all classes and
        test samples.
    """
    '''
    ####### Input in script_eval file
    args_dataset = 'Cora'
    args_model = 'GCN'
    args_explainers = ['GraphSHAP', 'Greedy']
    args_hops = 2
    args_num_samples = 100  # size shap dataset
    args_test_samples = 20  # number of test samples
    args_num_noise_feat= 25  # number of noisy features
    args_K= 5  # maybe def depending on M
    args_binary = True
    args_p = 0.5
    info=True
    node_indices= [2420,2455,1783,2165,2628,1822,2682,2261,1896,1880,2137,2237,2313,2218,1822,1719,1763,2263,2020,1988]
    node_indices = [10, 18, 89, 178, 333, 356, 378, 456, 500, 2222, 1220, 1900, 1328, 189, 1111]
    node_indices = [1834,2512,2591,2101,1848,1853,2326,1987,2359,2453,2230,2267,2399, 2150,2400]
    '''
    #### Create function from here. Maybe create training fct first, to avoid retraining the model.

    # Define dataset - include noisy features
    data = prepare_data(args_dataset, seed=10)
    data, noise_feat = add_noise_features(data,
                                          num_noise=args_num_noise_feat,
                                          binary=args_binary,
                                          p=args_p)

    # Define training parameters depending on (model-dataset) couple
    hyperparam = ''.join(['hparams_', args_dataset, '_', args_model])
    param = ''.join(['params_', args_dataset, '_', args_model])

    # Define the model
    if args_model == 'GCN':
        model = GCN(input_dim=data.x.size(1),
                    output_dim=data.num_classes,
                    **eval(hyperparam))
    else:
        model = GAT(input_dim=data.x.size(1),
                    output_dim=data.num_classes,
                    **eval(hyperparam))

    # Re-train the model on dataset with noisy features
    train_and_val(model, data, **eval(param))

    # Select random subset of nodes to eval the explainer on.
    if not node_indices:
        node_indices = extract_test_nodes(data, args_test_samples)

    for explainer_name in args_explainers:

        # Define explainer
        explainer = eval(explainer_name)(data, model)

        total_num_noise_feats = []  # count noisy features found in explanations for each test sample (for each class)
        pred_class_num_noise_feats = []  # count noisy features found in explanations for each test sample for class of interest
        total_num_non_zero_noise_feat = []  # count number of noisy features considered for each test sample
        M = []  # count number of non zero features for each test sample

        # Loop on each test sample and store how many times do noise features appear among
        # K most influential features in our explanations
        for node_idx in tqdm(node_indices, desc='explain node', leave=False):

            # Explanations via GraphSHAP
            coefs = explainer.explainer(node_index=node_idx,
                                        hops=args_hops,
                                        num_samples=args_num_samples,
                                        info=False)

            # Check how many non zero features
            M.append(explainer.M)

            # Number of non zero noisy features
            num_non_zero_noise_feat = len(
                [val for val in noise_feat[node_idx] if val != 0])

            # Multilabel classification - consider all classes instead of focusing on the
            # class that is predicted by our model
            num_noise_feats = []
            true_conf, predicted_class = model(
                x=data.x, edge_index=data.edge_index).exp()[node_idx].max(dim=0)

            for i in range(data.num_classes):

                # Store indexes of K most important features, for each class
                feat_indices = np.abs(coefs[:, i]).argsort()[-args_K:].tolist()

                # Number of noisy features that appear in explanations - use index to spot them
                # NOTE(review): here noisy features occupy the LOW indices
                # (idx < num_non_zero_noise_feat), unlike the newer version
                # which places them at the high end — confirm layout.
                num_noise_feat = sum(
                    idx < num_non_zero_noise_feat for idx in feat_indices)
                num_noise_feats.append(num_noise_feat)

                if i == predicted_class:
                    pred_class_num_noise_feats.append(num_noise_feat)

            # Return this number => number of times noisy features are provided as explanations
            total_num_noise_feats.append(sum(num_noise_feats))

            # Return number of noisy features considered in this test sample
            total_num_non_zero_noise_feat.append(num_non_zero_noise_feat)

        if info:
            print('Noise features included in explanations: ',
                  total_num_noise_feats)
            print('There are {} noise features found in the explanations of {} test samples, an average of {} per sample'
                  .format(sum(total_num_noise_feats), args_test_samples, sum(total_num_noise_feats)/args_test_samples))

            # Number of noisy features found in explanation for the predicted class
            print(np.sum(pred_class_num_noise_feats) / args_test_samples,
                  'for the predicted class only')

            perc = 100 * sum(total_num_non_zero_noise_feat) / np.sum(M)
            print('Overall proportion of considered noisy features : {:.2f}%'.format(perc))

            perc = 100 * sum(total_num_noise_feats) / \
                (args_K * args_test_samples * data.num_classes)
            print('Percentage of explanations showing noisy features: {:.2f}%'.format(perc))

            if sum(total_num_non_zero_noise_feat) != 0:
                perc = 100 * sum(total_num_noise_feats) / \
                    (sum(total_num_non_zero_noise_feat) * data.num_classes)
                perc2 = 100 * (args_K * args_test_samples * data.num_classes - sum(total_num_noise_feats)) / \
                    (data.num_classes * (sum(M) - sum(total_num_non_zero_noise_feat)))
                print('Proportion of noisy features found in explanations vs normal features: {:.2f}% vs {:.2f}%, over considered features only'
                      .format(perc, perc2))
            print('------------------------------------')

        # Plot of kernel density estimates of number of noisy features included in explanation
        # Do for all benchmarks (with diff colors) and plt.show() to get on the same graph
        plot_dist(total_num_noise_feats, label=explainer_name, color='g')

    plt.show()

    return sum(total_num_noise_feats)
def filter_useless_nodes(args_model, args_dataset, args_hops, args_num_samples,
                         args_test_samples, args_K, args_num_noise_nodes,
                         args_p, args_binary, args_connectedness,
                         node_indices=None, info=True):
    """Add noisy neighbours to the dataset and count how many enter explanations.

    Older multiclass GraphSHAP-only variant: for every class, counts how many
    of the top-K nodes in each explanation are noisy neighbours. The fewest,
    the better the explainer.

    Args:
        Arguments defined in argument parser in script_eval.py.

    Returns:
        Total number of noisy-neighbour occurrences across all classes and
        test samples.
    """
    '''
    ####### Input in script_eval file
    args_dataset = 'Cora'
    args_model = 'GAT'
    args_hops = 2
    args_num_samples = 100
    args_test_samples = 10
    args_num_noise_nodes = 20
    args_K= 5  # maybe def depending on M
    args_p = 0.013
    args_connectedness = 'medium'
    args_binary=True
    '''
    #### Create function from here. Maybe create training fct first, to avoid retraining the model.

    # Define dataset
    data = prepare_data(args_dataset, seed=10)

    # Select random subset of nodes to eval the explainer on.
    if not node_indices:
        node_indices = extract_test_nodes(data, args_test_samples)

    # Include noisy neighbours
    # NOTE(review): spelled 'add_noise_neighbors' here vs 'add_noise_neighbours'
    # in the newer version — confirm both helpers exist.
    data = add_noise_neighbors(data, args_num_noise_nodes, node_indices,
                               binary=args_binary, p=args_p,
                               connectedness=args_connectedness)
    # data, noise_feat = add_noise_features(data, num_noise=args_num_noise_feat, binary=True)

    # Define training parameters depending on (model-dataset) couple
    hyperparam = ''.join(['hparams_', args_dataset, '_', args_model])
    param = ''.join(['params_', args_dataset, '_', args_model])

    # Define the model
    if args_model == 'GCN':
        model = GCN(input_dim=data.x.size(1),
                    output_dim=data.num_classes,
                    **eval(hyperparam))
    else:
        model = GAT(input_dim=data.x.size(1),
                    output_dim=data.num_classes,
                    **eval(hyperparam))

    # Re-train the model on dataset with noisy features
    train_and_val(model, data, **eval(param))

    # Study attention weights of noisy nodes - for 20 new nodes
    # Closure: captures args_test_samples from the enclosing scope.
    def study_attention_weights(data, model):
        """ Studies the attention weights of the GAT model """
        _, alpha, alpha_bis = model(data.x, data.edge_index, att=True)

        edges, alpha1 = alpha[0][:, :-(data.x.size(0) - 1)], alpha[1][:-(
            data.x.size(0) - 1), :]  # remove self loops att
        alpha2 = alpha_bis[1][:-(data.x.size(0) - 1)]

        att1 = []
        att2 = []
        for i in range(data.x.size(0) - args_test_samples, (data.x.size(0) - 1)):
            ind = (edges == i).nonzero()
            for j in ind[:, 1]:
                att1.append(torch.mean(alpha1[j]))
                att2.append(alpha2[j][0])
        print('shape attention noisy', len(att2))

        # It looks like these noisy nodes are very important
        print('av attention', (torch.mean(alpha1) + torch.mean(alpha2)) / 2)  # 0.18
        # NOTE(review): the next expression's value is discarded (no print/assign).
        (torch.mean(torch.stack(att1)) + torch.mean(torch.stack(att2))) / 2  # 0.32

        # In fact, noisy nodes are slightly below average in terms of attention received

        # Importance of interest: look only at imp. of noisy nei for test nodes
        print('attention 1 av. for noisy nodes: ',
              torch.mean(torch.stack(att1[0::2])))
        print('attention 2 av. for noisy nodes: ',
              torch.mean(torch.stack(att2[0::2])))

    # Study attention weights
    if str(type(model)) == "<class 'src.models.GAT'>":
        study_attention_weights(data, model)

    # Define explainer
    graphshap = GraphSHAP(data, model)

    # Loop on each test sample and store how many times do noise features appear among
    # K most influential features in our explanations
    total_num_noise_neis = []  # 1 el per test sample - count number of noisy nodes in explanations
    pred_class_num_noise_neis = []  # 1 el per test sample - count number of noisy nodes in explanations for 1 class
    total_num_noisy_nei = []  # 1 el per test sample - count number of noisy nodes in subgraph
    total_neigbours = []  # 1 el per test samples - number of neigbours of v in subgraph
    M = []  # 1 el per test sample - number of non zero features

    for node_idx in tqdm(node_indices, desc='explain node', leave=False):

        # Explanations via GraphSHAP
        coefs = graphshap.explainer(node_index=node_idx,
                                    hops=args_hops,
                                    num_samples=args_num_samples,
                                    info=False)

        # Check how many non zero features
        M.append(graphshap.M)

        # Number of noisy nodes in the subgraph of node_idx
        num_noisy_nodes = len([
            n_idx for n_idx in graphshap.neighbors
            if n_idx >= data.x.size(0) - args_num_noise_nodes
        ])

        total_neigbours.append(len(graphshap.neighbors))

        # Multilabel classification - consider all classes instead of focusing on the
        # class that is predicted by our model
        num_noise_neis = []  # one element for each class of a test sample
        true_conf, predicted_class = model(
            x=data.x, edge_index=data.edge_index).exp()[node_idx].max(dim=0)

        for i in range(data.num_classes):

            # Store indexes of K most important features, for each class
            nei_indices = np.abs(coefs[:, i]).argsort()[-args_K:].tolist()

            # Number of noisy features that appear in explanations - use index to spot them
            num_noise_nei = sum(
                idx >= graphshap.M - num_noisy_nodes for idx in nei_indices)
            num_noise_neis.append(num_noise_nei)

            if i == predicted_class:
                pred_class_num_noise_neis.append(num_noise_nei)

        # Return this number => number of times noisy neighbours are provided as explanations
        total_num_noise_neis.append(sum(num_noise_neis))

        # Return number of noisy nodes adjacent to node of interest
        total_num_noisy_nei.append(num_noisy_nodes)

    if info:
        print('Noisy neighbours included in explanations: ',
              total_num_noise_neis)
        print('There are {} noise neighbours found in the explanations of {} test samples, an average of {} per sample'
              .format(sum(total_num_noise_neis), args_test_samples, sum(total_num_noise_neis)/args_test_samples))

        print(np.sum(pred_class_num_noise_neis) / args_test_samples,
              'for the predicted class only')

        print('Proportion of explanations showing noisy neighbours: {:.2f}%'.format(
            100 * sum(total_num_noise_neis) / (args_K * args_test_samples * data.num_classes)))

        perc = 100 * sum(total_num_noise_neis) / \
            (args_test_samples * args_num_noise_nodes * data.num_classes)
        perc2 = 100 * ((args_K * args_test_samples * data.num_classes) - sum(total_num_noise_neis)) / \
            (np.sum(M) - sum(total_num_noisy_nei))
        print('Proportion of noisy neighbours found in explanations vs normal features: {:.2f}% vs {:.2f}'
              .format(perc, perc2))

        print('Proportion of nodes in subgraph that are noisy: {:.2f}%'.format(
            100 * sum(total_num_noisy_nei) / sum(total_neigbours)))

        print('Proportion of noisy neighbours among features: {:.2f}%'.format(
            100 * sum(total_num_noisy_nei) / np.sum(M)))

    # Plot of kernel density estimates of number of noisy features included in explanation
    # Do for all benchmarks (with diff colors) and plt.show() to get on the same graph
    plot_dist(total_num_noise_neis, label='GraphSHAP', color='g')
    # plt.show()

    return total_num_noise_neis
def filter_useless_nodes_multiclass(args_dataset, args_model, args_explainers,
                                    args_hops, args_num_samples, args_test_samples,
                                    args_prop_noise_nodes, args_connectedness,
                                    node_indices, args_K, info, args_hv, args_feat,
                                    args_coal, args_g, args_multiclass, args_regu,
                                    args_gpu, args_fullempty, args_S, seed):
    """Add noisy neighbours to the dataset and check how many are included
    in explanations, considering every class (multiclass setting) instead of
    the predicted class only. The fewer noisy neighbours included, the better
    the explainer.

    Args:
        Arguments defined in argument parser of script_eval.py

    Returns:
        list: per test node, the class-averaged count of noisy neighbours
            included in the explanations of the random baseline (last
            explainer evaluated).
    """
    # Define dataset. BUGFIX: use the caller-provided seed instead of a
    # hard-coded 10, so this variant stays consistent with
    # filter_useless_nodes and with extract_test_nodes below.
    data = prepare_data(args_dataset, seed=seed)

    # Define number of noisy nodes according to dataset size
    args_num_noise_nodes = int(args_prop_noise_nodes * data.x.size(0))
    args_c = eval('EVAL1_' + data.name)['args_c']
    args_p = eval('EVAL1_' + data.name)['args_p']
    args_binary = eval('EVAL1_' + data.name)['args_binary']

    # Select a random subset of nodes to eval the explainer on.
    if not node_indices:
        node_indices = extract_test_nodes(data, args_test_samples, seed)

    # Add noisy neighbours to the graph, with random features.
    # BUGFIX: forward the dataset-specific parameter args_c, which was
    # computed above but previously never passed on (the single-class
    # variant passes c=args_c).
    data = add_noise_neighbours(data, args_num_noise_nodes, node_indices,
                                binary=args_binary, p=args_p,
                                connectedness=args_connectedness, c=args_c)

    # Define training parameters depending on (model-dataset) couple
    hyperparam = ''.join(['hparams_', args_dataset, '_', args_model])
    param = ''.join(['params_', args_dataset, '_', args_model])

    # Define the model
    if args_model == 'GCN':
        model = GCN(input_dim=data.x.size(1),
                    output_dim=data.num_classes, **eval(hyperparam))
    else:
        model = GAT(input_dim=data.x.size(1),
                    output_dim=data.num_classes, **eval(hyperparam))

    # Re-train the model on dataset with noisy features
    train_and_val(model, data, **eval(param))

    # Evaluate the model on the test set
    model.eval()
    with torch.no_grad():
        log_logits = model(x=data.x, edge_index=data.edge_index)  # [2708, 7]
        test_acc = accuracy(log_logits[data.test_mask], data.y[data.test_mask])
    print('Test accuracy is {:.4f}'.format(test_acc))
    del log_logits

    # Study attention weights of noisy nodes in GAT model - compare attention
    # with explanations
    if str(type(model)) == "<class 'src.models.GAT'>":
        study_attention_weights(data, model, args_test_samples)

    # Do for several explainers
    for c, explainer_name in enumerate(args_explainers):

        print('EXPLAINER: ', explainer_name)

        # Define the explainer
        explainer = eval(explainer_name)(data, model, args_gpu)

        # Loop on each test sample and store how many times noisy nodes
        # appear among the K most influential features in our explanations.
        # 1 el per test sample - number of noisy nodes in explanations
        total_num_noise_neis = []
        # 1 el per test sample - noisy nodes in explanations for 1 class
        pred_class_num_noise_neis = []
        # 1 el per test sample - number of noisy nodes in subgraph
        total_num_noisy_nei = []
        # 1 el per test sample - number of neigbours of v in subgraph
        total_neigbours = []
        # 1 el per test sample - number of non zero features
        M = []

        for node_idx in tqdm(node_indices, desc='explain node', leave=False):

            # Look only at coefficients for nodes (not node features)
            if explainer_name == 'Greedy':
                coefs = explainer.explain_nei(node_index=node_idx,
                                              hops=args_hops,
                                              num_samples=args_num_samples,
                                              info=False, multiclass=True)
            elif explainer_name == 'GNNExplainer':
                _ = explainer.explain(node_index=node_idx,
                                      hops=args_hops,
                                      num_samples=args_num_samples,
                                      info=False, multiclass=True)
                coefs = explainer.coefs
            else:
                # Explanations via GraphSVX
                coefs = explainer.explain([node_idx], args_hops,
                                          args_num_samples, info,
                                          args_multiclass, args_fullempty,
                                          args_S, args_hv, args_feat,
                                          args_coal, args_g, args_regu)
                coefs = coefs[0].T[explainer.F:]

            # Check how many non zero features
            M.append(explainer.M)

            # Number of noisy nodes in the subgraph of node_idx. Noisy nodes
            # were appended to the graph, so they occupy the highest indices.
            num_noisy_nodes = len(
                [n_idx for n_idx in explainer.neighbours
                 if n_idx >= data.x.size(0) - args_num_noise_nodes])

            # Number of neighbours in the subgraph
            total_neigbours.append(len(explainer.neighbours))

            # Multilabel classification - consider all classes instead of
            # focusing on the class that is predicted by our model
            num_noise_neis = []  # one element for each class of a test sample
            # Inference only - no gradient tracking needed for this forward.
            with torch.no_grad():
                true_conf, predicted_class = model(
                    x=data.x,
                    edge_index=data.edge_index).exp()[node_idx].max(dim=0)

            for i in range(data.num_classes):
                # Store indexes of K most important features, for each class
                nei_indices = np.abs(coefs[:, i]).argsort()[-args_K:].tolist()
                # Number of noisy nodes that appear in explanations - use
                # index to spot them
                num_noise_nei = sum(
                    idx >= (explainer.neighbours.shape[0] - num_noisy_nodes)
                    for idx in nei_indices)
                num_noise_neis.append(num_noise_nei)

                if i == predicted_class:
                    pred_class_num_noise_neis.append(num_noise_nei)

            # Number of times noisy neighbours are provided as explanations
            total_num_noise_neis.append(sum(num_noise_neis))
            # Number of noisy nodes adjacent to node of interest
            total_num_noisy_nei.append(num_noisy_nodes)

        if info:
            print('Noisy neighbours included in explanations: ',
                  total_num_noise_neis)
            print('There are {} noise neighbours found in the explanations of {} test samples, an average of {} per sample'
                  .format(sum(total_num_noise_neis), args_test_samples,
                          sum(total_num_noise_neis) / args_test_samples))
            print(np.sum(pred_class_num_noise_neis) / args_test_samples,
                  'for the predicted class only')
            print('Proportion of explanations showing noisy neighbours: {:.2f}%'.format(
                100 * sum(total_num_noise_neis) / (args_K * args_test_samples * data.num_classes)))
            perc = 100 * sum(total_num_noise_neis) / (args_test_samples
                                                      * args_num_noise_nodes
                                                      * data.num_classes)
            perc2 = 100 * ((args_K * args_test_samples * data.num_classes)
                           - sum(total_num_noise_neis)) / (np.sum(M) - sum(total_num_noisy_nei))
            print('Proportion of noisy neighbours found in explanations vs normal features: {:.2f}% vs {:.2f}'.format(
                perc, perc2))
            print('Proportion of nodes in subgraph that are noisy: {:.2f}%'.format(
                100 * sum(total_num_noisy_nei) / sum(total_neigbours)))
            print('Proportion of noisy neighbours in subgraph found in explanations: {:.2f}%'.format(
                100 * sum(total_num_noise_neis) / (sum(total_num_noisy_nei) * data.num_classes)))

        # Plot of kernel density estimates of number of noisy nodes included
        # in explanation - one curve per explainer, shared figure via
        # plt.show() afterwards.
        total_num_noise_neis = [item / data.num_classes
                                for item in total_num_noise_neis]
        plot_dist(total_num_noise_neis,
                  label=explainer_name, color=COLOURS[c])

    # Random explainer baseline - plot estimated kernel density
    total_num_noise_neis = noise_nodes_for_random(
        data, model, args_K, args_num_noise_nodes, node_indices)
    total_num_noise_neis = [item / data.num_classes
                            for item in total_num_noise_neis]
    plot_dist(total_num_noise_neis, label='Random', color='y')

    plt.savefig('results/eval1_node_{}'.format(data.name))
    # plt.show()

    return total_num_noise_neis
def filter_useless_features_multiclass(args_dataset, args_model, args_explainers,
                                       args_hops, args_num_samples, args_test_samples,
                                       args_prop_noise_nodes, args_connectedness,
                                       node_indices, args_K, info, args_hv, args_feat,
                                       args_coal, args_g, args_multiclass, args_regu,
                                       args_gpu, args_fullempty, args_S, seed):
    """Add noisy features to the dataset and check how many are included in
    explanations, considering every class (multiclass setting). The fewer
    noisy features included, the better the explainer.

    Args:
        Arguments defined in argument parser of script_eval.py
    """
    # Define dataset - include noisy features.
    # BUGFIX: use the caller-provided seed instead of a hard-coded 10,
    # consistent with extract_test_nodes below.
    data = prepare_data(args_dataset, seed=seed)

    # BUGFIX: the body referenced an undefined name `args_prop_noise_feat`
    # (NameError at runtime); the noise proportion is received through the
    # `args_prop_noise_nodes` parameter, so use it to size the noisy features.
    args_num_noise_feat = int(data.x.size(1) * args_prop_noise_nodes)
    args_p = eval('EVAL1_' + data.name)['args_p']
    args_binary = eval('EVAL1_' + data.name)['args_binary']
    data, noise_feat = add_noise_features(
        data, num_noise=args_num_noise_feat, binary=args_binary, p=args_p)

    # Define training parameters depending on (model-dataset) couple
    hyperparam = ''.join(['hparams_', args_dataset, '_', args_model])
    param = ''.join(['params_', args_dataset, '_', args_model])

    # Define the model
    if args_model == 'GCN':
        model = GCN(input_dim=data.x.size(1),
                    output_dim=data.num_classes, **eval(hyperparam))
    else:
        model = GAT(input_dim=data.x.size(1),
                    output_dim=data.num_classes, **eval(hyperparam))

    # Re-train the model on dataset with noisy features
    train_and_val(model, data, **eval(param))

    # Select random subset of nodes to eval the explainer on.
    if not node_indices:
        node_indices = extract_test_nodes(data, args_test_samples, seed)

    # Evaluate the model - test set
    model.eval()
    with torch.no_grad():
        log_logits = model(x=data.x, edge_index=data.edge_index)  # [2708, 7]
        test_acc = accuracy(log_logits[data.test_mask], data.y[data.test_mask])
    print('Test accuracy is {:.4f}'.format(test_acc))
    del log_logits

    # Loop on different explainers selected
    for c, explainer_name in enumerate(args_explainers):

        # Define explainer
        explainer = eval(explainer_name)(data, model, args_gpu)

        # count noisy features found in explanations for each test sample
        # (for each class)
        total_num_noise_feats = []
        # count noisy features found in explanations for each test sample,
        # for the class of interest only
        pred_class_num_noise_feats = []
        # count number of noisy features considered for each test sample
        # (for each class)
        total_num_noise_feat_considered = []
        # count number of non zero features for each test sample
        F = []

        # Loop on each test sample and store how many times noise features
        # appear among the K most influential features in our explanations
        for node_idx in tqdm(node_indices, desc='explain node', leave=False):

            # Explanations via GraphSVX
            if explainer_name == 'GraphSVX':
                coefs = explainer.explain([node_idx], args_hops,
                                          args_num_samples, info,
                                          args_multiclass, args_fullempty,
                                          args_S, args_hv, args_feat,
                                          args_coal, args_g, args_regu)
                coefs = coefs[0].T[:explainer.F]
            else:
                coefs = explainer.explain(node_index=node_idx,
                                          hops=args_hops,
                                          num_samples=args_num_samples,
                                          info=False, multiclass=True)

            # Check how many non zero features
            F.append(explainer.F)

            # Number of non zero noisy features. Different for explainers
            # that only consider non zero features (GraphSVX, SHAP) vs those
            # considering all features.
            if explainer_name == 'GraphSVX' or explainer_name == 'SHAP':
                num_noise_feat_considered = len(
                    [val for val in noise_feat[node_idx] if val != 0])
            else:
                num_noise_feat_considered = args_num_noise_feat

            # Multilabel classification - consider all classes instead of
            # focusing on the class that is predicted by our model
            num_noise_feats = []
            # Inference only - no gradient tracking needed for this forward.
            with torch.no_grad():
                true_conf, predicted_class = model(
                    x=data.x,
                    edge_index=data.edge_index).exp()[node_idx].max(dim=0)

            for i in range(data.num_classes):
                # Store indexes of K most important node features, per class
                feat_indices = np.abs(
                    coefs[:explainer.F, i]).argsort()[-args_K:].tolist()
                # Number of noisy features that appear in explanations -
                # noisy features occupy the lowest indices, so spot them there
                num_noise_feat = sum(
                    idx < num_noise_feat_considered for idx in feat_indices)
                num_noise_feats.append(num_noise_feat)

                # For predicted class only
                if i == predicted_class:
                    pred_class_num_noise_feats.append(num_noise_feat)

            # Number of times noisy features are provided as explanations
            total_num_noise_feats.append(sum(num_noise_feats))
            # Number of noisy features considered in this test sample
            total_num_noise_feat_considered.append(num_noise_feat_considered)

        if info:
            print('Noise features included in explanations: ',
                  total_num_noise_feats)
            print('There are {} noise features found in the explanations of {} test samples, an average of {} per sample'
                  .format(sum(total_num_noise_feats), args_test_samples,
                          sum(total_num_noise_feats) / args_test_samples))
            # Number of noisy features found in explanation for the
            # predicted class
            print(np.sum(pred_class_num_noise_feats) / args_test_samples,
                  'for the predicted class only')
            perc = 100 * sum(total_num_noise_feat_considered) / np.sum(F)
            print(
                'Proportion of non-zero noisy features among non-zero features: {:.2f}%'.format(perc))
            perc = 100 * sum(total_num_noise_feats) / \
                (args_K * args_test_samples * data.num_classes)
            print(
                'Proportion of explanations showing noisy features: {:.2f}%'.format(perc))
            # Guard against division by zero when no noisy feature was
            # considered for any test sample
            if sum(total_num_noise_feat_considered) != 0:
                perc = 100 * sum(total_num_noise_feats) / \
                    (sum(total_num_noise_feat_considered) * data.num_classes)
                perc2 = 100 * (args_K * args_test_samples * data.num_classes
                               - sum(total_num_noise_feats)) / (
                    data.num_classes * (sum(F) - sum(total_num_noise_feat_considered)))
                print('Proportion of noisy features found in explanations vs normal features (among considered ones): {:.2f}% vs {:.2f}%, over considered features only'.format(
                    perc, perc2))
            print('------------------------------------')

        # Plot of kernel density estimates of number of noisy features
        # included in explanation - one curve per explainer, on a shared
        # figure (plt.show() to display them together)
        plot_dist(total_num_noise_feats,
                  label=explainer_name, color=COLOURS[c])

    # Random explainer baseline - plot estimated kernel density
    total_num_noise_feats = noise_feats_for_random(
        data, model, args_K, args_num_noise_feat, node_indices)
    plot_dist(total_num_noise_feats, label='Random', color='y')

    plt.savefig('results/eval1_feat_{}'.format(data.name))