Example #1
def filter_useless_nodes(args_dataset,
                         args_model,
                         args_explainers,
                         args_hops,
                         args_num_samples,
                         args_test_samples,
                         args_K,
                         args_prop_noise_nodes,
                         args_connectedness,
                         node_indices,
                         info,
                         args_hv,
                         args_feat,
                         args_coal,
                         args_g,
                         args_multiclass,
                         args_regu,
                         args_gpu,
                         args_fullempty,
                         args_S,
                         seed):
    """ Add noisy neighbours to dataset and check how many are included in explanations
    The fewest, the better the explainer.

    Args:
        Arguments defined in argument parser of script_eval.py
    
    """

    # Define dataset
    data = prepare_data(args_dataset, seed=seed)

    # Select a random subset of nodes to eval the explainer on.
    if not node_indices:
        node_indices = extract_test_nodes(data, args_test_samples, seed)
    
    # Define number of noisy nodes according to dataset size
    args_num_noise_nodes = int(args_prop_noise_nodes * data.x.size(0))
    args_c = eval('EVAL1_' + data.name)['args_c']
    args_p = eval('EVAL1_' + data.name)['args_p']
    args_binary = eval('EVAL1_' + data.name)['args_binary']
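    # Note: the EVAL1_<dataset-name> entries looked up via eval() above are assumed to be
    # plain dicts exposed by the imported configuration module, e.g. (illustrative values only):
    #     EVAL1_Cora = {'args_c': 0.001, 'args_p': 0.5, 'args_binary': True}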

    # Add noisy neighbours to the graph, with random features
    data = add_noise_neighbours(data, args_num_noise_nodes, node_indices,
                                binary=args_binary, p=args_p, 
                                connectedness=args_connectedness, c=args_c)

    # Define training parameters depending on the (dataset, model) pair
    hyperparam = ''.join(['hparams_', args_dataset, '_', args_model])
    param = ''.join(['params_', args_dataset, '_', args_model])

    # Define the model
    if args_model == 'GCN':
        model = GCN(input_dim=data.x.size(1),
                    output_dim=data.num_classes, **eval(hyperparam))
    else:
        model = GAT(input_dim=data.x.size(1),
                    output_dim=data.num_classes, **eval(hyperparam))

    # Re-train the model on the dataset with noisy neighbours
    train_and_val(model, data, **eval(param))
    
    # Evaluate model
    model.eval()
    with torch.no_grad():
        log_logits = model(x=data.x, edge_index=data.edge_index)  # [2708, 7]
    test_acc = accuracy(log_logits[data.test_mask], data.y[data.test_mask])
    print('Test accuracy is {:.4f}'.format(test_acc))

    # Derive predicted class for each test node
    with torch.no_grad():
        true_confs, predicted_classes = log_logits.exp()[node_indices].max(dim=1)
    del log_logits

    if args_regu == 1:
        args_regu = 0

    # Study attention weights of noisy nodes in GAT model - compare attention with explanations
    if isinstance(model, GAT):
        study_attention_weights(data, model, args_test_samples)

    # Do for several explainers
    for c, explainer_name in enumerate(args_explainers):

        print('EXPLAINER: ', explainer_name)
        # Define the explainer
        explainer = eval(explainer_name)(data, model, args_gpu)

        # Loop on each test sample and store how many times noisy nodes appear among
        # the K most influential nodes in our explanations
        # count number of noisy nodes in explanations
        pred_class_num_noise_neis = []
        # count number of noisy nodes in subgraph
        total_num_noisy_nei = []
        # Number of neighbours of v in the subgraph
        total_neigbours = []
        # Number of most important neighbours we look at, for each node
        K = []
        # Counter over test samples (used to index K)
        j = 0
        for node_idx in tqdm(node_indices, desc='explain node', leave=False):

            # Look only at coefficients for nodes (not node features)
            if explainer_name == 'Greedy':
                coefs = explainer.explain_nei(node_idx,
                                              args_hops,
                                              args_num_samples)

            elif explainer_name == 'GNNExplainer':
                _ = explainer.explain(node_idx,
                                      args_hops,
                                      args_num_samples)
                coefs = explainer.coefs

            else:
                # Explanations via GraphSVX
                coefs = explainer.explain([node_idx],
                                          args_hops,
                                          args_num_samples,
                                          info,
                                          args_multiclass,
                                          args_fullempty,
                                          args_S,
                                          args_hv,
                                          args_feat,
                                          args_coal,
                                          args_g,
                                          args_regu)
                coefs = coefs[0].T[explainer.F:]

            # Number of noisy nodes in the subgraph of node_idx
            num_noisy_nodes = len(
                [n_idx for n_idx in explainer.neighbours if n_idx >= data.x.size(0)-args_num_noise_nodes])

            # Number of neighbours in the subgraph
            total_neigbours.append(len(explainer.neighbours))

            # Adaptable K - vary according to number of nodes in the subgraph
            if len(explainer.neighbours) > 100:
                K.append(int(args_K * 100))
            else:
                K.append( max(1, int(args_K * len(explainer.neighbours))) )

            # Store indices of the K most important neighbours
            nei_indices = coefs.argsort()[-K[j]:].tolist()

            # Number of noisy neighbours that appear in explanations - use index to spot them
            noise_nei = [idx for idx in nei_indices if idx > (explainer.neighbours.shape[0] - num_noisy_nodes)]

            # If the importance of a top-K neighbour is insignificant, discard it.
            # Possible because GraphSVX provides an informative importance measure, unlike the other explainers.
            if explainer_name == 'GraphSVX':
                explainable_part = true_confs[c] - \
                                explainer.base_values[c]
                noise_nei = [idx for idx in noise_nei if np.abs(coefs[idx]) > 0.05*np.abs(explainable_part)]
            
            num_noise_nei = len(noise_nei)
            pred_class_num_noise_neis.append(num_noise_nei)

            # Return number of noisy nodes adjacent to node of interest
            total_num_noisy_nei.append(num_noisy_nodes)

            j += 1

        print('Noisy neighbours included in explanations: ',
                        pred_class_num_noise_neis)

        print('There are {} noise neighbours found in the explanations of {} test samples, an average of {} per sample'
                        .format(sum(pred_class_num_noise_neis), args_test_samples, sum(pred_class_num_noise_neis)/args_test_samples))

        print('Proportion of explanations showing noisy neighbours: {:.2f}%'.format(
            100 * sum(pred_class_num_noise_neis) / sum(K)))

        perc = 100 * sum(pred_class_num_noise_neis) / (sum(total_num_noisy_nei))
        perc2 = 100 * (sum(K) - sum(pred_class_num_noise_neis)) \
        / (sum(total_neigbours) - sum(total_num_noisy_nei))
        print('Proportion of noisy neighbours found in explanations vs normal neighbours (in subgraph): {:.2f}% vs {:.2f}%'.format(
            perc, perc2))

        print('Proportion of nodes in subgraph that are noisy: {:.2f}%'.format(
            100 * sum(total_num_noisy_nei) / sum(total_neigbours)))

        print('Proportion of noisy neighbours found in explanations (entire graph): {:.2f}%'.format(
            100 * sum(pred_class_num_noise_neis) / (args_test_samples * args_num_noise_nodes)))
        
        print('------------------------------------')

        # Plot kernel density estimates of the number of noisy neighbours included in explanations
        # Done for all explainers (with different colours); plt.show() overlays them on the same graph
        plot_dist(pred_class_num_noise_neis,
                    label=explainer_name, color=COLOURS[c])

    # Random explainer - plot estimated kernel density
    total_num_noise_neis = noise_nodes_for_random(
        data, model, K, node_indices, total_num_noisy_nei, total_neigbours)
    plot_dist(total_num_noise_neis, label='Random',
              color='y')
    
    # Store graph - with key params and time
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    plt.savefig('results/eval1_node_{}_{}_{}_{}_{}.pdf'.format(data.name,
                                                           args_coal, 
                                                           args_feat, 
                                                           args_hv, 
                                                           current_time))
    plt.close()
    #plt.show()

    return total_num_noise_neis
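
The call below is a minimal, hypothetical invocation sketch for the benchmark above (e.g. from script_eval.py once its argument parser has run); every argument value shown is an illustrative assumption, not a default taken from the script.

total_noise_counts = filter_useless_nodes(
    args_dataset='Cora', args_model='GCN',
    args_explainers=['GraphSVX', 'GNNExplainer'],
    args_hops=2, args_num_samples=3000, args_test_samples=40, args_K=0.25,
    args_prop_noise_nodes=0.20, args_connectedness='medium',
    node_indices=None, info=True,
    args_hv='compute_pred', args_feat='Expectation', args_coal='Smarter', args_g='WLS',
    args_multiclass=False, args_regu=0, args_gpu=False,
    args_fullempty=None, args_S=3, seed=10)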
Example #2
def filter_useless_nodes_multiclass(args_dataset,
                                    args_model,
                                    args_explainers,
                                    args_hops,
                                    args_num_samples,
                                    args_test_samples,
                                    args_prop_noise_nodes,
                                    args_connectedness,
                                    node_indices,
                                    args_K,
                                    info,
                                    args_hv,
                                    args_feat,
                                    args_coal,
                                    args_g,
                                    args_multiclass,
                                    args_regu,
                                    args_gpu,
                                    args_fullempty,
                                    args_S, 
                                    seed):
    """ Add noisy neighbours to dataset and check how many are included in explanations
    The fewest, the better the explainer.

    Args:
        Arguments defined in argument parser of script_eval.py
    
    """

    # Define dataset
    data = prepare_data(args_dataset, seed=seed)
    args_num_noise_nodes = int(args_prop_noise_nodes * data.x.size(0))
    args_c = eval('EVAL1_' + data.name)['args_c']
    args_p = eval('EVAL1_' + data.name)['args_p']
    args_binary = eval('EVAL1_' + data.name)['args_binary']

    # Select a random subset of nodes to eval the explainer on.
    if not node_indices:
        node_indices = extract_test_nodes(data, args_test_samples, seed)

    # Add noisy neighbours to the graph, with random features
    data = add_noise_neighbours(data, args_num_noise_nodes, node_indices,
                                binary=args_binary, p=args_p, connectedness=args_connectedness)

    # Define training parameters depending on the (dataset, model) pair
    hyperparam = ''.join(['hparams_', args_dataset, '_', args_model])
    param = ''.join(['params_', args_dataset, '_', args_model])

    # Define the model
    if args_model == 'GCN':
        model = GCN(input_dim=data.x.size(1),
                    output_dim=data.num_classes, **eval(hyperparam))
    else:
        model = GAT(input_dim=data.x.size(1),
                    output_dim=data.num_classes, **eval(hyperparam))

    # Re-train the model on the dataset with noisy neighbours
    train_and_val(model, data, **eval(param))

    model.eval()
    with torch.no_grad():
        log_logits = model(x=data.x, edge_index=data.edge_index)  # [2708, 7]
    test_acc = accuracy(log_logits[data.test_mask], data.y[data.test_mask])
    print('Test accuracy is {:.4f}'.format(test_acc))
    del log_logits

    # Study attention weights of noisy nodes in GAT model - compare attention with explanations
    if isinstance(model, GAT):
        study_attention_weights(data, model, args_test_samples)
    
    # Adaptable K - top k explanations we look at for each node
    # Depends on number of existing features/neighbours considered for GraphSVX
    # if 'GraphSVX' in args_explainers:
    # 	K = []
    # else:
    # 	K = [5]*len(node_indices)

    # Do for several explainers
    for c, explainer_name in enumerate(args_explainers):
        
        print('EXPLAINER: ', explainer_name)
        # Define the explainer
        explainer = eval(explainer_name)(data, model, args_gpu)

        # Loop on each test sample and store how many times noisy nodes appear among
        # the K most influential nodes in our explanations
        # 1 el per test sample - count number of noisy nodes in explanations (all classes)
        total_num_noise_neis = []
        # 1 el per test sample - count number of noisy nodes in explanations for the predicted class
        pred_class_num_noise_neis = []
        # 1 el per test sample - count number of noisy nodes in subgraph
        total_num_noisy_nei = []
        total_neigbours = []  # 1 el per test sample - number of neighbours of v in subgraph
        M = []  # 1 el per test sample - number of non zero features
        for node_idx in tqdm(node_indices, desc='explain node', leave=False):

            # Look only at coefficients for nodes (not node features)
            if explainer_name == 'Greedy':
                coefs = explainer.explain_nei(node_index=node_idx,
                                              hops=args_hops,
                                              num_samples=args_num_samples,
                                              info=False,
                                              multiclass=True)

            elif explainer_name == 'GNNExplainer':
                _ = explainer.explain(node_index=node_idx,
                                      hops=args_hops,
                                      num_samples=args_num_samples,
                                      info=False,
                                      multiclass=True)
                coefs = explainer.coefs

            else:
                # Explanations via GraphSVX
                coefs = explainer.explain([node_idx],
                                          args_hops,
                                          args_num_samples,
                                          info,
                                          args_multiclass,
                                          args_fullempty,
                                          args_S,
                                          args_hv,
                                          args_feat,
                                          args_coal,
                                          args_g,
                                          args_regu)
                coefs = coefs[0].T[explainer.F:]
            
            # if explainer.F > 50:
            # 	K.append(10)
            # else:
            # 	K.append(int(explainer.F * args_K))

            # Check how many non zero features
            M.append(explainer.M)

            # Number of noisy nodes in the subgraph of node_idx
            num_noisy_nodes = len(
                [n_idx for n_idx in explainer.neighbours if n_idx >= data.x.size(0)-args_num_noise_nodes])

            # Number of neighbours in the subgraph
            total_neigbours.append(len(explainer.neighbours))

            # Multiclass setting - consider all classes instead of focusing only on the
            # class predicted by our model
            num_noise_neis = []  # one element for each class of a test sample
            true_conf, predicted_class = model(x=data.x, edge_index=data.edge_index).exp()[
                node_idx].max(dim=0)

            for i in range(data.num_classes):

                # Store indices of the K most important neighbours, for each class
                nei_indices = np.abs(coefs[:, i]).argsort()[-args_K:].tolist()

                # Number of noisy neighbours that appear in explanations - use index to spot them
                num_noise_nei = sum(
                    idx >= (explainer.neighbours.shape[0] - num_noisy_nodes) for idx in nei_indices)
                num_noise_neis.append(num_noise_nei)

                if i == predicted_class:
                    #nei_indices = coefs[:,i].argsort()[-args_K:].tolist()
                    #num_noise_nei = sum(idx >= (explainer.neighbours.shape[0] - num_noisy_nodes) for idx in nei_indices)
                    pred_class_num_noise_neis.append(num_noise_nei)

            # Return this number => number of times noisy neighbours are provided as explanations
            total_num_noise_neis.append(sum(num_noise_neis))
            # Return number of noisy nodes adjacent to node of interest
            total_num_noisy_nei.append(num_noisy_nodes)

        if info:
            print('Noisy neighbours included in explanations: ',
                  total_num_noise_neis)

            print('There are {} noise neighbours found in the explanations of {} test samples, an average of {} per sample'
                  .format(sum(total_num_noise_neis), args_test_samples, sum(total_num_noise_neis)/args_test_samples))

            print('{} for the predicted class only'.format(
                np.sum(pred_class_num_noise_neis) / args_test_samples))

            print('Proportion of explanations showing noisy neighbours: {:.2f}%'.format(
                100 * sum(total_num_noise_neis) / (args_K * args_test_samples * data.num_classes)))

            perc = 100 * sum(total_num_noise_neis) / (args_test_samples *
                                                      args_num_noise_nodes * data.num_classes)
            perc2 = 100 * ((args_K * args_test_samples * data.num_classes) -
                           sum(total_num_noise_neis)) / (np.sum(M) - sum(total_num_noisy_nei))
            print('Proportion of noisy neighbours found in explanations vs normal features: {:.2f}% vs {:.2f}%'.format(
                perc, perc2))

            print('Proportion of nodes in subgraph that are noisy: {:.2f}%'.format(
                100 * sum(total_num_noisy_nei) / sum(total_neigbours)))

            print('Proportion of noisy neighbours in subgraph found in explanations: {:.2f}%'.format(
                100 * sum(total_num_noise_neis) / (sum(total_num_noisy_nei) * data.num_classes)))

        # Plot kernel density estimates of the number of noisy neighbours included in explanations
        # Done for all explainers (with different colours); plt.show() overlays them on the same graph
        total_num_noise_neis = [item/data.num_classes for item in total_num_noise_neis]
        plot_dist(total_num_noise_neis,
                    label=explainer_name, color=COLOURS[c])
        # else:  # consider only predicted class
        # 	plot_dist(pred_class_num_noise_neis,
        # 			  label=explainer_name, color=COLOURS[c])

    # Random explainer - plot estimated kernel density
    total_num_noise_neis = noise_nodes_for_random(
        data, model, args_K, args_num_noise_nodes, node_indices)
    
    total_num_noise_neis = [item/data.num_classes for item in total_num_noise_neis]
    plot_dist(total_num_noise_neis, label='Random',
              color='y')

    plt.savefig('results/eval1_node_{}'.format(data.name))
    #plt.show()

    return total_num_noise_neis
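
A small self-contained sketch (toy shapes and random values, nothing taken from the script) of the per-class counting rule used in the loop above: for each class, the K neighbours with the largest absolute coefficients are selected, and those whose index falls in the appended noisy block are counted.

import numpy as np

num_neighbours, num_classes, num_noisy, K = 8, 3, 2, 3
coefs = np.random.randn(num_neighbours, num_classes)  # toy node coefficients, one column per class

per_class_counts = []
for i in range(num_classes):
    # K most influential neighbours for class i
    top_k = np.abs(coefs[:, i]).argsort()[-K:].tolist()
    # Noisy neighbours occupy the last num_noisy indices
    per_class_counts.append(sum(idx >= num_neighbours - num_noisy for idx in top_k))
print(per_class_counts, '-> total over all classes:', sum(per_class_counts))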
Example #3
def filter_useless_features(args_dataset,
                            args_model,
                            args_explainers,
                            args_hops,
                            args_num_samples,
                            args_test_samples,
                            args_K,
                            args_prop_noise_feat,
                            node_indices,
                            info,
                            args_hv,
                            args_feat,
                            args_coal,
                            args_g,
                            args_multiclass,
                            args_regu,
                            args_gpu,
                            args_fullempty,
                            args_S, 
                            seed):
    """ Add noisy features to dataset and check how many are included in explanations
    The fewest, the better the explainer.

    Args:
        Arguments defined in argument parser of script_eval.py
    
    """

    # Define dataset 
    data = prepare_data(args_dataset, seed=seed)
    args_num_noise_feat = int(data.x.size(1) * args_prop_noise_feat)
    args_p = eval('EVAL1_' + data.name)['args_p']
    args_binary = eval('EVAL1_' + data.name)['args_binary']

    # Include noisy features
    data, noise_feat = add_noise_features(
        data, num_noise=args_num_noise_feat, binary=args_binary, p=args_p)

    # Define training parameters depending on the (dataset, model) pair
    hyperparam = ''.join(['hparams_', args_dataset, '_', args_model])
    param = ''.join(['params_', args_dataset, '_', args_model])

    # Define the model
    if args_model == 'GCN':
        model = GCN(input_dim=data.x.size(1),
                    output_dim=data.num_classes, **eval(hyperparam))
    else:
        model = GAT(input_dim=data.x.size(1),
                    output_dim=data.num_classes, **eval(hyperparam))

    # Re-train the model on dataset with noisy features
    train_and_val(model, data, **eval(param))

    # Select random subset of nodes to eval the explainer on.
    if not node_indices:
        node_indices = extract_test_nodes(data, args_test_samples, seed)
    
    # Evaluate the model on test set
    model.eval()
    with torch.no_grad():
        log_logits = model(x=data.x, edge_index=data.edge_index)  
    test_acc = accuracy(log_logits[data.test_mask], data.y[data.test_mask])
    print('Test accuracy is {:.4f}'.format(test_acc))

    # Derive predicted class for each test sample
    with torch.no_grad():
        true_confs, predicted_classes = log_logits.exp()[node_indices].max(dim=1)
    del log_logits

    # Adaptable K - top k explanations we look at for each node
    # Depends on number of existing features considered for GraphSVX
    if 'GraphSVX' in args_explainers:
        K = []
    else:
        K = [10]*len(node_indices)
    #for node_idx in node_indices:
    #	K.append(int(data.x[node_idx].nonzero().shape[0] * args_K))

    if args_regu == 0:
        args_regu = 1

    # Loop on the different explainers selected
    for c, explainer_name in enumerate(args_explainers):
        
        # Define explainer
        explainer = eval(explainer_name)(data, model, args_gpu)
        print('EXPLAINER: ', explainer_name)

        # count noisy features found in explanations 
        pred_class_num_noise_feats = []
        # count number of noisy features considered
        total_num_noise_feat_considered = []
        # count number of features   
        F = []

        # Loop on each test sample and store how many times noisy features appear among
        # the K most influential features in our explanations
        j = 0
        for node_idx in tqdm(node_indices, desc='explain node', leave=False):
            
            # Explanations via GraphSVX
            if explainer_name == 'GraphSVX':
                coefs = explainer.explain(
                                [node_idx],
                                args_hops,
                                args_num_samples,
                                info,
                                args_multiclass,
                                args_fullempty,
                                args_S,
                                args_hv,
                                args_feat,
                                args_coal,
                                args_g,
                                args_regu,
                                )
                # Look only at features coefficients 
                # Neighbours are irrelevant here
                coefs = coefs[0][:explainer.F]
                
                # Adaptable K
                if explainer.F > 100:
                    K.append(int(args_K * 100))
                else:
                    K.append( max(1, int(explainer.F * args_K)) )

                # Num_features_considered
                if args_feat == 'Null':
                    feat_idx = noise_feat[explainer.neighbours, :].mean(axis=0).nonzero()
                    num_noise_feat_considered = feat_idx.size()[0]

                # Consider all features (+ use expectation like below)
                elif args_feat == 'All':
                    num_noise_feat_considered = args_num_noise_feat

                # Consider only features whose aggregated value is different from expected one
                else:
                    # Stats dataset
                    var = noise_feat.std(axis=0)
                    mean = noise_feat.mean(axis=0)
                    # Feature intermediate rep
                    mean_subgraph = noise_feat[explainer.neighbours, :].mean(axis=0)
                    # Select relevant features only - (E-e,E+e)
                    mean_subgraph = torch.where(mean_subgraph > mean - 0.25*var, mean_subgraph,
                                        torch.ones_like(mean_subgraph)*100)
                    mean_subgraph = torch.where(mean_subgraph < mean + 0.25*var, mean_subgraph,
                                        torch.ones_like(mean_subgraph)*100)
                    feat_idx = (mean_subgraph == 100).nonzero()
                    num_noise_feat_considered = feat_idx.shape[0]
                    del mean, mean_subgraph, var
                
            else:
                coefs = explainer.explain(node_idx,
                                        args_hops,
                                        args_num_samples,
                                        info=False,
                                        multiclass=False
                                        )[:explainer.F]
                # All features are considered
                num_noise_feat_considered = args_num_noise_feat

            # Features considered 
            F.append(explainer.F)

            # Store indices of the K most important node features
            feat_indices = coefs.argsort()[-K[j]:].tolist()

            # Number of noisy features that appear in explanations - use index to spot them
            num_noise_feat = [idx for idx in feat_indices if idx > (explainer.F - num_noise_feat_considered)]

            # If the importance of a top-K feature is insignificant, discard it.
            # Possible because GraphSVX provides an informative importance measure, unlike the other explainers.
            if explainer_name == 'GraphSVX':
                explainable_part = true_confs[c] - \
                                explainer.base_values[c]
                num_noise_feat = [idx for idx in num_noise_feat if np.abs(coefs[idx]) > 0.05*np.abs(explainable_part)]
            
            # Count number of noisy features that appear in explanations
            num_noise_feat = len(num_noise_feat)
            pred_class_num_noise_feats.append(num_noise_feat)

            # Return number of noisy features considered in this test sample
            total_num_noise_feat_considered.append(num_noise_feat_considered)

            j += 1

        print('Noisy features included in explanations: ',
                        sum(pred_class_num_noise_feats) )
        print('For the predicted class, there are {} noise features found in the explanations of {} test samples, an average of {} per sample'
                        .format(sum(pred_class_num_noise_feats), args_test_samples, sum(pred_class_num_noise_feats)/args_test_samples))

        print(pred_class_num_noise_feats)

        if sum(F) != 0:
            perc = 100 * sum(total_num_noise_feat_considered) / sum(F)
            print(
                'Proportion of considered noisy features among features: {:.2f}%'.format(perc))
        if sum(K) != 0:
            perc = 100 * sum(pred_class_num_noise_feats) / sum(K)
            print('Proportion of explanations showing noisy features: {:.2f}%'.format(perc))

        if sum(total_num_noise_feat_considered) != 0:
            perc = 100 * sum(pred_class_num_noise_feats) / (sum(total_num_noise_feat_considered))
            perc2 = 100 * (sum(K) - sum(pred_class_num_noise_feats)) / (sum(F) - sum(total_num_noise_feat_considered)) 
            print('Proportion of noisy features found in explanations vs proportion of normal features (among considered ones): {:.2f}% vs {:.2f}%, over considered features only'.format(
                perc, perc2))

        print('------------------------------------')

        # Plot of kernel density estimates of number of noisy features included in explanation
        # Do for all benchmarks (with diff colors) and plt.show() to get on the same graph
        plot_dist(pred_class_num_noise_feats, 
                    label=explainer_name, color=COLOURS[c])

    # Random explainer - plot estimated kernel density
    total_num_noise_feats = noise_feats_for_random(
        data, model, K, args_num_noise_feat, node_indices)
    save_path = 'results/eval1_feat'
    plot_dist(total_num_noise_feats, label='Random', color='y')

    # Store graph - with key params and time
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    plt.savefig('results/eval1_feat_{}_{}_{}_{}_{}.pdf'.format(data.name,
                                                           args_coal, 
                                                           args_feat, 
                                                           args_hv, 
                                                           current_time))
    plt.close()
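
A toy illustration (arbitrary sizes, random values) of the deviation rule used above when args_feat is neither 'Null' nor 'All': a noisy feature only counts as "considered" when its average over the subgraph falls outside the interval (mean - 0.25*std, mean + 0.25*std) computed over the whole noise matrix.

import torch

noise_feat = torch.rand(2708, 20)   # toy noise-feature matrix (nodes x noisy features)
neighbours = torch.arange(10)       # toy subgraph node indices

mean = noise_feat.mean(dim=0)
std = noise_feat.std(dim=0)
sub_mean = noise_feat[neighbours, :].mean(dim=0)

# Features whose subgraph average deviates from the dataset average by at least 0.25 * std
considered = ((sub_mean <= mean - 0.25 * std) | (sub_mean >= mean + 0.25 * std)).nonzero()
print('noisy features considered:', considered.shape[0])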
Example #4
def filter_useless_features_multiclass(args_dataset,
                                       args_model,
                                       args_explainers,
                                       args_hops,
                                       args_num_samples,
                                       args_test_samples,
                                       args_prop_noise_feat,
                                       args_connectedness,
                                       node_indices,
                                       args_K,
                                       info,
                                       args_hv,
                                       args_feat,
                                       args_coal,
                                       args_g,
                                       args_multiclass,
                                       args_regu,
                                       args_gpu,
                                       args_fullempty,
                                       args_S, 
                                       seed):
    """ Add noisy features to dataset and check how many are included in explanations
    The fewest, the better the explainer.

    Args:
        Arguments defined in argument parser of script_eval.py
    
    """

    # Define dataset - include noisy features
    data = prepare_data(args_dataset, seed=seed)
    args_num_noise_feat = int(data.x.size(1) * args_prop_noise_feat)
    args_p = eval('EVAL1_' + data.name)['args_p']
    args_binary = eval('EVAL1_' + data.name)['args_binary']
    data, noise_feat = add_noise_features(
        data, num_noise=args_num_noise_feat, binary=args_binary, p=args_p)

    # Define training parameters depending on the (dataset, model) pair
    hyperparam = ''.join(['hparams_', args_dataset, '_', args_model])
    param = ''.join(['params_', args_dataset, '_', args_model])

    # Define the model
    if args_model == 'GCN':
        model = GCN(input_dim=data.x.size(1),
                    output_dim=data.num_classes, **eval(hyperparam))
    else:
        model = GAT(input_dim=data.x.size(1),
                    output_dim=data.num_classes, **eval(hyperparam))

    # Re-train the model on dataset with noisy features
    train_and_val(model, data, **eval(param))

    # Select random subset of nodes to eval the explainer on.
    if not node_indices:
        node_indices = extract_test_nodes(data, args_test_samples, seed)

    # Evaluate the model - test set
    model.eval()
    with torch.no_grad():
        log_logits = model(x=data.x, edge_index=data.edge_index)  # [2708, 7]
    test_acc = accuracy(log_logits[data.test_mask], data.y[data.test_mask])
    print('Test accuracy is {:.4f}'.format(test_acc))
    del log_logits

    # Loop on different explainers selected
    for c, explainer_name in enumerate(args_explainers):

        # Define explainer
        explainer = eval(explainer_name)(data, model, args_gpu)

        # count noisy features found in explanations for each test sample (for each class)
        total_num_noise_feats = []
        # count noisy features found in explanations for each test sample for class of interest
        pred_class_num_noise_feats = []
        # count number of noisy features considered for each test sample (for each class)
        total_num_noise_feat_considered = []
        F = []  # count number of non zero features for each test sample

        # Loop on each test sample and store how many times noisy features appear among
        # the K most influential features in our explanations
        for node_idx in tqdm(node_indices, desc='explain node', leave=False):

            if explainer_name == 'GraphSVX':
                coefs = explainer.explain(
                    [node_idx],
                    args_hops,
                    args_num_samples,
                    info,
                    args_multiclass,
                    args_fullempty,
                    args_S,
                    args_hv,
                    args_feat,
                    args_coal,
                    args_g,
                    args_regu)
                coefs = coefs[0].T[:explainer.F]

            # Explanations via the other baseline explainers
            else:
                coefs = explainer.explain(node_index=node_idx,
                                        hops=args_hops,
                                        num_samples=args_num_samples,
                                        info=False, 
                                        multiclass=True)
            

            # Check how many non zero features
            F.append(explainer.F)

            # Number of non-zero noisy features
            # Different for explainers that consider all features vs only non-zero features (SHAP, GraphSVX)
            # if explainer.F != data.x.size(1)
            if explainer_name == 'GraphSVX' or explainer_name == 'SHAP':
                num_noise_feat_considered = len(
                    [val for val in noise_feat[node_idx] if val != 0])
            else:
                num_noise_feat_considered = args_num_noise_feat

            # Multiclass setting - consider all classes instead of focusing only on the
            # class predicted by our model
            num_noise_feats = []
            true_conf, predicted_class = model(x=data.x, edge_index=data.edge_index).exp()[
                node_idx].max(dim=0)

            for i in range(data.num_classes):

                # Store indexes of K most important node features, for each class
                feat_indices = np.abs(
                    coefs[:explainer.F, i]).argsort()[-args_K:].tolist()

                # Number of noisy features that appear in explanations - use index to spot them
                num_noise_feat = sum(
                    idx < num_noise_feat_considered for idx in feat_indices)
                num_noise_feats.append(num_noise_feat)

                # For predicted class only
                if i == predicted_class:
                    pred_class_num_noise_feats.append(num_noise_feat)

            # Return number of times noisy features are provided as explanations
            total_num_noise_feats.append(sum(num_noise_feats))

            # Return number of noisy features considered in this test sample
            total_num_noise_feat_considered.append(num_noise_feat_considered)

        if info:
            print('Noise features included in explanations: ',
                  total_num_noise_feats)
            print('There are {} noise features found in the explanations of {} test samples, an average of {} per sample'
                  .format(sum(total_num_noise_feats), args_test_samples, sum(total_num_noise_feats)/args_test_samples))

            # Number of noisy features found in explanation for the predicted class
            print('{} for the predicted class only'.format(
                np.sum(pred_class_num_noise_feats) / args_test_samples))

            perc = 100 * sum(total_num_noise_feat_considered) / np.sum(F)
            print(
                'Proportion of non-zero noisy features among non-zero features: {:.2f}%'.format(perc))

            perc = 100 * sum(total_num_noise_feats) / \
                (args_K * args_test_samples * data.num_classes)
            print(
                'Proportion of explanations showing noisy features: {:.2f}%'.format(perc))

            if sum(total_num_noise_feat_considered) != 0:
                perc = 100 * sum(total_num_noise_feats) / \
                    (sum(total_num_noise_feat_considered)*data.num_classes)
                perc2 = 100 * (args_K * args_test_samples * data.num_classes - sum(total_num_noise_feats)) / (
                    data.num_classes * (sum(F) - sum(total_num_noise_feat_considered)))
                print('Proportion of noisy features found in explanations vs normal features (among considered ones): {:.2f}% vs {:.2f}%, over considered features only'.format(
                    perc, perc2))

            print('------------------------------------')

        # Plot of kernel density estimates of number of noisy features included in explanation
        # Do for all benchmarks (with diff colors) and plt.show() to get on the same graph
        # plot_dist(total_num_noise_feats, label=explainer_name, color=COLOURS[c])
        plot_dist(total_num_noise_feats,
                    label=explainer_name, color=COLOURS[c])

    # Random explainer - plot estimated kernel density
    total_num_noise_feats = noise_feats_for_random(
        data, model, args_K, args_num_noise_feat, node_indices)
    save_path = 'results/eval1_feat'
    plot_dist(total_num_noise_feats, label='Random', color='y')

    plt.savefig('results/eval1_feat_{}'.format(data.name))
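
The helper plot_dist used throughout these examples is imported from elsewhere in the repository and is not shown here. As context only, a minimal sketch of what such a kernel-density plotting helper could look like, assuming seaborn, is given below; this is an assumption, not the project's actual implementation.

import seaborn as sns
import matplotlib.pyplot as plt

def plot_dist(values, label=None, color=None):
    # Kernel-density estimate of the per-sample noise counts, drawn onto the current figure
    # so that successive calls (one per explainer) overlay their curves on the same axes.
    sns.kdeplot(values, label=label, color=color)
    plt.xlabel('Number of noisy items in explanation')
    plt.legend()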