def syn_task1(args, writer=None):
    """Train a GCN node classifier on the syn1 dataset (BA graph + house motifs).

    Args:
        args: parsed program arguments (input_dim, hidden_dim, output_dim,
            num_gc_layers, bn, method, gpu, ...).
        writer: optional TensorBoard SummaryWriter forwarded to training.
    """
    # data: synthetic graph with constant node features of size input_dim
    G, labels, name = gengraph.gen_syn1(
        feature_generator=featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float)))
    num_classes = max(labels) + 1

    # Both original branches constructed byte-identical models; keep the
    # per-method log line but build the encoder exactly once (any "att"
    # specific behavior is carried through the `args` object).
    if args.method == "att":
        print("Method: att")
    else:
        print("Method:", args.method)
    model = models.GcnEncoderNode(
        args.input_dim,
        args.hidden_dim,
        args.output_dim,
        num_classes,
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )
    if args.gpu:
        model = model.cuda()

    train_node_classifier(G, labels, model, args, writer=writer)
def ppi_essential_task(args, writer=None):
    """Train a GCN node classifier on the PPI essential-gene dataset.

    Args:
        args: parsed program arguments; `args.loss_weight` is set here to
            counter label-class imbalance.
        writer: optional TensorBoard SummaryWriter forwarded to training.
    """
    feat_file = "G-MtfPathways_gene-motifs.csv"
    G = io_utils.read_biosnap(
        "data/ppi_essential",
        "hi-union-ppi.tsv",
        "G-HumanEssential.tsv",
        feat_file=feat_file,
    )
    labels = np.array([G.nodes[u]["label"] for u in G.nodes()])
    num_classes = max(labels) + 1
    # Feature dimension taken from an arbitrary node of the loaded graph.
    input_dim = G.nodes[next(iter(G.nodes()))]["feat"].shape[0]

    if args.method == "attn":
        print("Method: attn")
    else:
        print("Method:", args.method)

    # Class weights in the CE loss to compensate for label imbalance.
    # BUG FIX: the weight tensor was previously moved to CUDA even on
    # CPU-only runs, and `model` was never constructed for the "attn"
    # method, crashing with a NameError at train time.
    loss_weight = torch.tensor([1.0, 5.0], dtype=torch.float)
    args.loss_weight = loss_weight.cuda() if args.gpu else loss_weight
    model = models.GcnEncoderNode(
        input_dim,
        args.hidden_dim,
        args.output_dim,
        num_classes,
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )
    if args.gpu:
        model = model.cuda()

    train_node_classifier(G, labels, model, args, writer=writer)
# --- Example #3 ---
def task_bitcoinalpha(args):
    """Train a GCN node classifier on the bitcoin-alpha dataset.

    Loads the adjacency matrix, feature matrix and labels from the
    pre-generated XAL directory, builds a GcnEncoderNode sized to the data,
    and hands everything to the trainer.
    """
    data_dir = "../Generate_XA_Data/XAL"
    A, X = utils.load_XA(args.dataset, datadir=data_dir)
    L = utils.load_labels(args.dataset, datadir=data_dir)
    num_classes = max(L) + 1
    print("NUMBER OF CLASS IS: " + str(num_classes))
    input_dim = X.shape[1]

    print("Input dimension is: ", input_dim)

    model = models.GcnEncoderNode(
        input_dim,
        args.hidden_dim,
        args.output_dim,
        num_classes,
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )

    # Adjacency is used as-is; the trainer is told not to normalize it.
    train_node_classifier.train(
        model, A, X, L, args, normalize_adjacency=False)
# --- Example #4 ---
def bitcoin(args):
    """Explain node predictions of a pretrained GCN on a bitcoin dataset.

    Restores a checkpointed GcnEncoderNode, runs the perturbation-based
    node explainer over every node with degree greater than 2, and saves
    the resulting explanations to disk.
    """
    data_dir = "../Generate_XA_Data/XAL"
    A, X = utils.load_XA(args.dataset, datadir=data_dir)
    L = utils.load_labels(args.dataset, datadir=data_dir)
    num_classes = max(L) + 1
    input_dim = X.shape[1]
    num_nodes = X.shape[0]
    ckpt = utils.load_ckpt(args)

    print("input dim: ", input_dim, "; num classes: ", num_classes)

    model = models.GcnEncoderNode(
        input_dim=input_dim,
        hidden_dim=args.hidden_dim,
        embedding_dim=args.output_dim,
        label_dim=num_classes,
        num_layers=args.num_gc_layers,
        bn=args.bn,
        args=args,
    )

    # Restore trained weights and the predictions saved with the checkpoint.
    model.load_state_dict(ckpt["model_state"])
    pred = ckpt["save_data"]["pred"]

    explainer = pe.Node_Explainer(model, A, X, pred, args.num_gc_layers)

    # Only explain nodes whose (column-summed) degree exceeds 2.
    degrees = np.sum(A, axis=0)
    node_to_explain = [node for [node] in np.argwhere(degrees > 2)]

    explanations = explainer.explain_range(
        node_to_explain,
        num_samples=args.num_perturb_samples,
        top_node=args.top_node,
    )

    print(explanations)

    savename = utils.gen_filesave(args)
    np.save(savename, explanations)
# --- Example #5 ---
def syn_task5(args, writer=None):
    """Train a GCN node classifier on the syn5 synthetic dataset.

    Args:
        args: parsed program arguments.
        writer: optional TensorBoard SummaryWriter forwarded to training.
    """
    # data: synthetic graph with constant node features of size input_dim
    G, labels, name = gnnexplainer_gengraph.gen_syn5(
        feature_generator=featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float)))
    print(labels)
    print("Number of nodes: ", G.number_of_nodes())
    num_classes = max(labels) + 1

    if args.method == "attn":
        print("Method: attn")
    else:
        print("Method: base")

    # BUG FIX: the original built the model only in the "base" branch
    # (leaving `model` unbound for "attn") and its GPU branch was the
    # no-op `model = model` instead of moving the model to CUDA.
    model = models.GcnEncoderNode(
        args.input_dim,
        args.hidden_dim,
        args.output_dim,
        num_classes,
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )
    if args.gpu:
        model = model.cuda()

    train_node_classifier(G, labels, model, args, writer=writer)
def reveal(args, idx=None, writer=None):
    """Train a node classifier over ten disjoint Enron e-mail graph slices.

    When `idx` is None, the ten pickled slices are loaded, given constant
    node features, merged into one disjoint union, and used to train a
    GcnEncoderNode on employee-role labels.
    """
    # Map employee role strings to integer class labels.
    labels_dict = {
        "None": 5,
        "Employee": 0,
        "Vice President": 1,
        "Manager": 2,
        "Trader": 3,
        "CEO+Managing Director+Director+President": 4,
    }
    max_enron_id = 183
    if idx is not None:
        print("Running Enron full task")
        return

    G_list = []
    labels_list = []
    for slice_idx in range(10):
        path = "data/gnn-explainer-enron/enron_slice_{}.pkl".format(slice_idx)
        with open(path, "rb") as handle:
            net = pickle.load(handle)
        # Attach constant features of size input_dim to every node.
        const_gen = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        const_gen.gen_node_features(net)
        G_list.append(net)
        print(net.number_of_nodes())

    # Merge all slices into one graph with disjoint node sets.
    G = nx.disjoint_union_all(G_list)
    model = models.GcnEncoderNode(
        args.input_dim,
        args.hidden_dim,
        args.output_dim,
        len(labels_dict),
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )
    roles = [data.get("role", "None") for _, data in G.nodes(data=True)]
    labels_num = [labels_dict[role] for role in roles]
    for cls in range(5):
        print("Label ", cls, ": ", labels_num.count(cls))

    print("Total num nodes: ", len(labels_num))
    print(labels_num)

    if args.gpu:
        model = model.cuda()
    train_node_classifier(G, labels_num, model, args, writer=writer)
# --- Example #7 ---
def task_syn(args):
    """Explain node predictions of a pretrained GCN on the syn* datasets.

    Restores a checkpointed GcnEncoderNode sized to the loaded data, then
    either explains the single node given by `args.explain_node` or, when
    that is None, explains the motif-node range of the selected dataset.
    Results are saved with np.save.
    """
    A, X = utils.load_XA(args.dataset, datadir="../Generate_XA_Data/XAL")
    L = utils.load_labels(args.dataset, datadir="../Generate_XA_Data/XAL")
    num_classes = max(L) + 1
    input_dim = X.shape[1]
    ckpt = utils.load_ckpt(args)

    print("input dim: ", input_dim, "; num classes: ", num_classes)

    model = models.GcnEncoderNode(
        input_dim=input_dim,
        hidden_dim=args.hidden_dim,
        embedding_dim=args.output_dim,
        label_dim=num_classes,
        num_layers=args.num_gc_layers,
        bn=args.bn,
        args=args,
    )

    # Restore trained weights and the predictions saved with the checkpoint.
    model.load_state_dict(ckpt["model_state"])
    pred = ckpt["save_data"]["pred"]

    explainer = pe.Node_Explainer(model, A, X, pred, args.num_gc_layers)

    explanations = {}
    if args.explain_node is None:  # BUG FIX: was `== None`
        # Per-dataset motif node ranges and optional prediction thresholds
        # (deduplicates the original six near-identical elif arms).
        dataset_ranges = {
            "syn1": (list(range(300, 700)), {}),
            "syn2": (list(range(300, 700)) + list(range(1000, 1400)),
                     {"pred_threshold": 0.1}),
            "syn3": (list(range(300, 1020)), {"pred_threshold": 0.05}),
            "syn4": (list(range(511, 871)), {"pred_threshold": 0.1}),
            "syn5": (list(range(511, 1231)), {"pred_threshold": 0.05}),
            "syn6": (list(range(300, 700)), {}),
        }
        if args.dataset in dataset_ranges:
            nodes, extra = dataset_ranges[args.dataset]
            explanations = explainer.explain_range(
                nodes,
                num_samples=args.num_perturb_samples,
                top_node=args.top_node,
                **extra,
            )
    else:
        explanation = explainer.explain(
            args.explain_node,
            num_samples=args.num_perturb_samples,
            top_node=args.top_node,
        )
        print(explanation)
        explanations[args.explain_node] = explanation

    print(explanations)

    savename = utils.gen_filesave(args)
    np.save(savename, explanations)
def enron_task_multigraph(args, idx=None, writer=None):
    """Train a multi-graph node classifier on the Enron e-mail slices.

    When `idx` is None, each of the ten pickled slices is padded to a
    common node set, labelled by employee role, given constant features,
    and passed (as a list of graphs) to the multigraph trainer.
    """
    # Map employee role strings to integer class labels.
    labels_dict = {
        "None": 5,
        "Employee": 0,
        "Vice President": 1,
        "Manager": 2,
        "Trader": 3,
        "CEO+Managing Director+Director+President": 4,
    }
    max_enron_id = 183
    if idx is not None:
        print("Running Enron full task")
        return

    G_list = []
    labels_list = []
    for slice_idx in range(10):
        path = "data/gnn-explainer-enron/enron_slice_{}.pkl".format(slice_idx)
        with open(path, "rb") as handle:
            net = pickle.load(handle)
        # Pad every slice to the same node id range so labels align.
        net.add_nodes_from(range(max_enron_id))
        roles = [data.get("role", "None") for _, data in net.nodes(data=True)]
        labels_num = [labels_dict[role] for role in roles]
        const_gen = featgen.ConstFeatureGen(
            np.ones(args.input_dim, dtype=float))
        const_gen.gen_node_features(net)
        G_list.append(net)
        labels_list.append(labels_num)

    model = models.GcnEncoderNode(
        args.input_dim,
        args.hidden_dim,
        args.output_dim,
        args.num_classes,
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )
    if args.gpu:
        model = model.cuda()
    # As in the original: prints the labels of the last loaded slice.
    print(labels_num)
    train_node_classifier_multigraph(
        G_list, labels_list, model, args, writer=writer)
# --- Example #9 ---
def task_syn(args):
    """Train a GCN node classifier on a synthetic dataset from XAL files.

    Loads the adjacency matrix, feature matrix and labels, builds a
    GcnEncoderNode sized to the loaded data, and trains it.
    """
    A, X = utils.load_XA(args.dataset, datadir="../Generate_XA_Data/XAL")
    L = utils.load_labels(args.dataset, datadir="../Generate_XA_Data/XAL")
    num_classes = max(L) + 1
    input_dim = X.shape[1]

    # BUG FIX: the model must be sized by the feature dimension of the
    # loaded data (`input_dim`, computed above but previously unused),
    # not by `args.input_dim`, which may disagree with X's width.
    model = models.GcnEncoderNode(
        input_dim,
        args.hidden_dim,
        args.output_dim,
        num_classes,
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )

    # Adjacency is used as-is; the trainer is told not to normalize it.
    train_node_classifier.train(model,
                                A,
                                X,
                                L,
                                args,
                                normalize_adjacency=False)
def syn_task2(args, writer=None):
    """Train a GCN node classifier on the syn2 synthetic dataset.

    Args:
        args: parsed program arguments.
        writer: optional TensorBoard SummaryWriter forwarded to training.
    """
    # data: syn2 supplies its own node features, so read the dimension back
    G, labels, name = gengraph.gen_syn2()
    input_dim = len(G.nodes[0]["feat"])
    num_classes = max(labels) + 1

    if args.method == "attn":
        print("Method: attn")
    else:
        print("Method:", args.method)

    # BUG FIX: the original built the model only in the non-"attn" branch,
    # leaving `model` unbound (NameError) when method == "attn".
    model = models.GcnEncoderNode(
        input_dim,
        args.hidden_dim,
        args.output_dim,
        num_classes,
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )
    if args.gpu:
        model = model.cuda()

    train_node_classifier(G, labels, model, args, writer=writer)
# --- Example #11 ---
def main():
    """Load a trained GCN node-classifier checkpoint and explain one node.

    Parses explainer arguments, restores the model state and computation
    graph from a saved checkpoint, rebuilds the GcnEncoderNode with the
    checkpoint's dimensions, and runs the GNN explainer on the node given
    by ``prog_args.explain_node``. Prints extensive diagnostics along the
    way; no value is returned.
    """
    # Parsing defaults for all program parameters unless provided by user
    prog_args = parse_explainer_args.arg_parse()

    # More params on top of train.py
    prog_args.writer = None  # Check is for None and default is True

    path = os.path.join(prog_args.logdir, io_utils.gen_prefix(prog_args))
    print("Tensorboard writer path :\n", path)
    print("No. of epochs :", prog_args.num_epochs)

    # writer = SummaryWriter(path)

    if prog_args.gpu:
        #    os.environ["CUDA_VISIBLE_DEVICES"] = prog_args.cuda
        #    env = os.environ.get('CUDA_VISIBLE_DEVICES')
        #    print("Environment is set :", env)
        print('\nCUDA_VISIBLE_DEVICES')
        print('------------------------------------------')
        print("CUDA", prog_args.cuda)
    else:
        print('\n------------------------------------------')
        print("Using CPU")

    # Loading previously saved computational graph data (model checkpoint)
    model_dict = io_utils.load_ckpt(prog_args)
    model_optimizer = model_dict['optimizer']

    print("Model optimizer :", model_optimizer)
    print("Model optimizer state dictionary :\n",
          model_optimizer.state_dict()['param_groups'])
    # model.load_state_dict(checkpoint['model_state_dict'])
    # optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # epoch = checkpoint['epoch']
    # loss = checkpoint['loss']

    print(
        '------------------------------------------------------------------------------------'
    )
    print("Keys in loaded model dictionary :", list(model_dict))
    print("Keys in loaded model optimizer dictionary:",
          list(model_optimizer.state_dict()))
    print("All loaded labels :\n", model_dict['cg']['label'])

    print()
    print('mask_act:{}, mask_bias:{}, explainer_suffix:{}'.format(
        prog_args.mask_act, prog_args.mask_bias, prog_args.explainer_suffix))

    # Determine explainer mode: graph classification if explicitly requested
    # or if a multigraph class / graph index was supplied.
    graph_mode = (prog_args.graph_mode or prog_args.multigraph_class >= 0
                  or prog_args.graph_idx >= 0)

    # Trained data stored in computational graph dictionary
    cg_dict = model_dict['cg']
    # Model dimensions are recovered from the saved feature/prediction
    # tensors rather than from the (possibly stale) program arguments.
    input_dim = cg_dict['feat'].shape[2]
    num_classes = cg_dict['pred'].shape[2]
    print("\nLoaded model from subdirectory \"{}\" ...".format(
        prog_args.ckptdir))
    print("input dim :", input_dim, "; num classes :", num_classes)
    print("Labels of retrieved data :\n", cg_dict['label'])

    print(
        '------------------------------------------------------------------------------------'
    )
    print("Multigraph class :", prog_args.multigraph_class)
    print("Graph Index :", prog_args.graph_idx)
    print("Explainer graph mode :", graph_mode)
    print("Input dimension :", input_dim)
    print("Hidden dimension :", prog_args.hidden_dim)
    print("Output dimension :", prog_args.output_dim)
    print("Number of classes :", num_classes)
    print("Number of GCN layers :", prog_args.num_gc_layers)
    print("Batch Normalization :", prog_args.bn)

    # Rebuild the encoder with the checkpoint's dimensions before loading
    # its saved weights.
    model = models.GcnEncoderNode(input_dim=input_dim,
                                  hidden_dim=prog_args.hidden_dim,
                                  embedding_dim=prog_args.output_dim,
                                  label_dim=num_classes,
                                  num_layers=prog_args.num_gc_layers,
                                  bn=prog_args.bn,
                                  args=prog_args)

    print("\nGcnEncoderNode model :\n", model)

    # load state_dict (obtained by model.state_dict() when saving checkpoint)
    # Loading Model for Inference
    print("Model checked result :",
          model.load_state_dict(model_dict['model_state']))
    print(
        '------------------------------------------------------------------------------------\n'
    )

    # Explaining single node prediction
    print('Explaining single default node :', prog_args.explain_node)

    # The number of epochs used for explanation training is much smaller than the 1K epochs used for node label
    # trainings and predictions in the GCN.  The former is trained only based on the k-hop labels which depends
    # on the number GCN layers (at a smaller scale, so the number of epochs can be lower without reducing the
    # accuracy). Whereas, the latter will affect the node predictions and thus, it will affect the accuracy of
    # the node explanations.

    print('GNN Explainer is trained based on {} epochs.'.format(
        prog_args.num_epochs))
    print("Writer :", prog_args.writer)

    # Create explainer
    explainer = explain.Explainer(
        model=model,
        adj=cg_dict["adj"],
        feat=cg_dict["feat"],
        label=cg_dict["label"],
        pred=cg_dict["pred"],
        train_idx=cg_dict["train_idx"],
        args=prog_args,
        writer=prog_args.writer,
        print_training=True,
        graph_mode=graph_mode,
        graph_idx=prog_args.graph_idx,
    )

    if prog_args.explain_node is not None:
        # Returned masked adjacency, edges and features of the subgraph
        masked_adj, masked_edges, masked_features = explainer.explain(
            prog_args.explain_node, unconstrained=False)

        print("Returned masked adjacency matrix :\n", masked_adj)
        print("Returned masked edges matrix :\n", masked_edges)
        print("Returned masked features matrix :\n", masked_features)
    else:
        print("Please provide node for explanation.")
# --- Example #12 ---
def main():
    """Entry point: load a checkpoint and run GNNExplainer.

    Supports several modes selected by the parsed arguments: explaining a
    single node, explaining one or all graphs (graph mode), explaining all
    graphs/nodes of a given class, or a default sweep over a node range.
    """
    # Load a configuration
    prog_args = arg_parse()

    if prog_args.gpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = prog_args.cuda
        print("CUDA", prog_args.cuda)
    else:
        print("Using CPU")

    # Configure the logging directory
    if prog_args.writer:
        path = os.path.join(prog_args.logdir,
                            io_utils.gen_explainer_prefix(prog_args))
        if os.path.isdir(path) and prog_args.clean_log:
            print('Removing existing log dir: ', path)
            # Interactive guard: abort unless the user confirms deletion.
            if not input(
                    "Are you sure you want to remove this directory? (y/n): "
            ).lower().strip()[:1] == "y":
                sys.exit(1)
            shutil.rmtree(path)
        writer = SummaryWriter(path)
    else:
        writer = None

    # Load a model checkpoint
    ckpt = io_utils.load_ckpt(prog_args)
    cg_dict = ckpt["cg"]  # get computation graph
    # Model dimensions recovered from the saved feature/prediction tensors.
    input_dim = cg_dict["feat"].shape[2]
    num_classes = cg_dict["pred"].shape[2]
    print("Loaded model from {}".format(prog_args.ckptdir))
    print("input dim: ", input_dim, "; num classes: ", num_classes)

    # Determine explainer mode: graph classification if explicitly requested
    # or if a multigraph class / graph index was supplied.
    graph_mode = (prog_args.graph_mode or prog_args.multigraph_class >= 0
                  or prog_args.graph_idx >= 0)

    # build model
    print("Method: ", prog_args.method)
    if graph_mode:
        # Explain Graph prediction
        model = models.GcnEncoderGraph(
            input_dim=input_dim,
            hidden_dim=prog_args.hidden_dim,
            embedding_dim=prog_args.output_dim,
            label_dim=num_classes,
            num_layers=prog_args.num_gc_layers,
            bn=prog_args.bn,
            args=prog_args,
        )
    else:
        if prog_args.dataset == "ppi_essential":
            # class weight in CE loss for handling imbalanced label classes
            prog_args.loss_weight = torch.tensor([1.0, 5.0],
                                                 dtype=torch.float).cuda()
        # Explain Node prediction
        model = models.GcnEncoderNode(
            input_dim=input_dim,
            hidden_dim=prog_args.hidden_dim,
            embedding_dim=prog_args.output_dim,
            label_dim=num_classes,
            num_layers=prog_args.num_gc_layers,
            bn=prog_args.bn,
            args=prog_args,
        )
    if prog_args.gpu:
        model = model.cuda()
    # load state_dict (obtained by model.state_dict() when saving checkpoint)
    model.load_state_dict(ckpt["model_state"])

    # Create explainer
    explainer = explain.Explainer(
        model=model,
        adj=cg_dict["adj"],
        feat=cg_dict["feat"],
        label=cg_dict["label"],
        pred=cg_dict["pred"],
        train_idx=cg_dict["train_idx"],
        args=prog_args,
        writer=writer,
        print_training=True,
        graph_mode=graph_mode,
        graph_idx=prog_args.graph_idx,
    )

    # TODO: API should definitely be cleaner
    # Let's define exactly which modes we support
    # We could even move each mode to a different method (even file)
    if prog_args.explain_node is not None:
        # Single-node explanation.
        explainer.explain(prog_args.explain_node, unconstrained=False)
    elif graph_mode:
        if prog_args.explain_all:
            # Explain every graph in the checkpoint, saving outputs under
            # dedicated explanation/embedding directories.
            explain_path = Path('explanations/gnnexplainer/')
            explain_path.mkdir(exist_ok=True, parents=True)
            embeddings_path = Path('embeddings-%s/' % prog_args.bmname)
            embeddings_path.mkdir(exist_ok=True, parents=True)
            for i in range(len(cg_dict['all_idx'])):
                print('Explaining %s' % cg_dict['all_idx'][i])
                explainer.explain(node_idx=0,
                                  graph_idx=i,
                                  graph_mode=True,
                                  unconstrained=False,
                                  original_idx=cg_dict['all_idx'][i])
        elif prog_args.multigraph_class >= 0:
            print(cg_dict["label"])
            # only run for graphs with label specified by multigraph_class
            labels = cg_dict["label"].numpy()
            graph_indices = []
            for i, l in enumerate(labels):
                if l == prog_args.multigraph_class:
                    graph_indices.append(i)
                # Cap the run at 31 matching graphs.
                if len(graph_indices) > 30:
                    break
            print(
                "Graph indices for label ",
                prog_args.multigraph_class,
                " : ",
                graph_indices,
            )
            explainer.explain_graphs(graph_indices=graph_indices)

        elif prog_args.graph_idx == -1:
            # just run for a customized set of indices
            explainer.explain_graphs(graph_indices=[1, 2, 3, 4])
        else:
            explainer.explain(
                node_idx=0,
                graph_idx=prog_args.graph_idx,
                graph_mode=True,
                unconstrained=False,
                original_idx=cg_dict['all_idx'][prog_args.graph_idx])
            # io_utils.plot_cmap_tb(writer, "tab20", 20, "tab20_cmap")
    else:
        if prog_args.multinode_class >= 0:
            print(cg_dict["label"])
            # only run for nodes with label specified by multinode_class
            labels = cg_dict["label"][0]  # already numpy matrix

            node_indices = []
            for i, l in enumerate(labels):
                # Cap the run at 5 matching nodes.
                if len(node_indices) > 4:
                    break
                if l == prog_args.multinode_class:
                    node_indices.append(i)
            print(
                "Node indices for label ",
                prog_args.multinode_class,
                " : ",
                node_indices,
            )
            explainer.explain_nodes(node_indices, prog_args)

        else:
            # explain a set of nodes
            masked_adj = explainer.explain_nodes_gnn_stats(
                range(400, 700, 5), prog_args)
# --- Example #13 ---
def syn_task1(args, writer=None):
    """Train a GCN node classifier on syn1 and return the trained model.

    Args:
        args: parsed program arguments; `args.num_classes` is updated here.
        writer: optional TensorBoard SummaryWriter forwarded to training.

    Returns:
        The trained GcnEncoderNode model (for further use, e.g. in a
        notebook).
    """
    print("\nStart with these parsed program arguments :\n", args)

    # np.ones(input_dim, dtype=float) = [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]
    constant_feature = featureGen.ConstFeatureGen(
        np.ones(args.input_dim, dtype=float))
    print("Constant feature generator : ", constant_feature.val)

    # Create the BA graph with the "house" motifs
    G, labels, name = gengraph.gen_syn1(feature_generator=constant_feature)

    # No .of classes from [0-3] for BA graph with house motifs
    num_classes = max(labels) + 1
    # Update number of classes in argument for training (Out of bounds error)
    args.num_classes = num_classes

    # GcnEncoderNode model
    print("------------ GCNEncoderNode Model ------------")
    print("Input dimensions :", args.input_dim)
    print("Hidden dimensions :", args.hidden_dim)
    print("Output dimensions :", args.output_dim)
    print("Number of classes in args :", args.num_classes)
    print("Number of GCN layers :", args.num_gc_layers)
    print("Method : ", args.method)

    model = models.GcnEncoderNode(args.input_dim,
                                  args.hidden_dim,
                                  args.output_dim,
                                  args.num_classes,
                                  args.num_gc_layers,
                                  bn=args.bn,
                                  args=args)

    print("GcnEncoderNode model :\n", model)

    # (Dead commented-out per-method model construction removed: both
    # branches built identical models, which this single call already does.)
    if args.gpu:
        model = model.cuda()

    train_node_classifier(G, labels, model, args, writer=writer)

    # Return model for manipulations in ipynb
    return model
# --- Example #14 ---
def main():
    """Load a checkpoint and compare GNNExplainer output on test nodes.

    Restores a trained GcnEncoderNode (or GcnEncoderGraph), rebuilds the
    model predictions, selects dataset-specific test node indices, and runs
    the GNNExplainer accuracy statistics over them, timing the run.
    """
    # Load a configuration
    prog_args = arg_parse()

    if prog_args.gpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = prog_args.cuda
        print("CUDA", prog_args.cuda)
    else:
        print("Using CPU")

    # Configure the logging directory
    if prog_args.writer:
        path = os.path.join(prog_args.logdir,
                            io_utils.gen_explainer_prefix(prog_args))
        if os.path.isdir(path) and prog_args.clean_log:
            print('Removing existing log dir: ', path)
            # Interactive guard: abort unless the user confirms deletion.
            if not input(
                    "Are you sure you want to remove this directory? (y/n): "
            ).lower().strip()[:1] == "y":
                sys.exit(1)
            shutil.rmtree(path)
        writer = SummaryWriter(path)
    else:
        writer = None

    # Load data and a model checkpoint
    ckpt = io_utils.load_ckpt(prog_args)
    cg_dict = ckpt["cg"]  # get computation graph
    # Model dimensions recovered from the saved feature/prediction tensors.
    input_dim = cg_dict["feat"].shape[2]
    num_classes = cg_dict["pred"].shape[2]
    print("Loaded model from {}".format(prog_args.ckptdir))
    print("input dim: ", input_dim, "; num classes: ", num_classes)

    # Determine explainer mode (node classif)
    graph_mode = (prog_args.graph_mode or prog_args.multigraph_class >= 0
                  or prog_args.graph_idx >= 0)

    # build model
    print("Method: ", prog_args.method)
    if graph_mode:
        # Explain Graph prediction
        model = models.GcnEncoderGraph(
            input_dim=input_dim,
            hidden_dim=prog_args.hidden_dim,
            embedding_dim=prog_args.output_dim,
            label_dim=num_classes,
            num_layers=prog_args.num_gc_layers,
            bn=prog_args.bn,
            args=prog_args,
        )
    else:
        if prog_args.dataset == "ppi_essential":
            # class weight in CE loss for handling imbalanced label classes
            prog_args.loss_weight = torch.tensor([1.0, 5.0],
                                                 dtype=torch.float).cuda()
        # Explain Node prediction
        model = models.GcnEncoderNode(
            input_dim=input_dim,
            hidden_dim=prog_args.hidden_dim,
            embedding_dim=prog_args.output_dim,
            label_dim=num_classes,
            num_layers=prog_args.num_gc_layers,
            bn=prog_args.bn,
            args=prog_args,
        )
    if prog_args.gpu:
        model = model.cuda()

    # Load state_dict (obtained by model.state_dict() when saving checkpoint)
    model.load_state_dict(ckpt["model_state"])

    # Convertion data required to get correct model output for GraphSHAP
    adj = torch.tensor(cg_dict["adj"], dtype=torch.float)
    x = torch.tensor(cg_dict["feat"], requires_grad=True, dtype=torch.float)
    if prog_args.gpu:
        y_pred, att_adj = model(x.cuda(), adj.cuda())
    else:
        y_pred, att_adj = model(x, adj)

    # Transform their data into our format
    data = transform_data(adj, x, cg_dict["label"][0].tolist())

    # Generate test nodes
    # Use only these specific nodes as they are the ones added manually, part of the defined shapes
    # node_indices = extract_test_nodes(data, num_samples=10, cg_dict['train_idx'])
    k = 4  # number of nodes for the shape introduced (house, cycle)
    K = 0
    # Dataset-specific test node indices and shape sizes.
    if prog_args.dataset == 'syn1':
        node_indices = list(range(400, 410, 5))
    elif prog_args.dataset == 'syn2':
        node_indices = list(range(400, 405, 5)) + list(range(1100, 1105, 5))
    elif prog_args.dataset == 'syn4':
        node_indices = list(range(511, 523, 6))
        if prog_args.hops == 3:
            k = 5
        else:
            K = 5
    elif prog_args.dataset == 'syn5':
        node_indices = list(range(511, 529, 9))
        if prog_args.hops == 3:
            k = 7
            K = 8
        else:
            k = 5
            K = 8

    # GraphSHAP explainer
    # graphshap = GraphSHAP(data, model, adj, writer, prog_args.dataset, prog_args.gpu)

    # Run GNN Explainer and retrieve produced explanations

    gnne = explain.Explainer(
        model=model,
        adj=cg_dict["adj"],
        feat=cg_dict["feat"],
        label=cg_dict["label"],
        pred=cg_dict["pred"],
        train_idx=cg_dict["train_idx"],
        args=prog_args,
        writer=writer,
        print_training=True,
        graph_mode=graph_mode,
        graph_idx=prog_args.graph_idx,
    )

    ### GNNE
    # Explain a set of nodes - accuracy on edges this time
    t = time.time()
    gnne_edge_accuracy, gnne_auc, gnne_node_accuracy, important_nodes_gnne =\
        gnne.explain_nodes_gnn_stats(
            node_indices, prog_args
        )
    e = time.time()
    print('Time: ', e - t)
# --- Example #15 ---
def medic(args):
    """
    Train a simple Graph ConvNet (https://arxiv.org/abs/1609.02907) on the
    data stored under Pickles/, track the best model by test accuracy,
    save a checkpoint, and return the best model.

    Args:
        args: namespace of hyper-parameters (input_dim, hidden_dim,
            output_dim, num_gc_layers, bn, gpu, num_epochs, clip,
            weight_decay, ...).

    Returns:
        (model, optimizer, args, cg_data) for the best test-accuracy epoch;
        cg_data holds the adjacency, features, labels, predictions and
        train indices needed later by the GNN explainer.

    Raises:
        RuntimeError: if no epoch ever improved test accuracy above 0, so
            no best model was recorded.
    """

    # Loading DataSet from /Pickles
    global result_test, result_train
    with open('Pickles/feats.pickle', 'rb') as handle:
        feats = np.expand_dims(pickle.load(handle), axis=0)
    with open('Pickles/age_adj.pickle', 'rb') as handle:
        age_adj = pickle.load(handle)
    with open('Pickles/preds.pickle', 'rb') as handle:
        labels = np.expand_dims(pickle.load(handle), axis=0)

    # Random 90/10 train/test split over nodes.
    num_nodes = labels.shape[1]
    num_train = int(num_nodes * 0.9)
    num_classes = max(labels[0]) + 1
    idx = list(range(num_nodes))
    np.random.shuffle(idx)
    train_idx = idx[:num_train]
    test_idx = idx[num_train:]

    # np.long / np.float were removed in NumPy 1.24 — use explicit dtypes.
    labels = labels.astype(np.int64)
    age_adj = age_adj.astype(np.float64)
    feats = feats.astype(np.float64)

    # Symmetric GCN normalization: A_hat = D^{-1/2} (A + I) D^{-1/2}.
    age_adj = age_adj + np.eye(age_adj.shape[0])
    # D is diagonal, so D^{-1/2} is just an element-wise reciprocal square
    # root of the degrees — no need for an O(n^3) np.linalg.inv.
    d_hat_inv_sqrt = np.diag(age_adj.sum(axis=1) ** -0.5)
    age_adj = d_hat_inv_sqrt @ age_adj @ d_hat_inv_sqrt
    age_adj = np.expand_dims(age_adj, axis=0)

    labels_train = torch.tensor(labels[:, train_idx], dtype=torch.long)
    adj = torch.tensor(age_adj, dtype=torch.float)
    x = torch.tensor(feats, dtype=torch.float, requires_grad=True)

    # Creating a model which is used in https://github.com/RexYing/gnn-model-explainer
    model = models.GcnEncoderNode(
        args.input_dim,
        args.hidden_dim,
        args.output_dim,
        num_classes,
        args.num_gc_layers,
        bn=args.bn,
        args=args,
    )

    if args.gpu:
        model = model.cuda()

    scheduler, optimizer = build_optimizer(args,
                                           model.parameters(),
                                           weight_decay=args.weight_decay)
    model.train()
    to_save = (0, None)  # (best test accuracy, (model, optimizer, args))

    # training the model
    for epoch in range(args.num_epochs):
        begin_time = time.time()
        model.zero_grad()

        if args.gpu:
            ypred, adj_att = model(x.cuda(), adj.cuda())
        else:
            ypred, adj_att = model(x, adj)
        # Loss is computed on the training nodes only.
        ypred_train = ypred[:, train_idx, :]
        if args.gpu:
            loss = model.loss(ypred_train, labels_train.cuda())
        else:
            loss = model.loss(ypred_train, labels_train)
        loss.backward()
        # clip_grad_norm (no underscore) was deprecated and later removed
        # from PyTorch; clip_grad_norm_ is the in-place replacement.
        nn.utils.clip_grad_norm_(model.parameters(), args.clip)

        optimizer.step()
        # for param_group in optimizer.param_groups:
        #    print(param_group["lr"])
        elapsed = time.time() - begin_time

        result_train, result_test = evaluate_node(ypred.cpu(), labels,
                                                  train_idx, test_idx)

        # Keep the model from the epoch with the best test accuracy.
        if result_test["acc"] > to_save[0]:
            to_save = (result_test["acc"], (model, optimizer, args))

        if epoch % 10 == 0:
            print(
                "epoch: ",
                epoch,
                "; loss: ",
                loss.item(),
                "; train_acc: ",
                result_train["acc"],
                "; test_acc: ",
                result_test["acc"],
                "; train_prec: ",
                result_train["prec"],
                "; test_prec: ",
                result_test["prec"],
                "; epoch time: ",
                "{0:0.2f}".format(elapsed),
            )
        if epoch % 100 == 0:
            print(result_train["conf_mat"])
            print(result_test["conf_mat"])

        if scheduler is not None:
            scheduler.step()

    print(result_train["conf_mat"])
    print(result_test["conf_mat"])

    # Fail loudly (instead of "'NoneType' is not subscriptable") when no
    # epoch ever beat the initial accuracy of 0.
    if to_save[1] is None:
        raise RuntimeError("training never produced a model with test "
                           "accuracy > 0; nothing to save")

    # Run the best model once in eval mode to produce the predictions
    # stored in the checkpoint.
    to_save[1][0].eval()
    if args.gpu:
        ypred, _ = to_save[1][0](x.cuda(), adj.cuda())
    else:
        ypred, _ = to_save[1][0](x, adj)
    cg_data = {
        "adj": age_adj,
        "feat": feats,
        "label": labels,
        "pred": ypred.cpu().detach().numpy(),
        "train_idx": train_idx,
    }

    # saving the model so that it can be restored for GNN explaining
    print(
        save_checkpoint(to_save[1][0],
                        to_save[1][1],
                        args,
                        num_epochs=-1,
                        cg_dict=cg_data))

    return to_save[1][0], to_save[1][1], args, cg_data
Example #16
0
def main():
    """
    Entry point: load a trained GNN checkpoint, rebuild the model, then run
    and compare the GraphSHAP, GNNExplainer (and, commented out, GRAD)
    explainers on a fixed set of test nodes, printing accuracy metrics.
    """
    # Load a configuration
    prog_args = arg_parse()

    if prog_args.gpu:
        # Must be set before any CUDA context is created.
        os.environ["CUDA_VISIBLE_DEVICES"] = prog_args.cuda
        print("CUDA", prog_args.cuda)
    else:
        print("Using CPU")

    # Configure the logging directory
    if prog_args.writer:
        path = os.path.join(prog_args.logdir,
                            io_utils.gen_explainer_prefix(prog_args))
        if os.path.isdir(path) and prog_args.clean_log:
            print('Removing existing log dir: ', path)
            # Interactive confirmation before deleting an existing log dir.
            if not input(
                    "Are you sure you want to remove this directory? (y/n): "
            ).lower().strip()[:1] == "y":
                sys.exit(1)
            shutil.rmtree(path)
        writer = SummaryWriter(path)
    else:
        writer = None

    # Load data and a model checkpoint
    ckpt = io_utils.load_ckpt(prog_args)
    cg_dict = ckpt["cg"]  # get computation graph
    # cg_dict["feat"] / ["pred"] are indexed [batch, node, dim] below.
    input_dim = cg_dict["feat"].shape[2]
    num_classes = cg_dict["pred"].shape[2]
    print("Loaded model from {}".format(prog_args.ckptdir))
    print("input dim: ", input_dim, "; num classes: ", num_classes)

    # Determine explainer mode (node classif)
    graph_mode = (prog_args.graph_mode or prog_args.multigraph_class >= 0
                  or prog_args.graph_idx >= 0)

    # build model
    print("Method: ", prog_args.method)
    if graph_mode:
        # Explain Graph prediction
        model = models.GcnEncoderGraph(
            input_dim=input_dim,
            hidden_dim=prog_args.hidden_dim,
            embedding_dim=prog_args.output_dim,
            label_dim=num_classes,
            num_layers=prog_args.num_gc_layers,
            bn=prog_args.bn,
            args=prog_args,
        )
    else:
        if prog_args.dataset == "ppi_essential":
            # class weight in CE loss for handling imbalanced label classes
            prog_args.loss_weight = torch.tensor([1.0, 5.0],
                                                 dtype=torch.float).cuda()
        # Explain Node prediction
        model = models.GcnEncoderNode(
            input_dim=input_dim,
            hidden_dim=prog_args.hidden_dim,
            embedding_dim=prog_args.output_dim,
            label_dim=num_classes,
            num_layers=prog_args.num_gc_layers,
            bn=prog_args.bn,
            args=prog_args,
        )
    if prog_args.gpu:
        model = model.cuda()

    # Load state_dict (obtained by model.state_dict() when saving checkpoint)
    model.load_state_dict(ckpt["model_state"])

    # Convertion data required to get correct model output for GraphSHAP
    adj = torch.tensor(cg_dict["adj"], dtype=torch.float)
    x = torch.tensor(cg_dict["feat"], requires_grad=True, dtype=torch.float)
    if prog_args.gpu:
        y_pred, att_adj = model(x.cuda(), adj.cuda())
    else:
        y_pred, att_adj = model(x, adj)

    # Transform their data into our format
    data = transform_data(adj, x, cg_dict["label"][0].tolist())

    # Generate test nodes
    # Use only these specific nodes as they are the ones added manually, part of the defined shapes
    # node_indices = extract_test_nodes(data, num_samples=10, cg_dict['train_idx'])
    k = 4  # number of nodes for the shape introduced (house, cycle)
    K = 0
    # Per-dataset hard-coded node ids; step size presumably matches the
    # shape size so one node per shape is sampled — TODO confirm.
    if prog_args.dataset == 'syn1':
        node_indices = list(range(400, 450, 5))
    elif prog_args.dataset == 'syn2':
        node_indices = list(range(400, 425, 5)) + list(range(1100, 1125, 5))
    elif prog_args.dataset == 'syn4':
        node_indices = list(range(511, 571, 6))
        if prog_args.hops == 3:
            k = 5
        else:
            K = 5
    elif prog_args.dataset == 'syn5':
        node_indices = list(range(511, 601, 9))
        if prog_args.hops == 3:
            k = 8
        else:
            k = 5
            K = 8
    # NOTE(review): node_indices stays undefined for any other dataset
    # value and the loop below would raise NameError.

    # GraphSHAP explainer
    graphshap = GraphSHAP(data, model, adj, writer, prog_args.dataset,
                          prog_args.gpu)

    # Run GNN Explainer and retrieve produced explanations
    gnne = explain.Explainer(
        model=model,
        adj=cg_dict["adj"],
        feat=cg_dict["feat"],
        label=cg_dict["label"],
        pred=cg_dict["pred"],
        train_idx=cg_dict["train_idx"],
        args=prog_args,
        writer=writer,
        print_training=True,
        graph_mode=graph_mode,
        graph_idx=prog_args.graph_idx,
    )

    #if prog_args.explain_node is not None:
    # _, gnne_edge_accuracy, gnne_auc, gnne_node_accuracy = \
    #     gnne.explain_nodes_gnn_stats(
    #         node_indices, prog_args
    # )
    # elif graph_mode:
    #     # Graph explanation
    #     gnne_expl = gnne.explain_graphs([1])[0]

    # GraphSHAP - assess accuracy of explanations
    # Loop over test nodes
    accuracy = []
    feat_accuracy = []
    for node_idx in node_indices:
        start = time.time()
        graphshap_explanations = graphshap.explain(
            [node_idx],
            prog_args.hops,
            prog_args.num_samples,
            prog_args.info,
            prog_args.multiclass,
            prog_args.fullempty,
            prog_args.S,
            prog_args.hv,
            prog_args.feat,
            prog_args.coal,
            prog_args.g,
            prog_args.regu,
        )[0]

        end = time.time()
        print('GS Time:', end - start)

        # Predicted class
        pred_val, predicted_class = y_pred[0, node_idx, :].max(dim=0)

        # Keep only node explanations
        # ,predicted_class]
        # Entries after index graphshap.F are node (not feature) importances.
        graphshap_node_explanations = graphshap_explanations[graphshap.F:]

        # Derive ground truth from graph structure
        # Assumes shape members are the max(k, K) nodes with consecutive
        # ids right after node_idx — TODO confirm against data generator.
        ground_truth = list(range(node_idx + 1, node_idx + max(k, K) + 1))

        # Retrieve top k elements indices form graphshap_node_explanations
        if graphshap.neighbours.shape[0] > k:
            i = 0
            val, indices = torch.topk(
                torch.tensor(graphshap_node_explanations.T), k + 1)
            # could weight importance based on val
            # Count how many of the top-(k+1) nodes fall in the ground truth.
            for node in graphshap.neighbours[indices]:
                if node.item() in ground_truth:
                    i += 1
            # Sort of accruacy metric
            accuracy.append(i / k)

            print('There are {} from targeted shape among most imp. nodes'.
                  format(i))

        # Look at importance distribution among features
        # Identify most important features and check if it corresponds to truly imp ones
        if prog_args.dataset == 'syn2':
            # ,predicted_class]
            graphshap_feat_explanations = graphshap_explanations[:graphshap.F]
            print('Feature importance graphshap',
                  graphshap_feat_explanations.T)
            # Feature 0 is treated as the truly informative one for syn2.
            if np.argsort(graphshap_feat_explanations)[-1] == 0:
                feat_accuracy.append(1)
            else:
                feat_accuracy.append(0)

    # Metric for graphshap
    # NOTE(review): raises ZeroDivisionError if no node had more than k
    # neighbours (accuracy stays empty).
    final_accuracy = sum(accuracy) / len(accuracy)

    ### GNNE
    # Explain a set of nodes - accuracy on edges this time
    _, gnne_edge_accuracy, gnne_auc, gnne_node_accuracy =\
        gnne.explain_nodes_gnn_stats(
            node_indices, prog_args
        )

    ### GRAD benchmark
    #  MetricS to assess quality of predictionsx
    """
    _, grad_edge_accuracy, grad_auc, grad_node_accuracy =\
            gnne.explain_nodes_gnn_stats(
                node_indices, prog_args, model="grad")
    """
    grad_edge_accuracy = 0
    grad_node_accuracy = 0

    ### GAT
    # Nothing for now - implem a GAT on the side and look at weights coef

    ### Results
    print(
        'Accuracy for GraphSHAP is {:.2f} vs {:.2f},{:.2f} for GNNE vs {:.2f},{:.2f} for GRAD'
        .format(final_accuracy, np.mean(gnne_edge_accuracy),
                np.mean(gnne_node_accuracy), np.mean(grad_edge_accuracy),
                np.mean(grad_node_accuracy)))
    if prog_args.dataset == 'syn2':
        print('Most important feature was found in {:.2f}% of the case'.format(
            100 * np.mean(feat_accuracy)))

    print('GNNE_auc is:', gnne_auc)