Example 1
def pipeline_GC(top_k):
    dataset = get_dataset(data_args.dataset_dir, data_args.dataset_name)
    if data_args.dataset_name == 'mutag':
        data_indices = list(range(len(dataset)))
        pgexplainer_trainset = dataset
    else:
        loader = get_dataloader(dataset,
                                batch_size=train_args.batch_size,
                                random_split_flag=data_args.random_split,
                                data_split_ratio=data_args.data_split_ratio,
                                seed=data_args.seed)
        data_indices = loader['test'].dataset.indices
        pgexplainer_trainset = loader['train'].dataset

    input_dim = dataset.num_node_features
    output_dim = dataset.num_classes
    gnnNets = GnnNets(input_dim, output_dim, model_args)
    checkpoint = torch.load(model_args.model_path)
    gnnNets.update_state_dict(checkpoint['net'])
    gnnNets.to_device()
    gnnNets.eval()

    save_dir = os.path.join(
        './results', f"{data_args.dataset_name}_"
        f"{model_args.model_name}_"
        f"pgexplainer")
    os.makedirs(save_dir, exist_ok=True)

    pgexplainer = PGExplainer(gnnNets)

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    tic = time.perf_counter()

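    # train PGExplainer's parameterized edge-mask predictor on the training split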
    pgexplainer.get_explanation_network(pgexplainer_trainset)

    if torch.cuda.is_available():
        torch.cuda.synchronize()
    toc = time.perf_counter()
    training_duration = toc - tic
    print(f"training time is {training_duration: .4}s ")

    explain_duration = 0.0
    plotutils = PlotUtils(dataset_name=data_args.dataset_name)
    fidelity_score_list = []
    sparsity_score_list = []
    for data_idx in tqdm(data_indices):
        data = dataset[data_idx]
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        tic = time.perf_counter()

        prob = pgexplainer.eval_probs(data.x, data.edge_index)
        pred_label = prob.argmax(-1).item()

        save_path = os.path.join(save_dir, f"example_{data_idx}.pt")
        if os.path.isfile(save_path):
            # reuse a previously computed edge mask if one was saved
            edge_mask = torch.from_numpy(torch.load(save_path))
        else:
            edge_mask = pgexplainer.explain_edge_mask(data.x, data.edge_index)
            edge_mask = edge_mask.cpu()
            torch.save(edge_mask.detach().numpy(), save_path)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        toc = time.perf_counter()
        explain_duration += (toc - tic)

        graph = to_networkx(data)

        fidelity_score = top_k_fidelity(data, edge_mask, top_k, gnnNets,
                                        pred_label)
        sparsity_score = top_k_sparsity(data, edge_mask, top_k)

        fidelity_score_list.append(fidelity_score)
        sparsity_score_list.append(sparsity_score)

        # visualization
        if hasattr(dataset, 'supplement'):
            words = dataset.supplement['sentence_tokens'][str(data_idx)]
            plotutils.plot_soft_edge_mask(graph,
                                          edge_mask,
                                          top_k,
                                          x=data.x,
                                          words=words,
                                          un_directed=True,
                                          figname=os.path.join(
                                              save_dir,
                                              f"example_{data_idx}.png"))
        else:
            plotutils.plot_soft_edge_mask(graph,
                                          edge_mask,
                                          top_k,
                                          x=data.x,
                                          un_directed=True,
                                          figname=os.path.join(
                                              save_dir,
                                              f"example_{data_idx}.png"))

    fidelity_scores = torch.tensor(fidelity_score_list)
    sparsity_scores = torch.tensor(sparsity_score_list)
    return fidelity_scores, sparsity_scores
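
A minimal driver sketch for the function above; it is not part of the original example. The top_k value and the summary printout are assumptions, and the config objects (data_args, train_args, model_args) are expected to come from the surrounding project.

if __name__ == '__main__':
    # hypothetical sparsity budget for the explanation edges
    fidelity_scores, sparsity_scores = pipeline_GC(top_k=5)
    print(f"fidelity: {fidelity_scores.mean().item():.4f} "
          f"+/- {fidelity_scores.std().item():.4f}")
    print(f"sparsity: {sparsity_scores.mean().item():.4f} "
          f"+/- {sparsity_scores.std().item():.4f}")
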
Example 2
def pipeline(max_nodes):
    dataset = get_dataset(data_args.dataset_dir, data_args.dataset_name)
    plotutils = PlotUtils(dataset_name=data_args.dataset_name)
    input_dim = dataset.num_node_features
    output_dim = dataset.num_classes

    if data_args.dataset_name == 'mutag':
        data_indices = list(range(len(dataset)))
    else:
        loader = get_dataloader(dataset,
                                batch_size=train_args.batch_size,
                                random_split_flag=data_args.random_split,
                                data_split_ratio=data_args.data_split_ratio,
                                seed=data_args.seed)
        data_indices = loader['test'].dataset.indices

    gnnNets = GnnNets(input_dim, output_dim, model_args)
    checkpoint = torch.load(mcts_args.explain_model_path)
    gnnNets.update_state_dict(checkpoint['net'])
    gnnNets.to_device()
    gnnNets.eval()

    save_dir = os.path.join(
        './results', f"{mcts_args.dataset_name}_"
        f"{model_args.model_name}_"
        f"{reward_args.reward_method}")
    os.makedirs(save_dir, exist_ok=True)

    fidelity_score_list = []
    sparsity_score_list = []
    for i in tqdm(data_indices):
        # get data and prediction
        data = dataset[i]
        _, probs, _ = gnnNets(Batch.from_data_list([data.clone()]))
        prediction = probs.squeeze().argmax(-1).item()
        original_score = probs.squeeze()[prediction]

        # get the reward func
        value_func = GnnNets_GC2value_func(gnnNets, target_class=prediction)
        payoff_func = reward_func(reward_args, value_func)

        # find the paths and build the graph
        result_path = os.path.join(save_dir, f"example_{i}.pt")

        # MCTS search for the l_shapley reward
        mcts_state_map = MCTS(data.x,
                              data.edge_index,
                              score_func=payoff_func,
                              n_rollout=mcts_args.rollout,
                              min_atoms=mcts_args.min_atoms,
                              c_puct=mcts_args.c_puct,
                              expand_atoms=mcts_args.expand_atoms)

        if os.path.isfile(result_path):
            results = torch.load(result_path)
        else:
            results = mcts_state_map.mcts(verbose=True)
            torch.save(results, result_path)

        # l_shapley score
        graph_node_x = find_closest_node_result(results, max_nodes=max_nodes)
        masked_node_list = [
            node for node in list(range(graph_node_x.data.x.shape[0]))
            if node not in graph_node_x.coalition
        ]
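        # fidelity: original target-class score minus the score obtained when
        # the explanation coalition is removed (only the complement nodes are kept)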
        fidelity_score = original_score - gnn_score(
            masked_node_list,
            data,
            value_func,
            subgraph_building_method='zero_filling')
        sparsity_score = 1 - len(
            graph_node_x.coalition) / graph_node_x.ori_graph.number_of_nodes()
        fidelity_score_list.append(fidelity_score)
        sparsity_score_list.append(sparsity_score)

        # visualization
        if hasattr(dataset, 'supplement'):
            words = dataset.supplement['sentence_tokens'][str(i)]
            plotutils.plot(graph_node_x.ori_graph,
                           graph_node_x.coalition,
                           words=words,
                           figname=os.path.join(save_dir, f"example_{i}.png"))
        else:
            plotutils.plot(graph_node_x.ori_graph,
                           graph_node_x.coalition,
                           x=graph_node_x.data.x,
                           figname=os.path.join(save_dir, f"example_{i}.png"))

    fidelity_scores = torch.tensor(fidelity_score_list)
    sparsity_scores = torch.tensor(sparsity_score_list)
    return fidelity_scores, sparsity_scores
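
A possible driver sketch for pipeline, analogous to the one above; max_nodes is an assumed value, and the mcts_args, reward_args, model_args and data_args objects are assumed to be provided by the surrounding project.

if __name__ == '__main__':
    # hypothetical upper bound on the explanation subgraph size
    fidelity_scores, sparsity_scores = pipeline(max_nodes=5)
    print(f"fidelity: {fidelity_scores.mean().item():.4f}, "
          f"sparsity: {sparsity_scores.mean().item():.4f}")
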
Example 3
def train_GC():
    # note: multi-task setting here
    print('start loading data====================')
    dataset = get_dataset(data_args)
    input_dim = dataset.num_node_features
    output_dim = int(dataset.num_classes)
    dataloader = get_dataloader(dataset, data_args, train_args)

    print('start training model==================')
    gnnNets = GnnNets(input_dim, output_dim, model_args)
    gnnNets.to_device()
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(gnnNets.parameters(),
                     lr=train_args.learning_rate,
                     weight_decay=train_args.weight_decay)

    avg_nodes = 0.0
    avg_edges = 0.0
    for i in range(len(dataset)):
        avg_nodes += dataset[i].x.shape[0]
        avg_edges += dataset[i].edge_index.shape[1]
    avg_nodes /= len(dataset)
    avg_edges /= len(dataset)
    # each undirected edge appears twice in edge_index, hence the division by 2
    print(f"graphs: {len(dataset)}, avg_nodes: {avg_nodes:.4f}, "
          f"avg_edges: {avg_edges / 2:.4f}")

    best_acc = 0.0
    data_size = len(dataset)
    print(f'The total num of dataset is {data_size}')

    # save path for model
    ckpt_dir = f"./checkpoint/{data_args.dataset_name}/"
    os.makedirs(ckpt_dir, exist_ok=True)

    early_stop_count = 0
    for epoch in range(train_args.max_epochs):
        acc = []
        loss_list = []
        gnnNets.train()
        for batch in dataloader['train']:
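            # forward pass; the model returns logits and class probabilities (third output unused here)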
            logits, probs, _ = gnnNets(batch)
            loss = criterion(logits, batch.y)

            # optimization
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_value_(gnnNets.parameters(),
                                            clip_value=2.0)
            optimizer.step()

            ## record
            _, prediction = torch.max(logits, -1)
            loss_list.append(loss.item())
            acc.append(prediction.eq(batch.y).cpu().numpy())

        # report train msg
        print(f"Train Epoch:{epoch}  |Loss: {np.average(loss_list):.3f} | "
              f"Acc: {np.concatenate(acc, axis=0).mean():.3f}")

        # report eval msg
        eval_state = evaluate_GC(dataloader['eval'], gnnNets, criterion)
        print(
            f"Eval Epoch: {epoch} | Loss: {eval_state['loss']:.3f} | Acc: {eval_state['acc']:.3f}"
        )

        # track best validation accuracy and early stopping
        is_best = (eval_state['acc'] > best_acc)
        if is_best:
            best_acc = eval_state['acc']
            early_stop_count = 0
        else:
            early_stop_count += 1

        if early_stop_count > train_args.early_stopping:
            break

        # only save the best model (plus periodic checkpoints)
        if is_best or epoch % train_args.save_epoch == 0:
            save_best(ckpt_dir, epoch, gnnNets, model_args.model_name,
                      eval_state['acc'], is_best)

    print(f"The best validation accuracy is {best_acc}.")
    # report test msg
    checkpoint = torch.load(
        os.path.join(ckpt_dir, f'{model_args.model_name}_best.pth'))
    gnnNets.update_state_dict(checkpoint['net'])
    test_state, _, _ = test_GC(dataloader['test'], gnnNets, criterion)
    print(
        f"Test: | Loss: {test_state['loss']:.3f} | Acc: {test_state['acc']:.3f}"
    )
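
A minimal sketch of how train_GC might be launched; the entry point below is an assumption, and all config objects are expected to come from the surrounding project's argument definitions.

if __name__ == '__main__':
    # train, validate with early stopping, and report the test metrics
    train_GC()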