def init_setup():
    data = Dataset(root='/tmp/', name=args.dataset, setting='gcn')

    data.features = normalize_feature(data.features)
    adj, features, labels = data.adj, data.features, data.labels

    StaticGraph.graph = nx.from_scipy_sparse_matrix(adj)
    dict_of_lists = nx.to_dict_of_lists(StaticGraph.graph)

    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    device = torch.device('cuda' if args.ctx == 'gpu' else 'cpu')

    # black box setting
    adj, features, labels = preprocess(adj,
                                       features,
                                       labels,
                                       preprocess_adj=False,
                                       sparse=True,
                                       device=device)
    victim_model = load_victim_model(data,
                                     device=device,
                                     file_path=args.saved_model)
    setattr(victim_model, 'norm_tool',
            GraphNormTool(normalize=True, gm='gcn', device=device))
    output = victim_model.predict(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:", "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))

    return features, labels, idx_val, idx_test, victim_model, dict_of_lists, adj
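
# A minimal call sketch for init_setup() above; it assumes a parsed global
# `args` with fields `dataset`, `ctx`, and `saved_model`:
#   features, labels, idx_val, idx_test, victim_model, dict_of_lists, adj = init_setup()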
Example #2
def main():
    df_path = 'reports/eval/distance_range.csv'
    for dataset in args.dataset:
        data = Dataset(root='/tmp/', name=dataset, setting='nettack')
        print(f'Accuracy on the clean graph: {test(data, data.adj)}')
        for ptb_rate in args.ptb_rate:
            perturbations = int(ptb_rate * (data.adj.sum() // 2))
            if args.percentile_step is None:
                model = StructackRangeDistance(
                    distance_percentile_range=[args.frm, args.to])
                model.attack(data.adj, perturbations)
                modified_adj = model.modified_adj
                # modified_features = model.modified_features
                test(data, modified_adj)
            else:
                for frm in np.arange(0, 1, args.percentile_step):
                    print(f'===={frm}====')
                    to = frm + args.percentile_step
                    model = StructackRangeDistance(
                        distance_percentile_range=[frm, to])
                    accs = []
                    for seed_i in range(10):
                        tick = time.time()
                        model.attack(data.adj, perturbations)
                        elapsed = time.time() - tick

                        modified_adj = model.modified_adj
                        # modified_features = model.modified_features
                        acc = test(data, modified_adj)
                        accs.append(acc)

                        cdf = pd.DataFrame()
                        if os.path.exists(df_path):
                            cdf = pd.read_csv(df_path)
                        row = {
                            'dataset': dataset,
                            'attack': model.__class__.__name__,
                            'seed': seed_i,
                            'acc': acc,
                            'perturbation_rate': ptb_rate,
                            'elapsed': elapsed,
                            'frm': frm,
                            'to': to,
                            'mean_distance': model.mean_distance
                        }
                        print(row)
                        cdf = pd.concat([cdf, pd.DataFrame([row])], ignore_index=True)
                        cdf.to_csv(df_path, index=False)

                    print(
                        f'percentile [{frm:.2f},{to:.2f}]: {np.mean(accs):.4f} +- {np.std(accs):.2f}'
                    )
Example #3
def main(args):
    datasets = args.datasets
    df_path = args.output
    perturbation_rates = args.ptb

    attacks = [
        # [attack_random, 'Random', build_random],
        #         [attack_dice, 'DICE', build_dice],
        #         [attack_mettaack, 'Metattack', build_mettack],
        # [attack_pgd, 'PGD', build_pgd],
        [attack_minmax, 'MinMax', build_minmax],
    ]
    for dataset in datasets:
        for attack, model_name, model_builder in attacks:
            print('attack ' + model_name)
            for split_seed in range(5):
                np.random.seed(split_seed)
                torch.manual_seed(split_seed)
                if cuda:
                    torch.cuda.manual_seed(split_seed)
                data = Dataset(root='/tmp/', name=dataset)
                G_orig = nx.from_scipy_sparse_matrix(data.adj)
                degree_centralities_orig = np.array(
                    list(nx.degree_centrality(G_orig).values()))
                ccoefs_orig = np.array(
                    list(
                        nx.clustering(G_orig, nodes=G_orig.nodes,
                                      weight=None).values()))
                for perturbation_rate in perturbation_rates:
                    for attack_seed in range(1 if model_name == 'DICE' else 5):
                        modified_adj, elapsed = apply_perturbation(
                            model_builder, attack, data, perturbation_rate,
                            cuda and (dataset != 'pubmed'), attack_seed)
                        print(type(modified_adj))
                        row = {
                            'dataset': dataset,
                            'attack': model_name,
                            'perturbation_rate': perturbation_rate,
                            'elapsed': elapsed,
                            'attack_seed': attack_seed,
                            'split_seed': split_seed
                        }
                        row = extend_row_with_noticeability(
                            row, G_orig, degree_centralities_orig, ccoefs_orig,
                            data.adj, modified_adj)
                        print(row)
                        cdf = pd.DataFrame()
                        if os.path.exists(df_path):
                            cdf = pd.read_csv(df_path)
                        cdf = pd.concat([cdf, pd.DataFrame([row])], ignore_index=True)
                        cdf.to_csv(df_path, index=False)
Example #4
def combination(datasets):

    df_path = 'reports/eval/combination-memory.csv'

    selection_options = [
        [ns.get_nodes_with_lowest_degree, 'degree'],
        [ns.get_nodes_with_lowest_pagerank, 'pagerank'],
        [ns.get_nodes_with_lowest_eigenvector_centrality, 'eigenvector'],
        [ns.get_nodes_with_lowest_betweenness_centrality, 'betweenness'],
        [ns.get_nodes_with_lowest_closeness_centrality, 'closeness'],
        [ns.get_random_nodes, 'random'],
    ]

    connection_options = [
        [nc.community_hungarian_connection, 'community'],
        [nc.distance_hungarian_connection, 'distance'],
        [nc.katz_hungarian_connection, 'katz'],
        [nc.random_connection, 'random'],
    ]

    for selection, selection_name in selection_options:
        for connection, connection_name in connection_options:
            if selection_name == 'random' or connection_name == 'random':
                continue
            for dataset in datasets:

                data = Dataset(root='/tmp/', name=dataset)
                print(f'attack [{selection_name}]*[{connection_name}]')
                for perturbation_rate in [0.05]:  #,0.10,0.15,0.20]:
                    mem = apply_structack(build_custom(selection,
                                                       connection,
                                                       dataset_name=None),
                                          attack_structack,
                                          data,
                                          perturbation_rate,
                                          cuda and (dataset != 'pubmed'),
                                          seed=0)
                    row = {
                        'dataset': dataset,
                        'selection': selection_name,
                        'connection': connection_name,
                        'memory': mem
                    }
                    print(row)
                    cdf = pd.DataFrame()
                    if os.path.exists(df_path):
                        cdf = pd.read_csv(df_path)
                    cdf = pd.concat([cdf, pd.DataFrame([row])], ignore_index=True)
                    cdf.to_csv(df_path, index=False)
Example #5
def main(datasets):
    df_path = 'reports/eval/baseline_eval.csv'
    attacks = [
        # [attack_random, 'Random', build_random],
        [attack_dice, 'DICE', build_dice],
        [attack_mettaack, 'Metattack', build_mettack],
        [attack_pgd, 'PGD', build_pgd],
        [attack_minmax, 'MinMax', build_minmax],
    ]
    for dataset in datasets:
        for attack, model_name, model_builder in attacks:
            print('attack ' + model_name)
            for split_seed in range(5):
                np.random.seed(split_seed)
                torch.manual_seed(split_seed)
                if cuda:
                    torch.cuda.manual_seed(split_seed)
                data = Dataset(root='/tmp/', name=dataset)
                for perturbation_rate in [0.05]:  #,0.10,0.15,0.20]:
                    for attack_seed in range(1 if model_name == 'DICE' else 5):
                        modified_adj, elapsed = apply_perturbation(
                            model_builder, attack, data, perturbation_rate,
                            cuda, attack_seed)
                        for gcn_seed in range(5):

                            np.random.seed(gcn_seed)
                            torch.manual_seed(gcn_seed)
                            if cuda:
                                torch.cuda.manual_seed(gcn_seed)
                            acc = test_gcn(modified_adj, data, cuda,
                                           pre_test_data)
                            row = {
                                'dataset': dataset,
                                'attack': model_name,
                                'gcn_seed': gcn_seed,
                                'acc': acc,
                                'perturbation_rate': perturbation_rate,
                                'elapsed': elapsed,
                                'attack_seed': attack_seed,
                                'split_seed': split_seed
                            }
                            print(row)
                            cdf = pd.DataFrame()
                            if os.path.exists(df_path):
                                cdf = pd.read_csv(df_path)
                            cdf = pd.concat([cdf, pd.DataFrame([row])], ignore_index=True)
                            cdf.to_csv(df_path, index=False)
Example #6
def clean(datasets):
    df_path = 'reports/eval/clean.csv'
    split_seeds = 5
    gcn_seeds = 5

    for dataset in datasets:
        ''' Clean graph evaluation '''
        for split_seed in range(split_seeds):
            np.random.seed(split_seed)
            torch.manual_seed(split_seed)
            if cuda:
                torch.cuda.manual_seed(split_seed)
            # reload the dataset with a different split (WARNING: this doesn't work for attack methods which depend on the split)
            data = Dataset(root='/tmp/', name=dataset)
            for seed in range(gcn_seeds):

                np.random.seed(seed)
                torch.manual_seed(seed)
                if cuda:
                    torch.cuda.manual_seed(seed)
                acc = test_gcn(
                    postprocess_adj(data.adj).to(
                        torch.device("cuda" if cuda else "cpu")), data, cuda,
                    pre_test_data)
                row = {
                    'dataset': dataset,
                    'selection': 'clean',
                    'connection': 'clean',
                    'gcn_seed': seed,
                    'acc': acc,
                    'perturbation_rate': 0,
                    'elapsed': 0,
                    'split_seed': split_seed
                }
                print(row)
                cdf = pd.DataFrame()
                if os.path.exists(df_path):
                    cdf = pd.read_csv(df_path)
                cdf = pd.concat([cdf, pd.DataFrame([row])], ignore_index=True)
                cdf.to_csv(df_path, index=False)
def init_setup():
    data = Dataset(root='/tmp/', name=args.dataset, setting='nettack')
    injecting_nodes(data)

    adj, features, labels = data.adj, data.features, data.labels

    StaticGraph.graph = nx.from_scipy_sparse_matrix(adj)
    dict_of_lists = nx.to_dict_of_lists(StaticGraph.graph)

    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    device = torch.device('cuda' if args.ctx == 'gpu' else 'cpu')

    # gray box setting
    adj, features, labels = preprocess(adj,
                                       features,
                                       labels,
                                       preprocess_adj=False,
                                       sparse=True,
                                       device=device)
    # Setup victim model
    victim_model = GCN(nfeat=features.shape[1],
                       nclass=labels.max().item() + 1,
                       nhid=16,
                       dropout=0.5,
                       weight_decay=5e-4,
                       device=device)

    victim_model = victim_model.to(device)
    victim_model.fit(features, adj, labels, idx_train, idx_val)
    setattr(victim_model, 'norm_tool',
            GraphNormTool(normalize=True, gm='gcn', device=device))

    output = victim_model.predict(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:", "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))

    return features, labels, idx_train, idx_val, idx_test, victim_model, dict_of_lists, adj
Example #8
def baseline(datasets):

    df_path = 'reports/eval/baseline-memory.csv'
    attacks = [
        # [attack_dice, 'DICE', build_dice],
        [attack_mettaack, 'Metattack', build_mettack],
        # [attack_pgd, 'PGD', build_pgd],
        # [attack_minmax, 'MinMax', build_minmax],
    ]
    for dataset in datasets:
        for attack, model_name, model_builder in attacks:
            print('attack ' + model_name)
            for split_seed in range(1):
                np.random.seed(split_seed)
                torch.manual_seed(split_seed)
                if cuda:
                    torch.cuda.manual_seed(split_seed)
                data = Dataset(root='/tmp/', name=dataset)
                for perturbation_rate in [0.05]:  #,0.10,0.15,0.20]:
                    for attack_seed in range(1):
                        mem = apply_perturbation(model_builder, attack, data,
                                                 perturbation_rate, cuda,
                                                 attack_seed)

                        row = {
                            'dataset': dataset,
                            'attack': model_name,
                            'attack_seed': attack_seed,
                            'memory': mem
                        }
                        print(row)
                        cdf = pd.DataFrame()
                        if os.path.exists(df_path):
                            cdf = pd.read_csv(df_path)
                        cdf = pd.concat([cdf, pd.DataFrame([row])], ignore_index=True)
                        cdf.to_csv(df_path, index=False)
                del data
                gc.collect()
        def get_y(idx):
            mx = np.zeros(labels.shape)
            mx[idx] = labels[idx]
            return mx

        self.train_mask = get_mask(self.idx_train)
        self.val_mask = get_mask(self.idx_val)
        self.test_mask = get_mask(self.idx_test)
        self.y_train = get_y(self.idx_train)
        self.y_val = get_y(self.idx_val)
        self.y_test = get_y(self.idx_test)

    def onehot(self, labels):
        eye = np.identity(labels.max() + 1)
        onehot_mx = eye[labels]
        return onehot_mx
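
    # For reference, the identity-matrix trick used in onehot() above, with
    # illustrative values (not from the source):
    #   labels = np.array([0, 2, 1])
    #   np.identity(labels.max() + 1)[labels]  # rows [1,0,0], [0,0,1], [0,1,0]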


def parse_index_file(filename):
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index
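
# A hypothetical round-trip for parse_index_file(); the file name and
# contents are illustrative, not from the source:
#   with open('/tmp/example.index', 'w') as f:
#       f.write('0\n2\n1\n')
#   parse_index_file('/tmp/example.index')  # -> [0, 2, 1]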


if __name__ == '__main__':
    from deeprobust.graph.data import Dataset
    data = Dataset(root='./tmp/', name='flickr')
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
Example #10
    type=str,
    default='cora',
    choices=['cora', 'cora_ml', 'citeseer', 'polblogs', 'pubmed'],
    help='dataset')
parser.add_argument('--ptb_rate',
                    type=float,
                    default=0.05,
                    help='perturbation rate')

args = parser.parse_args()
args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# use data splits provided by ProGNN
data = Dataset(root='/tmp/', name=args.dataset, setting='prognn')
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

gat = GAT(nfeat=features.shape[1],
          nhid=8,
          heads=8,
          nclass=labels.max().item() + 1,
          dropout=0.5,
          device=device)
gat = gat.to(device)

# test on clean graph
print('==================')
print('=== train on clean graph ===')
Example #11
parser.add_argument('--ptb_rate', type=float, default=0.0, help='perturbation rate')
parser.add_argument('--k', type=int, default=100, help='Truncated Components.')
parser.add_argument('--ptb_type', type=str, default='add', choices=['add', 'remove', 'meta'])
parser.add_argument('--cuda_num', type=int, default=0)
parser.add_argument('--model_type', type=str, default='GCN_SVD', choices=['GCN_SVD', 'RGCN', 'GCN'])

args = parser.parse_args()
device = torch.device(f"cuda:{args.cuda_num}" if torch.cuda.is_available() else "cpu")

# make sure you use the same data splits as you generated attacks

for seed in [5, 15, 20, 25, 35]:
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # load original dataset (to get clean features and labels)
    data = Dataset(root='/tmp/', name=args.dataset, setting='nettack', seed=15, require_mask=True)
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

    # num_edge = adj.sum(axis=None)/2
    # attacker = Random()
    # attacker.attack(adj, n_perturbations=int(args.ptb_rate*num_edge), type=args.ptb_type)
    # perturbed_adj = attacker.modified_adj
    if args.ptb_rate > 0:
        perturbed_data = PrePtbDataset(root='/tmp/', name=args.dataset, attack_method='meta', ptb_rate=args.ptb_rate)
        perturbed_adj = perturbed_data.adj
    else:
        perturbed_adj = adj
    # Setup Defense Model
    if args.model_type == 'GCN_SVD':
        model = GCNSVD(nfeat=features.shape[1], nclass=labels.max() + 1,
                       nhid=16, device=device)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', type=int, default=-1, help='cuda')
    parser.add_argument('--seed',
                        type=int,
                        default=123,
                        help='Random seed for model')
    parser.add_argument('--data_seed',
                        type=int,
                        default=123,
                        help='Random seed for data split')
    parser.add_argument('--dataset', type=str, default='cora', help='Dataset')
    parser.add_argument('--gnn_base',
                        type=str,
                        default='gcn',
                        help='base gnn models')
    parser.add_argument('--gnn_epochs',
                        type=int,
                        default=500,
                        help='Number of epochs to train the gnn')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        help='Initial learning rate')
    parser.add_argument('--weight_decay',
                        type=float,
                        default=5e-4,
                        help='Weight decay (L2 loss on parameters)')
    parser.add_argument('--hidden',
                        type=int,
                        default=32,
                        help='Number of hidden units')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.0,
                        help='Dropout rate (1 - keep probability)')
    parser.add_argument('--patience',
                        type=int,
                        default=200,
                        help='patience for early stopping')
    parser.add_argument('--model_dir',
                        type=str,
                        default='./nat_model_saved/',
                        help='Directory to save the trained model.')
    parser.add_argument('--data_dir',
                        type=str,
                        default='./tmp/',
                        help='Directory to download dataset.')

    args = parser.parse_args()
    args.device = torch.device(
        f'cuda:{args.cuda}' if torch.cuda.is_available() else 'cpu')
    torch.set_num_threads(1)  # limit cpu use

    set_random_seed(args.seed, args.device)

    if not os.path.exists(args.model_dir):
        os.mkdir(args.model_dir)
    if not os.path.exists(args.data_dir):
        os.mkdir(args.data_dir)

    print('==== Environment ====')
    print(f'torch version: {torch.__version__}')
    print(f'device: {args.device}')
    print(f'torch seed: {args.seed}')

    #########################################################
    # Load data for node classification task
    data = Dataset(root=args.data_dir,
                   name=args.dataset,
                   setting='gcn',
                   seed=args.data_seed)
    adj, features, labels = data.process(process_adj=False,
                                         process_feature=False,
                                         device=args.device)
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    idx_unlabeled = np.union1d(idx_val, idx_test)

    print('==== Dataset ====')
    print(f'density: {nx.density(nx.from_numpy_array(adj.cpu().numpy()))}')
    print(f'adj shape: {adj.shape}')
    print(f'feature shape: {features.shape}')
    print(f'label number: {labels.max().item()+1}')
    print(f'split seed: {args.data_seed}')
    print(
        f'train|valid|test set: {idx_train.shape}|{idx_val.shape}|{idx_test.shape}'
    )

    #########################################################
    # Setup gnn model and fit it on clean graph
    if args.gnn_base == 'gcn':
        nat_model = GCN(nfeat=features.shape[1],
                        nclass=labels.max().item() + 1,
                        nhid=args.hidden,
                        dropout=args.dropout,
                        weight_decay=args.weight_decay,
                        lr=args.lr,
                        device=args.device)
    elif args.gnn_base == 'gat':
        nat_model = GAT(nfeat=features.shape[1],
                        nclass=labels.max().item() + 1,
                        nhid=args.hidden,
                        heads=8,
                        dropout=args.dropout,
                        weight_decay=args.weight_decay,
                        lr=args.lr,
                        device=args.device)
    elif args.gnn_base == 'sgc':
        nat_model = SGC(nfeat=features.shape[1],
                        nclass=labels.max().item() + 1,
                        lr=args.lr,
                        device=args.device)
    else:
        raise AssertionError("GNN model {} not found!".format(args.gnn_base))

    nat_model = nat_model.to(args.device)

    if args.gnn_base == 'gcn':
        nat_model.fit(features,
                      adj,
                      labels,
                      idx_train,
                      idx_val=None,
                      train_iters=args.gnn_epochs,
                      patience=args.patience,
                      verbose=True)
    else:
        raise AssertionError('Model Not ready')

    print('==== {} performance ===='.format(args.gnn_base))
    check_victim_model_performance(nat_model, features, adj, labels, idx_test,
                                   idx_train)

    #########################################################
    # Save the trained model
    path = args.model_dir + '{}_{}.pt'.format(args.dataset, args.gnn_base)
    torch.save(nat_model.state_dict(), path)
Example #13
                    type=float,
                    default=0.05,
                    help='perturbation rate')

args = parser.parse_args()
args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

data = Dataset(
    root=r'D:\Python Project\defense\Low_pass_defense\fold_defense\tmp\\',
    name=args.dataset)
adj, features, labels = data.adj, data.features, data.labels

idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

idx_unlabeled = np.union1d(idx_val, idx_test)

# Setup Surrogate model
surrogate = GCN(nfeat=features.shape[1],
                nclass=labels.max().item() + 1,
                nhid=16,
                dropout=0,
                with_relu=False,
                with_bias=False,
                device=device)
Example #14
        torch.FloatTensor
            output (log probabilities) of SGC
        """

        self.eval()
        self.dropout = dropout

        if pyg_data is None:
            data = self.data
        else:
            data = pyg_data[0].to(self.device)

        self.data = data
        return self.forward(data)


if __name__ == "__main__":
    from deeprobust.graph.data import Dataset, Dpr2Pyg
    # from deeprobust.graph.defense import SGC
    data = Dataset(root='/tmp/', name='cora')
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    sgc = SGC(nfeat=features.shape[1],
              nclass=labels.max().item() + 1,
              device='cpu')
    sgc = sgc.to('cpu')
    pyg_data = Dpr2Pyg(data)
    sgc.fit(pyg_data, verbose=True)  # train with earlystopping
    sgc.test()
    print(sgc.predict())
Example #15
from deeprobust.graph.data import Dataset
from deeprobust.graph.defense import DeepWalk
from deeprobust.graph.global_attack import NodeEmbeddingAttack
from deeprobust.graph.global_attack import OtherNodeEmbeddingAttack
import itertools

dataset_str = 'cora_ml'
data = Dataset(root='/tmp/', name=dataset_str, seed=15)
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

comb = itertools.product(["random", "degree", "eigencentrality"], ["remove", "add"])
for model_type, attack_type in comb:
    model = OtherNodeEmbeddingAttack(type=model_type)
    print(model.type, attack_type)
    try:
        model.attack(adj, attack_type=attack_type, n_candidates=10000)
        defender = DeepWalk()
        defender.fit(adj)
        defender.evaluate_node_classification(labels, idx_train, idx_test)
    except KeyError:
        print('eigencentrality only supports removing edges')

model = NodeEmbeddingAttack()
model.attack(adj, attack_type="remove")
model.attack(adj, attack_type="remove", min_span_tree=True)
modified_adj = model.modified_adj
model.attack(adj, attack_type="add", n_candidates=10000)
model.attack(adj, attack_type="add_by_remove", n_candidates=10000)
Example #16
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if args.cuda else "cpu")

if args.cuda:
    torch.cuda.manual_seed(args.seed)
if args.ptb_rate == 0:
    args.attack = "no"

print(args)

# The random seed here controls the train/val/test split; it must match the
# seed used when the perturbed graph was generated.
np.random.seed(15)

data = Dataset(root='/tmp/', name=args.dataset, setting='nettack')
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

if args.attack == 'no':
    perturbed_adj = adj

if args.attack == 'random':
    from deeprobust.graph.global_attack import Random
    attacker = Random()
    n_perturbations = int(args.ptb_rate * (adj.sum() // 2))
    attacker.attack(adj, n_perturbations, type='add')
    perturbed_adj = attacker.modified_adj

if args.attack == 'meta' or args.attack == 'nettack':
    perturbed_data = PrePtbDataset(root='/tmp/',
                                   name=args.dataset,
Example #17
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

###########################
# global settings
# 'cora' or 'citeseer' or 'polblogs'
dataname = args.dataset
gpu_id = '0'

if dataname in ['cora', 'citeseer', 'pubmed']:
    adj, features, idx_train, idx_val, idx_test, labels = load_data(dataname,
                                                                    root='..')
if dataname == "polblogs":
    data = Dataset(root='../data/', name=dataname)
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
num_class = labels.max() + 1
adj_norm = utils.preprocess_graph(adj)
labels_onehot = np.eye(num_class)[labels]
preds = {}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

################################## attacking ##################################
degrees = adj.sum(0).A1
hidden_dim = 16
direct_attack = True
n_influencers = 1 if direct_attack else 5
Example #18
# The random seed here controls the train/val/test split; it must match the
# seed used when the perturbed graph was generated.
# data = Dataset(root='/tmp/', name=args.dataset, setting='nettack', seed=15)
# Or we can just use setting='prognn' to get the splits
# data = Dataset(root='./tmp/', name=args.dataset, setting='prognn')
# adj, features, labels_1 = data.adj, data.features, data.labels
# idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

# load pre-attacked graph
# perturbed_data = PrePtbDataset(root='./tmp/',
#         name=args.dataset,
#         attack_method='meta',
#         ptb_rate=args.ptb_rate)

# use data splits provided by ProGNN
data = Dataset(root='./tmp/', name=args.dataset, setting='prognn')
data.adj = sp.csr_matrix(adj_per)
data.features = sp.csr_matrix(base_feat)
data.labels = labels
data.idx_train = idx_train
data.idx_val = idx_val
data.idx_test = idx_test

perturbed_adj = sp.csr_matrix(adj_per, dtype=float)
features = sp.csr_matrix(base_feat, dtype=float)

# Setup Defense Model
gat = GAT(nfeat=features.shape[1],
          nhid=8,
          heads=8,
          nclass=labels.max().item() + 1,
Example #19
parser.add_argument('--attack_rate', type=float, default=0.2)
parser.add_argument('--denoise_rate', type=float, default=0.01)
parser.add_argument('--lmda', type=float, default=0.1)

args = parser.parse_args()

args.device = device = torch.device(
    f'cuda:{args.cuda_id:d}' if torch.cuda.is_available() else 'cpu')

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True

data = Dataset(root='./datasets/',
               name=args.dataset, seed=15, setting='nettack')
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

if args.ptb_rate != 0:
    adj = ssp.load_npz(
        f'./datasets/{args.dataset}_meta_adj_{args.ptb_rate:g}.npz')

features = torch.Tensor(features.todense()).to(args.device)
adj = adj.tocoo()
edge_index = torch.LongTensor([adj.row, adj.col]).to(args.device)
edge_values = torch.Tensor(adj.data).to(args.device)
labels = torch.LongTensor(labels).to(args.device)
idx_train = torch.LongTensor(idx_train).to(args.device)
idx_val = torch.LongTensor(idx_val).to(args.device)
idx_test = torch.LongTensor(idx_test).to(args.device)
Example #20
def load_perterbued_data(dataset, ptb_rate, ptb_type="meta"):
    if ptb_type == 'meta':
        data = Dataset(root='/tmp/',
                       name=dataset.lower(),
                       setting='nettack',
                       seed=15,
                       require_mask=True)
        data.x, data.y = data.features, data.labels
        if ptb_rate > 0:
            perturbed_data = PrePtbDataset(root='/tmp/',
                                           name=dataset.lower(),
                                           attack_method='meta',
                                           ptb_rate=ptb_rate)
            data.edge_index = perturbed_data.adj
        else:
            data.edge_index = data.adj
        return data

    elif ptb_type == 'random_add':
        data = Dataset(root='/tmp/',
                       name=dataset.lower(),
                       setting='nettack',
                       seed=15,
                       require_mask=True)
        data.x, data.y = data.features, data.labels
        num_edge = data.adj.sum(axis=None) / 2
        attacker = Random()
        attacker.attack(data.adj,
                        n_perturbations=int(ptb_rate * num_edge),
                        type='add')
        data.edge_index = attacker.modified_adj
        return data

    elif ptb_type == 'random_remove':
        data = Dataset(root='/tmp/',
                       name=dataset.lower(),
                       setting='nettack',
                       seed=15,
                       require_mask=True)
        data.x, data.y = data.features, data.labels
        num_edge = data.adj.sum(axis=None) / 2
        attacker = Random()
        attacker.attack(data.adj,
                        n_perturbations=int(ptb_rate * num_edge),
                        type='remove')
        data.edge_index = attacker.modified_adj
        return data

    raise NotImplementedError(f"ptb_type '{ptb_type}' has not been implemented")
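
# A usage sketch for load_perterbued_data() above; the dataset name and
# perturbation rate are illustrative, and the deeprobust data must be
# available under /tmp/:
#   data = load_perterbued_data('cora', ptb_rate=0.05, ptb_type='meta')
#   data.edge_index  # perturbed adjacency; data.x / data.y are the clean features/labels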
Example #21
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', type=int, default=0, help='cuda')
    parser.add_argument('--seed', type=int, default=123, help='Random seed for model')
    parser.add_argument('--data_seed', type=int, default=123,help='Random seed for data split')
    parser.add_argument('--dataset', type=str, default='cora', help='dataset')
    parser.add_argument('--gnn_path', type=str, required=True, help='Path of saved model')
    parser.add_argument('--model', type=str, default='PGD', help='model variant')  # PGD, random
    parser.add_argument('--loss_type', type=str, default='CE', help='loss type')
    parser.add_argument('--att_lr', type=float, default=200, help='Initial learning rate')
    parser.add_argument('--perturb_epochs', type=int, default=100, help='Number of epochs to poisoning loop')
    parser.add_argument('--ptb_rate', type=float, default=0.05, help='perturbation rate')
    parser.add_argument('--reg_weight', type=float, default=0.0, help='regularization weight')
    parser.add_argument('--loss_weight', type=float, default=1.0, help='loss weight')
    parser.add_argument('--weight_decay', type=float, default=5e-4, help='Weight decay (L2 loss on parameters)')
    parser.add_argument('--hidden', type=int, default=32, help='Number of hidden units')
    parser.add_argument('--dropout', type=float, default=0.0, help='Dropout rate (1 - keep probability)')
    parser.add_argument('--data_dir', type=str, default='./tmp/', help='Directory to download dataset')
    parser.add_argument('--sanitycheck', type=str, default='no', help='whether to store intermediate results')
    parser.add_argument('--sanity_dir', type=str, default='./sanitycheck_evasion/', help='Directory to store the intermediate results')
    
    parser.add_argument('--distance_type', type=str, default='l2', help='distance type')
    parser.add_argument('--sample_type', type=str, default='sample', help='sample type')
    

    args = parser.parse_args()
    args.device = torch.device(f'cuda:{args.cuda}' if torch.cuda.is_available() else 'cpu')
    torch.set_num_threads(1) # limit cpu use
    
    set_random_seed(args.seed, args.device)

    if not os.path.exists(args.data_dir):
        os.mkdir(args.data_dir)
    if not os.path.exists(args.sanity_dir):
        os.mkdir(args.sanity_dir)
    if not os.path.exists(args.gnn_path):
        raise AssertionError(f'No trained model found under {args.gnn_path}!')

    print('==== Environment ====')
    print(f'torch version: {torch.__version__}')
    print(f'device: {args.device}')
    print(f'torch seed: {args.seed}')

    #########################################################
    # Load data for node classification task
    data = Dataset(root=args.data_dir, name=args.dataset, setting='gcn', seed=args.data_seed)
    adj, features, labels = data.process(process_adj=False, process_feature=False, device=args.device)
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    idx_unlabeled = np.union1d(idx_val, idx_test)

    print('==== Dataset ====')
    print(f'density: {nx.density(nx.from_numpy_array(adj.cpu().numpy()))}')
    print(f'adj shape: {adj.shape}')
    print(f'feature shape: {features.shape}')
    print(f'label number: {labels.max().item()+1}')
    print(f'split seed: {args.data_seed}')
    print(f'train|valid|test set: {idx_train.shape}|{idx_val.shape}|{idx_test.shape}')

    #########################################################
    # Load victim model and test it on clean training nodes
    victim_model = GCN(
        nfeat=features.shape[1], 
        nclass=labels.max().item()+1, 
        nhid=args.hidden,
        dropout=args.dropout, 
        weight_decay=args.weight_decay,
        device=args.device)
    victim_model = victim_model.to(args.device)
    
    victim_model.load_state_dict(torch.load(args.gnn_path))
    victim_model.eval()

    print('==== Victim Model on Clean Graph ====')
    check_victim_model_performance(victim_model, features, adj, labels, idx_test, idx_train)

    #########################################################
    # Setup attack model
    if args.model == 'PGD':
        model = PGDAttack(
            model=victim_model, 
            nnodes=adj.shape[0], 
            loss_type=args.loss_type, 
            loss_weight=args.loss_weight,
            regularization_weight=args.reg_weight,
            device=args.device)
        model = model.to(args.device)
    elif args.model == 'random':
        model = Random()
    else:
        raise AssertionError(f'Attack {args.model} not found!')
        
    #########################################################
    # Attack and evaluate
    print('***************** seed {} *****************'.format(args.seed))
    print('==== Attacking ====')

    perturbations = int(args.ptb_rate * (adj.sum()/2))
    nat_adj = copy.deepcopy(adj)

    # global attack on whole testing nodes
    idx_target = idx_test
    
    # utility = model.calc_utility(
    #     features, 
    #     nat_adj, 
    #     labels, 
    #     idx_target)
    # os.makedirs(f'./{args.dataset}', exist_ok=True)
    # save_utility(f'./{args.dataset}/utility.bin', utility)
    # exit('done')
    
    # gradients = model.attack(
    #         features, 
    #         nat_adj, 
    #         labels, 
    #         idx_target,
    #         perturbations, 
    #         att_lr=args.att_lr, 
    #         epochs=args.perturb_epochs,
    #         distance_type=args.distance_type,
    #         sample_type=args.sample_type)
    # os.makedirs(f'./{args.dataset}', exist_ok=True)
    # save_utility(f'./{args.dataset}/grad_{args.reg_weight}.bin', gradients)
    # exit('done')
    
    if args.model == 'random':
        model.attack(nat_adj, perturbations, 'flip')
    else:
        model.attack(
            features, 
            nat_adj, 
            labels, 
            idx_target,
            perturbations, 
            att_lr=args.att_lr, 
            epochs=args.perturb_epochs,
            distance_type=args.distance_type,
            sample_type=args.sample_type)

    modified_adj = model.modified_adj
    
    # evaluation
    victim_model.load_state_dict(torch.load(args.gnn_path)) # reset to clean model
    victim_model.eval()
    
    print('==== Victim Model on Perturbed Graph ====')
    check_victim_model_performance(victim_model, features, modified_adj, labels, idx_test, idx_train)

    print("==== Parameter ====")
    print(f'Data seed: {args.data_seed}')
    print(f'Dataset: {args.dataset}')
    print(f'Loss type: {args.loss_type}')
    print(f'Perturbation Rate: {args.ptb_rate}')
    print(f'Reg weight: {args.reg_weight}')
    print(f'Attack: {args.model}')
    print(f'Attack seed: {args.seed}')

    # if you want to save the modified adj/features, uncomment the code below
    if args.sanitycheck == 'yes':
        root = args.sanity_dir + '{}_{}_{}_{}_{}_{}lr_{}epoch_{}rate_{}reg1_{}reg2_{}seed'
        root = root.format(args.dataset, args.distance_type, args.sample_type, args.model, 
                           args.loss_type, args.att_lr, args.perturb_epochs, args.ptb_rate, args.loss_weight, args.reg_weight, args.seed)
        save_all(root, model)
torch.set_num_threads(1)  # limit cpu use
print('  pytorch version: ', torch.__version__)
print('  device: ', device)

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if device != 'cpu':
    torch.cuda.manual_seed(args.seed)

#########################################################
# Load data for node classification task
print('==== Dataset ====')
if not osp.exists(args.data_dir):
    os.makedirs(args.data_dir)
data = Dataset(root=args.data_dir,
               name=args.dataset,
               setting='gcn',
               seed=args.data_seed)
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

adj, features, labels = preprocess(adj, features, labels, preprocess_adj=False)

print('  adj shape: ', adj.shape)
print('  feature shape: ', features.shape)
print('  label number: ', labels.max().item() + 1)
print('  train|valid|test set: {}|{}|{}'.format(idx_train.shape, idx_val.shape,
                                                idx_test.shape))

#########################################################
# Set victim model for adversarial training
adv_train_model = GCN(nfeat=features.shape[1],
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', type=int, default=0, help='cuda')
    parser.add_argument('--seed',
                        type=int,
                        default=123,
                        help='Random seed for model')
    parser.add_argument('--data_seed',
                        type=int,
                        default=123,
                        help='Random seed for data split')
    parser.add_argument('--dataset', type=str, default='cora', help='dataset')
    parser.add_argument('--gnn_path',
                        type=str,
                        required=True,
                        help='Path of saved model')
    parser.add_argument(
        '--model', type=str, default='minmax', help='model variant'
    )  # ['minmax', 'Meta-Self', 'A-Meta-Self', 'Meta-Train', 'A-Meta-Train', 'random']
    parser.add_argument('--loss_type',
                        type=str,
                        default='CE',
                        help='loss type')
    parser.add_argument('--att_lr',
                        type=float,
                        default=200,
                        help='Initial learning rate')
    parser.add_argument('--perturb_epochs',
                        type=int,
                        default=200,
                        help='Number of epochs to poisoning loop')
    parser.add_argument('--ptb_rate',
                        type=float,
                        default=0.05,
                        help='perturbation rate')
    parser.add_argument('--loss_weight',
                        type=float,
                        default=1.0,
                        help='loss weight')
    parser.add_argument('--reg_weight',
                        type=float,
                        default=0.0,
                        help='regularization weight')
    parser.add_argument('--weight_decay',
                        type=float,
                        default=5e-4,
                        help='Weight decay (L2 loss on parameters)')
    parser.add_argument('--hidden',
                        type=int,
                        default=32,
                        help='Number of hidden units')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.0,
                        help='Dropout rate (1 - keep probability)')
    parser.add_argument('--data_dir',
                        type=str,
                        default='./tmp/',
                        help='Directory to download dataset')
    parser.add_argument('--target_node',
                        type=str,
                        default='train',
                        help='target node set')
    parser.add_argument('--sanitycheck',
                        type=str,
                        default='no',
                        help='whether to store intermediate results')

    parser.add_argument('--distance_type',
                        type=str,
                        default='l2',
                        help='distance type')
    parser.add_argument('--opt_type',
                        type=str,
                        default='max',
                        help='optimization type')
    parser.add_argument('--sample_type',
                        type=str,
                        default='sample',
                        help='sample type')

    args = parser.parse_args()
    args.device = torch.device(
        f'cuda:{args.cuda}' if torch.cuda.is_available() else 'cpu')
    torch.set_num_threads(1)  # limit cpu use

    set_random_seed(args.seed, args.device)

    if not os.path.exists(args.data_dir):
        os.mkdir(args.data_dir)
    if not os.path.exists(args.gnn_path):
        raise AssertionError(f'No trained model found under {args.gnn_path}!')

    print('==== Environment ====')
    print(f'torch version: {torch.__version__}')
    print(f'device: {args.device}')
    print(f'torch seed: {args.seed}')

    #########################################################
    # Load data for node classification task
    data = Dataset(root=args.data_dir,
                   name=args.dataset,
                   setting='gcn',
                   seed=args.data_seed)
    adj, features, labels = data.process(process_adj=False,
                                         process_feature=False,
                                         device=args.device)
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    idx_unlabeled = np.union1d(idx_val, idx_test)

    print('==== Dataset ====')
    print(f'density: {nx.density(nx.from_numpy_array(adj.cpu().numpy()))}')
    print(f'adj shape: {adj.shape}')
    print(f'feature shape: {features.shape}')
    print(f'label number: {labels.max().item()+1}')
    print(f'split seed: {args.data_seed}')
    print(
        f'train|valid|test set: {idx_train.shape}|{idx_val.shape}|{idx_test.shape}'
    )

    #########################################################
    # Load victim model and test it on clean training nodes
    weight_decay = 0 if args.dataset == 'polblogs' else args.weight_decay
    victim_model = GCN(nfeat=features.shape[1],
                       nclass=labels.max().item() + 1,
                       nhid=args.hidden,
                       dropout=args.dropout,
                       weight_decay=weight_decay,
                       device=args.device)
    victim_model = victim_model.to(args.device)
    victim_model.load_state_dict(torch.load(args.gnn_path))

    surrogate_model = GCN(nfeat=features.shape[1],
                          nclass=labels.max().item() + 1,
                          nhid=args.hidden,
                          dropout=args.dropout,
                          weight_decay=weight_decay,
                          device=args.device)
    surrogate_model = surrogate_model.to(args.device)
    surrogate_model.load_state_dict(torch.load(args.gnn_path))

    print('==== Initial Surrogate Model on Clean Graph ====')
    surrogate_model.eval()
    check_victim_model_performance(surrogate_model, features, adj, labels,
                                   idx_test, idx_train)

    #########################################################
    # Setup attack model
    if args.model == 'minmax':
        model = MinMax(model=surrogate_model,
                       nnodes=adj.shape[0],
                       loss_type=args.loss_type,
                       loss_weight=args.loss_weight,
                       regularization_weight=args.reg_weight,
                       device=args.device)
        model = model.to(args.device)
    elif 'Meta' in args.model:  # 'Meta-Self', 'A-Meta-Self', 'Meta-Train', 'A-Meta-Train'
        if 'Self' in args.model:
            lambda_ = 0
        if 'Train' in args.model:
            lambda_ = 1
        if 'Both' in args.model:
            lambda_ = 0.5

        if 'A' in args.model:
            model = MetaApprox(model=surrogate_model,
                               nnodes=adj.shape[0],
                               attack_structure=True,
                               attack_features=False,
                               regularization_weight=args.reg_weight,
                               device=args.device,
                               lambda_=lambda_)
        else:
            model = Metattack(model=surrogate_model,
                              nnodes=adj.shape[0],
                              attack_structure=True,
                              attack_features=False,
                              regularization_weight=args.reg_weight,
                              device=args.device,
                              lambda_=lambda_)
        model = model.to(args.device)
    elif args.model == 'random':
        model = Random()
    else:
        raise AssertionError(f'Attack {args.model} not found!')

    #########################################################
    # Attack and evaluate
    print('***************** seed {} *****************'.format(args.seed))
    print('==== Attacking ====')

    perturbations = int(args.ptb_rate * (adj.sum() / 2))
    nat_adj = copy.deepcopy(adj)

    # switch target node set for minmax attack
    if args.target_node == 'test':
        idx_target = idx_test
    elif args.target_node == 'train':
        idx_target = idx_train
    else:
        idx_target = np.hstack((idx_test, idx_train)).astype(int)

    # Start attack
    if args.model == 'random':
        model.attack(nat_adj, perturbations, 'flip')
    elif 'Meta' in args.model:
        model.attack(features,
                     nat_adj,
                     labels,
                     idx_train,
                     idx_unlabeled,
                     perturbations,
                     ll_constraint=False,
                     verbose=True)
    else:
        model.attack(features,
                     nat_adj,
                     labels,
                     idx_target,
                     perturbations,
                     att_lr=args.att_lr,
                     epochs=args.perturb_epochs,
                     distance_type=args.distance_type,
                     sample_type=args.sample_type,
                     opt_type=args.opt_type)

    modified_adj = model.modified_adj

    # evaluation
    #########################################################
    # victim model on clean graph
    print('==== Victim Model on clean graph ====')
    check_victim_model_performance(victim_model, features, adj, labels,
                                   idx_test, idx_train)

    # victim model on perturbed graph
    print('==== Victim Model on perturbed graph ====')
    check_victim_model_performance(victim_model, features, modified_adj,
                                   labels, idx_test, idx_train)

    # retrain victim model on perturbed graph
    print('==== Poisoned Surrogate Model on perturbed graph ====')
    surrogate_model.initialize()
    surrogate_model.fit(features,
                        modified_adj,
                        labels,
                        idx_train,
                        idx_val=None,
                        train_iters=1000,
                        verbose=False)
    check_victim_model_performance(surrogate_model, features, modified_adj,
                                   labels, idx_test, idx_train)

    # test poisoned model on clean graph
    print('==== Poisoned Surrogate Model on clean graph ====')
    check_victim_model_performance(surrogate_model, features, adj, labels,
                                   idx_test, idx_train)

    print("==== Parameter ====")
    print(f'seed: data {args.data_seed}, attack {args.seed}')
    print(
        f'dataset: {args.dataset}, attack model {args.model}, target {args.target_node}'
    )
    print(f'loss type: {args.loss_type}')
    print(
        f'perturbation rate: {args.ptb_rate}, epoch: {args.perturb_epochs}, lr: {args.att_lr}'
    )
    print(f'weight: loss {args.loss_weight}, reg {args.reg_weight}')
    print(f'distance type: {args.distance_type}, opt type: {args.opt_type}')

    # if you want to save the modified adj/features, uncomment the code below
    if args.sanitycheck == 'yes':
        root = './sanitycheck_evasion/{}_{}_{}_{}_{}_{}lr_{}epoch_{}rate_{}reg_{}target_{}seed'
        root = root.format(args.dataset, args.distance_type, args.sample_type,
                           args.model, args.loss_type, args.att_lr,
                           args.perturb_epochs, args.ptb_rate, args.reg_weight,
                           args.target_node, args.seed)
        save_all(root, model)
Example #24
    def __init__(self, dataset, args, data_path="data", task_type="full"):
        self.dataset = dataset
        self.data_path = data_path
        (self.adj, self.train_adj, self.features, self.train_features,
         self.labels, self.idx_train, self.idx_val, self.idx_test, self.degree,
         self.learning_type) = data_loader(dataset,
                                           data_path,
                                           "NoNorm",
                                           False,
                                           task_type,
                                           seed=args.seed)

        if args.ptb_rate > 0:
            # need to install deeprobust: https://github.com/DSE-MSU/DeepRobust
            from deeprobust.graph.data import Dataset, PrePtbDataset
            data = Dataset(root='/tmp/',
                           name=args.dataset,
                           setting='nettack',
                           seed=15)
            self.adj = data.adj
            self.features = data.features.todense()
            self.labels = data.labels
            self.idx_train, self.idx_val, self.idx_test = data.idx_train, data.idx_val, data.idx_test
            if args.ptb_rate != 10:
                perturbed_data = PrePtbDataset(root='/tmp/',
                                               name=args.dataset,
                                               attack_method='meta',
                                               ptb_rate=args.ptb_rate)
                self.adj = perturbed_data.adj

        self.train_adj = self.adj
        self.train_features = self.features
        self.learning_type = 'transductive'
        self.labels = self.labels.astype(int)

        self.features = torch.FloatTensor(self.features).float()
        self.train_features = torch.FloatTensor(self.train_features).float()
        # self.train_adj = self.train_adj.tocsr()

        if args.train_size and not args.fastmode:
            self.idx_train, self.idx_val, self.idx_test = get_splits_each_class(
                labels=self.labels, train_size=args.train_size)
            # print(self.idx_train[:10])
            # from ssl_utils import get_few_labeled_splits
            # self.idx_train, self.idx_val, self.idx_test = get_few_labeled_splits(
            #         labels=self.labels, train_size=args.train_size)

        if args.fastmode:
            from deeprobust.graph.utils import get_train_test
            self.idx_train, self.idx_test = get_train_test(
                nnodes=self.adj.shape[0],
                test_size=1 - args.label_rate,
                stratify=self.labels)
            self.idx_test = self.idx_test[:1000]

        self.labels_torch = torch.LongTensor(self.labels)
        self.idx_train_torch = torch.LongTensor(self.idx_train)
        self.idx_val_torch = torch.LongTensor(self.idx_val)
        self.idx_test_torch = torch.LongTensor(self.idx_test)
        # vertex_sampler cache; np.where returns a tuple, hence the [0]
        self.pos_train_idx = np.where(self.labels[self.idx_train] == 1)[0]
        self.neg_train_idx = np.where(self.labels[self.idx_train] == 0)[0]

        self.nfeat = self.features.shape[1]
        self.nclass = int(self.labels.max().item() + 1)
        self.trainadj_cache = {}
        self.adj_cache = {}
        #print(type(self.train_adj))
        self.degree_p = None
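
# Standalone sketch of the pre-perturbed loading step used in __init__ above,
# assuming the deeprobust API shown in this snippet (the helper name is
# hypothetical):
from deeprobust.graph.data import Dataset, PrePtbDataset

def load_meta_attacked(name, ptb_rate):
    data = Dataset(root='/tmp/', name=name, setting='nettack', seed=15)
    adj = data.adj
    if ptb_rate > 0:
        perturbed = PrePtbDataset(root='/tmp/', name=name,
                                  attack_method='meta', ptb_rate=ptb_rate)
        adj = perturbed.adj
    return adj, data.features.todense(), data.labels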
Example #25
def combination(args):
    datasets = args.datasets
    df_path = args.output

    selection_options = [
        [ns.get_random_nodes, 'random'],
        [ns.get_nodes_with_lowest_degree, 'degree'],
        [ns.get_nodes_with_lowest_pagerank, 'pagerank'],
        [ns.get_nodes_with_lowest_eigenvector_centrality, 'eigenvector'],
        [ns.get_nodes_with_lowest_betweenness_centrality, 'betweenness'],
        [ns.get_nodes_with_lowest_closeness_centrality, 'closeness'],
    ]

    connection_options = [
        [nc.random_connection, 'random'],
        [nc.community_hungarian_connection, 'community'],
        [nc.distance_hungarian_connection, 'distance'],
        [nc.katz_hungarian_connection, 'katz'],
    ]

    for dataset in datasets:
        data = Dataset(root='/tmp/', name=dataset)
        G_orig = nx.from_scipy_sparse_matrix(data.adj)
        degree_centralities_orig = np.array(
            list(nx.degree_centrality(G_orig).values()))
        ccoefs_orig = np.array(
            list(
                nx.clustering(G_orig, nodes=G_orig.nodes,
                              weight=None).values()))

        for selection, selection_name in selection_options:
            for connection, connection_name in connection_options:
                print(f'attack [{selection_name}]*[{connection_name}]')
                for perturbation_rate in [0.005, 0.0075, 0.01, 0.025, 0.05,
                                          0.075, 0.10, 0.15, 0.20]:
                    # average over 5 seeds only when randomness is involved
                    n_seeds = 5 if 'random' in (selection_name,
                                                connection_name) else 1
                    for seed in range(n_seeds):
                        modified_adj, elapsed = apply_structack(
                            build_custom(selection, connection),
                            attack_structack,
                            data,
                            perturbation_rate,
                            cuda and (dataset != 'pubmed'),
                            seed=seed)

                        # reload the dataset with a different split (WARNING: this doesn't work for attack methods which depend on the split)
                        data = Dataset(root='/tmp/', name=dataset)

                        row = {
                            'dataset': dataset,
                            'selection': selection_name,
                            'connection': connection_name,
                            'gcn_seed': seed,
                            'perturbation_rate': perturbation_rate,
                            'elapsed': elapsed
                        }
                        row = extend_row_with_noticeability(
                            row, G_orig, degree_centralities_orig, ccoefs_orig,
                            data.adj, modified_adj)
                        print(row)
                        cdf = pd.DataFrame()
                        if os.path.exists(df_path):
                            cdf = pd.read_csv(df_path)
                        # DataFrame.append was removed in pandas 2.0
                        cdf = pd.concat([cdf, pd.DataFrame([row])],
                                        ignore_index=True)
                        cdf.to_csv(df_path, index=False)
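
# The read-append-write CSV pattern above recurs across these examples; a
# hypothetical helper that bundles it (not in the original code):
import os
import pandas as pd

def append_row_csv(df_path, row):
    cdf = pd.read_csv(df_path) if os.path.exists(df_path) else pd.DataFrame()
    cdf = pd.concat([cdf, pd.DataFrame([row])], ignore_index=True)
    cdf.to_csv(df_path, index=False)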
Example #26
def combination(datasets):

    df_path = 'reports/eval/comb_acc_eval-new-datasets.csv'

    selection_options = [
        [ns.get_nodes_with_lowest_degree, 'degree'],
        [ns.get_nodes_with_lowest_pagerank, 'pagerank'],
        [ns.get_nodes_with_lowest_eigenvector_centrality, 'eigenvector'],
        [ns.get_nodes_with_lowest_betweenness_centrality, 'betweenness'],
        [ns.get_nodes_with_lowest_closeness_centrality, 'closeness'],
        [ns.get_random_nodes, 'random'],
    ]

    connection_options = [
        [nc.community_hungarian_connection, 'community'],
        [nc.distance_hungarian_connection, 'distance'],
        [nc.katz_hungarian_connection, 'katz'],
        [nc.random_connection, 'random'],
    ]

    split_seeds = 1
    gcn_seeds = 1
    for selection, selection_name in selection_options:
        for connection, connection_name in connection_options:
            for dataset in datasets:
                ''' Clean graph evaluation '''
                # for split_seed in range(split_seeds):
                #     np.random.seed(split_seed)
                #     torch.manual_seed(split_seed)
                #     if cuda:
                #         torch.cuda.manual_seed(split_seed)
                #     # reload the dataset with a different split (WARNING: this doesn't work for attack methods which depend on the split)
                #     data = Dataset(root='/tmp/', name=dataset)
                #     for seed in range(gcn_seeds):

                #         np.random.seed(seed)
                #         torch.manual_seed(seed)
                #         if cuda:
                #             torch.cuda.manual_seed(seed)
                #         acc = test_gcn(postprocess_adj(data.adj).to(torch.device("cuda" if cuda else "cpu")),
                #                     data, cuda, pre_test_data)
                #         row = {'dataset':dataset, 'selection':'clean', 'connection':'clean',
                #                 'gcn_seed':seed, 'acc':acc, 'perturbation_rate':0,'elapsed':0,
                #                 'split_seed':split_seed}
                #         print(row)
                #         cdf = pd.DataFrame()
                #         if os.path.exists(df_path):
                #             cdf = pd.read_csv(df_path)
                #         cdf = cdf.append(row, ignore_index=True)
                #         cdf.to_csv(df_path,index=False)

                data = Dataset(root='/tmp/', name=dataset)
                print(f'attack [{selection_name}]*[{connection_name}]')
                for perturbation_rate in [0.01]:  # rates 0.10, 0.15, 0.20 disabled in this run
                    modified_adj, elapsed = apply_structack(
                        build_custom(selection, connection, dataset),
                        attack_structack,
                        data,
                        perturbation_rate,
                        cuda and (dataset != 'pubmed'),
                        seed=0)
                    for split_seed in range(split_seeds):
                        np.random.seed(split_seed)
                        torch.manual_seed(split_seed)
                        if cuda:
                            torch.cuda.manual_seed(split_seed)

                        # reload the dataset with a different split (WARNING: this doesn't work for attack methods which depend on the split)
                        data = Dataset(root='/tmp/', name=dataset)

                        for seed in range(gcn_seeds):

                            np.random.seed(seed)
                            torch.manual_seed(seed)
                            if cuda:
                                torch.cuda.manual_seed(seed)

                            acc = test_gcn(modified_adj, data, cuda,
                                           pre_test_data)
                            row = {
                                'dataset': dataset,
                                'selection': selection_name,
                                'connection': connection_name,
                                'gcn_seed': seed,
                                'acc': acc,
                                'perturbation_rate': perturbation_rate,
                                'elapsed': elapsed,
                                'split_seed': split_seed
                            }
                            print(row)
                            cdf = pd.DataFrame()
                            if os.path.exists(df_path):
                                cdf = pd.read_csv(df_path)
                            # DataFrame.append was removed in pandas 2.0
                            cdf = pd.concat([cdf, pd.DataFrame([row])],
                                            ignore_index=True)
                            cdf.to_csv(df_path, index=False)
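
# The numpy/torch/CUDA seeding triple recurs for every split seed and GCN
# seed above; a minimal sketch of a helper that bundles it (hypothetical,
# not in the original code):
import numpy as np
import torch

def set_all_seeds(seed, cuda_enabled=False):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda_enabled:
        torch.cuda.manual_seed(seed)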
Example #27
                    type=float,
                    default=0.05,
                    help='perturbation rate')

args = parser.parse_args()
args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# make sure to use the same data splits as when the attacks were generated
np.random.seed(args.seed)
torch.manual_seed(args.seed)  # seed the CPU RNG as well, not only CUDA
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# load original dataset (to get clean features and labels)
data = Dataset(root='/tmp/', name=args.dataset)
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

# Setup Target Model
model = GCN(nfeat=features.shape[1],
            nclass=labels.max() + 1,
            nhid=16,
            dropout=0,
            with_relu=False,
            with_bias=True,
            device=device)

model = model.to(device)

# test on original adj
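# The example is truncated after this comment; a minimal sketch of what the
# clean-graph test could look like with deeprobust's GCN fit/test API (an
# assumption, not the author's original code):
model.fit(features, adj, labels, idx_train, idx_val)
model.test(idx_test)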
Example #28
parser.add_argument('--missing_rate',
                    type=int,
                    default=0,
                    help='missing rate, from 0 to 100')
parser.add_argument('--adj_path', type=str, default="")
parser.add_argument('--seed', type=int, default=1, help='seed')

args = parser.parse_args()

# logger (pass filename='example.log' to basicConfig to also log to a file)
logging.basicConfig(format='%(message)s',
                    level=getattr(logging, args.log.upper()))

# load data
data = Dataset(root='/tmp/', name=args.data, setting='nettack')
adj, features, labels = data.adj, data.features, data.labels
#idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
splits = np.load('../Pro-GNN/splits/{}_{}.npz'.format(args.data, args.seed))
idx_train, idx_val, idx_test = splits['train'], splits['val'], splits['test']

from .random import Random  # local module providing the Random attack; needed below

attacker = Random()
# rate 1.0: as many random insertions as there are existing edges
n_perturbations = int(1.0 * (adj.sum() // 2))
perturbed_adj = attacker.attack(adj, n_perturbations, type='insert')

#perturbed_adj = sp.load_npz(args.adj_path)
device = torch.device("cuda")
torch.manual_seed(123)
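
# The local `Random` attack implementation is not shown in this example; a
# minimal sketch of random edge insertion under the same interface, assuming
# a symmetric, unweighted scipy.sparse adjacency (hypothetical implementation):
import numpy as np
import scipy.sparse as sp

def random_insert(adj, n_perturbations, rng=np.random):
    adj = sp.lil_matrix(adj)
    n = adj.shape[0]
    added = 0
    while added < n_perturbations:
        u, v = rng.randint(n), rng.randint(n)
        if u != v and adj[u, v] == 0:
            adj[u, v] = adj[v, u] = 1  # keep the graph undirected
            added += 1
    return adj.tocsr()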
Example #29
        self.val_mask = get_mask(self.idx_val)
        self.test_mask = get_mask(self.idx_test)
        self.y_train = get_y(idx_train)
        self.y_val = get_y(idx_val)
        self.y_test = get_y(idx_test)

    def onehot(self, labels):
        eye = np.identity(labels.max() + 1)
        onehot_mx = eye[labels]
        return onehot_mx


def parse_index_file(filename):
    with open(filename) as f:
        return [int(line.strip()) for line in f]


if __name__ == '__main__':
    from deeprobust.graph.data import Dataset
    for name in ['cora', 'citeseer', 'pubmed', 'cora_ml']:
        data = Dataset(root='/tmp/', name=name, setting="prognn")
        idx_train = data.idx_train
        data2 = Dataset(root='/tmp/', name=name, setting="nettack", seed=15)
        idx_train2 = data2.idx_train
        # the 'prognn' and 'nettack' (seed=15) settings should give identical splits
        assert (idx_train != idx_train2).sum() == 0

    data = Dataset(root='/tmp/', name='flickr')
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
Example #30
        Returns
        -------
        torch.FloatTensor
            output (log probabilities) of GAT
        """

        self.eval()
        self.dropout = dropout

        self.data = self.data if geodata is None else geodata.to(self.device)

        return self.forward(self.data)


if __name__ == "__main__":
    from deeprobust.graph.data import Dataset
    # from deeprobust.graph.defense import GAT
    data = Dataset(root='./tmp/', name='cora', setting='gcn')
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    gat = GAT(nfeat=features.shape[1],
              nhid=8,
              heads=8,
              nclass=labels.max().item() + 1,
              dropout=0.5,
              device='cpu')
    gat = gat.to('cpu')
    gat.fit(data.geodata, verbose=True)  # train with earlystopping
    gat.test()
    print(gat.predict())