Example #1
from deeprobust.graph.data import Dataset, PrePtbDataset
from deeprobust.graph.global_attack import Random


def load_perturbed_data(dataset, ptb_rate, ptb_type="meta"):
    if ptb_type == 'meta':
        data = Dataset(root='/tmp/',
                       name=dataset.lower(),
                       setting='nettack',
                       seed=15,
                       require_mask=True)
        data.x, data.y = data.features, data.labels
        if ptb_rate > 0:
            perturbed_data = PrePtbDataset(root='/tmp/',
                                           name=dataset.lower(),
                                           attack_method='meta',
                                           ptb_rate=ptb_rate)
            data.edge_index = perturbed_data.adj
        else:
            data.edge_index = data.adj
        return data

    elif ptb_type == 'random_add':
        data = Dataset(root='/tmp/',
                       name=dataset.lower(),
                       setting='nettack',
                       seed=15,
                       require_mask=True)
        data.x, data.y = data.features, data.labels
        num_edge = data.adj.sum(axis=None) / 2
        attacker = Random()
        attacker.attack(data.adj,
                        n_perturbations=int(ptb_rate * num_edge),
                        type='add')
        data.edge_index = attacker.modified_adj
        return data

    elif ptb_type == 'random_remove':
        data = Dataset(root='/tmp/',
                       name=dataset.lower(),
                       setting='nettack',
                       seed=15,
                       require_mask=True)
        data.x, data.y = data.features, data.labels
        num_edge = data.adj.sum(axis=None) / 2
        attacker = Random()
        attacker.attack(data.adj,
                        n_perturbations=int(ptb_rate * num_edge),
                        type='remove')
        data.edge_index = attacker.modified_adj
        return data

    raise NotImplementedError(f"ptb_type '{ptb_type}' has not been implemented")
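
A minimal usage sketch for the helper above (the dataset name and rate are illustrative; deeprobust downloads Cora to /tmp/ on first use):

data = load_perturbed_data('cora', ptb_rate=0.05, ptb_type='meta')
print(data.x.shape)           # node features
print(data.edge_index.shape)  # (perturbed) adjacency matrix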
Example #2
import numpy as np
import torch
from deeprobust.graph.data import Dataset, PrePtbDataset

print(args)

# This seed controls the train/val/test split; it must match the seed used
# when the perturbed graph was generated.
np.random.seed(15)

data = Dataset(root='/tmp/', name=args.dataset, setting='nettack')
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

if args.attack == 'no':
    perturbed_adj = adj

if args.attack == 'random':
    from deeprobust.graph.global_attack import Random
    attacker = Random()
    n_perturbations = int(args.ptb_rate * (adj.sum() // 2))
    # Random.attack perturbs in place and returns None; read the result
    # from attacker.modified_adj
    attacker.attack(adj, n_perturbations, type='add')
    perturbed_adj = attacker.modified_adj

if args.attack == 'meta' or args.attack == 'nettack':
    perturbed_data = PrePtbDataset(root='/tmp/',
                                   name=args.dataset,
                                   attack_method=args.attack,
                                   ptb_rate=args.ptb_rate)
    perturbed_adj = perturbed_data.adj
    if args.attack == 'nettack':
        idx_test = perturbed_data.target_nodes

np.random.seed(args.seed)
torch.manual_seed(args.seed)
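
For completeness, a hedged sketch of what typically follows: training a GCN victim on the perturbed graph. The constructor mirrors Example #5; the fit/test calls are assumptions drawn from the deeprobust GCN API used elsewhere on this page.

from deeprobust.graph.defense import GCN

model = GCN(nfeat=features.shape[1],
            nclass=labels.max() + 1,
            nhid=16,
            device='cpu').to('cpu')
model.fit(features, perturbed_adj, labels, idx_train, idx_val)
model.test(idx_test)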
Example #3
# (The head of this example is truncated in the source; the call is
#  presumably deeprobust.graph.utils.get_train_val_test)
idx_train, idx_val, idx_test = get_train_val_test(
        adj.shape[0],
        val_size=0.1,
        test_size=0.8,
        stratify=encode_onehot(labels),
        seed=15)

if args.attack == 'no':
    perturbed_adj = adj

if args.attack == 'random':
    from deeprobust.graph.global_attack import Random
    # To make the generated random attack reproducible, fix both Python's
    # built-in random module and np.random by uncommenting the lines below:
    # import random; random.seed(args.seed)
    # np.random.seed(args.seed)
    attacker = Random()
    n_perturbations = int(args.ptb_rate * (adj.sum() // 2))
    attacker.attack(adj, n_perturbations, type='add')
    perturbed_adj = attacker.modified_adj

if args.attack == 'meta' or args.attack == 'nettack':
    perturbed_data = PrePtbDataset(root='/tmp/',
                                   name=args.dataset,
                                   attack_method=args.attack,
                                   ptb_rate=args.ptb_rate)
    perturbed_adj = perturbed_data.adj
    if args.attack == 'nettack':
        idx_test = perturbed_data.target_nodes

np.random.seed(args.seed)
torch.manual_seed(args.seed)
Example #4
# (Head truncated in the source; the constructor presumably mirrors
#  adv_train_model below)
model = GCN(nfeat=features.shape[1],
            nclass=labels.max() + 1,
            nhid=16,
            dropout=0,
            with_relu=False,
            with_bias=True,
            device=device)

model = model.to(device)

# test on original adj
print('=== test on original adj ===')
model.fit(features, adj, labels, idx_train)
output = model.output
acc_test = accuracy(output[idx_test], labels[idx_test])
print("Test set results:", "accuracy= {:.4f}".format(acc_test.item()))

print('=== Adversarial Training for Evasion Attack ===')
adversary = Random()
adv_train_model = GCN(nfeat=features.shape[1],
                      nclass=labels.max() + 1,
                      nhid=16,
                      dropout=0,
                      with_relu=False,
                      with_bias=True,
                      device=device)

adv_train_model = adv_train_model.to(device)

adv_train_model.initialize()
n_perturbations = int(0.01 * (adj.sum() // 2))
for i in tqdm(range(100)):
    # modified_adj = adversary.attack(features, adj)
    adversary.attack(adj, n_perturbations=n_perturbations, type='add')
    modified_adj = adversary.modified_adj  # rest of the loop body is truncated in the source
Example #5
# load pre-attacked graph
perturbed_data = PtbDataset(root='/tmp/', name=args.dataset)
perturbed_adj = perturbed_data.adj

# Setup Target Model
model = GCN(nfeat=features.shape[1],
            nclass=labels.max() + 1,
            nhid=16,
            dropout=0,
            with_relu=False,
            with_bias=True,
            device=device)

model = model.to(device)

adversary = Random()
# test on original adj
print('=== test on original adj ===')
model.fit(features, adj, labels, idx_train)
output = model.output
acc_test = accuracy(output[idx_test], labels[idx_test])
print("Test set results:", "accuracy= {:.4f}".format(acc_test.item()))

print('=== testing GCN on perturbed graph ===')
model.fit(features, perturbed_adj, labels, idx_train)
output = model.output
acc_test = accuracy(output[idx_test], labels[idx_test])
print("Test set results:", "accuracy= {:.4f}".format(acc_test.item()))

# For a poisoning attack, the adjacency matrix you have
# is already perturbed
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', type=int, default=0, help='cuda')
    parser.add_argument('--seed',
                        type=int,
                        default=123,
                        help='Random seed for model')
    parser.add_argument('--data_seed',
                        type=int,
                        default=123,
                        help='Random seed for data split')
    parser.add_argument('--dataset', type=str, default='cora', help='dataset')
    parser.add_argument('--gnn_path',
                        type=str,
                        required=True,
                        help='Path of saved model')
    parser.add_argument(
        '--model', type=str, default='minmax', help='model variant'
    )  # ['minmax', 'Meta-Self', 'A-Meta-Self', 'Meta-Train', 'A-Meta-Train', 'random']
    parser.add_argument('--loss_type',
                        type=str,
                        default='CE',
                        help='loss type')
    parser.add_argument('--att_lr',
                        type=float,
                        default=200,
                        help='Initial learning rate')
    parser.add_argument('--perturb_epochs',
                        type=int,
                        default=200,
                        help='Number of epochs to poisoning loop')
    parser.add_argument('--ptb_rate',
                        type=float,
                        default=0.05,
                        help='perturbation rate')
    parser.add_argument('--loss_weight',
                        type=float,
                        default=1.0,
                        help='loss weight')
    parser.add_argument('--reg_weight',
                        type=float,
                        default=0.0,
                        help='regularization weight')
    parser.add_argument('--weight_decay',
                        type=float,
                        default=5e-4,
                        help='Weight decay (L2 loss on parameters)')
    parser.add_argument('--hidden',
                        type=int,
                        default=32,
                        help='Number of hidden units')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.0,
                        help='Dropout rate (1 - keep probability)')
    parser.add_argument('--data_dir',
                        type=str,
                        default='./tmp/',
                        help='Directory to download dataset')
    parser.add_argument('--target_node',
                        type=str,
                        default='train',
                        help='target node set')
    parser.add_argument('--sanitycheck',
                        type=str,
                        default='no',
                        help='whether store the intermediate results')

    parser.add_argument('--distance_type',
                        type=str,
                        default='l2',
                        help='distance type')
    parser.add_argument('--opt_type',
                        type=str,
                        default='max',
                        help='optimization type')
    parser.add_argument('--sample_type',
                        type=str,
                        default='sample',
                        help='sample type')

    args = parser.parse_args()
    args.device = torch.device(
        f'cuda:{args.cuda}' if torch.cuda.is_available() else 'cpu')
    torch.set_num_threads(1)  # limit cpu use

    set_random_seed(args.seed, args.device)

    if not os.path.exists(args.data_dir):
        os.mkdir(args.data_dir)
    if not os.path.exists(args.gnn_path):
        raise AssertionError(f'No trained model found under {args.gnn_path}!')

    print('==== Environment ====')
    print(f'torch version: {torch.__version__}')
    print(f'device: {args.device}')
    print(f'torch seed: {args.seed}')

    #########################################################
    # Load data for node classification task
    data = Dataset(root=args.data_dir,
                   name=args.dataset,
                   setting='gcn',
                   seed=args.data_seed)
    adj, features, labels = data.process(process_adj=False,
                                         process_feature=False,
                                         device=args.device)
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    idx_unlabeled = np.union1d(idx_val, idx_test)

    print('==== Dataset ====')
    print(f'density: {nx.density(nx.from_numpy_array(adj.cpu().numpy()))}')
    print(f'adj shape: {adj.shape}')
    print(f'feature shape: {features.shape}')
    print(f'label number: {labels.max().item()+1}')
    print(f'split seed: {args.data_seed}')
    print(
        f'train|valid|test set: {idx_train.shape}|{idx_val.shape}|{idx_test.shape}'
    )

    #########################################################
    # Load victim model and test it on clean training nodes
    weight_decay = 0 if args.dataset == 'polblogs' else args.weight_decay
    victim_model = GCN(nfeat=features.shape[1],
                       nclass=labels.max().item() + 1,
                       nhid=args.hidden,
                       dropout=args.dropout,
                       weight_decay=weight_decay,
                       device=args.device)
    victim_model = victim_model.to(args.device)
    victim_model.load_state_dict(torch.load(args.gnn_path))

    surrogate_model = GCN(nfeat=features.shape[1],
                          nclass=labels.max().item() + 1,
                          nhid=args.hidden,
                          dropout=args.dropout,
                          weight_decay=weight_decay,
                          device=args.device)
    surrogate_model = surrogate_model.to(args.device)
    surrogate_model.load_state_dict(torch.load(args.gnn_path))

    print('==== Initial Surrogate Model on Clean Graph ====')
    surrogate_model.eval()
    check_victim_model_performance(surrogate_model, features, adj, labels,
                                   idx_test, idx_train)

    #########################################################
    # Setup attack model
    if args.model == 'minmax':
        model = MinMax(model=surrogate_model,
                       nnodes=adj.shape[0],
                       loss_type=args.loss_type,
                       loss_weight=args.loss_weight,
                       regularization_weight=args.reg_weight,
                       device=args.device)
        model = model.to(args.device)
    elif 'Meta' in args.model:  # 'Meta-Self', 'A-Meta-Self', 'Meta-Train', 'A-Meta-Train'
        if 'Self' in args.model:
            lambda_ = 0
        if 'Train' in args.model:
            lambda_ = 1
        if 'Both' in args.model:
            lambda_ = 0.5

        if 'A' in args.model:
            model = MetaApprox(model=surrogate_model,
                               nnodes=adj.shape[0],
                               attack_structure=True,
                               attack_features=False,
                               regularization_weight=args.reg_weight,
                               device=args.device,
                               lambda_=lambda_)
        else:
            model = Metattack(model=surrogate_model,
                              nnodes=adj.shape[0],
                              attack_structure=True,
                              attack_features=False,
                              regularization_weight=args.reg_weight,
                              device=args.device,
                              lambda_=lambda_)
        model = model.to(args.device)
    elif args.model == 'random':
        model = Random()
    else:
        raise AssertionError(f'Attack {args.model} not found!')

    #########################################################
    # Attack and evaluate
    print('***************** seed {} *****************'.format(args.seed))
    print('==== Attacking ====')

    perturbations = int(args.ptb_rate * (adj.sum() / 2))
    nat_adj = copy.deepcopy(adj)

    # switch target node set for minmax attack
    if args.target_node == 'test':
        idx_target = idx_test
    elif args.target_node == 'train':
        idx_target = idx_train
    else:
        # np.int was removed in NumPy 1.24; use a concrete dtype instead
        idx_target = np.hstack((idx_test, idx_train)).astype(np.int64)

    # Start attack
    if args.model == 'random':
        model.attack(nat_adj, perturbations, 'flip')
    elif 'Meta' in args.model:
        model.attack(features,
                     nat_adj,
                     labels,
                     idx_train,
                     idx_unlabeled,
                     perturbations,
                     ll_constraint=False,
                     verbose=True)
    else:
        model.attack(features,
                     nat_adj,
                     labels,
                     idx_target,
                     perturbations,
                     att_lr=args.att_lr,
                     epochs=args.perturb_epochs,
                     distance_type=args.distance_type,
                     sample_type=args.sample_type,
                     opt_type=args.opt_type)

    modified_adj = model.modified_adj

    # evaluation
    #########################################################
    # victim model on clean graph
    print('==== Victim Model on clean graph ====')
    check_victim_model_performance(victim_model, features, adj, labels,
                                   idx_test, idx_train)

    # victim model on perturbed graph
    print('==== Victim Model on perturbed graph ====')
    check_victim_model_performance(victim_model, features, modified_adj,
                                   labels, idx_test, idx_train)

    # retrain victim model on perturbed graph
    print('==== Poisoned Surrogate Model on perturbed graph ====')
    surrogate_model.initialize()
    surrogate_model.fit(features,
                        modified_adj,
                        labels,
                        idx_train,
                        idx_val=None,
                        train_iters=1000,
                        verbose=False)
    check_victim_model_performance(surrogate_model, features, modified_adj,
                                   labels, idx_test, idx_train)

    # test poisoned model on clean graph
    print('==== Poisoned Surrogate Model on clean graph ====')
    check_victim_model_performance(surrogate_model, features, adj, labels,
                                   idx_test, idx_train)

    print("==== Parameter ====")
    print(f'seed: data {args.data_seed}, attack {args.seed}')
    print(
        f'dataset: {args.dataset}, attack model {args.model}, target {args.target_node}'
    )
    print(f'loss type: {args.loss_type}')
    print(
        f'perturbation rate: {args.ptb_rate}, epoch: {args.perturb_epochs}, lr: {args.att_lr}'
    )
    print(f'weight: loss {args.loss_weight}, reg {args.reg_weight}')
    print(f'distance type: {args.distance_type}, opt type: {args.opt_type}')

    # To save the modified adj/features, pass --sanitycheck yes
    if args.sanitycheck == 'yes':
        root = './sanitycheck_evasion/{}_{}_{}_{}_{}_{}lr_{}epoch_{}rate_{}reg_{}target_{}seed'
        root = root.format(args.dataset, args.distance_type, args.sample_type,
                           args.model, args.loss_type, args.att_lr,
                           args.perturb_epochs, args.ptb_rate, args.reg_weight,
                           args.target_node, args.seed)
        save_all(root, model)
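
if __name__ == '__main__':
    main()  # entry-point guard (added; the excerpt omits it)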
Example #7
args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

data = Dataset(root='/tmp/', name=args.dataset)
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
idx_unlabeled = np.union1d(idx_val, idx_test)

# Setup Attack Model
model = Random()

n_perturbations = int(args.ptb_rate * (adj.sum() // 2))

model.attack(adj, n_perturbations)
modified_adj = model.modified_adj

adj, features, labels = preprocess(adj,
                                   features,
                                   labels,
                                   preprocess_adj=False,
                                   sparse=True)
adj = adj.to(device)
features = features.to(device)
labels = labels.to(device)
Example #8
def build_random(adj=None,
                 features=None,
                 labels=None,
                 idx_train=None,
                 device=None):
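    # The parameters exist only for interface compatibility with other attack
    # builders; Random() itself needs none of them.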
    return Random()
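
A hedged usage sketch (adj and the 5% rate are illustrative; the attack pattern follows the other examples on this page):

attacker = build_random()
n_perturbations = int(0.05 * (adj.sum() // 2))
attacker.attack(adj, n_perturbations, type='add')
perturbed_adj = attacker.modified_adj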
Example #9
args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

data = Dataset(root='/tmp/', name=args.dataset)
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
idx_unlabeled = np.union1d(idx_val, idx_test)

# Setup Attack Model
model = Random()

n_perturbations = int(args.ptb_rate * (adj.sum()//2))

# Random.attack returns None; the perturbed graph is stored on the attacker
model.attack(adj, n_perturbations)
modified_adj = model.modified_adj

adj, features, labels = preprocess(adj, features, labels, preprocess_adj=False, sparse=True)
adj = adj.to(device)
features = features.to(device)
labels = labels.to(device)

modified_adj = normalize_adj(modified_adj)
modified_adj = sparse_mx_to_torch_sparse_tensor(modified_adj)
modified_adj = modified_adj.to(device)

Example #10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', type=int, default=0, help='cuda')
    parser.add_argument('--seed', type=int, default=123, help='Random seed for model')
    parser.add_argument('--data_seed', type=int, default=123, help='Random seed for data split')
    parser.add_argument('--dataset', type=str, default='cora', help='dataset')
    parser.add_argument('--gnn_path', type=str, required=True, help='Path of saved model')
    parser.add_argument('--model', type=str, default='PGD', help='model variant')  # PGD, random
    parser.add_argument('--loss_type', type=str, default='CE', help='loss type')
    parser.add_argument('--att_lr', type=float, default=200, help='Initial learning rate')
    parser.add_argument('--perturb_epochs', type=int, default=100, help='Number of epochs to poisoning loop')
    parser.add_argument('--ptb_rate', type=float, default=0.05, help='perturbation rate')
    parser.add_argument('--reg_weight', type=float, default=0.0, help='regularization weight')
    parser.add_argument('--loss_weight', type=float, default=1.0, help='loss weight')
    parser.add_argument('--weight_decay', type=float, default=5e-4, help='Weight decay (L2 loss on parameters)')
    parser.add_argument('--hidden', type=int, default=32, help='Number of hidden units')
    parser.add_argument('--dropout', type=float, default=0.0, help='Dropout rate (1 - keep probability)')
    parser.add_argument('--data_dir', type=str, default='./tmp/', help='Directory to download dataset')
    parser.add_argument('--sanitycheck', type=str, default='no', help='whether store the intermediate results')
    parser.add_argument('--sanity_dir', type=str, default='./sanitycheck_evasion/', help='Directory to store the intermediate results')
    
    parser.add_argument('--distance_type', type=str, default='l2', help='distance type')
    parser.add_argument('--sample_type', type=str, default='sample', help='sample type')
    

    args = parser.parse_args()
    args.device = torch.device(f'cuda:{args.cuda}' if torch.cuda.is_available() else 'cpu')
    torch.set_num_threads(1) # limit cpu use
    
    set_random_seed(args.seed, args.device)

    if not os.path.exists(args.data_dir):
        os.mkdir(args.data_dir)
    if not os.path.exists(args.sanity_dir):
        os.mkdir(args.sanity_dir)
    if not os.path.exists(args.gnn_path):
        raise AssertionError(f'No trained model found under {args.gnn_path}!')

    print('==== Environment ====')
    print(f'torch version: {torch.__version__}')
    print(f'device: {args.device}')
    print(f'torch seed: {args.seed}')

    #########################################################
    # Load data for node classification task
    data = Dataset(root=args.data_dir, name=args.dataset, setting='gcn', seed=args.data_seed)
    adj, features, labels = data.process(process_adj=False, process_feature=False, device=args.device)
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    idx_unlabeled = np.union1d(idx_val, idx_test)

    print('==== Dataset ====')
    print(f'density: {nx.density(nx.from_numpy_array(adj.cpu().numpy()))}')
    print(f'adj shape: {adj.shape}')
    print(f'feature shape: {features.shape}')
    print(f'label number: {labels.max().item()+1}')
    print(f'split seed: {args.data_seed}')
    print(f'train|valid|test set: {idx_train.shape}|{idx_val.shape}|{idx_test.shape}')

    #########################################################
    # Load victim model and test it on clean training nodes
    victim_model = GCN(
        nfeat=features.shape[1], 
        nclass=labels.max().item()+1, 
        nhid=args.hidden,
        dropout=args.dropout, 
        weight_decay=args.weight_decay,
        device=args.device)
    victim_model = victim_model.to(args.device)
    
    victim_model.load_state_dict(torch.load(args.gnn_path))
    victim_model.eval()

    print('==== Victim Model on Clean Graph ====')
    check_victim_model_performance(victim_model, features, adj, labels, idx_test, idx_train)

    #########################################################
    # Setup attack model
    if args.model == 'PGD':
        model = PGDAttack(
            model=victim_model, 
            nnodes=adj.shape[0], 
            loss_type=args.loss_type, 
            loss_weight=args.loss_weight,
            regularization_weight=args.reg_weight,
            device=args.device)
        model = model.to(args.device)
    elif args.model == 'random':
        model = Random()
    else:
        raise AssertionError(f'Attack {args.model} not found!')
        
    #########################################################
    # Attack and evaluate
    print('***************** seed {} *****************'.format(args.seed))
    print('==== Attacking ====')

    perturbations = int(args.ptb_rate * (adj.sum()/2))
    nat_adj = copy.deepcopy(adj)

    # global attack on whole testing nodes
    idx_target = idx_test
    
    # utility = model.calc_utility(
    #     features, 
    #     nat_adj, 
    #     labels, 
    #     idx_target)
    # os.makedirs(f'./{args.dataset}', exist_ok=True)
    # save_utility(f'./{args.dataset}/utility.bin', utility)
    # exit('done')
    
    # gradients = model.attack(
    #         features, 
    #         nat_adj, 
    #         labels, 
    #         idx_target,
    #         perturbations, 
    #         att_lr=args.att_lr, 
    #         epochs=args.perturb_epochs,
    #         distance_type=args.distance_type,
    #         sample_type=args.sample_type)
    # os.makedirs(f'./{args.dataset}', exist_ok=True)
    # save_utility(f'./{args.dataset}/grad_{args.reg_weight}.bin', gradients)
    # exit('done')
    
    if args.model == 'random':
        model.attack(nat_adj, perturbations, 'flip')
    else:
        model.attack(
            features, 
            nat_adj, 
            labels, 
            idx_target,
            perturbations, 
            att_lr=args.att_lr, 
            epochs=args.perturb_epochs,
            distance_type=args.distance_type,
            sample_type=args.sample_type)

    modified_adj = model.modified_adj
    
    # evaluation
    victim_model.load_state_dict(torch.load(args.gnn_path)) # reset to clean model
    victim_model.eval()
    
    print('==== Victim Model on Perturbed Graph ====')
    check_victim_model_performance(victim_model, features, modified_adj, labels, idx_test, idx_train)

    print("==== Parameter ====")
    print(f'Data seed: {args.data_seed}')
    print(f'Dataset: {args.dataset}')
    print(f'Loss type: {args.loss_type}')
    print(f'Perturbation Rate: {args.ptb_rate}')
    print(f'Reg weight: {args.reg_weight}')
    print(f'Attack: {args.model}')
    print(f'Attack seed: {args.seed}')

    # To save the modified adj/features, pass --sanitycheck yes
    if args.sanitycheck == 'yes':
        root = args.sanity_dir + '{}_{}_{}_{}_{}_{}lr_{}epoch_{}rate_{}reg1_{}reg2_{}seed'
        root = root.format(args.dataset, args.distance_type, args.sample_type, args.model, 
                           args.loss_type, args.att_lr, args.perturb_epochs, args.ptb_rate, args.loss_weight, args.reg_weight, args.seed)
        save_all(root, model)
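
if __name__ == '__main__':
    main()  # entry-point guard (added; the excerpt omits it)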