def init_setup():
    data = Dataset(root='/tmp/', name=args.dataset, setting='gcn')
    data.features = normalize_feature(data.features)
    adj, features, labels = data.adj, data.features, data.labels
    StaticGraph.graph = nx.from_scipy_sparse_matrix(adj)
    dict_of_lists = nx.to_dict_of_lists(StaticGraph.graph)
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    device = torch.device('cuda' if args.ctx == 'gpu' else 'cpu')

    # black-box setting
    adj, features, labels = preprocess(adj, features, labels,
                                       preprocess_adj=False, sparse=True,
                                       device=device)
    victim_model = load_victim_model(data, device=device,
                                     file_path=args.saved_model)
    setattr(victim_model, 'norm_tool',
            GraphNormTool(normalize=True, gm='gcn', device=device))
    output = victim_model.predict(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))
    return features, labels, idx_val, idx_test, victim_model, dict_of_lists, adj
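# For reference: a minimal sketch of the row-normalization that
# normalize_feature presumably applies (an assumption about
# deeprobust.graph.utils.normalize_feature; the library call above is
# authoritative). Each feature row is rescaled to sum to 1, i.e. X <- D^-1 X.
import numpy as np
import scipy.sparse as sp

def row_normalize_sketch(mx):
    """Row-normalize a sparse feature matrix."""
    rowsum = np.array(mx.sum(1)).flatten()
    with np.errstate(divide='ignore'):
        r_inv = 1.0 / rowsum
    r_inv[np.isinf(r_inv)] = 0.0  # rows that sum to zero stay zero
    return sp.diags(r_inv).dot(mx)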
def main():
    df_path = 'reports/eval/distance_range.csv'
    for dataset in args.dataset:
        data = Dataset(root='/tmp/', name=dataset, setting='nettack')
        print(f'Accuracy on the clean graph: {test(data, data.adj)}')
        for ptb_rate in args.ptb_rate:
            perturbations = int(ptb_rate * (data.adj.sum() // 2))
            if args.percentile_step is None:
                model = StructackRangeDistance(
                    distance_percentile_range=[args.frm, args.to])
                model.attack(data.adj, perturbations)  # fixed: was `adj`, undefined here
                modified_adj = model.modified_adj
                # modified_features = model.modified_features
                test(data, modified_adj)
            else:
                for frm in np.arange(0, 1, args.percentile_step):
                    print(f'===={frm}====')
                    to = frm + args.percentile_step
                    model = StructackRangeDistance(
                        distance_percentile_range=[frm, to])
                    accs = []
                    for seed_i in range(10):
                        tick = time.time()
                        model.attack(data.adj, perturbations)
                        elapsed = time.time() - tick
                        modified_adj = model.modified_adj
                        # modified_features = model.modified_features
                        acc = test(data, modified_adj)
                        accs.append(acc)

                        cdf = pd.DataFrame()
                        if os.path.exists(df_path):
                            cdf = pd.read_csv(df_path)
                        row = {
                            'dataset': dataset,
                            'attack': model.__class__.__name__,
                            'seed': seed_i,
                            'acc': acc,
                            'perturbation_rate': ptb_rate,
                            'elapsed': elapsed,
                            'frm': frm,
                            'to': to,
                            'mean_distance': model.mean_distance
                        }
                        print(row)
                        cdf = cdf.append(row, ignore_index=True)
                        cdf.to_csv(df_path, index=False)
                    print(f'percentile [{frm:.2f},{to:.2f}]: '
                          f'{np.mean(accs):.4f} +- {np.std(accs):.2f}')
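# The test() helper used above is defined elsewhere in this repo; a minimal
# sketch of what it is assumed to do (retrain a fresh GCN on the given
# adjacency and return test accuracy). Hyperparameters are assumptions.
from deeprobust.graph.defense import GCN

def test_sketch(data, adj, device='cpu'):
    gcn = GCN(nfeat=data.features.shape[1],
              nclass=data.labels.max() + 1,
              nhid=16,
              device=device)
    gcn = gcn.to(device)
    gcn.fit(data.features, adj, data.labels, data.idx_train, data.idx_val)
    return gcn.test(data.idx_test)  # returns accuracy on the test split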
def main(args):
    datasets = args.datasets
    df_path = args.output
    perturbation_rates = args.ptb
    attacks = [
        # [attack_random, 'Random', build_random],
        # [attack_dice, 'DICE', build_dice],
        # [attack_mettaack, 'Metattack', build_mettack],
        # [attack_pgd, 'PGD', build_pgd],
        [attack_minmax, 'MinMax', build_minmax],
    ]
    for dataset in datasets:
        for attack, model_name, model_builder in attacks:
            print('attack ' + model_name)
            for split_seed in range(5):
                np.random.seed(split_seed)
                torch.manual_seed(split_seed)
                if cuda:
                    torch.cuda.manual_seed(split_seed)
                data = Dataset(root='/tmp/', name=dataset)
                G_orig = nx.from_scipy_sparse_matrix(data.adj)
                degree_centralities_orig = np.array(
                    list(nx.degree_centrality(G_orig).values()))
                ccoefs_orig = np.array(
                    list(nx.clustering(G_orig, nodes=G_orig.nodes,
                                       weight=None).values()))
                for perturbation_rate in perturbation_rates:
                    for attack_seed in range(1 if model_name == 'DICE' else 5):
                        modified_adj, elapsed = apply_perturbation(
                            model_builder, attack, data, perturbation_rate,
                            cuda and (dataset != 'pubmed'), attack_seed)
                        print(type(modified_adj))
                        row = {
                            'dataset': dataset,
                            'attack': model_name,
                            'perturbation_rate': perturbation_rate,
                            'elapsed': elapsed,
                            'attack_seed': attack_seed,
                            'split_seed': split_seed
                        }
                        row = extend_row_with_noticeability(
                            row, G_orig, degree_centralities_orig,
                            ccoefs_orig, data.adj, modified_adj)
                        print(row)
                        cdf = pd.DataFrame()
                        if os.path.exists(df_path):
                            cdf = pd.read_csv(df_path)
                        cdf = cdf.append(row, ignore_index=True)
                        cdf.to_csv(df_path, index=False)
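# extend_row_with_noticeability() is defined elsewhere; one plausible
# ingredient (an assumption, shown for illustration only) is a two-sample KS
# statistic between the clean and perturbed degree-centrality distributions,
# quantifying how noticeable the structural change is.
import numpy as np
import networkx as nx
from scipy.stats import ks_2samp

def degree_ks_statistic(degree_centralities_orig, modified_adj):
    G_mod = nx.from_scipy_sparse_matrix(modified_adj)
    dc_mod = np.array(list(nx.degree_centrality(G_mod).values()))
    return ks_2samp(degree_centralities_orig, dc_mod).statistic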
def combination(datasets):
    df_path = 'reports/eval/combination-memory.csv'
    selection_options = [
        [ns.get_nodes_with_lowest_degree, 'degree'],
        [ns.get_nodes_with_lowest_pagerank, 'pagerank'],
        [ns.get_nodes_with_lowest_eigenvector_centrality, 'eigenvector'],
        [ns.get_nodes_with_lowest_betweenness_centrality, 'betweenness'],
        [ns.get_nodes_with_lowest_closeness_centrality, 'closeness'],
        [ns.get_random_nodes, 'random'],
    ]
    connection_options = [
        [nc.community_hungarian_connection, 'community'],
        [nc.distance_hungarian_connection, 'distance'],
        [nc.katz_hungarian_connection, 'katz'],
        [nc.random_connection, 'random'],
    ]
    for selection, selection_name in selection_options:
        for connection, connection_name in connection_options:
            if selection_name == 'random' or connection_name == 'random':
                continue
            for dataset in datasets:
                data = Dataset(root='/tmp/', name=dataset)
                print(f'attack [{selection_name}]*[{connection_name}]')
                for perturbation_rate in [0.05]:  # , 0.10, 0.15, 0.20]:
                    mem = apply_structack(
                        build_custom(selection, connection, dataset_name=None),
                        attack_structack, data, perturbation_rate,
                        cuda and (dataset != 'pubmed'), seed=0)
                    row = {
                        'dataset': dataset,
                        'selection': selection_name,
                        'connection': connection_name,
                        'memory': mem
                    }
                    print(row)
                    cdf = pd.DataFrame()
                    if os.path.exists(df_path):
                        cdf = pd.read_csv(df_path)
                    cdf = cdf.append(row, ignore_index=True)
                    cdf.to_csv(df_path, index=False)
def main(datasets):
    df_path = 'reports/eval/baseline_eval.csv'
    attacks = [
        # [attack_random, 'Random', build_random],
        [attack_dice, 'DICE', build_dice],
        [attack_mettaack, 'Metattack', build_mettack],
        [attack_pgd, 'PGD', build_pgd],
        [attack_minmax, 'MinMax', build_minmax],
    ]
    for dataset in datasets:
        for attack, model_name, model_builder in attacks:
            print('attack ' + model_name)
            for split_seed in range(5):
                np.random.seed(split_seed)
                torch.manual_seed(split_seed)
                if cuda:
                    torch.cuda.manual_seed(split_seed)
                data = Dataset(root='/tmp/', name=dataset)
                for perturbation_rate in [0.05]:  # , 0.10, 0.15, 0.20]:
                    for attack_seed in range(1 if model_name == 'DICE' else 5):
                        modified_adj, elapsed = apply_perturbation(
                            model_builder, attack, data, perturbation_rate,
                            cuda, attack_seed)
                        for gcn_seed in range(5):
                            np.random.seed(gcn_seed)
                            torch.manual_seed(gcn_seed)
                            if cuda:
                                torch.cuda.manual_seed(gcn_seed)
                            acc = test_gcn(modified_adj, data, cuda,
                                           pre_test_data)
                            row = {
                                'dataset': dataset,
                                'attack': model_name,
                                'gcn_seed': gcn_seed,
                                'acc': acc,
                                'perturbation_rate': perturbation_rate,
                                'elapsed': elapsed,
                                'attack_seed': attack_seed,
                                'split_seed': split_seed
                            }
                            print(row)
                            cdf = pd.DataFrame()
                            if os.path.exists(df_path):
                                cdf = pd.read_csv(df_path)
                            cdf = cdf.append(row, ignore_index=True)
                            cdf.to_csv(df_path, index=False)
def clean(datasets):
    df_path = 'reports/eval/clean.csv'
    split_seeds = 5
    gcn_seeds = 5
    for dataset in datasets:
        ''' Clean graph evaluation '''
        for split_seed in range(split_seeds):
            np.random.seed(split_seed)
            torch.manual_seed(split_seed)
            if cuda:
                torch.cuda.manual_seed(split_seed)
            # reload the dataset with a different split
            # (WARNING: this doesn't work for attack methods which depend on the split)
            data = Dataset(root='/tmp/', name=dataset)
            for seed in range(gcn_seeds):
                np.random.seed(seed)
                torch.manual_seed(seed)
                if cuda:
                    torch.cuda.manual_seed(seed)
                acc = test_gcn(
                    postprocess_adj(data.adj).to(
                        torch.device("cuda" if cuda else "cpu")),
                    data, cuda, pre_test_data)
                row = {
                    'dataset': dataset,
                    'selection': 'clean',
                    'connection': 'clean',
                    'gcn_seed': seed,
                    'acc': acc,
                    'perturbation_rate': 0,
                    'elapsed': 0,
                    'split_seed': split_seed
                }
                print(row)
                cdf = pd.DataFrame()
                if os.path.exists(df_path):
                    cdf = pd.read_csv(df_path)
                cdf = cdf.append(row, ignore_index=True)
                cdf.to_csv(df_path, index=False)
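# postprocess_adj() is assumed to apply the standard GCN normalization
# D^-1/2 (A + I) D^-1/2 and return a torch tensor. This sketch is a guess at
# its behavior, not the repo's actual implementation.
import numpy as np
import scipy.sparse as sp
import torch

def postprocess_adj_sketch(adj):
    adj = adj + sp.eye(adj.shape[0])  # add self-loops
    deg = np.array(adj.sum(1)).flatten()
    d_inv_sqrt = np.power(deg, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0
    d_mat = sp.diags(d_inv_sqrt)
    return torch.FloatTensor((d_mat @ adj @ d_mat).todense())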
def init_setup():
    data = Dataset(root='/tmp/', name=args.dataset, setting='nettack')
    injecting_nodes(data)
    adj, features, labels = data.adj, data.features, data.labels
    StaticGraph.graph = nx.from_scipy_sparse_matrix(adj)
    dict_of_lists = nx.to_dict_of_lists(StaticGraph.graph)
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    device = torch.device('cuda' if args.ctx == 'gpu' else 'cpu')

    # gray-box setting
    adj, features, labels = preprocess(adj, features, labels,
                                       preprocess_adj=False, sparse=True,
                                       device=device)

    # Setup victim model
    victim_model = GCN(nfeat=features.shape[1],
                       nclass=labels.max().item() + 1,
                       nhid=16,
                       dropout=0.5,
                       weight_decay=5e-4,
                       device=device)
    victim_model = victim_model.to(device)
    victim_model.fit(features, adj, labels, idx_train, idx_val)
    setattr(victim_model, 'norm_tool',
            GraphNormTool(normalize=True, gm='gcn', device=device))
    output = victim_model.predict(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))
    return features, labels, idx_train, idx_val, idx_test, victim_model, dict_of_lists, adj
def baseline(datasets):
    df_path = 'reports/eval/baseline-memory.csv'
    attacks = [
        # [attack_dice, 'DICE', build_dice],
        [attack_mettaack, 'Metattack', build_mettack],
        # [attack_pgd, 'PGD', build_pgd],
        # [attack_minmax, 'MinMax', build_minmax],
    ]
    for dataset in datasets:
        for attack, model_name, model_builder in attacks:
            print('attack ' + model_name)
            for split_seed in range(1):
                np.random.seed(split_seed)
                torch.manual_seed(split_seed)
                if cuda:
                    torch.cuda.manual_seed(split_seed)
                data = Dataset(root='/tmp/', name=dataset)
                for perturbation_rate in [0.05]:  # , 0.10, 0.15, 0.20]:
                    for attack_seed in range(1):
                        mem = apply_perturbation(model_builder, attack, data,
                                                 perturbation_rate, cuda,
                                                 attack_seed)
                        row = {
                            'dataset': dataset,
                            'attack': model_name,
                            'attack_seed': attack_seed,
                            'memory': mem
                        }
                        print(row)
                        cdf = pd.DataFrame()
                        if os.path.exists(df_path):
                            cdf = pd.read_csv(df_path)
                        cdf = cdf.append(row, ignore_index=True)
                        cdf.to_csv(df_path, index=False)
            del data
            gc.collect()
        def get_y(idx):
            mx = np.zeros(labels.shape)
            mx[idx] = labels[idx]
            return mx

        self.train_mask = get_mask(self.idx_train)
        self.val_mask = get_mask(self.idx_val)
        self.test_mask = get_mask(self.idx_test)
        self.y_train, self.y_val, self.y_test = \
            get_y(idx_train), get_y(idx_val), get_y(idx_test)

    def onehot(self, labels):
        eye = np.identity(labels.max() + 1)
        onehot_mx = eye[labels]
        return onehot_mx


def parse_index_file(filename):
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index


if __name__ == '__main__':
    from deeprobust.graph.data import Dataset

    data = Dataset(root='./tmp/', name='flickr')
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
                    type=str,
                    default='cora',
                    choices=['cora', 'cora_ml', 'citeseer', 'polblogs', 'pubmed'],
                    help='dataset')
parser.add_argument('--ptb_rate',
                    type=float,
                    default=0.05,
                    help='perturbation rate')
args = parser.parse_args()

args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# use data splits provided by prognn
data = Dataset(root='/tmp/', name=args.dataset, setting='prognn')
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

gat = GAT(nfeat=features.shape[1],
          nhid=8,
          heads=8,
          nclass=labels.max().item() + 1,
          dropout=0.5,
          device=device)
gat = gat.to(device)

# test on clean graph
print('==================')
print('=== train on clean graph ===')
parser.add_argument('--ptb_rate', type=float, default=0.0,
                    help='perturbation rate')
parser.add_argument('--k', type=int, default=100,
                    help='Truncated Components.')
parser.add_argument('--ptb_type', type=str, default='add',
                    choices=['add', 'remove', 'meta'])
parser.add_argument('--cuda_num', type=int, default=0)
parser.add_argument('--model_type', type=str, default='GCN_SVD',
                    choices=['GCN_SVD', 'RGCN', 'GCN'])
args = parser.parse_args()

# fixed: the original condition `if args.cuda_num` silently fell back to CPU
# for the default GPU index 0; check CUDA availability instead
device = torch.device(f"cuda:{args.cuda_num}"
                      if torch.cuda.is_available() else "cpu")

# make sure you use the same data splits as you generated attacks for
for seed in [5, 15, 20, 25, 35]:
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # load original dataset (to get clean features and labels)
    data = Dataset(root='/tmp/', name=args.dataset, setting='nettack',
                   seed=15, require_mask=True)
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

    # num_edge = adj.sum(axis=None)/2
    # attacker = Random()
    # attacker.attack(adj, n_perturbations=int(args.ptb_rate*num_edge), type=args.ptb_type)
    # perturbed_adj = attacker.modified_adj

    if args.ptb_rate > 0:
        perturbed_data = PrePtbDataset(root='/tmp/',
                                       name=args.dataset,
                                       attack_method='meta',
                                       ptb_rate=args.ptb_rate)
        perturbed_adj = perturbed_data.adj
    else:
        perturbed_adj = adj

    # Setup Defense Model
    if args.model_type == 'GCN_SVD':
        model = GCNSVD(nfeat=features.shape[1], nclass=labels.max() + 1,
                       nhid=16, device=device)
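        # For context: GCN-SVD defends by training on a low-rank approximation
        # of the perturbed adjacency. A minimal sketch of that rank-k step (an
        # assumption about the internals; deeprobust's GCNSVD.fit(..., k=args.k)
        # performs this itself):
        #
        #   from scipy.sparse.linalg import svds
        #   U, S, Vt = svds(perturbed_adj.asfptype(), k=args.k)
        #   adj_lowrank = (U * S) @ Vt  # dense rank-k reconstruction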
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', type=int, default=-1, help='cuda')
    parser.add_argument('--seed', type=int, default=123,
                        help='Random seed for model')
    parser.add_argument('--data_seed', type=int, default=123,
                        help='Random seed for data split')
    parser.add_argument('--dataset', type=str, default='cora', help='Dataset')
    parser.add_argument('--gnn_base', type=str, default='gcn',
                        help='base gnn models')
    parser.add_argument('--gnn_epochs', type=int, default=500,
                        help='Number of epochs to train the gnn')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='Initial learning rate')
    parser.add_argument('--weight_decay', type=float, default=5e-4,
                        help='Weight decay (L2 loss on parameters)')
    parser.add_argument('--hidden', type=int, default=32,
                        help='Number of hidden units')
    parser.add_argument('--dropout', type=float, default=0.0,
                        help='Dropout rate (1 - keep probability)')
    parser.add_argument('--patience', type=int, default=200,
                        help='patience for early stopping')
    parser.add_argument('--model_dir', type=str, default='./nat_model_saved/',
                        help='Directory to save the trained model.')
    parser.add_argument('--data_dir', type=str, default='./tmp/',
                        help='Directory to download dataset.')
    args = parser.parse_args()
    args.device = torch.device(
        f'cuda:{args.cuda}' if torch.cuda.is_available() else 'cpu')

    torch.set_num_threads(1)  # limit cpu use
    set_random_seed(args.seed, args.device)

    if not os.path.exists(args.model_dir):
        os.mkdir(args.model_dir)
    if not os.path.exists(args.data_dir):
        os.mkdir(args.data_dir)

    print('==== Environment ====')
    print(f'torch version: {torch.__version__}')
    print(f'device: {args.device}')
    print(f'torch seed: {args.seed}')

    #########################################################
    # Load data for node classification task
    data = Dataset(root=args.data_dir, name=args.dataset,
                   setting='gcn', seed=args.data_seed)
    adj, features, labels = data.process(process_adj=False,
                                         process_feature=False,
                                         device=args.device)
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    idx_unlabeled = np.union1d(idx_val, idx_test)

    print('==== Dataset ====')
    print(f'density: {nx.density(nx.from_numpy_array(adj.cpu().numpy()))}')
    print(f'adj shape: {adj.shape}')
    print(f'feature shape: {features.shape}')
    print(f'label number: {labels.max().item()+1}')
    print(f'split seed: {args.data_seed}')
    print(f'train|valid|test set: '
          f'{idx_train.shape}|{idx_val.shape}|{idx_test.shape}')

    #########################################################
    # Setup gnn model and fit it on clean graph
    if args.gnn_base == 'gcn':
        nat_model = GCN(nfeat=features.shape[1],
                        nclass=labels.max().item() + 1,
                        nhid=args.hidden,
                        dropout=args.dropout,
                        weight_decay=args.weight_decay,
                        lr=args.lr,
                        device=args.device)
    elif args.gnn_base == 'gat':
        nat_model = GAT(nfeat=features.shape[1],
                        nclass=labels.max().item() + 1,
                        nhid=args.hidden,
                        heads=8,
                        dropout=args.dropout,
                        weight_decay=args.weight_decay,
                        lr=args.lr,
                        device=args.device)
    elif args.gnn_base == 'sgc':
        nat_model = SGC(nfeat=features.shape[1],
                        nclass=labels.max().item() + 1,
                        lr=args.lr,
                        device=args.device)
    else:
        # fixed: was `assert AssertionError(...)`, which never raises
        raise AssertionError("GNN model {} not found!".format(args.gnn_base))

    nat_model = nat_model.to(args.device)
    if args.gnn_base == 'gcn':
        nat_model.fit(features, adj, labels, idx_train,
                      idx_val=None,
                      train_iters=args.gnn_epochs,
                      patience=args.patience,
                      verbose=True)
    else:
        raise AssertionError('Model Not ready')

    print('==== {} performance ===='.format(args.gnn_base))
    check_victim_model_performance(nat_model, features, adj, labels,
                                   idx_test, idx_train)

    #########################################################
    # Save the trained model
    path = args.model_dir + '{}_{}.pt'.format(args.dataset, args.gnn_base)
    torch.save(nat_model.state_dict(), path)
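    # Usage note: the checkpoint saved above can be restored later (as the
    # attack scripts below do) by rebuilding the model with the same
    # hyperparameters and then:
    #   nat_model.load_state_dict(torch.load(path, map_location=args.device))
    #   nat_model.eval()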
                    type=float,
                    default=0.05,
                    help='perturbation rate')
args = parser.parse_args()

args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

data = Dataset(
    root=r'D:\Python Project\defense\Low_pass_defense\fold_defense\tmp\\',
    name=args.dataset)
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
idx_unlabeled = np.union1d(idx_val, idx_test)

# Setup Surrogate model
surrogate = GCN(nfeat=features.shape[1],
                nclass=labels.max().item() + 1,
                nhid=16,
                dropout=0,
                with_relu=False,
                with_bias=False,
                device=device)
        torch.FloatTensor
            output (log probabilities) of SGC
        """
        self.eval()
        self.dropout = dropout
        if pyg_data is None:
            data = self.data
        else:
            data = pyg_data[0].to(self.device)
            self.data = data
        return self.forward(data)


if __name__ == "__main__":
    from deeprobust.graph.data import Dataset, Dpr2Pyg
    # from deeprobust.graph.defense import SGC

    data = Dataset(root='/tmp/', name='cora')
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

    sgc = SGC(nfeat=features.shape[1],
              nclass=labels.max().item() + 1,
              device='cpu')
    sgc = sgc.to('cpu')
    pyg_data = Dpr2Pyg(data)
    sgc.fit(pyg_data, verbose=True)  # train with earlystopping
    sgc.test()
    print(sgc.predict())
from deeprobust.graph.data import Dataset
from deeprobust.graph.defense import DeepWalk
from deeprobust.graph.global_attack import NodeEmbeddingAttack
from deeprobust.graph.global_attack import OtherNodeEmbeddingAttack
import itertools

dataset_str = 'cora_ml'
data = Dataset(root='/tmp/', name=dataset_str, seed=15)
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

comb = itertools.product(["random", "degree", "eigencentrality"],
                         ["remove", "add"])
for type, attack_type in comb:
    model = OtherNodeEmbeddingAttack(type=type)
    print(model.type, attack_type)
    try:
        model.attack(adj, attack_type=attack_type, n_candidates=10000)
        defender = DeepWalk()
        defender.fit(adj)
        defender.evaluate_node_classification(labels, idx_train, idx_test)
    except KeyError:
        print('eigencentrality only supports removing edges')

model = NodeEmbeddingAttack()
model.attack(adj, attack_type="remove")
model.attack(adj, attack_type="remove", min_span_tree=True)
modified_adj = model.modified_adj
model.attack(adj, attack_type="add", n_candidates=10000)
model.attack(adj, attack_type="add_by_remove", n_candidates=10000)
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if args.cuda else "cpu")
if args.cuda:
    torch.cuda.manual_seed(args.seed)
if args.ptb_rate == 0:
    args.attack = "no"
print(args)

# Here the random seed is to split the train/val/test data; it must match the
# seed used when the perturbed graph was generated
np.random.seed(15)
data = Dataset(root='/tmp/', name=args.dataset, setting='nettack')
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

if args.attack == 'no':
    perturbed_adj = adj

if args.attack == 'random':
    from deeprobust.graph.global_attack import Random
    attacker = Random()
    n_perturbations = int(args.ptb_rate * (adj.sum() // 2))
    # fixed: Random.attack() modifies in place and returns None
    attacker.attack(adj, n_perturbations, type='add')
    perturbed_adj = attacker.modified_adj

if args.attack == 'meta' or args.attack == 'nettack':
    perturbed_data = PrePtbDataset(root='/tmp/', name=args.dataset,
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

###########################
# global settings
# 'cora' or 'citeseer' or 'polblogs'
dataname = args.dataset
gpu_id = '0'

if dataname in ['cora', 'citeseer', 'pubmed']:
    adj, features, idx_train, idx_val, idx_test, labels = load_data(
        dataname, root='..')
if dataname == "polblogs":
    data = Dataset(root='../data/', name=dataname)
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

num_class = labels.max() + 1
adj_norm = utils.preprocess_graph(adj)
labels_onehot = np.eye(num_class)[labels]
preds = {}
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

################################## attacking ##################################
degrees = adj.sum(0).A1
hidden_dim = 16
direct_attack = True
n_influencers = 1 if direct_attack else 5
# Here the random seed is to split the train/val/test data;
# it must match the seed used when the perturbed graph was generated
# data = Dataset(root='/tmp/', name=args.dataset, setting='nettack', seed=15)
# Or we can just use setting='prognn' to get the splits
# data = Dataset(root='./tmp/', name=args.dataset, setting='prognn')
# adj, features, labels_1 = data.adj, data.features, data.labels
# idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

# load pre-attacked graph
# perturbed_data = PrePtbDataset(root='./tmp/',
#                                name=args.dataset,
#                                attack_method='meta',
#                                ptb_rate=args.ptb_rate)

# use data splits provided by prognn
data = Dataset(root='./tmp/', name=args.dataset, setting='prognn')
data.adj = sp.csr_matrix(adj_per)
data.features = sp.csr_matrix(base_feat)
data.labels = labels
data.idx_train = idx_train
data.idx_val = idx_val
data.idx_test = idx_test

perturbed_adj = sp.csr_matrix(adj_per, dtype=float)
features = sp.csr_matrix(base_feat, dtype=float)

# Setup Defense Model
gat = GAT(nfeat=features.shape[1],
          nhid=8,
          heads=8,
          nclass=labels.max().item() + 1,
parser.add_argument('--attack_rate', type=float, default=0.2)
parser.add_argument('--denoise_rate', type=float, default=0.01)
parser.add_argument('--lmda', type=float, default=0.1)
args = parser.parse_args()
args.device = device = torch.device(
    f'cuda:{args.cuda_id:d}' if torch.cuda.is_available() else 'cpu')

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True

data = Dataset(root='./datasets/', name=args.dataset, seed=15,
               setting='nettack')
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

if args.ptb_rate != 0:
    adj = ssp.load_npz(
        f'./datasets/{args.dataset}_meta_adj_{args.ptb_rate:g}.npz')

features = torch.Tensor(features.todense()).to(args.device)
adj = adj.tocoo()
edge_index = torch.LongTensor([adj.row, adj.col]).to(args.device)
edge_values = torch.Tensor(adj.data).to(args.device)
labels = torch.LongTensor(labels).to(args.device)
idx_train = torch.LongTensor(idx_train).to(args.device)
idx_val = torch.LongTensor(idx_val).to(args.device)
idx_test = torch.LongTensor(idx_test).to(args.device)
def load_perterbued_data(dataset, ptb_rate, ptb_type="meta"):
    if ptb_type == 'meta':
        data = Dataset(root='/tmp/', name=dataset.lower(), setting='nettack',
                       seed=15, require_mask=True)
        data.x, data.y = data.features, data.labels
        if ptb_rate > 0:
            perturbed_data = PrePtbDataset(root='/tmp/',
                                           name=dataset.lower(),
                                           attack_method='meta',
                                           ptb_rate=ptb_rate)
            data.edge_index = perturbed_data.adj
        else:
            data.edge_index = data.adj
        return data
    elif ptb_type == 'random_add':
        data = Dataset(root='/tmp/', name=dataset.lower(), setting='nettack',
                       seed=15, require_mask=True)
        data.x, data.y = data.features, data.labels
        num_edge = data.adj.sum(axis=None) / 2
        attacker = Random()
        attacker.attack(data.adj, n_perturbations=int(ptb_rate * num_edge),
                        type='add')
        data.edge_index = attacker.modified_adj
        return data
    elif ptb_type == 'random_remove':
        data = Dataset(root='/tmp/', name=dataset.lower(), setting='nettack',
                       seed=15, require_mask=True)
        data.x, data.y = data.features, data.labels
        num_edge = data.adj.sum(axis=None) / 2
        attacker = Random()
        attacker.attack(data.adj, n_perturbations=int(ptb_rate * num_edge),
                        type='remove')
        data.edge_index = attacker.modified_adj
        return data
    raise Exception(f"the ptb_type of {ptb_type} has not been implemented")
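# Example call (hedged; assumes Dataset, PrePtbDataset, and Random are
# imported as above): load Cora with 5% metattack perturbations.
if __name__ == '__main__':
    data = load_perterbued_data('cora', ptb_rate=0.05, ptb_type='meta')
    print(data.x.shape, data.edge_index.shape)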
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', type=int, default=0, help='cuda')
    parser.add_argument('--seed', type=int, default=123,
                        help='Random seed for model')
    parser.add_argument('--data_seed', type=int, default=123,
                        help='Random seed for data split')
    parser.add_argument('--dataset', type=str, default='cora', help='dataset')
    parser.add_argument('--gnn_path', type=str, required=True,
                        help='Path of saved model')
    parser.add_argument('--model', type=str, default='PGD',
                        help='model variant')  # PGD, random
    parser.add_argument('--loss_type', type=str, default='CE',
                        help='loss type')
    parser.add_argument('--att_lr', type=float, default=200,
                        help='Initial learning rate')
    parser.add_argument('--perturb_epochs', type=int, default=100,
                        help='Number of epochs to poisoning loop')
    parser.add_argument('--ptb_rate', type=float, default=0.05,
                        help='perturbation rate')
    parser.add_argument('--reg_weight', type=float, default=0.0,
                        help='regularization weight')
    parser.add_argument('--loss_weight', type=float, default=1.0,
                        help='loss weight')
    parser.add_argument('--weight_decay', type=float, default=5e-4,
                        help='Weight decay (L2 loss on parameters)')
    parser.add_argument('--hidden', type=int, default=32,
                        help='Number of hidden units')
    parser.add_argument('--dropout', type=float, default=0.0,
                        help='Dropout rate (1 - keep probability)')
    parser.add_argument('--data_dir', type=str, default='./tmp/',
                        help='Directory to download dataset')
    parser.add_argument('--sanitycheck', type=str, default='no',
                        help='whether to store the intermediate results')
    parser.add_argument('--sanity_dir', type=str,
                        default='./sanitycheck_evasion/',
                        help='Directory to store the intermediate results')
    parser.add_argument('--distance_type', type=str, default='l2',
                        help='distance type')
    parser.add_argument('--sample_type', type=str, default='sample',
                        help='sample type')

    args = parser.parse_args()
    args.device = torch.device(
        f'cuda:{args.cuda}' if torch.cuda.is_available() else 'cpu')

    torch.set_num_threads(1)  # limit cpu use
    set_random_seed(args.seed, args.device)

    if not os.path.exists(args.data_dir):
        os.mkdir(args.data_dir)
    if not os.path.exists(args.sanity_dir):
        os.mkdir(args.sanity_dir)
    if not os.path.exists(args.gnn_path):
        raise AssertionError(f'No trained model found under {args.gnn_path}!')

    print('==== Environment ====')
    print(f'torch version: {torch.__version__}')
    print(f'device: {args.device}')
    print(f'torch seed: {args.seed}')

    #########################################################
    # Load data for node classification task
    data = Dataset(root=args.data_dir, name=args.dataset,
                   setting='gcn', seed=args.data_seed)
    adj, features, labels = data.process(process_adj=False,
                                         process_feature=False,
                                         device=args.device)
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    idx_unlabeled = np.union1d(idx_val, idx_test)

    print('==== Dataset ====')
    print(f'density: {nx.density(nx.from_numpy_array(adj.cpu().numpy()))}')
    print(f'adj shape: {adj.shape}')
    print(f'feature shape: {features.shape}')
    print(f'label number: {labels.max().item()+1}')
    print(f'split seed: {args.data_seed}')
    print(f'train|valid|test set: '
          f'{idx_train.shape}|{idx_val.shape}|{idx_test.shape}')

    #########################################################
    # Load victim model and test it on clean training nodes
    victim_model = GCN(nfeat=features.shape[1],
                       nclass=labels.max().item() + 1,
                       nhid=args.hidden,
                       dropout=args.dropout,
                       weight_decay=args.weight_decay,
                       device=args.device)
    victim_model = victim_model.to(args.device)
    victim_model.load_state_dict(torch.load(args.gnn_path))
    victim_model.eval()
    print('==== Victim Model on Clean Graph ====')
    check_victim_model_performance(victim_model, features, adj, labels,
                                   idx_test, idx_train)

    #########################################################
    # Setup attack model
    if args.model == 'PGD':
        model = PGDAttack(model=victim_model,
                          nnodes=adj.shape[0],
                          loss_type=args.loss_type,
                          loss_weight=args.loss_weight,
                          regularization_weight=args.reg_weight,
                          device=args.device)
        model = model.to(args.device)
    elif args.model == 'random':
        model = Random()
    else:
        raise AssertionError(f'Attack {args.model} not found!')

    #########################################################
    # Attack and evaluate
    print('***************** seed {} *****************'.format(args.seed))
    print('==== Attacking ====')
    perturbations = int(args.ptb_rate * (adj.sum() / 2))
    nat_adj = copy.deepcopy(adj)

    # global attack on whole testing nodes
    idx_target = idx_test

    # utility = model.calc_utility(features, nat_adj, labels, idx_target)
    # os.makedirs(f'./{args.dataset}', exist_ok=True)
    # save_utility(f'./{args.dataset}/utility.bin', utility)
    # exit('done')

    # gradients = model.attack(features, nat_adj, labels, idx_target,
    #                          perturbations, att_lr=args.att_lr,
    #                          epochs=args.perturb_epochs,
    #                          distance_type=args.distance_type,
    #                          sample_type=args.sample_type)
    # os.makedirs(f'./{args.dataset}', exist_ok=True)
    # save_utility(f'./{args.dataset}/grad_{args.reg_weight}.bin', gradients)
    # exit('done')

    if args.model == 'random':
        model.attack(nat_adj, perturbations, 'flip')
    else:
        model.attack(features,
                     nat_adj,
                     labels,
                     idx_target,
                     perturbations,
                     att_lr=args.att_lr,
                     epochs=args.perturb_epochs,
                     distance_type=args.distance_type,
                     sample_type=args.sample_type)

    modified_adj = model.modified_adj

    # evaluation
    victim_model.load_state_dict(torch.load(args.gnn_path))  # reset to clean model
    victim_model.eval()
    print('==== Victim Model on Perturbed Graph ====')
    check_victim_model_performance(victim_model, features, modified_adj,
                                   labels, idx_test, idx_train)

    print("==== Parameter ====")
    print(f'Data seed: {args.data_seed}')
    print(f'Dataset: {args.dataset}')
    print(f'Loss type: {args.loss_type}')
    print(f'Perturbation Rate: {args.ptb_rate}')
    print(f'Reg weight: {args.reg_weight}')
    print(f'Attack: {args.model}')
    print(f'Attack seed: {args.seed}')

    # to save the modified adj/features, run with --sanitycheck yes
    if args.sanitycheck == 'yes':
        root = (args.sanity_dir +
                '{}_{}_{}_{}_{}_{}lr_{}epoch_{}rate_{}reg1_{}reg2_{}seed')
        root = root.format(args.dataset, args.distance_type, args.sample_type,
                           args.model, args.loss_type, args.att_lr,
                           args.perturb_epochs, args.ptb_rate,
                           args.loss_weight, args.reg_weight, args.seed)
        save_all(root, model)
torch.set_num_threads(1)  # limit cpu use
print(' pytorch version: ', torch.__version__)
print(' device: ', device)

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if device != 'cpu':
    torch.cuda.manual_seed(args.seed)

#########################################################
# Load data for node classification task
print('==== Dataset ====')
if not osp.exists(args.data_dir):
    os.makedirs(args.data_dir)
data = Dataset(root=args.data_dir, name=args.dataset,
               setting='gcn', seed=args.data_seed)
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
adj, features, labels = preprocess(adj, features, labels,
                                   preprocess_adj=False)
print(' adj shape: ', adj.shape)
print(' feature shape: ', features.shape)
print(' label number: ', labels.max().item() + 1)
print(' train|valid|test set: {}|{}|{}'.format(idx_train.shape,
                                               idx_val.shape,
                                               idx_test.shape))

#########################################################
# Set victim model for adversarial training
adv_train_model = GCN(nfeat=features.shape[1],
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cuda', type=int, default=0, help='cuda')
    parser.add_argument('--seed', type=int, default=123,
                        help='Random seed for model')
    parser.add_argument('--data_seed', type=int, default=123,
                        help='Random seed for data split')
    parser.add_argument('--dataset', type=str, default='cora', help='dataset')
    parser.add_argument('--gnn_path', type=str, required=True,
                        help='Path of saved model')
    parser.add_argument('--model', type=str, default='minmax',
                        help='model variant')
    # ['minmax', 'Meta-Self', 'A-Meta-Self', 'Meta-Train', 'A-Meta-Train', 'random']
    parser.add_argument('--loss_type', type=str, default='CE',
                        help='loss type')
    parser.add_argument('--att_lr', type=float, default=200,
                        help='Initial learning rate')
    parser.add_argument('--perturb_epochs', type=int, default=200,
                        help='Number of epochs to poisoning loop')
    parser.add_argument('--ptb_rate', type=float, default=0.05,
                        help='perturbation rate')
    parser.add_argument('--loss_weight', type=float, default=1.0,
                        help='loss weight')
    parser.add_argument('--reg_weight', type=float, default=0.0,
                        help='regularization weight')
    parser.add_argument('--weight_decay', type=float, default=5e-4,
                        help='Weight decay (L2 loss on parameters)')
    parser.add_argument('--hidden', type=int, default=32,
                        help='Number of hidden units')
    parser.add_argument('--dropout', type=float, default=0.0,
                        help='Dropout rate (1 - keep probability)')
    parser.add_argument('--data_dir', type=str, default='./tmp/',
                        help='Directory to download dataset')
    parser.add_argument('--target_node', type=str, default='train',
                        help='target node set')
    parser.add_argument('--sanitycheck', type=str, default='no',
                        help='whether to store the intermediate results')
    parser.add_argument('--distance_type', type=str, default='l2',
                        help='distance type')
    parser.add_argument('--opt_type', type=str, default='max',
                        help='optimization type')
    parser.add_argument('--sample_type', type=str, default='sample',
                        help='sample type')

    args = parser.parse_args()
    args.device = torch.device(
        f'cuda:{args.cuda}' if torch.cuda.is_available() else 'cpu')

    torch.set_num_threads(1)  # limit cpu use
    set_random_seed(args.seed, args.device)

    if not os.path.exists(args.data_dir):
        os.mkdir(args.data_dir)
    if not os.path.exists(args.gnn_path):
        raise AssertionError(f'No trained model found under {args.gnn_path}!')

    print('==== Environment ====')
    print(f'torch version: {torch.__version__}')
    print(f'device: {args.device}')
    print(f'torch seed: {args.seed}')

    #########################################################
    # Load data for node classification task
    data = Dataset(root=args.data_dir, name=args.dataset,
                   setting='gcn', seed=args.data_seed)
    adj, features, labels = data.process(process_adj=False,
                                         process_feature=False,
                                         device=args.device)
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    idx_unlabeled = np.union1d(idx_val, idx_test)

    print('==== Dataset ====')
    print(f'density: {nx.density(nx.from_numpy_array(adj.cpu().numpy()))}')
    print(f'adj shape: {adj.shape}')
    print(f'feature shape: {features.shape}')
    print(f'label number: {labels.max().item()+1}')
    print(f'split seed: {args.data_seed}')
    print(f'train|valid|test set: '
          f'{idx_train.shape}|{idx_val.shape}|{idx_test.shape}')

    #########################################################
    # Load victim model and test it on clean training nodes
    weight_decay = 0 if args.dataset == 'polblogs' else args.weight_decay
    victim_model = GCN(nfeat=features.shape[1],
                       nclass=labels.max().item() + 1,
                       nhid=args.hidden,
                       dropout=args.dropout,
                       weight_decay=weight_decay,
                       device=args.device)
    victim_model = victim_model.to(args.device)
    victim_model.load_state_dict(torch.load(args.gnn_path))

    surrogate_model = GCN(nfeat=features.shape[1],
                          nclass=labels.max().item() + 1,
                          nhid=args.hidden,
                          dropout=args.dropout,
                          weight_decay=weight_decay,
                          device=args.device)
    surrogate_model = surrogate_model.to(args.device)
    surrogate_model.load_state_dict(torch.load(args.gnn_path))

    print('==== Initial Surrogate Model on Clean Graph ====')
    surrogate_model.eval()
    check_victim_model_performance(surrogate_model, features, adj, labels,
                                   idx_test, idx_train)

    #########################################################
    # Setup attack model
    if args.model == 'minmax':
        model = MinMax(model=surrogate_model,
                       nnodes=adj.shape[0],
                       loss_type=args.loss_type,
                       loss_weight=args.loss_weight,
                       regularization_weight=args.reg_weight,
                       device=args.device)
        model = model.to(args.device)
    elif 'Meta' in args.model:
        # 'Meta-Self', 'A-Meta-Self', 'Meta-Train', 'A-Meta-Train'
        if 'Self' in args.model:
            lambda_ = 0
        if 'Train' in args.model:
            lambda_ = 1
        if 'Both' in args.model:
            lambda_ = 0.5
        if 'A' in args.model:
            model = MetaApprox(model=surrogate_model,
                               nnodes=adj.shape[0],
                               attack_structure=True,
                               attack_features=False,
                               regularization_weight=args.reg_weight,
                               device=args.device,
                               lambda_=lambda_)
        else:
            model = Metattack(model=surrogate_model,
                              nnodes=adj.shape[0],
                              attack_structure=True,
                              attack_features=False,
                              regularization_weight=args.reg_weight,
                              device=args.device,
                              lambda_=lambda_)
        model = model.to(args.device)
    elif args.model == 'random':
        model = Random()
    else:
        raise AssertionError(f'Attack {args.model} not found!')

    #########################################################
    # Attack and evaluate
    print('***************** seed {} *****************'.format(args.seed))
    print('==== Attacking ====')
    perturbations = int(args.ptb_rate * (adj.sum() / 2))
    nat_adj = copy.deepcopy(adj)

    # switch target node set for minmax attack
    if args.target_node == 'test':
        idx_target = idx_test
    elif args.target_node == 'train':
        idx_target = idx_train
    else:
        # fixed: np.int is removed in recent numpy; use the builtin int
        idx_target = np.hstack((idx_test, idx_train)).astype(int)

    # Start attack
    if args.model == 'random':
        model.attack(nat_adj, perturbations, 'flip')
    elif 'Meta' in args.model:
        model.attack(features, nat_adj, labels, idx_train, idx_unlabeled,
                     perturbations, ll_constraint=False, verbose=True)
    else:
        model.attack(features, nat_adj, labels, idx_target, perturbations,
                     att_lr=args.att_lr, epochs=args.perturb_epochs,
                     distance_type=args.distance_type,
                     sample_type=args.sample_type, opt_type=args.opt_type)

    modified_adj = model.modified_adj

    #########################################################
    # Evaluation
    # victim model on clean graph
    print('==== Victim Model on clean graph ====')
    check_victim_model_performance(victim_model, features, adj, labels,
                                   idx_test, idx_train)

    # victim model on perturbed graph
    print('==== Victim Model on perturbed graph ====')
    check_victim_model_performance(victim_model, features, modified_adj,
                                   labels, idx_test, idx_train)

    # retrain victim model on perturbed graph
    print('==== Poisoned Surrogate Model on perturbed graph ====')
    surrogate_model.initialize()
    surrogate_model.fit(features, modified_adj, labels, idx_train,
                        idx_val=None, train_iters=1000, verbose=False)
    check_victim_model_performance(surrogate_model, features, modified_adj,
                                   labels, idx_test, idx_train)

    # test poisoned model on clean graph
    print('==== Poisoned Surrogate Model on clean graph ====')
    check_victim_model_performance(surrogate_model, features, adj, labels,
                                   idx_test, idx_train)
print("==== Parameter ====") print(f'seed: data {args.data_seed}, attack {args.seed}') print( f'dataset: {args.dataset}, attack model {args.model}, target {args.target_node}' ) print(f'loss type: {args.loss_type}') print( f'perturbation rate: {args.ptb_rate}, epoch: {args.perturb_epochs}, lr: {args.att_lr}' ) print(f'weight: loss {args.loss_weight}, reg {args.reg_weight}') print(f'distance type: {args.distance_type}, opt type: {args.opt_type}') # if you want to save the modified adj/features, uncomment the code below if args.sanitycheck == 'yes': root = './sanitycheck_evasion/{}_{}_{}_{}_{}_{}lr_{}epoch_{}rate_{}reg_{}target_{}seed' root = root.format(args.dataset, args.distance_type, args.sample_type, args.model, args.loss_type, args.att_lr, args.perturb_epochs, args.ptb_rate, args.reg_weight, args.target_node, args.seed) save_all(root, model)
    def __init__(self, dataset, args, data_path="data", task_type="full"):
        self.dataset = dataset
        self.data_path = data_path
        (self.adj, self.train_adj, self.features, self.train_features,
         self.labels, self.idx_train, self.idx_val, self.idx_test,
         self.degree, self.learning_type) = data_loader(dataset, data_path,
                                                        "NoNorm", False,
                                                        task_type,
                                                        seed=args.seed)

        if args.ptb_rate > 0:
            # need to install deeprobust: https://github.com/DSE-MSU/DeepRobust
            from deeprobust.graph.data import Dataset, PrePtbDataset
            data = Dataset(root='/tmp/', name=args.dataset,
                           setting='nettack', seed=15)
            self.adj, self.features, self.labels = (data.adj,
                                                    data.features.todense(),
                                                    data.labels)
            self.idx_train, self.idx_val, self.idx_test = (data.idx_train,
                                                           data.idx_val,
                                                           data.idx_test)
            if args.ptb_rate != 10:
                perturbed_data = PrePtbDataset(root='/tmp/',
                                               name=args.dataset,
                                               attack_method='meta',
                                               ptb_rate=args.ptb_rate)
                self.adj = perturbed_data.adj
            self.train_adj = self.adj
            self.train_features = self.features
            self.learning_type = 'transductive'

        # fixed: np.int is removed in recent numpy; use the builtin int
        self.labels = self.labels.astype(int)
        self.features = torch.FloatTensor(self.features).float()
        self.train_features = torch.FloatTensor(self.train_features).float()
        # self.train_adj = self.train_adj.tocsr()

        if args.train_size and not args.fastmode:
            self.idx_train, self.idx_val, self.idx_test = get_splits_each_class(
                labels=self.labels, train_size=args.train_size)
            # print(self.idx_train[:10])
            # from ssl_utils import get_few_labeled_splits
            # self.idx_train, self.idx_val, self.idx_test = get_few_labeled_splits(
            #     labels=self.labels, train_size=args.train_size)

        if args.fastmode:
            from deeprobust.graph.utils import get_train_test
            self.idx_train, self.idx_test = get_train_test(
                nnodes=self.adj.shape[0],
                test_size=1 - args.label_rate,
                stratify=self.labels)
            self.idx_test = self.idx_test[:1000]

        self.labels_torch = torch.LongTensor(self.labels)
        self.idx_train_torch = torch.LongTensor(self.idx_train)
        self.idx_val_torch = torch.LongTensor(self.idx_val)
        self.idx_test_torch = torch.LongTensor(self.idx_test)

        # vertex_sampler cache (np.where returns a tuple)
        self.pos_train_idx = np.where(self.labels[self.idx_train] == 1)[0]
        self.neg_train_idx = np.where(self.labels[self.idx_train] == 0)[0]
        # self.pos_train_neighbor_idx = np.where

        self.nfeat = self.features.shape[1]
        self.nclass = int(self.labels.max().item() + 1)
        self.trainadj_cache = {}
        self.adj_cache = {}
        # print(type(self.train_adj))
        self.degree_p = None
def combination(args):
    datasets = args.datasets
    df_path = args.output
    selection_options = [
        [ns.get_random_nodes, 'random'],
        [ns.get_nodes_with_lowest_degree, 'degree'],
        [ns.get_nodes_with_lowest_pagerank, 'pagerank'],
        [ns.get_nodes_with_lowest_eigenvector_centrality, 'eigenvector'],
        [ns.get_nodes_with_lowest_betweenness_centrality, 'betweenness'],
        [ns.get_nodes_with_lowest_closeness_centrality, 'closeness'],
    ]
    connection_options = [
        [nc.random_connection, 'random'],
        [nc.community_hungarian_connection, 'community'],
        [nc.distance_hungarian_connection, 'distance'],
        [nc.katz_hungarian_connection, 'katz'],
    ]
    for dataset in datasets:
        data = Dataset(root='/tmp/', name=dataset)
        G_orig = nx.from_scipy_sparse_matrix(data.adj)
        degree_centralities_orig = np.array(
            list(nx.degree_centrality(G_orig).values()))
        ccoefs_orig = np.array(
            list(nx.clustering(G_orig, nodes=G_orig.nodes,
                               weight=None).values()))
        for selection, selection_name in selection_options:
            for connection, connection_name in connection_options:
                print(f'attack [{selection_name}]*[{connection_name}]')
                for perturbation_rate in [0.005, 0.0075, 0.01, 0.025, 0.05,
                                          0.075, 0.10, 0.15, 0.20]:
                    for seed in range(5 if (selection_name == 'random'
                                            or connection_name == 'random')
                                      else 1):
                        modified_adj, elapsed = apply_structack(
                            build_custom(selection, connection),
                            attack_structack, data, perturbation_rate,
                            cuda and (dataset != 'pubmed'), seed=seed)
                        # reload the dataset with a different split
                        # (WARNING: this doesn't work for attack methods which
                        # depend on the split)
                        data = Dataset(root='/tmp/', name=dataset)
                        row = {
                            'dataset': dataset,
                            'selection': selection_name,
                            'connection': connection_name,
                            'gcn_seed': seed,
                            'perturbation_rate': perturbation_rate,
                            'elapsed': elapsed
                        }
                        row = extend_row_with_noticeability(
                            row, G_orig, degree_centralities_orig,
                            ccoefs_orig, data.adj, modified_adj)
                        print(row)
                        cdf = pd.DataFrame()
                        if os.path.exists(df_path):
                            cdf = pd.read_csv(df_path)
                        cdf = cdf.append(row, ignore_index=True)
                        cdf.to_csv(df_path, index=False)
def combination(datasets):
    df_path = 'reports/eval/comb_acc_eval-new-datasets.csv'
    selection_options = [
        [ns.get_nodes_with_lowest_degree, 'degree'],
        [ns.get_nodes_with_lowest_pagerank, 'pagerank'],
        [ns.get_nodes_with_lowest_eigenvector_centrality, 'eigenvector'],
        [ns.get_nodes_with_lowest_betweenness_centrality, 'betweenness'],
        [ns.get_nodes_with_lowest_closeness_centrality, 'closeness'],
        [ns.get_random_nodes, 'random'],
    ]
    connection_options = [
        [nc.community_hungarian_connection, 'community'],
        [nc.distance_hungarian_connection, 'distance'],
        [nc.katz_hungarian_connection, 'katz'],
        [nc.random_connection, 'random'],
    ]
    split_seeds = 1
    gcn_seeds = 1
    for selection, selection_name in selection_options:
        for connection, connection_name in connection_options:
            for dataset in datasets:
                ''' Clean graph evaluation '''
                # for split_seed in range(split_seeds):
                #     np.random.seed(split_seed)
                #     torch.manual_seed(split_seed)
                #     if cuda:
                #         torch.cuda.manual_seed(split_seed)
                #     # reload the dataset with a different split
                #     # (WARNING: this doesn't work for attack methods which depend on the split)
                #     data = Dataset(root='/tmp/', name=dataset)
                #     for seed in range(gcn_seeds):
                #         np.random.seed(seed)
                #         torch.manual_seed(seed)
                #         if cuda:
                #             torch.cuda.manual_seed(seed)
                #         acc = test_gcn(postprocess_adj(data.adj).to(torch.device("cuda" if cuda else "cpu")),
                #                        data, cuda, pre_test_data)
                #         row = {'dataset': dataset, 'selection': 'clean', 'connection': 'clean',
                #                'gcn_seed': seed, 'acc': acc, 'perturbation_rate': 0, 'elapsed': 0,
                #                'split_seed': split_seed}
                #         print(row)
                #         cdf = pd.DataFrame()
                #         if os.path.exists(df_path):
                #             cdf = pd.read_csv(df_path)
                #         cdf = cdf.append(row, ignore_index=True)
                #         cdf.to_csv(df_path, index=False)

                data = Dataset(root='/tmp/', name=dataset)
                print(f'attack [{selection_name}]*[{connection_name}]')
                for perturbation_rate in [0.01]:  # , 0.10, 0.15, 0.20]:
                    modified_adj, elapsed = apply_structack(
                        build_custom(selection, connection, dataset),
                        attack_structack, data, perturbation_rate,
                        cuda and (dataset != 'pubmed'), seed=0)
                    for split_seed in range(split_seeds):
                        np.random.seed(split_seed)
                        torch.manual_seed(split_seed)
                        if cuda:
                            torch.cuda.manual_seed(split_seed)
                        # reload the dataset with a different split
                        # (WARNING: this doesn't work for attack methods which depend on the split)
                        data = Dataset(root='/tmp/', name=dataset)
                        for seed in range(gcn_seeds):
                            np.random.seed(seed)
                            torch.manual_seed(seed)
                            if cuda:
                                torch.cuda.manual_seed(seed)
                            acc = test_gcn(modified_adj, data, cuda,
                                           pre_test_data)
                            row = {
                                'dataset': dataset,
                                'selection': selection_name,
                                'connection': connection_name,
                                'gcn_seed': seed,
                                'acc': acc,
                                'perturbation_rate': perturbation_rate,
                                'elapsed': elapsed,
                                'split_seed': split_seed
                            }
                            print(row)
                            cdf = pd.DataFrame()
                            if os.path.exists(df_path):
                                cdf = pd.read_csv(df_path)
                            cdf = cdf.append(row, ignore_index=True)
                            cdf.to_csv(df_path, index=False)
                    type=float,
                    default=0.05,
                    help='perturbation rate')
args = parser.parse_args()

args.cuda = torch.cuda.is_available()
print('cuda: %s' % args.cuda)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# make sure you use the same data splits as you generated attacks for
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# load original dataset (to get clean features and labels)
data = Dataset(root='/tmp/', name=args.dataset)
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

# Setup Target Model
model = GCN(nfeat=features.shape[1],
            nclass=labels.max() + 1,
            nhid=16,
            dropout=0,
            with_relu=False,
            with_bias=True,
            device=device)
model = model.to(device)

# test on original adj
parser.add_argument('--missing_rate', type=int, default=0,
                    help='missing rate, from 0 to 100')
parser.add_argument('--adj_path', type=str, default="")
parser.add_argument('--seed', type=int, default=1, help='seed')
args = parser.parse_args()

# logger
# filename='example.log'
logging.basicConfig(format='%(message)s',
                    level=getattr(logging, args.log.upper()))

# load data
data = Dataset(root='/tmp/', name=args.data, setting='nettack')
adj, features, labels = data.adj, data.features, data.labels
# idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
splits = np.load('../Pro-GNN/splits/{}_{}.npz'.format(args.data, args.seed))
idx_train, idx_val, idx_test = splits['train'], splits['val'], splits['test']

# from .random import Random
attacker = Random()
n_perturbations = int(1.0 * (adj.sum() // 2))
# note: deeprobust's Random.attack() modifies in place and returns None;
# read the result from attacker.modified_adj
attacker.attack(adj, n_perturbations, type='insert')
perturbed_adj = attacker.modified_adj
# perturbed_adj = sp.load_npz(args.adj_path)

device = torch.device("cuda")
torch.manual_seed(123)
        self.val_mask = get_mask(self.idx_val)
        self.test_mask = get_mask(self.idx_test)
        self.y_train, self.y_val, self.y_test = \
            get_y(idx_train), get_y(idx_val), get_y(idx_test)

    def onehot(self, labels):
        eye = np.identity(labels.max() + 1)
        onehot_mx = eye[labels]
        return onehot_mx


def parse_index_file(filename):
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index


if __name__ == '__main__':
    from deeprobust.graph.data import Dataset

    # sanity check: the 'prognn' setting should reproduce the 'nettack'
    # splits generated with seed=15
    for name in ['cora', 'citeseer', 'pubmed', 'cora_ml']:
        data = Dataset(root='/tmp/', name=name, setting="prognn")
        idx_train = data.idx_train
        data2 = Dataset(root='/tmp/', name=name, setting="nettack", seed=15)
        idx_train2 = data2.idx_train
        assert (idx_train != idx_train2).sum() == 0

    data = Dataset(root='/tmp/', name='flickr')
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
        Returns
        -------
        torch.FloatTensor
            output (log probabilities) of GAT
        """
        self.eval()
        self.dropout = dropout
        self.data = self.data if geodata is None else geodata.to(self.device)
        return self.forward(self.data)


if __name__ == "__main__":
    from deeprobust.graph.data import Dataset
    # from deeprobust.graph.defense import GAT

    data = Dataset(root='./tmp/', name='cora', setting='gcn')
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

    gat = GAT(nfeat=features.shape[1],
              nhid=8,
              heads=8,
              nclass=labels.max().item() + 1,
              dropout=0.5,
              device='cpu')
    gat = gat.to('cpu')
    gat.fit(data.geodata, verbose=True)  # train with earlystopping
    gat.test()
    print(gat.predict())