def get_dataset(name, sparse=True, dataset_div=None):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    try:
        shutil.copytree('../input/smt', path)
    except shutil.Error as e:
        for src, dst, msg in e.args[0]:
            print(dst, src, msg)
    except FileExistsError as e:
        print(e)

    dataset = TUDataset(path, name, use_node_attr=True)
    dataset.data.edge_attr = None

    if dataset.data.x is None:
        print('confirmed that data.x does not exist!')
        exit(1)
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())

        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)

    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)

        # Filter out a few really large graphs in order to apply DiffPool.
        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)

        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]

        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])

    if dataset_div is not None:
        dataset = dataset.shuffle()[:len(dataset) // dataset_div]
    return dataset
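# The loaders in this section all assume roughly the same set of imports; a
# representative sketch is given below. This is an assumption -- individual
# repos may differ, e.g. in where DataLoader / DenseDataLoader are imported
# from or in which project-local helpers (NormalizedDegree, FilterConstant,
# HitGraphDataset2, ...) they define.
import os
import os.path as osp
import shutil

import torch
import torch_geometric.transforms as T
from torch_geometric.datasets import TUDataset
from torch_geometric.utils import degree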
def get_dataset(name, sparse=True, cleaned=False):
    if name == 'node':
        path = osp.join(os.environ['GNN_TRAINING_DATA_ROOT'], name)
        print(path)
        dataset = HitGraphDataset2(path, directed=False, categorical=True)
    else:
        path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
        dataset = TUDataset(path, name, cleaned=cleaned)

    dataset.data.edge_attr = None

    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())

        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)

    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)

        # Filter out a few really large graphs in order to apply DiffPool.
        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)

        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]

        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])

    return dataset
def get_dataset(self, name, sparse=True, dataset_div=None):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    try:
        if 'SMT' in name:
            shutil.copytree(osp.join('../input', name.lower()), path)
    except FileExistsError as e:
        print(e)

    dataset = TUDataset(path, name, use_node_attr=True)
    dataset.data.edge_attr = None

    if not sparse:
        num_nodes = max_num_nodes = 0
        limit_num = 1000
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)

        # Filter out a few really large graphs in order to apply DiffPool.
        num_nodes = min(int(num_nodes / len(dataset)), max_num_nodes)
        num_nodes = max(int(num_nodes / len(dataset)), limit_num)
        self.num_nodes = num_nodes

        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]

        dataset.transform = T.ToDense(num_nodes)

    if dataset_div is not None:
        dataset = dataset.shuffle()[:len(dataset) // dataset_div]
    return dataset.shuffle()
def load_data(dataset, cleaned=False):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'datasets')
    dataset = TUDataset(path, dataset, cleaned=cleaned)
    dataset.data.edge_attr = None

    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())

        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)

    return dataset
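# Several of these loaders fall back to a `NormalizedDegree` transform when the
# maximum degree is large; the class itself is not shown in the snippets. A
# minimal sketch, assuming the variant used in the PyG kernel benchmark (node
# degree standardized to zero mean / unit variance, stored as a 1-d feature):
class NormalizedDegree(object):
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, data):
        deg = degree(data.edge_index[0], dtype=torch.float)
        deg = (deg - self.mean) / self.std
        data.x = deg.view(-1, 1)
        return data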
def test_enzymes():
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = TUDataset(root, 'ENZYMES')

    assert len(dataset) == 600
    assert dataset.num_features == 3
    assert dataset.num_classes == 6
    assert dataset.__repr__() == 'ENZYMES(600)'
    assert len(dataset[0]) == 3
    assert len(dataset.shuffle()) == 600
    assert len(dataset.shuffle(return_perm=True)) == 2
    assert len(dataset[:100]) == 100
    assert len(dataset[torch.arange(100, dtype=torch.long)]) == 100
    mask = torch.zeros(600, dtype=torch.bool)
    mask[:100] = 1
    assert len(dataset[mask]) == 100

    loader = DataLoader(dataset, batch_size=len(dataset))
    for data in loader:
        assert data.num_graphs == 600

        avg_num_nodes = data.num_nodes / data.num_graphs
        assert pytest.approx(avg_num_nodes, abs=1e-2) == 32.63

        avg_num_edges = data.num_edges / (2 * data.num_graphs)
        assert pytest.approx(avg_num_edges, abs=1e-2) == 62.14

        assert len(data) == 5
        assert list(data.x.size()) == [data.num_nodes, 3]
        assert list(data.y.size()) == [data.num_graphs]
        assert data.y.max() + 1 == 6
        assert list(data.batch.size()) == [data.num_nodes]
        assert data.ptr.numel() == data.num_graphs + 1

        assert data.has_isolated_nodes()
        assert not data.has_self_loops()
        assert data.is_undirected()

    loader = DataListLoader(dataset, batch_size=len(dataset))
    for data_list in loader:
        assert len(data_list) == 600

    dataset.transform = ToDense(num_nodes=126)
    loader = DenseDataLoader(dataset, batch_size=len(dataset))
    for data in loader:
        assert len(data) == 4
        assert list(data.x.size()) == [600, 126, 3]
        assert list(data.adj.size()) == [600, 126, 126]
        assert list(data.mask.size()) == [600, 126]
        assert list(data.y.size()) == [600, 1]

    dataset = TUDataset(root, 'ENZYMES', use_node_attr=True)
    assert dataset.num_node_features == 21
    assert dataset.num_features == 21
    assert dataset.num_edge_features == 0

    shutil.rmtree(root)
def graph_kernel_dataset(name, path, sparse=True):
    dataset = TUDataset(path, name)
    dataset.data.edge_attr = None

    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())

        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)

    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)

        # Filter out a few really large graphs in order to apply DiffPool.
        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)

        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        # Use torch.tensor (long dtype) rather than torch.Tensor (float),
        # which cannot be used to index a dataset.
        dataset = dataset[torch.tensor(indices)]

        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])

    return dataset
def get_dataset(name, sparse=True):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    dataset = TUDataset(path, name)
    dataset.data.edge_attr = None

    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())

        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)

    if not sparse:
        num_nodes = max_num_nodes = 0
        for data in dataset:
            num_nodes += data.num_nodes
            max_num_nodes = max(data.num_nodes, max_num_nodes)

        if name == 'REDDIT-BINARY':
            num_nodes = min(int(num_nodes / len(dataset) * 1.5), max_num_nodes)
        else:
            num_nodes = min(int(num_nodes / len(dataset) * 5), max_num_nodes)

        indices = []
        for i, data in enumerate(dataset):
            if data.num_nodes <= num_nodes:
                indices.append(i)
        dataset = dataset[torch.tensor(indices)]

        if dataset.transform is None:
            dataset.transform = T.ToDense(num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(num_nodes)])

    return dataset
def get_dataset(name, sparse=True, cleaned=False, normalize=False):
    dataset = TUDataset(os.path.join('./data', name), name,
                        use_node_attr=True, cleaned=cleaned)
    dataset.data.edge_attr = None

    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())

        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)
    elif normalize:
        dataset.data.x -= torch.mean(dataset.data.x, axis=0)
        dataset.data.x /= torch.std(dataset.data.x, axis=0)

    if not sparse:
        max_num_nodes = 0
        for data in dataset:
            max_num_nodes = max(data.num_nodes, max_num_nodes)

        if dataset.transform is None:
            dataset.transform = T.ToDense(max_num_nodes)
        else:
            dataset.transform = T.Compose(
                [dataset.transform, T.ToDense(max_num_nodes)])

    return dataset
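# A minimal usage sketch for the get_dataset variant directly above (assumed,
# not part of the original snippet): loading the dense variant of a dataset
# and batching it with DenseDataLoader, as a DiffPool-style model expects
# fixed-size x / adj / mask tensors. On older PyG versions DenseDataLoader is
# imported from torch_geometric.data instead of torch_geometric.loader.
from torch_geometric.loader import DenseDataLoader

dense_dataset = get_dataset('PROTEINS', sparse=False)
loader = DenseDataLoader(dense_dataset, batch_size=32, shuffle=True)
for batch in loader:
    # batch.x:    [batch_size, max_num_nodes, num_features]
    # batch.adj:  [batch_size, max_num_nodes, max_num_nodes]
    # batch.mask: [batch_size, max_num_nodes]
    pass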
def test_enzymes():
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)), 'test')
    dataset = TUDataset(root, 'ENZYMES')

    assert len(dataset) == 600
    assert dataset.num_features == 21
    assert dataset.num_classes == 6
    assert dataset.__repr__() == 'ENZYMES(600)'
    assert len(dataset.__getitem__(0)) == 3
    assert len(dataset.shuffle()) == 600
    assert len(dataset[:100]) == 100
    assert len(dataset[torch.arange(100, dtype=torch.long)]) == 100
    mask = torch.zeros(600, dtype=torch.uint8)
    mask[:100] = 1
    assert len(dataset[mask]) == 100

    loader = DataLoader(dataset, batch_size=len(dataset))
    for data in loader:
        assert data.num_graphs == 600

        avg_num_nodes = data.num_nodes / data.num_graphs
        assert pytest.approx(avg_num_nodes, abs=1e-2) == 32.63

        avg_num_edges = data.num_edges / (2 * data.num_graphs)
        assert pytest.approx(avg_num_edges, abs=1e-2) == 62.14

        assert len(data) == 4
        assert list(data.x.size()) == [data.num_nodes, 21]
        assert list(data.y.size()) == [data.num_graphs]
        assert data.y.max() + 1 == 6
        assert list(data.batch.size()) == [data.num_nodes]

        assert data.contains_isolated_nodes()
        assert not data.contains_self_loops()
        assert data.is_undirected()

    dataset.transform = ToDense(num_nodes=126)
    loader = DenseDataLoader(dataset, batch_size=len(dataset))
    for data in loader:
        assert len(data) == 4
        assert list(data.x.size()) == [600, 126, 21]
        assert list(data.adj.size()) == [600, 126, 126]
        assert list(data.mask.size()) == [600, 126]
        assert list(data.y.size()) == [600, 1]

    shutil.rmtree(root)
def get_tudataset(name, rwr, cleaned=False):
    transform = None
    if rwr:
        transform = rwr_filter

    path = osp.join(osp.dirname(osp.realpath(__file__)),
                    ('rwr' if rwr else ''))
    dataset = TUDataset(path, name, pre_transform=transform,
                        use_edge_attr=rwr, cleaned=cleaned)

    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())
        dataset.transform = FilterConstant(10)  # T.OneHotDegree(max_degree)

    return dataset
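# `FilterConstant` above is a repo-local transform that is not shown here.
# A plausible minimal sketch, under the assumption that it simply assigns the
# same constant feature vector of the given width to every node:
class FilterConstant(object):
    def __init__(self, dim):
        self.dim = dim

    def __call__(self, data):
        data.x = torch.ones(data.num_nodes, self.dim)
        return data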
def main(args):
    path = pathlib.Path('./src/gkernel')
    if not path.is_file():
        subprocess.call(["make"], cwd="./src", shell=True)

    dataset = TUDataset(root=f'{args.dir}/Pytorch_geometric/{args.dataset}',
                        name=args.dataset)

    if dataset.num_features == 0:
        max_degree = -1
        for data in dataset:
            edge_index = data.edge_index
            degrees = Counter(list(map(int, edge_index[0])))
            if max_degree < max(degrees.values()):
                max_degree = max(degrees.values())
        dataset.transform = OneHotDegree(max_degree=max_degree, cat=False)

    path = pathlib.Path(f'{args.dir}/GraphML/{args.dataset}/'
                        f'{args.dataset.lower()}_{args.kernel}.kernel')
    if not path.is_file():
        save_to_graphml(dataset, f'{args.dir}/GraphML/{args.dataset}')

        cmd = ['./src/gkernel']
        cmd.append('-k')
        cmd.append(args.kernel)
        if args.parameter:
            cmd.append('-p')
            cmd.append(args.parameter)
        cmd.append('-i')
        cmd.append(f'{args.dir}/GraphML/{args.dataset}/{args.dataset.lower()}.list')
        cmd.append('-g')
        cmd.append(f'{args.dir}/GraphML/{args.dataset}/data/')
        cmd.append('-o')
        cmd.append(f'{args.dir}/GraphML/{args.dataset}/'
                   f'{args.dataset.lower()}_{args.kernel}.kernel')
        subprocess.call(cmd)

    K = read_kernel_matrix(f'{args.dir}/GraphML/{args.dataset}/'
                           f'{args.dataset.lower()}_{args.kernel}.kernel')
    y = dataset.data.y.data.numpy()

    ev = Evaluation(K, y, args, verbose=True)
    accs = ev.evaluate(dataset)
def gnn_evaluation(gnn, ds_name, layers, hidden, max_num_epochs=200,
                   batch_size=128, start_lr=0.01, min_lr=0.000001, factor=0.5,
                   patience=5, num_repetitions=10, all_std=True):
    # Load dataset and shuffle.
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'datasets', ds_name)
    dataset = TUDataset(path, name=ds_name).shuffle()

    # One-hot degree if node labels are not available.
    # The following if clause is taken from
    # https://github.com/rusty1s/pytorch_geometric/blob/master/benchmark/kernel/datasets.py.
    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())

        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)

    # Set device.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    test_accuracies_all = []
    test_accuracies_complete = []

    for i in range(num_repetitions):
        # Test acc. over all folds.
        test_accuracies = []
        kf = KFold(n_splits=10, shuffle=True)
        dataset.shuffle()

        for train_index, test_index in kf.split(list(range(len(dataset)))):
            # Sample 10% split from training split for validation.
            train_index, val_index = train_test_split(train_index, test_size=0.1)
            best_val_acc = 0.0
            best_test = 0.0

            # Split data.
            train_dataset = dataset[train_index.tolist()]
            val_dataset = dataset[val_index.tolist()]
            test_dataset = dataset[test_index.tolist()]

            # Prepare batching.
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
            test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

            # Collect val. and test acc. over all hyperparameter combinations.
            for l in layers:
                for h in hidden:
                    # Setup model.
                    model = gnn(dataset, l, h).to(device)
                    model.reset_parameters()

                    optimizer = torch.optim.Adam(model.parameters(), lr=start_lr)
                    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                        optimizer, mode='min', factor=factor, patience=patience,
                        min_lr=0.0000001)

                    for epoch in range(1, max_num_epochs + 1):
                        lr = scheduler.optimizer.param_groups[0]['lr']
                        train(train_loader, model, optimizer, device)
                        val_acc = test(val_loader, model, device)
                        scheduler.step(val_acc)

                        if val_acc > best_val_acc:
                            best_val_acc = val_acc
                            best_test = test(test_loader, model, device) * 100.0

                        # Break if learning rate is smaller 10**-6.
                        if lr < min_lr:
                            break

            test_accuracies.append(best_test)
            if all_std:
                test_accuracies_complete.append(best_test)

        test_accuracies_all.append(float(np.array(test_accuracies).mean()))

    if all_std:
        return (np.array(test_accuracies_all).mean(),
                np.array(test_accuracies_all).std(),
                np.array(test_accuracies_complete).std())
    else:
        return (np.array(test_accuracies_all).mean(),
                np.array(test_accuracies_all).std())
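# Hypothetical call of gnn_evaluation above. `GIN0` stands in for any model
# class with the (dataset, num_layers, hidden) constructor the function
# expects; it is an assumption, not defined in these snippets.
acc_mean, acc_std, acc_std_all = gnn_evaluation(
    GIN0, 'PROTEINS', layers=[1, 2, 3, 4, 5], hidden=[32, 64, 128],
    max_num_epochs=200, num_repetitions=10, all_std=True)
print('PROTEINS: {:.2f} +- {:.2f}'.format(acc_mean, acc_std))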
if not os.path.exists(data_seed_dir):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', args.dataname)
    dataset = TUDataset(path, name=args.dataname, pre_transform=my_transform)
    max_nodes = max([x.num_nodes for x in dataset])
    dataset.max_nodes = max_nodes
    dataset = dataset.shuffle()
    dataset = dataset.shuffle()
    with open(data_seed_dir, 'wb') as f:
        pickle.dump(dataset, f)
    print('Seed Data Saved : ', data_seed_dir)
else:
    with open(data_seed_dir, 'rb') as f:
        dataset = pickle.load(f)
    print('Seed Data Loaded : ', data_seed_dir)

dataset.transform = transform


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        nhid = args.nhid
        self.conv1 = GraphConv(dataset.num_features, nhid)
        self.pool1 = SAGPool(nhid, ratio=args.ratio)
        self.conv2 = GraphConv(nhid, nhid)
        self.pool2 = SAGPool(nhid, ratio=args.ratio)
        self.conv3 = GraphConv(nhid, nhid)
        self.pool3 = SAGPool(nhid, ratio=args.ratio)
        self.lin1 = torch.nn.Linear(nhid * 2, nhid)
def main(args):
    dataset = TUDataset(root=f'./Datasets/{args.dataset}', name=args.dataset)
    print(dataset)

    if dataset.num_features == 0 or args.initialize_node_features:
        if args.randomize:
            print('Using random node features')
            dataset.transform = Random()
        else:
            print('Using degree node features')
            max_degree = -1
            for data in dataset:
                edge_index = data.edge_index
                degrees = Counter(list(map(int, edge_index[0])))
                if max_degree < max(degrees.values()):
                    max_degree = max(degrees.values())
            dataset.transform = OneHotDegree(max_degree=max_degree, cat=False)

    print("Use clean dataset: {}".format(bool(args.clean_dataset)))
    graph_idx, orbits = get_clean_graph_indices(args.dataset,
                                                path_to_orbits=args.orbits_path)
    print('Found {} orbits from {}'.format(len(orbits), args.orbits_path))

    if args.clean_dataset:
        dataset_size = len(graph_idx)
        print(f"Dataset size: {len(dataset)} -> {dataset_size}")
        shuffled_idx = copy.deepcopy(graph_idx)
    else:
        dataset_size = len(dataset)
        shuffled_idx = list(range(dataset_size))
        print(f"Dataset size: {len(dataset)}")

    print('Class labels:', Counter([int(dataset[int(idx)].y) for idx in shuffled_idx]))

    global_test_acc_original_hom = []  # accuracy of original model
    global_test_acc_iso_original_hom = []  # accuracy of original model on homogeneous Y_iso
    global_test_acc_hom = []  # accuracy of peering model on homogeneous
    global_test_acc_iso_hom = []  # accuracy of peering model on homogeneous Y_iso
    global_test_acc_original_all = []  # accuracy of original model (same as first)
    global_test_acc_iso_original_all = []  # accuracy of original model on all Y_iso
    global_test_acc_all = []  # accuracy of peering model on all
    global_test_acc_iso_all = []  # accuracy of peering model on all Y_iso
    global_loss = []
    epoch_trains = []
    epoch_vals = []
    epoch_tests = []

    kf = KFold(args.num_kfold, shuffle=True)  # 20% for test size
    pos2idx = dict(enumerate(shuffled_idx))

    for xval, (train_index, test_index) in enumerate(kf.split(shuffled_idx)):
        test_dataset = [dataset[pos2idx[idx]] for idx in test_index]
        train_val_dataset = [dataset[pos2idx[idx]] for idx in train_index]

        test_graph_idx = [pos2idx[idx] for idx in test_index]
        train_graph_idx = [pos2idx[idx] for idx in train_index]

        # split on train and val
        train_dataset, val_dataset = train_test_split(train_val_dataset, test_size=0.2)

        train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=len(val_dataset))
        test_loader = DataLoader(test_dataset, batch_size=len(test_dataset))
        print(len(train_dataset), len(val_dataset), len(test_dataset))

        iso_test_idx_all, iso_test_labels_all = get_Y_iso_idx_and_labels(
            orbits, train_graph_idx, test_graph_idx, dataset, homogeneous=False)
        iso_test_idx_hom, iso_test_labels_hom = get_Y_iso_idx_and_labels(
            orbits, train_graph_idx, test_graph_idx, dataset, homogeneous=True)

        print('Possible train classes', get_dataset_classes(train_loader))
        print('Possible val classes', get_dataset_classes(val_loader))
        print('Possible test classes', get_dataset_classes(test_loader))

        # model = GCN(dataset.num_features, dataset.num_classes, args.hidden,
        #             args.dropout).to(device)
        model = GraphCNN(5, 2, dataset.num_features, 64, dataset.num_classes,
                         0.5, False, "sum", "sum", device).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

        best_score = -1
        best_model = None
        epoch_train = []
        epoch_val = []
        epoch_test = []

        print('Running {} epochs'.format(args.num_epochs))
        for epoch in range(1, args.num_epochs + 1):
            model.train()

            if epoch % 50 == 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = 0.5 * param_group['lr']

            train_loss = 0
            for data in train_loader:
                data = data.to(device)
                optimizer.zero_grad()
                output = model(data)
                loss = F.nll_loss(output, data.y)
                loss.backward()
                train_loss += loss.item() * data.num_graphs
                optimizer.step()
            train_loss = train_loss / len(train_dataset)

            train_acc = test(model, train_loader, device)
            val_acc = test(model, val_loader, device)
            test_acc = test(model, test_loader, device)
            epoch_train.append(train_acc)
            epoch_val.append(val_acc)
            epoch_test.append(test_acc)

            if val_acc > best_score and epoch >= args.warmup:
                best_score = val_acc
                best_test_score = test_acc
                best_epoch = epoch
                best_model = copy.deepcopy(model)

            print('Xval: {:03d}, Epoch: {:03d}, Train Loss: {:.4f}, '
                  'Train Acc: {:.4f}, Val Acc: {:.4f}, Test Acc: {:.4f}'.format(
                      xval, epoch, train_loss, train_acc, val_acc, test_acc))

        test_acc_original_hom, test_acc_iso_original_hom = test_model(
            best_model, test_loader, device, iso_test_idx_hom)
        test_acc_hom, test_acc_iso_hom = test_model(
            best_model, test_loader, device, iso_test_idx_hom, iso_test_labels_hom)
        test_acc_original_all, test_acc_iso_original_all = test_model(
            best_model, test_loader, device, iso_test_idx_all)
        test_acc_all, test_acc_iso_all = test_model(
            best_model, test_loader, device, iso_test_idx_all, iso_test_labels_all)

        print('Xval {:03d} Best model accuracy on test {:.4f} vs {:.4f} ({:.4f} {})'.format(
            xval, test_acc, best_test_score, best_score, best_epoch))

        global_test_acc_original_hom.append(test_acc_original_hom)
        global_test_acc_iso_original_hom.append(test_acc_iso_original_hom)
        global_test_acc_hom.append(test_acc_hom)
        global_test_acc_iso_hom.append(test_acc_iso_hom)
        global_test_acc_original_all.append(test_acc_original_all)
        global_test_acc_iso_original_all.append(test_acc_iso_original_all)
        global_test_acc_all.append(test_acc_all)
        global_test_acc_iso_all.append(test_acc_iso_all)
        epoch_trains.append(epoch_train)
        epoch_vals.append(epoch_val)

    test_mean_original_hom, test_std_original_hom = np.mean(
        global_test_acc_original_hom), np.std(global_test_acc_original_hom)
    test_iso_mean_original_hom, test_iso_std_original_hom = np.mean(
        global_test_acc_iso_original_hom), np.std(global_test_acc_iso_original_hom)
    test_mean_hom, test_std_hom = np.mean(global_test_acc_hom), np.std(global_test_acc_hom)
    test_iso_mean_hom, test_iso_std_hom = np.mean(
        global_test_acc_iso_hom), np.std(global_test_acc_iso_hom)
    test_mean_original_all, test_std_original_all = np.mean(
        global_test_acc_original_all), np.std(global_test_acc_original_all)
    test_iso_mean_original_all, test_iso_std_original_all = np.mean(
        global_test_acc_iso_original_all), np.std(global_test_acc_iso_original_all)
    test_mean_all, test_std_all = np.mean(global_test_acc_all), np.std(global_test_acc_all)
    test_iso_mean_all, test_iso_std_all = np.mean(
        global_test_acc_iso_all), np.std(global_test_acc_iso_all)

    print('After 10-Fold XVal: Original-Hom Test Acc: {:.4f}+-{:.4f} '
          'Test Iso Acc: {:.4f}+-{:.4f}'.format(
              test_mean_original_hom, test_std_original_hom,
              test_iso_mean_original_hom, test_iso_std_original_hom))
    print('After 10-Fold XVal: Peering-Hom Test Acc: {:.4f}+-{:.4f} '
          'Test Iso Acc: {:.4f}+-{:.4f}'.format(
              test_mean_hom, test_std_hom, test_iso_mean_hom, test_iso_std_hom))
    print('After 10-Fold XVal: Original-All Test Acc: {:.4f}+-{:.4f} '
          'Test Iso Acc: {:.4f}+-{:.4f}'.format(
              test_mean_original_all, test_std_original_all,
              test_iso_mean_original_all, test_iso_std_original_all))
    print('After 10-Fold XVal: Peering-All Test Acc: {:.4f}+-{:.4f} '
          'Test Iso Acc: {:.4f}+-{:.4f}'.format(
              test_mean_all, test_std_all, test_iso_mean_all, test_iso_std_all))

    with open(args.output_fn, 'a+') as f:
        print("original-hom gin {} {} {} {:.3f} {:.3f} {:.3f} {:.3f}".format(
            args.orbits_path, int(args.clean_dataset), args.dataset,
            test_mean_original_hom, test_std_original_hom,
            test_iso_mean_original_hom, test_iso_std_original_hom), file=f)
        print("peering-hom gin {} {} {} {:.3f} {:.3f} {:.3f} {:.3f}".format(
            args.orbits_path, int(args.clean_dataset), args.dataset,
            test_mean_hom, test_std_hom, test_iso_mean_hom, test_iso_std_hom), file=f)
        print("original-all gin {} {} {} {:.3f} {:.3f} {:.3f} {:.3f}".format(
            args.orbits_path, int(args.clean_dataset), args.dataset,
            test_mean_original_all, test_std_original_all,
            test_iso_mean_original_all, test_iso_std_original_all), file=f)
        print("peering-all gin {} {} {} {:.3f} {:.3f} {:.3f} {:.3f}".format(
            args.orbits_path, int(args.clean_dataset), args.dataset,
            test_mean_all, test_std_all, test_iso_mean_all, test_iso_std_all), file=f)

    return best_model
results = {beta: [0] * repeats for beta in betas}
for r in range(repeats):
    for beta in betas:
        accuracies = []
        dataset = TUDataset(path, name=DS).shuffle()

        if dataset.data.x is None:
            max_degree = 0
            degs = []
            for data in dataset:
                degs += [degree(data.edge_index[0], dtype=torch.long)]
                max_degree = max(max_degree, degs[-1].max().item())

            if max_degree < 1000:
                dataset.transform = T.OneHotDegree(max_degree)
            else:
                deg = torch.cat(degs, dim=0).to(torch.float)
                mean, std = deg.mean().item(), deg.std().item()
                dataset.transform = NormalizedDegree(mean, std)

        try:
            dataset_num_features = dataset.num_features
        except:
            dataset_num_features = 1

        dataloader = DataLoader(dataset, batch_size=batch_size)

        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        model = GcnInfomax(args.hidden_dim, args.num_gc_layers).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)