                    type=int, default=100, help='patience for early stopping')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
args = parser.parse_args()

torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)

dataset = TUDataset(os.path.join('data', args.dataset), name=args.dataset, use_node_attr=True)
args.num_classes = dataset.num_classes
args.num_features = dataset.num_features
max_num_features = 89
if 'all' in args.resume:
    args.num_features = max_num_features
print(args)

model = Model(args).to(args.device)
if args.resume:
    if os.path.isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)

import torch
import torch.nn.functional as F
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader, Data
from torch_geometric.nn import GCNConv
from torch.utils.data import random_split
import os
import os.path as osp
import argparse
import warnings

warnings.filterwarnings("ignore")  # ignore all warnings

dataname = 'NCI109'
path = osp.join(os.path.abspath(''), 'data', dataname)  # ENZYMES / DD / Mutagenicity
dataset = TUDataset(path, name=dataname)
dataset = dataset.shuffle()
num_features = dataset.num_features
num_classes = dataset.num_classes

dataset1 = list()
for i in range(len(dataset)):
    data1 = Data(x=dataset[i].x, edge_index=dataset[i].edge_index, y=dataset[i].y)
    data1.num_node = dataset[i].num_nodes
    data1.num_edge = dataset[i].edge_index.size(1)
    dataset1.append(data1)
dataset = dataset1

        x_5 = F.relu(self.conv5(x_4, data.edge_index, data.edge_attr))
        x_6 = F.relu(self.conv6(x_5, data.edge_index, data.edge_attr))
        x = x_6
        x = self.set2set(x, data.batch)
        x = F.relu(self.fc1(x))
        x = self.fc4(x)
        return x


plot_all = []
results = []
results_log = []
for _ in range(5):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'datasets', 'alchemy_full')
    dataset = TUDataset(path, name="alchemy_full").shuffle()

    mean = dataset.data.y.mean(dim=0, keepdim=True)
    std = dataset.data.y.std(dim=0, keepdim=True)
    dataset.data.y = (dataset.data.y - mean) / std
    mean, std = mean.to(device), std.to(device)

    train_dataset = dataset[0:162063].shuffle()
    val_dataset = dataset[162063:182321].shuffle()
    test_dataset = dataset[182321:].shuffle()

    batch_size = 64
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

import os.path as osp

import torch
import torch.nn.functional as F
from torch.nn import Sequential, Linear, ReLU
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader
from torch_geometric.nn import GINConv, global_add_pool

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'NCI1')
dataset = TUDataset(path, name='NCI1').shuffle()
test_dataset = dataset[:len(dataset) // 10]
train_dataset = dataset[len(dataset) // 10:]
test_loader = DataLoader(test_dataset, batch_size=128)
train_loader = DataLoader(train_dataset, batch_size=128)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        num_features = dataset.num_features
        dim = 32

        nn1 = Sequential(Linear(num_features, dim), torch.nn.BatchNorm1d(dim), ReLU(),
                         Linear(dim, dim))
        self.conv1 = GINConv(nn1)
        self.bn1 = torch.nn.BatchNorm1d(dim)

        nn2 = Sequential(Linear(dim, dim), torch.nn.BatchNorm1d(dim), ReLU(), Linear(dim, dim))

        PRIOR = 0
        return local_global_loss + PRIOR


if __name__ == '__main__':
    args = arg_parse()
    accuracies = {'logreg': [], 'svc': [], 'linearsvc': [], 'randomforest': []}
    epochs = 20
    log_interval = 1
    batch_size = 128
    lr = args.lr
    DS = args.DS
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', DS)
    # kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None)

    dataset = TUDataset(path, name=DS).shuffle()
    dataset_num_features = max(dataset.num_features, 1)
    dataloader = DataLoader(dataset, batch_size=batch_size)

    print('================')
    print('lr: {}'.format(lr))
    print('num_features: {}'.format(dataset_num_features))
    print('hidden_dim: {}'.format(args.hidden_dim))
    print('num_gc_layers: {}'.format(args.num_gc_layers))
    print('================')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = InfoGraph(args.hidden_dim, args.num_gc_layers).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    model.eval()

from torch_scatter import scatter_mean
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader

dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for batch in loader:
    print(batch)
    print(batch.num_graphs)
    print(batch.batch)
    x = scatter_mean(batch.x, batch.batch, dim=0)
    print(x.size())
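
# For reference, a minimal sketch of the equivalent built-in readout (assuming the same
# `batch` object as in the loop above): torch_geometric.nn.global_mean_pool computes the
# same per-graph average as the scatter_mean call.
#
#     from torch_geometric.nn import global_mean_pool
#     x = global_mean_pool(batch.x, batch.batch)  # -> [num_graphs, num_node_features]
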
import os.path as osp

import torch
import torch.nn.functional as F
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'PROTEINS')
dataset = TUDataset(path, name='PROTEINS')
dataset = dataset.shuffle()
n = len(dataset) // 10
test_dataset = dataset[:n]
train_dataset = dataset[n:]
test_loader = DataLoader(test_dataset, batch_size=60)
train_loader = DataLoader(train_dataset, batch_size=60)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = GraphConv(dataset.num_features, 128)
        self.pool1 = TopKPooling(128, ratio=0.8)
        self.conv2 = GraphConv(128, 128)
        self.pool2 = TopKPooling(128, ratio=0.8)
        self.conv3 = GraphConv(128, 128)
        self.pool3 = TopKPooling(128, ratio=0.8)

        self.lin1 = torch.nn.Linear(256, 128)

parser.add_argument('--patience', type=int, default=50, help='patience for early stopping')
parser.add_argument('--pooling_layer_type', type=str, default='GCNConv',
                    help='convolution type used inside the pooling layer')

args = parser.parse_args()
args.device = 'cpu'
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)
    args.device = 'cuda:0'

dataset = TUDataset(os.path.join('data', args.dataset), name=args.dataset)
args.num_classes = dataset.num_classes
args.num_features = dataset.num_features

num_training = int(len(dataset) * 0.8)
num_val = int(len(dataset) * 0.1)
num_test = len(dataset) - (num_training + num_val)
training_set, validation_set, test_set = random_split(
    dataset, [num_training, num_val, num_test])

train_loader = DataLoader(training_set, batch_size=args.batch_size, shuffle=True)
val_loader = DataLoader(validation_set, batch_size=args.batch_size, shuffle=False)

if args.torch_geom:
    if args.degree:
        if args.dataset == 'TRIANGLES':
            max_degree = 14
        else:
            raise NotImplementedError(
                'max_degree value should be specified in advance. '
                'Try running without --torch_geom (-g) and look at dataset statistics printed out by our code.')

    if args.degree:
        transforms.append(T.OneHotDegree(max_degree=max_degree, cat=False))

    dataset = TUDataset('./data/%s/' % args.dataset, name=args.dataset,
                        use_node_attr=args.use_cont_node_attr,
                        transform=T.Compose(transforms))
    train_ids, test_ids = split_ids(rnd_state.permutation(len(dataset)), folds=n_folds)
else:
    datareader = DataReader(data_dir='./data/%s/' % args.dataset,
                            rnd_state=rnd_state,
                            folds=n_folds,
                            use_cont_node_attr=args.use_cont_node_attr)

acc_folds = []
for fold_id in range(n_folds):
    loaders = []

from torch_geometric.nn import global_add_pool
from torch_scatter import scatter_mean
from torch_geometric.data import Data, DataLoader


class HandleNodeAttention(object):
    def __call__(self, data):
        data.attn = torch.softmax(data.x[:, 0], dim=0)
        data.x = data.x[:, 1:]
        return data


path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'COLORS-3')
dataset = TUDataset(path, 'COLORS-3', use_node_attr=True, transform=HandleNodeAttention())

train_loader = DataLoader(dataset[:500], batch_size=60, shuffle=True)
val_loader = DataLoader(dataset[500:3000], batch_size=60)
test_loader = DataLoader(dataset[3000:], batch_size=60)


class Net(torch.nn.Module):
    def __init__(self, in_channels):
        super(Net, self).__init__()

        self.conv1 = GINConv(Seq(Lin(in_channels, 64), ReLU(), Lin(64, 64)))
        self.pool1 = TopKPooling(in_channels, min_score=0.05)
        self.conv2 = GINConv(Seq(Lin(64, 64), ReLU(), Lin(64, 64)))

if args.bench_task == 'page':
    print("Start benchmark PageRank!")
else:
    print("Start benchmark Clustering coefficient!")

if args.netlib == "nx":
    print("Use NetworkX as the DeepSNAP backend network library.")
    import networkx as netlib
elif args.netlib == "sx":
    print("Use SnapX as the DeepSNAP backend network library.")
    import snap
    import snapx as netlib
else:
    import networkx as netlib
    print("Use NetworkX as the DeepSNAP backend network library.")

if args.dataset == 'COX2':
    pyg_dataset = TUDataset('./tu', args.dataset)
    if args.bench_task == 'page':
        print("Start benchmark DeepSNAP:")
        deepsnap_pagerank(args, pyg_dataset)
        print("Start benchmark Tensor:")
        pyg_pagerank(args, pyg_dataset)
    else:
        print("Start benchmark DeepSNAP:")
        deepsnap_cluster(args, pyg_dataset)
        print("Start benchmark Tensor:")
        pyg_cluster(args, pyg_dataset)

    'model', 'model_params', 'train_size', 'split', 'epoch', 'time_for_epoch(ms)',
    'train_loss', 'train_acc', 'test_acc', 'train_conf', 'test_conf'])
log_file.flush()

for dataset_name in datasets:
    for model_dict in models:
        for train_size in train_sizes:
            dataset = TUDataset(root=f'data/{dataset_name}', name=dataset_name).shuffle()
            for split, (all_train_idx, test_idx) in enumerate(get_splits(train_size)):
                print(dataset_name, model_dict['class'].__name__, split)
                model = model_dict['class'](num_feats=dataset.num_features,
                                            num_classes=dataset.num_classes,
                                            **model_dict['params']).to(device)
                optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=0.001)
                crit = CrossEntropyLoss()

                # convert idx to torch tensors
                train_idx = torch.tensor(all_train_idx, dtype=torch.long)
                test_idx = torch.tensor(test_idx, dtype=torch.long)

cache = {}


def transform(data):
    if data not in cache:
        # data.z = torch.Tensor(computeWL([data]).todense())
        cache[data] = torch.Tensor(computeWL([data]).todense())
        data.z = cache[data]
    else:
        data.z = cache[data]
    return data


path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'NCI1')
dataset = TUDataset(path, name='NCI1', transform=transform).shuffle()
test_dataset = dataset[:len(dataset) // 10]
train_dataset = dataset[len(dataset) // 10:]
pretrain_loader = DataLoader(dataset, batch_size=50)
test_loader = DataLoader(test_dataset, batch_size=50)
train_loader = DataLoader(train_dataset, batch_size=50)


class GCNNet(torch.nn.Module):
    def __init__(self, dim=32, gdim=128, pretr_out_dim=100, out_dim=2):
        super(GCNNet, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, dim, improved=True)
        self.conv2 = GCNConv(dim, dim, improved=True)
        self.conv3 = GCNConv(dim, dim, improved=True)

if __name__ == '__main__':
    args = arg_parse()
    accuracies = {'logreg': [], 'svc': [], 'linearsvc': [], 'randomforest': []}
    epochs = 20
    log_interval = 1
    batch_size = 1
    lr = args.lr
    DS = args.DS
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', DS)
    # kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None)

    # dataset = TUDataset(path, name=DS).shuffle()
    dataset = TUDataset(path, name=DS)
    try:
        dataset_num_features = dataset.num_features
    except Exception:
        dataset_num_features = 1
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    dataloader_v = DataLoader(dataset, batch_size=1, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GcnInfomax(args.hidden_dim, args.num_gc_layers).to(device)
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    print('================')

parser.add_argument('--pooling_ratio', type=float, default=0.5, help='pooling ratio')
parser.add_argument('--dropout_ratio', type=float, default=0.4, help='dropout ratio')
parser.add_argument('--lamb', type=float, default=1.0, help='trade-off parameter')
parser.add_argument('--dataset', type=str, default='PROTEINS',
                    help='DD/PROTEINS/NCI1/NCI109/Mutagenicity/ENZYMES')
parser.add_argument('--device', type=str, default='cuda:0', help='specify cuda devices')
parser.add_argument('--epochs', type=int, default=1000, help='maximum number of epochs')
parser.add_argument('--patience', type=int, default=100, help='patience for early stopping')

args = parser.parse_args()
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    print("CUDA")
    torch.cuda.manual_seed(args.seed)

dataset = TUDataset("/home/anoopkumar/Brain/HGP-SL/data", name=args.dataset,
                    use_node_attr=False, use_edge_attr=True)
args.num_classes = dataset.num_classes
args.num_features = dataset.num_features

print(args)
print("len of the dataset ######## ", len(dataset))

num_training = int(len(dataset) * 0.80)
num_val = int(len(dataset) * 0.1)
num_test = len(dataset) - (num_training + num_val)
training_set, validation_set, test_set = random_split(dataset, [num_training, num_val, num_test])
print("train len", len(training_set))
print("val len", len(validation_set))

def my_transform(data):
    data.x = F.normalize(data.x, p=2, dim=-1)
    return data


if not os.path.exists(os.path.join('./data', args.dataname)):
    os.mkdir(os.path.join('./data', args.dataname))
data_seed_dir = os.path.join('./data', args.dataname, str(args.seed) + '.pkl')

if not os.path.exists(data_seed_dir):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', args.dataname)
    dataset = TUDataset(path, name=args.dataname, pre_transform=my_transform)
    max_nodes = max([x.num_nodes for x in dataset])
    dataset.max_nodes = max_nodes
    dataset = dataset.shuffle()
    dataset = dataset.shuffle()
    with open(data_seed_dir, 'wb') as f:
        pickle.dump(dataset, f)
    print('Seed Data Saved : ', data_seed_dir)
else:
    with open(data_seed_dir, 'rb') as f:
        dataset = pickle.load(f)
    print('Seed Data Loaded : ', data_seed_dir)

# min_nodes = max([x.num_nodes for x in dataset]) // 2
nodes = [x.num_nodes for x in dataset]
min_nodes = sorted(nodes)[int(len(nodes) * 0.4)]

from torch_geometric.nn import GINConv, GCNConv, SAGPooling
from torch_geometric.nn import global_max_pool
from torch_scatter import scatter_mean


class HandleNodeAttention(object):
    def __call__(self, data):
        data.attn = torch.softmax(data.x, dim=0).flatten()
        data.x = None
        return data


transform = T.Compose([HandleNodeAttention(), T.OneHotDegree(max_degree=14)])

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'TRIANGLES')
dataset = TUDataset(path, name='TRIANGLES', use_node_attr=True, transform=transform)

train_loader = DataLoader(dataset[:30000], batch_size=60, shuffle=True)
val_loader = DataLoader(dataset[30000:35000], batch_size=60)
test_loader = DataLoader(dataset[35000:], batch_size=60)


class Net(torch.nn.Module):
    def __init__(self, in_channels):
        super(Net, self).__init__()

        self.conv1 = GINConv(Seq(Lin(in_channels, 64), ReLU(), Lin(64, 64)))
        self.pool1 = SAGPooling(64, min_score=0.001, GNN=GCNConv)
        self.conv2 = GINConv(Seq(Lin(64, 64), ReLU(), Lin(64, 64)))
        self.pool2 = SAGPooling(64, min_score=0.001, GNN=GCNConv)

import os.path as osp

import torch
torch.version.cuda = None
import torch.nn.functional as F
from torch.nn import Sequential, Linear, ReLU
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader
from torch_geometric.nn import GINLafConv, global_add_pool, GINConv

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'MUTAG')
dataset = TUDataset(path, name='MUTAG').shuffle()
test_dataset = dataset[:len(dataset) // 10]
train_dataset = dataset[len(dataset) // 10:]
test_loader = DataLoader(test_dataset, batch_size=128)
train_loader = DataLoader(train_dataset, batch_size=128)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        num_features = dataset.num_features
        dim = 32

        nn1 = Sequential(Linear(num_features, dim), ReLU(), Linear(dim, dim))
        self.conv1 = GINLafConv(nn1, embed_dim=num_features)
        # self.conv1 = GINConv(nn1)
        self.bn1 = torch.nn.BatchNorm1d(dim)

        nn2 = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim))
        self.conv2 = GINLafConv(nn2, embed_dim=dim)

    def __call__(self, data):
        return data.num_nodes <= 70


class MyPreTransform(object):
    def __call__(self, data):
        data.x = degree(data.edge_index[0], data.num_nodes, dtype=torch.long)
        data.x = one_hot(data.x, 136, torch.float)
        return data


BATCH = 32
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', '1-IMDB-BINARY')
dataset = TUDataset(path, name='IMDB-BINARY', pre_transform=MyPreTransform(),
                    pre_filter=MyFilter())

perm = torch.randperm(len(dataset), dtype=torch.long)
dataset = dataset[perm]


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GraphConv(dataset.num_features, 32)
        self.conv2 = GraphConv(32, 64)
        self.conv3 = GraphConv(64, 64)
        self.fc1 = torch.nn.Linear(64, 64)
        self.fc2 = torch.nn.Linear(64, 32)
        self.fc3 = torch.nn.Linear(32, dataset.num_classes)

    def prepare_data(self):
        TUDataset(self.data_dir, name='IMDB-BINARY', pre_transform=self.transform)
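
# A hook with this signature follows the PyTorch Lightning DataModule convention.
# Below is a minimal, assumed sketch of how such a hook could be wired up end to end;
# the class name, split, batch size, and dataloader shown here are illustrative
# assumptions, not the original code.
import pytorch_lightning as pl
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader


class IMDBDataModule(pl.LightningDataModule):  # hypothetical name
    def __init__(self, data_dir='data', transform=None, batch_size=32):
        super().__init__()
        self.data_dir = data_dir
        self.transform = transform
        self.batch_size = batch_size

    def prepare_data(self):
        # Download/process once; Lightning calls this on a single process.
        TUDataset(self.data_dir, name='IMDB-BINARY', pre_transform=self.transform)

    def setup(self, stage=None):
        # Load the processed dataset; called on every process.
        self.dataset = TUDataset(self.data_dir, name='IMDB-BINARY',
                                 pre_transform=self.transform).shuffle()

    def train_dataloader(self):
        return DataLoader(self.dataset, batch_size=self.batch_size, shuffle=True)
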
import os.path as osp
from math import ceil

import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import DenseGraphConv, GCNConv, dense_mincut_pool
from torch_geometric.utils import to_dense_adj, to_dense_batch

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'PROTEINS')
dataset = TUDataset(path, name='PROTEINS').shuffle()
average_nodes = int(dataset.data.x.size(0) / len(dataset))
n = (len(dataset) + 9) // 10
test_dataset = dataset[:n]
val_dataset = dataset[n:2 * n]
train_dataset = dataset[2 * n:]
test_loader = DataLoader(test_dataset, batch_size=20)
val_loader = DataLoader(val_dataset, batch_size=20)
train_loader = DataLoader(train_dataset, batch_size=20)


class Net(torch.nn.Module):
    def __init__(self, in_channels, out_channels, hidden_channels=32):
        super().__init__()

        self.conv1 = GCNConv(in_channels, hidden_channels)
        num_nodes = ceil(0.5 * average_nodes)
        self.pool1 = Linear(hidden_channels, num_nodes)

            data.num_nodes < 450


class MyPreTransform(object):
    def __call__(self, data):
        data.x = data.x[:, -3:]  # Only use node attributes.
        return data


BATCH = 20
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', '1-2-3-PROTEINS')
dataset = TUDataset(
    path,
    name='PROTEINS',
    pre_transform=T.Compose([MyPreTransform(), TwoMalkin(), ConnectedThreeMalkin()]),
    pre_filter=MyFilter())

perm = torch.randperm(len(dataset), dtype=torch.long)
dataset = dataset[perm]

dataset.data.iso_type_2 = torch.unique(dataset.data.iso_type_2, True, True)[1]
num_i_2 = dataset.data.iso_type_2.max().item() + 1
dataset.data.iso_type_2 = F.one_hot(dataset.data.iso_type_2, num_classes=num_i_2).to(torch.float)

dataset.data.iso_type_3 = torch.unique(dataset.data.iso_type_3, True, True)[1]
num_i_3 = dataset.data.iso_type_3.max().item() + 1
dataset.data.iso_type_3 = F.one_hot(dataset.data.iso_type_3,

import pickle
import util
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader
from torch_scatter import scatter_mean
from class_SOM_batch import ConvSOM_dense1
from sklearn.model_selection import KFold, train_test_split
import time

start = time.time()

dataset = TUDataset(root='./PTC_MR', name='PTC_MR')
print('Dataset information: ')
print('size ', len(dataset), ' graphs')
print(dataset.num_classes, ' classes')
print(dataset.num_features, ' features')

# conv_dim is the number of convolution channels;
# it must match the model that is being loaded.
conv_dim = 64
lattice_dim = [util.args.p1, util.args.p2]
sigma_out = util.args.sigma_out
reg = util.args.regularization
wd = util.args.weight_decay
print('lattice: ', lattice_dim)
print('learning rate: ', util.args.learning_rate)
print('repetitions ', util.args.repetitions)

import torch
import torch.nn.functional as F
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
from torch_geometric.nn import GCNConv, SAGEConv, GATConv

# path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'ENZYMES')
dataset = TUDataset(root="/content/sample_data", name='ENZYMES')
dataset = dataset.shuffle()
n = len(dataset) // 5
test_dataset = dataset[:n]
train_dataset = dataset[n:]
test_loader = DataLoader(test_dataset, batch_size=120)
train_loader = DataLoader(train_dataset, batch_size=120)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = GraphConv(dataset.num_features, 128)  # num_features is the number of node features (3 here)
        # self.pool1 = TopKPooling(128, ratio=0.8)
        self.conv2 = GraphConv(128, 128)
        # self.pool2 = TopKPooling(128, ratio=0.8)
        self.conv3 = GraphConv(128, 128)
        # self.pool3 = TopKPooling(128, ratio=0.8)

def my_transform(data):
    data.x = F.normalize(data.x, p=2, dim=-1)
    return data


if not os.path.exists(os.path.join('./data', args.dataname)):
    os.mkdir(os.path.join('./data', args.dataname))
data_seed_dir = os.path.join('./data', args.dataname, str(args.seed) + '.pkl')

if not os.path.exists(data_seed_dir):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', args.dataname)
    dataset = TUDataset(
        path, name=args.dataname
    )  # pre_transform=my_transform, transform=T.ToDense(args.max_nodes), pre_filter=MyFilter()
    max_nodes = max([x.num_nodes for x in dataset])
    del dataset
    dataset = TUDataset(
        path, name=args.dataname,
        pre_transform=my_transform)  # , transform=T.ToDense(max_nodes)
    # dataset.max_nodes = max_nodes
    dataset = dataset.shuffle()
    dataset = dataset.shuffle()
    with open(data_seed_dir, 'wb') as f:
        pickle.dump(dataset, f)
    print('Seed Data Saved : ', data_seed_dir)
else:
    with open(data_seed_dir, 'rb') as f:

from torch_geometric.data import DataLoader

parser = argparse.ArgumentParser()
parser.add_argument('--no-train', default=False)
args = parser.parse_args()


class MyFilter(object):
    def __call__(self, data):
        return True
        # return data.num_nodes >= 5  (unreachable: the filter currently keeps every graph)


BATCH = 32
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', '1-NCI')
dataset = TUDataset(path, name='NCI1', pre_filter=MyFilter())

perm = torch.randperm(len(dataset), dtype=torch.long)
torch.save(perm, 'nci_perm.pt')
perm = torch.load('nci_perm.pt')
dataset = dataset[perm]


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GraphConv(dataset.num_features, 32)
        self.conv2 = GraphConv(32, 64)
        self.conv3 = GraphConv(64, 64)
        self.fc1 = torch.nn.Linear(64, 64)
        self.fc2 = torch.nn.Linear(64, 32)

from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
from torch_geometric.nn import DenseSAGEConv, dense_diff_pool

max_nodes = 100


class MyFilter(object):
    def __call__(self, data):
        return data.num_nodes <= max_nodes


path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'ENZYMES_d')
dataset = TUDataset(path, name='ENZYMES', transform=T.ToDense(max_nodes),
                    pre_filter=MyFilter())
dataset = dataset.shuffle()
n = (len(dataset) + 9) // 10
test_dataset = dataset[:n]
val_dataset = dataset[n:2 * n]
train_dataset = dataset[2 * n:]
test_loader = DenseDataLoader(test_dataset, batch_size=20)
val_loader = DenseDataLoader(val_dataset, batch_size=20)
train_loader = DenseDataLoader(train_dataset, batch_size=20)


class GNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels,

# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch

import flash
from flash.core.utilities.imports import example_requires
from flash.graph import GraphClassificationData, GraphEmbedder

example_requires("graph")

from torch_geometric.datasets import TUDataset  # noqa: E402

# 1. Create the DataModule
dataset = TUDataset(root="data", name="KKI")

datamodule = GraphClassificationData.from_datasets(
    predict_dataset=dataset[:3],
    batch_size=4,
)

# 2. Load a previously trained GraphClassifier
model = GraphEmbedder.load_from_checkpoint(
    "https://flash-weights.s3.amazonaws.com/0.7.0/graph_classification_model.pt"
)

# 3. Generate embeddings for the first 3 graphs
trainer = flash.Trainer(gpus=torch.cuda.device_count())
predictions = trainer.predict(model, datamodule=datamodule)
print(predictions)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default="PROTEINS", help='name of dataset')
    parser.add_argument('--mod', type=str, default="f-scaled",
                        choices=["origin", "additive", "scaled", "f-additive", "f-scaled"],
                        help='model to be used: origin, additive, scaled, f-additive, f-scaled')
    parser.add_argument('--seed', type=int, default=809, help='random seed')
    parser.add_argument('--epochs', type=int, default=300, help='number of epochs to train')
    parser.add_argument('--lr', type=float, default=1e-2, help='initial learning rate')
    parser.add_argument('--wd', type=float, default=1e-3, help='weight decay value')
    parser.add_argument('--n_layer', type=int, default=4, help='number of hidden layers')
    parser.add_argument('--hid', type=int, default=32, help='size of input hidden units')
    parser.add_argument('--heads', type=int, default=1, help='number of attention heads')
    parser.add_argument('--dropout', type=float, default=0.0, help='dropout rate')
    parser.add_argument('--alpha', type=float, default=0.2, help='alpha for the leaky_relu')
    parser.add_argument('--kfold', type=int, default=10, help='number of kfold')
    parser.add_argument('--batch_size', type=int, default=32, help='batch size')
    parser.add_argument('--readout', type=str, default="add", choices=["add", "mean"],
                        help='readout function: add, mean')
    args = parser.parse_args()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    set_seed(args.seed)

    path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', args.dataset)
    dataset = TUDataset(path, name=args.dataset, pre_transform=Constant()).shuffle()
    train_graphs, test_graphs = separate_data(len(dataset), args.kfold)
    kfold_num = args.kfold

    print('Dataset:', args.dataset)
    print('# of graphs:', len(dataset))
    print('# of classes:', dataset.num_classes)

    test_acc_values = torch.zeros(kfold_num, args.epochs)

    for idx in range(kfold_num):
        print('=============================================================================')
        print(kfold_num, 'fold cross validation:', idx + 1)
        idx_train = train_graphs[idx]
        idx_test = test_graphs[idx]

        train_dataset = dataset[idx_train]
        test_dataset = dataset[idx_test]

        train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
                                  worker_init_fn=args.seed)
        test_loader = DataLoader(test_dataset, batch_size=args.batch_size)

        t_start = time.time()
        best_epoch = 0

        config = Config(mod=args.mod, nhid=args.hid, nclass=dataset.num_classes,
                        nfeat=dataset.num_features, dropout=args.dropout, heads=args.heads,
                        alpha=args.alpha, n_layer=args.n_layer, readout=args.readout)
        model = CPA(config).to(device)
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd,
                               amsgrad=False)
        scheduler = MultiStepLR(optimizer,
                                milestones=[50, 100, 150, 200, 250, 300, 350, 400, 450, 500],
                                gamma=0.5)

        for epoch in range(args.epochs):
            train_loss = train(model, train_loader, optimizer, device)
            train_acc = test(model, train_loader, device)
            test_acc = test(model, test_loader, device)
            test_acc_values[idx, epoch] = test_acc
            scheduler.step()
            print('Epoch {:03d}'.format(epoch + 1),
                  'train_loss: {:.4f}'.format(train_loss),
                  'train_acc: {:.4f}'.format(train_acc),
                  'test_acc: {:.4f}'.format(test_acc))

        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_start))
        print('=============================================================================')

    mean_test_acc = torch.mean(test_acc_values, dim=0)
    best_epoch = int(torch.argmax(mean_test_acc).data)
    print('Best Epoch:', best_epoch + 1)
    print('Best Testing Accs:')
    for i in test_acc_values[:, best_epoch]:
        print('{:0.4f},'.format(i.item()), end='')
    print('\n')
    print('Averaged Best Testing Acc:')
    print('{:0.4f}'.format(mean_test_acc[best_epoch].item()))

if __name__ == '__main__':
    print(cmd_args)
    random.seed(cmd_args.seed)
    np.random.seed(cmd_args.seed)
    torch.manual_seed(cmd_args.seed)

    if cmd_args.mode == 'cpu':
        device = torch.device('cpu')
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    path = osp.join(osp.dirname(osp.realpath(__file__)), '.', 'data', cmd_args.data)
    dataset = TUDataset(path, name=cmd_args.data)

    if cmd_args.sortpooling_k <= 1:
        num_nodes_list = sorted([g.num_nodes for g in dataset])
        cmd_args.sortpooling_k = num_nodes_list[
            int(math.ceil(cmd_args.sortpooling_k * len(num_nodes_list))) - 1]
        cmd_args.sortpooling_k = max(10, cmd_args.sortpooling_k)
        print('k used in SortPooling is: ' + str(cmd_args.sortpooling_k))

    # Ten-fold cross validation
    train_dataset, test_dataset = sep_tg_data(dataset, cmd_args.fold - 1)
    print('# train: %d, # test: %d' % (len(train_dataset), len(test_dataset)))
    print('# num of classes: ', dataset.num_classes)

    test_loader = DataLoader(test_dataset, batch_size=cmd_args.batch_size)
    train_loader = DataLoader(train_dataset,