@File : main.py """ import torch from torch_geometric.data import Data from torch_geometric.datasets import Planetoid from torch_geometric.nn import GAE, VGAE from torch_geometric.utils import train_test_split_edges import args from model import Encoder, VEncoder, get_edge_acc DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu') dataset = None if args.dataset.lower() == 'Cora'.lower(): dataset = Planetoid(root='tmp', name='Cora') print("use dataset: Cora") elif args.dataset.lower() == 'CiteSeer'.lower(): dataset = Planetoid(root='tmp', name='CiteSeer') print("use dataset: CiteSeer") elif args.dataset.lower() == 'PubMed'.lower(): dataset = Planetoid(root='tmp', name='PubMed') print("use dataset: PubMed") data = dataset[0] enhanced_data = train_test_split_edges(data.clone(), val_ratio=0.1, test_ratio=0.2) train_data = Data(x=enhanced_data.x, edge_index=enhanced_data['train_pos_edge_index']).to(DEVICE)
import os.path as osp import torch from torch.nn import Linear import torch.nn.functional as F from torch_geometric.datasets import Planetoid import torch_geometric.transforms as T from torch_geometric.nn import GCN2Conv from torch_geometric.nn.conv.gcn_conv import gcn_norm import onnxruntime dataset = 'Cora' path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset) transform = T.Compose([T.NormalizeFeatures(), T.ToSparseTensor()]) dataset = Planetoid(path, dataset, transform=transform) data = dataset[0] data.adj_t = gcn_norm(data.adj_t) # Pre-process GCN normalization. def export_to_onnx_pt(model, data, use_dynamic=True): input_names = ['input_1', 'input_2'] inputs = { 'input_1': data.x, 'input_2': data.adj_t } output_names = ["output1"] batch = torch.arange(data.num_nodes) if use_dynamic: torch_out = torch.onnx.export(model, # model being run args=tuple(inputs.values()), # model input (or a tuple for multiple inputs) f="models/graphml/gcn2.onnx",
import os.path as osp import torch import torch.nn.functional as F import matplotlib.pyplot as plt from torch_geometric.datasets import Planetoid import torch_geometric.transforms as T from torch_geometric.nn import GCNConv, GNNExplainer from BayesianExplainer import BayesianExplainer from tqdm import tqdm dataset = 'Cora' path = osp.join('data', 'Planetoid') dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures()) data = dataset[0] class Net(torch.nn.Module): def __init__(self): super(Net, self).__init__() self.conv1 = GCNConv(dataset.num_features, 32) self.conv2 = GCNConv(32, 16) self.linear = torch.nn.Linear(16, dataset.num_features) def forward(self, x, edge_index): x = F.relu(self.conv1(x, edge_index)) x = F.dropout(x, training=self.training) x = F.relu(self.conv2(x, edge_index)) x = self.linear(x)
def test_neighbor_sampler_on_cora():
    """Check that mini-batched (NeighborSampler) and full-graph forward passes
    agree on Cora, for both a SAGE and a GAT model.

    With ``sizes=[-1, -1, -1]`` every neighbor is sampled, so the per-batch
    computation must reproduce the full-graph output exactly on the target
    nodes.
    """
    # Use a unique temp directory so parallel test runs don't collide.
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Cora')
    data = dataset[0]

    batch = torch.arange(10)
    # sizes=[-1, -1, -1]: sample *all* neighbors over 3 hops, so batched and
    # full-graph results are comparable.
    loader = NeighborSampler(data.edge_index, sizes=[-1, -1, -1],
                             node_idx=batch, batch_size=10)

    class SAGE(torch.nn.Module):
        def __init__(self, in_channels, out_channels):
            super().__init__()
            self.convs = torch.nn.ModuleList()
            self.convs.append(SAGEConv(in_channels, 16))
            self.convs.append(SAGEConv(16, 16))
            self.convs.append(SAGEConv(16, out_channels))

        def batch(self, x, adjs):
            # One bipartite conv per sampled hop.
            for i, (edge_index, _, size) in enumerate(adjs):
                x_target = x[:size[1]]  # Target nodes are always placed first.
                x = self.convs[i]((x, x_target), edge_index)
            return x

        def full(self, x, edge_index):
            for conv in self.convs:
                x = conv(x, edge_index)
            return x

    model = SAGE(dataset.num_features, dataset.num_classes)

    _, n_id, adjs = next(iter(loader))
    out1 = model.batch(data.x[n_id], adjs)
    out2 = model.full(data.x, data.edge_index)[batch]
    assert torch.allclose(out1, out2)

    class GAT(torch.nn.Module):
        def __init__(self, in_channels, out_channels):
            super().__init__()
            self.convs = torch.nn.ModuleList()
            self.convs.append(GATConv(in_channels, 16, heads=2))
            self.convs.append(GATConv(32, 16, heads=2))
            self.convs.append(GATConv(32, out_channels, heads=2, concat=False))

        def batch(self, x, adjs):
            for i, (edge_index, _, size) in enumerate(adjs):
                x_target = x[:size[1]]  # Target nodes are always placed first.
                x = self.convs[i]((x, x_target), edge_index)
            return x

        def full(self, x, edge_index):
            for conv in self.convs:
                x = conv(x, edge_index)
            return x

    # BUG FIX: the original never instantiated GAT, so the second check
    # silently re-tested the SAGE model and the GAT class was dead code.
    model = GAT(dataset.num_features, dataset.num_classes)

    _, n_id, adjs = next(iter(loader))
    out1 = model.batch(data.x[n_id], adjs)
    out2 = model.full(data.x, data.edge_index)[batch]
    assert torch.allclose(out1, out2)

    shutil.rmtree(root)
import sys import inspect import torch import torch.nn.functional as F import pdb from torch.nn import Parameter from torch_scatter import scatter_add from torch_geometric.utils import scatter_ from torch_geometric.utils import add_remaining_self_loops from torch_geometric.nn.inits import uniform, glorot, zeros, ones, reset from torch_geometric.datasets import Planetoid #dataset = Planetoid(root='/tmp/Cora', name='Cora') dataset = Planetoid(root='/tmp/Pubmed', name='Pubmed') #dataset = Planetoid(root='/tmp/Citeseer', name='Citeseer') class GCNConv(torch.nn.Module): def __init__(self, in_channels, out_channels, improved=False, cached=False, bias=True, **kwargs): super(GCNConv, self).__init__() self.in_channels = in_channels self.out_channels = out_channels self.improved = improved
from torch_geometric.datasets import Planetoid import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torch_geometric.nn import GCNConv from torch_geometric.nn import GATConv from torch_geometric.nn import SAGEConv from torch_geometric.nn import JumpingKnowledge dataset = Planetoid(root='./cora/', name='Cora') # dataset = Planetoid(root='./cora/', name='Cora', split='random', # num_train_per_class=232, num_val=542, num_test=542) # dataset = Planetoid(root='./citeseer',name='Citeseer') # dataset = Planetoid(root='./pubmed/', name='Pubmed') print(dataset) # baseline:GCN模型(2层) class GCNNet(nn.Module): def __init__(self, dataset): super(GCNNet, self).__init__() self.conv1 = GCNConv(dataset.num_node_features, 16) self.conv2 = GCNConv(16, dataset.num_classes) def forward(self, data): x, edge_index = data.x, data.edge_index x = self.conv1(x, edge_index) x = F.relu(x) x = F.dropout(x, training=self.training) x = self.conv2(x, edge_index)
type=str, default='results', help='filename to store results and the model (default: results)') args = parser.parse_args() np.random.seed(args.seed) torch.manual_seed(args.seed) # Training on CPU/GPU device device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print(device) # load dataset dataname = args.dataset rootname = osp.join(osp.abspath(''), 'data', dataname) dataset = Planetoid(root=rootname, name=dataname) num_nodes = dataset[0].x.shape[0] L = get_laplacian(dataset[0].edge_index, num_nodes=num_nodes, normalization='sym') L = sparse.coo_matrix( (L[1].numpy(), (L[0][0, :].numpy(), L[0][1, :].numpy())), shape=(num_nodes, num_nodes)) lobpcg_init = np.random.rand(num_nodes, 1) lambda_max, _ = lobpcg(L, lobpcg_init) lambda_max = lambda_max[0] # extract decomposition/reconstruction Masks FrameType = args.FrameType
def train():
    """Train a DeepGraphInfomax model on the dataset selected by the
    command-line arguments, keep the checkpoint with the lowest loss
    (with patience-based early stopping), then train a downstream node
    classifier on the learned embeddings.
    """
    # Get the command-line parameters.
    args = get_args()
    print(args.domain)
    # Decide the device.  NOTE(review): device index 'cuda:2' is hard-coded —
    # confirm the target machine actually has a third GPU.
    device = torch.device('cuda:2' if torch.cuda.is_available() and args.cuda else 'cpu')
    # Load the dataset chosen by `args.domain` (paths are machine-specific).
    if args.domain == 'Cora':
        dataset = Planetoid(root='/home/amax/xsx/data/gnn_datas/Cora', name='Cora', transform=T.NormalizeFeatures())
    elif args.domain == 'CiteSeer':
        dataset = Planetoid(root='/home/amax/xsx/data/gnn_datas/CiteSeer', name='CiteSeer', transform=T.NormalizeFeatures())
    elif args.domain == 'PubMed':
        dataset = Planetoid(root='/home/amax/xsx/data/gnn_datas/PubMed', name='PubMed', transform=T.NormalizeFeatures())
    elif args.domain == 'DBLP':
        dataset = DBLP(root='/home/amax/xsx/data/gnn_datas/DBLP', name='DBLP')
    elif args.domain == 'Cora-ML':
        dataset = CoraML(root='/home/amax/xsx/data/gnn_datas/Cora_ML', name='Cora_ML')
    elif args.domain == 'CS':
        dataset = Coauthor(root='/home/amax/xsx/data/gnn_datas/Coauthor/CS', name='CS')
    elif args.domain == 'Physics':
        dataset = Coauthor(root='/home/amax/xsx/data/gnn_datas/Coauthor/Physics', name='Physics')
    elif args.domain == 'Computers':
        dataset = Amazon(root='/home/amax/xsx/data/gnn_datas/Amazon/Computers', name='Computers')
    elif args.domain == 'Photo':
        dataset = Amazon(root='/home/amax/xsx/data/gnn_datas/Amazon/Photo', name='Photo')
    else:
        dataset = None
    if dataset is None:
        # Unknown domain: drop into the debugger rather than crashing.
        pdb.set_trace()
    data = dataset[0].to(device)
    # Create the model and optimizer.  The summary is a plain mean over node
    # embeddings; `corruption` provides the negative samples.
    model = DeepGraphInfomax(hidden_channels=args.hidden_dim,
                             encoder=Encoder(dataset.num_features, args.hidden_dim),
                             summary=lambda z, *args, **kwargs: z.mean(dim=0),
                             corruption=corruption).to(device)
    optimizer = Adam(model.parameters(), lr=args.lr)
    # Bookkeeping for early stopping / best-model tracking.
    start_time = time.time()
    bad_counter = 0
    best_epoch = 0
    least_loss = float("inf")
    best_model = None
    # Begin training.
    for epoch in range(args.epochs):
        # One training step.
        model.train()
        optimizer.zero_grad()
        pos_z, neg_z, summary = model(data.x, data.edge_index)
        loss = model.loss(pos_z, neg_z, summary)
        current_loss = loss.item()
        loss.backward()
        optimizer.step()
        # Keep a deep copy of the model whenever the loss reaches a new minimum.
        if current_loss < least_loss:
            least_loss = current_loss
            best_epoch = epoch + 1
            best_model = copy.deepcopy(model)
            bad_counter = 0
        else:
            bad_counter += 1
        # Early stop after `patience` epochs without improvement.
        if bad_counter >= args.patience:
            break
    print("Optimization Finished!")
    used_time = time.time() - start_time
    # NOTE(review): "Total epochs" prints best_epoch + 100, which does not
    # obviously equal the number of epochs actually run — confirm intent.
    print("Total epochs: {:2d}".format(best_epoch + 100))
    print("Best epochs: {:2d}".format(best_epoch))
    # Train a downstream node-classification model on the best encoder.
    node_classification(best_model, data, args, device, int(dataset.num_classes))
    print("Total time elapsed: {:.2f}s".format(used_time))
def GCN(dataset, params, Epochs, MonteSize, width, lr, savepath):
    """Run `MonteSize` Monte-Carlo training repetitions on the named dataset,
    checkpointing the best (lowest test-loss) model and saving train/test
    convergence curves to `savepath`.

    params[0] is the batch size, params[1] a model hyper-parameter used in the
    checkpoint naming; relies on module-level `resume` and `return_output`.
    """
    Batch_size = int(params[0])
    for Monte_iter in range(MonteSize):
        # Per-repetition bookkeeping.
        best_loss = float('inf')  # best test loss
        start_epoch = 0  # start from epoch 0 or last checkpoint epoch
        TrainConvergence = []
        TestConvergence = []

        # Build dataset, loaders and model (possibly resuming a checkpoint).
        root = '/data/GraphData/' + dataset
        if dataset == 'Cora':
            model_name = "GCN3"
            datasetroot = Planetoid(root=root, name=dataset).shuffle()
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            testloader = DataListLoader(datasetroot, batch_size=100, shuffle=False)
            model_to_save = './checkpoint/{}-{}-param_{}_{}-Mon_{}-ckpt.pth'.format(
                dataset, model_name, params[0], params[1], Monte_iter)
            if resume and os.path.exists(model_to_save):
                [net, TrainConvergence, TestConvergence, start_epoch] = ResumeModel(model_to_save)
                # Checkpoint already finished this run: skip to next repetition.
                if start_epoch >= Epochs - 1:
                    continue
            else:
                net = Net(datasetroot, width)
        elif dataset == 'ENZYMES' or dataset == 'MUTAG':
            model_name = "topk_pool_Net"
            # NOTE(review): no '/' between 'GraphData' and the dataset name here,
            # unlike the Cora branch — confirm this path is intended.
            root = '/data/GraphData' + dataset
            datasetroot = TUDataset(root, name=dataset)
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            testloader = DataListLoader(datasetroot, batch_size=100, shuffle=False)
            model_to_save = './checkpoint/{}-{}-param_{}_{}-Mon_{}-ckpt.pth'.format(
                dataset, model_name, params[0], params[1], Monte_iter)
            if resume and os.path.exists(model_to_save):
                [net, TrainConvergence, TestConvergence, start_epoch] = ResumeModel(model_to_save)
                if start_epoch >= Epochs - 1:
                    continue
            else:
                net = topk_pool_Net(datasetroot, width)
        elif dataset == 'MNIST':
            datasetroot = MNISTSuperpixels(root='/data/GraphData/' + dataset,
                                           transform=T.Cartesian()).shuffle()
            trainloader = DataListLoader(datasetroot, batch_size=Batch_size, shuffle=True)
            testloader = DataListLoader(datasetroot, batch_size=100, shuffle=False)
            model_name = 'SPlineNet'
            model_to_save = './checkpoint/{}-{}-param_{}_{}-Mon_{}-ckpt.pth'.format(
                dataset, model_name, params[0], params[1], Monte_iter)
            if resume and os.path.exists(model_to_save):
                [net, TrainConvergence, TestConvergence, start_epoch] = ResumeModel(model_to_save)
                if start_epoch >= Epochs - 1:
                    continue
            else:
                #net=Net(datasetroot,width)
                net = SPlineNet(datasetroot, width)
        elif dataset == 'CIFAR10':
            # NOTE(review): this branch reads `model_to_save` before assigning it
            # and never builds loaders — likely unfinished; confirm before use.
            if resume and os.path.exists(model_to_save):
                [net, TrainConvergence, TestConvergence, start_epoch] = ResumeModel(model_to_save)
                if start_epoch >= Epochs - 1:
                    continue
            else:
                net = getattr(CIFAR10_resnet, 'Resnet20_CIFAR10')(params[1])
        else:
            raise Exception(
                "The dataset is:{}, it isn't existed.".format(dataset))

        print('Let\'s use', torch.cuda.device_count(), 'GPUs!')
        torch.cuda.is_available()
        net = DataParallel(net)
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        net = net.to(device)
        #cudnn.benchmark = True
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
        for epoch in range(start_epoch, start_epoch + Epochs):
            if epoch < Epochs:
                logging(
                    'Batch size: {},ConCoeff: {},MonteSize:{},epoch:{}'.format(
                        params[0], params[1], Monte_iter, epoch))
                TrainLoss = train(trainloader, net, optimizer, criterion)
                TrainConvergence.append(statistics.mean(TrainLoss))
                TestConvergence.append(
                    statistics.mean(test(testloader, net, criterion)))
            else:
                break
            # Checkpoint whenever the test loss improves.
            if TestConvergence[epoch] < best_loss:
                logging('Saving..')
                state = {
                    'net': net.module,
                    'TrainConvergence': TrainConvergence,
                    'TestConvergence': TestConvergence,
                    'epoch': epoch,
                }
                if not os.path.isdir('checkpoint'):
                    os.mkdir('checkpoint')
                torch.save(state, model_to_save)
                best_loss = TestConvergence[epoch]
                if not os.path.exists('./%s' % model_name):
                    os.makedirs('./%s' % model_name)
                torch.save(
                    net.module.state_dict(),
                    './%s/%s_%s_%s_%s_%s_pretrain.pth' %
                    (model_name, dataset, model_name, params[0], params[1], Epochs))
            else:
                pass
            ## Save recurrence plots periodically.
            if epoch % 20 == 0:
                save_recurrencePlots_file = "../Results/RecurrencePlots/RecurrencePlots_{}_{}_BatchSize{}_ConCoeffi{}_epoch{}.png".format(
                    dataset, model_name, params[0], params[1], epoch)
                save_recurrencePlots(net, save_recurrencePlots_file)
        # Persist convergence curves for this Monte-Carlo repetition.
        FileName = "{}-{}-param_{}_{}-monte_{}".format(dataset, model_name,
                                                       params[0], params[1], Monte_iter)
        np.save(savepath + 'TrainConvergence-' + FileName, TrainConvergence)
        np.save(savepath + 'TestConvergence-' + FileName, TestConvergence)
        torch.cuda.empty_cache()
        print_nvidia_useage()
    if return_output == True:
        return TestConvergence[-1], net.module.fc.weight
    else:
        pass
log_file = os.path.join(args.res_dir, 'log.txt') # Save command line input. cmd_input = 'python ' + ' '.join(sys.argv) + '\n' with open(os.path.join(args.res_dir, 'cmd_input.txt'), 'a') as f: f.write(cmd_input) print('Command line input: ' + cmd_input + ' is saved.') with open(log_file, 'a') as f: f.write('\n' + cmd_input) if args.dataset.startswith('ogbl'): dataset = PygLinkPropPredDataset(name=args.dataset) split_edge = dataset.get_edge_split() data = dataset[0] else: path = osp.join('dataset', args.dataset) dataset = Planetoid(path, args.dataset) split_edge = do_edge_split(dataset) data = dataset[0] data.edge_index = split_edge['train']['edge'].t() if args.use_valedges_as_input: val_edge_index = split_edge['valid']['edge'].t() val_edge_index = to_undirected(val_edge_index) data.edge_index = torch.cat([data.edge_index, val_edge_index], dim=-1) val_edge_weight = torch.ones([val_edge_index.size(1), 1], dtype=int) data.edge_weight = torch.cat([data.edge_weight, val_edge_weight], 0) if args.dataset == 'ogbl-citation': args.eval_metric = 'mrr' elif args.dataset.startswith('ogbl'): args.eval_metric = 'hits'
from torch_geometric.datasets import Planetoid, CoraFull

# Report basic statistics (classes, nodes, edges, features) for three
# citation benchmarks.
for dataset_name in ['Cora', 'PubMed', 'CoraFull']:
    print(dataset_name)
    if dataset_name == 'CoraFull':
        dataset = CoraFull(root='/tmp/CoraFull')
    else:
        root = '/tmp/PubMed' if dataset_name == 'PubMed' else '/tmp/Cora'
        dataset = Planetoid(root=root, name=dataset_name)
    data = dataset[0]
    print("num classes=", dataset.num_classes)
    print("num nodes=", data.num_nodes)
    # PyG stores each undirected edge twice, so halve the directed count.
    print("num edges=", data.num_edges / 2)
    print("num features=", dataset.num_node_features)
import os.path as osp import torch import torch.nn.functional as F from torch_geometric.datasets import Planetoid import torch_geometric.transforms as T from torch_geometric.nn import SplineConv from torchdyn.models import NeuralDE dataset = 'Cora' path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset) dataset = Planetoid(path, dataset, transform=T.TargetIndegree()) data = dataset[0] data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool) data.train_mask[:data.num_nodes - 1000] = 1 data.val_mask = None data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool) data.test_mask[data.num_nodes - 500:] = 1 class GCNLayer(torch.nn.Module): def __init__(self, input_size, output_size): super(GCNLayer, self).__init__() if input_size != output_size: raise AttributeError('input size must equal output size') self.conv1 = SplineConv(input_size, output_size, dim=1, kernel_size=2).to(device)
parser = argparse.ArgumentParser(description="Model Name") parser.add_argument("-model", action="store", dest="model", type=int, default=1) parser.add_argument("-net", action="store", dest="net", type=int, default=1) pr = parser.parse_args() label_ids = defaultdict(list) if pr.net == 1: print("Data Cora") _data = Planetoid(root="./pcora", name="Cora") elif pr.net == 2: print("Data CiteSeer") _data = Planetoid(root="./pciteseer", name="Citeseer") elif pr.net == 3: print("Data Pubmed") _data = Planetoid(root="./ppubmed", name="Pubmed") elif pr.net == 4: print("Data CoraFull") _data = CoraFull("./Corafull") elif pr.net == 5: print("Data Coauthor CS") _data = Coauthor("./CS", "CS") elif pr.net == 6: print("Data Coauthor Physics") _data = Coauthor("./Physics", "Physics")
import networkx as nx
import torch
import numpy as np
import pandas as pd
from torch_geometric.datasets import Planetoid
from torch_geometric.utils.convert import to_networkx

# Download/load Cora and take its single graph.
dataset1 = Planetoid(root='/content/cora', name='Cora')
cora = dataset1[0]
# Convert the PyG graph into a NetworkX graph for drawing.
coragraph = to_networkx(cora)
# Color nodes by class label, ordered to match NetworkX's node list.
node_labels = cora.y[list(coragraph.nodes)].numpy()
import matplotlib.pyplot as plt
plt.figure(1, figsize=(14, 12))
nx.draw(coragraph, cmap=plt.get_cmap('Set1'), node_color=node_labels, node_size=75, linewidths=6)
plt.show()
import sys import networkx as nx import pdb from deepsnap.dataset import GraphDataset from deepsnap.batch import Batch from torch.utils.data import DataLoader from torch.nn.parallel import DistributedDataParallel n_gpus = 2 name = 'Cora' model_name = 'GCN' fixed_split = True pyg_dataset = Planetoid( './cora', name, transform=T.TargetIndegree()) # load some format of graph data if not fixed_split: graphs = GraphDataset.pyg_to_graphs( pyg_dataset, verbose=True, fixed_split=fixed_split) # transform to our format dataset = GraphDataset(graphs, task='node') # node, edge, link_pred, graph dataset_train, dataset_val, dataset_test = dataset.split( transductive=True, split_ratio=[0.8, 0.1, 0.1]) # transductive split, inductive split else: graphs_train, graphs_val, graphs_test = \ GraphDataset.pyg_to_graphs(pyg_dataset, verbose=True, fixed_split=fixed_split) # transform to our format
def main():
    """Train and (optionally) evaluate a GAT on a Planetoid dataset over
    multiple runs, logging per-epoch timing and accuracies.
    """
    parser = argparse.ArgumentParser(description='GAT')
    parser.add_argument("--dataset", type=str)
    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--num-layers", type=int, default=3, help="number of hidden layers")
    parser.add_argument("--lr", type=float, default=0.005, help="learning rate")
    parser.add_argument('--weight-decay', type=float, default=5e-4, help="weight decay")
    parser.add_argument("--num-hidden", type=int, default=8, help="number of hidden units")
    parser.add_argument("--dropout", type=float, default=.6, help="Dropout to use")
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument("--eval", action='store_true',
                        help='If not set, we will only do the training part.')
    parser.add_argument("--runs", type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    path = osp.join('dataset', args.dataset)
    dataset = Planetoid(path, args.dataset, transform=T.NormalizeFeatures())
    data = dataset[0]

    # Move tensors to the target device; build a sparse adjacency once.
    features = data.x.to(device)
    labels = data.y.to(device)
    edge_index = data.edge_index.to(device)
    adj = SparseTensor(row=edge_index[0], col=edge_index[1])
    train_mask = torch.BoolTensor(data.train_mask).to(device)
    val_mask = torch.BoolTensor(data.val_mask).to(device)
    test_mask = torch.BoolTensor(data.test_mask).to(device)

    model = GAT(num_layers=args.num_layers, in_feats=features.size(-1),
                num_hidden=args.num_hidden, num_classes=dataset.num_classes,
                heads=[8, 8, 1], dropout=args.dropout).to(device)
    loss_fcn = nn.CrossEntropyLoss()

    logger = Logger(args.runs, args)
    dur = []  # per-epoch wall-clock times (first 2 epochs excluded as warm-up)
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)
        for epoch in range(1, args.epochs + 1):
            model.train()
            # Skip the first two epochs when timing, to avoid warm-up noise.
            if epoch >= 3:
                t0 = time.time()
            # forward
            logits = model(features, adj)
            loss = loss_fcn(logits[train_mask], labels[train_mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue
            train_acc, val_acc, test_acc = evaluate(model, features, adj, labels,
                                                    train_mask, val_mask, test_mask)
            logger.add_result(run, (train_acc, val_acc, test_acc))
            print(
                "Run {:02d} | Epoch {:05d} | Loss {:.4f} | Train {:.4f} | Val {:.4f} | Test {:.4f}"
                .format(run, epoch, loss.item(), train_acc, val_acc, test_acc))
        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
def test_split(self):
    """Verify node/edge/link_pred split sizes on Cora, including disjoint
    resampling and custom split ratios.

    The `- 3` / `+ 1` arithmetic mirrors the library's convention of
    reserving one guaranteed element per split before ratio-proportional
    assignment; edge counts are doubled because each undirected edge is
    stored in both directions.
    """
    pyg_dataset = Planetoid("./cora", "Cora")
    dg = Graph.pyg_to_graph(pyg_dataset[0])

    # Default node-task split: 80/10/10.
    dg_node = dg.split()
    dg_num_nodes_reduced = dg.num_nodes - 3
    self.assertEqual(
        dg_node[0].node_label_index.shape[0],
        1 + int(dg_num_nodes_reduced * 0.8),
    )
    self.assertEqual(
        dg_node[1].node_label_index.shape[0],
        1 + int(dg_num_nodes_reduced * 0.1),
    )
    self.assertEqual(
        dg_node[2].node_label_index.shape[0],
        dg.num_nodes
        - 2
        - int(dg_num_nodes_reduced * 0.8)
        - int(dg_num_nodes_reduced * 0.1),
    )

    # Edge-task split: counts are per-direction, hence the factor of 2.
    dg_edge = dg.split(task="edge")
    dg_num_edges_reduced = dg.num_edges - 3
    edge_0 = 2 * (1 + int(dg_num_edges_reduced * 0.8))
    edge_1 = 2 * (1 + int(dg_num_edges_reduced * 0.1))
    edge_2 = dg.num_edges * 2 - edge_0 - edge_1
    self.assertEqual(dg_edge[0].edge_label_index.shape[1], edge_0)
    self.assertEqual(dg_edge[1].edge_label_index.shape[1], edge_1)
    self.assertEqual(dg_edge[2].edge_label_index.shape[1], edge_2)

    # Link-prediction split uses the same arithmetic as the edge task.
    dg_link = dg.split(task="link_pred")
    dg_num_edges_reduced = dg.num_edges - 3
    edge_0 = 2 * (1 + int(dg_num_edges_reduced * 0.8))
    edge_1 = 2 * (1 + int(dg_num_edges_reduced * 0.1))
    edge_2 = dg.num_edges * 2 - edge_0 - edge_1
    self.assertEqual(dg_link[0].edge_label_index.shape[1], edge_0)
    self.assertEqual(dg_link[1].edge_label_index.shape[1], edge_1)
    self.assertEqual(dg_link[2].edge_label_index.shape[1], edge_2)

    # Disjoint resampling of the training split at several message ratios.
    for message_ratio in [0.1, 0.2, 0.4, 0.8]:
        dg_link_resample = (
            dg_link[0].clone().resample_disjoint(
                message_ratio=message_ratio,
            )
        )
        positive_edge_num = (
            int(0.5 * dg_link[0].clone().edge_label_index.shape[1])
        )
        self.assertEqual(
            dg_link_resample.edge_label_index.shape[1],
            2 * (
                positive_edge_num
                - 1
                - int(message_ratio * (positive_edge_num - 2))
            )
        )

    # Custom split ratios for link prediction.
    for split_ratio in [[0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]]:
        dg_link_custom = (
            dg.split(task='link_pred', split_ratio=split_ratio)
        )
        dg_num_edges_reduced = dg.num_edges - 3
        edge_0 = 2 * (1 + int(dg_num_edges_reduced * split_ratio[0]))
        self.assertEqual(
            dg_link_custom[0].edge_label_index.shape[1],
            edge_0,
        )
        edge_1 = (
            2 * (
                1 + int(split_ratio[0] * dg_num_edges_reduced)
                + 1 + int(split_ratio[1] * dg_num_edges_reduced)
            )
            - edge_0
        )
        self.assertEqual(
            dg_link_custom[1].edge_label_index.shape[1],
            edge_1,
        )
        edge_2 = dg.num_edges * 2 - edge_0 - edge_1
        self.assertEqual(
            dg_link_custom[2].edge_label_index.shape[1],
            edge_2,
        )
dist = dist2src + dist2dst dist_over_2, dist_mod_2 = dist // 2, dist % 2 z = 1 + torch.min(dist2src, dist2dst) z += dist_over_2 * (dist_over_2 + dist_mod_2 - 1) z[src] = 1. z[dst] = 1. z[torch.isnan(z)] = 0. self.__max_z__ = max(int(z.max()), self.__max_z__) return z.to(torch.long) path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid') dataset = Planetoid(path, 'Cora') train_dataset = SEALDataset(dataset, num_hops=2, split='train') val_dataset = SEALDataset(dataset, num_hops=2, split='val') test_dataset = SEALDataset(dataset, num_hops=2, split='test') train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True) val_loader = DataLoader(val_dataset, batch_size=32) test_loader = DataLoader(test_dataset, batch_size=32) class DGCNN(torch.nn.Module): def __init__(self, hidden_channels, num_layers, GNN=GCNConv, k=0.6): super(DGCNN, self).__init__() if k < 1: # Transform percentile to number.
import os.path as osp import torch import torch.nn.functional as F from torch_geometric.datasets import Planetoid import torch_geometric.transforms as T from torch_geometric.nn import SplineConv dataset = 'Cora' path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset) dataset = Planetoid(path, dataset, T.TargetIndegree()) data = dataset[0] data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool) data.train_mask[:data.num_nodes - 1000] = 1 data.val_mask = None data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool) data.test_mask[data.num_nodes - 500:] = 1 class Net(torch.nn.Module): def __init__(self): super(Net, self).__init__() self.conv1 = SplineConv(dataset.num_features, 16, dim=1, kernel_size=2) self.conv2 = SplineConv(16, dataset.num_classes, dim=1, kernel_size=2) def forward(self): x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr x = F.dropout(x, training=self.training) x = F.elu(self.conv1(x, edge_index, edge_attr)) x = F.dropout(x, training=self.training)
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.data import ClusterData, ClusterLoader
from torch_geometric.nn import GCNConv

### Load data
# PubMed citation graph with row-normalized node features.
dataset = Planetoid(root='data/Planetoid', name='PubMed', transform=NormalizeFeatures())

print()
print(f'Dataset: {dataset}:')
print('==================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print(
    '==============================================================================================================='
)

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
def load_dataset(dataset, transform=None):
    """Load a benchmark graph dataset by (case-insensitive) name.

    Args:
        dataset: Dataset name — one of Cora/CiteSeer/PubMed (Planetoid),
            CS/Physics (Coauthor), or Computers/Photo (Amazon).
        transform: Optional transform forwarded to the dataset constructor.

    Returns:
        The loaded dataset object.

    Raises:
        ValueError: If the dataset name is not supported.
    """
    if dataset.lower() in ["cora", "citeseer", "pubmed"]:
        # NOTE: keeps the original (misspelled) "Plantoid" directory so
        # existing on-disk caches remain valid.
        path = os.path.join(".datasets", "Plantoid")
        dataset = Planetoid(path, dataset.lower(), transform=transform)
    elif dataset.lower() in ["cs", "physics"]:
        path = os.path.join(".datasets", "Coauthor", dataset.lower())
        dataset = Coauthor(path, dataset.lower(), transform=transform)
    elif dataset.lower() in ["computers", "photo"]:
        path = os.path.join(".datasets", "Amazon", dataset.lower())
        dataset = Amazon(path, dataset.lower(), transform=transform)
    else:
        print("Dataset not supported!")
        # BUG FIX: `assert False` is stripped under `python -O`, which would
        # have let the raw name string fall through as the return value.
        raise ValueError("Dataset not supported: {}".format(dataset))
    return dataset
import os.path as osp
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.nn.inits import uniform

# Embedding width for the encoder.
hidden_dim = 512
dataset = 'Cora'
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
data = Planetoid(path, dataset)[0]


class Encoder(nn.Module):
    """Single-layer GCN encoder (GCNConv + PReLU).

    With corrupt=True the node features are row-shuffled before encoding,
    producing the negative samples used by Deep Graph Infomax.
    """

    def __init__(self, hidden_dim):
        super(Encoder, self).__init__()
        # Input width comes from the module-level `data` object.
        self.conv = GCNConv(data.num_features, hidden_dim)
        self.prelu = nn.PReLU(hidden_dim)

    def forward(self, x, edge_index, corrupt=False):
        if corrupt:
            # Shuffle feature rows across nodes to build corrupted input.
            perm = torch.randperm(data.num_nodes)
            x = x[perm]
        x = self.conv(x, edge_index)
        x = self.prelu(x)
        return x
import torch.nn.functional as F import time import matplotlib as mpl from torch_geometric.datasets import Planetoid import torch_geometric.transforms as T from torch_geometric.nn import GCNConv, ChebConv mpl.use('agg') # Loading Dataset dataset = 'Cora' path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset) dataset = Planetoid(path, dataset, split='public', transform=T.NormalizeFeatures()) graph_data = dataset[0] num_train = len(graph_data.y[graph_data.train_mask]) num_test = len(graph_data.y[graph_data.test_mask]) # Initialise and parse command-line inputs parser = argparse.ArgumentParser(description='PT MCMC CNN') parser.add_argument('-s', '--samples', help='Number of samples', default=80, dest="samples", type=int)
def planetoid_dataset(name: str) -> Callable:
    """Return a factory that builds the named Planetoid dataset at a given root."""
    def factory(root):
        return Planetoid(root, name)

    return factory
#!/usr/bin/env python3 import torch.nn.functional as F from torch_geometric.nn import GCNConv import sys import torch.cuda.profiler as profiler from torch_geometric.datasets import Planetoid import pyprof import torch pyprof.init() dataset = Planetoid(root='/tmp/Cora', name='Cora') with torch.autograd.profiler.emit_nvtx(): profiler.start() class Net(torch.nn.Module): def __init__(self): super(Net, self).__init__() self.conv1 = GCNConv(dataset.num_node_features, 16) #self.conv2 = GCNConv(16, dataset.num_classes) def forward(self, data): x, edge_index = data.x, data.edge_index x = self.conv1(x, edge_index) #x = F.relu(x) #x = F.dropout(x, training=self.training) #x = self.conv2(x, edge_index)
def __init__(self, path): dataset = "Cora" # path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset) Planetoid(path, dataset) super(CoraDataset, self).__init__(path, dataset)
#imports import numpy as np import matplotlib.pyplot as plt import networkx as nx import torch import torch.nn.functional as F from tqdm import tqdm_notebook as tqdm torch.manual_seed(0) np.random.seed(0) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False dataset = 'CiteSeer' path = osp.join('..', 'data', dataset) dataset = Planetoid(path, dataset, T.NormalizeFeatures()) data = dataset[0] class Net(torch.nn.Module): def __init__(self, in_features, num_classes): super(Net, self).__init__() self.conv1 = GCNConv(in_features, 16, cached=True) self.conv2 = GCNConv(16, num_classes, cached=True) def forward(self, x, edge_index): # get the graph data x = F.relu(self.conv1(x, edge_index)) x = F.dropout(x, training=self.training) x = self.conv2(x, edge_index) return F.log_softmax(x, dim=1)
def load_data(dataset="cora", num_labels_per_class=20, missing_edge=False, verbose=0):
    """Load a Planetoid dataset and optionally rebuild the train/val/test masks.

    Args:
        dataset: One of "cora", "pubmed", "citeseer".
        num_labels_per_class: Training nodes kept per class; 20 keeps the
            original Planetoid split untouched.
        missing_edge: If True (only valid with the default split), remove all
            edges incident to test nodes.
        verbose: Print progress information when nonzero.

    Returns:
        The (possibly re-masked) graph `Data` object with `num_classes` set.
    """
    # Load data.
    path = os.path.join("data", dataset)
    if verbose:
        print("loading data from %s. %d labels per class." % (path, num_labels_per_class))
    assert dataset in ["cora", "pubmed", "citeseer"]
    dataset = Planetoid(root=path, name=dataset, transform=T.NormalizeFeatures())
    data = dataset[0]
    data.num_classes = dataset.num_classes
    if missing_edge:
        assert num_labels_per_class == 20
        # Zero out every row/column of the adjacency that touches a test node,
        # i.e. drop all edges incident to the test set.
        test_idx = data.test_mask.nonzero().squeeze().numpy()
        edge_index = data.edge_index.numpy()
        num_nodes = data.y.size(0)
        adj = sps.csc_matrix(
            (np.ones(edge_index.shape[1]), (edge_index[0], edge_index[1])),
            shape=(num_nodes, num_nodes))
        adj_mask = np.ones(num_nodes)
        adj_mask[test_idx] = 0
        adj_mask = sps.diags(adj_mask, format="csr")
        adj = adj_mask.dot(adj).dot(adj_mask.tocsc()).tocoo()
        edge_index = np.concatenate(
            [adj.row.reshape(1, -1), adj.col.reshape(1, -1)], axis=0)
        data.edge_index = torch.LongTensor(edge_index)
    # Original Planetoid setting.
    if num_labels_per_class == 20:
        return data
    # Get one-hot labels.
    temp = data.y.numpy()
    labels = np.zeros((len(temp), temp.max() + 1))
    for i in range(len(labels)):
        labels[i, temp[i]] = 1
    all_idx = list(range(len(labels)))
    # Select a fixed number of training data per class.
    idx_train = []
    class_cnt = np.zeros(
        labels.shape[1])  # number of nodes selected for each class
    for i in all_idx:
        # Stop once every class has its quota of training nodes.
        if (class_cnt >= num_labels_per_class).all():
            break
        # Skip nodes whose class quota is already full.
        if ((class_cnt + labels[i]) > num_labels_per_class).any():
            continue
        class_cnt += labels[i]
        idx_train.append(i)
    if verbose:
        print("number of training data: ", len(idx_train))
    train_mask = np.zeros((len(labels), ), dtype=int)
    val_mask = np.zeros((len(labels), ), dtype=int)
    test_mask = np.zeros((len(labels), ), dtype=int)
    for i in all_idx:
        if i in idx_train:
            train_mask[i] = 1
        elif sum(val_mask) < 500:  # select 500 validation data
            val_mask[i] = 1
        else:
            test_mask[i] = 1
    # NOTE(review): ByteTensor masks are the legacy mask dtype; newer PyTorch
    # expects BoolTensor — confirm the pinned version still accepts this.
    data.train_mask = torch.ByteTensor(train_mask)
    data.val_mask = torch.ByteTensor(val_mask)
    data.test_mask = torch.ByteTensor(test_mask)
    return data
import os.path as osp import torch import torch.nn as nn from torch_geometric.datasets import Planetoid from torch_geometric.nn import GCNConv, DeepGraphInfomax dataset = 'Cora' path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset) dataset = Planetoid(path, dataset) class Encoder(nn.Module): def __init__(self, in_channels, hidden_channels): super(Encoder, self).__init__() self.conv = GCNConv(in_channels, hidden_channels, cached=True) self.prelu = nn.PReLU(hidden_channels) def forward(self, x, edge_index): x = self.conv(x, edge_index) x = self.prelu(x) return x def corruption(x, edge_index): return x[torch.randperm(x.size(0))], edge_index device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model = DeepGraphInfomax(hidden_channels=512, encoder=Encoder(dataset.num_features, 512),
dist = dist2src + dist2dst dist_over_2, dist_mod_2 = dist // 2, dist % 2 z = 1 + torch.min(dist2src, dist2dst) z += dist_over_2 * (dist_over_2 + dist_mod_2 - 1) z[src] = 1. z[dst] = 1. z[torch.isnan(z)] = 0. self._max_z = max(int(z.max()), self._max_z) return z.to(torch.long) path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid') dataset = Planetoid(path, name='Cora') train_dataset = SEALDataset(dataset, num_hops=2, split='train') val_dataset = SEALDataset(dataset, num_hops=2, split='val') test_dataset = SEALDataset(dataset, num_hops=2, split='test') train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True) val_loader = DataLoader(val_dataset, batch_size=32) test_loader = DataLoader(test_dataset, batch_size=32) class DGCNN(torch.nn.Module): def __init__(self, hidden_channels, num_layers, GNN=GCNConv, k=0.6): super().__init__() if k < 1: # Transform percentile to number.