def load_data(dataset, ogb_root):
    """Load a node-classification dataset with its train/val/test split.

    Args:
        dataset: 'cora' or 'reddit' selects the built-in DGL dataset;
            any other value loads ogbn-products from OGB.
        ogb_root: root directory where OGB data is stored/downloaded.

    Returns:
        (graph, labels, num_classes, train_idx, val_idx, test_idx)
    """
    if dataset in ('cora', 'reddit'):
        ds = CoraGraphDataset() if dataset == 'cora' else RedditDataset(self_loop=True)
        graph = ds[0]
        # Convert the boolean split masks into index tensors.
        train_idx = graph.ndata['train_mask'].nonzero(as_tuple=True)[0]
        val_idx = graph.ndata['val_mask'].nonzero(as_tuple=True)[0]
        test_idx = graph.ndata['test_mask'].nonzero(as_tuple=True)[0]
        return graph, graph.ndata['label'], ds.num_classes, train_idx, val_idx, test_idx
    # NOTE(review): every name other than cora/reddit falls through to
    # ogbn-products — confirm this catch-all is intended.
    ds = DglNodePropPredDataset('ogbn-products', ogb_root)
    graph, labels = ds[0]
    split_idx = ds.get_idx_split()
    # OGB labels arrive with a trailing singleton dimension; drop it.
    return (graph, labels.squeeze(dim=-1), ds.num_classes,
            split_idx['train'], split_idx['valid'], split_idx['test'])
def train(n_epochs=100, lr=1e-2, weight_decay=5e-4, n_hidden=16,
          n_layers=1, activation=F.relu, dropout=0.5):
    """Train a full-batch GCN on the Cora dataset and report accuracy.

    Args:
        n_epochs: number of training epochs.
        lr: Adam learning rate.
        weight_decay: L2 regularization coefficient.
        n_hidden: hidden-layer width.
        n_layers: number of hidden layers.
        activation: activation function applied between layers.
        dropout: dropout rate.

    Fix: the per-epoch log label read "Locc"; corrected to "Loss".
    """
    data = CoraGraphDataset()
    print(data)
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    # Boolean masks that select the train/val/test node subsets.
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_classes
    model = GCN(g, in_feats, n_hidden, n_classes, n_layers, activation, dropout)
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    for epoch in range(n_epochs):
        model.train()
        logits = model(features)
        # Loss is computed on the training nodes only.
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # NOTE(review): `evalutate` is (mis)spelled to match the helper
        # defined elsewhere in this project — rename both together.
        acc = evalutate(model, features, labels, val_mask)
        print("Epoch {} | Loss {:.4f} | accuracy {:.4f}".format(epoch, loss.item(), acc))
    print()
    acc = evalutate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
def load_cora_data():
    """Fetch Cora and return (graph, features, labels, train_mask, test_mask)."""
    dataset = CoraGraphDataset()
    graph = dataset[0]
    ndata = graph.ndata
    return (graph,
            ndata['feat'],
            ndata['label'],
            ndata['train_mask'],
            ndata['test_mask'])
def load_cora_data():
    """Load Cora and return (graph, features, labels, train_mask, test_mask).

    Fixes:
    - Features are read from ``g.ndata['feat']`` instead of the deprecated
      ``CoraGraphDataset.features`` attribute (removed in recent DGL); the
      node data is already a float tensor, so no conversion is needed.
    - Dropped the unused ``valid_mask`` local — the validation mask was
      fetched but never returned or used.
    """
    data = CoraGraphDataset()
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    test_mask = g.ndata['test_mask']
    return g, features, labels, train_mask, test_mask
def get_dataset(opt):
    """Return the DGL citation dataset named by ``opt['dataset']``.

    Raises:
        ValueError: if the name is not cora/citeseer/pubmed.
    """
    loaders = {
        'cora': CoraGraphDataset,
        'citeseer': CiteseerGraphDataset,
        'pubmed': PubmedGraphDataset,
    }
    name = opt['dataset']
    if name not in loaders:
        raise ValueError('Unknown dataset: {}'.format(name))
    return loaders[name]()
def load_cora_data():
    """Load Cora, add a self-loop per node, and return graph + splits.

    Returns:
        (graph, features, labels, train_mask, test_mask, val_mask)
    """
    dataset = CoraGraphDataset()
    graph = dataset[0]
    feats = graph.ndata['feat']
    labels = graph.ndata['label']
    train_mask = graph.ndata['train_mask']
    test_mask = graph.ndata['test_mask']
    val_mask = graph.ndata['val_mask']
    # Zero-fill any node features created by subsequent graph mutations.
    graph.set_n_initializer(dgl.init.zero_initializer)
    # One self-loop per node so each node receives its own message.
    graph.add_edges(graph.nodes(), graph.nodes())
    return graph, feats, labels, train_mask, test_mask, val_mask
def load_data_default(dataset_name):
    """Load a citation dataset or an OGB node-property dataset by name.

    Returns:
        (graph, features, labels, train_mask, val_mask, test_mask,
         num_feats, num_classes) — for OGB, the masks are index tensors
        rather than boolean masks.
    """
    if dataset_name in ['cora', 'citeseer', 'pubmed']:
        if dataset_name == 'cora':
            dataset = CoraGraphDataset()
        elif dataset_name == 'citeseer':
            dataset = CiteseerGraphDataset()
        else:
            dataset = PubmedGraphDataset()
        graph = dataset[0]
        # Normalize self-loops: strip any existing ones, then add one per node.
        graph = graph.remove_self_loop().add_self_loop()
        print(graph)
        features = graph.ndata['feat']
        labels = graph.ndata['label']
        train_mask = graph.ndata['train_mask']
        val_mask = graph.ndata['val_mask']
        test_mask = graph.ndata['test_mask']
        num_feats = features.shape[1]
        num_classes = int(labels.max().item() + 1)
    else:
        dataset = DglNodePropPredDataset(name=dataset_name)
        splitted_mask = dataset.get_idx_split()
        train_mask = splitted_mask['train']
        val_mask = splitted_mask['valid']
        test_mask = splitted_mask['test']
        graph, labels = dataset[0]
        features = graph.ndata["feat"]
        num_feats = features.shape[1]
        num_classes = (labels.max() + 1).item()
        # Make the graph bidirectional by adding reverse edges.
        srcs, dsts = graph.all_edges()
        graph.add_edges(dsts, srcs)
        # Then normalize self-loops.
        graph = graph.remove_self_loop().add_self_loop()
    return graph, features, labels, train_mask, val_mask, test_mask, num_feats, num_classes
def train(n_epochs=100, lr=5e-3, weight_decay=5e-4, n_hidden=16, n_layers=1,
          activation=F.elu,
          n_heads=3,        # attention heads per hidden layer
          n_out_heads=1,    # attention heads in the output layer
          feat_drop=0.6, attn_drop=0.6, negative_slope=0.2):
    """Train a full-batch GAT on Cora and report validation/test accuracy."""
    data = CoraGraphDataset()
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_classes
    # Head counts: n_heads for each hidden layer, n_out_heads for the output.
    heads = [n_heads] * n_layers + [n_out_heads]
    model = GAT(g, n_layers, in_feats, n_hidden, n_classes, heads,
                activation, feat_drop, attn_drop, negative_slope)
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    for epoch in range(n_epochs):
        model.train()
        logits = model(features)
        # Loss is computed on the training nodes only.
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {} | Loss {:.4f} | Accuracy {:.4f} ".format(epoch, loss.item(), acc))
    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
def load(name):
    """Load a citation dataset by name, popping its tensors off the graph.

    Args:
        name: 'cora', 'citeseer' or 'pubmed'.

    Returns:
        (graph, feat, labels, train_mask, test_mask)

    Raises:
        ValueError: for an unknown name (previously an unknown name fell
            through the if/elif chain and crashed with ``NameError``).
    """
    if name == 'cora':
        dataset = CoraGraphDataset()
    elif name == 'citeseer':
        dataset = CiteseerGraphDataset()
    elif name == 'pubmed':
        dataset = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(name))
    graph = dataset[0]
    # pop() removes each tensor from ndata so the returned graph carries
    # no node features of its own.
    train_mask = graph.ndata.pop('train_mask')
    test_mask = graph.ndata.pop('test_mask')
    feat = graph.ndata.pop('feat')
    labels = graph.ndata.pop('label')
    return graph, feat, labels, train_mask, test_mask
def data_load(args):
    """Load the dataset named by ``args.dataset`` and record its metadata.

    Side effects: sets ``args.device``, ``args.in_feats`` and
    ``args.num_classes``.

    Fix: the result of ``g.int().to(args.device)`` was discarded, so the
    graph was never actually cast/moved; it is now rebound to ``g``.

    Returns:
        The loaded (device-resident) DGL graph.

    Raises:
        ValueError: for an unknown dataset name.
    """
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    g = data[0]
    args.device = torch.device(
        args.device if torch.cuda.is_available() else 'cpu')
    # ``int()``/``to()`` return new graphs — rebind instead of discarding.
    g = g.int().to(args.device)
    args.in_feats = g.ndata['feat'].shape[1]
    args.num_classes = data.num_classes
    return g
def load_data(dataset):
    """Load a citation dataset and build a normalized sparse adjacency.

    Fix: ``dtype=np.float`` — the ``np.float`` alias was removed in
    NumPy 1.24 and raises ``AttributeError``; the builtin ``float`` is
    the documented equivalent.

    Returns:
        (adj, features, labels, train_mask, val_mask, test_mask) where
        ``adj`` is a preprocessed torch sparse tensor.
    """
    if dataset == 'cora':
        data = CoraGraphDataset()
    elif dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif dataset == 'pubmed':
        data = PubmedGraphDataset()
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    # Round-trip through networkx to get a scipy adjacency matrix.
    nxg = g.to_networkx()
    adj = nx.to_scipy_sparse_matrix(nxg, dtype=float)
    adj = preprocess_adj(adj)
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    return adj, features, labels, train_mask, val_mask, test_mask
def process_dataset(name, epsilon):
    """Prepare a citation graph and its PPR-diffusion counterpart.

    Args:
        name: 'cora' or 'citeseer'.
        epsilon: sparsification threshold applied to the citeseer
            diffusion matrix.

    Returns:
        (graph, diff_graph, feat, label, train_idx, val_idx, test_idx,
         diff_weight)

    Raises:
        ValueError: for an unknown name (previously an unknown name fell
            through and crashed with ``NameError: dataset``).
    """
    if name == 'cora':
        dataset = CoraGraphDataset()
    elif name == 'citeseer':
        dataset = CiteseerGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(name))
    graph = dataset[0]
    feat = graph.ndata.pop('feat')
    label = graph.ndata.pop('label')
    train_mask = graph.ndata.pop('train_mask')
    val_mask = graph.ndata.pop('val_mask')
    test_mask = graph.ndata.pop('test_mask')
    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze()
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    # Dense personalized-PageRank diffusion of the graph.
    nx_g = dgl.to_networkx(graph)
    print('computing ppr')
    diff_adj = compute_ppr(nx_g, 0.2)
    print('computing end')
    if name == 'citeseer':
        print('additional processing')
        feat = th.tensor(preprocess_features(feat.numpy())).float()
        # Sparsify small diffusion weights, then rescale to [0, 1].
        diff_adj[diff_adj < epsilon] = 0
        scaler = MinMaxScaler()
        scaler.fit(diff_adj)
        diff_adj = scaler.transform(diff_adj)
    # Build the diffusion graph from the nonzero entries.
    diff_edges = np.nonzero(diff_adj)
    diff_weight = diff_adj[diff_edges]
    diff_graph = dgl.graph(diff_edges)
    graph = graph.add_self_loop()
    return graph, diff_graph, feat, label, train_idx, val_idx, test_idx, diff_weight
def main():
    """Run label propagation on a citation graph and print test accuracy."""
    # Pick the GPU when one is requested and available, otherwise CPU.
    use_gpu = torch.cuda.is_available() and args.gpu >= 0
    device = f'cuda:{args.gpu}' if use_gpu else 'cpu'
    # Load the requested dataset.
    if args.dataset == 'Cora':
        dataset = CoraGraphDataset()
    elif args.dataset == 'Citeseer':
        dataset = CiteseerGraphDataset()
    elif args.dataset == 'Pubmed':
        dataset = PubmedGraphDataset()
    else:
        raise ValueError('Dataset {} is invalid.'.format(args.dataset))
    g = dataset[0]
    g = dgl.add_self_loop(g)
    labels = g.ndata.pop('label').to(device).long()
    # Only train/test masks are needed; validation is not used here.
    train_mask = g.ndata.pop('train_mask')
    test_mask = g.ndata.pop('test_mask')
    train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze().to(device)
    test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze().to(device)
    g = g.to(device)
    # Propagate the known training labels across the graph.
    lp = LabelPropagation(args.num_layers, args.alpha)
    logits = lp(g, labels, mask=train_idx)
    correct = torch.sum(logits[test_idx].argmax(dim=1) == labels[test_idx]).item()
    test_acc = correct / len(test_idx)
    print("Test Acc {:.4f}".format(test_acc))
def get_dgl_cora(aggregation_type="sum", sparse_matrix=False):
    """Wrap DGL's Cora citation graph in this project's ``Dataset`` type.

    Args:
        aggregation_type: aggregation passed to ``nx_to_format``.
        sparse_matrix: whether ``nx_to_format`` builds a sparse matrix.
    """
    import dgl
    from dgl.data import CoraGraphDataset

    tr_set = CoraGraphDataset()
    G = tr_set.graph
    e = len(G.edges)
    n = len(G.nodes)
    d_l = tr_set.features.shape[1]
    is_multilabel = False
    n_classes = tr_set.num_labels
    # NOTE(review): `node_labels` is filled with the node *features* and
    # `targets` with the class labels — confirm the Dataset constructor
    # expects exactly this ordering.
    node_labels = torch.tensor(tr_set.features)
    targets = torch.tensor(tr_set.labels)
    # The splits arrive as masks; convert them to boolean tensors.
    idx_train = torch.BoolTensor(tr_set.train_mask)
    idx_valid = torch.BoolTensor(tr_set.val_mask)
    idx_test = torch.BoolTensor(tr_set.test_mask)
    edges, agg_matrix = nx_to_format(G, aggregation_type, sparse_matrix)
    return Dataset("cora", n, e, d_l, is_multilabel, n_classes, edges,
                   agg_matrix, node_labels, targets, idx_train, idx_valid,
                   idx_test)
loss = th.mean(th.sum(th.pow(ps - sharp_p, 1. / temp), dim=1, keepdim=True)) loss = lam * loss return loss if __name__ == '__main__': # Step 1: Prepare graph data and retrieve train/validation/test index ============================= # # Load from DGL dataset args = argument() print(args) if args.dataname == 'cora': dataset = CoraGraphDataset() elif args.dataname == 'citeseer': dataset = CiteseerGraphDataset() elif args.dataname == 'pubmed': dataset = PubmedGraphDataset() graph = dataset[0] graph = dgl.add_self_loop(graph) device = args.device # retrieve the number of classes n_classes = dataset.num_classes # retrieve labels of ground truth labels = graph.ndata.pop('label').to(device).long()
def main(args):
    """Train and evaluate a GAT (MXNet/Gluon) on a citation dataset."""
    # Load and preprocess the dataset.
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    g = data[0]
    # Choose the MXNet context.
    if args.gpu < 0:
        cuda = False
        ctx = mx.cpu(0)
    else:
        cuda = True
        ctx = mx.gpu(args.gpu)
    g = g.to(ctx)
    features = g.ndata['feat']
    labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
    # Convert each boolean mask into an index array for fancy indexing.
    mask = mx.nd.array(np.nonzero(g.ndata['train_mask'].asnumpy())[0], ctx=ctx)
    val_mask = mx.nd.array(np.nonzero(g.ndata['val_mask'].asnumpy())[0], ctx=ctx)
    test_mask = mx.nd.array(np.nonzero(g.ndata['test_mask'].asnumpy())[0], ctx=ctx)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    # Refresh self-loops so attention covers each node itself.
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    # Build the model: one head count per hidden layer plus the output heads.
    heads = [args.num_heads] * args.num_layers + [args.num_out_heads]
    model = GAT(g, args.num_layers, in_feats, args.num_hidden, n_classes,
                heads, elu, args.in_drop, args.attn_drop, args.alpha,
                args.residual)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    model.initialize(ctx=ctx)
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr})
    dur = []
    for epoch in range(args.epochs):
        if epoch >= 3:  # skip warm-up epochs when timing
            t0 = time.time()
        with mx.autograd.record():
            logits = model(features)
            loss = mx.nd.softmax_cross_entropy(logits[mask].squeeze(),
                                               labels[mask].squeeze())
        loss.backward()
        trainer.step(mask.shape[0])
        if epoch >= 3:
            dur.append(time.time() - t0)
        print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
            epoch, loss.asnumpy()[0], np.mean(dur), n_edges / np.mean(dur) / 1000))
        val_accuracy = evaluate(model, features, labels, val_mask)
        print("Validation Accuracy {:.4f}".format(val_accuracy))
        if args.early_stop and stopper.step(val_accuracy, model):
            break
    print()
    # Restore the best parameters if early stopping checkpointed them.
    if args.early_stop:
        model.load_parameters('model.param')
    test_accuracy = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(test_accuracy))
def main(args):
    """Train an SGC (Simplified Graph Convolution) model on a citation dataset."""
    # Load and preprocess the dataset.
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(args.gpu)
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))
    n_edges = g.number_of_edges()
    # Refresh self-loops.
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    # SGC: a single linear layer over k-hop propagated (cached) features.
    model = SGConv(in_feats, n_classes, k=2, cached=True, bias=args.bias)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:  # skip warm-up epochs in the timing average
            t0 = time.time()
        logits = model(g, features)
        # Loss over the training nodes only.
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc = evaluate(model, g, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))
    print()
    acc = evaluate(model, g, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def dgl_main():
    """Train a VGAE for link prediction on a citation graph."""
    # Load the dataset; keep edges unidirectional for edge splitting.
    if args.dataset == 'cora':
        dataset = CoraGraphDataset(reverse_edge=False)
    elif args.dataset == 'citeseer':
        dataset = CiteseerGraphDataset(reverse_edge=False)
    elif args.dataset == 'pubmed':
        dataset = PubmedGraphDataset(reverse_edge=False)
    else:
        raise NotImplementedError
    graph = dataset[0]
    # Node features become the encoder input.
    feats = graph.ndata.pop('feat').to(device)
    in_dim = feats.shape[-1]
    adj_orig = graph.adjacency_matrix().to_dense()
    # Hold out 10% of positive links (plus sampled negatives) for testing.
    (train_edge_idx, val_edges, val_edges_false,
     test_edges, test_edges_false) = mask_test_edges_dgl(graph, adj_orig)
    graph = graph.to(device)
    # Build the training graph from the retained edges.
    train_edge_idx = torch.tensor(train_edge_idx).to(device)
    train_graph = dgl.edge_subgraph(graph, train_edge_idx, relabel_nodes=False)
    train_graph = train_graph.to(device)
    adj = train_graph.adjacency_matrix().to_dense().to(device)
    # Class-balance weights and normalization for the reconstruction loss.
    weight_tensor, norm = compute_loss_para(adj)
    vgae_model = model.VGAEModel(in_dim, args.hidden1, args.hidden2)
    vgae_model = vgae_model.to(device)
    optimizer = torch.optim.Adam(vgae_model.parameters(), lr=args.learning_rate)
    print('Total Parameters:', sum([p.nelement() for p in vgae_model.parameters()]))
    for epoch in range(args.epochs):
        t = time.time()
        # Full-graph training step.
        vgae_model.train()
        logits = vgae_model.forward(graph, feats)
        # Weighted reconstruction BCE minus the KL regularizer.
        loss = norm * F.binary_cross_entropy(
            logits.view(-1), adj.view(-1), weight=weight_tensor)
        kl_divergence = 0.5 / logits.size(0) * (
            1 + 2 * vgae_model.log_std - vgae_model.mean**2 -
            torch.exp(vgae_model.log_std)**2).sum(1).mean()
        loss -= kl_divergence
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_acc = get_acc(logits, adj)
        val_roc, val_ap = get_scores(val_edges, val_edges_false, logits)
        print("Epoch:", '%04d' % (epoch + 1),
              "train_loss=", "{:.5f}".format(loss.item()),
              "train_acc=", "{:.5f}".format(train_acc),
              "val_roc=", "{:.5f}".format(val_roc),
              "val_ap=", "{:.5f}".format(val_ap),
              "time=", "{:.5f}".format(time.time() - t))
    test_roc, test_ap = get_scores(test_edges, test_edges_false, logits)
    print("End of training!", "test_roc=", "{:.5f}".format(test_roc),
          "test_ap=", "{:.5f}".format(test_ap))
def main(args):
    """Train a simplified CompGCN node classifier on Cora."""
    # Step 1: graph data and train/validation/test indices.
    if args.dataset == 'cora':
        dataset = CoraGraphDataset()
        graph = dataset[0]
    else:
        raise NotImplementedError
    # Device selection.
    if args.gpu >= 0 and th.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu)
    else:
        device = 'cpu'
    num_classes = dataset.num_classes
    labels = graph.ndata.pop('label').to(device).long()
    n_feats = graph.ndata.pop('feat').to(device)
    in_dim = n_feats.shape[-1]
    # Boolean masks -> index tensors on the target device.
    train_mask = graph.ndata.pop('train_mask')
    val_mask = graph.ndata.pop('val_mask')
    test_mask = graph.ndata.pop('test_mask')
    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze().to(device)
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze().to(device)
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze().to(device)
    # Cora is homogeneous, but DGL stores edges bidirectionally; these
    # direction masks are consumed as edge features by the CompGCN layer.
    in_edges_mask, out_edges_mask = extract_cora_edge_direction(graph)
    graph.edata['in_edges_mask'] = th.tensor(in_edges_mask)
    graph.edata['out_edges_mask'] = th.tensor(out_edges_mask)
    graph = graph.to(device)
    # Step 2: model.
    compgcn_model = CompGCN(in_dim=in_dim, hid_dim=args.hid_dim,
                            out_dim=num_classes, num_layers=args.num_layers,
                            comp_fn=args.comp_fn, dropout=args.dropout,
                            activation=F.relu, batchnorm=True)
    compgcn_model = compgcn_model.to(device)
    # Step 3: training components.
    loss_fn = th.nn.CrossEntropyLoss()
    optimizer = optim.Adam(compgcn_model.parameters(), lr=args.lr, weight_decay=5e-4)
    # Step 4: full-graph training epochs.
    for epoch in range(args.max_epoch):
        compgcn_model.train()
        logits = compgcn_model.forward(graph, n_feats)
        tr_loss = loss_fn(logits[train_idx], labels[train_idx])
        tr_acc = th.sum(logits[train_idx].argmax(dim=1) == labels[train_idx]).item() / len(train_idx)
        valid_loss = loss_fn(logits[val_idx], labels[val_idx])
        valid_acc = th.sum(logits[val_idx].argmax(dim=1) == labels[val_idx]).item() / len(val_idx)
        optimizer.zero_grad()
        tr_loss.backward()
        optimizer.step()
        print("In epoch {}, Train Acc: {:.4f} | Train Loss: {:.4f}; Valid Acc: {:.4f} | Valid loss: {:.4f}".
              format(epoch, tr_acc, tr_loss.item(), valid_acc, valid_loss.item()))
    # Final evaluation on the test split.
    compgcn_model.eval()
    logits = compgcn_model.forward(graph, n_feats)
    test_loss = loss_fn(logits[test_idx], labels[test_idx])
    test_acc = th.sum(logits[test_idx].argmax(dim=1) == labels[test_idx]).item() / len(test_idx)
    print("Test Acc: {:.4f} | Test loss: {:.4f}".format(test_acc, test_loss.item()))
def main(args):
    """Train a DAGNN classifier with validation-loss-based early stopping."""
    # Step 1: graph data and split indices.
    if args.dataset == 'Cora':
        dataset = CoraGraphDataset()
    elif args.dataset == 'Citeseer':
        dataset = CiteseerGraphDataset()
    elif args.dataset == 'Pubmed':
        dataset = PubmedGraphDataset()
    else:
        raise ValueError('Dataset {} is invalid.'.format(args.dataset))
    graph = dataset[0]
    graph = graph.remove_self_loop().add_self_loop()
    if args.gpu >= 0 and torch.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu)
    else:
        device = 'cpu'
    n_classes = dataset.num_classes
    labels = graph.ndata.pop('label').to(device).long()
    feats = graph.ndata.pop('feat').to(device)
    n_features = feats.shape[-1]
    # Boolean masks -> index tensors on the target device.
    train_mask = graph.ndata.pop('train_mask')
    val_mask = graph.ndata.pop('val_mask')
    test_mask = graph.ndata.pop('test_mask')
    train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze().to(device)
    val_idx = torch.nonzero(val_mask, as_tuple=False).squeeze().to(device)
    test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze().to(device)
    graph = graph.to(device)
    # Step 2: model.
    model = DAGNN(k=args.k, in_dim=n_features, hid_dim=args.hid_dim,
                  out_dim=n_classes, batchnorm=args.batchnorm,
                  dropout=args.dropout, drop_bef=args.drop_bef)
    model = model.to(device)
    # Step 3: training components.
    loss_fn = F.cross_entropy
    opt = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.lamb)
    # Step 4: training epochs.
    loss = None        # lowest validation loss seen so far
    best_acc = None    # test accuracy at that point
    no_improvement = 0
    epochs = trange(args.epochs, desc='Accuracy & Loss')
    for _ in epochs:
        model.train()
        logits = model(graph, feats)
        train_loss = loss_fn(logits[train_idx], labels[train_idx])
        opt.zero_grad()
        train_loss.backward()
        opt.step()
        # Full-graph evaluation on all three splits.
        (train_loss, train_acc, valid_loss, valid_acc,
         test_loss, test_acc) = evaluate(model, graph, feats, labels,
                                         (train_idx, val_idx, test_idx))
        epochs.set_description(
            'Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}'
            .format(train_acc, train_loss.item(), valid_acc, valid_loss.item()))
        if loss is None:
            # First epoch initializes the best-so-far trackers.
            loss = valid_loss
            best_acc = test_acc
        elif valid_loss > loss:
            no_improvement += 1
            if no_improvement == args.early_stopping:
                print('Early stop.')
                break
        else:
            no_improvement = 0
            loss = valid_loss
            best_acc = test_acc
    print("Test Acc {:.4f}".format(best_acc))
    return best_acc
def main(args):
    """Train GATFF on a citation dataset, optionally with a random split."""
    set_seeds(args.rand_seed)
    # Load and preprocess the dataset.
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(args.gpu)
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    if args.shuffle:
        # Re-draw the split with the same per-split sizes as the standard one.
        print('Random splitting...')
        train_dev_test_tuple = (train_mask.sum().data.item(),
                                val_mask.sum().data.item(),
                                test_mask.sum().data.item())
        train_mask, val_mask, test_mask = random_split(
            N=g.num_nodes(), train_dev_test_tuple=train_dev_test_tuple,
            random_seed=args.rand_seed)
    else:
        print('standard splitting')
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))
    # Refresh self-loops.
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()
    # Build the model: hidden-layer heads plus the output heads.
    heads = [args.num_heads] * args.num_layers + [args.num_out_heads]
    model = GATFF(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                  heads, F.elu, args.in_drop, args.attn_drop,
                  args.negative_slope, args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    dur = []
    best_val_acc = 0
    test_acc = 0
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:  # skip warm-up epochs when timing
            t0 = time.time()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        train_acc = accuracy(logits[train_mask], labels[train_mask])
        if args.fastmode:
            # Reuse the training forward pass for validation accuracy.
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
        if args.early_stop:
            if stopper.step(val_acc, model):
                break
        # Track test accuracy at the best validation accuracy.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            test_acc = evaluate(model, features, labels, test_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} TestAcc {:.4f}| ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  test_acc, n_edges / np.mean(dur) / 1000))
    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
    print('Test Accuracy {:.4f}'.format(test_acc))
    return test_acc
def main(args):
    """Train a GCN with neighbor sampling while timing each pipeline stage."""
    # Load the dataset.
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    elif args.dataset == 'reddit':
        data = RedditDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    g = data[0]
    g.create_formats_()
    # Feature storage with an optional GPU cache for hot node data.
    st = pg.Storage(g=g, data=g.ndata, gpu='cuda:0',
                    cache_rate=args.cache_rate, nodes=g.nodes())
    features = g.ndata.pop('feat')
    labels = g.ndata.pop('label')
    train_mask = g.ndata.pop('train_mask')
    val_mask = g.ndata.pop('val_mask')
    test_mask = g.ndata.pop('test_mask')
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    device = gpu = 'cuda:0'
    cpu = 'cpu'
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))
    # Create the GCN model on the GPU.
    model = MyGCN(in_feats, args.n_hidden, n_classes, args.n_layers,
                  F.relu, args.dropout)
    model = model.to(gpu)
    # One fanout per layer plus one extra, e.g. [2, 2, 2, 2] or [3, 3, 3].
    fanouts = [args.neighbor_number for _ in range(args.n_layers + 1)]
    sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)
    # NOTE(review): this iterates over ALL node ids (0..len(train_mask)),
    # not just the training nodes — confirm intended for this benchmark.
    train_nids = torch.arange(0, len(train_mask), 1, dtype=torch.long)
    dataloader = dgl.dataloading.NodeDataLoader(g, train_nids, sampler,
                                                batch_size=args.batch_size,
                                                shuffle=True, drop_last=True,
                                                num_workers=0)
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    dur = []
    model.train()
    for epoch in range(args.n_epochs):
        # Cumulative per-stage timings: [unused, block-to-device,
        # feature/label query, forward, loss, backward+step].
        tS = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        t0 = time.time()
        for count, (in_nodes, out_nodes, blocks) in enumerate(dataloader):
            t1 = time.time()
            blocks = [b.to(device) for b in blocks]
            t2 = time.time()
            feat_in = st.Query(fname='feat', nodes=in_nodes)
            labels_out = st.Query(fname='label', nodes=out_nodes)
            t3 = time.time()
            feat_out = model(blocks, feat_in)
            t4 = time.time()
            loss = loss_fcn(feat_out, labels_out)
            t5 = time.time()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            t6 = time.time()
            tS[1] = tS[1] + t2 - t1
            tS[2] = tS[2] + t3 - t2
            tS[3] = tS[3] + t4 - t3
            tS[4] = tS[4] + t5 - t4
            tS[5] = tS[5] + t6 - t5
        tE = time.time()
        dur.append(time.time() - t0)
        acc = 0.0  # full evaluation is disabled in this timing harness
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))
        print(tS[1:], '\nTotal:', tE - t0, " s ")
    model.eval()
    print("____________________________")
def main(args):
    """Train a MixHop classifier with SGD + StepLR and accuracy-based early stopping."""
    # Step 1: graph data and split indices.
    if args.dataset == 'Cora':
        dataset = CoraGraphDataset()
    elif args.dataset == 'Citeseer':
        dataset = CiteseerGraphDataset()
    elif args.dataset == 'Pubmed':
        dataset = PubmedGraphDataset()
    else:
        raise ValueError('Dataset {} is invalid.'.format(args.dataset))
    graph = dataset[0]
    graph = dgl.add_self_loop(graph)
    if args.gpu >= 0 and torch.cuda.is_available():
        device = 'cuda:{}'.format(args.gpu)
    else:
        device = 'cpu'
    n_classes = dataset.num_classes
    labels = graph.ndata.pop('label').to(device).long()
    feats = graph.ndata.pop('feat').to(device)
    n_features = feats.shape[-1]
    # Boolean masks -> index tensors on the target device.
    train_mask = graph.ndata.pop('train_mask')
    val_mask = graph.ndata.pop('val_mask')
    test_mask = graph.ndata.pop('test_mask')
    train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze().to(device)
    val_idx = torch.nonzero(val_mask, as_tuple=False).squeeze().to(device)
    test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze().to(device)
    graph = graph.to(device)
    # Step 2: model.
    model = MixHop(in_dim=n_features, hid_dim=args.hid_dim, out_dim=n_classes,
                   num_layers=args.num_layers, p=args.p,
                   input_dropout=args.input_dropout,
                   layer_dropout=args.layer_dropout,
                   activation=torch.tanh, batchnorm=True)
    model = model.to(device)
    # Step 3: training components.
    loss_fn = nn.CrossEntropyLoss()
    opt = optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.lamb)
    scheduler = optim.lr_scheduler.StepLR(opt, args.step_size, gamma=args.gamma)
    # Step 4: training epochs.
    acc = 0            # best validation accuracy so far
    no_improvement = 0
    epochs = trange(args.epochs, desc='Accuracy & Loss')
    for _ in epochs:
        # Full-graph training step.
        model.train()
        logits = model(graph, feats)
        train_loss = loss_fn(logits[train_idx], labels[train_idx])
        train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx]).item() / len(train_idx)
        opt.zero_grad()
        train_loss.backward()
        opt.step()
        # Validation on the same forward pass' logits.
        model.eval()
        with torch.no_grad():
            valid_loss = loss_fn(logits[val_idx], labels[val_idx])
            valid_acc = torch.sum(logits[val_idx].argmax(dim=1) == labels[val_idx]).item() / len(val_idx)
        epochs.set_description(
            'Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}'
            .format(train_acc, train_loss.item(), valid_acc, valid_loss.item()))
        if valid_acc < acc:
            no_improvement += 1
            if no_improvement == args.early_stopping:
                print('Early stop.')
                break
        else:
            no_improvement = 0
            acc = valid_acc
        scheduler.step()
    # Final evaluation on the test split.
    model.eval()
    logits = model(graph, feats)
    test_acc = torch.sum(logits[test_idx].argmax(dim=1) == labels[test_idx]).item() / len(test_idx)
    print("Test Acc {:.4f}".format(test_acc))
    return test_acc
def main(args):
    """Train a MoNet model (MXNet/Gluon) on a citation dataset.

    Builds per-edge pseudo-coordinates from in-degrees, trains with Adam,
    and prints per-epoch metrics plus final test accuracy.

    Args:
        args: parsed CLI namespace; uses ``dataset``, ``gpu``, ``n_hidden``,
            ``n_layers``, ``pseudo_dim``, ``n_kernels``, ``dropout``,
            ``n_epochs``, ``lr`` and ``weight_decay``.

    Raises:
        ValueError: if ``args.dataset`` is not cora/citeseer/pubmed.
    """
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        ctx = mx.cpu(0)
    else:
        ctx = mx.gpu(args.gpu)
    g = g.to(ctx)

    features = g.ndata['feat']
    # SoftmaxCELoss expects float labels on the same context.
    labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().asscalar(),
           val_mask.sum().asscalar(),
           test_mask.sum().asscalar()))

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    us, vs = g.edges()
    us = us.asnumpy()
    vs = vs.asnumpy()
    # PERF FIX: the original built pseudo-coordinates in a Python loop with
    # two g.in_degree() queries per edge (O(E) graph round-trips). Query all
    # in-degrees once and index with NumPy; the resulting values are
    # identical, and the explicit float32 cast matches nd.array's previous
    # default dtype for the list-of-floats input.
    deg_inv_sqrt = 1.0 / np.sqrt(g.in_degrees().asnumpy())
    pseudo = np.stack([deg_inv_sqrt[us], deg_inv_sqrt[vs]], axis=1)
    pseudo = nd.array(pseudo.astype(np.float32), ctx=ctx)

    # create MoNet model
    model = MoNet(g,
                  in_feats,
                  args.n_hidden,
                  n_classes,
                  args.n_layers,
                  args.pseudo_dim,
                  args.n_kernels,
                  args.dropout)
    model.initialize(ctx=ctx)

    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr,
                             'wd': args.weight_decay})

    # initialize graph; skip the first epochs when timing (warm-up cost)
    dur = []
    for epoch in range(args.n_epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            pred = model(features, pseudo)
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if epoch >= 3:
            # asscalar() forces a sync so the timing below is accurate.
            loss.asscalar()
            dur.append(time.time() - t0)

        acc = evaluate(model, features, pseudo, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(
                  epoch, np.mean(dur), loss.asscalar(), acc,
                  n_edges / np.mean(dur) / 1000))

    # test set accuracy
    acc = evaluate(model, features, pseudo, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
def main(args):
    """Train a GCN with MXNet/Gluon on a citation dataset and print accuracy.

    Args:
        args: parsed CLI namespace; uses ``dataset``, ``gpu``, ``self_loop``,
            ``n_hidden``, ``n_layers``, ``dropout``, ``n_epochs``, ``lr``
            and ``weight_decay``.

    Raises:
        ValueError: if ``args.dataset`` is not cora/citeseer/pubmed.
    """
    # Pick the dataset loader; anything else is an error.
    if args.dataset == 'cora':
        dataset = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        dataset = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        dataset = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    graph = dataset[0]

    # Choose the execution context; keep a flag for the norm transfer below.
    if args.gpu < 0:
        cuda = False
        ctx = mx.cpu(0)
    else:
        cuda = True
        ctx = mx.gpu(args.gpu)
    graph = graph.to(ctx)

    feats = graph.ndata['feat']
    # SoftmaxCELoss wants float labels on the training context.
    labels = mx.nd.array(graph.ndata['label'], dtype="float32", ctx=ctx)
    train_mask = graph.ndata['train_mask']
    val_mask = graph.ndata['val_mask']
    test_mask = graph.ndata['test_mask']
    in_feats = feats.shape[1]
    n_classes = dataset.num_labels
    n_edges = dataset.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().asscalar(),
           val_mask.sum().asscalar(),
           test_mask.sum().asscalar()))

    # Optionally refresh self-loops so each node aggregates its own features.
    if args.self_loop:
        graph = dgl.remove_self_loop(graph)
        graph = dgl.add_self_loop(graph)

    # Symmetric normalization: store D_in^{-1/2} and D_out^{-1/2} per node.
    in_norm = mx.nd.power(graph.in_degrees().astype('float32'), -0.5)
    out_norm = mx.nd.power(graph.out_degrees().astype('float32'), -0.5)
    if cuda:
        in_norm = in_norm.as_in_context(ctx)
        out_norm = out_norm.as_in_context(ctx)
    graph.ndata['in_norm'] = mx.nd.expand_dims(in_norm, 1)
    graph.ndata['out_norm'] = mx.nd.expand_dims(out_norm, 1)

    # Build and initialize the model.
    model = GCN(graph, args.n_hidden, n_classes, args.n_layers, 'relu',
                args.dropout,)
    model.initialize(ctx=ctx)

    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam', {
        'learning_rate': args.lr,
        'wd': args.weight_decay
    })

    # Training loop; the first epochs are excluded from timing (warm-up).
    epoch_times = []
    for epoch in range(args.n_epochs):
        if epoch >= 3:
            t0 = time.time()

        with mx.autograd.record():
            pred = model(feats)
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if epoch >= 3:
            epoch_times.append(time.time() - t0)
        acc = evaluate(model, feats, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(epoch_times),
                                          loss.asscalar(), acc,
                                          n_edges / np.mean(epoch_times) / 1000))

    # Final accuracy on the test split.
    acc = evaluate(model, feats, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
optimizer.step() acc = evaluate(g, features, labels, val_mask, model) print("Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} " . format(epoch, loss.item(), acc)) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--dataset", type=str, default="cora", help="Dataset name ('cora', 'citeseer', 'pubmed').") args = parser.parse_args() print(f'Training with DGL built-in GraphConv module.') # load and preprocess dataset transform = AddSelfLoop() # by default, it will first remove self-loops to prevent duplication if args.dataset == 'cora': data = CoraGraphDataset(transform=transform) elif args.dataset == 'citeseer': data = CiteseerGraphDataset(transform=transform) elif args.dataset == 'pubmed': data = PubmedGraphDataset(transform=transform) else: raise ValueError('Unknown dataset: {}'.format(args.dataset)) g = data[0] device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') g = g.int().to(device) features = g.ndata['feat'] labels = g.ndata['label'] masks = g.ndata['train_mask'], g.ndata['val_mask'], g.ndata['test_mask'] # normalization degs = g.in_degrees().float()
def main(args):
    """Train a GCN on a citation dataset with TensorFlow and print accuracy.

    Args:
        args: parsed CLI namespace; uses ``dataset``, ``gpu``, ``n_hidden``,
            ``n_layers``, ``dropout``, ``n_epochs``, ``lr`` and
            ``weight_decay``.

    Raises:
        ValueError: if ``args.dataset`` is not cora/citeseer/pubmed.
    """
    # Resolve the dataset loader via a dispatch table.
    loaders = {
        'cora': CoraGraphDataset,
        'citeseer': CiteseerGraphDataset,
        'pubmed': PubmedGraphDataset,
    }
    if args.dataset not in loaders:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    data = loaders[args.dataset]()
    g = data[0]

    # Pick the TF device string and move the graph there.
    device = "/cpu:0" if args.gpu < 0 else "/gpu:{}".format(args.gpu)
    g = g.to(device)

    with tf.device(device):
        feats = g.ndata['feat']
        labels = g.ndata['label']
        train_mask = g.ndata['train_mask']
        val_mask = g.ndata['val_mask']
        test_mask = g.ndata['test_mask']
        in_feats = feats.shape[1]
        n_classes = data.num_labels
        n_edges = data.graph.number_of_edges()
        print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
              (n_edges, n_classes,
               train_mask.numpy().sum(),
               val_mask.numpy().sum(),
               test_mask.numpy().sum()))

        # Refresh self-loops so each node aggregates its own features.
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
        n_edges = g.number_of_edges()

        # Symmetric normalization D^{-1/2}; isolated nodes map inf -> 0.
        degs = tf.cast(tf.identity(g.in_degrees()), dtype=tf.float32)
        norm = tf.math.pow(degs, -0.5)
        norm = tf.where(tf.math.is_inf(norm), tf.zeros_like(norm), norm)
        g.ndata['norm'] = tf.expand_dims(norm, -1)

        # create GCN model
        model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                    tf.nn.relu, args.dropout)

        optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)
        loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True)

        # Training loop; the first epochs are excluded from timing (warm-up).
        epoch_times = []
        for epoch in range(args.n_epochs):
            if epoch >= 3:
                t0 = time.time()

            with tf.GradientTape() as tape:
                logits = model(feats)
                loss_value = loss_fcn(labels[train_mask], logits[train_mask])
                # Manually Weight Decay
                # We found Tensorflow has a different implementation on weight decay
                # of Adam(W) optimizer with PyTorch. And this results in worse results.
                # Manually adding weights to the loss to do weight decay solves this problem.
                for weight in model.trainable_weights:
                    loss_value = loss_value + \
                        args.weight_decay*tf.nn.l2_loss(weight)

            grads = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))

            if epoch >= 3:
                epoch_times.append(time.time() - t0)
            acc = evaluate(model, feats, labels, val_mask)
            print(
                "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                "ETputs(KTEPS) {:.2f}".format(
                    epoch, np.mean(epoch_times), loss_value.numpy().item(),
                    acc, n_edges / np.mean(epoch_times) / 1000))

        # Final accuracy on the test split.
        acc = evaluate(model, feats, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))
def graphSAGE_eval_pipeline():
    """Train a GraphSAGE model as configured by GRAPHSAGE_CONFIG.

    Loads the configured citation dataset, trains full-graph GraphSAGE
    with cross-entropy on the train split, and prints test accuracy.

    Raises:
        ValueError: if the configured dataset is not cora/citeseer/pubmed.
    """
    cfg = GRAPHSAGE_CONFIG  # shorthand for the module-level config dict

    # Dataset selection (cora uses the new-style dataset class; the other
    # two use the legacy citegrh loaders, as in the original pipeline).
    if cfg['dataset'] == 'cora':
        data = CoraGraphDataset()
    elif cfg['dataset'] == 'citeseer':
        data = citegrh.load_citeseer()
    elif cfg['dataset'] == 'pubmed':
        data = citegrh.load_pubmed()
    else:
        raise ValueError('Unknown dataset: {}'.format(
            cfg['dataset']))

    g = data[0]
    feats = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = feats.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    # Move tensors to the configured GPU, if any.
    cuda = cfg['gpu'] >= 0
    if cuda:
        torch.cuda.set_device(cfg['gpu'])
        feats = feats.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print("use cuda:", cfg['gpu'])

    # Node-id tensors for each split.
    train_nid = train_mask.nonzero().squeeze()
    val_nid = val_mask.nonzero().squeeze()
    test_nid = test_mask.nonzero().squeeze()

    # graph preprocess and calculate normalization factor
    g = dgl.remove_self_loop(g)
    n_edges = g.number_of_edges()
    if cuda:
        g = g.int().to(cfg['gpu'])

    # create GraphSAGE model
    model = GraphSAGE(in_feats,
                      cfg['n-hidden'],
                      n_classes,
                      cfg['n-layers'],
                      F.relu,
                      cfg['dropout'],
                      cfg['aggregator-type'])
    if cuda:
        model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg['lr'],
                                 weight_decay=cfg['weight-decay'])

    # Training loop; early epochs excluded from timing (warm-up).
    epoch_times = []
    for epoch in range(cfg['n-epochs']):
        model.train()
        if epoch >= 3:
            t0 = time.time()

        logits = model(g, feats)
        loss = F.cross_entropy(logits[train_nid], labels[train_nid])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            epoch_times.append(time.time() - t0)
        acc = evaluate(model, g, feats, labels, val_nid)

    print()
    acc = evaluate(model, g, feats, labels, test_nid)
    print("Test Accuracy {:.4f}".format(acc))
def main(args):
    """Train a GCN (PyTorch) on a citation dataset and print test accuracy.

    Args:
        args: parsed CLI namespace; uses ``dataset``, ``gpu``, ``self_loop``,
            ``n_hidden``, ``n_layers``, ``dropout``, ``n_epochs``, ``lr``
            and ``weight_decay``.

    Raises:
        ValueError: if ``args.dataset`` is not cora/citeseer/pubmed.
    """
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    g = data[0]

    # Move the graph to the requested GPU (ids cast to int32 first).
    cuda = args.gpu >= 0
    if cuda:
        g = g.int().to(args.gpu)

    feats = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = feats.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # Optionally refresh self-loops so each node sees its own features.
    if args.self_loop:
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # Symmetric normalization D^{-1/2}; isolated nodes map inf -> 0.
    norm = torch.pow(g.in_degrees().float(), -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
                args.dropout)
    if cuda:
        model.cuda()

    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Training loop; early epochs excluded from timing (warm-up).
    epoch_times = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()

        logits = model(feats)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            epoch_times.append(time.time() - t0)
        acc = evaluate(model, feats, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(epoch_times),
                                          loss.item(), acc,
                                          n_edges / np.mean(epoch_times) / 1000))

    print()
    acc = evaluate(model, feats, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
def main(args):
    """Pre-train a DGI encoder, then fit a linear classifier on its embeddings.

    Stage 1 trains Deep Graph Infomax unsupervised with early stopping on
    the DGI loss (best weights checkpointed to ``best_dgi.pkl``). Stage 2
    freezes the encoder and trains a softmax classifier on the embeddings.

    Args:
        args: parsed CLI namespace; uses ``dataset``, ``gpu``, ``self_loop``,
            ``n_hidden``, ``n_layers``, ``dropout``, ``dgi_lr``,
            ``n_dgi_epochs``, ``weight_decay``, ``patience``,
            ``classifier_lr`` and ``n_classifier_epochs``.

    Raises:
        ValueError: if ``args.dataset`` is not cora/citeseer/pubmed.
    """
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    g = data[0]

    # Pick the TF device string and move the graph there.
    device = "/cpu:0" if args.gpu < 0 else "/gpu:{}".format(args.gpu)
    g = g.to(device)

    with tf.device(device):
        feats = g.ndata['feat']
        labels = g.ndata['label']
        train_mask = g.ndata['train_mask']
        val_mask = g.ndata['val_mask']
        test_mask = g.ndata['test_mask']
        in_feats = feats.shape[1]
        n_classes = data.num_labels
        n_edges = data.graph.number_of_edges()

        # Optionally refresh self-loops.
        if args.self_loop:
            g = dgl.remove_self_loop(g)
            g = dgl.add_self_loop(g)
            n_edges = g.number_of_edges()

        # Unsupervised encoder trained with the DGI objective.
        dgi = DGI(g,
                  in_feats,
                  args.n_hidden,
                  args.n_layers,
                  tf.keras.layers.PReLU(
                      alpha_initializer=tf.constant_initializer(0.25)),
                  args.dropout)
        dgi_optimizer = tf.keras.optimizers.Adam(learning_rate=args.dgi_lr)

        # train deep graph infomax
        cnt_wait = 0   # epochs since the loss last improved
        best = 1e9     # lowest DGI loss seen so far
        best_t = 0     # epoch index of that best loss
        durations = []
        for epoch in range(args.n_dgi_epochs):
            if epoch >= 3:
                t0 = time.time()

            with tf.GradientTape() as tape:
                loss = dgi(feats)
                # Manually Weight Decay
                # We found Tensorflow has a different implementation on weight decay
                # of Adam(W) optimizer with PyTorch. And this results in worse results.
                # Manually adding weights to the loss to do weight decay solves this problem.
                for weight in dgi.trainable_weights:
                    loss = loss + \
                        args.weight_decay * tf.nn.l2_loss(weight)

            grads = tape.gradient(loss, dgi.trainable_weights)
            dgi_optimizer.apply_gradients(zip(grads, dgi.trainable_weights))

            # Early stopping on the unsupervised loss; checkpoint the best.
            if loss < best:
                best = loss
                best_t = epoch
                cnt_wait = 0
                dgi.save_weights('best_dgi.pkl')
            else:
                cnt_wait += 1

            if cnt_wait == args.patience:
                print('Early stopping!')
                break

            if epoch >= 3:
                durations.append(time.time() - t0)
            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | "
                  "ETputs(KTEPS) {:.2f}".format(
                      epoch, np.mean(durations), loss.numpy().item(),
                      n_edges / np.mean(durations) / 1000))

        # create classifier model
        classifier = Classifier(args.n_hidden, n_classes)
        classifier_optimizer = tf.keras.optimizers.Adam(
            learning_rate=args.classifier_lr)

        # Restore the best encoder and freeze its embeddings.
        print('Loading {}th epoch'.format(best_t))
        dgi.load_weights('best_dgi.pkl')
        embeds = dgi.encoder(feats, corrupt=False)
        embeds = tf.stop_gradient(embeds)

        durations = []
        loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True)
        for epoch in range(args.n_classifier_epochs):
            if epoch >= 3:
                t0 = time.time()

            with tf.GradientTape() as tape:
                preds = classifier(embeds)
                loss = loss_fcn(labels[train_mask], preds[train_mask])
                # In original code, there's no weight decay applied in this part
                # link: https://github.com/PetarV-/DGI/blob/master/execute.py#L121

            grads = tape.gradient(loss, classifier.trainable_weights)
            classifier_optimizer.apply_gradients(
                zip(grads, classifier.trainable_weights))

            if epoch >= 3:
                durations.append(time.time() - t0)
            acc = evaluate(classifier, embeds, labels, val_mask)
            print(
                "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                "ETputs(KTEPS) {:.2f}".format(
                    epoch, np.mean(durations), loss.numpy().item(), acc,
                    n_edges / np.mean(durations) / 1000))

        print()
        acc = evaluate(classifier, embeds, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))