def main(args):
    """Train a GAT node classifier on a citation dataset and print test accuracy.

    Expects `args` to carry: dataset, gpu, num_heads, num_out_heads,
    num_layers, num_hidden, in_drop, attn_drop, negative_slope, residual,
    lr, weight_decay, epochs, fastmode, early_stop.
    """
    # Load and preprocess dataset.
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.int().to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    # Add self loop (remove first so no node ends up with two).
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # Create model: per-layer attention-head counts; the output layer uses
    # its own head count.
    heads = ([args.num_heads] * (args.num_layers - 1)) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Use optimizer.
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Training loop. Epoch timing starts at epoch 3 so warm-up epochs do not
    # skew the mean; NOTE(review): np.mean(dur) is NaN for epochs < 3.
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            if cuda:
                # Synchronize so the wall-clock timing covers GPU work.
                torch.cuda.synchronize()
            t0 = time.time()
        # Forward.
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            if cuda:
                torch.cuda.synchronize()
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            # Reuse training-forward logits instead of a second forward pass.
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                # stopper.step checkpoints the model and returns True once
                # patience is exhausted.
                if stopper.step(val_acc, model):
                    break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        # Restore the best checkpoint written by EarlyStopping.
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
num_out_heads = 1 num_layers = 1 num_hidden = 8 residual = False in_drop = 0.6 attn_drop = 0.6 lr = 0.005 weight_decay = 5e-4 negative_slope = 0.2 if __name__ == '__main__': data = CiteseerGraphDataset() g = data[0] # add self loop g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) args = GATArgs() if not os.path.isfile("gat_model.p"): model = main(g, data, args) torch.save(model, "gat_model.p") else: model = torch.load("gat_model.p") model.eval() num_hops = args.num_layers + 1 node_id = 136 g.ndata[ExplainerTags.NODE_FEATURES] = g.ndata['feat'].float().to( torch.device("cpu")) explainer = GNNExplainer(g, model, num_hops, epochs=200, edge_size=0.015) subgraph, feat_mask = explainer.explain_node(node_id)
def graphSAGE_eval_pipeline():
    """Train a full-graph GraphSAGE node classifier and print test accuracy.

    All hyper-parameters are read from the module-level GRAPHSAGE_CONFIG dict
    (keys: dataset, gpu, n-hidden, n-layers, dropout, aggregator-type, lr,
    weight-decay, n-epochs).
    """
    # Load the dataset. All three branches now use the new-style DGL dataset
    # classes, consistent with the other pipelines in this file; the legacy
    # `citegrh.load_*` loaders do not support the `data[0]` / `g.ndata[...]`
    # access pattern used below.
    if GRAPHSAGE_CONFIG['dataset'] == 'cora':
        data = CoraGraphDataset()
    elif GRAPHSAGE_CONFIG['dataset'] == 'citeseer':
        data = CiteseerGraphDataset()
    elif GRAPHSAGE_CONFIG['dataset'] == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(
            GRAPHSAGE_CONFIG['dataset']))

    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_labels
    # Count edges on the graph itself; `data.graph` is a legacy-dataset
    # attribute that the new-style dataset classes do not provide.
    n_edges = g.number_of_edges()

    if GRAPHSAGE_CONFIG['gpu'] < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(GRAPHSAGE_CONFIG['gpu'])
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print("use cuda:", GRAPHSAGE_CONFIG['gpu'])

    # Convert boolean split masks into node-id index tensors.
    train_nid = train_mask.nonzero().squeeze()
    val_nid = val_mask.nonzero().squeeze()
    test_nid = test_mask.nonzero().squeeze()

    # Graph preprocess.
    g = dgl.remove_self_loop(g)
    n_edges = g.number_of_edges()
    if cuda:
        g = g.int().to(GRAPHSAGE_CONFIG['gpu'])

    # Create GraphSAGE model.
    model = GraphSAGE(
        in_feats,
        GRAPHSAGE_CONFIG['n-hidden'],
        n_classes,
        GRAPHSAGE_CONFIG['n-layers'],
        F.relu,
        GRAPHSAGE_CONFIG['dropout'],
        GRAPHSAGE_CONFIG['aggregator-type'],
    )
    if cuda:
        model.cuda()

    # Use optimizer.
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=GRAPHSAGE_CONFIG['lr'],
                                 weight_decay=GRAPHSAGE_CONFIG['weight-decay'])

    # Training loop. Timing starts at epoch 3 so warm-up epochs do not skew
    # the mean epoch time.
    dur = []
    for epoch in range(GRAPHSAGE_CONFIG['n-epochs']):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # Forward.
        logits = model(g, features)
        loss = F.cross_entropy(logits[train_nid], labels[train_nid])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, g, features, labels, val_nid)
        # print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
        #       "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(), acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, g, features, labels, test_nid)
    print("Test Accuracy {:.4f}".format(acc))
def main(args):
    """Two-stage DGI pipeline (TensorFlow): unsupervised encoder pre-training
    followed by a linear classifier on frozen embeddings; prints test accuracy.
    """
    # Load and preprocess dataset.
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        device = "/cpu:0"
    else:
        device = "/gpu:{}".format(args.gpu)
        g = g.to(device)

    with tf.device(device):
        features = g.ndata['feat']
        labels = g.ndata['label']
        train_mask = g.ndata['train_mask']
        val_mask = g.ndata['val_mask']
        test_mask = g.ndata['test_mask']
        in_feats = features.shape[1]
        n_classes = data.num_labels
        n_edges = data.graph.number_of_edges()

        # Add self loop.
        if args.self_loop:
            g = dgl.remove_self_loop(g)
            g = dgl.add_self_loop(g)
        n_edges = g.number_of_edges()

        # Create DGI model.
        dgi = DGI(g,
                  in_feats,
                  args.n_hidden,
                  args.n_layers,
                  tf.keras.layers.PReLU(
                      alpha_initializer=tf.constant_initializer(0.25)),
                  args.dropout)

        dgi_optimizer = tf.keras.optimizers.Adam(learning_rate=args.dgi_lr)

        # Train deep graph infomax with patience-based early stopping on the
        # unsupervised loss; the best weights are checkpointed to disk.
        cnt_wait = 0
        best = 1e9
        best_t = 0
        dur = []
        for epoch in range(args.n_dgi_epochs):
            if epoch >= 3:
                t0 = time.time()
            with tf.GradientTape() as tape:
                loss = dgi(features)
                # Manually Weight Decay
                # We found Tensorflow has a different implementation on weight decay
                # of Adam(W) optimizer with PyTorch. And this results in worse results.
                # Manually adding weights to the loss to do weight decay solves this problem.
                for weight in dgi.trainable_weights:
                    loss = loss + \
                        args.weight_decay * tf.nn.l2_loss(weight)

                grads = tape.gradient(loss, dgi.trainable_weights)
                dgi_optimizer.apply_gradients(zip(grads, dgi.trainable_weights))

            if loss < best:
                best = loss
                best_t = epoch
                cnt_wait = 0
                # NOTE(review): '.pkl' extension, but these are Keras weights
                # written via save_weights — confirm naming is intentional.
                dgi.save_weights('best_dgi.pkl')
            else:
                cnt_wait += 1

            if cnt_wait == args.patience:
                print('Early stopping!')
                break

            if epoch >= 3:
                dur.append(time.time() - t0)

            print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | "
                  "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur),
                                                loss.numpy().item(),
                                                n_edges / np.mean(dur) / 1000))

        # Create classifier model.
        classifier = Classifier(args.n_hidden, n_classes)

        classifier_optimizer = tf.keras.optimizers.Adam(
            learning_rate=args.classifier_lr)

        # Train classifier on embeddings from the best (checkpointed) encoder;
        # stop_gradient freezes the encoder output.
        print('Loading {}th epoch'.format(best_t))
        dgi.load_weights('best_dgi.pkl')
        embeds = dgi.encoder(features, corrupt=False)
        embeds = tf.stop_gradient(embeds)
        dur = []
        loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True)
        for epoch in range(args.n_classifier_epochs):
            if epoch >= 3:
                t0 = time.time()
            with tf.GradientTape() as tape:
                preds = classifier(embeds)
                loss = loss_fcn(labels[train_mask], preds[train_mask])
                # Manually Weight Decay
                # We found Tensorflow has a different implementation on weight decay
                # of Adam(W) optimizer with PyTorch. And this results in worse results.
                # Manually adding weights to the loss to do weight decay solves this problem.
                # In original code, there's no weight decay applied in this part
                # link: https://github.com/PetarV-/DGI/blob/master/execute.py#L121
                # for weight in classifier.trainable_weights:
                #     loss = loss + \
                #         args.weight_decay * tf.nn.l2_loss(weight)

                grads = tape.gradient(loss, classifier.trainable_weights)
                classifier_optimizer.apply_gradients(
                    zip(grads, classifier.trainable_weights))

            if epoch >= 3:
                dur.append(time.time() - t0)

            acc = evaluate(classifier, embeds, labels, val_mask)
            print(
                "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur),
                                              loss.numpy().item(), acc,
                                              n_edges / np.mean(dur) / 1000))

        print()
        acc = evaluate(classifier, embeds, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))
def main(args):
    """Two-stage DGI pipeline (PyTorch): unsupervised encoder pre-training
    followed by a logistic classifier on frozen embeddings; prints test
    accuracy.
    """
    # Load and preprocess dataset.
    data = load_data(args)
    g = data[0]
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # Older torch versions lack BoolTensor; fall back to ByteTensor masks.
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = g.number_of_edges()

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # Add self loop.
    if args.self_loop:
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    if args.gpu >= 0:
        g = g.to(args.gpu)
    # Create DGI model.
    dgi = DGI(g,
              in_feats,
              args.n_hidden,
              args.n_layers,
              nn.PReLU(args.n_hidden),
              args.dropout)

    if cuda:
        dgi.cuda()

    dgi_optimizer = torch.optim.Adam(dgi.parameters(),
                                     lr=args.dgi_lr,
                                     weight_decay=args.weight_decay)

    # Train deep graph infomax with patience-based early stopping on the
    # unsupervised loss; best weights are checkpointed to disk.
    cnt_wait = 0
    best = 1e9
    best_t = 0
    dur = []
    for epoch in range(args.n_dgi_epochs):
        dgi.train()
        if epoch >= 3:
            t0 = time.time()

        dgi_optimizer.zero_grad()
        loss = dgi(features)
        loss.backward()
        dgi_optimizer.step()

        if loss < best:
            best = loss
            best_t = epoch
            cnt_wait = 0
            torch.save(dgi.state_dict(), 'best_dgi.pkl')
        else:
            cnt_wait += 1

        if cnt_wait == args.patience:
            print('Early stopping!')
            break

        if epoch >= 3:
            dur.append(time.time() - t0)

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            n_edges / np.mean(dur) / 1000))

    # Create classifier model.
    classifier = Classifier(args.n_hidden, n_classes)
    if cuda:
        classifier.cuda()

    classifier_optimizer = torch.optim.Adam(classifier.parameters(),
                                            lr=args.classifier_lr,
                                            weight_decay=args.weight_decay)

    # Train classifier on embeddings from the best (checkpointed) encoder;
    # detach() freezes the encoder output.
    print('Loading {}th epoch'.format(best_t))
    dgi.load_state_dict(torch.load('best_dgi.pkl'))
    embeds = dgi.encoder(features, corrupt=False)
    embeds = embeds.detach()
    dur = []
    for epoch in range(args.n_classifier_epochs):
        classifier.train()
        if epoch >= 3:
            t0 = time.time()

        classifier_optimizer.zero_grad()
        preds = classifier(embeds)
        loss = F.nll_loss(preds[train_mask], labels[train_mask])
        loss.backward()
        classifier_optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(classifier, embeds, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(classifier, embeds, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def main():
    """Train an ego-graph GNN (LCGNN-style) node classifier.

    Parses CLI hyper-parameters, loads precomputed local-clustering ego
    graphs from `data/`, trains with early stopping on validation accuracy,
    checkpoints the best model under `saved/`, and logs metrics to wandb.
    """
    parser = argparse.ArgumentParser(description='OGBN (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--project', type=str, default='lcgnn')
    parser.add_argument('--dataset', type=str, default='flickr')
    parser.add_argument('--model', type=str, default='gcn')
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_layers', type=int, default=4)
    parser.add_argument('--num_heads', type=int, default=1)
    parser.add_argument('--ego_size', type=int, default=64)
    parser.add_argument('--hidden_size', type=int, default=64)
    parser.add_argument('--input_dropout', type=float, default=0.2)
    parser.add_argument('--hidden_dropout', type=float, default=0.4)
    parser.add_argument('--weight_decay', type=float, default=0.0005)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--early_stopping', type=int, default=20)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--eval_batch_size', type=int, default=512)
    parser.add_argument('--batch_norm', type=int, default=1)
    parser.add_argument('--residual', type=int, default=1)
    parser.add_argument('--linear_layer', type=int, default=1)
    parser.add_argument('--num_workers', type=int, default=4,
                        help='number of workers')
    parser.add_argument("--optimizer", type=str, default='adamw',
                        choices=['adam', 'adamw'], help="optimizer")
    parser.add_argument('--warmup', type=int, default=0)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--load_path', type=str, default='')
    parser.add_argument('--exp_name', type=str, default='')
    args = parser.parse_args()
    print(args)

    # Seed numpy and torch (CPU + CUDA) for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Short-key hyper-parameter dict used to build the experiment name.
    para_dic = {'': args.model, 'nl': args.num_layers, 'nh': args.num_heads,
                'es': args.ego_size, 'hs': args.hidden_size,
                'id': args.input_dropout, 'hd': args.hidden_dropout,
                'bs': args.batch_size, 'op': args.optimizer,
                'lr': args.lr, 'wd': args.weight_decay,
                'bn': args.batch_norm, 'rs': args.residual,
                'll': args.linear_layer, 'sd': args.seed}
    para_dic['warm'] = args.warmup
    exp_name = get_exp_name(args.dataset, para_dic, args.exp_name)
    # The wandb run name drops the seed suffix so seeds group together.
    wandb_name = exp_name.replace('_sd' + str(args.seed), '')
    wandb.init(name=wandb_name, project=args.project)
    wandb.config.update(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    # Dataset dispatch: custom loader for papers100M, SAINT datasets for the
    # inductive benchmarks, plain OGB otherwise.
    if args.dataset == 'papers100M':
        dataset = MyNodePropPredDataset(name=args.dataset)
    elif args.dataset in ['flickr', 'reddit', 'yelp', 'amazon']:
        dataset = SAINTDataset(name=args.dataset)
    else:
        dataset = DglNodePropPredDataset(name=f'ogbn-{args.dataset}')

    # Sets give O(1) membership tests in the partitioning loop below.
    split_idx = dataset.get_idx_split()
    train_idx = set(split_idx['train'].cpu().numpy())
    valid_idx = set(split_idx['valid'].cpu().numpy())
    test_idx = set(split_idx['test'].cpu().numpy())

    # Ego graphs were precomputed at a fixed size per dataset; they are
    # truncated to args.ego_size below.
    tmp_ego_size = 256 if args.dataset == 'products' else args.ego_size
    if args.ego_size < 64:
        tmp_ego_size = 64
    ego_graphs_unpadded = np.load(
        f'data/{args.dataset}-lc-ego-graphs-{tmp_ego_size}.npy',
        allow_pickle=True)
    conds_unpadded = np.load(
        f'data/{args.dataset}-lc-conds-{tmp_ego_size}.npy',
        allow_pickle=True)

    ego_graphs_train, ego_graphs_valid, ego_graphs_test = [], [], []
    cut_train, cut_valid, cut_test = [], [], []

    # Partition ego graphs by split. ego_graph[0] is the center node id; the
    # "cut" vector marks the prefix up to the minimum-conductance position.
    for i, ego_graph in enumerate(ego_graphs_unpadded):
        idx = ego_graph[0]
        assert len(ego_graph) == len(conds_unpadded[i])
        if len(ego_graph) > args.ego_size:
            ego_graph = ego_graph[:args.ego_size]
            conds_unpadded[i] = conds_unpadded[i][:args.ego_size]
        cut_position = np.argmin(conds_unpadded[i])
        cut = torch.zeros(len(ego_graph), dtype=torch.float32)
        cut[:cut_position + 1] = 1.0
        cut = cut.unsqueeze(1)
        if idx in train_idx:
            ego_graphs_train.append(ego_graph)
            cut_train.append(cut)
        elif idx in valid_idx:
            ego_graphs_valid.append(ego_graph)
            cut_valid.append(cut)
        elif idx in test_idx:
            ego_graphs_test.append(ego_graph)
            cut_test.append(cut)
        else:
            print(f"{idx} not in train/valid/test idx")

    num_classes = dataset.num_classes

    # Normalize both dataset flavors into `data = (graph, labels)`.
    if isinstance(dataset, DglNodePropPredDataset):
        data = dataset[0]
        graph = dgl.remove_self_loop(data[0])
        graph = dgl.add_self_loop(graph)
        if args.dataset == 'arxiv' or args.dataset == 'papers100M':
            # These OGB graphs are directed; symmetrize them, keeping feats
            # (to_bidirected drops ndata).
            temp_graph = dgl.to_bidirected(graph)
            temp_graph.ndata['feat'] = graph.ndata['feat']
            graph = temp_graph
        data = (graph, data[1].long())
        graph = data[0]
        graph.ndata['labels'] = data[1]
    elif isinstance(dataset, SAINTDataset):
        data = dataset[0]
        edge_index = data.edge_index
        graph = dgl.DGLGraph((edge_index[0], edge_index[1]))
        graph = dgl.remove_self_loop(graph)
        graph = dgl.add_self_loop(graph)
        graph.ndata['feat'] = data.x
        label = data.y
        if len(label.shape) == 1:
            label = label.unsqueeze(1)
        data = (graph, label)
    else:
        raise NotImplementedError

    train_dataset = NodeClassificationDataset(data, ego_graphs_train,
                                              cut_train)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_workers,
                              collate_fn=batcher(), pin_memory=True)
    valid_dataset = NodeClassificationDataset(data, ego_graphs_valid,
                                              cut_valid)
    valid_loader = DataLoader(valid_dataset, batch_size=args.eval_batch_size,
                              shuffle=False, num_workers=args.num_workers,
                              collate_fn=batcher(), pin_memory=True)
    test_dataset = NodeClassificationDataset(data, ego_graphs_test, cut_test)
    test_loader = DataLoader(test_dataset, batch_size=args.eval_batch_size,
                             shuffle=False, num_workers=args.num_workers,
                             collate_fn=batcher(), pin_memory=True)

    # Input size is feature dim + 1 for the appended cut indicator channel.
    model = GNNModel(conv_type=args.model,
                     input_size=graph.ndata['feat'].shape[1] + 1,
                     hidden_size=args.hidden_size,
                     num_layers=args.num_layers,
                     num_classes=num_classes,
                     batch_norm=args.batch_norm,
                     residual=args.residual,
                     idropout=args.input_dropout,
                     dropout=args.hidden_dropout,
                     linear_layer=args.linear_layer,
                     num_heads=args.num_heads).to(device)
    wandb.watch(model, log='all')

    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    print('model parameters:', pytorch_total_params)

    if not os.path.exists('saved'):
        os.mkdir('saved')

    model.reset_parameters()

    # Evaluation-only mode: load a checkpoint, report all splits, and return.
    if args.load_path:
        model.load_state_dict(torch.load(args.load_path,
                                         map_location='cuda:0'))
        valid_acc, valid_loss = test(model, valid_loader, device, args)
        valid_output = f'Valid: {100 * valid_acc:.2f}% '
        cor_train_acc, _ = test(model, train_loader, device, args)
        cor_test_acc, cor_test_loss = test(model, test_loader, device, args)
        train_output = f'Train: {100 * cor_train_acc:.2f}%, '
        test_output = f'Test: {100 * cor_test_acc:.2f}%'
        print(train_output + valid_output + test_output)
        return

    best_val_acc = 0
    cor_train_acc = 0
    cor_test_acc = 0
    patience = 0

    if args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.optimizer == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr,
                                      weight_decay=args.weight_decay)
    else:
        raise NotImplementedError
    if args.warmup > 0:
        # Wrap the optimizer with Noam-style LR warmup.
        optimizer = NoamOptim(
            optimizer,
            args.hidden_size if args.hidden_size > 0 else data.x.size(1),
            n_warmup_steps=args.warmup, init_lr=args.lr)

    # Training loop: evaluate every log_steps epochs after a 10-epoch burn-in;
    # early-stop once validation accuracy fails to improve for
    # args.early_stopping evaluations.
    for epoch in range(1, 1 + args.epochs):
        # lp = LineProfiler()
        # lp_wrapper = lp(train)
        # loss = lp_wrapper(model, train_loader, device, optimizer, args)
        # lp.print_stats()
        loss = train(model, train_loader, device, optimizer, args)
        train_output = valid_output = test_output = ''
        if epoch >= 10 and epoch % args.log_steps == 0:
            valid_acc, valid_loss = test(model, valid_loader, device, args)
            valid_output = f'Valid: {100 * valid_acc:.2f}% '
            if valid_acc > best_val_acc:
                best_val_acc = valid_acc
                # cor_train_acc, _ = test(model, train_loader, device, args)
                cor_test_acc, cor_test_loss = test(model, test_loader,
                                                   device, args)
                # train_output = f'Train: {100 * cor_train_acc:.2f}%, '
                test_output = f'Test: {100 * cor_test_acc:.2f}%'
                patience = 0
                try:
                    torch.save(model.state_dict(),
                               'saved/' + exp_name + '.pt')
                    wandb.save('saved/' + exp_name + '.pt')
                except FileNotFoundError as e:
                    print(e)
            else:
                patience += 1
                if patience >= args.early_stopping:
                    print('Early stopping...')
                    break
            wandb.log({'Train Loss': loss,
                       'Valid Acc': valid_acc,
                       'best_val_acc': best_val_acc,
                       'cor_test_acc': cor_test_acc,
                       'LR': get_lr(optimizer),
                       'Valid Loss': valid_loss,
                       'cor_test_loss': cor_test_loss})
        else:
            wandb.log({'Train Loss': loss, 'LR': get_lr(optimizer)})
        # train_output +
        print(f'Epoch: {epoch:02d}, '
              f'Loss: {loss:.4f}, ' + valid_output + test_output)
def main(args):
    """Train a GCN node classifier (TensorFlow) on a citation dataset and
    print its test accuracy.
    """
    # Load and preprocess dataset.
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        device = "/cpu:0"
    else:
        device = "/gpu:{}".format(args.gpu)
        g = g.to(device)

    with tf.device(device):
        features = g.ndata['feat']
        labels = g.ndata['label']
        train_mask = g.ndata['train_mask']
        val_mask = g.ndata['val_mask']
        test_mask = g.ndata['test_mask']
        in_feats = features.shape[1]
        n_classes = data.num_labels
        n_edges = data.graph.number_of_edges()
        print("""----Data statistics------'
          #Edges %d
          #Classes %d
          #Train samples %d
          #Val samples %d
          #Test samples %d""" %
              (n_edges, n_classes,
               train_mask.numpy().sum(),
               val_mask.numpy().sum(),
               test_mask.numpy().sum()))

        # Add self loop.
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
        n_edges = g.number_of_edges()

        # Symmetric normalization factor D^{-1/2} per node; isolated nodes
        # (infinite after pow) are zeroed out.
        degs = tf.cast(tf.identity(g.in_degrees()), dtype=tf.float32)
        norm = tf.math.pow(degs, -0.5)
        norm = tf.where(tf.math.is_inf(norm), tf.zeros_like(norm), norm)
        g.ndata['norm'] = tf.expand_dims(norm, -1)

        # Create GCN model.
        model = GCN(g,
                    in_feats,
                    args.n_hidden,
                    n_classes,
                    args.n_layers,
                    tf.nn.relu,
                    args.dropout)

        optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)
        loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True)

        # Training loop. Epoch timing starts at epoch 3 to skip warm-up.
        dur = []
        for epoch in range(args.n_epochs):
            if epoch >= 3:
                t0 = time.time()
            # Forward.
            with tf.GradientTape() as tape:
                logits = model(features)
                loss_value = loss_fcn(labels[train_mask], logits[train_mask])
                # Manually Weight Decay
                # We found Tensorflow has a different implementation on weight decay
                # of Adam(W) optimizer with PyTorch. And this results in worse results.
                # Manually adding weights to the loss to do weight decay solves this problem.
                for weight in model.trainable_weights:
                    loss_value = loss_value + \
                        args.weight_decay * tf.nn.l2_loss(weight)

                grads = tape.gradient(loss_value, model.trainable_weights)
                optimizer.apply_gradients(zip(grads, model.trainable_weights))

            if epoch >= 3:
                dur.append(time.time() - t0)

            acc = evaluate(model, features, labels, val_mask)
            print(
                "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur),
                                              loss_value.numpy().item(), acc,
                                              n_edges / np.mean(dur) / 1000))

        acc = evaluate(model, features, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))
def main(args):
    """Train an SGC node classifier (MXNet) on a citation dataset and print
    its test accuracy.
    """
    # Load and preprocess dataset.
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        cuda = False
        ctx = mx.cpu(0)
    else:
        cuda = True
        ctx = mx.gpu(args.gpu)
    g = g.int().to(ctx)

    features = g.ndata['feat']
    # SoftmaxCELoss expects float labels.
    labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.sum().asscalar(),
           val_mask.sum().asscalar(),
           test_mask.sum().asscalar()))

    # Add self loop.
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)

    # Create SGC model (k = 2 propagation steps, feature precomputation
    # cached across epochs).
    model = SGConv(in_feats,
                   n_classes,
                   k=2,
                   cached=True,
                   bias=args.bias)
    model.initialize(ctx=ctx)
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    # Use optimizer.
    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam', {
        'learning_rate': args.lr,
        'wd': args.weight_decay
    })

    # Training loop. Epoch timing starts at epoch 3 to skip warm-up.
    dur = []
    for epoch in range(args.n_epochs):
        if epoch >= 3:
            t0 = time.time()
        # Forward: the mask arg restricts the loss to training nodes.
        with mx.autograd.record():
            pred = model(g, features)
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if epoch >= 3:
            # asscalar() blocks until the async computation finishes, so the
            # recorded duration covers the real work.
            loss.asscalar()
            dur.append(time.time() - t0)
            acc = evaluate(model, g, features, labels, val_mask)
            print(
                "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur),
                                              loss.asscalar(), acc,
                                              n_edges / np.mean(dur) / 1000))

    # Test set accuracy.
    acc = evaluate(model, g, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
if args.gpu >= 0: device = th.device('cuda:%d' % args.gpu) else: device = th.device('cpu') # load reddit data data = DglNodePropPredDataset(name='ogbn-products') splitted_idx = data.get_idx_split() train_idx, val_idx, test_idx = splitted_idx['train'], splitted_idx[ 'valid'], splitted_idx['test'] graph, labels = data[0] labels = labels[:, 0] print('Total edges before adding self-loop {}'.format( graph.number_of_edges())) graph = dgl.remove_self_loop(graph) graph = dgl.add_self_loop(graph) print('Total edges after adding self-loop {}'.format( graph.number_of_edges())) num_nodes = train_idx.shape[0] + val_idx.shape[0] + test_idx.shape[0] assert num_nodes == graph.number_of_nodes() graph.ndata['labels'] = labels mask = th.zeros(num_nodes, dtype=th.bool) mask[train_idx] = True graph.ndata['train_mask'] = mask mask = th.zeros(num_nodes, dtype=th.bool) mask[val_idx] = True graph.ndata['valid_mask'] = mask mask = th.zeros(num_nodes, dtype=th.bool) mask[test_idx] = True graph.ndata['test_mask'] = mask
def main(args):
    """Train a graph-level classifier on a TU dataset.

    Returns a tuple of (best test accuracy, mean seconds per training epoch).
    """
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    dataset = LegacyTUDataset(args.dataset, raw_dir=args.dataset_path)

    # Self-loops are added graph by graph because "add_self_loop" cannot
    # operate on a batched graph.
    for gi in range(len(dataset)):
        simple = dgl.remove_self_loop(dataset.graph_lists[gi])
        dataset.graph_lists[gi] = dgl.add_self_loop(simple)

    # Preprocess: optionally use the node degree as (part of) the node
    # feature, then fold in the node label.
    if args.degree_as_feature:
        dataset = degree_as_feature(dataset)
        mode = "concat"
    else:
        mode = "replace"
    dataset = node_label_as_feature(dataset, mode=mode)

    # 90/10 random train/test split.
    num_training = int(len(dataset) * 0.9)
    num_test = len(dataset) - num_training
    train_set, test_set = random_split(dataset, [num_training, num_test])

    train_loader = GraphDataLoader(train_set, batch_size=args.batch_size,
                                   shuffle=True, num_workers=1)
    test_loader = GraphDataLoader(test_set, batch_size=args.batch_size,
                                  num_workers=1)
    device = torch.device(args.device)

    # Step 2: Create model =================================================================== #
    num_feature, num_classes, _ = dataset.statistics()
    args.in_dim = int(num_feature)
    args.out_dim = int(num_classes)
    args.edge_feat_dim = 0  # No edge feature in datasets that we use.

    model = GraphClassifier(args).to(device)

    # Step 3: Create training components ===================================================== #
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 amsgrad=True,
                                 weight_decay=args.weight_decay)

    # Step 4: training epoches =============================================================== #
    best_test_acc, best_epoch = 0.0, -1
    train_times = []

    for epoch in range(args.epochs):
        tick = time()
        train_loss = train(model, optimizer, train_loader, device,
                           epoch, args.epochs)
        train_times.append(time() - tick)

        test_acc = test(model, test_loader, device)
        if test_acc > best_test_acc:
            best_test_acc = test_acc
            best_epoch = epoch + 1

        if (epoch + 1) % args.print_every == 0:
            log_format = "Epoch {}: loss={:.4f}, test_acc={:.4f}, best_test_acc={:.4f}"
            print(log_format.format(epoch + 1, train_loss, test_acc,
                                    best_test_acc))

    print("Best Epoch {}, final test acc {:.4f}".format(best_epoch,
                                                        best_test_acc))
    return best_test_acc, sum(train_times) / len(train_times)
def main(args):
    """Train a full-graph GraphSAGE node classifier (PyTorch) and print its
    test accuracy.
    """
    # Load and preprocess dataset.
    data = load_data(args)
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_classes
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print("use cuda:", args.gpu)

    # Convert boolean split masks into node-id index tensors.
    train_nid = train_mask.nonzero().squeeze()
    val_nid = val_mask.nonzero().squeeze()
    test_nid = test_mask.nonzero().squeeze()

    # Graph preprocess and calculate normalization factor.
    g = dgl.remove_self_loop(g)
    n_edges = g.number_of_edges()
    if cuda:
        g = g.int().to(args.gpu)

    # Create GraphSAGE model.
    model = GraphSAGE(in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.aggregator_type)

    if cuda:
        model.cuda()

    # Use optimizer.
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Training loop. Epoch timing starts at epoch 3 to skip warm-up.
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # Forward. Upcast bf16 logits so cross_entropy runs in fp32.
        logits = model(g, features)
        if logits.dtype == torch.bfloat16:
            logits = logits.to(torch.float32)
        loss = F.cross_entropy(logits[train_nid], labels[train_nid])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, g, features, labels, val_nid)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, g, features, labels, test_nid)
    print("Test Accuracy {:.4f}".format(acc))
def main(args): # load and preprocess dataset if args.dataset == 'cora': data = CoraGraphDataset() elif args.dataset == 'citeseer': data = CiteseerGraphDataset() elif args.dataset == 'pubmed': data = PubmedGraphDataset() else: raise ValueError('Unknown dataset: {}'.format(args.dataset)) g = data[0] if args.gpu < 0: cuda = False else: cuda = True g = g.int().to(args.gpu) features = g.ndata['feat'] labels = g.ndata['label'] train_mask = g.ndata['train_mask'] val_mask = g.ndata['val_mask'] test_mask = g.ndata['test_mask'] in_feats = features.shape[1] n_classes = data.num_labels n_edges = g.number_of_edges() print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % (n_edges, n_classes, train_mask.int().sum().item(), val_mask.int().sum().item(), test_mask.int().sum().item())) n_edges = g.number_of_edges() # add self loop g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) # create SGC model model = SGConv(in_feats, n_classes, k=2, cached=True, bias=args.bias) if cuda: model.cuda() loss_fcn = torch.nn.CrossEntropyLoss() # use optimizer optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) # initialize graph dur = [] for epoch in range(args.n_epochs): model.train() if epoch >= 3: t0 = time.time() # forward logits = model(g, features) # only compute the train set loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() if epoch >= 3: dur.append(time.time() - t0) acc = evaluate(model, g, features, labels, val_mask) print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(), acc, n_edges / np.mean(dur) / 1000)) print() acc = evaluate(model, g, features, labels, test_mask) print("Test Accuracy {:.4f}".format(acc))
def preprocess_data(dataset, train_percentage):
    """Load a node-classification dataset and build a simple bidirected graph.

    Parameters:
        dataset: dataset name ('cora'/'citeseer'/'pubmed', 'airport'/
            'blogcatalog'/'flickr', or 'arxiv').
        train_percentage: if <= 0, use the dataset's default split;
            otherwise draw a stratified split with this train fraction.

    Returns:
        (g, features, in_dim, nclass, labels, train, val, test)

    Fixes vs. original:
      * the blogcatalog/flickr sparse-feature path used ``torch.FloatTensor``
        while the rest of this function uses the ``th`` alias — normalized
        to ``th`` for consistency (and to avoid a NameError if ``torch`` is
        not imported at module level).
      * ``dgl.to_bidirected`` was called twice in a row for arxiv; the call
        is idempotent, so one call suffices.
    """
    import dgl

    # Modified from AAAI21 FA-GCN
    if dataset in ['cora', 'citeseer', 'pubmed']:
        load_default_split = train_percentage <= 0
        edge = np.loadtxt(f'{DATA_PATH}/{dataset}/{dataset}.edge',
                          dtype=int).tolist()
        features = np.loadtxt(f'{DATA_PATH}/{dataset}/{dataset}.feature')
        labels = np.loadtxt(f'{DATA_PATH}/{dataset}/{dataset}.label',
                            dtype=int)
        if load_default_split:
            train = np.loadtxt(f'{DATA_PATH}/{dataset}/{dataset}.train',
                               dtype=int)
            val = np.loadtxt(f'{DATA_PATH}/{dataset}/{dataset}.val',
                             dtype=int)
            test = np.loadtxt(f'{DATA_PATH}/{dataset}/{dataset}.test',
                              dtype=int)
        else:
            train, val, test = stratified_train_test_split(
                np.arange(len(labels)), labels, len(labels), train_percentage)
        nclass = len(set(labels.tolist()))
        print(dataset, nclass)
        U = [e[0] for e in edge]
        V = [e[1] for e in edge]
        # deduplicate edges, drop self loops, then symmetrize
        g = dgl.graph((U, V))
        g = dgl.to_simple(g)
        g = dgl.remove_self_loop(g)
        g = dgl.to_bidirected(g)
        features = normalize_features(features)
        features = th.FloatTensor(features)
        labels = th.LongTensor(labels)
        train = th.LongTensor(train)
        val = th.LongTensor(val)
        test = th.LongTensor(test)
    elif dataset in ['airport', 'blogcatalog', 'flickr']:
        load_default_split = train_percentage <= 0
        adj_orig = pickle.load(
            open(f'{DATA_PATH}/{dataset}/{dataset}_adj.pkl', 'rb'))  # sparse
        features = pickle.load(
            open(f'{DATA_PATH}/{dataset}/{dataset}_features.pkl',
                 'rb'))  # sparse
        labels = pickle.load(
            open(f'{DATA_PATH}/{dataset}/{dataset}_labels.pkl',
                 'rb'))  # tensor
        if th.is_tensor(labels):
            labels = labels.numpy()
        if load_default_split:
            tvt_nids = pickle.load(
                open(f'{DATA_PATH}/{dataset}/{dataset}_tvt_nids.pkl',
                     'rb'))  # 3 arrays
            train = tvt_nids[0]
            val = tvt_nids[1]
            test = tvt_nids[2]
        else:
            train, val, test = stratified_train_test_split(
                np.arange(len(labels)), labels, len(labels), train_percentage)
        nclass = len(set(labels.tolist()))
        print(dataset, nclass)
        adj_orig = adj_orig.tocoo()
        U = adj_orig.row.tolist()
        V = adj_orig.col.tolist()
        g = dgl.graph((U, V))
        g = dgl.to_simple(g)
        g = dgl.remove_self_loop(g)
        g = dgl.to_bidirected(g)
        # only airport features are row-normalized (matches original code)
        if dataset in ['airport']:
            features = normalize_features(features)
        if sp.issparse(features):
            # was torch.FloatTensor: normalized to the th alias used everywhere
            features = th.FloatTensor(features.toarray())
        else:
            features = th.FloatTensor(features)
        labels = th.LongTensor(labels)
        train = th.LongTensor(train)
        val = th.LongTensor(val)
        test = th.LongTensor(test)
    elif dataset in ['arxiv']:
        # NOTE: `dataset` is rebound to the OGB dataset object here, so the
        # citeseer check below can never fire for this branch.
        dataset = DglNodePropPredDataset(name='ogbn-arxiv',
                                         root='data/ogb_arxiv')
        split_idx = dataset.get_idx_split()
        train, val, test = split_idx["train"], split_idx["valid"], split_idx[
            "test"]
        g, labels = dataset[0]
        features = g.ndata['feat']
        nclass = 40
        labels = labels.squeeze()
        # was called twice in a row; to_bidirected is idempotent
        g = dgl.to_bidirected(g)
    if dataset in ['citeseer']:
        g = dgl.add_self_loop(g)
    return g, features, features.shape[1], nclass, labels, train, val, test
def main(args):
    """Train and evaluate a GCN node classifier on a citation dataset."""
    # --- dataset selection ---
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    cuda = args.gpu >= 0
    if cuda:
        g = g.int().to(args.gpu)

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    # add self loop (optional)
    if args.self_loop:
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # symmetric normalization factor D^{-1/2}; isolated nodes get 0
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # --- model / optimizer setup ---
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
                args.dropout)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # --- training loop (timings start at epoch 3 to skip warm-up) ---
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
def load_dataset(device, args):
    """
    Load dataset and move graph and features to device
    """
    small_or_saint = ["reddit", "cora", "ppi", "ppi_large", "yelp", "flickr"]
    if args.dataset in small_or_saint:
        # --- DGL / PyG datasets ---
        if args.dataset == "reddit":
            from dgl.data import RedditDataset
            data = RedditDataset(self_loop=True)
            g = data[0]
            g = dgl.add_self_loop(g)
            n_classes = data.num_classes
        elif args.dataset == "cora":
            from dgl.data import CitationGraphDataset
            data = CitationGraphDataset('cora',
                                        raw_dir=os.path.join(
                                            args.data_dir, 'cora'))
            g = data[0]
            g = dgl.remove_self_loop(g)
            g = dgl.add_self_loop(g)
            n_classes = data.num_classes
        elif args.dataset == "ppi":
            data = load_ppi_data(args.data_dir)
            g = data.g
            n_classes = data.num_classes
        elif args.dataset == "ppi_large":
            data = load_ppi_large_data()
            g = data.g
            n_classes = data.num_classes
        elif args.dataset == "yelp":
            from torch_geometric.datasets import Yelp
            pyg_data = Yelp(os.path.join(args.data_dir, 'yelp'))[0]
            feat = pyg_data.x
            labels = pyg_data.y
            u, v = pyg_data.edge_index
            g = dgl.graph((u, v))
            g.ndata['feat'] = feat
            g.ndata['label'] = labels
            g.ndata['train_mask'] = pyg_data.train_mask
            g.ndata['val_mask'] = pyg_data.val_mask
            g.ndata['test_mask'] = pyg_data.test_mask
            # yelp is multi-label: one column per class
            n_classes = labels.size(1)
        elif args.dataset == "flickr":
            from torch_geometric.datasets import Flickr
            pyg_data = Flickr(os.path.join(args.data_dir, "flickr"))[0]
            feat = pyg_data.x
            labels = pyg_data.y
            u, v = pyg_data.edge_index
            g = dgl.graph((u, v))
            g.ndata['feat'] = feat
            g.ndata['label'] = labels
            g.ndata['train_mask'] = pyg_data.train_mask
            g.ndata['val_mask'] = pyg_data.val_mask
            g.ndata['test_mask'] = pyg_data.test_mask
            n_classes = labels.max().item() + 1

        # split node ids derived from the boolean masks
        train_mask = g.ndata['train_mask']
        val_mask = g.ndata['val_mask']
        test_mask = g.ndata['test_mask']
        train_nid = train_mask.nonzero().squeeze().long()
        val_nid = val_mask.nonzero().squeeze().long()
        test_nid = test_mask.nonzero().squeeze().long()
        g = g.to(device)
        labels = g.ndata['label']
    else:
        # --- OGB node-property-prediction datasets ---
        dataset = DglNodePropPredDataset(name=args.dataset,
                                         root=args.data_dir)
        splitted_idx = dataset.get_idx_split()
        train_nid = splitted_idx["train"]
        val_nid = splitted_idx["valid"]
        test_nid = splitted_idx["test"]
        g, labels = dataset[0]
        n_classes = dataset.num_classes
        g = g.to(device)
        if args.dataset == "ogbn-arxiv":
            g = dgl.add_reverse_edges(g, copy_ndata=True)
            g = dgl.add_self_loop(g)
            g.ndata['feat'] = g.ndata['feat'].float()
        elif args.dataset == "ogbn-papers100M":
            g = dgl.add_reverse_edges(g, copy_ndata=True)
            g.ndata['feat'] = g.ndata['feat'].float()
            labels = labels.long()
        elif args.dataset == "ogbn-mag":
            # MAG is a heterogeneous graph. The task is to make prediction for
            # paper nodes; other node types get pretrained embeddings as feats.
            path = os.path.join(args.emb_path, f"{args.pretrain_model}_mag")
            labels = labels["paper"]
            train_nid = train_nid["paper"]
            val_nid = val_nid["paper"]
            test_nid = test_nid["paper"]
            features = g.nodes['paper'].data['feat']
            author_emb = torch.load(os.path.join(path, "author.pt"),
                                    map_location=torch.device("cpu")).float()
            topic_emb = torch.load(os.path.join(path, "field_of_study.pt"),
                                   map_location=torch.device("cpu")).float()
            institution_emb = torch.load(
                os.path.join(path, "institution.pt"),
                map_location=torch.device("cpu")).float()
            g.nodes["author"].data["feat"] = author_emb.to(device)
            g.nodes["institution"].data["feat"] = institution_emb.to(device)
            g.nodes["field_of_study"].data["feat"] = topic_emb.to(device)
            g.nodes["paper"].data["feat"] = features.to(device)
            paper_dim = g.nodes["paper"].data["feat"].shape[1]
            author_dim = g.nodes["author"].data["feat"].shape[1]
            if paper_dim != author_dim:
                # random projection to align paper features with the other
                # node types' embedding dimension
                paper_feat = g.nodes["paper"].data.pop("feat")
                rand_weight = torch.Tensor(paper_dim,
                                           author_dim).uniform_(-0.5, 0.5)
                g.nodes["paper"].data["feat"] = torch.matmul(
                    paper_feat, rand_weight.to(device))
                print(
                    f"Randomly project paper feature from dimension {paper_dim} to {author_dim}"
                )
            labels = labels.to(device).squeeze()
            n_classes = int(labels.max() - labels.min()) + 1
        else:
            g.ndata['feat'] = g.ndata['feat'].float()
            labels = labels.squeeze()

    evaluator = get_evaluator(args.dataset)
    print(f"# Nodes: {g.number_of_nodes()}\n"
          f"# Edges: {g.number_of_edges()}\n"
          f"# Train: {len(train_nid)}\n"
          f"# Val: {len(val_nid)}\n"
          f"# Test: {len(test_nid)}\n"
          f"# Classes: {n_classes}")
    return g, labels, n_classes, train_nid, val_nid, test_nid, evaluator
def main(args):
    """Train a Cluster-GCN-style GraphSAGE model with METIS partitions.

    Fix vs. original: the final timing message computed
    ``start_time - end_time`` (always negative); it now prints
    ``end_time - start_time``.
    """
    # --- reproducibility ---
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # PPI is multi-label; everything else is single-label
    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    data = load_data(args)
    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)

    # Normalize features with statistics fitted on the train split only
    if args.normalize:
        train_feats = data.features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(data.features)
    else:
        features = data.features
    features = torch.FloatTensor(features)

    if not multitask:
        labels = torch.LongTensor(data.labels)
    else:
        labels = torch.FloatTensor(data.labels)

    # BoolTensor exists on modern torch; ByteTensor fallback for old versions
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)

    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples,
           n_test_samples))

    # --- build graph and cluster iterator ---
    g = data.graph
    g = dgl.graph(g)
    if args.self_loop and not args.dataset.startswith('reddit'):
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
        print("adding self-loop edges")
    # metis only support int64 graph
    g = g.long()
    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    cluster_iterator = ClusterIter(args.dataset, g, args.psize,
                                   args.batch_size, train_nid,
                                   use_pp=args.use_pp)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        g = g.to(args.gpu)
        print(torch.cuda.get_device_name(0))

    print('labels shape:', labels.shape)
    print("features shape, ", features.shape)

    model = GraphSAGE(in_feats, args.n_hidden, n_classes, args.n_layers,
                      F.relu, args.dropout, args.use_pp)
    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
        print(
            "current memory after model before training",
            torch.cuda.memory_allocated(device=train_nid.device) / 1024 /
            1024)

    start_time = time.time()
    best_f1 = -1
    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(cluster_iterator):
            # sync with upper level training graph
            cluster = cluster.to(torch.cuda.current_device())
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # in PPI case, `log_every` is chosen to log one time per epoch.
            # Choose your log freq dynamically when you want more info within one epoch
            if j % args.log_every == 0:
                print(
                    f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                    f"{len(cluster_iterator)}:training loss", loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j +
                                  epoch * len(cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            val_f1_mic, val_f1_mac = evaluate(model, g, labels, val_mask,
                                              multitask)
            print("Val F1-mic{:.4f}, Val F1-mac{:.4f}".format(
                val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(),
                           os.path.join(log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    # FIX: was start_time - end_time, which printed a negative duration
    print(f'training using time {end_time - start_time}')

    # test
    if args.use_val:
        model.load_state_dict(
            torch.load(os.path.join(log_dir, 'best_model.pkl')))
    test_f1_mic, test_f1_mac = evaluate(model, g, labels, test_mask,
                                        multitask)
    print("Test F1-mic{:.4f}, Test F1-mac{:.4f}".format(
        test_f1_mic, test_f1_mac))
    writer.add_scalar('test/f1-mic', test_f1_mic)
    writer.add_scalar('test/f1-mac', test_f1_mac)
def main(args):
    """Train a 1D-partitioned distributed GraphSAGE model (gloo backend).

    Fix vs. original: removed the unused local ``var = logits.size(0)`` and
    large blocks of dead commented-out initialization code; active behavior
    (including debug prints) is unchanged.
    """
    # --- load and preprocess dataset ---
    data = load_data(args)
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_classes
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print("use cuda:", args.gpu)

    train_nid = train_mask.nonzero().squeeze()
    val_nid = val_mask.nonzero().squeeze()
    test_nid = test_mask.nonzero().squeeze()

    # graph preprocess and calculate normalization factor
    g = dgl.remove_self_loop(g)
    n_edges = g.number_of_edges()
    if cuda:
        g = g.int().to(args.gpu)

    # --- initialize distributed environment with SLURM ---
    if "SLURM_PROCID" in os.environ.keys():
        os.environ["RANK"] = os.environ["SLURM_PROCID"]
    if "SLURM_NTASKS" in os.environ.keys():
        os.environ["WORLD_SIZE"] = os.environ["SLURM_NTASKS"]
    print(f"hostname: {socket.gethostname()} rank: {os.environ['RANK']}",
          flush=True)
    os.environ["MASTER_ADDR"] = args.hostname
    os.environ["MASTER_PORT"] = "1234"
    dist.init_process_group(backend='gloo')
    rank = dist.get_rank()
    size = dist.get_world_size()
    print(f"hostname: {socket.gethostname()} rank: {rank} size: {size}")
    print("Processes: " + str(size))

    device = torch.device('cpu')
    group = dist.new_group(list(range(size)))
    row_groups, col_groups = get_proc_groups(rank, size, args.replication)

    # --- partition input graph and features ---
    # Convert DGLGraph to COO for partitioning
    edges = g.all_edges()
    edges = torch.stack([edges[0], edges[1]], dim=0)
    features_loc, g_loc, ampbyp = oned_partition(rank, size, args.replication,
                                                 features, edges,
                                                 args.normalization)
    # Convert COO back to DGLGraph
    # Uses hardcoded types, doesn't include all the metadata in original g
    g_loc = dgl.heterograph({
        ("_N", "_N", "_E"): (g_loc._indices()[0], g_loc._indices()[1])
    })
    for i in range(len(ampbyp)):
        ampbyp[i] = ampbyp[i].t().coalesce().to(device)

    features.requires_grad = True

    # --- create GraphSAGE model ---
    model = GraphSAGE(in_feats, args.n_hidden, n_classes, args.n_layers,
                      F.relu, args.dropout, args.aggregator_type, rank, size,
                      args.replication, group, row_groups, col_groups)
    if cuda:
        model.cuda()
    print(f"is cuda: {next(model.parameters()).is_cuda}:")

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)

    # --- training loop (timings start at epoch 3 to skip warm-up) ---
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(g, features, ampbyp)

        # each rank owns a contiguous slice of the node dimension; slice the
        # global masks/labels down to this rank's rows
        rank_c = rank // args.replication
        rank_train_mask = torch.split(train_mask, logits.size(0),
                                      dim=0)[rank_c]
        rank_val_mask = torch.split(val_mask, logits.size(0), dim=0)[rank_c]
        label_rank = torch.split(labels, logits.size(0), dim=0)[rank_c]
        train_nids = rank_train_mask.nonzero().squeeze()
        val_nids = rank_val_mask.nonzero().squeeze()

        # debug output (kept from original)
        print("train_mask ==========", train_mask.shape)
        print("labl =============", labels.dtype)
        print("rank_train_mask ===========", train_nids.dtype)
        print("label_rank =============", label_rank.dtype)
        print("labels ------>", label_rank[rank_train_mask].size())
        print("logits ------>", logits[rank_train_mask].size())

        loss = F.cross_entropy(logits[train_nids], label_rank[train_nids])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, g, features, labels, val_nids, ampbyp)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))

    # --- test on this rank's slice ---
    rank_test_mask = torch.split(test_mask, logits.size(0), dim=0)[rank_c]
    test_nids = rank_test_mask.nonzero().squeeze()
    print()
    acc = evaluate(model, g, features, labels, test_nids, ampbyp)
    print("Test Accuracy {:.4f}".format(acc))
def preprocess_data(dataset, train_ratio):
    """Load one of several node-classification benchmarks.

    Returns (g, nclass, features, labels, train, val, test). For the 'syn'
    datasets the train indices double as the validation indices (as in the
    original code).
    """
    if dataset in ['cora', 'citeseer', 'pubmed']:
        # --- low-frequency citation benchmarks with fixed splits ---
        edge = np.loadtxt('../low_freq/{}.edge'.format(dataset),
                          dtype=int).tolist()
        feat = np.loadtxt('../low_freq/{}.feature'.format(dataset))
        labels = np.loadtxt('../low_freq/{}.label'.format(dataset), dtype=int)
        train = np.loadtxt('../low_freq/{}.train'.format(dataset), dtype=int)
        val = np.loadtxt('../low_freq/{}.val'.format(dataset), dtype=int)
        test = np.loadtxt('../low_freq/{}.test'.format(dataset), dtype=int)
        nclass = len(set(labels.tolist()))
        print(dataset, nclass)

        U = [e[0] for e in edge]
        V = [e[1] for e in edge]
        g = dgl.graph((U, V))
        g = dgl.to_simple(g)
        g = dgl.remove_self_loop(g)
        g = dgl.to_bidirected(g)

        feat = normalize_features(feat)
        feat = torch.FloatTensor(feat)
        labels = torch.LongTensor(labels)
        train = torch.LongTensor(train)
        val = torch.LongTensor(val)
        test = torch.LongTensor(test)
        return g, nclass, feat, labels, train, val, test

    elif 'syn' in dataset:
        # --- synthetic two-class graphs; 100 random train nodes ---
        edge = np.loadtxt('../syn/{}.edge'.format(dataset), dtype=int).tolist()
        labels = np.loadtxt('../syn/{}.lab'.format(dataset), dtype=int)
        features = np.loadtxt('../syn/{}.feat'.format(dataset), dtype=float)
        n = labels.shape[0]
        idx = [i for i in range(n)]
        random.shuffle(idx)
        idx_train = np.array(idx[:100])
        idx_test = np.array(idx[100:])

        U = [e[0] for e in edge]
        V = [e[1] for e in edge]
        g = dgl.graph((U, V))

        # report homophily: fraction of edges joining same-label endpoints
        c1 = 0
        c2 = 0
        lab = labels.tolist()
        for e in edge:
            if lab[e[0]] == lab[e[1]]:
                c1 += 1
            else:
                c2 += 1
        print(c1 / len(edge), c2 / len(edge))

        #normalization will make features degenerated
        #features = normalize_features(features)
        features = torch.FloatTensor(features)
        nclass = 2
        labels = torch.LongTensor(labels)
        train = torch.LongTensor(idx_train)
        test = torch.LongTensor(idx_test)
        print(dataset, nclass)
        # train indices reused as the val split
        return g, nclass, features, labels, train, train, test

    elif dataset in ['film']:
        # --- Geom-GCN 'film' dataset: bag-of-words features of width 932 ---
        graph_adjacency_list_file_path = '../high_freq/{}/out1_graph_edges.txt'.format(
            dataset)
        graph_node_features_and_labels_file_path = '../high_freq/{}/out1_node_feature_label.txt'.format(
            dataset)
        G = nx.DiGraph()
        graph_node_features_dict = {}
        graph_labels_dict = {}

        if dataset == 'film':
            # features are stored as index lists -> expand to multi-hot
            with open(graph_node_features_and_labels_file_path
                      ) as graph_node_features_and_labels_file:
                graph_node_features_and_labels_file.readline()
                for line in graph_node_features_and_labels_file:
                    line = line.rstrip().split('\t')
                    assert (len(line) == 3)
                    assert (int(line[0]) not in graph_node_features_dict
                            and int(line[0]) not in graph_labels_dict)
                    feature_blank = np.zeros(932, dtype=np.uint16)
                    feature_blank[np.array(line[1].split(','),
                                           dtype=np.uint16)] = 1
                    graph_node_features_dict[int(line[0])] = feature_blank
                    graph_labels_dict[int(line[0])] = int(line[2])
        else:
            # dense comma-separated feature vectors
            with open(graph_node_features_and_labels_file_path
                      ) as graph_node_features_and_labels_file:
                graph_node_features_and_labels_file.readline()
                for line in graph_node_features_and_labels_file:
                    line = line.rstrip().split('\t')
                    assert (len(line) == 3)
                    assert (int(line[0]) not in graph_node_features_dict
                            and int(line[0]) not in graph_labels_dict)
                    graph_node_features_dict[int(line[0])] = np.array(
                        line[1].split(','), dtype=np.uint8)
                    graph_labels_dict[int(line[0])] = int(line[2])

        with open(graph_adjacency_list_file_path) as graph_adjacency_list_file:
            graph_adjacency_list_file.readline()
            for line in graph_adjacency_list_file:
                line = line.rstrip().split('\t')
                assert (len(line) == 2)
                if int(line[0]) not in G:
                    G.add_node(int(line[0]),
                               features=graph_node_features_dict[int(line[0])],
                               label=graph_labels_dict[int(line[0])])
                if int(line[1]) not in G:
                    G.add_node(int(line[1]),
                               features=graph_node_features_dict[int(line[1])],
                               label=graph_labels_dict[int(line[1])])
                G.add_edge(int(line[0]), int(line[1]))

        adj = nx.adjacency_matrix(G, sorted(G.nodes()))
        row, col = np.where(adj.todense() > 0)
        U = row.tolist()
        V = col.tolist()
        g = dgl.graph((U, V))
        g = dgl.to_simple(g)
        g = dgl.to_bidirected(g)
        g = dgl.remove_self_loop(g)

        features = np.array([
            features for _, features in sorted(G.nodes(data='features'),
                                               key=lambda x: x[0])
        ],
                            dtype=float)
        labels = np.array([
            label for _, label in sorted(G.nodes(data='label'),
                                         key=lambda x: x[0])
        ],
                          dtype=int)

        # sequential split: first train_ratio for train, 60-80% val, 80%+ test
        n = labels.shape[0]
        idx = [i for i in range(n)]
        #random.shuffle(idx)
        r0 = int(n * train_ratio)
        r1 = int(n * 0.6)
        r2 = int(n * 0.8)
        idx_train = np.array(idx[:r0])
        idx_val = np.array(idx[r1:r2])
        idx_test = np.array(idx[r2:])

        features = normalize_features(features)
        features = torch.FloatTensor(features)
        nclass = 5
        labels = torch.LongTensor(labels)
        train = torch.LongTensor(idx_train)
        val = torch.LongTensor(idx_val)
        test = torch.LongTensor(idx_test)
        print(dataset, nclass)
        return g, nclass, features, labels, train, val, test

    # datasets in Geom-GCN
    elif dataset in ['cornell', 'texas', 'wisconsin', 'chameleon', 'squirrel']:
        graph_adjacency_list_file_path = '../high_freq/{}/out1_graph_edges.txt'.format(
            dataset)
        graph_node_features_and_labels_file_path = '../high_freq/{}/out1_node_feature_label.txt'.format(
            dataset)
        G = nx.DiGraph()
        graph_node_features_dict = {}
        graph_labels_dict = {}

        with open(graph_node_features_and_labels_file_path
                  ) as graph_node_features_and_labels_file:
            graph_node_features_and_labels_file.readline()
            for line in graph_node_features_and_labels_file:
                line = line.rstrip().split('\t')
                assert (len(line) == 3)
                assert (int(line[0]) not in graph_node_features_dict
                        and int(line[0]) not in graph_labels_dict)
                graph_node_features_dict[int(line[0])] = np.array(
                    line[1].split(','), dtype=np.uint8)
                graph_labels_dict[int(line[0])] = int(line[2])

        with open(graph_adjacency_list_file_path) as graph_adjacency_list_file:
            graph_adjacency_list_file.readline()
            for line in graph_adjacency_list_file:
                line = line.rstrip().split('\t')
                assert (len(line) == 2)
                if int(line[0]) not in G:
                    G.add_node(int(line[0]),
                               features=graph_node_features_dict[int(line[0])],
                               label=graph_labels_dict[int(line[0])])
                if int(line[1]) not in G:
                    G.add_node(int(line[1]),
                               features=graph_node_features_dict[int(line[1])],
                               label=graph_labels_dict[int(line[1])])
                G.add_edge(int(line[0]), int(line[1]))

        adj = nx.adjacency_matrix(G, sorted(G.nodes()))
        features = np.array([
            features for _, features in sorted(G.nodes(data='features'),
                                               key=lambda x: x[0])
        ])
        labels = np.array([
            label for _, label in sorted(G.nodes(data='label'),
                                         key=lambda x: x[0])
        ])
        features = normalize_features(features)

        g = DGLGraph(adj)
        g = dgl.to_simple(g)
        g = dgl.to_bidirected(g)
        g = dgl.remove_self_loop(g)

        n = len(labels.tolist())
        idx = [i for i in range(n)]
        #random.shuffle(idx)
        r0 = int(n * train_ratio)
        r1 = int(n * 0.6)
        r2 = int(n * 0.8)
        train = np.array(idx[:r0])
        val = np.array(idx[r1:r2])
        test = np.array(idx[r2:])

        nclass = len(set(labels.tolist()))
        features = torch.FloatTensor(features)
        labels = torch.LongTensor(labels)
        train = torch.LongTensor(train)
        val = torch.LongTensor(val)
        test = torch.LongTensor(test)
        print(dataset, nclass)
        return g, nclass, features, labels, train, val, test

    # datasets in FAGCN
    elif dataset in ['new_chameleon', 'new_squirrel']:
        edge = np.loadtxt('../high_freq/{}/edges.txt'.format(dataset),
                          dtype=int)
        labels = np.loadtxt('../high_freq/{}/labels.txt'.format(dataset),
                            dtype=int).tolist()
        features = np.loadtxt('../high_freq/{}/features.txt'.format(dataset),
                              dtype=float)

        U = [e[0] for e in edge]
        V = [e[1] for e in edge]
        g = dgl.graph((U, V))
        g = dgl.to_simple(g)
        g = dgl.to_bidirected(g)
        g = dgl.remove_self_loop(g)

        n = len(labels)
        idx = [i for i in range(n)]
        #random.shuffle(idx)
        r0 = int(n * train_ratio)
        r1 = int(n * 0.6)
        r2 = int(n * 0.8)
        train = np.array(idx[:r0])
        val = np.array(idx[r1:r2])
        test = np.array(idx[r2:])

        features = normalize_features(features)
        features = torch.FloatTensor(features)
        nclass = 3
        labels = torch.LongTensor(labels)
        train = torch.LongTensor(train)
        val = torch.LongTensor(val)
        test = torch.LongTensor(test)
        print(dataset, nclass)
        return g, nclass, features, labels, train, val, test
def main(args):
    """Train and evaluate a GAT node classifier (TensorFlow backend)."""
    # --- dataset selection ---
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        device = "/cpu:0"
    else:
        device = "/gpu:{}".format(args.gpu)
        g = g.to(device)

    with tf.device(device):
        features = g.ndata['feat']
        labels = g.ndata['label']
        train_mask = g.ndata['train_mask']
        val_mask = g.ndata['val_mask']
        test_mask = g.ndata['test_mask']
        num_feats = features.shape[1]
        n_classes = data.num_labels
        n_edges = data.graph.number_of_edges()
        print("""----Data statistics------'
          #Edges %d
          #Classes %d
          #Train samples %d
          #Val samples %d
          #Test samples %d""" %
              (n_edges, n_classes, train_mask.numpy().sum(),
               val_mask.numpy().sum(), test_mask.numpy().sum()))

        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
        n_edges = g.number_of_edges()

        # create model: num_heads per hidden layer, num_out_heads for output
        heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
        model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                    heads, tf.nn.elu, args.in_drop, args.attn_drop,
                    args.negative_slope, args.residual)
        print(model)
        if args.early_stop:
            stopper = EarlyStopping(patience=100)
        # loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
        #     from_logits=False)
        loss_fcn = tf.nn.sparse_softmax_cross_entropy_with_logits

        # use optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr,
                                             epsilon=1e-8)

        # --- training loop (timings start at epoch 3 to skip warm-up) ---
        dur = []
        for epoch in range(args.epochs):
            if epoch >= 3:
                t0 = time.time()
            # forward
            with tf.GradientTape() as tape:
                tape.watch(model.trainable_weights)
                logits = model(features, training=True)
                loss_value = tf.reduce_mean(
                    loss_fcn(labels=labels[train_mask],
                             logits=logits[train_mask]))
                # Manually Weight Decay
                # We found Tensorflow has a different implementation on weight decay
                # of Adam(W) optimizer with PyTorch. And this results in worse results.
                # Manually adding weights to the loss to do weight decay solves this problem.
                for weight in model.trainable_weights:
                    loss_value = loss_value + \
                        args.weight_decay*tf.nn.l2_loss(weight)
            grads = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))
            if epoch >= 3:
                dur.append(time.time() - t0)

            train_acc = accuracy(logits[train_mask], labels[train_mask])
            # fastmode reuses training logits; otherwise run a fresh eval pass
            if args.fastmode:
                val_acc = accuracy(logits[val_mask], labels[val_mask])
            else:
                val_acc = evaluate(model, features, labels, val_mask)
                if args.early_stop:
                    if stopper.step(val_acc, model):
                        break

            print(
                "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
                " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                    epoch, np.mean(dur),
                    loss_value.numpy().item(), train_acc, val_acc,
                    n_edges / np.mean(dur) / 1000))

        print()
        if args.early_stop:
            model.load_weights('es_checkpoint.pb')
        acc = evaluate(model, features, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))