def train(dataset):
    """Train a GNN to flag attacked nodes (binary node classification).

    For each batch the ground-truth label vector is rebuilt from
    ``batch.attacked_nodes`` (1 = attacked, 0 = benign); after every
    optimizer step the batch loss and a precision/recall-based F1 score
    are printed.

    NOTE(review): relies on module-level globals (``hidden_dim``,
    ``num_classes``, ``model_type``, ``num_layers``, ``learning_rate``,
    ``weight_decay``, ``epochs``) -- confirm they are defined.

    Fixes over the original:
      * the unused ``SummaryWriter`` (which creates a log directory as a
        side effect and was never closed) is removed;
      * precision/recall/F1 denominators are guarded, so batches with no
        predicted or actual positives print 0.0 instead of NaN;
      * labels are built directly with ``torch.zeros`` instead of a
        numpy array converted to a tensor.
    """
    model = models.GNNStack(dataset.num_node_features, hidden_dim,
                            num_classes, model_type, num_layers)
    # Only optimize parameters that require gradients.
    filter_fn = filter(lambda p: p.requires_grad, model.parameters())
    opt = optim.Adam(filter_fn, lr=learning_rate, weight_decay=weight_decay)
    for epoch in range(epochs):
        model.train()
        for batch in dataset:
            opt.zero_grad()
            prob = model(batch)
            pred = prob.argmax(axis=1)
            # Rebuild binary labels: attacked nodes form the positive class.
            label = torch.zeros(len(batch.y), dtype=torch.long)
            label[batch.attacked_nodes] = 1
            loss = model.loss(prob, label)
            print(loss.item())
            loss.backward()
            opt.step()
            # Per-batch F1 with guarded denominators (original divided by
            # possibly-zero counts, yielding NaN).
            true_pos = (pred[batch.attacked_nodes] == 1).sum().double()
            pred_pos = pred.sum().double()
            actual_pos = label.sum().double()
            precision = true_pos / pred_pos if pred_pos > 0 else torch.tensor(0.0)
            recall = true_pos / actual_pos if actual_pos > 0 else torch.tensor(0.0)
            if precision + recall > 0:
                print(2 * precision * recall / (precision + recall))
            else:
                print(0.0)
def train(dataset, args):
    """Train a GNNStack for node classification.

    The train/validation/test split comes from boolean masks stored on
    each data object; loss is computed only on ``train_mask`` nodes.
    Prints the mean epoch loss every epoch and the test accuracy every
    10th epoch.
    """
    # Node task: masks split the single graph, so one loader serves both
    # training and evaluation.
    loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)
    test_loader = loader
    model = models.GNNStack(dataset.num_node_features, args.hidden_dim,
                            dataset.num_classes, args)
    scheduler, opt = utils.build_optimizer(args, model.parameters())
    for epoch in range(args.epochs):
        model.train()
        epoch_loss = 0
        for data in loader:
            opt.zero_grad()
            out = model(data)
            # Restrict the loss to training nodes.
            masked_out = out[data.train_mask]
            masked_label = data.y[data.train_mask]
            loss = model.loss(masked_out, masked_label)
            loss.backward()
            opt.step()
            epoch_loss += loss.item() * data.num_graphs
        epoch_loss /= len(loader.dataset)
        print(epoch_loss)
        if epoch % 10 == 0:
            test_acc = test(loader, model)
            print(test_acc, ' test')
def train(dataset, task, args):
    """Train a GNNStack for graph or node classification, tracking the
    best validation accuracy and its matching test accuracy.

    Fixes over the original:
      * for the graph task, validation/test accuracy was computed on
        ``loader`` (the training split) even though ``test_loader`` was
        built and never used -- evaluation now uses ``test_loader``
        (identical to ``loader`` for the node task, so node behavior is
        unchanged);
      * ``val_acc`` is pre-initialized so the final print cannot raise
        NameError when ``args.epochs == 0``.
    """
    if task == 'graph':
        # graph classification: separate dataloader for test set
        data_size = len(dataset)
        loader = DataLoader(dataset[:int(data_size * 0.8)],
                            batch_size=args.batch_size, shuffle=True)
        test_loader = DataLoader(dataset[int(data_size * 0.8):],
                                 batch_size=args.batch_size, shuffle=True)
    elif task == 'node':
        # use mask to split train/validation/test
        test_loader = loader = DataLoader(dataset, batch_size=args.batch_size,
                                          shuffle=True)
    else:
        raise RuntimeError('Unknown task')
    # build model
    model = models.GNNStack(dataset.num_node_features, args.hidden_dim,
                            dataset.num_classes, args, task=task)
    print(model)
    scheduler, opt = utils.build_optimizer(args, model.parameters())
    # train
    best_val_acc = 0
    test_acc = 0
    val_acc = 0  # guard: final print must not NameError on 0 epochs
    for epoch in range(args.epochs):
        total_loss = 0
        model.train()
        for batch in loader:
            opt.zero_grad()
            pred = model(batch)
            label = batch.y
            if task == 'node':
                # loss only on training nodes
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)
        print("Loss in Epoch {0}: {1}".format(epoch, total_loss))
        if epoch % 10 == 0:
            # BUG FIX: evaluate on the held-out loader, not the train split.
            val_acc = test(test_loader, model, is_validation=True)
            tmp_test_acc = test(test_loader, model)
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                test_acc = tmp_test_acc
            print("Current Best Val Acc {0}, with Test Acc {1}".format(
                best_val_acc, test_acc))
    print('Final Val Acc {0}, Test Acc {1}'.format(val_acc, test_acc))
def train(dataset, task, args):
    """Train either a GNNStack or an APPNP model on a node-level task and
    plot the test-accuracy curve at the end.

    Test accuracy is sampled every 100 epochs plus once after training;
    the (epoch, accuracy) pairs are handed to ``plot_accuracy``.
    """
    # Node task: masks on the data object provide the split, so a single
    # loader serves both training and evaluation.
    loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)
    test_loader = loader
    # Build the requested model.
    if args.model_type == 'APPNP':
        alpha = 0.1  # Change here if you need to change alpha
        niter = 10  # Change here if you need to change niterations of Pagerank
        appnp_prop = models.PPRPowerIteration(dataset.data.edge_index, alpha,
                                              niter, args.dropout)
        model = models.APPNP(dataset.num_node_features, args.hidden_dim,
                             dataset.num_classes, appnp_prop, args, task=task)
    else:
        model = models.GNNStack(dataset.num_node_features, args.hidden_dim,
                                dataset.num_classes, args, task=task)
    scheduler, opt = utils.build_optimizer(args, model.parameters())
    accuracy = []
    # Training loop.
    for epoch in range(args.epochs):
        model.train()
        running_loss = 0
        for data in loader:
            opt.zero_grad()
            out = model(data)
            # Loss restricted to training nodes.
            loss = model.loss(out[data.train_mask], data.y[data.train_mask])
            loss.backward()
            opt.step()
            running_loss += loss.item() * data.num_graphs
        running_loss /= len(loader.dataset)
        print('Epoch: ', epoch, 'Training loss: ', running_loss)
        if epoch % 100 == 0:
            test_acc = test(loader, model)
            print('Test acc: ', test_acc)
            accuracy.append([epoch, test_acc])
    # Final evaluation and accuracy curve.
    test_acc = test(loader, model)
    accuracy.append([args.epochs, test_acc])
    plot_accuracy(np.array(accuracy), args)
    print('Final test acc: ', test_acc)
def train(dataset, task, args):
    """Train a GNNStack and record the full loss / test-accuracy history,
    printing both lists at the end.

    Fix over the original: for the graph task the periodic accuracy was
    measured on ``loader`` (the training split) even though a held-out
    ``test_loader`` was built and never used; evaluation now uses
    ``test_loader`` (identical to ``loader`` for the node task, so node
    behavior is unchanged).
    """
    if task == 'graph':
        # graph classification: separate dataloader for test set
        data_size = len(dataset)
        loader = DataLoader(dataset[:int(data_size * 0.8)],
                            batch_size=args.batch_size, shuffle=True)
        test_loader = DataLoader(dataset[int(data_size * 0.8):],
                                 batch_size=args.batch_size, shuffle=True)
    elif task == 'node':
        # use mask to split train/validation/test
        test_loader = loader = DataLoader(dataset, batch_size=args.batch_size,
                                          shuffle=True)
    else:
        raise RuntimeError('Unknown task')
    # build model
    model = models.GNNStack(dataset.num_node_features, args.hidden_dim,
                            dataset.num_classes, args, task=task)
    scheduler, opt = utils.build_optimizer(args, model.parameters())
    loss_t = []  # per-epoch mean training loss
    acc = []     # test accuracy every 10th epoch
    # train
    for epoch in range(args.epochs):
        total_loss = 0
        model.train()
        for batch in loader:
            opt.zero_grad()
            pred = model(batch)
            label = batch.y
            if task == 'node':
                # loss only on training nodes
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)
        loss_t.append(total_loss)
        print(total_loss)
        if epoch % 10 == 0:
            # BUG FIX: evaluate on the held-out loader, not the train split.
            test_acc = test(test_loader, model)
            acc.append(test_acc)
            print(test_acc, ' test')
    print(loss_t)
    print(acc)
def train(dataset, task, args):
    """Train a GNNStack on a graph- or node-level task, logging progress.

    Every 10th epoch the test accuracy is printed and appended to
    ``<task>_<model_type>.txt`` as ``epoch loss acc`` rows.

    Fixes over the original:
      * for the graph task, accuracy was computed on ``loader`` (the
        training split) even though ``test_loader`` was built and never
        used -- evaluation now uses ``test_loader`` (identical to
        ``loader`` for the node task);
      * the results file is managed by a ``with`` block, so it is closed
        even if training raises (the original leaked the handle on any
        exception, including the 'Unknown task' error).
    """
    with open(task + "_" + args.model_type + '.txt', 'w') as f1:
        if task == 'graph':
            # graph classification: separate dataloader for test set
            data_size = len(dataset)
            print("==> There are", data_size, "graphs in the dataset.")
            loader = DataLoader(
                dataset[:int(data_size * 0.8)],
                batch_size=args.batch_size, shuffle=True)
            test_loader = DataLoader(
                dataset[int(data_size * 0.8):],
                batch_size=args.batch_size, shuffle=True)
        elif task == 'node':
            print("==> There are", dataset.data.edge_index.shape[1],
                  "edges, and", dataset.data.y.shape[0],
                  "nodes in the dataset.")
            # use mask to split train/validation/test
            test_loader = loader = DataLoader(
                dataset, batch_size=args.batch_size, shuffle=True)
        else:
            raise RuntimeError('Unknown task')
        # build model
        model = models.GNNStack(dataset.num_node_features, args.hidden_dim,
                                dataset.num_classes, args, task=task)
        scheduler, opt = utils.build_optimizer(args, model.parameters())
        # train
        for epoch in range(args.epochs):
            total_loss = 0
            model.train()
            for batch in loader:
                opt.zero_grad()
                pred = model(batch)
                label = batch.y
                if task == 'node':
                    # loss only on training nodes
                    pred = pred[batch.train_mask]
                    label = label[batch.train_mask]
                loss = model.loss(pred, label)
                loss.backward()
                opt.step()
                total_loss += loss.item() * batch.num_graphs
            total_loss /= len(loader.dataset)
            if epoch % 10 == 0:
                # BUG FIX: evaluate on the held-out split, not the training
                # loader (they only coincide for the node task).
                test_acc = test(test_loader, model)
                print("Epoch {}. Loss: {:.4f}. Test accuracy: {:.4f}".format(
                    epoch, total_loss, test_acc))
                f1.write("{} {:.4f} {:.4f}\n".format(
                    epoch, total_loss, test_acc))
def train(dataset, args):
    """Train a GNNStack using stratified K-fold index splits over a single
    graph, logging loss/accuracy/F1/AUC/recall per epoch via ``Logger``.

    NOTE(review): within each epoch every fold's train step updates the
    same model, so a fold's test indices have been trained on during other
    folds / earlier epochs -- confirm this evaluation scheme is intended.

    NOTE(review): assumes module globals ``NUM_FEATURES``,
    ``NUM_ROLX_FEATURES``, ``dev``, ``Logger``, ``build_optimizer``,
    ``get_stratified_batches`` and ``test`` are defined elsewhere in the
    file -- verify.
    """
    # For reproducibility
    torch.manual_seed(1)
    np.random.seed(1)
    random.seed(1)
    logger = Logger(model=args.model_type)
    # build model
    # Feature width grows when ReFeX/RolX features are enabled via args.
    num_feats = NUM_FEATURES if not args.use_refex else NUM_FEATURES + NUM_ROLX_FEATURES
    model = models.GNNStack(
        num_feats,
        args.hidden_dim,
        3,  # dataset.num_classes
        args,
        torch.tensor([1, 0, 15], device=dev).float()  # weights for each class
    )
    if torch.cuda.is_available():
        model = model.cuda(dev)
    scheduler, opt = build_optimizer(args, model.parameters())
    skf, x, y = get_stratified_batches()
    # train
    for epoch in range(args.epochs):
        total_loss = 0
        accs, f1s, aucs, recalls = [], [], [], []
        model.train()
        # No need to loop over batches since we only have one batch
        num_splits = 0
        for train_indices, test_indices in skf.split(x, y):
            # skf.split yields positional indices; index into x to get the
            # actual node indices for this fold.
            train_indices, test_indices = x[train_indices], x[test_indices]
            batch = dataset
            opt.zero_grad()
            pred = model(batch)
            label = batch.y
            # Restrict loss to this fold's training indices.
            pred = pred[train_indices]
            label = label[train_indices]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item()
            num_splits += 1
            # Evaluate on this fold's held-out indices.
            acc_score, f1, auc_score, recall = test(dataset, model, test_indices)
            accs.append(acc_score)
            f1s.append(f1)
            aucs.append(auc_score)
            recalls.append(recall)
        # Average the loss over folds; metrics stay per-fold as arrays.
        total_loss /= num_splits
        accs = np.array(accs)
        f1s = np.array(f1s)
        aucs = np.array(aucs)
        recalls = np.array(recalls)
        log_metrics = {
            'total_loss': total_loss,
            'acc': accs,
            'f1': f1s,
            'auc': aucs,
            'recall': recalls
        }
        logger.log(log_metrics, epoch)
        if epoch % 5 == 0:
            logger.display_status(epoch, args.epochs, total_loss, accs, f1s,
                                  aucs, recalls)
    logger.close()
def train(dataset, task, args):
    """Train a GNNStack, checkpointing the best model seen so far.

    Every 10th epoch the evaluation accuracy (test loader for the graph
    task, validation mask for the node task) is recorded; if it improves
    on the best so far, the model weights are saved to
    ``<model_type><timestamp>.pt``. After training, the accuracy curve is
    saved as a PNG and, for the node task, the best checkpoint is reloaded
    and its test accuracy printed.

    NOTE(review): uses module globals ``device``, ``np``, ``time``,
    ``torch``, ``plt`` -- assumed imported at file level.
    """
    global device
    if task == 'graph':
        # graph classification: separate dataloader for test set
        # shuffle dataset before splitting
        data_size = len(dataset)
        idxs = np.arange(data_size).astype(int)
        np.random.shuffle(idxs)
        idxs = list(idxs)
        dataset = dataset[idxs]
        loader = DataLoader(dataset[:int(data_size * 0.8)],
                            batch_size=args.batch_size, shuffle=True)
        test_loader = DataLoader(dataset[int(data_size * 0.8):],
                                 batch_size=args.batch_size, shuffle=True)
    elif task == 'node':
        # use mask to split train/validation/test
        test_loader = loader = DataLoader(dataset, batch_size=args.batch_size,
                                          shuffle=True)
    else:
        raise RuntimeError('Unknown task')
    # build model
    model = models.GNNStack(dataset.num_node_features, args.hidden_dim,
                            dataset.num_classes, args, task=task)
    model = model.to(device)
    print(model)
    scheduler, opt = utils.build_optimizer(args, model.parameters())
    # train
    test_accs = []
    best_acc = 0
    # timestamp distinguishes checkpoint/plot files across runs
    timestr = time.strftime("%Y%m%d-%H%M%S")
    for epoch in range(args.epochs):
        total_loss = 0
        model.train()
        for batch in loader:
            batch = batch.to(device)
            opt.zero_grad()
            pred = model(batch)
            label = batch.y
            if task == 'node':
                # loss only on training nodes
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)
        print(total_loss)
        if epoch % 10 == 0:
            if task == 'graph':
                test_acc = test(test_loader, model)
            else:
                test_acc = test(loader, model, is_validation=True)
            test_accs.append(test_acc)
            print(test_acc, ' test')
            # save best model
            if test_acc > best_acc:
                best_acc = test_acc
                torch.save(model.state_dict(),
                           str(args.model_type) + timestr + '.pt')
    # plot accuracies
    # one x tick per evaluation (every 10th epoch)
    x = range(0, epoch + 1, 10)
    plt.plot(x, test_accs)
    plt.savefig(str(args.model_type) + timestr + '.png')
    print(f'best achieved accuracy: {best_acc}')
    if model.task == 'node':
        # reload the best checkpoint and report its (non-validation) test
        # accuracy
        best_model = models.GNNStack(dataset.num_node_features,
                                     args.hidden_dim, dataset.num_classes,
                                     args, task=task)
        best_model.load_state_dict(
            torch.load(str(args.model_type) + timestr + '.pt'))
        best_model = best_model.to(device)
        test_acc = test(loader, best_model, is_validation=False)
        print(f'test accuracy: {test_acc}')
def train(dataset, task, args):
    """Train a GNNStack, printing train loss/accuracy and test accuracy
    each epoch, then save an accuracy-vs-epoch plot as a PNG.

    Fixes over the original:
      * ``dataset.shuffle()`` returns a shuffled copy and does not shuffle
        in place, so the original call was a no-op -- the result is now
        assigned back before the 80/20 split;
      * the per-epoch accuracy is computed on the held-out ``test_loader``
        instead of the training loader (they only coincide for the node
        task, so node behavior is unchanged).
    """
    test_epoch, test_acc_per_epoch = [], []
    if task == 'graph':
        # graph classification: separate dataloader for test set
        data_size = len(dataset)
        # BUG FIX: keep the shuffled copy returned by Dataset.shuffle().
        dataset = dataset.shuffle()
        loader = DataLoader(dataset[:int(data_size * 0.8)],
                            batch_size=args.batch_size, shuffle=True)
        test_loader = DataLoader(dataset[int(data_size * 0.8):],
                                 batch_size=args.batch_size, shuffle=True)
    elif task == 'node':
        # use mask to split train/validation/test
        test_loader = loader = DataLoader(dataset, batch_size=args.batch_size,
                                          shuffle=True)
    else:
        raise RuntimeError('Unknown task')
    # build model
    model = models.GNNStack(dataset.num_node_features, args.hidden_dim,
                            dataset.num_classes, args, task=task)
    scheduler, opt = utils.build_optimizer(args, model.parameters())
    # train
    for epoch in range(args.epochs):
        total_loss = 0
        total_acc = 0
        model.train()
        for batch in loader:
            opt.zero_grad()
            pred = model(batch)
            label = batch.y
            if task == 'node':
                # loss only on training nodes
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
            total_acc += pred.max(dim=1)[1].eq(label).float().sum().item()
        total_loss /= len(loader.dataset)
        # NOTE(review): for the node task this divides a node-level correct
        # count by the number of graphs -- confirm intended; left as-is.
        total_acc /= len(loader.dataset)
        if epoch % 1 == 0:
            # BUG FIX: evaluate on the held-out loader, not the train split.
            test_acc = test(test_loader, model)
            print(
                f'epoch {epoch}: train loss - {total_loss:.4f}, train acc - {total_acc:.2%}, test acc - {test_acc:.2%}'
            )
            test_epoch.append(epoch)
            test_acc_per_epoch.append(test_acc)
    # accuracy-vs-epoch curve
    f, ax = plt.subplots(1, 1)
    ax.plot(np.array(test_epoch), np.array(test_acc_per_epoch))
    ax.set_title(f'{dataset.name} - {args.model_type}')
    ax.set_xlabel('epochs')
    ax.set_ylabel('accuracy')
    f.savefig(f'{dataset.name}_{args.model_type}.png', bbox_inches='tight',
              dpi=400)
def train(dataset, task, args):
    """Train a GNNStack, tracking the best validation accuracy with
    (effectively disabled) early stopping.

    Returns:
        tuple: (list of epoch numbers ``1..args.epochs``, list of
        per-epoch validation accuracies).

    Fix over the original: for the graph task, validation/test accuracy
    was computed on ``loader`` (the training split) even though a
    held-out ``test_loader`` was built and never used; evaluation now
    uses ``test_loader`` (identical to ``loader`` for the node task, so
    node behavior is unchanged).
    """
    if task == 'graph':
        # graph classification: separate dataloader for test set
        data_size = len(dataset)
        loader = DataLoader(dataset[:int(data_size * 0.8)],
                            batch_size=args.batch_size, shuffle=True)
        test_loader = DataLoader(dataset[int(data_size * 0.8):],
                                 batch_size=args.batch_size, shuffle=True)
    elif task == 'node':
        # use mask to split train/validation/test
        test_loader = loader = DataLoader(dataset, batch_size=args.batch_size,
                                          shuffle=True)
    else:
        raise RuntimeError('Unknown task')
    # build model
    model = models.GNNStack(dataset.num_node_features, args.hidden_dim,
                            dataset.num_classes, args, task=task)
    model.to(device)
    print(model)
    scheduler, opt = utils.build_optimizer(args, model.parameters())
    # train
    vals = []
    tests = []
    best_val_acc = 0
    test_acc = 0
    early_stop = 1e9  # patience; 1e9 effectively disables early stopping
    stop_cnt = 0
    for epoch in range(1, args.epochs + 1):
        total_loss = 0
        model.train()
        for batch in loader:
            batch.to(device)
            opt.zero_grad()
            pred = model(batch)
            label = batch.y
            if task == 'node':
                # loss only on training nodes
                pred = pred[batch.train_mask]
                label = label[batch.train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            opt.step()
            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(loader.dataset)
        # BUG FIX: evaluate on the held-out loader, not the training one.
        val_acc = test(test_loader, model, is_validation=True)
        tmp_test_acc = test(test_loader, model)
        vals.append(val_acc)
        tests.append(tmp_test_acc)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            test_acc = tmp_test_acc
            stop_cnt = 0
        else:
            stop_cnt += 1
        print("Loss in Epoch {:03d}: {:.4f}. ".format(epoch, total_loss),
              end="")
        print("Current Best Val Acc {:.4f}, with Test Acc {:.4f}".format(
            best_val_acc, test_acc))
        if stop_cnt >= early_stop:
            break
    print('Final Val Acc {0}, Test Acc {1}'.format(best_val_acc, test_acc))
    return list(range(1, args.epochs + 1)), vals