def train_node_classifier(G, labels, model, args, writer=None):
    """Train `model` for node classification on a single graph.

    Splits the nodes into train/test by `args.train_ratio`, trains for
    `args.num_epochs` epochs, logs metrics to `writer` (if given), prints
    the final confusion matrices, and saves a checkpoint that includes the
    computation-graph data used downstream by the explainer.

    Args:
        G: networkx graph to classify nodes of.
        labels: per-node labels accepted by gengraph.preprocess_input_graph.
        model: module returning (ypred, adj_att) from (x, adj) and exposing
            a .loss(pred, labels) method.
        args: config namespace (train_ratio, num_epochs, gpu, clip, ...).
        writer: optional TensorBoard-style summary writer.
    """
    # Train/test split over node indices only.
    num_nodes = G.number_of_nodes()
    num_train = int(num_nodes * args.train_ratio)
    idx = list(range(num_nodes))
    np.random.shuffle(idx)
    train_idx = idx[:num_train]
    test_idx = idx[num_train:]

    data = gengraph.preprocess_input_graph(G, labels)
    # data["labels"] is indexed [batch, node] here — the slice keeps the
    # batch axis. (Assumed from the indexing pattern; confirm in gengraph.)
    labels_train = torch.tensor(data["labels"][:, train_idx], dtype=torch.long)
    adj = torch.tensor(data["adj"], dtype=torch.float)
    x = torch.tensor(data["feat"], requires_grad=True, dtype=torch.float)

    scheduler, optimizer = train_utils.build_optimizer(
        args, model.parameters(), weight_decay=args.weight_decay
    )
    model.train()
    ypred = None
    for epoch in range(args.num_epochs):
        begin_time = time.time()
        model.zero_grad()

        if args.gpu:
            ypred, adj_att = model(x.cuda(), adj.cuda())
        else:
            ypred, adj_att = model(x, adj)
        # Loss is computed on the training nodes only.
        ypred_train = ypred[:, train_idx, :]
        if args.gpu:
            loss = model.loss(ypred_train, labels_train.cuda())
        else:
            loss = model.loss(ypred_train, labels_train)
        loss.backward()
        # BUG FIX: nn.utils.clip_grad_norm was deprecated and later removed;
        # the in-place clip_grad_norm_ is the supported API (and is what the
        # other training functions in this file already use).
        nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        elapsed = time.time() - begin_time
        result_train, result_test = evaluate_node(
            ypred.cpu(), data["labels"], train_idx, test_idx
        )
        if writer is not None:
            writer.add_scalar("loss/avg_loss", loss, epoch)
            writer.add_scalars(
                "prec",
                {"train": result_train["prec"], "test": result_test["prec"]},
                epoch,
            )
            writer.add_scalars(
                "recall",
                {"train": result_train["recall"], "test": result_test["recall"]},
                epoch,
            )
            writer.add_scalars(
                "acc",
                {"train": result_train["acc"], "test": result_test["acc"]},
                epoch,
            )

        if epoch % 10 == 0:
            print(
                "epoch: ",
                epoch,
                "; loss: ",
                loss.item(),
                "; train_acc: ",
                result_train["acc"],
                "; test_acc: ",
                result_test["acc"],
                "; train_prec: ",
                result_train["prec"],
                "; test_prec: ",
                result_test["prec"],
                "; epoch time: ",
                "{0:0.2f}".format(elapsed),
            )

        if scheduler is not None:
            scheduler.step()
    print(result_train["conf_mat"])
    print(result_test["conf_mat"])

    # Re-run a forward pass in eval mode to capture the computation graph
    # (predictions + inputs) for the saved checkpoint.
    model.eval()
    if args.gpu:
        ypred, _ = model(x.cuda(), adj.cuda())
    else:
        ypred, _ = model(x, adj)
    cg_data = {
        "adj": data["adj"],
        "feat": data["feat"],
        "label": data["labels"],
        "pred": ypred.cpu().detach().numpy(),
        "train_idx": train_idx,
    }
    io_utils.save_checkpoint(model, optimizer, args, num_epochs=-1, cg_dict=cg_data)
def train_node_classifier(G, labels, model, args, writer=None):
    """Train `model` for node classification on a single graph.

    NOTE(review): this REDEFINES train_node_classifier and shadows the
    earlier definition of the same name in this file; confirm which one is
    intended to survive.

    Splits nodes into train/test by `args.train_ratio`, trains with Adam,
    logs metrics to `writer` (if given), prints confusion matrices and
    diagnostic dumps, and saves a checkpoint with computation-graph data.
    """
    # Train/test split over node indices only.
    num_nodes = G.number_of_nodes()
    num_train = int(num_nodes * args.train_ratio)
    idx = [i for i in range(num_nodes)]
    # Shuffle so the train/test split is random.
    np.random.shuffle(idx)
    train_idx = idx[:num_train]
    test_idx = idx[num_train:]
    data = gengraph.preprocess_input_graph(G, labels)
    # data["labels"] appears indexed [batch, node]; the slice keeps the
    # batch axis — confirm against gengraph.preprocess_input_graph.
    labels_train = torch.tensor(data["labels"][:, train_idx], dtype=torch.long)
    adj = torch.tensor(data["adj"], dtype=torch.float)
    x = torch.tensor(data["feat"], requires_grad=True, dtype=torch.float)
    # Inlined replacement for train_utils.build_optimizer (adam branch):
    # optimize only parameters that require gradients.
    filter_fn = filter(lambda p: p.requires_grad, model.parameters())
    # NOTE(review): weight_decay is hard-coded to 0.0 here, ignoring
    # args.weight_decay — confirm this is intentional.
    optimizer = optim.Adam(filter_fn, lr=args.lr, weight_decay=0.0)
    # No LR scheduler in this variant.
    scheduler = None
    # Sets the module in training mode.
    model.train()
    ypred = None
    for epoch in range(args.num_epochs):
        begin_time = time.time()
        model.zero_grad()
        if args.gpu:
            ypred, adj_att = model(x.cuda(), adj.cuda())
        else:
            ypred, adj_att = model(x, adj)
        # Loss is computed on the training nodes only.
        ypred_train = ypred[:, train_idx, :]
        if args.gpu:
            loss = model.loss(ypred_train, labels_train.cuda())
        else:
            loss = model.loss(ypred_train, labels_train)
        loss.backward()
        # In-place gradient clipping (current PyTorch API).
        nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        elapsed = time.time() - begin_time
        # Evaluate on both splits; results include confusion matrices.
        result_train, result_test = evaluate_node(ypred.cpu(), data["labels"],
                                                  train_idx, test_idx)
        if writer is not None:
            writer.add_scalar("loss/avg_loss", loss, epoch)
            writer.add_scalars(
                "prec",
                {
                    "train": result_train["prec"],
                    "test": result_test["prec"]
                },
                epoch,
            )
            writer.add_scalars(
                "recall",
                {
                    "train": result_train["recall"],
                    "test": result_test["recall"]
                },
                epoch,
            )
            writer.add_scalars("acc", {
                "train": result_train["acc"],
                "test": result_test["acc"]
            }, epoch)
        # Console progress every 10 epochs.
        if epoch % 10 == 0:
            print(
                "epoch: ",
                epoch,
                "; loss: ",
                loss.item(),
                "; train_acc: ",
                result_train["acc"],
                "; test_acc: ",
                result_test["acc"],
                "; train_prec: ",
                result_train["prec"],
                "; test_prec: ",
                result_test["prec"],
                "; epoch time: ",
                "{0:0.2f}".format(elapsed),
            )
        if scheduler is not None:
            scheduler.step()
    print("Confusion Matrix of train result :\n", result_train["conf_mat"])
    print("Confusion Matrix of test result :\n", result_test["conf_mat"])
    # Sets the module in evaluation mode to capture the computation graph
    # (inputs + predictions) for the saved checkpoint.
    model.eval()
    if args.gpu:
        ypred, _ = model(x.cuda(), adj.cuda())
    else:
        ypred, _ = model(x, adj)
    cg_data = {
        "adj": data["adj"],
        "feat": data["feat"],
        "label": data["labels"],
        "pred": ypred.cpu().detach().numpy(),
        "train_idx": train_idx,
    }
    print("Labels of the Computational graph :\n", cg_data['label'])
    print("Prediction result of the Computational graph :\n", cg_data['pred'])
    print("Train index of the Computational graph data :\n", cg_data['train_idx'])
    io_utils.save_checkpoint(model, optimizer, args, num_epochs=-1, cg_dict=cg_data)
def train_node_classifier_multigraph(G_list, labels, model, args, writer=None):
    """Train `model` for node classification jointly over several graphs.

    Each graph gets its own random train/test node split (all graphs are
    assumed to have the same node count as G_list[0]); inputs are stacked
    along the batch axis and the loss is taken over all graphs' training
    nodes. Logs metrics, prints confusion matrices, and saves a checkpoint
    with computation-graph data.

    Args:
        G_list: list of networkx graphs, all with equal node counts.
        labels: per-graph label arrays, parallel to G_list.
        model: module returning ypred from (x, adj) with a .loss method.
        args: config namespace (train_ratio, num_epochs, gpu, clip, ...).
        writer: optional TensorBoard-style summary writer.
    """
    train_idx_all, test_idx_all = [], []
    # Train/test split over node indices only; the same num_train is used
    # for every graph.
    num_nodes = G_list[0].number_of_nodes()
    num_train = int(num_nodes * args.train_ratio)
    idx = list(range(num_nodes))

    np.random.shuffle(idx)
    train_idx = idx[:num_train]
    train_idx_all.append(train_idx)
    test_idx = idx[num_train:]
    test_idx_all.append(test_idx)

    data = gengraph.preprocess_input_graph(G_list[0], labels[0])
    all_labels = data["labels"]
    labels_train = torch.tensor(data["labels"][:, train_idx], dtype=torch.long)
    adj = torch.tensor(data["adj"], dtype=torch.float)
    x = torch.tensor(data["feat"], requires_grad=True, dtype=torch.float)

    # Stack the remaining graphs (each with a fresh split) along dim 0.
    for i in range(1, len(G_list)):
        np.random.shuffle(idx)
        train_idx = idx[:num_train]
        train_idx_all.append(train_idx)
        test_idx = idx[num_train:]
        test_idx_all.append(test_idx)
        data = gengraph.preprocess_input_graph(G_list[i], labels[i])
        all_labels = np.concatenate((all_labels, data["labels"]), axis=0)
        labels_train = torch.cat(
            [
                labels_train,
                torch.tensor(data["labels"][:, train_idx], dtype=torch.long),
            ],
            dim=0,
        )
        adj = torch.cat([adj, torch.tensor(data["adj"], dtype=torch.float)])
        x = torch.cat(
            [x, torch.tensor(data["feat"], requires_grad=True, dtype=torch.float)]
        )

    scheduler, optimizer = train_utils.build_optimizer(
        args, model.parameters(), weight_decay=args.weight_decay
    )
    model.train()
    ypred = None
    num_graphs = len(G_list)
    for epoch in range(args.num_epochs):
        begin_time = time.time()
        model.zero_grad()
        if args.gpu:
            ypred = model(x.cuda(), adj.cuda())
        else:
            ypred = model(x, adj)

        # Each graph has its own train indices, so gather per-graph slices
        # and restack.
        # GENERALIZED: the original hard-coded range(10) and
        # .reshape(10, 146, 6); derive (num_graphs, num_train, num_classes)
        # from the inputs instead — identical for the original 10/146/6 case.
        ypred_train_cmp = torch.cat(
            [ypred[i, train_idx_all[i], :] for i in range(num_graphs)], dim=0
        ).reshape(num_graphs, num_train, ypred.size(2))

        if args.gpu:
            loss = model.loss(ypred_train_cmp, labels_train.cuda())
        else:
            loss = model.loss(ypred_train_cmp, labels_train)
        loss.backward()
        # BUG FIX: nn.utils.clip_grad_norm was deprecated and later removed;
        # use the in-place clip_grad_norm_.
        nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        elapsed = time.time() - begin_time
        result_train, result_test = evaluate_node(
            ypred.cpu(), all_labels, train_idx_all, test_idx_all
        )
        if writer is not None:
            writer.add_scalar("loss/avg_loss", loss, epoch)
            writer.add_scalars(
                "prec",
                {"train": result_train["prec"], "test": result_test["prec"]},
                epoch,
            )
            writer.add_scalars(
                "recall",
                {"train": result_train["recall"], "test": result_test["recall"]},
                epoch,
            )
            writer.add_scalars(
                "acc",
                {"train": result_train["acc"], "test": result_test["acc"]},
                epoch,
            )
        print(
            "epoch: ",
            epoch,
            "; loss: ",
            loss.item(),
            "; train_acc: ",
            result_train["acc"],
            "; test_acc: ",
            result_test["acc"],
            "; epoch time: ",
            "{0:0.2f}".format(elapsed),
        )

        if scheduler is not None:
            scheduler.step()
    print(result_train["conf_mat"])
    print(result_test["conf_mat"])

    # Re-run a forward pass in eval mode to capture the computation graph
    # (inputs + predictions) for the saved checkpoint.
    model.eval()
    if args.gpu:
        ypred = model(x.cuda(), adj.cuda())
    else:
        ypred = model(x, adj)
    cg_data = {
        "adj": adj.cpu().detach().numpy(),
        "feat": x.cpu().detach().numpy(),
        "label": all_labels,
        "pred": ypred.cpu().detach().numpy(),
        "train_idx": train_idx_all,
    }
    io_utils.save_checkpoint(model, optimizer, args, num_epochs=-1, cg_dict=cg_data)
def syn_task1(args, writer=None):
    """Generate a synthetic benchmark graph and train a GCN node classifier.

    Picks the generator by `args.dataset` ('syn1'..'syn5'), trains for
    `args.num_epochs` epochs, logs loss/test accuracy to `writer` (if
    given), then runs a final eval-mode forward pass.

    Args:
        args: config namespace (dataset, input_dim, hidden_dim, output_dim,
            num_gc_layers, train_ratio, lr, weight_decay, clip, gpu, ...).
        writer: optional TensorBoard-style summary writer.

    Raises:
        ValueError: if `args.dataset` is not one of the known synthetic sets.
    """
    print('Generating graph.')
    feature_generator = featgen.ConstFeatureGen(
        np.ones(args.input_dim, dtype=float)
    )
    if args.dataset == 'syn1':
        gen_fn = gengraph.gen_syn1
    elif args.dataset == 'syn2':
        gen_fn = gengraph.gen_syn2
        feature_generator = None  # syn2 supplies its own features
    elif args.dataset == 'syn3':
        gen_fn = gengraph.gen_syn3
    elif args.dataset == 'syn4':
        gen_fn = gengraph.gen_syn4
    elif args.dataset == 'syn5':
        gen_fn = gengraph.gen_syn5
    else:
        # ROBUSTNESS: an unknown dataset previously fell through to a
        # NameError on gen_fn; fail with a clear message instead.
        raise ValueError("Unknown synthetic dataset: {}".format(args.dataset))
    G, labels, name = gen_fn(feature_generator=feature_generator)
    # BUG FIX: 'gpu' is not a valid torch device type string and raises a
    # RuntimeError; CUDA devices are addressed as 'cuda'.
    pyg_G = NxDataset([G], device=torch.device('cuda' if args.gpu else 'cpu'))[0]
    num_classes = max(labels) + 1
    labels = torch.LongTensor(labels)
    print('Done generating graph.')

    model = GCNNet(args.input_dim, args.hidden_dim, args.output_dim,
                   num_classes, args.num_gc_layers, args=args)
    if args.gpu:
        model = model.cuda()

    # Train/test split over node indices only.
    num_train = int(args.train_ratio * G.number_of_nodes())
    idx = list(range(G.number_of_nodes()))
    np.random.shuffle(idx)
    train_mask = idx[:num_train]
    test_mask = idx[num_train:]

    loader = torch_geometric.data.DataLoader([pyg_G], batch_size=1)
    # NOTE: a plain torch.optim.Adam optimizer built here in the original
    # was dead code — immediately overwritten by build_optimizer.
    scheduler, opt = train_utils.build_optimizer(
        args, model.parameters(), weight_decay=args.weight_decay
    )
    for epoch in range(args.num_epochs):
        model.train()
        total_loss = 0
        for batch in loader:
            opt.zero_grad()
            pred = model(batch)
            # Loss on training nodes only.
            pred = pred[train_mask]
            label = labels[train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            opt.step()
            total_loss += loss.item()
        # ROBUSTNESS: writer defaults to None; guard before logging (the
        # original dereferenced it unconditionally).
        if writer is not None:
            writer.add_scalar("loss", total_loss, epoch)
        if epoch % 10 == 0:
            test_acc = test(loader, model, args, labels, test_mask)
            print("{} {:.4f} {:.4f}".format(epoch, total_loss, test_acc))
            if writer is not None:
                writer.add_scalar("test", test_acc, epoch)
    # Final summary; relies on the loop variables from the last epoch
    # (requires num_epochs >= 1, matching the original behavior).
    print("{} {:.4f} {:.4f}".format(epoch, total_loss, test_acc))

    data = gengraph.preprocess_input_graph(G, labels)
    adj = torch.tensor(data['adj'], dtype=torch.float)
    x = torch.tensor(data['feat'], requires_grad=True, dtype=torch.float)
    model.eval()
    # NOTE(review): `batch` here is the last batch from the training loop,
    # not the freshly built adj/x above — confirm this is intentional.
    ypred = model(batch)