def main():
    """Evaluate a saved DeeperGCN checkpoint on a node-property dataset.

    Reads the run configuration from ``ArgsInit().args``, loads the dataset
    named by ``args.dataset``, restores the weights stored under the
    ``'model_state_dict'`` key of the checkpoint at ``args.model_load_path``
    and prints the value returned by ``test`` followed by the model
    parameters. Everything runs on the CPU; the model is never moved to
    another device here.
    """
    args = ArgsInit().args

    dataset = PygNodePropPredDataset(name=args.dataset)
    graph = dataset[0]

    if args.self_loop:
        # add_self_loops returns a tuple; only element 0 (the edge index)
        # is kept.
        graph.edge_index = add_self_loops(edge_index=graph.edge_index,
                                          num_nodes=graph.num_nodes)[0]

    split_idx = dataset.get_idx_split()
    evaluator = Evaluator(args.dataset)

    # The model is sized from the data rather than from the command line.
    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    print(args)

    model = DeeperGCN(args)
    print(model)

    # The model stays on the CPU, so remap the checkpoint's tensors to the
    # CPU too. Without map_location, a checkpoint written from a CUDA model
    # cannot be deserialized on a host without CUDA.
    checkpoint = torch.load(args.model_load_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])

    result = test(model, graph.x, graph.edge_index, graph.y, split_idx,
                  evaluator)
    print(result)
    model.print_params(final=True)
def main():
    """Evaluate a saved DeeperGCN checkpoint on an undirected node-property graph.

    Reads the run configuration from ``ArgsInit().args``, picks the CUDA
    device ``args.device`` when ``args.use_gpu`` is set and CUDA is available
    (CPU otherwise), symmetrizes the edge index with ``to_undirected``,
    optionally adds self loops, restores the ``'model_state_dict'`` entry of
    the checkpoint at ``args.model_load_path`` and prints the train /
    validation / test values returned by ``test``.
    """
    args = ArgsInit().args

    if args.use_gpu:
        device = (torch.device("cuda:" + str(args.device))
                  if torch.cuda.is_available() else torch.device("cpu"))
    else:
        device = torch.device('cpu')

    dataset = PygNodePropPredDataset(name=args.dataset)
    data = dataset[0]
    split_idx = dataset.get_idx_split()

    evaluator = Evaluator(args.dataset)

    x = data.x.to(device)
    y_true = data.y.to(device)

    edge_index = data.edge_index.to(device)
    edge_index = to_undirected(edge_index, data.num_nodes)

    if args.self_loop:
        # add_self_loops returns a tuple; only element 0 (the edge index)
        # is kept.
        edge_index = add_self_loops(edge_index, num_nodes=data.num_nodes)[0]

    # The model is sized from the data rather than from the command line.
    args.in_channels = data.x.size(-1)
    args.num_tasks = dataset.num_classes

    print(args)

    model = DeeperGCN(args)

    # The model is still on the CPU at this point, so deserialize the
    # checkpoint onto the CPU as well. Without map_location, a checkpoint
    # written from a CUDA model fails to load when this run has no CUDA.
    checkpoint = torch.load(args.model_load_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)

    result = test(model, x, edge_index, y_true, split_idx, evaluator)
    train_accuracy, valid_accuracy, test_accuracy = result

    print({
        'Train': train_accuracy,
        'Validation': valid_accuracy,
        'Test': test_accuracy
    })

    model.print_params(final=True)
def main():
    """Evaluate saved DeeperGCN and LinkPredictor checkpoints on a link dataset.

    Reads the run configuration from ``ArgsInit().args``, picks the CUDA
    device ``args.device`` when ``args.use_gpu`` is set and CUDA is available
    (CPU otherwise), restores the ``'model_state_dict'`` entries of the
    checkpoints at ``args.model_load_path`` and ``args.predictor_load_path``,
    and prints the train / validation / test values that ``test`` returns for
    each of ``Hits@10``, ``Hits@50`` and ``Hits@100``.
    """
    args = ArgsInit().args

    if args.use_gpu:
        device = (torch.device("cuda:" + str(args.device))
                  if torch.cuda.is_available() else torch.device("cpu"))
    else:
        device = torch.device('cpu')

    dataset = PygLinkPropPredDataset(name=args.dataset)
    data = dataset[0]
    # Data(edge_index=[2, 2358104], edge_weight=[2358104, 1], edge_year=[2358104, 1], x=[235868, 128])
    split_edge = dataset.get_edge_split()
    evaluator = Evaluator(args.dataset)

    x = data.x.to(device)
    edge_index = data.edge_index.to(device)

    args.in_channels = data.x.size(-1)
    # A single output is configured for this link-prediction setup.
    args.num_tasks = 1

    print(args)

    model = DeeperGCN(args).to(device)
    predictor = LinkPredictor(args).to(device)

    # Both modules already live on `device`, so remap the checkpoints there.
    # Without map_location, a checkpoint written from a CUDA model fails to
    # load when this run has no CUDA. load_state_dict copies into the
    # existing parameters, so no further .to(device) is needed afterwards.
    model_ckpt = torch.load(args.model_load_path, map_location=device)
    model.load_state_dict(model_ckpt['model_state_dict'])

    predictor_ckpt = torch.load(args.predictor_load_path,
                                map_location=device)
    predictor.load_state_dict(predictor_ckpt['model_state_dict'])

    hits = ['Hits@10', 'Hits@50', 'Hits@100']

    result = test(model, predictor, x, edge_index, split_edge, evaluator,
                  args.batch_size)

    for k in hits:
        train_result, valid_result, test_result = result[k]
        print('{}--Train: {}, Validation: {}, Test: {}'.format(
            k, train_result, valid_result, test_result))
def test_model(model_path):
    """Evaluate the state dict stored at ``model_path`` on held-out partitions.

    The graph of ``args.dataset`` is split into 10 parts with
    ``RandomNodeSampler(shuffle=True)``; the last 10% of the resulting list
    (one part) is passed to ``test`` and the returned value is printed.

    Args:
        model_path: Path of a file holding a bare ``state_dict`` (it is
            passed directly to ``load_state_dict``, not unwrapped from a
            checkpoint dictionary).
    """
    args = ArgsInit().args

    dataset = PygNodePropPredDataset(name=args.dataset)
    graph = dataset[0]

    num_parts = 10
    data_list = list(
        RandomNodeSampler(graph, num_parts=num_parts, shuffle=True))

    # Only the tail of the partition list is evaluated; the leading 90% is
    # the share treated as training data and is not needed here.
    number_of_train = int(0.9 * num_parts)
    test_data_list = data_list[number_of_train:]

    # The model is sized from the data rather than from the command line.
    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    model = DeeperGCN(args)

    # The model stays on the CPU, so remap the stored tensors to the CPU.
    # Without map_location, weights saved from a CUDA model cannot be
    # deserialized on a host without CUDA.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))

    print(test(model, test_data_list))
def main():
    """Train DeeperGCN with ``train_flag`` on randomly partitioned subgraphs.

    Each epoch draws a fresh random node partition, builds the sub-graphs and
    runs one ``train_flag`` pass. Every ``args.eval_epochs`` epochs the model
    is evaluated with ``test_with_partition``; whenever the validation
    accuracy improves, the matching train / test accuracies are recorded and
    a ``valid_best`` checkpoint is written through ``save_ckpt``. The training
    loss and the train / validation accuracies are logged to TensorBoard
    under ``<args.save>/tensorboard/``.
    """
    args = ArgsInit().save_exp()

    if args.use_gpu:
        device = (torch.device("cuda:" + str(args.device))
                  if torch.cuda.is_available() else torch.device("cpu"))
    else:
        device = torch.device('cpu')

    dataset = PygNodePropPredDataset(name=args.dataset, root=args.data_folder)
    graph = dataset[0]

    adj = SparseTensor(row=graph.edge_index[0], col=graph.edge_index[1])

    if args.self_loop:
        # Keep the sparse adjacency and the edge index consistent: both get
        # self loops.
        adj = adj.set_diag()
        graph.edge_index = add_self_loops(edge_index=graph.edge_index,
                                          num_nodes=graph.num_nodes)[0]

    split_idx = dataset.get_idx_split()
    train_idx = split_idx["train"].tolist()

    evaluator = Evaluator(args.dataset)

    sub_dir = 'random-train_{}-full_batch_test'.format(args.cluster_number)
    logging.info(sub_dir)

    log_dir = os.path.join(args.save, "tensorboard/")
    writer = SummaryWriter(log_dir=log_dir)

    # The model is sized from the data rather than from the command line.
    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    logging.info('%s' % args)

    model = DeeperGCN(args).to(device)
    logging.info(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    results = {
        'highest_valid': 0,
        'final_train': 0,
        'final_test': 0,
        'highest_train': 0
    }

    start_time = time.time()

    try:
        for epoch in range(1, args.epochs + 1):
            # Generate this epoch's batches from a new random partition.
            parts = random_partition_graph(graph.num_nodes,
                                           cluster_number=args.cluster_number)
            data = generate_sub_graphs(adj, parts,
                                       cluster_number=args.cluster_number)

            epoch_loss = train_flag(data, model, graph.x, graph.y, train_idx,
                                    optimizer, device, args)
            logging.info('Epoch {}, training loss {:.4f}'.format(
                epoch, epoch_loss))
            writer.add_scalar("Loss/train", epoch_loss, epoch)
            model.print_params(epoch=epoch)

            if epoch % args.eval_epochs != 0:
                continue

            logging.info(f'---- Evaluating at epoch {epoch} ----')
            res = test_with_partition(model, graph, adj, split_idx,
                                      num_clusters=args.eval_cluster_number,
                                      partition_method=args.partition_method,
                                      evaluator=evaluator,
                                      device=device)
            logging.info(res)
            logging.info("---------------------------------")

            train_accuracy = res["train_acc"]
            valid_accuracy = res["valid_acc"]
            test_accuracy = res["test_acc"]

            # Pass the epoch as global_step; without it every evaluation is
            # recorded at the same step and the curves collapse to a point.
            writer.add_scalar("Acc/train", train_accuracy, epoch)
            writer.add_scalar("Acc/dev", valid_accuracy, epoch)

            if train_accuracy > results['highest_train']:
                results['highest_train'] = train_accuracy

            if valid_accuracy > results['highest_valid']:
                # 'final_*' track the scores at the best validation epoch.
                results['highest_valid'] = valid_accuracy
                results['final_train'] = train_accuracy
                results['final_test'] = test_accuracy

                save_ckpt(model, optimizer, round(epoch_loss, 4), epoch,
                          args.model_save_path, sub_dir,
                          name_post='valid_best')
                logging.info(
                    f"Saved better model to checkpoint folder {args.model_save_path}"
                )
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

    logging.info("%s" % results)

    end_time = time.time()
    total_time = end_time - start_time
    logging.info('Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(total_time))))
def main():
    """Run full-batch DeeperGCN training on a node-property dataset.

    The graph is made undirected (and optionally given self loops), the model
    is trained for ``args.epochs`` epochs with Adam, and ``test`` is called
    after every epoch. Whenever the validation accuracy improves, the
    matching train / test accuracies are recorded and a ``valid_best``
    checkpoint is written through ``save_ckpt``.
    """
    args = ArgsInit().save_exp()

    # CUDA is used only when requested and actually available.
    cuda_ok = args.use_gpu and torch.cuda.is_available()
    device = (torch.device("cuda:" + str(args.device))
              if cuda_ok else torch.device('cpu'))

    dataset = PygNodePropPredDataset(name=args.dataset)
    data = dataset[0]
    split_idx = dataset.get_idx_split()
    evaluator = Evaluator(args.dataset)

    # Move everything the training loop touches onto the chosen device.
    x = data.x.to(device)
    y_true = data.y.to(device)
    train_idx = split_idx['train'].to(device)
    edge_index = to_undirected(data.edge_index.to(device), data.num_nodes)

    if args.self_loop:
        edge_index, *_ = add_self_loops(edge_index, num_nodes=data.num_nodes)

    sub_dir = 'SL_{}'.format(args.self_loop)

    # Model dimensions come from the data.
    args.in_channels = data.x.size(-1)
    args.num_tasks = dataset.num_classes

    logging.info('%s' % args)

    model = DeeperGCN(args).to(device)
    logging.info(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    results = dict.fromkeys(
        ('highest_valid', 'final_train', 'final_test', 'highest_train'), 0)

    start_time = time.time()

    for epoch in range(1, args.epochs + 1):
        epoch_loss = train(model, x, edge_index, y_true, train_idx, optimizer)
        logging.info('Epoch {}, training loss {:.4f}'.format(
            epoch, epoch_loss))
        model.print_params(epoch=epoch)

        result = test(model, x, edge_index, y_true, split_idx, evaluator)
        logging.info(result)
        train_accuracy, valid_accuracy, test_accuracy = result

        results['highest_train'] = max(results['highest_train'],
                                       train_accuracy)

        improved = valid_accuracy > results['highest_valid']
        if improved:
            # 'final_*' hold the scores observed at the best validation epoch.
            results.update(highest_valid=valid_accuracy,
                           final_train=train_accuracy,
                           final_test=test_accuracy)
            save_ckpt(model, optimizer, round(epoch_loss, 4), epoch,
                      args.model_save_path, sub_dir, name_post='valid_best')

    logging.info("%s" % results)

    elapsed = time.time() - start_time
    logging.info('Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(elapsed))))
def main():
    """Train DeeperGCN and a LinkPredictor jointly on a link-property dataset.

    Both modules share one Adam optimizer. After every epoch ``test`` is
    called and, independently for each of ``Hits@10``, ``Hits@50`` and
    ``Hits@100``, the best training / validation scores are tracked. When a
    metric's validation score improves, the matching train / test scores are
    recorded and both modules are checkpointed through ``save_ckpt`` into a
    sub-directory named after that metric. When ``args.use_tensor_board`` is
    set, the training loss and the ``Hits@50`` scores are logged to
    TensorBoard under ``args.save``.
    """
    args = ArgsInit().save_exp()

    # None when TensorBoard logging is disabled, so the cleanup below can
    # test for it.
    writer = SummaryWriter(log_dir=args.save) if args.use_tensor_board else None

    if args.use_gpu:
        device = (torch.device("cuda:" + str(args.device))
                  if torch.cuda.is_available() else torch.device("cpu"))
    else:
        device = torch.device('cpu')

    dataset = PygLinkPropPredDataset(name=args.dataset)
    data = dataset[0]
    # Data(edge_index=[2, 2358104], edge_weight=[2358104, 1], edge_year=[2358104, 1], x=[235868, 128])
    split_edge = dataset.get_edge_split()

    evaluator = Evaluator(args.dataset)

    x = data.x.to(device)
    edge_index = data.edge_index.to(device)

    args.in_channels = data.x.size(-1)
    # A single output is configured for this link-prediction setup.
    args.num_tasks = 1

    logging.info('%s' % args)

    model = DeeperGCN(args).to(device)
    predictor = LinkPredictor(args).to(device)

    logging.info(model)
    logging.info(predictor)

    optimizer = torch.optim.Adam(list(model.parameters()) +
                                 list(predictor.parameters()),
                                 lr=args.lr)

    keys = ['highest_valid', 'final_train', 'final_test', 'highest_train']
    hits = ['Hits@10', 'Hits@50', 'Hits@100']
    results = {key: {k: 0 for k in hits} for key in keys}

    start_time = time.time()

    try:
        for epoch in range(1, args.epochs + 1):
            epoch_loss = train(model, predictor, x, edge_index, split_edge,
                               optimizer, args.batch_size)
            logging.info('Epoch {}, training loss {:.4f}'.format(
                epoch, epoch_loss))
            model.print_params(epoch=epoch)

            result = test(model, predictor, x, edge_index, split_edge,
                          evaluator, args.batch_size)

            for k in hits:
                # Each entry of `result` is a (train, valid, test) tuple.
                train_result, valid_result, test_result = result[k]

                # Only Hits@50 is plotted; the loss is written in the same
                # branch so it is logged exactly once per epoch.
                if writer is not None and k == 'Hits@50':
                    writer.add_scalar('stats/train_loss', epoch_loss, epoch)
                    writer.add_scalar('stats/train_Hits@50', train_result,
                                      epoch)
                    writer.add_scalar('stats/valid_Hits@50', valid_result,
                                      epoch)
                    writer.add_scalar('stats/test_Hits@50', test_result,
                                      epoch)

                if train_result > results['highest_train'][k]:
                    results['highest_train'][k] = train_result

                if valid_result > results['highest_valid'][k]:
                    # 'final_*' track the scores at the best validation epoch
                    # for this metric.
                    results['highest_valid'][k] = valid_result
                    results['final_train'][k] = train_result
                    results['final_test'][k] = test_result

                    save_ckpt(model, optimizer, round(epoch_loss, 4), epoch,
                              args.model_save_path, k,
                              name_post='valid_best')
                    save_ckpt(predictor, optimizer, round(epoch_loss, 4),
                              epoch, args.model_save_path, k,
                              name_post='valid_best_link_predictor')

            logging.info(result)
    finally:
        # Flush buffered events and release the event file, even when
        # training is interrupted; otherwise the last scalars can be lost.
        if writer is not None:
            writer.close()

    logging.info("%s" % results)

    end_time = time.time()
    total_time = end_time - start_time
    time_used = 'Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(total_time)))
    logging.info(time_used)
def main():
    """Train DeeperGCN on random graph partitions; evaluate after the last epoch.

    Every epoch draws a fresh random node partition, builds the sub-graphs
    and runs one ``train`` pass. ``test`` is called only once, when the final
    epoch has finished; if its validation accuracy beats the initial value a
    ``valid_best`` checkpoint is written through ``save_ckpt``.
    """
    args = ArgsInit().save_exp()

    # CUDA is used only when requested and actually available.
    cuda_ok = args.use_gpu and torch.cuda.is_available()
    device = (torch.device("cuda:" + str(args.device))
              if cuda_ok else torch.device('cpu'))

    dataset = PygNodePropPredDataset(name=args.dataset)
    graph = dataset[0]

    src, dst = graph.edge_index[0], graph.edge_index[1]
    adj = SparseTensor(row=src, col=dst)

    if args.self_loop:
        # Keep the sparse adjacency and the edge index consistent.
        adj = adj.set_diag()
        graph.edge_index, *_ = add_self_loops(edge_index=graph.edge_index,
                                              num_nodes=graph.num_nodes)

    split_idx = dataset.get_idx_split()
    train_idx = split_idx["train"].tolist()

    evaluator = Evaluator(args.dataset)

    sub_dir = 'random-train_{}-full_batch_test'.format(args.cluster_number)
    logging.info(sub_dir)

    # Model dimensions come from the data.
    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    logging.info('%s' % args)

    model = DeeperGCN(args).to(device)
    logging.info(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    results = dict.fromkeys(
        ('highest_valid', 'final_train', 'final_test', 'highest_train'), 0)

    start_time = time.time()

    for epoch in range(1, args.epochs + 1):
        # generate batches
        parts = random_partition_graph(graph.num_nodes,
                                       cluster_number=args.cluster_number)
        data = generate_sub_graphs(adj, parts,
                                   cluster_number=args.cluster_number)

        epoch_loss = train(data, model, graph.x, graph.y, train_idx,
                           optimizer, device)
        logging.info('Epoch {}, training loss {:.4f}'.format(
            epoch, epoch_loss))
        model.print_params(epoch=epoch)

        # Evaluation happens once, after the last epoch.
        if epoch != args.epochs:
            continue

        result = test(model, graph.x, graph.edge_index, graph.y, split_idx,
                      evaluator)
        logging.info(result)
        train_accuracy, valid_accuracy, test_accuracy = result

        results['highest_train'] = max(results['highest_train'],
                                       train_accuracy)

        if valid_accuracy > results['highest_valid']:
            results.update(highest_valid=valid_accuracy,
                           final_train=train_accuracy,
                           final_test=test_accuracy)
            save_ckpt(model, optimizer, round(epoch_loss, 4), epoch,
                      args.model_save_path, sub_dir, name_post='valid_best')

    logging.info("%s" % results)

    elapsed = time.time() - start_time
    logging.info('Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(elapsed))))
def main():
    """Train DeeperGCN on neighbor-sampled subgraphs and keep the best weights.

    The graph is split into ``NUMBER_OF_SUBGRAPHS`` subgraphs with
    ``NeighborSubgraphLoader``; the first 90% are used for training and the
    rest for testing. After every epoch the model is scored with ``test`` on
    the held-out subgraphs; whenever that score improves, a snapshot of the
    weights is written to ``CKPT_PATH``. Loss / accuracy histories, the total
    training time and the best test accuracy are pickled to
    ``EXPERIMENT_RES_PATH``.
    """
    # Local import: only this entry point needs it.
    import copy

    EPOCHS = 1
    NUMBER_OF_SUBGRAPHS = 10
    CKPT_PATH = f'neighbor_deeper_num_{NUMBER_OF_SUBGRAPHS}.pt'
    EXPERIMENT_RES_PATH = f'neighbor_deeper_num_{NUMBER_OF_SUBGRAPHS}_experiment_res.pk'

    args = ArgsInit().args

    if args.use_gpu:
        device = (torch.device("cuda:" + str(args.device))
                  if torch.cuda.is_available() else torch.device("cpu"))
    else:
        device = torch.device('cpu')

    dataset = PygNodePropPredDataset(name=args.dataset)
    graph = dataset[0]
    print(graph)

    num_parts = NUMBER_OF_SUBGRAPHS
    data_list = list(
        NeighborSubgraphLoader(graph, num_parts=NUMBER_OF_SUBGRAPHS))
    print(f'len of datalist: {len(data_list)}')

    number_of_train = int(0.9 * num_parts)
    train_data_list = data_list[0:number_of_train]
    test_data_list = data_list[number_of_train:]

    print(
        f'Train test split successful, number of train: {len(train_data_list)} | number of test: {len(test_data_list)}'
    )

    # The model is sized from the data rather than from the command line.
    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    logging.info('%s' % args)

    model = DeeperGCN(args).to(device)
    logging.info(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    start_time = time.time()

    epoch_loss_list = []
    epoch_acc_list = []
    highest_acc = 0
    best_model_dict = None

    for epoch in range(1, EPOCHS + 1):
        # Train on the training split only; feeding the full data_list would
        # leak the held-out subgraphs that `test` scores below.
        epoch_loss, epoch_acc = train(train_data_list, model, optimizer,
                                      device)
        epoch_loss_list.append(epoch_loss)
        epoch_acc_list.append(epoch_acc)
        print('Epoch {}, training loss {:.4f} | training acc {}'.format(
            epoch, epoch_loss, epoch_acc))

        test_acc = test(model, test_data_list)
        if test_acc > highest_acc:
            highest_acc = test_acc
            # state_dict() returns references to the live parameters; copy
            # them so later epochs cannot overwrite the best snapshot.
            best_model_dict = copy.deepcopy(model.state_dict())
            # Save right away so the log line below is accurate and the
            # best weights survive an interrupted run.
            torch.save(best_model_dict, CKPT_PATH)
            logging.info(
                f'best test acc: {highest_acc} | saved to path {CKPT_PATH}')

    if best_model_dict is None:
        # No epoch beat the initial score, so there is nothing to persist;
        # saving None would only produce an unloadable checkpoint.
        logging.warning('No checkpoint written: test accuracy never exceeded %s',
                        highest_acc)

    end_time = time.time()
    total_time = end_time - start_time
    logging.info('Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(total_time))))

    experiment_result = {
        'Total training time': total_time,
        'Epoch loss list': epoch_loss_list,
        'Epoch acc list': epoch_acc_list,
        'Best test acc': highest_acc,
    }

    with open(EXPERIMENT_RES_PATH, 'wb') as f:
        pk.dump(experiment_result, f)