def test_model(model_path):
    """Load a trained DeeperGCN checkpoint and print its test metric.

    Args:
        model_path: Path to a state-dict file saved with ``torch.save``.
    """
    args = ArgsInit().args
    dataset = PygNodePropPredDataset(name=args.dataset)
    graph = dataset[0]

    # Partition the graph into random node-induced subgraphs and hold out
    # the last 10% of parts as the test portion (mirrors the training
    # split). The train slice itself is not needed here, so it is not kept.
    num_parts = 10
    data_list = list(
        RandomNodeSampler(graph, num_parts=num_parts, shuffle=True))
    number_of_train = int(0.9 * num_parts)
    test_data_list = data_list[number_of_train:]

    # Derive model dimensions from the data rather than hard-coding them.
    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    model = DeeperGCN(args)
    model.load_state_dict(torch.load(model_path))
    print(test(model, test_data_list))
# --- Script fragment: ogbn-proteins-style pre-processing + DeeperGCN model ---
# NOTE(review): references `data`, `splitted_idx`, `scatter`, `Linear`,
# `GENConv` and `LayerNorm` defined outside the visible chunk.
data.n_id = torch.arange(data.num_nodes)  # remember original node ids
data.node_species = None  # species info is dropped; not used as a feature
data.y = data.y.to(torch.float)  # float targets (multi-label/BCE style)

# Initialize features of nodes by aggregating edge features.
row, col = data.edge_index
data.x = scatter(data.edge_attr, col, 0, dim_size=data.num_nodes,
                 reduce='sum')

# Set split indices to masks.
for split in ['train', 'valid', 'test']:
    mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    mask[splitted_idx[split]] = True
    data[f'{split}_mask'] = mask

# Random node-induced subgraph loaders: 40 training parts, 10 test parts.
train_loader = RandomNodeSampler(data, num_parts=40, shuffle=True,
                                 num_workers=5)
test_loader = RandomNodeSampler(data, num_parts=10, num_workers=5)


class DeeperGCN(torch.nn.Module):
    # NOTE(review): the class body is truncated in this chunk — the loop
    # below builds `conv`/`norm`, but the appends to `self.layers` (and the
    # rest of the class) lie outside the visible source.
    def __init__(self, hidden_channels, num_layers):
        super(DeeperGCN, self).__init__()

        # Project raw node / edge features into the hidden dimension.
        self.node_encoder = Linear(data.x.size(-1), hidden_channels)
        self.edge_encoder = Linear(data.edge_attr.size(-1), hidden_channels)

        self.layers = torch.nn.ModuleList()
        for i in range(1, num_layers + 1):
            # NOTE(review): aggr='stat' is unusual for GENConv (PyG documents
            # 'softmax'/'powermean'-style aggregators) — confirm against the
            # installed torch_geometric version.
            conv = GENConv(hidden_channels, hidden_channels, aggr='stat',
                           t=1.0, learn_t=True, num_layers=2, norm='layer',
                           msg_norm=True)
            norm = LayerNorm(hidden_channels, elementwise_affine=True)
def split_and_batch_data(data, batches=40):
    """Split *data* into shuffled random-node subgraph batches.

    Args:
        data: A PyG graph data object to partition.
        batches: Number of node partitions to draw.

    Returns:
        A ``RandomNodeSampler`` over *data* with ``batches`` parts.
    """
    sampler = RandomNodeSampler(
        data,
        num_parts=batches,
        shuffle=True,
        num_workers=10,
    )
    return sampler
# --- Script fragment: pre-processing + two-model (deep/shallow) setup ---
# NOTE(review): references `data`, `splitted_idx`, `scatter`, `num_parts`,
# `prune_ratio` and `DeeperGCN2` defined outside the visible chunk.
data.n_id = torch.arange(data.num_nodes)  # keep original node ids
data.node_species = None
data.y = data.y.to(torch.float)

# Initialize features of nodes by aggregating edge features.
row, col = data.edge_index
data.x = scatter(data.edge_attr, col, 0, dim_size=data.num_nodes,
                 reduce='sum')

# Set split indices to masks.
for split in ['train', 'valid', 'test']:
    mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    mask[splitted_idx[split]] = True
    data[f'{split}_mask'] = mask

# train_loader = GraphSAINTRandomWalkSampler(data, batch_size=int(data.num_nodes / 400), num_steps=10,
#                                            walk_length=10)
train_loader = RandomNodeSampler(data, num_parts=num_parts, num_workers=5,
                                 shuffle=True)
test_loader = RandomNodeSampler(data, num_parts=10, num_workers=5)
# p_train_loader = GraphSAINTRandomWalkSampler(data, batch_size=int(data.num_nodes / 200), num_steps=10,
#                                              walk_length=10)
# Coarser loader: half as many parts -> roughly twice the nodes per part.
p_train_loader = RandomNodeSampler(data, num_parts=int(num_parts / 2),
                                   num_workers=5, shuffle=True)

# k: node budget per p_train_loader part — a part holds roughly
# num_nodes / num_parts * 2 nodes, scaled by prune_ratio.
k = int(data.num_nodes / num_parts * 2 * prune_ratio)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Deep (28-layer) and shallow (2-layer) variants of the same architecture.
model1 = DeeperGCN2(hidden_channels=64, num_layers=28).to(device)
model2 = DeeperGCN2(hidden_channels=64, num_layers=2).to(device)
optimizer1 = torch.optim.Adam(model1.parameters(), lr=1e-3)
# Initialize features of nodes by aggregating edge features. # Set split indices to masks. for split in ['train', 'valid', 'test']: mask = torch.zeros(data.num_nodes, dtype=torch.bool) mask[splitted_idx[split]] = True data[f'{split}_mask'] = mask data['test_mask'] = data['valid_mask'] | data['test_mask'] y_tar = data.y[data.train_mask].cuda() map_ = torch.zeros(data.num_nodes, dtype=torch.long) train_cnt = data['train_mask'].int().sum() map_[splitted_idx['train']] = torch.arange(train_cnt) train_loader = RandomNodeSampler(data, num_parts=args.num_train_parts, shuffle=True, num_workers=5) test_loader = RandomNodeSampler(data, num_parts=args.num_test_parts, num_workers=5) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') if data_n == 'protein': model = AdaGNN_h(in_channels=data.x.size(-1), hidden_channels=64, num_layer_list=[layer] * num_gnns, out_channels=data.y.size(-1), gnn_model=[gnn] * num_gnns).to(device) elif data_n == 'product': model = AdaGNN_h(in_channels=data.x.size(-1), hidden_channels=64,
def train_dataloader(self):
    """Build the shuffled training loader over the training graph.

    Returns:
        A ``RandomNodeSampler`` that splits ``self.data_train.data`` into
        six random node partitions, reshuffled each epoch.
    """
    loader = RandomNodeSampler(
        self.data_train.data,
        num_parts=6,
        num_workers=self.num_workers,
        shuffle=True,
    )
    return loader
def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
    """Full training/evaluation loop for one OGB node-classification run.

    Args:
        MODEL_NAME: Key of the GNN architecture to build (e.g. 'GCN', 'GAT').
        dataset: Dataset wrapper exposing ``.dataset``, ``.split_idx``,
            ``.name`` and the ``_add_*`` preprocessing hooks.
        params: Optimisation hyper-parameters (seed, lr, epochs, min_lr,
            max_time, ...).
        net_params: Architecture hyper-parameters (device, dims, flags, ...).
        dirs: Tuple ``(root_log_dir, root_ckpt_dir, write_file_name,
            write_config_file)`` of output locations.
    """
    start0 = time.time()
    per_epoch_time = []

    DATASET_NAME = dataset.name

    if MODEL_NAME in ['GCN', 'GAT']:
        if net_params['self_loop']:
            print(
                "[!] Adding graph self-loops for GCN/GAT models (central node trick)."
            )
            dataset._add_self_loops()

    if not net_params['edge_feat']:
        # Models that ignore edge features get constant 1-dim edge attrs.
        edge_feat_dim = 1
        if DATASET_NAME == 'ogbn-mag':
            dataset.dataset.edge_attr = torch.ones(
                dataset.dataset[0].num_edges,
                edge_feat_dim).type(torch.float32)
        else:
            dataset.dataset.data.edge_attr = torch.ones(
                dataset.dataset[0].num_edges,
                edge_feat_dim).type(torch.float32)

    if net_params['pos_enc']:
        print("[!] Adding graph positional encoding.")
        dataset._add_positional_encodings(net_params['pos_enc_dim'],
                                          DATASET_NAME)
        print('Time PE:', time.time() - start0)

    device = net_params['device']

    if DATASET_NAME == 'ogbn-mag':
        # ogbn-mag is heterogeneous; keep only the 'paper' node splits.
        dataset.split_idx['train'], dataset.split_idx['valid'], dataset.split_idx['test'] = dataset.split_idx['train']['paper'],\
            dataset.split_idx['valid']['paper'], \
            dataset.split_idx['test']['paper']
    # else:
    #     dataset.split_idx['train'], dataset.split_idx['valid'], dataset.split_idx['test'] = dataset.split_idx['train'].to(device), \
    #                                                                                        dataset.split_idx['valid'].to(device), \
    #                                                                                        dataset.split_idx['test'].to(device)
    # transform = T.ToSparseTensor() To do to save memory
    # self.train.graph_lists = [positional_encoding(g, pos_enc_dim, framework='pyg') for _, g in enumerate(dataset.train)]

    root_log_dir, root_ckpt_dir, write_file_name, write_config_file = dirs

    # Write network and optimization hyper-parameters in folder config/
    with open(write_config_file + '.txt', 'w') as f:
        f.write(
            """Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n\nTotal Parameters: {}\n\n"""
            .format(DATASET_NAME, MODEL_NAME, params, net_params,
                    net_params['total_param']))

    log_dir = os.path.join(root_log_dir, "RUN_" + str(0))
    writer = SummaryWriter(log_dir=log_dir)

    # setting seeds
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    if device.type == 'cuda':
        torch.cuda.manual_seed(params['seed'])

    print("Training Graphs: ", dataset.split_idx['train'].size(0))
    print("Validation Graphs: ", dataset.split_idx['valid'].size(0))
    print("Test Graphs: ", dataset.split_idx['test'].size(0))
    print("Number of Classes: ", net_params['n_classes'])

    model = gnn_model(MODEL_NAME, net_params)
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(),
                           lr=params['init_lr'],
                           weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=params['lr_reduce_factor'],
        patience=params['lr_schedule_patience'],
        verbose=True)

    evaluator = Evaluator(name=DATASET_NAME)

    epoch_train_losses, epoch_val_losses = [], []
    epoch_train_accs, epoch_val_accs = [], []

    # import train functions for all other GCNs
    # NOTE(review): deliberate function-scope imports — the train module
    # chosen depends on the dataset name.
    if DATASET_NAME == 'ogbn-mag' or DATASET_NAME == 'ogbn-products':
        from train.train_ogb_node_classification import train_epoch as train_epoch, evaluate_network as evaluate_network
    elif DATASET_NAME == 'ogbn-proteins':
        from train.train_ogb_node_classification import train_epoch_proteins as train_epoch, evaluate_network_proteins as evaluate_network

    data = dataset.dataset[0]

    # Set split indices to masks.
    for split in ['train', 'valid', 'test']:
        mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        mask[dataset.split_idx[split]] = True
        data[f'{split}_mask'] = mask

    # ogbn-mag uses fewer (bigger) partitions than the other datasets.
    num_parts = 5 if DATASET_NAME == 'ogbn-mag' else 40
    train_loader = RandomNodeSampler(data,
                                     num_parts=num_parts,
                                     shuffle=True,
                                     num_workers=0)
    test_loader = RandomNodeSampler(data, num_parts=5, num_workers=0)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        with tqdm(range(params['epochs']), ncols=0) as t:
            for epoch in t:
                t.set_description('Epoch %d' % epoch)
                start = time.time()

                # for all other models common train function
                epoch_train_loss = train_epoch(model, optimizer, device,
                                               train_loader, epoch)
                epoch_train_acc, epoch_val_acc, epoch_test_acc, epoch_val_loss = evaluate_network(
                    model, device, test_loader, evaluator, epoch)
                # _, epoch_test_acc = evaluate_network(model, device, test_loader, epoch)

                epoch_train_losses.append(epoch_train_loss)
                epoch_val_losses.append(epoch_val_loss)
                epoch_train_accs.append(epoch_train_acc)
                epoch_val_accs.append(epoch_val_acc)

                writer.add_scalar('train/_loss', epoch_train_loss, epoch)
                writer.add_scalar('val/_loss', epoch_val_loss, epoch)
                writer.add_scalar('train/_acc', epoch_train_acc, epoch)
                writer.add_scalar('val/_acc', epoch_val_acc, epoch)
                writer.add_scalar('test/_acc', epoch_test_acc, epoch)
                writer.add_scalar('learning_rate',
                                  optimizer.param_groups[0]['lr'], epoch)

                t.set_postfix(time=time.time() - start,
                              lr=optimizer.param_groups[0]['lr'],
                              train_loss=epoch_train_loss,
                              val_loss=epoch_val_loss,
                              train_acc=epoch_train_acc,
                              val_acc=epoch_val_acc,
                              test_acc=epoch_test_acc)

                per_epoch_time.append(time.time() - start)

                # Saving checkpoint
                ckpt_dir = os.path.join(root_ckpt_dir, "RUN_")
                if not os.path.exists(ckpt_dir):
                    os.makedirs(ckpt_dir)
                # the function to save the checkpoint
                # torch.save(model.state_dict(), '{}.pkl'.format(ckpt_dir + "/epoch_" + str(epoch)))

                # Prune stale checkpoints: the filename encodes the epoch
                # number; anything older than the previous epoch is removed.
                files = glob.glob(ckpt_dir + '/*.pkl')
                for file in files:
                    epoch_nb = file.split('_')[-1]
                    epoch_nb = int(epoch_nb.split('.')[0])
                    if epoch_nb < epoch - 1:
                        os.remove(file)

                scheduler.step(epoch_val_loss)

                # it used to test the scripts
                # if epoch == 1:
                #     break

                if optimizer.param_groups[0]['lr'] < params['min_lr']:
                    print("\n!! LR SMALLER OR EQUAL TO MIN LR THRESHOLD.")
                    break

                # Stop training after params['max_time'] hours
                if time.time() - start0 > params['max_time'] * 3600:
                    print('-' * 89)
                    print(
                        "Max_time for training elapsed {:.2f} hours, so stopping"
                        .format(params['max_time']))
                    break

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early because of KeyboardInterrupt')

    # Final evaluation with the weights from the last completed epoch.
    train_acc, val_acc, test_acc, _ = evaluate_network(model, device,
                                                       test_loader, evaluator,
                                                       epoch)
    train_acc, val_acc, test_acc = 100 * train_acc, 100 * val_acc, 100 * test_acc
    print("Test Accuracy: {:.4f}".format(test_acc))
    print("Val Accuracy: {:.4f}".format(val_acc))
    print("Train Accuracy: {:.4f}".format(train_acc))
    print("Convergence Time (Epochs): {:.4f}".format(epoch))
    print("TOTAL TIME TAKEN: {:.4f}s".format(time.time() - start0))
    print("AVG TIME PER EPOCH: {:.4f}s".format(np.mean(per_epoch_time)))

    writer.close()
    """
        Write the results in out_dir/results folder
    """
    with open(write_file_name + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n{}\n\nTotal Parameters: {}\n\n
    FINAL RESULTS\nTEST ACCURACY: {:.4f}\nval ACCURACY: {:.4f}\nTRAIN ACCURACY: {:.4f}\n\n
    Convergence Time (Epochs): {:.4f}\nTotal Time Taken: {:.4f} hrs\nAverage Time Per Epoch: {:.4f} s\n\n\n"""\
          .format(DATASET_NAME, MODEL_NAME, params, net_params, model, net_params['total_param'],
                  test_acc, val_acc,train_acc, epoch, (time.time()-start0)/3600, np.mean(per_epoch_time)))