Code Example #1
File: main.py  Project: sduan1/gnn_batch_methods
def test_model(model_path):
    args = ArgsInit().args
    dataset = PygNodePropPredDataset(name=args.dataset)
    graph = dataset[0]

    # Partition the graph into 10 random node-induced subgraphs.
    num_parts = 10
    data_list = list(
        RandomNodeSampler(graph, num_parts=num_parts, shuffle=True))
    number_of_train = int(0.9 * num_parts)  # 9 parts for training, 1 for testing

    train_data_list = data_list[0:number_of_train]
    test_data_list = data_list[number_of_train:]

    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    model = DeeperGCN(args)
    model.load_state_dict(torch.load(model_path))

    print(test(model, test_data_list))
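For context: RandomNodeSampler partitions the node set into num_parts random groups and yields the subgraph induced by each group, so the 9/1 split above produces disjoint train and test subgraphs. A minimal self-contained sketch, assuming PyTorch Geometric 1.x (in PyG 2.x the class moved to torch_geometric.loader and was later renamed RandomNodeLoader):

import torch
from torch_geometric.data import Data, RandomNodeSampler

# Toy graph: 100 nodes with 8-dim features and 500 random edges.
edge_index = torch.randint(0, 100, (2, 500))
data = Data(x=torch.randn(100, 8), edge_index=edge_index)

loader = RandomNodeSampler(data, num_parts=10, shuffle=True)
for batch in loader:
    # Each batch is the subgraph induced by roughly 1/10 of the nodes.
    print(batch.num_nodes, batch.num_edges)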
Code Example #2
data.n_id = torch.arange(data.num_nodes)
data.node_species = None
data.y = data.y.to(torch.float)

# Initialize features of nodes by aggregating edge features.
row, col = data.edge_index
# Sum each node's incident edge features (scatter over the edge target index).
data.x = scatter(data.edge_attr, col, 0, dim_size=data.num_nodes, reduce='sum')

# Set split indices to masks.
for split in ['train', 'valid', 'test']:
    mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    mask[splitted_idx[split]] = True
    data[f'{split}_mask'] = mask

train_loader = RandomNodeSampler(data, num_parts=40, shuffle=True,
                                 num_workers=5)
test_loader = RandomNodeSampler(data, num_parts=10, num_workers=5)


class DeeperGCN(torch.nn.Module):
    def __init__(self, hidden_channels, num_layers):
        super(DeeperGCN, self).__init__()

        self.node_encoder = Linear(data.x.size(-1), hidden_channels)
        self.edge_encoder = Linear(data.edge_attr.size(-1), hidden_channels)

        self.layers = torch.nn.ModuleList()
        for i in range(1, num_layers + 1):
            conv = GENConv(hidden_channels, hidden_channels, aggr='softmax',
                           t=1.0, learn_t=True, num_layers=2, norm='layer',
                           msg_norm=True)
            norm = LayerNorm(hidden_channels, elementwise_affine=True)
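The excerpt cuts off inside the layer loop. The class closely follows the upstream PyTorch Geometric DeeperGCN example for ogbn-proteins, where the loop continues roughly as below (a hedged reconstruction from that example, not this project's verbatim code):

            act = ReLU(inplace=True)
            layer = DeepGCNLayer(conv, norm, act, block='res+',
                                 dropout=0.1, ckpt_grad=i % 3)
            self.layers.append(layer)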
Code Example #3
File: data_utils.py  Project: mhnnunes/fla_nas_gnn
def split_and_batch_data(data, batches=40):
    # Split the graph into `batches` random node partitions and return
    # a loader over the induced subgraphs.
    return RandomNodeSampler(data,
                             num_parts=batches,
                             shuffle=True,
                             num_workers=10)
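A hypothetical call site for this helper (train_step and the variable names are illustrative only):

loader = split_and_batch_data(graph_data, batches=20)
for subgraph in loader:
    train_step(subgraph)  # hypothetical per-subgraph training step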
Code Example #4
File: protein.py  Project: Richard-He/ogbn-proteins
data.n_id = torch.arange(data.num_nodes)
data.node_species = None
data.y = data.y.to(torch.float)
# Initialize features of nodes by aggregating edge features.
row, col = data.edge_index
data.x = scatter(data.edge_attr, col, 0, dim_size=data.num_nodes, reduce='sum')
# Set split indices to masks.
for split in ['train', 'valid', 'test']:
    mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    mask[splitted_idx[split]] = True
    data[f'{split}_mask'] = mask

# train_loader = GraphSAINTRandomWalkSampler(data, batch_size=int(data.num_nodes / 400), num_steps=10,
#                                 walk_length=10)
train_loader = RandomNodeSampler(data,
                                 num_parts=num_parts,
                                 num_workers=5,
                                 shuffle=True)
test_loader = RandomNodeSampler(data, num_parts=10, num_workers=5)

# p_train_loader = GraphSAINTRandomWalkSampler(data, batch_size=int(data.num_nodes / 200), num_steps=10,
#                                 walk_length=10)
p_train_loader = RandomNodeSampler(data,
                                   num_parts=int(num_parts / 2),
                                   num_workers=5,
                                   shuffle=True)
# Nodes kept per pruned batch: prune_ratio * average p_train_loader batch size.
k = int(data.num_nodes / num_parts * 2 * prune_ratio)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model1 = DeeperGCN2(hidden_channels=64, num_layers=28).to(device)
model2 = DeeperGCN2(hidden_channels=64, num_layers=2).to(device)
optimizer1 = torch.optim.Adam(model1.parameters(), lr=1e-3)
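For scale: ogbn-proteins has 132,534 nodes. Since p_train_loader uses num_parts/2 partitions, each of its batches covers about 2 * num_nodes / num_parts nodes, i.e. 132534/20 ≈ 6,627 nodes with num_parts=40. With a hypothetical prune_ratio of 0.5 (both prune_ratio and num_parts are defined elsewhere in the project), k = int(132534 / 40 * 2 * 0.5) = 3,313 nodes would be kept per pruned batch.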
Code Example #5
File: AdaGNN_h.py  Project: Richard-He/AGN
# Initialize features of nodes by aggregating edge features.

# Set split indices to masks.
for split in ['train', 'valid', 'test']:
    mask = torch.zeros(data.num_nodes, dtype=torch.bool)
    mask[splitted_idx[split]] = True
    data[f'{split}_mask'] = mask
data['test_mask'] = data['valid_mask'] | data['test_mask']  # fold validation into test
y_tar = data.y[data.train_mask].cuda()

map_ = torch.zeros(data.num_nodes, dtype=torch.long)
train_cnt = data['train_mask'].int().sum()
map_[splitted_idx['train']] = torch.arange(train_cnt)

train_loader = RandomNodeSampler(data,
                                 num_parts=args.num_train_parts,
                                 shuffle=True,
                                 num_workers=5)
test_loader = RandomNodeSampler(data,
                                num_parts=args.num_test_parts,
                                num_workers=5)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if data_n == 'protein':
    model = AdaGNN_h(in_channels=data.x.size(-1),
                     hidden_channels=64,
                     num_layer_list=[layer] * num_gnns,
                     out_channels=data.y.size(-1),
                     gnn_model=[gnn] * num_gnns).to(device)
elif data_n == 'product':
    model = AdaGNN_h(in_channels=data.x.size(-1),
                     hidden_channels=64,
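One detail worth unpacking from this example: map_ is an inverse index that sends a global node id in the training split to its row position in y_tar. A toy illustration:

import torch

train_idx = torch.tensor([3, 7, 9])       # global ids of the training nodes
map_ = torch.zeros(12, dtype=torch.long)  # 12 nodes in the toy graph
map_[train_idx] = torch.arange(len(train_idx))
print(map_[7].item())  # -> 1: node 7 is the second row of y_tar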
Code Example #6
def train_dataloader(self):
    return RandomNodeSampler(self.data_train.data,
                             num_parts=6,
                             num_workers=self.num_workers,
                             shuffle=True)
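This method is a PyTorch Lightning dataloader hook: Lightning calls train_dataloader() to obtain the training loader (RandomNodeSampler subclasses torch.utils.data.DataLoader, so it qualifies). A hedged sketch of the surrounding class; the class name is hypothetical, and self.data_train / self.num_workers are the project's own attributes:

import pytorch_lightning as pl
from torch_geometric.data import RandomNodeSampler

class GraphDataModule(pl.LightningDataModule):  # hypothetical name
    def __init__(self, data_train, num_workers=4):
        super().__init__()
        self.data_train = data_train
        self.num_workers = num_workers

    def train_dataloader(self):
        return RandomNodeSampler(self.data_train.data, num_parts=6,
                                 num_workers=self.num_workers, shuffle=True)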
Code Example #7
def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):

    start0 = time.time()
    per_epoch_time = []

    DATASET_NAME = dataset.name

    if MODEL_NAME in ['GCN', 'GAT']:
        if net_params['self_loop']:
            print(
                "[!] Adding graph self-loops for GCN/GAT models (central node trick)."
            )
            dataset._add_self_loops()
    if not net_params['edge_feat']:
        edge_feat_dim = 1
        if DATASET_NAME == 'ogbn-mag':
            dataset.dataset.edge_attr = torch.ones(
                dataset.dataset[0].num_edges,
                edge_feat_dim).type(torch.float32)
        else:
            dataset.dataset.data.edge_attr = torch.ones(
                dataset.dataset[0].num_edges,
                edge_feat_dim).type(torch.float32)

    if net_params['pos_enc']:
        print("[!] Adding graph positional encoding.")
        dataset._add_positional_encodings(net_params['pos_enc_dim'],
                                          DATASET_NAME)
        print('Time PE:', time.time() - start0)
    device = net_params['device']
    if DATASET_NAME == 'ogbn-mag':
        dataset.split_idx['train'] = dataset.split_idx['train']['paper']
        dataset.split_idx['valid'] = dataset.split_idx['valid']['paper']
        dataset.split_idx['test'] = dataset.split_idx['test']['paper']
    # else:
    #     dataset.split_idx['train'], dataset.split_idx['valid'], dataset.split_idx['test'] = dataset.split_idx['train'].to(device), \
    #                                                                       dataset.split_idx['valid'].to(device), \
    #                                                                       dataset.split_idx['test'].to(device)

    # transform = T.ToSparseTensor() To do to save memory
    # self.train.graph_lists = [positional_encoding(g, pos_enc_dim, framework='pyg') for _, g in enumerate(dataset.train)]
    root_log_dir, root_ckpt_dir, write_file_name, write_config_file = dirs

    # Write network and optimization hyper-parameters in folder config/
    with open(write_config_file + '.txt', 'w') as f:
        f.write(
            """Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n\nTotal Parameters: {}\n\n"""
            .format(DATASET_NAME, MODEL_NAME, params, net_params,
                    net_params['total_param']))

    log_dir = os.path.join(root_log_dir, "RUN_" + str(0))
    writer = SummaryWriter(log_dir=log_dir)

    # setting seeds
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    if device.type == 'cuda':
        torch.cuda.manual_seed(params['seed'])

    print("Training Graphs: ", dataset.split_idx['train'].size(0))
    print("Validation Graphs: ", dataset.split_idx['valid'].size(0))
    print("Test Graphs: ", dataset.split_idx['test'].size(0))
    print("Number of Classes: ", net_params['n_classes'])

    model = gnn_model(MODEL_NAME, net_params)
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(),
                           lr=params['init_lr'],
                           weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=params['lr_reduce_factor'],
        patience=params['lr_schedule_patience'],
        verbose=True)
    evaluator = Evaluator(name=DATASET_NAME)
    epoch_train_losses, epoch_val_losses = [], []
    epoch_train_accs, epoch_val_accs = [], []

    # import train functions for all other GCNs
    if DATASET_NAME == 'ogbn-mag' or DATASET_NAME == 'ogbn-products':
        from train.train_ogb_node_classification import train_epoch, evaluate_network
    elif DATASET_NAME == 'ogbn-proteins':
        from train.train_ogb_node_classification import train_epoch_proteins as train_epoch, evaluate_network_proteins as evaluate_network
    data = dataset.dataset[0]
    # Set split indices to masks.
    for split in ['train', 'valid', 'test']:
        mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        mask[dataset.split_idx[split]] = True
        data[f'{split}_mask'] = mask
    num_parts = 5 if DATASET_NAME == 'ogbn-mag' else 40
    train_loader = RandomNodeSampler(data,
                                     num_parts=num_parts,
                                     shuffle=True,
                                     num_workers=0)
    test_loader = RandomNodeSampler(data, num_parts=5, num_workers=0)
    # At any point you can hit Ctrl + C to break out of training early.
    try:
        with tqdm(range(params['epochs']), ncols=0) as t:
            for epoch in t:

                t.set_description('Epoch %d' % epoch)

                start = time.time()

                # common train function shared by all models
                epoch_train_loss = train_epoch(model, optimizer, device,
                                               train_loader, epoch)

                epoch_train_acc, epoch_val_acc, epoch_test_acc, epoch_val_loss = evaluate_network(
                    model, device, test_loader, evaluator, epoch)
                # _, epoch_test_acc = evaluate_network(model, device, test_loader, epoch)

                epoch_train_losses.append(epoch_train_loss)
                epoch_val_losses.append(epoch_val_loss)
                epoch_train_accs.append(epoch_train_acc)
                epoch_val_accs.append(epoch_val_acc)

                writer.add_scalar('train/_loss', epoch_train_loss, epoch)
                writer.add_scalar('val/_loss', epoch_val_loss, epoch)
                writer.add_scalar('train/_acc', epoch_train_acc, epoch)
                writer.add_scalar('val/_acc', epoch_val_acc, epoch)
                writer.add_scalar('test/_acc', epoch_test_acc, epoch)
                writer.add_scalar('learning_rate',
                                  optimizer.param_groups[0]['lr'], epoch)

                t.set_postfix(time=time.time() - start,
                              lr=optimizer.param_groups[0]['lr'],
                              train_loss=epoch_train_loss,
                              val_loss=epoch_val_loss,
                              train_acc=epoch_train_acc,
                              val_acc=epoch_val_acc,
                              test_acc=epoch_test_acc)

                per_epoch_time.append(time.time() - start)

                # Saving checkpoint
                ckpt_dir = os.path.join(root_ckpt_dir, "RUN_")
                if not os.path.exists(ckpt_dir):
                    os.makedirs(ckpt_dir)
                # the function to save the checkpoint
                # torch.save(model.state_dict(), '{}.pkl'.format(ckpt_dir + "/epoch_" + str(epoch)))

                # Keep only the two most recent checkpoints.
                files = glob.glob(ckpt_dir + '/*.pkl')
                for file in files:
                    epoch_nb = file.split('_')[-1]
                    epoch_nb = int(epoch_nb.split('.')[0])
                    if epoch_nb < epoch - 1:
                        os.remove(file)

                scheduler.step(epoch_val_loss)
                # uncomment to smoke-test the script:
                # if epoch == 1:
                #     break

                if optimizer.param_groups[0]['lr'] < params['min_lr']:
                    print("\n!! LR SMALLER OR EQUAL TO MIN LR THRESHOLD.")
                    break

                # Stop training after params['max_time'] hours
                if time.time() - start0 > params['max_time'] * 3600:
                    print('-' * 89)
                    print(
                        "Max training time of {:.2f} hours elapsed, stopping."
                        .format(params['max_time']))
                    break

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early because of KeyboardInterrupt')

    train_acc, val_acc, test_acc, _ = evaluate_network(model, device,
                                                       test_loader, evaluator,
                                                       epoch)
    train_acc, val_acc, test_acc = 100 * train_acc, 100 * val_acc, 100 * test_acc
    print("Test Accuracy: {:.4f}".format(test_acc))
    print("Val Accuracy: {:.4f}".format(val_acc))
    print("Train Accuracy: {:.4f}".format(train_acc))
    print("Convergence Time (Epochs): {:.4f}".format(epoch))
    print("TOTAL TIME TAKEN: {:.4f}s".format(time.time() - start0))
    print("AVG TIME PER EPOCH: {:.4f}s".format(np.mean(per_epoch_time)))

    writer.close()
    """
        Write the results in out_dir/results folder
    """
    with open(write_file_name + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n{}\n\nTotal Parameters: {}\n\n
    FINAL RESULTS\nTEST ACCURACY: {:.4f}\nval ACCURACY: {:.4f}\nTRAIN ACCURACY: {:.4f}\n\n
    Convergence Time (Epochs): {:.4f}\nTotal Time Taken: {:.4f} hrs\nAverage Time Per Epoch: {:.4f} s\n\n\n"""\
          .format(DATASET_NAME, MODEL_NAME, params, net_params, model, net_params['total_param'],
                  test_acc, val_acc,train_acc, epoch, (time.time()-start0)/3600, np.mean(per_epoch_time)))