Example #1
def main():
    parser = argparse.ArgumentParser(description='gen_models')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--use_node_embedding', action='store_true')

    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=250)
    parser.add_argument('--runs', type=int, default=10)

    args = parser.parse_args()
    print(args)

    
    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-arxiv',
                                     transform=T.ToSparseTensor())
    
    data = dataset[0]
    data.adj_t = data.adj_t.to_symmetric()
    data = data.to(device)
    
    x = data.x
    if args.use_node_embedding:
        embedding = torch.load('embedding.pt', map_location=device)
        x = torch.cat([x, embedding], dim=-1)
        
    x = x.to(device)
    adj_t = data.adj_t.to(device)
    y_true = data.y.to(device)
    
    split_idx = dataset.get_idx_split()
    train_idx = split_idx['train'].to(device)
    valid_idx = split_idx['valid'].to(device)
    test_idx = split_idx['test'].to(device)

    model = GCN(x.size(-1), args.hidden_channels, dataset.num_classes,
                args.num_layers, args.dropout).to(device)
        
    evaluator = Evaluator(name='ogbn-arxiv')
    logger = Logger(args.runs, args)
    
    idxs = train_idx
    for run in range(args.runs):
        print(sum(p.numel() for p in model.parameters()))
        
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        best_valid = 0
        best_out = None
        
        import time
        begin = time.time()
        for epoch in range(1, args.epochs + 1):
            model.train()
            optimizer.zero_grad()
            out = model(x, adj_t)[idxs]
            loss = F.nll_loss(out, y_true.squeeze(1)[idxs])
            loss.backward()
            optimizer.step()

            result = test(model, x, y_true, adj_t, split_idx, evaluator)
            train_acc, valid_acc, test_acc = result
            logger.add_result(run, result)

            print(f'Run: {run + 1:02d}, '
                  f'Epoch: {epoch:02d}, '
                  f'Loss: {loss:.4f}, '
                  f'Train: {100 * train_acc:.2f}%, '
                  f'Valid: {100 * valid_acc:.2f}%, '
                  f'Test: {100 * test_acc:.2f}%')
        logger.print_statistics(run)

    logger.print_statistics()
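The snippet above references a GCN class defined elsewhere in the original file. A minimal sketch of what it plausibly looks like, following the standard OGB full-batch pattern (the reference implementation additionally inserts BatchNorm between layers; this sketch is an assumption, not the source's code):

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        self.convs.append(GCNConv(in_channels, hidden_channels, cached=True))
        for _ in range(num_layers - 2):
            self.convs.append(GCNConv(hidden_channels, hidden_channels,
                                      cached=True))
        self.convs.append(GCNConv(hidden_channels, out_channels, cached=True))
        self.dropout = dropout

    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()

    def forward(self, x, adj_t):
        # adj_t is the symmetric SparseTensor prepared in main()
        for conv in self.convs[:-1]:
            x = F.relu(conv(x, adj_t))
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, adj_t)
        return x.log_softmax(dim=-1)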
def main():
    parser = argparse.ArgumentParser(description='OGBN-Arxiv (GAT Full-Batch)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument("--num-layers",
                        type=int,
                        default=3,
                        help="number of hidden layers")
    parser.add_argument("--lr",
                        type=float,
                        default=0.0029739421726400865,
                        help="learning rate")
    parser.add_argument('--weight-decay',
                        type=float,
                        default=2.4222556964495987e-05,
                        help="weight decay")
    parser.add_argument("--num-hidden",
                        type=int,
                        default=16,
                        help="number of hidden units")
    parser.add_argument("--dropout",
                        type=float,
                        default=0.18074706609292976,
                        help="Dropout to use")
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument("--eval",
                        action='store_true',
                        help='If not set, we will only do the training part.')
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-arxiv')
    split_idx = dataset.get_idx_split()

    data = dataset[0]
    x = data.x.to(device)
    y_true = data.y.to(device)
    train_idx = split_idx['train'].to(device)

    edge_index = data.edge_index.to(device)
    edge_index = to_undirected(edge_index, data.num_nodes)
    edge_index, _ = remove_self_loops(edge_index)
    edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

    model = GAT(num_layers=args.num_layers,
                in_feats=data.x.size(-1),
                num_hidden=args.num_hidden,
                num_classes=dataset.num_classes,
                heads=[4, 4, 4],
                dropout=args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-arxiv')
    logger = Logger(args.runs, args)

    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
        for epoch in range(1, 1 + args.epochs):
            t0 = time.time()
            loss = train(model, x, edge_index, y_true, train_idx, optimizer)
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))

            if not args.eval:
                continue

            result = test(model, x, edge_index, y_true, split_idx, evaluator)
            logger.add_result(run, result)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}%, '
                      f'Test: {100 * test_acc:.2f}%')
        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
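The train helper used above is not shown; a minimal sketch consistent with the call site train(model, x, edge_index, y_true, train_idx, optimizer), assuming the GAT returns log-probabilities:

import torch.nn.functional as F

def train(model, x, edge_index, y_true, train_idx, optimizer):
    model.train()
    optimizer.zero_grad()
    out = model(x, edge_index)[train_idx]
    loss = F.nll_loss(out, y_true.squeeze(1)[train_idx])
    loss.backward()
    optimizer.step()
    return loss.item()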
parser.add_argument('--num_layers', type=int, default=3)
parser.add_argument('--hidden_channels', type=int, default=256)
parser.add_argument('--dropout', type=float, default=0.5)
parser.add_argument('--att_dropout', type=float, default=0.0)
parser.add_argument('--heads', type=int, default=4)
parser.add_argument('--lr', type=float, default=0.01)
parser.add_argument('--epochs', type=int, default=500)
parser.add_argument('--runs', type=int, default=10)
parser.add_argument('--K', type=int, default=10)
parser.add_argument('--alpha', type=float, default=0.1)
parser.add_argument('--mode', type=str, default='concat')
args = parser.parse_args()
print(args)

logger = Logger(args.runs, args)
evaluator = Evaluator(name='ogbn-arxiv')

device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
device = torch.device(device)

model = None
if args.model == 'gcn':
    model = GCN(data.num_features, args.hidden_channels, num_classes,
                args.num_layers, args.dropout).to(device)
elif args.model == 'sage':
    model = SAGE(data.num_features, args.hidden_channels, num_classes,
                 args.num_layers, args.dropout).to(device)
elif args.model == 'gat':
    model = GAT(data.num_features, args.hidden_channels, num_classes,
                args.num_layers, args.heads, args.dropout,
                args.att_dropout).to(device)
def main():
    parser = argparse.ArgumentParser(description='OGBN-Arxiv (GraphSAGE Full-Batch)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument("--eval", action='store_true',
                        help='If not set, we will only do the training part.')
    args = parser.parse_args()
    print(args)

    dataset = DglNodePropPredDataset(name='ogbn-arxiv')
    split_idx = dataset.get_idx_split()

    g, labels = dataset[0]
    feats = jax.device_put(
            g.ndata['feat'],
            jax.devices()[0]
    )

    g = g.to(jax.devices("cpu")[0])

    g = dgl.to_bidirected(g)
    g = g.int()
    g = g.to(jax.devices()[0])

    train_idx = split_idx['train'].numpy()

    model = GraphSAGE(in_feats=feats.shape[-1],
                      hidden_feats=args.hidden_channels,
                      out_feats=dataset.num_classes,
                      num_layers=args.num_layers,
                      dropout=args.dropout)

    evaluator = Evaluator(name='ogbn-arxiv')
    logger = Logger(args.runs, args)

    dur = []
    for run in range(args.runs):
        initial_params = model.init(jax.random.PRNGKey(0), g, feats)
        optimizer = flax.optim.Adam(args.lr).create(initial_params)
        for epoch in range(1, 1 + args.epochs):
            t0 = time.time()
            optimizer, loss = train(model, g, feats, labels, train_idx, optimizer)
            print(loss)
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue

            result = test(model, g, feats, labels, split_idx, evaluator)
            logger.add_result(run, result)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}%, '
                      f'Test: {100 * test_acc:.2f}%')

        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
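The train step called above (optimizer, loss = train(...)) is not shown. A rough sketch of what it could look like with the legacy flax.optim API the snippet uses; the loss function and array conversions here are assumptions:

import jax
import jax.numpy as jnp

def train(model, g, feats, labels, train_idx, optimizer):
    # assumes `labels` has already been converted to a NumPy/JAX array
    # of shape (num_nodes, 1)
    y = jnp.asarray(labels).squeeze(-1)

    def loss_fn(params):
        logits = model.apply(params, g, feats)
        logp = jax.nn.log_softmax(logits, axis=-1)
        # negative log-likelihood on the training nodes
        return -jnp.mean(logp[train_idx, y[train_idx]])

    loss, grads = jax.value_and_grad(loss_fn)(optimizer.target)
    optimizer = optimizer.apply_gradient(grads)
    return optimizer, loss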
Example #5
    elif task_type == 'sequential':
        for i in np.arange(n_batch):
            target_papers = graph.test_paper[(s_idx + i) *
                                             batch_size:(s_idx + i + 1) *
                                             batch_size]
            p = pool.apply_async(ogbn_sample,
                                 args=([randint(), target_papers]))
            jobs.append(p)
    return jobs


graph = dill.load(open(args.data_dir, 'rb'))
np.random.seed(43)
np.random.shuffle(graph.test_paper)

evaluator = Evaluator(name='ogbn-mag')
device = torch.device("cuda:%d" % args.cuda)
gnn = GNN(conv_name=args.conv_name,
          in_dim=len(graph.node_feature['paper'][0]),
          n_hid=args.n_hid,
          n_heads=args.n_heads,
          n_layers=args.n_layers,
          dropout=args.dropout,
          num_types=len(graph.get_types()),
          num_relations=len(graph.get_meta_graph()) + 1,
          prev_norm=args.prev_norm,
          last_norm=args.last_norm,
          use_RTE=args.use_RTE)
classifier = Classifier(args.n_hid, graph.y.max().item() + 1)

model = nn.Sequential(gnn, classifier)
model.load_state_dict(torch.load(args.model_dir))
model.to(device)
print('Model #Params: %d' % get_n_params(model))
criterion = nn.NLLLoss()

model.eval()
with torch.no_grad():
    pass  # (evaluation body truncated in the source snippet)
def main():
    parser = argparse.ArgumentParser(description='Outcome Correlations')
    parser.add_argument('--dataset', type=str)
    parser.add_argument('--method', type=str)
    args = parser.parse_args()

    dataset = PygNodePropPredDataset(name=f'ogbn-{args.dataset}')
    data = dataset[0]

    adj, D_isqrt = process_adj(data)
    normalized_adjs = gen_normalized_adjs(adj, D_isqrt)
    DAD, DA, AD = normalized_adjs
    evaluator = Evaluator(name=f'ogbn-{args.dataset}')

    split_idx = dataset.get_idx_split()

    def eval_test(result, idx=split_idx['test']):
        return evaluator.eval({
            'y_true': data.y[idx],
            'y_pred': result[idx].argmax(dim=-1, keepdim=True),
        })['acc']

    if args.dataset == 'arxiv':
        lp_dict = {
            'idxs': ['train'],
            'alpha': 0.9,
            'num_propagations': 50,
            'A': AD,
        }
        plain_dict = {
            'train_only': True,
            'alpha1': 0.87,
            'A1': AD,
            'num_propagations1': 50,
            'alpha2': 0.81,
            'A2': DAD,
            'num_propagations2': 50,
            'display': False,
        }
        plain_fn = double_correlation_autoscale
        """
        If you tune hyperparameters on test set
        {'alpha1': 0.9988673963255859, 'alpha2': 0.7942279952481052, 'A1': 'DA', 'A2': 'AD'} 
        gets you to 72.64
        """
        linear_dict = {
            'train_only': True,
            'alpha1': 0.98,
            'alpha2': 0.65,
            'A1': AD,
            'A2': DAD,
            'num_propagations1': 50,
            'num_propagations2': 50,
            'display': False,
        }
        linear_fn = double_correlation_autoscale
        """
        If you tune hyperparameters on test set
        {'alpha1': 0.9956668128133523, 'alpha2': 0.8542393515434346, 'A1': 'DA', 'A2': 'AD'}
        gets you to 73.35
        """
        mlp_dict = {
            'train_only': True,
            'alpha1': 0.9791632871592579,
            'alpha2': 0.7564990804200602,
            'A1': DA,
            'A2': AD,
            'num_propagations1': 50,
            'num_propagations2': 50,
            'display': False,
        }
        mlp_fn = double_correlation_autoscale

        gat_dict = {
            'labels': ['train'],
            'alpha': 0.8,
            'A': DAD,
            'num_propagations': 50,
            'display': False,
        }
        gat_fn = only_outcome_correlation

    elif args.dataset == 'products':
        lp_dict = {
            'idxs': ['train'],
            'alpha': 0.5,
            'num_propagations': 50,
            'A': DAD,
        }

        plain_dict = {
            'train_only': True,
            'alpha1': 1.0,
            'alpha2': 0.9,
            'scale': 20.0,
            'A1': DAD,
            'A2': DAD,
            'num_propagations1': 50,
            'num_propagations2': 50,
        }
        plain_fn = double_correlation_fixed

        linear_dict = {
            'train_only': True,
            'alpha1': 1.0,
            'alpha2': 0.9,
            'scale': 20.0,
            'A1': DAD,
            'A2': DAD,
            'num_propagations1': 50,
            'num_propagations2': 50,
        }
        linear_fn = double_correlation_fixed

        mlp_dict = {
            'train_only': True,
            'alpha1': 1.0,
            'alpha2': 0.8,
            'scale': 10.0,
            'A1': DAD,
            'A2': DA,
            'num_propagations1': 50,
            'num_propagations2': 50,
        }
        mlp_fn = double_correlation_fixed

    model_outs = glob.glob(f'models/{args.dataset}_{args.method}/*.pt')

    if args.method == 'lp':
        out = label_propagation(data, split_idx, **lp_dict)
        print('Valid acc: ', eval_test(out, split_idx['valid']))
        print('Test acc:', eval_test(out, split_idx['test']))
        return

    get_orig_acc(data, eval_test, model_outs, split_idx)
    if args.method == 'plain':
        evaluate_params(data, eval_test, model_outs, split_idx,
                        plain_dict, fn=plain_fn)
    elif args.method == 'linear':
        evaluate_params(data, eval_test, model_outs, split_idx,
                        linear_dict, fn=linear_fn)
    elif args.method == 'mlp':
        evaluate_params(data, eval_test, model_outs, split_idx,
                        mlp_dict, fn=mlp_fn)
    elif args.method == 'gat':
        evaluate_params(data, eval_test, model_outs, split_idx,
                        gat_dict, fn=gat_fn)
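At the core of all these configurations (alpha, A, num_propagations) is the same outcome-correlation smoothing step; a schematic sketch of the update it performs (illustrative, not the source's exact function):

def outcome_correlation(A, y0, alpha, num_propagations):
    # iterate y <- alpha * A @ y + (1 - alpha) * y0, where A is one of the
    # normalized adjacencies (DAD, DA or AD) and y0 is the initial guess
    y = y0.clone()
    for _ in range(num_propagations):
        y = alpha * (A @ y) + (1 - alpha) * y0
    return y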
Example #7
from ogb.nodeproppred.dataset_dgl import DglNodePropPredDataset
dataset = DglNodePropPredDataset(name='ogbn-proteins')
num_tasks = dataset.num_tasks  # obtaining number of prediction tasks in a dataset

splitted_idx = dataset.get_idx_split()
train_idx, valid_idx, test_idx = splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"]
# graph: dgl graph object, label: torch tensor of shape (num_nodes, num_tasks)
graph, label = dataset[0]
from ogb.nodeproppred import Evaluator

evaluator = Evaluator(name = 'ogbn-proteins')
print(evaluator.expected_input_format) 
print(evaluator.expected_output_format)
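A quick illustration of the dict the evaluator expects for ogbn-proteins: y_true and y_pred are both (num_nodes, num_tasks) tensors, and the reported metric is ROC-AUC (the random scores below are only there to show the shapes):

import torch

y_pred = torch.rand(label.shape)  # stand-in for a model's sigmoid outputs
result = evaluator.eval({'y_true': label, 'y_pred': y_pred})
print(result)  # e.g. {'rocauc': ...}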
pass
        x = self.layers[0].conv(x, edge_index, edge_attr)

        for layer in self.layers[1:]:
            x = layer(x, edge_index, edge_attr)

        x = self.layers[0].act(self.layers[0].norm(x))
        x = F.dropout(x, p=0.1, training=self.training)

        return self.lin(x)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GEN(hidden_channels=64, num_layers=28).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.BCEWithLogitsLoss()
evaluator = Evaluator('ogbn-proteins')


def train(epoch):
    model.train()

    pbar = tqdm(total=len(train_loader))
    pbar.set_description(f'Training epoch: {epoch:04d}')

    total_loss = total_examples = 0
    for data in train_loader:
        optimizer.zero_grad()
        data = data.to(device)
        out = model(data.x, data.edge_index, data.edge_attr)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
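        # (the snippet is cut off here; a plausible continuation, assumed
        # rather than taken from the source)
        optimizer.step()

        num_examples = int(data.train_mask.sum())
        total_loss += float(loss) * num_examples
        total_examples += num_examples
        pbar.update(1)

    pbar.close()
    return total_loss / total_examples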
Example #9
def main(args):
    print(args)
    if args["rand_seed"] > -1:
        set_random_seed(args["rand_seed"])

    dataset = DglNodePropPredDataset(name=args["dataset"])
    print(dataset.meta_info)
    splitted_idx = dataset.get_idx_split()
    graph = dataset.graph[0]
    graph.ndata["labels"] = dataset.labels.float().to(args["device"])
    graph.edata["feat"] = graph.edata["feat"].float().to(args["device"])

    if args["ewnorm"] == "both":
        print("Symmetric normalization of edge weights by degree")
        normalize_edge_weights(graph, args["device"], args["num_ew_channels"])
    elif args["ewnorm"] == "none":
        print("Not normalizing edge weights")
        for channel in range(args["num_ew_channels"]):
            graph.edata["feat_" +
                        str(channel)] = graph.edata["feat"][:,
                                                            channel:channel +
                                                            1]

    model = load_model(args).to(args["device"])
    optimizer = Adam(model.parameters(),
                     lr=args["lr"],
                     weight_decay=args["weight_decay"])
    min_lr = 1e-3
    scheduler = ReduceLROnPlateau(optimizer,
                                  "max",
                                  factor=0.7,
                                  patience=100,
                                  verbose=True,
                                  min_lr=min_lr)
    print("scheduler min_lr", min_lr)

    criterion = nn.BCEWithLogitsLoss()
    evaluator = Evaluator(args["dataset"])

    print("model", args["model"])
    print("n_layers", args["n_layers"])
    print("hidden dim", args["hidden_feats"])
    print("lr", args["lr"])

    dur = []
    best_val_score = 0.0
    num_patient_epochs = 0
    model_folder = "./saved_models/"
    model_path = model_folder + str(args["exp_name"]) + "_" + str(args["postfix"])

    if not os.path.exists(model_folder):
        os.makedirs(model_folder)

    for epoch in range(1, args["num_epochs"] + 1):
        if epoch >= 3:
            t0 = time.time()

        loss, train_score = run_a_train_epoch(graph, splitted_idx["train"],
                                              model, criterion, optimizer,
                                              evaluator)

        if epoch >= 3:
            dur.append(time.time() - t0)
            avg_time = np.mean(dur)
        else:
            avg_time = None

        train_score, val_score, test_score = run_an_eval_epoch(
            graph, splitted_idx, model, evaluator)

        scheduler.step(val_score)

        # Early stop
        if val_score > best_val_score:
            torch.save(model.state_dict(), model_path)
            best_val_score = val_score
            num_patient_epochs = 0
        else:
            num_patient_epochs += 1

        print("Epoch {:d}, loss {:.4f}, train score {:.4f}, "
              "val score {:.4f}, avg time {}, num patient epochs {:d}".format(
                  epoch, loss, train_score, val_score, avg_time,
                  num_patient_epochs))

        if num_patient_epochs == args["patience"]:
            break

    model.load_state_dict(torch.load(model_path))
    train_score, val_score, test_score = run_an_eval_epoch(
        graph, splitted_idx, model, evaluator)
    print("Train score {:.4f}".format(train_score))
    print("Valid score {:.4f}".format(val_score))
    print("Test score {:.4f}".format(test_score))

    with open("results.txt", "w") as f:
        f.write("loss {:.4f}\n".format(loss))
        f.write("Best validation rocauc {:.4f}\n".format(best_val_score))
        f.write("Test rocauc {:.4f}\n".format(test_score))

    print(args)
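run_a_train_epoch and run_an_eval_epoch come from the surrounding project; a rough sketch of the training epoch, with the model's call signature assumed:

def run_a_train_epoch(graph, train_idx, model, criterion, optimizer, evaluator):
    model.train()
    logits = model(graph)  # assumed signature: features live in graph.ndata/edata
    labels = graph.ndata["labels"]
    loss = criterion(logits[train_idx], labels[train_idx])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    score = evaluator.eval({"y_true": labels[train_idx],
                            "y_pred": logits[train_idx]})["rocauc"]
    return loss.item(), score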
Example #10
def main():
    parser = argparse.ArgumentParser(description='OGBN-Arxiv (MLP)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_node_embedding', action='store_true')
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--runs', type=int, default=10)

    parser.add_argument('--step-size', type=float, default=2e-3)
    parser.add_argument('-m', type=int, default=3)
    parser.add_argument('--attack', type=str, default='flag')

    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-arxiv')
    split_idx = dataset.get_idx_split()
    data = dataset[0]

    x = data.x
    if args.use_node_embedding:
        embedding = torch.load('embedding.pt', map_location='cpu')
        x = torch.cat([x, embedding], dim=-1)
    x = x.to(device)

    y_true = data.y.to(device)
    train_idx = split_idx['train'].to(device)

    model = MLP(x.size(-1), args.hidden_channels, dataset.num_classes,
                args.num_layers, args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-arxiv')

    vals, tests = [], []
    for run in range(args.runs):
        best_val, final_test = 0, 0

        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

        for epoch in range(1, args.epochs + 1):
            loss = train_flag(model, x, y_true, train_idx, optimizer, args,
                              device)
            result = test(model, x, y_true, split_idx, evaluator)
            train, val, tst = result
            if val > best_val:
                best_val = val
                final_test = tst

        print(f'Run{run} val:{best_val}, test:{final_test}')
        vals.append(best_val)
        tests.append(final_test)

    print('')
    print(f"Average val accuracy: {np.mean(vals)} ± {np.std(vals)}")
    print(f"Average test accuracy: {np.mean(tests)} ± {np.std(tests)}")
Example #11
def main():
    parser = argparse.ArgumentParser(description='OGBN-Products (GNN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=300)
    parser.add_argument('--runs', type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-products',
                                     transform=T.ToSparseTensor())
    data = dataset[0]

    split_idx = dataset.get_idx_split()
    train_idx = split_idx['train'].to(device)

    if args.use_sage:
        model = SAGE(data.num_features, args.hidden_channels,
                     dataset.num_classes, args.num_layers,
                     args.dropout).to(device)
    else:
        model = GCN(data.num_features, args.hidden_channels,
                    dataset.num_classes, args.num_layers,
                    args.dropout).to(device)

        # Pre-compute GCN normalization.
        adj_t = data.adj_t.set_diag()
        deg = adj_t.sum(dim=1).to(torch.float)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        adj_t = deg_inv_sqrt.view(-1, 1) * adj_t * deg_inv_sqrt.view(1, -1)
        data.adj_t = adj_t

    data = data.to(device)

    evaluator = Evaluator(name='ogbn-products')
    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, data, train_idx, optimizer)
            result = test(model, data, split_idx, evaluator)
            logger.add_result(run, result)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}%, '
                      f'Test: {100 * test_acc:.2f}%')

        logger.print_statistics(run)
    logger.print_statistics()
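The train and test helpers are the usual OGB full-batch ones; a minimal sketch consistent with the call sites above (assumes the models return log-probabilities):

import torch
import torch.nn.functional as F

def train(model, data, train_idx, optimizer):
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.adj_t)[train_idx]
    loss = F.nll_loss(out, data.y.squeeze(1)[train_idx])
    loss.backward()
    optimizer.step()
    return loss.item()

@torch.no_grad()
def test(model, data, split_idx, evaluator):
    model.eval()
    y_pred = model(data.x, data.adj_t).argmax(dim=-1, keepdim=True)
    return [evaluator.eval({
        'y_true': data.y[split_idx[key]],
        'y_pred': y_pred[split_idx[key]],
    })['acc'] for key in ('train', 'valid', 'test')]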
Example #12
    def __init__(self):
        d_name = "ogbn-arxiv"
        dataset = NodePropPredDataset(name=d_name)
        graph, label = dataset[0]
        self.num_nodes = graph["num_nodes"]
        self.ogb_evaluator = Evaluator(name="ogbn-arxiv")
def main():
    parser = argparse.ArgumentParser(description='OGBN-Products (GraphSAGE Full-Batch)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=64)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=300)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument("--eval", action='store_true',
                        help='If not set, we will only do the training part.')
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-products')
    split_idx = dataset.get_idx_split()

    data = dataset[0]
    edge_index = to_undirected(data.edge_index, data.num_nodes).to(device)
    adj = SparseTensor(row=edge_index[0], col=edge_index[1])

    x, y_true = data.x.to(device), data.y.to(device)
    train_idx = split_idx['train'].to(device)

    model = GraphSAGE(in_feats=data.x.size(-1),
                      hidden_feats=args.hidden_channels,
                      out_feats=dataset.num_classes,
                      num_layers=args.num_layers,
                      dropout=args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-products')
    logger = Logger(args.runs, args)

    dur = []
    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            t0 = time.time()
            loss = train(model, x, adj, y_true, train_idx, optimizer)
            if epoch >= 3:
                dur.append(time.time() - t0)
                print('Training time/epoch {}'.format(np.mean(dur)))
            if not args.eval:
                continue

            result = test(model, x, adj, y_true, split_idx, evaluator)
            logger.add_result(run, result)

            if epoch % args.log_steps == 0:
                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}%, '
                      f'Test: {100 * test_acc:.2f}%')

        if args.eval:
            logger.print_statistics(run)
    if args.eval:
        logger.print_statistics()
Example #14
features_val = features[val_idx]
features_test = features[test_idx]
del features
gc.collect()

label_dim = int(max(train_labels.max(), val_labels.max(), test_labels.max())) + 1
train_dataset = SimpleDataset(features_train, train_labels)
valid_dataset = SimpleDataset(features_val, val_labels)
test_dataset = SimpleDataset(features_test, test_labels)

train_loader = DataLoader(train_dataset, batch_size=args.batch, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

model = MLP(features_train.size(-1), args.hidden, label_dim, args.layer,
            args.dropout).cuda(args.dev)
evaluator = Evaluator(name='ogbn-papers100M')
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)


def train(model, device, train_loader, optimizer):
    model.train()

    time_epoch = 0
    loss_list = []
    for step, (x, y) in enumerate(train_loader):
        t_st = time.time()
        x, y = x.cuda(device), y.cuda(device)
        optimizer.zero_grad()
        out = model(x)
        loss = F.nll_loss(out, y.squeeze(1))
        loss.backward()
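        # (the snippet is cut off here; a plausible continuation consistent
        # with the accumulators initialized above -- assumed, not verbatim)
        optimizer.step()
        time_epoch += time.time() - t_st
        loss_list.append(loss.item())
    return np.mean(loss_list), time_epoch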
Example #15
def main():
    global device, in_feats, n_classes, epsilon

    argparser = argparse.ArgumentParser(
        "GAT on OGBN-Arxiv",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    argparser.add_argument("--cpu",
                           action="store_true",
                           help="CPU mode. This option overrides --gpu.")
    argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID.")
    argparser.add_argument("--n-runs", type=int, default=10)
    argparser.add_argument("--n-epochs", type=int, default=2000)
    argparser.add_argument(
        "--use-labels",
        action="store_true",
        help="Use labels in the training set as input features.")
    argparser.add_argument(
        "--use-norm",
        action="store_true",
        help="Use symmetrically normalized adjacency matrix.")
    argparser.add_argument("--lr", type=float, default=0.002)
    argparser.add_argument("--n-layers", type=int, default=3)
    argparser.add_argument("--n-heads", type=int, default=3)
    argparser.add_argument("--n-hidden", type=int, default=256)
    argparser.add_argument("--dropout", type=float, default=0.75)
    argparser.add_argument("--attn_drop", type=float, default=0.05)
    argparser.add_argument("--wd", type=float, default=0)
    argparser.add_argument("--log-every", type=int, default=20)
    argparser.add_argument("--plot-curves", action="store_true")

    argparser.add_argument('--step-size', type=float, default=1e-3)
    argparser.add_argument('-m', type=int, default=3)
    argparser.add_argument('--amp', type=int, default=2)
    argparser.add_argument('--vanilla', action='store_true')

    args = argparser.parse_args()
    print(args)

    if args.cpu:
        device = th.device("cpu")
    else:
        device = th.device("cuda:%d" % args.gpu)

    # load data
    data = DglNodePropPredDataset(name="ogbn-arxiv")
    evaluator = Evaluator(name="ogbn-arxiv")

    splitted_idx = data.get_idx_split()
    train_idx, val_idx, test_idx = splitted_idx["train"], splitted_idx[
        "valid"], splitted_idx["test"]
    graph, labels = data[0]

    # add reverse edges
    srcs, dsts = graph.all_edges()
    graph.add_edges(dsts, srcs)

    # add self-loop
    print(f"Total edges before adding self-loop {graph.number_of_edges()}")
    graph = graph.remove_self_loop().add_self_loop()
    print(f"Total edges after adding self-loop {graph.number_of_edges()}")

    in_feats = graph.ndata["feat"].shape[1]
    n_classes = (labels.max() + 1).item()
    # graph.create_formats_()

    train_idx = train_idx.to(device)
    val_idx = val_idx.to(device)
    test_idx = test_idx.to(device)
    labels = labels.to(device)
    graph = graph.to(device)

    # run
    val_accs = []
    test_accs = []

    for i in range(1, args.n_runs + 1):
        val_acc, test_acc = run(args, graph, labels, train_idx, val_idx,
                                test_idx, evaluator, i)
        val_accs.append(val_acc)
        test_accs.append(test_acc)

    print(f"Runned {args.n_runs} times")
    print("Val Accs:", val_accs)
    print("Test Accs:", test_accs)
    print(f"Average val accuracy: {np.mean(val_accs)} ± {np.std(val_accs)}")
    print(f"Average test accuracy: {np.mean(test_accs)} ± {np.std(test_accs)}")
    print(f"Number of params: {count_parameters(args)}")
Example #16
train_loader = NeighborSampler(data.edge_index,
                               node_idx=train_idx,
                               sizes=[10, 10, 10],
                               batch_size=args.batch_train,
                               shuffle=True,
                               num_workers=args.num_workers)
subgraph_loader = NeighborSampler(data.edge_index,
                                  node_idx=None,
                                  sizes=[-1],
                                  batch_size=args.batch_test,
                                  shuffle=False,
                                  num_workers=args.num_workers)

train_idx = train_idx.to(device)

evaluator = Evaluator(name='ogbn-' + args.dataset)


class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super(GCN, self).__init__()
        self.num_layers = num_layers

        self.convs = torch.nn.ModuleList()
        self.convs.append(GCNConv(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(GCNConv(hidden_channels, hidden_channels))
        self.convs.append(GCNConv(hidden_channels, out_channels))

        self.dropout = dropout
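
    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()

    # The class is truncated before its forward pass; a minimal full-graph
    # forward is sketched here (an assumption, not the source's code). Note
    # that mini-batch training with the NeighborSampler above would need a
    # bipartite-aware forward over the sampled adjs, which GCNConv does not
    # support directly.
    def forward(self, x, edge_index):
        for conv in self.convs[:-1]:
            x = F.relu(conv(x, edge_index))
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.convs[-1](x, edge_index)
        return x.log_softmax(dim=-1)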
Example #17
def main():
    args = ArgsInit().save_exp()
    logging.getLogger().setLevel(logging.INFO)
    writer = SummaryWriter(log_dir=args.save)

    if args.use_gpu:
        device = (torch.device("cuda:" + str(args.device))
                  if torch.cuda.is_available() else torch.device("cpu"))
    else:
        device = torch.device("cpu")

    logging.info('%s' % device)

    dataset = OGBNDataset(dataset_name=args.dataset)
    # extract initial node features
    nf_path = dataset.extract_node_features(args.aggr)

    args.num_tasks = dataset.num_tasks
    args.nf_path = nf_path

    logging.info('%s' % args)

    evaluator = Evaluator(args.dataset)
    criterion = torch.nn.BCEWithLogitsLoss()

    valid_data_list = []

    for i in range(args.num_evals):
        parts = dataset.random_partition_graph(
            dataset.total_no_of_nodes,
            cluster_number=args.valid_cluster_number)
        valid_data = dataset.generate_sub_graphs(
            parts, cluster_number=args.valid_cluster_number)
        valid_data_list.append(valid_data)

    sub_dir = 'random-train_{}-test_{}-num_evals_{}'.format(
        args.cluster_number, args.valid_cluster_number, args.num_evals)
    logging.info(sub_dir)

    if args.backbone == 'deepergcn':
        # model = DeeperGCN(args).to(device)
        pass
    # elif args.backbone == 'deq':
    # model = DEQGCN(args).to(device)
    # elif args.backbone == 'revwt':
    # model = WTRevGCN(args).to(device)
    elif args.backbone == 'rev':
        model = RevGCN(args).to(device)
    else:
        raise Exception("unkown backbone")

    logging.info('# of params: {}'.format(
        sum(p.numel() for p in model.parameters())))
    logging.info('# of learnable params: {}'.format(
        sum(p.numel() for p in model.parameters() if p.requires_grad)))
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    results = {
        'highest_valid': 0,
        'final_train': 0,
        'final_test': 0,
        'highest_train': 0
    }

    start_time = time.time()

    for epoch in range(1, args.epochs + 1):
        # do random partition every epoch
        train_parts = dataset.random_partition_graph(
            dataset.total_no_of_nodes, cluster_number=args.cluster_number)
        data = dataset.generate_sub_graphs(train_parts,
                                           cluster_number=args.cluster_number)

        epoch_loss = train(data,
                           dataset,
                           model,
                           optimizer,
                           criterion,
                           device,
                           epoch=epoch)
        logging.info('Epoch {}, training loss {:.4f}'.format(
            epoch, epoch_loss))
        if epoch == 1:
            peak_memuse = torch.cuda.max_memory_allocated(device) / float(1024 ** 3)
            logging.info('Peak memuse {:.2f} G'.format(peak_memuse))
        torch.cuda.empty_cache()

        model.print_params(epoch=epoch)

        with torch.cuda.amp.autocast():
            result = multi_evaluate(valid_data_list,
                                    dataset,
                                    model,
                                    evaluator,
                                    device,
                                    epoch=epoch)

        if epoch % 5 == 0:
            logging.info('%s' % result)

        train_result = result['train']['rocauc']
        valid_result = result['valid']['rocauc']
        test_result = result['test']['rocauc']
        writer.add_scalar('stats/train_rocauc', train_result, epoch)
        writer.add_scalar('stats/valid_rocauc', valid_result, epoch)
        writer.add_scalar('stats/test_rocauc', test_result, epoch)

        if valid_result > results['highest_valid']:
            results['highest_valid'] = valid_result
            results['final_train'] = train_result
            results['final_test'] = test_result

            save_ckpt(model,
                      optimizer,
                      round(epoch_loss, 4),
                      epoch,
                      args.model_save_path,
                      sub_dir,
                      name_post='valid_best')

        if train_result > results['highest_train']:
            results['highest_train'] = train_result

    logging.info("%s" % results)

    end_time = time.time()
    total_time = end_time - start_time
    logging.info('Total time: {}'.format(
        time.strftime('%d-%H:%M:%S', time.gmtime(total_time))))
Example #18
        'heads': 1,
        'batch_size': 32,
        'post_hidden': 128,
        'message_hidden': 128,
        'normalize': False,
    }
    dataset_name = "ogbn-products"

    save_time = datetime.now().strftime("%m-%d_%H_%M_%S")
    save_dir = os.path.join("save", args['model_type'] + "_" + save_time)
    os.makedirs(save_dir)
    print("SAVE PATH:", save_dir)

    cluster_data, dataset, data, split_idx = ld.get_product_clusters()
    data_loader = ld.get_cluster_batches(cluster_data, args['batch_size'])
    evaluator = Evaluator(name=dataset_name)
    args['input_dim'] = data.num_features
    args['output_dim'] = dataset.num_classes

    model = models.get_model(args)
    print(model)

    # model.reset_parameters()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
    loss_fn = F.nll_loss
    model.to(device)

    scores = {'loss': [], 'train': [], 'val': [], 'test': []}
    best_model = None
    best_valid_acc = 0
    #plot_curves = True


# Define folder to save plots and model in
subfolder = '/layers' + str(args.n_layers) + '-heads' + str(
    args.n_heads) + '-epochs' + str(args.n_epochs)

# set cpu or gpu
if args.cpu:
    device = th.device("cpu")
else:
    device = th.device("cuda:%d" % args.gpu)

# load data
data = DglNodePropPredDataset(name="ogbn-arxiv")
evaluator = Evaluator(name="ogbn-arxiv")

splitted_idx = data.get_idx_split()
train_idx, val_idx, test_idx = splitted_idx["train"], splitted_idx[
    "valid"], splitted_idx["test"]
graph, labels = data[0]

# add reverse edges
srcs, dsts = graph.all_edges()
graph.add_edges(dsts, srcs)

# add self-loop
print(f"Total edges before adding self-loop {graph.number_of_edges()}")
graph = graph.remove_self_loop().add_self_loop()
print(f"Total edges after adding self-loop {graph.number_of_edges()}")
Example #20
def main_get_mask(args, imp_num):

    device = torch.device("cuda:" + str(args.device))
    dataset = PygLinkPropPredDataset(name=args.dataset)
    data = dataset[0]

    # Data(edge_index=[2, 2358104], edge_weight=[2358104, 1], edge_year=[2358104, 1], x=[235868, 128])
    split_edge = dataset.get_edge_split()
    evaluator = Evaluator(args.dataset)

    x = data.x.to(device)

    edge_index = data.edge_index.to(device)

    args.in_channels = data.x.size(-1)
    args.num_tasks = 1

    model = DeeperGCN(args).to(device)
    pruning.add_mask(model, args)

    for name, param in model.named_parameters():
        if 'mask' in name:
            param.requires_grad = False

    predictor = LinkPredictor(args).to(device)
    optimizer = torch.optim.Adam(list(model.parameters()) +
                                 list(predictor.parameters()),
                                 lr=args.lr)

    results = {'epoch': 0}
    keys = ['highest_valid', 'final_train', 'final_test', 'highest_train']
    hits = ['Hits@10', 'Hits@50', 'Hits@100']

    for key in keys:
        results[key] = {k: 0 for k in hits}

    start_epoch = 1
    for epoch in range(start_epoch, args.mask_epochs + 1):

        t0 = time.time()

        epoch_loss = train.train_fixed(model, predictor, x, edge_index,
                                       split_edge, optimizer, args.batch_size,
                                       args)
        result = train.test(model, predictor, x, edge_index, split_edge,
                            evaluator, args.batch_size, args)

        k = 'Hits@50'
        train_result, valid_result, test_result = result[k]

        if train_result > results['highest_train'][k]:
            results['highest_train'][k] = train_result

        if valid_result > results['highest_valid'][k]:
            results['highest_valid'][k] = valid_result
            results['final_train'][k] = train_result
            results['final_test'][k] = test_result
            results['epoch'] = epoch

        epoch_time = (time.time() - t0) / 60
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' | ' +
              'IMP:[{}] (GET Mask) Epoch:[{}/{}] LOSS:[{:.4f}] '
              'Train:[{:.2f}] Valid:[{:.2f}] Test:[{:.2f}] | '
              'Update Test:[{:.2f}] at epoch:[{}] Time:[{:.2f}min]'.format(
                  imp_num, epoch, args.mask_epochs, epoch_loss,
                  train_result * 100, valid_result * 100, test_result * 100,
                  results['final_test'][k] * 100, results['epoch'], epoch_time))
    print('-' * 100)
    print(
        "syd : IMP:[{}] (FIX Mask) Final Result Train:[{:.2f}]  Valid:[{:.2f}]  Test:[{:.2f}]"
        .format(imp_num, results['final_train'][k] * 100,
                results['highest_valid'][k] * 100,
                results['final_test'][k] * 100))
    print('-' * 100)
Example #21
                                  sizes=[-1],
                                  batch_size=4096,
                                  shuffle=False,
                                  num_workers=12)

k = int(args.batch_size * args.prune_ratio)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model1 = SAGE(data.x.size(-1), args.hidden_channels, dataset.num_classes,
              args.num_layers, args.dropout).to(device)
model2 = SAGE(data.x.size(-1), args.hidden_channels, dataset.num_classes,
              args.num_layers, args.dropout).to(device)
optimizer1 = torch.optim.Adam(model1.parameters(), lr=1e-3)
optimizer2 = torch.optim.Adam(model2.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss(reduction='none')
evaluator = Evaluator('ogbn-products')

recorder = Record(num_nodes=data.num_nodes, num_classes=47)

meta_net = MetaNet(input_dim=49, hidden_dim=32).to(device)
meta_optimizer = torch.optim.Adam(meta_net.parameters(), lr=1e-4)

# best = 0
# for epoch in range(1, 1001):
#     loss = train(epoch, model1, optimizer1)
#     train_rocauc, valid_rocauc, test_rocauc = test(model1)
#     if test_rocauc > best:
#         best = test_rocauc
#     print(f'Loss: {loss:.4f}, Train: {train_rocauc:.4f}, '
#         f'Val: {valid_rocauc:.4f}, Test: {test_rocauc:.4f}')
# print('best: {}'.format(best))
Example #22
def main():
    parser = argparse.ArgumentParser(description='OGBN-Products (Cluster-GCN)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--num_partitions', type=int, default=5000)
    parser.add_argument('--num_workers', type=int, default=6)
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--eval_steps', type=int, default=10)
    parser.add_argument('--runs', type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-products')
    split_idx = dataset.get_idx_split()
    data = dataset[0]

    # Convert split indices to boolean masks and add them to `data`.
    for key, idx in split_idx.items():
        mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        mask[idx] = True
        data[f'{key}_mask'] = mask

    cluster_data = ClusterData(data, num_parts=args.num_partitions,
                               recursive=False, save_dir=dataset.processed_dir)

    loader = ClusterLoader(cluster_data, batch_size=args.batch_size,
                           shuffle=True, num_workers=args.num_workers)

    model = SAGE(data.x.size(-1), args.hidden_channels, dataset.num_classes,
                 args.num_layers, args.dropout).to(device)

    evaluator = Evaluator(name='ogbn-products')
    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, loader, optimizer, device)
            if epoch % args.log_steps == 0:
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Loss: {loss:.4f}')

            if epoch % args.eval_steps == 0:
                result = test(model, data, evaluator)
                logger.add_result(run, result)
                train_acc, valid_acc, test_acc = result
                print(f'Run: {run + 1:02d}, '
                      f'Epoch: {epoch:02d}, '
                      f'Train: {100 * train_acc:.2f}%, '
                      f'Valid: {100 * valid_acc:.2f}%, '
                      f'Test: {100 * test_acc:.2f}%')

        logger.print_statistics(run)
    logger.print_statistics()
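The train helper iterates over the cluster batches; a minimal sketch matching the call train(model, loader, optimizer, device) and the train_mask set up above (assumes the model returns log-probabilities):

import torch.nn.functional as F

def train(model, loader, optimizer, device):
    model.train()
    total_loss = total_examples = 0
    for batch in loader:
        batch = batch.to(device)
        if batch.train_mask.sum() == 0:
            continue  # partition without training nodes
        optimizer.zero_grad()
        out = model(batch.x, batch.edge_index)[batch.train_mask]
        loss = F.nll_loss(out, batch.y.squeeze(1)[batch.train_mask])
        loss.backward()
        optimizer.step()

        num_examples = batch.train_mask.sum().item()
        total_loss += loss.item() * num_examples
        total_examples += num_examples
    return total_loss / total_examples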
Example #23
parser.add_argument('--batch-size', type=int, default=512)

parser.add_argument('--step-size', type=float, default=5e-3)
parser.add_argument('-m', type=int, default=3)
parser.add_argument('--test-freq', type=int, default=10)
parser.add_argument('--start-seed', type=int, default=0)

parser.add_argument('--attack', type=str, default='flag')
parser.add_argument('--amp', type=float, default=2)
parser.add_argument('--vanilla', action='store_true')

args = parser.parse_args()

dataset = PygNodePropPredDataset('ogbn-products')
split_idx = dataset.get_idx_split()
evaluator = Evaluator(name='ogbn-products')
data = dataset[0]
train_idx = split_idx['train']
train_loader = NeighborSampler(data.edge_index,
                               node_idx=train_idx,
                               sizes=[10, 10, 10],
                               batch_size=args.batch_size,
                               shuffle=True,
                               num_workers=12)
subgraph_loader = NeighborSampler(data.edge_index,
                                  node_idx=None,
                                  sizes=[-1],
                                  batch_size=1024,
                                  shuffle=False,
                                  num_workers=12)
Example #24
if args.model == 'mlp':
    model = MLP(x.size(-1), args.hidden_channels, dataset.num_classes, args.num_layers, 0.5,
                args.dataset == 'products').to(device)
elif args.model == 'linear':
    model = MLPLinear(x.size(-1), dataset.num_classes).to(device)
elif args.model == 'plain':
    model = MLPLinear(x.size(-1), dataset.num_classes).to(device)

x = x.to(device)
y_true = data.y.to(device)
train_idx = split_idx['train'].to(device)

model_dir = prepare_folder(f'{args.dataset}_{args.model}', model)

evaluator = Evaluator(name=f'ogbn-{args.dataset}')
logger = Logger(args.runs, args)

for run in range(args.runs):
    import gc
    gc.collect()
    print(sum(p.numel() for p in model.parameters()))
    model.reset_parameters()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    best_valid = 0
    best_out = None
    for epoch in range(1, args.epochs + 1):
        loss = train(model, x, y_true, train_idx, optimizer)
        result, out = test(model, x, y_true, split_idx, evaluator)
        train_acc, valid_acc, test_acc = result
        if valid_acc > best_valid:
            best_valid = valid_acc
            best_out = out  # (continuation truncated in the source snippet)
Example #25
                    help='origin feature dir')

parser.add_argument('--conv_name',
                    type=str,
                    default='rgsn',
                    help='rgcn or rgsn')
parser.add_argument('--Norm4', type=ast.literal_eval, default=True)  # 1+
parser.add_argument('--FDFT', type=ast.literal_eval, default=True)  # 2+
parser.add_argument('--use_attack', type=ast.literal_eval, default=False)  # 3+
args = parser.parse_args()
args_print(args)

dataset = PygNodePropPredDataset(name='ogbn-mag')
data = dataset[0]
split_idx = dataset.get_idx_split()
evaluator = Evaluator(name='ogbn-mag')
logger = Logger(args.runs, args)

# We do not consider those attributes for now.
data.node_year_dict = None
data.edge_reltype_dict = None

print(data)
edge_index_dict = data.edge_index_dict

# We need to add reverse edges to the heterogeneous graph.
r, c = edge_index_dict[('author', 'affiliated_with', 'institution')]
edge_index_dict[('institution', 'to', 'author')] = torch.stack([c, r])

r, c = edge_index_dict[('author', 'writes', 'paper')]
edge_index_dict[('paper', 'to', 'author')] = torch.stack([c, r])
Example #26
def main():

    args = ArgsInit().save_exp()

    if args.use_gpu:
        device = (torch.device("cuda:" + str(args.device))
                  if torch.cuda.is_available() else torch.device("cpu"))
    else:
        device = torch.device("cpu")

    dataset = PygNodePropPredDataset(name=args.dataset)
    graph = dataset[0]

    adj = SparseTensor(row=graph.edge_index[0], col=graph.edge_index[1])

    if args.self_loop:
        adj = adj.set_diag()
        graph.edge_index = add_self_loops(edge_index=graph.edge_index,
                                          num_nodes=graph.num_nodes)[0]
    split_idx = dataset.get_idx_split()
    train_idx = split_idx["train"].tolist()

    evaluator = Evaluator(args.dataset)

    sub_dir = 'random-train_{}-full_batch_test'.format(args.cluster_number)
    logging.info(sub_dir)

    args.in_channels = graph.x.size(-1)
    args.num_tasks = dataset.num_classes

    logging.info('%s' % args)

    model = DeeperGCN(args).to(device)

    logging.info(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    results = {
        'highest_valid': 0,
        'final_train': 0,
        'final_test': 0,
        'highest_train': 0
    }

    start_time = time.time()

    for epoch in range(1, args.epochs + 1):
        # generate batches
        parts = random_partition_graph(graph.num_nodes,
                                       cluster_number=args.cluster_number)
        data = generate_sub_graphs(adj,
                                   parts,
                                   cluster_number=args.cluster_number)

        epoch_loss = train(data, model, graph.x, graph.y, train_idx, optimizer,
                           device)
        logging.info('Epoch {}, training loss {:.4f}'.format(
            epoch, epoch_loss))
        model.print_params(epoch=epoch)

        if epoch == args.epochs:

            result = test(model, graph.x, graph.edge_index, graph.y, split_idx,
                          evaluator)
            logging.info(result)

            train_accuracy, valid_accuracy, test_accuracy = result

            if train_accuracy > results['highest_train']:
                results['highest_train'] = train_accuracy

            if valid_accuracy > results['highest_valid']:
                results['highest_valid'] = valid_accuracy
                results['final_train'] = train_accuracy
                results['final_test'] = test_accuracy

                save_ckpt(model,
                          optimizer,
                          round(epoch_loss, 4),
                          epoch,
                          args.model_save_path,
                          sub_dir,
                          name_post='valid_best')

    logging.info("%s" % results)

    end_time = time.time()
    total_time = end_time - start_time
    logging.info('Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(total_time))))
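random_partition_graph and generate_sub_graphs come from the DeeperGCN utilities; the partitioning itself is just a uniform random assignment of nodes to clusters, roughly:

import numpy as np

def random_partition_graph(num_nodes, cluster_number=10):
    # assign each node to one of `cluster_number` random parts
    return np.random.randint(cluster_number, size=num_nodes)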
Example #27
def main():
    global device, in_feats, n_classes

    argparser = argparse.ArgumentParser("GCN on OGBN-Arxiv", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    argparser.add_argument("--cpu", action="store_true", help="CPU mode. This option overrides --gpu.")
    argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID.")
    argparser.add_argument("--n-runs", type=int, default=10, help="running times")
    argparser.add_argument("--n-epochs", type=int, default=1000, help="number of epochs")
    argparser.add_argument(
        "--use-labels", action="store_true", help="Use labels in the training set as input features."
    )
    argparser.add_argument("--use-linear", action="store_true", help="Use linear layer.")
    argparser.add_argument("--lr", type=float, default=0.005, help="learning rate")
    argparser.add_argument("--n-layers", type=int, default=3, help="number of layers")
    argparser.add_argument("--n-hidden", type=int, default=256, help="number of hidden units")
    argparser.add_argument("--dropout", type=float, default=0.5, help="dropout rate")
    argparser.add_argument("--wd", type=float, default=0, help="weight decay")
    argparser.add_argument("--log-every", type=int, default=20, help="log every LOG_EVERY epochs")
    argparser.add_argument("--plot-curves", action="store_true", help="plot learning curves")
    args = argparser.parse_args()

    if args.cpu:
        device = th.device("cpu")
    else:
        device = th.device("cuda:%d" % args.gpu)

    # load data
    data = DglNodePropPredDataset(name="ogbn-arxiv")
    evaluator = Evaluator(name="ogbn-arxiv")

    splitted_idx = data.get_idx_split()
    train_idx, val_idx, test_idx = splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"]
    graph, labels = data[0]

    # add reverse edges
    srcs, dsts = graph.all_edges()
    graph.add_edges(dsts, srcs)

    # add self-loop
    print(f"Total edges before adding self-loop {graph.number_of_edges()}")
    graph = graph.remove_self_loop().add_self_loop()
    print(f"Total edges after adding self-loop {graph.number_of_edges()}")

    in_feats = graph.ndata["feat"].shape[1]
    n_classes = (labels.max() + 1).item()
    graph.create_formats_()

    train_idx = train_idx.to(device)
    val_idx = val_idx.to(device)
    test_idx = test_idx.to(device)
    labels = labels.to(device)
    graph = graph.to(device)

    # run
    val_accs = []
    test_accs = []

    for i in range(args.n_runs):
        val_acc, test_acc = run(args, graph, labels, train_idx, val_idx, test_idx, evaluator, i)
        val_accs.append(val_acc)
        test_accs.append(test_acc)

    print(f"Runned {args.n_runs} times")
    print("Val Accs:", val_accs)
    print("Test Accs:", test_accs)
    print(f"Average val accuracy: {np.mean(val_accs)} ± {np.std(val_accs)}")
    print(f"Average test accuracy: {np.mean(test_accs)} ± {np.std(test_accs)}")
    print(f"Number of params: {count_parameters(args)}")
Example #28
0
def evaluate(y, p):
    # NOTE: the original snippet is truncated above; this signature and the
    # train-split call are reconstructed to mirror the valid/test calls below.
    tr_acc = evaluator.eval({
        'y_true': y[tr_mask],
        'y_pred': p[tr_mask]
    })['acc']
    va_acc = evaluator.eval({
        'y_true': y[va_mask],
        'y_pred': p[va_mask]
    })['acc']
    te_acc = evaluator.eval({
        'y_true': y[te_mask],
        'y_pred': p[te_mask]
    })['acc']
    # the evaluator metric here is accuracy ('acc'), not AUC
    return tr_acc, va_acc, te_acc


# Load data
dataset_name = 'ogbn-arxiv'
dataset = NodePropPredDataset(dataset_name)
evaluator = Evaluator(dataset_name)
graph, y = dataset[0]
X, A, _ = ogb.graph_to_numpy(graph)
N = A.shape[0]

# Data splits
idxs = dataset.get_idx_split()
tr_idx, va_idx, te_idx = idxs["train"], idxs["valid"], idxs["test"]
tr_mask = np.zeros(N, dtype=bool)
tr_mask[tr_idx] = True
va_mask = np.zeros(N, dtype=bool)
va_mask[va_idx] = True
te_mask = np.zeros(N, dtype=bool)
te_mask[te_idx] = True
masks = [tr_mask, va_mask, te_mask]
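The snippet ends after building the split masks. Before the adjacency A is fed to a GCN it would normally be symmetrically normalized; a sketch of that preprocessing, assuming A is a scipy sparse matrix (this step is not part of the original snippet):

import numpy as np
import scipy.sparse as sp

def gcn_norm(A):
    # Symmetric GCN normalization: D^-1/2 (A + I) D^-1/2.
    A = A + sp.eye(A.shape[0], format='csr')
    deg = np.asarray(A.sum(axis=1)).flatten().astype(np.float64)
    d_inv_sqrt = np.zeros_like(deg)
    nz = deg > 0
    d_inv_sqrt[nz] = deg[nz] ** -0.5
    D_inv_sqrt = sp.diags(d_inv_sqrt)
    return D_inv_sqrt @ A @ D_inv_sqrt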
Example #29
0
def main():
    parser = argparse.ArgumentParser(description='OGBN-Proteins (Full-Batch)')
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--log_steps', type=int, default=1)
    parser.add_argument('--use_sage', action='store_true')
    parser.add_argument('--num_layers', type=int, default=3)
    parser.add_argument('--hidden_channels', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--eval_steps', type=int, default=5)
    parser.add_argument('--runs', type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f'cuda:{args.device}' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device)

    dataset = PygNodePropPredDataset(name='ogbn-proteins')
    splitted_idx = dataset.get_idx_split()
    data = dataset[0]

    x = data.x.to(torch.float).to(device)
    y_true = data.y.to(device)
    train_idx = splitted_idx['train'].to(device)

    edge_index = data.edge_index.to(device)
    adj = SparseTensor(row=edge_index[0], col=edge_index[1])

    # ogbn-proteins has 112 binary prediction tasks, so the model's output
    # dimension must be 112.
    if args.use_sage:
        model = SAGE(x.size(-1), args.hidden_channels, 112, args.num_layers,
                     args.dropout).to(device)
    else:
        model = GCN(x.size(-1), args.hidden_channels, 112, args.num_layers,
                    args.dropout).to(device)

        # Pre-compute the symmetric GCN normalization D^-1/2 (A + I) D^-1/2.
        adj = adj.set_diag()
        deg = adj.sum(dim=1).to(torch.float)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        adj = deg_inv_sqrt.view(-1, 1) * adj * deg_inv_sqrt.view(1, -1)

    evaluator = Evaluator(name='ogbn-proteins')
    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
        for epoch in range(1, 1 + args.epochs):
            loss = train(model, x, adj, y_true, train_idx, optimizer)

            if epoch % args.eval_steps == 0:
                result = test(model, x, adj, y_true, splitted_idx, evaluator)
                logger.add_result(run, result)

                if epoch % args.log_steps == 0:
                    train_rocauc, valid_rocauc, test_rocauc = result
                    print(f'Run: {run + 1:02d}, '
                          f'Epoch: {epoch:02d}, '
                          f'Loss: {loss:.4f}, '
                          f'Train: {100 * train_rocauc:.2f}%, '
                          f'Valid: {100 * valid_rocauc:.2f}%, '
                          f'Test: {100 * test_rocauc:.2f}%')

        logger.print_statistics(run)
    logger.print_statistics()
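The train and test helpers are defined elsewhere. Because ogbn-proteins is a multi-label problem (112 binary targets scored with ROC-AUC), train would use binary cross-entropy rather than the NLL loss of the single-label arxiv scripts. A minimal sketch matching the call signature above, assuming the model outputs raw logits:

import torch
import torch.nn.functional as F

def train(model, x, adj, y_true, train_idx, optimizer):
    # One full-batch optimization step over the 112 binary protein-function tasks.
    model.train()
    optimizer.zero_grad()
    out = model(x, adj)[train_idx]
    loss = F.binary_cross_entropy_with_logits(out,
                                              y_true[train_idx].to(torch.float))
    loss.backward()
    optimizer.step()
    return loss.item()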
Example #30
0
def main(args):

    device = torch.device("cuda:" + str(args.device))
    dataset = PygLinkPropPredDataset(name=args.dataset)
    data = dataset[0]

    # Data(edge_index=[2, 2358104], edge_weight=[2358104, 1], edge_year=[2358104, 1], x=[235868, 128])
    split_edge = dataset.get_edge_split()
    evaluator = Evaluator(args.dataset)

    x = data.x.to(device)

    edge_index = data.edge_index.to(device)

    args.in_channels = data.x.size(-1)
    args.num_tasks = 1

    model = DeeperGCN(args).to(device)
    predictor = LinkPredictor(args).to(device)

    logging.info(model)
    logging.info(predictor)

    optimizer = torch.optim.Adam(list(model.parameters()) +
                                 list(predictor.parameters()),
                                 lr=args.lr)

    results = {}
    keys = ['highest_valid', 'final_train', 'final_test', 'highest_train']
    hits = ['Hits@10', 'Hits@50', 'Hits@100']

    for key in keys:
        results[key] = {k: 0 for k in hits}

    start_time = time.time()
    for epoch in range(1, args.epochs + 1):

        epoch_loss = train.train(model, predictor, x, edge_index, split_edge,
                                 optimizer, args.batch_size)
        logging.info('Epoch {}, training loss {:.4f}'.format(
            epoch, epoch_loss))
        result = train.test(model, predictor, x, edge_index, split_edge,
                            evaluator, args.batch_size)

        for k in hits:
            # result[k] is a (train, valid, test) tuple for this Hits@K metric
            train_result, valid_result, test_result = result[k]

            if train_result > results['highest_train'][k]:
                results['highest_train'][k] = train_result

            if valid_result > results['highest_valid'][k]:
                results['highest_valid'][k] = valid_result
                results['final_train'][k] = train_result
                results['final_test'][k] = test_result

                save_ckpt(model,
                          optimizer,
                          round(epoch_loss, 4),
                          epoch,
                          args.model_save_path,
                          k,
                          name_post='valid_best')
                save_ckpt(predictor,
                          optimizer,
                          round(epoch_loss, 4),
                          epoch,
                          args.model_save_path,
                          k,
                          name_post='valid_best_link_predictor')

        logging.info(result)

    logging.info("%s" % results)

    end_time = time.time()
    total_time = end_time - start_time
    time_used = 'Total time: {}'.format(
        time.strftime('%H:%M:%S', time.gmtime(total_time)))
    logging.info(time_used)
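The LinkPredictor constructed from args is not shown. In OGB link-prediction baselines it is typically a small MLP that scores a candidate edge from the Hadamard product of its endpoint embeddings; a sketch along those lines (args.hidden_channels and the layer layout are assumptions, not the repository's actual definition):

import torch
import torch.nn.functional as F

class LinkPredictor(torch.nn.Module):
    def __init__(self, args):
        super().__init__()
        # hidden size is an assumed hyperparameter read from args
        self.lin1 = torch.nn.Linear(args.hidden_channels, args.hidden_channels)
        self.lin2 = torch.nn.Linear(args.hidden_channels, 1)

    def forward(self, x_i, x_j):
        # score edge (i, j) from the element-wise product of its node embeddings
        h = x_i * x_j
        h = F.relu(self.lin1(h))
        return torch.sigmoid(self.lin2(h))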