Beispiel #1
0
def run_with_dataset(directory: Union[str, 'pathlib.Path'],
                     dataset: str,
                     hidden: Union[List[int], None] = None,
                     dropout: float = 0.6449297033170698,
                     learning_rate: float = 0.011888866964052763,
                     weight_decay: float = 0.0005959130002875904,
                     epochs: int = 200,
                     verbose: bool = True) -> 'GCN':
    """Runs training with a given dataset.

    Args:
        directory: Path to datasets.
        dataset: Dataset to run on.
        hidden: Hidden layer sizes; defaults to [91] when omitted.
            (A ``None`` default avoids the shared-mutable-default pitfall
            of ``hidden: List[int] = [91]``.)
        dropout: Dropout rate.
        learning_rate: Learning rate.
        weight_decay: Weight decay.
        epochs: Number of epochs to train for.
        verbose: If True, prints messages during training time.
            Defaults to True.

    Returns:
        The trained GCN instance (the original annotation said ``None``,
        but the function has always returned the model).
    """
    if hidden is None:
        hidden = [91]
    gcn = GCN(*load_data(directory, dataset))
    gcn.train(hidden=hidden,
              dropout=dropout,
              learning_rate=learning_rate,
              weight_decay=weight_decay,
              epochs=epochs,
              verbose=verbose)
    return gcn
Beispiel #2
0
def test(adj):
    """Train a fresh GCN on the given adjacency and return test accuracy.

    Uses the module-level globals ``features``, ``labels``, ``idx_train``,
    ``idx_test``, ``args`` and ``device``.
    """
    # Normalize the adjacency once before building the model.
    adj = normalize_adj_tensor(adj)

    model = GCN(nfeat=features.shape[1],
                nhid=args.hidden,
                nclass=labels.max().item() + 1,
                dropout=0.5)
    if device != 'cpu':
        model = model.to(device)

    opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=5e-4)

    # Training phase.
    model.train()
    for _ in range(args.epochs):
        opt.zero_grad()
        out = model(features, adj)
        train_loss = F.nll_loss(out[idx_train], labels[idx_train])
        train_acc = accuracy(out[idx_train], labels[idx_train])
        train_loss.backward()
        opt.step()

    # Evaluation phase: one forward pass in eval mode.
    model.eval()
    out = model(features, adj)

    test_loss = F.nll_loss(out[idx_test], labels[idx_test])
    test_acc = accuracy(out[idx_test], labels[idx_test])
    # print("Test set results:",
    #       "loss= {:.4f}".format(test_loss.item()),
    #       "accuracy= {:.4f}".format(test_acc.item()))

    return test_acc.item()
Beispiel #3
0
def run(rank, world_size, args):
    """Per-process entry point for DDP training of a GCN.

    Loads the (possibly synthetic) dataset, wraps the model in
    DistributedDataParallel and runs the training loop, printing per-epoch
    timing and peak-memory statistics. The evaluation/all-gather code at the
    bottom is kept but disabled.

    Args:
        rank: Rank of this process in the DDP group.
        world_size: Total number of participating processes.
        args: Parsed command-line namespace (dataset paths, model sizes,
            optimizer settings, n_epochs, ...).
    """
    print('Running DDP on rank', rank, 'world size', world_size)

    setup(rank, world_size, args)
    dev_id = ragdoll.device_id()

    # Use the synthetic dataset when an explicit input graph or cache dir is
    # configured; the first argument is True only on rank 0 -- presumably to
    # restrict verbose loading output to one process (TODO confirm).
    if len(args.input_graph) > 0 or len(args.cached_dir) > 0:
        data = SynDataset(rank == 0, args)
    else:
        data = Dataset(rank == 0, args)

    feat_size = args.feat_size

    features = torch.FloatTensor(data.features).cuda()
    labels = torch.LongTensor(data.labels).cuda()
    train_mask = torch.BoolTensor(data.train_mask).cuda()
    val_mask = torch.BoolTensor(data.val_mask).cuda()
    test_mask = torch.BoolTensor(data.test_mask).cuda()

    n_classes = args.n_classes
    n_nodes = data.n_nodes
    local_n_nodes = data.local_n_nodes

    model = GCN(data.graph, n_nodes, local_n_nodes, True, feat_size, args.n_hidden, n_classes,
                args.n_layers, F.relu, args.dropout, comm_net=args.comm_net)
    model.cuda()
    model = DDP(model, device_ids=[dev_id])
    loss_fcn = torch.nn.CrossEntropyLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    optimizer.zero_grad()

    dur = []
    print("Start training... for {} epochs".format(args.n_epochs))
    for epoch in range(args.n_epochs):
        print('Epoch {} -------------'.format(epoch))
        model.train()
        # Align all ranks before timing so per-epoch numbers are comparable.
        torch.distributed.barrier()
        # Epochs 0-2 are treated as warm-up and excluded from timing stats.
        if epoch >= 3:
            t0 = time.time()

        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        # Synchronize so t1/t2 bracket the device-side optimizer work.
        torch.cuda.synchronize()
        t1 = time.time()
        optimizer.step()
        torch.cuda.synchronize()
        t2 = time.time()
        if epoch >= 3:
            dur.append(time.time() - t0)
        # acc, _, _ = evaluate(model, features, labels, val_mask)
        # print('acc is {}, loss is {}, this epoch using time {}, avg time {}.'.format(
        #    acc, loss.item(), dur[-1] if epoch >= 3 else 0, np.mean(dur)))
        print('Using time to synchronize model', t2 - t1)
        print('Peak memory is {} GB'.format(
            torch.cuda.max_memory_allocated(dev_id) / 1e9))
        # NOTE(review): np.mean(dur) is nan (with a RuntimeWarning) during the
        # first 3 epochs while dur is still empty -- harmless but noisy.
        print('this epoch uses time {} s, avg time {} s.'.format(
            dur[-1] if epoch >= 3 else 0, np.mean(dur)))

    ##acc, corr, total = evaluate(model, features, labels, test_mask)
    ##print('my corr is', corr, 'my total is', total)
    ##corr = torch.Tensor([corr]).cuda(dev_id)
    ##total = torch.Tensor([total]).cuda(dev_id)
    ##corrs, totals = [], []
    ##for i in range(world_size):
    ##    corrs.append(torch.Tensor([0]).cuda(dev_id))
    ##    totals.append(torch.Tensor([0]).cuda(dev_id))
    ##torch.distributed.all_gather(corrs, corr)
    ##torch.distributed.all_gather(totals, total)
    ##print('corrs is', corrs)
    ##print('totals is', totals)
    ##corr = torch.stack(corrs, dim=0).sum(dim=0).item() * 1.0
    ##total = torch.stack(totals, dim=0).sum(dim=0).item() * 1.0
    ##print('Test acc is', corr / total)

    cleanup()
Beispiel #4
0
    model = GCN(dataset.num_features, args.hidden,
                dataset.num_classes).to(device)
else:
    model = GraphSAGE(dataset.num_features, args.hidden,
                      dataset.num_classes).to(device)

optimizer = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.weight_decay)

# Per-epoch statistics collected for plotting.
Accuracy_list = []
Loss_list = []

# train the model
model.train()
for epoch in range(args.epochs):
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    # Reuse the forward pass above instead of running the model a second
    # time (the original called model(...) twice per epoch).
    _, pred = out.max(dim=1)
    correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
    acc = correct / int(data.test_mask.sum())
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])

    # statistics acc and loss of every iteration.
    # Store a plain float: appending the loss tensor itself would keep every
    # epoch's autograd graph alive (memory leak) and break later plotting.
    Accuracy_list.append(acc)
    Loss_list.append(loss.item())
    loss.backward()
    optimizer.step()

# evaluate the model
Beispiel #5
0
def main(args):
    """Train a GCN on a DGL-loaded citation dataset and report test accuracy."""
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # NOTE(review): ByteTensor masks are deprecated for indexing in newer
    # PyTorch releases (BoolTensor is preferred) -- confirm the pinned
    # torch version still accepts byte-mask indexing.
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    n_nodes = data.graph.number_of_nodes()
    print("""----Data statistics------'
      #Nodes %d
      #Edges %d
      #Feature %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_nodes, n_edges, in_feats, n_classes,
              train_mask.sum().item(),
              val_mask.sum().item(),
              test_mask.sum().item()))

    # Move everything to the selected GPU; args.gpu < 0 means CPU-only.
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop (drop any existing self-loops first so none are doubled)
    if args.self_loop:
        g.remove_edges_from(g.selfloop_edges())
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # normalization: per-node in-degree^(-1/2); isolated nodes would yield
    # inf, so those entries are zeroed.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                F.relu,
                args.dropout)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        # Epochs 0-2 are warm-up and excluded from the timing average.
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        if epoch >= 3:
            dur.append(time.time() - t0)

        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # NOTE(review): np.mean(dur) is nan for the first 3 epochs (empty
        # list) -- the printed Time/ETputs columns are meaningless there.
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
                                             acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Beispiel #6
0
def main(args):
    """Train a GCN (optionally with layernorm) on cora/citeseer/pubmed and
    report final/best validation and test accuracy."""
    # convert boolean type for args
    # NOTE(review): assert-based CLI validation is stripped under `python -O`;
    # an explicit raise would be sturdier, but behavior is kept as-is here.
    assert args.self_loop in ['True', 'False'], [
        "Only True or False for self_loop, get ", args.self_loop
    ]
    assert args.use_layernorm in ['True', 'False'], [
        "Only True or False for use_layernorm, get ", args.use_layernorm
    ]
    self_loop = (args.self_loop == 'True')
    use_layernorm = (args.use_layernorm == 'True')
    # t0 is declared global and assigned each timed epoch below -- presumably
    # shared with another function in this module; TODO confirm.
    global t0
    if args.dataset in {'cora', 'citeseer', 'pubmed'}:
        data = load_data(args)
    else:
        raise NotImplementedError(f'{args.dataset} is not a valid dataset')
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # NOTE(review): ByteTensor masks are deprecated in newer PyTorch
    # (BoolTensor preferred) -- confirm against the pinned version.
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (n_edges, n_classes, train_mask.sum().item(),
                             val_mask.sum().item(), test_mask.sum().item()))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop (existing self-loops removed first so none are doubled)
    if self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    g = g.to(device)
    n_edges = g.number_of_edges()

    # normalization: per-node in-degree^(-1/2), inf (isolated nodes) zeroed
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    norm = norm.to(device)
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
                args.dropout, use_layernorm)
    model = model.to(device)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    record = []
    dur = []
    for epoch in range(args.n_epochs):
        # Manual step schedule: divide the LR by 10 at 50% and 75% of training.
        if args.lr_scheduler:
            if epoch == int(0.5 * args.n_epochs):
                for pg in optimizer.param_groups:
                    pg['lr'] = pg['lr'] / 10
            elif epoch == int(0.75 * args.n_epochs):
                for pg in optimizer.param_groups:
                    pg['lr'] = pg['lr'] / 10
        model.train()
        # Epochs 0-2 are warm-up and excluded from the timing stats.
        if epoch >= 3:
            t0 = time.time()
        # forward
        optimizer.zero_grad()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        # Track (val, test) accuracy per epoch to report best-so-far below.
        acc_val = evaluate(model, features, labels, val_mask)
        acc_test = evaluate(model, features, labels, test_mask)
        record.append([acc_val, acc_test])

    all_test_acc = [v[1] for v in record]
    all_val_acc = [v[0] for v in record]
    acc = evaluate(model, features, labels, test_mask)
    print(f"Final Test Accuracy: {acc:.4f}")
    print(f"Best Val Accuracy: {max(all_val_acc):.4f}")
    print(f"Best Test Accuracy: {max(all_test_acc):.4f}")
def main(args):
    """Train a GCN on the Amazon2M graph loaded from a hardcoded dglgraph
    binary and print per-epoch timing plus a final (validation-set) accuracy."""
    # load and preprocess dataset
    # data = load_data(args)
    g, graph_labels = load_graphs(
        '/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1
    g = g[0]
    data = g.ndata
    features = torch.FloatTensor(data['feat'])
    labels = torch.LongTensor(data['label'])
    # NOTE(review): if this torch build lacked BoolTensor, the masks would be
    # undefined (the fallback branch is commented out) and the print below
    # would raise NameError -- confirm only modern torch is supported.
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()
    # else:
    #     train_mask = torch.ByteTensor(data.train_mask)
    #     val_mask = torch.ByteTensor(data.val_mask)
    #     test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    # Hardcoded class count for Amazon2M.
    n_classes = 47
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    # g = data.graph
    # add self loop
    # if args.self_loop:
    #     g.remove_edges_from(nx.selfloop_edges(g))
    #     g.add_edges_from(zip(g.nodes(), g.nodes()))
    # g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # normalization: per-node in-degree^(-1/2), inf (isolated nodes) zeroed
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
                args.dropout)

    if cuda:
        model.cuda()
    print(model)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    start = time.time()
    for epoch in range(args.n_epochs):
        model.train()
        # Epochs 0-2 are warm-up and excluded from the timing average.
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        # NOTE(review): np.mean(dur) is nan for the first 3 epochs.
        acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))

    print()
    # The final "Test accuracy" is actually measured on the validation mask,
    # as the inline comment acknowledges.
    acc = evaluate(model, features, labels, val_mask)  # no test_mask
    print("Test accuracy {:.2%}".format(acc))
    print(
        f'Training Time Consuming: {np.sum(dur)}, all time cost: {time.time() - start}'
    )
Beispiel #8
0
def main(args):
    """Train a GCN on a DGL citation dataset (cora/citeseer/pubmed) and
    report final test accuracy."""
    # load and preprocess dataset
    if args.dataset == 'cora':
        data = CoraGraphDataset()
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset()
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        # Cast edge index dtype to int32 and move the graph to the GPU.
        g = g.int().to(args.gpu)

    # Node features/labels/masks are carried on the graph itself.
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
              train_mask.int().sum().item(),
              val_mask.int().sum().item(),
              test_mask.int().sum().item()))

    # add self loop (remove any existing ones first so none are doubled)
    if args.self_loop:
        g = dgl.remove_self_loop(g)
        g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # normalization: per-node in-degree^(-1/2), inf (isolated nodes) zeroed
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                F.relu,
                args.dropout)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        # Epochs 0-2 are warm-up and excluded from the timing average.
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        # NOTE(review): np.mean(dur) is nan for the first 3 epochs.
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
                                             acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
Beispiel #9
0
def main(args):
    """Benchmark GCN/GIN training throughput on a custom .npz dataset,
    printing average per-epoch time in milliseconds."""
    path = os.path.join(args.dataDir, args.dataset + ".npz")
    data = custom_dataset(path, args.dim, args.classes, load_from_txt=False)
    g = data.g

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True

    # NOTE(review): the two lines below run unconditionally, so a negative
    # args.gpu (cuda=False) would still attempt GPU placement -- confirm
    # this benchmark is only ever run with a GPU.
    g = g.int().to(args.gpu)

    features = data.x
    labels = data.y
    in_feats = features.size(1)
    n_classes = data.num_classes

    # normalization: per-node in-degree^(-1/2)
    # NOTE(review): unlike the sibling examples, inf entries from zero-degree
    # nodes are NOT zeroed here -- confirm the dataset has no isolated nodes.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    if args.model == 'gcn':
        model = GCN(g,
                    in_feats=in_feats,
                    n_hidden=args.hidden,
                    n_classes=n_classes,
                    n_layers=2)
    else:
        model = GIN(g,
                    input_dim=in_feats,
                    hidden_dim=64,
                    output_dim=n_classes,
                    num_layers=5)

    if cuda: model.cuda()

    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-2,
                                 weight_decay=5e-4)

    # Time the whole training loop; synchronize so the measurement covers
    # all queued GPU work, not just kernel launches.
    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in tqdm(range(args.n_epochs)):
        model.train()

        logits = model(features)
        # Loss over ALL nodes (no train mask) -- this is a throughput
        # benchmark, not an accuracy experiment.
        loss = loss_fcn(logits[:], labels[:])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    torch.cuda.synchronize()
    dur = time.perf_counter() - start

    if args.model == 'gcn':
        print("DGL GCN (L2-H16) Time: (ms) {:.3f}".format(dur * 1e3 /
                                                          args.n_epochs))
    else:
        print("DGL GIN (L5-H64) Time: (ms) {:.3f}".format(dur * 1e3 /
                                                          args.n_epochs))
    print()
Beispiel #10
0
    return batch, labels, lmasks


# Indices of trainable samples for the training and validation datasets.
inds = trainable_inds(dset.data)
eval_inds = trainable_inds(valset.data)

print('Pre-evaluate:')
# Evaluation closure: masked loss between predictions and labels over the
# validation indices, using the same batch formatting as training.
evf = lambda: evaluate_v2(
    eval_inds, valset, gcn, lambda preds, label, mask: loss_fcn(
        preds[mask], label[mask]), format_batch)
best_eval = evf()

print('Train:')
# Record of evaluation losses, starting with the pre-training value.
eval_mse = [best_eval]
for epoch in range(args.epochs):
    gcn.train()
    shuffle(inds)
    # One (di, hi) pair per optimizer step: effectively batch size 1.
    for ii, (di, hi) in enumerate(inds):
        # forward
        batch, labels, lmasks = format_batch([(di, hi)], dset.data)
        preds = gcn(batch)

        # Loss only over the labeled entries selected by lmasks.
        loss = loss_fcn(preds[lmasks], labels[lmasks])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # In-place progress meter ('\r' rewrites the same console line).
        sys.stdout.write('[%d/%d]: %d/%d  \r' %
                         (epoch + 1, args.epochs, ii, len(inds)))
    sys.stdout.write('\n')
Beispiel #11
0
def main(args):
    """Federated (FedAvg) training of a GNN regressor over simulated users.

    Builds a GCN/GAT/GraphSAGE model (chosen by args.gnnbase), splits the
    node data IID across args.n_users, and each epoch trains local copies on
    a random fraction of users before averaging their weights.
    """
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    # Float labels + RMSE reporting below: this is a regression task.
    labels = torch.FloatTensor(data.labels)
    train_mask = torch.BoolTensor(data.train_mask)
    val_mask = torch.BoolTensor(data.val_mask)
    test_mask = torch.BoolTensor(data.test_mask)

    g = data.graph
    n_feats = features.shape[1]
    n_labels = data.num_labels
    n_edges = g.number_of_edges()
    print("""----Data statistics------'
        #Features %d    
        #Edges %d
        #Labels %d
        #Train samples %d
        #Val samples %d
        #Test samples %d""" %
          (n_feats, n_edges, n_labels, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    # Partition the node data IID across the simulated users.
    dataset_train = CampusDataset(features, labels)
    dict_users = iid_users(dataset_train, args.n_users)

    # NOTE(review): if args.gnnbase is not one of 'gcn'/'gat'/'sage', none of
    # the branches below runs and `model` is undefined (NameError at
    # print(model)) -- confirm the CLI restricts the choices.
    if args.gnnbase == 'gcn':
        g = DGLGraph(g)
        n_edges = g.number_of_edges()
        # Symmetric-normalization factor: in-degree^(-1/2), inf zeroed.
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)
        norm[torch.isinf(norm)] = 0
        g.ndata['norm'] = norm.unsqueeze(1)
        model = GCN(g, n_feats, args.n_hidden, n_labels, args.n_layers, F.relu,
                    args.dropout)

    if args.gnnbase == 'gat':
        # GAT gets explicit self-loops and per-layer attention head counts.
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        g.add_edges(g.nodes(), g.nodes())
        n_edges = g.number_of_edges()
        heads = ([args.n_heads] * args.n_layers) + [args.n_out_heads]
        model = GAT(g, args.n_layers, n_feats, args.n_hidden, n_labels, heads,
                    F.elu, args.in_drop, args.attn_drop, args.negative_slope,
                    args.residual)

    if args.gnnbase == 'sage':
        g.remove_edges_from(nx.selfloop_edges(g))
        g = DGLGraph(g)
        n_edges = g.number_of_edges()
        model = GraphSAGE(g, n_feats, args.n_hidden, n_labels, args.n_layers,
                          F.relu, args.dropout, args.aggregator_type)

    print(model)
    model.train()

    w_glob = model.state_dict()
    loss_train = []
    timecost = []

    for epoch in range(args.n_epochs):
        time_begin = time.time()

        # Sample a fraction of users, train a deep copy of the global model
        # locally on each, then FedAvg the resulting weights.
        w_locals, loss_locals = [], []
        m = max(int(args.frac * args.n_users), 1)
        idxs_users = np.random.choice(range(args.n_users), m, replace=False)
        for idx in idxs_users:
            local = LocalUpdate(args=args,
                                dataset=dataset_train,
                                idxs=dict_users[idx],
                                mask=train_mask)
            w, loss = local.train(model=copy.deepcopy(model))
            w_locals.append(copy.deepcopy(w))
            loss_locals.append(copy.deepcopy(loss))
        w_glob = FedAvg(w_locals)

        # Broadcast the averaged weights back into the global model.
        model.load_state_dict(w_glob)

        time_end = time.time()
        timecost.append(time_end - time_begin)

        loss_avg = sum(loss_locals) / len(loss_locals)
        print('Epoch {:3d}, Average loss {:.3f}'.format(epoch, loss_avg))
        loss_train.append(loss_avg)

        train_errX, train_errY = eval_error(model, features, labels,
                                            train_mask)
        val_errX, val_errY = eval_error(model, features, labels, val_mask)
        test_errX, test_errY = eval_error(model, features, labels, test_mask)
        print(
            "Epoch {:3d} | TrainRMSEX {:.4f} | TrainRMSEY {:.4f} | ValRMSEX {:.4f} | ValRMSEY {:.4f} | TestRMSEX {:.4f} | TestRMSEY {:.4f}"
            .format(epoch, train_errX, train_errY, val_errX, val_errY,
                    test_errX, test_errY))

    print("Time cost {:.4f}".format(sum(timecost) / args.n_epochs))

    # Baseline error: first two feature columns used directly as predictions
    # -- presumably raw coordinates; TODO confirm against the dataset schema.
    base_errX, base_errY = calc_error(features[test_mask, :2],
                                      labels[test_mask])
    print("TestRMSEX-Base {:.4f} | TestRMSEY-Base {:.4f}".format(
        base_errX, base_errY))
Beispiel #12
0
def main(training_file,
         dev_file,
         test_file,
         epochs=None,
         patience=None,
         num_heads=None,
         num_out_heads=None,
         num_layers=None,
         num_hidden=None,
         residual=None,
         in_drop=None,
         attn_drop=None,
         lr=None,
         weight_decay=None,
         alpha=None,
         batch_size=None,
         graph_type=None,
         net=None,
         freeze=None,
         cuda=None,
         fw=None):

    # number of training epochs
    if epochs is None:
        epochs = 400
    print('EPOCHS', epochs)
    # used for early stop
    if patience is None:
        patience = 15
    print('PATIENCE', patience)

    # number of hidden attention heads
    if num_heads is None:
        num_heads_ch = [4, 5, 6, 7]
    else:
        num_heads_ch = flattenList(num_heads)
    print('NUM HEADS', num_heads_ch)

    # number of output attention heads
    if num_out_heads is None:
        num_out_heads_ch = [4, 5, 6, 7]
    else:
        num_out_heads_ch = flattenList(num_out_heads)
    print('NUM OUT HEADS', num_out_heads_ch)

    # number of hidden layers
    if num_layers is None:
        num_layers_ch = [2, 3, 4, 5, 6]
    else:
        num_layers_ch = flattenList(num_layers)
    print('NUM LAYERS', num_layers_ch)
    # number of hidden units
    if num_hidden is None:
        num_hidden_ch = [32, 64, 96, 128, 256, 350, 512]
    else:
        num_hidden_ch = flattenList(num_hidden)
    print('NUM HIDDEN', num_hidden_ch)
    # use residual connection
    if residual is None:
        residual_ch = [True, False]
    else:
        residual_ch = flattenList(residual)
    print('RESIDUAL', residual_ch)
    # input feature dropout
    if in_drop is None:
        in_drop_ch = [0., 0.001, 0.0001, 0.00001]
    else:
        in_drop_ch = flattenList(in_drop)
    print('IN DROP', in_drop_ch)
    # attention dropout
    if attn_drop is None:
        attn_drop_ch = [0., 0.001, 0.0001, 0.00001]
    else:
        attn_drop_ch = flattenList(attn_drop)
    print('ATTENTION DROP', attn_drop_ch)
    # learning rate
    if lr is None:
        lr_ch = [0.0000005, 0.0000015, 0.00001, 0.00005, 0.0001]
    else:
        lr_ch = flattenList(lr)
    print('LEARNING RATE', lr_ch)
    # weight decay
    if weight_decay is None:
        weight_decay_ch = [0.0001, 0.001, 0.005]
    else:
        weight_decay_ch = flattenList(weight_decay)
    print('WEIGHT DECAY', weight_decay_ch)
    # the negative slop of leaky relu
    if alpha is None:
        alpha_ch = [0.1, 0.15, 0.2]
    else:
        alpha_ch = flattenList(alpha)
    print('ALPHA', alpha_ch)
    # batch size used for training, validation and test
    if batch_size is None:
        batch_size_ch = [175, 256, 350, 450, 512, 800, 1600]
    else:
        batch_size_ch = flattenList(batch_size)
    print('BATCH SIZE', batch_size_ch)
    # net type
    if net is None:
        net_ch = [GCN, GAT, RGCN, PGCN, PRGCN, GGN, PGAT]
    else:
        net_ch_raw = flattenList(net)
        net_ch = []
        for ch in net_ch_raw:
            if ch.lower() == 'gcn':
                if fw == 'dgl':
                    net_ch.append(GCN)
                else:
                    net_ch.append(PGCN)
            elif ch.lower() == 'gat':
                if fw == 'dgl':
                    net_ch.append(GAT)
                else:
                    net_ch.append(PGAT)
            elif ch.lower() == 'rgcn':
                if fw == 'dgl':
                    net_ch.append(RGCN)
                else:
                    net_ch.append(PRGCN)
            elif ch.lower() == 'ggn':
                net_ch.append(GGN)
            elif ch.lower() == 'rgat':
                net_ch.append(PRGAT)
            else:
                print('Network type {} is not recognised.'.format(ch))
                sys.exit(1)
    print('NET TYPE', net_ch)
    # graph type
    if net_ch in [GCN, GAT, PGCN, GGN, PGAT]:
        if graph_type is None:
            graph_type_ch = ['raw', '1', '2', '3', '4', 'relational']
        else:
            graph_type_ch = flattenList(graph_type)
    else:
        if graph_type is None:
            graph_type_ch = ['relational']
        else:
            graph_type_ch = flattenList(graph_type)

    print('GRAPH TYPE', graph_type_ch)
    # Freeze input neurons?
    if freeze is None:
        freeze_ch = [True, False]
    else:
        freeze_ch = flattenList(freeze)
    print('FREEZE', freeze_ch)
    # CUDA?
    if cuda is None:
        device = torch.device("cpu")
    elif cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    print('DEVICE', device)
    if fw is None:
        fw = ['dgl', 'pg']

    # define loss function
    # loss_fcn = torch.nn.BCEWithLogitsLoss()
    loss_fcn = torch.nn.MSELoss()

    for trial in range(10):
        trial_s = str(trial).zfill(6)
        num_heads = random.choice(num_heads_ch)
        num_out_heads = random.choice(num_out_heads_ch)
        num_layers = random.choice(num_layers_ch)
        num_hidden = random.choice(num_hidden_ch)
        residual = random.choice(residual_ch)
        in_drop = random.choice(in_drop_ch)
        attn_drop = random.choice(attn_drop_ch)
        lr = random.choice(lr_ch)
        weight_decay = random.choice(weight_decay_ch)
        alpha = random.choice(alpha_ch)
        batch_size = random.choice(batch_size_ch)
        graph_type = random.choice(graph_type_ch)
        net_class = random.choice(net_ch)
        freeze = random.choice(freeze_ch)
        fw = random.choice(fw)
        if freeze == False:
            freeze = 0
        else:
            if graph_type == 'raw' or graph_type == '1' or graph_type == '2':
                freeze = 4
            elif graph_type == '3' or graph_type == '4':
                freeze = 6
            elif graph_type == 'relational':
                freeze = 5
            else:
                sys.exit(1)

        print('=========================')
        print('TRIAL', trial_s)
        print('HEADS', num_heads)
        print('OUT_HEADS', num_out_heads)
        print('LAYERS', num_layers)
        print('HIDDEN', num_hidden)
        print('RESIDUAL', residual)
        print('inDROP', in_drop)
        print('atDROP', attn_drop)
        print('LR', lr)
        print('DECAY', weight_decay)
        print('ALPHA', alpha)
        print('BATCH', batch_size)
        print('GRAPH_ALT', graph_type)
        print('ARCHITECTURE', net_class)
        print('FREEZE', freeze)
        print('FRAMEWORK', fw)
        print('=========================')

        # create the dataset
        print('Loading training set...')
        train_dataset = SocNavDataset(training_file,
                                      mode='train',
                                      alt=graph_type)
        print('Loading dev set...')
        valid_dataset = SocNavDataset(dev_file, mode='valid', alt=graph_type)
        print('Loading test set...')
        test_dataset = SocNavDataset(test_file, mode='test', alt=graph_type)
        print('Done loading files')
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=batch_size,
                                      collate_fn=collate)
        valid_dataloader = DataLoader(valid_dataset,
                                      batch_size=batch_size,
                                      collate_fn=collate)
        test_dataloader = DataLoader(test_dataset,
                                     batch_size=batch_size,
                                     collate_fn=collate)

        num_rels = train_dataset.data[0].num_rels
        cur_step = 0
        best_loss = -1
        n_classes = train_dataset.labels.shape[1]
        print('Number of classes:  {}'.format(n_classes))
        num_feats = train_dataset.features.shape[1]
        print('Number of features: {}'.format(num_feats))
        g = train_dataset.graph
        heads = ([num_heads] * num_layers) + [num_out_heads]
        # define the model

        if fw == 'dgl':
            if net_class in [GCN]:
                model = GCN(g, num_feats, num_hidden, n_classes, num_layers,
                            F.elu, in_drop)
            elif net_class in [GAT]:
                model = net_class(g,
                                  num_layers,
                                  num_feats,
                                  num_hidden,
                                  n_classes,
                                  heads,
                                  F.elu,
                                  in_drop,
                                  attn_drop,
                                  alpha,
                                  residual,
                                  freeze=freeze)
            else:
                # def __init__(self, g, in_dim, h_dim, out_dim, num_rels, num_hidden_layers=1):
                model = RGCN(g,
                             in_dim=num_feats,
                             h_dim=num_hidden,
                             out_dim=n_classes,
                             num_rels=num_rels,
                             feat_drop=in_drop,
                             num_hidden_layers=num_layers,
                             freeze=freeze)
        else:

            if net_class in [PGCN]:
                model = PGCN(
                    num_feats,
                    n_classes,
                    num_hidden,
                    num_layers,
                    in_drop,
                    F.relu,
                    improved=True,  #Compute A-hat as A + 2I
                    bias=True)

            elif net_class in [PRGCN]:
                model = PRGCN(
                    num_feats,
                    n_classes,
                    num_rels,
                    num_rels,  #num_rels?   # TODO: Add variable
                    num_hidden,
                    num_layers,
                    in_drop,
                    F.relu,
                    bias=True)
            elif net_class in [PGAT]:
                model = PGAT(num_feats,
                             n_classes,
                             num_heads,
                             in_drop,
                             num_hidden,
                             num_layers,
                             F.relu,
                             concat=True,
                             neg_slope=alpha,
                             bias=True)
            elif net_class in [PRGAT]:
                model = PRGAT(
                    num_feats,
                    n_classes,
                    num_heads,
                    num_rels,
                    num_rels,  #num_rels?   # TODO: Add variable
                    num_hidden,
                    num_layers,
                    num_layers,
                    in_drop,
                    F.relu,
                    alpha,
                    bias=True)
            else:
                model = GGN(num_feats, num_layers, aggr='mean', bias=True)
        #Describe the model
        #describe_model(model)

        # define the optimizer
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=lr,
                                     weight_decay=weight_decay)
        # for name, param in model.named_parameters():
        # if param.requires_grad:
        # print(name, param.data.shape)
        model = model.to(device)

        for epoch in range(epochs):
            model.train()
            loss_list = []
            for batch, data in enumerate(train_dataloader):
                subgraph, feats, labels = data
                subgraph.set_n_initializer(dgl.init.zero_initializer)
                subgraph.set_e_initializer(dgl.init.zero_initializer)
                feats = feats.to(device)
                labels = labels.to(device)
                if fw == 'dgl':
                    model.g = subgraph
                    for layer in model.layers:
                        layer.g = subgraph
                    logits = model(feats.float())
                else:
                    if net_class in [PGCN, PGAT, GGN]:
                        data = Data(x=feats.float(),
                                    edge_index=torch.stack(
                                        subgraph.edges()).to(device))
                    else:
                        data = Data(
                            x=feats.float(),
                            edge_index=torch.stack(
                                subgraph.edges()).to(device),
                            edge_type=subgraph.edata['rel_type'].squeeze().to(
                                device))
                    logits = model(data)
                loss = loss_fcn(logits[getMaskForBatch(subgraph)],
                                labels.float())
                optimizer.zero_grad()
                a = list(model.parameters())[0].clone()
                loss.backward()
                optimizer.step()
                b = list(model.parameters())[0].clone()
                not_learning = torch.equal(a.data, b.data)
                if not_learning:
                    import sys
                    print('Not learning')
                    # sys.exit(1)
                else:
                    pass
                    # print('Diff: ', (a.data-b.data).sum())
                # print(loss.item())
                loss_list.append(loss.item())
            loss_data = np.array(loss_list).mean()
            print('Loss: {}'.format(loss_data))
            if epoch % 5 == 0:
                if epoch % 5 == 0:
                    print(
                        "Epoch {:05d} | Loss: {:.4f} | Patience: {} | ".format(
                            epoch, loss_data, cur_step),
                        end='')
                score_list = []
                val_loss_list = []
                for batch, valid_data in enumerate(valid_dataloader):
                    subgraph, feats, labels = valid_data
                    subgraph.set_n_initializer(dgl.init.zero_initializer)
                    subgraph.set_e_initializer(dgl.init.zero_initializer)
                    feats = feats.to(device)
                    labels = labels.to(device)
                    score, val_loss = evaluate(feats.float(), model, subgraph,
                                               labels.float(), loss_fcn, fw,
                                               net_class)
                    score_list.append(score)
                    val_loss_list.append(val_loss)
                mean_score = np.array(score_list).mean()
                mean_val_loss = np.array(val_loss_list).mean()
                if epoch % 5 == 0:
                    print("Score: {:.4f} MEAN: {:.4f} BEST: {:.4f}".format(
                        mean_score, mean_val_loss, best_loss))
                # early stop
                if best_loss > mean_val_loss or best_loss < 0:
                    best_loss = mean_val_loss
                    # Save the model
                    # print('Writing to', trial_s)
                    torch.save(
                        model.state_dict(), fw + str(net) + '.tch'
                    )  #   3       4           5          6          7         8      9      10       11         12      13       14        15
                    params = [
                        val_loss, graph_type,
                        str(type(net_class)), g, num_layers, num_feats,
                        num_hidden, n_classes, heads, F.elu, in_drop,
                        attn_drop, alpha, residual, num_rels, freeze
                    ]
                    pickle.dump(params, open(fw + str(net) + '.prms', 'wb'))
                    cur_step = 0
                else:
                    cur_step += 1
                    if cur_step >= patience:
                        break
        torch.save(model, 'gattrial.pth')
        test_score_list = []
        for batch, test_data in enumerate(test_dataloader):
            subgraph, feats, labels = test_data
            subgraph.set_n_initializer(dgl.init.zero_initializer)
            subgraph.set_e_initializer(dgl.init.zero_initializer)
            feats = feats.to(device)
            labels = labels.to(device)
            test_score_list.append(
                evaluate(feats, model, subgraph, labels.float(), loss_fcn, fw,
                         net_class)[0])
        print("F1-Score: {:.4f}".format(np.array(test_score_list).mean()))
        model.eval()
        return best_loss
Beispiel #13
0
def main(args):
    """Train a GCN node classifier with DGL and report per-epoch metrics.

    Loads the dataset selected by ``args``, optionally adds self-loops,
    builds symmetric-normalization factors on the graph, trains for
    ``args.n_epochs`` with Adam + cross-entropy, and finally prints
    accuracy/precision/recall/F-score plus a class-based report on the
    test split.

    Relies on module-level helpers: ``load_data``, ``GCN``, ``evaluate``,
    ``nx``, ``DGLGraph``, ``F``, ``time``.
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # BoolTensor only exists on newer torch; fall back to ByteTensor masks
    # on older versions (both index identically where supported).
    if hasattr(torch, "BoolTensor"):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (
        n_edges,
        n_classes,
        train_mask.int().sum().item(),
        val_mask.int().sum().item(),
        test_mask.int().sum().item(),
    ))

    # Negative gpu index means CPU; otherwise move all tensors to that GPU.
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop (remove existing ones first so none are duplicated)
    if args.self_loop:
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # normalization: D^{-1/2}; isolated nodes would give inf, zero them out
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata["norm"] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(
        g,
        in_feats,
        args.n_hidden,
        n_classes,
        args.n_layers,
        F.relu,
        args.dropout,
    )

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []  # per-epoch durations, skipping the first warm-up epochs
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        # loss only over the training mask; full-graph forward pass
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        # Per-epoch validation metrics (class report discarded here).
        accuracy, precision, recall, fscore, _ = evaluate(
            model, features, labels, val_mask)
        print("Epoch:", epoch)
        print("Loss:", loss.item())
        print("Accuracy:", accuracy)
        print("Precision:", precision)
        print("Recall:", recall)
        print("F-Score:", fscore)
        print()
        print("=" * 80)
        print()

    # Final evaluation on the held-out test split.
    accuracy, precision, recall, fscore, class_based_report = evaluate(
        model, features, labels, test_mask)
    print("=" * 80)
    print(" " * 28 + "Final Statistics")
    print("=" * 80)
    print("Accuracy", accuracy)
    print("Precision", precision)
    print("Recall", recall)
    print("F-Score", fscore)
    print(class_based_report)
Beispiel #14
0
def main(args):
    """Adversarially train a GCN with FGSM examples and plot robustness curves.

    Builds a self-looped DGLGraph from the module-level ``data_adj``, trains
    ``model_adv`` on FGSM-perturbed features for ``args.n_epochs`` epochs,
    then evaluates the trained model under no attack, FGSM, and iterative
    FGSM across a sweep of epsilons, finally handing the accuracy histories
    to ``plot_graphs``.

    Relies on module-level globals: ``input_train_mask``, ``input_test_mask``,
    ``data_adj``, ``train_portion``, and the attack/eval helper functions.
    """
    train_mask = input_train_mask
    test_mask = input_test_mask
    in_feats = args.n_input_features
    n_classes = args.n_classes

    # Build the graph and add self loops so each node sees its own features.
    g = DGLGraph(data_adj)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()

    # Symmetric normalization factor D^{-1/2}; isolated nodes would yield
    # inf, so zero those entries out.
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN models. Only model_adv is optimized below; model_non_adv is
    # put in train() mode each step but never stepped -- NOTE(review):
    # confirm that is intentional (looks like a leftover baseline).
    model_adv = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                    F.leaky_relu, args.dropout)

    model_non_adv = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                        F.leaky_relu, args.dropout)

    loss_fcn = torch.nn.MSELoss()

    # use optimizer
    optimizer = torch.optim.Adam(model_adv.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    print(optimizer.state_dict()['param_groups'][0]['lr'])

    # initialize graph
    dur = []  # per-step durations, skipping warm-up epochs
    for epoch in range(args.n_epochs):
        # Visit the training chunks in a fresh random order each epoch.
        shuffle_index = np.arange(int(train_portion))
        np.random.shuffle(shuffle_index)
        # BUGFIX: cast to int here too -- range() raises TypeError when
        # train_portion is a float (np.arange above already assumed int()).
        for t in range(int(train_portion)):
            features, labels = load_data(shuffle_index[t])

            model_non_adv.train()
            model_adv.train()
            t0 = time.time()

            # Generate FGSM-perturbed inputs for adversarial training.
            adv_features = fgsm_attack_generateor(model=model_adv,
                                                  loss_fcn=loss_fcn,
                                                  feature=features,
                                                  label=labels,
                                                  epsilon_fgsm=0,
                                                  mask=train_mask)

            # forward/backward on the adversarial batch
            logits = model_adv(adv_features)
            # TO DO: change this
            loss = loss_fcn(logits[train_mask], labels[train_mask])

            optimizer.zero_grad()

            adjust_learning_rate(optimizer, epoch)

            loss.backward()

            optimizer.step()

            # Skip the first warm-up epochs when averaging step times.
            if epoch >= 3:
                dur.append(time.time() - t0)

            acc = evaluate(model_adv, features, labels, train_mask)

            learning_rate = optimizer.state_dict()['param_groups'][0]['lr']
            print(
                "Epoch {:05d} | learning_rate {:.4f} | Iter {:05d}|  Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} |"
                "ETputs(KTEPS) {:.2f}".format(epoch, learning_rate, t,
                                              np.mean(dur), loss.item(), acc,
                                              n_edges / np.mean(dur) / 1000))

    # Robustness sweep: accuracy under no attack / FGSM / iterative FGSM
    # for a range of perturbation budgets.
    history_acc_test = {}
    history_acc_test["nominal"] = []
    history_acc_test["fgsm"] = []
    history_acc_test["ifgsm"] = []

    eps = [0, .001, .005, .009, .02, 0.05, 0.08]
    for i_eps in eps:
        out = test_fgsm(model_adv, epsilon=i_eps, test_mask=test_mask)
        history_acc_test["nominal"].append(out[0])
        history_acc_test["fgsm"].append(out[1])
        history_acc_test["ifgsm"].append(
            test_ifgsm(model_adv,
                       loss_fcn,
                       optimizer,
                       epsilon=i_eps,
                       T_adv=20,
                       lr=0.1,
                       test_mask=test_mask))

    plot_graphs(data=history_acc_test)
Beispiel #15
0
def main(args):
    """Grid-search (alpha2, alpha3) weights for an embedding-augmented GCN.

    Loads the dataset plus pretrained 8-d structural and attribute node
    embeddings, row-standardizes the embeddings, then for each
    (alpha2, alpha3) weighting trains 30 independent GCN models and prints
    the average of the best test accuracies, where "best" is selected by
    peak validation accuracy.

    Relies on module-level helpers: ``load_data``, ``preprocessing``,
    ``GCN``, ``evaluate``, ``DGLGraph``, ``F``, ``time``.
    """
    # load and preprocess dataset
    data = load_data(args)

    # Pretrained auxiliary node embeddings; paths are relative to the script
    # location -- NOTE(review): consider making these configurable.
    structure_features = np.load('../../pretrained/' + args.dataset +
                                 '_structure_8d.npy')
    attr_features = np.load('../../pretrained/' + args.dataset +
                            '_attr_8d.npy')

    # Standardize each node's embedding row-wise (zero mean, unit variance).
    structure_features = preprocessing.scale(structure_features,
                                             axis=1,
                                             with_mean=True,
                                             with_std=True,
                                             copy=True)
    structure_features = torch.FloatTensor(structure_features).cuda()

    attr_features = preprocessing.scale(attr_features,
                                        axis=1,
                                        with_mean=True,
                                        with_std=True,
                                        copy=True)
    attr_features = torch.FloatTensor(attr_features).cuda()

    in_feats2 = structure_features.shape[1]
    in_feats3 = attr_features.shape[1]
    print(structure_features.shape, attr_features.shape)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # BUGFIX: prefer BoolTensor masks when available -- indexing with
    # ByteTensor masks is deprecated (and removed in newer PyTorch); this
    # also matches the sibling training scripts in this file.
    if hasattr(torch, "BoolTensor"):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats1 = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (n_edges, n_classes, train_mask.sum().item(),
                             val_mask.sum().item(), test_mask.sum().item()))

    # Negative gpu index means CPU; otherwise move all tensors to that GPU.
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop (drop existing ones first so none are duplicated)
    if args.self_loop:
        g.remove_edges_from(g.selfloop_edges())
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # normalization: D^{-1/2}; zero out inf entries for isolated nodes
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # Weights for the structure (alpha2) and attribute (alpha3) embedding
    # branches; alpha1 weights the raw features. Only a single point of the
    # original grid is kept enabled here.
    alpha2_set = [0.02]
    alpha3_set = [0.03]
    alpha1 = 1
    for alpha2 in alpha2_set:
        for alpha3 in alpha3_set:
            result = []
            # 30 independent runs per (alpha2, alpha3) setting; local renamed
            # from `iter` to avoid shadowing the builtin.
            for run in range(30):

                model = GCN(g, in_feats1, in_feats2, in_feats3, args.n_hidden,
                            n_classes, args.n_layers, F.relu, args.dropout,
                            alpha1, alpha2, alpha3)

                if cuda:
                    model.cuda()
                loss_fcn = torch.nn.CrossEntropyLoss()

                # use optimizer
                optimizer = torch.optim.Adam(model.parameters(),
                                             lr=args.lr,
                                             weight_decay=args.weight_decay)

                # initialize graph
                dur = []
                best_val_acc = 0
                best_test_acc = 0
                for epoch in range(args.n_epochs):
                    model.train()
                    if epoch >= 3:
                        t0 = time.time()
                    # forward
                    logits = model(features, structure_features, attr_features)
                    loss = loss_fcn(logits[train_mask], labels[train_mask])

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    if epoch >= 3:
                        dur.append(time.time() - t0)

                    # Model selection: record the test accuracy at the epoch
                    # of best validation accuracy.
                    val_acc = evaluate(model, features, structure_features,
                                       attr_features, labels, val_mask)
                    if val_acc >= best_val_acc:
                        best_val_acc = val_acc
                        best_test_acc = evaluate(model, features,
                                                 structure_features,
                                                 attr_features, labels,
                                                 test_mask)

                result.append(best_test_acc)
                del model  # release GPU memory between runs
            print(alpha2, alpha3, np.average(result), result)
def main(args):
    """Train a GCN on the Neptune Cora dataset and save its weights.

    Uses ``args`` for the GPU index, hidden size, layer count, dropout,
    learning rate, weight decay, epoch count, and output model path.
    Prints per-epoch loss/validation accuracy and the final test accuracy,
    then saves the model's state dict to ``args.model_path``.
    """
    # load and preprocess dataset
    # BUGFIX: use >= 0 so GPU index 0 selects CUDA; the original `> 0`
    # silently fell back to CPU for the most common device id. This matches
    # the `args.gpu < 0` => CPU convention used by the sibling scripts.
    if args.gpu >= 0:
        cuda = True
        device = torch.device('cuda:{}'.format(args.gpu))
    else:
        device = torch.device('cpu')
        cuda = False
    cora_data = NeptuneCoraDataset(device, valid_ratio=0.1, test_ratio=0.2)
    features = cora_data.features
    test_set = cora_data.test_set
    valid_set = cora_data.valid_set
    train_set = cora_data.train_set
    g = cora_data.g

    in_feats = features['h**o'].shape[1]
    n_edges = g.number_of_edges()
    # normalization: D^{-1/2}; zero out inf entries for isolated nodes
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, cora_data.n_class, args.n_layers,
                F.relu, args.dropout)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # initialize graph
    dur = []  # per-epoch durations, skipping warm-up epochs
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward; train_set is (node indices, labels)
        logits = model(features['h**o'])
        loss = loss_fcn(logits[train_set[0]], train_set[1])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, features['h**o'], valid_set)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                          acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(model, features['h**o'], test_set)
    print("Test accuracy {:.2%}".format(acc))

    torch.save(model.state_dict(), args.model_path)