Code Example #1
File: train.py  Project: Aditya239233/GNNExplainer
def train_node_classifier(G, labels, model, args, writer=None):
    # train/test split only for nodes
    num_nodes = G.number_of_nodes()
    num_train = int(num_nodes * args.train_ratio)
    idx = list(range(num_nodes))

    np.random.shuffle(idx)
    train_idx = idx[:num_train]
    test_idx = idx[num_train:]

    data = gengraph.preprocess_input_graph(G, labels)
    # data["labels"] carries a leading batch dimension of size 1
    labels_train = torch.tensor(data["labels"][:, train_idx], dtype=torch.long)
    adj = torch.tensor(data["adj"], dtype=torch.float)
    x = torch.tensor(data["feat"], requires_grad=True, dtype=torch.float)
    scheduler, optimizer = train_utils.build_optimizer(
        args, model.parameters(), weight_decay=args.weight_decay)
    model.train()
    ypred = None
    for epoch in range(args.num_epochs):
        begin_time = time.time()
        model.zero_grad()

        if args.gpu:
            ypred, adj_att = model(x.cuda(), adj.cuda())
        else:
            ypred, adj_att = model(x, adj)
        ypred_train = ypred[:, train_idx, :]
        if args.gpu:
            loss = model.loss(ypred_train, labels_train.cuda())
        else:
            loss = model.loss(ypred_train, labels_train)
        loss.backward()
        # note the trailing underscore: clip_grad_norm is deprecated
        nn.utils.clip_grad_norm_(model.parameters(), args.clip)

        optimizer.step()
        #for param_group in optimizer.param_groups:
        #    print(param_group["lr"])
        elapsed = time.time() - begin_time

        result_train, result_test = evaluate_node(ypred.cpu(), data["labels"],
                                                  train_idx, test_idx)
        if writer is not None:
            writer.add_scalar("loss/avg_loss", loss, epoch)
            writer.add_scalars(
                "prec",
                {
                    "train": result_train["prec"],
                    "test": result_test["prec"]
                },
                epoch,
            )
            writer.add_scalars(
                "recall",
                {
                    "train": result_train["recall"],
                    "test": result_test["recall"]
                },
                epoch,
            )
            writer.add_scalars("acc", {
                "train": result_train["acc"],
                "test": result_test["acc"]
            }, epoch)

        if epoch % 10 == 0:
            print(
                "epoch: ",
                epoch,
                "; loss: ",
                loss.item(),
                "; train_acc: ",
                result_train["acc"],
                "; test_acc: ",
                result_test["acc"],
                "; train_prec: ",
                result_train["prec"],
                "; test_prec: ",
                result_test["prec"],
                "; epoch time: ",
                "{0:0.2f}".format(elapsed),
            )

        if scheduler is not None:
            scheduler.step()
    print(result_train["conf_mat"])
    print(result_test["conf_mat"])

    # computation graph
    model.eval()
    if args.gpu:
        ypred, _ = model(x.cuda(), adj.cuda())
    else:
        ypred, _ = model(x, adj)
    cg_data = {
        "adj": data["adj"],
        "feat": data["feat"],
        "label": data["labels"],
        "pred": ypred.cpu().detach().numpy(),
        "train_idx": train_idx,
    }
    # import pdb
    # pdb.set_trace()
    io_utils.save_checkpoint(model,
                             optimizer,
                             args,
                             num_epochs=-1,
                             cg_dict=cg_data)
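
Calling this trainer requires an args namespace carrying the fields the function reads (train_ratio, num_epochs, gpu, clip, lr, weight_decay, plus whatever train_utils.build_optimizer expects). A minimal invocation sketch, assuming the GNNExplainer repository's gen_syn1 generator and GcnEncoderNode model; the import paths and hyperparameter values are illustrative assumptions, not the project's defaults:

import argparse

import numpy as np

import gengraph                    # graph generators (assumed import path)
import models                      # model definitions (assumed import path)
import utils.featgen as featgen    # feature generators (assumed import path)

# Field names mirror what train_node_classifier and its optimizer builder
# read; the values here are only illustrative
args = argparse.Namespace(
    train_ratio=0.8, num_epochs=1000, lr=0.001, weight_decay=0.005,
    clip=2.0, gpu=False, opt="adam", opt_scheduler="none",
)

G, labels, name = gengraph.gen_syn1(
    feature_generator=featgen.ConstFeatureGen(np.ones(10, dtype=float)))
model = models.GcnEncoderNode(
    input_dim=10, hidden_dim=20, embedding_dim=20,
    label_dim=max(labels) + 1, num_layers=3, args=args)

train_node_classifier(G, labels, model, args, writer=None)
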
Code Example #2
File: train.py  Project: samaujs/Network-Science
def train_node_classifier(G, labels, model, args, writer=None):
    # train/test split only for nodes
    num_nodes = G.number_of_nodes()

    # Use the first train_ratio fraction of shuffled nodes for training
    num_train = int(num_nodes * args.train_ratio)
    idx = list(range(num_nodes))

    # Shuffle for training
    np.random.shuffle(idx)
    train_idx = idx[:num_train]
    test_idx = idx[num_train:]

    data = gengraph.preprocess_input_graph(G, labels)
    labels_train = torch.tensor(data["labels"][:, train_idx], dtype=torch.long)
    adj = torch.tensor(data["adj"], dtype=torch.float)
    x = torch.tensor(data["feat"], requires_grad=True, dtype=torch.float)

    # Inlined from train_utils.build_optimizer: optimize only the
    # parameters that require gradients
    filter_fn = filter(lambda p: p.requires_grad, model.parameters())

    # Assumes args.opt == 'adam'; fixed learning rate (no scheduler) and
    # no weight decay
    optimizer = optim.Adam(filter_fn, lr=args.lr, weight_decay=0.0)
    scheduler = None

    # Sets the module in training mode
    model.train()
    ypred = None
    for epoch in range(args.num_epochs):
        begin_time = time.time()
        model.zero_grad()

        if args.gpu:
            ypred, adj_att = model(x.cuda(), adj.cuda())
        else:
            ypred, adj_att = model(x, adj)
        ypred_train = ypred[:, train_idx, :]
        if args.gpu:
            loss = model.loss(ypred_train, labels_train.cuda())
        else:
            loss = model.loss(ypred_train, labels_train)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.clip)

        optimizer.step()
        #for param_group in optimizer.param_groups:
        #    print(param_group["lr"])
        elapsed = time.time() - begin_time

        # Evaluate train/test metrics, including confusion matrices
        result_train, result_test = evaluate_node(ypred.cpu(), data["labels"],
                                                  train_idx, test_idx)

        if writer is not None:
            writer.add_scalar("loss/avg_loss", loss, epoch)
            writer.add_scalars(
                "prec",
                {
                    "train": result_train["prec"],
                    "test": result_test["prec"]
                },
                epoch,
            )
            writer.add_scalars(
                "recall",
                {
                    "train": result_train["recall"],
                    "test": result_test["recall"]
                },
                epoch,
            )
            writer.add_scalars("acc", {
                "train": result_train["acc"],
                "test": result_test["acc"]
            }, epoch)

        if epoch % 10 == 0:
            print(
                "epoch: ",
                epoch,
                "; loss: ",
                loss.item(),
                "; train_acc: ",
                result_train["acc"],
                "; test_acc: ",
                result_test["acc"],
                "; train_prec: ",
                result_train["prec"],
                "; test_prec: ",
                result_test["prec"],
                "; epoch time: ",
                "{0:0.2f}".format(elapsed),
            )

        if scheduler is not None:
            scheduler.step()

    print("Confusion Matrix of train result :\n", result_train["conf_mat"])
    print("Confusion Matrix of test result :\n", result_test["conf_mat"])

    # Sets the module in evaluation mode for computational graph
    model.eval()
    if args.gpu:
        ypred, _ = model(x.cuda(), adj.cuda())
    else:
        ypred, _ = model(x, adj)

    cg_data = {
        "adj": data["adj"],
        "feat": data["feat"],
        "label": data["labels"],
        "pred": ypred.cpu().detach().numpy(),
        "train_idx": train_idx,
    }

    print("Labels of the Computational graph :\n", cg_data['label'])
    print("Prediction result of the Computational graph :\n", cg_data['pred'])
    print("Train index of the Computational graph data :\n",
          cg_data['train_idx'])
    # import pdb
    # pdb.set_trace()

    io_utils.save_checkpoint(model,
                             optimizer,
                             args,
                             num_epochs=-1,
                             cg_dict=cg_data)
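
Example #2 inlines what Example #1 delegates to train_utils.build_optimizer. For reference, a minimal sketch of an equivalent helper returning the same (scheduler, optimizer) pair; the opt_scheduler/opt_decay_step/opt_decay_rate field names are assumptions, not confirmed from either project:

import torch.optim as optim

def build_optimizer_sketch(args, params, weight_decay=0.0):
    # Optimize only parameters that require gradients, as in Example #2
    filter_fn = filter(lambda p: p.requires_grad, params)
    optimizer = optim.Adam(filter_fn, lr=args.lr, weight_decay=weight_decay)
    # Optional step-decay schedule; these arg field names are assumptions
    if getattr(args, "opt_scheduler", "none") == "step":
        scheduler = optim.lr_scheduler.StepLR(
            optimizer, step_size=args.opt_decay_step,
            gamma=args.opt_decay_rate)
    else:
        scheduler = None
    return scheduler, optimizer
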
Code Example #3
def train_node_classifier_multigraph(G_list, labels, model, args, writer=None):
    train_idx_all, test_idx_all = [], []
    # train/test split only for nodes
    num_nodes = G_list[0].number_of_nodes()
    num_train = int(num_nodes * args.train_ratio)
    idx = list(range(num_nodes))
    np.random.shuffle(idx)
    train_idx = idx[:num_train]
    train_idx_all.append(train_idx)
    test_idx = idx[num_train:]
    test_idx_all.append(test_idx)

    data = gengraph.preprocess_input_graph(G_list[0], labels[0])
    all_labels = data["labels"]
    labels_train = torch.tensor(data["labels"][:, train_idx], dtype=torch.long)
    adj = torch.tensor(data["adj"], dtype=torch.float)
    x = torch.tensor(data["feat"], requires_grad=True, dtype=torch.float)

    for i in range(1, len(G_list)):
        np.random.shuffle(idx)
        train_idx = idx[:num_train]
        train_idx_all.append(train_idx)
        test_idx = idx[num_train:]
        test_idx_all.append(test_idx)
        data = gengraph.preprocess_input_graph(G_list[i], labels[i])
        all_labels = np.concatenate((all_labels, data["labels"]), axis=0)
        labels_train = torch.cat(
            [
                labels_train,
                torch.tensor(data["labels"][:, train_idx], dtype=torch.long),
            ],
            dim=0,
        )
        adj = torch.cat([adj, torch.tensor(data["adj"], dtype=torch.float)])
        x = torch.cat([
            x,
            torch.tensor(data["feat"], requires_grad=True, dtype=torch.float)
        ])

    scheduler, optimizer = train_utils.build_optimizer(
        args, model.parameters(), weight_decay=args.weight_decay)
    model.train()
    ypred = None
    for epoch in range(args.num_epochs):
        begin_time = time.time()
        model.zero_grad()

        if args.gpu:
            ypred = model(x.cuda(), adj.cuda())
        else:
            ypred = model(x, adj)
        # normal indexing
        ypred_train = ypred[:, train_idx, :]
        # In the multigraph setting each graph has its own train split, so
        # gather the training predictions graph by graph before the loss
        all_train_idx = [item for sublist in train_idx_all for item in sublist]
        ypred_train_cmp = torch.cat(
            [ypred[i, train_idx_all[i], :] for i in range(len(G_list))],
            dim=0).reshape(len(G_list), num_train, -1)
        if args.gpu:
            loss = model.loss(ypred_train_cmp, labels_train.cuda())
        else:
            loss = model.loss(ypred_train_cmp, labels_train)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.clip)

        optimizer.step()
        #for param_group in optimizer.param_groups:
        #    print(param_group["lr"])
        elapsed = time.time() - begin_time

        result_train, result_test = evaluate_node(ypred.cpu(), all_labels,
                                                  train_idx_all, test_idx_all)
        if writer is not None:
            writer.add_scalar("loss/avg_loss", loss, epoch)
            writer.add_scalars(
                "prec",
                {
                    "train": result_train["prec"],
                    "test": result_test["prec"]
                },
                epoch,
            )
            writer.add_scalars(
                "recall",
                {
                    "train": result_train["recall"],
                    "test": result_test["recall"]
                },
                epoch,
            )
            writer.add_scalars("acc", {
                "train": result_train["acc"],
                "test": result_test["acc"]
            }, epoch)

        print(
            "epoch: ",
            epoch,
            "; loss: ",
            loss.item(),
            "; train_acc: ",
            result_train["acc"],
            "; test_acc: ",
            result_test["acc"],
            "; epoch time: ",
            "{0:0.2f}".format(elapsed),
        )

        if scheduler is not None:
            scheduler.step()
    print(result_train["conf_mat"])
    print(result_test["conf_mat"])

    # computation graph
    model.eval()
    if args.gpu:
        ypred = model(x.cuda(), adj.cuda())
    else:
        ypred = model(x, adj)
    cg_data = {
        "adj": adj.cpu().detach().numpy(),
        "feat": x.cpu().detach().numpy(),
        "label": all_labels,
        "pred": ypred.cpu().detach().numpy(),
        "train_idx": train_idx_all,
    }
    io_utils.save_checkpoint(model,
                             optimizer,
                             args,
                             num_epochs=-1,
                             cg_dict=cg_data)
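
The per-graph gather in Example #3 (the cat/reshape over train_idx_all) is easiest to verify on toy tensors. A self-contained sketch with made-up sizes: 3 graphs, 5 nodes each, 2 training nodes per graph, 4 classes:

import torch

num_graphs, num_nodes, num_classes, num_train = 3, 5, 4, 2
ypred = torch.randn(num_graphs, num_nodes, num_classes)
# One independent train split per graph, as built in the loop above
train_idx_all = [torch.randperm(num_nodes)[:num_train]
                 for _ in range(num_graphs)]
ypred_train = torch.cat(
    [ypred[i, train_idx_all[i], :] for i in range(num_graphs)],
    dim=0).reshape(num_graphs, num_train, num_classes)
print(ypred_train.shape)  # torch.Size([3, 2, 4])
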
Code Example #4
def syn_task1(args, writer=None):
    print('Generating graph.')
    feature_generator = featgen.ConstFeatureGen(
        np.ones(args.input_dim, dtype=float))
    if args.dataset == 'syn1':
        gen_fn = gengraph.gen_syn1
    elif args.dataset == 'syn2':
        gen_fn = gengraph.gen_syn2
        feature_generator = None
    elif args.dataset == 'syn3':
        gen_fn = gengraph.gen_syn3
    elif args.dataset == 'syn4':
        gen_fn = gengraph.gen_syn4
    elif args.dataset == 'syn5':
        gen_fn = gengraph.gen_syn5
    G, labels, name = gen_fn(feature_generator=feature_generator)
    # 'cuda' is the correct device type; torch.device('gpu') raises an error
    pyg_G = NxDataset([G],
                      device=torch.device('cuda' if args.gpu else 'cpu'))[0]
    num_classes = max(labels) + 1
    labels = torch.LongTensor(labels)
    print('Done generating graph.')

    model = GCNNet(args.input_dim,
                   args.hidden_dim,
                   args.output_dim,
                   num_classes,
                   args.num_gc_layers,
                   args=args)

    if args.gpu:
        model = model.cuda()

    train_ratio = args.train_ratio
    num_train = int(train_ratio * G.number_of_nodes())
    num_test = G.number_of_nodes() - num_train

    idx = list(range(G.number_of_nodes()))

    np.random.shuffle(idx)
    train_mask = idx[:num_train]
    test_mask = idx[num_train:]

    loader = torch_geometric.data.DataLoader([pyg_G], batch_size=1)
    scheduler, opt = train_utils.build_optimizer(
        args, model.parameters(), weight_decay=args.weight_decay)
    for epoch in range(args.num_epochs):
        model.train()
        total_loss = 0
        for batch in loader:
            opt.zero_grad()
            pred = model(batch)

            pred = pred[train_mask]
            label = labels[train_mask]
            loss = model.loss(pred, label)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            opt.step()
            total_loss += loss.item()
        if writer is not None:
            writer.add_scalar("loss", total_loss, epoch)

        if epoch % 10 == 0:
            test_acc = test(loader, model, args, labels, test_mask)
            print("{} {:.4f} {:.4f}".format(epoch, total_loss, test_acc))
            if writer is not None:
                writer.add_scalar("test", test_acc, epoch)

    print("{} {:.4f} {:.4f}".format(epoch, total_loss, test_acc))
    data = gengraph.preprocess_input_graph(G, labels)
    adj = torch.tensor(data['adj'], dtype=torch.float)
    x = torch.tensor(data['feat'], requires_grad=True, dtype=torch.float)

    model.eval()
    ypred = model(batch)
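
The test helper called every tenth epoch is not shown in Example #4. A plausible minimal version, assuming the model returns per-node class logits for the single batched graph and that test_mask is a list of node indices; this is a sketch, not the project's actual implementation:

import torch

def test(loader, model, args, labels, test_mask):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch in loader:
            pred = model(batch)                   # per-node class logits
            pred = pred[test_mask].argmax(dim=1)  # predicted class per node
            label = labels[test_mask]
            correct += int(pred.eq(label).sum().item())
            total += len(test_mask)
    return correct / total
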