# Example #1
# Final CLI option: K, the number of propagation (adjacency-power) steps
# used by the SGC/gfNN pre-computation below.
parser.add_argument('--degree',
                    type=int,
                    default=2,
                    help='degree of the approximation.')

args = parser.parse_args()
# Enable CUDA only when it was not explicitly disabled and a GPU is present.
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Fix RNG seeds (CPU and, when enabled, CUDA) for reproducibility.
set_seed(args.seed, args.cuda)

# Reddit graph: full adjacency, train-only adjacency, node features,
# labels and the train/val/test index splits.
adj, train_adj, features, labels, idx_train, idx_val, idx_test = load_reddit_data(
    normalization=args.normalization, cuda=args.cuda)
print("Finished data loading.")

# Linear SGC head, or a small MLP (gfNN, hidden size 32) over the same inputs.
if args.model == "SGC":
    model = SGC(features.size(1), labels.max().item() + 1)
elif args.model == "gfNN":
    model = MLP(features.size(1), 32, labels.max().item() + 1)
# NOTE(review): any other --model value leaves `model` unbound and the
# .cuda() call below raises NameError — confirm the argparse choices.

if args.cuda: model.cuda()
# Pre-compute the K-step feature propagation once; training afterwards
# only fits a simple model on these fixed, diffused features.
processed_features, precompute_time = sgc_precompute(features, adj,
                                                     args.degree)
if args.inductive:
    # Inductive setting: propagate training features over the train-only
    # adjacency so no test-graph structure leaks into training.
    train_features, _ = sgc_precompute(features[idx_train], train_adj,
                                       args.degree)
else:
    train_features = processed_features[idx_train]

# Evaluate on the test split when --test is set, otherwise on validation.
test_features = processed_features[idx_test if args.test else idx_val]

# Example #2

if __name__ == '__main__':
    # "mr" (movie review) is a binary task trained with a single logit;
    # every other dataset gets one output per class.
    if args.dataset == "mr": nclass = 1
    else: nclass = label_dict["train"].max().item() + 1
    if not args.preprocessed:
        # Diffuse the features on the fly (degree-1 fewer hops than --degree).
        adj_dense = sparse_to_torch_dense(sp_adj, device='cpu')
        feat_dict, precompute_time = sgc_precompute(adj, adj_dense,
                                                    args.degree - 1,
                                                    index_dict)
    else:
        # load the released degree 2 features
        with open(os.path.join("preprocessed", "{}.pkl".format(args.dataset)),
                  "rb") as prep:
            feat_dict = pkl.load(prep)
        precompute_time = 0

    model = SGC(nfeat=feat_dict["train"].size(1), nclass=nclass)
    if args.cuda: model.cuda()
    val_acc, best_model, train_time = train_linear(model, feat_dict,
                                                   args.weight_decay,
                                                   args.dataset == "mr")
    # BUG FIX: features/labels were previously moved to the GPU
    # unconditionally, which crashed on CPU-only hosts even though model
    # placement above honours args.cuda. Mirror the same guard here.
    if args.cuda:
        test_feat, test_label = feat_dict["test"].cuda(), label_dict["test"].cuda()
        train_feat, train_label = feat_dict["train"].cuda(), label_dict["train"].cuda()
    else:
        test_feat, test_label = feat_dict["test"], label_dict["test"]
        train_feat, train_label = feat_dict["train"], label_dict["train"]
    test_res = eval_linear(best_model, test_feat, test_label,
                           args.dataset == "mr")
    train_res = eval_linear(best_model, train_feat, train_label,
                            args.dataset == "mr")
    print(
        "Total Time: {:2f}s, Train acc: {:.4f}, Val acc: {:.4f}, Test acc: {:.4f}"
        .format(precompute_time + train_time, train_res["accuracy"], val_acc,
                test_res["accuracy"]))
# Example #3
            k=args.k)
print('Loaded {0} dataset with {1} nodes and {2} edges'.format(
    args.dataset, data.n_node, data.n_edge))
# Features were already diffused (propagated) during loading; move
# everything the training loop touches onto the target device once.
feature = data.feature_diffused.to(device)
label = data.label.to(device)
label_train = label[data.idx_train]
label_val = label[data.idx_val]
label_test = label[data.idx_test]
"""
===========================================================================
Training
===========================================================================
"""
# Model and optimizer
model = SGC(n_feature=data.n_feature,
            n_class=data.n_class,
            dropout=args.dropout).to(device)
optimizer = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.weight_decay)
# torchmetrics accumulator for accuracy; lives on the same device as model.
metric = torchmetrics.Accuracy().to(device)

for epoch in range(1, args.epoch + 1):
    t = time.time()  # epoch start time (presumably logged further below)
    # Training
    model.train()
    optimizer.zero_grad()
    # Full-batch forward pass; loss/accuracy computed on training nodes only.
    output = model(feature)[data.idx_train]
    loss_train = F.nll_loss(output, label_train)
    acc_train = metric(output.max(1)[1], label_train)
    loss_train.backward()
# Example #4
def test_regression(model, test_features, test_labels):
    """Evaluate *model* on held-out data and return its F1 score(s).

    The model is switched to eval mode and the forward pass runs under
    ``torch.no_grad()`` so no autograd graph is built during scoring.
    """
    model.eval()
    with torch.no_grad():
        predictions = model(test_features)
        return f1(predictions, test_labels)


# 随机种子固定结果
set_seed(args.seed, args.cuda)

# 邻接矩阵(全), 特征, 标签, 训练集,验证集,测试集
adj, features, labels, idx_train, idx_val, idx_test = \
    load_reddit_data(normalization=args.normalization, cuda=args.cuda)
print("Finished data loading.")

if args.model == 'SGC':
    model = SGC(features.size(1), labels.max().item() + 1)
    if args.cuda:
        model.cuda()
    # precompute
    processed_features, precompute_time = sgc_precompute(
        features, adj, args.degree)
    # train
    train_features = processed_features[idx_train]
    train_labels = labels[idx_train]
    model, train_time = train_regression(model, train_features, train_labels,
                                         args.epochs)
    # test
    test_features = processed_features[idx_test if args.test else idx_val]
    test_labels = labels[idx_test if args.test else idx_val]
    test_f1, _ = test_regression(model, test_features, test_labels)
# Example #5
def main():
    """Run the MAML vs. G-dev anomaly-detection benchmark on the yelp data.

    For each of ``nb_try`` task samplings and ``nb_runs`` repetitions this
    (1) meta-trains a MAML model across ``args.task_num`` tasks and
    evaluates it on the held-out target graph, then (2) trains a plain SGC
    model with deviation loss on the same target as a single-task baseline.
    AUC-ROC / AUC-PR / AP numbers for both are appended to a timestamped
    results file under ``results/``.
    """
    # Fixed seeds so repeated invocations are comparable.
    torch.manual_seed(666)
    torch.cuda.manual_seed_all(666)
    np.random.seed(666)

    print(args)
    nb_epochs = 50
    nb_runs = 16
    nb_try = 16
    nb_batch_maml = 10            # batches per epoch, MAML phase
    nb_batch = 32                 # batches per epoch, G-dev baseline
    lr_1 = 0.03                   # labeled ratio per task
    lr_s = lr_1 * args.task_num   # labeled ratio for the single-task baseline
    tr = 0.6                      # training ratio

    aucfile = 'results/auc_' + datetime.now().strftime("%m_%d_%H_%M") + '_yelp.txt'
    with open(aucfile, 'a') as f:
        f.write("settings: {labeled ratio: %f, training ratio: %f, epochs: %d, update_step: %d}\n" % (lr_1, tr, nb_epochs, args.update_step))
        for t in range(nb_try):
            taskData = task(nb_task=args.task_num, degree=2, l_ratio=lr_1, t_ratio=tr, name='yelp')
            taskData.loadNProcess()
            f.write("target data name:" + taskData.f_name[-1] + "\n")
            f.write("%d-th try: \n" % t)
            for i in range(nb_runs):
                # ---- MAML meta-training ----
                print("maml training...")
                print("In %d-th run..." % (i + 1))
                f.write("%d-th run\n" % i)
                feature_list, label, l_list, ul_list, idx_test = taskData.sampleAnomaly()
                config = modelArch(feature_list[0].shape[1], args.n_way)
                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                maml = Meta(args, config).to(device)
                # Count trainable parameters for a quick sanity log.
                tmp = filter(lambda x: x.requires_grad, maml.parameters())
                num = sum(map(lambda x: np.prod(x.shape), tmp))
                print("Total #trainable tensors: ", num)
                batch_gen = DataLoaderN(feature_list, l_list, ul_list, b_size=8, b_size_qry=6, nb_task=args.task_num, device=device)
                maml.train()
                for e in range(1, nb_epochs + 1):
                    print("Running %d-th epoch" % e)
                    epoch_loss = 0
                    for b in range(nb_batch_maml):
                        x_train, y_train, x_qry, y_qry = batch_gen.getBatch(qry=False)
                        y_pred, loss = maml(x_train, y_train, x_qry, y_qry)
                        # BUG FIX: accumulate a scalar instead of the loss
                        # object — summing loss tensors kept autograd graphs
                        # (and GPU memory) alive across batches; the baseline
                        # loop below already accumulates plain floats.
                        epoch_loss += float(loss)
                    print("Epoch loss: %f" % epoch_loss)
                print("End of training.")
                # ---- MAML evaluation on the target graph ----
                print("Evaluating the maml model")
                maml.eval()
                x_test, y_test = feature_list[args.task_num-1][idx_test].to(device), label[idx_test].to(device)
                auc_roc, auc_pr, ap = maml.evaluating(x_test, y_test)
                print("End of evaluating.")
                f.write("MAML auc_roc: %.5f, auc_pr: %.5f, ap: %.5f\n" % (auc_roc, auc_pr, ap))

                # ---- G-dev (deviation-loss SGC) baseline on the target ----
                print('G-dev training...')
                features, labels, idx_labeled, idx_unlabeled, idx_test = SGC_process(taskData.target, degree=2, l_ratio=lr_s, tr_ratio=tr)
                attr_dim = features.shape[1]
                # Single-logit SGC scorer trained with deviation loss.
                model = SGC(attr_dim, 1).to(device)
                optim = torch.optim.Adam(model.parameters(), lr=0.002, weight_decay=0)
                data_sampler = DataLoader(features, idx_labeled, idx_unlabeled, b_size=8)
                model.float()
                model.train()
                for e in range(1, nb_epochs + 1):
                    epoch_loss = 0
                    for b in range(nb_batch):
                        x_b, y_b = data_sampler.getBatch()
                        x_b, y_b = x_b.to(device), y_b.to(device)
                        y_pred = model(x_b)
                        loss = deviation_loss(y_b, y_pred)
                        optim.zero_grad()
                        loss.backward()
                        optim.step()
                        epoch_loss += loss.item()
                    print("epoch loss %f" % epoch_loss)
                # ---- baseline evaluation on the test split ----
                model.eval()
                x_val = features[idx_test].to(device)
                y_pred = model(x_val).detach().cpu().numpy()
                y_val = labels[idx_test].detach().cpu().numpy()
                auc_roc, _, auc_pr = aucPerformance(y_val, y_pred)
                print("G-dev auc_roc: %.5f, auc_pr: %.5f" % (auc_roc, auc_pr))
                f.write("G-Dev auc_roc: %.5f, auc_pr: %.5f\n" % (auc_roc, auc_pr))
    # BUG FIX: the trailing redundant f.close() was removed — the "with"
    # statement already closes the results file on exit.