parser.add_argument('--degree', type=int, default=2,
                    help='degree of the approximation.')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

set_seed(args.seed, args.cuda)

adj, train_adj, features, labels, idx_train, idx_val, idx_test = load_reddit_data(
    normalization=args.normalization, cuda=args.cuda)
print("Finished data loading.")

if args.model == "SGC":
    model = SGC(features.size(1), labels.max().item() + 1)
elif args.model == "gfNN":
    model = MLP(features.size(1), 32, labels.max().item() + 1)
if args.cuda:
    model.cuda()

processed_features, precompute_time = sgc_precompute(features, adj, args.degree)
if args.inductive:
    train_features, _ = sgc_precompute(features[idx_train], train_adj, args.degree)
else:
    train_features = processed_features[idx_train]
test_features = processed_features[idx_test if args.test else idx_val]
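# sgc_precompute is the heart of SGC: the graph structure is folded into a
# one-off feature pre-processing pass, so the model itself reduces to a
# linear classifier. A minimal sketch of what it is assumed to compute
# (K repeated sparse multiplications with the normalized adjacency, plus the
# wall time it returns as precompute_time):
from time import perf_counter
import torch

def sgc_precompute_sketch(features, adj, degree):
    # S^K X: diffuse features `degree` hops through the normalized adjacency
    t = perf_counter()
    for _ in range(degree):
        features = torch.spmm(adj, features)
    return features, perf_counter() - t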
if __name__ == '__main__':
    if args.dataset == "mr":
        nclass = 1
    else:
        nclass = label_dict["train"].max().item() + 1

    if not args.preprocessed:
        adj_dense = sparse_to_torch_dense(sp_adj, device='cpu')
        feat_dict, precompute_time = sgc_precompute(adj, adj_dense,
                                                    args.degree - 1, index_dict)
    else:
        # load the released degree-2 features
        with open(os.path.join("preprocessed",
                               "{}.pkl".format(args.dataset)), "rb") as prep:
            feat_dict = pkl.load(prep)
        precompute_time = 0

    model = SGC(nfeat=feat_dict["train"].size(1), nclass=nclass)
    if args.cuda:
        model.cuda()

    val_acc, best_model, train_time = train_linear(model, feat_dict,
                                                   args.weight_decay,
                                                   args.dataset == "mr")
    test_res = eval_linear(best_model, feat_dict["test"].cuda(),
                           label_dict["test"].cuda(), args.dataset == "mr")
    train_res = eval_linear(best_model, feat_dict["train"].cuda(),
                            label_dict["train"].cuda(), args.dataset == "mr")
    print("Total Time: {:.2f}s, Train acc: {:.4f}, Val acc: {:.4f}, Test acc: {:.4f}"
          .format(precompute_time + train_time, train_res["accuracy"],
                  val_acc, test_res["accuracy"]))
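# train_linear / eval_linear are assumed to fit and score the single linear
# layer on the precomputed features. A hedged sketch of the core fitting
# loop only (the real train_linear also tracks validation accuracy and
# training time, and may use a different optimizer; "mr" is binary, hence
# the one-logit / BCE branch):
import torch
import torch.nn.functional as F

def train_linear_sketch(model, feat_dict, label_dict, weight_decay,
                        binary=False, epochs=100, lr=0.2):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=weight_decay)
    for _ in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(feat_dict["train"])
        if binary:  # "mr": one output logit, binary cross-entropy
            loss = F.binary_cross_entropy_with_logits(
                out.squeeze(), label_dict["train"].float())
        else:
            loss = F.cross_entropy(out, label_dict["train"])
        loss.backward()
        optimizer.step()
    return model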
                 k=args.k)
print('Loaded {0} dataset with {1} nodes and {2} edges'.format(
    args.dataset, data.n_node, data.n_edge))
feature = data.feature_diffused.to(device)
label = data.label.to(device)
label_train = label[data.idx_train]
label_val = label[data.idx_val]
label_test = label[data.idx_test]

"""
===========================================================================
Training
===========================================================================
"""
# Model and optimizer
model = SGC(n_feature=data.n_feature,
            n_class=data.n_class,
            dropout=args.dropout).to(device)
optimizer = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.weight_decay)
metric = torchmetrics.Accuracy().to(device)

for epoch in range(1, args.epoch + 1):
    t = time.time()

    # Training
    model.train()
    optimizer.zero_grad()
    output = model(feature)[data.idx_train]
    loss_train = F.nll_loss(output, label_train)
    acc_train = metric(output.max(1)[1], label_train)
    loss_train.backward()
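# The SGC model trained above is assumed to be nothing more than dropout plus
# a single linear layer over the pre-diffused features, returning
# log-probabilities for the F.nll_loss call. A minimal sketch:
import torch
import torch.nn as nn

class SGCSketch(nn.Module):
    def __init__(self, n_feature, n_class, dropout=0.0):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(n_feature, n_class)

    def forward(self, x):
        # x is already the diffused S^K X, so no graph ops happen here
        return torch.log_softmax(self.linear(self.dropout(x)), dim=1)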
def test_regression(model, test_features, test_labels):
    with torch.no_grad():
        model.eval()
        return f1(model(test_features), test_labels)


# Fix the random seed for reproducible results
set_seed(args.seed, args.cuda)

# Full adjacency matrix, features, labels, and train/val/test splits
adj, features, labels, idx_train, idx_val, idx_test = \
    load_reddit_data(normalization=args.normalization, cuda=args.cuda)
print("Finished data loading.")

if args.model == 'SGC':
    model = SGC(features.size(1), labels.max().item() + 1)
if args.cuda:
    model.cuda()

# precompute
processed_features, precompute_time = sgc_precompute(features, adj, args.degree)

# train
train_features = processed_features[idx_train]
train_labels = labels[idx_train]
model, train_time = train_regression(model, train_features, train_labels, args.epochs)

# test
test_features = processed_features[idx_test if args.test else idx_val]
test_labels = labels[idx_test if args.test else idx_val]
test_f1, _ = test_regression(model, test_features, test_labels)
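# train_regression is assumed to be the usual full-batch SGC training loop:
# Adam on the precomputed training features with cross-entropy, returning the
# fitted model and its wall time. A hedged sketch (hyperparameters are
# illustrative, not the script's actual defaults):
from time import perf_counter
import torch
import torch.nn.functional as F

def train_regression_sketch(model, train_features, train_labels, epochs,
                            lr=0.2, weight_decay=0.0):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                 weight_decay=weight_decay)
    t = perf_counter()
    for _ in range(epochs):
        model.train()
        optimizer.zero_grad()
        loss = F.cross_entropy(model(train_features), train_labels)
        loss.backward()
        optimizer.step()
    return model, perf_counter() - t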
def main():
    torch.manual_seed(666)
    torch.cuda.manual_seed_all(666)
    np.random.seed(666)
    # data_name = 'BlogCatalog'
    print(args)

    nb_epochs = 50
    nb_runs = 16
    nb_try = 16
    nb_batch_maml = 10
    nb_batch = 32
    lr_1 = 0.03                   # labeled ratio
    lr_s = lr_1 * args.task_num
    tr = 0.6                      # training ratio
    # features, labels, idx_train, y_train, idx_val, y_val, idx_labeled, train_unlabeled = \
    #     SGC_process(data_name, degree=2, l_ratio=0.08, tr_ratio=0.8)

    aucfile = 'results/auc_' + datetime.now().strftime("%m_%d_%H_%M") + '_yelp.txt'
    with open(aucfile, 'a') as f:
        f.write("settings: {labeled ratio: %f, training ratio: %f, "
                "epochs: %d, update_step: %d}\n"
                % (lr_1, tr, nb_epochs, args.update_step))
        for t in range(nb_try):
            taskData = task(nb_task=args.task_num, degree=2, l_ratio=lr_1,
                            t_ratio=tr, name='yelp')
            taskData.loadNProcess()
            f.write("target data name:" + taskData.f_name[-1] + "\n")
            f.write("%d-th try: \n" % t)
            for i in range(nb_runs):
                # MAML training
                print("maml training...")
                print("In %d-th run..." % (i + 1))
                f.write("%d-th run\n" % i)
                feature_list, label, l_list, ul_list, idx_test = taskData.sampleAnomaly()
                config = modelArch(feature_list[0].shape[1], args.n_way)
                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                maml = Meta(args, config).to(device)

                # count the parameters that will be updated
                tmp = filter(lambda x: x.requires_grad, maml.parameters())
                num = sum(map(lambda x: np.prod(x.shape), tmp))
                print("Total #trainable tensors: ", num)

                batch_gen = DataLoaderN(feature_list, l_list, ul_list,
                                        b_size=8, b_size_qry=6,
                                        nb_task=args.task_num, device=device)
                maml.train()
                for e in range(1, nb_epochs + 1):
                    print("Running %d-th epoch" % e)
                    epoch_loss = 0
                    for b in range(nb_batch_maml):
                        x_train, y_train, x_qry, y_qry = batch_gen.getBatch(qry=False)
                        y_pred, loss = maml(x_train, y_train, x_qry, y_qry)
                        epoch_loss += loss
                    print("Epoch loss: %f" % epoch_loss)
                print("End of training.")

                # MAML evaluation
                print("Evaluating the maml model")
                maml.eval()
                x_test = feature_list[args.task_num - 1][idx_test].to(device)
                y_test = label[idx_test].to(device)
                auc_roc, auc_pr, ap = maml.evaluating(x_test, y_test)
                print("End of evaluating.")
                f.write("MAML auc_roc: %.5f, auc_pr: %.5f, ap: %.5f\n"
                        % (auc_roc, auc_pr, ap))

                # G-dev training
                print('G-dev training...')
                features, labels, idx_labeled, idx_unlabeled, idx_test = SGC_process(
                    taskData.target, degree=2, l_ratio=lr_s, tr_ratio=tr)
                attr_dim = features.shape[1]
                # model = FCNet(attr_dim, 1).to(device)
                # model = SGC_original(attr_dim, 1).to(device)
                model = SGC(attr_dim, 1).to(device)
                optim = torch.optim.Adam(model.parameters(), lr=0.002, weight_decay=0)
                data_sampler = DataLoader(features, idx_labeled, idx_unlabeled, b_size=8)
                model.float()
                model.train()
                for e in range(1, nb_epochs + 1):
                    epoch_loss = 0
                    for b in range(nb_batch):
                        x_b, y_b = data_sampler.getBatch()
                        x_b, y_b = x_b.to(device), y_b.to(device)
                        y_pred = model(x_b)
                        loss = deviation_loss(y_b, y_pred)
                        optim.zero_grad()
                        loss.backward()
                        optim.step()
                        epoch_loss += loss.item()
                    print("epoch loss %f" % epoch_loss)

                # evaluation on the test split
                model.eval()
                x_val = features[idx_test].to(device)
                y_pred = model(x_val).detach().cpu().numpy()
                y_val = labels[idx_test].detach().cpu().numpy()
                auc_roc, _, auc_pr = aucPerformance(y_val, y_pred)
                print("G-dev auc_roc: %.5f, auc_pr: %.5f" % (auc_roc, auc_pr))
                f.write("G-Dev auc_roc: %.5f, auc_pr: %.5f\n" % (auc_roc, auc_pr))
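# deviation_loss is assumed to follow the deviation-network formulation of
# Pang et al. (KDD 2019), which the "G-dev" naming suggests: anomaly scores
# are standardized against a Gaussian reference, normal points are pulled
# toward the reference mean, and labeled anomalies are pushed at least
# `margin` standard deviations above it. A hedged sketch (margin and
# reference-sample size are illustrative):
import torch

def deviation_loss_sketch(y_true, y_pred, margin=5.0, n_ref=5000):
    y_pred = y_pred.view(-1)                  # flatten (batch, 1) scores
    y_true = y_true.view(-1).float()          # 0 = normal, 1 = labeled anomaly
    ref = torch.randn(n_ref, device=y_pred.device)     # reference scores ~ N(0, 1)
    dev = (y_pred - ref.mean()) / ref.std()            # standardized deviation
    inlier_loss = torch.abs(dev)                       # pull normals to the mean
    outlier_loss = torch.clamp(margin - dev, min=0.0)  # push anomalies >= margin sigma
    return torch.mean((1 - y_true) * inlier_loss + y_true * outlier_loss)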