def main():
    """
    preprocess_data(): load data from the dataset and preprocess it into numpy format.
    process_data(): port the data to PyTorch and move it to CUDA.
    U_train, U_dev, U_test, classLatMedian, classLonMedian, userLocation:
        only used for validation and testing.
    """
    data = preprocess_data(args)
    data = process_data(data, args, args.normalization, args.usecuda)
    (adj, features, labels, idx_train, idx_val, idx_test,
     U_train, U_dev, U_test,
     classLatMedian, classLonMedian, userLocation) = data

    model_file = "./result_cmu_desce/{}way{}shot{}query-update_lr:{}-weight_decay:{}.pkl".format(
        args.n_way, args.k_spt, args.k_qry, args.update_lr, args.weight_decay)

    device = torch.device('cuda')
    maml = Meta(args, features.shape[1], labels.max().item() + 1,
                classLatMedian, classLonMedian).to(device)

    if args.model == "SGC":
        # Features could alternatively be cached in / loaded from feature_dump_file
        # (via load_obj) instead of being recomputed every run.
        feature_dump_file = os.path.join(args.dir, 'feature_dump.pkl')
        features = sgc_precompute(args, features, adj, args.degree)

    print(args.dataset)
    if args.splt:
        trainset, valset = dealdata1(args, labels)
    else:
        trainset, valset = dealdata(args, labels)
    users = U_train + U_dev + U_test

    class_acc, meanDis, MedianDis, accAT161 = train_regression(
        maml, labels, features, users, trainset, valset,
        classLatMedian, classLonMedian, userLocation,
        args.epochs, args.patience, model_file)

    # Append the test results, together with a timestamp and the run configuration, to the log file.
    timeStr = time.strftime("%Y-%m-%d %H:%M:%S\t", time.localtime(time.time()))
    argsStr = "-dir:{}\t-{}way{}shot{}query\t-update_lr{}\t-decay:{}".format(
        args.dir, args.n_way, args.k_spt, args.k_qry, args.update_lr, args.weight_decay)
    resultStr = "Test:\tclassification_acc:{}\t\tMean:{}\t\tMedian:{}\t\tAcc@161:{}".format(
        class_acc, meanDis, MedianDis, accAT161)
    content = "\n" + timeStr + "\n" + argsStr + "\n" + resultStr + "\n"
    with open('./result_cmu_desce/result.txt', 'a') as f:
        f.write(content)
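# Reference sketch (not this repo's code): the script above relies on an sgc_precompute()
# helper. In the original SGC formulation (Wu et al., 2019), precomputation smooths node
# features by repeatedly multiplying them with the normalized adjacency, i.e. it computes
# S^K X once up front so that only a linear classifier needs to be trained afterwards.
# The sgc_precompute() actually called here takes extra arguments (e.g. args) and may differ;
# the hypothetical helper below only illustrates the core idea.
def sgc_precompute_sketch(features, adj, degree):
    # features: dense [N, F] tensor; adj: normalized adjacency as a torch sparse tensor; degree: K
    for _ in range(degree):
        features = torch.spmm(adj, features)  # one propagation step: X <- S X
    return features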
# Special case for Stacked Logistic Regression (SLG): the stacked features concatenate
# one block per propagation step, so the input width grows with the propagation degree.
if args.model == "SLG":
    nfeat = features.size(1) * args.degree
else:
    nfeat = features.size(1)
model = get_model(model_opt=args.model, nfeat=nfeat,
                  nclass=labels.max().item() + 1, nhid=args.hidden,
                  dropout=args.dropout, cuda=args.cuda, degree=args.degree)

if args.model == "SGC" or args.model == "gfnn":
    features, precompute_time = sgc_precompute(features, adj, args.degree)
    print("{:.4f}s".format(precompute_time))
    model, acc_val, train_time = train_regression(
        model, features[idx_train], labels[idx_train],
        features[idx_val], labels[idx_val],
        args.epochs, args.weight_decay, args.lr, args.dropout)
    acc_test = test_regression(model, features[idx_test], labels[idx_test])
    print("Validation Accuracy: {:.4f} Test Accuracy: {:.4f}".format(acc_val, acc_test))
    print("Pre-compute time: {:.4f}s, train time: {:.4f}s, total: {:.4f}s".format(
        precompute_time, train_time, precompute_time + train_time))

if args.model == "SLG":
    features, precompute_time = stack_feat(features, adj, args.degree)
    features = torch.FloatTensor(features)
    if args.cuda:
        features = features.cuda()
    print("{:.4f}s".format(precompute_time))
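# Reference sketch (assumption, not this repo's stack_feat): the SLG branch above sizes its
# input as features.size(1) * args.degree, which is consistent with concatenating one block
# of propagated features per hop, e.g. [S X, S^2 X, ..., S^K X]. The hypothetical helper
# below illustrates that idea only; the real stack_feat() may also include the raw features,
# return numpy arrays, and return a precompute time alongside the features.
def stack_feat_sketch(features, adj, degree):
    blocks = []
    x = features
    for _ in range(degree):
        x = torch.spmm(adj, x)  # propagate one more hop: X <- S X
        blocks.append(x)
    # concatenate the per-hop blocks along the feature dimension -> [N, F * degree]
    return torch.cat(blocks, dim=1)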
args, _ = parser.parse_known_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
args.device = torch.device("cuda:0" if args.cuda else "cpu")

model_name = args.dataset + '_' + args.method + '.pkl'
save_file = 'model_save/' + model_name

adj, features, lb_dict, tr_set, va_set, ts_set = load_data(args.dataset)

# Choose the node embedding: raw features, precomputed DGI embeddings,
# or features smoothed by two rounds of SGC-style propagation.
if args.method == 'raw':
    ne_array = features
elif args.method == 'dgi':
    ne_array = np.load('data/' + args.dataset + '_' + args.method + '.npy')[0]
elif args.method == 'mul':
    ne_array = sgc_precompute(features, adj, 2)
else:
    raise ValueError('Embedding method undefined: %s' % args.method)

if not isinstance(ne_array, np.ndarray):
    ne_array = ne_array.toarray()
word_num = ne_array.shape[0]
emb_size = ne_array.shape[1]

with launch_ipdb_on_exception():
    tr_edges, va_edges, ts_edges = gen_train_reddit(
        adj, lb_dict, tr_set, va_set, ts_set,
        args.balance_rate, args.extra_rate, 1)
def main():
    args = get_citation_args()
    n_way = args.n_way
    train_shot = args.train_shot
    test_shot = args.test_shot
    step = args.step
    node_num = args.node_num
    iteration = args.iteration
    accuracy_meta_test = []
    total_accuracy_meta_test = []

    set_seed(args.seed, args.cuda)
    adj, features, labels = load_citation(args.dataset, args.normalization, args.cuda)

    if args.dataset == 'cora':
        class_label = [0, 1, 2, 3, 4, 5, 6]
        combination = list(combinations(class_label, n_way))
    elif args.dataset == 'citeseer':
        node_num = 3327
        iteration = 15
        class_label = [0, 1, 2, 3, 4, 5]
        combination = list(combinations(class_label, n_way))

    if args.model == 'SGC':
        features = sgc_precompute(features, adj, args.degree)

    for i in range(len(combination)):
        print('Cross_Validation: ', i + 1)
        test_label = list(combination[i])
        train_label = [n for n in class_label if n not in test_label]
        print('Cross_Validation {} Train_Label_List: {}'.format(i + 1, train_label))
        print('Cross_Validation {} Test_Label_List: {}'.format(i + 1, test_label))
        model = get_model(args.model, features.size(1), n_way, args.cuda)

        # Meta-training: repeatedly fine-tune on random pairs of training classes.
        for j in range(iteration):
            labels_local = labels.clone().detach()
            select_class = random.sample(train_label, n_way)
            print('Cross_Validation {} ITERATION {} Train_Label: {}'.format(i + 1, j + 1, select_class))
            class1_idx = []
            class2_idx = []
            for k in range(node_num):
                if labels_local[k] == select_class[0]:
                    class1_idx.append(k)
                    labels_local[k] = 0
                elif labels_local[k] == select_class[1]:
                    class2_idx.append(k)
                    labels_local[k] = 1
            for m in range(step):
                class1_train = random.sample(class1_idx, train_shot)
                class2_train = random.sample(class2_idx, train_shot)
                class1_test = [n1 for n1 in class1_idx if n1 not in class1_train]
                class2_test = [n2 for n2 in class2_idx if n2 not in class2_train]
                train_idx = class1_train + class2_train
                random.shuffle(train_idx)
                test_idx = class1_test + class2_test
                random.shuffle(test_idx)

                model = train_regression(model, features[train_idx], labels_local[train_idx],
                                         args.epochs, args.weight_decay, args.lr)
                acc_query = test_regression(model, features[test_idx], labels_local[test_idx])
                reset_array()

        torch.save(model.state_dict(), 'model.pkl')

        # Meta-testing: fine-tune the saved model on the held-out class pair.
        labels_local = labels.clone().detach()
        select_class = random.sample(test_label, 2)
        class1_idx = []
        class2_idx = []
        reset_array()
        print('Cross_Validation {} Test_Label: {}'.format(i + 1, select_class))
        for k in range(node_num):
            if labels_local[k] == select_class[0]:
                class1_idx.append(k)
                labels_local[k] = 0
            elif labels_local[k] == select_class[1]:
                class2_idx.append(k)
                labels_local[k] = 1
        for m in range(step):
            class1_train = random.sample(class1_idx, test_shot)
            class2_train = random.sample(class2_idx, test_shot)
            class1_test = [n1 for n1 in class1_idx if n1 not in class1_train]
            class2_test = [n2 for n2 in class2_idx if n2 not in class2_train]
            train_idx = class1_train + class2_train
            random.shuffle(train_idx)
            test_idx = class1_test + class2_test
            random.shuffle(test_idx)

            model_meta_trained = get_model(args.model, features.size(1), n_way, args.cuda).cuda()
            model_meta_trained.load_state_dict(torch.load('model.pkl'))
            model_meta_trained = train_regression(model_meta_trained,
                                                  features[train_idx], labels_local[train_idx],
                                                  args.epochs, args.weight_decay, args.lr)
            acc_test = test_regression(model_meta_trained, features[test_idx], labels_local[test_idx])
            accuracy_meta_test.append(acc_test)
            total_accuracy_meta_test.append(acc_test)
            reset_array()

        if args.dataset == 'cora':
            with open('cora.txt', 'a') as f:
                f.write('Cross_Validation: {} Meta-Test_Accuracy: {}'.format(
                    i + 1, torch.tensor(accuracy_meta_test).numpy().mean()))
                f.write('\n')
        elif args.dataset == 'citeseer':
            with open('citeseer.txt', 'a') as f:
                f.write('Cross_Validation: {} Meta-Test_Accuracy: {}'.format(
                    i + 1, torch.tensor(accuracy_meta_test).numpy().mean()))
                f.write('\n')
        accuracy_meta_test = []

    if args.dataset == 'cora':
        with open('cora.txt', 'a') as f:
            f.write('Dataset: {}, Train_Shot: {}, Test_Shot: {}'.format(args.dataset, train_shot, test_shot))
            f.write('\n')
            f.write('Total_Meta-Test_Accuracy: {}'.format(
                torch.tensor(total_accuracy_meta_test).numpy().mean()))
            f.write('\n')
            f.write('\n\n\n')
    elif args.dataset == 'citeseer':
        with open('citeseer.txt', 'a') as f:
            f.write('Dataset: {}, Train_Shot: {}, Test_Shot: {}'.format(args.dataset, train_shot, test_shot))
            f.write('\n')
            f.write('Total_Meta-Test_Accuracy: {}'.format(
                torch.tensor(total_accuracy_meta_test).numpy().mean()))
            f.write('\n')
            f.write('\n\n\n')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
set_seed(args.seed, args.cuda)

adj, train_adj, features, labels, idx_train, idx_val, idx_test = load_reddit_data(
    normalization=args.normalization, cuda=args.cuda)
print("Finished data loading.")

if args.model == "SGC":
    model = SGC(features.size(1), labels.max().item() + 1)
elif args.model == "gfNN":
    model = MLP(features.size(1), 32, labels.max().item() + 1)
if args.cuda:
    model.cuda()

processed_features, precompute_time = sgc_precompute(features, adj, args.degree)
if args.inductive:
    # Inductive setting: propagate the training features over the training-only graph.
    train_features, _ = sgc_precompute(features[idx_train], train_adj, args.degree)
else:
    train_features = processed_features[idx_train]
test_features = processed_features[idx_test if args.test else idx_val]

def train_mlp(model, train_features, train_labels, epochs):
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-6)
    train_loader, val_loader = get_data_loaders(train_features, train_labels, None, None, 128)
    t = perf_counter()
    max_acc_val = 0
imglist, feature, graph_feature, label, train_idx, val_idx = utils.data_slicing(
    train_imglist_path, train_feature_path, train_graph_feature_path,
    val_imglist_path, val_feature_path, val_graph_feature_path)

if edge_weight:
    save_knn_filename = '{}/knn_{}knn_k{}_sw{}'.format(save_root, dist_def, k, self_weight)
else:
    save_knn_filename = '{}/knn_{}knn_k{}_sw{}negw'.format(save_root, dist_def, k, self_weight)
# training preparation finished

knn_graph = utils.global_build(graph_feature, dist_def, k, save_knn_filename, graph_method)
agg_feature = utils.sgc_precompute(feature, knn_graph, self_weight=self_weight,
                                   edge_weight=edge_weight, degree=1)

###################################################################################################
#                                       Training period                                           #
###################################################################################################
# config
cat_ori_feat = (args.cat_ori_feat == 'True')
epochs_num = args.epochs_num
lr = args.lr
weight_decay = args.weight_decay
dropout = args.dropout
batch_size = args.batch_size
loss_type = args.loss_type
reweight = (args.reweight == 'True')
if args.tuned:
    if args.model == "SGC":
        with open("{}-tuning/{}.txt".format(args.model, args.dataset), 'rb') as f:
            args.weight_decay = pkl.load(f)['weight_decay']
        print("using tuned weight decay: {}".format(args.weight_decay))
    else:
        raise NotImplementedError

# setting random seeds
set_seed(args.seed, args.cuda)

adj, features, labels, idx_train, idx_val, idx_test, dRoot = load_citation(
    args.dataset, args.normalization, args.cuda)
model = get_model(args.model, features.size(1), labels.max().item() + 1,
                  args.hidden, args.dropout, args.cuda)

if args.model == "SGC":
    features, precompute_time = sgc_precompute(features, adj, args.degree)
    print("{:.4f}s".format(precompute_time))

def train_regression(model, train_features, train_labels, val_features, val_labels,
                     epochs=args.epochs, weight_decay=args.weight_decay,
                     lr=args.lr, dropout=args.dropout):
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    t = perf_counter()
    best_acc_val = torch.zeros(1)
    best_model = None
    for epoch in range(epochs):
        model.train()
def main(args):
    step = args.step
    set_seed(args.seed)

    adj, features, labels = load_citation(args.dataset, args.normalization)
    features = sgc_precompute(features, adj, args.degree)

    if args.dataset == 'citeseer':
        node_num = 3327
        class_label = [0, 1, 2, 3, 4, 5]
        combination = list(combinations(class_label, 2))
    elif args.dataset == 'cora':
        node_num = 2708
        class_label = [0, 1, 2, 3, 4, 5, 6]
        combination = list(combinations(class_label, 2))

    config = [('linear', [args.hidden, features.size(1)]),
              ('linear', [args.n_way, args.hidden])]
    device = torch.device('cuda')

    for i in range(len(combination)):
        print("Cross Validation: {}".format(i + 1))
        maml = Meta(args, config).to(device)
        test_label = list(combination[i])
        train_label = [n for n in class_label if n not in test_label]
        print('Cross Validation {} Train_Label_List: {}'.format(i + 1, train_label))
        print('Cross Validation {} Test_Label_List: {}'.format(i + 1, test_label))

        for j in range(args.epoch):
            # Meta-training step on tasks sampled from the training classes.
            x_spt, y_spt, x_qry, y_qry = sgc_data_generator(
                features, labels, node_num, train_label,
                args.task_num, args.n_way, args.k_spt, args.k_qry)
            accs = maml.forward(x_spt, y_spt, x_qry, y_qry)
            print('Step:', j, '\tMeta_Training_Accuracy:', accs)

            if j % 100 == 0:
                torch.save(maml.state_dict(), 'maml.pkl')
                meta_test_acc = []
                for k in range(step):
                    # Meta-testing: evaluate the saved model on tasks from the held-out classes.
                    model_meta_trained = Meta(args, config).to(device)
                    model_meta_trained.load_state_dict(torch.load('maml.pkl'))
                    model_meta_trained.eval()
                    x_spt, y_spt, x_qry, y_qry = sgc_data_generator(
                        features, labels, node_num, test_label,
                        args.task_num, args.n_way, args.k_spt, args.k_qry)
                    accs = model_meta_trained.forward(x_spt, y_spt, x_qry, y_qry)
                    meta_test_acc.append(accs)

                if args.dataset == 'citeseer':
                    with open('citeseer.txt', 'a') as f:
                        f.write('Cross Validation:{}, Step: {}, Meta-Test_Accuracy: {}'.format(
                            i + 1, j,
                            np.array(meta_test_acc).mean(axis=0).astype(np.float16)))
                        f.write('\n')
                elif args.dataset == 'cora':
                    with open('cora.txt', 'a') as f:
                        f.write('Cross Validation:{}, Step: {}, Meta-Test_Accuracy: {}'.format(
                            i + 1, j,
                            np.array(meta_test_acc).mean(axis=0).astype(np.float16)))
                        f.write('\n')
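# Reference sketch (assumption, not this repo's sgc_data_generator): judging from how it is
# called above, it has to produce support/query batches for task_num few-shot tasks, each
# built from n_way classes drawn from the given label pool with k_spt support and k_qry query
# nodes per class. The hypothetical sampler below illustrates that episode construction;
# the real generator may differ in batching, relabeling, and tensor shapes.
def sgc_episode_sampler(features, labels, node_num, label_pool, task_num, n_way, k_spt, k_qry):
    x_spt, y_spt, x_qry, y_qry = [], [], [], []
    for _ in range(task_num):
        classes = random.sample(label_pool, n_way)
        spt_idx, spt_y, qry_idx, qry_y = [], [], [], []
        for new_label, c in enumerate(classes):
            # nodes of class c, relabelled to 0..n_way-1 inside this task
            idx = [n for n in range(node_num) if labels[n].item() == c]
            chosen = random.sample(idx, k_spt + k_qry)
            spt_idx += chosen[:k_spt]
            spt_y += [new_label] * k_spt
            qry_idx += chosen[k_spt:]
            qry_y += [new_label] * k_qry
        x_spt.append(features[spt_idx])
        y_spt.append(torch.tensor(spt_y))
        x_qry.append(features[qry_idx])
        y_qry.append(torch.tensor(qry_y))
    return x_spt, y_spt, x_qry, y_qry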
from math import log

from args import get_citation_args
import torch
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

# Arguments
args = get_citation_args()

# setting random seeds
set_seed(args.seed, args.cuda)

# Hyperparameter optimization space
space = {'weight_decay': hp.loguniform('weight_decay', log(1e-10), log(1e-4))}

adj, adj_dist, features, labels, idx_train, idx_val, idx_test = load_citation(
    args.dataset, args.normalization, args.cuda,
    gamma=args.gamma, degree=args.degree, L=args.L, K=args.K)

if args.model != "GCN":
    features, precompute_time = sgc_precompute(
        features, adj, adj_dist, args.degree, args.concat,
        args.L, args.K, idx_train, idx_val, idx_test)

def sgc_objective(space):
    if args.K:
        model = get_model(args.model, features[0][0].size(1), labels.max().item() + 1,
                          args.hidden, args.decay, args.L, args.K, args.dropout, args.cuda)
    else:
        model = get_model(args.model, features.size(1), labels.max().item() + 1,
                          args.hidden, args.decay, args.L, args.K, args.dropout, args.cuda)

    if args.model != 'GCN':
        if args.K:
            model, acc_val, _, _ = train_regression(
                model, features[0], labels[idx_train], features[1], labels[idx_val],
                features[2], labels[idx_test], idx_test, adj,
                args.epochs, space['weight_decay'], args.lr, args.dropout)
        else:
            model, acc_val, _, _ = train_regression(
                model, features[idx_train], labels[idx_train],
                features[idx_val], labels[idx_val],
                features[idx_test], labels[idx_test], idx_test, adj,
                args.epochs, space['weight_decay'], args.lr, args.dropout)
    else:
        model, acc_val, _ = train_gcn(model, features, labels, idx_train, idx_val, idx_test,
                                      adj, args.epochs, args.weight_decay, args.lr, args.dropout)
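# The excerpt ends inside sgc_objective. For orientation, this is how hyperopt's fmin is
# typically driven with the space defined above; it is an assumption about the rest of the
# file, not its verbatim contents. The objective should end by returning a loss, e.g.
# {'loss': -acc_val, 'status': STATUS_OK}, which hyperopt then minimizes over the search space.
#
#     trials = Trials()
#     best = fmin(sgc_objective, space=space, algo=tpe.suggest,
#                 max_evals=60, trials=trials)
#     print("Best weight decay: {:.2e}".format(best['weight_decay']))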