Ejemplo n.º 1
0
def main():
    """
    Train and evaluate a MAML-style geolocation model on SGC features.

    preprocess_data() : load data from the dataset and convert it to numpy format
    process_data()    : port the data to PyTorch and move it to CUDA
    U_train, U_dev, U_test, classLatMedian, classLonMedian, userLocation
    are only used during validation and test.
    """
    data = preprocess_data(args)
    data = process_data(data, args, args.normalization, args.usecuda)

    (adj, features, labels, idx_train, idx_val, idx_test, U_train, U_dev, U_test,
     classLatMedian, classLonMedian, userLocation) = data

    model_file = "./result_cmu_desce/{}way{}shot{}query-update_lr:{}-weight_decay:{}.pkl".format(
        args.n_way, args.k_spt, args.k_qry, args.update_lr, args.weight_decay)

    device = torch.device('cuda')
    maml = Meta(
        args,
        features.shape[1],
        labels.max().item() + 1,
        classLatMedian,
        classLonMedian).to(device)

    if args.model != "SGC":
        # The metrics logged below are produced only by the SGC branch; the
        # original code fell through and died later with a NameError, so
        # fail fast with a clear message instead.
        raise ValueError("Unsupported model: {}".format(args.model))

    features = sgc_precompute(args, features, adj, args.degree)

    print(args.dataset)
    if args.splt:
        trainset, valset = dealdata1(args, labels)
    else:
        trainset, valset = dealdata(args, labels)

    users = U_train + U_dev + U_test
    class_acc, meanDis, MedianDis, accAT161 = train_regression(
        maml, labels, features, users, trainset, valset,
        classLatMedian, classLonMedian, userLocation,
        args.epochs, args.patience, model_file)

    # Append a timestamped result line to the shared log file.
    timeStr = time.strftime("%Y-%m-%d %H:%M:%S\t", time.localtime(time.time()))
    argsStr = "-dir:{}\t-{}way{}shot{}query\t-update_lr{}\t-decay:{}".format(
        args.dir, args.n_way, args.k_spt, args.k_qry, args.update_lr, args.weight_decay)
    resultStr = "Test:\tclassification_acc:{}\t\tMean:{}\t\tMedian:{}\t\tAcc@161:{}".format(
        class_acc, meanDis, MedianDis, accAT161)
    content = "\n" + timeStr + "\n" + argsStr + "\n" + resultStr + "\n"
    # The `with` statement closes the file; the original's trailing
    # f.close() was redundant.
    with open('./result_cmu_desce/result.txt', 'a') as f:
        f.write(content)
Ejemplo n.º 2
0
# Monkey patch for Stacked Logistic Regression
# SLG consumes the stacked features of all propagation steps, so its input
# width is args.degree times the raw feature width.
if args.model == "SLG":
    nfeat = features.size(1) * args.degree
else:
    nfeat = features.size(1)

# Build the chosen model; the class count is inferred from the largest label id.
model = get_model(model_opt=args.model,
                  nfeat=nfeat,
                  nclass=labels.max().item() + 1,
                  nhid=args.hidden,
                  dropout=args.dropout,
                  cuda=args.cuda,
                  degree=args.degree)

# SGC / gfnn: precompute the propagated features once, train a regression
# head on the train split, then report validation and test accuracy.
if args.model == "SGC" or args.model == "gfnn":
    features, precompute_time = sgc_precompute(features, adj, args.degree)
    print("{:.4f}s".format(precompute_time))
    model, acc_val, train_time = train_regression(
        model, features[idx_train], labels[idx_train], features[idx_val],
        labels[idx_val], args.epochs, args.weight_decay, args.lr, args.dropout)
    acc_test = test_regression(model, features[idx_test], labels[idx_test])
    print("Validation Accuracy: {:.4f} Test Accuracy: {:.4f}".format(acc_val,\
                                                                     acc_test))
    print("Pre-compute time: {:.4f}s, train time: {:.4f}s, total: {:.4f}s".
          format(precompute_time, train_time, precompute_time + train_time))
# SLG: stack the features of each propagation step instead of collapsing them.
if args.model == "SLG":
    features, precompute_time = stack_feat(features, adj, args.degree)
    # NOTE(review): stack_feat presumably returns a numpy array -- confirm;
    # the FloatTensor conversion below assumes so.
    features = torch.FloatTensor(features).float()
    if args.cuda:
        features = features.cuda()
    print("{:.4f}s".format(precompute_time))
Ejemplo n.º 3
0
#                     help='Testing mode')

args, _ = parser.parse_known_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
args.device = torch.device("cuda:0" if args.cuda else "cpu")
# Checkpoint path is derived from the dataset name and embedding method.
model_name = args.dataset + '_' + args.method + '.pkl'
save_file = 'model_save/' + model_name

adj, features, lb_dict, tr_set, va_set, ts_set = load_data(args.dataset)

# Select the node-embedding source:
#   raw : use the raw input features directly
#   dgi : load precomputed DGI embeddings from disk
#   mul : 2-step SGC feature propagation over the graph
if args.method == 'raw':
    ne_array = features
elif args.method == 'dgi':
    # NOTE(review): the [0] suggests the .npy file carries an extra leading
    # axis -- confirm against whatever wrote the file.
    ne_array = np.load('data/' + args.dataset + '_' + args.method + '.npy')[0]
elif args.method == 'mul':
    ne_array = sgc_precompute(features, adj, 2)
else:
    raise ValueError('Embedding method undefined:%s' % args.method)
# Densify sparse matrices so the shape accesses below are uniform.
if not isinstance(ne_array, np.ndarray):
    ne_array = ne_array.toarray()
word_num = ne_array.shape[0]  # number of nodes (rows)
emb_size = ne_array.shape[1]  # embedding dimensionality

# Drop into ipdb on any uncaught exception while generating edge samples.
with launch_ipdb_on_exception():
    #     if args.dataset=='reddit':
    #         tr_edges, va_edges, ts_edges = gen_train_reddit(adj, lb_dict, tr_set, va_set, ts_set, args.balance_rate, 0)
    #     else:
    tr_edges, va_edges, ts_edges = gen_train_reddit(adj, lb_dict, tr_set,
                                                    va_set, ts_set,
                                                    args.balance_rate,
                                                    args.extra_rate, 1)
Ejemplo n.º 4
0
def _relabel_two_classes(labels, select_class, node_num):
    """Collect node indices of the two selected classes and relabel them 0/1.

    Returns (labels_local, class1_idx, class2_idx); `labels` itself is not
    modified (a detached clone is relabeled in place).
    """
    labels_local = labels.clone().detach()
    class1_idx = []
    class2_idx = []
    for k in range(node_num):
        if labels_local[k] == select_class[0]:
            class1_idx.append(k)
            labels_local[k] = 0
        elif labels_local[k] == select_class[1]:
            class2_idx.append(k)
            labels_local[k] = 1
    return labels_local, class1_idx, class2_idx


def _support_query_split(class1_idx, class2_idx, shot):
    """Sample `shot` support nodes per class; the rest become the query set.

    Both index lists are shuffled.  The random calls are made in the same
    order as the original inline code so seeded runs are reproducible.
    """
    class1_train = random.sample(class1_idx, shot)
    class2_train = random.sample(class2_idx, shot)
    class1_test = [n1 for n1 in class1_idx if n1 not in class1_train]
    class2_test = [n2 for n2 in class2_idx if n2 not in class2_train]
    train_idx = class1_train + class2_train
    random.shuffle(train_idx)
    test_idx = class1_test + class2_test
    random.shuffle(test_idx)
    return train_idx, test_idx


def main():
    """Few-shot node classification with class-level cross validation.

    For each n-way combination of held-out classes: meta-train a model on
    binary tasks drawn from the remaining classes, then fine-tune and
    evaluate it on tasks drawn from the held-out classes.  Per-fold and
    total accuracies are appended to '<dataset>.txt'.
    """
    args = get_citation_args()
    n_way = args.n_way
    train_shot = args.train_shot
    test_shot = args.test_shot
    step = args.step
    node_num = args.node_num
    iteration = args.iteration

    accuracy_meta_test = []
    total_accuracy_meta_test = []

    set_seed(args.seed, args.cuda)

    adj, features, labels = load_citation(args.dataset, args.normalization,
                                          args.cuda)

    if args.dataset == 'cora':
        class_label = [0, 1, 2, 3, 4, 5, 6]
    elif args.dataset == 'citeseer':
        node_num = 3327
        iteration = 15
        class_label = [0, 1, 2, 3, 4, 5]
    else:
        # The original code left `combination` undefined for any other
        # dataset and crashed later with a NameError; fail fast instead.
        raise ValueError('Unsupported dataset: {}'.format(args.dataset))
    combination = list(combinations(class_label, n_way))

    # Both supported datasets log to '<dataset>.txt'.
    result_file = args.dataset + '.txt'

    if args.model == 'SGC':
        features = sgc_precompute(features, adj, args.degree)

    for i in range(len(combination)):
        print('Cross_Validation: ', i + 1)
        test_label = list(combination[i])
        train_label = [n for n in class_label if n not in test_label]
        print('Cross_Validation {} Train_Label_List {}: '.format(
            i + 1, train_label))
        print('Cross_Validation {} Test_Label_List {}: '.format(
            i + 1, test_label))
        model = get_model(args.model, features.size(1), n_way, args.cuda)

        # ---- meta-training on tasks sampled from the training classes ----
        for j in range(iteration):
            select_class = random.sample(train_label, n_way)
            print('Cross_Validation {} ITERATION {} Train_Label: {}'.format(
                i + 1, j + 1, select_class))
            labels_local, class1_idx, class2_idx = _relabel_two_classes(
                labels, select_class, node_num)
            for m in range(step):
                train_idx, test_idx = _support_query_split(
                    class1_idx, class2_idx, train_shot)

                model = train_regression(model, features[train_idx],
                                         labels_local[train_idx], args.epochs,
                                         args.weight_decay, args.lr)
                # The query accuracy was discarded by the original as well;
                # the call is kept for its evaluation side effects.
                test_regression(model, features[test_idx],
                                labels_local[test_idx])
                reset_array()

        torch.save(model.state_dict(), 'model.pkl')

        # ---- meta-testing on tasks sampled from the held-out classes ----
        select_class = random.sample(test_label, 2)
        reset_array()
        print('Cross_Validation {} Test_Label {}: '.format(
            i + 1, select_class))
        labels_local, class1_idx, class2_idx = _relabel_two_classes(
            labels, select_class, node_num)

        for m in range(step):
            train_idx, test_idx = _support_query_split(
                class1_idx, class2_idx, test_shot)

            # NOTE(review): .cuda() is called unconditionally here even
            # though args.cuda exists -- kept as-is to preserve behavior.
            model_meta_trained = get_model(args.model, features.size(1), n_way,
                                           args.cuda).cuda()
            model_meta_trained.load_state_dict(torch.load('model.pkl'))

            model_meta_trained = train_regression(model_meta_trained,
                                                  features[train_idx],
                                                  labels_local[train_idx],
                                                  args.epochs,
                                                  args.weight_decay, args.lr)
            acc_test = test_regression(model_meta_trained, features[test_idx],
                                       labels_local[test_idx])
            accuracy_meta_test.append(acc_test)
            total_accuracy_meta_test.append(acc_test)
            reset_array()

        with open(result_file, 'a') as f:
            f.write('Cross_Validation: {} Meta-Test_Accuracy: {}'.format(
                i + 1,
                torch.tensor(accuracy_meta_test).numpy().mean()))
            f.write('\n')
        accuracy_meta_test = []

    with open(result_file, 'a') as f:
        f.write('Dataset: {}, Train_Shot: {}, Test_Shot: {}'.format(
            args.dataset, train_shot, test_shot))
        f.write('\n')
        f.write('Total_Meta-Test_Accuracy: {}'.format(
            torch.tensor(total_accuracy_meta_test).numpy().mean()))
        f.write('\n')
        f.write('\n\n\n')
Ejemplo n.º 5
0
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

set_seed(args.seed, args.cuda)

adj, train_adj, features, labels, idx_train, idx_val, idx_test = load_reddit_data(
    normalization=args.normalization, cuda=args.cuda)
print("Finished data loading.")

# NOTE(review): `model` is only bound for SGC / gfNN; any other value of
# args.model raises a NameError at model.cuda() below.
if args.model == "SGC":
    model = SGC(features.size(1), labels.max().item() + 1)
elif args.model == "gfNN":
    model = MLP(features.size(1), 32, labels.max().item() + 1)

if args.cuda: model.cuda()
# Precompute propagated features once over the full graph.
processed_features, precompute_time = sgc_precompute(features, adj,
                                                     args.degree)
# Inductive setting: propagate training features over the train-only graph
# so no information from unseen nodes leaks into training.
if args.inductive:
    train_features, _ = sgc_precompute(features[idx_train], train_adj,
                                       args.degree)
else:
    train_features = processed_features[idx_train]

# Evaluate on the test split only when --test is set; otherwise validation.
test_features = processed_features[idx_test if args.test else idx_val]


def train_mlp(model, train_features, train_labels, epochs):
    """Train `model` with Adam (lr=0.01, weight_decay=5e-6) on mini-batches
    of size 128, tracking the best validation accuracy seen so far."""
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-6)
    train_loader, val_loader = get_data_loaders(train_features, train_labels,
                                                None, None, 128)
    t = perf_counter()
    max_acc_val = 0
Ejemplo n.º 6
0
    # Slice image list / features / graph features into train and val parts.
    imglist, feature, graph_feature, label, train_idx, val_idx = utils.data_slicing(
        train_imglist_path, train_feature_path, train_graph_feature_path,
        val_imglist_path, val_feature_path, val_graph_feature_path)
    # The kNN cache filename encodes distance metric, k and self weight;
    # the 'negw' suffix marks runs without edge weights.
    if edge_weight:
        save_knn_filename = '{}/knn_{}knn_k{}_sw{}'.format(
            save_root, dist_def, k, self_weight)
    else:
        save_knn_filename = '{}/knn_{}knn_k{}_sw{}negw'.format(
            save_root, dist_def, k, self_weight)

    # training preparation finished
    # Build the kNN graph over graph_feature, then aggregate the raw
    # features with a single propagation step (degree=1) over that graph.
    knn_graph = utils.global_build(graph_feature, dist_def, k,
                                   save_knn_filename, graph_method)
    agg_feature = utils.sgc_precompute(feature,
                                       knn_graph,
                                       self_weight=self_weight,
                                       edge_weight=edge_weight,
                                       degree=1)

    ###################################################################################################################
    #                                          Training period                                                        #
    ###################################################################################################################
    # config
    # Hyperparameters come from CLI args; the boolean-like flags are compared
    # against the string 'True' because they arrive as text, not bools.
    cat_ori_feat = (args.cat_ori_feat == 'True')
    epochs_num = args.epochs_num
    lr = args.lr
    weight_decay = args.weight_decay
    dropout = args.dropout
    batch_size = args.batch_size
    loss_type = args.loss_type
    reweight = (args.reweight == 'True')
Ejemplo n.º 7
0
# Load the weight decay found by a prior hyperparameter search, if requested.
if args.tuned:
    if args.model == "SGC":
        with open("{}-tuning/{}.txt".format(args.model, args.dataset), 'rb') as f:
            args.weight_decay = pkl.load(f)['weight_decay']
            print("using tuned weight decay: {}".format(args.weight_decay))
    else:
        # Fixed: the original `raise NotImplemented` is a bug --
        # NotImplemented is a sentinel value, not an exception, so raising
        # it produces a TypeError instead of the intended error.
        raise NotImplementedError

# setting random seeds
set_seed(args.seed, args.cuda)

adj, features, labels, idx_train, idx_val, idx_test, dRoot = load_citation(args.dataset, args.normalization, args.cuda)

model = get_model(args.model, features.size(1), labels.max().item()+1, args.hidden, args.dropout, args.cuda)

if args.model == "SGC":
    features, precompute_time = sgc_precompute(features, adj, args.degree)
    # Moved inside the guard: precompute_time is only defined for SGC, so
    # the original unconditional print raised a NameError for other models.
    print("{:.4f}s".format(precompute_time))

def train_regression(model,
                     train_features, train_labels,
                     val_features, val_labels,
                     epochs=args.epochs, weight_decay=args.weight_decay,
                     lr=args.lr, dropout=args.dropout):
    """Fit `model` on the training split with Adam, tracking the best
    validation accuracy and the corresponding model.

    NOTE(review): the default values are read from the module-level `args`
    once, at function definition time; later changes to `args` do not
    affect them.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=weight_decay)
    t = perf_counter()  # wall-clock start for training-time measurement
    best_acc_val = torch.zeros((1))
    best_model = None
    for epoch in range(epochs):
        model.train()
Ejemplo n.º 8
0
def main(args):
    """Meta-train a MAML model on SGC-propagated features with
    leave-classes-out cross validation.

    For every 2-class combination held out for testing, a fresh Meta model
    is trained on tasks from the remaining classes; every 100 steps it is
    checkpointed and meta-tested on the held-out classes, with accuracies
    appended to '<dataset>.txt'.
    """
    step = args.step
    set_seed(args.seed)

    adj, features, labels = load_citation(args.dataset, args.normalization)

    features = sgc_precompute(features, adj, args.degree)

    if args.dataset == 'citeseer':
        node_num = 3327
        class_label = [0, 1, 2, 3, 4, 5]
    elif args.dataset == 'cora':
        node_num = 2708
        class_label = [0, 1, 2, 3, 4, 5, 6]
    else:
        # The original code left node_num/combination undefined for other
        # datasets and crashed later with a NameError; fail fast instead.
        raise ValueError('Unsupported dataset: {}'.format(args.dataset))
    combination = list(combinations(class_label, 2))

    # Both supported datasets log to '<dataset>.txt'.
    result_file = args.dataset + '.txt'

    # Two-layer linear meta-learner configuration: input -> hidden -> n_way.
    config = [('linear', [args.hidden, features.size(1)]),
              ('linear', [args.n_way, args.hidden])]

    device = torch.device('cuda')

    for i in range(len(combination)):
        print("Cross Validation: {}".format(i + 1))

        maml = Meta(args, config).to(device)

        test_label = list(combination[i])
        train_label = [n for n in class_label if n not in test_label]
        print('Cross Validation {} Train_Label_List: {} '.format(
            i + 1, train_label))
        print('Cross Validation {} Test_Label_List: {} '.format(
            i + 1, test_label))

        for j in range(args.epoch):
            x_spt, y_spt, x_qry, y_qry = sgc_data_generator(
                features, labels, node_num, train_label, args.task_num,
                args.n_way, args.k_spt, args.k_qry)
            accs = maml.forward(x_spt, y_spt, x_qry, y_qry)
            print('Step:', j, '\tMeta_Training_Accuracy:', accs)
            # Every 100 steps: checkpoint and meta-test on held-out classes.
            if j % 100 == 0:
                torch.save(maml.state_dict(), 'maml.pkl')
                meta_test_acc = []
                for k in range(step):
                    # Fine-tune a fresh copy of the checkpoint so meta-test
                    # adaptation never touches the training model.
                    model_meta_trained = Meta(args, config).to(device)
                    model_meta_trained.load_state_dict(torch.load('maml.pkl'))
                    model_meta_trained.eval()
                    x_spt, y_spt, x_qry, y_qry = sgc_data_generator(
                        features, labels, node_num, test_label, args.task_num,
                        args.n_way, args.k_spt, args.k_qry)
                    accs = model_meta_trained.forward(x_spt, y_spt, x_qry,
                                                      y_qry)
                    meta_test_acc.append(accs)
                # Unified from the duplicated citeseer/cora branches: after
                # the dataset guard above, args.dataset is one of the two.
                with open(result_file, 'a') as f:
                    f.write(
                        'Cross Validation:{}, Step: {}, Meta-Test_Accuracy: {}'
                        .format(
                            i + 1, j,
                            np.array(meta_test_acc).mean(axis=0).astype(
                                np.float16)))
                    f.write('\n')
Ejemplo n.º 9
0
from args import get_citation_args
import torch
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

# Arguments
args = get_citation_args()

# setting random seeds
set_seed(args.seed, args.cuda)

# Hyperparameter optimization
# Search weight_decay log-uniformly between 1e-10 and 1e-4.
space = {'weight_decay' : hp.loguniform('weight_decay', log(1e-10), log(1e-4))}

adj, adj_dist, features, labels, idx_train, idx_val, idx_test = load_citation(args.dataset, args.normalization, args.cuda, gamma=args.gamma,degree=args.degree, L=args.L, K=args.K)
# Non-GCN models consume precomputed (propagated) features.
if args.model != "GCN":
    features, precompute_time = sgc_precompute(features, adj, adj_dist, args.degree, args.concat, args.L, args.K, idx_train, idx_val, idx_test)
def sgc_objective(space):
    """Hyperopt objective: train with the sampled weight_decay and report
    validation accuracy.

    NOTE(review): when args.K is set, `features` appears to be nested per
    split (features[0]/[1]/[2] for train/val/test) -- confirm against the
    return value of sgc_precompute.
    """
    if args.K:
        model = get_model(args.model, features[0][0].size(1), labels.max().item()+1, args.hidden, args.decay, args.L, args.K, args.dropout, args.cuda)
    else:
        model = get_model(args.model, features.size(1), labels.max().item()+1, args.hidden, args.decay, args.L, args.K, args.dropout, args.cuda)
    if args.model != 'GCN':
        if args.K:
            model, acc_val, _, _ = train_regression(model, features[0], labels[idx_train], features[1], labels[idx_val],  features[2], labels[idx_test], idx_test, adj,
                                      args.epochs, space['weight_decay'], args.lr, args.dropout)
        else:
            model, acc_val, _, _ = train_regression(model, features[idx_train], labels[idx_train], features[idx_val], labels[idx_val],  features[idx_test], labels[idx_test], idx_test, adj,
                                      args.epochs, space['weight_decay'], args.lr, args.dropout)
    else:
        # GCN trains on the raw graph directly; weight decay comes from args,
        # not from the hyperopt sample.
        model, acc_val, _ = train_gcn(model, features, labels, idx_train, idx_val, idx_test, adj,
                        args.epochs, args.weight_decay, args.lr, args.dropout)