Example #1
def run_fix_mask(args, seed, adj_percent, wei_percent):

    pruning.setup_seed(seed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data(
        args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()
    loss_func = nn.CrossEntropyLoss()

    net_gcn = net.net_gcn(embedding_dim=args['embedding_dim'], adj=adj)
    pruning.add_mask(net_gcn)
    net_gcn = net_gcn.cuda()
    pruning.random_pruning(net_gcn, adj_percent, wei_percent)

    adj_spar, wei_spar = pruning.print_sparsity(net_gcn)

    # freeze the pruning masks so only the surviving weights are trained
    for name, param in net_gcn.named_parameters():
        if 'mask' in name:
            param.requires_grad = False

    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    acc_test = 0.0
    best_val_acc = {'val_acc': 0, 'epoch': 0, 'test_acc': 0}

    for epoch in range(args['total_epoch']):

        optimizer.zero_grad()
        output = net_gcn(features, adj)
        loss = loss_func(output[idx_train], labels[idx_train])
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            output = net_gcn(features, adj, val_test=True)
            acc_val = f1_score(labels[idx_val].cpu().numpy(),
                               output[idx_val].cpu().numpy().argmax(axis=1),
                               average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(),
                                output[idx_test].cpu().numpy().argmax(axis=1),
                                average='micro')
            if acc_val > best_val_acc['val_acc']:
                best_val_acc['val_acc'] = acc_val
                best_val_acc['test_acc'] = acc_test
                best_val_acc['epoch'] = epoch

        print(
            "(Fix Mask) Epoch:[{}] Val:[{:.2f}] Test:[{:.2f}] | Final Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}]"
            .format(epoch, acc_val * 100, acc_test * 100,
                    best_val_acc['val_acc'] * 100,
                    best_val_acc['test_acc'] * 100, best_val_acc['epoch']))

    return (best_val_acc['val_acc'], best_val_acc['test_acc'],
            best_val_acc['epoch'], adj_spar, wei_spar)
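run_fix_mask depends on module-level imports and project-local helpers (pruning, net, load_data). A minimal, hypothetical driver might look like the sketch below; the args keys are the ones the function reads, but the concrete values and pruning percentages are illustrative assumptions (the dimensions follow the Cora settings in Example #3).

# Hypothetical driver for run_fix_mask (sketch). `pruning`, `net`, and
# `load_data` are project-local modules; the hyperparameter values and
# the pruning percentages below are illustrative assumptions.
import torch
import torch.nn as nn
import numpy as np
from sklearn.metrics import f1_score

args = {
    'dataset': 'cora',
    'embedding_dim': [1433, 16, 7],
    'lr': 0.01,
    'weight_decay': 5e-4,
    'total_epoch': 200,
}
val_acc, test_acc, best_epoch, adj_spar, wei_spar = run_fix_mask(
    args, seed=11, adj_percent=0.05, wei_percent=0.2)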
Example #2
def run(args, seed):

    setup_seed(seed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data(args['dataset'])

    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()

    loss_func = nn.CrossEntropyLoss()
    early_stopping = 10

    net_gcn = net.net_gcn(embedding_dim=args['embedding_dim'])
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
    loss_val = []
    for epoch in range(1000):

        optimizer.zero_grad()
        output = net_gcn(features, adj)
        loss = loss_func(output[idx_train], labels[idx_train])
        # print('epoch', epoch, 'loss', loss_train.data)
        loss.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            output = net_gcn(features, adj, val_test=True)
            loss_val.append(loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

        # early stopping
        if epoch > early_stopping and loss_val[-1] > np.mean(loss_val[-(early_stopping+1):-1]):
            break

    # test
    with torch.no_grad():
        output = net_gcn(features, adj, val_test=True)
        acc_val = f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro')
        acc_test = f1_score(labels[idx_test].cpu().numpy(), output[idx_test].cpu().numpy().argmax(axis=1), average='micro')

    return acc_val, acc_test
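The early-stopping rule in this example halts training once the newest validation loss exceeds the mean of the previous ten. A self-contained sketch of the same test, assuming a list of per-epoch validation losses:

# Same stopping rule as above, factored into a helper (sketch).
import numpy as np

def should_stop(loss_val, epoch, patience=10):
    # stop once the newest validation loss exceeds the mean of the
    # previous `patience` losses
    return (epoch > patience
            and loss_val[-1] > np.mean(loss_val[-(patience + 1):-1]))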
Example #3
def run():

    setup_seed(11)  # seeds used per dataset: 11 (cora), 0 (citeseer), 0 (pubmed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data('cora')  # 'cora', 'citeseer', or 'pubmed'
    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()

    net_gcn = net.net_gcn(embedding_dim=[1433, 16, 7])  # [1433,16,7] (cora), [3703,16,6] (citeseer), [500,16,3] (pubmed)
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(), lr=0.01, weight_decay=5e-4)
    loss_func = nn.CrossEntropyLoss()
    loss_val = []
    early_stopping = 10

    for epoch in range(1000):

        optimizer.zero_grad()
        output = net_gcn(features, adj)
        loss_train = loss_func(output[idx_train], labels[idx_train])
        print('epoch', epoch, 'loss', loss_train.data)
        loss_train.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            output = net_gcn(features, adj, val_test=True)
            loss_val.append(loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            print('val acc', f1_score(labels[idx_val].cpu().numpy(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

        # early stopping
        if epoch > early_stopping and loss_val[-1] > np.mean(loss_val[-(early_stopping+1):-1]):
            break

    # test
    with torch.no_grad():
        output = net_gcn(features, adj, val_test=True)
        print('')
        print('test acc', f1_score(labels[idx_test].cpu().numpy(), output[idx_test].cpu().numpy().argmax(axis=1), average='micro'))
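The inline comments in this example encode per-dataset choices. A hedged refactor gathers them into one lookup table; the values below are taken directly from those comments:

# Per-dataset settings collected from the inline comments above
# (seed, then input dim / hidden dim / number of classes).
DATASET_CONFIG = {
    'cora':     {'seed': 11, 'embedding_dim': [1433, 16, 7]},
    'citeseer': {'seed': 0,  'embedding_dim': [3703, 16, 6]},
    'pubmed':   {'seed': 0,  'embedding_dim': [500, 16, 3]},
}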
Example #4
def run(args, seed):

    setup_seed(seed)
    dataset = args['dataset']
    adj, features, labels, idx_train, idx_val, idx_test = load_data(dataset) 
    adj = adj.cuda()

    features = features.cuda()
    labels = labels.cuda()

    net_gcn = net.net_gcn(embedding_dim=args['embedding_dim'])
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
    loss_func = nn.CrossEntropyLoss()
    loss_val = []
    early_stopping = 10

    for epoch in range(1000):

        optimizer.zero_grad()
        output = net_gcn(features, adj)
        loss_train = loss_func(output[idx_train], labels[idx_train])
        # print('epoch', epoch, 'loss', loss_train.data)
        loss_train.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            output = net_gcn(features, adj, val_test=True)
            loss_val.append(loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

        # early stopping
        if epoch > early_stopping and loss_val[-1] > np.mean(loss_val[-(early_stopping+1):-1]):
            break

    # test
    with torch.no_grad():
        output = net_gcn(features, adj, val_test=True)
        # print('')
        acc = f1_score(labels[idx_test].cpu().numpy(), output[idx_test].cpu().numpy().argmax(axis=1), average='micro')
        # print('test acc', acc)

    # attack: load pretrained GCN weights consumed by graph_attack
    w0 = np.load('./weights/' + dataset + '_w0.npy').transpose()
    w1 = np.load('./weights/' + dataset + '_w1.npy').transpose()

    adj_raw, features_raw, labels_raw = load_data_raw(dataset)

    correct_pred_link = 0
    correct_pred_feat = 0
    correct_pred_link_feat = 0
    n_attack = args['nattack']
    # per-node targeted attacks on (up to) the first 1,000 test nodes
    for idxt, n in zip(idx_test, range(1000)):

        # link
        pernode = [idxt]
        _, _, adj_per, features_per, _ = graph_attack(adj_raw, features_raw, labels_raw, w0, w1, False, True, pernode, n=n_attack)
        features_per, adj_per = preprocess_feat_adj(features_per, adj_per)
        with torch.no_grad():
            output = net_gcn(features_per, adj_per, val_test=True)[idxt].cpu().numpy().argmax()
            if output == labels[idxt].cpu().numpy():
                correct_pred_link = correct_pred_link + 1
            print(output, labels[idxt].cpu().numpy())
            print(correct_pred_link, n + 1)

        # feat
        pernode = [idxt]
        _, _, adj_per, features_per, _ = graph_attack(adj_raw, features_raw, labels_raw, w0, w1, True, False, pernode, n=n_attack)
        features_per, adj_per = preprocess_feat_adj(features_per, adj_per)
        with torch.no_grad():
            output = net_gcn(features_per, adj_per, val_test=True)[idxt].cpu().numpy().argmax()
            if output == labels[idxt].cpu().numpy():
                correct_pred_feat = correct_pred_feat + 1
            print(output, labels[idxt].cpu().numpy())
            print(correct_pred_feat, n + 1)

        # link & feat
        pernode = [idxt]
        _, _, adj_per, features_per, _ = graph_attack(adj_raw, features_raw, labels_raw, w0, w1, True, True, pernode, n=n_attack)
        features_per, adj_per = preprocess_feat_adj(features_per, adj_per)
        with torch.no_grad():
            output = net_gcn(features_per, adj_per, val_test=True)[idxt].cpu().numpy().argmax()
            if output == labels[idxt].cpu().numpy():
                correct_pred_link_feat = correct_pred_link_feat + 1
            print(output, labels[idxt].cpu().numpy())
            print(correct_pred_link_feat, n + 1)

    # the standard Planetoid splits contain exactly 1,000 test nodes
    adv_acc_link = correct_pred_link / 1000
    adv_acc_feat = correct_pred_feat / 1000
    adv_acc_link_feat = correct_pred_link_feat / 1000

    return acc, adv_acc_link, adv_acc_feat, adv_acc_link_feat
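The attack loop in this example repeats one evaluation pattern three times (link-only, feature-only, link and feature). A sketch of a shared helper, assuming the same project-local graph_attack and preprocess_feat_adj:

# Sketch: one targeted-attack evaluation, shared by the three variants
# above; returns 1 if the model still predicts node idxt correctly.
def eval_under_attack(net_gcn, adj_raw, features_raw, labels_raw, w0, w1,
                      perturb_feat, perturb_link, idxt, labels, n_attack):
    _, _, adj_per, features_per, _ = graph_attack(
        adj_raw, features_raw, labels_raw, w0, w1,
        perturb_feat, perturb_link, [idxt], n=n_attack)
    features_per, adj_per = preprocess_feat_adj(features_per, adj_per)
    with torch.no_grad():
        pred = net_gcn(features_per, adj_per, val_test=True)[idxt]
        return int(pred.cpu().numpy().argmax() == labels[idxt].cpu().numpy())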
Example #5
def run(args, seed):

    setup_seed(seed)
    dataset = args['dataset']
    adj, features, labels, idx_train, idx_val, idx_test = load_data(dataset)

    # unlabeled nodes are assumed to occupy the indices after the training
    # set; sample 100 of them as a clean set and 200 as adversarial targets
    idx_unlabeled = list(range(len(idx_train), features.size()[0]))
    # print(len(idx_train), features.size()[0])
    idx_unlabeled = np.random.permutation(idx_unlabeled)
    idx_clean = list(idx_unlabeled[:100])
    idx_adv = list(idx_unlabeled[100:300])

    adj = adj.cuda()

    features = features.cuda()
    labels = labels.cuda()

    net_gcn = net.net_gcn(embedding_dim=args['embedding_dim'])
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    loss_func = nn.CrossEntropyLoss()
    loss_val = []
    early_stopping = 10

    for epoch in range(1000):

        optimizer.zero_grad()
        output = net_gcn(features, adj)
        loss_train = loss_func(output[idx_train], labels[idx_train])
        # print('epoch', epoch, 'loss', loss_train.data)
        loss_train.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            output = net_gcn(features, adj, val_test=True)
            loss_val.append(
                loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

        # early stopping
        if epoch > early_stopping and loss_val[-1] > np.mean(
                loss_val[-(early_stopping + 1):-1]):
            break

    # test
    with torch.no_grad():
        output = net_gcn(features, adj, val_test=True)
        # print('')
        acc = f1_score(labels[idx_test].cpu().numpy(),
                       output[idx_test].cpu().numpy().argmax(axis=1),
                       average='micro')
        # print('test acc', acc)

    #########
    # robust training: craft perturbations against the pseudo-labels using
    # pretrained GCN weights consumed by graph_attack
    w0 = np.load('./weights/' + dataset + '_w0.npy').transpose()
    w1 = np.load('./weights/' + dataset + '_w1.npy').transpose()

    adj_raw, features_raw, _ = load_data_raw(dataset)

    pseudo_labels = output.argmax(dim=1).cpu().numpy()
    # print(pseudo_labels)

    _, _, adj_per, features_per, _ = graph_attack(adj_raw,
                                                  features_raw,
                                                  pseudo_labels,
                                                  w0,
                                                  w1,
                                                  True,
                                                  True,
                                                  idx_adv,
                                                  n=2)
    partition_labels = partition(adj_per, args['partition_num'])
    # partition_labels = partition(adj_raw, args['partition_num'])

    features_per, adj_per = preprocess_feat_adj(features_per, adj_per)

    pseudo_labels = torch.tensor(pseudo_labels).cuda()

    net_gcn = net.net_gcn_2task(embedding_dim=args['embedding_dim'],
                                ss_class_num=args['partition_num'])
    net_gcn = net_gcn.cuda()
    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    loss_val = []  # reset the early-stopping window for the robust phase
    for epoch in range(1000):

        optimizer.zero_grad()

        output, output_ss = net_gcn(features, features_per, adj, adj_per)
        output_adv, _ = net_gcn(features_per, features_per, adj_per, adj_per)

        loss_train = loss_func(
            output[idx_train],
            labels[idx_train]) * args['task_ratio'] + loss_func(
                output_ss, partition_labels) * (1 - args['task_ratio'])
        loss_adv_1 = loss_func(output_adv[idx_clean], pseudo_labels[idx_clean])
        loss_adv_2 = loss_func(output_adv[idx_adv], pseudo_labels[idx_adv])

        loss = loss_train + 1 * (loss_adv_1 + loss_adv_2)

        # print('epoch', epoch, 'loss', loss_train.data)
        loss.backward()
        optimizer.step()

        # validation
        with torch.no_grad():
            output, _ = net_gcn(features,
                                features_per,
                                adj,
                                adj_per,
                                val_test=True)
            loss_val.append(
                loss_func(output[idx_val], labels[idx_val]).cpu().numpy())
            # print('val acc', f1_score(labels[idx_val].cpu(), output[idx_val].cpu().numpy().argmax(axis=1), average='micro'))

        # early stopping
        if epoch > early_stopping and loss_val[-1] > np.mean(
                loss_val[-(early_stopping + 1):-1]):
            break

    # test
    with torch.no_grad():
        output, _ = net_gcn(features,
                            features_per,
                            adj,
                            adj_per,
                            val_test=True)
        # print('')
        acc = f1_score(labels[idx_test].cpu().numpy(),
                       output[idx_test].cpu().numpy().argmax(axis=1),
                       average='micro')
        # print('test acc', acc)

    #########
    # attack: evaluate the robust model under per-node targeted attacks
    w0 = np.load('./weights/' + dataset + '_w0.npy').transpose()
    w1 = np.load('./weights/' + dataset + '_w1.npy').transpose()

    adj_raw, features_raw, labels_raw = load_data_raw(dataset)

    correct_pred_link = 0
    correct_pred_feat = 0
    correct_pred_link_feat = 0
    n_attack = args['nattack']
    # per-node targeted attacks on (up to) the first 1,000 test nodes
    for idxt, n in zip(idx_test, range(1000)):

        # link
        pernode = [idxt]
        _, _, adj_per, features_per, _ = graph_attack(adj_raw,
                                                      features_raw,
                                                      labels_raw,
                                                      w0,
                                                      w1,
                                                      False,
                                                      True,
                                                      pernode,
                                                      n=n_attack)
        features_per, adj_per = preprocess_feat_adj(features_per, adj_per)
        with torch.no_grad():
            output, _ = net_gcn(features_per,
                                features_per,
                                adj_per,
                                adj_per,
                                val_test=True)
            output = output[idxt].cpu().numpy().argmax()
            if output == labels[idxt].cpu().numpy():
                correct_pred_link = correct_pred_link + 1
            print(output, labels[idxt].cpu().numpy())
            print(correct_pred_link, n + 1)

        # feat
        pernode = [idxt]
        _, _, adj_per, features_per, _ = graph_attack(adj_raw,
                                                      features_raw,
                                                      labels_raw,
                                                      w0,
                                                      w1,
                                                      True,
                                                      False,
                                                      pernode,
                                                      n=n_attack)
        features_per, adj_per = preprocess_feat_adj(features_per, adj_per)
        with torch.no_grad():
            output, _ = net_gcn(features_per,
                                features_per,
                                adj_per,
                                adj_per,
                                val_test=True)
            output = output[idxt].cpu().numpy().argmax()
            if output == labels[idxt].cpu().numpy():
                correct_pred_feat = correct_pred_feat + 1
            print(output, labels[idxt].cpu().numpy())
            print(correct_pred_feat, n + 1)

        # link feat
        pernode = [idxt]
        _, _, adj_per, features_per, _ = graph_attack(adj_raw,
                                                      features_raw,
                                                      labels_raw,
                                                      w0,
                                                      w1,
                                                      True,
                                                      True,
                                                      pernode,
                                                      n=n_attack)
        features_per, adj_per = preprocess_feat_adj(features_per, adj_per)
        with torch.no_grad():
            output, _ = net_gcn(features_per,
                                features_per,
                                adj_per,
                                adj_per,
                                val_test=True)
            output = output[idxt].cpu().numpy().argmax()
            if output == labels[idxt].cpu().numpy():
                correct_pred_link_feat = correct_pred_link_feat + 1
            print(output, labels[idxt].cpu().numpy())
            print(correct_pred_link_feat, n + 1)

    # the standard Planetoid splits contain exactly 1,000 test nodes
    adv_acc_link = correct_pred_link / 1000
    adv_acc_feat = correct_pred_feat / 1000
    adv_acc_link_feat = correct_pred_link_feat / 1000

    return acc, adv_acc_link, adv_acc_feat, adv_acc_link_feat
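The robust-training objective in this example can be restated compactly. The sketch below assumes the same loss terms: task_ratio trades the supervised loss off against the self-supervised partition loss, and an adversarial consistency term pulls both clean and attacked nodes toward the pseudo-labels.

# Compact restatement of the robust-training objective above (sketch).
def robust_loss(loss_func, output, output_ss, output_adv, labels,
                partition_labels, pseudo_labels, idx_train,
                idx_clean, idx_adv, task_ratio):
    loss_sup = loss_func(output[idx_train], labels[idx_train])
    loss_ss = loss_func(output_ss, partition_labels)
    loss_adv = (loss_func(output_adv[idx_clean], pseudo_labels[idx_clean])
                + loss_func(output_adv[idx_adv], pseudo_labels[idx_adv]))
    return task_ratio * loss_sup + (1 - task_ratio) * loss_ss + loss_adv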
Example #6
def run_get_mask(args, seed, imp_num, rewind_weight_mask=None):

    pruning.setup_seed(seed)
    adj, features, labels, idx_train, idx_val, idx_test = load_data(
        args['dataset'])
    node_num = features.size()[0]
    class_num = labels.numpy().max() + 1

    adj = adj.cuda()
    features = features.cuda()
    labels = labels.cuda()
    loss_func = nn.CrossEntropyLoss()

    net_gcn = net.net_gcn(embedding_dim=args['embedding_dim'], adj=adj)
    pruning.add_mask(net_gcn)
    net_gcn = net_gcn.cuda()

    if args['weight_dir']:

        # warm-start the first GCN layer from a pretrained checkpoint
        print("load : {}".format(args['weight_dir']))
        encoder_weight = {}
        cl_ckpt = torch.load(args['weight_dir'], map_location='cuda')
        encoder_weight['weight_orig_weight'] = cl_ckpt['gcn.fc.weight']
        ori_state_dict = net_gcn.net_layer[0].state_dict()
        ori_state_dict.update(encoder_weight)
        net_gcn.net_layer[0].load_state_dict(ori_state_dict)

    if rewind_weight_mask:
        net_gcn.load_state_dict(rewind_weight_mask)
        pruning.soft_mask_init(net_gcn, args['init_soft_mask_type'], seed)
        adj_spar, wei_spar = pruning.print_sparsity(net_gcn)
    else:
        pruning.soft_mask_init(net_gcn, args['init_soft_mask_type'], seed)

    optimizer = torch.optim.Adam(net_gcn.parameters(),
                                 lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    acc_test = 0.0
    best_val_acc = {'val_acc': 0, 'epoch': 0, 'test_acc': 0}
    best_epoch_mask = None  # set on the first epoch that improves val accuracy
    rewind_weight = copy.deepcopy(net_gcn.state_dict())
    for epoch in range(args['mask_epoch']):

        optimizer.zero_grad()
        output = net_gcn(features, adj)
        loss = loss_func(output[idx_train], labels[idx_train])
        loss.backward()
        pruning.subgradient_update_mask(net_gcn, args)  # L1 sparsity subgradient on the soft masks
        optimizer.step()
        with torch.no_grad():
            output = net_gcn(features, adj, val_test=True)
            acc_val = f1_score(labels[idx_val].cpu().numpy(),
                               output[idx_val].cpu().numpy().argmax(axis=1),
                               average='micro')
            acc_test = f1_score(labels[idx_test].cpu().numpy(),
                                output[idx_test].cpu().numpy().argmax(axis=1),
                                average='micro')
            if acc_val > best_val_acc['val_acc']:
                best_val_acc['test_acc'] = acc_test
                best_val_acc['val_acc'] = acc_val
                best_val_acc['epoch'] = epoch
                best_epoch_mask = pruning.get_final_mask_epoch(
                    net_gcn,
                    adj_percent=args['pruning_percent_adj'],
                    wei_percent=args['pruning_percent_wei'])

            print(
                "(Get Mask) Epoch:[{}] Val:[{:.2f}] Test:[{:.2f}] | Best Val:[{:.2f}] Test:[{:.2f}] at Epoch:[{}]"
                .format(epoch, acc_val * 100, acc_test * 100,
                        best_val_acc['val_acc'] * 100,
                        best_val_acc['test_acc'] * 100, best_val_acc['epoch']))

    return best_epoch_mask, rewind_weight
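pruning.subgradient_update_mask runs between backward() and step() and, per its inline comment, applies an L1 sparsity penalty to the soft masks. A hypothetical stand-in (not the project's actual implementation) showing what such a subgradient step could look like:

# Hypothetical stand-in for pruning.subgradient_update_mask (sketch):
# add lam * sign(mask) to each mask gradient before optimizer.step(),
# i.e. the subgradient of an L1 penalty on the mask values. The value
# of lam is an illustrative assumption.
def subgradient_update_mask_sketch(model, lam=1e-4):
    for name, param in model.named_parameters():
        if 'mask' in name and param.grad is not None:
            param.grad.data.add_(lam * torch.sign(param.data))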