Example #1
# Imports inferred from the code below. node2vec, load_data and
# load_polblogs_data are helper functions local to this repository
# (their import lines are not shown in the original snippet).
import argparse

import numpy as np
import scipy.sparse as sp
from sklearn.linear_model import LogisticRegression


def main():
    parser = argparse.ArgumentParser()

    # parser.add_argument('--num-walks', default=10, type=int)
    # parser.add_argument('--walk-length', default=40, type=int)
    # parser.add_argument('--dimension', type=int, default=128, help='Embeddings dimension')
    # parser.add_argument('--iter', default=1, type=int, help='Number of epochs in SGD')
    # parser.add_argument('--model', default='word2vec', help='Type of model to apply on walks (word2vec/skipgram)')
    # # parser.add_argument("--emb", required=True)
    # parser.add_argument('--emb', default='.deepwalk.embeddings', help='the embedding file')
    # # parser.add_argument("--net", required=True)
    # # parser.add_argument("--labels", required=True)
    # parser.add_argument('--dic-network-name', default='network')
    # parser.add_argument('--dic-label-name', default='label')

    parser.add_argument('--walk-length', type=int, default=40)
    parser.add_argument('--num-walks', type=int, default=10)
    parser.add_argument('--dimension',
                        type=int,
                        default=128,
                        help='Embeddings dimension')
    parser.add_argument('--iter',
                        default=1,
                        type=int,
                        help='Number of epochs in SGD')
    parser.add_argument('--p',
                        type=float,
                        default=1,
                        help='Return hyperparameter')
    parser.add_argument('--q',
                        type=float,
                        default=1,
                        help='In-out hyperparameter')
    parser.add_argument('--dataset', default='cora', help='dataset')

    args = parser.parse_args()
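
    # Hypothetical invocation (the script name is illustrative, not from the repo):
    #   python node2vec_attack_eval.py --dataset cora --num-walks 10 \
    #       --walk-length 40 --dimension 128 --iter 1 --p 1 --q 1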

    # Load dataset
    if args.dataset == "polblogs":
        graph, _, labels, idx_train, idx_test = load_polblogs_data()
    else:
        graph, _, labels, idx_train, idx_val, idx_test = load_data(
            args.dataset)

    ## Load Embeddings
    # embeddings_file = op.join(args.dataset, args.dataset + args.emb)
    # model = KeyedVectors.load_word2vec_format(embeddings_file, binary=False)
    model = node2vec(graph, args)
    # Map nodes to their features (note: the code assumes nodes are labeled as integers 0..N-1)
    print('model', model)
    print('number of nodes:', len(graph))
    features_matrix = np.asarray(
        [model[str(node)] for node in range(len(graph))])

    labels_matrix = np.matrix(labels)
    labels_matrix = sp.csr_matrix(labels_matrix)
    labels = np.reshape(labels, (labels.shape[0], ))

    X, y = features_matrix, labels
    # y = MultiLabelBinarizer().fit_transform(y)
    y_train = y[idx_train]
    y_test = y[idx_test]
    X_train = X[idx_train]
    X_test = X[idx_test]

    # y_train = format_csr(y_train_)
    # y_test = format_csr(y_test_)

    ## Logistic Regression

    # Train on data
    logisticRegr = LogisticRegression()
    logisticRegr.fit(X_train, y_train)

    # Measure accuracy
    score = logisticRegr.score(X_test, y_test)
    clean_pred = logisticRegr.predict(X_test)

    # Output results
    print('---------------------------------')
    print('Accuracy Score :   ', score)
    print('the prediction of y: ', clean_pred)
    print('---------------------------------')
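
    # Attack evaluation: each of the 10 precomputed perturbation vectors is
    # binarized at 0.5, and the indices of its nonzero entries act as anchor
    # nodes. For every test node the edges to those anchors are flipped
    # (removed if present, added otherwise), the perturbed graph is
    # re-embedded with node2vec, and the attack counts as a success when the
    # logistic-regression prediction for that node changes. The per-perturbation
    # success rate (ASR) is collected in total_asr.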

    total_asr = []

    for i in range(10):
        # for i in [0,7,8,9]:
        print('The perturbation idx', i)
        perturb_file = ('../GUA/perturbation_results/{1}_xi4_epoch100/'
                        'perturbation_{1}_{0}.txt'.format(i, args.dataset))
        with open(perturb_file) as f:
            perturb = np.array([float(line.rstrip('\n')) for line in f])
        # Binarize the continuous perturbation vector at the 0.5 threshold
        perturb = np.where(perturb > 0.5, 1, 0)
        pt = np.where(perturb > 0)[0].tolist()
        print('The perturbations are', pt)
        asr_coll = []
        for j in range(len(idx_test)):
            neigh = list(graph.neighbors(idx_test[j]))
            print('the neighbors of node {} are'.format(idx_test[j]), neigh)
            tmp_G = graph.copy()
            # print ('the neighrbors of node {} is'.format(j), neigh)
            # print ('The edges in clean graph', tmp_G.number_of_edges())
            for k in pt:
                if k in neigh:
                    print('the node is', idx_test[j])
                    # print ('the neighor is', k)
                    # print ('the edges between', tmp_G.has_edge(idx_test[j],k))
                    # print ('the edges between', tmp_G.has_edge(k, idx_test[j]))
                    # print ('the edges of 12', tmp_G.has_edge(k,j))

                    tmp_G.remove_edge(idx_test[j], k)
                    # print ('the edges of 12', tmp_G.has_edge(k,j))
                    # tmp_G.remove_edge(k, j)
                else:
                    tmp_G.add_edge(idx_test[j], k, weight=1)
                    # tmp_G.add_edge(k, j)
            print('The edges in attacked graph', tmp_G.number_of_edges())
            model = node2vec(tmp_G, args)  #the attacked graph embedding
            features_matrix = np.asarray(
                [model[str(node)] for node in range(len(graph))])
            X = features_matrix
            X_test = X[idx_test]
            # idx_pred = j - idx_test[0]
            att_pred = logisticRegr.predict(X_test)
            if clean_pred[j] == att_pred[j]:
                asr_coll.append(0)
                print('attack fail', j)
            else:
                asr_coll.append(1)
                print('attack success', j)
            print('the success number', sum(asr_coll))
            print('the total number', len(asr_coll))
        avg_asr = float(sum(asr_coll) / len(asr_coll))
        print('the asr is', avg_asr)
        total_asr.append(avg_asr)
    print('the total asr over 10 experiments is:', total_asr)
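
The snippet indexes the embedding model as model[str(node)], so node2vec(graph, args) is assumed to return a gensim-style keyed-vector object. Below is a minimal, hypothetical stand-in that feeds plain uniform random walks to skip-gram Word2Vec (gensim 4.x API) and omits the p/q walk bias of full node2vec; the repository's actual helper may differ. The usual entry-point guard is added at the end.

import random

from gensim.models import Word2Vec


def node2vec(graph, args):
    # Collect args.num_walks uniform random walks of length args.walk_length
    # starting from every node (sketch only: the p/q bias is omitted).
    walks = []
    nodes = list(graph.nodes())
    for _ in range(args.num_walks):
        random.shuffle(nodes)
        for start in nodes:
            walk = [start]
            while len(walk) < args.walk_length:
                neighbors = list(graph.neighbors(walk[-1]))
                if not neighbors:
                    break
                walk.append(random.choice(neighbors))
            walks.append([str(n) for n in walk])
    # Skip-gram Word2Vec over the walks; the returned keyed vectors support
    # the model[str(node)] lookups used in main().
    model = Word2Vec(walks, vector_size=args.dimension, window=5,
                     min_count=0, sg=1, epochs=args.iter)
    return model.wv


if __name__ == '__main__':
    main()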
Example #2
parser.add_argument('--dataset',
                    type=str,
                    default="cora",
                    help='The name of the network dataset.')
parser.add_argument('--radius',
                    type=int,
                    default=4,
                    help='The radius of l2 norm projection')

args = parser.parse_args()

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
if args.dataset == "polblogs":
    tmp_adj, features, labels, idx_train, idx_test = load_polblogs_data()
else:
    _, features, labels, idx_train, idx_val, idx_test, tmp_adj = load_data(
        args.dataset)

num_classes = labels.max().item() + 1
# tmp_adj = tmp_adj.toarray()

adj = tmp_adj
adj = np.eye(tmp_adj.shape[0]) + adj
adj, _ = normalize(adj)
adj = torch.from_numpy(adj.astype(np.float32))

# print (sum(features))
# print (labels.shape)
# print (idx_train.shape)
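
The normalize() helper used here (and again in Example #3) is repo-local and not shown in these snippets. Judging from the call adj, _ = normalize(adj), it returns the normalized matrix plus a second value; a plausible sketch is the standard row normalization D^-1 * M that also returns the row sums, though the real implementation may differ:

import numpy as np
import scipy.sparse as sp


def normalize(mx):
    # Row-normalize a sparse or dense matrix: D^-1 * mx.
    # Returns the normalized matrix and the row-sum (degree) vector.
    sparse_input = sp.issparse(mx)
    mx = sp.csr_matrix(mx)
    rowsum = np.asarray(mx.sum(axis=1)).flatten()
    r_inv = np.zeros_like(rowsum, dtype=np.float64)
    nonzero = rowsum != 0
    r_inv[nonzero] = 1.0 / rowsum[nonzero]
    mx = sp.diags(r_inv).dot(mx)
    if not sparse_input:
        mx = np.asarray(mx.todense())
    return mx, rowsum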
Example #3
    type=int,
    default=10,
    help='The learning step of updating the connection entries')
parser.add_argument('--sample_percent',
                    type=int,
                    default=40,
                    help='The sampling percentage of the training set')
args = parser.parse_args()

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

if args.dataset == "polblogs":
    tmp_adj, tmp_feat, labels, train_idx, val_idx, test_idx = load_polblogs_data()
else:
    _, _, labels, train_idx, val_idx, test_idx, tmp_adj, tmp_feat = load_data(
        args.dataset)

num_classes = labels.max().item() + 1
# tmp_adj = tmp_adj.toarray()
adj = tmp_adj
adj = np.eye(tmp_adj.shape[0]) + adj
adj, _ = normalize(adj)
adj = torch.from_numpy(adj.astype(np.float32))
feat, _ = normalize(tmp_feat)
feat = torch.FloatTensor(np.array(feat.todense()))
tmp_feat = tmp_feat.todense()

num_fake = int(tmp_adj.shape[0] * args.fake_rate)
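
num_fake is the number of nodes that will be injected (fake_rate of the clean graph size). How those nodes get wired in is decided by the attack itself; purely as an illustration (not the repository's code), the clean adjacency and feature matrices could be padded with empty rows and columns for the injected nodes like this:

import numpy as np
import scipy.sparse as sp

n = tmp_adj.shape[0]
dense_adj = tmp_adj.toarray() if sp.issparse(tmp_adj) else np.asarray(tmp_adj)
# Extra rows/columns for the num_fake injected nodes; the attack would later
# fill in their edges and feature entries.
padded_adj = np.zeros((n + num_fake, n + num_fake), dtype=np.float32)
padded_adj[:n, :n] = dense_adj
padded_feat = np.zeros((n + num_fake, tmp_feat.shape[1]), dtype=np.float32)
padded_feat[:n, :] = tmp_feat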