Example #1
def eva(y_true, y_pred, epoch=0):
    acc, f1 = cluster_acc(y_true, y_pred)
    nmi = nmi_score(y_true, y_pred, average_method='arithmetic')
    ari = ari_score(y_true, y_pred)
    print(epoch, ':acc {:.4f}'.format(acc), ', nmi {:.4f}'.format(nmi),
          ', ari {:.4f}'.format(ari), ', f1 {:.4f}'.format(f1))
    return f1
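
Most of these snippets lean on a cluster_acc helper plus nmi_score/ari_score aliases that are not shown. A minimal sketch of what they commonly look like, assuming the usual Hungarian-matching accuracy and the standard scikit-learn aliases (the helpers bundled with each project may differ):

import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import adjusted_rand_score as ari_score
from sklearn.metrics import f1_score
from sklearn.metrics import normalized_mutual_info_score as nmi_score


def cluster_acc(y_true, y_pred):
    # Hypothetical sketch: map predicted cluster ids onto ground-truth labels
    # with the Hungarian algorithm, then report accuracy and macro F1.
    y_true = np.asarray(y_true).astype(np.int64)
    y_pred = np.asarray(y_pred).astype(np.int64)
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for p, t in zip(y_pred, y_true):
        w[p, t] += 1
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    mapping = dict(zip(row_ind, col_ind))
    y_mapped = np.array([mapping[p] for p in y_pred])
    acc = float((y_mapped == y_true).mean())
    f1 = f1_score(y_true, y_mapped, average='macro')
    return acc, f1

Several of the other examples (#2, #7, #8, #9, #10, #12, #14) call cluster_acc as if it returned a single accuracy value, so those projects presumably ship a variant that drops the F1 part.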
Example #2
def init_prob_kmeans(model, eval_loader, args):
    torch.manual_seed(1)
    model = model.to(device)
    # initialize cluster parameters
    model.eval()
    targets = np.zeros(len(eval_loader.dataset))
    feats = np.zeros((len(eval_loader.dataset), 1024))
    for _, (x, _, label, idx) in enumerate(eval_loader):
        x = x.to(device)
        _, feat = model(x)
        feat = feat.view(x.size(0), -1)
        idx = idx.data.cpu().numpy()
        feats[idx, :] = feat.data.cpu().numpy()
        targets[idx] = label.data.cpu().numpy()
    # evaluate clustering performance
    pca = PCA(n_components=args.n_clusters)
    feats = pca.fit_transform(feats)
    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(feats)
    acc, nmi, ari = (cluster_acc(targets, y_pred),
                     nmi_score(targets, y_pred),
                     ari_score(targets, y_pred))
    print('Init acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = feat2prob(torch.from_numpy(feats),
                      torch.from_numpy(kmeans.cluster_centers_))
    return kmeans.cluster_centers_, probs
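
feat2prob, which turns features and cluster centers into soft assignment probabilities, is also not defined in these snippets. A plausible sketch, assuming the Student's t-kernel soft assignment popularized by DEC (the actual helper may use a different kernel or temperature):

import torch


def feat2prob(feat, centers, alpha=1.0):
    # Hypothetical sketch: Student's t soft assignment (as in DEC).
    # feat: (N, d) tensor, centers: (K, d) tensor -> (N, K) probabilities.
    dist = torch.sum((feat.unsqueeze(1) - centers.unsqueeze(0)) ** 2, dim=2)
    q = 1.0 / (1.0 + dist / alpha)
    q = q ** ((alpha + 1.0) / 2.0)
    return q / q.sum(dim=1, keepdim=True)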
Example #3
File: evaluation.py Project: Ubastic/DAEGC
def eva(y_true, y_pred, epoch=0):
    acc, f1 = cluster_acc(y_true, y_pred)
    nmi = nmi_score(y_true, y_pred, average_method="arithmetic")
    ari = ari_score(y_true, y_pred)
    print(
        f"epoch {epoch}:acc {acc:.4f}, nmi {nmi:.4f}, ari {ari:.4f}, f1 {f1:.4f}"
    )
    return acc, nmi, ari, f1
Example #4
def eva(y_true, y_pred, epoch=0, pp=True, name=None, path=None):
    acc, f1 = cluster_acc(y_true, y_pred, name=name, path=path)
    nmi = nmi_score(y_true, y_pred, average_method='arithmetic')
    #nmi = np.round(metrics.normalized_mutual_info_score(y_true, y_pred), 5)
    ari = ari_score(y_true, y_pred)
    if pp:
        print(epoch, ':acc {:.4f}'.format(acc), ', nmi {:.4f}'.format(nmi),
              ', ari {:.4f}'.format(ari))
    return acc, nmi, ari
Example #5
def eva(y_true, y_pred, epoch=0):
    acc, f1 = cluster_acc(y_true, y_pred)
    nmi = nmi_score(y_true, y_pred, average_method='arithmetic')
    ari = ari_score(y_true, y_pred)
    print(epoch, ':acc {:.4f}'.format(acc), ', nmi {:.4f}'.format(nmi),
          ', ari {:.4f}'.format(ari), ', f1 {:.4f}'.format(f1))
    with open('DGSCN_wiki_oversmooth.txt', 'a') as overfile:
        overfile.write(str(acc))
        overfile.write('\n')

    return acc, nmi, ari, f1
Example #6
File: evaluation.py Project: HeXiax/AGCC
def eva(y_true, y_pred, epoch=0):
    acc, f1 = cluster_acc(y_true, y_pred)
    #nmi = nmi_score(y_true, y_pred, average_method='arithmetic')
    nmi = nmi_score(y_true, y_pred)
    ari = ari_score(y_true, y_pred)
    # print(epoch, ':acc {:.4f}'.format(acc), ', nmi {:.4f}'.format(nmi), ', ari {:.4f}'.format(ari),
    #         ', f1 {:.4f}'.format(f1))
    acc = format(acc, '.4f')
    nmi = format(nmi, '.4f')
    ari = format(ari, '.4f')
    f1 = format(f1, '.4f')
    return acc, nmi, ari, f1
Example #7
def test(model, test_loader, args):
    model.eval()
    preds = np.array([])
    targets = np.array([])
    for batch_idx, (x, label, _) in enumerate(tqdm(test_loader)):
        x, label = x.to(device), label.to(device)
        output1, output2, _ = model(x)
        if args.head == 'head1':
            output = output1
        else:
            output = output2
        _, pred = output.max(1)
        targets = np.append(targets, label.cpu().numpy())
        preds = np.append(preds, pred.cpu().numpy())
    acc, nmi, ari = (cluster_acc(targets.astype(int), preds.astype(int)),
                     nmi_score(targets, preds), ari_score(targets, preds))
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
Example #8
def test(model, eval_loader, args):
    model.eval()
    targets = np.zeros(len(eval_loader.dataset)) 
    y_pred = np.zeros(len(eval_loader.dataset)) 
    probs= np.zeros((len(eval_loader.dataset), args.n_clusters))
    for _, (x, _, label, idx) in enumerate(eval_loader):
        x = x.to(device)
        _, feat = model(x)
        prob = feat2prob(feat, model.center)
        #  prob = F.softmax(logit, dim=1)
        idx = idx.data.cpu().numpy()
        y_pred[idx] = prob.data.cpu().detach().numpy().argmax(1)
        targets[idx] = label.data.cpu().numpy()
        probs[idx, :] = prob.cpu().detach().numpy()
    # evaluate clustering performance
    y_pred = y_pred.astype(np.int64)
    acc, nmi, ari = cluster_acc(targets, y_pred), nmi_score(targets, y_pred), ari_score(targets, y_pred)
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #9
def test(model, test_loader, args):
    model.eval()
    preds = np.array([])
    targets = np.array([])
    feats = np.zeros((len(test_loader.dataset), args.n_clusters))
    probs = np.zeros((len(test_loader.dataset), args.n_clusters))
    for batch_idx, (x, label, idx) in enumerate(tqdm(test_loader)):
        x, label = x.to(device), label.to(device)
        feat = model(x)
        prob = feat2prob(feat, model.center)
        _, pred = prob.max(1)
        targets = np.append(targets, label.cpu().numpy())
        preds = np.append(preds, pred.cpu().numpy())
        idx = idx.data.cpu().numpy()
        feats[idx, :] = feat.cpu().detach().numpy()
        probs[idx, :] = prob.cpu().detach().numpy()
    acc, nmi, ari = (cluster_acc(targets.astype(int), preds.astype(int)),
                     nmi_score(targets, preds), ari_score(targets, preds))
    print('Test acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(acc, nmi, ari))
    probs = torch.from_numpy(probs)
    return acc, nmi, ari, probs
Example #10
def train_idec():

    model = IDEC(n_enc_1=500,
                 n_enc_2=500,
                 n_enc_3=1000,
                 n_dec_1=1000,
                 n_dec_2=500,
                 n_dec_3=500,
                 n_input=args.n_input,
                 n_z=args.n_z,
                 n_clusters=args.n_clusters,
                 alpha=1.0,
                 pretrain_path=args.pretrain_path).to(device)

    #  model.pretrain('data/ae_mnist.pkl')
    model.pretrain()

    train_loader = DataLoader(dataset,
                              batch_size=args.batch_size,
                              shuffle=False)
    optimizer = Adam(model.parameters(), lr=args.lr)

    # initialize cluster parameters
    data = dataset.x
    y = dataset.y
    data = torch.Tensor(data).to(device)
    x_bar, hidden = model.ae(data)

    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(hidden.data.cpu().numpy())
    nmi_k = nmi_score(y_pred, y)
    print("nmi score={:.4f}".format(nmi_k))

    hidden = None
    x_bar = None

    y_pred_last = y_pred
    model.cluster_layer.data = torch.tensor(kmeans.cluster_centers_).to(device)

    model.train()
    for epoch in range(100):

        if epoch % args.update_interval == 0:

            _, tmp_q = model(data)

            # update target distribution p
            tmp_q = tmp_q.data
            p = target_distribution(tmp_q)

            # evaluate clustering performance
            y_pred = tmp_q.cpu().numpy().argmax(1)
            delta_label = np.sum(y_pred != y_pred_last).astype(
                np.float32) / y_pred.shape[0]
            y_pred_last = y_pred

            acc = cluster_acc(y, y_pred)
            nmi = nmi_score(y, y_pred)
            ari = ari_score(y, y_pred)
            print('Iter {}'.format(epoch), ':Acc {:.4f}'.format(acc),
                  ', nmi {:.4f}'.format(nmi), ', ari {:.4f}'.format(ari))

            if epoch > 0 and delta_label < args.tol:
                print('delta_label {:.4f}'.format(delta_label), '< tol',
                      args.tol)
                print('Reached tolerance threshold. Stopping training.')
                break
        for batch_idx, (x, _, idx) in enumerate(train_loader):

            x = x.to(device)
            idx = idx.to(device)

            x_bar, q = model(x)

            reconstr_loss = F.mse_loss(x_bar, x)
            kl_loss = F.kl_div(q.log(), p[idx])
            loss = args.gamma * kl_loss + reconstr_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
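
The self-training loop above sharpens the soft assignments q into a target distribution p through target_distribution, which is not shown. A minimal sketch assuming the standard DEC/IDEC target, where each assignment is squared and normalized by the cluster frequency:

def target_distribution(q):
    # Hypothetical sketch of the DEC/IDEC target distribution:
    # p_ij = q_ij^2 / f_j, renormalized per sample, with f_j = sum_i q_ij.
    weight = q ** 2 / q.sum(dim=0)
    return (weight.t() / weight.sum(dim=1)).t()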
Example #11
def agc(nx_graph,
        adj,
        features,
        targets_id,
        classes,
        start_time,
        flag,
        stop_threshold=-0.06):
    '''
    Parameters
    ----------
    nx_graph:    graph as a networkx object
    adj:         adjacency matrix
    features:    feature matrix
    targets_id:  ground-truth labels
    classes:     number of classes
    stop_threshold: stopping threshold on the change in intra-cluster distance
    return
    ----------
    predict_C:  predicted cluster assignments
    epoch:      number of iterations run
    '''
    if flag == 1:
        adj_matrix, feat_matrix = get_matrix(adj,
                                             features,
                                             self_loop=self_loop)
    else:
        adj_matrix = adj
        feat_matrix = features
        features = sp.csc_matrix(features)
        adj = sp.csc_matrix(adj)
    matrix_A = sp.csr_matrix(adj_matrix)
    dim_A = matrix_A.shape  # (3312, 3312)

    # build the degree matrix matrix_D
    matrix_D = np.zeros(dim_A)
    degree_table = nx.degree(nx_graph)  # len = 3312
    ind = 0
    for degree_item in nx.degree(nx_graph):
        if degree_item[0] in nx_graph.nodes:
            matrix_D[ind][ind] = degree_item[1]
            ind += 1
        else:
            logging.info('missing item -> %s', degree_item)

    # normalized graph Laplacian matrix_Ls
    #   compute the -1/2 power of the degree matrix
    #   v: eigenvalues, Q: eigenvectors
    matrix_D = sp.csr_matrix(matrix_D)
    # v, Q = la.eigs(matrix_D)
    # V = sp.diags(v**(-0.5))
    # matrix_D_neg_1_2 = Q.dot(V).dot(la.inv(sp.csr_matrix(Q)))
    matrix_D_neg_1_2 = sp.csr_matrix.power(matrix_D, -0.5)
    matrix_Ls = np.identity(
        dim_A[0]) - matrix_D_neg_1_2.dot(matrix_A).dot(matrix_D_neg_1_2)

    # row-normalize the features (has little effect)
    if normal_feature:
        features_dense = features.todense()
        deno = np.repeat(np.sqrt(
            np.sum(np.multiply(features_dense, features_dense), axis=1)),
                         features_dense.shape[1],
                         axis=1)
        features_normal = np.multiply(features_dense, 1.0 / deno)

    x_hat = matrix_X = features.todense()  # features_normal
    coefficient = np.identity(dim_A[0]) - 1 / 2 * matrix_Ls

    max_iter = 140
    predict_C = []
    tmp_intra0 = tmp_intra1 = 1e8
    t = 0
    while t <= max_iter:
        # log the elapsed time
        logging.info("iter: " + str(t) + ", at: " +
                     str(time.time() - start_time) + " s")

        t = t + 1
        # k = t
        # x_hat = (coefficient ** k).dot(matrix_X)

        x_hat = coefficient.dot(x_hat)
        matrix_K = x_hat.dot(x_hat.T)

        matrix_W = 1 / 2 * (np.abs(matrix_K) + np.abs(matrix_K.T))

        matrix_W = matrix_W / (np.max(matrix_W))
        label_pred = SpectralClustering(
            n_clusters=classes,
            gamma=0,
            affinity='precomputed',  # the input is a precomputed affinity matrix
            n_init=15,
            n_jobs=4,
            # kernel_params= matrix_K,
            assign_labels='kmeans',  # discretize
        ).fit_predict(matrix_W)
        print('label_pred', label_pred)
        print('targets_id', targets_id)

        # permute cluster ids to match the ground-truth labels
        confusion_matrix = np.zeros([classes, classes])
        tmp_dict0 = np.ones(targets_id.shape)
        tmp_label_pred = 100 * np.ones_like(label_pred)
        for i in range(classes):
            for j in range(classes):
                confusion_matrix[i, j] = np.sum(tmp_dict0[np.where(
                    (targets_id == i) * (label_pred == j))])
        # print(confusion_matrix)
        diag_max = np.sum(np.diag(confusion_matrix))

        tmp_inds = list(range(classes))

        for _ in range(5):
            for i in range(classes):
                for j in range(classes):
                    confusion_matrix[[j, i], :] = confusion_matrix[[i, j], :]
                    if np.sum(np.diag(confusion_matrix)) < diag_max:
                        confusion_matrix[[i, j], :] = confusion_matrix[
                            [j, i], :]
                    else:
                        diag_max = np.sum(np.diag(confusion_matrix))
                        tmp_inds[i], tmp_inds[j] = tmp_inds[j], tmp_inds[i]
                        # logging.info(diag_max)

        for i in range(classes):
            tmp_label_pred[np.where(label_pred == i)] = tmp_inds[i]
        print('tmp_label_pred', tmp_label_pred)

        logging.info('k: {}'.format(t))
        # metric evaluation
        F1_RESULT = metrics.f1_score(targets_id,
                                     tmp_label_pred,
                                     average='macro')
        logging.info('F1_: {}%'.format(F1_RESULT * 100))
        acc_RESULT = metrics.accuracy_score(targets_id, tmp_label_pred)
        with open('agc_wiki_oversmooth.txt', 'a') as overfile:
            overfile.write(str(acc_RESULT))
            overfile.write('\n')
        logging.info('acc: {}%'.format(acc_RESULT * 100))
        NMI_RESULT = metrics.normalized_mutual_info_score(
            targets_id, tmp_label_pred, average_method='arithmetic')
        logging.info('NMI: {}%'.format(NMI_RESULT * 100))
        ari_RESULT = ari_score(targets_id, tmp_label_pred)
        logging.info('ari: {}%'.format(ari_RESULT * 100))

        tmp_intra0 = tmp_intra1
        tmp_intra1 = intra(tmp_label_pred, x_hat, classes)
        d_intra = tmp_intra1 - tmp_intra0
        logging.info('d_intra: {}'.format(d_intra))

        # if d_intra > stop_threshold:
        #     break

        predict_C = tmp_label_pred

    return predict_C, t - 1
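
The stopping criterion in agc tracks intra, an intra-cluster distance over the filtered features, which is not defined here. A rough sketch under the assumption that it is the mean pairwise Euclidean distance within each cluster, averaged over clusters (the project's actual helper may weight clusters differently):

import numpy as np
from scipy.spatial.distance import pdist


def intra(labels, feats, classes):
    # Hypothetical sketch: average pairwise distance inside each cluster,
    # then average over all clusters with at least two members.
    feats = np.asarray(feats)
    labels = np.asarray(labels)
    per_cluster = []
    for c in range(classes):
        members = feats[labels == c]
        if members.shape[0] > 1:
            per_cluster.append(pdist(members).mean())
    return float(np.mean(per_cluster)) if per_cluster else 0.0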
Example #12
def estimate_k(model, unlabeled_loader, labeled_loaders, args):
    u_num = len(unlabeled_loader.dataset)
    u_targets = np.zeros(u_num)
    u_feats = np.zeros((u_num, 1024))
    print('extracting features for unlabeled data')
    for _, (x, _, label, idx) in enumerate(unlabeled_loader):
        x = x.to(device)
        _, feat = model(x)
        feat = feat.view(x.size(0), -1)
        idx = idx.data.cpu().numpy()
        u_feats[idx, :] = feat.data.cpu().numpy()
        u_targets[idx] = label.data.cpu().numpy()
    cand_k = np.arange(args.max_cand_k)
    # get acc for labeled data with the short-listed k values
    best_ks = np.zeros(len(omniglot_background_val_alphabets))
    print('extracting features for labeled data')
    for alphabetStr in omniglot_background_val_alphabets:
        labeled_loader = labeled_loaders[alphabetStr]
        args.num_val_cls = labeled_loader.num_classes

        l_num = len(labeled_loader.dataset)
        l_targets = np.zeros(l_num)
        l_feats = np.zeros((l_num, 1024))
        for _, (x, _, label, idx) in enumerate(labeled_loader):
            x = x.to(device)
            _, feat = model(x)
            feat = feat.view(x.size(0), -1)
            idx = idx.data.cpu().numpy()
            l_feats[idx, :] = feat.data.cpu().numpy()
            l_targets[idx] = label.data.cpu().numpy()

        l_classes = set(l_targets)
        num_lt_cls = int(round(len(l_classes) * args.split_ratio))
        lt_classes = set(random.sample(l_classes, num_lt_cls))
        lv_classes = l_classes - lt_classes

        lt_feats = np.empty((0, l_feats.shape[1]))
        lt_targets = np.empty(0)
        for c in lt_classes:
            lt_feats = np.vstack((lt_feats, l_feats[l_targets == c]))
            lt_targets = np.append(lt_targets, l_targets[l_targets == c])

        lv_feats = np.empty((0, l_feats.shape[1]))
        lv_targets = np.empty(0)
        for c in lv_classes:
            lv_feats = np.vstack((lv_feats, l_feats[l_targets == c]))
            lv_targets = np.append(lv_targets, l_targets[l_targets == c])

        cvi_list = np.zeros(len(cand_k))
        acc_list = np.zeros(len(cand_k))
        cat_pred_list = np.zeros([len(cand_k), u_num + l_num])
        print('estimating K ...')
        for i in range(len(cand_k)):
            cvi_list[i], cat_pred_i = labeled_val_fun(
                np.concatenate((lv_feats, u_feats)), lt_feats, lt_targets,
                cand_k[i] + args.num_val_cls)
            cat_pred_list[i, :] = cat_pred_i
            acc_list[i] = cluster_acc(
                lv_targets,
                cat_pred_i[len(lt_targets):len(lt_targets) + len(lv_targets)])
        idx_cvi = np.max(np.argwhere(cvi_list == np.max(cvi_list)))
        idx_acc = np.max(np.argwhere(acc_list == np.max(acc_list)))

        idx_best = int(math.ceil((idx_cvi + idx_acc) * 1.0 / 2))
        cat_pred = cat_pred_list[idx_best, :]
        cnt_cat = Counter(cat_pred.tolist())
        cnt_l = Counter(cat_pred[:l_num].tolist())
        cnt_ul = Counter(cat_pred[l_num:].tolist())
        bin_cat = [x[1] for x in sorted(cnt_cat.items())]
        bin_l = [x[1] for x in sorted(cnt_l.items())]
        bin_ul = [x[1] for x in sorted(cnt_ul.items())]
        expectation = u_num * 1.0 / (cand_k[idx_best] + args.num_val_cls)
        best_k = np.sum(
            np.array(bin_ul) /
            np.max(bin_ul).astype(float) > args.min_max_ratio)
        print('current best K {}'.format(best_k))
        i_alpha = omniglot_background_val_alphabets.index(alphabetStr)
        best_ks[i_alpha] = best_k
    best_k = np.ceil(np.mean(best_ks)).astype(np.int32)
    kmeans = KMeans(n_clusters=best_k)
    u_pred = kmeans.fit_predict(u_feats).astype(np.int32)
    acc, nmi, ari = cluster_acc(u_targets, u_pred), nmi_score(
        u_targets, u_pred), ari_score(u_targets, u_pred)
    print('Final K {}, acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(
        best_k, acc, nmi, ari))
    return best_k
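
This example and Example #14 both call labeled_val_fun, which is not included. From the call sites, it clusters the probe features together with the remaining features using a candidate k and returns a cluster validity index plus the predictions; a hedged sketch, with the silhouette score standing in for whatever index the original uses:

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score


def labeled_val_fun(val_and_unlabeled_feats, probe_feats, probe_targets, k):
    # Hypothetical sketch: run k-means with a candidate k on the probe
    # features followed by the validation + unlabeled features, and score
    # the result with a validity index (here the silhouette score).
    # probe_targets is unused in this sketch; the original presumably uses
    # it to score the probe clusters as well.
    all_feats = np.concatenate((probe_feats, val_and_unlabeled_feats))
    kmeans = KMeans(n_clusters=k, n_init=20)
    cat_pred = kmeans.fit_predict(all_feats)
    cvi = silhouette_score(all_feats, cat_pred)
    return cvi, cat_pred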
Example #13
def cluster_evaluate(y_true, y_pred, alg=0):
    acc, f1 = cluster_acc(y_true, y_pred)
    nmi = nmi_score(y_true, y_pred, average_method='arithmetic')
    ari = ari_score(y_true, y_pred)
    print(alg, ':acc {:.4f}'.format(acc), ', nmi {:.4f}'.format(nmi),
          ', ari {:.4f}'.format(ari), ', f1 {:.4f}'.format(f1))
Example #14
def estimate_k(model, unlabeled_loader, labeled_loader, args):
    u_num = len(unlabeled_loader.dataset)
    u_targets = np.zeros(u_num)
    u_feats = np.zeros((u_num, 512))
    print('extracting features for unlabeled data')
    for _, (x, label, idx) in enumerate(tqdm(unlabeled_loader)):
        x = x.to(device)
        _, feat = model(x)
        feat = feat.view(x.size(0), -1)
        idx = idx.data.cpu().numpy()
        u_feats[idx, :] = feat.data.cpu().numpy()
        u_targets[idx] = label.data.cpu().numpy()
    cand_k = np.arange(args.max_cand_k)
    # get acc for labeled data with the short-listed k values
    l_num = len(labeled_loader.dataset)
    l_targets = np.zeros(l_num)
    l_feats = np.zeros((l_num, 512))
    print('extracting features for labeled data')
    for _, (x, label, idx) in enumerate(tqdm(labeled_loader)):
        x = x.to(device)
        _, feat = model(x)
        feat = feat.view(x.size(0), -1)
        idx = idx.data.cpu().numpy()
        l_feats[idx, :] = feat.data.cpu().numpy()
        l_targets[idx] = label.data.cpu().numpy()

    l_classes = set(l_targets)
    num_lt_cls = int(round(len(l_classes) * args.split_ratio))
    lt_classes = set(random.sample(
        l_classes,
        num_lt_cls))  # randomly sample num_lt_cls classes from the labeled classes
    lv_classes = l_classes - lt_classes

    lt_feats = np.empty((0, l_feats.shape[1]))
    lt_targets = np.empty(0)
    for c in lt_classes:
        lt_feats = np.vstack((lt_feats, l_feats[l_targets == c]))
        lt_targets = np.append(lt_targets, l_targets[l_targets == c])

    lv_feats = np.empty((0, l_feats.shape[1]))
    lv_targets = np.empty(0)
    for c in lv_classes:
        lv_feats = np.vstack((lv_feats, l_feats[l_targets == c]))
        lv_targets = np.append(lv_targets, l_targets[l_targets == c])

    cvi_list = np.zeros(len(cand_k))
    acc_list = np.zeros(len(cand_k))
    cat_pred_list = np.zeros([len(cand_k), u_num + l_num])
    print('estimating K ...')
    for i in range(len(cand_k)):
        cvi_list[i], cat_pred_i = labeled_val_fun(
            np.concatenate((lv_feats, u_feats)), lt_feats, lt_targets,
            cand_k[i] + args.num_val_cls)
        cat_pred_list[i, :] = cat_pred_i
        acc_list[i] = cluster_acc(
            lv_targets,
            cat_pred_i[len(lt_targets):len(lt_targets) + len(lv_targets)])
        best_k = get_best_k(cvi_list[:i + 1], acc_list[:i + 1],
                            cat_pred_list[:i + 1], l_num)
        print('current best K {}'.format(best_k))
    kmeans = KMeans(n_clusters=best_k)
    u_pred = kmeans.fit_predict(u_feats).astype(np.int32)
    acc, nmi, ari = cluster_acc(u_targets, u_pred), nmi_score(
        u_targets, u_pred), ari_score(u_targets, u_pred)
    print('Final K {}, acc {:.4f}, nmi {:.4f}, ari {:.4f}'.format(
        best_k, acc, nmi, ari))
    return best_k
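
get_best_k is likewise not shown; it packages the k-selection logic that Example #12 spells out inline. A sketch mirroring that logic (min_max_ratio is given an assumed default, where the original presumably reads args.min_max_ratio):

import math
import numpy as np


def get_best_k(cvi_list, acc_list, cat_pred_list, l_num, min_max_ratio=0.01):
    # Hypothetical sketch mirroring Example #12: average the best index by
    # validity score and by probe accuracy, then count unlabeled clusters
    # whose size exceeds min_max_ratio of the largest cluster.
    idx_cvi = np.max(np.argwhere(cvi_list == np.max(cvi_list)))
    idx_acc = np.max(np.argwhere(acc_list == np.max(acc_list)))
    idx_best = int(math.ceil((idx_cvi + idx_acc) / 2.0))
    cat_pred = cat_pred_list[idx_best, :]
    cnt_ul = np.bincount(cat_pred[l_num:].astype(int))
    bin_ul = cnt_ul[cnt_ul > 0]
    return int(np.sum(bin_ul / bin_ul.max() > min_max_ratio))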
Example #15
def main():
    """
    """
    args = parameter_parser()
    tab_printer(args)

    misc.create_directory('./_out/')
    misc.setup_logger('agc', args, './_out/')
    logger = logging.getLogger()
    logging.getLogger().setLevel(logging.INFO)

    if args.dataset in ['citeseer', 'cora', 'pubmed']:
        nx_graph, adj, features, targets_id, classes = load_data(args.dataset)
    elif args.dataset in ['dblp', 'reut', 'acm']:
        if args.dataset == 'reut':
            nx_graph, adj, features, targets_id, classes = exp_load_data(
                args.dataset, 3)
        else:
            nx_graph, adj, features, targets_id, classes = exp_load_data(
                args.dataset, None)
    elif args.dataset in ['wiki']:
        nx_graph = nx.from_edgelist(
            pd.read_csv('./data/wiki.cites.csv').values.tolist())
        features_file = pd.read_csv('./data/wiki.content.csv')
        data = np.array(features_file["content"].values.tolist())
        x1 = np.array(features_file["x1"].values.tolist())
        x2 = np.array(features_file["x2"].values.tolist())

        features = sp.csc_matrix((data, (x1, x2)))
        logging.info('dataset message:')
        logging.info('feature size: {}'.format(features.shape))

        # nodes, targets_id = misc.target_reader('./data/wiki.label.csv')
        tar_file = pd.read_csv('./data/wiki.label.csv')
        nodes = tar_file["node"].values.tolist()
        targets_id = np.array(tar_file["labelId"]).reshape(-1, 1).T[0]
        classes = 17

        adj = sp.csr_matrix(nx.adjacency_matrix(nx_graph))
    else:
        raise Exception("dataset import error.")
    # logging.info(features)     # triplet (COO) form
    # logging.info(targets)
    # logging.info(node)
    # logging.info(graph.adj)      # adjacency list
    # logging.info(nx.adjacency_matrix(graph))  # adjacency matrix ( .todense() converts to dense form )
    # logging.info(nx.degree(graph))   # degree of each node

    start_time = time.time()
    logging.info("Timing begin")

    # agc
    if args.dataset in ['citeseer', 'cora', 'pubmed', 'wiki']:
        predict_C, epoch = agc(nx_graph, adj, features, targets_id, classes,
                               start_time, 1)
    if args.dataset in ['dblp', 'reut', 'acm']:
        predict_C, epoch = agc(nx_graph, adj, features, targets_id, classes,
                               start_time, 0)

    # answer
    logging.info('Best Clustering:')
    logging.info(predict_C)
    logging.info('k: {}'.format(epoch))
    # metric evaluation
    F1_RESULT = metrics.f1_score(targets_id, predict_C, average='macro')
    logging.info('F1_: {}%'.format(F1_RESULT * 100))
    acc_RESULT = metrics.accuracy_score(targets_id, predict_C)
    logging.info('acc: {}%'.format(acc_RESULT * 100))
    NMI_RESULT = metrics.normalized_mutual_info_score(
        targets_id, predict_C, average_method='arithmetic')
    logging.info('NMI: {}%'.format(NMI_RESULT * 100))
    ari_RESULT = ari_score(targets_id, predict_C)
    logging.info('ari: {}%'.format(ari_RESULT * 100))