Ejemplo n.º 1
0
def te_rand(dataname, nmi_times=15):
    """Score the ``rand_deal`` baseline on one dataset and print the averages.

    The dataset is loaded through ``data_loader.load_which``, cast to
    integer arrays, and ``rand_deal`` is run ``nmi_times`` times. Each run is
    scored against the ground-truth labels with NMI and with
    ``Acc_calculator.use_acc``. One line is printed, tab-separated:
    mean accuracy, dataset name, mean NMI (both rounded to 4 decimals).

    Args:
        dataname: Dataset identifier forwarded to ``data_loader.load_which``
            and to ``rand_deal``.
        nmi_times: Number of independent runs to average over. Defaults to
            15, the value that used to be hard-coded.

    Raises:
        ValueError: If ``nmi_times`` is smaller than 1 (there would be
            nothing to average).
    """
    if nmi_times < 1:
        raise ValueError('nmi_times must be at least 1')

    # Load the dataset.
    features, Amatrix, labels = data_loader.load_which(dataname)
    features = np.array(features.astype("int"))
    Amatrix = np.array(Amatrix.astype("int"))
    labels = np.array(labels.astype("int"))
    # Ground-truth label per row, taken as the arg-max of each label row
    # (the original comment states the labels are one-hot encoded).
    labels_true = np.argmax(labels, axis=1)

    nmi_list = []
    acc_list = []
    for _ in range(nmi_times):
        # Convert the labels returned by rand_deal to an ndarray for scoring.
        predicted = np.array(rand_deal(dataname, features, Amatrix, labels))
        nmi_list.append(
            metrics.normalized_mutual_info_score(predicted, labels_true))
        acc_list.append(Acc_calculator.use_acc(labels_true, predicted))

    mean_nmi = round(float(np.mean(nmi_list)), 4)
    mean_acc = round(float(np.mean(acc_list)), 4)
    print(str(mean_acc) + '\t' + dataname + '\t' + str(mean_nmi))
Ejemplo n.º 2
0
def write_excel_neigh(aim_file):
    """Collect saved clustering runs into an .xls workbook and per-dataset plots.

    For every dataset in a fixed list and every neighbour count in a fixed
    list, up to three result files named
    ``<aim_file>/<dataset>/<dataset>_neighbor_num=<k>times<run>.mat`` are read.
    The ``'mi'`` entry of each file is scored against the dataset's
    ground-truth labels (NMI and ``Acc_calculator.use_acc``). Mean and
    variance over the runs that could be read are written to six sheets
    (combined "mean+var", mean only, variance only; for NMI and for ACC),
    with datasets as columns and neighbour counts as rows.

    Side effects:
        * Saves ``<aim_file>/<dataset>_differ_neigh.png`` for each dataset,
          plotting mean ACC and mean NMI against the neighbour count.
        * Saves the workbook to ``<aim_file>/Excel_result_fea.xls``.
        * Prints every path it tries to read, and a note for each run that
          had to be skipped.

    Args:
        aim_file: Directory that holds the per-dataset result folders and
            receives the generated plots and workbook.
    """
    # Create the workbook (UTF-8 encoded) and its worksheets. The creation
    # order is kept because it determines the sheet order in the file.
    workbook = xlwt.Workbook(encoding='utf-8')
    worksheet_nmi = workbook.add_sheet('result_nmi')
    worksheet_mean_nmi = workbook.add_sheet('mean_nmi')
    worksheet_var_nmi = workbook.add_sheet('var_nmi')
    worksheet_acc = workbook.add_sheet('result_acc')
    worksheet_mean_acc = workbook.add_sheet('mean_acc')
    worksheet_var_acc = workbook.add_sheet('var_acc')
    all_sheets = (
        worksheet_nmi,
        worksheet_mean_nmi,
        worksheet_var_nmi,
        worksheet_acc,
        worksheet_mean_acc,
        worksheet_var_acc,
    )
    name_list = [
        'cornell',
        'texas',
        'washington',
        'wisconsin',
        'TerrorAttack',
        'cora',
        'citeseer',
        'Pubmed_small',
    ]
    neigh_list = [
        5,
        10,
        15,
        20,
        25,
        30,
        35,
        40,
    ]

    # Header row: one column per dataset (write arguments are row, col, value).
    for col, dataname in enumerate(name_list, start=1):
        for sheet in all_sheets:
            sheet.write(0, col, label=dataname)

    # Header column: one row per neighbour count.
    for row, neigh in enumerate(neigh_list, start=1):
        row_title = 'neighbor_num=' + str(neigh)
        for sheet in all_sheets:
            sheet.write(row, 0, label=row_title)

    for col, dataname in enumerate(name_list, start=1):
        plt_acc = []
        plt_nmi = []
        # Only the labels are needed here; the other two values are ignored.
        _, _, labels = data_loader.load_which(dataname)
        labels = np.array(labels.astype("float32"))
        # Ground-truth label per row (arg-max of each label row; the original
        # comment states the labels are one-hot encoded).
        labels_true = np.argmax(labels, axis=1).tolist()

        for row, neigh in enumerate(neigh_list, start=1):
            acc_list = []
            nmi_list = []
            name_excel = 'neighbor_num=' + str(neigh)
            name_path = (aim_file + '/' + dataname + '/' + dataname + '_' +
                         name_excel + 'times')
            for comp in range(3):
                read_pa = name_path + str(comp) + '.mat'
                print(read_pa)
                try:
                    data = scio.loadmat(read_pa)
                    l1 = (data['mi'])[0].tolist()
                    nmi = metrics.normalized_mutual_info_score(l1, labels_true)
                    acc = Acc_calculator.use_acc(labels_true, l1)
                except Exception as err:
                    # Best effort: a missing or unreadable run is skipped, but
                    # the reason is reported instead of being hidden.
                    print('skipping ' + read_pa + ': ' + repr(err))
                    continue
                # Append both metrics together so the lists stay aligned.
                nmi_list.append(nmi)
                acc_list.append(acc)

            if nmi_list:
                mean_nmi = round(float(np.mean(nmi_list)), 4)
                var_nmi = round(float(np.var(nmi_list)), 4)
                # This was np.max, which contradicted the "mean" naming, the
                # 'mean_acc' sheet and the mean+variance pairing.
                mean_acc = round(float(np.mean(acc_list)), 4)
                var_acc = round(float(np.var(acc_list)), 4)
            else:
                # No run could be read: record NaN rather than aborting the
                # whole report (np.max on an empty list raises ValueError).
                mean_nmi = var_nmi = mean_acc = var_acc = float('nan')

            str_m_nmi = str(mean_nmi)
            str_v_nmi = str(var_nmi)
            str_m_acc = str(mean_acc)
            str_v_acc = str(var_acc)

            worksheet_nmi.write(row, col, label=str_m_nmi + '+' + str_v_nmi)
            worksheet_mean_nmi.write(row, col, label=str_m_nmi)
            worksheet_var_nmi.write(row, col, label=str_v_nmi)
            worksheet_acc.write(row, col, label=str_m_acc + '+' + str_v_acc)
            worksheet_mean_acc.write(row, col, label=str_m_acc)
            worksheet_var_acc.write(row, col, label=str_v_acc)

            plt_acc.append(mean_acc)
            plt_nmi.append(mean_nmi)

        plt.plot(neigh_list, plt_acc, label='acc')
        plt.plot(neigh_list, plt_nmi, label='nmi')
        plt.xlabel('neighbor_num')
        plt.ylabel('acc or nmi')
        plt.legend(loc='upper right')
        plt.title('acc and nmi on ' + dataname)
        plt.savefig(aim_file + '/' + dataname + '_differ_neigh.png')
        # Clear the axes so the next dataset starts from an empty plot.
        plt.cla()

    # Write the workbook to disk.
    workbook.save(aim_file + '/Excel_result_fea.xls')
Ejemplo n.º 3
0
def te_EGCD(dataname, feat_map, neighbors, mask, use_h=False):
    """Run EGCD several times on one dataset, saving results, a log and a plot.

    The dataset is loaded through ``data_loader.load_which`` and cast to
    float32 arrays. ``EGCD_similarity.use_EGCD`` is then called 3 times
    (2 times when the dataset has 3000 or more rows). For every run:

    * the four returned values are stored in
      ``<path>/<dataname>_neighbor_num=<neighbors>times<run>.mat``;
    * NMI and ``Acc_calculator.use_acc`` are computed against the
      ground-truth labels and printed;
    * the loss curve is added to the current matplotlib axes.

    After all runs, mean+variance of the NMI is written to
    ``<path>/<dataname>_neighbor_num=<neighbors>.txt`` and the figure is
    saved next to it as ``.png``. ``<path>`` is ``result/<dataname>``, or
    ``result_nofe/<dataname>`` when ``neighbors`` is -1.

    Args:
        dataname: Dataset identifier forwarded to ``data_loader.load_which``;
            also used in the output paths.
        feat_map: Forwarded to ``use_EGCD``. Overridden with ``'none'`` when
            the feature matrix is square.
        neighbors: Forwarded to ``use_EGCD``; -1 selects the ``result_nofe``
            output directory.
        mask: Forwarded to ``use_EGCD``. Overridden with ``False`` when the
            feature matrix is square.
        use_h: Unused by this function; kept for interface compatibility.
    """
    # Load the dataset.
    features, Amatrix, labels = data_loader.load_which(dataname)
    features = np.array(features.astype("float32"))
    Amatrix = np.array(Amatrix.astype("float32"))
    labels = np.array(labels.astype("float32"))
    [size, d] = features.shape
    # Ground-truth label per row (arg-max of each label row; the original
    # comment states the labels are one-hot encoded).
    labels_true = np.argmax(labels, axis=1)
    if size == d:
        # Square feature matrix: disable the feature map and the mask.
        # NOTE(review): presumably this means the dataset has no real node
        # features -- confirm against data_loader.
        feat_map = 'none'
        mask = False

    if neighbors != -1:
        path = 'result/' + dataname
    else:
        path = 'result_nofe/' + dataname

    # makedirs (rather than mkdir) so a missing parent directory such as
    # 'result/' is created too instead of raising.
    if not os.path.exists(path):
        os.makedirs(path)

    nmi_list = []
    acc_list = []
    name_part = '_' + 'neighbor_num=' + str(neighbors)

    # Larger datasets get one run fewer.
    nmi_times = 3 if size < 3000 else 2

    for i in range(nmi_times):
        mi, loss_ram, y_original, H = EGCD_similarity.use_EGCD(
            features,
            Amatrix,
            labels,
            feat_map,
            neighbors,
            mask,
            printloss=False)  # run EGCD
        result_mi = np.array(mi.detach())
        result_loss_ram = np.array(loss_ram)
        result_y_original = np.array(y_original.detach())
        result_H = np.array(H.detach())

        file_path = path + '/' + dataname + name_part + 'times' + str(i) + '.mat'
        io.savemat(
            file_path, {
                'mi': result_mi,
                'loss_ram': result_loss_ram,
                'y_original': result_y_original,
                'H': result_H,
            })

        # Score the detached copy: it holds the same values as `mi` and, unlike
        # the raw tensor, can always be handed to NumPy-based code.
        nmi = metrics.normalized_mutual_info_score(result_mi, labels_true)
        acc = Acc_calculator.use_acc(labels_true, result_mi)
        print(str(nmi) + ' and ' + str(acc))
        nmi_list.append(nmi)
        acc_list.append(acc)

        # NOTE(review): this relies on the last element of the converted loss
        # history still exposing .detach()/.float(); kept exactly as in the
        # original because the element type of loss_ram is not visible here.
        loss_fin = (result_loss_ram[-1]).detach()
        loss_final = round(float(loss_fin.float()), 4)
        plt_la = (' l:' + str(loss_final) + ' nmi:' + str(round(float(nmi), 4)) +
                  ' acc:' + str(round(float(acc), 4)))
        # Add this run's loss curve to the shared figure.
        plt.plot(loss_ram, label=plt_la)

    # Axis labels and legend only need to be set once, after all curves exist.
    plt.xlabel('times')
    plt.ylabel('loss value')
    plt.legend(loc='upper right')

    meani = round(float(np.mean(nmi_list)), 4)
    vari = round(float(np.var(nmi_list)), 4)
    acci = round(float(np.mean(acc_list)), 4)
    vcci = round(float(np.var(acc_list)), 4)
    nmi_title = str(meani) + '+' + str(vari)
    acc_title = str(acci) + '+' + str(vcci)

    # The log holds the NMI summary only; accuracy appears in the plot title.
    logfile = path + '/' + dataname + name_part + '.txt'
    with open(logfile, 'w') as log_file:
        log_file.write(nmi_title)

    plt.title('Loss value on ' + dataname + name_part + '\n' + 'nmi:' +
              nmi_title + ' acc:' + acc_title)
    plt.savefig(path + '/' + dataname + name_part + '.png')
    # Clear the axes so a later call starts from an empty plot.
    plt.cla()