コード例 #1
0
# Inclusive range of cluster counts to evaluate.
start_cluster = 2
end_cluster = 10
np_cluster = np.arange(start_cluster, end_cluster + 1)

# Show the method list and read the user's choice (kept as a string).
print('<---1. Use the calinski_harabaz_score to evaluate--->')
print('<---2. Use the calculate_by_deviation to evaluate--->')
print('<---3. Use the silhouette_score to evaluate--->')
print('<---4. Show Cluster Scatter--->')
method = input('<---Please Choose--->: ')

# i runs over 1..6; each value is handed to LoadData.get_split_original_data.
for i in range(1, 7):
    # Load the data and reduce its dimensionality.
    org_data, org_label = LoadData.get_split_original_data(i)

    # `algorithm` and `dim` are defined outside this fragment.
    # Membership tests are required here: the previous form
    # `algorithm == 'PCA' or 'pca'` is always truthy (a non-empty string
    # literal is truthy), so PCA ran regardless of `algorithm` and the
    # remaining branches were unreachable.
    reduced_data = np.array([])
    if algorithm in ('PCA', 'pca'):
        reduced_data = ra.pca(org_data, dim)
    elif algorithm in ('Isomap', 'isomap'):
        reduced_data = ra.isomap(org_data, dim)
    elif algorithm == 'tSNE':
        reduced_data = ra.tsne(org_data, dim)
    else:
        # No reduction is applied; reduced_data stays an empty array.
        print('<---None dimension reduced--->')

    normalized_data = Normalize.normalization(reduced_data)

    # Empty accumulators; they are not filled within the visible fragment.
    evaluated_scores = np.array([])
    values = np.array([])
コード例 #2
0
elif method == '2':
    # Branch for menu choice '2': gather data for every category label,
    # either from the original data or from per-cluster "refactor" data.
    dim = 3
    all_label = ['C1', 'C2', 'C3', 'C4', 'C5', 'C6']
    # cluster_num = {'C1': 6, 'C2': 5, 'C3': 5, 'C4': 5, 'C5': 5, 'C6': 4}
    # cluster_num = {'C1': 0, 'C2': 2, 'C3': 2, 'C4': 2, 'C5': 0, 'C6': 0}
    # Per-label count: 0 selects the original-data path below; any other
    # value is the number of get_refactor_data(element, num) loads.
    cluster_num = {'C1': 2, 'C2': 2, 'C3': 2, 'C4': 2, 'C5': 2, 'C6': 2}
    header = ['Dim1', 'Dim2', 'Dim3', 'Label']
    pd_data = pd.DataFrame()
    np_data = np.array([])
    # Names of the categories collected so far ('C1', or 'C1_0', 'C1_1', ...).
    nn_category = np.array([])

    for element in all_label:
        if cluster_num[element] == 0:
            nn_category = np.append(nn_category, element)
            # Read the file, reduce the dimensionality, normalize.
            # element[1] is the digit character of the label (a str, e.g. '1').
            # NOTE(review): confirm get_split_original_data accepts a str here.
            org_data, org_label = LoadData.get_split_original_data(element[1])
            # Prefix every label with 'C'.
            org_label = np.array(['C' + str(i) for i in org_label])
            reduced_data = ra.tsne(org_data, dim)
            normalized_data = Normalize.normalization(reduced_data).astype(
                'float64')
            # Append the labels as one extra column next to the data columns.
            # NOTE(review): joining float data with string labels presumably
            # yields a string-typed array, losing the float64 cast - confirm.
            np_tmp = np.concatenate(
                (normalized_data, org_label.reshape(-1, 1)), axis=1)
            pd_tmp = pd.DataFrame(np_tmp, columns=header)
            # Stack this category's rows under those already collected.
            pd_data = pd.concat((pd_data, pd_tmp), axis=0)

        else:
            # One entry per sub-cluster index, named '<label>_<num>'.
            for num in range(cluster_num[element]):
                nn_category = np.append(nn_category, element + '_' + str(num))
                normalized_data, org_label = LoadData.get_refactor_data(
                    element, num)
                # print(normalized_data, normalized_data.shape)