def dist_mean(data_set_1, data_set_2):
    """Return the mean of ``hp.angDist`` over all cross pairs of rows.

    Every row of ``data_set_1`` is paired with every row of ``data_set_2``.

    Args:
        data_set_1: 2-D array whose rows are the points of the first set.
        data_set_2: 2-D array whose rows are the points of the second set.

    Returns:
        The sum of all pairwise distances divided by the number of pairs.

    Raises:
        ZeroDivisionError: if either set has no rows (there are no pairs).
    """
    n_1 = data_set_1.shape[0]
    n_2 = data_set_2.shape[0]
    sum_dis = 0
    for i in range(n_1):
        for j in range(n_2):
            # The two points are passed as separate positional arguments,
            # as in every other hp.angDist call in this file (they were
            # previously wrapped in a single tuple).
            sum_dis = sum_dis + hp.angDist(data_set_1[i, :], data_set_2[j, :])
    return sum_dis / (n_1 * n_2)
def dist_min(data_set_1, data_set_2):
    """Return the smallest ``hp.angDist`` over all cross pairs of rows.

    Args:
        data_set_1: 2-D array whose rows are the points of the first set.
        data_set_2: 2-D array whose rows are the points of the second set.

    Returns:
        The minimum pairwise distance, or ``inf`` when there are no pairs
        (either set has no rows).
    """
    # Start from +inf rather than a magic upper bound of 1, so a true minimum
    # larger than 1 is reported correctly instead of being clamped.
    min_dis = float("inf")
    for i in range(data_set_1.shape[0]):
        for j in range(data_set_2.shape[0]):
            dis_ij = hp.angDist(data_set_1[i, :], data_set_2[j, :])
            if dis_ij < min_dis:
                min_dis = dis_ij
    return min_dis
def generateCluster(data_set, clus_idx):
    """Label every row of ``data_set`` with its closest centre row.

    Args:
        data_set: 2-D array whose rows are the points.
        clus_idx: sequence of row indices into ``data_set`` that act as
            cluster centres.

    Returns:
        A list with one entry per row of ``data_set``: the element of
        ``clus_idx`` whose row has the smallest ``hp.angDist`` to that row.
        On ties the centre listed first in ``clus_idx`` wins.
    """
    labels = []
    for row in range(data_set.shape[0]):
        point = data_set[row, :]
        # Negated distances: the closest centre has the largest score.
        scores = [-1 * hp.angDist(point, data_set[centre]) for centre in clus_idx]
        winner = scores.index(np.max(scores))
        labels.append(clus_idx[winner])
    return labels
def findRemotestIndex(data_mat, idx_list):
    """Find the row that lies farthest from its nearest selected row.

    For every row not listed in ``idx_list`` the smallest ``hp.angDist`` to
    the rows named in ``idx_list`` is computed; the row for which that
    smallest distance is largest wins.

    Args:
        data_mat: 2-D array whose rows are the points.
        idx_list: sequence of row indices that are already selected.

    Returns:
        The index of the winning row. Rows listed in ``idx_list`` are never
        chosen, and 0 is returned when no row has a strictly positive
        nearest distance (including when ``data_mat`` has no rows).
    """
    best_row = 0
    best_gap = 0
    for row in range(data_mat.shape[0]):
        # An already selected row can never beat the current best.
        if any(row == chosen for chosen in idx_list):
            continue
        gap = np.inf
        for chosen in idx_list:
            dist = hp.angDist(data_mat[row, :], data_mat[chosen])
            if dist < gap:
                gap = dist
        if gap > best_gap:
            best_gap = gap
            best_row = row
    return best_row
def angKmeans(data_set, k, create_cent=randCenter, calc_mean=hp.orientationMean, max_iterate=50,
              min_error=0.0):
    """Cluster the rows of ``data_set`` into ``k`` groups with a k-means loop.

    Points are assigned to the centroid with the smallest ``hp.angDist`` and
    centroids are then recomputed with ``calc_mean``. The loop stops when no
    assignment changes, when ``max_iterate`` rounds have run, or when the
    mean squared distance changes by at most ``min_error`` between rounds.

    Args:
        data_set: 2-D array whose rows are the points; it is copied, not
            modified.
        k: number of clusters.
        create_cent: callable ``create_cent(points, k)`` returning the initial
            centroids; the result must support ``.copy()`` and ``[j, :]``
            row access.
        calc_mean: callable that maps the rows of one cluster to its new
            centroid.
        max_iterate: maximum number of assignment/update rounds.
        min_error: threshold on the absolute change of the mean squared
            distance between consecutive rounds.

    Returns:
        A tuple ``(ini_centroids, centroids, cluster_condition)`` where
        ``cluster_condition`` has one row per point: column 0 is the index of
        the assigned centroid and column 1 the squared distance to it.
    """
    copy_set = data_set.copy()
    m = copy_set.shape[0]
    cluster_condition = np.zeros((m, 2))
    # Start from an impossible label so that the first assignment pass always
    # registers as a change, even for points that land in cluster 0.
    cluster_condition[:, 0] = -1
    centroids = create_cent(copy_set, k)
    ini_centroids = centroids.copy()
    cluster_changed = True
    iterate_count = 0
    old_error = 0
    while cluster_changed:
        iterate_count = iterate_count + 1
        cluster_changed = False

        # Assignment step: attach every point to its nearest centroid.
        for i in range(m):
            min_dist = np.inf
            min_index = -1
            for j in range(k):
                dist_ji = hp.angDist(centroids[j, :], copy_set[i, :])
                if dist_ji < min_dist:
                    min_dist = dist_ji
                    min_index = j
            if cluster_condition[i, 0] != min_index:
                cluster_changed = True
            # Always refresh the record: the centroids move every round, so
            # the stored distance would go stale if it were only written when
            # the label changes.
            cluster_condition[i, :] = min_index, min_dist ** 2

        # The worst-fitted point is used to re-seed clusters that end up empty.
        # NOTE(review): several empty clusters in one round all receive this
        # same point.
        max_index = int(np.argmax(cluster_condition[:, 1])) if m > 0 else -1

        # Update step: recompute each centroid from its members.
        for cent in range(k):
            pts_cluster = copy_set[np.nonzero(cluster_condition[:, 0] == cent)]
            if pts_cluster.shape[0] == 0:
                centroids[cent, :] = copy_set[max_index, :]
                print("empty slice happened")
                cluster_changed = True
            else:
                centroids[cent, :] = calc_mean(pts_cluster)

        mean_error = np.sum(cluster_condition[:, 1]) / m
        if iterate_count >= max_iterate:
            cluster_changed = False
        if np.abs(mean_error - old_error) <= min_error:
            cluster_changed = False
        old_error = mean_error
        print("iterate round ", iterate_count, ", average error = ", mean_error)
    return ini_centroids, centroids, cluster_condition
def calcSimilarity(data_set, p_mode=0):
    """Build the pairwise similarity matrix of the rows of ``data_set``.

    The similarity of rows ``i`` and ``j`` is the negated ``hp.angDist``
    between them. The diagonal is then overwritten with a single value ``p``
    derived from the whole matrix (computed before the overwrite).

    Args:
        data_set: 2-D array whose rows are the points.
        p_mode: selects the diagonal value: ``-1`` uses the minimum
            similarity, ``1`` the maximum, and any other value (including the
            default ``0``) the median.

    Returns:
        An ``n x n`` ``np.matrix`` of similarities, where ``n`` is the number
        of rows; an empty ``0 x 0`` matrix when ``data_set`` has no rows.
    """
    n = data_set.shape[0]
    scores = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            scores[i, j] = -1 * hp.angDist(data_set[i, :], data_set[j, :])

    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0; the return
    # type stays np.matrix for existing callers.
    if n == 0:
        # Nothing to reduce: skip min/max/median on an empty array.
        return np.asmatrix(scores)

    if p_mode == -1:
        p = np.min(scores)
    elif p_mode == 1:
        p = np.max(scores)
    else:
        p = np.median(scores)
    np.fill_diagonal(scores, p)
    return np.asmatrix(scores)
def neighbour(data_set, data, eps):
    """Return the rows of ``data_set`` that lie within ``eps`` of ``data``.

    Args:
        data_set: 2-D array whose rows are the candidate points.
        data: the reference point, passed to ``hp.angDist`` with each row.
        eps: distance threshold; a row is dropped when its ``hp.angDist`` to
            ``data`` is strictly greater than ``eps``.

    Returns:
        A new array holding the kept rows in their original order;
        ``data_set`` itself is not modified.
    """
    n_rows = data_set.shape[0]
    # Mark rows in a mask and select once, instead of calling np.delete per
    # rejected row (each call copies the whole array).
    keep = np.ones(n_rows, dtype=bool)
    for i in range(n_rows):
        if hp.angDist(data_set[i, :], data) > eps:
            keep[i] = False
    return data_set[keep]