예제 #1
0
def cluster_cost(Data, k, medoids, cluster_assignments):
    """Sum, per cluster, the distances from each member point to its medoid.

    Args:
        Data: array indexed as ``Data[mask, :]``; rows are presumably
            the points (TODO confirm against caller).
        k: number of clusters; labels ``0 .. k-1`` are scanned.
        medoids: array indexed as ``medoids[label, :]``.
        cluster_assignments: per-row cluster labels, compared with ``==``
            against each label to build a row mask.

    Returns:
        A length-``k`` float array whose entry ``i`` is the accumulated
        ``kh.euclid_distance`` between every row labelled ``i`` and
        medoid ``i``. Labels with no rows keep a cost of 0.
    """
    totals = np.zeros(k)
    for label in range(k):
        # Boolean mask picks out only the rows assigned to this cluster.
        members = Data[cluster_assignments == label, :]
        for point in members:
            totals[label] += kh.euclid_distance(point, medoids[label, :])
    return totals
예제 #2
0
def cluster(k, Data, medoids):
    """Assign every row of ``Data`` to its nearest medoid.

    Args:
        k: number of medoids to compare against (``medoids[0 .. k-1]``).
        Data: array indexed as ``Data[i, :]``; one row per point.
        medoids: sequence/array indexed as ``medoids[j]``.

    Returns:
        A float array with one entry per row of ``Data`` holding the index
        of the medoid with the smallest ``kh.euclid_distance`` to that row
        (as selected by ``argmin``).
    """
    n_points = np.size(Data, 0)
    cluster_assignments = np.empty(n_points)
    for row in range(n_points):
        # Distance from this point to each of the k medoids.
        distances = np.empty(k)
        for m in range(k):
            distances[m] = kh.euclid_distance(Data[row, :], medoids[m])
        cluster_assignments[row] = distances.argmin()
    return cluster_assignments
예제 #3
0
def find_greatest_dist(cluster_assignments, Data, medoids):
    """Find, for each cluster, the member point farthest from its medoid.

    Args:
        cluster_assignments: per-row cluster labels; each is converted with
            ``int()`` and used to index ``medoids``.
        Data: array with one row per point (``Data.shape[0]`` rows).
        medoids: sequence/array with one entry per cluster.

    Returns:
        A float array of shape ``(len(medoids), 2)``. Column 0 is the
        largest ``kh.euclid_distance`` seen for that cluster and column 1
        is the row index in ``Data`` it came from (stored as a float).
        On ties the later row wins. A cluster with no assigned rows stays
        ``[0, 0]``, which cannot be told apart from "row 0 at distance 0".
    """
    # Size the result from the medoids actually passed in; the previous
    # version read an undefined free variable ``k`` here.
    n_clusters = len(medoids)
    greatest_dist = np.zeros(shape=(n_clusters, 2), dtype=float)

    for i in range(Data.shape[0]):
        label = int(cluster_assignments[i])
        distance = float(kh.euclid_distance(Data[i, :], medoids[label]))

        # ``<=`` (not ``<``) so that a later point at equal distance
        # replaces the earlier one, matching the original tie-breaking.
        if greatest_dist[label, 0] <= distance:
            greatest_dist[label] = [distance, i]

    return greatest_dist