def new_centroids(data,centroids,clusters,clusters_id,num_of_iterations,num_of_clusters,iteration_count):
    """Recompute centroids from the current clusters; on convergence (or when
    the iteration budget is exhausted) print evaluation metrics and plot,
    otherwise recurse into another k-means pass.

    Args:
        data: full data matrix; column 1 holds ground-truth labels and
              columns 2+ hold the feature values (per the slicing below).
        centroids: centroids used by the iteration that just finished.
        clusters: per-cluster lists of member feature vectors.
        clusters_id: per-cluster lists of member point ids.
        num_of_iterations: maximum number of k-means iterations.
        num_of_clusters: number of clusters k.
        iteration_count: iterations performed so far.
    """
    # New centroid of each cluster = mean of its member vectors.
    new_centroid = []
    for members in clusters:
        members = np.array(members)
        new_centroid.append(np.sum(members, 0) / len(members))
    new_centroid = np.array(new_centroid)

    # BUG FIX: the original summed *signed* differences, so positive and
    # negative centroid movements could cancel out and falsely signal
    # convergence.  Sum absolute movement instead.
    shift = np.sum(np.abs(np.array(centroids) - new_centroid))

    # BUG FIX: compare against the num_of_iterations parameter rather than
    # the undefined global `iterations`.
    if shift == 0 or iteration_count == num_of_iterations:
        print("Converged")
        # Map every data-point id to its 1-based cluster label.
        d = dict()
        for x in range(len(clusters_id)):
            for y in range(len(clusters_id[x])):
                d[clusters_id[x][y]] = x + 1

        # Labels ordered by point id, to align with the ground truth.
        vals = [d[x] for x in sorted(d.keys())]
        ground_truth = list(map(int, data[:, 1]))

        print("Jaccard")
        ja = helpers.jaccard(ground_truth, vals)
        print(ja)

        print("Rand index")
        rd = helpers.rand(ground_truth, vals)
        print(rd)

        # Project features to 2D for the scatter plot.
        unique_predicted = list(set(vals))
        new_x = helpers.pca(data[:, 2:])
        helpers.scatter(new_x[:, 0], new_x[:, 1], vals, unique_predicted,
                        "K-means Algorithm", "iyer.txt")
    else:
        # BUG FIX: pass the parameters, not the undefined globals
        # `iterations` / `no_cluster`.
        kmeans(data, new_centroid, num_of_iterations, num_of_clusters,
               iteration_count)
def new_centroids(reducedSpace, centroids, clusters, clusters_id,
                  num_of_iterations, num_clusters, iteration_count):
    """Recompute centroids from the current clusters; on convergence (or when
    the iteration budget is exhausted) print evaluation metrics and plot,
    otherwise recurse into another k-means pass.

    NOTE(review): reads module-level `Groundtruth` and `GeneExpressions`;
    confirm the surrounding script defines them before this is called.

    Args:
        reducedSpace: data matrix passed back into kmeans() on recursion.
        centroids: centroids used by the iteration that just finished.
        clusters: per-cluster lists of member feature vectors.
        clusters_id: per-cluster lists of member point ids.
        num_of_iterations: maximum number of k-means iterations.
        num_clusters: number of clusters k.
        iteration_count: iterations performed so far.
    """
    # New centroid of each cluster = mean of its member vectors.
    new_centroid = []
    for members in clusters:
        members = np.array(members)
        new_centroid.append(np.sum(members, 0) / len(members))
    new_centroid = np.array(new_centroid)

    # BUG FIX: the original summed *signed* differences, so positive and
    # negative centroid movements could cancel out and falsely signal
    # convergence.  Sum absolute movement instead.
    shift = np.sum(np.abs(np.array(centroids) - new_centroid))

    # BUG FIX: compare against the num_of_iterations parameter rather than
    # the undefined global `iterations`.
    if shift == 0 or iteration_count == num_of_iterations:
        print("Converged")
        # Map every data-point id to its 1-based cluster label.
        d = dict()
        for x in range(len(clusters_id)):
            for y in range(len(clusters_id[x])):
                d[clusters_id[x][y]] = x + 1

        # Labels ordered by point id, to align with the ground truth.
        vals = np.array([d[key] for key in sorted(d.keys())])
        print(vals)       # debug: predicted labels
        print(set(vals))  # debug: distinct labels seen

        print("Jaccard")
        ja = helpers.jaccard(Groundtruth, vals)
        print(ja)

        print("Rand index")
        rd = helpers.rand(Groundtruth, vals)
        print(rd)

        # Project the expressions to 2D for the scatter plot.
        unique_predicted = list(set(vals))
        new_x = helpers.pca(GeneExpressions)
        helpers.scatter(new_x[:, 0], new_x[:, 1], vals, unique_predicted)

    else:
        kmeans(reducedSpace, new_centroid, num_of_iterations, num_clusters,
               iteration_count)
# ---- Esempio n. 3 (example separator left over from extraction) ----
# DBSCAN pipeline: read the parameter, cluster, score against the ground
# truth, then visualise the result in two dimensions.
min_pts = float(input("Enter min_pts value: "))

# 3. Perform DBSCAN
model = __dbscan.DBSCAN(X, epsilon, min_pts)
predicted = model.fit()

# Report how many points landed in each cluster label.
unique, counts = np.unique(predicted, return_counts=True)
print("Counts by cluster:")
for cluster_label, cluster_size in zip(unique, counts):
    print("{}: {}".format(cluster_label, cluster_size))

# 4. Find Rand index and Jaccard

rand_score = helpers.rand(y, predicted)
jaccard_score = helpers.jaccard(y, predicted)
unique_predicted = list(set(predicted))
print(predicted)
print(rand_score)
print(jaccard_score)

# print(adjusted_rand_score(y, predicted))
# print(jaccard_similarity_score(y, predicted))

# 5. Visualize using PCA — project to 2D only when there are more than
# two features; otherwise plot the data as-is.
new_X = helpers.pca(X) if X.shape[1] > 2 else X

helpers.scatter(new_X[:, 0], new_X[:, 1], predicted, unique_predicted,
                "DBSCAN", file_name)
# Testing data
#distMatrix = [[0.00,0.71,5.66,3.61,4.24,3.20],[0.71,0.00,4.95,2.92,3.54,2.50],[5.66,4.95,0.00,2.24,1.41,2.50],[3.61,2.92,2.24,0.00,1.00,0.50],[4.24,3.54,1.41,1.00,0.00,1.12],[3.20,2.50,2.50,0.50,1.12,0.00]]
#rowsnumbers = [[0],[1],[2],[3],[4],[5]]
distMatrix = np.array(distMatrix)
#print(distMatrix)

# Agglomerative merging: keep fusing the closest pair of clusters until
# only k groups remain (or the distance matrix runs out).
while (len(distMatrix) >= 2):
    if (len(rowsnumbers) == k):
        break
    distMatrix, rowsnumbers = updateDistMatrix(distMatrix, rowsnumbers)
print(rowsnumbers)

# Give every row id in group i the 1-based cluster label i+1.
clusterassignments = {}

cluster = 1
for group in rowsnumbers:
    for row in group:
        clusterassignments[row + 1] = cluster
    cluster += 1

# BUG FIX: the original called sorted(clusterassignments) / sorted(Groundtruth)
# and discarded the results, then read .values() in dict *insertion* order, so
# predicted and ground-truth labels were not aligned by id.  Read both dicts
# in sorted-key order, as the discarded sorted() calls clearly intended.
predicted = [clusterassignments[key] for key in sorted(clusterassignments)]
unique_predicted = list(set(predicted))
Ground = [Groundtruth[key] for key in sorted(Groundtruth)]
print(h.jaccard(Ground, predicted))
print(h.rand(Ground, predicted))

# Project to 2D and plot the predicted clustering.
new_X = h.pca(GeneExpressions)
h.scatter(new_X[:, 0], new_X[:, 1], predicted, unique_predicted)
# ---- Esempio n. 5 (example separator left over from extraction) ----
# Show the raw example dataset with equal axis scaling, without blocking
# the script, and wait for the user before moving on.
plt.gca().set_aspect('equal', adjustable='box')
plt.show(block=False)

input('Program paused. Press enter to continue.')

## =============== Part 2: Principal Component Analysis ===============
#  You should now implement PCA, a dimension reduction technique. You
#  should complete the code in pca.m
#
print('Running PCA on example dataset.\n')

#  Before running PCA, it is important to first normalize X
X_norm, mu, _ = featureNormalize(X)

#  Run PCA
U, S = pca(X_norm)

#  Draw the eigenvectors centered at mean of data. These lines show the
#  directions of maximum variations in the dataset.
#  BUG FIX: plt.hold() was deprecated in matplotlib 2.0 and removed in 3.0,
#  so the original hold(True)/hold(False) calls raise AttributeError on any
#  modern matplotlib.  Axes hold by default, so the calls are simply dropped.
drawLine(mu, mu + 1.5 * S[0, 0] * U[:, 0].T, c='k', linewidth=2)
drawLine(mu, mu + 1.5 * S[1, 1] * U[:, 1].T, c='k', linewidth=2)

print('Top eigenvector: \n')
print(' U(:,1) = {:f} {:f} \n'.format(U[0, 0], U[1, 0]))
print('(you should expect to see -0.707107 -0.707107)')

input('Program paused. Press enter to continue.')