/
starbucks_clustering.py
64 lines (50 loc) · 1.8 KB
/
starbucks_clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from sklearn.cluster import AgglomerativeClustering
import math
import numpy as np
from scipy.cluster import hierarchy
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
# Coordinates of the 20 sample points to cluster, one [x, y] pair per row.
points = np.array([
    [1, 5], [1, 7], [2, 6], [2, 9], [3, 6],
    [3, 8], [3, 8], [3, 9], [4, 8], [4, 8],
    [3, 3], [5, 4], [7, 2], [5, 7], [4, 5],
    [6, 9], [7, 3], [7, 9], [8, 1], [8, 7],
])

# Disabled alternative: SciPy can build the same distance table directly.
#   dc = pdist(points)       # condensed distance matrix
#   sf = squareform(dc)      # full square matrix
#   print('This is the distance condensed matrix:')
#   print(sf)


def upper_triangle_distances(pts):
    """Return the Euclidean distance for every pair (i, j) with j >= i.

    Pairs are visited row by row: point 0 against points 0..n-1, then
    point 1 against points 1..n-1, and so on. Each point is therefore also
    paired with itself, which contributes a 0.0 entry per row.

    Args:
        pts: Sliceable sequence of 2-D points; each point is indexable
            with [0] for x and [1] for y.

    Returns:
        A flat list of distances, each rounded to two decimal places.
    """
    distances = []
    for i, p in enumerate(pts):
        # Slicing from i keeps only the upper triangle, diagonal included.
        for q in pts[i:]:
            dx = p[0] - q[0]
            dy = p[1] - q[1]
            distances.append(round(math.sqrt(dx * dx + dy * dy), 2))
    return distances


# Print one distance per line, in the row-by-row order described above.
for distance in upper_triangle_distances(points):
    print(distance)
# Sample x/y coordinates for a quick scatter plot.
# NOTE(review): these eleven values are not taken from `points` -- confirm
# that plotting a separate data set here is intended.
x = np.array([3, 2, 5, 6, 5, 12, 10, 8, 7, 2, 5])
y = np.array([7, 6, 8, 6, 5, 8, 6, 4, 3, 2, 2])
plt.scatter(x, y)
fig = plt.gcf()
# The window title is set through the figure manager: the canvas-level
# set_window_title() was deprecated in Matplotlib 3.4 and later removed.
fig.canvas.manager.set_window_title('Scatter Plot of points')
# plt.show()  # display left disabled, as in the original script
# Fit scikit-learn's agglomerative clustering on the points using the
# estimator's default settings; the fitted model is kept for future use.
clustering = AgglomerativeClustering().fit(points)
# NOTE: a pasted estimator repr used to follow here as a live constructor
# call whose result was discarded. It passed `pooling_func` and `affinity`,
# which newer scikit-learn releases no longer accept, so the call raised
# TypeError. To actually request the configuration it described, build the
# estimator explicitly, e.g.:
#   AgglomerativeClustering(n_clusters=1, linkage='single')
def _draw_dendrogram(condensed, method, title):
    """Cluster with one linkage method and draw its dendrogram.

    Args:
        condensed: Condensed pairwise distance matrix, as returned by pdist.
        method: Linkage method name passed to hierarchy.linkage.
        title: Window title for the figure holding the dendrogram.

    Returns:
        A (linkage_matrix, dendrogram_dict) tuple.
    """
    linkage_matrix = hierarchy.linkage(condensed, method)
    # A fresh figure per dendrogram keeps the plots from being drawn on top
    # of each other (and on top of any earlier plot).
    figure = plt.figure()
    # The window title is set through the figure manager: the canvas-level
    # set_window_title() was deprecated in Matplotlib 3.4 and later removed.
    figure.canvas.manager.set_window_title(title)
    return linkage_matrix, hierarchy.dendrogram(linkage_matrix)


# Condensed pairwise distances between the points. This has to be computed
# here: `dc` was never assigned by live code, so the linkage calls below
# previously failed with NameError.
dc = pdist(points)

S, sdn = _draw_dendrogram(dc, 'single', 'Single-Linkage Clustering')
A, adn = _draw_dendrogram(dc, 'average', 'Average Linkage Clustering')
C, cdn = _draw_dendrogram(dc, 'complete', 'Complete Linkage Clustering')
# plt.show()  # display left disabled, as in the original script

hierarchy.set_link_color_palette(None)  # reset to default after use