def getEpsilon(self, dataSet): distances = [] sumOfDist = 0.0 for i in range(len(dataSet)): point = dataSet[i] for j in range(len(dataSet)): if i == j: continue pt = dataSet[j] dist = getEuclideanDist(point.x, point.y, pt.x, pt.y) distances.append(dist) distances.sort() sumOfDist += distances[7] distances = [] return sumOfDist/len(dataSet)
def dbscan(self, dataSet): clusters = [] visited = set() noise = set() # Iterate over data points for i in range(len(dataSet)): point = dataSet[i] if point in visited: continue visited.add(point) N = [] minPtsNeighbours = 0 # check which point satisfies minPts condition for j in range(len(dataSet)): if i==j: continue pt = dataSet[j] dist = getEuclideanDist(point.x, point.y, pt.x, pt.y) if dist <= self.e: minPtsNeighbours += 1 N.append(pt) if minPtsNeighbours >= self.minPts: cluster = set() cluster.add(point) point.isAssignedToCluster = True j = 0 while j < len(N): point1 = N[j] minPtsNeighbours1 = 0 N1 = [] if not point1 in visited: visited.add(point1) for l in range(len(dataSet)): pt = dataSet[l] dist = getEuclideanDist(point1.x, point1.y, pt.x, pt.y) if dist <= self.e: minPtsNeighbours1 += 1 N1.append(pt) if minPtsNeighbours1 >= self.minPts: self.removeDuplicates(N, N1) # Add point1 is not yet member of any other cluster then add it to cluster # Hint: use self.isAssignedToCluster function to check if a point is assigned to any clusters # ========================# # STRART YOUR CODE HERE # # ========================# def isAssignedToCluster(point, clusters): for cluster in clusters: for pt in cluster: if pt.x == point.x and pt.y == point.y: return True return False if not isAssignedToCluster(point1, clusters): cluster.add(point1) # ========================# # END YOUR CODE HERE # # ========================# j += 1 # add cluster to the list of clusters clusters.append(cluster) else: noise.add(point) # List clusters print("Number of clusters formed :" + str(len(clusters))) print("Noise points :" + str(len(noise))) # Calculate purity compute_purity(clusters,len(self.dataSet)) compute_NMI(clusters,self.noOfLabels) DataPoints.writeToFile(noise, clusters, "DBSCAN_"+ self.dataname + ".csv")