def get_eps(X, neigh=2):
    """Find the elbow point of the sorted neighbour-distance curve of ``X``.

    The distances returned by ``calculate_kn_distance(X, neigh=neigh)`` are
    sorted in ascending order, shown as a histogram, and then handed to a
    ``Rotor`` as an (index, distance) curve to locate the elbow. The elbow
    plot produced by the rotor is displayed as well.

    Args:
        X: Data forwarded unchanged to ``calculate_kn_distance``.
        neigh: Neighbour count forwarded to ``calculate_kn_distance``.

    Returns:
        The row of the (index, distance) curve at the elbow, i.e. a
        two-element array ``[index, distance]`` -- not the distance alone.
    """
    # NOTE(review): presumably one distance per sample -- confirm against
    # calculate_kn_distance.
    sorted_dist = np.sort(calculate_kn_distance(X, neigh=neigh))

    # Histogram of the distance distribution.
    plt.hist(sorted_dist, bins=60)
    plt.ylabel('n')
    plt.xlabel('Epsilon distance')
    plt.show()

    # Two-column curve: position in the sorted order vs. distance value.
    rank_col = np.arange(sorted_dist.shape[0]).reshape(-1, 1)
    dist_col = sorted_dist.reshape(-1, 1)
    curve_xy = np.hstack((rank_col, dist_col))

    elbow_finder = Rotor()
    elbow_finder.fit_rotate(curve_xy)
    elbow_finder.plot_elbow()
    return curve_xy[elbow_finder.get_elbow_index()]
# 1st evaluation
# Use nearest-neighbour distances to find the optimal epsilon (maximum
# distance) for DBSCAN:
# https://towardsdatascience.com/machine-learning-clustering-dbscan-determine-the-optimal-value-for-epsilon-eps-python-example-3100091cfbc

# One constant drives the neighbour query, the distance column that is read
# back, and DBSCAN's min_samples, so the three cannot drift apart.
K_NEIGHBORS = 5

# algorithm may be one of ['auto', 'ball_tree', 'kd_tree', 'brute']
nbrs = NearestNeighbors(n_neighbors=K_NEIGHBORS, algorithm='kd_tree').fit(xyz_nn)
distances, indices = nbrs.kneighbors(xyz_nn)  # indices of the nearest neighbours
distances = np.sort(distances, axis=0)  # sort every column independently
distances = distances[:, K_NEIGHBORS - 1]  # keep the farthest-neighbour column
plt.plot(distances)

# Build the (x, y) curve for elbow detection. The abscissa has to be sized
# from y: the previous code called len(x) while defining x itself.
y = np.array(distances)
x = np.linspace(0, len(y), len(y))
xy = np.vstack((x, y)).T
rotor = Rotor()
rotor.fit_rotate(xy)
elbow_idx = rotor.get_elbow_index()
rotor.plot_elbow()
eps = distances[elbow_idx] / 2  # half of the elbow distance is used as eps
del x, y, xy

# min_samples equals the neighbour count used for the distance curve above.
clustering = DBSCAN(algorithm='kd_tree', eps=eps, min_samples=K_NEIGHBORS).fit(xyz_nn)
labels = clustering.labels_
# Fold the labels onto 23 distinguishable colours (e.g. 554 labels -> 23);
# a label of -1 maps to colour 22 under Python's modulo.
colors = [int(i % 23) for i in labels]
# NOTE(review): the viewer receives `data` while clustering ran on `xyz_nn`;
# confirm both hold the same points in the same order.
v = pptk.viewer(data, colors)
v.set(point_size=0.01)

# matplotlib
# Boolean mask flagging the core samples found by DBSCAN.
core_samples_mask = np.zeros_like(clustering.labels_, dtype=bool)
core_samples_mask[clustering.core_sample_indices_] = True