def neighbors_plot(self):
    """Plot a histogram of per-path neighbour counts.

    Loads the paths from ``FileSplitter.points()``, builds a radius
    neighbours graph over them (radius 0.005), counts the neighbours of
    every path and writes a 10-bin histogram of those counts to
    ``neighbors_hist.html`` through ``self.plot_on_bokeh_hist``.
    """
    import gc

    import numpy as np
    from sklearn.neighbors import radius_neighbors_graph

    start_pos, end_pos, paths = FileSplitter.points()
    # Only the paths are used; drop the other two results before the graph
    # is built so they do not add to peak memory.
    del start_pos, end_pos
    gc.collect()

    neighbors = radius_neighbors_graph(paths, radius=0.005)
    del paths
    gc.collect()

    # Sum the rows of the sparse graph directly.  Converting it to a dense
    # array first would allocate an n-by-n matrix just to add up each row.
    # np.asarray(...).ravel() flattens the (n, 1) result into a 1-D array
    # whether SciPy returns a matrix or an ndarray.
    counts = np.asarray(neighbors.sum(axis=1)).ravel()
    del neighbors

    hist, edges = np.histogram(counts, bins=10, density=False)
    self.plot_on_bokeh_hist(
        'neighbors_hist.html',
        '# of Neighbors',
        '# of Occurrance',
        'Neighbors Within Radius',
        hist,
        edges,
    )
def trajectories_hdbscan(self, min_cluster_size):
    """Cluster the trajectories with HDBSCAN and export the result as JSON.

    Loads the paths from ``FileSplitter.points()``, groups them by the
    label HDBSCAN assigns to each one, turns the groups into geometry
    with ``self.createGeometry`` and passes that to
    ``self.createJsonFile``.

    Args:
        min_cluster_size: Forwarded unchanged to ``hdbscan.HDBSCAN`` as
            its ``min_cluster_size`` argument.
    """

    def group_by_cluster(paths):
        """Return one list of paths per cluster, indexed by cluster label."""
        clusterer = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size)
        cluster_labels = clusterer.fit_predict(paths)

        # HDBSCAN marks noise points with the label -1; it is not a
        # cluster, so it is excluded from the count and from the output.
        unique_labels = set(cluster_labels)
        num_clusters = len(unique_labels) - (1 if -1 in unique_labels else 0)
        logging.info('Number of clusters: %s', num_clusters)

        clusters = [[] for _ in range(num_clusters)]
        for label, path in zip(cluster_labels, paths):
            if label != -1:
                clusters[label].append(path)
        return clusters

    # Only the paths (third element) are needed here.
    _, _, paths = FileSplitter.points()
    # Original author's note: each path is an array of
    # [start_lat, start_lon, end_lat, end_lon].
    clusters = group_by_cluster(paths)
    geometry = self.createGeometry(clusters)
    self.createJsonFile(geometry)