def Cluster(self):
    """Cluster the loaded coordinates with DBSCAN and plot the results.

    Coordinates are read with ``self.openfile()`` and clustered twice with
    the haversine metric:

    * a "real" pass (eps = 50 m, ``min_samples=10``) whose centermost points
      are plotted in red, and
    * a "scatter" pass (negligible eps, ``min_samples=1``) that puts each
      distinct point in its own cluster so every point can be plotted in blue.

    The centermost points are shown with matplotlib and written, together
    with the scatter points, to ``userlocation + 'Map.html'`` via gmplot.
    Nothing is returned. If fewer than three centermost points are found, a
    message is printed and no plot or map is produced.
    """
    # read in the coords within the safe range
    coords = self.openfile()

    # kilometres per radian (mean earth radius), used to express the
    # haversine epsilon in radians
    kms_per_radian = 6371.0088
    # set epsilon to 50 meters
    epsilon = 0.05 / kms_per_radian
    # set epsilon really small so each distinct point forms its own cluster
    espsi = 0.0000000000000000001 / kms_per_radian

    # the haversine metric works on radians; convert once for both passes
    coords_in_radians = np.radians(coords)

    # perform the DBSCAN using the ball tree algorithm and haversine metric:
    # form a cluster if 10 pics are present within 50 meters
    db = DBSCAN(eps=epsilon, min_samples=10, algorithm='ball_tree',
                metric='haversine').fit(coords_in_radians)
    # form a cluster on every single plot
    scat = DBSCAN(eps=espsi, min_samples=1, algorithm='ball_tree',
                  metric='haversine').fit(coords_in_radians)
    cluster_labels = db.labels_
    scatcluster_labels = scat.labels_

    # number of distinct labels; this count includes the noise label (-1)
    # whenever DBSCAN marks any point as noise
    num_clusters = len(set(cluster_labels))
    if num_clusters < 3:
        print('Not enough clusters were formed')
        # NOTE(review): PlotCluster is defined outside this block; execution
        # continues below once it returns -- confirm that is intended.
        PlotCluster()
    scatnum_clusters = len(set(scatcluster_labels))
    print('Number of clusters: {}'.format(scatnum_clusters))

    # convert the coords to a plain 2-column array (latitude, longitude).
    # ``.values`` replaces ``DataFrame.as_matrix``, which pandas has removed.
    df = pd.DataFrame(coords, columns=["latitude", "longitude"])
    points = df[["latitude", "longitude"]].values

    def group_by_label(labels):
        # Collect the member points of every real cluster. The noise label
        # (-1) is skipped explicitly rather than assuming it is always
        # present, so the last cluster is no longer dropped when there is
        # no noise.
        return [points[labels == label]
                for label in np.unique(labels) if label != -1]

    def get_centermost_point(cluster):
        # using the centroid attribute, pull out the centre point of the cluster
        centre = MultiPoint(cluster).centroid
        centroid = (centre.x, centre.y)
        # pick the member point closest to the centroid and return it as a tuple
        centermost_point = min(
            cluster, key=lambda point: great_circle(point, centroid).m)
        print(centermost_point)
        return tuple(centermost_point)

    clusters = group_by_label(cluster_labels)
    scatclusters = group_by_label(scatcluster_labels)

    # map the centermost point to each cluster
    centermost_points = [get_centermost_point(c) for c in clusters]
    # also reduce every single-point cluster to its point to allow for plotting
    scatplot = [get_centermost_point(c) for c in scatclusters]

    # separate the lat and lon points from the tuples
    Alatitude, Alongitude = zip(*scatplot)
    if len(centermost_points) > 2:
        latitude, longitude = zip(*centermost_points)
    else:
        print("Not enough data to form clusters")
        # nothing to plot; previously execution fell through and failed with
        # a NameError on the undefined latitude/longitude
        return

    # plot the centermost points using matplotlib
    plt.scatter(latitude, longitude, s=2, c="black", marker=".")
    plt.show()

    # use gmap to open the map to the first plot point
    # NOTE(review): gmplot expects latitude before longitude, but the
    # arguments here are passed longitude-first. Left unchanged because the
    # column order of the input file is not visible here -- confirm.
    gmap = gmplot.GoogleMapPlotter(longitude[0], latitude[0], zoom=15)
    # plot the centermost points and every other point
    gmap.scatter(longitude, latitude, '#ff0000', size=10, marker=False)
    gmap.scatter(Alongitude, Alatitude, '#0000ff', size=5, marker=False)
    # save map as html file in directory
    gmap.draw(userlocation + 'Map.html')