def affinity_propagation(location, location_callback): """Returns one or more clusters of a set of points, using an affinity propagation algorithm. The result is sorted with the first value being the largest cluster. Returns: A list of NamedTuples (see get_cluster_named_tuple for a definition of the tuple). """ pts = location._tuple_points() if not pts: return None X = np.array(pts).reshape((len(pts), len(pts[0]))) if np.any(np.isnan(X)) or not np.all(np.isfinite(X)): return None X = Imputer().fit_transform(X) X = X.astype(np.float32) afkwargs = { 'damping': 0.5, 'convergence_iter': 15, 'max_iter': 200, 'copy': True, 'preference': None, 'affinity': 'euclidean', 'verbose': False } af = AffinityPropagation(**afkwargs).fit(X) cluster_centers_indices = af.cluster_centers_indices_ clusters = [] for cluster_id, cluster_centre in enumerate(af.cluster_centers_): locations = [] for j, label in enumerate(af.labels_): if not label == cluster_id: continue locations.append(location.locations[j]) if not locations: continue clusters.append(cluster_named_tuple()(label=cluster_id, centroid=Point(cluster_centre), location=location_callback( locations))) return clusters
def mean_shift(location, location_callback, bandwidth=None): """Returns one or more clusters of a set of points, using a mean shift algorithm. The result is sorted with the first value being the largest cluster. Kwargs: bandwidth (float): If bandwidth is None, a value is detected automatically from the input using estimate_bandwidth. Returns: A list of NamedTuples (see get_cluster_named_tuple for a definition of the tuple). """ pts = location._tuple_points() if not pts: return None X = np.array(pts).reshape((len(pts), len(pts[0]))) if np.any(np.isnan(X)) or not np.all(np.isfinite(X)): return None X = Imputer().fit_transform(X) X = X.astype(np.float32) if not bandwidth: bandwidth = estimate_bandwidth(X, quantile=0.3) ms = MeanShift(bandwidth=bandwidth or None, bin_seeding=False).fit(X) clusters = [] for cluster_id, cluster_centre in enumerate(ms.cluster_centers_): locations = [] for j, label in enumerate(ms.labels_): if not label == cluster_id: continue locations.append(location.locations[j]) if not locations: continue clusters.append(cluster_named_tuple()(label=cluster_id, centroid=Point(cluster_centre), location=location_callback( locations))) return clusters