def ClusterHouses(matches, plot_groups=False):
    """Cluster houses by location using mean shift and group them by label.

    For every entry in ``matches`` the location is fetched through
    ``RFAPI.house_location`` and its first two components are used as a 2-D
    point. The points are clustered with ``cluster.MeanShift`` and each house
    is filed under the string form of its cluster label.

    Args:
        matches: Sequence of house records accepted by
            ``RFAPI.house_location`` and ``RFAPI.house_address``.
        plot_groups: When true, the points and their labels are passed to
            ``HouseScore._plot_groups`` after grouping.

    Returns:
        A dict mapping each cluster label (as a string) to a list of
        ``{"adress": ..., "location": [x, y]}`` entries. If any step raises,
        the exception is not propagated: its message is stored under the
        ``"error"`` key (next to any groups already built) and logged.
    """
    groups = {}
    try:
        X = np.zeros((len(matches), 2))
        for m, match in enumerate(matches):
            loc = RFAPI.house_location(match)
            X[m] = (loc[0], loc[1])

        # Alternatives that were tried before settling on mean shift:
        #   - SpectralClustering (nearest_neighbors affinity): a bit buggy
        #   - GaussianMixture (full covariance): best so far
        #   - AffinityPropagation: yielded one cluster
        bandwidth = cluster.estimate_bandwidth(X, quantile=.3)
        algorithm = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
        algorithm.fit(X)

        # Estimators without ``labels_`` need an explicit predict() call;
        # kept so that ``algorithm`` can be swapped for such an estimator.
        if hasattr(algorithm, 'labels_'):
            # Builtin int: the ``np.int`` alias was removed in NumPy 1.24.
            y_pred = algorithm.labels_.astype(int)
        else:
            y_pred = algorithm.predict(X)

        for m, match in enumerate(matches):
            # NOTE: the "adress" spelling is part of the returned structure
            # and is kept as-is for existing consumers.
            groups.setdefault(str(y_pred[m]), []).append({
                "adress": RFAPI.house_address(match),
                "location": [X[m][0], X[m][1]]
            })
        logging.debug("groups = {}".format(groups))

        if plot_groups:
            HouseScore._plot_groups(X, y_pred)
    except Exception as e:
        # Best-effort contract: report the failure inside the result instead
        # of raising to the caller.
        groups["error"] = str(e)
        logging.error(groups["error"])
    return groups