def _rsl_prims_balltree(X, cut, k=5, alpha=1.4142135623730951, gamma=5, metric="minkowski", p=2):
    """Cluster ``X`` by single linkage over a core-distance spanning tree.

    Builds a ``BallTree`` on ``X``, takes each point's distance to its
    ``k``-th returned neighbour as its core distance, hands those to
    ``mst_linkage_core_cdist`` to obtain a spanning tree, labels it, and
    cuts the resulting ``SingleLinkageTree``.

    Parameters
    ----------
    X : array with a ``shape`` attribute
        Input data; ``X.shape[0]`` is used as the number of samples.
    cut
        Forwarded as the first argument of ``SingleLinkageTree.get_clusters``.
    k : int, default 5
        Number of neighbours requested from the tree query; capped at
        ``X.shape[0] - 1``.
    alpha : float
        Accepted for interface compatibility but not used in this function.
    gamma
        Forwarded as the second argument of ``SingleLinkageTree.get_clusters``.
    metric : str, default "minkowski"
        Metric name given to ``BallTree`` and ``DistanceMetric.get_metric``.
    p : number, default 2
        Minkowski exponent. Only consulted when ``metric == "minkowski"``.

    Returns
    -------
    tuple
        ``(labels, single_linkage_tree)``.

    Raises
    ------
    TypeError
        If ``metric == "minkowski"`` and ``p`` is ``None``.
    ValueError
        If ``metric == "minkowski"`` and ``p`` is negative.
    """
    if metric == "minkowski":
        if p is None:
            raise TypeError("Minkowski metric given but no p value supplied!")
        if p < 0:
            raise ValueError("Minkowski metric with negative p value is not defined!")
        # Forward the validated exponent; previously it was checked and then
        # dropped, so every Minkowski request silently ran with the default p.
        metric_kwargs = {"p": p}
    else:
        # Other metrics get no ``p`` keyword, exactly as before.
        metric_kwargs = {}

    n_samples = X.shape[0]
    # A point set of size n cannot supply more than n - 1 neighbours here.
    k = min(n_samples - 1, k)

    tree = BallTree(X, metric=metric, **metric_kwargs)
    dist_metric = DistanceMetric.get_metric(metric, **metric_kwargs)

    # Last column of the returned distances = farthest of the k neighbours.
    neighbor_distances = tree.query(X, k=k)[0]
    core_distances = neighbor_distances[:, -1]

    # NOTE(review): ``alpha`` is not passed on anywhere in this function --
    # confirm whether mst_linkage_core_cdist is expected to receive it.
    min_spanning_tree = mst_linkage_core_cdist(X, core_distances, dist_metric)

    single_linkage_tree = SingleLinkageTree(label(min_spanning_tree))
    labels = single_linkage_tree.get_clusters(cut, gamma)
    return labels, single_linkage_tree
def test_ball_tree_two_point(n_samples=100, n_features=3):
    """Check ``BallTree.two_point_correlation`` against brute-force counts.

    Two seeded random point sets ``X`` and ``Y`` are drawn. The reference
    count for each radius is the number of entries of the euclidean
    ``pairwise(Y, X)`` distance matrix that are ``<=`` that radius.

    This is a generator test: it yields ``(check, r, dualtree)`` tuples,
    once with ``dualtree=True`` and once with ``dualtree=False``.

    NOTE(review): yield-based tests are not executed by pytest >= 4 --
    confirm which runner collects this file.
    """
    np.random.seed(0)
    shape = (n_samples, n_features)
    # Draw order matters for reproducibility: X first, then Y.
    X = np.random.random(shape)
    Y = np.random.random(shape)
    radii = np.linspace(0, 1, 10)

    ball_tree = BallTree(X, leaf_size=10)

    euclidean = DistanceMetric.get_metric("euclidean")
    pair_distances = euclidean.pairwise(Y, X)
    expected_counts = [(pair_distances <= radius).sum() for radius in radii]

    def check_two_point(r, dualtree):
        observed = ball_tree.two_point_correlation(Y, r=r, dualtree=dualtree)
        assert_allclose(observed, expected_counts)

    yield check_two_point, radii, True
    yield check_two_point, radii, False
def brute_force_neighbors(X, Y, k, metric, **kwargs):
    """Find the ``k`` nearest rows of ``X`` for every row of ``Y`` by full sort.

    The complete ``pairwise(Y, X)`` distance matrix is computed with the
    metric named by ``metric`` (extra ``kwargs`` go to
    ``DistanceMetric.get_metric``), each row is argsorted, and the first
    ``k`` columns are kept.

    Returns
    -------
    tuple
        ``(dist, ind)``: ``ind`` holds the first ``k`` argsort indices per
        row of ``Y``; ``dist`` holds the matching distances.
    """
    distance_metric = DistanceMetric.get_metric(metric, **kwargs)
    all_distances = distance_metric.pairwise(Y, X)

    sorted_columns = np.argsort(all_distances, axis=1)
    ind = sorted_columns[:, :k]

    # Column vector of row numbers so it broadcasts against ``ind``.
    row_ids = np.arange(Y.shape[0])[:, None]
    dist = all_distances[row_ids, ind]
    return dist, ind
def check_pdist(self, metric, kwargs, D_true):
    """Assert that pairwise distances over ``self.X1`` match ``D_true``.

    The metric is built from ``metric`` with ``kwargs`` expanded as keyword
    arguments, and ``pairwise`` is called with ``self.X1`` as its only
    argument.
    """
    distance_metric = DistanceMetric.get_metric(metric, **kwargs)
    computed = distance_metric.pairwise(self.X1)
    assert_allclose(computed, D_true)
def check_cdist_bool(self, metric, D_true):
    """Assert that distances between the boolean fixtures match ``D_true``.

    The metric is built from ``metric`` with no extra arguments, and
    ``pairwise`` is called on ``self.X1_bool`` and ``self.X2_bool``.
    """
    distance_metric = DistanceMetric.get_metric(metric)
    computed = distance_metric.pairwise(self.X1_bool, self.X2_bool)
    assert_allclose(computed, D_true)