def _rsl_prims_balltree(X, cut, k=5, alpha=1.4142135623730951, gamma=5, metric="minkowski", p=2):

    if metric == "minkowski":
        if p is None:
            raise TypeError("Minkowski metric given but no p value supplied!")
        if p < 0:
            raise ValueError("Minkowski metric with negative p value is not defined!")
    elif p is None:
        p = 2  # Unused, but needs to be integer; assume euclidean

    dim = X.shape[0]
    k = min(dim - 1, k)

    tree = BallTree(X, metric=metric)

    dist_metric = DistanceMetric.get_metric(metric)

    core_distances = tree.query(X, k=k)[0][:, -1]
    min_spanning_tree = mst_linkage_core_cdist(X, core_distances, dist_metric)

    single_linkage_tree = label(min_spanning_tree)
    single_linkage_tree = SingleLinkageTree(single_linkage_tree)

    labels = single_linkage_tree.get_clusters(cut, gamma)

    return labels, single_linkage_tree
Beispiel #2
0
def test_ball_tree_two_point(n_samples=100, n_features=3):
    np.random.seed(0)
    X = np.random.random((n_samples, n_features))
    Y = np.random.random((n_samples, n_features))
    r = np.linspace(0, 1, 10)
    bt = BallTree(X, leaf_size=10)

    D = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
    counts_true = [(D <= ri).sum() for ri in r]

    def check_two_point(r, dualtree):
        counts = bt.two_point_correlation(Y, r=r, dualtree=dualtree)
        assert_allclose(counts, counts_true)

    for dualtree in (True, False):
        yield check_two_point, r, dualtree
Beispiel #3
0
def test_ball_tree_two_point(n_samples=100, n_features=3):
    np.random.seed(0)
    X = np.random.random((n_samples, n_features))
    Y = np.random.random((n_samples, n_features))
    r = np.linspace(0, 1, 10)
    bt = BallTree(X, leaf_size=10)

    D = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
    counts_true = [(D <= ri).sum() for ri in r]

    def check_two_point(r, dualtree):
        counts = bt.two_point_correlation(Y, r=r, dualtree=dualtree)
        assert_allclose(counts, counts_true)

    for dualtree in (True, False):
        yield check_two_point, r, dualtree
Beispiel #4
0
def brute_force_neighbors(X, Y, k, metric, **kwargs):
    D = DistanceMetric.get_metric(metric, **kwargs).pairwise(Y, X)
    ind = np.argsort(D, axis=1)[:, :k]
    dist = D[np.arange(Y.shape[0])[:, None], ind]
    return dist, ind
Beispiel #5
0
def brute_force_neighbors(X, Y, k, metric, **kwargs):
    D = DistanceMetric.get_metric(metric, **kwargs).pairwise(Y, X)
    ind = np.argsort(D, axis=1)[:, :k]
    dist = D[np.arange(Y.shape[0])[:, None], ind]
    return dist, ind
Beispiel #6
0
 def check_pdist(self, metric, kwargs, D_true):
     dm = DistanceMetric.get_metric(metric, **kwargs)
     D12 = dm.pairwise(self.X1)
     assert_allclose(D12, D_true)        
Beispiel #7
0
 def check_cdist_bool(self, metric, D_true):
     dm = DistanceMetric.get_metric(metric)
     D12 = dm.pairwise(self.X1_bool, self.X2_bool)
     assert_allclose(D12, D_true)