Esempio n. 1
0
def test_weighted_minkowski(spatial_data):
    """Compare ``dist.weighted_minkowski`` with ``pairwise_distances``.

    Draws one random non-negative weight per column of ``spatial_data``,
    computes the reference matrix with
    ``pairwise_distances(metric="wminkowski", p=3)``, and checks that it
    agrees with the matrix obtained by calling ``dist.weighted_minkowski``
    on every pair of rows.

    Parameters
    ----------
    spatial_data
        Two-dimensional array-like exposing ``shape`` and row indexing.
        NOTE(review): presumably supplied by a pytest fixture; confirm.
    """
    # abs() keeps every per-feature weight non-negative.
    weights = np.abs(np.random.randn(spatial_data.shape[1]))
    reference = pairwise_distances(
        spatial_data, metric="wminkowski", w=weights, p=3
    )

    # Brute-force evaluation of the distance for every (row, row) pair.
    row_ids = range(spatial_data.shape[0])
    computed = np.array(
        [
            [
                dist.weighted_minkowski(
                    spatial_data[a], spatial_data[b], weights, p=3
                )
                for b in row_ids
            ]
            for a in row_ids
        ]
    )

    assert_array_almost_equal(
        computed,
        reference,
        err_msg="Distances don't match for metric weighted_minkowski",
    )
Esempio n. 2
0
def _brute_force_distances(data, dist_function, *args, **kwargs):
    """Build a dense matrix by calling ``dist_function`` on every row pair.

    Entry ``[i, j]`` of the result is
    ``dist_function(data[i], data[j], *args, **kwargs)``.
    """
    row_ids = range(data.shape[0])
    return np.array(
        [
            [dist_function(data[i], data[j], *args, **kwargs) for j in row_ids]
            for i in row_ids
        ]
    )


def _assert_distances_match(test_matrix, dist_matrix, metric_name):
    """Assert both matrices agree, naming ``metric_name`` on failure."""
    assert_array_almost_equal(
        test_matrix,
        dist_matrix,
        err_msg="Distances don't match for metric {}".format(metric_name),
    )


def test_metrics():
    """Check every distance in ``dist`` against a reference implementation.

    For each name in ``spatial_distances`` and ``binary_distances`` the
    reference matrix comes from ``pairwise_distances``; the matrix under
    test is built by calling ``dist.named_distances[metric]`` on every pair
    of rows. Non-finite reference entries are overwritten for selected
    metrics before comparing. Standardised Euclidean, weighted Minkowski and
    Mahalanobis need extra parameters and are checked separately, and
    haversine is checked against ``BallTree`` queries on the first two
    columns of ``spatial_data``.
    """
    for metric in spatial_distances:
        dist_matrix = pairwise_distances(spatial_data, metric=metric)
        # scipy is bad sometimes
        if metric == "braycurtis":
            dist_matrix[~np.isfinite(dist_matrix)] = 0.0
        if metric in ("cosine", "correlation"):
            dist_matrix[~np.isfinite(dist_matrix)] = 1.0
            # And because distance between all zero vectors should be zero
            # NOTE(review): presumably rows 10 and 11 of spatial_data are the
            # all-zero rows; confirm against the fixture data.
            dist_matrix[10, 11] = 0.0
            dist_matrix[11, 10] = 0.0
        test_matrix = _brute_force_distances(
            spatial_data, dist.named_distances[metric]
        )
        _assert_distances_match(test_matrix, dist_matrix, metric)

    for metric in binary_distances:
        dist_matrix = pairwise_distances(binary_data, metric=metric)
        if metric in ("jaccard", "dice", "sokalsneath", "yule"):
            dist_matrix[~np.isfinite(dist_matrix)] = 0.0
        if metric in ("kulsinski", "russellrao"):
            dist_matrix[~np.isfinite(dist_matrix)] = 0.0
            # And because distance between all zero vectors should be zero
            # NOTE(review): presumably rows 10 and 11 of binary_data are the
            # all-zero rows; confirm against the fixture data.
            dist_matrix[10, 11] = 0.0
            dist_matrix[11, 10] = 0.0
        test_matrix = _brute_force_distances(
            binary_data, dist.named_distances[metric]
        )
        _assert_distances_match(test_matrix, dist_matrix, metric)

    # Handle the few special distances separately
    # SEuclidean
    v = np.abs(np.random.randn(spatial_data.shape[1]))
    dist_matrix = pairwise_distances(spatial_data, metric="seuclidean", V=v)
    test_matrix = _brute_force_distances(
        spatial_data, dist.standardised_euclidean, v
    )
    _assert_distances_match(test_matrix, dist_matrix, "seuclidean")

    # Weighted minkowski (reuses the weight vector drawn for SEuclidean)
    dist_matrix = pairwise_distances(spatial_data, metric="wminkowski", w=v, p=3)
    test_matrix = _brute_force_distances(
        spatial_data, dist.weighted_minkowski, v, p=3
    )
    _assert_distances_match(test_matrix, dist_matrix, "weighted_minkowski")

    # Mahalanobis: a fresh random square matrix is passed as VI
    v = np.abs(np.random.randn(spatial_data.shape[1], spatial_data.shape[1]))
    dist_matrix = pairwise_distances(spatial_data, metric="mahalanobis", VI=v)
    test_matrix = _brute_force_distances(spatial_data, dist.mahalanobis, v)
    _assert_distances_match(test_matrix, dist_matrix, "mahalanobis")

    # Haversine: only the first two columns are used
    coords = spatial_data[:, :2]
    tree = BallTree(coords, metric="haversine")
    dist_matrix, _ = tree.query(coords, k=spatial_data.shape[0])
    test_matrix = _brute_force_distances(coords, dist.haversine)
    # Sort each row of the brute-force matrix so it can be compared
    # position-by-position with the distances returned by tree.query.
    test_matrix.sort(axis=1)
    _assert_distances_match(test_matrix, dist_matrix, "haversine")