def test_weighted_minkowski(spatial_data):
    """Compare ``dist.weighted_minkowski`` (p=3) with the ``"wminkowski"`` metric.

    A random non-negative weight per column of ``spatial_data`` is drawn,
    the reference matrix is obtained from ``pairwise_distances`` and the
    candidate matrix is built by calling ``dist.weighted_minkowski`` on every
    ordered pair of rows. The two matrices must agree to the default
    precision of ``assert_array_almost_equal``.
    """
    # NOTE(review): newer SciPy releases no longer accept "wminkowski";
    # confirm which SciPy version this suite is pinned to.
    weights = np.abs(np.random.randn(spatial_data.shape[1]))
    expected = pairwise_distances(
        spatial_data, metric="wminkowski", w=weights, p=3
    )

    row_ids = range(spatial_data.shape[0])

    def distances_from(i):
        # Distances from row ``i`` to every row (including itself).
        return [
            dist.weighted_minkowski(
                spatial_data[i], spatial_data[j], weights, p=3
            )
            for j in row_ids
        ]

    computed = np.array([distances_from(i) for i in row_ids])

    assert_array_almost_equal(
        computed,
        expected,
        err_msg="Distances don't match for metric weighted_minkowski",
    )
def test_metrics():
    """Check every distance in ``dist`` against a reference implementation.

    For each named spatial and binary metric the reference matrix comes from
    ``pairwise_distances``; the candidate matrix is built by calling the
    matching ``dist.named_distances`` function on every ordered pair of rows.
    Metrics that need extra arguments (seuclidean, weighted minkowski,
    mahalanobis) and haversine (checked through ``BallTree``) are handled
    individually afterwards.
    """
    # NOTE(review): spatial_data, binary_data, spatial_distances and
    # binary_distances are not parameters here, so they are presumably
    # module-level names defined elsewhere in this file - confirm.

    def brute_force(metric_fn, data, *args, **kwargs):
        # Evaluate ``metric_fn`` on every ordered pair of rows of ``data``.
        row_ids = range(data.shape[0])
        return np.array(
            [
                [metric_fn(data[i], data[j], *args, **kwargs) for j in row_ids]
                for i in row_ids
            ]
        )

    def check(computed, expected, label):
        # Shared assertion so every metric reports the same message format.
        assert_array_almost_equal(
            computed,
            expected,
            err_msg="Distances don't match for metric {}".format(label),
        )

    def zero_pair_10_11(matrix):
        # The distance between all-zero vectors should be zero
        # (assumes rows 10 and 11 are the all-zero rows - TODO confirm).
        matrix[10, 11] = 0.0
        matrix[11, 10] = 0.0

    # --- Spatial metrics -------------------------------------------------
    # scipy is bad sometimes: non-finite entries are replaced per metric.
    spatial_fill = {"braycurtis": 0.0, "cosine": 1.0, "correlation": 1.0}
    for metric in spatial_distances:
        expected = pairwise_distances(spatial_data, metric=metric)
        if metric in spatial_fill:
            expected[~np.isfinite(expected)] = spatial_fill[metric]
        if metric in ("cosine", "correlation"):
            zero_pair_10_11(expected)

        computed = brute_force(dist.named_distances[metric], spatial_data)
        check(computed, expected, metric)

    # --- Binary metrics --------------------------------------------------
    zero_filled = (
        "jaccard",
        "dice",
        "sokalsneath",
        "yule",
        "kulsinski",
        "russellrao",
    )
    for metric in binary_distances:
        expected = pairwise_distances(binary_data, metric=metric)
        if metric in zero_filled:
            expected[~np.isfinite(expected)] = 0.0
        if metric in ("kulsinski", "russellrao"):
            zero_pair_10_11(expected)

        computed = brute_force(dist.named_distances[metric], binary_data)
        check(computed, expected, metric)

    # --- Metrics that take extra arguments -------------------------------
    # SEuclidean: one random non-negative variance per column.
    v = np.abs(np.random.randn(spatial_data.shape[1]))
    expected = pairwise_distances(spatial_data, metric="seuclidean", V=v)
    computed = brute_force(dist.standardised_euclidean, spatial_data, v)
    check(computed, expected, "seuclidean")

    # Weighted minkowski: reuses the same vector as per-column weights.
    # NOTE(review): newer SciPy releases no longer accept "wminkowski";
    # confirm which SciPy version this suite is pinned to.
    expected = pairwise_distances(spatial_data, metric="wminkowski", w=v, p=3)
    computed = brute_force(dist.weighted_minkowski, spatial_data, v, p=3)
    check(computed, expected, "weighted_minkowski")

    # Mahalanobis: a random non-negative square matrix is passed as ``VI``.
    v = np.abs(np.random.randn(spatial_data.shape[1], spatial_data.shape[1]))
    expected = pairwise_distances(spatial_data, metric="mahalanobis", VI=v)
    computed = brute_force(dist.mahalanobis, spatial_data, v)
    check(computed, expected, "mahalanobis")

    # --- Haversine -------------------------------------------------------
    # Only the first two columns are used. Querying with k equal to the
    # number of rows returns a full row of distances per point, so the
    # brute-force rows are sorted before the two are compared.
    coords = spatial_data[:, :2]
    tree = BallTree(coords, metric="haversine")
    expected, _ = tree.query(coords, k=spatial_data.shape[0])
    computed = np.sort(brute_force(dist.haversine, coords), axis=1)
    check(computed, expected, "haversine")