def test_grad_metrics_match_metrics(spatial_data, spatial_distances):
    """Compare gradient-returning metrics against reference distance matrices.

    For every metric exercised here, element ``[0]`` of the value returned by
    the ``*_grad`` function is collected for each pair of rows and compared
    with a reference pairwise distance matrix for the same rows.

    Parameters
    ----------
    spatial_data
        Two-dimensional sample array (rows are points).
    spatial_distances
        Container of metric names; only metrics present in it are passed to
        ``spatial_check``.
    """

    def grad_distance_matrix(grad_fn, rows, *args, **kwargs):
        # Square matrix holding element [0] of grad_fn for every row pair.
        indices = range(rows.shape[0])
        return np.array(
            [
                [grad_fn(rows[i], rows[j], *args, **kwargs)[0] for j in indices]
                for i in indices
            ]
        )

    n_features = spatial_data.shape[1]

    # Metrics without extra parameters are delegated to the shared checker.
    for name in dist.named_distances_with_gradients:
        if name not in spatial_distances:
            continue
        spatial_check(name, spatial_data, spatial_distances, with_grad=True)

    # Handle the few special distances separately

    # SEuclidean: one non-negative random value per feature, passed as V.
    feature_weights = np.abs(np.random.randn(n_features))
    expected = pairwise_distances(
        spatial_data, metric="seuclidean", V=feature_weights
    )
    actual = grad_distance_matrix(
        dist.standardised_euclidean_grad, spatial_data, feature_weights
    )
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric seuclidean",
    )

    # Weighted minkowski: reuses the same per-feature vector as weights.
    expected = pairwise_distances(
        spatial_data, metric="wminkowski", w=feature_weights, p=3
    )
    actual = grad_distance_matrix(
        dist.weighted_minkowski_grad, spatial_data, feature_weights, p=3
    )
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric weighted_minkowski",
    )

    # Mahalanobis: a fresh non-negative random square matrix is passed as VI.
    square_weights = np.abs(np.random.randn(n_features, n_features))
    expected = pairwise_distances(
        spatial_data, metric="mahalanobis", VI=square_weights
    )
    actual = grad_distance_matrix(
        dist.mahalanobis_grad, spatial_data, square_weights
    )
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric mahalanobis",
    )

    # Hellinger: absolute values only, and the last two rows are left out.
    expected = dist.pairwise_special_metric(
        np.abs(spatial_data[:-2]), np.abs(spatial_data[:-2])
    )
    actual = grad_distance_matrix(
        dist.hellinger_grad, np.abs(spatial_data[:-2])
    )
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric hellinger",
    )
def test_grad_metrics_match_metrics():
    """Compare gradient-returning metrics against reference distance matrices.

    Uses ``spatial_data`` and ``spatial_distances`` from the enclosing scope.
    For each metric, element ``[0]`` of the value returned by the ``*_grad``
    function is collected for every pair of rows and compared with a
    reference pairwise distance matrix.
    """

    def grad_distance_matrix(grad_fn, rows, *args, **kwargs):
        # Square matrix holding element [0] of grad_fn for every row pair.
        indices = range(rows.shape[0])
        return np.array(
            [
                [grad_fn(rows[i], rows[j], *args, **kwargs)[0] for j in indices]
                for i in indices
            ]
        )

    grad_metrics = dist.named_distances_with_gradients
    n_features = spatial_data.shape[1]

    for name in grad_metrics:
        if name not in spatial_distances:
            continue

        expected = pairwise_distances(spatial_data, metric=name)
        # scipy is bad sometimes
        if name == "braycurtis":
            expected[~np.isfinite(expected)] = 0.0
        elif name in ("cosine", "correlation"):
            expected[~np.isfinite(expected)] = 1.0
            # And because distance between all zero vectors should be zero
            expected[10, 11] = expected[11, 10] = 0.0

        actual = grad_distance_matrix(grad_metrics[name], spatial_data)
        assert_array_almost_equal(
            actual,
            expected,
            err_msg="Distances with grad don't match for metric {}".format(name),
        )

    # Handle the few special distances separately

    # SEuclidean: one non-negative random value per feature, passed as V.
    feature_weights = np.abs(np.random.randn(n_features))
    expected = pairwise_distances(
        spatial_data, metric="seuclidean", V=feature_weights
    )
    actual = grad_distance_matrix(
        dist.standardised_euclidean_grad, spatial_data, feature_weights
    )
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric seuclidean",
    )

    # Weighted minkowski: reuses the same per-feature vector as weights.
    expected = pairwise_distances(
        spatial_data, metric="wminkowski", w=feature_weights, p=3
    )
    actual = grad_distance_matrix(
        dist.weighted_minkowski_grad, spatial_data, feature_weights, p=3
    )
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric weighted_minkowski",
    )

    # Mahalanobis: a fresh non-negative random square matrix is passed as VI.
    square_weights = np.abs(np.random.randn(n_features, n_features))
    expected = pairwise_distances(
        spatial_data, metric="mahalanobis", VI=square_weights
    )
    actual = grad_distance_matrix(
        dist.mahalanobis_grad, spatial_data, square_weights
    )
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric mahalanobis",
    )

    # Hellinger: absolute values only, and the last two rows are left out.
    expected = dist.pairwise_special_metric(
        np.abs(spatial_data[:-2]), np.abs(spatial_data[:-2])
    )
    actual = grad_distance_matrix(
        dist.hellinger_grad, np.abs(spatial_data[:-2])
    )
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric hellinger",
    )
def test_chunked_parallel_special_metric_implementation_hellinger(
    spatial_data,
    stashed_previous_impl_for_regression_test,
):
    """Check ``chunked_parallel_special_metric`` against two references.

    The chunked result is first compared with a float32 matrix built from
    element ``[0]`` of ``dist.hellinger_grad`` for each row pair, and then
    with the output of the callable supplied by the
    ``stashed_previous_impl_for_regression_test`` fixture. All comparisons
    use exact array equality.
    """
    hellinger_msg = "Distances don't match for metric hellinger"

    # Absolute values with the last two rows dropped; X and Y operands are
    # built separately so the two arguments never alias each other.
    x_abs = np.abs(spatial_data[:-2])
    y_abs = np.abs(spatial_data[:-2])

    # Base tests that must pass!
    chunked_x = dist.chunked_parallel_special_metric(x_abs)
    chunked_xy = dist.chunked_parallel_special_metric(x_abs, y_abs)

    row_ids = range(spatial_data.shape[0] - 2)
    reference = np.array(
        [
            [
                dist.hellinger_grad(
                    np.abs(spatial_data[i]), np.abs(spatial_data[j])
                )[0]
                for j in row_ids
            ]
            for i in row_ids
        ]
    ).astype(np.float32)

    # X-only result first, then the X/Y result.
    for chunked in (chunked_x, chunked_xy):
        assert_array_equal(reference, chunked, err_msg=hellinger_msg)

    # Test to compare chunked_parallel different implementations
    stashed_x = stashed_previous_impl_for_regression_test(x_abs)
    stashed_xy = stashed_previous_impl_for_regression_test(x_abs, y_abs)

    assert_array_equal(
        stashed_xy,
        chunked_xy,
        err_msg=(
            "Distances don't match between stashed and current "
            "chunked_parallel implementations with X and Y!"
        ),
    )
    assert_array_equal(
        stashed_x,
        chunked_x,
        err_msg=(
            "Distances don't match between stashed and current "
            "chunked_parallel implementations with X only!"
        ),
    )

    # test hellinger on different X and Y Pair
    # NOTE(review): the second operand is signed random data and, unlike the
    # first operand, is not passed through np.abs - confirm this is intended.
    other_points = np.random.randn(10, 20)
    stashed_pair = stashed_previous_impl_for_regression_test(
        x_abs, other_points
    )
    chunked_pair = dist.chunked_parallel_special_metric(x_abs, other_points)

    assert_array_equal(
        stashed_pair,
        chunked_pair,
        err_msg=(
            "Distances don't match between stashed and current "
            "chunked_parallel implementations"
        ),
    )