def test_pairwise_distances_sklearn_comparison(metric: str, matrix_size):
    """Compare ``pairwise_distances`` with scikit-learn on larger inputs.

    Two random matrices of shape ``matrix_size`` are generated from a fixed
    seed and the comparison is run for float64 and then float32 inputs.
    ``pairwise_distances`` is always executed; the scikit-learn reference is
    only computed (and compared) when the input has at most 2,000,000
    elements.

    Parameters
    ----------
    metric : str
        Metric name forwarded to both implementations.
    matrix_size : sequence of two ints
        ``(rows, cols)`` shape used for both ``X`` and ``Y``.
    """
    # Test larger sizes to sklearn
    rng = np.random.RandomState(1)

    element_count = matrix_size[0] * matrix_size[1]

    X = rng.random_sample(matrix_size)
    Y = rng.random_sample(matrix_size)

    # For fp64, compare at 10 decimals, (5 places less than the ~15 max)
    # For fp32, compare at 4 decimals, (3 places less than the ~7 max)
    precision_by_dtype = ((np.float64, 10), (np.float32, 4))

    for dtype, compare_precision in precision_by_dtype:
        # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit
        # dtype is the documented replacement (and is a no-op for the fp64
        # pass, since random_sample already yields float64).
        X = np.asarray(X, dtype=dtype)
        Y = np.asarray(Y, dtype=dtype)

        # Always exercise our implementation, even when the input is too
        # large for the reference comparison below.
        S = pairwise_distances(X, Y, metric=metric)

        # Compare to sklearn only up to the element-count limit
        # (presumably to keep the reference computation affordable).
        if element_count <= 2000000:
            S2 = sklearn_pairwise_distances(X, Y, metric=metric)
            cp.testing.assert_array_almost_equal(
                S, S2, decimal=compare_precision)
def compute_pairwise_distances(
    n_samples,
    embeddings=None,
    pairwise_distances=None,
    metric='euclidean',
):
    """Return a pairwise distance matrix from embeddings or pass one through.

    Exactly one of ``embeddings`` and ``pairwise_distances`` must be given.

    Args:
        n_samples: Expected number of samples; used to validate the shape of
            whichever input is supplied.
        embeddings: Array-like converted with ``np.asarray``; must be
            2-dimensional with ``n_samples`` rows.
        pairwise_distances: Precomputed distances; must expose ``.shape``
            equal to ``(n_samples, n_samples)``. Returned unchanged.
        metric: Metric forwarded to ``sklearn_pairwise_distances``. The
            special value ``'hellinger'`` is computed as the Euclidean
            distance between the element-wise square roots of the embeddings.

    Returns:
        The distance matrix computed from ``embeddings``, or
        ``pairwise_distances`` itself when it was supplied.

    Raises:
        RuntimeError: If both or neither of the two inputs are given.
        RuntimeWarning: If distances are supplied together with a metric
            other than ``'euclidean'``.
        InputErrorRTG: If ``metric == 'hellinger'`` and the embeddings contain
            a negative element.
        AssertionError: If a shape check fails (checks are ``assert``
            statements, so they are skipped under ``python -O``).
    """
    have_distances = pairwise_distances is not None
    have_embeddings = embeddings is not None

    # Both present or both absent is an error: exactly one input is allowed.
    if have_distances == have_embeddings:
        raise RuntimeError(
            'Embeddings or pairwise_distances should be provided (only one, not both)'
        )

    if have_distances:
        # NOTE(review): this *raises* a warning class instead of calling
        # warnings.warn, so it aborts the call — confirm that is intended.
        if metric != 'euclidean':
            raise RuntimeWarning(
                f'Passed metric ({metric}) not used as distances are passed')
        expected_shape = (n_samples, n_samples)
        assert pairwise_distances.shape == expected_shape, \
            'wrong shape of distances passed'
        return pairwise_distances

    points = np.asarray(embeddings)
    assert np.ndim(points) == 2, \
        'embeddings should be 2-dimensional metric [n_samples, n_features]'
    assert len(points) == n_samples, \
        'number of embeddings should be the same as number of rows in metadata'

    if metric != 'hellinger':
        return sklearn_pairwise_distances(points, metric=metric)

    # Square roots are only defined for non-negative entries.
    if np.min(points) < 0:
        raise InputErrorRTG(
            'Hellinger distance requires non-negative elements in embedding'
        )
    return sklearn_pairwise_distances(np.sqrt(points), metric='euclidean')
def test_pairwise_distances_one_dimension_order(metric: str):
    """Compare ``pairwise_distances`` with scikit-learn for single-row inputs.

    Exercises every combination of C- and Fortran-ordered inputs, first with
    a single-row ``X`` and then with a single-row ``Y``.

    Parameters
    ----------
    metric : str
        Metric name forwarded to both implementations.
    """
    # Test the pairwise_distance helper function for 1 dimensional cases which
    # can break down when using a size of 1 for either dimension
    rng = np.random.RandomState(2)

    # For fp64, compare at 13 decimals, (2 places less than the ~15 max)
    compare_precision = 13

    def check_all_orders(x_c, y_c):
        """Compare to sklearn for the C/C, C/F, F/C and F/F order pairs."""
        x_f = np.asfortranarray(x_c)
        y_f = np.asfortranarray(y_c)
        for x in (x_c, x_f):
            for y in (y_c, y_f):
                S = pairwise_distances(x, y, metric=metric)
                S2 = sklearn_pairwise_distances(x, y, metric=metric)
                cp.testing.assert_array_almost_equal(
                    S, S2, decimal=compare_precision)

    # X has the single row
    Xc = rng.random_sample((1, 4))
    Yc = rng.random_sample((10, 4))
    check_all_orders(Xc, Yc)

    # Switch which input has single dimension: Y now has the single row.
    # (Previously the same (1, 4) / (10, 4) shapes were generated again, so
    # this case was never actually exercised.)
    Xc = rng.random_sample((10, 4))
    Yc = rng.random_sample((1, 4))
    check_all_orders(Xc, Yc)
def pairwise_distance_matrix(rankings):
    """Return the pairwise distance matrix of ``rankings`` with itself.

    Distances are computed with the ``mergeSortDistance`` metric. Dask arrays
    are dispatched to ``dask_pairwise_distances`` (with the second operand
    materialised through ``np.asarray``); NumPy arrays are dispatched to
    ``sklearn_pairwise_distances``.

    Args:
        rankings (numpy.ndarray or dask.array.Array): Rankings to compare.

    Returns:
        The distance matrix produced by the selected backend.

    Raises:
        TypeError: If ``rankings`` is neither a dask nor a NumPy array.
    """
    if isinstance(rankings, da.Array):
        return dask_pairwise_distances(
            rankings, np.asarray(rankings), metric=mergeSortDistance)
    if isinstance(rankings, np.ndarray):
        return sklearn_pairwise_distances(
            rankings, rankings, metric=mergeSortDistance)
    # Previously fell through to `return D` with D unbound, which surfaced as
    # an opaque UnboundLocalError; fail with an explicit message instead.
    raise TypeError(
        'rankings must be a dask or numpy array, '
        f'got {type(rankings).__name__}'
    )
def test_pairwise_distances(metric: str, matrix_size, is_col_major):
    """Compare ``pairwise_distances`` with scikit-learn on assorted inputs.

    Covers single and double inputs, mismatched row counts, mixed and
    reduced precision, integer input with ``convert_dtype=True``, and
    rejection of a capitalised metric name.

    Parameters
    ----------
    metric : str
        Metric name forwarded to both implementations.
    matrix_size : sequence of two ints
        ``(rows, cols)`` shape of ``X``.
    is_col_major : bool
        When true, inputs are converted to Fortran order before use.
    """
    # Test the pairwise_distance helper function.
    rng = np.random.RandomState(0)

    def prep_array(array):
        return np.asfortranarray(array) if is_col_major else array

    # For fp64, compare at 10 decimals, (5 places less than the ~15 max)
    compare_precision = 10

    # Compare to sklearn, single input
    X = prep_array(rng.random_sample(matrix_size))
    S = pairwise_distances(X, metric=metric)
    S2 = sklearn_pairwise_distances(X, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # Compare to sklearn, double input with same dimensions
    Y = X
    S = pairwise_distances(X, Y, metric=metric)
    S2 = sklearn_pairwise_distances(X, Y, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # Compare single and double inputs to each other
    S = pairwise_distances(X, metric=metric)
    S2 = pairwise_distances(X, Y, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # Compare to sklearn, with Y dim != X dim
    Y = prep_array(rng.random_sample((2, matrix_size[1])))
    S = pairwise_distances(X, Y, metric=metric)
    S2 = sklearn_pairwise_distances(X, Y, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # Change precision of one parameter
    # (np.asfarray was removed in NumPy 2.0; np.asarray with an explicit
    # dtype is the documented replacement.)
    Y = np.asarray(Y, dtype=np.float32)
    S = pairwise_distances(X, Y, metric=metric)
    S2 = sklearn_pairwise_distances(X, Y, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # For fp32, compare at 2 decimals, (5 places less than the ~7 max)
    compare_precision = 2

    # Change precision of both parameters to float
    X = np.asarray(X, dtype=np.float32)
    Y = np.asarray(Y, dtype=np.float32)
    S = pairwise_distances(X, Y, metric=metric)
    S2 = sklearn_pairwise_distances(X, Y, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # Test sending an int type with convert_dtype=True
    Y = prep_array(rng.randint(10, size=Y.shape))
    S = pairwise_distances(X, Y, metric=metric, convert_dtype=True)
    S2 = sklearn_pairwise_distances(X, Y, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # Test that uppercase on the metric name throws an error.
    with pytest.raises(ValueError):
        pairwise_distances(X, Y, metric=metric.capitalize())
def pairwise_spearman_distance_matrix(rankings):
    """Return the pairwise Spearman distance matrix of ``rankings``.

    Distances are computed with the ``spearman_squared_distance`` metric.
    Dask arrays are dispatched to ``dask_pairwise_distances`` (with the
    second operand materialised through ``np.asarray``); NumPy arrays are
    dispatched to ``sklearn_pairwise_distances``. The backend is chosen from
    the type of ``rankings``; there is no separate flag for it.

    Args:
        rankings (numpy.ndarray or dask.array.Array): Normalized attributions.

    Returns:
        The Spearman distance matrix produced by the selected backend.

    Raises:
        TypeError: If ``rankings`` is neither a dask nor a NumPy array.
    """
    if isinstance(rankings, da.Array):
        return dask_pairwise_distances(
            rankings, np.asarray(rankings), metric=spearman_squared_distance)
    if isinstance(rankings, np.ndarray):
        return sklearn_pairwise_distances(
            rankings, rankings, metric=spearman_squared_distance)
    # Previously fell through to `return D` with D unbound, which surfaced as
    # an opaque UnboundLocalError; fail with an explicit message instead.
    raise TypeError(
        'rankings must be a dask or numpy array, '
        f'got {type(rankings).__name__}'
    )