Example #1
0
def test_pairwise_distances_sklearn_comparison(metric: str, matrix_size):
    """Compare ``pairwise_distances`` with sklearn on larger inputs.

    Two random matrices of shape ``matrix_size`` are generated from a fixed
    seed and the distance matrix is computed first in fp64 and then in fp32.
    The result is checked against ``sklearn_pairwise_distances`` only when
    the input holds at most 2,000,000 elements; for bigger inputs the call
    under test is still executed but its output is not compared.

    Args:
        metric: Metric name forwarded unchanged to both implementations.
        matrix_size: Two-element shape ``(rows, cols)`` used for both inputs.
    """
    # Test larger sizes to sklearn
    rng = np.random.RandomState(1)

    element_count = matrix_size[0] * matrix_size[1]
    compare_to_sklearn = element_count <= 2000000

    X = rng.random_sample(matrix_size)
    Y = rng.random_sample(matrix_size)

    # For fp64, compare at 10 decimals, (5 places less than the ~15 max)
    # For fp32, compare at 4 decimals, (3 places less than the ~7 max)
    for dtype, compare_precision in ((np.float64, 10), (np.float32, 4)):
        # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit
        # dtype is the documented replacement (a no-op for the fp64 pass).
        X_typed = np.asarray(X, dtype=dtype)
        Y_typed = np.asarray(Y, dtype=dtype)

        S = pairwise_distances(X_typed, Y_typed, metric=metric)

        if compare_to_sklearn:
            S2 = sklearn_pairwise_distances(X_typed, Y_typed, metric=metric)
            cp.testing.assert_array_almost_equal(
                S, S2, decimal=compare_precision)
Example #2
0
def compute_pairwise_distances(
    n_samples,
    embeddings=None,
    pairwise_distances=None,
    metric='euclidean',
):
    """Return pairwise distances, computed from embeddings or passed through.

    Exactly one of ``embeddings`` and ``pairwise_distances`` must be given.

    Args:
        n_samples: Expected number of samples; used to validate the input.
        embeddings: 2-dimensional array-like ``[n_samples, n_features]``.
            Converted with ``np.asarray`` before use.
        pairwise_distances: Precomputed distances of shape
            ``(n_samples, n_samples)``. Returned unchanged after validation.
        metric: Metric passed to ``sklearn_pairwise_distances`` when
            embeddings are given. ``'hellinger'`` is handled here as the
            Euclidean distance between element-wise square roots (no
            ``1/sqrt(2)`` normalisation is applied). Ignored, with a
            ``RuntimeWarning``, when precomputed distances are given.

    Returns:
        The distance matrix: either the result of
        ``sklearn_pairwise_distances`` or the ``pairwise_distances`` argument.

    Raises:
        RuntimeError: If both or neither of the two inputs are provided.
        AssertionError: If the provided input has the wrong shape.
        InputErrorRTG: If ``metric == 'hellinger'`` and the embeddings contain
            negative values.
    """
    # Local import so the block does not depend on the file's import section.
    import warnings

    if (pairwise_distances is None) == (embeddings is None):
        raise RuntimeError(
            'Embeddings or pairwise_distances should be provided (only one, not both)'
        )

    if pairwise_distances is not None:
        if metric != 'euclidean':
            # This is informational: the distances are still valid, so warn
            # instead of aborting the call by raising the warning class.
            warnings.warn(
                f'Passed metric ({metric}) not used as distances are passed',
                RuntimeWarning,
                stacklevel=2,
            )
        # Explicit raise (rather than `assert`) so the check survives `-O`.
        if np.shape(pairwise_distances) != (n_samples, n_samples):
            raise AssertionError('wrong shape of distances passed')
        return pairwise_distances

    embeddings = np.asarray(embeddings)
    if np.ndim(embeddings) != 2:
        raise AssertionError(
            'embeddings should be 2-dimensional metric [n_samples, n_features]'
        )
    if len(embeddings) != n_samples:
        raise AssertionError(
            'number of embeddings should be the same as number of rows in metadata'
        )

    if metric == 'hellinger':
        # The square root below is undefined for negative entries.
        if np.min(embeddings) < 0:
            raise InputErrorRTG(
                'Hellinger distance requires non-negative elements in embedding'
            )
        return sklearn_pairwise_distances(np.sqrt(embeddings),
                                          metric='euclidean')
    return sklearn_pairwise_distances(embeddings, metric=metric)
Example #3
0
def test_pairwise_distances_one_dimension_order(metric: str):
    """Check single-row inputs across every C/Fortran memory-order pairing.

    The comparison against ``sklearn_pairwise_distances`` is run twice: once
    with ``X`` as the single-row input and once with ``Y`` as the single-row
    input. Each run covers the C/C, C/F, F/C and F/F order combinations.

    Args:
        metric: Metric name forwarded unchanged to both implementations.
    """
    # Test the pairwise_distance helper function for 1 dimensional cases which
    # can break down when using a size of 1 for either dimension
    rng = np.random.RandomState(2)

    # For fp64, compare at 13 decimals, (2 places less than the ~15 max)
    compare_precision = 13

    def check_all_orders(x_shape, y_shape):
        # Draw fresh inputs and compare to sklearn for each order pairing.
        Xc = rng.random_sample(x_shape)
        Yc = rng.random_sample(y_shape)
        Xf = np.asfortranarray(Xc)
        Yf = np.asfortranarray(Yc)

        # Iteration order is C/C, C/F, F/C, F/F
        for X in (Xc, Xf):
            for Y in (Yc, Yf):
                S = pairwise_distances(X, Y, metric=metric)
                S2 = sklearn_pairwise_distances(X, Y, metric=metric)
                cp.testing.assert_array_almost_equal(
                    S, S2, decimal=compare_precision)

    # X has the single row
    check_all_orders((1, 4), (10, 4))

    # Switch which input has single dimension: Y has the single row
    check_all_orders((10, 4), (1, 4))
Example #4
0
def pairwise_distance_matrix(rankings):
    """Return the pairwise distance matrix of ``rankings`` against itself.

    ``mergeSortDistance`` is used as the metric. Dask arrays are dispatched to
    ``dask_pairwise_distances`` (with the second operand materialised through
    ``np.asarray``); NumPy arrays are dispatched to
    ``sklearn_pairwise_distances``.

    Args:
        rankings (numpy.ndarray, dask.array.Array): Rows to compare pairwise.

    Returns:
        The distance matrix produced by the selected backend.

    Raises:
        TypeError: If ``rankings`` is neither a dask array nor a NumPy array.
    """
    if isinstance(rankings, da.Array):
        return dask_pairwise_distances(rankings,
                                       np.asarray(rankings),
                                       metric=mergeSortDistance)
    if isinstance(rankings, np.ndarray):
        return sklearn_pairwise_distances(rankings,
                                          rankings,
                                          metric=mergeSortDistance)
    # Fail with a clear message instead of an UnboundLocalError on return.
    raise TypeError(
        'rankings must be a numpy.ndarray or dask.array.Array, got '
        f'{type(rankings).__name__}')
Example #5
0
def test_pairwise_distances(metric: str, matrix_size, is_col_major):
    """Compare ``pairwise_distances`` with sklearn over several input variants.

    Covers a single input, two identical inputs, inputs with different row
    counts, mixed fp64/fp32 precision, pure fp32, an integer ``Y`` passed with
    ``convert_dtype=True``, and finally checks that a capitalised metric name
    raises ``ValueError``.

    Args:
        metric: Metric name forwarded unchanged to both implementations.
        matrix_size: Shape ``(rows, cols)`` of the primary input ``X``.
        is_col_major: When truthy, generated arrays are converted to Fortran
            (column-major) order before use.
    """
    # Test the pairwise_distance helper function.
    rng = np.random.RandomState(0)

    def prep_array(array):
        return np.asfortranarray(array) if is_col_major else array

    # For fp64, compare at 10 decimals, (5 places less than the ~15 max)
    compare_precision = 10

    # Compare to sklearn, single input
    X = prep_array(rng.random_sample(matrix_size))
    S = pairwise_distances(X, metric=metric)
    S2 = sklearn_pairwise_distances(X, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # Compare to sklearn, double input with same dimensions
    Y = X
    S = pairwise_distances(X, Y, metric=metric)
    S2 = sklearn_pairwise_distances(X, Y, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # Compare single and double inputs to each other
    S = pairwise_distances(X, metric=metric)
    S2 = pairwise_distances(X, Y, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # Compare to sklearn, with Y dim != X dim
    Y = prep_array(rng.random_sample((2, matrix_size[1])))
    S = pairwise_distances(X, Y, metric=metric)
    S2 = sklearn_pairwise_distances(X, Y, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # Change precision of one parameter
    # (np.asfarray was removed in NumPy 2.0; np.asarray with an explicit
    # dtype is the documented replacement)
    Y = np.asarray(Y, dtype=np.float32)
    S = pairwise_distances(X, Y, metric=metric)
    S2 = sklearn_pairwise_distances(X, Y, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # For fp32, compare at 2 decimals, (5 places less than the ~7 max)
    compare_precision = 2

    # Change precision of both parameters to float
    X = np.asarray(X, dtype=np.float32)
    Y = np.asarray(Y, dtype=np.float32)
    S = pairwise_distances(X, Y, metric=metric)
    S2 = sklearn_pairwise_distances(X, Y, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # Test sending an int type with convert_dtype=True
    Y = prep_array(rng.randint(10, size=Y.shape))
    S = pairwise_distances(X, Y, metric=metric, convert_dtype=True)
    S2 = sklearn_pairwise_distances(X, Y, metric=metric)
    cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)

    # Test that uppercase on the metric name throws an error.
    with pytest.raises(ValueError):
        pairwise_distances(X, Y, metric=metric.capitalize())
Example #6
0
def pairwise_spearman_distance_matrix(rankings):
    """Returns Spearman Distances for the provided rankings

    ``spearman_squared_distance`` is used as the metric. Dask arrays are
    dispatched to ``dask_pairwise_distances`` (with the second operand
    materialised through ``np.asarray``); NumPy arrays are dispatched to
    ``sklearn_pairwise_distances``. The backend is chosen from the type of
    ``rankings``; there is no separate switch for it.

    Args:
        rankings (numpy.array, dask.array): Normalized Attributions

    Returns:
        [array[array]]: Spearman Distance Matrix

    Raises:
        TypeError: If ``rankings`` is neither a dask array nor a NumPy array.
    """
    if isinstance(rankings, da.Array):
        return dask_pairwise_distances(rankings,
                                       np.asarray(rankings),
                                       metric=spearman_squared_distance)
    if isinstance(rankings, np.ndarray):
        return sklearn_pairwise_distances(rankings,
                                          rankings,
                                          metric=spearman_squared_distance)
    # Fail with a clear message instead of an UnboundLocalError on return.
    raise TypeError(
        'rankings must be a numpy.ndarray or dask.array.Array, got '
        f'{type(rankings).__name__}')