Example #1
def test_n_threads_agnosticism(
    PairwiseDistancesReduction,
    seed,
    n_samples,
    chunk_size,
    n_features=100,
    dtype=np.float64,
):
    # Results should not depend on the number of threads
    rng = np.random.RandomState(seed)
    spread = 100
    X = rng.rand(n_samples, n_features).astype(dtype) * spread
    Y = rng.rand(n_samples, n_features).astype(dtype) * spread

    parameter = (
        10
        if PairwiseDistancesReduction is PairwiseDistancesArgKmin
        # Scaling the radius slightly with the number of dimensions
        else 10 ** np.log(n_features)
    )

    ref_dist, ref_indices = PairwiseDistancesReduction.compute(
        X,
        Y,
        parameter,
        return_distance=True,
    )

    with threadpoolctl.threadpool_limits(limits=1, user_api="openmp"):
        dist, indices = PairwiseDistancesReduction.compute(
            X, Y, parameter, return_distance=True)

    ASSERT_RESULT[PairwiseDistancesReduction](ref_dist, dist, ref_indices,
                                              indices)
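The snippets on this page are excerpts from a test module and assume its imports (import numpy as np, import threadpoolctl) as well as private helpers such as PairwiseDistancesArgKmin and ASSERT_RESULT. As a self-contained reference, here is a minimal sketch of the threadpoolctl context manager the test above relies on; threadpool_info and threadpool_limits are part of threadpoolctl's public API:

import threadpoolctl

# List the native thread pools (OpenMP, BLAS, ...) detected in the process.
print(threadpoolctl.threadpool_info())

# Temporarily cap every OpenMP thread pool at a single thread; the previous
# limits are restored when the context manager exits.
with threadpoolctl.threadpool_limits(limits=1, user_api="openmp"):
    pass  # any OpenMP-parallel computation placed here runs single-threaded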
Example #2
def test_strategies_consistency(
    PairwiseDistancesReduction,
    metric,
    n_samples,
    seed,
    n_features=10,
    dtype=np.float64,
):
    # Both parallelization strategies must yield identical results
    rng = np.random.RandomState(seed)
    spread = 100
    X = rng.rand(n_samples, n_features).astype(dtype) * spread
    Y = rng.rand(n_samples, n_features).astype(dtype) * spread

    # Haversine distance only accepts 2D data
    if metric == "haversine":
        X = np.ascontiguousarray(X[:, :2])
        Y = np.ascontiguousarray(Y[:, :2])

    parameter = (
        10
        if PairwiseDistancesReduction is PairwiseDistancesArgKmin
        # Scaling the radius slightly with the number of dimensions
        else 10 ** np.log(n_features)
    )

    dist_par_X, indices_par_X = PairwiseDistancesReduction.compute(
        X,
        Y,
        parameter,
        metric=metric,
        # Take the first set of dummy parameters for this metric
        metric_kwargs=_get_dummy_metric_params_list(metric, n_features)[0],
        # Use several chunks to make sure that parallelization is used
        chunk_size=n_samples // 4,
        strategy="parallel_on_X",
        return_distance=True,
    )

    dist_par_Y, indices_par_Y = PairwiseDistancesReduction.compute(
        X,
        Y,
        parameter,
        metric=metric,
        # Take the first set of dummy parameters for this metric
        metric_kwargs=_get_dummy_metric_params_list(metric, n_features)[0],
        # Use several chunks to make sure that parallelization is used
        chunk_size=n_samples // 4,
        strategy="parallel_on_Y",
        return_distance=True,
    )

    ASSERT_RESULT[PairwiseDistancesReduction](
        dist_par_X,
        dist_par_Y,
        indices_par_X,
        indices_par_Y,
    )
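ASSERT_RESULT is a private dispatch table defined elsewhere in the test module. The following is only a plausible sketch of what such a table could look like, with a stand-in class replacing the real private PairwiseDistancesArgKmin:

from numpy.testing import assert_allclose, assert_array_equal

class PairwiseDistancesArgKmin:  # stand-in for the real, private class
    pass

def assert_argkmin_results_equality(ref_dist, dist, ref_indices, indices):
    # Distances must match up to floating-point tolerance and the returned
    # neighbor indices must be exactly the same.
    assert_allclose(ref_dist, dist)
    assert_array_equal(ref_indices, indices)

# Dispatch table keyed by the reduction class, mirroring how ASSERT_RESULT
# is indexed in the tests above.
ASSERT_RESULT = {PairwiseDistancesArgKmin: assert_argkmin_results_equality}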
Example #3
def test_memmap_backed_data(
    metric,
    PairwiseDistancesReduction,
    n_samples=512,
    n_features=100,
    dtype=np.float64,
):
    # Results must not depend on the datasets' writability
    rng = np.random.RandomState(0)
    spread = 100
    X = rng.rand(n_samples, n_features).astype(dtype) * spread
    Y = rng.rand(n_samples, n_features).astype(dtype) * spread

    # Create read only datasets
    X_mm, Y_mm = create_memmap_backed_data([X, Y])

    if PairwiseDistancesReduction is PairwiseDistancesArgKmin:
        parameter = 10
        check_parameters = {}
    else:
        # Scaling the radius slightly with the number of dimensions
        radius = 10**np.log(n_features)
        parameter = radius
        check_parameters = {"radius": radius}

    ref_dist, ref_indices = PairwiseDistancesReduction.compute(
        X,
        Y,
        parameter,
        metric=metric,
        return_distance=True,
    )

    dist_mm, indices_mm = PairwiseDistancesReduction.compute(
        X_mm,
        Y_mm,
        parameter,
        metric=metric,
        return_distance=True,
    )

    ASSERT_RESULT[(PairwiseDistancesReduction, dtype)](ref_dist, dist_mm,
                                                       ref_indices, indices_mm,
                                                       **check_parameters)
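create_memmap_backed_data comes from scikit-learn's private testing utilities (sklearn.utils._testing). A minimal numpy-only stand-in that produces the same kind of read-only, memory-mapped arrays could look like this:

import tempfile
from pathlib import Path

import numpy as np

def create_memmap_backed_data_sketch(arrays):
    # Dump each array to disk, then reload it as a read-only memmap.
    folder = Path(tempfile.mkdtemp())
    memmaps = []
    for i, array in enumerate(arrays):
        path = folder / f"data_{i}.npy"
        np.save(path, array)
        # mmap_mode="r" yields a read-only, memory-mapped view of the file.
        memmaps.append(np.load(path, mmap_mode="r"))
    return memmaps

X = np.random.RandomState(0).rand(8, 3)
X_mm, = create_memmap_backed_data_sketch([X])
assert not X_mm.flags.writeable  # in-place writes would raise ValueError
np.testing.assert_array_equal(X, X_mm)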
Example #4
def test_chunk_size_agnosticism(
    global_random_seed,
    PairwiseDistancesReduction,
    n_samples,
    chunk_size,
    n_features=100,
    dtype=np.float64,
):
    # Results must not depend on the chunk size
    rng = np.random.RandomState(global_random_seed)
    spread = 100
    X = rng.rand(n_samples, n_features).astype(dtype) * spread
    Y = rng.rand(n_samples, n_features).astype(dtype) * spread

    if PairwiseDistancesReduction is PairwiseDistancesArgKmin:
        parameter = 10
        check_parameters = {}
    else:
        # Scaling the radius slightly with the number of dimensions
        radius = 10**np.log(n_features)
        parameter = radius
        check_parameters = {"radius": radius}

    ref_dist, ref_indices = PairwiseDistancesReduction.compute(
        X,
        Y,
        parameter,
        metric="manhattan",
        return_distance=True,
    )

    dist, indices = PairwiseDistancesReduction.compute(
        X,
        Y,
        parameter,
        chunk_size=chunk_size,
        metric="manhattan",
        return_distance=True,
    )

    ASSERT_RESULT[(PairwiseDistancesReduction, dtype)](ref_dist, dist,
                                                       ref_indices, indices,
                                                       **check_parameters)
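Note that the radius formula used above is in fact polynomial in the dimensionality: 10 ** np.log(n) equals n ** np.log(10), i.e. roughly n ** 2.3.

import numpy as np

for n_features in (2, 10, 100):
    # 10 ** ln(n) == n ** ln(10), so the radius grows roughly like n ** 2.3
    print(n_features, 10 ** np.log(n_features))
# prints approximately: 2 -> ~4.9, 10 -> ~200.7, 100 -> ~40288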
Example #5
def test_n_threads_agnosticism(
    global_random_seed,
    PairwiseDistancesReduction,
    n_samples,
    chunk_size,
    n_features=100,
    dtype=np.float64,
):
    # Results must not depend on the number of threads
    rng = np.random.RandomState(global_random_seed)
    spread = 100
    X = rng.rand(n_samples, n_features).astype(dtype) * spread
    Y = rng.rand(n_samples, n_features).astype(dtype) * spread

    if PairwiseDistancesReduction is PairwiseDistancesArgKmin:
        parameter = 10
        check_parameters = {}
    else:
        # Scaling the radius slightly with the number of dimensions
        radius = 10**np.log(n_features)
        parameter = radius
        check_parameters = {"radius": radius}

    ref_dist, ref_indices = PairwiseDistancesReduction.compute(
        X,
        Y,
        parameter,
        return_distance=True,
    )

    with threadpoolctl.threadpool_limits(limits=1, user_api="openmp"):
        dist, indices = PairwiseDistancesReduction.compute(
            X, Y, parameter, return_distance=True)

    ASSERT_RESULT[(PairwiseDistancesReduction, dtype)](ref_dist, dist,
                                                       ref_indices, indices,
                                                       **check_parameters)
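Example #6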
def test_chunk_size_agnosticism(
    global_random_seed,
    PairwiseDistancesReduction,
    n_samples,
    chunk_size,
    n_features=100,
    dtype=np.float64,
):
    # Results should not depend on the chunk size
    rng = np.random.RandomState(global_random_seed)
    spread = 100
    X = rng.rand(n_samples, n_features).astype(dtype) * spread
    Y = rng.rand(n_samples, n_features).astype(dtype) * spread

    parameter = (
        10
        if PairwiseDistancesReduction is PairwiseDistancesArgKmin
        # Scaling the radius slightly with the number of dimensions
        else 10 ** np.log(n_features)
    )

    ref_dist, ref_indices = PairwiseDistancesReduction.compute(
        X,
        Y,
        parameter,
        return_distance=True,
    )

    dist, indices = PairwiseDistancesReduction.compute(
        X,
        Y,
        parameter,
        chunk_size=chunk_size,
        return_distance=True,
    )

    ASSERT_RESULT[PairwiseDistancesReduction](ref_dist, dist, ref_indices, indices)
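All of the snippets above omit their pytest decorators. A hypothetical parametrization showing how such a test could be wired up (the reduction classes here are stand-ins, and global_random_seed is assumed to be scikit-learn's fixture of the same name):

import pytest

class PairwiseDistancesArgKmin:  # stand-ins for the real, private
    pass                         # reduction classes

class PairwiseDistancesRadiusNeighborhood:
    pass

@pytest.mark.parametrize("n_samples", [100, 1000])
@pytest.mark.parametrize("chunk_size", [50, 512, 1024])
@pytest.mark.parametrize(
    "PairwiseDistancesReduction",
    [PairwiseDistancesArgKmin, PairwiseDistancesRadiusNeighborhood],
)
def test_chunk_size_agnosticism(
    global_random_seed, PairwiseDistancesReduction, n_samples, chunk_size
):
    ...  # body as in Example #4 above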