Beispiel #1
0
def test_additivity_with_weights(data, split_index):
    """
    Check that a weighted KDE is additive over a partition of the data.

    The estimate fitted on all points must equal the sum of the estimates
    fitted on the two parts, each scaled by the total weight of its part.

    TODO: Parameterize this test w.r.t implementation.
    """
    grid = np.linspace(-10, 15)

    # Linearly increasing weights, normalized so that they sum to one
    raw_weights = np.arange(len(data)) + 1
    weights = raw_weights / np.sum(raw_weights)

    # Reference estimate: fit to all of the data at once
    y = FFTKDE("epa").fit(data, weights).evaluate(grid)

    # Partition the data and the weights at the same index
    data, weights = list(data), list(weights)
    parts = [
        (data[:split_index], weights[:split_index]),
        (data[split_index:], weights[split_index:]),
    ]

    # Fit each part on its own and rescale by the weight mass of that part
    y_1, y_2 = [
        FFTKDE("epa").fit(points, part_weights).evaluate(grid)
        * sum(part_weights)
        for points, part_weights in parts
    ]

    # The rescaled partial estimates must add up to the full estimate
    assert np.allclose(y, y_1 + y_2)
Beispiel #2
0
def test_additivity(data, split_index):
    """
    Check that the KDE is additive over a partition of the data.

    The estimate fitted on all points must equal the sum of the estimates
    fitted on the two parts, each scaled by the fraction of points it holds.
    """
    grid = np.linspace(-10, 10)
    n_total = len(data)

    # Reference estimate: fit to all of the data at once
    y = FFTKDE('epa').fit(data).evaluate(grid)

    # Split the data and fit each part separately
    head, tail = data[:split_index], data[split_index:]
    head_density = FFTKDE('epa').fit(head).evaluate(grid)
    tail_density = FFTKDE('epa').fit(tail).evaluate(grid)

    # Compensate for the smaller sample sizes with proportional weights
    y_1 = head_density * (split_index / n_total)
    y_2 = tail_density * ((n_total - split_index) / n_total)

    # The rescaled partial estimates must add up to the full estimate
    assert np.allclose(y, y_1 + y_2)
Beispiel #3
0
def test_additivity(data, split_index):
    """
    Verify the additive property of the KDE.

    Fitting on the full data set must give the same curve as summing the
    fits on the two halves of a split, once each half is scaled by its
    share of the data points.

    TODO: Parameterize this test w.r.t implementation.
    """
    x = np.linspace(-10, 12)

    # Share of the data points that ends up on each side of the split
    n_points = len(data)
    share_first = split_index / n_points
    share_second = (n_points - split_index) / n_points

    # Estimate on everything, then on each side of the split
    y = FFTKDE("epa").fit(data).evaluate(x)
    y_1 = share_first * FFTKDE("epa").fit(data[:split_index]).evaluate(x)
    y_2 = share_second * FFTKDE("epa").fit(data[split_index:]).evaluate(x)

    # The weighted partial estimates must reproduce the full estimate
    assert np.allclose(y, y_1 + y_2)
Beispiel #4
0
def test_against_naive_KDE(data, bw):
    """
    Test the FFTKDE against a naive KDE without weights.
    """
    # Evaluate from one bandwidth below the smallest data point to one
    # bandwidth above the largest. Accuracy gets higher as num gets larger.
    lower = min(data) - bw
    upper = max(data) + bw
    x = np.linspace(lower, upper, num=2**10)

    naive_estimator = NaiveKDE("epa", bw=bw).fit(data, weights=None)
    fft_estimator = FFTKDE("epa", bw=bw).fit(data, weights=None)

    y1 = naive_estimator.evaluate(x)
    y2 = fft_estimator.evaluate(x)

    # Both implementations must agree up to the absolute tolerance
    assert np.allclose(y1, y2, atol=10e-5)
Beispiel #5
0
def FFTKDE_test_grid_inside_data_2D():
    """
    When using a custom grid, an error should be raised if the data is not
    contained in the grid. The linear binning routine will crash if this
    is not the case. See Issue:
    https://github.com/tommyod/KDEpy/issues/7

    A grid is first generated automatically from four points on the unit
    square. Each case then adds one extra point and evaluates on that same
    grid, which is expected to raise a ValueError.
    """
    inside = [[0, 0], [0, 1], [1, 0], [1, 1]]
    grid, _ = FFTKDE().fit(np.array(inside)).evaluate()  # To get a grid

    # One extra point per case, each far away from the unit square
    for outlier in ([0, 6], [0, -4], [0, 100]):
        # Build the input outside of pytest.raises, so that only the
        # KDE call itself can satisfy the expected ValueError
        bad_data = np.array(inside + [outlier])

        with pytest.raises(ValueError):
            FFTKDE().fit(bad_data).evaluate(grid)
Beispiel #6
0
def FFTKDE_test_grid_inside_data_1D():
    """
    When using a custom grid, an error should be raised if the data is not
    contained in the grid. The linear binning routine will crash if this
    is not the case. See Issue:
    https://github.com/tommyod/KDEpy/issues/7

    A grid that extends beyond the data on both sides must be accepted.
    Grids that leave data outside, or that end exactly on the extreme data
    points, are expected to raise a ValueError.
    """
    data = np.array([0, 1, 2, 3, 4, 5])

    good_grid = np.linspace(-1, 6, num=2**6)
    FFTKDE().fit(data).evaluate(good_grid)  # This should cause no problem

    bad_grid_limits = (
        (2, 6),   # data points 0 and 1 lie below the grid
        (-2, 4),  # data point 5 lies above the grid
        (0, 5),   # grid ends coincide with the extreme data points
    )
    for low, high in bad_grid_limits:
        # Build the grid outside of pytest.raises, so that only the
        # KDE call itself can satisfy the expected ValueError
        bad_grid = np.linspace(low, high, num=2**6)

        with pytest.raises(ValueError):
            FFTKDE().fit(data).evaluate(bad_grid)
Beispiel #7
0
def test_against_naive_KDE_w_weights(data, bw):
    """
    Test the FFTKDE against a naive KDE with weights.
    """
    # Linearly increasing weights: 1, 2, ..., len(data)
    weights = np.arange(len(data)) + 1

    # Evaluate from one bandwidth below the smallest data point to one
    # bandwidth above the largest. Accuracy gets higher as num gets larger.
    lower = min(data) - bw
    upper = max(data) + bw
    x = np.linspace(lower, upper, num=2**10)

    naive_estimator = NaiveKDE('epa', bw=bw).fit(data, weights=weights)
    fft_estimator = FFTKDE('epa', bw=bw).fit(data, weights=weights)

    y1 = naive_estimator.evaluate(x)
    y2 = fft_estimator.evaluate(x)

    # Both implementations must agree up to the absolute tolerance
    assert np.allclose(y1, y2, atol=10e-4)
Beispiel #8
0
def _compute_kde(data,
                 bw,
                 weights,
                 standardize,
                 n_grid_points,
                 return_pdf_at=None):
    """Compute a KDE on a grid and return the density at the query points.

    A Gaussian FFTKDE is fitted to `data` and evaluated on an equidistant
    grid. Every query point is then mapped to the grid point that is nearest
    along each axis, and the density stored at that grid point is returned.
    The values are returned exactly as produced by `FFTKDE.evaluate`; no
    logarithm is applied in this function.

    :param data: [numpy array] The data on which to fit the KDE. It must be
        two-dimensional, of shape [n_samples x n_dimensions].
    :param bw: [float] The bandwidth parameter that is passed to FFTKDE.
    :param weights: [numpy array or None] Sample weights. They are flattened
        before being passed to FFTKDE, so shapes such as [n_samples x 1] are
        accepted. If None, no weights are passed.
    :param standardize: [bool] Whether to standardize the data to mean 0 and
        standard deviation 1. The query points are transformed with the same
        scaler, which is fitted on `data`.
    :param n_grid_points: [int] Number of grid points on an equidistant grid
        on which to evaluate the KDE.
    :param return_pdf_at: [numpy array] The points at which to return the
        density, of shape [n_points x n_dimensions]. If None, the density at
        all `data` samples is returned. In the context of leave-one-out KDE,
        it is the left-out sample.
    :return: [numpy array] One-dimensional array holding the density at the
        grid point nearest to each query point.
    """
    # Default to querying the density at the training points themselves
    if return_pdf_at is None:
        return_pdf_at = data.copy()

    # Standardize data and query points with the same scaler, if requested
    if standardize:
        scaler = StandardScaler(with_mean=True, with_std=True,
                                copy=True).fit(data)
        data = scaler.transform(data)
        return_pdf_at = scaler.transform(return_pdf_at)

    # Estimate KDE over a grid
    flat_weights = None if weights is None else weights.ravel()
    kde_estimator = FFTKDE(kernel='gaussian',
                           bw=bw).fit(data, weights=flat_weights)
    kde_grid, kde = kde_estimator.evaluate(grid_points=n_grid_points)

    # Make the grid two-dimensional so that it can be indexed per axis
    n_dims = data.shape[1]
    if n_dims == 1:
        kde_grid = kde_grid.reshape(-1, 1)

    # Unique grid coordinates along each axis, as column vectors
    grid_axes = [
        np.atleast_2d(np.unique(kde_grid[:, d])).T for d in range(n_dims)
    ]

    # Get the number of grid points along each dimension
    num_grid_points = [len(axis) for axis in grid_axes]

    # For every axis, broadcast grid column against query row and pick the
    # index of the closest grid coordinate for each query point
    nearest_indices = np.array([
        np.abs(grid_axes[d] -
               np.atleast_2d(return_pdf_at[:, d])).argmin(axis=0)
        for d in range(n_dims)
    ])

    # Convert the per-axis indices into positions in the flat KDE array
    bin_indices = np.ravel_multi_index(multi_index=nearest_indices,
                                       dims=num_grid_points)

    # Read the density values at those positions
    return kde[bin_indices].ravel()
Beispiel #9
0
 def kde(self, num=2**10):
     """Evaluate an FFTKDE of ``self.samples`` on a ``num`` x ``num`` grid.

     Returns a tuple of three arrays, each reshaped to ``(num, num)``: the
     first grid coordinate, the second grid coordinate, and the estimated
     density at each grid point.
     """
     # NOTE(review): presumably self.samples has two columns -- confirm
     shape = (num, num)
     grid, density = FFTKDE().fit(self.samples).evaluate(shape)

     first_coord = grid[:, 0].reshape(shape)
     second_coord = grid[:, 1].reshape(shape)
     return (first_coord, second_coord, density.reshape(shape))