def test_additivity_with_weights(data, split_index):
    """Verify the additive property of a weighted KDE: the density fit on
    the full weighted data set equals the sum of densities fit on a
    partition of it, each rescaled by its total weight mass."""
    grid = np.linspace(-10, 15)

    # Normalized, strictly positive weights: 1, 2, ..., len(data)
    raw_weights = np.arange(len(data)) + 1
    normalized = raw_weights / np.sum(raw_weights)

    # KDE over the full data set with all weights
    full_density = TreeKDE("epa").fit(data, normalized).evaluate(grid)

    # Partition both the observations and their weights at split_index
    observations = list(data)
    weight_list = list(normalized)
    obs_left, obs_right = observations[:split_index], observations[split_index:]
    wts_left, wts_right = weight_list[:split_index], weight_list[split_index:]

    # Each partial KDE integrates to 1, so rescale by its weight mass
    partial_left = TreeKDE("epa").fit(obs_left, wts_left).evaluate(grid) * sum(wts_left)
    partial_right = TreeKDE("epa").fit(obs_right, wts_right).evaluate(grid) * sum(wts_right)

    # Additive property of the density functions
    assert np.allclose(full_density, partial_left + partial_right)
def test_additivity(data, split_index):
    """Verify the additive property of the KDE: the density fit on the full
    data set equals the sum of densities fit on a partition of it, each
    rescaled by the fraction of the data it contains."""
    grid = np.linspace(-10, 10)

    # KDE over the full data set
    full_density = TreeKDE("epa").fit(data).evaluate(grid)

    # Each partial KDE is rescaled by its share of the observations
    num_obs = len(data)
    frac_left = split_index / num_obs
    frac_right = (num_obs - split_index) / num_obs
    partial_left = TreeKDE("epa").fit(data[:split_index]).evaluate(grid) * frac_left
    partial_right = TreeKDE("epa").fit(data[split_index:]).evaluate(grid) * frac_right

    # Additive property of the density functions
    assert np.allclose(full_density, partial_left + partial_right)
def test_against_R_density(kernel, bw, n, expected_result):
    """Compare the KDE against the output of R's ``density`` function:

        d <- density(c(0, 0.1, 1), kernel="{kernel}", bw={bw},
                     n={n}, from=-1, to=1); d$y
    """
    observations = np.array([0, 0.1, 1])
    grid = np.linspace(-1, 1, num=n)
    density = TreeKDE(kernel, bw=bw).fit(observations).evaluate(grid)
    # Loose absolute tolerance: R and this implementation differ slightly
    assert np.allclose(density, expected_result, atol=10**(-2.7))
def test_against_scipy_density(bw, n, expected_result):
    """Compare the KDE against the output of SciPy's ``gaussian_kde``:

        data = np.array([0, 0.1, 1])
        x = np.linspace(-1, 1, {n})
        bw = {bw}/np.asarray(data).std(ddof=1)
        density_estimate = gaussian_kde(dataset = data, bw_method = bw)
        y = density_estimate.evaluate(x)

    Note that SciPy weights its bandwidth by the covariance of the input
    data.  To make the results comparable to the other methods, the
    bandwidth is divided by the sample standard deviation there.
    """
    observations = np.array([0, 0.1, 1])
    grid = np.linspace(-1, 1, num=n)
    density = TreeKDE(kernel="gaussian", bw=bw).fit(observations).evaluate(grid)
    # Mean squared error against the precomputed SciPy output
    mse = np.mean((density - expected_result) ** 2)
    assert mse < 1e-10