def test_chi2s(i):
    """Test the function cherenkovdeconvolution.util.chi2s."""
    # test on random arguments
    num_bins = np.random.randint(1, 1000)
    a = np.random.randint(1000, size = num_bins)
    b = np.random.randint(1000, size = num_bins)
    chi2s = util.chi2s(a, b)
    assert chi2s >= 0
    result = util.chi2s(util.normalizepdf(a), util.normalizepdf(b), normalize = False)
    assert result == chi2s

    # test increase on diverging arrays
    num_bins = np.random.randint(2, 1000)
    a = np.zeros(num_bins)
    b = np.ones(num_bins)
    a[1] = 1
    last_chi2s = util.chi2s(a, b)
    for i in range(10):
        b[2] += 1 - np.random.uniform()  # in (0, 1]
        chi2s = util.chi2s(a, b)
        assert chi2s >= last_chi2s
        last_chi2s = chi2s

    # test exceptions
    with raises(ValueError):
        util.chi2s(np.random.uniform(size = 3), np.random.uniform(size = 4))
def test_chi2s(self):
    """Test the function cherenkovdeconvolution.util.chi2s."""
    # test on random arguments
    for i in range(10):
        with self.subTest(i=i):
            num_bins = np.random.randint(1, 1000)
            a = np.random.randint(1000, size=num_bins)
            b = np.random.randint(1000, size=num_bins)
            chi2s = util.chi2s(a, b)
            self.assertGreaterEqual(chi2s, 0)
            self.assertEqual(
                util.chi2s(util.normalizepdf(a), util.normalizepdf(b), normalize=False),
                chi2s)

    # test increase on diverging arrays
    num_bins = np.random.randint(2, 1000)
    a = np.zeros(num_bins)
    b = np.ones(num_bins)
    a[1] = 1
    last_chi2s = util.chi2s(a, b)
    for i in range(10):
        b[2] += 1 - np.random.uniform()  # in (0, 1]
        with self.subTest(b2=b[2]):
            chi2s = util.chi2s(a, b)
            self.assertGreater(chi2s, last_chi2s)
            last_chi2s = chi2s

    # test exceptions
    with self.assertRaises(ValueError):
        util.chi2s(np.random.uniform(size=3), np.random.uniform(size=4))
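# For reference, a hypothetical re-implementation of util.chi2s as the symmetric
# Chi-square distance, assuming the common definition 2 * sum((a-b)^2 / (a+b)) with
# bins that are empty in both pdfs being skipped; the actual implementation may
# differ in details, but this sketch satisfies the properties tested above.
import numpy as np

def chi2s_sketch(a, b, normalize=True):
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    if len(a) != len(b):
        raise ValueError('a and b must have the same length')
    if normalize:  # interpret the inputs as unnormalized pdfs
        a, b = a / np.sum(a), b / np.sum(b)
    nonzero = (a + b) > 0  # skip bins that are empty in both pdfs
    return 2 * np.sum((a[nonzero] - b[nonzero]) ** 2 / (a[nonzero] + b[nonzero]))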
def test_jl_alpha_adaptive_run(self):
    """Test the function cherenkovdeconvolution.stepsize.alpha_adaptive_run."""
    from sklearn.datasets import load_iris
    iris = load_iris()

    # discretize the observed quantity into up to 6 clusters
    y_iris = iris.target  # already discrete
    bins_y = np.sort(np.unique(y_iris))
    x_iris = discretize.TreeDiscretizer(iris.data, y_iris, 6).discretize(iris.data)

    print(' ')  # ensure that a line break comes before the actual printing
    n_runs = 100
    n_failures = 0  # store the number of failed tests
    for i in range(n_runs):
        p_iris = np.random.permutation(len(iris.target))
        x_data = x_iris[p_iris[0:50]]
        y_data = y_iris[p_iris[0:50]]
        x_train = x_iris[p_iris[50:150]]
        y_train = y_iris[p_iris[50:150]]

        # find some random f_prev and f_next
        f_true = util.fit_pdf(y_data, bins_y)
        f_next = util.normalizepdf(f_true + 0.05 * np.random.rand(len(f_true)))
        f_prev = util.normalizepdf(f_next + 0.15 * np.random.rand(len(f_true)))
        pk = f_next - f_prev

        # test alpha boundaries
        py_amin, py_amax = py_stepsize._alpha_range(pk, f_prev)
        jl_amin, jl_amax = jl_stepsize._alpha_range(pk, f_prev)
        self.assertAlmostEqual(py_amin, jl_amin)
        self.assertAlmostEqual(py_amax, jl_amax)

        # optimize the step size
        py_fun = py_stepsize.alpha_adaptive_run(x_data, x_train, y_train, 0, bins_y)
        jl_fun = jl_stepsize.alpha_adaptive_run(x_data+1, x_train+1, y_train+1, 0.0, bins = bins_y+1)
        k = np.random.randint(1, 100)  # some irrelevant iteration number
        rtol = 3 * np.linalg.norm(f_true - f_prev, np.inf)  # tolerance in equality assertion
        py_a = py_fun(k, pk.copy(), f_prev.copy())
        jl_a = jl_fun(k, pk.copy(), f_prev.copy())
        print('---- rtol=%09.6f, py_a=%09.6f, jl_a=%09.6f' % (rtol, py_a, jl_a))

        # assert approximate equality and remember failures
        try:
            self.assertAlmostEqual(py_a, jl_a, delta=rtol)
        except AssertionError:
            n_failures += 1
            print('---- FAILURE')

    print('---- {}/{} tests of alpha_adaptive_run failed'.format(n_failures, n_runs))
    failure_tol = 0.33  # allow 33% failures
    self.assertLessEqual(n_failures / n_runs, failure_tol, 'Too many tests failed')
def test_jl_normalizepdf(self):
    """Test the function cherenkovdeconvolution.util.normalizepdf."""
    for i in range(10):
        with self.subTest(i = i):
            num_bins = np.random.randint(1, 1000)
            arr = np.random.uniform(size = num_bins)
            py_narr = py_util.normalizepdf(arr)
            jl_narr = jl_util.normalizepdf(arr)
            np.testing.assert_allclose(py_narr, jl_narr)
def test_normalizepdf(i):
    """Test the function cherenkovdeconvolution.util.normalizepdf."""
    # test on random arguments
    num_bins = np.random.randint(1, 1000)
    arr = np.random.uniform(size = num_bins)
    narr = util.normalizepdf(arr)
    assert np.any(arr != narr)  # not performed in place
    assert len(narr) == num_bins
    assert sum(narr) == approx(1)  # total equality violated by rounding
    util.normalizepdf(arr, copy = False)
    assert np.all(arr == narr)  # in place version

    # test exceptions
    intarr = np.random.randint(1000, size = 10)  # integer array
    assert sum(util.normalizepdf(intarr)) == approx(1)
    with raises(ValueError):
        util.normalizepdf(intarr, copy = False)  # in place only allowed on floats
def test_normalizepdf(self):
    """Test the function cherenkovdeconvolution.util.normalizepdf."""
    # test on random arguments
    for i in range(10):
        with self.subTest(i=i):
            num_bins = np.random.randint(1, 1000)
            arr = np.random.uniform(size=num_bins)
            narr = util.normalizepdf(arr)
            self.assertTrue(np.any(arr != narr))  # not performed in place
            self.assertEqual(len(narr), num_bins)
            self.assertAlmostEqual(sum(narr), 1)  # total equality violated by rounding
            util.normalizepdf(arr, copy=False)
            self.assertTrue(np.all(arr == narr))  # in place version

    # test exceptions
    intarr = np.random.randint(1000, size=10)  # integer array
    self.assertAlmostEqual(sum(util.normalizepdf(intarr)), 1)
    with self.assertRaises(ValueError):
        util.normalizepdf(intarr, copy=False)  # in place only allowed on floats
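# For reference, a hypothetical re-implementation of util.normalizepdf that matches
# the behaviour exercised by the tests above; the actual implementation may differ
# in details, e.g. in how it treats arrays that sum to zero.
import numpy as np

def normalizepdf_sketch(arr, copy=True):
    if copy:
        arr = np.array(arr, dtype=float)  # float copy, also of integer input
    elif not np.issubdtype(np.asarray(arr).dtype, np.floating):
        raise ValueError('In-place normalization is only allowed on float arrays')
    arr /= np.sum(arr)  # performed in place when copy=False
    return arr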
def _dsea_reconstruct(proba):
    """Reconstruct the target pdf from the confidence matrix proba."""
    return util.normalizepdf(np.apply_along_axis(np.sum, 0, proba))
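# For intuition (hypothetical numbers): each row of proba holds the class
# probabilities of one example, so summing over axis 0 yields the expected number
# of examples per target bin, which is then normalized to a pdf.
proba = np.array([[0.9, 0.1],
                  [0.4, 0.6],
                  [0.2, 0.8]])
_dsea_reconstruct(proba)  # -> array([0.5, 0.5]), the normalized column sums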
def _dsea_weights(y_train, w_bin, normalize=True):
    """Compute the weight of each training example from the bin weights w_bin."""
    if normalize:
        w_bin = util.normalizepdf(w_bin)  # normalized copy
    return np.maximum(w_bin[y_train], 1 / len(y_train))  # Laplace correction
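# For intuition (hypothetical numbers): w_bin is normalized to a pdf and indexed by
# the training labels; the Laplace correction floors each weight at 1 / len(y_train)
# so that no example receives a vanishing weight.
y_train = np.array([0, 0, 1, 2])
w_bin = np.array([0.0, 1.0, 3.0])  # normalized to [0.0, 0.25, 0.75]
_dsea_weights(y_train, w_bin)  # -> array([0.25, 0.25, 0.25, 0.75])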
def deconvolve(R, g, f_0=None, smoothing=None, K=3, epsilon=0.0, fit_ratios=False,
               inspect=None):
    """Deconvolve the target distribution f, given R and g, with Iterative Bayesian
    Unfolding.

    Parameters
    ----------
    R : array-like, shape (J, I), floats
        The detector response matrix.

    g : array-like, shape (J,), floats
        The observed discrete pdf.

    f_0 : array-like, shape (I,), floats, optional
        The prior, which is uniform by default.

    smoothing : callable, optional
        A function (f) -> (f_smooth) optionally smoothing each estimate before using
        it as the prior of the next iteration.

    K : int, optional
        The maximum number of iterations.

    epsilon : float, optional
        The minimum Chi-square distance between iterations. If the actual distance is
        below this threshold, convergence is assumed and the algorithm stops.

    fit_ratios : boolean, optional
        Determines if ratios are fitted (i.e. R has to contain counts so that the
        ratio f_est / f_train is estimated) or if the probability density f_est is
        fitted directly.

    inspect : callable, optional
        A function (f, k, chi2s) -> () optionally called in every iteration.

    Returns
    -------
    f : array-like, shape (I,)
        The estimated target pdf.
    """
    # check arguments
    if R.shape[0] != len(g):
        raise ValueError('dim(g) = {} is not equal to the observable dimension {} of R'.format(
            len(g), R.shape[0]))

    # initial estimate
    f = _check_prior(f_0, m=R.shape[1], fit_ratios=fit_ratios)
    if inspect is not None:
        inspect(f, 0, np.nan)

    # iterative Bayesian deconvolution
    for k in range(1, K + 1):

        # == smoothing in between iterations ==
        f_prev_smooth = smoothing(f) if smoothing is not None and k > 1 else f
        f_prev = f  # the unsmoothed estimate is required for the convergence check
        # = = = = = = = = = = = = = = = = = = =

        # === apply Bayes' rule ===
        f = np.dot(_ibu_reverse_transfer(R, f_prev_smooth), g)
        if not fit_ratios:
            f = util.normalizepdf(f)
        # = = = = = = = = = = = = =

        # monitor progress
        chi2s = util.chi2s(f_prev, f, False)  # Chi-square distance between iterations
        if inspect is not None:
            inspect(f, k, chi2s)

        # stop when convergence is assumed
        if chi2s < epsilon:
            break

    return f  # return the last estimate
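# A toy usage sketch with hypothetical data: column i of R holds the pdf of the
# observable, given target bin i, so that g = R f holds in a noise-free setting.
import numpy as np

R = np.array([[0.8, 0.1],
              [0.2, 0.9]])
f_true = np.array([0.3, 0.7])
g = np.dot(R, f_true)  # the observed pdf
f_est = deconvolve(R, g, K=10, epsilon=1e-6)  # approximately recovers f_true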