def test_chi2s():
    """Test the function cherenkovdeconvolution.util.chi2s."""
    # test on random arguments
    num_bins = np.random.randint(1, 1000)
    a = np.random.randint(1000, size = num_bins)
    b = np.random.randint(1000, size = num_bins)
    chi2s = util.chi2s(a, b)
    assert chi2s >= 0
    result = util.chi2s(util.normalizepdf(a), util.normalizepdf(b), normalize = False)
    assert result == chi2s

    # test increase on diverging arrays
    num_bins = np.random.randint(3, 1000)  # at least 3 bins, so that b[2] below exists
    a = np.zeros(num_bins)
    b = np.ones(num_bins)
    a[1] = 1
    last_chi2s = util.chi2s(a, b)

    for i in range(10):
        b[2] += 1 - np.random.uniform()  # in (0, 1]
        chi2s = util.chi2s(a, b)
        assert chi2s >= last_chi2s
        last_chi2s = chi2s

    # test exceptions
    with raises(ValueError):
        util.chi2s(np.random.uniform(size = 3), np.random.uniform(size = 4))
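# Hedged reference sketch (an assumption, not taken from the source): util.chi2s
# presumably computes a symmetric Chi-square distance, up to a constant factor, which
# is consistent with the properties tested above. numpy is assumed to be imported as np.
def chi2s_sketch(a, b, normalize=True):
    a, b = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    if normalize:  # the test above implies that chi2s normalizes its inputs by default
        a, b = a / np.sum(a), b / np.sum(b)
    nonzero = (a + b) > 0  # skip empty bins to avoid division by zero
    return np.sum((a - b)[nonzero] ** 2 / (a + b)[nonzero])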

    def test_chi2s(self):
        """Test the function cherenkovdeconvolution.util.chi2s."""
        # test on random arguments
        for i in range(10):
            with self.subTest(i=i):
                num_bins = np.random.randint(1, 1000)
                a = np.random.randint(1000, size=num_bins)
                b = np.random.randint(1000, size=num_bins)
                chi2s = util.chi2s(a, b)
                self.assertGreaterEqual(chi2s, 0)
                self.assertEqual(
                    util.chi2s(util.normalizepdf(a),
                               util.normalizepdf(b),
                               normalize=False), chi2s)

        # test increase on diverging arrays
        num_bins = np.random.randint(3, 1000)  # at least 3 bins, so that b[2] below exists
        a = np.zeros(num_bins)
        b = np.ones(num_bins)
        a[1] = 1
        last_chi2s = util.chi2s(a, b)
        for i in range(10):
            b[2] += 1 - np.random.uniform()  # in (0, 1]
            with self.subTest(b2=b[2]):
                chi2s = util.chi2s(a, b)
                self.assertGreater(chi2s, last_chi2s)
                last_chi2s = chi2s

        # test exceptions
        with self.assertRaises(ValueError):
            util.chi2s(np.random.uniform(size=3), np.random.uniform(size=4))

    def test_jl_alpha_adaptive_run(self):
        """Test the function cherenkovdeconvolution.stepsize.alpha_adaptive_run."""
        from sklearn.datasets import load_iris
        iris = load_iris()

        # discretize the observed quantity into up to 6 clusters
        y_iris = iris.target  # already discrete
        bins_y = np.sort(np.unique(y_iris))
        x_iris = discretize.TreeDiscretizer(iris.data, y_iris, 6).discretize(iris.data)

        print(' ')  # ensure that a line break comes before the actual printing
        n_runs = 100
        n_failures = 0  # store the number of failed tests
        for i in range(n_runs):
            p_iris = np.random.permutation(len(iris.target))
            x_data = x_iris[p_iris[0:50]]
            y_data = y_iris[p_iris[0:50]]
            x_train = x_iris[p_iris[50:150]]
            y_train = y_iris[p_iris[50:150]]

            # find some random f_prev and f_next
            f_true = util.fit_pdf(y_data, bins_y)
            f_next = util.normalizepdf(f_true + 0.05 * np.random.rand(len(f_true)))
            f_prev = util.normalizepdf(f_next + 0.15 * np.random.rand(len(f_true)))
            pk = f_next - f_prev

            # test alpha boundaries
            py_amin, py_amax = py_stepsize._alpha_range(pk, f_prev)
            jl_amin, jl_amax = jl_stepsize._alpha_range(pk, f_prev)
            self.assertAlmostEqual(py_amin, jl_amin)
            self.assertAlmostEqual(py_amax, jl_amax)

            # optimize the step size
            py_fun = py_stepsize.alpha_adaptive_run(x_data, x_train, y_train, 0, bins_y)
            jl_fun = jl_stepsize.alpha_adaptive_run(x_data + 1, x_train + 1, y_train + 1, 0.0,
                                                    bins=bins_y + 1)
            k = np.random.randint(1, 100)  # some irrelevant iteration number
            rtol = 3 * np.linalg.norm(f_true - f_prev, np.inf)  # tolerance of the equality assertion
            py_a = py_fun(k, pk.copy(), f_prev.copy())
            jl_a = jl_fun(k, pk.copy(), f_prev.copy())
            print('---- rtol=%09.6f, py_a=%09.6f, jl_a=%09.6f' % (rtol, py_a, jl_a))

            # assert approximate equality and remember failures
            try:
                self.assertAlmostEqual(py_a, jl_a, delta=rtol)
            except AssertionError:
                n_failures += 1
                print('---- FAILURE')
        print('---- {}/{} tests of alpha_adaptive_run failed'.format(n_failures, n_runs))
        failure_tol = 0.33  # allow 33% of the tests to fail
        self.assertLessEqual(n_failures / n_runs, failure_tol, 'Too many tests failed')

    def test_jl_normalizepdf(self):
        """Test the function cherenkovdeconvolution.util.normalizepdf."""
        for i in range(10):
            with self.subTest(i=i):
                num_bins = np.random.randint(1, 1000)
                arr = np.random.uniform(size=num_bins)
                py_narr = py_util.normalizepdf(arr)
                jl_narr = jl_util.normalizepdf(arr)
                np.testing.assert_allclose(py_narr, jl_narr)

def test_normalizepdf():
    """Test the function cherenkovdeconvolution.util.normalizepdf."""
    # test on random arguments
    num_bins = np.random.randint(1, 1000)
    arr      = np.random.uniform(size = num_bins)
    narr = util.normalizepdf(arr)
    assert np.any(arr != narr)  # not performed in place
    assert len(narr) == num_bins
    assert sum(narr) == approx(1) # total equality violated by rounding
    util.normalizepdf(arr, copy = False)
    assert np.all(arr == narr) # in place version

    # test exceptions
    intarr = np.random.randint(1000, size = 10) # integer array
    assert sum(util.normalizepdf(intarr)) == approx(1)
    with raises(ValueError):
        util.normalizepdf(intarr, copy = False) # in place only allowed on floats
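# Hedged sketch of the behaviour these tests rely on (edge cases of the real
# util.normalizepdf, e.g. a zero-sum input, may be handled differently): divide by the
# sum so the result adds up to one, return a copy by default, and only allow float
# arrays to be normalized in place.
def normalizepdf_sketch(arr, copy=True):
    if copy:
        return np.asarray(arr, dtype=float) / np.sum(arr)  # float copy summing to 1
    if not np.issubdtype(arr.dtype, np.floating):
        raise ValueError('In-place normalization requires a float array')
    arr /= np.sum(arr)  # modify the input directly
    return arr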

    def test_normalizepdf(self):
        """Test the function cherenkovdeconvolution.util.normalizepdf."""
        # test on random arguments
        for i in range(10):
            with self.subTest(i=i):
                num_bins = np.random.randint(1, 1000)
                arr = np.random.uniform(size=num_bins)
                narr = util.normalizepdf(arr)
                self.assertTrue(np.any(arr != narr))  # not performed in place
                self.assertEqual(len(narr), num_bins)
                self.assertAlmostEqual(
                    sum(narr), 1)  # total equality violated by rounding
                util.normalizepdf(arr, copy=False)
                self.assertTrue(np.all(arr == narr))  # in place version

        # test exceptions
        intarr = np.random.randint(1000, size=10)  # integer array
        self.assertAlmostEqual(sum(util.normalizepdf(intarr)), 1)
        with self.assertRaises(ValueError):
            util.normalizepdf(intarr,
                              copy=False)  # in place only allowed on floats
Example #7
def _dsea_reconstruct(proba):
    return util.normalizepdf(np.apply_along_axis(np.sum, 0, proba))
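# Illustrative usage (the matrix below is made up): _dsea_reconstruct sums the class
# probabilities of all examples per target bin and normalizes the column sums to a pdf.
proba = np.array([[0.8, 0.2],
                  [0.6, 0.4],
                  [0.1, 0.9]])    # (n_samples, I) confidence matrix of a classifier
f_est = _dsea_reconstruct(proba)  # column sums [1.5, 1.5] become the pdf [0.5, 0.5]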
Example #8
def _dsea_weights(y_train, w_bin, normalize=True):
    if normalize:
        w_bin = util.normalizepdf(w_bin)  # normalized copy
    return np.maximum(w_bin[y_train], 1 / len(y_train))  # Laplace correction
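# Illustrative usage (labels and weights are made up): every training example receives
# the normalized weight of its target bin, clipped from below at 1/len(y_train) so that
# no example is weighted with zero (the Laplace correction mentioned above).
y_train = np.array([0, 0, 1, 2])   # target bin index of each training example
w_bin = np.array([1.0, 2.0, 0.0])  # raw bin weights, normalized to [1/3, 2/3, 0]
w = _dsea_weights(y_train, w_bin)  # yields [1/3, 1/3, 2/3, 0.25] after clipping at 1/4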
Example #9
def deconvolve(R,
               g,
               f_0=None,
               smoothing=None,
               K=3,
               epsilon=0.0,
               fit_ratios=False,
               inspect=None):
    """Deconvolve the target distribution f, given R and g, with Iterative Bayesian
    Unfolding.
    
    Parameters
    ----------
    R : array-like, shape (J, I), floats
        The detector response matrix.
    
    g : array-like, shape (J,), floats
        The observed discrete pdf.
    
    f_0 : array-like, shape (I,), floats, optional
        The prior, which is uniform by default.
    
    smoothing : callable, optional
        A function (f) -> (f_smooth) optionally smoothing each estimate before using it as
        the prior of the next iteration.
    
    K : int, optional
        The maximum iteration number.
    
    epsilon : float, optional
        The minimum Chi-square distance between iterations. If the actual distance is below
        this threshold, convergence is assumed and the algorithm stops.
    
    fit_ratios : boolean, optional
        Determines if ratios are fitted (i.e. R has to contain counts so that the ratio
        f_est/f_train is estimated) or if the probability density f_est is fitted directly.
    
    inspect : callable, optional
        A function (f, k, chi2s) -> () optionally called in every iteration.
    
    Returns
    -------
    f : array-like, shape (I,)
        The estimated target pdf.
    """

    # check arguments
    if R.shape[0] != len(g):
        raise ValueError(
            'dim(g) = {} is not equal to the observable dimension {} of R'.
            format(len(g), R.shape[0]))

    # initial estimate
    f = _check_prior(f_0, m=R.shape[1], fit_ratios=fit_ratios)
    if inspect is not None:
        inspect(f, 0, np.nan)

    # iterative Bayesian deconvolution
    for k in range(1, K + 1):

        # == smoothing in between iterations ==
        f_prev_smooth = smoothing(f) if smoothing is not None and k > 1 else f
        f_prev = f  # unsmoothed estimate is required for convergence check
        # = = = = = = = = = = = = = = = = = = =

        # === apply Bayes' rule ===
        f = np.dot(_ibu_reverse_transfer(R, f_prev_smooth), g)
        if not fit_ratios:
            f = util.normalizepdf(f)
        # = = = = = = = = = = = = =

        # monitor progress
        chi2s = util.chi2s(f_prev, f, False)  # Chi-square distance between iterations
        if inspect is not None:
            inspect(f, k, chi2s)

        # stop when convergence is assumed
        if chi2s < epsilon:
            break

    return f  # return the last estimate
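# Minimal usage sketch (R and g are made up; shapes follow the docstring): a response
# matrix whose columns sum to one and an observed pdf g of matching dimension. The
# inspect callback prints the iteration number, the Chi-square distance, and the
# current estimate.
R = np.array([[0.8, 0.3],
              [0.2, 0.7]])
g = np.array([0.55, 0.45])
f_est = deconvolve(R, g, K=10, epsilon=1e-6,
                   inspect=lambda f, k, chi2s: print(k, chi2s, f))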