Ejemplo n.º 1
0
    def test_reflect_2d(self):
        """Check that `KDE.resample` respects reflecting boundaries in two dimensions.

        A 2D data set is built from a uniform variable on [0, 2] and a normal
        variable truncated above at 2.0.  For each reflection specification,
        one sample set is drawn with `reflect=...` and one without.  In every
        dimension that has a boundary, the unreflected samples must spill
        outside of it, while the reflected samples must lie strictly inside.
        """
        print("\n|Test_KDE_Resample:test_reflect_2d()|")

        # Fixed seed: the assertions below depend on the particular draws
        seed = 8067
        print(seed)
        np.random.seed(seed)
        NUM = 2000
        xx = np.random.uniform(0.0, 2.0, NUM)
        yy = np.random.normal(1.0, 1.5, NUM)
        # Truncate at 2.0, then top back up to NUM points by re-drawing from the survivors
        yy = yy[yy < 2.0]
        yy = np.concatenate([yy, np.random.choice(yy, NUM - yy.size)])

        kde = kale.KDE([xx, yy])

        # One entry per dimension; `None` means "no boundary" (for a whole
        # dimension, or for one side of it)
        reflections = [[[0.0, 2.0], [None, 2.0]], [[0.0, 2.0], None],
                       [None, [None, 2.0]], None]
        for jj, reflect in enumerate(reflections):
            # Both draws are made for every case (including `reflect=None`) so
            # the random stream is consumed identically regardless of the case
            samps_ref = kde.resample(reflect=reflect)
            samps_nrm = kde.resample()

            if reflect is None:
                continue

            for ii, ref in enumerate(reflect):
                if ref is None:
                    continue
                # Open-ended sides become infinite bounds; use local values so
                # the `reflections` table itself is left untouched
                lo = -np.inf if ref[0] is None else ref[0]
                hi = np.inf if ref[1] is None else ref[1]

                print(jj, ii, [lo, hi])
                # kk == 0: unreflected samples,  kk == 1: reflected samples
                for kk, zz in enumerate([samps_nrm[ii], samps_ref[ii]]):
                    inside = (lo < zz) & (zz < hi)
                    outside = (zz < lo) | (hi < zz)

                    print("\tin : ", kk, np.all(inside), np.any(inside))
                    print("\tout: ", kk, np.all(outside), np.any(outside))

                    if kk == 0:
                        assert_false(np.all(inside))
                        assert_true(np.any(outside))
                    else:
                        assert_true(np.all(inside))
                        assert_false(np.any(outside))
Ejemplo n.º 2
0
    def compare_scipy_1d(self, kernel):
        """Compare 1D `kale.KDE` densities against `scipy.stats.gaussian_kde`.

        For each bandwidth specification in `methods`, both estimators are
        built on the same data and evaluated on the same grid; the resulting
        densities must agree to within `np.allclose` tolerances.

        Parameters
        ----------
        kernel
            Kernel passed through to `kale.KDE(..., kernel=kernel)`.

        """
        print("\n|Test_KDE_PDF:test_compare_scipy_1d()|")
        NUM = 100
        # NOTE: no seed is set in this method; both estimators see the same
        # data, so the comparison does not depend on the particular draw
        a1 = np.random.normal(6.0, 1.0, NUM // 2)
        a2 = np.random.lognormal(0, 0.5, size=NUM // 2)
        aa = np.concatenate([a1, a2])

        bins = utils.spacing([-1, 14.0], 'lin', 40)
        grid = utils.spacing(bins, 'lin', 3000)

        methods = ['scott', 0.04, 0.2, 0.8]
        classes = [
            lambda xx, bw: sp.stats.gaussian_kde(xx, bw_method=bw),
            lambda xx, bw: kale.KDE(xx, bandwidth=bw, kernel=kernel)
        ]
        for mm in methods:
            kde_list = []
            for cc in classes:
                # Build each estimator exactly once, and dispatch on the API it
                # exposes.  Wrapping the whole evaluation in `try/except
                # AttributeError` would hide an AttributeError raised *inside*
                # `density()` and would construct the fallback estimator twice.
                estimator = cc(aa, mm)
                if hasattr(estimator, 'density'):
                    test = estimator.density(grid, probability=True)[1]
                else:
                    test = estimator.pdf(grid)

                kde_list.append(test)

            print("method: {}".format(mm))
            print("\t" + utils.stats_str(kde_list[0]))
            print("\t" + utils.stats_str(kde_list[1]))
            assert_true(np.allclose(kde_list[0], kde_list[1]))
Ejemplo n.º 3
0
    def reflect_2d(self, kernel):
        """Check 2D densities from `kale.KDE` under several reflection settings.

        For every reflection specification the density is evaluated on a
        regular grid of bin centers, and three things are asserted: the
        density is positive inside the boundaries (everywhere, or somewhere,
        depending on `kernel._FINITE`), it is zero on the `outside` mask, and
        it integrates to unity over the grid to within 3%.

        Parameters
        ----------
        kernel
            Kernel passed to `kale.KDE`; its `_FINITE` attribute selects
            whether `np.all` or `np.any` is used for the positivity check.

        """
        print("\n|Test_KDE_PDF:test_reflect_2d()|")
        np.random.seed(124)
        NUM = 1000
        x_vals = np.random.uniform(0.0, 2.0, NUM)
        y_vals = np.random.normal(1.0, 1.0, NUM)
        # Truncate at 2.0 and refill to NUM points by re-drawing from the survivors
        y_vals = y_vals[y_vals < 2.0]
        refill = np.random.choice(y_vals, NUM-y_vals.size)
        y_vals = np.concatenate([y_vals, refill])

        data = [x_vals, y_vals]
        edges = [utils.spacing(vals, 'lin', 30) for vals in data]
        egrid = [utils.spacing(edge, 'lin', 100, stretch=0.5) for edge in edges]
        cgrid = [utils.midpoints(edge, 'lin') for edge in egrid]
        width = [np.diff(edge) for edge in egrid]

        xc, yc = np.meshgrid(*cgrid, indexing='ij')
        grid = np.vstack([xc.ravel(), yc.ravel()])

        # Only the shape of the histogram is used below
        hist = np.histogram2d(*data, bins=egrid, density=True)[0]

        kde = kale.KDE(data, kernel=kernel)
        if kernel._FINITE == 'infinite':
            inside_test_func = np.all
        else:
            inside_test_func = np.any

        # Area of every grid cell, used to integrate the density
        area = width[0][:, np.newaxis] * width[1][np.newaxis, :]

        reflections = [
            [[0.0, 2.0], [None, 2.0]],
            [[0.0, 2.0], None],
            [None, [None, 2.0]],
            None
        ]
        for jj, reflect in enumerate(reflections):
            pdf_1d = kde.density(grid, reflect=reflect, probability=True)[1]
            pdf = pdf_1d.reshape(hist.shape)

            inside = np.ones_like(pdf_1d, dtype=bool)
            outside = np.zeros_like(pdf_1d, dtype=bool)
            if reflect is not None:
                outside = np.ones_like(pdf_1d, dtype=bool)
                for dim, bounds in enumerate(reflect):
                    if bounds is None:
                        bounds = [-np.inf, np.inf]
                    # Open sides are filled in place, so the `reflect` printed
                    # below shows the substituted infinities
                    if bounds[0] is None:
                        bounds[0] = -np.inf
                    if bounds[1] is None:
                        bounds[1] = np.inf
                    lo, hi = bounds
                    coords = grid[dim]
                    inside = inside & (lo < coords) & (coords < hi)
                    # NOTE(review): `outside` is the intersection over
                    # dimensions, so it is empty whenever any dimension is
                    # unbounded and the zero-density check below is then
                    # vacuous -- confirm this is intended.
                    outside = outside & ((coords < lo) | (hi < coords))

            assert_true(inside_test_func(pdf_1d[inside] > 0.0))
            assert_true(np.allclose(pdf_1d[outside], 0.0))

            prob_tot = np.sum(pdf * area)
            print(jj, reflect, "prob_tot = {:.4e}".format(prob_tot))
            assert_true(np.isclose(prob_tot, 1.0, rtol=3e-2))
Ejemplo n.º 4
0
    def test_different_bws(self):
        """Check resampling from a 2D KDE that uses a different bandwidth per dimension.

        A 2D `kale.KDE` is built with bandwidths `[0.5, 2.0]`, together with
        two independent 1D KDEs using the same per-dimension data and
        bandwidth.  Samples of each dimension drawn from the 2D KDE are
        compared to samples from the matching 1D KDE with a two-sample
        Kolmogorov-Smirnov test.
        """
        print("\n|Test_KDE_Resample:test_different_bws()|")
        np.random.seed(9235)
        NUM = 1000
        a1 = np.random.normal(6.0, 1.0, NUM // 2)
        a2 = np.random.lognormal(0, 0.5, size=NUM // 2)
        aa = np.concatenate([a1, a2])

        # Second variable is narrow and weakly dependent on the first
        bb = np.random.normal(3.0, 0.02, NUM) + aa / 100

        data = [aa, bb]
        bws = [0.5, 2.0]
        kde2d = kale.KDE(data, bandwidth=bws)
        kde1d = [kale.KDE(dd, bandwidth=ss) for dd, ss in zip(data, bws)]

        for ii, kde in enumerate(kde1d):
            # Draw order (1D first, then 2D) matters: the p-value threshold
            # below is calibrated to the seeded random stream
            samp_1d = kde.resample(NUM).squeeze()
            samp_2d = kde2d.resample(NUM)[ii]

            # Make sure the two distributions resemble each other
            _, pv = sp.stats.ks_2samp(samp_1d, samp_2d)
            # Calibrated to the above seed-value of `9235`
            print("{}, pv = {}".format(ii, pv))
            assert_true(pv > 0.05)
Ejemplo n.º 5
0
    def pdf_params_fixed_bandwidth(self, kernel):
        """Check marginal densities of a 2D fixed-bandwidth KDE against 1D KDEs.

        The data are the union of two bivariate-normal samples.  For each
        parameter, the density obtained from the 2D KDE with `params=par` must
        match the density of a 1D KDE built from that parameter's data alone
        (same bandwidth and kernel), and both must integrate to unity over
        the grid to within 2%.

        Parameters
        ----------
        kernel
            Kernel passed through to `kale.KDE(..., kernel=kernel)`.

        """
        print("\n|Test_KDE_PDF:pdf_params_fixed_bandwidth()|")
        np.random.seed(124)

        NUM = 1000
        bandwidth = 0.02

        def _covariance(sigma, corr):
            # 2x2 covariance matrix from two standard deviations and a correlation
            s2 = np.square(sigma)
            cc = corr * sigma[0] * sigma[1]
            return np.array([[s2[0], cc], [cc, s2[1]]])

        # Strongly correlated component, followed by an uncorrelated one;
        # the draw order is kept so the seeded random stream is unchanged
        data = np.random.multivariate_normal(
            [1.0, 2.0], _covariance([2.5, 1.5], 0.9), NUM).T
        more = np.random.multivariate_normal(
            [1.0, 6.0], _covariance([2.5, 0.5], 0.0), NUM).T
        data = np.concatenate([data, more], axis=-1)

        kde = kale.KDE(data, bandwidth=bandwidth, kernel=kernel)

        edges = [utils.spacing(dd, 'lin', 200, stretch=0.1) for dd in data]
        cents = [utils.midpoints(ee, 'lin') for ee in edges]
        widths = [np.diff(ee) for ee in edges]

        for par in range(2):
            xx = cents[par]
            pdf_2d = kde.density(xx, params=par, probability=True)[1]
            kde_1d = kale.KDE(data[par, :], bandwidth=bandwidth, kernel=kernel)
            pdf_1d = kde_1d.density(xx, probability=True)[1]
            print(f"pdf_1d = {utils.stats_str(pdf_1d)}")
            print(f"pdf_2d = {utils.stats_str(pdf_2d)}")
            assert_true(np.allclose(pdf_2d, pdf_1d, rtol=1e-3))

            # Both densities must be normalized over the grid
            for pdf in (pdf_2d, pdf_1d):
                tot = np.sum(pdf * widths[par])
                print("tot = {:.4e}".format(tot))
                assert_true(np.isclose(tot, 1.0, rtol=2e-2))
Ejemplo n.º 6
0
    def compare_scipy_2d(self, kernel):
        """Compare 2D `kale.KDE` densities against `scipy.stats.gaussian_kde`.

        For each bandwidth specification in `methods`, both estimators are
        built on the same two-parameter data and evaluated on the same grid of
        bin centers; the resulting densities must agree to within
        `np.allclose` tolerances.

        Parameters
        ----------
        kernel
            Kernel passed through to `kale.KDE(..., kernel=kernel)`.

        """
        print("\n|Test_KDE_PDF:test_compare_scipy_2d()|")

        NUM = 1000
        # NOTE: no seed is set in this method; both estimators see the same
        # data, so the comparison does not depend on the particular draw
        a1 = np.random.normal(6.0, 1.0, NUM//2)
        a2 = np.random.lognormal(0, 0.5, size=NUM//2)
        aa = np.concatenate([a1, a2])

        bb = np.random.normal(3.0, 0.02, NUM) + aa/100

        data = [aa, bb]
        edges = [utils.spacing(dd, 'lin', 30, stretch=0.5) for dd in data]
        cents = [utils.midpoints(ee, 'lin') for ee in edges]

        xc, yc = np.meshgrid(*cents, indexing='ij')
        grid = np.vstack([xc.ravel(), yc.ravel()])

        methods = ['scott', 0.04, 0.2, 0.8]
        classes = [lambda xx, bw: sp.stats.gaussian_kde(xx, bw_method=bw),
                   lambda xx, bw: kale.KDE(xx, bandwidth=bw, kernel=kernel)]
        for mm in methods:
            kdes_list = []
            for cc in classes:
                # Build each estimator exactly once, and dispatch on the API it
                # exposes.  Wrapping the whole evaluation in `try/except
                # AttributeError` would hide an AttributeError raised *inside*
                # `density()` and would construct the fallback estimator twice.
                estimator = cc(data, mm)
                if hasattr(estimator, 'density'):
                    flat = estimator.density(grid, probability=True)[1]
                else:
                    flat = estimator.pdf(grid)

                kdes_list.append(flat.reshape(xc.shape).T)

            assert_true(np.allclose(kdes_list[0], kdes_list[1]))
Ejemplo n.º 7
0
    def test_log(self):
        """Check `utils.spacing` in 'log' mode against precomputed reference values.

        The reference values start at the minimum and end at the maximum of
        the input samples.
        """
        print("\n|Test_Spacing:test_log()|")
        samples = [
            0.56979885, 0.06782166, 38.00982397, 0.76822742, 0.24328732,
            18.22846225, 7.22905804, 0.5140395, 0.97960639, 14.57931413,
        ]
        expected = [
            0.06782166, 0.13701255, 0.27679121, 0.55917048, 1.12962989,
            2.28206553, 4.61020298, 9.31347996, 18.81498695, 38.00982397,
        ]

        # Ask for as many points as there are reference values
        num = np.size(expected)
        result = utils.spacing(samples, 'log', num)
        matches = np.allclose(expected, result)
        assert_true(matches)
Ejemplo n.º 8
0
    def test_lin(self):
        """Check `utils.spacing` in 'lin' mode against precomputed reference values.

        The reference values start at the minimum and end at the maximum of
        the input samples.
        """
        print("\n|Test_Spacing:test_lin()|")
        samples = [
            64.15474369, 30.23993491, 18.74843086, 90.36893423, 81.49347391,
            21.66373546, 26.36243961, 9.54536041, 33.48985127, 87.77429238,
        ]
        expected = [
            9.54536041, 18.5257575, 27.5061546, 36.48655169, 45.46694878,
            54.44734587, 63.42774296, 72.40814005, 81.38853714, 90.36893423,
        ]

        # Ask for as many points as there are reference values
        num = np.size(expected)
        result = utils.spacing(samples, 'lin', num)
        matches = np.allclose(expected, result)
        assert_true(matches)
Ejemplo n.º 9
0
    def reflect_1d(self, kernel):
        """Check 1D densities from `kale.KDE` with and without reflecting boundaries.

        Uniform data on [0, 2] are evaluated on a grid that extends beyond the
        data.  For each boundary setting the density must integrate to unity
        (to within 0.1%), and its values beyond the data extrema must be zero
        or nonzero as appropriate for the reflection that was requested.

        Parameters
        ----------
        kernel
            Kernel passed to `kale.KDE`; its `_FINITE` attribute selects
            whether `np.all` or `np.any` is used for the "nonzero outside"
            checks.

        """
        print("\n|Test_KDE_PDF:reflect_1d()|")

        np.random.seed(124)
        NUM = 1000
        EXTR = [0.0, 2.0]
        aa = np.random.uniform(*EXTR, NUM)

        egrid = utils.spacing(aa, 'lin', 2000, stretch=0.5)
        cgrid = utils.midpoints(egrid, 'lin')
        delta = np.diff(egrid)

        # NOTE(review): with this list only the "no reflection" and "both
        # sides reflected" branches below are exercised; the one-sided
        # branches are never reached -- confirm whether such cases should be added.
        boundaries = [None, EXTR]
        for bnd in boundaries:
            kde = kale.KDE(aa, kernel=kernel)
            pdf = kde.density(cgrid, reflect=bnd, probability=True)[1]

            # Infinite-support kernels should be nonzero at *all* points outside
            # of the boundaries; finite-support kernels only at *some* (near the edges)
            if kernel._FINITE == 'infinite':
                outside_test_func = np.all
            else:
                outside_test_func = np.any

            # Make sure unitarity is preserved
            tot = np.sum(pdf * delta)
            print("Boundary '{}', total = {:.4e}".format(bnd, tot))
            assert_true(np.isclose(tot, 1.0, rtol=1e-3))

            # Ratio of the largest to the smallest nonzero density value
            ratio_extr = np.max(pdf) / np.min(pdf[pdf > 0])
            # Density values below and above the extrema of the data
            below = pdf[cgrid < EXTR[0]]
            above = pdf[cgrid > EXTR[1]]

            if bnd is None:
                # No reflection: nonzero PDF on both sides, and a large ratio of extrema
                assert_true(outside_test_func(below > 0.0))
                assert_true(outside_test_func(above > 0.0))
                assert_true(ratio_extr > 10.0)
                continue

            if bnd[0] is None:
                # No lower-reflection: nonzero values below 0.0, zero above 2.0
                assert_true(outside_test_func(below > 0.0))
                assert_true(np.all(above == 0.0))
            elif bnd[1] is None:
                # No upper-reflection: zero below 0.0, nonzero values above 2.0
                assert_true(np.all(below == 0.0))
                assert_true(outside_test_func(above > 0.0))
            else:
                # Reflection on both sides: zero outside, and a nearly flat PDF inside
                assert_true(np.all(below == 0.0))
                assert_true(np.all(above == 0.0))
                assert_true(ratio_extr < 2.0)