Ejemplo n.º 1
0
class TestFitterGauss(object):
    # Test CurveFitter with a noisy gaussian, weighted and unweighted, to see
    # if the parameters and uncertainties come out correct

    @pytest.fixture(autouse=True)
    def setup_method(self, tmpdir):
        self.path = os.path.dirname(os.path.abspath(__file__))
        self.tmpdir = tmpdir.strpath

        theoretical = np.loadtxt(os.path.join(self.path, "gauss_data.txt"))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        # these best weighted values and uncertainties obtained with Igor
        self.best_weighted = [-0.00246095, 19.5299, -8.28446e-2, 1.24692]

        self.best_weighted_errors = [
            0.0220313708486,
            1.12879436221,
            0.0447659158681,
            0.0412022938883,
        ]

        self.best_weighted_chisqr = 77.6040960351

        self.best_unweighted = [
            -0.10584111872702096,
            19.240347049328989,
            0.0092623066070940396,
            1.501362314145845,
        ]

        self.best_unweighted_errors = [
            0.34246565477,
            0.689820935208,
            0.0411243173041,
            0.0693429375282,
        ]

        self.best_unweighted_chisqr = 497.102084956

        self.p0 = np.array([0.1, 20.0, 0.1, 0.1])
        self.names = ["bkg", "A", "x0", "width"]
        self.bounds = [(-1, 1), (0, 30), (-5.0, 5.0), (0.001, 2)]

        self.params = Parameters(name="gauss_params")
        for p, name, bound in zip(self.p0, self.names, self.bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            self.params.append(param)

        self.model = Model(self.params, fitfunc=gauss)
        self.data = Data1D((xvals, yvals, evals))
        self.objective = Objective(self.model, self.data)
        return 0

    def test_pickle(self):
        # tests if a CurveFitter can be pickled/unpickled.
        f = CurveFitter(self.objective)
        pkl = pickle.dumps(f)
        g = pickle.loads(pkl)
        g._check_vars_unchanged()

    def test_best_weighted(self):
        assert_equal(len(self.objective.varying_parameters()), 4)
        self.objective.setp(self.p0)

        f = CurveFitter(self.objective, nwalkers=100)
        res = f.fit("least_squares", jac="3-point")

        output = res.x
        assert_almost_equal(output, self.best_weighted, 3)
        assert_almost_equal(self.objective.chisqr(), self.best_weighted_chisqr,
                            5)

        # compare the residuals
        res = (self.data.y - self.model(self.data.x)) / self.data.y_err
        assert_equal(self.objective.residuals(), res)

        # compare objective.covar to the best_weighted_errors
        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.005)

        # we're also going to try the checkpointing here.
        checkpoint = os.path.join(self.tmpdir, "checkpoint.txt")

        # compare samples to best_weighted_errors
        np.random.seed(1)
        f.sample(steps=201, random_state=1, verbose=False, f=checkpoint)
        process_chain(self.objective, f.chain, nburn=50, nthin=10)
        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.07)

        # test that the checkpoint worked
        check_array = np.loadtxt(checkpoint)
        check_array = check_array.reshape(201, f._nwalkers, f.nvary)
        assert_allclose(check_array, f.chain)

        # test loading the checkpoint
        chain = load_chain(checkpoint)
        assert_allclose(chain, f.chain)

        f.initialise("jitter")
        f.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        assert_equal(f.chain.shape[0], 2)

        # we should be able to produce 2 * 100 steps from the generator
        g = self.objective.pgen(ngen=20000000000)
        s = [i for i, a in enumerate(g)]
        assert_equal(np.max(s), 200 - 1)
        g = self.objective.pgen(ngen=200)
        pvec = next(g)
        assert_equal(pvec.size, len(self.objective.parameters.flattened()))

        # check that all the parameters are returned via pgen, not only those
        # being varied.
        self.params[0].vary = False
        f = CurveFitter(self.objective, nwalkers=100)
        f.initialise("jitter")
        f.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        g = self.objective.pgen(ngen=100)
        pvec = next(g)
        assert_equal(pvec.size, len(self.objective.parameters.flattened()))

        # the following test won't work because of emcee/gh226.
        # chain = load_chain(checkpoint)
        # assert_(chain.shape == f.chain.shape)
        # assert_allclose(chain, f.chain)

        # try reproducing best fit with parallel tempering
        self.params[0].vary = True
        f = CurveFitter(self.objective, nwalkers=100, ntemps=10)
        f.fit("differential_evolution", seed=1)

        f.sample(steps=201, random_state=1, verbose=False)
        process_chain(self.objective, f.chain, nburn=50, nthin=15)
        print(self.params[0].chain.shape, self.params[0].chain)

        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.07)

    def test_best_unweighted(self):
        self.objective.weighted = False
        f = CurveFitter(self.objective, nwalkers=100)
        res = f.fit()

        output = res.x
        assert_almost_equal(self.objective.chisqr(),
                            self.best_unweighted_chisqr)
        assert_almost_equal(output, self.best_unweighted, 5)

        # compare the residuals
        res = self.data.y - self.model(self.data.x)
        assert_equal(self.objective.residuals(), res)

        # compare objective._covar to the best_unweighted_errors
        uncertainties = np.array([param.stderr for param in self.params])
        assert_almost_equal(uncertainties, self.best_unweighted_errors, 3)

        # the samples won't compare to the covariance matrix...
        # f.sample(nsteps=150, nburn=20, nthin=30, random_state=1)
        # uncertainties = [param.stderr for param in self.params]
        # assert_allclose(uncertainties, self.best_unweighted_errors,
        #                 rtol=0.15)

    def test_all_minimisers(self):
        """test minimisers against the Gaussian fit"""
        f = CurveFitter(self.objective)

        methods = ["differential_evolution", "L-BFGS-B", "least_squares"]
        if hasattr(sciopt, "shgo"):
            methods.append("shgo")
        if hasattr(sciopt, "dual_annealing"):
            methods.append("dual_annealing")

        for method in methods:
            self.objective.setp(self.p0)
            res = f.fit(method=method)
            assert_almost_equal(res.x, self.best_weighted, 3)

        # smoke test to check that we can use nlpost
        self.objective.setp(self.p0)
        logp0 = self.objective.logp()

        # check that probabilities are calculated correctly
        assert_allclose(
            self.objective.logpost(),
            self.objective.logp() + self.objective.logl(),
        )
        assert_allclose(self.objective.nlpost(), -self.objective.logpost())
        assert_allclose(self.objective.nlpost(self.p0),
                        -self.objective.logpost(self.p0))

        # if the priors are all uniform then the only difference between
        # logpost and logl is a constant. A minimiser should converge on the
        # same answer. The following tests examine that.
        # The test works for dual_annealing, but not for differential
        # evolution, not sure why that is.
        self.objective.setp(self.p0)
        res1 = f.fit(method="dual_annealing", seed=1)
        assert_almost_equal(res1.x, self.best_weighted, 3)
        nll1 = self.objective.nll()
        nlpost1 = self.objective.nlpost()

        self.objective.setp(self.p0)
        res2 = f.fit(method="dual_annealing", target="nlpost", seed=1)
        assert_almost_equal(res2.x, self.best_weighted, 3)
        nll2 = self.objective.nll()
        nlpost2 = self.objective.nlpost()

        assert_allclose(nlpost1, nlpost2, atol=0.001)
        assert_allclose(nll1, nll2, atol=0.001)

        # these two priors are calculated for different parameter values
        # (before and after the fit) they should be the same because all
        # the parameters have uniform priors.
        assert_almost_equal(self.objective.logp(), logp0)

    def test_pymc3_sample(self):
        # test sampling with pymc3
        try:
            import pymc3 as pm
            from refnx.analysis import pymc3_model
        except (ModuleNotFoundError, ImportError, AttributeError):
            # can't run test if pymc3/theano not installed
            return

        with pymc3_model(self.objective):
            s = pm.NUTS()
            pm.sample(
                200,
                tune=100,
                step=s,
                discard_tuned_samples=True,
                compute_convergence_checks=False,
                random_seed=1,
            )
Ejemplo n.º 2
0
class TestObjective(object):
    def setup_method(self):
        # Choose the "true" parameters.

        # Reproducible results!
        np.random.seed(123)

        self.m_true = -0.9594
        self.b_true = 4.294
        self.f_true = 0.534
        self.m_ls = -1.1040757010910947
        self.b_ls = 5.4405552502319505

        # Generate some synthetic data from the model.
        N = 50
        x = np.sort(10 * np.random.rand(N))
        y_err = 0.1 + 0.5 * np.random.rand(N)
        y = self.m_true * x + self.b_true
        y += np.abs(self.f_true * y) * np.random.randn(N)
        y += y_err * np.random.randn(N)

        self.data = Data1D(data=(x, y, y_err))

        self.p = Parameter(self.b_ls, 'b') | Parameter(self.m_ls, 'm')
        self.model = Model(self.p, fitfunc=line)
        self.objective = Objective(self.model, self.data)

        # want b and m
        self.p[0].vary = True
        self.p[1].vary = True

        mod = np.array([
            4.78166609, 4.42364699, 4.16404064, 3.50343504, 3.4257084,
            2.93594347, 2.92035638, 2.67533842, 2.28136038, 2.19772983,
            1.99295496, 1.93748334, 1.87484436, 1.65161016, 1.44613461,
            1.11128101, 1.04584535, 0.86055984, 0.76913963, 0.73906649,
            0.73331407, 0.68350418, 0.65216599, 0.59838566, 0.13070299,
            0.10749131, -0.01010195, -0.10010155, -0.29495372, -0.42817431,
            -0.43122391, -0.64637715, -1.30560686, -1.32626428, -1.44835768,
            -1.52589881, -1.56371158, -2.12048349, -2.24899179, -2.50292682,
            -2.53576659, -2.55797996, -2.60870542, -2.7074727, -3.93781479,
            -4.12415366, -4.42313742, -4.98368609, -5.38782395, -5.44077086
        ])
        self.mod = mod

    def test_model(self):
        # test that the line data produced by our model is the same as the
        # test data
        assert_almost_equal(self.model(self.data.x), self.mod)

    def test_synthetic_data(self):
        # test that we create the correct synthetic data by performing a least
        # squares fit on it
        assert_(self.data.y_err is not None)

        x, y, y_err, _ = self.data.data
        A = np.vstack((np.ones_like(x), x)).T
        C = np.diag(y_err * y_err)
        cov = np.linalg.inv(np.dot(A.T, np.linalg.solve(C, A)))
        b_ls, m_ls = np.dot(cov, np.dot(A.T, np.linalg.solve(C, y)))

        assert_almost_equal(b_ls, self.b_ls)
        assert_almost_equal(m_ls, self.m_ls)

    def test_setp(self):
        # check that we can set parameters
        self.p[0].vary = False

        assert_(len(self.objective.varying_parameters()) == 1)
        self.objective.setp(np.array([1.23]))
        assert_equal(self.p[1].value, 1.23)
        self.objective.setp(np.array([1.234, 1.23]))
        assert_equal(np.array(self.p), [1.234, 1.23])

    def test_pvals(self):
        assert_equal(self.objective.parameters.pvals, [self.b_ls, self.m_ls])
        self.objective.parameters.pvals = [1, 2]
        assert_equal(self.objective.parameters.pvals, [1, 2.])

    def test_logp(self):
        self.p[0].range(0, 10)
        assert_almost_equal(self.objective.logp(), np.log(0.1))

        # logp should set parameters
        self.objective.logp([8, 2])
        assert_equal(np.array(self.objective.parameters), [8, 2])

        # if we supply a value outside the range it should return -inf
        assert_equal(self.objective.logp([-1, 2]), -np.inf)

    def test_logpost(self):
        # http://dan.iel.fm/emcee/current/user/line/
        assert_almost_equal(self.objective.logp(), 0)

        assert_almost_equal(self.objective.nlpost(), -self.objective.logpost())

        # the uncertainties are underestimated in this example...
        # amendment factor because dfm emcee example does not include 2pi
        amend = 0.5 * self.objective.npoints * np.log(2 * np.pi)
        assert_almost_equal(self.objective.logl() + amend, -559.01078135444595)
        assert_almost_equal(self.objective.logpost() + amend,
                            -559.01078135444595)

    def test_chisqr(self):
        assert_almost_equal(self.objective.chisqr(), 1231.1096772954229)

    def test_residuals(self):
        # weighted, with and without transform
        assert_almost_equal(self.objective.residuals(),
                            (self.data.y - self.mod) / self.data.y_err)

        objective = Objective(self.model,
                              self.data,
                              transform=Transform('lin'))
        assert_almost_equal(objective.residuals(),
                            (self.data.y - self.mod) / self.data.y_err)

        # unweighted, with and without transform
        objective = Objective(self.model, self.data, use_weights=False)
        assert_almost_equal(objective.residuals(), self.data.y - self.mod)

        objective = Objective(self.model,
                              self.data,
                              use_weights=False,
                              transform=Transform('lin'))
        assert_almost_equal(objective.residuals(), self.data.y - self.mod)

    def test_masked_dataset(self):
        residuals = self.objective.residuals()

        mask = np.full_like(self.objective.data.y, True, bool)
        mask[1] = False
        self.objective.data.mask = mask

        assert_equal(self.objective.residuals().size, residuals.size - 1)

    def test_logp_extra(self):
        self.objective.logp_extra = logp_extra

        # repeat logp test
        self.p[0].range(0, 10)
        assert_almost_equal(self.objective.logp(), np.log(0.1) + 1)

    def test_objective_pickle(self):
        # can you pickle the objective function?
        pkl = pickle.dumps(self.objective)
        pickle.loads(pkl)

        # check the ForkingPickler as well.
        if hasattr(ForkingPickler, 'dumps'):
            pkl = ForkingPickler.dumps(self.objective)
            pickle.loads(pkl)

        # can you pickle with an extra function present?
        self.objective.logp_extra = logp_extra
        pkl = pickle.dumps(self.objective)
        pickle.loads(pkl)

        # check the ForkingPickler as well.
        if hasattr(ForkingPickler, 'dumps'):
            pkl = ForkingPickler.dumps(self.objective)
            pickle.loads(pkl)

    def test_transform_pickle(self):
        # can you pickle the Transform object?
        pkl = pickle.dumps(Transform('logY'))
        pickle.loads(pkl)

    def test_transform(self):
        pth = os.path.dirname(os.path.abspath(__file__))

        fname = os.path.join(pth, 'c_PLP0011859_q.txt')
        data = ReflectDataset(fname)
        t = Transform('logY')

        yt, et = t(data.x, data.y, y_err=data.y_err)
        assert_equal(yt, np.log10(data.y))

        yt, _ = t(data.x, data.y, y_err=None)
        assert_equal(yt, np.log10(data.y))

        EPy, EPe = EP.EPlog10(data.y, data.y_err)
        assert_equal(yt, EPy)
        assert_equal(et, EPe)

    def test_repr_transform(self):
        p = Transform(None)
        q = eval(repr(p))
        assert (p.form == q.form)

        p = Transform('logY')
        q = eval(repr(p))
        assert (p.form == q.form)

    def test_lnsigma(self):
        # check that lnsigma works correctly, by using the emcee line fit
        # example
        def logp(theta, x, y, yerr):
            m, b, lnf = theta
            if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
                return 0.0
            return -np.inf

        def logl(theta, x, y, yerr):
            m, b, lnf = theta
            model = m * x + b
            inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf))
            print(inv_sigma2)
            return -0.5 * (np.sum((y - model)**2 * inv_sigma2 -
                                  np.log(inv_sigma2)))

        x, y, yerr, _ = self.data.data

        theta = [self.m_true, self.b_true, np.log(self.f_true)]
        bo = BaseObjective(theta, logl, logp=logp, fcn_args=(x, y, yerr))

        lnsigma = Parameter(np.log(self.f_true),
                            'lnsigma',
                            bounds=(-10, 1),
                            vary=True)
        self.objective.setp(np.array([self.b_true, self.m_true]))
        self.objective.lnsigma = lnsigma

        # amendment factor because dfm emcee example does not include 2pi
        amend = 0.5 * self.objective.npoints * np.log(2 * np.pi)

        assert_allclose(self.objective.logl() + amend, bo.logl())

    def test_base_emcee(self):
        # check that the base objective works against the emcee example.
        def logp(theta, x, y, yerr):
            m, b, lnf = theta
            if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
                return 0.0
            return -np.inf

        def logl(theta, x, y, yerr):
            m, b, lnf = theta
            model = m * x + b
            inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf))
            return -0.5 * (np.sum((y - model)**2 * inv_sigma2 -
                                  np.log(inv_sigma2)))

        x, y, yerr, _ = self.data.data

        theta = [self.m_true, self.b_true, np.log(self.f_true)]
        bo = BaseObjective(theta, logl, logp=logp, fcn_args=(x, y, yerr))

        # test that the wrapper gives the same logl as the direct function
        assert_almost_equal(bo.logl(theta), logl(theta, x, y, yerr))
        assert_almost_equal(bo.logl(theta), -bo.nll(theta))
        assert_almost_equal(bo.nll(theta), 12.8885352412)

        # Find the maximum likelihood value.
        result = minimize(bo.nll, theta)

        # for repeatable sampling
        np.random.seed(1)

        ndim, nwalkers = 3, 100
        pos = [
            result["x"] + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)
        ]

        sampler = emcee.EnsembleSampler(nwalkers, ndim, bo.logpost)
        state = emcee.State(pos, random_state=np.random.get_state())
        sampler.run_mcmc(state, 800)

        burnin = 200
        samples = sampler.get_chain()[burnin:, :, :].reshape((-1, ndim))
        samples[:, 2] = np.exp(samples[:, 2])
        m_mc, b_mc, f_mc = map(
            lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
            zip(*np.percentile(samples, [16, 50, 84], axis=0)))
        assert_allclose(m_mc, (-1.0071664, 0.0809444, 0.0784894), rtol=0.04)

        assert_allclose(b_mc, (4.5428107, 0.3549174, 0.3673304), rtol=0.04)

        assert_allclose(f_mc, (0.4610898, 0.0823304, 0.0640812), rtol=0.06)

        # # smoke test for covariance matrix
        bo.parameters = np.array(result['x'])
        covar1 = bo.covar()
        uncertainties = np.sqrt(np.diag(covar1))

        # covariance from objective._covar should be almost equal to
        # the covariance matrix from sampling
        covar2 = np.cov(samples.T)
        assert_almost_equal(np.sqrt(np.diag(covar2))[:2], uncertainties[:2], 2)

        # check covariance of self.objective
        # TODO
        var_arr = result['x'][:]
        var_arr[0], var_arr[1], var_arr[2] = var_arr[2], var_arr[1], var_arr[0]

        # assert_(self.objective.data.weighted)
        # self.objective.parameters.pvals = var_arr
        # covar3 = self.objective.covar()
        # uncertainties3 = np.sqrt(np.diag(covar3))
        # assert_almost_equal(uncertainties3, uncertainties)
        # assert(False)

    def test_covar(self):
        # checks objective.covar against optimize.least_squares covariance.
        path = os.path.dirname(os.path.abspath(__file__))

        theoretical = np.loadtxt(os.path.join(path, 'gauss_data.txt'))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        p0 = np.array([0.1, 20., 0.1, 0.1])
        names = ['bkg', 'A', 'x0', 'width']
        bounds = [(-1, 1), (0, 30), (-5., 5.), (0.001, 2)]

        params = Parameters(name="gauss_params")
        for p, name, bound in zip(p0, names, bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            params.append(param)

        model = Model(params, fitfunc=gauss)
        data = Data1D((xvals, yvals, evals))
        objective = Objective(model, data)

        # first calculate least_squares jac/hess/covariance matrices
        res = least_squares(objective.residuals,
                            np.array(params),
                            jac='3-point')

        hess_least_squares = np.matmul(res.jac.T, res.jac)
        covar_least_squares = np.linalg.inv(hess_least_squares)

        # now calculate corresponding matrices by hand, to see if the approach
        # concurs with least_squares
        objective.setp(res.x)
        _pvals = np.array(res.x)

        def residuals_scaler(vals):
            return np.squeeze(objective.residuals(_pvals * vals))

        jac = approx_derivative(residuals_scaler, np.ones_like(_pvals))
        hess = np.matmul(jac.T, jac)
        covar = np.linalg.inv(hess)

        covar = covar * np.atleast_2d(_pvals) * np.atleast_2d(_pvals).T

        assert_allclose(covar, covar_least_squares)

        # check that objective.covar corresponds to the least_squares
        # covariance matrix
        objective.setp(res.x)
        _pvals = np.array(res.x)
        covar_objective = objective.covar()
        assert_allclose(covar_objective, covar_least_squares)

        # now see what happens with a parameter that has no effect on residuals
        param = Parameter(1.234, name='dummy')
        param.vary = True
        params.append(param)

        from pytest import raises
        with raises(LinAlgError):
            objective.covar()

    @pytest.mark.xfail
    def test_pymc3(self):
        # test objective logl against pymc3

        # don't run this test if pymc3 is not installed
        try:
            import pymc3 as pm
        except ImportError:
            return

        logl = self.objective.logl()

        from refnx.analysis import pymc_objective
        from refnx.analysis.objective import _to_pymc3_distribution

        mod = pymc_objective(self.objective)
        with mod:
            pymc_logl = mod.logp({
                'p0': self.p[0].value,
                'p1': self.p[1].value
            })

        assert_allclose(logl, pymc_logl)

        # now check some of the distributions
        with pm.Model():
            p = Parameter(1, bounds=(1, 10))
            d = _to_pymc3_distribution('a', p)
            assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))
            assert_(np.isneginf(d.distribution.logp(-1).eval()))

            q = Parameter(1, bounds=PDF(stats.uniform(1, 9)))
            d = _to_pymc3_distribution('b', q)
            assert_almost_equal(d.distribution.logp(2).eval(), q.logp(2))
            assert_(np.isneginf(d.distribution.logp(-1).eval()))

            p = Parameter(1, bounds=PDF(stats.uniform))
            d = _to_pymc3_distribution('c', p)
            assert_almost_equal(d.distribution.logp(0.5).eval(), p.logp(0.5))

            p = Parameter(1, bounds=PDF(stats.norm))
            d = _to_pymc3_distribution('d', p)
            assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))

            p = Parameter(1, bounds=PDF(stats.norm(1, 10)))
            d = _to_pymc3_distribution('e', p)
            assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))