Beispiel #1
0
class TestCurveFitter(object):
    def setup_method(self):
        # Reproducible results!
        np.random.seed(123)

        self.m_true = -0.9594
        self.b_true = 4.294
        self.f_true = 0.534
        self.m_ls = -1.1040757010910947
        self.b_ls = 5.4405552502319505

        # Generate some synthetic data from the model.
        N = 50
        x = np.sort(10 * np.random.rand(N))
        y_err = 0.1 + 0.5 * np.random.rand(N)
        y = self.m_true * x + self.b_true
        y += np.abs(self.f_true * y) * np.random.randn(N)
        y += y_err * np.random.randn(N)

        self.data = Data1D(data=(x, y, y_err))

        self.p = Parameter(self.b_ls, "b", vary=True, bounds=(-100, 100))
        self.p |= Parameter(self.m_ls, "m", vary=True, bounds=(-100, 100))

        self.model = Model(self.p, fitfunc=line)
        self.objective = Objective(self.model, self.data)
        assert_(len(self.objective.varying_parameters()) == 2)

        mod = np.array([
            4.78166609,
            4.42364699,
            4.16404064,
            3.50343504,
            3.4257084,
            2.93594347,
            2.92035638,
            2.67533842,
            2.28136038,
            2.19772983,
            1.99295496,
            1.93748334,
            1.87484436,
            1.65161016,
            1.44613461,
            1.11128101,
            1.04584535,
            0.86055984,
            0.76913963,
            0.73906649,
            0.73331407,
            0.68350418,
            0.65216599,
            0.59838566,
            0.13070299,
            0.10749131,
            -0.01010195,
            -0.10010155,
            -0.29495372,
            -0.42817431,
            -0.43122391,
            -0.64637715,
            -1.30560686,
            -1.32626428,
            -1.44835768,
            -1.52589881,
            -1.56371158,
            -2.12048349,
            -2.24899179,
            -2.50292682,
            -2.53576659,
            -2.55797996,
            -2.60870542,
            -2.7074727,
            -3.93781479,
            -4.12415366,
            -4.42313742,
            -4.98368609,
            -5.38782395,
            -5.44077086,
        ])
        self.mod = mod

        self.mcfitter = CurveFitter(self.objective)

    def test_bounds_list(self):
        bnds = bounds_list(self.p)
        assert_allclose(bnds, [(-100, 100), (-100, 100)])

        # try making a Parameter bound a normal distribution, then get an
        # approximation to box bounds
        self.p[0].bounds = PDF(norm(0, 1))
        assert_allclose(bounds_list(self.p),
                        [norm(0, 1).ppf([0.005, 0.995]), (-100, 100)])

    def test_constraints(self):
        # constraints should work during fitting
        self.p[0].value = 5.4

        self.p[1].constraint = -0.203 * self.p[0]
        assert_equal(self.p[1].value, self.p[0].value * -0.203)
        res = self.mcfitter.fit()

        assert_(res.success)
        assert_equal(len(self.objective.varying_parameters()), 1)

        # lnsigma is parameters[0]
        assert_(self.p[0] is self.objective.parameters.flattened()[0])
        assert_(self.p[1] is self.objective.parameters.flattened()[1])
        assert_almost_equal(self.p[0].value, res.x[0])
        assert_almost_equal(self.p[1].value, self.p[0].value * -0.203)

        # check that constraints work during sampling
        # the CurveFitter has to be set up again if you change how the
        # parameters are being fitted.
        mcfitter = CurveFitter(self.objective)
        assert_(mcfitter.nvary == 1)
        mcfitter.sample(5)
        assert_equal(self.p[1].value, self.p[0].value * -0.203)
        # the constrained parameters should have a chain
        assert_(self.p[0].chain is not None)
        assert_(self.p[1].chain is not None)
        assert_allclose(self.p[1].chain, self.p[0].chain * -0.203)

    def test_mcmc(self):
        self.mcfitter.sample(steps=50, nthin=1, verbose=False)

        assert_equal(self.mcfitter.nvary, 2)

        # smoke test for corner plot
        self.mcfitter.objective.corner()

        # we're not doing Parallel Tempering here.
        assert_(self.mcfitter._ntemps == -1)
        assert_(isinstance(self.mcfitter.sampler, emcee.EnsembleSampler))

        # should be able to multithread
        mcfitter = CurveFitter(self.objective, nwalkers=50)
        res = mcfitter.sample(steps=33, nthin=2, verbose=False, pool=2)

        # check that the autocorrelation function at least runs
        acfs = mcfitter.acf(nburn=10)
        assert_equal(acfs.shape[-1], mcfitter.nvary)

        # check the standalone autocorrelation calculator
        acfs2 = autocorrelation_chain(mcfitter.chain, nburn=10)
        assert_equal(acfs, acfs2)

        # check integrated_time
        integrated_time(acfs2, tol=5)

        # check chain shape
        assert_equal(mcfitter.chain.shape, (33, 50, 2))
        # assert_equal(mcfitter._lastpos, mcfitter.chain[:, -1, :])
        assert_equal(res[0].chain.shape, (33, 50))

        # if the number of parameters changes there should be an Exception
        # raised
        from pytest import raises

        with raises(RuntimeError):
            self.p[0].vary = False
            self.mcfitter.sample(1)

        # can fix by making the sampler again
        self.mcfitter.make_sampler()
        self.mcfitter.sample(1)

    def test_random_seed(self):
        # check that MCMC sampling is reproducible
        self.mcfitter.sample(steps=2, random_state=1)

        # get a starting pos
        starting_pos = self.mcfitter._state.coords

        # is sampling reproducible
        self.mcfitter.reset()
        self.mcfitter.initialise(pos=starting_pos)
        self.mcfitter.sample(3, random_state=1, pool=1)
        chain1 = np.copy(self.mcfitter.chain)

        self.mcfitter.reset()
        self.mcfitter.initialise(pos=starting_pos)
        self.mcfitter.sample(3, random_state=1, pool=1)
        chain2 = np.copy(self.mcfitter.chain)

        assert_equal(chain1, chain2)

    def test_mcmc_pt(self):
        # smoke test for parallel tempering
        x = np.array(self.objective.parameters)

        mcfitter = CurveFitter(self.objective, ntemps=10, nwalkers=50)
        assert_equal(mcfitter.sampler.ntemps, 10)

        # check that the parallel sampling works
        # and that chain shape is correct
        res = mcfitter.sample(steps=5, nthin=2, verbose=False, pool=-1)
        assert_equal(mcfitter.chain.shape, (5, 10, 50, 2))
        assert_equal(res[0].chain.shape, (5, 50))
        assert_equal(mcfitter.chain[:, 0, :, 0], res[0].chain)
        assert_equal(mcfitter.chain[:, 0, :, 1], res[1].chain)
        chain = np.copy(mcfitter.chain)

        # the sampler should store the probability
        assert_equal(mcfitter.logpost.shape, (5, 10, 50))
        assert_allclose(mcfitter.logpost, mcfitter.sampler._ptchain.logP)

        logprobs = mcfitter.logpost
        highest_prob_loc = np.argmax(logprobs[:, 0])
        idx = np.unravel_index(highest_prob_loc, logprobs[:, 0].shape)
        idx = list(idx)
        idx.insert(1, 0)
        idx = tuple(idx)
        assert_equal(idx, mcfitter.index_max_prob)
        pvals = mcfitter.chain[idx]
        assert_allclose(logprobs[idx], self.objective.logpost(pvals))

        # try resetting the chain
        mcfitter.reset()

        # test for reproducible operation
        self.objective.setp(x)
        mcfitter = CurveFitter(self.objective, ntemps=10, nwalkers=50)
        mcfitter.initialise("jitter", random_state=1)
        mcfitter.sample(steps=5, nthin=2, verbose=False, random_state=2)
        chain = np.copy(mcfitter.chain)

        self.objective.setp(x)
        mcfitter = CurveFitter(self.objective, ntemps=10, nwalkers=50)
        mcfitter.initialise("jitter", random_state=1)
        mcfitter.sample(steps=5, nthin=2, verbose=False, random_state=2)
        chain2 = np.copy(mcfitter.chain)

        assert_allclose(chain2, chain)

    def test_mcmc_init(self):
        # smoke test for sampler initialisation
        # TODO check that the initialisation worked.
        # reproducible initialisation with random_state dependents
        self.mcfitter.initialise("prior", random_state=1)
        starting_pos = np.copy(self.mcfitter._state.coords)

        self.mcfitter.initialise("prior", random_state=1)
        starting_pos2 = self.mcfitter._state.coords
        assert_equal(starting_pos, starting_pos2)

        self.mcfitter.initialise("jitter", random_state=1)
        starting_pos = np.copy(self.mcfitter._state.coords)

        self.mcfitter.initialise("jitter", random_state=1)
        starting_pos2 = self.mcfitter._state.coords
        assert_equal(starting_pos, starting_pos2)

        mcfitter = CurveFitter(self.objective, nwalkers=100)
        mcfitter.initialise("covar")
        assert_equal(mcfitter._state.coords.shape, (100, 2))
        mcfitter.initialise("prior")
        assert_equal(mcfitter._state.coords.shape, (100, 2))
        mcfitter.initialise("jitter")
        assert_equal(mcfitter._state.coords.shape, (100, 2))
        # initialise with last position
        mcfitter.sample(steps=1)
        chain = mcfitter.chain
        mcfitter.initialise(pos=chain[-1])
        assert_equal(mcfitter._state.coords.shape, (100, 2))
        # initialise with chain
        mcfitter.sample(steps=2)
        chain = mcfitter.chain
        mcfitter.initialise(pos=chain)
        assert_equal(mcfitter._state.coords, chain[-1])
        # initialise with chain if it's never been run before
        mcfitter = CurveFitter(self.objective, nwalkers=100)
        mcfitter.initialise(chain)

        # initialise for Parallel tempering
        mcfitter = CurveFitter(self.objective, ntemps=20, nwalkers=100)
        mcfitter.initialise("covar")
        assert_equal(mcfitter._state.coords.shape, (20, 100, 2))
        mcfitter.initialise("prior")
        assert_equal(mcfitter._state.coords.shape, (20, 100, 2))
        mcfitter.initialise("jitter")
        assert_equal(mcfitter._state.coords.shape, (20, 100, 2))
        # initialise with last position
        mcfitter.sample(steps=1)
        chain = mcfitter.chain
        mcfitter.initialise(pos=chain[-1])
        assert_equal(mcfitter._state.coords.shape, (20, 100, 2))
        # initialise with chain
        mcfitter.sample(steps=2)
        chain = mcfitter.chain
        mcfitter.initialise(pos=np.copy(chain))
        assert_equal(mcfitter._state.coords, chain[-1])
        # initialise with chain if it's never been run before
        mcfitter = CurveFitter(self.objective, nwalkers=100, ntemps=20)
        mcfitter.initialise(chain)

    def test_fit_smoke(self):
        # smoke tests to check that fit runs
        def callback(xk):
            return

        def callback2(xk, **kws):
            return

        # L-BFGS-B
        res0 = self.mcfitter.fit(callback=callback)
        assert_almost_equal(res0.x, [self.b_ls, self.m_ls], 6)
        res0 = self.mcfitter.fit()
        res0 = self.mcfitter.fit(verbose=False)
        res0 = self.mcfitter.fit(verbose=False, callback=callback)

        # least_squares
        res1 = self.mcfitter.fit(method="least_squares")
        assert_almost_equal(res1.x, [self.b_ls, self.m_ls], 6)

        # least_squares doesn't accept a callback. As well as testing that
        # least_squares works, it checks that providing a callback doesn't
        # trip the fitter up.
        res1 = self.mcfitter.fit(method="least_squares", callback=callback)
        assert_almost_equal(res1.x, [self.b_ls, self.m_ls], 6)

        # need full bounds for differential_evolution
        self.p[0].range(3, 7)
        self.p[1].range(-2, 0)
        res2 = self.mcfitter.fit(
            method="differential_evolution",
            seed=1,
            popsize=10,
            maxiter=100,
            callback=callback2,
        )
        assert_almost_equal(res2.x, [self.b_ls, self.m_ls], 6)

        # check that the res object has covar and stderr
        assert_("covar" in res0)
        assert_("stderr" in res0)

    def test_NIST(self):
        # Run all the NIST standard tests with leastsq
        for model in NIST_Models:
            try:
                NIST_runner(model)
            except Exception:
                print(model)
                raise
class TestGlobalFitting(object):
    def setup_method(self):
        self.pth = os.path.dirname(os.path.abspath(__file__))

        self.si = SLD(2.07, name='Si')
        self.sio2 = SLD(3.47, name='SiO2')
        self.d2o = SLD(6.36, name='d2o')
        self.h2o = SLD(-0.56, name='h2o')
        self.cm3 = SLD(3.5, name='cm3')
        self.polymer = SLD(2, name='polymer')

        self.sio2_l = self.sio2(40, 3)
        self.polymer_l = self.polymer(200, 3)

        self.structure = (self.si | self.sio2_l | self.polymer_l
                          | self.d2o(0, 3))

        fname = os.path.join(self.pth, 'c_PLP0011859_q.txt')

        self.dataset = ReflectDataset(fname)
        self.model = ReflectModel(self.structure, bkg=2e-7)
        self.objective = Objective(self.model,
                                   self.dataset,
                                   use_weights=False,
                                   transform=Transform('logY'))
        self.global_objective = GlobalObjective([self.objective])

    def test_residuals_length(self):
        # the residuals should be the same length as the data
        residuals = self.global_objective.residuals()
        assert_equal(residuals.size, len(self.dataset))

    def test_globalfitting(self):
        # smoke test for can the global fitting run?
        # also tests that global fitting gives same output as
        # normal fitting (for a single dataset)
        self.model.scale.setp(vary=True, bounds=(0.1, 2))
        self.model.bkg.setp(vary=True, bounds=(1e-10, 8e-6))
        self.structure[-1].rough.setp(vary=True, bounds=(0.2, 6))
        self.sio2_l.thick.setp(vary=True, bounds=(0.2, 80))
        self.polymer_l.thick.setp(bounds=(0.01, 400), vary=True)
        self.polymer_l.sld.real.setp(vary=True, bounds=(0.01, 4))

        self.objective.transform = Transform('logY')

        starting = np.array(self.objective.parameters)
        with np.errstate(invalid='raise'):
            g = CurveFitter(self.global_objective)
            res_g = g.fit()

            # need the same starting point
            self.objective.setp(starting)
            f = CurveFitter(self.objective)
            res_f = f.fit()

            # individual and global should give the same fit.
            assert_almost_equal(res_g.x, res_f.x)

    def test_multipledataset_corefinement(self):
        # test corefinement of three datasets
        data361 = ReflectDataset(os.path.join(self.pth, 'e361r.txt'))
        data365 = ReflectDataset(os.path.join(self.pth, 'e365r.txt'))
        data366 = ReflectDataset(os.path.join(self.pth, 'e366r.txt'))

        si = SLD(2.07, name='Si')
        sio2 = SLD(3.47, name='SiO2')
        d2o = SLD(6.36, name='d2o')
        h2o = SLD(-0.56, name='h2o')
        cm3 = SLD(3.47, name='cm3')
        polymer = SLD(1, name='polymer')

        structure361 = si | sio2(10, 4) | polymer(200, 3) | d2o(0, 3)
        structure365 = si | structure361[1] | structure361[2] | cm3(0, 3)
        structure366 = si | structure361[1] | structure361[2] | h2o(0, 3)

        structure365[-1].rough = structure361[-1].rough
        structure366[-1].rough = structure361[-1].rough

        structure361[1].thick.setp(vary=True, bounds=(0, 20))
        structure361[2].thick.setp(value=200., bounds=(200., 250.), vary=True)
        structure361[2].sld.real.setp(vary=True, bounds=(0, 2))
        structure361[2].vfsolv.setp(value=5., bounds=(0., 100.), vary=True)

        model361 = ReflectModel(structure361, bkg=2e-5)
        model365 = ReflectModel(structure365, bkg=2e-5)
        model366 = ReflectModel(structure366, bkg=2e-5)

        model361.bkg.setp(vary=True, bounds=(1e-6, 5e-5))
        model365.bkg.setp(vary=True, bounds=(1e-6, 5e-5))
        model366.bkg.setp(vary=True, bounds=(1e-6, 5e-5))

        objective361 = Objective(model361, data361)
        objective365 = Objective(model365, data365)
        objective366 = Objective(model366, data366)

        global_objective = GlobalObjective(
            [objective361, objective365, objective366])
        # are the right numbers of parameters varying?
        assert_equal(len(global_objective.varying_parameters()), 7)

        # can we set the parameters?
        global_objective.setp(np.array([1e-5, 10, 212, 1, 10, 1e-5, 1e-5]))

        f = CurveFitter(global_objective)
        f.fit()

        indiv_chisqr = np.sum(
            [objective.chisqr() for objective in global_objective.objectives])

        # the overall chi2 should be sum of individual chi2
        global_chisqr = global_objective.chisqr()
        assert_almost_equal(global_chisqr, indiv_chisqr)

        # now check that the parameters were held in common correctly.
        slabs361 = structure361.slabs()
        slabs365 = structure365.slabs()
        slabs366 = structure366.slabs()

        assert_equal(slabs365[0:2, 0:5], slabs361[0:2, 0:5])
        assert_equal(slabs366[0:2, 0:5], slabs361[0:2, 0:5])
        assert_equal(slabs365[-1, 3], slabs361[-1, 3])
        assert_equal(slabs366[-1, 3], slabs361[-1, 3])

        # check that the residuals are the correct lengths
        res361 = objective361.residuals()
        res365 = objective365.residuals()
        res366 = objective366.residuals()
        res_global = global_objective.residuals()
        assert_allclose(res_global[0:len(res361)], res361, rtol=1e-5)
        assert_allclose(res_global[len(res361):len(res361) + len(res365)],
                        res365,
                        rtol=1e-5)
        assert_allclose(res_global[len(res361) + len(res365):],
                        res366,
                        rtol=1e-5)

        repr(global_objective)
Beispiel #3
0
class TestFitterGauss(object):
    # Test CurveFitter with a noisy gaussian, weighted and unweighted, to see
    # if the parameters and uncertainties come out correct

    @pytest.fixture(autouse=True)
    def setup_method(self, tmpdir):
        self.path = os.path.dirname(os.path.abspath(__file__))
        self.tmpdir = tmpdir.strpath

        theoretical = np.loadtxt(os.path.join(self.path, "gauss_data.txt"))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        # these best weighted values and uncertainties obtained with Igor
        self.best_weighted = [-0.00246095, 19.5299, -8.28446e-2, 1.24692]

        self.best_weighted_errors = [
            0.0220313708486,
            1.12879436221,
            0.0447659158681,
            0.0412022938883,
        ]

        self.best_weighted_chisqr = 77.6040960351

        self.best_unweighted = [
            -0.10584111872702096,
            19.240347049328989,
            0.0092623066070940396,
            1.501362314145845,
        ]

        self.best_unweighted_errors = [
            0.34246565477,
            0.689820935208,
            0.0411243173041,
            0.0693429375282,
        ]

        self.best_unweighted_chisqr = 497.102084956

        self.p0 = np.array([0.1, 20.0, 0.1, 0.1])
        self.names = ["bkg", "A", "x0", "width"]
        self.bounds = [(-1, 1), (0, 30), (-5.0, 5.0), (0.001, 2)]

        self.params = Parameters(name="gauss_params")
        for p, name, bound in zip(self.p0, self.names, self.bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            self.params.append(param)

        self.model = Model(self.params, fitfunc=gauss)
        self.data = Data1D((xvals, yvals, evals))
        self.objective = Objective(self.model, self.data)
        return 0

    def test_pickle(self):
        # tests if a CurveFitter can be pickled/unpickled.
        f = CurveFitter(self.objective)
        pkl = pickle.dumps(f)
        g = pickle.loads(pkl)
        g._check_vars_unchanged()

    def test_best_weighted(self):
        assert_equal(len(self.objective.varying_parameters()), 4)
        self.objective.setp(self.p0)

        f = CurveFitter(self.objective, nwalkers=100)
        res = f.fit("least_squares", jac="3-point")

        output = res.x
        assert_almost_equal(output, self.best_weighted, 3)
        assert_almost_equal(self.objective.chisqr(), self.best_weighted_chisqr,
                            5)

        # compare the residuals
        res = (self.data.y - self.model(self.data.x)) / self.data.y_err
        assert_equal(self.objective.residuals(), res)

        # compare objective.covar to the best_weighted_errors
        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.005)

        # we're also going to try the checkpointing here.
        checkpoint = os.path.join(self.tmpdir, "checkpoint.txt")

        # compare samples to best_weighted_errors
        np.random.seed(1)
        f.sample(steps=201, random_state=1, verbose=False, f=checkpoint)
        process_chain(self.objective, f.chain, nburn=50, nthin=10)
        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.07)

        # test that the checkpoint worked
        check_array = np.loadtxt(checkpoint)
        check_array = check_array.reshape(201, f._nwalkers, f.nvary)
        assert_allclose(check_array, f.chain)

        # test loading the checkpoint
        chain = load_chain(checkpoint)
        assert_allclose(chain, f.chain)

        f.initialise("jitter")
        f.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        assert_equal(f.chain.shape[0], 2)

        # we should be able to produce 2 * 100 steps from the generator
        g = self.objective.pgen(ngen=20000000000)
        s = [i for i, a in enumerate(g)]
        assert_equal(np.max(s), 200 - 1)
        g = self.objective.pgen(ngen=200)
        pvec = next(g)
        assert_equal(pvec.size, len(self.objective.parameters.flattened()))

        # check that all the parameters are returned via pgen, not only those
        # being varied.
        self.params[0].vary = False
        f = CurveFitter(self.objective, nwalkers=100)
        f.initialise("jitter")
        f.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        g = self.objective.pgen(ngen=100)
        pvec = next(g)
        assert_equal(pvec.size, len(self.objective.parameters.flattened()))

        # the following test won't work because of emcee/gh226.
        # chain = load_chain(checkpoint)
        # assert_(chain.shape == f.chain.shape)
        # assert_allclose(chain, f.chain)

        # try reproducing best fit with parallel tempering
        self.params[0].vary = True
        f = CurveFitter(self.objective, nwalkers=100, ntemps=10)
        f.fit("differential_evolution", seed=1)

        f.sample(steps=201, random_state=1, verbose=False)
        process_chain(self.objective, f.chain, nburn=50, nthin=15)
        print(self.params[0].chain.shape, self.params[0].chain)

        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.07)

    def test_best_unweighted(self):
        self.objective.weighted = False
        f = CurveFitter(self.objective, nwalkers=100)
        res = f.fit()

        output = res.x
        assert_almost_equal(self.objective.chisqr(),
                            self.best_unweighted_chisqr)
        assert_almost_equal(output, self.best_unweighted, 5)

        # compare the residuals
        res = self.data.y - self.model(self.data.x)
        assert_equal(self.objective.residuals(), res)

        # compare objective._covar to the best_unweighted_errors
        uncertainties = np.array([param.stderr for param in self.params])
        assert_almost_equal(uncertainties, self.best_unweighted_errors, 3)

        # the samples won't compare to the covariance matrix...
        # f.sample(nsteps=150, nburn=20, nthin=30, random_state=1)
        # uncertainties = [param.stderr for param in self.params]
        # assert_allclose(uncertainties, self.best_unweighted_errors,
        #                 rtol=0.15)

    def test_all_minimisers(self):
        """test minimisers against the Gaussian fit"""
        f = CurveFitter(self.objective)

        methods = ["differential_evolution", "L-BFGS-B", "least_squares"]
        if hasattr(sciopt, "shgo"):
            methods.append("shgo")
        if hasattr(sciopt, "dual_annealing"):
            methods.append("dual_annealing")

        for method in methods:
            self.objective.setp(self.p0)
            res = f.fit(method=method)
            assert_almost_equal(res.x, self.best_weighted, 3)

        # smoke test to check that we can use nlpost
        self.objective.setp(self.p0)
        logp0 = self.objective.logp()

        # check that probabilities are calculated correctly
        assert_allclose(
            self.objective.logpost(),
            self.objective.logp() + self.objective.logl(),
        )
        assert_allclose(self.objective.nlpost(), -self.objective.logpost())
        assert_allclose(self.objective.nlpost(self.p0),
                        -self.objective.logpost(self.p0))

        # if the priors are all uniform then the only difference between
        # logpost and logl is a constant. A minimiser should converge on the
        # same answer. The following tests examine that.
        # The test works for dual_annealing, but not for differential
        # evolution, not sure why that is.
        self.objective.setp(self.p0)
        res1 = f.fit(method="dual_annealing", seed=1)
        assert_almost_equal(res1.x, self.best_weighted, 3)
        nll1 = self.objective.nll()
        nlpost1 = self.objective.nlpost()

        self.objective.setp(self.p0)
        res2 = f.fit(method="dual_annealing", target="nlpost", seed=1)
        assert_almost_equal(res2.x, self.best_weighted, 3)
        nll2 = self.objective.nll()
        nlpost2 = self.objective.nlpost()

        assert_allclose(nlpost1, nlpost2, atol=0.001)
        assert_allclose(nll1, nll2, atol=0.001)

        # these two priors are calculated for different parameter values
        # (before and after the fit) they should be the same because all
        # the parameters have uniform priors.
        assert_almost_equal(self.objective.logp(), logp0)

    def test_pymc3_sample(self):
        # test sampling with pymc3
        try:
            import pymc3 as pm
            from refnx.analysis import pymc3_model
        except (ModuleNotFoundError, ImportError, AttributeError):
            # can't run test if pymc3/theano not installed
            return

        with pymc3_model(self.objective):
            s = pm.NUTS()
            pm.sample(
                200,
                tune=100,
                step=s,
                discard_tuned_samples=True,
                compute_convergence_checks=False,
                random_seed=1,
            )
Beispiel #4
0
class TestObjective(object):
    def setup_method(self):
        # Choose the "true" parameters.

        # Reproducible results!
        np.random.seed(123)

        self.m_true = -0.9594
        self.b_true = 4.294
        self.f_true = 0.534
        self.m_ls = -1.1040757010910947
        self.b_ls = 5.4405552502319505

        # Generate some synthetic data from the model.
        N = 50
        x = np.sort(10 * np.random.rand(N))
        y_err = 0.1 + 0.5 * np.random.rand(N)
        y = self.m_true * x + self.b_true
        y += np.abs(self.f_true * y) * np.random.randn(N)
        y += y_err * np.random.randn(N)

        self.data = Data1D(data=(x, y, y_err))

        self.p = Parameter(self.b_ls, 'b') | Parameter(self.m_ls, 'm')
        self.model = Model(self.p, fitfunc=line)
        self.objective = Objective(self.model, self.data)

        # want b and m
        self.p[0].vary = True
        self.p[1].vary = True

        mod = np.array([
            4.78166609, 4.42364699, 4.16404064, 3.50343504, 3.4257084,
            2.93594347, 2.92035638, 2.67533842, 2.28136038, 2.19772983,
            1.99295496, 1.93748334, 1.87484436, 1.65161016, 1.44613461,
            1.11128101, 1.04584535, 0.86055984, 0.76913963, 0.73906649,
            0.73331407, 0.68350418, 0.65216599, 0.59838566, 0.13070299,
            0.10749131, -0.01010195, -0.10010155, -0.29495372, -0.42817431,
            -0.43122391, -0.64637715, -1.30560686, -1.32626428, -1.44835768,
            -1.52589881, -1.56371158, -2.12048349, -2.24899179, -2.50292682,
            -2.53576659, -2.55797996, -2.60870542, -2.7074727, -3.93781479,
            -4.12415366, -4.42313742, -4.98368609, -5.38782395, -5.44077086
        ])
        self.mod = mod

    def test_model(self):
        # test that the line data produced by our model is the same as the
        # test data
        assert_almost_equal(self.model(self.data.x), self.mod)

    def test_synthetic_data(self):
        # test that we create the correct synthetic data by performing a least
        # squares fit on it
        assert_(self.data.y_err is not None)

        x, y, y_err, _ = self.data.data
        A = np.vstack((np.ones_like(x), x)).T
        C = np.diag(y_err * y_err)
        cov = np.linalg.inv(np.dot(A.T, np.linalg.solve(C, A)))
        b_ls, m_ls = np.dot(cov, np.dot(A.T, np.linalg.solve(C, y)))

        assert_almost_equal(b_ls, self.b_ls)
        assert_almost_equal(m_ls, self.m_ls)

    def test_setp(self):
        # check that we can set parameters
        self.p[0].vary = False

        assert_(len(self.objective.varying_parameters()) == 1)
        self.objective.setp(np.array([1.23]))
        assert_equal(self.p[1].value, 1.23)
        self.objective.setp(np.array([1.234, 1.23]))
        assert_equal(np.array(self.p), [1.234, 1.23])

    def test_pvals(self):
        assert_equal(self.objective.parameters.pvals, [self.b_ls, self.m_ls])
        self.objective.parameters.pvals = [1, 2]
        assert_equal(self.objective.parameters.pvals, [1, 2.])

    def test_lnprior(self):
        self.p[0].range(0, 10)
        assert_almost_equal(self.objective.lnprior(), np.log(0.1))

        # lnprior should set parameters
        self.objective.lnprior([8, 2])
        assert_equal(np.array(self.objective.parameters), [8, 2])

        # if we supply a value outside the range it should return -inf
        assert_equal(self.objective.lnprior([-1, 2]), -np.inf)

    def test_lnprob(self):
        # http://dan.iel.fm/emcee/current/user/line/
        assert_almost_equal(self.objective.lnprior(), 0)
        # the uncertainties are underestimated in this example...
        assert_almost_equal(self.objective.lnlike(), -559.01078135444595)
        assert_almost_equal(self.objective.lnprob(), -559.01078135444595)

    def test_chisqr(self):
        assert_almost_equal(self.objective.chisqr(), 1231.1096772954229)

    def test_residuals(self):
        # weighted, with and without transform
        assert_almost_equal(self.objective.residuals(),
                            (self.data.y - self.mod) / self.data.y_err)

        objective = Objective(self.model,
                              self.data,
                              transform=Transform('lin'))
        assert_almost_equal(objective.residuals(),
                            (self.data.y - self.mod) / self.data.y_err)

        # unweighted, with and without transform
        objective = Objective(self.model, self.data, use_weights=False)
        assert_almost_equal(objective.residuals(), self.data.y - self.mod)

        objective = Objective(self.model,
                              self.data,
                              use_weights=False,
                              transform=Transform('lin'))
        assert_almost_equal(objective.residuals(), self.data.y - self.mod)

    def test_lnprob_extra(self):
        self.objective.lnprob_extra = lnprob_extra

        # repeat lnprior test
        self.p[0].range(0, 10)
        assert_almost_equal(self.objective.lnprior(), np.log(0.1) + 1)

    def test_objective_pickle(self):
        # can you pickle the objective function?
        pkl = pickle.dumps(self.objective)
        pickle.loads(pkl)

        # check the ForkingPickler as well.
        if hasattr(ForkingPickler, 'dumps'):
            pkl = ForkingPickler.dumps(self.objective)
            pickle.loads(pkl)

        # can you pickle with an extra function present?
        self.objective.lnprob_extra = lnprob_extra
        pkl = pickle.dumps(self.objective)
        pickle.loads(pkl)

        # check the ForkingPickler as well.
        if hasattr(ForkingPickler, 'dumps'):
            pkl = ForkingPickler.dumps(self.objective)
            pickle.loads(pkl)

    def test_transform_pickle(self):
        # can you pickle the Transform object?
        pkl = pickle.dumps(Transform('logY'))
        pickle.loads(pkl)

    def test_transform(self):
        pth = os.path.dirname(os.path.abspath(__file__))

        fname = os.path.join(pth, 'c_PLP0011859_q.txt')
        data = ReflectDataset(fname)
        t = Transform('logY')

        yt, et = t(data.x, data.y, y_err=data.y_err)
        assert_equal(yt, np.log10(data.y))

        yt, _ = t(data.x, data.y, y_err=None)
        assert_equal(yt, np.log10(data.y))

        EPy, EPe = EP.EPlog10(data.y, data.y_err)
        assert_equal(yt, EPy)
        assert_equal(et, EPe)

    def test_lnsigma(self):
        # check that lnsigma works correctly
        def lnprior(theta, x, y, yerr):
            m, b, lnf = theta
            if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
                return 0.0
            return -np.inf

        def lnlike(theta, x, y, yerr):
            m, b, lnf = theta
            model = m * x + b
            inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf))
            print(inv_sigma2)
            return -0.5 * (np.sum((y - model)**2 * inv_sigma2 -
                                  np.log(inv_sigma2)))

        x, y, yerr, _ = self.data.data

        theta = [self.m_true, self.b_true, np.log(self.f_true)]
        bo = BaseObjective(theta,
                           lnlike,
                           lnprior=lnprior,
                           fcn_args=(x, y, yerr))

        lnsigma = Parameter(np.log(self.f_true),
                            'lnsigma',
                            bounds=(-10, 1),
                            vary=True)
        self.objective.setp(np.array([self.b_true, self.m_true]))
        self.objective.lnsigma = lnsigma

        assert_allclose(self.objective.lnlike(), bo.lnlike())

    def test_base_emcee(self):
        # check that the base objective works against the emcee example.
        def lnprior(theta, x, y, yerr):
            m, b, lnf = theta
            if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
                return 0.0
            return -np.inf

        def lnlike(theta, x, y, yerr):
            m, b, lnf = theta
            model = m * x + b
            inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf))
            return -0.5 * (np.sum((y - model)**2 * inv_sigma2 -
                                  np.log(inv_sigma2)))

        x, y, yerr, _ = self.data.data

        theta = [self.m_true, self.b_true, np.log(self.f_true)]
        bo = BaseObjective(theta,
                           lnlike,
                           lnprior=lnprior,
                           fcn_args=(x, y, yerr))

        # test that the wrapper gives the same lnlike as the direct function
        assert_almost_equal(bo.lnlike(theta), lnlike(theta, x, y, yerr))
        assert_almost_equal(bo.lnlike(theta), -bo.nll(theta))
        assert_almost_equal(bo.nll(theta), 12.8885352412)

        # Find the maximum likelihood value.
        result = minimize(bo.nll, theta)

        # for repeatable sampling
        np.random.seed(1)

        ndim, nwalkers = 3, 100
        pos = [
            result["x"] + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)
        ]

        sampler = emcee.EnsembleSampler(nwalkers, ndim, bo.lnprob)
        sampler.run_mcmc(pos, 800, rstate0=np.random.get_state())

        burnin = 200
        samples = sampler.chain[:, burnin:, :].reshape((-1, ndim))
        samples[:, 2] = np.exp(samples[:, 2])
        m_mc, b_mc, f_mc = map(
            lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
            zip(*np.percentile(samples, [16, 50, 84], axis=0)))
        assert_allclose(m_mc, (-1.0071664, 0.0809444, 0.0784894), rtol=0.04)

        assert_allclose(b_mc, (4.5428107, 0.3549174, 0.3673304), rtol=0.04)

        assert_allclose(f_mc, (0.4610898, 0.0823304, 0.0640812), rtol=0.06)

        # # smoke test for covariance matrix
        bo.parameters = np.array(result['x'])
        covar1 = bo.covar()
        uncertainties = np.sqrt(np.diag(covar1))

        # covariance from objective._covar should be almost equal to
        # the covariance matrix from sampling
        covar2 = np.cov(samples.T)
        assert_almost_equal(np.sqrt(np.diag(covar2))[:2], uncertainties[:2], 2)

        # check covariance of self.objective
        # TODO
        var_arr = result['x'][:]
        var_arr[0], var_arr[1], var_arr[2] = var_arr[2], var_arr[1], var_arr[0]

        # assert_(self.objective.data.weighted)
        # self.objective.parameters.pvals = var_arr
        # covar3 = self.objective.covar()
        # uncertainties3 = np.sqrt(np.diag(covar3))
        # assert_almost_equal(uncertainties3, uncertainties)
        # assert(False)

    def test_covar(self):
        # checks objective.covar against optimize.least_squares covariance.
        path = os.path.dirname(os.path.abspath(__file__))

        theoretical = np.loadtxt(os.path.join(path, 'gauss_data.txt'))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        p0 = np.array([0.1, 20., 0.1, 0.1])
        names = ['bkg', 'A', 'x0', 'width']
        bounds = [(-1, 1), (0, 30), (-5., 5.), (0.001, 2)]

        params = Parameters(name="gauss_params")
        for p, name, bound in zip(p0, names, bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            params.append(param)

        model = Model(params, fitfunc=gauss)
        data = Data1D((xvals, yvals, evals))
        objective = Objective(model, data)

        # first calculate least_squares jac/hess/covariance matrices
        res = least_squares(objective.residuals,
                            np.array(params),
                            jac='3-point')

        hess_least_squares = np.matmul(res.jac.T, res.jac)
        covar_least_squares = np.linalg.inv(hess_least_squares)

        # now calculate corresponding matrices by hand, to see if the approach
        # concurs with least_squares
        objective.setp(res.x)
        _pvals = np.array(res.x)

        def residuals_scaler(vals):
            return np.squeeze(objective.residuals(_pvals * vals))

        jac = approx_derivative(residuals_scaler, np.ones_like(_pvals))
        hess = np.matmul(jac.T, jac)
        covar = np.linalg.inv(hess)

        covar = covar * np.atleast_2d(_pvals) * np.atleast_2d(_pvals).T

        assert_allclose(covar, covar_least_squares)

        # check that objective.covar corresponds to the least_squares
        # covariance matrix
        objective.setp(res.x)
        _pvals = np.array(res.x)
        covar_objective = objective.covar()
        assert_allclose(covar_objective, covar_least_squares)

        # now see what happens with a parameter that has no effect on residuals
        param = Parameter(1.234, name='dummy')
        param.vary = True
        params.append(param)

        from pytest import raises
        with raises(LinAlgError):
            objective.covar()
Beispiel #5
0
    def test_covar(self):
        # checks objective.covar against optimize.least_squares covariance.
        path = os.path.dirname(os.path.abspath(__file__))

        theoretical = np.loadtxt(os.path.join(path, 'gauss_data.txt'))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        p0 = np.array([0.1, 20., 0.1, 0.1])
        names = ['bkg', 'A', 'x0', 'width']
        bounds = [(-1, 1), (0, 30), (-5., 5.), (0.001, 2)]

        params = Parameters(name="gauss_params")
        for p, name, bound in zip(p0, names, bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            params.append(param)

        model = Model(params, fitfunc=gauss)
        data = Data1D((xvals, yvals, evals))
        objective = Objective(model, data)

        # first calculate least_squares jac/hess/covariance matrices
        res = least_squares(objective.residuals,
                            np.array(params),
                            jac='3-point')

        hess_least_squares = np.matmul(res.jac.T, res.jac)
        covar_least_squares = np.linalg.inv(hess_least_squares)

        # now calculate corresponding matrices by hand, to see if the approach
        # concurs with least_squares
        objective.setp(res.x)
        _pvals = np.array(res.x)

        def residuals_scaler(vals):
            return np.squeeze(objective.residuals(_pvals * vals))

        jac = approx_derivative(residuals_scaler, np.ones_like(_pvals))
        hess = np.matmul(jac.T, jac)
        covar = np.linalg.inv(hess)

        covar = covar * np.atleast_2d(_pvals) * np.atleast_2d(_pvals).T

        assert_allclose(covar, covar_least_squares)

        # check that objective.covar corresponds to the least_squares
        # covariance matrix
        objective.setp(res.x)
        _pvals = np.array(res.x)
        covar_objective = objective.covar()
        assert_allclose(covar_objective, covar_least_squares)

        # now see what happens with a parameter that has no effect on residuals
        param = Parameter(1.234, name='dummy')
        param.vary = True
        params.append(param)

        from pytest import raises
        with raises(LinAlgError):
            objective.covar()
Beispiel #6
0
class TestFitterGauss(object):
    # Test CurveFitter with a noisy gaussian, weighted and unweighted, to see
    # if the parameters and uncertainties come out correct

    @pytest.fixture(autouse=True)
    def setup_method(self, tmpdir):
        self.path = os.path.dirname(os.path.abspath(__file__))
        self.tmpdir = tmpdir.strpath

        theoretical = np.loadtxt(os.path.join(self.path, 'gauss_data.txt'))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        # these best weighted values and uncertainties obtained with Igor
        self.best_weighted = [-0.00246095, 19.5299, -8.28446e-2, 1.24692]

        self.best_weighted_errors = [0.0220313708486, 1.12879436221,
                                     0.0447659158681, 0.0412022938883]

        self.best_weighted_chisqr = 77.6040960351

        self.best_unweighted = [-0.10584111872702096, 19.240347049328989,
                                0.0092623066070940396, 1.501362314145845]

        self.best_unweighted_errors = [0.34246565477, 0.689820935208,
                                       0.0411243173041, 0.0693429375282]

        self.best_unweighted_chisqr = 497.102084956

        self.p0 = np.array([0.1, 20., 0.1, 0.1])
        self.names = ['bkg', 'A', 'x0', 'width']
        self.bounds = [(-1, 1), (0, 30), (-5., 5.), (0.001, 2)]

        self.params = Parameters(name="gauss_params")
        for p, name, bound in zip(self.p0, self.names, self.bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            self.params.append(param)

        self.model = Model(self.params, fitfunc=gauss)
        self.data = Data1D((xvals, yvals, evals))
        self.objective = Objective(self.model, self.data)
        return 0

    def test_best_weighted(self):
        assert_equal(len(self.objective.varying_parameters()), 4)
        self.objective.setp(self.p0)

        f = CurveFitter(self.objective, nwalkers=100)
        res = f.fit('least_squares', jac='3-point')

        output = res.x
        assert_almost_equal(output, self.best_weighted, 3)
        assert_almost_equal(self.objective.chisqr(),
                            self.best_weighted_chisqr, 5)

        # compare the residuals
        res = (self.data.y - self.model(self.data.x)) / self.data.y_err
        assert_equal(self.objective.residuals(), res)

        # compare objective.covar to the best_weighted_errors
        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.005)

        # we're also going to try the checkpointing here.
        checkpoint = os.path.join(self.tmpdir, 'checkpoint.txt')

        # compare samples to best_weighted_errors
        np.random.seed(1)
        f.sample(steps=101, random_state=1, verbose=False, f=checkpoint)
        process_chain(self.objective, f.chain, nburn=50, nthin=10)
        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.07)

        # test that the checkpoint worked
        check_array = np.loadtxt(checkpoint)
        check_array = check_array.reshape(101, f._nwalkers, f.nvary)
        assert_allclose(check_array, f.chain)

        # test loading the checkpoint
        chain = load_chain(checkpoint)
        assert_allclose(chain, f.chain)

        f.initialise('jitter')
        f.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        assert_equal(f.chain.shape[0], 2)

        # the following test won't work because of emcee/gh226.
        # chain = load_chain(checkpoint)
        # assert_(chain.shape == f.chain.shape)
        # assert_allclose(chain, f.chain)

    def test_best_unweighted(self):
        self.objective.weighted = False
        f = CurveFitter(self.objective, nwalkers=100)
        res = f.fit()

        output = res.x
        assert_almost_equal(self.objective.chisqr(),
                            self.best_unweighted_chisqr)
        assert_almost_equal(output, self.best_unweighted, 5)

        # compare the residuals
        res = self.data.y - self.model(self.data.x)
        assert_equal(self.objective.residuals(), res)

        # compare objective._covar to the best_unweighted_errors
        uncertainties = np.array([param.stderr for param in self.params])
        assert_almost_equal(uncertainties, self.best_unweighted_errors, 3)
Beispiel #7
0
class TestObjective(object):
    def setup_method(self):
        # Choose the "true" parameters.

        # Reproducible results!
        np.random.seed(123)

        self.m_true = -0.9594
        self.b_true = 4.294
        self.f_true = 0.534
        self.m_ls = -1.1040757010910947
        self.b_ls = 5.4405552502319505

        # Generate some synthetic data from the model.
        N = 50
        x = np.sort(10 * np.random.rand(N))
        y_err = 0.1 + 0.5 * np.random.rand(N)
        y = self.m_true * x + self.b_true
        y += np.abs(self.f_true * y) * np.random.randn(N)
        y += y_err * np.random.randn(N)

        self.data = Data1D(data=(x, y, y_err))

        self.p = Parameter(self.b_ls, "b") | Parameter(self.m_ls, "m")
        self.model = Model(self.p, fitfunc=line)
        self.objective = Objective(self.model, self.data)

        # want b and m
        self.p[0].vary = True
        self.p[1].vary = True

        mod = np.array([
            4.78166609,
            4.42364699,
            4.16404064,
            3.50343504,
            3.4257084,
            2.93594347,
            2.92035638,
            2.67533842,
            2.28136038,
            2.19772983,
            1.99295496,
            1.93748334,
            1.87484436,
            1.65161016,
            1.44613461,
            1.11128101,
            1.04584535,
            0.86055984,
            0.76913963,
            0.73906649,
            0.73331407,
            0.68350418,
            0.65216599,
            0.59838566,
            0.13070299,
            0.10749131,
            -0.01010195,
            -0.10010155,
            -0.29495372,
            -0.42817431,
            -0.43122391,
            -0.64637715,
            -1.30560686,
            -1.32626428,
            -1.44835768,
            -1.52589881,
            -1.56371158,
            -2.12048349,
            -2.24899179,
            -2.50292682,
            -2.53576659,
            -2.55797996,
            -2.60870542,
            -2.7074727,
            -3.93781479,
            -4.12415366,
            -4.42313742,
            -4.98368609,
            -5.38782395,
            -5.44077086,
        ])
        self.mod = mod

    def test_model(self):
        # test that the line data produced by our model is the same as the
        # test data
        assert_almost_equal(self.model(self.data.x), self.mod)

    def test_synthetic_data(self):
        # test that we create the correct synthetic data by performing a least
        # squares fit on it
        assert_(self.data.y_err is not None)

        x, y, y_err, _ = self.data.data
        A = np.vstack((np.ones_like(x), x)).T
        C = np.diag(y_err * y_err)
        cov = np.linalg.inv(np.dot(A.T, np.linalg.solve(C, A)))
        b_ls, m_ls = np.dot(cov, np.dot(A.T, np.linalg.solve(C, y)))

        assert_almost_equal(b_ls, self.b_ls)
        assert_almost_equal(m_ls, self.m_ls)

    def test_setp(self):
        # check that we can set parameters
        self.p[0].vary = False

        assert_(len(self.objective.varying_parameters()) == 1)
        self.objective.setp(np.array([1.23]))
        assert_equal(self.p[1].value, 1.23)
        self.objective.setp(np.array([1.234, 1.23]))
        assert_equal(np.array(self.p), [1.234, 1.23])

    def test_pvals(self):
        assert_equal(self.objective.parameters.pvals, [self.b_ls, self.m_ls])
        self.objective.parameters.pvals = [1, 2]
        assert_equal(self.objective.parameters.pvals, [1, 2.0])

    def test_logp(self):
        self.p[0].range(0, 10)
        assert_almost_equal(self.objective.logp(), np.log(0.1))

        # logp should set parameters
        self.objective.logp([8, 2])
        assert_equal(np.array(self.objective.parameters), [8, 2])

        # if we supply a value outside the range it should return -inf
        assert_equal(self.objective.logp([-1, 2]), -np.inf)

    def test_logpost(self):
        # http://dan.iel.fm/emcee/current/user/line/
        assert_almost_equal(self.objective.logp(), 0)

        assert_almost_equal(self.objective.nlpost(), -self.objective.logpost())

        # the uncertainties are underestimated in this example...
        # amendment factor because dfm emcee example does not include 2pi
        amend = 0.5 * self.objective.npoints * np.log(2 * np.pi)
        assert_almost_equal(self.objective.logl() + amend, -559.01078135444595)
        assert_almost_equal(self.objective.logpost() + amend,
                            -559.01078135444595)

    def test_prior_transform(self):
        self.p[0].bounds = PDF(stats.uniform(-10, 20))
        self.p[1].bounds = PDF(stats.norm(loc=5, scale=10))
        x = self.objective.prior_transform([0.1, 0.9])
        assert_allclose(
            x,
            stats.uniform.ppf(0.1, -10, 20),
            stats.norm.ppf(0.9, loc=5, scale=10),
        )

    def test_chisqr(self):
        assert_almost_equal(self.objective.chisqr(), 1231.1096772954229)

    def test_residuals(self):
        # weighted, with and without transform
        assert_almost_equal(
            self.objective.residuals(),
            (self.data.y - self.mod) / self.data.y_err,
        )

        objective = Objective(self.model,
                              self.data,
                              transform=Transform("lin"))
        assert_almost_equal(objective.residuals(),
                            (self.data.y - self.mod) / self.data.y_err)

        # unweighted, with and without transform
        objective = Objective(self.model, self.data, use_weights=False)
        assert_almost_equal(objective.residuals(), self.data.y - self.mod)

        objective = Objective(
            self.model,
            self.data,
            use_weights=False,
            transform=Transform("lin"),
        )
        assert_almost_equal(objective.residuals(), self.data.y - self.mod)

    def test_masked_dataset(self):
        residuals = self.objective.residuals()

        mask = np.full_like(self.objective.data.y, True, bool)
        mask[1] = False
        self.objective.data.mask = mask

        assert_equal(self.objective.residuals().size, residuals.size - 1)

    def test_logp_extra(self):
        original_logl = self.objective.logl()
        self.objective.logp_extra = logp_extra
        assert_almost_equal(self.objective.logl(), original_logl + 1)

    def test_objective_pickle(self):
        # can you pickle the objective function?
        pkl = pickle.dumps(self.objective)
        pickle.loads(pkl)

        # check the ForkingPickler as well.
        if hasattr(ForkingPickler, "dumps"):
            pkl = ForkingPickler.dumps(self.objective)
            pickle.loads(pkl)

        # can you pickle with an extra function present?
        self.objective.logp_extra = logp_extra
        pkl = pickle.dumps(self.objective)
        pickle.loads(pkl)

        # check the ForkingPickler as well.
        if hasattr(ForkingPickler, "dumps"):
            pkl = ForkingPickler.dumps(self.objective)
            pickle.loads(pkl)

    def test_transform_pickle(self):
        # can you pickle the Transform object?
        pkl = pickle.dumps(Transform("logY"))
        pickle.loads(pkl)

    def test_transform(self):
        pth = os.path.dirname(os.path.abspath(__file__))

        fname = os.path.join(pth, "c_PLP0011859_q.txt")
        data = ReflectDataset(fname)
        t = Transform("logY")

        yt, et = t(data.x, data.y, y_err=data.y_err)
        assert_equal(yt, np.log10(data.y))

        yt, _ = t(data.x, data.y, y_err=None)
        assert_equal(yt, np.log10(data.y))

        EPy, EPe = EP.EPlog10(data.y, data.y_err)
        assert_equal(yt, EPy)
        assert_equal(et, EPe)

    def test_repr_transform(self):
        p = Transform(None)
        q = eval(repr(p))
        assert p.form == q.form

        p = Transform("logY")
        q = eval(repr(p))
        assert p.form == q.form

    def test_lnsigma(self):
        # check that lnsigma works correctly, by using the emcee line fit
        # example
        def logp(theta, x, y, yerr):
            m, b, lnf = theta
            if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
                return 0.0
            return -np.inf

        def logl(theta, x, y, yerr):
            m, b, lnf = theta
            model = m * x + b
            inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf))
            print(inv_sigma2)
            return -0.5 * (np.sum((y - model)**2 * inv_sigma2 -
                                  np.log(inv_sigma2)))

        x, y, yerr, _ = self.data.data

        theta = [self.m_true, self.b_true, np.log(self.f_true)]
        bo = BaseObjective(theta, logl, logp=logp, fcn_args=(x, y, yerr))

        lnsigma = Parameter(np.log(self.f_true),
                            "lnsigma",
                            bounds=(-10, 1),
                            vary=True)
        self.objective.setp(np.array([self.b_true, self.m_true]))
        self.objective.lnsigma = lnsigma

        # amendment factor because dfm emcee example does not include 2pi
        amend = 0.5 * self.objective.npoints * np.log(2 * np.pi)

        assert_allclose(self.objective.logl() + amend, bo.logl())

    def test_base_emcee(self):
        # check that the base objective works against the emcee example.
        def logp(theta, x, y, yerr):
            m, b, lnf = theta
            if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
                return 0.0
            return -np.inf

        def logl(theta, x, y, yerr):
            m, b, lnf = theta
            model = m * x + b
            inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf))
            return -0.5 * (np.sum((y - model)**2 * inv_sigma2 -
                                  np.log(inv_sigma2)))

        x, y, yerr, _ = self.data.data

        theta = [self.m_true, self.b_true, np.log(self.f_true)]
        bo = BaseObjective(theta, logl, logp=logp, fcn_args=(x, y, yerr))

        # test that the wrapper gives the same logl as the direct function
        assert_almost_equal(bo.logl(theta), logl(theta, x, y, yerr))
        assert_almost_equal(bo.logl(theta), -bo.nll(theta))
        assert_almost_equal(bo.nll(theta), 12.8885352412)

        # Find the maximum likelihood value.
        result = minimize(bo.nll, theta)

        # for repeatable sampling
        np.random.seed(1)

        ndim, nwalkers = 3, 100
        pos = [
            result["x"] + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)
        ]

        sampler = emcee.EnsembleSampler(nwalkers, ndim, bo.logpost)
        state = emcee.State(pos, random_state=np.random.get_state())
        sampler.run_mcmc(state, 800)

        burnin = 200
        samples = sampler.get_chain()[burnin:, :, :].reshape((-1, ndim))
        samples[:, 2] = np.exp(samples[:, 2])
        m_mc, b_mc, f_mc = map(
            lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
            zip(*np.percentile(samples, [16, 50, 84], axis=0)),
        )
        assert_allclose(m_mc, (-1.0071664, 0.0809444, 0.0784894), rtol=0.04)

        assert_allclose(b_mc, (4.5428107, 0.3549174, 0.3673304), rtol=0.04)

        assert_allclose(f_mc, (0.4610898, 0.0823304, 0.0640812), rtol=0.06)

        # # smoke test for covariance matrix
        bo.parameters = np.array(result["x"])
        covar1 = bo.covar()
        uncertainties = np.sqrt(np.diag(covar1))

        # covariance from objective._covar should be almost equal to
        # the covariance matrix from sampling
        covar2 = np.cov(samples.T)
        assert_almost_equal(np.sqrt(np.diag(covar2))[:2], uncertainties[:2], 2)

        # check covariance of self.objective
        # TODO
        var_arr = result["x"][:]
        var_arr[0], var_arr[1], var_arr[2] = var_arr[2], var_arr[1], var_arr[0]

        # assert_(self.objective.data.weighted)
        # self.objective.parameters.pvals = var_arr
        # covar3 = self.objective.covar()
        # uncertainties3 = np.sqrt(np.diag(covar3))
        # assert_almost_equal(uncertainties3, uncertainties)
        # assert(False)

    def test_covar(self):
        # checks objective.covar against optimize.least_squares covariance.
        path = os.path.dirname(os.path.abspath(__file__))

        theoretical = np.loadtxt(os.path.join(path, "gauss_data.txt"))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        p0 = np.array([0.1, 20.0, 0.1, 0.1])
        names = ["bkg", "A", "x0", "width"]
        bounds = [(-1, 1), (0, 30), (-5.0, 5.0), (0.001, 2)]

        params = Parameters(name="gauss_params")
        for p, name, bound in zip(p0, names, bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            params.append(param)

        model = Model(params, fitfunc=gauss)
        data = Data1D((xvals, yvals, evals))
        objective = Objective(model, data)

        # first calculate least_squares jac/hess/covariance matrices
        res = least_squares(objective.residuals,
                            np.array(params),
                            jac="3-point")

        hess_least_squares = np.matmul(res.jac.T, res.jac)
        covar_least_squares = np.linalg.inv(hess_least_squares)

        # now calculate corresponding matrices by hand, to see if the approach
        # concurs with least_squares
        objective.setp(res.x)
        _pvals = np.array(res.x)

        def residuals_scaler(vals):
            return np.squeeze(objective.residuals(_pvals * vals))

        jac = approx_derivative(residuals_scaler, np.ones_like(_pvals))
        hess = np.matmul(jac.T, jac)
        covar = np.linalg.inv(hess)

        covar = covar * np.atleast_2d(_pvals) * np.atleast_2d(_pvals).T

        assert_allclose(covar, covar_least_squares)

        # check that objective.covar corresponds to the least_squares
        # covariance matrix, J.T x J
        objective.setp(res.x)
        covar_objective = objective.covar()
        assert_allclose(covar_objective, covar_least_squares)

        # sometimes the residuals method may not be usable, see if
        # objective.covar calculated from a scalar works
        objective.setp(res.x)
        covar_objective = objective.covar("nll")
        assert_allclose(
            np.sqrt(np.diag(covar_objective)),
            np.sqrt(np.diag(covar_least_squares)),
            rtol=0.08,
        )

        # now see what happens with a parameter that has no effect on residuals
        param = Parameter(1.234, name="dummy")
        param.vary = True
        params.append(param)

        from pytest import raises

        with raises(LinAlgError):
            objective.covar()

    @pytest.mark.xfail
    def test_pymc3(self):
        # test objective logl against pymc3

        # don't run this test if pymc3 is not installed
        try:
            import pymc3 as pm
        except ImportError:
            return

        logl = self.objective.logl()

        from refnx.analysis import pymc3_model
        from refnx.analysis.objective import _to_pymc3_distribution

        mod = pymc3_model(self.objective)
        with mod:
            pymc_logl = mod.logp({
                "p0": self.p[0].value,
                "p1": self.p[1].value
            })

        assert_allclose(logl, pymc_logl)

        # now check some of the distributions
        with pm.Model():
            p = Parameter(1, bounds=(1, 10))
            d = _to_pymc3_distribution("a", p)
            assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))
            assert_(np.isneginf(d.distribution.logp(-1).eval()))

            q = Parameter(1, bounds=PDF(stats.uniform(1, 9)))
            d = _to_pymc3_distribution("b", q)
            assert_almost_equal(d.distribution.logp(2).eval(), q.logp(2))
            assert_(np.isneginf(d.distribution.logp(-1).eval()))

            p = Parameter(1, bounds=PDF(stats.uniform))
            d = _to_pymc3_distribution("c", p)
            assert_almost_equal(d.distribution.logp(0.5).eval(), p.logp(0.5))

            p = Parameter(1, bounds=PDF(stats.norm))
            d = _to_pymc3_distribution("d", p)
            assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))

            p = Parameter(1, bounds=PDF(stats.norm(1, 10)))
            d = _to_pymc3_distribution("e", p)
            assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))