def test_parallel_map_funcs2():
    """Check a simultaneous fit gives the same answer on 1 and 2 cores.

    A noisy Gaussian is simulated once and the same data set is used
    for both components of a DataSimulFit. The fit is run with
    numcores=1 and then numcores=2 and each result is compared to the
    same reference parameter values.
    """

    def tst(ncores, sg, stat, opt):
        """Fit the data set twice, simultaneously, using ncores cores."""
        # Pass the requested core count through: with a hard-coded
        # value the "single core" call would silently use two cores.
        sd = DataSimulFit('sd', [d, d], numcores=ncores)
        f = Fit(sd, sg, stat, opt)
        return f.fit()

    def cmp_results(result, tol=1.0e-3):
        """Check the fit succeeded and matches the reference values."""
        assert result.succeeded
        parvals = (1.7555670572301785, 1.5092728216164186, 4.893136872267538)

        # Two copies of the 100-point data set are fit.
        assert result.numpoints == 200

        # use tol in approx?
        assert result.parvals == pytest.approx(parvals)

    # Fix the seed so that the simulated noise, and hence the
    # reference values used in cmp_results, are reproducible.
    numpy.random.seed(0)
    x = numpy.linspace(-5., 5., 100)
    ampl = 5
    pos = 1.5
    sigma = 0.75
    err = 0.25
    y = ampl * numpy.exp(-0.5 * (x - pos)**2 / sigma**2)
    y += numpy.random.normal(0., err, x.shape)
    d = Data1D('junk', x, y)

    g = Gauss1D()
    opt = LevMar()
    stat = LeastSq()
    sg = SimulFitModel('sg', [g, g])

    result = tst(1, sg, stat, opt)
    cmp_results(result)

    result = tst(2, sg, stat, opt)
    cmp_results(result)
def test_fitresults_multi(method):
    """Check the HTML summary of a fit to two datasets.

    The method argument is called with no arguments to create the
    optimiser used for the fit.
    """

    first = Data1D('dx', [1, 2, 3], [4, 2, 2])
    second = Data1D('dx', [4, 5, 6, 10], [4, 4, 2, 4])
    combined = DataSimulFit('combined', (first, second))

    # The same constant model is used for both datasets.
    const = Const1D()
    const.c0 = 3
    model = SimulFitModel('silly', (const, const))

    fitter = fit.Fit(combined, model, method=method(), stat=LeastSq())
    fr = fitter.fit()

    # Override the dataset labels so the Datasets row is predictable.
    fr.datasets = ['ddx', 'ddy']
    html = fr._repr_html_()

    assert html is not None

    expected_fragments = (
        '<summary>Summary (9)</summary>',
        '<td>const1d.c0</td>',
        '<div class="dataname">Datasets</div><div class="dataval">ddx,ddy</div>',
        '<div class="dataname">Method</div><div class="dataval">{}</div>'.format(fr.methodname),
        '<div class="dataname">Statistic</div><div class="dataval">leastsq</div>',
        '<div class="dataname">Δ statistic</div><div class="dataval">0.142857</div>',
        '<div class="dataname">Number of data points</div><div class="dataval">7</div>',
        '<div class="dataname">Degrees of freedom</div><div class="dataval">6</div>',
    )
    for fragment in expected_fragments:
        assert fragment in html
def test_regrid_binaryop_1d(reset_seed):
    """A composite model (BinaryOpModel) can be regridded: issue #762.

    The component models count the number of points they are asked
    to evaluate, which lets the test check which grid was used.
    """

    np.random.seed(0)

    stat = LeastSq()
    method = LevMar()

    gauss = MyGauss()
    const = MyConst1D()
    combined = gauss + const

    grid = np.linspace(-5., 5., 5)
    noise_sigma = 0.25
    noise = np.random.normal(gauss.pos.val, noise_sigma, grid.shape)
    data = Data1D('one', grid, combined(grid) + noise)

    def reset_counters():
        # Forget any evaluations made so far.
        gauss.counter = 0
        const.counter = 0

    def fit_and_check(model, npts_per_eval):
        # Fit the model to the data and check both the fit and the
        # number of points each component was evaluated on.
        result = Fit(data, model, stat, method).fit()
        assert result.numpoints == grid.size
        assert result.statval < 1.0
        assert gauss.counter == const.counter
        assert (result.nfev + 4) * npts_per_eval == gauss.counter

    # Fit on the grid of the data.
    reset_counters()
    fit_and_check(combined, grid.size)

    # Fit again, evaluating the model on a finer grid.
    reset_counters()
    fine_grid = np.linspace(-5., 5., 25)
    regridded = combined.regrid(fine_grid)
    fit_and_check(regridded, fine_grid.size)
def call(self, niter, seed):
    """Resample the data within its errors and refit the model each time.

    For every iteration each data value is replaced by a draw from a
    normal distribution centered on the value, using the lower error
    as the width for draws below the value and the upper error for
    draws above it. Draws that fall on the wrong side of the value,
    or outside the one-error interval [y - lo, y + hi], are rejected
    and redrawn. The model is then fit to the resampled data with
    LeastSq and LevMar.

    Parameters
    ----------
    niter : int
        The number of resample-and-fit iterations.
    seed : int or None
        Passed to numpy.random.seed before any value is drawn.

    Returns
    -------
    result : dict
        Maps the name of each thawed parameter (modelname.parname)
        to the list of its fitted values, one entry per iteration.

    Raises
    ------
    NotImplementedError
        If self.data is not a Data1D (or subclass) instance.

    Notes
    -----
    The average and standard deviation of each parameter are
    printed to the standard output.
    """
    # The thawed parameters, in model order, which is the order
    # the fitted values are read back in below.
    names = ['%s.%s' % (par.modelname, par.name)
             for par in self.model.pars if not par.frozen]
    pars = {name: [] for name in names}

    data = self.data
    y = data.y
    x = data.x

    # y_l and y_h are the bounds of the accepted interval for each
    # point. The asymmetric class must be checked first as it is
    # also a Data1D.
    if isinstance(data, Data1DAsymmetricErrs):
        y_l = y - data.elo
        y_h = y + data.ehi
    elif isinstance(data, Data1D):
        # Symmetric errors: the bounds are y -/+ the error. Using
        # the error values themselves as the bounds makes the
        # acceptance test below impossible to satisfy.
        y_l = y - data.staterror
        y_h = y + data.staterror
    else:
        msg = "{0} {1}".format(ReSampleData.__name__, type(data))
        raise NotImplementedError(msg)

    numpy.random.seed(seed)
    for _ in range(niter):
        ry = []
        for yval, lo, hi in zip(y, y_l, y_h):
            # Draw until a value is accepted. An explicit loop is
            # used rather than a numeric "not set" marker, since
            # any marker value (e.g. -1) could be a valid sample.
            while True:
                u = numpy.random.random_sample()
                sigma = yval - lo if u < 0.5 else hi - yval
                r = numpy.random.normal(loc=yval, scale=sigma, size=None)

                # u selects the side of the value to sample from.
                if u < 0.5 and r > yval:
                    continue
                if u > 0.5 and r < yval:
                    continue

                if not (r < lo or r > hi):
                    break

            ry.append(r)

        # fit is performed for each simulated data point
        fit = Fit(Data1D('tmp', x, ry), self.model, LeastSq(), LevMar())
        fit_result = fit.fit()
        for name, val in zip(names, fit_result.parvals):
            pars[name].append(val)

    result = {}
    for name in names:
        avg = numpy.average(pars[name])
        std = numpy.std(pars[name])
        print(name, ': avg =', avg, ', std =', std)
        result[name] = pars[name]

    return result
def test_cache():
    """Check that fit() accepts the cache argument at run time."""

    xvals = np.array([1.0, 2.0, 3.0])
    truth = np.array([1.1, 2.0, 3.0])

    # The data is the model evaluated at the chosen parameter values.
    mdl = MyCacheTestModel()
    data = Data1D('tmp', xvals, mdl.calc(truth, xvals))

    Fit(data, mdl, LeastSq()).fit(cache=False)
def test_gauss_gauss(self):
    """Simultaneously fit a separate Gaussian to each of d4 and d5."""

    # Starting (fwhm, pos) values for the two components.
    start_values = ((1.3, 1.5), (4., -2.0))

    components = []
    for fwhm, pos in start_values:
        gauss = Gauss1D()
        gauss.fwhm = fwhm
        gauss.pos = pos
        components.append(gauss)

    datasets = DataSimulFit('d4d5', (self.d4, self.d5))
    models = SimulFitModel('g1g2', tuple(components))

    fitter = Fit(datasets, models, method=LevMar(), stat=LeastSq())
    self.compare_results(self._fit_g2g2_bench, fitter.fit())
def test_lrt_dows_not_like_gaussian():
    """The likelihood ratio test rejects the LeastSq statistic.

    The statistic is the only sensible argument: the remaining
    arguments to run are set to None, which avoids having to create
    any data or models for this check.
    """

    expected = 'Sherpa fit statistic must be Cash or CStat for likelihood ratio test'

    lrt = sim.LikelihoodRatioTest()
    with pytest.raises(TypeError) as excinfo:
        lrt.run(None, None, None, stat=LeastSq())

    assert str(excinfo.value) == expected
def test_fitresults(method):
    """Check the HTML summary of a fit to a single dataset.

    The method argument is called with no arguments to create the
    optimiser used for the fit.
    """

    data = Data1D('dx', [1, 2, 3], [4, 2, 2])
    const = Const1D()
    const.c0 = 3

    fitter = fit.Fit(data, const, method=method(), stat=LeastSq())
    fr = fitter.fit()
    html = fr._repr_html_()

    assert html is not None

    # Note that the summary check does not include the closing tag.
    expected_fragments = (
        '<summary>Fit parameters</summary>',
        '<summary>Summary (8)',
        '<td>const1d.c0</td>',
        '<div class="dataname">Method</div><div class="dataval">{}</div>'.format(fr.methodname),
        '<div class="dataname">Statistic</div><div class="dataval">leastsq</div>',
        '<div class="dataname">Δ statistic</div><div class="dataval">0.333333</div>',
        '<div class="dataname">Number of data points</div><div class="dataval">3</div>',
        '<div class="dataname">Degrees of freedom</div><div class="dataval">2</div>',
    )
    for fragment in expected_fragments:
        assert fragment in html
def test_leastsq_stat(hide_logging, reset_xspec, setup_group):
    """Fit with LeastSq and LevMar and compare to the benchmark values."""

    expected = dict(
        succeeded=1,
        numpoints=143,
        dof=140,
        istatval=117067.64900554597,
        statval=4203.173180288109,
        parvals=numpy.array([1.808142494916457,
                             5.461611041944977,
                             -1.907736527635154]),
    )

    fitter = Fit(setup_group['data'], setup_group['model'],
                 LeastSq(), LevMar())
    compare_results(expected, fitter.fit(), tol=2e-4)
def test_leastsq_stat(self):
    """Fit with LeastSq and NelderMead and compare to the benchmark."""
    fitter = Fit(self.data, self.model, LeastSq(), NelderMead())
    outcome = fitter.fit()
    self.compare_results(self._fit_leastsq_results_bench, outcome)
def test_regrid_binaryop_1d():
    """A composite model (BinaryOpModel) can be regridded: issue #762.

    The two locally-defined models count the number of points they
    are asked to evaluate, which lets the test check which grid was
    used for the fit.
    """

    from sherpa.stats import LeastSq
    from sherpa.fit import Fit
    from sherpa.optmethods import LevMar

    # A constant model that tracks how many points it has evaluated.
    class MyConst1D(RegriddableModel1D):

        def __init__(self, name='myconst1d'):
            self.c0 = Parameter(name, 'c0', 3.1)
            self.counter = 0
            ArithmeticModel.__init__(self, name, (self.c0, ))

        def calc(self, par, *args, **kwargs):
            grid = args[0]
            self.counter += grid.size
            return par[0]

    # A Gaussian model that tracks how many points it has evaluated.
    class MyGauss(RegriddableModel1D):

        def __init__(self, name='mygauss'):
            self.sigma = Parameter(name, 'sigma', 10, min=0, max=10)
            self.pos = Parameter(name, 'pos', 0, min=-10, max=10)
            self.ampl = Parameter(name, 'ampl', 5)
            self.counter = 0
            ArithmeticModel.__init__(self, name,
                                     (self.sigma, self.pos, self.ampl))

        def calc(self, par, *args, **kwargs):
            sigma = par[0]
            pos = par[1]
            ampl = par[2]
            grid = args[0]
            self.counter += grid.size
            return ampl * np.exp(-0.5 * (grid - pos)**2 / sigma**2)

    np.random.seed(0)

    stat = LeastSq()
    method = LevMar()

    gauss = MyGauss()
    const = MyConst1D()
    combined = gauss + const

    grid = np.linspace(-5., 5., 5)
    noise_sigma = 0.25
    noise = np.random.normal(gauss.pos.val, noise_sigma, grid.shape)
    data = Data1D('one', grid, combined(grid) + noise)

    def reset_counters():
        # Forget any evaluations made so far.
        gauss.counter = 0
        const.counter = 0

    def fit_and_check(model, npts_per_eval):
        # Fit the model to the data and check both the fit and the
        # number of points each component was evaluated on.
        result = Fit(data, model, stat, method).fit()
        assert result.numpoints == grid.size
        assert result.statval < 1.0
        assert gauss.counter == const.counter
        assert (result.nfev + 4) * npts_per_eval == gauss.counter

    # Fit on the grid of the data.
    reset_counters()
    fit_and_check(combined, grid.size)

    # Fit again, evaluating the model on a finer grid.
    reset_counters()
    fine_grid = np.linspace(-5., 5., 25)
    regridded = combined.regrid(fine_grid)
    fit_and_check(regridded, fine_grid.size)
def call(self, niter, seed=None):
    """Resample the data and fit the model to each iteration.

    .. versionadded:: 4.12.2
       The samples and statistic keys were added to the return
       value, the parameter values are returned as NumPy arrays
       rather than as lists, and the seed parameter was made
       optional.

    Parameters
    ----------
    niter : int
        The number of iterations.
    seed : int or None, optional
        The seed value.

    Returns
    -------
    sampled : dict
        The keys are samples, which contains the resampled data
        used in the fits as a niter by ndata array, statistic,
        which contains the fit statistic for each iteration (of
        size niter), and the free parameters in the fit,
        containing a NumPy array containing the fit parameter for
        each iteration (of size niter).

    Raises
    ------
    NotImplementedError
        If the data is not a Data1D (or subclass) instance.

    Notes
    -----
    The fit for each iteration uses the input values of the model
    parameters as the starting point. The parameters of the model
    are not changed by this method.

    The average and standard deviation of each free parameter are
    reported with the info logging call.

    """

    # Each fit is reset to this set of values as the starting point
    orig_pars = self.model.thawedpars

    pars = {}
    pars_index = []
    for par in self.model.pars:
        if par.frozen:
            continue

        name = par.fullname
        pars_index.append(name)
        pars[name] = numpy.zeros(niter)

    data = self.data
    y = data.y
    x = data.x

    # y_l and y_h are the data values offset by the lower and upper
    # errors, so that the widths used when sampling (y - y_l and
    # y_h - y) are the errors. The asymmetric class must be checked
    # first as it is also a Data1D.
    #
    if isinstance(data, Data1DAsymmetricErrs):
        y_l = y - data.elo
        y_h = y + data.ehi
    elif isinstance(data, Data1D):
        # Symmetric errors need the same offset form; using the
        # error values directly would make the sampling width
        # depend on the data value.
        y_l = y - data.staterror
        y_h = y + data.staterror
    else:
        msg = "{0} {1}".format(ReSampleData.__name__, type(data))
        raise NotImplementedError(msg)

    ny = len(y)

    fake_data = Data1D('tmp', x, numpy.zeros(ny))

    numpy.random.seed(seed)

    # The samples are real-valued, so do not inherit a non-float
    # type (e.g. integer data) as that would truncate each draw.
    sample_dtype = y_l.dtype
    if not numpy.issubdtype(sample_dtype, numpy.floating):
        sample_dtype = float

    ry_all = numpy.zeros((niter, ny), dtype=sample_dtype)
    stats = numpy.zeros(niter)
    for j in range(niter):
        ry = ry_all[j]
        for i in range(ny):
            a = y_l[i]
            b = y_h[i]

            r = None
            while r is None:

                # Flip between low or hi
                #  u = 0  pick low
                #  u = 1  pick high
                #
                # Switching to randint rather than random_sample
                # leads to different answers, so the tests fail,
                # so leave as is.
                #
                # u = numpy.random.randint(low=0, high=2)
                #
                u = numpy.random.random_sample()
                u = 0 if u < 0.5 else 1

                # Rather than dropping this value, we could
                # reflect it (ie multiply it by -1 if the sign
                # is wrong). Would this affect the statistical
                # properties?
                #
                dr = numpy.random.normal(loc=0, scale=1, size=None)
                if u == 0:
                    if dr > 0:
                        continue

                    sigma = y[i] - a

                else:
                    if dr < 0:
                        continue

                    sigma = b - y[i]

                r = y[i] + dr * sigma

            ry[i] = r

        # fit is performed for each simulated data point, and we
        # always start at the original best-fit location to
        # start the fit (by making sure we always reset after a fit).
        #
        fake_data.y = ry
        fit = Fit(fake_data, self.model, LeastSq(), LevMar())
        try:
            fit_result = fit.fit()
        finally:
            self.model.thawedpars = orig_pars

        stats[j] = fit_result.statval
        for name, val in zip(fit_result.parnames, fit_result.parvals):
            pars[name][j] = val

    result = {'samples': ry_all, 'statistic': stats}
    for name in pars_index:
        avg = numpy.average(pars[name])
        std = numpy.std(pars[name])
        info('{} : avg = {} , std = {}'.format(name, avg, std))
        result[name] = pars[name]

    return result
#guess parameters, this is important or sherpa won't know where to start looking G1.fwhm = .05 G1.pos = 1083.03 + ref_value * 5 mdl = G1 mplot = ModelPlot() mplot.prepare(d, mdl) dplot = DataPlot() dplot.prepare(d) mplot.overplot() #set error methods, ChiSq() or LeastSq() #Chi square is a way to compare which profile best describes data, ie: is it more gaussian or lorentzian #Least Square says how good the data fits the particular model instance #opt - optimizers improve the fit. Monte Carlo is what I used, it is slow but it is most robust. Many options on sherpas site ustat = LeastSq() opt = MonCar() #LevMar() #NelderMead() # #apply actual Fit f = Fit(d, mdl, stat=ustat, method=opt) res = f.fit() fplot = FitPlot() mplot.prepare(d, mdl) fplot.prepare(dplot, mplot) fplot.plot() #param_errors = f.est_errors() #plotting routine plt.plot(d.x, d.y, "c.", label="Data") plt.plot(d.x, mdl(d.x), linewidth=2, label="Gaussian")
# Create the model and allow the c2 term to vary in the fit.
# NOTE(review): report, dump and savefig are helpers defined outside
# this fragment; they appear to evaluate or display the given
# expression and to save the current figure - confirm.
mdl = Polynom1D()
report("print(mdl)")
mdl.c2.thaw()

# Show the data with the model, at its starting parameter values,
# drawn on top.
from sherpa.plot import ModelPlot
mplot = ModelPlot()
mplot.prepare(d, mdl)
dplot.plot()
mplot.overplot()
savefig("data_model_initial.png")

# Fit the model to the data using the least-squares statistic and
# the Nelder-Mead optimiser.
from sherpa.stats import LeastSq
from sherpa.optmethods import NelderMead
from sherpa.fit import Fit
f = Fit(d, mdl, stat=LeastSq(), method=NelderMead())
report("print(f)")
res = f.fit()

# Inspect the fit results and the model after the fit.
dump("res.succeeded")
report("res.format()")
report("res")
report("mdl")

# Evaluate the statistic for the current model parameter values.
stat2 = f.calc_stat()
print("Statistic = {:.4f}".format(stat2))

# Start setting up the plot of the fit.
from sherpa.plot import FitPlot, ResidPlot, SplitPlot
fplot = FitPlot()
def fit(star_name, data, model, silent=False, breakdown=False):
    """A function that will fit a given multi-part model to a given spectrum.

    The model is fit in place, using the LeastSq statistic and the
    LevMar optimiser with tight tolerances.

    :param star_name: Name of the target star
    :type star_name: str
    :param data: Spectrum data in the form (wave, flux)
    :type data: tuple
    :param model: An unfit spectrum model
    :type model: object
    :param silent: If true, no plots will be generated and nothing
        is printed, defaults to False
    :type silent: bool
    :param breakdown: If true, also return the individual line
        components of the model (and plot each component unless
        silent is set), defaults to False
    :type breakdown: bool

    :return: model that is fit to the data; when breakdown is true
        the return value is the tuple (model, params), where params
        is the list of model components other than the continuum
    :rtype: object or tuple

    """

    wave, flux = data

    # silent and breakdown are tested for truth, rather than
    # identity with False/True, so that values such as numpy.bool_
    # behave like the equivalent Python bool.
    verbose = not silent

    d = Data1D(star_name, wave, flux)

    # ==========================================
    # Initial guesses

    # Dataset 1
    dplot = DataPlot()
    dplot.prepare(d)
    if verbose:
        dplot.plot()

    mplot = ModelPlot()
    mplot.prepare(d, model)
    if verbose:
        dplot.plot()
        mplot.overplot()
        plt.show()

    # =========================================
    # Fitting happens here - don't break please
    start = time.time()

    stat = LeastSq()

    opt = LevMar()
    opt.verbose = 0
    opt.ftol = 1e-15
    opt.xtol = 1e-15
    opt.gtol = 1e-15
    opt.epsfcn = 1e-15

    if verbose:
        print(opt)

    vfit = Fit(d, model, stat=stat, method=opt)

    if verbose:
        print(vfit)

    vres = vfit.fit()

    if verbose:
        print()
        print()
        print(vres.format())

    # =========================================
    # Plotting after fit

    # Dataset 1
    if verbose:
        # Re-evaluate the model plot now that the model has been fit.
        fplot = FitPlot()
        mplot.prepare(d, model)
        fplot.prepare(dplot, mplot)
        fplot.plot()

        # residual
        plt.title(star_name)
        plt.plot(wave, flux - model(wave))

        plt.xlabel("Wavelength (AA)", fontsize=12)
        plt.ylabel("Flux", fontsize=12)
        plt.tick_params(axis="both", labelsize=12)

        # The reported time includes drawing the fit plot.
        duration = time.time() - start
        print()
        print("Time taken: " + str(duration))
        print()

        plt.show()

    if not breakdown:
        return model

    params = []

    # The first component is taken to be the continuum, which
    # scales each of the line components when plotting.
    cont = model[0]

    if verbose:
        plt.scatter(wave, flux, marker=".", c="black")
        plt.plot(wave, model(wave), c="C1")

    for line in model:
        # Components whose name starts with "(" are skipped.
        # NOTE(review): presumably these are the composite
        # (combined) expressions rather than the individual
        # components - confirm.
        if line.name[0] == "(":
            continue

        is_continuum = line.name == "Cont_flux"
        if not is_continuum:
            params.append(line)

        if not verbose:
            continue

        if is_continuum:
            print(line)
            plt.plot(wave, line(wave), linestyle="--")
        else:
            print()
            print(line)
            plt.plot(wave, line(wave) * cont(wave), linestyle="--")

    # Only display the breakdown when plots were requested.
    if verbose:
        plt.show()

    return model, params
def multifit(star_name, data_list, model_list, silent=False):
    """Simultaneously fit two models to two spectra.

    This was created to fit the NaI doublets at ~3300 and ~5890
    Angstroms. Only the first two entries of data_list and
    model_list are used. The fit uses the LeastSq statistic and the
    LevMar optimiser, and the optimiser settings and fit results
    are always printed.

    :param star_name: Name of the target star
    :type star_name: str
    :param data_list: List of spectrum data in the form
        [(wave, flux), (wave, flux),...]
    :type data_list: tuple
    :param model_list: A list of unfit spectrum models
    :type model_list: list
    :param silent: If true, no plots will generate, defaults to False
    :type silent: bool

    :return: models that are fit to the data
    :rtype: list

    """

    wave1, flux1 = data_list[0]
    wave2, flux2 = data_list[1]
    model1 = model_list[0]
    model2 = model_list[1]

    show_plots = silent is False

    spectra = ((wave1, flux1), (wave2, flux2))
    models = (model1, model2)

    datasets = tuple(Data1D(star_name + suffix, wave, flux)
                     for suffix, (wave, flux) in zip((" 1", " 2"), spectra))

    dall = DataSimulFit("combined", datasets)
    mall = SimulFitModel("combined", models)

    # ==========================================
    # Initial guesses, one dataset at a time

    dplots = []
    mplots = []
    for dset, mdl in zip(datasets, models):
        dplot = DataPlot()
        dplot.prepare(dset)
        if show_plots:
            dplot.plot()

        mplot = ModelPlot()
        mplot.prepare(dset, mdl)
        if show_plots:
            dplot.plot()
            mplot.overplot()
            plt.show()

        dplots.append(dplot)
        mplots.append(mplot)

    # =========================================
    # Fitting happens here - don't break please

    stat = LeastSq()

    opt = LevMar()
    for setting, value in (("verbose", 0),
                           ("ftol", 1e-15),
                           ("xtol", 1e-15),
                           ("gtol", 1e-15),
                           ("epsfcn", 1e-15)):
        setattr(opt, setting, value)

    print(opt)

    vfit = Fit(dall, mall, stat=stat, method=opt)
    print(vfit)
    vres = vfit.fit()

    print()
    print()
    print("Did the fit succeed? [bool]")
    print(vres.succeeded)

    print()
    print()
    print(vres.format())

    # =========================================
    # Plotting after fit

    if show_plots:
        fplots = []
        per_dataset = zip(("Data 1", "Data 2"), spectra, datasets,
                          models, dplots, mplots)
        for title, (wave, flux), dset, mdl, dplot, mplot in per_dataset:
            # Re-evaluate the model plot now the model has been fit.
            fplot = FitPlot()
            mplot.prepare(dset, mdl)
            fplot.prepare(dplot, mplot)
            fplot.plot()

            # residual
            plt.title(title)
            plt.plot(wave, flux - mdl(wave))
            plt.show()

            fplots.append(fplot)

        # both datasets - no residuals
        splot = SplitPlot()
        for fplot in fplots:
            splot.addplot(fplot)

        plt.tight_layout()
        plt.show()

    return model_list