def NIST_runner(dataset, method='least_squares', chi_atol=1e-5,
                val_rtol=1e-2, err_rtol=5e-3):
    """
    Fit a NIST reference problem and compare against the certified results.

    Parameters
    ----------
    dataset : str
        Name of the NIST problem; passed to `ReadNistData` and used as the
        key into `NIST_Models`.
    method : str, optional
        Minimiser name forwarded to `CurveFitter.fit`.
    chi_atol : float, optional
        Absolute tolerance when comparing chi-squared to the certified
        sum of squares.
    val_rtol : float, optional
        Relative tolerance when comparing fitted values to certified values.
    err_rtol : float, optional
        Relative tolerance when comparing standard errors to certified
        standard errors (only checked if the result carries `stderr`).
    """
    nist = ReadNistData(dataset)
    xdata = nist['x']
    ydata = nist['y']

    # the Nelson problem is fitted against log(y)
    if dataset == 'Nelson':
        ydata = np.log(ydata)

    objective = Objective(
        Model(nist['start'], NIST_Models[dataset][0]),
        (xdata, ydata),
    )
    result = CurveFitter(objective).fit(method=method)

    assert_allclose(objective.chisqr(), nist['sum_squares'], atol=chi_atol)
    assert_allclose(result.x, nist['cert_values'], rtol=val_rtol)

    # not every minimiser reports uncertainties
    if 'stderr' not in result:
        return
    assert_allclose(result.stderr, nist['cert_stderr'], rtol=err_rtol)
def NIST_runner(
    dataset,
    method="least_squares",
    chi_atol=1e-5,
    val_rtol=1e-2,
    err_rtol=6e-3,
):
    """
    Run a fit on one NIST problem and check it against the certified answer.

    Parameters
    ----------
    dataset : str
        NIST problem name, given to `ReadNistData` and used to look up the
        fit function in `NIST_Models`.
    method : str, optional
        Name of the minimiser handed to `CurveFitter.fit`.
    chi_atol : float, optional
        Absolute tolerance for the chi-squared vs certified sum-of-squares
        comparison.
    val_rtol : float, optional
        Relative tolerance for the fitted parameter values.
    err_rtol : float, optional
        Relative tolerance for the parameter standard errors; only used when
        the fit result contains a `stderr` entry.
    """
    reference = ReadNistData(dataset)
    x, y = reference["x"], reference["y"]
    if dataset == "Nelson":
        # Nelson is fitted in log space
        y = np.log(y)

    start_values = reference["start"]
    fit_function = NIST_Models[dataset][0]
    objective = Objective(Model(start_values, fit_function), (x, y))

    fit_result = CurveFitter(objective).fit(method=method)

    # goodness of fit first, then the parameter values themselves
    assert_allclose(
        objective.chisqr(), reference["sum_squares"], atol=chi_atol
    )
    assert_allclose(fit_result.x, reference["cert_values"], rtol=val_rtol)

    # uncertainties are only compared if the minimiser produced them
    if "stderr" in fit_result:
        assert_allclose(
            fit_result.stderr, reference["cert_stderr"], rtol=err_rtol
        )
def test_code_fragment(self):
    """
    Check that an Objective can be rebuilt from its repr and from the
    source produced by `code_fragment`, giving the same chi-squared.
    """
    # e361 is an older dataset, but well characterised
    e361 = ReflectDataset(os.path.join(self.pth, "e361r.txt"))

    si = SLD(2.07, name="Si")
    sio2 = SLD(3.47, name="SiO2")
    d2o = SLD(6.36, name="D2O")
    polymer = SLD(1, name="polymer")

    stack = si | sio2(10, 4) | polymer(200, 3) | d2o(0, 3)
    self.structure361 = stack
    self.model361 = ReflectModel(stack, bkg=2e-5)
    model = self.model361

    oxide, film, backing = stack[1], stack[2], stack[-1]

    # every parameter listed here is allowed to vary within the given limits
    free_parameters = (
        (model.scale, (0.1, 2)),
        (model.bkg, (0, 5e-5)),
        (backing.sld.real, (6, 6.36)),  # d2o
        (oxide.thick, (5, 20)),
        (film.thick, (100, 220)),
        (film.sld.real, (0.2, 1.5)),
    )
    for par, limits in free_parameters:
        par.vary = True
        par.range(*limits)

    objective = Objective(model, e361, transform=Transform("logY"))

    # check that we can reproduce the objective from the repr
    rebuilt = eval(repr(objective))
    assert_allclose(rebuilt.chisqr(), objective.chisqr())

    # smoke tests: both the repr and the code fragment must be executable
    exec(repr(objective))
    exec(code_fragment(objective))

    # artificially link the two thicknesses together
    film.thick.constraint = oxide.thick

    script = code_fragment(objective)
    script = script + "\nobj = objective()\nresult = obj.chisqr()"

    # need to provide the globals dictionary to exec, so it can see imports
    # e.g. https://bit.ly/2RFOF7i (from stackoverflow)
    namespace = {}
    exec(script, globals(), namespace)
    assert_allclose(namespace["result"], objective.chisqr())
def test_multidimensionality(self):
    """
    Check that ND data can be used with an objective/model/data
    (or at least that it doesn't stand in the way).
    """
    generator = np.random.default_rng()
    x = generator.uniform(size=100).reshape(50, 2)

    expected = line_ND(x, self.p)
    assert expected.shape == (50, 2)

    data = Data1D((x, expected))
    model = Model(self.p, fitfunc=line_ND)
    # the model evaluated directly must reproduce the synthetic data
    assert_allclose(model(x), expected)

    objective = Objective(model, data)
    # data were generated from the model, so the fit is perfect
    assert_allclose(objective.chisqr(), 0)
    assert_allclose(objective.generative(), expected)

    assert_allclose(objective.residuals(), 0)
    assert objective.residuals().shape == (50, 2)

    # smoke tests for the probability calculations
    objective.logl()
    objective.logpost()

    assert objective.covar().shape == (2, 2)
refy = np.zeros((n.shape[0], dataset.x.size)) sldy = [] chi = np.zeros((n.shape[0])) print(n.shape[0]) for i in range(n.shape[0]): sim.av_layers = n[i, :, :] model = ReflectModel(sim) model.scale.setp(1, vary=True, bounds=(0.00000001, np.inf)) model.bkg.setp(dataset.y[-1], vary=False) objective = Objective(model, dataset, transform=Transform('YX4')) fitter = CurveFitter(objective) res = fitter.fit() refy[i] = model(dataset.x, x_err=dataset.x_err)*(dataset.x)**4 sldy.append(sim.sld_profile()[1]) chi[i] = objective.chisqr() all_chi = np.append(all_chi, objective.chisqr()) if i == 0: ax1.errorbar(dataset.x, dataset.y*(dataset.x)**4 * 10**(ci-1), yerr=dataset.y_err*( dataset.x)**4 * 10**(ci-1), linestyle='', marker='o', color=sns.color_palette()[ci]) if i % 5 == 0: ax1.plot(dataset.x, model(dataset.x, x_err=dataset.x_err)*( dataset.x)**4 * 10**(ci-1), color=sns.color_palette()[ci], alpha=0.1) zs, sld = sim.sld_profile()
class Motofit(object):
    """
    An interactive slab modeller (Jupyter/ipywidgets based) for Neutron and
    X-ray reflectometry data.

    The interactive modeller is designed to be used in a Jupyter notebook.

    >>> # specify that plots are in a separate graph window
    >>> %matplotlib qt

    >>> # alternately if you want the graph to be embedded in the notebook use
    >>> # %matplotlib notebook

    >>> from refnx.reflect import Motofit
    >>> # create an instance of the modeller
    >>> app = Motofit()
    >>> # display it in the notebook by calling the object with a datafile.
    >>> app('dataset1.txt')
    >>> # lets fit a different dataset
    >>> app2 = Motofit()
    >>> app2('dataset2.txt')

    The `Motofit` instance has several useful attributes that can be used in
    other cells. For example, one can access the `objective` and `curvefitter`
    attributes for more advanced fitting functionality than is available in
    the GUI. A `code` attribute can be used to retrieve a Python code fragment
    that can be used as a basis for developing more complicated models, such
    as interparameter constraints, global fitting, etc.

    Attributes
    ----------
    dataset: :class:`refnx.dataset.Data1D`
        The dataset associated with the modeller
    model: :class:`refnx.reflect.ReflectModel`
        Calculates a theoretical model, from an interfacial structure
        (`model.Structure`).
    objective: :class:`refnx.analysis.Objective`
        The Objective that allows one to compare the model against the data.
    fig: :class:`matplotlib.figure.Figure`
        Graph displaying the data.

    """

    def __init__(self):
        # attributes for the graph
        self.qmin = 0.005
        self.qmax = 0.5
        self.qpnt = 1000
        self.fig = None

        self.ax_data = None
        self.ax_residual = None
        self.ax_sld = None
        # gridspecs specify how the plots are laid out. Gridspec1 is when the
        # residuals plot is displayed. Gridspec2 is when it's not visible
        self._gridspec1 = gridspec.GridSpec(
            2, 2, height_ratios=[5, 1], width_ratios=[1, 1], hspace=0.01
        )
        self._gridspec2 = gridspec.GridSpec(1, 2)

        self.theoretical_plot = None
        self.theoretical_plot_sld = None

        # attributes for a user dataset
        self.dataset = None
        self.objective = None
        self._curvefitter = None
        self.data_plot = None
        self.residuals_plot = None
        self.data_plot_sld = None

        self.dataset_name = widgets.Text(description="dataset:")
        self.dataset_name.disabled = True
        self.chisqr = widgets.FloatText(description="chi-squared:")
        self.chisqr.disabled = True

        # default structure: fronting | single layer | backing
        slab0 = Slab(0, 0, 0)
        slab1 = Slab(25, 3.47, 3)
        slab2 = Slab(0, 2.07, 3)

        # the structure/model only need to be built once
        structure = slab0 | slab1 | slab2
        rename_params(structure)
        self.model = ReflectModel(structure)

        # give some default parameter limits
        self.model.scale.bounds = (0.1, 2)
        self.model.bkg.bounds = (1e-8, 2e-5)
        self.model.dq.bounds = (0, 20)
        for slab in self.model.structure:
            slab.thick.bounds = (0, 2 * slab.thick.value)
            slab.sld.real.bounds = (0, 2 * slab.sld.real.value)
            slab.sld.imag.bounds = (0, 2 * slab.sld.imag.value)
            slab.rough.bounds = (0, 2 * slab.rough.value)

        # the main GUI widget
        self.display_box = widgets.VBox()

        self.tab = widgets.Tab()
        self.tab.set_title(0, "Model")
        self.tab.set_title(1, "Limits")
        self.tab.set_title(2, "Options")
        self.tab.observe(self._on_tab_changed, names="selected_index")

        # an output area for messages.
        self.output = widgets.Output()

        # options tab
        self.plot_type = widgets.Dropdown(
            options=["lin", "logY", "YX4", "YX2"],
            value="lin",
            description="Plot Type:",
            disabled=False,
        )
        self.plot_type.observe(self._on_plot_type_changed, names="value")

        self.use_weights = widgets.RadioButtons(
            options=["Yes", "No"],
            value="Yes",
            description="use dataset weights?",
            style={"description_width": "initial"},
        )
        self.use_weights.observe(self._on_use_weights_changed, names="value")

        self.transform = Transform("lin")

        self.display_residuals = widgets.Checkbox(
            value=False, description="Display residuals"
        )
        self.display_residuals.observe(
            self._on_display_residuals_changed, names="value"
        )

        self.model_view = None
        self.set_model(self.model)

    def save_model(self, *args, f=None):
        """
        Serialise a model to a pickle file.
        If `f` is not specified then the file name is constructed from the
        current dataset name; if there is no current dataset then the filename
        is constructed from the current time. These constructed filenames will
        be in the current working directory, for a specific save location `f`
        must be provided.
        This method is only intended to be used to serialise models created by
        this interactive Jupyter widget modeller.

        Parameters
        ----------
        *args
            Ignored. Present so the method can be used directly as a button
            `on_click` handler (which passes the button positionally).
        f: file like or str, optional
            File to save model to.
        """
        if f is None:
            f = "model_" + datetime.datetime.now().isoformat() + ".pkl"
            if self.dataset is not None:
                f = "model_" + self.dataset.name + ".pkl"

        with possibly_open_file(f) as g:
            pickle.dump(self.model, g)

    def load_model(self, *args, f=None):
        """
        Load a serialised model.
        If `f` is not specified then an attempt will be made to find a model
        corresponding to the current dataset name,
        `'model_' + self.dataset.name + '.pkl'`. If there is no current
        dataset then the most recent model will be loaded.
        This method is only intended to be used to deserialise models created
        by this interactive Jupyter widget modeller, and will not successfully
        load complicated ReflectModel created outside of the interactive
        modeller.

        Parameters
        ----------
        *args
            Ignored. Present so the method can be used directly as a button
            `on_click` handler (which passes the button positionally).
        f: file like or str, optional
            pickle file to load model from.
        """
        if f is None and self.dataset is not None:
            # try and load the model corresponding to the current dataset
            f = "model_" + self.dataset.name + ".pkl"
        elif f is None:
            # load the most recent model file
            files = list(filter(os.path.isfile, glob.glob("model_*.pkl")))
            files.sort(key=os.path.getmtime)
            files.reverse()
            if len(files):
                f = files[0]

        if f is None:
            self._print("No model file is specified/available.")
            return

        try:
            # NOTE: unpickling can execute arbitrary code; only load model
            # files that come from a trusted source.
            with possibly_open_file(f, "rb") as g:
                reflect_model = pickle.load(g)
            self.set_model(reflect_model)
        except (RuntimeError, FileNotFoundError) as exc:
            # RuntimeError if the file isn't a ReflectModel
            # FileNotFoundError if the specified file name wasn't found
            # `_print` accepts a single string, so build the message first
            self._print("{!r} {!r}".format(exc, f))

    def set_model(self, model):
        """
        Change the `refnx.reflect.ReflectModel` associated with the `Motofit`
        instance.

        Parameters
        ----------
        model: refnx.reflect.ReflectModel

        Raises
        ------
        RuntimeError
            If `model` is not a `ReflectModel` instance.
        """
        if not isinstance(model, ReflectModel):
            raise RuntimeError("`model` was not an instance of ReflectModel")

        if self.model_view is not None:
            self.model_view.unobserve_all()

        # figure out if the reflect_model is a different instance. If it is
        # then the objective has to be updated.
        if model is not self.model:
            self.model = model
            self._update_analysis_objects()

        self.model = model

        self.model_view = ReflectModelView(self.model)
        self.model_view.observe(self.update_model, names=["view_changed"])
        self.model_view.observe(self.redraw, names=["view_redraw"])

        # observe when the number of varying parameters changed. This
        # invalidates a curvefitter, and a new one has to be produced.
        self.model_view.observe(
            self._on_num_varying_changed, names=["num_varying"]
        )

        self.model_view.do_fit_button.on_click(self.do_fit)
        self.model_view.to_code_button.on_click(self._to_code)
        self.model_view.save_model_button.on_click(self.save_model)
        self.model_view.load_model_button.on_click(self.load_model)

        self.redraw(None)

    def update_model(self, change):
        """
        Updates the plots when the parameters change

        Parameters
        ----------
        change
            Ignored; present so the method can be used as a widget observer.
        """
        # nothing to draw on until the figure has been created
        if not self.fig:
            return

        q = np.linspace(self.qmin, self.qmax, self.qpnt)
        theoretical = self.model.model(q)
        yt, _ = self.transform(q, theoretical)

        sld_profile = self.model.structure.sld_profile()
        z, sld = sld_profile

        if self.theoretical_plot is not None:
            self.theoretical_plot.set_data(q, yt)

            self.theoretical_plot_sld.set_data(z, sld)
            self.ax_sld.relim()
            self.ax_sld.autoscale_view()

        if self.dataset is not None:
            # if there's a dataset loaded then residuals_plot
            # should exist
            residuals = self.objective.residuals()
            self.chisqr.value = np.sum(residuals**2)

            self.residuals_plot.set_data(self.dataset.x, residuals)
            self.ax_residual.relim()
            self.ax_residual.autoscale_view()

        self.fig.canvas.draw()

    def _on_num_varying_changed(self, change):
        # observe when the number of varying parameters changed. This
        # invalidates a curvefitter, and a new one has to be produced.
        if change["new"] != change["old"]:
            self._curvefitter = None

    def _update_analysis_objects(self):
        """
        Rebuild the Objective from the current model/dataset/options and
        invalidate the cached curvefitter.
        """
        use_weights = self.use_weights.value == "Yes"
        self.objective = Objective(
            self.model,
            self.dataset,
            transform=self.transform,
            use_weights=use_weights,
        )
        self._curvefitter = None

    def _apply_yscale(self, plot_type):
        """
        Set the y-axis scale of the data plot to suit `plot_type`.
        """
        # logY data have already been log-transformed, so they need a linear
        # axis; every other plot type is displayed on a log axis.
        if plot_type == "logY":
            self.ax_data.set_yscale("linear")
        else:
            self.ax_data.set_yscale("log")

    def __call__(self, data=None, model=None):
        """
        Display the `Motofit` GUI in a Jupyter notebook cell.

        Parameters
        ----------
        data: refnx.dataset.Data1D
            The dataset to associate with the `Motofit` instance.

        model: refnx.reflect.ReflectModel or str or file-like
            A model to associate with the data.
            If `model` is a `str` or `file`-like then the `load_model` method
            will be used to try and load the model from file. This assumes that
            the file is a pickle of a `ReflectModel`

        Returns
        -------
        display_box : ipywidgets.VBox
            The top-level widget of the GUI.
        """
        # the theoretical model
        # display the main graph
        import matplotlib.pyplot as plt

        self.fig = plt.figure(figsize=(9, 4))

        # grid specs depending on whether the residuals are displayed
        if self.display_residuals.value:
            d_gs = self._gridspec1[0, 0]
            sld_gs = self._gridspec1[:, 1]
        else:
            d_gs = self._gridspec2[0, 0]
            sld_gs = self._gridspec2[0, 1]

        self.ax_data = self.fig.add_subplot(d_gs)
        self.ax_data.set_xlabel(r"$Q/\AA^{-1}$")
        self.ax_data.set_ylabel("Reflectivity")

        self.ax_data.grid(True, color="b", linestyle="--", linewidth=0.1)

        self.ax_sld = self.fig.add_subplot(sld_gs)
        self.ax_sld.set_ylabel(r"$\rho/10^{-6}\AA^{-2}$")
        self.ax_sld.set_xlabel(r"$z/\AA$")

        self.ax_residual = self.fig.add_subplot(
            self._gridspec1[1, 0], sharex=self.ax_data
        )
        self.ax_residual.set_xlabel(r"$Q/\AA^{-1}$")
        self.ax_residual.grid(True, color="b", linestyle="--", linewidth=0.1)
        self.ax_residual.set_visible(self.display_residuals.value)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.fig.tight_layout()

        q = np.linspace(self.qmin, self.qmax, self.qpnt)
        theoretical = self.model.model(q)
        yt, _ = self.transform(q, theoretical)

        self.theoretical_plot = self.ax_data.plot(q, yt, zorder=2)[0]
        # keep the axis scale consistent with the currently selected plot
        # type (the GUI may be displayed again after the type was changed)
        self._apply_yscale(self.plot_type.value)

        z, sld = self.model.structure.sld_profile()
        self.theoretical_plot_sld = self.ax_sld.plot(z, sld)[0]

        # the figure has been reset, so remove ref to the data_plot,
        # residual_plot
        self.data_plot = None
        self.residuals_plot = None

        self.dataset = None
        if data is not None:
            self.load_data(data)

        if isinstance(model, ReflectModel):
            self.set_model(model)
            return self.display_box
        elif model is not None:
            # `f` is keyword-only in load_model; a positional argument would
            # be swallowed by *args and the requested file ignored.
            self.load_model(f=model)
            return self.display_box

        self.redraw(None)
        return self.display_box

    def load_data(self, data):
        """
        Load a dataset into the `Motofit` instance.

        Parameters
        ----------
        data: refnx.dataset.Data1D, or str, or file-like
        """
        if isinstance(data, ReflectDataset):
            self.dataset = data
        else:
            self.dataset = ReflectDataset(data)

        self.dataset_name.value = self.dataset.name

        # loading a dataset changes the objective and curvefitter
        self._update_analysis_objects()

        self.qmin = np.min(self.dataset.x)
        self.qmax = np.max(self.dataset.x)
        if self.fig is not None:
            yt, et = self.transform(self.dataset.x, self.dataset.y)

            if self.data_plot is None:
                (self.data_plot,) = self.ax_data.plot(
                    self.dataset.x,
                    yt,
                    label=self.dataset.name,
                    ms=2,
                    marker="o",
                    ls="",
                    zorder=1,
                )
                self.data_plot.set_label(self.dataset.name)
                self.ax_data.legend()

                # no need to calculate residuals here, that'll be updated in
                # the redraw method
                (self.residuals_plot,) = self.ax_residual.plot(self.dataset.x)
            else:
                self.data_plot.set_xdata(self.dataset.x)
                self.data_plot.set_ydata(yt)

            # calculate theoretical model over same range as data
            # use redraw over update_model because it ensures chi2 widget gets
            # displayed
            self.redraw(None)
            self.ax_data.relim()
            self.ax_data.autoscale_view()
            self.ax_residual.relim()
            self.ax_residual.autoscale_view()
            self.fig.canvas.draw()

    def redraw(self, change):
        """
        Redraw the Jupyter GUI associated with the `Motofit` instance.
        """
        self._update_display_box(self.display_box)
        self.update_model(None)

    @property
    def curvefitter(self):
        """
        class:`CurveFitter` : Object for fitting the data based on the
        objective.
        """
        # created lazily, and re-created after it has been invalidated
        if self.objective is not None and self._curvefitter is None:
            self._curvefitter = CurveFitter(self.objective)

        return self._curvefitter

    def _print(self, string):
        """
        Print to the output widget
        """
        from IPython.display import clear_output

        with self.output:
            clear_output()
            print(string)

    def do_fit(self, *args):
        """
        Ask the Motofit object to perform a fit (differential evolution).

        Parameters
        ----------
        *args
            Ignored. Present so the method can be used directly as a button
            `on_click` handler.

        Notes
        -----
        After performing the fit the Jupyter display is updated.

        """
        if self.dataset is None:
            return

        if not self.model.parameters.varying_parameters():
            self._print("No parameters are being varied")
            return

        try:
            logp = self.objective.logp()
            if not np.isfinite(logp):
                self._print("One of your parameter values lies outside its"
                            " bounds. Please adjust the value, or the bounds.")
                return
        except ZeroDivisionError:
            self._print("One parameter has equal lower and upper bounds."
                        " Either alter the bounds, or don't let that"
                        " parameter vary.")
            return

        def callback(xk, convergence):
            # keep the chi-squared readout live while the fit progresses
            self.chisqr.value = self.objective.chisqr(xk)

        self.curvefitter.fit("differential_evolution", callback=callback)

        # need to update the widgets as the model will be updated.
        # this also redraws GUI.
        # self.model_view.refresh()
        self.set_model(self.model)

        self._print(str(self.objective))

    def _to_code(self, change=None):
        self._print(self.code)

    @property
    def code(self):
        """
        str : A Python code fragment capable of fitting the data.
        Executable Python code fragment for the GUI model.
        """
        if self.objective is None:
            self._update_analysis_objects()

        return to_code(self.objective)

    def _on_tab_changed(self, change):
        pass

    def _on_plot_type_changed(self, change):
        """
        User would like to plot and fit as logR/linR/RQ4/RQ2, etc
        """
        self.transform = Transform(change["new"])
        if self.objective is not None:
            self.objective.transform = self.transform

        # the transform is remembered, but there are no axes to update until
        # the figure has been created
        if self.fig is None:
            return

        if self.dataset is not None:
            yt, _ = self.transform(self.dataset.x, self.dataset.y)

            self.data_plot.set_xdata(self.dataset.x)
            self.data_plot.set_ydata(yt)

        self.update_model(None)

        # probably have to change LHS axis of the data plot when
        # going between different plot types.
        self._apply_yscale(change["new"])

        self.ax_data.relim()
        self.ax_data.autoscale_view()
        self.fig.canvas.draw()

    def _on_use_weights_changed(self, change):
        self._update_analysis_objects()
        self.update_model(None)

    def _on_display_residuals_changed(self, change):
        """
        Show or hide the residuals axes and re-position the other axes.
        """
        # no axes exist until the figure has been created; the checkbox value
        # is read again when the figure is built.
        if self.fig is None:
            return

        import matplotlib.pyplot as plt

        if change["new"]:
            self.ax_residual.set_visible(True)
            self.ax_data.set_position(
                self._gridspec1[0, 0].get_position(self.fig)
            )
            self.ax_sld.set_position(
                self._gridspec1[:, 1].get_position(self.fig)
            )
            plt.setp(self.ax_data.get_xticklabels(), visible=False)
        else:
            self.ax_residual.set_visible(False)
            self.ax_data.set_position(
                self._gridspec2[:, 0].get_position(self.fig)
            )
            self.ax_sld.set_position(
                self._gridspec2[:, 1].get_position(self.fig)
            )
            plt.setp(self.ax_data.get_xticklabels(), visible=True)

    @property
    def _options_box(self):
        return widgets.VBox(
            [self.plot_type, self.use_weights, self.display_residuals]
        )

    def _update_display_box(self, box):
        """
        Redraw the Jupyter GUI associated with the `Motofit` instance
        """
        vbox_widgets = []

        # the dataset name/chi-squared row is only shown once data is loaded
        if self.dataset is not None:
            vbox_widgets.append(widgets.HBox([self.dataset_name, self.chisqr]))

        self.tab.children = [
            self.model_view.model_box,
            self.model_view.limits_box,
            self._options_box,
        ]

        vbox_widgets.append(self.tab)
        vbox_widgets.append(self.output)
        box.children = tuple(vbox_widgets)
class TestFitterGauss(object):
    # Test CurveFitter with a noisy gaussian, weighted and unweighted, to see
    # if the parameters and uncertainties come out correct

    @pytest.fixture(autouse=True)
    def setup_method(self, tmpdir):
        """
        Load the gaussian test data and build the model/objective shared by
        every test in this class.
        """
        self.path = os.path.dirname(os.path.abspath(__file__))
        self.tmpdir = tmpdir.strpath

        theoretical = np.loadtxt(os.path.join(self.path, "gauss_data.txt"))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        # these best weighted values and uncertainties obtained with Igor
        self.best_weighted = [-0.00246095, 19.5299, -8.28446e-2, 1.24692]

        self.best_weighted_errors = [
            0.0220313708486,
            1.12879436221,
            0.0447659158681,
            0.0412022938883,
        ]

        self.best_weighted_chisqr = 77.6040960351

        self.best_unweighted = [
            -0.10584111872702096,
            19.240347049328989,
            0.0092623066070940396,
            1.501362314145845,
        ]

        self.best_unweighted_errors = [
            0.34246565477,
            0.689820935208,
            0.0411243173041,
            0.0693429375282,
        ]

        self.best_unweighted_chisqr = 497.102084956

        self.p0 = np.array([0.1, 20.0, 0.1, 0.1])
        self.names = ["bkg", "A", "x0", "width"]
        self.bounds = [(-1, 1), (0, 30), (-5.0, 5.0), (0.001, 2)]

        self.params = Parameters(name="gauss_params")
        for p, name, bound in zip(self.p0, self.names, self.bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            self.params.append(param)

        self.model = Model(self.params, fitfunc=gauss)
        self.data = Data1D((xvals, yvals, evals))
        self.objective = Objective(self.model, self.data)
        return 0

    def test_pickle(self):
        # tests if a CurveFitter can be pickled/unpickled.
        f = CurveFitter(self.objective)
        pkl = pickle.dumps(f)
        g = pickle.loads(pkl)
        g._check_vars_unchanged()

    def test_best_weighted(self):
        assert_equal(len(self.objective.varying_parameters()), 4)
        self.objective.setp(self.p0)

        f = CurveFitter(self.objective, nwalkers=100)
        res = f.fit("least_squares", jac="3-point")

        output = res.x
        assert_almost_equal(output, self.best_weighted, 3)
        assert_almost_equal(
            self.objective.chisqr(), self.best_weighted_chisqr, 5
        )

        # compare the residuals
        expected_residuals = (
            self.data.y - self.model(self.data.x)
        ) / self.data.y_err
        assert_equal(self.objective.residuals(), expected_residuals)

        # compare objective.covar to the best_weighted_errors
        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.005)

        # we're also going to try the checkpointing here.
        checkpoint = os.path.join(self.tmpdir, "checkpoint.txt")

        # compare samples to best_weighted_errors
        np.random.seed(1)
        f.sample(steps=201, random_state=1, verbose=False, f=checkpoint)
        process_chain(self.objective, f.chain, nburn=50, nthin=10)
        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.07)

        # test that the checkpoint worked
        check_array = np.loadtxt(checkpoint)
        check_array = check_array.reshape(201, f._nwalkers, f.nvary)
        assert_allclose(check_array, f.chain)

        # test loading the checkpoint
        chain = load_chain(checkpoint)
        assert_allclose(chain, f.chain)

        f.initialise("jitter")
        f.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        assert_equal(f.chain.shape[0], 2)

        # we should be able to produce 2 * 100 steps from the generator
        g = self.objective.pgen(ngen=20000000000)
        s = [i for i, a in enumerate(g)]
        assert_equal(np.max(s), 200 - 1)
        g = self.objective.pgen(ngen=200)
        pvec = next(g)
        assert_equal(pvec.size, len(self.objective.parameters.flattened()))

        # check that all the parameters are returned via pgen, not only those
        # being varied.
        self.params[0].vary = False
        f = CurveFitter(self.objective, nwalkers=100)
        f.initialise("jitter")
        f.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        g = self.objective.pgen(ngen=100)
        pvec = next(g)
        assert_equal(pvec.size, len(self.objective.parameters.flattened()))

        # the following test won't work because of emcee/gh226.
        # chain = load_chain(checkpoint)
        # assert_(chain.shape == f.chain.shape)
        # assert_allclose(chain, f.chain)

        # try reproducing best fit with parallel tempering
        self.params[0].vary = True
        f = CurveFitter(self.objective, nwalkers=100, ntemps=10)
        f.fit("differential_evolution", seed=1)

        f.sample(steps=201, random_state=1, verbose=False)
        process_chain(self.objective, f.chain, nburn=50, nthin=15)

        uncertainties = [param.stderr for param in self.params]
        assert_allclose(uncertainties, self.best_weighted_errors, rtol=0.07)

    def test_best_unweighted(self):
        self.objective.weighted = False
        f = CurveFitter(self.objective, nwalkers=100)
        res = f.fit()

        output = res.x
        assert_almost_equal(
            self.objective.chisqr(), self.best_unweighted_chisqr
        )
        assert_almost_equal(output, self.best_unweighted, 5)

        # compare the residuals
        expected_residuals = self.data.y - self.model(self.data.x)
        assert_equal(self.objective.residuals(), expected_residuals)

        # compare objective._covar to the best_unweighted_errors
        uncertainties = np.array([param.stderr for param in self.params])
        assert_almost_equal(uncertainties, self.best_unweighted_errors, 3)

        # the samples won't compare to the covariance matrix...
        # f.sample(nsteps=150, nburn=20, nthin=30, random_state=1)
        # uncertainties = [param.stderr for param in self.params]
        # assert_allclose(uncertainties, self.best_unweighted_errors,
        #                 rtol=0.15)

    def test_all_minimisers(self):
        """test minimisers against the Gaussian fit"""
        f = CurveFitter(self.objective)

        # shgo and dual_annealing only exist in newer scipy releases
        has_dual_annealing = hasattr(sciopt, "dual_annealing")

        methods = ["differential_evolution", "L-BFGS-B", "least_squares"]
        if hasattr(sciopt, "shgo"):
            methods.append("shgo")
        if has_dual_annealing:
            methods.append("dual_annealing")

        for method in methods:
            self.objective.setp(self.p0)
            res = f.fit(method=method)
            assert_almost_equal(res.x, self.best_weighted, 3)

        # smoke test to check that we can use nlpost
        self.objective.setp(self.p0)
        logp0 = self.objective.logp()

        # check that probabilities are calculated correctly
        assert_allclose(
            self.objective.logpost(),
            self.objective.logp() + self.objective.logl(),
        )
        assert_allclose(self.objective.nlpost(), -self.objective.logpost())
        assert_allclose(
            self.objective.nlpost(self.p0), -self.objective.logpost(self.p0)
        )

        # if the priors are all uniform then the only difference between
        # logpost and logl is a constant. A minimiser should converge on the
        # same answer. The following tests examine that.
        # The test works for dual_annealing, but not for differential
        # evolution, not sure why that is.
        # Only attempted when the installed scipy provides dual_annealing,
        # consistent with the gating of the method list above.
        if has_dual_annealing:
            self.objective.setp(self.p0)
            res1 = f.fit(method="dual_annealing", seed=1)
            assert_almost_equal(res1.x, self.best_weighted, 3)
            nll1 = self.objective.nll()
            nlpost1 = self.objective.nlpost()

            self.objective.setp(self.p0)
            res2 = f.fit(method="dual_annealing", target="nlpost", seed=1)
            assert_almost_equal(res2.x, self.best_weighted, 3)
            nll2 = self.objective.nll()
            nlpost2 = self.objective.nlpost()

            assert_allclose(nlpost1, nlpost2, atol=0.001)
            assert_allclose(nll1, nll2, atol=0.001)

        # these two priors are calculated for different parameter values
        # (before and after the fit) they should be the same because all
        # the parameters have uniform priors.
        assert_almost_equal(self.objective.logp(), logp0)

    def test_pymc3_sample(self):
        # test sampling with pymc3
        try:
            import pymc3 as pm
            from refnx.analysis import pymc3_model
        except (ModuleNotFoundError, ImportError, AttributeError):
            # can't run test if pymc3/theano not installed; report it as
            # skipped rather than letting it count as a pass
            pytest.skip("pymc3/theano not installed")

        with pymc3_model(self.objective):
            s = pm.NUTS()
            pm.sample(
                200,
                tune=100,
                step=s,
                discard_tuned_samples=True,
                compute_convergence_checks=False,
                random_seed=1,
            )
class TestObjective(object): def setup_method(self): # Choose the "true" parameters. # Reproducible results! np.random.seed(123) self.m_true = -0.9594 self.b_true = 4.294 self.f_true = 0.534 self.m_ls = -1.1040757010910947 self.b_ls = 5.4405552502319505 # Generate some synthetic data from the model. N = 50 x = np.sort(10 * np.random.rand(N)) y_err = 0.1 + 0.5 * np.random.rand(N) y = self.m_true * x + self.b_true y += np.abs(self.f_true * y) * np.random.randn(N) y += y_err * np.random.randn(N) self.data = Data1D(data=(x, y, y_err)) self.p = Parameter(self.b_ls, 'b') | Parameter(self.m_ls, 'm') self.model = Model(self.p, fitfunc=line) self.objective = Objective(self.model, self.data) # want b and m self.p[0].vary = True self.p[1].vary = True mod = np.array([ 4.78166609, 4.42364699, 4.16404064, 3.50343504, 3.4257084, 2.93594347, 2.92035638, 2.67533842, 2.28136038, 2.19772983, 1.99295496, 1.93748334, 1.87484436, 1.65161016, 1.44613461, 1.11128101, 1.04584535, 0.86055984, 0.76913963, 0.73906649, 0.73331407, 0.68350418, 0.65216599, 0.59838566, 0.13070299, 0.10749131, -0.01010195, -0.10010155, -0.29495372, -0.42817431, -0.43122391, -0.64637715, -1.30560686, -1.32626428, -1.44835768, -1.52589881, -1.56371158, -2.12048349, -2.24899179, -2.50292682, -2.53576659, -2.55797996, -2.60870542, -2.7074727, -3.93781479, -4.12415366, -4.42313742, -4.98368609, -5.38782395, -5.44077086 ]) self.mod = mod def test_model(self): # test that the line data produced by our model is the same as the # test data assert_almost_equal(self.model(self.data.x), self.mod) def test_synthetic_data(self): # test that we create the correct synthetic data by performing a least # squares fit on it assert_(self.data.y_err is not None) x, y, y_err, _ = self.data.data A = np.vstack((np.ones_like(x), x)).T C = np.diag(y_err * y_err) cov = np.linalg.inv(np.dot(A.T, np.linalg.solve(C, A))) b_ls, m_ls = np.dot(cov, np.dot(A.T, np.linalg.solve(C, y))) assert_almost_equal(b_ls, self.b_ls) assert_almost_equal(m_ls, 
self.m_ls) def test_setp(self): # check that we can set parameters self.p[0].vary = False assert_(len(self.objective.varying_parameters()) == 1) self.objective.setp(np.array([1.23])) assert_equal(self.p[1].value, 1.23) self.objective.setp(np.array([1.234, 1.23])) assert_equal(np.array(self.p), [1.234, 1.23]) def test_pvals(self): assert_equal(self.objective.parameters.pvals, [self.b_ls, self.m_ls]) self.objective.parameters.pvals = [1, 2] assert_equal(self.objective.parameters.pvals, [1, 2.]) def test_lnprior(self): self.p[0].range(0, 10) assert_almost_equal(self.objective.lnprior(), np.log(0.1)) # lnprior should set parameters self.objective.lnprior([8, 2]) assert_equal(np.array(self.objective.parameters), [8, 2]) # if we supply a value outside the range it should return -inf assert_equal(self.objective.lnprior([-1, 2]), -np.inf) def test_lnprob(self): # http://dan.iel.fm/emcee/current/user/line/ assert_almost_equal(self.objective.lnprior(), 0) # the uncertainties are underestimated in this example... 
assert_almost_equal(self.objective.lnlike(), -559.01078135444595) assert_almost_equal(self.objective.lnprob(), -559.01078135444595) def test_chisqr(self): assert_almost_equal(self.objective.chisqr(), 1231.1096772954229) def test_residuals(self): # weighted, with and without transform assert_almost_equal(self.objective.residuals(), (self.data.y - self.mod) / self.data.y_err) objective = Objective(self.model, self.data, transform=Transform('lin')) assert_almost_equal(objective.residuals(), (self.data.y - self.mod) / self.data.y_err) # unweighted, with and without transform objective = Objective(self.model, self.data, use_weights=False) assert_almost_equal(objective.residuals(), self.data.y - self.mod) objective = Objective(self.model, self.data, use_weights=False, transform=Transform('lin')) assert_almost_equal(objective.residuals(), self.data.y - self.mod) def test_lnprob_extra(self): self.objective.lnprob_extra = lnprob_extra # repeat lnprior test self.p[0].range(0, 10) assert_almost_equal(self.objective.lnprior(), np.log(0.1) + 1) def test_objective_pickle(self): # can you pickle the objective function? pkl = pickle.dumps(self.objective) pickle.loads(pkl) # check the ForkingPickler as well. if hasattr(ForkingPickler, 'dumps'): pkl = ForkingPickler.dumps(self.objective) pickle.loads(pkl) # can you pickle with an extra function present? self.objective.lnprob_extra = lnprob_extra pkl = pickle.dumps(self.objective) pickle.loads(pkl) # check the ForkingPickler as well. if hasattr(ForkingPickler, 'dumps'): pkl = ForkingPickler.dumps(self.objective) pickle.loads(pkl) def test_transform_pickle(self): # can you pickle the Transform object? 
pkl = pickle.dumps(Transform('logY')) pickle.loads(pkl) def test_transform(self): pth = os.path.dirname(os.path.abspath(__file__)) fname = os.path.join(pth, 'c_PLP0011859_q.txt') data = ReflectDataset(fname) t = Transform('logY') yt, et = t(data.x, data.y, y_err=data.y_err) assert_equal(yt, np.log10(data.y)) yt, _ = t(data.x, data.y, y_err=None) assert_equal(yt, np.log10(data.y)) EPy, EPe = EP.EPlog10(data.y, data.y_err) assert_equal(yt, EPy) assert_equal(et, EPe) def test_lnsigma(self): # check that lnsigma works correctly def lnprior(theta, x, y, yerr): m, b, lnf = theta if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0: return 0.0 return -np.inf def lnlike(theta, x, y, yerr): m, b, lnf = theta model = m * x + b inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf)) print(inv_sigma2) return -0.5 * (np.sum((y - model)**2 * inv_sigma2 - np.log(inv_sigma2))) x, y, yerr, _ = self.data.data theta = [self.m_true, self.b_true, np.log(self.f_true)] bo = BaseObjective(theta, lnlike, lnprior=lnprior, fcn_args=(x, y, yerr)) lnsigma = Parameter(np.log(self.f_true), 'lnsigma', bounds=(-10, 1), vary=True) self.objective.setp(np.array([self.b_true, self.m_true])) self.objective.lnsigma = lnsigma assert_allclose(self.objective.lnlike(), bo.lnlike()) def test_base_emcee(self): # check that the base objective works against the emcee example. 
def lnprior(theta, x, y, yerr): m, b, lnf = theta if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0: return 0.0 return -np.inf def lnlike(theta, x, y, yerr): m, b, lnf = theta model = m * x + b inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf)) return -0.5 * (np.sum((y - model)**2 * inv_sigma2 - np.log(inv_sigma2))) x, y, yerr, _ = self.data.data theta = [self.m_true, self.b_true, np.log(self.f_true)] bo = BaseObjective(theta, lnlike, lnprior=lnprior, fcn_args=(x, y, yerr)) # test that the wrapper gives the same lnlike as the direct function assert_almost_equal(bo.lnlike(theta), lnlike(theta, x, y, yerr)) assert_almost_equal(bo.lnlike(theta), -bo.nll(theta)) assert_almost_equal(bo.nll(theta), 12.8885352412) # Find the maximum likelihood value. result = minimize(bo.nll, theta) # for repeatable sampling np.random.seed(1) ndim, nwalkers = 3, 100 pos = [ result["x"] + 1e-4 * np.random.randn(ndim) for i in range(nwalkers) ] sampler = emcee.EnsembleSampler(nwalkers, ndim, bo.lnprob) sampler.run_mcmc(pos, 800, rstate0=np.random.get_state()) burnin = 200 samples = sampler.chain[:, burnin:, :].reshape((-1, ndim)) samples[:, 2] = np.exp(samples[:, 2]) m_mc, b_mc, f_mc = map( lambda v: (v[1], v[2] - v[1], v[1] - v[0]), zip(*np.percentile(samples, [16, 50, 84], axis=0))) assert_allclose(m_mc, (-1.0071664, 0.0809444, 0.0784894), rtol=0.04) assert_allclose(b_mc, (4.5428107, 0.3549174, 0.3673304), rtol=0.04) assert_allclose(f_mc, (0.4610898, 0.0823304, 0.0640812), rtol=0.06) # # smoke test for covariance matrix bo.parameters = np.array(result['x']) covar1 = bo.covar() uncertainties = np.sqrt(np.diag(covar1)) # covariance from objective._covar should be almost equal to # the covariance matrix from sampling covar2 = np.cov(samples.T) assert_almost_equal(np.sqrt(np.diag(covar2))[:2], uncertainties[:2], 2) # check covariance of self.objective # TODO var_arr = result['x'][:] var_arr[0], var_arr[1], var_arr[2] = var_arr[2], var_arr[1], var_arr[0] # 
assert_(self.objective.data.weighted) # self.objective.parameters.pvals = var_arr # covar3 = self.objective.covar() # uncertainties3 = np.sqrt(np.diag(covar3)) # assert_almost_equal(uncertainties3, uncertainties) # assert(False) def test_covar(self): # checks objective.covar against optimize.least_squares covariance. path = os.path.dirname(os.path.abspath(__file__)) theoretical = np.loadtxt(os.path.join(path, 'gauss_data.txt')) xvals, yvals, evals = np.hsplit(theoretical, 3) xvals = xvals.flatten() yvals = yvals.flatten() evals = evals.flatten() p0 = np.array([0.1, 20., 0.1, 0.1]) names = ['bkg', 'A', 'x0', 'width'] bounds = [(-1, 1), (0, 30), (-5., 5.), (0.001, 2)] params = Parameters(name="gauss_params") for p, name, bound in zip(p0, names, bounds): param = Parameter(p, name=name) param.range(*bound) param.vary = True params.append(param) model = Model(params, fitfunc=gauss) data = Data1D((xvals, yvals, evals)) objective = Objective(model, data) # first calculate least_squares jac/hess/covariance matrices res = least_squares(objective.residuals, np.array(params), jac='3-point') hess_least_squares = np.matmul(res.jac.T, res.jac) covar_least_squares = np.linalg.inv(hess_least_squares) # now calculate corresponding matrices by hand, to see if the approach # concurs with least_squares objective.setp(res.x) _pvals = np.array(res.x) def residuals_scaler(vals): return np.squeeze(objective.residuals(_pvals * vals)) jac = approx_derivative(residuals_scaler, np.ones_like(_pvals)) hess = np.matmul(jac.T, jac) covar = np.linalg.inv(hess) covar = covar * np.atleast_2d(_pvals) * np.atleast_2d(_pvals).T assert_allclose(covar, covar_least_squares) # check that objective.covar corresponds to the least_squares # covariance matrix objective.setp(res.x) _pvals = np.array(res.x) covar_objective = objective.covar() assert_allclose(covar_objective, covar_least_squares) # now see what happens with a parameter that has no effect on residuals param = Parameter(1.234, name='dummy') 
param.vary = True params.append(param) from pytest import raises with raises(LinAlgError): objective.covar()
class TestFitterGauss(object):
    """
    Fit a noisy gaussian with `CurveFitter`, weighted and unweighted, and
    check the fitted parameters and uncertainties against reference values.
    """

    @pytest.fixture(autouse=True)
    def setup_method(self, tmpdir):
        """Build the gaussian objective shared by every test in the class."""
        self.path = os.path.dirname(os.path.abspath(__file__))
        self.tmpdir = tmpdir.strpath

        # the data file holds three columns: x, y and the y uncertainty
        theoretical = np.loadtxt(os.path.join(self.path, 'gauss_data.txt'))
        xvals, yvals, evals = (
            column.flatten() for column in np.hsplit(theoretical, 3)
        )

        # these best weighted values and uncertainties obtained with Igor
        self.best_weighted = [-0.00246095, 19.5299, -8.28446e-2, 1.24692]
        self.best_weighted_errors = [
            0.0220313708486,
            1.12879436221,
            0.0447659158681,
            0.0412022938883,
        ]
        self.best_weighted_chisqr = 77.6040960351

        self.best_unweighted = [
            -0.10584111872702096,
            19.240347049328989,
            0.0092623066070940396,
            1.501362314145845,
        ]
        self.best_unweighted_errors = [
            0.34246565477,
            0.689820935208,
            0.0411243173041,
            0.0693429375282,
        ]
        self.best_unweighted_chisqr = 497.102084956

        # starting values, names and limits for the four fitted parameters
        self.p0 = np.array([0.1, 20., 0.1, 0.1])
        self.names = ['bkg', 'A', 'x0', 'width']
        self.bounds = [(-1, 1), (0, 30), (-5., 5.), (0.001, 2)]

        self.params = Parameters(name="gauss_params")
        for value, label, limits in zip(self.p0, self.names, self.bounds):
            par = Parameter(value, name=label)
            par.range(*limits)
            par.vary = True
            self.params.append(par)

        self.model = Model(self.params, fitfunc=gauss)
        self.data = Data1D((xvals, yvals, evals))
        self.objective = Objective(self.model, self.data)
        return 0

    def test_best_weighted(self):
        """Weighted least-squares fit, then sampling with a checkpoint."""
        assert_equal(len(self.objective.varying_parameters()), 4)
        self.objective.setp(self.p0)

        fitter = CurveFitter(self.objective, nwalkers=100)
        fit_result = fitter.fit('least_squares', jac='3-point')

        assert_almost_equal(fit_result.x, self.best_weighted, 3)
        assert_almost_equal(self.objective.chisqr(),
                            self.best_weighted_chisqr,
                            5)

        # the objective's residuals should be the weighted data - model
        expected_residuals = (
            (self.data.y - self.model(self.data.x)) / self.data.y_err
        )
        assert_equal(self.objective.residuals(), expected_residuals)

        # uncertainties straight after the fit
        stderrs = [par.stderr for par in self.params]
        assert_allclose(stderrs, self.best_weighted_errors, rtol=0.005)

        # sampling is checkpointed to a file so that can be tested as well
        checkpoint = os.path.join(self.tmpdir, 'checkpoint.txt')

        # uncertainties after sampling
        np.random.seed(1)
        fitter.sample(steps=101, random_state=1, verbose=False, f=checkpoint)
        process_chain(self.objective, fitter.chain, nburn=50, nthin=10)
        stderrs = [par.stderr for par in self.params]
        assert_allclose(stderrs, self.best_weighted_errors, rtol=0.07)

        # the checkpoint file should hold the same numbers as the chain
        saved = np.loadtxt(checkpoint)
        saved = saved.reshape(101, fitter._nwalkers, fitter.nvary)
        assert_allclose(saved, fitter.chain)

        # and load_chain should recover the chain from the file
        reloaded = load_chain(checkpoint)
        assert_allclose(reloaded, fitter.chain)

        fitter.initialise('jitter')
        fitter.sample(steps=2, nthin=4, f=checkpoint, verbose=False)
        assert_equal(fitter.chain.shape[0], 2)

        # reloading the checkpoint at this point and comparing it with
        # fitter.chain is not tested, because of emcee/gh226.

    def test_best_unweighted(self):
        """Unweighted fit compared against the unweighted reference values."""
        self.objective.weighted = False

        fitter = CurveFitter(self.objective, nwalkers=100)
        fit_result = fitter.fit()

        assert_almost_equal(self.objective.chisqr(),
                            self.best_unweighted_chisqr)
        assert_almost_equal(fit_result.x, self.best_unweighted, 5)

        # without weights the residuals are simply data - model
        expected_residuals = self.data.y - self.model(self.data.x)
        assert_equal(self.objective.residuals(), expected_residuals)

        # parameter uncertainties against the unweighted reference errors
        stderrs = np.array([par.stderr for par in self.params])
        assert_almost_equal(stderrs, self.best_unweighted_errors, 3)
class TestObjective(object):
    """
    Tests for `Objective` / `BaseObjective`, built around a synthetic
    straight-line dataset (the emcee line-fitting example).
    """

    def setup_method(self):
        """Create the synthetic line data, model and objective."""
        # Choose the "true" parameters.

        # Reproducible results!
        np.random.seed(123)

        self.m_true = -0.9594
        self.b_true = 4.294
        self.f_true = 0.534
        self.m_ls = -1.1040757010910947
        self.b_ls = 5.4405552502319505

        # Generate some synthetic data from the model.
        N = 50
        x = np.sort(10 * np.random.rand(N))
        y_err = 0.1 + 0.5 * np.random.rand(N)
        y = self.m_true * x + self.b_true
        y += np.abs(self.f_true * y) * np.random.randn(N)
        y += y_err * np.random.randn(N)

        self.data = Data1D(data=(x, y, y_err))

        self.p = Parameter(self.b_ls, "b") | Parameter(self.m_ls, "m")
        self.model = Model(self.p, fitfunc=line)
        self.objective = Objective(self.model, self.data)

        # want b and m
        self.p[0].vary = True
        self.p[1].vary = True

        # reference model values, compared against self.model in test_model
        mod = np.array([
            4.78166609, 4.42364699, 4.16404064, 3.50343504, 3.4257084,
            2.93594347, 2.92035638, 2.67533842, 2.28136038, 2.19772983,
            1.99295496, 1.93748334, 1.87484436, 1.65161016, 1.44613461,
            1.11128101, 1.04584535, 0.86055984, 0.76913963, 0.73906649,
            0.73331407, 0.68350418, 0.65216599, 0.59838566, 0.13070299,
            0.10749131, -0.01010195, -0.10010155, -0.29495372, -0.42817431,
            -0.43122391, -0.64637715, -1.30560686, -1.32626428, -1.44835768,
            -1.52589881, -1.56371158, -2.12048349, -2.24899179, -2.50292682,
            -2.53576659, -2.55797996, -2.60870542, -2.7074727, -3.93781479,
            -4.12415366, -4.42313742, -4.98368609, -5.38782395, -5.44077086,
        ])
        self.mod = mod

    def test_model(self):
        # test that the line data produced by our model is the same as the
        # test data
        assert_almost_equal(self.model(self.data.x), self.mod)

    def test_synthetic_data(self):
        # test that we create the correct synthetic data by performing a least
        # squares fit on it
        assert_(self.data.y_err is not None)

        x, y, y_err, _ = self.data.data
        A = np.vstack((np.ones_like(x), x)).T
        C = np.diag(y_err * y_err)
        cov = np.linalg.inv(np.dot(A.T, np.linalg.solve(C, A)))
        b_ls, m_ls = np.dot(cov, np.dot(A.T, np.linalg.solve(C, y)))

        assert_almost_equal(b_ls, self.b_ls)
        assert_almost_equal(m_ls, self.m_ls)

    def test_setp(self):
        # check that we can set parameters
        self.p[0].vary = False

        assert_(len(self.objective.varying_parameters()) == 1)
        self.objective.setp(np.array([1.23]))
        assert_equal(self.p[1].value, 1.23)
        self.objective.setp(np.array([1.234, 1.23]))
        assert_equal(np.array(self.p), [1.234, 1.23])

    def test_pvals(self):
        assert_equal(self.objective.parameters.pvals, [self.b_ls, self.m_ls])
        self.objective.parameters.pvals = [1, 2]
        assert_equal(self.objective.parameters.pvals, [1, 2.0])

    def test_logp(self):
        self.p[0].range(0, 10)
        assert_almost_equal(self.objective.logp(), np.log(0.1))

        # logp should set parameters
        self.objective.logp([8, 2])
        assert_equal(np.array(self.objective.parameters), [8, 2])

        # if we supply a value outside the range it should return -inf
        assert_equal(self.objective.logp([-1, 2]), -np.inf)

    def test_logpost(self):
        # http://dan.iel.fm/emcee/current/user/line/
        assert_almost_equal(self.objective.logp(), 0)

        assert_almost_equal(self.objective.nlpost(),
                            -self.objective.logpost())

        # the uncertainties are underestimated in this example...
        # amendment factor because dfm emcee example does not include 2pi
        amend = 0.5 * self.objective.npoints * np.log(2 * np.pi)
        assert_almost_equal(self.objective.logl() + amend,
                            -559.01078135444595)
        assert_almost_equal(self.objective.logpost() + amend,
                            -559.01078135444595)

    def test_prior_transform(self):
        self.p[0].bounds = PDF(stats.uniform(-10, 20))
        self.p[1].bounds = PDF(stats.norm(loc=5, scale=10))
        x = self.objective.prior_transform([0.1, 0.9])

        # Both expected values have to be passed together as `desired`:
        # the third positional argument of assert_allclose is `rtol`, so
        # passing them separately silently turns the second expected value
        # into a (huge) tolerance and the check can never fail.
        expected = [
            stats.uniform.ppf(0.1, -10, 20),
            stats.norm.ppf(0.9, loc=5, scale=10),
        ]
        assert_allclose(x, expected)

    def test_chisqr(self):
        assert_almost_equal(self.objective.chisqr(), 1231.1096772954229)

    def test_residuals(self):
        # weighted, with and without transform
        assert_almost_equal(
            self.objective.residuals(),
            (self.data.y - self.mod) / self.data.y_err,
        )

        objective = Objective(self.model, self.data,
                              transform=Transform("lin"))
        assert_almost_equal(
            objective.residuals(),
            (self.data.y - self.mod) / self.data.y_err,
        )

        # unweighted, with and without transform
        objective = Objective(self.model, self.data, use_weights=False)
        assert_almost_equal(objective.residuals(), self.data.y - self.mod)

        objective = Objective(
            self.model,
            self.data,
            use_weights=False,
            transform=Transform("lin"),
        )
        assert_almost_equal(objective.residuals(), self.data.y - self.mod)

    def test_masked_dataset(self):
        # masking out one point should drop exactly one residual
        residuals = self.objective.residuals()

        mask = np.full_like(self.objective.data.y, True, bool)
        mask[1] = False
        self.objective.data.mask = mask

        assert_equal(self.objective.residuals().size, residuals.size - 1)

    def test_logp_extra(self):
        original_logl = self.objective.logl()
        self.objective.logp_extra = logp_extra
        assert_almost_equal(self.objective.logl(), original_logl + 1)

    def test_objective_pickle(self):
        # can you pickle the objective function?
        pkl = pickle.dumps(self.objective)
        pickle.loads(pkl)

        # check the ForkingPickler as well.
        if hasattr(ForkingPickler, "dumps"):
            pkl = ForkingPickler.dumps(self.objective)
            pickle.loads(pkl)

        # can you pickle with an extra function present?
        self.objective.logp_extra = logp_extra
        pkl = pickle.dumps(self.objective)
        pickle.loads(pkl)

        # check the ForkingPickler as well.
        if hasattr(ForkingPickler, "dumps"):
            pkl = ForkingPickler.dumps(self.objective)
            pickle.loads(pkl)

    def test_transform_pickle(self):
        # can you pickle the Transform object?
        pkl = pickle.dumps(Transform("logY"))
        pickle.loads(pkl)

    def test_transform(self):
        pth = os.path.dirname(os.path.abspath(__file__))

        fname = os.path.join(pth, "c_PLP0011859_q.txt")
        data = ReflectDataset(fname)
        t = Transform("logY")

        yt, et = t(data.x, data.y, y_err=data.y_err)
        assert_equal(yt, np.log10(data.y))

        yt, _ = t(data.x, data.y, y_err=None)
        assert_equal(yt, np.log10(data.y))

        EPy, EPe = EP.EPlog10(data.y, data.y_err)
        assert_equal(yt, EPy)
        assert_equal(et, EPe)

    def test_repr_transform(self):
        # a Transform should be reproducible from its repr
        p = Transform(None)
        q = eval(repr(p))
        assert p.form == q.form

        p = Transform("logY")
        q = eval(repr(p))
        assert p.form == q.form

    def test_lnsigma(self):
        # check that lnsigma works correctly, by using the emcee line fit
        # example
        def logp(theta, x, y, yerr):
            m, b, lnf = theta
            if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
                return 0.0
            return -np.inf

        def logl(theta, x, y, yerr):
            m, b, lnf = theta
            model = m * x + b
            inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf))
            return -0.5 * (
                np.sum((y - model)**2 * inv_sigma2 - np.log(inv_sigma2))
            )

        x, y, yerr, _ = self.data.data

        theta = [self.m_true, self.b_true, np.log(self.f_true)]
        bo = BaseObjective(theta, logl, logp=logp, fcn_args=(x, y, yerr))

        lnsigma = Parameter(np.log(self.f_true), "lnsigma",
                            bounds=(-10, 1), vary=True)
        self.objective.setp(np.array([self.b_true, self.m_true]))
        self.objective.lnsigma = lnsigma

        # amendment factor because dfm emcee example does not include 2pi
        amend = 0.5 * self.objective.npoints * np.log(2 * np.pi)

        assert_allclose(self.objective.logl() + amend, bo.logl())

    def test_base_emcee(self):
        # check that the base objective works against the emcee example.
        def logp(theta, x, y, yerr):
            m, b, lnf = theta
            if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
                return 0.0
            return -np.inf

        def logl(theta, x, y, yerr):
            m, b, lnf = theta
            model = m * x + b
            inv_sigma2 = 1.0 / (yerr**2 + model**2 * np.exp(2 * lnf))
            return -0.5 * (
                np.sum((y - model)**2 * inv_sigma2 - np.log(inv_sigma2))
            )

        x, y, yerr, _ = self.data.data

        theta = [self.m_true, self.b_true, np.log(self.f_true)]
        bo = BaseObjective(theta, logl, logp=logp, fcn_args=(x, y, yerr))

        # test that the wrapper gives the same logl as the direct function
        assert_almost_equal(bo.logl(theta), logl(theta, x, y, yerr))
        assert_almost_equal(bo.logl(theta), -bo.nll(theta))
        assert_almost_equal(bo.nll(theta), 12.8885352412)

        # Find the maximum likelihood value.
        result = minimize(bo.nll, theta)

        # for repeatable sampling
        np.random.seed(1)

        ndim, nwalkers = 3, 100
        pos = [
            result["x"] + 1e-4 * np.random.randn(ndim)
            for i in range(nwalkers)
        ]

        sampler = emcee.EnsembleSampler(nwalkers, ndim, bo.logpost)
        state = emcee.State(pos, random_state=np.random.get_state())
        sampler.run_mcmc(state, 800)

        burnin = 200
        samples = sampler.get_chain()[burnin:, :, :].reshape((-1, ndim))
        samples[:, 2] = np.exp(samples[:, 2])
        # (median, upper uncertainty, lower uncertainty) for each parameter
        m_mc, b_mc, f_mc = map(
            lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
            zip(*np.percentile(samples, [16, 50, 84], axis=0)),
        )
        assert_allclose(m_mc, (-1.0071664, 0.0809444, 0.0784894), rtol=0.04)
        assert_allclose(b_mc, (4.5428107, 0.3549174, 0.3673304), rtol=0.04)
        assert_allclose(f_mc, (0.4610898, 0.0823304, 0.0640812), rtol=0.06)

        # smoke test for covariance matrix
        bo.parameters = np.array(result["x"])
        covar1 = bo.covar()
        uncertainties = np.sqrt(np.diag(covar1))

        # covariance from objective._covar should be almost equal to
        # the covariance matrix from sampling
        covar2 = np.cov(samples.T)
        assert_almost_equal(np.sqrt(np.diag(covar2))[:2],
                            uncertainties[:2],
                            2)

        # TODO: also compare self.objective.covar() against `uncertainties`
        # (this check was never enabled).

    def test_covar(self):
        # checks objective.covar against optimize.least_squares covariance.
        path = os.path.dirname(os.path.abspath(__file__))

        theoretical = np.loadtxt(os.path.join(path, "gauss_data.txt"))
        xvals, yvals, evals = np.hsplit(theoretical, 3)
        xvals = xvals.flatten()
        yvals = yvals.flatten()
        evals = evals.flatten()

        p0 = np.array([0.1, 20.0, 0.1, 0.1])
        names = ["bkg", "A", "x0", "width"]
        bounds = [(-1, 1), (0, 30), (-5.0, 5.0), (0.001, 2)]

        params = Parameters(name="gauss_params")
        for p, name, bound in zip(p0, names, bounds):
            param = Parameter(p, name=name)
            param.range(*bound)
            param.vary = True
            params.append(param)

        model = Model(params, fitfunc=gauss)
        data = Data1D((xvals, yvals, evals))
        objective = Objective(model, data)

        # first calculate least_squares jac/hess/covariance matrices
        res = least_squares(objective.residuals, np.array(params),
                            jac="3-point")

        hess_least_squares = np.matmul(res.jac.T, res.jac)
        covar_least_squares = np.linalg.inv(hess_least_squares)

        # now calculate corresponding matrices by hand, to see if the approach
        # concurs with least_squares
        objective.setp(res.x)
        _pvals = np.array(res.x)

        def residuals_scaler(vals):
            return np.squeeze(objective.residuals(_pvals * vals))

        jac = approx_derivative(residuals_scaler, np.ones_like(_pvals))
        hess = np.matmul(jac.T, jac)
        covar = np.linalg.inv(hess)

        # undo the scaling that residuals_scaler applied to the parameters
        covar = covar * np.atleast_2d(_pvals) * np.atleast_2d(_pvals).T

        assert_allclose(covar, covar_least_squares)

        # check that objective.covar corresponds to the least_squares
        # covariance matrix, J.T x J
        objective.setp(res.x)
        covar_objective = objective.covar()
        assert_allclose(covar_objective, covar_least_squares)

        # sometimes the residuals method may not be usable, see if
        # objective.covar calculated from a scalar works
        objective.setp(res.x)
        covar_objective = objective.covar("nll")
        assert_allclose(
            np.sqrt(np.diag(covar_objective)),
            np.sqrt(np.diag(covar_least_squares)),
            rtol=0.08,
        )

        # now see what happens with a parameter that has no effect on residuals
        param = Parameter(1.234, name="dummy")
        param.vary = True
        params.append(param)

        with pytest.raises(LinAlgError):
            objective.covar()

    @pytest.mark.xfail
    def test_pymc3(self):
        # test objective logl against pymc3

        # don't run this test if pymc3 is not installed
        try:
            import pymc3 as pm
        except ImportError:
            return

        logl = self.objective.logl()

        from refnx.analysis import pymc3_model
        from refnx.analysis.objective import _to_pymc3_distribution

        mod = pymc3_model(self.objective)
        with mod:
            pymc_logl = mod.logp({
                "p0": self.p[0].value,
                "p1": self.p[1].value,
            })

        assert_allclose(logl, pymc_logl)

        # now check some of the distributions
        with pm.Model():
            p = Parameter(1, bounds=(1, 10))
            d = _to_pymc3_distribution("a", p)
            assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))
            assert_(np.isneginf(d.distribution.logp(-1).eval()))

            q = Parameter(1, bounds=PDF(stats.uniform(1, 9)))
            d = _to_pymc3_distribution("b", q)
            assert_almost_equal(d.distribution.logp(2).eval(), q.logp(2))
            assert_(np.isneginf(d.distribution.logp(-1).eval()))

            p = Parameter(1, bounds=PDF(stats.uniform))
            d = _to_pymc3_distribution("c", p)
            assert_almost_equal(d.distribution.logp(0.5).eval(), p.logp(0.5))

            p = Parameter(1, bounds=PDF(stats.norm))
            d = _to_pymc3_distribution("d", p)
            assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))

            p = Parameter(1, bounds=PDF(stats.norm(1, 10)))
            d = _to_pymc3_distribution("e", p)
            assert_almost_equal(d.distribution.logp(2).eval(), p.logp(2))
class Motofit(object):
    """
    An interactive slab modeller (Jupyter/ipywidgets based) for Neutron and
    X-ray reflectometry data.

    The interactive modeller is designed to be used in a Jupyter notebook.

    Usage
    -----

    >>> # specify that plots are in a separate graph window
    >>> %matplotlib qt

    >>> # alternately if you want the graph to be embedded in the notebook use
    >>> # %matplotlib notebook

    >>> from refnx.reflect import Motofit
    >>> # create an instance of the modeller
    >>> app = Motofit()
    >>> # display it in the notebook by calling the object with a datafile.
    >>> app('dataset1.txt')
    >>> # lets fit a different dataset
    >>> app2 = Motofit()
    >>> app2('dataset2.txt')

    The `Motofit` instance has several useful attributes that can be used in
    other cells. For example, one can access the `objective` and `curvefitter`
    attributes for more advanced fitting functionality than is available in
    the GUI. A `code` attribute can be used to retrieve a Python code fragment
    that can be used as a basis for developing more complicated models, such
    as interparameter constraints, global fitting, etc.

    Attributes
    ----------
    dataset: refnx.reflect.Data1D
        The dataset associated with the modeller
    model: refnx.reflect.ReflectModel
        Calculates a theoretical model, from an interfacial structure
        (`model.Structure`).
    objective: refnx.analysis.Objective
        The Objective that allows one to compare the model against the data.
    curvefitter: refnx.analysis.CurveFitter
        Object for fitting the data based on the objective.
    fig: matplotlib.Figure
        Graph displaying the data.
    code: str
        A Python code fragment capable of fitting the data.

    Methods
    -------
    __call__ - display the GUI in a Jupyter cell
    save_model - save the current model to a pickle file
    load_model - load a pickle file and set it as the current file
    set_model - use an existing `refnx.reflect.ReflectModel` to set the GUI
                model
    load_data - load a dataset
    do_fit - do a fit
    redraw - Update the notebook cell containing the GUI
    """

    def __init__(self):
        # attributes for the graph
        self.qmin = 0.005
        self.qmax = 0.5
        self.qpnt = 1000
        self.fig = None

        self.ax_data = None
        self.ax_residual = None
        self.ax_sld = None
        # gridspecs specify how the plots are laid out. Gridspec1 is when the
        # residuals plot is displayed. Gridspec2 is when it's not visible
        self._gridspec1 = gridspec.GridSpec(2, 2,
                                            height_ratios=[5, 1],
                                            width_ratios=[1, 1],
                                            hspace=0.01)
        self._gridspec2 = gridspec.GridSpec(1, 2)

        self.theoretical_plot = None
        self.theoretical_plot_sld = None

        # attributes for a user dataset
        self.dataset = None
        self.objective = None
        self._curvefitter = None
        self.data_plot = None
        self.residuals_plot = None
        self.data_plot_sld = None

        self.dataset_name = widgets.Text(description='dataset:')
        self.dataset_name.disabled = True
        self.chisqr = widgets.FloatText(description='chi-squared:')
        self.chisqr.disabled = True

        # default three-slab starting structure
        slab0 = Slab(0, 0, 0)
        slab1 = Slab(25, 3.47, 3)
        slab2 = Slab(0, 2.07, 3)

        # build the structure and model once, so that the model uses the
        # structure that rename_params was applied to.
        structure = slab0 | slab1 | slab2
        rename_params(structure)
        self.model = ReflectModel(structure)

        # give some default parameter limits
        self.model.scale.bounds = (0.1, 2)
        self.model.bkg.bounds = (1e-8, 2e-5)
        self.model.dq.bounds = (0, 20)
        for slab in self.model.structure:
            slab.thick.bounds = (0, 2 * slab.thick.value)
            slab.sld.real.bounds = (0, 2 * slab.sld.real.value)
            slab.sld.imag.bounds = (0, 2 * slab.sld.imag.value)
            slab.rough.bounds = (0, 2 * slab.rough.value)

        # the main GUI widget
        self.display_box = widgets.VBox()

        self.tab = widgets.Tab()
        self.tab.set_title(0, 'Model')
        self.tab.set_title(1, 'Limits')
        self.tab.set_title(2, 'Options')
        self.tab.observe(self._on_tab_changed, names='selected_index')

        # an output area for messages.
        self.output = widgets.Output()

        # options tab
        self.plot_type = widgets.Dropdown(
            options=['lin', 'logY', 'YX4', 'YX2'],
            value='lin',
            description='Plot Type:',
            disabled=False)
        self.plot_type.observe(self._on_plot_type_changed, names='value')

        self.use_weights = widgets.RadioButtons(
            options=['Yes', 'No'],
            value='Yes',
            description='use dataset weights?',
            style={'description_width': 'initial'})
        self.use_weights.observe(self._on_use_weights_changed, names='value')

        self.transform = Transform('lin')

        self.display_residuals = widgets.Checkbox(
            value=False,
            description='Display residuals')
        self.display_residuals.observe(self._on_display_residuals_changed,
                                       names='value')

        self.model_view = None
        self.set_model(self.model)

    def save_model(self, f=None):
        """
        Serialise a model to a pickle file.

        Parameters
        ----------
        f: file like or str
            File to save model to. If `f` is None a file name is generated,
            from the dataset name if a dataset is loaded, otherwise from the
            current time.
        """
        if f is None:
            f = 'model_' + datetime.datetime.now().isoformat() + '.pkl'
            if self.dataset is not None:
                f = 'model_' + self.dataset.name + '.pkl'

        with possibly_open_file(f) as g:
            pickle.dump(self.model, g)

    def load_model(self, f):
        """
        Load a serialised model.

        Parameters
        ----------
        f: file like or str
            pickle file to load model from.

        Notes
        -----
        Unpickling can execute arbitrary code; only load files you trust.
        """
        # NOTE(review): confirm that possibly_open_file opens a `str` path in
        # a readable binary mode when called without an explicit mode.
        with possibly_open_file(f) as g:
            reflect_model = pickle.load(g)
        self.set_model(reflect_model)
        self._print(repr(self.objective))

    def set_model(self, model):
        """
        Change the `refnx.reflect.ReflectModel` associated with the `Motofit`
        instance.

        Parameters
        ----------
        model: refnx.reflect.ReflectModel
        """
        if self.model_view is not None:
            self.model_view.unobserve_all()

        # figure out if the reflect_model is a different instance. If it is
        # then the objective has to be updated.
        if model is not self.model:
            self.model = model
            self._update_analysis_objects()

        self.model_view = ReflectModelView(self.model)
        self.model_view.observe(self.update_model, names=['view_changed'])
        self.model_view.observe(self.redraw, names=['view_redraw'])

        # observe when the number of varying parameters changed. This
        # invalidates a curvefitter, and a new one has to be produced.
        self.model_view.observe(self._on_num_varying_changed,
                                names=['num_varying'])

        self.model_view.do_fit_button.on_click(self.do_fit)
        self.model_view.to_code_button.on_click(self._to_code)

        self.redraw(None)

    def update_model(self, change):
        """
        Updates the plots when the parameters change

        Parameters
        ----------
        change
            Ignored; present so the method can be used as an observer
            callback.
        """
        if not self.fig:
            return

        q = np.linspace(self.qmin, self.qmax, self.qpnt)
        theoretical = self.model.model(q)
        yt, _ = self.transform(q, theoretical)

        sld_profile = self.model.structure.sld_profile()
        z, sld = sld_profile
        if self.theoretical_plot is not None:
            self.theoretical_plot.set_xdata(q)
            self.theoretical_plot.set_ydata(yt)

            self.theoretical_plot_sld.set_xdata(z)
            self.theoretical_plot_sld.set_ydata(sld)
            self.ax_sld.relim()
            self.ax_sld.autoscale_view()

        if self.dataset is not None:
            # if there's a dataset loaded then residuals_plot
            # should exist
            residuals = self.objective.residuals()
            self.chisqr.value = np.sum(residuals**2)

            self.residuals_plot.set_xdata(self.dataset.x)
            self.residuals_plot.set_ydata(residuals)
            self.ax_residual.relim()
            self.ax_residual.autoscale_view()

        self.fig.canvas.draw()

    def _on_num_varying_changed(self, change):
        # observe when the number of varying parameters changed. This
        # invalidates a curvefitter, and a new one has to be produced.
        if change['new'] != change['old']:
            self._curvefitter = None

    def _update_analysis_objects(self):
        """Rebuild the objective and discard any existing curvefitter."""
        use_weights = self.use_weights.value == 'Yes'
        self.objective = Objective(self.model,
                                   self.dataset,
                                   transform=self.transform,
                                   use_weights=use_weights)
        self._curvefitter = None

    @staticmethod
    def _yscale_for(plot_type):
        """Return the y-axis scale of the data plot for a given plot type."""
        # 'logY' data has already been logged by the transform, so it is
        # drawn on a linear axis; every other plot type uses a log axis.
        return 'linear' if plot_type == 'logY' else 'log'

    def __call__(self, data=None, model=None):
        """
        Display the `Motofit` GUI in a Jupyter notebook cell.

        Parameters
        ----------
        data: refnx.dataset.Data1D
            The dataset to associate with the `Motofit` instance.

        model: refnx.reflect.ReflectModel or str or file-like
            A model to associate with the data.
            If `model` is a `str` or `file`-like then the `load_model` method
            will be used to try and load the model from file. This assumes that
            the file is a pickle of a `ReflectModel`

        Returns
        -------
        display_box: widgets.VBox
            The main GUI widget.
        """
        # the theoretical model
        # display the main graph
        self.fig = plt.figure(figsize=(9, 4))

        # grid specs depending on whether the residuals are displayed
        if self.display_residuals.value:
            d_gs = self._gridspec1[0, 0]
            sld_gs = self._gridspec1[:, 1]
        else:
            d_gs = self._gridspec2[0, 0]
            sld_gs = self._gridspec2[0, 1]

        # raw strings: '\A' is not a valid escape sequence in a normal string
        self.ax_data = self.fig.add_subplot(d_gs)
        self.ax_data.set_xlabel(r'$Q/\AA^{-1}$')
        self.ax_data.set_ylabel('Reflectivity')
        self.ax_data.grid(True, color='b', linestyle='--', linewidth=0.1)

        self.ax_sld = self.fig.add_subplot(sld_gs)
        self.ax_sld.set_ylabel(r'$\rho/10^{-6}\AA^{-2}$')
        self.ax_sld.set_xlabel(r'$z/\AA$')

        self.ax_residual = self.fig.add_subplot(self._gridspec1[1, 0],
                                                sharex=self.ax_data)
        self.ax_residual.set_xlabel(r'$Q/\AA^{-1}$')
        self.ax_residual.grid(True, color='b', linestyle='--', linewidth=0.1)
        self.ax_residual.set_visible(self.display_residuals.value)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.fig.tight_layout()

        q = np.linspace(self.qmin, self.qmax, self.qpnt)
        theoretical = self.model.model(q)
        yt, _ = self.transform(q, theoretical)

        self.theoretical_plot = self.ax_data.plot(q, yt, zorder=2)[0]
        # keep the axis scale consistent with _on_plot_type_changed, the GUI
        # may be displayed again after the plot type has been changed.
        self.ax_data.set_yscale(self._yscale_for(self.plot_type.value))

        z, sld = self.model.structure.sld_profile()
        self.theoretical_plot_sld = self.ax_sld.plot(z, sld)[0]

        # the figure has been reset, so remove ref to the data_plot,
        # residual_plot
        self.data_plot = None
        self.residuals_plot = None

        self.dataset = None
        if data is not None:
            self.load_data(data)

        # set_model and load_model both finish by redrawing the GUI
        if isinstance(model, ReflectModel):
            self.set_model(model)
            return self.display_box
        elif model is not None:
            self.load_model(model)
            return self.display_box

        self.redraw(None)
        return self.display_box

    def load_data(self, data):
        """
        Load a dataset into the `Motofit` instance.

        Parameters
        ----------
        data: refnx.dataset.Data1D, or str, or file-like
        """
        if isinstance(data, ReflectDataset):
            self.dataset = data
        else:
            self.dataset = ReflectDataset(data)

        self.dataset_name.value = self.dataset.name

        # loading a dataset changes the objective and curvefitter
        self._update_analysis_objects()

        self.qmin = np.min(self.dataset.x)
        self.qmax = np.max(self.dataset.x)
        if self.fig is not None:
            yt, et = self.transform(self.dataset.x, self.dataset.y)

            if self.data_plot is None:
                self.data_plot, = self.ax_data.plot(self.dataset.x,
                                                    yt,
                                                    label=self.dataset.name,
                                                    ms=2,
                                                    marker='o',
                                                    ls='',
                                                    zorder=1)
                self.data_plot.set_label(self.dataset.name)
                self.ax_data.legend()

                # no need to calculate residuals here, that'll be updated in
                # the redraw method
                self.residuals_plot, = self.ax_residual.plot(self.dataset.x)
            else:
                self.data_plot.set_xdata(self.dataset.x)
                self.data_plot.set_ydata(yt)

            # calculate theoretical model over same range as data
            # use redraw over update_model because it ensures chi2 widget gets
            # displayed
            self.redraw(None)
            self.ax_data.relim()
            self.ax_data.autoscale_view()
            self.ax_residual.relim()
            self.ax_residual.autoscale_view()
            self.fig.canvas.draw()

    def redraw(self, change):
        """
        Redraw the Jupyter GUI associated with the `Motofit` instance.
        """
        self._update_display_box(self.display_box)
        self.update_model(None)

    @property
    def curvefitter(self):
        """
        The `CurveFitter` for the current objective. It is created on first
        access; None if there is no objective.
        """
        if self.objective is not None and self._curvefitter is None:
            self._curvefitter = CurveFitter(self.objective)

        return self._curvefitter

    def _print(self, string):
        """
        Print to the output widget
        """
        with self.output:
            clear_output()
            print(string)

    def do_fit(self, change=None):
        """
        Ask the Motofit object to perform a fit (differential evolution).

        Parameters
        ----------
        change
            Ignored; present so the method can be used as a button callback.

        Notes
        -----
        After performing the fit the Jupyter display is updated.

        """
        if self.dataset is None:
            return

        if not self.model.parameters.varying_parameters():
            self._print("No parameters are being varied")
            return

        # refuse to fit if the starting point is not allowed by the priors
        try:
            lnprior = self.objective.lnprior()
            if not np.isfinite(lnprior):
                self._print("One of your parameter values lies outside its"
                            " bounds. Please adjust the value, or the bounds.")
                return
        except ZeroDivisionError:
            self._print("One parameter has equal lower and upper bounds."
                        " Either alter the bounds, or don't let that"
                        " parameter vary.")
            return

        def callback(xk, convergence):
            # show the current chi-squared while the fit is running
            self.chisqr.value = self.objective.chisqr(xk)

        self.curvefitter.fit('differential_evolution', callback=callback)

        # need to update the widgets as the model will be updated.
        # this also redraws GUI.
        self.set_model(self.model)

        self._print(repr(self.objective))

    def _to_code(self, change=None):
        """Print the code fragment for the GUI model to the output widget."""
        self._print(self.code)

    @property
    def code(self):
        """
        Executable Python code fragment for the GUI model.
        """
        if self.objective is None:
            self._update_analysis_objects()

        return to_code(self.objective)

    def _on_tab_changed(self, change):
        pass

    def _on_plot_type_changed(self, change):
        """
        User would like to plot and fit as logR/linR/RQ4/RQ2, etc
        """
        self.transform = Transform(change['new'])
        if self.objective is not None:
            self.objective.transform = self.transform

        if self.fig is None:
            # nothing has been drawn yet; __call__ uses self.transform and
            # the plot type when it creates the figure.
            return

        if self.dataset is not None:
            yt, _ = self.transform(self.dataset.x, self.dataset.y)

            self.data_plot.set_xdata(self.dataset.x)
            self.data_plot.set_ydata(yt)

        self.update_model(None)

        # probably have to change LHS axis of the data plot when
        # going between different plot types.
        self.ax_data.set_yscale(self._yscale_for(change['new']))

        self.ax_data.relim()
        self.ax_data.autoscale_view()
        self.fig.canvas.draw()

    def _on_use_weights_changed(self, change):
        """Rebuild the objective with the new weighting and replot."""
        self._update_analysis_objects()
        self.update_model(None)

    def _on_display_residuals_changed(self, change):
        """Show or hide the residuals axes and re-position the other axes."""
        if self.fig is None:
            # no axes to lay out yet; __call__ reads the checkbox value when
            # it creates the figure.
            return

        if change['new']:
            self.ax_residual.set_visible(True)
            self.ax_data.set_position(
                self._gridspec1[0, 0].get_position(self.fig))
            self.ax_sld.set_position(
                self._gridspec1[:, 1].get_position(self.fig))
            plt.setp(self.ax_data.get_xticklabels(), visible=False)
        else:
            self.ax_residual.set_visible(False)
            self.ax_data.set_position(
                self._gridspec2[:, 0].get_position(self.fig))
            self.ax_sld.set_position(
                self._gridspec2[:, 1].get_position(self.fig))
            plt.setp(self.ax_data.get_xticklabels(), visible=True)

    @property
    def _options_box(self):
        """A VBox holding the widgets shown on the 'Options' tab."""
        return widgets.VBox(
            [self.plot_type, self.use_weights, self.display_residuals])

    def _update_display_box(self, box):
        """
        Redraw the Jupyter GUI associated with the `Motofit` instance
        """
        vbox_widgets = []

        # the dataset name and chi-squared are only shown with a dataset
        if self.dataset is not None:
            vbox_widgets.append(widgets.HBox([self.dataset_name, self.chisqr]))

        self.tab.children = [
            self.model_view.model_box,
            self.model_view.limits_box,
            self._options_box,
        ]

        vbox_widgets.append(self.tab)
        vbox_widgets.append(self.output)
        box.children = tuple(vbox_widgets)