def run_replicates(self, replicates=None):
    """Estimate parameter uncertainty from experimental replicates.

    Args:
        replicates: optional replicate specification. When None, use the
            ``Replicates`` helper already attached to this estimator;
            otherwise build a fresh one from the given spec.

    Returns:
        tuple: ``(summary, results)`` as produced by ``Replicates.run``.
    """
    if replicates is not None:
        from .replicates import Replicates
        runner = Replicates(estimator=self, replicates=replicates)
    else:
        runner = self.replicates
    summary, results = runner.run()
    logging.debug("Uncertainty estimation from replicates")
    return summary, results
def fit(self, point_estimate=True, replicates=False, bootstrap=False, convergence_test=False, **kwargs):
    """Run fitting, configuration are from the object

    Args:
        point_estimate (bool): if do point estimation, default True
        replicates (bool): if use replicate for uncertainty estimation, default False
        bootstrap (bool): if do bootstrap, default False
        convergence_test (bool): if do convergence test, default False
        **kwargs: may carry ``save_to``, ``overwrite``, ``rnd_seed`` overrides;
            the rest is forwarded to the individual estimation steps.
    """
    save_to = kwargs.pop('save_to', self.save_to)
    overwrite = kwargs.pop('overwrite', self.overwrite)

    from pathlib import Path
    if save_to and (overwrite is False) and Path(save_to).exists():
        # results stream to hard drive: reuse them if readable
        from json import JSONDecodeError
        try:
            # don't do fitting if the saved result is readable
            logging.debug(f'Try to recover info from {save_to}...')
            self.results = FitResults.from_json(json_path=save_to, estimator=self)
            logging.debug('Found, skip fitting...')
            return None
        except JSONDecodeError:
            # still do the fitting
            logging.debug('Can not parse JSON file, continue fitting...')

    logging.debug('Perform fitting...')
    rnd_seed = kwargs.pop('rnd_seed', self.config.rnd_seed)
    if rnd_seed:
        np.random.seed(rnd_seed)

    if point_estimate:
        results = self.point_estimate(**kwargs)
        self.results.point_estimation.params = results['params']
        if results['metrics'] is not None:
            # FIX: pd.Series.append was deprecated and removed in pandas 2.0;
            # pd.concat is the supported equivalent
            self.results.point_estimation.params = pd.concat(
                [self.results.point_estimation.params, results['metrics']]
            )
        self.results.point_estimation.pcov = results['pcov']

    # explicit dtype avoids the empty-Series dtype warning without changing
    # the resulting dtype after concat
    summary = pd.Series(name=self.name, dtype='float64')
    if bootstrap and (len(self.x_data) >= 2):
        bs_summary, self.results.uncertainty.bs_records = self.run_bootstrap(**kwargs)
        summary = pd.concat([summary, bs_summary])
    if replicates:
        rep_summary, self.results.uncertainty.rep_results = self.run_replicates(**kwargs)
        summary = pd.concat([summary, rep_summary])
    if len(summary) > 0:
        self.results.uncertainty.summary = summary

    if convergence_test:
        self.results.convergence.summary, self.results.convergence.records = \
            self.convergence_test(**kwargs)

    if save_to:
        # FIX: honor the (possibly kwargs-overridden) save_to used for the
        # recovery check above, instead of always reading self.save_to
        check_dir(Path(save_to).parent)
        self.results.to_json(save_to)
def run_bootstrap(self, bs_record_num=None, **kwargs):
    """Run bootstrap to estimate parameter uncertainty.

    Args:
        bs_record_num (int): number of bootstrap records to keep; defaults
            to ``self.config.bs_record_num``.
        **kwargs: optional bootstrap configuration overrides (``bs_method``,
            ``bootstrap_num``, ``grouper``); forwarded to ``Bootstrap`` when
            a new one has to be created.

    Returns:
        tuple: ``(summary, results)`` where ``results`` is down-sampled to
        at most ``bs_record_num`` rows.
    """
    if bs_record_num is None:
        bs_record_num = self.config.bs_record_num
    if self.bootstrap is None:
        # not initialized: enforce a bootstrap
        from .bootstrap import Bootstrap
        self.bootstrap = Bootstrap(estimator=self, **kwargs)
    else:
        # update the existing bootstrap if new config is assigned
        if 'bs_method' in kwargs.keys():
            self.bootstrap.bs_method = kwargs['bs_method']
        if 'bootstrap_num' in kwargs.keys():
            self.bootstrap.bootstrap_num = kwargs['bootstrap_num']
        if 'grouper' in kwargs.keys():
            self.bootstrap.grouper = kwargs['grouper']
    summary, results = self.bootstrap.run()
    if 0 <= bs_record_num <= results.shape[0]:
        # keep only a random subset of records to bound storage
        results = results.sample(n=bs_record_num, replace=False, axis=0)
    # FIX: report the local bs_record_num actually used for sampling; the
    # Bootstrap instance created from **kwargs above may not carry a
    # bs_record_num attribute, and the eagerly-evaluated f-string would
    # raise AttributeError in that case
    logging.debug(f"Bootstrap using {self.bootstrap.bs_method} for "
                  f"{self.bootstrap.bootstrap_num} and "
                  f"save {bs_record_num} records")
    return summary, results
def _work_fn(worker, point_estimate, bootstrap, convergence_test, replicates): """Utility work function to parallelize workers""" worker.fit(point_estimate=point_estimate, bootstrap=bootstrap, convergence_test=convergence_test, replicates=replicates) logging.debug(f'\nFit sequence: {worker.name}') logging.debug(worker.x_data) logging.debug(worker.y_data) return worker
def point_estimate(self, **kwargs):
    """Perform a single point estimation by delegating to ``_fit``.

    All keyword arguments are passed through unchanged; returns whatever
    ``_fit`` returns.
    """
    fit_output = self._fit(**kwargs)
    logging.debug(f'Point estimation for {self.__repr__()} finished')
    return fit_output
def _fit(self, model=None, x_data=None, y_data=None, sigma=None, bounds="unspecified",
         metrics=None, init_guess=None, curve_fit_kwargs=None):
    """Core fitting routine wrapping ``scipy.optimize.curve_fit``.

    Args:
        model (callable): model function ``f(x, *params)``; defaults to ``self.model``.
        x_data, y_data: data to fit; default to the estimator's stored data.
        sigma: per-point uncertainty; silently ignored (set to None) if its
            length does not match ``x_data``.
        bounds: parameter bounds; the sentinel string "unspecified" means
            "use ``self.config.bounds``" (and None means unbounded).
        metrics (dict): optional ``name -> fn(params)`` metrics evaluated on
            the fitted parameters.
        init_guess (list): initial parameter guess; a random draw from
            (0, 1) per parameter when absent.
        curve_fit_kwargs (dict): extra keyword arguments for ``curve_fit``.

    Returns:
        dict: 'params' (pd.Series), 'pcov' (pd.DataFrame), 'metrics'
        (pd.Series or None); all NaN-filled when fitting fails.
    """
    from scipy.optimize import curve_fit
    from ..utility.func_tools import update_none
    from ..utility.func_tools import get_func_params

    model = update_none(model, self.model)
    # first parameter of the model is x; the rest are fitting parameters
    parameters = get_func_params(model, required_only=True)[1:]
    x_data = update_none(x_data, self.x_data)
    y_data = update_none(y_data, self.y_data)
    sigma = update_none(sigma, self.config.sigma)
    if len(x_data) != len(sigma):
        sigma = None
        logging.debug('Sigma is ignored as it has different length as x_data')
    if bounds == "unspecified":
        bounds = self.config.bounds
    if bounds is None:
        bounds = (-np.inf, np.inf)
    metrics = update_none(metrics, self.config.metrics)
    init_guess = update_none(init_guess, self.config.init_guess)
    curve_fit_kwargs = update_none(curve_fit_kwargs, self.config.curve_fit_kwargs)

    def _nan_fallback():
        # shared failure result: NaN-filled params / pcov / metrics
        nan_params = np.full(fill_value=np.nan, shape=len(parameters))
        nan_pcov = np.full(fill_value=np.nan, shape=(len(parameters), len(parameters)))
        if metrics is not None:
            nan_metrics = pd.Series({name: np.nan for name in metrics})
        else:
            nan_metrics = None
        return nan_params, nan_pcov, nan_metrics

    try:
        if not init_guess:
            # by default, use a random guess form (0, 1)
            init_guess = [np.random.random() for _ in parameters]
        if curve_fit_kwargs is None:
            curve_fit_kwargs = {}
        params, pcov = curve_fit(f=model, xdata=x_data, ydata=y_data,
                                 sigma=sigma, bounds=bounds, p0=init_guess,
                                 **curve_fit_kwargs)
        if metrics is not None:
            metrics_res = pd.Series({name: fn(params) for name, fn in metrics.items()})
        else:
            metrics_res = None
    except Exception as e:
        # FIX: one handler replaces the original three duplicated blocks
        # (RuntimeError / ValueError / bare except). Using `except Exception`
        # instead of a bare `except:` no longer swallows KeyboardInterrupt
        # and SystemExit; the exception type is included in the log.
        logging.warning(
            f"{type(e).__name__} on \n"
            f'\tx={x_data}\n'
            f'\ty={y_data}\n'
            f'\tsigma={sigma}'
        )
        params, pcov, metrics_res = _nan_fallback()

    return {
        'params': pd.Series(data=params, index=parameters),
        'pcov': pd.DataFrame(data=pcov, index=parameters, columns=parameters),
        'metrics': metrics_res
    }
def __init__(self, x_data, y_data, model, name=None, x_label=None, y_label=None, sigma=None,
             bounds=None, init_guess=None, opt_method='trf', exclude_zero=False, metrics=None,
             rnd_seed=None, curve_fit_kwargs=None, replicates=None,
             bootstrap_num=0, bs_record_num=0, bs_method='pct_res', bs_stats=None, grouper=None,
             record_full=False, conv_reps=0, conv_init_range=None, conv_stats=None,
             save_to=None, overwrite=False, verbose=1):
    """Initialize the estimator with data, model, and fitting configuration.

    Args:
        x_data, y_data (list or np.ndarray): data to fit; must have equal length.
        model (callable): model function ``f(x, *params)``.
        name (str): optional name for this estimator.
        x_label, y_label (str): optional axis labels (stored in config).
        sigma (list or np.ndarray): per-point uncertainty; defaults to ones.
        bounds: parameter bounds; defaults to unbounded.
        init_guess: initial guess for the fitting parameters.
        opt_method (str): optimization method (stored in config).
        exclude_zero (bool): if True, drop points where ``y == 0``.
        metrics (dict): ``name -> fn(params)`` metrics evaluated after fitting.
        rnd_seed (int): random seed for reproducible fitting.
        curve_fit_kwargs (dict): extra kwargs for ``scipy.optimize.curve_fit``.
        replicates: replicate specification for uncertainty estimation.
        bootstrap_num (int): number of bootstrap runs; 0 disables bootstrap.
        bs_record_num (int): number of bootstrap records to keep.
        bs_method (str): bootstrap method.
        bs_stats, grouper, record_full: additional bootstrap configuration.
        conv_reps (int): number of convergence-test repeats; 0 disables.
        conv_init_range, conv_stats: convergence-test configuration.
        save_to (str): optional path to stream results as JSON.
        overwrite (bool): if True, overwrite existing results at ``save_to``.
        verbose (int): logging level: 0 = warning, 1 = info, 2 = debug.

    Raises:
        ValueError: if ``verbose`` is not 0/1/2, or x/y lengths differ.
    """
    from ..utility.func_tools import AttrScope, get_func_params
    from .bootstrap import Bootstrap
    from .convergence import ConvergenceTester
    from .replicates import Replicates

    super().__init__()

    if verbose == 0:
        logging.set_level('warning')
    elif verbose == 1:
        logging.set_level('info')
    elif verbose == 2:
        logging.set_level('debug')
    else:
        logging.error("verbose should be 0, 1, or 2", error_type=ValueError)
    if len(x_data) != len(y_data):
        logging.error('Shapes of x and y do not match', error_type=ValueError)

    self.model = model
    # first model parameter is x; the rest are the fitting parameters
    self.parameters = get_func_params(model, required_only=True)[1:]
    self.name = name
    self.config = AttrScope(
        x_label=x_label, y_label=y_label,
        opt_method=opt_method, exclude_zero=exclude_zero,
        init_guess=init_guess, rnd_seed=rnd_seed,
        curve_fit_kwargs={} if curve_fit_kwargs is None else curve_fit_kwargs
    )

    if isinstance(x_data, list):
        x_data = np.array(x_data)
    if isinstance(y_data, list):
        y_data = np.array(y_data)
    if exclude_zero is True:
        mask = y_data != 0
    else:
        mask = np.repeat(True, x_data.shape[0])
    self.x_data = x_data[mask]
    self.y_data = y_data[mask]

    if sigma is None:
        self.config.sigma = np.ones(len(self.y_data))
    elif isinstance(sigma, list):
        self.config.sigma = np.array(sigma)[mask]
    else:
        self.config.sigma = sigma[mask]

    if bounds is None:
        self.config.bounds = (-np.inf, np.inf)
    else:
        self.config.bounds = bounds

    if replicates is not None:
        self.replicates = Replicates(estimator=self, replicates=replicates)
    else:
        # FIX: mirror self.bootstrap / self.converge_tester — always define
        # the attribute so code reading self.replicates (e.g. run_replicates)
        # gets None instead of AttributeError
        self.replicates = None
    self.config.add(replicates=replicates)

    if bootstrap_num > 0 and len(self.x_data) > 1:
        if bs_record_num is None:
            bs_record_num = 0
        self.bootstrap = Bootstrap(estimator=self, bootstrap_num=bootstrap_num,
                                   bs_record_num=bs_record_num, bs_method=bs_method,
                                   bs_stats=bs_stats, grouper=grouper, record_full=record_full)
    else:
        self.bootstrap = None
    self.config.add(
        bootstrap_num=bootstrap_num,
        bs_record_num=bs_record_num,
        bs_method=bs_method,
        bs_stats=bs_stats,
        record_full=record_full,
        grouper=grouper
    )

    if conv_reps > 0:
        self.converge_tester = ConvergenceTester(conv_reps=conv_reps, estimator=self,
                                                 conv_init_range=conv_init_range,
                                                 conv_stats=conv_stats)
    else:
        self.converge_tester = None
    self.config.add(
        conv_reps=conv_reps,
        conv_init_range=conv_init_range,
        conv_stats=conv_stats
    )

    self.config.metrics = metrics
    self.results = FitResults(estimator=self)
    self.save_to = save_to
    self.overwrite = overwrite
    logging.debug(f"{self.__repr__()} initiated")