Ejemplo n.º 1
0
    def run_replicates(self, replicates=None):
        """Estimate uncertainty from replicates.

        Args:
            replicates: replicate specification handed to a new ``Replicates``
                object. When ``None``, the existing ``self.replicates`` is used.

        Returns:
            tuple: the ``(summary, results)`` pair returned by the runner's ``run()``.
        """
        if replicates is not None:
            # an explicit specification gets its own runner for this call only
            from .replicates import Replicates
            runner = Replicates(estimator=self, replicates=replicates)
        else:
            runner = self.replicates

        summary, results = runner.run()
        logging.debug("Uncertainty estimation from replicates")
        return summary, results
Ejemplo n.º 2
0
    def fit(self, point_estimate=True, replicates=False, bootstrap=False, convergence_test=False, **kwargs):
        """Run the requested fitting steps and store the outcome in ``self.results``.

        If a result file already exists at ``save_to`` and ``overwrite`` is False, the
        stored results are loaded from it and no fitting is done.

        Args:
            point_estimate (bool): if do point estimation, default True
            replicates (bool): if use replicates for uncertainty estimation, default False
            bootstrap (bool): if do bootstrap (skipped with fewer than 2 data points),
                default False
            convergence_test (bool): if do convergence test, default False
            **kwargs: ``save_to``, ``overwrite`` and ``rnd_seed`` override the values held
                by the object for this call. Everything else is forwarded unchanged to
                every step that runs (``point_estimate``, ``run_bootstrap``,
                ``run_replicates``, ``convergence_test``).

        Returns:
            None
        """
        from pathlib import Path

        save_to = kwargs.pop('save_to', self.save_to)
        overwrite = kwargs.pop('overwrite', self.overwrite)
        if save_to and (overwrite is False) and Path(save_to).exists():
            # results are streamed to disk; check whether a usable result is already there
            from json import JSONDecodeError
            try:
                # don't do the fitting if the saved result is readable
                logging.debug(f'Try to recover info from {save_to}...')
                self.results = FitResults.from_json(json_path=save_to, estimator=self)
                logging.debug('Found, skip fitting...')
                return None
            except JSONDecodeError:
                # unreadable file: fall through and fit again
                logging.debug('Can not parse JSON file, continue fitting...')
        logging.debug('Perform fitting...')
        rnd_seed = kwargs.pop('rnd_seed', self.config.rnd_seed)
        if rnd_seed is not None:
            # compare against None so that a seed of 0 is honoured
            np.random.seed(rnd_seed)

        if point_estimate:
            results = self.point_estimate(**kwargs)
            params = results['params']
            if results['metrics'] is not None:
                # Series.append was removed in pandas 2.0; concat gives the same result
                params = pd.concat([params, results['metrics']])
            self.results.point_estimation.params = params
            self.results.point_estimation.pcov = results['pcov']

        # collect the available summaries first, so no empty Series is concatenated
        summaries = []

        if bootstrap and (len(self.x_data) >= 2):
            bs_summary, self.results.uncertainty.bs_records = self.run_bootstrap(**kwargs)
            summaries.append(bs_summary)

        if replicates:
            rep_summary, self.results.uncertainty.rep_results = self.run_replicates(**kwargs)
            summaries.append(rep_summary)

        if summaries:
            summary = pd.concat(summaries)
            if len(summary) > 0:
                self.results.uncertainty.summary = summary

        if convergence_test:
            self.results.convergence.summary, self.results.convergence.records = self.convergence_test(**kwargs)

        if save_to:
            # stream to disk as JSON file, at the same path that was checked above
            check_dir(Path(save_to).parent)
            self.results.to_json(save_to)
Ejemplo n.º 3
0
    def run_bootstrap(self, bs_record_num=None, **kwargs):
        """Run the bootstrap and return its summary with a sample of the records.

        Args:
            bs_record_num (int): number of bootstrap records to keep; defaults to
                ``self.config.bs_record_num``. ``None`` is treated as 0, as in the
                constructor. When the value is larger than the number of records
                (or negative), all records are returned.
            **kwargs: when no bootstrap object exists yet, passed to ``Bootstrap``;
                otherwise ``bs_method``, ``bootstrap_num`` and ``grouper`` (if present)
                overwrite the matching attributes of ``self.bootstrap``.

        Returns:
            tuple: ``(summary, results)`` from ``self.bootstrap.run()``, with ``results``
            down-sampled without replacement as described above.
        """
        if bs_record_num is None:
            bs_record_num = self.config.bs_record_num
        if bs_record_num is None:
            # config may still hold None; comparing None with an int would raise
            bs_record_num = 0

        if self.bootstrap is None:
            # if not initialized, enforce a bootstrap
            from .bootstrap import Bootstrap
            self.bootstrap = Bootstrap(estimator=self, **kwargs)
        else:
            # update if new bootstrap config is assigned
            for option in ('bs_method', 'bootstrap_num', 'grouper'):
                if option in kwargs:
                    setattr(self.bootstrap, option, kwargs[option])

        summary, results = self.bootstrap.run()
        if 0 <= bs_record_num <= results.shape[0]:
            results = results.sample(n=bs_record_num, replace=False, axis=0)

        # report the number of records actually kept, not the configured value
        logging.debug(f"Bootstrap using {self.bootstrap.bs_method} for "
                      f"{self.bootstrap.bootstrap_num} and "
                      f"save {results.shape[0]} records")
        return summary, results
Ejemplo n.º 4
0
def _work_fn(worker, point_estimate, bootstrap, convergence_test, replicates):
    """Fit one worker with the requested steps and return it (helper for parallel runs)."""
    fit_options = {
        'point_estimate': point_estimate,
        'bootstrap': bootstrap,
        'convergence_test': convergence_test,
        'replicates': replicates,
    }
    worker.fit(**fit_options)

    # trace what was fitted, after the fit has completed
    logging.debug(f'\nFit sequence: {worker.name}')
    logging.debug(worker.x_data)
    logging.debug(worker.y_data)
    return worker
Ejemplo n.º 5
0
 def point_estimate(self, **kwargs):
     """Run a single fit through ``self._fit`` and return its result unchanged.

     Args:
         **kwargs: forwarded as-is to ``self._fit``.
     """
     fitted = self._fit(**kwargs)
     logging.debug(f'Point estimation for {self.__repr__()} finished')
     return fitted
Ejemplo n.º 6
0
    def _fit(self, model=None, x_data=None, y_data=None, sigma=None, bounds="unspecified",
             metrics=None, init_guess=None, curve_fit_kwargs=None):
        """Fit ``model`` to the data with ``scipy.optimize.curve_fit``.

        Arguments left as ``None`` are resolved through ``update_none`` against the
        estimator's own values (``self.model``, ``self.x_data``, ``self.y_data`` and
        entries of ``self.config``). A failed fit is logged as a warning and reported
        as NaN values instead of raising.

        Args:
            model (callable): function to fit
            x_data: passed to ``curve_fit`` as ``xdata``
            y_data: passed to ``curve_fit`` as ``ydata``
            sigma: passed to ``curve_fit``; dropped (``None``) when its length differs
                from that of ``x_data``
            bounds: parameter bounds; the default ``"unspecified"`` uses
                ``self.config.bounds`` and ``None`` means ``(-np.inf, np.inf)``
            metrics: mapping ``{name: fn}``; each ``fn`` is called with the fitted
                parameter array
            init_guess: initial parameter values (``p0``); when missing or empty, each
                parameter starts from a value drawn with ``np.random.random()``
            curve_fit_kwargs (dict): extra keyword arguments for ``curve_fit``

        Returns:
            dict: ``'params'`` (pd.Series indexed by parameter name), ``'pcov'``
            (pd.DataFrame) and ``'metrics'`` (pd.Series, or None without metrics)
        """

        from scipy.optimize import curve_fit
        from ..utility.func_tools import update_none
        from ..utility.func_tools import get_func_params

        model = update_none(model, self.model)
        # NOTE(review): presumably the first model argument is x and the rest are
        # the parameters to fit - confirm against get_func_params
        parameters = get_func_params(model, required_only=True)[1:]
        x_data = update_none(x_data, self.x_data)
        y_data = update_none(y_data, self.y_data)
        sigma = update_none(sigma, self.config.sigma)
        if sigma is not None and len(x_data) != len(sigma):
            sigma = None
            logging.debug('Sigma is ignored as it has different length as x_data')
        # isinstance guard: comparing array-valued bounds with a string is elementwise
        if isinstance(bounds, str) and bounds == "unspecified":
            bounds = self.config.bounds
        if bounds is None:
            bounds = (-np.inf, np.inf)
        metrics = update_none(metrics, self.config.metrics)

        init_guess = update_none(init_guess, self.config.init_guess)
        curve_fit_kwargs = update_none(curve_fit_kwargs, self.config.curve_fit_kwargs)
        if curve_fit_kwargs is None:
            curve_fit_kwargs = {}

        # Decide by length where possible: the truth value of a multi-element array is
        # ambiguous and would raise ValueError, turning a valid guess into a NaN fit.
        if hasattr(init_guess, '__len__'):
            no_guess = len(init_guess) == 0
        else:
            no_guess = not init_guess

        try:
            if no_guess:
                # by default, use a random guess from (0, 1)
                init_guess = [np.random.random() for _ in parameters]
            params, pcov = curve_fit(f=model, xdata=x_data, ydata=y_data,
                                     sigma=sigma, bounds=bounds, p0=init_guess, **curve_fit_kwargs)
            if metrics is not None:
                metrics_res = pd.Series({name: fn(params) for name, fn in metrics.items()})
            else:
                metrics_res = None
        except Exception as err:
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
            if isinstance(err, RuntimeError):
                header = f"RuntimeError on \n\tx = {x_data}\n"
            elif isinstance(err, ValueError):
                header = f"ValueError on \n\tx={x_data}\n"
            else:
                header = f"Other error observed on\n\tx={x_data}\n"
            logging.warning(f"{header}\ty={y_data}\n\tsigma={sigma}")

            # report the failed fit as NaN with the same shapes as a successful one
            n_params = len(parameters)
            params = np.full(fill_value=np.nan, shape=n_params)
            pcov = np.full(fill_value=np.nan, shape=(n_params, n_params))
            if metrics is not None:
                metrics_res = pd.Series({name: np.nan for name, _ in metrics.items()})
            else:
                metrics_res = None

        return {
            'params': pd.Series(data=params, index=parameters),
            'pcov': pd.DataFrame(data=pcov, index=parameters, columns=parameters),
            'metrics': metrics_res
        }
Ejemplo n.º 7
0
    def __init__(self, x_data, y_data, model, name=None, x_label=None, y_label=None,
                 sigma=None, bounds=None, init_guess=None,
                 opt_method='trf', exclude_zero=False, metrics=None, rnd_seed=None, curve_fit_kwargs=None,
                 replicates=None,
                 bootstrap_num=0, bs_record_num=0, bs_method='pct_res', bs_stats=None, grouper=None, record_full=False,
                 conv_reps=0, conv_init_range=None, conv_stats=None,
                 save_to=None, overwrite=False, verbose=1):
        """Set up the estimator: data, fit configuration and the optional helpers.

        Args:
            x_data (list or array): x values; lists and tuples are converted to arrays
            y_data (list or array): y values, must have the same length as ``x_data``
            model (callable): model function. Presumably its first argument is x and
                the remaining required arguments are the parameters to estimate
                (see ``get_func_params``) - TODO confirm
            name: stored as ``self.name``
            x_label, y_label: stored in ``self.config``
            sigma (list or array): per-point sigma; defaults to an array of ones
            bounds: parameter bounds; ``None`` becomes ``(-np.inf, np.inf)``
            init_guess: stored in ``self.config``
            opt_method (str): stored in ``self.config``
            exclude_zero (bool): if True, points where ``y_data`` equals 0 are removed
                from x, y and sigma
            metrics: stored as ``self.config.metrics``
            rnd_seed: stored in ``self.config``
            curve_fit_kwargs (dict): stored in ``self.config``; ``None`` becomes ``{}``
            replicates: if not None, a ``Replicates`` helper is created from it;
                otherwise ``self.replicates`` is None
            bootstrap_num (int): a ``Bootstrap`` helper is created only when this is
                positive and more than one data point is left
            bs_record_num, bs_method, bs_stats, grouper, record_full: bootstrap
                options, passed to ``Bootstrap`` and stored in ``self.config``
            conv_reps (int): a ``ConvergenceTester`` is created only when positive
            conv_init_range, conv_stats: convergence options, passed to
                ``ConvergenceTester`` and stored in ``self.config``
            save_to: stored as ``self.save_to``
            overwrite (bool): stored as ``self.overwrite``
            verbose (int): 0, 1 or 2, selecting log level 'warning', 'info' or 'debug'
        """

        from ..utility.func_tools import AttrScope, get_func_params
        from .bootstrap import Bootstrap
        from .convergence import ConvergenceTester
        from .replicates import Replicates

        super().__init__()
        if verbose == 0:
            logging.set_level('warning')
        elif verbose == 1:
            logging.set_level('info')
        elif verbose == 2:
            logging.set_level('debug')
        else:
            logging.error("verbose should be 0, 1, or 2", error_type=ValueError)

        if len(x_data) != len(y_data):
            logging.error('Shapes of x and y do not match', error_type=ValueError)

        self.model = model
        self.parameters = get_func_params(model, required_only=True)[1:]
        self.name = name
        self.config = AttrScope(
            x_label=x_label, y_label=y_label,
            opt_method=opt_method,
            exclude_zero=exclude_zero,
            init_guess=init_guess,
            rnd_seed=rnd_seed,
            curve_fit_kwargs={} if curve_fit_kwargs is None else curve_fit_kwargs
        )

        # plain sequences need to become arrays for the boolean masking below
        if isinstance(x_data, (list, tuple)):
            x_data = np.array(x_data)
        if isinstance(y_data, (list, tuple)):
            y_data = np.array(y_data)
        if exclude_zero is True:
            mask = y_data != 0
        else:
            mask = np.repeat(True, x_data.shape[0])

        self.x_data = x_data[mask]
        self.y_data = y_data[mask]
        if sigma is None:
            self.config.sigma = np.ones(len(self.y_data))
        elif isinstance(sigma, (list, tuple)):
            self.config.sigma = np.array(sigma)[mask]
        else:
            self.config.sigma = sigma[mask]

        if bounds is None:
            self.config.bounds = (-np.inf, np.inf)
        else:
            self.config.bounds = bounds

        if replicates is not None:
            self.replicates = Replicates(estimator=self, replicates=replicates)
        else:
            # always define the attribute, like bootstrap / converge_tester below
            self.replicates = None
        self.config.add(replicates=replicates)

        if bootstrap_num > 0 and len(self.x_data) > 1:
            if bs_record_num is None:
                bs_record_num = 0
            self.bootstrap = Bootstrap(estimator=self, bootstrap_num=bootstrap_num, bs_record_num=bs_record_num,
                                       bs_method=bs_method, bs_stats=bs_stats, grouper=grouper, record_full=record_full)
        else:
            self.bootstrap = None
        self.config.add(
            bootstrap_num=bootstrap_num,
            bs_record_num=bs_record_num,
            bs_method=bs_method,
            bs_stats=bs_stats,
            record_full=record_full,
            grouper=grouper
        )

        if conv_reps > 0:
            self.converge_tester = ConvergenceTester(conv_reps=conv_reps, estimator=self,
                                                     conv_init_range=conv_init_range, conv_stats=conv_stats)
        else:
            self.converge_tester = None
        self.config.add(
            conv_reps=conv_reps,
            conv_init_range=conv_init_range,
            conv_stats=conv_stats
        )

        self.config.metrics = metrics
        self.results = FitResults(estimator=self)
        self.save_to = save_to
        self.overwrite = overwrite
        logging.debug(f"{self.__repr__()} initiated")