def _score_tau(self, tau, data, quantile):
    """
    Calculate score for the tau value.

    Args:
        tau (int): tau value [min]
        data (pandas.DataFrame):
            Index
                reset index
            Columns
                - Date (pd.Timestamp): Observation date
                - Susceptible(int): the number of susceptible cases
                - Infected (int): the number of currently infected cases
                - Fatal(int): the number of fatal cases
                - Recovered (int): the number of recovered cases
        quantile (float): quantile to guess ODE parameter values for the candidates of tau

    Returns:
        float: score of the simulated records against @data, as returned by Evaluator.score()

    Note:
        The registered phase information (self._info_dict) is not changed by this method.
    """
    # dict.copy() is shallow: the nested phase dictionaries would still be shared with
    # self._info_dict and the guessed "param" values would leak into it.
    # Copy each phase dictionary so that the guesses stay local to this call.
    info_dict = {
        phase: dict(phase_dict) for (phase, phase_dict) in self._info_dict.items()
    }
    for phase_dict in info_dict.values():
        start, end = phase_dict[self.START], phase_dict[self.END]
        # Records of the phase (both ends inclusive)
        df = data.loc[(start <= data[self.DATE]) & (data[self.DATE] <= end)]
        phase_dict["param"] = self._model.guess(df, tau, q=quantile)
    # Simulate all phases with the guessed parameter values
    solver = _MultiPhaseODESolver(self._model, self._first, tau)
    sim_df = solver.simulate(*info_dict.values())
    # Compare the actual records and the simulated records, aligned by date
    evaluator = Evaluator(data.set_index(self.DATE), sim_df.set_index(self.DATE))
    return evaluator.score(metric=self._metric)
def __init__(self, start_date, end_date, population):
    """
    Register the period and the population value, then initialize the internal states.

    Args:
        start_date: start date, checked together with @end_date by self._ensure_date_order()
        end_date: end date
        population: population value, checked by self._ensure_population()
    """
    # Validation of the arguments
    self._ensure_date_order(start_date, end_date, name="end_date")
    self._start_date, self._end_date = start_date, end_date
    self._population = self._ensure_population(population)
    # Summary of information
    # NOTE(review): the raw @population argument is stored here, not self._population
    summary = {self.START: start_date, self.END: end_date, self.N: population}
    summary[self.ODE] = None
    summary[self.RT] = None
    self.info_dict = summary
    self._ode_dict = {self.TAU: None}
    self.day_param_dict = {}
    # All metrics of Evaluator, the number of trials and runtime: not available yet (None)
    self.est_dict = dict.fromkeys([*Evaluator.metrics(), self.TRIALS, self.RUNTIME])
    # Initial states
    self._id_dict = None
    self._enabled = True
    self._model = None
    self._record_df = pd.DataFrame()
    self.y0_dict = {}
    self._estimator = None
def _score(self, tau, param_dict):
    """
    Calculate the score of the simulation with the tau value and the parameter values.

    Args:
        tau (int): tau value [min]
        param_dict (dict[str, int or float]): dictionary of parameter values

    Returns:
        float: score
    """
    # Register the tau value, then call self._set_taufree()
    # (presumably this refreshes self.taufree_df for the new tau - confirm)
    self.tau = tau
    self._set_taufree()
    # Time step column and the variables to evaluate
    columns = [self.TS] + list(self.variables_evaluate)
    actual_df = self.taufree_df.loc[:, columns]
    simulated_df = self._simulate(self.step_n, param_dict).loc[:, columns]
    # Records and simulated values are matched on the time step column
    return Evaluator(actual_df, simulated_df, on=self.TS).score(metric=self._metric)
def _score(self, **kwargs):
    """
    Objective function to minimize.
    The score is calculated with the registered data and metric.

    Args:
        kwargs: values of non-dimensional model parameters, including rho and sigma

    Returns:
        float: score
    """
    # Simulation with the applied parameter values
    simulated = _ODESolver(model=self._model, **kwargs).run(
        step_n=self._step_n, **self._y0_dict)
    # The first variable (Susceptible) is excluded from score calculation
    actual_columns = self._taufree_df.columns[1:]
    simulated_columns = simulated.columns[1:]
    # Score calculation
    evaluator = Evaluator(
        self._taufree_df.loc[:, actual_columns],
        simulated.loc[:, simulated_columns],
        how="inner", on=None)
    return evaluator.score(metric=self._metric)
def __init__(self, model, first_date, tau=None, metric="RMSLE", n_jobs=-1):
    """
    Validate and register the settings of the handler.

    Args:
        model: ODE model, must be a subclass of ModelBase
        first_date: the first date, checked by self._ensure_date()
        tau (int or None): tau value [min] or None
        metric (str): metric name, selected from Evaluator.metrics()
        n_jobs (int): the number of parallel jobs or -1 (the value of cpu_count() is used)
    """
    self._model = self._ensure_subclass(model, ModelBase, name="model")
    self._first = self._ensure_date(first_date, name="first_date")
    self._metric = self._ensure_selectable(metric, Evaluator.metrics(), name="metric")
    # -1 means "use the value of cpu_count()"; the other values must be natural numbers
    if n_jobs == -1:
        self._n_jobs = cpu_count()
    else:
        self._n_jobs = self._ensure_natural_int(n_jobs, name="n_jobs")
    # Tau value [min] or None
    self._tau = self._ensure_tau(tau, accept_none=True)
    # {"0th": output of self.add()}
    self._info_dict = {}
def score(self, variables=None, phases=None, y0_dict=None, **kwargs):
    """
    Evaluate accuracy of phase setting and parameter estimation of selected enabled phases.

    Args:
        variables (list[str] or None): variables to use in calculation
        phases (list[str] or None): phases to use in calculation
        y0_dict(dict[str, float] or None): dictionary of initial values of variables
        kwargs: keyword arguments of covsirphy.Evaluator.score()

    Returns:
        float: score with the specified metrics

    Note:
        If @variables is None, ["Infected", "Fatal", "Recovered"] will be used.
        "Confirmed", "Infected", "Fatal" and "Recovered" can be used in @variables.
        If @phases is None, all phases will be used.
        The non-target phases are disabled only during the calculation. They are enabled
        again before returning, even when the calculation raises an exception.
    """
    # Arguments
    variables = variables or [self.CI, self.F, self.R]
    variables = self._ensure_list(variables, self.VALUE_COLUMNS, name="variables")
    # Disable the non-target phases
    all_phases, _ = self.past_phases(phases=None)
    target_phases, _ = self.past_phases(phases=phases)
    ignored_phases = list(set(all_phases) - set(target_phases))
    if ignored_phases:
        self.disable(ignored_phases)
    try:
        # Get the number of cases
        rec_df, sim_df = self._compare_with_actual(variables=variables, y0_dict=y0_dict)
        # Calculate score: only the keyword arguments accepted by Evaluator.score() are passed
        evaluator = Evaluator(rec_df, sim_df)
        return evaluator.score(**find_args(Evaluator.score, **kwargs))
    finally:
        # Enable the disabled non-target phases, so that a failure in the calculation
        # does not leave the phases disabled
        if ignored_phases:
            self.enable(ignored_phases)
def score(self, change_points, metric):
    """
    Calculate the score of each phase separated by the change points.

    Args:
        change_points (list[pandas.Timestamp]): list of change points
        metric (str): metric name

    Returns:
        list[float]: scores for phases

    Note:
        Please refer to covsirphy.Evaluator.score() for metric names
    """
    fit_df = self._fitting(change_points)
    # N change points separate the records into N + 1 phases
    phase_names = list(map(self.num2str, range(len(change_points) + 1)))

    def _phase_score(name):
        # Only the rows which have both of the actual value and the fitted value are used
        pair_df = fit_df[[self.ACTUAL, name]].dropna()
        return Evaluator(pair_df[self.ACTUAL], pair_df[name], how="all").score(metric=metric)

    return [_phase_score(name) for name in phase_names]
def score_train(self, metric):
    """
    Calculate the score of the regressor with the training dataset.

    Args:
        metric (str): metric name, refer to covsirphy.Evaluator.score()

    Returns:
        float: evaluation score
    """
    # Predicted values, labelled with the same columns as the training targets
    predicted = self._regressor.predict(self._X_train)
    pred_train = pd.DataFrame(predicted, columns=self._y_train.columns)
    evaluator = Evaluator(pred_train, self._y_train, how="all")
    return evaluator.score(metric=metric)
def estimate_tau(self, data, guess_quantile=0.5):
    """
    Select the tau value [min] which gives the best score of the metric.

    Args:
        data (pandas.DataFrame):
            Index
                reset index
            Columns
                - Date (pd.Timestamp): Observation date
                - Susceptible(int): the number of susceptible cases
                - Infected (int): the number of currently infected cases
                - Fatal(int): the number of fatal cases
                - Recovered (int): the number of recovered cases
        guess_quantile (float): quantile to guess ODE parameter values for the candidates of tau

    Returns:
        int: estimated tau value [min]

    Raises:
        covsirphy.UnExecutedError: phase information was not set

    Note:
        ODE parameter for each tau value will be guessed by .guess() classmethod of the model.
        Tau value will be selected from the divisors of 1440 [min] and set to self.
    """
    self._ensure_dataframe(data, name="data", columns=self.DSIFR_COLUMNS)
    df = data.loc[:, self.DSIFR_COLUMNS]
    if not self._info_dict:
        raise UnExecutedError("ODEHandler.add()")
    # Scores of the tau candidates (sequentially or with a process pool)
    self._ensure_float(guess_quantile, name="quantile")
    score_for = functools.partial(self._score_tau, data=df, quantile=guess_quantile)
    candidates = self.divisors(1440)
    if self._n_jobs == 1:
        scores = list(map(score_for, candidates))
    else:
        with Pool(self._n_jobs) as pool:
            scores = pool.map(score_for, candidates)
    score_dict = dict(zip(candidates, scores))
    # The best candidate: the smallest or the largest score, depending on the metric
    select = min if Evaluator.smaller_is_better(metric=self._metric) else max
    self._tau = select(score_dict, key=score_dict.get)
    return self._tau
def __init__(self, model, data, tau, metric, quantiles):
    """
    Validate the arguments and prepare the values derived from the records.

    Args:
        model: ODE model, must be a subclass of ModelBase
        data (pandas.DataFrame): records which have the columns of self.DSIFR_COLUMNS
        tau: tau value, checked by self._ensure_tau() (None is not accepted)
        metric (str): metric name, selected from Evaluator.metrics()
        quantiles: passed to model.guess() as @q to get the parameter range
    """
    # Validation
    self._model = self._ensure_subclass(model, ModelBase, name="model")
    self._ensure_dataframe(data, name="data", columns=self.DSIFR_COLUMNS)
    self._tau = self._ensure_tau(tau, accept_none=False)
    self._metric = self._ensure_selectable(metric, Evaluator.metrics(), name="metric")
    # Records converted by the model: time steps (index), variables of the model
    converted = model.convert(data, tau)
    self._taufree_df = converted.copy()
    # Initial values (the first row)
    self._y0_dict = converted.iloc[0].to_dict()
    # Total population (sum of the first row)
    self._population = converted.iloc[0].sum()
    # Step numbers (the maximum of the index)
    self._step_n = converted.index.max()
    # Parameter range
    self._range_dict = model.guess(data, tau, q=quantiles)
    # Max values of the variables
    max_dict = {}
    for variable in model.VARIABLES:
        max_dict[variable] = converted[variable].max()
    self._max_dict = max_dict
def fit(self, metric):
    """
    Fit regressors and select the best regressor based on the scores with test dataset.

    Args:
        metric (str): metric name to select the best regressor

    Raises:
        ValueError: un-expected parameter values were predicted by all regressors,
            out of range (0, 1)

    Returns:
        float: the best score

    Note:
        All regressors are here.
        - Indicators -> Parameters with Elastic Net
        - Indicators(n)/Indicators(n-1) -> Parameters(n)/Parameters(n-1) with Elastic Net
    """
    # All approaches
    approach_dict = {
        _ParamElasticNetRegressor.DESC: self._fit_param_reg(_ParamElasticNetRegressor),
        _RateElasticNetRegressor.DESC: self._fit_param_reg(_RateElasticNetRegressor),
    }
    # All predicted parameter values must be in the range of 0 to 1 (both ends inclusive).
    # Prediction is performed only once for each regressor.
    reg_dict = {}
    for (desc, regressor) in approach_dict.items():
        predicted = regressor.predict()
        if predicted.ge(0).all().all() and predicted.le(1).all().all():
            reg_dict[desc] = regressor
    self._reg_dict = reg_dict
    if not self._reg_dict:
        raise ValueError(
            "Un-expected parameter values were predcited by all regressors, out of range (0, 1)."
        )
    # Select the best regressor with the metric
    score_dict = {
        k: v.score_test(metric=metric) for (k, v) in self._reg_dict.items()
    }
    self._best, score = Evaluator.best_one(score_dict, metric=metric)
    return score
def fit(self, metric):
    """
    Fit regressors and select the best regressor based on the scores with test dataset.

    Args:
        metric (str): metric name to select the best regressor

    Raises:
        ValueError: all regressors predicted parameter values out of the range of 0 to 1,
            so that no regressor can be selected

    Note:
        All regressors are here.
        - _ParamElasticNetRegressor
        - _RateElasticNetRegressor
    """
    # All approaches
    approach_dict = {
        _ParamElasticNetRegressor.DESC: self._fit_param_reg(_ParamElasticNetRegressor),
        _RateElasticNetRegressor.DESC: self._fit_param_reg(_RateElasticNetRegressor),
    }
    # All predicted parameter values must be in the range of 0 to 1 (both ends inclusive).
    # Prediction is performed only once for each regressor.
    reg_dict = {}
    for (desc, regressor) in approach_dict.items():
        predicted = regressor.predict()
        if predicted.ge(0).all().all() and predicted.le(1).all().all():
            reg_dict[desc] = regressor
    self._reg_dict = reg_dict
    # Without this check, min()/max() would fail with an unclear
    # "arg is an empty sequence" ValueError
    if not self._reg_dict:
        raise ValueError(
            "Un-expected parameter values were predicted by all regressors, out of range (0, 1)."
        )
    # Select the best regressor with the metric
    comp_f = {True: min, False: max}[Evaluator.smaller_is_better(metric=metric)]
    self._best, _ = comp_f(
        self._reg_dict.items(), key=lambda x: x[1].score_test(metric=metric))