Beispiel #1
0
    def _score_tau(self, tau, data, quantile):
        """
        Calculate score for the tau value.

        ODE parameter values of each registered phase are guessed with the tau value,
        all phases are simulated and the simulated values are compared with @data.

        Args:
            tau (int): tau value [min]
            data (pandas.DataFrame):
                Index
                    reset index
                Columns
                    - Date (pd.Timestamp): Observation date
                    - Susceptible(int): the number of susceptible cases
                    - Infected (int): the number of currently infected cases
                    - Fatal(int): the number of fatal cases
                    - Recovered (int): the number of recovered cases
            quantile (float): quantile to guess ODE parameter values for the candidates of tau

        Returns:
            float: score calculated with the metric of the instance (self._metric)

        Note:
            self._info_dict is not changed by this method.
        """
        # dict.copy() is shallow: writing "param" into the nested dictionaries would leak
        # the guessed values into self._info_dict, so new per-phase dictionaries are built.
        info_dict = {}
        for (phase, phase_dict) in self._info_dict.items():
            start, end = phase_dict[self.START], phase_dict[self.END]
            # Records of the phase (both ends are included)
            df = data.loc[(start <= data[self.DATE])
                          & (data[self.DATE] <= end)]
            info_dict[phase] = {
                **phase_dict,
                "param": self._model.guess(df, tau, q=quantile),
            }
        # Simulate all phases with the guessed parameter values
        solver = _MultiPhaseODESolver(self._model, self._first, tau)
        sim_df = solver.simulate(*info_dict.values())
        # Compare the records and the simulated values by date
        evaluator = Evaluator(data.set_index(self.DATE),
                              sim_df.set_index(self.DATE))
        return evaluator.score(metric=self._metric)
Beispiel #2
0
 def __init__(self, start_date, end_date, population):
     """
     Register the period and the population value and prepare empty containers.

     Args:
         start_date: start date of the period
         end_date: end date of the period, checked against @start_date with self._ensure_date_order()
         population: population value, checked with self._ensure_population()
     """
     # Validation of the arguments comes first because it may raise errors
     self._ensure_date_order(start_date, end_date, name="end_date")
     self._start_date, self._end_date = start_date, end_date
     self._population = self._ensure_population(population)
     # Summary of information: ODE and Rt entries are None at this point
     summary = {self.START: start_date, self.END: end_date, self.N: population}
     summary[self.ODE] = None
     summary[self.RT] = None
     self.info_dict = summary
     self._ode_dict = {self.TAU: None}
     self.day_param_dict = {}
     # One entry per metric of Evaluator, followed by the trial and runtime entries (all None)
     self.est_dict = dict.fromkeys(
         [*Evaluator.metrics(), self.TRIALS, self.RUNTIME])
     # Initial values of the remaining attributes
     self._id_dict = None
     self._enabled = True
     self._model = None
     self._record_df = pd.DataFrame()
     self.y0_dict = {}
     self._estimator = None
Beispiel #3
0
    def _score(self, tau, param_dict):
        """
        Score the simulation with the tau value and the parameter values.

        Args:
            tau (int): tau value [min]
            param_dict (dict[str, int or float]): dictionary of parameter values

        Returns:
            float: score

        Note:
            @tau is saved to self.tau and self._set_taufree() is called before
            self.taufree_df is read.
        """
        self.tau = tau
        self._set_taufree()
        # Time step column and the variables to evaluate
        target_cols = [self.TS, *self.variables_evaluate]
        actual = self.taufree_df.loc[:, target_cols]
        simulated = self._simulate(self.step_n, param_dict)
        simulated = simulated.loc[:, target_cols]
        # Records and simulated values are paired on the time step column
        return Evaluator(actual, simulated, on=self.TS).score(metric=self._metric)
    def _score(self, **kwargs):
        """
        Objective function to minimize.
        The score is calculated with the records, the simulated values and the metric.

        Args:
            kwargs: values of non-dimensional model parameters, including rho and sigma

        Returns:
            float: score
        """
        # Simulation with the given parameter values and the initial values
        simulated = _ODESolver(model=self._model, **kwargs).run(
            step_n=self._step_n, **self._y0_dict)
        # The first column (Susceptible) is excluded from both sides
        records = self._taufree_df
        records = records.loc[:, records.columns[1:]]
        simulated = simulated.loc[:, simulated.columns[1:]]
        # Score calculation
        return Evaluator(records, simulated, how="inner", on=None).score(metric=self._metric)
Beispiel #5
0
 def __init__(self, model, first_date, tau=None, metric="RMSLE", n_jobs=-1):
     """
     Validate and save the settings of the handler.

     Args:
         model: ODE model, must be a subclass of ModelBase
         first_date: the first date, validated with self._ensure_date()
         tau (int or None): tau value [min] or None
         metric (str): metric name, one of Evaluator.metrics()
         n_jobs (int): the number of jobs, -1 means cpu_count()
     """
     self._model = self._ensure_subclass(model, ModelBase, name="model")
     self._first = self._ensure_date(first_date, name="first_date")
     self._metric = self._ensure_selectable(
         metric, Evaluator.metrics(), name="metric")
     # -1 means the value of cpu_count(), the other values must be natural numbers
     if n_jobs == -1:
         self._n_jobs = cpu_count()
     else:
         self._n_jobs = self._ensure_natural_int(n_jobs, name="n_jobs")
     # Tau value [min] or None
     self._tau = self._ensure_tau(tau, accept_none=True)
     # Phase information: {"0th": output of self.add()}
     self._info_dict = {}
    def score(self, variables=None, phases=None, y0_dict=None, **kwargs):
        """
        Evaluate accuracy of phase setting and parameter estimation of selected enabled phases.

        Args:
            variables (list[str] or None): variables to use in calculation
            phases (list[str] or None): phases to use in calculation
            y0_dict(dict[str, float] or None): dictionary of initial values of variables
            kwargs: keyword arguments of covsirphy.Evaluator.score()

        Returns:
            float: score with the specified metrics

        Note:
            If @variables is None, ["Infected", "Fatal", "Recovered"] will be used.
            "Confirmed", "Infected", "Fatal" and "Recovered" can be used in @variables.
            If @phases is None, all phases will be used.
            The non-target phases are disabled only during the calculation. They are enabled
            again before returning, even when the calculation raises an error.
        """
        # Arguments
        variables = variables or [self.CI, self.F, self.R]
        variables = self._ensure_list(variables,
                                      self.VALUE_COLUMNS,
                                      name="variables")
        # Disable the non-target phases
        all_phases, _ = self.past_phases(phases=None)
        target_phases, _ = self.past_phases(phases=phases)
        ignored_phases = list(set(all_phases) - set(target_phases))
        if ignored_phases:
            self.disable(ignored_phases)
        try:
            # Get the number of cases
            rec_df, sim_df = self._compare_with_actual(variables=variables,
                                                       y0_dict=y0_dict)
            # Calculate score
            evaluator = Evaluator(rec_df, sim_df)
            return evaluator.score(**find_args(Evaluator.score, **kwargs))
        finally:
            # Enable the disabled non-target phases, also when an error was raised above,
            # so that a failed calculation does not leave the phases disabled
            if ignored_phases:
                self.enable(ignored_phases)
    def score(self, change_points, metric):
        """
        Calculate scores of the phases.

        Args:
            change_points (list[pandas.Timestamp]): list of change points
            metric (str): metric name

        Returns:
            list[float]: scores for phases

        Note:
            Please refer to covsirphy.Evaluator.score() for metric names
        """
        fit_df = self._fitting(change_points)
        # N change points split the records into N + 1 phases
        phases = list(map(self.num2str, range(len(change_points) + 1)))

        def _phase_score(phase):
            # Rows with missing values in either column are not used
            paired = fit_df[[self.ACTUAL, phase]].dropna()
            evaluator = Evaluator(paired[self.ACTUAL], paired[phase], how="all")
            return evaluator.score(metric=metric)

        return [_phase_score(phase) for phase in phases]
Beispiel #8
0
    def score_train(self, metric):
        """
        Calculate score with training dataset.

        Args:
            metric (str): metric name, refer to covsirphy.Evaluator.score()

        Returns:
            float: evaluation score
        """
        # Predicted values, labeled with the column names of the training targets
        predicted = self._regressor.predict(self._X_train)
        pred_train = pd.DataFrame(predicted, columns=self._y_train.columns)
        evaluator = Evaluator(pred_train, self._y_train, how="all")
        return evaluator.score(metric=metric)
Beispiel #9
0
    def estimate_tau(self, data, guess_quantile=0.5):
        """
        Select tau value [min] which gives the best score of the metric.

        Args:
            data (pandas.DataFrame):
                Index
                    reset index
                Columns
                    - Date (pd.Timestamp): Observation date
                    - Susceptible(int): the number of susceptible cases
                    - Infected (int): the number of currently infected cases
                    - Fatal(int): the number of fatal cases
                    - Recovered (int): the number of recovered cases
            guess_quantile (float): quantile to guess ODE parameter values for the candidates of tau

        Returns:
            int: estimated tau value [min]

        Raises:
            covsirphy.UnExecutedError: phase information was not set

        Note:
            ODE parameter for each tau value will be guessed by .guess() classmethod of the model.
            Tau value will be selected from the divisors of 1440 [min] and set to self.
            The candidates are scored one by one when self._n_jobs is 1, otherwise with Pool.
        """
        self._ensure_dataframe(data, name="data", columns=self.DSIFR_COLUMNS)
        records = data.loc[:, self.DSIFR_COLUMNS]
        if not self._info_dict:
            raise UnExecutedError("ODEHandler.add()")
        self._ensure_float(guess_quantile, name="quantile")
        # Scoring function which takes only a tau candidate
        score_of = functools.partial(
            self._score_tau, data=records, quantile=guess_quantile)
        candidates = self.divisors(1440)
        if self._n_jobs == 1:
            scores = list(map(score_of, candidates))
        else:
            with Pool(self._n_jobs) as pool:
                scores = pool.map(score_of, candidates)
        score_dict = dict(zip(candidates, scores))
        # The smallest or the largest score is the best, depending on the metric
        selectors = {True: min, False: max}
        select = selectors[Evaluator.smaller_is_better(metric=self._metric)]
        best_tau, _ = select(score_dict.items(), key=lambda item: item[1])
        self._tau = best_tau
        return self._tau
 def __init__(self, model, data, tau, metric, quantiles):
     """
     Validate the arguments and prepare the values derived from the records.

     Args:
         model: ODE model, must be a subclass of ModelBase
         data (pandas.DataFrame): records which have the columns of self.DSIFR_COLUMNS
         tau (int): tau value [min], None is not accepted
         metric (str): metric name, one of Evaluator.metrics()
         quantiles: quantiles passed to model.guess() as @q to get the parameter range
     """
     self._model = self._ensure_subclass(model, ModelBase, name="model")
     self._ensure_dataframe(data, name="data", columns=self.DSIFR_COLUMNS)
     self._tau = self._ensure_tau(tau, accept_none=False)
     self._metric = self._ensure_selectable(metric, Evaluator.metrics(), name="metric")
     # Converted records: time steps (index), variables of the model
     taufree_df = model.convert(data, tau)
     self._taufree_df = taufree_df.copy()
     # The first row gives the initial values and, summed up, the total population
     first_row = taufree_df.iloc[0]
     self._y0_dict = first_row.to_dict()
     self._population = first_row.sum()
     # Step numbers
     self._step_n = taufree_df.index.max()
     # Parameter range
     self._range_dict = model.guess(data, tau, q=quantiles)
     # Max values of the variables
     self._max_dict = {
         variable: taufree_df[variable].max() for variable in model.VARIABLES}
Beispiel #11
0
    def fit(self, metric):
        """
        Fit regressors and select the best regressor based on the scores with test dataset.

        Args:
            metric (str): metric name to select the best regressor

        Raises:
            ValueError: un-expected parameter values were predicted by all regressors, out of range (0, 1)

        Returns:
            float: the best score

        Note:
            All regressors are here.
            - Indicators -> Parameters with Elastic Net
            - Indicators(n)/Indicators(n-1) -> Parameters(n)/Parameters(n-1) with Elastic Net
        """
        # All approaches
        approach_dict = {
            _ParamElasticNetRegressor.DESC:
            self._fit_param_reg(_ParamElasticNetRegressor),
            _RateElasticNetRegressor.DESC:
            self._fit_param_reg(_RateElasticNetRegressor),
        }
        # Keep only the regressors whose predicted values are all >= 0 and <= 1.
        # .predict() is called once per regressor and the result is reused for both checks.
        reg_dict = {}
        for (desc, regressor) in approach_dict.items():
            predicted = regressor.predict()
            if predicted.ge(0).all().all() and predicted.le(1).all().all():
                reg_dict[desc] = regressor
        self._reg_dict = reg_dict
        if not self._reg_dict:
            raise ValueError(
                "Un-expected parameter values were predicted by all regressors, out of range (0, 1)."
            )
        # Select the best regressor with the metric
        score_dict = {
            desc: regressor.score_test(metric=metric)
            for (desc, regressor) in self._reg_dict.items()
        }
        self._best, score = Evaluator.best_one(score_dict, metric=metric)
        return score
Beispiel #12
0
    def fit(self, metric):
        """
        Fit regressors and select the best regressor based on the scores with test dataset.

        Args:
            metric (str): metric name to select the best regressor

        Raises:
            ValueError: un-expected parameter values were predicted by all regressors, out of range (0, 1)

        Note:
            All regressors are here.
            - Indicators -> Parameters with Elastic Net (_ParamElasticNetRegressor)
            - _RateElasticNetRegressor
        """
        # All approaches
        approach_dict = {
            _ParamElasticNetRegressor.DESC: self._fit_param_reg(_ParamElasticNetRegressor),
            _RateElasticNetRegressor.DESC: self._fit_param_reg(_RateElasticNetRegressor),
        }
        # Keep only the regressors whose predicted values are all >= 0 and <= 1.
        # .predict() is called once per regressor and the result is reused for both checks.
        reg_dict = {}
        for (desc, regressor) in approach_dict.items():
            predicted = regressor.predict()
            if predicted.ge(0).all().all() and predicted.le(1).all().all():
                reg_dict[desc] = regressor
        self._reg_dict = reg_dict
        # Without this guard, min()/max() would fail with an unhelpful
        # "arg is an empty sequence" ValueError when no regressor is acceptable
        if not self._reg_dict:
            raise ValueError(
                "Un-expected parameter values were predicted by all regressors, out of range (0, 1).")
        # Select the best regressor with the metric
        comp_f = {True: min, False: max}[Evaluator.smaller_is_better(metric=metric)]
        self._best, _ = comp_f(self._reg_dict.items(), key=lambda x: x[1].score_test(metric=metric))