Example #1
    def run(self, n_jobs=-1, **kwargs):
        """
        Run estimation.

        Args:
            n_jobs (int): the number of parallel jobs or -1 (CPU count)
            kwargs: keyword arguments of model parameters and covsirphy.Estimator.run()
        """
        units = self._units[:]
        results = []
        # The number of parallel jobs
        n_jobs = cpu_count() if n_jobs == -1 else n_jobs
        # Start optimization
        print(f"\n<{self.model.NAME} model: parameter estimation>")
        print(f"Running optimization with {n_jobs} CPUs...")
        stopwatch = StopWatch()
        # Estimation of the last phase will be done to determine tau value
        if self._tau is None:
            unit_sel, units = units[-1], units[:-1]
            unit_est = self._run(unit=unit_sel, tau=None, **kwargs)
            self._tau = unit_est.tau
            results = [unit_est]
        # Estimation of each phase
        est_f = functools.partial(self._run, tau=self._tau, **kwargs)
        with Pool(n_jobs) as p:
            units_est = p.map(est_f, units)
        results.extend(units_est)
        # Completion
        stopwatch.stop()
        print(f"Completed optimization. Total: {stopwatch.show()}")
        return results
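A minimal, self-contained sketch of the parallelization pattern used here: functools.partial freezes the shared tau argument, then multiprocessing.Pool.map fans the remaining units out to worker processes. The estimate() function below is a hypothetical stand-in for the per-unit work done by self._run():

import functools
from multiprocessing import Pool, cpu_count

def estimate(unit, tau):
    # Hypothetical per-unit work standing in for Estimator-style estimation.
    return unit * tau

if __name__ == "__main__":
    units = [1, 2, 3, 4]
    est_f = functools.partial(estimate, tau=1440)  # freeze the shared argument
    with Pool(cpu_count()) as p:
        results = p.map(est_f, units)  # each worker calls estimate(unit, tau=1440)
    print(results)  # [1440, 2880, 4320, 5760]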
Example #2
    def run(self,
            timeout=60,
            reset_n_max=3,
            timeout_iteration=5,
            allowance=(0.98, 1.02),
            seed=0,
            **kwargs):
        """
        Run optimization.
        If the result satisfies all of the following conditions, optimization stops.
        - all values are 0 or greater
        - monotonically increasing variables increase monotonically
        - predicted values are within the allowance when each actual value takes its maximum

        Args:
            timeout (int): time-out of run
            reset_n_max (int): if the study has been reset @reset_n_max times, it will not be reset again
            timeout_iteration (int): time-out of one iteration
            allowance (tuple(float, float)): the allowance of the predicted value
            seed (int or None): random seed of hyperparameter optimization
            kwargs: other keyword arguments will be ignored

        Notes:
            @n_jobs was obsoleted because it is not effective for Optuna.
        """
        # Create a study of optuna
        if self.study is None:
            self._init_study(seed=seed)
        reset_n = 0
        iteration_n = math.ceil(timeout / timeout_iteration)
        increasing_cols = [f"{v}{self.P}" for v in self.model.VARS_INCLEASE]
        stopwatch = StopWatch()
        for _ in range(iteration_n):
            # Perform optimization
            self.study.optimize(self.objective,
                                n_jobs=1,
                                timeout=timeout_iteration)
            # Create a table to compare observed/estimated values
            tau = self.tau or super().param()[self.TAU]
            train_df = self.divide_minutes(tau)
            comp_df = self.compare(train_df, self.predict())
            # Check monotonic variables
            mono_ok_list = [
                comp_df[col].is_monotonic_increasing for col in increasing_cols
            ]
            if not all(mono_ok_list):
                if reset_n == reset_n_max - 1:
                    break
                # Initialize the study
                self._init_study()
                reset_n += 1
                continue
            # Need additional trials when the values are not in allowance
            if self._is_in_allowance(comp_df, allowance):
                break
        # Calculate run-time and the number of trials
        self.run_time = stopwatch.stop()
        self.run_time_show = stopwatch.show()
        self.total_trials = len(self.study.trials)
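The loop above runs Optuna in short bursts (timeout_iteration seconds each) so that the monotonicity and allowance checks can run between bursts. A toy sketch of that iterate-and-check pattern, assuming optuna is installed; the quadratic objective and the 1e-4 threshold are illustrative only:

import math
import optuna

def objective(trial):
    # Toy objective standing in for the model-fitting error score.
    x = trial.suggest_float("x", -10, 10)
    return (x - 2) ** 2

optuna.logging.set_verbosity(optuna.logging.WARNING)
study = optuna.create_study(direction="minimize")
timeout, timeout_iteration = 6, 2
for _ in range(math.ceil(timeout / timeout_iteration)):
    study.optimize(objective, n_jobs=1, timeout=timeout_iteration)
    # Convergence checks go here; break early when the result is acceptable.
    if study.best_value < 1e-4:
        break
print(study.best_params, len(study.trials))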
Example #3
 def run(self,
         n_points,
         min_duration=7,
         allowance=3,
         timeout=60,
         n_trials_iteration=10,
         n_jobs=-1):
     """
     Run optimization.
     @n_points <int>: the number of change points
     @min_duration <int>: minimum duration of one phase [days]
         - must be over 2
     @allowance <int>: allowance of change points [days]
          - if the estimated change points are equal to those of the previous
            iteration within this allowance, stop running.
     @timeout <int>: time-out of run
     @n_trials_iteration <int>: the number of trials in one iteration
     @n_jobs <int>: the number of parallel jobs or -1 (CPU count)
     @return self
     """
     self.n_points = n_points
     self.min_duration = min_duration
     stopwatch = StopWatch()
     if min_duration <= 2:
         raise ValueError("@min_duration must be over 2.")
     if n_points <= 0:
         self.run_time = 0
         self.total_trials = 0
         return self
     if self.study is None:
         self._init_study()
     print("Finding change points of S-R trend...")
     while True:
         self.add_trial(n_trials_iteration, n_jobs)
         # Check whether the change points are fixed (with allowance) or not
         allow_obj = timedelta(days=allowance)
         fixed_ok = [
             abs(self.date_obj(this) - self.date_obj(previous)) <= allow_obj
             for (this, previous
                  ) in zip(self.change_dates, self.change_dates_previous)
         ]
         # Calculate cumulative run-time
         self.run_time = stopwatch.stop()
         self.total_trials = len(self.study.trials)
         # If fixed or time-out, break
         if (all(fixed_ok) and
                 self.change_dates_previous) or (self.run_time > timeout):
             print(
                 f"\rFinished {self.total_trials} trials in {stopwatch.show()}.\n",
                 end=str())
             break
         stopwatch.stop()
         print(
             f"\rPerformed {self.total_trials} trials in {stopwatch.show()}.",
             end=str())
         self.change_dates_previous = self.change_dates[:]
     return self
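The stopping test above declares the change points fixed when every estimated date moved by at most @allowance days since the previous iteration. A standalone sketch of that comparison with the standard library; the dates are made up:

from datetime import datetime, timedelta

allow_obj = timedelta(days=3)
change_dates = [datetime(2020, 4, 1), datetime(2020, 6, 10)]
change_dates_previous = [datetime(2020, 4, 2), datetime(2020, 6, 8)]
fixed_ok = [
    abs(this - previous) <= allow_obj
    for (this, previous) in zip(change_dates, change_dates_previous)
]
print(all(fixed_ok))  # True: each change point moved by 3 days or fewer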
Example #4
    def estimate(self, model, name="Main", phases=None, n_jobs=-1, **kwargs):
        """
        Estimate the parameters of the model using the records.

        Args:
            model (covsirphy.ModelBase): ODE model
            name (str): phase series name
            phases (list[str]): list of phase names, like 1st, 2nd...
            n_jobs (int): the number of parallel jobs or -1 (CPU count)
            kwargs: keyword arguments of model parameters and covsirphy.Estimator.run()

        Notes:
            - If 'Main' was used as @name, main PhaseSeries will be used.
            - If @name phase was not registered, new PhaseSeries will be created.
            - If @phases is None, all past phases will be used.
        """
        # Check model
        model = self.validate_subclass(model, ModelBase, "model")
        # Only one phase series will be used
        if "series_list" in kwargs.keys():
            raise KeyError(
                "Because @series_list was obsoleted in Scenario.estimate(),"
                " please specify the phase name using @name argument.")
        # Validate the phases
        try:
            phase_dict = self.series_dict[name].to_dict()
        except KeyError:
            raise KeyError(f"{name} has not been defined.")
        past_phases = list(phase_dict.keys())
        phases = past_phases[:] if phases is None else phases
        future_phases = list(set(phases) - set(past_phases))
        if future_phases:
            raise KeyError(
                f"{future_phases[0]} is not a past phase or not registered.")
        # The number of parallel jobs
        n_jobs = cpu_count() if n_jobs == -1 else n_jobs
        # Start optimization
        print(f"\n<{name} scenario: perform parameter estimation>")
        print(f"Running optimization with {n_jobs} CPUs...")
        stopwatch = StopWatch()
        # Estimation of the last phase will be done to determine tau value
        phase_sel, phases = phases[-1], phases[:-1]
        result_tuple_sel = self._estimate(model, phase=phase_sel, **kwargs)
        self._update_self(*result_tuple_sel)
        # Estimation of each phase
        est_f = functools.partial(self._estimate, model, **kwargs)
        with Pool(n_jobs) as p:
            result_nest = p.map(est_f, phases)
        for result_tuple in result_nest:
            self._update_self(*result_tuple)
        # Completion
        stopwatch.stop()
        print(f"Completed optimization. Total: {stopwatch.show()}")
Example #5
    def to_dict(self):
        """
        Summarize the results of optimization.

        Returns:
            dict[str, float or int]:
                - (parameters of the model)
                - tau
                - Rt: basic or phase-dependent reproduction number
                - (dimensional parameters [day])
                - {metric name}: score with the metric
                - Trials: the number of trials
                - Runtime: run time of estimation
        """
        tau, param_dict = self._param()
        model_instance = self.model(population=self.population, **param_dict)
        return {
            **param_dict,
            self.TAU: tau,
            self.RT: model_instance.calc_r0(),
            **model_instance.calc_days_dict(tau),
            self._metric: self._score(tau, param_dict),
            self.TRIALS: self.total_trials,
            self.RUNTIME: StopWatch.show(self.runtime),
        }
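to_dict() builds its summary by unpacking several dictionaries in order with **; insertion order is preserved, so the model parameters come first and the run statistics last. A sketch of that construction with illustrative values, not real estimates:

param_dict = {"rho": 0.2, "sigma": 0.075}
days_dict = {"1/beta [day]": 5}
summary = {
    **param_dict,
    "tau": 1440,
    "Rt": 2.67,
    **days_dict,
    "RMSLE": 0.14,
    "Trials": 340,
    "Runtime": "0 min 31 sec",
}
print(list(summary))  # keys appear in the unpacking order shown above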
Example #6
    def run(self,
            timeout=180,
            reset_n_max=3,
            timeout_iteration=5,
            tail_n=4,
            allowance=(0.99, 1.01),
            seed=0,
            pruner="threshold",
            upper=0.5,
            percentile=50,
            metric=None,
            metrics="RMSLE",
            **kwargs):
        """
        Run optimization.
        If the result satisfies the following conditions, optimization stops.
        - Score did not change in the last @tail_n iterations.
        - Monotonically increasing variables increase monotonically.
        - Predicted values are within the allowance when each actual value takes its maximum.

        Args:
            timeout (int): timeout of optimization
            reset_n_max (int): if the study has been reset @reset_n_max times, it will not be reset again
            timeout_iteration (int): time-out of one iteration
            tail_n (int): the number of recent iterations used to decide that the score has stopped changing
            allowance (tuple(float, float)): the allowance of the predicted value
            seed (int or None): random seed of hyperparameter optimization
            pruner (str): hyperband, median, threshold or percentile
            upper (float): used by the "threshold" pruner;
                if an intermediate score is larger than this value, the trial is pruned
            percentile (float): used by the "percentile" pruner;
                if the best intermediate value is in the bottom percentile among trials, the trial is pruned
            metric (str or None): metric name or None (use @metrics)
            metrics (str): alias of @metric
            kwargs: keyword arguments of ModelBase.param_range()

        Note:
            @n_jobs was obsoleted because it does not work effectively in Optuna.

        Note:
            Please refer to covsirphy.Evaluator.score() for metric names
        """
        self._metric = metric or metrics
        self._param_range_dict = find_args(self.model.param_range, **kwargs)
        # Create a study of optuna
        if self.study is None:
            self._init_study(seed=seed,
                             pruner=pruner,
                             upper=upper,
                             percentile=percentile)
        reset_n = 0
        iteration_n = math.ceil(timeout / timeout_iteration)
        increasing_cols = [f"{v}{self.P}" for v in self.model.VARS_INCLEASE]
        stopwatch = StopWatch()
        scores = []
        for _ in range(iteration_n):
            # Perform optimization
            self.study.optimize(self._objective,
                                n_jobs=1,
                                timeout=timeout_iteration)
            # If score did not change in the last iterations, stop running
            tau, param_dict = self._param()
            scores.append(self._score(tau=tau, param_dict=param_dict))
            if len(scores) >= tail_n and len(set(scores[-tail_n:])) == 1:
                break
            # Create a table to compare observed/estimated values
            comp_df = self._compare(tau=tau, param_dict=param_dict)
            # Check monotonic variables
            mono_ok_list = [
                comp_df[col].is_monotonic_increasing for col in increasing_cols
            ]
            if not all(mono_ok_list):
                if reset_n == reset_n_max - 1:
                    break
                # Initialize the study
                self._init_study(seed=seed)
                reset_n += 1
                continue
            # Need additional trials when the values are not in allowance
            if self._is_in_allowance(comp_df, allowance):
                break
        # Calculate run-time and the number of trials
        self.runtime += stopwatch.stop()
        self.total_trials = len(self.study.trials)
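The @tail_n early stop works because each iteration appends one float score: when the last @tail_n entries collapse to a single-element set, the score has stopped changing. A minimal sketch with a made-up score sequence:

tail_n = 4
scores = []
for score in [3.2, 2.1, 1.8, 1.8, 1.8, 1.8]:
    scores.append(score)
    if len(scores) >= tail_n and len(set(scores[-tail_n:])) == 1:
        print(f"converged after {len(scores)} iterations")  # after 6 here
        break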
Example #7
    def run(self,
            timeout=60,
            reset_n_max=3,
            timeout_iteration=10,
            allowance=(0.8, 1.2),
            n_jobs=-1,
            seed=None):
        """
        Run optimization.
        If the result satisfies all of the following conditions, optimization ends.
            - all values are 0 or greater
            - monotonically increasing variables increase monotonically
            - predicted values are within the allowance when each actual value takes its maximum

        Args:
            timeout (int): time-out of run
            reset_n_max (int): if the study has been reset @reset_n_max times, it will not be reset again
            timeout_iteration (int): time-out of one iteration
            allowance (tuple(float, float)): the allowance of the predicted value
            n_jobs (int): the number of parallel jobs or -1 (CPU count)
            seed (int or None): random seed of hyperparameter optimization

        Notes:
            @seed is effective only when @n_jobs is 1

        Returns:
            None
        """
        if seed is not None and n_jobs != 1:
            raise ValueError(
                "@seed must be None when @n_jobs is not equal to 1.")
        if self.study is None:
            self._init_study(seed=seed)
        print("\tRunning optimization...")
        stopwatch = StopWatch()
        reset_n = 0
        while True:
            # Perform optimization
            self._run_trial(n_jobs=n_jobs, timeout_iteration=timeout_iteration)
            self.run_time = stopwatch.stop()
            self.total_trials = len(self.study.trials)
            # Time-out
            if self.run_time >= timeout:
                break
            print(
                f"\r\tPerformed {self.total_trials} trials in {stopwatch.show()}.",
                end=str())
            # Create a table to compare observed/estimated values
            tau = super().param()[self.TAU]
            train_df = self.divide_minutes(tau)
            comp_df = self.compare(train_df, self.predict())
            # Check monotonic variables
            mono_ok_list = [
                comp_df[f"{v}{self.P}"].is_monotonic_increasing
                for v in self.model.VARS_INCLEASE
            ]
            if not all(mono_ok_list):
                reset_n += 1
                if reset_n <= reset_n_max:
                    # Initialize the study
                    self._init_study()
                    stopwatch = StopWatch()
                    continue
            # Check the values when argmax(actual)
            values_nest = [
                comp_df.loc[comp_df[f"{v}{self.A}"].idxmax(),
                            [f"{v}{self.A}", f"{v}{self.P}"]].tolist()
                for v in self.model.VARIABLES
            ]
            last_ok_list = [(a * allowance[0] <= p) and (p <= a * allowance[1])
                            for (a, p) in values_nest]
            if not all(last_ok_list):
                continue
            break
        stopwatch.stop()
        print(
            f"\r\tFinished {self.total_trials} trials in {stopwatch.show()}.\n",
            end=str())
        return None
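The allowance check above reads each variable at the row where the actual series peaks and requires the prediction to fall between allowance[0] and allowance[1] times the actual value. A pandas sketch; the "_actual"/"_predicted" suffixes stand in for self.A and self.P:

import pandas as pd

allowance = (0.8, 1.2)
comp_df = pd.DataFrame({
    "Infected_actual": [10, 50, 30],
    "Infected_predicted": [12, 45, 33],
})
a, p = comp_df.loc[comp_df["Infected_actual"].idxmax(),
                   ["Infected_actual", "Infected_predicted"]].tolist()
print(allowance[0] * a <= p <= allowance[1] * a)  # True: 45 is within [40, 60]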
Example #8
 def run(self, timeout=180, n_jobs=-1,
         timeout_iteration=10, allowance=(0.8, 1.2)):
     """
     Run optimization.
      If the result satisfies all of the following conditions, optimization ends.
          - all values are 0 or greater
          - monotonically increasing variables increase monotonically
          - predicted values are within the allowance
              when each actual value takes its maximum
      @timeout <int>: time-out of run
      @n_jobs <int>: the number of parallel jobs or -1 (CPU count)
      @timeout_iteration <int>: time-out of one iteration
      @allowance <tuple(float, float)>: the allowance of the predicted value
      @return None
     """
     if self.study is None:
         self._init_study()
     print("\tRunning optimization...")
     stopwatch = StopWatch()
     while True:
         # Perform optimization
         self._add_trial(n_jobs=n_jobs, timeout_iteration=timeout_iteration)
         self.run_time = stopwatch.stop()
         self.total_trials = len(self.study.trials)
         # Time-out
         if self.run_time >= timeout:
             break
         print(
             f"\r\tPerformed {self.total_trials} trials in {stopwatch.show()}.",
             end=str()
         )
         # Create a table to compare observed/estimated values
         tau = super().param()[self.TAU]
         train_df = self.divide_minutes(tau)
         comp_df = self.compare(train_df, self.predict())
          # Check that all values are 0 or greater
         if (comp_df < 0).values.sum():
             continue
         # Check monotonic variables
         mono_ok_list = [
             comp_df[f"{v}{self.P}"].is_monotonic_increasing
             for v in self.model.VARS_INCLEASE
         ]
         if not all(mono_ok_list):
             # Initialize the study
             self._init_study()
             continue
         # Check the values when argmax(actual)
         values_nest = [
             comp_df.loc[
                 comp_df[f"{v}{self.A}"].idxmax(),
                 [f"{v}{self.A}", f"{v}{self.P}"]
             ].tolist()
             for v in self.model.VARIABLES
         ]
         last_ok_list = [
             (a * allowance[0] <= p) and (p <= a * allowance[1])
             for (a, p) in values_nest
         ]
         if not all(last_ok_list):
             continue
         break
     stopwatch.stop()
     print(
         f"\r\tFinished {self.total_trials} trials in {stopwatch.show()}.\n",
         end=str()
     )
     return None
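Example #8 adds a non-negativity gate before the monotonicity check; both are one-line pandas operations. A sketch with an assumed "_predicted" suffix standing in for self.P:

import pandas as pd

comp_df = pd.DataFrame({"Recovered_predicted": [0, 3, 7, 12]})
has_negative = bool((comp_df < 0).values.sum())  # True if any value is below 0
monotonic_ok = comp_df["Recovered_predicted"].is_monotonic_increasing
print(has_negative, monotonic_ok)  # False True: both checks pass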
Example #9
    def run(self, check_dict, study_dict):
        """
        Perform parameter estimation of the ODE model, not including tau.

        Args:
            check_dict (dict[str, object]): setting of validation
                - timeout (int): timeout of optimization
                - timeout_iteration (int): timeout of one iteration
                - tail_n (int): the number of recent iterations used to decide that the score has stopped changing
                - allowance (tuple(float, float)): the allowance of the max predicted values
            study_dict (dict[str, object]): setting of optimization study
                - pruner (str): kind of pruner (hyperband, median, threshold or percentile)
                - upper (float): used by the "threshold" pruner; if an intermediate score is larger than this value, the trial is pruned
                - percentile (float): used by the "percentile" pruner; if the best intermediate value is in the bottom percentile among trials, the trial is pruned

        Returns:
            dict(str, object):
                - Rt (float): phase-dependent reproduction number
                - (dict(str, float)): estimated parameter values
                - (dict(str, int or float)): day parameters, including 1/beta [days]
                - {metric}: score with the estimated parameter values
                - Trials (int): the number of trials
                - Runtime (str): runtime of optimization

        Note:
            Please refer to covsirphy.Evaluator.score() for metric names.
        """
        timeout = check_dict.get("timeout", 180)
        timeout_iteration = check_dict.get("timeout_iteration", 5)
        tail_n = check_dict.get("tail_n", 4)
        allowance = check_dict.get("allowance", (0.99, 1.01))
        # Initialize optimization
        study_kwargs = {
            "pruner": "threshold",
            "upper": 0.5,
            "percentile": 50,
            "seed": 0
        }
        study_kwargs.update(study_dict)
        study = self._init_study(**find_args(self._init_study, **study_kwargs))
        # The number of iterations
        iteration_n = math.ceil(timeout / timeout_iteration)
        stopwatch = StopWatch()
        # Optimization
        scores = []
        param_dict = {}
        for _ in range(iteration_n):
            # Run iteration
            study.optimize(self._objective,
                           n_jobs=1,
                           timeout=timeout_iteration)
            param_dict = study.best_params.copy()
            # If score did not change in the last iterations, stop running
            scores.append(self._score(**param_dict))
            if len(scores) >= tail_n and len(set(scores[-tail_n:])) == 1:
                break
            # Check max values are in the allowance
            if self._is_in_allowance(allowance, **param_dict):
                break
        model_instance = self._model(self._population, **param_dict)
        return {
            self.RT: model_instance.calc_r0(),
            **param_dict.copy(),
            **model_instance.calc_days_dict(self._tau),
            self._metric: self._score(**param_dict),
            self.TRIALS: len(study.trials),
            self.RUNTIME: stopwatch.stop_show(),
        }
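find_args() (a covsirphy utility) filters a keyword dictionary down to the parameters the target function actually accepts, so study_kwargs can carry extra keys safely. A hypothetical equivalent using only the standard library; find_args_sketch and init_study are illustrative names, not covsirphy APIs:

import inspect

def find_args_sketch(func, **kwargs):
    # Keep only the keyword arguments that appear in func's signature.
    accepted = inspect.signature(func).parameters
    return {k: v for (k, v) in kwargs.items() if k in accepted}

def init_study(pruner, upper, percentile):
    return (pruner, upper, percentile)

study_kwargs = {"pruner": "threshold", "upper": 0.5, "percentile": 50, "seed": 0}
print(find_args_sketch(init_study, **study_kwargs))
# {'pruner': 'threshold', 'upper': 0.5, 'percentile': 50}: 'seed' is dropped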
Example #10
    def estimate_params(self,
                        data,
                        quantiles=(0.1, 0.9),
                        check_dict=None,
                        study_dict=None,
                        **kwargs):
        """
        Estimate ODE parameter values of all phases to minimize the score of the metric.

        Args:
            data (pandas.DataFrame):
                Index
                    reset index
                Columns
                    - Date (pd.Timestamp): Observation date
                    - Susceptible (int): the number of susceptible cases
                    - Infected (int): the number of currently infected cases
                    - Fatal (int): the number of fatal cases
                    - Recovered (int): the number of recovered cases
            quantiles (tuple(float, float)): quantiles to cut the parameter range, like a confidence interval
            check_dict (dict[str, object] or None): setting of validation
                - None means {"timeout": 180, "timeout_iteration": 5, "tail_n": 4, "allowance": (0.99, 1.01)}
                - timeout (int): timeout of optimization
                - timeout_iteration (int): timeout of one iteration
                - tail_n (int): the number of recent iterations used to decide that the score has stopped changing
                - allowance (tuple(float, float)): the allowance of the max predicted values
            study_dict (dict[str, object] or None): setting of optimization study
                - None means {"pruner": "threshold", "upper": 0.5, "percentile": 50, "seed": 0}
                - pruner (str): kind of pruner (hyperband, median, threshold or percentile)
                - upper (float): used by the "threshold" pruner; if an intermediate score is larger than this value, the trial is pruned
                - percentile (float): used by the "percentile" pruner; if the best intermediate value is in the bottom percentile among trials, the trial is pruned
            kwargs: arguments to set directly, e.g. timeout=180 instead of check_dict={"timeout": 180,...}

        Raises:
            covsirphy.UnExecutedError: either tau value or phase information was not set

        Returns:
            dict(str, object): setting of the phase (key: phase name)
                - Start (pandas.Timestamp): start date
                - End (pandas.Timestamp): end date
                - Rt (float): phase-dependent reproduction number
                - (str, float): estimated parameter values, including rho
                - (int or float): day parameters, including 1/beta [days]
                - {metric}: score with the estimated parameter values
                - Trials (int): the number of trials
                - Runtime (str): runtime of optimization
        """
        print(f"\n<{self._model.NAME} model: parameter estimation>")
        print(f"Running optimization with {self._n_jobs} CPUs...")
        stopwatch = StopWatch()
        # Arguments
        self._ensure_dataframe(data, name="data", columns=self.DSIFR_COLUMNS)
        df = data.loc[:, self.DSIFR_COLUMNS]
        if not self._info_dict:
            raise UnExecutedError("ODEHandler.add()")
        if self._tau is None:
            raise UnExecutedError(
                "ODEHandler.estimate_tau()",
                message="or specify tau when creating an instance of ODEHandler"
            )
        # Arguments used in the old Estimator
        check_dict = check_dict or {
            "timeout": 180,
            "timeout_interation": 5,
            "tail_n": 4,
            "allowance": (0.99, 1.01)
        }
        check_dict.update(kwargs)
        study_dict = study_dict or {
            "pruner": "threshold",
            "upper": 0.5,
            "percentile": 50,
            "seed": 0
        }
        study_dict.update(kwargs)
        # ODE parameter estimation
        est_f = functools.partial(self._estimate_params,
                                  data=df,
                                  quantiles=quantiles,
                                  check_dict=check_dict,
                                  study_dict=study_dict)
        phases = list(self._info_dict.keys())
        if self._n_jobs == 1:
            est_dict_list = [est_f(ph) for ph in phases]
        else:
            with Pool(self._n_jobs) as p:
                est_dict_list = p.map(est_f, phases)
        for (phase, est_dict) in zip(phases, est_dict_list):
            self._info_dict[phase]["param"] = {
                param: est_dict[param]
                for param in self._model.PARAMETERS
            }
        print(f"Completed optimization. Total: {stopwatch.stop_show()}")
        return {
            k: {
                self.START: self._info_dict[k][self.START],
                self.END: self._info_dict[k][self.END],
                **v
            }
            for (k, v) in zip(phases, est_dict_list)
        }
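The check_dict/study_dict handling above is a defaults-then-override pattern: start from a default dictionary when None is given, then let direct keyword arguments overwrite individual settings. A compact sketch with hypothetical names:

def configure(check_dict=None, **kwargs):
    check_dict = check_dict or {"timeout": 180, "timeout_iteration": 5}
    check_dict.update(kwargs)  # e.g. timeout=60 overrides the default
    return check_dict

print(configure(timeout=60))  # {'timeout': 60, 'timeout_iteration': 5}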
Example #11
    def run(self,
            timeout=60,
            reset_n_max=3,
            timeout_iteration=5,
            allowance=(0.8, 1.2),
            seed=0,
            stdout=True,
            **kwargs):
        """
        Run optimization.
        If the result satisfies all of the following conditions, optimization stops.
        - all values are 0 or greater
        - monotonically increasing variables increase monotonically
        - predicted values are within the allowance when each actual value takes its maximum

        Args:
            timeout (int): time-out of run
            reset_n_max (int): if the study has been reset @reset_n_max times, it will not be reset again
            timeout_iteration (int): time-out of one iteration
            allowance (tuple(float, float)): the allowance of the predicted value
            seed (int or None): random seed of hyperparameter optimization
            stdout (bool): whether show the status of progress or not

        Notes:
            @n_jobs was obsoleted because it is not effective for Optuna.
        """
        if "n_jobs" in kwargs.keys():
            raise KeyError("@n_jobs of Estimator.run() was obsoleted.")
        # Create a study of optuna
        if self.study is None:
            self._init_study(seed=seed)
        reset_n = 0
        iteration_n = math.ceil(timeout / timeout_iteration)
        increasing_cols = [f"{v}{self.P}" for v in self.model.VARS_INCLEASE]
        if stdout:
            print("\tRunning optimization...")
        stopwatch = StopWatch()
        for _ in range(iteration_n):
            # Perform optimization
            self._run_trial(timeout_iteration=timeout_iteration)
            # Create a table to compare observed/estimated values
            tau = super().param()[self.TAU]
            train_df = self.divide_minutes(tau)
            comp_df = self.compare(train_df, self.predict())
            # Check monotonic variables
            mono_ok_list = [
                comp_df[col].is_monotonic_increasing for col in increasing_cols
            ]
            if not all(mono_ok_list):
                if reset_n == reset_n_max - 1:
                    break
                # Initialize the study
                self._init_study()
                reset_n += 1
                continue
            # Need additional trials when the values are not in allowance
            if self._is_in_allowance(comp_df, allowance):
                break
        # Calculate run-time and the number of trials
        self.run_time = stopwatch.stop()
        self.total_trials = len(self.study.trials)
        if stdout:
            print(
                f"\tFinished {self.total_trials} trials in {stopwatch.show()}.",
            )
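The "\r" prints used in several examples above rewrite one console line in place instead of appending new lines; the final message ends with "\n" to release the line. A tiny sketch of that progress-reporting style:

import time

for i in range(1, 4):
    time.sleep(0.1)  # stand-in for one optimization iteration
    print(f"\r\tPerformed {i * 10} trials.", end="")
print("\r\tFinished 30 trials.\n", end="")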