Beispiel #1
0
    def run(self, n_jobs=-1, **kwargs):
        """
        Run parameter estimation for all registered phase units.

        Args:
            n_jobs (int): the number of parallel jobs or -1 (CPU count)
            kwargs: keyword arguments of model parameters and covsirphy.Estimator.run()

        Returns:
            list: estimated phase units
        """
        remaining = self._units[:]
        estimated = []
        # Resolve -1 to the number of available CPUs
        worker_n = cpu_count() if n_jobs == -1 else n_jobs
        print(f"\n<{self.model.NAME} model: parameter estimation>")
        print(f"Running optimization with {worker_n} CPUs...")
        stopwatch = StopWatch()
        # When tau is undetermined, estimate the last phase first to fix it
        if self._tau is None:
            last_unit, remaining = remaining[-1], remaining[:-1]
            last_est = self._run(unit=last_unit, tau=None, **kwargs)
            self._tau = last_est.tau
            estimated.append(last_est)
        # Estimate the remaining phases in parallel with the fixed tau value
        runner = functools.partial(self._run, tau=self._tau, **kwargs)
        with Pool(worker_n) as pool:
            estimated.extend(pool.map(runner, remaining))
        # Completion
        stopwatch.stop()
        print(f"Completed optimization. Total: {stopwatch.show()}")
        return estimated
 def run(self,
         n_points,
         min_duration=7,
         allowance=3,
         timeout=60,
         n_trials_iteration=10,
         n_jobs=-1):
     """
     Run optimization to find the change points of the S-R trend.

     Args:
         n_points (int): the number of change points
         min_duration (int): minimum duration of one phase [days], must be over 2
         allowance (int): allowance of change points [days]; if the estimated
             change points are within this allowance of the previous
             iteration, the search stops
         timeout (int): time-out of run
         n_trials_iteration (int): the number of trials in one iteration
         n_jobs (int): the number of parallel jobs or -1 (CPU count)

     Returns:
         self
     """
     self.n_points = n_points
     self.min_duration = min_duration
     stopwatch = StopWatch()
     if min_duration <= 2:
         raise ValueError("@min_duration must be over 2.")
     # With no change points requested there is nothing to optimize
     if n_points <= 0:
         self.run_time = 0
         self.total_trials = 0
         return self
     if self.study is None:
         self._init_study()
     print("Finding change points of S-R trend...")
     while True:
         self.add_trial(n_trials_iteration, n_jobs)
         # Check whether the change points are fixed (with allowance) or not
         allow_obj = timedelta(days=allowance)
         fixed_ok = [
             abs(self.date_obj(this) - self.date_obj(previous)) <= allow_obj
             for (this, previous
                  ) in zip(self.change_dates, self.change_dates_previous)
         ]
         # Calculate cumulative run-time
         self.run_time = stopwatch.stop()
         self.total_trials = len(self.study.trials)
         # If fixed or time-out, break
         # (change_dates_previous is empty on the first iteration, so at
         # least two iterations run before convergence can be declared)
         if (all(fixed_ok) and
                 self.change_dates_previous) or (self.run_time > timeout):
             print(
                 f"\rFinished {self.total_trials} trials in {stopwatch.show()}.\n",
                 end=str())
             break
         stopwatch.stop()
         print(
             f"\rPerformed {self.total_trials} trials in {stopwatch.show()}.",
             end=str())
         # Remember this iteration's result for the next convergence check
         self.change_dates_previous = self.change_dates[:]
     return self
Beispiel #3
0
    def estimate(self, model, name="Main", phases=None, n_jobs=-1, **kwargs):
        """
        Estimate the parameters of the model using the records.

        Args:
            model (covsirphy.ModelBase): ODE model
            name (str): phase series name
            phases (list[str]): list of phase names, like 1st, 2nd...
            n_jobs (int): the number of parallel jobs or -1 (CPU count)
            kwargs: keyword arguments of model parameters and covsirphy.Estimator.run()

        Notes:
            - If 'Main' was used as @name, main PhaseSeries will be used.
            - If @name phase was not registered, new PhaseSeries will be created.
            - If @phases is None, all past phase will be used.
        """
        # Ensure the given model is a subclass of ModelBase
        model = self.validate_subclass(model, ModelBase, "model")
        # Reject the obsoleted @series_list argument explicitly
        if "series_list" in kwargs:
            raise KeyError(
                "Because @series_list was obsoleted in Scenario.estimate(),"
                " please specify the phase name using @name argument.")
        # Resolve the registered phases of the named series
        try:
            registered = self.series_dict[name].to_dict()
        except KeyError:
            raise KeyError(f"{name} has not been defined.")
        known_phases = list(registered.keys())
        target_phases = known_phases[:] if phases is None else phases
        unknown = list(set(target_phases) - set(known_phases))
        if unknown:
            raise KeyError(
                f"{unknown[0]} is not a past phase or not registered.")
        # Resolve -1 to the number of available CPUs
        n_jobs = cpu_count() if n_jobs == -1 else n_jobs
        print(f"\n<{name} scenario: perform parameter estimation>")
        print(f"Running optimization with {n_jobs} CPUs...")
        stopwatch = StopWatch()
        # The last phase is estimated first to determine the tau value
        last_phase, target_phases = target_phases[-1], target_phases[:-1]
        self._update_self(*self._estimate(model, phase=last_phase, **kwargs))
        # The other phases are estimated in parallel
        runner = functools.partial(self._estimate, model, **kwargs)
        with Pool(n_jobs) as pool:
            for result_tuple in pool.map(runner, target_phases):
                self._update_self(*result_tuple)
        # Completion
        stopwatch.stop()
        print(f"Completed optimization. Total: {stopwatch.show()}")
Beispiel #4
0
    def run(self,
            timeout=60,
            reset_n_max=3,
            timeout_iteration=5,
            allowance=(0.98, 1.02),
            seed=0,
            **kwargs):
        """
        Run optimization.
        If the result satisfied the following conditions, optimization ends.
        - all values are not under than 0
        - values of monotonic increasing variables increases monotonically
        - predicted values are in the allowance when each actual value shows max value

        Args:
            timeout (int): time-out of run
            reset_n_max (int): if study was reset @reset_n_max times, will not be reset anymore
            timeout_iteration (int): time-out of one iteration
            allowance (tuple(float, float)): the allowance of the predicted value
            seed (int or None): random seed of hyperparameter optimization
            kwargs: other keyword arguments will be ignored

        Notes:
            @n_jobs was obsoleted because this is not effective for Optuna.
        """
        # Create a study of optuna
        if self.study is None:
            self._init_study(seed=seed)
        reset_n = 0
        # Split the total timeout into fixed-length optimization iterations
        iteration_n = math.ceil(timeout / timeout_iteration)
        increasing_cols = [f"{v}{self.P}" for v in self.model.VARS_INCLEASE]
        stopwatch = StopWatch()
        for _ in range(iteration_n):
            # Perform optimization
            self.study.optimize(self.objective,
                                n_jobs=1,
                                timeout=timeout_iteration)
            # Create a table to compare observed/estimated values
            tau = self.tau or super().param()[self.TAU]
            train_df = self.divide_minutes(tau)
            comp_df = self.compare(train_df, self.predict())
            # Check monotonic variables
            mono_ok_list = [
                comp_df[col].is_monotonic_increasing for col in increasing_cols
            ]
            if not all(mono_ok_list):
                # Give up after (reset_n_max - 1) resets
                if reset_n == reset_n_max - 1:
                    break
                # Initialize the study again, keeping the given random seed so
                # the run stays reproducible (was: self._init_study() with no
                # seed, silently discarding @seed on reset; consistent with the
                # sibling implementation that passes seed=seed here)
                self._init_study(seed=seed)
                reset_n += 1
                continue
            # Need additional trials when the values are not in allowance
            if self._is_in_allowance(comp_df, allowance):
                break
        # Calculate run-time and the number of trials
        self.run_time = stopwatch.stop()
        self.run_time_show = stopwatch.show()
        self.total_trials = len(self.study.trials)
Beispiel #5
0
    def run(self,
            timeout=180,
            reset_n_max=3,
            timeout_iteration=5,
            tail_n=4,
            allowance=(0.99, 1.01),
            seed=0,
            pruner="threshold",
            upper=0.5,
            percentile=50,
            metric=None,
            metrics="RMSLE",
            **kwargs):
        """
        Run optimization.
        If the result satisfied the following conditions, optimization ends.
        - Score did not change in the last @tail_n iterations.
        - Monotonic increasing variables increases monotonically.
        - Predicted values are in the allowance when each actual value shows max value.

        Args:
            timeout (int): timeout of optimization
            reset_n_max (int): if study was reset @reset_n_max times, will not be reset anymore
            timeout_iteration (int): time-out of one iteration
            tail_n (int): the number of iterations to decide whether score did not change for the last iterations
            allowance (tuple(float, float)): the allowance of the predicted value
            seed (int or None): random seed of hyperparameter optimization
            pruner (str): hyperband, median, threshold or percentile
            upper (float): works for "threshold" pruner,
                intermediate score is larger than this value, it prunes
            percentile (float): works for "Percentile" pruner,
                the best intermediate value is in the bottom percentile among trials, it prunes
            metric (str or None): metric name or None (use @metrics)
            metrics (str): alias of @metric
            kwargs: keyword arguments of ModelBase.param_range()

        Note:
            @n_jobs was obsoleted because this does not work effectively in Optuna.

        Note:
            Please refer to covsirphy.Evaluator.score() for metric names
        """
        # @metric takes precedence over its alias @metrics
        self._metric = metric or metrics
        # Keep only the keyword arguments accepted by model.param_range()
        self._param_range_dict = find_args(self.model.param_range, **kwargs)
        # Create a study of optuna
        if self.study is None:
            self._init_study(seed=seed,
                             pruner=pruner,
                             upper=upper,
                             percentile=percentile)
        reset_n = 0
        # Split the total timeout into fixed-length optimization iterations
        iteration_n = math.ceil(timeout / timeout_iteration)
        increasing_cols = [f"{v}{self.P}" for v in self.model.VARS_INCLEASE]
        stopwatch = StopWatch()
        scores = []
        for _ in range(iteration_n):
            # Perform optimization
            self.study.optimize(self._objective,
                                n_jobs=1,
                                timeout=timeout_iteration)
            # If score did not change in the last iterations, stop running
            tau, param_dict = self._param()
            scores.append(self._score(tau=tau, param_dict=param_dict))
            if len(scores) >= tail_n and len(set(scores[-tail_n:])) == 1:
                break
            # Create a table to compare observed/estimated values
            comp_df = self._compare(tau=tau, param_dict=param_dict)
            # Check monotonic variables
            mono_ok_list = [
                comp_df[col].is_monotonic_increasing for col in increasing_cols
            ]
            if not all(mono_ok_list):
                # Give up after (reset_n_max - 1) resets
                if reset_n == reset_n_max - 1:
                    break
                # Initialize the study
                # NOTE(review): pruner/upper/percentile are not re-applied on
                # reset, unlike the first initialization — confirm intended
                self._init_study(seed=seed)
                reset_n += 1
                continue
            # Need additional trials when the values are not in allowance
            if self._is_in_allowance(comp_df, allowance):
                break
        # Calculate run-time and the number of trials
        # NOTE(review): accumulates into self.runtime while sibling variants
        # assign self.run_time — confirm the attribute name is intended
        self.runtime += stopwatch.stop()
        self.total_trials = len(self.study.trials)
Beispiel #6
0
    def run(self,
            timeout=60,
            reset_n_max=3,
            timeout_iteration=10,
            allowance=(0.8, 1.2),
            n_jobs=-1,
            seed=None):
        """
        Run optimization.
        If the result satisfied all conditions, optimization ends.
            - all values are not under than 0
            - values of monotonic increasing variables increases monotonically
            - predicted values are in the allowance when each actual value shows max value

        Args:
            timeout (int): time-out of run
            reset_n_max (int): if study was reset @reset_n_max times, will not be reset anymore
            timeout_iteration (int): time-out of one iteration
            allowance (tuple(float, float)): the allowance of the predicted value
            n_jobs (int): the number of parallel jobs or -1 (CPU count)
            seed (int or None): random seed of hyperparameter optimization

        Notes:
            @seed will effective when @n_jobs is 1

        Returns:
            None
        """
        # A seed cannot be honored by parallel optimization: reject the combination
        if seed is not None and n_jobs != 1:
            raise ValueError(
                "@seed must be None when @n_jobs is not equal to 1.")
        if self.study is None:
            self._init_study(seed=seed)
        print("\tRunning optimization...")
        stopwatch = StopWatch()
        reset_n = 0
        while True:
            # Perform optimization
            self._run_trial(n_jobs=n_jobs, timeout_iteration=timeout_iteration)
            self.run_time = stopwatch.stop()
            self.total_trials = len(self.study.trials)
            # Time-out (measured from the latest stopwatch reset, see below)
            if self.run_time >= timeout:
                break
            print(
                f"\r\tPerformed {self.total_trials} trials in {stopwatch.show()}.",
                end=str())
            # Create a table to compare observed/estimated values
            tau = super().param()[self.TAU]
            train_df = self.divide_minutes(tau)
            comp_df = self.compare(train_df, self.predict())
            # Check monotonic variables
            mono_ok_list = [
                comp_df[f"{v}{self.P}"].is_monotonic_increasing
                for v in self.model.VARS_INCLEASE
            ]
            if not all(mono_ok_list):
                reset_n += 1
                if reset_n <= reset_n_max:
                    # Initialize the study and restart the timer
                    # NOTE(review): the given @seed is not re-applied here —
                    # confirm this is intended
                    self._init_study()
                    stopwatch = StopWatch()
                    continue
                # NOTE(review): once the reset budget is exhausted, execution
                # falls through to the allowance check without a reset —
                # confirm the fall-through is intended
            # Check the values when argmax(actual)
            values_nest = [
                comp_df.loc[comp_df[f"{v}{self.A}"].idxmax(),
                            [f"{v}{self.A}", f"{v}{self.P}"]].tolist()
                for v in self.model.VARIABLES
            ]
            last_ok_list = [(a * allowance[0] <= p) and (p <= a * allowance[1])
                            for (a, p) in values_nest]
            if not all(last_ok_list):
                continue
            break
        stopwatch.stop()
        print(
            f"\r\tFinished {self.total_trials} trials in {stopwatch.show()}.\n",
            end=str())
        return None
Beispiel #7
0
 def run(self, timeout=180, n_jobs=-1,
         timeout_iteration=10, allowance=(0.8, 1.2)):
     """
     Run optimization.
     If the result satisfied all conditions, optimization ends.
         - all values are not under than 0
         - values of monotonic increasing variables increases monotonically
         - predicted values are in the allowance
             when each actual value shows max value

     Args:
         timeout (int): time-out of run
         n_jobs (int): the number of parallel jobs or -1 (CPU count)
         timeout_iteration (int): time-out of one iteration
         allowance (tuple(float, float)): the allowance of the predicted value

     Returns:
         None
     """
     if self.study is None:
         self._init_study()
     print("\tRunning optimization...")
     stopwatch = StopWatch()
     while True:
         # Perform optimization
         self._add_trial(n_jobs=n_jobs, timeout_iteration=timeout_iteration)
         self.run_time = stopwatch.stop()
         self.total_trials = len(self.study.trials)
         # Time-out
         if self.run_time >= timeout:
             break
         print(
             f"\r\tPerformed {self.total_trials} trials in {stopwatch.show()}.",
             end=str()
         )
         # Create a table to compare observed/estimated values
         tau = super().param()[self.TAU]
         train_df = self.divide_minutes(tau)
         comp_df = self.compare(train_df, self.predict())
         # All values are not under than 0
         if (comp_df < 0).values.sum():
             continue
         # Check monotonic variables
         mono_ok_list = [
             comp_df[f"{v}{self.P}"].is_monotonic_increasing
             for v in self.model.VARS_INCLEASE
         ]
         if not all(mono_ok_list):
             # Initialize the study
             # NOTE(review): unlike sibling implementations there is no reset
             # limit here — a persistently non-monotonic fit keeps resetting
             # until the timeout is reached; confirm intended
             self._init_study()
             continue
         # Check the values when argmax(actual)
         values_nest = [
             comp_df.loc[
                 comp_df[f"{v}{self.A}"].idxmax(),
                 [f"{v}{self.A}", f"{v}{self.P}"]
             ].tolist()
             for v in self.model.VARIABLES
         ]
         last_ok_list = [
             (a * allowance[0] <= p) and (p <= a * allowance[1])
             for (a, p) in values_nest
         ]
         if not all(last_ok_list):
             continue
         break
     stopwatch.stop()
     print(
         f"\r\tFinished {self.total_trials} trials in {stopwatch.show()}.\n",
         end=str()
     )
     return None
Beispiel #8
0
    def run(self,
            timeout=60,
            reset_n_max=3,
            timeout_iteration=5,
            allowance=(0.8, 1.2),
            seed=0,
            stdout=True,
            **kwargs):
        """
        Run optimization.
        If the result satisfied the following conditions, optimization ends.
        - all values are not under than 0
        - values of monotonic increasing variables increases monotonically
        - predicted values are in the allowance when each actual value shows max value

        Args:
            timeout (int): time-out of run
            reset_n_max (int): if study was reset @reset_n_max times, will not be reset anymore
            timeout_iteration (int): time-out of one iteration
            allowance (tuple(float, float)): the allowance of the predicted value
            seed (int or None): random seed of hyperparameter optimization
            stdout (bool): whether show the status of progress or not
            kwargs: other keyword arguments will be ignored

        Notes:
            @n_jobs was obsoleted because this is not effective for Optuna.
        """
        # Reject the obsoleted @n_jobs argument explicitly
        if "n_jobs" in kwargs.keys():
            raise KeyError("@n_jobs of Estimator.run() was obsoleted.")
        # Create a study of optuna
        if self.study is None:
            self._init_study(seed=seed)
        reset_n = 0
        # Split the total timeout into fixed-length optimization iterations
        iteration_n = math.ceil(timeout / timeout_iteration)
        increasing_cols = [f"{v}{self.P}" for v in self.model.VARS_INCLEASE]
        if stdout:
            print("\tRunning optimization...")
        stopwatch = StopWatch()
        for _ in range(iteration_n):
            # Perform optimization
            self._run_trial(timeout_iteration=timeout_iteration)
            # Create a table to compare observed/estimated values
            tau = super().param()[self.TAU]
            train_df = self.divide_minutes(tau)
            comp_df = self.compare(train_df, self.predict())
            # Check monotonic variables
            mono_ok_list = [
                comp_df[col].is_monotonic_increasing for col in increasing_cols
            ]
            if not all(mono_ok_list):
                # Give up after (reset_n_max - 1) resets
                if reset_n == reset_n_max - 1:
                    break
                # Initialize the study again, keeping the given random seed so
                # the run stays reproducible (was: self._init_study() with no
                # seed, silently discarding @seed on reset; consistent with the
                # sibling implementation that passes seed=seed here)
                self._init_study(seed=seed)
                reset_n += 1
                continue
            # Need additional trials when the values are not in allowance
            if self._is_in_allowance(comp_df, allowance):
                break
        # Calculate run-time and the number of trials
        self.run_time = stopwatch.stop()
        self.total_trials = len(self.study.trials)
        if stdout:
            print(
                f"\tFinished {self.total_trials} trials in {stopwatch.show()}.",
            )