def run(self, n_jobs=-1, **kwargs):
    """
    Run estimation for all phase units.

    Args:
        n_jobs (int): the number of parallel jobs or -1 (CPU count)
        kwargs: keyword arguments of model parameters and covsirphy.Estimator.run()

    Returns:
        list: results of estimation, one element per phase unit
    """
    pending = self._units[:]
    completed = []
    # Resolve the worker count (-1 means "use all CPUs")
    if n_jobs == -1:
        n_jobs = cpu_count()
    print(f"\n<{self.model.NAME} model: parameter estimation>")
    print(f"Running optimization with {n_jobs} CPUs...")
    stopwatch = StopWatch()
    if self._tau is None:
        # Estimate the last phase serially first so that its result fixes tau
        last_unit, pending = pending[-1], pending[:-1]
        tau_unit = self._run(unit=last_unit, tau=None, **kwargs)
        self._tau = tau_unit.tau
        completed.append(tau_unit)
    # Estimate the remaining phases in parallel with the fixed tau
    runner = functools.partial(self._run, tau=self._tau, **kwargs)
    with Pool(n_jobs) as pool:
        completed.extend(pool.map(runner, pending))
    # Completion
    stopwatch.stop()
    print(f"Completed optimization. Total: {stopwatch.show()}")
    return completed
def run(self, n_points, min_duration=7, allowance=3, timeout=60, n_trials_iteration=10, n_jobs=-1):
    """
    Run optimization to find the change points of the S-R trend.

    Args:
        n_points (int): the number of change points
        min_duration (int): minimum duration of one phase [days] - must be over 2
        allowance (int): allowance of change points [days] -
            if the estimated change points were equal to the previous iteration
            within this allowance, stop running
        timeout (int): time-out of run
        n_trials_iteration (int): the number of trials in one iteration
        n_jobs (int): the number of parallel jobs or -1 (CPU count)

    Returns:
        self
    """
    self.n_points = n_points
    self.min_duration = min_duration
    stopwatch = StopWatch()
    if min_duration <= 2:
        raise ValueError("@min_duration must be over 2.")
    if n_points <= 0:
        # Nothing to optimize: record zero effort and return immediately
        self.run_time = 0
        self.total_trials = 0
        return self
    if self.study is None:
        self._init_study()
    print("Finding change points of S-R trend...")
    margin = timedelta(days=allowance)
    while True:
        self.add_trial(n_trials_iteration, n_jobs)
        # A change point counts as fixed when it moved by no more than
        # @allowance days since the previous iteration
        unchanged = [
            abs(self.date_obj(current) - self.date_obj(former)) <= margin
            for (current, former) in zip(self.change_dates, self.change_dates_previous)
        ]
        # Cumulative run-time and trial count so far
        self.run_time = stopwatch.stop()
        self.total_trials = len(self.study.trials)
        # Stop when all points are fixed (and a previous iteration exists) or on time-out
        fixed = all(unchanged) and bool(self.change_dates_previous)
        if fixed or self.run_time > timeout:
            print(
                f"\rFinished {self.total_trials} trials in {stopwatch.show()}.\n",
                end="")
            break
        stopwatch.stop()
        print(
            f"\rPerformed {self.total_trials} trials in {stopwatch.show()}.",
            end="")
        self.change_dates_previous = self.change_dates[:]
    return self
def estimate(self, model, name="Main", phases=None, n_jobs=-1, **kwargs):
    """
    Estimate the parameters of the model using the records.

    Args:
        model (covsirphy.ModelBase): ODE model
        name (str): phase series name
        phases (list[str]): list of phase names, like 1st, 2nd...
        n_jobs (int): the number of parallel jobs or -1 (CPU count)
        kwargs: keyword arguments of model parameters and covsirphy.Estimator.run()

    Notes:
        - If 'Main' was used as @name, main PhaseSeries will be used.
        - If @name phase was not registered, new PhaseSeries will be created.
        - If @phases is None, all past phase will be used.
    """
    # Ensure @model is a subclass of ModelBase
    model = self.validate_subclass(model, ModelBase, "model")
    # @series_list was removed; only one phase series is handled per call
    if "series_list" in kwargs:
        raise KeyError(
            "Because @series_list was obsoleted in Scenario.estimate(),"
            " please specify the phase name using @name argument.")
    # Validate the requested phases against the registered series
    try:
        registered = self.series_dict[name].to_dict()
    except KeyError:
        raise KeyError(f"{name} has not been defined.")
    past_phases = list(registered.keys())
    if phases is None:
        phases = past_phases[:]
    unknown = list(set(phases) - set(past_phases))
    if unknown:
        raise KeyError(
            f"{unknown[0]} is not a past phase or not registered.")
    # Resolve the worker count (-1 means "use all CPUs")
    if n_jobs == -1:
        n_jobs = cpu_count()
    print(f"\n<{name} scenario: perform parameter estimation>")
    print(f"Running optimization with {n_jobs} CPUs...")
    stopwatch = StopWatch()
    # The last phase is estimated serially first to determine the tau value
    # (extended unpacking rebinds @phases without mutating the caller's list)
    *phases, last_phase = phases
    self._update_self(*self._estimate(model, phase=last_phase, **kwargs))
    # The remaining phases are estimated in parallel
    runner = functools.partial(self._estimate, model, **kwargs)
    with Pool(n_jobs) as pool:
        outcomes = pool.map(runner, phases)
    for outcome in outcomes:
        self._update_self(*outcome)
    # Completion
    stopwatch.stop()
    print(f"Completed optimization. Total: {stopwatch.show()}")
def run(self, timeout=60, reset_n_max=3, timeout_iteration=5, allowance=(0.98, 1.02), seed=0, **kwargs):
    """
    Run optimization.
    If the result satisfied the following conditions, optimization ends.
    - all values are not under than 0
    - values of monotonic increasing variables increases monotonically
    - predicted values are in the allowance when each actual value shows max value

    Args:
        timeout (int): time-out of run
        reset_n_max (int): if study was reset @reset_n_max times, will not be reset anymore
        timeout_iteration (int): time-out of one iteration
        allowance (tuple(float, float)): the allowance of the predicted value
        seed (int or None): random seed of hyperparameter optimization
        kwargs: other keyword arguments will be ignored

    Notes:
        @n_jobs was obsoleted because this is not effective for Optuna.
    """
    # Create a study of optuna
    if self.study is None:
        self._init_study(seed=seed)
    reset_n = 0
    iteration_n = math.ceil(timeout / timeout_iteration)
    increasing_cols = [f"{v}{self.P}" for v in self.model.VARS_INCLEASE]
    stopwatch = StopWatch()
    for _ in range(iteration_n):
        # Perform optimization
        self.study.optimize(
            self.objective, n_jobs=1, timeout=timeout_iteration)
        # Create a table to compare observed/estimated values
        tau = self.tau or super().param()[self.TAU]
        train_df = self.divide_minutes(tau)
        comp_df = self.compare(train_df, self.predict())
        # Check monotonic variables
        mono_ok_list = [
            comp_df[col].is_monotonic_increasing for col in increasing_cols
        ]
        if not all(mono_ok_list):
            if reset_n == reset_n_max - 1:
                break
            # Initialize the study; pass @seed so the reset run stays
            # reproducible (previously the seed was silently dropped here)
            self._init_study(seed=seed)
            reset_n += 1
            continue
        # Stop when the estimated values are within allowance;
        # otherwise perform additional trials in the next iteration
        if self._is_in_allowance(comp_df, allowance):
            break
    # Calculate run-time and the number of trials
    self.run_time = stopwatch.stop()
    self.run_time_show = stopwatch.show()
    self.total_trials = len(self.study.trials)
def run(self, timeout=180, reset_n_max=3, timeout_iteration=5, tail_n=4, allowance=(0.99, 1.01), seed=0, pruner="threshold", upper=0.5, percentile=50, metric=None, metrics="RMSLE", **kwargs):
    """
    Run optimization.
    If the result satisfied the following conditions, optimization ends.
    - Score did not change in the last @tail_n iterations.
    - Monotonic increasing variables increases monotonically.
    - Predicted values are in the allowance when each actual value shows max value.

    Args:
        timeout (int): timeout of optimization
        reset_n_max (int): if study was reset @reset_n_max times, will not be reset anymore
        timeout_iteration (int): time-out of one iteration
        tail_n (int): the number of iterations to decide whether score did not change for the last iterations
        allowance (tuple(float, float)): the allowance of the predicted value
        seed (int or None): random seed of hyperparameter optimization
        pruner (str): hyperband, median, threshold or percentile
        upper (float): works for "threshold" pruner,
            intermediate score is larger than this value, it prunes
        percentile (float): works for "Percentile" pruner,
            the best intermediate value is in the bottom percentile among trials, it prunes
        metric (str or None): metric name or None (use @metrics)
        metrics (str): alias of @metric
        kwargs: keyword arguments of ModelBase.param_range()

    Note:
        @n_jobs was obsoleted because this does not work effectively in Optuna.

    Note:
        Please refer to covsirphy.Evaluator.score() for metric names
    """
    # @metric wins over its alias @metrics when both are given
    self._metric = metric or metrics
    self._param_range_dict = find_args(self.model.param_range, **kwargs)
    # Create a study of optuna
    if self.study is None:
        self._init_study(
            seed=seed, pruner=pruner, upper=upper, percentile=percentile)
    reset_n = 0
    iteration_n = math.ceil(timeout / timeout_iteration)
    # Column names of the variables that must increase monotonically
    increasing_cols = [f"{v}{self.P}" for v in self.model.VARS_INCLEASE]
    stopwatch = StopWatch()
    scores = []
    for _ in range(iteration_n):
        # Perform optimization
        self.study.optimize(
            self._objective, n_jobs=1, timeout=timeout_iteration)
        # If score did not change in the last @tail_n iterations, stop running
        tau, param_dict = self._param()
        scores.append(self._score(tau=tau, param_dict=param_dict))
        if len(scores) >= tail_n and len(set(scores[-tail_n:])) == 1:
            break
        # Create a table to compare observed/estimated values
        comp_df = self._compare(tau=tau, param_dict=param_dict)
        # Check monotonic variables
        mono_ok_list = [
            comp_df[col].is_monotonic_increasing for col in increasing_cols
        ]
        if not all(mono_ok_list):
            # Give up resetting after the limit is reached
            if reset_n == reset_n_max - 1:
                break
            # Initialize the study (keeping @seed for reproducibility)
            self._init_study(seed=seed)
            reset_n += 1
            continue
        # Need additional trials when the values are not in allowance
        if self._is_in_allowance(comp_df, allowance):
            break
    # Calculate run-time (accumulated across calls) and the number of trials
    self.runtime += stopwatch.stop()
    self.total_trials = len(self.study.trials)
def run(self, timeout=60, reset_n_max=3, timeout_iteration=10, allowance=(0.8, 1.2), n_jobs=-1, seed=None):
    """
    Run optimization.
    If the result satisfied all conditions, optimization ends.
    - all values are not under than 0
    - values of monotonic increasing variables increases monotonically
    - predicted values are in the allowance when each actual value shows max value

    Args:
        timeout (int): time-out of run
        reset_n_max (int): if study was reset @reset_n_max times, will not be reset anymore
        timeout_iteration (int): time-out of one iteration
        allowance (tuple(float, float)): the allowance of the predicted value
        n_jobs (int): the number of parallel jobs or -1 (CPU count)
        seed (int or None): random seed of hyperparameter optimization

    Notes:
        @seed will effective when @n_jobs is 1

    Returns:
        None
    """
    if seed is not None and n_jobs != 1:
        raise ValueError(
            "@seed must be None when @n_jobs is not equal to 1.")
    if self.study is None:
        self._init_study(seed=seed)
    print("\tRunning optimization...")
    stopwatch = StopWatch()
    reset_count = 0
    lower, upper = allowance
    while True:
        # Run one batch of trials
        self._run_trial(n_jobs=n_jobs, timeout_iteration=timeout_iteration)
        self.run_time = stopwatch.stop()
        self.total_trials = len(self.study.trials)
        # Time-out
        if self.run_time >= timeout:
            break
        print(
            f"\r\tPerformed {self.total_trials} trials in {stopwatch.show()}.",
            end="")
        # Compare observed and estimated values
        tau = super().param()[self.TAU]
        comparison = self.compare(self.divide_minutes(tau), self.predict())
        # Variables expected to increase must be monotonic
        monotonic = all(
            comparison[f"{v}{self.P}"].is_monotonic_increasing
            for v in self.model.VARS_INCLEASE
        )
        if not monotonic:
            reset_count += 1
            if reset_count <= reset_n_max:
                # Restart the study and the clock, then try again
                self._init_study()
                stopwatch = StopWatch()
                continue
            # Reset limit exceeded: fall through and judge the current values
        # Predicted vs actual at each variable's actual maximum
        pairs = [
            comparison.loc[
                comparison[f"{v}{self.A}"].idxmax(),
                [f"{v}{self.A}", f"{v}{self.P}"]
            ].tolist()
            for v in self.model.VARIABLES
        ]
        if all(actual * lower <= predicted <= actual * upper
               for (actual, predicted) in pairs):
            break
    stopwatch.stop()
    print(
        f"\r\tFinished {self.total_trials} trials in {stopwatch.show()}.\n",
        end="")
    return None
def run(self, timeout=180, n_jobs=-1, timeout_iteration=10, allowance=(0.8, 1.2)):
    """
    Run optimization.
    If the result satisfied all conditions, optimization ends.
    - all values are not under than 0
    - values of monotonic increasing variables increases monotonically
    - predicted values are in the allowance when each actual value shows max value

    Args:
        timeout (int): time-out of run
        n_jobs (int): the number of parallel jobs or -1 (CPU count)
        timeout_iteration (int): time-out of one iteration
        allowance (tuple(float, float)): the allowance of the predicted value

    Returns:
        None
    """
    if self.study is None:
        self._init_study()
    print("\tRunning optimization...")
    stopwatch = StopWatch()
    lower, upper = allowance
    while True:
        # Add one batch of trials
        self._add_trial(n_jobs=n_jobs, timeout_iteration=timeout_iteration)
        self.run_time = stopwatch.stop()
        self.total_trials = len(self.study.trials)
        # Time-out
        if self.run_time >= timeout:
            break
        print(
            f"\r\tPerformed {self.total_trials} trials in {stopwatch.show()}.",
            end="")
        # Compare observed and estimated values
        tau = super().param()[self.TAU]
        comparison = self.compare(self.divide_minutes(tau), self.predict())
        # Retry when any value is negative
        if (comparison < 0).values.sum():
            continue
        # Restart the study when an increasing variable is not monotonic
        monotonic = all(
            comparison[f"{v}{self.P}"].is_monotonic_increasing
            for v in self.model.VARS_INCLEASE
        )
        if not monotonic:
            self._init_study()
            continue
        # Predicted vs actual at each variable's actual maximum
        pairs = [
            comparison.loc[
                comparison[f"{v}{self.A}"].idxmax(),
                [f"{v}{self.A}", f"{v}{self.P}"]
            ].tolist()
            for v in self.model.VARIABLES
        ]
        if all(actual * lower <= predicted <= actual * upper
               for (actual, predicted) in pairs):
            break
    stopwatch.stop()
    print(
        f"\r\tFinished {self.total_trials} trials in {stopwatch.show()}.\n",
        end="")
    return None
def run(self, timeout=60, reset_n_max=3, timeout_iteration=5, allowance=(0.8, 1.2), seed=0, stdout=True, **kwargs):
    """
    Run optimization.
    If the result satisfied the following conditions, optimization ends.
    - all values are not under than 0
    - values of monotonic increasing variables increases monotonically
    - predicted values are in the allowance when each actual value shows max value

    Args:
        timeout (int): time-out of run
        reset_n_max (int): if study was reset @reset_n_max times, will not be reset anymore
        timeout_iteration (int): time-out of one iteration
        allowance (tuple(float, float)): the allowance of the predicted value
        seed (int or None): random seed of hyperparameter optimization
        stdout (bool): whether show the status of progress or not

    Notes:
        @n_jobs was obsoleted because this is not effective for Optuna.
    """
    if "n_jobs" in kwargs.keys():
        raise KeyError("@n_jobs of Estimator.run() was obsoleted.")
    # Create a study of optuna
    if self.study is None:
        self._init_study(seed=seed)
    reset_n = 0
    iteration_n = math.ceil(timeout / timeout_iteration)
    increasing_cols = [f"{v}{self.P}" for v in self.model.VARS_INCLEASE]
    if stdout:
        print("\tRunning optimization...")
    stopwatch = StopWatch()
    for _ in range(iteration_n):
        # Perform optimization
        self._run_trial(timeout_iteration=timeout_iteration)
        # Create a table to compare observed/estimated values
        tau = super().param()[self.TAU]
        train_df = self.divide_minutes(tau)
        comp_df = self.compare(train_df, self.predict())
        # Check monotonic variables
        mono_ok_list = [
            comp_df[col].is_monotonic_increasing for col in increasing_cols
        ]
        if not all(mono_ok_list):
            if reset_n == reset_n_max - 1:
                break
            # Initialize the study; pass @seed so the reset run stays
            # reproducible (previously the seed was silently dropped here)
            self._init_study(seed=seed)
            reset_n += 1
            continue
        # Stop when the estimated values are within allowance;
        # otherwise perform additional trials in the next iteration
        if self._is_in_allowance(comp_df, allowance):
            break
    # Calculate run-time and the number of trials
    self.run_time = stopwatch.stop()
    self.total_trials = len(self.study.trials)
    if stdout:
        print(
            f"\tFinished {self.total_trials} trials in {stopwatch.show()}.",
        )