def _curve_fitting(self, phase, start_date, end_date):
    """
    Fit the trend curve to the records of a single phase.

    The rows of ``self.sr_df`` whose index lies between the two dates
    (both inclusive) are passed to ``Trend`` and its ``run()`` result is
    post-processed.

    Args:
        phase (str): phase name; "0th" is replaced with ``self.INITIAL``
        start_date (str): start date of the phase
        end_date (str): end date of the phase

    Returns:
        tuple
            (pandas.DataFrame): result of the curve fitting with columns renamed
                - ``{self.S}{self.P}`` -> ``{phase}{self.P}``
                  (set to None when the RMSLE score exceeds ``self.max_rmsle``)
                - ``{self.S}{self.A}`` -> ``{phase}{self.A}``
                - ``{self.R}`` -> ``{phase}_{self.R}``
            (int): minimum of the ``self.R`` column, intended as the change
                point of the curve (position of a vertical line)
    """
    records = self.sr_df.copy()
    first_day = self.date_obj(start_date)
    last_day = self.date_obj(end_date)
    # Keep only the rows that belong to this phase (boundaries included)
    in_phase = (records.index >= first_day) & (records.index <= last_day)
    records = records.loc[in_phase, :]
    trend = Trend(records)
    fitted = trend.run()
    # A fit that is too poor is blanked out rather than reported
    if trend.rmsle() > self.max_rmsle:
        fitted[f"{self.S}{self.P}"] = None
    # Must be read before the column is renamed below
    r_value = int(fitted[self.R].min())
    label = self.INITIAL if phase == "0th" else phase
    # Renames are applied one after another, in the same order as before
    renamed = (
        fitted.rename(columns={f"{self.S}{self.P}": f"{label}{self.P}"})
        .rename(columns={f"{self.S}{self.A}": f"{label}{self.A}"})
        .rename(columns={f"{self.R}": f"{label}_{self.R}"})
    )
    return (renamed, r_value)
def error_f(self, start_dates, end_dates):
    """
    Error score to minimize in the study.

    A ``Trend`` is analysed for every (start date, end date) pair and the
    RMSLE scores are combined with a weighted average. The weights are
    1, 2, ..., n in the order of the pairs, so later pairs weigh more.

    Args:
        start_dates (list[str]): list of start date of phases (candidates)
        end_dates (list[str]): list of end date of phases (candidates)

    Returns:
        (float): score of the error function to minimize
    """
    def phase_rmsle(first_day, last_day):
        # Population is looked up by the start date of the phase
        trend = Trend(
            self.clean_df, self.pop_dict[first_day], self.country,
            province=self.province,
            start_date=first_day, end_date=last_day
        )
        trend.analyse()
        return trend.rmsle()

    scores = [
        phase_rmsle(first_day, last_day)
        for (first_day, last_day) in zip(start_dates, end_dates)
    ]
    weights = range(1, len(scores) + 1)
    return np.average(scores, weights=weights)
def _curve_fitting(self, phase, info):
    """
    Fit the trend curve to the records of a single phase.

    A ``Trend`` is built from ``self.jhu_data`` for the period and population
    given in @info, analysed, and its ``result()`` is post-processed.

    Args:
        phase (str): phase name; "0th" is replaced with ``self.INITIAL``
        info (dict[str]): start date, end date and population, read with
            the keys ``self.START``, ``self.END`` and ``self.N``

    Returns:
        (tuple)
            (pandas.DataFrame): result of the curve fitting with columns renamed
                - ``{self.S}{self.P}`` -> ``{phase}{self.P}``
                  (set to None when the RMSLE score exceeds ``self.max_rmsle``)
                - ``{self.S}{self.A}`` -> ``{phase}{self.A}``
                - ``{self.R}`` -> ``{phase}_{self.R}``
            (int): minimum of the ``self.R`` column, intended as the change
                point of the curve (position of a vertical line)
    """
    first_day, last_day, total = info[self.START], info[self.END], info[self.N]
    trend = Trend(
        self.jhu_data, total, self.country,
        province=self.province,
        start_date=first_day, end_date=last_day
    )
    trend.analyse()
    fitted = trend.result()
    # A fit that is too poor is blanked out rather than reported
    if trend.rmsle() > self.max_rmsle:
        fitted[f"{self.S}{self.P}"] = None
    # Must be read before the column is renamed below
    r_value = int(fitted[self.R].min())
    label = self.INITIAL if phase == "0th" else phase
    # Renames are applied one after another, in the same order as before
    renamed = (
        fitted.rename(columns={f"{self.S}{self.P}": f"{label}{self.P}"})
        .rename(columns={f"{self.S}{self.A}": f"{label}{self.A}"})
        .rename(columns={f"{self.R}": f"{label}_{self.R}"})
    )
    return (renamed, r_value)