def _curve_fitting(self, phase, start_date, end_date):
    """
    Fit the S-R curve of one phase and label the result with the phase name.

    The S-R records (self.sr_df) are restricted to the rows whose index lies
    between the start date and the end date (both inclusive) and handed to
    Trend for fitting.

    Args:
        phase (str): phase name; "0th" is replaced with self.INITIAL
        start_date (str): start date of the phase
        end_date (str): end date of the phase

    Returns:
        tuple
            (pandas.DataFrame): result of Trend.run() with renamed columns
                - f"{phase}{self.P}": predicted values of Susceptible,
                  set to None when the RMSLE score exceeds self.max_rmsle
                - f"{phase}{self.A}": actual values of Susceptible
                - f"{phase}_{self.R}": Recovered
            (int): minimum value of R in the phase, used by the caller as
                the position of a vertical line
    """
    records = self.sr_df.copy()
    first_day = self.date_obj(start_date)
    last_day = self.date_obj(end_date)
    in_phase = (records.index >= first_day) & (records.index <= last_day)
    fitter = Trend(records.loc[in_phase, :])
    fitted = fitter.run()
    predicted_col = f"{self.S}{self.P}"
    # A poor fit is hidden by blanking the predicted values
    if fitter.rmsle() > self.max_rmsle:
        fitted[predicted_col] = None
    # Minimum R of the phase, returned for the vertical line
    min_recovered = int(fitted[self.R].min())
    # Label the columns with the phase name
    label = phase
    if phase == "0th":
        label = self.INITIAL
    renames = (
        (predicted_col, f"{label}{self.P}"),
        (f"{self.S}{self.A}", f"{label}{self.A}"),
        (f"{self.R}", f"{label}_{self.R}"),
    )
    # Applied one by one, in this order
    for (old_name, new_name) in renames:
        fitted = fitted.rename({old_name: new_name}, axis=1)
    return (fitted, min_recovered)
def show(self, area, change_dates=None, **kwargs):
    """
    Show the S-R trend in a figure.

    Each phase returned by self.date_range() is fitted separately with
    self._curve_fitting(), the results are joined with self.sr_df and the
    combined table is plotted with Trend.show_with_many().

    Args:
        area (str): area name, used in the title of the figure
        change_dates (list[str] or None): list of change points
        kwargs: keyword arguments passed through to Trend.show_with_many()

    Note:
        @change_dates must be specified if ChangeFinder.run() was not done.
        When @change_dates is given (and not empty), the title lists these
        dates; otherwise the dates stored in self._change_dates are used.
    """
    # Curve fitting, one phase per (start date, end date) pair
    start_dates, end_dates = self.date_range(change_dates)
    nested = [
        self._curve_fitting(self.num2str(num), start_date, end_date)
        for (num, (start_date, end_date))
        in enumerate(zip(start_dates, end_dates))
    ]
    df_list, vlines = zip(*nested)
    comp_df = pd.concat([self.sr_df, *df_list], axis=1)
    comp_df = comp_df.rename({self.S: f"{self.S}{self.A}"}, axis=1)
    # Values which cannot be parsed as numbers become NaN
    comp_df = comp_df.apply(
        lambda x: pd.to_numeric(x, errors="coerce", downcast="integer"),
        axis=0)
    # Show figure
    pred_cols = [col for col in comp_df.columns if col.endswith(self.P)]
    if len(pred_cols) == 1:
        title = f"{area}: S-R trend without change points"
    else:
        # Prefer the dates supplied by the caller so that the title agrees
        # with the phases plotted even when self._change_dates is empty or
        # holds the result of an earlier run
        dates = list(change_dates or self._change_dates)
        # At most six dates per line of the title
        lines = [
            ", ".join(dates[i: i + 6]) for i in range(0, len(dates), 6)
        ]
        change_str = ",\n".join(lines)
        title = f"{area}: S-R trend changed on\n{change_str}"
    # The value of the first phase is not drawn as a vertical line
    # (presumably because its start is not a change point)
    Trend.show_with_many(
        result_df=comp_df,
        predicted_cols=pred_cols,
        title=title,
        v=vlines[1:],
        **kwargs)
def show(self, show_figure=True, filename=None):
    """
    Create the phases and, if requested, show the S-R trend as a figure.

    Args:
        show_figure (bool): if True, show the result as a figure.
            If False, the phases are returned without any curve fitting.
        filename (str): filename of the figure, or None (display figure)

    Returns:
        (covsirphy.PhaseSeries): the object created by self._create_phases()
    """
    # Create phases
    phase_series = self._create_phases()
    # Curve fitting is needed only for the figure: skip it when not plotting
    if not show_figure:
        return phase_series
    phase_dict = phase_series.to_dict()
    # Curve fitting
    nested = [
        self._curve_fitting(phase, info)
        for (phase, info) in phase_dict.items()
    ]
    df_list, vlines = zip(*nested)
    # NOTE(review): the result of the first phase is not included in the
    # figure - confirm this matches the phases made by _create_phases()
    comp_df = pd.concat(df_list[1:], axis=1)
    # Merge the per-phase columns into one Recovered column and one actual
    # Susceptible column; missing values are regarded as 0 in the sums
    comp_df[self.R] = comp_df.fillna(0).loc[
        :, comp_df.columns.str.endswith(self.R)
    ].sum(axis=1)
    comp_df[f"{self.S}{self.A}"] = comp_df.fillna(0).loc[
        :, comp_df.columns.str.endswith(self.A)
    ].sum(axis=1)
    # Values which cannot be parsed as numbers become NaN
    comp_df = comp_df.apply(
        lambda x: pd.to_numeric(x, errors="coerce", downcast="integer"),
        axis=0
    )
    # Show figure
    pred_cols = comp_df.loc[
        :, comp_df.columns.str.endswith(self.P)
    ].columns.tolist()
    if len(pred_cols) == 1:
        title = f"{self.area}: S-R trend without change points"
    else:
        _list = self.change_dates[:]
        # At most six dates per line of the title
        strings = [
            ", ".join(_list[i: i + 6]) for i in range(0, len(_list), 6)
        ]
        change_str = ",\n".join(strings)
        title = f"{self.area}: S-R trend changed on\n{change_str}"
    # The first two r-values are not drawn as vertical lines
    Trend.show_with_many(
        result_df=comp_df,
        predicted_cols=pred_cols,
        title=title,
        vlines=vlines[2:],
        filename=filename
    )
    return phase_series
def error_f(self, start_dates, end_dates):
    """
    Error score to minimize in the study.

    A Trend is analysed for every (start date, end date) pair and the
    RMSLE scores are averaged with the weights 1, 2, ..., n, so that the
    pairs at the end of the lists count more than the ones at the start.

    @start_dates <list[str]>: list of start date of phases (candidates)
    @end_dates <list[str]>: list of end date of phases (candidates)
    @return <float>: weighted average of the RMSLE scores
    """
    def _phase_rmsle(first_day, last_day):
        # Population is registered in self.pop_dict by start date
        n_people = self.pop_dict[first_day]
        fitter = Trend(
            self.clean_df, n_people, self.country,
            province=self.province,
            start_date=first_day, end_date=last_day
        )
        fitter.analyse()
        return fitter.rmsle()

    rmsle_scores = [
        _phase_rmsle(first_day, last_day)
        for (first_day, last_day) in zip(start_dates, end_dates)
    ]
    weights = range(1, len(rmsle_scores) + 1)
    return np.average(rmsle_scores, weights=weights)
def _curve_fitting(self, phase, info):
    """
    Fit the S-R curve of one phase and label the result with the phase name.

    Args:
        phase (str): phase name; "0th" is replaced with self.INITIAL
        info (dict[str]): start date, end date and population, read with
            the keys self.START, self.END and self.N

    Returns:
        (tuple)
            (pandas.DataFrame): result of Trend.result() with renamed columns
                - f"{phase}{self.P}": predicted values of Susceptible,
                  set to None when the RMSLE score exceeds self.max_rmsle
                - f"{phase}{self.A}": actual values of Susceptible
                - f"{phase}_{self.R}": Recovered
            (int): minimum value of R in the phase, used by the caller as
                the position of a vertical line
    """
    first_day = info[self.START]
    last_day = info[self.END]
    n_people = info[self.N]
    fitter = Trend(
        self.jhu_data, n_people, self.country,
        province=self.province,
        start_date=first_day, end_date=last_day
    )
    fitter.analyse()
    fitted = fitter.result()
    predicted_col = f"{self.S}{self.P}"
    # A poor fit is hidden by blanking the predicted values
    if fitter.rmsle() > self.max_rmsle:
        fitted[predicted_col] = None
    # Minimum R of the phase, returned for the vertical line
    min_recovered = int(fitted[self.R].min())
    # Label the columns with the phase name
    label = phase
    if phase == "0th":
        label = self.INITIAL
    renames = (
        (predicted_col, f"{label}{self.P}"),
        (f"{self.S}{self.A}", f"{label}{self.A}"),
        (f"{self.R}", f"{label}_{self.R}"),
    )
    # Applied one by one, in this order
    for (old_name, new_name) in renames:
        fitted = fitted.rename({old_name: new_name}, axis=1)
    return (fitted, min_recovered)
def show(self, show_figure=True, filename=None):
    """
    Create the phases and, if requested, show the S-R trend as a figure.

    Args:
        @show_figure <bool>: if True, show the result as a figure.
            If False, the phases are returned without any curve fitting.
        @filename <str>: filename of the figure, or None (show figure)

    Returns:
        <covsirphy.PhaseSeries>: the object created by self._create_phases()
    """
    # Create phases
    phase_series = self._create_phases()
    # Curve fitting is needed only for the figure: skip it when not plotting
    if not show_figure:
        return phase_series
    phase_dict = phase_series.to_dict()
    # Curve fitting
    df_list = []
    vlines = []
    for (phase, info) in phase_dict.items():
        start_date = info[self.START]
        end_date = info[self.END]
        population = info[self.N]
        trend = Trend(
            self.clean_df, population, self.country,
            province=self.province,
            start_date=start_date, end_date=end_date
        )
        trend.analyse()
        df = trend.result()
        # Get min value for vline
        vlines.append(df[self.R].min())
        # Rename the columns
        phase = phase.replace("0th", self.INITIAL)
        df = df.rename({f"{self.S}{self.P}": f"{phase}{self.P}"}, axis=1)
        df = df.rename({f"{self.S}{self.A}": f"{phase}{self.A}"}, axis=1)
        df = df.rename({f"{self.R}": f"{phase}_{self.R}"}, axis=1)
        df_list.append(df)
    # The result of the first phase is used only when there are no
    # change points (self.n_points == 0)
    if self.n_points == 0:
        comp_df = pd.concat(df_list, axis=1)
    else:
        comp_df = pd.concat(df_list[1:], axis=1)
    # Merge the per-phase columns into one Recovered column and one actual
    # Susceptible column; missing values are regarded as 0 in the sums
    comp_df[self.R] = comp_df.fillna(0).loc[
        :, comp_df.columns.str.endswith(self.R)
    ].sum(axis=1)
    comp_df[f"{self.S}{self.A}"] = comp_df.fillna(0).loc[
        :, comp_df.columns.str.endswith(self.A)
    ].sum(axis=1)
    # Values which cannot be parsed as numbers become NaN
    comp_df = comp_df.apply(
        lambda x: pd.to_numeric(x, errors="coerce", downcast="integer"),
        axis=0
    )
    # Show figure
    pred_cols = comp_df.loc[
        :, comp_df.columns.str.endswith(self.P)
    ].columns.tolist()
    if len(pred_cols) == 1:
        title = f"{self.area}: S-R trend without change points"
    else:
        change_str = ", ".join(self.change_dates)
        title = f"{self.area}: S-R trend changed on {change_str}"
    # The first two r-values are not drawn as vertical lines
    Trend.show_with_many(
        result_df=comp_df,
        predicted_cols=pred_cols,
        title=title,
        vlines=vlines[2:],
        filename=filename
    )
    return phase_series