def history(self, target, y0_dict=None, show_figure=True, filename=None):
    """
    Track one variable/parameter over time so that scenarios can be compared.

    Args:
        target (str): parameter or variable to show (Rt etc.)
        y0_dict (dict or None): dictionary of initial values or None
            - key (str): variable name
            - value (float): initial value
        show_figure (bool): If True, show the result as a figure
        filename (str): filename of the figure, or None (show figure)

    Raises:
        KeyError: @target is not a column of the dataframe returned by self.track()

    Returns:
        pandas.DataFrame: pivot table of @target
            Index: values of the self.DATE column
            Columns: values of the self.SERIES column
            Values: last value of @target for each (date, series) pair
    """
    tracked_df = self.track(y0_dict=y0_dict)
    available = tracked_df.columns
    if target not in available:
        col_str = ", ".join(list(available))
        raise KeyError(
            f"@target must be selected from {col_str}, but {target} was applied."
        )
    pivot_df = tracked_df.pivot_table(
        values=target, index=self.DATE, columns=self.SERIES, aggfunc="last")
    if not show_figure:
        return pivot_df
    is_rt = target == self.RT
    # Vertical lines: start dates of the "Main" series units, except the first one
    main_series = self._series_dict["Main"]
    change_dates = [unit.start_date for unit in main_series][1:]
    label = self.RT_FULL if is_rt else target
    title = f"{self.area}: {label} over time"
    # A horizontal line at 1.0 is drawn only when Rt is shown
    line_plot(
        pivot_df, title, ylabel=target, h=1.0 if is_rt else None,
        v=change_dates, math_scale=False, filename=filename)
    return pivot_df
def records(self, show_figure=True, filename=None):
    """
    Return the records of the area, optionally showing them as a line plot.

    Args:
        show_figure (bool): if True, show the records as a line-plot.
        filename (str): filename of the figure, or None (show figure)

    Returns:
        pandas.DataFrame: the dataframe returned by
            self.jhu_data.subset(country=self.country, province=self.province).
            According to the original documentation of this method (not
            verifiable from here), it has a reset index and the columns
            Date, Confirmed, Infected, Fatal and Recovered.

    Notes:
        The original documentation states that records with Recovered > 0
        are selected; that filtering is not done in this method itself.
        The figure omits the self.C column and uses self.DATE as the x-axis.
    """
    records_df = self.jhu_data.subset(
        country=self.country, province=self.province)
    if show_figure:
        plotted_df = records_df.set_index(self.DATE).drop(self.C, axis=1)
        line_plot(
            plotted_df, f"{self.area}: Cases over time",
            y_integer=True, filename=filename)
    return records_df
def param_history(self, targets=None, box_plot=True, **kwargs):
    """
    Show the ratio of each row of self.param_df to its first row as a figure.

    Args:
        targets (list[str] or str or None): parameters to show (including Rt etc.).
            If None, all columns of self.param_df are used.
        box_plot (bool): if True, bar plot (pandas.DataFrame.plot.bar).
            If False, line plot.
        kwargs: keyword arguments of pd.DataFrame.plot.bar() or line_plot()

    Notes:
        - "R0" in @targets is replaced with "Rt".
        - In the bar plot, rows are labeled "<start_date>-<end_date>";
          a "-" in the end date is replaced with "today".
        - In the line plot, rows are numbered from 1 (x-axis label "Phase").
    """
    # presumably refreshes self.param_df; the return value is not needed here
    _ = self.show_parameters()
    targets = self.param_df.columns if targets is None else targets
    targets = [targets] if isinstance(targets, str) else targets
    if "R0" in targets:
        targets = [t.replace("R0", "Rt") for t in targets]
    df = self.param_df.loc[:, targets]
    # Build the row labels by column name. The previous row-wise apply used
    # x[0]/x[1], i.e. positional access on a label-indexed Series, which is
    # deprecated in pandas and stops working once the fallback is removed.
    df.index = [
        f"{start}-{end.replace('-', 'today')}"
        for start, end in zip(
            self.param_df["start_date"], self.param_df["end_date"])
    ]
    # Ratio to the first row
    df = df / df.iloc[0]
    if box_plot:
        df.plot.bar(title="Ratio to 1st parameters", **kwargs)
        plt.xticks(rotation=0)
        plt.legend(bbox_to_anchor=(1.02, 0), loc="lower left", borderaxespad=0)
        plt.show()
    else:
        _df = df.reset_index(drop=True)
        _df.index = _df.index + 1
        line_plot(
            _df, title="Ratio to 1st parameters",
            xlabel="Phase", ylabel=str(), math_scale=False,
            **kwargs
        )
def param_history(self, targets=None, name="Main", divide_by_first=True,
                  show_figure=True, filename=None, show_box_plot=True, **kwargs):
    """
    Return subset of summary.

    Args:
        targets (list[str] or str or None): parameters to show (Rt etc.),
            passed to self._param_history()
        name (str): phase series name
        divide_by_first (bool): if True, divide the values by 1st phase's values
        show_box_plot (bool): if True, box plot. if False, line plot
        show_figure (bool): If True, show the result as a figure
        filename (str): filename of the figure, or None (show figure)
        kwargs: only checked for the obsolete 'box_plot' keyword;
            other keywords are accepted but not forwarded to the plots

    Raises:
        KeyError: 'box_plot' was given as a keyword argument,
            or @name has not been registered in self.series_dict

    Returns:
        (pandas.DataFrame): the dataframe of self._param_history(),
            divided by its first row when @divide_by_first is True

    Notes:
        If 'Main' was used as @name, main PhaseSeries will be used.
    """
    # Check arguments
    if "box_plot" in kwargs:
        raise KeyError("Please use 'show_box_plot', not 'box_plot'")
    name = self.MAIN if name == "Main" else name
    if name not in self.series_dict:
        raise KeyError(f"@name {name} scenario has not been registered.")
    # Select target to show
    df = self._param_history(targets, name)
    # Divide by the first phase parameters
    if divide_by_first:
        df = df / df.iloc[0, :]
        title = f"{self.area}: Ratio to 1st phase parameters ({name} scenario)"
    else:
        title = f"{self.area}: History of parameter values ({name} scenario)"
    if not show_figure:
        return df
    if show_box_plot:
        # Decide whether Rt is shown without assuming @targets is a list:
        # - None used to raise TypeError ("in None") when divide_by_first=False
        # - a str used to be tested as a substring instead of as a name
        if targets is None:
            shows_rt = self.RT in df.columns
        else:
            target_list = [targets] if isinstance(targets, str) else targets
            shows_rt = self.RT in target_list
        h_values = [1.0] if divide_by_first or shows_rt else None
        box_plot(df, title, h=h_values, filename=filename)
        return df
    # Line plot with phases numbered from 1
    _df = df.reset_index(drop=True)
    _df.index = _df.index + 1
    h = 1.0 if divide_by_first else None
    line_plot(
        _df, title=title,
        xlabel="Phase", ylabel=str(), math_scale=False, h=h, filename=filename
    )
    return df
def param_history(self, param, roll_window=None, show_figure=True, filename=None, **kwargs):
    """
    Return subset of summary and show a figure to show the history in each country.

    Args:
        param (str): parameter to show
        roll_window (int or None): rolling average window if necessary
        show_figure (bool): If True, show the result as a figure
        filename (str): filename of the figure, or None (show figure)
        kwargs: keyword arguments of line_plot()

    Raises:
        TypeError: self.model is None (estimation has not been done)
        KeyError: @param is not a parameter/day parameter of the model or Rt

    Returns:
        pandas.DataFrame:
            Index: dates (every date between the start and end of each phase)
            Columns: values of the self.COUNTRY column
            Values: parameter values (rolling mean if @roll_window is given)
    """
    if self.model is None:
        raise TypeError(
            "PolicyMeasures.estimate(model) must be done in advance.")
    selectable_params = [
        *self.model.PARAMETERS, *self.model.DAY_PARAMETERS, self.RT
    ]
    if param not in selectable_params:
        sel_param_str = ', '.join(selectable_params)
        raise KeyError(
            f"@param must be selected from {sel_param_str}, but {param} was applied."
        )
    # Get the parameter value of each date
    df = self.summary().reset_index()
    df[self.START] = pd.to_datetime(df[self.START], format=self.DATE_FORMAT)
    df[self.END] = pd.to_datetime(df[self.END], format=self.DATE_FORMAT)
    # One list of dates per phase. Rows are accessed by column label because
    # positional access (x[0], x[1]) on a label-indexed Series is deprecated.
    df[self.DATE] = df[[self.START, self.END]].apply(
        lambda row: pd.date_range(row[self.START], row[self.END]).tolist(),
        axis=1)
    # One row per date
    df = df.explode(self.DATE)
    df = df.pivot_table(values=param, index=self.DATE, columns=self.COUNTRY)
    # Rolling mean
    if roll_window is not None:
        roll_window = self.ensure_natural_int(roll_window, name="roll_window")
        df = df.rolling(window=roll_window).mean()
    # Show figure
    if not show_figure:
        return df
    # @filename and @kwargs were documented but silently dropped before
    line_plot(
        df, title=f"History of {param} in each country",
        ylabel=param, h=1 if param == self.RT else None,
        filename=filename, **kwargs)
    return df
def simulate(self, name="Main", y0_dict=None, show_figure=True, filename=None):
    """
    Simulate ODE models with set parameter values and show it as a figure.

    Args:
        name (str): phase series name. If 'Main', main PhaseSeries will be used
        y0_dict (dict or None): dictionary of initial values or None
            - key (str): variable name
            - value (float): initial value
            - if model will be changed in the later phase, must be specified
        show_figure (bool): if True, show the result as a figure.
        filename (str): filename of the figure, or None (show figure)

    Raises:
        KeyError: no phase of the series has self.FUTURE in its self.TENSE column

    Returns:
        (pandas.DataFrame)
            Index: reset index
            Columns:
                - Date (str): date formatted with self.DATE_FORMAT
                - the other columns of the simulated data (int),
                  daily means cast to 64-bit integers
    """
    scenario = self.MAIN if name == "Main" else name
    summary_df = self.series_dict[scenario].summary()
    # Future phases must be added in advance
    tenses = summary_df[self.TENSE].unique()
    if self.FUTURE not in tenses:
        raise KeyError(
            f"Future phases of {scenario} scenario must be registered by Scenario.add_phase() in advance."
        )
    # Simulation: one integer record per day
    simulated_df, start_objects = self._simulate(name=scenario, y0_dict=y0_dict)
    daily_df = simulated_df.set_index(self.DATE).resample("D").mean()
    daily_df = daily_df.astype(np.int64)
    # Returned table: date strings as the first column, plain integer index
    dim_df = daily_df.reset_index(drop=True)
    dim_df.insert(0, self.DATE, daily_df.index.strftime(self.DATE_FORMAT))
    if show_figure:
        # Keep the order of self.FIG_COLUMNS, using only the available columns
        fig_cols = [col for col in self.FIG_COLUMNS if col in daily_df.columns]
        line_plot(
            daily_df[fig_cols],
            title=f"{self.area}: Predicted number of cases",
            filename=filename,
            y_integer=True,
            v=start_objects[1:]
        )
    return dim_df
def show_record(self):
    """
    Show the records as a line plot and return them.

    Returns:
        the unchanged self.record_df; the figure omits the "Confirmed"
        column and uses "Date" as the x-axis.
    """
    plotted_df = self.record_df.drop("Confirmed", axis=1).set_index("Date")
    line_plot(plotted_df, f"{self.name}: Cases over time", y_integer=True)
    return self.record_df
def history(self, param, roll_window=None, show_figure=True, filename=None, **kwargs):
    """
    Return the history of one tracked column for all countries, optionally as a figure.

    Args:
        param (str): parameter/day parameter/Rt/OxCGRT score to show
        roll_window (int or None): rolling average window if necessary
        show_figure (bool): If True, show the result as a figure
        filename (str): filename of the figure, or None (show figure)
        kwargs: keyword arguments of line_plot()

    Raises:
        KeyError: @param is not a column of the dataframe returned by self.track()

    Returns:
        pandas.DataFrame:
            Index: values of the self.DATE column
            Columns: values of the self.COUNTRY column
            Values: last value of @param for each (date, country) pair,
                or its rolling mean if @roll_window is given
    """
    tracked_df = self.track()
    # Select the param
    if param not in tracked_df.columns:
        sel_param_str = ', '.join(tracked_df.columns.tolist())
        raise KeyError(
            f"@param must be selected from {sel_param_str}, but {param} was applied."
        )
    history_df = tracked_df.pivot_table(
        values=param, index=self.DATE, columns=self.COUNTRY, aggfunc="last")
    # Rolling mean
    if roll_window is not None:
        window = self.ensure_natural_int(roll_window, name="roll_window")
        history_df = history_df.rolling(window=window).mean()
    # Show figure (horizontal line at 1 only for Rt)
    if show_figure:
        line_plot(
            history_df, title=f"History of {param} in each country",
            ylabel=param,
            h=1 if param == self.RT else None,
            filename=filename, **kwargs)
    return history_df
def line_plot(self, df, show_figure=True, filename=None, **kwargs):
    """
    Display or save a line plot of the dataframe.

    Args:
        df (pandas.DataFrame): data passed to the module-level line_plot()
        show_figure (bool): whether show figure when interactive mode or not
        filename (str or None): filename of the figure or None (not save) when script mode
        kwargs: the other keyword arguments of the module-level line_plot()

    Returns:
        the return value of the module-level line_plot(), or None if nothing was plotted

    Note:
        When interactive mode and @show_figure is True, display the figure
        (the filename is ignored).
        When script mode and filename is not None, save the figure.
        Otherwise, nothing is done.
        When using interactive shell, we can change the modes by Scenario.interactive = True/False.
    """
    if self._interactive:
        # Interactive mode: display only, never save
        if show_figure:
            return line_plot(df=df, filename=None, **kwargs)
        return None
    # Script mode: save only
    if filename is not None:
        return line_plot(df=df, filename=filename, **kwargs)
    return None
def simulate(self, name="Main", y0_dict=None, show_figure=True, filename=None):
    """
    Simulate ODE models with set parameter values and show it as a figure.

    Args:
        name (str): phase series name. If 'Main', main PhaseSeries will be used
        y0_dict (dict or None): dictionary of initial values or None
            - key (str): variable name
            - value (float): initial value
        show_figure (bool): if True, show the result as a figure.
        filename (str): filename of the figure, or None (show figure)

    Returns:
        (pandas.DataFrame): the dataframe returned by the series' simulate().
            According to the original documentation of this method:
            Index: reset index
            Columns:
                - Date (pd.TimeStamp): Observation date
                - Country (str): country/region name
                - Province (str): province/prefecture/state name
                - Variables of the model and dataset (int): Confirmed etc.
    """
    series = self._ensure_name(name)
    # Simulation
    sim_df = series.simulate(record_df=self.record_df, y0_dict=y0_dict)
    if show_figure:
        indexed_df = sim_df.set_index(self.DATE)
        # Keep the order of self.FIG_COLUMNS, using only the available columns
        fig_cols = [col for col in self.FIG_COLUMNS if col in indexed_df.columns]
        # Vertical lines: start dates of the units, except the first one
        change_dates = [unit.start_date for unit in series][1:]
        line_plot(
            indexed_df[fig_cols],
            title=f"{self.area}: Predicted number of cases ({name} scenario)",
            filename=filename, y_integer=True, v=change_dates)
    return sim_df
def restore_graph(self, drop_cols=None, min_infected=1, **kwargs):
    """
    Show the dimensional simulated data as a figure.

    Args:
        drop_cols (list[str] or None): the columns not to be shown
        min_infected (int): passed to self.restore_df(); according to the
            original documentation, records with Infected < min_infected
            will not be used
        kwargs: keyword arguments of line_plot() function

    Notes:
        Vertical lines are drawn at every element of self.axvlines except
        the last one. If self.axvlines has exactly one element, today
        (at midnight) is drawn instead.
        A horizontal line is drawn at self.total_population.
    """
    restored_df = self.restore_df(min_infected=min_infected)
    if drop_cols is not None:
        restored_df = restored_df.drop(drop_cols, axis=1)
    today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    if len(self.axvlines) == 1:
        vlines = [today, *self.axvlines]
    else:
        # Copy so that self.axvlines itself is never touched
        vlines = self.axvlines[:]
    title = f"{self.name}: {', '.join(self.title_list)}"
    line_plot(
        restored_df, title=title,
        v=vlines[:-1], h=self.total_population,
        **kwargs
    )
def predict_graph(self, step_n, name=None, excluded_cols=None):
    """
    Predict the values in the future and create a figure.

    Args:
        step_n (int): the number of steps, passed to self.predict_df()
        name (str or None): name of the area, used only when self.name is None
            (an empty string is used if both are None)
        excluded_cols (list[str] or None): the excluded columns in the figure

    Notes:
        A vertical line is drawn at today (midnight) and a horizontal line
        at self.total_population.
    """
    # self.name has priority over the argument
    if self.name is not None:
        area = self.name
    elif name is None:
        area = str()
    else:
        area = name
    predicted_df = self.predict_df(step_n=step_n)
    if excluded_cols is not None:
        predicted_df = predicted_df.drop(excluded_cols, axis=1)
    r0 = self.param_dict["R0"]
    title = f"Prediction in {area} with {self.model.NAME} model: R0 = {r0}"
    today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    line_plot(predicted_df, title, v=today, h=self.total_population)
def history_rate(self, params=None, name="Main", show_figure=True, filename=None):
    """
    Show change rates of parameter values in one figure.
    We can find the parameters which increased/decreased significantly.

    Args:
        params (list[str] or set[str] or None): parameters to show.
            If None, or if none of them is available, all available
            parameters of the model are shown.
        name (str): phase series name
        show_figure (bool): If True, show the result as a figure
        filename (str): filename of the figure, or None (show figure)

    Raises:
        TypeError: @params is neither None, a list nor a set

    Returns:
        pandas.DataFrame: parameter values divided by the values of the first row
            Index: index of the dataframe returned by self._track_param()
            Columns: selected parameters, in the order of model.PARAMETERS
    """
    df = self._track_param(name=name)
    model = self._series_dict[name].unit("last").model
    # Keep the order of model.PARAMETERS. Building the list from a set
    # intersection made the column (and legend) order depend on string hashing,
    # which varies between interpreter runs.
    cols = [param for param in model.PARAMETERS if param in df.columns]
    if params is not None:
        if not isinstance(params, (list, set)):
            raise TypeError(
                f"@params must be a list of parameters, but {params} were applied."
            )
        # Fall back to all available parameters when nothing matches
        cols = [col for col in cols if col in params] or cols
    # Ratio to the values of the first row
    df = df.loc[:, cols] / df.loc[df.index[0], cols]
    if show_figure:
        series = self._series_dict[name]
        # Vertical lines: start dates of the units, except the first one
        change_dates = [unit.start_date for unit in series][1:]
        f_date = df.index[0].strftime(self.DATE_FORMAT)
        title = f"{self.area}: {model.NAME} parameter change rates over time (1.0 on {f_date})"
        ylabel = f"Value per that on {f_date}"
        line_plot(
            df, title, ylabel=ylabel, v=change_dates, math_scale=False, filename=filename)
    return df
def compare_estimated_numbers(self, phases=None):
    """
    Compare the observed number of confirmed cases with the numbers
    estimated with the parameters of each phase, and show one graph per phase.

    Args:
        phases (list[str] or None): phases to show (if None, all keys of self.phase_dict)

    Notes:
        - For each estimator in self.estimator_dict, a Predicter is run from
          the estimator's start time until today, and the sum of all restored
          columns except "Susceptible" is used as the estimated number.
        - The estimated columns are named "<phase>_param" after the keys of
          self.phase_dict; this assumes self.estimator_dict and
          self.phase_dict have the same number of entries in the same order
          (TODO confirm).
        - Zeros (including missing values filled with 0) are not plotted.
    """
    selected = list(self.phase_dict) if phases is None else phases
    # Observed
    observed = self.record_df.set_index("Date")["Confirmed"]
    df = pd.DataFrame(observed)
    # Estimated
    for estimator in self.estimator_dict.values():
        model, info_dict, param_dict = estimator.info()
        elapsed = datetime.today() - info_dict["start_time"]
        day_n = int(elapsed.total_seconds() / 60 / 60 / 24 + 1)
        predicter = Predicter(**info_dict)
        predicter.add(model, end_day_n=day_n, **param_dict)
        # Calculate the number of confirmed cases
        restored_df = predicter.restore_df()
        estimated = restored_df.drop("Susceptible", axis=1).sum(axis=1)
        estimated = estimated.resample("D").last()
        df = pd.concat([df, estimated], axis=1)
    # Show graph
    df = df.fillna(0).astype(np.int64)
    df.columns = ["Observed", *(f"{phase}_param" for phase in self.phase_dict)]
    first_date = self.phase_dict["1st"]["start_date"]
    last_date = self.record_df["Date"].max()
    df = df.loc[first_date:last_date, :]
    for col in df.columns[1:]:
        # The phase name is the part before the first underscore
        if col.partition("_")[0] in selected:
            line_plot(
                df.replace(0, np.nan)[["Observed", col]],
                f"Confirmed cases over time: Actual and predicted with {col}",
                y_integer=True
            )
def positive_rate(self, country, province=None, window=7, show_figure=True, filename=None):
    """
    Return the PCR rate of a country as a dataframe.

    Args:
        country (str): country name or ISO3 code
        province (str or None): province name
        window (int): window of moving average, >= 1
        show_figure (bool): if True, show the records as a line-plot.
        filename (str): filename of the figure, or None (display figure)

    Raises:
        covsirphy.PCRIncorrectPreconditionError: the dataset has too many missing values

    Returns:
        pandas.DataFrame
            Index
                reset index
            Columns
                - Date (pandas.TimeStamp): Observation date
                - Tests (int): the number of total tests performed
                - Confirmed (int): the number of confirmed cases
                - Tests_diff (int): daily tests performed
                - Confirmed_diff (int): daily confirmed cases
                - Test_positive_rate (float): positive rate (%) of the daily cases over the total daily tests performed

    Note:
        If non monotonic records were found for either confirmed cases or tests,
        "with partially complemented tests data" will be added to the title of the figure.
        The positive rate is set to 0 on days whose daily tests are not
        greater than self.min_pcr_tests.
    """
    window = self._ensure_natural_int(window, name="window")
    # Subset with area
    country_alias = self.ensure_country_name(country)
    province = province or self.UNKNOWN
    try:
        subset_df = self._subset_select(country_alias, province)
    except PCRIncorrectPreconditionError:
        # Re-raise with the names given by the caller; the original context is suppressed
        raise PCRIncorrectPreconditionError(
            country=country, province=province, message="Too many missing Tests records") from None
    # Process PCR data
    df, is_complemented = self._pcr_processing(subset_df, window)
    # Calculate PCR values. Rows are accessed by column label because
    # positional access (x[0], x[1]) on a label-indexed Series is deprecated.
    df[self.PCR_RATE] = df[[self.C_DIFF, self.T_DIFF]].apply(
        lambda row: row[self.C_DIFF] / row[self.T_DIFF] * 100
        if row[self.T_DIFF] > self.min_pcr_tests else 0,
        axis=1)
    if not show_figure:
        return df
    # Create figure
    area = self.area_name(country, province=province)
    comp_status = "\nwith partially complemented tests data" if is_complemented else ""
    line_plot(
        df.set_index(self.DATE)[self.PCR_RATE],
        title=f"{area}: Test positive rate (%) over time {comp_status}",
        ylabel="Test positive rate (%)",
        y_integer=True,
        filename=filename,
        show_legend=False,
    )
    return df