def history(self, param, roll_window=None, show_figure=True, filename=None, **kwargs):
    """
    Build a date-by-country table of one tracked parameter and optionally plot it.

    Args:
        param (str): name of the column of self.track() to show
            (parameter/day parameter/Rt/OxCGRT score)
        roll_window (int or None): window of the rolling average, or None (no smoothing)
        show_figure (bool): if True, draw the table with line_plot()
        filename (str): filename of the figure, or None (show figure)
        kwargs: keyword arguments passed through to line_plot()

    Raises:
        KeyError: @param is not a column of the records returned by self.track()

    Returns:
        pandas.DataFrame:
            Index
                Date (pd.Timestamp) date
            Columns
                (str) country names
            Values:
                parameter values (rolling mean when @roll_window is given)
    """
    tracked = self.track()
    # Reject unknown parameters, listing the selectable ones in the message
    if param not in tracked.columns:
        sel_param_str = ', '.join(tracked.columns.tolist())
        raise KeyError(
            f"@param must be selected from {sel_param_str}, but {param} was applied."
        )
    # One row per date and one column per country; the last record wins on duplicates
    table = tracked.pivot_table(
        values=param, index=self.DATE, columns=self.COUNTRY, aggfunc="last")
    # Optional smoothing
    if roll_window is not None:
        roll_window = self._ensure_natural_int(roll_window, name="roll_window")
        table = table.rolling(window=roll_window).mean()
    if show_figure:
        # A horizontal line at 1 is requested only for the Rt parameter
        horizontal = 1 if param == self.RT else None
        line_plot(
            table,
            title=f"History of {param} in each country",
            ylabel=param,
            h=horizontal,
            filename=filename,
            **kwargs)
    return table
def positive_rate(self, country, province=None, window=7, last_date=None, show_figure=True, filename=None):
    """
    Return the PCR test positive rate of an area as a dataframe.

    Args:
        country (str): country name or ISO3 code
        province (str or None): province name, or None (self.UNKNOWN is used)
        window (int): window of moving average, >= 1
        last_date (str or None): the last date of the total tests records or None (max date of main dataset)
        show_figure (bool): if True, show the records as a line-plot.
        filename (str): filename of the figure, or None (display figure)

    Raises:
        covsirphy.PCRIncorrectPreconditionError: the dataset has too many missing values

    Returns:
        pandas.DataFrame
            Index
                reset index
            Columns
                - Date (pandas.Timestamp): Observation date
                - Tests (int): the number of total tests performed
                - Confirmed (int): the number of confirmed cases
                - Tests_diff (int): daily tests performed
                - Confirmed_diff (int): daily confirmed cases
                - Test_positive_rate (float): positive rate (%) of the daily cases over the total daily tests performed

    Note:
        If non monotonic records were found for either confirmed cases or tests,
        "with partially complemented tests data" will be added to the title of the figure.

    Note:
        The rate is set to 0 on days where the number of daily tests is not
        greater than self.min_pcr_tests.
    """
    window = self._ensure_natural_int(window, name="window")
    # Subset with area
    country_alias = self.ensure_country_name(country)
    province = province or self.UNKNOWN
    try:
        subset_df = self._subset_select(country_alias, province)
    except PCRIncorrectPreconditionError:
        # Re-raise with the names the caller supplied and without the inner traceback
        raise PCRIncorrectPreconditionError(
            country=country, province=province, message="Too many missing Tests records") from None
    # Limit tests records to last date
    if last_date is not None:
        subset_df = subset_df.loc[subset_df[self.DATE] <= pd.to_datetime(last_date)]
    # Process PCR data
    df, is_complemented = self._pcr_processing(subset_df, window)
    # Calculate PCR values.
    # Vectorised on purpose: a row-wise apply() with x[0] / x[1] relies on positional
    # access of a label-indexed Series (deprecated in pandas 2.x) and, as far as pandas
    # documents, yields a DataFrame instead of a Series when there are no rows.
    daily_tests = df[self.T_DIFF]
    rate = df[self.C_DIFF] / daily_tests * 100
    # Days with too few tests (including zero tests, where the ratio is inf/NaN) get 0
    df[self.PCR_RATE] = rate.where(daily_tests > self.min_pcr_tests, 0)
    if not show_figure:
        return df
    # Create figure
    area = self.area_name(country, province=province)
    comp_status = "\nwith partially complemented tests data" if is_complemented else ""
    line_plot(
        df.set_index(self.DATE)[self.PCR_RATE],
        title=f"{area}: Test positive rate (%) over time {comp_status}",
        ylabel="Test positive rate (%)",
        y_integer=True,
        filename=filename,
        show_legend=False,
    )
    return df