예제 #1
0
    def history(self,
                param,
                roll_window=None,
                show_figure=True,
                filename=None,
                **kwargs):
        """
        Build a date-by-country table of one tracked value and optionally plot it.

        Args:
            param (str): column of the dataframe returned by self.track() to show
                (the original documentation lists parameter/day parameter/Rt/OxCGRT score)
            roll_window (int or None): window size of the rolling mean, or None to skip smoothing
            show_figure (bool): if True, pass the table to line_plot()
            filename (str): filename of the figure, or None (show figure)
            kwargs: keyword arguments forwarded to line_plot()

        Raises:
            KeyError: @param is not a column of the dataframe returned by self.track()

        Returns:
            pandas.DataFrame:
                Index
                    Date (pd.Timestamp) date
                Columns
                    (str) country names
                Values:
                    parameter values (rolling mean when @roll_window is given)
        """
        tracked = self.track()
        # Reject unknown parameters early, listing every selectable column
        if param not in tracked.columns:
            candidates = ', '.join(tracked.columns.tolist())
            raise KeyError(
                f"@param must be selected from {candidates}, but {param} was applied."
            )
        # One row per date, one column per country; the last record wins on duplicates
        wide = tracked.pivot_table(
            values=param, index=self.DATE, columns=self.COUNTRY, aggfunc="last")
        # Optional smoothing; the window is validated by the class helper first
        if roll_window is not None:
            validated_window = self._ensure_natural_int(
                roll_window, name="roll_window")
            wide = wide.rolling(window=validated_window).mean()
        if show_figure:
            # h=1 is passed only for Rt (presumably a horizontal reference line - confirm in line_plot)
            reference = 1 if param == self.RT else None
            line_plot(
                wide,
                title=f"History of {param} in each country",
                ylabel=param,
                h=reference,
                filename=filename,
                **kwargs)
        return wide
예제 #2
0
    def positive_rate(self,
                      country,
                      province=None,
                      window=7,
                      last_date=None,
                      show_figure=True,
                      filename=None):
        """
        Return the PCR rate of a country as a dataframe.

        Args:
            country(str): country name or ISO3 code
            province(str or None): province name
            window (int): window of moving average, >= 1
            last_date (str or None): the last date of the total tests records or None (max date of main dataset)
            show_figure (bool): if True, show the records as a line-plot.
            filename (str): filename of the figure, or None (display figure)

        Raises:
            covsirphy.PCRIncorrectPreconditionError: the dataset has too many missing values

        Returns:
            pandas.DataFrame
                Index
                    reset index
                Columns
                    - Date (pandas.TimeStamp): Observation date
                    - Tests (int): the number of total tests performed
                    - Confirmed (int): the number of confirmed cases
                    - Tests_diff (int): daily tests performed
                    - Confirmed_diff (int): daily confirmed cases
                    - Test_positive_rate (float): positive rate (%) of the daily cases over the total daily tests performed

        Note:
            If non monotonic records were found for either confirmed cases or tests,
            "with partially complemented tests data" will be added to the title of the figure.

        Note:
            The positive rate is set to 0 on days where the number of daily tests
            does not exceed self.min_pcr_tests.
        """
        window = self._ensure_natural_int(window, name="window")
        # Subset with area
        country_alias = self.ensure_country_name(country)
        province = province or self.UNKNOWN
        try:
            subset_df = self._subset_select(country_alias, province)
        except PCRIncorrectPreconditionError:
            # Re-raise with the names given by the caller; "from None" hides the internal traceback
            raise PCRIncorrectPreconditionError(
                country=country,
                province=province,
                message="Too many missing Tests records") from None
        # Limit tests records to last date
        if last_date is not None:
            subset_df = subset_df.loc[
                subset_df[self.DATE] <= pd.to_datetime(last_date)]
        # Process PCR data
        df, is_complemented = self._pcr_processing(subset_df, window)
        # Calculate PCR values with label-based, vectorized operations.
        # Positional access such as row[0] on a label-indexed Series is deprecated in pandas,
        # and the column-wise form also avoids a slow row-by-row apply.
        daily_confirmed = df[self.C_DIFF]
        daily_tests = df[self.T_DIFF]
        enough_tests = daily_tests > self.min_pcr_tests
        # Rows without enough tests (including zero or missing tests) get 0 instead of inf/NaN
        df[self.PCR_RATE] = daily_confirmed.div(daily_tests).mul(100).where(
            enough_tests, 0)
        if not show_figure:
            return df
        # Create figure
        area = self.area_name(country, province=province)
        comp_status = "\nwith partially complemented tests data" if is_complemented else ""
        line_plot(
            df.set_index(self.DATE)[self.PCR_RATE],
            title=f"{area}: Test positive rate (%) over time {comp_status}",
            ylabel="Test positive rate (%)",
            y_integer=True,
            filename=filename,
            show_legend=False,
        )
        return df