예제 #1
0
    def simulate(self,
                 name="Main",
                 y0_dict=None,
                 show_figure=True,
                 filename=None):
        """
        Simulate the ODE models with the registered parameter values and,
        optionally, show the result as a figure.

        Args:
            name (str): phase series name. If 'Main', main PhaseSeries will be used
            y0_dict (dict):
                - key (str): variable name
                - value (float): initial value
                - dictionary of initial values or None
                - if model will be changed in the later phase, must be specified
            show_figure (bool):
                - if True, show the result as a figure.
                - if False, only the dataframe is returned.
            filename (str): filename of the figure, or None (show figure)

        Returns:
            (pandas.DataFrame)
                Index:
                    reset index
                Columns:
                    - Date (str): date formatted with self.DATE_FORMAT
                    - the other columns returned by self._simulate() (int):
                      daily mean values casted to int64

        Raises:
            KeyError: the scenario is not registered,
                or no future phase is registered in the scenario

        Notes:
            NOTE(review): the former docstring also listed Country and Province
            columns; confirm against the output of self._simulate().
        """
        name = self.MAIN if name == "Main" else name
        # Fail with an explicit message, as param_history() does
        if name not in self.series_dict:
            raise KeyError(f"@name {name} scenario has not been registered.")
        df = self.series_dict[name].summary()
        # Future phases must be added in advance
        if self.FUTURE not in df[self.TENSE].unique():
            raise KeyError(
                f"Future phases of {name} scenario must be registered by Scenario.add_phase() in advance."
            )
        # Simulation
        dim_df, start_objects = self._simulate(name=name, y0_dict=y0_dict)
        # Aggregate to one record per day and use integers
        dim_df = dim_df.set_index(self.DATE).resample("D").mean()
        dim_df = dim_df.astype(np.int64)
        # The figure uses the date-indexed values
        fig_df = dim_df.copy()
        # The returned dataframe has the date as the first column (string)
        dim_df[self.DATE] = dim_df.index.strftime(self.DATE_FORMAT)
        dim_df = dim_df.reset_index(drop=True)
        dim_df = dim_df.loc[:, [self.DATE, *dim_df.columns.tolist()[:-1]]]
        # Return dataframe if figure is not needed
        if not show_figure:
            return dim_df
        # Show figure: keep the order of self.FIG_COLUMNS
        fig_cols_set = set(fig_df.columns) & set(self.FIG_COLUMNS)
        fig_cols = [col for col in self.FIG_COLUMNS if col in fig_cols_set]
        # All start objects except the first one are passed as @v
        # (presumably vertical lines at phase changes; confirm with line_plot)
        line_plot(fig_df[fig_cols],
                  title=f"{self.area}: Predicted number of cases",
                  filename=filename,
                  y_integer=True,
                  v=start_objects[1:])
        return dim_df
예제 #2
0
 def records(self, show_figure=True, filename=None):
     """
     Get the records of the area as a dataframe and optionally plot them.
     @show_figure <bool>:
         - if True, show the records as a line-plot.
         - if False, only return the dataframe.
     @filename <str>: filename of the figure, or None (show figure)
     @return: the subset returned by self.jhu_data.subset()
     """
     records_df = self.jhu_data.subset(self.country, province=self.province)
     if show_figure:
         # Plot with the date as the index and without the self.C column
         plot_df = records_df.set_index(self.DATE).drop(self.C, axis=1)
         line_plot(plot_df,
                   f"{self.area}: Cases over time",
                   y_integer=True,
                   filename=filename)
     return records_df
예제 #3
0
 def param_history(self,
                   targets=None,
                   name="Main",
                   divide_by_first=True,
                   show_figure=True,
                   filename=None,
                   box_plot=True,
                   **kwargs):
     """
     Return the selected parameter columns of the phase summary and plot them.
     @targets <list[str]/str>: parameters to show (Rt etc.)
         - if None, all selectable columns will be used
     @name <str>: phase series name
         - if 'Main', main PhaseSeries will be used
     @divide_by_first <bool>: if True, divide the values by 1st phase's values
     @box_plot <bool>: if True, bar plot with pd.DataFrame.plot.bar(). if False, line plot.
     @show_figure <bool>:
         - passed to line_plot() when @box_plot is False
         - not consulted when @box_plot is True
     @filename <str>: filename of the figure, or None (show figure)
     @kwargs: accepted, but not forwarded to the plotting functions at present
     @return <pd.DataFrame>: values of @targets (ratios when @divide_by_first is True)
     @raise KeyError: @name is not registered, or @targets is not a subset of the selectable columns
     """
     # A non-interactive backend is used when the figure is saved to a file
     if filename is not None:
         plt.switch_backend("Agg")
     name = self.MAIN if name == "Main" else name
     if name not in self.series_dict:
         raise KeyError(f"@name {name} scenario has not been registered.")
     df = self.series_dict[name].summary()
     # Selectable columns: population, model parameters, Rt and day parameters
     model_param_nest = [m.PARAMETERS for m in self.model_dict.values()]
     model_day_nest = [m.DAY_PARAMETERS for m in self.model_dict.values()]
     model_parameters = self.flatten(model_param_nest)
     model_day_params = self.flatten(model_day_nest)
     selectable_cols = [
         self.N, *model_parameters, self.RT, *model_day_params
     ]
     targets = [targets] if isinstance(targets, str) else targets
     targets = selectable_cols if targets is None else targets
     if not set(targets).issubset(set(selectable_cols)):
         raise KeyError(
             f"@targets must be a subset of {', '.join(selectable_cols)}.")
     df = df.loc[:, targets]
     if divide_by_first:
         # Ratio to the values of the first row (1st phase)
         df = df / df.iloc[0, :]
         title = f"{self.area}: Ratio to 1st phase parameters ({name} scenario)"
     else:
         title = f"{self.area}: History of parameter values ({name} scenario)"
     if box_plot:
         df.plot.bar(title=title)
         plt.xticks(rotation=0)
         # Reference line at 1.0 for ratios and for Rt
         if divide_by_first or self.RT in targets:
             plt.axhline(y=1.0, color="black", linestyle=":")
         plt.legend(bbox_to_anchor=(1.02, 0),
                    loc="lower left",
                    borderaxespad=0)
         plt.tight_layout()
         if filename is None:
             plt.show()
             return df
         plt.savefig(filename,
                     bbox_inches="tight",
                     transparent=False,
                     dpi=300)
         plt.clf()
         return df
     # Line plot: x-axis is the phase number starting from 1
     _df = df.reset_index(drop=True)
     _df.index = _df.index + 1
     h = 1.0 if divide_by_first else None
     line_plot(_df,
               title=title,
               xlabel="Phase",
               ylabel=str(),
               math_scale=False,
               h=h,
               show_figure=show_figure,
               filename=filename)
     # Return the dataframe on the line-plot path too, as documented
     return df
예제 #4
0
 def simulate(self,
              name="Main",
              y0_dict=None,
              show_figure=True,
              filename=None):
     """
     Simulate ODE models phase by phase with the registered parameter values.
     @name <str>: phase series name
         - if 'Main', main PhaseSeries will be used
     @y0_dict <dict[str]=float>:
         - dictionary of initial values or None
         - used for the phases after the first one
         - if model will be changed in the later phase, must be specified
     @show_figure <bool>:
         - if True, show the result as a figure.
     @filename <str>: filename of the figure, or None (show figure)
     @return <pd.DataFrame>
         - index <int>: reset index
         - Date <str>: date formatted with self.DATE_FORMAT
         - the other columns <int>: daily mean values of the simulator output
         - NOTE(review): the former docstring also listed Country and Province;
           confirm against the output of ODESimulator.dim()
     @raise KeyError: the scenario is not registered,
         or no future phase is registered in the scenario
     """
     # TODO: Refactoring, split this method
     name = self.MAIN if name == "Main" else name
     # Fail with an explicit message, as param_history() does
     if name not in self.series_dict:
         raise KeyError(f"@name {name} scenario has not been registered.")
     series = self.series_dict[name]
     df = series.summary()
     # Future must be added in advance
     if self.FUTURE not in df[self.TENSE].unique():
         raise KeyError(
             f"Future phases of {name} scenario must be registered by Scenario.add_phase() in advance."
         )
     simulator = ODESimulator(
         self.country,
         province="-" if self.province is None else self.province)
     # Label of the first phase does not change in the loop
     first_phase = self.num2str(1)
     start_dates = list()
     for phase in df.index:
         model_name = df.loc[phase, self.ODE]
         model = self.model_dict[model_name]
         start_obj = self.date_obj(df.loc[phase, self.START])
         start_dates.append(start_obj)
         end_obj = self.date_obj(df.loc[phase, self.END])
         # Number of steps: phase length in seconds over (60 * tau)
         phase_seconds = (end_obj - start_obj).total_seconds() + 1
         step_n = round(phase_seconds / (60 * self.tau))
         population = df.loc[phase, self.N]
         param_dict = df[model.PARAMETERS].to_dict(orient="index")[phase]
         if phase == first_phase:
             # Calculate initial values from the records
             nondim_data = NondimData(self.clean_df,
                                      country=self.country,
                                      province=self.province)
             nondim_df = nondim_data.make(model, population)
             # Key of phase_dict which equals to the start date;
             # IndexError will be raised when no key matches
             init_index = [
                 date_obj for date_obj in series.phase_dict
                 if date_obj == start_obj
             ][0]
             y0_dict_phase = {
                 v: nondim_df.loc[init_index, v]
                 for v in model.VARIABLES
             }
         else:
             # Later phases use a copy of @y0_dict, or None if it cannot be copied
             try:
                 y0_dict_phase = y0_dict.copy()
             except AttributeError:
                 y0_dict_phase = None
         simulator.add(model,
                       step_n,
                       population,
                       param_dict=param_dict,
                       y0_dict=y0_dict_phase)
     simulator.run()
     dim_df = simulator.dim(self.tau, df.loc[first_phase, self.START])
     # Aggregate to one record per day
     dim_df = dim_df.set_index(self.DATE).resample("D").mean()
     # TODO: smoothing the values
     dim_df = dim_df.astype(np.int64)
     # The figure uses the date-indexed values
     fig_df = dim_df.copy()
     # The returned dataframe has the date as the first column (string)
     dim_df[self.DATE] = dim_df.index.strftime(self.DATE_FORMAT)
     dim_df = dim_df.reset_index(drop=True)
     dim_df = dim_df.loc[:, [self.DATE, *dim_df.columns.tolist()[:-1]]]
     if not show_figure:
         return dim_df
     # Show figure: keep the order of self.FIG_COLUMNS
     fig_cols_set = set(fig_df.columns) & set(self.FIG_COLUMNS)
     fig_cols = [col for col in self.FIG_COLUMNS if col in fig_cols_set]
     # TODO: add vertical lines to line-plot with tau and step_n
     line_plot(fig_df[fig_cols],
               title=f"{self.area}: Predicted number of cases",
               filename=filename,
               y_integer=True,
               v=start_dates[1:])
     return dim_df
예제 #5
0
    def param_history(self,
                      targets=None,
                      name="Main",
                      divide_by_first=True,
                      show_figure=True,
                      filename=None,
                      show_box_plot=True,
                      **kwargs):
        """
        Select parameter columns from the phase summary and plot their history.

        Args:
            targets (list[str]/str): parameters to show (Rt etc.), or None (all selectable columns)
            name (str): phase series name
            divide_by_first (bool): if True, divide the values by 1st phase's values
            show_box_plot (bool): if True, box plot. if False, line plot.
            show_figure (bool): passed to line_plot() when @show_box_plot is False
            filename (str): filename of the figure, or None (show figure)
            kwargs: only checked for the former keyword 'box_plot'

        Returns:
            (pandas.DataFrame): values of @targets (ratios when @divide_by_first is True)

        Raises:
            KeyError: 'box_plot' was used as a keyword, @name is not registered,
                or @targets is not a subset of the selectable columns

        Notes:
            If 'Main' was used as @name, main PhaseSeries will be used.
        """
        # The former keyword name is rejected explicitly
        if "box_plot" in kwargs:
            raise KeyError("Please use 'show_box_plot', not 'box_plot'")
        if name == "Main":
            name = self.MAIN
        if name not in self.series_dict:
            raise KeyError(f"@name {name} scenario has not been registered.")
        summary_df = self.series_dict[name].summary()
        # Selectable columns: population, model parameters, Rt and day parameters
        models = list(self.model_dict.values())
        parameters = self.flatten([model.PARAMETERS for model in models])
        day_parameters = self.flatten(
            [model.DAY_PARAMETERS for model in models])
        selectable_cols = [self.N, *parameters, self.RT, *day_parameters]
        # Normalize @targets to a list
        if targets is None:
            targets = selectable_cols
        elif isinstance(targets, str):
            targets = [targets]
        if not set(targets) <= set(selectable_cols):
            raise KeyError(
                f"@targets must be a subset of {', '.join(selectable_cols)}.")
        history_df = summary_df.loc[:, targets]
        if divide_by_first:
            # Ratio to the values of the first row (1st phase)
            history_df = history_df / history_df.iloc[0, :]
            title = f"{self.area}: Ratio to 1st phase parameters ({name} scenario)"
        else:
            title = f"{self.area}: History of parameter values ({name} scenario)"
        if not show_box_plot:
            # Line plot: x-axis is the phase number starting from 1
            phase_df = history_df.reset_index(drop=True)
            phase_df.index = phase_df.index + 1
            line_plot(phase_df,
                      title=title,
                      xlabel="Phase",
                      ylabel="",
                      math_scale=False,
                      h=1.0 if divide_by_first else None,
                      show_figure=show_figure,
                      filename=filename)
            return history_df
        # Box plot: reference line at 1.0 for ratios and for Rt
        if divide_by_first or self.RT in targets:
            h_values = [1.0]
        else:
            h_values = None
        box_plot(history_df, title, h=h_values, filename=filename)
        return history_df