コード例 #1
0
    def _curve_fitting(self, phase, start_date, end_date):
        """
        Run the trend fitting on the records of one phase.

        The copy of self.sr_df is restricted to the rows whose index lies
        between the start date and the end date (both inclusive), and the
        selected rows are passed to Trend.

        Args:
            phase (str): phase name ("0th" is replaced with self.INITIAL)
            start_date (str): start date of the phase
            end_date (str): end date of the phase

        Returns:
            tuple
                (pandas.DataFrame): Result of curve fitting
                    Index: reset index
                    Columns:
                        - (phase name) + self.P: predicted value of Susceptible,
                            None when the RMSLE score exceeds self.max_rmsle
                        - (phase name) + self.A: actual value of Susceptible
                        - (phase name)_(self.R): Recovered
                (int): minimum value of R, which is the change point of the curve
        """
        records = self.sr_df.copy()
        first_day = self.date_obj(start_date)
        last_day = self.date_obj(end_date)
        # Keep only the rows inside the phase (both ends inclusive)
        in_phase = (records.index >= first_day) & (records.index <= last_day)
        records = records.loc[in_phase, :]
        trend = Trend(records)
        fitted = trend.run()
        predicted_col = f"{self.S}{self.P}"
        # Discard the predicted values when the fitting error is too large
        if trend.rmsle() > self.max_rmsle:
            fitted[predicted_col] = None
        # Minimum of R, used as the position of the vertical line
        change_point = int(fitted[self.R].min())
        # The initial phase is shown with its own label
        phase_label = phase
        if phase == "0th":
            phase_label = self.INITIAL
        # Prefix the columns with the phase label, one rename at a time
        renames = (
            (predicted_col, f"{phase_label}{self.P}"),
            (f"{self.S}{self.A}", f"{phase_label}{self.A}"),
            (f"{self.R}", f"{phase_label}_{self.R}"),
        )
        for old_name, new_name in renames:
            fitted = fitted.rename(columns={old_name: new_name})
        return fitted, change_point
コード例 #2
0
 def error_f(self, start_dates, end_dates):
     """
     Error score to minimize in the study.
     The score is the weighted average of the RMSLE scores of the phases,
     where the i-th phase (counted from 1) has weight i.
     Start dates and end dates are paired in order; extra elements of the
     longer list are ignored.
     @start_dates <list[str]>: list of start date of phases (candidates)
     @end_dates <list[str]>: list of end date of phases (candidates)
     @return <float> : score of the error function to minimize
     """
     def phase_rmsle(start_date, end_date):
         # RMSLE score of the trend analysis for one candidate phase
         population = self.pop_dict[start_date]
         trend = Trend(
             self.clean_df, population, self.country,
             province=self.province,
             start_date=start_date, end_date=end_date
         )
         trend.analyse()
         return trend.rmsle()

     rmsle_scores = [
         phase_rmsle(start_date, end_date)
         for start_date, end_date in zip(start_dates, end_dates)
     ]
     # Weights are 1, 2, ..., (number of phases)
     weights = range(1, len(rmsle_scores) + 1)
     return np.average(rmsle_scores, weights=weights)
コード例 #3
0
    def _curve_fitting(self, phase, info):
        """
        Run the trend analysis on the records of one phase.

        Args:
            phase (str): phase name ("0th" is replaced with self.INITIAL)
            info (dict[str]): start date, end date and population,
                read with the keys self.START, self.END and self.N

        Returns:
            (tuple)
                (pandas.DataFrame): Result of curve fitting
                    Index: reset index
                    Columns:
                        - (phase name) + self.P: predicted value of Susceptible,
                            None when the RMSLE score exceeds self.max_rmsle
                        - (phase name) + self.A: actual value of Susceptible
                        - (phase name)_(self.R): Recovered
                (int): minimum value of R, which is the change point of the curve
        """
        period = {"start_date": info[self.START], "end_date": info[self.END]}
        population = info[self.N]
        trend = Trend(
            self.jhu_data, population, self.country,
            province=self.province, **period
        )
        trend.analyse()
        fitted = trend.result()
        predicted_col = f"{self.S}{self.P}"
        # Discard the predicted values when the fitting error is too large
        if trend.rmsle() > self.max_rmsle:
            fitted[predicted_col] = None
        # Minimum of R, used as the position of the vertical line
        change_point = int(fitted[self.R].min())
        # The initial phase is shown with its own label
        phase_label = phase
        if phase == "0th":
            phase_label = self.INITIAL
        # Prefix the columns with the phase label, one rename at a time
        renames = (
            (predicted_col, f"{phase_label}{self.P}"),
            (f"{self.S}{self.A}", f"{phase_label}{self.A}"),
            (f"{self.R}", f"{phase_label}_{self.R}"),
        )
        for old_name, new_name in renames:
            fitted = fitted.rename(columns={old_name: new_name})
        return fitted, change_point