Beispiel #1
0
    def gen_rolling_feature(self,
                            window_size,
                            settings="comprehensive",
                            full_settings=None,
                            n_jobs=1):
        '''
        Generate aggregation feature for each sample.
        This method will be implemented by tsfresh.

        TODO: relationship with scale should be figured out.

        :param window_size: int, generate feature according to the rolling result.
        :param settings: str or dict. If a string is set, then it must be one of "comprehensive"
               "minimal" and "efficient". If a dict is set, then it should follow the instruction
               for default_fc_parameters in tsfresh. The value is defaulted to "comprehensive".
        :param full_settings: dict. It should follow the instruction for kind_to_fc_parameters in
               tsfresh. The value is defaulted to None.
        :param n_jobs: int. The number of processes to use for parallelization.

        :return: the tsdataset instance.
        '''
        assert not self._has_generate_agg_feature,\
            "Only one of gen_global_feature and gen_rolling_feature should be called."
        if isinstance(settings, str):
            assert settings in ["comprehensive", "minimal", "efficient"], \
                f"settings str should be one of \"comprehensive\", \"minimal\", \"efficient\"\
                    , but found {settings}."

            default_fc_parameters = DEFAULT_PARAMS[settings]
        else:
            default_fc_parameters = settings

        df_rolled = roll_time_series(self.df,
                                     column_id=self.id_col,
                                     column_sort=self.dt_col,
                                     max_timeshift=window_size - 1,
                                     min_timeshift=window_size - 1,
                                     n_jobs=n_jobs)
        if not full_settings:
            self.roll_feature_df = extract_features(
                df_rolled,
                column_id=self.id_col,
                column_sort=self.dt_col,
                default_fc_parameters=default_fc_parameters,
                n_jobs=n_jobs)
        else:
            self.roll_feature_df = extract_features(
                df_rolled,
                column_id=self.id_col,
                column_sort=self.dt_col,
                kind_to_fc_parameters=full_settings,
                n_jobs=n_jobs)
        impute_tsfresh(self.roll_feature_df)

        self.feature_col += list(self.roll_feature_df.columns)
        self.roll_additional_feature = list(self.roll_feature_df.columns)
        self._has_generate_agg_feature = True
        return self
Beispiel #2
0
    def gen_rolling_feature(self,
                            window_size,
                            settings="comprehensive",
                            full_settings=None,
                            n_jobs=1):
        '''
        Generate aggregation feature for each sample.
        This method will be implemented by tsfresh.
        Make sure that the specified column name does not contain '__'.

        TODO: relationship with scale should be figured out.

        :param window_size: int, generate feature according to the rolling result.
        :param settings: str or dict. If a string is set, then it must be one of "comprehensive"
               "minimal" and "efficient". If a dict is set, then it should follow the instruction
               for default_fc_parameters in tsfresh. The value is defaulted to "comprehensive".
        :param full_settings: dict. It should follow the instruction for kind_to_fc_parameters in
               tsfresh. The value is defaulted to None.
        :param n_jobs: int. The number of processes to use for parallelization.

        :return: the tsdataset instance.
        '''
        from tsfresh.utilities.dataframe_functions import roll_time_series
        from tsfresh.utilities.dataframe_functions import impute as impute_tsfresh
        from tsfresh import extract_features
        from tsfresh.feature_extraction import ComprehensiveFCParameters, \
            MinimalFCParameters, EfficientFCParameters

        DEFAULT_PARAMS = {
            "comprehensive": ComprehensiveFCParameters(),
            "minimal": MinimalFCParameters(),
            "efficient": EfficientFCParameters()
        }

        assert not self._has_generate_agg_feature,\
            "Only one of gen_global_feature and gen_rolling_feature should be called."
        if isinstance(settings, str):
            assert settings in ['comprehensive', 'minimal', 'efficient'], \
                "settings str should be one of 'comprehensive', 'minimal', 'efficient'"\
                f", but found {settings}."
            default_fc_parameters = DEFAULT_PARAMS[settings]
        else:
            default_fc_parameters = settings

        assert window_size < self.df.groupby(self.id_col).size().min() + 1, "gen_rolling_feature "\
            "should have a window_size smaller than shortest time series length."
        df_rolled = roll_time_series(self.df,
                                     column_id=self.id_col,
                                     column_sort=self.dt_col,
                                     max_timeshift=window_size - 1,
                                     min_timeshift=window_size - 1,
                                     n_jobs=n_jobs)
        if not full_settings:
            self.roll_feature_df = extract_features(
                df_rolled,
                column_id=self.id_col,
                column_sort=self.dt_col,
                default_fc_parameters=default_fc_parameters,
                n_jobs=n_jobs)
        else:
            self.roll_feature_df = extract_features(
                df_rolled,
                column_id=self.id_col,
                column_sort=self.dt_col,
                kind_to_fc_parameters=full_settings,
                n_jobs=n_jobs)
        impute_tsfresh(self.roll_feature_df)

        self.feature_col += list(self.roll_feature_df.columns)
        self.roll_additional_feature = list(self.roll_feature_df.columns)
        self._has_generate_agg_feature = True
        return self