Beispiel #1
0
    def gen_global_feature(self,
                           settings="comprehensive",
                           full_settings=None,
                           n_jobs=1):
        '''
        Generate per-time-series feature for each time series.
        This method will be implemented by tsfresh.
        Make sure that the specified column name does not contain '__'.

        TODO: relationship with scale should be figured out.

        :param settings: str or dict. If a string is set, then it must be one of "comprehensive"
               "minimal" and "efficient". If a dict is set, then it should follow the instruction
               for default_fc_parameters in tsfresh. The value is defaulted to "comprehensive".
               It is ignored when full_settings is not None.
        :param full_settings: dict. It should follow the instruction for kind_to_fc_parameters in
               tsfresh. The value is defaulted to None. When set, it takes precedence over
               settings.
        :param n_jobs: int. The number of processes to use for parallelization.

        :return: the tsdataset instance.
        '''
        assert not self._has_generate_agg_feature, \
            "Only one of gen_global_feature and gen_rolling_feature should be called."

        # Pick exactly one tsfresh configuration keyword; the helper is called with
        # either kind_to_fc_parameters or default_fc_parameters, never both.
        if full_settings is not None:
            fc_kwargs = {"kind_to_fc_parameters": full_settings}
        elif isinstance(settings, str):
            assert settings in ["comprehensive", "minimal", "efficient"], \
                'settings str should be one of "comprehensive", "minimal", "efficient"' \
                f", but found {settings}."
            fc_kwargs = {"default_fc_parameters": DEFAULT_PARAMS[settings]}
        else:
            fc_kwargs = {"default_fc_parameters": settings}

        self.df, additional_features = generate_global_features(input_df=self.df,
                                                                column_id=self.id_col,
                                                                column_sort=self.dt_col,
                                                                n_jobs=n_jobs,
                                                                **fc_kwargs)

        self.feature_col += additional_features
        # Record the generation on every path (including full_settings) so the
        # guard at the top of this method rejects a second aggregation call.
        self._has_generate_agg_feature = True
        return self
Beispiel #2
0
    def gen_global_feature(self, settings="comprehensive", full_settings=None):
        '''
        Generate per-time-series feature for each time series.
        This method will be implemented by tsfresh.

        :param settings: str or dict. If a string is set, then it must be one of "comprehensive"
               "minimal" and "efficient". If a dict is set then it should follow the instruction
               for default_fc_parameters in tsfresh. The value is defaulted to "comprehensive".
               It is ignored when full_settings is not None.
        :param full_settings: dict. It should follow the instruction for kind_to_fc_parameters in
               tsfresh. The value is defaulted to None. When set, it takes precedence over
               settings.

        :return: the tsdataset instance.

        '''
        if full_settings is not None:
            fc_kwargs = {"kind_to_fc_parameters": full_settings}
        else:
            from tsfresh.feature_extraction import ComprehensiveFCParameters,\
                MinimalFCParameters, EfficientFCParameters

            if isinstance(settings, str):
                assert settings in ["comprehensive", "minimal", "efficient"], \
                    'settings str should be one of "comprehensive", "minimal", "efficient"' \
                    f", but found {settings}."
                # Map names to classes and instantiate only the requested preset.
                preset_classes = {
                    "comprehensive": ComprehensiveFCParameters,
                    "minimal": MinimalFCParameters,
                    "efficient": EfficientFCParameters
                }
                default_fc_parameters = preset_classes[settings]()
            else:
                default_fc_parameters = settings
            fc_kwargs = {"default_fc_parameters": default_fc_parameters}

        # generate_global_features returns (dataframe, list of new feature columns);
        # unpack on every path so self.df stays a dataframe and feature_col is
        # extended for full_settings as well.
        self.df, additional_features = generate_global_features(input_df=self.df,
                                                                column_id=self.id_col,
                                                                column_sort=self.dt_col,
                                                                **fc_kwargs)

        self.feature_col += additional_features

        return self
    def test_gen_global_feature_multi_id(self):
        '''
        Check generate_global_features on a frame holding two ids ("00" and "01").

        The input columns must survive in the output, and every generated column
        must hold a single, non-NaN value within each id.
        '''
        from tsfresh.feature_extraction import MinimalFCParameters

        series_ids = ("00", "01")
        input_cols = ["datetime", "values", "A", "B", "id"]

        samples = np.random.randn(8, 3)
        df = pd.DataFrame({
            "datetime": pd.date_range('1/1/2019', periods=8),
            "values": samples[:, 0],
            "A": samples[:, 1],
            "B": samples[:, 2],
            "id": ["00"] * 4 + ["01"] * 4
        })

        for params in [MinimalFCParameters()]:
            output_df, _ = generate_global_features(input_df=df,
                                                    column_id="id",
                                                    column_sort="datetime",
                                                    default_fc_parameters=params)

            # The original columns are kept alongside the generated ones.
            for name in input_cols:
                assert name in output_df.columns

            generated_cols = [c for c in output_df.columns if c not in input_cols]
            for col in generated_cols:
                per_id_values = [output_df[output_df["id"] == sid][col]
                                 for sid in series_ids]
                # A global feature is constant across all rows of one id ...
                for values in per_id_values:
                    assert len(set(values)) == 1
                # ... and is never missing.
                for values in per_id_values:
                    assert values.isna().sum() == 0