Exemple #1
0
    def test_from_columns(self):
        """
        Check that ``from_columns`` rejects malformed input and rebuilds the per-kind calculation settings
        (feature calculators and their parameters) from a list of feature column names.
        """
        tsn = "TEST_TIME_SERIES"

        fset = FeatureExtractionSettings()

        # An integer instead of a list of names, and names that do not follow the
        # "<kind>__<calculator>[__<param>_<value>...]" layout used below, must be rejected.
        self.assertRaises(TypeError, fset.from_columns, 42)
        self.assertRaises(ValueError, fset.from_columns, ["This is not a column name"])
        self.assertRaises(ValueError, fset.from_columns, ["This__neither"])
        self.assertRaises(ValueError, fset.from_columns, ["This__also__not"])

        # Aggregate functions
        feature_names = [tsn + '__sum_values', tsn + "__median", tsn + "__length", tsn + "__sample_entropy"]

        # Aggregate functions with params
        feature_names += [tsn + '__quantile__q_10', tsn + '__quantile__q_70', tsn + '__number_peaks__n_30',
                          tsn + '__value_count__value_inf', tsn + '__value_count__value_-inf',
                          tsn + '__value_count__value_nan']

        # Apply functions
        feature_names += [tsn + '__ar_coefficient__k_20__coeff_4', tsn + '__ar_coefficient__coeff_10__k_-1']

        cset = fset.from_columns(feature_names)
        settings_for_kind = cset.kind_to_calculation_settings_mapping[tsn]

        # Every calculator named in the columns must show up exactly once, in any order.
        six.assertCountEqual(self, list(settings_for_kind.keys()),
                             ["sum_values", "median", "length", "sample_entropy", "quantile", "number_peaks",
                              "ar_coefficient", "value_count"])

        # A calculator without parameters is expected to map to None.
        self.assertIsNone(settings_for_kind["sum_values"])

        # The parameter order inside a column name must not matter.
        self.assertEqual(settings_for_kind["ar_coefficient"],
                         [{"k": 20, "coeff": 4}, {"k": -1, "coeff": 10}])

        # np.inf / np.nan are used instead of the np.PINF / np.NINF / np.NaN aliases, which NumPy 2.0 removed.
        # NOTE: nan != nan, so the last entry only compares equal if the parsed value is the very same object
        # as np.nan (container comparisons check identity before equality).
        self.assertEqual(settings_for_kind["value_count"],
                         [{"value": np.inf}, {"value": -np.inf}, {"value": np.nan}])
Exemple #2
0
    def transform(self, X):
        """
        Extract the time series features found relevant during fit and return them for the sample X.

        Only the relevant features that are not already columns of X are calculated, using the time series
        handed in with the function :func:`~set_timeseries_container`. The returned data frame holds exactly
        the columns listed in the relevant features of the feature selector.

        :param X: the data sample to add the relevant features to.
        :type X: pandas.DataFrame or numpy.array

        :return: the augmented data sample, restricted to the relevant features.
        :rtype: pandas.DataFrame

        :raises RuntimeError: if no relevant features are known yet or no time series container was provided.
        """
        selector = self.feature_selector
        if selector.relevant_features is None:
            raise RuntimeError("You have to call fit before.")

        container = self.timeseries_container
        if container is None:
            raise RuntimeError(
                "You have to provide a time series using the set_timeseries_container function before."
            )

        extractor = self.feature_extractor
        extractor.set_timeseries_container(container)

        # Columns that X already carries do not have to be extracted again.
        wanted_features = set(selector.relevant_features)
        present_columns = set(pd.DataFrame(X).columns)
        missing_features = wanted_features - present_columns

        restricted_settings = FeatureExtractionSettings.from_columns(missing_features)
        restricted_settings.set_default = False

        # Reuse the imputing strategy of the configured extractor; the range imputation is
        # additionally bound to the per-column max/min/median values stored on this object.
        impute_function = extractor.settings.IMPUTE
        if impute_function is impute_dataframe_range:
            impute_function = partial(
                impute_dataframe_range,
                col_to_max=self.col_to_max,
                col_to_min=self.col_to_min,
                col_to_median=self.col_to_median)
        restricted_settings.IMPUTE = impute_function

        restricted_augmenter = FeatureAugmenter(
            settings=restricted_settings,
            column_id=extractor.column_id,
            column_sort=extractor.column_sort,
            column_kind=extractor.column_kind,
            column_value=extractor.column_value)
        restricted_augmenter.set_timeseries_container(extractor.timeseries_container)

        augmented = restricted_augmenter.transform(X)
        snapshot = augmented.copy()
        return snapshot.loc[:, selector.relevant_features]
Exemple #3
0
    def transform(self, X):
        """
        Extract the time series features found relevant during fit from the time series handed in with the
        function :func:`~set_timeseries_container`.

        Depending on evaluate_only_added_features, the result is either the relevant features followed by all
        columns of X (flag is true), or only the relevant features (flag is false).

        :param X: the data sample to add the relevant (and delete the irrelevant) features to.
        :type X: pandas.DataFrame or numpy.array

        :return: the relevant features, with the columns of X appended if evaluate_only_added_features is set.
        :rtype: pandas.DataFrame

        :raises RuntimeError: if no relevant features are known yet or no time series container was provided.
        """
        selector = self.feature_selector
        if selector.relevant_features is None:
            raise RuntimeError("You have to call fit before.")

        container = self.timeseries_container
        if container is None:
            raise RuntimeError(
                "You have to provide a time series using the set_timeseries_container function before."
            )

        extractor = self.feature_extractor
        extractor.set_timeseries_container(container)

        # Columns that are already part of X do not stem from the time series, so only the
        # remaining relevant features are requested from the extraction.
        wanted_features = set(selector.relevant_features)
        present_columns = set(pd.DataFrame(X).columns)
        to_extract = list(wanted_features - present_columns)

        restricted_settings = FeatureExtractionSettings.from_columns(to_extract)
        restricted_settings.set_default = False
        restricted_settings.IMPUTE = extractor.settings.IMPUTE

        restricted_augmenter = FeatureAugmenter(
            settings=restricted_settings,
            column_id=extractor.column_id,
            column_sort=extractor.column_sort,
            column_kind=extractor.column_kind,
            column_value=extractor.column_value)
        restricted_augmenter.set_timeseries_container(extractor.timeseries_container)

        # Read the flag before running the extraction; both outcomes start from the same selection.
        append_input_columns = self.evaluate_only_added_features
        augmented = restricted_augmenter.transform(X)
        selected = augmented.loc[:, selector.relevant_features]

        if append_input_columns:
            return pd.concat([selected, X], axis=1)
        return selected