def test_filter_only_tsfresh_features_false(self):
    """
    Check the augmenter with `filter_only_tsfresh_features=False`.

    Before augmentation, X carries two columns: `pre_drop` (constant 0) and
    `pre_keep` (a copy of the target). For both the `fit` + `transform`
    path and the `fit_transform` path, `pre_keep` must appear exactly once
    in the result and `pre_drop` must not appear at all.
    """
    augmenter = RelevantFeatureAugmenter(
        kind_to_fc_parameters=self.kind_to_fc_parameters,
        filter_only_tsfresh_features=False,
        column_value="val",
        column_id="id",
        column_sort="sort",
        column_kind="kind",
    )

    timeseries, target = self.create_test_data_sample_with_target()

    features = pd.DataFrame(index=np.unique(timeseries.id))
    features["pre_drop"] = 0
    features["pre_keep"] = target

    augmenter.set_timeseries_container(timeseries)

    augmenter.fit(features, target)
    # transform() gets a copy so the frame handed to fit_transform() below
    # is the same object that was used for fit().
    via_transform = augmenter.transform(features.copy())
    via_fit_transform = augmenter.fit_transform(features, target)

    # (result frame, column name, expected number of occurrences)
    expectations = (
        (via_transform, "pre_keep", 1),
        (via_transform, "pre_drop", 0),
        (via_fit_transform, "pre_keep", 1),
        (via_fit_transform, "pre_drop", 0),
    )
    for frame, column_name, expected_count in expectations:
        self.assertEqual(
            list(frame.columns).count(column_name), expected_count)
def test_filter_only_tsfresh_features_true(self):
    """
    Check the augmenter with `filter_only_tsfresh_features=True`.

    X carries a single constant column `pre_feature` before augmentation.
    For both the `fit` + `transform` path and the `fit_transform` path,
    that column must still be present exactly once in the result.
    """
    augmenter = RelevantFeatureAugmenter(
        kind_to_fc_parameters=self.kind_to_fc_parameters,
        filter_only_tsfresh_features=True,
        column_value="val",
        column_id="id",
        column_sort="sort",
        column_kind="kind",
    )

    target = pd.Series({10: 1, 500: 0})
    features = pd.DataFrame(index=[10, 500])
    features["pre_feature"] = 0

    augmenter.set_timeseries_container(self.test_df)

    augmenter.fit(features, target)
    # transform() gets a copy so the frame handed to fit_transform() below
    # is the same object that was used for fit().
    via_transform = augmenter.transform(features.copy())
    via_fit_transform = augmenter.fit_transform(features, target)

    for frame in (via_transform, via_fit_transform):
        self.assertEqual(list(frame.columns).count("pre_feature"), 1)
def test_multiclass_selection(self):
    """
    Run `fit_transform` on an augmenter configured with `multiclass=True`
    and `n_significant=3`, using the multiclass sample data, and check that
    the resulting frame has exactly four columns.
    """
    augmenter = RelevantFeatureAugmenter(
        column_value="val",
        column_id="id",
        column_sort="sort",
        column_kind="kind",
        multiclass=True,
        n_significant=3,
    )

    timeseries, target = self.create_test_data_sample_with_multiclass_target()
    # X starts without any columns; it only provides the index of ids.
    features = pd.DataFrame(index=np.unique(timeseries.id))

    augmenter.set_timeseries_container(timeseries)
    selected = augmenter.fit_transform(features, target)

    column_count = len(selected.columns)
    self.assertEqual(column_count, 4)
def test_nothing_relevant(self):
    """
    Check the output for the two-sample target on `self.test_df`.

    Both the `fit` + `transform` path and the `fit_transform` path must
    return a frame with no columns whose index equals the index of X.
    """
    augmenter = RelevantFeatureAugmenter(
        kind_to_fc_parameters=self.kind_to_fc_parameters,
        column_value="val",
        column_id="id",
        column_sort="sort",
        column_kind="kind",
    )

    target = pd.Series({10: 1, 500: 0})
    features = pd.DataFrame(index=[10, 500])

    augmenter.set_timeseries_container(self.test_df)

    augmenter.fit(features, target)
    # transform() gets a copy so the frame handed to fit_transform() below
    # is the same object that was used for fit().
    via_transform = augmenter.transform(features.copy())
    via_fit_transform = augmenter.fit_transform(features, target)

    for frame in (via_transform, via_fit_transform):
        self.assertEqual(list(frame.columns), [])
        self.assertEqual(list(frame.index), list(features.index))