def test_variance_drop_all(self):
    """Run variance selection with a threshold of 100000, tolerating ValueError.

    The test makes no assertions: it passes whether fit/transform complete
    normally or raise ``ValueError``. Any other exception fails the test.
    """
    features, _ = get_data_label(load_boston())
    selector = Selective(SelectionMethod.Variance(threshold=100000))

    try:
        selector.fit(features)
        selector.transform(features)
    except ValueError:
        # NOTE(review): if the ValueError is guaranteed for this threshold,
        # assertRaises would make the expectation explicit -- confirm.
        pass
def test_correlation(self):
    """Correlation selection at 0.60 on the Boston data keeps seven columns."""
    features, _ = get_data_label(load_boston())

    selector = Selective(SelectionMethod.Correlation(0.60))
    selector.fit(features)
    kept = selector.transform(features)

    # Expected surviving columns, in their original order.
    expected = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'RM', 'PTRATIO', 'B']
    self.assertListEqual(list(kept.columns), expected)
def test_correlation_small(self):
    """Correlation selection at 0.60 on a reduced Boston frame keeps four columns."""
    features, _ = get_data_label(load_boston())

    # Work on a smaller frame by removing eight of the columns up front.
    dropped = ["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]
    features = features.drop(columns=dropped)

    selector = Selective(SelectionMethod.Correlation(0.60))
    selector.fit(features)
    kept = selector.transform(features)

    expected = ['CRIM', 'ZN', 'AGE', 'B']
    self.assertListEqual(list(kept.columns), expected)
def test_tree_invalid_num_features(self):
    """Fitting a tree-based selector with num_features=100 must raise ValueError."""
    features, target = get_data_label(load_boston())

    dropped = ["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]
    features = features.drop(columns=dropped)

    selector = Selective(SelectionMethod.TreeBased(num_features=100))

    # Only the fit call is expected to raise; construction happens above.
    self.assertRaises(ValueError, selector.fit, features, target)
def test_variance_zero_threshold(self):
    """A variance threshold of 0 is expected to keep all 13 Boston columns."""
    features, _ = get_data_label(load_boston())

    selector = Selective(SelectionMethod.Variance(threshold=0))
    selector.fit(features)
    kept = selector.transform(features)

    # Input and output widths must both be 13: nothing is removed.
    self.assertEqual(features.shape[1], 13)
    self.assertEqual(kept.shape[1], 13)
def test_anova_classif_top_percentile(self):
    """Statistical selection with num_features=0.5 on iris keeps the petal columns."""
    features, target = get_data_label(load_iris())

    selector = Selective(SelectionMethod.Statistical(num_features=0.5))
    selector.fit(features, target)
    kept = selector.transform(features)

    # Two columns are expected to remain.
    expected = ['petal length (cm)', 'petal width (cm)']
    self.assertEqual(kept.shape[1], 2)
    self.assertListEqual(list(kept.columns), expected)
def test_chi_regress_top_percentile_invalid(self):
    """Fitting chi-square selection on the Boston data must raise TypeError."""
    features, target = get_data_label(load_boston())

    dropped = ["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]
    features = features.drop(columns=dropped)

    chi_square = SelectionMethod.Statistical(num_features=0.6,
                                             method="chi_square")
    selector = Selective(chi_square)

    # Only the fit call is expected to raise; construction happens above.
    self.assertRaises(TypeError, selector.fit, features, target)
def test_tree_estimator_lightgbm_classif_top_k(self):
    """Tree-based selection with a custom classifier keeps two iris columns.

    Only the number of selected columns is checked, not which ones.
    """
    features, target = get_data_label(load_iris())

    # NOTE(review): the test name says lightgbm but the estimator used here
    # is XGBClassifier -- confirm which one was intended.
    estimator = XGBClassifier(random_state=Constants.default_seed)
    selector = Selective(
        SelectionMethod.TreeBased(num_features=2, estimator=estimator))
    selector.fit(features, target)
    kept = selector.transform(features)

    self.assertEqual(kept.shape[1], 2)
def test_variance_drop_target(self):
    """A variance threshold of 85 is expected to keep four of 13 Boston columns."""
    features, _ = get_data_label(load_boston())

    selector = Selective(SelectionMethod.Variance(threshold=85))
    selector.fit(features)
    kept = selector.transform(features)

    # Width before and after selection.
    self.assertEqual(features.shape[1], 13)
    self.assertEqual(kept.shape[1], 4)

    expected = ['ZN', 'AGE', 'TAX', 'B']
    self.assertListEqual(list(kept.columns), expected)
def test_tree_classif_top_k(self):
    """Tree-based selection with num_features=2 on iris keeps the petal columns."""
    features, target = get_data_label(load_iris())

    selector = Selective(SelectionMethod.TreeBased(num_features=2))
    selector.fit(features, target)
    kept = selector.transform(features)

    # Exactly two columns are expected to remain.
    expected = ['petal length (cm)', 'petal width (cm)']
    self.assertEqual(kept.shape[1], 2)
    self.assertListEqual(list(kept.columns), expected)
def test_anova_regress_top_percentile_all(self):
    """Statistical selection with num_features=1.0 is expected to keep every column."""
    features, target = get_data_label(load_boston())

    dropped = ["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]
    features = features.drop(columns=dropped)

    selector = Selective(SelectionMethod.Statistical(num_features=1.0))
    selector.fit(features, target)
    kept = selector.transform(features)

    # Output must match the input in both width and column order.
    self.assertEqual(features.shape[1], kept.shape[1])
    self.assertListEqual(list(features.columns), list(kept.columns))
def test_vif_top_k_no_label(self):
    """Variance-inflation selection fitted without labels keeps two iris columns."""
    features, _ = get_data_label(load_iris())

    vif = SelectionMethod.Statistical(num_features=2,
                                      method="variance_inflation")
    selector = Selective(vif)
    selector.fit(features)  # fitted on the features alone, no label passed
    kept = selector.transform(features)

    expected = ['sepal width (cm)', 'petal width (cm)']
    self.assertEqual(kept.shape[1], 2)
    self.assertListEqual(list(kept.columns), expected)
def test_linear_regress_top_k_all(self):
    """Linear selection with num_features=5 is expected to keep every remaining column."""
    features, target = get_data_label(load_boston())

    dropped = ["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]
    features = features.drop(columns=dropped)

    selector = Selective(SelectionMethod.Linear(num_features=5))
    selector.fit(features, target)
    kept = selector.transform(features)

    # The selected columns must equal the input columns, in order.
    self.assertListEqual(list(features.columns), list(kept.columns))
def test_tree_regress_top_percentile(self):
    """Tree-based selection with num_features=0.6 keeps three Boston columns."""
    features, target = get_data_label(load_boston())

    dropped = ["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]
    features = features.drop(columns=dropped)

    selector = Selective(SelectionMethod.TreeBased(num_features=0.6))
    selector.fit(features, target)
    kept = selector.transform(features)

    # Three columns are expected to remain.
    expected = ['CRIM', 'AGE', 'LSTAT']
    self.assertEqual(kept.shape[1], 3)
    self.assertListEqual(list(kept.columns), expected)
def test_tree_estimator_adaboost_classif_top_k(self):
    """Tree-based selection with an AdaBoost classifier keeps the iris petal columns."""
    features, target = get_data_label(load_iris())

    # Seeded estimator handed to the tree-based method.
    estimator = AdaBoostClassifier(random_state=Constants.default_seed)
    selector = Selective(
        SelectionMethod.TreeBased(num_features=2, estimator=estimator))
    selector.fit(features, target)
    kept = selector.transform(features)

    expected = ['petal length (cm)', 'petal width (cm)']
    self.assertEqual(kept.shape[1], 2)
    self.assertListEqual(list(kept.columns), expected)
def test_linear_classif_top_percentile_all(self):
    """Linear selection with num_features=1.0 is expected to keep all four iris columns."""
    features, target = get_data_label(load_iris())

    selector = Selective(SelectionMethod.Linear(num_features=1.0))
    selector.fit(features, target)
    kept = selector.transform(features)

    # All four columns, in their original order.
    expected = [
        'sepal length (cm)',
        'sepal width (cm)',
        'petal length (cm)',
        'petal width (cm)',
    ]
    self.assertEqual(kept.shape[1], 4)
    self.assertListEqual(list(kept.columns), expected)
def test_ridge_regress_top_k(self):
    """Ridge-regularized linear selection with num_features=3 keeps three columns."""
    features, target = get_data_label(load_boston())

    dropped = ["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]
    features = features.drop(columns=dropped)

    ridge = SelectionMethod.Linear(num_features=3, regularization="ridge")
    selector = Selective(ridge)
    selector.fit(features, target)
    kept = selector.transform(features)

    # Three columns are expected to remain.
    expected = ['CRIM', 'AGE', 'LSTAT']
    self.assertEqual(kept.shape[1], 3)
    self.assertListEqual(list(kept.columns), expected)
def test_tree_estimator_xgboost_regress_top_k(self):
    """Tree-based selection with an XGBRegressor keeps three Boston columns.

    Only the number of selected columns is checked, not which ones.
    """
    features, target = get_data_label(load_boston())

    dropped = ["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]
    features = features.drop(columns=dropped)

    # Seeded estimator handed to the tree-based method.
    estimator = XGBRegressor(random_state=Constants.default_seed)
    selector = Selective(
        SelectionMethod.TreeBased(num_features=3, estimator=estimator))
    selector.fit(features, target)
    kept = selector.transform(features)

    self.assertEqual(kept.shape[1], 3)
def test_mutual_classif_top_k_all(self):
    """Mutual-info selection with num_features=4 is expected to keep all four iris columns."""
    features, target = get_data_label(load_iris())

    mutual_info = SelectionMethod.Statistical(num_features=4,
                                              method="mutual_info")
    selector = Selective(mutual_info)
    selector.fit(features, target)
    kept = selector.transform(features)

    # All four columns, in their original order.
    expected = [
        'sepal length (cm)',
        'sepal width (cm)',
        'petal length (cm)',
        'petal width (cm)',
    ]
    self.assertEqual(kept.shape[1], 4)
    self.assertListEqual(list(kept.columns), expected)
def test_variance_lt1(self):
    """A variance threshold of 1.0 is expected to keep ten of 13 Boston columns."""
    features, _ = get_data_label(load_boston())

    selector = Selective(SelectionMethod.Variance(threshold=1.0))
    selector.fit(features)
    kept = selector.transform(features)

    # Width before and after selection.
    self.assertEqual(features.shape[1], 13)
    self.assertEqual(kept.shape[1], 10)

    expected = [
        'CRIM', 'ZN', 'INDUS', 'AGE', 'DIS',
        'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
    ]
    self.assertListEqual(list(kept.columns), expected)
def test_vif_top_k_regression(self):
    """Variance-inflation selection with num_features=2 keeps CRIM and ZN."""
    features, target = get_data_label(load_boston())

    dropped = ["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"]
    features = features.drop(columns=dropped)

    vif = SelectionMethod.Statistical(num_features=2,
                                      method="variance_inflation")
    selector = Selective(vif)
    selector.fit(features, target)
    kept = selector.transform(features)

    # Two columns are expected to remain.
    expected = ['CRIM', 'ZN']
    self.assertEqual(kept.shape[1], 2)
    self.assertListEqual(list(kept.columns), expected)