def setUp(self):
    """Prepare labelled training data, unlabelled test data and a step list.

    The step list is EmptyStep, PearsonCorrStep(4) and a PCAStep configured
    with ``n_components=2``, in that order.
    """
    features, target = rand_df(shape=(100, 10))
    self.X = features
    self.y = target
    self.test_X = rand_df(shape=(100, 10), labeled=False)

    passthrough = EmptyStep()
    corr_selector = PearsonCorrStep(4)
    pca = PCAStep(kwargs={'n_components': 2})
    self.steps = [passthrough, corr_selector, pca]
def setUp(self):
    """Prepare unlabelled training/test data and a step list.

    The step list is EmptyStep, a StandardScalerStep with
    ``append_input=True`` and a PCAStep configured with ``n_components=2``.
    """
    self.X = rand_df(shape=(100, 10), labeled=False)
    self.test_X = rand_df(shape=(100, 10), labeled=False)

    passthrough = EmptyStep()
    scaler = StandardScalerStep(append_input=True)
    pca = PCAStep(kwargs={'n_components': 2})
    self.steps = [passthrough, scaler, pca]
class ListTests(unittest.TestCase, StepTest):
    """Fixture for ListSelectionStep on labelled data, plus a missing-feature check.

    Supplies ``step``, ``X``, ``y`` and ``test_X`` as class attributes; how
    the StepTest mixin consumes them is defined outside this view.
    """

    step = ListSelectionStep(
        features=['11', '22'],
    )
    X, y = rand_df()
    test_X = rand_df(labeled=False)

    # Tests that a key error is raised when the features do not exist
    def test_key_error(self):
        selector = ListSelectionStep(features=['11', 'not_a_feature'])
        train_X, train_y = rand_df(shape=(10, 15))
        # Fitting is expected to fail because 'not_a_feature' is requested.
        self.assertRaises(KeyError, selector.fit, train_X, y=train_y)
def test_pipeline_step(self):
    # Fits a pipeline on unlabelled data and checks that both fit_transform
    # and transform return a pandas DataFrame. The pipeline is: scaler ->
    # nested (PCA -> polynomial) sub-pipeline with append_input=True ->
    # EmptyStep.
    tr_data_X = rand_df(shape=(100, 20), labeled=False)
    te_data = rand_df(shape=(100, 20), labeled=False)

    scale_step = StandardScalerStep()
    pca_step = PCAStep(append_input=False, kwargs={'n_components': 5})
    poly_step = PolyStep(kwargs={'degree': 3, 'include_bias': False})

    pipeline = Pipeline([
        scale_step,
        Pipeline([pca_step, poly_step], append_input=True),
        EmptyStep(),
    ])

    # assertIsInstance is the idiomatic type check and reports the actual
    # type on failure.
    r = pipeline.fit_transform(tr_data_X)
    self.assertIsInstance(r, pd.DataFrame)

    r = pipeline.transform(te_data)
    self.assertIsInstance(r, pd.DataFrame)
def test_pipeline_step(self):
    # Fits a pipeline on 3-class labelled data and checks that both
    # fit_transform (which returns a pair when y is given) and transform
    # produce a pandas DataFrame. The pipeline is: scaler -> nested
    # (PCA -> polynomial) sub-pipeline with append_input=True -> chi-squared
    # selection with k=20.
    tr_data_X, tr_data_y = rand_df_classification(shape=(100, 20), classes=3)
    te_data = rand_df(shape=(100, 20), labeled=False)

    scale_step = StandardScalerStep()
    chi_step = ChiSqSelectionStep(select_kwargs={'k': 20})
    pca_step = PCAStep(append_input=False, kwargs={'n_components': 5})
    poly_step = PolyStep(kwargs={'degree': 3, 'include_bias': False})

    pipeline = Pipeline([
        scale_step,
        Pipeline([pca_step, poly_step], append_input=True),
        chi_step,
    ])

    # Only the transformed features are inspected; the second element of
    # the returned pair is discarded.
    r, _ = pipeline.fit_transform(tr_data_X, y=tr_data_y)
    self.assertIsInstance(r, pd.DataFrame)

    r = pipeline.transform(te_data)
    self.assertIsInstance(r, pd.DataFrame)
class ADASYNTests1(unittest.TestCase, StepTest):
    """Fixture for ADASYNStep with an explicit per-class ``ratio`` mapping.

    Supplies ``step``, ``X``, ``y`` and ``test_X`` as class attributes; how
    the StepTest mixin consumes them is defined outside this view.
    """

    step = ADASYNStep(kwargs={'ratio': {0.0: 100, 1.0: 100}})
    X, y = rand_df_classification(val_range=(0, 100))
    # labeled=False keeps test_X a single unlabelled frame like every other
    # fixture; the default rand_df() call is unpacked as (X, y) elsewhere in
    # this file, so omitting the flag would bind test_X to that pair.
    test_X = rand_df(val_range=(0, 100), labeled=False)
class SinTests3(unittest.TestCase, StepTest):
    """Fixture for SinStep restricted to columns '1', '2', '3' with append_input=True."""

    step = SinStep(
        append_input=True,
        columns=['1', '2', '3'],
    )
    X, y = rand_df()
    test_X = rand_df(labeled=False)
class SinTests1(unittest.TestCase, StepTest):
    """Fixture for a default-constructed SinStep on labelled data."""

    step = SinStep()

    # Labelled training data and an unlabelled test frame.
    X, y = rand_df()
    test_X = rand_df(labeled=False)
class PolyTests1(unittest.TestCase, StepTest):
    """Fixture for a default-constructed PolyStep on labelled data."""

    step = PolyStep()

    # Labelled training data and an unlabelled test frame.
    X, y = rand_df()
    test_X = rand_df(labeled=False)
class LogTests3(unittest.TestCase, StepTest):
    """Fixture for LogStep using np.log10 on columns '1' and '10' with append_input=True."""

    step = LogStep(
        append_input=True,
        columns=['1', '10'],
        log_func=np.log10,
    )
    # Both frames are generated with val_range=(0, 100).
    X, y = rand_df(val_range=(0, 100))
    test_X = rand_df(val_range=(0, 100), labeled=False)
class LogTests1(unittest.TestCase, StepTest):
    """Fixture for a default-constructed LogStep on labelled data."""

    step = LogStep()

    # Both frames are generated with val_range=(0, 100).
    X, y = rand_df(val_range=(0, 100))
    test_X = rand_df(val_range=(0, 100), labeled=False)
class TreeTests2(unittest.TestCase, StepTest):
    """Fixture for TreeSelectionStep given the ExtraTreesClassifier class as tree_model."""

    step = TreeSelectionStep(
        tree_model=ExtraTreesClassifier,
    )
    # Classification-style labelled training data; unlabelled test frame.
    X, y = rand_df_classification()
    test_X = rand_df(labeled=False)
class TreeTests1(unittest.TestCase, StepTest):
    """Fixture for a default-constructed TreeSelectionStep on labelled data."""

    step = TreeSelectionStep()

    # Labelled training data and an unlabelled test frame.
    X, y = rand_df()
    test_X = rand_df(labeled=False)
class PearsonCorrTests2(unittest.TestCase, StepTest):
    """Fixture for PearsonCorrStep constructed with num_features=50."""

    step = PearsonCorrStep(
        num_features=50,
    )
    # Labelled training data and an unlabelled test frame.
    X, y = rand_df()
    test_X = rand_df(labeled=False)
class ListTests2(unittest.TestCase, StepTest):
    """Fixture for ListSelectionStep on unlabelled data (``y`` is None)."""

    step = ListSelectionStep(
        features=['11', '22'],
    )
    # No target is provided for this fixture.
    X = rand_df(labeled=False)
    y = None
    test_X = rand_df(labeled=False)
class LDATransform1(unittest.TestCase, StepTest):
    """Fixture for a default-constructed LDATransformStep on classification data."""

    step = LDATransformStep()

    # Classification-style labelled training data; unlabelled test frame.
    X, y = rand_df_classification()
    test_X = rand_df(labeled=False)
class LDATransform2(unittest.TestCase, StepTest):
    """Fixture for LDATransformStep with append_input=True on classification data."""

    step = LDATransformStep(
        append_input=True,
    )
    # Classification-style labelled training data; unlabelled test frame.
    X, y = rand_df_classification()
    test_X = rand_df(labeled=False)
class ABODTests(unittest.TestCase, StepTest):
    """Fixture for ABODStep with num_remove=1 on data generated with outlier=True."""

    step = ABODStep(
        num_remove=1,
    )
    # Both frames are generated with outlier=True.
    X, y = rand_df_classification(outlier=True)
    test_X = rand_df(labeled=False, outlier=True)
class LogTests2(unittest.TestCase, StepTest):
    """Fixture for a default-constructed LogStep on unlabelled data (``y`` is None)."""

    step = LogStep()

    # Both frames are generated with val_range=(0, 100); no target is provided.
    X = rand_df(val_range=(0, 100), labeled=False)
    y = None
    test_X = rand_df(val_range=(0, 100), labeled=False)
class IsoForestDefault(unittest.TestCase, StepTest):
    """Fixture for a default-constructed IsoForestStep on data generated with outlier=True."""

    step = IsoForestStep()

    # Both frames are generated with outlier=True.
    X, y = rand_df(outlier=True)
    test_X = rand_df(labeled=False, outlier=True)
class PCATests2(unittest.TestCase, StepTest):
    """Fixture for PCAStep with append_input=True on unlabelled data (``y`` is None)."""

    step = PCAStep(
        append_input=True,
    )
    # No target is provided for this fixture.
    X = rand_df(labeled=False)
    y = None
    test_X = rand_df(labeled=False)
class IsoForestIncludeY(unittest.TestCase, StepTest):
    """Fixture for IsoForestStep constructed with include_y=False.

    NOTE(review): the class name says "IncludeY" while the step is built
    with ``include_y=False`` -- confirm the name reflects the intent.
    """

    step = IsoForestStep(
        include_y=False,
    )
    # Both frames are generated with outlier=True.
    X, y = rand_df(outlier=True)
    test_X = rand_df(labeled=False, outlier=True)
class PolyTests3(unittest.TestCase, StepTest):
    """Fixture for PolyStep with append_input=True on unlabelled data.

    The training frame uses shape=(100, 10) and the test frame
    shape=(50, 10), so the two differ in their first dimension.
    """

    step = PolyStep(
        append_input=True,
    )
    X = rand_df(shape=(100, 10), labeled=False)
    y = None
    test_X = rand_df(shape=(50, 10), labeled=False)
class LFODefault(unittest.TestCase, StepTest):
    """Fixture for a default-constructed LOFStep on data generated with outlier=True.

    NOTE(review): the class name spells "LFO" while the step is LOFStep --
    possibly a transposition; the name is left unchanged.
    """

    step = LOFStep()

    # Both frames are generated with outlier=True.
    X, y = rand_df_classification(outlier=True)
    test_X = rand_df(labeled=False, outlier=True)
class SinTests2(unittest.TestCase, StepTest):
    """Fixture for a default-constructed SinStep on unlabelled data (``y`` is None)."""

    step = SinStep()

    # No target is provided for this fixture.
    X = rand_df(labeled=False)
    y = None
    test_X = rand_df(labeled=False)
class LFOIncludeY(unittest.TestCase, StepTest):
    """Fixture for LOFStep constructed with include_y=False.

    NOTE(review): the class name says "IncludeY" while the step is built
    with ``include_y=False``, and "LFO" differs from LOFStep -- confirm the
    name reflects the intent.
    """

    step = LOFStep(
        include_y=False,
    )
    # Both frames are generated with outlier=True.
    X, y = rand_df(outlier=True)
    test_X = rand_df(labeled=False, outlier=True)
class StandardScalerTests1(unittest.TestCase, StepTest):
    """Fixture for a default-constructed StandardScalerStep on labelled data."""

    step = StandardScalerStep()

    # Labelled training data and an unlabelled test frame.
    X, y = rand_df()
    test_X = rand_df(labeled=False)
class StandardScalerTests2(unittest.TestCase, StepTest):
    """Fixture for a default-constructed StandardScalerStep on unlabelled data (``y`` is None)."""

    step = StandardScalerStep()

    # No target is provided for this fixture.
    X = rand_df(labeled=False)
    y = None
    test_X = rand_df(labeled=False)
class SMOTETests1(unittest.TestCase, StepTest):
    """Fixture for a default-constructed SMOTEStep on classification data.

    Supplies ``step``, ``X``, ``y`` and ``test_X`` as class attributes; how
    the StepTest mixin consumes them is defined outside this view.
    """

    step = SMOTEStep()
    X, y = rand_df_classification(val_range=(0, 100))
    # labeled=False keeps test_X a single unlabelled frame like every other
    # fixture; the default rand_df() call is unpacked as (X, y) elsewhere in
    # this file, so omitting the flag would bind test_X to that pair.
    test_X = rand_df(val_range=(0, 100), labeled=False)
class StandardScalerTests3(unittest.TestCase, StepTest):
    """Fixture for StandardScalerStep with append_input=True on labelled data."""

    step = StandardScalerStep(
        append_input=True,
    )
    # Labelled training data and an unlabelled test frame.
    X, y = rand_df()
    test_X = rand_df(labeled=False)