예제 #1
0
    def test_learner_datasets(self):
        """Train a preprocessor + estimator pipeline on each bundled dataset.

        Each case pairs a learner with a (train, test) dataset tuple and a
        target score. The best score in the training history must reach the
        target; the score on the test split is printed but not asserted.
        """
        seed_kwargs = {"random_seed": 0}
        cases = [
            # Boston housing dataset
            (MLPRegressor, load_boston(**seed_kwargs), 0.85),
            # Titanic dataset
            (MLPClassifier, load_titanic(**seed_kwargs), 0.75),
        ]

        for learner, (train_ds, holdout_ds), target_score in cases:
            preprocessor_step = PipelineStep(
                name="preprocessor",
                learner=StandardPreprocessor,
                kwargs={
                    "continuous": train_ds.continuous,
                    "categorical": train_ds.categorical,
                },
            )
            estimator_step = PipelineStep(
                name="estimator", learner=learner, kwargs=seed_kwargs)
            pipeline = Pipeline([preprocessor_step, estimator_step])

            history = pipeline.train(
                train_ds.input_fn, max_score=target_score, progress=True)
            holdout_score = pipeline.score(*holdout_ds[:])

            # Best training score is both asserted on and reported
            best_score = max(history.scores)
            self.assertGreaterEqual(best_score, target_score, train_ds.name)
            print("%s\t%.3f\t%.3f" % (train_ds.name, best_score, holdout_score))
예제 #2
0
    def test_chaining_feature_drop_encoder_onehot_scaler_variance_threshold(self):
        """Fit and transform every chain of transformers drawn from a fixed pool.

        The pool holds five steps; every length-5 combination with replacement
        is built into a pipeline and run against the same five input columns.
        No output values are checked: the test passes as long as no chain
        raises. On failure the offending pipeline's steps are printed before
        the exception is re-raised.
        """
        n = 100
        nums = list(range(10))
        # One constant column, three continuous columns, one unsigned-int column
        data_0 = [numpy.ones(n) * -1]
        data_1 = [numpy.random.random(n) * 10 for _ in range(3)]
        data_2 = [generate_array_uints(n=n, max_int=10, random_seed=0)]
        data_concat = data_0 + data_1 + data_2

        all_steps = (
            PipelineStep(learner=DummyTransformer, kwargs={}),
            PipelineStep(learner=FeatureDrop, kwargs={"columns": [0]}),
            PipelineStep(learner=MinMaxScaler, kwargs={"columns": [1, 2, 3]}),
            PipelineStep(learner=LabelEncoder, kwargs={"columns": {0: [], 4: nums}}),
            PipelineStep(learner=VarianceThreshold, kwargs={"columns": [1, 2, 3]}),
        )

        transformers = itertools.combinations_with_replacement(all_steps, len(all_steps))
        for steps in transformers:
            pipeline = Pipeline(steps)
            try:
                pipeline.fit(data_concat).transform(data_concat)
            except Exception:
                # Report which chain broke, then propagate the original error
                print("Pipeline failed")
                for step in pipeline.steps:
                    print(step, getattr(step[1], "classes_", None))
                # Bare raise re-raises the active exception unchanged
                raise
예제 #3
0
 def test_pipeline_builtin(self):
     """Run the ``test_learner`` check on Pipeline for three step layouts.

     The layouts are: transformer only, estimator only, and transformer
     followed by estimator.
     """

     def transformer_step():
         return PipelineStep(name="transformer", learner=DummyTransformer)

     def estimator_step():
         return PipelineStep(name="estimator", learner=DummyRegressor)

     # Fresh step objects for each layout, built in the same order as before
     layouts = (
         [transformer_step()],
         [estimator_step()],
         [transformer_step(), estimator_step()],
     )
     for step_list in layouts:
         self.assertTrue(test_learner(Pipeline, steps=step_list))
예제 #4
0
    def test_fit_pipeline(self):
        """Check that ``fit`` returns a value equal to the pipeline it was called on.

        Covers a transformer-only, an estimator-only and a combined pipeline.
        """
        candidates = [
            Pipeline([PipelineStep(name="transformer", learner=DummyTransformer)]),
            Pipeline([PipelineStep(name="estimator", learner=DummyRegressor)]),
            Pipeline([
                PipelineStep(name="transformer", learner=DummyTransformer),
                PipelineStep(name="estimator", learner=DummyRegressor),
            ]),
        ]
        data = numpy.random.random(10)
        target = numpy.ones(10)

        for candidate in candidates:
            fitted = candidate.fit(data, y=target)
            self.assertEqual(candidate, fitted)
예제 #5
0
    def test_feature_drop_thrice_different(self):
        """Chain three FeatureDrop steps targeting columns 0, 1 and 2.

        The input has four constant columns whose value equals their index,
        so the single remaining column is expected to be all 3s.
        """
        n = 100
        columns = [numpy.ones(n) * value for value in range(4)]
        drop_steps = [
            PipelineStep(name=step_name, learner=FeatureDrop, kwargs={"columns": [dropped]})
            for step_name, dropped in ((1, 0), (2, 1), (3, 2))
        ]
        pipeline = Pipeline(drop_steps)

        output = pipeline.fit(columns).transform(columns)
        self.assertEqual(1, len(output))

        # Only the column that held the constant 3 should be left
        expected = (numpy.ones(n) * 3).tolist()
        self.assertListEqual(expected, output[0].tolist())
예제 #6
0
    def test_feature_drop_twice_same(self):
        """Chain two FeatureDrop steps that both target column 0.

        The input has four constant columns whose value equals their index;
        three columns holding 1, 2 and 3 are expected in the output.
        """
        n = 100
        columns = [numpy.ones(n) * value for value in range(4)]
        first_drop = PipelineStep(name=1, learner=FeatureDrop, kwargs={"columns": [0]})
        second_drop = PipelineStep(name=2, learner=FeatureDrop, kwargs={"columns": [0]})
        pipeline = Pipeline([first_drop, second_drop])

        # The expectation is that dropping a column that no longer exists is a no-op
        output = pipeline.fit(columns).transform(columns)
        self.assertEqual(3, len(output))
        for idx in (0, 1, 2):
            expected = (numpy.ones(n) * (idx + 1)).tolist()
            self.assertListEqual(expected, output[idx].tolist())
예제 #7
0
    def test_fit_predict_pipeline(self):
        """Fit an estimator-only pipeline and check predictions equal the targets."""
        estimator_step = PipelineStep(name="estimator", learner=DummyRegressor)
        pipeline = Pipeline([estimator_step])
        features = numpy.random.random(10)
        labels = numpy.ones(10)

        fitted = pipeline.fit(features, labels)
        predictions = fitted.predict(features)
        self.assertTrue(numpy.array_equal(labels, predictions))
예제 #8
0
    def test_fit_transform_pipeline(self):
        """Fit a transformer-only pipeline and check transform returns equal data."""
        transformer_step = PipelineStep(name="transformer", learner=DummyTransformer)
        pipeline = Pipeline([transformer_step])
        original = numpy.random.random(10)

        fitted = pipeline.fit(original)
        transformed = fitted.transform(original)
        self.assertTrue(numpy.array_equal(original, transformed))
예제 #9
0
    def test_feature_drop_following_encoder(self):
        """Run FeatureDrop on column 0, then a LabelEncoder keyed on columns 0 and 4.

        The input has four random columns and one unsigned-int column; four
        output columns are expected, the last with a maximum of at least 1.
        """
        n = 100
        categories = list(range(10))
        continuous_cols = [numpy.random.random(n) * 10 for _ in range(4)]
        categorical_col = generate_array_uints(n=n, max_int=10, random_seed=0)
        columns = continuous_cols + [categorical_col]

        drop_step = PipelineStep(name=1, learner=FeatureDrop, kwargs={"columns": [0]})
        encode_step = PipelineStep(
            name=2,
            learner=LabelEncoder,
            kwargs={"columns": {0: categories, 4: categories}},
        )
        pipeline = Pipeline([drop_step, encode_step])

        output = pipeline.fit(columns).transform(columns)
        self.assertEqual(4, len(output))
        last_column = output[-1]
        self.assertGreaterEqual(last_column.max(), 1)
예제 #10
0
    def test_feature_drop_following_scaler(self):
        """Run FeatureDrop on column 0, then a MinMaxScaler on columns 1, 2 and 3.

        The input has one constant column and four random columns. Four output
        columns are expected: all but the last must have a maximum of at most
        1, while the last must have a maximum of at least 1.
        """
        n = 100
        constant_col = numpy.ones(n) * -1
        random_cols = [numpy.random.random(n) * 10 for _ in range(4)]
        columns = [constant_col] + random_cols

        drop_step = PipelineStep(name=1, learner=FeatureDrop, kwargs={"columns": [0]})
        scale_step = PipelineStep(name=2, learner=MinMaxScaler, kwargs={"columns": [1, 2, 3]})
        pipeline = Pipeline([drop_step, scale_step])

        output = pipeline.fit(columns).transform(columns)
        self.assertEqual(4, len(output))
        self.assertGreaterEqual(output[-1].max(), 1)
        for position, column in enumerate(output[:-1]):
            self.assertLessEqual(column.max(), 1, "Column %d" % position)
예제 #11
0
    def test_feature_drop_twice_different(self):
        """Chain FeatureDrop on column 0 and on column 1, then send a change event.

        The input has four constant columns whose value equals their index, so
        two columns holding 2 and 3 are expected. Afterwards a ChangeMap is sent
        to the pipeline and fit/transform is run again; that second result is
        not asserted on, so it only checks that the calls do not raise.
        """
        n = 100
        columns = [numpy.ones(n) * value for value in range(4)]
        first_drop = PipelineStep(name=1, learner=FeatureDrop, kwargs={"columns": [0]})
        second_drop = PipelineStep(name=2, learner=FeatureDrop, kwargs={"columns": [1]})
        pipeline = Pipeline([first_drop, second_drop])

        output = pipeline.fit(columns).transform(columns)
        self.assertEqual(2, len(output))
        for idx in (0, 1):
            expected = (numpy.ones(n) * (idx + 2)).tolist()
            self.assertListEqual(expected, output[idx].tolist())

        # Send input change event
        shape_change = ChangeMap(len(output), idx_add=[0], idx_del=[0])
        pipeline.on_input_shape_changed(shape_change)
        pipeline.fit(columns).transform(columns)
예제 #12
0
    def test_encoder_onehot_following_feature_drop(self):
        """Run FeatureDrop on columns 1, 2 and 3, then a OneHotEncoder keyed on column 0.

        The input has one unsigned-int column followed by four random columns.
        The output is expected to have ``len(nums) + 1`` columns: all but the
        last with a maximum of at least 1, the last with a maximum of at most 1.
        """
        n = 100
        categories = list(range(10))
        categorical_col = generate_array_uints(n=n, max_int=10, random_seed=0)
        random_cols = [numpy.random.random(n) for _ in range(4)]
        columns = [categorical_col] + random_cols

        drop_step = PipelineStep(name=1, learner=FeatureDrop, kwargs={"columns": [1, 2, 3]})
        onehot_step = PipelineStep(name=2, learner=OneHotEncoder, kwargs={"columns": {0: categories}})
        pipeline = Pipeline([drop_step, onehot_step])

        output = pipeline.fit(columns).transform(columns)
        self.assertEqual(len(categories) + 1, len(output))
        for position, column in enumerate(output[:-1]):
            self.assertGreaterEqual(column.max(), 1, position)
        self.assertLessEqual(output[-1].max(), 1)
예제 #13
0
    def test_feature_drop_following_scaler_then_encoder(self):
        """Run FeatureDrop, MinMaxScaler and LabelEncoder in sequence.

        The input has one constant column, three random columns and one
        unsigned-int column. Four output columns are expected: the first with a
        maximum of at least 1, the middle ones with a maximum of at most 1.
        """
        n = 100
        categories = list(range(10))
        constant_col = numpy.ones(n) * -1
        random_cols = [numpy.random.random(n) * 10 for _ in range(3)]
        categorical_col = generate_array_uints(n=n, max_int=10, random_seed=0)
        columns = [constant_col] + random_cols + [categorical_col]

        drop_step = PipelineStep(name=1, learner=FeatureDrop, kwargs={"columns": [0]})
        scale_step = PipelineStep(name=2, learner=MinMaxScaler, kwargs={"columns": [1, 2, 3]})
        encode_step = PipelineStep(name=3, learner=LabelEncoder, kwargs={"columns": {4: categories}})
        pipeline = Pipeline([drop_step, scale_step, encode_step])

        output = pipeline.fit(columns).transform(columns)
        self.assertEqual(4, len(output))
        self.assertGreaterEqual(output[0].max(), 1)
        for column in output[1:-1]:
            self.assertLessEqual(column.max(), 1)
예제 #14
0
    def test_inverse_transform_pipeline(self):
        """Check inverse_transform before and after fitting a transformer-only pipeline.

        Before ``fit`` the call must raise AssertionError. After fitting, the
        transformed data and the inverse-transformed data must both equal the
        original input.
        """
        pipeline = Pipeline(
            [PipelineStep(name="transformer", learner=DummyTransformer)])
        data1 = numpy.random.random(10)

        # Calling inverse transform without fitting first should fail
        with self.assertRaises(AssertionError):
            pipeline.inverse_transform(data1)

        data2 = pipeline.fit(data1).transform(data1)
        # Invert the transformed data so the round trip itself is what gets checked
        data3 = pipeline.inverse_transform(data2)
        self.assertTrue(numpy.array_equal(data1, data2))
        self.assertTrue(numpy.array_equal(data1, data3))
예제 #15
0
 def test_create_pipeline(self):
     """Build a single-transformer pipeline and check the result is truthy."""
     only_step = PipelineStep(name="transformer", learner=DummyTransformer)
     created = Pipeline([only_step])
     self.assertTrue(created)
예제 #16
0
 def test_iter_parameters_pipeline(self):
     """Check what ``_iter_parameters`` yields for a single-step pipeline.

     The expected list holds one ``{"steps": [...]}`` entry for each pairing
     of ``a`` in (1, 2, 3) with ``b`` in ("x", "y", "z"), with ``a`` varying
     slowest.
     """
     tunable_step = PipelineStep(name="1",
                                 learner=CustomEstimator,
                                 kwargs=CustomEstimator.hyperparameters(None))
     search_space = Pipeline.hyperparameters(None, [tunable_step])

     # Same nine entries, in the same order, as writing them out by hand
     expected = [
         {
             "steps": [
                 PipelineStep(name="1",
                              learner=CustomEstimator,
                              kwargs={
                                  "a": a_value,
                                  "b": b_value
                              })
             ]
         }
         for a_value in (1, 2, 3)
         for b_value in ("x", "y", "z")
     ]

     actual = list(_iter_parameters(Pipeline, search_space))
     self.assertListEqual(expected, actual)