コード例 #1
0
    def test_ensemble_supports_user_defined_transforms(self):
        test2_df = test_df.copy(deep=True)
        test2_df = test2_df.append(pd.DataFrame({'c1': [9, 11], 'c2': [1, 1]}))

        r1 = OrdinaryLeastSquaresRegressor(**olsrArgs)
        r1.fit(train_df)
        result1 = r1.predict(test2_df)

        r2 = OnlineGradientDescentRegressor(**ogdArgs)
        r2.fit(train_df)
        result2 = r2.predict(test2_df)

        r3 = LightGbmRegressor(**lgbmArgs)
        r3.fit(train_df)
        result3 = r3.predict(test2_df)

        r1 = OrdinaryLeastSquaresRegressor(**olsrArgs)
        r2 = OnlineGradientDescentRegressor(**ogdArgs)
        r3 = LightGbmRegressor(**lgbmArgs)

        pipeline = Pipeline([
            RangeFilter(min=0, max=10, columns='c1'),
            VotingRegressor(estimators=[r1, r2, r3], combiner='Average')
        ])
        pipeline.fit(train_df)
        result4 = pipeline.predict(test2_df)

        self.assertEqual(len(result4), 3)

        average1 = (result1[0] + result2[0] + result3[0]) / 3
        average2 = (result1[1] + result2[1] + result3[1]) / 3
        average3 = (result1[2] + result2[2] + result3[2]) / 3
        self.assertAlmostEqual(average1, result4.loc[0, 'Score'], places=5)
        self.assertAlmostEqual(average2, result4.loc[1, 'Score'], places=5)
        self.assertAlmostEqual(average3, result4.loc[2, 'Score'], places=5)
コード例 #2
0
    def test_passing_in_a_single_predictor_returns_new_pipeline(self):
        train_dropped_df = train_df.drop(['c0'], axis=1)
        test_dropped_df = test_df.drop(['c0'], axis=1)

        predictor = OnlineGradientDescentRegressor(label='c2', feature=['c1'])
        predictor.fit(train_dropped_df)
        result_1 = predictor.predict(test_dropped_df)

        combined_pipeline = Pipeline.combine_models(predictor)
        result_2 = combined_pipeline.predict(test_dropped_df)

        self.assertEqual(result_1[0], result_2.loc[0, 'Score'])
        self.assertEqual(result_1[1], result_2.loc[1, 'Score'])
        self.assertTrue(isinstance(combined_pipeline, Pipeline))
コード例 #3
0
    def test_combine_transform_and_predictor(self):
        transform = OneHotVectorizer() << 'c0'
        df = transform.fit_transform(train_df, as_binary_data_stream=True)

        predictor = OnlineGradientDescentRegressor(label='c2',
                                                   feature=['c0', 'c1'])
        predictor.fit(df)

        df = transform.transform(test_df, as_binary_data_stream=True)
        result_1 = predictor.predict(df)

        combined_pipeline = Pipeline.combine_models(transform, predictor)
        result_2 = combined_pipeline.predict(test_df)

        self.assertEqual(result_1[0], result_2.loc[0, 'Score'])
        self.assertEqual(result_1[1], result_2.loc[1, 'Score'])
コード例 #4
0
    def test_fit_predictor_with_idv(self):
        train_data = {
            'c0': ['a', 'b', 'a', 'b'],
            'c1': [1, 2, 3, 4],
            'c2': [2, 3, 4, 5]
        }
        train_df = pd.DataFrame(train_data).astype({
            'c1': np.float64,
            'c2': np.float64
        })

        test_data = {
            'c0': ['a', 'b', 'b'],
            'c1': [1.5, 2.3, 3.7],
            'c2': [2.2, 4.9, 2.7]
        }
        test_df = pd.DataFrame(test_data).astype({
            'c1': np.float64,
            'c2': np.float64
        })

        # Fit a transform pipeline to the training data
        transform_pipeline = Pipeline([OneHotVectorizer() << 'c0'])
        transform_pipeline.fit(train_df)
        df = transform_pipeline.transform(train_df, as_binary_data_stream=True)

        # Fit a predictor pipeline given a transformed BinaryDataStream
        predictor = OnlineGradientDescentRegressor(label='c2',
                                                   feature=['c0', 'c1'])
        predictor_pipeline = Pipeline([predictor])
        predictor_pipeline.fit(df)

        # Perform a prediction given the test data using
        # the transform and predictor defined previously.
        df = transform_pipeline.transform(test_df, as_binary_data_stream=True)
        result_1 = predictor_pipeline.predict(df)

        # Create expected result
        xf = OneHotVectorizer() << 'c0'
        df = xf.fit_transform(train_df)
        predictor = OnlineGradientDescentRegressor(
            label='c2', feature=['c0.a', 'c0.b', 'c1'])
        predictor.fit(df)
        df = xf.transform(test_df)
        expected_result = predictor.predict(df)

        self.assertTrue(result_1.loc[:, 'Score'].equals(expected_result))
コード例 #5
0
    def test_ensemble_with_average_and_median_combiner(self):
        r1 = OrdinaryLeastSquaresRegressor(**olsrArgs)
        r1.fit(train_df)
        result1 = r1.predict(test_df)

        r2 = OnlineGradientDescentRegressor(**ogdArgs)
        r2.fit(train_df)
        result2 = r2.predict(test_df)

        r3 = LightGbmRegressor(**lgbmArgs)
        r3.fit(train_df)
        result3 = r3.predict(test_df)

        r1 = OrdinaryLeastSquaresRegressor(**olsrArgs)
        r2 = OnlineGradientDescentRegressor(**ogdArgs)
        r3 = LightGbmRegressor(**lgbmArgs)

        pipeline = Pipeline([VotingRegressor(estimators=[r1, r2, r3], combiner='Average')])
        pipeline.fit(train_df)
        result4 = pipeline.predict(test_df)

        average1 = (result1[0] + result2[0] + result3[0]) / 3
        average2 = (result1[1] + result2[1] + result3[1]) / 3
        self.assertAlmostEqual(average1, result4.loc[0, 'Score'], places=5)
        self.assertAlmostEqual(average2, result4.loc[1, 'Score'], places=5)

        r1 = OrdinaryLeastSquaresRegressor(**olsrArgs)
        r2 = OnlineGradientDescentRegressor(**ogdArgs)
        r3 = LightGbmRegressor(**lgbmArgs)

        pipeline = Pipeline([VotingRegressor(estimators=[r1, r2, r3], combiner='Median')])
        pipeline.fit(train_df)
        result4 = pipeline.predict(test_df)

        median1 = sorted([result1.loc[0], result2.loc[0], result3.loc[0]])[1]
        median2 = sorted([result1.loc[1], result2.loc[1], result3.loc[1]])[1]

        self.assertEqual(median1, result4.loc[0, 'Score'])
        self.assertEqual(median2, result4.loc[1, 'Score'])
コード例 #6
0
    'label': 'c2',
    'random_state': 1,
    'number_of_leaves': 200,
    'minimum_example_count_per_leaf': 1,
    'normalize': 'Yes'
}

if show_individual_predictions:
    r1 = OrdinaryLeastSquaresRegressor(**olsrArgs)
    r1.fit(train_df)
    result = r1.predict(test_df)
    print(result)

    r2 = OnlineGradientDescentRegressor(**ogdArgs)
    r2.fit(train_df)
    result = r2.predict(test_df)
    print(result)

    r3 = LightGbmRegressor(**lgbmArgs)
    r3.fit(train_df)
    result = r3.predict(test_df)
    print(result)

# Perform a prediction using an ensemble
# of all three of the above predictors.

r1 = OrdinaryLeastSquaresRegressor(**olsrArgs)
r2 = OnlineGradientDescentRegressor(**ogdArgs)
r3 = LightGbmRegressor(**lgbmArgs)
pipeline = Pipeline(
    [VotingRegressor(estimators=[r1, r2, r3], combiner='Average')])