# Example #1
 def test_piecewise_regressor_issue(self):
     """Fitting on a column-shaped target still yields flat predictions.

     The target is reshaped to ``(n, 1)`` on purpose; ``predict`` must
     nevertheless return a 1D array of length ``n``.
     """
     features, target = make_regression(10000, n_features=1, n_informative=1,  # pylint: disable=W0632
                                        n_targets=1)
     target = target.reshape((-1, 1))
     piecewise = PiecewiseRegressor(
         binner=DecisionTreeRegressor(min_samples_leaf=300))
     piecewise.fit(features, target)
     predictions = piecewise.predict(features)
     self.assertEqual(predictions.shape, (features.shape[0], ))
# Example #2
 def test_piecewise_regressor_pandas(self):
     """A pandas DataFrame input must match a plain linear regression.

     With only two samples the piecewise model collapses to a single
     bucket, so its predictions equal the baseline's.
     """
     frame = pandas.DataFrame(numpy.array([[0.1, 0.2], [0.2, 0.3]]))
     target = numpy.array([1., 1.1])
     baseline = LinearRegression(fit_intercept=False)
     baseline.fit(frame, target)
     piecewise = PiecewiseRegressor()
     piecewise.fit(frame, target)
     expected = baseline.predict(frame)
     obtained = piecewise.predict(frame)
     self.assertEqual(expected, obtained)
# Example #3
 def test_piecewise_regressor_intercept_weights3(self):
     """Uniform sample weights must not change the linear-regression result.

     Three weighted samples, intercept enabled: the piecewise model should
     reproduce the baseline exactly while still producing varying outputs.
     """
     features = numpy.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.3]])
     target = numpy.array([1., 1.1, 1.2])
     weights = numpy.array([1., 1., 1.])
     baseline = LinearRegression(fit_intercept=True)
     baseline.fit(features, target, weights)
     piecewise = PiecewiseRegressor(verbose=False)
     piecewise.fit(features, target, weights)
     expected = baseline.predict(features)
     obtained = piecewise.predict(features)
     # Guard against a degenerate constant prediction before comparing.
     self.assertNotEqual(obtained.min(), obtained.max())
     self.assertEqual(expected, obtained)
# Example #4
 def test_piecewise_regressor_diff(self):
     """An outlier splits the data into two buckets with perfect fit.

     The fourth sample (``10``) forces the binner to produce two leaves;
     the global predictions must agree with each per-bucket estimator and
     the overall score must be 1.
     """
     features = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
     target = numpy.array([1., 1.1, 1.2, 10, 1.4])
     baseline = LinearRegression()
     baseline.fit(features, target)
     piecewise = PiecewiseRegressor(verbose=False)
     piecewise.fit(features, target)
     base_pred = baseline.predict(features)
     self.assertNotEmpty(base_pred)
     piece_pred = piecewise.predict(features)
     self.assertEqual(len(piecewise.estimators_), 2)
     first_bucket = piecewise.estimators_[0].predict(features[:3, :])
     second_bucket = piecewise.estimators_[1].predict(features[3:, :])
     self.assertEqual(piece_pred[:3], first_bucket)
     self.assertEqual(piece_pred[-2:], second_bucket)
     score = piecewise.score(features, target)
     self.assertEqual(score, 1)
# Example #5
 def test_piecewise_regressor_no_intercept_bins(self):
     """The ``"bins"`` binner produces well-shaped predictions and scores.

     Checks output shapes, score types, the one-hot width emitted by the
     bin transformer, and that predictions are not constant.
     """
     features = numpy.array(
         [[0.1, 0.2], [0.2, 0.3], [0.2, 0.35], [0.2, 0.36]])
     target = numpy.array([1., 1.1, 1.15, 1.2])
     baseline = LinearRegression(fit_intercept=False)
     baseline.fit(features, target)
     piecewise = PiecewiseRegressor(binner="bins")
     piecewise.fit(features, target)
     base_pred = baseline.predict(features)
     piece_pred = piecewise.predict(features)
     self.assertEqual(base_pred.shape, (4, ))
     self.assertEqual(piece_pred.shape, (4, ))
     base_score = baseline.score(features, target)
     piece_score = piecewise.score(features, target)
     self.assertIsInstance(base_score, float)
     self.assertIsInstance(piece_score, float)
     # The number of bins may vary slightly across versions; accept a range.
     encoded = piecewise.binner_.transform(features)
     self.assertIn(encoded.shape, ((4, 7), (4, 8), (4, 9), (4, 10)))
     self.assertNotEqual(piece_pred.min(), piece_pred.max())
# Example #6
 def test_piecewise_regressor_no_intercept(self):
     """Default tree binner: shapes, scores and decision path all check out.

     Also verifies the binner's decision path covers 8 node visits for the
     4 samples and that more than one sub-estimator was trained.
     """
     features = numpy.array(
         [[0.1, 0.2], [0.2, 0.3], [0.2, 0.35], [0.2, 0.36]])
     target = numpy.array([1., 1.1, 1.15, 1.2])
     baseline = LinearRegression(fit_intercept=False)
     baseline.fit(features, target)
     piecewise = PiecewiseRegressor()
     piecewise.fit(features, target)
     base_pred = baseline.predict(features)
     piece_pred = piecewise.predict(features)
     self.assertEqual(base_pred.shape, (4, ))
     self.assertEqual(piece_pred.shape, (4, ))
     base_score = baseline.score(features, target)
     piece_score = piecewise.score(features, target)
     binner_score = piecewise.binner_.score(features, target)
     self.assertIsInstance(base_score, float)
     self.assertIsInstance(piece_score, float)
     self.assertIsInstance(binner_score, float)
     visited = piecewise.binner_.decision_path(features)
     self.assertEqual(visited.sum(), 8)
     self.assertNotEqual(piece_pred.min(), piece_pred.max())
     self.assertGreater(piecewise.n_estimators_, 1)
# Example #7
 def test_piecewise_regressor_list(self):
     """Fitting on a plain Python list (not an array) must raise TypeError."""
     rows = [[0.1, 0.2], [0.2, 0.3]]
     target = numpy.array([1., 1.1])
     piecewise = PiecewiseRegressor()
     self.assertRaise(lambda: piecewise.fit(rows, target), TypeError)
# Example #8
 def test_piecewise_regressor_raise(self):
     """A multi-target regression problem must be rejected with RuntimeError."""
     features, targets = make_regression(10000, n_features=2, n_informative=2,  # pylint: disable=W0632
                                         n_targets=2)
     piecewise = PiecewiseRegressor(
         binner=DecisionTreeRegressor(min_samples_leaf=300))
     self.assertRaise(lambda: piecewise.fit(features, targets), RuntimeError)
# Example #9 (fragment: the opening of this if/else is missing from this excerpt)
            verbose=True)
    else:
        model = DecisionTreeRegressor(splitter="best",
                                      max_leaf_nodes=40,
                                      criterion="friedman_mse")
    # model = LinearRegression()
    dataset_features = [f for f in features if f in data.columns]
    input_features = dataset_features + ["IThreshold", "CThreshold"
                                         ] + c + parameters
    x = data[input_features]
    y = data[measures[0]]

    d = x.copy()
    X = squish(d)

    pred = model.fit(np.asarray(X), transform(y))
    print()
    print(method)
    y_pred = untransform(pred.predict(X.values))
    score = (((y - y_pred)**2).sum()**0.5 / len(y_pred))
    if score < best_model.get(method, (None, 1e12))[1]:
        best_model[method] = pred, score
    else:
        pred, _ = best_model[method]
    y_pred = untransform(pred.predict(X.values))
    print(((y - y_pred)**2).sum()**0.5 / len(y_pred))
    print(np.quantile(np.abs(y - y_pred), 0.5))
    print(np.quantile(np.abs(y - y_pred), 0.67))
    print(np.quantile(np.abs(y - y_pred), 0.9))
    print(np.quantile(np.abs(y - y_pred), 0.95))