def test_piecewise_regressor_issue(self):
    """A (n, 1) column-vector target must still produce 1D predictions."""
    data, target = make_regression(10000, n_features=1, n_informative=1,  # pylint: disable=W0632
                                   n_targets=1)
    # Reshape the target into a column vector to reproduce the reported issue.
    target = target.reshape((-1, 1))
    reg = PiecewiseRegressor(
        binner=DecisionTreeRegressor(min_samples_leaf=300))
    reg.fit(data, target)
    preds = reg.predict(data)
    # Predictions must come back flattened, one value per sample.
    self.assertEqual(preds.shape, (data.shape[0], ))
def test_piecewise_regressor_pandas(self):
    """Fitting on a pandas DataFrame must match a plain linear regression."""
    frame = pandas.DataFrame(numpy.array([[0.1, 0.2], [0.2, 0.3]]))
    target = numpy.array([1., 1.1])
    baseline = LinearRegression(fit_intercept=False)
    baseline.fit(frame, target)
    piecewise = PiecewiseRegressor()
    piecewise.fit(frame, target)
    expected = baseline.predict(frame)
    actual = piecewise.predict(frame)
    # With so few samples, the piecewise model degenerates to the linear fit.
    self.assertEqual(expected, actual)
def test_piecewise_regressor_intercept_weights3(self):
    """Uniform sample weights must not change the fitted predictions."""
    data = numpy.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.3]])
    target = numpy.array([1., 1.1, 1.2])
    weights = numpy.array([1., 1., 1.])
    baseline = LinearRegression(fit_intercept=True)
    baseline.fit(data, target, weights)
    piecewise = PiecewiseRegressor(verbose=False)
    piecewise.fit(data, target, weights)
    expected = baseline.predict(data)
    actual = piecewise.predict(data)
    # Predictions must not be constant and must agree with the baseline.
    self.assertNotEqual(actual.min(), actual.max())
    self.assertEqual(expected, actual)
def test_piecewise_regressor_diff(self):
    """An outlier target splits the data into two local estimators."""
    data = numpy.array([[0.1], [0.2], [0.3], [0.4], [0.5]])
    target = numpy.array([1., 1.1, 1.2, 10, 1.4])
    baseline = LinearRegression()
    baseline.fit(data, target)
    piecewise = PiecewiseRegressor(verbose=False)
    piecewise.fit(data, target)
    base_pred = baseline.predict(data)
    self.assertNotEmpty(base_pred)
    piece_pred = piecewise.predict(data)
    # The outlier at index 3 forces exactly two bins.
    self.assertEqual(len(piecewise.estimators_), 2)
    left = piecewise.estimators_[0].predict(data[:3, :])
    right = piecewise.estimators_[1].predict(data[3:, :])
    # Each local estimator must reproduce the global prediction on its bin.
    self.assertEqual(piece_pred[:3], left)
    self.assertEqual(piece_pred[-2:], right)
    score = piecewise.score(data, target)
    # Two local linear fits interpolate these five points perfectly.
    self.assertEqual(score, 1)
def test_piecewise_regressor_no_intercept_bins(self):
    """The "bins" binner must yield valid shapes, scores, and varying output."""
    data = numpy.array([[0.1, 0.2], [0.2, 0.3], [0.2, 0.35], [0.2, 0.36]])
    target = numpy.array([1., 1.1, 1.15, 1.2])
    baseline = LinearRegression(fit_intercept=False)
    baseline.fit(data, target)
    piecewise = PiecewiseRegressor(binner="bins")
    piecewise.fit(data, target)
    base_pred = baseline.predict(data)
    piece_pred = piecewise.predict(data)
    self.assertEqual(base_pred.shape, (4, ))
    self.assertEqual(piece_pred.shape, (4, ))
    base_score = baseline.score(data, target)
    piece_score = piecewise.score(data, target)
    self.assertIsInstance(base_score, float)
    self.assertIsInstance(piece_score, float)
    # The binner one-hot encodes samples; the bin count may vary slightly.
    assignments = piecewise.binner_.transform(data)
    self.assertIn(assignments.shape, ((4, 7), (4, 8), (4, 9), (4, 10)))
    self.assertNotEqual(piece_pred.min(), piece_pred.max())
def test_piecewise_regressor_no_intercept(self):
    """The default tree binner must produce several estimators and a decision path."""
    data = numpy.array([[0.1, 0.2], [0.2, 0.3], [0.2, 0.35], [0.2, 0.36]])
    target = numpy.array([1., 1.1, 1.15, 1.2])
    baseline = LinearRegression(fit_intercept=False)
    baseline.fit(data, target)
    piecewise = PiecewiseRegressor()
    piecewise.fit(data, target)
    base_pred = baseline.predict(data)
    piece_pred = piecewise.predict(data)
    self.assertEqual(base_pred.shape, (4, ))
    self.assertEqual(piece_pred.shape, (4, ))
    base_score = baseline.score(data, target)
    piece_score = piecewise.score(data, target)
    tree_score = piecewise.binner_.score(data, target)
    self.assertIsInstance(base_score, float)
    self.assertIsInstance(piece_score, float)
    self.assertIsInstance(tree_score, float)
    # Every sample visits the root plus one leaf: 4 samples * 2 nodes = 8.
    paths = piecewise.binner_.decision_path(data)
    self.assertEqual(paths.sum(), 8)
    self.assertNotEqual(piece_pred.min(), piece_pred.max())
    self.assertGreater(piecewise.n_estimators_, 1)
def test_piecewise_regressor_list(self):
    """Fitting on a plain Python list (not an array) must raise TypeError."""
    rows = [[0.1, 0.2], [0.2, 0.3]]
    target = numpy.array([1., 1.1])
    piecewise = PiecewiseRegressor()
    self.assertRaise(lambda: piecewise.fit(rows, target), TypeError)
def test_piecewise_regressor_raise(self):
    """A multi-output (n_targets=2) problem is unsupported and must raise."""
    data, targets = make_regression(10000, n_features=2, n_informative=2,  # pylint: disable=W0632
                                    n_targets=2)
    reg = PiecewiseRegressor(
        binner=DecisionTreeRegressor(min_samples_leaf=300))
    self.assertRaise(lambda: reg.fit(data, targets), RuntimeError)
verbose=True) else: model = DecisionTreeRegressor(splitter="best", max_leaf_nodes=40, criterion="friedman_mse") # model = LinearRegression() dataset_features = [f for f in features if f in data.columns] input_features = dataset_features + ["IThreshold", "CThreshold" ] + c + parameters x = data[input_features] y = data[measures[0]] d = x.copy() X = squish(d) pred = model.fit(np.asarray(X), transform(y)) print() print(method) y_pred = untransform(pred.predict(X.values)) score = (((y - y_pred)**2).sum()**0.5 / len(y_pred)) if score < best_model.get(method, (None, 1e12))[1]: best_model[method] = pred, score else: pred, _ = best_model[method] y_pred = untransform(pred.predict(X.values)) print(((y - y_pred)**2).sum()**0.5 / len(y_pred)) print(np.quantile(np.abs(y - y_pred), 0.5)) print(np.quantile(np.abs(y - y_pred), 0.67)) print(np.quantile(np.abs(y - y_pred), 0.9)) print(np.quantile(np.abs(y - y_pred), 0.95))