def test_preprocessors(self):
    """Compare linear regression with no preprocessors against the default set.

    Learner order matches the RMSE index order: index 0 is the
    empty-preprocessor learner, index 1 the default one.
    """
    table = Table('housing')
    bare_learner = LinearRegressionLearner(preprocessors=[])
    default_learner = LinearRegressionLearner()
    results = CrossValidation(table, [bare_learner, default_learner], k=3)
    rmse = RMSE(results)
    self.assertLess(rmse[0], rmse[1])
def test_LinearSVR(self):
    """SVR on noiseless random linear data should score well under CV.

    Fix: seed the RNG (as test_adaboost_reg_base_estimator already does)
    so the synthetic data — and hence the asserted RMSE threshold — is
    deterministic instead of occasionally flaky.
    """
    np.random.seed(0)
    nrows, ncols = 200, 5
    X = np.random.rand(nrows, ncols)
    # Target is an exact linear combination of the features (no noise).
    y = X.dot(np.random.rand(ncols))
    data = Table(X, y)
    learn = SVRLearner()
    res = CrossValidation(data, [learn], k=2)
    self.assertLess(RMSE(res)[0], 0.15)
def test_NuSVR(self):
    """NuSVR with an RBF kernel should fit noiseless linear data closely.

    Fix: seed the RNG so the generated data — and the 0.1 RMSE
    threshold assertion — is deterministic instead of occasionally flaky.
    """
    np.random.seed(0)
    nrows, ncols = 200, 5
    X = np.random.rand(nrows, ncols)
    # Target is an exact linear combination of the features (no noise).
    y = X.dot(np.random.rand(ncols))
    data = Table(X, y)
    learn = NuSVRLearner(kernel='rbf', gamma=0.1)
    res = CrossValidation(data, [learn], k=2)
    self.assertLess(RMSE(res)[0], 0.1)
def test_SGDRegression(self):
    """SGD regression should recover a noiseless linear relationship.

    Fix: seed the RNG so the synthetic data — and the 0.1 RMSE
    threshold assertion — is deterministic instead of occasionally flaky.
    """
    np.random.seed(0)
    nrows, ncols = 500, 5
    X = np.random.rand(nrows, ncols)
    # Target is an exact linear combination of the features (no noise).
    y = X.dot(np.random.rand(ncols))
    data = Table(X, y)
    sgd = SGDRegressionLearner()
    res = CrossValidation(data, [sgd], k=3)
    self.assertLess(RMSE(res)[0], 0.1)
def test_SGDRegression(self):
    """SGD regression should recover a noiseless linear relationship.

    Fix: seed the RNG so the synthetic data — and the 0.1 RMSE
    threshold assertion — is deterministic instead of occasionally flaky.
    """
    np.random.seed(0)
    nrows, ncols = 500, 5
    X = np.random.rand(nrows, ncols)
    # Target is an exact linear combination of the features (no noise).
    y = X.dot(np.random.rand(ncols))
    data = Table.from_numpy(None, X, y)
    sgd = SGDRegressionLearner()
    cv = CrossValidation(k=3)
    res = cv(data, [sgd])
    self.assertLess(RMSE(res)[0], 0.1)
def test_adaboost_reg_base_estimator(self):
    """Boosted stumps should not outperform boosted full-depth trees.

    RMSE index 0 is the stump ensemble, index 1 the tree ensemble.
    """
    np.random.seed(0)
    boosted_stumps = SklAdaBoostRegressionLearner(
        base_estimator=TreeRegressionLearner(max_depth=1))
    boosted_trees = SklAdaBoostRegressionLearner(
        base_estimator=TreeRegressionLearner())
    results = CrossValidation(
        self.housing, [boosted_stumps, boosted_trees], k=3)
    rmse = RMSE(results)
    self.assertGreaterEqual(rmse[0], rmse[1])
def test_NuSVR(self):
    """NuSVR with an RBF kernel should fit noiseless linear data closely.

    Fix: seed the RNG so the generated data — and the 0.1 RMSE
    threshold assertion — is deterministic instead of occasionally flaky.
    """
    np.random.seed(0)
    nrows, ncols = 200, 5
    X = np.random.rand(nrows, ncols)
    # Target is an exact linear combination of the features (no noise).
    y = X.dot(np.random.rand(ncols))
    data = Table.from_numpy(None, X, y)
    learn = NuSVRLearner(kernel='rbf', gamma=0.1)
    cv = CrossValidation(k=2)
    res = cv(data, [learn])
    self.assertLess(RMSE(res)[0], 0.1)
def test_Regression(self):
    """Every real regressor must beat the MeanLearner baseline (last)."""
    learners = [
        RidgeRegressionLearner(),
        LassoRegressionLearner(),
        ElasticNetLearner(),
        ElasticNetCVLearner(),
        MeanLearner(),  # baseline; must stay last for the comparison below
    ]
    res = CrossValidation(self.housing, learners, k=2)
    rmse = RMSE(res)
    baseline = rmse[-1]
    for i in range(len(learners) - 1):
        self.assertLess(rmse[i], baseline)
def test_cv_preprocess(self):
    """Imputation via the CV driver, via learner preprocessors, or not at
    all must all yield the same RMSE on this data."""
    def fun(x, a):
        return x[:, 0] + a

    imputer = Impute()

    # 1) no explicit imputation anywhere
    plain_learner = CurveFitLearner(fun, ["a"], ["CRIM"])
    rmse1 = RMSE(CrossValidation(k=2)(self.data, [plain_learner]))[0]

    # 2) imputation supplied to the cross-validation driver
    cv_learner = CurveFitLearner(fun, ["a"], ["CRIM"])
    rmse2 = RMSE(
        CrossValidation(k=2)(self.data, [cv_learner], preprocessor=imputer)
    )[0]

    # 3) imputation attached to the learner itself
    pp_learner = CurveFitLearner(fun, ["a"], ["CRIM"], preprocessors=imputer)
    rmse3 = RMSE(CrossValidation(k=2)(self.data, [pp_learner]))[0]

    self.assertEqual(rmse1, rmse2)
    self.assertEqual(rmse2, rmse3)
def test_PolynomialLearner(self):
    """Higher polynomial degree should lower the training RMSE.

    Fix: build the table with an explicit domain via Table.from_numpy
    (the constructor style the file already uses elsewhere) instead of
    overwriting ``data.domain`` after construction, which bypasses the
    table's domain bookkeeping.
    """
    x = np.array([0.172, 0.167, 0.337, 0.420, 0.355, 0.710, 0.801, 0.876])
    y = np.array([0.784, 0.746, 0.345, 0.363, 0.366, 0.833, 0.490, 0.445])
    domain = Domain([ContinuousVariable('x')],
                    class_vars=[ContinuousVariable('y')])
    data = Table.from_numpy(domain, x.reshape(-1, 1), y)
    linear = LinearRegressionLearner()
    polynomial2 = PolynomialLearner(linear, degree=2)
    polynomial3 = PolynomialLearner(linear, degree=3)
    res = TestOnTrainingData(data, [linear, polynomial2, polynomial3])
    rmse = RMSE(res)
    # RMSE order mirrors learner order: linear, degree-2, degree-3.
    self.assertGreater(rmse[0], rmse[1])
    self.assertGreater(rmse[1], rmse[2])
def apply(self):
    """Fit the configured learner on the selected x/y columns, refresh the
    plot (error bars, scatter points, regression line, axis labels), and
    send the learner, predictor, and model coefficients to output channels.

    NOTE(review): the original source was flattened onto one line; the
    statement nesting below is reconstructed and should be confirmed
    against the upstream widget.
    """
    degree = int(self.polynomialexpansion)
    # Wrap the chosen learner (defaulting to plain linear regression) in
    # the widget's LEARNER with the requested polynomial degree.
    learner = self.LEARNER(
        preprocessors=self.preprocessors,
        degree=degree,
        learner=LinearRegressionLearner()
        if self.learner is None else self.learner)
    learner.name = self.learner_name
    predictor = None
    self.Error.all_none.clear()
    if self.data is not None:
        # Project the data onto a single-attribute domain: the selected
        # x variable plus the selected y variable as the class.
        attributes = self.x_var_model[self.x_var_index]
        class_var = self.y_var_model[self.y_var_index]
        data_table = Table(
            Domain([attributes], class_vars=[class_var]), self.data)
        # Bail out with an error when every row has NaN in either column.
        if sum(
                math.isnan(line[0]) or math.isnan(line.get_class())
                for line in data_table) == len(data_table):
            self.Error.all_none()
            self.clear_plot()
            return
        predictor = learner(data_table)
        # Run the learner's active preprocessors so the plotted points
        # match what the model actually saw.
        preprocessed_data = data_table
        for preprocessor in learner.active_preprocessors:
            preprocessed_data = preprocessor(preprocessed_data)
        x = preprocessed_data.X.ravel()
        y = preprocessed_data.Y.ravel()
        # Dense grid over the observed x range for the regression curve.
        linspace = np.linspace(
            np.nanmin(x), np.nanmax(x), 1000).reshape(-1, 1)
        values = predictor(linspace, predictor.Value)
        # Training-set evaluation feeds the RMSE/MAE readouts and the
        # per-point error bars.
        predicted = TestOnTrainingData(preprocessed_data, [learner])
        self.rmse = round(RMSE(predicted)[0], 6)
        self.mae = round(MAE(predicted)[0], 6)
        # plot error bars
        self.plot_error_bars(x, predicted.actual, predicted.predicted.ravel())
        # plot data points
        self.plot_scatter_points(x, y)
        # plot regression line
        self.plot_regression_line(linspace.ravel(), values.ravel())
        x_label = self.x_var_model[self.x_var_index]
        axis = self.plot.getAxis("bottom")
        axis.setLabel(x_label)
        y_label = self.y_var_model[self.y_var_index]
        axis = self.plot.getAxis("left")
        axis.setLabel(y_label)
        self.set_range(x, y)
    self.send("Learner", learner)
    self.send("Predictor", predictor)
    # Send model coefficients: unwrap nested model attributes until we
    # reach an object exposing sklearn-style coef_/intercept_.
    model = None
    if predictor is not None:
        model = predictor.model
        if hasattr(model, "model"):
            model = model.model
        elif hasattr(model, "skl_model"):
            model = model.skl_model
    if model is not None and hasattr(model, "coef_"):
        domain = Domain(
            [ContinuousVariable("coef", number_of_decimals=7)],
            metas=[StringVariable("name")])
        # First row folds the intercept into the constant term "1".
        coefs = [model.intercept_ + model.coef_[0]] + list(model.coef_[1:])
        names = ["1", x_label] + \
            ["{}^{}".format(x_label, i) for i in range(2, degree + 1)]
        coef_table = Table(domain, list(zip(coefs, names)))
        self.send("Coefficients", coef_table)
    else:
        self.send("Coefficients", None)
    self.send_data()
def test_XGB(self, learner_class: XGBBase):
    """Smoke test: the XGBoost learner survives 10-fold CV and scoring."""
    booster = learner_class()
    cv = CrossValidation(k=10)
    RMSE(cv(self.housing, [booster]))
def test_GBTrees(self):
    """Smoke test: CatBoost regression survives 10-fold CV and scoring."""
    booster = CatGBRegressor()
    cv = CrossValidation(k=10)
    RMSE(cv(self.housing, [booster]))
def test_continuous(self):
    """Linear regression on housing should reach RMSE below 5."""
    learner = LinearRegressionLearner()
    cv = CrossValidation(k=3)
    res = cv(self.housing, [learner])
    self.assertLess(RMSE(res), 5)
def test_cv(self):
    """Smoke test: curve fitting on CRIM survives 10-fold CV and scoring."""
    learner = CurveFitLearner(func, [], ["CRIM"])
    cv = CrossValidation(k=10)
    RMSE(cv(self.data, [learner]))
def test_adaboost_reg(self):
    """Smoke test: AdaBoost regression survives 3-fold CV and scoring."""
    boosted = SklAdaBoostRegressionLearner()
    cv = CrossValidation(k=3)
    _ = RMSE(cv(self.housing, [boosted]))
def test_RandomForestRegression(self):
    """Smoke test: random forest regression survives 10-fold CV on housing."""
    housing = Table('housing')
    learner = RandomForestRegressionLearner()
    _ = RMSE(CrossValidation(housing, [learner], k=10))
def test_RandomForestRegression(self):
    """Smoke test: random forest regression survives 10-fold CV."""
    learner = RandomForestRegressionLearner()
    _ = RMSE(CrossValidation(self.housing, [learner], k=10))
def test_adaboost_reg(self):
    """Smoke test: AdaBoost regression survives 10-fold CV and scoring."""
    boosted = SklAdaBoostRegressionLearner()
    _ = RMSE(CrossValidation(self.housing, [boosted], k=10))