def test_tweedie_convergence(max_depth, split_criterion):
    np.random.seed(33)
    bootstrap = None
    max_features = 1.0
    n_estimators = 1
    min_impurity_decrease = 1e-5
    n_datapoints = 1000
    tweedie = {
        "poisson": {"power": 1, "gen": np.random.poisson, "args": [0.01]},
        "gamma": {"power": 2, "gen": np.random.gamma, "args": [2.0]},
        "inverse_gaussian": {"power": 3, "gen": np.random.wald, "args": [0.1, 2.0]},
    }
    # generate a random dataset with a Tweedie-distributed target
    X = np.random.random((n_datapoints, 4)).astype(np.float32)
    y = tweedie[split_criterion]["gen"](*tweedie[split_criterion]["args"],
                                        size=n_datapoints).astype(np.float32)
    tweedie_preds = curfr(
        split_criterion=split_criterion,
        max_depth=max_depth,
        n_estimators=n_estimators,
        bootstrap=bootstrap,
        max_features=max_features,
        min_impurity_decrease=min_impurity_decrease,
    ).fit(X, y).predict(X)
    mse_preds = curfr(
        split_criterion=2,
        max_depth=max_depth,
        n_estimators=n_estimators,
        bootstrap=bootstrap,
        max_features=max_features,
        min_impurity_decrease=min_impurity_decrease,
    ).fit(X, y).predict(X)
    # mean_tweedie_deviance with power >= 1 needs strictly positive
    # predictions, so mask out non-positive ones
    mask = mse_preds > 0
    mse_tweedie_deviance = mean_tweedie_deviance(
        y[mask], mse_preds[mask], power=tweedie[split_criterion]["power"])
    tweedie_tweedie_deviance = mean_tweedie_deviance(
        y[mask], tweedie_preds[mask], power=tweedie[split_criterion]["power"])
    # a model trained on Tweedie data with the matching Tweedie criterion
    # must do at least as well on the Tweedie loss as the MSE criterion
    assert mse_tweedie_deviance >= tweedie_tweedie_deviance
def test_tweedie_deviance_continuity():
    n_samples = 100

    y_true = np.random.RandomState(0).rand(n_samples) + 0.1
    y_pred = np.random.RandomState(1).rand(n_samples) + 0.1

    assert_allclose(
        mean_tweedie_deviance(y_true, y_pred, power=0 - 1e-10),
        mean_tweedie_deviance(y_true, y_pred, power=0),
    )

    # As we get closer to the limit, with 1e-12 difference the absolute
    # tolerance to pass the below check increases. There are likely
    # numerical precision issues on the edges of the different definition
    # regions.
    assert_allclose(
        mean_tweedie_deviance(y_true, y_pred, power=1 + 1e-10),
        mean_tweedie_deviance(y_true, y_pred, power=1),
        atol=1e-6,
    )

    assert_allclose(
        mean_tweedie_deviance(y_true, y_pred, power=2 - 1e-10),
        mean_tweedie_deviance(y_true, y_pred, power=2),
        atol=1e-6,
    )

    assert_allclose(
        mean_tweedie_deviance(y_true, y_pred, power=2 + 1e-10),
        mean_tweedie_deviance(y_true, y_pred, power=2),
        atol=1e-6,
    )
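# A hedged sketch (not part of the test above) of why these continuity checks
# matter: the Tweedie unit deviance is defined piecewise, and the generic
# branch must approach the closed-form Poisson branch as power -> 1.
# `generic_branch` is a hypothetical helper written here for illustration.
import numpy as np

def generic_branch(y, mu, p):
    # generic Tweedie unit deviance, valid away from p in {0, 1, 2}
    return 2 * (y**(2 - p) / ((1 - p) * (2 - p))
                - y * mu**(1 - p) / (1 - p)
                + mu**(2 - p) / (2 - p))

y, mu = 1.3, 0.7
for p in (1 + 1e-4, 1 + 1e-6, 1 + 1e-8):
    print(p, generic_branch(y, mu, p))   # converges to the Poisson value
print("poisson", 2 * (y * np.log(y / mu) - y + mu))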
def evaluate_forecast(self):
    n = min(len(self.validation_data), len(self.forecasts))
    y_forecast = self.forecasts[:n]
    y_actual = self.validation_data.tail(n)["close"]
    mean_abs_err = learn.mean_absolute_error(y_actual, y_forecast)
    mean_sq_err = learn.mean_squared_error(y_actual, y_forecast)
    mean_sq_lg_err = learn.mean_squared_log_error(y_actual, y_forecast)
    mean_abs_percent_err = learn.mean_absolute_percentage_error(
        y_actual, y_forecast)
    median_abs_err = learn.median_absolute_error(y_actual, y_forecast)
    mean_gamma_dev = learn.mean_gamma_deviance(y_actual, y_forecast)
    mean_poisson_dev = learn.mean_poisson_deviance(y_actual, y_forecast)
    mean_tweedie_dev = learn.mean_tweedie_deviance(y_actual, y_forecast)
    explained_variance = learn.explained_variance_score(y_actual, y_forecast)
    max_residual = learn.max_error(y_actual, y_forecast)
    coeff_determination = learn.r2_score(y_actual, y_forecast)
    metrics = {
        "Mean Squared Error (MSE)": mean_sq_err,
        "Mean Absolute Error (MAE)": mean_abs_err,
        "Mean Squared Logarithmic Error (MSLE)": mean_sq_lg_err,
        "Mean Absolute Percentage Error (MAPE)": mean_abs_percent_err,
        "Median Absolute Error (MedAE)": median_abs_err,
        "Mean Gamma Deviance": mean_gamma_dev,
        "Mean Poisson Deviance": mean_poisson_dev,
        "Mean Tweedie Deviance Error": mean_tweedie_dev,
        "Explained Variance Regression Score": explained_variance,
        "Max Residual Error": max_residual,
        "Coefficient of Determination": coeff_determination,
    }
    self.metrics = metrics
def mtd(self) -> float:
    """
    Mean Tweedie deviance error metric for regression problems.

    :return: float
        Mean Tweedie deviance score
    """
    return mean_tweedie_deviance(y_true=self.obs, y_pred=self.pred,
                                 sample_weight=None)
def validate(self, X_test, y_test):
    if not self._is_fitted:
        raise Exception("Model is not fitted.")
    scores = {}
    scores_exp = {}
    y_pred = self.model.predict(X_test)
    scores["r2"] = r2_score(y_test, y_pred)
    scores["mse"] = mean_tweedie_deviance(y_test, y_pred, power=0)
    scores["poisson_deviance"] = mean_tweedie_deviance(y_test, y_pred, power=1)
    scores["gamma_deviance"] = mean_tweedie_deviance(y_test, y_pred, power=2)
    self.test_scores = scores
    scores_exp["r2"] = r2_score(np.exp(y_test), np.exp(y_pred))
    scores_exp["mse"] = mean_tweedie_deviance(
        np.exp(y_test), np.exp(y_pred), power=0)
    scores_exp["poisson_deviance"] = mean_tweedie_deviance(
        np.exp(y_test), np.exp(y_pred), power=1)
    scores_exp["gamma_deviance"] = mean_tweedie_deviance(
        np.exp(y_test), np.exp(y_pred), power=2)
    self.test_scores_exp = scores_exp
    return self.test_scores, self.test_scores_exp
def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)
    assert_almost_equal(mean_squared_log_error(y_true, y_pred),
                        mean_squared_error(np.log(1 + y_true),
                                           np.log(1 + y_pred)))
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(max_error(y_true, y_pred), 1.)
    assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=0),
                        mean_squared_error(y_true, y_pred))

    # Tweedie deviance needs positive y_pred, except for p=0,
    # p >= 2 needs strictly positive y_true
    # results evaluated by sympy
    y_true = np.arange(1, 1 + n_samples)
    y_pred = 2 * y_true
    n = n_samples
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=-1),
                        5 / 12 * n * (n**2 + 2 * n + 1))
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=1),
                        (n + 1) * (1 - np.log(2)))
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=2),
                        2 * np.log(2) - 1)
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=3 / 2),
                        ((6 * np.sqrt(2) - 8) / n) * np.sqrt(y_true).sum())
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=3),
                        np.sum(1 / y_true) / (4 * n))
def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1
    y_pred_2 = y_true - 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.0)
    assert_almost_equal(
        mean_squared_log_error(y_true, y_pred),
        mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred)),
    )
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.0)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred), 0.5)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred_2), 0.5)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred, alpha=0.4), 0.6)
    assert_almost_equal(mean_pinball_loss(y_true, y_pred_2, alpha=0.4), 0.4)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.0)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    assert np.isfinite(mape)
    assert mape > 1e6
    assert_almost_equal(max_error(y_true, y_pred), 1.0)
    assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.0)
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=0),
        mean_squared_error(y_true, y_pred),
    )
    assert_almost_equal(d2_tweedie_score(y_true, y_pred, power=0),
                        r2_score(y_true, y_pred))

    # Tweedie deviance needs positive y_pred, except for p=0,
    # p >= 2 needs strictly positive y_true
    # results evaluated by sympy
    y_true = np.arange(1, 1 + n_samples)
    y_pred = 2 * y_true
    n = n_samples
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=-1),
        5 / 12 * n * (n**2 + 2 * n + 1),
    )
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=1),
                        (n + 1) * (1 - np.log(2)))
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=2),
                        2 * np.log(2) - 1)
    assert_almost_equal(
        mean_tweedie_deviance(y_true, y_pred, power=3 / 2),
        ((6 * np.sqrt(2) - 8) / n) * np.sqrt(y_true).sum(),
    )
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=3),
                        np.sum(1 / y_true) / (4 * n))

    dev_mean = 2 * np.mean(xlogy(y_true, 2 * y_true / (n + 1)))
    assert_almost_equal(
        d2_tweedie_score(y_true, y_pred, power=1),
        1 - (n + 1) * (1 - np.log(2)) / dev_mean,
    )

    dev_mean = 2 * np.log((n + 1) / 2) - 2 / n * np.log(factorial(n))
    assert_almost_equal(d2_tweedie_score(y_true, y_pred, power=2),
                        1 - (2 * np.log(2) - 1) / dev_mean)
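# A short hedged check of one of the sympy-derived closed forms asserted
# above: for power=1 (Poisson) with y_pred = 2*y_true, the unit deviance is
#   2*(y*log(y/(2*y)) - y + 2*y) = 2*y*(1 - log 2),
# and averaging 2*y*(1 - log 2) over y = 1..n gives (n + 1)*(1 - log 2).
import numpy as np
from sklearn.metrics import mean_tweedie_deviance

n = 50
y = np.arange(1, 1 + n)
np.testing.assert_allclose(mean_tweedie_deviance(y, 2 * y, power=1),
                           (n + 1) * (1 - np.log(2)))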
def mean_tweedie_deviance(y_true: np.ndarray,
                          y_pred: np.ndarray,
                          *,
                          sample_weight: Optional[np.ndarray] = None,
                          power: float = 0) -> float:
    """Mean Tweedie deviance regression loss.

    Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values.
    y_pred : array-like of shape (n_samples,)
        Estimated target values.
    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.
    power : float, default=0
        Tweedie power parameter. Either power <= 0 or power >= 1.
        The higher the `power`, the less weight is given to extreme
        deviations between true and predicted targets.

        - power < 0: Extreme stable distribution. Requires: y_pred > 0.
        - power = 0: Normal distribution, output corresponds to
          mean_squared_error. y_true and y_pred can be any real numbers.
        - power = 1: Poisson distribution. Requires: y_true >= 0 and
          y_pred > 0.
        - 1 < power < 2: Compound Poisson distribution. Requires:
          y_true >= 0 and y_pred > 0.
        - power = 2: Gamma distribution. Requires: y_true > 0 and y_pred > 0.
        - power = 3: Inverse Gaussian distribution. Requires: y_true > 0
          and y_pred > 0.
        - otherwise: Positive stable distribution. Requires: y_true > 0
          and y_pred > 0.

    Returns
    -------
    loss : float
        A non-negative floating point value (the best value is 0.0).
    """
    y_type, y_true, y_pred, sample_weight, multioutput = _check_reg_targets(
        y_true, y_pred, sample_weight)
    if sample_weight is not None:
        check_consistent_length(y_true, y_pred, sample_weight)
    else:
        check_consistent_length(y_true, y_pred)
    return sklearn_metrics.mean_tweedie_deviance(y_true=y_true,
                                                 y_pred=y_pred,
                                                 sample_weight=sample_weight,
                                                 power=power)
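# A minimal numpy sketch of the piecewise unit deviance this wrapper
# ultimately delegates to (mirroring the scikit-learn definition, to the best
# of my reading); `tweedie_unit_deviance` is a hypothetical helper written
# here for illustration, not part of the API above.
import numpy as np
from scipy.special import xlogy

def tweedie_unit_deviance(y, mu, power):
    if power == 0:   # Normal: plain squared error
        return (y - mu) ** 2
    if power == 1:   # Poisson; xlogy returns 0 where y == 0
        return 2 * (xlogy(y, y / mu) - y + mu)
    if power == 2:   # Gamma
        return 2 * (np.log(mu / y) + y / mu - 1)
    # generic branch: power < 0, 1 < power < 2, or power > 2
    return 2 * (np.power(np.maximum(y, 0), 2 - power)
                / ((1 - power) * (2 - power))
                - y * np.power(mu, 1 - power) / (1 - power)
                + np.power(mu, 2 - power) / (2 - power))

# the (optionally sample-weighted) mean of the unit deviances is the loss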
def set_metrics(y_pred, y_true, metrics):
    # best-effort metric collection: a metric is skipped when its input
    # constraints (e.g. strictly positive targets) are violated
    try:
        metrics["max_error"] = mets.max_error(y_true, y_pred)
    except Exception:
        pass
    try:
        metrics["explained_variance_score"] = mets.explained_variance_score(
            y_true, y_pred)
    except Exception:
        pass
    try:
        metrics["mean_absolute_error"] = mets.mean_absolute_error(y_true, y_pred)
    except Exception:
        pass
    try:
        metrics["mean_squared_error"] = mets.mean_squared_error(y_true, y_pred)
    except Exception:
        pass
    try:
        metrics["mean_squared_log_error"] = mets.mean_squared_log_error(
            y_true, y_pred)
    except Exception:
        pass
    try:
        metrics["median_absolute_error"] = mets.median_absolute_error(
            y_true, y_pred)
    except Exception:
        pass
    try:
        metrics["r2_score"] = mets.r2_score(y_true, y_pred)
    except Exception:
        pass
    try:
        metrics["mean_poisson_deviance"] = mets.mean_poisson_deviance(
            y_true, y_pred)
    except Exception:
        pass
    try:
        metrics["mean_gamma_deviance"] = mets.mean_gamma_deviance(y_true, y_pred)
    except Exception:
        pass
    try:
        metrics["mean_tweedie_deviance"] = mets.mean_tweedie_deviance(
            y_true, y_pred)
    except Exception:
        pass
    return metrics
def test_regression_metrics_at_limits():
    # Single-sample case
    # Note: for r2 and d2_tweedie see also test_regression_single_sample
    assert_almost_equal(mean_squared_error([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_squared_error([0.0], [0.0], squared=False), 0.0)
    assert_almost_equal(mean_squared_log_error([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_absolute_error([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_pinball_loss([0.0], [0.0]), 0.0)
    assert_almost_equal(mean_absolute_percentage_error([0.0], [0.0]), 0.0)
    assert_almost_equal(median_absolute_error([0.0], [0.0]), 0.0)
    assert_almost_equal(max_error([0.0], [0.0]), 0.0)
    assert_almost_equal(explained_variance_score([0.0], [0.0]), 1.0)

    # Perfect cases
    assert_almost_equal(r2_score([0.0, 1], [0.0, 1]), 1.0)
    assert_almost_equal(d2_pinball_score([0.0, 1], [0.0, 1]), 1.0)

    # Non-finite cases
    # R² and explained variance have a fix by default for non-finite cases
    for s in (r2_score, explained_variance_score):
        assert_almost_equal(s([0, 0], [1, -1]), 0.0)
        assert_almost_equal(s([0, 0], [1, -1], force_finite=False), -np.inf)
        assert_almost_equal(s([1, 1], [1, 1]), 1.0)
        assert_almost_equal(s([1, 1], [1, 1], force_finite=False), np.nan)

    msg = ("Mean Squared Logarithmic Error cannot be used when targets "
           "contain negative values.")
    with pytest.raises(ValueError, match=msg):
        mean_squared_log_error([-1.0], [-1.0])
    with pytest.raises(ValueError, match=msg):
        mean_squared_log_error([1.0, 2.0, 3.0], [1.0, -2.0, 3.0])
    with pytest.raises(ValueError, match=msg):
        mean_squared_log_error([1.0, -2.0, 3.0], [1.0, 2.0, 3.0])

    # Tweedie deviance error
    power = -1.2
    assert_allclose(mean_tweedie_deviance([0], [1.0], power=power),
                    2 / (2 - power), rtol=1e-3)
    msg = "can only be used on strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    assert_almost_equal(mean_tweedie_deviance([0.0], [0.0], power=0), 0.0, 2)

    power = 1.0
    msg = "only be used on non-negative y and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    power = 1.5
    assert_allclose(mean_tweedie_deviance([0.0], [1.0], power=power),
                    2 / (2 - power))
    msg = "only be used on non-negative y and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    power = 2.0
    assert_allclose(mean_tweedie_deviance([1.0], [1.0], power=power),
                    0.00, atol=1e-8)
    msg = "can only be used on strictly positive y and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    power = 3.0
    assert_allclose(mean_tweedie_deviance([1.0], [1.0], power=power),
                    0.00, atol=1e-8)
    msg = "can only be used on strictly positive y and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError, match=msg):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)

    power = 0.5
    with pytest.raises(ValueError,
                       match="is only defined for power<=0 and power>=1"):
        mean_tweedie_deviance([0.0], [0.0], power=power)
    with pytest.raises(ValueError,
                       match="is only defined for power<=0 and power>=1"):
        d2_tweedie_score([0.0] * 2, [0.0] * 2, power=power)
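# A worked check of the boundary value asserted in the tests above: for
# y_true = [0], y_pred = [1] and any valid power p < 2, both the 0**(2-p)
# term and the cross term vanish, leaving 2 * 1**(2-p) / (2-p) = 2 / (2 - p).
from sklearn.metrics import mean_tweedie_deviance

for p in (-1.2, 1.0, 1.5):
    assert abs(mean_tweedie_deviance([0], [1.0], power=p) - 2 / (2 - p)) < 1e-6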
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None) -> float:
    """
    :param actual: Ground truth (correct) target values.
    :param predicted: Estimated target values.
    :param sample_weight: weights
    :param labels: not used

    power: default=1.5, passed to this function via the recipe_dict toml
        dictionary. Tweedie power parameter. Either power <= 0 or power >= 1.
        To use a non-default power parameter, add recipe_dict via the toml
        config DAI option. Example: recipe_dict = "{'power':2.0}"
        Multiple-parameters example (the first parameter is for demo only):
        validate_meta_learner=false\nrecipe_dict = "{'power':2.0}"
        The higher the power, the less weight is given to extreme deviations
        between true and predicted targets.
        power < 0: Extreme stable distribution. Requires: y_pred > 0.
        power = 0: Normal distribution, output corresponds to
            mean_squared_error. y_true and y_pred can be any real numbers.
        power = 1: Poisson distribution. Requires: y_true >= 0 and y_pred > 0.
        1 < power < 2: Compound Poisson distribution. Requires: y_true >= 0
            and y_pred > 0.
        power = 2: Gamma distribution. Requires: y_true > 0 and y_pred > 0.
        power = 3: Inverse Gaussian distribution. Requires: y_true > 0 and
            y_pred > 0.
        otherwise: Positive stable distribution. Requires: y_true > 0 and
            y_pred > 0.

    :return: score
    """
    try:
        # Initialize the logger to print additional info in case of invalid
        # inputs (an exception is raised) and to enable debug prints.
        logger = self.logger
        from h2oaicore.systemutils import loggerinfo
        # loggerinfo(logger, "Start TW Deviance Scorer.......")
        # loggerinfo(logger, 'Actual:%s' % str(actual))
        # loggerinfo(logger, 'Predicted:%s' % str(predicted))
        # loggerinfo(logger, 'Sample W:%s' % str(sample_weight))
        from sklearn.metrics import mean_tweedie_deviance

        if config.recipe_dict is not None:
            power = config.recipe_dict.get('power', 1.5)
        else:
            power = 1.5
        # loggerinfo(logger, 'Power:%s' % str(power))

        if sample_weight is not None:
            # Check whether any element of the sample_weight array is nan.
            if np.isnan(np.sum(sample_weight)):
                loggerinfo(logger, 'Sample Weight:%s' % str(sample_weight))
                loggerinfo(logger, 'Sample Weight Nan values index:%s' %
                           str(np.argwhere(np.isnan(sample_weight))))
                raise RuntimeError(
                    'Error during Tweedie Deviance score calculation. '
                    'Invalid sample weight values. Expecting only non-nan values')

        if 0 < power < 1:
            loggerinfo(logger, 'Power:%s' % str(power))
            loggerinfo(
                logger,
                """Invalid power value. Power should be one of the following:
                power < 0: Extreme stable distribution. Requires: y_pred > 0.
                power = 0: Normal distribution, output corresponds to mean_squared_error. y_true and y_pred can be any real numbers.
                power = 1: Poisson distribution. Requires: y_true >= 0 and y_pred > 0.
                1 < power < 2: Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0.
                power = 2: Gamma distribution. Requires: y_true > 0 and y_pred > 0.
                power = 3: Inverse Gaussian distribution. Requires: y_true > 0 and y_pred > 0.
                otherwise: Positive stable distribution. Requires: y_true > 0 and y_pred > 0.""")
            raise RuntimeError(
                'Error during Tweedie Deviance score calculation. '
                'Invalid power value.')

        actual = actual.astype('float64')
        predicted = predicted.astype('float64')

        # Safety mechanism in case predictions or actuals contain zeros.
        epsilon = 1e-8
        actual += epsilon
        predicted += epsilon

        if power == 0:
            # No need to validate the sign of actual or predicted.
            pass
        elif power < 0:
            if (predicted <= 0).any():
                loggerinfo(logger, 'Predicted:%s' % str(predicted))
                loggerinfo(logger, 'Invalid Predicted:%s' %
                           str(predicted[predicted <= 0]))
                raise RuntimeError(
                    'power < 0. Error during Tweedie Deviance score calculation. '
                    'Invalid predicted values. Expecting only positive values')
        elif 1 <= power < 2:
            if (actual < 0).any():
                loggerinfo(logger, 'Actual:%s' % str(actual))
                loggerinfo(logger, 'Negative Actuals:%s' % str(actual[actual < 0]))
                raise RuntimeError(
                    '1 <= power < 2. Error during Tweedie Deviance score '
                    'calculation. Invalid actuals values. Expecting zero or '
                    'positive values')
            if (predicted <= 0).any() or np.isnan(np.sum(predicted)):
                loggerinfo(logger, 'Predicted:%s' % str(predicted))
                loggerinfo(logger, 'Invalid Predicted:%s' %
                           str(predicted[predicted <= 0]))
                raise RuntimeError(
                    '1 <= power < 2. Error during Tweedie Deviance score '
                    'calculation. Invalid predicted values. Expecting only '
                    'positive values')
        elif power >= 2:
            if (actual <= 0).any():
                loggerinfo(logger, 'Actual:%s' % str(actual))
                loggerinfo(logger, 'Non-positive Actuals:%s' %
                           str(actual[actual <= 0]))
                raise RuntimeError(
                    'power >= 2. Error during Tweedie Deviance score '
                    'calculation. Invalid actuals values. Expecting only '
                    'positive values')
            if (predicted <= 0).any() or np.isnan(np.sum(predicted)):
                loggerinfo(logger, 'Predicted:%s' % str(predicted))
                loggerinfo(logger, 'Invalid Predicted:%s' %
                           str(predicted[predicted <= 0]))
                raise RuntimeError(
                    'power >= 2. Error during Tweedie Deviance score '
                    'calculation. Invalid predicted values. Expecting only '
                    'positive values')

        # Check whether any element of the arrays is nan.
        if np.isnan(np.sum(actual)):
            loggerinfo(logger, 'Actual:%s' % str(actual))
            loggerinfo(logger, 'Nan values index:%s' %
                       str(np.argwhere(np.isnan(actual))))
            raise RuntimeError(
                'Error during Tweedie Deviance score calculation. '
                'Invalid actuals values. Expecting only non-nan values')
        if np.isnan(np.sum(predicted)):
            loggerinfo(logger, 'Predicted:%s' % str(predicted))
            loggerinfo(logger, 'Nan values index:%s' %
                       str(np.argwhere(np.isnan(predicted))))
            raise RuntimeError(
                'Error during Tweedie Deviance score calculation. '
                'Invalid predicted values. Expecting only non-nan values')

        score = mean_tweedie_deviance(actual, predicted,
                                      sample_weight=sample_weight, power=power)

        # Validate that the score is non-negative and neither infinity nor nan.
        if not (0 <= score < float("inf")):
            loggerinfo(logger, 'Invalid calculated score:%s' % str(score))
            raise RuntimeError(
                'Error during Tweedie Deviance score calculation. '
                'Invalid calculated score:%s. Score should be non-negative and '
                'less than infinity. Nan is not valid' % str(score))
    except Exception as e:
        # Print the error message into the DAI log file.
        loggerinfo(logger,
                   'Error during Tweedie Deviance score calculation. '
                   'Exception raised: %s' % str(e))
        raise
    return score
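# A minimal sketch of the epsilon safety shift used in the scorer above: raw
# zeros in the inputs make the power >= 1 branches of mean_tweedie_deviance
# raise, while shifting both arrays by the same tiny epsilon keeps the score
# defined. The values here are illustrative only.
import numpy as np
from sklearn.metrics import mean_tweedie_deviance

actual = np.array([0.0, 1.0, 3.0])
predicted = np.array([0.0, 1.2, 2.5])
epsilon = 1e-8
# mean_tweedie_deviance(actual, predicted, power=1.5) would raise ValueError
# because predicted contains a zero; the shifted call succeeds:
score = mean_tweedie_deviance(actual + epsilon, predicted + epsilon, power=1.5)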
def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(max_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)

    assert_raises_regex(ValueError,
                        "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [-1.], [-1.])
    assert_raises_regex(ValueError,
                        "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., 2., 3.], [1., -2., 3.])
    assert_raises_regex(ValueError,
                        "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., -2., 3.], [1., 2., 3.])

    # Tweedie deviance error
    p = -1.2
    assert_allclose(mean_tweedie_deviance([0], [1.], p=p),
                    2. / (2. - p), rtol=1e-3)
    with pytest.raises(ValueError,
                       match="can only be used on strictly positive y_pred."):
        mean_tweedie_deviance([0.], [0.], p=p)
    assert_almost_equal(mean_tweedie_deviance([0.], [0.], p=0), 0.00, 2)

    msg = "only be used on non-negative y_true and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], p=1.0)

    p = 1.5
    assert_allclose(mean_tweedie_deviance([0.], [1.], p=p), 2. / (2. - p))
    msg = "only be used on non-negative y_true and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], p=p)

    p = 2.
    assert_allclose(mean_tweedie_deviance([1.], [1.], p=p), 0.00, atol=1e-8)
    msg = "can only be used on strictly positive y_true and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], p=p)

    p = 3.
    assert_allclose(mean_tweedie_deviance([1.], [1.], p=p), 0.00, atol=1e-8)
    msg = "can only be used on strictly positive y_true and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], p=p)

    with pytest.raises(ValueError,
                       match="deviance is only defined for p<=0 and p>=1."):
        mean_tweedie_deviance([0.], [0.], p=0.5)
def cross_validate(self, X, y, k=None):
    if not self._is_fitted:
        raise Exception("Model is not fitted.")
    if self.gs_model is None:
        raise Exception("Cross validation requires a grid search model.")
    if k is None:
        k = int(X.shape[0] / 2) if X.shape[0] < 3 else 3
    print(f"Cross validating on '{k}' folds")
    scores = {"r2": [], "mse": [], "poisson_deviance": [], "gamma_deviance": []}
    scores_exp = {"r2": [], "mse": [], "poisson_deviance": [],
                  "gamma_deviance": []}
    kf = KFold(k)
    for train_index, test_index in kf.split(X):
        clf_model = RandomForestQuantileRegressor(**self.gs_model.best_params_)
        clf_model.fit(X[train_index, :], y[train_index])
        y_pred = clf_model.predict(X[test_index, :])
        y_test = y[test_index]
        if len(test_index) > 1:
            # r2 is only meaningful with at least two test samples
            r2 = r2_score(y_test, y_pred)
            scores["r2"].append(r2)
            r2_exp = r2_score(np.exp(y_test), np.exp(y_pred))
            scores_exp["r2"].append(r2_exp)
        mse = mean_tweedie_deviance(y_test, y_pred, power=0)
        poisson_deviance = mean_tweedie_deviance(y_test, y_pred, power=1)
        gamma_deviance = mean_tweedie_deviance(y_test, y_pred, power=2)
        scores["mse"].append(mse)
        scores["poisson_deviance"].append(poisson_deviance)
        scores["gamma_deviance"].append(gamma_deviance)
        mse_exp = mean_tweedie_deviance(np.exp(y_test), np.exp(y_pred), power=0)
        poisson_deviance_exp = mean_tweedie_deviance(
            np.exp(y_test), np.exp(y_pred), power=1)
        gamma_deviance_exp = mean_tweedie_deviance(
            np.exp(y_test), np.exp(y_pred), power=2)
        scores_exp["mse"].append(mse_exp)
        scores_exp["poisson_deviance"].append(poisson_deviance_exp)
        scores_exp["gamma_deviance"].append(gamma_deviance_exp)

    final_scores = {}
    final_scores["r2_mean"] = np.mean([0 if s < 0 else s for s in scores["r2"]])
    final_scores["r2_std"] = np.std([0 if s < 0 else s for s in scores["r2"]])
    final_scores["mse_mean"] = np.mean(scores["mse"])
    final_scores["mse_std"] = np.std(scores["mse"])
    final_scores["poisson_deviance_mean"] = np.mean(scores["poisson_deviance"])
    final_scores["poisson_deviance_std"] = np.std(scores["poisson_deviance"])
    final_scores["gamma_deviance_mean"] = np.mean(scores["gamma_deviance"])
    final_scores["gamma_deviance_std"] = np.std(scores["gamma_deviance"])
    self.cross_validation_scores = final_scores

    final_scores_exp = {}
    final_scores_exp["r2_mean"] = np.mean(
        [0 if s < 0 else s for s in scores_exp["r2"]])
    final_scores_exp["r2_std"] = np.std(
        [0 if s < 0 else s for s in scores_exp["r2"]])
    final_scores_exp["mse_mean"] = np.mean(scores_exp["mse"])
    final_scores_exp["mse_std"] = np.std(scores_exp["mse"])
    final_scores_exp["poisson_deviance_mean"] = np.mean(
        scores_exp["poisson_deviance"])
    final_scores_exp["poisson_deviance_std"] = np.std(
        scores_exp["poisson_deviance"])
    final_scores_exp["gamma_deviance_mean"] = np.mean(
        scores_exp["gamma_deviance"])
    final_scores_exp["gamma_deviance_std"] = np.std(
        scores_exp["gamma_deviance"])
    self.cross_validation_scores_exp = final_scores_exp

    return self.cross_validation_scores, self.cross_validation_scores_exp
def tweedie_eval(y_pred, y_true):
    y_true = y_true.get_label()
    y_true = rollup(y_true.reshape(NUM_ITEMS, -1)).flatten()
    y_pred = rollup(y_pred.reshape(NUM_ITEMS, -1)).flatten()
    loss = mean_tweedie_deviance(y_true, y_pred, power=1.5)
    return "tweedie_eval", loss, False
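# A hedged usage sketch: the (y_pred, dataset) signature, the get_label()
# call, and the (name, value, is_higher_better) return tuple all suggest a
# LightGBM custom eval; X_train/y_train/X_valid/y_valid, NUM_ITEMS, and
# rollup are assumed to come from the surrounding project.
import lightgbm as lgb

dtrain = lgb.Dataset(X_train, label=y_train)
dvalid = lgb.Dataset(X_valid, label=y_valid, reference=dtrain)
booster = lgb.train(
    {"objective": "tweedie", "tweedie_variance_power": 1.5},
    dtrain,
    valid_sets=[dvalid],
    feval=tweedie_eval,  # logs the rolled-up mean Tweedie deviance per round
)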
def _sk_deviance(preds: Tensor, targets: Tensor, power: float):
    sk_preds = preds.view(-1).numpy()
    sk_target = targets.view(-1).numpy()
    return mean_tweedie_deviance(sk_target, sk_preds, power=power)
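# A hedged example of how a reference helper like this is typically used in a
# torchmetrics-style test, assuming TweedieDevianceScore is available in the
# installed torchmetrics version; the tolerance is a guess to absorb
# float32/float64 differences.
import torch
from torchmetrics.regression import TweedieDevianceScore

preds = torch.rand(100) + 0.1
targets = torch.rand(100) + 0.1
metric = TweedieDevianceScore(power=1.5)
assert abs(metric(preds, targets).item()
           - _sk_deviance(preds, targets, power=1.5)) < 1e-5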
def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_error([0.], [0.], squared=False), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(max_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)

    err_msg = ("Mean Squared Logarithmic Error cannot be used when targets "
               "contain negative values.")
    with pytest.raises(ValueError, match=err_msg):
        mean_squared_log_error([-1.], [-1.])
    with pytest.raises(ValueError, match=err_msg):
        mean_squared_log_error([1., 2., 3.], [1., -2., 3.])
    with pytest.raises(ValueError, match=err_msg):
        mean_squared_log_error([1., -2., 3.], [1., 2., 3.])

    # Tweedie deviance error
    power = -1.2
    assert_allclose(mean_tweedie_deviance([0], [1.], power=power),
                    2 / (2 - power), rtol=1e-3)
    with pytest.raises(ValueError,
                       match="can only be used on strictly positive y_pred."):
        mean_tweedie_deviance([0.], [0.], power=power)
    assert_almost_equal(mean_tweedie_deviance([0.], [0.], power=0), 0.00, 2)

    msg = "only be used on non-negative y_true and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], power=1.0)

    power = 1.5
    assert_allclose(mean_tweedie_deviance([0.], [1.], power=power),
                    2 / (2 - power))
    msg = "only be used on non-negative y_true and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], power=power)

    power = 2.
    assert_allclose(mean_tweedie_deviance([1.], [1.], power=power),
                    0.00, atol=1e-8)
    msg = "can only be used on strictly positive y_true and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], power=power)

    power = 3.
    assert_allclose(mean_tweedie_deviance([1.], [1.], power=power),
                    0.00, atol=1e-8)
    msg = "can only be used on strictly positive y_true and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], power=power)

    with pytest.raises(ValueError,
                       match="is only defined for power<=0 and power>=1"):
        mean_tweedie_deviance([0.], [0.], power=0.5)
def _mean_tweedie_deviance(y_true, y_pred):
    from sklearn.metrics import mean_tweedie_deviance
    return mean_tweedie_deviance(y_true, y_pred)