def test_regression_metrics_multioutput_array():
    """Check multioutput regression metrics with 'raw_values' and weighted outputs.

    Covers MSE, MAE, RMSE (squared=False) and MSLE on 2-column targets,
    comparing against hand-computed reference values.
    """
    # NOTE: np.float / np.int were deprecated in NumPy 1.20 and removed in
    # 1.24; use the explicit fixed-width dtypes instead.
    y_true = np.array([[1, 2], [2.5, -1], [4.5, 3], [5, 7]], dtype=np.float64)
    y_pred = np.array([[1, 1], [2, -1], [5, 4], [5, 6.5]], dtype=np.float64)

    # 'raw_values' returns one score per output column.
    mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    cp.testing.assert_array_almost_equal(mse, [0.125, 0.5625], decimal=2)
    cp.testing.assert_array_almost_equal(mae, [0.25, 0.625], decimal=2)

    # An array multioutput averages the per-column scores with these weights.
    weights = np.array([0.4, 0.6], dtype=np.float64)
    msew = mean_squared_error(y_true, y_pred, multioutput=weights)
    rmsew = mean_squared_error(y_true, y_pred, multioutput=weights,
                               squared=False)
    assert_almost_equal(msew, 0.39, decimal=2)
    assert_almost_equal(rmsew, 0.62, decimal=2)

    # Integer inputs: constant unit error in every entry.
    y_true = np.array([[0, 0]] * 4, dtype=np.int64)
    y_pred = np.array([[1, 1]] * 4, dtype=np.int64)
    mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    cp.testing.assert_array_almost_equal(mse, [1., 1.], decimal=2)
    cp.testing.assert_array_almost_equal(mae, [1., 1.], decimal=2)

    # MSLE must equal MSE computed on log(1 + y).
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput='raw_values')
    msle2 = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                               multioutput='raw_values')
    cp.testing.assert_array_almost_equal(msle, msle2, decimal=2)
def test_regression_metrics():
    """Check MSE, MSLE and MAE on a simple off-by-one integer sequence."""
    # NOTE: np.int was deprecated in NumPy 1.20 and removed in 1.24; use the
    # explicit fixed-width dtype instead.
    y_true = np.arange(50, dtype=np.int64)
    y_pred = y_true + 1  # every prediction off by exactly 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)
    # MSLE is by definition MSE computed on log(1 + y).
    assert_almost_equal(mean_squared_log_error(y_true, y_pred),
                        mean_squared_error(np.log(1 + y_true),
                                           np.log(1 + y_pred)))
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
def gpu_gridsearch_cv(self, estimator, param_dict, xdata, ydata, n_splits=5):
    """Grid-search ``param_dict`` with k-fold cross validation.

    For each parameter combination the estimator is fit/scored on
    ``n_splits`` shuffled folds (fixed random_state for reproducibility),
    and the lowest mean MSE across all combinations is returned.
    """
    best_score = np.inf
    for params in ParameterGrid(param_dict):
        estimator.set_params(**params)
        fold_scores = []
        splitter = KFold(n_splits=n_splits, shuffle=True, random_state=0)
        for train_idx, test_idx in splitter.split(xdata, ydata):
            xtrain, ytrain = xdata[train_idx], ydata[train_idx]
            xtest, ytest = xdata[test_idx], ydata[test_idx]
            estimator.fit(xtrain, ytrain)
            ypred = estimator.predict(xtest)
            # .item() pulls the scalar off-device into a plain Python float.
            fold_scores.append(mean_squared_error(ypred, ytest).item())
        best_score = min(best_score, np.array(fold_scores).mean())
    return best_score
def test_mse_vs_msle_custom_weights():
    """MSLE with sample weights must match weighted MSE on log(1 + y)."""
    # NOTE: np.float was deprecated in NumPy 1.20 and removed in 1.24; use
    # the explicit fixed-width dtype instead.
    y_true = np.array([0.5, 2, 7, 6], dtype=np.float64)
    y_pred = np.array([0.5, 1, 8, 8], dtype=np.float64)
    weights = np.array([0.2, 0.25, 0.4, 0.15], dtype=np.float64)

    msle = mean_squared_log_error(y_true, y_pred, sample_weight=weights)
    msle2 = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                               sample_weight=weights)
    assert_almost_equal(msle, msle2, decimal=2)
def test_multioutput_regression():
    """Regression metrics on a small multioutput binary problem."""
    truth = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    preds = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

    expected = (1. + 2. / 3) / 4.
    assert_almost_equal(mean_squared_error(truth, preds), expected)
    assert_almost_equal(mean_squared_error(truth, preds, squared=False),
                        0.645, decimal=2)
    assert_almost_equal(mean_squared_log_error(truth, preds),
                        0.200, decimal=2)
    # Every per-entry error is 0 or 1, so |err| == err**2 and
    # mean_absolute_error equals mean_squared_error here.
    assert_almost_equal(mean_absolute_error(truth, preds), expected)