def test_residual_prediction():
    """Compare a residual-based std estimator against a plain baseline.

    Two ``ParametricEstimator`` instances are fitted on the 'boston'
    training split: a reference built from a single ``LinearRegression``
    and a candidate whose ``std`` component is a ``ResidualEstimator``
    wrapping another ``LinearRegression``. The test passes when the
    reference's linearized log loss on the test split is strictly greater
    than the candidate's.
    """
    dataset = DataManager('boston')

    reference_estimator = ParametricEstimator(LinearRegression())
    residual_estimator = ParametricEstimator(
        point=LinearRegression(),
        std=ResidualEstimator(LinearRegression()),
    )

    # Fit and predict with the reference first, then with the candidate.
    reference_fit = reference_estimator.fit(dataset.X_train, dataset.y_train)
    reference_pred = reference_fit.predict(dataset.X_test)

    residual_fit = residual_estimator.fit(dataset.X_train, dataset.y_train)
    residual_pred = residual_fit.predict(dataset.X_test)

    reference_loss = linearized_log_loss(dataset.y_test, reference_pred)
    residual_loss = linearized_log_loss(dataset.y_test, residual_pred)

    assert reference_loss > residual_loss
def test_simple_model():
    """Fit a two-``LinearRegression`` ``ParametricEstimator`` on 'boston'.

    The point predictions for the test split are handed to
    ``utils.assert_close_prediction`` together with the true targets and
    ``within=0.5``.
    """
    dataset = DataManager('boston')
    estimator = ParametricEstimator(LinearRegression(), LinearRegression())

    fitted = estimator.fit(dataset.X_train, dataset.y_train)
    predictions = fitted.predict(dataset.X_test)

    utils.assert_close_prediction(
        predictions.point(),
        dataset.y_test,
        within=0.5,
    )
def test_baseline():
    """Check the default (argument-free) ``ParametricEstimator``.

    The test expects the default estimator to predict the training-set
    mean as point estimate and the training-set standard deviation as
    std for every test sample, and to expose normal pdf/cdf values with
    those parameters. It also checks that slicing a prediction object
    yields the sliced number of entries.
    """
    data = DataManager('boston')
    model = ParametricEstimator()
    y_pred = model.fit(data.X_train, data.y_train).predict(data.X_test)

    mu = np.mean(data.y_train)
    sigma = np.std(data.y_train)
    n_test = len(data.X_test)

    # is the dummy prediction working?
    # Compared with a tolerance: exact float equality breaks on harmless
    # rounding differences between implementations.
    assert np.allclose(y_pred.point(), np.full(n_test, mu))
    assert np.allclose(y_pred.std(), np.full(n_test, sigma))

    # does subsetting work?
    assert len(y_pred[1:3].point()) == 2
    assert len(y_pred[1:3].lp2()) == 2

    # pdf, cdf?
    x = np.random.randint(0, 10)
    # np.random.randint excludes the upper bound, so pass the full length
    # to make every test index (including the last one) selectable.
    i = np.random.randint(0, n_test)
    assert np.allclose(y_pred[i].pdf(x), norm.pdf(x, mu, sigma))
    assert np.allclose(y_pred[i].cdf(x), norm.cdf(x, mu, sigma))
# Example script: fit a parametric probabilistic model on the Boston housing
# data, print its log loss and plot the predictions.

from sklearn.ensemble import RandomForestRegressor
# Import from the public package: the private ``sklearn.datasets.base`` module
# was deprecated in scikit-learn 0.22 and removed in 0.24.
# NOTE(review): ``load_boston`` itself was removed in scikit-learn 1.2, so this
# example needs an older scikit-learn release.
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

from skpro.parametric import ParametricEstimator
from skpro.parametric.estimators import Constant
from skpro.metrics import log_loss

# Define the parametric model
model = ParametricEstimator(
    point=RandomForestRegressor(),
    std=Constant('std(y)'),
    shape='norm',
)

# Train and predict on boston housing data
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
y_pred = model.fit(X_train, y_train).predict(X_test)

# Obtain the loss
# The result is fed to a two-placeholder format string, so it is expected to
# be a pair of values.
loss = log_loss(y_test, y_pred, sample=True, return_std=True)
print('Loss: %f+-%f' % loss)

# Plot the performance
# ``utils`` is looked up via the parent directory, so the path entry has to be
# added before the import. The path is relative to the current working
# directory.
import sys
sys.path.append('../')
import utils

utils.plot_performance(y_test, y_pred)