def log_prediction_error_chart(regressor, X_train, X_test, y_train, y_test, experiment=None):
    """Log prediction error chart.

    Make sure you created an experiment by using ``neptune.create_experiment()``
    before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_
        for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to the currently active, most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = PredictionError(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Prediction Error')
        plt.close(fig)
    except Exception as e:
        print('Did not log prediction error chart. Error: {}'.format(e))
def create_prediction_error_chart(regressor, X_train, X_test, y_train, y_test):
    """Create prediction error chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to the run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['prediction_error'] = npt_utils.create_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = PredictionError(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log prediction error chart. Error: {}'.format(e))

    return chart
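# Usage sketch for the two helpers above -- a hypothetical, self-contained example,
# not from the source: the synthetic data, random seeds, and project name are
# placeholders, and the Neptune calls are commented out so the sketch runs offline.
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=10, noise=10.0, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

rfr = RandomForestRegressor(random_state=42)
rfr.fit(X_train, y_train)

# Legacy API: logs the figure into an existing experiment.
# neptune.init('my_workspace/my_project')
# neptune.create_experiment()
# log_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)

# New API: returns a neptune.types.File that you assign to a run field.
# run = neptune.init(project='my_workspace/my_project')
# run['prediction_error'] = create_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)
chart = create_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)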
def test_prepredict_regressor(self):
    """
    Test the prepredict estimator with a prediction error plot
    """
    # Make prepredictions
    X, y = self.continuous.X, self.continuous.y
    y_pred = LinearRegression().fit(X.train, y.train).predict(X.test)

    # Create prepredict estimator with prior predictions
    estimator = PrePredict(y_pred, REGRESSOR)
    assert estimator.fit(X.train, y.train) is estimator
    assert estimator.predict(X.train) is y_pred
    assert estimator.score(X.test, y.test) == pytest.approx(0.9999983124154966, rel=1e-2)

    # Test that a visualizer works with the pre-predictions.
    viz = PredictionError(estimator)
    viz.fit(X.train, y.train)
    viz.score(X.test, y.test)
    viz.finalize()

    self.assert_images_similar(viz, tol=10.0)
def regression(fname="regression.png"):
    """
    Create figures for regression models
    """
    _, axes = plt.subplots(ncols=2, figsize=(18, 6))

    alphas = np.logspace(-10, 1, 300)
    data = load_concrete(split=True)

    # Plot prediction error on the left
    oz = PredictionError(LassoCV(alphas=alphas), ax=axes[0])
    oz.fit(data.X.train, data.y.train)
    oz.score(data.X.test, data.y.test)
    oz.finalize()

    # Plot residuals on the right
    oz = ResidualsPlot(RidgeCV(alphas=alphas), ax=axes[1])
    oz.fit(data.X.train, data.y.train)
    oz.score(data.X.test, data.y.test)
    oz.finalize()

    # Save figure
    path = os.path.join(FIGURES, fname)
    plt.tight_layout()
    plt.savefig(path)
The largest coefficient in the model belongs to GrLivArea, at 0.3154, which means the house price is most sensitive to this column. If GrLivArea increases, the house price rises more than it would for the same increase in any other feature. Note also that some features have negative coefficients (ExterQual_TA and ExterQual_Fa): when these features increase, the house price goes down.
'''

'''
#### 2. Residual Plot
'''
st.write('')
visualizer_residual = ResidualsPlot(model_lr)
visualizer_residual.fit(X_train, y_train)
visualizer_residual.score(X_test, y_test)
visualizer_residual.finalize()

st.pyplot()

'''
The residuals are concentrated around 0. However, some residuals are still quite large, so the distribution of the residuals is not fully normal but skewed.
'''

'''
#### 3. Prediction Error
'''
st.write('')
visualizer_prediction_error = PredictionError(model_lr)
visualizer_prediction_error.fit(X_train, y_train)
visualizer_prediction_error.score(X_test, y_test)
visualizer_prediction_error.finalize()

st.pyplot()

'''
The best-fit line is not far from the identity line, so the model can be considered close to optimal.
'''
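# The coefficient reading above (GrLivArea at 0.3154, negative ExterQual_TA and
# ExterQual_Fa) can be checked directly from the fitted model. A minimal sketch,
# assuming model_lr is the fitted LinearRegression and X_train is a pandas
# DataFrame whose columns carry the feature names:
import pandas as pd

coefs = pd.Series(model_lr.coef_, index=X_train.columns).sort_values()
st.write(coefs.tail(1))  # largest positive coefficient, e.g. GrLivArea
st.write(coefs.head(2))  # most negative coefficients, e.g. ExterQual_TA, ExterQual_Fa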
def evaluate_results_time_series(df, time_period_col, model, target,
                                 path_to_save_report, max_features=None,
                                 plot_since_period=0):
    mean_error = []
    with PdfPages(path_to_save_report) as pdf:
        for period in range(df[time_period_col].min() + 1, df[time_period_col].max() + 1):
            # Walk-forward split: train on all earlier periods, test on the current one
            train = df[df[time_period_col] < period]
            test = df[df[time_period_col] == period]
            X_train, X_test = train.drop(columns=target), test.drop(columns=target)
            y_train, y_test = train[target], test[target]

            # Refit on the expanding training window before predicting
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            error = rmse(y_test, y_pred)
            mean_error.append(error)

            if period >= plot_since_period:
                fig = plt.figure(figsize=(22, 5))
                fig.suptitle(
                    'Period {} - Error {} - Train size: {} / Test size: {}'.format(
                        period, round(error, 5), len(y_train), len(y_test)),
                    fontsize=14)
                fig.subplots_adjust(top=0.85, wspace=0.1)

                ax1 = fig.add_subplot(1, 2, 1)
                visualizer = PredictionError(model, ax=ax1, line_color="red")
                visualizer.score(X_test, y_test)
                visualizer.finalize()

                ax2 = fig.add_subplot(1, 2, 2)
                visualizer = ResidualsPlot(model, ax=ax2)
                visualizer.fit(X_train, y_train)
                visualizer.score(X_test, y_test)
                visualizer.finalize()

                # ax3 = fig.add_subplot(1, 3, 3)
                # visualize.plot_coefficients(model, X_train)

                pdf.savefig(fig)
                plt.close(fig)

            _logger.info('Period %d - Error %.5f' % (period, error))

    _logger.info('Mean Error = %.5f' % np.mean(mean_error))
    return model, X_train, y_train, X_test, y_test, mean_error
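# A hypothetical invocation of the evaluator above -- the tiny DataFrame, column
# names, and model are illustrative only, and the module-level helpers the
# function relies on (rmse, _logger, PdfPages, plt, np) are assumed to come
# from the source file. plot_since_period is set high so no figures are drawn
# and only the per-period RMSE is logged.
import pandas as pd
from sklearn.linear_model import LinearRegression

df = pd.DataFrame({
    'time_period': [0, 0, 0, 1, 1, 2, 2],
    'feature':     [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
    'price':       [1.1, 2.0, 3.2, 3.9, 5.1, 6.0, 7.1],
})
model, X_tr, y_tr, X_te, y_te, errors = evaluate_results_time_series(
    df, time_period_col='time_period', model=LinearRegression(),
    target='price', path_to_save_report='walk_forward_report.pdf',
    plot_since_period=10,
)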