def log_residuals_chart(regressor,
                        X_train,
                        X_test,
                        y_train,
                        y_test,
                        experiment=None):
    """Log residuals chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    The chart is drawn on a fresh matplotlib figure and logged as an image
    named ``'Residuals Plot'`` to the ``'charts_sklearn'`` log of the
    experiment. Any exception raised while drawing or logging is caught and
    reported with ``print``; the figure is closed in every case.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Raises:
        AssertionError: If ``is_regressor(regressor)`` is false.

    Examples:
        .. code:: python3

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_residuals_chart(rfr, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'
    exp = _validate_experiment(experiment)

    # Kept outside the ``try`` so the ``finally`` clause can tell whether a
    # figure was actually created before something failed.
    fig = None
    try:
        fig, ax = plt.subplots()
        visualizer = ResidualsPlot(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Residuals Plot')
    except Exception as e:
        # Best-effort logging: a failed chart must not abort the caller.
        print('Did not log residuals chart. Error: {}'.format(e))
    finally:
        # Close the figure on the error path too; otherwise every failed call
        # leaves an open figure registered with pyplot.
        if fig is not None:
            plt.close(fig)
# Example #2
# 0
def regression_sanity_check(model, X_train, X_test, y_train, y_test):
    """Draw a residuals plot and a prediction-error plot side by side.

    A single-row, two-column figure of size 20x10 is created. The left axes
    receives a ``ResidualsPlot`` and the right axes a ``PredictionError``;
    both visualizers are fitted on the training split and scored on the test
    split.

    Args:
        model: Estimator handed to both visualizers.
        X_train: Training feature matrix.
        X_test: Test feature matrix.
        y_train: Training target.
        y_test: Test target.

    Returns:
        ``None``
    """
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))
    residuals_ax, error_ax = axes

    # Left panel: residuals.
    plt.sca(residuals_ax)
    residuals_viz = ResidualsPlot(model, ax=residuals_ax)
    residuals_viz.fit(X_train, y_train)
    residuals_viz.score(X_test, y_test)

    # Right panel: predicted vs. actual.
    plt.sca(error_ax)
    error_viz = PredictionError(model, ax=error_ax)
    error_viz.fit(X_train, y_train)
    error_viz.score(X_test, y_test)

    # Only the second visualizer goes through poof(); the first is merely
    # finalized. NOTE(review): poof() is presumably the legacy name of
    # show() in the installed visualizer library - confirm the version.
    residuals_viz.finalize()
    error_viz.poof()
# Example #3
# 0
def create_residuals_chart(regressor, X_train, X_test, y_train, y_test):
    """Create residuals chart.

    The chart is drawn on a fresh matplotlib figure, converted with
    ``neptune.types.File.as_image`` and the figure is then closed. Any
    exception raised while drawing or converting is caught and reported with
    ``print``; in that case ``None`` is returned.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``,
        or ``None`` if the chart could not be created.

    Raises:
        AssertionError: If ``is_regressor(regressor)`` is false.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/residuals'] = npt_utils.create_residuals_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'

    chart = None
    # Kept outside the ``try`` so the ``finally`` clause can tell whether a
    # figure was actually created before something failed.
    fig = None

    try:
        fig, ax = plt.subplots()
        visualizer = ResidualsPlot(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
    except Exception as e:
        # Best-effort: a failed chart must not abort the caller.
        print('Did not log residuals chart. Error: {}'.format(e))
    finally:
        # Close the figure on the error path too; otherwise every failed call
        # leaves an open figure registered with pyplot.
        if fig is not None:
            plt.close(fig)

    return chart
# Example #4
# 0
    eli5.formatters.as_dataframe.explain_weights_df(
        estimator=model_lr, feature_names=feature_names)[['feature',
                                                          'weight']])
'''
Koefisien yang paling besar dari model adalah GrLivArea sebesar 0.3154, artinya harga rumah sensitif dengan kolom ini. Apabila
terjadi peningkatan terhadap nilai GrLivArea, harga rumah akan meningkat lebih tinggi dibandingkan apabila terjadi kenaikan pada feature yang lain dengan kenaikan yang sama.
Perhatikan juga terdapat feature dengan nilai koefisien yang negatif (ExterQual_TA dan ExterQual_Fa), artinya apabila feature ini meningkat maka harga rumah akan menjadi lebih turun.
'''
'''
#### 2. Residual Plot
'''
# Residual-plot section: build a ResidualsPlot around the linear model, fit it
# on the training split and score it on the test split.
st.write('')
visualizer_residual = ResidualsPlot(model_lr)
visualizer_residual.fit(X_train, y_train)
visualizer_residual.score(X_test, y_test)
# Only finalize() is called here; the figure is handed to Streamlit below
# rather than displayed by the visualizer itself.
visualizer_residual.finalize()

# No figure object is passed. NOTE(review): presumably this renders the
# current global matplotlib figure - confirm against the Streamlit version in
# use, since the argument-less form may be deprecated.
st.pyplot()
'''
Residual berdistribusi paling banyak pada nilai 0. Akan tetapi, masih terdapat nilai residual yang cukup tinggi. Hal ini menyebabkan distribusi dari residual tidak sepenuhnya normal, tetapi menjadi skew.
'''
'''
#### 3. Prediction Error
'''

# Prediction-error section: same fit-on-train / score-on-test sequence as the
# residual plot, using a PredictionError visualizer on the same model.
st.write('')
visualizer_prediction_error = PredictionError(model_lr)
visualizer_prediction_error.fit(X_train, y_train)
visualizer_prediction_error.score(X_test, y_test)
# NOTE(review): no st.pyplot() call follows finalize() within the visible
# part of this script - verify that the figure is rendered further down.
visualizer_prediction_error.finalize()
# Example #5
# 0
def plot(dados, ativo_x, ativo_y, period=100, tipo='residuos', save=False):
    """Plot one diagnostic chart for a pair of assets over the last ``period`` rows.

    The model, residuals and residual mean come from
    ``coint_period(dados, ativo_x, ativo_y, period=period, model=True)``.
    The chart is either shown or written to a PNG whose name is built from
    the chart type, both asset names with their last three characters
    removed, and ``period``.

    Args:
        dados: Table-like object supporting ``.iloc``/``.loc`` with a
            ``'Date'`` column and one column per asset name.
        ativo_x: Column name of the first asset (x-axis / regressor).
        ativo_y: Column name of the second asset (y-axis / target).
        period: Number of trailing rows to use.
        tipo: Chart type, one of ``'residuos'``, ``'fechamento'``,
            ``'spread'`` or ``'regression'`` (a capitalised first letter is
            also accepted). Any other value prints the list of options.
        save: When equal to ``False`` the chart is shown with ``plt.show()``;
            otherwise it is written with ``plt.savefig``.

    Returns:
        ``None``
    """
    import matplotlib.pyplot as plt

    def _show_or_save(nome):
        """Show the current figure, or save it under ``nome``."""
        # Equality (not truthiness) is kept on purpose so that non-bool
        # values of ``save`` behave exactly as they did before.
        if save == False:  # noqa: E712
            plt.show()
        else:
            plt.savefig(nome)

    modelo, _y_pred, residuos, media, _desvio = coint_period(
        dados, ativo_x, ativo_y, period=period, model=True)
    dados = dados.iloc[-period:, :]
    X = dados.loc[:, ativo_x].values[-period:]
    y = dados.loc[:, ativo_y].values[-period:]
    residuos_padronizado = residuos / np.std(residuos)

    if tipo in ('residuos', 'Residuos'):
        datas = dados['Date']
        # Size the horizontal reference lines from the x-axis actually
        # plotted: when ``dados`` has fewer than ``period`` rows, repeating
        # ``period`` times would not match the length of ``datas``.
        n = len(datas)
        plt.figure(figsize=(15, 6))
        plt.title('Série Temporal Resíduos Padronizada {} períodos'.format(period))
        plt.plot(datas, residuos_padronizado, color='blue', alpha=0.6, label='Resíduo Padronizado')
        plt.plot(datas, np.repeat(media, n), color='black', linestyle='--')
        plt.plot(datas, np.repeat(2, n), color='red', linestyle=':', label='Dois Desvios Padrões')
        plt.plot(datas, np.repeat(-2, n), color='red', linestyle=':')
        nome = 'Resíduos_{}_x_{}_{}_periodos.png'.format(ativo_x[:-3], ativo_y[:-3], period)
        plt.legend(loc=0)
        _show_or_save(nome)
    elif tipo in ('fechamento', 'Fechamento'):
        plt.title('Preço de Fechamento {} períodos'.format(period))
        plt.plot(dados['Date'], X, color='blue', label=ativo_x)
        plt.plot(dados['Date'], y, color='red', label=ativo_y)
        plt.legend(loc=0)
        nome = 'Fechamento_{}_x_{}_{}_periodos.png'.format(ativo_x[:-3], ativo_y[:-3], period)
        _show_or_save(nome)
    elif tipo in ('spread', 'Spread'):
        nome = 'Spread_{}_x_{}_{}_periodos.png'.format(ativo_x[:-3], ativo_y[:-3], period)
        # The title used to be the file name with the period appended
        # ("..._periodos.png100"); use a title in the style of the other
        # chart types instead.
        plt.title('Spread {} períodos'.format(period))
        arr = X / y
        plt.plot(dados['Date'], arr, color='blue', label='Spread')
        _show_or_save(nome)
    elif tipo in ('regression', 'Regression'):
        # Imported lazily so the other chart types work without yellowbrick.
        from yellowbrick.regressor import ResidualsPlot
        X = X.reshape(-1, 1)
        fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(8, 12))
        ax1.set_title('Regressão Linear dos preços')
        ax1.scatter(X, y, color='blue')
        ax1.plot(X, modelo.predict(X), color='red')

        visualizador = ResidualsPlot(modelo, ax=ax2)
        visualizador.fit(X, y)
        visualizador.finalize()

        nome = 'Regression_{}_x_{}_{}_periodos.png'.format(ativo_x[:-3], ativo_y[:-3], period)
        _show_or_save(nome)
    else:
        # Unknown chart type: list the accepted spellings instead of raising.
        lista = ['residuos', 'fechamento', 'spread', 'regression']
        listaM = ['Residuos', 'Fechamento', 'Spread', 'Regression']
        print('Escolha entre as opções abaixo:')
        for minusculo, maiusculo in zip(lista, listaM):
            print(minusculo, ' ou ', maiusculo)