Exemple #1
0
def regression_visualization(model, X_train, X_test, y_train, y_test):
    """Draw a prediction-error plot for ``model`` and pass it to ``st.pyplot``.

    The visualizer is fitted on the training split and scored on the test
    split. The current pyplot figure is then given a title and a legend
    before ``st.pyplot()`` is called.

    Args:
        model: Estimator wrapped by ``PredictionError``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training target.
        y_test: Test target.
    """
    error_plot = PredictionError(model)
    error_plot.fit(X_train, y_train)
    error_plot.score(X_test, y_test)

    # Decorate whatever figure pyplot currently considers active.
    plt.title('Score visualization')
    plt.legend()
    st.pyplot()
def peplot():
    """Create a Lasso prediction-error plot on the concrete data and save it.

    The data comes from ``load_concrete()`` and is split with ``tts`` using a
    20% test share. The visualizer draws on the axes returned by ``newfig()``
    and is handed to ``savefig`` under the name ``"prediction_error"``.
    """
    features, target = load_concrete()
    train_X, test_X, train_y, test_y = tts(features, target, test_size=0.2)

    viz = PredictionError(Lasso(), ax=newfig())
    viz.fit(train_X, train_y)
    viz.score(test_X, test_y)
    savefig(viz, "prediction_error")
    def prediction_error_plot(lin_model, x_train, y_train, x_test, y_test):
        """Show a prediction-error plot for ``lin_model`` on a 16x12 figure.

        Args:
            lin_model: Estimator wrapped by ``PredictionError``.
            x_train: Training features.
            y_train: Training target.
            x_test: Test features.
            y_test: Test target.
        """
        # A single subplot on a dedicated figure hosts the visualizer.
        axes = plt.figure(figsize=(16, 12)).add_subplot(111)
        error_viz = PredictionError(lin_model, ax=axes)

        error_viz.fit(x_train, y_train)   # fit on the training split
        error_viz.score(x_test, y_test)   # evaluate on the test split
        error_viz.show()
Exemple #4
0
def visualiza_erros(train_x, train_y, test_x, test_y):
    """Show a prediction-error plot and then a residuals plot.

    Each plot wraps its own fresh ``LinearRegression``, is fitted on the
    training split, scored on the test split and rendered with ``poof()``.

    Args:
        train_x: Training features.
        train_y: Training target.
        test_x: Test features.
        test_y: Test target.
    """
    for visualizer_cls in (PredictionError, ResidualsPlot):
        visualizer = visualizer_cls(LinearRegression())
        visualizer.fit(train_x, train_y)
        visualizer.score(test_x, test_y)
        visualizer.poof()
def log_prediction_error_chart(regressor,
                               X_train,
                               X_test,
                               y_train,
                               y_test,
                               experiment=None):
    """Log prediction error chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Chart creation is best-effort: any exception raised while building or
    logging the chart is printed, not propagated. The matplotlib figure is
    closed in every case, including when an error occurs.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Raises:
        AssertionError: If ``regressor`` does not pass ``is_regressor``.

    Examples:
        .. code:: python3

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'
    exp = _validate_experiment(experiment)

    fig = None
    try:
        fig, ax = plt.subplots()
        visualizer = PredictionError(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Prediction Error')
    except Exception as e:
        print('Did not log prediction error chart. Error: {}'.format(e))
    finally:
        # Close the figure on the error path too; otherwise every failed
        # call leaves an open figure registered with pyplot.
        if fig is not None:
            plt.close(fig)
Exemple #6
0
	def visualize_prediction_error(self, model_info):
		"""Fit and score a ``PredictionError`` visualizer from ``model_info``.

		``model_info`` must provide the keys ``'model'``, ``'X_train'``,
		``'X_test'``, ``'Y_train'`` and ``'Y_test'``. The visualizer is fitted
		on the training split and scored on the test split; nothing is shown,
		saved or returned here.
		"""
		model, X_train, X_test, Y_train, Y_test = (
			model_info[key]
			for key in ('model', 'X_train', 'X_test', 'Y_train', 'Y_test')
		)

		error_viz = PredictionError(model)
		error_viz.fit(X_train, Y_train)   # fit on the training split
		error_viz.score(X_test, Y_test)   # evaluate on the test split
Exemple #7
0
def regression_sanity_check(model, X_train, X_test, y_train, y_test):
    """Show residuals (left) and prediction error (right) side by side.

    Both visualizers wrap the same ``model``, are fitted on the training
    split and scored on the test split. The residuals plot is finalized and
    the prediction-error plot is rendered with ``poof()``.

    Args:
        model: Estimator shared by both visualizers.
        X_train: Training features.
        X_test: Test features.
        y_train: Training target.
        y_test: Test target.
    """
    _, (left_ax, right_ax) = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))

    plt.sca(left_ax)
    residuals = ResidualsPlot(model, ax=left_ax)
    residuals.fit(X_train, y_train)
    residuals.score(X_test, y_test)

    plt.sca(right_ax)
    errors = PredictionError(model, ax=right_ax)
    errors.fit(X_train, y_train)
    errors.score(X_test, y_test)

    residuals.finalize()
    errors.poof()
def create_prediction_error_chart(regressor, X_train, X_test, y_train, y_test):
    """Create prediction error chart.

    Chart creation is best-effort: any exception raised while building the
    chart is printed and ``None`` is returned instead of a file. The
    matplotlib figure is closed in every case, including when an error occurs.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``,
        or ``None`` when the chart could not be created.

    Raises:
        AssertionError: If ``regressor`` does not pass ``is_regressor``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['prediction_error'] = npt_utils.create_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'

    chart = None
    fig = None

    try:
        fig, ax = plt.subplots()
        visualizer = PredictionError(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
    except Exception as e:
        print('Did not log prediction error chart. Error: {}'.format(e))
    finally:
        # Close the figure on the error path too; otherwise every failed
        # call leaves an open figure registered with pyplot.
        if fig is not None:
            plt.close(fig)

    return chart
Exemple #9
0
def perror(ax):
    """Build a LassoCV prediction-error visualizer on the concrete data.

    Args:
        ax: Axes passed through to ``PredictionError``.

    Returns:
        The visualizer, fitted on the training split and scored on the test
        split. It is neither finalized nor shown here.
    """
    from sklearn.linear_model import LassoCV
    from yellowbrick.regressor import PredictionError

    feature_cols = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]

    # With tts=True, load_data is unpacked as (X_train, X_test, y_train, y_test).
    X_train, X_test, y_train, y_test = load_data(
        'concrete', cols=feature_cols, target='strength', tts=True
    )

    viz = PredictionError(LassoCV(), ax=ax)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    return viz
Exemple #10
0
def testFunc9(savepath='Results/bikeshare_Ridge_PredictionError.png'):
    '''
    Plot the prediction error of a Ridge model on the bike-share data.

    Reads ``fixtures/bikeshare/bikeshare.csv``, holds out 30% of the rows as
    a test split, fits ``Ridge(alpha=3.181)`` through a ``PredictionError``
    visualizer and writes the figure to ``savepath``.
    '''
    bikeshare = pd.read_csv('fixtures/bikeshare/bikeshare.csv')
    feature_cols = [
        "season", "month", "hour", "holiday", "weekday", "workingday",
        "weather", "temp", "feelslike", "humidity", "windspeed"
    ]
    features = bikeshare[feature_cols]
    target = bikeshare["riders"]

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3)

    error_viz = PredictionError(Ridge(alpha=3.181))
    error_viz.fit(X_train, y_train)
    error_viz.score(X_test, y_test)
    error_viz.poof(outpath=savepath)
 def prediction_error_plot(self) -> None:
     """Save a prediction-error plot (actual vs. predicted targets) for the
     trained model.

     The plot is written to ``plots_dir`` under a name that contains
     ``model_id``. Unless ``LOCAL`` is truthy, the file is also uploaded to
     ``S3_BUCKET_NAME``. The current pyplot figure is cleared at the end.
     """
     error_viz = PredictionError(self.trained_model)
     error_viz.fit(self.X_train, self.y_train)   # fit on the training split
     error_viz.score(self.X_test, self.y_test)   # evaluate on the test split

     local_path = f"{self.plots_dir}/prediction_error_plot_{self.model_id}.png"
     error_viz.show(outpath=local_path)

     if not LOCAL:
         remote_key = f'plots/prediction_error_plot_{self.model_id}.png'
         upload_to_s3(local_path, remote_key, bucket=S3_BUCKET_NAME)

     plt.clf()
def lasso_regression(X_train, y_train, X_test, y_test, plot):
    """
    Fit a Lasso regression with built-in cross-validation, report which
    features it keeps, optionally plot diagnostics, and return test metrics.

    Args:
        X_train: Training features; must expose ``.columns`` (used to label
            the coefficients).
        y_train: Training target.
        X_test: Test features.
        y_test: Test target.
        plot: When truthy, show the coefficient bar chart, the prediction
            error plot and the residuals plot.

    Returns:
        dict with the keys ``"name"``, ``"R squared"``, ``"RMSE"``,
        ``"R squared training"`` and ``"MAE"``.
    """
    # Fit the cross-validated Lasso regression
    lasso_cv = LassoCV()
    lasso_cv.fit(X_train, y_train)
    print("Best alpha using built-in LassoCV: %f" % lasso_cv.alpha_)
    print("Best score using built-in LassoCV: %f" % lasso_cv.score(X_train, y_train))

    # Count the coefficients the model kept versus shrank to exactly zero
    coefficients = pd.Series(lasso_cv.coef_, index=X_train.columns)
    kept = sum(coefficients != 0)
    dropped = sum(coefficients == 0)
    print(
        "Lasso picked "
        + str(kept)
        + " variables and eliminated the other "
        + str(dropped)
        + " variables"
    )

    # Coefficients in ascending order, used as the feature importance
    ranked = coefficients.sort_values()

    if plot:
        plt.rcParams["figure.figsize"] = (8.0, 10.0)
        ranked.plot(kind="barh")
        plt.title("Feature importance using Lasso Model")
        plt.show()

        # Prediction error first, then residuals; both use the same recipe
        for diagnostic_cls in (PredictionError, ResidualsPlot):
            diagnostic = diagnostic_cls(lasso_cv, size=(1080, 720))
            diagnostic.fit(X_train, y_train)   # fit on the training split
            diagnostic.score(X_test, y_test)   # evaluate on the test split
            diagnostic.show()                  # finalize and render

    # Score the model on the held-out data
    predictions = lasso_cv.predict(X_test)
    return {
        "name": "Lasso Regression",
        "R squared": lasso_cv.score(X_test, y_test),
        "RMSE": rmse(y_test, predictions),
        "R squared training": lasso_cv.score(X_train, y_train),
        "MAE": mean_absolute_error(y_test, predictions),
    }
def generate_ordinal_diagnostics(x, y, current_best_model, label_type,
                                 diagnostic_image_path):
    """Cross-validate the best model, write diagnostic plots, return metrics.

    Args:
        x: Feature data, converted with ``np.array``.
        y: Target data, converted with ``np.array``.
        current_best_model: Sequence whose first element is the estimator.
            That estimator is refitted in place on every fold.
        label_type: Not used by this function.
        diagnostic_image_path: Directory prefix for the written images
            (``/residuals_plot.png``, ``/prediction_error.png``,
            ``/pca_2.png``, ``/pca_3.png``).

    Returns:
        dict with the keys ``"mse"``, ``"r2"``, ``"mae"``, ``"evs"`` and
        ``"rmse"``, computed on the pooled out-of-fold predictions. Note that
        ``"mae"`` holds the *median* absolute error.
    """
    x = np.array(x)
    y = np.array(y)
    estimator = current_best_model[0]

    # Pool (actual, predicted) pairs over a shuffled 10-fold split.
    guesses = []
    for train_index, test_index in KFold(n_splits=10, shuffle=True).split(x):
        fitted = estimator.fit(x[train_index], y[train_index])
        guesses.extend(
            zip(y[test_index].tolist(),
                fitted.predict(x[test_index]).tolist()))

    # A separate 80/20 split feeds the yellowbrick regression diagnostics.
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    if "VotingClassifier" not in str(estimator.__class__):
        regression_plots = (
            (ResidualsPlot, "/residuals_plot.png"),
            (PredictionError, "/prediction_error.png"),
        )
        for plot_cls, file_name in regression_plots:
            diagnostic = plot_cls(estimator)
            diagnostic.fit(X_train, y_train)
            diagnostic.score(X_test, y_test)
            diagnostic.poof(outpath=diagnostic_image_path + file_name)
            plt.clf()

    # PCA projections of the full data set, in 2 and then 3 dimensions.
    pca_2d = PCADecomposition(scale=True, center=False, col=y, proj_dim=2)
    pca_2d.fit_transform(x, y)
    print(diagnostic_image_path + "/pca_2.png")
    pca_2d.poof(outpath=diagnostic_image_path + "/pca_2.png")
    plt.clf()

    pca_3d = PCADecomposition(scale=True, center=False, col=y, proj_dim=3)
    pca_3d.fit_transform(x, y)
    pca_3d.poof(outpath=diagnostic_image_path + "/pca_3.png")
    plt.clf()

    # Rows of ``pairs`` are unpacked as the positional metric arguments.
    pairs = np.array(guesses).transpose()
    mse = mean_squared_error(*pairs)
    return {
        "mse": mse,
        "r2": r2_score(*pairs),
        "mae": median_absolute_error(*pairs),
        "evs": explained_variance_score(*pairs),
        "rmse": np.sqrt(mse)
    }
Exemple #14
0
    def test_prepredict_regressor(self):
        """
        PrePredict should serve stored regression predictions to a
        prediction error plot
        """
        dataset = self.continuous
        X, y = dataset.X, dataset.y

        # Predictions are computed up front with an ordinary linear model
        linear = LinearRegression().fit(X.train, y.train)
        y_pred = linear.predict(X.test)

        # The estimator must hand back the exact stored predictions
        estimator = PrePredict(y_pred, REGRESSOR)
        assert estimator.fit(X.train, y.train) is estimator
        assert estimator.predict(X.train) is y_pred
        expected_score = pytest.approx(0.9999983124154966, rel=1e-2)
        assert estimator.score(X.test, y.test) == expected_score

        # The visualizer is driven entirely by the pre-computed predictions
        viz = PredictionError(estimator)
        viz.fit(X.train, y.train)
        viz.score(X.test, y.test)
        viz.finalize()

        self.assert_images_similar(viz, tol=10.0)
Exemple #15
0
def showError():
    """Show a Lasso prediction-error plot for the concrete data set.

    Loads the ``'concrete'`` data with ``load_data``, holds out 20% of the
    rows as a test split, fits a ``Lasso`` model through a
    ``PredictionError`` visualizer and renders the plot.
    """
    # Load the data
    df = load_data('concrete')
    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame. ``.values`` replaces
    # ``.as_matrix()``, which was removed in pandas 1.0.
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.poof()  # Draw/show/poof the data
Exemple #16
0
def regression(fname="regression.png"):
    """
    Draw the two-panel regression figure and save it under FIGURES

    The left panel is a LassoCV prediction error plot and the right panel is
    a RidgeCV residuals plot. Both use the concrete data and the same grid
    of 300 log-spaced alphas.
    """
    _, axes = plt.subplots(ncols=2, figsize=(18, 6))
    alphas = np.logspace(-10, 1, 300)
    data = load_concrete(split=True)

    # (visualizer, estimator, axes) for each panel, left to right
    panels = (
        (PredictionError, LassoCV, axes[0]),
        (ResidualsPlot, RidgeCV, axes[1]),
    )
    for visualizer_cls, estimator_cls, ax in panels:
        oz = visualizer_cls(estimator_cls(alphas=alphas), ax=ax)
        oz.fit(data.X.train, data.y.train)
        oz.score(data.X.test, data.y.test)
        oz.finalize()

    # Save figure
    plt.tight_layout()
    plt.savefig(os.path.join(FIGURES, fname))
Exemple #17
0
            c=np.sign(lasso.coef_),
            cmap="bwr_r")

######## Yellowbrick
# Yellowbrick diagnostics for the Lasso model: alpha search, residuals plot
# and prediction error plot.

from yellowbrick.regressor import AlphaSelection, ResidualsPlot, PredictionError
from sklearn.linear_model import LassoCV

### Find optimal alpha

# Candidate regularisation strengths: 400 log-spaced values from 1e-10 to 10.
alphas = np.logspace(-10, 1, 400)

lasso_alpha = LassoCV(alphas=alphas)
lasso_yb = AlphaSelection(lasso_alpha)
# The alpha search is fitted on X, y rather than on the train split used below.
lasso_yb.fit(X, y)
lasso_yb.poof()

### RVF plot

# ``lasso`` is defined outside this snippet. ``lasso_yb`` is rebound for each
# visualizer in turn.
lasso_yb = ResidualsPlot(lasso, hist=True)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
lasso_yb.poof()

### Prediction Error

# NOTE(review): ``hist`` is a ResidualsPlot option; PredictionError presumably
# ignores it - confirm and drop it if so.
lasso_yb = PredictionError(lasso, hist=True)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
lasso_yb.poof()
Exemple #18
0
# Close the file handle opened earlier (outside this snippet).
f.close()
# The string literal below holds disabled seaborn plotting code; as a bare
# expression statement it has no effect.
'''sns.set(style="darkgrid")
ax = sns.distplot(predictions)
plt.show()

ax = sns.distplot(y_test)
plt.show()'''
# Overlaid histograms (50 bins, log-scaled counts) of predictions (green)
# and true values (blue) on the same axes.
plt.hist(predictions, 50, facecolor='g', alpha=0.75, log=True)
plt.hist(y_test, 50, facecolor='b', alpha=0.5, log=True)
plt.title("Comparison of true and predicted meter readings")
plt.show()

# The same two histograms again, stacked in separate subplots.
plt.subplot(2, 1, 1)
plt.hist(predictions, 50, facecolor='g', alpha=0.75, log=True)
plt.title("Predicted Meter Readings")

plt.subplot(2, 1, 2)
plt.hist(y_test, 50, facecolor='b', alpha=0.5, log=True)
plt.title("True Meter Readings")
plt.show()

# Yellowbrick residuals plot for ``nn`` (defined outside this snippet).
visualizer = ResidualsPlot(nn)
visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()

# Yellowbrick prediction error plot for the same model.
visualizer = PredictionError(nn)
visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()
Exemple #19
0
# Load the training data from the file named in config.
visualizations = load_dataset(file_name=config.TRAINING_DATA_FILE)

# Set X and y.
# Adjust X based on the feature set to use from config.py
# (TOP5_FEATURES or FEATURES).
X = visualizations[config.TOP5_FEATURES]
y = visualizations[config.TARGET]

# Train/test split: 30% held out, fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=42)

# Yellowbrick residuals plot, written as PDF and PNG, then shown.
# NOTE(review): show() is called three times on the same visualizer; confirm
# that each call renders the complete plot.
visualizer = ResidualsPlot(config.BEST_MODEL)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show(outpath="visualizations/ResidualsPlotVisualization.pdf")
visualizer.show(outpath="visualizations/ResidualsPlotVisualization.png")
visualizer.show()

# Yellowbrick prediction error plot, written as PDF and PNG, then shown.
visualizer = PredictionError(config.BEST_MODEL)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show(outpath="visualizations/PredictionErrorVisualization.pdf")
visualizer.show(outpath="visualizations/PredictionErrorVisualization.png")
visualizer.show()
Exemple #20
0
### Yellowbrick
# Yellowbrick diagnostics for ``lr`` (defined outside this snippet).

from yellowbrick.regressor import PredictionError, ResidualsPlot

## RVF plot

# Run the following together

# Fitted on the training split, scored on the test split.
lr_yb = ResidualsPlot(lr, hist=True)
lr_yb.fit(X_train, y_train)
lr_yb.score(X_test, y_test)
lr_yb.poof()

## Prediction Error plot

# NOTE(review): ``hist`` is a ResidualsPlot option; PredictionError presumably
# ignores it - confirm and drop it if so.
lr_yb = PredictionError(lr, hist=True)
lr_yb.fit(X_train, y_train)
lr_yb.score(X_test, y_test)
lr_yb.poof()



################ Polynomial/Interactions ################


from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures # adds polynomials and interactions

poly_lr = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2, interaction_only=False, include_bias=False),
from sklearn.metrics import mean_squared_error

# Linear-regression walkthrough: load the data, fit a model on features
# A, B, C against target D, report test metrics and plot the prediction error.
#
# NOTE(review): the original text was not valid Python ('%' comments, curly
# quotes, capitalised built-ins) and mixed the names data/advert,
# y_test/Y_test and lm5/lm1. They are unified below on the assumption that
# each pair refers to the same object - confirm against the source notebook.

# Loading data
data = pd.read_csv('data.csv')  # pandas function to read a CSV file
data.head()  # first five rows of the data
data.info()  # summary information about the data

# One call giving sub-plots of each individual predictor against the target
sns.pairplot(data, x_vars=['A', 'B', 'C'], y_vars=['D'], height=7, aspect=0.7)

X = data[['A', 'B', 'C']]
Y = data.D

# Split the data set into training and testing data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=1)
lm1 = LinearRegression().fit(X_train, Y_train)
print(lm1.intercept_)  # intercept of the model
print(lm1.coef_)  # coefficients of the model

# Pair each feature name with its coefficient
list(zip(['A', 'B', 'C'], lm1.coef_))

# Correlation among the feature variables and the output
sns.heatmap(data.corr(), annot=True)

lm1_preds = lm1.predict(X_test)  # predictions on the test split

# Root mean squared error (lower is better)
print("RMSE:", np.sqrt(mean_squared_error(Y_test, lm1_preds)))
# R squared (higher is better)
print("R^2: ", r2_score(Y_test, lm1_preds))

from yellowbrick.regressor import PredictionError, ResidualsPlot

# Visualise the prediction error of the fitted model on the test split
Visualizer = PredictionError(lm1).fit(X_train, Y_train)
Visualizer.score(X_test, Y_test)
Visualizer.poof()
Exemple #22
0
# Alpha search for ridge regression over 200 log-spaced values from 1e-10
# to 10, visualised with yellowbrick's AlphaSelection.
alphas = np.logspace(-10, 1, 200)
visualizer = AlphaSelection(RidgeCV(alphas=alphas))
visualizer.fit(xtrain, ytrain)
visualizer.show()

# Optimal model
# NOTE(review): the alpha is hard-coded; presumably read off the
# AlphaSelection plot above - confirm.
optimal_alpha = 4.103
ridge_reg = RidgeCV(alphas=np.array([optimal_alpha]))
# ``x`` receives the return value of fit() and is not used again in this snippet.
x = ridge_reg.fit(xtrain, ytrain)
# print("Coefficients: ", ridge_reg.coef_)
y_pred = ridge_reg.predict(xtest)
err = mean_squared_error(ytest, y_pred)
print("MSE for optimal model: ", err)

# Yellowbrick Regressor - Plot error
visualizer = PredictionError(ridge_reg)
visualizer.fit(xtrain, ytrain)
visualizer.score(xtest, ytest)
visualizer.show()

# SHAP Values
explainer = shap.LinearExplainer(ridge_reg, xtrain)
shap_values = explainer.shap_values(xtest)
shap.summary_plot(shap_values, xtest, plot_type='bar')
# Column positions to look up by name.
# NOTE(review): presumably the top features from the SHAP summary plot - verify.
feature_indices = [
    227, 5, 0, 228, 133, 101, 220, 208, 2, 70, 1, 40, 207, 229, 215, 79, 4,
    125, 100, 98
]
# Names come from ``xtrain_raw`` (defined outside this snippet), not ``xtrain``.
for i in feature_indices:
    print("feature ", i, ": ", xtrain_raw.columns[i])
Exemple #23
0
            c=np.sign(ridge.coef_),
            cmap="bwr_r")

######## Yellowbrick
# Yellowbrick diagnostics for the Ridge model: alpha search, residuals plot
# and prediction error plot.

from yellowbrick.regressor import AlphaSelection, ResidualsPlot, PredictionError
from sklearn.linear_model import RidgeCV

### Find optimal alpha

# Candidate regularisation strengths: 400 log-spaced values from 1e-10 to 10.
alphas = np.logspace(-10, 1, 400)

ridge_alpha = RidgeCV(alphas=alphas)
ridge_yb = AlphaSelection(ridge_alpha)
# The alpha search is fitted on X, y rather than on the train split used below.
ridge_yb.fit(X, y)
ridge_yb.poof()

### RVF plot

# ``ridge`` is defined outside this snippet. ``ridge_yb`` is rebound for each
# visualizer in turn.
ridge_yb = ResidualsPlot(ridge, hist=True)
ridge_yb.fit(X_train, y_train)
ridge_yb.score(X_test, y_test)
ridge_yb.poof()

### Prediction Error

# NOTE(review): ``hist`` is a ResidualsPlot option; PredictionError presumably
# ignores it - confirm and drop it if so.
ridge_yb = PredictionError(ridge, hist=True)
ridge_yb.fit(X_train, y_train)
ridge_yb.score(X_test, y_test)
ridge_yb.poof()
Exemple #24
0
Koefisien yang paling besar dari model adalah GrLivArea sebesar 0.3154, artinya harga rumah sensitif dengan kolom ini. Apabila
terjadi peningkatan terhadap nilai GrLivArea, harga rumah akan meningkat lebih tinggi dibandingkan apabila terjadi kenaikan pada feature yang lain dengan kenaikan yang sama.
Perhatikan juga terdapat feature dengan nilai koefisien yang negatif (ExterQual_TA dan ExterQual_Fa), artinya apabila feature ini meningkat maka harga rumah akan menjadi lebih turun.
'''
# The bare triple-quoted strings in this script are page text (headings and
# commentary, written in Indonesian), presumably rendered by Streamlit's
# "magic" display of bare expressions - confirm. They are runtime content.
'''
#### 2. Residual Plot
'''
st.write('')
# Residuals plot for ``model_lr`` (defined outside this snippet): fitted on
# the training split, scored on the test split.
visualizer_residual = ResidualsPlot(model_lr)
visualizer_residual.fit(X_train, y_train)
visualizer_residual.score(X_test, y_test)
visualizer_residual.finalize()

# No figure is passed, so st.pyplot() presumably renders the current global
# pyplot figure - confirm.
st.pyplot()
'''
Residual berdistribusi paling banyak pada nilai 0. Akan tetapi, masih terdapat nilai residual yang cukup tinggi. Hal ini menyebabkan distribusi dari residual tidak sepenuhnya normal, tetapi menjadi skew.
'''
'''
#### 3. Prediction Error
'''

st.write('')
# Prediction error plot for the same model and splits.
visualizer_prediction_error = PredictionError(model_lr)
visualizer_prediction_error.fit(X_train, y_train)
visualizer_prediction_error.score(X_test, y_test)
visualizer_prediction_error.finalize()

st.pyplot()
'''
Antara garis best fit dengan garis identity tidak begitu jauh, sehingga dapat dikatakan bahwa model yang dibuat optimal.
'''
Exemple #25
0
# In[27]:


# Create training and test sets (30% held out; no fixed random_state, so the
# split differs between runs)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Bare expression: presumably there to display X_test in a notebook cell; it
# has no effect when run as a script.
X_test


# In[31]:


import joblib

# Lasso wrapped in a prediction error visualizer, fitted on the training split.
visualizer = PredictionError(Lasso(), size=(800, 600))
visualizer.fit(X_train, y_train)

# Persist the fitted object to the file "regression_model".
# NOTE(review): the whole visualizer is dumped, not just the Lasso estimator -
# confirm this is what consumers of the file expect.
#regression_model = pickle.dumps(visualizer)
joblib.dump(visualizer, "regression_model")




# Disabled round-trip check of the pickled object:
#knn_from_pickle = pickle.loads(regression_model)

#knn_from_pickle.score(X_test, y_test)
#prediction = knn_from_pickle.predict(my_df)

# Call finalize to draw the final yellowbrick-specific elements
# NOTE(review): score() is never called on the visualizer in this snippet
# before finalize() - confirm the plot has data to draw.
visualizer.finalize()
Exemple #26
0
def _capture_current_figure(title):
    """Return the current pyplot figure as an HTML snippet under a centred
    ``<h4>`` heading with ``title``, then clear the figure for the next plot."""
    figure_html = mpld3.fig_to_html(plt.gcf())
    plt.clf()
    return "<h4 align='center'>" + title + "</h4>" + figure_html


def get_plots():
    """Build the HTML diagnostic plots for the current model.

    A t-SNE manifold plot of the training data is always produced. Further
    plots depend on the module-level ``ML_ALG_nr``:

    * ``1`` (classification): class prediction error and confusion matrix,
      but only when the class labels can be decoded through ``Enc``.
    * ``0`` (regression): prediction error plot and residuals plot.

    Returns:
        list of HTML strings, one per plot, when ``ML_ALG_nr`` is 0 or 1;
        ``None`` for any other value.
    """
    all_plots = []
    # FEATURE Visualization

    # Instantiate the visualizer
    plt.figure(figsize=(3.5, 3.5))
    viz = Manifold(manifold="tsne")
    # Fit the data to the visualizer
    viz.fit_transform(X_train, y_train)
    all_plots.append(_capture_current_figure("Manifold Visualization"))

    if ML_ALG_nr == 1:
        # classification

        # Check if we can get the classes
        classes = None
        try:
            classes = list(Enc.inverse_transform(model_def.classes_))
        except ValueError as e:
            app.logger.info(e)

        if classes is not None:
            # Instantiate the classification model and visualizer
            visualizer = ClassPredictionError(DecisionTreeClassifier(),
                                              classes=classes)
            # Fit the training data to the visualizer
            visualizer.fit(X_train, y_train)
            # Evaluate the model on the test data
            visualizer.score(X_test, y_test)
            all_plots.append(_capture_current_figure("Class Prediction Error"))

            # The ConfusionMatrix visualizer takes a model
            cm = ConfusionMatrix(model_def, classes=classes)
            # Fit fits the passed model. This is unnecessary if you pass the
            # visualizer a pre-fitted model
            cm.fit(X_train, y_train)
            # To create the ConfusionMatrix, we need some test data. Score
            # runs predict() on the data and then creates the
            # confusion_matrix from scikit-learn.
            cm.score(X_test, y_test)
            all_plots.append(_capture_current_figure("Confusion Matrix"))

        return all_plots

    elif ML_ALG_nr == 0:
        # regression

        # Instantiate the linear model and visualizer
        visualizer = PredictionError(model_def, identity=True)
        visualizer.fit(X_train,
                       y_train)  # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)  # Evaluate the model on the test data
        all_plots.append(_capture_current_figure("Prediction Error Plot"))

        # Instantiate the model and visualizer
        visualizer = ResidualsPlot(model_def)
        visualizer.fit(X_train,
                       y_train)  # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)
        all_plots.append(_capture_current_figure("Residuals Plot"))

        return all_plots

    # Any other algorithm number yields no result, as before.
    return None
# Split x, y (defined outside this snippet) with a fixed seed.
x_train, x_test, y_train, y_test= train_test_split(x,y, random_state=1)

# Fit a linear model on the training split and predict the test split.
lm5 = LinearRegression().fit(x_train,y_train)
lm5_pred=lm5.predict(x_test)

# Test-set root mean squared error and R squared.
print("RMSE = ", np.sqrt(mean_squared_error(y_test,lm5_pred)))
print("R^2 = ", r2_score(y_test,lm5_pred))


# In[30]:


from yellowbrick.regressor import PredictionError, ResidualsPlot

# Prediction error plot for lm5; the result of fit() is kept as the visualizer.
visualizer=PredictionError(lm5).fit(x_train, y_train)
visualizer.score(x_test, y_test)
visualizer.show()


# In[32]:


#TASK 7: INTERACTION EFFECT - SYNERGY

# Add a TV x radio interaction column to ``advert``.
advert['interaction']= advert['TV'] * advert['radio']

# Rebuild the features with the interaction term; the target is sales.
x=advert[['TV', 'radio', 'interaction']]
y=advert.sales

# Same seed as the split above, applied to the new feature set.
x_train, x_test, y_train, y_test= train_test_split(x,y, random_state=1)
# 5-fold cross-validation of a linear regression on the training split.
# NOTE(review): ``cross_validation`` looks like the legacy
# ``sklearn.cross_validation`` module - confirm it exists in the installed
# scikit-learn version.
clf = LinearRegression()
scores = cross_validation.cross_val_score(clf, X_train, y_train, cv=5)
print(scores)
# NOTE(review): labelled "Accuracy", but the estimator is a regressor, so the
# scores are presumably R^2. The spread is printed as std / 2 - confirm that
# std * 2 was not intended.
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() / 2))

# Fit on the training split and evaluate on the test split.
model = LinearRegression()
model.fit(X_train, y_train)
yhat = model.predict(X_test)
r2 = r2_score(y_test, yhat)
me = mse(y_test, yhat)
print("r2={:0.3f} MSE={:0.3f}".format(r2, me))

from yellowbrick.regressor import PredictionError
# Instantiate the visualizer
visualizer = PredictionError(LinearRegression())
# Fit
visualizer.fit(X_train, y_train)
# Score and visualize
visualizer.score(X_test, y_test)
visualizer.poof()

from yellowbrick.regressor import ResidualsPlot

# ``model`` is rebound here to a residuals visualizer, replacing the fitted
# LinearRegression above.
model = ResidualsPlot(LinearRegression())
model.fit(X_train, y_train)
model.score(X_test, y_test)
model.poof()

# ``model`` is rebound again, now to an ElasticNetCV over ``alphas`` (defined
# outside this snippet).
model = ElasticNetCV(alphas=alphas)
model.fit(X_train, y_train)
Exemple #29
0
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

from yellowbrick.regressor import PredictionError

if __name__ == '__main__':
    # Script entry point: fit a Lasso model on the concrete data set and
    # write its prediction error plot to images/prediction_error.png.

    # Load the regression data set
    df = pd.read_csv("../../../examples/data/concrete/concrete.csv")

    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame. ``.values`` replaces
    # ``.as_matrix()``, which was removed in pandas 1.0.
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    g = visualizer.poof(
        outpath="images/prediction_error.png")  # Draw/show/poof the data
# Report the variables kept by the Lasso model (``results`` is built outside
# this snippet).
print("Lasso chooses {} variables".format(len(results)))
print(results)

# How does our model perform on the test data?
score_model(lasso)

# What do our residuals look like?
from yellowbrick.regressor import ResidualsPlot
resplot = ResidualsPlot(lasso)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
g = resplot.poof()

# What does our prediction error look like?
from yellowbrick.regressor import PredictionError
prederr = PredictionError(lasso)
prederr.fit(Xtrain, ytrain)
# Score on the held-out split, like the residuals plot above. Scoring on the
# training data would show in-sample fit instead of prediction error.
prederr.score(Xtest, ytest)
g = prederr.poof()

# Next, we pull out our fitted values (yhat) and actuals (ytest) to see how they compare.
# We also calculate our residuals by subtracting our fitted values from the actuals.
import matplotlib.pyplot as plt

lasso.fit(Xtrain, ytrain)

yhat = lasso.predict(Xtest)
resid = ytest - yhat

data = pd.DataFrame({
    't': range(1,