def prediction_error_plot(lin_model, x_train, y_train, x_test, y_test):
    fig = plt.figure(figsize=(16, 12))
    ax1 = fig.add_subplot(111)
    visualizer_pred_err = PredictionError(lin_model, ax=ax1)
    visualizer_pred_err.fit(x_train, y_train)    # Fit the training data to the visualizer
    visualizer_pred_err.score(x_test, y_test)    # Evaluate the model on the test data
    visualizer_pred_err.show()
def log_prediction_error_chart(regressor, X_train, X_test, y_train, y_test, experiment=None):
    """Log prediction error chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = PredictionError(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Prediction Error')
        plt.close(fig)
    except Exception as e:
        print('Did not log prediction error chart. Error: {}'.format(e))
def regression_sanity_check(model, X_train, X_test, y_train, y_test):
    fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))

    plt.sca(ax1)
    visualizer = ResidualsPlot(model, ax=ax1)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)

    plt.sca(ax2)
    visualizer2 = PredictionError(model, ax=ax2)
    visualizer2.fit(X_train, y_train)
    visualizer2.score(X_test, y_test)

    visualizer.finalize()
    visualizer2.poof()
def regression_visualization(model, X_train, X_test, y_train, y_test):
    visualizer = PredictionError(model)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    plt.title('Score visualization')
    plt.legend()
    st.pyplot()
def peplot():
    X, y = load_concrete()
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)
    oz = PredictionError(Lasso(), ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "prediction_error")
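# Note: tts is sklearn's train_test_split, and newfig/savefig are helpers from
# the surrounding gallery script, not shown here. A minimal stand-in for the
# helpers (an assumption, not the original implementation) might be:
import os
import matplotlib.pyplot as plt

def newfig():
    # Create a fresh axes for the next visualizer
    _, ax = plt.subplots()
    return ax

def savefig(oz, name, gallery="gallery"):
    # Finalize the visualizer and write it out as a PNG
    os.makedirs(gallery, exist_ok=True)
    oz.show(outpath=os.path.join(gallery, "{}.png".format(name)))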
def create_prediction_error_chart(regressor, X_train, X_test, y_train, y_test):
    """Create prediction error chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['prediction_error'] = npt_utils.create_prediction_error_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = PredictionError(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log prediction error chart. Error: {}'.format(e))

    return chart
def prediction_error_plot(self) -> None:
    """Plot the actual targets from the dataset against the predicted values
    generated by our model. This allows us to see how much variance is in the model.
    """
    visualizer = PredictionError(self.trained_model)
    visualizer.fit(self.X_train, self.y_train)    # Fit the training data to the visualizer
    visualizer.score(self.X_test, self.y_test)    # Evaluate the model on the test data

    save_dir = f"{self.plots_dir}/prediction_error_plot_{self.model_id}.png"
    visualizer.show(outpath=save_dir)

    if not LOCAL:
        upload_to_s3(save_dir,
                     f'plots/prediction_error_plot_{self.model_id}.png',
                     bucket=S3_BUCKET_NAME)
    plt.clf()
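# upload_to_s3, LOCAL, and S3_BUCKET_NAME are defined elsewhere in this
# project. A minimal sketch of the helper, assuming a plain boto3 upload:
import boto3

def upload_to_s3(local_path, key, bucket):
    # Push the saved plot to the given S3 bucket under the given key
    boto3.client("s3").upload_file(local_path, bucket, key)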
def testFunc9(savepath='Results/bikeshare_Ridge_PredictionError.png'):
    '''
    Plot the prediction error of a Ridge model (alpha chosen via AlphaSelection)
    on the bike-share data
    '''
    data = pd.read_csv('fixtures/bikeshare/bikeshare.csv')
    X = data[[
        "season", "month", "hour", "holiday", "weekday", "workingday",
        "weather", "temp", "feelslike", "humidity", "windspeed"
    ]]
    Y = data["riders"]
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)

    visualizer = PredictionError(Ridge(alpha=3.181))
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath=savepath)
def visualize_prediction_error(self, model_info):
    model = model_info['model']
    X_train = model_info['X_train']
    X_test = model_info['X_test']
    Y_train = model_info['Y_train']
    Y_test = model_info['Y_test']

    visualizer = PredictionError(model)
    visualizer.fit(X_train, Y_train)    # Fit the training data to the visualizer
    visualizer.score(X_test, Y_test)    # Evaluate the model on the test data
    visualizer.show()                   # Render the plot
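# A hedged usage sketch for the method above. The dict keys come from the
# lookups in the function; everything else (the trainer object, the data
# splits, the estimator) is assumed for illustration:
model_info = {
    'model': LinearRegression(),
    'X_train': X_train, 'X_test': X_test,
    'Y_train': Y_train, 'Y_test': Y_test,
}
trainer.visualize_prediction_error(model_info)  # trainer is hypothetical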
def test_prepredict_regressor(self):
    """
    Test the prepredict estimator with a prediction error plot
    """
    # Make prepredictions
    X, y = self.continuous.X, self.continuous.y
    y_pred = LinearRegression().fit(X.train, y.train).predict(X.test)

    # Create prepredict estimator with prior predictions
    estimator = PrePredict(y_pred, REGRESSOR)
    assert estimator.fit(X.train, y.train) is estimator
    assert estimator.predict(X.train) is y_pred
    assert estimator.score(X.test, y.test) == pytest.approx(0.9999983124154966, rel=1e-2)

    # Test that a visualizer works with the pre-predictions.
    viz = PredictionError(estimator)
    viz.fit(X.train, y.train)
    viz.score(X.test, y.test)
    viz.finalize()

    self.assert_images_similar(viz, tol=10.0)
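# Outside the test suite, the same pattern wraps predictions produced
# elsewhere (another process, a deployed model) so Yellowbrick can plot them.
# A minimal sketch; the import path is assumed to be
# yellowbrick.contrib.prepredict, matching the test above:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from yellowbrick.contrib.prepredict import PrePredict, REGRESSOR
from yellowbrick.regressor import PredictionError

X, y = make_regression(n_samples=200, n_features=5, noise=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Predictions computed ahead of time, e.g. loaded from disk
y_pred = LinearRegression().fit(X_train, y_train).predict(X_test)

viz = PredictionError(PrePredict(y_pred, REGRESSOR))
viz.fit(X_train, y_train)   # a no-op for PrePredict, kept for API symmetry
viz.score(X_test, y_test)
viz.show()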
def generate_ordinal_diagnostics(x, y, current_best_model, label_type, diagnostic_image_path):
    x = np.array(x)
    y = np.array(y)
    kf = KFold(n_splits=10, shuffle=True)
    guesses = []
    for train_index, test_index in kf.split(x):
        X_train, X_test = x[train_index], x[test_index]
        y_train, y_test = np.array(y)[train_index], np.array(y)[test_index]
        model = current_best_model[0].fit(X_train, y_train)
        for guess in zip(y_test.tolist(), model.predict(X_test).tolist()):
            guesses.append(guess)

    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    if "VotingClassifier" not in str(current_best_model[0].__class__):
        visualizer = ResidualsPlot(current_best_model[0])
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.poof(outpath=diagnostic_image_path + "/residuals_plot.png")
        plt.clf()

        visualizer = PredictionError(current_best_model[0])
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.poof(outpath=diagnostic_image_path + "/prediction_error.png")
        plt.clf()

    visualizer = PCADecomposition(scale=True, center=False, col=y, proj_dim=2)
    visualizer.fit_transform(x, y)
    print(diagnostic_image_path + "/pca_2.png")
    visualizer.poof(outpath=diagnostic_image_path + "/pca_2.png")
    plt.clf()

    visualizer = PCADecomposition(scale=True, center=False, col=y, proj_dim=3)
    visualizer.fit_transform(x, y)
    visualizer.poof(outpath=diagnostic_image_path + "/pca_3.png")
    plt.clf()

    return {
        "mse": mean_squared_error(*np.array(guesses).transpose()),
        "r2": r2_score(*np.array(guesses).transpose()),
        "mae": median_absolute_error(*np.array(guesses).transpose()),
        "evs": explained_variance_score(*np.array(guesses).transpose()),
        "rmse": np.sqrt(mean_squared_error(*np.array(guesses).transpose()))
    }
def perror(ax):
    from sklearn.linear_model import LassoCV
    from yellowbrick.regressor import PredictionError

    features = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    splits = load_data('concrete', cols=features, target='strength', tts=True)
    X_train, X_test, y_train, y_test = splits

    estimator = LassoCV()
    visualizer = PredictionError(estimator, ax=ax)

    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    return visualizer
def showError():
    # Load the data
    df = load_data('concrete')
    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame
    # (.values returns the underlying numpy arrays; DataFrame.as_matrix()
    # was removed in pandas 1.0)
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)    # Evaluate the model on the test data
    g = visualizer.poof()               # Draw/show/poof the data
### Yellowbrick
from yellowbrick.regressor import PredictionError, ResidualsPlot

## RVF plot
# Run the following together
lr_yb = ResidualsPlot(lr, hist=True)
lr_yb.fit(X_train, y_train)
lr_yb.score(X_test, y_test)
lr_yb.poof()

## Prediction Error plot
# (unlike ResidualsPlot, PredictionError does not take a hist argument)
lr_yb = PredictionError(lr)
lr_yb.fit(X_train, y_train)
lr_yb.score(X_test, y_test)
lr_yb.poof()

################ Polynomial/Interactions ################
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

# adds polynomials and interactions
poly_lr = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2, interaction_only=False, include_bias=False),
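    # The snippet is cut off above; a plausible completion (an assumption)
    # closes the pipeline with a linear regressor -- LinearRegression is
    # presumed imported where lr was created -- and inspects it the same way:
    LinearRegression(),
)
poly_lr.fit(X_train, y_train)

poly_yb = PredictionError(poly_lr)
poly_yb.fit(X_train, y_train)
poly_yb.score(X_test, y_test)
poly_yb.poof()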
# load data
visualizations = load_dataset(file_name=config.TRAINING_DATA_FILE)

# set X and y
# adjust X based on feature set to use from config.py (TOP5_FEATURES or FEATURES)
X = visualizations[config.TOP5_FEATURES]
y = visualizations[config.TARGET]

# train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# yellowbrick ResidualsPlot visualization
visualizer = ResidualsPlot(config.BEST_MODEL)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show(outpath="visualizations/ResidualsPlotVisualization.pdf")
visualizer.show(outpath="visualizations/ResidualsPlotVisualization.png")
visualizer.show()

# yellowbrick prediction error visualization
visualizer = PredictionError(config.BEST_MODEL)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show(outpath="visualizations/PredictionErrorVisualization.pdf")
visualizer.show(outpath="visualizations/PredictionErrorVisualization.png")
visualizer.show()
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load the data
advert = pd.read_csv('data.csv')    # pandas function to read a CSV file
advert.head()                       # shows the first five rows of the data
advert.info()                       # shows information about the data

# Single call giving subplots that show the relationship between each
# individual predictor and the target
sns.pairplot(advert, x_vars=['A', 'B', 'C'], y_vars=['D'], height=7, aspect=0.7)

X = advert[['A', 'B', 'C']]
Y = advert.D

# Split the dataset into training and testing data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=1)

lm1 = LinearRegression().fit(X_train, Y_train)
print(lm1.intercept_)                   # the intercept of the model
print(lm1.coef_)                        # the coefficients of the model
list(zip(['A', 'B', 'C'], lm1.coef_))   # coefficients paired with their features

# Correlations among the feature variables and the target
sns.heatmap(advert.corr(), annot=True)

lm1_preds = lm1.predict(X_test)     # prediction
print("RMSE:", np.sqrt(mean_squared_error(Y_test, lm1_preds)))  # root mean squared error (lower is better)
print("R^2:", r2_score(Y_test, lm1_preds))                      # R squared (higher is better)

# Visualize the prediction error
from yellowbrick.regressor import PredictionError, ResidualsPlot

visualizer = PredictionError(lm1).fit(X_train, Y_train)
visualizer.score(X_test, Y_test)
visualizer.poof()   # visualize the output
advert.columns = columns
# advert.head()
# advert.info()
col = columns[1:]
# sns.pairplot(advert, x_vars=col, y_vars='线路价格(不含税)', height=14, aspect=0.7)

X = advert[col]
y = advert['线路总成本']

lm1 = LinearRegression()
lm1.fit(X, y)
lm1_predict = lm1.predict(X[col])

xtrain, xtest, ytrain, ytest = train_test_split(X, y, random_state=1)
# print("R^2:", r2_score(y, lm1_predict))
# with all influencing factors included: R^2: 0.9797304791768885

lm2 = LinearRegression().fit(xtrain, ytrain)
lm2_predict = lm2.predict(xtest)
print("RMSE2:", np.sqrt(mean_squared_error(ytest, lm2_predict)))
print("R^2 lm2:", r2_score(ytest, lm2_predict))
print(lm2.intercept_)
print(lm2.coef_)
# R^2: 0.9797304791768885
# RMSE: 535.8592414949177

visualizer = PredictionError(lm1).fit(xtrain, ytrain)
visualizer.score(xtest, ytest)
visualizer.poof()

# sns.heatmap(advert.corr(), cmap="YlGnBu", annot=True)
# plt.show()

print("R^2 lm1:", r2_score(y, lm1_predict))
print(lm1.intercept_)
print(lm1.coef_)
# plt.show()
import pandas as pd

from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from yellowbrick.regressor import PredictionError

if __name__ == '__main__':
    # Load the regression data set
    df = pd.read_csv("../../../examples/data/concrete/concrete.csv")

    feature_names = ['cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age']
    target_name = 'strength'

    # Get the X and y data from the DataFrame
    X = df[feature_names]
    y = df[target_name]

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)    # Evaluate the model on the test data
    g = visualizer.poof(outpath="images/prediction_error.png")    # Draw/show/poof the data
# sklearn.cross_validation was removed; cross_val_score now lives in
# sklearn.model_selection
from sklearn.metrics import r2_score, mean_squared_error as mse
from sklearn.model_selection import cross_val_score

clf = LinearRegression()
scores = cross_val_score(clf, X_train, y_train, cv=5)
print(scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

model = LinearRegression()
model.fit(X_train, y_train)
yhat = model.predict(X_test)

r2 = r2_score(y_test, yhat)
me = mse(y_test, yhat)
print("r2={:0.3f} MSE={:0.3f}".format(r2, me))

from yellowbrick.regressor import PredictionError

# Instantiate the visualizer
visualizer = PredictionError(LinearRegression())

# Fit
visualizer.fit(X_train, y_train)

# Score and visualize
visualizer.score(X_test, y_test)
visualizer.poof()

from yellowbrick.regressor import ResidualsPlot

model = ResidualsPlot(LinearRegression())
model.fit(X_train, y_train)
model.score(X_test, y_test)
model.poof()

model = ElasticNetCV(alphas=alphas)
model.fit(X_train, y_train)
# How do our models perform on the test data?
score_model(rf)
score_model(rf_random)
score_model(rf_best)

# What do our residuals look like?
from yellowbrick.regressor import ResidualsPlot

resplot = ResidualsPlot(rf_best)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
g = resplot.poof()

# What does our prediction error look like?
from yellowbrick.regressor import PredictionError

prederr = PredictionError(rf_best)
prederr.fit(Xtrain, ytrain)
prederr.score(Xtest, ytest)
g = prederr.poof()

# Next, we pull out our fitted values (yhat) and actuals (ytest) to see how they compare.
# We also calculate our residuals by subtracting our fitted values from the actuals.
import matplotlib.pyplot as plt

rf_best.fit(Xtrain, ytrain)
yhat = rf_best.predict(Xtest)
error = ytest - yhat

data = pd.DataFrame({
    't': range(1,
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)

lm5 = LinearRegression().fit(x_train, y_train)
lm5_pred = lm5.predict(x_test)

print("RMSE = ", np.sqrt(mean_squared_error(y_test, lm5_pred)))
print("R^2 = ", r2_score(y_test, lm5_pred))


# In[30]:


from yellowbrick.regressor import PredictionError, ResidualsPlot

visualizer = PredictionError(lm5).fit(x_train, y_train)
visualizer.score(x_test, y_test)
visualizer.show()


# In[32]:


# TASK 7: INTERACTION EFFECT - SYNERGY
advert['interaction'] = advert['TV'] * advert['radio']

x = advert[['TV', 'radio', 'interaction']]
y = advert.sales

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)
def get_plots():
    all_plots = []

    # FEATURE Visualization
    # Instantiate the visualizer
    plt.figure(figsize=(3.5, 3.5))
    viz = Manifold(manifold="tsne")

    # Fit the data to the visualizer
    viz.fit_transform(X_train, y_train)

    # save to html
    fig = plt.gcf()
    some_htmL = mpld3.fig_to_html(fig)
    all_plots.append("<h4 align='center'>Manifold Visualization</h4>" + some_htmL)

    # clear plot
    plt.clf()

    if ML_ALG_nr == 1:
        # classification

        # Check if we can get the classes
        classes = None
        try:
            classes = list(Enc.inverse_transform(model_def.classes_))
        except ValueError as e:
            app.logger.info(e)

        if classes is not None:
            # Instantiate the classification model and visualizer
            visualizer = ClassPredictionError(DecisionTreeClassifier(), classes=classes)

            # Fit the training data to the visualizer
            visualizer.fit(X_train, y_train)

            # Evaluate the model on the test data
            visualizer.score(X_test, y_test)

            # save to html
            fig = plt.gcf()
            some_htmL = mpld3.fig_to_html(fig)
            all_plots.append("<h4 align='center'>Class Prediction Error</h4>" + some_htmL)

            # clear plot
            plt.clf()

            # The ConfusionMatrix visualizer takes a model
            cm = ConfusionMatrix(model_def, classes=classes)

            # Fit fits the passed model. This is unnecessary if you pass the visualizer a pre-fitted model
            cm.fit(X_train, y_train)

            # To create the ConfusionMatrix, we need some test data. Score runs predict() on the data
            # and then creates the confusion_matrix from scikit-learn.
            cm.score(X_test, y_test)

            # save to html
            fig = plt.gcf()
            some_htmL = mpld3.fig_to_html(fig)
            all_plots.append("<h4 align='center'>Confusion Matrix</h4>" + some_htmL)

            # clear plot
            plt.clf()

        return all_plots

    elif ML_ALG_nr == 0:
        # regression

        # Instantiate the linear model and visualizer
        visualizer = PredictionError(model_def, identity=True)
        visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)    # Evaluate the model on the test data

        # save to html
        fig = plt.gcf()
        some_htmL = mpld3.fig_to_html(fig)
        all_plots.append("<h4 align='center'>Prediction Error Plot</h4>" + some_htmL)

        # clear plot
        plt.clf()

        # Instantiate the model and visualizer
        visualizer = ResidualsPlot(model_def)
        visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)

        # save to html
        fig = plt.gcf()
        some_htmL = mpld3.fig_to_html(fig)
        all_plots.append("<h4 align='center'>Residuals Plot</h4>" + some_htmL)

        # clear plot
        plt.clf()

        return all_plots
# In[27]:


# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
X_test


# In[31]:


import joblib

visualizer = PredictionError(Lasso(), size=(800, 600))
visualizer.fit(X_train, y_train)

#regression_model = pickle.dumps(visualizer)
joblib.dump(visualizer, "regression_model")
#knn_from_pickle = pickle.loads(regression_model)
#knn_from_pickle.score(X_test, y_test)
#prediction = knn_from_pickle.predict(my_df)

# Call finalize to draw the final yellowbrick-specific elements
visualizer.finalize()
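# A minimal sketch (an assumption, mirroring the commented-out pickle code
# above) of loading the dumped visualizer back and scoring it later:
import joblib

visualizer = joblib.load("regression_model")
visualizer.score(X_test, y_test)    # evaluate on the held-out data
visualizer.show()                   # render the prediction error plot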
import pandas as pd

from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from yellowbrick.regressor import PredictionError

if __name__ == '__main__':
    # Load the regression data set
    df = pd.read_csv("../../../examples/data/concrete/concrete.csv")

    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame
    # (.values returns the underlying numpy arrays; DataFrame.as_matrix()
    # was removed in pandas 1.0)
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    lasso = Lasso()
    visualizer = PredictionError(lasso)

    visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)    # Evaluate the model on the test data
    g = visualizer.poof(outpath="images/prediction_error.png")    # Draw/show/poof the data
def regression(fname="regression.png"):
    """
    Create figures for regression models
    """
    _, axes = plt.subplots(ncols=2, figsize=(18, 6))

    alphas = np.logspace(-10, 1, 300)
    data = load_concrete(split=True)

    # Plot prediction error on the left
    oz = PredictionError(LassoCV(alphas=alphas), ax=axes[0])
    oz.fit(data.X.train, data.y.train)
    oz.score(data.X.test, data.y.test)
    oz.finalize()

    # Plot residuals on the right
    oz = ResidualsPlot(RidgeCV(alphas=alphas), ax=axes[1])
    oz.fit(data.X.train, data.y.train)
    oz.score(data.X.test, data.y.test)
    oz.finalize()

    # Save figure
    path = os.path.join(FIGURES, fname)
    plt.tight_layout()
    plt.savefig(path)
alphas = np.logspace(-10, 1, 200)
visualizer = AlphaSelection(RidgeCV(alphas=alphas))
visualizer.fit(xtrain, ytrain)
visualizer.show()

# Optimal model
optimal_alpha = 4.103
ridge_reg = RidgeCV(alphas=np.array([optimal_alpha]))
x = ridge_reg.fit(xtrain, ytrain)
# print("Coefficients: ", ridge_reg.coef_)

y_pred = ridge_reg.predict(xtest)
err = mean_squared_error(ytest, y_pred)
print("MSE for optimal model: ", err)

# Yellowbrick Regressor - Plot error
visualizer = PredictionError(ridge_reg)
visualizer.fit(xtrain, ytrain)
visualizer.score(xtest, ytest)
visualizer.show()

# SHAP Values
explainer = shap.LinearExplainer(ridge_reg, xtrain)
shap_values = explainer.shap_values(xtest)
shap.summary_plot(shap_values, xtest, plot_type='bar')

feature_indices = [
    227, 5, 0, 228, 133, 101, 220, 208, 2, 70, 1, 40, 207, 229, 215, 79, 4,
    125, 100, 98
]
for i in feature_indices:
    print("feature ", i, ": ", xtrain_raw.columns[i])
f.close()

'''sns.set(style="darkgrid")
ax = sns.distplot(predictions)
plt.show()
ax = sns.distplot(y_test)
plt.show()'''

plt.hist(predictions, 50, facecolor='g', alpha=0.75, log=True)
plt.hist(y_test, 50, facecolor='b', alpha=0.5, log=True)
plt.title("Comparison of true and predicted meter readings")
plt.show()

plt.subplot(2, 1, 1)
plt.hist(predictions, 50, facecolor='g', alpha=0.75, log=True)
plt.title("Predicted Meter Readings")
plt.subplot(2, 1, 2)
plt.hist(y_test, 50, facecolor='b', alpha=0.5, log=True)
plt.title("True Meter Readings")
plt.show()

visualizer = ResidualsPlot(nn)
visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
visualizer.score(X_test, y_test)    # Evaluate the model on the test data
visualizer.show()

visualizer = PredictionError(nn)
visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
visualizer.score(X_test, y_test)    # Evaluate the model on the test data
visualizer.show()
The model's largest coefficient belongs to GrLivArea, at 0.3154, which means
house prices are sensitive to this column. When GrLivArea increases, the house
price rises more than it would for the same increase in any other feature.
Note also that some features have negative coefficients (ExterQual_TA and
ExterQual_Fa): when these features increase, the house price goes down.
'''

'''
#### 2. Residual Plot
'''
st.write('')

visualizer_residual = ResidualsPlot(model_lr)
visualizer_residual.fit(X_train, y_train)
visualizer_residual.score(X_test, y_test)
visualizer_residual.finalize()

st.pyplot()

'''
Most residuals are distributed around 0. However, some residuals are still
quite large, so the residual distribution is not fully normal but skewed.
'''

'''
#### 3. Prediction Error
'''
st.write('')

visualizer_prediction_error = PredictionError(model_lr)
visualizer_prediction_error.fit(X_train, y_train)
visualizer_prediction_error.score(X_test, y_test)
visualizer_prediction_error.finalize()

st.pyplot()

'''
The best-fit line is not far from the identity line, so the model can be
considered close to optimal.
'''
           c=np.sign(lasso.coef_), cmap="bwr_r")

######## Yellowbrick
from yellowbrick.regressor import AlphaSelection, ResidualsPlot, PredictionError
from sklearn.linear_model import LassoCV

### Find optimal alpha
alphas = np.logspace(-10, 1, 400)
lasso_alpha = LassoCV(alphas=alphas)
lasso_yb = AlphaSelection(lasso_alpha)
lasso_yb.fit(X, y)
lasso_yb.poof()

### RVF plot
lasso_yb = ResidualsPlot(lasso, hist=True)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
lasso_yb.poof()

### Prediction Error
# (unlike ResidualsPlot, PredictionError does not take a hist argument)
lasso_yb = PredictionError(lasso)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
lasso_yb.poof()
           c=np.sign(ridge.coef_), cmap="bwr_r")

######## Yellowbrick
from yellowbrick.regressor import AlphaSelection, ResidualsPlot, PredictionError
from sklearn.linear_model import RidgeCV

### Find optimal alpha
alphas = np.logspace(-10, 1, 400)
ridge_alpha = RidgeCV(alphas=alphas)
ridge_yb = AlphaSelection(ridge_alpha)
ridge_yb.fit(X, y)
ridge_yb.poof()

### RVF plot
ridge_yb = ResidualsPlot(ridge, hist=True)
ridge_yb.fit(X_train, y_train)
ridge_yb.score(X_test, y_test)
ridge_yb.poof()

### Prediction Error
# (unlike ResidualsPlot, PredictionError does not take a hist argument)
ridge_yb = PredictionError(ridge)
ridge_yb.fit(X_train, y_train)
ridge_yb.score(X_test, y_test)
ridge_yb.poof()
# fitting lasso regression and making predictions
lasso = Lasso(alpha=14)
lasso.fit(X_train, y_train)
predictions3 = lasso.predict(X_test)
score3 = lasso.score(X_test, y_test)

# assessing performance of lasso
mae3 = MAE(y_test, predictions3)
mse3 = MSE(y_test, predictions3)
rmse3 = mse3**(1 / 2)

# feature importance
lasso_coef = lasso.fit(X, y).coef_

# visualizing regression model
visualizer = PredictionError(lasso)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

# visualizing regression residuals plot
visualizer2 = ResidualsPlot(lasso)
visualizer2.fit(X_train, y_train)
visualizer2.score(X_test, y_test)
visualizer2.show()

print('Linear Regression Score: ', score1)
print('Ridge Regression Score: ', score2)
print('Lasso Regression Score: ', score3)
print("Lasso chooses {} variables".format(len(results))) print(results) # How does our model perform on the test data? score_model(lasso) # What do our residuals look like? from yellowbrick.regressor import ResidualsPlot resplot = ResidualsPlot(lasso) resplot.fit(Xtrain, ytrain) resplot.score(Xtest, ytest) g = resplot.poof() # What does our prediction error look like? from yellowbrick.regressor import PredictionError prederr = PredictionError(lasso) prederr.fit(Xtrain, ytrain) prederr.score(Xtrain, ytrain) g = prederr.poof() # Next, we pull out our fitted values (yhat) and actuals (ytest) to see how they compare. # We also calculate our residuals by subtracting our fitted values from the actuals. import matplotlib.pyplot as plt lasso.fit(Xtrain, ytrain) yhat = lasso.predict(Xtest) resid = ytest - yhat data = pd.DataFrame({ 't': range(1,