def plot_residuals(X, y, model, outpath="images/residuals.png", **kwargs):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    _, ax = plt.subplots()

    visualizer = ResidualsPlot(model, ax=ax, **kwargs)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath=outpath)
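
# A minimal usage sketch for the helper above (an illustration, not part of the
# original snippet): it assumes the names the function already relies on
# (train_test_split, matplotlib's pyplot, yellowbrick's ResidualsPlot) are
# imported, and it uses a small synthetic dataset.
import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X_demo = rng.rand(200, 3)                                     # 200 samples, 3 features
y_demo = X_demo @ np.array([1.5, -2.0, 0.5]) + rng.normal(scale=0.1, size=200)
plot_residuals(X_demo, y_demo, Ridge(), outpath="images/ridge_residuals.png")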
Example #2
plt.show()

from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(xrm, y)
print(reg.score(xrm, y))

xx = np.linspace(min(xrm), max(xrm)).reshape(-1, 1)
plt.scatter(xrm, y, color="blue")
plt.plot(xx, reg.predict(xx), color="red", linewidth=3)
plt.ylabel("y: Value of house / 1000 USD")
plt.xlabel("x: Number of rooms")
plt.show()

from yellowbrick.regressor import ResidualsPlot
visualizer = ResidualsPlot(reg, hist=False)
visualizer.fit(xrm, y)
visualizer.score(xrm, y)
visualizer.poof()

# use data multi var
# split data: 70%-training 30%-testing
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=42)
reg = LinearRegression()
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)
print("R^2 = ", reg.score(x_train, y_train))
Example #3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from yellowbrick.regressor import ResidualsPlot

base = pd.read_csv('cars.csv')
base = base.drop(['Unnamed: 0'], axis=1)

X = base.iloc[:, 1].values
y = base.iloc[:, 0].values
correlacao = np.corrcoef(X, y)  # correlation coefficient

X = X.reshape(-1, 1)
modelo = LinearRegression()
modelo.fit(X, y)

modelo.intercept_
modelo.coef_

plt.scatter(X, y)
plt.plot(X, modelo.predict(X), color='red')

# distance of 22 feet (manual prediction)
modelo.intercept_ + modelo.coef_ * 22

modelo.predict([[22]])  # predict expects a 2D array

modelo._residues

visualizador = ResidualsPlot(modelo)
visualizador.fit(X, y)
visualizador.poof()
Example #4
# ---------------------------Linear regression-------------------------------- #
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X, y)                 # training (fits the model)

# ------------------------------Coefficients---------------------------------- #
# b0
regressor.intercept_
# b1
regressor.coef_

# ---------------------------------Plot--------------------------------------- #
plt.scatter(X, y)                                 # scatter of the relationship between x and y
plt.plot(X, regressor.predict(X), color='red')    # regression line (model predictions)
plt.title("Regressão linear simples")
plt.xlabel("Idade", color='red')
plt.ylabel("Custo", color='red')

# -----------------------prediction for a 40-year-old person------------------ #
previsao1 = regressor.intercept_ + regressor.coef_ * 40      # manual calculation of the prediction
previsao2 = regressor.predict(np.array(40).reshape(1, -1))   # prediction via the model

# --------------------Evaluating the accuracy (R^2) of the regression--------- #
score = regressor.score(X, y)

# -------------------------------Residuals plot------------------------------- #
from yellowbrick.regressor import ResidualsPlot
visualizador = ResidualsPlot(regressor)
visualizador.fit(X, y)
visualizador.poof()
Example #5
# Evaluate the results of the regression
mse_train = mean_squared_error(y_train, pred_train)
mse_val = mean_squared_error(y_val, pred_val)

print("MSE score on train dataset : %s" % mse_train)
print("MSE score on validation dataset : %s" % mse_val)


# This is the first score we obtain for our prediction using the Ridge regression. Obviously it can be improved using more powerful models but already gives us a benchmark to beat from here on.

# In[25]:


# Instantiate the linear model and visualizer
visualizer = ResidualsPlot(clf)

visualizer.fit(x_train, y_train)  # Fit the training data to the model
visualizer.score(x_val, y_val)  # Evaluate the model on the validation data
visualizer.poof()  


# # Tree regression
# 
# In order to do things properly for this model, we will try to pick the parameters that fit best (a possible search is sketched below).
# - First the max depth
# - Second the min samples split
# - Third the min samples leaf
# 

# In[26]:
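
# A possible way to run the parameter search described above (a sketch, not the
# notebook's original code): a GridSearchCV over max_depth, min_samples_split
# and min_samples_leaf, reusing the x_train/y_train split from the earlier cells.
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV

param_grid = {
    "max_depth": [3, 5, 10, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 5],
}
grid = GridSearchCV(DecisionTreeRegressor(random_state=0), param_grid,
                    scoring="neg_mean_squared_error", cv=5)
grid.fit(x_train, y_train)
print(grid.best_params_)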
Example #6
    if (regressorName == 'SVR'):
        return SVR(kernel='rbf', gamma='scale', C=1.0, epsilon=0.01)
    if (regressorName == 'MLPRegressor'):
        return MLPRegressor(hidden_layer_sizes=(100, 200))
    if (regressorName == 'DecisionTreeRegressor'):
        return DecisionTreeRegressor()
    if (regressorName == 'RandomForestRegressor'):
        return RandomForestRegressor()
    if (regressorName == 'GradientBoostingRegressor'):
        return GradientBoostingRegressor()


# regressors = ['LinearRegression', 'KNeighborsRegressor', 'SVR', 'MLPRegressor', 'DecisionTreeRegressor', 'RandomForestRegressor', 'GradientBoostingRegressor']
regressors = ['LinearRegression']

for regressorName in regressors:
    for index in range(2):
        print('Running regressor ' + regressorName + ' on column ' +
              str(index))

        dataset = loadData()
        features, labels = getFeaturesAndLabels(dataset, index)
        x_train, x_test, y_train, y_test = Split(features, labels)

        my_title = regressorName + ' on column ' + str(index)
        visualizer = ResidualsPlot(getRegressor(regressorName), title=my_title)
        visualizer.fit(x_train, y_train.ravel())
        score = visualizer.score(x_test, y_test.ravel())
        visualizer.poof()

        print("Score: " + str(score))
Example #7
lasso_lars = grid.best_estimator_
plt.scatter(range(X_poly.shape[1]),
            lasso_lars.coef_,
            c=np.sign(lasso_lars.coef_),
            cmap="bwr_r")

######## Yellowbrick

from yellowbrick.regressor import AlphaSelection, ResidualsPlot, PredictionError
from sklearn.linear_model import LassoLarsCV

### Find optimal alpha

lassolars_yb = AlphaSelection(LassoLarsCV())
lassolars_yb.fit(X, y)
lassolars_yb.poof()

### RVF plot

lasso_yb = ResidualsPlot(lasso_lars, hist=True)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
lasso_yb.poof()

### Prediction Error

lasso_yb = PredictionError(lasso_lars)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
lasso_yb.poof()
x_list = [1, 2, 3, 4, 5]
plt.xlabel("Number of folds")
plt.ylabel("Mean Absolute Error")
plt.plot(x_list, accNN, label="Neural Network")
plt.plot(x_list, accSVR, label="Support Vector Regression")
plt.plot(x_list, accRF, label="Random Forest")
plt.legend()
plt.show()

# # Residual Plot

# In[ ]:

# Reference: https://media.readthedocs.org/pdf/yellowbrick/stable/yellowbrick.pdf
from sklearn.linear_model import LinearRegression
from yellowbrick.regressor import ResidualsPlot
ridge = LinearRegression()
visualizer = ResidualsPlot(ridge)
Ytrain = train['price'] / train['price'].max()
Xtrain = pd.DataFrame(train.drop(['price'], axis=1))
Ytest = test['price'] / test['price'].max()
Xtest = test.drop(['price'], axis=1)
visualizer.fit(Xtrain, Ytrain)  # Fit the training data to the model
visualizer.score(Xtest, Ytest)  # Evaluate the model on the test data
visualizer.poof()

Example #9
modelo = LinearRegression()
modelo.fit(X_reshaped, y)  # trains the model

intercept = modelo.intercept_  # intercept of the linear model
inclinacao = modelo.coef_  # slope

plt.scatter(X_reshaped, y)  # scatter plot of the data
plt.scatter(X_reshaped, modelo.predict(X_reshaped), color='red')  # plotting the model's predictions; these points trace the best-fit (regression) line

"""
exercicio:
    Para uma distância de 22, qual a velocidade prevista?
"""

distancia = 22

# Method 1: manual calculation
previsao_metodo_1 = modelo.intercept_ + modelo.coef_ * distancia
# Method 2: via the model (predict expects a 2D array)
previsao_metodo_2 = modelo.predict([[distancia]])

# residuals (the distance between the data points and the regression line)
residuais = modelo._residues  # provided by sklearn, but it does not show the residual of each point; for the individual values we use the yellowbrick library
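
# For reference, the per-point residuals can also be computed by hand
# (a small sketch using the fitted model and data above):
per_point_residuals = y - modelo.predict(X_reshaped)  # one residual per observation
print(per_point_residuals[:5])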

"""
usando a biblioteca yellowbrick
"""
visualizador = ResidualsPlot(modelo)
visualizador.fit(X_reshaped, y)
visualizador.poof()  # renders the plot
Example #10
lr_log = LinearRegression()
lr_log.fit(X=X_train_log, y=y_train_log)
print(f"Train R2 is {lr_log.score(X=X_train_log, y=y_train_log)}")
print(f"Test R2 is {lr_log.score(X=X_test_log, y=y_test_log)}")

# There is a slight improvement (~2%) in the train R2 and test R2 utilizing log transform

# + [markdown] pycharm={"name": "#%% md\n"}
# ## Model Evaluation - Linear Regression
# ### The following section evaluates the linear-model assumptions of random error, constant variance, and normally distributed residuals with mean 0, for the four initial models, using a residual plot from Yellowbrick.
#

# + pycharm={"is_executing": false}
# Residual Plot for Huber LR with no log-transform
from yellowbrick.regressor import ResidualsPlot
rpv_hr = ResidualsPlot(hr)
rpv_hr.fit(X=X_train, y=y_train)
rpv_hr.score(X=X_test, y=y_test)
rpv_hr.poof()

# + pycharm={"is_executing": false}
rpv_lr = ResidualsPlot(lr)
rpv_lr.fit(X=X_train, y=y_train)
rpv_lr.score(X=X_test, y=y_test)
rpv_lr.poof()

# + pycharm={"is_executing": false}
# Residual Plot for LR with log transform
rpv_lr_log = ResidualsPlot(lr_log)
rpv_lr_log.fit(X=X_train_log, y=y_train_log)
rpv_lr_log.score(X=X_test_log, y=y_test_log)
rpv_lr_log.poof()
df

print('Mean Absolute Error:', metrics.mean_absolute_error(ytest, ypred))  
print('Mean Squared Error:', metrics.mean_squared_error(ytest, ypred))  
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(ytest, ypred)))
print('Median absolute error:',metrics.median_absolute_error(ytest, ypred))

r2 = metrics.r2_score(ytest, ypred)

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

mean_absolute_percentage_error(ytest, ypred)

from yellowbrick.regressor import ResidualsPlot

# residuals vs. predicted values
visualizer = ResidualsPlot(regressor)
visualizer.score(Xtest, ytest)  # Evaluate the model on the test data
visualizer.show() 


sns.residplot(ytest, ypred)

np.mean(ytest-ypred)


sns.distplot(ytest-ypred)

Example #12
import pandas as pd
from yellowbrick.regressor import ResidualsPlot
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

data = pd.read_csv('../CSV/bikeshare.csv')
X = data[[
    "season", "month", "hour", "holiday", "weekday", "workingday", "weather",
    "temp", "feelslike", "humidity", "windspeed"
]]
y = data["riders"]

# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

visualizer = ResidualsPlot(LinearRegression())
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()
Example #13
def residuals_plot(ax=None):
    data = load_concrete(return_dataset=True)
    X, y = data.to_pandas()

    viz = ResidualsPlot(Ridge(), ax=ax)
    return tts_plot(viz, X, y)
plt.ylabel('Total de álcool ingerido (L)')
plt.plot(X, modelo_cerveja.predict(X), color = 'red')

# Manual calculation and using the model to predict the value of y, respectively
modelo_cerveja.intercept_ + modelo_cerveja.coef_ * 400
modelo_cerveja.predict([[400]])

''' Since the study did not give a reference serving size in litres, we can make assumptions from this model:
for example, if a person drinks 400 glasses of beer per year, and we take a glass of beer to be 300 ml (0.3 L),
a person who drinks 400 glasses per year (depending on the country, obviously) drinks 120 litres of beer and,
of pure alcohol alone, approximately 13.65 litres (roughly 11.37%) '''
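
# A quick check of the arithmetic quoted above (assuming one glass = 0.3 L and
# taking 13.65 L as the amount of pure alcohol estimated for 400 servings):
litres_of_beer = 400 * 0.3               # 120 litres of beer per year
print(13.65 / litres_of_beer * 100)      # roughly 11.4% pure alcohol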

# Residuals and their plot (the distance between each data point and the regression line)
modelo_cerveja._residues

visualizador_cerveja = ResidualsPlot(modelo_cerveja)
visualizador_cerveja.fit(X, y)
visualizador_cerveja.poof()

'''2) Linear regression of spirits vs. total alcohol consumed'''

A = bebida_mundo.iloc[:, 2].values 
b = bebida_mundo.iloc[:, 4].values 
correlacao_destilados = np.corrcoef(A, b)

A = A.reshape(-1, 1)
modelo_destilados = LinearRegression()
modelo_destilados.fit(A, b)

score_destilados = modelo_destilados.score(A, b)
Example #15
# In[30]:


TimeSeriesSplit(max_train_size=None, n_splits=5)
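
# A sketch of how the TimeSeriesSplit above could be used for cross-validation
# (an illustration, assuming the X_train/y_train arrays used in the next cell):
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.linear_model import Ridge

tscv = TimeSeriesSplit(max_train_size=None, n_splits=5)
cv_scores = cross_val_score(Ridge(), X_train, y_train, cv=tscv)
print(cv_scores.mean())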


# In[37]:


from sklearn.linear_model import Ridge
from yellowbrick.regressor import ResidualsPlot

# Instantiate the linear model and visualizer
model = Ridge()
visualizer = ResidualsPlot(model, size=(1080, 720))

visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()                 # Draw the data


# In[38]:


from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import BayesianRidge, LinearRegression

regressors = {
# intercept
modelo.intercept_

# model coefficient (slope)
modelo.coef_

# visualizing the data and the model
plt.scatter(x2, y)
plt.plot(x2, modelo.predict(x2), color='red')

### prediction

# for a distance of 22 feet
modelo.intercept_ + modelo.coef_ * 22
# or
modelo.predict(np.array([22]).reshape(-1, 1))

# sklearn only returns a single aggregate value
# residuals
modelo._residues

# to see the individual residuals:
# pip install yellowbrick
# (or use anaconda...)

from yellowbrick.regressor import ResidualsPlot

visualizador = ResidualsPlot(modelo)
visualizador.fit(x2, y)
visualizador.poof()
Example #17
# LINEAR REGRESSION
treino_mod = dict()
teste_mod = dict()
pred = dict()

colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])

for i, color in zip(range(n_clusters), colors):
    treino_mod[i] = var_teste2_op2[var_teste2_op2['pred'] == i].join(
        treino[target_reg])
    teste_mod[i] = base_teste_teste2_op2[base_teste_teste2_op2['pred'] ==
                                         i].join(teste[target_reg])

    X = treino_mod[i][var]
    y = treino_mod[i][target_reg]
    model = LinearRegression().fit(X, y)
    pred[i] = model.predict(teste_mod[i][var])
    plt.figure(figsize=[15, 5])
    plt.subplot(1, 2, 1)
    visualizer = ResidualsPlot(model, hist=False)
    visualizer.fit(X, y)
    visualizer.score(teste_mod[i][var], teste_mod[i][target_reg])

    plt.subplot(1, 2, 2)
    plt.scatter(pred[i], teste_mod[i][target_reg], color='darkorange')
    plt.title('Target x Predict')
    plt.xlabel('Predict')
    plt.ylabel('True value')
    visualizer.show()
Example #18
"""


@author: LucasLimaPinho
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from yellowbrick.regressor import ResidualsPlot

base = pd.read_csv('cars.csv')
base = base.drop(['Unnamed: 0'], axis=1)  # axis=1 -> drop by column

x = base.iloc[:, 1].values
y = base.iloc[:, 0].values
correlacao = np.corrcoef(x, y)  # correlation computed on the flat arrays
x = x.reshape(-1, 1)  # reshape into a column matrix for sklearn
model = LinearRegression()
model.fit(x, y)
model.intercept_
model.coef_
plt.scatter(x, y)
plt.plot(x, model.predict(x), color="red")
model.predict([[22]])  # predict expects a 2D array
model._residues
visual = ResidualsPlot(model)
visual.fit(x, y)
visual.poof()
Example #19
# %%
# Residuals Plot (Trying new things)

# The residuals plot shows how the model is injecting error: the bold
# horizontal line at residuals = 0 is no error, and any point above or below
# that line indicates the magnitude of the error.
# (https://www.scikit-yb.org/en/latest/quickstart.html#installation)

# Load a regression dataset
X, y = load_concrete()

# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

visualizer = ResidualsPlot(LinearRegression())
visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()  # Finalize and render the figure

# Xenia: Saving my plots
fig = plt.gcf()  # grab the figure drawn by the visualizer before saving
fig.set_size_inches(7, 5)
fig.savefig("6._Residuals_Plot.png")

# %%
# New Plots with Temperature & Precipitation

# Time series of flow values with the x axis range limited
fig, ax = plt.subplots()
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from yellowbrick.regressor import ResidualsPlot
from sklearn.linear_model import LinearRegression

scaler = StandardScaler()
# neigh = KNeighborsRegressor(n_neighbors=5)
# regression_visualizers = [ResidualsPlot(neigh), PredictionError(neigh)]
features = [
    "longitude", "latitude", "peak_load", "off-grid", "avg_peak_winter",
    "avg_peak_spring", "avg_peak_summer", "avg_peak_autumn", "avg_base_winter",
    "avg_base_spring", "avg_base_summer", "avg_base_autumn"
]

case_name = "mg_sizing_dataset_with_loc"
df = pd.read_csv("results/" + case_name + ".csv",
                 sep=";|,",
                 engine="python",
                 index_col='index')
X = df[features]
scaler.fit(X)
X = scaler.transform(X)
targets = ["PV", "BAT", "RBAT", "INV", "GEN", "NPV"]
y = df[targets[0]]

model = LinearRegression()
visualizer_residuals = ResidualsPlot(model)
visualizer_residuals.fit(X, y)
visualizer_residuals.show()
base = base.drop(['Unnamed: 0'], axis=1)

X = base.iloc[:, 1].values  # extract the column as a numpy array
y = base.iloc[:, 0].values
correlacao = np.corrcoef(X, y)  # correlation computed on the flat arrays
X = X.reshape(-1, 1)  # reshape into a column matrix for sklearn

modelo = LinearRegression()
modelo.fit(X, y)

modelo.intercept_  # intercept of the fitted line
modelo.coef_  # slope coefficient

plt.scatter(X, y)  # scatter plot of the data
plt.plot(X, modelo.predict(X), color='red')  # draws the regression line on the plot

# distance of 22 feet
modelo.intercept_ + modelo.coef_ * 22  # manual prediction

modelo.predict([[22]])  # prediction via the model (expects a 2D array)

modelo._residues  # aggregate residual; the distance of the data from the regression line

visualizador = ResidualsPlot(modelo)  # residuals plot: shows the dispersion of the residuals around zero
visualizador.fit(X, y)
visualizador.poof()  # the closer the residuals are to 0, the better the model fits the data
viz = FeatureImportances(rf, ax=ax)
viz.fit(Xtrain, ytrain)
viz.poof(outpath="rf_featureimportances.png")

# Rank2d
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot()
rank = Rank2D(features=feature_cols, algorithm='pearson', ax=ax)
rank.fit(Xtrain, ytrain)
rank.transform(Xtrain)
rank.poof(outpath="rf_rank2d.png")

# Residuals Plot
fig = plt.figure()
ax = fig.add_subplot()
resplot = ResidualsPlot(rf, ax=ax)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
resplot.poof(outpath="rf_resplot.png")

# Actual vs Predicted
rf.fit(Xtrain, ytrain)
yhat = rf.predict(Xtest)
error = ytest - yhat
data = pd.DataFrame({
    't': test['date'],
    'ytest': ytest,
    'yhat': yhat,
    'error': error,
    'neg_error': np.negative(error),
    'dless': dless
def show_residuals(model, train_tup, test_tup):
    resPlot = ResidualsPlot(model)
    resPlot.fit(*train_tup)
    resPlot.score(*test_tup)
    resPlot.show()
can be used for at most 3 variables, so if the number of predictors is greater than 3 we should
rely on residual plots instead. So it is always good to check the residual plots.
The most useful way to plot the residuals, though, is with your predicted values on the x-axis and
your residuals on the y-axis.
'''

'''
To detect nonlinearity one can inspect plots of observed vs. predicted values or of
residuals vs. predicted values. The desired outcome is that the points are symmetrically
distributed around a diagonal line in the former plot, or around a horizontal line in the
latter one, in both cases with a roughly constant variance. A manual version of the
residuals-vs-predicted plot is sketched below.
'''
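
# A manual version of the plot described above (a sketch, assuming the fitted
# `regressor` and the X_test/y_test split used below): predicted values on the
# x-axis, residuals on the y-axis, with a horizontal reference line at zero.
import matplotlib.pyplot as plt

y_pred_test = regressor.predict(X_test)
plt.scatter(y_pred_test, y_test - y_pred_test, alpha=0.5)
plt.axhline(0, color="black", linewidth=1)
plt.xlabel("Predicted value")
plt.ylabel("Residual")
plt.show()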
from yellowbrick.regressor import ResidualsPlot

# residuals vs. predicted values
visualizer = ResidualsPlot(regressor)
#visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()      

visualizer = ResidualsPlot(regressor, hist=False)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

# Test R2 is the R^2 of the model on the test data
'''
A common use of the residuals plot is to analyze the variance of the error of the regressor. If the 
points are randomly dispersed around the horizontal axis, a linear regression model is usually 
appropriate for the data; otherwise, a non-linear model is more appropriate. In the case above, we see 
a fairly random, uniform distribution of the residuals against the target in two dimensions. This seems 
import numpy as np

corr = np.corrcoef(features, target)

features = features.reshape(-1, 1)

from sklearn.linear_model import LinearRegression

regression = LinearRegression()
regression.fit(features, target)

print(regression.intercept_, regression.coef_)

import matplotlib.pyplot as plt

plt.scatter(features, target)
plt.plot(features, regression.predict(features), color='red')
plt.title("Simple Linear Regression")
plt.xlabel("Age")
plt.ylabel("Cost")

prediction_1 = regression.predict([[40]])
prediction_2 = regression.intercept_ + regression.coef_ * 40

score = regression.score(features, target)

from yellowbrick.regressor import ResidualsPlot

visualizer = ResidualsPlot(regression)
visualizer.fit(features, target)
visualizer.poof()
2. Model Fitting

* Fit on training data and predict on test data
  * Check residuals and prediction error graphs (yellowbrick)
* Plot predicted values vs actuals (yhat, ytest)
* Calculate and plot residuals (ytest - yhat)
"""

# How do our models perform on the test data?
score_model(rf)
score_model(rf_random)
score_model(rf_best)

# What do our residuals look like?
from yellowbrick.regressor import ResidualsPlot
resplot = ResidualsPlot(rf_best)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
g = resplot.poof()

# What does our prediction error look like?
from yellowbrick.regressor import PredictionError
prederr = PredictionError(rf_best)
prederr.fit(Xtrain, ytrain)
prederr.score(Xtest, ytest)
g = prederr.poof()

# Next, we pull out our fitted values (yhat) and actuals (ytest) to see how they compare.
# We also calculate our residuals by subtracting our fitted values from the actuals.
import matplotlib.pyplot as plt
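
# A sketch of the comparison described above (assuming rf_best, Xtest and ytest
# from the cells before): predicted vs. actual values, then the residuals.
yhat = rf_best.predict(Xtest)
residuals = ytest - yhat

plt.scatter(ytest, yhat, alpha=0.5)
plt.xlabel("Actual (ytest)")
plt.ylabel("Predicted (yhat)")
plt.show()

plt.scatter(yhat, residuals, alpha=0.5)
plt.axhline(0, color="black", linewidth=1)
plt.xlabel("Predicted (yhat)")
plt.ylabel("Residual (ytest - yhat)")
plt.show()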
Example #27
modelo = LinearRegression()  # instantiate a LinearRegression object
modelo.fit(X, Y)  # fit the model to the X and Y data (training)

modelo.intercept_  # where the regression line intercepts the Y axis

modelo.coef_  # slope of the line

plt.scatter(X, Y)
plt.plot(X, modelo.predict(X),
         color="red")  # predict returns the values the model estimated from the real X data

# predict the speed for the value distance = 22 feet; there are two ways:
modelo.intercept_ + modelo.coef_ * 22

# or, more directly (predict expects a 2D array):

modelo.predict([[22]])

# residuals of the regression line:

modelo._residues

# another way to visualize them, using the Yellowbrick library

visualizador = ResidualsPlot(modelo)  # create a ResidualsPlot object
visualizador.fit(X, Y)
visualizador.poof()
params = clf.get_params(deep=True)

#evaluate on test
print("default (R^2) score:" + np.array2string(clf.score(X_test_selected,y_test)))
print("intercept: " + np.array2string(clf.intercept_))
print("coefficients: " + np.array2string(clf.coef_))


# %%
#predict using classifier
y_pred_B = clf.predict(X_all_selected)


# %%
# residual plots
visualizer = ResidualsPlot(clf)
visualizer.fit(X_train_selected, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test_selected, y_test)  # Evaluate the model on the test data
visualizer.poof()                 # Draw/show/poof the data


# %%
#plot all results, including train & test
plt.figure(figsize=(16,6))
#fig3, ax3  = plt.subplots(figsize=(16, 6))

sns.lineplot(X_all['DateFraction'],y_pred_B, color = 'b')
#ax3 = plt.twinx()
sns.lineplot(X_all['DateFraction'],y_all, color = 'g')
sns.lineplot(X_test['DateFraction'],X_all['InflationTrailing5yrFactor']**.2-1, color = 'r')
sns.lineplot(X_test['DateFraction'],X_all['RateGS10']/100, color = 'y')
Example #29
    'Total', 'Precipitation', 'Date', 'Day', 'Brooklyn Bridge',
    'Manhattan Bridge', 'Queensboro Bridge', 'Williamsburg Bridge'
])
y_train = df_new['Total']

#%%
from sklearn import preprocessing
from sklearn.linear_model import Ridge
reg = Ridge(alpha=100)
reg.fit(x_train, y_train)

#%%
reg.coef_

#%%
from sklearn.metrics import r2_score, mean_squared_error
y_pred = reg.predict(x_train)

print(r2_score(y_train, y_pred))
print(mean_squared_error(y_train, y_pred))

#%%
import yellowbrick
res = y_train - y_pred

#%%
from yellowbrick.regressor import ResidualsPlot
visualizer = ResidualsPlot(reg)
visualizer.score(x_train, y_train)  # Evaluate the model (here on the training data; no separate test set was created)
visualizer.poof()  # Draw/show/poof the data
# Manual calculation with the trained model to find y (for x[beer_servings] = 400)
modelo1.intercept_ + modelo1.coef_ * 400

# Automatic calculation by the model
modelo1.predict([[400]])
''' Since the study did not give a reference serving size in litres, we can make assumptions from this model:
for example, if a person drinks 400 glasses of beer per year and we take a glass of beer to be 300 ml (0.3 L),
that person (depending on the country, obviously) drinks 120 litres of beer per year and, of pure alcohol alone,
approximately 13.65 litres (roughly 11.37%) '''

# Residuals (the distance between each point and the reference line)
modelo1._residues

# Plotting the residuals
visualizador1 = ResidualsPlot(modelo1)
visualizador1.fit(X, y)
visualizador1.poof()

# The closer the residuals are to zero, the better the model
'''2) Linear relationship between total alcohol consumed (in litres) and total spirits consumed (in servings)
   Note: spirits are all drinks produced by distillation (vodka, whisky, tequila, rum, among others)  '''

A = bebida_mundo.iloc[:, 2].values  #spirit_servings
b = bebida_mundo.iloc[:, 4].values  #total_litres_of_alcohol
correlacao2 = np.corrcoef(A, b)

A = A.reshape(-1, 1)
modelo2 = LinearRegression()
modelo2.fit(A, b)
Example #31
rmse = sqrt(mse)
print('Accuracy:', accuracy, '%.')
print('Root Mean square Error:', rmse)
print('Mean absolute Error:', mae)
print('R2:', r2)

#maekfold = results_rf.mean()

#print ('Mean absolute Error kfold:', maekfold)

#%%

#plotting results

model = rf
visualizer = ResidualsPlot(model)

visualizer.fit(x_train, y_train)  # Fit the training data to the visualizer
visualizer.score(x_test, y_test)  # Evaluate the model on the test data
visualizer.poof()  # Draw/show/poof the data

visualizer = PredictionError(model)

visualizer.fit(x_train, y_train)  # Fit the training data to the visualizer
visualizer.score(x_test, y_test)  # Evaluate the model on the test data
visualizer.poof()  # Draw/show/poof the data
#%%
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm