def plot_residuals(X, y, model, outpath="images/residuals.png", **kwargs):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    _, ax = plt.subplots()

    visualizer = ResidualsPlot(model, ax=ax, **kwargs)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath=outpath)
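
# A minimal usage sketch for the helper above (an illustration, not part of the
# original snippet): it assumes the names the function already relies on
# (train_test_split, matplotlib's pyplot, yellowbrick's ResidualsPlot) are
# imported, and it uses a small synthetic dataset.
import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X_demo = rng.rand(200, 3)                                     # 200 samples, 3 features
y_demo = X_demo @ np.array([1.5, -2.0, 0.5]) + rng.normal(scale=0.1, size=200)
plot_residuals(X_demo, y_demo, Ridge(), outpath="images/ridge_residuals.png")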
Example #2
plt.show()

from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(xrm, y)
print(reg.score(xrm, y))

xx = np.linspace(min(xrm), max(xrm)).reshape(-1, 1)
plt.scatter(xrm, y, color="blue")
plt.plot(xx, reg.predict(xx), color="red", linewidth=3)
plt.ylabel("y: Value of house / 1000 USD")
plt.xlabel("x: Number of rooms")
plt.show()

from yellowbrick.regressor import ResidualsPlot
visualizer = ResidualsPlot(reg, hist=False)
visualizer.fit(xrm, y)
visualizer.score(xrm, y)
visualizer.poof()

# use data multi var
# split data: 70%-training 30%-testing
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=42)
reg = LinearRegression()
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)
print("R^2 = ", reg.score(x_train, y_train))
Example #3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from yellowbrick.regressor import ResidualsPlot

base = pd.read_csv('cars.csv')
base = base.drop(['Unnamed: 0'], axis=1)

X = base.iloc[:, 1].values
y = base.iloc[:, 0].values
correlacao = np.corrcoef(X, y)  # correlation coefficient

X = X.reshape(-1, 1)
modelo = LinearRegression()
modelo.fit(X, y)

modelo.intercept_
modelo.coef_

plt.scatter(X, y)
plt.plot(X, modelo.predict(X), color='red')

# distance of 22 feet (manual prediction)
modelo.intercept_ + modelo.coef_ * 22

modelo.predict([[22]])  # predict expects a 2D array

modelo._residues

visualizador = ResidualsPlot(modelo)
visualizador.fit(X, y)
visualizador.poof()
Example #4
# ---------------------------Linear regression-------------------------------- #
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X, y)                 # training (fits the model)

# ------------------------------Coefficients---------------------------------- #
# b0
regressor.intercept_
# b1
regressor.coef_

# ---------------------------------Plot--------------------------------------- #
plt.scatter(X, y)                                 # scatter of the relationship between x and y
plt.plot(X, regressor.predict(X), color='red')    # regression line (model predictions)
plt.title("Regressão linear simples")
plt.xlabel("Idade", color='red')
plt.ylabel("Custo", color='red')

# -----------------------prediction for a 40-year-old person------------------ #
previsao1 = regressor.intercept_ + regressor.coef_ * 40      # manual calculation of the prediction
previsao2 = regressor.predict(np.array(40).reshape(1, -1))   # prediction via the model

# --------------------Evaluating the accuracy (R^2) of the regression--------- #
score = regressor.score(X, y)

# -------------------------------Residuals plot------------------------------- #
from yellowbrick.regressor import ResidualsPlot
visualizador = ResidualsPlot(regressor)
visualizador.fit(X, y)
visualizador.poof()
Example #5
# Evaluate the results of the regression
mse_train = mean_squared_error(y_train, pred_train)
mse_val = mean_squared_error(y_val, pred_val)

print("MSE score on train dataset : %s" % mse_train)
print("MSE score on validation dataset : %s" % mse_val)


# This is the first score we obtain for our prediction using the Ridge regression. Obviously it can be improved using more powerful models but already gives us a benchmark to beat from here on.

# In[25]:


# Instantiate the linear model and visualizer
visualizer = ResidualsPlot(clf)

visualizer.fit(x_train, y_train)  # Fit the training data to the model
visualizer.score(x_val, y_val)  # Evaluate the model on the validation data
visualizer.poof()  


# # Tree regression
# 
# In order to do things properly for this model, we will try to pick the parameters that fit best (a possible search is sketched below).
# - First the max depth
# - Second the min samples split
# - Third the min samples leaf
# 

# In[26]:
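
# A possible way to run the parameter search described above (a sketch, not the
# notebook's original code): a GridSearchCV over max_depth, min_samples_split
# and min_samples_leaf, reusing the x_train/y_train split from the earlier cells.
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV

param_grid = {
    "max_depth": [3, 5, 10, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 5],
}
grid = GridSearchCV(DecisionTreeRegressor(random_state=0), param_grid,
                    scoring="neg_mean_squared_error", cv=5)
grid.fit(x_train, y_train)
print(grid.best_params_)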
Example #6
    if (regressorName == 'SVR'):
        return SVR(kernel='rbf', gamma='scale', C=1.0, epsilon=0.01)
    if (regressorName == 'MLPRegressor'):
        return MLPRegressor(hidden_layer_sizes=(100, 200))
    if (regressorName == 'DecisionTreeRegressor'):
        return DecisionTreeRegressor()
    if (regressorName == 'RandomForestRegressor'):
        return RandomForestRegressor()
    if (regressorName == 'GradientBoostingRegressor'):
        return GradientBoostingRegressor()


# regressors = ['LinearRegression', 'KNeighborsRegressor', 'SVR', 'MLPRegressor', 'DecisionTreeRegressor', 'RandomForestRegressor', 'GradientBoostingRegressor']
regressors = ['LinearRegression']

for regressorName in regressors:
    for index in range(2):
        print('Running regressor ' + regressorName + ' on column ' +
              str(index))

        dataset = loadData()
        features, labels = getFeaturesAndLabels(dataset, index)
        x_train, x_test, y_train, y_test = Split(features, labels)

        my_title = regressorName + ' on column ' + str(index)
        visualizer = ResidualsPlot(getRegressor(regressorName), title=my_title)
        visualizer.fit(x_train, y_train.ravel())
        score = visualizer.score(x_test, y_test.ravel())
        visualizer.poof()

        print("Score: " + str(score))
Example #7
lasso_lars = grid.best_estimator_
plt.scatter(range(X_poly.shape[1]),
            lasso_lars.coef_,
            c=np.sign(lasso_lars.coef_),
            cmap="bwr_r")

######## Yellowbrick

from yellowbrick.regressor import AlphaSelection, ResidualsPlot, PredictionError
from sklearn.linear_model import LassoLarsCV

### Find optimal alpha

lassolars_yb = AlphaSelection(LassoLarsCV())
lassolars_yb.fit(X, y)
lassolars_yb.poof()

### RVF plot

lasso_yb = ResidualsPlot(lasso_lars, hist=True)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
lasso_yb.poof()

### Prediction Error

lasso_yb = PredictionError(lasso_lars)
lasso_yb.fit(X_train, y_train)
lasso_yb.score(X_test, y_test)
lasso_yb.poof()
x_list = [1, 2, 3, 4, 5]
plt.xlabel("Number of folds")
plt.ylabel("Mean Absolute Error")
plt.plot(x_list, accNN, label="Neural Network")
plt.plot(x_list, accSVR, label="Support Vector Regression")
plt.plot(x_list, accRF, label="Random Forest")
plt.legend()
plt.show()

# # Residual Plot

# In[ ]:

# Reference: https://media.readthedocs.org/pdf/yellowbrick/stable/yellowbrick.pdf
from sklearn.linear_model import LinearRegression
from yellowbrick.regressor import ResidualsPlot
ridge = LinearRegression()
visualizer = ResidualsPlot(ridge)
Ytrain = train['price'] / train['price'].max()
Xtrain = pd.DataFrame(train.drop(['price'], axis=1))
Ytest = test['price'] / test['price'].max()
Xtest = test.drop(['price'], axis=1)
visualizer.fit(Xtrain, Ytrain)  # Fit the training data to the model
visualizer.score(Xtest, Ytest)  # Evaluate the model on the test data
visualizer.poof()

Example #9
modelo = LinearRegression()
modelo.fit(X_reshaped, y)  # trains the model

intercept = modelo.intercept_  # intercept of the linear model
inclinacao = modelo.coef_  # slope

plt.scatter(X_reshaped, y)  # scatter plot of the data
plt.scatter(X_reshaped, modelo.predict(X_reshaped), color='red')  # plotting the model's predictions; these points trace the best-fit (regression) line

"""
exercicio:
    Para uma distância de 22, qual a velocidade prevista?
"""

distancia = 22

# Method 1: manual calculation
previsao_metodo_1 = modelo.intercept_ + modelo.coef_ * distancia
# Method 2: via the model (predict expects a 2D array)
previsao_metodo_2 = modelo.predict([[distancia]])

# residuals (the distance between the data points and the regression line)
residuais = modelo._residues  # provided by sklearn, but it does not show the residual of each point; for the individual values we use the yellowbrick library
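
# For reference, the per-point residuals can also be computed by hand
# (a small sketch using the fitted model and data above):
per_point_residuals = y - modelo.predict(X_reshaped)  # one residual per observation
print(per_point_residuals[:5])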

"""
usando a biblioteca yellowbrick
"""
visualizador = ResidualsPlot(modelo)
visualizador.fit(X_reshaped, y)
visualizador.poof()  # renders the plot
Example #10
lr_log = LinearRegression()
lr_log.fit(X=X_train_log, y=y_train_log)
print(f"Train R2 is {lr_log.score(X=X_train_log, y=y_train_log)}")
print(f"Test R2 is {lr_log.score(X=X_test_log, y=y_test_log)}")

# There is a slight improvement (~2%) in the train R2 and test R2 utilizing log transform

# + [markdown] pycharm={"name": "#%% md\n"}
# ## Model Evaluation - Linear Regression
# ### The following section evaluates the linear-model assumptions of random error, constant variance, and normally distributed residuals with mean 0, for the four initial models, using a residual plot from Yellowbrick.
#

# + pycharm={"is_executing": false}
# Residual Plot for Huber LR with no log-transform
from yellowbrick.regressor import ResidualsPlot
rpv_hr = ResidualsPlot(hr)
rpv_hr.fit(X=X_train, y=y_train)
rpv_hr.score(X=X_test, y=y_test)
rpv_hr.poof()

# + pycharm={"is_executing": false}
rpv_lr = ResidualsPlot(lr)
rpv_lr.fit(X=X_train, y=y_train)
rpv_lr.score(X=X_test, y=y_test)
rpv_lr.poof()

# + pycharm={"is_executing": false}
# Residual Plot for LR with log transform
rpv_lr_log = ResidualsPlot(lr_log)
rpv_lr_log.fit(X=X_train_log, y=y_train_log)
rpv_lr_log.score(X=X_test_log, y=y_test_log)
rpv_lr_log.poof()
df

print('Mean Absolute Error:', metrics.mean_absolute_error(ytest, ypred))  
print('Mean Squared Error:', metrics.mean_squared_error(ytest, ypred))  
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(ytest, ypred)))
print('Median absolute error:',metrics.median_absolute_error(ytest, ypred))

r2 = metrics.r2_score(ytest, ypred)

def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

mean_absolute_percentage_error(ytest, ypred)

from yellowbrick.regressor import ResidualsPlot

# residuals vs. predicted values
visualizer = ResidualsPlot(regressor)
visualizer.score(Xtest, ytest)  # Evaluate the model on the test data
visualizer.show() 


sns.residplot(ytest, ypred)

np.mean(ytest-ypred)


sns.distplot(ytest-ypred)

Example #12
import pandas as pd
from yellowbrick.regressor import ResidualsPlot
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

data = pd.read_csv('../CSV/bikeshare.csv')
X = data[[
    "season", "month", "hour", "holiday", "weekday", "workingday", "weather",
    "temp", "feelslike", "humidity", "windspeed"
]]
y = data["riders"]

# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

visualizer = ResidualsPlot(LinearRegression())
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.poof()
Example #13
def residuals_plot(ax=None):
    data = load_concrete(return_dataset=True)
    X, y = data.to_pandas()

    viz = ResidualsPlot(Ridge(), ax=ax)
    return tts_plot(viz, X, y)
plt.ylabel('Total de álcool ingerido (L)')
plt.plot(X, modelo_cerveja.predict(X), color = 'red')

# Manual calculation and using the model to predict the value of y, respectively
modelo_cerveja.intercept_ + modelo_cerveja.coef_ * 400
modelo_cerveja.predict([[400]])

''' Since the study did not give a reference serving size in litres, we can make assumptions from this model:
for example, if a person drinks 400 glasses of beer per year, and we take a glass of beer to be 300 ml (0.3 L),
a person who drinks 400 glasses per year (depending on the country, obviously) drinks 120 litres of beer and,
of pure alcohol alone, approximately 13.65 litres (roughly 11.37%) '''
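
# A quick check of the arithmetic quoted above (assuming one glass = 0.3 L and
# taking 13.65 L as the amount of pure alcohol estimated for 400 servings):
litres_of_beer = 400 * 0.3               # 120 litres of beer per year
print(13.65 / litres_of_beer * 100)      # roughly 11.4% pure alcohol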

# Residuals and their plot (the distance between each data point and the regression line)
modelo_cerveja._residues

visualizador_cerveja = ResidualsPlot(modelo_cerveja)
visualizador_cerveja.fit(X, y)
visualizador_cerveja.poof()

'''2) Linear regression of spirits vs. total alcohol consumed'''

A = bebida_mundo.iloc[:, 2].values 
b = bebida_mundo.iloc[:, 4].values 
correlacao_destilados = np.corrcoef(A, b)

A = A.reshape(-1, 1)
modelo_destilados = LinearRegression()
modelo_destilados.fit(A, b)

score_destilados = modelo_destilados.score(A, b)
Example #15
# In[30]:


TimeSeriesSplit(max_train_size=None, n_splits=5)
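
# A sketch of how the TimeSeriesSplit above could be used for cross-validation
# (an illustration, assuming the X_train/y_train arrays used in the next cell):
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.linear_model import Ridge

tscv = TimeSeriesSplit(max_train_size=None, n_splits=5)
cv_scores = cross_val_score(Ridge(), X_train, y_train, cv=tscv)
print(cv_scores.mean())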


# In[37]:


from sklearn.linear_model import Ridge
from yellowbrick.regressor import ResidualsPlot

# Instantiate the linear model and visualizer
model = Ridge()
visualizer = ResidualsPlot(model, size=(1080, 720))

visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()                 # Draw the data


# In[38]:


from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import BayesianRidge, LinearRegression

regressors = {
# intercept
modelo.intercept_

# model coefficient (slope)
modelo.coef_

# visualizing the data and the model
plt.scatter(x2, y)
plt.plot(x2, modelo.predict(x2), color='red')

### prediction

# for a distance of 22 feet
modelo.intercept_ + modelo.coef_ * 22
# or
modelo.predict(np.array([22]).reshape(-1, 1))

# sklearn only returns a single aggregate value
# residuals
modelo._residues

# to see the individual residuals:
# pip install yellowbrick
# (or use anaconda...)

from yellowbrick.regressor import ResidualsPlot

visualizador = ResidualsPlot(modelo)
visualizador.fit(x2, y)
visualizador.poof()
Example #17
# LINEAR REGRESSION
treino_mod = dict()
teste_mod = dict()
pred = dict()

colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])

for i, color in zip(range(n_clusters), colors):
    treino_mod[i] = var_teste2_op2[var_teste2_op2['pred'] == i].join(
        treino[target_reg])
    teste_mod[i] = base_teste_teste2_op2[base_teste_teste2_op2['pred'] ==
                                         i].join(teste[target_reg])

    X = treino_mod[i][var]
    y = treino_mod[i][target_reg]
    model = LinearRegression().fit(X, y)
    pred[i] = model.predict(teste_mod[i][var])
    plt.figure(figsize=[15, 5])
    plt.subplot(1, 2, 1)
    visualizer = ResidualsPlot(model, hist=False)
    visualizer.fit(X, y)
    visualizer.score(teste_mod[i][var], teste_mod[i][target_reg])

    plt.subplot(1, 2, 2)
    plt.scatter(pred[i], teste_mod[i][target_reg], color='darkorange')
    plt.title('Target x Predict')
    plt.xlabel('Predict')
    plt.ylabel('True value')
    visualizer.show()
Example #18
"""


@author: LucasLimaPinho
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from yellowbrick.regressor import ResidualsPlot

base = pd.read_csv('cars.csv')
base = base.drop(['Unnamed: 0'], axis=1)  # axis=1 -> drop by column

x = base.iloc[:, 1].values
y = base.iloc[:, 0].values
correlacao = np.corrcoef(x, y)  # correlation computed on the flat arrays
x = x.reshape(-1, 1)  # reshape into a column matrix for sklearn
model = LinearRegression()
model.fit(x, y)
model.intercept_
model.coef_
plt.scatter(x, y)
plt.plot(x, model.predict(x), color="red")
model.predict([[22]])  # predict expects a 2D array
model._residues
visual = ResidualsPlot(model)
visual.fit(x, y)
visual.poof()
Example #19
# %%
# Residuals Plot (Trying new things)

# The residuals plot shows how the model is injecting error: the bold
# horizontal line at residuals = 0 is no error, and any point above or below
# that line indicates the magnitude of the error.
# (https://www.scikit-yb.org/en/latest/quickstart.html#installation)

# Load a regression dataset
X, y = load_concrete()

# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

visualizer = ResidualsPlot(LinearRegression())
visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()  # Finalize and render the figure

# Xenia: Saving my plots
fig = plt.gcf()  # grab the figure drawn by the visualizer before saving
fig.set_size_inches(7, 5)
fig.savefig("6._Residuals_Plot.png")

# %%
# New Plots with Temperature & Precipitation

# Time series of flow values with the x axis range limited
fig, ax = plt.subplots()
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from yellowbrick.regressor import ResidualsPlot
from sklearn.linear_model import LinearRegression

scaler = StandardScaler()
# neigh = KNeighborsRegressor(n_neighbors=5)
# regression_visualizers = [ResidualsPlot(neigh), PredictionError(neigh)]
features = [
    "longitude", "latitude", "peak_load", "off-grid", "avg_peak_winter",
    "avg_peak_spring", "avg_peak_summer", "avg_peak_autumn", "avg_base_winter",
    "avg_base_spring", "avg_base_summer", "avg_base_autumn"
]

case_name = "mg_sizing_dataset_with_loc"
df = pd.read_csv("results/" + case_name + ".csv",
                 sep=";|,",
                 engine="python",
                 index_col='index')
X = df[features]
scaler.fit(X)
X = scaler.transform(X)
targets = ["PV", "BAT", "RBAT", "INV", "GEN", "NPV"]
y = df[targets[0]]

model = LinearRegression()
visualizer_residuals = ResidualsPlot(model)
visualizer_residuals.fit(X, y)
visualizer_residuals.show()
base = base.drop(['Unnamed: 0'], axis=1)

X = base.iloc[:, 1].values  # extract the column as a numpy array
y = base.iloc[:, 0].values
correlacao = np.corrcoef(X, y)  # correlation computed on the flat arrays
X = X.reshape(-1, 1)  # reshape into a column matrix for sklearn

modelo = LinearRegression()
modelo.fit(X, y)

modelo.intercept_  # intercept of the fitted line
modelo.coef_  # slope coefficient

plt.scatter(X, y)  # scatter plot of the data
plt.plot(X, modelo.predict(X), color='red')  # draws the regression line on the plot

# distance of 22 feet
modelo.intercept_ + modelo.coef_ * 22  # manual prediction

modelo.predict([[22]])  # prediction via the model (expects a 2D array)

modelo._residues  # aggregate residual; the distance of the data from the regression line

visualizador = ResidualsPlot(modelo)  # residuals plot: shows the dispersion of the residuals around zero
visualizador.fit(X, y)
visualizador.poof()  # the closer the residuals are to 0, the better the model fits the data
viz = FeatureImportances(rf, ax=ax)
viz.fit(Xtrain, ytrain)
viz.poof(outpath="rf_featureimportances.png")

# Rank2d
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot()
rank = Rank2D(features=feature_cols, algorithm='pearson', ax=ax)
rank.fit(Xtrain, ytrain)
rank.transform(Xtrain)
rank.poof(outpath="rf_rank2d.png")

# Residuals Plot
fig = plt.figure()
ax = fig.add_subplot()
resplot = ResidualsPlot(rf, ax=ax)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
resplot.poof(outpath="rf_resplot.png")

# Actual vs Predicted
rf.fit(Xtrain, ytrain)
yhat = rf.predict(Xtest)
error = ytest - yhat
data = pd.DataFrame({
    't': test['date'],
    'ytest': ytest,
    'yhat': yhat,
    'error': error,
    'neg_error': np.negative(error),
    'dless': dless
def show_residuals(model, train_tup, test_tup):
    resPlot = ResidualsPlot(model)
    resPlot.fit(*train_tup)
    resPlot.score(*test_tup)
    resPlot.show()
can be used for at most 3 variables, so if the number of predictors is greater than 3 we should
rely on residual plots instead. So it is always good to check the residual plots.
The most useful way to plot the residuals, though, is with your predicted values on the x-axis and
your residuals on the y-axis.
'''

'''
To detect nonlinearity one can inspect plots of observed vs. predicted values or of
residuals vs. predicted values. The desired outcome is that the points are symmetrically
distributed around a diagonal line in the former plot, or around a horizontal line in the
latter one, in both cases with a roughly constant variance. A manual version of the
residuals-vs-predicted plot is sketched below.
'''
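
# A manual version of the plot described above (a sketch, assuming the fitted
# `regressor` and the X_test/y_test split used below): predicted values on the
# x-axis, residuals on the y-axis, with a horizontal reference line at zero.
import matplotlib.pyplot as plt

y_pred_test = regressor.predict(X_test)
plt.scatter(y_pred_test, y_test - y_pred_test, alpha=0.5)
plt.axhline(0, color="black", linewidth=1)
plt.xlabel("Predicted value")
plt.ylabel("Residual")
plt.show()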
from yellowbrick.regressor import ResidualsPlot

# residuals vs. predicted values
visualizer = ResidualsPlot(regressor)
#visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.show()      

visualizer = ResidualsPlot(regressor, hist=False)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

# Test R2 is the R^2 of the model on the test data
'''
A common use of the residuals plot is to analyze the variance of the error of the regressor. If the 
points are randomly dispersed around the horizontal axis, a linear regression model is usually 
appropriate for the data; otherwise, a non-linear model is more appropriate. In the case above, we see 
a fairly random, uniform distribution of the residuals against the target in two dimensions. This seems 
import numpy as np

corr = np.corrcoef(features, target)

features = features.reshape(-1, 1)

from sklearn.linear_model import LinearRegression

regression = LinearRegression()
regression.fit(features, target)

print(regression.intercept_, regression.coef_)

import matplotlib.pyplot as plt

plt.scatter(features, target)
plt.plot(features, regression.predict(features), color='red')
plt.title("Simple Linear Regression")
plt.xlabel("Age")
plt.ylabel("Cost")

prediction_1 = regression.predict([[40]])
prediction_2 = regression.intercept_ + regression.coef_ * 40

score = regression.score(features, target)

from yellowbrick.regressor import ResidualsPlot

visualizer = ResidualsPlot(regression)
visualizer.fit(features, target)
visualizer.poof()
2. Model Fitting

* Fit on training data and predict on test data
  * Check residuals and prediction error graphs (yellowbrick)
* Plot predicted values vs actuals (yhat, ytest)
* Calculate and plot residuals (ytest - yhat)
"""

# How do our models perform on the test data?
score_model(rf)
score_model(rf_random)
score_model(rf_best)

# What do our residuals look like?
from yellowbrick.regressor import ResidualsPlot
resplot = ResidualsPlot(rf_best)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
g = resplot.poof()

# What does our prediction error look like?
from yellowbrick.regressor import PredictionError
prederr = PredictionError(rf_best)
prederr.fit(Xtrain, ytrain)
prederr.score(Xtest, ytest)
g = prederr.poof()

# Next, we pull out our fitted values (yhat) and actuals (ytest) to see how they compare.
# We also calculate our residuals by subtracting our fitted values from the actuals.
import matplotlib.pyplot as plt
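
# A sketch of the comparison described above (assuming rf_best, Xtest and ytest
# from the cells before): predicted vs. actual values, then the residuals.
yhat = rf_best.predict(Xtest)
residuals = ytest - yhat

plt.scatter(ytest, yhat, alpha=0.5)
plt.xlabel("Actual (ytest)")
plt.ylabel("Predicted (yhat)")
plt.show()

plt.scatter(yhat, residuals, alpha=0.5)
plt.axhline(0, color="black", linewidth=1)
plt.xlabel("Predicted (yhat)")
plt.ylabel("Residual (ytest - yhat)")
plt.show()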
Example #27
modelo = LinearRegression()  # instantiate a LinearRegression object
modelo.fit(X, Y)  # fit the model to the X and Y data (training)

modelo.intercept_  # where the regression line intercepts the Y axis

modelo.coef_  # slope of the line

plt.scatter(X, Y)
plt.plot(X, modelo.predict(X),
         color="red")  # predict returns the values the model estimated from the real X data

# predict the speed for the value distance = 22 feet; there are two ways:
modelo.intercept_ + modelo.coef_ * 22

# or, more directly (predict expects a 2D array):

modelo.predict([[22]])

# residuals of the regression line:

modelo._residues

# another way to visualize them, using the Yellowbrick library

visualizador = ResidualsPlot(modelo)  # create a ResidualsPlot object
visualizador.fit(X, Y)
visualizador.poof()
params = clf.get_params(deep=True)

#evaluate on test
print("default (R^2) score:" + np.array2string(clf.score(X_test_selected,y_test)))
print("intercept: " + np.array2string(clf.intercept_))
print("coefficients: " + np.array2string(clf.coef_))


# %%
#predict using classifier
y_pred_B = clf.predict(X_all_selected)


# %%
# residual plots
visualizer = ResidualsPlot(clf)
visualizer.fit(X_train_selected, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test_selected, y_test)  # Evaluate the model on the test data
visualizer.poof()                 # Draw/show/poof the data


# %%
#plot all results, including train & test
plt.figure(figsize=(16,6))
#fig3, ax3  = plt.subplots(figsize=(16, 6))

sns.lineplot(X_all['DateFraction'],y_pred_B, color = 'b')
#ax3 = plt.twinx()
sns.lineplot(X_all['DateFraction'],y_all, color = 'g')
sns.lineplot(X_test['DateFraction'],X_all['InflationTrailing5yrFactor']**.2-1, color = 'r')
sns.lineplot(X_test['DateFraction'],X_all['RateGS10']/100, color = 'y')
Example #29
    'Total', 'Precipitation', 'Date', 'Day', 'Brooklyn Bridge',
    'Manhattan Bridge', 'Queensboro Bridge', 'Williamsburg Bridge'
])
y_train = df_new['Total']

#%%
from sklearn import preprocessing
from sklearn.linear_model import Ridge
reg = Ridge(alpha=100)
reg.fit(x_train, y_train)

#%%
reg.coef_

#%%
from sklearn.metrics import r2_score, mean_squared_error
y_pred = reg.predict(x_train)

print(r2_score(y_train, y_pred))
print(mean_squared_error(y_train, y_pred))

#%%
import yellowbrick
res = y_train - y_pred

#%%
from yellowbrick.regressor import ResidualsPlot
visualizer = ResidualsPlot(reg)
visualizer.score(x_train, y_train)  # Evaluate the model (here on the training data; no separate test set was created)
visualizer.poof()  # Draw/show/poof the data
# Manual calculation with the trained model to find y (for x[beer_servings] = 400)
modelo1.intercept_ + modelo1.coef_ * 400

# Automatic calculation by the model
modelo1.predict([[400]])
''' Since the study did not give a reference serving size in litres, we can make assumptions from this model:
for example, if a person drinks 400 glasses of beer per year and we take a glass of beer to be 300 ml (0.3 L),
that person (depending on the country, obviously) drinks 120 litres of beer per year and, of pure alcohol alone,
approximately 13.65 litres (roughly 11.37%) '''

# Residuals (the distance between each point and the reference line)
modelo1._residues

# Plotting the residuals
visualizador1 = ResidualsPlot(modelo1)
visualizador1.fit(X, y)
visualizador1.poof()

# The closer the residuals are to zero, the better the model
'''2) Linear relationship between total alcohol consumed (in litres) and total spirits consumed (in servings)
   Note: spirits are all drinks produced by distillation (vodka, whisky, tequila, rum, among others)  '''

A = bebida_mundo.iloc[:, 2].values  #spirit_servings
b = bebida_mundo.iloc[:, 4].values  #total_litres_of_alcohol
correlacao2 = np.corrcoef(A, b)

A = A.reshape(-1, 1)
modelo2 = LinearRegression()
modelo2.fit(A, b)
Example #31
rmse = sqrt(mse)
print('Accuracy:', accuracy, '%.')
print('Root Mean square Error:', rmse)
print('Mean absolute Error:', mae)
print('R2:', r2)

#maekfold = results_rf.mean()

#print ('Mean absolute Error kfold:', maekfold)

#%%

#plotting results

model = rf
visualizer = ResidualsPlot(model)

visualizer.fit(x_train, y_train)  # Fit the training data to the visualizer
visualizer.score(x_test, y_test)  # Evaluate the model on the test data
visualizer.poof()  # Draw/show/poof the data

visualizer = PredictionError(model)

visualizer.fit(x_train, y_train)  # Fit the training data to the visualizer
visualizer.score(x_test, y_test)  # Evaluate the model on the test data
visualizer.poof()  # Draw/show/poof the data
#%%
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm