import os

from yellowbrick.regressor import ResidualsPlot


def residual_plot(model_properties=None, output_path=None):
    """Save a residual plot of the trained model to output_path."""
    if model_properties is None or output_path is None:
        raise ValueError('Need model properties and output path as arguments!')

    estimator = model_properties['estimator']
    X_train = model_properties['X_train']
    y_train = model_properties['y_train']
    X_validation = model_properties['X_validation']
    y_validation = model_properties['y_validation']
    config_map = model_properties['config_map']
    X_scaler = model_properties['X_scaler']
    y_scaler = model_properties['y_scaler']

    # Apply the previously fitted scalers to both splits
    X_train[config_map['scale_columns']] = X_scaler.transform(
        X_train[config_map['scale_columns']])
    y_train[config_map['label']] = y_scaler.transform(
        y_train[config_map['label']])
    X_validation[config_map['scale_columns']] = X_scaler.transform(
        X_validation[config_map['scale_columns']])
    y_validation[config_map['label']] = y_scaler.transform(
        y_validation[config_map['label']])

    visualizer = ResidualsPlot(estimator)
    visualizer.fit(X_train.values, y_train.values)
    visualizer.score(X_validation.values, y_validation.values)
    visualizer.poof(outpath=os.path.join(output_path, 'residual_plot.png'))
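# --- Usage sketch for residual_plot above (every name below is hypothetical
# --- and only illustrates the expected model_properties layout) ---
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
train = pd.DataFrame({'feat_a': rng.rand(80), 'feat_b': rng.rand(80)})
val = pd.DataFrame({'feat_a': rng.rand(20), 'feat_b': rng.rand(20)})
y_tr = pd.DataFrame({'target': 2 * train['feat_a'] + rng.rand(80)})
y_va = pd.DataFrame({'target': 2 * val['feat_a'] + rng.rand(20)})

model_properties = {
    'estimator': Ridge(),  # ResidualsPlot.fit refits it on the scaled data
    'X_train': train, 'y_train': y_tr,
    'X_validation': val, 'y_validation': y_va,
    'config_map': {'scale_columns': ['feat_a', 'feat_b'], 'label': ['target']},
    'X_scaler': StandardScaler().fit(train[['feat_a', 'feat_b']]),
    'y_scaler': StandardScaler().fit(y_tr[['target']]),
}
os.makedirs('plots', exist_ok=True)
residual_plot(model_properties, output_path='plots')  # saves plots/residual_plot.png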
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from yellowbrick.regressor import ResidualsPlot


def plot_residuals(X, y, model, outpath="images/residuals.png", **kwargs):
    """Split the data, fit the model, and save a residuals plot."""
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    _, ax = plt.subplots()

    visualizer = ResidualsPlot(model, ax=ax, **kwargs)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath=outpath)
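# --- Usage sketch for plot_residuals ("housing.csv" and the "price" column
# --- are hypothetical stand-ins for a real regression dataset) ---
import pandas as pd
from sklearn.linear_model import LinearRegression

df = pd.read_csv("housing.csv")
X, y = df.drop(columns=["price"]), df["price"]
plot_residuals(X, y, LinearRegression(), outpath="images/residuals.png")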
def residuals_plot(model, X_test, y_test, road):
    """
    :param model: the already-trained model
    :param X_test: test set features
    :param y_test: test set labels
    :param road: output path for the saved plot
    """
    visualizer = ResidualsPlot(model)
    visualizer.score(X_test, y_test)  # the model is pre-fitted, so only score
    visualizer.poof(outpath=road)
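# --- Usage sketch for residuals_plot (synthetic data; the estimator must be
# --- fitted beforehand, as the docstring requires; if your yellowbrick
# --- version insists on it, call visualizer.fit on training data first) ---
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
X = rng.rand(100, 3)
y = X @ np.array([1.0, 2.0, -1.0]) + 0.05 * rng.randn(100)

model = LinearRegression().fit(X[:70], y[:70])
residuals_plot(model, X[70:], y[70:], "residuals.png")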
def visualize_residuals_plot(self, model_info):
    model = model_info['model']
    X_train = model_info['X_train']
    X_test = model_info['X_test']
    Y_train = model_info['Y_train']
    Y_test = model_info['Y_test']

    visualizer = ResidualsPlot(model)
    visualizer.fit(X_train, Y_train)    # Fit the training data to the model
    visualizer.score(X_test, Y_test)    # Evaluate the model on the test data
    visualizer.poof()                   # Draw/show/poof the data
def testFunc7(savepath='Results/bikeshare_LinearRegression_ResidualsPlot.png'):
    """Predict bike-share ridership with a linear regression model."""
    data = pd.read_csv('fixtures/bikeshare/bikeshare.csv')
    X = data[[
        "season", "month", "hour", "holiday", "weekday", "workingday",
        "weather", "temp", "feelslike", "humidity", "windspeed"
    ]]
    Y = data["riders"]
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)

    visualizer = ResidualsPlot(LinearRegression())
    visualizer.fit(X_train, y_train)   # fit on the training split, not the test split
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath=savepath)
def test_for_homoscedasticity(X_train, y_train, X_test, y_test):
    """
    Plot the residuals and check for homoscedasticity.

    Arguments:
        X_train (dataframe): examples in the training set
        X_test (dataframe): examples in the test set
        y_train (dataframe): target in the training set
        y_test (dataframe): target in the test set
    """
    lr = LinearRegression()
    visualizer = ResidualsPlot(lr)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)  # there should be no clear pattern in the residuals
    visualizer.poof()
def visualize_pred_residuals(X_train, X_test, y_train, y_test):
    model = linear_model.Ridge(alpha=0.05)
    fitted = model.fit(X_train, y_train)

    visualizer = ResidualsPlot(fitted, size=(1080, 720))
    pred = fitted.predict(X_test)
    r = stats.linregress(pred, y_test)
    print(r[2])  # correlation coefficient between predictions and targets

    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof()

    cvr = model_selection.cross_validate(model, X_test, y_test, cv=10,
                                         return_train_score=True)
    print('Training scores:', cvr['train_score'], '\n')
    print('Testing scores:', cvr['test_score'])
def vis_residuals(model, features, target):
    """Fit a ResidualsPlot on the data and return the rendered figure."""
    visualizer = ResidualsPlot(model, size=(1080, 720))
    visualizer.fit(features, target)
    return visualizer.poof()
def slr(self, iv, dv, plot_relationship=False, plot_residuals=True):
    # Create simple linear regression model
    self.slr_model = LinearRegression(fit_intercept=True)
    y = self.data[dv]
    x = self.data[iv]
    self.slr_model.fit(x[:, np.newaxis], y)
    xfit = np.linspace(-4, 4, 1000)
    yfit = self.slr_model.predict(xfit[:, np.newaxis])

    if plot_relationship:
        sns.lmplot(x=iv, y=dv, data=self.data, height=7, aspect=1.25)
        plt.plot(xfit, yfit)
        plt.ylabel(dv)
        plt.xlabel(iv)
        plt.title("{} = {} • {} + {}".format(
            dv, round(self.slr_model.coef_[0], 5), iv,
            round(self.slr_model.intercept_, 5)))
        plt.subplots_adjust(left=.095, right=.95, top=.9, bottom=.15)
        plt.xlim(-100, max(self.data["Counts"]) * 1.1)

    if plot_residuals:
        from yellowbrick.regressor import ResidualsPlot

        # Instantiate the linear model and visualizer
        visualizer = ResidualsPlot(model=self.slr_model)
        visualizer.fit(x[:, np.newaxis], y)  # Fit the training data to the model
        visualizer.poof()

    print("Simple Linear Regression\n{} = {} • {} + {}".format(
        dv, round(self.slr_model.coef_[0], 5), iv,
        round(self.slr_model.intercept_, 5)))

    # Compute the RMSE of the in-sample predictions
    y_predict = self.slr_model.predict(x.values.reshape(-1, 1))
    rmse = sqrt(((y - y_predict) ** 2).values.mean())
    self.df_rmse.loc["Linear"] = round(rmse, 5)
    print("\n", self.df_rmse)
class PrincipalComponentRegressor(Regressor):
    def __init__(self, n_components):
        super().__init__()
        self.n_components = n_components
        self.regressor = LinearRegression()
        self.pca = None

    def fit(self, x_train, y_train, standardize=False):
        self.standardize = standardize  # keep the flag used by residual_plot
        self.pca = PCA(self.n_components)
        self.x_train = self.pca.fit_transform(x_train)
        self.y_train = y_train
        self.regressor.fit(self.x_train, self.y_train)
        self._inference()
        return (self.regressor.intercept_, self.regressor.coef_, self.p,
                self.regressor.score(self.x_train, y_train))

    def predict(self, x_test):
        try:
            x_test_transform = self.pca.transform(x_test)
        except ValueError:
            # x_test is already in the principal-component space
            x_test_transform = x_test
        return self.regressor.predict(x_test_transform)

    def residual_plot(self, x_test=None, y_test=None):
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)
        try:
            self.residual_visualizer = ResidualsPlot(self.regressor)
        except yellowbrick.exceptions.YellowbrickTypeError:
            self.residual_visualizer = ResidualsPlot(self.regressor.regressor)
        self.residual_visualizer.fit(self.x_train, self.y_train)
        if x_test is not None and y_test is not None:
            try:
                self.residual_visualizer.score(x_test, y_test)
            except ValueError:
                # Project raw test data onto the principal components first
                x_test = self.pca.transform(x_test)
                self.residual_visualizer.score(x_test, y_test)
        self.residual_visualizer.poof()
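# --- Usage sketch for PrincipalComponentRegressor (synthetic data; shapes,
# --- n_components=2, and the 80/20 split are assumptions; it also assumes the
# --- Regressor base class and its ME helper module are importable) ---
import numpy as np

rng = np.random.RandomState(1)
X_all = rng.rand(100, 5)
y_all = X_all @ np.array([1.5, -2.0, 0.5, 0.0, 0.0]) + 0.1 * rng.randn(100)

pcr = PrincipalComponentRegressor(n_components=2)
pcr.fit(X_all[:80], y_all[:80])
print(pcr.predict(X_all[80:])[:5])         # raw test data is projected internally
pcr.residual_plot(X_all[80:], y_all[80:])  # score() falls back to a PCA transform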
def main():
    data = pd.read_csv('plano-saude.csv')

    # .values converts the columns to numpy arrays
    x = data.iloc[:, 0].values
    y = data.iloc[:, 1].values
    corr_coef = np.corrcoef(x, y)

    # scikit-learn algorithms require the features in matrix format
    x = x.reshape(-1, 1)

    regression = LinearRegression()
    regression.fit(x, y)   # train the model
    regression.intercept_  # b0
    regression.coef_       # b1

    plt.scatter(x, y)
    plt.plot(x, regression.predict(x), color='red')
    plt.title('Simple linear regression')
    plt.xlabel('Age')
    plt.ylabel('Cost')

    value = np.asarray([40]).reshape(-1, 1)
    prevision1 = regression.predict(value)
    # y = b0 + b1 * x1
    prevision2 = regression.intercept_ + regression.coef_ * value

    # Check the score of the regression algorithm
    score = regression.score(x, y)

    # Plot the residuals for a better view of the data
    visualizer = ResidualsPlot(regression)
    visualizer.fit(x, y)  # the Train R² shown equals regression.score
    visualizer.poof()
class RandForestRegressor(Regressor):
    def __init__(self):
        super().__init__()
        self.regressor = RandomForestRegressor()

    def fit(self, x_train, y_train, standardize=False):
        self.standardize = standardize
        if self.standardize:
            self.standardizescaler.fit(x_train)
            x_train = self.standardizescaler.transform(x_train)
        self.x_train = x_train
        self.y_train = y_train
        self.regressor.fit(self.x_train, self.y_train.ravel())
        self._inference()
        return self.rsquared

    def residual_plot(self, x_test=None, y_test=None):
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)
        try:
            self.residual_visualizer = ResidualsPlot(self.regressor)
        except yellowbrick.exceptions.YellowbrickTypeError:
            self.residual_visualizer = ResidualsPlot(self.regressor.regressor)
        y_train = self.y_train.ravel()
        self.residual_visualizer.fit(self.x_train, y_train)
        if x_test is not None and y_test is not None:
            y_test = y_test.ravel()
            self.residual_visualizer.score(x_test, y_test)
        self.residual_visualizer.poof()

    def predict(self, x_test):
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)
        return self.regressor.predict(x_test).reshape(-1, 1)
def generate_ordinal_diagnostics(x, y, current_best_model, label_type,
                                 diagnostic_image_path):
    x = np.array(x)
    y = np.array(y)

    # Collect (actual, predicted) pairs across 10 cross-validation folds
    kf = KFold(n_splits=10, shuffle=True)
    guesses = []
    for train_index, test_index in kf.split(x):
        X_train, X_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model = current_best_model[0].fit(X_train, y_train)
        for guess in zip(y_test.tolist(), model.predict(X_test).tolist()):
            guesses.append(guess)

    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    if "VotingClassifier" not in str(current_best_model[0].__class__):
        visualizer = ResidualsPlot(current_best_model[0])
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.poof(outpath=diagnostic_image_path + "/residuals_plot.png")
        plt.clf()

        visualizer = PredictionError(current_best_model[0])
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.poof(outpath=diagnostic_image_path + "/prediction_error.png")
        plt.clf()

    visualizer = PCADecomposition(scale=True, center=False, col=y, proj_dim=2)
    visualizer.fit_transform(x, y)
    print(diagnostic_image_path + "/pca_2.png")
    visualizer.poof(outpath=diagnostic_image_path + "/pca_2.png")
    plt.clf()

    visualizer = PCADecomposition(scale=True, center=False, col=y, proj_dim=3)
    visualizer.fit_transform(x, y)
    visualizer.poof(outpath=diagnostic_image_path + "/pca_3.png")
    plt.clf()

    actual, predicted = np.array(guesses).transpose()
    return {
        "mse": mean_squared_error(actual, predicted),
        "r2": r2_score(actual, predicted),
        "mae": median_absolute_error(actual, predicted),
        "evs": explained_variance_score(actual, predicted),
        "rmse": np.sqrt(mean_squared_error(actual, predicted)),
    }
def showResiduals():
    # Load the data
    df = load_data('concrete')
    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame
    X = df[feature_names].values  # .as_matrix() was removed in pandas 1.0
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    ridge = Ridge()
    visualizer = ResidualsPlot(ridge)
    visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)    # Evaluate the model on the test data
    g = visualizer.poof()               # Draw/show/poof the data
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from yellowbrick.regressor import ResidualsPlot

if __name__ == '__main__':
    # Load the regression data set
    df = pd.read_csv("../../../examples/data/concrete/concrete.csv")

    feature_names = ['cement', 'slag', 'ash', 'water', 'splast',
                     'coarse', 'fine', 'age']
    target_name = 'strength'

    # Get the X and y data from the DataFrame
    X = df[feature_names].values  # .as_matrix() was removed in pandas 1.0
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Instantiate the linear model and visualizer
    ridge = Ridge()
    visualizer = ResidualsPlot(ridge)
    visualizer.fit(X_train, y_train)    # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)    # Evaluate the model on the test data
    g = visualizer.poof(outpath="images/residuals.png")  # Draw and save the plot
print(f"Test R2 is {lr_log.score(X=X_test_log, y=y_test_log)}") # There is a slight improvement (~2%) in the train R2 and test R2 utilizing log transform # + [markdown] pycharm={"name": "#%% md\n"} # ## Model Evaluation - Linear Regression # ### The following section evaluates the random error, constant variance and normal distribution with mean 0 assumption of linear model in the context of the four initial models utilizing a residual plot from Yellowbrick. # # - # Residual Plot for Huber LR with no log-transform from yellowbrick.regressor import ResidualsPlot rpv_hr = ResidualsPlot(hr) rpv_hr.fit(X=X_train, y=y_train) rpv_hr.score(X=X_test, y=y_test) rpv_hr.poof() rpv_lr = ResidualsPlot(lr) rpv_lr.fit(X=X_train, y=y_train) rpv_lr.score(X=X_test, y=y_test) rpv_lr.poof() # Residual Plot for LR with log transform rpv_lr_log = ResidualsPlot(lr_log) rpv_lr_log.fit(X=X_train_log, y=y_train_log) rpv_lr_log.score(X=X_test_log, y=y_test_log) rpv_lr_log.poof() # + [markdown] pycharm={"name": "#%% md\n"} # ## Model Evaluation of Ordinary Least Squares -Log Transform # - Evaluation of log-transformed OLS model as the residuals plot appeared to satisfy most of the principal assumptions of linear regression.
class Regressor:
    def __init__(self):
        self.parameters = dict()
        self.regressor = None
        self.sse = None
        self.sst = None
        self.adjrsquared = None
        self.rsquared = None
        self._x_train = None
        self._y_train = None
        self.n = None
        # x_k refers to the number of predictors in x_train
        self.x_k = None
        # y_k refers to the number of responses in y_train
        self.y_k = None
        self.p = None
        self.standardize = None
        self.standardizescaler = StandardScaler()
        self.residual_visualizer = None

    @property
    def x_train(self):
        return self._x_train

    @x_train.setter
    def x_train(self, x_train):
        self._x_train = x_train
        try:
            self.x_k = x_train.shape[1]
            self.n = x_train.shape[0]
        except IndexError:
            # 1-D input: a single predictor
            self.x_k = 1
            self.n = x_train.shape[0]
            self._x_train = self._x_train.reshape(-1, 1)

    @property
    def y_train(self):
        return self._y_train

    @y_train.setter
    def y_train(self, y_train):
        self._y_train = y_train
        try:
            self.y_k = y_train.shape[1]
        except IndexError:
            self.y_k = y_train.shape[0]
            self._y_train = self._y_train.reshape(-1, 1)

    def _inference(self):
        try:
            self.rsquared = self.regressor.score(self.x_train, self.y_train)
        except AttributeError:
            self.rsquared = self.regressor.regressor.score(
                self.x_train, self.y_train)
        self.adjrsquared = ME.ModelEvaluation.AdjRsquared(self)

        # Store some info about the model.
        self.sst = np.sum((self.y_train - np.mean(self.y_train, axis=0))**2,
                          axis=0)
        self.sse = np.sum((self.predict(self.x_train) - self.y_train)**2,
                          axis=0)
        self.sse_scaled = self.sse / float(
            self.x_train.shape[0] - self.x_train.shape[1])
        if type(self.sse_scaled) == np.float64:
            self.sse_scaled = [self.sse_scaled]
        try:
            if not self.standardize:
                x_train = self.x_train - np.mean(self.x_train, axis=0)
            else:
                x_train = self.x_train
            var_beta = self.sse_scaled * (np.linalg.inv(
                np.dot(x_train.T, x_train)).diagonal())
            self.se = np.sqrt(var_beta)
        except np.linalg.LinAlgError:
            return
        except TypeError:
            return
        try:
            self.t = self.regressor.coef_ / np.array(self.se)
        except AttributeError:
            try:
                self.t = self.parameters['beta'] / self.se
            except KeyError:
                return
        self.p = [
            2 * (1 - stats.t.cdf(np.abs(i), (len(x_train) - 1)))
            for i in self.t
        ]

    def fit(self, x_train, y_train, standardize=False):
        self.standardize = standardize
        if self.standardize:
            self.standardizescaler.fit(x_train)
            x_train = self.standardizescaler.transform(x_train)
        self.x_train = x_train
        self.y_train = y_train
        self.regressor.fit(self.x_train, self.y_train)
        self._inference()
        return (self.regressor.intercept_, self.regressor.coef_, self.p,
                self.regressor.score(x_train, y_train))

    def predict(self, x_test):
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)
        try:
            return self.regressor.predict(x_test)
        except AttributeError:
            return self.regressor.predict(x_test=x_test)

    def regression_plot(self, x_test, y_test):
        scatter = plt.scatter(x_test, y_test, color='b')
        line = plt.plot(x_test, self.regressor.predict(x_test), color='r')
        plt.ylabel('response')
        plt.xlabel('explanatory')
        plt.legend(
            handles=[scatter, line[0]],
            labels=[
                'Scatter Plot',
                'Intercept:{}, Slope:{},\n R-square:{}'.format(
                    self.regressor.intercept_, self.regressor.coef_,
                    self.regressor.score(x_test, y_test))
            ],
            loc='best')
        plt.title('Scatter Plot and Regression')

    def residual_plot(self, x_test=None, y_test=None):
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)
        try:
            self.residual_visualizer = ResidualsPlot(self.regressor)
        except yellowbrick.exceptions.YellowbrickTypeError:
            # Wrapped estimators expose the underlying sklearn model
            self.residual_visualizer = ResidualsPlot(self.regressor.regressor)
        self.residual_visualizer.fit(self.x_train, self.y_train)
        if x_test is not None and y_test is not None:
            self.residual_visualizer.score(x_test, y_test)
        self.residual_visualizer.poof()

    def get_score(self, x_test, y_test):
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)
        try:
            return self.regressor.score(x_test, y_test)
        except AttributeError:
            return self.regressor.Get_Score(x_test, y_test)
lr.score(X_test, y_test)

### Yellowbrick
from yellowbrick.regressor import PredictionError, ResidualsPlot

## RVF (residuals-versus-fitted) plot
# Run the following together
lr_yb = ResidualsPlot(lr, hist=True)
lr_yb.fit(X_train, y_train)
lr_yb.score(X_test, y_test)
lr_yb.poof()

## Prediction Error plot
lr_yb = PredictionError(lr)  # PredictionError does not take a hist argument
lr_yb.fit(X_train, y_train)
lr_yb.score(X_test, y_test)
lr_yb.poof()

################ Polynomial/Interactions ################
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures  # adds polynomials and interactions
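# A minimal sketch (not from the original) of how these imports are commonly
# combined: wrap PolynomialFeatures and a linear model in a pipeline and hand
# the pipeline to ResidualsPlot; degree=2 is an assumption for illustration.
from sklearn.linear_model import LinearRegression

poly_model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
poly_yb = ResidualsPlot(poly_model)
poly_yb.fit(X_train, y_train)   # reuses the train/test split from above
poly_yb.score(X_test, y_test)
poly_yb.poof()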
# Split the data
X = df.values[:, 0]
y = df.values[:, 1]
X = X.reshape(-1, 1)

# Create the model
model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)

# Visualization
plt.plot(X, y_pred, color='red')      # regression line
plt.scatter(x=X, y=y)                 # data points
plt.title("Simple Linear Regression")
plt.xlabel("Age")
plt.ylabel("Cost")

visual = ResidualsPlot(model)
visual.fit(X, y)
visual.poof()

# Correlation value / score
model.score(X, y)
reg = LinearRegression()
reg.fit(xrm, y)
print(reg.score(xrm, y))

xx = np.linspace(min(xrm), max(xrm)).reshape(-1, 1)
plt.scatter(xrm, y, color="blue")
plt.plot(xx, reg.predict(xx), color="red", linewidth=3)
plt.ylabel("y: Value of house / 1000 USD")
plt.xlabel("x: Number of rooms")
plt.show()

from yellowbrick.regressor import ResidualsPlot

visualizer = ResidualsPlot(reg, hist=False)
visualizer.fit(xrm, y)
visualizer.score(xrm, y)
visualizer.poof()

# use the multivariate data
# split data: 70% training, 30% testing
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    test_size=0.3,
                                                    random_state=42)
reg = LinearRegression()
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)
print("R^2 = ", reg.score(x_train, y_train))

viz = ResidualsPlot(reg, hist=False)
# (presumably continues as above: viz.fit, viz.score, viz.poof)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from yellowbrick.regressor import ResidualsPlot

dataset_cars = pd.read_csv('cars.csv')
dataset_cars = dataset_cars.drop(['Unnamed: 0'], axis=1)

X = dataset_cars.iloc[:, 1].values
y = dataset_cars.iloc[:, 0].values
correlation = np.corrcoef(X, y)
X = X.reshape(-1, 1)

model_linear_regression = LinearRegression()
model_linear_regression.fit(X, y)
print("Intercept of the trained model: ", model_linear_regression.intercept_)
print("Slope of the trained model: ", model_linear_regression.coef_)

plt.scatter(X, y)
plt.plot(X, model_linear_regression.predict(X), color='red')

distance_stop = np.array([[22]])
model_linear_regression.predict(distance_stop)
print("Residues of the trained model: ", model_linear_regression._residues)

visualization = ResidualsPlot(model_linear_regression)
visualization.fit(X, y)
visualization.poof()
rank.poof(outpath="lasso_rank2d.png")

# Feature Importances (naive, 18 variable case)
fig = plt.figure()
ax = fig.add_subplot()
featimp = FeatureImportances(lasso, ax=ax)
featimp.fit(Xt, ytrain)
featimp.poof(outpath="lasso_featureimportances18.png")

# Residuals Plot
fig = plt.figure()
ax = fig.add_subplot()
resplot = ResidualsPlot(lasso, ax=ax)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
resplot.poof(outpath="lasso_resplot.png")

# Actual vs Predicted
lasso.fit(Xtrain, ytrain)
yhat = lasso.predict(Xtest)
error = ytest - yhat
data = pd.DataFrame({
    't': test['date'],
    'ytest': ytest,
    'yhat': yhat,
    'error': error,
    'neg_error': np.negative(error),
    'dless': dless
})

fig, ax = plt.subplots()
plt.plot('t', 'ytest', data=data, color='blue', linewidth=1, label='actual')
# y = B0 + B1*X

# Coefficients
regressor.intercept_  # B0
regressor.coef_       # B1

# Plot the fitted line
plt.scatter(X, y)
plt.plot(X, regressor.predict(X), color="green")
plt.title("Simple Linear Regression")
plt.xlabel("Age")
plt.ylabel("Cost")
plt.show()

# Use the model: 40 is the chosen value for age
predict1 = regressor.intercept_ + regressor.coef_ * 40
print(predict1)

# R² coefficient: how much of the variance the model explains, between 0 and 1.
# The closer to 1 the better (not always valid; requires further analysis).
score = regressor.score(X, y)
print(score)

# Distance from the actual data to the fitted line; this plot also reports R²
vizualizador = ResidualsPlot(regressor)
vizualizador.fit(X, y)
vizualizador.poof()
x_train = df_new.drop(columns=[
    'Total', 'Precipitation', 'High Temp (°F)', 'Low Temp (°F)', 'Date', 'Day'
])
y_train = df_new['Total']

#%%
from sklearn import preprocessing
from sklearn.linear_model import Ridge

reg = Ridge(alpha=100)
reg.fit(x_train, y_train)

#%%
reg.coef_

#%%
from sklearn.metrics import r2_score, mean_squared_error

y_pred = reg.predict(x_train)
print(r2_score(y_train, y_pred))
print(mean_squared_error(y_train, y_pred))

#%%
import yellowbrick

res = y_train - y_pred

#%%
from yellowbrick.regressor import ResidualsPlot

visualizer = ResidualsPlot(reg)
visualizer.fit(x_train, y_train)    # Fit the visualizer before scoring
visualizer.score(x_train, y_train)  # Evaluate the model (here on the training data)
visualizer.poof()                   # Draw/show/poof the data
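#%%
# A minimal sketch (not in the original notebook) of what ResidualsPlot draws
# under the hood: the manually computed residuals `res` from above plotted
# against the fitted values.
import matplotlib.pyplot as plt

plt.scatter(y_pred, res, s=10)
plt.axhline(0, color='black', linewidth=1)
plt.xlabel('Predicted Total')
plt.ylabel('Residual')
plt.show()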
plt.plot(X, modelo_cerveja.predict(X), color='red')

# Manual calculation and model prediction of y, respectively
modelo_cerveja.intercept_ + modelo_cerveja.coef_ * 400
modelo_cerveja.predict([[400]])
'''NOTE: Since the study did not state a serving size in liters, we can make
assumptions from this model. For example, taking a 300 ml (0.3 L) glass,
a person who drinks 400 glasses of beer per year (depending on the country,
obviously) drinks 120 liters of beer, which amounts to roughly 13.88 liters
of pure alcohol (about 11.56%).
'''

# Residuals and their plot (distance between the points and the regression line)
modelo_cerveja._residues
visualizador_cerveja = ResidualsPlot(modelo_cerveja)
visualizador_cerveja.fit(X, y)
visualizador_cerveja.poof()

'''2) Linear regression of spirits vs. total alcohol consumed'''
A = bebida_mundo.iloc[:, 2].values
b = bebida_mundo.iloc[:, 4].values
correlacao_destilados = np.corrcoef(A, b)
A = A.reshape(-1, 1)

modelo_destilados = LinearRegression()
modelo_destilados.fit(A, b)
score_destilados = modelo_destilados.score(A, b)
modelo_destilados.intercept_
modelo_destilados.coef_
# Automatic calculation by the model
modelo1.predict([[400]])
'''
Since the study did not state a serving size in liters, we can make
assumptions from this model. For example, taking a 300 ml (0.3 L) glass,
a person who drinks 400 glasses of beer per year (depending on the country,
obviously) drinks 120 liters of beer, which amounts to roughly 13.88 liters
of pure alcohol (about 11.56%).
'''

# Residuals (distance between the points and the reference line)
modelo1._residues

# Plot of the residuals; the closer they are to zero, the better the model
visualizador1 = ResidualsPlot(modelo1)
visualizador1.fit(X, y)
visualizador1.poof()

'''2) Linear relationship between total alcohol consumed (in liters) and
total spirits consumed (in servings).
NOTE: Spirits are all drinks produced by distillation (vodka, whisky,
tequila, rum, among others).
'''
A = bebida_mundo.iloc[:, 2].values  # spirit_servings
b = bebida_mundo.iloc[:, 4].values  # total_litres_of_alcohol
correlacao2 = np.corrcoef(A, b)
A = A.reshape(-1, 1)

modelo2 = LinearRegression()
modelo2.fit(A, b)
modelo2.intercept_
# Intercept of x and y (start of the regression line)
print(modelo.intercept_)

# Coefficient
print(modelo.coef_)

#%%
# Build the chart:
# scatter - plots the data points
plt.scatter(X, Y)
# plot - draws the best-fit line based on the points
plt.plot(X, modelo.predict(X), color='red')
# Note: run the two commands above together to build the scatter
# plot with the best-fit line

# Stopping distance of 22 feet (predict the speed the car was going)
distancia = 22
modelo.intercept_ + modelo.coef_ * distancia
# or
modelo.predict(np.array(distancia).reshape(-1, 1))

# Residuals - distance between the points and the regression line
print(modelo._residues)

#%%
# Plot the residuals from the model for a better view
visualizador = ResidualsPlot(modelo)
visualizador.fit(X, Y)
visualizador.poof()
                                                    y,
                                                    test_size=0.25,
                                                    random_state=32)
m1 = LinearRegression().fit(X_train, y_train)
print('M1 (price): ', m1.score(X_test, y_test))
m1_y = m1.predict(X_test)

plt.scatter(X_test, y_test, edgecolors='blue')
plt.plot(X_test, m1_y, linewidth=3)
plt.title('M1')
plt.xlabel('Price')
plt.ylabel('Sales')
plt.show()

visualiser = ResidualsPlot(m1)
visualiser.fit(X_train, y_train)  # fit first so the train residuals are drawn
visualiser.score(X_test, y_test)
visualiser.poof()

# second model (M2) using price, store
X = finalDF.drop(['billboard', 'printout', 'sat', 'comp', 'sales'], axis=1)
y = finalDF['sales']

# splitting data into train, test
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=32)
m2 = LinearRegression().fit(X_train, y_train)
print('M2 (price, store): ', m2.score(X_test, y_test))
m2_y = m2.predict(X_test)

visualiser = ResidualsPlot(m2)
# (presumably continues as for M1: fit, score, poof)
mse = np.mean((pred - y_test)**2)
mse

## calculating score
ridgeReg.score(X_test, y_test)

from yellowbrick.regressor import ResidualsPlot

# Instantiate the linear model and visualizer
ridge = Ridge()
visualizer = ResidualsPlot(ridge)
visualizer.fit(X_train, y_train)    # Fit the training data to the model
visualizer.score(X_test, y_test)    # Evaluate the model on the test data
visualizer.poof()

## Apply different algorithms to X_train, X_test, y_train, y_test

# Fitting K-NN to the Training set
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
classifier.fit(X_train, y_train)

# Predicting the Test set results
pred_y = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
# Sort by magnitude
results['sorted'] = results[0].abs()
results.sort_values(by='sorted', inplace=True, ascending=False)
print("Lasso chooses {} variables".format(len(results)))
print(results)

# How does our model perform on the test data?
score_model(lasso)

# What do our residuals look like?
from yellowbrick.regressor import ResidualsPlot

resplot = ResidualsPlot(lasso)
resplot.fit(Xtrain, ytrain)
resplot.score(Xtest, ytest)
g = resplot.poof()

# What does our prediction error look like?
from yellowbrick.regressor import PredictionError

prederr = PredictionError(lasso)
prederr.fit(Xtrain, ytrain)
prederr.score(Xtest, ytest)  # evaluate on the held-out test split
g = prederr.poof()

# Next, we pull out our fitted values (yhat) and actuals (ytest) to see how
# they compare. We also calculate our residuals by subtracting our fitted
# values from the actuals.
import matplotlib.pyplot as plt

lasso.fit(Xtrain, ytrain)
yhat = lasso.predict(Xtest)