def test_residuals(self):
    """Check the signature, return type, shape and sample values of ``residuals``.

    The inputs are produced by chaining ``load_data``, ``data_splitter``,
    ``linear_regression`` and ``linear_predictor`` on
    ``data/house_prices_multivariate.csv``.
    """
    # NOTE: ``inspect.getargspec`` no longer exists on Python 3.11+.
    # ``getfullargspec`` exposes the argument names at index 0 and the
    # defaults at index 3, so the indexed checks below read the same fields.
    from inspect import getfullargspec

    # Input parameters tests
    args = getfullargspec(residuals)
    self.assertEqual(
        len(args[0]), 2,
        "Expected argument(s) %d, Given %d" % (2, len(args[0])))
    self.assertEqual(
        args[3], None,
        "Expected default values do not match given default values")

    # Return type tests
    dataframe = load_data('data/house_prices_multivariate.csv')
    X_house_prices, y_house_prices = data_splitter(dataframe)
    lm = linear_regression(X_house_prices, y_house_prices)
    y_pred_house_prices, _, ___, __ = linear_predictor(
        lm, X_house_prices, y_house_prices)
    residuals_house_prices = residuals(y_house_prices, y_pred_house_prices)

    # The failure message names the type that the assertion actually checks.
    self.assertIsInstance(
        residuals_house_prices, pandas.Series,
        "Expected data type for return value is `pandas.Series`, you are returning %s" % (
            type(residuals_house_prices)))

    # Return value tests
    self.assertEqual(
        residuals_house_prices.shape, (1379, ),
        "Return `residuals` shape does not match expected value")

    # (position, expected residual) pairs, compared to 2 decimal places.
    expected_by_position = (
        (0, -14665.2446233),
        (20, 28662.5413679),
        (30, -28207.3493296),
        (40, 26990.3010116),
    )
    for position, expected in expected_by_position:
        self.assertAlmostEqual(
            residuals_house_prices.iloc[position], expected, 2,
            "Return value does not match expected value")
# %load q05_residuals/build.py
# Default Imports
from greyatomlib.linear_regression.q01_load_data.build import load_data
from greyatomlib.linear_regression.q02_data_splitter.build import data_splitter
from greyatomlib.linear_regression.q03_linear_regression.build import linear_regression
from greyatomlib.linear_regression.q04_linear_predictor.build import linear_predictor
from sklearn.linear_model import LinearRegression

# Module-level setup: runs at import time and leaves `dataframe`, `X`, `y`,
# `linear_model` and `y_pred` available as module attributes.
dataframe = load_data('data/house_prices_multivariate.csv')
X, y = data_splitter(dataframe)
linear_model = linear_regression(X, y)
# Only the first of the four values returned by linear_predictor is used.
y_pred, _, __, ___ = linear_predictor(linear_model, X, y)


def residuals(y, y_pred):
    """Return the difference ``y - y_pred``.

    Parameters
    ----------
    y : observed values.
    y_pred : predicted values; must support subtraction from ``y``.

    Returns
    -------
    Whatever ``y - y_pred`` evaluates to for the given operands.
    """
    return y - y_pred
# %load q05_residuals/build.py
# Default Imports
from greyatomlib.linear_regression.q01_load_data.build import load_data
from greyatomlib.linear_regression.q02_data_splitter.build import data_splitter
from greyatomlib.linear_regression.q03_linear_regression.build import linear_regression
from greyatomlib.linear_regression.q04_linear_predictor.build import linear_predictor
from sklearn.linear_model import LinearRegression

# Module-level setup: runs at import time and leaves these names available
# as module attributes.
dataframe = load_data('data/house_prices_multivariate.csv')
X, y = data_splitter(dataframe)
linear_model = linear_regression(X, y)
y_pred, mse, mae, r2 = linear_predictor(linear_model, X, y)


def residuals(y, y_pred):
    """Return the difference ``y - y_pred``.

    Parameters
    ----------
    y : observed values.
    y_pred : predicted values; must support subtraction from ``y``.

    Returns
    -------
    Whatever ``y - y_pred`` evaluates to for the given operands.
    """
    return y - y_pred


# Residuals for the module-level data. A bare `c` expression only echoes a
# value inside a notebook cell and does nothing in a script, so it is omitted.
c = residuals(y, y_pred)
from unittest import TestCase
# `inspect.getargspec` no longer exists on Python 3.11+; `getfullargspec`
# provides the same `args` and `defaults` fields.
from inspect import getfullargspec
import pandas
from greyatomlib.linear_regression.q01_load_data.build import load_data
from greyatomlib.linear_regression.q02_data_splitter.build import data_splitter
from greyatomlib.linear_regression.q04_linear_predictor.build import linear_predictor
from greyatomlib.linear_regression.q03_linear_regression.build import linear_regression
from ..build import residuals

# Shared fixtures, built once at import time.
dataframe = load_data('data/house_prices_multivariate.csv')
X_house_prices, y_house_prices = data_splitter(dataframe)
lm = linear_regression(X_house_prices, y_house_prices)
y_pred_house_prices, _, ___, __ = linear_predictor(lm, X_house_prices, y_house_prices)
residuals_house_prices = residuals(y_house_prices, y_pred_house_prices)


class TestResiduals(TestCase):
    """Signature checks for ``residuals``."""

    def test_residuals_arguments(self):
        """``residuals`` must declare exactly two named arguments."""
        # Input parameters tests
        spec = getfullargspec(residuals)
        self.assertEqual(
            len(spec.args), 2,
            "Expected argument(s) %d, Given %d" % (2, len(spec.args)))

    def test_residuals_defaults(self):
        """``residuals`` must not declare default argument values."""
        spec = getfullargspec(residuals)
        self.assertEqual(
            spec.defaults, None,
            "Expected default values do not match given default values")
# Default Imports
from greyatomlib.linear_regression.q01_load_data.build import load_data
from greyatomlib.linear_regression.q02_data_splitter.build import data_splitter
from greyatomlib.linear_regression.q03_linear_regression.build import linear_regression
from greyatomlib.linear_regression.q04_linear_predictor.build import linear_predictor
from greyatomlib.linear_regression.q05_residuals.build import residuals
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import pylab
import scipy.stats as stats

# Module-level setup: runs at import time and leaves these names available
# as module attributes.
dataframe = load_data('data/house_prices_multivariate.csv')
X, y = data_splitter(dataframe)
linear_model = linear_regression(X, y)
y_pred, MSE_sklearn, MAE_sklearn, r2_sklearn = linear_predictor(linear_model, X, y)
error_residuals = residuals(y, y_pred)


def qq_residuals(error_residuals):
    """Draw a probability plot of ``error_residuals`` against a normal
    distribution and display it.

    Parameters
    ----------
    error_residuals : sample passed straight to ``scipy.stats.probplot``.

    Returns
    -------
    None. The plot is rendered through ``pylab``.
    """
    stats.probplot(error_residuals, plot=pylab, dist="norm")
    pylab.show()
# %load q06_plot_residuals/build.py
# Default Imports
from greyatomlib.linear_regression.q01_load_data.build import load_data
from greyatomlib.linear_regression.q02_data_splitter.build import data_splitter
from greyatomlib.linear_regression.q03_linear_regression.build import linear_regression
from greyatomlib.linear_regression.q04_linear_predictor.build import linear_predictor
from greyatomlib.linear_regression.q05_residuals.build import residuals
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Select the 'agg' backend before any figure is created.
plt.switch_backend('agg')

# Module-level setup: runs at import time and leaves these names available
# as module attributes.
dataframe = load_data('data/house_prices_multivariate.csv')
X, y = data_splitter(dataframe)
linear_model = linear_regression(X, y)
y_pred, rms, rma, r2 = linear_predictor(linear_model, X, y)
error_residuals = residuals(y, y_pred)


# Your code here
def plot_residuals(y, error_residuals):
    """Scatter ``error_residuals`` against ``y`` on a new 10x4 figure.

    Parameters
    ----------
    y : values for the horizontal axis (labelled 'Sale price').
    error_residuals : values for the vertical axis (labelled 'Errors').

    Returns
    -------
    None. The figure is drawn through ``matplotlib.pyplot``.
    """
    figure_size = (10, 4)
    plt.figure(figsize=figure_size)
    plt.scatter(y, error_residuals)

    # Title and axis labels do not depend on each other.
    plt.title('Residual plot')
    plt.xlabel('Sale price')
    plt.ylabel('Errors')

    plt.show()


# Draw the residual plot for the module-level data at import time.
plot_residuals(y, error_residuals)
# %load q08_qq_residuals/build.py
# Default Imports
from greyatomlib.linear_regression.q01_load_data.build import load_data
from greyatomlib.linear_regression.q02_data_splitter.build import data_splitter
from greyatomlib.linear_regression.q03_linear_regression.build import linear_regression
from greyatomlib.linear_regression.q04_linear_predictor.build import linear_predictor
from greyatomlib.linear_regression.q05_residuals.build import residuals
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import pylab
import scipy.stats as stats

# Module-level setup: runs at import time and leaves these names available
# as module attributes.
dataframe = load_data('data/house_prices_multivariate.csv')
X, y = data_splitter(dataframe)
linear_model = linear_regression(X, y)
y_pred, y_mse, y_mae, y_r2 = linear_predictor(linear_model, X, y)
error_residuals = residuals(y, y_pred)


# Your code here
def qq_residuals(error_residuals):
    """Draw a labelled probability plot of ``error_residuals`` against a
    normal distribution and display it.

    Parameters
    ----------
    error_residuals : sample passed straight to ``scipy.stats.probplot``.

    Returns
    -------
    None. The plot is rendered through ``pylab``.
    """
    stats.probplot(error_residuals, dist='norm', plot=pylab)
    # Explicit title and axis labels, applied after probplot has drawn.
    pylab.title('Probability Plot')
    pylab.ylabel('Ordered Values')
    pylab.xlabel('Theoretical quantiles')
    pylab.show()