def get_b0b1(self, x, y):
    """Fit a simple linear regression line and return its coefficients.

    b1 is the slope of the regression line; b0 is its intercept with
    the y-axis.  Equation for the simple regression line:
    y-hat = b0 + b1*x, where

        b1 = sum((x_i - x_bar) * (y_i - y_bar)) / sum((x_i - x_bar)**2)
        b0 = y_bar - b1 * x_bar

    :param x: a list of x values for a series of points
    :param y: a list of y values for a series of points
    :return: b0, b1
    :raises ZeroDivisionError: if all x values are identical (no slope).
    """
    x_bar = mean(x)
    y_bar = mean(y)
    x_dev = [xi - x_bar for xi in x]
    y_dev = [yi - y_bar for yi in y]
    # NOTE: the original named these lists the wrong way round
    # ("b1_column_denom" held the numerator terms and vice versa);
    # renamed so each name matches its role in the b1 formula.
    numerator = sum(xd * yd for xd, yd in zip(x_dev, y_dev))
    denominator = sum(xd ** 2 for xd in x_dev)
    b1 = numerator / denominator
    b0 = y_bar - (b1 * x_bar)
    return b0, b1
def get_b0b1(self, x, y):
    """Compute the coefficients of the simple linear regression line.

    The fitted line is y-hat = b0 + b1*x, where b1 is the slope and
    b0 is the intercept with the y-axis.

    :param x: a list of x values for a series of points
    :param y: a list of y values for a series of points
    :return: b0, b1
    """
    mean_x = mean(x)
    mean_y = mean(y)
    sxy = 0  # running sum of (x_i - mean_x) * (y_i - mean_y)
    sxx = 0  # running sum of (x_i - mean_x) ** 2
    for xi, yi in zip(x, y):
        dx = xi - mean_x
        sxy += dx * (yi - mean_y)
        sxx += dx * dx
    b1 = sxy / sxx
    b0 = mean_y - b1 * mean_x
    return b0, b1
def r_squared(self, x, y):
    """Return the coefficient of determination (R-squared) of the fit.

    R-squared measures how close the data lies to the fitted regression
    line: SS_explained / SS_total, where SS_explained is the sum of
    squared deviations of the predictions from the mean of y, and
    SS_total is the sum of squared deviations of y from its mean.
    Values near 1 indicate a close fit.

    :param x: a list of x values for a series of points
    :param y: a list of y values for a series of points
    :return: r_squared
    """
    mean_y = mean(y)
    ss_total = sum((yi - mean_y) ** 2 for yi in y)
    intercept, slope = self.get_b0b1(x, y)
    predictions = [intercept + slope * xi for xi in x]
    ss_explained = sum((p - mean_y) ** 2 for p in predictions)
    return ss_explained / ss_total
def r_squared(self, x, y):
    """Coefficient of determination for the simple linear regression.

    Measures how much of the variation in y is explained by the fitted
    line y-hat = b0 + b1*x; the higher the value, the better the model
    fits the data.

    :param x: a list of x values for a series of points
    :param y: a list of y values for a series of points
    :return: r_squared
    """
    avg = mean(y)
    # Total variation: squared deviations of the observations from avg.
    total_var = 0
    for yi in y:
        total_var += (yi - avg) ** 2
    b0, b1 = self.get_b0b1(x, y)
    # Explained variation: squared deviations of the fitted values from avg.
    explained_var = 0
    for xi in x:
        fitted = b0 + b1 * xi
        explained_var += (fitted - avg) ** 2
    return explained_var / total_var
def scale(data_matrix):
    """Return the mean and standard deviation of each column.

    :param data_matrix: matrix of numeric values; shape() is assumed to
        return (num_rows, num_cols) and get_column(m, j) the j-th column
        — TODO confirm against the helpers' definitions.
    :return: (means, stdevs) — two lists with one entry per column
    """
    # Only the column count is needed; the row count is deliberately ignored.
    _, num_cols = shape(data_matrix)
    means = [mean(get_column(data_matrix, j)) for j in range(num_cols)]
    stdevs = [
        standard_deviation(get_column(data_matrix, j)) for j in range(num_cols)
    ]
    return means, stdevs
def scale(data_matrix):
    """Compute per-column summary statistics of a matrix.

    :param data_matrix: matrix of numeric values
    :return: (means, stdevs) — the mean and standard deviation of each
        column, one entry per column
    """
    num_rows, num_cols = shape(data_matrix)
    means = []
    for j in range(num_cols):
        means.append(mean(get_column(data_matrix, j)))
    stdevs = []
    for j in range(num_cols):
        stdevs.append(standard_deviation(get_column(data_matrix, j)))
    return means, stdevs
def de_mean(x):
    """Translate x by subtracting its mean, so the result has mean 0.

    :param x: a list of numeric values
    :return: a new list with the mean of x subtracted from every element
    """
    center = mean(x)
    shifted = []
    for value in x:
        shifted.append(value - center)
    return shifted