def correlation_matrix(data):
    """Return the num_cols x num_cols matrix of pairwise column correlations.

    The (i, j)th entry is ``stats.correlation`` applied to columns i and j
    of ``data``, where the column count is the second value reported by
    ``lin_alg.shape(data)``.
    """
    _, num_columns = lin_alg.shape(data)

    def matrix_entry(i, j):
        # Both lookups go through the lin_alg namespace so this does not
        # rely on a bare ``get_col`` happening to be in module scope.
        return stats.correlation(
            lin_alg.get_col(data, i),
            lin_alg.get_col(data, j),
        )

    return lin_alg.make_matrix(num_columns, num_columns, matrix_entry)
def scale(data_matrix):
    """Return the mean and standard deviation of each column.

    Returns a pair ``(means, stdevs)`` of lists; position j in each list
    holds the statistic for column j of ``data_matrix``.
    """
    _, num_cols = lin_alg.shape(data_matrix)

    # Extract every column once and reuse it for both statistics instead
    # of calling get_col twice per column.
    # NOTE(review): assumes get_col returns a reusable sequence (e.g. a
    # list), not a one-shot iterator -- confirm against lin_alg.
    columns = [lin_alg.get_col(data_matrix, j) for j in range(num_cols)]

    means = [stats.mean(column) for column in columns]
    stdevs = [stats.standard_deviation(column) for column in columns]
    return means, stdevs
def de_mean_matrix(A):
    """Return a copy of A with each column's mean subtracted from its values.

    Every entry ``A[i][j]`` is replaced by ``A[i][j] - mean(column j)``, so
    each column of the resulting matrix has mean 0 (up to floating-point
    rounding). The result has the same shape as ``A``.
    """
    nr, nc = lin_alg.shape(A)
    # scale() returns (means, stdevs); only the column means are needed.
    column_means, _ = scale(A)

    def centered_entry(i, j):
        return A[i][j] - column_means[j]

    # Pass the entry function positionally, as correlation_matrix does, so
    # the call does not depend on make_matrix's third parameter being
    # named ``key``.
    return lin_alg.make_matrix(nr, nc, centered_entry)