def compute_ols(X, y):
    """Fit y ~ X by ordinary least squares and return an ols_info_t summary.

    Notation is as in the Wikipedia article on ordinary least squares.

    Args:
        X: design matrix with n rows and p columns.  It must provide dims(),
            transpose(), the `*` operator and [row][column] indexing
            (presumably a sackmat_m matrix -- confirm against callers).
        y: vector of responses, multiplied by n-column matrices below, so it
            is expected to have n entries.

    Returns:
        ols_info_t(n, p, coeffs, sigmahats, tstats, pvalue, R2, adjR2,
        se_of_regression, model_sum_of_sq, residual_sum_of_sq,
        total_sum_of_sq).  pvalue, model_sum_of_sq and total_sum_of_sq are
        not computed; they carry the placeholder value 999.0.
    """
    [n, p] = X.dims()

    Xt = X.transpose()
    XtX = Xt * X
    XtXi = XtX.inv()
    XtXi_Xt = XtXi * Xt

    # P = X (X'X)^-1 X' and M = I - P.  One might assert that P, M are
    # symmetric and idempotent; also PX=X and MX=0.
    P = X * XtXi_Xt
    I_n = sackmat_m.make_identity_matrix(n)
    M = I_n - P

    # L = I - (1/n) 1 1'
    ones = [1.0] * n
    L = I_n - sackmat_m.outer(ones, ones).smul(1.0 / n)

    # Qxx = E[xi xi'], accumulated row by row and then divided by n.
    Qxx = sackmat_m.make_zero_matrix(p, p)
    for k in range(0, n):
        for i in range(0, p):
            for j in range(0, p):
                Qxx[i][j] += X[k][i] * X[k][j]
    for i in range(0, p):
        for j in range(0, p):
            Qxx[i][j] /= n
    Qxxi = Qxx.inv()

    # This is the essence of OLS:
    coeffs = sackmat_m.matrix_times_vector(XtXi_Xt, y)

    # s2 divides y'My by n-p; sigmahat2 rescales that to a divide-by-n value.
    s2 = sackmat_m.vector_times_matrix_times_vector(y, M, y) / (n - p)
    se_of_regression = math.sqrt(s2)
    sigmahat2 = s2 * (n - p) / n

    sigmahats = [0] * p
    for j in range(0, p):
        sigmahats[j] = math.sqrt(1.0 / n * sigmahat2 * Qxxi[j][j])

    # The tstat_j is betahat_j / sigma_j
    tstats = sackmat_m.vecdiv(coeffs, sigmahats)

    # Same as y - X betahat
    epsilonhats = sackmat_m.matrix_times_vector(M, y)

    # Placeholders: these quantities are not computed here.
    pvalue = [999.0] * p
    model_sum_of_sq = 999.0
    residual_sum_of_sq = sackmat_m.vecdot(epsilonhats, epsilonhats)
    total_sum_of_sq = 999.0

    # R2 = y'LPy / y'Ly
    explained = sackmat_m.vector_times_matrix_times_vector(y, L * P, y)
    total = sackmat_m.vector_times_matrix_times_vector(y, L, y)
    R2 = explained / total

    # float() forces true division: n and p are ints, so under Python 2 the
    # quotient (n - 1) / (n - p - 1) would otherwise be floored to an integer
    # and silently distort the adjusted R2.
    adjR2 = 1 - float(n - 1) / (n - p - 1) * (1 - R2)

    return ols_info_t(n, p, coeffs, sigmahats, tstats, pvalue, R2, adjR2,
                      se_of_regression, model_sum_of_sq, residual_sum_of_sq,
                      total_sum_of_sq)
def compute_ols(X, y):
    """Compute ordinary-least-squares estimates and fit statistics for y on X.

    Notation is as in the Wikipedia article on ordinary least squares.

    Args:
        X: n-by-p design matrix offering dims(), transpose(), `*` and
            [row][column] indexing (presumably a sackmat_m matrix -- confirm
            against callers).
        y: response vector; it is multiplied by n-column matrices, so it is
            expected to hold n values.

    Returns:
        An ols_info_t constructed from n, p, the coefficient vector, the
        per-coefficient sigmahats and t-statistics, pvalue, R2, adjR2, the
        standard error of the regression, and the model / residual / total
        sums of squares.  pvalue, model_sum_of_sq and total_sum_of_sq are
        999.0 placeholders, not computed values.
    """
    [n, p] = X.dims()

    # Projection matrix P = X (X'X)^-1 X' and its complement M = I - P.
    # One might assert that P, M are symmetric and idempotent; also PX=X and
    # MX=0.
    Xt = X.transpose()
    XtX = Xt * X
    XtXi = XtX.inv()
    XtXi_Xt = XtXi * Xt
    P = X * XtXi_Xt
    I_n = sackmat_m.make_identity_matrix(n)
    M = I_n - P

    # L = I - (1/n) 1 1'
    ones = [1.0] * n
    L = I_n - sackmat_m.outer(ones, ones).smul(1.0 / n)

    # Qxx = E[xi xi']: sum the products over the n rows, then divide by n.
    Qxx = sackmat_m.make_zero_matrix(p, p)
    for k in range(0, n):
        for i in range(0, p):
            for j in range(0, p):
                Qxx[i][j] += X[k][i] * X[k][j]
    for i in range(0, p):
        for j in range(0, p):
            Qxx[i][j] /= n
    Qxxi = Qxx.inv()

    # This is the essence of OLS:
    coeffs = sackmat_m.matrix_times_vector(XtXi_Xt, y)

    s2 = sackmat_m.vector_times_matrix_times_vector(y, M, y) / (n - p)
    se_of_regression = math.sqrt(s2)
    # Same residual quadratic form as s2, but divided by n instead of n-p.
    sigmahat2 = s2 * (n - p) / n

    sigmahats = [0] * p
    for j in range(0, p):
        sigmahats[j] = math.sqrt(1.0 / n * sigmahat2 * Qxxi[j][j])

    # The tstat_j is betahat_j / sigma_j
    tstats = sackmat_m.vecdiv(coeffs, sigmahats)

    # Same as y - X betahat
    epsilonhats = sackmat_m.matrix_times_vector(M, y)
    residual_sum_of_sq = sackmat_m.vecdot(epsilonhats, epsilonhats)

    # Not computed; 999.0 is a placeholder value.
    pvalue = [999.0] * p
    model_sum_of_sq = 999.0
    total_sum_of_sq = 999.0

    R2 = (sackmat_m.vector_times_matrix_times_vector(y, L * P, y) /
          sackmat_m.vector_times_matrix_times_vector(y, L, y))

    # n and p are ints, so (n-1)/(n-p-1) is floor division under Python 2.
    # Convert the numerator to float so the ratio keeps its fractional part.
    dof_ratio = float(n - 1) / (n - p - 1)
    adjR2 = 1 - dof_ratio * (1 - R2)

    return ols_info_t(n, p, coeffs, sigmahats, tstats, pvalue, R2, adjR2,
                      se_of_regression, model_sum_of_sq, residual_sum_of_sq,
                      total_sum_of_sq)
# ---------------------------------------------------------------- basis = [[1, -1], [1, 2]] v = [3, 4] c = sackmat_m.basis_coeffs(v, basis) print "v = ", v print "c = ", c w = sackmat_m.linear_combination(c, basis) print "w = ", w print # ---------------------------------------------------------------- s = 1.0/sqrt(2.0) basis = [[s, s], [s, -s]] n = len(basis) for i in range(0, n): ui = basis[i] for j in range(0, n): uj = basis[j] uiuj = sackmat_m.vecdot(ui, uj) print " <u[%d],u[%d]>=%11.7f" % (i, j, uiuj), print print v = [3, 4] c = sackmat_m.basis_coeffs_on(v, basis) print "v = ", v print "c = ", c w = sackmat_m.linear_combination(c, basis) print "w = ", w
[y] = tabutil_m.float_columns_from_file(sys.argv[1]) xs = tabutil_m.float_columns_from_file(sys.argv[2]) else: print >> sys.stderr, "Usage: %s {y and xs file name}" % (sys.argv[0]) print >> sys.stderr, "Or: %s {xs file name} {y file name}" % (sys.argv[0]) sys.exit(1) # 2. compute XtX k = len(xs) n = len(y) XtX = sackmat_m.make_zero_matrix(k, k) for i in range(0, k): xi = xs[i] for j in range(i, k): xj = xs[j] xi_dot_xj = sackmat_m.vecdot(xi, xj) XtX[i][j] = xi_dot_xj if i != j: XtX[j][i] = xi_dot_xj # 3. commpute XxT.inv XtXi = XtX.inv() # xxx handle singular ... # 4. commpute XtX.inv Xt y Xt = sackmat_m.sackmat(xs) X = Xt.transpose() beta_hat = XtXi * Xt * y print 'beta_hat: ' print beta_hat
from math import * # ---------------------------------------------------------------- basis = [[1, -1], [1, 2]] v = [3, 4] c = sackmat_m.basis_coeffs(v, basis) print "v = ", v print "c = ", c w = sackmat_m.linear_combination(c, basis) print "w = ", w print # ---------------------------------------------------------------- s = 1.0 / sqrt(2.0) basis = [[s, s], [s, -s]] n = len(basis) for i in range(0, n): ui = basis[i] for j in range(0, n): uj = basis[j] uiuj = sackmat_m.vecdot(ui, uj) print " <u[%d],u[%d]>=%11.7f" % (i, j, uiuj), print print v = [3, 4] c = sackmat_m.basis_coeffs_on(v, basis) print "v = ", v print "c = ", c w = sackmat_m.linear_combination(c, basis) print "w = ", w