# Fit an unconstrained OLS model (with intercept) to data loaded from a
# results file, and print the regression summary.
#
# Usage: python <script> "<data_file> <num_years>"
#    or: python <script> <data_file> <num_years>
import sys

import numpy as np
from pandas import DataFrame      # kept: may be used by code outside this view
from ast import literal_eval      # kept: may be used by code outside this view
# NOTE(review): the array-based OLS(y, X) class lives in statsmodels.api;
# statsmodels.formula.api removed the capitalized model classes in 0.11,
# so the old `import statsmodels.formula.api as sm` breaks on current versions.
import statsmodels.api as sm
# sklearn.cross_validation was removed in scikit-learn 0.20;
# model_selection has been the home of train_test_split since 0.18.
from sklearn.model_selection import train_test_split

from stupidlySimplePredictions import get_data_from_file

# Accept either the original single quoted "file years" argument or two
# separate arguments: joining argv then splitting handles both forms,
# and indexing (rather than tuple-unpacking) ignores any trailing extras.
args = " ".join(sys.argv[1:]).split()
f_string = args[0]
num_years = int(args[1])

# Load feature matrix X and target vector y, then hold out 30% for testing.
# random_state is fixed so the split is reproducible across runs.
X, y = get_data_from_file(f_string, num_years)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=123)

# Prepend a column of ones so the OLS fit includes an intercept term.
X1_train = np.concatenate((np.ones((len(y_train), 1)), X_train), axis=1)
result = sm.OLS(y_train, X1_train).fit()
print(result.summary())
# (fragment: tail of a residuals() function whose signature is above this view;
#  `residual`, `y`, and `prodsum` are bound earlier in that function)
    for i in range(np.size(residual, 0)):
        residual[i] = y[i][0] - prodsum[i]
    return residual


def sum_sq_res(p, y, X):
    '''Compute sum of squared residuals.

    p -- 1-D weight vector applied element-wise across the columns of X.
    y -- targets indexed as y[i][0], so presumably an (n, 1) column
         array -- TODO confirm against get_data_from_file.
    X -- 2-D feature matrix, one row per observation.
    Returns the scalar objective J = sum_i (y_i - dot(p, X[i]))**2,
    suitable as the cost function for a constrained minimizer.
    '''
    prod = p * X
    prodsum = prod.sum(axis=1)  # per-row prediction: dot(p, X[i])
    residual = np.zeros([np.size(y, 0)])
    for i in range(np.size(residual, 0)):
        residual[i] = y[i][0] - prodsum[i]
    J = (residual ** 2).sum()
    return J


# NOTE(review): p0, pad, residuals, leastsq and minimize are defined or
# imported above this chunk -- not visible here.
if __name__ == '__main__':
    X, y = get_data_from_file('HRminAB50minSeasons5.txt', 5)
    #X = X[:,1:]
    X = pad(X)
    # unconstrained linear regression
    plsq = leastsq(residuals, p0, args=(y, X))
    # linear regression constrained to having coefficients sum
    # to one, so that the result is the optimal weighted average
    cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    constrained = minimize(sum_sq_res, p0, args=(y, X),
                           constraints=cons, method='SLSQP',
                           options={'disp': True})
    # plot results (plotting code presumably continues below this view)
    unc_y = (X * plsq[0]).sum(axis=1)       # unconstrained fitted values
    con_y = (X * constrained.x).sum(axis=1)  # constrained fitted values