import sys
import numpy as np
from pandas import DataFrame
from ast import literal_eval
import statsmodels.formula.api as sm
from sklearn.cross_validation import train_test_split
from stupidlySimplePredictions import get_data_from_file


# Command-line driver: fit an ordinary-least-squares model on a training split
# of the data and print the statsmodels summary table.
#
# The script reads ONE argument that holds both values separated by
# whitespace, e.g.  "<data-file> <num-years>"  (quote it in the shell).
command = sys.argv[1]
f_string, num_years = command.split()
num_years = int(num_years)

# presumably X is a 2-D feature array and y the matching targets -- confirm
# against get_data_from_file.
X, y = get_data_from_file(f_string, num_years)

# NOTE(review): train_test_split is imported from sklearn.cross_validation,
# which newer scikit-learn releases replaced with sklearn.model_selection.
# The fixed random_state keeps the 70/30 split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.3, random_state=123)

# OLS does not add an intercept on its own, so prepend a column of ones.
X1_train = np.concatenate((np.ones((len(y_train), 1)), X_train), axis=1)

result = sm.OLS(y_train, X1_train).fit()
# Parenthesised form prints identically on Python 2 and is valid on Python 3.
print(result.summary())
	for i in range(np.size(residual,0)):
		residual[i] = y[i][0] - prodsum[i]
	return residual

def sum_sq_res(p, y, X):
	'''Compute the sum of squared residuals of a linear combination.

	The prediction for each row is the sum over axis 1 of ``p * X``; the
	residual is the observed value minus that prediction.

	Parameters:
		p: coefficients, broadcast against the columns of ``X``.
		y: observed values. Either a column-like 2-D sequence, of which
			only the first column is used, or a flat 1-D sequence.
		X: 2-D array of regressors, one row per observation.

	Returns:
		The scalar sum of squared residuals.

	Raises:
		ValueError: if ``y`` and the row-wise predictions differ in shape.
	'''
	predicted = (p * X).sum(axis=1)

	observed = np.asarray(y, dtype=float)
	if observed.ndim > 1:
		# Only the first column carries the targets (was ``y[i][0]``).
		observed = observed[:, 0]

	# Refuse mismatched lengths explicitly so that NumPy broadcasting cannot
	# silently stretch a length-1 operand across all observations.
	if observed.shape != np.shape(predicted):
		raise ValueError(
			'y has shape %s but the predictions have shape %s'
			% (observed.shape, np.shape(predicted)))

	residual = observed - predicted
	return (residual**2).sum()
	
if __name__ == '__main__':
	X,y = get_data_from_file('HRminAB50minSeasons5.txt',5)
	#X = X[:,1:]
	X = pad(X)
	
	# unconstrained linear regression
	plsq = leastsq(residuals, p0, args=(y,X))
	
	# linear regression constrained to having coefficients sum
	# to one, so that the result is the optimal weighted average
	cons = ({'type': 'eq', 'fun': lambda x: np.sum(x) -1})
	constrained = minimize(sum_sq_res, p0, args=(y,X), constraints=cons,
							method='SLSQP', options={'disp':True})
	
	# plot results
	unc_y = (X*plsq[0]).sum(axis=1)
	con_y = (X*constrained.x).sum(axis=1)