# Importing the libraries
from sklearn.linear_model import LinearRegression
from DataProcessing import getTrainTestData
from EvaluateStrategy import evaluateStrategy


# Get the training and testing data.
# Only the first four returned values are needed here. Other scripts in this
# file unpack seven values from getTrainTestData(), and a plain four-name
# unpack raises ValueError when extra values are returned, so take a slice.
# NOTE(review): assumes the return value is a sliceable sequence (a tuple or
# list) whose first four items are train X, train y, test X, test y - confirm
# against DataProcessing.getTrainTestData.
X, y, X_test, y_test = getTrainTestData()[:4]

# Initialize the regressor and fit it on the training data
regressor = LinearRegression()
regressor.fit(X, y)

# Predicting the Test set results
y_pred = regressor.predict(X_test)

# Evaluate the results by handing the predictions to the project helper
evaluateStrategy(y_pred)





from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from EvaluateStrategy import evaluateStrategy
from DataProcessing import getTrainTestData
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from math import sqrt

# This script creates and tests different combinations of model ensembles based on
# three models: Linear Regression, Ridge Regression and GBRT.

def _rmse(actual, predicted):
    """Return the square root of the mean squared error of *predicted*."""
    return sqrt(mean_squared_error(actual, predicted))


# Load the data once; both base models below share the same split.
(X_train, y_train, X_test, y_test,
 features, dataset, test) = getTrainTestData()

# Linear Regression: fit on the training split, score on the test split
lr = LinearRegression().fit(X_train, y_train)
lr_y_pred = lr.predict(X_test)
rms_linear = _rmse(y_test, lr_y_pred)
print("Linear regression RMSE= %f" % rms_linear)

# Ridge Regression with a fixed regularisation strength
# NOTE: the misspelt name `rigde_y_pred` is kept as-is because code beyond
# this section may reference it.
ridgeReg = Ridge(alpha=0.05, normalize=True).fit(X_train, y_train)
rigde_y_pred = ridgeReg.predict(X_test)
rms_ridge = _rmse(y_test, rigde_y_pred)
print("Ridge regression RMSE= %f" % rms_ridge)

# GBRT
meta_regressor = GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=10, max_features=1.0,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
from sklearn.linear_model import Ridge
from DataProcessing import getTrainTestData
from EvaluateStrategy import evaluateStrategy
from sklearn.grid_search import GridSearchCV

# Load the train/test split. The last three returned values are not used in
# this script, hence the underscore-prefixed names.
X_train, y_train, X_test, y_test, _c, _v, _d = getTrainTestData()

# Cross-validated (3-fold) search over candidate Ridge alpha values
alpha_grid = {'alpha': [0.001, 0.01, 0.05, 0.1, 1]}
grid_search = GridSearchCV(estimator=Ridge(normalize=True),
                           param_grid=alpha_grid,
                           cv=3,
                           n_jobs=1)
grid_search.fit(X_train, y_train)

# Report the configuration the search selected
best_ridge = grid_search.best_estimator_
print("Best Estimator:")
print(best_ridge)

# The estimator below is the best one found; to skip the search, comment out
# the GridSearchCV lines above and uncomment this instead.
#best_ridge = Ridge(alpha=0.05, copy_X=True, fit_intercept=True, max_iter=None,
#   normalize=True, random_state=None, solver='auto', tol=0.001)

# Fit the chosen estimator on the training data, then predict the test set
y_pred = best_ridge.fit(X_train, y_train).predict(X_test)

# Evaluate the results
evaluateStrategy(y_pred)