# Baseline script: fit a plain LinearRegression on the training split and
# hand its test-set predictions to evaluateStrategy.

# Importing the libraries
from sklearn.linear_model import LinearRegression

from DataProcessing import getTrainTestData
from EvaluateStrategy import evaluateStrategy

# Get the training and testing data.
# Only the first four returned values are used here. Other scripts that call
# getTrainTestData() unpack seven values, so slice instead of unpacking the
# whole result: a bare four-name unpack raises "too many values to unpack"
# when more than four values come back.
X, y, X_test, y_test = getTrainTestData()[:4]

# Initialize regressor and fit it on the training data
regressor = LinearRegression()
regressor.fit(X, y)

# Predicting the Test set results
y_pred = regressor.predict(X_test)

# Evaluate the results
evaluateStrategy(y_pred)
from sklearn.linear_model import LinearRegression from sklearn.ensemble import GradientBoostingRegressor from EvaluateStrategy import evaluateStrategy from DataProcessing import getTrainTestData from sklearn.linear_model import Ridge from sklearn.metrics import mean_squared_error from math import sqrt # This script creates and tests different combinations of model ensemlble based on: # the three models: Linear Regression, Ridge Regression and GBRT X_train, y_train, X_test, y_test, features, dataset, test = getTrainTestData() # Linear Regression lr = LinearRegression() lr.fit(X_train, y_train) lr_y_pred = lr.predict(X_test) rms_linear = sqrt(mean_squared_error(y_test, lr_y_pred)) print("Linear regression RMSE= %f" % rms_linear) # Ridge Regression ridgeReg = Ridge(alpha=0.05, normalize=True) ridgeReg.fit(X_train, y_train) rigde_y_pred = ridgeReg.predict(X_test) rms_ridge = sqrt(mean_squared_error(y_test, rigde_y_pred)) print("Ridge regression RMSE= %f" % rms_ridge) # GBRT meta_regressor = GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None, learning_rate=0.1, loss='ls', max_depth=10, max_features=1.0, max_leaf_nodes=None, min_impurity_decrease=0.0,
# Ridge regression script: grid-search the regularization strength `alpha`
# with 3-fold cross-validation, then hand the best model's test-set
# predictions to evaluateStrategy.
from sklearn.linear_model import Ridge

from DataProcessing import getTrainTestData
from EvaluateStrategy import evaluateStrategy

# sklearn.grid_search was deprecated in scikit-learn 0.18 and removed in 0.20;
# GridSearchCV now lives in sklearn.model_selection. Fall back to the old
# module only so the script keeps working on pre-0.18 installations.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:
    from sklearn.grid_search import GridSearchCV

# Get the training and testing data (c, v and d are not used in this script)
X_train, y_train, X_test, y_test, c, v, d = getTrainTestData()

# Initialize regressor
# NOTE(review): Ridge's `normalize` argument was deprecated in scikit-learn 1.0
# and removed in 1.2. On newer versions this line needs an explicit scaling
# step instead -- confirm the target scikit-learn version before changing it,
# since the replacement is not numerically identical.
ridgeReg = Ridge(normalize=True)
param_grid = {'alpha': [0.001, 0.01, 0.05, 0.1, 1]}

# Despite the variable names below, this is a regression search: the
# estimator being tuned is Ridge.
classifier = GridSearchCV(estimator=ridgeReg, cv=3, param_grid=param_grid, n_jobs=1)
classifier.fit(X_train, y_train)
best_logistic_regressor = classifier.best_estimator_
print("Best Estimator:")
print(best_logistic_regressor)

# This is the best estimator, uncomment to use and comment GridSearchCV
#best_logistic_regressor = Ridge(alpha=0.05, copy_X=True, fit_intercept=True, max_iter=None,
#   normalize=True, random_state=None, solver='auto', tol=0.001)

# With GridSearchCV's default refit=True the best estimator is already fitted
# on the full training set; this fit is kept because the hard-coded Ridge
# alternative above needs it.
best_logistic_regressor.fit(X_train, y_train)

# Predicting the Test set results
y_pred = best_logistic_regressor.predict(X_test)

# Evaluate the results
evaluateStrategy(y_pred)