def __init__(self, pathToData):
    """Record the data location and configure an AdaBoost regression setup.

    Parameters
    ----------
    pathToData : str
        Path to the input data file; stored, not opened here.
    """
    self.dataFilePath = pathToData
    self.algoname = 'Boosting'
    self.datasetName = 'Abalone'
    # A decision-tree regressor is the weak learner boosted by AdaBoost.
    self.baseEstimater = dtr()
    # NOTE(review): `base_estimator` was renamed to `estimator` in
    # scikit-learn 1.2 and removed in 1.4 — confirm the pinned version.
    self.classifier = abr(base_estimator=self.baseEstimater)
    # Number of cross-validation folds used downstream.
    self.cv = 5
def get_regression_model(algo, poly_Order=2, **kwargs):
    """Build and return an (unfitted) regression model for a short algo code.

    Parameters
    ----------
    algo : str
        One of 'XGR', 'RFR', 'ABR', 'P1R', 'P2R', 'ANN', 'ELN', 'E2R', 'PLS'.
    poly_Order : int, optional
        Polynomial degree used by the 'P2R' and 'E2R' pipelines.
    **kwargs
        Passed straight through to the underlying estimator constructor.

    Returns
    -------
    estimator
        A scikit-learn-compatible estimator or pipeline.

    Raises
    ------
    ValueError
        If ``algo`` is not a recognized code.
    """
    # Conditional construction per algorithm; see
    # https://stackoverflow.com/questions/12860841/python-import-in-if
    if algo == 'XGR':
        mod = xgr(**kwargs)
    elif algo == 'RFR':
        mod = rfr(**kwargs)
    elif algo == 'ABR':
        mod = abr(**kwargs)
    elif algo == 'P1R':
        mod = LinearRegression(**kwargs)
    elif algo == 'P2R':
        mod = make_pipeline(PolynomialFeatures(poly_Order), Ridge(**kwargs))
    elif algo == 'ANN':
        mod = MLPRegressor(**kwargs)
    elif algo == 'ELN':
        mod = ElasticNet(**kwargs)  # add parameters later
    elif algo == 'E2R':
        mod = make_pipeline(PolynomialFeatures(poly_Order), ElasticNet(**kwargs))
    elif algo == 'PLS':
        mod = PLSRegression(**kwargs)
    else:
        # Raise instead of print + sys.exit(): library code should not kill
        # the interpreter, and callers can catch ValueError.
        raise ValueError(
            f"Algorithm {algo!r} has not yet been added to the menu.")
    return mod
def regression(self, metric="neg_root_mean_squared_error", folds=10,
               alphas=(), graph=False):
    """Cross-validate a suite of baseline regressors and display a report.

    Parameters
    ----------
    metric : str, optional
        A scikit-learn scoring string.  Must be one of the ``neg_*`` error
        scorers, since the sign is flipped back to a positive error below.
        (The previous default, 'root_mean_squared_error', is not a valid
        scorer name and made the default call fail.)
    folds : int, optional
        Number of K-fold splits.
    alphas : sequence of float, optional
        Candidate regularization strengths for the Ridge CV regressor.
        Immutable tuple default avoids the mutable-default-argument pitfall.
    graph : bool, optional
        If True, also show a box plot comparing per-fold scores.

    Returns
    -------
    None
        Results are displayed; the model zoo is kept in ``self.models``.
    """
    size = 1.3 * self.report_width // 10
    models = {}
    models["Linear regressor"] = lr()
    models["Lasso regressor"] = lassor()
    models["Lasso CV regressor"] = lassocvr()
    # NOTE(review): `normalize=` was deprecated in scikit-learn 1.0 and
    # removed in 1.2 — confirm the pinned version still accepts it.
    models["Ridge regressor"] = rr(alpha=0, normalize=True)
    models["Ridge CV regressor"] = rcvr(alphas=alphas)
    models["K nearest neighbors regressor K2u"] = knnr(n_neighbors=2, weights='uniform')
    models["K nearest neighbors regressor K2d"] = knnr(n_neighbors=2, weights='distance')
    models["K nearest neighbors regressor K5"] = knnr(n_neighbors=5)
    models["K nearest neighbors regressor K10"] = knnr(n_neighbors=10)
    models["SGD regressor"] = sgdr(max_iter=10000, warm_start=True)
    models["Decision tree regressor"] = dtr()
    models["Decision tree regressor D3"] = dtr(max_depth=3)
    models["Random forest regressor"] = rfr()
    models["Ada boost regressor"] = abr()
    models["Gradient boost regressor"] = gbr()
    models["Support vector regressor"] = svr()
    self.models = models
    print('\n')
    print(self.report_width * '*', '\n*')
    print('* REGRESSION RESULTS - BEFORE PARAMETERS BOOSTING \n*')
    kf = KFold(n_splits=folds)
    results = []
    names = []
    for model_name in models:
        # neg_* scorers return negated errors; flip the sign back so lower
        # reported numbers mean better fits.
        cv_scores = -1 * cross_val_score(models[model_name], self.Xt_train,
                                         self.yt_train.values.ravel(),
                                         cv=kf, scoring=metric)
        results.append(cv_scores)
        names.append(model_name)
    print(self.report_width * '*', '')
    report = pd.DataFrame({'Regressor': names, 'Score': results})
    report['Score (avg)'] = report.Score.apply(lambda x: x.mean())
    report['Score (std)'] = report.Score.apply(lambda x: x.std())
    # Coefficient of variation: fold-to-fold spread relative to the mean.
    report['Score (VC)'] = 100 * report['Score (std)'] / report['Score (avg)']
    report.sort_values(by='Score (avg)', inplace=True)
    report.drop('Score', axis=1, inplace=True)
    display(report)
    print('\n')
    if graph:
        fig, ax = plt.subplots(figsize=(size, 0.5 * size))
        plt.title('Regressor Comparison')
        plt.boxplot(results)
        ax.set_xticklabels(names)
        plt.xticks(rotation=45)
        plt.subplots_adjust(hspace=0.0)
        plt.show()
    return None
def get_regression_model(algo, settings, print_mod_info=False):
    """Build a regression model from a per-algorithm positional settings list.

    Parameters
    ----------
    algo : str
        One of 'XGR', 'RFR', 'ABR', 'P1R', 'P2R', 'ANN', 'ELN', 'E2R', 'PLS'.
    settings : sequence
        Positional hyperparameters; meaning depends on ``algo`` (see the
        branches below for which index maps to which parameter).
    print_mod_info : bool, optional
        If True, print the constructed estimator.

    Returns
    -------
    estimator
        A scikit-learn-compatible estimator or pipeline.

    Raises
    ------
    ValueError
        If ``algo`` is not a recognized code.
    """
    # Conditional construction per algorithm; see
    # https://stackoverflow.com/questions/12860841/python-import-in-if
    if algo == 'XGR':
        mod = xgr(n_estimators=settings[0], max_depth=settings[1])
        if print_mod_info:
            print('XGBoost:', mod)
    elif algo == 'RFR':
        mod = rfr(n_estimators=settings[0])
        if print_mod_info:
            print('Random Forest:', mod)
    elif algo == 'ABR':
        mod = abr(n_estimators=settings[0])
        if print_mod_info:
            print('AdaBoost:', mod)
    elif algo == 'P1R':
        mod = LinearRegression()
        if print_mod_info:
            print('Linear:', mod)
    elif algo == 'P2R':
        mod = make_pipeline(PolynomialFeatures(settings[0]), Ridge())
        if print_mod_info:
            print('Poly 2:', mod)
    elif algo == 'ANN':
        mod = MLPRegressor(
            solver='lbfgs',
            hidden_layer_sizes=(settings[0], settings[1]),  # (137,73),
            tol=settings[2])
        if print_mod_info:
            print('Neural Net Regression:', mod)
    elif algo == 'ELN':
        mod = ElasticNet(alpha=settings[0], l1_ratio=settings[1])  # add parameters later
        if print_mod_info:
            print('Elastic Net Regression:', mod)
    elif algo == 'E2R':
        mod = make_pipeline(
            PolynomialFeatures(settings[0]),
            ElasticNet(alpha=settings[1], l1_ratio=settings[2]))
        if print_mod_info:
            print('Poly 2:', mod)
    elif algo == 'PLS':
        mod = PLSRegression(n_components=settings[0])
        if print_mod_info:
            print('Partial Least Squares Regression:', mod)
    else:
        # Raise instead of print + sys.exit(): library code should not kill
        # the interpreter, and callers can catch ValueError.
        raise ValueError(f"Algorithm {algo!r} not setup yet.")
    return mod
def regression(self, metric, folds=10, alphas=(), printt=True, graph=False):
    """Cross-validate a suite of baseline regressors; store and report results.

    Parameters
    ----------
    metric : str
        A scikit-learn scoring string; expected to be a ``neg_*`` error
        scorer, since the sign is flipped back to a positive error below.
    folds : int, optional
        Number of shuffled K-fold splits.
    alphas : sequence of float, optional
        Candidate regularization strengths for the Ridge CV regressor.
        Immutable tuple default avoids the mutable-default-argument pitfall.
    printt : bool, optional
        If True, print the performance report.
    graph : bool, optional
        If True, draw (and retain in ``self.graphs_model``) a comparison
        box plot.

    Returns
    -------
    None
        The report is stored in ``self.report_performance`` and the model
        zoo in ``self.models``.
    """
    size = self.graph_width
    # significant model setup differences should be list as different models
    models = {}
    models["Linear regressor"] = lr()
    models["Lasso regressor"] = lassor()
    models["Lasso CV regressor"] = lassocvr()
    # NOTE(review): `normalize=` was deprecated in scikit-learn 1.0 and
    # removed in 1.2 — confirm the pinned version still accepts it.
    models["Ridge regressor"] = rr(alpha=0, normalize=True)
    models["Ridge CV regressor"] = rcvr(alphas=alphas)
    models["Elastic net regressor"] = enr()
    models["K nearest neighbors regressor K2u"] = knnr(n_neighbors=2, weights='uniform')
    models["K nearest neighbors regressor K2d"] = knnr(n_neighbors=2, weights='distance')
    models["K nearest neighbors regressor K5"] = knnr(n_neighbors=5)
    models["K nearest neighbors regressor K10"] = knnr(n_neighbors=10)
    models["SGD regressor"] = sgdr(max_iter=10000, warm_start=True)
    models["Decision tree regressor"] = dtr()
    models["Decision tree regressor D3"] = dtr(max_depth=3)
    models["Random forest regressor"] = rfr()
    models["Ada boost regressor"] = abr()
    models["Gradient boost regressor"] = gbr()
    models["Support vector regressor RBF"] = svr()
    models["Support vector regressor Linear"] = svr('linear')
    models["Support vector regressor Poly"] = svr(kernel='poly')
    self.models = models
    kf = KFold(n_splits=folds, shuffle=True)
    results = []
    names = []
    et = []
    for model_name in models:
        start = time.time()
        # neg_* scorers return negated errors; flip the sign back positive.
        cv_scores = -1 * cross_val_score(models[model_name], self.Xt_train,
                                         self.yt_train, cv=kf, scoring=metric)
        results.append(cv_scores)
        names.append(model_name)
        et.append((time.time() - start))
    report = pd.DataFrame({'Model': names, 'Score': results, 'Elapsed Time': et})
    # np.sqrt turns per-fold MSE into RMSE — assumes `metric` is an
    # MSE-style scorer (e.g. neg_mean_squared_error); TODO confirm callers.
    report['Score (avg)'] = report.Score.apply(lambda x: np.sqrt(x).mean())
    report['Score (std)'] = report.Score.apply(lambda x: np.sqrt(x).std())
    # Coefficient of variation: fold-to-fold spread relative to the mean.
    report['Score (VC)'] = 100 * report['Score (std)'] / report['Score (avg)']
    report.sort_values(by='Score (avg)', inplace=True)
    report.drop('Score', axis=1, inplace=True)
    report.reset_index(inplace=True, drop=True)
    self.report_performance = report
    if printt:
        print('\n')
        print(self.report_width * '*', '\n*')
        print('* REGRESSION RESULTS - BEFORE PARAMETERS BOOSTING \n*')
        print(self.report_width * '*', '')
        print(report)
        print('\n')
    if graph:
        fig, ax = plt.subplots(figsize=(size, 0.5 * size))
        plt.title('Regressor Comparison')
        plt.boxplot(results)
        ax.set_xticklabels(names)
        plt.xticks(rotation=45)
        plt.subplots_adjust(hspace=0.0, bottom=0.25)
        self.graphs_model.append(fig)
        plt.show()
    return None