def ridgereport(self, folds, cores, plotit, saveit):
    """Configure a ridge-style grid search and run the shared report.

    Stores ``folds`` and ``cores`` on the instance, sets ``self.pipeline``
    and ``self.parameters`` according to ``self.modeltype`` and then
    delegates to ``TuneModel.clfreport``.

    * ``'classify'``: an explicit ``Pipeline`` of imputer, randomized
      logistic feature selection and ``LogisticRegression``; the grid
      searches ``logit__C``.
    * ``'regress'``: a ``Ridge`` estimator wrapped by
      ``modelutilities.buildclfpipeline``; the grid searches
      ``regress__alpha`` and ``regress__normalize``.

    Any other ``self.modeltype`` leaves ``self.pipeline`` and
    ``self.parameters`` untouched.

    Args:
        folds: Value stored on ``self.folds``.
        cores: Value stored on ``self.cores``.
        plotit: Forwarded unchanged to ``TuneModel.clfreport``.
        saveit: Forwarded unchanged to ``TuneModel.clfreport``.

    Returns:
        Whatever ``TuneModel.clfreport`` returns.
    """
    self.folds = folds
    self.cores = cores

    if self.modeltype == 'classify':
        self.pipeline = Pipeline([
            ("imputer", Imputer(axis=0)),
            # Todo: get Logistic feature selection working
            ("randlogit", RandomizedLogisticRegression()),
            ("logit", LogisticRegression()),
        ])
        # The grid has to be a real dict assigned to self.parameters;
        # previously it was a malformed string that was never stored, so the
        # search ran with whatever grid an earlier call had left behind.
        # The key addresses the "logit" step of the pipeline above.
        self.parameters = {'logit__C': np.logspace(-2, 2, 10)}
    elif self.modeltype == 'regress':
        algorithm = Ridge()
        self.pipeline = modelutilities.buildclfpipeline(
            algorithm, self.impute, self.modeltype)
        baseparam = {'regress__alpha': np.logspace(-2, 2, 10),
                     'regress__normalize': (True, False)}
        self.parameters = modelutilities.buildgridparameters(
            baseparam, self.impute, self.modeltype)

    # Todo: make plotting symmetric between regress/class

    # Fit/Predict, run report, and return fit
    return TuneModel.clfreport(self, printsample=True, plotit=plotit,
                               saveit=saveit, title="RidgeRegression")
def randomforestreport(self, folds, cores, plotit, saveit):
    """Configure a random-forest grid search and run the shared report.

    Stores ``folds`` and ``cores`` on the instance. For the ``'classify'``
    and ``'regress'`` model types it wraps the matching random-forest
    estimator with ``modelutilities.buildclfpipeline`` and builds a grid
    over the number of estimators with
    ``modelutilities.buildgridparameters``. Any other ``self.modeltype``
    leaves ``self.pipeline`` and ``self.parameters`` untouched.

    Args:
        folds: Value stored on ``self.folds``.
        cores: Value stored on ``self.cores``.
        plotit: Forwarded unchanged to ``TuneModel.clfreport``.
        saveit: Forwarded unchanged to ``TuneModel.clfreport``.

    Returns:
        Whatever ``TuneModel.clfreport`` returns.
    """
    self.folds = folds
    self.cores = cores

    # Choose the estimator and the grid key that belongs to it.
    # todo: Add auto feature selection into auto pipeline creation
    if self.modeltype == 'classify':
        forest = RandomForestClassifier()
        trees_key = 'classify__n_estimators'
    elif self.modeltype == "regress":
        forest = RandomForestRegressor()
        trees_key = 'regress__n_estimators'
    else:
        forest = None
        trees_key = None

    if forest is not None:
        self.pipeline = modelutilities.buildclfpipeline(
            forest, self.impute, self.modeltype)
        search_space = {trees_key: [10, 50, 100, 250, 500]}
        self.parameters = modelutilities.buildgridparameters(
            search_space, self.impute, self.modeltype)

    # Fit/Predict, run report, and return fit
    return TuneModel.clfreport(self, printsample=True, plotit=plotit,
                               saveit=saveit, title="RandomForest")
def logitreport(self, folds, cores, plotit, saveit):
    """Configure a linear-model grid search and run the shared report.

    Stores ``folds`` and ``cores`` on the instance. ``'classify'`` uses
    ``LogisticRegression`` with a grid over ``classify__C``; ``'regress'``
    uses ``LinearRegression`` with a grid over ``regress__normalize``.
    Pipeline and grid are produced by ``modelutilities.buildclfpipeline``
    and ``modelutilities.buildgridparameters``. Any other
    ``self.modeltype`` leaves ``self.pipeline`` and ``self.parameters``
    untouched.

    Args:
        folds: Value stored on ``self.folds``.
        cores: Value stored on ``self.cores``.
        plotit: Forwarded unchanged to ``TuneModel.clfreport``.
        saveit: Forwarded unchanged to ``TuneModel.clfreport``.

    Returns:
        Whatever ``TuneModel.clfreport`` returns. The report title is
        always ``"LinearRegression"``, regardless of model type.
    """
    self.folds = folds
    self.cores = cores

    wants_classifier = self.modeltype == 'classify'
    wants_regressor = not wants_classifier and self.modeltype == 'regress'

    if wants_classifier or wants_regressor:
        # Todo: get Logistic feature selection working (test on IU)
        estimator = (LogisticRegression() if wants_classifier
                     else LinearRegression())
        self.pipeline = modelutilities.buildclfpipeline(
            estimator, self.impute, self.modeltype)

        if wants_classifier:
            search_space = {'classify__C': (np.logspace(-2, 2, 10))}
        else:
            search_space = {'regress__normalize': (False, True)}
        self.parameters = modelutilities.buildgridparameters(
            search_space, self.impute, self.modeltype)

    # Todo: make plotting symmetric between regress/class
    # Todo: make plotit, saveit False everywhere by default (such that
    #       they're optional parameters)

    # Fit/Predict, run report, and return fit
    return TuneModel.clfreport(self, printsample=True, plotit=plotit,
                               saveit=saveit, title="LinearRegression")
def treereport(self, folds, cores, plotit, saveit):
    """Configure a decision-tree grid search and run the shared report.

    Stores ``folds`` and ``cores`` on the instance.

    * ``'classify'``: an explicit ``Pipeline`` of imputer,
      ``SelectFromModel(LinearSVC(), threshold="median")`` and
      ``DecisionTreeClassifier``, with a hand-written grid over the tree
      criterion, class weight and imputer strategy.
    * ``'regress'``: ``DecisionTreeRegressor`` wrapped by
      ``modelutilities.buildclfpipeline`` with a grid over
      ``regress__splitter``.

    Any other ``self.modeltype`` leaves ``self.pipeline`` and
    ``self.parameters`` untouched.

    Args:
        folds: Value stored on ``self.folds``.
        cores: Value stored on ``self.cores``.
        plotit: Forwarded unchanged to ``TuneModel.clfreport``.
        saveit: Forwarded unchanged to ``TuneModel.clfreport``.

    Returns:
        Whatever ``TuneModel.clfreport`` returns.
    """
    self.folds = folds
    self.cores = cores

    if self.modeltype == 'classify':
        imputer_step = ("imputer", Imputer(axis=0))
        selector_step = ("feature_selection",
                         SelectFromModel(LinearSVC(), threshold="median"))
        tree_step = ("trees", DecisionTreeClassifier())
        self.pipeline = Pipeline([imputer_step, selector_step, tree_step])

        # Set the parameters by cross-validation
        search_space = {}
        search_space['trees__criterion'] = ["gini", "entropy"]
        search_space['trees__class_weight'] = ["balanced"]
        search_space['imputer__strategy'] = ('mean', 'median',
                                             'most_frequent')
        self.parameters = search_space
    elif self.modeltype == "regress":
        regressor = DecisionTreeRegressor()
        self.pipeline = modelutilities.buildclfpipeline(
            regressor, self.impute, self.modeltype)
        search_space = {'regress__splitter': ('best', 'random')}
        self.parameters = modelutilities.buildgridparameters(
            search_space, self.impute, self.modeltype)

    # Fit/Predict, run report, and return fit
    return TuneModel.clfreport(self, printsample=True, plotit=plotit,
                               saveit=saveit, title="DecisionTree")
def gradboostreport(self, folds, cores, plotit, saveit):
    """Configure a gradient-boosting grid search and run the shared report.

    Stores ``folds`` and ``cores`` on the instance. Only ``'regress'`` is
    configured: ``GradientBoostingRegressor`` is wrapped by
    ``modelutilities.buildclfpipeline`` and searched over ``regress__loss``
    and ``regress__n_estimators``. For ``'classify'`` (and any other
    ``self.modeltype``) nothing is configured, so ``self.pipeline`` and
    ``self.parameters`` keep whatever values they already had.

    Args:
        folds: Value stored on ``self.folds``.
        cores: Value stored on ``self.cores``.
        plotit: Forwarded unchanged to ``TuneModel.clfreport``.
        saveit: Forwarded unchanged to ``TuneModel.clfreport``.

    Returns:
        Whatever ``TuneModel.clfreport`` returns.
    """
    self.folds = folds
    self.cores = cores

    if self.modeltype == 'classify':
        # todo: set up classifier gridsearch
        use_regressor = False
    else:
        use_regressor = self.modeltype == "regress"

    if use_regressor:
        booster = GradientBoostingRegressor()
        self.pipeline = modelutilities.buildclfpipeline(
            booster, self.impute, self.modeltype)
        search_space = {
            'regress__loss': ['ls', 'lad'],
            # 'regress__learning_rate': [0.05, 0.1],  (currently disabled)
            'regress__n_estimators': [75, 100, 250, 350, 500],
        }
        self.parameters = modelutilities.buildgridparameters(
            search_space, self.impute, self.modeltype)

    # Fit/Predict, run report, and return fit
    return TuneModel.clfreport(self, printsample=True, plotit=plotit,
                               saveit=saveit, title="GradientBoosting")
def svmreport(self, folds, cores, plotit, saveit):
    """Configure a linear-SVM grid search and run the shared report.

    Stores ``folds`` and ``cores`` on the instance.

    * ``'classify'``: an explicit ``Pipeline`` of imputer,
      ``SelectFromModel(LinearSVC(), threshold="median")`` and
      ``LinearSVC``, with a hand-written grid over the imputer strategy
      and ``lsvc__C``.
    * ``'regress'``: ``LinearSVR`` wrapped by
      ``modelutilities.buildclfpipeline`` with a grid over ``regress__C``.

    Any other ``self.modeltype`` leaves ``self.pipeline`` and
    ``self.parameters`` untouched.

    Args:
        folds: Value stored on ``self.folds``.
        cores: Value stored on ``self.cores``.
        plotit: Forwarded unchanged to ``TuneModel.clfreport``.
        saveit: Forwarded unchanged to ``TuneModel.clfreport``.

    Returns:
        Whatever ``TuneModel.clfreport`` returns.
    """
    self.folds = folds
    self.cores = cores

    if self.modeltype == 'classify':
        imputer_step = ("imputer", Imputer(axis=0))
        selector_step = ("feature_selection",
                         SelectFromModel(LinearSVC(), threshold="median"))
        svc_step = ("lsvc", LinearSVC())
        self.pipeline = Pipeline([imputer_step, selector_step, svc_step])

        # Set the parameters by cross-validation
        search_space = {}
        search_space['imputer__strategy'] = ('mean', 'median',
                                             'most_frequent')
        search_space['lsvc__C'] = (np.logspace(-2, 2, 10))
        self.parameters = search_space
    elif self.modeltype == "regress":
        regressor = LinearSVR()
        self.pipeline = modelutilities.buildclfpipeline(
            regressor, self.impute, self.modeltype)
        search_space = {'regress__C': (np.logspace(-2, 2, 10))}
        self.parameters = modelutilities.buildgridparameters(
            search_space, self.impute, self.modeltype)

    # todo: fix plt.show() stopping program execution

    # Fit/Predict, run report, and return fit
    return TuneModel.clfreport(self, printsample=True, plotit=plotit,
                               saveit=saveit, title="SVM")