def test_sk_OrthogonalMatchingPursuitCV():
    """Fit an OrthogonalMatchingPursuitCV model on ``iris_data`` and upload it.

    The first row of the feature matrix is handed to ``upload`` as the
    sample feature vector, together with a small metadata dict.
    """
    print("Testing sklearn, OrthogonalMatchingPursuitCV...")
    estimator = linear_model.OrthogonalMatchingPursuitCV()
    features, targets = iris_data
    estimator.fit(features, targets)
    metadata = {'name': "OrthogonalMatchingPursuitCV test"}
    # A single sample (row 0) is sent along with the fitted model.
    sample_vector = features[0, :]
    upload(estimator, sample_vector, metadata)
def cross_validated_estimators_tests():
    """Run ``cross_validated_estimators`` once per cross-validated linear model.

    All estimators are instantiated up front (with default arguments) and
    then checked one after another, in the order listed.
    """
    estimators = [
        linear_model.ElasticNetCV(),
        linear_model.LarsCV(),
        linear_model.LassoCV(),
        linear_model.LassoLarsCV(),
        linear_model.LogisticRegressionCV(),
        linear_model.OrthogonalMatchingPursuitCV(),
        linear_model.RidgeClassifierCV(),
        linear_model.RidgeCV(),
    ]
    for estimator in estimators:
        cross_validated_estimators(estimator)
def orthogonal_matching_pursuit_cv_autoregression(df, sliding_window=1):
    """Fit OrthogonalMatchingPursuitCV over a sliding window of rows.

    For every window position, a new regressor is fitted with the rows
    ``data[x_start:x_end]`` as ``X`` and the row ``data[x_end + 1]`` as
    ``Y``; the regressor's prediction for that same window is collected.

    Args:
        df: Object exposing ``.index`` and label-based ``.loc`` row access
            (e.g. a pandas DataFrame).
        sliding_window: Number of rows in each window.

    Returns:
        A list with one ``regressor.predict(X)`` result per window
        position. Empty when there are not enough rows to form a window
        plus a target row.
    """
    # ``DataFrame.ix`` was removed from pandas; the labels come straight
    # from ``df.index`` so label-based ``.loc`` is the equivalent lookup.
    data = [df.loc[label] for label in df.index]

    predictions = []
    x_start = 0
    x_end = sliding_window
    # ``<`` (rather than ``!=``) also terminates when the window is larger
    # than the data, instead of indexing past the end.
    while x_end < len(data) - 1:
        Y = data[x_end + 1]
        X = data[x_start:x_end]
        regressor = linear_model.OrthogonalMatchingPursuitCV()
        regressor.fit(X, Y)
        predictions.append(regressor.predict(X))
        # Slide the window forward; without this the loop never progresses.
        x_start += 1
        x_end += 1
    return predictions
def test_model_orthogonal_matching_pursuit_cv(self):
    """Convert a fitted OrthogonalMatchingPursuitCV to ONNX and dump the result.

    The converter is called with a single float input whose second
    dimension is the number of columns of the fitted data, targeting
    ``TARGET_OPSET``.
    """
    fitted, data = fit_regression_model(
        linear_model.OrthogonalMatchingPursuitCV())
    n_features = data.shape[1]
    input_spec = [("input", FloatTensorType([None, n_features]))]
    onnx_model = convert_sklearn(
        fitted,
        "orthogonal matching pursuit cv",
        input_spec,
        target_opset=TARGET_OPSET,
    )
    self.assertIsNotNone(onnx_model)
    dump_data_and_model(
        data,
        fitted,
        onnx_model,
        verbose=False,
        basename="SklearnOrthogonalMatchingPursuitCV-Dec4",
    )
def test_model_orthogonal_matching_pursuit_cv(self):
    """Convert a fitted OrthogonalMatchingPursuitCV to ONNX and dump the result.

    The dump is allowed to fail when the ``allow_failure`` expression
    (an onnxruntime version comparison passed as a string) holds.
    """
    fitted, data = fit_regression_model(
        linear_model.OrthogonalMatchingPursuitCV())
    n_features = data.shape[1]
    input_spec = [("input", FloatTensorType([None, n_features]))]
    onnx_model = convert_sklearn(
        fitted, "orthogonal matching pursuit cv", input_spec)
    self.assertIsNotNone(onnx_model)
    # Passed through as a string; it is not evaluated here.
    failure_condition = (
        "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        data,
        fitted,
        onnx_model,
        verbose=False,
        basename="SklearnOrthogonalMatchingPursuitCV-Dec4",
        allow_failure=failure_condition,
    )
def sklearn_liner_model_regressions(xTrain, xTest, yTrain, yTest):
    """Fit a batch of sklearn linear models and keep the ones with low error.

    Every estimator in the candidate list is fitted on the training data
    and used to predict both the train and test sets.  The predictions are
    handed to ``calculate_prediction_error``; when both the
    "Test Average Error" and "Train Average Error" entries (first element)
    are below 30, the error record is appended to the result.

    Models that fail at any step are reported on stdout and skipped.

    Args:
        xTrain: Training features.
        xTest: Test features.
        yTrain: Training targets.
        yTest: Test targets.

    Returns:
        A DataFrame accumulating the error records of the accepted models
        (empty when none qualified).
    """
    modelForConsideration: DataFrame = pd.DataFrame()
    # Only estimator objects belong here.  The original list also called
    # helper functions (enet_path, lars_path, lasso_path, orthogonal_mp,
    # orthogonal_mp_gram, ridge_regression); the last two were called with
    # no arguments and raised TypeError while the list was being built, so
    # no model was ever fitted.  None of them return estimators anyway.
    LinerModels = [
        linear_model.ARDRegression(),
        linear_model.BayesianRidge(),
        linear_model.ElasticNet(),
        linear_model.ElasticNetCV(),
        linear_model.HuberRegressor(),
        linear_model.Lars(),
        linear_model.LarsCV(),
        linear_model.Lasso(),
        linear_model.LassoCV(),
        linear_model.LassoLars(),
        linear_model.LassoLarsCV(),
        linear_model.LassoLarsIC(),
        linear_model.LinearRegression(),
        linear_model.MultiTaskLasso(),
        linear_model.MultiTaskElasticNet(),
        linear_model.MultiTaskLassoCV(),
        linear_model.MultiTaskElasticNetCV(),
        linear_model.OrthogonalMatchingPursuit(),
        linear_model.OrthogonalMatchingPursuitCV(),
        linear_model.PassiveAggressiveClassifier(),
        linear_model.PassiveAggressiveRegressor(),
        linear_model.Perceptron(),
        linear_model.RANSACRegressor(),
        linear_model.Ridge(),
        linear_model.RidgeClassifier(),
        linear_model.RidgeClassifierCV(),
        linear_model.RidgeCV(),
        linear_model.SGDClassifier(),
        linear_model.SGDRegressor(),
        linear_model.TheilSenRegressor(),
    ]
    for model in LinerModels:
        modelName: str = model.__class__.__name__
        try:
            model.fit(xTrain, yTrain)
            yTrainPredict = model.predict(xTrain)
            yTestPredict = model.predict(xTest)
            errorList = calculate_prediction_error(
                modelName, yTestPredict, yTest, yTrainPredict, yTrain)
            test_error = errorList["Test Average Error"][0]
            train_error = errorList["Train Average Error"][0]
            if test_error < 30 and train_error < 30:
                try:
                    # NOTE(review): DataFrame.append was removed in pandas 2.0;
                    # switch to pd.concat once the type returned by
                    # calculate_prediction_error is confirmed.
                    modelForConsideration = modelForConsideration.append(
                        errorList)
                except Exception as e:
                    print(e)
        except Exception:
            # Best-effort sweep: a model that cannot handle this data is
            # reported and skipped rather than aborting the whole run.
            print(f"Error occurred while preparing Model {modelName}")
    return modelForConsideration
def __init__(self, method, yrange, params, i=0):
    """Build the regression model selected by ``method[i]``.

    Args:
        method: Indexable of algorithm labels; only ``method[i]`` is used
            to choose the model.
        yrange: Currently unused (see TODO below).
        params: Indexable of keyword-argument mappings; ``params[i]`` is
            forwarded to the chosen estimator after removing control keys
            such as ``'CV'``, ``'model'``, ``'reduce_dim'`` and
            ``'n_components'``.
        i: Index into ``method`` and ``params``.

    Raises:
        KeyError: If a control key required by the chosen branch (e.g.
            ``'CV'`` for OMP, ``'alpha'`` for cross-validated Lasso) is
            missing from ``params[i]``.
    """
    # TODO: yrange doesn't currently do anything. Remove or do something with it!
    self.algorithm_list = [
        'PLS',
        'GP',
        'OLS',
        'OMP',
        'Lasso',
        'Elastic Net',
        'Ridge',
        'Bayesian Ridge',
        'ARD',
        'LARS',
        'LASSO LARS',
        'SVR',
        'KRR',
    ]
    self.method = method
    self.outliers = None
    self.ransac = False
    print(params)

    selected = self.method[i]

    if selected == 'PLS':
        self.model = PLSRegression(**params[i])

    if selected == 'OLS':
        self.model = linear.LinearRegression(**params[i])

    if selected == 'OMP':
        # 'CV' is mandatory here and decides between the plain and the
        # cross-validated estimator; it is not an estimator argument.
        self.do_cv = params[i]['CV']
        params_temp = copy.copy(params[i])
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.OrthogonalMatchingPursuit(**params_temp)
        else:
            params_temp.pop('precompute')
            self.model = linear.OrthogonalMatchingPursuitCV(**params_temp)

    # 'Lasso' is the spelling advertised in algorithm_list; 'LASSO' is the
    # label this branch originally matched.  Both are accepted.
    if selected in ('LASSO', 'Lasso'):
        params_temp = copy.copy(params[i])
        # 'CV' is optional: a missing key means "no cross validation".
        self.do_cv = params_temp.pop('CV', False)
        if self.do_cv is False:
            self.model = linear.Lasso(**params_temp)
        else:
            params_temp.pop('alpha')
            self.model = linear.LassoCV(**params_temp)

    if selected == 'Elastic Net':
        params_temp = copy.copy(params[i])
        self.do_cv = params_temp.pop('CV', False)
        if self.do_cv is False:
            self.model = linear.ElasticNet(**params_temp)
        else:
            # The CV variant is given a fixed grid of l1_ratio candidates.
            params_temp['l1_ratio'] = [.1, .5, .7, .9, .95, .99, 1]
            self.model = linear.ElasticNetCV(**params_temp)

    if selected == 'Ridge':
        params_temp = copy.copy(params[i])
        self.do_cv = params_temp.pop('CV', False)
        if self.do_cv:
            self.model = linear.RidgeCV(**params_temp)
        else:
            self.model = linear.Ridge(**params_temp)

    # 'Bayesian Ridge' is the spelling advertised in algorithm_list; 'BRR'
    # is the label this branch originally matched.  Both are accepted.
    if selected in ('BRR', 'Bayesian Ridge'):
        self.model = linear.BayesianRidge(**params[i])

    if selected == 'ARD':
        self.model = linear.ARDRegression(**params[i])

    if selected == 'LARS':
        params_temp = copy.copy(params[i])
        self.do_cv = params_temp.pop('CV', False)
        if self.do_cv is False:
            self.model = linear.Lars(**params_temp)
        else:
            self.model = linear.LarsCV(**params_temp)

    if selected == 'LASSO LARS':
        # 'model' selects the variant: 0 = plain, 1 = CV, 2 = IC.
        model = params[i]['model']
        params_temp = copy.copy(params[i])
        params_temp.pop('model')
        if model == 0:
            self.model = linear.LassoLars(**params_temp)
        elif model == 1:
            self.model = linear.LassoLarsCV(**params_temp)
        elif model == 2:
            self.model = linear.LassoLarsIC(**params_temp)
        else:
            print("Something went wrong, \'model\' should be 0, 1, or 2")

    if selected == 'SVR':
        self.model = svm.SVR(**params[i])

    if selected == 'KRR':
        self.model = kernel_ridge.KernelRidge(**params[i])

    if selected == 'GP':
        # Remember the dimensionality-reduction settings on the instance;
        # they are stripped from the kwargs passed to GaussianProcess.
        self.reduce_dim = params[i]['reduce_dim']
        self.n_components = params[i]['n_components']
        params_temp = copy.copy(params[i])
        params_temp.pop('reduce_dim')
        params_temp.pop('n_components')
        self.model = GaussianProcess(**params_temp)
classification(svm.NuSVC(kernel="rbf", **SVC_PARAMS)), # Linear Regression regression(linear_model.LinearRegression()), regression(linear_model.HuberRegressor()), regression(linear_model.ElasticNet(random_state=RANDOM_SEED)), regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)), regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)), regression(linear_model.Lars()), regression(linear_model.LarsCV()), regression(linear_model.Lasso(random_state=RANDOM_SEED)), regression(linear_model.LassoCV(random_state=RANDOM_SEED)), regression(linear_model.LassoLars()), regression(linear_model.LassoLarsIC()), regression(linear_model.OrthogonalMatchingPursuit()), regression(linear_model.OrthogonalMatchingPursuitCV()), regression(linear_model.Ridge(random_state=RANDOM_SEED)), regression(linear_model.RidgeCV()), regression(linear_model.BayesianRidge()), regression(linear_model.ARDRegression()), regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)), regression( linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)), # Logistic Regression classification( linear_model.LogisticRegression(random_state=RANDOM_SEED)), classification( linear_model.LogisticRegressionCV(random_state=RANDOM_SEED)), classification(linear_model.RidgeClassifier(random_state=RANDOM_SEED)), classification(linear_model.RidgeClassifierCV()),
# Fit OrthogonalMatchingPursuitCV once per entry of ``cv_list`` and report
# the number of selected coefficients and the R2 of each fit.

# open a file to save calculated coefficients
# NOTE(review): the handle is not closed in this section; presumably later
# code writes the per-fold rows and closes it -- confirm.
coefs_file = open('OMP_coefs.txt', 'w')
coefs_file.write('OMP algorithm with CV to calculate %s' % X_name)
# write the predictor names to file
for pred in predictor_names:
    coefs_file.write('%s\t' % pred)
# add column for R2 and fold
coefs_file.write('R2\tfold\n')

# make figure to plot predicted vs measured (one 5x5 panel per cv value)
plt.figure(figsize=(5 * len(cv_list), 5))
sb = 1  # subplot index

# loop over the list of cv fold values to calculate the model, plot the
# results, and save to file
for cv_ in cv_list:
    OMP = linear_model.OrthogonalMatchingPursuitCV(cv=cv_)
    OMPfit = OMP.fit(predictors, X[:, 0])
    OMP_coefs = OMPfit.coef_
    OMP_predicted = OMP.predict(predictors)
    # Score against the fitted target.  Scoring the predictions against
    # themselves (as before) always yields 1.0.
    OMP_score = OMP.score(predictors, X[:, 0])

    # Undo the scaling applied to the target before computing R2.
    D = X[:, 0] * scale_D + mean_D[0]
    Dpredicted = OMP_predicted * scale_D + mean_D[0]
    # calculate the R2 of the fit
    R2 = 1 - sum((D - Dpredicted)**2) / sum((D - np.mean(D))**2)

    # Single-argument print calls produce the same text on Python 2 and 3.
    print('CV fold = %d' % cv_)
    print('Number of non-zero coefficients: %s' % (OMPfit.n_nonzero_coefs_,))
    print('R2 : %s' % (R2,))
# K-fold loop: fit OrthogonalMatchingPursuitCV on each training split and
# collect per-fold predictions for the full training matrix (predictions0),
# for ``test`` (predictions1) and out-of-fold predictions for ``X``.
print("Set up KFolds...")
n_splits = 5
kf = KFold(n_splits=n_splits)
kf.get_n_splits(X)

# predictions0 stores clf.predict(X), so it needs one row per row of X.
# It was previously sized by test.shape[0], which only works when X and
# test happen to have the same number of rows.
predictions0 = np.zeros((X.shape[0], n_splits))
predictions1 = np.zeros((test.shape[0], n_splits))
score = 0

print("Starting ", n_splits, "-fold CV loop...")
oof_predictions = np.zeros(X.shape[0])
for fold, (train_index, test_index) in enumerate(kf.split(X)):
    X_train, X_valid = X[train_index, :], X[test_index, :]
    y_train, y_valid = y[train_index], y[test_index]

    clf = linear_model.OrthogonalMatchingPursuitCV()
    clf.fit(X_train, y_train)

    pred0 = clf.predict(X)
    pred1 = clf.predict(test)
    # Each row of X is predicted exactly once by a model that did not see it.
    oof_predictions[test_index] = clf.predict(X_valid)
    predictions0[:, fold] = pred0
    predictions1[:, fold] = pred1

    # ``score`` accumulates the R2 on the *training* split of each fold.
    score += r2_score(y_train, clf.predict(X_train))
    print('Fold %d: Score %f' % (fold, clf.score(X_train, y_train)))

# Average the per-fold predictions, and the per-fold training R2.
prediction0 = predictions0.mean(axis=1)
prediction1 = predictions1.mean(axis=1)
score /= n_splits
oof_score = r2_score(y, oof_predictions)
def get_regression_estimators(r, regression_models):
    """Store a default-constructed ``linear_model`` estimator under key ``r``.

    When ``r`` is one of the supported estimator names, the class of that
    name is looked up on ``linear_model``, instantiated with no arguments
    and written to ``regression_models[r]``.  Any other name only prints a
    message; ``regression_models`` is left untouched.

    Args:
        r: Name of the estimator to create.
        regression_models: Mapping that receives the new estimator.
    """
    supported_names = (
        'ARDRegression',
        'BayesianRidge',
        'ElasticNet',
        'ElasticNetCV',
        'HuberRegressor',
        'Lars',
        'LarsCV',
        'Lasso',
        'LassoCV',
        'LassoLars',
        'LassoLarsCV',
        'LassoLarsIC',
        'LinearRegression',
        'LogisticRegression',
        'LogisticRegressionCV',
        'MultiTaskElasticNet',
        'MultiTaskElasticNetCV',
        'MultiTaskLasso',
        'MultiTaskLassoCV',
        'OrthogonalMatchingPursuit',
        'OrthogonalMatchingPursuitCV',
        'PassiveAggressiveClassifier',
        'PassiveAggressiveRegressor',
        'Perceptron',
        'RANSACRegressor',
        'Ridge',
        'RidgeClassifier',
        'RidgeClassifierCV',
        'RidgeCV',
        'SGDClassifier',
        'SGDRegressor',
        'TheilSenRegressor',
    )
    if r not in supported_names:
        print(
            r +
            " is an unsupported regression type. Check if you have misspelled the name."
        )
        return
    # Every supported name is also the attribute name of its class.
    estimator_class = getattr(linear_model, r)
    regression_models[r] = estimator_class()
def models(self) -> Dict[str, LinearModel]:
    """Return a newly built mapping of model name to ``linear_model`` estimator.

    Each call creates fresh estimator instances.  Entries are inserted in
    a fixed order, and most use default arguments; the exceptions pass
    ``cv=5``, ``eps=0.01`` or ``max_iter``/``tol`` as shown below.

    Deliberately left out (they were disabled in the original table):
    LogisticRegression, LogisticRegressionCV, MultiTaskLasso,
    MultiTaskElasticNet, MultiTaskLassoCV, MultiTaskElasticNetCV,
    OrthogonalMatchingPursuit and PassiveAggressiveClassifier, as well as
    ``normalize=True`` variants of LinearRegression, RidgeCV, LassoLars,
    LassoLarsIC, ARDRegression and BayesianRidge -- normalization seemed
    to make the score worse.
    """
    lm = linear_model
    catalogue = {}

    # Least squares, Bayesian and robust regressors.
    catalogue["LinearRegression"] = lm.LinearRegression()
    catalogue["ARDRegression"] = lm.ARDRegression()
    catalogue["BayesianRidge"] = lm.BayesianRidge()
    catalogue["HuberRegressor"] = lm.HuberRegressor()
    catalogue["OrthogonalMatchingPursuitCV"] = lm.OrthogonalMatchingPursuitCV(cv=5)
    catalogue["Perceptron"] = lm.Perceptron(max_iter=1000, tol=1e-3)
    catalogue["RANSACRegressor"] = lm.RANSACRegressor()
    catalogue["SGDRegressor"] = lm.SGDRegressor(max_iter=1000, tol=1e-3)
    catalogue["TheilSenRegressor"] = lm.TheilSenRegressor()
    catalogue["PassiveAggressiveRegressor"] = lm.PassiveAggressiveRegressor(
        max_iter=1000, tol=1e-3)

    # LARS / Lasso family.
    catalogue["Lars"] = lm.Lars(eps=0.01)
    catalogue["LarsCV"] = lm.LarsCV(cv=5, eps=0.01)
    catalogue["Lasso"] = lm.Lasso(alpha=1, max_iter=1000)
    catalogue["LassoCV"] = lm.LassoCV(cv=5)
    catalogue["LassoLars"] = lm.LassoLars(eps=0.01)
    catalogue["LassoLarsCV"] = lm.LassoLarsCV(cv=5, eps=0.01, max_iter=100)
    catalogue["LassoLarsIC"] = lm.LassoLarsIC(eps=0.01)

    # Ridge family and SGD classifier.
    catalogue["Ridge"] = lm.Ridge()
    catalogue["RidgeClassifier"] = lm.RidgeClassifier()
    catalogue["RidgeClassifierCV"] = lm.RidgeClassifierCV(cv=5)
    catalogue["RidgeCV"] = lm.RidgeCV(cv=5)
    catalogue["SGDClassifier"] = lm.SGDClassifier(max_iter=1000, tol=1e-3)

    # Elastic net.
    catalogue["ElasticNet"] = lm.ElasticNet()
    catalogue["ElasticNetCV"] = lm.ElasticNetCV(cv=5)

    return catalogue
def __init__(self, method, yrange, params, i=0, ransacparams={}):
    """Build the regression model selected by ``method[i]``.

    Args:
        method: Indexable of algorithm labels; only ``method[i]`` is used
            to choose the model.
        yrange: Indexable; ``yrange[i]`` is stored on the instance.
        params: Indexable of keyword-argument mappings; ``params[i]`` is
            forwarded to the chosen estimator after the control keys
            (``'CV'``, ``'IC'``, ``'reduce_dim'``, ``'n_components'``)
            have been removed.
        i: Index into ``method``, ``yrange`` and ``params``.
        ransacparams: Not used in this constructor.

    Raises:
        KeyError: If a control key required by the chosen branch (e.g.
            ``'CV'``) is missing from ``params[i]``.
    """
    self.method = method
    self.outliers = None
    self.inliers = None
    self.ransac = False
    self.yrange = yrange[i]

    if self.method[i] == 'PLS':
        self.model = PLSRegression(**params[i])

    if self.method[i] == 'OLS':
        self.model = linear.LinearRegression(**params[i])

    if self.method[i] == 'OMP':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        # 'CV' is a control flag, not an estimator argument
        params_temp = copy.copy(params[i])
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.OrthogonalMatchingPursuit(**params_temp)
        else:
            params_temp.pop('n_nonzero_coefs')
            self.model = linear.OrthogonalMatchingPursuitCV(**params_temp)

    if self.method[i] == 'Lasso':
        self.do_cv = params[i]['CV']
        params_temp = copy.copy(params[i])
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.Lasso(**params_temp)
        else:
            params_temp.pop('alpha')
            self.model = linear.LassoCV(**params_temp)

    if self.method[i] == 'Elastic Net':
        self.do_cv = params[i]['CV']
        params_temp = copy.copy(params[i])
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.ElasticNet(**params_temp)
        else:
            params_temp.pop('alpha')
            self.model = linear.ElasticNetCV(**params_temp)

    if self.method[i] == 'Ridge':
        self.do_cv = params[i]['CV']
        params_temp = copy.copy(params[i])
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.Ridge(**params_temp)
        else:
            # Ridge requires a specific set of alphas to be provided...
            # this needs more work to be implemented correctly
            self.model = linear.RidgeCV(**params_temp)

    if self.method[i] == 'Bayesian Ridge':
        self.model = linear.BayesianRidge(**params[i])

    if self.method[i] == 'ARD':
        self.model = linear.ARDRegression(**params[i])

    if self.method[i] == 'LARS':
        self.do_cv = params[i]['CV']
        params_temp = copy.copy(params[i])
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.Lars(**params_temp)
        else:
            self.model = linear.LarsCV(**params_temp)

    if self.method[i] == 'Lasso LARS':
        # 'CV' and 'IC' choose between the plain, cross-validated and
        # information-criterion variants.
        self.do_cv = params[i]['CV']
        self.do_ic = params[i]['IC']
        params_temp = copy.copy(params[i])
        params_temp.pop('CV')
        params_temp.pop('IC')
        # The estimators must receive the filtered copy: passing
        # params[i] (as before) forwarded the 'CV'/'IC' control keys as
        # unexpected keyword arguments.
        if self.do_cv is False and self.do_ic is False:
            self.model = linear.LassoLars(**params_temp)
        if self.do_cv is True and self.do_ic is False:
            self.model = linear.LassoLarsCV(**params_temp)
        if self.do_cv is False and self.do_ic is True:
            self.model = linear.LassoLarsIC(**params_temp)
        if self.do_cv is True and self.do_ic is True:
            print(
                "Can't use both cross validation AND information criterion to optimize!"
            )

    if self.method[i] == 'SVR':
        self.model = svm.SVR(**params[i])

    if self.method[i] == 'KRR':
        self.model = kernel_ridge.KernelRidge(**params[i])

    if self.method[i] == 'GP':
        # get the method for dimensionality reduction and the number of components
        self.reduce_dim = params[i]['reduce_dim']
        self.n_components = params[i]['n_components']
        # Remove parameters not accepted by Gaussian Process
        params_temp = copy.copy(params[i])
        params_temp.pop('reduce_dim')
        params_temp.pop('n_components')
        self.model = GaussianProcess(**params_temp)
# Lookup table from a lower-case model key to a default-constructed
# regressor.  Built in three groups; the final key order is linear models,
# then tree/ensemble models, then support-vector regressors.

# Linear models (from ``lm``).
modeldict = {
    'ardregression': lm.ARDRegression(),
    'bayesianridge': lm.BayesianRidge(),
    'elasticnet': lm.ElasticNet(),
    'elasticnetcv': lm.ElasticNetCV(),
    'huberregression': lm.HuberRegressor(),
    'lars': lm.Lars(),
    'larscv': lm.LarsCV(),
    'lasso': lm.Lasso(),
    'lassocv': lm.LassoCV(),
    'lassolars': lm.LassoLars(),
    'lassolarscv': lm.LassoLarsCV(),
    'lassolarsic': lm.LassoLarsIC(),
    'linearregression': lm.LinearRegression(),
    'orthogonalmatchingpursuit': lm.OrthogonalMatchingPursuit(),
    'orthogonalmatchingpursuitcv': lm.OrthogonalMatchingPursuitCV(),
    'passiveagressiveregressor': lm.PassiveAggressiveRegressor(),
    'ridge': lm.Ridge(),
    'ridgecv': lm.RidgeCV(),
    'sgdregressor': lm.SGDRegressor(),
    'theilsenregressor': lm.TheilSenRegressor(),
}

# Tree-based and ensemble regressors.
modeldict.update({
    'decisiontreeregressor': DecisionTreeRegressor(),
    'randomforestregressor': RandomForestRegressor(),
    'adaboostregressor': AdaBoostRegressor(),
    'baggingregressor': BaggingRegressor(),
    'extratreeregressor': ExtraTreeRegressor(),
})

# Support-vector regressors.
modeldict.update({
    'linearsvr': LinearSVR(),
    'nusvr': NuSVR(),
    'svr': SVR(),
})
def run_simple_model(train_x, train_y, dev_x, dev_y, test_x, test_y, model_type, out_dir=None, class_weight=None):
    """Train one sklearn model by name and return its accuracy on the test set.

    The estimator named by ``model_type`` is fitted on the training data,
    used to predict the train and test sets, and the rounded predictions
    are compared with the labels via ``accuracy_score``.

    Args:
        train_x: Training features.
        train_y: Training labels.
        dev_x: Unused.
        dev_y: Unused.
        test_x: Test features.
        test_y: Test labels.
        model_type: Name of a ``sklearn.linear_model`` estimator class, or
            ``'LinearSVC'`` / ``'SVC'``.
        out_dir: When given, the rounded test predictions are written to
            ``<out_dir>/preds/best_test_pred.txt``.  When ``None`` nothing
            is written.
        class_weight: Forwarded to the estimators that are constructed
            with a ``class_weight`` argument.

    Returns:
        The accuracy of the rounded test predictions.

    Raises:
        NotImplementedError: If ``model_type`` is not a supported
            estimator name.  This includes the ``*_path`` and
            ``orthogonal_mp*`` names: those are plain functions, not
            estimators, and the previous attempt to call them without
            arguments and then ``.fit`` could never succeed.
    """
    from sklearn import linear_model, svm

    # linear_model estimators constructed with no arguments.
    default_linear = (
        'ARDRegression', 'BayesianRidge', 'ElasticNet', 'ElasticNetCV',
        'HuberRegressor', 'Lars', 'LarsCV', 'Lasso', 'LassoCV', 'LassoLars',
        'LassoLarsCV', 'LassoLarsIC', 'LinearRegression', 'MultiTaskLasso',
        'MultiTaskElasticNet', 'MultiTaskLassoCV', 'MultiTaskElasticNetCV',
        'OrthogonalMatchingPursuit', 'OrthogonalMatchingPursuitCV',
        'PassiveAggressiveRegressor', 'RandomizedLasso',
        'RandomizedLogisticRegression', 'RANSACRegressor', 'Ridge', 'RidgeCV',
        'SGDRegressor', 'TheilSenRegressor',
    )
    # linear_model estimators constructed with ``class_weight``.
    weighted_linear = (
        'LogisticRegression', 'LogisticRegressionCV',
        'PassiveAggressiveClassifier', 'Perceptron', 'RidgeClassifier',
        'RidgeClassifierCV', 'SGDClassifier',
    )

    startTrainTime = time()
    logger.info("Start training...")
    if model_type in default_linear:
        model = getattr(linear_model, model_type)().fit(train_x, train_y)
    elif model_type in weighted_linear:
        estimator_class = getattr(linear_model, model_type)
        model = estimator_class(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'LinearSVC':
        model = svm.LinearSVC(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'SVC':
        model = svm.SVC(class_weight=class_weight, degree=3).fit(train_x, train_y)
    else:
        raise NotImplementedError('Model not implemented')
    logger.info("Finished training.")
    endTrainTime = time()
    trainTime = endTrainTime - startTrainTime
    logger.info("Training time : %d seconds" % trainTime)

    logger.info("Start predicting train set...")
    train_pred_y = model.predict(train_x)
    logger.info("Finished predicting train set.")
    logger.info("Start predicting test set...")
    test_pred_y = model.predict(test_x)
    logger.info("Finished predicting test set.")
    endTestTime = time()
    # "Testing time" covers predicting both the train and the test set.
    testTime = endTestTime - endTrainTime
    logger.info("Testing time : %d seconds" % testTime)

    # Predictions are rounded to the nearest integer before being scored
    # and saved.
    train_pred_y = np.round(train_pred_y)
    test_pred_y = np.round(test_pred_y)

    # out_dir defaults to None; only write the predictions when a
    # directory was actually supplied.
    if out_dir is not None:
        np.savetxt(out_dir + '/preds/best_test_pred' + '.txt', test_pred_y, fmt='%i')

    test_accuracy = accuracy_score(test_y, test_pred_y)
    logger.info('[TRAIN] Acc: %.3f' % (accuracy_score(train_y, train_pred_y)))
    logger.info('[TEST] Acc: %.3f' % test_accuracy)
    return test_accuracy