def getKernel(s, i, j, **kwargs):
    """Build a KernelRidge model.

    :param s: kernel name understood by sklearn, or 'custom' to use the
        module-level ``customKernelOption`` callable.
    :param i: regularization strength (KernelRidge ``alpha``).
    :param j: kernel coefficient (KernelRidge ``gamma``).
    :param kwargs: for 'custom', pass kernel_params={'gamma': j}.
    """
    if s == 'custom':
        # Bug fix: the original read kwargs['kernal_params'] (misspelled),
        # contradicting its own comment. Accept the documented spelling,
        # falling back to the historical typo for backward compatibility.
        params = kwargs.get('kernel_params', kwargs.get('kernal_params'))
        return kr.KernelRidge(kernel=customKernelOption, alpha=i,
                              kernel_params=params)
    else:
        return kr.KernelRidge(kernel=s, alpha=i, gamma=j)
def test_params(X, Y):
    """Run test_param over a small sweep of regressor families on (X, Y)."""
    # Plain linear baseline.
    test_param(linear_model.LinearRegression(), '', X, Y)
    # RBF kernel ridge over a small (gamma, alpha) grid.
    for g in [0.05, 0.1, 0.2]:
        for a in [0.1, 0.01, 1e-3]:
            label = 'rbf g=' + str(g) + ", a=" + str(a)
            test_param(kernel_ridge.KernelRidge(alpha=a, kernel='rbf', gamma=g),
                       label, X, Y)
    # Polynomial kernel ridge, labelled by degree.
    for deg in [2, 3, 4, 5]:
        test_param(kernel_ridge.KernelRidge(kernel='polynomial', degree=deg),
                   deg, X, Y)
    # Tree baseline.
    test_param(tree.DecisionTreeRegressor(), '', X, Y)
def Unfair_Prediction(self, kernel, lmd, gamma, avails, use_S=False):
    """Fit an 'unfair' baseline model and score train/validation/test splits.

    :param kernel: falsy -> linear Ridge(alpha=lmd); truthy -> RBF KernelRidge.
    :param lmd: regularization strength.
    :param gamma: RBF gamma (only used for the kernel model).
    :param avails: passed through to each Result.
    :param use_S: append the sensitive attribute S to the features.
    :return: (train Result, validation Result, test Result).
    """
    # Choose the estimator family.
    if kernel:
        model = kernel_ridge.KernelRidge(alpha=lmd, kernel="rbf", gamma=gamma)
    else:
        model = linear_model.Ridge(alpha=lmd, fit_intercept=True)

    # Training design matrix, optionally including S.
    if use_S:
        fit_X = np.c_[self.trainX1, self.trainS]
    else:
        fit_X = self.trainX1
    model.fit(fit_X, self.trainY)

    validS, validX1, validX2, validY = self.getValidationData()
    testS, testX1, testX2, testY = self.getPredictData()

    # Build the prediction matrices for all three splits the same way.
    if use_S:
        split_X = [np.c_[self.trainX1, self.trainS],
                   np.c_[validX1, validS],
                   np.c_[testX1, testS]]
    else:
        split_X = [self.trainX1, validX1, testX1]
    yhat_train, yhat_valid, yhat_test = (model.predict(m).flatten()
                                         for m in split_X)

    return (Result(self.trainY, yhat_train, self.trainS, avails),
            Result(validY, yhat_valid, validS, avails),
            Result(testY, yhat_test, testS, avails))
def _get_base_ml_model(method):
    """Return an untrained regressor for the given method key, or None.

    Single-output estimators that need it ('huber', 'svr', 'gbm') are wrapped
    in a MultiOutputRegressor. Unknown keys yield None, as before.
    """
    if method == 'lr':
        return linear_model.LinearRegression()
    if method == 'huber':
        return multioutput.MultiOutputRegressor(
            linear_model.HuberRegressor(max_iter=50))
    if method == 'svr':
        return multioutput.MultiOutputRegressor(svm.LinearSVR())
    if method == 'kr':
        return kernel_ridge.KernelRidge(kernel='rbf')
    if method == 'rf':
        return ensemble.RandomForestRegressor(n_estimators=50, n_jobs=8)
    if method == 'gbm':
        booster = lgb.LGBMRegressor(max_depth=20, num_leaves=1000,
                                    n_estimators=100, min_child_samples=5,
                                    random_state=42)
        return multioutput.MultiOutputRegressor(booster)
    if method == 'nn':
        return neural_network.MLPRegressor(hidden_layer_sizes=(25, 25),
                                           early_stopping=True,
                                           max_iter=1000000, alpha=0.01)
    return None
def optimize_KernelRidge(X_train, y_train):
    """Grid-search KernelRidge hyper-parameters with 5-fold CV.

    Bug fix: the original passed ``cache_size=500`` to KernelRidge, but
    KernelRidge accepts only (alpha, kernel, gamma, degree, coef0,
    kernel_params) — ``cache_size`` belongs to SVR/SVC and raises a
    TypeError here, so it is dropped.

    :return: formatOptimal(...) applied to the best parameter set found.
    """
    opt = modelSel.GridSearchCV(skKR.KernelRidge(),
                                param_kernelRidge, cv=5,
                                scoring=scoreFunction)
    opt.fit(X_train, y_train)
    return formatOptimal(opt.best_params_)
def GLS(X, y, model="ols", **kwargs):
    """Fit a pair of regressions, with and without an intercept.

    model: "ols" | "ridge" | "lasso" | "lar" | "kernel" | "xgb".
    For 'kernel' and 'xgb' the same fitted model fills both slots.
    Returns {"1": with-intercept model, "-1": no-intercept model,
    "type": 'GLS'}. An unrecognized model leaves the locals unbound and
    raises NameError, as in the original.
    """
    if model == "ols":
        md1 = linear_model.LinearRegression(fit_intercept=True).fit(X, y)
        md0 = linear_model.LinearRegression(fit_intercept=False).fit(X, y)
    elif model == "ridge":
        a = get_alpha(kwargs, default=10 ** 0.5)
        md1 = linear_model.Ridge(alpha=a, fit_intercept=True).fit(X, y)
        md0 = linear_model.Ridge(alpha=a, fit_intercept=False).fit(X, y)
    elif model == 'lasso':
        a = get_alpha(kwargs, default=0.1)
        md1 = linear_model.Lasso(alpha=a, fit_intercept=True).fit(X, y)
        md0 = linear_model.Lasso(alpha=a, fit_intercept=False).fit(X, y)
    elif model == 'lar':
        # TODO carried over from the original implementation.
        md1 = linear_model.Lars(fit_intercept=True).fit(X, y)
        md0 = linear_model.Lars(fit_intercept=False).fit(X, y)
    elif model == 'kernel':
        # NOTE(review): get_kernel_coef is given kwargs["alpha"] only —
        # looks like it may have been meant to receive kwargs; confirm.
        alpha, kernel, gamma, degree, coef0 = get_kernel_coef(kwargs["alpha"])
        md1 = kernel_ridge.KernelRidge(alpha=alpha, kernel=kernel, gamma=gamma,
                                       degree=degree, coef0=coef0).fit(X, y)
        md0 = md1
    elif model == 'xgb':
        md1 = xgb.XGBRegressor().fit(X, y)
        md0 = md1
    return {"1": md1, "-1": md0, "type": 'GLS'}
def __regression(x, y, regressor_return=False):
    """Fit TheilSen, Ridge, and RBF KernelRidge to (x, y).

    :return: the three fitted regressors when regressor_return is True,
        otherwise their three scores (TheilSen R^2 computed by hand,
        absolute R^2 for the other two).
    """
    # TheilSen regression; R^2 computed manually as SSR / SST.
    flat_y = y.ravel()
    theilsen = sk_lm.TheilSenRegressor()
    theilsen.fit(x, flat_y)
    pred = theilsen.predict(x)
    y_bar = np.mean(flat_y)
    ssr = np.sum((pred - y_bar) ** 2)
    sst = np.sum((flat_y - y_bar) ** 2)
    theilsen_score = ssr / sst

    # Ridge regression.
    ridge = sk_lm.Ridge(alpha=0.01, normalize=False)
    ridge.fit(x, y)
    ridge_score = np.absolute(ridge.score(x, y))

    # RBF kernel ridge regression.
    krr = sk_kr.KernelRidge(kernel='rbf', alpha=0.01)
    krr.fit(x, y)
    krr_score = np.absolute(krr.score(x, y))

    if regressor_return:
        return (theilsen, ridge, krr)
    return (theilsen_score, ridge_score, krr_score)
def kernel_ridge_reg_fit(X, y, data_name, binning_threshold=0.75, n_splits=5, log=True): """ Build Ridge regression model Input: X - regression_X, original y - regression_y, original data_name - the name of the data binning_threshold - threshold used for vertical log binning log - whether take the log of the variables for preprocessing """ # Scale the data X_scaled, y_scaled, X_scaler, y_scaler = scale_data(X, y) # Prepare for Stratified K Fold # Split y bin_result_dict, bin_edge, _ = vertical_log_binning( binning_threshold, dict(zip(range(len(list(y))), list(y)))) split_y = list(bin_result_dict.values()) # Cross Validation experiment cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0) predictions = [] for train, test in cv.split(X_scaled, split_y): y_test = y[test].flatten() ridge_reg = kernel_ridge.KernelRidge(alpha=10**(-5), gamma=5 * 10**(-5), kernel="rbf") ridge_reg.fit(X_scaled[train], y_scaled[train]) predict_this_fold = list(ridge_reg.predict(X_scaled[test]).flatten()) predict_this_fold_rescale = 10**( y_scaler.inverse_transform(predict_this_fold)) predictions.append((y_test, predict_this_fold_rescale)) return predictions
def optimize_KernelRidge(X_train, y_train):
    """Randomized hyper-parameter search for KernelRidge (5-fold CV).

    Samples from the module-level param_kernelRidge distributions and
    returns formatOptimal(...) applied to the best parameter set.
    """
    search = modelSel.RandomizedSearchCV(estimator=skKR.KernelRidge(),
                                         param_distributions=param_kernelRidge,
                                         cv=5,
                                         scoring=scoreFunction)
    search.fit(X_train, y_train)
    return formatOptimal(search.best_params_)
def setup_KernelRidge(learner_settings):
    """Build a KernelRidge from "key=value" option strings.

    Recognized keys: alpha (float), kernel (str), gamma (float),
    degree (int), coef0 (int), kernel_params (raw string). Unrecognized
    keys are ignored; defaults match sklearn's.
    """
    # Current values, seeded with the defaults.
    opts = {
        'alpha': 1,
        'kernel': 'linear',
        'gamma': None,
        'degree': 3,
        'coef0': 1,
        'kernel_params': None,
    }
    # How to convert each recognized option's textual value.
    convert = {
        'alpha': float,
        'kernel': str,
        'gamma': float,
        'degree': int,
        'coef0': int,
        'kernel_params': str,
    }
    for setting in learner_settings:
        # Split identifier=value; only the first two pieces matter.
        pieces = setting.split("=")
        key = pieces[0]
        if key in convert:
            opts[key] = convert[key](pieces[1])
    return skKR.KernelRidge(alpha=opts['alpha'],
                            kernel=opts['kernel'],
                            gamma=opts['gamma'],
                            degree=opts['degree'],
                            coef0=opts['coef0'],
                            kernel_params=opts['kernel_params'])
def get_all_regrs():
    """Return a name -> fresh (unfitted) regressor mapping for benchmarking.

    ``linear_svc`` is a module-level estimator shared with the rest of the
    file. Several estimators that previously failed here (Perceptron,
    Isotonic, target-transform wrappers, ...) are intentionally omitted.
    """
    return {
        "Linear regression": linear_model.LinearRegression(),
        "Lars": linear_model.Lars(),
        "Lasso": linear_model.LassoCV(max_iter=5000),
        "PLS": PLS(n_components=3),
        "Random Forest": ensemble.RandomForestRegressor(),
        "Gradient Boost": ensemble.GradientBoostingRegressor(),
        "Extra Trees": ensemble.ExtraTreesRegressor(max_depth=2),
        "Ada Boost": ensemble.AdaBoostRegressor(
            base_estimator=tree.DecisionTreeRegressor(max_depth=2),
            n_estimators=250),
        "Gaussian Process": gaussian_process.GaussianProcessRegressor(),
        "Kernel Ridge": kernel_ridge.KernelRidge(),
        "Ridge CV": linear_model.RidgeCV(),
        "ML Perceptron": neural_network.MLPRegressor(
            max_iter=50000, hidden_layer_sizes=(5, 5)),
        "Linear SVR": linear_svc,
        "RBF SVR": svm.SVR(kernel='rbf'),
        "Poly SVR": svm.SVR(kernel='poly'),
        "Bayesian Ridge": linear_model.BayesianRidge(),
        "Huber": linear_model.HuberRegressor(),
        "K-neighbors": neighbors.KNeighborsRegressor(),
    }
def fit_vels(depths, vels_modeled, vels_train):
    """Fit a KernelRidge (RBF kernel, length scale 1000) to velocity residuals.

    :param depths: predictor array.
    :param vels_modeled: modeled velocities.
    :param vels_train: observed/training velocities.
    :return: the fitted regressor mapping depths -> (modeled - train).

    Fixes: the original used Python-2-only ``print x`` statements (the
    parenthesized form below works under both 2 and 3) and contained a
    redundant ``y = y`` self-assignment, which is removed.
    """
    residual = vels_modeled - vels_train
    print(residual.shape)
    print(depths.shape)
    regr = kr.KernelRidge(alpha=0.1, kernel=gp.kernels.RBF(1000))
    regr.fit(depths, residual)
    return regr
def run_specific_combination(test_frame, reg_type, column_list):
    """Train one regressor (chosen by reg_type) on the given feature columns,
    print R2/RMSE on a held-out 20% split, and write predictions to CSV.

    test_frame: DataFrame with at least 'Endurance_Score', 'Title', 'Artist'
    reg_type: short code selecting the estimator; unknown codes return early
    column_list: feature columns to keep (via DataFrame.filter)
    """
    target_feature = test_frame['Endurance_Score']
    test_df = test_frame.filter(column_list, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        test_df, target_feature.values.reshape(-1,1), test_size=0.20,
        random_state=0)
    # Map the short code to an estimator instance.
    if reg_type == 'dt':
        regr = DecisionTreeRegressor(max_depth=2)
    elif reg_type == 'lin':
        regr = linear_model.LinearRegression()
    elif reg_type == 'ridge':
        regr = linear_model.Ridge(alpha=1500.0)
    elif reg_type == 'lasso':
        regr = linear_model.Lasso(alpha=10.0)
    elif reg_type == 'bayridge':
        regr = linear_model.BayesianRidge()
    elif reg_type == 'sgd':
        regr = linear_model.SGDRegressor(loss='huber')
    elif reg_type == 'lars':
        regr = linear_model.Lars(n_nonzero_coefs=np.inf)
    elif reg_type == 'pasagv':
        regr = linear_model.PassiveAggressiveRegressor(random_state=0)
    elif reg_type == 'kernelridge':
        regr = kernel_ridge.KernelRidge()
    elif reg_type == 'svr':
        regr = svm.SVR()
    elif reg_type == 'kneigh':
        regr = neighbors.KNeighborsRegressor(algorithm='kd_tree')
    elif reg_type == 'gauss':
        regr = gaussian_process.GaussianProcessRegressor()
    elif reg_type == 'gbr':
        params = {'n_estimators': 760, 'max_depth': 4,
                  'min_samples_split': 3, 'learning_rate': 0.026,
                  'loss': 'huber'}
        regr = GradientBoostingRegressor(**params)
    elif reg_type == 'ran':
        regr = RandomForestRegressor(n_estimators=300, max_depth=8)
    elif reg_type == 'et':
        regr = ExtraTreesRegressor()
    else:
        # Unknown code: silently do nothing.
        return
    # Drop identifier columns before fitting; they are kept in X_test for
    # the output CSV below.
    x_train_frame = X_train.copy()
    del x_train_frame['Title']
    del x_train_frame['Artist']
    regr.fit(x_train_frame, y_train.ravel())
    x_test_frame = X_test.copy()
    del x_test_frame['Title']
    del x_test_frame['Artist']
    y_pred = regr.predict(x_test_frame)
    # NOTE(review): 'rmse' actually holds the MSE here; the square root is
    # only taken inside the print below.
    rmse = mean_squared_error(y_test, y_pred)
    score = r2_score(y_test, y_pred)
    print("R2-score: {}, RMSE: {}".format(score, math.sqrt(rmse)))
    # Assemble and persist the per-song predictions.
    result_df = pd.DataFrame(columns=['Song', 'Artist', 'Endurance_Score',
                                      'Predicted_Endurance_Score'])
    result_df['Song'] = X_test['Title']
    result_df['Artist'] = X_test['Artist']
    result_df['Endurance_Score'] = y_test.ravel()
    result_df['Predicted_Endurance_Score'] = y_pred
    result_df.to_csv('{0}/{1}.csv'.format(path_final_csv,
                                          'predicted_midtermdata'),
                     index=False)
def __call__(self, X, Y):
    """Fit one KernelRidge per problem and collect its dual coefficients.

    :param X: list of per-problem design matrices (all with the same rows).
    :param Y: list of per-problem targets, same length as X.
    :return: (alpha matrix of dual coefficients, list of kernel partials
        bound to the rows of X[0]).
    :raises ValueError: when len(X) != len(Y).
    """
    nprobs = len(X)
    if len(Y) != nprobs:
        raise ValueError("Number of problems for input ({}) and target ({}) do not match".format(nprobs, len(Y)))
    alpha = np.zeros((nprobs, X[0].shape[0]))
    for t, (X_t, Y_t) in enumerate(zip(X, Y)):
        model = kernel_ridge.KernelRidge(alpha=self.mu, kernel=self.kernel)
        model.fit(X_t, Y_t)
        alpha[t, :] = model.dual_coef_.flatten().copy()
    return alpha, [partial(self.kernel, x) for x in X[0]]
def __init__(self, method, params, i=0):
    """Select and construct the regression model named by method[i],
    parameterized by params[i].

    method: sequence of algorithm names; params: parallel sequence of
    keyword dicts; i: which entry to instantiate.
    """
    # NOTE(review): the dispatch below checks 'LASSO' and 'BRR', which do
    # not literally match the list entries 'Lasso' / 'Bayesian Ridge';
    # 'GP' and 'GBR' are listed but not constructed here — confirm whether
    # they are handled elsewhere.
    self.algorithm_list = [
        'PLS', 'GP', 'OLS', 'OMP', 'Lasso', 'Elastic Net', 'Ridge',
        'Bayesian Ridge', 'ARD', 'LARS', 'LASSO LARS', 'SVR', 'KRR', 'GBR'
    ]
    self.method = method
    self.outliers = None
    self.ransac = False
    #print(params)
    if self.method[i] == 'PLS':
        self.model = PLSRegression(**params[i])
    if self.method[i] == 'OLS':
        self.model = linear.LinearRegression(**params[i])
    if self.method[i] == 'OMP':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        self.model = linear.OrthogonalMatchingPursuit(**params_temp)
    if self.method[i] == 'LASSO':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        self.model = linear.Lasso(**params_temp)
    if self.method[i] == 'Elastic Net':
        params_temp = copy.copy(params[i])
        self.model = linear.ElasticNet(**params_temp)
    if self.method[i] == 'Ridge':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        self.model = linear.Ridge(**params_temp)
    if self.method[i] == 'BRR':
        self.model = linear.BayesianRidge(**params[i])
    if self.method[i] == 'ARD':
        self.model = linear.ARDRegression(**params[i])
    if self.method[i] == 'LARS':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        self.model = linear.Lars(**params_temp)
    if self.method[i] == 'SVR':
        self.model = svm.SVR(**params[i])
    if self.method[i] == 'KRR':
        self.model = kernel_ridge.KernelRidge(**params[i])
def model_initialization(name, **kwargs):
    """Instantiate a regressor whose family is substring-matched in ``name``.

    Branch order matters (e.g. 'ridge_regression' is tested before
    'kernel_ridge' would ever be reached for overlapping names) and is
    preserved from the original. Required kwargs per family: 'alpha' for
    ridge/lasso; 'kernel' for kernel_ridge and SVR. Unknown names fall
    through to the final ``return model`` and raise NameError, as before.
    """
    if name == 'linear_regression':
        model = linear_model.LinearRegression()
    elif 'ridge_regression' in name:
        model = linear_model.Ridge(kwargs['alpha'])
    elif 'lasso_regression' in name:
        model = linear_model.Lasso(kwargs['alpha'])
    elif 'elastic_net' in name:
        model = linear_model.ElasticNet(alpha=kwargs.get('alpha', 1),
                                        l1_ratio=kwargs.get('l1_ratio', 0.5),
                                        max_iter=2000)
    elif 'SGD_regression' in name:
        model = linear_model.SGDRegressor(
            loss=kwargs.get('loss', 'squared_loss'),
            penalty=kwargs.get('penalty', 'l2'),
            alpha=kwargs.get('alpha', 0.0001),
            l1_ratio=kwargs.get('l1_ratio', 0.15),
            n_iter=5)
    elif 'kernel_ridge' in name:
        model = kernel_ridge.KernelRidge(kernel=kwargs['kernel'])
    elif 'support_vector_regression' in name:
        model = svm.SVR(kernel=kwargs['kernel'],
                        C=kwargs.get('C', 1),
                        degree=kwargs.get('degree', 3),
                        gamma=kwargs.get('gamma', 'auto'),
                        cache_size=1000,
                        max_iter=2000)
    elif 'gradient_boost_regression' in name:
        model = ensemble.GradientBoostingRegressor(
            loss=kwargs.get('loss', 'ls'),
            learning_rate=kwargs.get('learning_rate', 0.1),
            n_estimators=kwargs.get('n_estimators', 100))
    elif 'xgboost_regression' in name:
        depth = kwargs.get('max_depth', 3)
        n_est = kwargs.get('n_estimators', 100)
        model = xgb.XGBRegressor(max_depth=depth, n_estimators=n_est)
        print('max_depth:', depth, 'n_e:', n_est)
    return model
def __init__(self, alpha=1.0, kernel='linear', gamma=None, degree=3,
             coef0=1, kernel_params=None):
    """Wrap an sklearn KernelRidge in this class's (external) base wrapper.

    Parameters mirror sklearn.kernel_ridge.KernelRidge and are forwarded
    verbatim; the constructed estimator is handed to the superclass
    (defined elsewhere in the project).
    """
    ridge_reg = kernel_ridge.KernelRidge(alpha=alpha, kernel=kernel,
                                         gamma=gamma, degree=degree,
                                         coef0=coef0,
                                         kernel_params=kernel_params)
    super(KernelRidgeRegression, self).__init__(ridge_reg)
def ridgeKernelCV_regression(params, X_tr, X_ts, y_tr, y_ts, kernel='linear'):
    """Grid-search a KernelRidge on the training split, evaluate on test.

    kernel: 'linear' | 'poly' | 'rbf' | 'sigmoid' | 'precomputed'.
    :return: (best alpha, mean abs error, abs-error variance,
        dual coefficients of the best estimator, test predictions).
    """
    search = sm.GridSearchCV(sk.KernelRidge(kernel=kernel), params)
    search.fit(X_tr, y_tr)
    best = search.best_estimator_
    y_pred = best.predict(X_ts)
    abs_err = np.abs(y_pred - y_ts)
    return (search.best_params_['alpha'], np.mean(abs_err), np.var(abs_err),
            best.dual_coef_, y_pred)
def make_ada_model(train_df, synergy_score, n_tree=120, base_estimator="RF"):
    """Build and fit an AdaBoost ensemble over a configurable base estimator.

    :param base_estimator: "RF" (random forest) or "Kernel"
        (degree-2 polynomial kernel ridge).
    :raises ValueError: for an unknown base_estimator. (Previously an
        unknown value left the local unbound and produced an opaque
        NameError at the AdaBoostRegressor call.)
    :return: the fitted model, as returned by the module-level fit().
    """
    from sklearn import ensemble
    from sklearn import kernel_ridge
    if base_estimator == "RF":
        base_estimator_ = ensemble.RandomForestRegressor()
    elif base_estimator == "Kernel":
        base_estimator_ = kernel_ridge.KernelRidge(alpha=0.01, kernel='poly',
                                                   degree=2)
    else:
        raise ValueError("unknown base_estimator: " + repr(base_estimator))
    model = ensemble.AdaBoostRegressor(n_estimators=n_tree,
                                       base_estimator=base_estimator_,
                                       learning_rate=0.9)
    model = fit(model, train_df, synergy_score)
    return model
def train_kernel_regression(KNOWN_filepath: str, QTABLE_filepath: str,
                            QUANTITIES_filepath: str, writepath: str,
                            cutoff: int = 4):
    """
    Train a regression model using a database of observations and their
    Q values. Disregard data with less than "cutoff" updates.

    :param KNOWN_filepath: Database of observations
    :param QTABLE_filepath: Q Table for observations
    :param QUANTITIES_filepath: Counts updates in Q Table
    :param writepath: Where to dump the model parameters
    :param cutoff: Disregard data with fewer updates than this
    :return: the fitted KernelRidge regressor

    Fix: the original pickled via ``pickle.dump(clf, open(writepath, "wb"))``,
    leaking the file handle; the file is now closed deterministically.
    """
    QTABLE = np.load(QTABLE_filepath)
    KNOWN = np.load(KNOWN_filepath)
    QUANTITIES = np.load(QUANTITIES_filepath)
    # One copy of the observation matrix per action column (6 actions),
    # each tagged below with its action code as an extra feature.
    KNOWN_extended = np.tile(KNOWN, (6, 1))
    action_codes = np.concatenate(
        [np.ones(KNOWN.shape[0]) * i for i in range(6)])
    # Flatten the per-action Q values / update counts into single columns
    # aligned with the tiled observations.
    qtable_cols = np.concatenate([QTABLE[:, i] for i in range(6)])
    quantities_cols = np.concatenate([QUANTITIES[:, i] for i in range(6)])
    KNOWN_extended = np.append(KNOWN_extended, action_codes[:, np.newaxis],
                               axis=1)
    # Keep only rows whose Q entry was updated at least `cutoff` times.
    target_rows = np.where(quantities_cols >= cutoff)
    data = KNOWN_extended[target_rows]
    target = qtable_cols[target_rows]
    clf = kernel_ridge.KernelRidge(kernel="poly")
    clf.fit(data, target)
    with open(writepath, "wb") as model_file:
        pickle.dump(clf, model_file)
    return clf
def getRegressor(regressorName, regressorParams):
    """Factory: map a short code to a regressor built from regressorParams.

    :raises Exception: for an unrecognized code.
    """
    # Lazy factories — nothing is constructed until the chosen entry runs.
    factories = {
        'p': lambda: PolynomialRegressor(**regressorParams),
        'ch': lambda: ChebyshevRegressor(**regressorParams),
        'c': lambda: ClusteredRegressor(**regressorParams),
        'a': lambda: AveragedRegressor(**regressorParams),
        'rf': lambda: ensemble.RandomForestRegressor(**regressorParams),
        'cfr': lambda: CapFloorRegionRegressor(**regressorParams),
        'svr': lambda: svm.SVR(**regressorParams),
        'kr': lambda: kernel_ridge.KernelRidge(**regressorParams),
    }
    if regressorName not in factories:
        raise Exception('Unhandled regression method: ' + regressorName)
    return factories[regressorName]()
def kernel_ridge_version(X_tr,y_tr,X_te,y_te):
    """Grid-search a KernelRidge over linear/RBF kernels with a time-series
    CV splitter, report scores, and evaluate on the test split.

    Returns (test R^2, mean squared test error, best parameter dict).

    NOTE(review): uses the pre-0.18 sklearn API (``grid_search`` module and
    ``grid_scores_`` attribute) — this requires an old sklearn pin; confirm.
    """
    # Candidate grids: a linear kernel (alpha only) and an RBF kernel
    # (alpha x gamma).
    tuned_parameters = [{'kernel':['linear'],'alpha': [0.001,0.05,0.01,0.1,1]},
                        {'kernel':['rbf'],'alpha':[0.001,0.05,0.01,0.1,1],
                         'gamma':[0.1,1,10,100,500]}]
    print("# Tuning hyper-parameters")
    # TimeSeriesCV is a project-local splitter (order-preserving folds).
    clf = grid_search.GridSearchCV(
        kernel_ridge.KernelRidge(alpha=1, coef0=1, degree=3, gamma=None,
                                 kernel='linear', kernel_params=None),
        tuned_parameters, cv=TimeSeriesCV(len(y_tr),fold=5))
    clf.fit(X_tr, y_tr)
    print("Best parameters set found on development set:")
    print(clf.best_params_)
    print("Grid scores on development set:")
    for params, mean_score, scores in clf.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r" % (mean_score, scores.std() * 2,
                                            params))
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print('R^2: %.2f' % clf.score(X_te, y_te))
    # NOTE(review): despite the label, this is the MEAN squared error, not a
    # residual sum.
    rss=np.mean((clf.predict(X_te) - y_te) ** 2)
    print("Residual sum of squares: %.2f" % rss)
    return (clf.score(X_te, y_te),rss,clf.best_params_)
def get_model(model_type, c=0, epsilon=0, gamma=0):
    """Build the regressor selected by model_type (module-level constants).

    :param c: SVR C / KRR 1/(2*alpha) control. NOTE(review): the KRR branch
        computes 1/(2*c), which raises ZeroDivisionError at the default c=0
        — behavior kept; callers must pass c explicitly for KRR.
    :raises ValueError: on an unrecognized model_type.

    Fix: the RBF branch read ``model = model = svm.SVR(...)`` — a duplicated
    assignment, collapsed to a single one (no behavior change).
    """
    if model_type == RBF:
        model = svm.SVR(kernel='rbf', C=c, epsilon=epsilon, gamma=gamma)
    elif model_type == POLY2:
        model = svm.SVR(kernel='poly', C=c, degree=2, epsilon=epsilon)
    elif model_type == POLY3:
        model = svm.SVR(kernel='poly', C=c, degree=3, epsilon=epsilon)
    elif model_type == POLY4:
        model = svm.SVR(kernel='poly', C=c, degree=4, epsilon=epsilon)
    elif model_type == LIN:
        model = svm.SVR(kernel='linear', C=c, epsilon=epsilon)
    elif model_type == Rand_F:
        model = ensemble.RandomForestRegressor()
    elif model_type == SGD:
        model = linear_model.SGDRegressor()
    elif model_type == KRR:
        model = kernel_ridge.KernelRidge(kernel='linear', alpha=1/(2*c))
    elif model_type == DT:
        model = DecisionTreeRegressor()
    else:
        raise(ValueError('unknown model type: ' + str(model_type)))
    return model
def est_KR():
    """Return an untrained KernelRidge plus its hyper-parameter search grid.

    Grid covers kernel in {poly, rbf}, a small alpha range (including 0),
    and three gamma values; degree/coef0 are left at sklearn defaults.
    """
    grid = [{
        'kernel': ('poly', 'rbf'),
        'alpha': (1e-3, 1e-2, 1e-1, 0.0, 0.5, 1.),
        'gamma': (0.1, 1, 2),
    }]
    return kernel_ridge.KernelRidge(), grid
def evaluate_regression(scat, target, cross_val_folds, kind='linear',
                        alphas=10.**(-np.arange(0, 10))):
    """Cross-validated regression of target on scat for a range of alphas.

    kind='linear' uses Ridge; kind='bilinear' uses a degree-2 polynomial
    KernelRidge. Each model is wrapped in a StandardScaler pipeline, and
    MAE/RMSE from out-of-fold predictions are printed per alpha.

    :raises ValueError: for any other kind.
    """
    for alpha in alphas:
        if kind == 'linear':
            model = linear_model.Ridge(alpha=alpha)
        elif kind == 'bilinear':
            model = kernel_ridge.KernelRidge(alpha=alpha, kernel='poly',
                                             degree=2)
        else:
            raise ValueError('Invalid kind {}'.format(kind))
        regressor = pipeline.make_pipeline(preprocessing.StandardScaler(),
                                           model)
        prediction = model_selection.cross_val_predict(
            regressor, X=scat, y=target, cv=cross_val_folds)
        residual = prediction - target
        mae = np.mean(np.abs(residual))
        rmse = np.sqrt(np.mean(residual**2))
        print('Ridge regression, alpha: {}, MAE: {}, RMSE: {}'.format(
            alpha, mae, rmse))
def est_KR(est):
    """Return an untrained KernelRidge and its hyper-parameter search grid.

    Bug fix: the original built both the grid and the estimator but fell
    off the end of the function, returning None. It now returns
    ``(estimator, grid)``, matching its sibling factory ``est_KR()`` in
    this file. The incoming ``est`` argument was already ignored
    (immediately rebound) and is kept only for signature compatibility.
    """
    hyper_params = [{
        'kernel': (
            'poly',
            'rbf',
        ),
        'alpha': (
            1e-3, 1e-2, 1e-1, 0.0, 0.5, 1.,
        ),
        'gamma': (
            0.1, 1, 2,
        ),
    }]
    est = kernel_ridge.KernelRidge()
    return est, hyper_params
def __init__(self, method, yrange, params, i=0):
    """Select and construct the regression model named by method[i],
    parameterized by params[i]. Several families support an optional 'CV'
    flag in their params dict that switches to the cross-validated variant
    (the flag is popped before the kwargs reach sklearn).
    """
    #TODO: yrange doesn't currently do anything. Remove or do something with it!
    # NOTE(review): the dispatch checks 'LASSO' and 'BRR', which do not
    # literally match the list entries 'Lasso' / 'Bayesian Ridge' — confirm
    # the caller's naming contract.
    self.algorithm_list = [
        'PLS', 'GP', 'OLS', 'OMP', 'Lasso', 'Elastic Net', 'Ridge',
        'Bayesian Ridge', 'ARD', 'LARS', 'LASSO LARS', 'SVR', 'KRR',
    ]
    self.method = method
    self.outliers = None
    self.ransac = False
    print(params)
    if self.method[i] == 'PLS':
        self.model = PLSRegression(**params[i])
    if self.method[i] == 'OLS':
        self.model = linear.LinearRegression(**params[i])
    if self.method[i] == 'OMP':
        # check whether to do CV or not ('CV' is required for OMP)
        self.do_cv = params[i]['CV']
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # Remove CV parameter
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.OrthogonalMatchingPursuit(**params_temp)
        else:
            # the CV variant does not accept 'precompute'
            params_temp.pop('precompute')
            self.model = linear.OrthogonalMatchingPursuitCV(**params_temp)
    if self.method[i] == 'LASSO':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # check whether to do CV or not ('CV' is optional here)
        try:
            self.do_cv = params[i]['CV']
            # Remove CV parameter
            params_temp.pop('CV')
        except:
            self.do_cv = False
        if self.do_cv is False:
            self.model = linear.Lasso(**params_temp)
        else:
            # LassoCV picks alpha itself
            params_temp.pop('alpha')
            self.model = linear.LassoCV(**params_temp)
    if self.method[i] == 'Elastic Net':
        params_temp = copy.copy(params[i])
        try:
            self.do_cv = params[i]['CV']
            params_temp.pop('CV')
        except:
            self.do_cv = False
        if self.do_cv is False:
            self.model = linear.ElasticNet(**params_temp)
        else:
            # fixed l1_ratio candidate list for the CV search
            params_temp['l1_ratio'] = [.1, .5, .7, .9, .95, .99, 1]
            self.model = linear.ElasticNetCV(**params_temp)
    if self.method[i] == 'Ridge':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        try:
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # Remove CV parameter
            params_temp.pop('CV')
        except:
            self.do_cv = False
        if self.do_cv:
            self.model = linear.RidgeCV(**params_temp)
        else:
            self.model = linear.Ridge(**params_temp)
    if self.method[i] == 'BRR':
        self.model = linear.BayesianRidge(**params[i])
    if self.method[i] == 'ARD':
        self.model = linear.ARDRegression(**params[i])
    if self.method[i] == 'LARS':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        try:
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # Remove CV parameter
            params_temp.pop('CV')
        except:
            self.do_cv = False
        if self.do_cv is False:
            self.model = linear.Lars(**params_temp)
        else:
            self.model = linear.LarsCV(**params_temp)
    if self.method[i] == 'LASSO LARS':
        # 'model' selects plain (0), CV (1), or IC (2) LassoLars
        model = params[i]['model']
        params_temp = copy.copy(params[i])
        params_temp.pop('model')
        if model == 0:
            self.model = linear.LassoLars(**params_temp)
        elif model == 1:
            self.model = linear.LassoLarsCV(**params_temp)
        elif model == 2:
            self.model = linear.LassoLarsIC(**params_temp)
        else:
            print("Something went wrong, \'model\' should be 0, 1, or 2")
    if self.method[i] == 'SVR':
        self.model = svm.SVR(**params[i])
    if self.method[i] == 'KRR':
        self.model = kernel_ridge.KernelRidge(**params[i])
    if self.method[i] == 'GP':
        # get the method for dimensionality reduction and the number of components
        self.reduce_dim = params[i]['reduce_dim']
        self.n_components = params[i]['n_components']
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # Remove parameters not accepted by Gaussian Process
        params_temp.pop('reduce_dim')
        params_temp.pop('n_components')
        self.model = GaussianProcess(**params_temp)
path = 'E://utils' sys.path.append(path) import common_utils as utils import regression_utils as rutils from sklearn import metrics, kernel_ridge, svm, model_selection scoring = metrics.make_scorer(rutils.rmse, greater_is_better=False) X, y = rutils.generate_nonlinear_synthetic_data_regression(n_samples=200, n_features=1) X_train, X_test, y_train, y_test = model_selection.train_test_split( X, y, test_size=0.1, random_state=1) rutils.plot_data_2d_regression(X_train, y_train) kernel_lr = kernel_ridge.KernelRidge(kernel="rbf") kernel_lr_grid = { 'alpha': [0.0001, 0.01, 0.05, 0.2, 0.5, 1], 'gamma': [0.01, 0.1, 1, 2, 3, 4, 5, 10] } final_kernel_lr_model = utils.grid_search_best_model(kernel_lr, kernel_lr_grid, X_train, y_train, scoring=scoring) rutils.plot_model_2d_regression(final_kernel_lr_model, X_train, y_train) rutils.regression_performance(final_kernel_lr_model, X_test, y_test) kernel_svm = svm.SVR(kernel="rbf") kernel_svm_grid = { 'C': [0.2, 0.5, 10, 20, 50, 100],
# Train a regressor (chosen on the command line) to predict match outcome
# probabilities from FEATURES (defined elsewhere in the file).
LABELS = ["Curr_home_p", "Curr_away_p", "Curr_draw_p"]
data_set_path = 'train.csv'
df = pd.read_csv(data_set_path)
# 80/20 random train/test split of the raw frame.
train, test = train_test_split(df, test_size=0.2)
# NOTE(review): DataFrame.as_matrix was removed in pandas 1.0 — this
# presumably targets an older pandas; confirm (the replacement would be
# .loc[:, cols].values).
x_train = train.as_matrix(columns=FEATURES)
x_test = test.as_matrix(columns=FEATURES)
z_train = train.as_matrix(columns=LABELS)
z_test = test.as_matrix(columns=LABELS)
# Model family comes from the first CLI argument: linear | poly | tree.
# NOTE(review): any other value leaves clf unbound and the fit below
# raises NameError.
model = sys.argv[1]
if model == 'linear':
    clf = kernel_ridge.KernelRidge(kernel="linear")
elif model == 'poly':
    clf = kernel_ridge.KernelRidge(kernel="poly", degree=2, alpha=0.6,
                                   gamma=0.9)
elif model == 'tree':
    clf = tree.DecisionTreeRegressor(max_depth=5, min_samples_leaf=4)
clf.fit(x_train, z_train)
z_pred = clf.predict(x_test)
# 5-fold CV on the training portion; for regressors this reports R^2,
# despite the "Accuracy" label below.
cv_scores = cross_val_score(clf, x_train, z_train, cv=5)
print("Accuracy: %0.2f (+/- %0.2f)" % (cv_scores.mean(), cv_scores.std() * 2))
#特征筛选,使用RLR from sklearn.linear_model import RandomizedLogisticRegression as RLR rlr = RLR() rlr.fit(train_data, probs) rlr.get_support() #准备回归分类器 import sklearn from sklearn import gaussian_process, kernel_ridge, isotonic from sklearn.ensemble import ExtraTreesClassifier Regressors = { # 'pls':cross_decomposition.PLSRegression(),报错 'gradient boosting': ensemble.GradientBoostingRegressor(), # 'gaussian':gaussian_process.GaussianProcessRegressor(),报错 # 'isotonic':isotonic.IsotonicRegression(),报错 'kernelridge': kernel_ridge.KernelRidge(), 'ARD': linear_model.ARDRegression(), 'bayesianridge': linear_model.BayesianRidge(), # 'elasticnet':linear_model.ElasticNet(),#报错 'HuberRegressor': linear_model.HuberRegressor(), 'LinearRegression': linear_model.LinearRegression(), # 'logistic':linear_model.LogisticRegression(),报错 # 'linear_model.RidgeClassifier':linear_model.RidgeClassifier(),报错 'k-neighbor': neighbors.KNeighborsRegressor(), 'SVR': svm.LinearSVR(), 'NUSVR': svm.NuSVR(), 'extra tree': tree.ExtraTreeRegressor(), 'decesion tree': tree.DecisionTreeRegressor(), # 'random losgistic':linear_model.RandomizedLogisticRegression(),报错 # 'dummy':dummy.DummyRegressor()报错 }