def test_regression_bootstrap_sampled_hyperpar_tune(self):
    """Check single (outer) tuning against re-tuning on every bootstrap.

    Three bootstrap runs share the same data, target and bootstrap count:
    two identically configured "tune once" runs and one run where the
    regressor itself is an ``ElasticNetCV`` and no separate CV regressor
    is supplied. The two identical runs must agree; the re-tuned run must
    differ from them.
    """
    shared_args = {'data': self.data, 'target': self.target, 'bootstraps': 5}

    def mean_coef_std(regressor, regressor_cv):
        # Run the bootstrap and collapse the result to one scalar: the mean
        # of the per-column standard deviations.
        result = bootstrap.regression_bootstrap(
            regressor=regressor, regressor_cv=regressor_cv, **shared_args)
        return result.std(axis=0).mean()

    # Hyperparameters tuned once, prior to bootstrapping.
    single_tune = mean_coef_std(
        linear_model.ElasticNet(random_state=1),
        linear_model.ElasticNetCV(random_state=10, cv=3))

    # Hyperparameters re-tuned on every bootstrap sample.
    per_sample_tune = mean_coef_std(
        linear_model.ElasticNetCV(random_state=10, cv=3),
        None)

    # Fresh instances with the same setup as the first run; used to confirm
    # that separate instances give the same result.
    single_tune_repeat = mean_coef_std(
        linear_model.ElasticNet(random_state=1),
        linear_model.ElasticNetCV(random_state=10, cv=3))

    self.assertNotEqual(single_tune, per_sample_tune)
    self.assertEqual(single_tune, single_tune_repeat)
def elasticnet(X, Y_casual, Y_registered, testSet_final):
    """Predict a combined count from two separately fitted elastic nets.

    One default ``ElasticNetCV`` is fitted on ``Y_casual`` and another on
    ``Y_registered``. Each model's prediction for ``testSet_final`` is mapped
    back with ``exp(p) - 1`` (presumably the targets were log1p-transformed;
    confirm with the caller), the two are added, rounded to the nearest
    integer and cast to ``np.intp``.

    Parameters
    ----------
    X : training features passed to both ``fit`` calls.
    Y_casual, Y_registered : the two training targets.
    testSet_final : features to predict on.

    Returns
    -------
    numpy array of ``np.intp`` with the summed, rounded predictions.
    """
    # Regularisation strength and mixing are chosen by ElasticNetCV's
    # defaults; the original unused `alpha`/`l1_ratio` locals were removed
    # because they were never passed to the models.
    casual_model = linear_model.ElasticNetCV()
    registered_model = linear_model.ElasticNetCV()
    casual_model.fit(X, Y_casual)
    registered_model.fit(X, Y_registered)

    casual_pred = np.exp(casual_model.predict(testSet_final)) - 1
    registered_pred = np.exp(registered_model.predict(testSet_final)) - 1
    return np.intp(np.around(casual_pred + registered_pred))
def fit_model(self, x, y, modelName='lin'):
    """Fit and return the regressor selected by ``modelName``.

    Recognised names are ``'lin'`` (LinearRegression), ``'ridge'`` (Ridge),
    ``'boost'`` (GradientBoostingRegressor with fixed settings) and
    ``'elastic'`` (ElasticNetCV). Any other name returns ``None`` without
    fitting anything.
    """
    boost_settings = {
        'n_estimators': 430,
        'max_depth': 5,
        'min_samples_split': 2,
        'learning_rate': 0.01,
        'loss': 'ls',
    }
    # Factories rather than instances, so only the requested estimator is
    # ever constructed.
    factories = {
        'lin': lambda: linear_model.LinearRegression(),
        'ridge': lambda: linear_model.Ridge(),
        'boost': lambda: sk.GradientBoostingRegressor(**boost_settings),
        'elastic': lambda: linear_model.ElasticNetCV(),
    }
    make_estimator = factories.get(modelName)
    if make_estimator is None:
        return None
    estimator = make_estimator()
    estimator.fit(x, y)
    return estimator
def useElasticNetCV(xTest, yTest, xTrain, yTrain):
    """Fit a 10-fold ``ElasticNetCV`` on the training data and predict.

    Parameters
    ----------
    xTest : features to predict on.
    yTest : unused; kept so existing callers' argument order still works.
    xTrain, yTrain : training features and target.

    Returns
    -------
    tuple ``(yPredict, enModel)``: predictions for ``xTest`` and the fitted
    estimator.
    """
    enModel = linear_model.ElasticNetCV(cv=10, random_state=0)
    enModel.fit(xTrain, yTrain)
    # The fitted coefficients remain available to callers as enModel.coef_;
    # the unused local copy was removed.
    yPredict = enModel.predict(xTest)
    return yPredict, enModel
def linearMethod(train_data, train_target, model):
    """Fit a linear model on an 80/20 split and print train/test scores.

    Parameters
    ----------
    train_data, train_target : features and target to split.
    model : ``'Ridge'``, ``'Lasso'`` or ``'ElasticNet'`` select the matching
        cross-validated estimator; any other value falls back to
        ``LinearRegression``.

    The function returns nothing; both scores (the estimator's ``score``
    method) are written to stdout.
    """
    # Split the data set.
    X_train, X_test, y_train, y_test = train_test_split(
        train_data, train_target, test_size=0.2)

    # Standardise the features; the scaler is fitted on the training split
    # only and then applied to both splits.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    if model == 'Ridge':
        # Ridge regression with cross-validation to tune the penalty.
        linear = lm.RidgeCV()
    elif model == 'Lasso':
        # Lasso regression with cross-validation to tune the penalty.
        linear = lm.LassoCV()
    elif model == 'ElasticNet':
        # Elastic-net regression with cross-validation to tune the penalty.
        linear = lm.ElasticNetCV()
    else:
        # Ordinary least squares.
        linear = lm.LinearRegression()

    # Train the model parameters.
    linear.fit(X_train, y_train)

    # Score function. A single pre-formatted string is printed so the output
    # is the same under Python 2 and Python 3 (the original used Python 2
    # print statements, which also emitted two spaces after the colon).
    print("train's score:  %s" % (linear.score(X_train, y_train),))
    print("test's score:  %s" % (linear.score(X_test, y_test),))
def get_estim(self, y):
    '''
    Populate ``self.estimators`` with estimators for the learning problem.

    Regression estimators are chosen when ``self.method_type == 'regr'``,
    classification estimators otherwise. The choice also depends on the
    number of samples (``y.shape[0]``) and, for classification, on whether
    ``y`` has exactly two unique values. Seeds are set where the estimator
    accepts one. Nothing is returned; the list is stored on the instance.

    NOTE(review): the source was collapsed onto one line, so the nesting of
    the classification branch below is a reconstruction -- confirm it
    against the original file.
    NOTE(review): ``normalize=True`` and ``loss='log'`` are not accepted by
    recent scikit-learn releases -- confirm the targeted version.
    '''
    self.estimators = []
    if self.method_type == 'regr':
        self.estimators.append(linear_model.ElasticNetCV(random_state=1, normalize=True))
        self.estimators.append(ensemble.GradientBoostingRegressor(random_state=1))
        self.estimators.append(ensemble.RandomForestRegressor(random_state=1))
        # SVR is only added for data sets of at most 5000 samples.
        if y.shape[0] <= 5000:
            self.estimators.append(svm.SVR())
    else:
        if y.shape[0] < 50:
            # Two unique target values: Bernoulli NB plus a log-loss SGD
            # classifier; otherwise multinomial NB.
            if len(np.unique(y)) == 2:
                self.estimators.append(naive_bayes.BernoulliNB())
                self.estimators.append(linear_model.SGDClassifier(loss='log', random_state=1))
            else:
                self.estimators.append(naive_bayes.MultinomialNB())
        self.estimators.append(naive_bayes.GaussianNB())
        self.estimators.append(ensemble.RandomForestClassifier(random_state=1))
        # SVC (with probability estimates) only for at most 5000 samples.
        if y.shape[0] <= 5000:
            self.estimators.append(svm.SVC(probability=True))
def regressor_cv_default():
    """Build the default cross-validated elastic-net regressor.

    Returns an unfitted ``ElasticNetCV`` with a fixed grid of ``l1_ratio``
    candidates, 100 alphas, 10-fold CV, ``random_state=30`` and
    ``normalize=True``.
    """
    settings = dict(
        l1_ratio=[.0001, .1, .5, .7, .9, .95, .99, 1],
        n_alphas=100,
        cv=10,
        random_state=30,
        normalize=True,
    )
    return linear_model.ElasticNetCV(**settings)
def md_modelsele_get_obj(self):
    """
    Build an unfitted scikit-learn linear estimator from ``self.kw``.

    ``self.kw`` uses glmnet-style option names (alpha, lambda, intercept,
    thresh); they are renamed to scikit-learn keyword arguments through the
    mapping tables below. Keys listed in ``pub_param`` are passed through
    unchanged.

    ``self.target_type`` selects the estimator family:

    * ``"c"``: ``ElasticNetCV`` when ``self.kw["cv"]`` is truthy,
      otherwise ``LinearRegression``.
    * ``"b"``: ``LogisticRegressionCV`` when ``self.kw["cv"]`` is truthy,
      otherwise ``LogisticRegression``; the penalty is ``"l1"`` when
      ``alpha`` is 1 (or absent) and ``"l2"`` otherwise, and ``lambda`` is
      inverted into ``C`` / ``Cs``.

    Notes kept from the original docstring: the glmnet family names are
    "gaussian", "binomial", "poisson", "multinomial", "cox", "mgaussian",
    and a related objective map is {"c": "reg:linear",
    "b": "binary:logistic", "p": "count:poisson", "s": "survival:cox",
    "m": "multi:softprob", "r": "rank:pairwise"}.

    :return: the estimator, or None (implicitly) for any other target type.
    """
    # Options forwarded as-is when present in self.kw.
    pub_param = ["verbose", "n_jobs","random_state", "copy_X", "solver", "max_iter"]
    # glmnet name -> scikit-learn keyword, per estimator family.
    linear_map = {"intercept":"fit_intercept", "lambda":"alpha", "alpha":"l1_ratio", "thresh":"tol"}
    cv_linear_map = {"intercept":"fit_intercept", "lambda":"alphas", "alpha":"l1_ratio", "thresh":"tol"}
    log_map = {"intercept":"fit_intercept", "thresh":"tol"}
    params = {k:v for k,v in self.kw.items() if k in pub_param}
    if self.target_type == "c":
        if self.kw.get("cv"):
            params = dict(params, **{cv_linear_map[k]:v for k,v in self.kw.items() if k in cv_linear_map})
            # NOTE(review): unlike the LogisticRegressionCV branch below, the
            # value of self.kw["cv"] is not passed on here -- confirm intent.
            return glm.ElasticNetCV(**params)
        params = dict(params, **{linear_map[k]: v for k, v in self.kw.items() if k in linear_map})
        # NOTE(review): linear_map can produce alpha/l1_ratio/tol keywords,
        # which look like ElasticNet arguments rather than LinearRegression
        # ones -- verify which estimator is intended.
        return glm.LinearRegression(**params)
    if self.target_type == "b":
        params = dict(params, **{log_map[k]: v for k, v in self.kw.items() if k in log_map})
        params["penalty"] = "l1" if self.kw.get("alpha", 1) == 1 else "l2"
        if self.kw.get("cv"):
            # Accept a single lambda or a list; Cs is the element-wise inverse.
            lambdas = self.kw.get("lambda", 1)
            lambdas = lambdas if isinstance(lambdas, list) else [lambdas]
            params["Cs"] = list(1/np.array(lambdas))
            return glm.LogisticRegressionCV(cv = self.kw["cv"], **params)
        params["C"] = 1/self.kw.get("lambda", 1)
        return glm.LogisticRegression(**params)
def create_predictor(method):
    """Return an unfitted regressor for the given method key.

    Supported keys: ``'ols'``, ``'lasso'``, ``'elnet'``, ``'gbt'``, ``'rf'``
    and ``'ridge'``. An unrecognised key yields ``None``. All six estimators
    are constructed on every call, as in the original implementation.
    """
    predictors = {
        'ols': linear_model.LinearRegression(fit_intercept=False),
        'lasso': linear_model.LassoCV(cv=5, fit_intercept=False),
        'elnet': linear_model.ElasticNetCV(
            l1_ratio=[.01, .1, .3, .5, .7, .9, .95, .99, 1], cv=5),
        'gbt': ensemble.GradientBoostingRegressor(n_estimators=50,
                                                  max_depth=2),
        'rf': ensemble.RandomForestRegressor(max_depth=2, random_state=0,
                                             n_estimators=50),
        'ridge': linear_model.RidgeCV(alphas=[1e-2, 1e-1, 1, 3, 5, 10, 20],
                                      cv=5, fit_intercept=False),
    }
    return predictors.get(method)
def image_lasso(self, vis_arr, sphere, alpha, scale=True, use_cv=False):
    """Solve for non-negative sky pixel values with an L1-penalised fit.

    The complex operator from ``self.make_gamma(sphere)`` and the complex
    ``vis_arr`` are each split into real and imaginary parts and stacked, so
    the regression is done on real numbers. With ``use_cv`` false an
    ``ElasticNet`` with ``alpha / sqrt(n_pixels)`` is used; otherwise a
    5-fold ``ElasticNetCV`` picks alpha and ``alpha`` is ignored. Both use
    ``l1_ratio=1.0`` and ``positive=True``.

    The solution is pushed to ``sphere.set_visible_pixels(sky, scale)`` and
    returned as a column vector.
    """
    gamma = self.make_gamma(sphere)
    gamma_re = np.real(gamma)
    gamma_im = np.imag(gamma)
    design = np.block([[gamma_re], [gamma_im]])
    observed = np.concatenate((np.real(vis_arr), np.imag(vis_arr)))

    # Disabled debugging aid: dump the operator for further analysis.
    if False:
        fname = "l1_big_files.npz"
        np.savez_compressed(fname,
                            gamma_re=gamma_re,
                            gamma_im=gamma_im,
                            vis_re=np.real(vis_arr),
                            vis_im=np.imag(vis_arr))
        logger.info("Operator file {} saved".format(fname))

    logger.info("proj_operator = {}".format(design.shape))
    logger.info("vis_aux = {}".format(observed.shape))

    pixel_count = sphere.pixels.shape[0]
    if use_cv:
        solver = linear_model.ElasticNetCV(l1_ratio=1.0, cv=5,
                                           max_iter=10000, positive=True)
        solver.fit(design, observed)
        logger.info("Cross Validation = {}".format(solver.alpha_))
    else:
        solver = linear_model.ElasticNet(alpha=alpha/np.sqrt(pixel_count),
                                         l1_ratio=1.0,
                                         max_iter=10000,
                                         positive=True)
        solver.fit(design, observed)

    sky = solver.coef_
    logger.info("sky = {}".format(sky.shape))
    sphere.set_visible_pixels(sky, scale)
    return sky.reshape(-1, 1)
def setup_models():
    """Return a list of unfitted regressors to compare.

    The list order is: decision tree, gradient boosting, SVR, then a series
    of ``linear_model`` regressors. The three cross-validated linear models
    share the same alpha candidates.
    """
    boosting_params = {
        'n_estimators': 1000,
        'max_leaf_nodes': 17,
        'max_depth': None,
        'random_state': 2,
        'min_samples_split': 5,
        'learning_rate': 0.1,
        'subsample': 1.0,
    }
    cv_alphas = (0.01, 0.1, 1.0, 10.0)

    # Each CV estimator receives its own list, as in the original code.
    # The MultiTask variants of LassoCV / ElasticNetCV were disabled in the
    # original and are still not included.
    return [
        tree.DecisionTreeRegressor(),
        ensemble.GradientBoostingRegressor(**boosting_params),
        svm.SVR(),
        linear_model.LinearRegression(),
        linear_model.RidgeCV(alphas=list(cv_alphas)),
        linear_model.LassoCV(alphas=list(cv_alphas)),
        linear_model.ElasticNetCV(alphas=list(cv_alphas)),
        linear_model.BayesianRidge(),
        linear_model.SGDRegressor(),
        linear_model.PassiveAggressiveRegressor(),
        linear_model.RANSACRegressor(),
        linear_model.TheilSenRegressor(),
        linear_model.HuberRegressor(),
    ]
def linear_regression(mdl, method=None):
    """Fit a (optionally regularised) linear model on standardised features.

    :param mdl: mdl of type RegressionModel; reads ``train_x``, ``test_x``,
        ``train_y`` and ``model_name``, and receives the fitted estimator in
        ``mdl.model``
    :param method: regularisation method to run: ``None`` (plain linear
        regression), ``"LASSO"``, ``"RIDGE"`` or ``"ELASTIC"``
    :raises ValueError: if ``method`` is not one of the values above; raised
        before ``mdl`` is modified
    """
    # Validate first so an unknown method fails loudly instead of silently
    # fitting whatever estimator mdl.model held before (the original built
    # the ValueError but never raised it).
    if method is not None and method not in ("LASSO", "RIDGE", "ELASTIC"):
        raise ValueError("Unknown Linear method")

    if method is not None:
        mdl.model_name = mdl.model_name + "_" + method

    # Scaler is fitted on the training features only, then applied to both.
    sc = StandardScaler()
    train_df_x = sc.fit_transform(mdl.train_x)
    test_df_x = sc.transform(mdl.test_x)

    if method is None:
        mdl.model = linear_model.LinearRegression()
    elif method == "LASSO":
        mdl.model = linear_model.LassoCV(max_iter=100)
    elif method == "RIDGE":
        # RidgeCV has no max_iter argument; passing one raises TypeError.
        mdl.model = linear_model.RidgeCV()
    else:  # "ELASTIC"
        mdl.model = linear_model.ElasticNetCV()

    mdl.model.fit(X=train_df_x, y=mdl.train_y)
    evaluate_model(mdl, train_df_x, test_df_x)
def start_ltm(tup, taus, w=0.1, add_coh=False, use_cv=False,
              add_const=False, verbose=False, **kwargs):
    """Calculate the lifetime density map for given data.

    Parameters
    ----------
    tup : datatuple
        tuple with wl, t, data
    taus : list of floats
        Used to build the basis vectors.
    w : float, optional
        Sigma passed to the basis construction, by default 0.1.
    add_coh : bool, optional
        If true, coherent contributions are added to the basis. By default
        False.
    use_cv : bool, optional
        Whether to use cross-validation, by default False
    add_const : bool, optional
        Whether to add an explicit constant, by default False
    verbose : bool, optional
        Whether to be verbose, by default False
    **kwargs
        Forwarded to the sklearn estimator constructor.

    Returns
    -------
    tuple of (linear_model, coefs, fit, alphas)
        The linear model is the used sklearn model. Coefs is the array of
        the coefficients, fit contains the resulting fit and alphas is an
        array of the applied alpha value when using cv. Entries of alphas
        are NaN when the model exposes no ``alpha_`` attribute.
    """
    X = _make_base(tup, taus, w=w, add_const=add_const, add_coh=add_coh)

    if not use_cv:
        mod = lm.ElasticNet(**kwargs, l1_ratio=0.98)
    else:
        mod = lm.ElasticNetCV(**kwargs, l1_ratio=0.98)

    # An explicit constant column in the basis replaces the intercept.
    mod.fit_intercept = not add_const
    # A real bool: recent scikit-learn validates this parameter's type.
    mod.warm_start = True

    n_channels = tup.data.shape[1]
    coefs = np.empty((X.shape[1], n_channels))
    fit = np.empty_like(tup.data)
    # NaN-filled so callers never see uninitialised memory when alpha_ is
    # unavailable.
    alphas = np.full(n_channels, np.nan)

    for i in range(n_channels):
        if verbose:
            print(i, 'ha', end=';')
        mod.fit(X, tup.data[:, i])
        coefs[:, i] = mod.coef_
        fit[:, i] = mod.predict(X)
        if hasattr(mod, 'alpha_'):
            alphas[i] = mod.alpha_
    return mod, coefs, fit, alphas
def train_models(x, y):
    """Fit three linear models on the same data and return them as a list.

    The list holds, in order, a ``Lars`` limited to one non-zero
    coefficient, a default ``ElasticNetCV`` and a default ``BayesianRidge``.
    """
    estimators = [
        linear_model.Lars(n_nonzero_coefs=1),
        linear_model.ElasticNetCV(),
        linear_model.BayesianRidge(),
    ]
    for estimator in estimators:
        estimator.fit(x, y)
    return estimators
def fit_elnet(X, y):
    """Fit and return an intercept-free ``ElasticNetCV`` on ``X`` and ``y``.

    A fixed grid of ``l1_ratio`` candidates is searched and the iteration
    cap is raised to one million. A progress message is printed first.
    """
    print('performing Elastic Net regression')
    l1_candidates = [.001, .1, .5, .7, .9, .95, .99, 1]
    estimator = linear_model.ElasticNetCV(max_iter=1000000,
                                          l1_ratio=l1_candidates,
                                          fit_intercept=False)
    estimator.fit(X, y)
    return estimator
def test_sk_ElasticNetCV():
    """Fit a default ``ElasticNetCV`` on ``iris_data`` and upload it.

    The first feature row is passed to ``upload`` together with the fitted
    model and a small metadata dict.
    """
    print("Testing sklearn, ElasticNetCV...")
    features, labels = iris_data
    estimator = linear_model.ElasticNetCV()
    estimator.fit(features, labels)
    first_row = features[0, :]
    upload(estimator, first_row, {'name': "ElasticNetCV test"})
def image_lasso(self, vis_arr, sphere, alpha, l1_ratio, scale=False, use_cv=False):
    """Solve for non-negative sky pixel values with an elastic-net fit.

    ``self.make_gamma(sphere)`` supplies the operator and ``vis_to_real``
    converts the visibilities into the regression target. With ``use_cv``
    false an ``ElasticNet`` with ``alpha / sqrt(n_pixels)`` is used;
    otherwise a 5-fold ``ElasticNetCV`` chooses alpha and ``alpha`` only
    appears in the log line. Both use ``positive=True``.

    Residual norm, solution norm and the estimator's score are logged, the
    solution is pushed to ``sphere.set_visible_pixels(sky, scale)`` and
    returned as a column vector.
    """
    gamma = self.make_gamma(sphere)
    target = vis_to_real(vis_arr)

    # Disabled debugging aid: dump the operator for further analysis.
    if False:
        fname = "l1_big_files.npz"
        np.savez_compressed(fname,
                            gamma_re=gamma,
                            vis_re=np.real(vis_arr),
                            vis_im=np.imag(vis_arr))
        logger.info("Operator file {} saved".format(fname))

    logger.info("gamma = {}".format(gamma.shape))
    logger.info("vis_aux = {}".format(target.shape))

    pixel_count = sphere.pixels.shape[0]
    if use_cv:
        solver = linear_model.ElasticNetCV(l1_ratio=l1_ratio, cv=5,
                                           max_iter=10000, positive=True)
        solver.fit(gamma, target)
        logger.info("Cross Validation alpha: {} l1_ratio: {}".format(
            solver.alpha_, solver.l1_ratio))
    else:
        solver = linear_model.ElasticNet(
            alpha=alpha / np.sqrt(pixel_count),
            l1_ratio=l1_ratio,
            tol=1e-6,
            max_iter=100000,
            positive=True,
        )
        solver.fit(gamma, target)

    sky = solver.coef_
    logger.info("sky = {}".format(sky.shape))

    # Squared L2 norms of the residual and of the solution.
    misfit = target - gamma @ sky
    residual_norm = np.linalg.norm(misfit)**2
    solution_norm = np.linalg.norm(sky)**2
    score = solver.score(gamma, target)
    logger.info("Alpha: {}: Loss: {}: rnorm: {}: snorm: {}".format(
        alpha, score, residual_norm, solution_norm))

    sphere.set_visible_pixels(sky, scale)
    return sky.reshape(-1, 1)
def predict(self, demand_fixture_data, params=None):
    '''
    Predicts across index using fitted model params

    Parameters
    ----------
    demand_fixture_data : pandas.DataFrame
        Formatted input data as returned by
        :code:`ModelDataFormatter.create_demand_fixture()`. Only the
        :code:`tempF` column is used.
    params : dict, default None
        Parameters found during model fit. If None, :code:`self.params` is
        used, so `.fit()` must have been called first. Keys read here:

        - :code:`X_design_info`: patsy design info used to rebuild the
          design matrix.
        - :code:`coefficients`: ElasticNetCV coefficients.
        - :code:`intercept`: ElasticNetCV intercept.

    Returns
    -------
    output : pandas.Series
        Predicted values, reindexed to the resampled index so that rows
        dropped while building the design matrix come back as NaN.
    '''
    fitted = self.params if params is None else params

    # Mean temperature per model period, then degree-day features.
    resampler = demand_fixture_data.resample(self.model_freq)
    frame = resampler.agg({'tempF': np.mean})
    frame.loc[:, 'CDD'] = np.maximum(
        frame.tempF - self.cooling_base_temp, 0.)
    frame.loc[:, 'HDD'] = np.maximum(
        self.heating_base_temp - frame.tempF, 0.)
    frame.loc[:, 'holiday_name'] = self._holidays_indexed(frame.index)

    (design,) = patsy.build_design_matrices(
        [fitted["X_design_info"]], frame, return_type='dataframe')

    # Rebuild an estimator shell and inject the stored fit results rather
    # than refitting.
    estimator = linear_model.ElasticNetCV(l1_ratio=self.l1_ratio,
                                          fit_intercept=False)
    estimator.coef_ = fitted["coefficients"]
    estimator.intercept_ = fitted["intercept"]

    result = pd.Series(estimator.predict(design), index=design.index)
    # add NaNs back in
    return result.reindex(frame.index)
def fit_per_store(self, X: DataFromHDF, y: DataFromHDF, store, with_cv=False):
    """Fit one ``ElasticNetCV`` for a store and keep it in ``self.models``.

    Folds come from ``cv_generator`` over the ``Date`` column. The fitted
    estimator is stored under ``self.models[store]`` and its chosen alpha,
    l1 ratio and best MSE are logged. With ``with_cv`` true, a plain
    ``ElasticNet`` using the chosen hyperparameters is refitted per fold
    and the RMSPE of each fold, plus their median, is logged. Predictions
    and targets are exponentiated before scoring (presumably the target is
    log-sales; confirm with the caller). Nothing is returned.
    """
    features = X.get()
    target = y.get_column()
    dates = X.get_column('Date')
    assert dates.shape[0] == features.shape[0]

    logger.debug('Store {0:4d}: train shape {1}, sales shape{2}'.format(
        int(store), features.shape, target.shape))
    logger.debug(features.values.flags)

    # Materialised because the folds are reused below.
    folds = list(
        cv_generator(features, dates, self.steps,
                     predict_interval=self.predict_interval,
                     step_by=self.step_by))

    estimator = linear_model.ElasticNetCV(l1_ratio=self.l1_ratio,
                                          n_alphas=self.n_alphas,
                                          cv=folds,
                                          n_jobs=self.n_jobs,
                                          selection=self.selection)
    with warnings_to_log('ConvergenceWarning'):
        fitted = estimator.fit(features, target)
    self.models[store] = fitted

    logger.debug('Store {0:4d}: alpha {alpha}, l1 ratio {l1_ratio}'.format(
        int(store), alpha=fitted.alpha_, l1_ratio=fitted.l1_ratio_))
    logger.debug('Store {0:4d}: Best MSE {1}'.format(
        int(store), fitted.mse_path_.ravel().min()))

    if not with_cv:
        return

    fold_errors = []
    for train_idx, test_idx in folds:
        fold_model = linear_model.ElasticNet(alpha=fitted.alpha_,
                                             l1_ratio=fitted.l1_ratio_)
        fold_fit = fold_model.fit(features.iloc[train_idx, :],
                                  target[train_idx])
        held_out = features.iloc[test_idx, :]
        held_out_target = target[test_idx]
        predicted = fold_fit.predict(held_out)
        # The prediction is multiplied by the 'Open' column before scoring.
        fold_errors.append(
            rmspe(np.exp(predicted) * held_out['Open'],
                  np.exp(held_out_target)))

    median_error = np.median(fold_errors)
    logger.debug('Store {0}. CV errors {1}'.format(store, fold_errors))
    logger.debug('Store {0}. CV median error {1}'.format(
        store, median_error))
def elasticnet(files, transparency2):
    """Fit a default ``ElasticNetCV`` on standardised, vectorised files.

    ``vectorize_all(files)`` is transposed into a frame with one column per
    file, standardised, and split 70/30; only the training part is used.
    The target is ``transparency2`` flattened. Returns the fitted estimator.
    """
    raw = pd.DataFrame(np.transpose(vectorize_all(files)), columns=files)
    target = np.ndarray.flatten(transparency2)

    standardized = pd.DataFrame(StandardScaler().fit_transform(raw),
                                columns=files)

    # The held-out 30% is discarded, as in the original.
    X_train, _, y_train, _ = train_test_split(standardized, target,
                                              test_size=0.30)
    estimator = linear_model.ElasticNetCV()
    return estimator.fit(X_train, y_train)
def elasticnet_train(vid, test_size):
    """Train an ``ElasticNetCV`` for one vehicle and predict its validation set.

    Data comes from ``get_data(vid, test_size)``. The test-set error from
    ``evalerror`` is printed next to ``vid``.

    Returns ``(res, test_Y, output)``: a frame with ``vehicle_id`` and the
    predicted ``charge_energy`` for the validation rows, the test targets,
    and the test predictions.
    """
    (data, valid_data, train_X, train_Y,
     test_X, test_Y, valid_X) = get_data(vid, test_size)

    estimator = linear_model.ElasticNetCV(
        alphas=[0.0001, 0.0005, 0.001, 0.01, 0.1, 1, 10],
        l1_ratio=[.01, .1, .5, .9, .99],
        max_iter=5000)
    estimator = estimator.fit(train_X, train_Y)

    output = estimator.predict(test_X)
    print(str(vid) + ' ' + str(evalerror(output, test_Y)))

    valid_pred = estimator.predict(valid_X)
    res = pd.DataFrame({'vehicle_id': valid_data['vehicle_id'],
                        'charge_energy': valid_pred})
    return res, test_Y, output
def run_en(exp, tfs_index):
    """Regress every target row on the selected rows with ``ElasticNetCV``.

    ``exp`` is transposed, resampled and transposed back; the rows picked
    by ``tfs_index`` become the predictors. One 3-fold ``ElasticNetCV`` is
    refitted per target row. The result is an integer 0/1 matrix marking
    strictly positive coefficients, one row per target.
    """
    targets = np.transpose(resample(np.array(exp.T)))
    predictors = np.transpose(targets[tfs_index])
    estimator = lm.ElasticNetCV(n_jobs=-1, cv=3)

    # fit() returns the estimator itself, so the coefficients of each fit
    # are collected right after that fit.
    coef_rows = [estimator.fit(predictors, row).coef_ for row in targets]
    return (np.array(coef_rows) > 0) * 1
def linear_model_main(X_parameters, Y_parameters, predict_input):
    """Fit a 10-fold ``ElasticNetCV`` and predict for ``predict_input``.

    Parameters
    ----------
    X_parameters, Y_parameters : training features and target.
    predict_input : features to predict on.

    Returns
    -------
    The estimator's predictions for ``predict_input``.
    """
    # Create the elastic-net regression object (the original comment said
    # "ridge"). `normalize=False` was dropped: it equalled the old default
    # and the keyword is rejected by current scikit-learn releases.
    regr = linear_model.ElasticNetCV(fit_intercept=True,
                                     l1_ratio=0.5,
                                     tol=0.01,
                                     cv=10,
                                     max_iter=1000)
    regr.fit(X_parameters, Y_parameters)
    predict_outcome = regr.predict(predict_input)
    return predict_outcome
def ElasticNetCV(l1_ratio=l1_ratio_default,
                 fit_intercept=fit_intercept_default,
                 **kwargs):
    """
    Purpose: Build a model that mixes L1 and L2 regularization and chooses
    the lambda (called alpha in sklearn) by cross validation later, when
    it is fitted.

    Extra keyword arguments are forwarded to the sklearn constructor.
    """
    options = dict(kwargs, l1_ratio=l1_ratio, fit_intercept=fit_intercept)
    return linear_model.ElasticNetCV(**options)
def test_model_elastic_net_cv_regressor(self):
    """Convert a fitted ``ElasticNetCV`` to ONNX and dump model and data."""
    estimator, features = fit_regression_model(linear_model.ElasticNetCV())
    # Float tensor input with a free first dimension and one column per
    # feature.
    input_spec = [("input", FloatTensorType([None, features.shape[1]]))]
    onnx_model = convert_sklearn(
        estimator,
        "scikit-learn elastic-net regression",
        input_spec,
        target_opset=TARGET_OPSET)
    self.assertIsNotNone(onnx_model)
    dump_data_and_model(features, estimator, onnx_model,
                        basename="SklearnElasticNetCV-Dec4")
def regressionMethods(independent, dependent, regType=0):
    """Fit and return the regression model selected by ``regType``.

    Parameters
    ----------
    independent, dependent : features and target passed to ``fit``.
    regType : int
        0 = RidgeCV, 1 = LassoCV, 2 = LassoLarsIC (BIC criterion),
        3 = ElasticNetCV. The CV variants share the alpha candidates
        ``[0.1, 1.0, 10.0]``.

    Returns
    -------
    The fitted estimator.

    Raises
    ------
    ValueError
        If ``regType`` is not one of 0, 1, 2, 3.
    """
    if regType == 0:
        clf = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0])
    elif regType == 1:
        clf = linear_model.LassoCV(alphas=[0.1, 1.0, 10.0])
    elif regType == 2:
        # The original tested the misspelt name `regtype`, which raised
        # NameError for every regType other than 0 and 1.
        clf = linear_model.LassoLarsIC(criterion='bic')
    elif regType == 3:
        clf = linear_model.ElasticNetCV(alphas=[0.1, 1.0, 10.0])
    else:
        raise ValueError(
            "Unknown regType {!r}; expected 0, 1, 2 or 3".format(regType))
    clf.fit(independent, dependent)
    return clf
def cross_validated_estimators_tests():
    """Run ``cross_validated_estimators`` on each default CV linear model.

    All estimators are instantiated up front, then checked in list order.
    """
    estimator_classes = (
        linear_model.ElasticNetCV,
        linear_model.LarsCV,
        linear_model.LassoCV,
        linear_model.LassoLarsCV,
        linear_model.LogisticRegressionCV,
        linear_model.OrthogonalMatchingPursuitCV,
        linear_model.RidgeClassifierCV,
        linear_model.RidgeCV,
    )
    estimators = [estimator_class() for estimator_class in estimator_classes]
    for estimator in estimators:
        cross_validated_estimators(estimator)
def fit(self, X, y):
    """Standardise ``X`` and fit an ``ElasticNetCV`` regressor on it.

    The fitted scaler is kept in ``self.standardizer`` and the fitted
    estimator in ``self.clf``. Cross-validation uses five shuffled 80/20
    splits with a fixed seed. Nothing is returned.

    NOTE(review): ``normalize=True`` is applied on top of the
    StandardScaler and is not accepted by recent scikit-learn releases --
    confirm the targeted version before changing it.
    """
    # Parenthesised single string prints identically on Python 2 and 3; the
    # original Python 2 print statement is a SyntaxError on Python 3.
    print("Fitting an ElasticNetCV regressor...")
    self.standardizer = preprocessing.StandardScaler()
    X = self.standardizer.fit_transform(X)
    cv = model_selection.ShuffleSplit(n_splits=5, test_size=0.2,
                                      random_state=0)
    self.clf = linear_model.ElasticNetCV(
        l1_ratio=[.1, .5, .7, .9, .95, .99, 1],
        cv=cv,
        n_jobs=7,
        normalize=True)
    self.clf.fit(X, y)
def learn_model(self, x, y, clf, lam=None):
    # Fit an elastic-net model on (x, y) and return (clf, lam).
    #
    # When lam is None and self.initlam is not -1, self.initlam is used as
    # lam. Inside the clf/lam checks an ElasticNetCV is fitted to obtain
    # alpha_, and a warm-start ElasticNet is created with alpha=lam.
    #
    # NOTE(review): the source was collapsed onto one line, so the nesting
    # below is a reconstruction -- confirm against the original file.
    # NOTE(review): the conditions look inverted. As written, clf=None goes
    # straight to clf.fit (AttributeError), and a caller-supplied lam is
    # replaced by the cross-validated alpha. `clf is None` / `lam is None`
    # may have been intended -- verify with the callers.
    if (lam is None and self.initlam != -1):
        lam = self.initlam
    if (clf is not None):
        if (lam is not None):
            # Cross-validate to pick the regularisation strength.
            clf = linear_model.ElasticNetCV(max_iter=10000)
            clf.fit(x, y)
            lam = clf.alpha_
        clf = linear_model.ElasticNet(alpha=lam, \
                                      max_iter=10000, \
                                      warm_start=True)
    clf.fit(x, y)
    return clf, lam
def get_new_clf(solver, folds=3, alphas=100):
    """Return an unfitted, intercept-free linear estimator for ``solver``.

    Parameters
    ----------
    solver : str
        ``"linear"``, ``"ridge"``, ``"lasso"`` or ``"elastic"``.
    folds : int
        Number of unshuffled K-fold splits used by the CV estimators.
    alphas : int
        Number of alpha candidates. For ridge an explicit grid with step
        ``10 / alphas`` starting at ``1 / alphas`` is built; for lasso and
        elastic net the value is passed as ``n_alphas``.

    Raises
    ------
    ValueError
        If ``solver`` is not one of the names above (the original fell
        through to an UnboundLocalError).
    """
    known_solvers = ("linear", "ridge", "lasso", "elastic")
    if solver not in known_solvers:
        raise ValueError(
            "Unknown solver {!r}; expected one of {}".format(
                solver, ", ".join(known_solvers)))

    # Built for every solver, as before, so `folds` is always validated.
    kf = KFold(n_splits=folds, shuffle=False)

    if solver == "linear":
        return linear_model.LinearRegression(fit_intercept=False)
    if solver == "ridge":
        # Separate name so the `alphas` parameter is not rebound.
        alpha_grid = np.arange(1 / alphas, 10 + 1 / alphas, 10 / alphas)
        return linear_model.RidgeCV(alphas=alpha_grid, fit_intercept=False,
                                    cv=kf)
    if solver == "lasso":
        return linear_model.LassoCV(n_alphas=alphas, fit_intercept=False,
                                    cv=kf)
    # solver == "elastic"
    return linear_model.ElasticNetCV(n_alphas=alphas, fit_intercept=False,
                                     cv=kf)