def Lars_reg(par=None):
    """Build a Lars estimator and its hyperparameter search grid.

    Parameters
    ----------
    par : dict or None, optional
        Keyword arguments forwarded to ``Lars``.  Any falsy value (including
        the legacy ``False`` default) means "use sklearn's defaults".

    Returns
    -------
    tuple
        ``(estimator, param_grid)`` where ``param_grid`` maps
        ``'n_nonzero_coefs'`` to the candidate values to search.
    """
    # The old signature used ``par=False`` (a boolean standing in for "no
    # dict"); ``None`` is the idiomatic sentinel and remains backward
    # compatible because both are falsy.
    est = Lars(**par) if par else Lars()
    grid = {'n_nonzero_coefs': [1, 5, 10, 50, 100, 200, 300]}
    return est, grid
def fit(self, X, y):
    """Fit the configured selector on (X, y) and record the surviving columns.

    Sets ``self.k_`` (input width), resolves ``self.max_k`` and
    ``self.selector`` defaults, then stores in ``self.col_select_`` the
    indices whose fitted coefficient magnitude exceeds 1e-4.
    """
    assert y is not None, f'y:{y}'
    n_cols = X.shape[1]
    self.k_ = n_cols
    # Resolve the cap on selected features: explicit value wins, then a
    # share of the input width, then a flat 500.
    if self.max_k is None:
        self.max_k = 500 if self.k_share is None else int(n_cols * self.k_share)
    if self.selector is None:
        self.selector = 'Lars'
    if self.selector == 'Lars':
        chosen = Lars(fit_intercept=True, normalize=True,
                      n_nonzero_coefs=self.max_k)
    elif self.selector == 'elastic-net':
        chosen = ElasticNet(fit_intercept=True, selection='random',
                            tol=0.001, max_iter=5000, warm_start=True,
                            random_state=0)
    else:
        # Caller supplied a ready-made estimator.
        chosen = self.selector
    chosen.fit(X, y)
    keep = np.abs(chosen.coef_) > 0.0001
    self.col_select_ = np.arange(n_cols)[keep]
    # Guarantee at least one column survives so downstream slicing works.
    if self.col_select_.size < 1:
        self.col_select_ = np.arange(1)
    return self
def define_clfs_params():
    """Return candidate linear models and their grid-search parameter grids.

    Returns
    -------
    tuple
        ``(clfs, grid)`` — estimators and parameter grids keyed by the same
        short names.
    """
    clfs = {
        'RIDGE': Ridge(),
        'LINEAR': LinearRegression(),
        'LASSO': Lasso(),
        'ELNET': ElasticNet(),
        'LARS': Lars(),
        'LLARS': LassoLars(),
        'BRIDGE': BayesianRidge(),
        'PER': Perceptron(),
    }
    # Models without tunable entries get an empty grid.
    grid = {name: {} for name in clfs}
    grid['RIDGE'] = {'alpha': [0.01, 0.1, 1, 10]}
    grid['LASSO'] = {'alpha': [0.01, 0.1, 1, 10]}
    grid['ELNET'] = {'alpha': [0.01, 0.1, 1, 10],
                     'l1_ratio': [0.25, 0.5, 0.75]}
    return clfs, grid
def _create_regressor(self):
    """Instantiate the regressor selected by ``self.mode``.

    Returns a plain ``Lars`` for ``'default'`` and a ``LassoLars`` using
    ``self.alpha`` for ``'lasso'``; raises ``ValueError`` otherwise.
    """
    mode = self.mode
    if mode == 'lasso':
        return LassoLars(alpha=self.alpha)
    if mode == 'default':
        return Lars()
    # Unknown mode: fail loudly with the expected options.
    raise ValueError('Unexpected mode ' + mode + '. Expected "default" or "lasso"')
def get_hyperparameters_model():
    """Describe the Lars model for the hyperparameter-search harness.

    Returns
    -------
    dict
        ``{'lars': {'model': Lars(), 'param_distributions': {}}}`` — the
        distribution dict is intentionally empty (sklearn defaults only).
    """
    return {
        'lars': {
            'model': Lars(),
            'param_distributions': {},
        }
    }
def runmodel_sklearn(chromosome, train, test, modelname, feature, label):
    """Train the named sklearn model on a chromosome-filtered frame and
    return its normalized RMSE on the test set.

    Returns 1000000000 (a sentinel "infinite" error) when the chromosome
    selects an empty training frame.
    """
    # Registry of candidate estimators; the xgboost / lightgbm entries are
    # intentionally disabled.
    model = {
        'GBRT': GradientBoostingRegressor(max_depth=7, loss='huber'),
        # 'xgb': xgb.XGBRegressor(nthread=10, objective='reg:linear', n_estimators=10, max_depth=3),
        'SVR': SVR(),
        'Lasso': Lasso(),
        'Linear': LinearRegression(),
        'DecisionTree': DecisionTreeRegressor(max_depth=6),
        'RandomForest': RandomForestRegressor(random_state=1, n_jobs=12),
        'Ridge': Ridge(),
        'AdaBoost': AdaBoostRegressor(),
        'BayesianRidge': BayesianRidge(compute_score=True),
        'KNN': KNeighborsRegressor(n_neighbors=12),
        'ExtraTrees': ExtraTreesRegressor(random_state=1, n_jobs=12),
        'SGD': SGDRegressor(loss='huber', penalty='elasticnet', random_state=1),
        'PassiveAggressive': PassiveAggressiveRegressor(),
        'ElasticNet': ElasticNet(),
        'Lars': Lars(),
        # 'lgm': lgb.LGBMRegressor(objective='regression', num_leaves=40, learning_rate=0.1, n_estimators=20, num_threads=10),
        # 'xgb_parallel': xgb.XGBRegressor(objective='reg:linear', n_estimators=10, max_depth=3, nthread=4)
    }
    newtrain = make_dataframe(chromosome, train)
    if len(newtrain) == 0:
        return 1000000000
    estimator = model[modelname]
    estimator.fit(newtrain[feature], newtrain[label])
    # Normalized RMSE: RMSE of the residuals divided by the RMS of the
    # true labels.
    residual = estimator.predict(test[feature]) - test[label]
    rmse = np.sqrt(np.power(residual, 2).mean())
    rms_label = np.sqrt(np.power(test[label], 2).mean())
    return rmse / rms_label
def get_regressors_variable(nmodels='all'):
    """Return one of, or all, variable-selection regressors.

    Parameters
    ----------
    nmodels : 'all' or int-like, optional
        ``'all'`` returns every model; an integer 1..5 selects one.

    Returns
    -------
    list
        The requested estimator instance(s).
    """
    models = [
        ElasticNet(),                  # 1. Elastic net
        Lars(),                        # 2. Least angle regression
        Lasso(),                       # 3. Lasso
        LassoLars(),                   # 4. LassoLars
        OrthogonalMatchingPursuit(),   # 5. Orthogonal matching pursuit
    ]
    if nmodels == 'all':
        return models
    # BUG FIX: the original returned the literal string 'lrN' (e.g. 'lr3')
    # instead of the N-th estimator object.
    return [models[int(nmodels) - 1]]
def LarsRegressorGS(X_train, X_test, y_train, y_test):
    """Grid-search a Lars regressor over ``n_nonzero_coefs``, then log
    metrics for both the test and training splits.

    Side effects: prints metrics, saves the best parameters, and persists
    the model via ``logSave``.
    """
    search = GridSearchCV(
        Lars(),
        param_grid={'n_nonzero_coefs': list(range(100, 500, 100))},
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    search.fit(X_train, y_train)
    # Refit the winning estimator on the full training split.
    reg = search.best_estimator_
    reg.fit(X_train, y_train)
    # Held-out (validation) metrics.
    test_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)
    # In-sample (training) metrics.
    train_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)
    best_params: dict = search.best_params_
    saveBestParams(nameOfModel="LarsRegressorGS", best_params=best_params)
    logSave(nameOfModel="LarsRegressorGS", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def __init__(self, **kwargs):
    """Wrap a ``Lars`` estimator with its tuning grid and distributions.

    Parameters
    ----------
    **kwargs
        Forwarded verbatim to ``Lars``.
    """
    super().__init__()
    # NOTE: the original initialized tune_grid / tune_distributions to {}
    # and immediately reassigned them; the dead assignments are removed.
    self.tune_grid = {
        "fit_intercept": [True, False],
        "normalize": [True, False],
        "eps": [
            0.00001,
            0.0001,
            0.001,
            0.01,
            0.05,
            0.0005,
            0.005,
            0.00005,
            0.02,
            0.007,
            0.1,
        ],
    }
    self.tune_distributions = {
        "eps": UniformDistribution(0.00001, 0.1),
    }
    self.estimator = Lars(**kwargs)
def fit(self, X, y):
    """Fit a scaler+selector pipeline on (X, y) and record the surviving
    column indices in ``self.col_select_``.

    Resolves ``self.max_k`` (cap on selected features, default ``k+1``)
    and ``self.selector`` (default ``'Lars'``) before fitting.
    """
    assert y is not None, f'y:{y}'
    n_cols = X.shape[1]
    self.k_ = n_cols
    # Default cap: all columns (+1), or a share of the width if given.
    if self.max_k is None:
        self.max_k = (n_cols + 1 if self.k_share is None
                      else int(n_cols * self.k_share))
    if self.selector is None:
        self.selector = 'Lars'
    # Build the selection step; inputs are standardized first.
    if self.selector == 'Lars':
        selection_step = Lars(fit_intercept=1, normalize=False,
                              n_nonzero_coefs=self.max_k)
    elif self.selector == 'elastic-net':
        selection_step = ElasticNet(fit_intercept=True, selection='random',
                                    tol=0.01, max_iter=500, warm_start=False,
                                    random_state=0, normalize=False)
    else:
        selection_step = self.selector
    kshrinker = Pipeline(steps=[('scaler', StandardScaler()),
                                ('selector', selection_step)])
    kshrinker.fit(X, y)
    coefs = kshrinker['selector'].coef_
    # Keep columns with non-negligible coefficients; never return an
    # empty selection.
    self.col_select_ = np.arange(n_cols)[np.abs(coefs) > 0.0001]
    if self.col_select_.size < 1:
        self.col_select_ = np.arange(1)
    return self
def __init__(self):
    """Load the sample regression data, split it by year, and fit a Lars model."""
    # Algorithm name
    self._name = 'lars'
    # Base path: parent directory of this file's directory
    self._f_path = os.path.abspath(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
    # Suppress warning messages
    warnings.filterwarnings('ignore')
    # Load the raw data
    data = pd.read_csv(self._f_path + "/regression/resource/regression_sample.csv",
                       sep=",", encoding="utf-8")
    # Boolean row masks: training rows are year <= 2017, test rows are
    # year >= 2018 (despite the names, _x/_y are masks, not features/targets)
    self._x = (data["year"] <= 2017)
    self._y = (data["year"] >= 2018)
    # Split off the training data
    self._x_train, self._y_train = self.preprocessing(data[self._x])
    # Split off the test data
    self._x_test, self._y_test = self.preprocessing(data[self._y])
    # Declare the model
    self._model = Lars(normalize=False)
    # Train the model
    self._model.fit(self._x_train, self._y_train)
def get_classifier_scores(pred_labels, mixed_data, target_nmf_pp_data, target_data, use_lasso=True, n_nonzeros=10):
    """Score predicted cluster labels via a one-vs-rest linear classifier
    plus several unsupervised accuracy measures.

    Parameters
    ----------
    pred_labels : array of per-sample predicted cluster labels.
    mixed_data, target_data : gene-by-sample matrices.
    target_nmf_pp_data : unused here (kept for interface compatibility).
    use_lasso : use sparse Lars (collecting active genes) instead of OLS.
    n_nonzeros : unused here (kept for interface compatibility).

    Returns
    -------
    numpy.ndarray
        Five scores: [kta, silhouette-euclid, silhouette-pearson,
        silhouette-spearman, ARI of the classifier's re-prediction].
    """
    aris = np.zeros(5)
    active_genes = range(mixed_data.shape[0])
    # Only keep clusters with at least two members.
    include_inds = []
    pred_lbls = np.unique(pred_labels)
    for p in pred_lbls:
        inds = np.where(pred_labels == p)[0]
        if inds.size >= 2:
            include_inds.extend(inds)
    if len(include_inds) > 2:
        if use_lasso:
            cls = OneVsRestClassifier(Lars(fit_intercept=True, normalize=True,
                                           copy_X=True, n_nonzero_coefs=50)).fit(
                mixed_data[:, include_inds].T.copy(),
                pred_labels[include_inds].copy())
            # collect active indices
            active_genes = []
            for e in range(len(cls.estimators_)):
                active_genes.extend(cls.estimators_[e].active_)
            active_genes = np.unique(active_genes)
            # BUG FIX: the original used Python 2 `print x` statements,
            # which are syntax errors in this otherwise Python 3 module.
            print(active_genes)
            print(active_genes.shape)
        else:
            cls = OneVsRestClassifier(LinearRegression(fit_intercept=True,
                                                       normalize=False,
                                                       copy_X=True, n_jobs=1)).fit(
                mixed_data[:, include_inds].T.copy(),
                pred_labels[include_inds].copy())
        ret = cls.predict(target_data[:, include_inds].T.copy())
        aris[4] = metrics.adjusted_rand_score(ret, pred_labels[include_inds].copy())
    aris[0] = unsupervised_acc_kta(target_data[active_genes, :].copy(),
                                   pred_labels.copy(), kernel='linear')
    aris[1] = unsupervised_acc_silhouette(target_data[active_genes, :].copy(),
                                          pred_labels.copy(), metric='euclidean')
    aris[2] = unsupervised_acc_silhouette(target_data[active_genes, :].copy(),
                                          pred_labels.copy(), metric='pearson')
    aris[3] = unsupervised_acc_silhouette(target_data[active_genes, :].copy(),
                                          pred_labels.copy(), metric='spearman')
    return aris
def create_model_LARS(state_matrix, transcription_factors):
    """Fit one LARS model per transcription factor on state transitions.

    For gene ``i`` the features are the previous state vectors and the
    target is the change ``state[t][i] - state[t-1][i]``; the fitted
    coefficients are returned keyed by transcription factor name.
    """
    n_steps = len(state_matrix)
    regulators = {}
    for idx, tf in enumerate(transcription_factors):
        # Training set: previous full state -> delta of this gene.
        features = [state_matrix[t - 1].tolist() for t in range(1, n_steps)]
        targets = [state_matrix[t][idx] - state_matrix[t - 1][idx]
                   for t in range(1, n_steps)]
        model = Lars()
        model.fit(features, targets)
        # The coefficients indicate which genes regulate this factor.
        regulators[tf] = model.coef_
    return regulators
def getModel(self, _params):
    """Build a ``Lars`` estimator from the given parameter mapping.

    Only the four keys below are consumed; extra keys in ``_params`` are
    ignored.
    """
    keys = ('fit_intercept', 'normalize', 'eps', 'copy_X')
    return Lars(**{key: _params[key] for key in keys})
def connectWidgets(self):
    """Seed the LARS parameter widgets with sklearn's default Lars() values."""
    defaults = Lars()
    self.fit_interceptCheckBox.setChecked(defaults.fit_intercept)
    self.normalizeCheckBox.setChecked(defaults.normalize)
    self.n_nonzero_coefsSpinBox.setValue(defaults.n_nonzero_coefs)
def ml_lars(X_train, y_train, cv=5, beta=0.75, params=None):
    """Grid-search a Lars model across a range of decision thresholds.

    For each threshold ``t`` a custom scorer is built and a grid search is
    run; the threshold/grid pair with the highest best score wins.

    Parameters
    ----------
    X_train, y_train : training data.
    cv : int, cross-validation folds.
    beta : float, passed to the scorer.
    params : dict or None, grid for ``n_nonzero_coefs`` (default 30..130).

    Returns
    -------
    The best estimator found (backward compatible: the original returned
    None implicitly, so callers ignoring the return are unaffected).
    """
    model = Lars()
    if not params:
        params = {'n_nonzero_coefs': [n for n in range(30, 150, 20)]}
    max_score = 0
    best_t = 0
    best_grid = None
    # BUG FIX: the original list contained 0.45 twice and skipped 0.35.
    for t in [0, 0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6]:
        scorer = make_scorer(new_scorer, threshold=t, beta=beta)
        model_grid = GridSearchCV(model, param_grid=params, scoring=scorer,
                                  cv=cv, verbose=0, n_jobs=-1)
        model_grid.fit(X_train, y_train)
        if max_score < model_grid.best_score_:
            # BUG FIX: max_score was never updated, so every grid whose
            # score exceeded 0 overwrote the previous "best" regardless
            # of whether it was actually better.
            max_score = model_grid.best_score_
            best_t = t
            best_grid = model_grid
    if best_grid is None:
        # Previously this path crashed with AttributeError on "".
        raise ValueError("no threshold produced a positive best score")
    best_model = best_grid.best_estimator_
    print("Best Score : {}".format(best_grid.best_score_))
    print("Threshold :", best_t)
    print("Best Params : {}".format(best_grid.best_params_))
    return best_model
def __init__(self, X, y, lar_params, nfolds=3, n_jobs=1, scoring=None, verbose=True):
    """Construct a Lars-backed predictive regressor wrapper.

    Delegates the generic setup to ``AbstractRegressorPredictiveModel``
    and builds the underlying ``Lars`` estimator.
    """
    self._code = "lar"
    if verbose:
        print("Constructed Lars: " + self._code)
    AbstractRegressorPredictiveModel.__init__(self, "regressor", X, y,
                                              lar_params, nfolds, n_jobs,
                                              scoring, verbose)
    self._model = self.constructRegressor(Lars())
def connectWidgets(self):
    """Seed the LARS widgets from sklearn's default Lars() parameters."""
    defaults = Lars()
    # List widgets select the item whose text matches the default's repr.
    fit_items = self.fit_intercept_listWidget.findItems(
        str(defaults.fit_intercept), QtCore.Qt.MatchExactly)
    self.fit_intercept_listWidget.setCurrentItem(fit_items[0])
    norm_items = self.normalize_list.findItems(
        str(defaults.normalize), QtCore.Qt.MatchExactly)
    self.normalize_list.setCurrentItem(norm_items[0])
    self.n_nonzero_coefsLineEdit.setText(str(defaults.n_nonzero_coefs))
def run_Lars(single_time):
    """Run the Lars comparison benchmark once or ten times.

    Parameters
    ----------
    single_time : bool
        True -> one train/test run (folder 'Lars');
        False -> ten repetitions (folder 'Lars10t').
    """
    model = Lars()
    if single_time:
        comparison_algorithm.training_test_with_sklearnmodel(
            save_folder='Lars', model=model)
    else:
        comparison_algorithm.training_test_10times_sklearnmodel(
            save_folder='Lars' + '10t', model=model)
def _load_model(cls, fh):
    """Deserialize a Lars model from a binary file handle.

    Reads, in order: the estimator params (literal), the active indices
    (literal), the coefficient shape (literal), then the raw float64
    coefficient bytes.

    Returns
    -------
    Lars
        The reconstructed estimator with ``coef_``, ``intercept_`` and
        ``active_`` set.
    """
    params = _parse_literal(fh)
    active = _parse_literal(fh)
    coef_shape = _parse_literal(fh)
    m = Lars().set_params(**params)
    m.intercept_ = 0.0
    # 8 bytes per float64 coefficient.
    n = int(np.prod(coef_shape)) * 8
    # BUG FIX: np.fromstring is deprecated and mis-handles binary data on
    # Python 3; np.frombuffer reads the raw float64 bytes. The .copy()
    # keeps the array writable, matching fromstring's old behavior.
    m.coef_ = np.frombuffer(fh.read(n)).copy().reshape(coef_shape)
    m.active_ = active
    return m
def run(self):
    """Collect the current widget state into a Lars parameter dict.

    Returns
    -------
    tuple
        ``(params, changed)`` — the full parameter dict and the subset
        that differs from a default ``Lars()``.
    """
    params = {
        'fit_intercept': self.fit_interceptCheckBox.isChecked(),
        'verbose': False,
        'normalize': self.normalizeCheckBox.isChecked(),
        'precompute': 'auto',
        'n_nonzero_coefs': self.n_nonzero_coefsSpinBox.value(),
        'copy_X': True,
        'fit_path': True,
    }
    changed = self.getChangedValues(params, Lars())
    return params, changed
def runLarsRegressor(self):
    """Fit a Lars regressor on the training split and display its
    predictions, residuals and R^2 score on the test split."""
    estimator = Lars(fit_intercept=True, normalize=True)
    print("Lars Regressor\n")
    estimator.fit(self.m_X_train, self.m_y_train)
    test_pred = estimator.predict(self.m_X_test)
    r2 = estimator.score(self.m_X_test, self.m_y_test)
    train_pred = estimator.predict(self.m_X_train)
    self.displayPredictPlot(test_pred)
    self.displayResidualPlot(test_pred, train_pred)
    # NOTE: "dispaly" typo is the actual method name defined on the class.
    self.dispalyModelResult(estimator, test_pred, r2)
def get_models(models=None):
    """Populate a dict of linear regression models keyed by short name.

    Parameters
    ----------
    models : dict or None, optional
        Existing mapping to extend in place; a fresh dict is created when
        None.  (BUG FIX: the original used the mutable default
        ``models=dict()``, which silently shared one dict across calls.)

    Returns
    -------
    dict
        The populated model mapping.
    """
    if models is None:
        models = {}
    models['lr'] = LinearRegression()
    models['lasso'] = Lasso()
    models['ridge'] = Ridge()
    models['en'] = ElasticNet()
    models['huber'] = HuberRegressor()
    models['lars'] = Lars()
    models['llars'] = LassoLars()
    models['pa'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    models['ranscac'] = RANSACRegressor()
    models['sgd'] = SGDRegressor(max_iter=1000, tol=1e-3)
    print('Defined %d models' % len(models))
    return models
def load_Least_Angle_Regression():
    """Load Least Angle Regression and give a name for the output files.

    Returns
    -------
    model_name : str
        Name of the model for the output file.
    regr : Lars
        Longitude and latitude regressor.
    """
    return "Least Angle Regression", Lars()
def run(self, X, y=None):
    """
    Fits filter

    Parameters
    ----------
    X : numpy array, shape (n_samples, n_features)
        The training input samples.
    y : numpy array, optional
        The target values (ignored).

    Returns
    ----------
    W : array-like, shape (n_features, k)
        Feature weight matrix.

    See Also
    --------

    Examples
    --------
    from ITMO_FS.filters.sparse import MCFS
    from sklearn.datasets import make_classification
    import numpy as np
    dataset = make_classification(n_samples=100, n_features=20,
        n_informative=4, n_redundant=0, shuffle=False)
    data, target = np.array(dataset[0]), np.array(dataset[1])
    model = MCFS(d=5, k=2, scheme='heat')
    weights = model.run(data, target)
    print(model.feature_ranking(weights))
    """
    n_samples, n_features = X.shape
    # Build a symmetric nearest-neighbour adjacency matrix; self.p + 1
    # because kneighbors_graph counts each sample as its own neighbour.
    graph = NearestNeighbors(n_neighbors=self.p + 1,
                             algorithm='ball_tree').fit(X).kneighbors_graph(X).toarray()
    graph = graph + graph.T
    # Weight every connected pair with the configured similarity scheme
    # (vectorized over all (i, j) index pairs).
    indices = [[(i, j) for j in range(n_samples)] for i in range(n_samples)]
    func = np.vectorize(lambda xy: graph[xy[0]][xy[1]] * self.scheme(X[xy[0]], X[xy[1]]),
                        signature='(1)->()')
    W = func(indices)
    # Graph Laplacian L = D - W, with D the diagonal degree matrix.
    D = np.diag(W.sum(axis=0))
    L = D - W
    # The self.k smallest generalized eigenvectors of L v = lambda D v
    # form the spectral embedding (columns of Y).
    eigvals, Y = eigh(type=1, a=L, b=D, eigvals=(0, self.k - 1))
    # One sparse LARS regression (self.d nonzero coefficients) per
    # embedding dimension; the coefficients become the feature weights.
    weights = np.zeros((n_features, self.k))
    for i in range(self.k):
        clf = Lars(n_nonzero_coefs=self.d)
        clf.fit(X, Y[:, i])
        weights[:, i] = clf.coef_
    return weights
def choose_regression_algorithm(method="LR"):
    """Return a regression estimator instance for the given short code.

    Parameters
    ----------
    method : str, optional
        One of "LR", "Lasso", "Ridge", "HR", "SVR", "LL", "ARDR", "BR",
        "ElasticNet", "Lars", "PA", "RANSAC", "TS", "LP", "RR".

    Returns
    -------
    A scikit-learn regressor instance.

    Raises
    ------
    ValueError
        If ``method`` is not recognized.  (BUG FIX: the original fell
        through to ``return`` with ``regression_algorithm`` unbound,
        raising an opaque UnboundLocalError.)
    """
    if method == "LR":
        regression_algorithm = LinearRegression()
    elif method == "Lasso":
        regression_algorithm = Lasso()
    elif method == "Ridge":
        regression_algorithm = Ridge()
    elif method == "HR":
        regression_algorithm = HuberRegressor()
    elif method == "SVR":
        regression_algorithm = SVR()
    elif method == "LL":
        regression_algorithm = LassoLars()
    elif method == "ARDR":
        regression_algorithm = ARDRegression()
    elif method == "BR":
        regression_algorithm = BayesianRidge()
    elif method == "ElasticNet":
        regression_algorithm = ElasticNet()
    elif method == "Lars":
        regression_algorithm = Lars()
    elif method == "PA":
        regression_algorithm = PassiveAggressiveRegressor()
    elif method == "RANSAC":
        regression_algorithm = RANSACRegressor()
    elif method == "TS":
        regression_algorithm = TheilSenRegressor()
    elif method == "LP":
        # FIXME: lars_path is a function requiring (X, y); calling it with
        # no arguments raises TypeError, and it is not an estimator.
        regression_algorithm = lars_path()
    elif method == "RR":
        # FIXME: ridge_regression is a function requiring (X, y) as well.
        regression_algorithm = ridge_regression()
    else:
        print("You haven't chosen a valide classifier!!!")
        raise ValueError("unknown regression method: %r" % (method,))
    print("method used:\t", method)
    return regression_algorithm
def get_model(PARAMS):
    '''Get model according to parameters'''
    # Registry of supported estimators keyed by model name.
    model_dict = {
        'LinearRegression': LinearRegression(),
        'Ridge': Ridge(),
        'Lars': Lars(),
        'ARDRegression': ARDRegression(),
    }
    model = model_dict.get(PARAMS['model_name'])
    if not model:
        LOG.exception('Not supported model!')
        exit(1)
    # Apply the normalize flag from the run parameters.
    model.normalize = bool(PARAMS['normalize'])
    return model
def LarsRegressor(X_train, X_test, y_train, y_test):
    """Fit a default Lars regressor and log metrics for both splits.

    Side effects: prints metrics for test and train predictions and
    persists the model and metrics via ``logSave``.
    """
    reg = Lars()
    reg.fit(X_train, y_train)
    # Held-out (validation) metrics.
    test_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)
    # In-sample (training) metrics.
    train_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)
    logSave(nameOfModel="LarsRegressor", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def get_models_multioutput(models=None):
    """Populate a dict of multi-output regression models keyed by name.

    Parameters
    ----------
    models : dict or None, optional
        Existing mapping to extend in place; a fresh dict is created when
        None.  (BUG FIX: the original used the mutable default
        ``models=dict()``, which silently shared one dict across calls.)

    Returns
    -------
    dict
        The populated model mapping.
    """
    if models is None:
        models = {}
    # linear models
    models['lr'] = MultiOutputRegressor(LinearRegression())
    alpha = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    for a in alpha:
        models['lasso-' + str(a)] = MultiOutputRegressor(Lasso(alpha=a))
    for a in alpha:
        models['ridge-' + str(a)] = MultiOutputRegressor(Ridge(alpha=a))
    for a1 in alpha:
        for a2 in alpha:
            name = 'en-' + str(a1) + '-' + str(a2)
            models[name] = MultiOutputRegressor(ElasticNet(a1, a2))
    models['huber'] = MultiOutputRegressor(HuberRegressor())
    models['lars'] = MultiOutputRegressor(Lars())
    models['llars'] = MultiOutputRegressor(LassoLars())
    models['pa'] = MultiOutputRegressor(
        PassiveAggressiveRegressor(max_iter=1000, tol=1e-3))
    models['ranscac'] = MultiOutputRegressor(RANSACRegressor())
    models['sgd'] = MultiOutputRegressor(SGDRegressor(max_iter=1000, tol=1e-3))
    models['theil'] = MultiOutputRegressor(TheilSenRegressor())
    # non-linear models
    n_neighbors = range(1, 21)
    for k in n_neighbors:
        models['knn-' + str(k)] = MultiOutputRegressor(
            KNeighborsRegressor(n_neighbors=k))
    models['cart'] = MultiOutputRegressor(DecisionTreeRegressor())
    models['extra'] = MultiOutputRegressor(ExtraTreeRegressor())
    models['svml'] = MultiOutputRegressor(SVR(kernel='linear'))
    models['svmp'] = MultiOutputRegressor(SVR(kernel='poly'))
    c_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    for c in c_values:
        # BUG FIX: these SVRs were the only models not wrapped in
        # MultiOutputRegressor, so they would fail on multi-output targets.
        models['svmr' + str(c)] = MultiOutputRegressor(SVR(C=c))
    # ensemble models
    n_trees = 100
    models['ada'] = MultiOutputRegressor(
        AdaBoostRegressor(n_estimators=n_trees))
    models['bag'] = MultiOutputRegressor(
        BaggingRegressor(n_estimators=n_trees))
    models['rf'] = MultiOutputRegressor(
        RandomForestRegressor(n_estimators=n_trees))
    models['et'] = MultiOutputRegressor(
        ExtraTreesRegressor(n_estimators=n_trees))
    models['gbm'] = MultiOutputRegressor(
        GradientBoostingRegressor(n_estimators=n_trees))
    print('Defined %d models' % len(models))
    return models
def regress(X_train_imp, y_train, X_test, y_test):
    """Fit a set of linear models and report each one's test RMSE.

    Returns
    -------
    list of float
        The RMSE of each model, in iteration order of the model dict.
    """
    import math
    from sklearn.metrics import mean_squared_error
    from sklearn.linear_model import LinearRegression, Ridge, Lasso
    from sklearn.linear_model import LassoLars, BayesianRidge
    from sklearn.linear_model import ElasticNet, Lars

    runs = []
    # Model zoo (see https://scikit-learn.org/stable/modules/linear_model.html):
    # 1.1.1 LinearRegression, 1.1.2 Ridge, 1.1.3 Lasso, 1.1.8 LassoLars,
    # 1.1.10 BayesianRidge, 1.1.5 ElasticNet, 1.1.7 Lars.
    d_models = {
        "Linear_Regression": LinearRegression(),
        "Ridge": Ridge(alpha=0.5),
        "Lasso": Lasso(alpha=0.1),
        "LassoLars": LassoLars(alpha=0.1),
        "BayesianRidge": BayesianRidge(),
        "ElasticNet": ElasticNet(alpha=0.5, l1_ratio=0.7),
        "Lars": Lars(n_nonzero_coefs=3),
    }
    models_list = d_models.keys()
    print("---- models lists ---")
    print(models_list, "\n")
    for model_name in models_list:
        estimator = d_models[model_name]
        estimator.fit(X_train_imp, y_train)
        predictions = estimator.predict(X_test)
        rmse = math.sqrt(mean_squared_error(predictions, y_test))
        print(model_name, "\nRMSE:", rmse)
        runs.append(rmse)
        print(estimator.coef_)
        print(estimator.intercept_)
        print("=======")
    return runs