def fit(self, X, y):
    assert y is not None, f'y:{y}'
    k = X.shape[1]
    self.k_ = k
    if self.max_k is None:
        if self.k_share is None:
            self.max_k = 500
        else:
            self.max_k = int(k * self.k_share)
    if self.selector is None:
        self.selector = 'Lars'
    if self.selector == 'Lars':
        selector = Lars(fit_intercept=True, normalize=True,
                        n_nonzero_coefs=self.max_k)
    elif self.selector == 'elastic-net':
        selector = ElasticNet(fit_intercept=True, selection='random',
                              tol=0.001, max_iter=5000, warm_start=True,
                              random_state=0)
    else:
        selector = self.selector
    selector.fit(X, y)
    self.col_select_ = np.arange(k)[np.abs(selector.coef_) > 0.0001]
    if self.col_select_.size < 1:
        self.col_select_ = np.arange(1)
    return self
def create_model_LARS(state_matrix, transcription_factors):
    regulators = {}
    for i in range(len(transcription_factors)):
        # Training set for the target gene: expression at the previous time
        # step predicts the change in expression at the current step
        X = []
        y = []
        for j in range(1, len(state_matrix)):
            X.append(state_matrix[j - 1].tolist())
            y.append(state_matrix[j][i] - state_matrix[j - 1][i])
        # Initialise the LARS model
        lars = Lars()
        # Fit the training data into the model
        lars.fit(X, y)
        # Extract the important features corresponding to this gene
        coefficients = lars.coef_
        # Add to the dictionary
        regulators[transcription_factors[i]] = coefficients
    return regulators
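# A minimal usage sketch for create_model_LARS above (not from the original
# source): a random "state matrix" with one row per time point and one column
# per transcription factor. Names and shapes are illustrative assumptions.
import numpy as np
from sklearn.linear_model import Lars

rng = np.random.RandomState(0)
tfs = ['tf1', 'tf2', 'tf3']
state_matrix = rng.rand(10, len(tfs))  # 10 time points, 3 factors

regulators = create_model_LARS(state_matrix, tfs)
for tf, coefs in regulators.items():
    print(tf, coefs)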
def Lars_reg(par=False):
    est = Lars()
    if par:
        est = Lars(**par)
    myDict = {}
    myDict['n_nonzero_coefs'] = [1, 5, 10, 50, 100, 200, 300]
    return est, myDict
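# Hypothetical consumer sketch (not from the original source): feed the
# estimator and parameter grid returned by Lars_reg into GridSearchCV on
# synthetic data. The data dimensions below are arbitrary.
from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV

X, y = make_regression(n_samples=500, n_features=350, noise=1.0, random_state=0)
est, grid = Lars_reg()
search = GridSearchCV(est, param_grid=grid, cv=3)
search.fit(X, y)
print(search.best_params_)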
def __init__(self):
    # Algorithm name
    self._name = 'lars'
    # Base path
    self._f_path = os.path.abspath(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
    # Suppress warning messages
    warnings.filterwarnings('ignore')
    # Load the raw data
    data = pd.read_csv(self._f_path + "/regression/resource/regression_sample.csv",
                       sep=",", encoding="utf-8")
    # Masks splitting training and test data by year
    self._x = (data["year"] <= 2017)
    self._y = (data["year"] >= 2018)
    # Split off the training data
    self._x_train, self._y_train = self.preprocessing(data[self._x])
    # Split off the test data
    self._x_test, self._y_test = self.preprocessing(data[self._y])
    # Declare the model
    self._model = Lars(normalize=False)
    # Train the model
    self._model.fit(self._x_train, self._y_train)
def _load_model(cls, fh):
    params = _parse_literal(fh)
    active = _parse_literal(fh)
    coef_shape = _parse_literal(fh)
    m = Lars().set_params(**params)
    m.intercept_ = 0.0
    # float64 coefficients, 8 bytes each; np.frombuffer replaces the
    # deprecated np.fromstring for binary data
    n = int(np.prod(coef_shape)) * 8
    m.coef_ = np.frombuffer(fh.read(n)).reshape(coef_shape)
    m.active_ = active
    return m
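# Hedged companion sketch (not part of the original code): a writer matching
# the reader above, assuming _parse_literal reads one newline-terminated
# repr() literal per call. The real framing used by _parse_literal is unknown,
# so treat this purely as an illustration of the implied on-disk layout:
# three literals (params, active set, coef shape) followed by raw float64 bytes.
def _save_model(m, fh):
    fh.write((repr(m.get_params()) + '\n').encode())
    fh.write((repr(list(m.active_)) + '\n').encode())
    fh.write((repr(m.coef_.shape) + '\n').encode())
    fh.write(np.ascontiguousarray(m.coef_, dtype=np.float64).tobytes())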
def run(self, X, y=None):
    """
    Fits the filter.

    Parameters
    ----------
    X : numpy array, shape (n_samples, n_features)
        The training input samples.
    y : numpy array, optional
        The target values (ignored).

    Returns
    -------
    W : array-like, shape (n_features, k)
        Feature weight matrix.

    Examples
    --------
    >>> from ITMO_FS.filters.sparse import MCFS
    >>> from sklearn.datasets import make_classification
    >>> import numpy as np
    >>> dataset = make_classification(n_samples=100, n_features=20,
    ...                               n_informative=4, n_redundant=0,
    ...                               shuffle=False)
    >>> data, target = np.array(dataset[0]), np.array(dataset[1])
    >>> model = MCFS(d=5, k=2, scheme='heat')
    >>> weights = model.run(data, target)
    >>> print(model.feature_ranking(weights))
    """
    n_samples, n_features = X.shape
    graph = NearestNeighbors(n_neighbors=self.p + 1,
                             algorithm='ball_tree').fit(X).kneighbors_graph(X).toarray()
    graph = graph + graph.T

    indices = [[(i, j) for j in range(n_samples)] for i in range(n_samples)]
    func = np.vectorize(
        lambda xy: graph[xy[0]][xy[1]] * self.scheme(X[xy[0]], X[xy[1]]),
        signature='(1)->()')
    W = func(indices)

    # Generalized eigenproblem L y = lambda D y; keep the k smallest eigenpairs
    D = np.diag(W.sum(axis=0))
    L = D - W
    eigvals, Y = eigh(type=1, a=L, b=D, eigvals=(0, self.k - 1))

    # One sparse LARS regression per spectral embedding dimension
    weights = np.zeros((n_features, self.k))
    for i in range(self.k):
        clf = Lars(n_nonzero_coefs=self.d)
        clf.fit(X, Y[:, i])
        weights[:, i] = clf.coef_
    return weights
class _LarsImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
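# Illustrative usage of the wrapper above (an assumption, not from the
# original source): this presumes Op is bound to sklearn.linear_model.Lars in
# the enclosing module, as is typical for such shims.
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=50, n_features=10, random_state=0)
impl = _LarsImpl(n_nonzero_coefs=5)
print(impl.fit(X, y).predict(X[:3]))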
def define_clfs_params():
    clfs = {
        'RIDGE': Ridge(),
        'LINEAR': LinearRegression(),
        'LASSO': Lasso(),
        'ELNET': ElasticNet(),
        'LARS': Lars(),
        'LLARS': LassoLars(),
        'BRIDGE': BayesianRidge(),
        'PER': Perceptron()
    }
    grid = {
        'RIDGE': {'alpha': [0.01, 0.1, 1, 10]},
        'LINEAR': {},
        'LASSO': {'alpha': [0.01, 0.1, 1, 10]},
        'ELNET': {'alpha': [0.01, 0.1, 1, 10], 'l1_ratio': [0.25, 0.5, 0.75]},
        'LARS': {},
        'LLARS': {},
        'BRIDGE': {},
        'PER': {}
    }
    return clfs, grid
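# Hypothetical driver sketch (not from the original source): run a few of the
# models above over their grids with GridSearchCV. An empty grid simply fits
# the estimator's defaults. Data below is synthetic and illustrative.
from sklearn.datasets import make_regression
from sklearn.model_selection import GridSearchCV

X, y = make_regression(n_samples=100, n_features=20, noise=1.0, random_state=0)
clfs, grid = define_clfs_params()
for name in ('RIDGE', 'LASSO', 'LARS'):
    search = GridSearchCV(clfs[name], param_grid=grid[name], cv=3)
    search.fit(X, y)
    print(name, search.best_params_, round(search.best_score_, 3))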
def connectWidgets(self):
    # LARS / LARSCV
    lars = Lars()
    self.fit_interceptCheckBox.setChecked(lars.fit_intercept)
    self.normalizeCheckBox.setChecked(lars.normalize)
    self.n_nonzero_coefsSpinBox.setValue(lars.n_nonzero_coefs)
def _create_regressor(self):
    if self.mode == 'default':
        return Lars()
    if self.mode == 'lasso':
        return LassoLars(alpha=self.alpha)
    raise ValueError('Unexpected mode ' + self.mode +
                     '. Expected "default" or "lasso"')
def get_regressors_variable(nmodels='all'):
    """Return one of, or all, variable-selection regressors."""
    # 1. Elastic net
    lr1 = ElasticNet()
    # 2. Least angle regression
    lr2 = Lars()
    # 3. Lasso
    lr3 = Lasso()
    # 4. LassoLars
    lr4 = LassoLars()
    # 5. OrthogonalMatchingPursuit
    lr5 = OrthogonalMatchingPursuit()
    all_models = [lr1, lr2, lr3, lr4, lr5]
    if nmodels == 'all':
        models = all_models
    else:
        # Select the single requested model by its 1-based number
        # (the original returned the string 'lr<n>' instead of the model)
        models = [all_models[int(nmodels) - 1]]
    return models
def fit(self, X, y):
    assert y is not None, f'y:{y}'
    k = X.shape[1]
    self.k_ = k
    if self.max_k is None:
        if self.k_share is None:
            self.max_k = k + 1
        else:
            self.max_k = int(k * self.k_share)
    steps = [('scaler', StandardScaler())]
    if self.selector is None:
        self.selector = 'Lars'
    if self.selector == 'Lars':
        steps.append(('selector', Lars(fit_intercept=True, normalize=False,
                                       n_nonzero_coefs=self.max_k)))
    elif self.selector == 'elastic-net':
        steps.append(('selector', ElasticNet(fit_intercept=True,
                                             selection='random', tol=0.01,
                                             max_iter=500, warm_start=False,
                                             random_state=0, normalize=False)))
    else:
        steps.append(('selector', self.selector))
    kshrinker = Pipeline(steps=steps)
    kshrinker.fit(X, y)
    coefs = kshrinker['selector'].coef_
    self.col_select_ = np.arange(k)[np.abs(coefs) > 0.0001]
    if self.col_select_.size < 1:
        self.col_select_ = np.arange(1)
    return self
def runmodel_sklearn(chromosome, train, test, modelname, feature, label):
    model = {
        'GBRT': GradientBoostingRegressor(max_depth=7, loss='huber'),
        # 'xgb': xgb.XGBRegressor(nthread=10, objective='reg:linear',
        #                         n_estimators=10, max_depth=3),
        'SVR': SVR(),
        'Lasso': Lasso(),
        'Linear': LinearRegression(),
        'DecisionTree': DecisionTreeRegressor(max_depth=6),
        'RandomForest': RandomForestRegressor(random_state=1, n_jobs=12),
        'Ridge': Ridge(),
        'AdaBoost': AdaBoostRegressor(),
        'BayesianRidge': BayesianRidge(compute_score=True),
        'KNN': KNeighborsRegressor(n_neighbors=12),
        'ExtraTrees': ExtraTreesRegressor(random_state=1, n_jobs=12),
        'SGD': SGDRegressor(loss='huber', penalty='elasticnet', random_state=1),
        'PassiveAggressive': PassiveAggressiveRegressor(),
        'ElasticNet': ElasticNet(),
        'Lars': Lars(),
        # 'lgm': lgb.LGBMRegressor(objective='regression', num_leaves=40,
        #                          learning_rate=0.1, n_estimators=20, num_threads=10),
        # 'xgb_parallel': xgb.XGBRegressor(objective='reg:linear', n_estimators=10,
        #                                  max_depth=3, nthread=4)
    }
    newtrain = make_dataframe(chromosome, train)
    if len(newtrain) == 0:
        return 1000000000
    estimator = model[modelname]
    # return pearsonr(estimator.fit(newtrain[feature], newtrain[label])
    #                 .predict(test[feature]), test[label])[0]
    estimator.fit(newtrain[feature], newtrain[label])
    # Normalised RMSE: RMSE of the predictions divided by the RMS of the labels
    return np.sqrt(np.power(estimator.predict(test[feature]) - test[label], 2).mean()) \
        / np.sqrt(np.power(test[label], 2).mean())
def __init__(self, **kwargs):
    super().__init__()
    tune_grid = {
        "fit_intercept": [True, False],
        "normalize": [True, False],
        "eps": [0.00001, 0.0001, 0.001, 0.01, 0.05, 0.0005, 0.005,
                0.00005, 0.02, 0.007, 0.1],
    }
    tune_distributions = {
        "eps": UniformDistribution(0.00001, 0.1),
    }
    self.tune_grid = tune_grid
    self.tune_distributions = tune_distributions
    self.estimator = Lars(**kwargs)
def get_hyperparameters_model():
    param_dist = {}
    clf = Lars()
    model = {'lars': {'model': clf, 'param_distributions': param_dist}}
    return model
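# Hypothetical consumer sketch (not from the original source): unpack the
# dictionary above into RandomizedSearchCV. With an empty param_distributions
# there is nothing to sample, so a single candidate is fitted.
from sklearn.datasets import make_regression
from sklearn.model_selection import RandomizedSearchCV

X, y = make_regression(n_samples=100, n_features=10, random_state=0)
spec = get_hyperparameters_model()['lars']
search = RandomizedSearchCV(spec['model'], spec['param_distributions'],
                            n_iter=1, cv=3)
search.fit(X, y)
print(search.best_score_)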
def getModel(self, _params):
    return Lars(
        fit_intercept=_params['fit_intercept'],
        normalize=_params['normalize'],
        eps=_params['eps'],
        copy_X=_params['copy_X'],
    )
def get_classifier_scores(pred_labels, mixed_data, target_nmf_pp_data, target_data,
                          use_lasso=True, n_nonzeros=10):
    aris = np.zeros(5)
    active_genes = range(mixed_data.shape[0])
    include_inds = []
    pred_lbls = np.unique(pred_labels)
    for p in pred_lbls:
        inds = np.where(pred_labels == p)[0]
        if inds.size >= 2:
            include_inds.extend(inds)
    if len(include_inds) > 2:
        if use_lasso:
            cls = OneVsRestClassifier(Lars(fit_intercept=True, normalize=True,
                                           copy_X=True, n_nonzero_coefs=50)).fit(
                mixed_data[:, include_inds].T.copy(),
                pred_labels[include_inds].copy())
            # Collect active indices across the one-vs-rest estimators
            active_genes = []
            for e in range(len(cls.estimators_)):
                active_genes.extend(cls.estimators_[e].active_)
            active_genes = np.unique(active_genes)
            print(active_genes)
            print(active_genes.shape)
        else:
            cls = OneVsRestClassifier(LinearRegression(fit_intercept=True,
                                                       normalize=False,
                                                       copy_X=True, n_jobs=1)).fit(
                mixed_data[:, include_inds].T.copy(),
                pred_labels[include_inds].copy())
        ret = cls.predict(target_data[:, include_inds].T.copy())
        aris[4] = metrics.adjusted_rand_score(ret, pred_labels[include_inds].copy())
    aris[0] = unsupervised_acc_kta(target_data[active_genes, :].copy(),
                                   pred_labels.copy(), kernel='linear')
    aris[1] = unsupervised_acc_silhouette(target_data[active_genes, :].copy(),
                                          pred_labels.copy(), metric='euclidean')
    aris[2] = unsupervised_acc_silhouette(target_data[active_genes, :].copy(),
                                          pred_labels.copy(), metric='pearson')
    aris[3] = unsupervised_acc_silhouette(target_data[active_genes, :].copy(),
                                          pred_labels.copy(), metric='spearman')
    return aris
def ml_lars(X_train, y_train, cv=5, beta=0.75, params=None):
    model = Lars()
    if not params:
        params = {'n_nonzero_coefs': [n for n in range(30, 150, 20)]}
    max_score = 0
    best_t = 0
    best_model = ""
    best_grid = ""
    for t in [0, 0.05, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6]:
        scorer = make_scorer(new_scorer, threshold=t, beta=beta)
        model_grid = GridSearchCV(model, param_grid=params, scoring=scorer,
                                  cv=cv, verbose=0, n_jobs=-1)
        model_grid.fit(X_train, y_train)
        if max_score < model_grid.best_score_:
            # Track the running best (the original never updated max_score,
            # so every later threshold overwrote the best model)
            max_score = model_grid.best_score_
            best_model = model_grid.best_estimator_
            best_t = t
            best_grid = model_grid
    model_grid = best_grid
    best_model = best_grid.best_estimator_
    print("Best Score : {}".format(model_grid.best_score_))
    print("Threshold :", best_t)
    print("Best Params : {}".format(model_grid.best_params_))
def __init__(self, X, y, lar_params, nfolds=3, n_jobs=1, scoring=None, verbose=True):
    self._code = "lar"
    if verbose:
        print("Constructed Lars: " + self._code)
    AbstractRegressorPredictiveModel.__init__(self, "regressor", X, y, lar_params,
                                              nfolds, n_jobs, scoring, verbose)
    self._model = self.constructRegressor(Lars())
def connectWidgets(self):
    lars = Lars()
    self.fit_intercept_listWidget.setCurrentItem(
        self.fit_intercept_listWidget.findItems(str(lars.fit_intercept),
                                                QtCore.Qt.MatchExactly)[0])
    self.normalize_list.setCurrentItem(
        self.normalize_list.findItems(str(lars.normalize),
                                      QtCore.Qt.MatchExactly)[0])
    self.n_nonzero_coefsLineEdit.setText(str(lars.n_nonzero_coefs))
def perform_LARS(normalized_matrix, genes):
    # Number of genes
    no_genes = len(genes)
    # Dictionary of top regulators for each gene
    regulators = {}
    for i in range(no_genes):
        # Current gene whose top regulators are being found
        current_y = normalized_matrix[:, i]
        # Copy the matrix and remove the current gene's column, so a gene
        # cannot be selected as its own regulator
        temp_matrix = normalized_matrix.copy()
        temp_matrix = np.delete(temp_matrix, i, axis=1)
        # Fit a Least Angle Regression model
        lars = Lars()
        lars.fit(temp_matrix, current_y)
        # Coefficient values
        coeff_values = lars.coef_
        # Copy the gene list and remove the current gene so indexes line up
        gene_copy = list(genes)
        gene_copy.remove(genes[i])
        # Stability selection gives an effective ranking of the top regulators
        rank_dict_score = stability_selection(temp_matrix, genes, 2000,
                                              current_y, gene_copy)
        # Top regulators
        top_regulators = find_top_regulators(rank_dict_score)
        regulators[genes[i]] = top_regulators
    return regulators
def run_Lars(single_time):
    save_folder = 'Lars'
    model = Lars()
    if single_time:
        comparison_algorithm.training_test_with_sklearnmodel(
            save_folder=save_folder, model=model)
    else:
        save_folder += '10t'
        comparison_algorithm.training_test_10times_sklearnmodel(
            save_folder=save_folder, model=model)
def run(self):
    params = {
        'fit_intercept': self.fit_interceptCheckBox.isChecked(),
        'verbose': False,
        'normalize': self.normalizeCheckBox.isChecked(),
        'precompute': 'auto',
        'n_nonzero_coefs': self.n_nonzero_coefsSpinBox.value(),
        'copy_X': True,
        'fit_path': True
    }
    return params, self.getChangedValues(params, Lars())
def LarsRegressorGS(X_train, X_test, y_train, y_test):
    reg = Lars()
    grid_values = {
        'n_nonzero_coefs': list(range(100, 500, 100)),
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)
    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="LarsRegressorGS", best_params=best_params)
    logSave(nameOfModel="LarsRegressorGS", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
def load_Least_Angle_Regression():
    '''
    Load Least Angle Regression and give a name for the output files.

    Parameters:
        None

    Returns:
        model_name : (str) Name of the model for the output file.
        regr : (Regressor) Longitude and latitude regressor.
    '''
    model_name = "Least Angle Regression"
    regr = Lars()
    return model_name, regr
def get_models(models=None):
    if models is None:  # avoid a shared mutable default argument
        models = dict()
    models['lr'] = LinearRegression()
    models['lasso'] = Lasso()
    models['ridge'] = Ridge()
    models['en'] = ElasticNet()
    models['huber'] = HuberRegressor()
    models['lars'] = Lars()
    models['llars'] = LassoLars()
    models['pa'] = PassiveAggressiveRegressor(max_iter=1000, tol=1e-3)
    models['ransac'] = RANSACRegressor()
    models['sgd'] = SGDRegressor(max_iter=1000, tol=1e-3)
    print('Defined %d models' % len(models))
    return models
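# Hypothetical evaluation sketch (not from the original source): score every
# model in the dictionary above with cross-validated R^2 on synthetic data.
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=200, n_features=20, noise=1.0, random_state=0)
for name, model in get_models().items():
    scores = cross_val_score(model, X, y, cv=3)
    print('%s: %.3f' % (name, scores.mean()))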
def choose_regression_algorithm(method="LR"):
    if method == "LR":
        regression_algorithm = LinearRegression()
    elif method == "Lasso":
        regression_algorithm = Lasso()
    elif method == "Ridge":
        regression_algorithm = Ridge()
    elif method == "HR":
        regression_algorithm = HuberRegressor()
    elif method == "SVR":
        regression_algorithm = SVR()
    elif method == "LL":
        regression_algorithm = LassoLars()
    elif method == "ARDR":
        regression_algorithm = ARDRegression()
    elif method == "BR":
        regression_algorithm = BayesianRidge()
    elif method == "ElasticNet":
        regression_algorithm = ElasticNet()
    elif method == "Lars":
        regression_algorithm = Lars()
    elif method == "PA":
        regression_algorithm = PassiveAggressiveRegressor()
    elif method == "RANSAC":
        regression_algorithm = RANSACRegressor()
    elif method == "TS":
        regression_algorithm = TheilSenRegressor()
    elif method == "LP":
        # lars_path is a function that needs (X, y), not an estimator class,
        # so return it uncalled (the original called it with no arguments)
        regression_algorithm = lars_path
    elif method == "RR":
        # Same for ridge_regression: a function, not an estimator class
        regression_algorithm = ridge_regression
    else:
        print("You haven't chosen a valid regressor!!!")
        regression_algorithm = None
    print("method used:\t", method)
    return regression_algorithm
def runLarsRegressor(self):
    lm = Lars(fit_intercept=True, normalize=True)
    print("Lars Regressor\n")
    lm.fit(self.m_X_train, self.m_y_train)
    predictY = lm.predict(self.m_X_test)
    score = lm.score(self.m_X_test, self.m_y_test)
    predictTraingY = lm.predict(self.m_X_train)
    self.displayPredictPlot(predictY)
    self.displayResidualPlot(predictY, predictTraingY)
    self.dispalyModelResult(lm, predictY, score)
def create_model_LARS(state_matrix, transcription_factors):
    regulators = {}
    for i in range(len(transcription_factors)):
        # Create the training set
        X = []
        y = []
        for j in range(1, len(state_matrix)):
            # Append the expression levels from the previous step
            X.append(state_matrix[j - 1].tolist())
            # The output value is the difference / rate of change of expression
            y.append(state_matrix[j][i] - state_matrix[j - 1][i])
        # Copy the list of transcription factors and drop the current one;
        # delete by index, since list.remove() matches by value and would
        # misfire on duplicate entries
        tf_copy = list(transcription_factors)
        del tf_copy[i]
        # Remove the corresponding column from the training set
        for expression in X:
            del expression[i]
        # Feature selection using Least Angle Regression
        lars = Lars()
        # Fit the training data into the model
        lars.fit(X, y)
        # Extract the important features corresponding to this gene
        coefficients = lars.coef_
        # Regulators for the network
        regulators[transcription_factors[i]] = coefficients
    return regulators
def main():
    from_root = "~/Documents/School/ComputerScience/ahcompsci/Scikit-Learning-StanleyWei/scikit-utkproject/dataset/fiftytwo"
    path = "dataset/whitemensmall/"
    dirs = os.listdir(path)
    main_df = add_images_from_dirs(dirs, path)
    # Split the whole frame so the image, age and gender columns stay aligned
    # (the original split two columns into two names, but train_test_split on
    # two arrays returns four pieces, and the age labels were lost)
    train_df, test_df = train_test_split(main_df)
    # train_df = train_df.loc[train_df['ethnicity'] == "0"]
    # test_df = test_df.loc[test_df['ethnicity'] == "0"]
    train_x = flatten_image_df(train_df.loc[:, "image"])
    test_x = flatten_image_df(test_df.loc[:, "image"])
    clf = Lars()
    # train_x = np.array(train_df.loc[:, "image"])
    # x_train = train_x.flatten().reshape(len(train_df), -1)
    clf.fit(train_x, train_df.loc[:, "age"].to_numpy())
    coefficients = clf.coef_
    # print(coefficients)
    coefficients_array = np.array(coefficients).reshape(
        len(train_df.image.iloc[0]), -1)
    # print(coefficients_array)
    # heatmap = plt.imshow(coefficients_array, cmap="hot", interpolation="nearest")
    # Take magnitudes as a copy; the original in-place loop also mutated clf.coef_
    coefficients_abs = np.abs(coefficients)
    coefficients_array_abs = np.array(coefficients_abs).reshape(
        len(train_df.image.iloc[0]), -1)
    heatmap = plt.imshow(coefficients_array_abs, cmap="hot",
                         interpolation="nearest")
    # heatmap_extremes = plt.imshow(coefficients_array_abs, vmax=0.025,
    #                               cmap="hot", interpolation="nearest")
    plt.colorbar(heatmap)
    # plt.colorbar(heatmap_extremes)
    plt.show()
    # ('ppru', 'ppr_submission_user.csv', 'ppr_fitted_user.csv'),
    # ('pprg', 'ppr_submission_global.csv', 'ppr_fitted_global.csv'),
]

fitted = pd.DataFrame(index=review_data.index)
submission = pd.DataFrame(index=review_data_final.index)
for name, sub_name, fit_name in blend_inputs:
    f_df = pd.read_csv(os.path.join('..', fit_name))
    f_df.index = review_data.index
    fitted[name] = f_df['stars']
    s_df = pd.read_csv(os.path.join('..', sub_name))
    s_df.index = review_data_final.index
    submission[name] = s_df['stars']

gbr = GradientBoostingRegressor(max_depth=3, verbose=2)
gbr.fit(fitted, review_data['stars'])
pred = gbr.predict(submission)
pd.DataFrame({'review_id': submission.index,
              'stars': np.maximum(1, np.minimum(5, pred))}).to_csv(
    '../gbr_submission.csv', index=False)

lar = Lars(fit_intercept=True, verbose=2, normalize=True, fit_path=True)
lar.fit(fitted, review_data['stars'])
pred = lar.predict(submission)
pd.DataFrame({'review_id': submission.index,
              'stars': np.maximum(1, np.minimum(5, pred))}).to_csv(
    '../lar_submission.csv', index=False)

ridge = Ridge()
ridge.fit(fitted, review_data['stars'])
pred = ridge.predict(submission)
pd.DataFrame({'review_id': submission.index,
              'stars': np.maximum(1, np.minimum(5, pred))}).to_csv(
    '../ridge_submission.csv', index=False)

# TODO: blend based on size of rating neighborhood
def larsLearn(kap):
    lars = Lars(n_nonzero_coefs=kap, fit_intercept=False)
    lars.fit(X_train, y_train)
    return lars
lasso_beta = np.array([lasso.coef_]).T
lasso_gamma = np.array([[0. if abs(x) < 1e-100 else 1. for x in lasso.coef_]]).T
# P = lambda X: lasso.predict(X)
lasso_predictor = PredictorWrapper.PredictorWrapper(lasso_beta, lasso_gamma,
                                                    lasso.predict)
dill.dump(lasso_predictor, open('%sLASSO.p' % logDir, 'wb'))
with open(logFile, 'a') as f:
    f.write('Lasso c: %15.10f alpha: %15.10f\n'
            % (1. / (2. * X_tr.shape[0]), optLam))

##############
## LARS_SET ##
##############
kappa = [2, 4, 10]
for k in kappa:
    lars = Lars(n_nonzero_coefs=k, fit_intercept=False)
    lars.fit(X_tr, y_tr)
    lars_beta = np.array([lars.coef_]).T
    lars_gamma = np.zeros((X_tr.shape[1], 1))
    lars_gamma[lars.active_] = 1.
    lars_predictor = PredictorWrapper.PredictorWrapper(lars_beta, lars_gamma,
                                                       lars.predict)
    dill.dump(lars_predictor, open('%sLARS_%02d.p' % (logDir, k), 'wb'))

##############
## LARS_OPT ##
##############
larsKappas = np.linspace(0, 40, 41, dtype=int)

def larsEval(learned):
    learned_yhat = np.array([learned.predict(X_val)]).T
    learned_mse = sum((y_val - learned_yhat) ** 2)[0]
model = sm.OLS(housing['labels'], housing['data'])
results = model.fit()
print(results.summary())

# Part B
preds_train = lin.predict(housing['data'])
preds_test = lin.predict(housing['testdata'])
ave_sq_loss_train = ((housing['labels'] - preds_train) ** 2).sum() / len(housing['data'][:, 1])
ave_sq_loss_test = ((housing['testlabels'] - preds_test) ** 2).sum() / len(housing['testdata'][:, 1])
print(ave_sq_loss_train)
print(ave_sq_loss_test)

# Part C
housing['data'] = housing['data'][:, 1:14]
housing['testdata'] = housing['testdata'][:, 1:14]

from sklearn.linear_model import Lars
reduced = Lars(fit_intercept=True, n_nonzero_coefs=3)
reduced.fit(housing['data'], housing['labels'])
print(reduced.intercept_)
print(reduced.coef_)
new_reg_data = reg_data[:, mask]
print(new_reg_data.shape)  # (200, 11)

# Taking a more fundamental approach to regularization with LARS.
# Least-angle regression (LARS) is a regression technique that is well suited
# for high-dimensional problems, that is, p >> n, where p denotes the columns
# or features and n is the number of samples.
from sklearn.datasets import make_regression
reg_data, reg_target = make_regression(n_samples=200, n_features=500,
                                       n_informative=10, noise=2)

from sklearn.linear_model import Lars
lars = Lars(n_nonzero_coefs=10)
lars.fit(reg_data, reg_target)
print(np.sum(lars.coef_ != 0))  # 10

train_n = 100
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])
lars_500 = Lars()  # n_nonzero_coefs is 500 by default
lars_500.fit(reg_data[:train_n], reg_target[:train_n])

# Now, to see how well each model fits the held-out data, do the following:
np.mean(np.power(reg_target[train_n:] - lars_12.predict(reg_data[train_n:]), 2))
# 31.527714163321001
np.mean(np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2))
# 9.6198147535136237e+30
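# Hedged follow-up sketch (not from the original text): rather than hand-picking
# n_nonzero_coefs as above, scikit-learn's LarsCV chooses the stopping point by
# cross-validation. Data dimensions below mirror the example and are illustrative.
from sklearn.datasets import make_regression
from sklearn.linear_model import LarsCV
import numpy as np

reg_data, reg_target = make_regression(n_samples=200, n_features=500,
                                       n_informative=10, noise=2, random_state=0)
lars_cv = LarsCV(cv=5)
lars_cv.fit(reg_data[:100], reg_target[:100])
print(np.sum(lars_cv.coef_ != 0))  # number of features LARS kept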
print "<<<<< N = %d >>>>>" % n cdg = CDG.CollinearDataGenerator(p = 20,sparsity=.8) X = cdg.getX(n) p = X.shape[1] y = cdg.getY(X) print cdg.gamma val_size = int(0.1 * X.shape[0]) X_val = X[0:val_size,:] y_val = y[0:val_size,:] X_train = X[val_size:,:] y_train = y[val_size:,:] lars = Lars(n_nonzero_coefs=2) lars.fit(X,y) # print lars.coef_ alphas, order, coefs = lars_path(X,y.T[0],verbose=True) # print alphas print order magnitudes = sorted(list(enumerate(coefs[:,-1])),key=lambda x: x[1]) magnitudes = map(lambda x: x[0],magnitudes) print magnitudes # print coefs quantities = coefs[:,-1] quantities = np.array([quantities[i] for i in order]) # print quantities total = sum(abs(quantities)) # # print total
# LARS Regression
import numpy as np
from sklearn import datasets
from sklearn.linear_model import Lars

# load the diabetes dataset
dataset = datasets.load_diabetes()
# fit a LARS model to the data
model = Lars()
model.fit(dataset.data, dataset.target)
print(model)
# make predictions
expected = dataset.target
predicted = model.predict(dataset.data)
# summarize the fit of the model
mse = np.mean((predicted - expected) ** 2)
print(mse)
print(model.score(dataset.data, dataset.target))