def GLS(X, y, model="ols", **kwargs):
    """ model = "ols", "ridge", "lasso", "lar", "kernel", "xgb" """
    if model == "ols":
        md1 = linear_model.LinearRegression(fit_intercept=True).fit(X, y)
        md0 = linear_model.LinearRegression(fit_intercept=False).fit(X, y)
    if model == "ridge":
        alpha = get_alpha(kwargs, default=10**0.5)
        md1 = linear_model.Ridge(alpha=alpha, fit_intercept=True).fit(X, y)
        md0 = linear_model.Ridge(alpha=alpha, fit_intercept=False).fit(X, y)
    if model == 'lasso':
        alpha = get_alpha(kwargs, default=0.1)
        md1 = linear_model.Lasso(alpha=alpha, fit_intercept=True).fit(X, y)
        md0 = linear_model.Lasso(alpha=alpha, fit_intercept=False).fit(X, y)
    if model == 'lar':
        """ TO DO """
        md1 = linear_model.Lars(fit_intercept=True).fit(X, y)
        md0 = linear_model.Lars(fit_intercept=False).fit(X, y)
    if model == 'kernel':
        alpha, kernel, gamma, degree, coef0 = get_kernel_coef(kwargs["alpha"])
        md1 = kernel_ridge.KernelRidge(alpha=alpha, kernel=kernel, gamma=gamma,
                                       degree=degree, coef0=coef0).fit(X, y)
        md0 = md1
    if model == 'xgb':
        md1 = xgb.XGBRegressor().fit(X, y)
        md0 = md1
    return {"1": md1, "-1": md0, "type": 'GLS'}
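# --- Hedged usage sketch for GLS() above (not part of the original code) ---
# Assumes the module-level imports that GLS relies on (sklearn.linear_model,
# sklearn.kernel_ridge, xgboost as xgb) and the helpers get_alpha/get_kernel_coef
# are available; only the "lar" branch is exercised here, which needs none of the
# helpers. The returned dict keys the fit with intercept ("1") and without ("-1").
def _demo_gls_lar():
    from sklearn import datasets
    X, y = datasets.load_diabetes(return_X_y=True)
    fits = GLS(X, y, model="lar")
    print(fits["1"].coef_)    # Lars fit with intercept
    print(fits["-1"].coef_)   # Lars fit without intercept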
def test_multitarget():
    # Assure that estimators receiving multidimensional y do the right thing
    X = diabetes.data
    Y = np.vstack([diabetes.target, diabetes.target ** 2]).T
    n_targets = Y.shape[1]
    estimators = [
        linear_model.LassoLars(),
        linear_model.Lars(),
        # regression test for gh-1615
        linear_model.LassoLars(fit_intercept=False),
        linear_model.Lars(fit_intercept=False),
    ]

    for estimator in estimators:
        estimator.fit(X, Y)
        Y_pred = estimator.predict(X)
        alphas, active, coef, path = (estimator.alphas_, estimator.active_,
                                      estimator.coef_, estimator.coef_path_)
        for k in range(n_targets):
            estimator.fit(X, Y[:, k])
            y_pred = estimator.predict(X)
            assert_array_almost_equal(alphas[k], estimator.alphas_)
            assert_array_almost_equal(active[k], estimator.active_)
            assert_array_almost_equal(coef[k], estimator.coef_)
            assert_array_almost_equal(path[k], estimator.coef_path_)
            assert_array_almost_equal(Y_pred[:, k], y_pred)
def main():
    X_train, y_train, X_test = load_data('v2')

    # tuning
    model = linear_model.Lars()

    # dict with tuning parameters
    param_grid = {'n_nonzero_coefs': range(50, 70, 1)}
    kfold = KFold(n_splits=folds, random_state=seed)
    scorer = make_scorer(rmse, greater_is_better=False)
    grid_search = GridSearchCV(model, param_grid, n_jobs=-1, cv=kfold,
                               verbose=1, scoring=scorer)
    grid_result = grid_search.fit(X_train, y_train)

    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        # print("%f (%f) with: %r" % (mean, stdev, param))
        print("{:06.5f} ({:06.5f}) with {}".format(mean, stdev, param))

    # summarize results
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
def train(self, dataset):
    model = linear_model.Lars()
    independent_data = self.get_independent_variable_data(dataset)
    dependent_data = self.get_dependent_variable_data(dataset)
    trained_regression = model.fit(independent_data, dependent_data)
    return TrainedSklearnModel(self, trained_regression)
def boston_lars(x=datasets.load_boston()['data'], y=datasets.load_boston()['target']):
    print('Least Angle Regression\n')
    model = linear_model.Lars()
    num_train = 500
    num_val = 6

    x_train_ones = numpy.ones((num_train, 1))
    x_train = numpy.column_stack((x_train_ones, numpy.array(x[0:num_train])))
    y_train = y[0:num_train]

    x_val_ones = numpy.ones((num_val, 1))
    x_val = numpy.column_stack((x_val_ones, numpy.array(x[num_train:num_train + num_val])))
    y_val = y[num_train:num_train + num_val]

    print('Number of training data: %i' % len(x_train))
    print('Number of validation data: %i' % len(x_val))

    coef = model.fit(x_train, y_train).coef_
    variance = 0
    print('Coefficient: %s' % coef)

    for i in range(0, num_val):
        x = x_val[i]
        y = y_val[i]
        hypo = model.predict([x])[0]
        print('\n%i.' % (i + 1))
        print('X: %s\nHypothesis: %s \ny: %s\nVariance: %s' % (x, hypo, y, (hypo - y) ** 2))
        variance += (hypo - y_val[i]) ** 2
        plt.scatter(i + 1, hypo, c='b')
        plt.scatter(i + 1, y, c='g')

    mean_variance = variance / num_val
    print('\nMean Variance: %s\n' % mean_variance)
    plt.show()
def scikit_Lars(self):
    """Generate a multiple regression model using sklearn.linear_model.Lars"""
    # import libraries
    from sklearn import linear_model
    import numpy as np

    # Get a model
    columns = len(self.x_in[0])
    model = linear_model.Lars(n_nonzero_coefs=columns, eps=1e-17)
    model.fit(self.x_in, self.y_in)

    # Generate string with regression equation
    self.reg_model = str("y = %8.3f" % float(model.intercept_))
    for i in range(columns):
        aux_string = str(" + %8.3f" % float(model.coef_[i]) + "x" + str(i + 1))
        self.reg_model += aux_string

    # Get multiple regression coefficients
    alphas = []
    alphas.append(float(model.intercept_))
    for i in range(columns):
        alphas.append(model.coef_[i])

    # Set up array with coefficients
    self.b = np.array(alphas)
def test_lars_add_features(verbose=False):
    """
    assure that at least some features get added if necessary
    test for 6d2b4c
    """
    linear_model.Lars(verbose=verbose, fit_intercept=True).fit(
        np.array([[0.02863763, 0.88144085, -0.02052429, -0.10648066, -0.06396584, -0.18338974],
                  [0.02038287, 0.51463335, -0.31734681, -0.12830467, 0.16870657, 0.02169503],
                  [0.14411476, 0.37666599, 0.2764702, 0.0723859, -0.03812009, 0.03663579],
                  [-0.29411448, 0.33321005, 0.09429278, -0.10635334, 0.02827505, -0.07307312],
                  [-0.40929514, 0.57692643, -0.12559217, 0.19001991, 0.07381565, -0.0072319],
                  [-0.01763028, 1., 0.04437242, 0.11870747, 0.1235008, -0.27375014],
                  [-0.06482493, 0.1233536, 0.15686536, 0.02059646, -0.31723546, 0.42050836],
                  [-0.18806577, 0.01970053, 0.02258482, -0.03216307, 0.17196751, 0.34123213],
                  [0.11277307, 0.15590351, 0.11231502, 0.22009306, 0.1811108, 0.51456405],
                  [0.03228484, -0.12317732, -0.34223564, 0.08323492, -0.15770904, 0.39392212],
                  [-0.00586796, 0.04902901, 0.18020746, 0.04370165, -0.06686751, 0.50099547],
                  [-0.12951744, 0.21978613, -0.04762174, -0.27227304, -0.02722684, 0.57449581]]),
        np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]))
def main(predictions=False):
    train = pd.read_csv("./data/X_train_v1.csv")
    y_train = train['SalePrice']
    X_train = train.loc[:, 'MSSubClass':'SaleCondition_Partial']

    test = pd.read_csv("./data/X_test_v1.csv")
    X_test = test.loc[:, 'MSSubClass':'SaleCondition_Partial']

    # build model
    model = linear_model.Lars(n_nonzero_coefs=64)

    # fit model
    model.fit(X_train, y_train)

    # evaluate model
    results = rmse_cv(model, X_train, y_train)
    print("RMSE-{}-CV({})={:06.5f}+-{:06.5f}".format(model_label, folds,
                                                     results.mean(), results.std()))

    # predict
    if predictions:
        y_test_pred_log = model.predict(X_test)
        y_test_pred = np.expm1(y_test_pred_log)
        submission = pd.DataFrame({'Id': test['Id'], 'SalePrice': y_test_pred})
        subFileName = "./submissions/sub-" + model_label + "-" + time.strftime("%Y%m%d-%H%M%S") + ".csv"
        print("saving to file: " + subFileName)
        submission.to_csv(subFileName, index=False)
def run(self, x: np.ndarray, y: np.ndarray, design_matrix: np.ndarray):
    """
    Implements the LAR method to compute the polynomial_chaos coefficients.
    Recommended only for the model_selection algorithm.

    :param x: :class:`numpy.ndarray` containing the training points (samples).
    :param y: :class:`numpy.ndarray` containing the model evaluations (labels) at the training points.
    :param design_matrix: matrix containing the evaluation of the polynomials at the input points **x**.
    :return: Beta (polynomial_chaos coefficients)
    """
    polynomialbasis = design_matrix
    P = polynomialbasis.shape[1]
    n_samples, inputs_number = x.shape

    reg = regresion.Lars(fit_intercept=self.fit_intercept, verbose=self.verbose,
                         n_nonzero_coefs=self.n_nonzero_coefs, normalize=self.normalize)
    reg.fit(design_matrix, y)

    # LarsBeta = reg.coef_path_
    c_ = reg.coef_
    self.Beta_path = reg.coef_path_

    if c_.ndim == 1:
        c_ = c_.reshape(-1, 1)

    return c_, None, np.shape(c_)[1]
def get_regression_models():
    models = [('LR', linear_model.LinearRegression()),
              ('R', linear_model.Ridge()),
              ('Lo', linear_model.Lasso(alpha=.015)),
              ('La', linear_model.Lars(positive=True)),
              ('OMP', linear_model.OrthogonalMatchingPursuit()),
              ('BR', linear_model.BayesianRidge()),
              ('GB', ensemble.GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
                                                        learning_rate=0.1, loss='ls', max_depth=5,
                                                        max_features=None, min_samples_leaf=1,
                                                        min_samples_split=2, min_weight_fraction_leaf=.0,
                                                        n_estimators=400, presort='auto', random_state=None,
                                                        subsample=1.0, verbose=0, warm_start=False)),
              ('RF', ensemble.RandomForestRegressor()),
              ('AB', ensemble.AdaBoostRegressor())]
    return models
def linear_regression(df, id, column_to_predict, n):
    df = df[df.id == id]
    x = df.drop(['id', 'timestamp', column_to_predict], axis=1).fillna(0).values
    y = df[[column_to_predict]].fillna(0).values[:, 0]

    pca = PCA(n_components=n, whiten=True)
    x = pca.fit_transform(x)

    alphas_lars, _, coef_path_lars = linear_model.lars_path(x, y, method='lars')
    # coef_path_cont_lars = interpolate.interp1d(alphas_lars[::-1], coef_path_lars[:, ::-1])

    xx = np.sum(np.abs(coef_path_lars.T), axis=1)
    xx /= xx[-1]
    plt.plot(xx, coef_path_lars.T)
    ymin, ymax = plt.ylim()
    plt.vlines(xx, ymin, ymax, linestyle='dashed')
    plt.xlabel('|coef|/max|coef|')
    plt.ylabel('Coefficients')
    plt.axis('tight')
    plt.show()

    res = linear_model.Lars(n_nonzero_coefs=n, fit_intercept=True, normalize=True)
    res.fit(x, y)
    print(res.score(x, y))
def test_all():
    dp = DataPreprocessor()
    dp.read_all()
    models = [
        GradientBoostingRegressor(),
        MLPRegressor(),
        DecisionTreeRegressor(),
        GaussianProcessRegressor(),
        KNeighborsRegressor(),
        svm.SVR(),
        KernelRidge(),
        linear_model.HuberRegressor(),
        linear_model.BayesianRidge(),
        linear_model.LassoLars(alpha=.1),
        linear_model.Lars(n_nonzero_coefs=25),
        linear_model.ElasticNet(tol=1),
        linear_model.Lasso(alpha=0.1, tol=0.1),
        linear_model.Lasso(alpha=0.3, tol=1),
        LinearRegression(),
        linear_model.Ridge(),
    ]
    results = []
    print(0)
    for model in models:
        res = cross_val_score(model, dp.train_inputs, dp.train_outputs, cv=KFold(n_splits=20))
        results.append([res.mean(), res.std(), model])
        print(1)
    for r in sorted(results):
        print(r[2], '\nAccuracy (mean std): {:.4f} {:.4f}'.format(r[0], r[1]), '\n--------------')
def test_lars_n_nonzero_coefs(verbose=False):
    lars = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose)
    lars.fit(X, y)
    assert_equal(len(lars.coef_.nonzero()[0]), 6)
    # The path should be of length 6 + 1 in a Lars going down to 6
    # non-zero coefs
    assert_equal(len(lars.alphas_), 7)
def predict_author(X, matrix_ids, new_text_vect, columns, model):
    # Lasso
    if model == "lasso":
        model_reg = linear_model.Lasso(alpha=1.0, fit_intercept=True, max_iter=10000, tol=0.0001)
    elif model == "lars":
        model_reg = linear_model.Lars(fit_intercept=True)

    model_reg.fit(X, new_text_vect)
    y_s = []

    # Calculate distances and predict author
    w_predicted = model_reg.coef_
    num_authors = X.shape[1]
    residuals = []
    for i in range(num_authors):
        w = np.array([0.0] * num_authors, dtype=float)
        w[i] = w_predicted[i]
        y_hat = np.dot(X, w)
        residuals.append((np.linalg.norm(y_hat - new_text_vect), matrix_ids[i], y_hat))
        y_s.append(y_hat)

    if columns > 1:
        return str(math.floor(int(min(residuals)[1])))
    else:
        return min(residuals), y_s
def lars(signals, dictionary, n_nonzero=0, alpha=0, lars_params=None, **kwargs):
    """
    "Homotopy" algorithm for solving the Lasso

        argmin 0.5*||X - DA||_2^2 + r*||A||_1 for all r.

    This algorithm is supposedly the most accurate for l1 regularization,
    but in practice it is terribly slow and not very accurate -- roughly 20x
    slower than OMP. This is strange, since OMP solves an NP-hard problem
    while this one is convex.

    :param signals: Signals to encode. Shape (signal_size, n_signals) or (signal_size,)
    :param dictionary: Dictionary, shape (signal_size, n_atoms)
    :param n_nonzero: Number of nonzero coefficients to use
    :param alpha: Regularization parameter. Overwrites n_nonzero
    :param lars_params: See sklearn.linear_model.LassoLars docs
    :param kwargs: Not used. Just to make the calling API the same for all regularization algorithms
    :return: Sparse codes, shape (n_atoms, n_signals) or (n_atoms,)
    """
    params = {
        'precompute': True,
        'fit_path': False,
        'normalize': True
    }

    if n_nonzero > 0 and alpha == 0:
        params['n_nonzero_coefs'] = int(n_nonzero)
        model = linear_model.Lars()
    elif alpha > 0:
        params['alpha'] = alpha
        model = linear_model.LassoLars()
    else:
        raise ValueError('Need to specify either regularization '
                         'parameter alpha or number of nonzero '
                         'coefficients n_nonzero')

    if isinstance(lars_params, dict):
        params.update(lars_params)

    model.set_params(**params)
    model.fit(dictionary.copy(), signals)
    return model.coef_.T.copy()
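# --- Hedged usage sketch for lars() above (not part of the original code) ---
# Assumes numpy and sklearn.linear_model are imported at module level, as the
# function body implies. The dictionary and signals are random placeholders used
# only to illustrate the expected shapes, not data from the original project.
def _demo_lars_sparse_coding():
    import numpy as np
    rng = np.random.RandomState(0)
    dictionary = rng.randn(64, 128)   # (signal_size, n_atoms)
    signals = rng.randn(64, 10)       # (signal_size, n_signals)
    codes = lars(signals, dictionary, n_nonzero=5)
    print(codes.shape)                # (n_atoms, n_signals) = (128, 10)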
def test_model_lars(self):
    model, X = fit_regression_model(linear_model.Lars())
    model_onnx = convert_sklearn(
        model, "lars",
        [("input", FloatTensorType([None, X.shape[1]]))],
        target_opset=TARGET_OPSET)
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(X, model, model_onnx, basename="SklearnLars-Dec4")
def __init__(self):
    self.mortality_predictor = linear_model.ARDRegression()
    self.natality_predictor = linear_model.Lars()
    self.migration_predictor = linear_model.LinearRegression()
    self.mortality_offset = 1000000
def train_models(x, y):
    model1 = linear_model.Lars(n_nonzero_coefs=1)
    model2 = linear_model.ElasticNetCV()
    model3 = linear_model.BayesianRidge()
    model1.fit(x, y)
    model2.fit(x, y)
    model3.fit(x, y)
    return [model1, model2, model3]
def test_lars_add_features():
    # assure that at least some features get added if necessary
    # test for 6d2b4c
    # Hilbert matrix
    n = 5
    H = 1. / (np.arange(1, n + 1) + np.arange(n)[:, np.newaxis])
    clf = linear_model.Lars(fit_intercept=False).fit(H, np.arange(n))
    assert np.all(np.isfinite(clf.coef_))
def test_sk_Lars():
    print("Testing sklearn, Lars...")
    mod = linear_model.Lars()
    X, y = iris_data
    mod.fit(X, y)
    docs = {'name': "Lars test"}
    fv = X[0, :]
    upload(mod, fv, docs)
def get_all_regrs():
    regrs = {
        "Linear regression": linear_model.LinearRegression(),
        # "Perceptron": linear_model.Perceptron(),
        "Lars": linear_model.Lars(),
        "Lasso": linear_model.LassoCV(max_iter=5000),
        # "Passive Aggressive": linear_model.PassiveAggressiveRegressor(),
        "PLS": PLS(n_components=3),
        "Random Forest": ensemble.RandomForestRegressor(),
        "Gradient Boost": ensemble.GradientBoostingRegressor(),
        "Extra Trees": ensemble.ExtraTreesRegressor(max_depth=2),
        "Ada Boost": ensemble.AdaBoostRegressor(
            base_estimator=tree.DecisionTreeRegressor(max_depth=2),
            n_estimators=250),
        "Gaussian Process": gaussian_process.GaussianProcessRegressor(),
        # "Isotonic": isotonic.IsotonicRegression(),
        "Kernel Ridge": kernel_ridge.KernelRidge(),
        "Ridge CV": linear_model.RidgeCV(),
        # "Exp tranform": TransformedTargetRegressor(regressor=PLS(n_components=3),
        #                                            func=np.exp,
        #                                            inverse_func=np.log),
        # "Log tranform": TransformedTargetRegressor(regressor=PLS(n_components=3),
        #                                            func=np.log,
        #                                            inverse_func=np.exp),
        # "Inv tranform": TransformedTargetRegressor(regressor=PLS(n_components=3),
        #                                            func=invert,
        #                                            inverse_func=invert),
        # "Log regressor": linear_model.LogisticRegressionCV(),
        "ML Perceptron": neural_network.MLPRegressor(max_iter=50000,
                                                     hidden_layer_sizes=(5, 5)),
        "Linear SVR": linear_svc,
        "RBF SVR": svm.SVR(kernel='rbf'),
        "Poly SVR": svm.SVR(kernel='poly'),
        # "Sigmoid SVR": svm.SVR(kernel='sigmoid'),
        "Bayesian Ridge": linear_model.BayesianRidge(),
        "Huber": linear_model.HuberRegressor(),
        # "Poisson": linear_model.PoissonRegressor(),
        "K-neighbors": neighbors.KNeighborsRegressor(),
        # "Radius Neighbors": neighbors.RadiusNeighborsRegressor()
    }
    return regrs
def mcfs(X, n_selected_features, **kwargs):
    """
    This function implements unsupervised feature selection for multi-cluster data.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    n_selected_features: {int}
        number of features to select
    kwargs: {dictionary}
        W: {sparse matrix}, shape (n_samples, n_samples)
            affinity matrix
        n_clusters: {int}
            number of clusters (default is 5)

    Output
    ------
    W: {numpy array}, shape (n_features, n_clusters)
        feature weight matrix

    Reference
    ---------
    Cai, Deng et al. "Unsupervised Feature Selection for Multi-Cluster Data." KDD 2010.
    """
    # use the default affinity matrix
    if 'W' not in kwargs:
        W = construct_W(X)
    else:
        W = kwargs['W']

    # default number of clusters is 5
    if 'n_clusters' not in kwargs:
        n_clusters = 5
    else:
        n_clusters = kwargs['n_clusters']

    # solve the generalized eigen-decomposition problem and get the top K
    # eigen-vectors with respect to the smallest eigenvalues
    W = W.toarray()
    W = (W + W.T) / 2
    W_norm = np.diag(np.sqrt(1 / W.sum(1)))
    W = np.dot(W_norm, np.dot(W, W_norm))
    WT = W.T
    W[W < WT] = WT[W < WT]
    eigen_value, ul = scipy.linalg.eigh(a=W)
    Y = np.dot(W_norm, ul[:, -1 * n_clusters - 1:-1])

    # solve K L1-regularized regression problems using the LARs algorithm,
    # with the cardinality constraint set to n_selected_features
    n_sample, n_feature = X.shape
    W = np.zeros((n_feature, n_clusters))
    for i in range(n_clusters):
        clf = linear_model.Lars(n_nonzero_coefs=n_selected_features)
        clf.fit(X, Y[:, i])
        W[:, i] = clf.coef_
    return W
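# --- Hedged usage sketch for mcfs() above (not part of the original code) ---
# Assumes this module's imports (numpy as np, scipy, sklearn.linear_model) and the
# construct_W helper used for the default affinity matrix are available; the random
# data and the max-across-clusters ranking rule are illustrative assumptions only.
def _demo_mcfs():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.rand(50, 20)                 # 50 samples, 20 features
    W = mcfs(X, n_selected_features=5, n_clusters=5)
    scores = np.abs(W).max(axis=1)       # one score per feature
    top5 = np.argsort(scores)[::-1][:5]
    print("selected feature indices:", top5)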
def run_specific_combination(test_frame, reg_type, column_list):
    target_feature = test_frame['Endurance_Score']
    test_df = test_frame.filter(column_list, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        test_df, target_feature.values.reshape(-1, 1), test_size=0.20, random_state=0)

    if reg_type == 'dt':
        regr = DecisionTreeRegressor(max_depth=2)
    elif reg_type == 'lin':
        regr = linear_model.LinearRegression()
    elif reg_type == 'ridge':
        regr = linear_model.Ridge(alpha=1500.0)
    elif reg_type == 'lasso':
        regr = linear_model.Lasso(alpha=10.0)
    elif reg_type == 'bayridge':
        regr = linear_model.BayesianRidge()
    elif reg_type == 'sgd':
        regr = linear_model.SGDRegressor(loss='huber')
    elif reg_type == 'lars':
        regr = linear_model.Lars(n_nonzero_coefs=np.inf)
    elif reg_type == 'pasagv':
        regr = linear_model.PassiveAggressiveRegressor(random_state=0)
    elif reg_type == 'kernelridge':
        regr = kernel_ridge.KernelRidge()
    elif reg_type == 'svr':
        regr = svm.SVR()
    elif reg_type == 'kneigh':
        regr = neighbors.KNeighborsRegressor(algorithm='kd_tree')
    elif reg_type == 'gauss':
        regr = gaussian_process.GaussianProcessRegressor()
    elif reg_type == 'gbr':
        params = {'n_estimators': 760, 'max_depth': 4, 'min_samples_split': 3,
                  'learning_rate': 0.026, 'loss': 'huber'}
        regr = GradientBoostingRegressor(**params)
    elif reg_type == 'ran':
        regr = RandomForestRegressor(n_estimators=300, max_depth=8)
    elif reg_type == 'et':
        regr = ExtraTreesRegressor()
    else:
        return

    x_train_frame = X_train.copy()
    del x_train_frame['Title']
    del x_train_frame['Artist']
    regr.fit(x_train_frame, y_train.ravel())

    x_test_frame = X_test.copy()
    del x_test_frame['Title']
    del x_test_frame['Artist']
    y_pred = regr.predict(x_test_frame)

    rmse = mean_squared_error(y_test, y_pred)
    score = r2_score(y_test, y_pred)
    print("R2-score: {}, RMSE: {}".format(score, math.sqrt(rmse)))

    result_df = pd.DataFrame(columns=['Song', 'Artist', 'Endurance_Score',
                                      'Predicted_Endurance_Score'])
    result_df['Song'] = X_test['Title']
    result_df['Artist'] = X_test['Artist']
    result_df['Endurance_Score'] = y_test.ravel()
    result_df['Predicted_Endurance_Score'] = y_pred
    result_df.to_csv('{0}/{1}.csv'.format(path_final_csv, 'predicted_midtermdata'), index=False)
def _train(self):
    x = self._train_set.features
    y = self._train_set.outputs
    self._transform = preprocessing.PolynomialFeatures(1)
    clf = linear_model.Lars(n_nonzero_coefs=400, fit_intercept=True)
    clf.fit(self._transform.fit_transform(x, y), y)
    self._model = clf.predict
def leastAngleRegression(self):
    lar = linear_model.Lars()
    lar.fit(self.training_order_start_end_districts_and_time,
            self.training_number_of_orders)
    predicted_number_of_orders = lar.predict(
        self.testing_order_start_end_districts_and_time)
    current_ride_prediction_error = numpy.mean(
        (predicted_number_of_orders - self.testing_number_of_orders) ** 2)
    print(current_ride_prediction_error)
    print(lar.coef_)
def train_lars_regressor(X, y, k_fold):
    ## CROSS VALIDATION
    # Without evenly distributing classes
    kf = KFold(n_splits=k_fold, shuffle=True, random_state=None)
    kf.get_n_splits()
    print(kf)
    print(kf.get_n_splits())

    # Evenly Distributing Classes: Stratified K-fold
    # skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None)
    # skf.get_n_splits(X, y)
    # print(skf)

    # LARS regressor definition
    regressor = linear_model.Lars(fit_intercept=True, verbose=False, normalize=True,
                                  precompute='auto', n_nonzero_coefs=500,
                                  eps=2.2204460492503131e-16, copy_X=True,
                                  fit_path=True, positive=False)

    # k-Fold loop
    counter = 0
    model = []
    r2 = []
    print("\nShape")
    print(X.shape)
    print(y.shape)
    for train_index, test_index in kf.split(X, y=y):
        # print("TRAIN: ", train_index, " TEST: ", test_index)
        # get indices
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        ### LARS REGRESSOR ####################################
        # fit model
        model.append(regressor.fit(X_train, y_train))
        # predict
        y_pred = model[counter].predict(X_test)

        print("Coefficient of Determination for Regressor ", counter)
        r2.append(model[counter].score(X_test, y_test))
        print(r2[counter])
        counter += 1

    # choose the fold's model with the highest r2
    idx = np.argmax(r2)
    return model[idx]
def compare_regressors():
    """
    Compare the performance of regressors.
    """
    regressors = {
        'Linear': linear_model.LinearRegression(),
        'Ridge': linear_model.Ridge(),
        'Lasso': linear_model.Lasso(),
        'LAR': linear_model.Lars(),
        'SGD': linear_model.SGDRegressor(),
        # 'ARD': linear_model.ARDRegression()  # takes a minute or two
    }
    _compare_estimators(regressors, classify=False)
def default_models_(self):
    return {
        'Tree': {'clf': tree.DecisionTreeRegressor(),
                 'param': {'max_depth': [3, 5, 7, 10, 20]}},
        'GBDT': {'clf': ensemble.GradientBoostingRegressor(random_state=1),
                 'param': {
                     'n_estimators': [50, 100, 150, 200],
                     'learning_rate': [0.1],
                     'max_depth': [4, 6, 8],
                     'alpha': [0.7, 0.8, 0.9],
                     'max_leaf_nodes': [10, 20],
                     'min_samples_split': [2, 4, 7]
                 }},
        'Lin': {'clf': linear_model.LinearRegression(),
                'param': {
                    'fit_intercept': [True, False],
                    'normalize': [True, False]
                }},
        'Ridge': {'clf': linear_model.Ridge(), 'param': {}},
        'Lasso': {'clf': linear_model.Lasso(), 'param': {}},
        'ElasN': {'clf': linear_model.ElasticNet(), 'param': {}},
        'Lars': {'clf': linear_model.Lars(), 'param': {}},
        'Bayers': {'clf': linear_model.BayesianRidge(), 'param': {}},
        'Poly2': {'clf': Pipeline([('poly', PolynomialFeatures(degree=2)),
                                   ('std_scaler', StandardScaler()),
                                   ('line_reg', linear_model.LinearRegression())]),
                  'param': {}},
        'SGD': {'clf': linear_model.SGDRegressor(), 'param': {}},
        'SVM': {'clf': svm.SVR(kernel='rbf', C=1.0, epsilon=1),
                'param': {'C': [1, 10, 100, 1000, 10000]}},
        'Knn': {'clf': neighbors.KNeighborsRegressor(), 'param': {}},
        'RF': {'clf': ensemble.RandomForestRegressor(random_state=1),
               'param': {'n_estimators': [10, 30, 50, 100, 150]}},
        'ADA': {'clf': ensemble.AdaBoostRegressor(n_estimators=100), 'param': {}},
        'BAG': {'clf': BaggingRegressor(bootstrap=True),
                'param': {'n_estimators': [50, 100, 200]}},
        'ET': {'clf': tree.ExtraTreeRegressor(), 'param': {}},
    }
def __init__(self, method, params, i=0):
    self.algorithm_list = ['PLS', 'GP', 'OLS', 'OMP', 'Lasso', 'Elastic Net',
                           'Ridge', 'Bayesian Ridge', 'ARD', 'LARS', 'LASSO LARS',
                           'SVR', 'KRR', 'GBR']
    self.method = method
    self.outliers = None
    self.ransac = False
    # print(params)

    if self.method[i] == 'PLS':
        self.model = PLSRegression(**params[i])

    if self.method[i] == 'OLS':
        self.model = linear.LinearRegression(**params[i])

    if self.method[i] == 'OMP':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        self.model = linear.OrthogonalMatchingPursuit(**params_temp)

    if self.method[i] == 'LASSO':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        self.model = linear.Lasso(**params_temp)

    if self.method[i] == 'Elastic Net':
        params_temp = copy.copy(params[i])
        self.model = linear.ElasticNet(**params_temp)

    if self.method[i] == 'Ridge':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        self.model = linear.Ridge(**params_temp)

    if self.method[i] == 'BRR':
        self.model = linear.BayesianRidge(**params[i])

    if self.method[i] == 'ARD':
        self.model = linear.ARDRegression(**params[i])

    if self.method[i] == 'LARS':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        self.model = linear.Lars(**params_temp)

    if self.method[i] == 'SVR':
        self.model = svm.SVR(**params[i])

    if self.method[i] == 'KRR':
        self.model = kernel_ridge.KernelRidge(**params[i])
def test_model_lars(self):
    model, X = _fit_model(linear_model.Lars())
    model_onnx = convert_sklearn(
        model, "lars",
        [("input", FloatTensorType([None, X.shape[1]]))])
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(numpy.float32),
        model,
        model_onnx,
        basename="SklearnLars-Dec4",
        allow_failure="StrictVersion("
        "onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')",
    )