class LassoLarsCVImpl():
    """Adapter that keeps a hyperparameter dict and delegates to ``SKLModel``.

    The keyword arguments given to the constructor are stored verbatim in
    ``self._hyperparams`` and forwarded to ``SKLModel`` to create
    ``self._wrapped_model``.
    """

    def __init__(self, fit_intercept=True, verbose=False, max_iter=500,
                 normalize=True, precompute='auto', cv=3, max_n_alphas=1000,
                 n_jobs=None, eps=2.220446049250313e-16, copy_X=True,
                 positive=False):
        """Record the hyperparameters and build the wrapped model from them."""
        self._hyperparams = dict(
            fit_intercept=fit_intercept,
            verbose=verbose,
            max_iter=max_iter,
            normalize=normalize,
            precompute=precompute,
            cv=cv,
            max_n_alphas=max_n_alphas,
            n_jobs=n_jobs,
            eps=eps,
            copy_X=copy_X,
            positive=positive,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded to the wrapped model only when it is not ``None``;
        otherwise the wrapped ``fit`` is called with ``X`` alone.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return whatever the wrapped model predicts for ``X``."""
        return self._wrapped_model.predict(X)
def __init__(self, fit_intercept=True, verbose=False, max_iter=500,
             normalize=True, precompute='auto', cv=3, max_n_alphas=1000,
             n_jobs=None, eps=2.220446049250313e-16, copy_X=True,
             positive=False):
    """Store the hyperparameters and instantiate ``Op`` with them.

    Every keyword argument is kept under its own name in
    ``self._hyperparams``; the same mapping is then expanded into the
    ``Op`` constructor and the result saved as ``self._wrapped_model``.
    """
    self._hyperparams = dict(
        fit_intercept=fit_intercept,
        verbose=verbose,
        max_iter=max_iter,
        normalize=normalize,
        precompute=precompute,
        cv=cv,
        max_n_alphas=max_n_alphas,
        n_jobs=n_jobs,
        eps=eps,
        copy_X=copy_X,
        positive=positive,
    )
    self._wrapped_model = Op(**self._hyperparams)
def fit(self, X, y=None):
    """Create a new ``SKLModel`` from the stored hyperparameters and fit it.

    The freshly built model is saved as ``self._sklearn_model`` before
    fitting. ``y`` is passed on only when it is not ``None``.

    Returns ``self``.
    """
    self._sklearn_model = SKLModel(**self._hyperparams)
    fit_args = (X,) if y is None else (X, y)
    self._sklearn_model.fit(*fit_args)
    return self
def connectWidgets(self):
    """Initialise the parameter widgets from default-constructed estimators.

    A default ``LassoLars``, ``LassoLarsCV`` and ``LassoLarsIC`` are
    instantiated and their attribute values are copied into the matching
    text field, check boxes, spin boxes and combo boxes.
    """
    # LassoLars
    ll = LassoLars()
    self.alpha_text.setText(str(ll.alpha))
    self.fit_interceptCheckBox.setChecked(ll.fit_intercept)
    self.verboseCheckBox.setChecked(ll.verbose)
    self.normalizeCheckBox.setChecked(ll.normalize)
    self.setComboBox(self.precomputeComboBox,
                     ['True', 'False', 'auto', 'array-like'])
    self.defaultComboItem(self.precomputeComboBox, ll.precompute)
    self.max_iterSpinBox.setValue(ll.max_iter)
    self.copy_XCheckBox.setChecked(ll.copy_X)
    self.fit_pathCheckBox.setChecked(ll.fit_path)
    self.positiveCheckBox.setChecked(ll.positive)

    # LassoLarsCV
    llcv = LassoLarsCV()
    self.max_n_alphasSpinBox.setValue(llcv.max_n_alphas)
    # The estimator's n_jobs may default to None (scikit-learn documents
    # None as "1 job"); a spin box needs a number, so map None to 1.
    # NOTE(review): assumes n_jobsSpinBox is an integer spin box -- confirm.
    n_jobs = llcv.n_jobs
    self.n_jobsSpinBox.setValue(1 if n_jobs is None else n_jobs)
    # cv is a LassoLarsCV parameter; the UI starts from a fixed value of 3.
    self.cvSpinBox.setValue(3)

    # LassoLarsIC
    llic = LassoLarsIC()
    self.setComboBox(self.criterionComboBox, ['aic', 'bic'])
    self.defaultComboItem(self.criterionComboBox, llic.criterion)
def function(self):
    """Collect the parameters of the currently selected model from the UI.

    The model is chosen by the current index of ``modelComboBox``:
    0 -> ``LassoLars``, 1 -> ``LassoLarsCV``, 2 -> ``LassoLarsIC``.

    Returns
    -------
    tuple or None
        ``(params, changed)`` where ``params`` maps parameter names to the
        widget values plus a ``'model'`` entry holding the selected index,
        and ``changed`` is the result of ``self.getChangedValues`` called
        with ``params`` minus ``'model'`` and a default-constructed
        estimator. For any other index ``"Error"`` is printed and ``None``
        is returned.
    """
    # NOTE(review): 'alpha' and 'precompute' are passed on as the raw widget
    # strings; confirm that downstream code converts them as needed.
    model = self.modelComboBox.currentIndex()
    if model == 0:
        params = {
            'alpha': self.alpha_text.text(),
            'fit_intercept': self.fit_interceptCheckBox.isChecked(),
            # Read the verbose check box (previously read fit_intercept's).
            'verbose': self.verboseCheckBox.isChecked(),
            'normalize': self.normalizeCheckBox.isChecked(),
            'precompute': self.precomputeComboBox.currentText(),
            'max_iter': self.max_iterSpinBox.value(),
            'copy_X': self.copy_XCheckBox.isChecked(),
            'fit_path': self.fit_pathCheckBox.isChecked(),
            'positive': self.positiveCheckBox.isChecked(),
            'model': model,
        }
        defaults = LassoLars()
    elif model == 1:
        params = {
            'fit_intercept': self.fit_interceptCheckBox.isChecked(),
            'verbose': self.verboseCheckBox.isChecked(),
            'max_iter': self.max_iterSpinBox.value(),
            'normalize': self.normalizeCheckBox.isChecked(),
            'precompute': self.precomputeComboBox.currentText(),
            'cv': self.cvSpinBox.value(),
            'max_n_alphas': self.max_n_alphasSpinBox.value(),
            'n_jobs': self.n_jobsSpinBox.value(),
            'copy_X': self.copy_XCheckBox.isChecked(),
            'positive': self.positiveCheckBox.isChecked(),
            'model': model,
        }
        defaults = LassoLarsCV()
    elif model == 2:
        params = {
            'criterion': self.criterionComboBox.currentText(),
            'fit_intercept': self.fit_interceptCheckBox.isChecked(),
            'verbose': self.verboseCheckBox.isChecked(),
            'normalize': self.normalizeCheckBox.isChecked(),
            'precompute': self.precomputeComboBox.currentText(),
            'max_iter': self.max_iterSpinBox.value(),
            'copy_X': self.copy_XCheckBox.isChecked(),
            'positive': self.positiveCheckBox.isChecked(),
            'model': model,
        }
        defaults = LassoLarsIC()
    else:
        # Unknown selection: keep the original behaviour of reporting the
        # problem and yielding no result.
        print("Error")
        return None

    # 'model' is UI bookkeeping, not an estimator parameter, so it is left
    # out of the comparison against the estimator defaults.
    params_check = dict(params)
    params_check.pop('model')
    return params, self.getChangedValues(params_check, defaults)
def set_learning_method(config, X_train, y_train):
    """
    Instantiates the sklearn's class corresponding to the value set in the
    configuration file for running the learning method.

    The "learning" section of the configuration may contain:
      - "method": name of the estimator ("SVR", "RandomForestRegressor",
        "SVC", "LassoCV", "LassoLars" or "LassoLarsCV");
      - "optimize": settings handed to set_optimization_params() and
        optimize_model(); takes precedence over "parameters" for the
        methods that support it (SVR, RandomForestRegressor, SVC,
        LassoLars);
      - "parameters": explicit constructor arguments;
      - "scorer": scorer names handed to set_scorer_functions()
        (default ['mae', 'rmse']).

    TODO: use reflection to instantiate the classes

    @param config: configuration object
    @param X_train: training features, used only when optimizing
    @param y_train: training targets, used only when optimizing
    @return: a tuple (estimator, scorers). estimator has fit() and
    predict() methods, or is None when the method name is not recognised.
    Both elements are None when the configuration has no (or an empty)
    "learning" section.
    """
    estimator = None
    # Bound up front so the final return cannot hit an unbound local when
    # the configuration carries no "learning" section.
    scorers = None

    learning_cfg = config.get("learning", None)
    if not learning_cfg:
        return estimator, scorers

    p = learning_cfg.get("parameters", None)
    o = learning_cfg.get("optimize", None)
    scorers = set_scorer_functions(
        learning_cfg.get("scorer", ['mae', 'rmse']))
    method_name = learning_cfg.get("method", None)

    def _optimized(base_estimator, show_params=False):
        # Tune base_estimator according to the "optimize" section.
        tune_params = set_optimization_params(o)
        if show_params:
            print(tune_params)
        return optimize_model(base_estimator, X_train, y_train,
                              tune_params, scorers,
                              o.get("cv", 5),
                              o.get("verbose", True),
                              o.get("n_jobs", 1))

    if method_name == "SVR":
        if o:
            estimator = _optimized(SVR())
        elif p:
            estimator = SVR(C=p.get("C", 10),
                            epsilon=p.get('epsilon', 0.01),
                            kernel=p.get('kernel', 'rbf'),
                            degree=p.get('degree', 3),
                            gamma=p.get('gamma', 0.0034),
                            tol=p.get('tol', 1e-3),
                            verbose=False)
        else:
            estimator = SVR()

    elif method_name == "RandomForestRegressor":
        if o:
            estimator = _optimized(RandomForestRegressor(),
                                   show_params=True)
        elif p:
            estimator = RandomForestRegressor(
                n_estimators=p.get("n_estimators", 100),
                criterion=p.get("criterion", 'mse'),
                n_jobs=p.get("n_jobs", -1),
                random_state=p.get("random_state", 0),
                max_features=p.get("max_features", 'auto'))
        else:
            # Fall back to the default estimator like every other method
            # instead of silently returning None.
            estimator = RandomForestRegressor()

    elif method_name == "SVC":
        if o:
            estimator = _optimized(SVC())
        elif p:
            estimator = SVC(C=p.get('C', 1.0),
                            kernel=p.get('kernel', 'rbf'),
                            degree=p.get('degree', 3),
                            gamma=p.get('gamma', 0.0),
                            coef0=p.get('coef0', 0.0),
                            tol=p.get('tol', 1e-3),
                            verbose=p.get('verbose', False))
        else:
            estimator = SVC()

    elif method_name == "LassoCV":
        if p:
            estimator = LassoCV(eps=p.get('eps', 1e-3),
                                n_alphas=p.get('n_alphas', 100),
                                normalize=p.get('normalize', False),
                                precompute=p.get('precompute', 'auto'),
                                max_iter=p.get('max_iter', 1000),
                                tol=p.get('tol', 1e-4),
                                cv=p.get('cv', 10),
                                verbose=False)
        else:
            estimator = LassoCV()

    elif method_name == "LassoLars":
        if o:
            estimator = _optimized(LassoLars())
        # `elif`, not `if`: otherwise the optimised estimator above is
        # always replaced by one of the two branches below.
        elif p:
            estimator = LassoLars(alpha=p.get('alpha', 1.0),
                                  fit_intercept=p.get('fit_intercept', True),
                                  verbose=p.get('verbose', False),
                                  normalize=p.get('normalize', True),
                                  max_iter=p.get('max_iter', 500),
                                  fit_path=p.get('fit_path', True))
        else:
            estimator = LassoLars()

    elif method_name == "LassoLarsCV":
        if p:
            estimator = LassoLarsCV(max_iter=p.get('max_iter', 500),
                                    normalize=p.get('normalize', True),
                                    max_n_alphas=p.get('max_n_alphas', 1000),
                                    n_jobs=p.get('n_jobs', 1),
                                    cv=p.get('cv', 10),
                                    verbose=False)
        else:
            estimator = LassoLarsCV()

    return estimator, scorers
def get_lasso_feature_scores(x, y, mode=CLASSIFICATION, scaling=0.5,
                             sample_fraction=0.75, n_resampling=200,
                             random_state=None):
    '''
    Score features with a randomized lasso (regression) or a randomized
    logistic regression (classification), i.e. stability selection.

    see http://scikit-learn.org/stable/modules/feature_selection.html
    for details.

    Parameters
    ----------
    x : structured array
    y : 1D nd.array
    mode : {CLASSIFICATION, REGRESSION}
    scaling : float, optional
              scaling parameter, should be between 0 and 1
    sample_fraction : float, optional
                      the fraction of samples to use in each randomized
                      dataset
    n_resampling : int, optional
                   the number of times the model is trained on a random
                   subset of the data
    random_state : int, optional
                   if it is an int, it specifies the seed to use,
                   defaults to None.

    Returns
    -------
    pandas DataFrame
        (uncertainty name, score) rows sorted by score in descending order

    Raises
    ------
    ValueError
        if mode is neither CLASSIFICATION nor REGRESSION

    '''
    names = recfunctions.get_names(x.dtype)
    x = _prepare_experiments(x)

    # keyword arguments common to both randomized estimators
    shared = dict(scaling=scaling,
                  sample_fraction=sample_fraction,
                  n_resampling=n_resampling,
                  random_state=random_state)

    if mode == CLASSIFICATION:
        selector = RandomizedLogisticRegression(**shared)
    elif mode == REGRESSION:
        # we use LassoLarsCV to determine alpha see
        # http://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_recovery.html
        cv_model = LassoLarsCV(cv=6).fit(x, y)
        first_alpha = cv_model.alphas_[0]
        alpha_grid = np.linspace(first_alpha, .1 * first_alpha, 6)

        # the randomized lasso is fitted over this grid of alphas
        selector = RandomizedLasso(alpha=alpha_grid, **shared)
    else:
        raise ValueError('{} invalid value for mode'.format(mode))

    selector.fit(x, y)

    ranked = sorted(zip(names, selector.scores_),
                    key=itemgetter(1), reverse=True)
    return pd.DataFrame(ranked)
'KMeans':KMeans(), 'KNeighborsClassifier':KNeighborsClassifier(), 'KNeighborsRegressor':KNeighborsRegressor(), 'KernelCenterer':KernelCenterer(), 'KernelDensity':KernelDensity(), 'KernelPCA':KernelPCA(), 'KernelRidge':KernelRidge(), 'LSHForest':LSHForest(), 'LabelPropagation':LabelPropagation(), 'LabelSpreading':LabelSpreading(), 'Lars':Lars(), 'LarsCV':LarsCV(), 'Lasso':Lasso(), 'LassoCV':LassoCV(), 'LassoLars':LassoLars(), 'LassoLarsCV':LassoLarsCV(), 'LassoLarsIC':LassoLarsIC(), 'LatentDirichletAllocation':LatentDirichletAllocation(), 'LedoitWolf':LedoitWolf(), 'LinearDiscriminantAnalysis':LinearDiscriminantAnalysis(), 'LinearRegression':LinearRegression(), 'LinearSVC':LinearSVC(), 'LinearSVR':LinearSVR(), 'LocallyLinearEmbedding':LocallyLinearEmbedding(), 'LogisticRegression':LogisticRegression(), 'LogisticRegressionCV':LogisticRegressionCV(), 'MDS':MDS(), 'MLPClassifier':MLPClassifier(), 'MLPRegressor':MLPRegressor(), 'MaxAbsScaler':MaxAbsScaler(), 'MeanShift':MeanShift(),
# Degree-1 polynomial feature expansion.
poly_reg = PolynomialFeatures(degree=1)

# Fit the expansion on the training data only, then apply that same fitted
# transformer to the test data (the test set must not be re-fitted).
x_train = poly_reg.fit_transform(X_train)
X_test = poly_reg.transform(X_test)

# Take the first column of each target frame as a flat numpy array.
# `.iloc` is used because the `.ix` indexer was removed in pandas 1.0.
y_test = np.array(y_test.iloc[:, 0])
y_train = np.array(y_train.iloc[:, 0])

# Build model with good params
model = LassoLarsCV(copy_X=True, cv=None, eps=2.2204460492503131e-16,
                    fit_intercept=True, max_iter=500, max_n_alphas=1000,
                    n_jobs=1, normalize=True, positive=False,
                    precompute='auto', verbose=False)

# Fit the model
model.fit(x_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Scoring: mean absolute error is only reported for regression runs.
if regression:
    print('Score on test set:', mean_absolute_error(y_test, y_pred))
def get_lasso_feature_scores(results, classify, scaling=0.5,
                             sample_fraction=0.75, n_resampling=200,
                             random_state=None):
    '''
    Score features with a randomized lasso (regression) or a randomized
    logistic regression (classification), i.e. stability selection.

    see http://scikit-learn.org/stable/modules/feature_selection.html
    for details.

    Parameters
    ----------
    results : tuple
              unpacked as (experiments, outcomes)
    classify : callable or str
               a classify function or variable analogous to PRIM
    scaling : float, optional
              scaling parameter, should be between 0 and 1
    sample_fraction : float, optional
                      the fraction of samples to use in each randomized
                      dataset
    n_resampling : int, optional
                   the number of times the model is trained on a random
                   subset of the data
    random_state : int, optional
                   if it is an int, it specifies the seed to use,
                   defaults to None.

    Returns
    -------
    list of tuples
        (uncertainty name, score) pairs sorted by score in descending order

    '''
    experiments, outcomes = results
    names = recfunctions.get_names(experiments.dtype)

    x = _prepare_experiments(experiments)
    y, categorical = _prepare_outcomes(outcomes, classify)

    # keyword arguments common to both randomized estimators
    shared = dict(scaling=scaling,
                  sample_fraction=sample_fraction,
                  n_resampling=n_resampling,
                  random_state=random_state)

    if categorical:
        selector = RandomizedLogisticRegression(**shared)
    else:
        # we use LassoLarsCV to determine alpha see
        # http://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_recovery.html
        cv_model = LassoLarsCV(cv=6).fit(x, y)
        first_alpha = cv_model.alphas_[0]
        alpha_grid = np.linspace(first_alpha, .1 * first_alpha, 6)

        # the randomized lasso is fitted over this grid of alphas
        selector = RandomizedLasso(alpha=alpha_grid, **shared)

    selector.fit(x, y)

    return sorted(zip(names, selector.scores_),
                  key=itemgetter(1), reverse=True)