Example 1
    def choose_optimizer(self,
                         LassoType='Lasso',
                         RegCoef=0.00001,
                         cv=5,
                         criterion='aic',
                         maxiter=10000,
                         tolerance=0.0001,
                         normalize=True):

        if LassoType == 'Lasso':
            lin = linear_model.Lasso(alpha=RegCoef,
                                     max_iter=maxiter,
                                     normalize=normalize,
                                     tol=tolerance)
        elif LassoType == 'LassoCV':
            lin = linear_model.LassoCV(cv=cv,
                                       normalize=normalize,
                                       max_iter=maxiter)
        elif LassoType == 'LassoLarsCV':
            lin = linear_model.LassoLarsCV(cv=cv,
                                           normalize=normalize,
                                           max_iter=maxiter)
        elif LassoType == 'LarsCV':
            lin = linear_model.LarsCV(cv=cv,
                                      normalize=normalize,
                                      max_iter=maxiter)
        elif LassoType == 'LassoLarsIC':
            lin = linear_model.LassoLarsIC(criterion=criterion,
                                           normalize=normalize,
                                           max_iter=maxiter)
        else:
            raise Exception("wrong option")

        return lin
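
A minimal usage sketch (hypothetical, not from the original repository): build one of the estimator types that choose_optimizer handles and fit it on synthetic data. The normalize= argument is left out here because scikit-learn 1.2 removed it from these estimators.

# Hypothetical sketch: fit one of the estimators choose_optimizer can return.
import numpy as np
from sklearn import linear_model

rng = np.random.RandomState(0)
X = rng.randn(50, 10)
y = 2.0 * X[:, 0] + 0.1 * rng.randn(50)

lin = linear_model.LassoLarsIC(criterion='aic', max_iter=10000)  # the 'LassoLarsIC' branch
lin.fit(X, y)
print(lin.coef_)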
Example 2
def test_sk_LarsCV():
    print("Testomg sklearn, LarsCV...")
    mod = linear_model.LarsCV()
    X, y = iris_data
    mod.fit(X, y)
    docs = {'name': "LarsCV test"}
    fv = X[0, :]
    upload(mod, fv, docs)
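
The test above depends on two fixtures that are not shown, iris_data and upload. A hedged pair of stand-ins (assumptions, not the project's real fixtures) that make it callable:

# Hypothetical stand-ins for the missing test fixtures.
from sklearn import datasets

iris_data = datasets.load_iris(return_X_y=True)  # (X, y) tuple, as the test unpacks it

def upload(model, feature_vector, docs):
    # The real upload() presumably ships the model to a registry; this stub only logs.
    print("would upload:", docs['name'])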
Example 3
 def test_model_lars_cv(self):
     model, X = fit_regression_model(linear_model.LarsCV())
     model_onnx = convert_sklearn(
         model,
         "lars", [("input", FloatTensorType([None, X.shape[1]]))],
         target_opset=TARGET_OPSET)
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(X,
                         model,
                         model_onnx,
                         basename="SklearnLarsCV-Dec4")
Example 4
def lars():
    behavior_data, conn_data = pu.load_data_full_subjects()
    conn_data = conn_data.astype(float)

    categorical_variables = ['smoking', 'deanxit_antidepressants', 'rivotril_antianxiety', 'sex']
    categorical_data = behavior_data[categorical_variables]
    dummy_coded_categorical = pu.dummy_code_binary(categorical_data)
    covariate_data = pd.concat([behavior_data['age'], dummy_coded_categorical], axis=1)

    ml_data = pd.concat([conn_data, covariate_data], axis=1)
    target = behavior_data['distress_TQ'].values.astype(float)

    feature_names = list(ml_data)
    continuous_features = [f for f in feature_names if 'categorical' not in f]
    continuous_indices = [ml_data.columns.get_loc(cont) for cont in continuous_features]

    categorical_features = [f for f in feature_names if 'categorical' in f]
    categorical_indices = [ml_data.columns.get_loc(cat) for cat in categorical_features]

    ml_continuous = ml_data.values[:, continuous_indices]
    ml_categorical = ml_data.values[:, categorical_indices]

    # Standardization for continuous data
    preproc = preprocessing.StandardScaler().fit(ml_continuous)
    ml_z = preproc.transform(ml_continuous)

    # Variance threshold for categorical data
    varthresh = feature_selection.VarianceThreshold(threshold=0).fit(ml_categorical)
    ml_v = varthresh.transform(ml_categorical)

    ml_preprocessed = np.hstack((ml_z, ml_v))

    # Feature selection with extra trees
    clf = ensemble.ExtraTreesRegressor()
    model = feature_selection.SelectFromModel(clf, threshold="2*mean")
    # Transform train and test data with feature selection model
    ml_cleaned = model.fit_transform(ml_preprocessed, target)
    feature_indices = model.get_support(indices=True)
    cleaned_features = [feature_names[i] for i in feature_indices]

    lars_classifier = linear_model.LarsCV(cv=3, normalize=False, fit_intercept=False)

    lars_classifier.fit(ml_cleaned, target)
    predicted = lars_classifier.predict(ml_cleaned)

    r2 = lars_classifier.score(ml_cleaned, target)

    exp_var = metrics.explained_variance_score(target, predicted)
    max_err = metrics.max_error(target, predicted)
    mae = metrics.mean_absolute_error(target, predicted)
    mse = metrics.mean_squared_error(target, predicted)
    print(r2, exp_var, max_err, mae, mse)
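
Below is a condensed, self-contained sketch of the same pipeline on synthetic data (standardize the continuous block, variance-threshold the dummy block, ExtraTrees-based selection, then LarsCV). normalize=False is dropped because recent scikit-learn removed the argument.

# Hedged sketch of the pipeline above, with synthetic stand-in data.
import numpy as np
from sklearn import preprocessing, feature_selection, ensemble, linear_model

rng = np.random.default_rng(0)
X_cont = rng.standard_normal((80, 10))                  # continuous block
X_cat = rng.integers(0, 2, size=(80, 3)).astype(float)  # dummy-coded block
y = 3.0 * X_cont[:, 0] - 2.0 * X_cont[:, 1] + 0.1 * rng.standard_normal(80)

X_z = preprocessing.StandardScaler().fit_transform(X_cont)
X_v = feature_selection.VarianceThreshold(threshold=0).fit_transform(X_cat)
X_all = np.hstack((X_z, X_v))

sel = feature_selection.SelectFromModel(
    ensemble.ExtraTreesRegressor(n_estimators=100, random_state=0),
    threshold="2*mean")
X_sel = sel.fit_transform(X_all, y)

lars = linear_model.LarsCV(cv=3, fit_intercept=False).fit(X_sel, y)
print(round(lars.score(X_sel, y), 3))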
Example 5
def cross_validated_estimators_tests():
    models = [
        linear_model.ElasticNetCV(),
        linear_model.LarsCV(),
        linear_model.LassoCV(),
        linear_model.LassoLarsCV(),
        linear_model.LogisticRegressionCV(),
        linear_model.OrthogonalMatchingPursuitCV(),
        linear_model.RidgeClassifierCV(),
        linear_model.RidgeCV()
    ]
    for model in models:
        cross_validated_estimators(model)
Example 6
 def test_model_lars_cv(self):
     model, X = _fit_model(linear_model.LarsCV())
     model_onnx = convert_sklearn(
         model, "lars", [("input", FloatTensorType([None, X.shape[1]]))])
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(
         X.astype(numpy.float32),
         model,
         model_onnx,
         basename="SklearnLarsCV-Dec4",
         allow_failure="StrictVersion("
         "onnxruntime.__version__)"
         "<= StrictVersion('0.2.1')",
     )
Example 7
 def test_model_lars_cv(self):
     model, X = fit_regression_model(linear_model.LarsCV())
     model_onnx = convert_sklearn(
         model,
         "lars", [("input", FloatTensorType([None, X.shape[1]]))],
         target_opset=TARGET_OPSET)
     self.assertIsNotNone(model_onnx)
     dump_data_and_model(
         X,
         model,
         model_onnx,
         basename="SklearnLarsCV-Dec4",
         allow_failure="StrictVersion("
         "onnxruntime.__version__)"
         "<= StrictVersion('0.2.1')",
     )
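
Examples 3, 6 and 7 all rely on helpers from the skl2onnx test suite (fit_regression_model / _fit_model, dump_data_and_model, TARGET_OPSET). A hypothetical stand-in for the fitting helper, assuming it does no more than fit on a small regression problem and return the model plus float32 inputs:

# Hypothetical stand-in for fit_regression_model as these tests consume it.
import numpy
from sklearn.datasets import make_regression

def fit_regression_model(model, n_features=4):
    X, y = make_regression(n_samples=100, n_features=n_features,
                           random_state=42)
    model.fit(X, y)
    return model, X.astype(numpy.float32)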
Example 8
def sklearn_linear_model_regressions(xTrain, xTest, yTrain, yTest):
    modelForConsideration: DataFrame = pd.DataFrame()
    linear_models = \
        [
            linear_model.ARDRegression(), linear_model.BayesianRidge(), linear_model.ElasticNet(),
            linear_model.ElasticNetCV(),
            linear_model.HuberRegressor(), linear_model.Lars(), linear_model.LarsCV(), linear_model.Lasso(),
            linear_model.LassoCV(), linear_model.LassoLars(), linear_model.LassoLarsCV(), linear_model.LassoLarsIC(),
            linear_model.LinearRegression(), linear_model.MultiTaskLasso(),
            linear_model.MultiTaskElasticNet(), linear_model.MultiTaskLassoCV(), linear_model.MultiTaskElasticNetCV(),
            linear_model.OrthogonalMatchingPursuit(),
            linear_model.OrthogonalMatchingPursuitCV(), linear_model.PassiveAggressiveClassifier(),
            linear_model.PassiveAggressiveRegressor(), linear_model.Perceptron(),
            linear_model.RANSACRegressor(), linear_model.Ridge(), linear_model.RidgeClassifier(),
            linear_model.RidgeClassifierCV(),
            linear_model.RidgeCV(), linear_model.SGDClassifier(), linear_model.SGDRegressor(),
            linear_model.TheilSenRegressor(),
            # enet_path, lars_path and lasso_path return coefficient paths
            # rather than estimators, so they cannot be fit like the others:
            # linear_model.enet_path(xTrain, yTrain),
            # linear_model.lars_path(xTrain, yTrain), linear_model.lasso_path(xTrain, yTrain),
            # linear_model.LogisticRegression()
            # ,linear_model.LogisticRegressionCV(),linear_model.logistic_regression_path(xTrain, yTrain), linear_model.orthogonal_mp(xTrain, yTrain), linear_model.orthogonal_mp_gram(), linear_model.ridge_regression()
        ]
    for model in linear_models:
        modelName: str = model.__class__.__name__
        try:
            # print(f"Preparing Model {modelName}")
            if modelName == "LogisticRegression":
                model = linear_model.LogisticRegression(random_state=0)
            model.fit(xTrain, yTrain)
            yTrainPredict = model.predict(xTrain)
            yTestPredict = model.predict(xTest)
            errorList = calculate_prediction_error(modelName, yTestPredict,
                                                   yTest, yTrainPredict,
                                                   yTrain)

            if errorList["Test Average Error"][0] < 30 and errorList[
                    "Train Average Error"][0] < 30:
                try:
                    modelForConsideration = modelForConsideration.append(
                        errorList)
                except Exception as e:
                    print(e)

        except (Exception, ArithmeticError) as e:
            print(f"Error occurred while preparing Model {modelName}: {e}")
    return modelForConsideration
Example 9
def fs_lars_cv(X,
               y,
               feat_list,
               n_alphas=1000,
               cv=10,
               max_iter=1000,
               hard_shrink=None):
    '''Wrapper function to build a LarsCV model from sklearn and return important features'''

    lcv = linear_model.LarsCV(n_jobs=max(1,
                                         mp.cpu_count() - 1),
                              max_n_alphas=n_alphas,
                              cv=cv,
                              max_iter=max_iter)
    coefs = lcv.fit(X, y).coef_

    # force shrinkage to zero if hard_shrink is provided
    if hard_shrink is not None:
        np.place(coefs, np.abs(coefs) < hard_shrink, 0)

    selected_feats = list(it.compress(feat_list, coefs))

    return selected_feats
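
A hypothetical call sketch for fs_lars_cv on synthetic data, assuming the module-level imports the function relies on (numpy as np, itertools as it, multiprocessing as mp, and sklearn's linear_model) are in place:

# Hedged usage sketch; the feature names are invented.
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 8))
y = 3.0 * X[:, 2] - 1.5 * X[:, 5] + 0.1 * rng.standard_normal(100)
feat_names = ["f%d" % i for i in range(8)]
print(fs_lars_cv(X, y, feat_names, cv=5, hard_shrink=0.05))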
Example 10
        fit_dic['poly3'] = poly3fit
    if 'spline' in fits:
        spline_params = splrep(x, y, s=s, k=3)
        splinefit = splev(x_new, spline_params)
        fit_dic['spline'] = splinefit
    return fit_dic


modeldict = {
    'ardregression': lm.ARDRegression(),
    'bayesianridge': lm.BayesianRidge(),
    'elasticnet': lm.ElasticNet(),
    'elasticnetcv': lm.ElasticNetCV(),
    'huberregression': lm.HuberRegressor(),
    'lars': lm.Lars(),
    'larscv': lm.LarsCV(),
    'lasso': lm.Lasso(),
    'lassocv': lm.LassoCV(),
    'lassolars': lm.LassoLars(),
    'lassolarscv': lm.LassoLarsCV(),
    'lassolarsic': lm.LassoLarsIC(),
    'linearregression': lm.LinearRegression(),
    'orthogonalmatchingpursuit': lm.OrthogonalMatchingPursuit(),
    'orthogonalmatchingpursuitcv': lm.OrthogonalMatchingPursuitCV(),
    'passiveagressiveregressor': lm.PassiveAggressiveRegressor(),
    'ridge': lm.Ridge(),
    'ridgecv': lm.RidgeCV(),
    'sgdregressor': lm.SGDRegressor(),
    'theilsenregressor': lm.TheilSenRegressor(),
    'decisiontreeregressor': DecisionTreeRegressor(),
    'randomforestregressor': RandomForestRegressor(),
Example 11
        classification_binary(svm.SVC(kernel="rbf", **SVC_PARAMS)),
        classification_binary(svm.SVC(kernel="linear", **SVC_PARAMS)),
        classification_binary(svm.SVC(kernel="poly", degree=2, **SVC_PARAMS)),
        classification_binary(svm.SVC(kernel="sigmoid", **SVC_PARAMS)),
        classification_binary(svm.NuSVC(kernel="rbf", **SVC_PARAMS)),
        classification(svm.SVC(kernel="rbf", **SVC_PARAMS)),
        classification(svm.NuSVC(kernel="rbf", **SVC_PARAMS)),

        # Linear Regression
        regression(linear_model.LinearRegression()),
        regression(linear_model.HuberRegressor()),
        regression(linear_model.ElasticNet(random_state=RANDOM_SEED)),
        regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)),
        regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.Lars()),
        regression(linear_model.LarsCV()),
        regression(linear_model.Lasso(random_state=RANDOM_SEED)),
        regression(linear_model.LassoCV(random_state=RANDOM_SEED)),
        regression(linear_model.LassoLars()),
        regression(linear_model.LassoLarsIC()),
        regression(linear_model.OrthogonalMatchingPursuit()),
        regression(linear_model.OrthogonalMatchingPursuitCV()),
        regression(linear_model.Ridge(random_state=RANDOM_SEED)),
        regression(linear_model.RidgeCV()),
        regression(linear_model.BayesianRidge()),
        regression(linear_model.ARDRegression()),
        regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)),
        regression(
            linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)),

        # Logistic Regression
Example 12
def fit_regression(P, x, u, rule="LS", retall=False, **kws):
    """
Fit a polynomial chaos expansion using linear regression.

Parameters
----------
P : Poly
    Polynomial chaos expansion with `P.shape=(M,)` and `P.dim=D`.
x : array_like
    Collocation nodes with `x.shape=(D,K)`.
u : array_like
    Model evaluations with `len(u)=K`.
retall : bool
    If True return uhat in addition to R
rule : str
    Regression method used.

    The following methods use scikit-learn as the backend.
    See `sklearn.linear_model` for more details.

    Key     Scikit-learn    Description
    ---     ------------    -----------
        Parameters      Description
        ----------      -----------

    "BARD"  ARDRegression   Bayesian ARD Regression
        n_iter=300      Maximum iterations
        tol=1e-3        Optimization tolerance
        alpha_1=1e-6    Gamma scale parameter
        alpha_2=1e-6    Gamma inverse scale parameter
        lambda_1=1e-6   Gamma shape parameter
        lambda_2=1e-6   Gamma inverse scale parameter
        threshold_lambda=1e-4   Upper pruning threshold

    "BR"    BayesianRidge   Bayesian Ridge Regression
        n_iter=300      Maximum iterations
        tol=1e-3        Optimization tolerance
        alpha_1=1e-6    Gamma scale parameter
        alpha_2=1e-6    Gamma inverse scale parameter
        lambda_1=1e-6   Gamma shape parameter
        lambda_2=1e-6   Gamma inverse scale parameter

    "EN"    ElastiNet       Elastic Net
        alpha=1.0       Dampening parameter
        rho             Mixing parameter in [0,1]
        max_iter=300    Maximum iterations
        tol             Optimization tolerance

    "ENC"   ElasticNetCV    EN w/Cross Validation
        rho             Mixing parameter(s)
        eps=1e-3        min(alpha)/max(alpha)
        n_alphas        Number of alphas
        alphas          List of alphas
        max_iter        Maximum iterations
        tol             Optimization tolerance
        cv=3            Cross validation folds

    "LA"    Lars            Least Angle Regression
        n_nonzero_coefs Number of non-zero coefficients
        eps             Cholesky regularization

    "LAC"   LarsCV          LAR w/Cross Validation
        max_iter        Maximum iterations
        cv=5            Cross validation folds
        max_n_alphas    Max points for residuals in cv

    "LAS"   Lasso           Least Absolute Shrinkage and
                            Selection Operator
        alpha=1.0       Damping parameter
        max_iter        Maximum iterations
        tol             Optimization tolerance

    "LASC"  LassoCV         LAS w/Cross Validation
        eps=1e-3        min(alpha)/max(alpha)
        n_alphas        Number of alphas
        alphas          List of alphas
        max_iter        Maximum iterations
        tol             Optimization tolerance
        cv=3            Cross validation folds

    "LL"    LassoLars       Lasso and Lars model
        max_iter        Maximum iterations
        eps             Cholesky regularization

    "LLC"   LassoLarsCV     LL w/Cross Validation
        max_iter        Maximum iterations
        cv=5            Cross validation folds
        max_n_alphas    Max points for residuals in cv
        eps             Cholesky regularization

    "LLIC"  LassoLarsIC     LL w/AIC or BIC
        criterion       "AIC" or "BIC" criterion
        max_iter        Maximum iterations
        eps             Cholesky regularization

    "OMP"   OrthogonalMatchingPursuit
        n_nonzero_coefs Number of non-zero coefficients
        tol             Max residual norm (instead of non-zero coef)

    Local methods

    Key     Description
    ---     -----------
    "LS"    Ordenary Least Squares

    "T"     Ridge Regression/Tikhonov Regularization
        order           Order of regularization (or custom matrix)
        alpha           Damping parameter (else estimated from gcv)

    "TC"    T w/Cross Validation
        order           Order of regularization (or custom matrix)
        alpha           Damping parameter (else estimated from gcv)


Returns
-------
R[, uhat]

R : Poly
    Fitted polynomial with `R.shape=u.shape[1:]` and `R.dim=D`.
uhat : np.ndarray
    The Fourier coefficients in the estimation.

Examples
--------
>>> x, y = cp.variable(2)
>>> P = cp.Poly([1, x, y])
>>> s = [[-1,-1,1,1], [-1,1,-1,1]]
>>> u = [0,1,1,2]
>>> print(fit_regression(P, s, u))
0.5q1+0.5q0+1.0

    """

    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(1, *x.shape)
    u = np.array(u)

    Q = P(*x).T
    shape = u.shape[1:]
    u = u.reshape(u.shape[0], int(np.prod(u.shape[1:])))

    rule = rule.upper()

    # Local rules
    if rule == "LS":
        uhat = la.lstsq(Q, u)[0].T

    elif rule == "T":
        uhat, alphas = rlstsq(Q, u, kws.get("order", 0),
                              kws.get("alpha", None), False, True)
        uhat = uhat.T

    elif rule == "TC":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), True)
        uhat = uhat.T

    else:

        # Scikit-learn wrapper
        try:
            _ = lm
        except NameError:
            raise NotImplementedError("sklearn not installed")

        if rule == "BARD":
            solver = lm.ARDRegression(fit_intercept=False, copy_X=False, **kws)

        elif rule == "BR":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.BayesianRidge(**kws)

        elif rule == "EN":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.ElasticNet(**kws)

        elif rule == "ENC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.ElasticNetCV(**kws)

        elif rule == "LA":  # success
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.Lars(**kws)

        elif rule == "LAC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LarsCV(**kws)

        elif rule == "LAS":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.Lasso(**kws)

        elif rule == "LASC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoCV(**kws)

        elif rule == "LL":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLars(**kws)

        elif rule == "LLC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLarsCV(**kws)

        elif rule == "LLIC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLarsIC(**kws)

        elif rule == "OMP":
            solver = lm.OrthogonalMatchingPursuit(**kws)

        uhat = solver.fit(Q, u).coef_

    u = u.reshape(u.shape[0], *shape)

    R = po.sum((P * uhat), -1)
    R = po.reshape(R, shape)

    if retall == 1:
        return R, uhat
    elif retall == 2:
        if rule == "T":
            return R, uhat, Q, alphas
        return R, uhat, Q
    return R
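
Stripped of the chaospy plumbing, every scikit-learn branch above reduces to fitting a solver on the design matrix Q. An isolated, hedged sketch of the rule == "LAC" branch on a toy least-squares problem:

# Toy stand-ins: Q plays the role of P(*x).T, u of the model evaluations.
import numpy as np
from sklearn import linear_model as lm

Q = np.random.RandomState(0).randn(40, 5)
u = Q @ np.array([1.0, 0.0, -2.0, 0.0, 0.5])
solver = lm.LarsCV(fit_intercept=False, cv=5)  # as in the "LAC" branch
uhat = solver.fit(Q, u).coef_
print(np.round(uhat, 2))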
Example 13
 def models(self) -> Dict[str, LinearModel]:
     return {
         "LarsCV": linear_model.LarsCV(cv=5, eps=0.01),
     }
Example 14
    def error(self):
        return self.error_cv['stacked_learner']

    @property
    def coefficients(self):
        return self.weights


if __name__ == "__main__":
    from sklearn import datasets, linear_model, neighbors
    
    # Load Example Dataset for regression
    np.random.seed(100)
    X, y = datasets.make_friedman1(1000)
    
    # All the learners
    learners = {
        0: ('OLS', linear_model.LinearRegression()),
        1: ('ElasticNetCV', linear_model.ElasticNetCV()),
        2: ('Ridge', linear_model.RidgeCV()),
        3: ('LARS', linear_model.LarsCV()),
        4: ('LASSO', linear_model.LassoCV()),
        5: ('kNN', neighbors.KNeighborsRegressor())
    }
    
    stacked_model = SuperSklearn(learners)
    stacked_model.fit(X, y)
    y_pred = stacked_model.predict(X)

    print(stacked_model.error)
    print(stacked_model.coefficients)
Example 15
from sklearn import datasets, linear_model, neighbors, svm, ensemble
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from base import SuperLearner
from base import BMA
import warnings
import numpy as np
warnings.filterwarnings("ignore", category=DeprecationWarning)

seed1 = 0
seed2 = 555
v_folds = 5
ols = linear_model.LinearRegression()
elnet = linear_model.ElasticNetCV(l1_ratio=0.5, cv=v_folds, normalize=True)
ridge = linear_model.RidgeCV(cv=v_folds)
lars = linear_model.LarsCV(cv=v_folds, normalize=True)
lasso = linear_model.LassoCV(cv=v_folds, normalize=True)
nn = neighbors.KNeighborsRegressor(weights='uniform')
svm1 = svm.SVR(kernel='linear', C=10, gamma='auto')
svm2 = svm.SVR(kernel='poly', C=10, gamma='auto')
rf = ensemble.RandomForestRegressor(n_estimators=200,
                                    max_depth=4,
                                    min_samples_split=2,
                                    random_state=seed1)
model_lib = [ols, rf, elnet, ridge, lars, lasso, nn, svm1, svm2]
model_names = [
    "OLS", "RF", "ElasticNet", "Ridge", "LARS", "LASSO", "kNN", "SVM rbf",
    "SVM poly"
]
meta_learner = ols
diabetes = datasets.load_diabetes()
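
The snippet ends before the learner library is used; a hedged continuation (not from the original file) that scores each base learner on the diabetes data with plain K-fold cross-validation, assuming a scikit-learn version that still accepts the normalize= arguments above:

# Hypothetical continuation: cross-validate each base learner.
from sklearn.model_selection import cross_val_score

X, y = diabetes.data, diabetes.target
for name, est in zip(model_names, model_lib):
    scores = cross_val_score(est, X, y, cv=KFold(n_splits=v_folds))
    print(name, round(scores.mean(), 3))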
Example 16
    def __init__(self, method, yrange, params, i=0, ransacparams={}):
        self.method = method
        self.outliers = None
        self.inliers = None
        self.ransac = False
        self.yrange = yrange[i]

        if self.method[i] == 'PLS':
            self.model = PLSRegression(**params[i])
        if self.method[i] == 'OLS':
            self.model = linear.LinearRegression(**params[i])
        if self.method[i] == 'OMP':
            #check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.OrthogonalMatchingPursuit(**params_temp)
            else:
                params_temp.pop('n_nonzero_coefs')
                self.model = linear.OrthogonalMatchingPursuitCV(**params_temp)

        if self.method[i] == 'Lasso':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.Lasso(**params_temp)
            else:
                params_temp.pop('alpha')
                self.model = linear.LassoCV(**params_temp)

        if self.method[i] == 'Elastic Net':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.ElasticNet(**params_temp)
            else:
                params_temp.pop('alpha')
                self.model = linear.ElasticNetCV(**params_temp)

        if self.method[i] == 'Ridge':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.Ridge(**params_temp)
            else:
                #Ridge requires a specific set of alphas to be provided... this needs more work to be implemented correctly
                self.model = linear.RidgeCV(**params_temp)

        if self.method[i] == 'Bayesian Ridge':
            self.model = linear.BayesianRidge(**params[i])
        if self.method[i] == 'ARD':
            self.model = linear.ARDRegression(**params[i])
        if self.method[i] == 'LARS':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.Lars(**params_temp)
            else:
                self.model = linear.LarsCV(**params_temp)

        if self.method[i] == 'Lasso LARS':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # check whether to do IC or not
            self.do_ic = params[i]['IC']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV and IC parameter
            params_temp.pop('CV')
            params_temp.pop('IC')
            # use params_temp so the popped CV/IC flags are not passed through
            if self.do_cv is False and self.do_ic is False:
                self.model = linear.LassoLars(**params_temp)
            if self.do_cv is True and self.do_ic is False:
                self.model = linear.LassoLarsCV(**params_temp)
            if self.do_cv is False and self.do_ic is True:
                self.model = linear.LassoLarsIC(**params_temp)
            if self.do_cv is True and self.do_ic is True:
                print(
                    "Can't use both cross validation AND information criterion to optimize!"
                )

        if self.method[i] == 'SVR':
            self.model = svm.SVR(**params[i])
        if self.method[i] == 'KRR':
            self.model = kernel_ridge.KernelRidge(**params[i])

        if self.method[i] == 'GP':
            #get the method for dimensionality reduction and the number of components
            self.reduce_dim = params[i]['reduce_dim']
            self.n_components = params[i]['n_components']
            #create a temporary set of parameters
            params_temp = copy.copy(params[i])
            #Remove parameters not accepted by Gaussian Process
            params_temp.pop('reduce_dim')
            params_temp.pop('n_components')
            self.model = GaussianProcess(**params_temp)
Example 17
penalty = ['l1', 'l2']
n_iter = [100, 200, 300, 400, 500]

ridge = linear_model.RidgeCV(alphas=alphas, cv=GRIDSEARCH_NUM_CV_FOLDS)

RidgeRegressionStrategy = TabRegrStrategy(estimator=ridge,
                                          name='RidgeRegression')

lasso = linear_model.LassoCV(alphas=alphas,
                             cv=GRIDSEARCH_NUM_CV_FOLDS,
                             n_jobs=GRIDSEARCH_CV_NUM_PARALLEL_JOBS)

LassoStrategy = TabRegrStrategy(estimator=lasso, name='Lasso')

lasso_lars = linear_model.LarsCV(max_n_alphas=max_n_alphas,
                                 cv=GRIDSEARCH_NUM_CV_FOLDS,
                                 n_jobs=GRIDSEARCH_CV_NUM_PARALLEL_JOBS)

LassoLarsStrategy = TabRegrStrategy(estimator=lasso_lars, name='LassoLars')

logistic_regression = GridSearchCV(estimator=linear_model.LogisticRegression(),
                                   param_grid={
                                       'C': c_param,
                                       'penalty': penalty,
                                   },
                                   n_jobs=GRIDSEARCH_CV_NUM_PARALLEL_JOBS,
                                   cv=GRIDSEARCH_NUM_CV_FOLDS)

LogisticRegressionStrategy = TabRegrStrategy(estimator=logistic_regression,
                                             name='LogisticRegression')
Example 18
def evaluate(parkingID):
    dataset = getData(int(parkingID))

    #set targets
    target = pandas.DataFrame()
    target['space'] = dataset['space']
    del dataset['space']
    targets = pandas.DataFrame(MinMaxScaler().fit_transform(target),
                               columns=target.columns)
    targetSet = createAvailabilityGroups(targets)

    # exclude day,date from timestamp and normalize based on 24h
    for x in range(0, len(dataset['timestamp'])):
        ts = int(dataset['timestamp'][x])
        h = datetime.utcfromtimestamp(ts).strftime('%H')
        m = datetime.utcfromtimestamp(ts).strftime('%M')
        dataset['timestamp'][x] = str(int(h) * 60 + int(m))

    # create train and test sets
    trainSet, testSet, trainTarget, testTarget = model_selection.train_test_split(
        dataset, targetSet, test_size=0.4, random_state=0)

    # Spot Check Algorithms
    models = []
    models.append(('RidgeCV',
                   linear_model.RidgeCV(
                       cv=model_selection.KFold(n_splits=10, random_state=0))))
    models.append(('BayesianRidge', linear_model.BayesianRidge()))
    models.append(('Huber', linear_model.HuberRegressor()))
    models.append(('Lars',
                   linear_model.LarsCV(
                       cv=model_selection.KFold(n_splits=10, random_state=0))))
    models.append(('Lasso',
                   linear_model.LassoCV(
                       cv=model_selection.KFold(n_splits=10, random_state=0))))
    models.append(('Linear', linear_model.LinearRegression()))
    models.append(('AdaBoost', ensemble.AdaBoostRegressor()))
    models.append(('ExtraTree',
                   ensemble.ExtraTreesRegressor(n_estimators=100,
                                                random_state=0)))
    models.append(('RandomForest',
                   ensemble.RandomForestRegressor(n_estimators=100,
                                                  random_state=0)))
    models.append(('PassiveAggressive',
                   linear_model.PassiveAggressiveRegressor(random_state=0)))

    # evaluate each model in turn
    results = []
    names = []
    print "MSE for parking %d" % int(parkingID)
    print "-----------------"
    best = ""
    bestMSE = 100
    for name, model in models:
        estimator = model.fit(trainSet, trainTarget)
        prediction = estimator.predict(testSet)
        error = mse(prediction, testTarget)
        print "%s: %f" % (name, error)
        if error < bestMSE:
            bestMSE = error
            best = name

    print "\nBest: %s\t\tMSE: %f\n" % (best, bestMSE)
Example 19
    def models(self) -> Dict[str, LinearModel]:
        return {
            "LinearRegression":
            linear_model.LinearRegression(
            ),  # LinearRegression([…])	Ordinary least squares Linear Regression.
            "ARDRegression":
            linear_model.ARDRegression(
            ),  #  ARDRegression([n_iter, tol, …])	Bayesian ARD regression.
            "BayesianRidge":
            linear_model.BayesianRidge(
            ),  # BayesianRidge([n_iter, tol, …])	Bayesian ridge regression.
            "HuberRegressor":
            linear_model.HuberRegressor(
            ),  # HuberRegressor([epsilon, …])	Linear regression model that is robust to outliers.
            "OrthogonalMatchingPursuitCV":
            linear_model.OrthogonalMatchingPursuitCV(
                cv=5
            ),  # OrthogonalMatchingPursuitCV([…])	Cross-validated Orthogonal Matching Pursuit model (OMP).
            "Perceptron":
            linear_model.Perceptron(
                max_iter=1000, tol=1e-3
            ),  # Perceptron([penalty, alpha, …])	Read more in the User Guide.
            "RANSACRegressor":
            linear_model.RANSACRegressor(
            ),  # RANSACRegressor([…])	RANSAC (RANdom SAmple Consensus) algorithm.
            "SGDRegressor":
            linear_model.SGDRegressor(
                max_iter=1000, tol=1e-3
            ),  # SGDRegressor([loss, penalty, …])	Linear model fitted by minimizing a regularized empirical loss with SGD
            "TheilSenRegressor":
            linear_model.TheilSenRegressor(
            ),  # TheilSenRegressor([…])	Theil-Sen Estimator: robust multivariate regression model.
            "PassiveAggressiveRegressor":
            linear_model.PassiveAggressiveRegressor(
                max_iter=1000, tol=1e-3
            ),  # PassiveAggressiveRegressor([C, …])	Passive Aggressive Regressor
            "Lars":
            linear_model.Lars(
                eps=0.01
            ),  # Lars([fit_intercept, verbose, …])	Least Angle Regression model a.k.a.
            "LarsCV":
            linear_model.LarsCV(
                cv=5, eps=0.01
            ),  # LarsCV([fit_intercept, …])	Cross-validated Least Angle Regression model.
            "Lasso":
            linear_model.Lasso(
                alpha=1, max_iter=1000
            ),  # Lasso([alpha, fit_intercept, …])	Linear Model trained with L1 prior as regularizer (aka the Lasso)
            "LassoCV":
            linear_model.LassoCV(
                cv=5
            ),  # LassoCV([eps, n_alphas, …])	Lasso linear model with iterative fitting along a regularization path.
            "LassoLars":
            linear_model.LassoLars(
                eps=0.01
            ),  # LassoLars([alpha, …])	Lasso model fit with Least Angle Regression a.k.a.
            "LassoLarsCV":
            linear_model.LassoLarsCV(
                cv=5, eps=0.01, max_iter=100
            ),  # LassoLarsCV([fit_intercept, …])	Cross-validated Lasso, using the LARS algorithm.
            "LassoLarsIC":
            linear_model.LassoLarsIC(
                eps=0.01
            ),  # LassoLarsIC([criterion, …])	Lasso model fit with Lars using BIC or AIC for model selection
            "Ridge":
            linear_model.Ridge(
            ),  # Ridge([alpha, fit_intercept, …])	Linear least squares with l2 regularization.
            "RidgeClassifier":
            linear_model.RidgeClassifier(
            ),  # RidgeClassifier([alpha, …])	Classifier using Ridge regression.
            "RidgeClassifierCV":
            linear_model.RidgeClassifierCV(
                cv=5
            ),  # RidgeClassifierCV([alphas, …])	Ridge classifier with built-in cross-validation.
            "RidgeCV":
            linear_model.RidgeCV(
                cv=5
            ),  # RidgeCV([alphas, …])	Ridge regression with built-in cross-validation.
            "SGDClassifier":
            linear_model.SGDClassifier(
                max_iter=1000, tol=1e-3
            ),  # SGDClassifier([loss, penalty, …])	Linear classifiers (SVM, logistic regression, a.o.) with SGD training.
            "ElasticNet":
            linear_model.ElasticNet(
            ),  # linear_model.ElasticNet([alpha, l1_ratio, …])	Linear regression with combined L1 and L2 priors as regularizer.
            "ElasticNetCV":
            linear_model.ElasticNetCV(
                cv=5
            ),  # linear_model.ElasticNetCV([l1_ratio, eps, …])	Elastic Net model with iterative fitting along a regularization path.

            ### Ignore These
            # "LogisticRegression":           linear_model.LogisticRegression(),                    # LogisticRegression([penalty, …])	Logistic Regression (aka logit, MaxEnt) classifier.
            # "LogisticRegressionCV":         linear_model.LogisticRegressionCV(cv=5),              # LogisticRegressionCV([Cs, …])	Logistic Regression CV (aka logit, MaxEnt) classifier.
            # "MultiTaskLasso":               linear_model.MultiTaskLasso(),                        # MultiTaskLasso([alpha, …])	Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.
            # "MultiTaskElasticNet":          linear_model.MultiTaskElasticNet(),                   # MultiTaskElasticNet([alpha, …])	Multi-task ElasticNet model trained with L1/L2 mixed-norm as regularizer
            # "MultiTaskLassoCV":             linear_model.MultiTaskLassoCV(cv=5),                  # MultiTaskLassoCV([eps, …])	Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.
            # "MultiTaskElasticNetCV":        linear_model.MultiTaskElasticNetCV(cv=5),             # MultiTaskElasticNetCV([…])	Multi-task L1/L2 ElasticNet with built-in cross-validation.
            # "OrthogonalMatchingPursuit":    linear_model.OrthogonalMatchingPursuit(),             # OrthogonalMatchingPursuit([…])	Orthogonal Matching Pursuit model (OMP)
            # "PassiveAggressiveClassifier":  linear_model.PassiveAggressiveClassifier(),           # PassiveAggressiveClassifier([…])	Passive Aggressive Classifier

            ### Normalization seems to make the score worse!
            # "LinearRegressionNormalize":    linear_model.LinearRegression(normalize=True),          # LinearRegression([…])	Ordinary least squares Linear Regression.
            # "RidgeCVNormalize":             linear_model.RidgeCV(cv=5, normalize=True),             # RidgeCV([alphas, …])	Ridge regression with built-in cross-validation.
            # "LassoLarsNormalize":           linear_model.LassoLars(eps=0.01, normalize=True),       # LassoLars([alpha, …])	Lasso model fit with Least Angle Regression a.k.a.
            # "LassoLarsICNormalize":         linear_model.LassoLarsIC(eps=0.01, normalize=True),     # LassoLarsIC([criterion, …])	Lasso model fit with Lars using BIC or AIC for model selection
            # "ARDRegressionNormalize":       linear_model.ARDRegression(normalize=True),             #  ARDRegression([n_iter, tol, …])	Bayesian ARD regression.
            # "BayesianRidgeNormalize":       linear_model.BayesianRidge(normalize=True),             # BayesianRidge([n_iter, tol, …])	Bayesian ridge regression.
        }
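
A hedged sketch of how such a dictionary might be consumed: fit each entry on a toy regression problem and report its in-sample R^2. Only two regressors are shown, since the classifier entries above would need a classification target.

# Hypothetical consumer of a models() dict, on synthetic data.
from sklearn import datasets, linear_model

X, y = datasets.make_regression(n_samples=200, n_features=10,
                                noise=5.0, random_state=0)
for name, model in {"LarsCV": linear_model.LarsCV(cv=5, eps=0.01),
                    "LassoCV": linear_model.LassoCV(cv=5)}.items():
    print(name, round(model.fit(X, y).score(X, y), 3))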
Example 20
Unigrams_Count_Map = CountVectorizer(ngram_range=(1, 2),
                                     token_pattern=r'\b\w+\b',
                                     max_df=0.99,
                                     min_df=0.01)
Train_List_Unigrams = Unigrams_Count_Map.fit_transform(train_review_text)
#Train_List_Unigrams = Train_List_Unigrams.toarray();
print(Train_List_Unigrams.shape)
print(len(trainsenti))
#trainsenti = np.asarray(trainsenti).reshape(Train_List_Unigrams.shape[0],1)

#print (trainsenti.shape)
#for i in range(100):
#    Train_List_Unigrams = np.hstack((Train_List_Unigrams, np.array(trainsenti).reshape(trainsenti.shape[0], 1)))

print(Train_List_Unigrams.shape)
regr = linear_model.LarsCV(max_n_alphas=4000)
#Train_List_Unigrams = bsr_matrix(Train_List_Unigrams)

Test_List_Unigrams = Unigrams_Count_Map.transform(test_review_textz)
#Test_List_Unigrams = Test_List_Unigrams.toarray();
#testsenti = np.asarray(testsenti).reshape(Test_List_Unigrams.shape[0],1)
#for i in range(100):
#    Test_List_Unigrams = np.hstack((Test_List_Unigrams, testsenti))
print("tran shape", Train_List_Unigrams.shape)
print("test shape", Test_List_Unigrams.shape)
#Train_List_Unigrams = bsr_matrix(Train_List_Unigrams)
#Test_List_Unigrams = bsr_matrix(Test_List_Unigrams)
linear_regression(Train_List_Unigrams, trainlabels1, regr, Test_List_Unigrams,
                  testlabels1, 'one')
linear_regression(Train_List_Unigrams, trainlabels2, regr, Test_List_Unigrams,
                  testlabels2, 'two')
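
The linear_regression helper called above is not shown. A hypothetical stand-in, assuming it only fits the given estimator and reports train/test scores under a tag (note that LarsCV cannot fit scipy sparse matrices directly, which is probably why the commented-out .toarray() calls exist):

# Hypothetical stand-in for the missing linear_regression helper.
def linear_regression(X_train, y_train, estimator, X_test, y_test, tag):
    estimator.fit(X_train, y_train)
    print(tag, estimator.score(X_train, y_train),
          estimator.score(X_test, y_test))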
Example 21
x_train = df_train[columns]
y_train = df_train[['y']]

model = linear_model.Ridge(normalize=True)
selector = RFECV(model, step=1, cv=2)
selector = selector.fit(x_train, y_train)
selected_columns = [columns[i] for i in np.where(selector.support_)[0]]
#print("Optimal number of features : %d" % selector.n_features_)
#plt.figure()
#plt.xlabel("Number of features selected")
#plt.ylabel("Cross validation score (nb of correct classifications)")
#plt.plot(range(1, len(selector.grid_scores_) + 1), selector.grid_scores_)
#plt.show()

model = linear_model.LarsCV(max_iter=200, normalize=True, cv=5, n_jobs=-1)
model.fit(x_train[selected_columns], y_train)
selected_columns = [selected_columns[col_id] for col_id in model.active_]
print(selected_columns)

for col in selected_columns:
    model1 = linear_model.LarsCV(max_iter=200, normalize=True, cv=2, n_jobs=-1)
    model1.fit(x_train[[col]], y_train)
    x_test = df_test[[col]].fillna(df_train.mean(axis=0))
    y_test = model1.predict(x_test)
    print(col + ': ' + str(r_score(y, y_test)))

x_test = df_test[selected_columns].fillna(df_train.mean(axis=0))
y_test = model.predict(x_test[selected_columns])

print('global: ' + str(r_score(y, y_test)))
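
A self-contained sketch of the two-stage selection above (RFECV with Ridge, then LarsCV on the surviving columns) on synthetic data; the original column names, test frame and r_score helper are replaced by stand-ins, and normalize= is dropped for recent scikit-learn:

# Hedged, self-contained version of the RFECV -> LarsCV selection.
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.feature_selection import RFECV

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.standard_normal((120, 6)),
                  columns=["x%d" % i for i in range(6)])
y = 2.0 * df["x1"] - 1.0 * df["x4"] + 0.1 * rng.standard_normal(120)

selector = RFECV(linear_model.Ridge(), step=1, cv=2).fit(df, y)
cols = df.columns[selector.support_].tolist()

model = linear_model.LarsCV(max_iter=200, cv=5).fit(df[cols], y)
print([cols[i] for i in model.active_])  # columns on the final LARS path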
Example 22
def fit_regression(P, x, u, rule="LS", retall=False, **kws):
    """
    Fit a polynomial chaos expansion using linear regression.

    Args:
        P (Poly) : Polynomial expansion with `P.shape=(M,)` and `P.dim=D`.
        x (array_like) : Collocation nodes with `x.shape=(D,K)`.
        u (array_like) : Model evaluations with `len(u)=K`.
        retall (bool) : If True return Fourier coefficients in addition to R.
        rule (str) : Regression method used.

    Returns:
        (Poly, np.ndarray) : Fitted polynomial with `R.shape=u.shape[1:]` and
                `R.dim=D`. The Fourier coefficients in the estimation.

    Examples:
        >>> x, y = cp.variable(2)
        >>> P = cp.Poly([1, x, y])
        >>> s = [[-1,-1,1,1], [-1,1,-1,1]]
        >>> u = [0,1,1,2]
        >>> print(cp.around(cp.fit_regression(P, s, u), 14))
        0.5q0+0.5q1+1.0
    """
    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(1, *x.shape)
    u = np.array(u)

    Q = P(*x).T
    shape = u.shape[1:]
    u = u.reshape(u.shape[0], int(np.prod(u.shape[1:])))

    rule = rule.upper()

    # Local rules
    if rule == "LS":
        uhat = linalg.lstsq(Q, u)[0].T

    elif rule == "T":
        uhat, alphas = rlstsq(Q, u, kws.get("order", 0),
                              kws.get("alpha", None), False, True)
        uhat = uhat.T

    elif rule == "TC":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), True)
        uhat = uhat.T

    else:

        # Scikit-learn wrapper
        try:
            _ = linear_model
        except NameError:
            raise NotImplementedError("sklearn not installed")

        if rule == "BARD":
            solver = linear_model.ARDRegression(fit_intercept=False,
                                                copy_X=False,
                                                **kws)

        elif rule == "BR":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.BayesianRidge(**kws)

        elif rule == "EN":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.ElasticNet(**kws)

        elif rule == "ENC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.ElasticNetCV(**kws)

        elif rule == "LA":  # success
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.Lars(**kws)

        elif rule == "LAC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LarsCV(**kws)

        elif rule == "LAS":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.Lasso(**kws)

        elif rule == "LASC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoCV(**kws)

        elif rule == "LL":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLars(**kws)

        elif rule == "LLC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLarsCV(**kws)

        elif rule == "LLIC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLarsIC(**kws)

        elif rule == "OMP":
            solver = linear_model.OrthogonalMatchingPursuit(**kws)

        uhat = solver.fit(Q, u).coef_

    u = u.reshape(u.shape[0], *shape)

    R = cp.poly.sum((P * uhat), -1)
    R = cp.poly.reshape(R, shape)

    if retall == 1:
        return R, uhat

    elif retall == 2:
        if rule == "T":
            return R, uhat, Q, alphas
        return R, uhat, Q

    return R
Example 23
 AdaBoostRegressor(),
 BaggingRegressor(),
 linear_model.BayesianRidge(),
 CCA(),
 DecisionTreeRegressor(),
 linear_model.ElasticNet(),
 linear_model.ElasticNetCV(),
 ExtraTreeRegressor(),
 ExtraTreesRegressor(),
 GaussianProcessRegressor(),
 GradientBoostingRegressor(random_state=50),
 linear_model.HuberRegressor(),
 KNeighborsRegressor(),
 KernelRidge(),
 linear_model.Lars(),
 linear_model.LarsCV(),
 linear_model.Lasso(),
 linear_model.LassoCV(),
 linear_model.LassoLars(),
 linear_model.LassoLarsCV(),
 linear_model.LassoLarsIC(),
 linear_model.LinearRegression(),
 LinearSVR(),
 #linear_model.LogisticRegression(),
 #linear_model.LogisticRegressionCV(),
 MLPRegressor(),
 #linear_model.ModifiedHuber(),
 #linear_model.MultiTaskElasticNet(),
 #linear_model.MultiTaskElasticNetCV(),
 #linear_model.MultiTaskLasso(),
 #linear_model.MultiTaskLassoCV(),
Example 24
    def __init__(
        self,
        method,
        yrange,
        params,
        i=0
    ):  #TODO: yrange doesn't currently do anything. Remove or do something with it!
        self.algorithm_list = [
            'PLS',
            'GP',
            'OLS',
            'OMP',
            'Lasso',
            'Elastic Net',
            'Ridge',
            'Bayesian Ridge',
            'ARD',
            'LARS',
            'LASSO LARS',
            'SVR',
            'KRR',
        ]
        self.method = method
        self.outliers = None
        self.ransac = False

        print(params)
        if self.method[i] == 'PLS':
            self.model = PLSRegression(**params[i])

        if self.method[i] == 'OLS':
            self.model = linear.LinearRegression(**params[i])

        if self.method[i] == 'OMP':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.OrthogonalMatchingPursuit(**params_temp)
            else:
                params_temp.pop('precompute')
                self.model = linear.OrthogonalMatchingPursuitCV(**params_temp)

        if self.method[i] == 'LASSO':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # check whether to do CV or not
            try:
                self.do_cv = params[i]['CV']
                # Remove CV parameter
                params_temp.pop('CV')
            except KeyError:
                self.do_cv = False

            if self.do_cv is False:
                self.model = linear.Lasso(**params_temp)
            else:
                params_temp.pop('alpha')
                self.model = linear.LassoCV(**params_temp)

        if self.method[i] == 'Elastic Net':
            params_temp = copy.copy(params[i])
            try:
                self.do_cv = params[i]['CV']
                params_temp.pop('CV')
            except KeyError:
                self.do_cv = False

            if self.do_cv is False:
                self.model = linear.ElasticNet(**params_temp)
            else:
                params_temp['l1_ratio'] = [.1, .5, .7, .9, .95, .99, 1]
                self.model = linear.ElasticNetCV(**params_temp)

        if self.method[i] == 'Ridge':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            try:
                # check whether to do CV or not
                self.do_cv = params[i]['CV']

                # Remove CV parameter
                params_temp.pop('CV')
            except KeyError:
                self.do_cv = False

            if self.do_cv:
                self.model = linear.RidgeCV(**params_temp)
            else:
                self.model = linear.Ridge(**params_temp)

        if self.method[i] == 'BRR':
            self.model = linear.BayesianRidge(**params[i])

        if self.method[i] == 'ARD':
            self.model = linear.ARDRegression(**params[i])

        if self.method[i] == 'LARS':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            try:
                # check whether to do CV or not
                self.do_cv = params[i]['CV']

                # Remove CV parameter
                params_temp.pop('CV')
            except KeyError:
                self.do_cv = False

            if self.do_cv is False:
                self.model = linear.Lars(**params_temp)
            else:
                self.model = linear.LarsCV(**params_temp)

        if self.method[i] == 'LASSO LARS':
            model = params[i]['model']
            params_temp = copy.copy(params[i])
            params_temp.pop('model')

            if model == 0:
                self.model = linear.LassoLars(**params_temp)
            elif model == 1:
                self.model = linear.LassoLarsCV(**params_temp)
            elif model == 2:
                self.model = linear.LassoLarsIC(**params_temp)
            else:
                print("Something went wrong, \'model\' should be 0, 1, or 2")

        if self.method[i] == 'SVR':
            self.model = svm.SVR(**params[i])

        if self.method[i] == 'KRR':
            self.model = kernel_ridge.KernelRidge(**params[i])

        if self.method[i] == 'GP':
            # get the method for dimensionality reduction and the number of components
            self.reduce_dim = params[i]['reduce_dim']
            self.n_components = params[i]['n_components']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove parameters not accepted by Gaussian Process
            params_temp.pop('reduce_dim')
            params_temp.pop('n_components')
            self.model = GaussianProcess(**params_temp)
Example 25
def get_regression_estimators(r, regression_models):
    if r == 'ARDRegression':
        regression_models[r] = linear_model.ARDRegression()
    elif r == 'BayesianRidge':
        regression_models[r] = linear_model.BayesianRidge()
    elif r == 'ElasticNet':
        regression_models[r] = linear_model.ElasticNet()
    elif r == 'ElasticNetCV':
        regression_models[r] = linear_model.ElasticNetCV()
    elif r == 'HuberRegressor':
        regression_models[r] = linear_model.HuberRegressor()
    elif r == 'Lars':
        regression_models[r] = linear_model.Lars()
    elif r == 'LarsCV':
        regression_models[r] = linear_model.LarsCV()
    elif r == 'Lasso':
        regression_models[r] = linear_model.Lasso()
    elif r == 'LassoCV':
        regression_models[r] = linear_model.LassoCV()
    elif r == 'LassoLars':
        regression_models[r] = linear_model.LassoLars()
    elif r == 'LassoLarsCV':
        regression_models[r] = linear_model.LassoLarsCV()
    elif r == 'LassoLarsIC':
        regression_models[r] = linear_model.LassoLarsIC()
    elif r == 'LinearRegression':
        regression_models[r] = linear_model.LinearRegression()
    elif r == 'LogisticRegression':
        regression_models[r] = linear_model.LogisticRegression()
    elif r == 'LogisticRegressionCV':
        regression_models[r] = linear_model.LogisticRegressionCV()
    elif r == 'MultiTaskElasticNet':
        regression_models[r] = linear_model.MultiTaskElasticNet()
    elif r == 'MultiTaskElasticNetCV':
        regression_models[r] = linear_model.MultiTaskElasticNetCV()
    elif r == 'MultiTaskLasso':
        regression_models[r] = linear_model.MultiTaskLasso()
    elif r == 'MultiTaskLassoCV':
        regression_models[r] = linear_model.MultiTaskLassoCV()
    elif r == 'OrthogonalMatchingPursuit':
        regression_models[r] = linear_model.OrthogonalMatchingPursuit()
    elif r == 'OrthogonalMatchingPursuitCV':
        regression_models[r] = linear_model.OrthogonalMatchingPursuitCV()
    elif r == 'PassiveAggressiveClassifier':
        regression_models[r] = linear_model.PassiveAggressiveClassifier()
    elif r == 'PassiveAggressiveRegressor':
        regression_models[r] = linear_model.PassiveAggressiveRegressor()
    elif r == 'Perceptron':
        regression_models[r] = linear_model.Perceptron()
    elif r == 'RANSACRegressor':
        regression_models[r] = linear_model.RANSACRegressor()
    elif r == 'Ridge':
        regression_models[r] = linear_model.Ridge()
    elif r == 'RidgeClassifier':
        regression_models[r] = linear_model.RidgeClassifier()
    elif r == 'RidgeClassifierCV':
        regression_models[r] = linear_model.RidgeClassifierCV()
    elif r == 'RidgeCV':
        regression_models[r] = linear_model.RidgeCV()
    elif r == 'SGDClassifier':
        regression_models[r] = linear_model.SGDClassifier()
    elif r == 'SGDRegressor':
        regression_models[r] = linear_model.SGDRegressor()
    elif r == 'TheilSenRegressor':
        regression_models[r] = linear_model.TheilSenRegressor()
    else:
        print(
            r +
            " is an unsupported regression type. Check if you have misspelled the name."
        )
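
A hypothetical driver for get_regression_estimators: request a few supported names, then fit and score each estimator on a toy problem (assumes the linear_model import used by the function above):

# Hypothetical driver; names are drawn from the supported list above.
from sklearn import datasets

X, y = datasets.make_regression(n_samples=100, n_features=5,
                                noise=1.0, random_state=0)
regression_models = {}
for name in ('LarsCV', 'LassoCV', 'RidgeCV'):
    get_regression_estimators(name, regression_models)
for name, est in regression_models.items():
    print(name, round(est.fit(X, y).score(X, y), 3))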
Example 26
# Level 2 Score: 

clf = linear_model.PassiveAggressiveRegressor(n_iter=100, loss='squared_epsilon_insensitive', random_state=rnd, verbose=0)
model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="regressor", filename = "PasAggR", setused=setused, tag = "2")


# Level 2 Score: 

clf = discriminant_analysis.LinearDiscriminantAnalysis()
model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="classifier", filename = "LDA", setused=setused)


# Level 2 Score: 

clf = linear_model.LarsCV(cv=5, verbose=0)
model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="regressor", filename = "LeastAngle", setused=setused)


# Level 2 Score: 

clf = linear_model.ElasticNetCV(cv=5, verbose=0)
model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5,seed=rnd, category="regressor", filename = "ElasticNet", setused=setused)


# Level 2 Score: 

clf = linear_model.BayesianRidge()
model_sum = blend_proba(clf=clf, X_train=train, y=target, X_test=test, nfolds=5, seed=rnd, category="regressor", filename = "BayesianRidge", setused=setused)

Example 27
#plt.ylabel('explained_variance_')

n_components = np.arange(1,21)
estimator = GridSearchCV(pipe, dict(pca__n_components=n_components), cv=cv_splits)
estimator.fit(x_train, y_train)
n_components = estimator.best_estimator_.named_steps['pca'].n_components
print(n_components)
                                                    
pca = decomposition.PCA(whiten=True, n_components=3)
model = linear_model.LinearRegression(normalize=True)
pipe = Pipeline(steps=[('pca', pca), ('lr', model)])                          
pipe.fit(x_train, y_train)   
y_test = pipe.predict(x_test[columns])                          
print('global: ' + str(utilities.r_score(y, y_test)))

model = linear_model.LarsCV(max_iter=1000, normalize=True, cv=cv_splits, n_jobs=-1)
#model = linear_model.LassoCV(max_iter=1000, normalize=True, cv=cv, n_jobs=-1)
model.fit(x_train, y_train)


N = x_train.shape[0]
splits = 10
idxs = np.arange(N)
cv_splits = [(idxs[:i], idxs[i:]) for i in range(int(N/splits)+1, N, int(N/splits))]


rfecv = RFECV(estimator=linear_model.Ridge(normalize=True), step=1, cv=cv_splits)
rfecv.fit(x_train, y_train)
selected_columns = [columns[i] for i in np.where(rfecv.support_)[0]]
print(selected_columns)
Example 28
features = data.iloc[:, 3:]
print(features.head(5))
featurenames = features.columns
#features.drop('zipcode',1,inplace=True)
#features.drop('lat',1,inplace=True)
#features.drop('long',1,inplace=True)

scalerNorm = Normalizer(norm='l2')
scalerStandard = StandardScaler().fit(features)
#scalerX.fit(features)
#features = scalerX.transform(features)
features = scalerStandard.transform(features)

print(features.shape)

Lars_cv = linearmodels.LarsCV(cv=6).fit(features, y)
Lasso_cv = linearmodels.LassoCV(cv=6).fit(features, y)
alphas = np.linspace(Lars_cv.alphas_[0], .1 * Lars_cv.alphas_[0], 6)
Randomized_lasso = linearmodels.RandomizedLasso(alpha=alphas, random_state=42)

linear_regression = linearmodels.LinearRegression()
linear_SVR = LinearSVR(loss='squared_epsilon_insensitive')

featureselector_Lars = feature_selection.SelectFromModel(Lars_cv, prefit=True)
featureselector_Lasso = feature_selection.SelectFromModel(Lasso_cv,
                                                          prefit=True)
featureselector_RLasso = Randomized_lasso.fit(features, y)

print(Lars_cv.coef_)
print(Lasso_cv.coef_)
print(Randomized_lasso.scores_)
Example 29
def run_simple_model(train_x, train_y, dev_x, dev_y, test_x, test_y, model_type, out_dir=None, class_weight=None):
    from sklearn import datasets, neighbors, linear_model, svm

    totalTime = 0

    startTrainTime = time()
    logger.info("Start training...")
    if model_type == 'ARDRegression':
        model = linear_model.ARDRegression().fit(train_x, train_y)
    elif model_type == 'BayesianRidge':
        model = linear_model.BayesianRidge().fit(train_x, train_y)
    elif model_type == 'ElasticNet':
        model = linear_model.ElasticNet().fit(train_x, train_y)
    elif model_type == 'ElasticNetCV':
        model = linear_model.ElasticNetCV().fit(train_x, train_y)
    elif model_type == 'HuberRegressor':
        model = linear_model.HuberRegressor().fit(train_x, train_y)
    elif model_type == 'Lars':
        model = linear_model.Lars().fit(train_x, train_y)
    elif model_type == 'LarsCV':
        model = linear_model.LarsCV().fit(train_x, train_y)
    elif model_type == 'Lasso':
        model = linear_model.Lasso().fit(train_x, train_y)
    elif model_type == 'LassoCV':
        model = linear_model.LassoCV().fit(train_x, train_y)
    elif model_type == 'LassoLars':
        model = linear_model.LassoLars().fit(train_x, train_y)
    elif model_type == 'LassoLarsCV':
        model = linear_model.LassoLarsCV().fit(train_x, train_y)
    elif model_type == 'LassoLarsIC':
        model = linear_model.LassoLarsIC().fit(train_x, train_y)
    elif model_type == 'LinearRegression':
        model = linear_model.LinearRegression().fit(train_x, train_y)
    elif model_type == 'LogisticRegression':
        model = linear_model.LogisticRegression(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'LogisticRegressionCV':
        model = linear_model.LogisticRegressionCV(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'MultiTaskLasso':
        model = linear_model.MultiTaskLasso().fit(train_x, train_y)
    elif model_type == 'MultiTaskElasticNet':
        model = linear_model.MultiTaskElasticNet().fit(train_x, train_y)
    elif model_type == 'MultiTaskLassoCV':
        model = linear_model.MultiTaskLassoCV().fit(train_x, train_y)
    elif model_type == 'MultiTaskElasticNetCV':
        model = linear_model.MultiTaskElasticNetCV().fit(train_x, train_y)
    elif model_type == 'OrthogonalMatchingPursuit':
        model = linear_model.OrthogonalMatchingPursuit().fit(train_x, train_y)
    elif model_type == 'OrthogonalMatchingPursuitCV':
        model = linear_model.OrthogonalMatchingPursuitCV().fit(train_x, train_y)
    elif model_type == 'PassiveAggressiveClassifier':
        model = linear_model.PassiveAggressiveClassifier(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'PassiveAggressiveRegressor':
        model = linear_model.PassiveAggressiveRegressor().fit(train_x, train_y)
    elif model_type == 'Perceptron':
        model = linear_model.Perceptron(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'RandomizedLasso':
        model = linear_model.RandomizedLasso().fit(train_x, train_y)
    elif model_type == 'RandomizedLogisticRegression':
        model = linear_model.RandomizedLogisticRegression().fit(train_x, train_y)
    elif model_type == 'RANSACRegressor':
        model = linear_model.RANSACRegressor().fit(train_x, train_y)
    elif model_type == 'Ridge':
        model = linear_model.Ridge().fit(train_x, train_y)
    elif model_type == 'RidgeClassifier':
        model = linear_model.RidgeClassifier(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'RidgeClassifierCV':
        model = linear_model.RidgeClassifierCV(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'RidgeCV':
        model = linear_model.RidgeCV().fit(train_x, train_y)
    elif model_type == 'SGDClassifier':
        model = linear_model.SGDClassifier(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'SGDRegressor':
        model = linear_model.SGDRegressor().fit(train_x, train_y)
    elif model_type == 'TheilSenRegressor':
        model = linear_model.TheilSenRegressor().fit(train_x, train_y)
    # lars_path, lasso_path, lasso_stability_path, logistic_regression_path,
    # orthogonal_mp and orthogonal_mp_gram are module-level functions that
    # return arrays rather than estimators, so they have no fit method and
    # cannot be dispatched like the estimators above.
    elif model_type == 'LinearSVC':
        model = svm.LinearSVC(class_weight=class_weight).fit(train_x, train_y)
    elif model_type == 'SVC':
        model = svm.SVC(class_weight=class_weight, degree=3).fit(train_x, train_y)
    else:
        raise NotImplementedError('Model not implemented')

        
    logger.info("Finished training.")
    endTrainTime = time()
    trainTime = endTrainTime - startTrainTime
    logger.info("Training time : %d seconds" % trainTime)


    logger.info("Start predicting train set...")
    train_pred_y = model.predict(train_x)
    logger.info("Finished predicting train set.")
    logger.info("Start predicting test set...")
    test_pred_y = model.predict(test_x)
    logger.info("Finished predicting test set.")
    endTestTime = time()
    testTime = endTestTime - endTrainTime
    logger.info("Testing time : %d seconds" % testTime)
    totalTime += trainTime + testTime

    train_pred_y = np.round(train_pred_y)
    test_pred_y = np.round(test_pred_y)

    np.savetxt(out_dir + '/preds/best_test_pred' + '.txt', test_pred_y, fmt='%i')

    logger.info('[TRAIN] Acc: %.3f' % (accuracy_score(train_y, train_pred_y)))
    logger.info('[TEST]  Acc: %.3f' % (accuracy_score(test_y, test_pred_y)))

    return accuracy_score(test_y, test_pred_y)