コード例 #1
0
def sklearn_liner_model_regressions(xTrain, xTest, yTrain, yTest):
    """Fit a battery of scikit-learn linear models and keep the accurate ones.

    Each candidate estimator is fitted on the training split and used to
    predict both splits.  The predictions are passed to
    ``calculate_prediction_error`` and the returned record is kept only when
    both its "Test Average Error" and "Train Average Error" are below 30.
    A model that raises at any step is reported on stdout and skipped.

    Args:
        xTrain: Training features.
        xTest: Test features.
        yTrain: Training targets.
        yTest: Test targets.

    Returns:
        A ``pandas.DataFrame`` holding the error records of the retained
        models (empty when no model qualifies).
    """
    modelForConsideration = pd.DataFrame()

    # Only estimator *instances* belong here.  The path helpers (enet_path,
    # lars_path, lasso_path) were previously called inside this list: they ran
    # eagerly outside the try block (so a failure aborted the whole function)
    # and return tuples, which have no fit()/predict().
    # NOTE(review): the classifiers are kept from the original list; they will
    # presumably fail on continuous targets and be reported below -- confirm
    # whether they are wanted.
    LinerModels = [
        linear_model.ARDRegression(),
        linear_model.BayesianRidge(),
        linear_model.ElasticNet(),
        linear_model.ElasticNetCV(),
        linear_model.HuberRegressor(),
        linear_model.Lars(),
        linear_model.LarsCV(),
        linear_model.Lasso(),
        linear_model.LassoCV(),
        linear_model.LassoLars(),
        linear_model.LassoLarsCV(),
        linear_model.LassoLarsIC(),
        linear_model.LinearRegression(),
        linear_model.MultiTaskLasso(),
        linear_model.MultiTaskElasticNet(),
        linear_model.MultiTaskLassoCV(),
        linear_model.MultiTaskElasticNetCV(),
        linear_model.OrthogonalMatchingPursuit(),
        linear_model.OrthogonalMatchingPursuitCV(),
        linear_model.PassiveAggressiveClassifier(),
        linear_model.PassiveAggressiveRegressor(),
        linear_model.Perceptron(),
        linear_model.RANSACRegressor(),
        linear_model.Ridge(),
        linear_model.RidgeClassifier(),
        linear_model.RidgeClassifierCV(),
        linear_model.RidgeCV(),
        linear_model.SGDClassifier(),
        linear_model.SGDRegressor(),
        linear_model.TheilSenRegressor(),
    ]

    for model in LinerModels:
        modelName = type(model).__name__
        try:
            model.fit(xTrain, yTrain)
            yTrainPredict = model.predict(xTrain)
            yTestPredict = model.predict(xTest)
            errorList = calculate_prediction_error(modelName, yTestPredict,
                                                   yTest, yTrainPredict,
                                                   yTrain)

            test_ok = errorList["Test Average Error"][0] < 30
            train_ok = errorList["Train Average Error"][0] < 30
            if test_ok and train_ok:
                try:
                    # DataFrame.append was removed in pandas 2.0; concat is
                    # the supported equivalent.
                    # Assumes errorList is a DataFrame/Series -- TODO confirm.
                    modelForConsideration = pd.concat(
                        [modelForConsideration, errorList])
                except Exception as e:
                    print(e)

        except Exception as e:
            # Best-effort sweep: report the failure (with its cause) and move
            # on to the next model.
            print(f"Error occurred while preparing Model {modelName}: {e}")
    return modelForConsideration
 def test_model_multi_task_elasticnet_cv(self):
     """Convert a fitted two-target MultiTaskElasticNetCV to ONNX and dump it."""
     estimator = linear_model.MultiTaskElasticNetCV()
     model, X = fit_regression_model(estimator, n_targets=2)

     # Single float input; the second dimension is the column count of X.
     input_spec = [("input", FloatTensorType([None, X.shape[1]]))]
     model_onnx = convert_sklearn(model, "multi-task elasticnet cv",
                                  input_spec)
     self.assertIsNotNone(model_onnx)

     # Condition string handed to dump_data_and_model as ``allow_failure``.
     failure_condition = (
         "StrictVersion(onnxruntime.__version__)<= StrictVersion('0.2.1')")
     dump_data_and_model(
         X,
         model,
         model_onnx,
         verbose=False,
         basename="SklearnMultiTaskElasticNetCV-Dec4",
         allow_failure=failure_condition,
     )
コード例 #3
0
ファイル: multi_dof_lasso.py プロジェクト: okankoc/LSDP
def multi_task_elastic_net(X, q, cv=False, alpha=0.0038, l1_ratio=0.632):
    '''
    Fit a Multi Task Elastic Net (l1 + l2 regularization, features shared
    across output dimensions) of q on X, without an intercept.

    With cv truthy, a 3-fold cross-validated model searches ten l1 ratios
    evenly spaced in [0.1, 1.0]; otherwise the given alpha and l1_ratio are
    used directly.  Running cross-val gives alpha = 0.0038, l1_ratio = 0.632.

    Returns the tuple (theta, res): the transposed coefficient matrix and the
    residual q - X.dot(theta).
    '''
    if not cv:
        estimator = lm.MultiTaskElasticNet(alpha=alpha,
                                           l1_ratio=l1_ratio,
                                           fit_intercept=False)
    else:
        candidate_ratios = np.linspace(0.1, 1.0, 10)
        estimator = lm.MultiTaskElasticNetCV(l1_ratio=candidate_ratios,
                                             eps=1e-3,
                                             n_alphas=100,
                                             alphas=None,
                                             fit_intercept=False,
                                             cv=3,
                                             verbose=True,
                                             n_jobs=-1)

    estimator.fit(X, q)
    theta = estimator.coef_.T
    residual = q - np.dot(X, theta)
    return theta, residual
コード例 #4
0
ファイル: edmd.py プロジェクト: PastorD/ensemblempc
    def tune_fit(self,
                 X,
                 X_d,
                 Z,
                 Z_dot,
                 U=None,
                 U_nom=None,
                 l1_ratio=array([1])):
        """Fit the EDMD matrices with a cross-validated multi-task elastic net.

        The transpose of ``Z`` (with the transpose of ``U`` appended column-wise
        when an input is supplied) is regressed onto the transpose of ``Z_dot``
        using 5-fold ``MultiTaskElasticNetCV`` without an intercept.  From the
        fitted coefficients the method sets ``self.A`` and, when an input is
        supplied, ``self.B``; it also sets ``self.C``, ``self.l1`` and
        ``self.l1_ratio``.

        Args:
            X: Unused by this method.
            X_d: Unused by this method.
            Z: Lifted states; transposed before fitting.
            Z_dot: Regression targets; transposed before fitting.
            U: Optional inputs; transposed and appended to the regressors.
            U_nom: Optional; only checked against ``None``.
            l1_ratio: Candidate l1 ratios handed to the cross-validation.

        Raises:
            Exception: If ``self.override_C`` is falsy (learning C is not
                implemented).  ``self.A`` (and ``self.B``) are already
                assigned at that point.
        """
        # NOTE(review): if U is None while U_nom is given, U.transpose() below
        # raises AttributeError -- confirm whether the check should be on U
        # alone.
        has_input = not (U is None and U_nom is None)

        # Construct EDMD matrices using Elastic Net L1 and L2 regularization
        if has_input:
            regressors = concatenate((Z.transpose(), U.transpose()), axis=1)
        else:
            regressors = Z.transpose()
        targets = Z_dot.transpose()

        # `normalize=False` used to be passed here; it was the default and the
        # parameter has been removed from scikit-learn (>= 1.2), where passing
        # it raises TypeError.  Omitting it keeps the behavior on old releases.
        reg_model_cv = linear_model.MultiTaskElasticNetCV(l1_ratio=l1_ratio,
                                                          fit_intercept=False,
                                                          cv=5,
                                                          n_jobs=-1,
                                                          selection='random')
        reg_model_cv.fit(regressors, targets)

        # Leading n_lift columns belong to the state, the rest to the input.
        self.A = reg_model_cv.coef_[:self.n_lift, :self.n_lift]
        if has_input:
            self.B = reg_model_cv.coef_[:self.n_lift, self.n_lift:]

        if self.override_C:
            # C = [I 0], scaled element-wise by the transposed Z_std.
            self.C = zeros((self.n, self.n_lift))
            self.C[:self.n, :self.n] = eye(self.n)
            self.C = multiply(self.C, self.Z_std.transpose())

        else:
            raise Exception(
                'Warning: Learning of C not implemented for regularized regression.'
            )

        self.l1 = reg_model_cv.alpha_
        self.l1_ratio = reg_model_cv.l1_ratio_

        print('EDMD l1: ', self.l1, self.l1_ratio)
コード例 #5
0
def run_regression(args):
    """Fit a multi-task elastic net and flag significant coefficients.

    ``args`` is indexable: element 0 is the response data (spike data) and
    element 1 holds the regressors.  A ``MultiTaskElasticNetCV`` with an
    intercept is fitted, cross-validated, and every coefficient row is passed
    to ``permutation_test`` together with the matching response column.

    Returns a tuple ``(coeff, num_sig, score)``: the fitted coefficients, an
    array of the same shape holding 1 where the permutation p-value is
    <= 0.05 (0 elsewhere), and the mean absolute cross-validation score.
    """
    response = args[0]
    design = args[1]

    estimator = linear_model.MultiTaskElasticNetCV(fit_intercept=True)
    estimator.fit(design, response)
    coeff = estimator.coef_

    cv_scores = cross_val_score(estimator, design, response)

    # One row of coefficients per unit; mark the significant entries.
    num_sig = np.zeros(coeff.shape)
    n_units = coeff.shape[0]
    for unit in range(n_units):
        unit_coeff = coeff[unit, :]
        unit_response = response[:, unit]
        p_values = permutation_test(unit_coeff, unit_response, design)
        significant = np.where(p_values <= 0.05)[0]
        num_sig[unit, significant] = 1

    mean_abs_score = abs(cv_scores).mean()
    return coeff, num_sig, mean_abs_score
コード例 #6
0
 # Project both test sets with the PCAs referenced as pca_train_1/2.
 test_1_pc = pca_train_1.transform(test_1)  # t x pc_num
 test_2_pc = pca_train_2.transform(test_2)
 # Optionally smooth every projected data set.  gaussian_filter1d filters
 # along the last axis by default, so each array is transposed before and
 # after the call to smooth along its first axis.
 if smooth_flag:
     train_1_pc = gaussian_filter1d(train_1_pc.T, sigma).T
     train_2_pc = gaussian_filter1d(train_2_pc.T, sigma).T
     test_1_pc = gaussian_filter1d(test_1_pc.T, sigma).T
     test_2_pc = gaussian_filter1d(test_2_pc.T, sigma).T
 # Read the explained-variance statistics from the PCAs (not written to disk
 # in the visible lines).
 train_1_var_ratio = pca_train_1.explained_variance_ratio_
 train_2_var_ratio = pca_train_2.explained_variance_ratio_  # 1 x pc_num
 train_2_var = pca_train_2.explained_variance_  # 1 x pc_num
 # fit into model: regularization or linear regression
 if regularization_flag == True:  # use regularization model
     # Initialize and fit the model.  A single candidate alpha (0.01) is
     # supplied, so there is no alpha grid for the cross-validation to search.
     reg = linear_model.MultiTaskElasticNetCV(
         max_iter=10000, n_jobs=4, alphas=[0.01])
     reg.fit(train_1_pc, train_2_pc)
     # Predict the second test set from the first; the error is
     # prediction minus target.
     predict_reg = reg.predict(test_1_pc)
     err_reg = predict_reg - test_2_pc
     # Save the prediction as <mask_out_dir>run_<this_run>_regularization_predict_001.npy
     out_file = mask_out_dir + 'run_' + str(
         this_run) + '_regularization_predict_001.npy'
     np.save(out_file, predict_reg)
     # Ratio of the overall error variance to the overall target variance
     # (both computed over all elements), plus the JSON path it is reported to.
     var_ratio = err_reg.var() / test_2_pc.var()
     out_file_json = mask_out_dir + 'run_' + str(
         this_run) + '_regularization_predict_001.json'
     with open(out_file_json, 'w+') as outfile:
         json.dump(
             'variance ratio (err_var / ans_var): %f' %
コード例 #7
0
                            else:
                                train_1 = np.concatenate(
                                    (train_1,
                                     np.load(sub_1_data_dir + sub_1 + '_' +
                                             mask_1 + '_run_' + str(run) +
                                             '_normalized.npy')))
                                train_2 = np.concatenate(
                                    (train_2,
                                     np.load(sub_2_data_dir + sub_2 + '_' +
                                             mask_2 + '_run_' + str(run) +
                                             '_normalized.npy')))

                        # fit into model: regularization or linear regression
                        if regularization_flag == True:  # use regularization model
                            # Initialize and fit the model with default
                            # cross-validation settings (only max_iter and
                            # n_jobs are overridden).
                            reg = linear_model.MultiTaskElasticNetCV(
                                max_iter=10000, n_jobs=4)
                            reg.fit(train_1, train_2)
                            # Timestamp taken right after fitting.
                            t3 = time.time()
                            # Predict on the test set; the error is
                            # prediction minus target.
                            predict_reg = reg.predict(test_1)
                            err_reg = predict_reg - test_2
                            # Timestamp taken right after prediction.
                            t4 = time.time()
                            # print('regularization squared error: %f' % np.sum(err_reg * err_reg))
                            # print('regularization test_2 square: %f' % np.sum(test_2 * test_2))
                            # Write the prediction to
                            # <mask_out_dir>run_<this_run>_regularization_predict.npy
                            out_file = mask_out_dir + 'run_' + str(
                                this_run) + '_regularization_predict.npy'
                            np.save(out_file, predict_reg)
                            # Loop over the columns of test_2, computing the
                            # error variance of each column.
                            var_ratio = []
                            for v in range(0, test_2.shape[1]):
                                dif_var = err_reg[:, v].var()
コード例 #8
0
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
from sklearn.model_selection import train_test_split

# Synthetic multi-output regression problem: one feature, ten targets.
x, y = datasets.make_regression(n_samples=1000, n_features=1, n_targets=10, noise=10, random_state=0)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

# Multi-task elastic net with cross-validated alpha.
# The script used to assign MultiTaskElasticNet, MultiTaskLasso and
# MultiTaskLassoCV to `reg` first, each immediately overwritten, so only this
# estimator was ever fitted.  The bare positional 0.1 bound to `l1_ratio` on
# old scikit-learn releases; constructor arguments of the CV estimators are
# keyword-only in current releases, so it is spelled out.
reg = linear_model.MultiTaskElasticNetCV(l1_ratio=0.1)

reg.fit(x_train, y_train)

print(reg.coef_, reg.intercept_)

y_pred = reg.predict(x_test)

# Mean absolute error
print(mean_absolute_error(y_test, y_pred))

# Mean squared error
print(mean_squared_error(y_test, y_pred))

# R2评分
コード例 #9
0
ファイル: mylinear.py プロジェクト: austinkk/competition_lib
            return False
        self.model = linear_model.LassoCV().fit(self.tr_data, self.tr_label)
        return True
    
    def train_with_RidgeCV(self):
        """Fit a ``RidgeClassifierCV`` on the stored training data.

        Reads ``self.tr_data`` and ``self.tr_label`` and stores the fitted
        estimator in ``self.model``.

        Returns:
            ``False`` (after printing a message) when the training data or
            labels are missing, ``True`` once the model has been fitted.
        """
        # Identity checks: `== None` on an array compares element-wise, and
        # the former `self.tr_label = None` was a syntax error.
        if self.tr_data is None or self.tr_label is None:
            print("lack of train data or train label")
            return False
        # NOTE(review): the method name says RidgeCV but a classifier is
        # fitted -- confirm this is intended.
        self.model = linear_model.RidgeClassifierCV().fit(self.tr_data, self.tr_label)
        return True

    def train_with_ElasticNetCV(self):
        """Fit a ``MultiTaskElasticNetCV`` on the stored training data.

        Reads ``self.tr_data`` and ``self.tr_label`` and stores the fitted
        estimator in ``self.model``.

        Returns:
            ``False`` (after printing a message) when the training data or
            labels are missing, ``True`` once the model has been fitted.
        """
        # Identity checks: `== None` on an array compares element-wise, and
        # the former `self.tr_label = None` was a syntax error.
        if self.tr_data is None or self.tr_label is None:
            print("lack of train data or train label")
            return False
        self.model = linear_model.MultiTaskElasticNetCV().fit(self.tr_data, self.tr_label)
        return True
        
    def set_default_params(self):
        """Reset ``self.params`` to the defaults: L2 penalty, C=1.0, lbfgs solver."""
        defaults = dict(penalty='l2', C=1.0, solver='lbfgs')
        self.params = defaults
    
    def find_best_params(self, cv = 5):
        """Build the hyper-parameter grid for a parameter search.

        NOTE(review): only the grid construction is visible here; ``cv`` and
        ``param_grid`` are not used in the visible lines -- the rest of the
        method appears to be missing.
        """
        # Candidate values for each hyper-parameter.
        C = [0.1, 0.2, 0.5, 0.8, 1.5, 3, 5]
        fit_intercept = [True, False]
        penalty = ['l1', 'l2']
        solver = ['newton-cg','lbfgs','liblinear','sag','saga']
        # Grid keyed by parameter name.
        param_grid = dict(C = C, fit_intercept = fit_intercept, penalty = penalty, solver = solver)
コード例 #10
0
ファイル: test_model.py プロジェクト: yl3506/iMVPD_denoise
# linear regression step
# Split both data sets: the last 50 rows are held out for testing, the rest
# are used for training.
a_train = a[:img_a_data_shape[3] - 50, :]
a_test = a[img_a_data_shape[3] - 50:, :]
b_train = b[:img_b_data_shape[3] - 50, :]
b_test = b[img_b_data_shape[3] - 50:, :]
# check if the split is in correct size
print(a_train.shape)
print(a_test.shape)
# initialize linear regression model
print("try linear regression model")
regr = linear_model.LinearRegression() # with default settings
# fit in training sets
regr.fit(a_train, b_train)
# get coefficients of the training result
print(regr.coef_)
# testing
predict_lin = regr.predict(a_test)
# compare with the answer and compute the summed squared error
err_lin = predict_lin - b_test
print('squared error: %f' % np.sum(err_lin * err_lin))
print('b_test * b_test: %f' % np.sum(b_test * b_test))

# now try regularization model
clf = linear_model.MultiTaskElasticNetCV()
clf.fit(a_train, b_train)
predict_clf = clf.predict(a_test)
err_clf = b_test - predict_clf
print("squared error: %f" % np.sum(err_clf * err_clf))
# The `%` formatting operator was missing here, which made the whole script a
# syntax error.
print("b_test * b_test: %f" % np.sum(b_test * b_test))
コード例 #11
0
import pandas as pd
import sklearn.linear_model as linear_model
from src.misc.evaluation import mape
import numpy as np

# Load the four CSV splits (first column is the index), in the same order as
# they are named below.
x_train, x_test, y_train, y_test = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ('train_X.csv', 'test_X.csv', 'train_Y.csv', 'test_Y.csv')
)

# Fit a cross-validated multi-task elastic net and predict the test split.
regr_multi_svr = linear_model.MultiTaskElasticNetCV()
regr_multi_svr.fit(x_train, y_train)
test_predict = regr_multi_svr.predict(x_test)

# Report the mean of the values returned by mape().
mymape = mape(test_predict, y_test)
mape_values = np.array(mymape)
print(np.mean(mape_values))
コード例 #12
0
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
import sklearn.linear_model as linear_model

import pandas as pd
import numpy as np

if __name__ == '__main__':

    # Load data (the first CSV column is the index)
    x_train = pd.read_csv('train_X.csv', index_col=0)
    x_test = pd.read_csv('test_X.csv', index_col=0)
    y_train = pd.read_csv('train_Y.csv', index_col=0)
    y_test = pd.read_csv('test_Y.csv', index_col=0)

    pipe_svr = Pipeline([('reg', linear_model.MultiTaskElasticNetCV())])

    print(pipe_svr.get_params().keys())

    # `alphas` expects an array-like of candidate alphas, not a scalar.  The
    # grid previously handed each float straight to `alphas`, which makes
    # every fit fail; each grid point is now a one-element list, so the outer
    # grid search still evaluates one alpha at a time.
    # NOTE(review): 20 alphas x 100 l1 ratios = 2000 candidates, each with its
    # own inner cross-validation.  A plain MultiTaskElasticNet with
    # `reg__alpha` would avoid the nested CV -- confirm intent.
    grid_param_svr = {
        "reg__alphas": [[alpha] for alpha in np.arange(0.0, 2, 0.1)],
        'reg__l1_ratio': np.arange(0, 1, 0.01),
    }

    gs_svr = GridSearchCV(estimator=pipe_svr,
                          param_grid=grid_param_svr,
                          cv=2,
                          scoring='neg_mean_absolute_error',
                          n_jobs=8)

    gs_svr = gs_svr.fit(x_train, y_train)
コード例 #13
0
ファイル: regressions.py プロジェクト: kordk/stoch_epi_lib
    # Cross-validated LASSO of the i-th per-gene output on all inputs; keep
    # the selected alpha.
    LassoSep = lm.LassoCV(max_iter=3000)
    LassoSep.fit(trainDataIn, trainDataOut_byGene[i])
    LassoSepParams[i] = LassoSep.alpha_
    # Same fit restricted to the input columns listed in
    # network_bygene[TranscOrder[i]] ("connected sites"); skipped when that
    # list is empty.
    if len(network_bygene[TranscOrder[i]]):
        trainIntmp = trainDataIn[:, network_bygene[TranscOrder[i]]]
        LassoNetSep = lm.LassoCV(max_iter=3000)
        LassoNetSep.fit(trainIntmp, trainDataOut_byGene[i])
        NetLassoSepParams[i] = LassoNetSep.alpha_

    print("Fit LASSO alpha, gene" + str(i))

# Store the per-gene LASSO alphas collected so far.
parameters["LASSOalpha"] = LassoSepParams
parameters["NetLASSOalpha"] = NetLassoSepParams

# Multi-task elastic net on all outputs at once; record the cross-validated
# alpha and l1 ratio.
ElasticNet = lm.MultiTaskElasticNetCV(max_iter=3000)
ElasticNet.fit(trainDataIn, trainDataOut)
parameters["MTENalpha"] = ElasticNet.alpha_
parameters["MTENl1R"] = ElasticNet.l1_ratio_

print("Fit MT EN alpha & l1 Ratio")

# Per-gene elastic-net hyper-parameters, keyed by gene index.
# NOTE(review): the NetENSep_* dicts are not filled in the visible lines.
ENSep_Alaphas = {}
ENSep_l1Rs = {}
NetENSep_Alaphas = {}
NetENSep_l1Rs = {}

# Separate cross-validated elastic net for every per-gene output.
for i in range(len(trainDataOut_byGene)):
    ENSep = lm.ElasticNetCV(max_iter=3000)
    ENSep.fit(trainDataIn, trainDataOut_byGene[i])
    ENSep_Alaphas[i] = ENSep.alpha_
コード例 #14
0
def predict_atlas(fpaths_refspace_train,
                  fpaths_secspace_train,
                  fpaths_refspace_predict,
                  outlier_removal_ref=None,
                  outlier_removal_sec=None,
                  outlier_removal_cov=None,
                  covariates_to_use=None,
                  regressor='MO-SVR',
                  n_jobs=1,
                  save_predictions=False,
                  save_pipeline=False,
                  verbose=False,
                  outlier_options_ref={},
                  outlier_options_sec={},
                  outlier_options_cov={},
                  regressor_options={'kernel': 'rbf'},
                  pipeline_options={
                      'zscore_X': False,
                      'zscore_y': False,
                      'pca_X': False,
                      'pca_y': False,
                      'rezscore_X': False,
                      'rezscore_y': False,
                      'subselect_X': None,
                      'subselect_y': None,
                      'add_covariates': None
                  }):
    """Predict a secondary channel feature space by fitting an atlas regression
    model on paired "secondary channel - reference channel" training data and
    then performing regression on "reference channel"-only test data.

    Input data is retrieved from files specified in lists of file paths and the
    predicted output data is written to the corresponding paths, appropriately
    named and tagged as 'PREDICTED'.

    The channel names for the predicted channels are added to the metadata
    channels index (also tagged as 'PREDICTED') and the full atlas regression
    objects are also added to the metadata.

    Note that the dictionary defaults are shared between calls; this function
    only reads them (they are unpacked as keyword arguments), never mutates
    them.

    Parameters
    ----------
    fpaths_refspace_train : single string or list of strings
        A path or list of paths (either local from cwd or global) to npy files
        containing training feature space data for the reference channel used
        as the basis of prediction (usually the shape space).
    fpaths_secspace_train : single string or list of strings
        A path or list of paths (either local from cwd or global) to npy files
        containing training feature space data for the secondary channel that
        is to be the target of the regression.
    fpaths_refspace_predict : single string or list of strings
        A path or list of paths (either local from cwd or global) to npy files
        containing prediction feature space data for the reference channel
        based on which the target secondary channel will be predicted.
    outlier_removal_ref : string or None, optional, default None
        If None, no outlier removal is done on the reference feature space.
        Otherwise this must be a string denoting the method for outlier removal
        (one of `absolute_thresh`, `percentile_thresh`,
        `merged_percentile_thresh` or `isolation_forest`). Note that outlier
        removal is only done on training data, not on prediction data.
        See katachi.utilities.outlier_removal.RemoveOutliers for more info.
    outlier_removal_sec : string or None, optional, default None
        If None, no outlier removal is done on the target feature space.
        Otherwise this must be a string denoting the method for outlier removal
        (see outlier_removal_ref above).
    outlier_removal_cov : string or None, optional, default None
        If None, no outlier removal is done based on covariate information.
        Otherwise this must be a string denoting the method for outlier removal
        (see outlier_removal_ref above).
    covariates_to_use : string, list of strings or None, optional, default None
        A string denoting the selection tree to select a covariate to be used
        for outlier detection from the HierarchicalData covariate object. Can
        also be a list of multiple such strings, in which case the covariates
        are merged into an fspace. The specified covariates must each be single
        numeric columns.
    regressor : string or sklearn regressor instance, optional, default 'MO-SVR'
        If a string, must be one of 'MO-SVR', 'MT-ENetCV', 'MT-Lasso', 'MLP'.
        In the first case a multioutput SVR is used for regression, in the
        second a Multi-Task Elastic Net with Cross Validation, in the third a
        Multi-Task Lasso linear regression (cross-validated), and in the
        fourth a Multi-Layer Perceptron. If an sklearn(-like) regressor
        instance is passed, it must be a multivariate-multivariable regressor
        that supports the fit and predict methods.
    n_jobs : int, optional, default 1
        Number of processes available for use during multi-processed model
        fitting and prediction. Works for 'MO-SVR', 'MT-ENetCV' and 'MT-Lasso'
        regressors.
        WARNING: The 'MLP' regressor also performs multi-processing but does
        not seem to support an n_jobs argument.
    save_predictions : bool, optional, default False
        If True, the predictions are saved in the corresponding paths and the
        metadata is updated.
    save_pipeline : bool, optional, default False
        If True, the atlas pipeline object is saved in the corresponding paths
        as a separate file with the name `<prim_ID>_atlas_pipeline.pkl`.
    verbose : bool, optional, default False
        If True, more information is printed.
    outlier_options_ref : dict, optional, default {}
        kwarg dictionary for the chosen outlier removal method to be applied
        to the reference feature space.
        See katachi.utilities.outlier_removal.RemoveOutliers for more info.
    outlier_options_sec : dict, optional, default {}
        kwarg dictionary for the chosen outlier removal method to be applied
        to the target feature space.
        See katachi.utilities.outlier_removal.RemoveOutliers for more info.
    outlier_options_cov : dict, optional, default {}
        kwarg dictionary for the chosen outlier removal method to be applied
        to the covariates. The default is to fall back to the defaults of
        katachi.utilities.outlier_removal.RemoveOutliers.
    regressor_options : dict, optional, default is a standard RBF MO-SVR
        kwarg dictionary for the chosen regressor's instantiation.
        See the chosen regressor's doc string for more information.
        It is forwarded for 'MO-SVR', 'MT-Lasso' and 'MLP' but not for
        'MT-ENetCV'. The default is written for the SVR, so callers selecting
        'MT-Lasso' or 'MLP' presumably need to pass options valid for those
        estimators.
    pipeline_options : dict, optional, default is no additional processing
        kwarg dictionary for AtlasPipeline instantiation.
        See the AtlasPipeline doc string for more information.

    Returns
    -------
    secspace_predict : array of shape (n_predict_samples, n_secspace_features)
        Predicted secondary channel feature space.
    refspace_predict_idx : array of shape (n_predict_samples)
        Index array mapping rows (cells) of secspace_predict to paths (prims)
        in fpaths_refspace_predict.
    atlas_pipeline : predict_atlas.AtlasPipeline instance
        Fitted instance of the regressor pipeline.

    NOTE(review): no return statement is present in this function as shown;
    the documented values are computed but the return appears to be missing
    from this excerpt -- confirm against the original module.

    Raises
    ------
    IOError
        If the numbers of training paths differ, if the training spaces do not
        hold the same number of cells, if training and prediction reference
        spaces differ in their number of features, or if covariate-based
        outlier removal is requested without covariates_to_use.
    ValueError
        If regressor is an unknown string or an object lacking a callable
        fit or predict.
    """

    # All messages use the single-argument ``print(...)`` form, which behaves
    # identically as a Python 2 statement and a Python 3 function call.

    #--------------------------------------------------------------------------

    ### Load data

    if verbose: print("\n# Loading data...")

    # Handle cases of single paths for training data
    sec_is_str = isinstance(fpaths_secspace_train, str)
    ref_is_str = isinstance(fpaths_refspace_train, str)
    if sec_is_str and ref_is_str:
        fpaths_secspace_train = [fpaths_secspace_train]
        fpaths_refspace_train = [fpaths_refspace_train]
    elif (sec_is_str or ref_is_str
          or len(fpaths_secspace_train) != len(fpaths_refspace_train)):
        raise IOError("Different number of secondary and reference space " +
                      "input file paths specified.")

    # Handle cases of single paths for prediction data
    if isinstance(fpaths_refspace_predict, str):
        fpaths_refspace_predict = [fpaths_refspace_predict]

    # Load training data
    secspace_train = []
    refspace_train = []
    for secpath, refpath in zip(fpaths_secspace_train, fpaths_refspace_train):
        secspace_train.append(np.load(secpath))
        refspace_train.append(np.load(refpath))
    secspace_train = np.concatenate(secspace_train, axis=0)
    refspace_train = np.concatenate(refspace_train, axis=0)

    # Check that everything is fine
    if secspace_train.shape[0] != refspace_train.shape[0]:
        raise IOError("Secondary and reference space do not have the same " +
                      "number of cells.")

    # Load prediction data; for every row remember which path it came from
    refspace_predict = []
    refspace_predict_idx = []
    for idx, refpath in enumerate(fpaths_refspace_predict):
        refspace_predict.append(np.load(refpath))
        refspace_predict_idx.append([idx] * refspace_predict[-1].shape[0])
    refspace_predict = np.concatenate(refspace_predict, axis=0)
    refspace_predict_idx = np.concatenate(refspace_predict_idx, axis=0)

    # Check that everything is fine
    if refspace_train.shape[1] != refspace_predict.shape[1]:
        raise IOError("Reference feature spaces for training and prediction " +
                      "do not have the same number of features!")

    # Handle covariate loading
    if outlier_removal_cov is not None:

        # Sanity checks
        if covariates_to_use is None:
            raise IOError(
                "When outlier_removal_cov is not None, covariates " +
                "to use for determining outliers must be specified " +
                "in covariates_to_use!")

        # Handle single covariates
        if isinstance(covariates_to_use, str):
            covariates_to_use = [covariates_to_use]

        # Load covariates
        covars = []
        for refpath in fpaths_refspace_train:

            # Create covarpath (the first 10 characters of the file name are
            # used as the prefix of the covariates file)
            revdir, reffile = os.path.split(refpath)
            covpath = os.path.join(revdir, reffile[:10] + '_covariates.pkl')

            # Load covar file
            # SECURITY: pickle.load executes arbitrary code from the file;
            # only use with trusted covariate files.
            with open(covpath, 'rb') as covfile:
                covtree = pickle.load(covfile)

            # Get relevant covariates, one column per requested covariate
            covs2use = [
                np.expand_dims(covtree._gad(c2u), -1)
                for c2u in covariates_to_use
            ]
            covs2use = np.concatenate(covs2use, axis=1)

            # Add to other samples
            covars.append(covs2use)

        # Concatenate
        covars = np.concatenate(covars)

    #--------------------------------------------------------------------------

    ### Prepare regressor

    # Report
    if verbose: print("\n# Preparing regressor...")

    # Multi-Output Support Vector Regression with RBF Kernel
    if regressor == 'MO-SVR':
        svr = svm.SVR(**regressor_options)
        regressor = multioutput.MultiOutputRegressor(svr, n_jobs=n_jobs)

    # Multi-task Elastic Net Regression with Cross Validation
    # (regressor_options is not forwarded here)
    elif regressor == 'MT-ENetCV':
        regressor = linear_model.MultiTaskElasticNetCV(random_state=42,
                                                       n_jobs=n_jobs)

    # Multivariate-Multivariable Linear Regression by Multi-Task Lasso
    elif regressor == 'MT-Lasso':
        regressor = linear_model.MultiTaskLassoCV(random_state=42,
                                                  n_jobs=n_jobs,
                                                  **regressor_options)

    # Multi-Layer Perceptron Regressor
    elif regressor == 'MLP':
        regressor = neural_network.MLPRegressor(random_state=42,
                                                **regressor_options)

    # Other regressor strings
    elif isinstance(regressor, str):
        raise ValueError('Regressor not recognized.')

    # Regressor object given as argument
    else:

        # Check if object has fit method
        fit_attr = getattr(regressor, "fit", False)
        if not callable(fit_attr):
            raise ValueError("Regressor object has no 'fit' method.")

        # Check if object has predict method
        predict_attr = getattr(regressor, "predict", False)
        if not callable(predict_attr):
            raise ValueError("Regressor object has no 'predict' method.")

    #--------------------------------------------------------------------------

    ### Remove outliers from training data

    # The former two-part progress message ("Started with N," followed by
    # "removed M, kept K samples" on the same line) is emitted as one line
    # after the removal, yielding the same text.

    # Find and remove outliers based on covariate values
    if outlier_removal_cov is not None:

        # Report
        if verbose:
            print("\n# Removing outliers based on covariates...")
        n_before = refspace_train.shape[0]

        # Find and remove outliers
        orem_cov = RemoveOutliers(outlier_removal_cov, **outlier_options_cov)
        orem_cov.fit(covars)
        covars, (refspace_train, secspace_train) = orem_cov.transform(
            covars, [refspace_train, secspace_train])

        # Report
        if verbose:
            print("Started with %i, removed %i, kept %i samples" %
                  (n_before, orem_cov.X_removed_, refspace_train.shape[0]))

    # Find and remove outliers based on reference space
    if outlier_removal_ref is not None:

        # Report
        if verbose:
            print("\n# Removing reference outliers...")
        n_before = refspace_train.shape[0]

        # Find and remove outliers
        orem_ref = RemoveOutliers(outlier_removal_ref, **outlier_options_ref)
        orem_ref.fit(refspace_train)
        refspace_train, secspace_train = orem_ref.transform(
            refspace_train, secspace_train)

        # Report
        if verbose:
            print("Started with %i, removed %i, kept %i samples" %
                  (n_before, orem_ref.X_removed_, refspace_train.shape[0]))

    # Find and remove outliers based on secondary space
    if outlier_removal_sec is not None:

        # Report
        if verbose:
            print("\n# Removing target outliers...")
        n_before = refspace_train.shape[0]

        # Find and remove outliers
        orem_sec = RemoveOutliers(outlier_removal_sec, **outlier_options_sec)
        orem_sec.fit(secspace_train)
        secspace_train, refspace_train = orem_sec.transform(
            secspace_train, refspace_train)

        # Report
        if verbose:
            print("Started with %i, removed %i, kept %i samples" %
                  (n_before, orem_sec.X_removed_, refspace_train.shape[0]))

    #--------------------------------------------------------------------------

    ### Fit and predict

    # Construct pipeline
    atlas_pipeline = AtlasPipeline(regressor,
                                   verbose=verbose,
                                   **pipeline_options)

    # Fit
    if verbose: print("\n# Fitting...")
    atlas_pipeline.fit(refspace_train, secspace_train)

    # Predict
    if verbose: print("\n# Predicting...")
    secspace_predict = atlas_pipeline.predict(refspace_predict)

    #--------------------------------------------------------------------------

    ### Update the metadata

    if save_predictions:

        if verbose: print("\n# Saving metadata...")

        # For each path...
        for idx, refpath in enumerate(fpaths_refspace_predict):

            # Load metadata file
            # SECURITY: pickle.load executes arbitrary code from the file;
            # only use with trusted metadata files.
            refdir, reffname = os.path.split(refpath)
            prim_ID = reffname[:10]
            metapath = os.path.join(refdir, prim_ID + "_stack_metadata.pkl")
            with open(metapath, "rb") as metafile:
                metadict = pickle.load(metafile)

            # Construct channel designation: the text between "8bit_" and the
            # next underscore of the first secondary training path
            pattern = re.compile(r"8bit_(.+?(?=_))")
            secpath = fpaths_secspace_train[0]
            channel = re.search(pattern, secpath).group(1) + "_PREDICTED"

            # Add channel to metadata
            if channel not in metadict["channels"]:
                metadict["channels"].append(channel)

            # Save metadata
            with open(metapath, "wb") as outfile:
                pickle.dump(metadict,
                            outfile,
                            protocol=pickle.HIGHEST_PROTOCOL)

    #--------------------------------------------------------------------------

    ### Save fitted atlas pipeline as separate metadata file

    if save_pipeline:

        if verbose: print("\n# Saving pipeline...")

        # For each path...
        for idx, refpath in enumerate(fpaths_refspace_predict):

            # Load atlas metadata file if it exists
            # SECURITY: pickle.load executes arbitrary code from the file;
            # only use with trusted pipeline files.
            refdir, reffname = os.path.split(refpath)
            prim_ID = reffname[:10]
            atlaspath = os.path.join(refdir, prim_ID + "_atlas_pipeline.pkl")
            if os.path.isfile(atlaspath):
                with open(atlaspath, "rb") as atlasfile:
                    atlasdict = pickle.load(atlasfile)
            else:
                atlasdict = {}

            # Construct designation: the text between "8bit_" and the next
            # dot of the first secondary training path (raw string so that
            # "\." is not treated as a string escape)
            pattern = re.compile(r"8bit_(.+?(?=\.))")
            secpath = fpaths_secspace_train[0]
            atlasname = re.search(pattern, secpath).group(1) + "_ATLASPIP"

            # Add pipeline to dict
            atlasdict[atlasname] = atlas_pipeline

            # Save atlas dict
            with open(atlaspath, "wb") as outfile:
                pickle.dump(atlasdict,
                            outfile,
                            protocol=pickle.HIGHEST_PROTOCOL)

    #--------------------------------------------------------------------------

    ### Save the predictions

    if save_predictions:

        if verbose: print("\n# Saving predictions...")

        # For each path...
        for idx, refpath in enumerate(fpaths_refspace_predict):

            # Construct outpath: swap everything after "8bit_" in the
            # reference path for the corresponding part of the first secondary
            # training path, with its 4-character extension replaced by
            # "_PREDICTED.npy"
            to_replace = refpath[refpath.index("8bit_") + 5:]
            secpath = fpaths_secspace_train[0]
            replace_by = secpath[secpath.index("8bit_") + 5:]
            replace_by = replace_by[:-4] + "_PREDICTED.npy"
            outpath = refpath.replace(to_replace, replace_by)

            # Write only the rows that belong to this path
            np.save(outpath, secspace_predict[refspace_predict_idx == idx])

    #--------------------------------------------------------------------------

    ### Return results

    # Report
    if verbose: print "\nDone!"

    # Return
    return secspace_predict, refspace_predict_idx, atlas_pipeline
コード例 #15
0
ファイル: edmd.py プロジェクト: xileyicheng/keedmd
    def fit(self, X, X_d, Z, Z_dot, U=None, U_nom=None, X_dot=None):
        """
        Fit the EDMD matrices A, B and C and store them on the object.

        With ``self.l1 == 0`` the matrices come from one least-squares solve
        (pseudo-inverse). Otherwise they are fitted with Elastic Net
        regression, using ``self.l1`` as ``alpha`` together with
        ``self.l1_ratio``.

        The data arrays are used with one column per sample: they are stacked
        along axis 0 and transposed before being handed to scikit-learn.

        Inputs:
        - X: original state; the regression target for C
        - X_d: desired state (not used by this method)
        - Z: lifted state; the slicing of the solution assumes it has
          ``self.n_lift`` rows
        - Z_dot: derivative of the lifted state; the regression target for
          A and B
        - U: control input, stacked under Z when a control term is fitted
        - U_nom: nominal control input; only consulted to decide whether a
          control term is fitted (skipped when both U and U_nom are None)
        - X_dot: not used by this method

        Results:
        - self.A: lifted dynamics matrix
        - self.B: control matrix, or None when no control term is fitted
        - self.C: projection from the lifted state to the original state; a
          scaled identity block when ``self.override_C`` is set
        """
        has_control = not (U is None and U_nom is None)

        if self.l1 == 0.:
            # Construct EDMD matrices as described in M. Korda, I. Mezic, "Linear predictors for nonlinear dynamical systems: Koopman operator meets model predictive control":
            W = concatenate((Z_dot, X), axis=0)
            V = concatenate((Z, U), axis=0) if has_control else Z
            VVt = dot(V, V.transpose())
            WVt = dot(W, V.transpose())
            M = dot(WVt, linalg.pinv(VVt))

            # Rows of M follow W (Z_dot on top of X), columns follow V (Z then U).
            self.A = M[:self.n_lift, :self.n_lift]
            self.B = M[:self.n_lift, self.n_lift:] if has_control else None
            self.C = M[self.n_lift:, :self.n_lift]

            if self.override_C:
                self.C = zeros(self.C.shape)
                self.C[:self.n, :self.n] = eye(self.n)
                self.C = multiply(self.C, self.Z_std.transpose())

        else:
            # Construct EDMD matrices using Elastic Net L1 and L2 regularization
            if has_control:
                features = concatenate((Z.transpose(), U.transpose()), axis=1)
            else:
                features = Z.transpose()
            targets = Z_dot.transpose()

            # max_iter is passed as an int: recent scikit-learn versions
            # reject a float such as 1e5.
            reg_model = linear_model.ElasticNet(alpha=self.l1,
                                                l1_ratio=self.l1_ratio,
                                                fit_intercept=False,
                                                selection='random',
                                                max_iter=int(1e5))
            reg_model.fit(features, targets)

            self.A = reg_model.coef_[:self.n_lift, :self.n_lift]
            # Always assign B so both fitting paths leave the same attributes.
            self.B = reg_model.coef_[:self.n_lift, self.n_lift:] if has_control else None

            if self.override_C:
                self.C = zeros((self.n, self.n_lift))
                self.C[:self.n, :self.n] = eye(self.n)
                self.C = multiply(self.C, self.Z_std.transpose())
            else:
                reg_model_C = linear_model.ElasticNet(alpha=self.l1,
                                                      l1_ratio=self.l1_ratio,
                                                      fit_intercept=False,
                                                      selection='random',
                                                      max_iter=int(1e5))
                reg_model_C.fit(Z.T, X.T)
                self.C = reg_model_C.coef_
コード例 #16
0
def get_regression_estimators(r, regression_models):
    """Store a default-constructed ``linear_model`` estimator under its name.

    Args:
        r: Name of the estimator class to instantiate.
        regression_models: Mapping that receives the new instance under
            key ``r``; it is modified in place.

    Returns:
        None. A name outside the supported list leaves ``regression_models``
        untouched and prints a notice instead.
    """
    supported_names = (
        'ARDRegression',
        'BayesianRidge',
        'ElasticNet',
        'ElasticNetCV',
        'HuberRegressor',
        'Lars',
        'LarsCV',
        'Lasso',
        'LassoCV',
        'LassoLars',
        'LassoLarsCV',
        'LassoLarsIC',
        'LinearRegression',
        'LogisticRegression',
        'LogisticRegressionCV',
        'MultiTaskElasticNet',
        'MultiTaskElasticNetCV',
        'MultiTaskLasso',
        'MultiTaskLassoCV',
        'OrthogonalMatchingPursuit',
        'OrthogonalMatchingPursuitCV',
        'PassiveAggressiveClassifier',
        'PassiveAggressiveRegressor',
        'Perceptron',
        'RANSACRegressor',
        'Ridge',
        'RidgeClassifier',
        'RidgeClassifierCV',
        'RidgeCV',
        'SGDClassifier',
        'SGDRegressor',
        'TheilSenRegressor',
    )

    if r in supported_names:
        # Resolve the class lazily so only the requested estimator is touched.
        estimator_cls = getattr(linear_model, r)
        regression_models[r] = estimator_cls()
        return

    print(
        r +
        " is an unsupported regression type. Check if you have misspelled the name."
    )
コード例 #17
0
ファイル: keedmd.py プロジェクト: PastorD/ensemblempc
    def tune_fit(self, X, X_d, Z, Z_dot, U, U_nom, l1_ratio=array([1])):
        """
        Fit the structured A and B matrices with cross-validated Elastic Net.

        Three separate multi-task regressions are solved, one each for the
        velocity, position and eigenfunction rows of ``Z_dot``. The
        regularisation selected for each is stored in ``self.l1_*`` and
        ``self.l1_ratio_*``.

        The data arrays are used with one column per sample; they are
        transposed before being handed to scikit-learn.

        Inputs:
        - X, X_d: not used by this method
        - Z: lifted state
        - Z_dot: derivative of the lifted state; rows ``[:n/2]`` are treated
          as position terms, ``[n/2:n]`` as velocity terms and ``[n:]`` as
          eigenfunction terms
        - U: control input
        - U_nom: nominal control input, subtracted from U for the
          eigenfunction fit and, when ``self.episodic`` is set, for the
          position and velocity fits as well
        - l1_ratio: candidate L1/L2 mixing values passed to the
          cross-validated estimator

        Results:
        - self.A, self.B, self.C and the selected ``self.l1_pos/vel/eig`` and
          ``self.l1_ratio_pos/vel/eig``

        Raises:
        - Exception: if ``self.override_C`` is false (learning C is not
          implemented here), or if the model is not episodic and either
          ``self.K_p`` or ``self.K_d`` is None. Both checks run before any
          fitting so an invalid configuration fails immediately.
        """
        # Validate the configuration up front: the cross-validated fits below
        # are expensive and their results would be discarded anyway.
        if not self.override_C:
            raise Exception(
                'Warning: Learning of C not implemented for structured regression.'
            )
        if not self.episodic and (self.K_p is None or self.K_d is None):
            raise Exception('Nominal controller gains not defined.')

        half_n = int(self.n / 2)

        # max_iter is passed as an int: recent scikit-learn versions reject a
        # float such as 1e5.
        reg_model_cv = linear_model.MultiTaskElasticNetCV(l1_ratio=l1_ratio,
                                                          fit_intercept=False,
                                                          cv=5,
                                                          n_jobs=-1,
                                                          selection='random',
                                                          max_iter=int(1e5))

        # Solve least squares problem to find A and B for velocity terms:
        if self.episodic:
            input_vel = concatenate((Z, U - U_nom), axis=0).T
        else:
            input_vel = concatenate((Z, U), axis=0).T
        output_vel = Z_dot[half_n:self.n, :].T

        reg_model_cv.fit(input_vel, output_vel)

        sol_vel = atleast_2d(reg_model_cv.coef_)
        A_vel = sol_vel[:, :self.n_lift]
        B_vel = sol_vel[:, self.n_lift:]
        self.l1_vel = reg_model_cv.alpha_
        self.l1_ratio_vel = reg_model_cv.l1_ratio_

        # Construct A matrix
        self.A = zeros((self.n_lift, self.n_lift))
        self.A[:half_n, half_n:self.n] = eye(half_n)  # Known kinematics
        self.A[half_n:self.n, :] = A_vel
        self.A[self.n:, self.n:] = diag(self.basis.Lambda)

        # Solve least squares problem to find B for position terms:
        if self.episodic:
            input_pos = (U - U_nom).T
        else:
            input_pos = U.T
        output_pos = (Z_dot[:half_n, :] - dot(self.A[:half_n, :], Z)).T
        reg_model_cv.fit(input_pos, output_pos)
        B_pos = atleast_2d(reg_model_cv.coef_)
        self.l1_pos = reg_model_cv.alpha_
        self.l1_ratio_pos = reg_model_cv.l1_ratio_

        # Solve least squares problem to find B for eigenfunction terms:
        input_eig = (U - U_nom).T
        output_eig = (Z_dot[self.n:, :] - dot(self.A[self.n:, :], Z)).T
        reg_model_cv.fit(input_eig, output_eig)
        B_eig = atleast_2d(reg_model_cv.coef_)
        self.l1_eig = reg_model_cv.alpha_
        self.l1_ratio_eig = reg_model_cv.l1_ratio_

        # Construct B matrix:
        self.B = concatenate((B_pos, B_vel, B_eig), axis=0)

        # C is the scaled identity block (override_C was verified above).
        self.C = zeros((self.n, self.n_lift))
        self.C[:self.n, :self.n] = eye(self.n)
        self.C = multiply(self.C, self.Z_std.transpose())

        if not self.episodic:
            # Uses both gain matrices, which is why both were checked above.
            self.A[self.n:, :self.n] -= dot(
                self.B[self.n:, :], concatenate((self.K_p, self.K_d), axis=1))

        print('KEEDMD l1 (pos, vel, eig): ', self.l1_pos, self.l1_vel,
              self.l1_eig)
        print('KEEDMD l1 ratio (pos, vel, eig): ', self.l1_ratio_pos,
              self.l1_ratio_vel, self.l1_ratio_eig)
コード例 #18
0
def run_simple_model(train_x, train_y, dev_x, dev_y, test_x, test_y, model_type, out_dir=None, class_weight=None):
    """Train one scikit-learn model chosen by name and return its test accuracy.

    The model is fitted on the training set, used to predict both the
    training and the test set, and the rounded predictions are scored with
    ``accuracy_score``.

    Args:
        train_x, train_y: Training features and labels.
        dev_x, dev_y: Accepted for interface compatibility; not used.
        test_x, test_y: Test features and labels.
        model_type: Name of a ``sklearn.linear_model`` estimator class, or
            ``'LinearSVC'`` / ``'SVC'`` from ``sklearn.svm``.
        out_dir: Directory containing a ``preds`` sub-directory where the
            rounded test predictions are written. When None, nothing is
            written.
        class_weight: Forwarded to the classifiers that accept it.

    Returns:
        Accuracy of the rounded test-set predictions.

    Raises:
        NotImplementedError: If ``model_type`` is unknown, or names a
            solver function (``lars_path`` etc.) rather than an estimator.
            Raised before scikit-learn is imported or training starts.
    """
    # Estimators built with their default arguments.
    default_linear = (
        'ARDRegression', 'BayesianRidge', 'ElasticNet', 'ElasticNetCV',
        'HuberRegressor', 'Lars', 'LarsCV', 'Lasso', 'LassoCV', 'LassoLars',
        'LassoLarsCV', 'LassoLarsIC', 'LinearRegression', 'MultiTaskLasso',
        'MultiTaskElasticNet', 'MultiTaskLassoCV', 'MultiTaskElasticNetCV',
        'OrthogonalMatchingPursuit', 'OrthogonalMatchingPursuitCV',
        'PassiveAggressiveRegressor', 'RandomizedLasso',
        'RandomizedLogisticRegression', 'RANSACRegressor', 'Ridge', 'RidgeCV',
        'SGDRegressor', 'TheilSenRegressor',
    )
    # Classifiers that receive the caller's class_weight.
    weighted_linear = (
        'LogisticRegression', 'LogisticRegressionCV',
        'PassiveAggressiveClassifier', 'Perceptron', 'RidgeClassifier',
        'RidgeClassifierCV', 'SGDClassifier',
    )
    weighted_svm = ('LinearSVC', 'SVC')
    # Plain functions in sklearn.linear_model: they need the data as call
    # arguments and do not return an object with fit/predict, so they cannot
    # be trained like the estimators above.
    solver_functions = (
        'lars_path', 'lasso_path', 'lasso_stability_path',
        'logistic_regression_path', 'orthogonal_mp', 'orthogonal_mp_gram',
    )

    if model_type in solver_functions:
        raise NotImplementedError(
            "'%s' is a solver function, not an estimator; it cannot be used as a model"
            % model_type)
    if not (model_type in default_linear
            or model_type in weighted_linear
            or model_type in weighted_svm):
        raise NotImplementedError('Model not implemented')

    from sklearn import linear_model, svm

    startTrainTime = time()
    logger.info("Start training...")
    if model_type in default_linear:
        estimator = getattr(linear_model, model_type)()
    elif model_type in weighted_linear:
        estimator = getattr(linear_model, model_type)(class_weight=class_weight)
    else:
        extra_kwargs = {'degree': 3} if model_type == 'SVC' else {}
        estimator = getattr(svm, model_type)(class_weight=class_weight, **extra_kwargs)
    model = estimator.fit(train_x, train_y)

    logger.info("Finished training.")
    endTrainTime = time()
    trainTime = endTrainTime - startTrainTime
    logger.info("Training time : %d seconds" % trainTime)

    logger.info("Start predicting train set...")
    train_pred_y = model.predict(train_x)
    logger.info("Finished predicting train set.")
    logger.info("Start predicting test set...")
    test_pred_y = model.predict(test_x)
    logger.info("Finished predicting test set.")
    endTestTime = time()
    # Covers prediction on both the training and the test set.
    testTime = endTestTime - endTrainTime
    logger.info("Testing time : %d seconds" % testTime)

    # Round so regressor outputs can be scored as class labels.
    train_pred_y = np.round(train_pred_y)
    test_pred_y = np.round(test_pred_y)

    # out_dir defaults to None; only write predictions when a directory was given.
    if out_dir is not None:
        np.savetxt(out_dir + '/preds/best_test_pred' + '.txt', test_pred_y, fmt='%i')

    test_accuracy = accuracy_score(test_y, test_pred_y)
    logger.info('[TRAIN] Acc: %.3f' % (accuracy_score(train_y, train_pred_y)))
    logger.info('[TEST]  Acc: %.3f' % test_accuracy)

    return test_accuracy