Code Example #1
    def __init__(self):
        # Algorithm name
        self._name = 'larscv'

        # Base path
        self._f_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))

        # Suppress warning messages
        warnings.filterwarnings('ignore')

        # Load the raw data
        data = pd.read_csv(self._f_path + "/regression/resource/regression_sample.csv", sep=",", encoding="utf-8")

        # Boolean masks splitting the rows into training (<= 2017) and test (>= 2018) years
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)

        # Build the training set
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Build the test set
        self._x_test, self._y_test = self.preprocessing(data[self._y])

        # Declare the model
        # (note: the `normalize` parameter was deprecated in scikit-learn 1.0 and
        # removed in 1.2; on recent versions simply omit the argument)
        self._model = LarsCV(normalize=False)

        # Train the model
        self._model.fit(self._x_train, self._y_train)
Code Example #2
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import LarsCV

def larscv():
    X, y = make_regression(n_samples=200, n_features=10, noise=4.0, random_state=0)
    reg = LarsCV(cv=2).fit(X, y)
    print(reg.score(X, y))
    print(X[:, 0].shape, y.shape)
    # plt.plot on unsorted points draws a jagged polyline, so only the scatter
    # of the first feature against the target is kept
    plt.scatter(X[:, 0], y)
    plt.show()
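Once fitted, the cross-validated choices can be read straight off the estimator. A minimal follow-up sketch (attribute names are from the scikit-learn LarsCV API, continuing with `reg` inside the function above):

print(reg.alpha_)                  # regularization strength chosen by CV
print(reg.coef_[reg.coef_ != 0])   # surviving (non-zero) coefficients
print(reg.intercept_)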
Code Example #3
def Lars_regression(self, X_train, y_train, X_test, y_test):
    # 10 splits x 10 repeats = 100 fits per candidate alpha: thorough but slow
    my_cv = RepeatedKFold(n_splits=10, n_repeats=10, random_state=42)
    best_model = LarsCV(cv=my_cv, n_jobs=-1)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return best_model, mse, mae, r2
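A hypothetical driver for the method above (`obj` stands for an instance of the class that defines Lars_regression, and synthetic data stands in for real splits; note the return order is model, mse, mae, r2):

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=300, n_features=8, noise=2.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
model, mse, mae, r2 = obj.Lars_regression(X_train, y_train, X_test, y_test)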
Code Example #4
File: lars_cv.py  Project: vickyvishal/lale
class _LarsCVImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
Code Example #5
File: Train_Models.py  Project: ydk818/ABCD_ML
def train_regression_model(X, y, model_type='elastic cv', cv=3, extra_params=None):
    '''Wrapper function to train various regression models with X,y input,
       where extra params can be passed to override any default parameters'''

    # Use None instead of a mutable default argument
    extra_params = extra_params if extra_params is not None else {}

    model_type = model_type.lower()

    if model_type == 'linear':
        model = LinearRegression(fit_intercept=True)
    
    elif model_type == 'elastic cv':
        model = ElasticNetCV(cv=cv)
    
    elif model_type == 'omp cv':
        model = OrthogonalMatchingPursuitCV(cv=cv)
    
    elif model_type == 'lars cv':
        model = LarsCV(cv=cv)
    
    elif model_type == 'ridge cv':
        model = RidgeCV(cv=cv)
    
    elif model_type == 'full lightgbm':
        model = Train_Light_GBM(X, y, int_cv=cv, regression=True, **extra_params)
        return model
        
    else:
        raise ValueError('Unknown model_type: ' + str(model_type))

    model.fit(X, y)
    return model
Code Example #6
def ResultsLARS(DataSet, Y):
    X_train, X_test, y_train, y_test = train_test_split(DataSet,
                                                        Y,
                                                        train_size=0.75)
    LAR_cv = LarsCV(normalize=True)
    LAR_model = LAR_cv.fit(X_train, y_train)
    LAR_prediction = LAR_model.predict(X_test)
    LAR_mae = np.mean(np.abs(y_test - LAR_prediction))
    LAR_coefs = dict(
        zip(['Intercept'] + DataSet.columns.tolist(),
            np.round(
                np.concatenate((LAR_model.intercept_, LAR_model.coef_),
                               axis=None), 3)))
    print('Least Angle Regression MAE: {}'.format(np.round(LAR_mae, 3)))
    print('Least Angle Regression coefficients:{}'.format(LAR_coefs))
    del LAR_coefs['Intercept']
    DictionaryPlot(LAR_coefs, 'Least Angle Regression')
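LarsCV(normalize=True) relies on a parameter that was deprecated in scikit-learn 1.0 and removed in 1.2. On recent versions, a roughly equivalent setup scales the features in a pipeline instead (a sketch, not an exact reproduction: `normalize` divided the centered columns by their L2 norm, while StandardScaler divides by the standard deviation):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

scaled_lars = make_pipeline(StandardScaler(), LarsCV())
scaled_lars.fit(X_train, y_train)               # splits as produced inside ResultsLARS
prediction = scaled_lars.predict(X_test)
print(scaled_lars.named_steps['larscv'].coef_)  # step name auto-generated by make_pipeline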
Code Example #7
def test_model_lars_cv(self):
    model, X = fit_regression_model(LarsCV())
    model_onnx = convert_sklearn(
        model, "lars", [("input", FloatTensorType([None, X.shape[1]]))])
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(X,
                        model,
                        model_onnx,
                        basename="SklearnLarsCV-Dec4")
Code Example #8
def get_model_by_name(model_name):
    return {
        'Linear Regression': LinearRegression(),
        'Lars CV': LarsCV(cv=10),
        'Lasso CV': LassoCV(cv=10),
        'Ridge CV': RidgeCV(cv=10),
        'Elastic Net CV': ElasticNetCV(cv=10),
        'Orthogonal Matching Pursuit CV': OrthogonalMatchingPursuitCV(cv=10),
        'Decision Tree Regressor': DecisionTreeRegressor(max_depth=3),
    }[model_name]
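Lookup is by exact display name, and an unknown name raises a KeyError; note that every estimator in the table is instantiated on each call, so a dict of constructors would be cheaper for a long list. A short usage sketch (training data assumed to exist):

model = get_model_by_name('Lars CV')   # returns an unfitted LarsCV(cv=10)
model.fit(X_train, y_train)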
Code Example #9
File: ML.py  Project: sahahn/ABCD_Ev_Search
def train_regression_model(X, y, model_type='elastic cv', cv=3):

    if model_type == 'linear':
        model = LinearRegression(fit_intercept=True)
    elif model_type == 'elastic cv':
        model = ElasticNetCV(cv=cv)
    elif model_type == 'omp cv':
        model = OrthogonalMatchingPursuitCV(cv=cv)
    elif model_type == 'lars cv':
        model = LarsCV(cv=cv)
    elif model_type == 'ridge cv':
        model = RidgeCV(cv=cv)
    elif model_type == 'simple xgboost':
        model = XGBRegressor()
    elif model_type == 'simple lightgbm':
        model = LGBMRegressor()
    elif model_type == 'full lightgbm':
        model = train_light_gbm_regressor(X, y, cv, n_params=10, test_size=.2)
        return model

    else:
        raise ValueError('Unknown model_type: ' + str(model_type))

    model.fit(X, y)
    return model
Code Example #10
def fit_linear_model(basis_matrix, train_vals, solver_type, **kwargs):
    solvers = {
        'lasso_lars': LassoLarsCV(cv=kwargs['cv']).fit,
        'lasso': LassoCV(cv=kwargs['cv']).fit,
        'lars': LarsCV(cv=kwargs['cv']).fit,
        'omp': OrthogonalMatchingPursuitCV(cv=kwargs['cv'], verbose=5).fit
    }
    assert train_vals.ndim == 2
    if solver_type in solvers:
        fit = solvers[solver_type]
        res = fit(basis_matrix, train_vals[:, 0])
    else:
        msg = f'Solver type {solver_type} not supported\n'
        msg += 'Supported solvers are:\n'
        for key in solvers.keys():
            msg += f'\t{key}\n'
        raise Exception(msg)

    cv_score = res.score(basis_matrix, train_vals[:, 0])
    coef = res.coef_[:, np.newaxis]
    coef[0] = res.intercept_
    return coef, cv_score
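Note that coef[0] is overwritten with the intercept, which presumes the first basis function is the constant term. A hypothetical call (assuming numpy is imported as np, as elsewhere in the file; train_vals must be 2-D and the CV fold count comes in through kwargs):

basis_matrix = np.random.rand(50, 10)
train_vals = np.random.rand(50, 1)
coef, cv_score = fit_linear_model(basis_matrix, train_vals, 'lars', cv=5)
print(coef.shape, cv_score)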
Code Example #11
def check_w(w=[12, 24, 36, 48, 60]):
    '''
    Robustness check for w_min; saves the prediction results (Avew window)
    and the out-of-sample R-squared.

    Parameters
    ----------
    w : list of candidate w_min values
    '''
    for w_min in w:
        #linear ML prediction
        pre1 = linear_prediction(RidgeCV(), w_min=w_min, window_type="Avew")
        pre2 = linear_prediction(LassoCV(cv=5),
                                 w_min=w_min,
                                 window_type="Avew")
        pre3 = linear_prediction(ElasticNetCV(cv=5),
                                 w_min=w_min,
                                 window_type="Avew")
        pre4 = linear_prediction(LarsCV(cv=5), w_min=w_min, window_type="Avew")
        pre5 = linear_prediction(OrthogonalMatchingPursuitCV(cv=5),
                                 w_min=w_min,
                                 window_type="Avew")
        pre6 = MR(w_min=w_min, window_type="Avew")
        all_pre = pd.DataFrame({
            'Kitchen Sink': pre6,
            "ridge": pre1,
            "lasso": pre2,
            "elasticnet": pre3,
            "lars": pre4,
            "OMP": pre5,
        })
        all_pre['FC'] = all_pre.iloc[:, 1:].mean(axis=1)
        # save the prediction results (the Chinese directory names mean
        # "robustness check" and "prediction results")
        all_pre.to_csv(
            os.path.join(path, "稳健性检验", "w_min", "预测结果",
                         "w_min=" + str(w_min) + ".csv"))
        # R2 test
        R2_test(all_pre, name="w_min=" + str(w_min) +
                ".csv")  # the result then has to be moved manually
Code Example #12
File: larscv.py  Project: shriy-singh/DataScientist
def _larscv(*,
            train,
            test,
            x_predict=None,
            metrics,
            fit_intercept=True,
            verbose=False,
            max_iter=500,
            normalize=True,
            precompute='auto',
            cv=None,
            max_n_alphas=1000,
            n_jobs=None,
            eps=2.220446049250313e-16,
            copy_X=True):
    """For more info visit : 
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LarsCV.html#sklearn.linear_model.LarsCV
    """

    model = LarsCV(fit_intercept=fit_intercept,
                   verbose=verbose,
                   max_iter=max_iter,
                   normalize=normalize,
                   precompute=precompute,
                   cv=cv,
                   max_n_alphas=max_n_alphas,
                   n_jobs=n_jobs,
                   eps=eps,
                   copy_X=copy_X)
    model.fit(train[0], train[1])
    model_name = 'LarsCV'
    y_hat = model.predict(test[0])

    if metrics == 'mse':
        accuracy = _mse(test[1], y_hat)
    elif metrics == 'rmse':
        accuracy = _rmse(test[1], y_hat)
    elif metrics == 'mae':
        accuracy = _mae(test[1], y_hat)
    else:
        raise ValueError("metrics must be 'mse', 'rmse' or 'mae'")

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
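A hypothetical call matching the conventions the function expects: `train` and `test` are (X, y) tuples, and `metrics` is one of the three supported strings. Note also that the normalize=True default ties this helper to scikit-learn < 1.2, where the parameter still existed.

name, accuracy, _ = _larscv(train=(X_train, y_train),
                            test=(X_test, y_test),
                            metrics='mae', cv=5)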
Code Example #13
File: hdmrpy.py  Project: frbennett/rshdmrpy
def ridge_regression(self, **kwargs):
    if self._regression_type == 'lasso':
        self.ridgereg = LassoCV(max_iter=50000)
        #self.ridgereg = LassoCV(max_iter=1e5, cv=10)
        self.ridgereg.fit(self.data, self.Y)
    elif self._regression_type == 'ard':
        self.ridgereg = ARDRegression()
        self.ridgereg.fit(self.data, self.Y)
    elif self._regression_type == 'elastic':
        self.ridgereg = ElasticNetCV(cv=10)
        self.ridgereg.fit(self.data, self.Y)
    elif self._regression_type == 'lars':
        self.ridgereg = LarsCV(cv=10)
        self.ridgereg.fit(self.data, self.Y)
    elif self._regression_type == 'lassolars':
        self.ridgereg = LassoLarsCV(cv=5)
        self.ridgereg.fit(self.data, self.Y)
    elif self._regression_type == 'ordinary':
        self.ridgereg = LinearRegression()
        self.ridgereg.fit(self.data, self.Y)
    elif self._regression_type == 'ridge':
        self.ridgereg = RidgeCV()
        self.ridgereg.fit(self.data, self.Y)
Code Example #14
classifiers = [
    SVC(kernel="rbf", probability=True),
    SVC(kernel='linear', probability=True),
    SVC(kernel='sigmoid', probability=True),
    SVC(kernel='poly', probability=True, degree=3),
    SVC(kernel='poly', probability=True, degree=4),
    SVC(kernel='poly', probability=True, degree=5),
    DecisionTreeClassifier(),
    KNeighborsClassifier(),
    GaussianNB(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    QuadraticDiscriminantAnalysis(),
    LinearDiscriminantAnalysis(),
    ElasticNetCV(max_iter=10000),
    LarsCV(),
    LassoCV(max_iter=10000),
    LassoLarsCV(),
    LogisticRegressionCV(scoring=multi_class_log_loss),
    MultiTaskElasticNetCV(),
    MultiTaskLassoCV(),
    OrthogonalMatchingPursuitCV(),
    RidgeClassifierCV()
]
algorithm = 17
if len(sys.argv) > 1:
    algorithm = int(sys.argv[1])

name = names[algorithm]
clf = classifiers[algorithm]
output_file_name = output_file_names[algorithm] + file_identifier
Code Example #15
def default_model_create(self, x, y):
    self.model = LarsCV(cv=self.cv)
    return True
Code Example #16
File: ks-checkpoint.py  Project: GJBoth/DeePyMoD_SBL
uu = uu[:, :100]
X = np.transpose((t_grid.flatten(), x_grid.flatten()))
y = uu.reshape((uu.size, 1))

noise_level = 0.0
y_noisy = y + noise_level * np.std(y) * np.random.randn(y[:, 0].size, 1)
number_of_samples = 20000

idx = np.random.permutation(y.shape[0])
X_train = torch.tensor(X[idx, :][:number_of_samples],
                       dtype=torch.float32,
                       requires_grad=True)
y_train = torch.tensor(y_noisy[idx, :][:number_of_samples],
                       dtype=torch.float32)

estimator = LarsCV(fit_intercept=False)

config = {
    'n_in': 2,
    'hidden_dims': [20, 20, 20, 20, 20, 20, 20],
    'n_out': 1,
    'library_function': library_1D_in,
    'library_args': {
        'poly_order': 1,
        'diff_order': 4
    },
    'sparsity_estimator': estimator
}
model = DeepModDynamic(**config)
optimizer = torch.optim.Adam(model.network_parameters(),
                             betas=(0.99, 0.99),
Code Example #17
    clf = BaseEstimator()
    res = explain_weights(clf, vec=vec)
    assert 'BaseEstimator' in res.error
    for expl in format_as_all(res, clf):
        assert 'Error' in expl
        assert 'BaseEstimator' in expl
    with pytest.raises(TypeError):
        explain_weights(clf, unknown_argument=True)


@pytest.mark.parametrize(['reg'], [
    [ElasticNet(random_state=42)],
    [ElasticNetCV(random_state=42)],
    [HuberRegressor()],
    [Lars()],
    [LarsCV(max_n_alphas=10)],
    [Lasso(random_state=42)],
    [LassoCV(random_state=42)],
    [LassoLars(alpha=0.01)],
    [LassoLarsCV(max_n_alphas=10)],
    [LassoLarsIC()],
    [OrthogonalMatchingPursuit(n_nonzero_coefs=10)],
    [OrthogonalMatchingPursuitCV()],
    [PassiveAggressiveRegressor(C=0.1, random_state=42)],
    [Ridge(random_state=42)],
    [RidgeCV()],
    [SGDRegressor(random_state=42)],
    [LinearRegression()],
    [LinearSVR(random_state=42)],
    [TheilSenRegressor(random_state=42)],
])
Code Example #18
File: main.py  Project: yang-wang-ck/jpmml-sklearn
	store_pkl(pipeline, name)
	mpg = DataFrame(pipeline.predict(auto_X), columns = ["mpg"])
	store_csv(mpg, name)

if "Auto" in datasets:
	build_auto(AdaBoostRegressor(DecisionTreeRegressor(random_state = 13, min_samples_leaf = 5), random_state = 13, n_estimators = 17), "AdaBoostAuto")
	build_auto(ARDRegression(normalize = True), "BayesianARDAuto")
	build_auto(BayesianRidge(normalize = True), "BayesianRidgeAuto")
	build_auto(DecisionTreeRegressor(random_state = 13, min_samples_leaf = 2), "DecisionTreeAuto", compact = False)
	build_auto(BaggingRegressor(DecisionTreeRegressor(random_state = 13, min_samples_leaf = 5), random_state = 13, n_estimators = 3, max_features = 0.5), "DecisionTreeEnsembleAuto")
	build_auto(DummyRegressor(strategy = "median"), "DummyAuto")
	build_auto(ElasticNetCV(random_state = 13), "ElasticNetAuto")
	build_auto(ExtraTreesRegressor(random_state = 13, min_samples_leaf = 5), "ExtraTreesAuto")
	build_auto(GradientBoostingRegressor(random_state = 13, init = None), "GradientBoostingAuto")
	build_auto(HuberRegressor(), "HuberAuto")
	build_auto(LarsCV(), "LarsAuto")
	build_auto(LassoCV(random_state = 13), "LassoAuto")
	build_auto(LassoLarsCV(), "LassoLarsAuto")
	build_auto(OptimalLGBMRegressor(objective = "regression", n_estimators = 17, num_iteration = 11), "LGBMAuto", num_iteration = 11)
	build_auto(LinearRegression(), "LinearRegressionAuto")
	build_auto(BaggingRegressor(LinearRegression(), random_state = 13, max_features = 0.75), "LinearRegressionEnsembleAuto")
	build_auto(OrthogonalMatchingPursuitCV(), "OMPAuto")
	build_auto(RandomForestRegressor(random_state = 13, min_samples_leaf = 3), "RandomForestAuto", flat = True)
	build_auto(RidgeCV(), "RidgeAuto")
	build_auto(TheilSenRegressor(n_subsamples = 15, random_state = 13), "TheilSenAuto")
	build_auto(OptimalXGBRegressor(objective = "reg:linear", ntree_limit = 31), "XGBAuto", ntree_limit = 31)

if "Auto" in datasets:
	build_auto(TransformedTargetRegressor(DecisionTreeRegressor(random_state = 13)), "TransformedDecisionTreeAuto")
	build_auto(TransformedTargetRegressor(LinearRegression(), func = numpy.log, inverse_func = numpy.exp), "TransformedLinearRegressionAuto")
Code Example #19
print(regr.alpha_)
print(regr.intercept_)

plt.scatter(X[:, 0], y, color='black')
plt.scatter(X[:, 0], pred, color='red')
plt.show()

#%% Least Angle Regression LARS:
#Lars:fit_intercept, verbose, normalize
#LarsCV: fit_intercept, verbose, normalize, cv

from sklearn.linear_model import LarsCV, Lars
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
X, y = make_regression(n_samples=200, noise=4.0, random_state=0)
reg = LarsCV(cv=5).fit(X, y)
reg.score(X, y)
reg.alpha_
pred = reg.predict(X)

plt.scatter(X[:, 0], y, color='black')
plt.scatter(X[:, 0], pred, color='red')
plt.show()

reg2 = Lars().fit(X, y)
reg2.score(X, y)
reg2.alpha_
pred = reg2.predict(X)

#%% LassoLars: alpha, fit_intercept, normalize
#LassoLarsCV: alpha, fit_intercept, normalize, cv
Code Example #20
def main():

    # let's create a folder with a unique name to store results
    folderName = datetime.datetime.now().strftime(
        "%Y-%m-%d-%H-%M") + "-regression"
    if not os.path.exists(folderName): os.makedirs(folderName)

    # initialize logging
    common.initialize_logging(folderName)

    regressorsList = [

        # human-designed regressors
        [
            HumanRegressor("y = a_0 + a_1 * x + a_2 * x**2 + a_3 * x**3",
                           map_variables_to_features={"x": 0}),
            "HumanRegressor"
        ],
        [PolynomialRegressor(2), "PolynomialRegressor2"],
        #[PolynomialRegressor(3), "PolynomialRegressor3"],
        # keras neural network
        #[ANNRegressor(epochs=500, batch_size=32, layers=[16,4]), "KerasRegressor8-4"],
        #[ANNRegressor(epochs=700, batch_size=32, layers=[16,8]), "KerasRegressor16-8"],

        # cross decomposition
        [PLSRegression(), "PLSRegression"],

        # ensemble
        [AdaBoostRegressor(), "AdaBoostRegressor"],
        [BaggingRegressor(), "BaggingRegressor"],
        [BaggingRegressor(n_estimators=100), "BaggingRegressor_100"],
        [BaggingRegressor(n_estimators=300), "BaggingRegressor_300"],
        [ExtraTreesRegressor(), "ExtraTreesRegressor"],
        [GradientBoostingRegressor(), "GradientBoostingRegressor"],
        [RandomForestRegressor(), "RandomForestRegressor"],
        [RandomForestRegressor(n_estimators=100), "RandomForestRegressor_100"],
        [RandomForestRegressor(n_estimators=300), "RandomForestRegressor_300"],

        # isotonic
        #[IsotonicRegression(), "IsotonicRegression"], # apparently wants "X" as a 1d array

        # kernel ridge
        [KernelRidge(), "KernelRidge"],

        # linear
        #[ARDRegression(), "ARDRegression"], # takes too much time to train
        [BayesianRidge(), "BayesianRidge"],
        [ElasticNetCV(), "ElasticNetCV"],
        [LarsCV(), "LarsCV"],
        [LassoCV(), "LassoCV"],
        [LinearRegression(), "LinearRegression"],
        [PassiveAggressiveRegressor(), "PassiveAggressiveRegressor"],

        # neighbors
        [KNeighborsRegressor(), "KNeighborsRegressor"],
        [RadiusNeighborsRegressor(), "RadiusNeighborsRegressor"],

        # neural networks
        #[BernoulliRBM(), "BernoulliRBM"], # has a different interface, no "predict"

        # svm
        [SVR(), "SVR"],
        [LinearSVR(), "LinearSVR"],
        [NuSVR(), "NuSVR"],

        # tree
        [DecisionTreeRegressor(), "DecisionTreeRegressor"],
        [ExtraTreeRegressor(), "ExtraTreeRegressor"],

        # generalized additive models
        [LinearGAM(n_splines=20), "LinearGAM(n_splines=20)"],

        # gaussian processes
        [
            GaussianProcessRegressor(kernel=DotProduct() + WhiteKernel()),
            "GaussianProcessRegressor"
        ],
    ]

    X = y = X_train = X_test = y_train = y_test = variablesX = variablesY = None
    numberOfSplits = 10  # TODO change number of splits from command line

    if True:
        # this is just a dumb benchmark
        X, y, variablesX, variablesY = common.loadEasyBenchmark()

    if False:
        X, y, variablesX, variablesY = common.loadChristianQuestionnaireRegression(
        )

    if False:
        X, y, variablesX, variablesY = common.loadYongShiDataCalibration2(
            "TIMBER")

    if False:
        X, y, variablesX, variablesY = common.loadLaurentBouvierNewData()

    if False:
        X, y, variablesX, variablesY = common.loadYongShiDataCalibration()

    if False:
        from sklearn.datasets import load_linnerud
        X, y = load_linnerud(return_X_y=True)

    if False:
        X, y, variablesX, variablesY = common.loadYingYingData()

    if False:
        X, y, variablesX, variablesY = common.loadCleaningDataGermanSpecific()
        #X, y, variablesX, variablesY = common.loadCleaningDataGerman()

    if False:
        X, y, variablesX, variablesY = common.loadInsects()

    if False:
        X, y, variablesX, variablesY = common.loadMilkProcessPipesDimensionalAnalysis(
        )
        #X, y, variablesX, variablesY = common.loadMilkProcessPipes()

    if False:  # ecosystem services
        X, y, variablesX, variablesY = common.loadEcosystemServices()

    if False:
        X, y, variablesX, variablesY = common.loadMarcoSoil()

    if False:
        # load dataset
        X, y = common.loadEureqaRegression()
        # randomly split between training and test
        #X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    if False:
        # load dataset
        X_train, X_test, y_train, y_test = common.loadBiscuitExample()
        logging.info("X_train: " + str(X_train.shape))
        logging.info("X_test: " + str(X_test.shape))
        logging.info("y_train: " + str(y_train.shape))
        logging.info("y_test: " + str(y_test.shape))

        # in this particular case, I create the "global" X and y by putting together the two arrays
        X = np.append(X_train, X_test, axis=0)
        y = np.append(y_train, y_test, axis=0)

    if False:
        # load dataset
        X_train, X_test, y_train, y_test = common.loadAromoptiExample()
        logging.info("X_train: " + str(X_train.shape))
        logging.info("X_test: " + str(X_test.shape))
        logging.info("y_train: " + str(y_train.shape))
        logging.info("y_test: " + str(y_test.shape))

        # in this particular case, I create the "global" X and y by putting together the two arrays
        X = np.append(X_train, X_test, axis=0)
        y = np.append(y_train, y_test, axis=0)

    logging.info(
        "Regressing %d output variables, in function of %d input variables..."
        % (y.shape[1], X.shape[1]))

    # if the names of the variables are not specified, let's specify them!
    if variablesY is None:
        variablesY = ["y" + str(i) for i in range(0, len(y[0]))]
    if variablesX is None:
        variablesX = ["X" + str(i) for i in range(0, len(X[0]))]

    performances = dict()

    for variableIndex, variableY in enumerate(variablesY):

        logging.info("** Now evaluating models for variable \"%s\"... **" %
                     variableY)

        # obtain data
        y_ = y[:, variableIndex].ravel()

        # assume here that you will have train/test indexes instead
        # it's also easier for the plots, as we do not face the issue
        # of duplicate values (e.g. same value with two indexes)
        rs = ShuffleSplit(n_splits=numberOfSplits, random_state=42)
        #rs = LeaveOneOut()

        # initialize performance dictionary of arrays
        performances[variableY] = dict()
        for regressor, regressorName in regressorsList:
            performances[variableY][regressorName] = dict()
            performances[variableY][regressorName]["r^2"] = []
            performances[variableY][regressorName]["e.v"] = []
            performances[variableY][regressorName]["mse"] = []
            performances[variableY][regressorName]["mae"] = []
            performances[variableY][regressorName]["predicted"] = []

        # this is used to store all values of each fold, in order; maybe there's a smarter way to do it
        foldPointsInOrder = []

        # and now, for every regressor
        for foldIndex, indexes in enumerate(rs.split(X)):

            train_index, test_index = indexes

            X_train = X[train_index]
            y_train = y_[train_index]
            X_test = X[test_index]
            y_test = y_[test_index]

            # normalize
            logging.info("Normalizing data...")
            scalerX = StandardScaler()
            scalerY = StandardScaler()

            X_train = scalerX.fit_transform(X_train)
            X_test = scalerX.transform(X_test)

            y_train = scalerY.fit_transform(y_train.reshape(-1, 1)).ravel(
            )  # this "reshape/ravel" here is just to avoid warnings, it has no true effect on data
            y_test = scalerY.transform(y_test.reshape(-1, 1)).ravel()

            # now, we store the points of the fold in the order they appear
            foldPointsInOrder.extend(list(scalerY.inverse_transform(y_test)))

            for regressorIndex, regressorData in enumerate(regressorsList):

                regressor = regressorData[0]
                regressorName = regressorData[1]

                logging.info("Fold #%d/%d: training regressor #%d/%d \"%s\"" %
                             (foldIndex + 1, numberOfSplits, regressorIndex +
                              1, len(regressorsList), regressorName))

                try:
                    regressor.fit(X_train, y_train)

                    y_test_predicted = regressor.predict(X_test)
                    r2Test = r2_score(y_test, y_test_predicted)
                    mseTest = mean_squared_error(y_test, y_test_predicted)
                    maeTest = mean_absolute_error(y_test, y_test_predicted)
                    varianceTest = explained_variance_score(
                        y_test, y_test_predicted)

                    logging.info("R^2 score (test): %.4f" % r2Test)
                    logging.info("EV score (test): %.4f" % varianceTest)
                    logging.info("MSE score (test): %.4f" % mseTest)
                    logging.info("MAE score (test): %.4f" % maeTest)

                    # add performance to the list of performances
                    performances[variableY][regressorName]["r^2"].append(
                        r2Test)
                    performances[variableY][regressorName]["e.v"].append(
                        varianceTest)
                    performances[variableY][regressorName]["mse"].append(
                        mseTest)
                    performances[variableY][regressorName]["mae"].append(
                        maeTest)
                    # also record the predictions, to be used later in a global figure
                    performances[variableY][regressorName]["predicted"].extend(
                        list(scalerY.inverse_transform(y_test_predicted)))

                    try:
                        import matplotlib.pyplot as plt

                        # plotting first figure, with points 'x' and 'o'
                        y_predicted = regressor.predict(scalerX.transform(
                            X))  # 'X' was never wholly rescaled before
                        y_train_predicted = regressor.predict(X_train)

                        plt.figure()

                        plt.scatter(train_index,
                                    y_train,
                                    c="gray",
                                    label="training data")
                        plt.scatter(test_index,
                                    y_test,
                                    c="green",
                                    label="test data")

                        plt.plot(np.arange(len(y_predicted)),
                                 y_predicted,
                                 'x',
                                 c="red",
                                 label="regression")
                        plt.xlabel("order of data samples")
                        plt.ylabel("target")
                        plt.title(regressorName + ", R^2=%.4f (test)" % r2Test)
                        plt.legend()

                        logging.info("Saving figure...")
                        plt.savefig(
                            os.path.join(
                                folderName, regressorName + "-" + variableY +
                                "-fold-" + str(foldIndex + 1) + ".pdf"))
                        plt.close()

                        # plotting second figure, with everything close to a middle line
                        plt.figure()

                        plt.plot(y_train,
                                 y_train_predicted,
                                 'r.',
                                 label="training set")  # points
                        plt.plot(y_test,
                                 y_test_predicted,
                                 'go',
                                 label="test set")  # points
                        plt.plot([
                            min(y_train.min(), y_test.min()),
                            max(y_train.max(), y_test.max())
                        ],
                                 [
                                     min(y_train_predicted.min(),
                                         y_test_predicted.min()),
                                     max(y_train_predicted.max(),
                                         y_test_predicted.max())
                                 ], 'k--')  # line

                        plt.xlabel("measured")
                        plt.ylabel("predicted")
                        plt.title(regressorName + " measured vs predicted, " +
                                  variableY)
                        plt.legend(loc='best')

                        plt.savefig(
                            os.path.join(
                                folderName, regressorName + "-" + variableY +
                                "-fold-" + str(foldIndex + 1) + "-b.pdf"))
                        plt.close()

                        # also, save ordered list of features
                        featuresByImportance = relativeFeatureImportance(
                            regressor)

                        # if list exists, write feature importance to disk
                        # TODO horrible hack here, to avoid issues with GAM
                        if len(featuresByImportance
                               ) > 0 and "GAM" not in regressorName:
                            featureImportanceFileName = regressorName + "-" + variableY + "-featureImportance-fold" + str(
                                foldIndex) + ".csv"
                            with open(
                                    os.path.join(folderName,
                                                 featureImportanceFileName),
                                    "w") as fp:
                                fp.write("feature,importance\n")
                                for featureImportance, featureIndex in featuresByImportance:
                                    fp.write(variablesX[int(featureIndex)] +
                                             "," + str(featureImportance) +
                                             "\n")

                    except ImportError:
                        logging.info(
                            "Cannot import matplotlib. Skipping plots...")

                except Exception as e:
                    logging.info("Regressor \"" + regressorName +
                                 "\" failed on variable \"" + variableY +
                                 "\":" + str(e))

    logging.info("Final summary:")
    with open(os.path.join(folderName, "00_summary.txt"), "w") as fp:

        for variableY in variablesY:

            logging.info("For variable \"" + variableY + "\"")
            fp.write("For variable: " + variableY + " = f(" + variablesX[0])
            for i in range(1, len(variablesX)):
                fp.write("," + variablesX[i])
            fp.write(")\n")

            # create a list from the dictionary and sort it
            sortedPerformances = sorted(
                [(performances[variableY][regressorName], regressorName)
                 for regressorName in performances[variableY]],
                key=lambda x: np.mean(x[0]["r^2"]),
                reverse=True)

            for regressorData in sortedPerformances:
                regressorName = regressorData[1]
                regressorScore = regressorData[0]

                r2Mean = np.mean(regressorScore["r^2"])
                r2std = np.std(regressorScore["r^2"])

                varianceMean = np.mean(regressorScore["e.v"])
                varianceStd = np.std(regressorScore["e.v"])

                mseMean = np.mean(regressorScore["mse"])
                mseStd = np.std(regressorScore["mse"])

                maeMean = np.mean(regressorScore["mae"])
                maeStd = np.std(regressorScore["mae"])

                logging.info(
                    "\t- %s, R^2=%.4f (std=%.4f), Explained Variance=%.4f (std=%.4f), MSE=%.4f (std=%.4f), MAE=%.4f (std=%.4f)"
                    % (regressorName, r2Mean, r2std, varianceMean, varianceStd,
                       mseMean, mseStd, maeMean, maeStd))

                fp.write(
                    "\t- %s, R^2=%.4f (std=%.4f), Explained Variance=%.4f (std=%.4f), MSE=%.4f (std=%.4f), MAE=%.4f (std=%.4f)\n"
                    % (regressorName, r2Mean, r2std, varianceMean, varianceStd,
                       mseMean, mseStd, maeMean, maeStd))

                fp.write("\t\t- R^2:" +
                         str(["%.4f" % x
                              for x in regressorScore["r^2"]]) + "\n")
                fp.write("\t\t- E.V.:" +
                         str(["%.4f" % x
                              for x in regressorScore["e.v"]]) + "\n")
                fp.write("\t\t- MSE:" +
                         str(["%.4f" % x
                              for x in regressorScore["mse"]]) + "\n")
                fp.write("\t\t- MAE:" +
                         str(["%.4f" % x
                              for x in regressorScore["mae"]]) + "\n")

                # also, plot a "global" graph
                # issue here: if a regressor fails, the matrices are inconsistent, so a check is in order
                # TODO also, the plot looks really bad if some values are negative; turn everything to absolute values?
                if len(foldPointsInOrder) == len(regressorScore["predicted"]):
                    fig = plt.figure()
                    ax = fig.add_subplot(111)

                    #bottom_left_corner = [min(foldPointsInOrder), max(foldPointsInOrder)]
                    #top_right_corner = [min(regressorScore["predicted"]), max(regressorScore["predicted"])]
                    x_bottom_top = [0, max(foldPointsInOrder)]
                    y_bottom_top = [0, max(foldPointsInOrder)]

                    ax.plot(foldPointsInOrder, regressorScore["predicted"],
                            'g.')  # points
                    ax.plot(x_bottom_top, y_bottom_top, 'k--',
                            label="1:1")  # line
                    ax.plot(x_bottom_top,
                            [y_bottom_top[0] * 1.20, y_bottom_top[1] * 1.20],
                            'r--',
                            label="20% error")
                    ax.plot(x_bottom_top,
                            [y_bottom_top[0] * 0.80, y_bottom_top[1] * 0.80],
                            'r--')

                    ax.set_title(regressorName + " measured vs predicted, " +
                                 variableY + " (all test)")
                    ax.set_xlabel("measured")
                    ax.set_ylabel("predicted")
                    ax.legend(loc='best')

                    plt.savefig(
                        os.path.join(
                            folderName,
                            regressorName + "-" + variableY + "-global-b.png"))
                    plt.close(fig)
Code Example #21
File: mmm.py  Project: pabloazurduy/mkt_mix_model
    },
    {
        'name': 'LLCV',
        'mdl': LassoLarsCV(max_n_alphas=1000)
    },
    {
        'name': 'LLaic',
        'mdl': LassoLarsIC(criterion='aic')
    },
    {
        'name': 'ENCV',
        'mdl': ElasticNetCV(n_alphas=100)
    },
    {
        'name': 'LarsCV',
        'mdl': LarsCV(max_n_alphas=1000)
    },
    {
        'name': 'LR',
        'mdl': LinearRegression()
    },
    {
        'name': 'ARDR',
        'mdl': ARDRegression()
    },
    {
        'name': 'BYR',
        'mdl': BayesianRidge()
    },
]
Code Example #22
        np.round(
            np.concatenate(
                (elastic_net_model.intercept_, elastic_net_model.coef_),
                axis=None), 3)))

print('Elastic Net MSE: {}'.format(np.round(elastic_net_mae, 3)))
print('Elastic Net coefficients:', elastic_net_coefs)

##############################################################################
###################### LEAST ANGLE REGRESSION ################################
##############################################################################
print(
    "##############################################################################"
)
print("LEAST ANGLE REGRESSION")
LAR_cv = LarsCV(normalize=True)
LAR_model = LAR_cv.fit(X_train, y_train)
LAR_prediction = LAR_model.predict(X_test)
LAR_mse = mean_squared_error(y_test, LAR_prediction)
LAR_coefs = dict(
    zip(['Intercept'] + data.columns.tolist()[:-1],
        np.round(
            np.concatenate((LAR_model.intercept_, LAR_model.coef_), axis=None),
            3)))

print('Least Angle Regression MSE: {}'.format(np.round(LAR_mse, 3)))
print('Least Angle Regression coefficients:', LAR_coefs)

##############################################################################
################## PRINCIPAL COMPONENTS REGRESSION ###########################
##############################################################################
Code Example #23
File: Linear Models.py  Project: chenzhongtao/source
print(np.sum(lars.coef_ != 0))
#10

train_n = 100
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])
lars_500 = Lars() # it's 500 by default
lars_500.fit(reg_data[:train_n], reg_target[:train_n]);
#Now, to see how well each model fit the unknown data, do the following:
np.mean(np.power(reg_target[train_n:] - lars_12.predict(reg_data[train_n:]), 2))
#31.527714163321001
np.mean(np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2))
#9.6198147535136237e+30

from sklearn.linear_model import LarsCV
lcv = LarsCV()
lcv.fit(reg_data, reg_target)

print(np.sum(lcv.coef_ != 0))
#44


#Using linear methods for classification - logistic regression

from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=4)

from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()

X_train = X[:-200]
Code Example #24
        'PLSRegression 2D',
        make_pipeline(
            StandardScaler(), PCA(n_components=0.95),
            PolynomialFeatures(2, interaction_only=True, include_bias=True),
            PLSRegression())))

models.append(
    ModelClass('LinearRegressor',
               make_pipeline(StandardScaler(), LinearRegression())))

models.append(
    ModelClass('HuberRegressor',
               make_pipeline(StandardScaler(), HuberRegressor())))

models.append(
    ModelClass('Lars', make_pipeline(LarsCV(cv=cv_inner, normalize=True))))

models.append(
    ModelClass('LassoLarsCV', LassoLarsCV(cv=cv_inner, normalize=True)))
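# NOTE: the `normalize` parameter used in the two models above was deprecated in
# scikit-learn 1.0 and removed in 1.2; on recent versions, standardize the
# features with a StandardScaler step instead.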

models.append(ModelClass('LassoLarsIC', make_pipeline(LassoLarsIC())))

models.append(
    ModelClass('BayesianRidge', make_pipeline(StandardScaler(),
                                              BayesianRidge())))

models.append(
    ModelClass(
        'ElasticNet kBest std',
        make_pipeline(
            StandardScaler(), SelectKBest(mutual_info_regression, k=6),
Code Example #25
File: lars_cv.py  Project: vickyvishal/lale
def __init__(self, **hyperparams):
    self._hyperparams = hyperparams
    self._wrapped_model = Op(**self._hyperparams)
Code Example #26
class LarsCvClass:
    """
    Name      : LarsCV
    Attribute : None
    Method    : predict, predict_by_cv, save_model
    """

    def __init__(self):
        # Algorithm name
        self._name = 'larscv'

        # Base path
        self._f_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))

        # Suppress warning messages
        warnings.filterwarnings('ignore')

        # Load the raw data
        data = pd.read_csv(self._f_path + "/regression/resource/regression_sample.csv", sep=",", encoding="utf-8")

        # Boolean masks splitting the rows into training (<= 2017) and test (>= 2018) years
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)

        # Build the training set
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Build the test set
        self._x_test, self._y_test = self.preprocessing(data[self._y])

        # Declare the model
        self._model = LarsCV(normalize=False)

        # Train the model
        self._model.fit(self._x_train, self._y_train)

    # Data preprocessing
    def preprocessing(self, data):
        # Features
        x = []
        # Labels
        y = []
        # Window size (7 days)
        base_interval = 7
        # Temperatures
        temps = list(data["temperature"])

        for i in range(len(temps)):
            if i < base_interval:
                continue
            y.append(temps[i])

            xa = []

            for p in range(base_interval):
                d = i + p - base_interval
                xa.append(temps[d])
            x.append(xa)
        return x, y

    # Standard prediction
    def predict(self, save_img=False, show_chart=False):
        # Predict
        y_pred = self._model.predict(self._x_test)

        # Score
        score = r2_score(self._y_test, y_pred)

        # Report
        if hasattr(self._model, 'coef_') and hasattr(self._model, 'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')

        print(f'Score = {score}')

        # Optionally save the chart image
        if save_img:
            self.save_chart_image(y_pred, show_chart)

        # Predictions & score
        return [list(y_pred), score]

    # Cross-validation prediction
    def predict_by_cv(self):
        # For regression algorithms, implement cross-validation to suit the actual project
        return False

    # GridSearchCV prediction
    def predict_by_gs(self):
        pass

    # Save or replace the model
    def save_model(self, renew=False):
        # Save the model
        if not renew:
            # First save
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')
        else:
            # Replace the existing model, keeping the old one under a timestamped name
            if os.path.isfile(self._f_path + f'/model/{self._name}_rg.pkl'):
                os.rename(self._f_path + f'/model/{self._name}_rg.pkl',
                          self._f_path + f'/model/{str(self._name) + str(time.time())}_rg.pkl')
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')

    # Save the regression chart
    def save_chart_image(self, data, show_chart):
        # Figure size
        plt.figure(figsize=(15, 10), dpi=100)

        # Ground truth
        plt.plot(self._y_test, c='r')

        # Predicted values
        plt.plot(data, c='b')

        # Save as an image
        plt.savefig('./chart_images/tenki-kion-lr.png')

        # Show the chart (optional)
        if show_chart:
            plt.show()

    def __del__(self):
        del self._x_train, self._x_test, self._y_train, self._y_test, self._x, self._y, self._model
Code Example #27
File: credito_ampliado.py  Project: royopa/test
n_splits = (len(df_general) - 50)

models = {}

models[1] = make_pipeline(
    StandardScaler(), SelectFromModel(DecisionTreeRegressor(),
                                      prefit=False)).fit(df_general, ibc)
models[2] = make_pipeline(
    StandardScaler(),
    SelectFromModel(ElasticNetCV(normalize=False,
                                 cv=TimeSeriesSplit(n_splits)),
                    prefit=False)).fit(df_general, ibc)
models[3] = make_pipeline(
    StandardScaler(),
    SelectFromModel(LarsCV(normalize=False, cv=TimeSeriesSplit(n_splits)),
                    prefit=False)).fit(df_general, ibc)
models[4] = make_pipeline(
    StandardScaler(),
    SelectFromModel(BayesianRidge(normalize=False),
                    prefit=False)).fit(df_general, ibc)
models[5] = make_pipeline(
    StandardScaler(),
    RFECV(LinearRegression(),
          cv=TimeSeriesSplit(n_splits))).fit(df_general, ibc)
models[6] = make_pipeline(StandardScaler(),
                          SelectKBest(mutual_info_regression,
                                      k=1)).fit(df_general, ibc)
models[7] = make_pipeline(StandardScaler(),
                          SelectKBest(mutual_info_regression,
                                      k=3)).fit(df_general, ibc)
Code Example #28
def GetAllModelsForComparison(X_train, Y_train):
    models = {
        'ARDRegression': ARDRegression(),
        'BayesianRidge': BayesianRidge(),
        'ElasticNet': ElasticNet(),
        'ElasticNetCV': ElasticNetCV(),
        'Hinge': Hinge(),
        #'Huber': Huber(),
        'HuberRegressor': HuberRegressor(),
        'Lars': Lars(),
        'LarsCV': LarsCV(),
        'Lasso': Lasso(),
        'LassoCV': LassoCV(),
        'LassoLars': LassoLars(),
        'LassoLarsCV': LassoLarsCV(),
        'LinearRegression': LinearRegression(),
        'Log': Log(),
        'LogisticRegression': LogisticRegression(),
        'LogisticRegressionCV': LogisticRegressionCV(),
        'ModifiedHuber': ModifiedHuber(),
        'MultiTaskElasticNet': MultiTaskElasticNet(),
        'MultiTaskElasticNetCV': MultiTaskElasticNetCV(),
        'MultiTaskLasso': MultiTaskLasso(),
        'MultiTaskLassoCV': MultiTaskLassoCV(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'OrthogonalMatchingPursuitCV': OrthogonalMatchingPursuitCV(),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),
        'Perceptron': Perceptron(),
        'RANSACRegressor': RANSACRegressor(),
        #'RandomizedLasso': RandomizedLasso(),
        #'RandomizedLogisticRegression': RandomizedLogisticRegression(),
        'Ridge': Ridge(),
        'RidgeCV': RidgeCV(),
        'RidgeClassifier': RidgeClassifier(),
        'SGDClassifier': SGDClassifier(),
        'SGDRegressor': SGDRegressor(),
        'SquaredLoss': SquaredLoss(),
        'TheilSenRegressor': TheilSenRegressor(),
        'BaseEstimator': BaseEstimator(),
        'ClassifierMixin': ClassifierMixin(),
        'LinearClassifierMixin': LinearClassifierMixin(),
        'LinearDiscriminantAnalysis': LinearDiscriminantAnalysis(),
        'QuadraticDiscriminantAnalysis': QuadraticDiscriminantAnalysis(),
        'StandardScaler': StandardScaler(),
        'TransformerMixin': TransformerMixin(),
        'BaseEstimator': BaseEstimator(),
        'KernelRidge': KernelRidge(),
        'RegressorMixin': RegressorMixin(),
        'LinearSVC': LinearSVC(),
        'LinearSVR': LinearSVR(),
        'NuSVC': NuSVC(),
        'NuSVR': NuSVR(),
        'OneClassSVM': OneClassSVM(),
        'SVC': SVC(),
        'SVR': SVR(),
        'SGDClassifier': SGDClassifier(),
        'SGDRegressor': SGDRegressor(),
        #'BallTree': BallTree(),
        #'DistanceMetric': DistanceMetric(),
        #'KDTree': KDTree(),
        'KNeighborsClassifier': KNeighborsClassifier(),
        'KNeighborsRegressor': KNeighborsRegressor(),
        'KernelDensity': KernelDensity(),
        #'LSHForest': LSHForest(),
        'LocalOutlierFactor': LocalOutlierFactor(),
        'NearestCentroid': NearestCentroid(),
        'NearestNeighbors': NearestNeighbors(),
        'RadiusNeighborsClassifier': RadiusNeighborsClassifier(),
        'RadiusNeighborsRegressor': RadiusNeighborsRegressor(),
        #'GaussianProcess': GaussianProcess(),
        'GaussianProcessRegressor': GaussianProcessRegressor(),
        'GaussianProcessClassifier': GaussianProcessClassifier(),
        'CCA': CCA(),
        'PLSCanonical': PLSCanonical(),
        'PLSRegression': PLSRegression(),
        'PLSSVD': PLSSVD(),
        #'ABCMeta': ABCMeta(),
        #'BaseDiscreteNB': BaseDiscreteNB(),
        'BaseEstimator': BaseEstimator(),
        #'BaseNB': BaseNB(),
        'BernoulliNB': BernoulliNB(),
        'ClassifierMixin': ClassifierMixin(),
        'GaussianNB': GaussianNB(),
        'LabelBinarizer': LabelBinarizer(),
        'MultinomialNB': MultinomialNB(),
        'DecisionTreeClassifier': DecisionTreeClassifier(),
        'DecisionTreeRegressor': DecisionTreeRegressor(),
        'ExtraTreeClassifier': ExtraTreeClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'BaggingClassifier': BaggingClassifier(),
        'BaggingRegressor': BaggingRegressor(),
        #'BaseEnsemble': BaseEnsemble(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'IsolationForest': IsolationForest(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RandomForestRegressor': RandomForestRegressor(),
        'RandomTreesEmbedding': RandomTreesEmbedding(),
        #'VotingClassifier': VotingClassifier(),
        'BaseEstimator': BaseEstimator(),
        'ClassifierMixin': ClassifierMixin(),
        'LabelBinarizer': LabelBinarizer(),
        'MetaEstimatorMixin': MetaEstimatorMixin(),
        #'OneVsOneClassifier': OneVsOneClassifier(),
        #'OneVsRestClassifier': OneVsRestClassifier(),
        #'OutputCodeClassifier': OutputCodeClassifier(),
        'Parallel': Parallel(),
        #'ABCMeta': ABCMeta(),
        'BaseEstimator': BaseEstimator(),
        #'ClassifierChain': ClassifierChain(),
        'ClassifierMixin': ClassifierMixin(),
        'MetaEstimatorMixin': MetaEstimatorMixin(),
        #'MultiOutputClassifier': MultiOutputClassifier(),
        #'MultiOutputEstimator': MultiOutputEstimator(),
        #'MultiOutputRegressor': MultiOutputRegressor(),
        'Parallel': Parallel(),
        'RegressorMixin': RegressorMixin(),
        'LabelPropagation': LabelPropagation(),
        'LabelSpreading': LabelSpreading(),
        'BaseEstimator': BaseEstimator(),
        'IsotonicRegression': IsotonicRegression(),
        'RegressorMixin': RegressorMixin(),
        'TransformerMixin': TransformerMixin(),
        'BernoulliRBM': BernoulliRBM(),
        'MLPClassifier': MLPClassifier(),
        'MLPRegressor': MLPRegressor()
    }
    return models
Code Example #29
X_learning = df_all_data[:train_index]
X_test = df_all_data[train_index:]

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LarsCV, Lasso, LassoCV, ElasticNet, ElasticNetCV
from sklearn.linear_model import LassoLars, LassoLarsCV, Ridge, RidgeCV

from sklearn.model_selection import cross_val_score, KFold, GridSearchCV

import xgboost as xgb

models = []
models.append(("LrE", LinearRegression()))
models.append(("RidCV", RidgeCV()))
models.append(("LarCV", LarsCV()))
models.append(("LasCV", LassoCV()))
models.append(("ElNCV", ElasticNetCV()))
models.append(("LaLaCV", LassoLarsCV()))
models.append(("XGB", xgb.XGBRegressor()))

kfold = KFold(n_splits=10)


def getCVResult(models, X_learning, Y_learning):

    for name, model in models:
        cv_results = cross_val_score(model,
                                     X_learning,
                                     Y_learning,
                                     scoring='neg_mean_squared_error',
Code Example #30
if "Auto" in datasets:
	build_auto(AdaBoostRegressor(DecisionTreeRegressor(min_samples_leaf = 5, random_state = 13), random_state = 13, n_estimators = 17), "AdaBoostAuto")
	build_auto(ARDRegression(normalize = True), "BayesianARDAuto")
	build_auto(BayesianRidge(normalize = True), "BayesianRidgeAuto")
	build_auto(DecisionTreeRegressor(min_samples_leaf = 2, random_state = 13), "DecisionTreeAuto", compact = False)
	build_auto(BaggingRegressor(DecisionTreeRegressor(min_samples_leaf = 5, random_state = 13), n_estimators = 3, max_features = 0.5, random_state = 13), "DecisionTreeEnsembleAuto")
	build_auto(DummyRegressor(strategy = "median"), "DummyAuto")
	build_auto(ElasticNetCV(cv = 3, random_state = 13), "ElasticNetAuto")
	build_auto(ExtraTreesRegressor(n_estimators = 10, min_samples_leaf = 5, random_state = 13), "ExtraTreesAuto")
	build_auto(GBDTLMRegressor(RandomForestRegressor(n_estimators = 7, max_depth = 6, random_state = 13), LinearRegression()), "GBDTLMAuto")
	build_auto(GBDTLMRegressor(XGBRFRegressor(n_estimators = 17, max_depth = 6, random_state = 13), ElasticNet(random_state = 13)), "XGBRFLMAuto")
	build_auto(GradientBoostingRegressor(init = None, random_state = 13), "GradientBoostingAuto")
	build_auto(HistGradientBoostingRegressor(max_iter = 31, random_state = 13), "HistGradientBoostingAuto")
	build_auto(HuberRegressor(), "HuberAuto")
	build_auto(LarsCV(cv = 3), "LarsAuto")
	build_auto(LassoCV(cv = 3, random_state = 13), "LassoAuto")
	build_auto(LassoLarsCV(cv = 3), "LassoLarsAuto")
	build_auto(LinearRegression(), "LinearRegressionAuto")
	build_auto(BaggingRegressor(LinearRegression(), max_features = 0.75, random_state = 13), "LinearRegressionEnsembleAuto")
	build_auto(OrthogonalMatchingPursuitCV(cv = 3), "OMPAuto")
	build_auto(RandomForestRegressor(n_estimators = 10, min_samples_leaf = 3, random_state = 13), "RandomForestAuto", flat = True)
	build_auto(RidgeCV(), "RidgeAuto")
	build_auto(StackingRegressor([("ridge", Ridge(random_state = 13)), ("lasso", Lasso(random_state = 13))], final_estimator = GradientBoostingRegressor(n_estimators = 7, random_state = 13)), "StackingEnsembleAuto")
	build_auto(TheilSenRegressor(n_subsamples = 31, random_state = 13), "TheilSenAuto")
	build_auto(VotingRegressor([("dt", DecisionTreeRegressor(random_state = 13)), ("knn", KNeighborsRegressor()), ("lr", LinearRegression())], weights = [3, 1, 2]), "VotingEnsembleAuto")
	build_auto(XGBRFRegressor(n_estimators = 31, max_depth = 6, random_state = 13), "XGBRFAuto")

if "Auto" in datasets:
	build_auto(TransformedTargetRegressor(DecisionTreeRegressor(random_state = 13)), "TransformedDecisionTreeAuto")
	build_auto(TransformedTargetRegressor(LinearRegression(), func = numpy.log, inverse_func = numpy.exp), "TransformedLinearRegressionAuto")
Code Example #31
# - Hold out half of the data and fit LARS models

# Variable definition
# --- number of training samples
train_n = 100

# Create an instance and fit
# --- limit the number of non-zero coefficients to 12
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])

# Create an instance and fit
# --- limit the number of non-zero coefficients to 500 (the default)
lars_500 = Lars(n_nonzero_coefs=500)
lars_500.fit(reg_data[:train_n], reg_target[:train_n])

# Mean squared error
np.mean(
    np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2))

# 3 LARS as feature selection ---------------------------------------------------------------------

# Create an instance
lcv = LarsCV()

# Fit
lcv.fit(reg_data, reg_target)

# Non-zero coefficients
np.sum(lcv.coef_ != 0)