Example #1
def _test_ridge_cv(filter_):
    # Fit RidgeCV with its default efficient leave-one-out CV and check the fitted attributes.
    ridge_cv = RidgeCV()
    ridge_cv.fit(filter_(X_diabetes), y_diabetes)
    ridge_cv.predict(filter_(X_diabetes))

    assert len(ridge_cv.coef_.shape) == 1
    assert type(ridge_cv.intercept_) == np.float64

    # Refit with an explicit 5-fold splitter and repeat the same checks.
    cv = KFold(5)
    ridge_cv.set_params(cv=cv)
    ridge_cv.fit(filter_(X_diabetes), y_diabetes)
    ridge_cv.predict(filter_(X_diabetes))

    assert len(ridge_cv.coef_.shape) == 1
    assert type(ridge_cv.intercept_) == np.float64
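
This helper relies on fixtures (`filter_`, `X_diabetes`, `y_diabetes`) defined elsewhere in the test module. A minimal self-contained sketch of the same checks, assuming scikit-learn's bundled diabetes dataset and an illustrative alpha grid:

import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import KFold

X, y = load_diabetes(return_X_y=True)

# Default RidgeCV uses efficient leave-one-out cross-validation over its alpha grid.
ridge_cv = RidgeCV(alphas=[0.1, 1.0, 10.0])
ridge_cv.fit(X, y)
assert ridge_cv.coef_.ndim == 1          # one coefficient per feature
assert np.isscalar(ridge_cv.intercept_)  # single-target fit gives a scalar intercept

# The same checks hold with an explicit 5-fold splitter.
ridge_cv.set_params(cv=KFold(5))
ridge_cv.fit(X, y)
print(ridge_cv.alpha_, ridge_cv.coef_.shape)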
Example #2
    def linear_model(self, train_set, test_set, type='ridge_regression'):  # note: 'type' shadows the builtin
        print('          <<<<<<<<<<<<<<<<<<<<<< linear_model  >>>>>>>>>>>>>>>>>>>>>>>> ')

        xTrain, xTest, yTrain, yTest, yPrevTest, yPrevTrain, yPrevIndex = self.prepare_data(train_set, test_set)

        if type == 'ridge_regression':
            print('      <<< ridge-regression >>> ')
            # Candidate regularization strengths for RidgeCV.
            cvParams = {'ridgecv': [{'alphas': np.array([1, .1, .01, .001, .0001, 10, 100, 1000, 10000, 100000, 100000, 1000000])}]}
            model = RidgeCV()
            model.set_params(**{k: (v[0] if isinstance(v, list) else v) for k, v in cvParams['ridgecv'][0].items()})
        else:
            print('      <<< linear_regression >>>')
            model = linear_model.LinearRegression()  # the sklearn.linear_model module, not this method

        model.fit(xTrain, yTrain)
        pred_test = model.predict(xTest)
        pred_train = model.predict(xTrain)

        print('test MSE: ', mean_squared_error(yTest, pred_test))
        print('train MSE: ', mean_squared_error(yTrain, pred_train))

        print('test MAE: ', mean_absolute_error(yTest, pred_test))
        print('train MAE: ', mean_absolute_error(yTrain, pred_train))

        # Directional accuracy: fraction of samples where the predicted move relative to
        # the previous value has the same sign as the actual move.
        print('test accuracy: ', sum(1 for x, y in zip(np.sign(pred_test - yPrevTest), np.sign(yTest - yPrevTest)) if x == y) / float(len(yTest)))
        print('train accuracy: ', sum(1 for x, y in zip(np.sign(pred_train - yPrevTrain), np.sign(yTrain - yPrevTrain)) if x == y) / float(len(yTrain)))

        coef = model.coef_
        print('coef: ')
        print(coef)

        if type == 'ridge_regression':
            print('best alpha: ')
            print(model.alpha_)

        return pred_train, pred_test
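
The "accuracy" printed above is a directional hit rate: the share of samples where the predicted change relative to the previous value has the same sign as the actual change. A vectorized sketch of the same quantity (the helper name and arguments are illustrative, not part of the original class):

import numpy as np

def directional_accuracy(pred, actual, prev):
    # Fraction of samples where the predicted and actual moves share the same sign.
    return np.mean(np.sign(pred - prev) == np.sign(actual - prev))

# e.g. directional_accuracy(pred_test, yTest, yPrevTest)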
Example #3
    def __init__(self, num_dists=2, sigma=0.1, base_learner=None, **kwargs):
        self.num_dists = num_dists
        self.sigma = sigma

        # Default base learner: intercept-free ridge regression with leave-one-out CV
        # over a small alpha grid, keeping the per-sample CV errors.
        if base_learner is None:
            base_learner = RidgeCV(fit_intercept=False,
                                   alphas=[0.001, 0.01, 0.1, 100, 1000], cv=None,
                                   store_cv_values=True)

        if 'fit_intercept' not in kwargs:
            kwargs['fit_intercept'] = False

        self.base_learner = base_learner.set_params(**kwargs)
        self.R = None
        self.model = None
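
With `cv=None`, RidgeCV uses efficient leave-one-out cross-validation, and `store_cv_values=True` keeps the per-sample error for each alpha, which is presumably what makes it useful as a base learner here. A small sketch of that behavior on synthetic data (the data and grid are illustrative; recent scikit-learn releases rename the flag to `store_cv_results`):

import numpy as np
from sklearn.linear_model import RidgeCV

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 5))
y = X @ np.array([1.0, -2.0, 0.5, 0.0, 3.0]) + rng.normal(scale=0.1, size=100)

# Intercept-free ridge with leave-one-out CV over a small alpha grid,
# keeping the per-sample CV errors for each candidate alpha.
base = RidgeCV(fit_intercept=False, alphas=[0.001, 0.01, 0.1, 100, 1000],
               cv=None, store_cv_values=True)
base.fit(X, y)
print(base.alpha_)              # alpha selected by leave-one-out CV
print(base.cv_values_.shape)    # (n_samples, n_alphas) squared errors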
Example #4
class Model:
    params = Params()
    testsize = float(params.get_data_params()['testsize'])
    random_state = int(params.get_data_params()['randomstate'])

    def __init__(self, X, y, **kwargs):
        self.X, self.x_val, self.y, self.y_val = train_test_split(
            X, y, test_size=self.testsize, random_state=self.random_state)
        self.model = None
        self.FeatureSelectionType = kwargs['featureselection']
        self.features = self.X.columns
        self.model_coefficients = None
        self.EvalMetrics = kwargs['evalmetric']
        self.FeatSelCvFolds = kwargs['featureselectioncvfolds']
        self.CvFolds = kwargs['gridsearchcvfolds']
        self.gridSearch = kwargs['gridsearchcv']

        self.set_model()
        self.feature_selection()
        self.fit_model()

    def __repr__(self):
        return "Model(" + str(self.model) + ")"

    def set_model(self):
        model_name = self.params.get_model()
        if model_name == 'lr':
            self.model = LinearRegression()
            params = self.params.get_linear_reg()
            del params['regularization']
            self.model = self.model.set_params(**params)
        elif model_name == 'svr':
            self.model = SVR()
            self.model = self.model.set_params(**self.params.get_svr())
        else:
            pass

    def lasso(self):
        # LassoCV picks alpha by cross-validation; SelectFromModel keeps the features
        # whose coefficients pass its default importance threshold.
        estimator = LassoCV(cv=5, max_iter=10000)
        selector = SelectFromModel(estimator)
        selector = selector.fit(self.X, self.y)
        bool_mask = selector.get_support()
        self.features = list(compress(self.X.columns, bool_mask))
        self.X = self.X.loc[:, bool_mask]
        self.x_val = self.x_val.loc[:, bool_mask]

    def feature_selection(self):
        if self.FeatureSelectionType.lower() == "lasso":
            self.lasso()
        else:
            self.features = self.X.columns

    def fit_model(self):
        print("Fitting model..")
        model_type = self.params.get_model()
        if self.gridSearch and model_type == 'svr':
            params_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
                           'gamma': [0.0001, 0.001, 0.01, 0.1, 1],
                           'kernel': ['linear', 'rbf'],
                           }
            reg = GridSearchCV(self.model,
                               params_grid,
                               cv=5)
            reg.fit(self.X, self.y)
            self.model = self.model.set_params(**reg.best_params_)
            self.model.fit(self.X, self.y)
        elif model_type == 'lr':
            regularization = self.params.get_linear_reg()['regularization']
            if regularization.lower() == 'ridge':
                # Reuse the LinearRegression parameters (minus the ones RidgeCV
                # does not accept) on a 5-fold RidgeCV estimator.
                params = self.model.get_params()
                del params['copy_X']
                del params['n_jobs']
                self.model = RidgeCV(cv=5)
                self.model = self.model.set_params(**params)
                self.model.fit(self.X, self.y)
            else:
                self.model.fit(self.X, self.y)
        else:
            self.model.fit(self.X, self.y)


    def score_model(self):
        cv_fold = int(self.params.get_test_params()['testcvfold'])
        train_score_result = []
        val_score_result = []
        score_result = pd.DataFrame()
        y_pred = self.model.predict(self.x_val)
        scoring_list = ['r2', 'neg_mean_absolute_error', 'neg_mean_squared_error']
        for scoring in scoring_list:
            # cross_val_score returns negated errors for the neg_* metrics; np.abs flips them back.
            train_score_result.append(
                np.abs(np.mean(cross_val_score(self.model,
                                               self.X,
                                               self.y,
                                               scoring=scoring,
                                               cv=cv_fold))))
        # RMSE from the mean cross-validated MSE.
        train_score_result.append(np.sqrt(train_score_result[2]))
        val_score_result.append(r2_score(self.y_val, y_pred))
        val_score_result.append(mean_absolute_error(self.y_val, y_pred))
        val_score_result.append(mean_squared_error(self.y_val, y_pred))
        val_score_result.append(np.sqrt(val_score_result[2]))
        score_result['mean {}-fold cv'.format(cv_fold)] = train_score_result
        score_result['validation score'] = val_score_result
        score_result.index = ['r2', 'MAE', 'MSE', 'RMSE']
        print("""
#########################################################
########        Model and final parameters        #######
#########################################################\n
{}
""".format(self.model)
              )
        if self.params.get_model() == 'lr':
            coef_feature = pd.DataFrame()
            coef_feature['Coefficients'] = self.model.coef_
            coef_feature['Feature'] = self.X.columns
        else:
            print("Predictors used:")
            coef_feature = self.X.columns.values
        print("{}".format(coef_feature)
              )
        print("""
#########################################################
#########                Score                   #########
#########################################################\n
{}
""".format(score_result))
Example #5
    def pre_linear_model(self, train_set, test_set, old_pred_train, old_pred_test, type='ridge_regression'):
        print('          <<<<<<<<<<<<<<<<<<<<<< pre_linear_model  >>>>>>>>>>>>>>>>>>>>>>>> ')

        xTrain, xTest, yTrain, yTest, yPrevTest, yPrevTrain, yPrevIndex = self.prepare_data(train_set, test_set, remove_prev_label=True)

        # Train on the residuals of the earlier model's predictions.
        yTest = yTest - old_pred_test
        yTrain = yTrain - old_pred_train

        if type == 'ridge_regression':
            print('      <<< ridge-regression >>> ')
            cvParams = {'ridgecv': [{'alphas': np.array([1, .1, .01, .001, .0001, 10, 100, 1000, 10000, 100000, 100000, 1000000, 10000000, 100000000, 1000000000])}]}
            model = RidgeCV()
            model.set_params(**{k: (v[0] if isinstance(v, list) else v) for k, v in cvParams['ridgecv'][0].items()})
        else:
            print('      <<< linear_regression >>>')
            model = linear_model.LinearRegression()

        model.fit(xTrain, yTrain)
        pred_test = model.predict(xTest)
        pred_train = model.predict(xTrain)

        # Add the earlier predictions back so targets and predictions are on the original scale.
        yTest = yTest + old_pred_test
        pred_test = pred_test + old_pred_test

        yTrain = yTrain + old_pred_train
        pred_train = pred_train + old_pred_train

        print('test MSE: ', mean_squared_error(yTest, pred_test))
        print('train MSE: ', mean_squared_error(yTrain, pred_train))

        print('test MAE: ', mean_absolute_error(yTest, pred_test))
        print('train MAE: ', mean_absolute_error(yTrain, pred_train))

        # Directional accuracy relative to the previous observed value.
        print('test accuracy: ', sum(1 for x, y in zip(np.sign(pred_test - yPrevTest), np.sign(yTest - yPrevTest)) if x == y) / float(len(yTest)))
        print('train accuracy: ', sum(1 for x, y in zip(np.sign(pred_train - yPrevTrain), np.sign(yTrain - yPrevTrain)) if x == y) / float(len(yTrain)))

        coef = model.coef_
        print('coef: ')
        print(coef)

        if type == 'ridge_regression':
            print('best alpha: ')
            print(model.alpha_)

        # Side-by-side view of previous value, actual target, and prediction for the first 50 test rows.
        a = []
        a.append(yPrevTest.tolist())
        a.append(yTest)
        a.append(pred_test)
        a = np.transpose(a)
        print(a[:25, :])
        print(a[25:50, :])

        return pred_train, pred_test
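
Example #5 is a residual-correction step: the model is trained on what an earlier model got wrong (`y - old_pred`) and its output is added back onto the earlier predictions. A compact sketch of the pattern on synthetic data (all names and the feature split are illustrative):

import numpy as np
from sklearn.linear_model import LinearRegression, RidgeCV

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 4))
y = 2.0 * X[:, 0] + 1.5 * X[:, 2] + rng.normal(scale=0.1, size=200)

# First-stage model only sees the first two columns, so it misses the X[:, 2] signal.
first = LinearRegression().fit(X[:, :2], y)
old_pred = first.predict(X[:, :2])

# Second stage: fit RidgeCV on the first stage's residuals, then add the
# correction back to get predictions on the original scale.
residual_model = RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0, 100.0]).fit(X, y - old_pred)
final_pred = old_pred + residual_model.predict(X)

print(np.mean((y - old_pred) ** 2), np.mean((y - final_pred) ** 2))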