Ejemplo n.º 1
0
def test_sm_blend():
    """Blend PLS predictions with ``sm`` and check the RMSE before and after optimization.

    Three PLS models (3, 5 and 4 components) are trained on the 'wvl' columns
    of test_data.csv against ('comp', 'SiO2'); their predictions are blended
    first with fixed blend ranges and then with ranges optimized against the
    true values.
    """
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    truth_key = ('comp', 'SiO2')
    spectra = df['wvl']
    sio2 = df[truth_key]

    # Train one PLS model per (label, component count) and keep its predictions
    model_specs = (('model1', 3), ('model2', 5), ('model3', 4))
    for label, n_comp in model_specs:
        pls = reg.regression(method=['PLS'],
                             params=[{
                                 'n_components': n_comp,
                                 'scale': False
                             }])
        pls.fit(spectra, sio2)
        df[('predict', label)] = pls.predict(spectra)

    # Order in which the stored predictions are handed to the blender
    blend_order = ('model2', 'model1', 'model3', 'model1')
    predictions = [df[('predict', label)] for label in blend_order]
    stacked = np.array(predictions)

    sm_obj = sm.sm([[-9999, 30], [20, 60], [50, 9999]])

    # without optimization
    blended_predictions = sm_obj.do_blend(stacked)
    rmse = np.sqrt(np.average((blended_predictions - df[truth_key])**2))
    np.testing.assert_almost_equal(rmse, 12.703434300128926, decimal=5)

    # with optimization
    blended_predictions = sm_obj.do_blend(stacked,
                                          truevals=np.array(df[truth_key]))
    rmse = np.sqrt(np.average((blended_predictions - df[truth_key])**2))
    np.testing.assert_almost_equal(rmse, 9.954065920454982, decimal=5)

    expected_blendranges = [
        -9999., 36.5198746, 47.98157746, 56.2537253, 118.94036468, 9999.
    ]
    np.testing.assert_allclose(expected_blendranges,
                               sm_obj.blendranges,
                               rtol=1e-5)
    def setup(self):
        """Register the regression model currently configured in the UI without fitting it.

        Reads the algorithm name, the selected x/y variables and the y range
        from the widgets, asks the selected algorithm panel for its parameters,
        and stores an unfitted ``regression.regression`` object together with
        its x/y variable lists under a descriptive model key. Also makes sure
        'Model Coefficients' is listed in ``self.datakeys``.

        This is best-effort: any ordinary exception raised while building the
        model is swallowed so that setup never aborts.
        """
        self.setComboBox(self.chooseDataComboBox, self.datakeys)

        method = self.chooseAlgorithmComboBox.currentText()
        xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
        yvars = [('comp', str(y.text()))
                 for y in self.yVariableList.selectedItems()]
        yrange = [
            self.yMinDoubleSpinBox.value(),
            self.yMaxDoubleSpinBox.value()
        ]
        try:
            params, modelkey = self.alg[method].run()
            try:
                modelkey = "{} - {} - ({}, {}) {}".format(
                    method, yvars[0][-1], yrange[0], yrange[1], modelkey)
            except IndexError:
                # yvars is empty when no y variable has been selected
                modelkey = "Problem naming model - make sure you have selected a y variable"
            self.list_amend(self.modelkeys, self.curr_count, modelkey)
            self.models[modelkey] = regression.regression([method], [yrange],
                                                          [params])
            self.model_xvars[modelkey] = xvars
            self.model_yvars[modelkey] = yvars

            if 'Model Coefficients' not in self.datakeys:
                self.datakeys.append('Model Coefficients')

        except Exception:
            # Deliberately best-effort, but no longer a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            pass
Ejemplo n.º 3
0
def test_OLS():
    """Fit OLS (with intercept) on the externally defined x/y and check the training RMSE."""
    ols_model = regression(method=['OLS'], params=[{'fit_intercept': True}])
    ols_model.fit(x, y)

    predicted = np.squeeze(ols_model.predict(x))
    squared_error = (predicted - y)**2
    rmse = np.sqrt(np.average(squared_error))

    np.testing.assert_almost_equal(rmse, 5.604104598379565)
Ejemplo n.º 4
0
def test_OMP():
    """Fit OMP (with intercept) on the externally defined x/y and check the training RMSE."""
    omp_model = regression(method=['OMP'], params=[{'fit_intercept': True}])
    omp_model.fit(x, y)

    predicted = np.squeeze(omp_model.predict(x))
    squared_error = (predicted - y)**2
    rmse = np.sqrt(np.average(squared_error))

    np.testing.assert_almost_equal(rmse, 9.835802028648189)
Ejemplo n.º 5
0
    def run(self):
        """Build, fit and register the regression model selected in the UI.

        Steps:
          1. Make sure 'Model Coefficients' is registered in ``self.datakeys``
             and bump the global model counter.
          2. Read method, data set, x/y variables and y range from the widgets
             and obtain the algorithm parameters from the selected panel.
          3. Fit the model on the rows whose y value lies strictly inside
             ``yrange``.
          4. Best-effort: append the fitted coefficients (and intercept, when
             the model exposes one) to ``self.data['Model Coefficients']``.
        """
        if 'Model Coefficients' not in self.datakeys:
            Modules.data_count += 1
            self.list_amend(self.datakeys, Modules.data_count,
                            'Model Coefficients')
        Modules.model_count += 1
        self.count = Modules.model_count

        method = self.chooseAlgorithmComboBox.currentText()
        datakey = self.chooseDataComboBox.currentText()
        xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
        yvars = [('comp', str(y.text()))
                 for y in self.yVariableList.selectedItems()]
        yrange = [
            self.yMinDoubleSpinBox.value(),
            self.yMaxDoubleSpinBox.value()
        ]

        params, modelkey = self.alg[
            self.chooseAlgorithmComboBox.currentText()].run()
        modelkey = "{} - {} - ({}, {}) {}".format(method, yvars[0][-1],
                                                  yrange[0], yrange[1],
                                                  modelkey)
        self.list_amend(self.modelkeys, self.count, modelkey)
        self.models[modelkey] = regression.regression([method], [yrange],
                                                      [params])

        x = self.data[datakey].df[xvars]
        y = self.data[datakey].df[yvars]
        x = np.array(x)
        y = np.array(y)
        # keep only the samples whose y value is strictly inside the range
        ymask = np.squeeze((y > yrange[0]) & (y < yrange[1]))
        y = y[ymask]
        x = x[ymask, :]
        self.models[modelkey].fit(x, y)
        self.model_xvars[modelkey] = xvars
        self.model_yvars[modelkey] = yvars
        try:
            # one row of coefficients, with columns named after the x variables
            coef = np.squeeze(self.models[modelkey].model.coef_)
            coef = pd.DataFrame(coef)
            coef.index = pd.MultiIndex.from_tuples(
                self.data[datakey].df[xvars].columns.values)
            coef = coef.T
            coef[('meta', 'Model')] = modelkey
            try:
                coef[('meta',
                      'Intercept')] = self.models[modelkey].model.intercept_
            except Exception:
                # the intercept is optional; skip it if it cannot be stored
                pass
            try:
                self.data['Model Coefficients'] = spectral_data(
                    pd.concat([self.data['Model Coefficients'].df, coef]))
            except Exception:
                # no usable coefficient table yet: start one from this model
                self.data['Model Coefficients'] = spectral_data(coef)

        except Exception:
            # Storing coefficients is best-effort (e.g. the model has no
            # coef_); unlike a bare ``except`` this does not swallow
            # KeyboardInterrupt / SystemExit.
            pass
Ejemplo n.º 6
0
def test_KRR():
    """Construct a KRR regression with yrange=[0.0, 100.0]; no assertions are made."""
    krr_settings = {
        'alpha': 0,
        'kernel': 'linear',
        'gamma': 'None',
        'degree': 3.0,
        'coef0': 1.0,
        'kernel_params': 'None',
    }
    regress = regression(method=['KRR'],
                         yrange=[0.0, 100.0],
                         params=[krr_settings])
Ejemplo n.º 7
0
def test_badfit():
    """Fit PLS with 300 requested components and check that ``goodfit`` is reported as False."""
    pls_settings = {'n_components': 300, 'scale': False}
    regress = regression(method=['PLS'], params=[pls_settings])
    regress.fit(x, y)

    fit_flag = regress.goodfit
    assert fit_flag == False  # noqa: E712 - equality comparison kept as in the original
Ejemplo n.º 8
0
def test_LASSO():
    """Construct a LASSO regression with yrange=[0.0, 100.0]; no assertions are made."""
    lasso_settings = {
        'alpha': 1.0,
        'fit_intercept': True,
        'max_iter': 1000,
        'tol': 0.0001,
        'positive': False,
        'selection': 'random',
    }
    regress = regression(method=['LASSO'],
                         yrange=[0.0, 100.0],
                         params=[lasso_settings])
Ejemplo n.º 9
0
def test_Ridge():
    """Construct a Ridge regression with yrange=[0.0, 100.0]; no assertions are made."""
    ridge_settings = {
        'alpha': 1.0,
        'copy_X': True,
        'fit_intercept': True,
        'max_iter': 'None',
        'normalize': False,
        'solver': 'auto',
        'tol': 0.0,
        'random_state': '',
    }
    regress = regression(method=['Ridge'],
                         yrange=[0.0, 100.0],
                         params=[ridge_settings])
Ejemplo n.º 10
0
def test_LARS2_CV_true():
    """Construct a LARS regression with the 'CV' flag set to True; no assertions are made."""
    lars_settings = {
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': False,
        'precompute': True,
        'copy_X': True,
        'eps': 2.220445,
        'CV': True,
    }
    regress = regression(method=['LARS'],
                         yrange=[0.0, 100.0],
                         params=[lars_settings])
Ejemplo n.º 11
0
def test_Lasso_LARS_model_none():
    """Construct a 'Lasso LARS' regression with 'model' set to None; no assertions are made."""
    lasso_lars_settings = {
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': True,
        'copy_X': True,
        'precompute': 'Auto',
        'max_iter': 500,
        'model': None,
        'eps': 2.220446,
    }
    regress = regression(method=['Lasso LARS'],
                         yrange=[0.0, 100.0],
                         params=[lasso_lars_settings])
Ejemplo n.º 12
0
def test_LARS():
    """Construct a LARS regression with yrange=[0.0, 100.0]; no assertions are made."""
    lars_settings = {
        'n_nonzero_coefs': 500,
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': False,
        'precompute': True,
        'copy_X': True,
        'eps': 2.220445,
        'fit_path': True,
    }
    regress = regression(method=['LARS'],
                         yrange=[0.0, 100.0],
                         params=[lars_settings])
Ejemplo n.º 13
0
def test_Ridge():
    """Fit Ridge (alpha=1.0) on the externally defined x/y and check the training RMSE."""
    ridge_settings = {'alpha': 1.0, 'fit_intercept': True}
    ridge_model = regression(method=['Ridge'], params=[ridge_settings])
    ridge_model.fit(x, y)

    predicted = np.squeeze(ridge_model.predict(x))
    squared_error = (predicted - y)**2
    rmse = np.sqrt(np.average(squared_error))

    np.testing.assert_almost_equal(rmse, 19.29172384871638)
Ejemplo n.º 14
0
def test_LASSO():
    """Fit LASSO (alpha=1.0) on the externally defined x/y and check the training RMSE."""
    lasso_settings = {'alpha': 1.0, 'fit_intercept': True, 'positive': False}
    lasso_model = regression(method=['LASSO'], params=[lasso_settings])
    lasso_model.fit(x, y)

    predicted = np.squeeze(lasso_model.predict(x))
    squared_error = (predicted - y)**2
    rmse = np.sqrt(np.average(squared_error))

    np.testing.assert_almost_equal(rmse, 22.815757879917708)
Ejemplo n.º 15
0
def test_SVR():
    """Construct an SVR regression with yrange=[0.0, 100.0]; no assertions are made."""
    svr_settings = {
        'C': 1.0,
        'epsilon': 0.1,
        'kernel': 'rbf',
        'degree': 0,
        'gamma': 'auto',
        'coef0': 0.0,
        'shrinking': False,
        'tol': 0.001,
        'cache_size': 200,
        'verbose': False,
        'max_iter': -1,
    }
    regress = regression(method=['SVR'],
                         yrange=[0.0, 100.0],
                         params=[svr_settings])
Ejemplo n.º 16
0
def test_Elastic_Net_CV_true():
    """Construct an 'Elastic Net' regression with the 'CV' flag set to True; no assertions are made."""
    enet_settings = {
        'l1_ratio': 0.5,
        'fit_intercept': True,
        'normalize': False,
        'precompute': 'False',
        'max_iter': 1000,
        'copy_X': True,
        'tol': 0.0001,
        'positive': False,
        'selection': 'cyclic',
        'random_state': 'None',
        'CV': True,
    }
    regress = regression(method=['Elastic Net'],
                         yrange=[0.0, 100.0],
                         params=[enet_settings])
Ejemplo n.º 17
0
def test_Elastic_Net():
    """Fit 'Elastic Net' on the externally defined x/y and check the training RMSE."""
    enet_settings = {
        'alpha': 1.0,
        'l1_ratio': 0.5,
        'fit_intercept': True,
        'positive': False,
    }
    enet_model = regression(method=['Elastic Net'], params=[enet_settings])
    enet_model.fit(x, y)

    predicted = np.squeeze(enet_model.predict(x))
    squared_error = (predicted - y)**2
    rmse = np.sqrt(np.average(squared_error))

    np.testing.assert_almost_equal(rmse, 22.800420060822468)
Ejemplo n.º 18
0
def test_Bayesian_Ridge():
    """Construct a 'Bayesian Ridge' regression with yrange=[0.0, 100.0]; no assertions are made."""
    brr_settings = {
        'n_iter': 300,
        'tol': 0.001,
        'alpha_1': 0.001,
        'alpha_2': 1e-06,
        'lambda_1': 1e-06,
        'lambda_2': 1e-06,
        'compute_score': False,
        'fit_intercept': True,
        'normalize': False,
        'copy_X': True,
        'verbose': False,
    }
    regress = regression(method=['Bayesian Ridge'],
                         yrange=[0.0, 100.0],
                         params=[brr_settings])
Ejemplo n.º 19
0
def test_ARD():
    """Construct an ARD regression with yrange=[0.0, 100.0]; no assertions are made."""
    ard_settings = {
        'n_iter': 300,
        'tol': 0.001,
        'alpha_1': 0.001,
        'alpha_2': 1e-06,
        'lambda_1': 1e-06,
        'lambda_2': 1e-06,
        'compute_score': False,
        'threshold_lambda': 100000,
        'fit_intercept': True,
        'normalize': False,
        'copy_X': True,
        'verbose': False,
    }
    regress = regression(method=['ARD'],
                         yrange=[0.0, 100.0],
                         params=[ard_settings])
Ejemplo n.º 20
0
def test_KRR():
    """Fit KRR (linear kernel) on the externally defined x/y and check the training RMSE to 2 decimals."""
    krr_settings = {
        'alpha': 0,
        'kernel': 'linear',
        'gamma': 'None',
        'degree': 3.0,
        'coef0': 1.0,
        'kernel_params': 'None',
    }
    krr_model = regression(method=['KRR'], params=[krr_settings])
    krr_model.fit(x, y)

    predicted = np.squeeze(krr_model.predict(x))
    squared_error = (predicted - y)**2
    rmse = np.sqrt(np.average(squared_error))

    np.testing.assert_almost_equal(rmse, 5.603702809509191, decimal=2)
Ejemplo n.º 21
0
def test_LARS():
    """Fit LARS (5 non-zero coefficients) on the externally defined x/y and check the training RMSE."""
    lars_settings = {
        'n_nonzero_coefs': 5,
        'fit_intercept': True,
        'normalize': False,
        'precompute': True,
        'copy_X': True,
        'eps': 2.220445,
        'fit_path': True,
    }
    lars_model = regression(method=['LARS'], params=[lars_settings])
    lars_model.fit(x, y)

    predicted = np.squeeze(lars_model.predict(x))
    squared_error = (predicted - y)**2
    rmse = np.sqrt(np.average(squared_error))

    np.testing.assert_almost_equal(rmse, 21.952591101815294)
Ejemplo n.º 22
0
def test_Bayesian_Ridge():
    """Fit the 'BRR' method on the externally defined x/y and check the training RMSE."""
    brr_settings = {
        'n_iter': 300,
        'tol': 0.001,
        'alpha_1': 0.001,
        'alpha_2': 1e-06,
        'lambda_1': 1e-06,
        'lambda_2': 1e-06,
        'compute_score': False,
        'fit_intercept': True,
        'normalize': False,
    }
    brr_model = regression(method=['BRR'], params=[brr_settings])
    brr_model.fit(x, y)

    predicted = np.squeeze(brr_model.predict(x))
    squared_error = (predicted - y)**2
    rmse = np.sqrt(np.average(squared_error))

    np.testing.assert_almost_equal(rmse, 6.3894201026386135)
Ejemplo n.º 23
0
def test_SVR():
    """Fit SVR (rbf kernel) on the externally defined x/y and check the training RMSE."""
    svr_settings = {
        'C': 1.0,
        'epsilon': 0.1,
        'kernel': 'rbf',
        'degree': 0,
        'gamma': 'auto',
        'coef0': 0.0,
        'shrinking': False,
        'tol': 0.001,
        'cache_size': 200,
        'verbose': False,
        'max_iter': -1,
    }
    svr_model = regression(method=['SVR'], params=[svr_settings])
    svr_model.fit(x, y)

    predicted = np.squeeze(svr_model.predict(x))
    squared_error = (predicted - y)**2
    rmse = np.sqrt(np.average(squared_error))

    np.testing.assert_almost_equal(rmse, 23.740048198035947)
Ejemplo n.º 24
0
def test_ARD():
    """Fit ARD on the externally defined x/y and check the training RMSE."""
    ard_settings = {
        'n_iter': 300,
        'tol': 0.001,
        'alpha_1': 0.001,
        'alpha_2': 1e-06,
        'lambda_1': 1e-06,
        'lambda_2': 1e-06,
        'compute_score': False,
        'threshold_lambda': 100000,
        'fit_intercept': True,
        'normalize': False,
        'copy_X': True,
        'verbose': False,
    }
    ard_model = regression(method=['ARD'], params=[ard_settings])
    ard_model.fit(x, y)

    predicted = np.squeeze(ard_model.predict(x))
    squared_error = (predicted - y)**2
    rmse = np.sqrt(np.average(squared_error))

    np.testing.assert_almost_equal(rmse, 6.714452573751844)
Ejemplo n.º 25
0
def test_PLS():
    """Fit a 3-component PLS model and check its RMSE, Q residuals and leverage.

    The RMSE is computed on the same externally defined x/y used for fitting;
    Q residuals and leverage come from ``calc_Qres_Lev`` and only the first
    ten values of each are compared.
    """
    pls_settings = {'n_components': 3, 'scale': False}
    pls_model = regression(method=['PLS'], params=[pls_settings])
    pls_model.fit(x, y)

    predicted = np.squeeze(pls_model.predict(x))
    squared_error = (predicted - y)**2
    rmse = np.sqrt(np.average(squared_error))
    np.testing.assert_almost_equal(rmse, 9.568890617713505)

    pls_model.calc_Qres_Lev(x)

    Qres_expected = [
        0.04055878, 0.04188589, 0.04159104, 0.04374264, 0.04080776, 0.04072383,
        0.04057845, 0.04053754, 0.04056575, 0.04077855
    ]
    first_qres = pls_model.Q_res[0:10]
    np.testing.assert_array_almost_equal(first_qres, Qres_expected)

    leverage_expected = [
        0.01225164, 0.01219529, 0.01431885, 0.03043435, 0.05013193, 0.01418457,
        0.01055998, 0.00554777, 0.00891671, 0.00912439
    ]
    first_leverage = pls_model.leverage[0:10]
    np.testing.assert_array_almost_equal(first_leverage, leverage_expected)
Ejemplo n.º 26
0
def test_OLS():
    """Construct an OLS regression with yrange=[0.0, 100.0]; no assertions are made."""
    ols_settings = {'fit_intercept': True}
    regress = regression(method=['OLS'],
                         yrange=[0.0, 100.0],
                         params=[ols_settings])
Ejemplo n.º 27
0
def test_OMP_CV_false():
    """Construct an OMP regression with the 'CV' flag set to False; no assertions are made."""
    omp_settings = {'fit_intercept': True, 'CV': False}
    regress = regression(method=['OMP'],
                         yrange=[0.0, 100.0],
                         params=[omp_settings])
Ejemplo n.º 28
0
def test_Ridge_CV_true():
    """Construct a Ridge regression with the 'CV' flag set to True; no assertions are made."""
    ridge_settings = {
        'fit_intercept': True,
        'normalize': False,
        'CV': True,
    }
    regress = regression(method=['Ridge'],
                         yrange=[0.0, 100.0],
                         params=[ridge_settings])
Ejemplo n.º 29
0
def test_OMP():
    """Construct an OMP regression with 'precompute' enabled; no assertions are made."""
    omp_settings = {'fit_intercept': True, 'precompute': True}
    regress = regression(method=['OMP'],
                         yrange=[0.0, 100.0],
                         params=[omp_settings])
Ejemplo n.º 30
0
Archivo: cv.py Proyecto: jlaura/PySAT-1
    def do_cv(self, Train, xcols='wvl', ycol=('comp', 'SiO2'), method='PLS',
              yrange=None):
        """Cross validate every parameter permutation in ``self.paramgrid``.

        For each permutation a model is fit once per fold (leaving that fold
        out) and once on the full training set. Hold-out and calibration
        predictions are written into ``Train`` under the 'predict' column
        group, so ``Train`` is modified in place.

        Parameters
        ----------
        Train : data frame holding the training data; it is indexed with
            ``xcols``, ``ycol`` and ``('meta', 'Folds')``.
        xcols : column key selecting the predictor columns.
        ycol : column key selecting the response column.
        method : regression method name passed to ``regression``.
        yrange : two-element sequence passed to ``regression`` and used in the
            model keys. Defaults to ``[0, 100]``.

        Returns
        -------
        tuple ``(Train, output, models, modelkeys)`` where ``output`` is a data
        frame with the parameters, RMSEC, RMSECV and per-fold RMSECV under a
        'cv' top-level column, and ``models``/``modelkeys`` list only the
        full-data fits whose ``goodfit`` flag is set.

        Raises
        ------
        KeyError
            If ``Train`` has no ``('meta', 'Folds')`` column.
        """
        # None sentinel instead of a mutable list default, which would be
        # shared between calls and is handed on to ``regression``.
        if yrange is None:
            yrange = [0, 100]

        try:
            # create an iterator for cross validation based on the predefined folds
            cv_iterator = LeaveOneLabelOut(Train[('meta', 'Folds')])
        except KeyError:
            print('***No folds found! Did you remember to define folds before running cross validation?***')
            # Without folds nothing below can work; re-raise instead of
            # failing later with an unrelated NameError on cv_iterator.
            raise

        rmsecv_folds = []
        rmsec = []
        rmsecv = []
        models = []
        modelkeys = []

        # loop through the grid of parameters, do cross validation for each permutation
        for i in range(len(self.paramgrid)):
            print(self.paramgrid[i])
            model = regression([method], [yrange], [self.paramgrid[i]])
            # NOTE(review): with the default tuple ycol, ycol[0][-1] is the last
            # character of 'comp'; presumably callers pass a list of tuples - confirm.
            modelkey = "{} - {} - ({}, {}) {}".format(method, ycol[0][-1], yrange[0], yrange[1], self.paramgrid[i])

            # name of the column in which the hold-out predictions are stored
            # (the same for every fold, so it is built once per permutation)
            cvcol = ('predict', '"' + method + '-CV-' + str(self.paramgrid[i]) + '"')

            rmsecv_folds_tmp = []  # RMSECV for each fold
            for train, holdout in cv_iterator:  # Iterate through each of the folds in the training set
                cv_train = Train.iloc[train]  # data used to create the model
                cv_holdout = Train.iloc[holdout]  # data held out of the model
                model.fit(cv_train[xcols], cv_train[ycol])
                if model.goodfit:
                    y_pred_holdout = model.predict(cv_holdout[xcols])
                else:
                    # a failed fit yields NaN predictions of the right shape
                    y_pred_holdout = cv_holdout[ycol] * np.nan
                # NOTE(review): DataFrame.set_value was deprecated in pandas 0.21
                # and removed in 1.0; kept as-is because the target pandas
                # version is not visible here.
                Train.set_value(Train.index[holdout], cvcol, y_pred_holdout)
                rmsecv_folds_tmp.append(RMSE(y_pred_holdout, cv_holdout[ycol]))

            rmsecv_folds.append(rmsecv_folds_tmp)
            rmsecv.append(RMSE(Train[ycol], Train[cvcol]))

            # fit on the full training set with the current settings
            model.fit(Train[xcols], Train[ycol])
            if model.goodfit:
                models.append(model)
                modelkeys.append(modelkey)
                ypred_train = model.predict(Train[xcols])
            else:
                ypred_train = Train[ycol] * np.nan
            calcol = ('predict', '"' + method + '-Cal-' + str(self.paramgrid[i]) + '"')
            Train[calcol] = ypred_train
            rmsec.append(RMSE(ypred_train, Train[ycol]))

        output = pd.DataFrame(self.paramgrid)
        output['RMSEC'] = rmsec
        output['RMSECV'] = rmsecv
        rmsecv_folds = np.array(rmsecv_folds)
        for fold in range(len(rmsecv_folds[0, :])):
            output['Fold' + str(fold)] = rmsecv_folds[:, fold]
        # make the columns of the output data frame multi-indexed
        output.columns = pd.MultiIndex.from_tuples(
            [('cv', name) for name in output.columns.values])
        return Train, output, models, modelkeys
Ejemplo n.º 31
0
Archivo: cv.py Proyecto: sumesh1/PyHAT
    def do_cv(self,
              Train,
              xcols='wvl',
              ycol=('comp', 'SiO2'),
              method='PLS',
              yrange=None,
              calc_path=False,
              alphas=None):
        """Cross validate every parameter permutation in ``self.paramgrid``.

        For each permutation the folds in ``Train[('meta', 'Folds')]`` are held
        out one at a time, predicted, and scored; the model is then evaluated
        on the full training set. Prediction columns are added to ``Train``
        under the 'predict' column group, so ``Train`` is modified in place.

        Parameters
        ----------
        Train : data frame holding the training data; it is indexed with
            ``xcols``, ``ycol`` and ``('meta', 'Folds')``.
        xcols : column key selecting the predictor columns.
        ycol : column key selecting the response column.
        method : regression method name. 'Local Regression' is handled by
            ``local_regression.LocalRegression``; every other method goes
            through ``regression``.
        yrange : two-element sequence used in this method only to label the
            model keys. Defaults to the min and max of ``Train[ycol]``.
        calc_path : when True, ``path_calc`` is used to evaluate all
            ``alphas`` at once instead of fitting a single model.
        alphas : values forwarded to ``path_calc`` and written to the
            'alphas' column of the output.

        Returns
        -------
        tuple ``(Train, output, models, modelkeys, predictkeys)``, or ``0``
        when ``Train`` has no ``('meta', 'Folds')`` column.
        """
        models = []
        modelkeys = []
        predictkeys = []
        # accumulated results across permutations; None until the first one
        output = None

        if yrange is None:
            # Train must be indexed, not called, to get the response column
            yrange = [np.min(Train[ycol]), np.max(Train[ycol])]

        for i in range(len(self.paramgrid)):
            print('Permutation ' + str(i + 1) + ' of ' +
                  str(len(self.paramgrid)))
            paramstring = ''
            for key in self.paramgrid[i].keys():
                paramstring = paramstring + key + ': ' + str(
                    self.paramgrid[i][key]) + '; '
            print(paramstring[:-2])

            try:
                # create an iterator for cross validation based on the predefined folds
                cv_iterator = LeaveOneGroupOut().split(
                    Train[xcols], Train[ycol], Train[('meta', 'Folds')])
                n_folds = LeaveOneGroupOut().get_n_splits(
                    groups=Train[('meta', 'Folds')])

            except KeyError:
                print(
                    '***No folds found! Did you remember to define folds before running cross validation?***'
                )
                return 0

            # create an empty output data frame to serve as template
            output_tmp = pd.DataFrame()
            # add columns for RMSEC, RMSECV, and RMSE for the folds
            output_tmp['RMSEC'] = 0
            output_tmp['RMSECV'] = 0

            for f in np.array(range(n_folds)) + 1:
                output_tmp['Fold ' + str(f)] = 0
            # fill in the output template based on the current permutation parameters
            for k in self.paramgrid[i].keys():
                output_tmp.at[0, k] = self.paramgrid[i][k]
            if alphas is not None:
                # one output row per alpha
                output_tmp = pd.concat([output_tmp] * len(alphas))
                output_tmp['alphas'] = alphas

            output_tmp['Method'] = method

            foldcount = 1

            for train, holdout in cv_iterator:  # Iterate through each of the folds in the training set

                cv_train = Train.iloc[
                    train]  # extract the data to be used to create the model
                cv_holdout = Train.iloc[
                    holdout]  # extract the data that will be held out of the model

                if calc_path:
                    # get X and y data
                    X = cv_train[xcols]
                    y = cv_train[ycol]

                    # do the path calculation
                    # NOTE(review): with the default tuple ycol, ycol[0][-1] is
                    # the last character of 'comp'; presumably callers pass a
                    # list of tuples - confirm.
                    path_alphas,\
                    path_coefs,\
                    intercepts,\
                    path_n_iters,\
                    y_pred_holdouts,\
                    fold_rmses,\
                    cvcols = path_calc(X, y, cv_holdout[xcols], cv_holdout[ycol], alphas, self.paramgrid[i], yname = ycol[0][-1], method = method)

                    output_tmp['Fold ' + str(foldcount)] = fold_rmses
                    for n in range(len(path_alphas)):
                        Train.at[Train.index[holdout],
                                 cvcols[n]] = y_pred_holdouts[n]

                else:

                    if method == 'Local Regression':
                        params = self.paramgrid[i]
                        try:
                            # on the first pass, pop off the n_neighbors parameter so it can be passed correctly
                            n_neighbors = params['n_neighbors']
                            params.pop('n_neighbors')
                        except KeyError:
                            # already popped on an earlier fold; reuse the
                            # n_neighbors value captured then
                            pass
                        cvcols = [
                            ('predict',
                             '"' + method + '- CV -' + str(self.paramgrid[i]) +
                             ' n_neighbors: ' + str(n_neighbors) + '"')
                        ]
                        model = local_regression.LocalRegression(
                            params, n_neighbors=n_neighbors)
                        y_pred_holdout, coeffs, intercepts = model.fit_predict(
                            cv_train[xcols], cv_train[ycol], cv_holdout[xcols])
                    else:
                        cvcols = [('predict', '"' + method + '- CV -' +
                                   str(self.paramgrid[i]) + '"')]

                        # fit the model and predict the held-out data
                        model = regression([method], [self.paramgrid[i]])
                        model.fit(cv_train[xcols], cv_train[ycol])
                        if model.goodfit:
                            y_pred_holdout = model.predict(cv_holdout[xcols])
                        else:
                            # a failed fit yields NaN predictions of the right shape
                            y_pred_holdout = cv_holdout[ycol] * np.nan
                    # add the predictions to the appropriate column in the training data
                    Train.at[Train.index[holdout], cvcols[0]] = y_pred_holdout
                    # store the RMSECV for this fold
                    output_tmp['Fold ' + str(foldcount)] = RMSE(
                        y_pred_holdout, cv_holdout[ycol])

                foldcount = foldcount + 1

            # now that all the folds have been held out and predicted, calculate the overall rmsecv and add it to the output
            rmsecv = []
            for col in cvcols:
                rmsecv.append(RMSE(Train[col], Train[ycol]))
                predictkeys.append(col[-1])
            output_tmp['RMSECV'] = rmsecv

            # fit the model on the full training set using the current settings
            if calc_path:
                X = Train[xcols]
                y = Train[ycol]

                path_alphas, \
                path_coefs, \
                intercepts, \
                path_n_iters, \
                ypred_train, \
                rmsec_train, \
                cols = path_calc(X, y, X, y, alphas, self.paramgrid[i], colname = 'Cal', yname = ycol[0][-1], method = method)

                for n in range(len(path_alphas)):
                    Train[cols[n]] = ypred_train[
                        n]  # put the training set predictions in the data frame
                    predictkeys.append(cols[n][-1])
                    # create the model and manually set its parameters based on the path results rather than training it
                    model = regression([method], [self.paramgrid[i]])
                    model.model.set_params(alpha=path_alphas[n])
                    setattr(model.model, 'intercept_', intercepts[n])
                    setattr(model.model, 'coef_', np.squeeze(path_coefs)[:, n])
                    setattr(model.model, 'n_iter_', path_n_iters[n])

                    # add the model and its name to the list
                    models.append(model)
                    modelkey = "{} - {} - ({}, {}) Alpha: {}, {}".format(
                        method, ycol[0][-1], yrange[0], yrange[1],
                        path_alphas[n], self.paramgrid[i])
                    modelkeys.append(modelkey)

                output_tmp['RMSEC'] = rmsec_train
            else:
                if method == 'Local Regression':
                    model = local_regression.LocalRegression(
                        self.paramgrid[i], n_neighbors=n_neighbors)
                    modelkey = "{} - {} - ({}, {}) {} n_neighbors: {}".format(
                        method, ycol[0][-1], yrange[0], yrange[1],
                        self.paramgrid[i], n_neighbors)
                else:
                    model = regression([method], [self.paramgrid[i]])
                    modelkey = "{} - {} - ({}, {}) {}".format(
                        method, ycol[0][-1], yrange[0], yrange[1],
                        self.paramgrid[i])
                models.append(model)
                modelkeys.append(modelkey)
                # NaN placeholder, overwritten below when the fit succeeds
                ypred_train = Train[ycol] * np.nan
                if method == 'Local Regression':
                    ypred_train, coeffs, intercepts = model.fit_predict(
                        Train[xcols], Train[ycol], Train[xcols])
                else:
                    model.fit(Train[xcols], Train[ycol])
                    # if the fit is good, then predict the training set
                    if model.goodfit:
                        ypred_train = model.predict(Train[xcols])
                    else:
                        # drop the model that was just appended
                        models = models[:-1]
                        modelkeys = modelkeys[:-1]

                # add the calibration predictions to the appropriate column
                if method == 'Local Regression':
                    calcol = ('predict', '"' + method + '- Cal -' +
                              str(self.paramgrid[i]) + ' n_neighbors: ' +
                              str(n_neighbors) + '"')
                else:
                    calcol = ('predict', '"' + method + '- Cal -' +
                              str(self.paramgrid[i]) + '"')
                predictkeys.append(calcol[-1])
                Train[calcol] = ypred_train
                # store the RMSEC for the current settings
                output_tmp['RMSEC'] = RMSE(ypred_train, Train[ycol])

            # append this permutation's rows to the accumulated output
            if output is None:
                output = output_tmp
            else:
                output = pd.concat((output, output_tmp))

        # make the columns of the output data frame multi-indexed
        cols = output.columns.values
        cols = [('cv', i) for i in cols]
        output.columns = pd.MultiIndex.from_tuples(cols)

        return Train, output, models, modelkeys, predictkeys
Ejemplo n.º 32
0
def test_PLS():
    """Construct a PLS regression requesting 0 components; no assertions are made."""
    pls_settings = {'n_components': 0, 'scale': False}
    regress = regression(method=['PLS'],
                         yrange=[0.0, 100.0],
                         params=[pls_settings])