Example #1
0
 def predict(self, x):
     """Return a list holding one prediction result per submodel.

     ``x`` is a list of data frames, one for each entry of ``self.submodels``
     and in the same order, so that every submodel can be fed data that was
     normalized differently.

     For each submodel the 'wvl' column group of its frame is selected, run
     through ``meancenter`` together with the mean vector stored for that
     submodel in ``self.mean_vects``, and the 'wvl' part of the result is
     handed to the submodel's own ``predict``.
     """
     def _predict_one(position, model):
         # Keep the 'wvl' level on the columns so meancenter can address it by name.
         spectra = x[position].xs('wvl', axis=1, level=0, drop_level=False)
         centered, _ = meancenter(spectra, 'wvl', previous_mean=self.mean_vects[position])
         return model.predict(centered['wvl'])

     return [_predict_one(position, model) for position, model in enumerate(self.submodels)]
Example #2
0
 def predict(self, x):
     """Predict with every submodel and return the results as a list.

     ``x`` must be a list of data frames ordered like ``self.submodels``;
     entry ``x[n]`` is the input for submodel ``n``. Supplying one frame per
     submodel lets each of them receive its own normalization.
     """
     outputs = []
     for index, submodel in enumerate(self.submodels):
         # Select the spectral columns but keep the top-level 'wvl' label in place.
         spectra = x[index].xs('wvl', axis=1, level=0, drop_level=False)
         stored_mean = self.mean_vects[index]
         # Re-use the mean vector recorded for this submodel instead of computing a new one.
         centered, _unused_mean = meancenter(spectra, 'wvl', previous_mean=stored_mean)
         outputs.append(submodel.predict(centered['wvl']))
     return outputs
Example #3
0
    def fit(self, trainsets, ranges, ncs, ycol, figpath=None):
        """Fit one PLS submodel per entry of ``ranges``.

        Parameters
        ----------
        trainsets : sequence
            One training data frame per range; ``trainsets[i]`` is filtered
            with ``within_range.within_range(trainsets[i], ranges[i], ycol)``
            before fitting submodel ``i``.
        ranges : sequence
            One range per submodel. ``rangei[0]`` and ``rangei[1]`` are also
            used to label the diagnostic figure.
        ncs : sequence of int
            Number of PLS components for each submodel.
        ycol : str
            Name of the column under 'meta' that is regressed on.
        figpath : str, optional
            When given, a Q-residual vs. leverage scatter plot is saved into
            this directory for every submodel.

        Side effects
        ------------
        Stores ``ranges``, ``ncs``, ``ycol``, ``submodels`` and ``mean_vects``
        on the instance. ``submodels`` and ``mean_vects`` are always set, even
        when ``ranges`` is empty. When ``figpath`` is given, ``leverage`` and
        ``Q_res`` are stored as well; they hold the values of the last
        submodel that was fitted.
        """
        self.ranges = ranges
        self.ncs = ncs
        self.ycol = ycol
        submodels = []
        mean_vects = []
        for i, rangei in enumerate(ranges):
            data_tmp = within_range.within_range(trainsets[i], rangei, ycol)
            x = data_tmp.xs('wvl', axis=1, level=0, drop_level=False)
            y = data_tmp['meta'][ycol]
            # mean center training data
            x_centered, x_mean_vect = meancenter(x, 'wvl')
            pls = PLSRegression(n_components=ncs[i], scale=False)
            # Fit on the centered matrix: it is the one the residuals below are
            # computed against, and its mean vector is what gets stored for
            # later use.
            # NOTE(review): if meancenter centers its argument in place this is
            # the same data as ``x`` -- confirm against meancenter.
            pls.fit(x_centered, y)
            submodels.append(pls)
            mean_vects.append(x_mean_vect)
            if figpath is not None:
                scores = pls.x_scores_
                # Spectral residuals: what scores x loadings^T does not reproduce.
                residuals = np.asarray(
                    x_centered - np.dot(scores, pls.x_loadings_.transpose()))
                # Q is the per-sample sum of squared residuals, i.e. the
                # diagonal of E E^T, computed without building the n x n matrix.
                Q_res = np.sum(residuals ** 2, axis=1)
                # Leverage is the diagonal of T (T^T T)^-1 T^T, again computed
                # row by row instead of forming the full hat matrix.
                projected = scores @ np.linalg.inv(scores.transpose() @ scores)
                leverage = np.sum(projected * scores, axis=1)

                range_label = str(rangei[0]) + '-' + str(rangei[1])
                plot.figure()
                plot.scatter(leverage, Q_res, color='r', edgecolor='k')
                plot.title(ycol + ' (' + range_label + ')')
                plot.xlabel('Leverage')
                plot.ylabel('Q')
                plot.ylim([0, 1.1 * np.max(Q_res)])
                plot.xlim([0, 1.1 * np.max(leverage)])

                plot.savefig(figpath + '/' + ycol + '_' + range_label +
                             'Qres_vs_Leverage.png',
                             dpi=600)
                self.leverage = leverage
                self.Q_res = Q_res
        # Assigned after the loop so the attributes exist even for empty ``ranges``.
        self.submodels = submodels
        self.mean_vects = mean_vects
Example #4
0
    def fit(self, trainsets, ranges, ncs, ycol, figpath=None):
        """Train the PLS submodels, one for each range in ``ranges``.

        Parameters
        ----------
        trainsets : sequence
            Training data frames, one per range. Frame ``i`` is restricted via
            ``within_range.within_range(trainsets[i], ranges[i], ycol)``.
        ranges : sequence
            The ranges that define the submodels; elements 0 and 1 of each
            range appear in the figure title and file name.
        ncs : sequence of int
            Component count to use for each submodel.
        ycol : str
            Column under 'meta' holding the values to predict.
        figpath : str, optional
            Directory for the Q-residual vs. leverage plots. No plots are
            produced when this is ``None``.

        Side effects
        ------------
        Sets ``self.ranges``, ``self.ncs``, ``self.ycol``, ``self.submodels``
        and ``self.mean_vects`` (the last two also for empty ``ranges``). With
        ``figpath`` set, ``self.leverage`` and ``self.Q_res`` are set to the
        diagnostics of the most recently fitted submodel.
        """
        self.ranges = ranges
        self.ncs = ncs
        self.ycol = ycol
        submodels = []
        mean_vects = []
        for i, rangei in enumerate(ranges):
            data_tmp = within_range.within_range(trainsets[i], rangei, ycol)
            x = data_tmp.xs('wvl', axis=1, level=0, drop_level=False)
            y = data_tmp['meta'][ycol]
            x_centered, x_mean_vect = meancenter(x, 'wvl')  # mean center training data
            pls = PLSRegression(n_components=ncs[i], scale=False)
            # Train on the centered spectra so the model, the residuals computed
            # below and the stored mean vector all refer to the same matrix.
            # NOTE(review): identical to fitting on ``x`` if meancenter works in
            # place -- confirm against meancenter.
            pls.fit(x_centered, y)
            submodels.append(pls)
            mean_vects.append(x_mean_vect)
            if figpath is not None:
                T = pls.x_scores_
                # calculate spectral residuals
                E = np.asarray(x_centered - np.dot(T, pls.x_loadings_.transpose()))
                # Row-wise sum of squares equals diag(E E^T) without the n x n product.
                Q_res = np.sum(E ** 2, axis=1)
                # calculate leverage: diag(T (T^T T)^-1 T^T), one row at a time
                leverage = np.sum((T @ np.linalg.inv(T.transpose() @ T)) * T, axis=1)

                range_label = str(rangei[0]) + '-' + str(rangei[1])
                plot.figure()
                plot.scatter(leverage, Q_res, color='r', edgecolor='k')
                plot.title(ycol + ' (' + range_label + ')')
                plot.xlabel('Leverage')
                plot.ylabel('Q')
                plot.ylim([0, 1.1 * np.max(Q_res)])
                plot.xlim([0, 1.1 * np.max(leverage)])

                plot.savefig(
                    figpath + '/' + ycol + '_' + range_label + 'Qres_vs_Leverage.png',
                    dpi=600)
                self.leverage = leverage
                self.Q_res = Q_res
        # Set once, after the loop, so both attributes exist even when no range was given.
        self.submodels = submodels
        self.mean_vects = mean_vects
Example #5
0
def pls_cv(Train, Test=None, nc=20, nfolds=5, ycol='SiO2', doplot=True, outpath='.', plotfile='pls_cv.png'):
    """Compute PLS calibration, cross-validation and test errors for 1..nc components.

    For every number of components ``i`` from 1 to ``nc``:

    * each fold defined by ``Train[('meta', 'Folds')]`` is held out in turn, a
      PLS model is fit on the remaining rows and used to predict the held-out
      rows (RMSECV);
    * a PLS model is fit on all of ``Train`` and used to predict ``Train``
      itself (RMSEC) and, when given, ``Test`` (RMSEP).

    Parameters
    ----------
    Train : data frame
        Training data with a 'wvl' column group (model input) and a 'meta'
        column group containing ``ycol`` and 'Folds'. Modified in place: the
        columns ``('meta', ycol + '_cv_PLS_nc<i>')`` and
        ``('meta', ycol + '_PLS_nc<i>')`` are added for every ``i``.
    Test : data frame, optional
        Test data laid out like ``Train``. Modified in place: the column
        ``('meta', ycol + '_PLS_nc<i>')`` is added for every ``i``.
    nc : int
        Largest number of PLS components to evaluate.
    nfolds : int
        Not used by this function; the folds come from the 'Folds' column.
    ycol : str
        Name of the 'meta' column to predict.
    doplot : bool
        When true, the RMSE curves are plotted and saved to
        ``outpath + '/' + plotfile``.
    outpath, plotfile : str
        Directory and file name for the plot.

    Returns
    -------
    dict
        ``{'RMSEC': array, 'RMSECV': array}`` plus ``'RMSEP'`` when ``Test`` is
        given. Each array has ``nc`` entries; entry ``i - 1`` belongs to ``i``
        components.
    """

    def _rmse(frame, predicted_col):
        # Root mean squared difference between the reference values and one prediction column.
        return np.sqrt(
            np.mean(np.subtract(frame[('meta', ycol)], frame[predicted_col]) ** 2, axis=0))

    # create empty arrays for the RMSE values
    pls_rmsecv = np.empty(nc)
    pls_rmsec = np.empty(nc)
    # The RMSEP array only exists when a test set was provided.
    pls_rmsep = np.empty(nc) if Test is not None else None

    # loop through each number of components
    for i in range(1, nc + 1):
        print('nc=' + str(i))
        cv_col = ('meta', ycol + '_cv_PLS_nc' + str(i))  # cross validation results for this nc
        fit_col = ('meta', ycol + '_PLS_nc' + str(i))  # training / test set results for this nc
        # Create the result columns as floats so that filling them with
        # predictions below does not have to change the column dtype.
        Train[cv_col] = 0.0
        Train[fit_col] = 0.0
        if Test is not None:
            Test[fit_col] = 0.0

        # Do the cross validation
        # NOTE(review): LeaveOneLabelOut is iterated directly, which matches the
        # legacy scikit-learn cross_validation API -- confirm the installed version.
        cv_iterator = LeaveOneLabelOut(
            Train[('meta', 'Folds')])  # create the iterator for cross validation within the training data

        for train, holdout in cv_iterator:  # Iterate through each of the folds in the training set
            cv_train = Train.iloc[train]
            cv_holdout = Train.iloc[holdout]

            # Do PLS for this number of components
            cv_train_centered, cv_train_mean_vect = meancenter(cv_train)  # mean center training data
            # apply same mean centering to holdout data
            cv_holdout_centered, _ = meancenter(cv_holdout, previous_mean=cv_train_mean_vect)
            pls = PLSRegression(n_components=i, scale=False)
            pls.fit(cv_train_centered['wvl'], cv_train_centered['meta'][ycol])
            y_pred_holdout = pls.predict(cv_holdout_centered['wvl'])
            # DataFrame.set_value is gone from current pandas; .loc is the
            # label-based replacement. ravel() flattens a column-shaped result.
            Train.loc[Train.index[holdout], cv_col] = np.ravel(y_pred_holdout)

        pls_rmsecv[i - 1] = _rmse(Train, cv_col)

        # Do train and test set PLS predictions for this number of components
        Train_centered, Train_mean_vect = meancenter(Train)
        pls = PLSRegression(n_components=i, scale=False)
        pls.fit(Train_centered['wvl'], Train_centered['meta'][ycol])

        Train[fit_col] = np.ravel(pls.predict(Train_centered['wvl']))
        pls_rmsec[i - 1] = _rmse(Train, fit_col)

        if Test is not None:
            # Center the test set with the training mean; the returned mean is not needed.
            Test_centered, _ = meancenter(Test, previous_mean=Train_mean_vect)
            Test[fit_col] = np.ravel(pls.predict(Test_centered['wvl']))
            pls_rmsep[i - 1] = _rmse(Test, fit_col)

    if doplot:
        components = range(1, nc + 1)
        plot.figure()
        plot.title(ycol)
        plot.xlabel('# of components')
        plot.ylabel(ycol + ' RMSE (wt.%)')
        plot.plot(components, pls_rmsecv, label='RMSECV', color='r')
        plot.plot(components, pls_rmsec, label='RMSEC', color='b')
        if Test is not None:
            plot.plot(components, pls_rmsep, label='RMSEP', color='g')
        plot.legend(loc=0, fontsize=6)
        plot.savefig(outpath + '/' + plotfile, dpi=600)

    rmses = {'RMSEC': pls_rmsec, 'RMSECV': pls_rmsecv}
    if Test is not None:
        rmses['RMSEP'] = pls_rmsep
    return rmses