예제 #1
0
    def run(self):
        """Stratify the selected data set into folds and split it into train/test sets.

        Registers the new '-Train'/'-Test' data sets in self.datakeys at the
        indices reserved from the global data counter.
        """
        # Reserve two slots in the global data list: one for train, one for test.
        Modules.data_count += 1
        self.train_ind = Modules.data_count
        Modules.data_count += 1
        self.test_ind = Modules.data_count

        datakey = self.chooseDataToStratifyComboBox.currentText()
        nfolds = self.nFoldsSpinBox.value()
        try:
            testfold = int(self.testFoldsSpinBox.value())
        except (TypeError, ValueError):
            # Fall back to the first fold if the spin box value is unusable.
            testfold = 1
        colname = ('comp', self.chooseVarComboBox.currentText())
        self.data[datakey] = spectral_data(
            stratified_folds(self.data[datakey].df,
                             nfolds=nfolds,
                             sortby=colname))
        # Train = everything except the test fold; Test = only the test fold.
        self.data[datakey + '-Train'] = spectral_data(
            rows_match(self.data[datakey].df, ('meta', 'Folds'), [testfold],
                       invert=True))
        self.data[datakey + '-Test'] = spectral_data(
            rows_match(self.data[datakey].df, ('meta', 'Folds'), [testfold]))
        self.list_amend(self.datakeys, self.train_ind, datakey + '-Train')
        self.list_amend(self.datakeys, self.test_ind, datakey + '-Test')
        print(self.datakeys)
        print('Test set: ' +
              str(self.data[datakey + '-Test'].df.index.shape[0]))
        print('Training set: ' +
              str(self.data[datakey + '-Train'].df.index.shape[0]))
예제 #2
0
    def run(self):
        """Train a regression model on the selected data and store it plus its coefficients."""
        # Register the shared 'Model Coefficients' data set key exactly once.
        if 'Model Coefficients' not in self.datakeys:
            Modules.data_count += 1
            self.list_amend(self.datakeys, Modules.data_count,
                            'Model Coefficients')
        Modules.model_count += 1
        self.count = Modules.model_count

        method = self.chooseAlgorithmComboBox.currentText()
        datakey = self.chooseDataComboBox.currentText()
        xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
        yvars = [('comp', str(y.text()))
                 for y in self.yVariableList.selectedItems()]
        yrange = [
            self.yMinDoubleSpinBox.value(),
            self.yMaxDoubleSpinBox.value()
        ]

        params, modelkey = self.alg[
            self.chooseAlgorithmComboBox.currentText()].run()
        modelkey = "{} - {} - ({}, {}) {}".format(method, yvars[0][-1],
                                                  yrange[0], yrange[1],
                                                  modelkey)
        self.list_amend(self.modelkeys, self.count, modelkey)
        self.models[modelkey] = regression.regression([method], [yrange],
                                                      [params])

        # Restrict training data to rows whose y value falls strictly inside yrange.
        x = np.array(self.data[datakey].df[xvars])
        y = np.array(self.data[datakey].df[yvars])
        ymask = np.squeeze((y > yrange[0]) & (y < yrange[1]))
        y = y[ymask]
        x = x[ymask, :]
        self.models[modelkey].fit(x, y)
        self.model_xvars[modelkey] = xvars
        self.model_yvars[modelkey] = yvars

        # Record model coefficients; not every model type exposes coef_.
        try:
            coef = np.squeeze(self.models[modelkey].model.coef_)
        except AttributeError:
            return
        coef = pd.DataFrame(coef)
        coef.index = pd.MultiIndex.from_tuples(
            self.data[datakey].df[xvars].columns.values)
        coef = coef.T
        coef[('meta', 'Model')] = modelkey
        try:
            coef[('meta',
                  'Intercept')] = self.models[modelkey].model.intercept_
        except AttributeError:
            # Model was fit without an intercept; nothing to record.
            pass
        if 'Model Coefficients' in self.data:
            self.data['Model Coefficients'] = spectral_data(
                pd.concat([self.data['Model Coefficients'].df, coef]))
        else:
            self.data['Model Coefficients'] = spectral_data(coef)
예제 #3
0
    def run(self):
        """Remove the baseline from the selected data set with the chosen algorithm.

        Stores the baseline-removed spectra and the baseline itself as two new
        data sets whose keys encode the method name and its changed parameters.
        """
        algorithm = self.chooseAlgorithmComboBox.currentText()
        source_key = self.chooseDataComboBox.currentText()
        # Fetch the algorithm's parameters and the subset that changed from defaults.
        params, changed = self.getMethodParams(
            self.chooseAlgorithmComboBox.currentIndex())

        suffix = algorithm + str(changed)
        removed_key = source_key + '-Baseline Removed-' + suffix
        baseline_key = source_key + '-Baseline-' + suffix
        for key in (removed_key, baseline_key):
            self.datakeys.append(key)
        # Work on a deep copy so the original spectra are left untouched.
        self.data[removed_key] = self.data[source_key].df.copy(deep=True)
        corrected, baseline = remove_baseline(self.data[removed_key],
                                              algorithm,
                                              segment=True,
                                              params=params)
        self.data[removed_key] = spectral_data(corrected)
        self.data[baseline_key] = spectral_data(baseline)
예제 #4
0
    def run(self):
        """Standardize the selected data's spectra using channel statistics fit on another data set."""
        # Register the shared 'Standardization Vectors' data set key exactly once.
        if 'Standardization Vectors' not in self.datakeys:
            Modules.data_count += 1
            self.list_amend(self.datakeys, Modules.data_count,
                            'Standardization Vectors')

        datakey_to_scale = self.chooseDataComboBox.currentText()
        datakey_to_fit = self.comboBox.currentText()

        try:
            scaler = StandardScaler()
            scaler.fit(self.data[datakey_to_fit].df['wvl'])
            self.data[datakey_to_scale].df['wvl'] = scaler.transform(
                self.data[datakey_to_scale].df['wvl'])
        except Exception as e:
            print(e)
            return

        print(
            datakey_to_scale +
            " standardized using spectral channel mean and standard deviations from "
            + datakey_to_fit)

        # Store the fitted variances and means so the transform is reproducible.
        try:
            scaler_out = pd.DataFrame(
                np.vstack((scaler.var_, scaler.mean_)).T)
            scaler_out.index = [
                ('wvl', x)
                for x in self.data[datakey_to_fit].df['wvl'].columns.values
            ]
            scaler_out = scaler_out.T
            scaler_out[('meta', 'Dataset')] = datakey_to_fit

            if 'Standardization Vectors' in self.data:
                self.data['Standardization Vectors'] = spectral_data(
                    pd.concat([
                        self.data['Standardization Vectors'].df, scaler_out
                    ]))
            else:
                self.data['Standardization Vectors'] = spectral_data(
                    scaler_out)
        except Exception as e:
            # Best-effort bookkeeping; report rather than silently swallow.
            print(e)
예제 #5
0
 def update_dataname(self):
     """Rename the current data set and reload a 2-row preview from its file."""
     keyname = self.dataSetNameLineEdit.text()
     filename = self.fileNameLineEdit.text()
     self.list_amend(self.datakeys, self.curr_count, keyname)
     try:
         # nrows=2 keeps this preview load fast.
         self.data[keyname] = spectral_data(
             pd.read_csv(filename, header=[0, 1], verbose=False, nrows=2))
     except Exception as e:
         # Best-effort preview; report the failure instead of hiding it.
         print(e)
예제 #6
0
 def run(self, filename=None, keyname=None):
     """Load a CSV data file into self.data under *keyname*.

     Falls back to the GUI line edits when *filename*/*keyname* are not given.
     """
     # Identity comparison with None per PEP 8 ('==' can be overridden).
     if filename is None:
         filename = self.fileNameLineEdit.text()
     if keyname is None:
         keyname = self.dataSetNameLineEdit.text()
     print('Loading data file: ' + str(filename))
     self.data[keyname] = spectral_data(
         pd.read_csv(filename, header=[0, 1], verbose=False))
     self.list_amend(self.datakeys, self.curr_count, keyname)
 def run(self):
     """Merge lookup-table metadata into the currently selected data set."""
     self.lookupfilename = self.lookupfile.text()
     self.read_lookupdata()
     datakey = self.choosedata.currentText()
     # Join the lookup table onto the data frame using the chosen key columns.
     merged = lookup.lookup(self.data[datakey].df,
                            lookupdf=self.lookupdata,
                            left_on=self.left_on.currentText(),
                            right_on=self.right_on.currentText())
     self.data[datakey] = spectral_data(merged)
 def combine_data(self):
     """Concatenate two selected data sets into a new one named by the output field."""
     dataSet1 = self.dataSet1ComboBox.currentText()
     dataSet2 = self.dataSet2ComboBox.currentText()
     newkey = self.outputToDataSetLineEdit.text()
     if not newkey:
         # No output name given: nothing to do.
         return
     self.datakeys.append(newkey)
     try:
         self.data[newkey] = spectral_data(
             pd.concat([self.data[dataSet1].df, self.data[dataSet2].df],
                       ignore_index=True))
     except Exception as e:
         # A selected data set may not exist yet; report instead of hiding it.
         print(e)
    def run(self):
        """Stratify the data into folds, split train/test, and plot per-fold histograms."""
        datakey = self.chooseDataToStratifyComboBox.currentText()
        nfolds = self.nFoldsSpinBox.value()
        try:
            testfold = int(self.testFoldsSpinBox.value())
        except (TypeError, ValueError):
            # Fall back to the first fold if the spin box value is unusable.
            testfold = 1
        colname = ('comp', self.chooseVarComboBox.currentText())
        self.data[datakey] = spectral_data(
            stratified_folds(self.data[datakey].df,
                             nfolds=nfolds,
                             sortby=colname))

        # Train = everything except the test fold; Test = only the test fold.
        self.data[datakey + '-Train'] = spectral_data(
            rows_match(self.data[datakey].df, ('meta', 'Folds'), [testfold],
                       invert=True))
        self.data[datakey + '-Test'] = spectral_data(
            rows_match(self.data[datakey].df, ('meta', 'Folds'), [testfold]))
        self.datakeys.append(datakey + '-Train')
        self.datakeys.append(datakey + '-Test')

        print(self.data.keys())
        print(self.data[datakey + '-Test'].df.index.shape)
        print(self.data[datakey + '-Train'].df.index.shape)

        # Save a histogram of the stratification variable for each finite fold.
        folds = self.data[datakey].df[('meta', 'Folds')]
        folds_unique = folds.unique()[np.isfinite(folds.unique())]
        for fold in folds_unique:
            dat_col_folds = self.data[datakey].df[colname][folds == fold]
            plt.hist(dat_col_folds, bins=20)
            plt.xlabel(colname[1])
            plt.ylabel('Frequency')
            plt.title('Histogram of Fold ' + str(int(fold)))
            plt.savefig(self.outpath + '//' + colname[1] + '_fold' +
                        str(int(fold)) + '_hist.png')
            plt.clf()
    def run(self):
        """Fit a local (nearest-neighbor) LASSO model and store predictions and coefficients."""
        xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
        yvars = [('comp', str(y.text()))
                 for y in self.yVariableList.selectedItems()]
        # Query each checkbox once and reuse the result.
        fit_intercept = self.fit_intercept.isChecked()
        force_positive = self.forcepositive.isChecked()
        params = {
            'fit_intercept': fit_intercept,
            'max_iter': 10000,
            'positive': force_positive,
            'selection': 'random',
            'l1_ratio': [.1, .5, .7, .9, .95, .99, 1]
        }
        localmodel = local_regression.LocalRegression(
            params, n_neighbors=self.n_neighbors_spin.value())
        traindata = self.data[self.choosedata_train.currentText()]
        predictdata = self.data[self.choosedata_predict.currentText()]
        x_train = np.array(traindata.df[xvars])
        y_train = np.array(traindata.df[yvars])
        x_predict = np.array(predictdata.df[xvars])
        predictions, coefs, intercepts = localmodel.fit_predict(
            x_train, y_train, x_predict)
        predictname = ('predict', 'Local LASSO - ' +
                       self.choosedata_predict.currentText() + ' - Predict')
        self.data[self.choosedata_predict.currentText(
        )].df[predictname] = predictions

        coefs = pd.DataFrame(coefs,
                             columns=pd.MultiIndex.from_tuples(
                                 self.data[self.choosedata_predict.currentText(
                                 )].df[xvars].columns.values))
        coefs[('meta', 'Intercept')] = intercepts
        if 'Model Coefficients' in self.data:
            self.data['Model Coefficients'] = spectral_data(
                pd.concat([self.data['Model Coefficients'].df, coefs]))
        else:
            # First model: create the coefficients data set and register its key.
            self.data['Model Coefficients'] = spectral_data(coefs)
            self.datakeys.append('Model Coefficients')
예제 #11
0
 def plot_spect_update_list(self, obj):
     """Repopulate the list widget *obj* with the (de-duplicated) values of the chosen column."""
     try:
         obj.clear()
         datakey = self.chooseDataComboBox.currentText()
         colname = self.chooseColumnComboBox.currentText()
         # De-duplicate the column so every row gets a unique selectable label.
         self.data[datakey] = spectral_data(
             enumerate_duplicates(self.data[datakey].df, colname))
         rowchoices = self.data[datakey].df[('meta', colname)]
         for choice in rowchoices:
             obj.addItem(choice)
     except Exception as e:
         # Best-effort UI refresh; report the problem rather than hiding it.
         print(e)
예제 #12
0
def test_combine_datasets(qtbot):
    """GUI test: combining two identical data sets concatenates their frames."""
    form = QtWidgets.QWidget()
    gui = CombineDataSets()
    gui.setupUi(form)

    key1 = 'test1'
    key2 = 'test2'
    outkey = 'data'

    __location__ = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))
    filename = os.path.join(__location__, 'dataset.csv')

    gui.data[key1] = spectral_data(
        pd.read_csv(filename, header=[0, 1], verbose=True))
    gui.data[key2] = spectral_data(
        pd.read_csv(filename, header=[0, 1], verbose=True))

    gui.dataSet1ComboBox.addItem(key1)
    gui.dataSet1ComboBox.setItemText(0, key1)
    gui.dataSet2ComboBox.addItem(key2)
    gui.dataSet2ComboBox.setItemText(0, key2)
    gui.outputToDataSetLineEdit.setText(outkey)

    gui.run()

    # Let assert_frame_equal raise directly: wrapping it in try/except and
    # converting to `assert False` destroys the diagnostic diff on failure.
    assert_frame_equal(
        gui.data[outkey].df,
        spectral_data(pd.concat([gui.data[key1].df,
                                 gui.data[key2].df])).df)
    def setup(self):
        """
        Load only the first 2 rows of the data set to quickly populate the GUI.

        :return:
        """
        try:
            filename = self.fileNameLineEdit.text()
            keyname = self.dataSetNameLineEdit.text()
            self.data[keyname] = spectral_data(
                pd.read_csv(filename, header=[0, 1], verbose=False, nrows=2))
            self.list_amend(self.datakeys, self.curr_count, keyname)
        except Exception as e:
            # Preview setup is best-effort; report failures instead of hiding them.
            print(e)
예제 #14
0
    def run(self):
        """Stratify the selected data set into folds and split it into train/test sets."""
        datakey = self.chooseDataToStratifyComboBox.currentText()
        nfolds = self.nFoldsSpinBox.value()
        try:
            testfold = int(self.testFoldsSpinBox.value())
        except (TypeError, ValueError):
            # Fall back to the first fold if the spin box value is unusable.
            testfold = 1
        colname = ('comp', self.chooseVarComboBox.currentText())
        self.data[datakey] = spectral_data(
            stratified_folds(self.data[datakey].df,
                             nfolds=nfolds,
                             sortby=colname))

        # Train = everything except the test fold; Test = only the test fold.
        self.data[datakey + '-Train'] = spectral_data(
            rows_match(self.data[datakey].df, ('meta', 'Folds'), [testfold],
                       invert=True))
        self.data[datakey + '-Test'] = spectral_data(
            rows_match(self.data[datakey].df, ('meta', 'Folds'), [testfold]))
        self.datakeys.append(datakey + '-Train')
        self.datakeys.append(datakey + '-Test')

        print(self.data.keys())
        print(self.data[datakey + '-Test'].df.index.shape)
        print(self.data[datakey + '-Train'].df.index.shape)
예제 #15
0
    def run(self):
        """Split the selected data set into one data set per unique value of a column."""
        datakey = self.chooseDataComboBox.currentText()
        colname = self.splitOnUniqueValuesOfComboBox.currentText()
        # Resolve the column's level-0 label (e.g. 'meta'/'comp'), excluding 'wvl' columns.
        vars_level0 = self.data[datakey].df.columns.get_level_values(0)
        vars_level1 = self.data[datakey].df.columns.get_level_values(1)
        vars_level1 = list(vars_level1[vars_level0 != 'wvl'])
        vars_level0 = list(vars_level0[vars_level0 != 'wvl'])
        colname = (vars_level0[vars_level1.index(colname)], colname)

        coldata = np.array([str(i) for i in self.data[datakey].df[colname]])
        unique_values = np.unique(coldata)
        for i in unique_values:
            new_datakey = datakey + ' - ' + str(i)
            self.datakeys.append(new_datakey)
            # DataFrame.ix was removed from pandas; use .loc with a boolean mask.
            self.data[new_datakey] = spectral_data(
                self.data[datakey].df.loc[coldata == i])
 def run(self):
     """Combine two data sets, tagging each row with the name of its source data set."""
     Modules.data_count += 1
     self.count = Modules.data_count
     dataSet1 = self.dataSet1ComboBox.currentText()
     dataSet2 = self.dataSet2ComboBox.currentText()
     newkey = self.outputToDataSetLineEdit.text()
     if newkey != '':
         self.list_amend(self.datakeys, self.count, newkey)
         try:
             # Copy so the ('meta', 'Dataset') tag does not mutate the
             # source data sets stored in self.data.
             data1 = self.data[dataSet1].df.copy()
             data2 = self.data[dataSet2].df.copy()
             data1[('meta', 'Dataset')] = dataSet1
             data2[('meta', 'Dataset')] = dataSet2
             self.data[newkey] = spectral_data(
                 pd.concat([data1, data2], ignore_index=True))
         except Exception as e:
             # A selected data set may not exist; report instead of hiding it.
             print(e)
    def run(self, filename=None, keyname=None):
        """Load a CSV file into self.data under a uniquified *keyname*."""
        Modules.data_count += 1
        self.count = Modules.data_count
        if filename is None:
            filename = self.fileNameLineEdit.text()
        if keyname is None:
            keyname = self.dataSetNameLineEdit.text()

        # If the datakey exists, append a counter to the BASE name so repeated
        # collisions yield 'x - 3', not 'x - 2 - 3'.
        basename = keyname
        number = 1
        while keyname in self.datakeys:
            number += 1
            keyname = basename + ' - ' + str(number)

        print('Loading data file: ' + str(filename))
        self.data[keyname] = spectral_data(
            pd.read_csv(filename, header=[0, 1], verbose=False))
        self.list_amend(self.datakeys, self.count, keyname)
    def run(self):
        """Remove the rows matching every active (non-hidden) mask operation."""
        match_vectors = []
        logic_list = []
        datakey = self.chooseData.currentText()
        for op in self.operations:
            values_tmp = op.GetValues()
            # Include the operation unless it has a 'hidden' checkbox that is checked.
            if op.hidden is None or not op.hidden.isChecked():
                match_vectors.append(
                    self.evaluate_operation(datakey, values_tmp))
                logic_list.append(values_tmp['logic'])

        # AND the individual match vectors; matching rows are dropped below.
        match_combined = np.all(match_vectors, axis=0)
        print(self.data[datakey].df.shape)
        # DataFrame.ix was removed from pandas; use .loc with the boolean mask.
        self.data[datakey] = spectral_data(
            self.data[datakey].df.loc[~match_combined],
            dim_red=self.data[datakey].dim_red)
        print(self.data[datakey].df.shape)
예제 #19
0
def jsc_batch(directory, LUT_files, searchstring='*.txt', to_csv=None):
    """Read a directory of JSC spectrum files into a single spectral_data object.

    :param directory: directory to search for data files
    :param LUT_files: lookup-table files used to expand filename metadata
    :param searchstring: glob pattern for the data files
    :param to_csv: optional path; if given, the combined table is also written there
    :return: spectral_data wrapping the combined data frame
    """
    # Read in the lookup tables to expand filename metadata
    refdata = read_refdata(LUT_files)
    # Force an ndarray so the boolean masks below can index it.
    filelist = np.array(file_search(directory, searchstring))
    # Filenames look like '<libsID>_<loc>_..._<timestamp>.<ext>'; collect the
    # LIBS ID (first underscore-separated field) for each file.
    libsIDs = []
    for file in filelist:
        libsIDs.append(os.path.basename(file).split('_')[0])
    libsIDs_unique = np.unique(libsIDs)

    # loop through each LIBS ID
    alldata = []
    for ID in libsIDs_unique:
        print('Working on : ' + str(ID))
        sublist = filelist[np.in1d(libsIDs, ID)]
        locs = []
        for file in sublist:
            locs.append(os.path.basename(file).split('_')[1])
        locs_unique = np.unique(locs)
        # loop through each location for that libs ID
        for loc in locs_unique:
            print(loc)
            # get the files for that LIBS ID and location
            sub_sublist = sublist[np.in1d(locs, loc)]
            data = JSC(sub_sublist, refdata)
            alldata.append(data)

    combined = pd.concat(alldata)
    if to_csv is not None:
        print('Writing combined data to: ' + to_csv)
        combined.to_csv(to_csv)
    return spectral_data(combined)
    def update_datakeys(self, setup=False):
        """Register (and, unless *setup*, create) one data set per unique value of the split column."""
        datakey = self.chooseDataComboBox.currentText()
        split_col = self.splitOnUniqueValuesOfComboBox.currentText()
        df = self.data[datakey].df
        # Resolve the column's level-0 label (e.g. 'meta'/'comp'), ignoring 'wvl' columns.
        level0 = df.columns.get_level_values(0)
        level1 = df.columns.get_level_values(1)
        non_wvl = level0 != 'wvl'
        names1 = list(level1[non_wvl])
        names0 = list(level0[non_wvl])
        fullcol = (names0[names1.index(split_col)], split_col)

        coldata = np.array([str(v) for v in df[fullcol]])
        for value in np.unique(coldata):
            subkey = datakey + ' - ' + str(value)
            if subkey in self.datakeys:
                continue
            Modules.data_count += 1
            self.list_amend(self.datakeys, Modules.data_count, subkey)
            if setup == False:
                self.data[subkey] = spectral_data(df.iloc[coldata == value])
예제 #21
0
    def run(self, filename=None, keyname=None):
        """Load a CSV file, dropping non-numeric 'wvl' columns, into self.data."""
        Modules.data_count += 1
        self.count = Modules.data_count
        if filename is None:
            filename = self.fileNameLineEdit.text()
        if keyname is None:
            keyname = self.dataSetNameLineEdit.text()

        # If the datakey exists, append a counter to the BASE name so repeated
        # collisions yield 'x - 3', not 'x - 2 - 3'.
        basename = keyname
        number = 1
        while keyname in self.datakeys:
            number += 1
            keyname = basename + ' - ' + str(number)

        print('Loading data file: ' + str(filename))
        data = pd.read_csv(filename, header=[0, 1], verbose=False)
        try:
            # Remove 'wvl' columns whose labels cannot be parsed as wavelengths.
            data_wvl = data['wvl']
            data_no_wvl = data.drop(columns='wvl')

            good_wvls = []
            for col in data_wvl.columns:
                try:
                    float(col)
                    good_wvls.append(True)
                except (TypeError, ValueError):
                    print("Removing column " + str(col))
                    good_wvls.append(False)

            data_wvl = data_wvl.iloc[:, good_wvls]
            data_wvl.columns = pd.MultiIndex.from_tuples([
                ('wvl', float(col)) for col in data_wvl.columns
            ])
            data = pd.merge(data_no_wvl,
                            data_wvl,
                            left_index=True,
                            right_index=True)
        except KeyError:
            # File has no 'wvl' column group; keep it as-is.
            pass
        self.data[keyname] = spectral_data(data)
        self.list_amend(self.datakeys, self.count, keyname)
        self.datafiles[keyname] = os.path.basename(filename)
    def run(self):
        """Run cross-validation for the chosen algorithm over a grid of parameters."""
        method = self.chooseAlgorithmComboBox.currentText()
        datakey = self.chooseDataComboBox.currentText()
        xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
        yvars = [('comp', str(y.text()))
                 for y in self.yVariableList.selectedItems()]
        yrange = [
            self.yMinDoubleSpinBox.value(),
            self.yMaxDoubleSpinBox.value()
        ]
        # Warning: Params passing through cv.cv(params) needs to be in lists
        # Example: {'n_components': [4], 'scale': [False]}
        params, modelkey = self.alg[
            self.chooseAlgorithmComboBox.currentText()].run()

        # If the method supports it, separate out alpha from the other
        # parameters and prepare for calculating the regularization path.
        path_methods = ['Elastic Net', 'LASSO']  #, 'Ridge']
        if method in path_methods:
            calc_path = True
            alphas = params.pop('alpha')
        else:
            alphas = None
            calc_path = False
        # Restrict CV to rows whose y value falls strictly inside yrange.
        y = np.array(self.data[datakey].df[yvars])
        match = np.squeeze((y > yrange[0]) & (y < yrange[1]))
        # DataFrame.ix was removed from pandas; use .loc with the boolean mask.
        data_for_cv = spectral_data(self.data[datakey].df.loc[match])
        paramgrid = list(
            ParameterGrid(params))  # create a grid of parameter permutations
        cv_obj = cv.cv(paramgrid)
        try:
            # Iterator for cross validation based on the predefined folds.
            cv_iterator = LeaveOneGroupOut().split(
                data_for_cv.df[xvars], data_for_cv.df[yvars],
                data_for_cv.df[('meta', 'Folds')])
            n_folds = LeaveOneGroupOut().get_n_splits(
                groups=data_for_cv.df[('meta', 'Folds')])
        except KeyError:
            print(
                '***No folds found! Did you remember to define folds before running cross validation?***'
            )
            # Without folds, do_cv below would fail on undefined names; stop here.
            return

        self.data[
            datakey].df, self.cv_results, cvmodels, cvmodelkeys, cvpredictkeys = cv_obj.do_cv(
                data_for_cv.df,
                cv_iterator,
                xcols=xvars,
                ycol=yvars,
                yrange=yrange,
                method=method,
                alphas=alphas,
                calc_path=calc_path,
                n_folds=n_folds)
        for key in cvpredictkeys:
            self.list_amend(self.predictkeys, len(self.predictkeys), key)

        for n, key in enumerate(cvmodelkeys):
            # list_amend at index len(...) already appends the key; the old
            # extra append() duplicated every model key.
            self.list_amend(self.modelkeys, len(self.modelkeys), key)
            self.models[key] = cvmodels[n]
            self.model_xvars[key] = xvars
            self.model_yvars[key] = yvars
            if method != 'GP':
                coef = np.squeeze(cvmodels[n].model.coef_)
                coef = pd.DataFrame(coef)
                coef.index = pd.MultiIndex.from_tuples(
                    self.data[datakey].df[xvars].columns.values)
                coef = coef.T
                coef[('meta', 'Model')] = key
                try:
                    coef[('meta', 'Intercept')] = cvmodels[n].model.intercept_
                except AttributeError:
                    # Model was fit without an intercept; nothing to record.
                    pass
                if 'Model Coefficients' in self.data:
                    self.data['Model Coefficients'] = spectral_data(
                        pd.concat([self.data['Model Coefficients'].df, coef]))
                else:
                    self.data['Model Coefficients'] = spectral_data(coef)
                    self.datakeys.append('Model Coefficients')

        # Uniquify the CV results key if it already exists.
        number = 1
        cvid = str('CV Results ' + modelkey + ' - ' + yvars[0][1])
        while cvid in self.datakeys:
            number += 1
            cvid = str('CV Results ' + modelkey + ' - ' +
                       yvars[0][1]) + ' - ' + str(number)

        self.datakeys.append(cvid)
        self.data[cvid] = self.cv_results
    def setup(self):
        """Pre-register CV prediction/model keys and fill them with dummy values.

        Mirrors the bookkeeping that run() will perform, so the GUI can show
        the keys before cross validation is actually executed. Predictions are
        filled with 9999 and coefficients with dummy values until run() runs.
        """
        try:
            method = self.chooseAlgorithmComboBox.currentText()
            datakey = self.chooseDataComboBox.currentText()
            xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
            yvars = [('comp', str(y.text()))
                     for y in self.yVariableList.selectedItems()]
            yrange = [
                self.yMinDoubleSpinBox.value(),
                self.yMaxDoubleSpinBox.value()
            ]
            # Warning: Params passing through cv.cv(params) needs to be in lists
            # Example: {'n_components': [4], 'scale': [False]}
            params, modelkey = self.alg[
                self.chooseAlgorithmComboBox.currentText()].run()

            #if the method supports it, separate out alpha from the other parameters and prepare for calculating path
            path_methods = ['Elastic Net', 'LASSO']  #, 'Ridge']
            if method in path_methods:
                alphas = params.pop('alpha')
            else:
                alphas = None

            paramgrid = list(ParameterGrid(
                params))  # create a grid of parameter permutations
            cv_obj = cv.cv(paramgrid)
            cvpredictkeys = []
            cvmodelkeys = []
            # Build the same predict/model key strings run() will generate, one
            # CV and one Cal key per parameter permutation (and per alpha).
            for i in range(len(paramgrid)):
                if alphas is not None:
                    for j in range(len(alphas)):
                        keytemp = '"' + method + ' - ' + yvars[0][
                            -1] + ' - CV - Alpha:' + str(
                                alphas[j]) + ' - ' + str(paramgrid[i]) + '"'
                        cvpredictkeys.append(keytemp)
                        keytemp = '"' + method + ' - ' + yvars[0][
                            -1] + ' - Cal - Alpha:' + str(
                                alphas[j]) + ' - ' + str(paramgrid[i]) + '"'
                        cvpredictkeys.append(keytemp)

                        modelkeytemp = "{} - {} - ({}, {}) Alpha: {}, {}".format(
                            method, yvars[0][-1], yrange[0], yrange[1],
                            alphas[j], paramgrid[i])
                        cvmodelkeys.append(modelkeytemp)

                else:
                    # NOTE(review): both of these keys use '- Cal -'; the first
                    # was presumably meant to be '- CV -' — confirm against run().
                    keytemp = '"' + method + '- Cal -' + str(
                        paramgrid[i]) + '"'
                    cvpredictkeys.append(keytemp)
                    keytemp = '"' + method + '- Cal -' + str(
                        paramgrid[i]) + '"'
                    cvpredictkeys.append(keytemp)

                    modelkeytemp = "{} - {} - ({}, {}) {}".format(
                        method, yvars[0][-1], yrange[0], yrange[1],
                        paramgrid[i])
                    cvmodelkeys.append(modelkeytemp)

            for key in cvpredictkeys:
                self.list_amend(self.predictkeys, len(self.predictkeys), key)
                self.data[datakey].df[(
                    'predict', key
                )] = 9999  #Need to fill the data frame with dummy values until CV is actually run

            for n, key in enumerate(cvmodelkeys):
                self.list_amend(self.modelkeys, len(self.modelkeys), key)
                # NOTE(review): list_amend at index len(...) appears to append
                # already, so this append likely duplicates the key — confirm.
                self.modelkeys.append(key)
                self.model_xvars[key] = xvars
                self.model_yvars[key] = yvars
                if method != 'GP':
                    coef = self.data[datakey].df[xvars[
                        0]].columns.values * 0.0 + 9999  #Fill with dummy coeffs before model is run
                    coef = pd.DataFrame(coef)
                    coef.index = pd.MultiIndex.from_tuples(
                        self.data[datakey].df[xvars].columns.values)
                    coef = coef.T
                    coef[('meta', 'Model')] = key
                    try:
                        coef[(
                            'meta', 'Intercept'
                        )] = 0  #Fill intercept with zeros prior to model run
                    except:
                        pass
                    try:
                        # Append to the shared coefficients set; create it (and
                        # register its key) on the first model.
                        self.data['Model Coefficients'] = spectral_data(
                            pd.concat(
                                [self.data['Model Coefficients'].df, coef]))
                    except:
                        self.data['Model Coefficients'] = spectral_data(coef)
                        self.datakeys.append('Model Coefficients')

            self.list_amend(self.datakeys, len(self.datakeys),
                            'CV Results ' + modelkey)
        except:
            # Setup is best-effort: any failure leaves the GUI un-prepopulated.
            pass
예제 #24
0
    def run(self):
        """Cross-validate calibration-transfer methods between two data sets.

        Builds a parameter grid from the checked method options, then for
        each parameter combination performs leave-one-spectrum-out CV:
        the transform from data set A to data set B is derived on the
        non-held-out spectra and applied to the held-out spectrum of A,
        and the RMSE against the matching spectrum of B is recorded.
        Results (and, optionally, the transformed spectra) are stored in
        self.data and registered in self.datakeys.
        """
        datakeyA = self.chooseDataA.currentText()
        datakeyB = self.chooseDataB.currentText()
        dataAmatchcol = self.chooseDataAMatch.currentText()
        dataBmatchcol = self.chooseDataBMatch.currentText()

        # The 'None' method is always included so the results contain an
        # untransformed baseline to compare against.
        paramgrid = [{'method': 'None'}]
        if self.PDScheckbox.isChecked():
            paramgrid.extend(
                list(ParameterGrid(self.alg['PDS - Piecewise DS'][0].run())))
        if self.PDSPLScheckBox.isChecked():
            paramgrid.extend(
                list(
                    ParameterGrid(
                        self.alg['PDS-PLS - PDS using Partial Least Squares']
                        [0].run())))
        if self.DScheckbox.isChecked():
            paramgrid.extend(
                list(
                    ParameterGrid(
                        self.alg['DS - Direct Standardization'][0].run())))
        if self.LASSODScheckbox.isChecked():
            paramgrid.extend(list(ParameterGrid(
                self.alg['LASSO DS'][0].run())))
        if self.Ratiocheckbox.isChecked():
            paramgrid.extend([{'method': 'Ratio'}])
        if self.SparseDScheckBox.isChecked():
            paramgrid.extend(
                list(ParameterGrid(self.alg['Sparse Low Rank DS'][0].run())))
        if self.RidgeDScheckBox.isChecked():
            paramgrid.extend(list(ParameterGrid(
                self.alg['Ridge DS'][0].run())))
        if self.CCAcheckBox.isChecked():
            paramgrid.extend(
                list(
                    ParameterGrid(
                        self.alg['CCA - Canonical Correlation Analysis']
                        [0].run())))
        if self.NewCCAcheckBox.isChecked():
            paramgrid.extend(list(ParameterGrid(self.alg['New CCA'][0].run())))
        if self.ForwardBackwardcheckBox.isChecked():
            paramgrid.extend(
                list(ParameterGrid(self.alg['Forward Backward DS'][0].run())))
        if self.IPDDScheckBox.isChecked():
            paramgrid.extend(
                list(
                    ParameterGrid(
                        self.alg['Incremental Proximal Descent DS'][0].run())))

        #get the data sets
        A = self.data[datakeyA].df
        B = self.data[datakeyB].df
        A_mean, B_mean = caltran_utils.prepare_data(A, B, dataAmatchcol,
                                                    dataBmatchcol)

        #prepare for cross validation
        # NOTE(review): assumes every value in A's match column also occurs
        # in B's match column -- confirm against prepare_data's contract.
        uniquevals = np.unique(A_mean[('meta', dataAmatchcol)])
        cv_results = pd.DataFrame()
        ind = 0

        for params in paramgrid:  #step through all the different permutations
            print(params)
            transformed_datakey = datakeyA + '-' + str(params)
            for key in params.keys():  # store parameters in the results file
                cv_results.loc[ind, key] = params[key]
            ct_obj = cal_tran.cal_tran(
                params)  #create a caltran object using the current parameters
            A_mean_transformed = copy.deepcopy(A_mean)
            # Zero the spectra; transformed values for each held-out spectrum
            # are filled back in below when keep_spectra is checked.
            A_mean_transformed['wvl'] = A_mean_transformed['wvl'] * 0
            rmses = []
            for val in uniquevals:  #hold out each unique spectrum in turn
                print(val)
                # define the validation data (the held out spectrum)
                # and the training data (the spectra that are not held out)
                # for both data sets
                val_data_A = np.squeeze(
                    np.array(A_mean[A_mean[('meta',
                                            dataAmatchcol)] == val]['wvl'],
                             dtype='float'))
                train_data_A = np.squeeze(
                    np.array(
                        A_mean[A_mean[('meta', dataAmatchcol)] != val]['wvl'],
                        dtype='float'))
                val_data_B = np.squeeze(
                    np.array(B_mean[B_mean[('meta',
                                            dataBmatchcol)] == val]['wvl'],
                             dtype='float'))
                train_data_B = np.squeeze(
                    np.array(
                        B_mean[B_mean[('meta', dataBmatchcol)] != val]['wvl'],
                        dtype='float'))

                ct_obj.derive_transform(
                    train_data_A, train_data_B
                )  #derive the transform based on the training data
                val_data_A_transformed = ct_obj.apply_transform(
                    val_data_A
                )  #apply the transform to the held out spectrum from A

                if self.keep_spectra_checkBox.isChecked():
                    A_mean_transformed.loc[
                        A_mean_transformed[('meta', dataAmatchcol)] == val,
                        'wvl'] = val_data_A_transformed  #this step is very slow, can we speed it up?
                rmses.append(mismatch_rmse(val_data_A_transformed, val_data_B))
                # NOTE(review): 'val + _RMSE' assumes the match column holds
                # strings -- confirm; otherwise this concatenation raises.
                cv_results.loc[ind, val + '_RMSE'] = rmses[
                    -1]  #record the RMSE for the held out spectrum
            cv_results.loc[ind, 'average_RMSE'] = np.mean(rmses)
            if self.keep_spectra_checkBox.isChecked():
                Modules.data_count += 1
                self.index = Modules.data_count
                self.list_amend(self.datakeys, self.index, transformed_datakey)
                self.data[transformed_datakey] = spectral_data.spectral_data(
                    A_mean_transformed)
            ind = ind + 1
        # Tag all result columns with a top-level 'cv' index so the results
        # table matches the multi-indexed layout used elsewhere.
        cv_results.columns = pd.MultiIndex.from_tuples([
            ('cv', col) for col in cv_results.columns
        ])

        # Pick a unique key for the results so re-runs do not overwrite.
        cvid = 'Caltran CV Results'
        number = 1
        while cvid in self.datakeys:
            number += 1
            cvid = cvid + ' - ' + str(number)

        Modules.data_count += 1
        self.index = Modules.data_count
        self.list_amend(self.datakeys, self.index, cvid)
        self.data[cvid] = cv_results
예제 #25
0
def ccam_batch(directory, searchstring='*.csv', to_csv=None, lookupfile=None, ave=True, progressbar=None):
    """Batch-read ChemCam spectra from a directory into one spectral_data.

    Files are located with ``file_search(directory, searchstring)``.  When
    several files share the same spacecraft clock (sclock), only the one
    with the highest version digit is kept.  Each file is read with
    CCAM_SAV (for '.sav' search patterns) or CCAM_CSV and concatenated
    into one table, skipping files whose wavelength columns differ.

    Parameters
    ----------
    directory : str
        Directory to search for data files.
    searchstring : str
        Glob pattern; a pattern containing '.sav' selects the SAV reader.
    to_csv : str or None
        If given, path to write the combined table to.
    lookupfile : str or None
        If given, passed to ``lookup`` to merge in metadata.
    ave : bool
        Passed to CCAM_SAV (average shots or keep single shots).
    progressbar : QProgressBar or None
        Optional Qt progress bar updated while reading.

    Returns
    -------
    spectral_data or None
        The combined data, or None if no file could be read.
    """
    # Choose the reader based on the search pattern.
    is_sav = '.sav' in searchstring.lower()
    filelist = file_search(directory, searchstring)
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')

    # Extract the sclock and version for each file so that only one file per
    # sclock is read: the one with the highest version number.
    # NOTE(review): assumes the basename puts the sclock at characters 4:13
    # and a single version digit at index -5 -- confirm for new products.
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]  # extract the sclock
        P_version[i] = basenames[i][-5:-4]  # extract the version

    filelist_new = np.array([], dtype='str')
    for sclock in np.unique(sclocks):
        match = (sclocks == sclock)  # all files with this sclock
        maxP = P_version[match] == max(P_version[match])  # highest version
        filelist_new = np.append(filelist_new, filelist[match][maxP])
    filelist = filelist_new

    if progressbar:
        progressbar.setWindowTitle('ChemCam data progress')
        progressbar.setRange(0, filelist.size)
        progressbar.show()

    combined = None  # sentinel: no file successfully read yet
    for filecount, fname in enumerate(filelist, start=1):
        print(fname)
        try:
            tmp = CCAM_SAV(fname, ave=ave) if is_sav else CCAM_CSV(fname)
            if combined is None:
                combined = tmp
            # Compare column *sets* so rounding differences in column order
            # do not cause spurious mismatches.
            elif set(combined['wvl'].columns) == set(tmp['wvl'].columns):
                combined = pd.concat([combined, tmp])
            else:
                print("Wavelengths don't match!")
        except Exception as err:
            # Skip unreadable files, but say why rather than failing silently.
            print('Could not read ' + fname + ': ' + str(err))
        if progressbar:
            progressbar.setValue(filecount)
            QtCore.QCoreApplication.processEvents()

    # Guard against the case where every file failed to read; the original
    # code raised a NameError on 'combined' here.
    if combined is None:
        print('No files could be read from ' + directory)
        return None

    combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(combined.loc[:, ('meta', 'sclock')])

    if lookupfile is not None:
        combined = lookup(combined, lookupfile=lookupfile)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
 def run(self):
     """Concatenate two chosen data sets row-wise into a new data set.

     The output key from the line edit is registered in self.datakeys and
     the stacked DataFrame is stored in self.data under that key.
     """
     first_key = self.dataSet1ComboBox.currentText()
     second_key = self.dataSet2ComboBox.currentText()
     new_key = self.outputToDataSetLineEdit.text()
     self.datakeys.append(new_key)
     frames = [self.data[first_key].df, self.data[second_key].df]
     self.data[new_key] = spectral_data(pd.concat(frames, ignore_index=True))
예제 #27
0
def ccam_batch(directory,
               searchstring='*.csv',
               to_csv=None,
               lookupfile=None,
               ave=True,
               progressbar=None,
               left_on='sclock',
               right_on='Spacecraft Clock',
               versioncheck=True):
    """Batch-read ChemCam spectra from a directory into one spectral_data.

    Files matching `searchstring` are read with CCAM_SAV (patterns
    containing 'sav') or CCAM_CSV and concatenated row-wise.  When
    `versioncheck` is True, only the highest-version file per spacecraft
    clock is kept.  If `lookupfile` is given, metadata are merged in via
    `lookup` using `left_on`/`right_on`; if `to_csv` is given, the
    combined table is also written to that path.
    """
    # Determine if the file is a .csv or .SAV
    if 'sav' in searchstring.lower():
        is_sav = True
    else:
        is_sav = False
    filelist = file_search(directory, searchstring)
    if len(filelist) == 0:
        print('No files found in ' + directory + ' using search string ' +
              searchstring)
        return
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')

    if versioncheck == True:
        # Extract the sclock and version for each file and ensure that only one
        # file per sclock is being read, and that it is the one with the highest version number
        # NOTE(review): assumes the basename puts the sclock at characters
        # 4:13 and a single version digit at index -5 -- confirm.
        for i, name in enumerate(filelist):
            basenames[i] = os.path.basename(name)
            sclocks[i] = basenames[i][4:13]  # extract the sclock
            P_version[i] = basenames[i][-5:-4]  # extract the version

        sclocks_unique = np.unique(sclocks)  # find unique sclocks
        filelist_new = np.array([], dtype='str')
        for i in sclocks_unique:
            match = (sclocks == i)  # find all instances with matching sclocks
            maxP = P_version[match] == max(
                P_version[match])  # find the highest version among these files
            filelist_new = np.append(
                filelist_new, filelist[match]
                [maxP])  # keep only the file with the highest version

        filelist = filelist_new
    # Should add a progress bar for importing large numbers of files
    dt = []
    if progressbar:
        from PyQt5 import QtCore  # only rely on PyQt5 if a progressbar object has been passed
        progressbar.setWindowTitle('ChemCam data progress')
        progressbar.setRange(0, filelist.size)
        progressbar.show()
    filecount = 0
    workinglist = []
    subcount = 0

    for i, file in enumerate(filelist):
        filecount = filecount + 1
        print('File #' + str(filecount))
        print(file)
        if is_sav:
            tmp = CCAM_SAV(file, ave=ave)
        else:
            tmp = CCAM_CSV(file, ave=ave)
        try:
            # This ensures that rounding errors are not causing mismatches in columns
            cols1 = list(combined['wvl'].columns)
            cols2 = list(tmp['wvl'].columns)
            if set(cols1) == set(cols2):
                combined = pd.concat([combined, tmp])
            else:
                print("Wavelengths don't match!")
        except:
            # First iteration (and right after a memory flush below):
            # 'combined' is unbound, so the NameError lands here.
            # NOTE(review): this bare except also swallows any other error
            # raised by the comparison above.
            combined = tmp
        # if doing single shots, save out the data every 50 files so that the program doesn't run out of memory
        if filecount % 50 == 0 and ave == False:
            workingfilename = 'temporary_data_files_' + str(
                subcount) + '-' + str(filecount) + '.csv'
            workinglist.append(workingfilename)
            combined.to_csv(workingfilename)
            subcount = filecount
            del combined
            gc.collect()
        if progressbar:
            progressbar.setValue(filecount)
            QtCore.QCoreApplication.processEvents()
        pass
    if ave == False:
        # TODO(review): the temporary files written above are never re-read
        # here -- this loop is a stub, so single-shot data flushed to disk
        # are NOT recombined into the returned result.  Looks like an
        # unfinished feature; confirm intent before relying on ave=False.
        for f in workinglist:
            pass

    try:
        combined.loc[:, ('meta',
                         'sclock')] = pd.to_numeric(combined.loc[:,
                                                                 ('meta',
                                                                  'sclock')])
    except:
        pass

    if lookupfile is not None:
        # Try the merge with one header row skipped first, then without.
        try:
            combined = lookup(combined,
                              lookupfile=lookupfile,
                              left_on=left_on,
                              right_on=right_on,
                              skiprows=1)
        except:
            combined = lookup(combined,
                              lookupfile=lookupfile,
                              left_on=left_on,
                              right_on=right_on,
                              skiprows=0)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
예제 #28
0
    def run(self):
        """Cross-validate each checked regression method on the chosen data.

        For every checked algorithm a parameter grid is built and passed to
        cv.do_cv, which returns CV predictions, a results table, and fitted
        models.  Models, their coefficients, and the combined results are
        registered in the shared data/model bookkeeping structures.
        """
        self.cv_results_combined = None #clear previous results in case of re-run

        if 'Model Coefficients' in self.datakeys:
            pass
        else:
            Modules.data_count += 1
            self.coef_index = Modules.data_count
            self.list_amend(self.datakeys, self.coef_index, 'Model Coefficients')

        # Reserve an index for the CV results entry now so it keeps a stable
        # position in datakeys even though the entry is filled in at the end.
        Modules.data_count += 1
        self.results_index = Modules.data_count

        # One entry per checked method; each value is the list of parameter
        # combinations to cross-validate for that method.
        paramgrids = {}
        if self.ARDcheckbox.isChecked():
            paramgrids['ARD']=list(ParameterGrid(self.alg['ARD'][0].run()))
        if self.BRRcheckbox.isChecked():
            paramgrids['BRR']=list(ParameterGrid(self.alg['BRR'][0].run()))
        if self.ENetcheckbox.isChecked():
            enet_params=self.alg['Elastic Net'][0].run()
            params = enet_params[0]
            params['alpha'] = enet_params[1]
            paramgrids['Elastic Net']=list(ParameterGrid(params))
        # if self.GPcheckBox.isChecked():
        #     paramgrids.append(list(ParameterGrid(self.alg['GP - Gaussian Processes'][0].run())))
        if self.LARScheckbox.isChecked():
            paramgrids['LARS']=list(ParameterGrid(self.alg['LARS'][0].run()))
        if self.LASSOcheckBox.isChecked():
            lasso_params=self.alg['LASSO'][0].run()
            params = lasso_params[0]
            params['alpha'] = lasso_params[1]
            paramgrids['LASSO'] = list(ParameterGrid(params))
            #paramgrids['LASSO']={'alphas':lasso_params[1],'params':list(ParameterGrid(lasso_params[0]))}

        if self.OLScheckBox.isChecked():
            paramgrids['OLS']=list(ParameterGrid(self.alg['OLS'][0].run()))
        if self.OMPcheckBox.isChecked():
            paramgrids['OMP']=list(ParameterGrid(self.alg['OMP'][0].run()))
        if self.PLScheckBox.isChecked():
            paramgrids['PLS']=list(ParameterGrid(self.alg['PLS'][0].run()))
        if self.RidgecheckBox.isChecked():
            paramgrids['Ridge']=list(ParameterGrid(self.alg['Ridge'][0].run()))
        if self.SVRcheckBox.isChecked():
            paramgrids['SVR']=list(ParameterGrid(self.alg['SVR'][0].run()))
        if self.LocalcheckBox.isChecked():
            paramgrids['Local Regression']=list(ParameterGrid(self.alg['Local Regression'][0].run()))
        if self.GBRcheckBox.isChecked():
            paramgrids['GBR'] = list(ParameterGrid(self.alg['GBR'][0].run()))
        if self.RFcheckBox.isChecked():
            paramgrids['RF'] = list(ParameterGrid(self.alg['RF'][0].run()))
        datakey = self.chooseDataComboBox.currentText()
        xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
        yvars = [('comp', str(y.text())) for y in self.yVariableList.selectedItems()]
        yrange = [self.yMinDoubleSpinBox.value(), self.yMaxDoubleSpinBox.value()]
        # Restrict CV to rows whose y value lies strictly inside the range.
        y = np.array(self.data[datakey].df[yvars])
        match = np.squeeze((y > yrange[0]) & (y < yrange[1]))
        data_for_cv = spectral_data(self.data[datakey].df.loc[match])


        for key in paramgrids.keys():
            print('===== Cross validating '+key+' =====')
            method=key
            paramgrid = paramgrids[key]

            cv_obj = cv.cv(paramgrid)

            data_for_cv_out, cv_results, cvmodels, cvmodelkeys, cvpredictkeys = cv_obj.do_cv(data_for_cv.df, xcols=xvars,
                                                                                         ycol=yvars, yrange=yrange, method=method)

            # Annotate the results with provenance: source file (best effort)
            # and the y column/range the models were trained against.
            try:
                cv_results[('cv','Data_file')] = self.datafiles[datakey]
            except:
                pass
            cv_results[('cv','ymin')] = yrange[0]
            cv_results[('cv','ymax')] = yrange[1]
            cv_results[('cv','ycol')] = yvars[0][1]

            data_for_cv = spectral_data(data_for_cv_out)

            # pd.concat silently drops the initial None accumulator, so the
            # first method's results simply become the combined table.
            self.cv_results_combined = pd.concat((self.cv_results_combined,cv_results))

            for key in cvpredictkeys:
                self.list_amend(self.predictkeys, len(self.predictkeys), key)

            for n, key in enumerate(cvmodelkeys):
                Modules.model_count += 1
                self.list_amend(self.modelkeys, Modules.model_count, key)
                self.models[key] = cvmodels[n]
                self.model_xvars[key] = xvars
                self.model_yvars[key] = yvars
                if method != 'GP':
                    # Extract model coefficients (where the estimator exposes
                    # them) into a one-row frame indexed by the x variables.
                    try:
                        coef = np.squeeze(cvmodels[n].model.coef_)
                        coef = pd.DataFrame(coef)
                        coef.index = pd.MultiIndex.from_tuples(self.data[datakey].df[xvars].columns.values)
                        coef = coef.T
                        coef[('meta', 'Model')] = key
                        try:
                            coef[('meta', 'Intercept')] = cvmodels[n].model.intercept_
                        except:
                            pass
                        try:
                            self.data['Model Coefficients'] = spectral_data(
                                pd.concat([self.data['Model Coefficients'].df, coef]))
                        except:
                            self.data['Model Coefficients'] = spectral_data(coef)
                    except:
                        pass

        # Pick a unique key for the results so re-runs do not overwrite.
        number = 1
        cvid = str('CV Results - ' + yvars[0][1])
        while cvid in self.datakeys:
            number += 1
            cvid = str('CV Results - ' + yvars[0][1]) + ' - ' + str(number)

        self.list_amend(self.datakeys,self.results_index,cvid)
        self.data[cvid] = spectral_data(self.cv_results_combined)

        Modules.data_count += 1
        new_datakey = datakey + '-' +str(yvars)+' '+ str(yrange)+'-CV Predictions'
        self.list_amend(self.datakeys, Modules.data_count, new_datakey)
        self.data[new_datakey] = spectral_data(data_for_cv_out)
예제 #29
0
    def run(self):
        """Cross-validate each checked regression method on the chosen data.

        Builds a parameter grid per checked algorithm, filters the chosen
        data set to rows whose y value lies inside the requested range,
        and runs cv.do_cv for each method.  For path-capable methods
        (Elastic Net, LASSO) the alpha values are separated out so the
        regularization path can be computed.  Models, coefficients, and
        the combined CV results are stored in the shared bookkeeping
        structures.
        """
        # One entry per checked method; each value is the list of parameter
        # combinations (or, for path methods, a dict of alphas + params).
        paramgrids = {}
        if self.ARDcheckbox.isChecked():
            paramgrids['ARD']=list(ParameterGrid(self.alg['ARD'][0].run()))
        if self.BRRcheckbox.isChecked():
            paramgrids['BRR']=list(ParameterGrid(self.alg['BRR'][0].run()))
        if self.ENetcheckbox.isChecked():
            enet_params=self.alg['Elastic Net'][0].run()
            paramgrids['Elastic Net']={'alphas':enet_params[1],'params':list(ParameterGrid(enet_params[0]))}
        # if self.GPcheckBox.isChecked():
        #     paramgrids.append(list(ParameterGrid(self.alg['GP - Gaussian Processes'][0].run())))
        if self.LARScheckbox.isChecked():
            paramgrids['LARS']=list(ParameterGrid(self.alg['LARS'][0].run()))
        if self.LASSOcheckBox.isChecked():
            lasso_params=self.alg['LASSO'][0].run()
            paramgrids['LASSO']={'alphas':lasso_params[1],'params':list(ParameterGrid(lasso_params[0]))}

        if self.OLScheckBox.isChecked():
            paramgrids['OLS']=list(ParameterGrid(self.alg['OLS'][0].run()))
        if self.OMPcheckBox.isChecked():
            paramgrids['OMP']=list(ParameterGrid(self.alg['OMP'][0].run()))
        if self.PLScheckBox.isChecked():
            paramgrids['PLS']=list(ParameterGrid(self.alg['PLS'][0].run()))
        if self.RidgecheckBox.isChecked():
            paramgrids['Ridge']=list(ParameterGrid(self.alg['Ridge'][0].run()))
        if self.SVRcheckBox.isChecked():
            paramgrids['SVR']=list(ParameterGrid(self.alg['SVR'][0].run()))
        if self.LocalcheckBox.isChecked():
            paramgrids['Local Regression']=list(ParameterGrid(self.alg['Local Regression'][0].run()))

        datakey = self.chooseDataComboBox.currentText()
        xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
        yvars = [('comp', str(y.text())) for y in self.yVariableList.selectedItems()]
        yrange = [self.yMinDoubleSpinBox.value(), self.yMaxDoubleSpinBox.value()]
        # Restrict CV to rows whose y value lies strictly inside the range.
        y = np.array(self.data[datakey].df[yvars])
        match = np.squeeze((y > yrange[0]) & (y < yrange[1]))
        # Fix: the .ix indexer was deprecated in pandas 0.20 and removed in
        # pandas 1.0; .loc is the direct replacement for a boolean mask.
        data_for_cv = spectral_data(self.data[datakey].df.loc[match])


        for key in paramgrids.keys():
            print('===== Cross validating '+key+' =====')
            method=key
            #if the method supports it, separate out alpha from the other parameters and prepare for calculating path
            path_methods =  ['Elastic Net', 'LASSO']#, 'Ridge']
            if method in path_methods:
                calc_path = True
                alphas = paramgrids[key]['alphas']
                paramgrid = paramgrids[key]['params']
            else:
                alphas = None
                calc_path = False
                paramgrid = paramgrids[key]
            progbar = QtWidgets.QProgressBar()
            cv_obj = cv.cv(paramgrid, progressbar=progbar)

            self.data[datakey].df, cv_results, cvmodels, cvmodelkeys, cvpredictkeys = cv_obj.do_cv(data_for_cv.df, xcols=xvars,
                                                                                         ycol=yvars, yrange=yrange, method=method,
                                                                                         alphas = alphas, calc_path = calc_path)
            # On the first method there is no accumulator yet, so the
            # AttributeError/NameError lands in the except branch.
            try:
                self.cv_results_combined = pd.concat((self.cv_results_combined,cv_results))
            except:
                self.cv_results_combined = cv_results

            for key in cvpredictkeys:
                self.list_amend(self.predictkeys, len(self.predictkeys), key)

            for n, key in enumerate(cvmodelkeys):
                self.list_amend(self.modelkeys, len(self.modelkeys), key)
                self.models[key] = cvmodels[n]
                self.model_xvars[key] = xvars
                self.model_yvars[key] = yvars
                if method != 'GP':
                    # Extract model coefficients into a one-row frame indexed
                    # by the x variables, with model name and intercept.
                    coef = np.squeeze(cvmodels[n].model.coef_)
                    coef = pd.DataFrame(coef)
                    coef.index = pd.MultiIndex.from_tuples(self.data[datakey].df[xvars].columns.values)
                    coef = coef.T
                    coef[('meta', 'Model')] = key
                    try:
                        coef[('meta', 'Intercept')] = cvmodels[n].model.intercept_
                    except:
                        pass
                    try:
                        self.data['Model Coefficients'] = spectral_data(
                            pd.concat([self.data['Model Coefficients'].df, coef]))
                    except:
                        self.data['Model Coefficients'] = spectral_data(coef)
                        self.datakeys.append('Model Coefficients')

        # Pick a unique key for the results so re-runs do not overwrite.
        number = 1
        cvid = str('CV Results - ' + yvars[0][1])
        while cvid in self.datakeys:
            number += 1
            cvid = str('CV Results - ' + yvars[0][1]) + ' - ' + str(number)

        self.datakeys.append(cvid)
        self.data[cvid] = spectral_data(self.cv_results_combined)