def function(self):
    method = self.chooseAlgorithmComboBox.currentText()
    datakey = self.chooseDataComboBox.currentText()
    # return method parameters and parameters that changed
    methodParameters, _changed = self.getMethodParams(self.chooseAlgorithmComboBox.currentIndex())
    datakey_new = datakey + '-Baseline Removed-' + method + str(_changed)
    datakey_baseline = datakey + '-Baseline-' + method + str(_changed)
    self.datakeys.append(datakey_new)
    self.datakeys.append(datakey_baseline)
    self.data[datakey_new] = spectral_data(self.data[datakey].df.copy(deep=True))
    self.data[datakey_new].remove_baseline(method, segment=True, params=methodParameters)
    self.data[datakey_baseline] = spectral_data(self.data[datakey_new].df_baseline)
def run(self):
    method = self.chooseAlgorithmComboBox.currentText()
    datakey = self.chooseDataComboBox.currentText()
    xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
    yvars = [('comp', str(y.text())) for y in self.yVariableList.selectedItems()]
    yrange = [self.yMinDoubleSpinBox.value(), self.yMaxDoubleSpinBox.value()]
    params, modelkey = self.getMethodParams(self.chooseAlgorithmComboBox.currentIndex())
    y = np.array(self.data[datakey].df[yvars])
    match = np.squeeze((y > yrange[0]) & (y < yrange[1]))
    data_for_cv = spectral_data(self.data[datakey].df.ix[match])
    # Warning: parameter values passed to cv.cv(params) must be wrapped in lists,
    # e.g. {'n_components': [4], 'scale': [False]}
    cv_obj = cv.cv(params)
    self.data[datakey].df, self.cv_results, cvmodels, cvmodelkeys = cv_obj.do_cv(
        data_for_cv.df, xcols=xvars, ycol=yvars, yrange=yrange, method=method)
    for n, key in enumerate(cvmodelkeys):
        self.modelkeys.append(key)
        self.models[key] = cvmodels[n]
        self.model_xvars[key] = xvars
        self.model_yvars[key] = yvars
        if method != 'GP':
            coef = np.squeeze(cvmodels[n].model.coef_)
            coef = pd.DataFrame(coef)
            coef.index = pd.MultiIndex.from_tuples(self.data[datakey].df[xvars].columns.values)
            coef = coef.T
            coef[('meta', 'Model')] = key
            try:
                coef[('meta', 'Intercept')] = cvmodels[n].model.intercept_
            except:
                pass
            try:
                self.data['Model Coefficients'] = spectral_data(
                    pd.concat([self.data['Model Coefficients'].df, coef]))
            except:
                self.data['Model Coefficients'] = spectral_data(coef)
            self.datakeys.append('Model Coefficients')
    self.datakeys.append('CV Results ' + modelkey)
    self.data['CV Results ' + modelkey] = self.cv_results
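# Toy illustration (not part of the GUI code) of the y-range filtering used above:
# rows whose composition falls outside [yrange[0], yrange[1]] are dropped before
# cross-validation. The column names and values here are invented for the example.
import numpy as np
import pandas as pd

toy = pd.DataFrame({('comp', 'SiO2'): [10.0, 55.0, 120.0],
                    ('wvl', 500.0): [1.0, 2.0, 3.0]})
yrange = [0, 100]
y = np.array(toy[[('comp', 'SiO2')]])
match = np.squeeze((y > yrange[0]) & (y < yrange[1]))
print(toy[match])  # keeps only the rows with SiO2 = 10.0 and 55.0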
def run(self):
    method = self.chooseAlgorithmComboBox.currentText()
    datakey = self.chooseDataComboBox.currentText()
    # return method parameters and parameters that changed
    methodParameters, _changed = self.getMethodParams(self.chooseAlgorithmComboBox.currentIndex())
    datakey_new = datakey + '-Baseline Removed-' + method + str(_changed)
    datakey_baseline = datakey + '-Baseline-' + method + str(_changed)
    self.datakeys.append(datakey_new)
    self.datakeys.append(datakey_baseline)
    self.data[datakey_new] = spectral_data(self.data[datakey].df.copy(deep=True))
    self.data[datakey_new].remove_baseline(method, segment=True, params=methodParameters)
    self.data[datakey_baseline] = spectral_data(self.data[datakey_new].df_baseline)
    # @@TODO make sure that this is the data that we want to propagate.
    self.setCurrentData(datakey_new)
def do_get_data(self, filename, keyname):
    try:
        print('Loading data file: ' + str(filename))
        self.data[keyname] = spectral_data(pd.read_csv(filename, header=[0, 1]))
        self.datakeys.append(keyname)
    except Exception as e:
        print('Problem reading data: {}'.format(e))
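# The CSV files read above are expected to carry two header rows, which pandas turns
# into a column MultiIndex (a 'meta'/'comp'/'wvl' level over the column names or
# wavelengths). A minimal made-up file illustrating the assumed layout:
#
#   meta,comp,wvl,wvl
#   Target,SiO2,500.0,501.0
#   sample1,55.2,1.0,2.0
#
# pd.read_csv(path, header=[0, 1]) then yields columns such as ('comp', 'SiO2')
# and ('wvl', '500.0').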
def run(self):
    method = self.chooseAlgorithmComboBox.currentText()
    datakey = self.chooseDataComboBox.currentText()
    xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
    yvars = [('comp', str(y.text())) for y in self.yVariableList.selectedItems()]
    yrange = [self.yMinDoubleSpinBox.value(), self.yMaxDoubleSpinBox.value()]
    params, modelkey = self.getMethodParams(self.chooseAlgorithmComboBox.currentIndex())
    modelkey = "{} - {} - ({}, {}) {}".format(method, yvars[0][-1], yrange[0], yrange[1], modelkey)
    self.modelkeys.append(modelkey)
    print(params, modelkey)
    self.models[modelkey] = regression.regression([method], [yrange], [params])
    x = self.data[datakey].df[xvars]
    y = self.data[datakey].df[yvars]
    x = np.array(x)
    y = np.array(y)
    ymask = np.squeeze((y > yrange[0]) & (y < yrange[1]))
    y = y[ymask]
    x = x[ymask, :]
    self.models[modelkey].fit(x, y)
    self.model_xvars[modelkey] = xvars
    self.model_yvars[modelkey] = yvars
    coef = np.squeeze(self.models[modelkey].model.coef_)
    coef = pd.DataFrame(coef)
    coef.index = pd.MultiIndex.from_tuples(self.data[datakey].df[xvars].columns.values)
    coef = coef.T
    coef[('meta', 'Model')] = modelkey
    try:
        self.data['Model Coefficients'] = spectral_data(
            pd.concat([self.data['Model Coefficients'].df, coef]))
    except:
        self.data['Model Coefficients'] = spectral_data(coef)
    self.datakeys.append('Model Coefficients')
    self.current_model = modelkey
    self.current_data = datakey
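# Toy illustration of how the model coefficients are tabulated above: one value per
# ('wvl', wavelength) column, transposed into a single row, with the model name stored
# under a ('meta', 'Model') column so several models can be stacked with pd.concat.
# The wavelengths, coefficient values, and model key are made up for the example.
import numpy as np
import pandas as pd

wvl_cols = [('wvl', 400.0), ('wvl', 401.0), ('wvl', 402.0)]
coef = pd.DataFrame(np.array([0.1, -0.3, 0.05]))
coef.index = pd.MultiIndex.from_tuples(wvl_cols)
coef = coef.T
coef[('meta', 'Model')] = 'PLS - SiO2 - (0, 100)'
print(coef)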
def function(self):
    params = self.getGuiParams()
    filename = params['fileNameLineEdit']
    keyname = params['dataSetNameLineEdit']
    print('Loading data file: ' + str(filename))
    if keyname in self.datakeys:
        print("That data set name is already in use. Try something else.")
    else:
        self.data[keyname] = spectral_data(pd.read_csv(filename, header=[0, 1], verbose=True))
        self.datakeys.append(keyname)
def run(self):
    datakey = self.chooseDataComboBox.currentText()
    colname = self.splitOnUniqueValuesOfComboBox.currentText()
    vars_level0 = self.data[datakey].df.columns.get_level_values(0)
    vars_level1 = self.data[datakey].df.columns.get_level_values(1)
    vars_level1 = list(vars_level1[vars_level0 != 'wvl'])
    vars_level0 = list(vars_level0[vars_level0 != 'wvl'])
    colname = (vars_level0[vars_level1.index(colname)], colname)
    coldata = np.array([str(i) for i in self.data[datakey].df[colname]])
    unique_values = np.unique(coldata)
    for i in unique_values:
        new_datakey = datakey + ' - ' + str(i)
        self.datakeys.append(new_datakey)
        self.data[new_datakey] = spectral_data(self.data[datakey].df.ix[coldata == i])
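# Small demonstration of the column lookup used above: the GUI exposes only the second
# level of the column MultiIndex (e.g. 'Target'), so the full (level-0, level-1) tuple
# has to be reconstructed before indexing. The data frame and values here are invented.
import pandas as pd

df = pd.DataFrame({('meta', 'Target'): ['A', 'A', 'B'],
                   ('wvl', 500.0): [1.0, 2.0, 3.0]})
colname = 'Target'
vars_level0 = df.columns.get_level_values(0)
vars_level1 = df.columns.get_level_values(1)
vars_level1 = list(vars_level1[vars_level0 != 'wvl'])
vars_level0 = list(vars_level0[vars_level0 != 'wvl'])
colname = (vars_level0[vars_level1.index(colname)], colname)
print(colname)             # ('meta', 'Target')
print(df[colname].values)  # ['A' 'A' 'B']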
def jsc_batch(directory, LUT_files, searchstring='*.txt', to_csv=None):
    # Read in the lookup tables to expand filename metadata
    refdata = read_refdata(LUT_files)
    # Get the list of files that match the search string in the given directory
    filelist = file_search(directory, searchstring)
    spectIDs = []  # create an empty list to hold the spectrometer IDs
    libsIDs = []
    timestamps = []
    locs = []
    for file in filelist:
        filesplit = os.path.basename(file).split('_')
        spectIDs.append(filesplit[6])  # get the spectrometer ID for each file in the list
        libsIDs.append(filesplit[0])
        timestamps.append(filesplit[-1].split('.')[0])
        locs.append(filesplit[1])
    spectIDs_unique = np.unique(spectIDs)  # get the unique spectrometer IDs
    libsIDs_unique = np.unique(libsIDs)
    dfs = []  # create an empty list to hold the data frames for each spectrometer
    # Loop through each LIBS ID
    alldata = []
    for ID in libsIDs_unique:
        print('Working on : ' + str(ID))
        sublist = filelist[np.in1d(libsIDs, ID)]
        locs = []
        for file in sublist:
            locs.append(os.path.basename(file).split('_')[1])
        locs_unique = np.unique(locs)
        # Loop through each location for that LIBS ID
        for loc in locs_unique:
            print(loc)
            sub_sublist = sublist[np.in1d(locs, loc)]  # get the files for that LIBS ID and location
            data = JSC(sub_sublist, refdata)
            alldata.append(data)
    combined = pd.concat(alldata)
    if to_csv is not None:
        print('Writing combined data to: ' + to_csv)
        combined.to_csv(to_csv)
    return spectral_data(combined)
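# A hypothetical call to jsc_batch, sketched from its signature above. The directory,
# lookup-table paths, and output filename are placeholders, and read_refdata, JSC, and
# file_search are assumed to be available from the same module as the function.
lut_files = ['JSC_LUT_spectrometers.csv', 'JSC_LUT_targets.csv']  # hypothetical LUT files
jsc_data = jsc_batch('/path/to/jsc/spectra', lut_files,
                     searchstring='*.txt', to_csv='jsc_combined.csv')
print(jsc_data.df.shape)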
def run(self):
    match_vectors = []
    logic_list = []
    datakey = self.chooseData.currentText()
    for i in self.operations:
        values_tmp = i.GetValues()
        if i.hidden is None:
            match_vectors.append(self.evaluate_operation(datakey, values_tmp))
            logic_list.append(values_tmp['logic'])
        else:
            if not i.hidden.isChecked():
                match_vectors.append(self.evaluate_operation(datakey, values_tmp))
                logic_list.append(values_tmp['logic'])
    match_combined = np.all(match_vectors, axis=0)
    print(self.data[datakey].df.shape)
    self.data[datakey] = spectral_data(self.data[datakey].df.ix[~match_combined])
    print(self.data[datakey].df.shape)
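# Toy illustration of how the per-operation match vectors are combined above: np.all
# across the stacked boolean vectors keeps only rows matched by every active operation,
# and the ~ then removes exactly those rows. The vectors here are invented.
import numpy as np

match_vectors = [np.array([True, True, False]), np.array([True, False, False])]
match_combined = np.all(match_vectors, axis=0)  # [True, False, False]
print(~match_combined)                          # rows to keep: [False, True, True]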
import glob

import numpy
import pandas as pd
import matplotlib.pyplot as plot
from scipy.io import readsav

from libpysat.spectral.spectral_data import spectral_data

filelist = glob.glob(r"E:\ChemCam\Calibration Data\LANL_testbed\Caltargets\*calib.sav")
filelist2 = glob.glob(r"E:\ChemCam\Calibration Data\LANL_testbed\Caltargets\test.sav")
data2 = readsav(filelist2[0])
data = readsav(filelist[0])
muv = data['calibspecmuv']
muv_orig = muv
x = data['defuv']  # numpy.arange(len(muv))
muv = numpy.array([muv, muv])
muv = pd.DataFrame(muv)
colnames = []
for i, j in enumerate(x):
    colnames.append(('wvl', x[i]))
muv.columns = pd.MultiIndex.from_tuples(colnames)
muv = spectral_data(muv)
muv2 = spectral_data(muv)
muv.remove_baseline(method='ccam', params={'int_flag_': 2, 'lvmin_': 6, 'lv_': 10})
# muv2.remove_baseline(method='wavelet',params=)
# This was causing setup.py to crash, so it has been commented out:
# muv_denoise,muv_noise=ccam_denoise.ccam_denoise(muv,sig=3,niter=4)
# plot.figure()
# plot.plot(muv_noise)
# muv_nocont,cont=baseline_code.ccam_remove_continuum.ccam_remove_continuum(x,muv,10,lvmin=6,int_flag=2)
plot.figure(figsize=[11, 8])
plot.plot(x, muv.df['wvl'].iloc[0], label='Continuum Removed', linewidth=0.5)
plot.plot(x, muv.df_baseline['wvl'].iloc[0], label='Continuum', linewidth=0.5)
plot.plot(x, muv_orig, label='Original', linewidth=0.5)
plot.plot(x, data2['muv_cont'], label='IDL Continuum', linestyle='--', linewidth=0.5)
plot.legend()
def ccam_batch(directory, searchstring='*.csv', to_csv=None, lookupfile=None, ave=True, progressbar=None):
    # Determine whether the files are .csv or .SAV
    if '.sav' in searchstring.lower():
        is_sav = True
    else:
        is_sav = False
    filelist = file_search(directory, searchstring)
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')
    # Extract the sclock and version for each file and ensure that only one
    # file per sclock is being read, and that it is the one with the highest version number
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]  # extract the sclock
        P_version[i] = basenames[i][-5:-4]  # extract the version
    sclocks_unique = np.unique(sclocks)  # find unique sclocks
    filelist_new = np.array([], dtype='str')
    for i in sclocks_unique:
        match = (sclocks == i)  # find all instances with matching sclocks
        maxP = P_version[match] == max(P_version[match])  # find the highest version among these files
        filelist_new = np.append(filelist_new, filelist[match][maxP])  # keep only the file with the highest version
    filelist = filelist_new
    # Should add a progress bar for importing large numbers of files
    dt = []
    if progressbar:
        from PyQt5 import QtCore  # only rely on PyQt5 if a progressbar object has been passed
        progressbar.setWindowTitle('ChemCam data progress')
        progressbar.setRange(0, filelist.size)
        progressbar.show()
    filecount = 0
    for i, file in enumerate(filelist):
        filecount = filecount + 1
        print(file)
        if is_sav:
            tmp = CCAM_SAV(file, ave=ave)
        else:
            tmp = CCAM_CSV(file, ave=ave)
        if i == 0:
            combined = tmp
        else:
            # This ensures that rounding errors are not causing mismatches in columns
            cols1 = list(combined['wvl'].columns)
            cols2 = list(tmp['wvl'].columns)
            if set(cols1) == set(cols2):
                combined = pd.concat([combined, tmp])
            else:
                print("Wavelengths don't match!")
        if progressbar:
            progressbar.setValue(filecount)
            QtCore.QCoreApplication.processEvents()
    combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(combined.loc[:, ('meta', 'sclock')])
    if lookupfile is not None:
        lookupfile = lookupfile.replace('[', '').replace(']', '').replace("'", '').replace(' ', '').split(',')
        combined = lookup(combined, lookupfile=lookupfile)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
from libpysat.fileio import io_ccam_pds
import libpysat.spectral.spectral_data as sd
import pandas as pd
import numpy as np

path = r"C:\Users\rbanderson\Desktop\test_data\pdl"
db = r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\full_db_mars_corrected_dopedTiO2_pandas_format.csv"

# ccam_batch already returns a spectral_data object, so no further wrapping is needed
data = io_ccam_pds.ccam_batch(path, '*CCS*.SAV', ave=True)
# data.peak_area()

db = sd.spectral_data(pd.read_csv(db, header=[0, 1]))
db.peak_area()
# Load one set of data that will be used to create the model
import pandas as pd
from libpysat.spectral.spectral_data import spectral_data

data1 = "G:/.csvfile/full_db_mars_corrected_dopedTiO2_pandas_format.csv"
data1 = spectral_data(pd.read_csv(data1, header=[0, 1], verbose=True))

# Load a second set of data that will serve as the "unknown"
# (even though in this test case I'm using some known data I had on hand)
data2 = "G:/.csvfile/lab_data_averages_pandas_format.csv"
data2 = spectral_data(pd.read_csv(data2, header=[0, 1], verbose=True))

# Interpolate the first ("known") data set onto the same wavelengths as the "unknown" data
data1.interp(data2.df['wvl'].columns)

# Mask out unwanted portions of the first data set
# (example mask files are included in the "inputs" directory)
maskfile = "G:/.csvfile/mask_minors_noise.csv"
data1.mask(maskfile, maskvar='wvl')

# Apply the same mask to the second data set
data2.mask(maskfile, maskvar='wvl')

# Normalize the spectra in the first data set so that the sum of each spectrum (from 0-1000 nm) is 1
data1.norm([(0, 1000)], 'wvl')

# Same normalization on the second data set
data2.norm([(0, 1000)], 'wvl')

# Get rid of rows in the first data set that don't have compositions for SiO2
# (if you don't do this, it causes problems later on)

# Divide the first data set into nfolds folds with similar distributions of SiO2 compositions.
# Hold out the fold numbered testfold as the test set; use the remaining folds as the training set.
colname = ('comp', 'SiO2')
nfolds = 3
testfold = 2
data1.stratified_folds(nfolds=nfolds, sortby=colname)
data1_train = data1.rows_match(('meta', 'Folds'), [testfold], invert=True)
data1_test = data1.rows_match(('meta', 'Folds'), [testfold])
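# A possible next step (a sketch, not part of the original script): pull the spectra and
# SiO2 labels out of the training and test folds defined above, the same way the GUI
# regression code indexes its data, so they can be handed to any regression routine.
import numpy as np

x_train = np.array(data1_train.df['wvl'])
y_train = np.array(data1_train.df[('comp', 'SiO2')])
x_test = np.array(data1_test.df['wvl'])
y_test = np.array(data1_test.df[('comp', 'SiO2')])
print(x_train.shape, x_test.shape)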
def ccam_batch(directory, searchstring='*.csv', to_csv=None, lookupfile=None, ave=True, progressbar=None):
    # Determine whether the files are .csv or .SAV
    if '.sav' in searchstring.lower():
        is_sav = True
    else:
        is_sav = False
    filelist = file_search(directory, searchstring)
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')
    # Extract the sclock and version for each file and ensure that only one
    # file per sclock is being read, and that it is the one with the highest version number
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]  # extract the sclock
        P_version[i] = basenames[i][-5:-4]  # extract the version
    sclocks_unique = np.unique(sclocks)  # find unique sclocks
    filelist_new = np.array([], dtype='str')
    for i in sclocks_unique:
        match = (sclocks == i)  # find all instances with matching sclocks
        maxP = P_version[match] == max(P_version[match])  # find the highest version among these files
        filelist_new = np.append(filelist_new, filelist[match][maxP])  # keep only the file with the highest version
    filelist = filelist_new
    # Should add a progress bar for importing large numbers of files
    dt = []
    if progressbar:
        from PyQt5 import QtCore  # only rely on PyQt5 if a progressbar object has been passed
        progressbar.setWindowTitle('ChemCam data progress')
        progressbar.setRange(0, filelist.size)
        progressbar.show()
    filecount = 0
    for i in filelist:
        filecount = filecount + 1
        print(i)
        try:
            if is_sav:
                t = time.time()
                tmp = CCAM_SAV(i, ave=ave)
                dt.append(time.time() - t)
            else:
                t = time.time()
                tmp = CCAM_CSV(i)
                dt.append(time.time() - t)
            if i == filelist[0]:
                combined = tmp
            else:
                # This ensures that rounding errors are not causing mismatches in columns
                cols1 = list(combined['wvl'].columns)
                cols2 = list(tmp['wvl'].columns)
                if set(cols1) == set(cols2):
                    combined = pd.concat([combined, tmp])
                else:
                    print("Wavelengths don't match!")
        except Exception as e:
            print('Problem reading ' + str(i) + ': ' + str(e))
        if progressbar:
            progressbar.setValue(filecount)
            QtCore.QCoreApplication.processEvents()
    combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(combined.loc[:, ('meta', 'sclock')])
    if lookupfile is not None:
        combined = lookup(combined, lookupfile=lookupfile)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
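# A hypothetical call to ccam_batch, mirroring how it is invoked elsewhere in these
# scripts ('*CCS*.SAV' search string, averaging enabled). The directory and output path
# are placeholders; the optional Qt progress bar is omitted here.
ccs_data = ccam_batch('/path/to/ccam/ccs_files', searchstring='*CCS*.SAV',
                      ave=True, to_csv='ccam_combined.csv')
print(ccs_data.df.shape)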
from libpysat.fileio import io_ccam_pds
import libpysat.spectral.spectral_data as sd
import pandas as pd
import numpy as np

path = r"C:\Users\rbanderson\Desktop\test_data\pdl"
db = r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\full_db_mars_corrected_dopedTiO2_pandas_format.csv"

data = io_ccam_pds.ccam_batch(path, '*CCS*.SAV', ave=True)
# data.peak_area()

db = sd.spectral_data(pd.read_csv(db, header=[0, 1]))
db.peak_area()
r"E:\ChemCam\Calibration Data\LANL_testbed\Caltargets\*calib.sav") filelist2 = glob.glob( r"E:\ChemCam\Calibration Data\LANL_testbed\Caltargets\test.sav") data2 = readsav(filelist2[0]) data = readsav(filelist[0]) muv = data['calibspecmuv'] muv_orig = muv x = data['defuv'] # numpy.arange(len(muv)) muv = numpy.array([muv, muv]) muv = pd.DataFrame(muv) colnames = [] for i, j in enumerate(x): colnames.append(('wvl', x[i])) muv.columns = pd.MultiIndex.from_tuples(colnames) muv = spectral_data(muv) muv2 = spectral_data(muv) muv.remove_baseline(method='ccam', params={ 'int_flag_': 2, 'lvmin_': 6, 'lv_': 10 }) # muv2.remove_baseline(method='wavelet',params=) # this was causing setup.py to crash, it has been commented out # muv_denoise,muv_noise=ccam_denoise.ccam_denoise(muv,sig=3,niter=4) # plot.figure() # plot.plot(muv_noise) # muv_nocont,cont=baseline_code.ccam_remove_continuum.ccam_remove_continuum(x,muv,10,lvmin=6,int_flag=2) plot.figure(figsize=[11, 8]) plot.plot(x, muv.df['wvl'].iloc[0], label='Continuum Removed', linewidth=0.5)
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 07 12:29:20 2014

@author: rbanderson
"""
import matplotlib.pyplot as plot
import pandas as pd

from libpysat.spectral.spectral_data import spectral_data

# import mlpy.wavelet

filename = r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\JSC_data_combined_20170307.csv"
data = spectral_data(pd.read_csv(filename, header=[0, 1]))
data = data.df.ix[0:5]
data_spect = data['wvl']  # spectra only; columns are wavelengths
colcheck = data_spect.columns.values < 350
data_spect = data_spect.iloc[:, colcheck]
data_orig = data_spect.iloc[0].copy()  # keep the original first spectrum for comparison
data = spectral_data(data_spect)
data.remove_baseline(method='ccam', params={'int_flag_': 2, 'lvmin_': 6, 'lv_': 10})
x = data.df.columns.values
plot.figure(figsize=[11, 8])
plot.plot(x, data_orig, label='Original', linewidth=0.5)
plot.plot(x, data.df.iloc[0], label='Continuum Removed', linewidth=0.5)
plot.plot(x, data.df_baseline.iloc[0], label='Continuum', linewidth=0.5)
plot.legend()
plot.savefig('cont_test.png', dpi=1000)
plot.show()
# plot.figure(figsize=[11,8])