def jsc_batch(directory, LUT_files, searchstring='*.txt', to_csv=None):
    """Read a directory of JSC spectra, group them by spectrometer, and
    combine everything into a single spectral_data object.

    :param directory: directory to search for spectrum files
    :param LUT_files: lookup-table files used to expand filename metadata
    :param searchstring: glob pattern selecting the files to read
    :param to_csv: optional path; when given, the combined table is also written there
    :return: spectral_data wrapping the combined data frame
    """
    # Lookup tables expand the metadata encoded in the file names.
    refdata = read_refdata(LUT_files)
    filelist = file_search(directory, searchstring)
    # Spectrometer ID is the 7th underscore-delimited field of the base name.
    spect_ids = [os.path.basename(f).split('_')[6] for f in filelist]
    # Build one concatenated frame per unique spectrometer.
    per_spect_frames = []
    for spect in np.unique(spect_ids):
        matching = filelist[np.in1d(spect_ids, spect)]
        per_spect_frames.append(pd.concat([JSC(f, refdata) for f in matching]))
    # Merge the per-spectrometer frames into a single table.
    combined = per_spect_frames[0]
    for frame in per_spect_frames[1:]:
        combined = combined.merge(frame)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
def removenull(self, datakey, colname):
    """Drop rows whose *colname* value is null from the data set stored
    under *datakey*.

    The filtered frame is re-wrapped in spectral_data and stored back into
    self.data[datakey]. The shape is printed before and after so the user
    can see how many rows were removed. Any failure is reported through
    error_print rather than raised.
    """
    try:
        df = self.data[datakey].df
        print(df.shape)
        # .loc with notnull() replaces the removed .ix indexer and the
        # deprecated unary '-' negation of a boolean Series.
        self.data[datakey] = spectral_data(df.loc[df[colname].notnull()])
        print(self.data[datakey].df.shape)
    except Exception as e:
        error_print(e)
def get_data(self, filename, keyname):
    """Read a CSV file with a two-row column header into a spectral_data
    object and register it.

    The loaded object is stored in self.data under *keyname*, and the key
    is appended to self.datakeys. Read failures are reported through
    error_print rather than raised.
    """
    try:
        print('Loading data file: ' + str(filename))
        frame = pd.read_csv(filename, header=[0, 1])
        self.data[keyname] = spectral_data(frame)
        self.datakeys.append(keyname)
    except Exception as e:
        error_print('Problem reading data: {}'.format(e))
def set_spectral(self, data_base):
    """Load a CSV database with a two-row column header and wrap it.

    Usage:
        k_data = pysat_func.set_spectral(pysat_func.get_file_known_data())
        u_data = pysat_func.set_spectral(pysat_func.get_file_unknown_data())

    The user will choose from either database of unknowndata or knowndata.

    :param data_base: path to the CSV database to read
    :return: spectral_data wrapping the loaded frame
    """
    frame = pd.read_csv(data_base, header=[0, 1])
    return spectral_data(frame)
def jsc_batch(directory, LUT_files, searchstring='*.txt', to_csv=None):
    """Read JSC spectra grouped by LIBS ID and location into one table.

    File base names are underscore-delimited: field 0 is the LIBS ID and
    field 1 the location. All files sharing a LIBS ID / location pair are
    passed to JSC() together and the resulting frames concatenated.

    (Cleanup: removed locals that were computed but never used in the
    original — spectrometer IDs, timestamps, and an empty `dfs` list.)

    :param directory: directory to search for spectrum files
    :param LUT_files: lookup-table files used to expand filename metadata
    :param searchstring: glob pattern selecting the files to read
    :param to_csv: optional path; when given, the combined table is also written there
    :return: spectral_data wrapping the combined data frame
    """
    # Lookup tables expand the metadata encoded in the file names.
    refdata = read_refdata(LUT_files)
    filelist = file_search(directory, searchstring)
    # LIBS ID is the first underscore-delimited field of each base name.
    libsIDs = [os.path.basename(f).split('_')[0] for f in filelist]
    alldata = []
    for ID in np.unique(libsIDs):
        print('Working on : ' + str(ID))
        sublist = filelist[np.in1d(libsIDs, ID)]
        # Location is the second underscore-delimited field.
        locs = [os.path.basename(f).split('_')[1] for f in sublist]
        for loc in np.unique(locs):
            print(loc)
            # All files for this LIBS ID / location pair are read together.
            sub_sublist = sublist[np.in1d(locs, loc)]
            alldata.append(JSC(sub_sublist, refdata))
    combined = pd.concat(alldata)
    if to_csv is not None:
        print('Writing combined data to: ' + to_csv)
        combined.to_csv(to_csv)
    return spectral_data(combined)
def ccam_batch(directory, searchstring='*.csv', to_csv=None, lookupfile=None, ave=True, progressbar=None):
    """Read a directory of ChemCam files (.csv or IDL .sav), keep only the
    highest-version file per spacecraft clock value, and combine them into
    a single table.

    :param directory: directory to search for files
    :param searchstring: glob pattern; a '.sav' pattern switches to CCAM_SAV reading
    :param to_csv: optional path to write the combined table to
    :param lookupfile: optional lookup file used to expand the combined metadata
    :param ave: passed through to CCAM_SAV (averaging option)
    :param progressbar: optional Qt progress bar, updated as files are read
    :return: spectral_data wrapping the combined data frame
    """
    # Determine whether the files are IDL .sav or .csv from the pattern.
    is_sav = '.sav' in searchstring.lower()
    filelist = file_search(directory, searchstring)
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')
    # Extract the sclock and version for each file so that only one file per
    # sclock is read -- the one with the highest version number.
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]  # sclock is characters 4-12 of the name
        P_version[i] = basenames[i][-5:-4]  # version digit just before the extension
    filelist_new = np.array([], dtype='str')
    for sclock in np.unique(sclocks):
        match = (sclocks == sclock)  # all files with this sclock
        # Keep only the file(s) with the highest version among the matches.
        maxP = P_version[match] == max(P_version[match])
        filelist_new = np.append(filelist_new, filelist[match][maxP])
    filelist = filelist_new
    if progressbar:
        progressbar.setWindowTitle('ChemCam data progress')
        progressbar.setRange(0, filelist.size)
        progressbar.show()
    # BUG FIX: use a None sentinel instead of comparing against filelist[0];
    # if the first file failed to read, the original never bound `combined`
    # and every later iteration silently failed inside the bare except.
    combined = None
    for filecount, file in enumerate(filelist, start=1):
        print(file)
        try:
            tmp = CCAM_SAV(file, ave=ave) if is_sav else CCAM_CSV(file)
            if combined is None:
                combined = tmp
            else:
                # Compare column *sets* so rounding differences in the
                # wavelength labels do not cause mismatched columns on concat.
                cols1 = list(combined['wvl'].columns)
                cols2 = list(tmp['wvl'].columns)
                if set(cols1) == set(cols2):
                    combined = pd.concat([combined, tmp])
                else:
                    print("Wavelengths don't match!")
        except Exception as e:
            # BUG FIX: report failures instead of a bare 'except: pass' that
            # silently swallowed every read error.
            print('Problem reading {}: {}'.format(file, e))
        if progressbar:
            progressbar.setValue(filecount)
            QtCore.QCoreApplication.processEvents()
    combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(combined.loc[:, ('meta', 'sclock')])
    if lookupfile is not None:
        combined = lookup(combined, lookupfile=lookupfile)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
# Demo script: compare 'ccam' and 'wavelet' baseline removal on a MUV
# calibration spectrum against the IDL-derived continuum.
filelist = glob.glob(r"E:\ChemCam\Calibration Data\LANL_testbed\Caltargets\*calib.sav")
filelist2 = glob.glob(r"E:\ChemCam\Calibration Data\LANL_testbed\Caltargets\test.sav")
data2 = readsav(filelist2[0])
data = readsav(filelist[0])
muv_orig = data['calibspecmuv']  # keep the unmodified spectrum for plotting
x = data['defuv']  # wavelength axis
# Stack the spectrum twice to build a 2-row frame with ('wvl', wavelength) columns.
frame = pd.DataFrame(numpy.array([muv_orig, muv_orig]))
frame.columns = pd.MultiIndex.from_tuples([('wvl', wvl) for wvl in x])
muv = spectral_data(frame)
# BUG FIX: the original passed the spectral_data object itself back into
# spectral_data (spectral_data(muv)); wrap an independent copy of the frame
# instead so the two baseline methods operate on identical, separate data.
muv2 = spectral_data(frame.copy())
muv.remove_baseline(method='ccam', params={'int_flag_': 2, 'lvmin_': 6, 'lv_': 10})
# BUG FIX: 'params=' had no value in the original (syntax error). Using the
# method defaults here -- TODO confirm the intended wavelet parameters.
muv2.remove_baseline(method='wavelet', params={})
#muv_denoise,muv_noise=ccam_denoise.ccam_denoise(muv,sig=3,niter=4)
#plot.figure()
#plot.plot(muv_noise)
#muv_nocont,cont=baseline_code.ccam_remove_continuum.ccam_remove_continuum(x,muv,10,lvmin=6,int_flag=2)
plot.figure(figsize=[11, 8])
plot.plot(x, muv.df['wvl'].iloc[0], label='Continuum Removed', linewidth=0.5)
plot.plot(x, muv.df_baseline['wvl'].iloc[0], label='Continuum', linewidth=0.5)
plot.plot(x, muv_orig, label='Original', linewidth=0.5)
plot.plot(x, data2['muv_cont'], label='IDL Continuum', linestyle='--', linewidth=0.5)
plot.legend()
######read unknown data (only do this the first time since it's slow) #unknowndatadir=r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Lab Data" #unknowndatasearch='CM*.SAV' #unknowndatacsv=r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Lab Data\lab_data_averages_pandas_format.csv" #unknown_data=ccs_batch(unknowndatadir,searchstring=unknowndatasearch) # ##write it to a csv file for future use (much faster than reading individual files each time) # ##this writes all the data, including single shots, to a file (can get very large!!) #unknown_data.df.to_csv(unknowndatacsv) # ##this writes just the average spectra to a file #unknown_data.df.loc['average'].to_csv(unknowndatacsv) #put the training data dataframe into a spectral_data object data=spectral_data(data) print('read unknown data from the combined csv file (much faster)') unknowndatacsv=r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Lab Data\lab_data_averages_pandas_format.csv" unknown_data=pd.read_csv(unknowndatacsv,header=[0,1]) unknown_data=spectral_data(unknown_data) print('Interpolate unknown data onto the same exact wavelengths as the training data') unknown_data.interp(data.df['wvl'].columns) print('Mask out unwanted portions of the data') maskfile=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\mask_minors_noise.csv" data.mask(maskfile) unknown_data.mask(maskfile)
from sklearn import linear_model
# BUG FIX: import PLSRegression from the public package path;
# 'sklearn.cross_decomposition.pls_' is a private module that was removed
# in modern scikit-learn releases.
from sklearn.cross_decomposition import PLSRegression
from pysat.plotting import plots
import time
from pysat.regression import cv
from pysat.regression.regression import regression
#import matplotlib.pyplot as plot
import warnings

warnings.filterwarnings('ignore')

print('Read training database with mixtures')
db = r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Database\full_db_mars_corrected_dopedTiO2_mixtures_pandas.csv"
# Two-row header: top level groups columns (e.g. 'wvl'), second level names them.
data_mix = pd.read_csv(db, header=[0, 1])
data_mix = spectral_data(data_mix)

print('Read training database')
db = r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Database\full_db_mars_corrected_dopedTiO2_pandas_format.csv"
data = pd.read_csv(db, header=[0, 1])
data = spectral_data(data)

print('Mask out unwanted portions of the data')
maskfile = r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\mask_minors_noise.csv"
# Apply the same mask to both databases so their wavelength axes stay aligned.
data.mask(maskfile)
data_mix.mask(maskfile)

print(
    'Normalize spectra by specifying the wavelength ranges over which to normalize'
)
ranges = [(0, 1000)]  #this is equivalent to "norm3"
@author: rbanderson """ import glob import numpy import scipy import pysat.spectral.baseline_code.ccam_remove_continuum from scipy.io.idl import readsav import matplotlib.pyplot as plot import numpy import pandas as pd from pysat.spectral.spectral_data import spectral_data # import mlpy.wavelet filename = r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\JSC_data_combined_20170307.csv" data = spectral_data(pd.read_csv(filename, header=[0, 1])) data = data.df.ix[0:5] colcheck = data_spect.columns.values < 350 data_spect = data_spect.iloc[:, colcheck] data = spectral_data(data_spect) data.remove_baseline(method='ccam', params={ 'int_flag_': 2, 'lvmin_': 6, 'lv_': 10 }) x = data.df.columns.values plot.figure(figsize=[11, 8]) plot.plot(x, data_orig, label='Original', linewidth=0.5) plot.plot(x, data.df.iloc[0], label='Continuum Removed', linewidth=0.5) plot.plot(x, data.df_baseline.iloc[0], label='Continuum', linewidth=0.5)