def jsc_batch(directory, LUT_files, searchstring='*.txt', to_csv=None):
    """Read a directory of JSC spectra, expand filename metadata from lookup
    tables, and combine everything into a single spectral_data object.

    Parameters
    ----------
    directory : str
        Directory to search for spectra files.
    LUT_files : dict
        Lookup-table files passed to ``read_refdata`` to expand the metadata
        encoded in each filename.
    searchstring : str, optional
        Glob pattern used to find data files (default ``'*.txt'``).
    to_csv : str or None, optional
        If given, the combined data frame is also written to this CSV path.

    Returns
    -------
    spectral_data
        Combined data for all spectrometers found in the directory.
    """
    # Read in the lookup tables used to expand the filename metadata
    refdata = read_refdata(LUT_files)
    # Find matching files; coerce to ndarray so boolean masks can index it below
    filelist = np.asarray(file_search(directory, searchstring))
    # The spectrometer ID is the 7th underscore-delimited field of the basename
    # — TODO confirm this filename convention holds for all inputs.
    spectIDs = [os.path.basename(f).split('_')[6] for f in filelist]
    spectIDs_unique = np.unique(spectIDs)
    # Read the spectra for each spectrometer and stack them into one frame
    # per spectrometer. (np.isin supersedes the deprecated np.in1d.)
    dfs = []
    for spect in spectIDs_unique:
        sublist = filelist[np.isin(spectIDs, spect)]
        dfs.append(pd.concat([JSC(f, refdata) for f in sublist]))
    # Merge the per-spectrometer frames into a single frame holding all data
    combined = dfs[0]
    for df in dfs[1:]:
        combined = combined.merge(df)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
def ccs_batch(directory, searchstring='*CCS*.csv', is_sav=False, to_csv=None, lookupfile=None):
    """Batch-read ChemCam CCS files, keeping only the highest-version file per
    sclock, and combine them into a single spectral_data object.

    Parameters
    ----------
    directory : str
        Directory to search for CCS files.
    searchstring : str, optional
        Glob pattern used to find data files (default ``'*CCS*.csv'``).
    is_sav : bool, optional
        Whether files are IDL .SAV files. NOTE: this argument is overridden
        below based on ``searchstring`` (preserving the original behavior).
    to_csv : str or None, optional
        If given, the combined data frame is also written to this CSV path.
    lookupfile : str or None, optional
        If given, passed to ``lookup`` to attach additional metadata.

    Returns
    -------
    spectral_data
        Combined data for all files read.
    """
    # Determine file type from the search pattern; this intentionally
    # overrides the is_sav argument, matching the original behavior.
    is_sav = 'SAV' in searchstring
    filelist = file_search(directory, searchstring)
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')
    # Extract the sclock and version for each file so that only one file per
    # sclock is read: the one with the highest version number.
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]     # sclock: characters 4-12 of the basename
        P_version[i] = basenames[i][-5:-4]  # version: single digit before the extension
    sclocks_unique = np.unique(sclocks)
    filelist_new = np.array([], dtype='str')
    for sclock in sclocks_unique:
        match = (sclocks == sclock)                       # all files with this sclock
        maxP = P_version[match] == max(P_version[match])  # highest version among them
        filelist_new = np.append(filelist_new, filelist[match][maxP])
    filelist = filelist_new
    # TODO: add a progress bar for importing large numbers of files
    dt = []  # per-file read times (kept for profiling, as in the original)
    combined = None
    for i, fname in enumerate(filelist):
        t = time.time()
        tmp = CCS_SAV(fname) if is_sav else CCS(fname)
        dt.append(time.time() - t)
        # Use the loop index to detect the first file; comparing file names by
        # value (as before) could misfire if a name repeats in the list.
        if i == 0:
            combined = tmp
        else:
            # Compare wavelength columns as sets so that rounding differences
            # in label order don't cause spurious mismatches
            cols1 = list(combined['wvl'].columns)
            cols2 = list(tmp['wvl'].columns)
            if set(cols1) == set(cols2):
                combined = pd.concat([combined, tmp])
            else:
                print("Wavelengths don't match!")
    combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(combined.loc[:, ('meta', 'sclock')])
    if lookupfile is not None:
        combined = lookup(combined, lookupfile=lookupfile)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
######read unknown data (only do this the first time since it's slow) #unknowndatadir=r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Lab Data" #unknowndatasearch='CM*.SAV' #unknowndatacsv=r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Lab Data\lab_data_averages_pandas_format.csv" #unknown_data=ccs_batch(unknowndatadir,searchstring=unknowndatasearch) # ##write it to a csv file for future use (much faster than reading individual files each time) # ##this writes all the data, including single shots, to a file (can get very large!!) #unknown_data.df.to_csv(unknowndatacsv) # ##this writes just the average spectra to a file #unknown_data.df.loc['average'].to_csv(unknowndatacsv) #put the training data dataframe into a spectral_data object data=spectral_data(data) ##########read unknown data from the combined csv file (much faster) unknowndatacsv=r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Lab Data\lab_data_averages_pandas_format.csv" unknown_data=pd.read_csv(unknowndatacsv,header=[0,1]) unknown_data=spectral_data(unknown_data) #Interpolate unknown data onto the same exact wavelengths as the training data unknown_data.interp(data.df['wvl'].columns) #Mask out unwanted portions of the data maskfile=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\mask_minors_noise.csv" data.mask(maskfile) unknown_data.mask(maskfile)
#foo=ccs1.norm(ranges) #t2=time.time() #ccs2=ccs_batch(data_dir2,searchstring='*CCS*.csv',to_csv=r'..\csv_output_test.csv',lookupfile=masterlists) #dt2=time.time()-t2 #print(dt1) # #spect_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Spectrometer_Table.csv" #experiment_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Experiment_Setup_Table.csv" #laser_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Laser_Setup_Table.csv" #sample_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Sample_Table.csv" #LUT_files={'spect':spect_table,'exp':experiment_table,'laser':laser_table,'sample':sample_table} # #data_dir=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\LIBS USGS\DATA" #t=time.time() #JSC_data=jsc_batch(data_dir,LUT_files,to_csv='../../JSC_test.csv') JSC=spectral_data(pd.read_csv('../../JSC_test.csv',header=[0,1],index_col=0)) JSC_als,als_baseline=JSC.remove_baseline(method='als') JSC_dietrich,dietrich_baseline=JSC.remove_baseline(method='dietrich') JSC_polyfit,polyfit_baseline=JSC.remove_baseline(method='polyfit') JSC_airpls,airpls_baseline=JSC.remove_baseline(method='airpls') JSC_fabc,fabc_baseline=JSC.remove_baseline(method='fabc') JSC_kk,kk_baseline=JSC.remove_baseline(method='kk') #JSC_mario,mario_baseline=JSC.remove_baseline(method='mario') JSC_median,median_baseline=JSC.remove_baseline(method='median') JSC_rubberband,rubberband_baseline=JSC.remove_baseline(method='rubberband') wvls=JSC.df['wvl'].columns.values plot.plot(wvls,JSC.df['wvl'].loc[0,:]) plot.plot(wvls,als_baseline.df['wvl'].loc[0,:]) plot.plot(wvls,dietrich_baseline.df['wvl'].loc[0,:]) plot.plot(wvls,polyfit_baseline.df['wvl'].loc[0,:])
# Known Data: training spectra with a two-row header (multi-index columns)
known_db = os.path.expanduser("~\\full_db_mars_corrected_dopedTiO2_pandas_format.csv")
panda_known_db = pd.read_csv(known_db, header=[0, 1])

# Unknown Data: spectra to be classified/predicted, same file layout
unknown_db = os.path.expanduser("~\\lab_data_averages_pandas_format.csv")
panda_unknown_db = pd.read_csv(unknown_db, header=[0, 1])

# Mask File
# NOTE(review): "mask_minors_nouse.csv" may be a typo for "mask_minors_noise.csv"
# (the name used elsewhere in this file) — confirm which file actually exists.
mask_file = os.path.expanduser("~\\mask_minors_nouse.csv")

###################################################
# Spectral setup
# spectral analysis data
###################################################
# Known Data
k_spec_db = spectral_data(panda_known_db)
# Unknown Data
u_spec_db = spectral_data(panda_unknown_db)

###################################################
# Interpolate unknown spec data
###################################################
# Interpolate the unknown spectra onto the known data's wavelengths.
# Fixes two bugs in the original: the column group was misspelled 'wv1'
# (every other section uses 'wvl'), and the undefined name `data` was used
# where the known-data object built above (k_spec_db) was clearly intended.
u_spec_db.interp(k_spec_db.df['wvl'].columns)

###################################################
# Mask data
###################################################
k_spec_db.mask(mask_file)
u_spec_db.mask(mask_file)