def jsc_batch(directory, LUT_files, searchstring='*.txt', to_csv=None):
    """Read all JSC spectra in *directory* and combine them into one spectral_data object.

    Files are grouped by spectrometer ID (the 7th underscore-delimited field of
    the base filename), vertically concatenated per spectrometer, and the
    per-spectrometer frames are then merged into a single data frame.

    Parameters
    ----------
    directory : str
        Directory to search for data files.
    LUT_files : list
        Lookup-table files passed to read_refdata to expand filename metadata.
    searchstring : str, optional
        Glob pattern for matching files (default '*.txt').
    to_csv : str or None, optional
        If given, the combined data frame is also written to this CSV path.

    Returns
    -------
    spectral_data
        The combined data from all spectrometers.
    """
    # Read in the lookup tables to expand filename metadata
    refdata = read_refdata(LUT_files)
    # Get the list of files that match the search string in the given directory
    filelist = file_search(directory, searchstring)
    # Spectrometer ID is the 7th underscore-delimited field of the file name
    # (assumes file_search returns an array indexable by a boolean mask — TODO confirm)
    spectIDs = [os.path.basename(f).split('_')[6] for f in filelist]
    spectIDs_unique = np.unique(spectIDs)
    # Read the spectra for each spectrometer and stack them into one frame each.
    dfs = []
    for spect in spectIDs_unique:
        # np.isin supersedes the deprecated np.in1d
        sublist = filelist[np.isin(spectIDs, spect)]
        dfs.append(pd.concat([JSC(f, refdata) for f in sublist]))
    # Merge the per-spectrometer frames into a single frame with all the data.
    combined = dfs[0]
    for df in dfs[1:]:
        combined = combined.merge(df)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
def ccs_batch(directory, searchstring='*CCS*.csv', is_sav=False, to_csv=None, lookupfile=None):
    """Read and combine CCS spectra, keeping only the highest version per sclock.

    For each unique sclock (characters 4:13 of the base filename) only the file
    with the highest processing version (character -5) is read. Files are read
    as IDL .SAV or CSV depending on *is_sav* / *searchstring*.

    Parameters
    ----------
    directory : str
        Directory to search for CCS files.
    searchstring : str, optional
        Glob pattern for matching files; if it contains 'SAV' the files are
        read as .SAV regardless of *is_sav*.
    is_sav : bool, optional
        Force reading files as .SAV. (Previously this argument was always
        overwritten by the searchstring check; it is now honored.)
    to_csv : str or None, optional
        If given, also write the combined data frame to this CSV path.
    lookupfile : str or None, optional
        If given, merge additional metadata via lookup().

    Returns
    -------
    spectral_data
        The combined data from all files.
    """
    # Fix: the is_sav argument used to be unconditionally clobbered by an
    # if/else on searchstring; keep the autodetection but honor the caller.
    if 'SAV' in searchstring:
        is_sav = True
    filelist = file_search(directory, searchstring)
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')
    # Extract the sclock and version for each file so only one file per sclock
    # is read: the one with the highest version number.
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]      # extract the sclock
        P_version[i] = basenames[i][-5:-4]   # extract the version
    sclocks_unique = np.unique(sclocks)
    filelist_new = np.array([], dtype='str')
    for sclock in sclocks_unique:
        match = (sclocks == sclock)                       # all files with this sclock
        maxP = P_version[match] == max(P_version[match])  # highest version among them
        # keep only the file with the highest version
        filelist_new = np.append(filelist_new, filelist[match][maxP])
    filelist = filelist_new
    # Should add a progress bar for importing large numbers of files.
    # (Removed a dead per-file timing list that was accumulated but never used.)
    combined = None
    for fname in filelist:
        tmp = CCS_SAV(fname) if is_sav else CCS(fname)
        if combined is None:
            combined = tmp
        else:
            # Compare column *sets* so rounding errors in wavelength ordering
            # do not cause spurious column mismatches.
            cols1 = list(combined['wvl'].columns)
            cols2 = list(tmp['wvl'].columns)
            if set(cols1) == set(cols2):
                combined = pd.concat([combined, tmp])
            else:
                print("Wavelengths don't match!")
    combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(combined.loc[:, ('meta', 'sclock')])
    if lookupfile is not None:
        combined = lookup(combined, lookupfile=lookupfile)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
def ccs_batch(directory, searchstring='*CCS*.csv', is_sav=False):
    """Read and combine CCS spectra, keeping only the highest version per sclock.

    NOTE(review): this redefines ccs_batch and shadows the earlier, more
    featureful 5-argument definition in this file; the two should be unified.

    Parameters
    ----------
    directory : str
        Directory to search for CCS files.
    searchstring : str, optional
        Glob pattern for matching files; if it contains 'SAV' the files are
        read as .SAV regardless of *is_sav*.
    is_sav : bool, optional
        Force reading files as .SAV. (Previously this argument was always
        overwritten by the searchstring check; it is now honored.)

    Returns
    -------
    pandas.DataFrame
        The combined data from all files.
    """
    # Fix: honor a caller-supplied is_sav instead of unconditionally
    # overwriting it; the searchstring autodetection is preserved.
    if 'SAV' in searchstring:
        is_sav = True
    filelist = file_search(directory, searchstring)
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')
    # Extract the sclock and version for each file so only one file per sclock
    # is read: the one with the highest version number.
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]      # extract the sclock
        P_version[i] = basenames[i][-5:-4]   # extract the version
    sclocks_unique = np.unique(sclocks)
    filelist_new = np.array([], dtype='str')
    for sclock in sclocks_unique:
        match = (sclocks == sclock)                       # all files with this sclock
        maxP = P_version[match] == max(P_version[match])  # highest version among them
        filelist_new = np.append(filelist_new, filelist[match][maxP])
    filelist = filelist_new
    # any way to speed this up for large numbers of files?
    # Should add a progress bar for importing large numbers of files
    combined = None
    for fname in filelist:
        tmp = CCS_SAV(fname) if is_sav else CCS(fname)
        if combined is None:
            # Fix: a bare `except:` was used to detect the first iteration,
            # which silently swallowed *any* error (KeyError, concat failure)
            # and reset combined to tmp. Use an explicit sentinel instead.
            combined = tmp
        else:
            # Compare column *sets* so rounding errors in wavelength ordering
            # do not cause spurious column mismatches.
            cols1 = list(combined['wvl'].columns)
            cols2 = list(tmp['wvl'].columns)
            if set(cols1) == set(cols2):
                combined = pd.concat([combined, tmp])
            else:
                print("Wavelengths don't match!")
    return combined