Example #1
import os
import numpy as np
import pandas as pd

#read_refdata, file_search, JSC, and spectral_data are assumed to be defined in the surrounding module
def jsc_batch(directory, LUT_files, searchstring='*.txt', to_csv=None):
    #Read in the lookup tables to expand filename metadata                  
    refdata=read_refdata(LUT_files)
    #get the list of files that match the search string in the given directory    
    filelist=file_search(directory,searchstring)
    spectIDs=[] #create an empty list to hold the spectrometer IDs
    
    for file in filelist:
        spectIDs.append(os.path.basename(file).split('_')[6]) #get the spectrometer IDs for each file in the list
    spectIDs_unique=np.unique(spectIDs) #get the unique spectrometer IDs
    dfs=[]  #create an empty list to hold the data frames for each spectrometer  

    #loop through each spectrometer, read the spectra and combine them into a single data frame for that spectrometer    
    for spect in spectIDs_unique:
        sublist=filelist[np.isin(spectIDs,spect)] #boolean mask of the files from this spectrometer (np.isin supersedes the deprecated np.in1d)
        temp=[JSC(file,refdata) for file in sublist] #read each file into a data frame
        dfs.append(pd.concat(temp))
        
    #now combine the data frames for the different spectrometers into a single data frame containing all the data    
    combined=dfs[0]
    for df in dfs[1:]:
        combined=combined.merge(df)
    
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
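
A usage sketch for jsc_batch; the directory and lookup-table paths below are hypothetical placeholders, and the returned spectral_data object is assumed to wrap the combined data frame.

#Hypothetical paths for illustration only
LUT_files=['spectrometer_LUT.csv','experiment_LUT.csv']
data=jsc_batch('/path/to/jsc_spectra',LUT_files,searchstring='*.txt',
               to_csv='jsc_combined.csv')
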
Example #2
import os
import time
import numpy as np
import pandas as pd

#CCS, CCS_SAV, file_search, lookup, and spectral_data are assumed to be defined in the surrounding module
def ccs_batch(directory,searchstring='*CCS*.csv',to_csv=None,lookupfile=None):
    #Infer the file type from the search string; .SAV files need a different reader
    is_sav='SAV' in searchstring
    filelist=np.asarray(file_search(directory,searchstring)) #ensure an array so the boolean masks below can index it
    basenames=np.zeros_like(filelist)
    sclocks=np.zeros_like(filelist)
    P_version=np.zeros_like(filelist,dtype='int')
    
    #Extract the sclock and version for each file and ensure that only one 
    #file per sclock is read: the one with the highest version number
    #(a pandas alternative is sketched after this function)
    for i,name in enumerate(filelist):
        basenames[i]=os.path.basename(name)
        sclocks[i]=basenames[i][4:13] #extract the sclock
        P_version[i]=int(basenames[i][-5:-4]) #extract the version
    sclocks_unique=np.unique(sclocks) #find unique sclocks
    filelist_new=np.array([],dtype='str')
    for i in sclocks_unique:
        match=(sclocks==i) #find all instances with matching sclocks
        maxP=P_version[match]==max(P_version[match])  #find the highest version among these files
        filelist_new=np.append(filelist_new,filelist[match][maxP]) #keep only the file(s) with the highest version
        
    filelist=filelist_new
    #Should add a progress bar for importing large numbers of files    
    dt=[] #per-file read times, useful for gauging import speed
    for i in filelist:
        t=time.time()
        if is_sav:
            tmp=CCS_SAV(i)
        else:
            tmp=CCS(i)
        dt.append(time.time()-t)
        if i==filelist[0]:
            combined=tmp
        else:
            #Check that the wavelength columns match before concatenating;
            #small rounding differences between files can make them mismatch
            cols1=list(combined['wvl'].columns)
            cols2=list(tmp['wvl'].columns)
            if set(cols1)==set(cols2):
                combined=pd.concat([combined,tmp])
            else:
                print("Wavelengths don't match!")

    combined.loc[:,('meta','sclock')]=pd.to_numeric(combined.loc[:,('meta','sclock')])
        
    if lookupfile is not None:
        combined=lookup(combined,lookupfile=lookupfile)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
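
A usage sketch for ccs_batch; the data directory and lookup file below are hypothetical:

#Hypothetical paths; the lookup file is assumed to map sclock values to target metadata
data=ccs_batch('/path/to/ccs_data',searchstring='*CCS*.csv',
               lookupfile='target_lookup.csv',to_csv='ccs_combined.csv')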
    
        
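The version-filtering loop above can also be written with a pandas groupby; a sketch under the same fixed-width filename assumptions. Note that idxmax keeps exactly one file per sclock, whereas the loop keeps every file tied at the maximum version.

import os
import pandas as pd

def latest_versions(filelist):
    #Build a small table of (file, sclock, version) from the filenames
    meta=pd.DataFrame({'file':list(filelist)})
    names=meta['file'].map(os.path.basename)
    meta['sclock']=names.str[4:13]
    meta['version']=names.str[-5:-4].astype(int)
    #For each sclock, keep the row with the highest version
    keep=meta.loc[meta.groupby('sclock')['version'].idxmax(),'file']
    return keep.to_numpy()
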
Example #3
import os
import numpy as np
import pandas as pd

#CCS, CCS_SAV, and file_search are assumed to be defined in the surrounding module
def ccs_batch(directory,searchstring='*CCS*.csv'):
    #Infer the file type from the search string; .SAV files need a different reader
    is_sav='SAV' in searchstring
    filelist=np.asarray(file_search(directory,searchstring)) #ensure an array so the boolean masks below can index it
    basenames=np.zeros_like(filelist)
    sclocks=np.zeros_like(filelist)
    P_version=np.zeros_like(filelist,dtype='int')
    
    #Extract the sclock and version for each file and ensure that only one 
    #file per sclock is read: the one with the highest version number
    for i,name in enumerate(filelist):
        basenames[i]=os.path.basename(name)
        sclocks[i]=basenames[i][4:13] #extract the sclock
        P_version[i]=int(basenames[i][-5:-4]) #extract the version
    sclocks_unique=np.unique(sclocks)
    filelist_new=np.array([],dtype='str')
    for i in sclocks_unique:
        match=(sclocks==i)
        maxP=P_version[match]==max(P_version[match])
        filelist_new=np.append(filelist_new,filelist[match][maxP])
        
    filelist=filelist_new
    #any way to speed this up for large numbers of files? 
    #Should add a progress bar for importing large numbers of files
    #(a tqdm-based sketch follows this function)
    combined=None
    for i in filelist:
        if is_sav:
            tmp=CCS_SAV(i)
        else:
            tmp=CCS(i)

        if combined is None:
            combined=tmp #the first file read becomes the base data frame
        else:
            #Check that the wavelength columns match before concatenating;
            #small rounding differences between files can make them mismatch
            cols1=list(combined['wvl'].columns)
            cols2=list(tmp['wvl'].columns)
            if set(cols1)==set(cols2):
                combined=pd.concat([combined,tmp])
            else:
                print("Wavelengths don't match! Skipping "+os.path.basename(i))
    return combined
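
The progress-bar wish in the comments above could be addressed with tqdm; a minimal sketch, assuming tqdm is installed (pip install tqdm) and reusing the CCS/CCS_SAV readers from the functions above:

from tqdm import tqdm

def read_ccs_files(filelist,is_sav=False):
    #Read each file while tqdm draws a progress bar on stderr
    frames=[]
    for f in tqdm(filelist,desc='Importing CCS files'):
        frames.append(CCS_SAV(f) if is_sav else CCS(f))
    return frames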