Example no. 1
import os

import numpy as np
import pandas as pd

from pysat.spectral.spectral_data import spectral_data

# read_refdata, file_search, and JSC are pysat helper functions assumed to
# be in scope here.


def jsc_batch(directory, LUT_files, searchstring='*.txt', to_csv=None):
    # Read in the lookup tables to expand filename metadata
    refdata = read_refdata(LUT_files)
    # Get the list of files matching the search string in the given directory
    # (as an array so it can be indexed with boolean masks below)
    filelist = np.array(file_search(directory, searchstring))
    spectIDs = []  # spectrometer IDs, one per file

    for file in filelist:
        # The spectrometer ID is the 7th underscore-delimited field of the filename
        spectIDs.append(os.path.basename(file).split('_')[6])
    spectIDs_unique = np.unique(spectIDs)  # unique spectrometer IDs
    dfs = []  # data frames for each spectrometer

    # Loop through each spectrometer, read the spectra, and combine them
    # into a single data frame for that spectrometer
    for spect in spectIDs_unique:
        sublist = filelist[np.in1d(spectIDs, spect)]
        temp = [JSC(file, refdata) for file in sublist]
        dfs.append(pd.concat(temp))

    #now combine the data frames for the different spectrometers into a single data frame containing all the data
    combined = dfs[0]
    for df in dfs[1:]:
        combined = combined.merge(df)

    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
    def removenull(self, datakey, colname):
        # Drop rows where the given column is null (.ix is deprecated;
        # use .loc with a boolean mask instead)
        try:
            print(self.data[datakey].df.shape)
            self.data[datakey] = spectral_data(
                self.data[datakey].df.loc[~self.data[datakey].df[colname].isnull()])
            print(self.data[datakey].df.shape)
        except Exception as e:
            error_print(e)
    def get_data(self, filename, keyname):
        try:
            print('Loading data file: ' + str(filename))
            self.data[keyname] = spectral_data(pd.read_csv(filename, header=[0, 1]))
            self.datakeys.append(keyname)
        except Exception as e:
            error_print('Problem reading data: {}'.format(e))
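
# A minimal usage sketch for jsc_batch; the directory, lookup-table list,
# and output CSV below are hypothetical placeholders:
lut_files = [r"C:\JSC\spect_LUT.csv", r"C:\JSC\sample_LUT.csv"]
jsc_data = jsc_batch(r"C:\JSC\spectra", lut_files, searchstring='*.txt',
                     to_csv=r"C:\JSC\jsc_combined.csv")
print(jsc_data.df.shape)  # combined spectra, one row per spectrum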
Example no. 4
    def set_spectral(self, data_base):
        """
        Read a CSV database and wrap it in a spectral_data object.

        The user can pass either the known or the unknown database, so usage
        will be either:

        k_data = pysat_func.set_spectral(pysat_func.get_file_known_data())
        u_data = pysat_func.set_spectral(pysat_func.get_file_unknown_data())

        :param data_base: path to a CSV file with a two-row column header
        :return: spectral_data object wrapping the data
        """
        data = pd.read_csv(data_base, header=[0, 1])
        return spectral_data(data)
Example no. 5
import os

import numpy as np
import pandas as pd

from pysat.spectral.spectral_data import spectral_data

# read_refdata, file_search, and JSC are pysat helper functions assumed to
# be in scope here.


def jsc_batch(directory, LUT_files, searchstring='*.txt', to_csv=None):
    # Read in the lookup tables to expand filename metadata
    refdata = read_refdata(LUT_files)
    # Get the list of files matching the search string in the given directory
    # (as an array so it can be indexed with boolean masks below)
    filelist = np.array(file_search(directory, searchstring))
    spectIDs = []  #create an empty list to hold the spectrometer IDs
    libsIDs = []
    timestamps = []
    locs = []
    for file in filelist:
        filesplit = os.path.basename(file).split('_')
        spectIDs.append(filesplit[6])  # spectrometer ID for each file
        libsIDs.append(filesplit[0])
        timestamps.append(filesplit[-1].split('.')[0])
        locs.append(filesplit[1])
    spectIDs_unique = np.unique(spectIDs)  # unique spectrometer IDs
    libsIDs_unique = np.unique(libsIDs)

    # Loop through each LIBS ID
    alldata = []
    for ID in libsIDs_unique:
        print('Working on: ' + str(ID))
        sublist = filelist[np.in1d(libsIDs, ID)]
        locs = []
        for file in sublist:
            locs.append(os.path.basename(file).split('_')[1])
        locs_unique = np.unique(locs)
        # Loop through each location for that LIBS ID
        for loc in locs_unique:
            print(loc)
            # Get the files for that LIBS ID and location
            sub_sublist = sublist[np.in1d(locs, loc)]
            data = JSC(sub_sublist, refdata)
            alldata.append(data)

    combined = pd.concat(alldata)
    if to_csv is not None:
        print('Writing combined data to: ' + to_csv)
        combined.to_csv(to_csv)
    return spectral_data(combined)
Example no. 6
import os
import time

import numpy as np
import pandas as pd
from PyQt5 import QtCore  # assumption: Qt bindings for the optional progress bar

# file_search, CCAM_SAV, CCAM_CSV, lookup, and spectral_data are pysat
# helpers assumed to be in scope here.


def ccam_batch(directory,
               searchstring='*.csv',
               to_csv=None,
               lookupfile=None,
               ave=True,
               progressbar=None):
    # Determine whether the files are .csv or .SAV
    is_sav = '.sav' in searchstring.lower()
    # Get the matching files (as an array so it can be indexed with boolean
    # masks below)
    filelist = np.array(file_search(directory, searchstring))
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')

    #Extract the sclock and version for each file and ensure that only one
    #file per sclock is being read, and that it is the one with the highest version number
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]  #extract the sclock
        P_version[i] = basenames[i][-5:-4]  #extract the version

    sclocks_unique = np.unique(sclocks)  # find unique sclocks
    filelist_new = np.array([], dtype='str')
    for i in sclocks_unique:
        match = (sclocks == i)  # find all files with a matching sclock
        # find the highest version among these files
        maxP = P_version[match] == max(P_version[match])
        # keep only the file with the highest version
        filelist_new = np.append(filelist_new, filelist[match][maxP])

    filelist = filelist_new
    # Optionally update a progress bar while importing large numbers of files
    dt = []
    if progressbar:
        progressbar.setWindowTitle('ChemCam data progress')
        progressbar.setRange(0, filelist.size)
        progressbar.show()
    filecount = 0
    for i in filelist:
        filecount = filecount + 1
        print(i)
        try:
            t = time.time()
            if is_sav:
                tmp = CCAM_SAV(i, ave=ave)
            else:
                tmp = CCAM_CSV(i)
            dt.append(time.time() - t)
            if i == filelist[0]:
                combined = tmp
            else:
                # Check the column sets so that rounding errors in the
                # wavelengths do not cause mismatched columns on concat
                cols1 = list(combined['wvl'].columns)
                cols2 = list(tmp['wvl'].columns)
                if set(cols1) == set(cols2):
                    combined = pd.concat([combined, tmp])
                else:
                    print("Wavelengths don't match!")
        except Exception as e:
            # Report problem files rather than silently swallowing all errors
            print('Problem reading {}: {}'.format(i, e))
        if progressbar:
            progressbar.setValue(filecount)
            QtCore.QCoreApplication.processEvents()

    combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(
        combined.loc[:, ('meta', 'sclock')])

    if lookupfile is not None:
        combined = lookup(combined, lookupfile=lookupfile)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
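
# A minimal usage sketch for ccam_batch; the directory and output CSV are
# hypothetical placeholders (when running inside a GUI, pass a Qt progress
# dialog as progressbar):
ccam_data = ccam_batch(r"C:\ChemCam\data", searchstring='*.csv',
                       to_csv=r"C:\ChemCam\ccam_combined.csv", ave=True)
print(ccam_data.df.shape)
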
import glob

import matplotlib.pyplot as plot
import numpy
import pandas as pd
from scipy.io.idl import readsav

from pysat.spectral.spectral_data import spectral_data

filelist = glob.glob(r"E:\ChemCam\Calibration Data\LANL_testbed\Caltargets\*calib.sav")
filelist2 = glob.glob(r"E:\ChemCam\Calibration Data\LANL_testbed\Caltargets\test.sav")
data2 = readsav(filelist2[0])
data = readsav(filelist[0])
muv = data['calibspecmuv']
muv_orig = muv
x = data['defuv']  # wavelength axis (alternatively: numpy.arange(len(muv)))
muv = numpy.array([muv, muv])  # stack two copies so the frame has two rows

muv = pd.DataFrame(muv)
colnames = []
for i, j in enumerate(x):
    colnames.append(('wvl', x[i]))
muv.columns = pd.MultiIndex.from_tuples(colnames)
muv2 = spectral_data(muv.copy())  # independent copy for the wavelet test
muv = spectral_data(muv)
muv.remove_baseline(method='ccam', params={'int_flag_': 2, 'lvmin_': 6, 'lv_': 10})
# The original call was incomplete ("params="); fall back to the wavelet
# method's defaults here:
muv2.remove_baseline(method='wavelet')
#muv_denoise,muv_noise=ccam_denoise.ccam_denoise(muv,sig=3,niter=4)
#plot.figure()
#plot.plot(muv_noise)


#muv_nocont,cont=baseline_code.ccam_remove_continuum.ccam_remove_continuum(x,muv,10,lvmin=6,int_flag=2)
plot.figure(figsize=[11,8])
plot.plot(x,muv.df['wvl'].iloc[0],label='Continuum Removed',linewidth=0.5)
plot.plot(x,muv.df_baseline['wvl'].iloc[0],label='Continuum',linewidth=0.5)
plot.plot(x,muv_orig,label='Original',linewidth=0.5)
plot.plot(x,data2['muv_cont'],label='IDL Continuum',linestyle='--',linewidth=0.5)
plot.legend()
Example no. 8
###### Read unknown data (only do this the first time, since it's slow)
#unknowndatadir=r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Lab Data"
#unknowndatasearch='CM*.SAV'
#unknowndatacsv=r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Lab Data\lab_data_averages_pandas_format.csv"
#unknown_data=ccs_batch(unknowndatadir,searchstring=unknowndatasearch)
#
##write it to a csv file for future use (much faster than reading individual files each time)
#
##this writes all the data, including single shots, to a file (can get very large!!)
#unknown_data.df.to_csv(unknowndatacsv)
#
##this writes just the average spectra to a file
#unknown_data.df.loc['average'].to_csv(unknowndatacsv)

import pandas as pd

from pysat.spectral.spectral_data import spectral_data

# Put the training data dataframe into a spectral_data object
# (assumption: 'data' already holds the training dataframe, read earlier
# with pd.read_csv(..., header=[0, 1]) as in the other examples)
data = spectral_data(data)


print('read unknown data from the combined csv file (much faster)')
unknowndatacsv=r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Lab Data\lab_data_averages_pandas_format.csv"
unknown_data=pd.read_csv(unknowndatacsv,header=[0,1])
unknown_data=spectral_data(unknown_data)

print('Interpolate unknown data onto the same exact wavelengths as the training data')
unknown_data.interp(data.df['wvl'].columns)

print('Mask out unwanted portions of the data')
maskfile=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\mask_minors_noise.csv"
data.mask(maskfile)
unknown_data.mask(maskfile)
Example no. 9
import time
import warnings

import pandas as pd
from sklearn import linear_model
from sklearn.cross_decomposition import PLSRegression

from pysat.plotting import plots
from pysat.regression import cv
from pysat.regression.regression import regression
from pysat.spectral.spectral_data import spectral_data

# import matplotlib.pyplot as plot

warnings.filterwarnings('ignore')

print('Read training database with mixtures')
db = r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Database\full_db_mars_corrected_dopedTiO2_mixtures_pandas.csv"
data_mix = pd.read_csv(db, header=[0, 1])
data_mix = spectral_data(data_mix)

print('Read training database')
db = r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\Database\full_db_mars_corrected_dopedTiO2_pandas_format.csv"
data = pd.read_csv(db, header=[0, 1])
data = spectral_data(data)

print('Mask out unwanted portions of the data')
maskfile = r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\mask_minors_noise.csv"
data.mask(maskfile)
data_mix.mask(maskfile)

print('Normalize spectra by specifying the wavelength ranges over which to normalize')
ranges = [(0, 1000)]  # this is equivalent to "norm3"
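
# A plausible next step (assumption: spectral_data exposes a norm() method
# that applies normalization over the given wavelength ranges; the exact
# method name and signature are not shown in this snippet):
data.norm(ranges)
data_mix.norm(ranges)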
Example no. 10
"""
@author: rbanderson
"""

import glob

import matplotlib.pyplot as plot
import numpy
import pandas as pd
import scipy
from scipy.io.idl import readsav

import pysat.spectral.baseline_code.ccam_remove_continuum
from pysat.spectral.spectral_data import spectral_data
# import mlpy.wavelet

filename = r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\JSC_data_combined_20170307.csv"
data = spectral_data(pd.read_csv(filename, header=[0, 1]))
data = data.df.iloc[0:5]  # keep just the first few spectra (.ix is deprecated)
data_spect = data['wvl']  # select the spectral columns (wavelengths)
# Keep only wavelengths below 350 nm (columns read from CSV may be strings)
colcheck = data_spect.columns.values.astype(float) < 350
data_spect = data_spect.iloc[:, colcheck]
# Rebuild the ('wvl', wavelength) column MultiIndex expected by spectral_data
data_spect.columns = pd.MultiIndex.from_tuples([('wvl', c) for c in data_spect.columns])
data = spectral_data(data_spect)
data_orig = data.df['wvl'].iloc[0].copy()  # keep the original spectrum for plotting
data.remove_baseline(method='ccam',
                     params={
                         'int_flag_': 2,
                         'lvmin_': 6,
                         'lv_': 10
                     })
x = data.df['wvl'].columns.values.astype(float)
plot.figure(figsize=[11, 8])
plot.plot(x, data_orig, label='Original', linewidth=0.5)
plot.plot(x, data.df['wvl'].iloc[0], label='Continuum Removed', linewidth=0.5)
plot.plot(x, data.df_baseline['wvl'].iloc[0], label='Continuum', linewidth=0.5)
plot.legend()