Example #1
def jsc_batch(directory, LUT_files, searchstring='*.txt', to_csv=None):
    #Read in the lookup tables to expand filename metadata
    refdata = read_refdata(LUT_files)
    #get the list of files that match the search string in the given directory
    filelist = file_search(directory, searchstring)
    spectIDs = []  #create an empty list to hold the spectrometer IDs

    for file in filelist:
        #get the spectrometer ID for each file in the list
        spectIDs.append(os.path.basename(file).split('_')[6])
    spectIDs_unique = np.unique(spectIDs)  #get the unique spectrometer IDs
    dfs = []  #create an empty list to hold the data frames for each spectrometer

    #loop through each spectrometer, read the spectra and combine them into a single data frame for that spectrometer
    for spect in spectIDs_unique:
        sublist = filelist[np.in1d(spectIDs, spect)]
        temp = [JSC(sublist[0], refdata)]
        for file in sublist[1:]:
            temp.append(JSC(file, refdata))
        dfs.append(pd.concat(temp))

    #now combine the data frames for the different spectrometers into a single data frame containing all the data
    combined = dfs[0]
    for df in dfs[1:]:
        combined = combined.merge(df)

    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
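
A minimal usage sketch, not part of the original source: it assumes jsc_batch is imported together with the helpers it calls (read_refdata, file_search, JSC, spectral_data), and the directory, LUT file names, and output path below are placeholders.

#Hypothetical paths; the lookup tables expand the filename metadata via read_refdata
lut_files = ['jsc_lut_experiments.csv', 'jsc_lut_samples.csv']
data = jsc_batch('/data/jsc_spectra', lut_files, searchstring='*.txt', to_csv='jsc_combined.csv')
#spectral_data is assumed to wrap the combined pandas DataFrame
print(type(data))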
Example #2
def jsc_batch(directory, LUT_files, searchstring='*.txt', to_csv=None):
    #Read in the lookup tables to expand filename metadata
    refdata = read_refdata(LUT_files)
    #get the list of files that match the search string in the given directory
    filelist = file_search(directory, searchstring)
    spectIDs = []  #create an empty list to hold the spectrometer IDs
    libsIDs = []
    timestamps = []
    locs = []
    for file in filelist:
        filesplit = os.path.basename(file).split('_')
        #get the spectrometer ID for each file in the list
        spectIDs.append(filesplit[6])
        libsIDs.append(filesplit[0])
        timestamps.append(filesplit[-1].split('.')[0])
        locs.append(filesplit[1])
    spectIDs_unique = np.unique(spectIDs)  #get the unique spectrometer IDs
    libsIDs_unique = np.unique(libsIDs)
    dfs = []  #create an empty list to hold the data frames for each spectrometer

    #loop through each LIBS ID
    alldata = []
    for ID in libsIDs_unique:
        print('Working on : ' + str(ID))
        sublist = filelist[np.in1d(libsIDs, ID)]
        locs = []
        for file in sublist:
            locs.append(os.path.basename(file).split('_')[1])
        locs_unique = np.unique(locs)
        #loop through each location for that libs ID
        for loc in locs_unique:
            print(loc)
            sub_sublist = sublist[np.in1d(locs, loc)]  #get the files for that LIBS ID and location
            data = JSC(sub_sublist, refdata)
            alldata.append(data)

    combined = pd.concat(alldata)
    if to_csv is not None:
        print('Writing combined data to: ' + to_csv)
        combined.to_csv(to_csv)
    return spectral_data(combined)
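
This variant groups the files by LIBS ID and location rather than by spectrometer, and hands each group to JSC() in a single call. Below is a small sketch of the filename layout it appears to assume; the field positions are inferred from the split() indices used above (0, 1, 6, and -1), and the real JSC naming convention may differ.

#Illustrative filename only; field positions are inferred, not documented
name = 'LIBS001_loc01_x_x_x_x_spect05_20170101T120000.txt'
parts = name.split('_')
libs_id = parts[0]                   #LIBS run ID
loc = parts[1]                       #location on the sample
spect_id = parts[6]                  #spectrometer ID
timestamp = parts[-1].split('.')[0]  #timestamp with the extension stripped
print(libs_id, loc, spect_id, timestamp)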
Example #3
def ccam_batch(directory,
               searchstring='*.csv',
               to_csv=None,
               lookupfile=None,
               ave=True,
               progressbar=None):
    #Determine whether the files are .csv or .SAV
    is_sav = '.sav' in searchstring.lower()
    filelist = file_search(directory, searchstring)
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')

    #Extract the sclock and version for each file and ensure that only one
    #file per sclock is being read, and that it is the one with the highest version number
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]  #extract the sclock
        P_version[i] = basenames[i][-5:-4]  #extract the version

    sclocks_unique = np.unique(sclocks)  #find unique sclocks
    filelist_new = np.array([], dtype='str')
    for i in sclocks_unique:
        match = (sclocks == i)  #find all instances with matching sclocks
        maxP = P_version[match] == max(P_version[match])  #find the highest version among these files
        filelist_new = np.append(filelist_new, filelist[match][maxP])  #keep only the file with the highest version

    filelist = filelist_new
    #Update the progress bar (if one was provided) while importing large numbers of files
    dt = []
    if progressbar:
        progressbar.setWindowTitle('ChemCam data progress')
        progressbar.setRange(0, filelist.size)
        progressbar.show()
    filecount = 0
    for i in filelist:
        filecount = filecount + 1
        print(i)
        try:
            if is_sav:
                t = time.time()
                tmp = CCAM_SAV(i, ave=ave)
                dt.append(time.time() - t)
            else:
                t = time.time()
                tmp = CCAM_CSV(i)

                dt.append(time.time() - t)
            if i == filelist[0]:
                combined = tmp

            else:
                #Check that rounding errors have not caused a mismatch in the wavelength columns
                cols1 = list(combined['wvl'].columns)
                cols2 = list(tmp['wvl'].columns)
                if set(cols1) == set(cols2):
                    combined = pd.concat([combined, tmp])
                else:
                    print("Wavelengths don't match!")
        except Exception:
            #skip files that fail to read rather than aborting the whole batch
            pass
        if progressbar:
            progressbar.setValue(filecount)
            QtCore.QCoreApplication.processEvents()

    #make sure the sclock metadata column is numeric
    combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(combined.loc[:, ('meta', 'sclock')])

    if lookupfile is not None:
        combined = lookup(combined, lookupfile=lookupfile)
    if to_csv is not None:
        combined.to_csv(to_csv)
    return spectral_data(combined)
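
A minimal usage sketch, not part of the original source: it assumes ccam_batch is imported with the helpers it calls (file_search, CCAM_CSV, CCAM_SAV, lookup, spectral_data), and the directory, lookup file, and output path below are placeholders.

#Hypothetical paths; pass a Qt progress bar object only when running inside a GUI
ccam = ccam_batch('/data/ccam_csvs',
                  searchstring='*.csv',               #use '*.SAV' to read IDL save files via CCAM_SAV
                  lookupfile='ccam_master_list.csv',  #optional metadata merged in by lookup()
                  ave=True,                           #only passed through to CCAM_SAV in this version
                  to_csv='ccam_combined.csv')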