def read_spectrum(self, filename):
    """Read one spectrum from a file whose header uses ``=`` separators.

    The identifier defaults to the file name and the instrument defaults
    to the file extension. The ``name`` and ``instrument`` header tags,
    when present, replace those defaults.

    Args:
        filename: Path of the file to read.

    Returns:
        Tuple ``(data, idstr, company, instrument)``; ``data`` is the
        value returned by ``ReaderHelper.read_spectrum_data``.
    """
    # Defaults taken from the path itself.
    _, stem, extension = get_directory_filename_extension(filename)
    idstr = stem
    instrument = extension

    # The header pass tells the data pass how many rows to skip and
    # which columns hold wavelength and reflectance.
    helper = ReaderHelper()
    skip_rows, wave_col, refl_col, header = helper.read_spectrum_header(
        filename, headersep="=")
    data = helper.read_spectrum_data(
        filename, numskiprows=skip_rows, wavecol=wave_col, reflcol=refl_col)

    # Header tags take precedence over the path-derived defaults.
    if "name" in header:
        idstr = get_directory_filename_extension(header["name"])[1]
    if "instrument" in header:
        instrument = header["instrument"].strip()

    # The company tag is fixed for this reader.
    return (data, idstr, "sig", instrument)
def read_spectrum(self, filename):
    """Read one spectrum from a file whose header uses ``:`` separators.

    The identifier defaults to the file name and the instrument defaults
    to the file extension. The ``File Name`` and ``Instrument`` header
    tags, when present, replace those defaults.

    Args:
        filename: Path of the file to read.

    Returns:
        Tuple ``(data, idstr, company, instrument)``; ``data`` is the
        value returned by ``ReaderHelper.read_spectrum_data``.
    """
    # Defaults taken from the path itself.
    _, stem, extension = get_directory_filename_extension(filename)
    idstr = stem
    instrument = extension

    # The header pass tells the data pass how many rows to skip and
    # which columns hold wavelength and reflectance.
    helper = ReaderHelper()
    skip_rows, wave_col, refl_col, header = helper.read_spectrum_header(
        filename, headersep=":")
    data = helper.read_spectrum_data(
        filename, numskiprows=skip_rows, wavecol=wave_col, reflcol=refl_col)

    # Header tags take precedence over the path-derived defaults.
    if "File Name" in header:
        idstr = get_directory_filename_extension(header["File Name"])[1]
    if "Instrument" in header:
        instrument = header["Instrument"].strip()

    # The company tag is fixed for this reader.
    return (data, idstr, "sed", instrument)
def read_spectrum(self, filename):
    """Read one spectrum from a comma-separated file with ``=`` header tags.

    Identifier, company and instrument default to the file name, the
    file extension and the file extension respectively. Each can be
    overridden by the ``sdal_idstr``, ``sdal_company`` and
    ``sdal_instrument`` header tags.

    Args:
        filename: Path of the file to read.

    Returns:
        Tuple ``(data, idstr, company, instrument)``; ``data`` is the
        value returned by ``ReaderHelper.read_spectrum_data``.
    """
    _, stem, extension = get_directory_filename_extension(filename)

    # Header first: it yields the row/column layout for the data pass.
    helper = ReaderHelper()
    skip_rows, wave_col, refl_col, header = helper.read_spectrum_header(
        filename, headersep="=", datasep=",")
    data = helper.read_spectrum_data(
        filename, numskiprows=skip_rows, wavecol=wave_col, reflcol=refl_col,
        datasep=",")

    # Prefer explicit tags; otherwise fall back to the path-derived values.
    idstr = header["sdal_idstr"] if "sdal_idstr" in header else stem
    company = header["sdal_company"] if "sdal_company" in header else extension
    instrument = (header["sdal_instrument"]
                  if "sdal_instrument" in header else extension)
    return (data, idstr, company, instrument)
def read(self, filename):
    """Read every spectrum stored in a Piccolo JSON file.

    For each entry of the top-level ``Spectra`` list the raw ``Pixels``
    values are passed through the polynomial given by the metadata key
    ``NonlinearityCorrectionCoefficients``, and the wavelengths are
    obtained by evaluating the polynomial given by
    ``WavelengthCalibrationCoefficients`` at the zero-based pixel index.
    Both coefficient lists are in increasing order of power
    (constant term first).

    Args:
        filename: Path of the JSON file.

    Returns:
        List of ``Spectrum`` objects, one per entry in ``Spectra``, each
        holding a two-column (wavelength, corrected pixel) array.
    """
    # The context manager closes the file even when JSON parsing fails;
    # the previous ``open(filename).read()`` never closed it explicitly.
    with open(filename) as handle:
        contents = json.load(handle)

    idstr = get_directory_filename_extension(filename)[1]
    company = 'piccolo'
    instrument = 'piccolo'

    spectrums = []
    for record in contents['Spectra']:
        metadata = record['Metadata']

        # Corrected pixels: sum_k coeff[k] * pixel**k.
        raw_pixels = np.array(record['Pixels'], dtype=np.double)
        nlin_coeffs = np.array(metadata['NonlinearityCorrectionCoefficients'])
        pixel_powers = np.vander(raw_pixels, len(nlin_coeffs), increasing=True)
        pixels = np.dot(pixel_powers, nlin_coeffs)

        # Wavelengths: sum_k coeff[k] * index**k, index = 0 .. n-1.
        # The index vector is double so the power matrix is floating
        # point, as it was when filled by hand.
        indices = np.arange(len(raw_pixels), dtype=np.double)
        wave_coeffs = np.array(metadata['WavelengthCalibrationCoefficients'])
        index_powers = np.vander(indices, len(wave_coeffs), increasing=True)
        waves = np.dot(index_powers, wave_coeffs)

        spectrums.append(Spectrum(data=np.column_stack((waves, pixels)),
                                  idstr=idstr,
                                  company=company,
                                  instrument=instrument,
                                  metadata=metadata))
    return spectrums
def read_spectrums(self, filename, ancillary1=""):
    """Read all spectra from a multi-spectrum file.

    Only the ``sli`` extension is handled; it is delegated to
    ``EnviReader.read_spectrums`` together with the ancillary file.

    Args:
        filename: Path of the spectra file.
        ancillary1: Path of the companion file handed to the reader.
            It must name an existing file; the default ``""`` therefore
            fails validation.

    Returns:
        List of ``Spectrum`` objects; empty when the extension is not
        ``sli``.

    Raises:
        SystemExit: With status 1 (after printing a message) when either
            path is not an existing regular file. The status used to be
            0, which reported success to the calling shell.
    """
    # os.path.isfile() is already False for paths that do not exist, so
    # a separate exists() check adds nothing.
    for path in (filename, ancillary1):
        if not os.path.isfile(path):
            print("{}: {} is invalid".format(__file__, path))
            sys.exit(1)

    # Choose the reader based on the file extension.
    ext = get_directory_filename_extension(filename)[2]
    if ext != "sli":
        return []

    dms, idstrs, cos, instrs = EnviReader().read_spectrums(filename, ancillary1)
    # Index the parallel lists by position so a length mismatch still
    # surfaces as an IndexError instead of being silently truncated.
    return [Spectrum(dm, idstrs[i], cos[i], instrs[i])
            for i, dm in enumerate(dms)]
def read_spectrums(self, filename, ancillary1=""):
    """Read all spectra from a multi-spectrum file.

    Only the ``sli`` extension is handled; it is delegated to
    ``EnviReader.read_spectrums`` together with the ancillary file.

    Args:
        filename: Path of the spectra file.
        ancillary1: Path of the companion file handed to the reader.
            It must name an existing file; the default ``""`` therefore
            fails validation.

    Returns:
        List of ``Spectrum`` objects; empty when the extension is not
        ``sli``.

    Raises:
        SystemExit: With status 1 (after printing a message) when either
            path is not an existing regular file. The status used to be
            0, which reported success to the calling shell.
    """
    # os.path.isfile() is already False for paths that do not exist, so
    # a separate exists() check adds nothing.
    for path in (filename, ancillary1):
        if not os.path.isfile(path):
            print("{}: {} is invalid".format(__file__, path))
            sys.exit(1)

    # Choose the reader based on the file extension.
    ext = get_directory_filename_extension(filename)[2]
    if ext != "sli":
        return []

    dms, idstrs, cos, instrs = EnviReader().read_spectrums(filename, ancillary1)
    # Index the parallel lists by position so a length mismatch still
    # surfaces as an IndexError instead of being silently truncated.
    return [Spectrum(dm, idstrs[i], cos[i], instrs[i])
            for i, dm in enumerate(dms)]
def read_spectrum(self, filename, ancillary_filename=""):
    """Read a single spectrum, picking the reader from the file extension.

    Supported extensions (matched case-insensitively): ``csv`` and
    ``txt`` use ``CsvReader``, ``asd`` uses ``AsdReader``, ``sed`` uses
    ``SedReader`` and ``sig`` uses ``SigReader``.

    Args:
        filename: Path of the spectrum file.
        ancillary_filename: Accepted for interface compatibility; not
            used by this method.

    Returns:
        A ``Spectrum`` built from the reader's
        ``(data, idstr, company, instrument)`` tuple.

    Raises:
        SystemExit: With status 1 (after printing a message) when the
            path is not an existing regular file or the extension is not
            supported. The status used to be 0, which reported success
            to the calling shell.
    """
    # os.path.isfile() is already False for paths that do not exist.
    if not os.path.isfile(filename):
        print("{}: {} is invalid".format(__file__, filename))
        sys.exit(1)

    # Choose the reader based on the file extension. Lower-casing the
    # key generalizes the former special case for 'ASD' to every type.
    ext = get_directory_filename_extension(filename)[2]
    readers = {
        "csv": CsvReader,
        "txt": CsvReader,
        "asd": AsdReader,
        "sed": SedReader,
        "sig": SigReader,
    }
    reader_cls = readers.get(ext.lower())
    if reader_cls is None:
        print("{}: Invalid file type {}".format(__file__, ext))
        sys.exit(1)

    dm, idstr, co, instr = reader_cls().read_spectrum(filename)
    return Spectrum(dm, idstr, co, instr)
def read_spectrum(self, filename, ancillary_filename=""):
    """Read a single spectrum, picking the reader from the file extension.

    Supported extensions (matched case-insensitively): ``csv`` and
    ``txt`` use ``CsvReader``, ``asd`` uses ``AsdReader``, ``sed`` uses
    ``SedReader`` and ``sig`` uses ``SigReader``.

    Args:
        filename: Path of the spectrum file.
        ancillary_filename: Accepted for interface compatibility; not
            used by this method.

    Returns:
        A ``Spectrum`` built from the reader's
        ``(data, idstr, company, instrument)`` tuple.

    Raises:
        SystemExit: With status 1 (after printing a message) when the
            path is not an existing regular file or the extension is not
            supported. The status used to be 0, which reported success
            to the calling shell.
    """
    # os.path.isfile() is already False for paths that do not exist.
    if not os.path.isfile(filename):
        print("{}: {} is invalid".format(__file__, filename))
        sys.exit(1)

    # Choose the reader based on the file extension. Lower-casing the
    # key generalizes the former special case for 'ASD' to every type.
    ext = get_directory_filename_extension(filename)[2]
    readers = {
        "csv": CsvReader,
        "txt": CsvReader,
        "asd": AsdReader,
        "sed": SedReader,
        "sig": SigReader,
    }
    reader_cls = readers.get(ext.lower())
    if reader_cls is None:
        print("{}: Invalid file type {}".format(__file__, ext))
        sys.exit(1)

    dm, idstr, co, instr = reader_cls().read_spectrum(filename)
    return Spectrum(dm, idstr, co, instr)
def process(params):
    """Run the processing pipeline described by ``params``.

    Steps, in order:
      1. Fetch and verify the ``project``, ``jumpcorrection`` and
         grouping parameter sets (``resampling`` is fetched unverified).
      2. Read every file in ``project["indir"]`` whose extension equals
         ``project["fileext"]`` and uniquify it (tag ``"raw"``).
      3. Optionally resample, then optionally jump-correct.
      4. Drop the spectra that ``ReferenceDetector`` flags as references;
         the remainder is stored under ``params.default_group``.
      5. For every grouping, bucket the spectra of its ``intag`` by its
         ``pattern`` and store the mean spectrum of each bucket under the
         grouping's own tag.
      6. Write one directory per tag under
         ``project["outdir"]/project["name"]`` with a CSV per spectrum
         plus a combined ``___<tag>___.csv``.

    Args:
        params: Parameter object providing ``get_params(name)``,
            ``get_groups()`` and ``default_group``.

    Raises:
        SystemExit: With status 1 when no ``project`` parameters are
            given. The status used to be 0, which reported success to
            the calling shell.
        OSError: From ``os.mkdir`` when an output directory cannot be
            created, for example because it already exists.
    """
    # --- parameters ---------------------------------------------------
    project = params.get_params("project")
    if not project:
        print("--project is required")
        sys.exit(1)
    verify_project(project)

    resampling = params.get_params("resampling")

    jumpcorrection = params.get_params("jumpcorrection")
    if jumpcorrection:
        verify_jumpcorrection(jumpcorrection)

    groupings = {grp: params.get_params(grp) for grp in params.get_groups()}
    verify_groupings(params.default_group, params.get_groups(), groupings)

    # tag -> list of spectra; insertion of a tag creates its output dir
    # in the final step.
    specs = defaultdict(list)

    # --- specs["raw"] -------------------------------------------------
    indir = project["indir"]
    extfiles = [os.path.join(indir, f)
                for f in os.listdir(indir)
                if get_directory_filename_extension(f)[2] == project["fileext"]]

    uniquifier = WaveUniquifier()
    rawspecs = [SdalReader().read_spectrum(f) for f in extfiles]
    specs["raw"] = [uniquifier.uniquify(s) for s in rawspecs]

    # --- pre-processing -----------------------------------------------
    prepspecs = specs["raw"]
    if resampling:
        resampler = WaveResampler(rstype=resampling["type"],
                                  wavestart=resampling["range"][0],
                                  wavestop=resampling["range"][1],
                                  spacing=resampling["spacing"])
        prepspecs = [resampler.resample(s) for s in prepspecs]
    if jumpcorrection:
        corrector = JumpCorrector(jumpcorrection["wavelengths"],
                                  jumpcorrection["stablezone"])
        prepspecs = [corrector.correct(s) for s in prepspecs]

    # --- specs[default_group]: everything that is not a reference -----
    refdet = ReferenceDetector(context="gveg")
    specs[params.default_group] = [s for s in prepspecs
                                   if not refdet.is_reference(s)]

    # --- specs[group_tag]: mean spectrum of every regex bucket --------
    for tag, grouping in groupings.items():
        buckets = SpectrumRegex().make_groups(specs[grouping["intag"]],
                                              grouping["pattern"])
        for bucket_id in buckets:
            mean = SpectrumGroup(spectrums=buckets[bucket_id]).mean_spectrum()
            mean.idstr = bucket_id
            specs[tag].append(mean)

    # --- outputs ------------------------------------------------------
    prjdir = os.path.join(project["outdir"], project["name"])
    os.mkdir(prjdir)
    for tag in specs:
        tdir = os.path.join(prjdir, tag)
        os.mkdir(tdir)
        for s in specs[tag]:
            s.write_csv(odir=tdir)
        SpectrumGroup(spectrums=specs[tag]).write_csv(
            tdir, "___{}___.csv".format(tag))
def read_spectrum(self, filename):
    """Read a spectrum from an ASD binary file.

    Header fields are taken from fixed byte offsets: version tag at 0,
    start wavelength at 191, wavelength step at 195, data format at 199,
    channel count at 204, instrument number at 400 and instrument model
    at 431. The spectrum values start at offset 484.

    Changes from the previous implementation:
      * the version tag is decoded from the raw bytes, so it also works
        on Python 3, where joining ``struct.unpack("ccc", ...)`` with a
        ``str`` separator raises ``TypeError``;
      * the data size is derived from the format string instead of
        always assuming 8 bytes per channel, which made 4-byte float
        files fail in ``struct.unpack``;
      * the last wavelength is ``start + (n - 1) * step``, identical to
        the old ``start + n * step - 1`` when the step is 1 and correct
        for other steps;
      * a failed read exits with status 1 instead of 0.

    Args:
        filename: Path of the binary file.

    Returns:
        Tuple ``(data, idstr, company, instrument)`` where ``data`` is a
        two-column (wavelength, value) array, ``company`` is ``"asd"``
        and ``instrument`` joins model, number and file version with
        ``_``.

    Raises:
        SystemExit: With status 1 when the number of values read differs
            from the channel count (including unrecognized version tags).
        struct.error: When the file is too short for a field being read.
    """
    with open(filename, 'rb') as f:
        binconts = f.read()

    # --- header fields --------------------------------------------------
    fileversion = binconts[0:3].decode("ascii", "replace")
    instnumber = str(struct.unpack_from("H", binconts, 400)[0])
    instmodel = self._instrtype[int(struct.unpack_from("B", binconts, 431)[0])]
    wavestart = struct.unpack_from("f", binconts, 191)[0]
    wavestep = struct.unpack_from("f", binconts, 195)[0]
    data_format = struct.unpack_from("B", binconts, 199)[0]
    numchannels = struct.unpack_from("h", binconts, 204)[0]

    # --- wavelength vector ----------------------------------------------
    wavestop = wavestart + (numchannels - 1) * wavestep
    wavs = np.linspace(wavestart, wavestop, numchannels)

    # --- value layout -----------------------------------------------------
    # Format 2 is read as doubles; every other format value is read as
    # 4-byte floats, as before.
    fmt = ('d' if data_format == 2 else 'f') * numchannels
    size = struct.calcsize(fmt)

    refls = []
    if fileversion == 'ASD':
        refls = np.array(struct.unpack_from(fmt, binconts, 484))
    elif fileversion == 'as7':
        tgts = np.array(struct.unpack_from(fmt, binconts, 484))
        ref_flag = struct.unpack_from('?', binconts, 484 + size)[0]
        print("filename = {}, ref_flag = {}".format(filename, ref_flag))
        desc_length = struct.unpack_from('H', binconts, 484 + size + 18)[0]
        print("desc_length = {}".format(desc_length))
        print("size = {}".format(size))

        # HACK: scan +/- 50 bytes around offset 17712 for the window
        # whose values have min > 1, max > 50 and the smallest
        # peak-to-peak range, and treat it as the reference block.
        window = 50
        best_ptp = 1000000.0
        refstart = -1
        for start in range(17712 - window, 17712 + window):
            if (start + size) < len(binconts):
                candidate = np.array(struct.unpack_from(fmt, binconts, start))
                if (np.min(candidate) > 1.0 and np.max(candidate) > 50.0
                        and np.ptp(candidate) < best_ptp):
                    best_ptp = np.ptp(candidate)
                    refstart = start
        refs = np.array([])
        if refstart > -1:
            refs = np.array(struct.unpack_from(fmt, binconts, refstart))

        # NOTE(review): the division by the reference (tgts/refs) is
        # disabled, so the raw target values are returned and ``refs``
        # is only printed - confirm this is still intended.
        refls = tgts
        print(tgts)
        print(refs)

    # --- result -----------------------------------------------------------
    if np.size(refls) != numchannels:
        print("AsdReader: reflectances not read")
        sys.exit(1)

    idstr = get_directory_filename_extension(filename)[1]
    company = "asd"
    instrument = "_".join([instmodel, instnumber, fileversion])
    return (np.column_stack((wavs, refls)), idstr, company, instrument)
default = '_', dest = "substitute_character") par.add_argument("--recursive", action = 'store_true', default = False, dest = "recursive") #parse it params = par.parse_known_args(sys.argv[1:])[0].__dict__ in_dir = params['input_directory'] sub_char = params['substitute_character'] recursive = params['recursive'] #get the list of filenames filenames = [] if recursive: for root, dirs, files in os.walk(in_dir): for f in files: filenames.append(os.path.join(root, f)) else: filenames = [os.path.join(in_dir, f) for f in os.listdir(f) is os.path.isfile(f)] #make the changes for srcf in filenames: d, f, e = get_directory_filename_extension(srcf) tkns = f.split() if len(tkns) > 1: dstf = os.path.join(d, sub_char.join(tkns) + "." + e) shutil.copyfile(srcf, dstf)
def read_spectrum(self, filename):
    """Read a spectrum from an ASD binary file.

    Header fields are taken from fixed byte offsets: version tag at 0,
    start wavelength at 191, wavelength step at 195, data format at 199,
    channel count at 204, instrument number at 400 and instrument model
    at 431. The spectrum values start at offset 484.

    Changes from the previous implementation:
      * the version tag is decoded from the raw bytes, so it also works
        on Python 3, where joining ``struct.unpack("ccc", ...)`` with a
        ``str`` separator raises ``TypeError``;
      * the data size is derived from the format string instead of
        always assuming 8 bytes per channel, which made 4-byte float
        files fail in ``struct.unpack``;
      * the last wavelength is ``start + (n - 1) * step``, identical to
        the old ``start + n * step - 1`` when the step is 1 and correct
        for other steps;
      * a failed read exits with status 1 instead of 0.

    Args:
        filename: Path of the binary file.

    Returns:
        Tuple ``(data, idstr, company, instrument)`` where ``data`` is a
        two-column (wavelength, value) array, ``company`` is ``"asd"``
        and ``instrument`` joins model, number and file version with
        ``_``.

    Raises:
        SystemExit: With status 1 when the number of values read differs
            from the channel count (including unrecognized version tags).
        struct.error: When the file is too short for a field being read.
    """
    with open(filename, 'rb') as f:
        binconts = f.read()

    # --- header fields --------------------------------------------------
    fileversion = binconts[0:3].decode("ascii", "replace")
    instnumber = str(struct.unpack_from("H", binconts, 400)[0])
    instmodel = self._instrtype[int(struct.unpack_from("B", binconts, 431)[0])]
    wavestart = struct.unpack_from("f", binconts, 191)[0]
    wavestep = struct.unpack_from("f", binconts, 195)[0]
    data_format = struct.unpack_from("B", binconts, 199)[0]
    numchannels = struct.unpack_from("h", binconts, 204)[0]

    # --- wavelength vector ----------------------------------------------
    wavestop = wavestart + (numchannels - 1) * wavestep
    wavs = np.linspace(wavestart, wavestop, numchannels)

    # --- value layout -----------------------------------------------------
    # Format 2 is read as doubles; every other format value is read as
    # 4-byte floats, as before.
    fmt = ('d' if data_format == 2 else 'f') * numchannels
    size = struct.calcsize(fmt)

    refls = []
    if fileversion == 'ASD':
        refls = np.array(struct.unpack_from(fmt, binconts, 484))
    elif fileversion == 'as7':
        tgts = np.array(struct.unpack_from(fmt, binconts, 484))
        ref_flag = struct.unpack_from('?', binconts, 484 + size)[0]
        print("filename = {}, ref_flag = {}".format(filename, ref_flag))
        desc_length = struct.unpack_from('H', binconts, 484 + size + 18)[0]
        print("desc_length = {}".format(desc_length))
        print("size = {}".format(size))

        # HACK: scan +/- 50 bytes around offset 17712 for the window
        # whose values have min > 1, max > 50 and the smallest
        # peak-to-peak range, and treat it as the reference block.
        window = 50
        best_ptp = 1000000.0
        refstart = -1
        for start in range(17712 - window, 17712 + window):
            if (start + size) < len(binconts):
                candidate = np.array(struct.unpack_from(fmt, binconts, start))
                if (np.min(candidate) > 1.0 and np.max(candidate) > 50.0
                        and np.ptp(candidate) < best_ptp):
                    best_ptp = np.ptp(candidate)
                    refstart = start
        refs = np.array([])
        if refstart > -1:
            refs = np.array(struct.unpack_from(fmt, binconts, refstart))

        # NOTE(review): the division by the reference (tgts/refs) is
        # disabled, so the raw target values are returned and ``refs``
        # is only printed - confirm this is still intended.
        refls = tgts
        print(tgts)
        print(refs)

    # --- result -----------------------------------------------------------
    if np.size(refls) != numchannels:
        print("AsdReader: reflectances not read")
        sys.exit(1)

    idstr = get_directory_filename_extension(filename)[1]
    company = "asd"
    instrument = "_".join([instmodel, instnumber, fileversion])
    return (np.column_stack((wavs, refls)), idstr, company, instrument)
print("filename_space_remover.py")

# Command-line interface:
#   --input_directory       directory whose files are examined (required)
#   --substitute_character  text that replaces each run of whitespace
#                           in a file name (default "_")
#   --recursive             also descend into sub-directories
par = argparse.ArgumentParser()
par.add_argument("--input_directory", type=str, required=True,
                 dest="input_directory")
par.add_argument("--substitute_character", type=str, required=False,
                 default="_", dest="substitute_character")
par.add_argument("--recursive", action="store_true", default=False,
                 dest="recursive")

# Unknown arguments are tolerated (parse_known_args) rather than rejected.
params = vars(par.parse_known_args(sys.argv[1:])[0])
in_dir = params["input_directory"]
sub_char = params["substitute_character"]
recursive = params["recursive"]

# Collect the candidate files.
filenames = []
if recursive:
    for root, dirs, files in os.walk(in_dir):
        for f in files:
            filenames.append(os.path.join(root, f))
else:
    # Fixed: the old expression listed the undefined name ``f`` and used
    # ``is`` where a filtering ``if`` was intended, so this branch always
    # raised. List ``in_dir`` and test the joined path, because a bare
    # entry name would be resolved against the current directory.
    for entry in os.listdir(in_dir):
        path = os.path.join(in_dir, entry)
        if os.path.isfile(path):
            filenames.append(path)

# Copy every file whose base name contains whitespace to a sibling whose
# name has the whitespace runs replaced; the source file is left in place.
for srcf in filenames:
    d, f, e = get_directory_filename_extension(srcf)
    tkns = f.split()
    if len(tkns) > 1:
        dstf = os.path.join(d, sub_char.join(tkns) + "." + e)
        shutil.copyfile(srcf, dstf)