def from_gwu_chem_UVVIS(filelist, sortnames=False, shortname=True, cut_extension=False, name=''):
    ''' Build a TimeSpectra from comma-delimited, two-column files produced by
    GWU chemistry's UVVis.

    Each file is read with read_csv() into a single-column dataframe (first
    column of the file becomes the index, first two rows are skipped), and the
    per-file dataframes are then merged column-wise with concat().

    The files carry no metadata or dark data that is used here, so the column
    order comes from the caller: either pass in a correctly ordered filelist,
    or reorder afterwards with df=df.reindex(columns=[correct order]).

    Kwds:
       filelist- Iterable of file paths; one output column per file.
       sortnames- If true, columns are reindexed to the sorted column names.
                  Otherwise no reordering is applied after concat().
       shortname- If true, get_shortname() of the path is used as the column
                  name. If false, the path itself is used.
       cut_extension- Passed through to get_shortname(); only relevant when
                  shortname is true.
       name- If non-empty, assigned to the returned object's name attribute.

    Returns:
       TimeSpectra with metadata, filedict and baseline set to None and
       specunit set to 'nm'.'''

    def _column_label(path):
        # Either the short name or the untouched path, depending on shortname
        if shortname:
            return get_shortname(path, cut_extension=cut_extension)
        return path

    ### Either full names or short names of filelist
    working_names = [_column_label(path) for path in filelist]

    ### One single-column dataframe per file
    frames = []
    for path, label in zip(filelist, working_names):
        frames.append(
            read_csv(path,
                     sep=',',
                     header=None,
                     index_col=0,
                     skiprows=2,
                     na_values=' ',
                     names=[label]))

    ### NOTE(review): the original author flagged that this concat was seen to
    ### fill every column but one with NaNs even though the individual frames
    ### looked fine. Cause is not visible from here; confirm before relying on it.
    merged = concat(frames, axis=1)

    ### Optional explicit ordering of the columns by sorted name
    if sortnames:
        merged = merged.reindex(columns=sorted(working_names))

    spectra = TimeSpectra(merged)
    spectra.metadata = None
    spectra.filedict = None
    spectra.baseline = None
    spectra.specunit = 'nm'  # Original comment: this is autodetected in plots
    if name:
        spectra.name = name

    return spectra
def from_gwu_chem_UVVIS(filelist, sortnames=False, shortname=True, cut_extension=False, name=''):
    ''' Build a TimeSpectra from comma-delimited, two-column files produced by
    GWU chemistry's UVVis.

    Each file is read with read_csv() into a single-column dataframe (first
    column of the file becomes the index, first two rows are skipped), and the
    per-file dataframes are then merged column-wise with concat().

    The files carry no metadata or dark data that is used here, so the column
    order comes from the caller: either pass in a correctly ordered filelist,
    or reorder afterwards with df=df.reindex(columns=[correct order]).

    Kwds:
       filelist- Iterable of file paths; one output column per file.
       sortnames- If true, columns are reindexed to the sorted column names.
                  Otherwise no reordering is applied after concat().
       shortname- If true, get_shortname() of the path is used as the column
                  name. If false, the path itself is used.
       cut_extension- Passed through to get_shortname(); only relevant when
                  shortname is true.
       name- If non-empty, assigned to the returned object's name attribute.

    Returns:
       TimeSpectra with metadata, filedict and baseline set to None and
       specunit set to 'nm'.'''

    def _column_label(path):
        # Either the short name or the untouched path, depending on shortname
        if shortname:
            return get_shortname(path, cut_extension=cut_extension)
        return path

    ### Either full names or short names of filelist
    working_names = [_column_label(path) for path in filelist]

    ### One single-column dataframe per file
    frames = []
    for path, label in zip(filelist, working_names):
        frames.append(
            read_csv(path,
                     sep=',',
                     header=None,
                     index_col=0,
                     skiprows=2,
                     na_values=' ',
                     names=[label]))

    ### NOTE(review): the original author flagged that this concat was seen to
    ### fill every column but one with NaNs even though the individual frames
    ### looked fine. Cause is not visible from here; confirm before relying on it.
    merged = concat(frames, axis=1)

    ### Optional explicit ordering of the columns by sorted name
    if sortnames:
        merged = merged.reindex(columns=sorted(working_names))

    spectra = TimeSpectra(merged)
    spectra.metadata = None
    spectra.filedict = None
    spectra.baseline = None
    spectra.specunit = 'nm'  # Original comment: this is autodetected in plots
    if name:
        spectra.name = name

    return spectra
def from_timefile_datafile(datafile, timefile, extract_dark=True, name=''):
    ''' Converts old-style spectral data from GWU phys lab into a TimeSpectra
    with timestamp column index and wavelength row indices.

    Kwds:
       datafile- Path to the numeric data matrix. Read with np.genfromtxt
                 (first line skipped); column 0 is taken as wavelengths and
                 the remaining columns as spectra.
       timefile- Path to the whitespace-delimited timefile. Its first line is
                 discarded; for every other line the first token is stored as
                 the file name and _get_datetime_timefile() supplies the key.
       extract_dark- If true, extract_darkfile() is asked for a dark file
                 among the file names; when one is found its column is removed
                 from the data and stored as the baseline. If false, no dark
                 file is looked for and baseline is None.
       name- If non-empty, becomes the name of the returned object.

    Returns:
       TimeSpectra with baseline, filedict, metadata and specunit ('nm') set.'''

    ### Context manager so the timefile handle is always closed
    with open(timefile, 'r') as handle:
        tlines = [line.strip().split() for line in handle]
    tlines.pop(0)  # Discard first line of the timefile

    time_file_dict = dict(
        (_get_datetime_timefile(tline), tline[0]) for tline in tlines)

    ### Read in data matrix, separate wavelength column from the rest of the data
    wavedata = np.genfromtxt(datafile, dtype='float', skip_header=1)
    data, wavelengths = wavedata[:, 1::], wavedata[:, 0]

    ### Sort datetimes here before assigning/removing dark spec etc...
    sorted_tfd = sorted(time_file_dict.items())
    sorted_times, sorted_files = zip(*sorted_tfd)

    ### Defaults guarantee both names are bound when extract_dark is false
    ### (previously that path raised NameError).
    darkfile = None
    baseline = None

    ### Seek darkfile.  If found, take it out of dataframe. ###
    if extract_dark:
        darkfile = extract_darkfile(sorted_files, return_null=True)

    if darkfile:
        ### Find baseline by reverse lookup (lookup by value) and get index position.
        ### NOTE(review): darkindex is a position in the time-sorted file list but
        ### is used to index data columns; this presumably relies on the datafile
        ### columns already being in chronological order -- confirm.
        darkindex = sorted_files.index(darkfile)
        darktime = sorted_times[darkindex]
        baseline = Series(data[:, darkindex], index=wavelengths, name=darkfile)

        del time_file_dict[darktime]  # Intentionally remove
        sorted_times = list(sorted_times)  # zip gives a tuple; need a list to remove from
        sorted_times.remove(darktime)
        data = np.delete(data, darkindex, 1)  # Delete dark column from numpy data

    dataframe = TimeSpectra(data, columns=sorted_times, index=wavelengths)

    ### Add field attributes to dataframe
    dataframe.baseline = baseline
    dataframe.filedict = time_file_dict
    if name:
        dataframe.name = name

    ### Get header metadata from the first line in timefile that isn't the darkfile.
    ### Only checks one line; does not check for consistency.  Falls back to an
    ### empty mapping if no such line exists (previously a NameError).
    meta_partial = {}
    for line in tlines:
        if line[0] != darkfile:
            meta_partial = _get_headermetadata_timefile(line[0])
            break

    ### Extract remaining metadata (file/time info) and return ###
    meta_general = get_headermetadata_dataframe(dataframe, time_file_dict)
    meta_general.update(meta_partial)
    dataframe.metadata = meta_general
    dataframe.specunit = 'nm'  # Original comment: this is autodetected in plots

    ### Sort dataframe by ascending time (could also sort spectral data) ###
    dataframe.sort(axis=1, inplace=True)  # axis1=columns

    return dataframe
def from_timefile_datafile(datafile, timefile, extract_dark=True, name=''):
    ''' Converts old-style spectral data from GWU phys lab into a TimeSpectra
    with timestamp column index and wavelength row indices.

    Kwds:
       datafile- Path to the numeric data matrix. Read with np.genfromtxt
                 (first line skipped); column 0 is taken as wavelengths and
                 the remaining columns as spectra.
       timefile- Path to the whitespace-delimited timefile. Its first line is
                 discarded; for every other line the first token is stored as
                 the file name and _get_datetime_timefile() supplies the key.
       extract_dark- If true, extract_darkfile() is asked for a dark file
                 among the file names; when one is found its column is removed
                 from the data and stored as the baseline. If false, no dark
                 file is looked for and baseline is None.
       name- If non-empty, becomes the name of the returned object.

    Returns:
       TimeSpectra with baseline, filedict, metadata and specunit ('nm') set.'''

    ### Context manager so the timefile handle is always closed
    with open(timefile, 'r') as handle:
        tlines = [line.strip().split() for line in handle]
    tlines.pop(0)  # Discard first line of the timefile

    time_file_dict = dict(
        (_get_datetime_timefile(tline), tline[0]) for tline in tlines)

    ### Read in data matrix, separate wavelength column from the rest of the data
    wavedata = np.genfromtxt(datafile, dtype='float', skip_header=1)
    data, wavelengths = wavedata[:, 1::], wavedata[:, 0]

    ### Sort datetimes here before assigning/removing dark spec etc...
    sorted_tfd = sorted(time_file_dict.items())
    sorted_times, sorted_files = zip(*sorted_tfd)

    ### Defaults guarantee both names are bound when extract_dark is false
    ### (previously that path raised NameError).
    darkfile = None
    baseline = None

    ### Seek darkfile.  If found, take it out of dataframe. ###
    if extract_dark:
        darkfile = extract_darkfile(sorted_files, return_null=True)

    if darkfile:
        ### Find baseline by reverse lookup (lookup by value) and get index position.
        ### NOTE(review): darkindex is a position in the time-sorted file list but
        ### is used to index data columns; this presumably relies on the datafile
        ### columns already being in chronological order -- confirm.
        darkindex = sorted_files.index(darkfile)
        darktime = sorted_times[darkindex]
        baseline = Series(data[:, darkindex], index=wavelengths, name=darkfile)

        del time_file_dict[darktime]  # Intentionally remove
        sorted_times = list(sorted_times)  # zip gives a tuple; need a list to remove from
        sorted_times.remove(darktime)
        data = np.delete(data, darkindex, 1)  # Delete dark column from numpy data

    dataframe = TimeSpectra(data, columns=sorted_times, index=wavelengths)

    ### Add field attributes to dataframe
    dataframe.baseline = baseline
    dataframe.filedict = time_file_dict
    if name:
        dataframe.name = name

    ### Get header metadata from the first line in timefile that isn't the darkfile.
    ### Only checks one line; does not check for consistency.  Falls back to an
    ### empty mapping if no such line exists (previously a NameError).
    meta_partial = {}
    for line in tlines:
        if line[0] != darkfile:
            meta_partial = _get_headermetadata_timefile(line[0])
            break

    ### Extract remaining metadata (file/time info) and return ###
    meta_general = get_headermetadata_dataframe(dataframe, time_file_dict)
    meta_general.update(meta_partial)
    dataframe.metadata = meta_general
    dataframe.specunit = 'nm'  # Original comment: this is autodetected in plots

    ### Sort dataframe by ascending time (could also sort spectral data) ###
    dataframe.sort(axis=1, inplace=True)  # axis1=columns

    return dataframe