Esempio n. 1
0
def from_gwu_chem_UVVIS(filelist,
                        sortnames=False,
                        shortname=True,
                        cut_extension=False,
                        name=''):
    """Load comma-delimited files from GWU chemistry's UVVis into a TimeSpectra.

    Each file is parsed with read_csv() into a single-column dataframe
    (first file column as the index, first two rows skipped), and the
    per-file frames are then joined side by side with concat().  The files
    carry no useful metadata or dark spectrum, so column order comes purely
    from the order of ``filelist`` unless ``sortnames`` is set.  Columns can
    also be reordered afterwards with df.reindex(columns=[correct order]).

    Kwds:
       filelist- Paths of the files to read; one output column per file.
       sortnames- If true, columns are reordered by sorted() column name.
                  Otherwise the order of files passed in is used directly.
       shortname- If false, each path is used as the column name exactly as
                  given.  If true, the name comes from get_shortname()
                  (presumably the bare file name).
       cut_extension- Forwarded to get_shortname(); only relevant when
                  shortname is true.
       name- If non-empty, stored as the ``name`` of the returned object.

    Returns:
       TimeSpectra with metadata, filedict and baseline set to None and
       specunit set to 'nm'.
    """

    def _label(path):
        # Column label for one file: short name, or the path untouched.
        if shortname:
            return get_shortname(path, cut_extension=cut_extension)
        return path

    # Labels are collected up front; they are only needed again for sorting.
    column_labels = [_label(path) for path in filelist]

    frames = []
    for path in filelist:
        frame = read_csv(
            path,
            sep=',',
            header=None,
            index_col=0,
            skiprows=2,
            na_values=' ',  # Used to be ' \r' (possibly inherited from the IR reader)
            names=[_label(path)])
        frames.append(frame)

    # NOTE(review): the original author reported that this concat produced
    # NaNs everywhere except one file even though the individual frames looked
    # fine -- presumably the per-file indexes do not line up exactly; confirm.
    merged = concat(frames, axis=1)

    # Optional alphabetical column order instead of the input order.
    if sortnames:
        merged = merged.reindex(columns=sorted(column_labels))

    spectra = TimeSpectra(merged)

    # These files provide no metadata, file map or dark spectrum.
    spectra.metadata = None
    spectra.filedict = None
    spectra.baseline = None
    spectra.specunit = 'nm'  # This is autodetected in plots
    if name:
        spectra.name = name

    return spectra
Esempio n. 2
0
def from_gwu_chem_UVVIS(filelist, sortnames=False, shortname=True, cut_extension=False, name=''):
    """Read GWU chemistry UVVis comma-delimited files into one TimeSpectra.

    Every file in ``filelist`` is read with read_csv() as a one-column
    dataframe indexed by the file's first column (the first two rows are
    skipped), and the resulting frames are merged column-wise by concat().
    There is no useful metadata or dark data in these files, so callers should
    pass a correctly ordered filelist, request ``sortnames``, or reorder later
    via df.reindex(columns=[correct order]).

    Kwds:
       sortnames- If true, reorder the columns by sorted() column name;
                  otherwise the order of files passed in is used as is.
       shortname- If true, name columns with get_shortname(path) (presumably
                  just the file name); if false, use the path as passed in.
       cut_extension- Passed through to get_shortname() when shortname is true.
       name- Optional name assigned to the returned object when non-empty.

    Returns a TimeSpectra whose metadata, filedict and baseline are None and
    whose specunit is 'nm'.
    """
    # Pick the column-naming rule once.
    label_of = (
        (lambda path: get_shortname(path, cut_extension=cut_extension))
        if shortname
        else (lambda path: path)
    )

    # Either full names or short names of filelist (used only for sorting)
    sortable_labels = [label_of(path) for path in filelist]

    # Parser settings shared by every file.
    # na_values used to be ' \r' (possibly a leftover from the IR reader).
    read_options = dict(sep=',', header=None, index_col=0, skiprows=2, na_values=' ')
    per_file = [read_csv(path, names=[label_of(path)], **read_options) for path in filelist]

    # NOTE(review): the original author flagged this step as yielding NaNs for
    # all but one file although per_file looked fine -- presumably the indexes
    # of the individual files differ slightly; confirm against real data.
    combined = concat(per_file, axis=1)

    # Alphabetical column order on request; otherwise keep input order.
    if sortnames:
        combined = combined.reindex(columns=sorted(sortable_labels))

    result = TimeSpectra(combined)

    # Nothing in these files can populate the bookkeeping attributes.
    for attribute in ('metadata', 'filedict', 'baseline'):
        setattr(result, attribute, None)
    result.specunit = 'nm'  # This is autodetected in plots
    if name:
        result.name = name

    return result
Esempio n. 3
0
def from_timefile_datafile(datafile, timefile, extract_dark=True, name=''):
    """Convert old-style spectral data from the GWU phys lab into a TimeSpectra.

    The timefile is read line by line (its first line is discarded as a
    header); each remaining line yields a datetime via
    _get_datetime_timefile() and a file name (first whitespace-separated
    token).  The datafile is loaded with np.genfromtxt (first row skipped):
    its first column becomes the row index and the remaining columns become
    the spectra, labelled with the sorted datetimes.

    Parameters:
       datafile- Path of the numeric data matrix.
       timefile- Path of the text file listing file names and timestamps.
       extract_dark- If true, extract_darkfile() is asked for a dark spectrum;
                  when one is found it is removed from the data and stored as
                  the baseline.  If false, no dark spectrum is looked for.
       name- If non-empty, becomes the name of the returned object.

    Returns:
       TimeSpectra with baseline, filedict, metadata and specunit ('nm') set.
    """

    # Context manager so the file handle is closed deterministically.
    with open(timefile, 'r') as handle:
        tlines = [line.strip().split() for line in handle]
    tlines.pop(0)  # Drop the header line

    time_file_dict = dict((_get_datetime_timefile(tline), tline[0]) for tline in tlines)

    ### Read in data matrix, separate first column (row index) from the rest of the data
    wavedata = np.genfromtxt(datafile, dtype='float', skip_header=1)
    data, wavelengths = wavedata[:, 1:], wavedata[:, 0]

    ### Sort datetimes here before assigning/removing dark spec etc...
    # NOTE(review): columns of `data` are addressed below by position in this
    # time-sorted listing, which assumes the datafile columns are already in
    # chronological order -- confirm.
    sorted_tfd = sorted(time_file_dict.items())
    sorted_times, sorted_files = zip(*sorted_tfd)

    ### Seek darkfile.  If found, take it out of dataframe. ###
    # Always bind darkfile: it used to be undefined (UnboundLocalError) when
    # extract_dark was false.
    darkfile = extract_darkfile(sorted_files, return_null=True) if extract_dark else None

    if darkfile:
        # Find baseline by reverse lookup (lookup by value) and get index position
        darkindex = sorted_files.index(darkfile)
        darktime = sorted_times[darkindex]
        baseline = Series(data[:, darkindex], index=wavelengths, name=darkfile)

        del time_file_dict[darktime]  # Intentionally remove
        sorted_times = list(sorted_times)  # zip() gave a tuple; need a list to remove
        sorted_times.remove(darktime)
        data = np.delete(data, darkindex, 1)  # Delete dark column from numpy data
    else:
        baseline = None

    dataframe = TimeSpectra(data, columns=sorted_times, index=wavelengths)

    ### Add field attributes to dataframe
    dataframe.baseline = baseline
    dataframe.filedict = time_file_dict
    if name:
        dataframe.name = name

    ### Get header metadata from first line in timefile that isn't darkfile.
    ### Only checks one line; does not check for consistency.
    # Default to no extra metadata so a timefile without any non-dark line no
    # longer fails with an unbound name further down.
    meta_partial = {}
    for line in tlines:
        if line[0] != darkfile:
            # NOTE(review): original author left "DOUBLE CHECK THIS WORKS" here.
            meta_partial = _get_headermetadata_timefile(line[0])
            break

    ### Extract remaining metadata (file/time info) and return ###
    meta_general = get_headermetadata_dataframe(dataframe, time_file_dict)
    meta_general.update(meta_partial)
    dataframe.metadata = meta_general
    dataframe.specunit = 'nm'  # This is autodetected in plots

    ### Sort dataframe by ascending time (could also sort spectral data) ###
    # NOTE(review): pandas removed DataFrame.sort in favour of sort_index /
    # sort_values; if TimeSpectra simply delegates to DataFrame this call may
    # need updating -- confirm against the TimeSpectra implementation.
    dataframe.sort(axis=1, inplace=True)  # axis 1 = columns

    return dataframe
Esempio n. 4
0
def from_timefile_datafile(datafile, timefile, extract_dark=True, name=''):
    """Convert old-style spectral data from the GWU phys lab into a TimeSpectra.

    Reads the timefile (discarding its first line as a header) to map each
    datetime, obtained from _get_datetime_timefile(), to the file name given
    as the first whitespace-separated token of the line.  The datafile is
    loaded with np.genfromtxt (first row skipped); its first column is used
    as the row index and the remaining columns as spectra whose labels are
    the sorted datetimes.

    Parameters:
       datafile- Path of the numeric data matrix.
       timefile- Path of the text file listing file names and timestamps.
       extract_dark- If true, extract_darkfile() is used to look for a dark
                  spectrum, which is then removed from the data and stored as
                  the baseline.  If false, no dark spectrum is searched for.
       name- If non-empty, becomes the name of the returned object.

    Returns:
       TimeSpectra with baseline, filedict, metadata and specunit ('nm') set.
    """

    # Use a context manager so the timefile handle is always closed.
    with open(timefile, 'r') as handle:
        tlines = [line.strip().split() for line in handle]
    tlines.pop(0)  # Drop the header line

    time_file_dict = dict(
        (_get_datetime_timefile(tline), tline[0]) for tline in tlines)

    ### Read in data matrix, separate first column (row index) from the rest of the data
    wavedata = np.genfromtxt(datafile, dtype='float', skip_header=1)
    wavelengths = wavedata[:, 0]
    data = wavedata[:, 1:]

    ### Sort datetimes here before assigning/removing dark spec etc...
    # NOTE(review): the positional lookups below assume the datafile columns
    # are already in chronological order, matching this sorted listing --
    # confirm.
    sorted_tfd = sorted(time_file_dict.items())
    sorted_times, sorted_files = zip(*sorted_tfd)

    ### Seek darkfile.  If found, take it out of dataframe. ###
    # Bind darkfile unconditionally; previously extract_dark=False left it
    # undefined and the check below raised UnboundLocalError.
    darkfile = None
    if extract_dark:
        darkfile = extract_darkfile(sorted_files, return_null=True)

    baseline = None
    if darkfile:
        # Find baseline by reverse lookup (lookup by value) and get index position
        darkindex = sorted_files.index(darkfile)
        darktime = sorted_times[darkindex]
        baseline = Series(data[:, darkindex], index=wavelengths, name=darkfile)

        del time_file_dict[darktime]  # Intentionally remove
        sorted_times = list(sorted_times)  # Tuple from zip(); list needed for remove()
        sorted_times.remove(darktime)
        data = np.delete(data, darkindex, 1)  # Delete dark column from numpy data

    dataframe = TimeSpectra(data, columns=sorted_times, index=wavelengths)

    ### Add field attributes to dataframe
    dataframe.baseline = baseline
    dataframe.filedict = time_file_dict
    if name:
        dataframe.name = name

    ### Get header metadata from first line in timefile that isn't darkfile.
    ### Only checks one line; does not check for consistency.
    # Start from an empty mapping so that a timefile with no non-dark entry
    # does not leave meta_partial unbound.
    meta_partial = {}
    for line in tlines:
        if line[0] == darkfile:
            continue
        # NOTE(review): original author left "DOUBLE CHECK THIS WORKS" here.
        meta_partial = _get_headermetadata_timefile(line[0])
        break

    ### Extract remaining metadata (file/time info) and return ###
    meta_general = get_headermetadata_dataframe(dataframe, time_file_dict)
    meta_general.update(meta_partial)
    dataframe.metadata = meta_general
    dataframe.specunit = 'nm'  # This is autodetected in plots

    ### Sort dataframe by ascending time (could also sort spectral data) ###
    # NOTE(review): pandas removed DataFrame.sort in favour of sort_index /
    # sort_values; if TimeSpectra simply delegates to DataFrame this call may
    # need updating -- confirm against the TimeSpectra implementation.
    dataframe.sort(axis=1, inplace=True)  # axis 1 = columns

    return dataframe