Ejemplo n.º 1
0
def from_gwu_chem_UVVIS(filelist, sortnames=False, shortname=True, cut_extension=False, name=None):
    ''' Build a TimeSpectra from comma-delimited, two-column files (GWU chemistry UVVis format).

    Every file is parsed with read_csv(): the first two rows are skipped, the first column
    becomes the index and the second column becomes a single data column labelled after the
    file.  The resulting one-column frames are then merged side by side with concat(axis=1).

    These files carry no usable metadata or dark data, so the column order depends on the
    caller: either pass in an already ordered filelist, set sortnames=True, or reorder
    afterwards with df=df.reindex(columns=[correct order]).

    Kwds:
       filelist- Paths of the files to read; iterated twice, so pass a list rather than a
                  one-shot iterator.
       sortnames- If true, columns are reindexed into sorted label order.  Otherwise the
                  result of concat() is used as is.
       shortname- If true, get_shortname() supplies the column label; if false, the file
                  path exactly as passed in is the label.
       cut_extension- Forwarded to get_shortname(); only relevant when shortname is true.
       name- Stored on the returned object as its .name attribute.

    Returns:
       TimeSpectra with metadata, filedict and darkseries set to None and specunit 'nm'.'''

    def _label(path):
        ### Column label for one file: shortened name or the untouched path
        if shortname:
            return get_shortname(path, cut_extension=cut_extension)
        return path

    ### Labels for every file (needed later if sorting is requested)
    labels=[_label(path) for path in filelist]

    ### One single-column frame per file
    frames=[]
    for path in filelist:
        frame=read_csv(path, sep=',', header=None, index_col=0, skiprows=2,
                       na_values=' ', names=[_label(path)])
        frames.append(frame)

    ### NOTE(review): the original author reported that this merge produced NaNs everywhere
    ### except for one file even though the individual frames looked fine -- presumably the
    ### index values differ between files; not verified here.
    merged=concat(frames, axis=1)

    ### Optional alphabetical column order
    if sortnames:
        merged=merged.reindex(columns=sorted(labels))

    spectra=TimeSpectra(merged)

    ### No metadata, file dictionary or dark spectrum is available for this file format
    spectra.metadata=None
    spectra.filedict=None
    spectra.darkseries=None
    spectra.specunit='nm' #This autodetected in plots
    spectra.name=name

    return spectra
Ejemplo n.º 2
0
def _read_header_lines(path, nlines):
    ''' Return the first nlines lines of the file at path, each stripped of surrounding whitespace.'''
    with open(path) as f:
        return [next(f).strip() for _ in range(nlines)]


def from_spec_files(file_list, specframe=None, skiphead=17, skipfoot=1,
                    check_for_overlapping_time=True, extract_dark=True, name=None):
    ''' Takes in raw files directly from Ocean optics spectrometer and creates a spectral dataframe.
    This is somewhat customized to the original author's analysis, but has general arguments
    for future adaptations.

    Built to work with 2-column data only!!!

    The TimeSpectra is constructed from a dictionary of Series keyed by the timestamp parsed
    from each file's header, so files need not share identical wavelength values.

    Kwds:
       file_list- Paths of the spectrometer files.  A copy is made internally, so the
                  caller's list is never modified (the dark file is removed from the copy only).
       specframe- Reserved for merging into an existing spectral dataframe; not implemented.
       skiphead/skipfoot- Number of header/footer lines in each file (this filetype has a
                  17 line header and a 1 line footer).
       check_for_overlapping_time- If true, raise when two files share the same timestamp.
                  Otherwise the later file silently overwrites the earlier one.
       extract_dark- If true, extract_darkfile() is asked to locate a dark file in file_list;
                  when one is found it is stored as .darkseries and excluded from the data.
                  (Per the original notes, it matches "dark" case-insensitively, warns when
                  none is found and errors on several -- that lives in extract_darkfile.)
       name- Stored on the returned object as its .name attribute.

    Returns:
       TimeSpectra with specunit 'nm' and the attributes filedict (time:filename, dark file
       excluded), darkseries (Series or None), name and metadata.

    Raises:
       NotImplementedError- if specframe is given.
       KeyError- if check_for_overlapping_time is true and two files share a timestamp.'''

    ### Need to add functionality for spectral dataframe (join/concat)
    if specframe is not None:
        raise NotImplementedError('Merging into an existing specframe is not implemented.')

    ### Work on a copy so removing the darkfile never alters the caller's list
    file_list=list(file_list)

    dict_of_series={} #Dict of series eventually merged to dataframe
    time_file_dict={} #Dict of time:filename (darkfile intentionally excluded)

    ### Defaults keep these names defined when extract_dark is False or no dark file exists
    darkfile=None
    darkseries=None

    ### If looking for a darkfile, this will find it.
    if extract_dark:
        darkfile=extract_darkfile(file_list, return_null=True)

        if darkfile:
            wavedata=np.genfromtxt(darkfile, dtype=spec_dtype, skip_header=skiphead, skip_footer=skipfoot)
            darkseries=Series(wavedata['intensity'], index=wavedata['wavelength'], name=darkfile)
            file_list.remove(darkfile)

    first_header=None #Header of the first non-dark file, reused below for metadata

    for infile in file_list:

        ### Read in only the header lines, not all the lines of the file
        header=_read_header_lines(infile, skiphead)
        if first_header is None:
            first_header=header

        ### Store wavelength, intensity data in a structured array for easy item lookup
        ### Eg wavedata['wavelength']
        wavedata=np.genfromtxt(infile, dtype=spec_dtype, skip_header=skiphead, skip_footer=skipfoot)

        ### Extract time data from header
        timestamp=_get_datetime_specsuite(header)

        ### Make sure timepoints aren't overlapping with any others
        if check_for_overlapping_time and timestamp in time_file_dict:
            raise KeyError('Duplicate time %s found in between files %s, %s'\
                           %(timestamp, infile, time_file_dict[timestamp]) )

        time_file_dict[timestamp]=infile
        dict_of_series[timestamp]=Series(wavedata['intensity'], index=wavedata['wavelength'])

    ### Make dataframe, add filenames, darkseries and metadata attributes (note, DateTimeIndex auto sorts!!)
    dataframe=TimeSpectra(dict_of_series)
    dataframe.specunit='nm'
    dataframe.filedict=time_file_dict
    dataframe.darkseries=darkseries  #KEEP THIS AS DARK SERIES RECALL IT IS SEPARATE FROM BASELINE OR REFERENCE..
    dataframe.name=name

    ### Take metadata from first file in filelist that isn't darkfile; stays empty when
    ### there was no such file so the update below is a no-op instead of a NameError
    meta_partial={}
    if first_header is not None:
        meta_partial=_get_metadata_fromheader(first_header)

    meta_general=get_headermetadata_dataframe(dataframe, time_file_dict)
    meta_general.update(meta_partial)
    dataframe.metadata=meta_general

    return dataframe
Ejemplo n.º 3
0
def from_timefile_datafile(datafile, timefile, extract_dark=True, name=None): 
    ''' Converts old-style spectral data from GWU phys lab into
    a dataframe with timestamp column index and wavelength row indicies.

    The timefile is read line by line (first line discarded as a header); each remaining
    line is split on whitespace, its first token is taken as the file name and the whole
    line is passed to _get_datetime_timefile() to obtain the timestamp.  The datafile is
    read as a float matrix (first line skipped) whose first column holds the wavelengths
    and whose remaining columns hold one spectrum each.

    Kwds:
       datafile- Path of the data matrix file.
       timefile- Path of the file listing one spectrum (file name and time info) per line.
       extract_dark- If true, extract_darkfile() is asked to locate a dark spectrum among
                  the file names; when one is found it is stored as .darkseries and removed
                  from the data and from .filedict.
       name- Stored on the returned object as its .name attribute.

    Returns:
       TimeSpectra with the attributes darkseries (Series or None), filedict (time:filename),
       name, metadata and specunit 'nm', sorted along the columns.'''

    ### Context manager guarantees the timefile handle is closed
    with open(timefile,'r') as tf:
        tlines=[line.strip().split() for line in tf]
    tlines.pop(0) #Discard header line

    time_file_dict=dict((_get_datetime_timefile(tline),tline[0]) for tline in tlines)

    ### Read in data matrix, separate first column (wavelengths) from the rest of the data
    wavedata=np.genfromtxt(datafile, dtype='float', skip_header=1)
    data, wavelengths=wavedata[:,1:], wavedata[:,0]

    ### Sort datetimes here before assigning/removing dark spec etc...
    ### NOTE(review): the data columns are labelled with the *sorted* times below, which
    ### assumes the datafile columns are already in ascending time order -- TODO confirm.
    sorted_tfd=sorted(time_file_dict.items())
    sorted_times, sorted_files=zip(*sorted_tfd)
    sorted_times=list(sorted_times) #Mutable, so the dark time can be removed

    ### Defaults keep these names defined when extract_dark is False or no dark file exists
    darkfile=None
    darkseries=None

    ### Seek darkfile.  If found, take it out of dataframe. ###
    if extract_dark:
        darkfile=extract_darkfile(sorted_files, return_null=True)

    if darkfile:
        ### Find darkseries by reverse lookup (lookup by value) and get index position
        darkindex=sorted_files.index(darkfile)
        darktime=sorted_times[darkindex]
        darkseries=Series(data[:,darkindex], index=wavelengths, name=darkfile)

        del time_file_dict[darktime] #Intentionally remove
        sorted_times.remove(darktime)
        data=np.delete(data, darkindex, 1)  #Delete dark column from numpy data

    dataframe=TimeSpectra(data, columns=sorted_times, index=wavelengths)

    ### Add field attributes to dataframe
    dataframe.darkseries=darkseries
    dataframe.filedict=time_file_dict
    dataframe.name=name

    ### Get headermeta data from first line in timefile that isn't darkfile.  Only checks one line
    ### Does not check for consistency.  Stays empty if no such line exists.
    ### NOTE(review): original author flagged this call as unverified ("DOUBLE CHECK THIS WORKS");
    ### it receives the file name token, not the whole line.
    meta_partial={}
    for line in tlines:
        if line[0]!=darkfile:
            meta_partial=_get_headermetadata_timefile(line[0])
            break

    ### Extract remaining metadata (file/time info) and return ###
    meta_general=get_headermetadata_dataframe(dataframe, time_file_dict)
    meta_general.update(meta_partial)
    dataframe.metadata=meta_general
    dataframe.specunit='nm'  #This autodetected in plots

    ### Sort dataframe by ascending time (could also sort spectral data) ###
    ### NOTE(review): if TimeSpectra relies on pandas' DataFrame.sort, that method was removed
    ### in pandas 0.20 (sort_index is the replacement) -- confirm against the pinned pandas.
    dataframe.sort(axis=1, inplace=True) #axis1=columns

    return dataframe