Esempio n. 1
0
def sim_NDregion(shape,lineshapes,params,amps):
    """
    Simulate an N-dimensional region containing one or more peaks.

    Parameters:

    * shape         Tuple giving the shape of the region to simulate.
    * lineshapes    List with one lineshape per dimension, each given as a
                    label (str) or a lineshape class.
                    See fit_NDregion for additional documentation.
    * params        P-length list (P is the number of peaks in the region)
                    of N-length lists of tuples, where each tuple holds the
                    lineshape parameters for a given peak and dimension.
    * amps          P-length list of peak amplitudes.

    Returns: array containing simulated region (the result of s_NDregion)

    Raises ValueError when the number of lineshapes, per-peak parameters,
    per-dimension parameters or amplitudes is inconsistent.

    """
    # Translate the user-friendly arguments into the flat form taken by
    # s_NDregion.
    n_dims = len(shape)

    if len(lineshapes) != n_dims:
        raise ValueError("Incorrect number of lineshapes provided")

    # string labels are resolved to lineshape classes, anything else is
    # used exactly as given
    ls_classes = [ls_str2class(ls) if type(ls) is str else ls
                  for ls in lineshapes]

    # number of parameters each dimension's lineshape expects
    expected = [ls.nparam(length) for length,ls in zip(shape,ls_classes)]

    # validate params peak by peak while flattening them into one list
    n_peaks = len(params)
    flat = []
    for peak_idx,peak_params in enumerate(params):
        if len(peak_params) != n_dims:
            raise ValueError(
                "Incorrect number of parameters for peak %i"%(peak_idx))

        for dim_idx,dim_params in enumerate(peak_params):
            if len(dim_params) != expected[dim_idx]:
                msg = "Incorrect number of parameters in peak %i dimension %i"
                raise ValueError(msg%(peak_idx,dim_idx))
            flat.extend(dim_params)

    if len(amps) != n_peaks:
        raise ValueError("Incorrect number of amplitudes provided")

    # the amplitudes go in front of the lineshape parameters
    return s_NDregion(list(amps) + flat,shape,ls_classes,n_peaks)
Esempio n. 2
0
def fit_spectrum(spectrum,lineshapes,params,amps,bounds,ampbounds,centers,
                    rIDs,box_width,error_flag,verb=True,**kw):
    """
    Fit a spectrum region by region, where each region holds one or more
    peaks.

    Parameters:

    * spectrum      Sliceable spectral data with a shape attribute.
    * lineshapes    List of lineshapes by label (str) or a lineshape class.
                    See fit_NDregion for details.
    * params        P-length list (P is the number of peaks) of N-length
                    lists of tuples where each tuple is the optimization
                    starting parameters for a given peak and dimension
                    lineshape.
    * amps          P-length list of amplitudes.
    * bounds        List of bounds for parameters, of same shape as params.
                    Each dimension should have a list/tuple of (min,max)
                    for each parameter.  A whole peak entry or a whole
                    dimension entry may be None (no limits); such entries
                    are forwarded unchanged to fit_NDregion.
    * ampbounds     P-length list of bounds for the amplitude with format
                    similar to bounds.
    * centers       List of N-tuples indicating peak centers.
    * rIDs          P-length list of region numbers (peaks with the same
                    region number are fit together).
    * box_width     N-tuple indicating box width to add and subtract from
                    peak centers to form the region around a peak to fit.
    * error_flag    Set to True to estimate errors for each lineshape
                    parameter and amplitude.
    * verb          Set to True to print a summary of each region fit, False
                    suppresses all printing.
    * kw            Additional keywords passed on to fit_NDregion (and from
                    there to the scipy.optimize.leastsq function).

    Returns: param_best,amp_best,iers if error_flag is False
             param_best,amp_best,param_err,amp_err,iers if error_flag is True

    * param_best    Optimal values for lineshape parameters with same format
                    as the params input parameter.
    * amp_best      List of optimal peak amplitudes.
    * param_err     Estimated lineshape parameter errors with same format
                    as the params input parameter. (Optional)
    * amp_err       Estimated peak amplitude errors. (Optional)
    * iers          List of integer flags from scipy.optimize.leastsq
                    indicating if the solution was found for a given peak.
                    1,2,3,4 indicates that a solution was found. Other
                    values indicate an error.

    """
    n_peaks = len(params)

    # one independent placeholder per peak; each is replaced once the region
    # containing that peak has been fit
    pbest     = [[] for _ in range(n_peaks)]
    pbest_err = [[] for _ in range(n_peaks)]
    abest     = [[] for _ in range(n_peaks)]
    abest_err = [[] for _ in range(n_peaks)]
    iers      = [[] for _ in range(n_peaks)]
    shape = spectrum.shape

    # resolve string labels to lineshape classes
    ls_classes = [ls_str2class(l) if isinstance(l,str) else l
                  for l in lineshapes]

    for cID in set(rIDs):   # loop over the distinct region values

        # indices of the peaks which belong to this region
        cpeaks = [i for i,v in enumerate(rIDs) if v==cID]

        # select the inputs of those peaks
        cparams    = [params[i]    for i in cpeaks]
        camps      = [amps[i]      for i in cpeaks]
        cbounds    = [bounds[i]    for i in cpeaks]
        campbounds = [ampbounds[i] for i in cpeaks]
        ccenters   = [centers[i]   for i in cpeaks]

        # find the box edges; round BEFORE casting so that centers are
        # rounded to the nearest point rather than truncated
        bcenters = np.round(np.array(ccenters)).astype('int')

        # clip the boxes to the spectrum edges
        bmin = np.maximum(bcenters-box_width,0)
        bmax = np.minimum(bcenters+box_width+1,shape)

        # the region is the bounding box of all peak boxes; its lower
        # corner is the offset ("edge") of the region within the spectrum
        rmin = edge = bmin.min(0)
        rmax = bmax.max(0)

        # cut the spectrum
        region = spectrum[tuple(slice(mn,mx) for mn,mx in zip(rmin,rmax))]

        # box limits relative to the region
        ebmin = bmin - edge
        ebmax = bmax - edge

        # weight mask: True for every point inside at least one peak box
        wmask = np.zeros(region.shape,dtype='bool')
        for bmn,bmx in zip(ebmin,ebmax):
            wmask[tuple(slice(mn,mx) for mn,mx in zip(bmn,bmx))] = True

        # add edges to the initial parameters
        ecparams = [ [ ls.add_edge(p,(mn,mx)) for ls,mn,mx,p in
                  zip(ls_classes,rmin,rmax,g)] for g in cparams ]

        # add edges to the bounds.  Each dimension's [(min,max),...] list is
        # transposed into (mins,maxs), each of which is passed through
        # add_edge like a parameter tuple, then transposed back.  The result
        # is materialised as a list because fit_NDregion calls len() on it.
        ecbounds = []
        for pb in cbounds:
            if pb is None:      # unbounded peak, fit_NDregion expands it
                ecbounds.append(None)
                continue
            ecbounds.append([
                None if db is None else
                list(zip(*[ls.add_edge(b,(mn,mx)) for b in zip(*db)]))
                for ls,mn,mx,db in zip(ls_classes,rmin,rmax,pb) ])

        # fit the region
        t = fit_NDregion(region,ls_classes,ecparams,camps,ecbounds,campbounds,
                         wmask,error_flag,**kw)

        if error_flag:
            ecpbest,acbest,ecpbest_err,acbest_err,ier = t
            cpbest_err = [ [ ls.remove_edge(p,(mn,mx)) for ls,mn,mx,p in
                         zip(ls_classes,rmin,rmax,g)] for g in ecpbest_err]
        else:
            ecpbest,acbest,ier = t

        # remove edges from best fit parameters
        cpbest = [ [ ls.remove_edge(p,(mn,mx)) for ls,mn,mx,p in
                zip(ls_classes,rmin,rmax,g)] for g in ecpbest]

        if verb:
            # single pre-formatted strings print identically whether print
            # is a statement or a function
            print("-----------------------")
            print("cID: %s ier: %s Peaks fit %s"%(cID,ier,cpeaks))
            print("fit parameters: %s"%(cpbest,))
            print("fit amplitudes %s"%(acbest,))

        # store the region's results at the positions of its peaks
        for i,pb,ab in zip(cpeaks,cpbest,acbest):
            pbest[i] = pb
            abest[i] = ab
            iers[i] = ier

        if error_flag:
            for i,pb,ab in zip(cpeaks,cpbest_err,acbest_err):
                pbest_err[i] = pb
                abest_err[i] = ab

    if not error_flag:
        return pbest,abest,iers

    return pbest,abest,pbest_err,abest_err,iers
Esempio n. 3
0
def fit_NDregion(region,lineshapes,params,amps,bounds=None,
                 ampbounds=None,wmask=None,error_flag=False,**kw):
    """
    Fit a N-dimensional region.

    Parameters:

    * region         N-dimensional region to fit.
    * lineshapes     List of lineshapes by label (str) or a lineshape class.
    * params         P-length list (P is the number of peaks in region) of
                     N-length lists of tuples where each tuple is the
                     optimization starting parameters for a given peak and
                     dimension lineshape.
    * amps           P-length list of amplitudes.
    * bounds         List of bounds for parameters, of same shape as params.
                     If none of the parameters in a given dimension have
                     limits None can be used, otherwise each dimension should
                     have a list/tuple of (min,max) or None for each
                     parameter.  min or max may be None when there is no
                     bound in a given direction.  None (the default) leaves
                     every parameter unbounded.
    * ampbounds      P-length list of bounds for the amplitude with format
                     similar to bounds.
    * wmask          Array with same shape as region which is used to weight
                     points in the err calculation, typically a boolean array
                     is used to exclude certain points in the region.  Default
                     of None will include all points in the region equally
                     in the error calculation.
    * error_flag     Set to True to estimate errors for each lineshape
                     parameter and amplitude.

    * kw             Additional keywords passed to the scipy.optimize.leastsq
                     function.

    Returns: param_best,amp_best,ier if error_flag is False
             param_best,amp_best,param_err,amp_err,ier if error_flag is True

    * param_best    Optimal values for lineshape parameters with same format
                    as the params input parameter.
    * amp_best      List of optimal peak amplitudes.
    * param_err     Estimated lineshape parameter errors with same format
                    as the params input parameter. (Optional)
    * amp_err       Estimated peak amplitude errors. (Optional)
    * ier           Integer flag from scipy.optimize.leastsq indicating if
                    the solution was found.  1,2,3,4 indicates that a solution
                    was found.  Otherwise the solution was not found.

    Raises ValueError when the lineshapes, params, bounds, amps, ampbounds
    or wmask arguments are inconsistent with the region or with each other.

    Note on the lineshape parameter:

    Elements of the lineshape parameter list can be string indicating the
    lineshape of given dimension or an instance of a lineshape class
    which provide a sim method which takes two arguments, the first being the
    length of the lineshape the second being a list of lineshape parameters,
    and returns a simulated lineshape as well as a nparam method which when
    given the length of lineshape returns the number of parameters needed to
    describe the lineshape. Currently the following strings are allowed:

    * 'g' or 'gauss'    Gaussian (normal) lineshape.
    * 'l' or 'lorentz'  Lorentzian lineshape.
    * 'v' or 'voigt'    Voigt lineshape.
    * 'pv' or 'pvoight' Pseudo Voigt lineshape
    * 's' or 'scale'    Scaled lineshape.

    The first four lineshapes (Gaussian, Lorentzian, Voigt and Pseudo Voigt)
    all take a FWHM scale parameter.

    The following are all valid lineshapes parameters for a 2D Gaussian peak:

    ['g','g']
    ['gauss','gauss']
    [ng.lineshapes1d.gauss(),ng.lineshapes1d.gauss()]

    """
    # this function parses the user-friendly input into a format digestible
    # by f_NDregion, performs the fitting, then formats the fitting results
    # into a user friendly format

    # parse the region parameter
    ndim = region.ndim
    shape = region.shape

    # parse the lineshape parameter
    if len(lineshapes) != ndim:
        raise ValueError("Incorrect number of lineshapes provided")

    ls_classes = [ls_str2class(l) if isinstance(l,str) else l
                  for l in lineshapes]

    # determine the number of parameters in each dimension
    dim_nparam = [c.nparam(l) for l,c in zip(shape,ls_classes)]

    # parse params: validate and flatten into p0
    n_peaks = len(params)
    p0 = []
    for i,guess in enumerate(params):  # peak loop
        if len(guess) != ndim:
            err = "Incorrect number of params for peak %i"
            raise ValueError(err%(i))

        for j,dim_guess in enumerate(guess):    # dimension loop
            if len(dim_guess) != dim_nparam[j]:
                err = "Incorrect number of parameters in peak %i dimension %i"
                raise ValueError(err%(i,j))

            p0.extend(dim_guess)

    # parse the bounds parameter
    if bounds is None:   # no bounds, every peak is expanded in the loop below
        bounds = [None]*n_peaks

    if len(bounds) != n_peaks:
        raise ValueError("Incorrect number of parameter bounds provided")

    # build the parameter bound list to be passed to f_NDregion
    p_bounds = []
    for i,peak_bounds in enumerate(bounds): # peak loop

        if peak_bounds is None:
            # unbounded peak; uses its own loop name so that the peak index
            # i reported in the error messages below is never overwritten
            peak_bounds = [[(None,None)]*n for n in dim_nparam]

        if len(peak_bounds) != ndim:
            err = "Incorrect number of bounds for peak %i"
            raise ValueError(err%(i))

        for j,dim_bounds in enumerate(peak_bounds):    # dimension loop

            if dim_bounds is None:
                dim_bounds = [(None,None)]*dim_nparam[j]

            if len(dim_bounds) != dim_nparam[j]:
                err = "Incorrect number of bounds for peak %i dimension %i"
                raise ValueError(err%(i,j))

            for k,b in enumerate(dim_bounds):    # parameter loop
                if b is None:
                    b = (None,None)

                if len(b) != 2:
                    err  = "No min/max for peak %i dim %i parameter %i"
                    raise ValueError(err%(i,j,k))

                p_bounds.append(b)

    # parse amps parameter
    if len(amps) != n_peaks:
        raise ValueError("Incorrect number of amplitude guesses provided")
    p0 = list(amps) + p0 # amplitudes placed at the front of p0

    # parse ampbounds parameter
    if ampbounds is None:
        ampbounds = [(None,None)]*n_peaks

    if len(ampbounds) != n_peaks:
        raise ValueError("Incorrect number of amplitude bounds")

    to_add = []
    for k,b in enumerate(ampbounds):
        if b is None:
            b = (None,None)

        if len(b) != 2:
            err = "No min/max for amplitude bound %i"
            raise ValueError(err%(k))
        to_add.append(b)
    p_bounds = to_add + p_bounds    # amplitude bounds at front of p_bounds

    # parse the wmask parameter.  An identity test is required here: an
    # equality test against an array is evaluated element by element.
    if wmask is None:   # default is to include all points in region
        wmask = np.ones(shape,dtype='bool')
    if wmask.shape != shape:
        err = "wmask has incorrect shape:"+str(wmask.shape)+   \
              " should be "+str(shape)
        raise ValueError(err)

    # the covariance returned with full_output is needed to estimate errors
    if error_flag:
        kw["full_output"] = True

    # perform fitting
    r = f_NDregion(region,ls_classes,p0,p_bounds,n_peaks,wmask,**kw)

    # unpack results depending on whether full output was requested
    if kw.get("full_output"):
        p_best,cov_xi,_,_,ier = r
    else:
        p_best,ier = r

    # unpack and repack p_best
    # pull off the amplitudes
    amp_best = p_best[:n_peaks]

    # split the remaining parameters into n_peaks equal sized lists
    p_list = split_list(list(p_best[n_peaks:]),n_peaks)

    # for each peak repack the flat parameter lists to reference by dimension
    param_best = [make_slist(l,dim_nparam) for l in p_list]

    # return as is if no errors requested
    if not error_flag:
        return param_best,amp_best,ier

    # calculate errors
    p_err = calc_errors(region,ls_classes,p_best,cov_xi,n_peaks)

    # unpack and repack the errors in p_err
    # pull off the amplitude errors
    amp_err = p_err[:n_peaks]

    # split the remaining errors into n_peaks equal sized lists
    pe_list = split_list(list(p_err[n_peaks:]),n_peaks)

    # for each peak repack the flat errors list to reference by dimension
    param_err = [make_slist(l,dim_nparam) for l in pe_list]

    return param_best,amp_best,param_err,amp_err,ier
Esempio n. 4
0
def pick(data,pthres,nthres=None,msep=None,algorithm='connected',
            est_params=True,lineshapes=None,edge=None,diag=False,c_struc=None,
            c_ndil=0,cluster=True,table=True,axis_names=['A','Z','Y','X']):
    """
    Pick (find) peaks in a spectral region.

    Parameters:

    * data          N-dimensional array to pick peaks in.
    * pthres        Minimum peak height for positive peaks. Set to None to not
                    detect positive peaks.
    * nthres        Minimum peak height for negative peaks (typically a
                    negative value).  Set to None to not detect negative peaks.
    * msep          N-tuple of minimum peak separations along each axis.
                    Must be defined if algorithm is 'thres' or 'thres-fast'.
    * algorithm     Peak picking algorithm to use.  Options are 'thres',
                    'thres-fast', 'downward', or 'connected'.
    * est_params    Set to True to perform a rough estimate of linewidths and
                    amplitude for all peaks picked.  False returns only the
                    peak locations.
    * lineshapes    A list of lineshape classes or string shortcuts for each
                    dimension.  If not specified Gaussian type lineshapes with
                    a FWHM linewidth parameter are assumed in each dimension.
                    This parameter is only used if est_params is True.
    * edge          Tuple to add to peak locations representing the edge of a
                    sliced region.  None skips this addition.
    * diag          Set True to consider diagonal points to be touching in
                    the peak finding algorithm and clustering.
    * c_struc       Structure element to use when applying dilation on segments
                    before applying the clustering algorithm. None will apply
                    the default square structure with connectivity one.
    * c_ndil        Number of dilations to perform on segments before applying
                    the clustering algorithm.
    * cluster       Set True to cluster touching peaks.
    * table         Set True to return a table.
    * axis_names    List of axis names, the last n will be used for column
                    name prefixes in table where n is the number of dimensions.
                    The list is only passed on, never modified here.

    Returns:    locations,[cluster_ids,[scales,amps]] or table

    * locations     Peak locations, positive peaks followed by negative
                    peaks, after add_edge has been applied.
    * cluster_ids   Result of clusters() for the peaks (only when cluster
                    is True).
    * scales        Per-peak scale estimates from guess_params_slice (only
                    when est_params is True).
    * amps          Per-peak amplitude estimates from guess_params_slice
                    (only when est_params is True).

    * table         Result of pack_table() on the values above (returned
                    instead of them when table is True).

    Raises ValueError if msep, algorithm, lineshapes or edge is invalid.

    """
    ####################
    # Check parameters #
    ####################
    ndim = len(data.shape)
    find_segs = bool(est_params)    # segments are needed only for estimates

    # check msep; a missing msep is reported as a ValueError rather than
    # failing inside len()
    if isinstance(msep,int):
        msep = (msep,)
    if algorithm in ['thres','thres-fast']:
        if msep is None or len(msep) != ndim:
            raise ValueError("msep has incorrect length")

    # check algorithm
    if algorithm not in ['thres','thres-fast','downward','connected']:
        raise ValueError('Invalid algorithm %s'%(algorithm))

    # check lineshapes
    if find_segs:
        # expand None
        if lineshapes is None:
            lineshapes = [gauss() for _ in range(ndim)]

        # replace strings
        ls_classes = [ls_str2class(l) if isinstance(l,str) else l
                      for l in lineshapes]

        # check that all classes have 2 parameters
        for i,ls in enumerate(ls_classes):
            if ls.nparam(10) != 2:
                s = "Lineshape class %i does not have two parameters"
                raise ValueError(s%(i))

        if len(ls_classes) != ndim:
            raise ValueError("Incorrect number of lineshapes")

    if edge is not None and len(edge) != ndim:
        raise ValueError("edge has incorrect length")

    #######################
    # find positive peaks #
    #######################
    # The algorithm was validated above, so the final else is 'connected'.
    # Each finder is called with the find_segs flag: as used here it returns
    # (locations,segments) when the flag is True and locations otherwise.
    if pthres is None:      # no locations
        ploc,pseg = [],[]
    else:
        if algorithm in ('thres','thres-fast'):
            # NOTE(review): 'thres' uses the fast finder for positive peaks
            # while the negative branch uses find_all_nthres; kept as in the
            # original -- confirm whether a non-fast positive finder was
            # intended.
            found = find_all_thres_fast(data,pthres,msep,find_segs)
        elif algorithm == 'downward':
            found = find_all_downward(data,pthres,find_segs,diag)
        else:
            found = find_all_connected(data,pthres,find_segs,diag)
        ploc,pseg = found if find_segs else (found,[])

    #######################
    # find negative peaks #
    #######################
    if nthres is None:      # no locations
        nloc,nseg = [],[]
    else:
        if algorithm == 'thres':
            found = find_all_nthres(data,nthres,msep,find_segs)
        elif algorithm == 'thres-fast':
            found = find_all_nthres_fast(data,nthres,msep,find_segs)
        elif algorithm == 'downward':
            found = find_all_upward(data,nthres,find_segs,diag)
        else:
            found = find_all_nconnected(data,nthres,find_segs,diag)
        nloc,nseg = found if find_segs else (found,[])

    # combine the positive and negative peaks
    locations = ploc+nloc

    #########################################################
    # return locations if no parameter estimation requested #
    #########################################################
    if not find_segs:
        if cluster:     # find clusters
            # clusters are determined before the edge offset is applied
            cluster_ids = clusters(data,locations,pthres,nthres,c_struc,None,
                                   c_ndil)
            locations = add_edge(locations,edge)
            if table:
                return pack_table(locations,cluster_ids,axis_names=axis_names)
            return locations,cluster_ids

        # do not determine clusters
        locations = add_edge(locations,edge)
        if table:
            return pack_table(locations,axis_names=axis_names)
        return locations

    ##################################
    # estimate scales and amplitudes #
    ##################################
    seg_slices = pseg+nseg
    scales = [[] for _ in locations]
    amps = [[] for _ in locations]

    for i,(l,seg_slice) in enumerate(zip(locations,seg_slices)):
        # the first returned value is not used
        _,scales[i],amps[i] = guess_params_slice(data,l,seg_slice,ls_classes)

    ########################################################
    # return locations, scales and amplitudes as requested #
    ########################################################
    if cluster:
        cluster_ids = clusters(data,locations,pthres,nthres,c_struc,None,c_ndil)
        locations = add_edge(locations,edge)
        if table:
            return pack_table(locations,cluster_ids,scales,amps,axis_names)
        return locations,cluster_ids,scales,amps

    locations = add_edge(locations,edge)
    if table:
        return pack_table(locations,scales=scales,amps=amps,
                          axis_names=axis_names)
    return locations,scales,amps