def sim_NDregion(shape, lineshapes, params, amps):
    """
    Simulate an arbitrary dimensional region with one or more peaks.

    Parameters:

    * shape         Tuple of region shape.
    * lineshapes    List of lineshapes by label (str) or a lineshape class.
                    See fit_NDregion for additional documentation.
    * params        P-length list (P is the number of peaks in region) of
                    N-length lists of tuples where each tuple is the
                    lineshape parameters for a given peak and dimension.
    * amps          P-length list of peak amplitudes.

    Returns: array containing simulated region (the value returned by
    s_NDregion).

    Raises ValueError when the number of lineshapes, per-peak parameters,
    per-dimension parameters or amplitudes does not match the region shape
    and the number of peaks.

    """
    # parse the user-friendly input into the flat format taken by s_NDregion
    ndim = len(shape)

    # parse the lineshape parameter
    if len(lineshapes) != ndim:
        raise ValueError("Incorrect number of lineshapes provided")

    # string labels are converted with ls_str2class, anything else is
    # assumed to already be a lineshape class and is used as given
    ls_classes = [ls_str2class(l) if isinstance(l, str) else l
                  for l in lineshapes]

    # determine the number of parameters needed in each dimension
    dim_nparam = [c.nparam(l) for l, c in zip(shape, ls_classes)]

    # flatten params, validating the length of every level on the way
    n_peaks = len(params)
    p = []
    for i, param in enumerate(params):              # peak loop
        if len(param) != ndim:
            err = "Incorrect number of parameters for peak %i"
            raise ValueError(err % (i))
        for j, dim_param in enumerate(param):       # dimension loop
            if len(dim_param) != dim_nparam[j]:
                err = "Incorrect number of parameters in peak %i dimension %i"
                raise ValueError(err % (i, j))
            p.extend(dim_param)

    # parse the amps parameter
    if len(amps) != n_peaks:
        raise ValueError("Incorrect number of amplitudes provided")
    p = list(amps) + p  # amplitudes go at the front of the flat list

    return s_NDregion(p, shape, ls_classes, n_peaks)
def fit_spectrum(spectrum, lineshapes, params, amps, bounds, ampbounds,
                 centers, rIDs, box_width, error_flag, verb=True, **kw):
    """
    Fit a spectrum by regions which contain one or more peaks.

    Parameters:

    * spectrum      Sliceable spectral data.
    * lineshapes    List of lineshapes by label (str) or a lineshape class.
                    See fit_NDregion for details.
    * params        P-length list (P is the number of peaks in region) of
                    N-length lists of tuples where each tuple is the
                    optimization starting parameters for a given peak and
                    dimension lineshape.
    * amps          P-length list of amplitudes.
    * bounds        List of bounds for parameters of same shape as params.
                    If none of the parameters in a given dimension have
                    limits None can be used, otherwise each dimension should
                    have a list/tuple of (min,max) or None for each
                    parameter.  min or max may be None when there is no
                    bound in a given direction.
    * ampbounds     P-length list of bounds for the amplitude with format
                    similar to bounds.
    * centers       List of N-tuples indicating peak centers.
    * rIDs          P-length list of region numbers (peaks with the same
                    region number are fit together).
    * box_width     N-tuple indicating box width to add and subtract from
                    peak centers to form region around peak to fit.
    * error_flag    Set to True to estimate errors for each lineshape
                    parameter and amplitude.
    * verb          Set to True to print summary of each region fit, False
                    suppresses all printing.
    * kw            Additional keywords passed to the
                    scipy.optimize.leastsq function.

    Returns: param_best,amp_best,iers if error_flag is False
             param_best,amp_best,param_err,amp_err,iers if error_flag is True

    * param_best    Optimal values for lineshape parameters with same format
                    as the params input parameter.
    * amp_best      List of optimal peak amplitudes.
    * param_err     Estimated lineshape parameter errors with same format
                    as the params input parameter. (Optional)
    * amp_err       Estimated peak amplitude errors. (Optional)
    * iers          List of integer flags from scipy.optimize.leastsq
                    indicating if the solution was found for a given peak.
                    1,2,3,4 indicates that a solution was found. Other
                    values indicate an error.

    """
    # one output slot per peak, filled in by index as each region is fit
    pbest = [[] for _ in params]
    pbest_err = [[] for _ in params]
    abest = [[] for _ in params]
    abest_err = [[] for _ in params]
    iers = [[] for _ in params]
    shape = spectrum.shape

    # string labels are converted with ls_str2class, anything else is
    # assumed to already be a lineshape class and is used as given
    ls_classes = [ls_str2class(l) if isinstance(l, str) else l
                  for l in lineshapes]

    cIDs = set(rIDs)    # region values to loop over

    for cID in cIDs:

        # indices of the peaks which belong to this region
        cpeaks = [i for i, v in enumerate(rIDs) if v == cID]

        # select the inputs of those peaks
        cparams = [params[i] for i in cpeaks]
        camps = [amps[i] for i in cpeaks]
        cbounds = [bounds[i] for i in cpeaks]
        campbounds = [ampbounds[i] for i in cpeaks]
        ccenters = [centers[i] for i in cpeaks]

        # find the box edges; round first and only then convert to int so
        # that fractional centers are rounded rather than truncated
        bcenters = np.round(np.array(ccenters)).astype('int')
        bmin = bcenters - box_width
        bmax = bcenters + box_width + 1

        # correct for spectrum edges
        bmin = np.maximum(bmin, 0)
        bmax = np.minimum(bmax, shape)

        # find the region limits (smallest box holding every peak box)
        rmin = edge = np.array(bmin).min(0)
        rmax = np.array(bmax).max(0)

        # cut the spectrum
        s = tuple([slice(mn, mx) for mn, mx in zip(rmin, rmax)])
        region = spectrum[s]

        # express the box limits relative to the region origin
        ebmin = bmin - edge
        ebmax = bmax - edge

        # create the weight mask array, True inside any of the peak boxes
        wmask = np.zeros(region.shape, dtype='bool')
        for bmn, bmx in zip(ebmin, ebmax):
            s = tuple([slice(mn, mx) for mn, mx in zip(bmn, bmx)])
            wmask[s] = True

        # add edges to the initial parameters
        ecparams = [[ls.add_edge(p, (mn, mx)) for ls, mn, mx, p in
                     zip(ls_classes, rmin, rmax, g)] for g in cparams]

        # add edges to the bounds.  The per-parameter (min,max) pairs are
        # transposed into a tuple of mins and a tuple of maxs so add_edge
        # can treat each as a parameter set, then transposed back.  None
        # for a whole peak or dimension is handed to fit_NDregion untouched.
        # TODO: None for a single parameter's bound or for one side of a
        # (min,max) pair is still passed to add_edge as is.
        ecbounds = []
        for pb in cbounds:
            if pb is None:
                ecbounds.append(None)
                continue
            peak_ecbounds = []
            for ls, mn, mx, db in zip(ls_classes, rmin, rmax, pb):
                if db is None:
                    peak_ecbounds.append(None)
                    continue
                shifted = [ls.add_edge(b, (mn, mx)) for b in zip(*db)]
                # list() so that len() works on the result on Python 3
                peak_ecbounds.append(list(zip(*shifted)))
            ecbounds.append(peak_ecbounds)

        # fit the region
        t = fit_NDregion(region, ls_classes, ecparams, camps, ecbounds,
                         campbounds, wmask, error_flag, **kw)

        if error_flag:
            ecpbest, acbest, ecpbest_err, acbest_err, ier = t
            cpbest_err = [[ls.remove_edge(p, (mn, mx)) for ls, mn, mx, p in
                           zip(ls_classes, rmin, rmax, g)]
                          for g in ecpbest_err]
        else:
            ecpbest, acbest, ier = t

        # remove edges from best fit parameters
        cpbest = [[ls.remove_edge(p, (mn, mx)) for ls, mn, mx, p in
                   zip(ls_classes, rmin, rmax, g)] for g in ecpbest]

        if verb:
            # single-argument print calls give the same output on
            # Python 2 and Python 3
            print("-----------------------")
            print("cID: %s ier: %s Peaks fit %s" % (cID, ier, cpeaks))
            print("fit parameters: %s" % (cpbest,))
            print("fit amplitudes %s" % (acbest,))

        # store the results in the slots of the peaks in this region
        for i, pb, ab in zip(cpeaks, cpbest, acbest):
            pbest[i] = pb
            abest[i] = ab
            iers[i] = ier

        if error_flag:
            for i, pb, ab in zip(cpeaks, cpbest_err, acbest_err):
                pbest_err[i] = pb
                abest_err[i] = ab

    if not error_flag:
        return pbest, abest, iers

    return pbest, abest, pbest_err, abest_err, iers
def fit_NDregion(region, lineshapes, params, amps, bounds=None,
                 ampbounds=None, wmask=None, error_flag=False, **kw):
    """
    Fit a N-dimensional region.

    Parameters:

    * region        N-dimensional region to fit.
    * lineshapes    List of lineshapes by label (str) or a lineshape class.
    * params        P-length list (P is the number of peaks in region) of
                    N-length lists of tuples where each tuple is the
                    optimization starting parameters for a given peak and
                    dimension lineshape.
    * amps          P-length list of amplitudes.
    * bounds        List of bounds for parameters of same shape as params.
                    If none of the parameters in a given dimension have
                    limits None can be used, otherwise each dimension should
                    have a list/tuple of (min,max) or None for each
                    parameter.  min or max may be None when there is no
                    bound in a given direction.
    * ampbounds     P-length list of bounds for the amplitude with format
                    similar to bounds.
    * wmask         Array with same shape as region which is used to weight
                    points in the err calculation, typically a boolean
                    array is used to exclude certain points in the region.
                    Default of None will include all points in the region
                    equally in the error calculation.
    * error_flag    Set to True to estimate errors for each lineshape
                    parameter and amplitude.
    * kw            Additional keywords passed to the
                    scipy.optimize.leastsq function.

    Returns: param_best,amp_best,ier if error_flag is False
             param_best,amp_best,param_err,amp_err,ier if error_flag is True

    * param_best    Optimal values for lineshape parameters with same format
                    as the params input parameter.
    * amp_best      List of optimal peak amplitudes.
    * param_err     Estimated lineshape parameter errors with same format
                    as the params input parameter. (Optional)
    * amp_err       Estimated peak amplitude errors. (Optional)
    * ier           Integer flag from scipy.optimize.leastsq indicating if
                    the solution was found.  1,2,3,4 indicates that a
                    solution was found. Otherwise the solution was not
                    found.

    Raises ValueError when any of the inputs has a length or shape which is
    inconsistent with the region or with the number of peaks.

    Note on the lineshape parameter:

    Elements of the lineshape parameter list can be a string indicating the
    lineshape of a given dimension or an instance of a lineshape class
    which provides a sim method which takes two arguments, the first being
    the length of the lineshape, the second being a list of lineshape
    parameters, and returns a simulated lineshape, as well as a nparam
    method which when given the length of the lineshape returns the number
    of parameters needed to describe the lineshape. Currently the following
    strings are allowed:

    * 'g' or 'gauss'    Gaussian (normal) lineshape.
    * 'l' or 'lorentz'  Lorentzian lineshape.
    * 'v' or 'voigt'    Voigt lineshape.
    * 'pv' or 'pvoight' Pseudo Voigt lineshape
    * 's' or 'scale'    Scaled lineshape.

    The first four lineshapes (Gaussian, Lorentzian, Voigt and Pseudo Voigt)
    all take a FWHM scale parameter.

    The following are all valid lineshapes parameters for a 2D Gaussian peak:

    ['g','g']
    ['gauss','gauss']
    [ng.lineshapes1d.gauss(),ng.lineshapes1d.gauss()]

    """
    # this function parses the user-friendly input into the flat format
    # taken by f_NDregion, performs the fitting, then formats the fitting
    # results back into the user-friendly format

    # parse the region parameter
    ndim = region.ndim
    shape = region.shape

    # parse the lineshape parameter
    if len(lineshapes) != ndim:
        raise ValueError("Incorrect number of lineshapes provided")

    # string labels are converted with ls_str2class, anything else is
    # assumed to already be a lineshape class and is used as given
    ls_classes = [ls_str2class(l) if isinstance(l, str) else l
                  for l in lineshapes]

    # determine the number of parameters in each dimension
    dim_nparam = [c.nparam(l) for l, c in zip(shape, ls_classes)]

    # parse params into a flat list of starting values
    n_peaks = len(params)
    p0 = []
    for i, guess in enumerate(params):              # peak loop
        if len(guess) != ndim:
            err = "Incorrect number of params for peak %i"
            raise ValueError(err % (i))
        for j, dim_guess in enumerate(guess):       # dimension loop
            if len(dim_guess) != dim_nparam[j]:
                err = "Incorrect number of parameters in peak %i dimension %i"
                raise ValueError(err % (i, j))
            p0.extend(dim_guess)

    # parse the bounds parameter
    if bounds is None:  # no bounds on any parameter
        peak_bounds = [[(None, None)] * n for n in dim_nparam]
        bounds = [peak_bounds] * n_peaks

    if len(bounds) != n_peaks:
        raise ValueError("Incorrect number of parameter bounds provided")

    # build the flat parameter bound list to be passed to f_NDregion
    p_bounds = []
    for i, peak_bounds in enumerate(bounds):        # peak loop

        if peak_bounds is None:
            # a separate name is used in the comprehension so the peak
            # index i is never clobbered (comprehension variables leak
            # into the enclosing scope on Python 2)
            peak_bounds = [[(None, None)] * n for n in dim_nparam]

        if len(peak_bounds) != ndim:
            err = "Incorrect number of bounds for peak %i"
            raise ValueError(err % (i))

        for j, dim_bounds in enumerate(peak_bounds):    # dimension loop

            if dim_bounds is None:
                dim_bounds = [(None, None)] * dim_nparam[j]

            if len(dim_bounds) != dim_nparam[j]:
                err = "Incorrect number of bounds for peak %i dimension %i"
                raise ValueError(err % (i, j))

            for k, b in enumerate(dim_bounds):      # parameter loop
                if b is None:
                    b = (None, None)
                if len(b) != 2:
                    err = "No min/max for peak %i dim %i parameter %i"
                    raise ValueError(err % (i, j, k))
                p_bounds.append(b)

    # parse amps parameter
    if len(amps) != n_peaks:
        raise ValueError("Incorrect number of amplitude guesses provided")
    p0 = list(amps) + p0    # amplitudes go at the front of p0

    # parse ampbounds parameter
    if ampbounds is None:
        ampbounds = [(None, None)] * n_peaks

    if len(ampbounds) != n_peaks:
        raise ValueError("Incorrect number of amplitude bounds")

    to_add = []
    for k, b in enumerate(ampbounds):
        if b is None:
            b = (None, None)
        if len(b) != 2:
            err = "No min/max for amplitude bound %i"
            raise ValueError(err % (k))
        to_add.append(b)
    p_bounds = to_add + p_bounds    # amplitude bounds at front of p_bounds

    # parse the wmask parameter; an identity test is required here since
    # comparing an array to None with == is an element-wise operation
    if wmask is None:   # default is to include all points in region
        wmask = np.ones(shape, dtype='bool')
    if wmask.shape != shape:
        err = ("wmask has incorrect shape:" + str(wmask.shape) +
               " should be " + str(shape))
        raise ValueError(err)

    # include full_output=True when errors requested
    if error_flag:
        kw["full_output"] = True

    # perform fitting
    r = f_NDregion(region, ls_classes, p0, p_bounds, n_peaks, wmask, **kw)

    # unpack results depending on if full output was requested
    if kw.get("full_output"):
        p_best, cov_xi, infodic, mesg, ier = r
    else:
        p_best, ier = r

    # unpack and repack p_best
    # pull off the amplitudes
    amp_best = p_best[:n_peaks]

    # split the remaining parameters into n_peaks equal sized lists
    p_list = split_list(list(p_best[n_peaks:]), n_peaks)

    # for each peak repack the flat parameter list to reference by dimension
    param_best = [make_slist(l, dim_nparam) for l in p_list]

    # return as is if no errors requested
    if not error_flag:
        return param_best, amp_best, ier

    # calculate errors
    p_err = calc_errors(region, ls_classes, p_best, cov_xi, n_peaks)

    # unpack and repack the errors in the same way as the parameters
    # pull off the amplitude errors
    amp_err = p_err[:n_peaks]

    # split the remaining errors into n_peaks equal sized lists
    pe_list = split_list(list(p_err[n_peaks:]), n_peaks)

    # for each peak repack the flat errors list to reference by dimension
    param_err = [make_slist(l, dim_nparam) for l in pe_list]

    return param_best, amp_best, param_err, amp_err, ier
def pick(data, pthres, nthres=None, msep=None, algorithm='connected',
         est_params=True, lineshapes=None, edge=None, diag=False,
         c_struc=None, c_ndil=0, cluster=True, table=True,
         axis_names=['A', 'Z', 'Y', 'X']):
    """
    Pick (find) peaks in a spectral region.

    Parameters:

    * data          N-dimensional array to pick peaks in.
    * pthres        Minimum peak height for positive peaks. Set to None to
                    not detect positive peaks.
    * nthres        Minimum peak height for negative peaks (typically a
                    negative value).  Set to None to not detect negative
                    peaks.
    * msep          N-tuple of minimum peak separations along each axis.
                    Must be defined if algorithm is 'thres' or 'thres-fast'.
    * algorithm     Peak picking algorithm to use.  Options are 'thres',
                    'thres-fast', 'downward', or 'connected'.
    * est_params    Set to True to perform a rough estimate of linewidths
                    and amplitude for all peaks picked.  False returns only
                    the peak locations.
    * lineshapes    A list of lineshape classes or string shortcuts for each
                    dimension.  If not specified Gaussian type lineshapes
                    with a FWHM linewidth parameter are assumed in each
                    dimension.  This parameter is only used if est_params
                    is True.
    * edge          Tuple to add to peak locations representing the edge of
                    a sliced region.  None skips this addition.
    * diag          Set True to consider diagonal points to be touching in
                    the peak finding algorithm and clustering.
    * c_struc       Structure element to use when applying dilation on
                    segments before applying the clustering algorithm. With
                    None a default square structure with connectivity one
                    will be used.
    * c_ndil        Number of dilations to perform on segments before
                    applying the clustering algorithm.
    * cluster       Set True to cluster touching peaks.
    * table         Set True to return a table.
    * axis_names    List of axis names, the last n will be used for column
                    name prefixes in table where n is the number of
                    dimensions.

    Returns: locations,[cluster_ids,[scales,amps]] or table

    * locations     Peak locations.
    * cluster_ids   Cluster numbers of the peaks, only when cluster is True.
    * scales        Estimated lineshape scale parameters, only when
                    est_params is True.
    * amps          Estimated peak amplitudes, only when est_params is True.
    * table         Table packing the above, returned when table is True.

    Raises ValueError when algorithm is unknown, when msep or edge have an
    incorrect length, or when the lineshapes are not usable for parameter
    estimation.

    """
    ####################
    # Check parameters #
    ####################
    ndim = len(data.shape)

    # check algorithm
    if algorithm not in ['thres', 'thres-fast', 'downward', 'connected']:
        raise ValueError('Invalid algorithm %s' % (algorithm))

    # check msep, which is only needed by the threshold algorithms
    if isinstance(msep, int):
        msep = (msep,)
    if algorithm in ['thres', 'thres-fast']:
        if msep is None:
            raise ValueError("msep must be defined for algorithm %s" %
                             (algorithm))
        if len(msep) != ndim:
            raise ValueError("msep has incorrect length")

    # check lineshapes
    if est_params:
        # expand None
        if lineshapes is None:
            lineshapes = [gauss() for i in range(ndim)]

        # replace strings
        ls_classes = [ls_str2class(l) if isinstance(l, str) else l
                      for l in lineshapes]

        # check that all classes have 2 parameters
        for i, ls in enumerate(ls_classes):
            if ls.nparam(10) != 2:
                s = "Lineshape class %i does not have two parameters"
                raise ValueError(s % (i))

        if len(ls_classes) != ndim:
            raise ValueError("Incorrect number of lineshapes")

    if edge is not None and len(edge) != ndim:
        raise ValueError("edge has incorrect length")

    # the finder functions return (locations, segments) when their
    # segment flag is True and only the locations otherwise
    want_seg = bool(est_params)

    #######################
    # find positive peaks #
    #######################
    if pthres is None:  # no locations
        ploc = []
        pseg = []
    else:
        if algorithm in ('thres', 'thres-fast'):
            # NOTE(review): both 'thres' and 'thres-fast' use
            # find_all_thres_fast here while the negative branch has a
            # separate find_all_nthres for 'thres' - confirm this is
            # intended.
            found = find_all_thres_fast(data, pthres, msep, want_seg)
        elif algorithm == 'downward':
            found = find_all_downward(data, pthres, want_seg, diag)
        else:   # 'connected', algorithm was validated above
            found = find_all_connected(data, pthres, want_seg, diag)

        if want_seg:
            ploc, pseg = found
        else:
            ploc = found
            pseg = []

    #######################
    # find negative peaks #
    #######################
    if nthres is None:  # no locations
        nloc = []
        nseg = []
    else:
        if algorithm == 'thres':
            found = find_all_nthres(data, nthres, msep, want_seg)
        elif algorithm == 'thres-fast':
            found = find_all_nthres_fast(data, nthres, msep, want_seg)
        elif algorithm == 'downward':
            found = find_all_upward(data, nthres, want_seg, diag)
        else:   # 'connected', algorithm was validated above
            found = find_all_nconnected(data, nthres, want_seg, diag)

        if want_seg:
            nloc, nseg = found
        else:
            nloc = found
            nseg = []

    # combine the positive and negative peaks
    locations = ploc + nloc

    #########################################################
    # return locations if no parameter estimation requested #
    #########################################################
    if not est_params:
        if cluster:     # find clusters
            cluster_ids = clusters(data, locations, pthres, nthres, c_struc,
                                   None, c_ndil)
            locations = add_edge(locations, edge)
            if table:
                return pack_table(locations, cluster_ids,
                                  axis_names=axis_names)
            return locations, cluster_ids

        # do not determine clusters
        locations = add_edge(locations, edge)
        if table:
            return pack_table(locations, axis_names=axis_names)
        return locations

    ##################################
    # estimate scales and amplitudes #
    ##################################
    seg_slices = pseg + nseg
    scales = [[] for _ in locations]
    amps = [[] for _ in locations]
    for i, (l, seg_slice) in enumerate(zip(locations, seg_slices)):
        # the first value returned by guess_params_slice is not used
        null, scales[i], amps[i] = guess_params_slice(data, l, seg_slice,
                                                      ls_classes)

    ########################################################
    # return locations, scales and amplitudes as requested #
    ########################################################
    if cluster:
        cluster_ids = clusters(data, locations, pthres, nthres, c_struc,
                               None, c_ndil)
        locations = add_edge(locations, edge)
        if table:
            return pack_table(locations, cluster_ids, scales, amps,
                              axis_names)
        return locations, cluster_ids, scales, amps

    locations = add_edge(locations, edge)
    if table:
        return pack_table(locations, scales=scales, amps=amps,
                          axis_names=axis_names)
    return locations, scales, amps