def load(self, filename):
    """Restore saved results and extraction state from a pickle file.

    The file must contain a two-element sequence ``(results, ppestr)``.
    ``ppestr`` is passed to ``PDFPeakExtraction.readstr()`` to rebuild
    ``self.ppe``.  The third element of every entry in ``results`` is either
    None or a dict of keyword arguments from which the baseline is
    re-actualized.  Once loaded, the first result is made current via
    ``self.setcurrent(0)``.

    Parameters
    ----------
    filename : str
        Path of the pickle file to read.

    Raises
    ------
    Whatever ``open()``, ``pickle.load()``, or the baseline reconstruction
    raise; the file is closed in every case.
    """
    try:
        import cPickle as pickle
    except ImportError:
        # Python 3, or a Python 2 build without the C accelerator.
        import pickle

    with open(filename, 'rb') as in_s:
        # SECURITY: unpickling can execute arbitrary code.  Only load files
        # that come from a trusted source.
        (self.results, ppestr) = pickle.load(in_s)
        self.ppe = PDFPeakExtraction()
        self.ppe.readstr(ppestr)

        # Ugly kluge for the baseline, since FromSequence can't pickle: the
        # baseline is stored as a dict of keywords and rebuilt here.
        bl = self.ppe.baseline
        for r in self.results:
            kwds = r[2]
            if kwds is None:
                continue
            if hasattr(bl, "estimate_parameters"):
                # bl exposes estimate_parameters, so actualize it directly.
                r[2] = bl.actualize(default_input="internal", **kwds)
            else:
                # Otherwise go through the object returned by bl.owner().
                r[2] = bl.owner().actualize(in_format="internal", **kwds)

    self.setcurrent(0)
def run(plot=True):
    """Run multimodel selection on a saved trial over a range of dg values.

    Reads "output/query_results.srmise", restricts the extraction range to
    [10.9, 15], extracts peaks for 15 evenly spaced uncertainties, and writes
    "output/known_dG_models.dat" and "output/known_dG_aics.dat".

    The ``plot`` argument is accepted but not used by this function.
    """
    ## Suppress mundane output
    # Valid levels include "debug", "info" (the default), "warning", "error",
    # and "critical".  See diffpy.srmise.srmiselog for more information.
    sml.setlevel("warning")

    ## Initialize peak extraction from saved trial
    extraction = PDFPeakExtraction()
    extraction.read("output/query_results.srmise")
    extraction.clearcalc()

    ## Set up extraction parameters
    # Everything else comes from the .srmise file; values set here override
    # the loaded ones.  The range is a region of the PDF with some overlap.
    extraction.setvars(rng=[10.9, 15])

    ## Create multimodel selection object.
    # MultimodelSelection keeps track of the results of peak extraction as
    # the assumed uncertainty dg is varied.
    selector = MultimodelSelection()
    selector.setppe(extraction)

    ## Define range of dg values
    # 15 evenly-spaced values between 50% and 120% of the mean experimental
    # dG within the extraction range.
    in_range = extraction.getrangeslice()
    mean_dg = np.mean(extraction.dy[in_range])
    dg_values = np.linspace(.5 * mean_dg, 1.2 * mean_dg, 15)

    ## Perform peak extraction for each of the assumed uncertainties.
    selector.run(dg_values)

    ## Save results
    # known_dG_models.dat holds the models generated above.
    # known_dG_aics.dat holds the AIC of each model evaluated on a grid with
    # spacing pi/qmax for each of the dg values used to generate the models.
    grid_step = np.pi / extraction.qmax
    selector.save("output/known_dG_models.dat")
    selector.makeaics(dg_values, grid_step,
                      filename="output/known_dG_aics.dat")
def run(plot=True):
    """Extract models from a saved trial for several assumed uncertainties.

    Loads "output/query_results.srmise", limits extraction to r in
    [10.9, 15], runs MultimodelSelection for 15 values of dg, and saves the
    models and AICs under "output/".

    ``plot`` is part of the signature but is not referenced in the body.
    """
    ## Suppress mundane output
    # Useful when running many trials.  Valid levels include "debug", "info"
    # (the default), "warning", "error", and "critical".  See
    # diffpy.srmise.srmiselog for more information.
    sml.setlevel("warning")

    ## Initialize peak extraction from saved trial
    saved_trial = PDFPeakExtraction()
    saved_trial.read("output/query_results.srmise")
    saved_trial.clearcalc()

    ## Set up extraction parameters
    # All parameters are loaded from the .srmise file; new values override
    # them.  This range is a region of the PDF with some overlap.
    overrides = {"rng": [10.9, 15]}
    saved_trial.setvars(**overrides)

    ## Create multimodel selection object.
    # It records the result of peak extraction for each assumed dg.
    multimodel = MultimodelSelection()
    multimodel.setppe(saved_trial)

    ## Define range of dg values
    # For illustration: 15 evenly-spaced values with
    # 50% < dg < 120% of the mean experimental dG in the extraction range.
    dg_mean = np.mean(saved_trial.dy[saved_trial.getrangeslice()])
    lowest = .5 * dg_mean
    highest = 1.2 * dg_mean
    uncertainties = np.linspace(lowest, highest, 15)

    ## Perform peak extraction for each of the assumed uncertainties.
    multimodel.run(uncertainties)

    ## Save results
    # Models go to known_dG_models.dat; the AIC of every model for every dg,
    # evaluated with grid spacing dr = pi/qmax, goes to known_dG_aics.dat.
    dr = np.pi / saved_trial.qmax
    multimodel.save("output/known_dG_models.dat")
    multimodel.makeaics(uncertainties, dr,
                        filename="output/known_dG_aics.dat")
def run(plot=True):
    """Run multimodel selection on the C60 PDF with an interpolated baseline.

    Loads "data/C60_fine_qmax21.gr", uses a FromSequence baseline read from
    "data/C60baseline.dat", extracts peaks for 20 assumed uncertainties, and
    writes "output/unknown_dG_models.dat" and "output/unknown_dG_aics.dat".

    The ``plot`` argument is accepted but not used by this function.
    """
    ## Suppress mundane output
    # Valid levels include "debug", "info" (the default), "warning", "error",
    # and "critical".  See diffpy.srmise.srmiselog for more information.
    sml.setlevel("warning")

    ## Initialize peak extraction
    extraction = PDFPeakExtraction()
    extraction.loadpdf("data/C60_fine_qmax21.gr")

    ## Set up extraction parameters
    # The FromSequence baseline interpolates (r, G(r)) values read from the
    # given file; it is actualized below with an empty parameter list.  This
    # particular baseline was calculated by approximating the C60 sample as a
    # face-centered cubic lattice of hollow spheres.
    baseline_function = FromSequence("data/C60baseline.dat")
    extraction.setvars(
        rng=[1., 7.25],
        baseline=baseline_function.actualize([]),
        cres=0.05,
    )

    ## Create multimodel selection object.
    # MultimodelSelection keeps track of the results of peak extraction as
    # the assumed uncertainty dg is varied.
    selector = MultimodelSelection()
    selector.setppe(extraction)

    ## Define range of dg values
    # 20 evenly-spaced values between 1% and 10% of the largest G(r) value
    # inside the extraction range.
    in_range = extraction.getrangeslice()
    largest_gr = np.max(extraction.y[in_range])
    dg_values = np.linspace(.01 * largest_gr, .10 * largest_gr, 20)

    ## Perform peak extraction for each of the assumed uncertainties.
    selector.run(dg_values)

    ## Save results
    # unknown_dG_models.dat holds the models generated above.
    # unknown_dG_aics.dat holds the AIC of each model evaluated on a grid with
    # spacing pi/qmax for each of the dg values used to generate the models.
    grid_step = np.pi / extraction.qmax
    selector.save("output/unknown_dG_models.dat")
    selector.makeaics(dg_values, grid_step,
                      filename="output/unknown_dG_aics.dat")
def run(plot=True):
    """Extract C60 models for several assumed uncertainties and save them.

    The PDF is read from "data/C60_fine_qmax21.gr" and the baseline from
    "data/C60baseline.dat".  Results are written to
    "output/unknown_dG_models.dat" and "output/unknown_dG_aics.dat".

    ``plot`` is part of the signature but is not referenced in the body.
    """
    ## Suppress mundane output
    # Useful when running many trials.  Valid levels include "debug", "info"
    # (the default), "warning", "error", and "critical".  See
    # diffpy.srmise.srmiselog for more information.
    sml.setlevel("warning")

    ## Initialize peak extraction
    c60 = PDFPeakExtraction()
    c60.loadpdf("data/C60_fine_qmax21.gr")

    ## Set up extraction parameters
    # FromSequence interpolates (r, G(r)) values read from a file and is
    # actualized here with an empty parameter list.  The baseline was
    # calculated by approximating the C60 sample as a face-centered cubic
    # lattice of hollow spheres.
    interpolated = FromSequence("data/C60baseline.dat")
    settings = {
        "rng": [1., 7.25],
        "baseline": interpolated.actualize([]),
        "cres": 0.05,
    }
    c60.setvars(**settings)

    ## Create multimodel selection object.
    # It records the result of peak extraction for each assumed dg.
    multimodel = MultimodelSelection()
    multimodel.setppe(c60)

    ## Define range of dg values
    # For illustration: 20 evenly-spaced values with
    # 1% < dg < 10% of the max G(r) value between r=1 and 7.25.
    grmax = np.max(c60.y[c60.getrangeslice()])
    lowest = .01 * grmax
    highest = .10 * grmax
    uncertainties = np.linspace(lowest, highest, 20)

    ## Perform peak extraction for each of the assumed uncertainties.
    multimodel.run(uncertainties)

    ## Save results
    # Models go to unknown_dG_models.dat; the AIC of every model for every
    # dg, evaluated with grid spacing dr = pi/qmax, goes to
    # unknown_dG_aics.dat.
    dr = np.pi / c60.qmax
    multimodel.save("output/unknown_dG_models.dat")
    multimodel.makeaics(uncertainties, dr,
                        filename="output/unknown_dG_aics.dat")
def run(plot=True):
    """Extract the isolated nearest-neighbor peak of the silver PDF.

    Loads "data/Ag_nyquist_qmax30.gr", extracts peaks between 2 and 3.5 with
    a degree-1 polynomial baseline, and writes the result to
    "output/extract_single_peak.srmise" and "output/extract_single_peak.pwa".

    Parameters
    ----------
    plot : bool
        When true, draw the result with ``makeplot`` and call ``plt.show()``.
    """
    ## Initialize peak extraction
    extraction = PDFPeakExtraction()
    extraction.loadpdf("data/Ag_nyquist_qmax30.gr")

    ## Set up extraction parameters.
    # "rng" (range) is the region over which peaks are extracted and fit.
    # For the well isolated nearest-neighbor silver peak, which occurs near
    # 2.9 angstroms, 2 to 3.5 angstroms is sufficient.
    #
    # "baseline" defines the PDF baseline, which is linear for a crystal.  If
    # a linear baseline is specified without numerical parameters,
    # diffpy.srmise attempts to estimate them from the data; this is usually
    # sufficient when peaks do not overlap much.
    extraction.setvars(rng=[2.0, 3.5], baseline=Polynomial(degree=1))

    ## Perform peak extraction
    extraction.extract()

    ## Save output
    # write() saves a file which preserves all aspects of peak extraction and
    # its results, by convention with the .srmise extension, and which can
    # later be read by diffpy.srmise.
    #
    # writepwa() saves a human-readable summary reporting the position, width
    # (full-width at half-maximum), and area of extracted peaks.  The
    # reported values are for Gaussians in the radial distribution function
    # (RDF) corresponding to this PDF.
    extraction.write("output/extract_single_peak.srmise")
    extraction.writepwa("output/extract_single_peak.pwa")

    ## Plot results.
    # An existing .srmise file can also be plotted from the command line with
    #     srmise output/Ag_singlepeak.srmise --no-extract --plot
    # For additional plotting options, run "srmiseplot --help".
    if not plot:
        return
    makeplot(extraction)
    plt.show()
def run(plot=True):
    """Fit a prespecified set of initial peaks to the C60 PDF.

    Loads "data/C60_fine_qmax21.gr", builds an interpolated baseline from
    "data/C60baseline.dat", adds estimated and explicit initial peaks that use
    a TerminationRipples peak function, fits them, and writes
    "output/fit_initial.srmise" and "output/fit_initial.pwa".

    Parameters
    ----------
    plot : bool
        When true, plot the initial peaks and the fitted result.
    """
    ## Initialize peak extraction
    fitter = PDFPeakExtraction()
    fitter.loadpdf("data/C60_fine_qmax21.gr")

    ## Set up interpolated baseline.
    # FromSequence creates an interpolated baseline from provided r and G(r)
    # values, either two lists or a file containing (r, G(r)) pairs.  The
    # baseline has no parameters.  This one was estimated by fitting
    # interparticle correlations of an FCC lattice of hollow spheres to the
    # PDF.
    baseline_function = FromSequence("data/C60baseline.dat")
    baseline = baseline_function.actualize([])

    ## Set up fitting parameters
    # How parameters impact fitting:
    # "rng" - Same as peak extraction.
    # "baseline" - Same as peak extraction.
    # "qmax" and "nyquist" - If qmax > 0 and nyquist is true, fitting is
    #     performed on a Nyquist-sampled grid.  The data are never
    #     supersampled first.
    # "dg" - Since the model to fit is prespecified, the uncertainty does not
    #     impact model complexity.  It affects refined parameter values and
    #     estimated uncertainties as per standard chi-square fitting.
    # "pf" - The peak function used when estimating peak parameters given an
    #     approximate position.  Unlike peak extraction, peak fitting never
    #     alters the peak function used by initial peaks.
    # "cres" - Estimation of peak parameters given an approximate position
    #     uses clustering for peak finding.  No other effect on peak fitting.
    # "supersample" - No effect.
    fitter.setvars(
        rng=[1., 7.25],
        baseline=baseline,
        cres=0.05,
        dg=5000,  # ad hoc, but gives each point equal weight in fit.
    )

    ## Set up termination ripples
    # Peak fitting never changes the peak function, so termination ripples
    # are not applied automatically as they are in peak extraction.  They
    # require the underlying peak function and qmax; here they are added to
    # the default GaussianOverR peak function.  TerminationRipples use the
    # estimation methods of the base peak function.
    ripple_pf = TerminationRipples(fitter.pf[0], fitter.qmax)
    fitter.setvars(pf=[ripple_pf])

    # Initial peaks from approximate positions.  These use the peak function
    # passed to the PDFPeakExtraction instance.
    for position in (1.4, 2.4, 2.8, 3.6, 4.1, 4.5, 4.8, 5.2, 5.4, 5.7, 6.1):
        fitter.estimate_peak(position)

    # Initial peaks given explicitly as (position, width, area).  These may
    # be constructed from any peak function, or combination of them.
    actualized = []
    for pwa in ([6.7, .3, 100000], [7.0, .15, 50000]):
        actualized.append(ripple_pf.actualize(pwa, in_format="pwa"))
    fitter.add_peaks(Peaks(actualized))

    # Plot initial peaks
    if plot:
        makeplot(fitter)
        plt.title("Initial Peaks")

    # Perform fit.
    fitter.fit()

    ## Save results
    fitter.write("output/fit_initial.srmise")
    fitter.writepwa("output/fit_initial.pwa")

    if not plot:
        return
    plt.figure()
    makeplot(fitter)
    plt.show()
def main():
    """Command-line entry point: plot a SrMise or PeakStability file.

    Parses ``sys.argv[1:]``.  Exactly one positional argument (the file to
    plot) is required; otherwise ``parser.error`` is called, which exits the
    process.  The file is first read as a PDFPeakExtraction file and, if that
    raises, loaded as a PeakStability file.  If both fail a message is
    printed and the function returns without plotting.

    Options: --compare, --model, --show, -o/--output, --format.
    """
    # configure options parsing
    usage = ("%prog srmise_file [options]\n"
             "srmise_file can be an extraction file saved by SrMise, "
             "or a data file saved by PeakStability.")
    descr = ("A very basic tool for somewhat prettier plotting than provided by "
             "the basic SrMise classes. Can be used to compare peak positions "
             "with those from a list.\n"
             "NOTE: At this time the utility only works with peaks extracted using diffpy.srmise.PDFPeakExtraction.")

    parser = optparse.OptionParser(usage=usage, description=descr)
    parser.add_option("--compare", type="string",
                      help="Compare extracted distances to distances listed (1/line) in this file.")
    parser.add_option("--model", type="int",
                      help="Plot given model from set. Ignored if srmise_file is not a PeakStability file.")
    parser.add_option("--show", action="store_true",
                      help="execute pylab.show() blocking call")
    parser.add_option("-o", "--output", type="string",
                      help="save plot to the specified file")
    parser.add_option("--format", type="string", default="eps",
                      help="output format for plot saving")
    parser.allow_interspersed_args = True
    opts, args = parser.parse_args(sys.argv[1:])

    if len(args) != 1:
        parser.error("Exactly one argument required. \n" + usage)

    # args has exactly one entry here, so filename is always set.
    filename = args[0]

    # Try the extraction-file format first, then fall back to PeakStability.
    toplot = PDFPeakExtraction()
    try:
        toplot.read(filename)
    except Exception:
        toplot = PeakStability()
        try:
            toplot.load(filename)
        except Exception:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("File '%s' is not a .srmise or PeakStability data file." % filename)
            return

    if opts.model is not None:
        try:
            toplot.setcurrent(opts.model)
        except Exception:
            # Any failure of setcurrent is reported with this message.
            print("Ignoring model, %s is not a PeakStability file." % filename)

    # Optional reference distances to compare against.
    distances = None
    if opts.compare is not None:
        distances = readcompare(opts.compare)

    setfigformat(figsize=(6., 4.0))
    makeplot(toplot, distances)
    if opts.output:
        plt.savefig(opts.output, format=opts.format, dpi=600)

    if opts.show:
        plt.show()
    else:
        plt.draw()
    return
def run(plot=True):
    """Extract peaks from the TiO2 PDF while spelling out every parameter.

    Loads "data/TiO2_fine_qmax26.gr", sets the major extraction parameters
    explicitly, adds initial peaks both by approximate position and by
    explicit parameters, marks them non-removable, extracts, and writes
    "output/parameter_summary.srmise" and "output/parameter_summary.pwa".

    Parameters
    ----------
    plot : bool
        When true, plot the initial peaks and the extracted result.
    """
    ## Initialize peak extraction
    extraction = PDFPeakExtraction()
    extraction.loadpdf("data/TiO2_fine_qmax26.gr")

    ###### Set up extraction parameters.
    # diffpy.srmise strives to provide reasonable defaults.  For normal use
    # setting the range, baseline, and uncertainty should be sufficient.

    ## baseline
    # As a crystal PDF, a linear baseline crossing the origin is appropriate.
    # The baseline below is B(r) = -.65*r + 0, with the y-intercept fixed so
    # that it is not fit.  For crystal PDFs the theoretical slope is
    # -4*pi*rho0, where rho0 is the number density.  Nevertheless, imperfect
    # normalization of the PDF means the experimental baseline is
    # proportional to that value.
    linear = Polynomial(degree=1)
    slope = -.65  # Play with this value!
    y_intercept = 0.
    baseline = linear.actualize([slope, y_intercept], free=[True, False])

    ## pf
    # The pf (peak function) parameter sets the shape of peaks to be
    # extracted.  Termination effects are added automatically to the peak
    # function during extraction.  In the harmonic approximation of atomic
    # interactions peaks in the PDF are well approximated by a Gaussian/r.
    # (Note, however, that the values used for peak parameters -- namely
    # position, width, and area -- are for the Gaussian itself.)
    # diffpy.srmise uses width-limited peaks to reduce the likelihood of
    # extracting unphysically wide peaks in regions of high overlap.  The
    # argument is the max fwhm permitted.  By default diffpy.srmise uses a
    # maximum width of 0.7, which is generally reasonable if the r-axis of
    # the PDF is given in angstroms.  Models where many peaks reach the
    # maximum width, and models that are very sensitive to the choice of
    # maximum width, are strong signs that diffpy.srmise is having difficulty
    # finding peaks which are sufficiently constrained by the data.
    peak_function = GaussianOverR(0.7)

    settings = {
        ## Range
        # Defaults to the entire PDF if not specified.
        "rng": [1.5, 10.],

        ## dg
        # diffpy.srmise selects model complexity based primarily on the
        # uncertainty of the PDF.  Very small uncertainties (<1%) can make
        # peak extraction excessively slow.  In general, the smaller the
        # uncertainty the more complex the model.  PDFs which report no
        # uncertainty, or report unreliable values, must be assigned one.  By
        # default, a PDF which does not report uncertainties uses 5% of the
        # maximum minus minimum values.  Common causes of unreliable
        # uncertainties include oversampling (uncertainties in nearby data
        # are strongly correlated, as for this PDF) and/or integrated
        # diffraction patterns obtained by a method that also introduces
        # correlation to the 1D diffraction pattern.  Consequently, the
        # assumption of both least-squares fitting and the Akaike Information
        # Criterion that the data are at least approximately independently
        # distributed is not valid.  In this case results obtained by
        # diffpy.srmise may be useful, especially when they can be
        # interpreted in light of prior knowledge, but strong statistical
        # conclusions cannot be drawn.  For additional discussion of this
        # subtle yet important issue see:
        # [1] Egami and Billinge. (2012). Underneath the Bragg Peaks:
        #     Structural Analysis of Complex Materials (2nd ed.). Oxford:
        #     Pergamon Press.
        # [2] Granlund, et al. (2015) Acta Crystallographica A, 71(4),
        #     392-409. doi:10.1107/S2053273315005276
        # [3] Yang, et al. (2014). Journal of Applied Crystallography, 47(4),
        #     1273-1283. doi:10.1107/S1600576714010516
        "dg": 0.35,  # Play with this value!

        "baseline": baseline,

        # Despite the list, only one entry is currently supported.
        "pf": [peak_function],

        ## qmax
        # PDFs typically report the value of qmax (i.e. the maximum momentum
        # transfer q in the measurement), but it can be specified explicitly
        # also.  If the PDF does not report qmax, diffpy.srmise attempts to
        # estimate it directly from the data.  This estimate can also be used
        # by setting qmax to "automatic".  An infinite qmax can be specified
        # by setting qmax to 0.  In that case the Nyquist rate is 0 (infinite
        # resolution), and diffpy.srmise does not consider Nyquist sampling
        # or termination effects.
        "qmax": 26.0,

        ## nyquist
        # Governs whether diffpy.srmise attempts to find a model on a
        # Nyquist-sampled grid with dr=pi/qmax, which is a grid where data
        # uncertainties are least correlated without loss of information.  By
        # default this parameter is True whenever qmax > 0, and generally it
        # should not need to be changed.  Setting it to False allows
        # extracted models to retain more complexity because the data appear
        # to have more statistically independent points than they truly do.
        # For a detailed discussion of Nyquist sampling and the PDF see:
        # [4] Farrow et al. (2011). Physical Review B, 84(13), 134105.
        #     doi:10.1103/PhysRevB.84.134105
        "nyquist": True,

        ## supersample
        # Dictates that the data be oversampled by at least this factor
        # (relative to the Nyquist rate) during the early stages of peak
        # extraction.  If the input PDF is even more finely sampled, that
        # level of sampling is used instead.  The default value of 4.0 is ad
        # hoc, but has been empirically sufficient.  Increasing this value
        # may help the peak-finding and clustering process, but reduces
        # speed.
        "supersample": 4.0,

        ## cres
        # The cres (clustering resolution) parameter governs the sensitivity
        # of the clustering method used by diffpy.srmise.  In short, when the
        # data are being clustered, data which are further than the
        # clustering resolution from any other cluster (measured along the
        # r-axis) are considered to be a new cluster rather than a member of
        # an existing one.  The default value is the Nyquist sampling
        # interval pi/qmax, and on most data it should not greatly impact
        # model complexity.  In some cases making it smaller may help the
        # peak-finding process.  Here it is roughly half the Nyquist
        # interval.
        "cres": 0.05,
    }

    # Apply peak extraction parameters.
    extraction.setvars(**settings)

    ## initial_peaks
    # Initial peaks are kept fixed during the early stages of peak
    # extraction, effectively conditioning results upon their values.  Since
    # initial peaks are sometimes dependent on other SrMise parameters (e.g.
    # the peak function used) it is good practice to set them after other
    # parameters.  Although "initial_peaks" can be set like the parameters
    # above, SrMise provides helper functions to do so more easily.  There
    # are two basic ways to quickly specify initial peaks:
    # 1) Supplying the approximate position of the peak, and letting
    #    diffpy.srmise estimate the peak parameters.
    # 2) Explicit specification of peak parameters.

    ## Initial peaks from approximate positions.
    # This routine estimates peak parameters by finding the peak-like cluster
    # containing the specified point.  It does not search for occluded peaks,
    # so works best on well-separated peaks.  It does, however, take any
    # existing initial peaks into account during estimation.
    for approximate_position in (2.0, 4.5):
        extraction.estimate_peak(approximate_position)  # adds to initial_peaks

    ## Initial peaks from explicit parameters.
    # Adding initial peaks explicitly is similar to defining a baseline:
    # choose a peak function and actualize it with given parameters.  Here
    # the peaks are created from the same GaussianOverR used during
    # extraction, but one could use a different peak function from
    # diffpy.srmise.peaks if desired.  The parameters are given as position,
    # width (fwhm), and area, and it is important to specify that format so
    # they are correctly changed into the internal parameterization.  Three
    # peaks are added in a region of overlap, with the width parameter fixed
    # at a reasonable value to aid convergence in this region.
    explicit_pwa = [[6.2, 0.25, 2.6], [6.45, 0.25, 2.7], [7.15, 0.25, 5]]
    explicit_peaks = [
        peak_function.actualize(pwa, free=[True, False, True],
                                in_format="pwa")
        for pwa in explicit_pwa
    ]
    extraction.add_peaks(explicit_peaks)  # adds to initial_peaks

    ## Initial peaks and pruning
    # While initial peaks condition what other peaks can be extracted, by
    # default they can also be pruned if a simpler model appears better.  To
    # prevent this, they are set as non-removable.
    for initial_peak in extraction.initial_peaks:
        initial_peak.removable = False

    ## Plot initial parameters
    if plot:
        makeplot(extraction)
        plt.title("Initial Peaks")

    ###### Perform peak extraction
    extraction.extract()

    ## Save output
    # write() saves a file which preserves all aspects of peak extraction and
    # its results, by convention with the .srmise extension, and which can
    # later be read by diffpy.srmise.
    #
    # writepwa() saves a human-readable summary reporting the position, width
    # (full-width at half-maximum), and area of extracted peaks.  The
    # reported values are for Gaussians in the radial distribution function
    # (RDF) corresponding to this PDF.
    extraction.write("output/parameter_summary.srmise")
    extraction.writepwa("output/parameter_summary.pwa")

    ## Plot results.
    # An existing .srmise file can also be plotted from the command line with
    #     srmise output/TiO2_parameterdetail.srmise --no-extract --plot
    if not plot:
        return
    plt.figure()
    makeplot(extraction)
    plt.show()
def run(plot=True):
    """Extract silver peaks and show how to query the results.

    Loads "data/Ag_nyquist_qmax30.gr", takes the baseline from the saved
    trial "output/extract_single_peak.srmise", extracts peaks between 2 and
    10 angstroms, prints parameter values, uncertainties, covariances and
    estimated coordination-shell intensities, and writes
    "output/query_results.srmise" and "output/query_results.pwa".

    Parameters
    ----------
    plot : bool
        When true, plot the data together with every second extracted peak.
    """
    ## Initialize peak extraction
    # Create peak extraction object
    ppe = PDFPeakExtraction()

    # Load the PDF from a file
    ppe.loadpdf("data/Ag_nyquist_qmax30.gr")

    # Obtain baseline from a saved diffpy.srmise trial.  This is not the
    # initial baseline estimate from the previous example, but the baseline
    # after both it and the extracted peaks have been fit to the data.
    ppebl = PDFPeakExtraction()
    ppebl.read("output/extract_single_peak.srmise")
    baseline = ppebl.extracted.baseline

    ## Set up extraction parameters.
    # Peaks are extracted between 2 and 10 angstroms, using the baseline
    # from the isolated peak example.
    kwds = {}
    kwds["rng"] = [2.0, 10.]
    kwds["baseline"] = baseline

    # Apply peak extraction parameters.
    ppe.setvars(**kwds)

    ## Perform peak extraction, and retain object containing a copy of the
    # model and the full covariance matrix.
    cov = ppe.extract()

    # NOTE: every print below passes a single pre-formatted string so that the
    # call produces identical output on Python 2 and Python 3.
    print("\n======= Accessing SrMise Results ========")
    ## Accessing results of extraction
    #
    # Model parameters are organized using a nested structure, with a list
    # of peaks each of which is a list of parameters, similar to the
    # following schematic.
    #     Peak
    #         Position
    #         Width
    #         Area
    #     Peak
    #         Position
    #         Width
    #         Area*
    #     ...
    #     Baseline
    #         Slope
    #         Intercept
    # By convention, the baseline is the final "peak."  The ModelCovariance
    # object returned by extract() can return information about any peak by
    # using the appropriate tuple of indices (i,j).  That is, (i,j) denotes
    # the jth parameter of the ith peak.  For example, the starred parameter
    # above is the area (index = 2) of the next nearest neighbor (index = 1)
    # peak.  Thus, this parameter can be referenced as (1,2).  Several
    # examples are presented below.

    print("\n------ Parameter values and uncertainties ------")
    # ModelCovariance.get() returns a (value, uncertainty) tuple for a given
    # parameter.  These are the results for the nearest-neighbor peak.
    p0 = cov.get((0, 0))
    w0 = cov.get((0, 1))
    a0 = cov.get((0, 2))
    print("Nearest-neighbor peak: ")
    print(" position = %f +/- %f" % p0)
    print(" width = %f +/- %f" % w0)
    print(" area = %f +/- %f" % a0)
    # The double space reproduces the separator the original two-argument
    # print statement inserted after the label.
    print(" Covariance(width, area) =  %s"
          % (cov.getcovariance((0, 1), (0, 2)),))

    # Baseline parameters.  By convention, baseline is final element in cov.
    print("\nThe linear baseline B(r)=%f*r + %f" % tuple(cov.model[-1]))

    print("\n ------ Uncertainties from a Saved File --------")
    # A .srmise file does not save the full covariance matrix, so it must be
    # recalculated when loading from these files.  For example, here is the
    # nearest-neighbor peak in the file which we used to define the initial
    # baseline.
    cov2 = ModelCovariance()
    ppebl.extracted.fit(fitbaseline=True, cov=cov2,
                        cov_format="default_output")
    p0_saved = cov2.get((0, 0))
    w0_saved = cov2.get((0, 1))
    a0_saved = cov2.get((0, 2))
    print("Nearest-neighbor peak:")
    print(" position = %f +/- %f" % p0_saved)
    # Labels corrected from "width ==" / "area = =" to match the block above.
    print(" width = %f +/- %f" % w0_saved)
    print(" area = %f +/- %f" % a0_saved)
    print(" Covariance(width, area) =  %s"
          % (cov2.getcovariance((0, 1), (0, 2)),))

    print("\n ---------- Alternate Parameterizations ---------")
    ## Different Parameterizations
    # Peaks and baselines may have equivalent parameterizations that are
    # useful in different situations.  For example, the types defined by the
    # GaussianOverR peak function are:
    # "internal" - Used in diffpy.srmise calculations, explicitly enforces a
    #              maximum peak width
    # "pwa" - The position, width (full-width at half-maximum), area.
    # "mu_sigma_area" - The position, width (the distribution standard
    #                   deviation sigma), area.
    # "default_output" - Defines default format to use in most user-facing
    #                    scenarios.  Maps to the "pwa" parameterization.
    # "default_input" - Defines default format to use when specifying peak
    #                   parameters.  Maps to the "internal" parameterization.
    # All diffpy.srmise peak and baseline functions are required to have the
    # "internal", "default_output", and "default_input" formats.  In many
    # cases, such as polynomial baselines, all of these are equivalent.
    #
    # Suppose you want to know peak widths in terms of the standard deviation
    # sigma of the Gaussian distribution.  It is then appropriate to convert
    # all peaks to the "mu_sigma_area" format.  Valid options for the "parts"
    # keyword are "peaks", "baseline", or a sequence of indices (e.g. [1,2,3]
    # would transform the second, third, and fourth peaks).  If the keyword
    # is omitted, the transformation is attempted for all parts of the fit.
    cov.transform(in_format="pwa", out_format="mu_sigma_area", parts="peaks")
    print("Width (sigma) of nearest-neighbor peak: %f +/- %f"
          % cov.get((0, 1)))

    print("\n ------------ Highly Correlated Parameters ------------")
    # Highly-correlated parameters can indicate difficulties constraining the
    # fit.  This function lists all pairs of parameters with an absolute
    # value of correlation which exceeds a given threshold.
    print("|Correlation| > 0.9:")
    print("par1 par2 corr(par1, par2)")
    print("\n".join(str(c) for c in cov.correlationwarning(.9)))

    print("\n-------- Estimate coordination shell occupancy ---------")
    # Estimate the scale factor and its uncertainty from first peak's
    # intensity.
    # G_normalized = scale * G_observed
    # dscale = scale * dG_observed/G_observed
    scale = 12. / a0[0]
    dscale = scale * a0[1] / a0[0]
    print("Estimate scale factor assuming nearest-neighbor intensity = 12")
    print("Scale factor is %f +/- %f" % (scale, dscale))

    # Reference for number of atoms in coordination shells for FCC.
    # http://chem-faculty.lsu.edu/watkins/MERLOT/cubic_neighbors/cubic_near_neighbors.html
    ideal_intensity = [12, 6, 24, 12, 24, 8, 48, 6, 36, 24, 24, 24]

    # Calculate the scaled intensities and uncertainties.  zip() stops at the
    # shorter sequence, so extracting more peaks than there are tabulated
    # shells no longer raises IndexError (this matches the total-intensity
    # loop further down).
    intensity = []
    n_peaks = len(cov.model) - 1  # the last entry is the baseline
    for i, ideal in zip(range(n_peaks), ideal_intensity):
        (raw_area, raw_darea) = cov.get((i, 2))
        area = scale * raw_area
        # Relative uncertainties add in quadrature.  The relative uncertainty
        # of the area must use the unscaled value and its own uncertainty.
        darea = area * np.sqrt((dscale / scale) ** 2
                               + (raw_darea / raw_area) ** 2)
        intensity.append((ideal, area, darea))

    print("\nIntensity")
    print("Ideal: Estimated")
    for entry in intensity:
        print("%i: %f +/- %f" % entry)

    print("\nTotal intensity")
    # It is possible to iterate over peaks directly without using indices.
    # In addition, peak parameters can be accessed using string keys.  For the
    # Gaussian over r all of "position", "width", and "area" are valid.
    total_observed_intensity = 0
    total_ideal_intensity = 0
    for peak, ii in zip(cov.model[:-1], ideal_intensity):
        total_observed_intensity += scale * peak["area"]
        total_ideal_intensity += ii
    print("Ideal: Observed (using estimated scale factor)")
    print("%i: %f" % (total_ideal_intensity, total_observed_intensity))

    ## Save output
    ppe.write("output/query_results.srmise")
    ppe.writepwa("output/query_results.pwa")

    ## Evaluating a model.
    # Although the ModelCovariance object is useful, the model used for
    # fitting can be directly accessed through PDFPeakExtraction as well,
    # albeit without uncertainties.  This is particularly helpful when
    # evaluating a model since the parameters stay in the "internal" format
    # used for calculations.  For example, here we plot the data and every
    # second peak on an arbitrary grid.  Unlike with ModelCovariance, the
    # baseline and peaks are kept separate.
    if plot:
        plt.figure()
        grid = np.arange(2, 10, .01)
        bl = ppe.extracted.baseline
        everysecondpeak = ppe.extracted.model[::2]
        plt.plot(ppe.x, ppe.y, 'o')
        for peak in everysecondpeak:
            plt.plot(grid, bl.value(grid) + peak.value(grid))
        plt.xlim(2, 10)
        plt.show()
class PeakStability:
    """Utility to test robustness of peaks.

    Peak extraction is repeated over a sequence of assumed uncertainties,
    and the resulting models are stored so they can be saved, reloaded,
    plotted, and stepped through.

    results: list of [error scalar, model, bl, dr] entries, one per
             uncertainty, as appended by run() or restored by load()
    ppe: a PDFPeakExtraction instance
    current: index into results of the active model, or None
    """

    def __init__(self):
        self.results = []
        self.ppe = None
        self.current = None

    def setppe(self, ppe):
        """Set the PDFPeakExtraction instance used for all operations."""
        self.ppe = ppe

    def load(self, filename):
        """Restore results and peak extraction state saved with save().

        The file holds a pickled pair (results, ppestr).  A new
        PDFPeakExtraction is created from ppestr, and each stored baseline
        dictionary is turned back into a baseline instance.  Afterwards the
        first model is made current, or no model if the file holds none.

        Parameters:
        filename - Path of the file to read
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        # NOTE(review): unpickling can execute arbitrary code, so only load
        # files that come from a trusted source.
        with open(filename, 'rb') as in_s:
            (self.results, ppestr) = pickle.load(in_s)

        self.ppe = PDFPeakExtraction()
        self.ppe.readstr(ppestr)

        # Ugly kluge for the baseline, since FromSequence
        # can't pickle.  save() stores a dict of keywords in place of each
        # baseline; rebuild the instance from those keywords here.
        bl = self.ppe.baseline
        for r in self.results:
            if r[2] is not None:
                kwds = r[2]
                # Presumably an object with estimate_parameters is a baseline
                # function itself, while anything else is an instance whose
                # owner() is the function -- confirm against the baseline
                # classes.
                if hasattr(bl, "estimate_parameters"):
                    r[2] = bl.actualize(default_input="internal", **kwds)
                else:
                    r[2] = bl.owner().actualize(in_format="internal", **kwds)

        # An empty results list has no model 0 to activate.
        if self.results:
            self.setcurrent(0)
        else:
            self.setcurrent(None)

    def save(self, filename):
        """Pickle the results and the peak extraction state to filename.

        The payload is built completely before the file is opened, so a
        failure while serializing does not truncate an existing file.

        Parameters:
        filename - Path of the file to write
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        outstr = self.ppe.writestr()

        # ugly kluge to let FromSequence pickle
        # (it stores xyrepr() in metadict)
        results2 = []
        for r in self.results:
            if r[2] is None:
                bldict = None
            else:
                bldict = {
                    "pars": r[2].pars,
                    "free": r[2].free,
                    "removable": r[2].removable,
                    "static_owner": r[2].static_owner,
                }
            results2.append([r[0], r[1], bldict, r[3]])

        with open(filename, 'wb') as out_s:
            pickle.dump([results2, outstr], out_s)

    def plotseries(self, style='o', **kwds):
        """Plot the peak positions of every model against its uncertainty.

        Parameters:
        style - Format string passed to pyplot.plot()
        Keywords passed to pyplot.plot()"""
        plt.figure()
        plt.ioff()
        for e, r, bl, dr in self.results:
            peakpos = [p["position"] for p in r]
            es = [e] * len(peakpos)
            plt.plot(peakpos, es, style, **kwds)
        plt.ion()
        plt.draw()

    def plot(self, **kwds):
        """Plot the current model.

        Keywords passed to pyplot.plot()"""
        plt.clf()
        plt.plot(*self.ppe.extracted.plottable(), **kwds)
        q = self.ppe.extracted.quality()
        plt.suptitle(
            "[%i/%i]\n"
            "Uncertainty: %6.3f. Peaks: %i.\n"
            "Quality: %6.3f. Chi-square: %6.3f"
            % (self.current + 1, len(self.results), self.ppe.effective_dy[0],
               len(self.ppe.extracted.model), q.stat, q.chisq))

    def setcurrent(self, idx):
        """Make the idxth model the active one.

        The stored uncertainty is applied to every point as effective_dy,
        the data are resampled with the stored dr, and a ModelCluster built
        from the stored model and baseline becomes self.ppe.extracted.
        Passing None clears the calculation instead.

        Parameters:
        idx - Index into self.results, or None"""
        self.current = idx
        if idx is not None:
            result = self.results[idx]
            self.ppe.setvars(quiet=True,
                             effective_dy=result[0] * np.ones(len(self.ppe.x)))
            (r, y, dr, dy) = self.ppe.resampledata(result[3])
            self.ppe.extracted = ModelCluster(result[1], result[2], r, y, dy,
                                              None, self.ppe.error_method,
                                              self.ppe.pf)
        else:
            self.ppe.clearcalc()

    def animate(self, results=None, step=False, **kwds):
        """Show animation of extracted peaks from first to last.

        The model that was current before the call is made current again
        afterwards, even if plotting is interrupted.

        Parameters:
        step - Require keypress to show next plot
        results - The indices of results to show

        Keywords passed to pyplot.plot()"""
        if results is None:
            results = range(len(self.results))

        oldcurrent = self.current
        try:
            self.setcurrent(0)
            plt.ion()
            plt.plot(*self.ppe.extracted.plottable())
            for i in results:
                self.setcurrent(i)
                plt.ioff()
                self.plot(**kwds)
                plt.ion()
                plt.draw()
                if step:
                    raw_input()
        finally:
            self.setcurrent(oldcurrent)

    def run(self, err, savecovs=False):
        """Extract peaks once for every uncertainty in err.

        Any previous results are discarded.  Progress is reported with a
        1-based counter, and all models are printed at the end.

        Parameters:
        err - Sequence of uncertainties to run at
        savecovs - If True, keep the value returned by each extraction

        Returns the list of saved covariances, which is empty unless
        savecovs is True."""
        self.results = []
        covs = []
        for i, e in enumerate(err):
            print("---- Running for uncertainty %s (%i/%i) ----"
                  % (e, i + 1, len(err)))
            self.ppe.clearcalc()
            self.ppe.setvars(effective_dy=e)
            if savecovs:
                covs.append(self.ppe.extract())
            else:
                self.ppe.extract()
            # Mean spacing of the grid the model was extracted on.
            r_cluster = self.ppe.extracted.r_cluster
            dr = (r_cluster[-1] - r_cluster[0]) / (len(r_cluster) - 1)
            self.results.append(
                [e, self.ppe.extracted.model, self.ppe.extracted.baseline, dr])

        for e, r, bl, dr in self.results:
            print("---- Results for uncertainty %s ----" % e)
            print(r)
        return covs
def run(plot=True):
    """Extract peaks from a TiO2 PDF, explaining each extraction parameter.

    The PDF is loaded from data/TiO2_fine_qmax26.gr, and the results are
    written to output/parameter_summary.srmise and
    output/parameter_summary.pwa.  If plot is true the initial peaks and
    the extracted model are plotted.
    """
    ## Initialize peak extraction
    # Create the peak extraction object and load the PDF from a file.
    extraction = PDFPeakExtraction()
    extraction.loadpdf("data/TiO2_fine_qmax26.gr")

    ###### Set up extraction parameters.
    # In this section we'll examine the major extraction parameters in detail.
    # diffpy.srmise strives to provide reasonable default values for these
    # parameters.  For normal use setting the range, baseline, and uncertainty
    # should be sufficient.
    settings = {}

    ## Range
    # Range defaults to the entire PDF if not specified.
    settings["rng"] = [1.5, 10.]

    ## dg
    # diffpy.srmise selects model complexity based primarily on the uncertainty
    # of the PDF.  Note that very small uncertainties (<1%) can make peak
    # extraction excessively slow.  In general, the smaller the uncertainty the
    # more complex the model.  PDFs which report no uncertainty, or report
    # unreliable values, must be assigned one.  By default, a PDF which does
    # not report uncertainties uses 5% the maximum minus minimum values.
    # Common causes of unreliable uncertainties include oversampling
    # (uncertainties in nearby data are strongly correlated, as for this PDF)
    # and/or integrated diffraction patterns obtained by a method that also
    # introduces correlation to the 1D diffraction pattern.  Consequently, the
    # assumption of both least-squares fitting and the Akaike Information
    # Criterion that the data are at least approximately independently
    # distributed is not valid.  In this case results obtained by
    # diffpy.srmise may be useful, especially when they can be interpreted in
    # light of prior knowledge, but strong statistical conclusions cannot be
    # drawn.  For additional discussion of this subtle yet important issue see:
    # [1] Egami and Billinge. (2012). Underneath the Bragg Peaks: Structural
    #     Analysis of Complex Materials (2nd ed.). Oxford: Pergamon Press.
    # [2] Granlund, et al. (2015) Acta Crystallographica A, 71(4), 392-409.
    #     doi:10.1107/S2053273315005276
    # [3] Yang, et al. (2014). Journal of Applied Crystallography, 47(4),
    #     1273-1283. doi:10.1107/S1600576714010516
    settings["dg"] = 0.35  # Play with this value!

    ## baseline
    # As a crystal PDF, a linear baseline crossing the origin is appropriate.
    # Here we define the linear baseline B(r) = -.65*r + 0, and explicitly set
    # the y-intercept as a fixed parameter which will not be fit.  For
    # crystal PDFs the theoretical value of the slope is -4*pi*rho0, where
    # rho0 is the number density.  Nevertheless, imperfect normalization of the
    # PDF means the experimental baseline is proportional to that value.
    baseline_function = Polynomial(degree=1)
    slope = -.65  # Play with this value!
    y_intercept = 0.
    settings["baseline"] = baseline_function.actualize(
        [slope, y_intercept], free=[True, False])

    ## pf
    # The pf (peakfunction) parameter allows setting the shape of peaks to be
    # extracted.  Termination effects are added automatically to the peak
    # function during extraction.  In the harmonic approximation of atomic
    # interactions peaks in the PDF are well approximated by a Gaussian/r.
    # (Note, however, that the values used for peak parameters -- namely
    # position, width, and area -- are for the Gaussian itself).  diffpy.srmise
    # uses width-limited peaks to reduce the likelihood of extracting
    # unphysically wide peaks in regions of high overlap.  The parameter
    # indicates the max fwhm permitted.  By default, diffpy.srmise uses a
    # maximum width of 0.7, which is generally reasonable if the r-axis of the
    # PDF is given in angstroms.  Models where many peaks reach the maximum
    # width, and models that are very sensitive to the choice in maximum width,
    # are strong signs that diffpy.srmise is having difficulty finding peaks
    # which are sufficiently constrained by the data.
    peak_function = GaussianOverR(0.7)
    # Despite the list, only one entry is currently supported.
    settings["pf"] = [peak_function]

    ## qmax
    # PDFs typically report the value of qmax (i.e. the maximum momentum
    # transfer q in the measurement), but it can be specified explicitly also.
    # If the PDF does not report qmax, diffpy.srmise attempts to estimate it
    # directly from the data.  This estimate can also be used by setting qmax
    # to "automatic".  An infinite qmax can be specified by setting qmax to 0.
    # In that case the Nyquist rate is 0 (infinite resolution), and
    # diffpy.srmise does not consider Nyquist sampling or termination effects.
    settings["qmax"] = 26.0

    ## nyquist
    # This parameter governs whether diffpy.srmise attempts to find a model
    # on a Nyquist-sampled grid with dr=pi/qmax, which is a grid where data
    # uncertainties are least correlated without loss of information.  By
    # default this parameter is True whenever qmax > 0, and generally it
    # should not need to be changed.  Setting it to False allows extracted
    # models to retain more complexity because the data appear to have more
    # statistically independent points than they truly do.  For a detailed
    # discussion of Nyquist sampling and the PDF see:
    # [4] Farrow et al. (2011). Physical Review B, 84(13), 134105.
    #     doi:10.1103/PhysRevB.84.134105
    settings["nyquist"] = True

    ## supersample
    # This parameter dictates the data be oversampled by at least this factor
    # (relative to the Nyquist rate) during the early stages of peak
    # extraction.  If the input PDF is even more finely sampled, that level of
    # sampling is used instead.  The default value of 4.0 is ad hoc, but has
    # been empirically sufficient.  Increasing this value may help the peak-
    # finding and clustering process, but reduces speed.
    settings["supersample"] = 4.0

    ## cres
    # The cres (clustering resolution) parameter governs the sensitivity of the
    # clustering method used by diffpy.srmise.  In short, when the data are
    # being clustered, data which are further than the clustering resolution
    # from any other cluster (measured along the r-axis) are considered to be a
    # new cluster rather than a member of an existing one.  The default value
    # is the Nyquist sampling interval pi/qmax, and on most data it should not
    # greatly impact model complexity.  In some cases making it smaller may
    # help the peak-finding process.  Here it is roughly half the Nyquist
    # interval.
    settings["cres"] = 0.05

    # Apply peak extraction parameters.
    extraction.setvars(**settings)

    ## initial_peaks
    # Initial peaks are peaks which are kept fixed during the early stages of
    # peak extraction, effectively conditioning results upon their values.
    # Since initial peaks are sometimes dependent on other SrMise parameters
    # (e.g. the peak function used) it is good practice to set them after
    # other parameters.  Although the "initial_peaks" parameter can be set as
    # with the parameters above, SrMise provides helper functions to do so
    # more easily.  There are two basic ways to quickly specify initial peaks:
    # 1) Supplying the approximate position of the peak, and letting
    #    diffpy.srmise estimate the peak parameters.
    # 2) Explicit specification of peak parameters.

    ## Initial peaks from approximate positions.
    # This routine estimates peak parameters by finding the peak-like cluster
    # containing the specified point.  It does not search for occluded peaks,
    # so works best on well-separated peaks.  It does, however, take any
    # existing initial peaks into account during estimation.
    for approximate_position in [2.0, 4.5]:
        extraction.estimate_peak(approximate_position)  # adds to initial_peaks

    ## Initial peaks from explicit parameters.
    # Adding initial peaks explicitly is similar to defining a baseline.
    # Namely, choosing a peak function and then actualizing it with given
    # parameters.  For this example peaks are created from the same
    # GaussianOverR used during extraction, but one could use a different peak
    # function from diffpy.srmise.peaks if desired.  The peak parameters are
    # given in terms of position, width (fwhm), and area, and it is important
    # to specify that format is being used so they are correctly changed into
    # the internal parameterization.  Here peaks are added in a region of
    # overlap, and the width parameter is fixed at a reasonable value to aid
    # convergence in this region.
    explicit_parameters = [[6.2, 0.25, 2.6], [6.45, 0.25, 2.7], [7.15, 0.25, 5]]
    explicit_peaks = [
        peak_function.actualize(pwa, free=[True, False, True], in_format="pwa")
        for pwa in explicit_parameters
    ]
    extraction.add_peaks(explicit_peaks)  # adds to initial_peaks

    ## Initial peaks and pruning
    # While initial peaks condition what other peaks can be extracted, by
    # default they can also be pruned if a simpler model appears better.  To
    # prevent this, they can be set as non-removable.
    for initial_peak in extraction.initial_peaks:
        initial_peak.removable = False

    ## Plot initial parameters
    if plot:
        makeplot(extraction)
        plt.title("Initial Peaks")

    ###### Perform peak extraction
    extraction.extract()

    ## Save output
    # The write() method saves a file which preserves all aspects of peak
    # extraction and its results, by convention using the .srmise extension,
    # and which can later be read by diffpy.srmise.
    #
    # The writepwa() method saves a file intended as a human-readable summary.
    # In particular, it reports the position, width (as full-width
    # half-maximum), and area of extracted peaks.  The reported values
    # are for Gaussians in the radial distribution function (RDF) corresponding
    # to this PDF.
    extraction.write("output/parameter_summary.srmise")
    extraction.writepwa("output/parameter_summary.pwa")

    ## Plot results.
    # Display plot of extracted peak.  It is also possible to plot an existing
    # .srmise file from the command line using
    #     srmise output/parameter_summary.srmise --no-extract --plot
    if plot:
        plt.figure()
        makeplot(extraction)
        plt.show()
class PeakStability:
    """Utility to test robustness of peaks.

    Peak extraction is repeated over a sequence of assumed uncertainties,
    and the resulting models are stored so they can be saved, reloaded,
    plotted, and stepped through.

    results: list of [error scalar, model, bl, dr] entries, one per
             uncertainty, as appended by run() or restored by load()
    ppe: a PDFPeakExtraction instance
    current: index into results of the active model, or None
    """

    def __init__(self):
        self.results = []
        self.ppe = None
        self.current = None

    def setppe(self, ppe):
        """Set the PDFPeakExtraction instance used for all operations."""
        self.ppe = ppe

    def load(self, filename):
        """Restore results and peak extraction state saved with save().

        The file holds a pickled pair (results, ppestr).  A new
        PDFPeakExtraction is created from ppestr, and each stored baseline
        dictionary is turned back into a baseline instance.  Afterwards the
        first model is made current, or no model if the file holds none.

        Parameters:
        filename - Path of the file to read
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        # NOTE(review): unpickling can execute arbitrary code, so only load
        # files that come from a trusted source.
        with open(filename, 'rb') as in_s:
            (self.results, ppestr) = pickle.load(in_s)

        self.ppe = PDFPeakExtraction()
        self.ppe.readstr(ppestr)

        # Ugly kluge for the baseline, since FromSequence
        # can't pickle.  save() stores a dict of keywords in place of each
        # baseline; rebuild the instance from those keywords here.
        bl = self.ppe.baseline
        for r in self.results:
            if r[2] is not None:
                kwds = r[2]
                # Presumably an object with estimate_parameters is a baseline
                # function itself, while anything else is an instance whose
                # owner() is the function -- confirm against the baseline
                # classes.
                if hasattr(bl, "estimate_parameters"):
                    r[2] = bl.actualize(default_input="internal", **kwds)
                else:
                    r[2] = bl.owner().actualize(in_format="internal", **kwds)

        # An empty results list has no model 0 to activate.
        if self.results:
            self.setcurrent(0)
        else:
            self.setcurrent(None)

    def save(self, filename):
        """Pickle the results and the peak extraction state to filename.

        The payload is built completely before the file is opened, so a
        failure while serializing does not truncate an existing file.

        Parameters:
        filename - Path of the file to write
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        outstr = self.ppe.writestr()

        # ugly kluge to let FromSequence pickle
        # (it stores xyrepr() in metadict)
        results2 = []
        for r in self.results:
            if r[2] is None:
                bldict = None
            else:
                bldict = {
                    "pars": r[2].pars,
                    "free": r[2].free,
                    "removable": r[2].removable,
                    "static_owner": r[2].static_owner,
                }
            results2.append([r[0], r[1], bldict, r[3]])

        with open(filename, 'wb') as out_s:
            pickle.dump([results2, outstr], out_s)

    def plotseries(self, style='o', **kwds):
        """Plot the peak positions of every model against its uncertainty.

        Parameters:
        style - Format string passed to pyplot.plot()
        Keywords passed to pyplot.plot()"""
        plt.figure()
        plt.ioff()
        for e, r, bl, dr in self.results:
            peakpos = [p["position"] for p in r]
            es = [e] * len(peakpos)
            plt.plot(peakpos, es, style, **kwds)
        plt.ion()
        plt.draw()

    def plot(self, **kwds):
        """Plot the current model.

        Keywords passed to pyplot.plot()"""
        plt.clf()
        plt.plot(*self.ppe.extracted.plottable(), **kwds)
        q = self.ppe.extracted.quality()
        plt.suptitle(
            "[%i/%i]\n"
            "Uncertainty: %6.3f. Peaks: %i.\n"
            "Quality: %6.3f. Chi-square: %6.3f"
            % (self.current + 1, len(self.results), self.ppe.effective_dy[0],
               len(self.ppe.extracted.model), q.stat, q.chisq))

    def setcurrent(self, idx):
        """Make the idxth model the active one.

        The stored uncertainty is applied to every point as effective_dy,
        the data are resampled with the stored dr, and a ModelCluster built
        from the stored model and baseline becomes self.ppe.extracted.
        Passing None clears the calculation instead.

        Parameters:
        idx - Index into self.results, or None"""
        self.current = idx
        if idx is not None:
            result = self.results[idx]
            self.ppe.setvars(quiet=True,
                             effective_dy=result[0] * np.ones(len(self.ppe.x)))
            (r, y, dr, dy) = self.ppe.resampledata(result[3])
            self.ppe.extracted = ModelCluster(result[1], result[2], r, y, dy,
                                              None, self.ppe.error_method,
                                              self.ppe.pf)
        else:
            self.ppe.clearcalc()

    def animate(self, results=None, step=False, **kwds):
        """Show animation of extracted peaks from first to last.

        The model that was current before the call is made current again
        afterwards, even if plotting is interrupted.

        Parameters:
        step - Require keypress to show next plot
        results - The indices of results to show

        Keywords passed to pyplot.plot()"""
        if results is None:
            results = range(len(self.results))

        oldcurrent = self.current
        try:
            self.setcurrent(0)
            plt.ion()
            plt.plot(*self.ppe.extracted.plottable())
            for i in results:
                self.setcurrent(i)
                plt.ioff()
                self.plot(**kwds)
                plt.ion()
                plt.draw()
                if step:
                    raw_input()
        finally:
            self.setcurrent(oldcurrent)

    def run(self, err, savecovs=False):
        """Extract peaks once for every uncertainty in err.

        Any previous results are discarded.  Progress is reported with a
        1-based counter, and all models are printed at the end.

        Parameters:
        err - Sequence of uncertainties to run at
        savecovs - If True, keep the value returned by each extraction

        Returns the list of saved covariances, which is empty unless
        savecovs is True."""
        self.results = []
        covs = []
        for i, e in enumerate(err):
            print("---- Running for uncertainty %s (%i/%i) ----"
                  % (e, i + 1, len(err)))
            self.ppe.clearcalc()
            self.ppe.setvars(effective_dy=e)
            if savecovs:
                covs.append(self.ppe.extract())
            else:
                self.ppe.extract()
            # Mean spacing of the grid the model was extracted on.
            r_cluster = self.ppe.extracted.r_cluster
            dr = (r_cluster[-1] - r_cluster[0]) / (len(r_cluster) - 1)
            self.results.append(
                [e, self.ppe.extracted.model, self.ppe.extracted.baseline, dr])

        for e, r, bl, dr in self.results:
            print("---- Results for uncertainty %s ----" % e)
            print(r)
        return covs
def main():
    """Command-line entry point for plotting a SrMise or PeakStability file.

    Exactly one positional argument, the file to plot, is required.  The
    file is first read as a PDFPeakExtraction file and, failing that, as a
    PeakStability file.  Options select a model, a list of distances to
    compare against, an output file, and whether to block on show().
    """
    # configure options parsing
    usage = ("%prog srmise_file [options]\n"
             "srmise_file can be an extraction file saved by SrMise, "
             "or a data file saved by PeakStability.")
    descr = ("A very basic tool for somewhat prettier plotting than provided by "
             "the basic SrMise classes. Can be used to compare peak positions "
             "with those from a list.\n"
             "NOTE: At this time the utility only works with peaks extracted using diffpy.srmise.PDFPeakExtraction.")

    parser = optparse.OptionParser(usage=usage, description=descr)
    parser.add_option(
        "--compare",
        type="string",
        help="Compare extracted distances to distances listed (1/line) in this file.")
    parser.add_option(
        "--model",
        type="int",
        help="Plot given model from set. Ignored if srmise_file is not a PeakStability file.")
    parser.add_option(
        "--show",
        action="store_true",
        help="execute pylab.show() blocking call")
    parser.add_option(
        "-o", "--output",
        type="string",
        help="save plot to the specified file")
    parser.add_option(
        "--format",
        type="string",
        default="eps",
        help="output format for plot saving")
    parser.allow_interspersed_args = True
    opts, args = parser.parse_args(sys.argv[1:])

    if len(args) != 1:
        parser.error("Exactly one argument required. \n" + usage)
    source = args[0]

    # Try the file as a peak extraction first, then as a PeakStability file.
    toplot = PDFPeakExtraction()
    try:
        toplot.read(source)
    except Exception:
        toplot = PeakStability()
        try:
            toplot.load(source)
        except Exception:
            print("File '%s' is not a .srmise or PeakStability data file." % source)
            return

    # Selecting a model is attempted on whatever was loaded; any failure is
    # reported and otherwise ignored.
    if opts.model is not None:
        try:
            toplot.setcurrent(opts.model)
        except Exception:
            print("Ignoring model, %s is not a PeakStability file." % source)

    if opts.compare is None:
        distances = None
    else:
        distances = readcompare(opts.compare)

    setfigformat(figsize=(6., 4.0))
    makeplot(toplot, distances)
    if opts.output:
        plt.savefig(opts.output, format=opts.format, dpi=600)

    # Block on show() only when requested; otherwise just draw.
    if opts.show:
        plt.show()
    else:
        plt.draw()
def run(plot=True):
    """Fit a prespecified set of initial peaks to a C60 PDF.

    The PDF is loaded from data/C60_fine_qmax21.gr with an interpolated
    baseline from data/C60baseline.dat.  Results are written to
    output/fit_initial.srmise and output/fit_initial.pwa.  If plot is true
    the initial peaks and the fitted model are plotted.
    """
    ## Initialize peak extraction
    fitter = PDFPeakExtraction()
    fitter.loadpdf("data/C60_fine_qmax21.gr")

    ## Set up interpolated baseline.
    # The FromSequence baseline creates an interpolated baseline from provided
    # r and G(r) values, either two lists or a file containing (r, G(r)) pairs.
    # The baseline has no parameters.  This particular baseline was estimated
    # by fitting interparticle correlations of an FCC lattice of hollow
    # spheres to the PDF.
    baseline = FromSequence("data/C60baseline.dat").actualize([])

    ## Set up fitting parameters
    # A summary of how parameters impact fitting is given below.
    # "rng" - Same as peak extraction
    # "baseline" - Same as peak extraction
    # "qmax" and "nyquist" - If qmax > 0 and Nyquist is true, fitting is
    #                        performed on a Nyquist-sampled grid.  The data are
    #                        never supersampled first.
    # "dg" - Since the model to fit is prespecified, the uncertainty does not
    #        impact model complexity.  Impact on refined parameter values and
    #        estimated uncertainties as per standard chi-square fitting.
    # "pf" - The peak function used when estimating peak parameters given an
    #        approximate position.  Unlike peak extraction, peak fitting never
    #        alters the peak function used by initial peaks.
    # "cres" - Estimation of peak parameters given an approximate position uses
    #          clustering for peak finding.  No other effect on peak fitting.
    # "supersample" - No effect.
    fitter.setvars(
        rng=[1., 7.25],
        baseline=baseline,
        cres=0.05,
        dg=5000,  # ad hoc, but gives each point equal weight in fit.
    )

    ## Set up termination ripples
    # Peak fitting never changes the peak function, so termination ripples
    # are not applied automatically as they are in peak extraction.
    # Termination ripples require setting the underlying peak function and qmax.
    # In this case they are added to the default GaussianOverR peak function.
    # TerminationRipples use the estimation methods of the base peak function.
    ripple_function = TerminationRipples(fitter.pf[0], fitter.qmax)
    fitter.setvars(pf=[ripple_function])

    # Specify some initial peaks using approximate positions.  These use the
    # peak function passed to the PDFPeakExtraction instance.
    for position in [1.4, 2.4, 2.8, 3.6, 4.1, 4.5, 4.8, 5.2, 5.4, 5.7, 6.1]:
        fitter.estimate_peak(position)

    # Specify some peaks explicitly.  These may be constructed from any peak
    # function, or combination of peak functions.
    actualized = []
    for pwa in [[6.7, .3, 100000], [7.0, .15, 50000]]:
        actualized.append(ripple_function.actualize(pwa, in_format="pwa"))
    fitter.add_peaks(Peaks(actualized))

    # Plot initial peaks
    if plot:
        makeplot(fitter)
        plt.title("Initial Peaks")

    # Perform fit.
    fitter.fit()

    ## Save results
    fitter.write("output/fit_initial.srmise")
    fitter.writepwa("output/fit_initial.pwa")

    if plot:
        plt.figure()
        makeplot(fitter)
        plt.show()