Example #1
0
def run(plot=True):
    
    ## Initialize peak extraction
    # Create peak extraction object
    ppe = PDFPeakExtraction()
    
    # Load the PDF from a file
    ppe.loadpdf("data/Ag_nyquist_qmax30.gr")

    ## Set up extraction parameters.
    # For convenience we add all parameters to a dictionary before passing them
    # to the extraction object.
    #
    # The "rng" (range) parameter defines the region over which peaks will be
    # extracted and fit.  For the well isolated nearest-neighbor silver peak,
    # which occurs near 2.9 angstroms, it is sufficient to perform extraction
    # between 2 and 3.5 angstroms.
    #
    # The "baseline" parameter lets us define the PDF baseline, which is
    # linear for a crystal.  If a linear baseline is specified without
    # numerical parameters diffpy.srmise attempts to estimate them from the
    # data, and this is usually sufficient when peaks do not overlap much.
    kwds = {} 
    kwds["rng"] = [2.0, 3.5]
    kwds["baseline"] = Polynomial(degree=1)
    
    # Apply peak extraction parameters.
    ppe.setvars(**kwds)

    ## Perform peak extraction
    ppe.extract()
    
    ## Save output
    # The write() method saves a file which preserves all aspects of peak
    # extraction and its results, by convention using the .srmise extension,
    # and which can later be read by diffpy.srmise.
    #
    # The writepwa() method saves a file intended as a human-readable summary.
    # In particular, it reports the position, width (as full-width at
    # half-maximum), and area of of extracted peaks.  The reported values
    # are for Gaussians in the radial distribution function (RDF) corresponding
    # to this PDF.
    ppe.write("output/extract_single_peak.srmise")
    ppe.writepwa("output/extract_single_peak.pwa")

    ## Plot results.
    # Display plot of extracted peak.  It is also possible to plot an existing
    # .srmise file from the command line using
    #     srmise output/Ag_singlepeak.srmise --no-extract --plot
    # For additional plotting options, run "srmiseplot --help".
    if plot:
        makeplot(ppe)
        plt.show()
Example #2
0
def run(plot=True):

    ## Initialize peak extraction
    ppe = PDFPeakExtraction()
    ppe.loadpdf("data/C60_fine_qmax21.gr")

    ## Set up interpolated baseline.
    # The FromSequence baseline creates an interpolated baseline from provided
    # r and G(r) values, either two lists or a file containing (r, G(r)) pairs.
    # The baseline has no parameters. This particular baseline was estimated
    # by fitting interparticle correlations of an FCC lattice of hollow
    # spheres to the PDF.
    blf = FromSequence("data/C60baseline.dat")
    bl = blf.actualize([])

    ## Set up fitting parameters
    # A summary of how parameters impact fitting is given below.
    # "rng" - Same as peak extraction
    # "baseline" - Same as peak extraction
    # "qmax" and "nyquist" - If qmax > 0 and Nyquist is true, fitting is
    #                        performed on a Nyquist-sampled grid.  The data are
    #                        never supersampled first.
    # "dg" - Since the model to fit is prespecified, the uncertainty does not
    #        impact model complexity.  Impact on refined parameter values and
    #        estimated uncertainties as per standard chi-square fitting.
    # "pf" - The peak function used when estimating peak parameters given an
    #        approximate position.  Unike peak extraction, peak fitting never
    #        alters the peak function used by initial peaks.
    # "cres" - Estimation of peak parameters given an approximate position uses
    #          clustering for peak finding.  No other effect on peak fitting.
    # "supersample" - No effect.
    kwds={}
    kwds["rng"] = [1., 7.25]
    kwds["baseline"] = bl
    kwds["cres"] = 0.05
    kwds["dg"] = 5000 # ad hoc, but gives each point equal weight in fit.
    ppe.setvars(**kwds)

    ## Set up termination ripples
    # Peak fitting never changes the peak function, so termination ripples
    # are not applied automatically as they are in peak extraction.
    # Termination ripples require setting the underlying peak function and qmax.
    # In this case they ared added to the default GaussianOverR peak function.
    # TerminationRipples use the estimation methods of the base peak function.
    pf = TerminationRipples(ppe.pf[0], ppe.qmax)
    ppe.setvars(pf=[pf])

    # Specify some initial peaks using approximate positions.  These use the
    # peak function passed to PDFPeakExtraction instance.
    rough_guess = [1.4, 2.4, 2.8, 3.6, 4.1, 4.5, 4.8, 5.2, 5.4, 5.7, 6.1]
    for g in rough_guess:
        ppe.estimate_peak(g)

    # Specify some peaks explicitly.  These may be constructed from any peak
    # function, or combination of peak functions.
    explicit_guess = [[6.7, .3, 100000], [7.0, .15, 50000]]
    explicit_peaks = Peaks([pf.actualize(eg, in_format="pwa") \
        for eg in explicit_guess])
    ppe.add_peaks(explicit_peaks)

    # Plot initial peaks
    if plot:
        makeplot(ppe)
        plt.title("Initial Peaks")

    # Perform fit.
    ppe.fit()

    ## Save results
    ppe.write("output/fit_initial.srmise")
    ppe.writepwa("output/fit_initial.pwa")

    if plot:
        plt.figure()
        makeplot(ppe)
        plt.show()
def run(plot=True):
    
    ## Initialize peak extraction
    # Create peak extraction object
    ppe = PDFPeakExtraction()
    
    # Load the PDF from a file
    ppe.loadpdf("data/TiO2_fine_qmax26.gr")

    ###### Set up extraction parameters.
    # In this section we'll examine the major extraction parameters in detail.
    # diffpy.srmise strives to provide reasonable default values for these
    # parameters.  For normal use setting the range, baseline, and uncertainty
    # should be sufficient.
    kwds = {}     
    
    ## Range
    # Range defaults to the entire PDF if not specified.
    kwds["rng"] = [1.5, 10.]
    
    ## dg
    # diffpy.srmise selects model complexity based primarily on the uncertainty
    # of the PDF.  Note that very small uncertainties (<1%) can make peak
    # extraction excessively slow.  In general, the smaller the uncertainty the
    # more complex the model.  PDFs which report no uncertainty, or report
    # unreliable values must be assigned one.  By default, a PDF which does not
    # report uncertainties uses 5% the maximum minus minimum values. Common
    # causes of unreliable uncertainties include oversampling (uncertainties in
    # nearby data are strongly correlated, as for this PDF) and/or
    # integrated diffraction patterns obtained by a method that also introduces
    # correlation to the 1D diffraction pattern.  Consequently, the assumption
    # of both least-squares fitting and the Akaike Information Criterion that
    # the data are at least approximately independently distributed is not
    # valid.  In this case results obtained by diffpy.srmise may be useful,
    # especially when they can be intrepreted in light of prior knowledge, but
    # strong statistical conclusions cannot be drawn.  For additional
    # discussion of this subtle yet important issue see:
    # [1] Egami and Billinge. (2012). Underneath the Bragg Peaks: Structural
    #     Analysis of Complex Materials (2nd ed.). Oxford: Pergamon Press.
    # [2] Granlund, et al. (2015) Acta Crystallographica A, 71(4), 392-409.
    #     doi:10.1107/S2053273315005276
    # [3] Yang, et al. (2014). Journal of Applied Crystallography, 47(4),
    #     1273-1283. doi:10.1107/S1600576714010516
    kwds["dg"] = 0.35  # Play with this value!

    ## baseline
    # As a crystal PDF, a linear baseline crossing the origin is appropriate.
    # Here we define the linear baseline B(r) = -.5*r + 0, and explicitly set
    # the y-intercept as a fixed parameter which will not be fit.  For
    # crystal PDFs the theoretical value of the slope is -4*pi*rho0, where
    # rho0 is the number density.  Nevertheless, imperfect normalization of the
    # PDF means the experimental baseline is proportional to that value.
    blfunc = Polynomial(degree=1)
    slope = -.65 # Play with this value!
    y_intercept = 0.
    kwds["baseline"] = blfunc.actualize([slope, y_intercept],
                                        free=[True, False])
    ## pf
    # The pf (peakfunction) parameter allows setting the shape of peaks to be
    # extracted.  Termination effects are added automatically to the peak
    # function during extraction.  In the harmonic approximation of atomic
    # interactions peaks in the PDF are well approximated by a Gaussian/r.
    # (Note, however, that the values used for peak parameters -- namely
    # position, width, and area -- are for the Gaussian itself).  diffpy.srmise
    # uses width-limited peaks to reduce the likelihood of extracting
    # unphysically wide peaks in regions of high overlap.  The parameter
    # indicates the max fwhm permitted.  By default, diffpy.srmise uses a
    # maximum width of 0.7, which is generally reasonable if the r-axis of the
    # PDF is given in angstroms.  Models where many peaks reach the maximum
    # width, and models that are very sensitive to the choice in maximum width,
    # are strong signs that diffpy.srmise is having difficulty finding peaks
    # which are sufficiently constrained by the data.
    pf = GaussianOverR(0.7)
    kwds["pf"] = [pf] # Despite the list, only one entry is currently supported.

    ## qmax
    # PDFs typically report the value of qmax (i.e. the maximum momentum
    # transfer q in the measurement), but it can be specified explicitly also.
    # If the PDF does not report qmax, diffpy.srmise attempts to estimate it
    # directly from the data.  This estimate can also be used by setting qmax
    # to "automatic".  An infinite qmax can be specified by setting qmax to 0,
    # In that case the Nyquist rate is 0 (infinite resolution), and
    # diffpy.srmise does not consider Nyquist sampling or termination effects.
    kwds["qmax"] = 26.0 

    ## nyquist
    # This parameter governs whether diffpy.srmise attempts to find a model
    # on a Nyquist-sampled grid with dr=pi/qmax, which is a grid where data
    # uncertainties are least correlated without loss of information.  By
    # default this parameter is True whenever qmax > 0, and generally it
    # should not need to be changed.  Setting it to False allows extracted
    # models retain more complexity because the data appear to have more
    # statistically independent points than they truly do.  For a detailed
    # discussion of Nyquist sampling and the PDF see:
    # [4] Farrow et al. (2011). Physical Review B, 84(13), 134105.
    #     doi:10.1103/PhysRevB.84.134105
    kwds["nyquist"] = True
    
    ## supersample
    # This parameter dictates the data be oversampled by at least this factor
    # (relative to the Nyquist rate) during the early stages of peak
    # extraction. If the input PDF is even more finely sampled, that level of
    # sampling is used instead.  The default value of 4.0 is ad hoc, but has
    # been empirically sufficient.  Increasing this value may help the peak-
    # finding and clustering process, but reduces speed.
    kwds["supersample"] = 4.0
    
    ## cres
    # The cres (clustering resolution) parameter governs the sensitivity of the
    # clustering method used by diffpy.srmise.  In short, when the data are
    # being clustered, data which are further than the clustering resolution
    # from any other cluster (measured along the r-axis) are considered to be a
    # new cluster rather than a member of an existing one.  The default value
    # is the Nyquist sampling interval pi/qmax, and on most data it should not
    # greatly impact model complexity.  In some cases making it smaller may
    # help the peak-finding process.  Here it is roughly half the Nyquist
    # interval.
    kwds["cres"] = 0.05
    
    # Apply peak extraction parameters.
    ppe.setvars(**kwds)

    ## initial_peaks
    # Initial peaks are peaks which are kept fixed during the early stages of
    # peak extraction, effectively condition results upon their values.  Since
    # initial peaks are sometimes dependent on other SrMise parameters (e.g.
    # the peak function used) it is good practice to set them after other
    # parameters.  Although the "initial_peaks" parameter can be set as with
    # the parameters above, SrMise provides helper functions to do so more
    # easily.  There are two basic ways to quickly specify initial peaks:
    # 1) Supplying the approximate position of the peak, and letting
    # diffpy.srmise estimate the peak parameters.
    # 2) Explicit specification of peak parameters.

    ## Initial peaks from approximate positions.
    # This routine estimates peak parameters by finding the peak-like cluster
    # containing the specified point.  It does not search for occluded peaks,
    # so works best on well-separated peaks.  It does, however, take any
    # existing initial peaks into account during estimation.
    positions = [2.0, 4.5]
    for p in positions:
        ppe.estimate_peak(p) # adds to initial_peaks

    ## Initial peaks from explicit parameters.
    # Adding initial peaks explicitly is similar to defining a baseline.
    # Namely, choosing a peak function and then actualizing it with given
    # parameters. For this example peaks are created from the same GaussianOverR
    # used during extraction, but one could use a different peak function from
    # diffpy.srmise.peaks if desired.  The peak parameters are given in terms
    # terms of position, width (fwhm), and area, and it is important to specify
    # that format is being used so they are correctly changed into the
    # internal parameterization.  Here two peaks are added in a region of
    # overlap, and the width parameter is fixed at a reasonable value to aid
    # convergence in this region.
    pars = [[6.2, 0.25, 2.6],[6.45, 0.25, 2.7],[7.15, 0.25, 5]]
    peaks = []
    for p in pars:
        peaks.append(pf.actualize(p, free=[True, False, True], in_format="pwa"))
    ppe.add_peaks(peaks) # adds to initial_peaks

    ## Initial peaks and pruning
    # While initial peaks condition what other peaks can be extracted, by
    # default they can also be pruned if a simpler model appears better.  To
    # prevent this, they can be set as non-removable.
    for ip in ppe.initial_peaks:
        ip.removable = False

    ## Plot initial parameters
    if plot:
        makeplot(ppe)
        plt.title("Initial Peaks")


    ###### Perform peak extraction
    ppe.extract()

    
    ## Save output
    # The write() method saves a file which preserves all aspects of peak
    # extraction and its results, by convention using the .srmise extension,
    # and which can later be read by diffpy.srmise.
    #
    # The writepwa() method saves a file intended as a human-readable summary.
    # In particular, it reports the position, width (as full-width
    # half-maximum), and area of of extracted peaks.  The reported values
    # are for Gaussians in the radial distribution function (RDF) corresponding
    # to this PDF.
    ppe.write("output/parameter_summary.srmise")
    ppe.writepwa("output/parameter_summary.pwa")

    ## Plot results.
    # Display plot of extracted peak.  It is also possible to plot an existing
    # .srmise file from the command line using
    #     srmise output/TiO2_parameterdetail.srmise --no-extract --plot
    if plot:
        plt.figure()
        makeplot(ppe)
        plt.show()
Example #4
0
def run(plot=True):
    
    ## Initialize peak extraction
    # Create peak extraction object
    ppe = PDFPeakExtraction()
    
    # Load the PDF from a file
    ppe.loadpdf("data/Ag_nyquist_qmax30.gr")
    
    # Obtain baseline from a saved diffpy.srmise trial.  This is not the
    # initial baseline estimate from the previous example, but the baseline
    # after both it and the extracted peaks have been fit to the data.
    ppebl = PDFPeakExtraction()
    ppebl.read("output/extract_single_peak.srmise")
    baseline = ppebl.extracted.baseline

    ## Set up extraction parameters.
    # Peaks are extracted between 2 and 10 angstroms, using the baseline
    # from the isolated peak example.
    kwds = {} 
    kwds["rng"] = [2.0, 10.]
    kwds["baseline"] = baseline
    
    # Apply peak extraction parameters.
    ppe.setvars(**kwds)

    ## Perform peak extraction, and retain object containing a copy of the
    # model and the full covariance matrix.
    cov = ppe.extract()
    

    print "\n======= Accessing SrMise Results ========"
    ## Accessing results of extraction
    #
    # Model parameters are organized using a nested structure, with a list
    # of peaks each of which is a list of parameters, similar to the the
    # following schematic.
    #     Peak
    #         Position
    #         Width
    #         Area
    #     Peak
    #         Position
    #         Width
    #         Area*
    #     ...
    #     Baseline
    #         Slope
    #         Intercept
    # By convention, the baseline is the final "peak."  The ModelCovariance
    # object returned by extract() can return information about any peak by
    # using the appropriate tuple of indices (i,j).  That is, (i,j) denotes
    # the jth parameter of the ith peak.  For example, the starred parameter
    # above is the area (index = 2) of the next nearest neighbor (index = 1)
    # peak. Thus, this parameter can be referenced as (1,2).  Several examples
    # are presented below.
    

    print "\n------ Parameter values and uncertainties ------"
    # ModelCovariance.get() returns a (value, uncertainty) tuple for a given
    # parameter.  These are the results for the nearest-neighbor peak.
    p0 = cov.get((0,0))
    w0 = cov.get((0,1))
    a0 = cov.get((0,2))
    print "Nearest-neighbor peak: "
    print "    position = %f +/- %f" %p0
    print "    width = %f +/- %f" %w0
    print "    area = %f +/- %f" %a0
    print "    Covariance(width, area) = ", cov.getcovariance((0,1),(0,2))

    # Baseline parameters.  By convention, baseline is final element in cov.
    (slope, intercept) = cov.model[-1]
    print "\nThe linear baseline B(r)=%f*r + %f" \
          % tuple(par for par in cov.model[-1])


    print "\n ------ Uncertainties from a Saved File --------"
    # A .srmise file does not save the full covariance matrix, so it must be
    # recalculated when loading from these files.  For example, here is the
    # nearest-neighbor peak in the file which we used to define the initial
    # baseline.
    cov2 = ModelCovariance()
    ppebl.extracted.fit(fitbaseline=True, cov=cov2, cov_format="default_output")
    p0_saved = cov2.get((0,0))
    w0_saved = cov2.get((0,1))
    a0_saved = cov2.get((0,2))
    print "Nearest-neighbor peak:"
    print "    position = %f +/- %f" %p0_saved
    print "    width == %f +/- %f" %w0_saved
    print "    area = = %f +/- %f" %a0_saved
    print "    Covariance(width, area) = ", cov2.getcovariance((0,1),(0,2))


    print "\n ---------- Alternate Parameterizations ---------"
    ## Different Parameterizations
    # Peaks and baselines may have equivalent parameterizations that are useful
    # in different situations.  For example, the types defined by the
    # GaussianOverR peak function are:
    #   "internal" - Used in diffpy.srmise calculations, explicitly enforces a
    #                maximum peak width
    #   "pwa" - The position, width (full-width at half-maximum), area.
    #   "mu_sigma_area" - The position, width (the distribution standard
    #                     deviation sigma), area.
    #   "default_output" - Defines default format to use in most user-facing
    #                      scenarios. Maps to the "pwa" parameterization.
    #   "default_input" - Defines default format to use when specifying peak
    #                     parameters.  Maps to the "internal" parameterization.
    # All diffpy.srmise peak and baseline functions are required to have the
    # "internal", "default_output", and "default_input" formats.  In many
    # cases, such as polynomial baselines, all of these are equivalent.
    #
    # Suppose you want to know peak widths in terms of the standard deviation
    # sigma of the Gaussian distribution.  It is then appropriate to convert
    # all peaks to the "mu_sigma_area" format.  Valid options for the "parts"
    # keyword are "peaks", "baseline", or a sequence of indices (e.g. [1,2,3]
    # would transform the second, third, and fourth peaks).  If the keyword
    # is omitted, the transformation is attempted for all parts of the fit.
    cov.transform(in_format="pwa", out_format="mu_sigma_area", parts="peaks")
    print "Width (sigma) of nearest-neighbor peak: %f +/- %f" %cov.get((0,1))


    print "\n ------------ Highly Correlated Parameters ------------"
    # Highly-correlated parameters can indicate difficulties constraining the
    # fit.  This function lists all pairs of parameters with an absolute value
    # of correlation which exceeds a given threshold.
    print "|Correlation| > 0.9:"
    print "par1     par2    corr(par1, par2)"
    print "\n".join(str(c) for c in cov.correlationwarning(.9))


    print "\n-------- Estimate coordination shell occupancy ---------"
    # Estimate the scale factor and its uncertainty from first peak's intensity.
    # G_normalized = scale * G_observed
    # dscale = scale * dG_observed/G_observed
    scale = 12./a0[0]
    dscale = scale * a0[1]/a0[0]
    print "Estimate scale factor assuming nearest-neighbor intensity = 12"
    print "Scale factor is %f +/- %f" %(scale, dscale)

    # Reference for number of atoms in coordination shells for FCC.
    # http://chem-faculty.lsu.edu/watkins/MERLOT/cubic_neighbors/cubic_near_neighbors.html
    ideal_intensity = [12, 6, 24, 12, 24, 8, 48, 6, 36, 24, 24, 24]
    
    # Calculated the scaled intensities and uncertainties.
    intensity = []
    for i in range(0, len(cov.model)-1):
        (area, darea) = cov.get((i,2))
        area *= scale
        darea = area*np.sqrt((dscale/scale)**2 + (darea/area)**2)
        intensity.append((ideal_intensity[i], area, darea))
    
    print "\nIntensity"
    print "Ideal: Estimated"
    for i in intensity:
        print "%i: %f +/- %f" %i

    print "\nTotal intensity"
    # It is possible to iterate over peaks directly without using indices.
    # In addition, peak parameters can be accessed using string keys.  For the
    # Gaussian over r all of "position", "width", and "area" are valid.
    total_observed_intensity = 0
    total_ideal_intensity = 0
    for peak, ii in zip(cov.model[:-1], ideal_intensity):
        total_observed_intensity += scale*peak["area"]
        total_ideal_intensity += ii
    print "Ideal: Observed (using estimated scale factor)"
    print "%i: %f" %(total_ideal_intensity, total_observed_intensity)


    ## Save output
    ppe.write("output/query_results.srmise")
    ppe.writepwa("output/query_results.pwa")


    ## Evaluating a model.
    # Although the ModelCovariance object is useful, the model used for fitting
    # can be directly accessed through PDFPeakExtraction as well, albeit
    # without uncertainties.  This is particularly helpful when evaluating a
    # model since the parameters stay in the "internal" format used for
    # calculations.  For example, here we plot the data and every second peak
    # on an arbitrary grid.  Unlike with ModelCovariance, the baseline and
    # peaks are kept separate.
    if plot:
        plt.figure()
        grid = np.arange(2, 10, .01)
        bl = ppe.extracted.baseline
        everysecondpeak = ppe.extracted.model[::2]
        plt.plot(ppe.x, ppe.y, 'o')
        for peak in everysecondpeak:
            plt.plot(grid, bl.value(grid) + peak.value(grid))
        plt.xlim(2, 10)
        plt.show()
Example #5
0
def run(plot=True):

    ## Initialize peak extraction
    # Create peak extraction object
    ppe = PDFPeakExtraction()

    # Load the PDF from a file
    ppe.loadpdf("data/TiO2_fine_qmax26.gr")

    ###### Set up extraction parameters.
    # In this section we'll examine the major extraction parameters in detail.
    # diffpy.srmise strives to provide reasonable default values for these
    # parameters.  For normal use setting the range, baseline, and uncertainty
    # should be sufficient.
    kwds = {}

    ## Range
    # Range defaults to the entire PDF if not specified.
    kwds["rng"] = [1.5, 10.]

    ## dg
    # diffpy.srmise selects model complexity based primarily on the uncertainty
    # of the PDF.  Note that very small uncertainties (<1%) can make peak
    # extraction excessively slow.  In general, the smaller the uncertainty the
    # more complex the model.  PDFs which report no uncertainty, or report
    # unreliable values must be assigned one.  By default, a PDF which does not
    # report uncertainties uses 5% the maximum minus minimum values. Common
    # causes of unreliable uncertainties include oversampling (uncertainties in
    # nearby data are strongly correlated, as for this PDF) and/or
    # integrated diffraction patterns obtained by a method that also introduces
    # correlation to the 1D diffraction pattern.  Consequently, the assumption
    # of both least-squares fitting and the Akaike Information Criterion that
    # the data are at least approximately independently distributed is not
    # valid.  In this case results obtained by diffpy.srmise may be useful,
    # especially when they can be intrepreted in light of prior knowledge, but
    # strong statistical conclusions cannot be drawn.  For additional
    # discussion of this subtle yet important issue see:
    # [1] Egami and Billinge. (2012). Underneath the Bragg Peaks: Structural
    #     Analysis of Complex Materials (2nd ed.). Oxford: Pergamon Press.
    # [2] Granlund, et al. (2015) Acta Crystallographica A, 71(4), 392-409.
    #     doi:10.1107/S2053273315005276
    # [3] Yang, et al. (2014). Journal of Applied Crystallography, 47(4),
    #     1273-1283. doi:10.1107/S1600576714010516
    kwds["dg"] = 0.35  # Play with this value!

    ## baseline
    # As a crystal PDF, a linear baseline crossing the origin is appropriate.
    # Here we define the linear baseline B(r) = -.5*r + 0, and explicitly set
    # the y-intercept as a fixed parameter which will not be fit.  For
    # crystal PDFs the theoretical value of the slope is -4*pi*rho0, where
    # rho0 is the number density.  Nevertheless, imperfect normalization of the
    # PDF means the experimental baseline is proportional to that value.
    blfunc = Polynomial(degree=1)
    slope = -.65  # Play with this value!
    y_intercept = 0.
    kwds["baseline"] = blfunc.actualize([slope, y_intercept],
                                        free=[True, False])
    ## pf
    # The pf (peakfunction) parameter allows setting the shape of peaks to be
    # extracted.  Termination effects are added automatically to the peak
    # function during extraction.  In the harmonic approximation of atomic
    # interactions peaks in the PDF are well approximated by a Gaussian/r.
    # (Note, however, that the values used for peak parameters -- namely
    # position, width, and area -- are for the Gaussian itself).  diffpy.srmise
    # uses width-limited peaks to reduce the likelihood of extracting
    # unphysically wide peaks in regions of high overlap.  The parameter
    # indicates the max fwhm permitted.  By default, diffpy.srmise uses a
    # maximum width of 0.7, which is generally reasonable if the r-axis of the
    # PDF is given in angstroms.  Models where many peaks reach the maximum
    # width, and models that are very sensitive to the choice in maximum width,
    # are strong signs that diffpy.srmise is having difficulty finding peaks
    # which are sufficiently constrained by the data.
    pf = GaussianOverR(0.7)
    kwds["pf"] = [pf
                  ]  # Despite the list, only one entry is currently supported.

    ## qmax
    # PDFs typically report the value of qmax (i.e. the maximum momentum
    # transfer q in the measurement), but it can be specified explicitly also.
    # If the PDF does not report qmax, diffpy.srmise attempts to estimate it
    # directly from the data.  This estimate can also be used by setting qmax
    # to "automatic".  An infinite qmax can be specified by setting qmax to 0,
    # In that case the Nyquist rate is 0 (infinite resolution), and
    # diffpy.srmise does not consider Nyquist sampling or termination effects.
    kwds["qmax"] = 26.0

    ## nyquist
    # This parameter governs whether diffpy.srmise attempts to find a model
    # on a Nyquist-sampled grid with dr=pi/qmax, which is a grid where data
    # uncertainties are least correlated without loss of information.  By
    # default this parameter is True whenever qmax > 0, and generally it
    # should not need to be changed.  Setting it to False allows extracted
    # models retain more complexity because the data appear to have more
    # statistically independent points than they truly do.  For a detailed
    # discussion of Nyquist sampling and the PDF see:
    # [4] Farrow et al. (2011). Physical Review B, 84(13), 134105.
    #     doi:10.1103/PhysRevB.84.134105
    kwds["nyquist"] = True

    ## supersample
    # This parameter dictates the data be oversampled by at least this factor
    # (relative to the Nyquist rate) during the early stages of peak
    # extraction. If the input PDF is even more finely sampled, that level of
    # sampling is used instead.  The default value of 4.0 is ad hoc, but has
    # been empirically sufficient.  Increasing this value may help the peak-
    # finding and clustering process, but reduces speed.
    kwds["supersample"] = 4.0

    ## cres
    # The cres (clustering resolution) parameter governs the sensitivity of the
    # clustering method used by diffpy.srmise.  In short, when the data are
    # being clustered, data which are further than the clustering resolution
    # from any other cluster (measured along the r-axis) are considered to be a
    # new cluster rather than a member of an existing one.  The default value
    # is the Nyquist sampling interval pi/qmax, and on most data it should not
    # greatly impact model complexity.  In some cases making it smaller may
    # help the peak-finding process.  Here it is roughly half the Nyquist
    # interval.
    kwds["cres"] = 0.05

    # Apply peak extraction parameters.
    ppe.setvars(**kwds)

    ## initial_peaks
    # Initial peaks are peaks which are kept fixed during the early stages of
    # peak extraction, effectively condition results upon their values.  Since
    # initial peaks are sometimes dependent on other SrMise parameters (e.g.
    # the peak function used) it is good practice to set them after other
    # parameters.  Although the "initial_peaks" parameter can be set as with
    # the parameters above, SrMise provides helper functions to do so more
    # easily.  There are two basic ways to quickly specify initial peaks:
    # 1) Supplying the approximate position of the peak, and letting
    # diffpy.srmise estimate the peak parameters.
    # 2) Explicit specification of peak parameters.

    ## Initial peaks from approximate positions.
    # This routine estimates peak parameters by finding the peak-like cluster
    # containing the specified point.  It does not search for occluded peaks,
    # so works best on well-separated peaks.  It does, however, take any
    # existing initial peaks into account during estimation.
    positions = [2.0, 4.5]
    for p in positions:
        ppe.estimate_peak(p)  # adds to initial_peaks

    ## Initial peaks from explicit parameters.
    # Adding initial peaks explicitly is similar to defining a baseline.
    # Namely, choosing a peak function and then actualizing it with given
    # parameters. For this example peaks are created from the same GaussianOverR
    # used during extraction, but one could use a different peak function from
    # diffpy.srmise.peaks if desired.  The peak parameters are given in terms
    # terms of position, width (fwhm), and area, and it is important to specify
    # that format is being used so they are correctly changed into the
    # internal parameterization.  Here two peaks are added in a region of
    # overlap, and the width parameter is fixed at a reasonable value to aid
    # convergence in this region.
    pars = [[6.2, 0.25, 2.6], [6.45, 0.25, 2.7], [7.15, 0.25, 5]]
    peaks = []
    for p in pars:
        peaks.append(pf.actualize(p, free=[True, False, True],
                                  in_format="pwa"))
    ppe.add_peaks(peaks)  # adds to initial_peaks

    ## Initial peaks and pruning
    # While initial peaks condition what other peaks can be extracted, by
    # default they can also be pruned if a simpler model appears better.  To
    # prevent this, they can be set as non-removable.
    for ip in ppe.initial_peaks:
        ip.removable = False

    ## Plot initial parameters
    if plot:
        makeplot(ppe)
        plt.title("Initial Peaks")

    ###### Perform peak extraction
    ppe.extract()

    ## Save output
    # The write() method saves a file which preserves all aspects of peak
    # extraction and its results, by convention using the .srmise extension,
    # and which can later be read by diffpy.srmise.
    #
    # The writepwa() method saves a file intended as a human-readable summary.
    # In particular, it reports the position, width (as full-width
    # half-maximum), and area of of extracted peaks.  The reported values
    # are for Gaussians in the radial distribution function (RDF) corresponding
    # to this PDF.
    ppe.write("output/parameter_summary.srmise")
    ppe.writepwa("output/parameter_summary.pwa")

    ## Plot results.
    # Display plot of extracted peak.  It is also possible to plot an existing
    # .srmise file from the command line using
    #     srmise output/TiO2_parameterdetail.srmise --no-extract --plot
    if plot:
        plt.figure()
        makeplot(ppe)
        plt.show()
Example #6
0
def run(plot=True):

    ## Initialize peak extraction
    ppe = PDFPeakExtraction()
    ppe.loadpdf("data/C60_fine_qmax21.gr")

    ## Set up interpolated baseline.
    # The FromSequence baseline creates an interpolated baseline from provided
    # r and G(r) values, either two lists or a file containing (r, G(r)) pairs.
    # The baseline has no parameters. This particular baseline was estimated
    # by fitting interparticle correlations of an FCC lattice of hollow
    # spheres to the PDF.
    blf = FromSequence("data/C60baseline.dat")
    bl = blf.actualize([])

    ## Set up fitting parameters
    # A summary of how parameters impact fitting is given below.
    # "rng" - Same as peak extraction
    # "baseline" - Same as peak extraction
    # "qmax" and "nyquist" - If qmax > 0 and Nyquist is true, fitting is
    #                        performed on a Nyquist-sampled grid.  The data are
    #                        never supersampled first.
    # "dg" - Since the model to fit is prespecified, the uncertainty does not
    #        impact model complexity.  Impact on refined parameter values and
    #        estimated uncertainties as per standard chi-square fitting.
    # "pf" - The peak function used when estimating peak parameters given an
    #        approximate position.  Unike peak extraction, peak fitting never
    #        alters the peak function used by initial peaks.
    # "cres" - Estimation of peak parameters given an approximate position uses
    #          clustering for peak finding.  No other effect on peak fitting.
    # "supersample" - No effect.
    kwds = {}
    kwds["rng"] = [1., 7.25]
    kwds["baseline"] = bl
    kwds["cres"] = 0.05
    kwds["dg"] = 5000  # ad hoc, but gives each point equal weight in fit.
    ppe.setvars(**kwds)

    ## Set up termination ripples
    # Peak fitting never changes the peak function, so termination ripples
    # are not applied automatically as they are in peak extraction.
    # Termination ripples require setting the underlying peak function and qmax.
    # In this case they ared added to the default GaussianOverR peak function.
    # TerminationRipples use the estimation methods of the base peak function.
    pf = TerminationRipples(ppe.pf[0], ppe.qmax)
    ppe.setvars(pf=[pf])

    # Specify some initial peaks using approximate positions.  These use the
    # peak function passed to PDFPeakExtraction instance.
    rough_guess = [1.4, 2.4, 2.8, 3.6, 4.1, 4.5, 4.8, 5.2, 5.4, 5.7, 6.1]
    for g in rough_guess:
        ppe.estimate_peak(g)

    # Specify some peaks explicitly.  These may be constructed from any peak
    # function, or combination of peak functions.
    explicit_guess = [[6.7, .3, 100000], [7.0, .15, 50000]]
    explicit_peaks = Peaks([pf.actualize(eg, in_format="pwa") \
        for eg in explicit_guess])
    ppe.add_peaks(explicit_peaks)

    # Plot initial peaks
    if plot:
        makeplot(ppe)
        plt.title("Initial Peaks")

    # Perform fit.
    ppe.fit()

    ## Save results
    ppe.write("output/fit_initial.srmise")
    ppe.writepwa("output/fit_initial.pwa")

    if plot:
        plt.figure()
        makeplot(ppe)
        plt.show()