Esempio n. 1
0
def run(plot=True):
    """Extract the nearest-neighbor peak from the silver PDF and save it.

    Parameters:
    plot - If True, plot the extracted peak and show the figure.
    """

    ## Initialize peak extraction
    # Build the peak extraction object and read the PDF from a file.
    extractor = PDFPeakExtraction()
    extractor.loadpdf("data/Ag_nyquist_qmax30.gr")

    ## Set up extraction parameters.
    # All parameters are gathered in one dictionary which is then handed to
    # the extraction object.
    #
    # "rng" (range) is the region over which peaks are extracted and fit.
    # The nearest-neighbor silver peak, near 2.9 angstroms, is well isolated,
    # so extracting between 2 and 3.5 angstroms is sufficient.
    #
    # "baseline" defines the PDF baseline, which is linear for a crystal.
    # When a linear baseline is given without numerical parameters,
    # diffpy.srmise tries to estimate them from the data; this usually works
    # well when peaks do not overlap much.
    settings = {
        "rng": [2.0, 3.5],
        "baseline": Polynomial(degree=1),
    }
    extractor.setvars(**settings)

    ## Perform peak extraction
    extractor.extract()

    ## Save output
    # write() saves a file preserving every aspect of the extraction and its
    # results.  By convention it uses the .srmise extension, and it can be
    # read back later by diffpy.srmise.
    #
    # writepwa() saves a human-readable summary giving the position, width
    # (as full-width at half-maximum), and area of the extracted peaks.  The
    # reported values are for Gaussians in the radial distribution function
    # (RDF) corresponding to this PDF.
    extractor.write("output/extract_single_peak.srmise")
    extractor.writepwa("output/extract_single_peak.pwa")

    ## Plot results.
    # Nothing more to do unless a plot was requested.  An existing .srmise
    # file can also be plotted from the command line using
    #     srmise output/extract_single_peak.srmise --no-extract --plot
    # For additional plotting options, run "srmiseplot --help".
    if not plot:
        return
    makeplot(extractor)
    plt.show()
Esempio n. 2
0
                print "Could not save result to '%s'." % options.savefile

        if options.pwafile is not None:
            try:
                ext.writepwa(options.pwafile)
            except SrMiseFileError, err:
                print err
                print "Could not save pwa summary to '%s'." % options.pwafile

        print ext
        if cov is not None:
            print cov

        if options.plot:
            from diffpy.srmise.applications.plot import makeplot
            makeplot(ext)
            plt.show()
        elif options.liveplot:
            plt.show()


def parsepars(mp, parseq):
    """Return actualized model from sequence of strings.

    Each item in parseq must be interpretable as a float, or as
    a float with the character 'c' appended.  If 'c' is appended,
    that parameter will be fixed.

    Parameters:
    mp - A ModelPart instance
    parseq - A sequence of string
Esempio n. 3
0
def run(plot=True):
    """Fit user-supplied initial peaks to the C60 PDF and save the results.

    Parameters:
    plot - If True, plot the initial peaks and the fitted peaks, then show
           the figures.
    """

    ## Initialize peak extraction
    fitter = PDFPeakExtraction()
    fitter.loadpdf("data/C60_fine_qmax21.gr")

    ## Set up interpolated baseline.
    # A FromSequence baseline interpolates between supplied r and G(r)
    # values, given either as two lists or as a file of (r, G(r)) pairs.  It
    # has no parameters, hence the empty list passed to actualize().  This
    # particular baseline was estimated by fitting interparticle correlations
    # of an FCC lattice of hollow spheres to the PDF.
    baseline = FromSequence("data/C60baseline.dat").actualize([])

    ## Set up fitting parameters
    # How each parameter affects fitting:
    # "rng" - Same as peak extraction.
    # "baseline" - Same as peak extraction.
    # "qmax" and "nyquist" - If qmax > 0 and nyquist is true, the fit is
    #                        done on a Nyquist-sampled grid.  The data are
    #                        never supersampled first.
    # "dg" - The model to fit is prespecified, so the uncertainty has no
    #        effect on model complexity.  It affects refined parameter values
    #        and estimated uncertainties as in standard chi-square fitting.
    # "pf" - The peak function used when estimating peak parameters from an
    #        approximate position.  Unlike peak extraction, peak fitting
    #        never alters the peak function used by initial peaks.
    # "cres" - Estimating peak parameters from an approximate position uses
    #          clustering for peak finding.  No other effect on peak fitting.
    # "supersample" - No effect.
    settings = {
        "rng": [1., 7.25],
        "baseline": baseline,
        "cres": 0.05,
        "dg": 5000,  # ad hoc, but gives each point equal weight in fit.
    }
    fitter.setvars(**settings)

    ## Set up termination ripples
    # Because peak fitting never changes the peak function, termination
    # ripples are not applied automatically as they are in peak extraction.
    # They need both the underlying peak function and qmax.  Here they are
    # added to the default GaussianOverR peak function.  TerminationRipples
    # uses the estimation methods of the base peak function.
    ripple_pf = TerminationRipples(fitter.pf[0], fitter.qmax)
    fitter.setvars(pf=[ripple_pf])

    # Initial peaks given only by approximate position.  These use the peak
    # function passed to the PDFPeakExtraction instance.
    for position in (1.4, 2.4, 2.8, 3.6, 4.1, 4.5, 4.8, 5.2, 5.4, 5.7, 6.1):
        fitter.estimate_peak(position)

    # Initial peaks given explicitly as [position, width, area].  These may
    # be built from any peak function, or combination of peak functions.
    actualized = []
    for pwa in ([6.7, .3, 100000], [7.0, .15, 50000]):
        actualized.append(ripple_pf.actualize(pwa, in_format="pwa"))
    fitter.add_peaks(Peaks(actualized))

    # Show the starting model before fitting.
    if plot:
        makeplot(fitter)
        plt.title("Initial Peaks")

    # Perform fit.
    fitter.fit()

    ## Save results
    fitter.write("output/fit_initial.srmise")
    fitter.writepwa("output/fit_initial.pwa")

    # Show the fitted model in a new figure.
    if plot:
        plt.figure()
        makeplot(fitter)
        plt.show()
def run(plot=True):
    """Rank multimodel results by Akaike probability at the experimental dG.

    Loads previously saved models and AICs, classifies the models, prints a
    ranking of the best model of each class, and writes a .pwa summary for
    each of them.

    Parameters:
    plot - If True, plot each ranked model and the Akaike probabilities of
           all classes, then show the figures.
    """

    # Suppress mundane output
    sml.setlevel("warning")

    ## Create multimodeling object and load diffpy.srmise results from file.
    selection = MultimodelSelection()
    selection.load("output/known_dG_models.dat")
    selection.loadaics("output/known_dG_aics.dat")

    ## Use Nyquist sampling
    # Standard AIC analysis assumes data with independent uncertainties.
    # Nyquist sampling minimizes correlations in the PDF, which is as close
    # to independence as the PDF allows.  Only the resampled r grid is used
    # below.
    nyquist_dr = np.pi / selection.ppe.qmax
    r, _y, _dr2, _dy = selection.ppe.resampledata(nyquist_dr)

    ## Classify models
    # Every model is assigned to a class.  Models sharing a class should be
    # essentially identical (same peak parameters, etc.) up to a small
    # tolerance determined by comparing individual peaks.  The best model of
    # a class stands in for all the others of that class in the rest of the
    # analysis.  A tolerance of 0 requires exactly identical models, and
    # larger tolerances let increasingly different models count as
    # "identical."  This step removes a major source of model redundancy,
    # which otherwise weakens AIC-based analysis.  As a rule of thumb,
    # AIC-based analysis is robust to redundant poor models (they contribute
    # very little to the Akaike probabilities in any case), but redundant
    # good models can significantly alter how models are ranked.  See
    # Granlund (2015) for details.
    tolerance = 0.2
    selection.classify(r, tolerance)

    ## Summarize various facts about the analysis.
    num_models = len(selection.results)
    num_classes = len(selection.classes)
    print("------- Multimodeling Summary --------")
    print("Models: %i" % num_models)
    print("Classes: %i (tol=%s)" % (num_classes, tolerance))
    print("Range of dgs: %f-%f" % (selection.dgs[0], selection.dgs[-1]))
    print("Nyquist-sampled data points: %i" % len(r))

    ## Get dG usable as key in analysis.
    # Akaike probabilities were calculated for many assumed values of the
    # experimental uncertainty dG, and each assumed dG serves as the key for
    # retrieving the corresponding results.  Numerical precision can make it
    # hard to recalculate a key exactly, so dg_key() returns the key closest
    # to its argument.
    dG = selection.dg_key(np.mean(selection.ppe.dy))

    ## Find "best" models.
    # In short, the models with greatest Akaike probability.  Akaike
    # probabilities can only be validly compared when calculated for
    # identical data, namely identical PDF values *and* uncertainties, and
    # are only reliable with respect to the actual experiment when using a
    # Nyquist-sampled PDF with experimentally determined uncertainties.
    #
    # The present PDF satisfies these conditions, so the rankings below
    # reflect an AIC-based estimate of which of the tested models the data
    # best support.
    print("\n--------- Model Rankings for dG = %f ---------" % dG)
    print("Rank  Model  Class  Free         AIC   Prob  File")
    for corder in range(len(selection.classes)):
        rank = corder + 1

        ## Generate information about best model in this class.
        # get(dG, *args, **kwds) returns a tuple of values corresponding to
        # its string arguments for the best model in the best class at the
        # given dG.  With the corder keyword it instead uses the corder-th
        # best class (0 is best, 1 is next best, etc.)
        # "model" -> index of model
        # "class" -> index of class
        # "nfree" -> number of free parameters in corresponding model
        # "aic" -> The AIC for this model given uncertainty dG
        # "prob" -> The AIC probability given uncertainty dG
        # Each of these also has a dedicated getter.  For example, the model
        # index can also be obtained using get_model(dG, corder=corder)
        model, cls, nfree, aic, prob = selection.get(
            dG, "model", "class", "nfree", "aic", "prob", corder=corder)

        filename_base = "output/known_dG_m" + str(model)
        pwa_file = filename_base + ".pwa"

        # Print info for this model
        print("%4i  %5i  %5i  %4i  %10.4e %6.3f  %s"
              % (rank, model, cls, nfree, aic, prob, pwa_file))

        # A message added as a comment to saved .pwa file.
        msg = "\n".join((
            "Multimodeling Summary",
            "---------------------",
            "Evaluated at dG: %s" % dG,
            "Model: %i (of %i)" % (model, num_models),
            "Class: %i (of %i, tol=%s)" % (cls, num_classes, tolerance),
            "Akaike probability: %g" % prob,
            "Rank: %i" % rank,
        ))

        # Make this the active model
        selection.setcurrent(model)

        # Save .pwa
        selection.ppe.writepwa(pwa_file, msg)

        # Plot this model
        if plot:
            plt.figure()
            makeplot(selection.ppe, dcif)
            plt.title("Model %i/Class %i (Rank %i, AIC prob=%f)"
                      % (model, cls, rank, prob))
            # Uncomment line below to save figures.
            # plt.savefig(filename_base + ".png", format="png")

    ## 3D plot of Akaike probabilities
    # Shows the Akaike probabilities of all classes as a function of assumed
    # uncertainty dG.  This gives a rough sense of how the models selected by
    # an AIC-based analysis would vary if the experimental uncertainties
    # contributing to the observed G(r) were different.  The Akaike
    # probabilities calculated for the actual experimental uncertainty are
    # highlighted.
    if plot:
        plt.figure()
        selection.plot3dclassprobs(probfilter=[0.0, 1.], highlight=[dG])
        plt.tight_layout()
        # Uncomment line below to save figure.
        #plt.savefig("output/known_dG_probs.png", format="png", bbox_inches="tight")

        plt.show()
Esempio n. 5
0
def run(plot=True):
    """Extract peaks from the TiO2 PDF, walking through each major parameter.

    Parameters:
    plot - If True, plot the initial peaks and the extracted peaks, then
           show the figures.
    """

    ## Initialize peak extraction
    # Build the peak extraction object and read the PDF from a file.
    extractor = PDFPeakExtraction()
    extractor.loadpdf("data/TiO2_fine_qmax26.gr")

    ###### Set up extraction parameters.
    # The major extraction parameters are examined one by one below.
    # diffpy.srmise strives to supply reasonable defaults for all of them;
    # for normal use, setting the range, baseline, and uncertainty should be
    # sufficient.
    settings = {}

    ## Range
    # When not specified, the range is the entire PDF.
    settings["rng"] = [1.5, 10.]

    ## dg
    # Model complexity is selected primarily from the uncertainty of the PDF.
    # In general, the smaller the uncertainty the more complex the model, and
    # very small uncertainties (<1%) can make peak extraction excessively
    # slow.  A PDF that reports no uncertainty, or reports unreliable values,
    # must be assigned one.  By default, a PDF which does not report
    # uncertainties uses 5% of the maximum minus minimum values.  Common
    # causes of unreliable uncertainties include oversampling (uncertainties
    # in nearby data are strongly correlated, as for this PDF) and/or
    # integrated diffraction patterns obtained by a method that also
    # introduces correlation to the 1D diffraction pattern.  In that
    # situation the assumption, shared by least-squares fitting and the
    # Akaike Information Criterion, that the data are at least approximately
    # independently distributed is not valid.  Results obtained by
    # diffpy.srmise may still be useful, especially when they can be
    # interpreted in light of prior knowledge, but strong statistical
    # conclusions cannot be drawn.  For additional discussion of this subtle
    # yet important issue see:
    # [1] Egami and Billinge. (2012). Underneath the Bragg Peaks: Structural
    #     Analysis of Complex Materials (2nd ed.). Oxford: Pergamon Press.
    # [2] Granlund, et al. (2015) Acta Crystallographica A, 71(4), 392-409.
    #     doi:10.1107/S2053273315005276
    # [3] Yang, et al. (2014). Journal of Applied Crystallography, 47(4),
    #     1273-1283. doi:10.1107/S1600576714010516
    settings["dg"] = 0.35  # Play with this value!

    ## baseline
    # For a crystal PDF a linear baseline crossing the origin is appropriate.
    # The baseline defined here is B(r) = slope*r + 0, with the y-intercept
    # explicitly fixed so that it will not be fit.  For crystal PDFs the
    # theoretical value of the slope is -4*pi*rho0, where rho0 is the number
    # density.  Nevertheless, imperfect normalization of the PDF means the
    # experimental baseline is only proportional to that value.
    linear_baseline = Polynomial(degree=1)
    slope, y_intercept = -.65, 0.  # Play with the slope!
    settings["baseline"] = linear_baseline.actualize(
        [slope, y_intercept], free=[True, False])

    ## pf
    # The pf (peak function) parameter sets the shape of the peaks to be
    # extracted.  Termination effects are added automatically to the peak
    # function during extraction.  In the harmonic approximation of atomic
    # interactions, peaks in the PDF are well approximated by a Gaussian/r.
    # (Note, however, that the values used for peak parameters -- namely
    # position, width, and area -- are for the Gaussian itself).
    # diffpy.srmise uses width-limited peaks to make it less likely that
    # unphysically wide peaks are extracted in regions of high overlap; the
    # argument below is the maximum fwhm permitted.  The default maximum
    # width is 0.7, which is generally reasonable if the r-axis of the PDF is
    # given in angstroms.  Models where many peaks reach the maximum width,
    # and models that are very sensitive to the choice of maximum width, are
    # strong signs that diffpy.srmise is having difficulty finding peaks
    # which are sufficiently constrained by the data.
    peak_function = GaussianOverR(0.7)
    # Despite the list, only one entry is currently supported.
    settings["pf"] = [peak_function]

    ## qmax
    # PDFs typically report qmax (i.e. the maximum momentum transfer q in the
    # measurement), but it can also be specified explicitly.  If the PDF does
    # not report qmax, diffpy.srmise attempts to estimate it directly from
    # the data.  That estimate can also be requested by setting qmax to
    # "automatic".  Setting qmax to 0 specifies an infinite qmax.  In that
    # case the Nyquist rate is 0 (infinite resolution), and diffpy.srmise
    # does not consider Nyquist sampling or termination effects.
    settings["qmax"] = 26.0

    ## nyquist
    # Governs whether diffpy.srmise attempts to find a model on a
    # Nyquist-sampled grid with dr=pi/qmax, the grid on which data
    # uncertainties are least correlated without loss of information.  By
    # default this is True whenever qmax > 0, and generally it should not
    # need to be changed.  Setting it to False allows extracted models to
    # retain more complexity, because the data then appear to have more
    # statistically independent points than they truly do.  For a detailed
    # discussion of Nyquist sampling and the PDF see:
    # [4] Farrow et al. (2011). Physical Review B, 84(13), 134105.
    #     doi:10.1103/PhysRevB.84.134105
    settings["nyquist"] = True

    ## supersample
    # Requires the data to be oversampled by at least this factor (relative
    # to the Nyquist rate) during the early stages of peak extraction.  If
    # the input PDF is even more finely sampled, that level of sampling is
    # used instead.  The default value of 4.0 is ad hoc, but has been
    # empirically sufficient.  Increasing it may help the peak-finding and
    # clustering process, but reduces speed.
    settings["supersample"] = 4.0

    ## cres
    # The cres (clustering resolution) parameter governs the sensitivity of
    # the clustering method used by diffpy.srmise.  In short, while the data
    # are being clustered, data further than the clustering resolution from
    # any other cluster (measured along the r-axis) are treated as a new
    # cluster rather than a member of an existing one.  The default value is
    # the Nyquist sampling interval pi/qmax, and on most data it should not
    # greatly impact model complexity.  In some cases making it smaller may
    # help the peak-finding process.  Here it is roughly half the Nyquist
    # interval.
    settings["cres"] = 0.05

    # Apply peak extraction parameters.
    extractor.setvars(**settings)

    ## initial_peaks
    # Initial peaks are kept fixed during the early stages of peak
    # extraction, effectively conditioning the results upon their values.
    # Since initial peaks sometimes depend on other SrMise parameters (e.g.
    # the peak function used), it is good practice to set them after the
    # other parameters.  Although "initial_peaks" can be set in the same way
    # as the parameters above, SrMise provides helper functions that make it
    # easier.  There are two basic ways to quickly specify initial peaks:
    # 1) Supplying the approximate position of the peak, and letting
    #    diffpy.srmise estimate the peak parameters.
    # 2) Explicit specification of peak parameters.

    ## Initial peaks from approximate positions.
    # This routine estimates peak parameters by finding the peak-like cluster
    # containing the specified point.  It does not search for occluded peaks,
    # so it works best on well-separated peaks.  It does, however, take any
    # existing initial peaks into account during estimation.
    for position in (2.0, 4.5):
        extractor.estimate_peak(position)  # adds to initial_peaks

    ## Initial peaks from explicit parameters.
    # Adding initial peaks explicitly is similar to defining a baseline:
    # choose a peak function and then actualize it with given parameters.
    # The peaks here are created from the same GaussianOverR used during
    # extraction, but a different peak function from diffpy.srmise.peaks
    # could be used if desired.  The peak parameters are given in terms of
    # position, width (fwhm), and area, and it is important to state that
    # this format is being used so that they are correctly converted to the
    # internal parameterization.  The three peaks below lie in a region of
    # overlap, and their width parameter is fixed at a reasonable value to
    # aid convergence in this region.
    pwa_sets = [[6.2, 0.25, 2.6], [6.45, 0.25, 2.7], [7.15, 0.25, 5]]
    explicit_peaks = [
        peak_function.actualize(pwa, free=[True, False, True], in_format="pwa")
        for pwa in pwa_sets
    ]
    extractor.add_peaks(explicit_peaks)  # adds to initial_peaks

    ## Initial peaks and pruning
    # While initial peaks condition what other peaks can be extracted, by
    # default they can also be pruned if a simpler model appears better.
    # Marking them non-removable prevents this.
    for initial_peak in extractor.initial_peaks:
        initial_peak.removable = False

    ## Plot initial parameters
    if plot:
        makeplot(extractor)
        plt.title("Initial Peaks")

    ###### Perform peak extraction
    extractor.extract()

    ## Save output
    # write() saves a file preserving every aspect of the extraction and its
    # results.  By convention it uses the .srmise extension, and it can be
    # read back later by diffpy.srmise.
    #
    # writepwa() saves a human-readable summary giving the position, width
    # (as full-width at half-maximum), and area of the extracted peaks.  The
    # reported values are for Gaussians in the radial distribution function
    # (RDF) corresponding to this PDF.
    extractor.write("output/parameter_summary.srmise")
    extractor.writepwa("output/parameter_summary.pwa")

    ## Plot results.
    # Display the extracted peaks in a new figure.  An existing .srmise file
    # can also be plotted from the command line using
    #     srmise output/parameter_summary.srmise --no-extract --plot
    if plot:
        plt.figure()
        makeplot(extractor)
        plt.show()
Esempio n. 6
0
def run(plot=True):
    """Report the classes that are best for at least one assumed dG.

    Loads previously saved models and AICs, classifies the models, and for
    every class that has the greatest Akaike probability at one or more of
    the tested dG values prints a summary line and writes a .pwa file for
    its best model.

    Parameters:
    plot - If True, plot each reported model and the Akaike probabilities
           of the classes, then show the figures.
    """

    # Suppress mundane output
    sml.setlevel("warning")

    ## Create multimodeling object and load diffpy.srmise results from file.
    ms = MultimodelSelection()
    ms.load("output/unknown_dG_models.dat")
    ms.loadaics("output/unknown_dG_aics.dat")

    ## Use Nyquist sampling
    # Standard AIC analysis assumes the data have independent uncertainties.
    # Nyquist sampling minimizes correlations in the PDF, which is the closest
    # approximation to independence possible for the PDF.  Only the resampled
    # r grid is used below.
    dr = np.pi / ms.ppe.qmax
    (r, y, dr2, dy) = ms.ppe.resampledata(dr)

    ## Classify models
    # All models are placed into classes.  Models in the same class
    # should be essentially identical (same peak parameters, etc.)
    # up to a small tolerance determined by comparing individual peaks. The
    # best model in each class essentially stands in for all the other models
    # in a class in the rest of the analysis.  A tolerance of 0 indicates the
    # models must be exactly identical.  Increasing the tolerance allows
    # increasingly different models to be classified as "identical."  This step
    # reduces a major source of model redundancy, which otherwise weakens
    # AIC-based analysis.  As a rule of thumb, AIC-based analysis is robust
    # to redundant poor models (since they contribute very little to the Akaike
    # probabilities in any case), but redundant good models can significantly
    # alter how models are ranked.  See Granlund (2015) for details.
    tolerance = 0.2
    ms.classify(r, tolerance)

    ## Summarize various facts about the analysis.
    # Each print below takes a single parenthesized expression, which prints
    # identically whether print is a statement or a function.
    num_models = len(ms.results)
    num_classes = len(ms.classes)
    print("------- Multimodeling Summary --------")
    print("Models: %i" % num_models)
    print("Classes: %i (tol=%s)" % (num_classes, tolerance))
    print("Range of dgs: %f-%f" % (ms.dgs[0], ms.dgs[-1]))
    print("Nyquist-sampled data points: %i" % len(r))

    ## Find "best" models.
    # In short, models with greatest Akaike probability.  Akaike probabilities
    # can only be validly compared if they were calculated for identical data,
    # namely identical PDF values *and* uncertainties, and are only reliable
    # with respect to the actual experiment when using a Nyquist-sampled PDF
    # with experimentally determined uncertainties.
    #
    # In the present case the PDF uncertainties are not reliable, and so the
    # analysis cannot be performed by specifying the experimental uncertainty
    # dG.  Instead, perform a weaker analysis, calculating the Akaike
    # probabilities for a range of assumed dG, and identifying classes which
    # have greatest probability at least once.  The classes identified in this
    # way have no particular information-theoretic relationship, but if the
    # actual experimental uncertainty is in the interval tested, the best
    # class at the experimental uncertainty is among them.

    # Get classes which are best for one or more dG, and the specific dG in that
    # interval at which they attain greatest Akaike probability.
    best_classes = np.unique([ms.get_class(dg) for dg in ms.dgs])
    best_dGs = []
    for cls in best_classes:
        # Probabilities at dG values where another class is best are zeroed
        # so that argmax only considers the dG values belonging to this class.
        cls_probs = [ms.get_prob(dg) if ms.get_class(dg) == cls else 0
                     for dg in ms.dgs]
        best_dGs.append(ms.dgs[np.argmax(cls_probs)])

    # The header has no conversion specifier, so it must not be combined with
    # the % operator: doing so raises TypeError for a plain float operand.
    print("\n--------- Best models for at least one dG ---------")
    print("   Best dG  Model  Class  Free       AIC     Prob  File")
    for dG in best_dGs:

        ## Generate information about best model.
        # The get(dG, *args, **kwds) method returns a tuple of values
        # corresponding to string arguments for the best model in best class at
        # given dG. When the corder keyword is given it returns the model from
        # the corderth best class (where 0 is best, 1 is next best, etc.)
        # "model" -> index of model
        # "class" -> index of class
        # "nfree" -> number of free parameters in corresponding model
        # "aic" -> The AIC for this model given uncertainty dG
        # "prob" -> The AIC probability given uncertainty dG
        # These all have dedicated getter functions.
        (model, cls, nfree, aic, prob) = \
            ms.get(dG, "model", "class", "nfree", "aic", "prob")

        filename_base = "output/unknown_dG_m" + str(model)

        # Print info for this model
        print("%10.4e  %5i  %5i  %4i  %10.4e %6.3f  %s"
              % (dG, model, cls, nfree, aic, prob, filename_base + ".pwa"))

        # A message added as a comment to saved .pwa file.  best_from lists
        # every tested dG at which this class is the best one.
        best_from = [dg for dg in ms.dgs if ms.get_class(dg) == cls]
        msg = [
            "Multimodeling Summary",
            "---------------------",
            "Model: %i (of %i)" % (model, num_models),
            "Class: %i (of %i, tol=%s)" % (cls, num_classes, tolerance),
            "Best model from dG: %s-%s" % (best_from[0], best_from[-1]),
            "Evaluated at dG: %s" % dG,
            "Akaike probability: %g" % prob,
        ]
        msg = "\n".join(msg)

        # Make this the active model
        ms.setcurrent(model)

        # Save .pwa
        ms.ppe.writepwa(filename_base + ".pwa", msg)

        # Plot this model
        if plot:
            plt.figure()
            makeplot(ms.ppe, dcif)
            plt.title("Model %i/Class %i (Best dG=%f, AIC prob=%f)"
                      % (model, cls, dG, prob))
            # Uncomment line below to save figures.
            # plt.savefig(filename_base + ".png", format="png")

    ## 3D plot of Akaike probabilities
    # This plot shows the Akaike probabilities of all classes as a function
    # of assumed uncertainty dG.  This gives a rough sense of how the models
    # selected by an AIC-based analysis would vary if the experimental
    # uncertainties contributing to the observed G(r) were different.  Models
    # are highlighted at the various dG values found above.
    if plot:
        plt.figure()
        ms.plot3dclassprobs(probfilter=[0.1, 1.], highlight=best_dGs)
        plt.tight_layout()
        # Uncomment line below to save figure.
        #plt.savefig("output/unknown_dG_probs.png", format="png", bbox_inches="tight")

    if plot:
        plt.show()
Esempio n. 7
0
def run(plot=True):
    """Run a fully annotated peak extraction on the TiO2 example PDF.

    Loads "data/TiO2_fine_qmax26.gr", sets every major extraction parameter
    explicitly, seeds the extraction with non-removable initial peaks, and
    saves the result as "output/parameter_summary.srmise" and
    "output/parameter_summary.pwa".

    Parameters:
    plot - If true, plot the initial peaks and the extracted model, then
           call plt.show().
    """

    ## Initialize peak extraction
    # Create the peak extraction object and load the PDF from a file.
    extractor = PDFPeakExtraction()
    extractor.loadpdf("data/TiO2_fine_qmax26.gr")

    ###### Set up extraction parameters.
    # The major extraction parameters are examined one at a time below and
    # then applied together.  diffpy.srmise strives to provide reasonable
    # default values for all of them; for normal use setting the range,
    # baseline, and uncertainty should be sufficient.

    ## rng
    # The region over which peaks are extracted.  Defaults to the entire PDF
    # if not specified.
    fit_range = [1.5, 10.]

    ## dg
    # Model complexity is selected primarily from the uncertainty of the PDF:
    # in general, the smaller the uncertainty the more complex the model, and
    # very small uncertainties (<1%) can make peak extraction excessively
    # slow.  A PDF which reports no uncertainty, or reports unreliable values,
    # must be assigned one.  By default a PDF without reported uncertainties
    # uses 5% of the difference between its maximum and minimum values.
    #
    # Common causes of unreliable uncertainties include oversampling
    # (uncertainties in nearby data are strongly correlated, as for this PDF)
    # and/or integrated diffraction patterns obtained by a method that also
    # introduces correlation to the 1D diffraction pattern.  In that case the
    # assumption shared by least-squares fitting and the Akaike Information
    # Criterion -- that the data are at least approximately independently
    # distributed -- is not valid.  Results obtained by diffpy.srmise may
    # still be useful, especially when they can be interpreted in light of
    # prior knowledge, but strong statistical conclusions cannot be drawn.
    # For additional discussion of this subtle yet important issue see:
    # [1] Egami and Billinge. (2012). Underneath the Bragg Peaks: Structural
    #     Analysis of Complex Materials (2nd ed.). Oxford: Pergamon Press.
    # [2] Granlund, et al. (2015) Acta Crystallographica A, 71(4), 392-409.
    #     doi:10.1107/S2053273315005276
    # [3] Yang, et al. (2014). Journal of Applied Crystallography, 47(4),
    #     1273-1283. doi:10.1107/S1600576714010516
    uncertainty = 0.35  # Play with this value!

    ## baseline
    # As a crystal PDF, a linear baseline crossing the origin is appropriate.
    # The baseline defined here is B(r) = -.65*r + 0, with the y-intercept
    # explicitly marked as a fixed parameter which will not be fit.  For
    # crystal PDFs the theoretical slope is -4*pi*rho0, where rho0 is the
    # number density.  Nevertheless, imperfect normalization of the PDF means
    # the experimental baseline is only proportional to that value.
    bl_slope = -.65  # Play with this value!
    bl_intercept = 0.
    baseline = Polynomial(degree=1).actualize([bl_slope, bl_intercept],
                                              free=[True, False])

    ## pf
    # The pf (peak function) parameter sets the shape of the peaks to be
    # extracted.  Termination effects are added automatically to the peak
    # function during extraction.  In the harmonic approximation of atomic
    # interactions, peaks in the PDF are well approximated by a Gaussian/r.
    # (Note, however, that the values used for peak parameters -- namely
    # position, width, and area -- are for the Gaussian itself.)
    #
    # diffpy.srmise uses width-limited peaks to reduce the likelihood of
    # extracting unphysically wide peaks in regions of high overlap, and the
    # argument below is the maximum fwhm permitted.  The default maximum
    # width is 0.7, which is generally reasonable if the r-axis of the PDF is
    # given in angstroms.  Models where many peaks reach the maximum width,
    # and models that are very sensitive to the choice of maximum width, are
    # strong signs that diffpy.srmise is having difficulty finding peaks
    # which are sufficiently constrained by the data.
    peak_function = GaussianOverR(0.7)

    ## qmax
    # PDFs typically report the value of qmax (i.e. the maximum momentum
    # transfer q in the measurement), but it can also be specified
    # explicitly.  If the PDF does not report qmax, diffpy.srmise attempts to
    # estimate it directly from the data; that estimate can also be requested
    # by setting qmax to "automatic".  An infinite qmax can be specified by
    # setting qmax to 0.  In that case the Nyquist rate is 0 (infinite
    # resolution), and diffpy.srmise does not consider Nyquist sampling or
    # termination effects.
    qmax = 26.0

    ## nyquist
    # Governs whether diffpy.srmise attempts to find a model on a
    # Nyquist-sampled grid with dr=pi/qmax, which is a grid where data
    # uncertainties are least correlated without loss of information.  By
    # default this is True whenever qmax > 0, and generally it should not
    # need to be changed.  Setting it to False allows extracted models to
    # retain more complexity because the data appear to have more
    # statistically independent points than they truly do.  For a detailed
    # discussion of Nyquist sampling and the PDF see:
    # [4] Farrow et al. (2011). Physical Review B, 84(13), 134105.
    #     doi:10.1103/PhysRevB.84.134105
    use_nyquist = True

    ## supersample
    # Dictates that the data be oversampled by at least this factor (relative
    # to the Nyquist rate) during the early stages of peak extraction.  If
    # the input PDF is even more finely sampled, that level of sampling is
    # used instead.  The default value of 4.0 is ad hoc, but has been
    # empirically sufficient.  Increasing this value may help the
    # peak-finding and clustering process, but reduces speed.
    supersample = 4.0

    ## cres
    # The cres (clustering resolution) parameter governs the sensitivity of
    # the clustering method used by diffpy.srmise.  In short, when the data
    # are being clustered, data which are further than the clustering
    # resolution from any other cluster (measured along the r-axis) are
    # considered to be a new cluster rather than a member of an existing one.
    # The default value is the Nyquist sampling interval pi/qmax, and on most
    # data it should not greatly impact model complexity.  In some cases
    # making it smaller may help the peak-finding process.  Here it is
    # roughly half the Nyquist interval.
    clustering_resolution = 0.05

    # Apply peak extraction parameters.  Despite being passed as a list, only
    # one peak function entry is currently supported.
    extractor.setvars(rng=fit_range,
                      dg=uncertainty,
                      baseline=baseline,
                      pf=[peak_function],
                      qmax=qmax,
                      nyquist=use_nyquist,
                      supersample=supersample,
                      cres=clustering_resolution)

    ## initial_peaks
    # Initial peaks are kept fixed during the early stages of peak
    # extraction, effectively conditioning results upon their values.  Since
    # initial peaks are sometimes dependent on other SrMise parameters (e.g.
    # the peak function used) it is good practice to set them after the other
    # parameters.  Although "initial_peaks" can be set like the parameters
    # above, SrMise provides helper functions to do so more easily.  There
    # are two basic ways to quickly specify initial peaks:
    # 1) Supplying the approximate position of the peak, and letting
    #    diffpy.srmise estimate the peak parameters.
    # 2) Explicit specification of peak parameters.

    ## Initial peaks from approximate positions.
    # This routine estimates peak parameters by finding the peak-like cluster
    # containing the specified point.  It does not search for occluded peaks,
    # so it works best on well-separated peaks.  It does, however, take any
    # existing initial peaks into account during estimation.
    for approx_position in [2.0, 4.5]:
        extractor.estimate_peak(approx_position)  # adds to initial_peaks

    ## Initial peaks from explicit parameters.
    # Adding initial peaks explicitly is similar to defining a baseline:
    # choose a peak function and then actualize it with given parameters.
    # The peaks here are created from the same GaussianOverR used during
    # extraction, but a different peak function from diffpy.srmise.peaks
    # could be used if desired.  The parameters are given in terms of
    # position, width (fwhm), and area, and it is important to specify that
    # this format is being used so they are correctly changed into the
    # internal parameterization.  The peaks are added in a region of overlap,
    # with the width parameter fixed at a reasonable value to aid convergence
    # in this region.
    explicit_pwa = [[6.2, 0.25, 2.6], [6.45, 0.25, 2.7], [7.15, 0.25, 5]]
    explicit_peaks = [
        peak_function.actualize(pwa, free=[True, False, True],
                                in_format="pwa")
        for pwa in explicit_pwa
    ]
    extractor.add_peaks(explicit_peaks)  # adds to initial_peaks

    ## Initial peaks and pruning
    # While initial peaks condition what other peaks can be extracted, by
    # default they can also be pruned if a simpler model appears better.
    # Marking them as non-removable prevents this.
    for initial_peak in extractor.initial_peaks:
        initial_peak.removable = False

    ## Plot initial parameters
    if plot:
        makeplot(extractor)
        plt.title("Initial Peaks")

    ###### Perform peak extraction
    extractor.extract()

    ## Save output
    # The write() method saves a file which preserves all aspects of peak
    # extraction and its results, by convention using the .srmise extension,
    # and which can later be read by diffpy.srmise.
    #
    # The writepwa() method saves a file intended as a human-readable
    # summary.  In particular, it reports the position, width (as full-width
    # at half-maximum), and area of extracted peaks.  The reported values are
    # for Gaussians in the radial distribution function (RDF) corresponding
    # to this PDF.
    extractor.write("output/parameter_summary.srmise")
    extractor.writepwa("output/parameter_summary.pwa")

    ## Plot results.
    # Display a plot of the extracted peaks.  It is also possible to plot an
    # existing .srmise file from the command line using
    #     srmise output/parameter_summary.srmise --no-extract --plot
    if not plot:
        return
    plt.figure()
    makeplot(extractor)
    plt.show()
def run(plot=True):
    """Rank multimodel results at the experimentally determined uncertainty.

    Loads previously saved models and AICs from "output/known_dG_models.dat"
    and "output/known_dG_aics.dat", classifies the models on a Nyquist-sampled
    grid, prints a ranking of the best model of every class at the dG key
    closest to the mean reported uncertainty, and writes one .pwa summary per
    ranked model to "output/known_dG_m<model>.pwa".

    All output goes through single-argument print(...) calls, which produce
    the same text whether interpreted as a Python 2 print statement or as the
    Python 3 print function.

    Parameters:
    plot - If true, plot each ranked model and the Akaike probabilities of
           all classes, then call plt.show().
    """

    # Suppress mundane output
    sml.setlevel("warning")

    ## Create multimodeling object and load diffpy.srmise results from file.
    ms = MultimodelSelection()
    ms.load("output/known_dG_models.dat")
    ms.loadaics("output/known_dG_aics.dat")

    ## Use Nyquist sampling
    # Standard AIC analysis assumes the data have independent uncertainties.
    # Nyquist sampling minimizes correlations in the PDF, which is the closest
    # approximation to independence possible for the PDF.
    dr = np.pi/ms.ppe.qmax
    # Only the resampled r grid is needed below; the other returned values
    # are unpacked to keep the four-value return explicit.
    (r, y, dr2, dy) = ms.ppe.resampledata(dr)

    ## Classify models
    # All models are placed into classes.  Models in the same class
    # should be essentially identical (same peak parameters, etc.)
    # up to a small tolerance determined by comparing individual peaks. The
    # best model in each class essentially stands in for all the other models
    # in a class in the rest of the analysis.  A tolerance of 0 indicates the
    # models must be exactly identical.  Increasing the tolerance allows
    # increasingly different models to be classified as "identical."  This step
    # reduces a major source of model redundancy, which otherwise weakens
    # AIC-based analysis.  As a rule of thumb, AIC-based analysis is robust
    # to redundant poor models (since they contribute very little to the Akaike
    # probabilities in any case), but redundant good models can significantly
    # alter how models are ranked.  See Granlund (2015) for details.
    tolerance = 0.2
    ms.classify(r, tolerance)

    ## Summarize various facts about the analysis.
    num_models = len(ms.results)
    num_classes = len(ms.classes)
    print("------- Multimodeling Summary --------")
    print("Models: %i" % num_models)
    print("Classes: %i (tol=%s)" % (num_classes, tolerance))
    print("Range of dgs: %f-%f" % (ms.dgs[0], ms.dgs[-1]))
    print("Nyquist-sampled data points: %i" % len(r))

    ## Get dG usable as key in analysis.
    # The Akaike probabilities were calculated for many assumed values of the
    # experimental uncertainty dG, and each of these assumed dG is used as a
    # key when obtaining the corresponding results.  Numerical precision can
    # make recalculating the exact value difficult, so the dg_key method
    # returns the key closest to its argument.
    dG = ms.dg_key(np.mean(ms.ppe.dy))

    ## Find "best" models.
    # In short, models with greatest Akaike probability.  Akaike probabilities
    # can only be validly compared if they were calculated for identical data,
    # namely identical PDF values *and* uncertainties, and are only reliable
    # with respect to the actual experiment when using a Nyquist-sampled PDF
    # with experimentally determined uncertainties.
    #
    # The present PDF satisfies these conditions, so the rankings below reflect
    # an AIC-based estimate of which of the tested models the data best support.
    print("\n--------- Model Rankings for dG = %f ---------" % dG)
    print("Rank  Model  Class  Free         AIC   Prob  File")
    for corder in range(num_classes):
        # corder counts classes from best (0) downward; ranks are 1-based.
        rank = corder + 1

        ## Generate information about best model in the corder-th best class.
        # The get(dG, *args, **kwds) method returns a tuple of values
        # corresponding to string arguments for the best model in best class at
        # given dG. When the corder keyword is given it returns the model from
        # the corderth best class (where 0 is best, 1 is next best, etc.)
        # "model" -> index of model
        # "class" -> index of class
        # "nfree" -> number of free parameters in corresponding model
        # "aic" -> The AIC for this model given uncertainty dG
        # "prob" -> The AIC probability given uncertainty dG
        # These all have dedicated getter functions.  For example, the model
        # index can also be obtained using get_model(dG, corder=corder)
        (model, cls, nfree, aic, prob) = ms.get(
            dG, "model", "class", "nfree", "aic", "prob", corder=corder)

        filename_base = "output/known_dG_m"+str(model)
        pwa_path = filename_base + ".pwa"

        # Print info for this model
        print("%4i  %5i  %5i  %4i  %10.4e %6.3f  %s"
              % (rank, model, cls, nfree, aic, prob, pwa_path))

        # A message added as a comment to saved .pwa file.
        msg = "\n".join([
            "Multimodeling Summary",
            "---------------------",
            "Evaluated at dG: %s" % dG,
            "Model: %i (of %i)" % (model, num_models),
            "Class: %i (of %i, tol=%s)" % (cls, num_classes, tolerance),
            "Akaike probability: %g" % prob,
            "Rank: %i" % rank,
        ])

        # Make this the active model
        ms.setcurrent(model)

        # Save .pwa
        ms.ppe.writepwa(pwa_path, msg)

        # Plot this model
        if plot:
            plt.figure()
            makeplot(ms.ppe, dcif)
            plt.title("Model %i/Class %i (Rank %i, AIC prob=%f)"
                      % (model, cls, rank, prob))
            # Uncomment line below to save figures.
            # plt.savefig(filename_base + ".png", format="png")

    ## 3D plot of Akaike probabilities
    # This plot shows the Akaike probabilities of all classes as a function
    # of assumed uncertainty dG.  This gives a rough sense of how the models
    # selected by an AIC-based analysis would vary if the experimental
    # uncertainties contributing to the observed G(r) were different.  The
    # Akaike probabilities calculated for the actual experimental uncertainty
    # are highlighted.
    if plot:
        plt.figure()
        ms.plot3dclassprobs(probfilter=[0.0, 1.], highlight=[dG])
        plt.tight_layout()
        # Uncomment line below to save figure.
        #plt.savefig("output/known_dG_probs.png", format="png", bbox_inches="tight")

        plt.show()
Esempio n. 9
0
        if options.pwafile is not None:
            try:
                ext.writepwa(options.pwafile)
            except SrMiseFileError, err:
                print err
                print "Could not save pwa summary to '%s'." %options.pwafile

        
        print ext
        if cov is not None:
            print cov

        if options.plot:
            from diffpy.srmise.applications.plot import makeplot
            makeplot(ext)
            plt.show()
        elif options.liveplot:
            plt.show()

def parsepars(mp, parseq):
    """Return actualized model from a sequence of strings.

    Each item in parseq must be interpretable as a float, or as
    a float with the character 'c' appended.  If 'c' is appended,
    that parameter will be fixed.

    Parameters:
    mp - A ModelPart instance
    parseq - A sequence of strings
    """
    # NOTE(review): no implementation follows the docstring here, so as
    # written this function returns None -- confirm against the full source.
Esempio n. 10
0
def run(plot=True):
    """Fit a prespecified set of initial peaks to the C60 example PDF.

    Loads "data/C60_fine_qmax21.gr", uses an interpolated baseline read from
    "data/C60baseline.dat", seeds peaks from rough positions and from explicit
    parameters, runs the fit, and saves the result as
    "output/fit_initial.srmise" and "output/fit_initial.pwa".

    Parameters:
    plot - If true, plot the initial peaks and the fitted model, then call
           plt.show().
    """

    ## Initialize peak extraction
    fitter = PDFPeakExtraction()
    fitter.loadpdf("data/C60_fine_qmax21.gr")

    ## Set up interpolated baseline.
    # The FromSequence baseline creates an interpolated baseline from provided
    # r and G(r) values, either two lists or a file containing (r, G(r))
    # pairs.  It has no parameters, hence the empty list passed to
    # actualize().  This particular baseline was estimated by fitting
    # interparticle correlations of an FCC lattice of hollow spheres to the
    # PDF.
    baseline = FromSequence("data/C60baseline.dat").actualize([])

    ## Set up fitting parameters
    # How each parameter impacts fitting:
    # "rng" - Same as peak extraction
    # "baseline" - Same as peak extraction
    # "qmax" and "nyquist" - If qmax > 0 and Nyquist is true, fitting is
    #                        performed on a Nyquist-sampled grid.  The data are
    #                        never supersampled first.
    # "dg" - Since the model to fit is prespecified, the uncertainty does not
    #        impact model complexity.  Impact on refined parameter values and
    #        estimated uncertainties as per standard chi-square fitting.
    # "pf" - The peak function used when estimating peak parameters given an
    #        approximate position.  Unlike peak extraction, peak fitting never
    #        alters the peak function used by initial peaks.
    # "cres" - Estimation of peak parameters given an approximate position uses
    #          clustering for peak finding.  No other effect on peak fitting.
    # "supersample" - No effect.
    fitter.setvars(
        rng=[1., 7.25],
        baseline=baseline,
        cres=0.05,
        dg=5000,  # ad hoc, but gives each point equal weight in fit.
    )

    ## Set up termination ripples
    # Peak fitting never changes the peak function, so termination ripples
    # are not applied automatically as they are in peak extraction.
    # Termination ripples require setting the underlying peak function and
    # qmax.  In this case they are added to the default GaussianOverR peak
    # function.  TerminationRipples use the estimation methods of the base
    # peak function.
    rippled_pf = TerminationRipples(fitter.pf[0], fitter.qmax)
    fitter.setvars(pf=[rippled_pf])

    # Specify some initial peaks using approximate positions.  These use the
    # peak function passed to the PDFPeakExtraction instance.
    for approx_position in [1.4, 2.4, 2.8, 3.6, 4.1, 4.5, 4.8, 5.2, 5.4, 5.7,
                            6.1]:
        fitter.estimate_peak(approx_position)

    # Specify some peaks explicitly, as (position, width, area) triples.
    # These may be constructed from any peak function, or combination of peak
    # functions.
    actualized = []
    for pwa in [[6.7, .3, 100000], [7.0, .15, 50000]]:
        actualized.append(rippled_pf.actualize(pwa, in_format="pwa"))
    fitter.add_peaks(Peaks(actualized))

    # Plot initial peaks
    if plot:
        makeplot(fitter)
        plt.title("Initial Peaks")

    # Perform fit.
    fitter.fit()

    ## Save results
    fitter.write("output/fit_initial.srmise")
    fitter.writepwa("output/fit_initial.pwa")

    # Plot the fitted model.
    if not plot:
        return
    plt.figure()
    makeplot(fitter)
    plt.show()