# The example snippets below assume imports along the following lines.
# numpy (np), matplotlib.pyplot (plt), srmiselog (sml), and PDFPeakExtraction
# appear explicitly elsewhere in this listing; the module paths marked
# "(path assumed)" are inferred from the diffpy.srmise package layout.  The
# makeplot and dcif names used by the plotting examples come from elsewhere
# in the original example scripts and are not reproduced here.
import numpy as np
import matplotlib.pyplot as plt

from diffpy.srmise import srmiselog as sml
from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction
from diffpy.srmise.multimodelselection import MultimodelSelection  # (path assumed)
from diffpy.srmise.baselines import FromSequence  # (path assumed)


def run(plot=True):

    ## Suppress mundane output
    # When running scripts, especially involving multiple trials, it can be
    # useful to suppress many of the diffpy.srmise messages.  Valid levels
    # include "debug", "info" (the default), "warning", "error", and
    # "critical."  See diffpy.srmise.srmiselog for more information.
    sml.setlevel("warning")

    ## Initialize peak extraction
    ppe = PDFPeakExtraction()
    ppe.loadpdf("data/C60_fine_qmax21.gr")

    ## Set up extraction parameters
    # The FromSequence baseline interpolates (r, G(r)) values read from a
    # specified file.  It has no free parameters.  This particular baseline
    # was calculated by approximating the C60 sample as a face-centered cubic
    # lattice of hollow spheres.
    blfunc = FromSequence("data/C60baseline.dat")
    kwds = {}
    kwds["rng"] = [1., 7.25]
    kwds["baseline"] = blfunc.actualize([])
    kwds["cres"] = 0.05
    ppe.setvars(**kwds)

    ## Create multimodel selection object.
    # The MultimodelSelection class keeps track of the results of peak
    # extraction as the assumed uncertainty dg is varied.
    ms = MultimodelSelection()
    ms.setppe(ppe)

    ## Define range of dg values
    # For the purpose of illustration use 20 evenly spaced values of dg from
    # 1% to 10% of the maximum value of G(r) between r=1 and r=7.25.
    grmax = np.max(ppe.y[ppe.getrangeslice()])
    dgs = np.linspace(.01 * grmax, .10 * grmax, 20)

    ## Perform peak extraction for each of the assumed uncertainties.
    ms.run(dgs)

    ## Save results
    # The file unknown_dG_models.dat saves the models generated above.  The
    # file unknown_dG_aics.dat saves the value of the AIC of each model when
    # evaluated on a Nyquist-sampled grid using each of the dg values used to
    # generate the models in the first place.
    dr = np.pi / ppe.qmax
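    # For this dataset qmax is presumably 21 1/A (per the file name
    # C60_fine_qmax21.gr), so the Nyquist spacing dr = pi/qmax is roughly 0.15 A.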
    ms.save("output/unknown_dG_models.dat")
    ms.makeaics(dgs, dr, filename="output/unknown_dG_aics.dat")
def run(plot=True):

    ## Suppress mundane output
    # When running scripts, especially involving multiple trials, it can be
    # useful to suppress many of the diffpy.srmise messages.  Valid levels
    # include "debug", "info" (the default), "warning", "error", and
    # "critical."  See diffpy.srmise.srmiselog for more information.
    sml.setlevel("warning")

    ## Initialize peak extraction from saved trial
    ppe = PDFPeakExtraction()
    ppe.read("output/query_results.srmise")
    ppe.clearcalc()

    ## Set up extraction parameters
    # All parameters are loaded from the .srmise file.
    # Setting new values here overrides the previously loaded values.
    kwds = {}
    kwds["rng"] = [10.9, 15]  # Region of the PDF with some peak overlap.
    ppe.setvars(**kwds)

    ## Create multimodel selection object.
    # The MultimodelSelection class keeps track of the results of peak
    # extraction as the assumed uncertainty dg is varied.
    ms = MultimodelSelection()
    ms.setppe(ppe)

    ## Define range of dg values
    # For the purpose of illustration use 15 evenly spaced values of dg from
    # 50% to 120% of the mean experimental dG in the extraction range.
    dg_mean = np.mean(ppe.dy[ppe.getrangeslice()])
    dgs = np.linspace(.5 * dg_mean, 1.2 * dg_mean, 15)

    ## Perform peak extraction for each of the assumed uncertainties.
    ms.run(dgs)

    ## Save results
    # The file known_dG_models.dat saves the models generated above.  The file
    # known_dG_aics.dat saves the value of the AIC of each model when evaluated
    # on a Nyquist-sampled grid using each of the dg values used to generate
    # the models in the first place.
    dr = np.pi / ppe.qmax
    ms.save("output/known_dG_models.dat")
    ms.makeaics(dgs, dr, filename="output/known_dG_aics.dat")
def main():
    """Default SrMise entry-point."""

    usage = ("usage: %prog pdf_file [options]\n"
             "pdf_file is a file containing a PDF (accepts several "
             "common formats), or a .srmise file.")

    from diffpy.srmise import __version__
    version = "diffpy.srmise " + __version__

    descr = (
        "The SrMise package is a tool to aid extracting and fitting peaks "
        "that comprise a pair distribution function.  This script exposes "
        "basic peak extraction functionality. For many PDFs it is "
        "sufficient to specify the range, baseline, and sometimes an ad "
        "hoc uncertainty. See the discussion of these options below for "
        "further guidance.")

    epilog = (
        "Options set above override those from an existing .srmise "
        "file, as well as the usual defaults summarized here.\n\n"
        "Defaults (when qmax > 0)\n"
        "------------------------\n"
        "baseline - None (identically 0).\n"
        "dg - The uncertainty reported in the PDF (if any), otherwise "
        "5% of maximum value of PDF.\n"
        "nyquist - True\n"
        "range - All the data\n"
        "cres - The Nyquist rate.\n"
        "supersample - 4.0\n"
        "scale - (Deprecated) False\n\n"
        "Defaults (when qmax = 0)\n"
        "------------------------\n"
        "baseline - as above\n"
        "dg - as above\n"
        "nyquist - False (and no effect if True)\n"
        "range - as above\n"
        "cres - Four times the average distance between data points\n"
        "supersample - Parameter has no effect.\n"
        "scale - (Deprecated) False, and no effect if True\n\n"
        "Known issues\n"
        "------------\n"
        "1) Peak extraction works best when the data are moderately "
        "oversampled first.  When qmax > 0 this is handled "
        "automatically, but when qmax = 0 no resampling of any kind is "
        "performed.\n"
        "2) Peak extraction performed on a PDF file and a .srmise file "
        "derived from that data with identical extraction parameters "
        "can give different results even on the same platform.  This is "
        "because the original data may undergo some processing before it "
        "can be saved by SrMise.  For consistent results, always specify "
        "the original PDF, or always load the PDF from a .srmise file "
        "you save before performing any peak extraction on that data.\n"
        "3) Liveplotting depends on the matplotlib backend, and doesn't "
        "implement an idle handler, so interaction with its window will "
        "likely cause a freeze.")

    # TODO: Move to argparse (though not in 2.6 by default) to handle
    # variable-length options without callbacks.  Longterm, the major
    # value is using the same option to specify a baseline that should
    # use estimation vs. one that should use explicitly provided pars.
    parser = OptionParser(usage=usage,
                          description=descr,
                          epilog=epilog,
                          version=version,
                          formatter=IndentedHelpFormatterWithNL())

    parser.set_defaults(plot=False,
                        liveplot=False,
                        wait=False,
                        performextraction=True,
                        verbosity="warning")
    dg_defaults = {
        'absolute': None,
        'data': None,
        'max-fraction': .05,
        'ptp-fraction': .05,
        'dG-fraction': 1.
    }

    parser.add_option("--extract",
                      action="store_true",
                      dest="performextraction",
                      help="[Default] Perform extraction.")
    parser.add_option("--no-extract",
                      action="store_false",
                      dest="performextraction",
                      help="Do not perform extraction.")
    parser.add_option("--range",
                      nargs=2,
                      dest="rng",
                      type="float",
                      metavar="rmin rmax",
                      help="Extract over the range (rmin, rmax).")
    parser.add_option("--qmax",
                      dest="qmax",
                      type="string",
                      metavar="QMAX",
                      help="Model peaks with this maximum q value.")
    parser.add_option("--nyquist",
                      action="store_true",
                      dest="nyquist",
                      help="Use Nyquist resampling if qmax > 0.")
    parser.add_option("--no-nyquist",
                      action="store_false",
                      dest="nyquist",
                      help="Do not use Nyquist resampling.")
    parser.add_option("--pf",
                      dest="peakfunction",
                      metavar="PF",
                      help="Fit peak function PF defined in "
                      "diffpy.srmise.peaks, e.g. "
                      "'GaussianOverR(maxwidth=0.7)'")
    parser.add_option("--cres",
                      dest="cres",
                      type="float",
                      metavar="cres",
                      help="Clustering resolution.")
    parser.add_option("--supersample",
                      dest="supersample",
                      type="float",
                      metavar="SS",
                      help="Minimum initial oversampling rate as multiple of "
                      "Nyquist rate.")
    parser.add_option("--me",
                      "-m",
                      dest="modelevaluator",
                      metavar="ME",
                      help="ModelEvaluator defined in "
                      "diffpy.srmise.modelevaluators, e.g. 'AIC'")

    group = OptionGroup(
        parser, "Baseline Options",
        "SrMise cannot determine the appropriate type of "
        "baseline (e.g. crystalline vs. some nanoparticle) "
        "solely from the data, so the user should specify the "
        "appropriate type and/or parameters. (Default is "
        "identically 0, which is unphysical.) SrMise keeps the "
        "PDF baseline fixed at its initial value until the "
        "final stages of peak extraction, so results are "
        "frequently conditioned on that choice. (See the "
        "SrMise documentation for details.)  A good estimate "
        "is therefore important for best results.  SrMise can "
        "estimate initial parameters from the data for linear "
        "baselines in some situations (all peaks are positive, "
        "and the degree of overlap in the region of extraction "
        "is not too great), but in most cases it is best to "
        "provide reasonable initial parameters.  Run 'srmise "
        "pdf_file.gr [baseline_option] --no-extract --plot' "
        "for different values of the parameters for rapid "
        "visual estimation.")
    group.add_option("--baseline",
                     dest="baseline",
                     metavar="BL",
                     help="Estimate baseline from baseline function BL "
                     "defined in diffpy.srmise.baselines, e.g. "
                     "'Polynomial(degree=1)'.  All parameters are free. "
                     "(Many POSIX shells attempt to interpret the "
                     "parentheses, and on these shells the option should "
                     "be surrounded by quotation marks.)")
    group.add_option("--bcrystal",
                     dest="bcrystal",
                     type="string",
                     metavar="rho0[c]",
                     help="Use linear baseline defined by crystal number "
                     "density rho0. Append 'c' to make parameter "
                     "constant. Equivalent to "
                     "'--bpoly1 -4*pi*rho0[c] 0c'.")
    group.add_option("--bsrmise",
                     dest="bsrmise",
                     type="string",
                     metavar="file",
                     help="Use baseline from specified .srmise file.")
    group.add_option("--bpoly0",
                     dest="bpoly0",
                     type="string",
                     metavar="a0[c]",
                     help="Use constant baseline given by y=a0. "
                     "Append 'c' to make parameter constant.")
    group.add_option("--bpoly1",
                     dest="bpoly1",
                     type="string",
                     nargs=2,
                     metavar="a1[c] a0[c]",
                     help="Use baseline given by y=a1*x + a0.  Append 'c' to "
                     "make parameter constant.")
    group.add_option("--bpoly2",
                     dest="bpoly2",
                     type="string",
                     nargs=3,
                     metavar="a2[c] a1[c] a0[c]",
                     help="Use baseline given by y=a2*x^2+a1*x + a0.  Append "
                     "'c' to make parameter constant.")
    group.add_option("--bseq",
                     dest="bseq",
                     type="string",
                     metavar="FILE",
                     help="Use baseline interpolated from x,y values in FILE. "
                     "This baseline has no free parameters.")
    group.add_option("--bspherical",
                     dest="bspherical",
                     type="string",
                     nargs=2,
                     metavar="s[c] r[c]",
                     help="Use spherical nanoparticle baseline with scale s "
                     "and radius r. Append 'c' to make parameter "
                     "constant.")
    parser.add_option_group(group)

    group = OptionGroup(
        parser, "Uncertainty Options",
        "Ideally a PDF reports the accurate experimentally "
        "determined uncertainty.  In practice, many PDFs "
        "report none, while for others the reported values "
        "are not necessarily reliable. (If in doubt, ask your "
        "friendly neighborhood diffraction expert!) Even when "
        "uncertainties are accurate, it can be "
        "pragmatically useful to see how the results of "
        "peak extraction change when assuming a different "
        "value.  Nevertheless, the primary determinant of "
        "model complexity in SrMise is the uncertainty, so an "
        "ad hoc uncertainty yields ad hoc model complexity. "
        "See the SrMise documentation for further discussion, "
        "including methods to mitigate this issue with "
        "multimodel selection.")
    group.add_option("--dg-mode",
                     dest="dg_mode",
                     type="choice",
                     choices=[
                         'absolute', 'data', 'max-fraction', 'ptp-fraction',
                         'dG-fraction'
                     ],
                     help="Define how values passed to '--dg' are treated. "
                     "Possible values are: \n"
                     "'absolute' - The actual uncertainty in the PDF.\n"
                     "'max-fraction' - Fraction of max value in PDF.\n"
                     "'ptp-fraction' - Fraction of max minus min value "
                     "in the PDF.\n"
                     "'dG-fraction' - Fraction of dG reported by PDF.\n"
                     "If '--dg' is specified but mode is not, then mode "
                     "ia absolute.  Otherwise, 'dG-fraction' is default "
                     "if the PDF reports uncertaintes, and 'max-fraction' "
                     "ia default if it does not.")
    group.add_option("--dg",
                     dest="dg",
                     type="float",
                     help="Perform extraction assuming uncertainty dg. "
                     "Defaults depend on --dg-mode as follows:\n"
                     "'absolute'=%s\n"
                     "'max-fraction'=%s\n"
                     "'ptp-fraction'=%s\n"
                     "'dG-fraction'=%s" %
                     (dg_defaults['absolute'], dg_defaults['max-fraction'],
                      dg_defaults['ptp-fraction'], dg_defaults['dG-fraction']))
    #    group.add_option("--multimodel", nargs=3, dest="multimodel", type="float",
    #                     metavar="dg_min dg_max n",
    #                     help="Generate n models from dg_min to dg_max (given by "
    #                          "--dg-mode) and perform multimodel analysis. "
    #                          "This overrides any value given for --dg")
    parser.add_option_group(group)

    group = OptionGroup(parser, "Saving and Plotting Options", "")
    group.add_option("--pwa",
                     dest="pwafile",
                     metavar="FILE",
                     help="Save summary of result to FILE (.pwa format).")
    group.add_option("--save",
                     dest="savefile",
                     metavar="FILE",
                     help="Save result of extraction to FILE (.srmise "
                     "format).")
    group.add_option("--plot",
                     "-p",
                     action="store_true",
                     dest="plot",
                     help="Plot extracted peaks.")
    group.add_option("--liveplot",
                     "-l",
                     action="store_true",
                     dest="liveplot",
                     help="(Experimental) Plot extracted peaks when fitting.")
    group.add_option("--wait",
                     "-w",
                     action="store_true",
                     dest="wait",
                     help="(Experimental) When using liveplot wait for user "
                     "after plotting.")
    parser.add_option_group(group)

    group = OptionGroup(parser, "Verbosity Options",
                        "Control detail printed to console.")
    group.add_option("--informative",
                     "-i",
                     action="store_const",
                     const="info",
                     dest="verbosity",
                     help="Summary of progress.")
    group.add_option("--quiet",
                     "-q",
                     action="store_const",
                     const="warning",
                     dest="verbosity",
                     help="[Default] Show minimal summary.")
    group.add_option("--silent",
                     "-s",
                     action="store_const",
                     const="critical",
                     dest="verbosity",
                     help="No non-critical output.")
    group.add_option("--verbose",
                     "-v",
                     action="store_const",
                     const="debug",
                     dest="verbosity",
                     help="Show verbose output.")
    parser.add_option_group(group)

    group = OptionGroup(parser, "Deprecated Options", "Not for general use.")
    group.add_option("--scale",
                     action="store_true",
                     dest="scale",
                     help="(Deprecated) Scale supersampled uncertainties by "
                     "sqrt(oversampling) in intermediate steps when "
                     "Nyquist sampling.")
    group.add_option("--no-scale",
                     action="store_false",
                     dest="scale",
                     help="(Deprecated) Never rescale uncertainties.")
    parser.add_option_group(group)

    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error("Exactly one argument required. \n" + usage)

    from diffpy.srmise import srmiselog
    srmiselog.setlevel(options.verbosity)

    from diffpy.srmise.pdfpeakextraction import PDFPeakExtraction
    from diffpy.srmise.srmiseerrors import SrMiseDataFormatError, \
                                           SrMiseFileError

    if options.peakfunction is not None:
        from diffpy.srmise import peaks
        try:
            options.peakfunction = eval("peaks." + options.peakfunction)
        except Exception, err:
            print err
            print "Could not create peak function '%s'. Exiting." \
                  %options.peakfunction
            return
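
# A typical invocation of this script looks something like the following.
# This is illustrative only: the input files are taken from the examples
# above, the output names are placeholders, and every option shown is
# defined in the parser above.
#
#   srmise data/C60_fine_qmax21.gr --range 1.0 7.25 \
#          --bseq data/C60baseline.dat --dg-mode max-fraction --dg 0.05 \
#          --save output/c60.srmise --pwa output/c60.pwa --plot
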
def run(plot=True):

    # Suppress mundane output
    sml.setlevel("warning")

    ## Create multimodeling object and load diffpy.srmise results from file.
    ms = MultimodelSelection()
    ms.load("output/known_dG_models.dat")
    ms.loadaics("output/known_dG_aics.dat")

    ## Use Nyquist sampling
    # Standard AIC analysis assumes the data have independent uncertainties.
    # Nyquist sampling minimizes correlations in the PDF, which is the closest
    # approximation to independence possible for the PDF.
    dr = np.pi / ms.ppe.qmax
    (r, y, dr2, dy) = ms.ppe.resampledata(dr)

    ## Classify models
    # All models are placed into classes.  Models in the same class
    # should be essentially identical (same peak parameters, etc.)
    # up to a small tolerance determined by comparing individual peaks. The
    # best model in each class essentially stands in for all the other models
    # in a class in the rest of the analysis.  A tolerance of 0 indicates the
    # models must be exactly identical.  Increasing the tolerance allows
    # increasingly different models to be classified as "identical."  This step
    # reduces a major source of model redundancy, which otherwise weakens
    # AIC-based analysis.  As a rule of thumb, AIC-based analysis is robust
    # to redundant poor models (since they contribute very little to the Akaike
    # probabilities in any case), but redundant good models can significantly
    # alter how models are ranked.  See Granlund (2015) for details.
    tolerance = 0.2
    ms.classify(r, tolerance)

    ## Summarize various facts about the analysis.
    num_models = len(ms.results)
    num_classes = len(ms.classes)
    print "------- Multimodeling Summary --------"
    print "Models: %i" % num_models
    print "Classes: %i (tol=%s)" % (num_classes, tolerance)
    print "Range of dgs: %f-%f" % (ms.dgs[0], ms.dgs[-1])
    print "Nyquist-sampled data points: %i" % len(r)

    ## Get dG usable as key in analysis.
    # The Akaike probabilities were calculated for many assumed values of the
    # experimental uncertainty dG, and each of these assumed dG is used as a
    # key when obtaining the corresponding results.  Numerical precision can
    # make recalculating the exact value difficult, so the dg_key method returns
    # the key closest to its argument.
    dG = ms.dg_key(np.mean(ms.ppe.dy))

    ## Find "best" models.
    # In short, models with greatest Akaike probability.  Akaike probabilities
    # can only be validly compared if they were calculated for identical data,
    # namely identical PDF values *and* uncertainties, and are only reliable
    # with respect to the actual experiment when using a Nyquist-sampled PDF
    # with experimentally determined uncertainties.
    #
    # The present PDF satisfies these conditions, so the rankings below reflect
    # an AIC-based estimate of which of the tested models the data best support.
    print "\n--------- Model Rankings for dG = %f ---------" % dG
    print "Rank  Model  Class  Free         AIC   Prob  File"
    for i in range(len(ms.classes)):

        ## Generate information about best model in ith best class.
        # The get(dG, *args, **kwds) method returns a tuple of values
        # corresponding to string arguments for the best model in best class at
        # given dG. When the corder keyword is given it returns the model from
        # the corder-th best class (where 0 is best, 1 is next best, etc.)
        # "model" -> index of model
        # "class" -> index of class
        # "nfree" -> number of free parameters in corresponding model
        # "aic" -> The AIC for this model given uncertainty dG
        # "prob" -> The AIC probability given uncertainty dG
        # These all have dedicated getter functions.  For example, the model
        # index can also be obtained using get_model(dG, corder=i)
        (model, cls, nfree, aic, prob) = \
            ms.get(dG, "model", "class", "nfree", "aic", "prob", corder=i)

        filename_base = "output/known_dG_m" + str(model)

        # Print info for this model
        print "%4i  %5i  %5i  %4i  %10.4e %6.3f  %s" \
            %(i+1, model, cls, nfree, aic, prob, filename_base + ".pwa")

        # A message added as a comment to saved .pwa file.
        msg = [
            "Multimodeling Summary",
            "---------------------",
            "Evaluated at dG: %s" % dG,
            "Model: %i (of %i)" % (model, num_models),
            "Class: %i (of %i, tol=%s)" % (cls, num_classes, tolerance),
            "Akaike probability: %g" % prob,
            "Rank: %i" % (i + 1),
        ]
        msg = "\n".join(msg)

        # Make this the active model
        ms.setcurrent(model)

        # Save .pwa
        ms.ppe.writepwa(filename_base + ".pwa", msg)

        # Plot this model
        if plot:
            plt.figure()
            makeplot(ms.ppe, dcif)
            plt.title("Model %i/Class %i (Rank %i, AIC prob=%f)" \
                %(model, cls, i+1, prob))
            # Uncomment line below to save figures.
            # plt.savefig(filename_base + ".png", format="png")

    ## 3D plot of Akaike probabilities
    # This plot shows the Akaike probabilities of all classes as a function
    # of assumed uncertainty dG.  This gives a rough sense of how the models
    # selected by an AIC-based analysis would vary if the experimental
    # uncertainties contributing to the observed G(r) were different.  The
    # Akaike probabilities calculated for the actual experimental uncertainty
    # are highlighted.
    if plot:
        plt.figure()
        ms.plot3dclassprobs(probfilter=[0.0, 1.], highlight=[dG])
        plt.tight_layout()
        # Uncomment line below to save figure.
        #plt.savefig("output/known_dG_probs.png", format="png", bbox_inches="tight")

    if plot:
        plt.show()
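
# The "prob" values reported above are Akaike probabilities (Akaike weights)
# computed from the AIC of each candidate model.  The function below is a
# minimal standalone sketch of that calculation, included for illustration
# only; it is not the library's internal code, and the AIC values in the
# example call are made up.
def akaike_weights(aics):
    """Return Akaike weights w_i = exp(-delta_i/2) / sum_j exp(-delta_j/2),
    where delta_i = AIC_i - min(AIC)."""
    aics = np.asarray(aics, dtype=float)
    delta = aics - aics.min()  # AIC difference relative to the best model
    w = np.exp(-0.5 * delta)
    return w / w.sum()

# Example: the lowest-AIC model receives the largest weight, and the
# weights sum to 1.
print(akaike_weights([102.3, 104.1, 110.7]))
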
def run(plot=True):

    # Suppress mundane output
    sml.setlevel("warning")

    ## Create multimodeling object and load diffpy.srmise results from file.
    ms = MultimodelSelection()
    ms.load("output/unknown_dG_models.dat")
    ms.loadaics("output/unknown_dG_aics.dat")

    ## Use Nyquist sampling
    # Standard AIC analysis assumes the data have independent uncertainties.
    # Nyquist sampling minimizes correlations in the PDF, which is the closest
    # approximation to independence possible for the PDF.
    dr = np.pi/ms.ppe.qmax
    (r,y,dr2,dy) = ms.ppe.resampledata(dr)

    ## Classify models
    # All models are placed into classes.  Models in the same class
    # should be essentially identical (same peak parameters, etc.)
    # up to a small tolerance determined by comparing individual peaks. The
    # best model in each class essentially stands in for all the other models
    # in a class in the rest of the analysis.  A tolerance of 0 indicates the
    # models must be exactly identical.  Increasing the tolerance allows
    # increasingly different models to be classified as "identical."  This step
    # reduces a major source of model redundancy, which otherwise weakens
    # AIC-based analysis.  As a rule of thumb, AIC-based analysis is robust
    # to redundant poor models (since they contribute very little to the Akaike
    # probabilities in any case), but redundant good models can significantly
    # alter how models are ranked.  See Granlund (2015) for details.
    tolerance = 0.2 
    ms.classify(r, tolerance)

    ## Summarize various facts about the analysis.
    num_models = len(ms.results)
    num_classes = len(ms.classes)
    print "------- Multimodeling Summary --------"
    print "Models: %i" %num_models
    print "Classes: %i (tol=%s)" %(num_classes, tolerance)
    print "Range of dgs: %f-%f" %(ms.dgs[0], ms.dgs[-1])
    print "Nyquist-sampled data points: %i" %len(r)

    ## Find "best" models.
    # In short, models with greatest Akaike probability.  Akaike probabilities
    # can only be validly compared if they were calculated for identical data,
    # namely identical PDF values *and* uncertainties, and are only reliable
    # with respect to the actual experiment when using a Nyquist-sampled PDF
    # with experimentally determined uncertainties.
    #
    # In the present case the PDF uncertainties are not reliable, and so the
    # analysis cannot be performed by specifying the experimental uncertainty
    # dG.  Instead, perform a weaker analysis, calculating the Akaike
    # probabilities for a range of assumed dG, and identifying classes which
    # have greatest probability at least once.  The classes identified in this
    # way have no particular information-theoretic relationship, but if the
    # actual experimental uncertainty is in the interval tested, the best
    # class at the experimental uncertainty is among them.

    # Get classes which are best for one or more dG, and the specific dG in that
    # interval at which they attain greatest Akaike probability.
    best_classes = np.unique([ms.get_class(dG) for dG in ms.dgs])
    best_dGs = []
    for cls in best_classes:
        cls_probs = [ms.get_prob(dG) if ms.get_class(dG) == cls else 0 \
            for dG in ms.dgs]
        dG = ms.dgs[np.argmax(cls_probs)]
        best_dGs.append(dG)

    print "\n--------- Best models for at least one dG ---------" %dG
    print "   Best dG  Model  Class  Free       AIC     Prob  File"
    for dG in best_dGs:

        ## Generate information about best model.
        # The get(dG, *args, **kwds) method returns a tuple of values
        # corresponding to string arguments for the best model in best class at
        # given dG. When the corder keyword is given it returns the model from
        # the corder-th best class (where 0 is best, 1 is next best, etc.)
        # "model" -> index of model
        # "class" -> index of class
        # "nfree" -> number of free parameters in corresponding model
        # "aic" -> The AIC for this model given uncertainty dG
        # "prob" -> The AIC probability given uncertainty dG
        # These all have dedicated getter functions.
        (model, cls, nfree, aic, prob) = \
            ms.get(dG, "model", "class", "nfree", "aic", "prob")

        filename_base = "output/unknown_dG_m"+str(model)

        # Print info for this model
        print "%10.4e  %5i  %5i  %4i  %10.4e %6.3f  %s" \
            %(dG, model, cls, nfree, aic, prob, filename_base + ".pwa")

        # A message added as a comment to saved .pwa file.
        best_from = [dg for dg in ms.dgs if ms.get_class(dg) == cls]
        msg = ["Multimodeling Summary",
               "---------------------",
              "Model: %i (of %i)" %(model, num_models),
              "Class: %i (of %i, tol=%s)" %(cls, num_classes, tolerance),
              "Best model from dG: %s-%s" %(best_from[0], best_from[-1]),
              "Evaluated at dG: %s" %dG,
              "Akaike probability: %g" %prob]
        msg = "\n".join(msg)

        # Make this the active model
        ms.setcurrent(model)

        # Save .pwa
        ms.ppe.writepwa(filename_base + ".pwa", msg)

        # Plot this model
        if plot:
            plt.figure()
            makeplot(ms.ppe, dcif)
            plt.title("Model %i/Class %i (Best dG=%f, AIC prob=%f)" \
                %(model, cls, dG, prob))
            # Uncomment line below to save figures.
            # plt.savefig(filename_base + ".png", format="png")


    ## 3D plot of Akaike probabilities
    # This plot shows the Akaike probabilities of all classes as a function
    # of assumed uncertainty dG.  This gives a rough sense of how the models
    # selected by an AIC-based analysis would vary if the experimental
    # uncertainties contributing to the observed G(r) were different.  Models
    # are highlighted at the various dG values found above.
    if plot:
        plt.figure()
        ms.plot3dclassprobs(probfilter=[0.1, 1.], highlight=best_dGs)
        plt.tight_layout()
        # Uncomment line below to save figure.
        #plt.savefig("output/unknown_dG_probs.png", format="png", bbox_inches="tight")

    if plot:
        plt.show()