Python examples of std (lib.statistics.std)

Example #1

0

Show file

File: spectrum.py Project: tlinnet/relax

def __errors_repl(subset=None, verbosity=0):
    """Calculate the errors for peak intensities from replicated spectra.

    For every set of replicated spectra, the standard deviation of the peak intensities is calculated per spin (via std() with dof=1) and the resulting variances are averaged over all selected spins having an intensity for each replicate.  If not all spectra are replicated, the non-zero variances of the spectra in the subset are averaged and that single value is assigned to every spectrum of the subset.  The variances are stored in cdp.var_I, their square roots in cdp.sigma_I, and the peak_intensity_err attribute of every selected spin is set to reference cdp.sigma_I.

    Spins without a peak_intensity attribute are deselected as a side effect.

    @keyword subset:    The list of spectrum ID strings to restrict the analysis to.  If empty or None, all spectrum IDs in cdp.spectrum_ids are used.
    @type subset:       list of str
    @keyword verbosity: The amount of information to print.  The higher the value, the greater the verbosity.
    @type verbosity:    int
    @raises RelaxError: If no spin has complete data for a set of replicated spectra, or if no spectrum of the subset has a non-zero variance to average over.
    """

    # Replicated spectra.
    repl = replicated_flags()

    # Are all spectra replicated?
    all_repl = False not in repl.values()
    print("All spectra replicated:  %s." % ("Yes" if all_repl else "No"))

    # Initialise.
    if not hasattr(cdp, 'sigma_I'):
        cdp.sigma_I = {}
    if not hasattr(cdp, 'var_I'):
        cdp.var_I = {}

    # The subset, defaulting to all spectra when none is supplied.
    subset_given = bool(subset)
    if not subset_given:
        subset = cdp.spectrum_ids

    # Loop over the spectra.
    for spectrum_id in subset:
        # Skip non-replicated spectra.
        if not repl[spectrum_id]:
            continue

        # Skip replicated spectra which already have been used.
        if spectrum_id in cdp.var_I and cdp.var_I[spectrum_id] != 0.0:
            continue

        # The replicated spectra (if several sets contain the ID, the last one is used).
        for replicate_set in cdp.replicates:
            if spectrum_id in replicate_set:
                spectra = replicate_set

        # Printout.
        print("\nReplicated spectra:  " + repr(spectra))
        if verbosity:
            print("%-20s%-20s" % ("Spin_ID", "SD"))

        # Sum the per-spin variances.
        count = 0
        for spin, spin_id in spin_loop(return_id=True):
            # Skip deselected spins.
            if not spin.select:
                continue

            # Skip and deselect spins which have no data.
            if not hasattr(spin, 'peak_intensity'):
                spin.select = False
                continue

            # Skip spins missing the intensity of any of the replicates.
            if any(spectrum not in spin.peak_intensity for spectrum in spectra):
                continue

            # The peak intensities.
            values = [spin.peak_intensity[spectrum] for spectrum in spectra]

            # The standard deviation.
            sd = std(values=values, dof=1)

            # Printout.
            if verbosity:
                print("%-20s%-20s" % (spin_id, sd))

            # Sum of variances (for average).
            if spectrum_id not in cdp.var_I:
                cdp.var_I[spectrum_id] = 0.0
            cdp.var_I[spectrum_id] += sd**2
            count += 1

        # No data catch.
        if not count:
            raise RelaxError(
                "No data is present, unable to calculate errors from replicated spectra."
            )

        # Average variance.
        cdp.var_I[spectrum_id] = cdp.var_I[spectrum_id] / float(count)

        # Set all spectra variances.
        for spectrum in spectra:
            cdp.var_I[spectrum] = cdp.var_I[spectrum_id]

        # Print out.
        print("Standard deviation:  %s" % sqrt(cdp.var_I[spectrum_id]))

    # Average across all spectra if there are time points with a single spectrum.
    if not all_repl:
        # Print out (the subset message is only shown when a subset was really supplied).
        if subset_given:
            print("\nVariance averaging over the spectra subset.")
        else:
            print("\nVariance averaging over all spectra.")

        # Collect the variances of the subset, skipping single spectra (or extraordinarily accurate NMR spectra!).
        variances = [cdp.var_I[key] for key in cdp.var_I if key in subset and cdp.var_I[key] != 0.0]

        # Nothing to average over - fail with a clear message rather than a ZeroDivisionError.
        if not variances:
            raise RelaxError("No replicated spectra with a non-zero variance are present, unable to average the variance.")

        # Average value.
        var_I = sum(variances) / float(len(variances))

        # Assign the average value to all time points.
        for spectrum_id in subset:
            cdp.var_I[spectrum_id] = var_I

        # Print out.
        print("Standard deviation for all spins:  " + repr(sqrt(var_I)))

    # Create the standard deviation data structure.
    for spectrum_id in cdp.var_I:
        cdp.sigma_I[spectrum_id] = sqrt(cdp.var_I[spectrum_id])

    # Set the spin specific errors.
    for spin in spin_loop():
        # Skip deselected spins.
        if not spin.select:
            continue

        # Set the error (all spins share the same cdp.sigma_I dictionary object).
        spin.peak_intensity_err = cdp.sigma_I

Example #2

0

Show file

def monte_carlo_error_analysis():
    r"""Function for calculating errors from the Monte Carlo simulations.

    The standard deviation formula used to calculate the errors is the square root of the
    bias-corrected variance, given by the formula::

                   __________________________
                  /   1
        sd  =    /  ----- * sum({Xi - Xav}^2)
               \/   n - 1

    where
        - n is the total number of simulations.
        - Xi is the parameter value for simulation i.
        - Xav is the mean parameter value for all simulations.

    For every model and every parameter index the simulation values are fetched through the
    specific analysis API, reduced to a standard deviation with statistics.std() and stored
    via the API's set_error().  Parameters whose simulation values are dictionaries or lists
    yield a dictionary or list of standard deviations respectively, and parameters whose first
    simulation value is None get an error of None.  On completion cdp.sim_state is set to
    False.

    @raises RelaxError: If Monte Carlo simulations have not been set up.
    """

    # Test if the current data pipe exists.
    check_pipe()

    # Test if simulations have been set up.
    if not hasattr(cdp, 'sim_state'):
        raise RelaxError("Monte Carlo simulations have not been set up.")

    # The specific analysis API object.
    api = return_api()

    # Loop over the models.
    for model_info in api.model_loop():
        # Get the selected simulation array.
        select_sim = api.sim_return_selected(model_info=model_info)

        # Loop over the parameters until the API signals that there are no more.
        index = 0
        while True:
            # Get the array of simulation parameters for the index.
            param_array = api.sim_return_param(index, model_info=model_info)

            # Break (no more parameters).  An identity test is used as '== None' is
            # element-wise or ambiguous for array-like simulation data.
            if param_array is None:
                break

            # The first simulation value determines the parameter type.
            first = param_array[0]

            # Handle dictionary type parameters.
            if isinstance(first, dict):
                # One standard deviation per key.
                sd = {}
                for key in first:
                    data = [sim_value[key] for sim_value in param_array]
                    sd[key] = statistics.std(values=data, skip=select_sim)

            # Handle list type parameters.
            elif isinstance(first, list):
                # One standard deviation per list element.
                sd = []
                for j in range(len(first)):
                    data = [sim_value[j] for sim_value in param_array]
                    sd.append(statistics.std(values=data, skip=select_sim))

            # SD of simulation parameters with values (ie not None).
            elif first is not None:
                sd = statistics.std(values=param_array, skip=select_sim)

            # Simulation parameters with the value None.
            else:
                sd = None

            # Set the parameter error.
            api.set_error(index, sd, model_info=model_info)

            # Increment the parameter index.
            index += 1

    # Turn off the Monte Carlo simulation state, as the MC analysis is now finished.
    cdp.sim_state = False

Example #3

0

Show file

File: spectrum.py Project: pombredanne/relax

def __errors_repl(subset=None, verbosity=0):
    """Calculate the errors for peak intensities from replicated spectra.

    For every set of replicated spectra, the standard deviation of the peak intensities is calculated per spin (via std() with dof=1) and the resulting variances are averaged over all selected spins having an intensity for each replicate.  If not all spectra are replicated, the non-zero variances of the spectra in the subset are averaged and that single value is assigned to every spectrum of the subset.  The variances are stored in cdp.var_I, their square roots in cdp.sigma_I, and the peak_intensity_err attribute of every selected spin is set to reference cdp.sigma_I.

    Spins without a peak_intensity attribute are deselected as a side effect.

    @keyword subset:    The list of spectrum ID strings to restrict the analysis to.  If empty or None, all spectrum IDs in cdp.spectrum_ids are used.
    @type subset:       list of str
    @keyword verbosity: The amount of information to print.  The higher the value, the greater the verbosity.
    @type verbosity:    int
    @raises RelaxError: If no spin has complete data for a set of replicated spectra, or if no spectrum of the subset has a non-zero variance to average over.
    """

    # Replicated spectra.
    repl = replicated_flags()

    # Are all spectra replicated?
    all_repl = False not in repl.values()
    print("All spectra replicated:  %s." % ("Yes" if all_repl else "No"))

    # Initialise.
    if not hasattr(cdp, 'sigma_I'):
        cdp.sigma_I = {}
    if not hasattr(cdp, 'var_I'):
        cdp.var_I = {}

    # The subset, defaulting to all spectra when none is supplied.
    subset_given = bool(subset)
    if not subset_given:
        subset = cdp.spectrum_ids

    # Loop over the spectra.
    for spectrum_id in subset:
        # Skip non-replicated spectra.
        if not repl[spectrum_id]:
            continue

        # Skip replicated spectra which already have been used.
        if spectrum_id in cdp.var_I and cdp.var_I[spectrum_id] != 0.0:
            continue

        # The replicated spectra (if several sets contain the ID, the last one is used).
        for replicate_set in cdp.replicates:
            if spectrum_id in replicate_set:
                spectra = replicate_set

        # Printout.
        print("\nReplicated spectra:  " + repr(spectra))
        if verbosity:
            print("%-20s%-20s" % ("Spin_ID", "SD"))

        # Sum the per-spin variances.
        count = 0
        for spin, spin_id in spin_loop(return_id=True):
            # Skip deselected spins.
            if not spin.select:
                continue

            # Skip and deselect spins which have no data.
            if not hasattr(spin, 'peak_intensity'):
                spin.select = False
                continue

            # Skip spins missing the intensity of any of the replicates.
            if any(spectrum not in spin.peak_intensity for spectrum in spectra):
                continue

            # The peak intensities.
            values = [spin.peak_intensity[spectrum] for spectrum in spectra]

            # The standard deviation.
            sd = std(values=values, dof=1)

            # Printout.
            if verbosity:
                print("%-20s%-20s" % (spin_id, sd))

            # Sum of variances (for average).
            if spectrum_id not in cdp.var_I:
                cdp.var_I[spectrum_id] = 0.0
            cdp.var_I[spectrum_id] += sd**2
            count += 1

        # No data catch.
        if not count:
            raise RelaxError("No data is present, unable to calculate errors from replicated spectra.")

        # Average variance.
        cdp.var_I[spectrum_id] = cdp.var_I[spectrum_id] / float(count)

        # Set all spectra variances.
        for spectrum in spectra:
            cdp.var_I[spectrum] = cdp.var_I[spectrum_id]

        # Print out.
        print("Standard deviation:  %s" % sqrt(cdp.var_I[spectrum_id]))

    # Average across all spectra if there are time points with a single spectrum.
    if not all_repl:
        # Print out (the subset message is only shown when a subset was really supplied).
        if subset_given:
            print("\nVariance averaging over the spectra subset.")
        else:
            print("\nVariance averaging over all spectra.")

        # Collect the variances of the subset, skipping single spectra (or extraordinarily accurate NMR spectra!).
        variances = [cdp.var_I[key] for key in cdp.var_I if key in subset and cdp.var_I[key] != 0.0]

        # Nothing to average over - fail with a clear message rather than a ZeroDivisionError.
        if not variances:
            raise RelaxError("No replicated spectra with a non-zero variance are present, unable to average the variance.")

        # Average value.
        var_I = sum(variances) / float(len(variances))

        # Assign the average value to all time points.
        for spectrum_id in subset:
            cdp.var_I[spectrum_id] = var_I

        # Print out.
        print("Standard deviation for all spins:  " + repr(sqrt(var_I)))

    # Create the standard deviation data structure.
    for spectrum_id in cdp.var_I:
        cdp.sigma_I[spectrum_id] = sqrt(cdp.var_I[spectrum_id])

    # Set the spin specific errors.
    for spin in spin_loop():
        # Skip deselected spins.
        if not spin.select:
            continue

        # Set the error (all spins share the same cdp.sigma_I dictionary object).
        spin.peak_intensity_err = cdp.sigma_I

Example #4

0

Show file

File: simulation.py Project: pombredanne/relax

        # Calculate R2eff and store it.
        minimise.calculate()
        r2eff_indiv[j, i] = spin.r2eff['800.0_%.1f' % spin_lock[j]]

    # Randomise I0 once.
    int['ref'] = gauss(i0, ERR)

    # Calculate all R2eff and store them.
    minimise.calculate()
    for j in range(len(spin_lock)):
        r2eff_group[j, i] = spin.r2eff['800.0_%.1f' % spin_lock[j]]

# The errors:  one standard deviation per spin-lock entry, taken over row j of each R2eff array.
# NOTE(review): the second array index (i) is presumably the bootstrap simulation number - the loop header is outside this excerpt, confirm.
for j in range(len(spin_lock)):
    sigma_r2eff_indiv[j] = std(r2eff_indiv[j])
    sigma_r2eff_group[j] = std(r2eff_group[j])

# Plot the data.
# NOTE(review): the file is opened without a context manager - confirm that it is closed after the visible excerpt.
file = open('error_plot.agr', 'w')

# Header:  select graph g0 and set the legend text of the data sets s0 to s3.
# NOTE(review): these look like Grace (xmgrace) plot directives, matching the '.agr' extension - confirm.
file.write("@with g0\n")
file.write("@    s0 legend  \"Full error formula\"\n")
file.write("@    s1 legend  \"Reduced error formula\"\n")
file.write("@    s2 legend  \"Bootstrap individual dispersion points\"\n")
file.write("@    s3 legend  \"Bootstrap group\"\n")

# The full error formula.
file.write("@target G0.S0\n@type xy\n")
for i in range(len(spin_lock)):