예제 #1
0
def single_pH(filename, t0, ref_filename=None):
    """Fit the decay series of every column in one file and derive free energies.

    Parameters
    ----------
    filename : file with peak height vs time data at one pH*
    t0 : passed unchanged, together with ``filename``, to ``nmrfn.parse_hx``
    ref_filename : file with SPHERE reference exchange rates at one pH;
        when None, ``extract_pH(filename) + "_sphere"`` is used instead

    Returns
    -------
    DataFrame with one row per column of the parsed data (residue), holding
        the ``fit_decay`` results plus the derived columns ``dg`` and ``dger``
    """
    series = nmrfn.parse_hx(filename, t0)

    # One fit per column of the parsed table, all started from the same guess.
    per_residue = []
    for res in series:
        per_residue.append(fit_decay(series[res], a0=4e7, k0=1e-6))
    table = pd.concat(per_residue, axis=1, keys=series.columns).T

    # Fall back to a default reference file name built from the input name.
    sphere_name = ref_filename
    if sphere_name is None:
        sphere_name = extract_pH(filename) + "_sphere"
    k_ref = nmrfn.sphere_file(sphere_name)

    gas_constant = 1.986e-3  # Universal gas constant in kcal K^(-1) mol^(-1)
    temperature = 298.0  # Temperature in K
    rt = gas_constant * temperature

    k_obs = table["k_obs"]
    # np.log is the natural logarithm.
    table["dg"] = -rt * np.log(k_obs / k_ref)
    # Error on dg propagated from the relative error of the fitted rate.
    table["dger"] = rt * table["k_error"] / k_obs
    # The log10 protection factor, np.log10(k_ref / k_obs), is not computed here.
    return table
예제 #2
0
def hdx_simulation(f, t0="manual", n_trials=1000):
    """Simulation of H-D exchange decay to assess accuracy of fitting.

    For every combination of input rate constant and relative noise level,
    generates clean data with ``nmrfn.decay``, adds Gaussian noise, fits each
    noisy series with ``curve_fit`` and summarises the fitted rates.
    Prints the summary as a table to inspect visually; nothing is returned.

    Parameters
    ==========
    f : string filename (Sparky rh table), provides realistic time values
    t0 : string, default 'manual' applies if timepoints in seconds are already
         encoded in the Sparky rh file columns; otherwise should be a timestamp
         in the format '%Y-%m-%d_%H:%M'
    n_trials : int, default 1000, number of noisy series generated and fitted
         for every (rate, noise) combination

    Raises
    ======
    ValueError : if ``n_trials`` is smaller than 1
    """
    if n_trials < 1:
        raise ValueError("n_trials must be at least 1")

    t_series = nmrfn.parse_hx(f, t0)

    # The string keys label the rows of the printed table; the float values
    # are what is actually fed to the simulation.
    k_in = {"5e-05": 5e-5, "1e-05": 1e-5, "5e-06": 5e-6, "1e-06": 1e-6, "5e-07": 5e-7, "1e-07": 1e-7, "5e-08": 5e-8}
    noise_in = {"0.01": 0.01, "0.05": 0.05, "0.1": 0.1, "0.2": 0.2}

    # Build the row index from the same dicts the loops below iterate over, so
    # every (rate, noise) pair that is simulated has exactly one row.
    index = pd.MultiIndex.from_product([list(k_in), list(noise_in)], names=["k_in", "noiz"])
    columns = ["rat_avg", "k_avg", "ker_avg", "k_std", "er_rat", "dg_sd"]
    df = pd.DataFrame(columns=columns, index=index)

    a = 7e7  # amplitude of the clean decay; the noise sd is a fraction of it
    a0 = 4e7  # initial guess for the fitted amplitude
    k0 = 1e-6  # initial guess for the fitted rate constant
    RT = 1.986e-3 * 298.0  # gas constant (kcal K^-1 mol^-1) times temperature (K)

    for kkey, kval in k_in.items():
        cln = nmrfn.decay(t_series, a, kval)  # clean data
        for nkey, nval in noise_in.items():
            ratios = []
            k_fits = []
            k_errs = []
            for _ in range(n_trials):
                noisy = cln + np.random.normal(loc=0, scale=nval * a, size=len(cln))
                # NOTE(review): the denominator is the 4-element slice itself,
                # not its mean, so each trial contributes four ratios; the
                # numerator slice also skips the final point. Confirm whether
                # noisy[0:4].mean() was intended before changing this.
                ratios.append(noisy[-6:-1].mean() / noisy[0:4])

                popt, pcov = curve_fit(nmrfn.decay, xdata=t_series, ydata=noisy, p0=[a0, k0])

                k_fits.append(popt[1])
                # Standard error of k from the diagonal of the covariance.
                k_errs.append(np.sqrt(pcov.diagonal()[1]))

            ratios = np.array(ratios)
            k_fits = np.array(k_fits)
            k_errs = np.array(k_errs)

            k_avg = k_fits.mean()
            k_er = k_errs.mean()
            k_sd = k_fits.std()

            # Write with a single .loc call per cell: .ix no longer exists in
            # pandas >= 1.0, and chained indexing (df.ix[...][col] = value)
            # may assign to a temporary copy instead of df.
            row = (kkey, nkey)
            df.loc[row, "rat_avg"] = "{0:.2f}".format(ratios.mean())
            df.loc[row, "k_avg"] = "{0:.2e}".format(k_avg)
            df.loc[row, "ker_avg"] = "{0:.2e}".format(k_er)
            df.loc[row, "er_rat"] = "{0:.2f}".format(k_er / k_avg)
            df.loc[row, "k_std"] = "{0:.2e}".format(k_sd)
            df.loc[row, "dg_sd"] = "{0:.2f}".format(RT * k_sd / k_avg)

    print(df)