def single_pH(filename, t0, ref_filename=None):
    """Fit the decay series of every residue in one file and derive free energies.

    Parameters
    ----------
    filename : file with peak height vs time data at one pH*
    t0 : forwarded unchanged to ``nmrfn.parse_hx`` together with ``filename``
    ref_filename : file with SPHERE reference exchange rates at one pH;
        when omitted, ``extract_pH(filename) + "_sphere"`` is used

    Returns
    -------
    DataFrame indexed by residue, with the fit results for each residue plus
    the derived columns ``dg`` and ``dger``
    """
    gas_constant = 1.986e-3  # Universal gas constant in kcal K^(-1) mol^(-1)
    temperature = 298.0  # Temperature in K
    rt = gas_constant * temperature

    data = nmrfn.parse_hx(filename, t0)

    # One fit per column of the parsed table; the columns become the row
    # labels of the result after the transpose.
    per_residue = []
    for res in data:
        per_residue.append(fit_decay(data[res], a0=4e7, k0=1e-6))
    fits = pd.concat(per_residue, axis=1, keys=data.columns).T

    # Fall back to the conventional reference file name for this pH.
    ref_path = (
        ref_filename
        if ref_filename is not None
        else extract_pH(filename) + "_sphere"
    )
    ref = nmrfn.sphere_file(ref_path)

    # dg = -RT * ln(k_obs / k_ref); np.log is the natural logarithm.
    rate_ratio = fits["k_obs"] / ref
    fits["dg"] = -rt * np.log(rate_ratio)
    # dger = RT * (k_error / k_obs), i.e. the relative rate error scaled by RT.
    fits["dger"] = rt * fits["k_error"] / fits["k_obs"]
    return fits
def hdx_simulation(f, t0="manual", n_trials=1000):
    """Simulate H-D exchange decays to assess the accuracy of the fitting.

    For every combination of input rate constant and relative noise level,
    clean exponential data are generated, Gaussian noise is added and the
    noisy series is fitted; this is repeated ``n_trials`` times. Summary
    statistics are printed as a table to inspect visually.

    Parameters
    ----------
    f : string
        filename (Sparky rh table), provides realistic time values
    t0 : string, default 'manual'
        'manual' applies if timepoints in seconds are already encoded in the
        Sparky rh file columns; otherwise should be a timestamp in the
        format '%Y-%m-%d_%H:%M'
    n_trials : int, default 1000
        number of noisy replicates fitted per (rate, noise) combination

    Returns
    -------
    None; the summary table is printed.

    Raises
    ------
    ValueError
        if ``n_trials`` is smaller than 1

    Notes
    -----
    Printed columns (all values are pre-formatted strings):

    rat_avg : average ratio of the mean of ``noisy[-6:-1]`` to the mean of
        ``noisy[0:4]``
    k_avg : mean fitted rate constant
    ker_avg : mean fit-reported standard error of the rate constant
    k_std : standard deviation of the fitted rate constants
    er_rat : ``ker_avg / k_avg``
    dg_sd : ``RT * k_std / k_avg``
    """
    if n_trials < 1:
        raise ValueError("n_trials must be at least 1")

    t_series = nmrfn.parse_hx(f, t0)

    # The labels double as the table's index values and, via float(), as the
    # numeric inputs of the simulation.
    k_labels = ["5e-05", "1e-05", "5e-06", "1e-06", "5e-07", "1e-07", "5e-08"]
    noise_labels = ["0.01", "0.05", "0.1", "0.2"]
    columns = ["rat_avg", "k_avg", "ker_avg", "k_std", "er_rat", "dg_sd"]

    amplitude = 7e7  # amplitude of the simulated clean decay
    initial_guess = [4e7, 1e-6]  # starting amplitude and rate for each fit
    RT = 1.986e-3 * 298.0  # gas constant (kcal K^-1 mol^-1) times T (K)

    keys = []
    rows = []
    for k_label in k_labels:
        clean = nmrfn.decay(t_series, amplitude, float(k_label))
        for noise_label in noise_labels:
            # Noise standard deviation is a fraction of the input amplitude.
            noise_sd = float(noise_label) * amplitude

            ratios = []
            k_fits = []
            k_errs = []
            for _ in range(n_trials):
                noisy = clean + np.random.normal(
                    loc=0, scale=noise_sd, size=len(clean)
                )
                # Ratio of late to early signal. Both sides are averaged so
                # each trial contributes a single number (the original
                # divided by the un-averaged early slice).
                ratios.append(noisy[-6:-1].mean() / noisy[0:4].mean())

                popt, pcov = curve_fit(
                    nmrfn.decay, xdata=t_series, ydata=noisy, p0=initial_guess
                )
                k_fits.append(popt[1])
                # Standard error of the rate: sqrt of its variance in pcov.
                k_errs.append(np.sqrt(pcov[1, 1]))

            ratios = np.array(ratios)
            k_fits = np.array(k_fits)
            k_errs = np.array(k_errs)
            k_avg = k_fits.mean()
            k_er = k_errs.mean()
            k_sd = k_fits.std()

            keys.append((k_label, noise_label))
            rows.append(
                {
                    "rat_avg": "{0:.2f}".format(ratios.mean()),
                    "k_avg": "{0:.2e}".format(k_avg),
                    "ker_avg": "{0:.2e}".format(k_er),
                    "er_rat": "{0:.2f}".format(k_er / k_avg),
                    "k_std": "{0:.2e}".format(k_sd),
                    "dg_sd": "{0:.2f}".format(RT * k_sd / k_avg),
                }
            )

    # Build the table in one go rather than through the removed ``.ix``
    # indexer with chained assignment, which may not write back to the frame.
    index = pd.MultiIndex.from_tuples(keys, names=["k_in", "noiz"])
    df = pd.DataFrame(rows, index=index, columns=columns)
    print(df)