def load_expt(self):
    """Load the assigned chemical shifts stored in ``self.data_filename``.

    The file is parsed with ``nmrpystar``; the loop returned by
    ``ChemicalShiftAnalyzer.find_assigned_shifts`` is converted to a table
    and reduced to the ``Seq_ID``, ``Atom_ID`` and ``Val`` columns.

    Returns
    -------
    pandas.Series
        Series named ``"value"`` holding the shift values as floats. Its
        index is ``multi_index_to_str`` applied to the
        ``(system, expt, resSeq, name)`` MultiIndex, where ``system`` is
        ``self.identifier`` and ``expt`` is the constant ``"CS"``.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(self.data_filename) as handle:
        parsed = nmrpystar.parse(handle.read())
    print(parsed.status)

    q = ChemicalShiftAnalyzer.find_assigned_shifts(parsed)
    x = pd.DataFrame(q.rows, columns=q.keys)
    # Chain the rename so the result is a fresh frame rather than an
    # in-place edit of a column selection.
    x = x[[
        "Atom_chem_shift.Seq_ID",
        "Atom_chem_shift.Atom_ID",
        "Atom_chem_shift.Val",
    ]].rename(columns={
        "Atom_chem_shift.Seq_ID": "resSeq",
        "Atom_chem_shift.Atom_ID": "name",
        "Atom_chem_shift.Val": "value",
    })

    # Need to make dtypes match to do eventual comparison.
    x["resSeq"] = x["resSeq"].astype('int')
    x["value"] = x["value"].astype('float')
    x["expt"] = "CS"
    x["system"] = self.identifier

    expt = x.set_index(["system", "expt", "resSeq", "name"]).value
    expt = pd.Series(expt.values, multi_index_to_str(expt.index), name="value")
    return expt
def parseSTR(STR_file):
    """Parse BMRB file into a python object.

    The result is cached in the current working directory: the MD5 digest of
    ``STR_file`` is kept in ``.prevSTRhash`` and the pickled parse result in
    ``.prevSTR.pickle``. When the digest of ``STR_file`` matches the stored
    one, the pickled result is returned and parsing is skipped.

    Parameters
    ----------
    STR_file : str
        Path of the STR file to parse.

    Returns
    -------
    The object returned by ``nmrpystar.parse`` (or its pickled copy).

    Raises
    ------
    SystemExit
        If ``nmrpystar.parse`` reports a status other than ``'success'``.
    """
    with open(STR_file, 'rb') as raw:
        my_STR_hash = hashlib.md5(raw.read()).hexdigest()

    try:
        with open(".prevSTRhash") as prev_hash_file:
            prev_STR_hash = prev_hash_file.read().strip()

        if my_STR_hash == prev_STR_hash:
            print("STR file is the same as in the previous run")
            print("Skip parsing STR...")
            # NOTE(security): unpickling executes arbitrary code; this is
            # only safe while .prevSTR.pickle is a trusted local cache file.
            with open(".prevSTR.pickle", 'rb') as cache:
                return pickle.load(cache)
    except FileNotFoundError:
        # No usable cache (hash or pickle file missing): parse from scratch.
        pass

    # Read STR file into a string.
    with open(STR_file) as star_file:
        myString = star_file.read()

    parsed = nmrpystar.parse(myString)  # parsing -> parsed.value

    if parsed.status != 'success':  # check if parsing was successful
        print('Error during STR parsing: ', parsed)
        raise SystemExit

    # Update the cache only after a successful parse, and write the hash
    # last: a failed parse or failed dump must never leave a fresh hash next
    # to a stale pickle, or the next run would return the wrong cached object.
    with open(".prevSTR.pickle", "wb") as cache:
        pickle.dump(parsed, cache)
    with open(".prevSTRhash", "w") as prev_HASH:
        prev_HASH.write(my_STR_hash)

    return parsed
def load_expt(self):
    """Load the coupling constants stored in ``self.data_filename``.

    The file is parsed with ``nmrpystar`` and loop 1 of the
    ``coupling_constant_list_1`` save frame is converted to a table. The
    ``Seq_ID_1``, ``Val`` and ``Val_err`` columns are kept and renamed to
    ``resSeq``, ``value`` and ``err``; only ``value`` is returned.

    Returns
    -------
    pandas.Series
        Coupling values as floats. The index is ``multi_index_to_str``
        applied to the ``(system, expt, resSeq)`` MultiIndex, where
        ``system`` is ``self.identifier`` and ``expt`` is the constant
        ``"3JHNHA"``.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(self.data_filename) as handle:
        parsed = nmrpystar.parse(handle.read())
    print(parsed.status)

    q = parsed.value.saves["coupling_constant_list_1"].loops[1]
    x = pd.DataFrame(q.rows, columns=q.keys)
    # Chain the rename so the result is a fresh frame rather than an
    # in-place edit of a column selection.
    x = x[[
        "Coupling_constant.Seq_ID_1",
        "Coupling_constant.Val",
        "Coupling_constant.Val_err",
    ]].rename(columns={
        "Coupling_constant.Seq_ID_1": "resSeq",
        "Coupling_constant.Val": "value",
        "Coupling_constant.Val_err": "err",
    })

    # Need to make dtypes match to do eventual comparison.
    x["resSeq"] = x["resSeq"].astype('int')
    x["value"] = x["value"].astype('float')
    x["expt"] = "3JHNHA"
    x["system"] = self.identifier

    expt = x.set_index(["system", "expt", "resSeq"]).value
    expt = pd.Series(expt.values, multi_index_to_str(expt.index))
    return expt
def load_expt(self):
    """Read coupling constants from the file at ``self.data_filename``.

    Parses the file with ``nmrpystar``, takes loop 1 of the
    ``coupling_constant_list_1`` save frame, and keeps the ``Seq_ID_1``,
    ``Val`` and ``Val_err`` columns (renamed ``resSeq``, ``value``, ``err``).

    Returns
    -------
    pandas.Series
        The ``value`` column as floats, indexed by ``multi_index_to_str``
        of the ``(system, expt, resSeq)`` MultiIndex; ``system`` is
        ``self.identifier`` and ``expt`` is always ``"3JHNHA"``.
    """
    # The with-block guarantees the file is closed once it has been read.
    with open(self.data_filename) as handle:
        parsed = nmrpystar.parse(handle.read())
    print(parsed.status)

    q = parsed.value.saves["coupling_constant_list_1"].loops[1]
    x = pd.DataFrame(q.rows, columns=q.keys)

    wanted = [
        "Coupling_constant.Seq_ID_1",
        "Coupling_constant.Val",
        "Coupling_constant.Val_err",
    ]
    new_names = {
        "Coupling_constant.Seq_ID_1": "resSeq",
        "Coupling_constant.Val": "value",
        "Coupling_constant.Val_err": "err",
    }
    # Non-inplace rename yields a new frame instead of modifying a selection.
    x = x[wanted].rename(columns=new_names)

    # Need to make dtypes match to do eventual comparison.
    x["resSeq"] = x["resSeq"].astype('int')
    x["value"] = x["value"].astype('float')
    x["expt"] = "3JHNHA"
    x["system"] = self.identifier

    expt = x.set_index(["system", "expt", "resSeq"]).value
    expt = pd.Series(expt.values, multi_index_to_str(expt.index))
    return expt
def load_expt(self):
    """Read assigned chemical shifts from the file at ``self.data_filename``.

    Parses the file with ``nmrpystar``, obtains the shift loop through
    ``ChemicalShiftAnalyzer.find_assigned_shifts``, and keeps the ``Seq_ID``,
    ``Atom_ID`` and ``Val`` columns (renamed ``resSeq``, ``name``, ``value``).

    Returns
    -------
    pandas.Series
        Series named ``"value"`` with the shifts as floats, indexed by
        ``multi_index_to_str`` of the ``(system, expt, resSeq, name)``
        MultiIndex; ``system`` is ``self.identifier`` and ``expt`` is always
        ``"CS"``.
    """
    # The with-block guarantees the file is closed once it has been read.
    with open(self.data_filename) as handle:
        parsed = nmrpystar.parse(handle.read())
    print(parsed.status)

    q = ChemicalShiftAnalyzer.find_assigned_shifts(parsed)
    x = pd.DataFrame(q.rows, columns=q.keys)

    wanted = [
        "Atom_chem_shift.Seq_ID",
        "Atom_chem_shift.Atom_ID",
        "Atom_chem_shift.Val",
    ]
    new_names = {
        "Atom_chem_shift.Seq_ID": "resSeq",
        "Atom_chem_shift.Atom_ID": "name",
        "Atom_chem_shift.Val": "value",
    }
    # Non-inplace rename yields a new frame instead of modifying a selection.
    x = x[wanted].rename(columns=new_names)

    # Need to make dtypes match to do eventual comparison.
    x["resSeq"] = x["resSeq"].astype('int')
    x["value"] = x["value"].astype('float')
    x["expt"] = "CS"
    x["system"] = self.identifier

    expt = x.set_index(["system", "expt", "resSeq", "name"]).value
    expt = pd.Series(expt.values, multi_index_to_str(expt.index), name="value")
    return expt
# Compare ShiftX2 predictions for the 1am7 trajectory with the shifts listed
# in ./16664.str; `delta` holds experiment minus prediction.
import pandas as pd
import nmrpystar
import mdtraj as md

t = md.load("./1am7.dcd", top="./1am7_fixed.pdb")
prediction = md.nmr.chemical_shifts_shiftx2(t).mean(1)  # Average over time dimensions

# Close the STR file as soon as its contents have been read.
with open("./16664.str") as handle:
    parsed = nmrpystar.parse(handle.read())
print(parsed.status)

q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]
x = pd.DataFrame(q.rows, columns=q.keys)
# Chain the rename so the result is a fresh frame rather than an in-place
# edit of a column selection.
x = x[[
    "Atom_chem_shift.Seq_ID",
    "Atom_chem_shift.Atom_ID",
    "Atom_chem_shift.Val",
]].rename(columns={
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value",
})

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')

expt = x.set_index(["resSeq", "name"]).value

# Entries missing from either side become NaN in the subtraction and are dropped.
delta = (expt - prediction).dropna()
# Load the assigned chemical shifts of a single STR file into a Series.
import pandas as pd
import nmrpystar

bmrb_filename = "/home/kyleb/src/choderalab/ForcefieldData/nmr/2EVN/6338.str"

# Close the STR file as soon as its contents have been read.
with open(bmrb_filename) as handle:
    parsed = nmrpystar.parse(handle.read())
print(parsed.status)

q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]
x = pd.DataFrame(q.rows, columns=q.keys)
# Chain the rename so the result is a fresh frame rather than an in-place
# edit of a column selection.
x = x[[
    "Atom_chem_shift.Seq_ID",
    "Atom_chem_shift.Atom_ID",
    "Atom_chem_shift.Val",
]].rename(columns={
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value",
})

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')
x["expt"] = "CS"
# NOTE(review): `self` and `multi_index_to_str` are not defined anywhere in
# this snippet; presumably it was lifted from a method body. Confirm where
# they come from before running this as a standalone script.
x["system"] = self.identifier

expt = x.set_index(["system", "expt", "resSeq", "name"]).value
expt = pd.Series(expt.values, multi_index_to_str(expt.index))
parser = argparse.ArgumentParser()
parser.add_argument(
    '-max',
    type=int,
    default=-1,
    help='Go up to a maximum frame (default is whole trajectory)')
# parse_known_args returns the arguments it did not recognise; they replace
# sys.argv so the positional lookups below are unaffected by -max.
args, sys.argv = parser.parse_known_args(sys.argv)

# Run compare_shifts.py [PDB] [XTC] [12345.str]
# NOTE(review): sys.argv[4] is also read below as the output file name, so a
# fourth positional argument appears to be required. Confirm the usage line.
t = md.load(sys.argv[2], top=sys.argv[1])
outfnm = sys.argv[4]
# if os.path.exists(outfnm): sys.exit()

# Close the STR file as soon as its contents have been read.
with open(sys.argv[3]) as handle:
    parsed = nmrpystar.parse(handle.read())
print(parsed.status)

q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]
x = pd.DataFrame(q.rows, columns=q.keys)
# Chain the rename so the result is a fresh frame rather than an in-place
# edit of a column selection.
x = x[[
    "Atom_chem_shift.Seq_ID",
    "Atom_chem_shift.Atom_ID",
    "Atom_chem_shift.Val",
]].rename(columns={
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value",
})

# Need to make dtypes match to do eventual comparison.
# Build a Series of assigned chemical shifts from one STR file.
import pandas as pd
import nmrpystar

bmrb_filename = "/home/kyleb/src/choderalab/ForcefieldData/nmr/2EVN/6338.str"

# The with-block guarantees the file is closed once it has been read.
with open(bmrb_filename) as handle:
    parsed = nmrpystar.parse(handle.read())
print(parsed.status)

q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]
x = pd.DataFrame(q.rows, columns=q.keys)

wanted = [
    "Atom_chem_shift.Seq_ID",
    "Atom_chem_shift.Atom_ID",
    "Atom_chem_shift.Val",
]
new_names = {
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value",
}
# Non-inplace rename yields a new frame instead of modifying a selection.
x = x[wanted].rename(columns=new_names)

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')
x["expt"] = "CS"
# NOTE(review): neither `self` nor `multi_index_to_str` is defined in this
# snippet; it looks like code copied out of a method. Verify their origin
# before running this at module level.
x["system"] = self.identifier

expt = x.set_index(["system", "expt", "resSeq", "name"]).value
expt = pd.Series(expt.values, multi_index_to_str(expt.index))
# Load a trajectory and the experimental shifts from an STR file, both named
# on the command line, ahead of a ShiftX2 prediction.
import argparse
import sys  # sys.argv is used below; the original never imported sys

import pandas as pd
import nmrpystar
import mdtraj as md

parser = argparse.ArgumentParser()
parser.add_argument('-max', type=int, default=-1,
                    help='Go up to a maximum frame (default is whole trajectory)')
# parse_known_args returns the arguments it did not recognise; they replace
# sys.argv so the positional lookups below are unaffected by -max.
args, sys.argv = parser.parse_known_args(sys.argv)

# Run compare_shifts.py [PDB] [XTC] [12345.str]
# NOTE(review): sys.argv[4] is also read below as the output file name, so a
# fourth positional argument appears to be required. Confirm the usage line.
t = md.load(sys.argv[2], top=sys.argv[1])
outfnm = sys.argv[4]
# if os.path.exists(outfnm): sys.exit()

# Close the STR file as soon as its contents have been read.
with open(sys.argv[3]) as handle:
    parsed = nmrpystar.parse(handle.read())
print(parsed.status)

q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]
x = pd.DataFrame(q.rows, columns=q.keys)
# Chain the rename so the result is a fresh frame rather than an in-place
# edit of a column selection.
x = x[[
    "Atom_chem_shift.Seq_ID",
    "Atom_chem_shift.Atom_ID",
    "Atom_chem_shift.Val",
]].rename(columns={
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value",
})

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')

# expn keeps the whole indexed table; expt is just its `value` column, built
# from a separate set_index call as in the original.
expn = x.set_index(["resSeq", "name"])
expt = x.set_index(["resSeq", "name"]).value

# Function-call form works on Python 3, consistent with the
# print(parsed.status) call above; the bare print statement was Python 2 only.
print("Doing ShiftX2 prediction.")
# Load the coupling constants listed in ./19127.str and compute the
# trajectory-averaged J3_HN_HA prediction with mdtraj.
import pandas as pd
import nmrpystar
import mdtraj as md

t = md.load("/home/kyleb/dat/tmp/pro_0.xtc", top="/home/kyleb/dat/tmp/pro.pdb")

# Close the STR file as soon as its contents have been read.
with open("./19127.str") as handle:
    parsed = nmrpystar.parse(handle.read())
print(parsed.status)

q = parsed.value.saves["coupling_constant_list_1"].loops[1]
x = pd.DataFrame(q.rows, columns=q.keys)
# Chain the rename so the result is a fresh frame rather than an in-place
# edit of a column selection.
x = x[[
    "Coupling_constant.Seq_ID_1",
    "Coupling_constant.Val",
    "Coupling_constant.Val_err",
]].rename(columns={
    "Coupling_constant.Seq_ID_1": "resSeq",
    "Coupling_constant.Val": "value",
    "Coupling_constant.Val_err": "err",
})

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')

expt = x.set_index(["resSeq"]).value

top, bonds = t.top.to_dataframe()
ind, values = md.compute_J3_HN_HA(t)
# DataFrame.ix was removed in pandas 1.0. On an integer index it looked rows
# up by label, so .loc is the equivalent.
# NOTE(review): assumes `top` carries an integer index keyed by atom index;
# confirm against mdtraj's Topology.to_dataframe.
prediction = pd.Series(values.mean(0), top.loc[ind[:, -1]].resSeq)
# Load two sets of 1am7 trajectories (every `stride`-th frame of each),
# predict their chemical shifts, and read the experimental shifts from
# ./16664.str.
t0 = md.load(["./Trajectories_ff99sbnmr/1am7_%d.dcd" % i for i in range(10)],
             top="./1am7_fixed.pdb")[::stride]
t1 = md.load(["./Trajectories/1am7_%d.dcd" % i for i in range(15)],
             top="./1am7_fixed.pdb")[::stride]

# Alternative predictors, kept for reference:
#full_prediction0 = md.nmr.chemical_shifts_shiftx2(t0)
#full_prediction1 = md.nmr.chemical_shifts_shiftx2(t1)
#full_prediction0 = md.nmr.chemical_shifts_spartaplus(t0)
#full_prediction1 = md.nmr.chemical_shifts_spartaplus(t1)
full_prediction0 = md.nmr.chemical_shifts_ppm(t0)
full_prediction1 = md.nmr.chemical_shifts_ppm(t1)

# Close the STR file as soon as its contents have been read.
with open("./16664.str") as handle:
    parsed = nmrpystar.parse(handle.read())
print(parsed.status)

q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]
x = pd.DataFrame(q.rows, columns=q.keys)
# Chain the rename so the result is a fresh frame rather than an in-place
# edit of a column selection.
x = x[[
    "Atom_chem_shift.Seq_ID",
    "Atom_chem_shift.Atom_ID",
    "Atom_chem_shift.Val",
]].rename(columns={
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value",
})

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')

expt = x.set_index(["resSeq", "name"]).value

prediction0 = full_prediction0.mean(1)  # Average over time dimensions
# Read the coupling constants in ./19127.str and build the J3_HN_HA
# prediction, averaged over the trajectory, with mdtraj.
import pandas as pd
import nmrpystar
import mdtraj as md

t = md.load("/home/kyleb/dat/tmp/pro_0.xtc", top="/home/kyleb/dat/tmp/pro.pdb")

# The with-block guarantees the file is closed once it has been read.
with open("./19127.str") as handle:
    parsed = nmrpystar.parse(handle.read())
print(parsed.status)

q = parsed.value.saves["coupling_constant_list_1"].loops[1]
x = pd.DataFrame(q.rows, columns=q.keys)

wanted = [
    "Coupling_constant.Seq_ID_1",
    "Coupling_constant.Val",
    "Coupling_constant.Val_err",
]
new_names = {
    "Coupling_constant.Seq_ID_1": "resSeq",
    "Coupling_constant.Val": "value",
    "Coupling_constant.Val_err": "err",
}
# Non-inplace rename yields a new frame instead of modifying a selection.
x = x[wanted].rename(columns=new_names)

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype("int")
x["value"] = x["value"].astype("float")

expt = x.set_index(["resSeq"]).value

top, bonds = t.top.to_dataframe()
ind, values = md.compute_J3_HN_HA(t)
# DataFrame.ix no longer exists (removed in pandas 1.0). With an integer
# index it selected rows by label, which is what .loc does.
# NOTE(review): assumes `top` has an integer index keyed by atom index;
# verify against mdtraj's Topology.to_dataframe.
prediction = pd.Series(values.mean(0), top.loc[ind[:, -1]].resSeq)
# Compare ShiftX2 predictions for the 1d3z trajectory with the shifts listed
# in ./bmrb17439_v3.str and report the RMS difference per atom name.
import pandas as pd
import nmrpystar
import mdtraj as md

t = md.load("./1d3z.dcd", top="./1d3z_frame0.pdb")
prediction = md.nmr.chemical_shifts_shiftx2(t).mean(1)  # Average over time dimensions

# Close the STR file as soon as its contents have been read.
with open("./bmrb17439_v3.str") as handle:
    parsed = nmrpystar.parse(handle.read())
print(parsed.status)

q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]
x = pd.DataFrame(q.rows, columns=q.keys)
# Chain the rename so the result is a fresh frame rather than an in-place
# edit of a column selection.
x = x[[
    "Atom_chem_shift.Seq_ID",
    "Atom_chem_shift.Atom_ID",
    "Atom_chem_shift.Val",
]].rename(columns={
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value",
})

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')

expt = x.set_index(["resSeq", "name"]).value

# Entries missing from either side become NaN in the subtraction and are dropped.
delta = (expt - prediction).dropna()
# Name the series so reset_index() produces a "value" column to aggregate.
delta.name = "value"

# Root-mean-square difference, grouped by atom name.
rms = (delta ** 2.).reset_index().groupby("name").value.mean() ** 0.5