def load_expt(self):
        """Load the assigned chemical shifts from ``self.data_filename``.

        The file is parsed with ``nmrpystar`` (the parser status is printed),
        the loop returned by ``ChemicalShiftAnalyzer.find_assigned_shifts`` is
        turned into a DataFrame, and the shift values are re-keyed by
        (system, expt, resSeq, name) with ``expt`` fixed to ``"CS"`` and
        ``system`` set to ``self.identifier``.

        Returns:
            pandas.Series named ``"value"`` containing the shift values as
            floats, indexed by ``multi_index_to_str`` applied to the
            four-level MultiIndex.
        """
        # Use a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(self.data_filename) as star_file:
            parsed = nmrpystar.parse(star_file.read())
        print(parsed.status)

        q = ChemicalShiftAnalyzer.find_assigned_shifts(parsed)

        # Select and rename in one chained, non-inplace step: the result is a
        # fresh frame, so the column assignments below never write into a
        # slice of another DataFrame.
        x = pd.DataFrame(q.rows, columns=q.keys)[[
            "Atom_chem_shift.Seq_ID",
            "Atom_chem_shift.Atom_ID",
            "Atom_chem_shift.Val",
        ]].rename(columns={
            "Atom_chem_shift.Seq_ID": "resSeq",
            "Atom_chem_shift.Atom_ID": "name",
            "Atom_chem_shift.Val": "value",
        })

        # Need to make dtypes match to do eventual comparison.
        x["resSeq"] = x["resSeq"].astype('int')
        x["value"] = x["value"].astype('float')
        x["expt"] = "CS"
        x["system"] = self.identifier

        expt = x.set_index(["system", "expt", "resSeq", "name"])["value"]

        # Re-wrap the values with the index produced by multi_index_to_str.
        expt = pd.Series(expt.values,
                         multi_index_to_str(expt.index),
                         name="value")

        return expt
# Example #2
# 0
def parseSTR(STR_file):
    """Parse a BMRB STR file into a python object, with an on-disk cache.

    The MD5 digest of ``STR_file`` is compared with the digest stored in
    ``.prevSTRhash`` (current working directory). When they match and
    ``.prevSTR.pickle`` exists, the pickled parse result from the previous
    run is returned without re-parsing. Otherwise the file is parsed with
    ``nmrpystar`` and, on success, the cache files are refreshed.

    Args:
        STR_file: path of the STR file to parse.

    Returns:
        The object returned by ``nmrpystar.parse`` (or its pickled copy).

    Raises:
        SystemExit: if ``parsed.status`` is not ``'success'``.
    """
    hash_path = ".prevSTRhash"
    pickle_path = ".prevSTR.pickle"

    with open(STR_file, 'rb') as raw_file:
        my_STR_hash = hashlib.md5(raw_file.read()).hexdigest()

    try:
        with open(hash_path) as prev_hash_file:
            prev_STR_hash = prev_hash_file.read().strip()

        if my_STR_hash == prev_STR_hash:
            # NOTE(security): pickle.load executes arbitrary code from the
            # cache file; only safe while the working directory is trusted.
            with open(pickle_path, 'rb') as cached_file:
                parsed = pickle.load(cached_file)
            # Announce the shortcut only once the cache was actually loaded.
            print("STR file is the same as in the previous run")
            print("Skip parsing STR...")
            return parsed

    except FileNotFoundError:
        # No hash or no pickle from a previous run: fall through and parse.
        pass

    with open(STR_file) as star_file:
        parsed = nmrpystar.parse(star_file.read())  # parsing -> parsed.value

    if parsed.status != 'success':  # check if parsing was successful
        print('Error during STR parsing: ', parsed)
        raise SystemExit

    # Refresh the cache only after a successful parse, pickle first and hash
    # second. Writing the hash up front (as before) could pair a new hash
    # with a stale pickle when parsing failed, so the next run would return
    # the parse result of a different file.
    with open(pickle_path, "wb") as cache_out:
        pickle.dump(parsed, cache_out)
    with open(hash_path, "w") as prev_HASH:
        prev_HASH.write(my_STR_hash)

    return parsed
    def load_expt(self):
        """Load the coupling constants from ``self.data_filename``.

        The file is parsed with ``nmrpystar`` (the parser status is printed)
        and loop 1 of the ``coupling_constant_list_1`` save frame is turned
        into a DataFrame. Values are re-keyed by (system, expt, resSeq) with
        ``expt`` fixed to ``"3JHNHA"`` and ``system`` set to
        ``self.identifier``.

        Returns:
            pandas.Series of float coupling values, indexed by
            ``multi_index_to_str`` applied to the three-level MultiIndex.
            The ``err`` column is selected but is not part of the result.
        """
        # Use a context manager so the file handle is closed deterministically.
        with open(self.data_filename) as star_file:
            parsed = nmrpystar.parse(star_file.read())
        print(parsed.status)

        q = parsed.value.saves["coupling_constant_list_1"].loops[1]

        # Chained, non-inplace select + rename yields a fresh frame, so the
        # assignments below never write into a slice of another DataFrame.
        x = pd.DataFrame(q.rows, columns=q.keys)[[
            "Coupling_constant.Seq_ID_1",
            "Coupling_constant.Val",
            "Coupling_constant.Val_err",
        ]].rename(columns={
            "Coupling_constant.Seq_ID_1": "resSeq",
            "Coupling_constant.Val": "value",
            "Coupling_constant.Val_err": "err",
        })

        # Need to make dtypes match to do eventual comparison.
        x["resSeq"] = x["resSeq"].astype('int')
        x["value"] = x["value"].astype('float')
        x["expt"] = "3JHNHA"
        x["system"] = self.identifier

        expt = x.set_index(["system", "expt", "resSeq"])["value"]

        # Re-wrap the values with the index produced by multi_index_to_str.
        expt = pd.Series(expt.values, multi_index_to_str(expt.index))

        return expt
    def load_expt(self):
        """Return the experimental coupling constants as a pandas Series.

        Parses ``self.data_filename`` with ``nmrpystar``, prints the parser
        status, and reads loop 1 of the ``coupling_constant_list_1`` save
        frame. Rows are keyed by (system, expt, resSeq), where ``expt`` is
        the constant ``"3JHNHA"`` and ``system`` is ``self.identifier``.

        Returns:
            pandas.Series of float values whose index is
            ``multi_index_to_str`` applied to that MultiIndex. The ``err``
            column is extracted but not returned.
        """
        # Close the input file as soon as it has been read.
        with open(self.data_filename) as star_file:
            parsed = nmrpystar.parse(star_file.read())
        print(parsed.status)

        q = parsed.value.saves["coupling_constant_list_1"].loops[1]

        wanted = {
            "Coupling_constant.Seq_ID_1": "resSeq",
            "Coupling_constant.Val": "value",
            "Coupling_constant.Val_err": "err",
        }
        # Non-inplace rename returns a new frame, avoiding writes into a
        # column-subset of the original DataFrame.
        x = pd.DataFrame(q.rows, columns=q.keys)[list(wanted)].rename(columns=wanted)

        # Need to make dtypes match to do eventual comparison.
        x["resSeq"] = x["resSeq"].astype('int')
        x["value"] = x["value"].astype('float')
        x["expt"] = "3JHNHA"
        x["system"] = self.identifier

        expt = x.set_index(["system", "expt", "resSeq"])["value"]

        expt = pd.Series(expt.values, multi_index_to_str(expt.index))

        return expt
    def load_expt(self):
        """Return the experimental chemical shifts as a pandas Series.

        Parses ``self.data_filename`` with ``nmrpystar``, prints the parser
        status, and reads the loop returned by
        ``ChemicalShiftAnalyzer.find_assigned_shifts``. Rows are keyed by
        (system, expt, resSeq, name), where ``expt`` is the constant ``"CS"``
        and ``system`` is ``self.identifier``.

        Returns:
            pandas.Series named ``"value"`` of float shifts whose index is
            ``multi_index_to_str`` applied to that MultiIndex.
        """
        # Close the input file as soon as it has been read.
        with open(self.data_filename) as star_file:
            parsed = nmrpystar.parse(star_file.read())
        print(parsed.status)

        q = ChemicalShiftAnalyzer.find_assigned_shifts(parsed)

        wanted = {
            "Atom_chem_shift.Seq_ID": "resSeq",
            "Atom_chem_shift.Atom_ID": "name",
            "Atom_chem_shift.Val": "value",
        }
        # Non-inplace rename returns a new frame, avoiding writes into a
        # column-subset of the original DataFrame.
        x = pd.DataFrame(q.rows, columns=q.keys)[list(wanted)].rename(columns=wanted)

        # Need to make dtypes match to do eventual comparison.
        x["resSeq"] = x["resSeq"].astype('int')
        x["value"] = x["value"].astype('float')
        x["expt"] = "CS"
        x["system"] = self.identifier

        expt = x.set_index(["system", "expt", "resSeq", "name"])["value"]

        expt = pd.Series(expt.values, multi_index_to_str(expt.index), name="value")

        return expt
# Example #6
# 0
import pandas as pd
import nmrpystar
import mdtraj as md

# Compare ShiftX2 predictions for the 1am7 trajectory with the experimental
# shifts stored in BMRB entry file 16664.str.
t = md.load("./1am7.dcd", top="./1am7_fixed.pdb")

prediction = md.nmr.chemical_shifts_shiftx2(t).mean(
    1)  # Average over time dimensions

# Read the STAR file through a context manager so the handle is closed.
with open("./16664.str") as star_file:
    parsed = nmrpystar.parse(star_file.read())
print(parsed.status)

q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]

# Chained, non-inplace select + rename yields a fresh frame, so the dtype
# assignments below never write into a slice of another DataFrame.
x = pd.DataFrame(q.rows, columns=q.keys)[[
    "Atom_chem_shift.Seq_ID", "Atom_chem_shift.Atom_ID", "Atom_chem_shift.Val"
]].rename(columns={
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value",
})

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')

expt = x.set_index(["resSeq", "name"])["value"]

# Index-aligned difference; entries present on only one side become NaN and
# are dropped.
delta = (expt - prediction).dropna()
import pandas as pd
import nmrpystar

# Parse a BMRB STAR file and build a Series of chemical shifts keyed by
# (system, expt, resSeq, name).
bmrb_filename = "/home/kyleb/src/choderalab/ForcefieldData/nmr/2EVN/6338.str"
# NOTE(review): the handle returned by open() is never closed explicitly.
parsed = nmrpystar.parse(open(bmrb_filename).read())
print(parsed.status)

# Loop 1 of the "assigned_chem_shift_list_1" save frame supplies rows and keys.
q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]
x = pd.DataFrame(q.rows, columns=q.keys)
# Keep only the residue number, atom name and shift value columns.
x = x[["Atom_chem_shift.Seq_ID", "Atom_chem_shift.Atom_ID", "Atom_chem_shift.Val"]]
x.rename(columns={"Atom_chem_shift.Seq_ID":"resSeq", "Atom_chem_shift.Atom_ID":"name", "Atom_chem_shift.Val":"value"}, inplace=True)

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')
x["expt"] = "CS"
# NOTE(review): `self` is not defined at module scope in the visible code, so
# this line raises NameError as written; this looks like a method body pasted
# into a script. Confirm the intended system identifier.
x["system"] = self.identifier

expt = x.set_index(["system", "expt", "resSeq", "name"]).value

# NOTE(review): multi_index_to_str is not defined or imported in the visible
# code; presumably it converts the MultiIndex to string labels. Verify.
expt = pd.Series(expt.values, multi_index_to_str(expt.index))
# Command-line handling: "-max" is parsed by argparse, everything else stays
# in sys.argv (including the program name at index 0) for positional access.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-max',
    type=int,
    default=-1,
    help='Go up to a maximum frame (default is whole trajectory)')
args, sys.argv = parser.parse_known_args(sys.argv)

# Run compare_shifts.py [PDB] [XTC] [12345.str] [OUTFNM]
# (the fourth positional argument is read into outfnm below)

t = md.load(sys.argv[2], top=sys.argv[1])
outfnm = sys.argv[4]
# if os.path.exists(outfnm): sys.exit()

# Read the STAR file through a context manager so the handle is closed.
with open(sys.argv[3]) as star_file:
    parsed = nmrpystar.parse(star_file.read())
print(parsed.status)

q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]

# Chained, non-inplace select + rename yields a fresh frame rather than
# mutating a column-subset of another DataFrame.
x = pd.DataFrame(q.rows, columns=q.keys)[[
    "Atom_chem_shift.Seq_ID", "Atom_chem_shift.Atom_ID", "Atom_chem_shift.Val"
]].rename(columns={
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value",
})

# Need to make dtypes match to do eventual comparison.
# Example #9
# 0
import pandas as pd
import nmrpystar

# Parse a BMRB STAR file and build a Series of chemical shifts keyed by
# (system, expt, resSeq, name).
bmrb_filename = "/home/kyleb/src/choderalab/ForcefieldData/nmr/2EVN/6338.str"
# NOTE(review): the handle returned by open() is never closed explicitly.
parsed = nmrpystar.parse(open(bmrb_filename).read())
print(parsed.status)

# Loop 1 of the "assigned_chem_shift_list_1" save frame supplies rows and keys.
q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]
x = pd.DataFrame(q.rows, columns=q.keys)
# Keep only the residue number, atom name and shift value columns.
x = x[[
    "Atom_chem_shift.Seq_ID", "Atom_chem_shift.Atom_ID", "Atom_chem_shift.Val"
]]
x.rename(columns={
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value"
},
         inplace=True)

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')
x["expt"] = "CS"
# NOTE(review): `self` is not defined at module scope in the visible code, so
# this line raises NameError as written; this looks like a method body pasted
# into a script. Confirm the intended system identifier.
x["system"] = self.identifier

expt = x.set_index(["system", "expt", "resSeq", "name"]).value

# NOTE(review): multi_index_to_str is not defined or imported in the visible
# code; presumably it converts the MultiIndex to string labels. Verify.
expt = pd.Series(expt.values, multi_index_to_str(expt.index))
import pandas as pd
import nmrpystar
import mdtraj as md
import argparse

# sys.argv is used below, but sys is not among this script's visible imports;
# import it here so the script does not fail with NameError.
import sys

# Command-line handling: "-max" is parsed by argparse, everything else stays
# in sys.argv (including the program name at index 0) for positional access.
parser = argparse.ArgumentParser()
parser.add_argument('-max', type=int, default=-1, help='Go up to a maximum frame (default is whole trajectory)')
args, sys.argv = parser.parse_known_args(sys.argv)

# Run compare_shifts.py [PDB] [XTC] [12345.str] [OUTFNM]
# (the fourth positional argument is read into outfnm below)

t = md.load(sys.argv[2], top=sys.argv[1])
outfnm = sys.argv[4]
# if os.path.exists(outfnm): sys.exit()

# Read the STAR file through a context manager so the handle is closed.
with open(sys.argv[3]) as star_file:
    parsed = nmrpystar.parse(star_file.read())
print(parsed.status)

q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]

# Chained, non-inplace select + rename yields a fresh frame, so the dtype
# assignments below never write into a slice of another DataFrame.
x = pd.DataFrame(q.rows, columns=q.keys)[
    ["Atom_chem_shift.Seq_ID", "Atom_chem_shift.Atom_ID", "Atom_chem_shift.Val"]
].rename(columns={
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value",
})

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')

# expn keeps the whole re-indexed frame; expt is only its "value" column.
# Built from separate set_index calls so the two objects stay independent.
expn = x.set_index(["resSeq", "name"])
expt = x.set_index(["resSeq", "name"])["value"]

# Function-call form prints the same text on Python 2 and is valid Python 3.
print("Doing ShiftX2 prediction.")
import pandas as pd
import nmrpystar
import mdtraj as md

# Compare J-couplings computed from a trajectory by md.compute_J3_HN_HA with
# the values stored in BMRB entry file 19127.str.
t = md.load("/home/kyleb/dat/tmp/pro_0.xtc", top="/home/kyleb/dat/tmp/pro.pdb")

# Read the STAR file through a context manager so the handle is closed.
with open("./19127.str") as star_file:
    parsed = nmrpystar.parse(star_file.read())
print(parsed.status)

q = parsed.value.saves["coupling_constant_list_1"].loops[1]

# Chained, non-inplace select + rename yields a fresh frame, so the dtype
# assignments below never write into a slice of another DataFrame.
x = pd.DataFrame(q.rows, columns=q.keys)[[
    "Coupling_constant.Seq_ID_1", "Coupling_constant.Val",
    "Coupling_constant.Val_err"
]].rename(columns={
    "Coupling_constant.Seq_ID_1": "resSeq",
    "Coupling_constant.Val": "value",
    "Coupling_constant.Val_err": "err",
})

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')

expt = x.set_index(["resSeq"])["value"]

top, bonds = t.top.to_dataframe()
ind, values = md.compute_J3_HN_HA(t)
# DataFrame.ix was removed in pandas 1.0. On the integer-labelled atom table
# .ix was label-based, so .loc is the equivalent lookup. The last column of
# `ind` picks the atom whose resSeq labels each coupling.
# values.mean(0) averages over axis 0 (presumably frames; confirm).
prediction = pd.Series(values.mean(0), top.loc[ind[:, -1]].resSeq)
# Load two sets of 1am7 trajectories, keeping every `stride`-th frame.
# NOTE(review): `stride` is not defined in the visible code; it is presumably
# set earlier in the script.
t0 = md.load(["./Trajectories_ff99sbnmr/1am7_%d.dcd" % i for i in range(10)], top="./1am7_fixed.pdb")[::stride]
t1 = md.load(["./Trajectories/1am7_%d.dcd" % i for i in range(15)], top="./1am7_fixed.pdb")[::stride]


# Alternative predictors, kept for switching by hand:
#full_prediction0 = md.nmr.chemical_shifts_shiftx2(t0)
#full_prediction1 = md.nmr.chemical_shifts_shiftx2(t1)

#full_prediction0 = md.nmr.chemical_shifts_spartaplus(t0)
#full_prediction1 = md.nmr.chemical_shifts_spartaplus(t1)

full_prediction0 = md.nmr.chemical_shifts_ppm(t0)
full_prediction1 = md.nmr.chemical_shifts_ppm(t1)



# Read the STAR file through a context manager so the handle is closed.
with open("./16664.str") as star_file:
    parsed = nmrpystar.parse(star_file.read())
print(parsed.status)

q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]

# Chained, non-inplace select + rename yields a fresh frame, so the dtype
# assignments below never write into a slice of another DataFrame.
x = pd.DataFrame(q.rows, columns=q.keys)[
    ["Atom_chem_shift.Seq_ID", "Atom_chem_shift.Atom_ID", "Atom_chem_shift.Val"]
].rename(columns={
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value",
})

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')
expt = x.set_index(["resSeq", "name"])["value"]



prediction0 = full_prediction0.mean(1)  # Average over time dimensions
import pandas as pd
import nmrpystar
import mdtraj as md

# Compute J-couplings from a trajectory with md.compute_J3_HN_HA and load the
# reference values from BMRB entry file 19127.str for comparison.
t = md.load("/home/kyleb/dat/tmp/pro_0.xtc", top="/home/kyleb/dat/tmp/pro.pdb")

# Close the STAR file as soon as its text has been read.
with open("./19127.str") as star_file:
    parsed = nmrpystar.parse(star_file.read())
print(parsed.status)

q = parsed.value.saves["coupling_constant_list_1"].loops[1]

column_names = {
    "Coupling_constant.Seq_ID_1": "resSeq",
    "Coupling_constant.Val": "value",
    "Coupling_constant.Val_err": "err",
}
# Non-inplace rename returns a new frame, avoiding writes into a
# column-subset of the original DataFrame.
x = pd.DataFrame(q.rows, columns=q.keys)[list(column_names)].rename(columns=column_names)

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype("int")
x["value"] = x["value"].astype("float")

expt = x.set_index(["resSeq"])["value"]

top, bonds = t.top.to_dataframe()
ind, values = md.compute_J3_HN_HA(t)
# DataFrame.ix was removed in pandas 1.0. On the integer-labelled atom table
# .ix was label-based, so .loc is the equivalent lookup. The last column of
# `ind` picks the atom whose resSeq labels each coupling.
# values.mean(0) averages over axis 0 (presumably frames; confirm).
prediction = pd.Series(values.mean(0), top.loc[ind[:, -1]].resSeq)
import pandas as pd
import nmrpystar
import mdtraj as md

# Compare ShiftX2 predictions for the 1d3z trajectory with the experimental
# shifts in bmrb17439_v3.str and summarise the error per atom name.
t = md.load("./1d3z.dcd", top="./1d3z_frame0.pdb")

prediction = md.nmr.chemical_shifts_shiftx2(t).mean(1)  # Average over time dimensions

# Read the STAR file through a context manager so the handle is closed.
with open("./bmrb17439_v3.str") as star_file:
    parsed = nmrpystar.parse(star_file.read())
print(parsed.status)

q = parsed.value.saves["assigned_chem_shift_list_1"].loops[1]

# Chained, non-inplace select + rename yields a fresh frame, so the dtype
# assignments below never write into a slice of another DataFrame.
x = pd.DataFrame(q.rows, columns=q.keys)[
    ["Atom_chem_shift.Seq_ID", "Atom_chem_shift.Atom_ID", "Atom_chem_shift.Val"]
].rename(columns={
    "Atom_chem_shift.Seq_ID": "resSeq",
    "Atom_chem_shift.Atom_ID": "name",
    "Atom_chem_shift.Val": "value",
})

# Need to make dtypes match to do eventual comparison.
x["resSeq"] = x["resSeq"].astype('int')
x["value"] = x["value"].astype('float')

expt = x.set_index(["resSeq", "name"])["value"]

# Index-aligned difference; entries present on only one side become NaN and
# are dropped.
delta = (expt - prediction).dropna()
delta.name = "value"

# Root-mean-square of the differences, grouped by atom name.
rms = (delta ** 2.).reset_index().groupby("name").value.mean() ** 0.5