def run():
    """Rebuild the rota8000 and rama8000 pickle files of the rotarama library.

    Prints a notice and returns early when
    ``rotamer_eval.find_rotarama_data_dir(optional=True)`` yields ``None``.
    Otherwise ``rebuild_pickle_files`` is called once for the "rota8000-"
    prefix and once for the "rama8000-" prefix, the working directory that
    was current on entry is restored, and the CPU-time summary is printed.
    """
    from libtbx.utils import format_cpu_times
    from mmtbx.rotamer import rotamer_eval
    from mmtbx.rotamer import ramachandran_eval
    initial_current_working_directory = os.getcwd()
    rotamer_data_dir = rotamer_eval.find_rotarama_data_dir(optional=True)
    if rotamer_data_dir is None:
        print(' Rebuilding rotarama library skipped. Needs rotamer library.')
        return
    # The original restored the cwd only after both rebuilds succeeded;
    # try/finally restores it on failure as well.
    # NOTE(review): presumably the rebuild steps change the working
    # directory (hence the save/restore) -- confirm in rebuild_pickle_files.
    try:
        target_db = rotamer_eval.open_rotarama_dlite(
            rotarama_data_dir=rotamer_data_dir)
        # The "rota500-" rebuild was already commented out and stays disabled.
        rebuild_pickle_files(
            data_dir=rotamer_data_dir,
            file_prefix="rota8000-",
            target_db=target_db,
            amino_acids=rotamer_eval.aminoAcids)
        #
        ramachandran_data_dir = rotamer_eval.find_rotarama_data_dir()
        target_db = rotamer_eval.open_rotarama_dlite(
            rotarama_data_dir=ramachandran_data_dir)
        # data_dir is rotamer_data_dir here as well, exactly as before.
        rebuild_pickle_files(
            data_dir=rotamer_data_dir,
            file_prefix="rama8000-",
            target_db=target_db,
            amino_acids=ramachandran_eval.aminoAcids_8000)
        # The "rama500-" rebuild was already commented out and stays disabled.
    finally:
        os.chdir(initial_current_working_directory)
    print(format_cpu_times())
def run():
    """Rebuild the rota8000 and rama8000 pickle files of the rotarama library.

    Prints a notice and returns early when
    ``rotamer_eval.find_rotarama_data_dir(optional=True)`` yields ``None``.
    Otherwise ``rebuild_pickle_files`` is called once for the "rota8000-"
    prefix and once for the "rama8000-" prefix, the working directory that
    was current on entry is restored, and the CPU-time summary is printed.
    """
    initial_current_working_directory = os.getcwd()
    rotamer_data_dir = rotamer_eval.find_rotarama_data_dir(optional=True)
    if rotamer_data_dir is None:
        # Single-argument call form prints the same text on Python 2 and 3.
        print(' Rebuilding rotarama library skipped. Needs rotamer library.')
        return
    # The original restored the cwd only after both rebuilds succeeded;
    # try/finally restores it on failure as well.
    # NOTE(review): presumably the rebuild steps change the working
    # directory (hence the save/restore) -- confirm in rebuild_pickle_files.
    try:
        target_db = rotamer_eval.open_rotarama_dlite(
            rotarama_data_dir=rotamer_data_dir)
        # The "rota500-" rebuild was already commented out and stays disabled.
        rebuild_pickle_files(
            data_dir=rotamer_data_dir,
            file_prefix="rota8000-",
            target_db=target_db,
            amino_acids=rotamer_eval.aminoAcids)
        #
        ramachandran_data_dir = rotamer_eval.find_rotarama_data_dir()
        target_db = rotamer_eval.open_rotarama_dlite(
            rotarama_data_dir=ramachandran_data_dir)
        # data_dir is rotamer_data_dir here as well, exactly as before.
        rebuild_pickle_files(
            data_dir=rotamer_data_dir,
            file_prefix="rama8000-",
            target_db=target_db,
            amino_acids=ramachandran_eval.aminoAcids_8000)
        # The "rama500-" rebuild was already commented out and stays disabled.
    finally:
        os.chdir(initial_current_working_directory)
    print(format_cpu_times())
def get_rotarama_data(residue_type=None, pos_type=None, db="rama",
                      convert_to_numpy_array=False):
    """Load one "<db>8000-<name>.pickle" table from the rotarama data directory.

    At least one of ``residue_type`` / ``pos_type`` must be given. When
    ``pos_type`` is given, the name is looked up in
    ``ramachandran_eval.aminoAcids_8000`` and replaces ``residue_type``.
    ``db`` must be "rama" or "rota". Returns the unpickled object, or the
    result of the matching ``export_*_distribution`` helper when
    ``convert_to_numpy_array`` is true.
    """
    from mmtbx.rotamer import ramachandran_eval
    from mmtbx.rotamer.rotamer_eval import find_rotarama_data_dir
    # Older spellings of two position types are still accepted.
    if pos_type == "proline":
        pos_type = "trans-proline"
    elif pos_type == "prepro":
        pos_type = "pre-proline"
    known_pos_types = (
        "general",
        "cis-proline",
        "trans-proline",
        "glycine",
        "isoleucine or valine",
        "pre-proline",
        None,
    )
    assert pos_type in known_pos_types
    assert db in ("rama", "rota")
    assert not (residue_type is None and pos_type is None)
    if pos_type is not None:
        residue_type = ramachandran_eval.aminoAcids_8000[pos_type]
    # PHE and TYR are both mapped to the single name "phetyr".
    if residue_type.lower() in ("phe", "tyr"):
        residue_type = "phetyr"
    assert residue_type is not None
    data_dir = find_rotarama_data_dir()
    # Only the "rota" file names are lower-cased.
    name_part = residue_type if db == "rama" else residue_type.lower()
    pkl_file = "%s8000-%s.pickle" % (db, name_part)
    table = easy_pickle.load(os.path.join(data_dir, pkl_file))
    if not convert_to_numpy_array:
        return table
    if db == "rama":
        return export_ramachandran_distribution(table)
    return export_rotamer_distribution(table)
def __init__(self):
    """Fill ``self.aaTables`` with the rama8000 tables, reusing cached ones.

    Tables still alive behind the weak references in
    ``RamachandranEval.aaTables`` are reused; the others are loaded from
    their pickle files and registered in the class-level cache as weak
    references.

    Raises ``Sorry`` when a pickle file is missing, or when it needs an
    update and no "NO_UPDATE" file exists in the data directory.
    """
    shared_tables = RamachandranEval.aaTables
    # Calling a weak reference yields the object, or None once it has died.
    self.aaTables = dict(
        (aa, table_ref()) for aa, table_ref in shared_tables.items())
    rama_data_dir = find_rotarama_data_dir()
    target_db = open_rotarama_dlite(rotarama_data_dir=rama_data_dir)
    no_update = os.path.exists(os.path.join(rama_data_dir, "NO_UPDATE"))
    for aa, aafile in aminoAcids_8000.items():
        if self.aaTables.get(aa) is None:
            stem = "rama8000-" + aafile
            pickle_file = stem + ".pickle"
            pair_info = target_db.pair_info(
                source_path=stem + ".data",
                target_path=pickle_file,
                path_prefix=rama_data_dir)
            stale = pair_info.needs_update and not no_update
            if stale or not os.path.exists(
                    os.path.join(rama_data_dir, pickle_file)):
                raise Sorry(
                    "chem_data/rotarama_data/*.pickle files are missing or out of date.\n"
                    " Please run\n"
                    " mmtbx.rebuild_rotarama_cache\n"
                    " to resolve this problem.\n")
            pickle_path = os.path.join(rama_data_dir, pair_info.target.path)
            ndt = easy_pickle.load(file_name=pickle_path)
            self.aaTables[aa] = ndt
            shared_tables[aa] = weakref.ref(ndt)
def get_rotarama_data(residue_type=None, pos_type=None, db="rama",
                      convert_to_numpy_array=False):
    """Return the contents of one "<db>8000-<name>.pickle" rotarama file.

    ``db`` selects "rama" or "rota". The name comes from ``residue_type``,
    or from ``ramachandran_eval.aminoAcids_8000[pos_type]`` when ``pos_type``
    is given; one of the two is required. With ``convert_to_numpy_array``
    the loaded object is passed through ``export_ramachandran_distribution``
    (rama) or ``export_rotamer_distribution`` (rota) before being returned.
    """
    from mmtbx.rotamer import ramachandran_eval
    from mmtbx.rotamer.rotamer_eval import find_rotarama_data_dir
    # backwards compatibility: translate the two legacy position names
    if pos_type == "proline":
        pos_type = "trans-proline"
    elif pos_type == "prepro":
        pos_type = "pre-proline"
    allowed_positions = ("general", "cis-proline", "trans-proline",
                         "glycine", "isoleucine or valine", "pre-proline",
                         None)
    assert pos_type in allowed_positions
    assert db in ("rama", "rota")
    assert not (residue_type is None and pos_type is None)
    if pos_type is not None:
        residue_type = ramachandran_eval.aminoAcids_8000[pos_type]
    lowered = residue_type.lower()
    if lowered in ("phe", "tyr"):
        # both residues use the same "phetyr" file name
        residue_type = "phetyr"
    assert residue_type is not None
    directory = find_rotarama_data_dir()
    if db == "rama":
        file_stem = residue_type
    else:
        # rota file names are lower-cased
        file_stem = residue_type.lower()
    pkl_file = "%s8000-%s.pickle" % (db, file_stem)
    loaded = easy_pickle.load(os.path.join(directory, pkl_file))
    if convert_to_numpy_array:
        exporter = (export_ramachandran_distribution if db == "rama"
                    else export_rotamer_distribution)
        return exporter(loaded)
    return loaded
def __init__(self):
    """Populate ``self.aaTables`` for every entry of ``aminoAcids_8000``.

    Strong references are first taken from the weak references stored in
    ``RamachandranEval.aaTables``. Any table that is absent or no longer
    alive is read from its "rama8000-<name>.pickle" file and stored back
    into the class-level dictionary as a weak reference.

    Raises ``Sorry`` if the pickle file does not exist, or if it needs an
    update while no "NO_UPDATE" marker file is present.
    """
    class_cache = RamachandranEval.aaTables
    strong_refs = {}
    for key, weak in class_cache.items():
        # a dead weak reference yields None here
        strong_refs[key] = weak()
    self.aaTables = strong_refs
    rama_data_dir = find_rotarama_data_dir()
    target_db = open_rotarama_dlite(rotarama_data_dir=rama_data_dir)
    marker = os.path.join(rama_data_dir, "NO_UPDATE")
    no_update = os.path.exists(marker)
    for aa, aafile in aminoAcids_8000.items():
        already_loaded = self.aaTables.get(aa) is not None
        if already_loaded:
            continue
        base = "rama8000-" + aafile
        data_file = base + ".data"
        pickle_file = base + ".pickle"
        pair_info = target_db.pair_info(
            source_path=data_file,
            target_path=pickle_file,
            path_prefix=rama_data_dir)
        # The existence check only runs when the update check did not
        # already trigger, as in the original expression.
        if ((pair_info.needs_update and not no_update)
                or not os.path.exists(
                    os.path.join(rama_data_dir, pickle_file))):
            message = (
                "chem_data/rotarama_data/*.pickle files are missing or out of date.\n"
                " Please run\n"
                " mmtbx.rebuild_rotarama_cache\n"
                " to resolve this problem.\n")
            raise Sorry(message)
        ndt = easy_pickle.load(
            file_name=os.path.join(rama_data_dir, pair_info.target.path))
        self.aaTables[aa] = ndt
        class_cache[aa] = weakref.ref(ndt)
def exercise_rotalyze():
    """Regression test for ``rotalyze.rotalyze`` on jcm.pdb and pdb1jxt.ent.

    Prints a skip message and returns when an input file or the
    rotarama_data directory is not available. For each model the old-style
    text output is checked with ``outliers_only=True`` and with
    ``outliers_only=False``; the latter is checked both directly and after
    a ``dumps``/``loads`` round trip.
    """
    # NOTE(review): whitespace inside the expected-output literals below is
    # reproduced as found; the column spacing looks collapsed (compare the
    # lines[12:15] slice) -- confirm against real rotalyze output.
    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/jcm.pdb",
        test=os.path.isfile)
    if (regression_pdb is None):
        print("Skipping exercise_rotalyze(): input pdb (jcm.pdb) not available")
        return
    if (find_rotarama_data_dir(optional=True) is None):
        print("Skipping exercise_rotalyze(): rotarama_data directory not available")
        return
    pdb_in = file_reader.any_file(file_name=regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    # pdb_io is not used below; the call is retained unchanged.
    pdb_io = pdb.input(file_name=regression_pdb)
    r = rotalyze.rotalyze(pdb_hierarchy=hierarchy, outliers_only=True)
    out = StringIO()
    r.show_old_output(out=out, verbose=False)
    output = out.getvalue()
    assert output.count("OUTLIER") == 246, output.count("OUTLIER")
    assert output.count(":") == 984, output.count(":")
    output_lines = output.splitlines()
    assert len(output_lines) == 123
    for lines in output_lines:
        assert float(lines[12:15]) <= 1.0
    r = rotalyze.rotalyze(pdb_hierarchy=hierarchy, outliers_only=False)
    for unpickle in [False, True]:
        if unpickle:
            r = loads(dumps(r))
        out = StringIO()
        r.show_old_output(out=out, verbose=False)
        for outlier in r.results:
            assert (len(outlier.xyz) == 3)
        output = out.getvalue()
        assert output.count("OUTLIER") == 246
        assert output.count(":") == 5144, output.count(":")
        assert output.count("p") == 120
        assert output.count("m") == 324
        assert output.count("t") == 486
        output_lines = output.splitlines()
        assert len(output_lines) == 643
        line_indices = [0, 1, 2, 42, 43, 168, 169, 450, 587, 394, 641, 642]
        # top8000 version (the superseded top500 list was dead code: it was
        # assigned and immediately overwritten, so it has been removed)
        line_values = [
            " A 14 MET:1.00:1.3:29.2:173.3:287.9::Allowed:ptm",
            " A 15 SER:1.00:0.1:229.0::::OUTLIER:OUTLIER",
            " A 16 SER:1.00:3.0:277.9::::Favored:m",
            " A 58 ASN:1.00:1.0:252.4:343.6:::Allowed:m-40",
            " A 59 ILE:1.00:0.5:84.2:186.7:::Allowed:pt",
            " A 202 GLU:1.00:0.0:272.7:65.9:287.8::OUTLIER:OUTLIER",
            " A 203 ILE:1.00:1.0:292.9:199.6:::Allowed:mt",
            " B 154 THR:1.00:0.0:356.0::::OUTLIER:OUTLIER",
            " B 316 TYR:1.00:4.1:153.7:68.6:::Favored:t80",
            " B 86 ASP:1.00:0.4:321.4:145.1:::Allowed:m-30",
            " B 377 GLU:1.00:15.0:311.7:166.2:160.1::Favored:mt-10",
            " B 378 THR:1.00:17.0:309.4::::Favored:m",
        ]
        for idx, val in zip(line_indices, line_values):
            assert (output_lines[idx] == val), (idx, output_lines[idx])
    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/pdb1jxt.ent",
        test=os.path.isfile)
    if (regression_pdb is None):
        # The message used to name exercise_ramalyze() (copy-paste error).
        print("Skipping exercise_rotalyze(): input pdb (pdb1jxt.ent) not available")
        return
    pdb_in = file_reader.any_file(file_name=regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    pdb_io = pdb.input(file_name=regression_pdb)
    r = rotalyze.rotalyze(pdb_hierarchy=hierarchy, outliers_only=True)
    out = StringIO()
    r.show_old_output(out=out, verbose=False)
    output = out.getvalue().strip()
    assert output == ""
    r = rotalyze.rotalyze(pdb_hierarchy=hierarchy, outliers_only=False)
    for unpickle in [False, True]:
        if unpickle:
            r = loads(dumps(r))
        out = StringIO()
        r.show_old_output(out=out, verbose=False)
        output = out.getvalue()
        assert not show_diff(output, """\
 A 1 THR:1.00:95.4:299.5::::Favored:m
 A 2 ATHR:0.67:49.5:56.1::::Favored:p
 A 2 BTHR:0.33:90.4:298.1::::Favored:m
 A 3 CYS:1.00:12.9:310.5::::Favored:m
 A 4 CYS:1.00:91.6:293.1::::Favored:m
 A 5 PRO:1.00:78.8:30.2:319.7:33.8::Favored:Cg_endo
 A 6 SER:1.00:90.1:68.4::::Favored:p
 A 7 AILE:0.45:49.6:290.8:178.2:::Favored:mt
 A 7 BILE:0.55:6.5:284.4:298.4:::Favored:mm
 A 8 AVAL:0.50:1.1:156.7::::Allowed:t
 A 8 BVAL:0.30:5.1:71.3::::Favored:p
 A 8 CVAL:0.20:69.8:172.1::::Favored:t
 A 10 AARG:0.65:24.7:176.8:66.5:63.9:180.0:Favored:tpp-160
 A 10 BARG:0.35:17.5:176.8:72.8:66.4:171.9:Favored:tpp-160
 A 11 SER:1.00:51.6:300.9::::Favored:m
 A 12 AASN:0.50:93.9:286.1:343.8:::Favored:m-40
 A 12 BASN:0.50:98.9:288.4:337.6:::Favored:m-40
 A 13 APHE:0.65:45.1:187.2:276.4:::Favored:t80
 A 13 BPHE:0.35:86.1:179.6:263.1:::Favored:t80
 A 14 ASN:1.00:95.2:289.6:333.0:::Favored:m-40
 A 15 VAL:1.00:42.3:168.2::::Favored:t
 A 16 CYS:1.00:40.8:176.5::::Favored:t
 A 17 ARG:1.00:21.4:289.7:282.8:288.6:158.7:Favored:mmm160
 A 18 LEU:1.00:65.0:287.2:173.3:::Favored:mt
 A 19 PRO:1.00:43.6:24.4:324.8:31.6::Favored:Cg_endo
 A 21 THR:1.00:5.7:314.0::::Favored:m
 A 22 APRO:0.55:87.5:333.5:34.0:333.8::Favored:Cg_exo
 A 23 AGLU:0.50:86.9:290.9:187.1:341.8::Favored:mt-10
 A 23 BGLU:0.50:91.7:292.0:183.8:339.2::Favored:mt-10
 A 25 ALEU:0.50:95.7:294.4:173.6:::Favored:mt
 A 26 CYS:1.00:83.0:295.0::::Favored:m
 A 28 THR:1.00:29.6:52.9::::Favored:p
 A 29 ATYR:0.65:18.5:161.8:67.8:::Favored:t80
 A 29 BTYR:0.35:0.4:191.3:322.7:::Allowed:t80
 A 30 ATHR:0.70:60.8:57.4::::Favored:p
 A 30 BTHR:0.30:6.6:78.1::::Favored:p
 A 32 CYS:1.00:61.4:301.7::::Favored:m
 A 33 ILE:1.00:36.6:66.5:173.4:::Favored:pt
 A 34 AILE:0.70:60.9:303.6:167.6:::Favored:mt
 A 34 BILE:0.30:31.4:308.5:296.8:::Favored:mm
 A 35 ILE:1.00:45.6:62.4:170.0:::Favored:pt
 A 36 PRO:1.00:36.2:22.5:330.5:24.8::Favored:Cg_endo
 A 39 ATHR:0.70:14.0:311.0::::Favored:m
 A 39 BTHR:0.30:13.1:288.8::::Favored:m
 A 40 CYS:1.00:81.4:294.4::::Favored:m
 A 41 PRO:1.00:35.4:34.4:317.5:33.1::Favored:Cg_endo
 A 43 AASP:0.75:24.8:56.5:340.3:::Favored:p0
 A 43 BASP:0.25:43.2:59.6:349.3:::Favored:p0
 A 44 TYR:1.00:85.3:290.9:85.1:::Favored:m-80
 A 46 ASN:1.00:38.7:301.6:117.9:::Favored:m110
""")
def exercise_ramalyze():
    """Regression test for ``ramalyze.ramalyze``.

    Exercise 1 (jcm.pdb) and Exercise 2 (pdb1jxt.ent) check the old-style
    text output and the count/fraction getters, with ``outliers_only=True``
    and ``outliers_only=False``; the latter is checked directly and after a
    ``dumps``/``loads`` round trip. Exercise 3 runs on an inline 2plx
    excerpt with unusual insertion-code usage and expects three results.
    Prints a skip message and returns when jcm.pdb or the rotarama_data
    directory is not available.
    """
    # NOTE(review): whitespace inside the multi-line literals below is
    # reproduced as found; the column spacing of the expected output and of
    # the ATOM records looks collapsed -- confirm against the real data
    # (PDB ATOM records are fixed-column).
    from mmtbx.rotamer.rotamer_eval import find_rotarama_data_dir
    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/jcm.pdb",
        test=os.path.isfile)
    if (regression_pdb is None):
        print("Skipping exercise_ramalyze(): input pdb (jcm.pdb) not available")
        return
    if (find_rotarama_data_dir(optional=True) is None):
        print("Skipping exercise_ramalyze(): rotarama_data directory not available")
        return
    from iotbx import file_reader
    # Exercise 1
    pdb_in = file_reader.any_file(file_name=regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    # pdb_io is not used until it is reassigned in Exercise 3; the call is
    # retained unchanged.
    pdb_io = pdb.input(file_name=regression_pdb)
    hierarchy.atoms().reset_i_seq()
    r = ramalyze.ramalyze(pdb_hierarchy=hierarchy, outliers_only=True)
    out = StringIO()
    r.show_old_output(out=out)
    output = out.getvalue()
    assert output.count("OUTLIER") == 100
    assert output.count("Favored") == 0
    assert output.count("Allowed") == 0
    assert output.count("General") == 64
    assert output.count("Glycine") == 6
    assert output.count("Trans-proline") == 1
    assert output.count("Cis-proline") == 0
    assert output.count("Pre-proline") == 4
    assert output.count("Isoleucine or valine") == 25
    assert (len(r.outlier_selection()) == 494)
    # The atom groups reached through the outlier selection must be the
    # same set as the atom groups reported in r.results.
    outlier_ids = set()
    atoms = hierarchy.atoms()
    for i_seq in r.outlier_selection():
        atom_group = atoms[i_seq].parent()
        outlier_ids.add(atom_group.id_str())
    outliers1 = sorted([o.atom_group_id_str() for o in r.results])
    outliers2 = sorted(outlier_ids)
    assert (outliers1 == outliers2)
    r = ramalyze.ramalyze(pdb_hierarchy=hierarchy, outliers_only=False)
    for unpickle in [False, True]:
        if unpickle:
            r = loads(dumps(r))
        for outlier in r.results:
            assert (len(outlier.xyz) == 3)
        out = StringIO()
        r.show_old_output(out=out, verbose=False)
        output = out.getvalue()
        assert output.count("OUTLIER") == 100
        assert output.count("Favored") == 463
        assert output.count("Allowed") == 162
        assert output.count("General") == 514
        assert output.count("Glycine") == 39
        assert output.count("Trans-proline") == 23
        assert output.count("Cis-proline") == 0
        assert output.count("Pre-proline") == 21
        assert output.count("Isoleucine or valine") == 128
        numtotal = r.get_phi_psi_residues_count()
        assert r.get_outliers_count_and_fraction() == (100, 100. / numtotal)
        assert r.get_allowed_count_and_fraction() == (162, 162. / numtotal)
        assert r.get_favored_count_and_fraction() == (463, 463. / numtotal)
        assert r.get_general_count_and_fraction() == (514, 514. / numtotal)
        assert r.get_gly_count_and_fraction() == (39, 39. / numtotal)
        assert r.get_trans_pro_count_and_fraction() == (23, 23. / numtotal)
        assert r.get_cis_pro_count_and_fraction() == (0, 0. / numtotal)
        assert r.get_prepro_count_and_fraction() == (21, 21. / numtotal)
        assert r.get_ileval_count_and_fraction() == (128, 128. / numtotal)
        # An earlier check compared numtotal with 75+154+494; the reasons
        # for that arithmetic were noted as unclear.
        assert numtotal == 725
        output_lines = output.splitlines()
        assert len(output_lines) == 725
        selected_lines = []
        for x in [
                0, 1, 168, 169, 715, 716, 717, 718, 719, 720, 721, 722, 723,
                724
        ]:
            selected_lines.append(output_lines[x])
        assert not show_diff("\n".join(selected_lines), """\
 A 15 SER:35.07:-83.26:131.88:Favored:General
 A 16 SER:0.74:-111.53:71.36:Allowed:General
 A 191 ASP:2.66:-42.39:121.87:Favored:Pre-proline
 A 192 PRO:0.31:-39.12:-31.84:Allowed:Trans-proline
 B 368 LYS:56.44:-62.97:-53.28:Favored:General
 B 369 GLU:8.89:-44.36:-45.50:Favored:General
 B 370 LYS:40.00:-50.00:-39.06:Favored:General
 B 371 VAL:68.24:-60.38:-51.85:Favored:Isoleucine or valine
 B 372 LEU:0.02:-61.13:-170.23:OUTLIER:General
 B 373 ARG:0.02:60.09:-80.26:OUTLIER:General
 B 374 ALA:0.13:-37.21:-36.12:Allowed:General
 B 375 LEU:11.84:-89.81:-41.45:Favored:General
 B 376 ASN:84.33:-58.30:-41.39:Favored:General
 B 377 GLU:30.88:-56.79:-21.74:Favored:General""")
        assert (len(r.outlier_selection()) == 494)
    # Exercise 2
    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/pdb1jxt.ent",
        test=os.path.isfile)
    pdb_in = file_reader.any_file(file_name=regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    hierarchy.atoms().reset_i_seq()
    r = ramalyze.ramalyze(pdb_hierarchy=hierarchy, outliers_only=True)
    out = StringIO()
    r.show_old_output(out=out)
    output = out.getvalue()
    assert output.count("Favored") == 0
    assert output.count("Allowed") == 0
    assert output.count("OUTLIER") == 0
    r = ramalyze.ramalyze(pdb_hierarchy=hierarchy, outliers_only=False)
    for unpickle in [False, True]:
        if unpickle:
            r = loads(dumps(r))
        out = StringIO()
        r.show_old_output(out=out, verbose=False)
        output = out.getvalue()
        assert output.count("Favored") == 50
        assert output.count("Allowed") == 1
        assert output.count("OUTLIER") == 0
        assert output.count("General") == 29
        assert output.count("Glycine") == 4
        assert output.count("Trans-proline") == 5
        assert output.count("Cis-proline") == 0
        assert output.count("Pre-proline") == 5
        assert output.count("Isoleucine or valine") == 8
        numtotal = r.get_phi_psi_residues_count()
        # NOTE(review): the getters below expect smaller numbers than the
        # text counts above (43 vs 50 Favored, 25 vs 29 General, 5 vs 8
        # Ile/Val); presumably alternate conformers are counted differently
        # -- confirm.
        assert r.get_outliers_count_and_fraction() == (0, 0. / numtotal)
        assert r.get_allowed_count_and_fraction() == (1, 1. / numtotal)
        assert r.get_favored_count_and_fraction() == (43, 43. / numtotal)
        assert r.get_general_count_and_fraction() == (25, 25. / numtotal)
        assert r.get_gly_count_and_fraction() == (4, 4. / numtotal)
        assert r.get_trans_pro_count_and_fraction() == (5, 5. / numtotal)
        assert r.get_cis_pro_count_and_fraction() == (0, 0. / numtotal)
        assert r.get_prepro_count_and_fraction() == (5, 5. / numtotal)
        assert r.get_ileval_count_and_fraction() == (5, 5. / numtotal)
        output_lines = output.splitlines()
        assert len(output_lines) == 51
        selected_lines = []
        for x in [0, 1, 5, 6, 7, 8, 9, 47, 48, 49, 50]:
            selected_lines.append(output_lines[x])
        assert not show_diff("\n".join(selected_lines), """\
 A 2 ATHR:33.85:-106.92:144.23:Favored:General
 A 3 ACYS:47.07:-132.54:137.26:Favored:General
 A 7 AILE:98.76:-61.91:-44.35:Favored:Isoleucine or valine
 A 7 BILE:61.50:-56.21:-51.56:Favored:Isoleucine or valine
 A 8 AVAL:23.11:-50.35:-49.64:Favored:Isoleucine or valine
 A 8 BVAL:12.01:-83.20:-12.14:Favored:Isoleucine or valine
 A 8 CVAL:73.11:-61.22:-36.49:Favored:Isoleucine or valine
 A 43 AASP:51.81:-94.64:5.45:Favored:General
 A 43 BASP:56.98:-88.69:-0.12:Favored:General
 A 44 TYR:1.76:-133.10:58.75:Allowed:General
 A 45 ALA:57.37:-86.61:-8.57:Favored:General""")
    # Exercise 3: 2plx excerpt (unusual icode usage)
    import iotbx.pdb.hierarchy
    pdb_io = iotbx.pdb.hierarchy.input(pdb_string="""\
ATOM 1468 N GLY A 219 3.721 21.322 10.752 1.00 14.12 N
ATOM 1469 CA GLY A 219 3.586 21.486 12.188 1.00 14.85 C
ATOM 1470 C GLY A 219 4.462 20.538 12.995 1.00 15.63 C
ATOM 1471 O GLY A 219 5.513 20.090 12.512 1.00 14.55 O
ATOM 1472 N CYS A 220 4.036 20.213 14.235 1.00 15.02 N
ATOM 1473 CA CYS A 220 4.776 19.228 15.068 1.00 15.56 C
ATOM 1474 C CYS A 220 3.773 18.322 15.741 1.00 14.69 C
ATOM 1475 O CYS A 220 2.799 18.828 16.338 1.00 15.54 O
ATOM 1476 CB CYS A 220 5.620 19.906 16.174 1.00 15.72 C
ATOM 1477 SG CYS A 220 6.762 21.133 15.448 1.00 15.45 S
ATOM 1478 N ALA A 221A 4.054 17.017 15.707 1.00 14.77 N
ATOM 1479 CA ALA A 221A 3.274 16.015 16.507 1.00 14.01 C
ATOM 1480 C ALA A 221A 1.774 15.992 16.099 1.00 14.50 C
ATOM 1481 O ALA A 221A 0.875 15.575 16.881 1.00 14.46 O
ATOM 1482 CB ALA A 221A 3.440 16.318 17.935 1.00 12.28 C
ATOM 1483 N GLN A 221 1.523 16.390 14.848 1.00 14.52 N
ATOM 1484 CA GLN A 221 0.159 16.391 14.325 1.00 15.19 C
ATOM 1485 C GLN A 221 -0.229 15.044 13.717 1.00 14.43 C
ATOM 1486 O GLN A 221 0.641 14.280 13.307 1.00 16.88 O
ATOM 1487 CB GLN A 221 0.002 17.491 13.272 1.00 16.41 C
ATOM 1488 CG GLN A 221 0.253 18.906 13.805 1.00 16.52 C
ATOM 1489 CD GLN A 221 -0.640 19.181 14.995 1.00 17.87 C
ATOM 1490 OE1 GLN A 221 -1.857 19.399 14.826 1.00 13.54 O
ATOM 1491 NE2 GLN A 221 -0.050 19.149 16.228 1.00 16.18 N
ATOM 1492 N LYS A 222 -1.537 14.773 13.694 1.00 14.34 N
ATOM 1493 CA LYS A 222 -2.053 13.536 13.125 1.00 15.07 C
ATOM 1494 C LYS A 222 -1.679 13.455 11.655 1.00 14.88 C
ATOM 1495 O LYS A 222 -1.856 14.424 10.883 1.00 14.32 O
""")
    r = ramalyze.ramalyze(pdb_hierarchy=pdb_io.hierarchy, outliers_only=False)
    assert (len(r.results) == 3)
def exercise_ramalyze():
    """Regression test for ``ramalyze.ramalyze``.

    Exercise 1 (jcm.pdb) and Exercise 2 (pdb1jxt.ent) check the old-style
    text output and the count/fraction getters, with ``outliers_only=True``
    and ``outliers_only=False``; the latter is checked directly and after a
    ``dumps``/``loads`` round trip. Exercise 3 runs on an inline 2plx
    excerpt with unusual insertion-code usage and expects three results.
    Prints a skip message and returns when jcm.pdb or the rotarama_data
    directory is not available.
    """
    # NOTE(review): whitespace inside the multi-line literals below is
    # reproduced as found; the column spacing of the expected output and of
    # the ATOM records looks collapsed -- confirm against the real data
    # (PDB ATOM records are fixed-column).
    from mmtbx.rotamer.rotamer_eval import find_rotarama_data_dir
    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/jcm.pdb",
        test=os.path.isfile)
    if (regression_pdb is None):
        print("Skipping exercise_ramalyze(): input pdb (jcm.pdb) not available")
        return
    if (find_rotarama_data_dir(optional=True) is None):
        print("Skipping exercise_ramalyze(): rotarama_data directory not available")
        return
    from iotbx import file_reader
    # Exercise 1
    pdb_in = file_reader.any_file(file_name=regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    # pdb_io is not used until it is reassigned in Exercise 3; the call is
    # retained unchanged.
    pdb_io = pdb.input(file_name=regression_pdb)
    hierarchy.atoms().reset_i_seq()
    r = ramalyze.ramalyze(
        pdb_hierarchy=hierarchy,
        outliers_only=True)
    out = StringIO()
    r.show_old_output(out=out)
    output = out.getvalue()
    assert output.count("OUTLIER") == 100
    assert output.count("Favored") == 0
    assert output.count("Allowed") == 0
    assert output.count("General") == 64
    assert output.count("Glycine") == 6
    assert output.count("Trans-proline") == 1
    assert output.count("Cis-proline") == 0
    assert output.count("Pre-proline") == 4
    assert output.count("Isoleucine or valine") == 25
    assert (len(r.outlier_selection()) == 788)
    # The atom groups reached through the outlier selection must be the
    # same set as the atom groups reported in r.results.
    outlier_ids = set()
    atoms = hierarchy.atoms()
    for i_seq in r.outlier_selection():
        atom_group = atoms[i_seq].parent()
        outlier_ids.add(atom_group.id_str())
    outliers1 = sorted([o.atom_group_id_str() for o in r.results])
    outliers2 = sorted(outlier_ids)
    assert (outliers1 == outliers2)
    r = ramalyze.ramalyze(
        pdb_hierarchy=hierarchy,
        outliers_only=False)
    for unpickle in [False, True]:
        if unpickle:
            r = loads(dumps(r))
        for outlier in r.results:
            assert (len(outlier.xyz) == 3)
        out = StringIO()
        r.show_old_output(out=out, verbose=False)
        output = out.getvalue()
        assert output.count("OUTLIER") == 100
        assert output.count("Favored") == 461
        assert output.count("Allowed") == 162
        assert output.count("General") == 513
        assert output.count("Glycine") == 39
        assert output.count("Trans-proline") == 23
        assert output.count("Cis-proline") == 0
        assert output.count("Pre-proline") == 21
        assert output.count("Isoleucine or valine") == 127
        numtotal = r.get_phi_psi_residues_count()
        assert r.get_outliers_count_and_fraction() == (100, 100./numtotal)
        assert r.get_allowed_count_and_fraction() == (162, 162./numtotal)
        assert r.get_favored_count_and_fraction() == (461, 461./numtotal)
        assert r.get_general_count_and_fraction() == (513, 513./numtotal)
        assert r.get_gly_count_and_fraction() == (39, 39./numtotal)
        assert r.get_trans_pro_count_and_fraction() == (23, 23./numtotal)
        assert r.get_cis_pro_count_and_fraction() == (0, 0./numtotal)
        assert r.get_prepro_count_and_fraction() == (21, 21./numtotal)
        assert r.get_ileval_count_and_fraction() == (127, 127./numtotal)
        assert numtotal == 75+154+494
        output_lines = output.splitlines()
        assert len(output_lines) == 723
        selected_lines = []
        for x in [0, 1, 168, 169, 713, 714, 715, 716, 717, 718, 719, 720,
                  721, 722]:
            selected_lines.append(output_lines[x])
        assert not show_diff("\n".join(selected_lines), """\
 A 15 SER:35.07:-83.26:131.88:Favored:General
 A 16 SER:0.74:-111.53:71.36:Allowed:General
 A 191 ASP:2.66:-42.39:121.87:Favored:Pre-proline
 A 192 PRO:0.31:-39.12:-31.84:Allowed:Trans-proline
 B 368 LYS:56.44:-62.97:-53.28:Favored:General
 B 369 GLU:8.89:-44.36:-45.50:Favored:General
 B 370 LYS:40.00:-50.00:-39.06:Favored:General
 B 371 VAL:68.24:-60.38:-51.85:Favored:Isoleucine or valine
 B 372 LEU:0.02:-61.13:-170.23:OUTLIER:General
 B 373 ARG:0.02:60.09:-80.26:OUTLIER:General
 B 374 ALA:0.13:-37.21:-36.12:Allowed:General
 B 375 LEU:11.84:-89.81:-41.45:Favored:General
 B 376 ASN:84.33:-58.30:-41.39:Favored:General
 B 377 GLU:30.88:-56.79:-21.74:Favored:General""")
        assert (len(r.outlier_selection()) == 788)
    # Exercise 2
    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/pdb1jxt.ent",
        test=os.path.isfile)
    pdb_in = file_reader.any_file(file_name=regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    hierarchy.atoms().reset_i_seq()
    r = ramalyze.ramalyze(
        pdb_hierarchy=hierarchy,
        outliers_only=True)
    out = StringIO()
    r.show_old_output(out=out)
    output = out.getvalue()
    assert output.count("Favored") == 0
    assert output.count("Allowed") == 0
    assert output.count("OUTLIER") == 0
    r = ramalyze.ramalyze(
        pdb_hierarchy=hierarchy,
        outliers_only=False)
    for unpickle in [False, True]:
        if unpickle:
            r = loads(dumps(r))
        out = StringIO()
        r.show_old_output(out=out, verbose=False)
        output = out.getvalue()
        assert output.count("Favored") == 47
        assert output.count("Allowed") == 1
        assert output.count("OUTLIER") == 0
        assert output.count("General") == 27
        assert output.count("Glycine") == 4
        assert output.count("Trans-proline") == 4
        assert output.count("Cis-proline") == 0
        assert output.count("Pre-proline") == 5
        assert output.count("Isoleucine or valine") == 8
        numtotal = r.get_phi_psi_residues_count()
        assert r.get_outliers_count_and_fraction() == (0, 0./numtotal)
        assert r.get_allowed_count_and_fraction() == (1, 1./numtotal)
        assert r.get_favored_count_and_fraction() == (47, 47./numtotal)
        assert r.get_general_count_and_fraction() == (27, 27./numtotal)
        assert r.get_gly_count_and_fraction() == (4, 4./numtotal)
        assert r.get_trans_pro_count_and_fraction() == (4, 4./numtotal)
        assert r.get_cis_pro_count_and_fraction() == (0, 0./numtotal)
        assert r.get_prepro_count_and_fraction() == (5, 5./numtotal)
        assert r.get_ileval_count_and_fraction() == (8, 8./numtotal)
        output_lines = output.splitlines()
        assert len(output_lines) == 48
        selected_lines = []
        for x in [0, 1, 6, 7, 8, 9, 10, 44, 45, 46, 47]:
            selected_lines.append(output_lines[x])
        assert not show_diff("\n".join(selected_lines), """\
 A 2 ATHR:33.85:-106.92:144.23:Favored:General
 A 2 BTHR:37.07:-97.44:137.00:Favored:General
 A 7 AILE:98.76:-61.91:-44.35:Favored:Isoleucine or valine
 A 7 BILE:61.50:-56.21:-51.56:Favored:Isoleucine or valine
 A 8 AVAL:23.11:-50.35:-49.64:Favored:Isoleucine or valine
 A 8 BVAL:12.01:-83.20:-12.14:Favored:Isoleucine or valine
 A 8 CVAL:73.11:-61.22:-36.49:Favored:Isoleucine or valine
 A 43 AASP:51.81:-94.64:5.45:Favored:General
 A 43 BASP:56.98:-88.69:-0.12:Favored:General
 A 44 TYR:1.76:-133.10:58.75:Allowed:General
 A 45 ALA:57.37:-86.61:-8.57:Favored:General""")
    # Exercise 3: 2plx excerpt (unusual icode usage)
    import iotbx.pdb.hierarchy
    pdb_io = iotbx.pdb.hierarchy.input(pdb_string="""\
ATOM 1468 N GLY A 219 3.721 21.322 10.752 1.00 14.12 N
ATOM 1469 CA GLY A 219 3.586 21.486 12.188 1.00 14.85 C
ATOM 1470 C GLY A 219 4.462 20.538 12.995 1.00 15.63 C
ATOM 1471 O GLY A 219 5.513 20.090 12.512 1.00 14.55 O
ATOM 1472 N CYS A 220 4.036 20.213 14.235 1.00 15.02 N
ATOM 1473 CA CYS A 220 4.776 19.228 15.068 1.00 15.56 C
ATOM 1474 C CYS A 220 3.773 18.322 15.741 1.00 14.69 C
ATOM 1475 O CYS A 220 2.799 18.828 16.338 1.00 15.54 O
ATOM 1476 CB CYS A 220 5.620 19.906 16.174 1.00 15.72 C
ATOM 1477 SG CYS A 220 6.762 21.133 15.448 1.00 15.45 S
ATOM 1478 N ALA A 221A 4.054 17.017 15.707 1.00 14.77 N
ATOM 1479 CA ALA A 221A 3.274 16.015 16.507 1.00 14.01 C
ATOM 1480 C ALA A 221A 1.774 15.992 16.099 1.00 14.50 C
ATOM 1481 O ALA A 221A 0.875 15.575 16.881 1.00 14.46 O
ATOM 1482 CB ALA A 221A 3.440 16.318 17.935 1.00 12.28 C
ATOM 1483 N GLN A 221 1.523 16.390 14.848 1.00 14.52 N
ATOM 1484 CA GLN A 221 0.159 16.391 14.325 1.00 15.19 C
ATOM 1485 C GLN A 221 -0.229 15.044 13.717 1.00 14.43 C
ATOM 1486 O GLN A 221 0.641 14.280 13.307 1.00 16.88 O
ATOM 1487 CB GLN A 221 0.002 17.491 13.272 1.00 16.41 C
ATOM 1488 CG GLN A 221 0.253 18.906 13.805 1.00 16.52 C
ATOM 1489 CD GLN A 221 -0.640 19.181 14.995 1.00 17.87 C
ATOM 1490 OE1 GLN A 221 -1.857 19.399 14.826 1.00 13.54 O
ATOM 1491 NE2 GLN A 221 -0.050 19.149 16.228 1.00 16.18 N
ATOM 1492 N LYS A 222 -1.537 14.773 13.694 1.00 14.34 N
ATOM 1493 CA LYS A 222 -2.053 13.536 13.125 1.00 15.07 C
ATOM 1494 C LYS A 222 -1.679 13.455 11.655 1.00 14.88 C
ATOM 1495 O LYS A 222 -1.856 14.424 10.883 1.00 14.32 O
""")
    r = ramalyze.ramalyze(
        pdb_hierarchy=pdb_io.hierarchy,
        outliers_only=False)
    assert (len(r.results) == 3)
def exercise_rotalyze():
    """Regression test for ``rotalyze.rotalyze`` on jcm.pdb and pdb1jxt.ent.

    Prints a skip message and returns when an input file or the
    rotarama_data directory is not available. For each model the old-style
    text output is checked with ``outliers_only=True`` and with
    ``outliers_only=False``; the latter is checked both directly and after
    a ``dumps``/``loads`` round trip.
    """
    # NOTE(review): whitespace inside the expected-output literals below is
    # reproduced as found; the column spacing looks collapsed (compare the
    # lines[12:15] slice) -- confirm against real rotalyze output.
    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/jcm.pdb",
        test=os.path.isfile)
    if (regression_pdb is None):
        print("Skipping exercise_rotalyze(): input pdb (jcm.pdb) not available")
        return
    if (find_rotarama_data_dir(optional=True) is None):
        print("Skipping exercise_rotalyze(): rotarama_data directory not available")
        return
    pdb_in = file_reader.any_file(file_name=regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    # pdb_io is not used below; the call is retained unchanged.
    pdb_io = pdb.input(file_name=regression_pdb)
    r = rotalyze.rotalyze(
        pdb_hierarchy=hierarchy,
        outliers_only=True)
    out = StringIO()
    r.show_old_output(out=out, verbose=False)
    output = out.getvalue()
    assert output.count("OUTLIER") == 246, output.count("OUTLIER")
    assert output.count(":") == 984, output.count(":")
    output_lines = output.splitlines()
    assert len(output_lines) == 123
    for lines in output_lines:
        assert float(lines[12:15]) <= 1.0
    r = rotalyze.rotalyze(
        pdb_hierarchy=hierarchy,
        outliers_only=False)
    for unpickle in [False, True]:
        if unpickle:
            r = loads(dumps(r))
        out = StringIO()
        r.show_old_output(out=out, verbose=False)
        for outlier in r.results:
            assert (len(outlier.xyz) == 3)
        output = out.getvalue()
        assert output.count("OUTLIER") == 246
        assert output.count(":") == 5144, output.count(":")
        assert output.count("p") == 120
        assert output.count("m") == 324
        assert output.count("t") == 486
        output_lines = output.splitlines()
        assert len(output_lines) == 643
        line_indices = [0,1,2,42,43,168,169,450,587,394,641,642]
        # top8000 version (the superseded top500 list was dead code: it was
        # assigned and immediately overwritten, so it has been removed)
        line_values = [
            " A 14 MET:1.00:1.3:29.2:173.3:287.9::Allowed:ptm",
            " A 15 SER:1.00:0.1:229.0::::OUTLIER:OUTLIER",
            " A 16 SER:1.00:3.0:277.9::::Favored:m",
            " A 58 ASN:1.00:1.0:252.4:343.6:::Allowed:m-40",
            " A 59 ILE:1.00:0.5:84.2:186.7:::Allowed:pt",
            " A 202 GLU:1.00:0.0:272.7:65.9:287.8::OUTLIER:OUTLIER",
            " A 203 ILE:1.00:1.0:292.9:199.6:::Allowed:mt",
            " B 154 THR:1.00:0.0:356.0::::OUTLIER:OUTLIER",
            " B 316 TYR:1.00:4.1:153.7:68.6:::Favored:t80",
            " B 86 ASP:1.00:0.4:321.4:145.1:::Allowed:m-30",
            " B 377 GLU:1.00:15.0:311.7:166.2:160.1::Favored:mt-10",
            " B 378 THR:1.00:17.0:309.4::::Favored:m",
        ]
        for idx, val in zip(line_indices, line_values):
            assert (output_lines[idx] == val), (idx, output_lines[idx])
    regression_pdb = libtbx.env.find_in_repositories(
        relative_path="phenix_regression/pdb/pdb1jxt.ent",
        test=os.path.isfile)
    if (regression_pdb is None):
        # The message used to name exercise_ramalyze() (copy-paste error).
        print("Skipping exercise_rotalyze(): input pdb (pdb1jxt.ent) not available")
        return
    pdb_in = file_reader.any_file(file_name=regression_pdb)
    hierarchy = pdb_in.file_object.hierarchy
    pdb_io = pdb.input(file_name=regression_pdb)
    r = rotalyze.rotalyze(
        pdb_hierarchy=hierarchy,
        outliers_only=True)
    out = StringIO()
    r.show_old_output(out=out, verbose=False)
    output = out.getvalue().strip()
    assert output == ""
    r = rotalyze.rotalyze(
        pdb_hierarchy=hierarchy,
        outliers_only=False)
    for unpickle in [False, True]:
        if unpickle:
            r = loads(dumps(r))
        out = StringIO()
        r.show_old_output(out=out, verbose=False)
        output = out.getvalue()
        assert not show_diff(output,"""\
 A 1 THR:1.00:95.4:299.5::::Favored:m
 A 2 ATHR:0.67:49.5:56.1::::Favored:p
 A 2 BTHR:0.33:90.4:298.1::::Favored:m
 A 3 CYS:1.00:12.9:310.5::::Favored:m
 A 4 CYS:1.00:91.6:293.1::::Favored:m
 A 5 PRO:1.00:78.8:30.2:319.7:33.8::Favored:Cg_endo
 A 6 SER:1.00:90.1:68.4::::Favored:p
 A 7 AILE:0.45:49.6:290.8:178.2:::Favored:mt
 A 7 BILE:0.55:6.5:284.4:298.4:::Favored:mm
 A 8 AVAL:0.50:1.1:156.7::::Allowed:t
 A 8 BVAL:0.30:5.1:71.3::::Favored:p
 A 8 CVAL:0.20:69.8:172.1::::Favored:t
 A 10 AARG:0.65:24.7:176.8:66.5:63.9:180.0:Favored:tpp-160
 A 10 BARG:0.35:17.5:176.8:72.8:66.4:171.9:Favored:tpp-160
 A 11 SER:1.00:51.6:300.9::::Favored:m
 A 12 AASN:0.50:93.9:286.1:343.8:::Favored:m-40
 A 12 BASN:0.50:98.9:288.4:337.6:::Favored:m-40
 A 13 APHE:0.65:45.1:187.2:276.4:::Favored:t80
 A 13 BPHE:0.35:86.1:179.6:263.1:::Favored:t80
 A 14 ASN:1.00:95.2:289.6:333.0:::Favored:m-40
 A 15 VAL:1.00:42.3:168.2::::Favored:t
 A 16 CYS:1.00:40.8:176.5::::Favored:t
 A 17 ARG:1.00:21.4:289.7:282.8:288.6:158.7:Favored:mmm160
 A 18 LEU:1.00:65.0:287.2:173.3:::Favored:mt
 A 19 PRO:1.00:43.6:24.4:324.8:31.6::Favored:Cg_endo
 A 21 THR:1.00:5.7:314.0::::Favored:m
 A 22 APRO:0.55:87.5:333.5:34.0:333.8::Favored:Cg_exo
 A 23 AGLU:0.50:86.9:290.9:187.1:341.8::Favored:mt-10
 A 23 BGLU:0.50:91.7:292.0:183.8:339.2::Favored:mt-10
 A 25 ALEU:0.50:95.7:294.4:173.6:::Favored:mt
 A 26 CYS:1.00:83.0:295.0::::Favored:m
 A 28 THR:1.00:29.6:52.9::::Favored:p
 A 29 ATYR:0.65:18.5:161.8:67.8:::Favored:t80
 A 29 BTYR:0.35:0.4:191.3:322.7:::Allowed:t80
 A 30 ATHR:0.70:60.8:57.4::::Favored:p
 A 30 BTHR:0.30:6.6:78.1::::Favored:p
 A 32 CYS:1.00:61.4:301.7::::Favored:m
 A 33 ILE:1.00:36.6:66.5:173.4:::Favored:pt
 A 34 AILE:0.70:60.9:303.6:167.6:::Favored:mt
 A 34 BILE:0.30:31.4:308.5:296.8:::Favored:mm
 A 35 ILE:1.00:45.6:62.4:170.0:::Favored:pt
 A 36 PRO:1.00:36.2:22.5:330.5:24.8::Favored:Cg_endo
 A 39 ATHR:0.70:14.0:311.0::::Favored:m
 A 39 BTHR:0.30:13.1:288.8::::Favored:m
 A 40 CYS:1.00:81.4:294.4::::Favored:m
 A 41 PRO:1.00:35.4:34.4:317.5:33.1::Favored:Cg_endo
 A 43 AASP:0.75:24.8:56.5:340.3:::Favored:p0
 A 43 BASP:0.25:43.2:59.6:349.3:::Favored:p0
 A 44 TYR:1.00:85.3:290.9:85.1:::Favored:m-80
 A 46 ASN:1.00:38.7:301.6:117.9:::Favored:m110
""")