def tst_01():
    """Compare the mixed query model with the target model.

    Builds hierarchies from the module-level ``model`` and ``query`` PDB
    strings, calls ``run`` with a ``StringIO`` as output, and compares the
    last four lines of that output with the expected summary after both
    have been passed through ``remove_blank``.

    Raises:
        AssertionError: with message "FAILED" if the texts differ.
    """
    # Parenthesised single-argument print and raise X("msg") are valid on
    # both Python 2 and Python 3; the statement forms are Python 2 only.
    print("Comparing mixed model with target...")
    import iotbx.pdb
    from cctbx.array_family import flex
    model_pdb_inp = iotbx.pdb.input(
        source_info='model', lines=flex.split_lines(model))
    crystal_symmetry = model_pdb_inp.crystal_symmetry()
    model_hierarchy = model_pdb_inp.construct_hierarchy()
    query_hierarchy = iotbx.pdb.input(
        source_info='query',
        lines=flex.split_lines(query)).construct_hierarchy()
    f = StringIO()
    # The return value is not needed here; only the captured text is checked.
    run(crystal_symmetry=crystal_symmetry,
        chain_hierarchy=query_hierarchy,
        target_hierarchy=model_hierarchy,
        out=f)
    expected_text = """
Residues matching in forward direction: 16 RMSD: 1.45
Residues matching in reverse direction: 31 RMSD: 1.40
Residues near but not matching one-to-one: 12 RMSD: 1.87
Residues far from target: 2 RMSD: 2.04
"""
    found_text = "\n".join(f.getvalue().splitlines()[-4:])
    if remove_blank(found_text) != remove_blank(expected_text):
        print("Expected: \n%s \nFound: \n%s" % (expected_text, found_text))
        raise AssertionError("FAILED")
    print("OK")
def exercise_columns_73_76_evaluator(pdb_file_names):
    """Exercise ``pdb.columns_73_76_evaluator`` on files and inline records.

    For every path in ``pdb_file_names`` whose basename is in one of the two
    known lists, the evaluator's ``finding`` and ``is_old_style`` are
    asserted. At least three files from each list must be seen. Finally a
    small inline record set is checked. If ``pdb_file_names`` is None the
    exercise is skipped with a message.
    """
    if pdb_file_names is None:
        print("Skipping exercise_columns_73_76_evaluator():"
              " input files not available")
        return
    blank_names = """\
occ_3_bad2.pdb
enk_gm.pdb
t.pdb
phe_a.pdb
f_obs_complex.pdb
phe_h_bad.pdb
one_conf_but_altloc.pdb
""".splitlines()
    one_label_names = """\
pdb103l.ent
pdb1etn.ent
pdb118d.ent
pdb161d.ent
pdb139l.ent
pdb1anp.ent
pdb1gky.ent
""".splitlines()
    n_blank = 0
    n_one_label = 0
    for path in pdb_file_names:
        with open(path) as handle:
            text = handle.read()
        evaluator = pdb.columns_73_76_evaluator(lines=flex.split_lines(text))
        base = os.path.basename(path)
        if base in blank_names:
            assert evaluator.finding == \
                "Blank columns 73-76 on ATOM and HETATM records."
            assert not evaluator.is_old_style
            n_blank += 1
        elif base in one_label_names:
            assert evaluator.finding == \
                "Exactly one common label in columns 73-76."
            assert evaluator.is_old_style
            n_one_label += 1
    assert n_blank >= 3
    assert n_one_label >= 3
    #
    # NOTE(review): fixed-column layout below is restored per the PDB
    # format (label in columns 73-76) -- confirm against the repository.
    inline_records = flex.split_lines("""\
HEADER    HYDROLASE(METALLOPROTEINASE)            17-NOV-93   1THL
ATOM      1  N   ILE     1       9.581  51.813  -0.720  1.00 31.90      1THL 158
ATOM      2  CA  ILE     1       8.335  52.235  -0.041  1.00 52.95      1THL 159
ATOM      3  C   ILE     1       7.959  53.741   0.036  1.00 26.88      1THL 160
END
""")
    evaluator = pdb.columns_73_76_evaluator(lines=inline_records)
    assert evaluator.finding == "Exactly one common label in columns 73-76."
    assert evaluator.is_old_style
def exercise_columns_73_76_evaluator(pdb_file_names):
    """Exercise ``pdb.columns_73_76_evaluator`` on files and inline records.

    Each path in ``pdb_file_names`` is read and evaluated. When its basename
    is in ``known_blank`` or ``known_exactly_one`` the evaluator's
    ``finding`` and ``is_old_style`` are asserted, and at least three hits
    per list are required. An inline record set is checked at the end.
    If ``pdb_file_names`` is None the exercise is skipped with a message.
    """
    if pdb_file_names is None:
        # Single-argument parenthesised print works on Python 2 and 3.
        print("Skipping exercise_columns_73_76_evaluator():"
              " input files not available")
        return
    known_blank = """\
occ_3_bad2.pdb
enk_gm.pdb
t.pdb
phe_a.pdb
f_obs_complex.pdb
phe_h_bad.pdb
one_conf_but_altloc.pdb
""".splitlines()
    known_exactly_one = """\
pdb103l.ent
pdb1etn.ent
pdb118d.ent
pdb161d.ent
pdb139l.ent
pdb1anp.ent
pdb1gky.ent
""".splitlines()
    n_known = [0, 0]
    for file_name in pdb_file_names:
        # Close the handle deterministically instead of relying on
        # garbage collection of the anonymous file object.
        with open(file_name) as f:
            raw_lines = f.read()
        lines = flex.split_lines(raw_lines)
        e = pdb.columns_73_76_evaluator(lines=lines)
        bn = os.path.basename(file_name)
        if bn in known_blank:
            assert e.finding == \
                "Blank columns 73-76 on ATOM and HETATM records."
            assert not e.is_old_style
            n_known[0] += 1
        elif bn in known_exactly_one:
            assert e.finding == "Exactly one common label in columns 73-76."
            assert e.is_old_style
            n_known[1] += 1
    assert n_known[0] >= 3
    assert n_known[1] >= 3
    #
    # NOTE(review): fixed-column layout below is restored per the PDB
    # format (label in columns 73-76) -- confirm against the repository.
    lines = flex.split_lines("""\
HEADER    HYDROLASE(METALLOPROTEINASE)            17-NOV-93   1THL
ATOM      1  N   ILE     1       9.581  51.813  -0.720  1.00 31.90      1THL 158
ATOM      2  CA  ILE     1       8.335  52.235  -0.041  1.00 52.95      1THL 159
ATOM      3  C   ILE     1       7.959  53.741   0.036  1.00 26.88      1THL 160
END
""")
    e = pdb.columns_73_76_evaluator(lines=lines)
    assert e.finding == "Exactly one common label in columns 73-76."
    assert e.is_old_style
def __init__(self, file_object=None, file_name=None):
    """Initialise the reader from an open file object or a file name.

    Exactly one of ``file_object`` and ``file_name`` must be given. The
    content is read in full, split with ``flex.split_lines`` and passed to
    the base-class initialiser as ``lines``.

    A handle opened here from ``file_name`` is closed once read. A
    caller-supplied ``file_object`` is left open for the caller to manage.
    """
    assert [file_object, file_name].count(None) == 1
    from cctbx.array_family import flex
    if file_object is None:
        from libtbx import smart_open
        file_object = smart_open.for_reading(file_name=file_name)
        try:
            content = file_object.read()
        finally:
            # We opened it, so we are responsible for closing it.
            file_object.close()
    else:
        content = file_object.read()
    super(reader, self).__init__(lines=flex.split_lines(content))
def tst_02():
    """Regularise the module-level ``text`` model with insertions disallowed.

    Calls ``replace_with_segments_from_pdb`` with
    ``alpha.allow_insertions=false`` and compares the concatenated
    ``show_summary`` output of every replacement-segment summary with the
    expected text, after both have been passed through ``remove_blank``.

    Raises:
        AssertionError: with message "FAILED" if the texts differ.
    """
    import sys
    # Progress message without a newline, so the final "OK" lands on the
    # same line. Written via sys.stdout so it parses on Python 2 and 3.
    sys.stdout.write("Regularizing not allowing insertions... ")
    import iotbx.pdb
    from cctbx.array_family import flex
    hierarchy = iotbx.pdb.input(
        source_info='text',
        lines=flex.split_lines(text)).construct_hierarchy()
    r = replace_with_segments_from_pdb(
        args=['alpha.allow_insertions=false'],
        pdb_hierarchy=hierarchy,
        out=null_out())
    expected_text = """
ID: 1 ChainID: 'U' RMSD: 1.28 A (n=25) Junction RMSD: 0.68 A (n=8)
Complete: True Insertions/deletions: False
Input model start: 111 end: 135 length: 25
Replacement start: 111 end: 135 length: 25
"""
    f = StringIO()
    for rss in r.model_replacement_segment_summaries:
        rss.show_summary(out=f)
    found_text = f.getvalue()
    if remove_blank(found_text) != remove_blank(expected_text):
        print("Expected: \n%s \nFound: \n%s" % (expected_text, found_text))
        raise AssertionError("FAILED")
    print("OK")
def tst_02():
    """Regularise the module-level ``text`` model with insertions disallowed.

    Calls ``replace_with_segments_from_pdb`` with
    ``alpha.allow_insertions=false`` and compares the concatenated
    ``show_summary`` output of every replacement-segment summary with the
    expected text, after both have been passed through ``remove_blank``.

    Raises:
        AssertionError: with message "FAILED" if the texts differ.
    """
    import sys
    # Progress message without a newline, so the final "OK" lands on the
    # same line. Written via sys.stdout so it parses on Python 2 and 3.
    sys.stdout.write("Regularizing not allowing insertions... ")
    import iotbx.pdb
    from cctbx.array_family import flex
    hierarchy = iotbx.pdb.input(
        source_info='text',
        lines=flex.split_lines(text)).construct_hierarchy()
    r = replace_with_segments_from_pdb(
        args=['alpha.allow_insertions=false'],
        pdb_hierarchy=hierarchy,
        out=null_out())
    expected_text = """
ID: 1 ChainID: 'U' RMSD: 1.29 A (n=24) Junction RMSD: 0.69 A (n=6)
Complete: False Insertions/deletions: False
Input model start: 111 end: 135 length: 25
Replacement start: 112 end: 135 length: 24
"""
    f = StringIO()
    for rss in r.model_replacement_segment_summaries:
        rss.show_summary(out=f)
    found_text = f.getvalue()
    if remove_blank(found_text) != remove_blank(expected_text):
        print("Expected: \n%s \nFound: \n%s" % (expected_text, found_text))
        raise AssertionError("FAILED")
    print("OK")
def get_pdb_hierarchy_from_restraints(code):
    """Build a PDB hierarchy for ``code`` from its restraints CIF file.

    The CIF file path is looked up through the monomer library server. Every
    row carrying ``_chem_comp_atom.*`` items becomes one atom (name, element
    and xyz) in a single atom group, which is wrapped in one residue group,
    one chain with id 'Z' and one model. That model is appended to a
    hierarchy constructed from empty input, and ``i_seq`` is reset before
    the hierarchy is returned.
    """
    from iotbx import pdb
    from mmtbx.monomer_library import server

    cif_path = server.server().get_comp_comp_id_direct(
        code, return_filename=True)
    cif_obj = server.read_cif(cif_path)
    empty_input = pdb.pdb_input(
        source_info="Model from %s" % cif_path,
        lines=flex.split_lines(""))
    ligand_hierarchy = empty_input.construct_hierarchy()

    new_model = pdb.hierarchy.model()
    new_chain = pdb.hierarchy.chain()
    new_chain.id = 'Z'
    residue_group = pdb.hierarchy.residue_group()
    atom_group = pdb.hierarchy.atom_group()

    for block_name, block in cif_obj.blocks.items():
        if block_name == 'comp_list':
            continue
        for loop in block.iterloops():
            for row in loop.iterrows():
                # A loop without atom items is abandoned at its first row.
                if '_chem_comp_atom.comp_id' not in row:
                    break
                atom_group.resname = row['_chem_comp_atom.comp_id']
                new_atom = pdb.hierarchy.atom()
                new_atom.name = row['_chem_comp_atom.atom_id']
                new_atom.element = '%2s' % row['_chem_comp_atom.type_symbol']
                x = float(row['_chem_comp_atom.x'])
                y = float(row['_chem_comp_atom.y'])
                z = float(row['_chem_comp_atom.z'])
                new_atom.xyz = (x, y, z)
                atom_group.append_atom(new_atom)

    residue_group.append_atom_group(atom_group)
    new_chain.append_residue_group(residue_group)
    new_model.append_chain(new_chain)
    ligand_hierarchy.append_model(new_model)
    ligand_hierarchy.atoms().reset_i_seq()
    return ligand_hierarchy
def exercise_1(mon_lib_srv, ener_lib):
    """Check dihedral and torsion-NCS proxy counts for ``pdb_str_1``.

    Writes ``pdb_str_1`` to ``1.pdb``, asserts that
    ``utils.get_complete_dihedral_proxies`` returns 54 proxies, then builds
    a model twice: once with the NCS search enabled (28 proxies expected)
    and once with an explicit NCS group, chain A as reference and chain B
    as selection (40 proxies expected).

    ``mon_lib_srv`` and ``ener_lib`` are accepted for signature
    compatibility and are not used in this body.
    """
    # The context manager closes the file even if the write fails.
    with open("1.pdb", "w") as f:
        f.write(pdb_str_1)
    log = StringIO()
    dihedral_proxies = utils.get_complete_dihedral_proxies(
        raw_records=pdb_str_1)
    assert len(dihedral_proxies) == 54, \
        "Expected 54, got %d" % len(dihedral_proxies)

    # default run (1 residue is out of NCS)
    params = mmtbx.model.manager.get_default_pdb_interpretation_params()
    params.pdb_interpretation.ncs_search.enabled = True
    pdb_inp = iotbx.pdb.input(
        source_info=None, lines=flex.split_lines(pdb_str_1))
    model = mmtbx.model.manager(
        model_input=pdb_inp,
        process_input=True,
        build_grm=True,
        pdb_interpretation_params=params)
    ncs_manager = torsion_ncs.torsion_ncs(model=model, log=log)
    nprox = ncs_manager.get_n_proxies()
    assert nprox == 28, "got %d instead of 28" % nprox

    # supply full NCS
    cuspars = iotbx.phil.parse("""
pdb_interpretation.ncs_search.enabled=True
pdb_interpretation.ncs_group {
  reference = (chain A )
  selection = (chain B )
}
""")
    params = mmtbx.model.manager.get_default_pdb_interpretation_scope()
    p = params.fetch(cuspars).extract()
    pdb_inp = iotbx.pdb.input(
        source_info=None, lines=flex.split_lines(pdb_str_1))
    model = mmtbx.model.manager(
        model_input=pdb_inp,
        process_input=True,
        build_grm=True,
        pdb_interpretation_params=p)
    ncs_manager = torsion_ncs.torsion_ncs(model=model, log=log)
    nprox = ncs_manager.get_n_proxies()
    assert nprox == 40, "got %d instead of 40" % nprox
def exercise_1(mon_lib_srv, ener_lib):
    """Check dihedral and torsion-NCS proxy counts for ``pdb_str_1``.

    Writes ``pdb_str_1`` to ``1.pdb``, asserts that
    ``utils.get_complete_dihedral_proxies`` returns 54 proxies, then
    processes the records twice with ``pdb_interpretation.process``: once
    with the NCS search enabled (28 proxies expected) and once with an
    explicit NCS group, chain A as reference and chain B as selection
    (40 proxies expected).

    Args:
        mon_lib_srv: monomer library server passed to
            ``pdb_interpretation.process``.
        ener_lib: energy library passed to ``pdb_interpretation.process``.
    """
    # The context manager closes the file even if the write fails.
    with open("1.pdb", "w") as f:
        f.write(pdb_str_1)
    log = cStringIO.StringIO()
    dihedral_proxies = utils.get_complete_dihedral_proxies(
        raw_records=pdb_str_1)
    assert len(dihedral_proxies) == 54, \
        "Expected 54, got %d" % len(dihedral_proxies)

    # default run (1 residue is out of NCS)
    params = pdb_interpretation.master_params.extract()
    params.ncs_search.enabled = True
    ppf = pdb_interpretation.process(
        mon_lib_srv=mon_lib_srv,
        ener_lib=ener_lib,
        params=params,
        raw_records=flex.split_lines(pdb_str_1))
    ncs_manager = torsion_ncs.torsion_ncs(
        processed_pdb_file=ppf,
        ncs_obj=ppf.ncs_obj,
        log=log)
    nprox = ncs_manager.get_n_proxies()
    assert nprox == 28, "got %d instead of 28" % nprox

    # supply full NCS
    cuspars = iotbx.phil.parse("""
ncs_search.enabled=True
ncs_group {
  reference = (chain A )
  selection = (chain B )
}
""")
    params = pdb_interpretation.master_params
    p = params.fetch(cuspars).extract()
    ppf = pdb_interpretation.process(
        mon_lib_srv=mon_lib_srv,
        ener_lib=ener_lib,
        params=p,
        raw_records=flex.split_lines(pdb_str_1))
    ncs_manager = torsion_ncs.torsion_ncs(
        processed_pdb_file=ppf,
        ncs_obj=ppf.ncs_obj,
        log=log)
    nprox = ncs_manager.get_n_proxies()
    assert nprox == 40, "got %d instead of 40" % nprox
def tst_02():
    """Compare the mixed query model with the two-chain target ``model1``.

    Runs ``run`` on hierarchies built from the module-level ``model1`` and
    ``query`` strings, compares the last ten lines of its output with the
    expected table (both passed through ``remove_blank``), then checks the
    values returned by ``get_values`` for "forward", "reverse", "close" and
    "all_far".

    Raises:
        AssertionError: "FAILED" if the table differs, or from the
            ``approx_equal`` asserts.
    """
    print("Comparing mixed model with target with 2 chains...")
    import iotbx.pdb
    from cctbx.array_family import flex
    target_inp = iotbx.pdb.input(
        source_info='model', lines=flex.split_lines(model1))
    symmetry = target_inp.crystal_symmetry()
    target_h = target_inp.construct_hierarchy()
    query_inp = iotbx.pdb.input(
        source_info='query', lines=flex.split_lines(query))
    query_h = query_inp.construct_hierarchy()
    captured = StringIO()
    result = run(
        crystal_symmetry=symmetry,
        chain_hierarchy=query_h,
        target_hierarchy=target_h,
        out=captured)
    expected_text = """

SEQ SCORE is fraction (close and matching target sequence).
MEAN LENGTH is the mean length of contiguous segments in the match with target sequence.
(Each gap/reverse of direction starts new segment).

----ALL RESIDUES--- CLOSE RESIDUES ONLY %
MODEL --CLOSE- --FAR-- FORWARD REVERSE MIXED FOUND CA SEQ
RMSD N N N N N SCORE SEQ MATCH(%) SCORE MEAN LENGTH

Unique_target 1.55 54 7 14 29 11 39.7 0.26 9.3 0.04 6.0"""
    tail = captured.getvalue().splitlines()[-10:]
    found_text = "\n".join(tail)
    if not remove_blank(found_text) == remove_blank(expected_text):
        print("\n\nExpected: \n%s \n\nFound: \n%s" % (
            expected_text, found_text))
        raise AssertionError("FAILED")
    from libtbx.test_utils import approx_equal
    print(result.get_values("forward"))
    assert approx_equal(
        result.get_values("forward"), (1.6751069901864204, 14))
    print(result.get_values("reverse"))
    assert approx_equal(
        result.get_values("reverse"), (1.388466550576198, 29))
    print(result.get_values("close"))
    assert approx_equal(
        result.get_values("close"), (1.545835235099158, 54))
    print(result.get_values("all_far"))
    assert approx_equal(result.get_values("all_far"), (0, 0))
    print("OK")
def tst_03():
    """Compare query and target through files, with a query directory.

    Writes the query hierarchy (preceded by a CRYST1 record) to
    ``files/query.pdb`` and the target hierarchy to ``model.pdb``, calls
    ``run`` with ``query_dir=files`` and ``model.pdb`` as arguments, and
    compares the last ten lines of its output with the expected table
    after both have been passed through ``remove_blank``.

    Raises:
        AssertionError: with message "FAILED" if the texts differ.
    """
    print("Comparing mixed model with target with 2 chains...as group")
    import iotbx.pdb
    from cctbx.array_family import flex
    model_pdb_inp = iotbx.pdb.input(
        source_info='model', lines=flex.split_lines(model1))
    # Retained from the original; the value is not used below.
    crystal_symmetry = model_pdb_inp.crystal_symmetry()
    model_hierarchy = model_pdb_inp.construct_hierarchy()
    query_hierarchy = iotbx.pdb.input(
        source_info='query',
        lines=flex.split_lines(query)).construct_hierarchy()
    import os
    if not os.path.isdir("files"):
        os.mkdir("files")
    # Context managers close the files even if a write fails.
    with open(os.path.join("files", "query.pdb"), 'w') as ff:
        # NOTE(review): CRYST1 column layout restored per the PDB format --
        # confirm against the repository.
        print("CRYST1  113.949  113.949   32.474  90.00  90.00  90.00 I 4",
              file=ff)
        print(query_hierarchy.as_pdb_string(), file=ff)
    with open("model.pdb", 'w') as ff:
        print(model_hierarchy.as_pdb_string(), file=ff)
    f = StringIO()
    args = ["query_dir=files", "model.pdb"]
    # The return value is not needed here; only the captured text is checked.
    run(args, out=f)
    expected_text = """

SEQ SCORE is fraction (close and matching target sequence).
MEAN LENGTH is the mean length of contiguous segments in the match with target sequence.
(Each gap/reverse of direction starts new segment).

----ALL RESIDUES--- CLOSE RESIDUES ONLY %
MODEL --CLOSE- --FAR-- FORWARD REVERSE MIXED FOUND CA SEQ
RMSD N N N N N SCORE SEQ MATCH(%) SCORE MEAN LENGTH

query.pdb 1.55 54 7 14 29 11 39.7 0.26 9.3 0.04 6.0"""
    found_text = "\n".join(f.getvalue().splitlines()[-10:])
    if remove_blank(found_text) != remove_blank(expected_text):
        print("Expected: \n%s \nFound: \n%s" % (expected_text, found_text))
        raise AssertionError("FAILED")
    print("OK")
def exercise_1(mon_lib_srv, ener_lib):
    """Check dihedral and torsion-NCS proxy counts for ``pdb_str_1``.

    Writes ``pdb_str_1`` to ``1.pdb``, asserts that
    ``utils.get_complete_dihedral_proxies`` returns 54 proxies, then
    processes the records twice with ``pdb_interpretation.process``: once
    with the NCS search enabled (28 proxies expected) and once with an
    explicit NCS group, chain A as reference and chain B as selection
    (40 proxies expected).

    Args:
        mon_lib_srv: monomer library server passed to
            ``pdb_interpretation.process``.
        ener_lib: energy library passed to ``pdb_interpretation.process``.
    """
    # The context manager closes the file even if the write fails.
    with open("1.pdb", "w") as f:
        f.write(pdb_str_1)
    log = cStringIO.StringIO()
    dihedral_proxies = utils.get_complete_dihedral_proxies(
        raw_records=pdb_str_1)
    assert len(dihedral_proxies) == 54, \
        "Expected 54, got %d" % len(dihedral_proxies)

    # default run (1 residue is out of NCS)
    params = pdb_interpretation.master_params.extract()
    params.ncs_search.enabled = True
    ppf = pdb_interpretation.process(
        mon_lib_srv=mon_lib_srv,
        ener_lib=ener_lib,
        params=params,
        raw_records=flex.split_lines(pdb_str_1))
    ncs_manager = torsion_ncs.torsion_ncs(
        processed_pdb_file=ppf,
        ncs_obj=ppf.ncs_obj,
        log=log)
    nprox = ncs_manager.get_n_proxies()
    assert nprox == 28, "got %d instead of 28" % nprox

    # supply full NCS
    cuspars = iotbx.phil.parse("""
ncs_search.enabled=True
ncs_group {
  reference = (chain A )
  selection = (chain B )
}
""")
    params = pdb_interpretation.master_params
    p = params.fetch(cuspars).extract()
    ppf = pdb_interpretation.process(
        mon_lib_srv=mon_lib_srv,
        ener_lib=ener_lib,
        params=p,
        raw_records=flex.split_lines(pdb_str_1))
    ncs_manager = torsion_ncs.torsion_ncs(
        processed_pdb_file=ppf,
        ncs_obj=ppf.ncs_obj,
        log=log)
    nprox = ncs_manager.get_n_proxies()
    assert nprox == 40, "got %d instead of 40" % nprox
def tst_02():
    """Compare the mixed query model with the two-chain target ``model1``.

    Runs ``run`` on hierarchies built from the module-level ``model1`` and
    ``query`` strings, compares the last ten lines of its output with the
    expected report (both passed through ``remove_blank``), then checks the
    values returned by ``get_values`` for "forward", "reverse", "close" and
    "all_far".

    Raises:
        AssertionError: "FAILED" if the report differs, or from the
            ``approx_equal`` asserts.
    """
    # Parenthesised single-argument print and raise X("msg") are valid on
    # both Python 2 and Python 3; the statement forms are Python 2 only.
    print("Comparing mixed model with target with 2 chains...")
    import iotbx.pdb
    from cctbx.array_family import flex
    model_pdb_inp = iotbx.pdb.input(
        source_info='model', lines=flex.split_lines(model1))
    crystal_symmetry = model_pdb_inp.crystal_symmetry()
    model_hierarchy = model_pdb_inp.construct_hierarchy()
    query_hierarchy = iotbx.pdb.input(
        source_info='query',
        lines=flex.split_lines(query)).construct_hierarchy()
    f = StringIO()
    r = run(crystal_symmetry=crystal_symmetry,
            chain_hierarchy=query_hierarchy,
            target_hierarchy=model_hierarchy,
            out=f)
    expected_text = """
Space group: I 4
Unit cell: 113.95 113.95 32.47 90.00 90.00 90.00
Looking for chain similarity for None (61 residues) in the model None (272 residues)

Residues matching in forward direction: 16 RMSD: 1.45
Residues matching in reverse direction: 31 RMSD: 1.40
Residues near but not matching one-to-one: 12 RMSD: 1.87

All residues near target: 59 RMSD: 1.52 Seq match (%): 6.8 % Found: 43.4
Residues far from target: 2 RMSD: 3.31
"""
    found_text = "\n".join(f.getvalue().splitlines()[-10:])
    if remove_blank(found_text) != remove_blank(expected_text):
        print("Expected: \n%s \nFound: \n%s" % (expected_text, found_text))
        raise AssertionError("FAILED")
    from libtbx.test_utils import approx_equal
    print(r.get_values("forward"))
    assert approx_equal(r.get_values("forward"), (1.4473857036049544, 16))
    print(r.get_values("reverse"))
    assert approx_equal(r.get_values("reverse"), (1.3969610738798282, 31))
    print(r.get_values("close"))
    assert approx_equal(r.get_values("close"), (1.5184018499613678, 59))
    print(r.get_values("all_far"))
    assert approx_equal(r.get_values("all_far"), (0, 0))
    print("OK")
def tst_05():
    """Run ``extract_unique_part_of_hierarchy`` on five test models.

    For each of the module-level model strings a hierarchy is built, its
    residue count is printed, and the unique part is extracted against a
    hierarchy built from the module-level ``target`` string. The chain ids
    of each result are printed. Nothing is asserted.
    """
    from mmtbx.validation.chain_comparison import \
        extract_unique_part_of_hierarchy as euph
    print("Testing extraction of unique part and unique matching")
    model_texts = [modela, modelb, modelaa, modelaab, modelaabaab]
    for model_text in model_texts:
        import iotbx.pdb
        from cctbx.array_family import flex
        parsed_model = iotbx.pdb.input(
            source_info='model', lines=flex.split_lines(model_text))
        # Retained from the original; the value is not used below.
        crystal_symmetry = parsed_model.crystal_symmetry()
        model_h = parsed_model.construct_hierarchy()
        n_residues = model_h.overall_counts().n_residues
        print("\nExtraction of unique MODEL with %s residues" % (n_residues))
        parsed_target = iotbx.pdb.input(
            source_info='query', lines=flex.split_lines(target))
        target_h = parsed_target.construct_hierarchy()
        unique_h = euph(model_h, target_ph=target_h)
        chain_ids = " ".join(unique_h.chain_ids())
        print("FINAL chain ids: %s \n" % (chain_ids))
    print("OK")
def tst_03():
    """Compare query and target through files, with a query directory.

    Writes the query hierarchy (preceded by a CRYST1 record) to
    ``files/query.pdb`` and the target hierarchy to ``model.pdb``, calls
    ``run`` with ``query_dir=files`` and ``model.pdb`` as arguments, and
    compares the last ten lines of its output with the expected table
    after both have been passed through ``remove_blank``.

    Raises:
        AssertionError: with message "FAILED" if the texts differ.
    """
    # Parenthesised single-argument print, file.write() and raise X("msg")
    # are valid on both Python 2 and 3; the statement forms are Python 2
    # only.
    print("Comparing mixed model with target with 2 chains...as group")
    import iotbx.pdb
    from cctbx.array_family import flex
    model_pdb_inp = iotbx.pdb.input(
        source_info='model', lines=flex.split_lines(model1))
    # Retained from the original; the value is not used below.
    crystal_symmetry = model_pdb_inp.crystal_symmetry()
    model_hierarchy = model_pdb_inp.construct_hierarchy()
    query_hierarchy = iotbx.pdb.input(
        source_info='query',
        lines=flex.split_lines(query)).construct_hierarchy()
    import os
    if not os.path.isdir("files"):
        os.mkdir("files")
    # Context managers close the files even if a write fails. Each write
    # appends the newline that ``print >>ff`` used to add.
    with open(os.path.join("files", "query.pdb"), 'w') as ff:
        # NOTE(review): CRYST1 column layout restored per the PDB format --
        # confirm against the repository.
        ff.write(
            "CRYST1  113.949  113.949   32.474  90.00  90.00  90.00 I 4\n")
        ff.write(query_hierarchy.as_pdb_string() + "\n")
    with open("model.pdb", 'w') as ff:
        ff.write(model_hierarchy.as_pdb_string() + "\n")
    f = StringIO()
    args = ["query_dir=files", "model.pdb"]
    # The return value is not needed here; only the captured text is checked.
    run(args, out=f)
    expected_text = """

SEQ SCORE is fraction (close and matching target sequence).

----ALL RESIDUES---- CLOSE RESIDUES ONLY %
MODEL --CLOSE- ---FAR-- FORWARD REVERSE MIXED FOUND CA SEQ
RMSD N RMSD N N N N SCORE SEQ MATCH(%) SCORE

query.pdb 1.52 59 3.3 2 16 31 12 43.4 0.29 6.8 0.03

"""
    found_text = "\n".join(f.getvalue().splitlines()[-10:])
    if remove_blank(found_text) != remove_blank(expected_text):
        print("Expected: \n%s \nFound: \n%s" % (expected_text, found_text))
        raise AssertionError("FAILED")
    print("OK")
def exercise_BIOMT():
    """Verify BIOMT extraction from PDB REMARK 350 records.

    Parses an inline set of REMARK 300/350 records holding four BIOMT
    operators and checks the operator count, the first two rotations and
    translations, and the serial numbers reported by
    ``process_BIOMT_records``.
    """
    # NOTE(review): BIOMT column layout restored per the PDB format --
    # confirm against the repository.
    remark_records = """\
REMARK 300 BIOMOLECULE: 1
REMARK 300 SEE REMARK 350 FOR THE AUTHOR PROVIDED AND/OR PROGRAM
REMARK 300 GENERATED ASSEMBLY INFORMATION FOR THE STRUCTURE IN
REMARK 300 THIS ENTRY. THE REMARK MAY ALSO PROVIDE INFORMATION ON
REMARK 300 BURIED SURFACE AREA.
REMARK 300 DETAILS: THE ASSEMBLY REPRESENTED IN THIS ENTRY HAS REGULAR
REMARK 300 ICOSAHEDRAL POINT SYMMETRY (SCHOENFLIES SYMBOL = I).
REMARK 350
REMARK 350 GENERATING THE BIOMOLECULE
REMARK 350 COORDINATES FOR A COMPLETE MULTIMER REPRESENTING THE KNOWN
REMARK 350 BIOLOGICALLY SIGNIFICANT OLIGOMERIZATION STATE OF THE
REMARK 350 MOLECULE CAN BE GENERATED BY APPLYING BIOMT TRANSFORMATIONS
REMARK 350 GIVEN BELOW. BOTH NON-CRYSTALLOGRAPHIC AND
REMARK 350 CRYSTALLOGRAPHIC OPERATIONS ARE GIVEN.
REMARK 350
REMARK 350 BIOMOLECULE: 1
REMARK 350 APPLY THE FOLLOWING TO CHAINS: L, S
REMARK 350   BIOMT1   1  1.000000  0.000000  0.000000        0.00000
REMARK 350   BIOMT2   1  0.000000  1.000000  0.000000        0.00000
REMARK 350   BIOMT3   1  0.000000  0.000000  1.000000        0.00000
REMARK 350   BIOMT1   2  0.309017 -0.809017  0.500000        0.10000
REMARK 350   BIOMT2   2  0.809017  0.500000  0.309017        0.02000
REMARK 350   BIOMT3   2 -0.500000  0.309017  0.809017        0.00300
REMARK 350   BIOMT1   3 -0.809017 -0.500000  0.309017        0.00000
REMARK 350   BIOMT2   3  0.500000 -0.309017  0.809017        0.00000
REMARK 350   BIOMT3   3 -0.309017  0.809017  0.500000        0.00000
REMARK 350   BIOMT1   4 -0.809017  0.500000 -0.309017        0.00000
REMARK 350   BIOMT2   4 -0.500000 -0.309017  0.809017        0.00000
REMARK 350   BIOMT3   4  0.309017  0.809017  0.500000        0.00000
"""
    parsed = pdb.pdb_input(
        source_info=None, lines=flex.split_lines(remark_records))
    biomt = parsed.process_BIOMT_records()
    assert len(biomt.r) == 4
    identity_rotation = [
        1.000000, 0.000000, 0.000000,
        0.000000, 1.000000, 0.000000,
        0.000000, 0.000000, 1.000000]
    assert approx_equal(biomt.r[0], identity_rotation)
    assert approx_equal(biomt.t[0], [0.00000, 0.00000, 0.00000])
    second_rotation = [
        0.309017, -0.809017, 0.500000,
        0.809017, 0.500000, 0.309017,
        -0.500000, 0.309017, 0.809017]
    assert approx_equal(biomt.r[1], second_rotation)
    assert approx_equal(biomt.t[1], [0.10000, 0.02000, 0.00300])
    assert biomt.serial_number == [1, 2, 3, 4]
def exercise_line_info_exceptions():
    """Check the error message raised for a malformed ANISOU record.

    First parses a bare "ATOM" line, which must not raise. Then parses a
    HETATM/ANISOU pair whose ANISOU field contains a plus sign, and
    compares the text of the resulting ``ValueError`` with the expected
    message, including source name, line number and caret position.

    Raises:
        AssertionError: if the message differs, or if no ``ValueError`` is
            raised at all.
    """
    pdb.pdb_input(source_info=None, lines=flex.std_string(["ATOM"]))
    #
    # NOTE(review): the fixed-column layout of the records and the
    # indentation of the expected message are restored to match the 33-dash
    # caret line -- confirm against the repository.
    try:
        pdb.pdb_input(
            source_info="some.pdb",
            lines=flex.split_lines("""\
HETATM    9 2H3  MPR B   5      16.388   0.289   6.613  1.00  0.08
ANISOU    9 2H3  MPR B   5      8+8    848    848      0      0      0
"""))
    except ValueError as e:  # "as" form is valid on Python 2.6+ and 3
        assert not show_diff(str(e), """\
some.pdb, line 2:
  ANISOU    9 2H3  MPR B   5      8+8    848    848      0      0      0
  ---------------------------------^
  unexpected plus sign.""")
    else:
        # Without this branch the test passed silently when the parser
        # accepted the malformed record.
        raise AssertionError("ValueError expected")
def exercise_line_info_exceptions():
    """Check the error message raised for a malformed ANISOU record.

    First parses a bare "ATOM" line, which must not raise. Then parses a
    HETATM/ANISOU pair whose ANISOU field contains a plus sign, and
    compares the text of the resulting ``ValueError`` with the expected
    message, including source name, line number and caret position.

    Raises:
        AssertionError: if the message differs, or if no ``ValueError`` is
            raised at all.
    """
    pdb.pdb_input(source_info=None, lines=flex.std_string(["ATOM"]))
    #
    # NOTE(review): the fixed-column layout of the records and the
    # indentation of the expected message are restored to match the 33-dash
    # caret line -- confirm against the repository.
    try:
        pdb.pdb_input(
            source_info="some.pdb",
            lines=flex.split_lines("""\
HETATM    9 2H3  MPR B   5      16.388   0.289   6.613  1.00  0.08
ANISOU    9 2H3  MPR B   5      8+8    848    848      0      0      0
"""))
    except ValueError as e:  # "as" form is valid on Python 2.6+ and 3
        assert not show_diff(str(e), """\
some.pdb, line 2:
  ANISOU    9 2H3  MPR B   5      8+8    848    848      0      0      0
  ---------------------------------^
  unexpected plus sign.""")
    else:
        # Without this branch the test passed silently when the parser
        # accepted the malformed record.
        raise AssertionError("ValueError expected")
def exercise_BIOMT():
    """Verify BIOMT extraction from PDB REMARK 350 records.

    An inline block of REMARK 300/350 records with four BIOMT operators is
    parsed. The test asserts the number of operators, the first two
    rotation matrices and translation vectors, and the list of serial
    numbers returned by ``process_BIOMT_records``.
    """
    # NOTE(review): BIOMT column layout restored per the PDB format --
    # confirm against the repository.
    records = flex.split_lines("""\
REMARK 300 BIOMOLECULE: 1
REMARK 300 SEE REMARK 350 FOR THE AUTHOR PROVIDED AND/OR PROGRAM
REMARK 300 GENERATED ASSEMBLY INFORMATION FOR THE STRUCTURE IN
REMARK 300 THIS ENTRY. THE REMARK MAY ALSO PROVIDE INFORMATION ON
REMARK 300 BURIED SURFACE AREA.
REMARK 300 DETAILS: THE ASSEMBLY REPRESENTED IN THIS ENTRY HAS REGULAR
REMARK 300 ICOSAHEDRAL POINT SYMMETRY (SCHOENFLIES SYMBOL = I).
REMARK 350
REMARK 350 GENERATING THE BIOMOLECULE
REMARK 350 COORDINATES FOR A COMPLETE MULTIMER REPRESENTING THE KNOWN
REMARK 350 BIOLOGICALLY SIGNIFICANT OLIGOMERIZATION STATE OF THE
REMARK 350 MOLECULE CAN BE GENERATED BY APPLYING BIOMT TRANSFORMATIONS
REMARK 350 GIVEN BELOW. BOTH NON-CRYSTALLOGRAPHIC AND
REMARK 350 CRYSTALLOGRAPHIC OPERATIONS ARE GIVEN.
REMARK 350
REMARK 350 BIOMOLECULE: 1
REMARK 350 APPLY THE FOLLOWING TO CHAINS: L, S
REMARK 350   BIOMT1   1  1.000000  0.000000  0.000000        0.00000
REMARK 350   BIOMT2   1  0.000000  1.000000  0.000000        0.00000
REMARK 350   BIOMT3   1  0.000000  0.000000  1.000000        0.00000
REMARK 350   BIOMT1   2  0.309017 -0.809017  0.500000        0.10000
REMARK 350   BIOMT2   2  0.809017  0.500000  0.309017        0.02000
REMARK 350   BIOMT3   2 -0.500000  0.309017  0.809017        0.00300
REMARK 350   BIOMT1   3 -0.809017 -0.500000  0.309017        0.00000
REMARK 350   BIOMT2   3  0.500000 -0.309017  0.809017        0.00000
REMARK 350   BIOMT3   3 -0.309017  0.809017  0.500000        0.00000
REMARK 350   BIOMT1   4 -0.809017  0.500000 -0.309017        0.00000
REMARK 350   BIOMT2   4 -0.500000 -0.309017  0.809017        0.00000
REMARK 350   BIOMT3   4  0.309017  0.809017  0.500000        0.00000
""")
    info = pdb.pdb_input(
        source_info=None, lines=records).process_BIOMT_records()
    assert len(info.r) == 4
    # Operator 1: identity rotation, zero translation.
    assert approx_equal(
        info.r[0],
        [1.000000, 0.000000, 0.000000,
         0.000000, 1.000000, 0.000000,
         0.000000, 0.000000, 1.000000])
    assert approx_equal(info.t[0], [0.00000, 0.00000, 0.00000])
    # Operator 2: values taken from the BIOMT rows with serial number 2.
    assert approx_equal(
        info.r[1],
        [0.309017, -0.809017, 0.500000,
         0.809017, 0.500000, 0.309017,
         -0.500000, 0.309017, 0.809017])
    assert approx_equal(info.t[1], [0.10000, 0.02000, 0.00300])
    assert info.serial_number == [1, 2, 3, 4]
def get_complete_dihedral_proxies(
        pdb_hierarchy=None,
        file_name=None,
        raw_records=None,
        mon_lib_srv=None,
        ener_lib=None,
        crystal_symmetry=None,
        log=None):
    """Process a not-yet-processed model and return its dihedral proxies.

    This function is called only for reference files that were not
    processed yet. For the main file only ``get_dihedrals_and_phi_psi`` is
    called.

    Exactly one of ``pdb_hierarchy``, ``file_name`` and ``raw_records``
    must be given. A hierarchy is converted to a PDB string, and string
    records are split into lines. Processing runs with C-beta restraints
    off, automatic linking set to ``link_none`` and the clash-guard
    nonbonded distance threshold disabled. The processing log goes to a
    throw-away ``cStringIO`` buffer.

    Returns:
        The result of ``get_dihedrals_and_phi_psi`` on the processed file.
    """
    assert [pdb_hierarchy, file_name, raw_records].count(None) == 2
    from mmtbx.monomer_library import server, pdb_interpretation
    import cStringIO
    if log is None:
        log = sys.stdout
    if mon_lib_srv is None:
        mon_lib_srv = server.server()
    if ener_lib is None:
        ener_lib = server.ener_lib()
    if pdb_hierarchy is not None:
        raw_records = pdb_hierarchy.as_pdb_string()
    # isinstance() is False for None, so no separate None check is needed.
    if isinstance(raw_records, str):
        raw_records = flex.split_lines(raw_records)
    interpretation_params = pdb_interpretation.master_params.extract()
    interpretation_params.c_beta_restraints = False
    interpretation_params.automatic_linking.link_none = True
    interpretation_params.clash_guard.nonbonded_distance_threshold = None
    processed = pdb_interpretation.process(
        mon_lib_srv=mon_lib_srv,
        ener_lib=ener_lib,
        params=interpretation_params,
        file_name=file_name,
        raw_records=raw_records,
        strict_conflict_handling=False,
        crystal_symmetry=crystal_symmetry,
        force_symmetry=True,
        log=cStringIO.StringIO(),
        substitute_non_crystallographic_unit_cell_if_necessary=True)
    return get_dihedrals_and_phi_psi(processed)
def get_complete_dihedral_proxies(
        pdb_hierarchy=None,
        file_name=None,
        raw_records=None,
        mon_lib_srv=None,
        ener_lib=None,
        crystal_symmetry=None,
        restraint_objects=None,
        monomer_parameters=None,
        log=None):
    """Build a model from raw input and return its dihedral proxies.

    This function is called only for reference files that were not
    processed yet. For the main file only ``get_dihedrals_and_phi_psi`` is
    called. Still used for reference model torsion restraints.

    Exactly one of ``pdb_hierarchy``, ``file_name`` and ``raw_records``
    must be given. A hierarchy is converted to a PDB string, and string
    records are split into lines. The model is built with C-beta
    restraints off, automatic linking set to ``link_none`` and the
    clash-guard nonbonded distance threshold disabled, logging to
    ``null_out()``.

    Returns:
        The result of ``get_dihedrals_and_phi_psi`` on the built model.
    """
    import mmtbx.model
    assert [pdb_hierarchy, file_name, raw_records].count(None) == 2
    from mmtbx.monomer_library import server, pdb_interpretation
    if log is None:
        log = sys.stdout
    if mon_lib_srv is None:
        mon_lib_srv = server.server()
    if ener_lib is None:
        ener_lib = server.ener_lib()
    if pdb_hierarchy is not None:
        raw_records = pdb_hierarchy.as_pdb_string()
    # isinstance() is False for None, so no separate None check is needed.
    if isinstance(raw_records, str):
        raw_records = flex.split_lines(raw_records)
    interp_params = mmtbx.model.manager.get_default_pdb_interpretation_params()
    interp_settings = interp_params.pdb_interpretation
    interp_settings.c_beta_restraints = False
    interp_settings.automatic_linking.link_none = True
    interp_settings.clash_guard.nonbonded_distance_threshold = None
    # NOTE(review): file_name is validated above but not passed on here, so
    # a file_name-only call appears to reach input() with lines=None --
    # confirm whether that is intended.
    model_input = input(lines=raw_records, source_info=None)
    model = mmtbx.model.manager(
        model_input=model_input,
        build_grm=True,
        pdb_interpretation_params=interp_params,
        restraint_objects=restraint_objects,
        monomer_parameters=monomer_parameters,
        log=null_out())
    return get_dihedrals_and_phi_psi(model)
def get_complete_dihedral_proxies(pdb_hierarchy=None,
                                  file_name=None,
                                  raw_records=None,
                                  mon_lib_srv=None,
                                  ener_lib=None,
                                  crystal_symmetry=None,
                                  log=None):
    """Process a not-yet-processed model and return its dihedral proxies.

    This function is called only for reference files that were not
    processed yet. For the main file only ``get_dihedrals_and_phi_psi`` is
    called. Still used for reference model torsion restraints.

    Exactly one of ``pdb_hierarchy``, ``file_name`` and ``raw_records``
    must be given. A hierarchy is converted to a PDB string, and string
    records are split into lines. Processing runs with C-beta restraints
    off, automatic linking set to ``link_none`` and the clash-guard
    nonbonded distance threshold disabled. The processing log goes to a
    throw-away ``cStringIO`` buffer.

    Returns:
        The result of ``get_dihedrals_and_phi_psi`` on the processed file.
    """
    assert [pdb_hierarchy, file_name, raw_records].count(None) == 2
    from mmtbx.monomer_library import server, pdb_interpretation
    import cStringIO
    if log is None:
        log = sys.stdout
    if mon_lib_srv is None:
        mon_lib_srv = server.server()
    if ener_lib is None:
        ener_lib = server.ener_lib()
    if pdb_hierarchy is not None:
        raw_records = pdb_hierarchy.as_pdb_string()
    # isinstance() is False for None, so no separate None check is needed.
    if isinstance(raw_records, str):
        raw_records = flex.split_lines(raw_records)
    params = pdb_interpretation.master_params.extract()
    params.c_beta_restraints = False
    params.automatic_linking.link_none = True
    params.clash_guard.nonbonded_distance_threshold = None
    scratch_log = cStringIO.StringIO()
    processed_file = pdb_interpretation.process(
        mon_lib_srv=mon_lib_srv,
        ener_lib=ener_lib,
        params=params,
        file_name=file_name,
        raw_records=raw_records,
        strict_conflict_handling=False,
        crystal_symmetry=crystal_symmetry,
        force_symmetry=True,
        log=scratch_log,
        substitute_non_crystallographic_unit_cell_if_necessary=True)
    return get_dihedrals_and_phi_psi(processed_file)
def tst_06():
    """Compare query and target through files, using an NCS spec file.

    Writes the module-level ``ncs_spec``, ``model1`` and ``query`` strings
    to ``ncs.ncs_spec``, ``model.pdb`` and ``query.pdb``, calls ``run``
    with those files as arguments, and compares the last ten lines of its
    output with the expected table after both have been passed through
    ``remove_blank``.

    Raises:
        AssertionError: with message "FAILED" if the texts differ.
    """
    # Parenthesised single-argument print, file.write() and raise X("msg")
    # are valid on both Python 2 and 3; the statement forms are Python 2
    # only.
    print("Comparing mixed model with target with 2 chains...using ncs")
    import iotbx.pdb
    from cctbx.array_family import flex
    model_pdb_inp = iotbx.pdb.input(
        source_info='model', lines=flex.split_lines(model1))
    # Retained from the original; the value is not used below.
    crystal_symmetry = model_pdb_inp.crystal_symmetry()
    inputs = (
        ('ncs.ncs_spec', ncs_spec),
        ('model.pdb', model1),
        ('query.pdb', query),
    )
    for file_name, content in inputs:
        # The context manager closes each file even if the write fails.
        # The added newline matches what ``print >> f`` appended.
        with open(file_name, 'w') as out_file:
            out_file.write("%s\n" % (content,))
    f = StringIO()
    args = ['model.pdb', 'query.pdb', 'ncs_file=ncs.ncs_spec']
    # The return value is not needed here; only the captured text is checked.
    run(args, out=f)
    expected_text = """

SEQ SCORE is fraction (close and matching target sequence).

----ALL RESIDUES--- CLOSE RESIDUES ONLY %
MODEL --CLOSE- --FAR-- FORWARD REVERSE MIXED FOUND CA SEQ
RMSD N N N N N SCORE SEQ MATCH(%) SCORE

Unique_target 1.67 58 64 15 29 14 42.6 0.25 8.6 0.04

"""
    found_text = "\n".join(f.getvalue().splitlines()[-10:])
    if remove_blank(found_text) != remove_blank(expected_text):
        print("\n\nExpected: \n%s \n\nFound: \n%s" % (
            expected_text, found_text))
        raise AssertionError("FAILED")
    print("OK")
def tst_06():
    """Compare query and target through files, using an NCS spec file.

    Writes the module-level ``ncs_spec``, ``model1`` and ``query`` strings
    to ``ncs.ncs_spec``, ``model.pdb`` and ``query.pdb``, calls ``run``
    with those files as arguments, and compares the last ten lines of its
    output with the expected table after both have been passed through
    ``remove_blank``.

    Raises:
        AssertionError: with message "FAILED" if the texts differ.
    """
    print("Comparing mixed model with target with 2 chains...using ncs")
    import iotbx.pdb
    from cctbx.array_family import flex
    model_pdb_inp = iotbx.pdb.input(
        source_info='model', lines=flex.split_lines(model1))
    # Retained from the original; the value is not used below.
    crystal_symmetry = model_pdb_inp.crystal_symmetry()
    # Context managers close each file even if the write fails.
    with open('ncs.ncs_spec', 'w') as f:
        print(ncs_spec, file=f)
    with open('model.pdb', 'w') as f:
        print(model1, file=f)
    with open('query.pdb', 'w') as f:
        print(query, file=f)
    f = StringIO()
    args = ['model.pdb', 'query.pdb', 'ncs_file=ncs.ncs_spec']
    # The return value is not needed here; only the captured text is checked.
    run(args, out=f)
    expected_text = """

SEQ SCORE is fraction (close and matching target sequence).
MEAN LENGTH is the mean length of contiguous segments in the match with target sequence.
(Each gap/reverse of direction starts new segment).

----ALL RESIDUES--- CLOSE RESIDUES ONLY %
MODEL --CLOSE- --FAR-- FORWARD REVERSE MIXED FOUND CA SEQ
RMSD N N N N N SCORE SEQ MATCH(%) SCORE MEAN LENGTH

Unique_target 1.67 58 64 15 29 14 42.6 0.25 8.6 0.04 4.5"""
    found_text = "\n".join(f.getvalue().splitlines()[-10:])
    if remove_blank(found_text) != remove_blank(expected_text):
        print("\n\nExpected: \n%s \n\nFound: \n%s" % (
            expected_text, found_text))
        raise AssertionError("FAILED")
    print("OK")
def __init__(self,
    pdb_h,
    xrs,
    target_map,
    grm=None,
    ncs_restraints_group_list=None,
    mon_lib_srv=None,
    rotamer_manager=None,
    ss_annotation=None,
    refine_ncs_operators=False,
    number_of_cycles=1,
    log=None):
  """Minimize a model against a reference map with geometry restraints.

  Each cycle re-derives rotamer restraints, then runs either the plain
  real-space refinement (no NCS groups) or the NCS-constrained LBFGS
  minimization. The refined coordinates are written back into
  ``self.pdb_h`` and ``self.xrs``.

  Args:
    pdb_h: model hierarchy; updated in place with refined coordinates.
    xrs: X-ray structure matching ``pdb_h``.
    target_map: map data passed to the weight search and the minimizers.
    grm: restraints manager. When None, one is built here from ``pdb_h``
      and NCS groups are taken from that processing step (any
      ``ncs_restraints_group_list`` argument is then ignored).
    ncs_restraints_group_list: NCS groups to use together with a supplied
      ``grm``. None or empty means no NCS.
    mon_lib_srv: monomer library server; replaced when ``grm`` is None.
    rotamer_manager: rotamer evaluator; a ``RotamerEval`` is created when
      None.
    ss_annotation: secondary-structure annotation, only used when the
      restraints manager is built here.
    refine_ncs_operators: when True, the NCS branch first refines the
      transformations and then the sites.
    number_of_cycles: number of rotamer-fix + minimization cycles.
    log: output stream; messages are discarded when None.

  Side effects:
    Writes ``rsr_before_rot_fix.pdb``, ``rsr_after_rot_fix.pdb`` and
    ``after_map_min.pdb`` into the current working directory.
  """
  from mmtbx.refinement.geometry_minimization import add_rotamer_restraints
  from mmtbx.model_statistics import geometry_no_grm
  from libtbx.utils import null_out
  # Normalize here instead of using a mutable default argument; this also
  # makes an explicit None safe for the len() check further down.
  if ncs_restraints_group_list is None:
    ncs_restraints_group_list = []
  self.pdb_h = pdb_h
  self.xrs = xrs
  # The log must be usable before the first message: previously log=None
  # failed on the flush() below.
  self.log = log
  if self.log is None:
    self.log = null_out()
  self.cs = self.xrs.crystal_symmetry()
  print("Minimizing using reference map...", file=self.log)
  self.log.flush()
  self.grm = grm
  # create a new one
  # copy-paste from cctbx_project/mmtbx/refinement/geometry_minimization.py:
  # minimize_wrapper_for_ramachandran
  if self.grm is None:
    from mmtbx.monomer_library.pdb_interpretation import grand_master_phil_str
    from mmtbx.geometry_restraints import reference
    from mmtbx.command_line.geometry_minimization import \
        get_geometry_restraints_manager
    from scitbx.array_family import flex
    import mmtbx.utils
    import iotbx.phil
    params_line = grand_master_phil_str
    params = iotbx.phil.parse(
        input_string=params_line, process_includes=True).extract()
    params.pdb_interpretation.clash_guard.nonbonded_distance_threshold = None
    params.pdb_interpretation.peptide_link.ramachandran_restraints = True
    params.pdb_interpretation.peptide_link.oldfield.weight_scale = 3
    params.pdb_interpretation.peptide_link.oldfield.plot_cutoff = 0.03
    params.pdb_interpretation.nonbonded_weight = 500
    params.pdb_interpretation.c_beta_restraints = True
    params.pdb_interpretation.max_reasonable_bond_distance = None
    params.pdb_interpretation.peptide_link.apply_peptide_plane = True
    params.pdb_interpretation.ncs_search.enabled = True
    params.pdb_interpretation.restraints_library.rdl = True
    processed_pdb_files_srv = mmtbx.utils.process_pdb_file_srv(
        crystal_symmetry=self.cs,
        pdb_interpretation_params=params.pdb_interpretation,
        stop_for_unknowns=False,
        log=self.log,
        cif_objects=None)
    processed_pdb_file, junk = processed_pdb_files_srv.process_pdb_files(
        raw_records=flex.split_lines(self.pdb_h.as_pdb_string()))
    mon_lib_srv = processed_pdb_files_srv.mon_lib_srv
    ener_lib = processed_pdb_files_srv.ener_lib
    # NCS groups come from the NCS search enabled above.
    ncs_restraints_group_list = []
    if processed_pdb_file.ncs_obj is not None:
      ncs_restraints_group_list = \
          processed_pdb_file.ncs_obj.get_ncs_restraints_group_list()
    grm = get_geometry_restraints_manager(
        processed_pdb_file, xrs, params=params)
    # dealing with SS
    if ss_annotation is not None:
      from mmtbx.secondary_structure import manager
      ss_manager = manager(
          pdb_hierarchy=self.pdb_h,
          geometry_restraints_manager=grm.geometry,
          sec_str_from_pdb_file=ss_annotation,
          params=None,
          mon_lib_srv=mon_lib_srv,
          verbose=-1,
          log=self.log)
      grm.geometry.set_secondary_structure_restraints(
          ss_manager=ss_manager,
          hierarchy=self.pdb_h,
          log=self.log)
  else:
    # A supplied manager is refreshed against the current coordinates.
    self.grm.geometry.pair_proxies(
        sites_cart=self.pdb_h.atoms().extract_xyz())
    if self.grm.geometry.ramachandran_manager is not None:
      self.grm.geometry.ramachandran_manager.update_phi_psi_targets(
          sites_cart=self.pdb_h.atoms().extract_xyz())
  ncs_groups = None
  if len(ncs_restraints_group_list) > 0:
    ncs_groups = ncs_restraints_group_list
  if rotamer_manager is None:
    from mmtbx.rotamer.rotamer_eval import RotamerEval
    rotamer_manager = RotamerEval(mon_lib_srv=mon_lib_srv)

  self.pdb_h.write_pdb_file(file_name="rsr_before_rot_fix.pdb",
      crystal_symmetry=self.xrs.crystal_symmetry())
  selection_real_space = xrs.backbone_selection()
  import mmtbx.refinement.real_space.weight
  # The data/restraints weight is determined once, in the first cycle.
  self.w = None
  for _cycle in range(number_of_cycles):
    print(" Updating rotamer restraints...", file=self.log)
    self.pdb_h, grm = add_rotamer_restraints(
        pdb_hierarchy=self.pdb_h,
        restraints_manager=grm,
        selection=None,
        sigma=5,
        mode="fix_outliers",
        accept_allowed=False,
        mon_lib_srv=mon_lib_srv,
        rotamer_manager=rotamer_manager)
    self.xrs = self.pdb_h.extract_xray_structure(crystal_symmetry=self.cs)
    self.pdb_h.write_pdb_file(file_name="rsr_after_rot_fix.pdb",
        crystal_symmetry=self.xrs.crystal_symmetry())
    if ncs_restraints_group_list is None or len(ncs_restraints_group_list)==0:
      # No NCS
      if self.w is None:
        print(" Determining weight...", file=self.log)
        self.log.flush()
        self.weight = mmtbx.refinement.real_space.weight.run(
            map_data=target_map,
            xray_structure=self.xrs,
            pdb_hierarchy=self.pdb_h,
            geometry_restraints_manager=grm,
            rms_bonds_limit=0.015,
            rms_angles_limit=1.0)
        # division is to put more weight onto restraints. Checked. Works.
        self.w = self.weight.weight/3.0
        for s in self.weight.msg_strings:
          print(s, file=self.log)
      print(" Minimizing...", file=self.log)
      print(" with weight %f" % self.w, file=self.log)
      self.log.flush()
      refine_object = simple(
          target_map=target_map,
          selection=None,
          max_iterations=150,
          geometry_restraints_manager=grm.geometry,
          selection_real_space=selection_real_space,
          states_accumulator=None,
          ncs_groups=ncs_groups)
      refine_object.refine(weight=self.w, xray_structure=self.xrs)
      self.rmsd_bonds_final, self.rmsd_angles_final = refine_object.rmsds()
      # Report through self.log so the None-safe stream is always used.
      print("RMSDS:", self.rmsd_bonds_final, self.rmsd_angles_final,
          file=self.log)
      self.xrs = self.xrs.replace_sites_cart(
          new_sites=refine_object.sites_cart(), selection=None)
    else:
      # Yes NCS
      # copy-paste from macro_cycle_real_space.py
      import mmtbx.ncs.ncs_utils as nu
      nu.get_list_of_best_ncs_copy_map_correlation(
          ncs_groups=ncs_restraints_group_list,
          xray_structure=self.xrs,
          map_data=target_map,
          d_min=3)
      if self.w is None:
        # end=' ' keeps the next message on the same line.
        print(" Determining weight... (NCS)", end=' ', file=self.log)
        self.weight = mmtbx.refinement.real_space.weight.run(
            map_data=target_map,
            xray_structure=self.xrs,
            pdb_hierarchy=self.pdb_h,
            geometry_restraints_manager=grm,
            rms_bonds_limit=0.01,
            rms_angles_limit=1.0,
            ncs_groups=ncs_restraints_group_list)
        # division supposed to put more weight onto restraints. Need checking.
        self.w = self.weight.weight/3.0
        for s in self.weight.msg_strings:
          print(s, file=self.log)
      print(" Minimizing... (NCS)", file=self.log)

      # Each action is (refine_sites, refine_transformations).
      actions = [[True, False], ]
      if refine_ncs_operators:
        actions = [[False, True], [True, False]]
      for action in actions:
        refine_sites, refine_transformations = action
        tfg_obj = mmtbx.refinement.minimization_ncs_constraints.\
            target_function_and_grads_real_space(
                map_data=target_map,
                xray_structure=self.xrs,
                ncs_restraints_group_list=ncs_restraints_group_list,
                refine_selection=None,
                real_space_gradients_delta=1,
                restraints_manager=grm,
                data_weight=self.w,
                refine_sites=refine_sites,
                refine_transformations=refine_transformations)
        minimized = mmtbx.refinement.minimization_ncs_constraints.lbfgs(
            target_and_grads_object=tfg_obj,
            xray_structure=self.xrs,
            ncs_restraints_group_list=ncs_restraints_group_list,
            refine_selection=None,
            finite_grad_differences_test=False,
            max_iterations=100,
            refine_sites=refine_sites,
            refine_transformations=refine_transformations)
        self.xrs = tfg_obj.xray_structure
    # Propagate the refined sites so the next cycle starts from them.
    self.pdb_h.adopt_xray_structure(self.xrs)
  self.pdb_h.write_pdb_file("after_map_min.pdb")
LINK S SO4 S 188 O2 SO4 S 188 LINK NZ LYS A 680 1.260 C4A PLP D 1 LYS-PLP """.splitlines()] expected_results = [ [[64], [65]], [[64], [66]], [[], []]] for link_record,expected in zip(link_records, expected_results): assert [list(sel) for sel in sel_cache.link_iselections(link_record)] \ == expected # hierarchy = pdb.input(source_info=None, lines=flex.split_lines("""\ CRYST1 21.937 4.866 23.477 90.00 107.08 90.00 P 1 21 1 2 ATOM 2 CA GLY A 1 -9.052 4.207 4.651 1.00 16.57 C ATOM 6 CA ASN A 2 -6.522 2.038 2.831 1.00 14.10 C ATOM 14 CA Asn A 3 -3.193 1.904 4.589 1.00 11.74 C ATOM 22 CA GLN a 4 0.384 1.888 3.199 1.00 10.53 C ATOM 31 CA GLN a 5 3.270 2.361 5.640 1.00 11.39 C ATOM 40 CA ASN a 6 6.831 2.310 4.318 1.00 12.30 C END """)).construct_hierarchy() sel_cache = hierarchy.atom_selection_cache() isel = sel_cache.iselection assert list(isel("chain A")) == [0,1,2] assert list(isel("chain a")) == [3,4,5] assert list(isel("name ca")) == range(6) assert list(isel("resname asn")) == [1,2,5] assert list(isel("resname ASN")) == [1,2,5] assert list(isel("resname Asn")) == [1,2,5] hierarchy = pdb.input(source_info=None, lines=flex.split_lines("""\ CRYST1 21.937 4.866 23.477 90.00 107.08 90.00 P 1 21 1 2 ATOM 2 CA GLY A 1 -9.052 4.207 4.651 1.00 16.57 C
def run(args,
    crystal_symmetry=None,
    ncs_object=None,
    pdb_hierarchy=None,
    map_data=None,
    mask_data=None,
    half_map_data_list=None,
    half_map_labels_list=None,
    lower_bounds=None,
    upper_bounds=None,
    write_output_files=True,
    log=None):
  """Extract a box of map (and model) around selected atoms or density.

  Inputs come from command-line style ``args`` and/or from the keyword
  arguments; objects passed directly take the place of the corresponding
  files.

  Args:
    args: command-line arguments (file names and phil assignments).
    crystal_symmetry: symmetry to use; otherwise taken from the map input.
    ncs_object: symmetry operators; an empty object is created when absent
      and filled from ``params.symmetry_file`` if that is set.
    pdb_hierarchy: model to use when no PDB file is given.
    map_data: map to use instead of reading map coefficients or a CCP4 map.
    mask_data: optional mask, converted with ``as_double()`` before use.
    half_map_data_list: half maps to box along with the map.
    half_map_labels_list: labels matching ``half_map_data_list``.
    lower_bounds, upper_bounds: override the corresponding parameters.
    write_output_files: when False, nothing is written to disk.
    log: output stream; ``sys.stdout`` when None.

  Returns:
    The box object from ``mmtbx.utils.extract_box_around_model_and_map``
    with ``hierarchy`` and unit-cell bookkeeping attributes attached, or
    None when called with no arguments and no hierarchy (usage is printed).

  Raises:
    Sorry: on inconsistent or missing inputs.
  """
  h = "phenix.map_box: extract box with model and map around selected atoms"
  if(log is None): log = sys.stdout
  print_statistics.make_header(h, out=log)
  default_message="""\
%s.
Usage:
  phenix.map_box model.pdb map_coefficients.mtz selection="chain A and resseq 1:10"
or
  phenix.map_box map.ccp4 density_select=True
Parameters:"""%h
  if(len(args) == 0 and not pdb_hierarchy):
    print(default_message)
    master_phil.show(prefix=" ")
    return

  # Process inputs ignoring symmetry conflicts just to get the value of
  # ignore_symmetry_conflicts...
  inputs = mmtbx.utils.process_command_line_args(args = args,
      cmd_cs=crystal_symmetry,
      master_params = master_phil,
      suppress_symmetry_related_errors=True)
  params = inputs.params.extract()

  # Now process inputs for real and write a nice error message if necessary.
  try:
    inputs = mmtbx.utils.process_command_line_args(args = args,
        cmd_cs=crystal_symmetry,
        master_params = master_phil,
        suppress_symmetry_related_errors=params.ignore_symmetry_conflicts)
  except Exception as e:
    # Substring test: the marker may sit at the very start of the message.
    if "symmetry mismatch " in str(e):
      raise Sorry(str(e)+"\nTry 'ignore_symmetry_conflicts=True'")
    raise  # bare raise keeps the original traceback

  params = inputs.params.extract()
  master_phil.format(python_object=params).show(out=log)

  # Overwrite params with parameters in call if available
  if lower_bounds:
    params.lower_bounds=lower_bounds
  if upper_bounds:
    params.upper_bounds=upper_bounds

  # PDB file
  if params.pdb_file and not inputs.pdb_file_names and not pdb_hierarchy:
    inputs.pdb_file_names=[params.pdb_file]
  if(len(inputs.pdb_file_names)!=1 and not params.density_select and not
      params.mask_select and not pdb_hierarchy and
      not params.keep_map_size and not params.upper_bounds and
      not params.extract_unique and not params.bounds_match_this_file):
    raise Sorry("PDB file is needed unless extract_unique, "+
      "density_select, mask_select, keep_map_size \nor bounds are set .")
  if (len(inputs.pdb_file_names)!=1 and not pdb_hierarchy and \
      (params.mask_atoms )):
    raise Sorry("PDB file is needed for mask_atoms")
  if params.soft_mask and (not params.resolution) and \
      (len(inputs.pdb_file_names)!=1 and not pdb_hierarchy):
    raise Sorry("Need resolution for soft_mask without PDB file")
  if ((params.density_select or params.mask_select) and params.keep_map_size):
    raise Sorry("Cannot set both density_select/mask_select and keep_map_size")
  if ((params.density_select or params.mask_select) and params.upper_bounds):
    raise Sorry("Cannot set both density_select/mask_select and bounds")
  if (params.keep_map_size and params.upper_bounds):
    raise Sorry("Cannot set both keep_map_size and bounds")
  if (params.upper_bounds and not params.lower_bounds):
    raise Sorry("Please set lower_bounds if you set upper_bounds")
  if (params.extract_unique):
    if (not params.resolution):
      raise Sorry("Please set resolution for extract_unique")
    if (not params.symmetry) and (not params.symmetry_file) and \
        (not ncs_object):
      # Statements that followed this raise were unreachable and are gone.
      raise Sorry(
        "Please supply a symmetry file or symmetry for extract_unique (you "+
        "\ncan try symmetry=ALL if you do not know your symmetry or "+
        "symmetry=C1 if \nthere is none)")

  if params.keep_input_unit_cell_and_grid and (
      (params.output_unit_cell_grid is not None ) or
      (params.output_unit_cell is not None ) ):
    raise Sorry("If you set keep_input_unit_cell_and_grid then you cannot "+\
      "set \noutput_unit_cell_grid or output_unit_cell")

  if (write_output_files) and ("mtz" in params.output_format) and (
      (params.keep_origin) and (not params.keep_map_size)):
    print("\nNOTE: Skipping write of mtz file as keep_origin=True and \n"+\
      "keep_map_size is False\n")
    params.output_format=remove_element(params.output_format,element='mtz')

  if (write_output_files) and ("mtz" in params.output_format) and (
      (params.extract_unique)):
    print("\nNOTE: Skipping write of mtz file as extract_unique=True\n")
    params.output_format=remove_element(params.output_format,element='mtz')

  # Take the output origin, or the cut-out bounds, from a reference map file.
  if params.output_origin_match_this_file or params.bounds_match_this_file:
    if params.output_origin_match_this_file:
      fn=params.output_origin_match_this_file
      if params.bounds_match_this_file:
        raise Sorry("Cannot match origin and bounds at same time")
    else:
      fn=params.bounds_match_this_file
    if not params.ccp4_map_file:
      raise Sorry(
        "Need to specify your input file with ccp4_map_file=xxx if you use "+
        "output_origin_match_this_file=xxxx or bounds_match_this_file=xxxx")

    af = any_file(fn)
    if (af.file_type == 'ccp4_map'):
      origin=af.file_content.data.origin()
      if params.output_origin_match_this_file:
        params.output_origin_grid_units=origin
        print("Origin of (%s,%s,%s) taken from %s" %(
          origin[0],origin[1],origin[2],fn))
      else:
        # Named n_all so the builtin all() is not shadowed.
        n_all=af.file_content.data.all()
        params.lower_bounds=origin
        print("Lower bounds of (%s,%s,%s) taken from %s" %(
          params.lower_bounds[0],params.lower_bounds[1],
          params.lower_bounds[2],fn))
        params.upper_bounds=list(col(origin)+col(n_all)-col((1,1,1)))
        print("upper bounds of (%s,%s,%s) taken from %s" %(
          params.upper_bounds[0],params.upper_bounds[1],
          params.upper_bounds[2],fn))
        params.bounds_are_absolute=True
    else:
      raise Sorry("Unable to interpret %s as map file" %(fn))

  if params.output_origin_grid_units is not None and params.keep_origin:
    params.keep_origin=False
    print("Setting keep_origin=False as output_origin_grid_units is set")

  print_statistics.make_sub_header("pdb model", out=log)
  if len(inputs.pdb_file_names)>0:
    pdb_inp = iotbx.pdb.input(file_name=inputs.pdb_file_names[0])
    pdb_hierarchy = pdb_inp.construct_hierarchy()
  if pdb_hierarchy:
    pdb_atoms = pdb_hierarchy.atoms()
    pdb_atoms.reset_i_seq()
  else:
    pdb_hierarchy=None

  # Map or map coefficients
  map_coeff = None
  input_unit_cell_grid=None
  input_unit_cell=None
  input_map_labels=None
  if (not map_data):
    # read first mtz file
    if ( (len(inputs.reflection_file_names) > 0) or
         (params.map_coefficients_file is not None) ):
      # file in phil takes precedent
      if (params.map_coefficients_file is not None):
        if (len(inputs.reflection_file_names) == 0):
          inputs.reflection_file_names.append(params.map_coefficients_file)
        else:
          inputs.reflection_file_names[0] = params.map_coefficients_file
      map_coeff = reflection_file_utils.extract_miller_array_from_file(
        file_name = inputs.reflection_file_names[0],
        label = params.label,
        type = "complex",
        log = log)
      if not crystal_symmetry: crystal_symmetry=map_coeff.crystal_symmetry()
      fft_map = map_coeff.fft_map(resolution_factor=params.resolution_factor)
      fft_map.apply_sigma_scaling()
      map_data = fft_map.real_map_unpadded()
      map_or_map_coeffs_prefix=os.path.basename(
        inputs.reflection_file_names[0][:-4])
    # or read CCP4 map
    elif ( (inputs.ccp4_map is not None) or
           (params.ccp4_map_file is not None) ):
      if (params.ccp4_map_file is not None):
        af = any_file(params.ccp4_map_file)
        if (af.file_type == 'ccp4_map'):
          inputs.ccp4_map = af.file_content
          inputs.ccp4_map_file_name = params.ccp4_map_file
      print_statistics.make_sub_header("CCP4 map", out=log)
      ccp4_map = inputs.ccp4_map
      ccp4_map.show_summary(prefix=" ",out=log)
      if not crystal_symmetry: crystal_symmetry=ccp4_map.crystal_symmetry()
      map_data = ccp4_map.data #map_data()
      input_unit_cell_grid=ccp4_map.unit_cell_grid
      input_unit_cell=ccp4_map.unit_cell_parameters
      input_map_labels=ccp4_map.get_labels()

      if inputs.ccp4_map_file_name.endswith(".ccp4"):
        map_or_map_coeffs_prefix=os.path.basename(
          inputs.ccp4_map_file_name[:-5])
      else:
        map_or_map_coeffs_prefix=os.path.basename(
          inputs.ccp4_map_file_name[:-4])
  else: # have map_data
    map_or_map_coeffs_prefix=None

  if params.half_map_list and (not half_map_data_list):
    if not params.extract_unique:
      raise Sorry("Can only use half_map_with extract_unique")
    print ("Reading half-maps",params.half_map_list)
    half_map_data_list=[]
    half_map_labels_list=[]
    for fn in params.half_map_list:
      print("Reading half map from %s" %(fn),file=log)
      af = any_file(fn)
      print_statistics.make_sub_header("CCP4 map", out=log)
      h_ccp4_map = af.file_content
      h_ccp4_map.show_summary(prefix=" ",out=log)
      h_map_data = h_ccp4_map.data
      half_map_data_list.append(h_map_data)
      half_map_labels_list.append(h_ccp4_map.get_labels())

  # Check that map_data exists before it is first used below, so a missing
  # map gives the intended Sorry instead of a TypeError/AttributeError.
  if(map_data is None):
    raise Sorry("Map or map coefficients file is needed.")

  if params.map_scale_factor:
    print("Applying scale factor of %s to map data on read-in" %(
      params.map_scale_factor))
    map_data=map_data*params.map_scale_factor

  if params.output_origin_grid_units is not None:
    origin_to_match=tuple(params.output_origin_grid_units)
  else:
    origin_to_match=None

  if origin_to_match:
    # Cartesian shift equivalent to the requested origin in grid units.
    sc=[]
    for x,o,a in zip(crystal_symmetry.unit_cell().parameters()[:3],
        origin_to_match,
        map_data.all()):
      sc.append(-x*o/a)
    shift_cart_for_origin_to_match=tuple(sc)
  else:
    origin_to_match=None
    shift_cart_for_origin_to_match=None

  if crystal_symmetry and not inputs.crystal_symmetry:
    inputs.crystal_symmetry=crystal_symmetry

  if len(inputs.pdb_file_names)>0:
    output_prefix=os.path.basename(inputs.pdb_file_names[0])[:-4]
  else:
    output_prefix=map_or_map_coeffs_prefix

  if not pdb_hierarchy: # get an empty hierarchy
    from cctbx.array_family import flex
    pdb_hierarchy=iotbx.pdb.input(
      source_info='',lines=flex.split_lines('')).construct_hierarchy()
  xray_structure = pdb_hierarchy.extract_xray_structure(
    crystal_symmetry=inputs.crystal_symmetry)
  xray_structure.show_summary(f=log)
  #
  if not params.selection: params.selection="all"
  selection = pdb_hierarchy.atom_selection_cache().selection(
    string = params.selection)
  if selection.size():
    print_statistics.make_sub_header("atom selection", out=log)
    print("Selection string: selection='%s'"%params.selection, file=log)
    print(" selects %d atoms from total %d atoms."%(selection.count(True),
      selection.size()), file=log)
  sites_cart_all = xray_structure.sites_cart()
  sites_cart = sites_cart_all.select(selection)
  selection = xray_structure.selection_within(
    radius = params.selection_radius,
    selection = selection)

  if not ncs_object:
    from mmtbx.ncs.ncs import ncs
    ncs_object=ncs()
    if params.symmetry_file:
      ncs_object.read_ncs(params.symmetry_file,log=log)
      print("Total of %s operators read" %(ncs_object.max_operators()),
        file=log)
  if not ncs_object or ncs_object.max_operators()<1:
    print("No symmetry available", file=log)
  if ncs_object:
    n_ops=max(1,ncs_object.max_operators())
  else:
    n_ops=1

  # Get sequence if extract_unique is set
  sequence=None
  if params.extract_unique or params.mask_select:
    if params.sequence_file:
      if n_ops > 1: # get unique part of sequence
        remove_duplicates=True
      else:
        remove_duplicates=False
      from iotbx.bioinformatics import get_sequences
      sequence=(" ".join(get_sequences(file_name=params.sequence_file,
        remove_duplicates=remove_duplicates)))

    if params.chain_type in ['None',None]: params.chain_type=None
    if sequence and not params.molecular_mass:
      # get molecular mass from sequence
      from iotbx.bioinformatics import text_from_chains_matching_chain_type
      if params.chain_type in [None,'PROTEIN']:
        n_protein=len(text_from_chains_matching_chain_type(
          text=sequence,chain_type='PROTEIN'))
      else:
        n_protein=0
      if params.chain_type in [None,'RNA']:
        n_rna=len(text_from_chains_matching_chain_type(
          text=sequence,chain_type='RNA'))
      else:
        n_rna=0
      if params.chain_type in [None,'DNA']:
        n_dna=len(text_from_chains_matching_chain_type(
          text=sequence,chain_type='DNA'))
      else:
        n_dna=0
      params.molecular_mass=n_ops*(n_protein*110+(n_rna+n_dna)*330)
      print("\nEstimate of molecular mass is %.0f " %(params.molecular_mass),
        file=log)

  if params.density_select or params.mask_select:
    print_statistics.make_sub_header(
      "Extracting box around selected density and writing output files",
      out=log)
  else:
    print_statistics.make_sub_header(
      "Extracting box around selected atoms and writing output files",
      out=log)
  #
  if params.value_outside_atoms=='mean':
    print("\nValue outside atoms mask will be set to mean inside mask",
      file=log)
  if params.get_half_height_width and params.density_select:
    print("\nHalf width at half height will be used to id boundaries",
      file=log)

  if params.soft_mask and sites_cart_all.size()>0:
    print("\nSoft mask will be applied to model-based mask", file=log)
  elif params.soft_mask:
    print ("\nSoft mask will be applied to outside of map box",file=log)
  if params.keep_map_size:
    print("\nEntire map will be kept (not cutting out region)", file=log)
  if params.restrict_map_size:
    print("\nOutput map will be within input map", file=log)
  if params.lower_bounds and params.upper_bounds:
    print("Bounds for cut out map are (%s,%s,%s) to (%s,%s,%s)" %(
      tuple(list(params.lower_bounds)+list(params.upper_bounds))), file=log)

  if mask_data:
    mask_data=mask_data.as_double()
  box = mmtbx.utils.extract_box_around_model_and_map(
    xray_structure = xray_structure,
    map_data = map_data.as_double(),
    mask_data = mask_data,
    box_cushion = params.box_cushion,
    selection = selection,
    mask_select = params.mask_select,
    density_select = params.density_select,
    threshold = params.density_select_threshold,
    get_half_height_width = params.get_half_height_width,
    mask_atoms = params.mask_atoms,
    soft_mask = params.soft_mask,
    soft_mask_radius = params.soft_mask_radius,
    mask_atoms_atom_radius = params.mask_atoms_atom_radius,
    value_outside_atoms = params.value_outside_atoms,
    keep_map_size = params.keep_map_size,
    restrict_map_size = params.restrict_map_size,
    lower_bounds = params.lower_bounds,
    upper_bounds = params.upper_bounds,
    bounds_are_absolute = params.bounds_are_absolute,
    zero_outside_original_map = params.zero_outside_original_map,
    extract_unique = params.extract_unique,
    target_ncs_au_file = params.target_ncs_au_file,
    regions_to_keep = params.regions_to_keep,
    box_buffer = params.box_buffer,
    soft_mask_extract_unique = params.soft_mask_extract_unique,
    mask_expand_ratio = params.mask_expand_ratio,
    keep_low_density = params.keep_low_density,
    chain_type = params.chain_type,
    sequence = sequence,
    solvent_content = params.solvent_content,
    molecular_mass = params.molecular_mass,
    resolution = params.resolution,
    ncs_object = ncs_object,
    symmetry = params.symmetry,
    half_map_data_list = half_map_data_list,
    )

  ph_box = pdb_hierarchy.select(selection)
  ph_box.adopt_xray_structure(box.xray_structure_box)
  box.hierarchy=ph_box

  if params.mask_select:
    print("\nSolvent content used in mask_select: %.3f " %(
      box.get_solvent_content()),file=log)

  if (inputs and
      inputs.crystal_symmetry and inputs.ccp4_map and
      inputs.crystal_symmetry.unit_cell().parameters() and
      inputs.ccp4_map.unit_cell_parameters ) and (
      inputs.crystal_symmetry.unit_cell().parameters() !=
      inputs.ccp4_map.unit_cell_parameters):
    print("\nNOTE: Input CCP4 map is only part of unit cell:", file=log)
    print("Full unit cell ('unit cell parameters'): "+\
      "(%.1f, %.1f, %.1f, %.1f, %.1f, %.1f) A" %tuple(
        inputs.ccp4_map.unit_cell_parameters), file=log)
    print("Size of CCP4 map 'map unit cell': "+\
      "(%.1f, %.1f, %.1f, %.1f, %.1f, %.1f) A" %tuple(
        inputs.crystal_symmetry.unit_cell().parameters()), file=log)
    print("Full unit cell as grid units: (%s, %s, %s)" %(
      inputs.ccp4_map.unit_cell_grid), file=log)
    print("Map unit cell as grid units: (%s, %s, %s)" %(
      map_data.all()), file=log)

    box.unit_cell_parameters_from_ccp4_map=inputs.ccp4_map.unit_cell_parameters
    box.unit_cell_parameters_deduced_from_map_grid=\
      inputs.crystal_symmetry.unit_cell().parameters()
  else:
    box.unit_cell_parameters_from_ccp4_map=None
    box.unit_cell_parameters_deduced_from_map_grid=None

  if box.pdb_outside_box_msg:
    print(box.pdb_outside_box_msg, file=log)

  # NOTE: box object is always shifted to place origin at (0,0,0)

  # NOTE ON ORIGIN SHIFTS: The shifts are applied locally here. The box
  # object is not affected and always has the origin at (0,0,0)
  # output_box is copy of box with shift_cart corresponding to the output
  # files. Normally this is the same as the original shift_cart. However
  # if user has specified a new output origin it will differ.

  # For output files ONLY:
  # keep_origin==False leave origin at (0,0,0)
  # keep_origin==True: we shift everything back to where it was,
  # output_origin_grid_units=10,10,10: output origin is at (10,10,10)

  # ncs_object is original
  # box.ncs_object is shifted by shift_cart
  # output_box.ncs_object is shifted back by -new shift_cart

  # Additional note on output unit_cell and grid_units.
  # The ccp4-style output map can specify the unit cell and grid units
  # corresponding to that cell. This can be separate from the origin and
  # number of grid points in the map as written. If specified, write these
  # out to the output ccp4 map and also use this unit cell for writing
  # any output PDB files

  from copy import deepcopy
  output_box=deepcopy(box) # won't use box below here except to return it

  print("\nBox cell dimensions: (%.2f, %.2f, %.2f) A" %(
    box.box_crystal_symmetry.unit_cell().parameters()[:3]), file=log)

  if box.shift_cart:
    print("Working origin moved from grid position of"+\
      ": (%d, %d, %d) to (0,0,0) " %(
        tuple(box.origin_shift_grid_units(reverse=True))), file=log)
    print("Working origin moved from coordinates of:"+\
      " (%.2f, %.2f, %.2f) A to (0,0,0)\n" %(
        tuple(-col(box.shift_cart))), file=log)

  if (params.keep_origin):
    print("\nRestoring original position for output files", file=log)
    print("Origin will be at grid position of"+\
      ": (%d, %d, %d) " %(
        tuple(box.origin_shift_grid_units(reverse=True))), file=log)
    print("\nOutput files will be in same location as original", end=' ',
      file=log)
    if not params.keep_map_size:
      print("just cut out.", file=log)
    else:
      print("keeping entire map", file=log)
    print("Note that output maps are only valid in the cut out region.\n",
      file=log)
  else:
    if origin_to_match:
      output_box.shift_cart=shift_cart_for_origin_to_match
      if params.output_origin_grid_units:
        print("Output map origin to be shifted to match target", file=log)
      print("Placing origin at grid point (%s, %s, %s)" %(
        origin_to_match)+"\n"+ \
        "Final coordinate shift for output files: (%.2f,%.2f,%.2f) A\n" %(
        tuple(col(output_box.shift_cart)-col(box.shift_cart))), file=log)
    elif box.shift_cart:
      output_box.shift_cart=(0,0,0) # not shifting back
      print("Final origin will be at (0,0,0)", file=log)
      print("Final coordinate shift for output files: (%.2f,%.2f,%.2f) A\n" %(
        tuple(col(output_box.shift_cart)-col(box.shift_cart))), file=log)
    else:
      print("\nOutput files are in same location as original and origin "+\
        "is at (0,0,0)\n", file=log)

  print("\nBox grid: (%s, %s, %s) " %(output_box.map_box.all()),file=log)
  ph_output_box_output_location = ph_box.deep_copy()
  if output_box.shift_cart: # shift coordinates and NCS back by shift_cart
    # NOTE output_box.shift_cart could be different than box.shift_cart if
    # there is a target position for the origin and it is not the same as the
    # original origin.
    sites_cart = output_box.shift_sites_cart_back(
      output_box.xray_structure_box.sites_cart())
    xrs_offset = ph_output_box_output_location.extract_xray_structure(
      crystal_symmetry=output_box.xray_structure_box.crystal_symmetry()
      ).replace_sites_cart(new_sites = sites_cart)
    ph_output_box_output_location.adopt_xray_structure(xrs_offset)

    if output_box.ncs_object:
      output_box.ncs_object=output_box.ncs_object.coordinate_offset(
        tuple(-col(output_box.shift_cart)))
    shift_back=True
  else:
    shift_back=False

  if params.keep_input_unit_cell_and_grid and \
      (input_unit_cell_grid is not None) and \
      (input_unit_cell is not None):
    params.output_unit_cell=input_unit_cell
    params.output_unit_cell_grid=input_unit_cell_grid
    print("Setting output unit cell parameters and unit cell grid to"+\
      " match\ninput map file", file=log)

  if params.output_unit_cell: # Set output unit cell parameters
    from cctbx import crystal
    output_crystal_symmetry=crystal.symmetry(
      unit_cell=params.output_unit_cell, space_group="P1")
    output_unit_cell=output_crystal_symmetry.unit_cell()
    print("Output unit cell set to: %.2f, %.2f, %.2f, %.2f, %.2f, %.2f)" %tuple(
      output_crystal_symmetry.unit_cell().parameters()), file=log)
  else:
    output_crystal_symmetry=None

  # ============= Check/set output unit cell grid and cell parameters =======
  if params.output_unit_cell_grid or output_crystal_symmetry:
    if params.output_unit_cell_grid:
      output_unit_cell_grid=params.output_unit_cell_grid
    else:
      output_unit_cell_grid=output_box.map_box.all()
    print("Output unit cell grid set to: (%s, %s, %s)" %tuple(
      output_unit_cell_grid), file=log)

    expected_output_abc=[]
    box_spacing=[]
    output_spacing=[]
    box_abc=output_box.xray_structure_box.\
      crystal_symmetry().unit_cell().parameters()[:3]
    if output_crystal_symmetry:
      output_abc=output_crystal_symmetry.unit_cell().parameters()[:3]
    else:
      output_abc=[None,None,None]
    for a_box,a_output,n_box,n_output in zip(
        box_abc,
        output_abc,
        output_box.map_box.all(),
        output_unit_cell_grid):
      expected_output_abc.append(a_box*n_output/n_box)
      box_spacing.append(a_box/n_box)
      if output_crystal_symmetry:
        output_spacing.append(a_output/n_output)
      else:
        output_spacing.append(a_box/n_box)

    if output_crystal_symmetry: # make sure it is compatible...
      r0=expected_output_abc[0]/output_abc[0]
      r1=expected_output_abc[1]/output_abc[1]
      r2=expected_output_abc[2]/output_abc[2]
      from libtbx.test_utils import approx_equal
      if not approx_equal(r0,r1,eps=0.001) or \
          not approx_equal(r0,r2,eps=0.001):
        print("WARNING: output_unit_cell and cell_grid will "+\
          "change ratio of grid spacing.\nOld spacings: "+\
          "(%.2f, %.2f, %.2f) A " %(tuple(box_spacing))+\
          "\nNew spacings: (%.2f, %.2f, %.2f) A \n" %(tuple(output_spacing)),
          file=log)
    else:
      output_abc=expected_output_abc

    # NOTE(review): the output symmetry is rebuilt here with 90-degree
    # angles for both branches above; confirm this placement against the
    # upstream map_box source.
    from cctbx import crystal
    output_crystal_symmetry=crystal.symmetry(
      unit_cell=list(output_abc)+[90,90,90], space_group="P1")
    print("Output unit cell will be: (%.2f, %.2f, %.2f, %.2f, %.2f, %.2f)\n"%(
      tuple(output_crystal_symmetry.unit_cell().parameters())), file=log)
  else:
    output_unit_cell_grid = output_box.map_box.all()
    output_crystal_symmetry=output_box.xray_structure_box.crystal_symmetry()
  # ========== Done check/set output unit cell grid and cell parameters =====

  if write_output_files:
    # Write PDB file
    if ph_box.overall_counts().n_residues>0:
      if(params.output_file_name_prefix is None):
        file_name = "%s_box.pdb"%output_prefix
      else:
        file_name = "%s.pdb"%params.output_file_name_prefix
      ph_output_box_output_location.write_pdb_file(file_name=file_name,
        crystal_symmetry = output_crystal_symmetry)
      print("Writing boxed PDB with box unit cell to %s" %(
        file_name), file=log)

    # Write NCS file if NCS
    if output_box.ncs_object and output_box.ncs_object.max_operators()>0:
      if(params.output_file_name_prefix is None):
        output_symmetry_file = "%s_box.ncs_spec"%output_prefix
      else:
        output_symmetry_file = "%s.ncs_spec"%params.output_file_name_prefix
      output_box.ncs_object.format_all_for_group_specification(
        file_name=output_symmetry_file)
      print("\nWriting symmetry to %s" %(
        output_symmetry_file), file=log)

    # Write ccp4 map.
    if("ccp4" in params.output_format):
      if(params.output_file_name_prefix is None):
        file_name = "%s_box.ccp4"%output_prefix
      else:
        file_name = "%s.ccp4"%params.output_file_name_prefix
      from iotbx.mrcfile import create_output_labels
      if params.extract_unique:
        program_name='map_box using extract_unique'
        limitations=["extract_unique"]
      else:
        program_name='map_box'
        limitations=[]
      labels=create_output_labels(program_name=program_name,
        input_file_name=inputs.ccp4_map_file_name,
        input_labels=input_map_labels,
        limitations=limitations,
        output_labels=params.output_map_labels)
      output_box.write_ccp4_map(file_name=file_name,
        output_crystal_symmetry=output_crystal_symmetry,
        output_mean=params.output_ccp4_map_mean,
        output_sd=params.output_ccp4_map_sd,
        output_unit_cell_grid=output_unit_cell_grid,
        shift_back=shift_back,
        output_map_labels=labels,
        output_external_origin=params.output_external_origin)
      print("Writing boxed map "+\
        "to CCP4 formatted file: %s"%file_name, file=log)

      # Boxed half maps are written next to the main map.
      if not params.half_map_list:
        params.half_map_list=[]
      if not output_box.map_box_half_map_list:
        output_box.map_box_half_map_list=[]
      if not half_map_labels_list:
        half_map_labels_list=len(output_box.map_box_half_map_list)*[None]
      for hm,labels,fn in zip(
          output_box.map_box_half_map_list,
          half_map_labels_list,
          params.half_map_list): # half maps matching
        labels=create_output_labels(program_name=program_name,
          input_file_name=fn,
          input_labels=labels,
          limitations=limitations,
          output_labels=params.output_map_labels)
        hm_fn="%s_box.ccp4" %( ".".join(os.path.basename(fn).split(".")[:-1]))
        output_box.write_ccp4_map(file_name=hm_fn,
          map_data=hm,
          output_crystal_symmetry=output_crystal_symmetry,
          output_mean=params.output_ccp4_map_mean,
          output_sd=params.output_ccp4_map_sd,
          output_unit_cell_grid=output_unit_cell_grid,
          shift_back=shift_back,
          output_map_labels=labels,
          output_external_origin=params.output_external_origin)
        print ("Writing boxed half map to: %s " %(hm_fn),file=log)

    # Write xplor map. Shift back if keep_origin=True
    if("xplor" in params.output_format):
      if(params.output_file_name_prefix is None):
        file_name = "%s_box.xplor"%output_prefix
      else:
        file_name = "%s.xplor"%params.output_file_name_prefix
      output_box.write_xplor_map(file_name=file_name,
        output_crystal_symmetry=output_crystal_symmetry,
        output_unit_cell_grid=output_unit_cell_grid,
        shift_back=shift_back,)
      print("Writing boxed map "+\
        "to X-plor formatted file: %s"%file_name, file=log)

    # Write mtz map coeffs. Shift back if keep_origin=True
    if("mtz" in params.output_format):
      if(params.output_file_name_prefix is None):
        file_name = "%s_box.mtz"%output_prefix
      else:
        file_name = "%s.mtz"%params.output_file_name_prefix
      print("Writing map coefficients "+\
        "to MTZ file: %s"%file_name, file=log)
      # Resolution: input coefficients, then the parameter, then the map.
      if(map_coeff is not None):
        d_min = map_coeff.d_min()
      elif params.resolution is not None:
        d_min = params.resolution
      else:
        d_min = maptbx.d_min_from_map(map_data=output_box.map_box,
          unit_cell=output_box.xray_structure_box.unit_cell())
      output_box.map_coefficients(d_min=d_min,
        scale_max=params.scale_max,
        resolution_factor=params.resolution_factor, file_name=file_name,
        shift_back=shift_back)

  print(file=log)
  return box
def exercise_selection():
  """Exercise atom selections against an inline three-model PDB fixture.

  Builds a hierarchy from the PDB text below, creates its atom selection
  cache and asserts the atom indices returned by ``iselection`` for the
  keywords name, altloc/altid, resname, chain, resseq, model, icode,
  resid/resi, segid, element, charge, anisou, pepnames,
  single_atom_residue, hetero/hetatm and within.  Finally checks that a
  bare ``resseq`` selection raises ``AtomSelectionError`` with the message
  "Missing argument for resseq.".
  """
  # NOTE(review): PDB records are fixed-column, and several quoted selections
  # below (the padded name/resid/resi strings) depend on the exact number of
  # blanks.  The spacing is reproduced here as found; confirm it against the
  # reference copy of this test before trusting the expected index lists.
  pdb_inp = pdb.input(source_info=None, lines=flex.split_lines("""\
CRYST1 50.800 50.800 155.300 90.00 90.00 90.00 P 43 21 2 8
MODEL 1
ATOM 4 N SER 1 8.753 29.755 61.685 1.00 49.13
ATOM 5 CA SER 1 9.242 30.200 62.974 1.00 46.62
ANISOU 5 CA SER 1 343 490 2719 -45 -169 617
ATOM 6 C SER 1 10.453 29.500 63.579 1.00 41.99
ATOM 7 O SER 1 10.593 29.607 64.814 1.00 43.24
ANISOU 7 O SER 1 343 490 2719 -45 -169 617
ATOM 8 CB SER 1 8.052 30.189 63.974 1.00 53.00
ATOM 9 OG SER 1 7.294 31.409 63.930 1.00 57.79
ATOM 10 N ARG 2 11.360 28.819 62.827 1.00 36.48
ATOM 11 CA ARG 2 12.548 28.316 63.532 1.00 30.20
ATOM 12 C ARG 2 13.502 29.501 63.500 1.00 25.54
ATOM 13 O ARG 2 13.730 30.037 62.407 1.00 23.86
ATOM 14 CB ARG 2 13.241 27.119 62.861 1.00 27.44
ATOM 15 CG ARG 2 12.412 25.849 62.964 1.00 23.66
ATOM 16 CD ARG 2 13.267 24.651 63.266 1.00 23.98
ATOM 17 NE ARG 2 13.948 24.115 62.135 1.00 22.71
ATOM 18 CZ ARG 2 15.114 23.487 62.201 1.00 21.38
ATOM 19 NH1 ARG 2 15.845 23.331 63.301 1.00 19.34
ATOM 20 NH2 ARG 2 15.575 23.030 61.051 1.00 26.66
ATOM 21 N PRO 3J 13.947 29.997 64.680 1.00 22.94
ATOM 22 CA PRO 3J 14.902 31.100 64.827 1.00 20.19
ATOM 23 C PRO 3J 16.195 30.718 64.086 1.00 18.44
ATOM 24 O PRO 3J 16.545 29.521 64.086 1.00 19.76
ATOM 25 CB PRO 3J 15.133 31.218 66.313 1.00 19.17
ATOM 26 CG PRO 3J 14.065 30.364 66.951 1.00 15.12
ATOM 27 CD PRO 3J 13.816 29.289 65.966 1.00 19.56
ATOM 28 N AILE 4 16.953 31.648 63.512 1.00 15.29
ATOM 29 CA AILE 4 18.243 31.372 62.859 1.00 14.32
ATOM 30 C AILE 4 19.233 32.112 63.743 1.00 13.54
ATOM 31 O AILE 4 19.105 33.315 64.009 1.00 11.84
ATOM 32 CB AILE 4 18.298 31.951 61.406 1.00 13.62
ATOM 33 CG1AILE 4 17.157 31.300 60.620 1.00 18.39
ATOM 34 CG2AILE 4 19.661 31.747 60.743 1.00 13.64
ATOM 35 CD1AILE 4 16.879 32.102 59.355 1.00 16.69
ATOM 28 N BILE 4 16.953 31.648 63.512 1.00 15.29
ATOM 29 CA BILE 4 18.243 31.372 62.859 1.00 14.32
ATOM 30 C BILE 4 19.233 32.112 63.743 1.00 13.54
ATOM 31 O BILE 4 19.105 33.315 64.009 1.00 11.84
ATOM 32 CB BILE 4 18.298 31.951 61.406 1.00 13.62
ATOM 33 CG1BILE 4 17.157 31.300 60.620 1.00 18.39
ATOM 34 CG2BILE 4 19.661 31.747 60.743 1.00 13.64
ATOM1200035 CD1BILE 4 16.879 32.102 59.355 1.00 16.69
TER 36 ILE 4
ENDMDL
MODEL 2
HETATM 1451 PA 5GP H 187 29.875 44.488 69.823 1.00 19.62
HETATM 1452 O1A 5GP H 187 28.526 44.888 69.143 1.00 19.86
HETATM 1453 O2A 5GP H 187 30.764 44.617 68.702 1.00 23.42
HETATM 1454 O3A 5GP H 187 30.319 45.004 71.073 1.00 20.20
HETATM 1455 O5* 5GP H 187 29.683 43.016 70.027 1.00 20.32
HETATM 1456 C5* 5GP H 187 30.740 42.297 70.837 1.00 21.47
HETATM 1457 C4* 5GP H 187 30.677 40.747 70.770 1.00 21.56
HETATM 1458 O4* 5GP H 187 29.608 40.160 71.599 1.00 20.50
HETATM 1459 C3* 5GP H 187 30.547 40.121 69.352 1.00 20.18
HETATM 1460 O3* 5GP H 187 31.228 38.864 69.416 1.00 23.65
HETATM 1461 C2* 5GP H 187 29.031 39.871 69.248 1.00 18.78
HETATM 1462 O2* 5GP H 187 28.685 38.690 68.496 1.00 20.45
HETATM 1463 C1* 5GP H 187 28.634 39.641 70.688 1.00 17.09
HETATM 1464 N9 5GP H 187 27.238 39.525 71.076 1.00 15.35
HETATM 1465 C8 5GP H 187 26.330 40.535 70.852 1.00 12.57
HETATM 1466 N7' 5GP H 187 25.175 40.314 71.417 1.00 12.88
HETATM 1467 C5 5GP H 187 25.278 39.082 72.070 1.00 10.75
HETATM 1468 C6 5GP H 187 24.326 38.354 72.827 1.00 9.77
HETATM 1469 O6 5GP H 187 23.169 38.678 73.029 1.00 8.66
HETATM 1470 N1' 5GP H 187 24.836 37.190 73.270 1.00 9.67
HETATM 1471 C2 5GP H 187 26.075 36.701 73.001 1.00 9.84
HETATM 1472 N2 5GP H 187 26.361 35.490 73.520 1.00 9.77
HETATM 1473 N3 5GP H 187 27.005 37.353 72.310 1.00 10.31
HETATM 1474 C4 5GP H 187 26.583 38.559 71.844 1.00 12.50
ENDMDL
MODEL 3
HETATM 1475 S SO4 S 188 31.424 42.923 60.396 1.00 55.69 S4+
HETATM 1476 O1 SO4 S 188 31.631 41.513 60.336 1.00 59.84 o1-
HETATM 1477 O2 SO4 S 188 32.533 43.699 59.932 1.00 49.98 O1-
HETATM 1478 O3 SO4 S 188 31.128 43.217 61.738 1.00 59.44 O1-
HETATM 1479 O4 SO4 S 188 30.353 43.201 59.539 1.00 60.54 O1-
HETATM 1480 O HOH W 200 29.478 23.354 61.364 1.00 8.67 WATE
ATOM 2000 A1 AAA X 1 8.753 29.755 61.685 1.00 49.13
ATOM 2001 A2 AAA X 1 9.242 30.200 62.974 1.00 46.62
ATOM 2002 A1 BBB X 2 11.360 28.819 62.827 1.00 36.48
ATOM 2003 A2 BBB X 2 12.548 28.316 63.532 1.00 30.20
ATOM 2004 A1 AAA Y 1 8.753 29.755 61.685 1.00 49.13
ATOM 2005 A2 AAA Y 1 9.242 30.200 62.974 1.00 46.62
ATOM 2006 A1 CCC Y 5 9.242 30.200 62.974 1.00 46.62
ATOM 2007 A2 BBB Y 2 12.548 28.316 63.532 1.00 30.20
ATOM 2008 A1 AAA Z 1K 8.753 29.755 61.685 1.00 49.13
ATOM 2009 A1 BBB Z 2 11.360 28.819 62.827 1.00 36.48
ATOM 2010 A2 BBB Z 2 12.548 28.316 63.532 1.00 30.20
ATOM 2011 A1 AAAZZ 1K 8.753 29.755 61.685 1.00 49.13
ATOM 2012 A1 BBBZZ 2 11.360 28.819 62.827 1.00 36.48
ATOM 2013 A1 CCCZZ 5 9.242 30.200 62.974 1.00 46.62
ATOM 2014 A1 CCCZZA001 9.242 30.200 62.974 1.00 46.62
ATOM 2015 A1 CCCZZA002 9.242 30.200 62.974 1.00 46.62
ATOM 2016 A1 CCCZZA003 9.242 30.200 62.974 1.00 46.62
ATOM 2017 A1 AAAUU 1K 8.753 29.755 61.685 1.00 49.13
ENDMDL
END
"""))
  hierarchy = pdb_inp.construct_hierarchy()
  sel_cache = hierarchy.atom_selection_cache(
    special_position_settings=pdb_inp.special_position_settings())
  assert sel_cache.n_seq == hierarchy.atoms_size()
  isel = sel_cache.iselection
  # Trivial selections: empty string, all/none, negation, optional keyword.
  assert isel("").size() == 0
  assert isel("all").size() == sel_cache.n_seq
  assert isel("none").size() == 0
  assert isel("optional none", optional=True).size() == 0
  assert isel("optional none", optional=False) is None
  assert isel("not all").size() == 0
  assert isel("not none").size() == sel_cache.n_seq
  # Atom names: wildcards, escaped '*', quoting and case.
  assert list(isel(r"name c?\*")) == [45,46,48,50,52]
  assert list(isel(r"name 'C?\*'")) == []
  assert list(isel(r"name ' C?\*'")) == [45,46,48,50,52]
  assert list(isel(r"name ' c?\*'")) == [45,46,48,50,52]
  assert list(isel(r"name n?'")) == [55, 59]
  # Alternate locations, with and without an explicit "and".
  for conj in ["and ", ""]:
    assert list(isel(r"altloc a %sname n" % conj)) == [24]
    assert list(isel(r"altloc b %sname n" % conj)) == [32]
    assert list(isel(r"altloc ' ' %sname n" % conj)) == [0,6,17]
    assert list(isel(r"altid ' ' %sname n" % conj)) == [0,6,17]
  # Residue names, boolean operators and chains.
  assert list(isel(r"resname hoh")) == [69]
  assert list(isel(r"resname SO4")) == [64,65,66,67,68]
  assert list(isel(r"resname so4")) == [64,65,66,67,68]
  assert list(isel(r"resname So4")) == [64,65,66,67,68]
  assert list(isel(r"resname S?4")) == [64,65,66,67,68]
  assert list(isel(r"resname pro and name cg")) == [22]
  assert list(isel(r"resname pro and (name cg or name ca)")) == [18,22]
  assert list(isel(r"resname pro AND (name cg or name ca)")) == [18,22]
  assert list(isel(r"resname pro and (name cg OR name ca)")) == [18,22]
  assert list(isel(r"resname pro AND (name cg OR name ca)")) == [18,22]
  assert list(isel(r"not resname pro and (name cg or name ca)")
    ) == [1,7,11,25,33]
  assert list(isel(r"chain h and name o*")) == [41,42,43,44,47,49,51,58]
  assert list(isel(r"(chain h or chain s) and name o[2-46]")) == [58,66,67,68]
  # Residue sequence numbers and ranges.  list(range(...)) is used so the
  # comparisons and concatenations also work where range() is not a list.
  assert list(isel(r"resseq 188")) == [64,65,66,67,68]
  assert list(isel(r"resseq 188")) == [64,65,66,67,68]
  assert list(isel(r"resseq 1:1")) == [0,1,2,3,4,5,70,71,74,75,78,81]
  assert list(isel(r"resseq 2:2")) == list(range(6,17)) + [72,73,77,79,80,82]
  assert list(isel(r"resseq 5:5")) == [76,83]
  assert list(isel(r"resseq 1:5")) == list(range(40))+list(range(70,84))
  assert list(isel(r"resseq 2:3")) == list(range(6,24))+[72,73,77,79,80,82]
  assert list(isel(r"resseq 188:188")) == [64,65,66,67,68]
  assert list(isel(r"resseq 200:200")) == [69]
  assert list(isel(r"resseq 188:200")) == [64,65,66,67,68,69]
  assert list(isel(r"resseq 9999:A002")) == [84,85]
  assert list(isel(r"resseq A002:A003")) == [85,86]
  assert list(isel(r"resseq :")) == list(range(88))
  assert list(isel(r"resseq :2 and name n*")) == [0,6,13,15,16]
  assert list(isel(r"resseq 2: and name cb")) == [10,21,28,36]
  assert list(isel(r"resseq 1:2 and name n*")) == [0,6,13,15,16]
  assert list(isel(r"resseq 2:4 and name cb")) == [10,21,28,36]
  # Models and insertion codes.
  assert list(isel(r"model 1 and name cb")) == [4,10,21,28,36]
  assert list(isel(r"model 2:3 and name o1*")) == [41,65]
  assert list(isel(r"icode j and name c?")) == [18,21,22,23]
  # Residue ids (sequence number plus insertion code), plain and quoted.
  assert list(isel(r"resid 188")) == [64,65,66,67,68]
  assert list(isel(r"resid 3J")) == [17,18,19,20,21,22,23]
  assert list(isel(r"resid 1K")) == [78,81,87]
  assert list(isel(r"resid ' 1K'")) == [78,81]
  assert list(isel(r"resid ' 1K '")) == [87]
  assert list(isel(r"resi ' 1K'")) == []
  assert list(isel(r"resid 1:2")) \
    == list(range(17)) + [70,71,72,73,74,75,77,78,79,80,81,82]
  expected = list(range(6,17)) + [72,73,77,78,79,80,81,82]
  assert list(isel(r"resid 1K:2")) == expected
  assert list(isel(r"resid ' 1K:2'")) == expected
  assert list(isel(r"resid ' 1K:2'")) == expected
  expected = list(range(6,40)) + [72,73,76,77,78,79,80,81,82,83,87]
  assert list(isel(r"resi ' 1K: 1K '")) == expected
  # Blanks around the ':' of a resseq range must not change the result.
  expected = [7,18,25,33]
  assert list(isel(r"resseq 2:4 and name ca")) == expected
  assert list(isel(r"resseq 2 : 4 and name ca")) == expected
  assert list(isel(r"resseq 2: 4 and name ca")) == expected
  assert list(isel(r"resseq 2 :4 and name ca")) == expected
  expected = [1,7,18]
  assert list(isel(r"resseq :3 and name ca")) == expected
  assert list(isel(r"resseq : 3 and name ca")) == expected
  assert list(isel(r"name ca and resseq :3")) == expected
  assert list(isel(r"name ca and resseq : 3")) == expected
  expected = [18,25,33]
  assert list(isel(r"resseq 3: and name ca")) == expected
  assert list(isel(r"resseq 3 : and name ca")) == expected
  assert list(isel(r"name ca and resseq 3:")) == expected
  assert list(isel(r"name ca and resseq 3 :")) == expected
  assert list(isel(r"name ca and resseq 3")) == [18]
  expected = [1,7,18,25,33]
  assert list(isel(r"resseq : and name ca")) == expected
  assert list(isel(r"name ca and resseq :")) == expected
  # Remaining keywords.
  assert list(isel(r"segid wate")) == [69]
  assert list(isel(r"element o")) == [65,66,67,68]
  assert list(isel(r"charge 4+")) == [64]
  assert list(isel(r"anisou")) == [1, 3]
  assert list(isel(r"pepnames")) == list(range(40))
  assert list(isel(r"single_atom_residue")) == [
    69, 76, 77, 78, 81, 82, 83, 84, 85, 86, 87]
  assert list(isel(r"hetero")) == list(isel(r"hetatm")) == [
    40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
    55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69]
  assert list(isel("within(3, resname PRO)")) == [7, 8, 9, 17, 18, 19, 20, 21,
    22, 23, 24, 25, 32, 33, 73, 77, 80]
  # A keyword without its argument must be rejected with a clear message.
  try:
    isel(r"resseq")
  except pdb.atom_selection.AtomSelectionError as e:
    assert "Missing argument for resseq." in str(e)
  else:
    # Without this branch the check would pass silently if nothing is raised.
    raise AssertionError(
      "AtomSelectionError expected for 'resseq' without an argument")
def __init__(self, args = None,
             params = None,
             ignore_chains = None,
             required_chains = None,
             exclude_chains = None,
             ncs_master_params = ncs_master_params,
             command_name = "simple_ncs_from_pdb",
             all_chain_proxies = None,
             pdb_inp = None,
             hierarchy = None,
             suppress_print = False,
             source_info = None,
             pdb_file = None,
             groups_only = None,
             suggested_ncs_groups = None,
             log = sys.stdout,
             quiet=False,
             exclude_h=False,
             exclude_d=False,
             write_ncs_domain_pdb=None,
             ncs_domain_pdb_stem=None,
             temp_dir=None
             ):
  """Process inputs, find NCS groups in a model and store the result.

  The arguments and/or ``params`` are processed, a model hierarchy is
  obtained (from ``hierarchy``, ``pdb_inp`` or a PDB file), and
  ``iotbx.ncs.input`` is called on it.  The spec object it returns is
  stored as ``self.ncs_object``.  Unless ``suppress_print`` is true the
  result is displayed on ``log`` and written to the files
  ``simple_ncs_from_pdb.resolve``, ``simple_ncs_from_pdb.ncs`` and
  ``simple_ncs_from_pdb.ncs_spec``.

  Parameters (described only as far as this method uses them):
    args: command-line style argument list; the first entry is used as the
      PDB file name if it is an existing file and ``pdb_file`` is not given.
    params: parameter object; if None, the one built from ``args`` is used.
    required_chains, exclude_chains: stored on ``self``; a fresh empty list
      is used when omitted.
    command_name: passed to ``get_summary_and_header`` and ``get_params``.
    all_chain_proxies, pdb_inp, hierarchy: if all three are None the model
      is read from a PDB file.
    suppress_print: skip citation registration, parameter echo, result
      display and the three output files.
    source_info: label used in messages; overrides the PDB file name.
    pdb_file: PDB file to read when no model object is supplied.
    suggested_ncs_groups: stored as ``self.suggested_ncs_groups`` if given.
    log: output stream for all messages.
    quiet: suppress the header; also passed to ``iotbx.ncs.input``.
    exclude_h, exclude_d: stored on ``self``.
    write_ncs_domain_pdb, ncs_domain_pdb_stem, temp_dir: when not None,
      override the corresponding ``params.simple_ncs_from_pdb`` values.
    ignore_chains, ncs_master_params, groups_only: accepted for
      compatibility; not used by the code in this method.

  Returns early (before any NCS search) when ``'exercise'`` is in the
  arguments, when ``get_params`` reports it is done or help was requested,
  when no params are available, or when ``dry_run`` is set.

  Raises:
    Sorry: if no PDB file can be determined or the file does not exist.
  """
  self.log=log
  self.quiet=quiet
  self.exclude_h=exclude_h
  self.exclude_d=exclude_d
  # None defaults avoid sharing one list object between all instances.
  self.exclude_chains=[] if exclude_chains is None else exclude_chains
  self.required_chains=[] if required_chains is None else required_chains

  args=catenate_equals(args).new_args()
  self.process_inputs(args)
  if self.args==[]:
    self.args=None
  args=self.args
  # A directly supplied value wins over whatever process_inputs() set.
  if suggested_ncs_groups is not None:
    self.suggested_ncs_groups=suggested_ncs_groups
  self.Name='simple_ncs_from_pdb'

  if not suppress_print:
    citations.add_citation('phenix','simple_ncs_from_pdb')

  # run a test case
  if args is not None and 'exercise' in args:
    self.exercise()
    return

  args=self.special_cases(args)
  master_params=get_composite_master_params(
    method_list=['simple_ncs_from_pdb'],
    location_list=['phenix.command_line'])
  args=self.get_keyword_table(args,out=self.log)  # set self.keyword_table
  _summary,header=self.get_summary_and_header(command_name)
  done,master_params,new_params,_changed_params,show_help=self.get_params(
    command_name,master_params,args,out=self.log)
  if params is None:
    params=new_params
  if done:
    return
  if not quiet:
    print >>self.log, header
  if show_help or (params and params.simple_ncs_from_pdb.verbose):
    print >>self.log, "Values of all params:"
    master_params.format(python_object=params).show(out=self.log)
  if show_help or params is None:
    return

  # Done with standard processing of inputs.
  # Overwrite with direct inputs, if any:
  if write_ncs_domain_pdb is not None:
    params.simple_ncs_from_pdb.write_ncs_domain_pdb=write_ncs_domain_pdb
  if ncs_domain_pdb_stem is not None:
    params.simple_ncs_from_pdb.ncs_domain_pdb_stem=ncs_domain_pdb_stem
  if temp_dir is not None:
    params.simple_ncs_from_pdb.temp_dir=temp_dir

  # Things that must be defined...
  self.params=params
  if not suppress_print:
    print >>self.log,"Parameters used for simple_ncs_from_pdb:"
    master_params.format(python_object=params).show(out=self.log)
    print >>self.log

  if params.simple_ncs_from_pdb.dry_run:
    # Sent to the log stream like every other message of this method.
    print >>self.log, "ARGS: ",args
    return

  # read in the PDB file if needed
  if((all_chain_proxies is None) and
     (pdb_inp is None and hierarchy is None)):
    if(pdb_file is None):
      if args is not None and args and args[0] and os.path.isfile(args[0]):
        pdb_file=args[0]
      elif params.simple_ncs_from_pdb.pdb_in is not None:
        pdb_file=params.simple_ncs_from_pdb.pdb_in
      else:
        raise Sorry("\nNeed PDB file for simple_ncs_from_pdb"+
          "\n\nPlease make the PDB file the first argument like this: \n"+
          "phenix.simple_ncs_from_pdb mypdb.pdb ...\n")
    if not os.path.isfile(pdb_file):
      raise Sorry("The file "+str(pdb_file)+" is missing?")

    raw_records=flex.std_string()
    # Context manager so the input file is closed even if reading fails.
    with open(pdb_file) as pdb_f:
      raw_records.extend(flex.split_lines(pdb_f.read()))
    if pdb_inp is None:
      pdb_inp=iotbx.pdb.input(source_info=None, lines=raw_records)
    mon_lib_srv=mmtbx.monomer_library.server.server()
    ener_lib=mmtbx.monomer_library.server.ener_lib()
    processed_pdb=mmtbx.monomer_library.pdb_interpretation.process(
      mon_lib_srv=mon_lib_srv,
      ener_lib=ener_lib,
      params=None,
      raw_records=raw_records,
      strict_conflict_handling=False,
      max_atoms=None,
      log=null_out())
    # NOTE(review): all_chain_proxies is not read again in this method; the
    # interpretation step is kept so the same inputs are still processed.
    all_chain_proxies=processed_pdb.all_chain_proxies
    self.source_info=pdb_file
  if hierarchy is None:
    hierarchy=pdb_inp.construct_hierarchy()

  # set input params
  self.verbose=params.simple_ncs_from_pdb.verbose
  if self.verbose:
    hierarchy.show(out=self.log)
  if source_info:
    self.source_info=source_info
  if not getattr(self,'source_info',None):
    self.source_info="None"

  # Use cctbx tool to get ncs object.  The earlier in-class search (chain
  # lists, suggested groups, recursive quick comparison, sequence-based and
  # invariant-domain groups) that used to follow here as commented-out code
  # has been dropped; iotbx.ncs.input is the only search that is run.
  find_param=params.simple_ncs_from_pdb.domain_finding_parameters
  inp_param=params.simple_ncs_from_pdb
  ncs_obj=iotbx.ncs.input(
    hierarchy=hierarchy,
    chain_similarity_limit=find_param.similarity_threshold,
    min_contig_length=find_param.min_contig_length,
    min_percent=inp_param.min_percent,
    max_rmsd=inp_param.max_rmsd,
    max_dist_diff=find_param.match_radius,
    use_minimal_master_ncs=True,
    process_similar_chains=True,
    allow_different_size_res=True,
    exclude_misaligned_residues=True,
    check_atom_order=False,
    write_messages=False,
    log=self.log,
    quiet=self.quiet)
  ncs_object=ncs_obj.get_ncs_info_as_spec(write=False)

  if len(ncs_object.ncs_groups())<1:
    if not suppress_print:
      if self.source_info:
        print >>self.log,"\nNo NCS found from the chains in ",self.source_info
      else:
        print >>self.log,"\nNo NCS found"

  # NOTE(review): the output files are written only when printing is not
  # suppressed -- confirm this nesting against the intended behaviour.
  if not suppress_print:
    ncs_object.display_all(log=self.log)
    with open("simple_ncs_from_pdb.resolve",'w') as f:
      ncs_object.format_all_for_resolve(log=self.log,out=f)
    with open("simple_ncs_from_pdb.ncs",'w') as f:
      ncs_object.format_all_for_phenix_refine(log=self.log,out=f)
    with open("simple_ncs_from_pdb.ncs_spec",'w') as f:
      ncs_object.format_all_for_group_specification(log=self.log,out=f)

  self.ncs_object=ncs_object
  citations.show(out=self.log, source='simple_ncs_from_pdb')
def substitute_ss(real_h,
                  xray_structure,
                  ss_annotation,
                  params = None,
                  grm=None,
                  use_plane_peptide_bond_restr=True,
                  fix_rotamer_outliers=True,
                  cif_objects=None,
                  log=null_out(),
                  rotamer_manager=None,
                  reference_map=None,
                  verbose=False):
    """
    Substitute secondary structure elements in real_h with ideal ones, in place.

    For every helix and strand of the annotation an ideal fragment is built
    on the matching atoms, its coordinates are copied into a working copy of
    the hierarchy, restraints are added to the geometry restraints manager,
    the new coordinates are written back into real_h and the model is
    regularized (run2 without a map, minimize_wrapper_with_map with one).

    Parameters:
      real_h - hierarchy whose secondary structure elements are substituted;
          its coordinates are modified. Chains with more than one conformer
          are rejected.
      xray_structure - its crystal_symmetry() is used for PDB interpretation
          and for the map-based minimization; the object itself is passed
          to get_geometry_restraints_manager.
      ss_annotation - secondary structure annotation providing .helices and
          .sheets (original note: an iotbx.pdb.annotation object).
          remove_empty_annotations() is called on it.
      params - passed to process_params(); the result supplies enabled,
          skip_empty_ss_elements, the sigma_* values, n_iter, n_macro and
          file_name_before_regularization.
      grm - restraints manager. When None one is built through PDB
          interpretation; otherwise secondary structure restraints are set
          on grm.geometry.
      use_plane_peptide_bond_restr - when building the grm, also switch on
          peptide_link.apply_peptide_plane.
      fix_rotamer_outliers - forwarded to run2 (no-map minimization only).
      cif_objects - forwarded to process_pdb_file_srv.
      log - stream for progress messages.
      rotamer_manager - reused when given, else a RotamerEval() is created.
      reference_map - when given, reference coordinate restraints are not
          added and minimization is done against this map.
      verbose - extra progress messages, and minimizer output goes to log.

    Returns:
      None when the processed parameters are not enabled, otherwise
      grm.geometry.get_chi_torsion_proxies() (the reference torsion proxies).

    Raises:
      Sorry - for alternative conformations, or when annotations select no
          atoms and skip_empty_ss_elements is off.

    Side effects (files): with reference_map, "after_ss_map_min.pdb" is
    written to the current directory; when file_name_before_regularization
    is set, that model file and a matching .geo file are written.
    """
    if rotamer_manager is None:
        rotamer_manager = RotamerEval()
    for model in real_h.models():
        for chain in model.chains():
            if len(chain.conformers()) > 1:
                raise Sorry("Alternative conformations are not supported.")

    processed_params = process_params(params)
    if not processed_params.enabled:
        return None

    ann = ss_annotation
    # NOTE(review): the total is not used below. The loop is kept because
    # get_n_maximum_hbonds() may reject unsupported helix classes - confirm
    # before removing it.
    expected_n_hbonds = 0
    for h in ann.helices:
        expected_n_hbonds += h.get_n_maximum_hbonds()

    edited_h = real_h.deep_copy()
    n_atoms_in_real_h = real_h.atoms_size()
    selection_cache = real_h.atom_selection_cache()

    # Check the annotation for correctness (atoms are actually in hierarchy).
    deleted_annotations = ann.remove_empty_annotations(
        hierarchy=real_h,
        asc=selection_cache)
    if not deleted_annotations.is_empty():
        if processed_params.skip_empty_ss_elements:
            if len(deleted_annotations.helices) > 0:
                log.write("Removing the following helices because there are\n")
                log.write("no corresponding atoms in the model:\n")
                for h in deleted_annotations.helices:
                    log.write("%s\n" % h.as_pdb_str())
            if len(deleted_annotations.sheets) > 0:
                log.write("Removing the following sheets because there are\n")
                log.write("no corresponding atoms in the model:\n")
                for sh in deleted_annotations.sheets:
                    log.write("%s\n" % sh.as_pdb_str())
        else:
            # List the offending records in the message that is actually
            # raised. Previously the listing was only assembled in the
            # non-raising branch and referenced an undefined name.
            error_msg = "The following secondary structure annotations result in \n"
            error_msg += "empty atom selections. They don't match the structre: \n"
            error_msg += "".join(
                [" %s\n" % h.as_pdb_str()
                 for h in deleted_annotations.helices])
            error_msg += "".join(
                [" %s\n" % sh.as_pdb_str()
                 for sh in deleted_annotations.sheets])
            raise Sorry(error_msg)
    phil_str = ann.as_restraint_groups()

    # Actually idealizing SS elements.
    log.write("Replacing ss-elements with ideal ones:\n")
    log.flush()
    for h in ann.helices:
        log.write(" %s\n" % h.as_pdb_str())
        log.flush()
        selstring = h.as_atom_selections()
        isel = selection_cache.iselection(selstring[0])
        all_bsel = flex.bool(n_atoms_in_real_h, False)
        all_bsel.set_selected(isel, True)
        sel_h = real_h.select(all_bsel, copy_atoms=True)
        ideal_h = get_helix(
            helix_class=h.helix_class,
            pdb_hierarchy_template=sel_h,
            rotamer_manager=rotamer_manager)
        set_xyz_carefully(dest_h=edited_h.select(all_bsel), source_h=ideal_h)
    for sh in ann.sheets:
        s = " %s\n" % sh.as_pdb_str()
        ss = s.replace("\n", "\n ")
        log.write(ss[:-2])
        log.flush()
        for st in sh.strands:
            selstring = st.as_atom_selections()
            isel = selection_cache.iselection(selstring)
            all_bsel = flex.bool(n_atoms_in_real_h, False)
            all_bsel.set_selected(isel, True)
            sel_h = real_h.select(all_bsel, copy_atoms=True)
            ideal_h = secondary_structure_from_sequence(
                pdb_str=beta_pdb_str,
                sequence=None,
                pdb_hierarchy_template=sel_h,
                rotamer_manager=rotamer_manager,
                )
            set_xyz_carefully(edited_h.select(all_bsel), ideal_h)

    pre_result_h = edited_h
    pre_result_h.reset_i_seq_if_necessary()
    n_atoms = real_h.atoms_size()
    helix_selection = flex.bool(n_atoms, False)
    sheet_selection = flex.bool(n_atoms, False)
    other_selection = flex.bool(n_atoms, False)
    ss_for_tors_selection = flex.bool(n_atoms, False)
    nonss_for_tors_selection = flex.bool(n_atoms, False)
    selection_cache = real_h.atom_selection_cache()
    # Start with every main-chain atom counted as "other" / "non-SS"; the
    # loops below move the atoms of SS elements into their own selections.
    isel = selection_cache.iselection("name ca or name n or name o or name c")
    other_selection.set_selected(isel, True)
    n_main_chain_atoms = other_selection.count(True)
    nonss_for_tors_selection.set_selected(isel, True)
    main_chain_selection_prefix = "(name ca or name n or name o or name c) %s"

    log.write("Preparing selections...\n")
    log.flush()
    # Here we are just preparing selections.
    for h in ann.helices:
        ss_sels = h.as_atom_selections()[0]
        selstring = main_chain_selection_prefix % ss_sels
        isel = selection_cache.iselection(selstring)
        helix_selection.set_selected(isel, True)
        other_selection.set_selected(isel, False)
        ss_for_tors_selection.set_selected(isel, True)
        nonss_for_tors_selection.set_selected(isel, False)
    for sheet in ann.sheets:
        for ss_sels in sheet.as_atom_selections():
            selstring = main_chain_selection_prefix % ss_sels
            isel = selection_cache.iselection(selstring)
            sheet_selection.set_selected(isel, True)
            other_selection.set_selected(isel, False)
            ss_for_tors_selection.set_selected(isel, True)
            nonss_for_tors_selection.set_selected(isel, False)
    isel = selection_cache.iselection(
        "not name ca and not name n and not name o and not name c")
    other_selection.set_selected(isel, False)
    # An atom annotated as both helix and sheet is treated as helix only.
    helix_sheet_intersection = helix_selection & sheet_selection
    if helix_sheet_intersection.count(True) > 0:
        sheet_selection = sheet_selection & ~helix_sheet_intersection
    assert ((helix_selection | sheet_selection) & other_selection).count(True)==0

    from mmtbx.monomer_library.pdb_interpretation import grand_master_phil_str
    params_line = grand_master_phil_str
    params_line += "secondary_structure {%s}" % secondary_structure.sec_str_master_phil_str
    # NOTE: extracting these parameters, editing the Python object and
    # formatting it back did not behave the same way (the number of resulting
    # hbonds was different later; not investigated), so the overrides below
    # are applied as phil text through fetch().
    interp_params = iotbx.phil.parse(
        input_string=params_line, process_includes=True)
    if grm is None:
        custom_par_text = "\n".join([
            "pdb_interpretation.secondary_structure {protein.remove_outliers = False\n%s}" \
                % phil_str,
            "pdb_interpretation.peptide_link.ramachandran_restraints = True",
            "c_beta_restraints = True",
            "pdb_interpretation.secondary_structure.enabled=True",
            "pdb_interpretation.clash_guard.nonbonded_distance_threshold=None",
            "pdb_interpretation.max_reasonable_bond_distance=None",
            "pdb_interpretation.peptide_link.oldfield.weight_scale=3",
            "pdb_interpretation.peptide_link.oldfield.plot_cutoff=0.03",
            "pdb_interpretation.peptide_link.omega_esd_override_value=3",
            "pdb_interpretation.peptide_link.apply_all_trans=True",
            ])
        if use_plane_peptide_bond_restr:
            custom_par_text += "\npdb_interpretation.peptide_link.apply_peptide_plane=True"
        interp_params = interp_params.fetch(
            source=iotbx.phil.parse(custom_par_text)).extract()
        import mmtbx.utils
        processed_pdb_files_srv = mmtbx.utils.process_pdb_file_srv(
            crystal_symmetry=xray_structure.crystal_symmetry(),
            pdb_interpretation_params=interp_params.pdb_interpretation,
            log=null_out(),
            cif_objects=cif_objects)
        if verbose:
            log.write("Processing file...\n")
            log.flush()
        processed_pdb_file, junk = processed_pdb_files_srv.process_pdb_files(
            raw_records=flex.split_lines(real_h.as_pdb_string()))
        grm = get_geometry_restraints_manager(
            processed_pdb_file, xray_structure)
    else:
        ss_manager = secondary_structure.manager(
            pdb_hierarchy=real_h,
            geometry_restraints_manager=grm.geometry,
            sec_str_from_pdb_file=ss_annotation,
            params=None,
            mon_lib_srv=None,
            verbose=-1,
            log=log)
        grm.geometry.set_secondary_structure_restraints(
            ss_manager=ss_manager,
            hierarchy=real_h,
            log=log)
    real_h.reset_i_seq_if_necessary()

    from mmtbx.geometry_restraints import reference
    if reference_map is None:
        if verbose:
            log.write("Adding reference coordinate restraints...\n")
        # Restrain main-chain atoms to their positions in real_h (the
        # original coordinates), with a separate sigma per class of atoms.
        grm.geometry.append_reference_coordinate_restraints_in_place(
            reference.add_coordinate_restraints(
                sites_cart = real_h.atoms().extract_xyz().select(helix_selection),
                selection = helix_selection,
                sigma = processed_params.sigma_on_reference_helix))
        grm.geometry.append_reference_coordinate_restraints_in_place(
            reference.add_coordinate_restraints(
                sites_cart = real_h.atoms().extract_xyz().select(sheet_selection),
                selection = sheet_selection,
                sigma = processed_params.sigma_on_reference_sheet))
        grm.geometry.append_reference_coordinate_restraints_in_place(
            reference.add_coordinate_restraints(
                sites_cart = real_h.atoms().extract_xyz().select(other_selection),
                selection = other_selection,
                sigma = processed_params.sigma_on_reference_non_ss))
    if verbose:
        log.write("Adding chi torsion restraints...\n")
    # Torsion targets: idealized coordinates for SS atoms, original
    # coordinates for the remaining main-chain atoms.
    grm.geometry.add_chi_torsion_restraints_in_place(
        pdb_hierarchy = pre_result_h,
        sites_cart = pre_result_h.atoms().extract_xyz().\
            select(ss_for_tors_selection),
        selection = ss_for_tors_selection,
        chi_angles_only = False,
        sigma = processed_params.sigma_on_torsion_ss)
    grm.geometry.add_chi_torsion_restraints_in_place(
        pdb_hierarchy = pre_result_h,
        sites_cart = real_h.atoms().extract_xyz().\
            select(nonss_for_tors_selection),
        selection = nonss_for_tors_selection,
        chi_angles_only = False,
        sigma = processed_params.sigma_on_torsion_nonss)

    # From here on real_h carries the substituted coordinates.
    real_h.atoms().set_xyz(pre_result_h.atoms().extract_xyz())
    if processed_params.file_name_before_regularization is not None:
        grm.geometry.pair_proxies(sites_cart=real_h.atoms().extract_xyz())
        if grm.geometry.ramachandran_manager is not None:
            grm.geometry.ramachandran_manager.update_phi_psi_targets(
                sites_cart=real_h.atoms().extract_xyz())
        log.write("Outputting model before regularization %s\n"
            % processed_params.file_name_before_regularization)
        real_h.write_pdb_file(
            file_name=processed_params.file_name_before_regularization)
        geo_fname = processed_params.file_name_before_regularization[:-4]+'.geo'
        log.write("Outputting geo file for regularization %s\n" % geo_fname)
        grm.write_geo_file(
            site_labels=[atom.id_str() for atom in real_h.atoms()],
            file_name=geo_fname)

    # Testing number of restraints.
    assert grm.geometry.get_n_den_proxies() == 0
    if reference_map is None:
        assert grm.geometry.get_n_reference_coordinate_proxies() == n_main_chain_atoms
    refinement_log = null_out()
    log.write(
        "Refining geometry of substituted secondary structure elements...")
    log.flush()
    if verbose:
        refinement_log = log
    from mmtbx.refinement.geometry_minimization import run2
    if reference_map is None:
        run2(
            restraints_manager = grm,
            pdb_hierarchy = real_h,
            correct_special_position_tolerance = 1.0,
            max_number_of_iterations = processed_params.n_iter,
            number_of_macro_cycles = processed_params.n_macro,
            bond = True,
            nonbonded = True,
            angle = True,
            dihedral = True,
            chirality = True,
            planarity = True,
            fix_rotamer_outliers = fix_rotamer_outliers,
            log = refinement_log)
    else:
        ref_xrs = real_h.extract_xray_structure(
            crystal_symmetry=xray_structure.crystal_symmetry())
        minimize_wrapper_with_map(
            pdb_h=real_h,
            xrs=ref_xrs,
            target_map=reference_map,
            grm=grm,
            ncs_restraints_group_list=[],
            mon_lib_srv=None,
            ss_annotation=ss_annotation,
            refine_ncs_operators=False,
            number_of_cycles=processed_params.n_macro,
            log=log)
        real_h.write_pdb_file("after_ss_map_min.pdb")
    log.write(" Done\n")
    log.flush()
    return grm.geometry.get_chi_torsion_proxies()
def exercise(file_name=None, pdb_file_name = None, map_file_name = None ,
    split_pdb_file_name = None,
    out = sys.stdout):
    """
    Exercise map_model_manager: splitting/merging, masking, boxing and I/O.

    Parameters:
      file_name - map file read with map_manager; Sorry is raised when it
          is not an existing file.
      pdb_file_name - model file given to DataManager.get_map_model_manager
          for the ordinary splitting tests.
      map_file_name - map file(s) used together with both model files.
      split_pdb_file_name - model file used for the split_up-by-chain test
          that masks around unselected atoms.
      out - stream for the map summaries.

    The checks are assertions against hard-coded reference values. Files
    written to the current directory: shifted_map.ccp4, shifted_model.pdb,
    new_map_original_position.ccp4, new_model_original_position.pdb and
    new_model_original_position.cif.
    """
    # Set up source data
    if not os.path.isfile(file_name):
        raise Sorry("Missing the file: %s" %(file_name)+"\n")

    print ("Reading from %s" %(file_name))
    from iotbx.map_manager import map_manager
    m = map_manager(file_name)

    print ("Header information from %s:" %(file_name))
    m.show_summary(out = out)

    map_data = m.map_data().deep_copy()
    crystal_symmetry = m.crystal_symmetry()
    unit_cell_parameters = m.crystal_symmetry().unit_cell().parameters()

    print ("\nMap origin: %s Extent %s" %( map_data.origin(), map_data.all()))
    print ("Original unit cell, not just unit cell of part in this file): %s" %(
        str(unit_cell_parameters)))

    grid_point = (1, 2, 3)
    if map_data.origin() != (0, 0, 0): # make sure it is inside
        from scitbx.matrix import col
        grid_point = tuple (col(grid_point)+col(map_data.origin()))
    print ("\nValue of map_data at grid point %s: %.3f" %(str(grid_point),
        map_data[grid_point]))
    print ("Map data is %s" %(type(map_data)))

    random_position = (10, 5, 7.9)
    point_frac = crystal_symmetry.unit_cell().fractionalize(random_position)
    value_at_point_frac = map_data.eight_point_interpolation(point_frac)
    print ("Value of map_data at coordinates %s: %.3f" %(
        str(random_position), value_at_point_frac))

    map_data_as_float = map_data.as_float()
    print ("Map data as float is %s" %(type(map_data_as_float)))

    # make a little model
    sites_cart = flex.vec3_double( ((8, 10, 12), (14, 15, 16)))
    model = model_manager.from_sites_cart(
        atom_name = ' CA ',
        resname = 'ALA',
        chain_id = 'A',
        b_iso = 30.,
        occ = 1.,
        scatterer = 'C',
        sites_cart = sites_cart,
        crystal_symmetry = crystal_symmetry)

    # Move map and a model to place origin at (0, 0, 0)
    # map data is new copy but model is shifted in place.
    from iotbx.map_model_manager import map_model_manager
    mam = map_model_manager(
        map_manager = m,
        model = model.deep_copy(),
        )

    # Read in map and model and split up
    dm = DataManager()
    aa = dm.get_map_model_manager(model_file=pdb_file_name,
        map_files=map_file_name)
    bb = dm.get_map_model_manager(model_file=split_pdb_file_name,
        map_files=map_file_name)

    # Accepted (selection_method, skip_empty_boxes, number of selections,
    # atoms in first selection) combinations.
    expected_results = [
        ('by_chain',True,3,19),
        ("by_chain",True,1,86,),
        ("by_segment",True,1,86,),
        ("supplied_selections",True,1,86,),
        ("boxes",True,13,1,),
        ("boxes",False,36,0,),
        ]

    for selection_method in ['by_chain', 'by_segment','supplied_selections',
        'boxes']:
        if selection_method == 'boxes':
            choices = [True, False]
        else:
            choices = [True]
        if selection_method == 'by_chain':
            mask_choices = [True,False]
        else:
            mask_choices = [False]
        for select_final_boxes_based_on_model in choices:
            for skip_empty_boxes in choices:
                for mask_choice in mask_choices:
                    if mask_choice: # use split model
                        a=bb.deep_copy()
                    else: # usual
                        a=aa.deep_copy()
                    print ("\nRunning split_up_map_and_model with \n"+
                        "select_final_boxes_based_on_model="+
                        "%s skip_empty_boxes=%s selection_method=%s" %(
                        select_final_boxes_based_on_model,skip_empty_boxes,
                        selection_method))

                    if selection_method == 'by_chain':
                        print ("Mask around unused atoms: %s" %(mask_choice))
                        box_info = a.split_up_map_and_model_by_chain(
                            mask_around_unselected_atoms=mask_choice)
                    elif selection_method == 'by_segment':
                        box_info = a.split_up_map_and_model_by_segment()
                    elif selection_method == 'supplied_selections':
                        selection = a.model().selection('all')
                        box_info = a.split_up_map_and_model_by_supplied_selections(
                            selection_list = [selection])
                    elif selection_method == 'boxes':
                        box_info = a.split_up_map_and_model_by_boxes(
                            skip_empty_boxes = skip_empty_boxes,
                            select_final_boxes_based_on_model =
                                select_final_boxes_based_on_model)

                    print (selection_method,skip_empty_boxes,
                        len(box_info.selection_list),
                        box_info.selection_list[0].count(True))
                    assert (selection_method,skip_empty_boxes,
                        len(box_info.selection_list),
                        box_info.selection_list[0].count(True)) in expected_results

                    # Change the coordinates in one box
                    small_model = box_info.mmm_list[0].model()
                    small_sites_cart = small_model.get_sites_cart()
                    from scitbx.matrix import col
                    small_sites_cart += col((1,0,0))
                    small_model.set_crystal_symmetry_and_sites_cart(
                        sites_cart = small_sites_cart,
                        crystal_symmetry = small_model.crystal_symmetry())
                    # Put everything back together
                    a.merge_split_maps_and_models(box_info = box_info)

    mam.box_all_maps_around_model_and_shift_origin()

    shifted_crystal_symmetry = mam.model().crystal_symmetry()
    shifted_model = mam.model()
    shifted_map_data = mam.map_data()

    print ("\nOriginal map origin (grid units):", map_data.origin())
    print ("Original model:\n", model.model_as_pdb())

    print ("Shifted map origin:", shifted_map_data.origin())
    print ("Shifted model:\n", shifted_model.model_as_pdb())

    # Save the map_model manager
    mam_dc=mam.deep_copy()
    print ("dc",mam)
    print ("dc mam_dc",mam_dc)

    # Mask map around atoms
    mam=mam_dc.deep_copy()
    print ("dc mam_dc dc",mam_dc)
    print (mam)
    mam.mask_all_maps_around_atoms(mask_atoms_atom_radius = 3,
        set_outside_to_mean_inside=True, soft_mask=False)
    print ("Mean before masking", mam.map_data().as_1d().min_max_mean().mean)
    assert approx_equal(mam.map_data().as_1d().min_max_mean().mean,
        -0.0585683621466)
    print ("Max before masking", mam.map_data().as_1d().min_max_mean().max)
    assert approx_equal(mam.map_data().as_1d().min_max_mean().max,
        -0.0585683621466)

    # Mask map around atoms, with soft mask
    mam=mam_dc.deep_copy()
    mam.mask_all_maps_around_atoms(mask_atoms_atom_radius = 3, soft_mask = True,
        soft_mask_radius = 5, set_outside_to_mean_inside=True)
    print ("Mean after first masking", mam.map_data().as_1d().min_max_mean().mean)
    assert approx_equal(mam.map_data().as_1d().min_max_mean().mean,
        -0.00177661714805)
    print ("Max after first masking", mam.map_data().as_1d().min_max_mean().max)
    assert approx_equal(mam.map_data().as_1d().min_max_mean().max,
        0.236853733659)

    # Mask map around atoms again
    mam.mask_all_maps_around_atoms(mask_atoms_atom_radius = 3,
        set_outside_to_mean_inside = True, soft_mask=False)
    print ("Mean after second masking", mam.map_data().as_1d().min_max_mean().mean)
    assert approx_equal(mam.map_data().as_1d().min_max_mean().mean,
        -0.0585683621466)
    print ("Max after second masking", mam.map_data().as_1d().min_max_mean().max)
    assert approx_equal(mam.map_data().as_1d().min_max_mean().max,
        -0.0585683621466)

    # Mask around edges
    mam=mam_dc.deep_copy()
    mam.mask_all_maps_around_edges( soft_mask_radius = 3)
    print ("Mean after masking edges", mam.map_data().as_1d().min_max_mean().mean)
    assert approx_equal(mam.map_data().as_1d().min_max_mean().mean,
        0.0155055604192)
    print ("Max after masking edges", mam.map_data().as_1d().min_max_mean().max)
    assert approx_equal(mam.map_data().as_1d().min_max_mean().max,
        0.249827131629)

    print ("\nWriting map_data and model in shifted position (origin at 0, 0, 0)")

    output_file_name = 'shifted_map.ccp4'
    print ("Writing to %s" %(output_file_name))
    mrcfile.write_ccp4_map(
        file_name = output_file_name,
        crystal_symmetry = shifted_crystal_symmetry,
        map_data = shifted_map_data, )

    output_file_name = 'shifted_model.pdb'
    # "with" closes the file even if writing fails.
    with open(output_file_name, 'w') as f:
        print (shifted_model.model_as_pdb(), file=f)

    print ("\nWriting map_data and model in original position (origin at %s)" %(
        str(mam.map_manager().origin_shift_grid_units)))

    output_file_name = 'new_map_original_position.ccp4'
    print ("Writing to %s" %(output_file_name))
    mrcfile.write_ccp4_map(
        file_name = output_file_name,
        crystal_symmetry = shifted_crystal_symmetry,
        map_data = shifted_map_data,
        origin_shift_grid_units = mam.map_manager().origin_shift_grid_units)
    print (shifted_model.model_as_pdb())
    output_pdb_file_name = 'new_model_original_position.pdb'
    with open(output_pdb_file_name, 'w') as f:
        print (shifted_model.model_as_pdb(), file=f)

    # Write as mmcif
    output_cif_file_name = 'new_model_original_position.cif'
    with open(output_cif_file_name, 'w') as f:
        print (shifted_model.model_as_mmcif(),file = f)

    # Read the new map and model
    import iotbx.pdb
    with open(output_pdb_file_name) as f:
        pdb_lines = flex.split_lines(f.read())
    new_model = model_manager(
        model_input = iotbx.pdb.input(
            source_info = None,
            lines = pdb_lines),
        crystal_symmetry = crystal_symmetry)
    assert new_model.model_as_pdb() == model.model_as_pdb()

    with open(output_cif_file_name) as f:
        cif_lines = flex.split_lines(f.read())
    new_model_from_cif = model_manager(
        model_input = iotbx.pdb.input(
            source_info = None,
            lines = cif_lines),
        crystal_symmetry = crystal_symmetry)
    assert new_model_from_cif.model_as_pdb() == model.model_as_pdb()

    # Read and box the original file again in case we modified m in any
    # previous tests
    m = map_manager(file_name)
    mam=map_model_manager(model=model.deep_copy(),map_manager=m)
    mam.box_all_maps_around_model_and_shift_origin()

    # output_file_name still names the map written in original position.
    file_name = output_file_name
    print ("Reading from %s" %(file_name))
    new_map = iotbx.mrcfile.map_reader(file_name = file_name, verbose = False)
    new_map.data = new_map.data.shift_origin()
    print ("Header information from %s:" %(file_name))
    new_map.show_summary(out = out)
    assert new_map.map_data().origin() == mam.map_manager().map_data().origin()
    assert new_map.crystal_symmetry().is_similar_symmetry(
        mam.map_manager().crystal_symmetry())

    # make a map_model_manager with lots of maps and model and ncs
    from mmtbx.ncs.ncs import ncs
    ncs_object=ncs()
    ncs_object.set_unit_ncs()
    mam = map_model_manager(
        map_manager = m,
        ncs_object = ncs_object,
        map_manager_1 = m.deep_copy(),
        map_manager_2 = m.deep_copy(),
        extra_model_list = [model.deep_copy(),model.deep_copy()],
        extra_model_id_list = ["model_1","model_2"],
        extra_map_manager_list = [m.deep_copy(),m.deep_copy()],
        extra_map_manager_id_list = ["extra_1","extra_2"],
        model = model.deep_copy(), )

    # make a map_model_manager with lots of maps and model and ncs and run
    # with wrapping and ignore_symmetry_conflicts on
    ncs_object=ncs()
    ncs_object.set_unit_ncs()
    m.set_ncs_object(ncs_object.deep_copy())
    mam2 = map_model_manager(
        map_manager = m.deep_copy(),
        ncs_object = ncs_object.deep_copy(),
        map_manager_1 = m.deep_copy(),
        map_manager_2 = m.deep_copy(),
        extra_model_list = [model.deep_copy(),model.deep_copy()],
        extra_model_id_list = ["model_1","model_2"],
        extra_map_manager_list = [m.deep_copy(),m.deep_copy()],
        extra_map_manager_id_list = ["extra_1","extra_2"],
        model = model.deep_copy(),
        ignore_symmetry_conflicts = True,
        wrapping = m.wrapping(), )
    assert mam.map_manager().is_similar(mam2.map_manager())
    assert mam.map_manager().is_similar(mam2.map_manager_1())
    for other_map_manager in mam2.map_managers():
        assert mam.map_manager().is_similar(other_map_manager)
    assert mam.model().shift_cart() == mam2.model().shift_cart()
    assert mam.model().shift_cart() == mam2.get_model_by_id('model_2').shift_cart()

    print ("OK")
def run(args, crystal_symmetry=None, log=None):
    """
    phenix.map_box: extract a box with model and map around selected atoms.

    Parameters:
      args - command-line style arguments, processed with
          mmtbx.utils.process_command_line_args against master_phil. With
          an empty list the usage message and the parameters are printed
          to standard output and None is returned.
      crystal_symmetry - passed to the command-line processing as cmd_cs.
      log - stream for progress output; sys.stdout when None.

    Returns:
      The object returned by mmtbx.utils.extract_box_around_model_and_map,
      with an ncs_object attribute added (the NCS read from params.ncs_file,
      offset by the box shift when there is one; None without an NCS file).

    Raises:
      Sorry - when no single PDB file is given and density_select is not
          set, or when neither a CCP4 map nor exactly one reflection file
          is available.

    Files written: the boxed model (always), plus the map in each format
    named in params.output_format and the offset NCS specification when
    params.ncs_file is set. Names start with params.output_file_name_prefix
    when given, else with "<input file stem>_box".
    """
    h = "phenix.map_box: extract box with model and map around selected atoms"
    if log is None:
        log = sys.stdout
    print_statistics.make_header(h, out=log)
    default_message="""\

%s.

Usage:
  phenix.map_box model.pdb map_coefficients.mtz selection="chain A and resseq 1:10"

or

  phenix.map_box map.ccp4 density_select=True

Parameters:"""%h
    if len(args) == 0:
        print(default_message)
        master_phil.show(prefix=" ")
        return
    inputs = mmtbx.utils.process_command_line_args(
        args = args,
        cmd_cs=crystal_symmetry,
        master_params = master_phil)
    params = inputs.params.extract()

    # PDB file
    if params.pdb_file and not inputs.pdb_file_names:
        inputs.pdb_file_names=[params.pdb_file]
    if len(inputs.pdb_file_names)!=1 and not params.density_select:
        raise Sorry("PDB file is needed unless density_select is set.")
    print_statistics.make_sub_header("pdb model", out=log)
    if len(inputs.pdb_file_names)>0:
        pdb_inp = iotbx.pdb.input(file_name=inputs.pdb_file_names[0])
        pdb_hierarchy = pdb_inp.construct_hierarchy()
        pdb_atoms = pdb_hierarchy.atoms()
        pdb_atoms.reset_i_seq()
    else:
        pdb_hierarchy=None

    # Map or map coefficients
    map_coeff = None
    if inputs.ccp4_map is None:
        if len(inputs.reflection_file_names)!=1:
            raise Sorry("Map or map coefficients file is needed.")
        map_coeff = reflection_file_utils.extract_miller_array_from_file(
            file_name = inputs.reflection_file_names[0],
            label = params.label,
            type = "complex",
            log = log)
        fft_map = map_coeff.fft_map(resolution_factor=params.resolution_factor)
        fft_map.apply_sigma_scaling()
        map_data = fft_map.real_map_unpadded()
        map_source_file_name = inputs.reflection_file_names[0]
    else:
        print_statistics.make_sub_header("CCP4 map", out=log)
        ccp4_map = inputs.ccp4_map
        # Send the summary to the same stream as the rest of the output.
        ccp4_map.show_summary(prefix=" ", out=log)
        map_data = ccp4_map.map_data()
        map_source_file_name = inputs.ccp4_map_file_name
    # Strip directory and extension, whatever the length of the extension.
    map_or_map_coeffs_prefix = os.path.splitext(
        os.path.basename(map_source_file_name))[0]

    if len(inputs.pdb_file_names)>0:
        output_prefix = os.path.splitext(
            os.path.basename(inputs.pdb_file_names[0]))[0]
    else:
        output_prefix=map_or_map_coeffs_prefix

    if not pdb_hierarchy: # get an empty hierarchy
        from cctbx.array_family import flex
        pdb_hierarchy=iotbx.pdb.input(
            source_info='',lines=flex.split_lines('')).construct_hierarchy()
    xray_structure = pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=inputs.crystal_symmetry)
    xray_structure.show_summary(f=log)

    selection = pdb_hierarchy.atom_selection_cache().selection(
        string = params.selection)
    if selection.size():
        print_statistics.make_sub_header("atom selection", out=log)
        log.write("Selection string: selection='%s'\n" % params.selection)
        log.write(" selects %d atoms from total %d atoms.\n" % (
            selection.count(True), selection.size()))
    selection = xray_structure.selection_within(
        radius = params.selection_radius,
        selection = selection)

    if params.density_select:
        print_statistics.make_sub_header(
            "Extracting box around selected density and writing output files",
            out=log)
    else:
        print_statistics.make_sub_header(
            "Extracting box around selected atoms and writing output files",
            out=log)

    box = mmtbx.utils.extract_box_around_model_and_map(
        xray_structure = xray_structure,
        map_data = map_data.as_double(),
        box_cushion = params.box_cushion,
        selection = selection,
        density_select = params.density_select,
        threshold = params.density_select_threshold)

    if box.initial_shift_cart:
        log.write("\nInitial coordinate shift will be (%.1f,%.1f,%.1f)\n\n" %(
            box.initial_shift_cart))
    if box.total_shift_cart:
        log.write("Final coordinate shift: (%.1f,%.1f,%.1f)\n" %(
            box.total_shift_cart))
    log.write("Final cell dimensions: (%.1f,%.1f,%.1f)\n\n" %(
        box.box_crystal_symmetry.unit_cell().parameters()[:3]))
    if box.pdb_outside_box_msg:
        log.write("%s\n" % box.pdb_outside_box_msg)

    # All output files share one stem.
    if params.output_file_name_prefix is None:
        output_stem = "%s_box" % output_prefix
    else:
        output_stem = "%s" % params.output_file_name_prefix

    file_name = "%s.pdb" % output_stem
    ph_box = pdb_hierarchy.select(selection)
    ph_box.adopt_xray_structure(box.xray_structure_box)
    ph_box.write_pdb_file(
        file_name=file_name,
        crystal_symmetry = box.xray_structure_box.crystal_symmetry())

    if "ccp4" in params.output_format:
        file_name = "%s.ccp4" % output_stem
        log.write("writing map to CCP4 formatted file: %s\n" % file_name)
        box.write_ccp4_map(file_name=file_name)
    if "xplor" in params.output_format:
        file_name = "%s.xplor" % output_stem
        log.write("writing map to X-plor formatted file: %s\n" % file_name)
        box.write_xplor_map(file_name=file_name)
    if "mtz" in params.output_format:
        file_name = "%s.mtz" % output_stem
        log.write("writing map coefficients to MTZ file: %s\n" % file_name)
        # Without input coefficients the resolution is estimated from the map.
        if map_coeff is not None:
            d_min = map_coeff.d_min()
        else:
            d_min = maptbx.d_min_from_map(
                map_data=box.map_box,
                unit_cell=box.xray_structure_box.unit_cell())
        box.map_coefficients(
            d_min=d_min,
            resolution_factor=params.resolution_factor,
            file_name=file_name)

    if params.ncs_file:
        output_ncs_file = "%s.ncs_spec" % output_stem
        log.write("\nOffsetting NCS in %s and writing to %s\n" %(
            params.ncs_file,output_ncs_file))
        from mmtbx.ncs.ncs import ncs
        ncs_object=ncs()
        ncs_object.read_ncs(params.ncs_file,log=log)
        ncs_object.display_all(log=log)
        if not ncs_object or ncs_object.max_operators()<1:
            log.write("Skipping...no NCS available\n")
        elif box.total_shift_cart:
            from scitbx.math import matrix
            log.write("Shifting NCS operators "+\
                "based on coordinate shift of (%7.1f,%7.1f,%7.1f)" %(
                tuple(box.total_shift_cart)) + "\n")
            ncs_object=ncs_object.coordinate_offset(
                coordinate_offset=matrix.col(box.total_shift_cart))
            ncs_object.display_all(log=log)
        ncs_object.format_all_for_group_specification(
            file_name=output_ncs_file)
        box.ncs_object=ncs_object
    else:
        box.ncs_object=None
    log.write("\n")
    return box
def run(args, crystal_symmetry=None,
        ncs_object=None,
        pdb_hierarchy=None,
        map_data=None,
        lower_bounds=None,
        upper_bounds=None,
        write_output_files=True,
        log=None):
  """Cut a box out of a map (and model) and optionally write output files.

  The command-line style ``args`` are parsed against ``master_phil``.  A model
  and a map are taken from the keyword arguments when supplied, otherwise from
  the files named in ``args`` or in the parsed parameters.  The box itself is
  produced by ``mmtbx.utils.extract_box_around_model_and_map``.

  Parameters:
    args: list of command-line arguments (file names and phil assignments).
    crystal_symmetry: passed to the argument parser as ``cmd_cs``; when not
      given it is taken from the map coefficients or the CCP4 map.
    ncs_object: symmetry object; when not given an empty one is created and
      filled from ``params.symmetry_file`` if that is set.
    pdb_hierarchy: model to use instead of reading a PDB file.
    map_data: map to use instead of reading map coefficients or a CCP4 map.
    lower_bounds, upper_bounds: when true-valued, override the parameters of
      the same name.
    write_output_files: when false, nothing is written to disk.
    log: stream for all messages; defaults to ``sys.stdout``.

  Returns:
    The box object, with ``hierarchy``, ``hierarchy_original_location``,
    ``unit_cell_parameters_from_ccp4_map`` and
    ``unit_cell_parameters_deduced_from_map_grid`` set on it.  Returns None
    when called with no arguments and no hierarchy (usage is printed instead).

  Raises:
    Sorry: for inconsistent parameter combinations, a missing model or map,
      or a ``ccp4_map_file`` that could not be read as a CCP4 map.
  """
  h = "phenix.map_box: extract box with model and map around selected atoms"
  if log is None:
    log = sys.stdout
  print_statistics.make_header(h, out=log)
  default_message = """\

%s.

Usage:
  phenix.map_box model.pdb map_coefficients.mtz selection="chain A and resseq 1:10"

or

  phenix.map_box map.ccp4 density_select=True

Parameters:""" % h
  if len(args) == 0 and not pdb_hierarchy:
    print(default_message)
    master_phil.show(prefix=" ")
    return
  inputs = mmtbx.utils.process_command_line_args(
    args=args, cmd_cs=crystal_symmetry, master_params=master_phil)
  params = inputs.params.extract()
  master_phil.format(python_object=params).show(out=log)

  # Bounds given in the call take precedence over the parsed parameters.
  if lower_bounds:
    params.lower_bounds = lower_bounds
  if upper_bounds:
    params.upper_bounds = upper_bounds

  # PDB file
  if params.pdb_file and not inputs.pdb_file_names and not pdb_hierarchy:
    inputs.pdb_file_names = [params.pdb_file]
  have_one_pdb_file = (len(inputs.pdb_file_names) == 1)
  if (not have_one_pdb_file and not params.density_select
      and not pdb_hierarchy and not params.keep_map_size
      and not params.upper_bounds and not params.extract_unique):
    raise Sorry("PDB file is needed unless extract_unique, " +
                "density_select, keep_map_size \nor bounds are set .")
  if (not have_one_pdb_file and not pdb_hierarchy
      and (params.mask_atoms or params.soft_mask)):
    raise Sorry("PDB file is needed for mask_atoms or soft_mask")
  if params.density_select and params.keep_map_size:
    raise Sorry("Cannot set both density_select and keep_map_size")
  if params.density_select and params.upper_bounds:
    raise Sorry("Cannot set both density_select and bounds")
  if params.keep_map_size and params.upper_bounds:
    raise Sorry("Cannot set both keep_map_size and bounds")
  if params.upper_bounds and not params.lower_bounds:
    raise Sorry("Please set lower_bounds if you set upper_bounds")
  if params.extract_unique and not params.resolution:
    raise Sorry("Please set resolution for extract_unique")

  print_statistics.make_sub_header("pdb model", out=log)
  if len(inputs.pdb_file_names) > 0:
    pdb_inp = iotbx.pdb.input(file_name=inputs.pdb_file_names[0])
    pdb_hierarchy = pdb_inp.construct_hierarchy()
  if pdb_hierarchy:
    pdb_hierarchy.atoms().reset_i_seq()
  else:
    pdb_hierarchy = None

  # Map or map coefficients
  map_coeff = None
  if not map_data:
    # read first mtz file
    if (len(inputs.reflection_file_names) > 0
        or params.map_coefficients_file is not None):
      # file in phil takes precedent
      if params.map_coefficients_file is not None:
        if len(inputs.reflection_file_names) == 0:
          inputs.reflection_file_names.append(params.map_coefficients_file)
        else:
          inputs.reflection_file_names[0] = params.map_coefficients_file
      map_coeff = reflection_file_utils.extract_miller_array_from_file(
        file_name=inputs.reflection_file_names[0],
        label=params.label,
        type="complex",
        log=log)
      if not crystal_symmetry:
        crystal_symmetry = map_coeff.crystal_symmetry()
      fft_map = map_coeff.fft_map(resolution_factor=params.resolution_factor)
      fft_map.apply_sigma_scaling()
      map_data = fft_map.real_map_unpadded()
      # splitext copes with extensions of any length (was a fixed [:-4]).
      map_or_map_coeffs_prefix = os.path.splitext(
        os.path.basename(inputs.reflection_file_names[0]))[0]
    # or read CCP4 map
    elif (inputs.ccp4_map is not None
          or params.ccp4_map_file is not None):
      if params.ccp4_map_file is not None:
        af = any_file(params.ccp4_map_file)
        if af.file_type == 'ccp4_map':
          inputs.ccp4_map = af.file_content
          inputs.ccp4_map_file_name = params.ccp4_map_file
      ccp4_map = inputs.ccp4_map
      if ccp4_map is None:
        # Only reachable when ccp4_map_file was given but not recognised;
        # fail with a readable message instead of an AttributeError below.
        raise Sorry("Could not read a CCP4 map from %s" %
                    params.ccp4_map_file)
      print_statistics.make_sub_header("CCP4 map", out=log)
      ccp4_map.show_summary(prefix=" ", out=log)
      if not crystal_symmetry:
        crystal_symmetry = ccp4_map.crystal_symmetry()
      map_data = ccp4_map.data  # map_data()
      map_or_map_coeffs_prefix = os.path.splitext(
        os.path.basename(inputs.ccp4_map_file_name))[0]
  else:  # have map_data
    map_or_map_coeffs_prefix = None

  if crystal_symmetry and not inputs.crystal_symmetry:
    inputs.crystal_symmetry = crystal_symmetry

  # final check that map_data exists
  if map_data is None:
    raise Sorry("Map or map coefficients file is needed.")

  if len(inputs.pdb_file_names) > 0:
    output_prefix = os.path.splitext(
      os.path.basename(inputs.pdb_file_names[0]))[0]
  else:
    output_prefix = map_or_map_coeffs_prefix

  def output_file_name(extension):
    # Explicit prefix wins; otherwise "<input stem>_box.<extension>".
    if params.output_file_name_prefix is None:
      return "%s_box.%s" % (output_prefix, extension)
    return "%s.%s" % (params.output_file_name_prefix, extension)

  if not pdb_hierarchy:  # get an empty hierarchy
    from cctbx.array_family import flex
    pdb_hierarchy = iotbx.pdb.input(
      source_info='', lines=flex.split_lines('')).construct_hierarchy()
  xray_structure = pdb_hierarchy.extract_xray_structure(
    crystal_symmetry=inputs.crystal_symmetry)
  xray_structure.show_summary(f=log)
  #
  selection = pdb_hierarchy.atom_selection_cache().selection(
    string=params.selection)
  if selection.size():
    print_statistics.make_sub_header("atom selection", out=log)
    print >> log, "Selection string: selection='%s'" % params.selection
    print >> log, \
      " selects %d atoms from total %d atoms." % (selection.count(True),
                                                  selection.size())
  sites_cart_all = xray_structure.sites_cart()
  selection = xray_structure.selection_within(
    radius=params.selection_radius,
    selection=selection)

  if not ncs_object:
    from mmtbx.ncs.ncs import ncs
    ncs_object = ncs()
    if params.symmetry_file:
      ncs_object.read_ncs(params.symmetry_file, log=log)
      print >> log, "Total of %s operators read" % (
        ncs_object.max_operators())
  if not ncs_object or ncs_object.max_operators() < 1:
    print >> log, "No symmetry available"
  if ncs_object:
    n_ops = max(1, ncs_object.max_operators())
  else:
    n_ops = 1

  # Get sequence if extract_unique is set
  sequence = None
  if params.extract_unique:
    if params.sequence_file:
      # With several operators: take the unique part of the sequence and
      # multiply it by the number of operators.
      remove_duplicates = (n_ops > 1)
      from iotbx.bioinformatics import get_sequences
      sequence = n_ops * (" ".join(
        get_sequences(file_name=params.sequence_file,
                      remove_duplicates=remove_duplicates)))

    if sequence and not params.molecular_mass:
      # get molecular mass from sequence
      from iotbx.bioinformatics import text_from_chains_matching_chain_type
      n_residues = {}
      for chain_type in ('PROTEIN', 'RNA', 'DNA'):
        if params.chain_type in [None, chain_type]:
          n_residues[chain_type] = len(
            text_from_chains_matching_chain_type(text=sequence,
                                                 chain_type=chain_type))
        else:
          n_residues[chain_type] = 0
      params.molecular_mass = n_residues['PROTEIN'] * 110 + (
        n_residues['RNA'] + n_residues['DNA']) * 330
    elif not params.molecular_mass:
      raise Sorry(
        "Need a sequence file or molecular mass for extract_unique")
  #
  if params.density_select:
    print_statistics.make_sub_header(
      "Extracting box around selected density and writing output files",
      out=log)
  else:
    print_statistics.make_sub_header(
      "Extracting box around selected atoms and writing output files",
      out=log)
  #
  if params.value_outside_atoms == 'mean':
    print >> log, "\nValue outside atoms mask will be set to mean inside mask"
  if params.get_half_height_width and params.density_select:
    print >> log, "\nHalf width at half height will be used to id boundaries"
  if params.soft_mask and sites_cart_all.size() > 0:
    print >> log, "\nSoft mask will be applied to model-based mask"
  if params.keep_map_size:
    print >> log, "\nEntire map will be kept (not cutting out region)"
  if params.restrict_map_size:
    print >> log, "\nOutput map will be within input map"
  if params.lower_bounds and params.upper_bounds:
    print >> log, "Bounds for cut out map are (%s,%s,%s) to (%s,%s,%s)" % (
      tuple(list(params.lower_bounds) + list(params.upper_bounds)))

  box = mmtbx.utils.extract_box_around_model_and_map(
    xray_structure=xray_structure,
    map_data=map_data.as_double(),
    box_cushion=params.box_cushion,
    selection=selection,
    density_select=params.density_select,
    threshold=params.density_select_threshold,
    get_half_height_width=params.get_half_height_width,
    mask_atoms=params.mask_atoms,
    soft_mask=params.soft_mask,
    soft_mask_radius=params.soft_mask_radius,
    mask_atoms_atom_radius=params.mask_atoms_atom_radius,
    value_outside_atoms=params.value_outside_atoms,
    keep_map_size=params.keep_map_size,
    restrict_map_size=params.restrict_map_size,
    lower_bounds=params.lower_bounds,
    upper_bounds=params.upper_bounds,
    extract_unique=params.extract_unique,
    chain_type=params.chain_type,
    sequence=sequence,
    solvent_content=params.solvent_content,
    molecular_mass=params.molecular_mass,
    resolution=params.resolution,
    ncs_object=ncs_object,
    symmetry=params.symmetry,
  )

  ph_box = pdb_hierarchy.select(selection)
  ph_box.adopt_xray_structure(box.xray_structure_box)
  box.hierarchy = ph_box

  if (inputs and inputs.crystal_symmetry and inputs.ccp4_map
      and inputs.crystal_symmetry.unit_cell().parameters()
      and inputs.ccp4_map.unit_cell_parameters) and (
      inputs.crystal_symmetry.unit_cell().parameters()
      != inputs.ccp4_map.unit_cell_parameters):
    print >> log, "\nNOTE: Mismatch of unit cell parameters from CCP4 map:"
    print >> log, "Unit cell from CCP4 map 'unit cell parameters': " + \
      "%.1f, %.1f, %.1f, %.1f, %.1f, %.1f)" % tuple(
        inputs.ccp4_map.unit_cell_parameters)
    print >> log, "Unit cell from CCP4 map 'map grid': " + \
      "%.1f, %.1f, %.1f, %.1f, %.1f, %.1f)" % tuple(
        inputs.crystal_symmetry.unit_cell().parameters())
    print >> log, "\nInterpreting this as the 'unit cell parameters' was " + \
      "original map \ndimension and 'map grid' is the " + \
      "portion actually in the map that was supplied here.\n"
    box.unit_cell_parameters_from_ccp4_map = \
      inputs.ccp4_map.unit_cell_parameters
    box.unit_cell_parameters_deduced_from_map_grid = \
      inputs.crystal_symmetry.unit_cell().parameters()
  else:
    box.unit_cell_parameters_from_ccp4_map = None
    box.unit_cell_parameters_deduced_from_map_grid = None

  # ncs_object is original
  # box.ncs_object is shifted by shift_cart

  print >> log, "Box cell dimensions: (%.2f, %.2f, %.2f) A" % (
    box.box_crystal_symmetry.unit_cell().parameters()[:3])
  if params.keep_origin:
    print >> log, "Box origin is at grid position of : (%d, %d, %d) " % (
      tuple(box.origin_shift_grid_units(reverse=True)))
    print >> log, "Box origin is at coordinates: (%.2f, %.2f, %.2f) A" % (
      tuple(-col(box.shift_cart)))

  if box.pdb_outside_box_msg:
    print >> log, box.pdb_outside_box_msg

  # NOTE: box object is always shifted to place origin at (0,0,0)
  # For output files ONLY:
  #   keep_origin==False leave origin at (0,0,0)
  #   keep_origin==True: we shift everything back to where it was,
  if not params.keep_origin:
    if box.shift_cart:
      print >> log, \
        "Final coordinate shift for output files: (%.2f,%.2f,%.2f) A" % (
          tuple(box.shift_cart))
    else:
      print >> log, "\nOutput files are in same location as original: origin " + \
        "is at (0,0,0)"
  else:  # keep_origin
    print >> log, "\nOutput files are in same location as original, just cut out."
    print >> log, "Note that output maps are only valid in the cut out region.\n"

  if params.keep_origin:
    ph_box_original_location = ph_box.deep_copy()
    sites_cart = box.shift_sites_cart_back(
      box.xray_structure_box.sites_cart())
    xrs_offset = ph_box_original_location.extract_xray_structure(
      crystal_symmetry=box.xray_structure_box.crystal_symmetry()
    ).replace_sites_cart(new_sites=sites_cart)
    ph_box_original_location.adopt_xray_structure(xrs_offset)
    box.hierarchy_original_location = ph_box_original_location
  else:
    box.hierarchy_original_location = None

  if write_output_files:
    # Write PDB file
    if ph_box.overall_counts().n_residues > 0:
      file_name = output_file_name("pdb")
      if params.keep_origin:  # Keeping origin
        print >> log, "Writing boxed PDB with box unit cell and in " + \
          "original\n position to: %s" % (file_name)
        ph_box_original_location.write_pdb_file(
          file_name=file_name,
          crystal_symmetry=box.xray_structure_box.crystal_symmetry())
      else:  # write box PDB in box cell
        print >> log, "Writing shifted boxed PDB to file: %s" % file_name
        ph_box.write_pdb_file(
          file_name=file_name,
          crystal_symmetry=box.xray_structure_box.crystal_symmetry())

    # Write NCS file if NCS
    if ncs_object and ncs_object.max_operators() > 0:
      output_symmetry_file = output_file_name("ncs_spec")
      if params.keep_origin:
        if params.symmetry_file:
          print >> log, "\nDuplicating symmetry in %s and writing to %s" % (
            params.symmetry_file, output_symmetry_file)
        else:
          print >> log, "\nWriting symmetry to %s" % (output_symmetry_file)
        ncs_object.format_all_for_group_specification(
          file_name=output_symmetry_file)
      else:
        print >> log, "\nOffsetting symmetry in %s and writing to %s" % (
          params.symmetry_file, output_symmetry_file)
        box.ncs_object.format_all_for_group_specification(
          file_name=output_symmetry_file)

    # Write ccp4 map. Shift back to original location if keep_origin=True
    if "ccp4" in params.output_format:
      file_name = output_file_name("ccp4")
      if params.keep_origin:
        print >> log, "Writing boxed map with box unit_cell and " + \
          "original\n position to CCP4 formatted file: %s" % file_name
      else:
        print >> log, "Writing box map shifted to (0,0,0) to CCP4 " + \
          "formatted file: %s" % file_name
      box.write_ccp4_map(file_name=file_name, shift_back=params.keep_origin)

    # Write xplor map. Shift back if keep_origin=True
    if "xplor" in params.output_format:
      file_name = output_file_name("xplor")
      if params.keep_origin:
        print >> log, "Writing boxed map with box unit_cell and original " + \
          "position\n to X-plor formatted file: %s" % file_name
      else:
        print >> log, "Writing box_map shifted to (0,0,0) to X-plor " + \
          "formatted file: %s" % file_name
      box.write_xplor_map(file_name=file_name, shift_back=params.keep_origin)

    # Write mtz map coeffs. Shift back if keep_origin=True
    if "mtz" in params.output_format:
      file_name = output_file_name("mtz")
      if params.keep_origin:
        print >> log, "Writing map coefficients with box_map unit_cell" + \
          " but position matching\n " + \
          " original position to MTZ file: %s" % file_name
      else:
        print >> log, "Writing box_map coefficients shifted to (0,0,0) " + \
          "to MTZ file: %s" % file_name
      # Resolution: input coefficients first, then the parameter, and only
      # as a last resort an estimate from the boxed map.
      if map_coeff is not None:
        d_min = map_coeff.d_min()
      elif params.resolution is not None:
        d_min = params.resolution
      else:
        d_min = maptbx.d_min_from_map(
          map_data=box.map_box,
          unit_cell=box.xray_structure_box.unit_cell())
      box.map_coefficients(d_min=d_min,
                           resolution_factor=params.resolution_factor,
                           file_name=file_name,
                           shift_back=params.keep_origin)

  print >> log
  return box
lines=flex.split_lines("""\ HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 ANISOU 9 2H3 MPR B 5 8+8 848 848 0 0 0 """)) except ValueError, e: assert not show_diff(str(e), """\ some.pdb, line 2: ANISOU 9 2H3 MPR B 5 8+8 848 848 0 0 0 ---------------------------------^ unexpected plus sign.""") else: raise Exception_expected try : pdb.pdb_input( source_info="some.pdb", lines=flex.split_lines("""\ HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 ANISOU 9 2H3 MPR B 5 8+8 848 848 0 0 0 """), raise_sorry_if_format_error=True) except Sorry : pass else: raise Exception_expected try: pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 ANISOU 9 2H3 MPR B 5 84- 848 848 0 0 0 """)) except ValueError, e: assert not show_diff(str(e), """\
def read_pdb():
  """Drive conformation-dependent restraint updates on a GLY-PHE-LEU model.

  Parses a 14-atom PDB fragment, builds four phi/psi dihedral proxies and
  seven angle proxies over its atoms, wraps them in one
  ``conformation_dependent_restraints`` object per residue and calls
  ``update_restraints`` on each of them for three cycles.
  """
  pdbstring = """\
ATOM      0  CA  GLY A   3       5.804  -2.100   7.324  1.00  1.36           C
ATOM      1  C   GLY A   3       4.651  -1.149   7.578  1.00  1.01           C
ATOM      2  O   GLY A   3       3.598  -1.553   8.071  1.00  1.38           O
ATOM      3  N   GLY A   3       6.706  -1.622   6.294  1.00  1.11           N
ATOM      4  CA  PHE A   4       3.819   1.134   7.419  1.00  0.89           C
ATOM      5  CB  PHE A   4       4.397   2.380   8.094  1.00  1.13           C
ATOM      6  C   PHE A   4       3.185   1.509   6.084  1.00  0.94           C
ATOM      7  N   PHE A   4       4.852   0.121   7.242  1.00  0.88           N
ATOM      8  O   PHE A   4       2.361   2.421   6.010  1.00  1.47           O
ATOM      9  CA  LEU A   5       3.055   1.059   3.693  1.00  0.87           C
ATOM     10  CB  LEU A   5       3.965   0.435   2.634  1.00  1.13           C
ATOM     11  C   LEU A   5       1.634   0.527   3.541  1.00  0.87           C
ATOM     12  N   LEU A   5       3.576   0.800   5.030  1.00  0.92           N
ATOM     13  O   LEU A   5       1.246  -0.440   4.196  1.00  1.23           O
"""
  pdb_inp = iotbx.pdb.input(
    lines=flex.split_lines(pdbstring), source_info=None)
  sites_cart = pdb_inp.atoms().extract_xyz()

  # Dihedrals follow the monomer-library entries
  #   TRANS phi 1 C 2 N 2 CA 2 C  60.00 20.0 3
  #   TRANS psi 1 N 1 CA 1 C 2 N 160.00 30.0 2
  # Each row is (atom indices, ideal angle, esd); weight is 1/esd**2.
  dihedral_table = (
    ([3, 0, 1, 7], 160.0, 30.0),    # residue 1 psi
    ([1, 7, 4, 6], 60.0, 20.0),     # residue 2 phi
    ([7, 4, 6, 8], 160.0, 30.0),    # residue 2 psi
    ([6, 12, 9, 11], 60.0, 20.0),   # residue 3 phi
  )
  dihedral_proxies = geometry_restraints.shared_dihedral_proxy()
  for atom_indices, ideal, esd in dihedral_table:
    dihedral_proxies.append(geometry_restraints.dihedral_proxy(
      i_seqs=atom_indices,
      angle_ideal=ideal,
      weight=1/esd**2,
      periodicity=3))

  # Angle proxies all start with ideal 0 and unit weight.
  angle_table = (
    [3, 0, 1],     # residue 1, a3
    [2, 1, 7],     # residue 1, a7
    [1, 7, 4],     # residue 2, a1
    [7, 4, 6],     # residue 2, a3
    [8, 6, 12],    # residue 2, a7
    [6, 12, 9],    # residue 3, a1
    [12, 9, 11],   # residue 3, a3
  )
  angle_proxies = geometry_restraints.shared_angle_proxy()
  for atom_indices in angle_table:
    angle_proxies.append(geometry_restraints.angle_proxy(
      i_seqs=atom_indices,
      angle_ideal=0,
      weight=1))

  # One row per residue: (name, next name, phi index, psi index, angle map).
  # phi/psi index into dihedral_proxies; the angle map indexes into
  # angle_proxies.
  residue_table = (
    ('GLY', 'PHE', None, 0, [None, None, 0, None, None, None, 1]),
    ('PHE', 'LEU', 1, 2, [2, None, 3, None, None, None, 4]),
    ('LEU', None, 3, None, [5, None, 6, None, None, None, None]),
  )
  cfd_list = [
    conformation_dependent_restraints.conformation_dependent_restraints(
      residue_name=name,
      next_residue_name=next_name,
      conformation_proxies=None,
      i_phi_proxy=i_phi,
      i_psi_proxy=i_psi,
      i_dynamic_angles=dynamic_angles,
      i_dynamic_dihedrals=None)
    for name, next_name, i_phi, i_psi, dynamic_angles in residue_table
  ]

  for cycle in range(1, 4):
    # Blank line, banner, blank line - same bytes as three print statements.
    print("\nStarting cycle %d\n" % cycle)
    for restraints in cfd_list:
      restraints.update_restraints(sites_cart, dihedral_proxies,
                                   angle_proxies)
# exercise_xray_structure_simple: regression test for
# pdb_input.xray_structure_simple().
#
# Steps, in the order they appear below:
#   1. Parse an inline PDB text holding CRYST1/ORIGX/SCALE records and seven
#      ATOM/HETATM records (with SIGATM, ANISOU and SIGUIJ records attached).
#   2. For use_scale_matrix_if_available in (False, True): build the x-ray
#      structure and compare show_summary() and show_scatterers() output with
#      the expected text via show_diff.
#   3. Repeat the comparison with unit_cube_pseudo_crystal=True (expected
#      unit cell (1, 1, 1, 90, 90, 90), space group P 1).
#   4. Parse a single ATOM record, check that
#      enable_scattering_type_unknown=True yields scattering_type "unknown",
#      and that the default call raises Sorry with the expected message.
#
# NOTE(review): the fixtures below are column- and whitespace-exact text
# (PDB records and formatted scatterer listings). The code is left untouched
# here because its literal spacing cannot be re-derived reliably - confirm
# against the upstream test before reformatting.
def exercise_xray_structure_simple(): pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ CRYST1 61.410 54.829 43.543 90.00 90.00 90.00 P 21 21 21 8 ORIGX1 1.000000 0.000000 0.000000 0.00000 ORIGX2 0.000000 1.000000 0.000000 0.00000 ORIGX3 0.000000 0.000000 1.000000 0.00000 SCALE1 0.016284 0.000000 0.000000 0.00000 SCALE2 0.000000 0.018239 0.000000 0.00000 SCALE3 0.000000 0.000000 0.022966 0.00000 ATOM 1 N GLN A 3 35.299 11.075 19.070 1.00 36.89 N ATOM 2 CA GLN A 3 34.482 9.927 18.794 0.63 37.88 C SIGATM 2 CA GLN A 3 1.200 2.300 3.400 0.04 0.05 C ANISOU 2 CA GLN A 3 7794 3221 3376 -1227 1064 2601 C ATOM 3 Q GLN A 3 35.130 8.880 17.864 0.84 37.52 C ANISOU 3 Q GLN A 3 7875 3041 3340 -981 727 2663 C SIGUIJ 3 Q GLN A 3 75 41 40 -1 7 63 C ATOM 4 O GLN A 3 34.548 7.819 17.724 1.00 38.54 STUV ATOM 5 1CB AGLN A 3 32.979 10.223 18.469 1.00 37.80 HETATM 6 CA AION B 1 32.360 11.092 17.308 0.92 35.96 CA2+ HETATM 7 CA ION B 2 30.822 10.665 17.190 1.00 36.87 """)) for use_scale_matrix_if_available in [False, True]: xray_structure = pdb_inp.xray_structure_simple( use_scale_matrix_if_available=use_scale_matrix_if_available) out = StringIO() xray_structure.show_summary(f=out) assert not show_diff( out.getvalue(), """\ Number of scatterers: 7 At special positions: 0 Unit cell: (61.41, 54.829, 43.543, 90, 90, 90) Space group: P 21 21 21 (No. 
19) """) out = StringIO() xray_structure.show_scatterers(f=out) assert not show_diff( out.getvalue(), """\ Label, Scattering, Multiplicity, Coordinates, Occupancy, Uiso, Ustar as Uiso pdb=" N GLN A 3 " N 4 ( 0.5748 0.2020 0.4380) 1.00 0.4672 [ - ] pdb=" CA GLN A 3 " C 4 ( 0.5615 0.1811 0.4316) 0.63 [ - ] 0.4797 u_cart = 0.779 0.322 0.338 -0.123 0.106 0.260 pdb=" Q GLN A 3 " C 4 ( 0.5721 0.1620 0.4103) 0.84 [ - ] 0.4752 u_cart = 0.788 0.304 0.334 -0.098 0.073 0.266 pdb=" O GLN A 3 " segid="STUV" O 4 ( 0.5626 0.1426 0.4070)\ 1.00 0.4881 [ - ] pdb="1CB AGLN A 3 " C 4 ( 0.5370 0.1865 0.4242) 1.00 0.4787 [ - ] pdb="CA AION B 1 " Ca2+ 4 ( 0.5270 0.2023 0.3975) 0.92 0.4554 [ - ] pdb="CA ION B 2 " Ca 4 ( 0.5019 0.1945 0.3948) 1.00 0.4670 [ - ] """) # xray_structure = pdb_inp.xray_structure_simple( unit_cube_pseudo_crystal=True) out = StringIO() xray_structure.show_summary(f=out) assert not show_diff( out.getvalue(), """\ Number of scatterers: 7 At special positions: 0 Unit cell: (1, 1, 1, 90, 90, 90) Space group: P 1 (No. 
1) """) out = StringIO() xray_structure.show_scatterers(f=out) assert not show_diff( out.getvalue(), """\ Label, Scattering, Multiplicity, Coordinates, Occupancy, Uiso, Ustar as Uiso pdb=" N GLN A 3 " N 1 (35.2990 11.0750 19.0700) 1.00 0.4672 [ - ] pdb=" CA GLN A 3 " C 1 (34.4820 9.9270 18.7940) 0.63 [ - ] 0.4797 u_cart = 0.779 0.322 0.338 -0.123 0.106 0.260 pdb=" Q GLN A 3 " C 1 (35.1300 8.8800 17.8640) 0.84 [ - ] 0.4752 u_cart = 0.788 0.304 0.334 -0.098 0.073 0.266 pdb=" O GLN A 3 " segid="STUV" O 1 (34.5480 7.8190 17.7240)\ 1.00 0.4881 [ - ] pdb="1CB AGLN A 3 " C 1 (32.9790 10.2230 18.4690) 1.00 0.4787 [ - ] pdb="CA AION B 1 " Ca2+ 1 (32.3600 11.0920 17.3080) 0.92 0.4554 [ - ] pdb="CA ION B 2 " Ca 1 (30.8220 10.6650 17.1900) 1.00 0.4670 [ - ] """) # pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM 1 N GLN A 3 35.299 11.075 99.070 1.00 36.89 STUV A """)) xray_structure = pdb_inp.xray_structure_simple( enable_scattering_type_unknown=True) assert xray_structure.scatterers()[0].scattering_type == "unknown" try: pdb_inp.xray_structure_simple() except Sorry, e: assert not show_diff( str(e), """\ Unknown chemical element type: "ATOM 1 N GLN A 3 .*.STUV A " To resolve this problem, specify a chemical element type in columns 77-78 of the PDB file, right justified (e.g. " C").""")
def process_inputs(args, log=sys.stdout):
  """Parse PProbe inputs and prepare every file the later stages need.

  The command-line ``args`` are processed against the merged phil scope
  (master parameters plus map and peak-search parameters).  When
  ``params.pprobe.extract`` is set, the model, strip and peaks PDB files and
  the map coefficients are validated or generated as needed; otherwise only
  the peak-dictionary pickle is checked.  In both cases the working
  parameters are saved to ``<prefix>_pprobe.param``.

  Parameters:
    args: list of command-line arguments (file names and phil assignments).
    log: stream that receives all progress messages and the parameter dump.

  Returns:
    The extracted parameter object, updated with the file names determined
    here.

  Raises:
    Sorry: when the master parameter dictionary, the PDB inputs, the
      reflection data or the pickle file are missing or ambiguous.
  """
  print >> log, "-" * 79
  print >> log, "PProbe RUN at %s" % time.ctime()
  print >> log, "Processing all Inputs:"
  # Process the phil scopes in this order so that defaults do not overwrite
  # user inputs: master parameters first ...
  master_phil = phil.parse(master_params_str, process_includes=True)
  # ... then the map parameters from the phenix defaults and the peak search.
  maps_phil = phil.parse(mmtbx.maps.map_and_map_coeff_params_str)
  search_phil = phil.parse(peak_search_param_str)
  total_phil = master_phil.fetch(sources=[maps_phil, search_phil])
  # inputs carries the parsed parameters plus the lists of files found.
  inputs = mmtbx.utils.process_command_line_args(args=args,
                                                 master_params=total_phil)
  # params object contains all command line parameters
  working_phil = inputs.params
  params = working_phil.extract()

  # Locate the master parameter dictionary: next to the script first, then
  # in the current directory.
  model_param = params.input.model_param
  if model_param.model_dict_file is None:
    model_param.model_dict_file = os.path.join(
      PProbe_dataio.get_script_directory(), "pprobe_master.dict")
  if not os.path.isfile(model_param.model_dict_file):
    print >> log, "WARNING -- param file not found!"
    print >> log, "--> trying pprobe_master.dict . . . "
    model_param.model_dict_file = "pprobe_master.dict"
  if not os.path.isfile(model_param.model_dict_file):
    raise Sorry("Master Param Dictionary %s not found!" %
                model_param.model_dict_file)

  if not params.pprobe.extract:
    # Only rescoring from pkl.
    pkl_file = params.input.data_pkl.peak_dict
    # Take the basename BEFORE splitting on "." so that paths such as
    # "./peaks.pkl" do not collapse to an empty prefix.
    pkl_basename = os.path.basename(pkl_file).split(".")[0]
    if (len(pkl_basename) > 0
        and params.output.output_file_name_prefix is None):
      params.output.output_file_name_prefix = pkl_basename
    if not os.path.isfile(pkl_file):
      raise Sorry("\n\tPKL input requested but no file available\n" +
                  "\t\t\t cannot find %s" % pkl_file)
    new_phil = working_phil.format(python_object=params)
    phil_fname = params.output.output_file_name_prefix + "_pprobe.param"
    with open(phil_fname, "w") as f:
      f.write(new_phil.as_str())
    new_params = master_phil.format(python_object=params)
    print >> log, "Runtime Parameters:"
    new_params.show(out=log)
    return params

  # --- extraction mode ---
  # Check for proper PDB input: count every PDB file that was supplied.
  pdb_count = len(inputs.pdb_file_names)
  for pdbin in (params.input.pdb.model_pdb, params.input.pdb.strip_pdb,
                params.input.pdb.peaks_pdb):
    if pdbin is not None:
      pdb_count = pdb_count + 1
  if (pdb_count == 1) and (len(inputs.pdb_file_names) == 1):
    # one vanilla pdb to be used as model
    params.input.pdb.model_pdb = inputs.pdb_file_names[0]
  elif (pdb_count == 3) and (len(inputs.pdb_file_names) == 0):
    pass  # three explicit PDBs, hopefully correct
  else:
    raise Sorry("\n\tInput 1 PDB for automatic stripping and peak finding \n" +
                "\tor all PDB files specifically, like so: \n" +
                "\tfor explicit input: \n" +
                "\t\tmodel_pdb=XXX.pdb strip_pdb=YYY.pdb peaks_pdb=ZZZ.pdb \n" +
                "\tfor automatic pdb generation: \n" +
                "\t\tXXX.pdb")

  # Check for proper reflection file input.
  reflection_files = inputs.reflection_files
  if len(reflection_files) == 0:
    raise Sorry("Reflection data or map coefficients required")
  if len(reflection_files) > 1:
    raise Sorry("Only one type of reflection data can be entered \n" +
                "Enter map coefficients with map_coeff_file=XXX.mtz \n" +
                "or structure factor files as XXX.(any supported)")
  params.input.reflection_data.reflection_file_name = \
    reflection_files[0].file_name()

  # Filename setup.  Basename first, then strip everything after the first
  # "." - splitting the full path first breaks on "./model.pdb".
  model_basename = os.path.basename(params.input.pdb.model_pdb).split(".")[0]
  if (len(model_basename) > 0
      and params.output.output_file_name_prefix is None):
    params.output.output_file_name_prefix = model_basename
  if params.input.input_map.map_coeff_file is not None:
    params.input.parameters.write_maps = False
  new_params = master_phil.format(python_object=params)
  print >> log, "Runtime Parameters:"
  new_params.show(out=log)

  # DATA PROCESSING
  # Set up the model pdb (required and should be known).
  crystal_symmetry = check_symmetry(inputs, params, log)
  model_pdb_input = iotbx.pdb.input(file_name=params.input.pdb.model_pdb)
  model_hier = model_pdb_input.construct_hierarchy()
  model_hier.remove_hd()
  model_xrs = model_hier.extract_xray_structure(
    crystal_symmetry=crystal_symmetry)

  # Strip pdb if needed, write result.
  map_omit_mode = params.input.parameters.map_omit_mode
  if (params.input.pdb.strip_pdb is None) and (map_omit_mode != "asis"):
    strip_xrs, strip_hier = create_strip_pdb(
      model_hier, model_xrs, map_omit_mode, log)
    strip_filename = (params.output.output_file_name_prefix +
                      "_pprobe_strip.pdb")
    print >> log, "Writing Strip PDB to: ", strip_filename
    strip_hier.write_pdb_file(file_name=strip_filename,
                              crystal_symmetry=crystal_symmetry,
                              append_end=True,
                              anisou=False)
    params.input.pdb.strip_pdb = strip_filename
  elif map_omit_mode == "asis":
    strip_xrs, strip_hier = model_xrs, model_hier
    params.input.pdb.strip_pdb = params.input.pdb.model_pdb
  else:
    strip_pdb_input = iotbx.pdb.input(file_name=params.input.pdb.strip_pdb)
    strip_hier = strip_pdb_input.construct_hierarchy()
    strip_hier.remove_hd()
    strip_xrs = strip_hier.extract_xray_structure(
      crystal_symmetry=crystal_symmetry)

  # Make maps if map coefficients were not input; written out by default.
  if params.input.input_map.map_coeff_file is None:
    hkl_in = file_reader.any_file(
      params.input.reflection_data.reflection_file_name,
      force_type="hkl")
    hkl_in.assert_file_type("hkl")
    reflection_files = [hkl_in.file_object]
    f_obs, r_free_flags = setup_reflection_data(
      inputs, params, crystal_symmetry, reflection_files, log)
    # maps object is list of miller arrays
    maps = create_pprobe_maps(f_obs, r_free_flags, params, strip_xrs,
                              strip_hier, log)
    map_fname = params.output.output_file_name_prefix + "_pprobe_maps.mtz"
    print >> log, "Writing PProbe maps to MTZ file: ", map_fname
    maps.write_mtz_file(map_fname)
    params.input.input_map.map_coeff_file = map_fname
  else:
    print >> log, "READING MAP FILE: ", params.input.input_map.map_coeff_file

  # Set up input map coefficients.
  map_coeff = reflection_file_utils.extract_miller_array_from_file(
    file_name=params.input.input_map.map_coeff_file,
    label=params.input.input_map.map_diff_label,
    type="complex",
    log=null_log)
  if params.input.parameters.score_res is None:
    params.input.parameters.score_res = map_coeff.d_min()
    print >> log, " Determined Resolution Limit: %.2f" % \
      params.input.parameters.score_res
    print >> log, " -->Override with \"score_res=XXX\""
  map_fname = params.input.input_map.map_coeff_file

  # If peaks were not input, find them (or, in "valsol" mode, take them from
  # the model) and write them to a pdb file.
  if params.input.pdb.peaks_pdb is None:
    peak_filename = (params.output.output_file_name_prefix +
                     "_pprobe_peaks.pdb")
    if map_omit_mode != "valsol":
      peaks_result = find_map_peaks(params, strip_xrs, log)
      pdb_str = peaks_pdb_str(peaks_result)
      peak_pdb = iotbx.pdb.input(source_info=None,
                                 lines=flex.split_lines(pdb_str))
      peak_hier = peak_pdb.construct_hierarchy()
    else:
      peak_xrs, peak_hier = create_sol_pdb(
        model_hier, model_xrs, map_omit_mode, log)
    print >> log, "Writing Peaks to %s:" % peak_filename
    peak_hier.write_pdb_file(file_name=peak_filename,
                             crystal_symmetry=crystal_symmetry,
                             append_end=True,
                             anisou=False)
    params.input.pdb.peaks_pdb = peak_filename

  # Wrap up: save parameters for the next stage and list the final files.
  new_phil = working_phil.format(python_object=params)
  phil_fname = params.output.output_file_name_prefix + "_pprobe.param"
  with open(phil_fname, "w") as f:
    f.write(new_phil.as_str())
  print >> log, "_" * 79
  print >> log, "Inputs Processed, final files:"
  print >> log, " Model PDB: ", params.input.pdb.model_pdb
  print >> log, " Strip PDB: ", params.input.pdb.strip_pdb
  print >> log, " Peaks PDB: ", params.input.pdb.peaks_pdb
  print >> log, " Map Coeff: ", map_fname
  print >> log, " Resolution: %.2f" % params.input.parameters.score_res
  print >> log, " Params: ", phil_fname
  # also return params
  return params
# exercise_pdb_input(): assertion-based checks of pdb.pdb_input. Takes no
# arguments and returns nothing; a failed check surfaces as AssertionError or
# as the Exception_expected raised from the try/else branches.
#
# What the body checks, in order of appearance:
#   * Empty input (inside `for i_trial in range(3)`): file_type() is "pdb",
#     source_info() is "", and every section / index accessor has size 0.
#   * pdb_string_all_sections: record_type_counts() equals a literal dict and
#     each *_section() accessor is compared with literal text via show_diff;
#     atom serials, elements, model_ids, model/ter/chain/break indices and
#     model_atom_counts() are compared with literal lists.
#   * atoms_with_labels(): name, altloc, resname, chain_id, resseq, icode,
#     segid and id_str() for records with a segid, without a segid, and for
#     records that carry only the ATOM / HETATM record name.
#   * MODEL / ENDMDL handling: model_indices() and chain_indices() for several
#     combinations; misplaced ATOM, MODEL or ENDMDL records and SIGATM /
#     ANISOU / SIGUIJ records without a preceding atom must raise ValueError
#     with the literal message shown.
#   * file_name= input: creates "tmp.pdb" in the current working directory
#     (empty first, then with two ATOM records) and reads it back; the file is
#     not removed in the visible code. file_name="" must raise IOError whose
#     text starts with 'Cannot open file for reading: ""'.
#   * Membership of the pdb.common_residue_names_* tables, results of
#     common_residue_names_get_class(), and rna_dna_reference_residue_name().
#   * crystal_symmetry() and special_position_settings() for a CRYST1 line and
#     for a "REMARK sg=" line, with weak_symmetry False and True.
#   * extract_header_year(), extract_remark_iii_records(iii=2) and
#     extract_connectivity().
#
# NOTE(review): in this copy the line breaks and indentation of the function
# have been collapsed to single spaces (the bare "#" tokens appear to have been
# standalone separator comment lines, and `"""\ ` was presumably a
# backslash-newline). The PDB records and expected-output literals are
# presumably column-sensitive, so the text below is deliberately left
# byte-for-byte unchanged instead of being re-flowed; restore the layout from
# the upstream file before running it. How far the `for i_trial` loop body
# extends cannot be determined from this layout.
def exercise_pdb_input(): for i_trial in range(3): pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("")) assert (pdb_inp.file_type() == "pdb") assert pdb_inp.source_info() == "" assert len(pdb_inp.record_type_counts()) == 0 assert pdb_inp.unknown_section().size() == 0 assert pdb_inp.title_section().size() == 0 assert pdb_inp.remark_section().size() == 0 assert pdb_inp.primary_structure_section().size() == 0 assert pdb_inp.heterogen_section().size() == 0 assert pdb_inp.secondary_structure_section().size() == 0 assert pdb_inp.connectivity_annotation_section().size() == 0 assert pdb_inp.miscellaneous_features_section().size() == 0 assert pdb_inp.crystallographic_section().size() == 0 assert len(pdb_inp.atoms_with_labels()) == 0 assert pdb_inp.atoms().size() == 0 assert pdb_inp.model_ids().size() == 0 assert pdb_inp.model_indices().size() == 0 assert pdb_inp.ter_indices().size() == 0 assert pdb_inp.chain_indices().size() == 0 assert pdb_inp.break_indices().size() == 0 assert pdb_inp.connectivity_section().size() == 0 assert pdb_inp.bookkeeping_section().size() == 0 assert pdb_inp.model_atom_counts().size() == 0 pdb_inp = pdb.pdb_input(source_info="file/name", lines=pdb_string_all_sections) assert pdb_inp.source_info() == "file/name" assert pdb_inp.record_type_counts() == { "KEYWDS": 1, "SEQRES": 1, "LINK ": 1, "ORIGX1": 1, "SITE ": 1, "FTNOTE": 1, "HETSYN": 1, "SIGATM": 2, "MTRIX2": 1, "MTRIX3": 1, "HELIX ": 1, "MTRIX1": 1, "END ": 1, "ANISOU": 2, "TITLE ": 1, "SLTBRG": 1, "REMARK": 1, "TURN ": 1, "SCALE1": 1, "SCALE2": 1, "AUTHOR": 1, "CRYST1": 1, "SIGUIJ": 2, "CISPEP": 1, "ATOM ": 4, "ENDMDL": 2, "ORIGX2": 1, "MODRES": 1, "SOURCE": 1, "FORMUL": 1, "MASTER": 1, "CAVEAT": 1, "HET ": 1, "COMPND": 1, "MODEL ": 2, "REVDAT": 1, "SSBOND": 1, "OBSLTE": 1, "CONECT": 1, "JRNL ": 1, "SPRSDE": 1, " ": 11, "FOOBAR": 1, "HETNAM": 1, "HEADER": 1, "ORIGX3": 1, "BREAK ": 1, "ONHOLD": 1, "SHEET ": 1, "TVECT ": 1, "HYDBND": 1, "TER ": 2, "DBREF ": 1, "EXPDTA": 1, 
"SCALE3": 1, "HETATM": 2, "SEQADV": 1, "SPLIT ": 1, "NUMMDL": 1, "MDLTYP": 2, "DBREF1": 1, "DBREF2": 1 } assert list(pdb_inp.unknown_section()) == ["FOOBAR BAR FOO"] assert not show_diff( "\n".join(pdb_inp.title_section()), """\ HEADER ISOMERASE 02-JUL-92 1FKB ONHOLD 26-JUN-99 OBSLTE 07-DEC-04 1A0Y 1Y4P TITLE ATOMIC STRUCTURE OF THE RAPAMYCIN HUMAN IMMUNOPHILIN FKBP- SPLIT 2QNH 1VSP COMPND FK506 BINDING PROTEIN (FKBP) COMPLEX WITH IMMUNOSUPPRESSANT SOURCE HUMAN (H**O SAPIENS) RECOMBINANT FORM EXPRESSED IN KEYWDS ISOMERASE EXPDTA X-RAY DIFFRACTION NUMMDL 8 MDLTYP CA ATOMS ONLY, CHAIN B, C, D, E, F, G, H, I, J, K, L, M, N, MDLTYP 2 O, P, Q, R, S, T, U AUTHOR G.D.VAN DUYNE,R.F.STANDAERT,S.L.SCHREIBER,J.C.CLARDY REVDAT 1 31-OCT-93 1FKB 0 JRNL AUTH G.D.VAN DUYNE,R.F.STANDAERT,S.L.SCHREIBER,J.CLARDY SPRSDE 02-SEP-03 1O58 1J6N CAVEAT 1B7F INCORRECT CHIRALITY AT C1* OF U2, CHAIN Q""") assert not show_diff( "\n".join(pdb_inp.remark_section()), """\ REMARK 2 RESOLUTION. 1.7 ANGSTROMS. FTNOTE 1 CIS PEPTIDE: GLY 190 - PHE 191""") assert not show_diff( "\n".join(pdb_inp.primary_structure_section()), """\ DBREF 1HTQ A 601 468 SWS Q10377 GLN1_MYCTU 2 478 DBREF1 1JZX A 1 2880 GB 15805042 DBREF2 1JZX A NC_001263 2587937 2590817 SEQRES 1 A 477 THR GLU LYS THR PRO ASP ASP VAL PHE LYS LEU ALA LYS SEQADV 1KEH ALA A 170 SWS Q9L5D6 SER 199 ENGINEERED MODRES 6NSE CYS A 384 CYS MODIFIED BY CAD""") assert not show_diff( "\n".join(pdb_inp.heterogen_section()), """\ HET GLC A 810 12 HETNAM G6D 6-DEOXY-ALPHA-D-GLUCOSE HETSYN G6D QUINOVOSE FORMUL 2 CA 4(CA1 2+)""") assert not show_diff( "\n".join(pdb_inp.secondary_structure_section()), """\ HELIX 1 1 GLN A 18 GLY A 34 1 17 SHEET 1 A 7 PHE A 257 ALA A 260 0 TURN 1 T1 GLY E 2 THR E 5 BETA, TYPE II""") assert not show_diff( "\n".join(pdb_inp.connectivity_annotation_section()), """\ SSBOND 12 CYS B 191 CYS B 220 LINK N PRO C 61 C GLY A 9 1556 HYDBND N GLY A 148 O PHE B 41 SLTBRG N ILE A 16 OD2 ASP A 194 CISPEP 1 ALA A 183 PRO A 184 1 0.96""") 
assert not show_diff( "\n".join(pdb_inp.miscellaneous_features_section()), """\ SITE 1 CAB 3 HIS B 57 ASP B 102 SER B 195""") assert not show_diff( "\n".join(pdb_inp.crystallographic_section()), """\ CRYST1 45.920 49.790 89.880 90.00 97.34 90.00 P 1 21 1 4 ORIGX1 1.000000 0.000000 0.000000 0.00000 ORIGX2 0.000000 1.000000 0.000000 0.00000 ORIGX3 0.000000 0.000000 1.000000 0.00000 SCALE1 0.021777 0.000000 0.002805 0.00000 SCALE2 0.000000 0.020084 0.000000 0.00000 SCALE3 0.000000 0.000000 0.011218 0.00000 MTRIX1 1 0.739109 0.012922 -0.673462 17.07460 1 MTRIX2 1 0.015672 -0.999875 -0.001986 21.64730 1 MTRIX3 1 -0.673404 -0.009087 -0.739219 44.75290 1 TVECT 1 0.00000 0.00000 20.42000""") assert len(pdb_inp.atoms_with_labels()) == 6 assert [atom.serial for atom in pdb_inp.atoms()] \ == [" 1", " 2", " 3", " 4", " 9", " 10"] assert [atom.element for atom in pdb_inp.atoms()] \ == [" N", " C", " C", " O", " ", " "] assert list(pdb_inp.model_ids()) == [" 1", " 3"] assert list(pdb_inp.model_indices()) == [4, 6] assert list(pdb_inp.ter_indices()) == [5, 6] assert [list(v) for v in pdb_inp.chain_indices()] == [[4], [5, 6]] assert list(pdb_inp.break_indices()) == [2] assert not show_diff("\n".join(pdb_inp.connectivity_section()), """\ CONECT 5332 5333 5334 5335 5336""") assert not show_diff( "\n".join(pdb_inp.bookkeeping_section()), """\ MASTER 81 0 0 7 3 0 0 645800 20 0 12 END""") assert list(pdb_inp.model_atom_counts()) == [4, 2] # pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM 1 CB LYS 109 16.113 7.345 47.084 1.00 20.00 A ATOM 2 CG LYS 109 17.058 6.315 47.703 1.00 20.00 A ATOM 3 CB LYS 109 26.721 1.908 15.275 1.00 20.00 B ATOM 4 CG LYS 109 27.664 2.793 16.091 1.00 20.00 B """)) expected_id_strs = """\ pdb=" CB LYS 109 " segid="A " pdb=" CG LYS 109 " segid="A " pdb=" CB LYS 109 " segid="B " pdb=" CG LYS 109 " segid="B " """.splitlines() for awl, eids in zip(pdb_inp.atoms_with_labels(), expected_id_strs): assert not show_diff(awl.id_str(), eids) 
pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM 12345qN123AR12 C1234Ixyz1234.6781234.6781234.678123.56213.56abcdefS123E1C1 HETATM12345qN123AR12 C1234Ixyz1234.6781234.6781234.678123.56213.56abcdefS123E1C1 """)) for awl in pdb_inp.atoms_with_labels(): assert awl.name == "N123" assert awl.altloc == "A" assert awl.resname == "R12" assert awl.chain_id == "C" assert awl.resseq == "1234" assert awl.icode == "I" assert awl.segid == "S123" assert awl.id_str() == 'pdb="N123AR12 C1234I" segid="S123"' pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM 12345qN123AR12 C1234Ixyz1234.6781234.6781234.678123.56213.56abcdef E1C1 HETATM12345qN123AR12 C1234Ixyz1234.6781234.6781234.678123.56213.56abcdef E1C1 """)) for awl in pdb_inp.atoms_with_labels(): assert awl.name == "N123" assert awl.altloc == "A" assert awl.resname == "R12" assert awl.chain_id == "C" assert awl.resseq == "1234" assert awl.icode == "I" assert awl.segid == " " assert awl.id_str() == 'pdb="N123AR12 C1234I"' pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM HETATM """)) for awl in pdb_inp.atoms_with_labels(): assert awl.name == " " assert awl.altloc == " " assert awl.resname == " " assert awl.chain_id == " " assert awl.resseq == " " assert awl.icode == " " assert awl.segid == " " assert awl.id_str() == 'pdb=" "' # pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ """)) assert list(pdb_inp.model_indices()) == [] assert list(pdb_inp.chain_indices()) == [] pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM """)) assert list(pdb_inp.model_indices()) == [1] assert [list(v) for v in pdb_inp.chain_indices()] == [[1]] pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL """)) assert list(pdb_inp.model_indices()) == [0] assert [list(v) for v in pdb_inp.chain_indices()] == [[]] pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ATOM ENDMDL """)) 
assert list(pdb_inp.model_indices()) == [1] assert [list(v) for v in pdb_inp.chain_indices()] == [[1]] pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL MODEL 2 ENDMDL """)) assert list(pdb_inp.model_indices()) == [0, 0] assert [list(v) for v in pdb_inp.chain_indices()] == [[], []] pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL MODEL 2 ATOM ENDMDL """)) assert list(pdb_inp.model_indices()) == [0, 1] assert [list(v) for v in pdb_inp.chain_indices()] == [[], [1]] try: pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL ATOM """)) except ValueError as e: assert not show_diff( str(e), """\ input line 3: ATOM ^ ATOM or HETATM record is outside MODEL/ENDMDL block.""") else: raise Exception_expected try: pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 MODEL 2 """)) except ValueError as e: assert not show_diff( str(e), """\ input line 2: MODEL 2 ^ Missing ENDMDL for previous MODEL record.""") else: raise Exception_expected try: pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM MODEL 1 """)) except ValueError as e: assert not show_diff( str(e), """\ input line 2: MODEL 1 ^ MODEL record must appear before any ATOM or HETATM records.""") else: raise Exception_expected try: pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM ENDMDL """)) except ValueError as e: assert not show_diff( str(e), """\ input line 2: ENDMDL ^ No matching MODEL record.""") else: raise Exception_expected # for record_name in ["SIGATM", "ANISOU", "SIGUIJ"]: try: pdb.pdb_input(source_info=None, lines=flex.std_string([record_name])) except ValueError as e: assert not show_diff( str(e), """\ input line 1: %s ^ no matching ATOM or HETATM record.""" % record_name) else: raise Exception_expected # pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ATOM C ATOM C ATOM D ATOM E ATOM E ENDMDL MODEL 2 ATOM C ATOM C ATOM D ATOM D ATOM E ENDMDL 
MODEL 3 ATOM C ATOM D ATOM D ATOM E X ATOM E ENDMDL MODEL 4 ATOM C ATOM D ATOM C ATOM E X ATOM E ENDMDL """)) assert list(pdb_inp.model_indices()) == [5, 10, 15, 20] assert [list(v) for v in pdb_inp.chain_indices()] \ == [[2,3,5],[7,9,10],[11,13,15], [18,20]] # f = open("tmp.pdb", "w") f.close() pdb_inp = pdb.pdb_input(file_name="tmp.pdb") assert pdb_inp.source_info() == "file tmp.pdb" with open("tmp.pdb", "w") as f: f.write("""\ ATOM 1 CA SER 1 1.212 -12.134 3.757 1.00 0.00 ATOM 2 CA LEU 2 1.118 -9.777 0.735 1.00 0.00 """) pdb_inp = pdb.pdb_input(file_name="tmp.pdb") try: pdb.pdb_input(file_name="") except IOError as e: assert str(e).startswith('Cannot open file for reading: ""') else: raise Exception_expected # assert "HIS" in pdb.common_residue_names_amino_acid assert "TRO" in pdb.common_residue_names_modified_amino_acid assert "GUA" in pdb.common_residue_names_rna_dna assert "2MA" in pdb.common_residue_names_modified_rna_dna assert "CD " in pdb.common_residue_names_ccp4_mon_lib_rna_dna assert "HOH" in pdb.common_residue_names_water assert "SO4" in pdb.common_residue_names_small_molecule assert " FE" in pdb.common_residue_names_element get_class = pdb.common_residue_names_get_class assert get_class(name="ALA") == "common_amino_acid" assert get_class(name="0AF") == "modified_amino_acid" assert get_class(name="TRQ") == "modified_amino_acid" assert get_class(name=" U") == "common_rna_dna" assert get_class(name="2MA") == "modified_rna_dna" assert get_class(name="HOH") == "common_water" assert get_class(name="SO4") == "common_small_molecule" assert get_class(name="CL ") == "common_element" assert get_class(name="ABC") == "other" assert get_class(name="CD ") == "common_element" assert get_class(name="CD ", consider_ccp4_mon_lib_rna_dna=True) \ == "ccp4_mon_lib_rna_dna" # assert pdb.rna_dna_reference_residue_name(common_name="ALA") is None for common_names in [ pdb.common_residue_names_rna_dna, pdb.common_residue_names_ccp4_mon_lib_rna_dna ]: for n in common_names: r = 
pdb.rna_dna_reference_residue_name(common_name=n) assert r is not None assert pdb.rna_dna_reference_residue_name(common_name=" " + n.lower() + " ") == r # for line in """\ CRYST1 61.410 54.829 43.543 90.00 90.00 90.00 P 21 21 21 8 REMARK sg= P2(1)2(1)2(1) a= 61.410 b= 54.829 c= 43.543 alpha= 90 beta= 90 gamma= 90 """.splitlines(): pdb_inp = pdb.pdb_input(source_info=None, lines=flex.std_string([line])) cs = pdb_inp.crystal_symmetry() assert str(cs.unit_cell()) == "(61.41, 54.829, 43.543, 90, 90, 90)" assert str(cs.space_group_info()) == "P 21 21 21" sps = pdb_inp.special_position_settings() assert sps.is_similar_symmetry(cs) assert approx_equal(sps.min_distance_sym_equiv(), 0.5) for weak_symmetry in [False, True]: cs = pdb_inp.crystal_symmetry( crystal_symmetry=crystal.symmetry(unit_cell=(10, 10, 10, 90, 90, 90)), weak_symmetry=weak_symmetry) if (weak_symmetry): assert str( cs.unit_cell()) == "(61.41, 54.829, 43.543, 90, 90, 90)" else: assert str(cs.unit_cell()) == "(10, 10, 10, 90, 90, 90)" assert str(cs.space_group_info()) == "P 21 21 21" sps = pdb_inp.special_position_settings( special_position_settings=cs.special_position_settings( min_distance_sym_equiv=3), weak_symmetry=weak_symmetry) assert sps.is_similar_symmetry(cs) assert approx_equal(sps.min_distance_sym_equiv(), 3) # assert pdb_inp.extract_header_year() is None assert pdb_inp.extract_remark_iii_records(iii=2) == [] pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ HEADER 02-JUL-92 REMARK 2 RESOLUTION. 1.7 ANGSTROMS. """)) assert pdb_inp.extract_header_year() == 1992 assert pdb_inp.extract_remark_iii_records(iii=2) \ == ['REMARK 2 RESOLUTION. 
1.7 ANGSTROMS.'] # extract_connectivity pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ HETATM 1 C ACT 1428 -0.014 0.010 0.027 1.00 20.00 HETATM 2 O ACT 1428 -0.480 -1.088 -0.009 1.00 20.00 HETATM 3 OXT ACT 1428 -0.744 0.936 -0.009 1.00 20.00 HETATM 4 CH3 ACT 1428 1.238 0.142 -0.009 1.00 20.00 CONECT 1 2 3 4 CONECT 4 1 """)) bonds = pdb_inp.extract_connectivity() assert (len(bonds) == 4) assert (list(bonds[0]) == [1, 2, 3]) and (list(bonds[3]) == [0]) assert (list(bonds[1]) == list(bonds[2]) == [])
if crystal_symmetry and not inputs.crystal_symmetry: inputs.crystal_symmetry = crystal_symmetry # final check that map_data exists if (map_data is None): raise Sorry("Map or map coefficients file is needed.") if len(inputs.pdb_file_names) > 0: output_prefix = os.path.basename(inputs.pdb_file_names[0])[:-4] else: output_prefix = map_or_map_coeffs_prefix if not pdb_hierarchy: # get an empty hierarchy from cctbx.array_family import flex pdb_hierarchy = iotbx.pdb.input( source_info='', lines=flex.split_lines('')).construct_hierarchy() xray_structure = pdb_hierarchy.extract_xray_structure( crystal_symmetry=inputs.crystal_symmetry) xray_structure.show_summary(f=log) # selection = pdb_hierarchy.atom_selection_cache().selection( string=params.selection) if selection.size(): print_statistics.make_sub_header("atom selection", out=log) print >> log, "Selection string: selection='%s'" % params.selection print >> log, \ " selects %d atoms from total %d atoms."%(selection.count(True), selection.size()) sites_cart_all = xray_structure.sites_cart() sites_cart = sites_cart_all.select(selection) selection = xray_structure.selection_within(radius=params.selection_radius,
# exercise_xray_structure_simple(): assertion-based checks of
# pdb_input.xray_structure_simple() and pdb_input.xray_structures_simple().
# Takes no arguments and returns nothing; a failed check surfaces as
# AssertionError or as the Exception_expected raised from try/else branches.
#
# What the body checks, in order of appearance:
#   * A seven-atom input with CRYST1 / ORIGX / SCALE and SIGATM / ANISOU /
#     SIGUIJ records: for use_scale_matrix_if_available in [False, True] the
#     show_summary() and show_scatterers() text is compared with literals via
#     show_diff; the same is repeated with unit_cube_pseudo_crystal=True, where
#     the expected summary shows unit cell (1, 1, 1, 90, 90, 90), space group
#     P 1.
#   * Three single-atom inputs for which enable_scattering_type_unknown=True
#     must give scattering_type "unknown", while the default call must raise
#     Sorry with the literal "Unknown chemical element type" or
#     "Unknown charge" message.
#   * An atom ending in "Cs3-": the default call gives scattering_type "Cs";
#     with scattering_type_exact=True it is "unknown" (when unknowns are
#     enabled) or raises Sorry ("Unknown scattering type").
#   * A two-MODEL input: xray_structure_simple() output lists the atoms of both
#     models, and xray_structures_simple() has length 2 with one model each.
#   * PEAK / SITE atoms: the scattering type registry holds only "const".
#   * Empty input and empty MODEL blocks: scatterer counts and the length of
#     xray_structures_simple(); "O-2" and "Fe+3" are asserted to become "O2-"
#     and "Fe3+".
#   * An eight-atom list of element/charge spellings checked against a literal
#     list of scattering types under scattering_type_exact=True.
#   * Symmetry selection: inputs with no CRYST1, cell only, space group only
#     and a full CRYST1 are compared with cs1 / cs2; in every case an explicit
#     crystal_symmetry= argument is asserted to determine the result.
#
# NOTE(review): in this copy the line breaks and indentation of the function
# have been collapsed to single spaces (the bare "#" tokens appear to have been
# standalone separator comment lines, and `"""\ ` was presumably a
# backslash-newline). The Sorry message asserted below refers to "columns
# 77-78 of the PDB file", so the embedded records are column-sensitive; the
# text is therefore left byte-for-byte unchanged instead of being re-flowed.
# Restore the layout from the upstream file before running it.
def exercise_xray_structure_simple(): pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ CRYST1 61.410 54.829 43.543 90.00 90.00 90.00 P 21 21 21 8 ORIGX1 1.000000 0.000000 0.000000 0.00000 ORIGX2 0.000000 1.000000 0.000000 0.00000 ORIGX3 0.000000 0.000000 1.000000 0.00000 SCALE1 0.016284 0.000000 0.000000 0.00000 SCALE2 0.000000 0.018239 0.000000 0.00000 SCALE3 0.000000 0.000000 0.022966 0.00000 ATOM 1 N GLN A 3 35.299 11.075 19.070 1.00 36.89 N ATOM 2 CA GLN A 3 34.482 9.927 18.794 0.63 37.88 C SIGATM 2 CA GLN A 3 1.200 2.300 3.400 0.04 0.05 C ANISOU 2 CA GLN A 3 7794 3221 3376 -1227 1064 2601 C ATOM 3 Q GLN A 3 35.130 8.880 17.864 0.84 37.52 C ANISOU 3 Q GLN A 3 7875 3041 3340 -981 727 2663 C SIGUIJ 3 Q GLN A 3 75 41 40 -1 7 63 C ATOM 4 O GLN A 3 34.548 7.819 17.724 1.00 38.54 STUV ATOM 5 1CB AGLN A 3 32.979 10.223 18.469 1.00 37.80 HETATM 6 CA AION B 1 32.360 11.092 17.308 0.92 35.96 CA2+ HETATM 7 CA ION B 2 30.822 10.665 17.190 1.00 36.87 """)) for use_scale_matrix_if_available in [False, True]: xray_structure = pdb_inp.xray_structure_simple( use_scale_matrix_if_available=use_scale_matrix_if_available) out = StringIO() xray_structure.show_summary(f=out) assert not show_diff( out.getvalue(), """\ Number of scatterers: 7 At special positions: 0 Unit cell: (61.41, 54.829, 43.543, 90, 90, 90) Space group: P 21 21 21 (No. 
19) """) out = StringIO() xray_structure.show_scatterers(f=out) assert not show_diff( out.getvalue(), """\ Label, Scattering, Multiplicity, Coordinates, Occupancy, Uiso, Ustar as Uiso pdb=" N GLN A 3 " N 4 ( 0.5748 0.2020 0.4380) 1.00 0.4672 [ - ] pdb=" CA GLN A 3 " C 4 ( 0.5615 0.1811 0.4316) 0.63 [ - ] 0.4797 u_cart = 0.779 0.322 0.338 -0.123 0.106 0.260 pdb=" Q GLN A 3 " C 4 ( 0.5721 0.1620 0.4103) 0.84 [ - ] 0.4752 u_cart = 0.788 0.304 0.334 -0.098 0.073 0.266 pdb=" O GLN A 3 " segid="STUV" O 4 ( 0.5626 0.1426 0.4070)\ 1.00 0.4881 [ - ] pdb="1CB AGLN A 3 " C 4 ( 0.5370 0.1865 0.4242) 1.00 0.4787 [ - ] pdb="CA AION B 1 " Ca2+ 4 ( 0.5270 0.2023 0.3975) 0.92 0.4554 [ - ] pdb="CA ION B 2 " Ca 4 ( 0.5019 0.1945 0.3948) 1.00 0.4670 [ - ] """) # xray_structure = pdb_inp.xray_structure_simple( unit_cube_pseudo_crystal=True) out = StringIO() xray_structure.show_summary(f=out) assert not show_diff( out.getvalue(), """\ Number of scatterers: 7 At special positions: 0 Unit cell: (1, 1, 1, 90, 90, 90) Space group: P 1 (No. 
1) """) out = StringIO() xray_structure.show_scatterers(f=out) assert not show_diff( out.getvalue(), """\ Label, Scattering, Multiplicity, Coordinates, Occupancy, Uiso, Ustar as Uiso pdb=" N GLN A 3 " N 1 (35.2990 11.0750 19.0700) 1.00 0.4672 [ - ] pdb=" CA GLN A 3 " C 1 (34.4820 9.9270 18.7940) 0.63 [ - ] 0.4797 u_cart = 0.779 0.322 0.338 -0.123 0.106 0.260 pdb=" Q GLN A 3 " C 1 (35.1300 8.8800 17.8640) 0.84 [ - ] 0.4752 u_cart = 0.788 0.304 0.334 -0.098 0.073 0.266 pdb=" O GLN A 3 " segid="STUV" O 1 (34.5480 7.8190 17.7240)\ 1.00 0.4881 [ - ] pdb="1CB AGLN A 3 " C 1 (32.9790 10.2230 18.4690) 1.00 0.4787 [ - ] pdb="CA AION B 1 " Ca2+ 1 (32.3600 11.0920 17.3080) 0.92 0.4554 [ - ] pdb="CA ION B 2 " Ca 1 (30.8220 10.6650 17.1900) 1.00 0.4670 [ - ] """) # pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM 1 N GLN A 3 35.299 11.075 99.070 1.00 36.89 STUV A """)) xray_structure = pdb_inp.xray_structure_simple( enable_scattering_type_unknown=True) assert xray_structure.scatterers()[0].scattering_type == "unknown" try: pdb_inp.xray_structure_simple() except Sorry as e: assert not show_diff( str(e), """\ Unknown chemical element type: "ATOM 1 N GLN A 3 .*.STUV A " To resolve this problem, specify a chemical element type in columns 77-78 of the PDB file, right justified (e.g. " C").""") else: raise Exception_expected pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM 1 1A GLN A 3 35.299 11.075 99.070 1.00 36.89 """)) xray_structure = pdb_inp.xray_structure_simple( enable_scattering_type_unknown=True) assert xray_structure.scatterers()[0].scattering_type == "unknown" try: pdb_inp.xray_structure_simple() except Sorry as e: assert not show_diff( str(e), """\ Unknown chemical element type: "ATOM 1 1A GLN A 3 .*. " To resolve this problem, specify a chemical element type in columns 77-78 of the PDB file, right justified (e.g. 
" C").""") else: raise Exception_expected pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM 1 N GLN A 3 35.299 11.075 99.070 1.00 36.89 Bx5 """)) xray_structure = pdb_inp.xray_structure_simple( enable_scattering_type_unknown=True) assert xray_structure.scatterers()[0].scattering_type == "unknown" try: pdb_inp.xray_structure_simple() except Sorry as e: assert not show_diff( str(e), '''\ Unknown charge: "ATOM 1 N GLN A 3 .*. Bx5" ^^''') else: raise Exception_expected pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM 1 N GLN A 3 35.299 11.075 99.070 1.00 36.89 Cs3- """)) xray_structure = pdb_inp.xray_structure_simple() assert xray_structure.scatterers()[0].scattering_type == "Cs" xray_structure = pdb_inp.xray_structure_simple( scattering_type_exact=True, enable_scattering_type_unknown=True) assert xray_structure.scatterers()[0].scattering_type == "unknown" out = StringIO() xray_structure.scattering_type_registry().show(out=out) assert not show_diff( out.getvalue(), """\ Number of scattering types: 1 Type Number sf(0) Gaussians unknown 1 None None sf(0) = scattering factor at diffraction angle 0. """) try: pdb_inp.xray_structure_simple(scattering_type_exact=True) except Sorry as e: assert not show_diff( str(e), '''\ Unknown scattering type: "ATOM 1 N GLN A 3 .*. 
Cs3-" ^^^^ ^^^^''') else: raise Exception_expected # pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ATOM 1 N GLN A 3 35.299 11.075 19.070 1.00 36.89 ATOM 2 CA GLN A 3 34.482 9.927 18.794 0.63 37.88 ENDMDL MODEL 2 ATOM 1 N GLN A 3 25.299 1.075 9.070 0.54 26.89 ATOM 2 CA GLN A 3 24.482 -1.927 8.794 1.00 27.88 ENDMDL """)) xray_structure = pdb_inp.xray_structure_simple( unit_cube_pseudo_crystal=True) out = StringIO() xray_structure.show_scatterers(f=out) assert not show_diff( out.getvalue(), """\ Label, Scattering, Multiplicity, Coordinates, Occupancy, Uiso, Ustar as Uiso model=" 1" pdb=" N GLN A 3 " N 1 (35.2990 11.0750 19.0700)\ 1.00 0.4672 [ - ] model=" 1" pdb=" CA GLN A 3 " C 1 (34.4820 9.9270 18.7940)\ 0.63 0.4798 [ - ] model=" 2" pdb=" N GLN A 3 " N 1 (25.2990 1.0750 9.0700)\ 0.54 0.3406 [ - ] model=" 2" pdb=" CA GLN A 3 " C 1 (24.4820 -1.9270 8.7940)\ 1.00 0.3531 [ - ] """) xray_structures = pdb_inp.xray_structures_simple( unit_cube_pseudo_crystal=True) assert len(xray_structures) == 2 out = StringIO() xray_structures[0].show_scatterers(f=out) assert not show_diff( out.getvalue(), """\ Label, Scattering, Multiplicity, Coordinates, Occupancy, Uiso, Ustar as Uiso model=" 1" pdb=" N GLN A 3 " N 1 (35.2990 11.0750 19.0700)\ 1.00 0.4672 [ - ] model=" 1" pdb=" CA GLN A 3 " C 1 (34.4820 9.9270 18.7940)\ 0.63 0.4798 [ - ] """) out = StringIO() xray_structures[1].show_scatterers(f=out) assert not show_diff( out.getvalue(), """\ Label, Scattering, Multiplicity, Coordinates, Occupancy, Uiso, Ustar as Uiso model=" 2" pdb=" N GLN A 3 " N 1 (25.2990 1.0750 9.0700)\ 0.54 0.3406 [ - ] model=" 2" pdb=" CA GLN A 3 " C 1 (24.4820 -1.9270 8.7940)\ 1.00 0.3531 [ - ] """) # pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM 369 PEAK PEAK 1 61.114 12.134 8.619 1.00 20.00 PEAK ATOM 504 SITE SITE 2 67.707 2.505 14.951 1.00 20.00 SITE """)) xray_structure = pdb_inp.xray_structure_simple() assert 
xray_structure.scattering_type_registry().type_index_pairs_as_dict() \ == {"const": 0} assert list(xray_structure.scattering_type_registry().unique_counts) == [2] # pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ """)) assert pdb_inp.xray_structure_simple().scatterers().size() == 0 assert len(pdb_inp.xray_structures_simple()) == 1 pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL """)) assert pdb_inp.xray_structure_simple().scatterers().size() == 0 assert len(pdb_inp.xray_structures_simple()) == 1 pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL MODEL 2 ENDMDL """)) assert pdb_inp.xray_structure_simple().scatterers().size() == 0 assert len(pdb_inp.xray_structures_simple()) == 2 pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ATOM 1 N GLN A 3 35.299 11.075 99.070 1.00 36.89 O-2 ENDMDL MODEL 2 ENDMDL """)) assert pdb_inp.xray_structure_simple().scatterers().size() == 1 xray_structures = pdb_inp.xray_structures_simple() assert len(xray_structures) == 2 assert xray_structures[0].scatterers().size() == 1 assert xray_structures[1].scatterers().size() == 0 assert xray_structures[0].scatterers()[0].scattering_type == "O2-" pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL MODEL 2 ATOM 1 N GLN A 3 35.299 11.075 99.070 1.00 36.89 Fe+3 ENDMDL """)) assert pdb_inp.xray_structure_simple().scatterers().size() == 1 xray_structures = pdb_inp.xray_structures_simple() assert len(xray_structures) == 2 assert xray_structures[0].scatterers().size() == 0 assert xray_structures[1].scatterers().size() == 1 assert xray_structures[1].scatterers()[0].scattering_type == "Fe3+" input_pdb_string = """\ ATOM 1 N GLN A 3 35.299 11.075 19.070 1.00 36.89 N ATOM 2 CA GLN A 3 34.482 9.927 18.794 0.63 37.88 C0 ATOM 3 C GLN A 3 35.130 8.880 17.864 0.84 37.52 C 0 ATOM 4 O GLN A 3 34.548 7.819 17.724 1.00 38.54 O00 ATOM 5 1CB GLN A 3 32.979 10.223 
18.469 1.00 37.80 C 1 HETATM 6 CA ION B 1 32.360 11.092 17.308 0.92 35.96 X HETATM 7 CA ION B 2 30.822 10.665 17.190 1.00 36.87 FE4+ ATOM 8 O MET A 5 6.215 22.789 24.067 1.00 0.00 -2 """ pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines(input_pdb_string)) assert [scatterer.scattering_type for scatterer in pdb_inp.xray_structure_simple( scattering_type_exact=True, enable_scattering_type_unknown=True).scatterers()] \ == ["N", "C", "C", "O", "unknown", "unknown", "unknown", "O2-"] # cs1 = crystal.symmetry(unit_cell=(3.113, 3.444, 2.572, 90, 90, 90), space_group_symbol="P1") cs2 = crystal.symmetry(unit_cell=(10, 20, 30, 80, 85, 95), space_group_symbol="P-1") input_pdb_string = """\ ATOM 1 N GLN A 3 35.299 11.075 19.070 1.00 36.89 N ATOM 2 CA GLN A 3 34.482 9.927 18.794 0.63 37.88 C """ pdb_inp = pdb.pdb_input(source_info=None, lines=input_pdb_string) xs = pdb_inp.xray_structure_simple() assert xs.is_similar_symmetry(cs1) xs = pdb_inp.xray_structure_simple(crystal_symmetry=cs2) assert xs.is_similar_symmetry(cs2) input_pdb_string = """\ CRYST1 10.000 20.000 30.000 80.00 85.00 95.00 ATOM 1 N GLN A 3 35.299 11.075 19.070 1.00 36.89 N ATOM 2 CA GLN A 3 34.482 9.927 18.794 0.63 37.88 C """ pdb_inp = pdb.pdb_input(source_info=None, lines=input_pdb_string) xs = pdb_inp.xray_structure_simple() assert xs.is_similar_symmetry( cs1.customized_copy(unit_cell=cs2.unit_cell())) xs = pdb_inp.xray_structure_simple(crystal_symmetry=cs2) assert xs.is_similar_symmetry(cs2) input_pdb_string = """\ CRYST1 P -1 ATOM 1 N GLN A 3 35.299 11.075 19.070 1.00 36.89 N ATOM 2 CA GLN A 3 34.482 9.927 18.794 0.63 37.88 C """ pdb_inp = pdb.pdb_input(source_info=None, lines=input_pdb_string) xs = pdb_inp.xray_structure_simple() assert xs.is_similar_symmetry( cs1.customized_copy(space_group_info=cs2.space_group_info())) xs = pdb_inp.xray_structure_simple(crystal_symmetry=cs2) assert xs.is_similar_symmetry(cs2) input_pdb_string = """\ CRYST1 10.000 20.000 30.000 80.00 85.00 95.00 P -1 ATOM 1 
N GLN A 3 35.299 11.075 19.070 1.00 36.89 N ATOM 2 CA GLN A 3 34.482 9.927 18.794 0.63 37.88 C """ pdb_inp = pdb.pdb_input(source_info=None, lines=input_pdb_string) xs = pdb_inp.xray_structure_simple() assert xs.is_similar_symmetry(cs2) xs = pdb_inp.xray_structure_simple(crystal_symmetry=cs1) assert xs.is_similar_symmetry(cs1)
HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 ANISOU 9 2H3 MPR B 5 8+8 848 848 0 0 0 """)) except ValueError, e: assert not show_diff( str(e), """\ some.pdb, line 2: ANISOU 9 2H3 MPR B 5 8+8 848 848 0 0 0 ---------------------------------^ unexpected plus sign.""") else: raise Exception_expected try: pdb.pdb_input(source_info="some.pdb", lines=flex.split_lines("""\ HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 ANISOU 9 2H3 MPR B 5 8+8 848 848 0 0 0 """), raise_sorry_if_format_error=True) except Sorry: pass else: raise Exception_expected try: pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 ANISOU 9 2H3 MPR B 5 84- 848 848 0 0 0 """)) except ValueError, e: assert not show_diff(
# exercise_xray_structure_simple(): assertion-based checks of
# pdb_input.xray_structure_simple(), written with the `except Sorry, e:` form,
# which only parses under Python 2. Takes no arguments and returns nothing.
#
# What the body checks, in order of appearance:
#   * A seven-atom input with CRYST1 / ORIGX / SCALE and SIGATM / ANISOU /
#     SIGUIJ records: for use_scale_matrix_if_available in [False, True] the
#     show_summary() and show_scatterers() text is compared with literals via
#     show_diff.
#   * The same input with unit_cube_pseudo_crystal=True, where the expected
#     summary shows unit cell (1, 1, 1, 90, 90, 90), space group P 1.
#   * A single atom with "STUV A" after the B-factor field:
#     enable_scattering_type_unknown=True must give scattering_type "unknown",
#     and the default call is expected to raise Sorry with the literal
#     "Unknown chemical element type" message.
#
# NOTE(review): the closing try/except has no `else` branch, so if
# xray_structure_simple() does not raise Sorry the check passes silently;
# this copy may simply be cut short at that point - confirm against the
# upstream file.
# NOTE(review): the line breaks and indentation of the function have been
# collapsed to single spaces (the bare "#" tokens appear to have been
# standalone separator comment lines, and `"""\ ` was presumably a
# backslash-newline). The Sorry message asserted below refers to "columns
# 77-78 of the PDB file", so the embedded records are column-sensitive; the
# text is therefore left byte-for-byte unchanged instead of being re-flowed.
def exercise_xray_structure_simple(): pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ CRYST1 61.410 54.829 43.543 90.00 90.00 90.00 P 21 21 21 8 ORIGX1 1.000000 0.000000 0.000000 0.00000 ORIGX2 0.000000 1.000000 0.000000 0.00000 ORIGX3 0.000000 0.000000 1.000000 0.00000 SCALE1 0.016284 0.000000 0.000000 0.00000 SCALE2 0.000000 0.018239 0.000000 0.00000 SCALE3 0.000000 0.000000 0.022966 0.00000 ATOM 1 N GLN A 3 35.299 11.075 19.070 1.00 36.89 N ATOM 2 CA GLN A 3 34.482 9.927 18.794 0.63 37.88 C SIGATM 2 CA GLN A 3 1.200 2.300 3.400 0.04 0.05 C ANISOU 2 CA GLN A 3 7794 3221 3376 -1227 1064 2601 C ATOM 3 Q GLN A 3 35.130 8.880 17.864 0.84 37.52 C ANISOU 3 Q GLN A 3 7875 3041 3340 -981 727 2663 C SIGUIJ 3 Q GLN A 3 75 41 40 -1 7 63 C ATOM 4 O GLN A 3 34.548 7.819 17.724 1.00 38.54 STUV ATOM 5 1CB AGLN A 3 32.979 10.223 18.469 1.00 37.80 HETATM 6 CA AION B 1 32.360 11.092 17.308 0.92 35.96 CA2+ HETATM 7 CA ION B 2 30.822 10.665 17.190 1.00 36.87 """)) for use_scale_matrix_if_available in [False, True]: xray_structure = pdb_inp.xray_structure_simple( use_scale_matrix_if_available=use_scale_matrix_if_available) out = StringIO() xray_structure.show_summary(f=out) assert not show_diff(out.getvalue(), """\ Number of scatterers: 7 At special positions: 0 Unit cell: (61.41, 54.829, 43.543, 90, 90, 90) Space group: P 21 21 21 (No. 
19) """) out = StringIO() xray_structure.show_scatterers(f=out) assert not show_diff(out.getvalue(), """\ Label, Scattering, Multiplicity, Coordinates, Occupancy, Uiso, Ustar as Uiso pdb=" N GLN A 3 " N 4 ( 0.5748 0.2020 0.4380) 1.00 0.4672 [ - ] pdb=" CA GLN A 3 " C 4 ( 0.5615 0.1811 0.4316) 0.63 [ - ] 0.4797 u_cart = 0.779 0.322 0.338 -0.123 0.106 0.260 pdb=" Q GLN A 3 " C 4 ( 0.5721 0.1620 0.4103) 0.84 [ - ] 0.4752 u_cart = 0.788 0.304 0.334 -0.098 0.073 0.266 pdb=" O GLN A 3 " segid="STUV" O 4 ( 0.5626 0.1426 0.4070)\ 1.00 0.4881 [ - ] pdb="1CB AGLN A 3 " C 4 ( 0.5370 0.1865 0.4242) 1.00 0.4787 [ - ] pdb="CA AION B 1 " Ca2+ 4 ( 0.5270 0.2023 0.3975) 0.92 0.4554 [ - ] pdb="CA ION B 2 " Ca 4 ( 0.5019 0.1945 0.3948) 1.00 0.4670 [ - ] """) # xray_structure = pdb_inp.xray_structure_simple(unit_cube_pseudo_crystal=True) out = StringIO() xray_structure.show_summary(f=out) assert not show_diff(out.getvalue(), """\ Number of scatterers: 7 At special positions: 0 Unit cell: (1, 1, 1, 90, 90, 90) Space group: P 1 (No. 
1) """) out = StringIO() xray_structure.show_scatterers(f=out) assert not show_diff(out.getvalue(), """\ Label, Scattering, Multiplicity, Coordinates, Occupancy, Uiso, Ustar as Uiso pdb=" N GLN A 3 " N 1 (35.2990 11.0750 19.0700) 1.00 0.4672 [ - ] pdb=" CA GLN A 3 " C 1 (34.4820 9.9270 18.7940) 0.63 [ - ] 0.4797 u_cart = 0.779 0.322 0.338 -0.123 0.106 0.260 pdb=" Q GLN A 3 " C 1 (35.1300 8.8800 17.8640) 0.84 [ - ] 0.4752 u_cart = 0.788 0.304 0.334 -0.098 0.073 0.266 pdb=" O GLN A 3 " segid="STUV" O 1 (34.5480 7.8190 17.7240)\ 1.00 0.4881 [ - ] pdb="1CB AGLN A 3 " C 1 (32.9790 10.2230 18.4690) 1.00 0.4787 [ - ] pdb="CA AION B 1 " Ca2+ 1 (32.3600 11.0920 17.3080) 0.92 0.4554 [ - ] pdb="CA ION B 2 " Ca 1 (30.8220 10.6650 17.1900) 1.00 0.4670 [ - ] """) # pdb_inp = pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ ATOM 1 N GLN A 3 35.299 11.075 99.070 1.00 36.89 STUV A """)) xray_structure = pdb_inp.xray_structure_simple( enable_scattering_type_unknown=True) assert xray_structure.scatterers()[0].scattering_type == "unknown" try: pdb_inp.xray_structure_simple() except Sorry, e: assert not show_diff(str(e), """\ Unknown chemical element type: "ATOM 1 N GLN A 3 .*.STUV A " To resolve this problem, specify a chemical element type in columns 77-78 of the PDB file, right justified (e.g. " C").""")
# exercise_pdb_input: assertion-based exercise of pdb.pdb_input.
#
# What the visible code checks:
#   * Starts a three-iteration loop (xrange(3)); how much of the body the loop
#     covers cannot be recovered from the collapsed layout.
#   * Parsing an empty input: file_type() is "pdb", source_info() is "", and
#     every section accessor, index array and atom container is empty.
#   * Parsing pdb_string_all_sections with source_info="file/name":
#     record_type_counts() and each *_section() accessor are compared with
#     fixed expected values, as are atom serials/elements, model_ids(),
#     model/ter/chain/break indices and model_atom_counts().
#   * atoms_with_labels(): label fields and id_str() for records with a
#     segid, without a segid, and for blank ATOM/HETATM records.
#   * model_indices()/chain_indices() for several MODEL/ENDMDL layouts, and
#     the ValueError text for an ATOM record outside a MODEL/ENDMDL block.
#
# NOTE(review): the final try/except has no else branch, so that check passes
#   silently if pdb_input raises nothing; exercise_line_info_exceptions uses
#   "else: raise Exception_expected" for the same pattern.
# NOTE(review): "except ValueError, e" is Python 2-only syntax; other blocks
#   in this file use "except ValueError as e".
# NOTE(review): a second "def exercise_pdb_input()" starts further down in
#   this file; the later definition rebinds the name.
# NOTE(review): whitespace in this block appears collapsed, including inside
#   the string literals (fixed-column PDB records, padded label fields) --
#   presumably an extraction artifact; restore from the upstream file before
#   relying on these assertions.
def exercise_pdb_input(): for i_trial in xrange(3): pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("")) assert (pdb_inp.file_type() == "pdb") assert pdb_inp.source_info() == "" assert len(pdb_inp.record_type_counts()) == 0 assert pdb_inp.unknown_section().size() == 0 assert pdb_inp.title_section().size() == 0 assert pdb_inp.remark_section().size() == 0 assert pdb_inp.primary_structure_section().size() == 0 assert pdb_inp.heterogen_section().size() == 0 assert pdb_inp.secondary_structure_section().size() == 0 assert pdb_inp.connectivity_annotation_section().size() == 0 assert pdb_inp.miscellaneous_features_section().size() == 0 assert pdb_inp.crystallographic_section().size() == 0 assert len(pdb_inp.atoms_with_labels()) == 0 assert pdb_inp.atoms().size() == 0 assert pdb_inp.model_ids().size() == 0 assert pdb_inp.model_indices().size() == 0 assert pdb_inp.ter_indices().size() == 0 assert pdb_inp.chain_indices().size() == 0 assert pdb_inp.break_indices().size() == 0 assert pdb_inp.connectivity_section().size() == 0 assert pdb_inp.bookkeeping_section().size() == 0 assert pdb_inp.model_atom_counts().size() == 0 pdb_inp = pdb.pdb_input(source_info="file/name", lines=pdb_string_all_sections) assert pdb_inp.source_info() == "file/name" assert pdb_inp.record_type_counts() == { "KEYWDS": 1, "SEQRES": 1, "LINK ": 1, "ORIGX1": 1, "SITE ": 1, "FTNOTE": 1, "HETSYN": 1, "SIGATM": 2, "MTRIX2": 1, "MTRIX3": 1, "HELIX ": 1, "MTRIX1": 1, "END ": 1, "ANISOU": 2, "TITLE ": 1, "SLTBRG": 1, "REMARK": 1, "TURN ": 1, "SCALE1": 1, "SCALE2": 1, "AUTHOR": 1, "CRYST1": 1, "SIGUIJ": 2, "CISPEP": 1, "ATOM ": 4, "ENDMDL": 2, "ORIGX2": 1, "MODRES": 1, "SOURCE": 1, "FORMUL": 1, "MASTER": 1, "CAVEAT": 1, "HET ": 1, "COMPND": 1, "MODEL ": 2, "REVDAT": 1, "SSBOND": 1, "OBSLTE": 1, "CONECT": 1, "JRNL ": 1, "SPRSDE": 1, " ": 11, "FOOBAR": 1, "HETNAM": 1, "HEADER": 1, "ORIGX3": 1, "BREAK ": 1, "ONHOLD": 1, "SHEET ": 1, "TVECT ": 1, "HYDBND": 1, "TER ": 2, "DBREF ": 1, "EXPDTA": 1, 
"SCALE3": 1, "HETATM": 2, "SEQADV": 1, "SPLIT ": 1, "NUMMDL": 1, "MDLTYP": 2, "DBREF1": 1, "DBREF2": 1 } assert list(pdb_inp.unknown_section()) == ["FOOBAR BAR FOO"] assert not show_diff( "\n".join(pdb_inp.title_section()), """\ HEADER ISOMERASE 02-JUL-92 1FKB ONHOLD 26-JUN-99 OBSLTE 07-DEC-04 1A0Y 1Y4P TITLE ATOMIC STRUCTURE OF THE RAPAMYCIN HUMAN IMMUNOPHILIN FKBP- SPLIT 2QNH 1VSP COMPND FK506 BINDING PROTEIN (FKBP) COMPLEX WITH IMMUNOSUPPRESSANT SOURCE HUMAN (H**O SAPIENS) RECOMBINANT FORM EXPRESSED IN KEYWDS ISOMERASE EXPDTA X-RAY DIFFRACTION NUMMDL 8 MDLTYP CA ATOMS ONLY, CHAIN B, C, D, E, F, G, H, I, J, K, L, M, N, MDLTYP 2 O, P, Q, R, S, T, U AUTHOR G.D.VAN DUYNE,R.F.STANDAERT,S.L.SCHREIBER,J.C.CLARDY REVDAT 1 31-OCT-93 1FKB 0 JRNL AUTH G.D.VAN DUYNE,R.F.STANDAERT,S.L.SCHREIBER,J.CLARDY SPRSDE 02-SEP-03 1O58 1J6N CAVEAT 1B7F INCORRECT CHIRALITY AT C1* OF U2, CHAIN Q""") assert not show_diff( "\n".join(pdb_inp.remark_section()), """\ REMARK 2 RESOLUTION. 1.7 ANGSTROMS. FTNOTE 1 CIS PEPTIDE: GLY 190 - PHE 191""") assert not show_diff( "\n".join(pdb_inp.primary_structure_section()), """\ DBREF 1HTQ A 601 468 SWS Q10377 GLN1_MYCTU 2 478 DBREF1 1JZX A 1 2880 GB 15805042 DBREF2 1JZX A NC_001263 2587937 2590817 SEQRES 1 A 477 THR GLU LYS THR PRO ASP ASP VAL PHE LYS LEU ALA LYS SEQADV 1KEH ALA A 170 SWS Q9L5D6 SER 199 ENGINEERED MODRES 6NSE CYS A 384 CYS MODIFIED BY CAD""") assert not show_diff( "\n".join(pdb_inp.heterogen_section()), """\ HET GLC A 810 12 HETNAM G6D 6-DEOXY-ALPHA-D-GLUCOSE HETSYN G6D QUINOVOSE FORMUL 2 CA 4(CA1 2+)""") assert not show_diff( "\n".join(pdb_inp.secondary_structure_section()), """\ HELIX 1 1 GLN A 18 GLY A 34 1 17 SHEET 1 A 7 PHE A 257 ALA A 260 0 TURN 1 T1 GLY E 2 THR E 5 BETA, TYPE II""") assert not show_diff( "\n".join(pdb_inp.connectivity_annotation_section()), """\ SSBOND 12 CYS B 191 CYS B 220 LINK N PRO C 61 C GLY A 9 1556 HYDBND N GLY A 148 O PHE B 41 SLTBRG N ILE A 16 OD2 ASP A 194 CISPEP 1 ALA A 183 PRO A 184 1 0.96""") 
assert not show_diff( "\n".join(pdb_inp.miscellaneous_features_section()), """\ SITE 1 CAB 3 HIS B 57 ASP B 102 SER B 195""") assert not show_diff( "\n".join(pdb_inp.crystallographic_section()), """\ CRYST1 45.920 49.790 89.880 90.00 97.34 90.00 P 1 21 1 4 ORIGX1 1.000000 0.000000 0.000000 0.00000 ORIGX2 0.000000 1.000000 0.000000 0.00000 ORIGX3 0.000000 0.000000 1.000000 0.00000 SCALE1 0.021777 0.000000 0.002805 0.00000 SCALE2 0.000000 0.020084 0.000000 0.00000 SCALE3 0.000000 0.000000 0.011218 0.00000 MTRIX1 1 0.739109 0.012922 -0.673462 17.07460 1 MTRIX2 1 0.015672 -0.999875 -0.001986 21.64730 1 MTRIX3 1 -0.673404 -0.009087 -0.739219 44.75290 1 TVECT 1 0.00000 0.00000 20.42000""") assert len(pdb_inp.atoms_with_labels()) == 6 assert [atom.serial for atom in pdb_inp.atoms()] \ == [" 1", " 2", " 3", " 4", " 9", " 10"] assert [atom.element for atom in pdb_inp.atoms()] \ == [" N", " C", " C", " O", " ", " "] assert list(pdb_inp.model_ids()) == [" 1", " 3"] assert list(pdb_inp.model_indices()) == [4, 6] assert list(pdb_inp.ter_indices()) == [5, 6] assert [list(v) for v in pdb_inp.chain_indices()] == [[4], [5, 6]] assert list(pdb_inp.break_indices()) == [2] assert not show_diff("\n".join(pdb_inp.connectivity_section()), """\ CONECT 5332 5333 5334 5335 5336""") assert not show_diff( "\n".join(pdb_inp.bookkeeping_section()), """\ MASTER 81 0 0 7 3 0 0 645800 20 0 12 END""") assert list(pdb_inp.model_atom_counts()) == [4, 2] # pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM 1 CB LYS 109 16.113 7.345 47.084 1.00 20.00 A ATOM 2 CG LYS 109 17.058 6.315 47.703 1.00 20.00 A ATOM 3 CB LYS 109 26.721 1.908 15.275 1.00 20.00 B ATOM 4 CG LYS 109 27.664 2.793 16.091 1.00 20.00 B """)) expected_id_strs = """\ pdb=" CB LYS 109 " segid="A " pdb=" CG LYS 109 " segid="A " pdb=" CB LYS 109 " segid="B " pdb=" CG LYS 109 " segid="B " """.splitlines() for awl, eids in zip(pdb_inp.atoms_with_labels(), expected_id_strs): assert not show_diff(awl.id_str(), eids) 
pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM 12345qN123AR12 C1234Ixyz1234.6781234.6781234.678123.56213.56abcdefS123E1C1 HETATM12345qN123AR12 C1234Ixyz1234.6781234.6781234.678123.56213.56abcdefS123E1C1 """)) for awl in pdb_inp.atoms_with_labels(): assert awl.name == "N123" assert awl.altloc == "A" assert awl.resname == "R12" assert awl.chain_id == "C" assert awl.resseq == "1234" assert awl.icode == "I" assert awl.segid == "S123" assert awl.id_str() == 'pdb="N123AR12 C1234I" segid="S123"' pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM 12345qN123AR12 C1234Ixyz1234.6781234.6781234.678123.56213.56abcdef E1C1 HETATM12345qN123AR12 C1234Ixyz1234.6781234.6781234.678123.56213.56abcdef E1C1 """)) for awl in pdb_inp.atoms_with_labels(): assert awl.name == "N123" assert awl.altloc == "A" assert awl.resname == "R12" assert awl.chain_id == "C" assert awl.resseq == "1234" assert awl.icode == "I" assert awl.segid == " " assert awl.id_str() == 'pdb="N123AR12 C1234I"' pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM HETATM """)) for awl in pdb_inp.atoms_with_labels(): assert awl.name == " " assert awl.altloc == " " assert awl.resname == " " assert awl.chain_id == " " assert awl.resseq == " " assert awl.icode == " " assert awl.segid == " " assert awl.id_str() == 'pdb=" "' # pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ """)) assert list(pdb_inp.model_indices()) == [] assert list(pdb_inp.chain_indices()) == [] pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ ATOM """)) assert list(pdb_inp.model_indices()) == [1] assert [list(v) for v in pdb_inp.chain_indices()] == [[1]] pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL """)) assert list(pdb_inp.model_indices()) == [0] assert [list(v) for v in pdb_inp.chain_indices()] == [[]] pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ATOM ENDMDL """)) 
assert list(pdb_inp.model_indices()) == [1] assert [list(v) for v in pdb_inp.chain_indices()] == [[1]] pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL MODEL 2 ENDMDL """)) assert list(pdb_inp.model_indices()) == [0, 0] assert [list(v) for v in pdb_inp.chain_indices()] == [[], []] pdb_inp = pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL MODEL 2 ATOM ENDMDL """)) assert list(pdb_inp.model_indices()) == [0, 1] assert [list(v) for v in pdb_inp.chain_indices()] == [[], [1]] try: pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL ATOM """)) except ValueError, e: assert not show_diff( str(e), """\ input line 3: ATOM ^ ATOM or HETATM record is outside MODEL/ENDMDL block.""")
# exercise_line_info_exceptions: checks the error text pdb.pdb_input produces
# for records with malformed numeric fields.
#
# What the visible code checks:
#   * The first call (a single "ATOM" record) is made outside any try block,
#     so an exception there would propagate out of the exercise.
#   * Each following case feeds a bad ANISOU, ATOM or HETATM record and
#     expects ValueError; str(e) is compared via show_diff with an expected
#     message made of the location ("some.pdb, line N" when source_info is
#     given, "input line N" when it is None), the offending record, a dashed
#     pointer line and the reason ("unexpected plus sign.", "unexpected minus
#     sign.", "unexpected character.", "not a floating-point number.").
#   * With raise_sorry_if_format_error=True the same bad ANISOU record is
#     expected to raise Sorry instead.
#   * Every try has "else: raise Exception_expected", so a case fails if no
#     exception is raised.
#
# NOTE(review): whitespace in this block appears collapsed, including inside
#   the PDB records and the dashed pointer lines (two of the ATOM cases now
#   look identical but expect different pointer lengths) -- presumably an
#   extraction artifact; restore from the upstream file before relying on
#   these assertions.
def exercise_line_info_exceptions(): pdb.pdb_input(source_info=None, lines=flex.std_string(["ATOM"])) # try: pdb.pdb_input(source_info="some.pdb", lines=flex.split_lines("""\ HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 ANISOU 9 2H3 MPR B 5 8+8 848 848 0 0 0 """)) except ValueError as e: assert not show_diff( str(e), """\ some.pdb, line 2: ANISOU 9 2H3 MPR B 5 8+8 848 848 0 0 0 ---------------------------------^ unexpected plus sign.""") else: raise Exception_expected try: pdb.pdb_input(source_info="some.pdb", lines=flex.split_lines("""\ HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 ANISOU 9 2H3 MPR B 5 8+8 848 848 0 0 0 """), raise_sorry_if_format_error=True) except Sorry: pass else: raise Exception_expected try: pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 ANISOU 9 2H3 MPR B 5 84- 848 848 0 0 0 """)) except ValueError as e: assert not show_diff( str(e), """\ input line 3: ANISOU 9 2H3 MPR B 5 84- 848 848 0 0 0 ----------------------------------^ unexpected minus sign.""") else: raise Exception_expected try: pdb.pdb_input(source_info=None, lines=flex.split_lines("""\ HETATM 9 2H3 MPR B 5 16.388 0.289 6.613 1.00 0.08 ANISOU 9 2H3 MPR B 5 c 848 848 0 0 0 """)) except ValueError as e: assert not show_diff( str(e), """\ input line 2: ANISOU 9 2H3 MPR B 5 c 848 848 0 0 0 ------------------------------^ unexpected character.""") else: raise Exception_expected # try: pdb.pdb_input( source_info="some.pdb", lines=flex.std_string([ "ATOM 1045 O HOH 30 x 0.530 42.610 45.267 1.00 33.84" ])) except ValueError as e: assert not show_diff( str(e), """\ some.pdb, line 1: ATOM 1045 O HOH 30 x 0.530 42.610 45.267 1.00 33.84 ------------------------------^ not a floating-point number.""") else: raise Exception_expected try: pdb.pdb_input( source_info="some.pdb", lines=flex.std_string([ "ATOM 1045 O HOH 30 x 0.530 42.610 45.267 1.00 33.84" ])) except ValueError 
as e: assert not show_diff( str(e), """\ some.pdb, line 1: ATOM 1045 O HOH 30 x 0.530 42.610 45.267 1.00 33.84 -------------------------------^ not a floating-point number.""") else: raise Exception_expected try: pdb.pdb_input( source_info="some.pdb", lines=flex.std_string([ "HETATM 4160 O HOH S 272 nan 0.000 0.000 1.00 54.72" ])) except ValueError as e: assert not show_diff( str(e), """\ some.pdb, line 1: HETATM 4160 O HOH S 272 nan 0.000 0.000 1.00 54.72 -----------------------------------^ not a floating-point number.""") else: raise Exception_expected try: pdb.pdb_input( source_info="some.pdb", lines=flex.std_string([ "ATOM 1045 O HOH 30 0x530 42.610 45.267 1.00 33.84" ])) except ValueError as e: assert not show_diff( str(e), """\ some.pdb, line 1: ATOM 1045 O HOH 30 0x530 42.610 45.267 1.00 33.84 ----------------------------------^ unexpected character.""") else: raise Exception_expected
def substitute_ss(real_h,
                  xray_structure,
                  ss_annotation,
                  params = None,
                  grm=None,
                  use_plane_peptide_bond_restr=True,
                  fix_rotamer_outliers=True,
                  cif_objects=None,
                  log=null_out(),
                  rotamer_manager=None,
                  reference_map=None,
                  verbose=False):
  """
  Substitute secondary structure elements in real_h hierarchy with ideal ones
  _in_place_, then regularize the geometry of the result.

  Returns reference torsion proxies - the only thing that cannot be restored
  with little effort outside the procedure
  (grm.geometry.get_chi_torsion_proxies()). Returns None without touching
  real_h when the processed parameters have "enabled" switched off.

  real_h - hierarchy to substitute secondary structure elements; its
      coordinates are overwritten.
  xray_structure - only its crystal_symmetry() is read directly here (for
      special-position checks, PDB interpretation and the map-based
      minimization); it is also handed to get_geometry_restraints_manager.
  ss_annotation - iotbx.pdb.annotation object. Annotations that select no
      atoms are removed from it by remove_empty_annotations().
  params - passed to process_params(); the processed result supplies the
      sigmas, iteration counts and optional output file name used below.
  grm - restraints manager. If None, one is built by interpreting
      real_h.as_pdb_string(); otherwise secondary structure restraints are
      set on the supplied manager.
  use_plane_peptide_bond_restr - adds apply_peptide_plane=True to the
      interpretation parameters (only used when grm is None).
  fix_rotamer_outliers - forwarded to run2 (only when reference_map is None).
  cif_objects - forwarded to process_pdb_file_srv (only when grm is None).
  log - output stream for progress messages.
  rotamer_manager - RotamerEval instance; created here when None.
  reference_map - if None, reference coordinate restraints are added and
      run2 performs the minimization; otherwise minimize_wrapper_with_map is
      used and the result is also written to "after_ss_map_min.pdb".
  verbose - extra progress messages; also routes the run2 log to log.

  Raises Sorry if a chain has more than one conformer, or if annotations
  select no atoms and processed_params.skip_empty_ss_elements is false.
  """
  # NOTE: t0..t11 are consumed only by the commented-out timing report at the
  # end of the function; they are kept so that report can be re-enabled.
  t0 = time()
  if rotamer_manager is None:
    rotamer_manager = RotamerEval()
  for model in real_h.models():
    for chain in model.chains():
      if len(chain.conformers()) > 1:
        raise Sorry("Alternative conformations are not supported.")

  processed_params = process_params(params)
  if not processed_params.enabled:
    return None

  expected_n_hbonds = 0
  ann = ss_annotation
  for h in ann.helices:
    expected_n_hbonds += h.get_n_maximum_hbonds()
  edited_h = real_h.deep_copy()
  n_atoms_in_real_h = real_h.atoms_size()
  selection_cache = real_h.atom_selection_cache()

  # check the annotation for correctness (atoms are actually in hierarchy)
  error_msg = "The following secondary structure annotations result in \n"
  error_msg +="empty atom selections. They don't match the structre: \n"
  t1 = time()
  # Checking for SS selections
  deleted_annotations = ann.remove_empty_annotations(
      hierarchy=real_h,
      asc=selection_cache)
  if not deleted_annotations.is_empty():
    # List the offending records before deciding what to do with them, so
    # that the Sorry raised below actually names them. (Previously the list
    # was only assembled in the branch that never raises, and the sheet part
    # referenced an unbound name "st".)
    for h in deleted_annotations.helices:
      error_msg += " %s\n" % h.as_pdb_str()
    for sh in deleted_annotations.sheets:
      error_msg += " %s\n" % sh.as_pdb_str()
    if not processed_params.skip_empty_ss_elements:
      raise Sorry(error_msg)
    if len(deleted_annotations.helices) > 0:
      print >> log, "Removing the following helices because there are"
      print >> log, "no corresponding atoms in the model:"
      for h in deleted_annotations.helices:
        print >> log, h.as_pdb_str()
    if len(deleted_annotations.sheets) > 0:
      print >> log, "Removing the following sheets because there are"
      print >> log, "no corresponding atoms in the model:"
      for sh in deleted_annotations.sheets:
        print >> log, sh.as_pdb_str()
  phil_str = ann.as_restraint_groups()

  # gathering initial special position atoms
  special_position_settings = crystal.special_position_settings(
      crystal_symmetry = xray_structure.crystal_symmetry())
  site_symmetry_table = \
      special_position_settings.site_symmetry_table(
        sites_cart = real_h.atoms().extract_xyz(),
        unconditional_general_position_flags=(
          real_h.atoms().extract_occ() != 1))
  original_spi = site_symmetry_table.special_position_indices()

  t2 = time()
  # Actually idealizing SS elements
  log.write("Replacing ss-elements with ideal ones:\n")
  log.flush()
  for h in ann.helices:
    log.write(" %s\n" % h.as_pdb_str())
    log.flush()
    selstring = h.as_atom_selections()
    isel = selection_cache.iselection(selstring[0])
    all_bsel = flex.bool(n_atoms_in_real_h, False)
    all_bsel.set_selected(isel, True)
    sel_h = real_h.select(all_bsel, copy_atoms=True)
    ideal_h = get_helix(helix_class=h.helix_class,
                        pdb_hierarchy_template=sel_h,
                        rotamer_manager=rotamer_manager)
    # edited_h.select(all_bsel).atoms().set_xyz(ideal_h.atoms().extract_xyz())
    set_xyz_carefully(dest_h=edited_h.select(all_bsel), source_h=ideal_h)
  for sh in ann.sheets:
    # Indent every line of the sheet record. replace() also appends the
    # indent after the final newline, so exactly that many characters are
    # cut off again (a fixed [:-2] would also eat the newline when the
    # indent is a single character).
    indent = " "
    sheet_text = (" %s\n" % sh.as_pdb_str()).replace("\n", "\n" + indent)
    log.write(sheet_text[:-len(indent)])
    log.flush()
    for st in sh.strands:
      selstring = st.as_atom_selections()
      isel = selection_cache.iselection(selstring)
      all_bsel = flex.bool(n_atoms_in_real_h, False)
      all_bsel.set_selected(isel, True)
      sel_h = real_h.select(all_bsel, copy_atoms=True)
      ideal_h = secondary_structure_from_sequence(
          pdb_str=beta_pdb_str,
          sequence=None,
          pdb_hierarchy_template=sel_h,
          rotamer_manager=rotamer_manager,
          )
      set_xyz_carefully(edited_h.select(all_bsel), ideal_h)
      # edited_h.select(all_bsel).atoms().set_xyz(ideal_h.atoms().extract_xyz())

  t3 = time()
  pre_result_h = edited_h
  pre_result_h.reset_i_seq_if_necessary()
  n_atoms = real_h.atoms_size()
  helix_selection = flex.bool(n_atoms, False)
  sheet_selection = flex.bool(n_atoms, False)
  other_selection = flex.bool(n_atoms, False)
  ss_for_tors_selection = flex.bool(n_atoms, False)
  nonss_for_tors_selection = flex.bool(n_atoms, False)
  selection_cache = real_h.atom_selection_cache()
  # Start with all main-chain atoms (CA, N, O, C) marked as "other" and as
  # non-SS for torsions; SS elements are carved out of both below.
  #isel = selection_cache.iselection("name ca")
  isel = selection_cache.iselection("name ca or name n or name o or name c")
  other_selection.set_selected(isel, True)
  n_main_chain_atoms = other_selection.count(True)
  nonss_for_tors_selection.set_selected(isel, True)
  main_chain_selection_prefix = "(name ca or name n or name o or name c) %s"

  t4 = time()
  print >> log, "Preparing selections..."
  log.flush()
  # Here we are just preparing selections
  for h in ann.helices:
    ss_sels = h.as_atom_selections()[0]
    selstring = main_chain_selection_prefix % ss_sels
    isel = selection_cache.iselection(selstring)
    helix_selection.set_selected(isel, True)
    other_selection.set_selected(isel, False)
    ss_for_tors_selection.set_selected(isel, True)
    nonss_for_tors_selection.set_selected(isel, False)

  for sheet in ann.sheets:
    for ss_sels in sheet.as_atom_selections():
      selstring = main_chain_selection_prefix % ss_sels
      isel = selection_cache.iselection(selstring)
      sheet_selection.set_selected(isel, True)
      other_selection.set_selected(isel, False)
      ss_for_tors_selection.set_selected(isel, True)
      nonss_for_tors_selection.set_selected(isel, False)
  t5 = time()
  isel = selection_cache.iselection(
      "not name ca and not name n and not name o and not name c")
  other_selection.set_selected(isel, False)
  # Atoms annotated as both helix and sheet are kept in the helix selection
  # only, so the three reference selections stay disjoint.
  helix_sheet_intersection = helix_selection & sheet_selection
  if helix_sheet_intersection.count(True) > 0:
    sheet_selection = sheet_selection & ~helix_sheet_intersection
  assert ((helix_selection | sheet_selection) & other_selection).count(True)==0

  from mmtbx.monomer_library.pdb_interpretation import grand_master_phil_str
  params_line = grand_master_phil_str
  params_line += "secondary_structure {%s}" % secondary_structure.sec_str_master_phil_str
  # From here on "params" no longer refers to the caller's argument.
  params = iotbx.phil.parse(input_string=params_line, process_includes=True)#.extract()
  # This does not the same way for a strange reason. Need to investigate.
  # The number of resulting hbonds is different later.
  # w_params = params.extract()
  # w_params.pdb_interpretation.secondary_structure.protein.remove_outliers = False
  # w_params.pdb_interpretation.peptide_link.ramachandran_restraints = True
  # w_params.pdb_interpretation.c_beta_restraints = True
  # w_params.pdb_interpretation.secondary_structure.enabled = True
  # params.format(python_object=w_params)

  if grm is None:
    custom_par_text = "\n".join([
        "pdb_interpretation.secondary_structure {protein.remove_outliers = False\n%s}" \
            % phil_str,
        "pdb_interpretation.peptide_link.ramachandran_restraints = True",
        "c_beta_restraints = True",
        "pdb_interpretation.secondary_structure.enabled=True",
        "pdb_interpretation.clash_guard.nonbonded_distance_threshold=None",
        "pdb_interpretation.max_reasonable_bond_distance=None",
        # "pdb_interpretation.nonbonded_weight=500",
        "pdb_interpretation.peptide_link.oldfield.weight_scale=3",
        "pdb_interpretation.peptide_link.oldfield.plot_cutoff=0.03",
        "pdb_interpretation.peptide_link.omega_esd_override_value=3",
        "pdb_interpretation.peptide_link.apply_all_trans=True",
        ])

    if use_plane_peptide_bond_restr:
      custom_par_text += "\npdb_interpretation.peptide_link.apply_peptide_plane=True"

    custom_pars = params.fetch(
        source=iotbx.phil.parse(custom_par_text)).extract()
    params = custom_pars
    # params = w_params

    t6 = time()
    import mmtbx.utils
    processed_pdb_files_srv = mmtbx.utils.\
        process_pdb_file_srv(
            crystal_symmetry= xray_structure.crystal_symmetry(),
            pdb_interpretation_params = params.pdb_interpretation,
            log=null_out(),
            cif_objects=cif_objects)
    if verbose:
      print >> log, "Processing file..."
      log.flush()
    processed_pdb_file, junk = processed_pdb_files_srv.\
        process_pdb_files(raw_records=flex.split_lines(real_h.as_pdb_string()))
    t7 = time()

    grm = get_geometry_restraints_manager(
        processed_pdb_file, xray_structure)
    t8 = time()
  else:
    # NOTE(review): this assigns into secondary_structure.default_params
    # itself, not a copy, so remove_outliers=False presumably persists for
    # later users of default_params -- confirm this is intended.
    ss_params = secondary_structure.default_params
    ss_params.secondary_structure.protein.remove_outliers=False
    ss_manager = secondary_structure.manager(
        pdb_hierarchy=real_h,
        geometry_restraints_manager=grm.geometry,
        sec_str_from_pdb_file=ss_annotation,
        params=ss_params.secondary_structure,
        mon_lib_srv=None,
        verbose=-1,
        log=log)
    grm.geometry.set_secondary_structure_restraints(
        ss_manager=ss_manager,
        hierarchy=real_h,
        log=log)
  real_h.reset_i_seq_if_necessary()
  from mmtbx.geometry_restraints import reference
  if reference_map is None:
    if verbose:
      print >> log, "Adding reference coordinate restraints..."
    # One set of reference restraints per class of main-chain atoms, each
    # with its own sigma. Targets are the original (real_h) coordinates.
    grm.geometry.append_reference_coordinate_restraints_in_place(
        reference.add_coordinate_restraints(
            sites_cart = real_h.atoms().extract_xyz().select(helix_selection),
            selection = helix_selection,
            sigma = processed_params.sigma_on_reference_helix))
    grm.geometry.append_reference_coordinate_restraints_in_place(
        reference.add_coordinate_restraints(
            sites_cart = real_h.atoms().extract_xyz().select(sheet_selection),
            selection = sheet_selection,
            sigma = processed_params.sigma_on_reference_sheet))
    grm.geometry.append_reference_coordinate_restraints_in_place(
        reference.add_coordinate_restraints(
            sites_cart = real_h.atoms().extract_xyz().select(other_selection),
            selection = other_selection,
            sigma = processed_params.sigma_on_reference_non_ss))
  if verbose:
    print >> log, "Adding chi torsion restraints..."
  # Torsion targets: idealized coordinates for SS atoms, original coordinates
  # for the remaining main-chain atoms.
  grm.geometry.add_chi_torsion_restraints_in_place(
      pdb_hierarchy = pre_result_h,
      sites_cart = pre_result_h.atoms().extract_xyz().\
          select(ss_for_tors_selection),
      selection = ss_for_tors_selection,
      chi_angles_only = False,
      sigma = processed_params.sigma_on_torsion_ss)
  grm.geometry.add_chi_torsion_restraints_in_place(
      pdb_hierarchy = pre_result_h,
      sites_cart = real_h.atoms().extract_xyz().\
          select(nonss_for_tors_selection),
      selection = nonss_for_tors_selection,
      chi_angles_only = False,
      sigma = processed_params.sigma_on_torsion_nonss)

  real_h.atoms().set_xyz(pre_result_h.atoms().extract_xyz())
  #
  # Check and correct for special positions
  #
  special_position_settings = crystal.special_position_settings(
      crystal_symmetry = xray_structure.crystal_symmetry())
  site_symmetry_table = \
      special_position_settings.site_symmetry_table(
        sites_cart = real_h.atoms().extract_xyz(),
        unconditional_general_position_flags=(
          real_h.atoms().extract_occ() != 1))
  spi = site_symmetry_table.special_position_indices()
  if spi.size() > 0:
    print >> log, "Moving atoms from special positions:"
    for spi_i in spi:
      # Only atoms that were not on a special position before the
      # substitution are shifted (by +0.2 along each axis).
      if spi_i not in original_spi:
        new_coords = (
            real_h.atoms()[spi_i].xyz[0]+0.2,
            real_h.atoms()[spi_i].xyz[1]+0.2,
            real_h.atoms()[spi_i].xyz[2]+0.2)
        print >> log, " ", real_h.atoms()[spi_i].id_str(),
        print >> log, tuple(real_h.atoms()[spi_i].xyz), "-->", new_coords
        real_h.atoms()[spi_i].set_xyz(new_coords)

  t9 = time()
  if processed_params.file_name_before_regularization is not None:
    grm.geometry.pair_proxies(sites_cart=real_h.atoms().extract_xyz())
    if grm.geometry.ramachandran_manager is not None:
      grm.geometry.ramachandran_manager.update_phi_psi_targets(
          sites_cart=real_h.atoms().extract_xyz())
    print >> log, "Outputting model before regularization %s" % processed_params.file_name_before_regularization
    real_h.write_pdb_file(
        file_name=processed_params.file_name_before_regularization)
    # Replace the last four characters of the file name with ".geo".
    geo_fname = processed_params.file_name_before_regularization[:-4]+'.geo'
    print >> log, "Outputting geo file for regularization %s" % geo_fname
    grm.write_geo_file(
        site_labels=[atom.id_str() for atom in real_h.atoms()],
        file_name=geo_fname)

  #testing number of restraints
  assert grm.geometry.get_n_den_proxies() == 0
  if reference_map is None:
    assert grm.geometry.get_n_reference_coordinate_proxies() == n_main_chain_atoms
  refinement_log = null_out()
  log.write(
      "Refining geometry of substituted secondary structure elements...")
  log.flush()
  if verbose:
    refinement_log = log
  from mmtbx.refinement.geometry_minimization import run2
  t10 = time()
  if reference_map is None:
    run2(
        restraints_manager = grm,
        pdb_hierarchy = real_h,
        correct_special_position_tolerance = 1.0,
        max_number_of_iterations = processed_params.n_iter,
        number_of_macro_cycles = processed_params.n_macro,
        bond = True,
        nonbonded = True,
        angle = True,
        dihedral = True,
        chirality = True,
        planarity = True,
        fix_rotamer_outliers = fix_rotamer_outliers,
        log = refinement_log)
  else:
    ref_xrs = real_h.extract_xray_structure(
        crystal_symmetry=xray_structure.crystal_symmetry())
    minimize_wrapper_with_map(
        pdb_h=real_h,
        xrs=ref_xrs,
        target_map=reference_map,
        grm=grm,
        ncs_restraints_group_list=[],
        mon_lib_srv=None,
        ss_annotation=ss_annotation,
        refine_ncs_operators=False,
        number_of_cycles=processed_params.n_macro,
        log=log)
    # NOTE(review): fixed file name written to the current directory.
    real_h.write_pdb_file("after_ss_map_min.pdb")
  log.write(" Done\n")
  log.flush()
  t11 = time()
  # print >> log, "Initial checking, init : %.4f" % (t1-t0)
  # print >> log, "Checking SS : %.4f" % (t2-t1)
  # print >> log, "Initializing selections : %.4f" % (t4-t3)
  # print >> log, "Looping for selections : %.4f" % (t5-t4)
  # print >> log, "Finalizing selections : %.4f" % (t6-t5)
  # print >> log, "PDB interpretation : %.4f" % (t7-t6)
  # print >> log, "Get GRM : %.4f" % (t8-t7)
  # print >> log, "Adding restraints to GRM : %.4f" % (t9-t8)
  # print >> log, "Running GM : %.4f" % (t11-t10)
  # print_hbond_proxies(grm.geometry,real_h)
  return grm.geometry.get_chi_torsion_proxies()
def exercise_pdb_input(): for i_trial in xrange(3): pdb_inp = pdb.pdb_input( source_info=None, lines=flex.split_lines("")) assert (pdb_inp.file_type() == "pdb") assert pdb_inp.source_info() == "" assert len(pdb_inp.record_type_counts()) == 0 assert pdb_inp.unknown_section().size() == 0 assert pdb_inp.title_section().size() == 0 assert pdb_inp.remark_section().size() == 0 assert pdb_inp.primary_structure_section().size() == 0 assert pdb_inp.heterogen_section().size() == 0 assert pdb_inp.secondary_structure_section().size() == 0 assert pdb_inp.connectivity_annotation_section().size() == 0 assert pdb_inp.miscellaneous_features_section().size() == 0 assert pdb_inp.crystallographic_section().size() == 0 assert len(pdb_inp.atoms_with_labels()) == 0 assert pdb_inp.atoms().size() == 0 assert pdb_inp.model_ids().size() == 0 assert pdb_inp.model_indices().size() == 0 assert pdb_inp.ter_indices().size() == 0 assert pdb_inp.chain_indices().size() == 0 assert pdb_inp.break_indices().size() == 0 assert pdb_inp.connectivity_section().size() == 0 assert pdb_inp.bookkeeping_section().size() == 0 assert pdb_inp.model_atom_counts().size() == 0 pdb_inp = pdb.pdb_input( source_info="file/name", lines=pdb_string_all_sections) assert pdb_inp.source_info() == "file/name" assert pdb_inp.record_type_counts() == { "KEYWDS": 1, "SEQRES": 1, "LINK ": 1, "ORIGX1": 1, "SITE ": 1, "FTNOTE": 1, "HETSYN": 1, "SIGATM": 2, "MTRIX2": 1, "MTRIX3": 1, "HELIX ": 1, "MTRIX1": 1, "END ": 1, "ANISOU": 2, "TITLE ": 1, "SLTBRG": 1, "REMARK": 1, "TURN ": 1, "SCALE1": 1, "SCALE2": 1, "AUTHOR": 1, "CRYST1": 1, "SIGUIJ": 2, "CISPEP": 1, "ATOM ": 4, "ENDMDL": 2, "ORIGX2": 1, "MODRES": 1, "SOURCE": 1, "FORMUL": 1, "MASTER": 1, "CAVEAT": 1, "HET ": 1, "COMPND": 1, "MODEL ": 2, "REVDAT": 1, "SSBOND": 1, "OBSLTE": 1, "CONECT": 1, "JRNL ": 1, "SPRSDE": 1, " ":11, "FOOBAR": 1, "HETNAM": 1, "HEADER": 1, "ORIGX3": 1, "BREAK ": 1, "ONHOLD": 1, "SHEET ": 1, "TVECT ": 1, "HYDBND": 1, "TER ": 2, "DBREF ": 1, "EXPDTA": 1, 
"SCALE3": 1, "HETATM": 2, "SEQADV": 1, "SPLIT ": 1, "NUMMDL": 1, "MDLTYP": 2, "DBREF1": 1, "DBREF2": 1} assert list(pdb_inp.unknown_section()) == ["FOOBAR BAR FOO"] assert not show_diff("\n".join(pdb_inp.title_section()), """\ HEADER ISOMERASE 02-JUL-92 1FKB ONHOLD 26-JUN-99 OBSLTE 07-DEC-04 1A0Y 1Y4P TITLE ATOMIC STRUCTURE OF THE RAPAMYCIN HUMAN IMMUNOPHILIN FKBP- SPLIT 2QNH 1VSP COMPND FK506 BINDING PROTEIN (FKBP) COMPLEX WITH IMMUNOSUPPRESSANT SOURCE HUMAN (H**O SAPIENS) RECOMBINANT FORM EXPRESSED IN KEYWDS ISOMERASE EXPDTA X-RAY DIFFRACTION NUMMDL 8 MDLTYP CA ATOMS ONLY, CHAIN B, C, D, E, F, G, H, I, J, K, L, M, N, MDLTYP 2 O, P, Q, R, S, T, U AUTHOR G.D.VAN DUYNE,R.F.STANDAERT,S.L.SCHREIBER,J.C.CLARDY REVDAT 1 31-OCT-93 1FKB 0 JRNL AUTH G.D.VAN DUYNE,R.F.STANDAERT,S.L.SCHREIBER,J.CLARDY SPRSDE 02-SEP-03 1O58 1J6N CAVEAT 1B7F INCORRECT CHIRALITY AT C1* OF U2, CHAIN Q""") assert not show_diff("\n".join(pdb_inp.remark_section()), """\ REMARK 2 RESOLUTION. 1.7 ANGSTROMS. FTNOTE 1 CIS PEPTIDE: GLY 190 - PHE 191""") assert not show_diff("\n".join(pdb_inp.primary_structure_section()), """\ DBREF 1HTQ A 601 468 SWS Q10377 GLN1_MYCTU 2 478 DBREF1 1JZX A 1 2880 GB 15805042 DBREF2 1JZX A NC_001263 2587937 2590817 SEQRES 1 A 477 THR GLU LYS THR PRO ASP ASP VAL PHE LYS LEU ALA LYS SEQADV 1KEH ALA A 170 SWS Q9L5D6 SER 199 ENGINEERED MODRES 6NSE CYS A 384 CYS MODIFIED BY CAD""") assert not show_diff("\n".join(pdb_inp.heterogen_section()), """\ HET GLC A 810 12 HETNAM G6D 6-DEOXY-ALPHA-D-GLUCOSE HETSYN G6D QUINOVOSE FORMUL 2 CA 4(CA1 2+)""") assert not show_diff("\n".join(pdb_inp.secondary_structure_section()), """\ HELIX 1 1 GLN A 18 GLY A 34 1 17 SHEET 1 A 7 PHE A 257 ALA A 260 0 TURN 1 T1 GLY E 2 THR E 5 BETA, TYPE II""") assert not show_diff( "\n".join(pdb_inp.connectivity_annotation_section()), """\ SSBOND 12 CYS B 191 CYS B 220 LINK N PRO C 61 C GLY A 9 1556 HYDBND N GLY A 148 O PHE B 41 SLTBRG N ILE A 16 OD2 ASP A 194 CISPEP 1 ALA A 183 PRO A 184 1 0.96""") assert not 
show_diff( "\n".join(pdb_inp.miscellaneous_features_section()), """\ SITE 1 CAB 3 HIS B 57 ASP B 102 SER B 195""") assert not show_diff("\n".join(pdb_inp.crystallographic_section()), """\ CRYST1 45.920 49.790 89.880 90.00 97.34 90.00 P 1 21 1 4 ORIGX1 1.000000 0.000000 0.000000 0.00000 ORIGX2 0.000000 1.000000 0.000000 0.00000 ORIGX3 0.000000 0.000000 1.000000 0.00000 SCALE1 0.021777 0.000000 0.002805 0.00000 SCALE2 0.000000 0.020084 0.000000 0.00000 SCALE3 0.000000 0.000000 0.011218 0.00000 MTRIX1 1 0.739109 0.012922 -0.673462 17.07460 1 MTRIX2 1 0.015672 -0.999875 -0.001986 21.64730 1 MTRIX3 1 -0.673404 -0.009087 -0.739219 44.75290 1 TVECT 1 0.00000 0.00000 20.42000""") assert len(pdb_inp.atoms_with_labels()) == 6 assert [atom.serial for atom in pdb_inp.atoms()] \ == [" 1", " 2", " 3", " 4", " 9", " 10"] assert [atom.element for atom in pdb_inp.atoms()] \ == [" N", " C", " C", " O", " ", " "] assert list(pdb_inp.model_ids()) == [" 1", " 3"] assert list(pdb_inp.model_indices()) == [4,6] assert list(pdb_inp.ter_indices()) == [5,6] assert [list(v) for v in pdb_inp.chain_indices()] == [[4],[5,6]] assert list(pdb_inp.break_indices()) == [2] assert not show_diff("\n".join(pdb_inp.connectivity_section()), """\ CONECT 5332 5333 5334 5335 5336""") assert not show_diff("\n".join(pdb_inp.bookkeeping_section()), """\ MASTER 81 0 0 7 3 0 0 645800 20 0 12 END""") assert list(pdb_inp.model_atom_counts()) == [4,2] # pdb_inp = pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ ATOM 1 CB LYS 109 16.113 7.345 47.084 1.00 20.00 A ATOM 2 CG LYS 109 17.058 6.315 47.703 1.00 20.00 A ATOM 3 CB LYS 109 26.721 1.908 15.275 1.00 20.00 B ATOM 4 CG LYS 109 27.664 2.793 16.091 1.00 20.00 B """)) expected_id_strs = """\ pdb=" CB LYS 109 " segid="A " pdb=" CG LYS 109 " segid="A " pdb=" CB LYS 109 " segid="B " pdb=" CG LYS 109 " segid="B " """.splitlines() for awl,eids in zip(pdb_inp.atoms_with_labels(), expected_id_strs): assert not show_diff(awl.id_str(), eids) pdb_inp = 
pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ ATOM 12345qN123AR12 C1234Ixyz1234.6781234.6781234.678123.56213.56abcdefS123E1C1 HETATM12345qN123AR12 C1234Ixyz1234.6781234.6781234.678123.56213.56abcdefS123E1C1 """)) for awl in pdb_inp.atoms_with_labels(): assert awl.name == "N123" assert awl.altloc == "A" assert awl.resname == "R12" assert awl.chain_id == "C" assert awl.resseq == "1234" assert awl.icode == "I" assert awl.segid == "S123" assert awl.id_str() == 'pdb="N123AR12 C1234I" segid="S123"' pdb_inp = pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ ATOM 12345qN123AR12 C1234Ixyz1234.6781234.6781234.678123.56213.56abcdef E1C1 HETATM12345qN123AR12 C1234Ixyz1234.6781234.6781234.678123.56213.56abcdef E1C1 """)) for awl in pdb_inp.atoms_with_labels(): assert awl.name == "N123" assert awl.altloc == "A" assert awl.resname == "R12" assert awl.chain_id == "C" assert awl.resseq == "1234" assert awl.icode == "I" assert awl.segid == " " assert awl.id_str() == 'pdb="N123AR12 C1234I"' pdb_inp = pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ ATOM HETATM """)) for awl in pdb_inp.atoms_with_labels(): assert awl.name == " " assert awl.altloc == " " assert awl.resname == " " assert awl.chain_id == " " assert awl.resseq == " " assert awl.icode == " " assert awl.segid == " " assert awl.id_str() == 'pdb=" "' # pdb_inp = pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ """)) assert list(pdb_inp.model_indices()) == [] assert list(pdb_inp.chain_indices()) == [] pdb_inp = pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ ATOM """)) assert list(pdb_inp.model_indices()) == [1] assert [list(v) for v in pdb_inp.chain_indices()] == [[1]] pdb_inp = pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL """)) assert list(pdb_inp.model_indices()) == [0] assert [list(v) for v in pdb_inp.chain_indices()] == [[]] pdb_inp = pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ MODEL 1 ATOM ENDMDL """)) assert 
list(pdb_inp.model_indices()) == [1] assert [list(v) for v in pdb_inp.chain_indices()] == [[1]] pdb_inp = pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL MODEL 2 ENDMDL """)) assert list(pdb_inp.model_indices()) == [0,0] assert [list(v) for v in pdb_inp.chain_indices()] == [[],[]] pdb_inp = pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL MODEL 2 ATOM ENDMDL """)) assert list(pdb_inp.model_indices()) == [0,1] assert [list(v) for v in pdb_inp.chain_indices()] == [[],[1]] try: pdb.pdb_input( source_info=None, lines=flex.split_lines("""\ MODEL 1 ENDMDL ATOM """)) except ValueError, e: assert not show_diff(str(e), """\ input line 3: ATOM ^ ATOM or HETATM record is outside MODEL/ENDMDL block.""")