def __init__(self,
             model,
             pdb_hierarchy=None,  # keep for mmtbx.validation_summary (multiple models)
             fmodel=None,
             fmodel_neutron=None,
             sequences=None,
             flags=None,
             header_info=None,
             raw_data=None,
             unmerged_data=None,
             keep_hydrogens=True,
             nuclear=False,
             save_probe_unformatted_file=None,
             show_hydrogen_outliers=False,
             min_cc_two_fofc=0.8,
             n_bins_data=10,
             count_anomalous_pairs_separately=False,
             use_internal_variance=True,
             outliers_only=True,
             use_pdb_header_resolution_cutoffs=False,
             file_name=None,
             ligand_selection=None,
             rotamer_library="8000",
             map_params=None):
    """
    Run the validation analyses selected by ``flags`` and store every result
    on the matching attribute of ``self``.

    All names in ``self.__slots__`` are first reset to None, so an analysis
    that is skipped leaves its attribute as None.

    Parameters (types are project objects; descriptions are limited to how
    this method uses them):

    model -- source of the hierarchy, xray structure, geometry restraints,
      crystal symmetry and ``all_chain_proxies``.  May be None, in which
      case ``pdb_hierarchy`` must be given and the other four are None.
    pdb_hierarchy -- only used when ``model`` is None; otherwise replaced by
      ``model.get_hierarchy()``.
    fmodel, fmodel_neutron -- optional; drive the data statistics,
      reciprocal-space real-space/water analyses, merging, xtriage and
      Wilson B.
    sequences -- if given and ``flags.seq`` is set, sequence validation runs.
    flags -- analysis switches; ``molprobity_flags()`` is used when None.
    header_info -- stored on ``self``; its ``d_min``/``d_max`` are applied
      to ``fmodel`` when ``use_pdb_header_resolution_cutoffs`` is true.
    raw_data, unmerged_data -- forwarded to the data statistics, merging and
      xtriage analyses.
    keep_hydrogens, nuclear -- forwarded to the geometry statistics;
      ``nuclear`` also sets ``ignore_hd`` and is passed to ``validate_H``.
    save_probe_unformatted_file -- if not None, path to which
      ``probe_unformatted`` of the clash manager is written.
    min_cc_two_fofc -- passed as ``cc_min`` to ``experimental.real_space``.
    n_bins_data, count_anomalous_pairs_separately, use_internal_variance --
      forwarded to the data/merging statistics.
    outliers_only -- forwarded to the RNA validation.
    ligand_selection -- forwarded to ``model_properties.model_statistics``.
    rotamer_library -- must be "8000".
    map_params -- optional; when it names a map file, or a map coefficient
      file plus label, real-space and water analyses use the maps and the
      fmodel-based versions of those two analyses are skipped.
    show_hydrogen_outliers, file_name -- accepted but not referenced in this
      method body.

    Raises AssertionError if ``rotamer_library`` is not "8000", if both
    ``model`` and ``pdb_hierarchy`` are None, or if restraints are requested
    without an xray structure.  Raises Sorry if the probe file cannot be
    written.
    """
    assert rotamer_library == "8000", "data_version given to RotamerEval not recognized."
    for name in self.__slots__:
        setattr(self, name, None)

    # use objects from model
    self.model = model
    if (self.model is not None):
        pdb_hierarchy = self.model.get_hierarchy()
        xray_structure = self.model.get_xray_structure()
        geometry_restraints_manager = self.model.get_restraints_manager().geometry
        crystal_symmetry = self.model.crystal_symmetry()
        all_chain_proxies = self.model.all_chain_proxies
    else:
        assert (pdb_hierarchy is not None)
        xray_structure = None
        geometry_restraints_manager = None
        crystal_symmetry = None
        all_chain_proxies = None

    # very important - the i_seq attributes may be extracted later
    pdb_hierarchy.atoms().reset_i_seq()
    self.pdb_hierarchy = pdb_hierarchy
    if (xray_structure is None):
        # fall back to the fmodel structure, or build one from the hierarchy
        if (fmodel is not None):
            xray_structure = fmodel.xray_structure
        elif (crystal_symmetry is not None):
            xray_structure = pdb_hierarchy.extract_xray_structure(
                crystal_symmetry=crystal_symmetry)
    self.crystal_symmetry = crystal_symmetry
    if (crystal_symmetry is None) and (fmodel is not None):
        self.crystal_symmetry = fmodel.f_obs().crystal_symmetry()

    # Resolve the default flags BEFORE the map-based section below: that
    # section reads flags.real_space / flags.waters, so leaving flags as
    # None there would fail with AttributeError when map_params is usable.
    self.header_info = header_info
    if (flags is None):
        flags = molprobity_flags()

    # use maps (fmodel is not used)
    # run earlier since pdb_hierarchy gets modified
    use_maps = False
    if (map_params is not None):
        use_maps = (
            (map_params.input.maps.map_file_name) or
            ((map_params.input.maps.map_coefficients_file_name) and
             (map_params.input.maps.map_coefficients_label)))
    if (use_maps):
        if (flags.real_space):
            self.real_space = experimental.real_space(
                fmodel=None,
                model=self.model,
                cc_min=min_cc_two_fofc,
                molprobity_map_params=map_params.input.maps)
        if (flags.waters):
            self.waters = waters.waters(
                pdb_hierarchy=pdb_hierarchy,
                xray_structure=xray_structure,
                fmodel=None,
                collect_all=True,
                molprobity_map_params=map_params.input.maps)

    # geometry statistics: the individual validation objects are read off
    # the combined result rather than being run one by one
    import mmtbx.model.statistics
    self.model_statistics_geometry = mmtbx.model.statistics.geometry(
        pdb_hierarchy=pdb_hierarchy,
        geometry_restraints_manager=geometry_restraints_manager,
        use_hydrogens=keep_hydrogens,
        use_nuclear=nuclear)
    self.model_statistics_geometry_result = \
        self.model_statistics_geometry.result()
    self.ramalyze = self.model_statistics_geometry_result.ramachandran.ramalyze
    self.omegalyze = self.model_statistics_geometry_result.omega.omegalyze
    self.rotalyze = self.model_statistics_geometry_result.rotamer.rotalyze
    self.cbetadev = self.model_statistics_geometry_result.c_beta.cbetadev
    self.clashes = self.model_statistics_geometry_result.clash.clashes

    if pdb_hierarchy.contains_protein():
        self.find_missing_atoms(out=null_out())
        if (flags.nqh):
            self.nqh_flips = clashscore.nqh_flips(
                pdb_hierarchy=pdb_hierarchy)
    if (pdb_hierarchy.contains_rna() and flags.rna and
            libtbx.env.has_module(name="suitename")):
        if (geometry_restraints_manager is not None):
            self.rna = rna_validate.rna_validation(
                pdb_hierarchy=pdb_hierarchy,
                geometry_restraints_manager=geometry_restraints_manager,
                outliers_only=outliers_only,
                params=None)
    if (flags.model_stats) and (xray_structure is not None):
        self.model_stats = model_properties.model_statistics(
            pdb_hierarchy=pdb_hierarchy,
            xray_structure=xray_structure,
            all_chain_proxies=all_chain_proxies,
            ignore_hd=(not nuclear),
            ligand_selection=ligand_selection)
    if (geometry_restraints_manager is not None) and (flags.restraints):
        assert (xray_structure is not None)
        self.restraints = restraints.combined(
            pdb_hierarchy=pdb_hierarchy,
            xray_structure=xray_structure,
            geometry_restraints_manager=geometry_restraints_manager,
            ignore_hd=(not nuclear),
            cdl=getattr(all_chain_proxies, "use_cdl", None))
    if (sequences is not None) and (flags.seq):
        self.sequence = sequence.validation(
            pdb_hierarchy=pdb_hierarchy,
            sequences=sequences,
            log=null_out(),
            include_secondary_structure=True,
            extract_coordinates=True)

    if (fmodel is not None):
        if (use_pdb_header_resolution_cutoffs) and (header_info is not None):
            fmodel = fmodel.resolution_filter(
                d_min=header_info.d_min,
                d_max=header_info.d_max)
        if (flags.rfactors):
            self.data_stats = experimental.data_statistics(
                fmodel,
                raw_data=raw_data,
                n_bins=n_bins_data,
                count_anomalous_pairs_separately=count_anomalous_pairs_separately)

        if (not use_maps):  # if maps are used, keep previous results
            if (flags.real_space):
                self.real_space = experimental.real_space(
                    model=model,
                    fmodel=fmodel,
                    cc_min=min_cc_two_fofc)
            if (flags.waters):
                self.waters = waters.waters(
                    pdb_hierarchy=pdb_hierarchy,
                    xray_structure=xray_structure,
                    fmodel=fmodel,
                    collect_all=True)

        if (unmerged_data is not None):
            self.merging = experimental.merging_and_model_statistics(
                f_obs=fmodel.f_obs(),
                f_model=fmodel.f_model(),
                r_free_flags=fmodel.r_free_flags(),
                unmerged_i_obs=unmerged_data,
                anomalous=count_anomalous_pairs_separately,
                use_internal_variance=use_internal_variance,
                n_bins=n_bins_data)
        if (flags.xtriage):
            import mmtbx.scaling.xtriage
            f_model = abs(fmodel.f_model()).set_observation_type_xray_amplitude()
            if (raw_data is not None):
                f_model, obs = f_model.common_sets(other=raw_data)
            else:
                obs = fmodel.f_obs()
            self.xtriage = mmtbx.scaling.xtriage.xtriage_analyses(
                miller_obs=obs,
                miller_calc=f_model,
                unmerged_obs=unmerged_data,  # XXX some redundancy here...
                text_out=null_out())
    if (fmodel_neutron is not None) and (flags.rfactors):
        self.neutron_stats = experimental.data_statistics(
            fmodel_neutron,
            n_bins=n_bins_data,
            count_anomalous_pairs_separately=False)
    if (pdb_hierarchy.models_size() == 1):
        self._multi_criterion = multi_criterion_view(pdb_hierarchy)

    # wilson B
    self.wilson_b = None
    if (fmodel is not None):
        self.wilson_b = fmodel.wilson_b()
    elif (fmodel_neutron is not None):
        self.wilson_b = fmodel_neutron.wilson_b()

    # validate hydrogens
    self.hydrogens = None
    if self.model is not None and self.model.has_hd():
        # import here to avoid circular import issues
        from mmtbx.hydrogens.validate_H import validate_H, validate_H_results
        hydrogens = validate_H(model, nuclear)
        hydrogens.validate_inputs()
        hydrogens.run()
        self.hydrogens = validate_H_results(hydrogens.get_results())

    # write probe file if needed (CLI and GUI)
    if (save_probe_unformatted_file is not None):
        pcm = self.clashes.probe_clashscore_manager
        try:
            with open(save_probe_unformatted_file, 'w') as f:
                f.write(pcm.probe_unformatted)
            self.clashes.probe_file = save_probe_unformatted_file
        except IOError as err:
            raise Sorry('%s could not be written correctly.\n%s' %
                        (save_probe_unformatted_file, err))
def __init__(self,
             pdb_hierarchy,
             xray_structure=None,
             fmodel=None,
             fmodel_neutron=None,
             geometry_restraints_manager=None,
             crystal_symmetry=None,
             sequences=None,
             flags=None,
             header_info=None,
             raw_data=None,
             unmerged_data=None,
             all_chain_proxies=None,
             keep_hydrogens=True,
             nuclear=False,
             save_probe_unformatted_file=None,
             show_hydrogen_outliers=False,
             min_cc_two_fofc=0.8,
             n_bins_data=10,
             count_anomalous_pairs_separately=False,
             use_internal_variance=True,
             outliers_only=True,
             use_pdb_header_resolution_cutoffs=False,
             file_name=None,
             ligand_selection=None,
             rotamer_library="8000",
             map_params=None):
    """
    Run the validation analyses enabled in ``flags`` on ``pdb_hierarchy``
    and keep each result on the attribute of ``self`` named after it.

    Every name in ``self.__slots__`` starts out as None; an analysis that is
    disabled, or whose inputs are missing, leaves its attribute untouched.

    Inputs, described only by how this method uses them:

    pdb_hierarchy -- required; its atoms' i_seq are reset before anything
      else and it is handed to every analysis.
    xray_structure -- if None, taken from ``fmodel.xray_structure`` or, when
      there is no fmodel but a crystal symmetry, extracted from the
      hierarchy.
    fmodel, fmodel_neutron -- optional; enable the data statistics and, for
      ``fmodel``, the reciprocal-space real-space/water analyses, merging
      statistics and xtriage.
    geometry_restraints_manager -- needed for RNA and restraints validation.
    crystal_symmetry -- stored on ``self``; when None it is taken from
      ``fmodel.f_obs()`` if an fmodel is available.
    sequences -- enables sequence validation together with ``flags.seq``.
    flags -- analysis switches; defaults to ``molprobity_flags()``.
    header_info -- stored on ``self``; provides ``d_min``/``d_max`` when
      ``use_pdb_header_resolution_cutoffs`` is true.
    raw_data, unmerged_data, n_bins_data, count_anomalous_pairs_separately,
    use_internal_variance -- forwarded to data/merging/xtriage analyses.
    all_chain_proxies -- forwarded to model statistics; its ``use_cdl``
      attribute (None if absent) is passed to the restraints validation.
    keep_hydrogens, nuclear, save_probe_unformatted_file -- forwarded to
      ``clashscore.clashscore``; ``nuclear`` also sets ``ignore_hd``.
    min_cc_two_fofc -- passed as ``cc_min`` to ``experimental.real_space``.
    outliers_only -- forwarded to the protein and RNA validations.
    ligand_selection -- forwarded to model statistics.
    rotamer_library -- must be "8000"; passed to rotalyze as data_version.
    map_params -- optional; when it names a map file, or a coefficient file
      plus label, real-space and water analyses run against the maps and
      the fmodel-based versions of those two are skipped.
    show_hydrogen_outliers, file_name -- accepted but not referenced in this
      method body.
    """
    assert rotamer_library == "8000", "data_version given to RotamerEval not recognized."
    for slot_name in self.__slots__:
        setattr(self, slot_name, None)

    # The i_seq attributes may be extracted later, so renumber up front.
    pdb_hierarchy.atoms().reset_i_seq()
    self.pdb_hierarchy = pdb_hierarchy

    have_fmodel = fmodel is not None

    # Find an xray structure if the caller did not pass one.
    if xray_structure is None:
        if have_fmodel:
            xray_structure = fmodel.xray_structure
        elif crystal_symmetry is not None:
            xray_structure = pdb_hierarchy.extract_xray_structure(
                crystal_symmetry=crystal_symmetry)

    # Prefer the explicit symmetry; otherwise borrow it from the data.
    if crystal_symmetry is None and have_fmodel:
        self.crystal_symmetry = fmodel.f_obs().crystal_symmetry()
    else:
        self.crystal_symmetry = crystal_symmetry

    self.header_info = header_info
    if flags is None:
        flags = molprobity_flags()

    # --- protein-only validations ---
    if pdb_hierarchy.contains_protein():
        if flags.ramalyze:
            self.ramalyze = ramalyze.ramalyze(
                pdb_hierarchy=pdb_hierarchy,
                outliers_only=outliers_only,
                out=null_out(),
                quiet=True)
        # omegalyze reports non-trans peptides only when outliers_only is set
        if flags.omegalyze:
            self.omegalyze = omegalyze.omegalyze(
                pdb_hierarchy=pdb_hierarchy,
                nontrans_only=outliers_only,
                out=null_out(),
                quiet=True)
        if flags.rotalyze:
            self.rotalyze = rotalyze.rotalyze(
                pdb_hierarchy=pdb_hierarchy,
                data_version=rotamer_library,
                outliers_only=outliers_only,
                out=null_out(),
                quiet=True)
        if flags.cbetadev:
            self.cbetadev = cbetadev.cbetadev(
                pdb_hierarchy=pdb_hierarchy,
                outliers_only=outliers_only,
                out=null_out(),
                quiet=True)
        if flags.nqh:
            self.nqh_flips = clashscore.nqh_flips(pdb_hierarchy=pdb_hierarchy)

    # --- RNA validation: needs the suitename module and restraints ---
    if (pdb_hierarchy.contains_rna()
            and flags.rna
            and libtbx.env.has_module(name="suitename")
            and geometry_restraints_manager is not None):
        self.rna = rna_validate.rna_validation(
            pdb_hierarchy=pdb_hierarchy,
            geometry_restraints_manager=geometry_restraints_manager,
            outliers_only=outliers_only,
            params=None)

    # --- model-only analyses ---
    if flags.clashscore:
        self.clashes = clashscore.clashscore(
            pdb_hierarchy=pdb_hierarchy,
            save_probe_unformatted_file=save_probe_unformatted_file,
            nuclear=nuclear,
            keep_hydrogens=keep_hydrogens,
            out=null_out(),
            verbose=False)
    if flags.model_stats and xray_structure is not None:
        self.model_stats = model_properties.model_statistics(
            pdb_hierarchy=pdb_hierarchy,
            xray_structure=xray_structure,
            all_chain_proxies=all_chain_proxies,
            ignore_hd=(not nuclear),
            ligand_selection=ligand_selection)
    if geometry_restraints_manager is not None and flags.restraints:
        assert (xray_structure is not None)
        self.restraints = restraints.combined(
            pdb_hierarchy=pdb_hierarchy,
            xray_structure=xray_structure,
            geometry_restraints_manager=geometry_restraints_manager,
            ignore_hd=(not nuclear),
            cdl=getattr(all_chain_proxies, "use_cdl", None))
    if sequences is not None and flags.seq:
        self.sequence = sequence.validation(
            pdb_hierarchy=pdb_hierarchy,
            sequences=sequences,
            log=null_out(),
            include_secondary_structure=True,
            extract_coordinates=True)

    # --- map-based analyses (fmodel is not used) ---
    maps_requested = False
    if map_params is not None:
        maps_input = map_params.input.maps
        maps_requested = (
            maps_input.map_file_name
            or (maps_input.map_coefficients_file_name
                and maps_input.map_coefficients_label))
    if maps_requested:
        if flags.real_space:
            self.real_space = experimental.real_space(
                fmodel=None,
                pdb_hierarchy=pdb_hierarchy,
                cc_min=min_cc_two_fofc,
                molprobity_map_params=maps_input)
        if flags.waters:
            self.waters = waters.waters(
                pdb_hierarchy=pdb_hierarchy,
                xray_structure=xray_structure,
                fmodel=None,
                collect_all=True,
                molprobity_map_params=maps_input)

    # --- analyses that need experimental data ---
    if have_fmodel:
        if use_pdb_header_resolution_cutoffs and header_info is not None:
            fmodel = fmodel.resolution_filter(
                d_min=header_info.d_min,
                d_max=header_info.d_max)
        if flags.rfactors:
            self.data_stats = experimental.data_statistics(
                fmodel,
                raw_data=raw_data,
                n_bins=n_bins_data,
                count_anomalous_pairs_separately=count_anomalous_pairs_separately)

        # When maps were used above, keep those results instead.
        if not maps_requested:
            if flags.real_space:
                self.real_space = experimental.real_space(
                    fmodel=fmodel,
                    pdb_hierarchy=pdb_hierarchy,
                    cc_min=min_cc_two_fofc)
            if flags.waters:
                self.waters = waters.waters(
                    pdb_hierarchy=pdb_hierarchy,
                    xray_structure=xray_structure,
                    fmodel=fmodel,
                    collect_all=True)

        if unmerged_data is not None:
            self.merging = experimental.merging_and_model_statistics(
                f_obs=fmodel.f_obs(),
                f_model=fmodel.f_model(),
                r_free_flags=fmodel.r_free_flags(),
                unmerged_i_obs=unmerged_data,
                anomalous=count_anomalous_pairs_separately,
                use_internal_variance=use_internal_variance,
                n_bins=n_bins_data)

        if flags.xtriage:
            import mmtbx.scaling.xtriage
            calc_amplitudes = abs(
                fmodel.f_model()).set_observation_type_xray_amplitude()
            if raw_data is None:
                observed = fmodel.f_obs()
            else:
                calc_amplitudes, observed = calc_amplitudes.common_sets(
                    other=raw_data)
            self.xtriage = mmtbx.scaling.xtriage.xtriage_analyses(
                miller_obs=observed,
                miller_calc=calc_amplitudes,
                unmerged_obs=unmerged_data,  # XXX some redundancy here...
                text_out=null_out())

    if fmodel_neutron is not None and flags.rfactors:
        self.neutron_stats = experimental.data_statistics(
            fmodel_neutron,
            n_bins=n_bins_data,
            count_anomalous_pairs_separately=False)

    # The per-residue multi-criterion view is built for single-model
    # hierarchies only.
    if pdb_hierarchy.models_size() == 1:
        self._multi_criterion = multi_criterion_view(pdb_hierarchy)
def exercise(): import libtbx.utils if (libtbx.utils.detect_multiprocessing_problem() is not None): print("multiprocessing not available, skipping this test") return if (os.name == "nt"): print( "easy_mp fixed_func not supported under Windows, skipping this test" ) return from mmtbx.validation.sequence import validation, get_sequence_n_copies, \ get_sequence_n_copies_from_files import iotbx.bioinformatics import iotbx.pdb from iotbx import file_reader import libtbx.load_env # import dependency from libtbx.test_utils import Exception_expected, contains_lines, approx_equal from six.moves import cStringIO as StringIO pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 2 CA ARG A 10 -6.299 36.344 7.806 1.00 55.20 C ATOM 25 CA TYR A 11 -3.391 33.962 7.211 1.00 40.56 C ATOM 46 CA ALA A 12 -0.693 34.802 4.693 1.00 67.95 C ATOM 56 CA ALA A 13 0.811 31.422 3.858 1.00 57.97 C ATOM 66 CA GLY A 14 4.466 31.094 2.905 1.00 49.24 C ATOM 73 CA ALA A 15 7.163 28.421 2.671 1.00 54.70 C ATOM 83 CA ILE A 16 6.554 24.685 2.957 1.00 51.79 C ATOM 102 CA LEU A 17 7.691 23.612 6.406 1.00 42.30 C ATOM 121 CA PTY A 18 7.292 19.882 5.861 1.00 36.68 C ATOM 128 CA PHE A 19 5.417 16.968 4.327 1.00 44.99 C ATOM 148 CA GLY A 20 3.466 14.289 6.150 1.00 41.99 C ATOM 155 CA GLY A 21 1.756 11.130 4.965 1.00 35.77 C ATOM 190 CA ALA A 24 1.294 19.658 3.683 1.00 47.02 C ATOM 200 CA VAL A 24A 2.361 22.009 6.464 1.00 37.13 C ATOM 216 CA HIS A 25 2.980 25.633 5.535 1.00 42.52 C ATOM 234 CA LEU A 26 4.518 28.425 7.577 1.00 47.63 C ATOM 253 CA ALA A 27 2.095 31.320 7.634 1.00 38.61 C ATOM 263 CA ARG A 28 1.589 34.719 9.165 1.00 37.04 C END""") seq1 = iotbx.bioinformatics.sequence( "MTTPSHLSDRYELGEILGFGGMSEVHLARD".lower()) v = validation(pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq1], log=null_out(), nproc=1) out = StringIO() v.show(out=out) assert contains_lines( out.getvalue(), """\ sequence identity: 76.47% 13 residue(s) missing from PDB chain (9 at start, 1 at end) 2 gap(s) in chain 4 
mismatches to sequence residue IDs: 12 13 15 24""") cif_block = v.sequence_as_cif_block() assert list(cif_block['_struct_ref.pdbx_seq_one_letter_code']) == [ ';MTTPSHLSDRYELGEILGFGGMSEVHLARD\n;' ] # assert approx_equal(cif_block['_struct_ref_seq.pdbx_auth_seq_align_beg'], # ['10', '14', '16', '19', '24']) # assert approx_equal(cif_block['_struct_ref_seq.pdbx_auth_seq_align_end'], # ['11', '14', '17', '21', '28']) # assert approx_equal(cif_block['_struct_ref_seq.db_align_beg'], # ['10', '14', '16', '19', '25']) # assert approx_equal(cif_block['_struct_ref_seq.db_align_end'], # ['11', '14', '17', '21', '29']) # assert cif_block['_struct_ref_seq.pdbx_seq_align_beg_ins_code'][4] == 'A' seq2 = iotbx.bioinformatics.sequence("MTTPSHLSDRYELGEILGFGGMSEVHLA") v = validation(pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq2], log=null_out(), nproc=1) out = StringIO() v.show(out=out) assert contains_lines( out.getvalue(), """\ 1 residues not found in sequence residue IDs: 28""") try: v = validation(pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[], log=null_out(), nproc=1) except AssertionError: pass else: raise Exception_expected cif_block = v.sequence_as_cif_block() print(list(cif_block['_struct_ref.pdbx_seq_one_letter_code'])) assert list(cif_block['_struct_ref.pdbx_seq_one_letter_code']) == [ ';MTTPSHLSDRYELGEILGFGGMSEVHLA\n;' ] # assert approx_equal(cif_block['_struct_ref_seq.pdbx_auth_seq_align_end'], # ['11', '14', '17', '21', '27']) # assert approx_equal(cif_block['_struct_ref_seq.db_align_end'], # ['11', '14', '17', '21', '28']) # pdb_in2 = iotbx.pdb.input(source_info=None, lines="""\ ATOM 2 CA ARG A 10 -6.299 36.344 7.806 1.00 55.20 C ATOM 25 CA TYR A 11 -3.391 33.962 7.211 1.00 40.56 C ATOM 46 CA ALA A 12 -0.693 34.802 4.693 1.00 67.95 C ATOM 56 CA ALA A 13 0.811 31.422 3.858 1.00 57.97 C ATOM 66 CA GLY A 14 4.466 31.094 2.905 1.00 49.24 C ATOM 73 CA ALA A 15 7.163 28.421 2.671 1.00 54.70 C ATOM 83 CA ILE A 16 6.554 24.685 2.957 1.00 51.79 C ATOM 
102 CA LEU A 17 7.691 23.612 6.406 1.00 42.30 C TER ATOM 1936 P G B 2 -22.947 -23.615 15.323 1.00123.20 P ATOM 1959 P C B 3 -26.398 -26.111 19.062 1.00110.06 P ATOM 1979 P U B 4 -29.512 -30.638 21.164 1.00101.06 P ATOM 1999 P C B 5 -30.524 -36.109 21.527 1.00 92.76 P ATOM 2019 P U B 6 -28.684 -41.458 21.223 1.00 87.42 P ATOM 2062 P G B 8 -18.396 -45.415 21.903 1.00 80.35 P ATOM 2085 P A B 9 -13.852 -43.272 24.156 1.00 77.76 P ATOM 2107 P G B 10 -8.285 -44.242 26.815 1.00 79.86 P END """) seq3 = iotbx.bioinformatics.sequence("AGCUUUGGAG") v = validation(pdb_hierarchy=pdb_in2.construct_hierarchy(), sequences=[seq2, seq3], log=null_out(), nproc=1, extract_coordinates=True) out = StringIO() v.show(out=out) cif_block = v.sequence_as_cif_block() assert approx_equal(cif_block['_struct_ref.pdbx_seq_one_letter_code'], [';MTTPSHLSDRYELGEILGFGGMSEVHLA\n;', ';AGCUUUGGAG\n;']) # assert approx_equal(cif_block['_struct_ref_seq.pdbx_auth_seq_align_beg'], # ['10', '14', '16', '2', '6', '8']) # assert approx_equal(cif_block['_struct_ref_seq.pdbx_auth_seq_align_end'], # ['11', '14', '17', '4', '6', '10']) assert (len(v.chains[0].get_outliers_table()) == 3) assert (len(v.get_table_data()) == 4) assert approx_equal( v.chains[0].get_mean_coordinate_for_alignment_range(11, 11), (-0.693, 34.802, 4.693)) assert approx_equal( v.chains[0].get_mean_coordinate_for_alignment_range(11, 14), (2.93675, 31.43475, 3.53175)) assert (v.chains[0].get_highlighted_residues() == [11, 12, 14]) assert contains_lines( out.getvalue(), """\ 3 mismatches to sequence residue IDs: 12 13 15""") assert contains_lines( out.getvalue(), """\ sequence identity: 87.50% 2 residue(s) missing from PDB chain (1 at start, 0 at end) 1 gap(s) in chain 1 mismatches to sequence residue IDs: 5""") s = easy_pickle.dumps(v) seq4 = iotbx.bioinformatics.sequence("") try: v = validation(pdb_hierarchy=pdb_in2.construct_hierarchy(), sequences=[seq4], log=null_out(), nproc=1, extract_coordinates=True) except AssertionError: pass else: 
raise Exception_expected # check that nucleic acid chain doesn't get aligned against protein sequence pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 18932 P B DG D 1 -12.183 60.531 25.090 0.50364.79 P ATOM 18963 P B DG D 2 -9.738 55.258 20.689 0.50278.77 P ATOM 18994 P B DA D 3 -10.119 47.855 19.481 0.50355.17 P ATOM 19025 P B DT D 4 -13.664 42.707 21.119 0.50237.06 P ATOM 19056 P B DG D 5 -19.510 39.821 21.770 0.50255.45 P ATOM 19088 P B DA D 6 -26.096 40.001 21.038 0.50437.49 P ATOM 19120 P B DC D 7 -31.790 41.189 18.413 0.50210.00 P ATOM 19149 P B DG D 8 -34.639 41.306 12.582 0.50313.99 P ATOM 19179 P B DA D 9 -34.987 38.244 6.813 0.50158.92 P ATOM 19210 P B DT D 10 -32.560 35.160 1.082 0.50181.38 P HETATM19241 P BTSP D 11 -27.614 30.137 0.455 0.50508.17 P """) sequences, _ = iotbx.bioinformatics.fasta_sequence_parse.parse( """>4GFH:A|PDBID|CHAIN|SEQUENCE MSTEPVSASDKYQKISQLEHILKRPDTYIGSVETQEQLQWIYDEETDCMIEKNVTIVPGLFKIFDEILVNAADNKVRDPS MKRIDVNIHAEEHTIEVKNDGKGIPIEIHNKENIYIPEMIFGHLLTSSNYDDDEKKVTGGRNGYGAKLCNIFSTEFILET ADLNVGQKYVQKWENNMSICHPPKITSYKKGPSYTKVTFKPDLTRFGMKELDNDILGVMRRRVYDINGSVRDINVYLNGK SLKIRNFKNYVELYLKSLEKKRQLDNGEDGAAKSDIPTILYERINNRWEVAFAVSDISFQQISFVNSIATTMGGTHVNYI TDQIVKKISEILKKKKKKSVKSFQIKNNMFIFINCLIENPAFTSQTKEQLTTRVKDFGSRCEIPLEYINKIMKTDLATRM FEIADANEENALKKSDGTRKSRITNYPKLEDANKAGTKEGYKCTLVLTEGDSALSLAVAGLAVVGRDYYGCYPLRGKMLN VREASADQILKNAEIQAIKKIMGLQHRKKYEDTKSLRYGHLMIMTDQDHDGSHIKGLIINFLESSFPGLLDIQGFLLEFI TPIIKVSITKPTKNTIAFYNMPDYEKWREEESHKFTWKQKYYKGLGTSLAQEVREYFSNLDRHLKIFHSLQGNDKDYIDL AFSKKKADDRKEWLRQYEPGTVLDPTLKEIPISDFINKELILFSLADNIRSIPNVLDGFKPGQRKVLYGCFKKNLKSELK VAQLAPYVSECTAYHHGEQSLAQTIIGLAQNFVGSNNIYLLLPNGAFGTRATGGKDAAAARYIYTELNKLTRKIFHPADD PLYKYIQEDEKTVEPEWYLPILPMILVNGAEGIGTGWSTYIPPFNPLEIIKNIRHLMNDEELEQMHPWFRGWTGTIEEIE PLRYRMYGRIEQIGDNVLEITELPARTWTSTIKEYLLLGLSGNDKIKPWIKDMEEQHDDNIKFIITLSPEEMAKTRKIGF YERFKLISPISLMNMVAFDPHGKIKKYNSVNEILSEFYYVRLEYYQKRKDHMSERLQWEVEKYSFQVKFIKMIIEKELTV 
TNKPRNAIIQELENLGFPRFNKEGKPYYGSPNDEIAEQINDVKGATSDEEDEESSHEDTENVINGPEELYGTYEYLLGMR IWSLTKERYQKLLKQKQEKETELENLLKLSAKDIWNTDLKAFEVGYQEFLQRDAEAR >4GFH:D|PDBID|CHAIN|SEQUENCE GGATGACGATX """) v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=sequences, log=null_out(), nproc=1, ) out = StringIO() v.show(out=out) assert v.chains[0].n_missing == 0 assert v.chains[0].n_missing_end == 0 assert v.chains[0].n_missing_start == 0 assert len(v.chains[0].alignment.matches()) == 11 # pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 2 CA GLY A 1 1.367 0.551 0.300 1.00 7.71 C ATOM 6 CA CYS A 2 2.782 3.785 1.683 1.00 5.18 C ATOM 12 CA CYS A 3 -0.375 5.128 3.282 1.00 5.21 C ATOM 18 CA SER A 4 -0.870 2.048 5.492 1.00 7.19 C ATOM 25 CA LEU A 5 2.786 2.056 6.642 1.00 6.78 C ATOM 33 CA PRO A 6 3.212 4.746 9.312 1.00 7.03 C ATOM 40 CA PRO A 7 6.870 5.690 8.552 1.00 7.97 C ATOM 47 CA CYS A 8 6.021 6.070 4.855 1.00 6.48 C ATOM 53 CA ALA A 9 2.812 8.041 5.452 1.00 7.15 C ATOM 58 CA LEU A 10 4.739 10.382 7.748 1.00 8.36 C ATOM 66 CA SER A 11 7.292 11.200 5.016 1.00 7.00 C ATOM 73 CA ASN A 12 4.649 11.435 2.264 1.00 5.40 C ATOM 81 CA PRO A 13 1.879 13.433 3.968 1.00 5.97 C ATOM 88 CA ASP A 14 0.485 15.371 0.986 1.00 7.70 C ATOM 96 CA TYR A 15 0.565 12.245 -1.180 1.00 6.55 C ATOM 108 CA CYS A 16 -1.466 10.260 1.363 1.00 7.32 C ATOM 113 N NH2 A 17 -2.612 12.308 2.058 1.00 8.11 N """) seq = iotbx.bioinformatics.sequence("GCCSLPPCALSNPDYCX") # match last residue v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq], log=null_out(), nproc=1, ) out = StringIO() v.show(out=out) assert v.chains[0].n_missing == 0 assert v.chains[0].n_missing_end == 0 assert v.chains[0].n_missing_start == 0 assert len(v.chains[0].alignment.matches()) == 17 # ignore non-protein residue v = validation(pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq], log=null_out(), nproc=1, ignore_hetatm=True) out = StringIO() v.show(out=out) assert v.chains[0].n_missing == 1 
assert v.chains[0].n_missing_end == 1 assert v.chains[0].n_missing_start == 0 assert len(v.chains[0].alignment.matches()) == 17 # pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 2518 CA PRO C 3 23.450 -5.848 45.723 1.00 85.24 C ATOM 2525 CA GLY C 4 20.066 -4.416 44.815 1.00 79.25 C ATOM 2529 CA PHE C 5 19.408 -0.913 46.032 1.00 77.13 C ATOM 2540 CA GLY C 6 17.384 -1.466 49.208 1.00 83.44 C ATOM 2544 CA GLN C 7 17.316 -5.259 49.606 1.00 89.25 C ATOM 2553 CA GLY C 8 19.061 -6.829 52.657 1.00 90.67 C """) sequences, _ = iotbx.bioinformatics.fasta_sequence_parse.parse( """>1JN5:A|PDBID|CHAIN|SEQUENCE MASVDFKTYVDQACRAAEEFVNVYYTTMDKRRRLLSRLYMGTATLVWNGNAVSGQESLSEFFEMLPSSEFQISVVDCQPV HDEATPSQTTVLVVICGSVKFEGNKQRDFNQNFILTAQASPSNTVWKIASDCFRFQDWAS >1JN5:B|PDBID|CHAIN|SEQUENCE APPCKGSYFGTENLKSLVLHFLQQYYAIYDSGDRQGLLDAYHDGACCSLSIPFIPQNPARSSLAEYFKDSRNVKKLKDPT LRFRLLKHTRLNVVAFLNELPKTQHDVNSFVVDISAQTSTLLCFSVNGVFKEVDGKSRDSLRAFTRTFIAVPASNSGLCI VNDELFVRNASSEEIQRAFAMPAPTPSSSPVPTLSPEQQEMLQAFSTQSGMNLEWSQKCLQDNNWDYTRSAQAFTHLKAK GEIPEVAFMK >1JN5:C|PDBID|CHAIN|SEQUENCE GQSPGFGQGGSV """) v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=sequences, log=null_out(), nproc=1, ) out = StringIO() v.show(out=out) assert v.chains[0].n_missing_start == 3 assert v.chains[0].n_missing_end == 3 assert v.chains[0].identity == 1.0 assert v.chains[0].alignment.match_codes == 'iiimmmmmmiii' # pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 2 CA ALA A 2 -8.453 57.214 -12.754 1.00 52.95 C ATOM 7 CA LEU A 3 -8.574 59.274 -9.471 1.00 24.33 C ATOM 15 CA ARG A 4 -12.178 60.092 -8.575 1.00 28.40 C ATOM 26 CA GLY A 5 -14.170 61.485 -5.667 1.00 26.54 C ATOM 30 CA THR A 6 -17.784 60.743 -4.783 1.00 31.78 C ATOM 37 CA VAL A 7 -19.080 64.405 -4.464 1.00 21.31 C """) seq = iotbx.bioinformatics.sequence("XALRGTV") v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq], log=null_out(), nproc=1, ) out = StringIO() v.show(out=out) assert 
v.chains[0].n_missing_start == 1 assert v.chains[0].n_missing_end == 0 assert v.chains[0].identity == 1.0 assert v.chains[0].alignment.match_codes == 'immmmmm' # pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 2171 CA ASP I 355 5.591 -11.903 1.133 1.00 41.60 C ATOM 2175 CA PHE I 356 7.082 -8.454 0.828 1.00 39.82 C ATOM 2186 CA GLU I 357 5.814 -6.112 -1.877 1.00 41.12 C ATOM 2195 CA GLU I 358 8.623 -5.111 -4.219 1.00 42.70 C ATOM 2199 CA ILE I 359 10.346 -1.867 -3.363 1.00 43.32 C ATOM 2207 CA PRO I 360 11.658 0.659 -5.880 1.00 44.86 C ATOM 2214 CA GLU I 361 14.921 -0.125 -7.592 1.00 44.32 C ATOM 2219 CA GLU I 362 15.848 3.489 -6.866 1.00 44.27 C HETATM 2224 CA TYS I 363 16.482 2.005 -3.448 1.00 44.52 C """) seq = iotbx.bioinformatics.sequence("NGDFEEIPEEYL") v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq], log=null_out(), nproc=1, ) out = StringIO() v.show(out=out) assert v.chains[0].n_missing_start == 2 assert v.chains[0].n_missing_end == 1 assert v.chains[0].identity == 1.0 pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 450 CA ASN A 1 37.242 41.665 44.160 1.00 35.89 C ATOM 458 CA GLY A 2 37.796 38.269 42.523 1.00 30.13 C HETATM 463 CA AMSE A 3 35.878 39.005 39.326 0.54 22.83 C HETATM 464 CA BMSE A 3 35.892 39.018 39.323 0.46 22.96 C ATOM 478 CA ILE A 4 37.580 38.048 36.061 1.00 22.00 C ATOM 486 CA SER A 5 37.593 40.843 33.476 1.00 18.73 C ATOM 819 CA ALA A 8 25.982 34.781 27.220 1.00 18.43 C ATOM 824 CA ALA A 9 23.292 32.475 28.614 1.00 19.60 C HETATM 830 CA BMSE A 10 22.793 30.814 25.223 0.41 22.60 C HETATM 831 CA CMSE A 10 22.801 30.850 25.208 0.59 22.54 C ATOM 845 CA GLU A 11 26.504 30.054 24.966 1.00 25.19 C ATOM 854 CA GLY A 12 25.907 28.394 28.320 1.00 38.88 C """) seq = iotbx.bioinformatics.sequence("NGMISAAAAMEG") v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq], log=null_out(), nproc=1, ) out = StringIO() v.show(out=out) assert v.chains[0].alignment.a == 'NGMISXXAAMEG' assert 
v.chains[0].alignment.b == 'NGMISAAAAMEG' pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 4615 CA ALA C 1 1.000 1.000 1.000 1.00 10.00 ATOM 4622 CA ALA C 2 1.000 1.000 1.000 1.00 10.00 ATOM 4627 CA ALA C 3 1.000 1.000 1.000 1.00 10.00 ATOM 4634 CA ALA C 4 1.000 1.000 1.000 1.00 10.00 ATOM 4646 CA ALA C 5 1.000 1.000 1.000 1.00 10.00 ATOM 4658 CA ALA C 6 1.000 1.000 1.000 1.00 10.00 ATOM 4664 CA ALA C 7 1.000 1.000 1.000 1.00 10.00 ATOM 4669 CA ALA C 8 1.000 1.000 1.000 1.00 10.00 ATOM 4680 CA ARG C 9 1.000 1.000 1.000 1.00 10.00 ATOM 4690 CA GLY C 10 1.000 1.000 1.000 1.00 10.00 ATOM 4698 CA PRO C 11 1.000 1.000 1.000 1.00 10.00 ATOM 4705 CA LYS C 12 1.000 1.000 1.000 1.00 10.00 ATOM 4712 CA TRP C 13 1.000 1.000 1.000 1.00 10.00 ATOM 4726 CA GLU C 14 1.000 1.000 1.000 1.00 10.00 ATOM 4738 CA SER C 15 1.000 1.000 1.000 1.00 10.00 ATOM 4744 CA THR C 16 1.000 1.000 1.000 1.00 10.00 ATOM 4751 CA GLY C 17 1.000 1.000 1.000 1.00 10.00 ATOM 4755 CA TYR C 18 1.000 1.000 1.000 1.00 10.00 ATOM 4767 CA PHE C 19 1.000 1.000 1.000 1.00 10.00 ATOM 4778 CA ALA C 20 1.000 1.000 1.000 1.00 10.00 ATOM 4786 CA ALA C 21 1.000 1.000 1.000 1.00 10.00 ATOM 4798 CA TRP C 22 1.000 1.000 1.000 1.00 10.00 ATOM 4812 CA GLY C 23 1.000 1.000 1.000 1.00 10.00 ATOM 4816 CA GLN C 24 1.000 1.000 1.000 1.00 10.00 ATOM 4822 CA GLY C 25 1.000 1.000 1.000 1.00 10.00 ATOM 4826 CA THR C 26 1.000 1.000 1.000 1.00 10.00 ATOM 4833 CA LEU C 27 1.000 1.000 1.000 1.00 10.00 ATOM 4841 CA VAL C 28 1.000 1.000 1.000 1.00 10.00 ATOM 4848 CA THR C 29 1.000 1.000 1.000 1.00 10.00 ATOM 4855 CA VAL C 30 1.000 1.000 1.000 1.00 10.00 ATOM 4862 CA SER C 31 1.000 1.000 1.000 1.00 10.00 ATOM 4868 CA SER C 32 1.000 1.000 1.000 1.00 10.00 END """) seq = iotbx.bioinformatics.sequence( "AAAAAAAARGKWESPAALLKKAAWCSGTLVTVSSASAPKWKSTSGCYFAAPWNKRALRVTVLQSS") v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq], log=null_out(), nproc=1, ) out = StringIO() v.show(out=out) # check that shortest 
matching sequence is chosen # example from 6H4N, chain a, and I sequences, _ = iotbx.bioinformatics.fasta_sequence_parse.parse("""\ >6H4N:a|PDBID|CHAIN|SEQUENCE AAUUGAAGAGUUUGAUCAUGGCUCAGAUUGAACGCUGGCGGCAGGCCUAACACAUGCAAGUCGAACGGUAACAGGAAGAA GCUUGCUUCUUUGCUGACGAGUGGCGGACGGGUGAGUAAUGUCUGGGAAACUGCCUGAUGGAGGGGGAUAACUACUGGAA ACGGUAGCUAAUACCGCAUAACGUCGCAAGACCAAAGAGGGGGACCUUCGGGCCUCUUGCCAUCGGAUGUGCCCAGAUGG GAUUAGCUAGUAGGUGGGGUAACGGCUCACCUAGGCGACGAUCCCUAGCUGGUCUGAGAGGAUGACCAGCCACACUGGAA CUGAGACACGGUCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUAUUGCACAAUGGGCGCAAGCCUGAUGCAGCCAUGCC GCGUGUAUGAAGAAGGCCUUCGGGUUGUAAAGUACUUUCAGCGGGGAGGAAGGGAGUAAAGUUAAUACCUUUGCUCAUUG ACGUUACCCGCAGAAGAAGCACCGGCUAACUCCGUGCCAGCAGCCGCGGUAAUACGGAGGGUGCAAGCGUUAAUCGGAAU UACUGGGCGUAAAGCGCACGCAGGCGGUUUGUUAAGUCAGAUGUGAAAUCCCCGGGCUCAACCUGGGAACUGCAUCUGAU ACUGGCAAGCUUGAGUCUCGUAGAGGGGGGUAGAAUUCCAGGUGUAGCGGUGAAAUGCGUAGAGAUCUGGAGGAAUACCG GUGGCGAAGGCGGCCCCCUGGACGAAGACUGACGCUCAGGUGCGAAAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGU AGUCCACGCCGUAAACGAUGUCGACUUGGAGGUUGUGCCCUUGAGGCGUGGCUUCCGGAGCUAACGCGUUAAGUCGACCG CCUGGGGAGUACGGCCGCAAGGUUAAAACUCAAAUGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUU CGAUGCAACGCGAAGAACCUUACCUGGUCUUGACAUCCACGGAAGUUUUCAGAGAUGAGAAUGUGCCUUCGGGAACCGUG AGACAGGUGCUGCAUGGCUGUCGUCAGCUCGUGUUGUGAAAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUAUCCU UUGUUGCCAGCGGUCCGGCCGGGAACUCAAAGGAGACUGCCAGUGAUAAACUGGAGGAAGGUGGGGAUGACGUCAAGUCA UCAUGGCCCUUACGACCAGGGCUACACACGUGCUACAAUGGCGCAUACAAAGAGAAGCGACCUCGCGAGAGCAAGCGGAC CUCAUAAAGUGCGUCGUAGUCCGGAUUGGAGUCUGCAACUCGACUCCAUGAAGUCGGAAUCGCUAGUAAUCGUGGAUCAG AAUGCCACGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACACCAUGGGAGUGGGUUGCAAAAGAAGUAGGUA GCUUAACCUUCGGGAGGGCGCUUACCACUUUGUGAUUCAUGACUGGGGUGAAGUCGUAACAAGGUAACCGUAGGGGAACC UGCGGUUGGAUCAC >6H4N:I|PDBID|CHAIN|SEQUENCE CUCCU """) pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 95502 P C I1536 211.989 143.717 147.208 1.00 16.47 P ATOM 95503 OP1 C I1536 213.292 143.696 146.494 1.00 16.47 O ATOM 95504 OP2 C I1536 211.250 144.996 147.359 1.00 16.47 O ATOM 95505 
O5' C I1536 211.021 142.666 146.541 1.00 16.47 O ATOM 95506 C5' C I1536 211.671 141.536 146.021 1.00 16.47 C ATOM 95507 C4' C I1536 211.059 140.260 146.502 1.00 16.47 C ATOM 95508 O4' C I1536 209.764 140.432 147.128 1.00 16.47 O ATOM 95509 C3' C I1536 210.818 139.353 145.303 1.00 16.47 C ATOM 95510 O3' C I1536 211.011 137.993 145.604 1.00 16.47 O ATOM 95511 C2' C I1536 209.372 139.646 144.938 1.00 16.47 C ATOM 95512 O2' C I1536 208.735 138.572 144.276 1.00 16.47 O ATOM 95513 C1' C I1536 208.757 139.866 146.316 1.00 16.47 C ATOM 95514 N1 C I1536 207.618 140.788 146.322 1.00 16.47 N ATOM 95515 C2 C I1536 206.610 140.626 145.378 1.00 16.47 C ATOM 95516 O2 C I1536 206.712 139.721 144.535 1.00 16.47 O ATOM 95517 N3 C I1536 205.560 141.463 145.396 1.00 16.47 N ATOM 95518 C4 C I1536 205.492 142.420 146.320 1.00 16.47 C ATOM 95519 N4 C I1536 204.429 143.227 146.302 1.00 16.47 N ATOM 95520 C5 C I1536 206.496 142.595 147.306 1.00 16.47 C ATOM 95521 C6 C I1536 207.522 141.754 147.283 1.00 16.47 C ATOM 95522 P U I1537 212.458 137.366 145.505 1.00 11.96 P ATOM 95523 OP1 U I1537 212.292 135.894 145.567 1.00 11.96 O ATOM 95524 OP2 U I1537 213.344 138.045 146.479 1.00 11.96 O ATOM 95525 O5' U I1537 212.962 137.720 144.038 1.00 11.96 O ATOM 95526 C5' U I1537 214.363 137.934 143.772 1.00 11.96 C ATOM 95527 C4' U I1537 214.522 138.678 142.472 1.00 11.96 C ATOM 95528 O4' U I1537 213.714 137.951 141.515 1.00 11.96 O ATOM 95529 C3' U I1537 213.970 140.098 142.549 1.00 11.96 C ATOM 95530 O3' U I1537 214.924 141.159 142.799 1.00 11.96 O ATOM 95531 C2' U I1537 212.939 140.210 141.413 1.00 11.96 C ATOM 95532 O2' U I1537 212.980 141.292 140.508 1.00 11.96 O ATOM 95533 C1' U I1537 212.990 138.848 140.714 1.00 11.96 C ATOM 95534 N1 U I1537 211.632 138.324 140.509 1.00 11.96 N ATOM 95535 C2 U I1537 211.212 138.082 139.216 1.00 11.96 C ATOM 95536 O2 U I1537 211.943 138.228 138.252 1.00 11.96 O ATOM 95537 N3 U I1537 209.897 137.730 139.076 1.00 11.96 N ATOM 95538 C4 U I1537 208.966 137.602 
140.074 1.00 11.96 C ATOM 95539 O4 U I1537 207.834 137.203 139.798 1.00 11.96 O ATOM 95540 C5 U I1537 209.473 137.843 141.382 1.00 11.96 C ATOM 95541 C6 U I1537 210.749 138.206 141.544 1.00 11.96 C ATOM 95542 P C I1538 216.031 141.722 141.738 1.00 11.10 P ATOM 95543 OP1 C I1538 216.814 142.772 142.428 1.00 11.10 O ATOM 95544 OP2 C I1538 215.385 142.057 140.453 1.00 11.10 O ATOM 95545 O5' C I1538 217.081 140.541 141.538 1.00 11.10 O ATOM 95546 C5' C I1538 218.494 140.848 141.429 1.00 11.10 C ATOM 95547 C4' C I1538 218.962 140.916 139.986 1.00 11.10 C ATOM 95548 O4' C I1538 218.034 140.280 139.091 1.00 11.10 O ATOM 95549 C3' C I1538 219.276 142.298 139.408 1.00 11.10 C ATOM 95550 O3' C I1538 220.629 142.126 139.044 1.00 11.10 O ATOM 95551 C2' C I1538 218.657 142.315 138.005 1.00 11.10 C ATOM 95552 O2' C I1538 219.358 142.774 136.857 1.00 11.10 O ATOM 95553 C1' C I1538 218.164 140.883 137.832 1.00 11.10 C ATOM 95554 N1 C I1538 216.943 140.702 137.064 1.00 11.10 N ATOM 95555 C2 C I1538 217.041 140.096 135.813 1.00 11.10 C ATOM 95556 O2 C I1538 218.163 139.770 135.401 1.00 11.10 O ATOM 95557 N3 C I1538 215.932 139.850 135.093 1.00 11.10 N ATOM 95558 C4 C I1538 214.748 140.195 135.580 1.00 11.10 C ATOM 95559 N4 C I1538 213.670 139.968 134.827 1.00 11.10 N ATOM 95560 C5 C I1538 214.617 140.827 136.842 1.00 11.10 C ATOM 95561 C6 C I1538 215.722 141.024 137.566 1.00 11.10 C ATOM 95562 P C I1539 221.798 142.624 139.940 1.00 17.77 P ATOM 95563 OP1 C I1539 221.300 143.669 140.865 1.00 17.77 O ATOM 95564 OP2 C I1539 222.961 142.899 139.061 1.00 17.77 O ATOM 95565 O5' C I1539 222.148 141.341 140.812 1.00 17.77 O ATOM 95566 C5' C I1539 223.493 140.934 140.997 1.00 17.77 C ATOM 95567 C4' C I1539 223.633 139.444 140.845 1.00 17.77 C ATOM 95568 O4' C I1539 222.661 138.972 139.877 1.00 17.77 O ATOM 95569 C3' C I1539 224.967 138.959 140.300 1.00 17.77 C ATOM 95570 O3' C I1539 225.970 138.853 141.295 1.00 17.77 O ATOM 95571 C2' C I1539 224.602 137.629 139.658 1.00 17.77 C ATOM 95572 
O2' C I1539 224.482 136.616 140.642 1.00 17.77 O ATOM 95573 C1' C I1539 223.209 137.924 139.109 1.00 17.77 C ATOM 95574 N1 C I1539 223.219 138.333 137.681 1.00 17.77 N ATOM 95575 C2 C I1539 223.353 137.370 136.683 1.00 17.77 C ATOM 95576 O2 C I1539 223.476 136.178 136.982 1.00 17.77 O ATOM 95577 N3 C I1539 223.342 137.742 135.392 1.00 17.77 N ATOM 95578 C4 C I1539 223.202 139.017 135.059 1.00 17.77 C ATOM 95579 N4 C I1539 223.202 139.332 133.762 1.00 17.77 N ATOM 95580 C5 C I1539 223.059 140.033 136.041 1.00 17.77 C ATOM 95581 C6 C I1539 223.067 139.642 137.318 1.00 17.77 C ATOM 95582 P U I1540 227.517 139.071 140.915 1.00 25.44 P ATOM 95583 OP1 U I1540 228.321 138.910 142.156 1.00 25.44 O ATOM 95584 OP2 U I1540 227.626 140.309 140.102 1.00 25.44 O ATOM 95585 O5' U I1540 227.868 137.833 139.978 1.00 25.44 O ATOM 95586 C5' U I1540 228.014 136.524 140.520 1.00 25.44 C ATOM 95587 C4' U I1540 228.308 135.503 139.447 1.00 25.44 C ATOM 95588 O4' U I1540 227.513 135.808 138.268 1.00 25.44 O ATOM 95589 C3' U I1540 229.761 135.445 138.980 1.00 25.44 C ATOM 95590 O3' U I1540 230.104 134.098 138.659 1.00 25.44 O ATOM 95591 C2' U I1540 229.740 136.281 137.705 1.00 25.44 C ATOM 95592 O2' U I1540 230.767 135.976 136.785 1.00 25.44 O ATOM 95593 C1' U I1540 228.360 135.950 137.145 1.00 25.44 C ATOM 95594 N1 U I1540 227.809 136.996 136.268 1.00 25.44 N ATOM 95595 C2 U I1540 227.053 136.589 135.186 1.00 25.44 C ATOM 95596 O2 U I1540 226.815 135.418 134.956 1.00 25.44 O ATOM 95597 N3 U I1540 226.574 137.600 134.393 1.00 25.44 N ATOM 95598 C4 U I1540 226.781 138.951 134.566 1.00 25.44 C ATOM 95599 O4 U I1540 226.286 139.746 133.765 1.00 25.44 O ATOM 95600 C5 U I1540 227.583 139.293 135.701 1.00 25.44 C ATOM 95601 C6 U I1540 228.061 138.329 136.493 1.00 25.44 C END """) v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=sequences, log=null_out(), nproc=1, ) assert (v.chains[0].get_alignment() == ['CUCCU', 'CUCCU']) # all tests below here have additional dependencies 
if (not libtbx.env.has_module("ksdssp")): print("Skipping advanced tests (require ksdssp module)") return pdb_file = libtbx.env.find_in_repositories( relative_path="phenix_regression/pdb/1ywf.pdb", test=os.path.isfile) if (pdb_file is not None): seq = iotbx.bioinformatics.sequence( "MGSSHHHHHHSSGLVPRGSHMAVRELPGAWNFRDVADTATALRPGRLFRSSELSRLDDAGRATLRRLGITDVADLRSSREVARRGPGRVPDGIDVHLLPFPDLADDDADDSAPHETAFKRLLTNDGSNGESGESSQSINDAATRYMTDEYRQFPTRNGAQRALHRVVTLLAAGRPVLTHCFAGKDRTGFVVALVLEAVGLDRDVIVADYLRSNDSVPQLRARISEMIQQRFDTELAPEVVTFTKARLSDGVLGVRAEYLAAARQTIDETYGSLGGYLRDAGISQATVNRMRGVLLG" ) pdb_in = file_reader.any_file(pdb_file, force_type="pdb") hierarchy = pdb_in.file_object.hierarchy v = validation(pdb_hierarchy=hierarchy, sequences=[seq], log=null_out(), nproc=1, include_secondary_structure=True, extract_coordinates=True) out = StringIO() v.show(out=out) aln1, aln2, ss = v.chains[0].get_alignment(include_sec_str=True) assert ("HHH" in ss) and ("LLL" in ss) and ("---" in ss) cif_block = v.sequence_as_cif_block() assert cif_block[ '_struct_ref.pdbx_seq_one_letter_code'] == seq.sequence # assert list( # cif_block['_struct_ref_seq.pdbx_auth_seq_align_beg']) == ['4', '117'] # assert list( # cif_block['_struct_ref_seq.pdbx_auth_seq_align_end']) == ['85', '275'] # assert list(cif_block['_struct_ref_seq.seq_align_beg']) == ['1', '114'] # assert list(cif_block['_struct_ref_seq.seq_align_end']) == ['82', '272'] # determine relative counts of sequences and chains n_seq = get_sequence_n_copies(pdb_hierarchy=hierarchy, sequences=[seq] * 4, copies_from_xtriage=4, out=null_out()) assert (n_seq == 1) hierarchy = hierarchy.deep_copy() chain2 = hierarchy.only_model().chains()[0].detached_copy() hierarchy.only_model().append_chain(chain2) n_seq = get_sequence_n_copies(pdb_hierarchy=hierarchy, sequences=[seq] * 4, copies_from_xtriage=2, out=null_out()) assert (n_seq == 1) n_seq = get_sequence_n_copies(pdb_hierarchy=hierarchy, sequences=[seq], copies_from_xtriage=2, out=null_out()) assert 
(n_seq == 4) try: n_seq = get_sequence_n_copies(pdb_hierarchy=hierarchy, sequences=[seq] * 3, copies_from_xtriage=2, out=null_out()) except Sorry as s: assert ("round number" in str(s)) else: raise Exception_expected n_seq = get_sequence_n_copies(pdb_hierarchy=hierarchy, sequences=[seq] * 3, copies_from_xtriage=2, force_accept_composition=True, out=null_out()) assert (n_seq == 1) try: n_seq = get_sequence_n_copies(pdb_hierarchy=hierarchy, sequences=[seq] * 4, copies_from_xtriage=1, out=null_out()) except Sorry as s: assert ("less than" in str(s)) else: raise Exception_expected n_seq = get_sequence_n_copies( pdb_hierarchy=hierarchy, sequences=[seq] * 4, copies_from_xtriage=1, assume_xtriage_copies_from_sequence_file=True, out=null_out()) assert (n_seq == 0.5) hierarchy = hierarchy.deep_copy() chain2 = hierarchy.only_model().chains()[0].detached_copy() hierarchy.only_model().append_chain(chain2) try: n_seq = get_sequence_n_copies(pdb_hierarchy=hierarchy, sequences=[seq] * 2, copies_from_xtriage=2, out=null_out()) except Sorry as s: assert ("round number" in str(s)) else: raise Exception_expected n_seq = get_sequence_n_copies(pdb_hierarchy=hierarchy, sequences=[seq], copies_from_xtriage=1, out=null_out()) assert (n_seq == 3) hierarchy = hierarchy.deep_copy() chain2 = hierarchy.only_model().chains()[0].detached_copy() hierarchy.only_model().append_chain(chain2) n_seq = get_sequence_n_copies(pdb_hierarchy=hierarchy, sequences=[seq] * 2, copies_from_xtriage=2, out=null_out()) assert (n_seq == 4) # now with files as input seq_file = "tmp_mmtbx_validation_sequence.fa" open(seq_file, "w").write(">1ywf\n%s" % seq.sequence) n_seq = get_sequence_n_copies_from_files(pdb_file=pdb_file, seq_file=seq_file, copies_from_xtriage=4, out=null_out()) try: assert (n_seq == 4) finally: os.remove(seq_file)
def exercise () : import libtbx.utils if (libtbx.utils.detect_multiprocessing_problem() is not None) : print "multiprocessing not available, skipping this test" return if (os.name == "nt"): print "easy_mp fixed_func not supported under Windows, skipping this test" return from mmtbx.validation.sequence import validation, get_sequence_n_copies, \ get_sequence_n_copies_from_files import iotbx.bioinformatics import iotbx.pdb from iotbx import file_reader import libtbx.load_env # import dependency from libtbx.test_utils import Exception_expected, contains_lines, approx_equal from cStringIO import StringIO pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 2 CA ARG A 10 -6.299 36.344 7.806 1.00 55.20 C ATOM 25 CA TYR A 11 -3.391 33.962 7.211 1.00 40.56 C ATOM 46 CA ALA A 12 -0.693 34.802 4.693 1.00 67.95 C ATOM 56 CA ALA A 13 0.811 31.422 3.858 1.00 57.97 C ATOM 66 CA GLY A 14 4.466 31.094 2.905 1.00 49.24 C ATOM 73 CA ALA A 15 7.163 28.421 2.671 1.00 54.70 C ATOM 83 CA ILE A 16 6.554 24.685 2.957 1.00 51.79 C ATOM 102 CA LEU A 17 7.691 23.612 6.406 1.00 42.30 C ATOM 121 CA PTY A 18 7.292 19.882 5.861 1.00 36.68 C ATOM 128 CA PHE A 19 5.417 16.968 4.327 1.00 44.99 C ATOM 148 CA GLY A 20 3.466 14.289 6.150 1.00 41.99 C ATOM 155 CA GLY A 21 1.756 11.130 4.965 1.00 35.77 C ATOM 190 CA ALA A 24 1.294 19.658 3.683 1.00 47.02 C ATOM 200 CA VAL A 24A 2.361 22.009 6.464 1.00 37.13 C ATOM 216 CA HIS A 25 2.980 25.633 5.535 1.00 42.52 C ATOM 234 CA LEU A 26 4.518 28.425 7.577 1.00 47.63 C ATOM 253 CA ALA A 27 2.095 31.320 7.634 1.00 38.61 C ATOM 263 CA ARG A 28 1.589 34.719 9.165 1.00 37.04 C END""") seq1 = iotbx.bioinformatics.sequence("MTTPSHLSDRYELGEILGFGGMSEVHLARD".lower()) v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq1], log=null_out(), nproc=1) out = StringIO() v.show(out=out) assert contains_lines(out.getvalue(), """\ sequence identity: 76.47% 13 residue(s) missing from PDB chain (9 at start, 1 at end) 2 gap(s) in chain 4 mismatches to 
sequence residue IDs: 12 13 15 24""") cif_block = v.as_cif_block() assert list(cif_block['_struct_ref.pdbx_seq_one_letter_code']) == [ 'MTTPSHLSDRYELGEILGFGGMSEVHLARD'] assert approx_equal(cif_block['_struct_ref_seq.pdbx_auth_seq_align_beg'], ['10', '14', '16', '19', '24']) assert approx_equal(cif_block['_struct_ref_seq.pdbx_auth_seq_align_end'], ['11', '14', '17', '21', '28']) assert approx_equal(cif_block['_struct_ref_seq.db_align_beg'], ['10', '14', '16', '19', '25']) assert approx_equal(cif_block['_struct_ref_seq.db_align_end'], ['11', '14', '17', '21', '29']) assert cif_block['_struct_ref_seq.pdbx_seq_align_beg_ins_code'][4] == 'A' seq2 = iotbx.bioinformatics.sequence("MTTPSHLSDRYELGEILGFGGMSEVHLA") v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq2], log=null_out(), nproc=1) out = StringIO() v.show(out=out) assert contains_lines(out.getvalue(), """\ 1 residues not found in sequence residue IDs: 28""") try : v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[], log=null_out(), nproc=1) except AssertionError : pass else : raise Exception_expected cif_block = v.as_cif_block() assert list(cif_block['_struct_ref.pdbx_seq_one_letter_code']) == [ 'MTTPSHLSDRYELGEILGFGGMSEVHLA-'] assert approx_equal(cif_block['_struct_ref_seq.pdbx_auth_seq_align_end'], ['11', '14', '17', '21', '27']) assert approx_equal(cif_block['_struct_ref_seq.db_align_end'], ['11', '14', '17', '21', '28']) # pdb_in2 = iotbx.pdb.input(source_info=None, lines="""\ ATOM 2 CA ARG A 10 -6.299 36.344 7.806 1.00 55.20 C ATOM 25 CA TYR A 11 -3.391 33.962 7.211 1.00 40.56 C ATOM 46 CA ALA A 12 -0.693 34.802 4.693 1.00 67.95 C ATOM 56 CA ALA A 13 0.811 31.422 3.858 1.00 57.97 C ATOM 66 CA GLY A 14 4.466 31.094 2.905 1.00 49.24 C ATOM 73 CA ALA A 15 7.163 28.421 2.671 1.00 54.70 C ATOM 83 CA ILE A 16 6.554 24.685 2.957 1.00 51.79 C ATOM 102 CA LEU A 17 7.691 23.612 6.406 1.00 42.30 C TER ATOM 1936 P G B 2 -22.947 -23.615 15.323 1.00123.20 P ATOM 1959 P C B 3 
-26.398 -26.111 19.062 1.00110.06 P ATOM 1979 P U B 4 -29.512 -30.638 21.164 1.00101.06 P ATOM 1999 P C B 5 -30.524 -36.109 21.527 1.00 92.76 P ATOM 2019 P U B 6 -28.684 -41.458 21.223 1.00 87.42 P ATOM 2062 P G B 8 -18.396 -45.415 21.903 1.00 80.35 P ATOM 2085 P A B 9 -13.852 -43.272 24.156 1.00 77.76 P ATOM 2107 P G B 10 -8.285 -44.242 26.815 1.00 79.86 P END """) seq3 = iotbx.bioinformatics.sequence("AGCUUUGGAG") v = validation( pdb_hierarchy=pdb_in2.construct_hierarchy(), sequences=[seq2,seq3], log=null_out(), nproc=1, extract_coordinates=True) out = StringIO() v.show(out=out) cif_block = v.as_cif_block() assert approx_equal(cif_block['_struct_ref.pdbx_seq_one_letter_code'], ['MTTPSHLSDRYELGEILGFGGMSEVHLA', 'AGCUUUGGAG']) assert approx_equal(cif_block['_struct_ref_seq.pdbx_auth_seq_align_beg'], ['10', '14', '16', '2', '6', '8']) assert approx_equal(cif_block['_struct_ref_seq.pdbx_auth_seq_align_end'], ['11', '14', '17', '4', '6', '10']) assert (len(v.chains[0].get_outliers_table()) == 3) assert (len(v.get_table_data()) == 4) assert approx_equal( v.chains[0].get_mean_coordinate_for_alignment_range(11,11), (-0.693, 34.802, 4.693)) assert approx_equal( v.chains[0].get_mean_coordinate_for_alignment_range(11,14), (2.93675, 31.43475, 3.53175)) assert (v.chains[0].get_highlighted_residues() == [11,12,14]) assert contains_lines(out.getvalue(), """\ 3 mismatches to sequence residue IDs: 12 13 15""") assert contains_lines(out.getvalue(), """\ sequence identity: 87.50% 2 residue(s) missing from PDB chain (1 at start, 0 at end) 1 gap(s) in chain 1 mismatches to sequence residue IDs: 5""") s = easy_pickle.dumps(v) seq4 = iotbx.bioinformatics.sequence("") try : v = validation( pdb_hierarchy=pdb_in2.construct_hierarchy(), sequences=[seq4], log=null_out(), nproc=1, extract_coordinates=True) except AssertionError : pass else : raise Exception_expected # check that nucleic acid chain doesn't get aligned against protein sequence pdb_in = iotbx.pdb.input(source_info=None, 
lines="""\ ATOM 18932 P B DG D 1 -12.183 60.531 25.090 0.50364.79 P ATOM 18963 P B DG D 2 -9.738 55.258 20.689 0.50278.77 P ATOM 18994 P B DA D 3 -10.119 47.855 19.481 0.50355.17 P ATOM 19025 P B DT D 4 -13.664 42.707 21.119 0.50237.06 P ATOM 19056 P B DG D 5 -19.510 39.821 21.770 0.50255.45 P ATOM 19088 P B DA D 6 -26.096 40.001 21.038 0.50437.49 P ATOM 19120 P B DC D 7 -31.790 41.189 18.413 0.50210.00 P ATOM 19149 P B DG D 8 -34.639 41.306 12.582 0.50313.99 P ATOM 19179 P B DA D 9 -34.987 38.244 6.813 0.50158.92 P ATOM 19210 P B DT D 10 -32.560 35.160 1.082 0.50181.38 P HETATM19241 P BTSP D 11 -27.614 30.137 0.455 0.50508.17 P """) sequences, _ = iotbx.bioinformatics.fasta_sequence_parse.parse( """>4GFH:A|PDBID|CHAIN|SEQUENCE MSTEPVSASDKYQKISQLEHILKRPDTYIGSVETQEQLQWIYDEETDCMIEKNVTIVPGLFKIFDEILVNAADNKVRDPS MKRIDVNIHAEEHTIEVKNDGKGIPIEIHNKENIYIPEMIFGHLLTSSNYDDDEKKVTGGRNGYGAKLCNIFSTEFILET ADLNVGQKYVQKWENNMSICHPPKITSYKKGPSYTKVTFKPDLTRFGMKELDNDILGVMRRRVYDINGSVRDINVYLNGK SLKIRNFKNYVELYLKSLEKKRQLDNGEDGAAKSDIPTILYERINNRWEVAFAVSDISFQQISFVNSIATTMGGTHVNYI TDQIVKKISEILKKKKKKSVKSFQIKNNMFIFINCLIENPAFTSQTKEQLTTRVKDFGSRCEIPLEYINKIMKTDLATRM FEIADANEENALKKSDGTRKSRITNYPKLEDANKAGTKEGYKCTLVLTEGDSALSLAVAGLAVVGRDYYGCYPLRGKMLN VREASADQILKNAEIQAIKKIMGLQHRKKYEDTKSLRYGHLMIMTDQDHDGSHIKGLIINFLESSFPGLLDIQGFLLEFI TPIIKVSITKPTKNTIAFYNMPDYEKWREEESHKFTWKQKYYKGLGTSLAQEVREYFSNLDRHLKIFHSLQGNDKDYIDL AFSKKKADDRKEWLRQYEPGTVLDPTLKEIPISDFINKELILFSLADNIRSIPNVLDGFKPGQRKVLYGCFKKNLKSELK VAQLAPYVSECTAYHHGEQSLAQTIIGLAQNFVGSNNIYLLLPNGAFGTRATGGKDAAAARYIYTELNKLTRKIFHPADD PLYKYIQEDEKTVEPEWYLPILPMILVNGAEGIGTGWSTYIPPFNPLEIIKNIRHLMNDEELEQMHPWFRGWTGTIEEIE PLRYRMYGRIEQIGDNVLEITELPARTWTSTIKEYLLLGLSGNDKIKPWIKDMEEQHDDNIKFIITLSPEEMAKTRKIGF YERFKLISPISLMNMVAFDPHGKIKKYNSVNEILSEFYYVRLEYYQKRKDHMSERLQWEVEKYSFQVKFIKMIIEKELTV TNKPRNAIIQELENLGFPRFNKEGKPYYGSPNDEIAEQINDVKGATSDEEDEESSHEDTENVINGPEELYGTYEYLLGMR IWSLTKERYQKLLKQKQEKETELENLLKLSAKDIWNTDLKAFEVGYQEFLQRDAEAR >4GFH:D|PDBID|CHAIN|SEQUENCE GGATGACGATX """) v = validation( 
pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=sequences, log=null_out(), nproc=1,) out = StringIO() v.show(out=out) assert v.chains[0].n_missing == 0 assert v.chains[0].n_missing_end == 0 assert v.chains[0].n_missing_start == 0 assert len(v.chains[0].alignment.matches()) == 11 # pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 2 CA GLY A 1 1.367 0.551 0.300 1.00 7.71 C ATOM 6 CA CYS A 2 2.782 3.785 1.683 1.00 5.18 C ATOM 12 CA CYS A 3 -0.375 5.128 3.282 1.00 5.21 C ATOM 18 CA SER A 4 -0.870 2.048 5.492 1.00 7.19 C ATOM 25 CA LEU A 5 2.786 2.056 6.642 1.00 6.78 C ATOM 33 CA PRO A 6 3.212 4.746 9.312 1.00 7.03 C ATOM 40 CA PRO A 7 6.870 5.690 8.552 1.00 7.97 C ATOM 47 CA CYS A 8 6.021 6.070 4.855 1.00 6.48 C ATOM 53 CA ALA A 9 2.812 8.041 5.452 1.00 7.15 C ATOM 58 CA LEU A 10 4.739 10.382 7.748 1.00 8.36 C ATOM 66 CA SER A 11 7.292 11.200 5.016 1.00 7.00 C ATOM 73 CA ASN A 12 4.649 11.435 2.264 1.00 5.40 C ATOM 81 CA PRO A 13 1.879 13.433 3.968 1.00 5.97 C ATOM 88 CA ASP A 14 0.485 15.371 0.986 1.00 7.70 C ATOM 96 CA TYR A 15 0.565 12.245 -1.180 1.00 6.55 C ATOM 108 CA CYS A 16 -1.466 10.260 1.363 1.00 7.32 C ATOM 113 N NH2 A 17 -2.612 12.308 2.058 1.00 8.11 N """) seq = iotbx.bioinformatics.sequence("GCCSLPPCALSNPDYCX") v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq], log=null_out(), nproc=1,) out = StringIO() v.show(out=out) assert v.chains[0].n_missing == 0 assert v.chains[0].n_missing_end == 0 assert v.chains[0].n_missing_start == 0 assert len(v.chains[0].alignment.matches()) == 17 # pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 2518 CA PRO C 3 23.450 -5.848 45.723 1.00 85.24 C ATOM 2525 CA GLY C 4 20.066 -4.416 44.815 1.00 79.25 C ATOM 2529 CA PHE C 5 19.408 -0.913 46.032 1.00 77.13 C ATOM 2540 CA GLY C 6 17.384 -1.466 49.208 1.00 83.44 C ATOM 2544 CA GLN C 7 17.316 -5.259 49.606 1.00 89.25 C ATOM 2553 CA GLY C 8 19.061 -6.829 52.657 1.00 90.67 C """) sequences, _ = 
iotbx.bioinformatics.fasta_sequence_parse.parse( """>1JN5:A|PDBID|CHAIN|SEQUENCE MASVDFKTYVDQACRAAEEFVNVYYTTMDKRRRLLSRLYMGTATLVWNGNAVSGQESLSEFFEMLPSSEFQISVVDCQPV HDEATPSQTTVLVVICGSVKFEGNKQRDFNQNFILTAQASPSNTVWKIASDCFRFQDWAS >1JN5:B|PDBID|CHAIN|SEQUENCE APPCKGSYFGTENLKSLVLHFLQQYYAIYDSGDRQGLLDAYHDGACCSLSIPFIPQNPARSSLAEYFKDSRNVKKLKDPT LRFRLLKHTRLNVVAFLNELPKTQHDVNSFVVDISAQTSTLLCFSVNGVFKEVDGKSRDSLRAFTRTFIAVPASNSGLCI VNDELFVRNASSEEIQRAFAMPAPTPSSSPVPTLSPEQQEMLQAFSTQSGMNLEWSQKCLQDNNWDYTRSAQAFTHLKAK GEIPEVAFMK >1JN5:C|PDBID|CHAIN|SEQUENCE GQSPGFGQGGSV """) v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=sequences, log=null_out(), nproc=1,) out = StringIO() v.show(out=out) assert v.chains[0].n_missing_start == 3 assert v.chains[0].n_missing_end == 3 assert v.chains[0].identity == 1.0 assert v.chains[0].alignment.match_codes == 'iiimmmmmmiii' # pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 2 CA ALA A 2 -8.453 57.214 -12.754 1.00 52.95 C ATOM 7 CA LEU A 3 -8.574 59.274 -9.471 1.00 24.33 C ATOM 15 CA ARG A 4 -12.178 60.092 -8.575 1.00 28.40 C ATOM 26 CA GLY A 5 -14.170 61.485 -5.667 1.00 26.54 C ATOM 30 CA THR A 6 -17.784 60.743 -4.783 1.00 31.78 C ATOM 37 CA VAL A 7 -19.080 64.405 -4.464 1.00 21.31 C """) seq = iotbx.bioinformatics.sequence("XALRGTV") v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq], log=null_out(), nproc=1,) out = StringIO() v.show(out=out) assert v.chains[0].n_missing_start == 1 assert v.chains[0].n_missing_end == 0 assert v.chains[0].identity == 1.0 assert v.chains[0].alignment.match_codes == 'immmmmm' # pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 2171 CA ASP I 355 5.591 -11.903 1.133 1.00 41.60 C ATOM 2175 CA PHE I 356 7.082 -8.454 0.828 1.00 39.82 C ATOM 2186 CA GLU I 357 5.814 -6.112 -1.877 1.00 41.12 C ATOM 2195 CA GLU I 358 8.623 -5.111 -4.219 1.00 42.70 C ATOM 2199 CA ILE I 359 10.346 -1.867 -3.363 1.00 43.32 C ATOM 2207 CA PRO I 360 11.658 0.659 -5.880 1.00 44.86 C ATOM 2214 
CA GLU I 361 14.921 -0.125 -7.592 1.00 44.32 C ATOM 2219 CA GLU I 362 15.848 3.489 -6.866 1.00 44.27 C HETATM 2224 CA TYS I 363 16.482 2.005 -3.448 1.00 44.52 C """) seq = iotbx.bioinformatics.sequence("NGDFEEIPEEYL") v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq], log=null_out(), nproc=1,) out = StringIO() v.show(out=out) assert v.chains[0].n_missing_start == 2 assert v.chains[0].n_missing_end == 1 assert v.chains[0].identity == 1.0 pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 450 CA ASN A 1 37.242 41.665 44.160 1.00 35.89 C ATOM 458 CA GLY A 2 37.796 38.269 42.523 1.00 30.13 C HETATM 463 CA AMSE A 3 35.878 39.005 39.326 0.54 22.83 C HETATM 464 CA BMSE A 3 35.892 39.018 39.323 0.46 22.96 C ATOM 478 CA ILE A 4 37.580 38.048 36.061 1.00 22.00 C ATOM 486 CA SER A 5 37.593 40.843 33.476 1.00 18.73 C ATOM 819 CA ALA A 8 25.982 34.781 27.220 1.00 18.43 C ATOM 824 CA ALA A 9 23.292 32.475 28.614 1.00 19.60 C HETATM 830 CA BMSE A 10 22.793 30.814 25.223 0.41 22.60 C HETATM 831 CA CMSE A 10 22.801 30.850 25.208 0.59 22.54 C ATOM 845 CA GLU A 11 26.504 30.054 24.966 1.00 25.19 C ATOM 854 CA GLY A 12 25.907 28.394 28.320 1.00 38.88 C """) seq = iotbx.bioinformatics.sequence("NGMISAAAAMEG") v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq], log=null_out(), nproc=1,) out = StringIO() v.show(out=out) assert v.chains[0].alignment.a == 'NGMISXXAAMEG' assert v.chains[0].alignment.b == 'NGMISAAAAMEG' pdb_in = iotbx.pdb.input(source_info=None, lines="""\ ATOM 4615 CA ALA C 1 1.000 1.000 1.000 1.00 10.00 ATOM 4622 CA ALA C 2 1.000 1.000 1.000 1.00 10.00 ATOM 4627 CA ALA C 3 1.000 1.000 1.000 1.00 10.00 ATOM 4634 CA ALA C 4 1.000 1.000 1.000 1.00 10.00 ATOM 4646 CA ALA C 5 1.000 1.000 1.000 1.00 10.00 ATOM 4658 CA ALA C 6 1.000 1.000 1.000 1.00 10.00 ATOM 4664 CA ALA C 7 1.000 1.000 1.000 1.00 10.00 ATOM 4669 CA ALA C 8 1.000 1.000 1.000 1.00 10.00 ATOM 4680 CA ARG C 9 1.000 1.000 1.000 1.00 10.00 ATOM 4690 CA GLY C 
10 1.000 1.000 1.000 1.00 10.00 ATOM 4698 CA PRO C 11 1.000 1.000 1.000 1.00 10.00 ATOM 4705 CA LYS C 12 1.000 1.000 1.000 1.00 10.00 ATOM 4712 CA TRP C 13 1.000 1.000 1.000 1.00 10.00 ATOM 4726 CA GLU C 14 1.000 1.000 1.000 1.00 10.00 ATOM 4738 CA SER C 15 1.000 1.000 1.000 1.00 10.00 ATOM 4744 CA THR C 16 1.000 1.000 1.000 1.00 10.00 ATOM 4751 CA GLY C 17 1.000 1.000 1.000 1.00 10.00 ATOM 4755 CA TYR C 18 1.000 1.000 1.000 1.00 10.00 ATOM 4767 CA PHE C 19 1.000 1.000 1.000 1.00 10.00 ATOM 4778 CA ALA C 20 1.000 1.000 1.000 1.00 10.00 ATOM 4786 CA ALA C 21 1.000 1.000 1.000 1.00 10.00 ATOM 4798 CA TRP C 22 1.000 1.000 1.000 1.00 10.00 ATOM 4812 CA GLY C 23 1.000 1.000 1.000 1.00 10.00 ATOM 4816 CA GLN C 24 1.000 1.000 1.000 1.00 10.00 ATOM 4822 CA GLY C 25 1.000 1.000 1.000 1.00 10.00 ATOM 4826 CA THR C 26 1.000 1.000 1.000 1.00 10.00 ATOM 4833 CA LEU C 27 1.000 1.000 1.000 1.00 10.00 ATOM 4841 CA VAL C 28 1.000 1.000 1.000 1.00 10.00 ATOM 4848 CA THR C 29 1.000 1.000 1.000 1.00 10.00 ATOM 4855 CA VAL C 30 1.000 1.000 1.000 1.00 10.00 ATOM 4862 CA SER C 31 1.000 1.000 1.000 1.00 10.00 ATOM 4868 CA SER C 32 1.000 1.000 1.000 1.00 10.00 END """) seq = iotbx.bioinformatics.sequence( "AAAAAAAARGKWESPAALLKKAAWCSGTLVTVSSASAPKWKSTSGCYFAAPWNKRALRVTVLQSS") v = validation( pdb_hierarchy=pdb_in.construct_hierarchy(), sequences=[seq], log=null_out(), nproc=1,) out = StringIO() v.show(out=out) # all tests below here have additional dependencies if (not libtbx.env.has_module("ksdssp")) : print "Skipping advanced tests (require ksdssp module)" return pdb_file = libtbx.env.find_in_repositories( relative_path="phenix_regression/pdb/1ywf.pdb", test=os.path.isfile) if (pdb_file is not None) : seq = 
iotbx.bioinformatics.sequence("MGSSHHHHHHSSGLVPRGSHMAVRELPGAWNFRDVADTATALRPGRLFRSSELSRLDDAGRATLRRLGITDVADLRSSREVARRGPGRVPDGIDVHLLPFPDLADDDADDSAPHETAFKRLLTNDGSNGESGESSQSINDAATRYMTDEYRQFPTRNGAQRALHRVVTLLAAGRPVLTHCFAGKDRTGFVVALVLEAVGLDRDVIVADYLRSNDSVPQLRARISEMIQQRFDTELAPEVVTFTKARLSDGVLGVRAEYLAAARQTIDETYGSLGGYLRDAGISQATVNRMRGVLLG") pdb_in = file_reader.any_file(pdb_file, force_type="pdb") hierarchy = pdb_in.file_object.hierarchy v = validation( pdb_hierarchy=hierarchy, sequences=[seq], log=null_out(), nproc=1, include_secondary_structure=True, extract_coordinates=True) out = StringIO() v.show(out=out) aln1, aln2, ss = v.chains[0].get_alignment(include_sec_str=True) assert ("HHH" in ss) and ("LLL" in ss) and ("---" in ss) cif_block = v.as_cif_block() assert cif_block['_struct_ref.pdbx_seq_one_letter_code'] == seq.sequence assert list( cif_block['_struct_ref_seq.pdbx_auth_seq_align_beg']) == ['4', '117'] assert list( cif_block['_struct_ref_seq.pdbx_auth_seq_align_end']) == ['85', '275'] assert list(cif_block['_struct_ref_seq.seq_align_beg']) == ['1', '114'] assert list(cif_block['_struct_ref_seq.seq_align_end']) == ['82', '272'] # determine relative counts of sequences and chains n_seq = get_sequence_n_copies( pdb_hierarchy=hierarchy, sequences=[seq] * 4, copies_from_xtriage=4, out=null_out()) assert (n_seq == 1) hierarchy = hierarchy.deep_copy() chain2 = hierarchy.only_model().chains()[0].detached_copy() hierarchy.only_model().append_chain(chain2) n_seq = get_sequence_n_copies( pdb_hierarchy=hierarchy, sequences=[seq] * 4, copies_from_xtriage=2, out=null_out()) assert (n_seq == 1) n_seq = get_sequence_n_copies( pdb_hierarchy=hierarchy, sequences=[seq], copies_from_xtriage=2, out=null_out()) assert (n_seq == 4) try : n_seq = get_sequence_n_copies( pdb_hierarchy=hierarchy, sequences=[seq] * 3, copies_from_xtriage=2, out=null_out()) except Sorry, s : assert ("round number" in str(s)) else : raise Exception_expected n_seq = get_sequence_n_copies( pdb_hierarchy=hierarchy, 
sequences=[seq] * 3, copies_from_xtriage=2, force_accept_composition=True, out=null_out()) assert (n_seq == 1) try : n_seq = get_sequence_n_copies( pdb_hierarchy=hierarchy, sequences=[seq] * 4, copies_from_xtriage=1, out=null_out()) except Sorry, s : assert ("less than" in str(s))