def extend_protein_model(pdb_hierarchy,
                         mon_lib_srv,
                         add_hydrogens=None,
                         selection=None):
    """
    Rebuild every selected amino-acid atom group that is missing atoms.

    The hierarchy is scanned for atom groups whose lower-cased residue name
    is a key of ``idealized_aa.residue_dict()`` and for which
    ``rotamer_eval.eval_residue_completeness`` (hydrogens included) returns a
    non-empty result.  Each such atom group is passed to ``extend_residue``
    together with the matching idealized atom group, and the returned residue
    replaces the original inside its parent residue group.  The hierarchy is
    modified in place; ``i_seq`` and serial numbers are reset afterwards.

    Per the notes that accompanied the original implementation, the rebuild
    substitutes an ideal amino acid and rotates the sidechain to match the
    old conformation as closely as possible, with limited functionality:
      1) amino acids only,
      2) side chain atoms only,
      3) not termini aware,
      4) not aware of v2.3 vs v3.2 atom names, e.g. HB1,HB2 vs HB2,HB3.

    Parameters:
      pdb_hierarchy: hierarchy with exactly one model (``only_model()`` is
        used).
      mon_lib_srv: monomer library server forwarded to the completeness
        check and to ``extend_residue``.
      add_hydrogens: ``None`` uses the ``"_h"`` template only when the
        residue already contains an atom with element ``"H"``; ``True``
        always uses the ``"_h"`` template; ``False`` uses the plain template
        and skips residues whose missing atom names all start with the same
        single letter H, D or T.
      selection: boolean atom selection indexed by ``i_seq``; an atom group
        is considered only when all of its atoms are selected.  ``None``
        selects every atom.

    Returns:
      The number of atom groups that were rebuilt.
    """
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.rotamer import rotamer_eval
    from scitbx.array_family import flex

    ideal_dict = idealized_aa.residue_dict()
    if selection is None:
        selection = flex.bool(pdb_hierarchy.atoms().size(), True)

    # First pass: collect the incomplete atom groups.  Replacement is done in
    # a second pass so the hierarchy is not edited while it is being walked.
    partial_sidechains = []
    for chain in pdb_hierarchy.only_model().chains():
        for residue_group in chain.residue_groups():
            for residue in residue_group.atom_groups():
                i_seqs = residue.atoms().extract_i_seq()
                # Honor the caller's selection: the original computed this
                # sub-selection but never looked at it.
                if not selection.select(i_seqs).all_eq(True):
                    continue
                if residue.resname.lower() not in ideal_dict:
                    continue
                missing_atoms = rotamer_eval.eval_residue_completeness(
                    residue=residue,
                    mon_lib_srv=mon_lib_srv,
                    ignore_hydrogens=False)
                if len(missing_atoms) == 0:
                    continue
                # True only when every missing name starts with one and the
                # same letter, and that letter is H, D or T.
                first_letters = set(s.strip()[0] for s in missing_atoms)
                only_hydrogens = (len(first_letters) == 1 and
                                  first_letters.issubset("HDT"))
                if add_hydrogens is False and only_hydrogens:
                    continue
                partial_sidechains.append(residue)

    # Second pass: build the replacement and swap it into the residue group.
    for residue in partial_sidechains:
        residue_elements = [
            e.strip() for e in residue.atoms().extract_element()]
        res_key = residue.resname.lower()
        if add_hydrogens is True:
            res_key += "_h"
        elif add_hydrogens is None and "H" in residue_elements:
            res_key += "_h"
        target_atom_group = ideal_dict[res_key].only_model().only_chain().\
            only_residue_group().only_atom_group()
        new_residue = extend_residue(
            residue=residue,
            target_atom_group=target_atom_group,
            mon_lib_srv=mon_lib_srv)
        # The completeness of the rebuilt residue is evaluated but not
        # enforced; the original assertion on it was disabled.
        missing_atoms = rotamer_eval.eval_residue_completeness(
            residue=new_residue,
            mon_lib_srv=mon_lib_srv,
            ignore_hydrogens=False)
        rg = residue.parent()
        rg.remove_atom_group(residue)
        rg.append_atom_group(new_residue.detached_copy())

    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchy.atoms().reset_serial()
    return len(partial_sidechains)
def check_missing_atom(pdb_filename):
    """
    Tell whether a PDB file contains an incomplete amino-acid residue.

    The file is read with ``iotbx.pdb.input`` and turned into a hierarchy
    (exactly one model is expected, since ``only_model()`` is used).  Only
    residue groups holding exactly one atom group are examined, and only when
    the lower-cased residue name is a key of ``idealized_aa.residue_dict()``.
    Hydrogens are ignored by the completeness check.

    NOTE(review): ``iotbx``, ``idealized_aa``, ``rotamer_eval`` and
    ``mon_lib_server`` are not defined in this function; they are presumably
    module-level names -- confirm they are in scope.

    Parameters:
      pdb_filename: path of the PDB file to inspect.

    Returns:
      True as soon as one examined residue has missing atoms, else False.
    """
    pdb_inp = iotbx.pdb.input(file_name=pdb_filename)
    pdb_hierarchy = pdb_inp.construct_hierarchy()
    ideal_dict = idealized_aa.residue_dict()
    for chain in pdb_hierarchy.only_model().chains():
        for residue_group in chain.residue_groups():
            # Residue groups with several atom groups are not examined.
            if residue_group.atom_groups_size() != 1:
                continue
            residue = residue_group.only_atom_group()
            if residue.resname.lower() not in ideal_dict:
                continue
            missing_atoms = rotamer_eval.eval_residue_completeness(
                residue=residue,
                mon_lib_srv=mon_lib_server,
                ignore_hydrogens=True)
            if len(missing_atoms) > 0:
                return True
    return False
def filter_before_build(
        pdb_hierarchy,
        fmodel,
        geometry_restraints_manager,
        selection=None,
        params=None,
        verbose=True,
        log=sys.stdout):
    """
    Pick residues suitable for building alternate conformations - by default,
    this means no MolProbity/geometry outliers, good fit to map, no missing
    atoms, and no pre-existing alternates, but with significant difference
    density nearby.

    Residue groups in protein chains are rejected, in this order, when:
      * not every atom of the group is selected;
      * the group already holds more than one atom group;
      * the residue is not classified as "common_amino_acid";
      * atoms are missing (hydrogens ignored), unless the residue is a stub
        according to ``building.is_stub_residue`` and
        ``params.ignore_stub_residues`` is false;
      * ``is_validation_outlier`` flags its validation record;
      * ``params.use_difference_map`` is set and the local density statistics
        show no atoms below the Fo-Fc map level and no nearby grid points
        above the cutoff.

    Parameters:
      pdb_hierarchy: hierarchy with exactly one model; its atom count must
        equal the number of scatterers in ``fmodel.xray_structure``.  The
        ``i_seq`` of its atoms is reset.
      fmodel: supplies the X-ray structure and the difference maps.
      geometry_restraints_manager: forwarded to the MolProbity validation.
      selection: boolean atom selection; ``None`` selects every scatterer.
      params: filtering parameters; when ``None`` they are extracted from
        ``filter_params_str``.
      verbose: when true, the full validation report and the
        "no difference density" messages are written to ``log``.
      log: output stream for messages.

    Returns:
      A list with the single atom group of each accepted residue group.

    Raises:
      AssertionError: if hierarchy and X-ray structure sizes differ.
      Sorry: if no residue passes the filters.
    """
    from mmtbx.validation import molprobity
    from mmtbx.rotamer import rotamer_eval
    import mmtbx.monomer_library.server
    from mmtbx import building
    from iotbx.pdb import common_residue_names_get_class
    from scitbx.array_family import flex

    n_scatterers = fmodel.xray_structure.scatterers().size()
    if selection is None:
        selection = flex.bool(n_scatterers, True)
    pdb_atoms = pdb_hierarchy.atoms()
    assert (pdb_atoms.size() == fmodel.xray_structure.scatterers().size())
    pdb_atoms.reset_i_seq()

    full_validation = molprobity.molprobity(
        pdb_hierarchy=pdb_hierarchy,
        fmodel=fmodel,
        geometry_restraints_manager=geometry_restraints_manager,
        outliers_only=False,
        rotamer_library="8000")
    if verbose:
        full_validation.show(out=log)
    multi_criterion = full_validation.as_multi_criterion_view()
    if params is None:
        params = libtbx.phil.parse(filter_params_str).extract()
    mon_lib_srv = mmtbx.monomer_library.server.server()
    two_fofc_map, fofc_map = building.get_difference_maps(fmodel=fmodel)

    residues = []
    make_sub_header("Identifying candidates for building", out=log)
    # TODO parallelize
    for chain in pdb_hierarchy.only_model().chains():
        if not chain.is_protein():
            continue
        for residue_group in chain.residue_groups():
            atom_groups = residue_group.atom_groups()
            id_str = residue_group.id_str()
            i_seqs = residue_group.atoms().extract_i_seq()
            if not selection.select(i_seqs).all_eq(True):
                continue
            if len(atom_groups) > 1:
                print >> log, " %s is already multi-conformer" % id_str
                continue
            atom_group = atom_groups[0]
            res_class = common_residue_names_get_class(atom_group.resname)
            if res_class != "common_amino_acid":
                print >> log, " %s: non-standard residue" % id_str
                continue
            missing_atoms = rotamer_eval.eval_residue_completeness(
                residue=atom_group,
                mon_lib_srv=mon_lib_srv,
                ignore_hydrogens=True)
            if len(missing_atoms) > 0:
                # residues modeled as pseudo-ALA are allowed by default;
                # partially missing sidechains are more problematic
                stub_allowed = (building.is_stub_residue(atom_group) and
                                not params.ignore_stub_residues)
                if not stub_allowed:
                    # The original format string had a single placeholder for
                    # two arguments, which raised TypeError here.
                    print >> log, \
                        " %s: missing or incomplete sidechain " \
                        "(%d atoms missing)" % (id_str, len(missing_atoms))
                    continue
            validation = multi_criterion.get_residue_group_data(residue_group)
            if is_validation_outlier(validation, params):
                print >> log, " %s" % str(validation)
                continue
            if params.use_difference_map:
                i_seqs_no_hd = building.get_non_hydrogen_atom_indices(
                    residue_group)
                map_stats = building.local_density_quality(
                    fofc_map=fofc_map,
                    two_fofc_map=two_fofc_map,
                    atom_selection=i_seqs_no_hd,
                    xray_structure=fmodel.xray_structure,
                    radius=params.sampling_radius)
                no_density = (
                    (map_stats.number_of_atoms_below_fofc_map_level() == 0)
                    and
                    (map_stats.fraction_of_nearby_grid_points_above_cutoff()
                     == 0))
                if no_density:
                    # Verbosity only controls the message, not the filter.
                    if verbose:
                        print >> log, \
                            " no difference density for %s" % id_str
                    continue
            residues.append(residue_group.only_atom_group())

    if len(residues) == 0:
        raise Sorry("No residues passed the filtering criteria.")
    print >> log, ""
    print >> log, \
        "Alternate conformations will be tried for %d residue(s):" % \
        len(residues)
    building.show_chain_resseq_ranges(residues, out=log, prefix=" ")
    print >> log, ""
    return residues
def extend_protein_model(pdb_hierarchy,
                         selection=None,
                         hydrogens=Auto,
                         max_atoms_missing=None,
                         log=None,
                         modify_segids=True,
                         prefilter_callback=None,
                         idealized_residue_dict=None,
                         skip_non_protein_chains=True):
    """
    Replace all sidechains with missing non-hydrogen atoms in a PDB hierarchy.

    Single-conformer residues that are fully selected and for which
    ``rotamer_eval.eval_residue_completeness`` (hydrogens ignored) returns a
    non-empty result are rebuilt with ``extend_residue`` and swapped into
    their parent residue group.  The hierarchy is modified in place;
    ``i_seq`` and serial numbers are reset afterwards.

    Parameters:
      pdb_hierarchy: hierarchy with exactly one model (``only_model()`` is
        used).
      selection: boolean atom selection indexed by ``i_seq``; ``None``
        selects every atom.
      hydrogens: forwarded to ``extend_residue``; when it compares equal to
        True the ``"_h"`` template key is the one looked up in a
        caller-supplied ``idealized_residue_dict``.
      max_atoms_missing: when not ``None``, a residue is rebuilt only if its
        number of missing atoms is strictly less than this value.
      log: output stream for messages; ``None`` uses ``null_out()``.
      modify_segids: when true, every atom of a rebuilt residue gets segid
        ``"XXXX"``.
      prefilter_callback: optional callable taking the residue; the residue
        is rebuilt only when it returns a true value.
      idealized_residue_dict: optional template dictionary.  When ``None``
        the default from ``idealized_aa.residue_dict()`` is used and only
        residues classified as "common_amino_acid" are considered; otherwise
        residues without a template key in the supplied dictionary are
        skipped.
      skip_non_protein_chains: when true, chains for which ``is_protein()``
        is false are skipped.

    Returns:
      The number of residues that were rebuilt.

    Raises:
      AssertionError: if ``prefilter_callback`` is given but not callable.
    """
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.rotamer import rotamer_eval
    import mmtbx.monomer_library.server
    from iotbx.pdb import common_residue_names_get_class
    from scitbx.array_family import flex

    if prefilter_callback is None:
        prefilter_callback = lambda r: True
    else:
        assert hasattr(prefilter_callback, "__call__")
    ideal_dict = idealized_residue_dict
    if ideal_dict is None:
        ideal_dict = idealized_aa.residue_dict()
    if log is None:
        log = null_out()
    mon_lib_srv = mmtbx.monomer_library.server.server()
    if selection is None:
        selection = flex.bool(pdb_hierarchy.atoms().size(), True)

    # First pass: collect the residues to rebuild without touching the
    # hierarchy that is being iterated.
    partial_sidechains = []
    for chain in pdb_hierarchy.only_model().chains():
        if skip_non_protein_chains and not chain.is_protein():
            print >> log, " skipping non-protein chain '%s'" % chain.id
            continue
        for residue_group in chain.residue_groups():
            atom_groups = residue_group.atom_groups()
            if len(atom_groups) > 1:
                print >> log, \
                    " %s %s has multiple conformations, skipping" % \
                    (chain.id, residue_group.resid())
                continue
            residue = atom_groups[0]
            i_seqs = residue.atoms().extract_i_seq()
            if not selection.select(i_seqs).all_eq(True):
                continue
            if idealized_residue_dict is None:
                res_class = common_residue_names_get_class(residue.resname)
                if res_class != "common_amino_acid":
                    print >> log, \
                        " skipping non-standard residue %s" % residue.resname
                    continue
            else:
                key = residue.resname.lower()
                if hydrogens == True:
                    key = key + "_h"
                if key not in idealized_residue_dict:
                    # The original branch was a no-op ("pass"), so residues
                    # without a template reached the rebuild step.
                    # NOTE(review): assumes extend_residue needs this key --
                    # confirm for hydrogens=Auto.
                    print >> log, \
                        " skipping residue %s: no idealized template '%s'" % \
                        (residue.resname, key)
                    continue
            missing_atoms = rotamer_eval.eval_residue_completeness(
                residue=residue,
                mon_lib_srv=mon_lib_srv,
                ignore_hydrogens=True)
            if len(missing_atoms) == 0:
                continue
            print >> log, " missing %d atoms in %s: %s" % (
                len(missing_atoms), residue.id_str(), ",".join(missing_atoms))
            if ((max_atoms_missing is not None) and
                    (len(missing_atoms) >= max_atoms_missing)):
                continue
            if prefilter_callback(residue):
                partial_sidechains.append(residue)

    # Second pass: build each replacement and swap it in.
    for residue in partial_sidechains:
        new_residue = extend_residue(
            residue=residue,
            ideal_dict=ideal_dict,
            hydrogens=hydrogens,
            mon_lib_srv=mon_lib_srv,
            match_conformation=True)
        if modify_segids:
            for atom in new_residue.atoms():
                atom.segid = "XXXX"
        rg = residue.parent()
        rg.remove_atom_group(residue)
        rg.append_atom_group(new_residue.detached_copy())

    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchy.atoms().reset_serial()
    return len(partial_sidechains)
def extend_protein_model(pdb_hierarchy,
                         selection=None,
                         hydrogens=Auto,
                         max_atoms_missing=None,
                         log=None,
                         modify_segids=True,
                         prefilter_callback=None,
                         idealized_residue_dict=None,
                         skip_non_protein_chains=True):
    """
    Replace all sidechains with missing non-hydrogen atoms in a PDB hierarchy.

    The single model of the hierarchy is walked chain by chain.  A residue is
    queued for rebuilding when it has one conformer, is fully covered by
    ``selection``, has a usable template, has missing atoms according to
    ``rotamer_eval.eval_residue_completeness`` (hydrogens ignored), passes
    the ``max_atoms_missing`` limit and is accepted by
    ``prefilter_callback``.  Queued residues are rebuilt with
    ``extend_residue`` and replace the originals in place; ``i_seq`` and
    serial numbers are then reset.

    Parameters:
      pdb_hierarchy: hierarchy with exactly one model.
      selection: boolean atom selection indexed by ``i_seq`` (default: all
        atoms).
      hydrogens: passed through to ``extend_residue``; a value equal to True
        makes the template lookup use the ``"_h"`` key.
      max_atoms_missing: optional limit; residues are rebuilt only when the
        count of missing atoms is strictly below it.
      log: message stream (default: ``null_out()``).
      modify_segids: mark atoms of rebuilt residues with segid ``"XXXX"``.
      prefilter_callback: optional callable ``f(residue)``; a false return
        value excludes the residue.
      idealized_residue_dict: optional template dictionary; without it the
        default ``idealized_aa.residue_dict()`` is used and only
        "common_amino_acid" residues are considered, with it residues lacking
        a template key are skipped.
      skip_non_protein_chains: skip chains whose ``is_protein()`` is false.

    Returns:
      Number of rebuilt residues.

    Raises:
      AssertionError: if ``prefilter_callback`` is given but not callable.
    """
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.rotamer import rotamer_eval
    import mmtbx.monomer_library.server
    from iotbx.pdb import common_residue_names_get_class
    from scitbx.array_family import flex

    if prefilter_callback is not None:
        assert hasattr(prefilter_callback, "__call__")
    else:
        prefilter_callback = lambda r: True
    custom_templates = idealized_residue_dict is not None
    if custom_templates:
        ideal_dict = idealized_residue_dict
    else:
        ideal_dict = idealized_aa.residue_dict()
    if log is None:
        log = null_out()
    mon_lib_srv = mmtbx.monomer_library.server.server()
    if selection is None:
        selection = flex.bool(pdb_hierarchy.atoms().size(), True)

    # Candidates are gathered first; the hierarchy is edited afterwards so
    # that it is not modified while being traversed.
    partial_sidechains = []
    for chain in pdb_hierarchy.only_model().chains():
        if skip_non_protein_chains and not chain.is_protein():
            print >> log, " skipping non-protein chain '%s'" % chain.id
            continue
        for residue_group in chain.residue_groups():
            atom_groups = residue_group.atom_groups()
            if len(atom_groups) > 1:
                print >> log, \
                    " %s %s has multiple conformations, skipping" % \
                    (chain.id, residue_group.resid())
                continue
            residue = atom_groups[0]
            residue_sel = selection.select(residue.atoms().extract_i_seq())
            if not residue_sel.all_eq(True):
                continue
            if custom_templates:
                key = residue.resname.lower()
                if hydrogens == True:
                    key = key + "_h"
                if key not in idealized_residue_dict:
                    # Previously a no-op "pass": residues with no template
                    # were not filtered out.
                    # NOTE(review): assumes extend_residue looks up this same
                    # key -- confirm for hydrogens=Auto.
                    print >> log, \
                        " skipping residue %s: no idealized template '%s'" % \
                        (residue.resname, key)
                    continue
            else:
                res_class = common_residue_names_get_class(residue.resname)
                if res_class != "common_amino_acid":
                    print >> log, \
                        " skipping non-standard residue %s" % residue.resname
                    continue
            missing_atoms = rotamer_eval.eval_residue_completeness(
                residue=residue,
                mon_lib_srv=mon_lib_srv,
                ignore_hydrogens=True)
            n_missing = len(missing_atoms)
            if n_missing == 0:
                continue
            print >> log, " missing %d atoms in %s: %s" % (
                n_missing, residue.id_str(), ",".join(missing_atoms))
            within_limit = ((max_atoms_missing is None) or
                            (n_missing < max_atoms_missing))
            if within_limit and prefilter_callback(residue):
                partial_sidechains.append(residue)

    for residue in partial_sidechains:
        new_residue = extend_residue(
            residue=residue,
            ideal_dict=ideal_dict,
            hydrogens=hydrogens,
            mon_lib_srv=mon_lib_srv,
            match_conformation=True)
        if modify_segids:
            for atom in new_residue.atoms():
                atom.segid = "XXXX"
        rg = residue.parent()
        rg.remove_atom_group(residue)
        rg.append_atom_group(new_residue.detached_copy())

    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchy.atoms().reset_serial()
    return len(partial_sidechains)