Ejemplo n.º 1
0
def extend_protein_model(pdb_hierarchy,
                         mon_lib_srv,
                         add_hydrogens=None,
                         selection=None):
    """
    Rebuild every incomplete amino-acid residue of a single-model hierarchy.

    Each residue found to have missing atoms is handed to extend_residue()
    together with the matching idealized amino acid, and the returned atom
    group replaces the original one in its residue group.  The hierarchy is
    modified in place; atom i_seqs and serial numbers are reset afterwards.

    Limited functionality:
      1) Amino-acids only, 2) side chain atoms only.
      3) Not termini aware
      4) Not aware of v2.3 vs v3.2 atom names e.g. HB1,HB2 vs HB2,HB3

    :param pdb_hierarchy: hierarchy to modify; only_model() is used, so it
      must contain exactly one model
    :param mon_lib_srv: monomer library server, forwarded to
      rotamer_eval.eval_residue_completeness() and extend_residue()
    :param add_hydrogens: True selects the hydrogenated ideal residue
      (dictionary key suffixed with "_h"); None selects it only when the
      residue already contains an atom with element "H"; False uses the plain
      ideal residue and additionally skips residues whose missing atoms are
      all hydrogens
    :param selection: optional boolean selection over the atoms of the
      hierarchy; a residue is considered only if all of its atoms are
      selected (default: everything selected)
    :returns: number of residues that were rebuilt
    """
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.rotamer import rotamer_eval
    from scitbx.array_family import flex
    ideal_dict = idealized_aa.residue_dict()
    pdb_atoms = pdb_hierarchy.atoms()
    if selection is None:
        selection = flex.bool(pdb_atoms.size(), True)
    partial_sidechains = []
    for chain in pdb_hierarchy.only_model().chains():
        for residue_group in chain.residue_groups():
            for residue in residue_group.atom_groups():
                # Honour the caller's selection: previously the per-residue
                # selection was computed but never consulted.
                i_seqs = residue.atoms().extract_i_seq()
                if not selection.select(i_seqs).all_eq(True):
                    continue
                if residue.resname.lower() not in ideal_dict:
                    continue
                missing_atoms = rotamer_eval.eval_residue_completeness(
                    residue=residue,
                    mon_lib_srv=mon_lib_srv,
                    ignore_hydrogens=False)
                if len(missing_atoms) == 0:
                    continue
                # "Only hydrogens missing" means every missing atom name
                # starts with the same letter and that letter is H, D or T.
                first_chars = set(name.strip()[0] for name in missing_atoms)
                only_hydrogens = first_chars in (set("H"), set("D"), set("T"))
                if add_hydrogens is False and only_hydrogens:
                    continue
                partial_sidechains.append(residue)
    for residue in partial_sidechains:
        residue_elements = [
            e.strip() for e in residue.atoms().extract_element()
        ]
        res_key = residue.resname.lower()
        use_h_template = (add_hydrogens is True) or (
            add_hydrogens is None and "H" in residue_elements)
        if use_h_template:
            res_key += "_h"
        target_atom_group = ideal_dict[res_key].only_model().only_chain().\
          only_residue_group().only_atom_group()
        new_residue = extend_residue(residue=residue,
                                     target_atom_group=target_atom_group,
                                     mon_lib_srv=mon_lib_srv)
        # NOTE: the rebuilt residue is not re-checked for completeness.
        # Swap the old atom group for a detached copy of the rebuilt one.
        rg = residue.parent()
        rg.remove_atom_group(residue)
        rg.append_atom_group(new_residue.detached_copy())
    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchy.atoms().reset_serial()
    return len(partial_sidechains)
Ejemplo n.º 2
0
def check_missing_atom(pdb_filename):
    """
    Tell whether a PDB file contains a residue with missing non-hydrogen atoms.

    Only residue groups holding exactly one atom group are inspected, and only
    residues whose lower-cased name is a key of idealized_aa.residue_dict().
    Completeness is judged by rotamer_eval.eval_residue_completeness() with
    hydrogens ignored, using the module-level ``mon_lib_server``.

    :param pdb_filename: path of the PDB file to read; the resulting hierarchy
      must contain exactly one model (only_model() is used)
    :returns: True as soon as one incomplete residue is found, else False
    """
    pdb_inp = iotbx.pdb.input(file_name=pdb_filename)
    pdb_hierarchy = pdb_inp.construct_hierarchy()
    ideal_dict = idealized_aa.residue_dict()
    for chain in pdb_hierarchy.only_model().chains():
        for residue_group in chain.residue_groups():
            # Residue groups with zero or several atom groups are skipped.
            if residue_group.atom_groups_size() != 1:
                continue
            for residue in residue_group.atom_groups():
                if residue.resname.lower() not in ideal_dict:
                    continue
                missing_atoms = rotamer_eval.eval_residue_completeness(
                    residue=residue,
                    mon_lib_srv=mon_lib_server,
                    ignore_hydrogens=True)
                if len(missing_atoms) > 0:
                    return True
    return False
Ejemplo n.º 3
0
def filter_before_build (
    pdb_hierarchy,
    fmodel,
    geometry_restraints_manager,
    selection=None,
    params=None,
    verbose=True,
    log=sys.stdout) :
  """
  Pick residues suitable for building alternate conformations - by default,
  this means no MolProbity/geometry outliers, good fit to map, no missing
  atoms, and no pre-existing alternates, but with significant difference
  density nearby.

  :param pdb_hierarchy: single-model hierarchy; its atom count must equal the
    number of scatterers in fmodel.xray_structure (asserted).  Atom i_seqs
    are reset as a side effect.
  :param fmodel: used for validation and for computing the difference maps
  :param geometry_restraints_manager: forwarded to the MolProbity validation
  :param selection: optional boolean selection over atoms; a residue group is
    considered only if all of its atoms are selected (default: all atoms)
  :param params: extracted filter parameters; when None they are parsed from
    the module-level filter_params_str
  :param verbose: when true, print the full validation summary and the
    "no difference density" rejections (other rejections are always logged)
  :param log: output stream for all messages
  :returns: list of atom groups that passed every filter
  :raises Sorry: if no residue passes the filtering criteria
  """
  from mmtbx.validation import molprobity
  from mmtbx.rotamer import rotamer_eval
  import mmtbx.monomer_library.server
  from mmtbx import building
  from iotbx.pdb import common_residue_names_get_class
  from scitbx.array_family import flex
  if (selection is None) :
    selection = flex.bool(fmodel.xray_structure.scatterers().size(), True)
  pdb_atoms = pdb_hierarchy.atoms()
  assert (pdb_atoms.size() == fmodel.xray_structure.scatterers().size())
  pdb_atoms.reset_i_seq()
  full_validation = molprobity.molprobity(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    geometry_restraints_manager=geometry_restraints_manager,
    outliers_only=False,
    rotamer_library="8000")
  if (verbose) :
    full_validation.show(out=log)
  multi_criterion = full_validation.as_multi_criterion_view()
  if (params is None) :
    params = libtbx.phil.parse(filter_params_str).extract()
  mon_lib_srv = mmtbx.monomer_library.server.server()
  two_fofc_map, fofc_map = building.get_difference_maps(fmodel=fmodel)
  residues = []
  filters = params.discard_outliers
  make_sub_header("Identifying candidates for building", out=log)
  # TODO parallelize
  for chain in pdb_hierarchy.only_model().chains() :
    if (not chain.is_protein()) :
      continue
    for residue_group in chain.residue_groups() :
      atom_groups = residue_group.atom_groups()
      id_str = residue_group.id_str()
      # Filter 1: every atom of the residue group must be selected.
      i_seqs = residue_group.atoms().extract_i_seq()
      if (not selection.select(i_seqs).all_eq(True)) :
        continue
      # Filter 2: no pre-existing alternate conformations.
      if (len(atom_groups) > 1) :
        print >> log, "  %s is already multi-conformer" % id_str
        continue
      atom_group = atom_groups[0]
      # Filter 3: standard amino acids only.
      res_class = common_residue_names_get_class(atom_group.resname)
      if (res_class != "common_amino_acid") :
        print >> log, "  %s: non-standard residue" % id_str
        continue
      # Filter 4: no missing non-hydrogen atoms, except tolerated stubs.
      missing_atoms = rotamer_eval.eval_residue_completeness(
        residue=atom_group,
        mon_lib_srv=mon_lib_srv,
        ignore_hydrogens=True)
      if (len(missing_atoms) > 0) :
        # residues modeled as pseudo-ALA are allowed by default; partially
        # missing sidechains are more problematic
        stub_allowed = ((building.is_stub_residue(atom_group)) and
                        (not params.ignore_stub_residues))
        if (not stub_allowed) :
          # The format string previously had a single placeholder for two
          # arguments, which raised TypeError instead of logging.
          print >> log, "  %s: missing or incomplete sidechain (%d atoms)" % \
            (id_str, len(missing_atoms))
          continue
      # Filter 5: no validation outliers.
      validation = multi_criterion.get_residue_group_data(residue_group)
      if (is_validation_outlier(validation, params)) :
        print >> log, "  %s" % str(validation)
        continue
      # Filter 6 (optional): require some difference density near the
      # non-hydrogen atoms.
      if (params.use_difference_map) :
        i_seqs_no_hd = building.get_non_hydrogen_atom_indices(residue_group)
        map_stats = building.local_density_quality(
          fofc_map=fofc_map,
          two_fofc_map=two_fofc_map,
          atom_selection=i_seqs_no_hd,
          xray_structure=fmodel.xray_structure,
          radius=params.sampling_radius)
        if ((map_stats.number_of_atoms_below_fofc_map_level() == 0) and
            (map_stats.fraction_of_nearby_grid_points_above_cutoff()==0)) :
          if (verbose) :
            print >> log, "  no difference density for %s" % id_str
          continue
      residues.append(residue_group.only_atom_group())
  if (len(residues) == 0) :
    raise Sorry("No residues passed the filtering criteria.")
  print >> log, ""
  print >> log, "Alternate conformations will be tried for %d residue(s):" % \
      len(residues)
  building.show_chain_resseq_ranges(residues, out=log, prefix="  ")
  print >> log, ""
  return residues
Ejemplo n.º 4
0
def extend_protein_model(pdb_hierarchy,
                         selection=None,
                         hydrogens=Auto,
                         max_atoms_missing=None,
                         log=None,
                         modify_segids=True,
                         prefilter_callback=None,
                         idealized_residue_dict=None,
                         skip_non_protein_chains=True):
    """
    Replace all sidechains with missing non-hydrogen atoms in a PDB hierarchy.

    The hierarchy is modified in place: each incomplete residue is replaced by
    a detached copy of the atom group returned by extend_residue(), after
    which atom i_seqs and serial numbers are reset.

    :param pdb_hierarchy: single-model hierarchy (only_model() is used)
    :param selection: optional boolean selection over atoms; a residue is
      considered only if all of its atoms are selected (default: all atoms)
    :param hydrogens: forwarded to extend_residue(); when it equals True the
      "_h" variant of the residue is looked up in a caller-supplied dictionary
    :param max_atoms_missing: if not None, only residues with strictly fewer
      missing atoms than this are rebuilt
    :param log: output stream for progress messages (default: null_out())
    :param modify_segids: if true, set the segid of every atom of a rebuilt
      residue to "XXXX"
    :param prefilter_callback: optional callable taking a residue and
      returning a true value if the residue may be rebuilt
    :param idealized_residue_dict: optional dictionary of idealized residues
      keyed by lower-case residue name; when omitted,
      idealized_aa.residue_dict() is used and only common amino acids are
      processed
    :param skip_non_protein_chains: if true, chains for which is_protein() is
      false are skipped
    :returns: number of residues that were rebuilt
    """
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.rotamer import rotamer_eval
    import mmtbx.monomer_library.server
    from iotbx.pdb import common_residue_names_get_class
    from scitbx.array_family import flex
    if prefilter_callback is None:
        prefilter_callback = lambda r: True
    else:
        assert hasattr(prefilter_callback, "__call__")
    ideal_dict = idealized_residue_dict
    if ideal_dict is None:
        ideal_dict = idealized_aa.residue_dict()
    if log is None:
        log = null_out()
    mon_lib_srv = mmtbx.monomer_library.server.server()
    pdb_atoms = pdb_hierarchy.atoms()
    if selection is None:
        selection = flex.bool(pdb_atoms.size(), True)
    partial_sidechains = []
    for chain in pdb_hierarchy.only_model().chains():
        if (not chain.is_protein()) and (skip_non_protein_chains):
            print >> log, "    skipping non-protein chain '%s'" % chain.id
            continue
        for residue_group in chain.residue_groups():
            atom_groups = residue_group.atom_groups()
            if len(atom_groups) > 1:
                print >> log, "    %s %s has multiple conformations, skipping" % \
                  (chain.id, residue_group.resid())
                continue
            residue = atom_groups[0]
            i_seqs = residue.atoms().extract_i_seq()
            if not selection.select(i_seqs).all_eq(True):
                continue
            if idealized_residue_dict is None:
                # Default dictionary: restrict to the common amino acids.
                res_class = common_residue_names_get_class(residue.resname)
                if res_class != "common_amino_acid":
                    print >> log, "    skipping non-standard residue %s" % residue.resname
                    continue
            else:
                # Caller-supplied dictionary: skip residues it has no
                # template for.  This branch used to be a no-op ("pass"), so
                # such residues fell through to the rebuild step.
                # Equality (not identity) is kept on purpose because
                # `hydrogens` may be a non-bool sentinel such as Auto.
                key = residue.resname.lower()
                if hydrogens == True:
                    key = key + "_h"
                if key not in idealized_residue_dict:
                    print >> log, "    skipping %s: no idealized residue '%s'" % (
                        residue.id_str(), key)
                    continue
            missing_atoms = rotamer_eval.eval_residue_completeness(
                residue=residue,
                mon_lib_srv=mon_lib_srv,
                ignore_hydrogens=True)
            if len(missing_atoms) == 0:
                continue
            print >> log, "    missing %d atoms in %s: %s" % (len(
                missing_atoms), residue.id_str(), ",".join(missing_atoms))
            within_limit = ((max_atoms_missing is None)
                            or (len(missing_atoms) < max_atoms_missing))
            if within_limit and prefilter_callback(residue):
                partial_sidechains.append(residue)
    for residue in partial_sidechains:
        new_residue = extend_residue(residue=residue,
                                     ideal_dict=ideal_dict,
                                     hydrogens=hydrogens,
                                     mon_lib_srv=mon_lib_srv,
                                     match_conformation=True)
        if modify_segids:
            # Tag rebuilt atoms so they can be recognised later.
            for atom in new_residue.atoms():
                atom.segid = "XXXX"
        rg = residue.parent()
        rg.remove_atom_group(residue)
        rg.append_atom_group(new_residue.detached_copy())
    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchy.atoms().reset_serial()
    return len(partial_sidechains)
Ejemplo n.º 5
0
def extend_protein_model (pdb_hierarchy,
    selection=None,
    hydrogens=Auto,
    max_atoms_missing=None,
    log=None,
    modify_segids=True,
    prefilter_callback=None,
    idealized_residue_dict=None,
    skip_non_protein_chains=True) :
  """
  Replace all sidechains with missing non-hydrogen atoms in a PDB hierarchy.

  The hierarchy is modified in place: each incomplete residue is replaced by
  a detached copy of the atom group returned by extend_residue(), after which
  atom i_seqs and serial numbers are reset.

  :param pdb_hierarchy: single-model hierarchy (only_model() is used)
  :param selection: optional boolean selection over atoms; a residue is
    considered only if all of its atoms are selected (default: all atoms)
  :param hydrogens: forwarded to extend_residue(); when it equals True the
    "_h" variant of the residue is looked up in a caller-supplied dictionary
  :param max_atoms_missing: if not None, only residues with strictly fewer
    missing atoms than this are rebuilt
  :param log: output stream for progress messages (default: null_out())
  :param modify_segids: if true, set the segid of every atom of a rebuilt
    residue to "XXXX"
  :param prefilter_callback: optional callable taking a residue and returning
    a true value if the residue may be rebuilt
  :param idealized_residue_dict: optional dictionary of idealized residues
    keyed by lower-case residue name; when omitted,
    idealized_aa.residue_dict() is used and only common amino acids are
    processed
  :param skip_non_protein_chains: if true, chains for which is_protein() is
    false are skipped
  :returns: number of residues that were rebuilt
  """
  from mmtbx.monomer_library import idealized_aa
  from mmtbx.rotamer import rotamer_eval
  import mmtbx.monomer_library.server
  from iotbx.pdb import common_residue_names_get_class
  from scitbx.array_family import flex
  if (prefilter_callback is None) :
    prefilter_callback = lambda r: True
  else :
    assert hasattr(prefilter_callback, "__call__")
  ideal_dict = idealized_residue_dict
  if (ideal_dict is None) :
    ideal_dict = idealized_aa.residue_dict()
  if (log is None) :
    log = null_out()
  mon_lib_srv = mmtbx.monomer_library.server.server()
  pdb_atoms = pdb_hierarchy.atoms()
  if (selection is None) :
    selection = flex.bool(pdb_atoms.size(), True)
  partial_sidechains = []
  for chain in pdb_hierarchy.only_model().chains() :
    if (not chain.is_protein()) and (skip_non_protein_chains) :
      print >> log, "    skipping non-protein chain '%s'" % chain.id
      continue
    for residue_group in chain.residue_groups() :
      atom_groups = residue_group.atom_groups()
      if (len(atom_groups) > 1) :
        print >> log, "    %s %s has multiple conformations, skipping" % \
          (chain.id, residue_group.resid())
        continue
      residue = atom_groups[0]
      i_seqs = residue.atoms().extract_i_seq()
      if (not selection.select(i_seqs).all_eq(True)) :
        continue
      if (idealized_residue_dict is None) :
        # Default dictionary: restrict to the common amino acids.
        res_class = common_residue_names_get_class(residue.resname)
        if (res_class != "common_amino_acid") :
          print >> log, "    skipping non-standard residue %s" % residue.resname
          continue
      else :
        # Caller-supplied dictionary: skip residues it has no template for.
        # This branch used to be a no-op ("pass"), so such residues fell
        # through to the rebuild step.  Equality (not identity) is kept on
        # purpose because `hydrogens` may be a non-bool sentinel such as Auto.
        key = residue.resname.lower()
        if (hydrogens == True) :
          key = key + "_h"
        if (key not in idealized_residue_dict) :
          print >> log, "    skipping %s: no idealized residue '%s'" % (
            residue.id_str(), key)
          continue
      missing_atoms = rotamer_eval.eval_residue_completeness(
        residue=residue,
        mon_lib_srv=mon_lib_srv,
        ignore_hydrogens=True)
      if (len(missing_atoms) == 0) :
        continue
      print >> log, "    missing %d atoms in %s: %s" % (len(missing_atoms),
        residue.id_str(), ",".join(missing_atoms))
      within_limit = ((max_atoms_missing is None) or
                      (len(missing_atoms) < max_atoms_missing))
      if (within_limit and prefilter_callback(residue)) :
        partial_sidechains.append(residue)
  for residue in partial_sidechains :
    new_residue = extend_residue(residue=residue,
      ideal_dict=ideal_dict,
      hydrogens=hydrogens,
      mon_lib_srv=mon_lib_srv,
      match_conformation=True)
    if (modify_segids) :
      # Tag rebuilt atoms so they can be recognised later.
      for atom in new_residue.atoms() :
        atom.segid = "XXXX"
    rg = residue.parent()
    rg.remove_atom_group(residue)
    rg.append_atom_group(new_residue.detached_copy())
  pdb_hierarchy.atoms().reset_i_seq()
  pdb_hierarchy.atoms().reset_serial()
  return len(partial_sidechains)