Example #1
0
def exercise_model_utils():
  """
  Regression test for three helpers of the ``building`` module.

  1. ``get_nearby_water_selection``: for the "resname TYR" selection of the
     model returned by ``get_1yjp_pdb()`` the selected atom indices must be
     59..63.
  2. ``generate_sidechain_clusters``: for every idealized residue template
     an empty result is only accepted for ALA, GLY and UNK.
  3. ``show_chain_resseq_ranges``: a hand-built chain with gaps and
     insertion codes must be summarized as ``1-2A,4-6,10B``.

  Raises AssertionError when any of the expectations is not met.
  """
  pdb_in = get_1yjp_pdb()
  # The result is not used further; the chained accessors are kept so they
  # are still exercised on the first residue group of the test model.
  pdb_in.hierarchy.only_model().chains()[0].residue_groups()[0].only_atom_group()
  sele = pdb_in.hierarchy.atom_selection_cache().selection("resname TYR")
  water_sel = building.get_nearby_water_selection(
    pdb_hierarchy=pdb_in.hierarchy,
    xray_structure=pdb_in.input.xray_structure_simple(),
    selection=sele)
  assert (list(water_sel.iselection()) == [59, 60, 61, 62, 63])
  from mmtbx.monomer_library import idealized_aa
  from mmtbx.monomer_library import server
  mon_lib_srv = server.server()
  ideal_dict = idealized_aa.residue_dict()
  # Only the template hierarchies are needed; values() works on both
  # Python 2 and Python 3, unlike iteritems().
  for hierarchy in ideal_dict.values():
    residue = hierarchy.only_model().only_chain().only_residue_group().only_atom_group()
    result = building.generate_sidechain_clusters(residue, mon_lib_srv)
    if (len(result) == 0):
      # no side-chain clusters for UNK as well
      assert (residue.resname in ["ALA", "GLY", "UNK"]), residue.resname
  # show_chain_resseq_ranges
  resids = [ (1,''),(2,''),(2,'A'),(4,''),(5,''),(6,''),(10,'B') ]
  import iotbx.pdb.hierarchy
  chain = iotbx.pdb.hierarchy.chain(id='A')
  for (resseq, icode) in resids:
    rg = iotbx.pdb.hierarchy.residue_group(resseq="%4d" % resseq, icode=icode)
    chain.append_residue_group(rg)
  out = StringIO()
  building.show_chain_resseq_ranges(chain.residue_groups(), out=out,
    prefix="  ")
  assert out.getvalue() == """  chain 'A': 1-2A,4-6,10B\n""", out.getvalue()
Example #2
0
 def __init__(self, target_map, pdb_hierarchy, xray_structure,
              geometry_restraints_manager, rotamer_eval, d_min):
     """
     Record the constructor arguments and load the shared residue resources.

     After construction the instance additionally carries:
       ideal_dict  -- the mapping returned by idealized_aa.residue_dict()
       mon_lib_srv -- a new mmtbx.monomer_library.server.server() instance
     """
     # Must stay the first statement: locals() is captured before any other
     # local name (e.g. the imports below) is bound, so only ``self`` and the
     # constructor arguments are handed to adopt_init_args.
     # NOTE(review): adopt_init_args is defined outside this block; it is
     # presumably the libtbx helper that copies those entries onto self.
     adopt_init_args(self, locals())
     from mmtbx.monomer_library import idealized_aa
     import mmtbx.monomer_library.server
     self.ideal_dict = idealized_aa.residue_dict()
     self.mon_lib_srv = mmtbx.monomer_library.server.server()
Example #3
0
def exercise_00():
    """
    Check the geometry of every idealized amino-acid template in ``iaa``.

    The template dictionary must hold 48 entries and contain the lower-case
    form of every three-letter amino-acid code.  For each code, both the
    plain entry and its ``_h`` counterpart are run through
    ``pdb_interpretation.process``; the third element of the bond and angle
    deviation tuples (plain and rmsZ) must stay below fixed tolerances.
    The deviations are printed for every template.
    """
    templates = iaa.residue_dict()
    assert len(templates) == 48
    three_letter_codes = iotbx.pdb.amino_acid_codes.one_letter_given_three_letter
    for code in three_letter_codes:
        assert code.lower() in templates
    # Restraint libraries are loaded once and shared by all templates.
    mon_lib_srv = monomer_library.server.server()
    ener_lib = monomer_library.server.ener_lib()
    for code in three_letter_codes:
        base_key = code.lower()
        for key in (base_key, base_key + "_h"):
            # The PDB text of each template is a module-level attribute.
            pdb_lines = flex.std_string(vars(iaa)[key].splitlines())
            processed = monomer_library.pdb_interpretation.process(
                mon_lib_srv=mon_lib_srv,
                ener_lib=ener_lib,
                raw_records=pdb_lines)
            sites_cart = processed.xray_structure().sites_cart()
            restraints = processed.geometry_restraints_manager(
                show_energies=False,
                plain_pairs_radius=5.0)
            energies = restraints.energies_sites(sites_cart=sites_cart)
            bond_dev = energies.bond_deviations()
            angle_dev = energies.angle_deviations()
            bond_rmsz = energies.bond_deviations_z()
            angle_rmsz = energies.angle_deviations_z()
            print("%5s" % key,
                  "bonds     : %5.3f %5.3f %5.3f" % bond_dev,
                  "angles     : %5.3f %5.3f %5.3f" % angle_dev)
            assert angle_dev[2] < 1.2, angle_dev[2]
            assert bond_dev[2] < 0.005, bond_dev[2]
            print("%5s" % key,
                  "bonds rmsZ: %5.3f %5.3f %5.3f" % bond_rmsz,
                  "angles rmsZ: %5.3f %5.3f %5.3f" % angle_rmsz)
            assert angle_rmsz[2] < 0.7, angle_rmsz[2]
            assert bond_rmsz[2] < 0.7, bond_rmsz[2]
def exercise_00():
  """
  Check the geometry of every idealized amino-acid template in ``iaa``.

  The template dictionary must hold 44 entries and contain the lower-case
  form of every three-letter amino-acid code.  For each code, both the plain
  entry and its ``_h`` counterpart are run through
  ``pdb_interpretation.process``; the third element of the bond and angle
  deviation tuples (plain and rmsZ) must stay below fixed tolerances.

  The report lines are written with a single-string ``print(...)`` call, so
  the output is the same space-separated line under Python 2 and Python 3.
  """
  d = iaa.residue_dict()
  assert len(d.keys()) == 44
  for aac in iotbx.pdb.amino_acid_codes.one_letter_given_three_letter:
    assert aac.lower() in d
  #
  mon_lib_srv = monomer_library.server.server()
  ener_lib    = monomer_library.server.ener_lib()
  for aac in iotbx.pdb.amino_acid_codes.one_letter_given_three_letter:
    aac = aac.lower()
    for aac_ in [aac, aac+"_h"]:
      # The PDB text of each template is a module-level attribute of iaa.
      residue_as_string = iaa.__dict__[aac_]
      rs = flex.std_string(residue_as_string.splitlines())
      processed_pdb_file = monomer_library.pdb_interpretation.process(
         mon_lib_srv = mon_lib_srv,
         ener_lib    = ener_lib,
         raw_records = rs)
      sites_cart = processed_pdb_file.xray_structure().sites_cart()
      grm = processed_pdb_file.geometry_restraints_manager(
        show_energies = False, plain_pairs_radius = 5.0)
      es = grm.energies_sites(
        sites_cart = sites_cart)
      b = es.bond_deviations()
      a = es.angle_deviations()
      b_z = es.bond_deviations_z()
      a_z = es.angle_deviations_z()
      print(" ".join([
        "%5s"%aac_,
        "bonds     : %5.3f %5.3f %5.3f"%b,
        "angles     : %5.3f %5.3f %5.3f"%a]))
      assert a[2] < 1.2, a[2]
      assert b[2] < 0.005, b[2]
      print(" ".join([
        "%5s"%aac_,
        "bonds rmsZ: %5.3f %5.3f %5.3f"%b_z,
        "angles rmsZ: %5.3f %5.3f %5.3f"%a_z]))
      assert a_z[2] < 0.7, a_z[2]
      assert b_z[2] < 0.7, b_z[2]
Example #5
0
def exercise_model_utils():
  """
  Regression test for three helpers of the ``building`` module.

  1. ``get_nearby_water_selection``: for the "resname TYR" selection of the
     model returned by ``get_1yjp_pdb()`` the selected atom indices must be
     59..63.
  2. ``generate_sidechain_clusters``: for every idealized residue template
     an empty result is only accepted for ALA, GLY and UNK.
  3. ``show_chain_resseq_ranges``: a hand-built chain with gaps and
     insertion codes must be summarized as ``1-2A,4-6,10B``.

  Raises AssertionError when any of the expectations is not met.
  """
  pdb_in = get_1yjp_pdb()
  # The result is not used further; the chained accessors are kept so they
  # are still exercised on the first residue group of the test model.
  pdb_in.hierarchy.only_model().chains()[0].residue_groups()[0].only_atom_group()
  sele = pdb_in.hierarchy.atom_selection_cache().selection("resname TYR")
  water_sel = building.get_nearby_water_selection(
    pdb_hierarchy=pdb_in.hierarchy,
    xray_structure=pdb_in.input.xray_structure_simple(),
    selection=sele)
  assert (list(water_sel.iselection()) == [59, 60, 61, 62, 63])
  from mmtbx.monomer_library import idealized_aa
  from mmtbx.monomer_library import server
  mon_lib_srv = server.server()
  ideal_dict = idealized_aa.residue_dict()
  # Only the template hierarchies are needed; values() works on both
  # Python 2 and Python 3, unlike iteritems().
  for hierarchy in ideal_dict.values():
    residue = hierarchy.only_model().only_chain().only_residue_group().only_atom_group()
    result = building.generate_sidechain_clusters(residue, mon_lib_srv)
    if (len(result) == 0):
      # no side-chain clusters for UNK as well
      assert (residue.resname in ["ALA", "GLY", "UNK"]), residue.resname
  # show_chain_resseq_ranges
  resids = [ (1,''),(2,''),(2,'A'),(4,''),(5,''),(6,''),(10,'B') ]
  import iotbx.pdb.hierarchy
  chain = iotbx.pdb.hierarchy.chain(id='A')
  for (resseq, icode) in resids:
    rg = iotbx.pdb.hierarchy.residue_group(resseq="%4d" % resseq, icode=icode)
    chain.append_residue_group(rg)
  out = StringIO()
  building.show_chain_resseq_ranges(chain.residue_groups(), out=out,
    prefix="  ")
  assert out.getvalue() == """  chain 'A': 1-2A,4-6,10B\n""", out.getvalue()
def extend_protein_model(pdb_hierarchy,
                         mon_lib_srv,
                         add_hydrogens=None,
                         selection=None):
    """
    Rebuild incomplete residues by substituting an ideal amino acid and
    rotating the sidechain to match the old conformation as closely as
    possible.  The hierarchy is modified in place.

    Limited functionality:
      1) Amino-acids only, 2) side chain atoms only.
      3) Not termini aware
      4) Not aware of v2.3 vs v3.2 atom names e.g. HB1,HB2 vs HB2,HB3

    :param pdb_hierarchy: single-model hierarchy to complete
    :param mon_lib_srv: monomer library server passed on to
        eval_residue_completeness and extend_residue
    :param add_hydrogens: True always uses the "_h" ideal template, False
        skips residues whose only missing atoms are hydrogens (all H, all D
        or all T) and uses the plain template, None uses the "_h" template
        only when the residue already contains an "H" element
    :param selection: optional boolean atom selection; a residue is only
        considered when every one of its atoms is selected (default: all
        atoms).  Earlier revisions computed the per-residue selection but
        never applied it.
    :returns: number of atom groups that were rebuilt
    """
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.rotamer import rotamer_eval
    from scitbx.array_family import flex
    ideal_dict = idealized_aa.residue_dict()
    pdb_atoms = pdb_hierarchy.atoms()
    if (selection is None):
        selection = flex.bool(pdb_atoms.size(), True)
    partial_sidechains = []
    for chain in pdb_hierarchy.only_model().chains():
        for residue_group in chain.residue_groups():
            for residue in residue_group.atom_groups():
                i_seqs = residue.atoms().extract_i_seq()
                residue_sel = selection.select(i_seqs)
                # Honour the caller's selection: partially selected residues
                # are left untouched.
                if (not residue_sel.all_eq(True)): continue
                if (residue.resname.lower() not in ideal_dict): continue
                missing_atoms = rotamer_eval.eval_residue_completeness(
                    residue=residue,
                    mon_lib_srv=mon_lib_srv,
                    ignore_hydrogens=False)
                if (len(missing_atoms) == 0): continue
                # True only when all missing atom names start with the same
                # letter and that letter is H, D or T.
                first_letters = set([s.strip()[0] for s in missing_atoms])
                all_h = (len(first_letters) == 1
                         and first_letters.issubset(("H", "D", "T")))
                if (add_hydrogens is False and all_h): continue
                partial_sidechains.append(residue)
    # Replacement happens in a second pass so the hierarchy is not modified
    # while it is being traversed.
    for residue in partial_sidechains:
        residue_elements = [
            e.strip() for e in residue.atoms().extract_element()
        ]
        res_key = residue.resname.lower()
        if ((add_hydrogens is True)
                or (add_hydrogens is None and "H" in residue_elements)):
            res_key += "_h"
        target_atom_group = ideal_dict[res_key].only_model().only_chain().\
          only_residue_group().only_atom_group()
        new_residue = extend_residue(residue=residue,
                                     target_atom_group=target_atom_group,
                                     mon_lib_srv=mon_lib_srv)
        # Completeness of the rebuilt residue is evaluated but not enforced
        # (the corresponding assertion is disabled), so a residue that is
        # still incomplete is accepted silently.
        missing_atoms = rotamer_eval.eval_residue_completeness(
            residue=new_residue,
            mon_lib_srv=mon_lib_srv,
            ignore_hydrogens=False)
        rg = residue.parent()
        rg.remove_atom_group(residue)
        rg.append_atom_group(new_residue.detached_copy())
    # Atom indices and serial numbers are stale after the replacements.
    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchy.atoms().reset_serial()
    return len(partial_sidechains)
Example #7
0
 def __init__ (self,
     target_map,
     pdb_hierarchy,
     xray_structure,
     geometry_restraints_manager,
     rotamer_eval,
     d_min) :
   """
   Record the constructor arguments and load the shared residue resources.

   After construction the instance additionally carries:
     ideal_dict  -- the mapping returned by idealized_aa.residue_dict()
     mon_lib_srv -- a new mmtbx.monomer_library.server.server() instance
   """
   # Must stay the first statement: locals() is captured before any other
   # local name (e.g. the imports below) is bound, so only ``self`` and the
   # constructor arguments are handed to adopt_init_args.
   # NOTE(review): adopt_init_args is defined outside this block; it is
   # presumably the libtbx helper that copies those entries onto self.
   adopt_init_args(self, locals())
   from mmtbx.monomer_library import idealized_aa
   import mmtbx.monomer_library.server
   self.ideal_dict = idealized_aa.residue_dict()
   self.mon_lib_srv = mmtbx.monomer_library.server.server()
def place_side_chains(hierarchy, original_h,
    rotamer_manager, placing_range):
  """
  Re-place the side chains of selected residues of ``hierarchy`` in place.

  For every residue group whose ``resseq`` is contained in
  ``placing_range`` the atom group is first stripped down to the N, CA, C
  and O atoms; then ``side_chain_placement`` is called with the matching
  atom group from ``original_h`` as reference.

  :param hierarchy: hierarchy to modify; each processed residue group must
      hold exactly one atom group
  :param original_h: hierarchy the reference atom groups are taken from
      (first model / chain / residue group / atom group of the selection
      "resseq <n>")
  :param rotamer_manager: passed through to side_chain_placement
  :param placing_range: container of resseq values as stored on the
      residue groups (membership is tested with ``rg.resseq``)

  The idealized-residue lookup that used to sit before the placement call
  was never used and raised KeyError for residue names without a template;
  it has been removed.
  """
  asc = original_h.atom_selection_cache()
  gly_atom_names = set([" N  ", " CA ", " C  ", " O  "])
  for rg in hierarchy.residue_groups():
    if rg.resseq not in placing_range:
      continue
    # cut extra atoms
    ag = rg.only_atom_group()
    for atom in ag.atoms():
      if (atom.name not in gly_atom_names):
        ag.remove_atom(atom=atom)
    # get ag from original hierarchy
    orig_ag = original_h.select(asc.selection("resseq %d" % rg.resseq_as_int())
        ).models()[0].chains()[0].residue_groups()[0].atom_groups()[0]
    side_chain_placement(ag, orig_ag, rotamer_manager)
Example #9
0
def place_side_chains(hierarchy, original_h, rotamer_manager, placing_range):
    """
    Re-place the side chains of selected residues of ``hierarchy`` in place.

    Residue groups whose ``resseq`` is found in ``placing_range`` are reduced
    to their N, CA, C and O atoms and then handed to ``side_chain_placement``
    together with the corresponding atom group of ``original_h`` (first
    model / chain / residue group / atom group of the "resseq <n>"
    selection) and ``rotamer_manager``.
    """
    # NOTE(review): the idealized residue dictionary is built but never read
    # in this function.
    ideal_res_dict = idealized_aa.residue_dict()
    selection_cache = original_h.atom_selection_cache()
    backbone_names = set([" N  ", " CA ", " C  ", " O  "])
    for residue_group in hierarchy.residue_groups():
        if residue_group.resseq not in placing_range:
            continue
        # Strip everything except the backbone atoms.
        atom_group = residue_group.only_atom_group()
        for atom in atom_group.atoms():
            if atom.name in backbone_names:
                continue
            atom_group.remove_atom(atom=atom)
        # Fetch the reference atom group from the original hierarchy.
        resseq_selection = selection_cache.selection(
            "resseq %d" % residue_group.resseq_as_int())
        selected_h = original_h.select(resseq_selection)
        first_residue_group = selected_h.models()[0].chains()[0].residue_groups()[0]
        reference_ag = first_residue_group.atom_groups()[0]
        side_chain_placement(atom_group, reference_ag, rotamer_manager)
Example #10
0
def check_missing_atom(pdb_filename):
    """
    Report whether a PDB file contains a residue with missing heavy atoms.

    Only residue groups with exactly one atom group are inspected, and only
    when the lower-cased residue name is a key of the idealized residue
    dictionary.  Completeness is evaluated with
    ``rotamer_eval.eval_residue_completeness`` ignoring hydrogens and using
    the ``mon_lib_server`` object defined outside this function.

    :param pdb_filename: path of the PDB file to read (single-model)
    :returns: True as soon as one incomplete residue is found, else False
    """
    pdb_inp = iotbx.pdb.input(file_name=pdb_filename)
    pdb_hierarchy = pdb_inp.construct_hierarchy()
    ideal_dict = idealized_aa.residue_dict()
    for chain in pdb_hierarchy.only_model().chains():
        for residue_group in chain.residue_groups():
            # Residue groups with alternative conformations are skipped.
            if (residue_group.atom_groups_size() != 1): continue
            residue = residue_group.atom_groups()[0]
            if (residue.resname.lower() not in ideal_dict): continue
            missing_atoms = rotamer_eval.eval_residue_completeness(
                residue=residue,
                mon_lib_srv=mon_lib_server,
                ignore_hydrogens=True)
            if (len(missing_atoms) > 0):
                return True
    return False
  def __init__(self,
               pdb_hierarchy,
               params=None,
               secondary_structure_annotation=None,
               reference_map=None,
               crystal_symmetry=None,
               grm=None,
               rama_manager=None,
               rotamer_manager=None,
               log=null_out(),
               verbose=False,
               tried_rama_angles={},
               tried_final_rama_angles={},
               n_run=0):
    if len(pdb_hierarchy.models()) > 1:
      raise Sorry("Multi-model files are not supported")
    self.original_pdb_h = pdb_hierarchy
    self.secondary_structure_annotation=secondary_structure_annotation
    asc = pdb_hierarchy.atom_selection_cache()
    self.xrs = pdb_hierarchy.extract_xray_structure(crystal_symmetry=crystal_symmetry)
    self.reference_map = reference_map
    self.resulting_pdb_h = pdb_hierarchy.deep_copy()
    self.resulting_pdb_h.reset_atom_i_seqs()
    self.params = self.process_params(params)
    self.log = log
    self.verbose = verbose
    self.grm = grm
    self.r = rama_manager
    self.ideal_res_dict = idealized_aa.residue_dict()
    self.n_run = n_run
    if self.r is None:
      self.r = rama_eval()
    self.rotamer_manager = rotamer_manager
    if self.rotamer_manager is None:
      self.rotamer_manager = RotamerEval()
    ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
    self.p_initial_rama_outliers = ram.out_percent
    self.p_before_minimization_rama_outliers = None
    self.p_after_minimiaztion_rama_outliers = None
    n_inputs = [reference_map, crystal_symmetry].count(None)
    if not (n_inputs == 0 or n_inputs == 2):
      print >> log, "Need to have both map and symmetry info. Not using map."
      self.reference_map = None

    # here we are recording what CCD solutions were used to fix particular
    # outliers to not use the same in the next CCD try.
    # Nested dict. First level:
    # key: chain id, value: dict
    #   key: resid (string), value: list of tried variants.
    self.tried_rama_angles = tried_rama_angles
    self.tried_final_rama_angles = tried_final_rama_angles

    berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
    self.berkeley_p_before_minimization_rama_outliers = \
        berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
    n_bad_omegas = utils.n_bad_omegas(self.resulting_pdb_h)

    self.berkeley_p_after_minimiaztion_rama_outliers = self.berkeley_p_before_minimization_rama_outliers
    self.ref_exclusion_selection = ""
    self.number_of_ccd_trials = 0
    # print "logic expr outcome:", (self.number_of_ccd_trials < 10 and self.berkeley_p_before_minimization_rama_outliers > 0.001)
    # print self.number_of_ccd_trials < 10
    # print "berkeley before rama out:", self.berkeley_p_before_minimization_rama_outliers
    if (self.berkeley_p_before_minimization_rama_outliers <= 0.001 and
        (n_bad_omegas<1 and self.params.make_all_trans)):
      print >> self.log, "No ramachandran outliers, skipping CCD step."
    print "n_bad_omegas", n_bad_omegas
    print "self.params.make_all_trans",self.params.make_all_trans
    if not self.params.enabled:
      print >> self.log, "Loop idealization is not enabled, use 'enabled=True'."
    while (self.number_of_ccd_trials < self.params.number_of_ccd_trials
        and (self.berkeley_p_after_minimiaztion_rama_outliers > 0.001 or
            (n_bad_omegas>=1 and self.params.make_all_trans))
        and self.params.enabled):
      print >> self.log, "CCD try number, outliers:", self.number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers
      processed_chain_ids = []
      for chain in self.resulting_pdb_h.only_model().chains():
        if chain.id not in self.tried_rama_angles.keys():
          self.tried_rama_angles[chain.id] = {}
        if chain.id not in self.tried_final_rama_angles.keys():
          self.tried_final_rama_angles[chain.id] = {}
        print >> self.log, "Idealizing chain %s" % chain.id
        if chain.id not in processed_chain_ids:
          processed_chain_ids.append(chain.id)
        else:
          continue
        selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
        sel = asc.selection("chain %s" % chain.id)
        chain_h = self.resulting_pdb_h.select(sel)
        m = chain_h.only_model()
        i = 0
        cutted_chain_h = None
        for c in m.chains():
          if i == 0:
            cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(c)
          else:
            print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
            print >> self.log, "  Removing chain %s with %d residues" % (c.id, len(c.residues()))
            m.remove_chain(c)
          i += 1
        exclusions, ch_h = self.idealize_chain(
            hierarchy=(cutted_chain_h if cutted_chain_h else chain_h),
            tried_rama_angles_for_chain=self.tried_rama_angles[chain.id],
            tried_final_rama_angles_for_chain=self.tried_final_rama_angles[chain.id])
        if ch_h is not None:
          set_xyz_smart(
              # dest_h=self.resulting_pdb_h,
              dest_h=chain,
              source_h=ch_h)
          for resnum in exclusions:
            selection += " and not resseq %s" % resnum
        self.ref_exclusion_selection += "(%s) or " % selection
        print "self.tried_rama_angles", self.tried_rama_angles
        print "self.tried_final_rama_angles", self.tried_final_rama_angles
      #
      # dumping and reloading hierarchy to do proper rounding of coordinates
      self.resulting_pdb_h = iotbx.pdb.input(
          source_info=None,
          lines=self.resulting_pdb_h.as_pdb_string()).construct_hierarchy()
      berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
      self.berkeley_p_before_minimization_rama_outliers = \
          berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
      if len(self.ref_exclusion_selection) > 0:
        self.ref_exclusion_selection = self.ref_exclusion_selection[:-3]
      ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
      self.p_before_minimization_rama_outliers = ram.out_percent

      duke_count = ram.get_outliers_count_and_fraction()[0]
      if berkeley_count != duke_count:
        print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count
        self.resulting_pdb_h.write_pdb_file(file_name="%d%s_discrepancy.pdb" % (self.number_of_ccd_trials, self.params.output_prefix))
      if self.params.debug:
        self.resulting_pdb_h.write_pdb_file(
            file_name="%d%s_all_not_minized.pdb" % (self.number_of_ccd_trials,
                self.params.output_prefix))
      if self.params.minimize_whole:
        print >> self.log, "minimizing whole chain..."
        print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
        # print >> sel
        # XXX but first let's check and fix rotamers...
        print >> self.log, "Fixing/checking rotamers in loop idealization..."
        excl_sel = self.ref_exclusion_selection
        if len(excl_sel) == 0:
          excl_sel = None
        non_outliers_for_check = asc.selection("(%s)" % self.ref_exclusion_selection)
        pre_result_h = mmtbx.utils.fix_rotamer_outliers(
          pdb_hierarchy=self.resulting_pdb_h,
          grm=self.grm.geometry,
          xrs=self.xrs,
          map_data=self.reference_map,
          radius=5,
          mon_lib_srv=None,
          rotamer_manager=self.rotamer_manager,
          backrub_range=None, # don't sample backrub at this point
          non_outliers_to_check=non_outliers_for_check, # bool selection
          asc=asc,
          verbose=True,
          log=self.log)

        if self.reference_map is None:
          minimize_wrapper_for_ramachandran(
              hierarchy=self.resulting_pdb_h,
              xrs=self.xrs,
              original_pdb_h=self.original_pdb_h,
              excl_string_selection=self.ref_exclusion_selection,
              grm=self.grm,
              log=None,
              ss_annotation=self.secondary_structure_annotation)
        else:
          mwwm = minimize_wrapper_with_map(
              pdb_h=self.resulting_pdb_h,
              xrs=self.xrs,
              target_map=self.reference_map,
              grm=self.grm,
              ss_annotation=self.secondary_structure_annotation,
              number_of_cycles=Auto,
              log=self.log)
      if self.params.debug:
        self.resulting_pdb_h.write_pdb_file(
            file_name="%d%s_all_minized.pdb" % (self.number_of_ccd_trials,
                self.params.output_prefix))
      ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
      self.p_after_minimiaztion_rama_outliers = ram.out_percent
      berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
      duke_count = ram.get_outliers_count_and_fraction()[0]
      n_bad_omegas = utils.n_bad_omegas(self.resulting_pdb_h)
      self.berkeley_p_after_minimiaztion_rama_outliers = \
          berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
      if berkeley_count != duke_count:
        print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count
      else:
        print >> self.log, "Number of Rama outliers after min:", berkeley_count
      print >> self.log, "Number of bad omegas:", n_bad_omegas
      self.number_of_ccd_trials += 1
Example #12
0
def secondary_structure_from_sequence(pdb_str,
      sequence=None,
      pdb_hierarchy_template=None,
      rotamer_manager=None):
  """ Return pdb.hierarchy with secondary structure according to sequence or
  reference hierarchy. If reference hierarchy provided, the resulting
  hierarchy will be rigid body aligned to it. Residue numbers will start
  from 1 and the chain id is "A".

  Exactly one of sequence and pdb_hierarchy_template must be given.

  pdb_str - "ideal" structure at least 2 residues long.
  sequence - string with sequence (one-letter codes)
  pdb_hierarchy_template - reference hierarchy (no alternative
    conformations; its first model and first chain are used).
  rotamer_manager - passed to side_chain_placement, default RotamerEval().

  Raises Sorry if the template is empty or the number of residues is < 1.
  """
  if rotamer_manager is None:
    rotamer_manager = RotamerEval()
  pht = pdb_hierarchy_template
  assert [sequence, pht].count(None) == 1
  if pht is not None:
    lk = len(pht.altloc_indices().keys())
    if lk == 0:
      raise Sorry(
          "Hierarchy template in secondary_structure_from_sequence is empty")
    assert lk == 1, "Alternative conformations are not supported"
  if sequence is not None:
    number_of_residues = len(sequence)
  else:
    number_of_residues = len(
        pht.models()[0].chains()[0].conformers()[0].residues())
  if number_of_residues<1:
    raise Sorry('sequence should contain at least one residue.')
  ideal_res_dict = idealized_aa.residue_dict()
  real_res_list = None
  if pht is not None:
    real_res_list = pht.models()[0].chains()[0].residue_groups()
  pdb_hierarchy = iotbx.pdb.input(source_info=None, lines=pdb_str).\
      construct_hierarchy()
  truncate_to_poly_gly(pdb_hierarchy)
  chain = pdb_hierarchy.models()[0].chains()[0]
  # Backbone-only first residue of the ideal structure; it is copied into
  # the new chain and then moved on to the next position in every step.
  current_gly_ag = chain.residue_groups()[0].atom_groups()[0]
  new_chain = iotbx.pdb.hierarchy.chain(id="A")
  new_chain.pre_allocate_residue_groups(number_of_additional_residue_groups=\
                                                            number_of_residues)
  r, t = get_r_t_matrices_from_structure(pdb_str)
  for j in range(number_of_residues):
    # put a copy of the truncated residue at the current position
    rg = iotbx.pdb.hierarchy.residue_group(icode="")
    rg.resseq = j+1
    new_chain.append_residue_group(residue_group=rg)
    ag_to_place = current_gly_ag.detached_copy()
    rg.append_atom_group(atom_group=ag_to_place)
    # advance the template residue by the residue-to-residue transformation
    current_gly_ag.atoms().set_xyz(
                          r.elems*current_gly_ag.atoms().extract_xyz()+t.elems)
    # the side chain comes from the template residue if a template was
    # given, otherwise from the idealized residue for this sequence letter
    if pht is not None:
      current_reference_ag = real_res_list[j].atom_groups()[0]
    else:
      current_reference_ag = \
          ideal_res_dict[three_one[sequence[j]].lower()].models()[0].\
          chains()[0].residue_groups()[0].atom_groups()[0]
    side_chain_placement(ag_to_place, current_reference_ag, rotamer_manager)
  new_pdb_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(new_chain)
  # align to real
  if pht is not None:
    fixed_sites, moving_sites = get_matching_sites_cart_in_both_h(pht, new_pdb_h)
    assert len(fixed_sites) == len(moving_sites)
    lsq_fit_obj = superpose.least_squares_fit(reference_sites = fixed_sites,
                                              other_sites = moving_sites)
    new_pdb_h.atoms().set_xyz(
        lsq_fit_obj.r.elems*new_pdb_h.atoms().extract_xyz()+lsq_fit_obj.t.elems)
  return new_pdb_h
def correct_sequence (pdb_hierarchy,
    sequences,
    truncate_to_cbeta=False,
    out=sys.stdout) :
  """
  Modify the sequence for the pdb hierarchy to match that of the aligned
  sequence.  This will remove incompatible atoms; the sidechains will still
  need to be extended separately.  For proteins only - mismatches in nucleic
  acids will only result in a warning.

  :param pdb_hierarchy: iotbx.pdb.hierarchy.root object
  :param sequences: list of iotbx.bioinformatics.sequence objects
  :param truncate_to_cbeta: chop off entire sidechain to C-beta (default:
                            leave common atoms in place)
  :param out: output filehandle (default = stdout)
  :returns: number of atom_group objects renamed
  """
  from mmtbx.monomer_library import idealized_aa
  import mmtbx.validation.sequence
  from iotbx.pdb.amino_acid_codes import three_letter_given_one_letter
  seq_validation = mmtbx.validation.sequence.validation(
    pdb_hierarchy=pdb_hierarchy,
    sequences=sequences,
    log=out)
  for chain_seq in seq_validation.chains :
    if (chain_seq.chain_type == mmtbx.validation.sequence.NUCLEIC_ACID) :
      if (len(chain_seq.mismatch) > 0) :
        # Both placeholders need a value; passing only the chain id made
        # the formatting itself fail.
        print >> out, \
          "  WARNING: will skip %d mismatches in nucleic acid chain '%s'" % \
          (len(chain_seq.mismatch), chain_seq.chain_id)
  # Atom names allowed for each residue type, taken from the idealized
  # templates without the "_h" entries.
  res_dict = idealized_aa.residue_dict()
  expected_names = {}
  for resname in res_dict.keys() :
    if ("_h" not in resname) :
      ideal_res = res_dict[resname]
      expected_names[resname] = set([ a.name for a in ideal_res.atoms() ])
  n_changed = 0
  for chain in pdb_hierarchy.only_model().chains() :
    if (not chain.is_protein()) :
      continue
    for chain_seq in seq_validation.chains :
      if (chain.id != chain_seq.chain_id) or (len(chain_seq.mismatch) == 0) :
        continue
      for residue_group in chain.residue_groups() :
        resid = residue_group.resid()
        if (resid not in chain_seq.mismatch) :
          continue
        idx = chain_seq.mismatch.index(resid)
        new_code = chain_seq.actual_code[idx]
        new_resname = three_letter_given_one_letter.get(new_code)
        if (new_resname is None) :
          # no three-letter name known for this code: leave residue as is
          continue
        expected_atoms = expected_names[new_resname.lower()]
        if (truncate_to_cbeta) :
          expected_atoms = expected_names["ala"]
        atom_groups = residue_group.atom_groups()
        # Remember the old name before it is overwritten so the report
        # below shows "OLD --> NEW".
        old_resname = atom_groups[0].resname
        # Removed atoms are counted over all atom groups of the residue.
        n_removed = 0
        for atom_group in atom_groups :
          n_changed += 1
          atom_group.resname = new_resname
          for atom in atom_group.atoms() :
            if (atom.name not in expected_atoms) :
              atom_group.remove_atom(atom)
              n_removed += 1
        print >> out, "  chain '%s' %s %s --> %s (%d atoms removed)" % \
          (chain.id, resid, old_resname, new_resname, n_removed)
  pdb_hierarchy.atoms().reset_i_seq()
  return n_changed
Example #14
0
def extend_protein_model(pdb_hierarchy,
                         selection=None,
                         hydrogens=Auto,
                         max_atoms_missing=None,
                         log=None,
                         modify_segids=True,
                         prefilter_callback=None,
                         idealized_residue_dict=None,
                         skip_non_protein_chains=True):
    """
    Replace all sidechains with missing non-hydrogen atoms in a PDB
    hierarchy (modified in place).

    A residue is rebuilt when all of the following hold: it is the only
    atom group of its residue group, every one of its atoms is selected, it
    is a common amino acid (checked only when no idealized_residue_dict is
    supplied), it misses at least one non-hydrogen atom, the number of
    missing atoms is below max_atoms_missing (if given) and
    prefilter_callback(residue) is true.  Rebuilt residues get the segid
    "XXXX" when modify_segids is set.

    :returns: number of residues that were rebuilt
    """
    from mmtbx.monomer_library import idealized_aa
    from mmtbx.rotamer import rotamer_eval
    import mmtbx.monomer_library.server
    from iotbx.pdb import common_residue_names_get_class
    from scitbx.array_family import flex

    def _accept_all(_residue):
        return True

    if (prefilter_callback is None):
        prefilter_callback = _accept_all
    else:
        assert hasattr(prefilter_callback, "__call__")
    ideal_dict = idealized_residue_dict
    if (ideal_dict is None):
        ideal_dict = idealized_aa.residue_dict()
    if (log is None):
        log = null_out()
    mon_lib_srv = mmtbx.monomer_library.server.server()
    pdb_atoms = pdb_hierarchy.atoms()
    if (selection is None):
        selection = flex.bool(pdb_atoms.size(), True)

    # First pass: collect the residues to rebuild without touching the
    # hierarchy.
    incomplete = []
    for chain in pdb_hierarchy.only_model().chains():
        chain_is_protein = chain.is_protein()
        if (skip_non_protein_chains) and (not chain_is_protein):
            print >> log, "    skipping non-protein chain '%s'" % chain.id
            continue
        for residue_group in chain.residue_groups():
            conformers = residue_group.atom_groups()
            if (len(conformers) > 1):
                print >> log, "    %s %s has multiple conformations, skipping" % \
                  (chain.id, residue_group.resid())
                continue
            residue = conformers[0]
            atom_flags = selection.select(residue.atoms().extract_i_seq())
            if (not atom_flags.all_eq(True)):
                continue
            if (idealized_residue_dict is None):
                if (common_residue_names_get_class(residue.resname)
                        != "common_amino_acid"):
                    print >> log, "    skipping non-standard residue %s" % residue.resname
                    continue
            else:
                # NOTE(review): the outcome of this lookup is ignored, so a
                # residue without a matching template is not skipped here.
                key = residue.resname.lower()
                if (hydrogens == True):
                    key += "_h"
                if (key not in idealized_residue_dict.keys()):
                    pass
            missing_atoms = rotamer_eval.eval_residue_completeness(
                residue=residue,
                mon_lib_srv=mon_lib_srv,
                ignore_hydrogens=True)
            n_missing = len(missing_atoms)
            if (n_missing == 0):
                continue
            print >> log, "    missing %d atoms in %s: %s" % (
                n_missing, residue.id_str(), ",".join(missing_atoms))
            within_limit = ((max_atoms_missing is None)
                            or (n_missing < max_atoms_missing))
            if (within_limit and prefilter_callback(residue)):
                incomplete.append(residue)

    # Second pass: swap every collected residue for its rebuilt version.
    for residue in incomplete:
        rebuilt = extend_residue(residue=residue,
                                 ideal_dict=ideal_dict,
                                 hydrogens=hydrogens,
                                 mon_lib_srv=mon_lib_srv,
                                 match_conformation=True)
        if (modify_segids):
            for atom in rebuilt.atoms():
                atom.segid = "XXXX"
        parent_group = residue.parent()
        parent_group.remove_atom_group(residue)
        parent_group.append_atom_group(rebuilt.detached_copy())
    # Atom indices and serial numbers are stale after the replacements.
    pdb_hierarchy.atoms().reset_i_seq()
    pdb_hierarchy.atoms().reset_serial()
    return len(incomplete)
def correct_sequence(pdb_hierarchy,
                     sequences,
                     truncate_to_cbeta=False,
                     out=sys.stdout):
    """
    Modify the sequence of the PDB hierarchy to match the aligned sequence.

    Atoms that are not part of the new residue type are removed; the
    sidechains still need to be extended separately.  For proteins only -
    mismatches in nucleic acid chains only result in a warning.

    :param pdb_hierarchy: iotbx.pdb.hierarchy.root object
    :param sequences: list of iotbx.bioinformatics.sequence objects
    :param truncate_to_cbeta: chop off entire sidechain to C-beta (default:
                              leave common atoms in place)
    :param out: output filehandle (default = stdout)
    :returns: number of atom_group objects renamed
    """
    from mmtbx.monomer_library import idealized_aa
    import mmtbx.validation.sequence
    from iotbx.pdb.amino_acid_codes import three_letter_given_one_letter
    seq_validation = mmtbx.validation.sequence.validation(
        pdb_hierarchy=pdb_hierarchy, sequences=sequences, log=out)
    # Nucleic acid chains are never modified; just report their mismatches.
    for chain_seq in seq_validation.chains:
        if (chain_seq.chain_type == mmtbx.validation.sequence.NUCLEIC_ACID
                and len(chain_seq.mismatch) > 0):
            # The format string has two placeholders, so it needs both the
            # mismatch count and the chain ID.
            print("  WARNING: will skip %d mismatches in nucleic acid chain '%s'" %
                  (len(chain_seq.mismatch), chain_seq.chain_id), file=out)
    # Atom names expected for each idealized residue; entries whose key
    # contains "_h" are left out.
    res_dict = idealized_aa.residue_dict()
    expected_names = {}
    for resname, ideal_res in res_dict.items():
        if "_h" not in resname:
            expected_names[resname] = {a.name for a in ideal_res.atoms()}
    n_changed = 0
    for chain in pdb_hierarchy.only_model().chains():
        if (not chain.is_protein()):
            continue
        for chain_seq in seq_validation.chains:
            if (chain.id != chain_seq.chain_id):
                continue
            if (len(chain_seq.mismatch) == 0):
                continue
            for residue_group in chain.residue_groups():
                resid = residue_group.resid()
                if (resid not in chain_seq.mismatch):
                    continue
                # mismatch and actual_code are indexed in parallel.
                idx = chain_seq.mismatch.index(resid)
                new_code = chain_seq.actual_code[idx]
                new_resname = three_letter_given_one_letter.get(new_code)
                if (new_resname is None):
                    continue
                expected_atoms = expected_names[new_resname.lower()]
                if (truncate_to_cbeta):
                    expected_atoms = expected_names["ala"]
                atom_groups = residue_group.atom_groups()
                # Remember the original name before it is overwritten so the
                # summary line really shows "old --> new".
                old_resname = atom_groups[0].resname
                # Counted over all atom groups of this residue group.
                n_removed = 0
                for atom_group in atom_groups:
                    n_changed += 1
                    atom_group.resname = new_resname
                    for atom in atom_group.atoms():
                        if (atom.name not in expected_atoms):
                            atom_group.remove_atom(atom)
                            n_removed += 1
                print("  chain '%s' %s %s --> %s (%d atoms removed)" %
                      (chain.id, resid, old_resname, new_resname, n_removed),
                      file=out)
    pdb_hierarchy.atoms().reset_i_seq()
    return n_changed
Example #16
0
def secondary_structure_from_sequence(pdb_str,
      sequence=None,
      pdb_hierarchy_template=None,
      rotamer_manager=None):
  """ Return pdb.hierarchy with secondary structure according to sequence or
  reference hierarchy. If reference hierarchy provided, the resulting
  hierarchy will be rigid body aligned to it. Residue numbers will start
  from 1.

  Exactly one of sequence / pdb_hierarchy_template must be given.

  pdb_str - "ideal" structure at least 2 residues long.
  sequence - string with sequence (one-letter codes)
  pdb_hierarchy_template - reference hierarchy.
  rotamer_manager - passed to side_chain_placement; a RotamerEval() is
                    created when omitted.

  Raises Sorry if the template hierarchy is empty or if there is no residue
  to build.
  """
  if rotamer_manager is None:
    rotamer_manager = RotamerEval()
  pht = pdb_hierarchy_template
  assert [sequence, pht].count(None) == 1
  # From here on the template is always tested the same way.
  have_template = pht is not None
  if have_template:
    n_altlocs = len(pht.altloc_indices().keys())
    if n_altlocs == 0:
      raise Sorry(
          "Hierarchy template in secondary_structure_from_sequence is empty")
    assert n_altlocs == 1, "Alternative conformations are not supported"
  if sequence is not None:
    number_of_residues = len(sequence)
  else:
    number_of_residues = len(
        pht.models()[0].chains()[0].conformers()[0].residues())
  if number_of_residues < 1:
    raise Sorry('sequence should contain at least one residue.')
  ideal_res_dict = idealized_aa.residue_dict()
  real_res_list = None
  if have_template:
    real_res_list = pht.models()[0].chains()[0].residue_groups()
  pdb_hierarchy = iotbx.pdb.input(source_info=None, lines=pdb_str).\
      construct_hierarchy()
  truncate_to_poly_gly(pdb_hierarchy)
  chain = pdb_hierarchy.models()[0].chains()[0]
  # First residue of the truncated ideal structure; it is copied into the new
  # chain and then moved forward on every iteration.
  current_gly_ag = chain.residue_groups()[0].atom_groups()[0]
  new_chain = iotbx.pdb.hierarchy.chain(id="A")
  new_chain.pre_allocate_residue_groups(number_of_additional_residue_groups=\
                                                            number_of_residues)
  r, t = get_r_t_matrices_from_structure(pdb_str)
  for j in range(number_of_residues):
    # New residue group numbered from 1, holding a copy of the template
    # residue at its current position.
    rg = iotbx.pdb.hierarchy.residue_group(icode="")
    rg.resseq = j+1
    new_chain.append_residue_group(residue_group=rg)
    ag_to_place = current_gly_ag.detached_copy()
    rg.append_atom_group(atom_group=ag_to_place)
    # Apply (r, t) to the template so the next copy lands one step further;
    # this must happen after the copy above was taken.
    current_gly_ag.atoms().set_xyz(
                          r.elems*current_gly_ag.atoms().extract_xyz()+t.elems)
    # Reference atom group for the side chain: the template residue when a
    # template is given, otherwise the idealized residue for this letter.
    if have_template:
      current_reference_ag = real_res_list[j].atom_groups()[0]
    else:
      ideal_h = ideal_res_dict[three_one[sequence[j]].lower()]
      current_reference_ag = ideal_h.models()[0].chains()[0].\
          residue_groups()[0].atom_groups()[0]
    side_chain_placement(ag_to_place, current_reference_ag, rotamer_manager)
  new_pdb_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(new_chain)
  # align to real
  if have_template:
    fixed_sites, moving_sites = get_matching_sites_cart_in_both_h(pht, new_pdb_h)
    assert len(fixed_sites) == len(moving_sites)
    lsq_fit_obj = superpose.least_squares_fit(reference_sites = fixed_sites,
                                              other_sites = moving_sites)
    new_pdb_h.atoms().set_xyz(
        lsq_fit_obj.r.elems*new_pdb_h.atoms().extract_xyz()+lsq_fit_obj.t.elems)
  return new_pdb_h
def extend_protein_model(pdb_hierarchy,
    selection=None,
    hydrogens=Auto,
    max_atoms_missing=None,
    log=None,
    modify_segids=True,
    prefilter_callback=None,
    idealized_residue_dict=None,
    skip_non_protein_chains=True):
  """
  Replace all sidechains with missing non-hydrogen atoms in a PDB hierarchy.

  :param pdb_hierarchy: hierarchy to modify in place (single model)
  :param selection: boolean atom selection; a residue is only considered if
                    all of its atoms are selected (default: all atoms)
  :param hydrogens: passed on to extend_residue; when True, the "_h" key
                    variant is used for a caller-supplied dictionary
  :param max_atoms_missing: if not None, only rebuild residues missing fewer
                            atoms than this
  :param log: object with a write() method for messages (default: null_out())
  :param modify_segids: set segid "XXXX" on all atoms of rebuilt residues
  :param prefilter_callback: callable taking a residue and returning True if
                             it should be rebuilt (default: accept all)
  :param idealized_residue_dict: replacement for idealized_aa.residue_dict();
                                 when given, the common-amino-acid check is
                                 not applied
  :param skip_non_protein_chains: skip chains where is_protein() is false
  :returns: number of residues that were rebuilt
  """
  from mmtbx.monomer_library import idealized_aa
  from mmtbx.rotamer import rotamer_eval
  import mmtbx.monomer_library.server
  from iotbx.pdb import common_residue_names_get_class
  from scitbx.array_family import flex
  if (prefilter_callback is not None):
    assert hasattr(prefilter_callback, "__call__")
  else:
    prefilter_callback = lambda r: True
  ideal_dict = idealized_residue_dict
  if (ideal_dict is None):
    ideal_dict = idealized_aa.residue_dict()
  if (log is None):
    log = null_out()
  mon_lib_srv = mmtbx.monomer_library.server.server()
  pdb_atoms = pdb_hierarchy.atoms()
  if (selection is None):
    selection = flex.bool(pdb_atoms.size(), True)
  # Messages go through log.write() rather than the Python 2-only
  # "print >> log" statement, so this runs unchanged on Python 2 and 3.
  partial_sidechains = []
  for chain in pdb_hierarchy.only_model().chains():
    if (not chain.is_protein()) and (skip_non_protein_chains):
      log.write("    skipping non-protein chain '%s'" % chain.id + "\n")
      continue
    for residue_group in chain.residue_groups():
      atom_groups = residue_group.atom_groups()
      if (len(atom_groups) > 1):
        log.write("    %s %s has multiple conformations, skipping" %
          (chain.id, residue_group.resid()) + "\n")
        continue
      residue = atom_groups[0]
      # Only residues lying entirely inside the selection are considered.
      i_seqs = residue.atoms().extract_i_seq()
      residue_sel = selection.select(i_seqs)
      if (not residue_sel.all_eq(True)):
        continue
      if (idealized_residue_dict is None):
        res_class = common_residue_names_get_class(residue.resname)
        if (res_class != "common_amino_acid"):
          log.write("    skipping non-standard residue %s" % residue.resname
            + "\n")
          continue
      else:
        key = residue.resname.lower()
        if (hydrogens == True):
          key = key + "_h"
        if (key not in idealized_residue_dict.keys()):
          # NOTE(review): this check has never had any effect - residues
          # without an entry in the supplied dictionary are still processed
          # below. Presumably they should be skipped; confirm against
          # extend_residue before changing.
          pass
      missing_atoms = rotamer_eval.eval_residue_completeness(
        residue=residue,
        mon_lib_srv=mon_lib_srv,
        ignore_hydrogens=True)
      if (len(missing_atoms) > 0):
        log.write("    missing %d atoms in %s: %s" % (len(missing_atoms),
          residue.id_str(), ",".join(missing_atoms)) + "\n")
        if ((max_atoms_missing is None) or
            (len(missing_atoms) < max_atoms_missing)):
          if (prefilter_callback(residue)):
            partial_sidechains.append(residue)
  # Rebuilding happens after the scan, so the hierarchy is not modified while
  # it is being iterated over.
  for residue in partial_sidechains:
    new_residue = extend_residue(residue=residue,
      ideal_dict=ideal_dict,
      hydrogens=hydrogens,
      mon_lib_srv=mon_lib_srv,
      match_conformation=True)
    if (modify_segids):
      for atom in new_residue.atoms():
        atom.segid = "XXXX"
    rg = residue.parent()
    rg.remove_atom_group(residue)
    rg.append_atom_group(new_residue.detached_copy())
  pdb_hierarchy.atoms().reset_i_seq()
  pdb_hierarchy.atoms().reset_serial()
  return len(partial_sidechains)