def validate_residues(self):
     from mmtbx.conformation_dependent_library import generate_protein_threes
     from mmtbx.rotamer import ramachandran_eval, rotamer_eval
     # this is so we generate rama_eval only once
     rama_eval = ramachandran_eval.RamachandranEval()
     rota_eval = rotamer_eval.RotamerEval()
     rotamer_id = rotamer_eval.RotamerID()  # loads in the rotamer names
     threes = generate_protein_threes(hierarchy=self.pdb_hierarchy,
                                      include_non_linked=True,
                                      backbone_only=False,
                                      geometry=None)
     for i, three in enumerate(threes):
         if i == 0:
             self.residues.append(
                 ValidationResidue(three,
                                   rama_eval,
                                   rota_eval,
                                   rotamer_id,
                                   index=0))
         self.residues.append(
             ValidationResidue(three, rama_eval, rota_eval, rotamer_id))
         if three.end:
             self.residues.append(
                 ValidationResidue(three,
                                   rama_eval,
                                   rota_eval,
                                   rotamer_id,
                                   index=2))
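A minimal standalone sketch of the same three-residue iteration, assuming a hypothetical input file "model.pdb" and using only calls that appear in the examples on this page:

import iotbx.pdb
from mmtbx.conformation_dependent_library import generate_protein_threes

# "model.pdb" is a placeholder; any protein-containing PDB file works.
pdb_hierarchy = iotbx.pdb.input(file_name="model.pdb").construct_hierarchy()
threes = generate_protein_threes(hierarchy=pdb_hierarchy,
                                 include_non_linked=True,
                                 backbone_only=False,
                                 geometry=None)
for i, three in enumerate(threes):
    # Each window holds three consecutive residues; index 1 is the central
    # residue whose phi/psi and rotamer are validated above.  three.end marks
    # the last window of a chain, which is why an extra ValidationResidue with
    # index=2 is appended there.
    print(i, three.get_resnames(), bool(three.end))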
def exercise_phi_psi_extraction():
  for n_prox, raw_records in [
      ([0, 0], rec_1_residue),
      ([0, 0], rec_2_residues),
      ([4, 2], rec_3_residues),
      ([6, 4], rec_4_residues),
      ([0, 0], rec_2_chains),
      ([0, 0], rec_2_segids),
      ([8, 4], rec_2_acs_edge),
      ([8, 4], rec_2_acs_middle),
      ([6, 4], rec_4_residues_isertions),
      ([12, 10], pdb_1yjp),
      ([8, 4], pdb_1yjp_minus_4),
      ([4, 2], rec_3_res_ac_h),
      ([8, 4], rec_2_acs_middle_one_atom_1),
      ([8, 4], rec_2_acs_middle_one_atom_2),
      ([8, 4], rec_2_acs_middle_one_atom_3),
      ]:
    tmp_hierarchy = iotbx.pdb.input(
      source_info=None,
      lines=flex.split_lines(raw_records)).construct_hierarchy()
    for opp in range(2):
      proxies = []
      for three in generate_protein_threes(
          hierarchy=tmp_hierarchy,
          geometry=None):
        ppp = three.get_dummy_dihedral_proxies(only_psi_phi_pairs=opp)
        print(three, 'ppp', len(ppp))
        proxies.extend(ppp)
      print(len(proxies), n_prox)
      assert len(proxies) == n_prox[opp], \
         "Expected %d, got %d" % (
           n_prox[opp],
           len(proxies),
           )
Example #3
def update_restraints(
    hierarchy,
    geometry,  # restraints_manager,
    current_geometry=None,  # xray_structure!!
    sites_cart=None,
    cdl_proxies=None,
    ideal=True,
    esd=True,
    esd_factor=1.0,
    log=None,
    verbose=False,
):
    global registry
    registry = RestraintsRegistry()
    if current_geometry:
        assert not sites_cart
        sites_cart = current_geometry.sites_cart()
    if sites_cart:
        pdb_atoms = hierarchy.atoms()
        # XXX PDB_TRANSITION VERY SLOW
        for j_seq, atom in enumerate(pdb_atoms):
            atom.xyz = sites_cart[j_seq]

    threes = None
    average_updates = 0
    total_updates = 0
    for threes in generate_protein_threes(
        hierarchy,
        geometry,
        omega_cdl=True,
        # verbose=verbose,
    ):
        threes.apply_updates = apply_updates
        if threes.cis_group():
            if verbose and 0:
                print "cis " * 20
                print threes
            continue

        restraint_values = get_restraint_values(threes)
        if restraint_values is None:
            continue
        if restraint_values[0] == "I":
            average_updates += 1
        else:
            total_updates += 1
        threes.apply_updates(threes, restraint_values, cdl_proxies, ideal=ideal, esd=esd, esd_factor=esd_factor)
    if registry.n:
        threes.apply_average_updates(registry)
        assert 0
    geometry.reset_internals()
    if verbose and threes and threes.errors:
        if log:
            log.write("  Residues not completely updated with CDL restraints\n\n")
        for line in threes.errors:
            if log:
                log.write("%s\n" % line)
            else:
                print(line)
    return geometry
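A rough driver sketch for update_restraints, assuming the tst_rdl helper used in other examples on this page and a hypothetical "model.pdb"; cdl_proxies=None is only a placeholder, real callers pass the proxies collected during restraint setup:

import sys
import iotbx.pdb
from mmtbx.conformation_dependent_library.tst_rdl import \
    get_geometry_restraints_manager

filename = "model.pdb"  # hypothetical input file
pdb_hierarchy = iotbx.pdb.input(file_name=filename).construct_hierarchy()
pdb_hierarchy.reset_i_seq_if_necessary()
grm = get_geometry_restraints_manager(filename)
# cdl_proxies=None is an assumption for this sketch; production code supplies
# the CDL proxies gathered when the restraints were built.
geometry = update_restraints(pdb_hierarchy,
                             grm,
                             cdl_proxies=None,
                             log=sys.stdout,
                             verbose=False)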
def exercise_phi_psi_extraction():
    for n_prox, raw_records in [
        ([0, 0], rec_1_residue),
        ([0, 0], rec_2_residues),
        ([4, 2], rec_3_residues),
        ([6, 4], rec_4_residues),
        ([0, 0], rec_2_chains),
        ([0, 0], rec_2_segids),
        ([8, 4], rec_2_acs_edge),
        ([8, 4], rec_2_acs_middle),
        ([6, 4], rec_4_residues_isertions),
        ([12, 10], pdb_1yjp),
        ([8, 4], pdb_1yjp_minus_4),
        ([4, 2], rec_3_res_ac_h),
        ([8, 4], rec_2_acs_middle_one_atom_1),
        ([8, 4], rec_2_acs_middle_one_atom_2),
        ([8, 4], rec_2_acs_middle_one_atom_3),
    ]:
        tmp_hierarchy = iotbx.pdb.input(
            source_info=None,
            lines=flex.split_lines(raw_records)).construct_hierarchy()
        for opp in range(2):
            proxies = []
            for three in generate_protein_threes(hierarchy=tmp_hierarchy,
                                                 geometry=None):
                ppp = three.get_dummy_dihedral_proxies(only_psi_phi_pairs=opp)
                print(three, 'ppp', len(ppp))
                proxies.extend(ppp)
            print(len(proxies), n_prox)
            assert len(proxies) == n_prox[opp], \
               "Expected %d, got %d" % (
                 n_prox[opp],
                 len(proxies),
                 )
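The same proxy extraction outside the test harness, as a hedged sketch for an arbitrary model (the "model.pdb" name is an assumption):

import iotbx.pdb
from mmtbx.conformation_dependent_library import generate_protein_threes

hierarchy = iotbx.pdb.input(file_name="model.pdb").construct_hierarchy()
proxies = []
for three in generate_protein_threes(hierarchy=hierarchy, geometry=None):
    # only_psi_phi_pairs=True keeps just the phi/psi pair of the central
    # residue, which corresponds to the second count in each n_prox pair above.
    proxies.extend(three.get_dummy_dihedral_proxies(only_psi_phi_pairs=True))
print("%d dummy phi/psi dihedral proxies" % len(proxies))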
 def extract_proxies(self):
   self.proxies = ext.shared_phi_psi_proxy()
   from mmtbx.conformation_dependent_library import generate_protein_threes
   selected_h = self.pdb_hierarchy.select(self.bool_atom_selection)
   n_seq = flex.max(selected_h.atoms().extract_i_seq())
   for three in generate_protein_threes(
       hierarchy=selected_h,
       geometry=None):
     rc = three.get_phi_psi_atoms()
     if rc is None: continue
     phi_atoms, psi_atoms = rc
     rama_key = three.get_ramalyze_key()
     i_seqs = [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq]
     resnames = three.get_resnames()
     r_name = resnames[1]
     assert rama_key in range(6)
     text_rama_key = ramalyze.res_types[rama_key]
     assert text_rama_key in ["general", "glycine", "cis-proline", "trans-proline",
                "pre-proline", "isoleucine or valine"]
     proxy = ext.phi_psi_proxy(
         residue_name=r_name,
         residue_type=text_rama_key,
         i_seqs=i_seqs)
     if not is_proxy_present(self.proxies, n_seq, proxy):
       self.proxies.append(proxy)
    print("", file=self.log)
    print("  %d Ramachandran restraints generated." % (
        self.get_n_proxies()), file=self.log)
 def extract_proxies(self, log):
     self.proxies = ext.shared_phi_psi_proxy()
     from mmtbx.conformation_dependent_library import generate_protein_threes
     selected_h = self.pdb_hierarchy.select(self.bool_atom_selection)
     n_seq = flex.max(selected_h.atoms().extract_i_seq())
     for three in generate_protein_threes(hierarchy=selected_h,
                                          geometry=None):
         rc = three.get_phi_psi_atoms()
         if rc is None: continue
         phi_atoms, psi_atoms = rc
         rama_key = three.get_ramalyze_key()
         i_seqs = [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq]
         resnames = three.get_resnames()
         r_name = resnames[1]
         assert rama_key in range(6)
         text_rama_key = ramalyze.res_types[rama_key]
         assert text_rama_key in [
             "general", "glycine", "cis-proline", "trans-proline",
             "pre-proline", "isoleucine or valine"
         ]
         proxy = ext.phi_psi_proxy(residue_name=r_name,
                                   residue_type=text_rama_key,
                                   i_seqs=i_seqs)
         if not is_proxy_present(self.proxies, n_seq, proxy):
             self.proxies.append(proxy)
      print("", file=log)
      print("  %d Ramachandran restraints generated." % (
          self.get_n_proxies()), file=log)
Example #7
def run():
    filename = 'tst_multi.pdb'
    f = open(filename, 'w')
    f.write(pdb_lines)
    f.close()
    pdb_inp = pdb.input(filename)
    pdb_hierarchy = pdb_inp.construct_hierarchy()
    from mmtbx.conformation_dependent_library.tst_rdl import \
        get_geometry_restraints_manager
    geometry_restraints_manager = get_geometry_restraints_manager(filename)
    pdb_hierarchy.reset_i_seq_if_necessary()
    refine = [
        False,  # -179
        True,  # -44
        False,  # 86
        True,  # -22
        False,  # -179
    ]
    refine += [True] * 5
    refine += [False] * 14
    omegalyze = [
        False,
        False,
        False,
        True,
        False,
    ]
    omegalyze += [True] * 3
    omegalyze += [False] * 16
    from mmtbx.conformation_dependent_library import generate_protein_threes
    for i, threes in enumerate(
            generate_protein_threes(
                pdb_hierarchy,
                geometry_restraints_manager,
                cdl_class=True,
                #verbose=verbose,
            )):
        print(i, threes)
        print('  omega   %5.1f' % threes.get_omega_value())
        print("  cis?    %-5s %s" %
              (threes.cis_group(), threes.cis_group(limit=30)))
        print("  trans?  %-5s %s" %
              (threes.trans_group(), threes.trans_group(limit=30)))
        print("  rama    %s" % threes.get_ramalyze_key())
        print('  conf    %s' % threes.is_pure_main_conf())
        assert threes.cis_group() == refine[i], '%s!=%s' % (threes.cis_group(),
                                                            refine[i])
        assert threes.cis_group(limit=30) == omegalyze[i]

    for j in range(0, 181, 10):
        i += 1
        print("  %3d %-5s %-8s %-5s" % (
            j,
            threes._define_omega_a_la_duke_using_limit(j) == 'cis',
            threes._define_omega_a_la_duke_using_limit(j, limit=30),
            refine[i],
        ))
        assert (threes._define_omega_a_la_duke_using_limit(j) == 'cis'
                ) == refine[i]
 def extract_proxies(self, hierarchy):
     self.hierarchy = hierarchy
     selected_h = hierarchy.select(self.bool_atom_selection)
     n_seq = flex.max(selected_h.atoms().extract_i_seq())
     # Drop all previous proxies
     self._oldfield_proxies = ext.shared_phi_psi_proxy()
     self._emsley_proxies = ext.shared_phi_psi_proxy()
     # it would be great to save rama_eval, but the fact that this is called in
     # pdb_interpretation, not in mmtbx.model makes it impossible
     if self.need_filtering:
         self.rama_eval = rama_eval()
     for three in generate_protein_threes(hierarchy=selected_h,
                                          geometry=None):
         rc = three.get_phi_psi_atoms()
         if rc is None: continue
         rama_key = three.get_ramalyze_key()
         if self.need_filtering:
             angles = three.get_phi_psi_angles()
             rama_score = self.rama_eval.get_score(rama_key, angles[0],
                                                   angles[1])
             r_evaluation = self.rama_eval.evaluate_score(
                 rama_key, rama_score)
         phi_atoms, psi_atoms = rc
         i_seqs = [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq]
         resnames = three.get_resnames()
         r_name = resnames[1]
         assert rama_key in range(6)
         text_rama_key = ramalyze.res_types[rama_key]
         assert text_rama_key in [
             "general", "glycine", "cis-proline", "trans-proline",
             "pre-proline", "isoleucine or valine"
         ]
         proxy = ext.phi_psi_proxy(residue_name=r_name,
                                   residue_type=text_rama_key,
                                   i_seqs=i_seqs)
         # pick where to put...
         if self.params.rama_potential == "oldfield":
             if self.need_filtering:
                 if r_evaluation == ramalyze.RAMALYZE_FAVORED:
                     self.append_oldfield_proxies(proxy, n_seq)
                 elif r_evaluation == ramalyze.RAMALYZE_ALLOWED and self.params.restrain_rama_allowed:
                     self.append_oldfield_proxies(proxy, n_seq)
                 elif r_evaluation == ramalyze.RAMALYZE_OUTLIER and self.params.restrain_rama_outliers:
                     self.append_oldfield_proxies(proxy, n_seq)
                 elif self.params.restrain_allowed_outliers_with_emsley:
                     self.append_emsley_proxies(proxy, n_seq)
             else:
                 self.append_oldfield_proxies(proxy, n_seq)
         else:  # self.params.rama_potential == "emsley":
             self.append_emsley_proxies(proxy, n_seq)
     print("", file=self.log)
     print("  %d Ramachandran restraints generated." %
           (self.get_n_proxies()),
           file=self.log)
     print("    %d Oldfield and %d Emsley." %
           (self.get_n_oldfield_proxies(), self.get_n_emsley_proxies()),
           file=self.log)
Example #9
def get_phi_psi_atoms(hierarchy):
  phi_psi_atoms = []
  for three in generate_protein_threes(
        hierarchy=hierarchy,
        geometry=None):
    phi_atoms, psi_atoms = three.get_phi_psi_atoms()
    rama_key = three.get_ramalyze_key()
    # print "rama_key", rama_key
    phi_psi_atoms.append(([phi_atoms, psi_atoms],rama_key))
  return phi_psi_atoms
Example #10
def get_phi_psi_atoms(hierarchy):
    phi_psi_atoms = []
    for three in generate_protein_threes(hierarchy=hierarchy, geometry=None):
        psatoms = three.get_phi_psi_atoms()
        if psatoms is not None:
            phi_atoms, psi_atoms = psatoms
        else:
            phi_atoms, psi_atoms = None, None
        rama_key = three.get_ramalyze_key()
        # print "rama_key", rama_key
        phi_psi_atoms.append(([phi_atoms, psi_atoms], rama_key))
    return phi_psi_atoms
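A hedged usage sketch for get_phi_psi_atoms; the input file name is an assumption:

import iotbx.pdb

hierarchy = iotbx.pdb.input(file_name="model.pdb").construct_hierarchy()
for (phi_atoms, psi_atoms), rama_key in get_phi_psi_atoms(hierarchy):
    if phi_atoms is None or psi_atoms is None:
        continue  # windows without a complete phi/psi definition
    print(rama_key, [atom.id_str() for atom in phi_atoms])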
def test_phi_psi_key(hierarchy,
                     filename,
                     restraints_manager,
                     ):
  for i, threes in enumerate(cdl.generate_protein_threes(
    hierarchy,
    #restraints_manager=restraints_manager
    geometry=restraints_manager.geometry,
    )
                             ):
    key = threes.get_cdl_key(force_plus_one=True)
    print(key, filenames[filename][1])
    assert key == filenames[filename][1][i]
def add_main_chain_atoms(
    hierarchy,
    geometry_restraints_manager,
    verbose=False,
):
    from mmtbx.conformation_dependent_library import generate_protein_threes
    for three in generate_protein_threes(
            hierarchy,
            geometry_restraints_manager,
            verbose=verbose,
    ):
        print(three)
        add_main_chain_atoms_to_protein_three(three)
    assert 0
Example #14
def get_dihedrals_and_phi_psi(model):
    from cctbx.geometry_restraints import dihedral_proxy_registry
    dihedral_registry = dihedral_proxy_registry(strict_conflict_handling=True)
    dihedral_registry.initialize_table()
    from mmtbx.conformation_dependent_library import generate_protein_threes
    grm = model._processed_pdb_file.geometry_restraints_manager()
    dihedral_proxies = grm.get_dihedral_proxies().deep_copy()
    for p in dihedral_proxies:
        dihedral_registry.add_if_not_duplicated(p)
    for three in generate_protein_threes(hierarchy=model.get_hierarchy(),
                                         geometry=None):
        proxies = three.get_dummy_dihedral_proxies(only_psi_phi_pairs=False)
        for p in proxies:
            dihedral_registry.add_if_not_duplicated(p)
    return dihedral_registry.proxies
Example #15
def get_phi_psi_dict(pdb_hierarchy):
    rc = {}
    for i, three in enumerate(
            generate_protein_threes(hierarchy=pdb_hierarchy, geometry=None)):
        phi_psi_angles = three.get_phi_psi_angles()
        is_alt_conf = ' '
        relevant_atoms = {}
        for atom in three[1].atoms():
            if (atom.name in relevant_atom_names):
                if (len(atom.parent().altloc) != 0):
                    is_alt_conf = atom.parent().altloc
                    break
        id_str = '|%s:%s|' % (three[1].id_str(), is_alt_conf)
        rc[id_str] = phi_psi_angles
    return rc
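A hedged usage sketch for get_phi_psi_dict; the file name is an assumption, and None angles are skipped defensively:

import iotbx.pdb

pdb_hierarchy = iotbx.pdb.input(file_name="model.pdb").construct_hierarchy()
for id_str, (phi, psi) in get_phi_psi_dict(pdb_hierarchy).items():
    if phi is None or psi is None:
        continue
    print("%s phi=%8.2f psi=%8.2f" % (id_str, phi, psi))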
def test_average(hierarchy,
                 filename,
                 restraints_manager,
                 ):
  for i, threes in enumerate(cdl.generate_protein_threes(
    hierarchy,
    geometry=restraints_manager.geometry,
    )
                             ):
    if threes.registry.n:
      atoms = hierarchy.atoms()
      for key in threes.registry.n:
        print(key)
        for atom in key:
          print(atoms[atom].quote())
      #assert threes.registry.n.keys() == filenames[filename][3]
      assert filenames[filename][3][0] in threes.registry.n
Example #18
def get_complete_dihedral_proxies_2(model, log=None):
    from six.moves import cStringIO as StringIO
    from cctbx.geometry_restraints import dihedral_proxy_registry
    from mmtbx.conformation_dependent_library import generate_protein_threes
    if log is None:
        log = StringIO()
    dihedral_registry = dihedral_proxy_registry(strict_conflict_handling=True)
    dihedral_registry.initialize_table()
    grm = model.get_restraints_manager().geometry
    dihedral_proxies = grm.get_dihedral_proxies().deep_copy()
    for p in dihedral_proxies:
        dihedral_registry.add_if_not_duplicated(p)
    for three in generate_protein_threes(hierarchy=model.get_hierarchy(),
                                         geometry=None):
        proxies = three.get_dummy_dihedral_proxies(only_psi_phi_pairs=False)
        for p in proxies:
            dihedral_registry.add_if_not_duplicated(p)
    return dihedral_registry.proxies
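A hedged usage sketch for get_complete_dihedral_proxies_2; the mmtbx.model calls below are an assumption about the current API rather than something shown on this page:

import iotbx.pdb
import mmtbx.model

model = mmtbx.model.manager(
    model_input=iotbx.pdb.input(file_name="model.pdb"))  # hypothetical file
model.process(make_restraints=True)  # assumed to build the restraints manager
proxies = get_complete_dihedral_proxies_2(model)
print(proxies.size())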
Example #19
def get_phi_psi_atoms(hierarchy, omega=False):
    phi_psi_atoms = []
    for three in generate_protein_threes(hierarchy=hierarchy,
                                         geometry=None,
                                         cdl_class=True):
        psatoms = three.get_phi_psi_atoms()
        if psatoms is not None:
            phi_atoms, psi_atoms = psatoms
        else:
            phi_atoms, psi_atoms = None, None
        rama_key = three.get_ramalyze_key()
        # print "rama_key", rama_key
        if omega:
            phi_psi_atoms.append(
                ([phi_atoms, psi_atoms], rama_key, three.get_omega_value()))
        else:
            phi_psi_atoms.append(([phi_atoms, psi_atoms], rama_key))
    return phi_psi_atoms
Example #20
def get_dihedrals_and_phi_psi(processed_pdb_file):
  from cctbx.geometry_restraints import dihedral_proxy_registry
  dihedral_registry = dihedral_proxy_registry(
      strict_conflict_handling=True)
  dihedral_registry.initialize_table()
  from mmtbx.conformation_dependent_library import generate_protein_threes
  grm = processed_pdb_file.geometry_restraints_manager()
  dihedral_proxies = grm.get_dihedral_proxies().deep_copy()
  for p in dihedral_proxies:
    dihedral_registry.add_if_not_duplicated(p)
  for three in generate_protein_threes(
      hierarchy=processed_pdb_file.all_chain_proxies.pdb_hierarchy,
      geometry=None):
    proxies = three.get_dummy_dihedral_proxies(
        only_psi_phi_pairs=False)
    for p in proxies:
      dihedral_registry.add_if_not_duplicated(p)
  return dihedral_registry.proxies
 def validate_residues(self) :
   from mmtbx.conformation_dependent_library import generate_protein_threes
   from mmtbx.rotamer import ramachandran_eval,rotamer_eval
   # this is so we generate rama_eval only once
   rama_eval = ramachandran_eval.RamachandranEval()
   rota_eval = rotamer_eval.RotamerEval()
   rotamer_id = rotamer_eval.RotamerID() # loads in the rotamer names
   threes = generate_protein_threes(
       hierarchy = self.pdb_hierarchy,
       include_non_linked=True,
       backbone_only=False,
       geometry=None)
   for i,three in enumerate(threes) :
     if i == 0 :
       self.residues.append(ValidationResidue(three,rama_eval,
                                              rota_eval,rotamer_id,index=0))
     self.residues.append(ValidationResidue(three,rama_eval,
                                              rota_eval,rotamer_id))
     if three.end :
       self.residues.append(ValidationResidue(three,rama_eval,
                                              rota_eval,rotamer_id,index=2))
def test_cdl_lookup(hierarchy,
                    filename,
                    restraints_manager,
                    ):
  for i, threes in enumerate(cdl.generate_protein_threes(
    hierarchy,
    #restraints_manager=restraints_manager
    geometry=restraints_manager.geometry,
    )
                             ):
    res_type_group = cdl_utils.get_res_type_group(
      threes[1].resname,
      threes[2].resname,
      )
    key = threes.get_cdl_key(force_plus_one=True)
    key = key[-2:]
    restraint_values = omega_database[res_type_group][key]
    print(i, key, restraint_values[:4], filenames[filename][2])
    del threes.registry.n
    threes.registry.n = {}
    assert restraint_values[:4] == filenames[filename][2][i]
def test_cdl_lookup(hierarchy,
                    filename,
                    restraints_manager,
                    ):
  for i, threes in enumerate(cdl.generate_protein_threes(
    hierarchy,
    #restraints_manager=restraints_manager
    geometry=restraints_manager.geometry,
    )
                             ):
    res_type_group = cdl_utils.get_res_type_group(
      threes[1].resname,
      threes[2].resname,
      )
    key = threes.get_cdl_key(force_plus_one=True)
    key = key[-2:]
    print('res_type_group', res_type_group, key)
    restraint_values = omega_database[res_type_group][key]
    print(i, key, restraint_values[:4], filenames[filename][2])
    del threes.registry.n
    threes.registry.n = {}
    assert restraint_values[:4] == filenames[filename][2][i]
Example #24
    def __init__(self, models, log):
        db_path = libtbx.env.find_in_repositories(
            relative_path="chem_data/rama_z/top8000_rama_z_dict.pkl",
            test=os.path.isfile)
        self.log = log
        # this takes ~0.15 seconds, so I don't see a need to cache it somehow.
        self.db = easy_pickle.load(db_path)

        # =========================================================================
        # change keys in pickle to Python 3 string
        # very temporary fix until pickle is updated
        if sys.version_info.major == 3:
            from libtbx.utils import to_str
            for key in list(self.db.keys()):
                self.db[to_str(key)] = self.db[key]
                for subkey in list(self.db[key].keys()):
                    self.db[to_str(key)][to_str(subkey)] = self.db[key][subkey]
        # =========================================================================

        self.calibration_values = {
            'H': (-0.045355950779513175, 0.1951165524439217),
            'S': (-0.0425581278436754, 0.20068584887814633),
            'L': (-0.018457764754231075, 0.15788374669456848),
            'W': (-0.016806654295023003, 0.12044960331869274)
        }
        self.residue_counts = {"H": 0, "S": 0, "L": 0}
        self.z_score = {"H": None, "S": None, "L": None, 'W': None}
        self.means = {"H": {}, "S": {}, "L": {}}
        self.stds = {"H": {}, "S": {}, "L": {}}

        self.phi_step = 4
        self.psi_step = 4
        self.n_phi_half = 45
        self.n_psi_half = 45

        # this is needed to disable e.g. selection functionality when
        # multiple models are present
        self.n_models = len(models)
        self.res_info = []
        for model in models:
            if model.get_hierarchy().models_size() > 1:
                hierarchy = iotbx.pdb.hierarchy.root()
                m = model.get_hierarchy().models()[0].detached_copy()
                hierarchy.append_model(m)
                asc = hierarchy.atom_selection_cache()
            else:
                hierarchy = model.get_hierarchy()
                asc = model.get_atom_selection_cache()
            sec_str_master_phil = iotbx.phil.parse(sec_str_master_phil_str)
            ss_params = sec_str_master_phil.fetch().extract()
            ss_params.secondary_structure.protein.search_method = "from_ca"
            ss_params.secondary_structure.from_ca_conservative = True

            ssm = ss_manager(
                hierarchy,
                atom_selection_cache=asc,
                geometry_restraints_manager=None,
                sec_str_from_pdb_file=None,
                # params=None,
                params=ss_params.secondary_structure,
                was_initialized=False,
                mon_lib_srv=None,
                verbose=-1,
                log=null_out(),
                # log=sys.stdout,
            )

            filtered_ann = ssm.actual_sec_str.deep_copy()
            filtered_ann.remove_short_annotations(
                helix_min_len=4,
                sheet_min_len=4,
                keep_one_stranded_sheets=True)
            self.helix_sel = asc.selection(
                filtered_ann.overall_helices_selection())
            self.sheet_sel = asc.selection(
                filtered_ann.overall_sheets_selection())

            used_atoms = set()
            for three in generate_protein_threes(hierarchy=hierarchy,
                                                 geometry=None):
                main_residue = three[1]
                phi_psi_atoms = three.get_phi_psi_atoms()
                if phi_psi_atoms is None:
                    continue
                phi_atoms, psi_atoms = phi_psi_atoms
                key = [x.i_seq for x in phi_atoms] + [psi_atoms[-1].i_seq]
                key = "%s" % key
                if key not in used_atoms:
                    phi, psi = three.get_phi_psi_angles()
                    rkey = three.get_ramalyze_key()
                    resname = main_residue.resname
                    ss_type = self._figure_out_ss(three)
                    self.res_info.append(
                        ["", rkey, resname, ss_type, phi, psi])
                    self.residue_counts[ss_type] += 1
                    used_atoms.add(key)
        self.residue_counts["W"] = self.residue_counts[
            "H"] + self.residue_counts["S"] + self.residue_counts["L"]
Example #25
  def __init__ (self,
      pdb_hierarchy,
      outliers_only=False,
      show_errors=False,
      out=sys.stdout,
      quiet=False) :
    # Optimization hint: make it possible to pass
    # ramachandran_eval.RamachandranEval() from outside.
    # Better - convert this to using mmtbx.model.manager where
    # RamachandranEval is already available.
    validation.__init__(self)
    self.n_allowed = 0
    self.n_favored = 0
    self.n_type = [ 0 ] * 6
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    if (all_i_seqs.all_eq(0)) :
      pdb_atoms.reset_i_seq()
    use_segids = utils.use_segids_in_place_of_chainids(
      hierarchy=pdb_hierarchy)
    analysis = ""
    output_list = []
    count_keys = []
    uniqueness_keys = []
    r = ramachandran_eval.RamachandranEval()
    ##if use_segids:
    ##      chain_id = utils.get_segid_as_chainid(chain=chain)
    ##    else:
    ##      chain_id = chain.id
    for three in generate_protein_threes(hierarchy=pdb_hierarchy, geometry=None):
      main_residue = three[1]
      phi_psi_atoms = three.get_phi_psi_atoms()
      if phi_psi_atoms is None:
        continue
      phi_atoms, psi_atoms = phi_psi_atoms
      phi = get_dihedral(phi_atoms)
      psi = get_dihedral(psi_atoms)
      coords = get_center(main_residue) #should find the CA of the center residue

      if (phi is not None and psi is not None):
        res_type = RAMA_GENERAL
        #self.n_total += 1
        if (main_residue.resname[0:3] == "GLY"):
          res_type = RAMA_GLYCINE
        elif (main_residue.resname[0:3] == "PRO"):
          is_cis = is_cis_peptide(three)
          if is_cis:
            res_type = RAMA_CISPRO
          else:
            res_type = RAMA_TRANSPRO
        elif (three[2].resname == "PRO"):
          res_type = RAMA_PREPRO
        elif (main_residue.resname[0:3] == "ILE" or \
              main_residue.resname[0:3] == "VAL"):
          res_type = RAMA_ILE_VAL
        #self.n_type[res_type] += 1
        value = r.evaluate(res_types[res_type], [phi, psi])
        ramaType = self.evaluateScore(res_type, value)
        is_outlier = ramaType == RAMALYZE_OUTLIER

        c_alphas = None
        # XXX only save kinemage data for outliers
        if is_outlier :
          c_alphas = get_cas_from_three(three)
          assert (len(c_alphas) == 3)
          markup = self.as_markup_for_kinemage(c_alphas)
        else:
          markup = None
        result = ramachandran(
          model_id=main_residue.parent().parent().parent().id,
          chain_id=main_residue.parent().parent().id,
          resseq=main_residue.resseq,
          icode=main_residue.icode,
          resname=main_residue.resname,
          #altloc=main_residue.parent().altloc,
          altloc=get_altloc_from_three(three),
          segid=None, # XXX ???
          phi=phi,
          psi=psi,
          rama_type=ramaType,
          res_type=res_type,
          score=value*100,
          outlier=is_outlier,
          xyz=coords,
          markup=markup)
        #if result.chain_id+result.resseq+result.icode not in count_keys:
        result_key = result.model_id+result.chain_id+result.resseq+result.icode
        if result.altloc in ['','A'] and result_key not in count_keys:
          self.n_total += 1
          self.n_type[res_type] += 1
          self.add_to_validation_counts(ramaType)
          count_keys.append(result_key)
        if (not outliers_only or is_outlier) :
          if (result.altloc != '' or
            result_key not in uniqueness_keys):
            #the threes/conformers method results in some redundant result
            #  calculations in structures with alternates. Using the
            #  uniqueness_keys list prevents redundant results being added to
            #  the final list
            self.results.append(result)
            uniqueness_keys.append(result_key)
        if is_outlier :
          i_seqs = main_residue.atoms().extract_i_seq()
          assert (not i_seqs.all_eq(0))
          self._outlier_i_seqs.extend(i_seqs)
    self.results.sort(key=lambda r: r.model_id+r.id_str())
    out_count, out_percent = self.get_outliers_count_and_fraction()
    fav_count, fav_percent = self.get_favored_count_and_fraction()
    self.out_percent = out_percent * 100.0
    self.fav_percent = fav_percent * 100.0
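A hedged usage sketch for this validation class; the class name ramalyze and the input file name are assumptions:

import iotbx.pdb

pdb_hierarchy = iotbx.pdb.input(file_name="model.pdb").construct_hierarchy()
# 'ramalyze' is assumed to be the enclosing class of the __init__ above.
result = ramalyze(pdb_hierarchy=pdb_hierarchy, outliers_only=False)
print("favored: %.1f%%  outliers: %.1f%%" % (result.fav_percent,
                                             result.out_percent))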
Example #26
    def extract_proxies(self, hierarchy):
        def _get_motifs():
            from phenix.programs.phi_psi_2 import results_manager as pp2
            pp2_manager = pp2(model=None, log=self.log)
            phi_psi_2_motifs = pp2_manager.get_overall_motif_count_and_output(
                None,
                self.hierarchy,
                return_rama_restraints=True,
            )
            return phi_psi_2_motifs

        phi_psi_2_motifs = None
        favored = ramalyze.RAMALYZE_FAVORED
        allowed = ramalyze.RAMALYZE_ALLOWED
        outlier = ramalyze.RAMALYZE_OUTLIER
        self.hierarchy = hierarchy
        bool_atom_selection = self._determine_bool_atom_selection(hierarchy)
        selected_h = hierarchy.select(bool_atom_selection)
        n_seq = flex.max(selected_h.atoms().extract_i_seq())
        # Drop all previous proxies
        self._oldfield_proxies = ext.shared_phi_psi_proxy()
        self._emsley_proxies = ext.shared_phi_psi_proxy()
        self._emsley8k_proxies = ext.shared_phi_psi_proxy()
        self._phi_psi_2_proxies = ext.shared_phi_psi_proxy()
        # it would be great to save rama_eval, but the fact that this is called in
        # pdb_interpretation, not in mmtbx.model makes it impossible
        self.rama_eval = rama_eval()
        outl = []
        for three in generate_protein_threes(hierarchy=selected_h,
                                             geometry=None):
            rc = three.get_phi_psi_atoms()
            if rc is None: continue
            rama_key = three.get_ramalyze_key()
            angles = three.get_phi_psi_angles()
            rama_score = self.rama_eval.get_score(rama_key, angles[0],
                                                  angles[1])
            r_eval = self.rama_eval.evaluate_score(rama_key, rama_score)
            phi_atoms, psi_atoms = rc
            i_seqs = [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq]
            resnames = three.get_resnames()
            r_name = resnames[1]
            assert rama_key in range(6)
            text_rama_key = ramalyze.res_types[rama_key]
            assert text_rama_key in [
                "general", "glycine", "cis-proline", "trans-proline",
                "pre-proline", "isoleucine or valine"
            ]
            # pick where to put...
            ev_match_dict = {
                favored: self.params.favored,
                allowed: self.params.allowed,
                outlier: self.params.outlier
            }
            r_type = ev_match_dict[r_eval]
            if r_type == 'oldfield':
                proxy = ext.phi_psi_proxy(residue_type=text_rama_key,
                                          i_seqs=i_seqs,
                                          weight=1)  # XXX Not used in oldfield
                self.append_oldfield_proxies(proxy, n_seq)

                ### THIS IS CRUEL. REMOVE ONCE favored/allowed/outlier are made multiple!
                if (self.params.inject_emsley8k_into_oldfield_favored):
                    proxy = ext.phi_psi_proxy(residue_type=text_rama_key,
                                              i_seqs=i_seqs,
                                              weight=5)
                    self.append_emsley8k_proxies(proxy, n_seq)
                ###

            elif r_type == 'emsley':
                weight = self.params.emsley.weight
                proxy = ext.phi_psi_proxy(residue_type=text_rama_key,
                                          i_seqs=i_seqs,
                                          weight=weight)
                self.append_emsley_proxies(proxy, n_seq)
            elif r_type == 'emsley8k':
                if (r_eval is favored):
                    weight = self.params.emsley8k.weight_favored
                elif (r_eval is allowed):
                    weight = self.params.emsley8k.weight_allowed
                elif (r_eval is outlier):
                    weight = self.params.emsley8k.weight_outlier
                else:
                    raise RuntimeError("Rama eveluation failed.")
                proxy = ext.phi_psi_proxy(residue_type=text_rama_key,
                                          i_seqs=i_seqs,
                                          weight=weight)
                self.append_emsley8k_proxies(proxy, n_seq)
            elif r_type == 'phi_psi_2':
                from phenix.pdb_tools.phi_psi_2_data import get_phi_psi_key_for_rama_proxy
                if phi_psi_2_motifs is None: phi_psi_2_motifs = _get_motifs()
                if (r_eval is favored):
                    strategy = self.params.phi_psi_2.favored_strategy
                elif (r_eval is allowed):
                    strategy = self.params.phi_psi_2.allowed_strategy
                elif (r_eval is outlier):
                    strategy = self.params.phi_psi_2.outlier_strategy
                else:
                    raise RuntimeError("Rama eveluation failed.")
                if strategy == 'closest':
                    strategy += '_%0.1f_%0.1f' % tuple(
                        three.get_phi_psi_angles())
                pp2_key = get_phi_psi_key_for_rama_proxy(
                    phi_psi_2_motifs,
                    three,
                    strategy=strategy,
                )
                if pp2_key is None: continue
                weight = 1
                proxy = ext.phi_psi_proxy(residue_type=pp2_key,
                                          i_seqs=i_seqs,
                                          weight=weight)
                outl.append([proxy.residue_type, three])
                self.append_phi_psi_2_proxies(proxy, n_seq)
            elif (r_type is None):
                pass
            else:
                raise RuntimeError("Not an option: %s" % str(r_type))

        print("", file=self.log)
        print("  %d Ramachandran restraints generated." %
              (self.get_n_proxies()),
              file=self.log)
        print("    %d Oldfield, %d Emsley, %d emsley8k and %d Phi/Psi/2." %
              (self.get_n_oldfield_proxies(), self.get_n_emsley_proxies(),
               self.get_n_emsley8k_proxies(), self.get_n_phi_psi_2_proxies()),
              file=self.log)
        if outl:
            print('    Rama restraints by Phi/Psi/2', file=self.log)
            for pp2, three in outl:
                print('      %s : %s' % (three[1].id_str(), pp2.split('|')[0]),
                      file=self.log)
  def fix_rama_outlier(self,
      pdb_hierarchy, out_res_num_list, prefix="", minimize=True,
      ss_annotation=None,
      tried_rama_angles_for_chain={},
      tried_final_rama_angles_for_chain={}):

    def comb_pair_in_bad_pairs(comb_pair, bad_pairs):
      if None in comb_pair:
        return False
      all_combs = [comb_pair]
      all_combs.append((comb_pair[0]-20, comb_pair[1]))
      all_combs.append((comb_pair[0]+20, comb_pair[1]))
      all_combs.append((comb_pair[0], comb_pair[1]-20))
      all_combs.append((comb_pair[0], comb_pair[1]+20))
      all_c_adj = []
      for p in all_combs:
        new_p = p
        if p[0] > 180:
          new_p = (p[0]-180, p[1])
        if p[0] < -180:
          new_p = (p[0]+180, p[1])
        if p[1] > 180:
          new_p = (p[0], p[1]-180)
        if p[0] < -180:
          new_p = (p[0], p[1]+180)
        all_c_adj.append(new_p)
      for p in all_c_adj:
        if p in bad_pairs:
          return True
      return False

    original_pdb_h = pdb_hierarchy.deep_copy()
    original_pdb_h.reset_atom_i_seqs()
    original_pdb_h_asc = original_pdb_h.atom_selection_cache()
    chain_id = original_pdb_h.only_model().only_chain().id
    all_results = []
    # only forward
    # variants_searches = [
    #     #ccd_radius, change_all, change_radius, direction_forward
    #     ((1, False, 0, True ),1),
    #     # ((1, False, 0, False),1),
    #     ((2, False, 0, True ),1),
    #     # ((2, False, 0, False),1),
    #     ((3, False, 0, True ),2),
    #     # ((3, False, 0, False),2),
    #     ((2, True,  1, True ),1),
    #     # ((2, True,  1, False),1),
    #     ((3, True,  1, True ),2),
    #     # ((3, True,  1, False),2),
    #     ((3, True,  2, True ),3),
    #     # ((3, True,  2, False),3),
    # ]
    # only backward
    # variants_searches = [
    #     #ccd_radius, change_all, change_radius, direction_forward
    #     # ((1, False, 0, True ),1),
    #     ((1, False, 0, False),1),
    #     # ((2, False, 0, True ),1),
    #     ((2, False, 0, False),1),
    #     # ((3, False, 0, True ),2),
    #     ((3, False, 0, False),2),
    #     # ((2, True,  1, True ),1),
    #     ((2, True,  1, False),1),
    #     # ((3, True,  1, True ),2),
    #     ((3, True,  1, False),2),
    #     # ((3, True,  2, True ),3),
    #     ((3, True,  2, False),3),
    # ]
    # both
    variants_searches = [
        #ccd_radius, change_all, change_radius, direction_forward
        ((1, False, 0, True ),1),
        ((1, False, 0, False),1),
        ((2, False, 0, True ),1),
        ((2, False, 0, False),1),
        ((3, False, 0, True ),2),
        ((3, False, 0, False),2),
        ((2, True,  1, True ),1),
        ((2, True,  1, False),1),
        ((3, True,  1, True ),2),
        ((3, True,  1, False),2),
        ((3, True,  2, True ),3),
        ((3, True,  2, False),3),
    ]
    decided_variants = []
    for variant, level in variants_searches:
      if level <= self.params.variant_search_level:
        decided_variants.append(variant)

    for ccd_radius, change_all, change_radius, direction_forward in decided_variants:
    # while ccd_radius <= 3:
      fixing_omega = False
      print >> self.log, "  Starting optimization with radius=%d, " % ccd_radius,
      print >> self.log, "change_all=%s, change_radius=%d, " % (change_all, change_radius),
      print >> self.log, "direction=forward" if direction_forward else "direction=backwards"
      self.log.flush()
      #
      (moving_h, moving_ref_atoms_iseqs, fixed_ref_atoms,
          m_selection, contains_ss_element, anchor_present) = get_fixed_moving_parts(
              pdb_hierarchy=pdb_hierarchy,
              out_res_num_list=out_res_num_list,
              # n_following=1,
              # n_previous=ccd_radius+ccd_radius-1,
              n_following=ccd_radius,
              n_previous=ccd_radius,
              ss_annotation=ss_annotation,
              direction_forward=direction_forward,
              log=self.log)
      # print "  moving_ref_atoms_iseqs", moving_ref_atoms_iseqs
      print "  moving_h resseqs:", [x.resseq for x in moving_h.residue_groups()]
      moving_h_set = []
      all_angles_combination_f = starting_conformations.get_all_starting_conformations(
          moving_h,
          change_radius,
          n_outliers=len(out_res_num_list),
          direction_forward=direction_forward,
          cutoff=self.params.variant_number_cutoff,
          change_all=change_all,
          # log=self.log,
          check_omega=self.params.make_all_trans,
          )

      #

      # print "len(all_angles_combination_f)", len(all_angles_combination_f)
      if len(all_angles_combination_f) == 0:
        print "In starting conformations - outlier was fixed?"
        # return result
      else:
        # here we should filter  first ones that in
        # tried_rama_angles_for_chain
        filter_out = [] # [[tried values],[tried values],...]
        for three in generate_protein_threes(
            hierarchy=moving_h,
            geometry=None):
          if three[1].resseq in tried_rama_angles_for_chain.keys():
            filter_out.append(tried_rama_angles_for_chain[three[1].resseq])
          else:
            filter_out.append((None, None))
        ff_all_angles = []
        print "filter_out", filter_out
        for comb in all_angles_combination_f:
          good = True
          for comb_pair, bad_pairs in zip(comb, filter_out):
            if bad_pairs == (None, None):
              continue
            # print "comb_pair, bad_pairs", comb_pair, bad_pairs
            # if comb_pair in bad_pairs:
            if comb_pair_in_bad_pairs(comb_pair, bad_pairs):
              good = False
              # print "  Rejecting comb_pair", comb_pair
              break
          if good:
            ff_all_angles.append(comb)
        print "len(all_angles_combination_f)", len(all_angles_combination_f)
        print "len(ff_all_angles)", len(ff_all_angles)
        n_added = 0
        n_all_combination = len(ff_all_angles)
        if n_all_combination == 0:
          print >> self.log, "Strange - got 0 combinations."
        i_max = min(self.params.variant_number_cutoff, n_all_combination)
        # assert i_max > 0
        step = 0
        if i_max > 1:
          step = float(n_all_combination-1)/float(i_max-1)
        if step < 1:
          step = 1
        for i in range(i_max):
          comb = ff_all_angles[int(round(step*i))]
          setted_h, fixed_omega = starting_conformations.set_rama_angles(
                  moving_h,
                  list(comb),
                  direction_forward=direction_forward,
                  check_omega=self.params.make_all_trans)
          fixing_omega = fixing_omega or fixed_omega
          moving_h_set.append(setted_h)
          # print >> self.log, "Model %d, angles:" % i, comb
          if self.params.make_all_trans and utils.n_bad_omegas(moving_h_set[-1]) != 0:
            print "Model_%d_angles_%s.pdb" % (i, comb),
            print "got ", utils.n_bad_omegas(moving_h_set[-1]), "bad omegas"
            moving_h_set[-1].write_pdb_file("Model_%d_angles_%s.pdb" % (i, comb))
            utils.list_omega(moving_h_set[-1], self.log)
            assert 0

      if len(moving_h_set) == 0:
        # outlier was fixed before somehow...
        # or there's a bug in get_starting_conformations
        print >> self.log, "outlier was fixed before somehow"
        return original_pdb_h
      print "self.tried_rama_angles inside", self.tried_rama_angles
      print "tried_rama_angles_for_chain", tried_rama_angles_for_chain
      print "checking values", ccd_radius, change_all, change_radius, direction_forward
      for i, h in enumerate(moving_h_set):
        # if [x in tried_rama_angles_for_chain.keys() for x in out_res_num_list].count(True) > 0:
        #   print >> self.log, "Warning!!! make something here (check angles or so)"
        #   print >> self.log, "Skipping nonstable solution, tried previously:", (ccd_radius, change_all, change_radius, direction_forward, i)
        #   continue
        resulting_rmsd = None
        n_iter = 0
        if anchor_present:
          fixed_ref_atoms_coors = [x.xyz for x in fixed_ref_atoms]
          # print "params to constructor", fixed_ref_atoms, h, moving_ref_atoms_iseqs
          # easy_pickle.dump(file_name="crash.pkl", obj=[
          #     fixed_ref_atoms_coors,
          #     h,
          #     moving_ref_atoms_iseqs,
          #     direction_forward,
          #     self.params.save_states])
          ccd_obj = ccd_cpp(fixed_ref_atoms_coors, h, moving_ref_atoms_iseqs)
          ccd_obj.run(direction_forward=direction_forward, save_states=self.params.save_states)
          resulting_rmsd = ccd_obj.resulting_rmsd
          n_iter = ccd_obj.n_iter

          if self.params.save_states:
            states = ccd_obj.states
            states.write(file_name="%s%s_%d_%s_%d_%i_states.pdb" % (chain_id, out_res_num_list[0], ccd_radius, change_all, change_radius, i))
        map_target = 0
        if self.reference_map is not None:
          map_target = maptbx.real_space_target_simple(
              unit_cell   = self.xrs.crystal_symmetry().unit_cell(),
              density_map = self.reference_map,
              sites_cart  = h.atoms().extract_xyz())

        mc_rmsd = get_main_chain_rmsd_range(moving_h, h, all_atoms=True)
        if self.verbose:
          print >> self.log, "Resulting anchor and backbone RMSDs, mapcc, n_iter for model %d:" % i,
          print >> self.log, resulting_rmsd, ",", mc_rmsd, ",", map_target, ",", n_iter
          self.log.flush()
        #
        # setting new coordinates
        #
        moved_with_side_chains_h = pdb_hierarchy.deep_copy()

        # setting xyz
        #
        for i_source, i_dest in enumerate(m_selection):
          moved_with_side_chains_h.atoms()[i_dest].set_xyz(h.atoms()[i_source].xyz)

        # set_xyz_smart(
        #     dest_h=moved_with_side_chains_h,
        #     source_h=h)

        #
        # placing side-chains
        #
        # moved_with_side_chains_h.write_pdb_file(
        #     file_name="%s_before_sc_placement_%d.pdb" % (prefix, i))
        placing_range = get_res_nums_around(moved_with_side_chains_h,
            center_resnum_list=out_res_num_list,
            n_following=ccd_radius,
            n_previous=ccd_radius,
            include_intermediate=True,
            avoid_ss_annot=ss_annotation)
        place_side_chains(moved_with_side_chains_h, original_pdb_h, original_pdb_h_asc,
            self.rotamer_manager, placing_range, self.ideal_res_dict)
        # moved_with_side_chains_h.write_pdb_file(
        #     file_name="%s_after_sc_placement_%d.pdb" % (prefix, i))


        #
        # finalizing with geometry_minimization
        #

        # determining angles of interest
        # print "Recording picked angle for outliers"
        threes = generate_protein_threes(
          # hierarchy=moving_h,
          hierarchy=h,
          geometry=None)
        start_angles = []
        final_angles = []
        for angle_pair, three in zip(ff_all_angles[int(round(step*i))], threes):
          # print "three[1].resseq in out_res_num_list, angle_pair", three[1].resseq, out_res_num_list, angle_pair
          if three[1].resseq in out_res_num_list:
            # if three[1].resseq not in tried_rama_angles_for_chain.keys():
            #   tried_rama_angles_for_chain[three[1].resseq] = []
            start_angles.append((three[1].resseq, angle_pair))
            ps_angles = three.get_phi_psi_angles()
            final_angles.append((three[1].resseq, tuple(ps_angles)))
            # tried_rama_angles_for_chain[three[1].resseq].append(angle_pair)
            # print >> self.log, "Ended up with", three[1].resseq, "%.1f %.1f" % (ps_angles[0], ps_angles[1])
        # print "Updated tried_rama_angles_for_chain:", tried_rama_angles_for_chain
        if (not self.ccd_solution_is_duplicated(
            final_angles=final_angles,
            tried_final_rama_angles_for_chain=tried_final_rama_angles_for_chain)):
          all_results.append((moved_with_side_chains_h.deep_copy(), mc_rmsd, resulting_rmsd, map_target, n_iter))
        else:
          continue
        if self.ccd_solution_is_ok(
            anchor_rmsd=resulting_rmsd,
            mc_rmsd=mc_rmsd,
            n_outliers=len(out_res_num_list),
            ccd_radius=ccd_radius,
            change_all_angles=change_all,
            change_radius=change_radius,
            contains_ss_element=contains_ss_element,
            fixing_omega=fixing_omega):
          print "Choosen result (mc_rmsd, anchor_rmsd, map_target, n_iter):", mc_rmsd, resulting_rmsd, map_target, n_iter
          # Save to tried_ccds
          for rn, angles in start_angles:
            if rn not in tried_rama_angles_for_chain.keys():
              tried_rama_angles_for_chain[rn] = []
            tried_rama_angles_for_chain[rn].append(angles)
          # Save final angles
          for rn, angles in final_angles:
            if rn not in tried_final_rama_angles_for_chain.keys():
              tried_final_rama_angles_for_chain[rn] = []
            tried_final_rama_angles_for_chain[rn].append(angles)
          print >> self.log, "Ended up with", final_angles
          print >> self.log, "Updated tried_rama_angles_for_chain:", tried_rama_angles_for_chain
          print >> self.log, "Updated tried_final_rama_angles_for_chain:", tried_final_rama_angles_for_chain

          self.log.flush()
          if minimize:
            print >> self.log, "minimizing..."
            # moved_with_side_chains_h.write_pdb_file(
            #     file_name="%s_result_before_min_%d.pdb" % (prefix, i))
            if self.reference_map is None:
              minimize_wrapper_for_ramachandran(
                  hierarchy=moved_with_side_chains_h,
                  xrs=xrs,
                  original_pdb_h=original_pdb_h,
                  log=self.log,
                  grm=self.grm,
                  ss_annotation=self.secondary_structure_annotation)
            else:
              mwwm = minimize_wrapper_with_map(
                  pdb_h=moved_with_side_chains_h,
                  xrs=xrs,
                  target_map=self.reference_map,
                  grm=self.grm,
                  ss_annotation=self.secondary_structure_annotation,
                  log=self.log)
          # moved_with_side_chains_h.write_pdb_file(
          #     file_name="%s_result_minimized_%d.pdb" % (prefix, i))
          final_rmsd = get_main_chain_rmsd_range(moved_with_side_chains_h,
              original_pdb_h, placing_range)
          print >> self.log, "FINAL RMSD after minimization:", final_rmsd
          return moved_with_side_chains_h


    all_results.sort(key=lambda tup: tup[1])
    if self.verbose:
      print >> self.log, "ALL RESULTS:"
      i = 0
      for ar in all_results:
        print >> self.log, ar[1:],
        if ar[2] < 0.4:
          # fn = "variant_%d.pdb" % i
          # ar[0].write_pdb_file(file_name=fn)
          # print fn
          i += 1
        else:
          print >> self.log, "  no output"
    if self.params.force_rama_fixes:
      # find and apply the best variant from all_results. This would be the one
      # with the smallest rmsd given satisfactory closure
      print >> self.log, "Applying the best found variant:",
      i = 0
      while i < len(all_results) and all_results[i][2] > 1.5:
        i += 1
      # apply
      # === duplication!!!!
      if i < len(all_results):
        print(all_results[i][1:], file=self.log)
        if minimize:
          print("minimizing...", file=self.log)
          # all_results[i][0].write_pdb_file(
          #     file_name="%s_result_before_min_%d.pdb" % (prefix, i))
          if self.reference_map is None:
            minimize_wrapper_for_ramachandran(
                hierarchy=all_results[i][0],
                xrs=xrs,
                original_pdb_h=original_pdb_h,
                log=self.log,
                grm=self.grm,
                ss_annotation=self.secondary_structure_annotation)
          else:
            mwwm = minimize_wrapper_with_map(
                pdb_h=all_results[i][0],
                xrs=xrs,
                target_map=self.reference_map,
                grm=self.grm,
                ss_annotation=self.secondary_structure_annotation,
                log=self.log)
        # all_results[i][0].write_pdb_file(
        #     file_name="%s_result_minimized_%d.pdb" % (prefix, i))
        final_rmsd = get_main_chain_rmsd_range(all_results[i][0],
            original_pdb_h, placing_range)
        print >> self.log, "FINAL RMSD after minimization:", final_rmsd
        return all_results[i][0]
      else:
        print >> self.log, " NOT FOUND!"
        for i in all_results:
          print >> self.log, i[1:]
      # === end of duplication!!!!

    else:
      print >> self.log, "Epic FAIL: failed to fix rama outlier:", out_res_num_list
      print >> self.log, "  Options were: (mc_rmsd, resultign_rmsd, n_iter)"
      for i in all_results:
        print >> self.log, i[1:]
    return original_pdb_h
Example #28
    def extract_proxies(self, hierarchy):
        favored = ramalyze.RAMALYZE_FAVORED
        allowed = ramalyze.RAMALYZE_ALLOWED
        outlier = ramalyze.RAMALYZE_OUTLIER
        self.hierarchy = hierarchy
        bool_atom_selection = self._determine_bool_atom_selection(hierarchy)
        selected_h = hierarchy.select(bool_atom_selection)
        n_seq = flex.max(selected_h.atoms().extract_i_seq())
        # Drop all previous proxies
        self._oldfield_proxies = ext.shared_phi_psi_proxy()
        self._emsley_proxies = ext.shared_phi_psi_proxy()
        self._emsley8k_proxies = ext.shared_phi_psi_proxy()
        # it would be great to save rama_eval, but the fact that this is called in
        # pdb_interpretation, not in mmtbx.model makes it impossible
        for three in generate_protein_threes(hierarchy=selected_h,
                                             geometry=None):
            rc = three.get_phi_psi_atoms()
            if rc is None: continue
            rama_key = three.get_ramalyze_key()
            angles = three.get_phi_psi_angles()
            rama_score = self.rama_eval.get_score(rama_key, angles[0],
                                                  angles[1])
            r_eval = self.rama_eval.evaluate_score(rama_key, rama_score)
            phi_atoms, psi_atoms = rc
            i_seqs = [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq]
            resnames = three.get_resnames()
            r_name = resnames[1]
            assert rama_key in range(6)
            text_rama_key = ramalyze.res_types[rama_key]
            assert text_rama_key in [
                "general", "glycine", "cis-proline", "trans-proline",
                "pre-proline", "isoleucine or valine"
            ]
            # pick where to put...
            ev_match_dict = {
                favored: self.params.favored,
                allowed: self.params.allowed,
                outlier: self.params.outlier
            }
            r_type = ev_match_dict[r_eval]
            if r_type == 'oldfield':
                proxy = ext.phi_psi_proxy(residue_type=text_rama_key,
                                          i_seqs=i_seqs,
                                          weight=1)  # XXX Not used in oldfield
                self.append_oldfield_proxies(proxy, n_seq)

                ### THIS IS CRUEL. REMOVE ONCE favored/allowed/outlier are made multiple!
                if (self.params.inject_emsley8k_into_oldfield_favored):
                    proxy = ext.phi_psi_proxy(residue_type=text_rama_key,
                                              i_seqs=i_seqs,
                                              weight=5)
                    self.append_emsley8k_proxies(proxy, n_seq)
                ###

            elif r_type == 'emsley':
                weight = self.params.emsley.weight
                proxy = ext.phi_psi_proxy(residue_type=text_rama_key,
                                          i_seqs=i_seqs,
                                          weight=weight)
                self.append_emsley_proxies(proxy, n_seq)
            elif r_type == 'emsley8k':
                if (r_eval is favored):
                    weight = self.params.emsley8k.weight_favored
                elif (r_eval is allowed):
                    weight = self.params.emsley8k.weight_allowed
                elif (r_eval is outlier):
                    weight = self.params.emsley8k.weight_outlier
                else:
                    raise RuntimeError("Rama eveluation failed.")
                proxy = ext.phi_psi_proxy(residue_type=text_rama_key,
                                          i_seqs=i_seqs,
                                          weight=weight)
                self.append_emsley8k_proxies(proxy, n_seq)
            elif (r_type is None):
                pass
            else:
                raise RuntimeError("Not an option: %s" % str(r_type))

        print("", file=self.log)
        print("  %d Ramachandran restraints generated." %
              (self.get_n_proxies()),
              file=self.log)
        print("    %d Oldfield and %d Emsley and %d emsley8k." %
              (self.get_n_oldfield_proxies(), self.get_n_emsley_proxies(),
               self.get_n_emsley8k_proxies()),
              file=self.log)
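
The dispatch at the heart of extract_proxies can be distilled into a tiny helper: the ramalyze evaluation of the central residue selects which restraint flavour ('oldfield', 'emsley', 'emsley8k' or None) the phi/psi proxy is routed to. The helper name is hypothetical, and ramalyze is assumed to be mmtbx.validation.ramalyze, as suggested by the constants used above.

def pick_restraint_flavour(r_eval, params):
    # params.favored/.allowed/.outlier hold the flavour names, exactly as in
    # the ev_match_dict above
    from mmtbx.validation import ramalyze  # assumed import location
    ev_match_dict = {
        ramalyze.RAMALYZE_FAVORED: params.favored,
        ramalyze.RAMALYZE_ALLOWED: params.allowed,
        ramalyze.RAMALYZE_OUTLIER: params.outlier,
    }
    return ev_match_dict[r_eval]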
def add_terminal_hydrogens_threes(
    hierarchy,
    geometry_restraints_manager,
    terminate_all_N_terminals=False,
    terminate_all_C_terminals=False,
    use_capping_hydrogens=False,
    append_to_end_of_model=False,
    verbose=False,
):
    from mmtbx.conformation_dependent_library import generate_protein_threes
    additional_hydrogens = []  #hierarchy_utils.smart_add_atoms()
    for three in generate_protein_threes(
            hierarchy,
            geometry_restraints_manager,
            #include_non_linked=False,
            backbone_only=False,
            include_linked_via_restraints_manager=True,
            verbose=verbose,
    ):
        # print three
        #print dir(three)
        # print geometry_restraints_manager
        #print dir(geometry_restraints_manager)
        bond_params_table = geometry_restraints_manager.bond_params_table

        # print bond_params_table
        #print dir(bond_params_table)
        # print 'use_capping_hydrogens',use_capping_hydrogens

        def get_bonds():
            bonds = {}
            for i, a1 in enumerate(residue_group.atoms()):
                for j, a2 in enumerate(residue_group.atoms()):
                    if i >= j: continue
                    bond = three.bond_params_table.lookup(a1.i_seq, a2.i_seq)
                    if bond:
                        bonds[(a1.i_seq, a2.i_seq)] = True
                        bonds[(a2.i_seq, a1.i_seq)] = True
            return bonds

        if use_capping_hydrogens:
            for i in range(len(three)):
                residue_group = three.get_residue_group_from_hierarchy(
                    hierarchy, i)
                rc = conditional_add_cys_hg_to_atom_group(
                    geometry_restraints_manager, residue_group)
            #assert not rc, '%s' % rc
        if three.start:
            residue_group = three.get_residue_group_from_hierarchy(
                hierarchy, 0)
            rc = add_n_terminal_hydrogens_to_residue_group(
                residue_group,
                bonds=get_bonds(),
                use_capping_hydrogens=use_capping_hydrogens,
                append_to_end_of_model=append_to_end_of_model,
            )
            if rc: additional_hydrogens.append(rc)
        if three.end:
            residue_group = three.get_residue_group_from_hierarchy(
                hierarchy, 2)
            rc = add_c_terminal_oxygens_to_residue_group(
                residue_group,
                bonds=get_bonds(),
                use_capping_hydrogens=use_capping_hydrogens,
                append_to_end_of_model=append_to_end_of_model,
            )
            if rc: additional_hydrogens.append(rc)
    return additional_hydrogens
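
A hedged way to drive add_terminal_hydrogens_threes from the command line, following the __main__ drivers used elsewhere on this page; get_geometry_restraints_manager is assumed to come from the same test_rdl helper those drivers import.

if __name__ == "__main__":
    import sys
    from iotbx import pdb
    from test_rdl import get_geometry_restraints_manager  # assumed helper
    file_name = sys.argv[1]
    hierarchy = pdb.input(file_name).construct_hierarchy()
    hierarchy.reset_i_seq_if_necessary()
    grm = get_geometry_restraints_manager(file_name)
    new_atoms = add_terminal_hydrogens_threes(
        hierarchy,
        grm,
        use_capping_hydrogens=False,
        append_to_end_of_model=True,
    )
    print("%d residue groups received terminal atoms" % len(new_atoms))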
Exemple #31
0
  def __init__ (self,
      pdb_hierarchy,
      outliers_only=False,
      show_errors=False,
      out=sys.stdout,
      quiet=False) :
    validation.__init__(self)
    self.n_allowed = 0
    self.n_favored = 0
    self.n_type = [ 0 ] * 6
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    if (all_i_seqs.all_eq(0)) :
      pdb_atoms.reset_i_seq()
    use_segids = utils.use_segids_in_place_of_chainids(
      hierarchy=pdb_hierarchy)
    analysis = ""
    output_list = []
    count_keys = []
    uniqueness_keys = []
    r = ramachandran_eval.RamachandranEval()
    ##if use_segids:
    ##      chain_id = utils.get_segid_as_chainid(chain=chain)
    ##    else:
    ##      chain_id = chain.id
    for three in generate_protein_threes(hierarchy=pdb_hierarchy, geometry=None):
      main_residue = three[1]
      phi_psi_atoms = three.get_phi_psi_atoms()
      if phi_psi_atoms is None:
        continue
      phi_atoms, psi_atoms = phi_psi_atoms
      phi = get_dihedral(phi_atoms)
      psi = get_dihedral(psi_atoms)
      coords = get_center(main_residue) #should find the CA of the center residue

      if (phi is not None and psi is not None):
        res_type = RAMA_GENERAL
        #self.n_total += 1
        if (main_residue.resname[0:3] == "GLY"):
          res_type = RAMA_GLYCINE
        elif (main_residue.resname[0:3] == "PRO"):
          is_cis = is_cis_peptide(three)
          if is_cis:
            res_type = RAMA_CISPRO
          else:
            res_type = RAMA_TRANSPRO
        elif (three[2].resname == "PRO"):
          res_type = RAMA_PREPRO
        elif (main_residue.resname[0:3] == "ILE" or \
              main_residue.resname[0:3] == "VAL"):
          res_type = RAMA_ILE_VAL
        #self.n_type[res_type] += 1
        value = r.evaluate(res_types[res_type], [phi, psi])
        ramaType = self.evaluateScore(res_type, value)
        is_outlier = ramaType == RAMALYZE_OUTLIER

        c_alphas = None
        # XXX only save kinemage data for outliers
        if is_outlier :
          c_alphas = get_cas_from_three(three)
          assert (len(c_alphas) == 3)
          markup = self.as_markup_for_kinemage(c_alphas)
        else:
          markup = None
        result = ramachandran(
          chain_id=main_residue.parent().parent().id,
          resseq=main_residue.resseq,
          icode=main_residue.icode,
          resname=main_residue.resname,
          #altloc=main_residue.parent().altloc,
          altloc=get_altloc_from_three(three),
          segid=None, # XXX ???
          phi=phi,
          psi=psi,
          rama_type=ramaType,
          res_type=res_type,
          score=value*100,
          outlier=is_outlier,
          xyz=coords,
          markup=markup)
        #if result.chain_id+result.resseq+result.icode not in count_keys:
        if result.altloc in ['','A'] and result.chain_id+result.resseq+result.icode not in count_keys:
          self.n_total += 1
          self.n_type[res_type] += 1
          self.add_to_validation_counts(ramaType)
          count_keys.append(result.chain_id+result.resseq+result.icode)
        if (not outliers_only or is_outlier) :
          if (result.altloc != '' or
            result.chain_id+result.resseq+result.icode not in uniqueness_keys):
            #the threes/conformers method results in some redundant result
            #  calculations in structures with alternates. Using the
            #  uniqueness_keys list prevents redundant results being added to
            #  the final list
            self.results.append(result)
            uniqueness_keys.append(result.chain_id+result.resseq+result.icode)
        if is_outlier :
          i_seqs = main_residue.atoms().extract_i_seq()
          assert (not i_seqs.all_eq(0))
          self._outlier_i_seqs.extend(i_seqs)
    self.results.sort(key=lambda r: r.id_str())
    out_count, out_percent = self.get_outliers_count_and_fraction()
    fav_count, fav_percent = self.get_favored_count_and_fraction()
    self.out_percent = out_percent * 100.0
    self.fav_percent = fav_percent * 100.0
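
The constructor above takes a pdb_hierarchy and tallies favored/allowed/outlier counts; assuming it is the ramalyze class from mmtbx.validation.ramalyze, a usage sketch could look like this.

import sys
from iotbx import pdb
from mmtbx.validation.ramalyze import ramalyze  # assumed location of the class

pdb_hierarchy = pdb.input(sys.argv[1]).construct_hierarchy()
rama = ramalyze(pdb_hierarchy=pdb_hierarchy, outliers_only=True)
print("favored: %.2f%%, outliers: %.2f%%" % (rama.fav_percent, rama.out_percent))
for result in rama.results:
    print(result.id_str(), result.phi, result.psi)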
Exemple #32
0
def update_restraints(
    hierarchy,
    geometry,  # restraints_manager,
    current_geometry=None,  # xray_structure!!
    sites_cart=None,
    cdl_proxies=None,
    ideal=True,
    esd=True,
    esd_factor=1.,
    log=None,
    verbose=False,
):
    global registry
    registry = RestraintsRegistry()
    if current_geometry:
        assert not sites_cart
        sites_cart = current_geometry.sites_cart()
    if sites_cart:
        pdb_atoms = hierarchy.atoms()
        # XXX PDB_TRANSITION VERY SLOW
        for j_seq, atom in enumerate(pdb_atoms):
            atom.xyz = sites_cart[j_seq]

    threes = None
    average_updates = 0
    total_updates = 0
    for threes in generate_protein_threes(
            hierarchy,
            geometry,
            cdl_class=True,
            omega_cdl=True,
            #verbose=verbose,
    ):
        threes.apply_updates = apply_updates
        if threes.cis_group(omega_cdl=True):
            if verbose and 0:
                print 'cis ' * 20
                print threes
            continue

        restraint_values = get_restraint_values(threes)
        if restraint_values is None: continue
        if restraint_values[0] == "I":
            average_updates += 1
        else:
            total_updates += 1
        threes.apply_updates(
            threes,
            restraint_values,
            cdl_proxies,
            ideal=ideal,
            esd=esd,
            esd_factor=esd_factor,
        )
    if registry.n:
        threes.apply_average_updates(registry)
        assert 0
    geometry.reset_internals()
    if verbose and threes and threes.errors:
        if log:
            log.write(
                "  Residues not completely updated with CDL restraints\n\n")
        for line in threes.errors:
            if log:
                log.write("%s\n" % line)
            else:
                print line
    return geometry
        bond.distance_ideal = averages[key]/averages.n[key]
      elif len(key)==3:
        rkey = (key[2],key[1],key[0])
        averages.n[rkey]=averages.n[key]
    for angle in self.geometry.angle_proxies:
      if angle.i_seqs in averages.n:
        key = angle.i_seqs
        if key not in averages:
          assert 0
        angle.angle_ideal = averages[key]/averages.n[key]

if __name__=="__main__":
  import sys
  from iotbx import pdb
  from test_rdl import get_geometry_restraints_manager
  filename=sys.argv[1]
  pdb_inp = pdb.input(filename)
  pdb_hierarchy = pdb_inp.construct_hierarchy()
  geometry_restraints_manager = get_geometry_restraints_manager(filename)
  pdb_hierarchy.reset_i_seq_if_necessary()
  from mmtbx.conformation_dependent_library import generate_protein_threes
  for threes in generate_protein_threes(pdb_hierarchy,
                                        geometry_restraints_manager,
                                        #verbose=verbose,
                                        ):
    print threes
    print "  cis? %s" % threes.cis_group()
    print "  rama %s" % threes.get_ramalyze_key()
    print '  conf %s' % threes.is_pure_main_conf()
  print "OK"
Exemple #34
0
  def __init__(self, model, log):
    db_path = libtbx.env.find_in_repositories(
        relative_path="chem_data/rama_z/top8000_rama_z_dict.pkl",
        test=os.path.isfile)
    rmsd_path = libtbx.env.find_in_repositories(
        relative_path="chem_data/rama_z/rmsd.pkl",
        test=os.path.isfile)
    self.log = log
    # this takes ~0.15 seconds, so there is no need to cache it.
    self.db = easy_pickle.load(db_path)
    self.rmsd_estimator = easy_pickle.load(rmsd_path)
    self.calibration_values = {
        'H': (-0.045355950779513175, 0.1951165524439217),
        'S': (-0.0425581278436754, 0.20068584887814633),
        'L': (-0.018457764754231075, 0.15788374669456848),
        'W': (-0.016806654295023003, 0.12044960331869274)}
    self.residue_counts = {"H": 0, "S": 0, "L":0}
    self.z_score = {"H": None, "S": None, "L":None, 'W': None}
    self.interpolation_fs = {"H": {}, "S": {}, "L": {}}
    self.means = {"H": {}, "S": {}, "L": {}}
    self.stds = {"H": {}, "S": {}, "L": {}}

    self.phi_step = 4
    self.psi_step = 4
    self.n_phi_half = 45
    self.n_psi_half = 45

    self.res_info = []
    asc = model.get_atom_selection_cache()
    sec_str_master_phil = iotbx.phil.parse(sec_str_master_phil_str)
    ss_params = sec_str_master_phil.fetch().extract()
    ss_params.secondary_structure.protein.search_method = "from_ca"
    ss_params.secondary_structure.from_ca_conservative = True

    self.ssm = ss_manager(model.get_hierarchy(),
        atom_selection_cache=asc,
        geometry_restraints_manager=None,
        sec_str_from_pdb_file=None,
        # params=None,
        params = ss_params.secondary_structure,
        was_initialized=False,
        mon_lib_srv=None,
        verbose=-1,
        log=null_out(),
        # log=sys.stdout,
        )

    filtered_ann = self.ssm.actual_sec_str.deep_copy()
    filtered_ann.remove_short_annotations(
        helix_min_len=4, sheet_min_len=4, keep_one_stranded_sheets=True)
    self.helix_sel = asc.selection(filtered_ann.overall_helices_selection())
    self.sheet_sel = asc.selection(filtered_ann.overall_sheets_selection())

    used_atoms = set()
    for three in generate_protein_threes(hierarchy=model.get_hierarchy(), geometry=None):
      main_residue = three[1]
      phi_psi_atoms = three.get_phi_psi_atoms()
      if phi_psi_atoms is None:
        continue
      phi_atoms, psi_atoms = phi_psi_atoms
      key = [x.i_seq for x in phi_atoms]+[psi_atoms[-1].i_seq]
      key = "%s" % key
      if key not in used_atoms:
        phi, psi = three.get_phi_psi_angles()
        rkey = three.get_ramalyze_key()
        resname = main_residue.resname
        ss_type = self._figure_out_ss(three)
        self.res_info.append( ["", rkey, resname, ss_type, phi, psi] )
        self.residue_counts[ss_type] += 1
        used_atoms.add(key)
    self.residue_counts["W"] = self.residue_counts["H"] + self.residue_counts["S"] + self.residue_counts["L"]
    for i in self.res_info:
      print(i, file=self.log)
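
The two numbers stored per secondary-structure class in calibration_values look like a (mean, std) pair; under that assumption, turning a raw score into a calibrated z-score would be the one-liner below (hypothetical helper, not part of the class above).

def calibrate_z(raw_score, ss_type, calibration_values):
    # assumes each entry of calibration_values is a (mean, std) pair
    mean, std = calibration_values[ss_type]
    return (raw_score - mean) / std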