Example #1
0
def ncs_grouping_and_group_dict(match_dict, hierarchy):
    """
    Build the list of NCS restraint groups from pairwise chain matches.

    The implementation of simplest way to do NCS grouping. Maximum one chain
    in selection.
    Do the job of minimal_master_ncs_grouping/minimal_ncs_operators_grouping.

    For every preliminary group returned by get_preliminary_ncs_groups:
      1. a master chain is chosen (see the comments in the body);
      2. the master selection is reduced to the intersection of the master
         selections of all pairs that contain the master;
      3. every other chain is added as a copy, trimmed to the reduced master
         selection, with rotation/translation from my_get_rot_trans.

    Args:
        match_dict: pairwise match information. It is only passed through to
            get_preliminary_ncs_groups, get_info_from_match_dict and
            get_copy_master_selections_from_match_dict.
        hierarchy: model hierarchy, handed to my_get_rot_trans as ``ph``.

    Returns:
        class_ncs_restraints_group_list with one NCS_restraint_group per
        preliminary group.

    Raises:
        AssertionError: if a preliminary group is empty, if the chosen master
            does not occur in any pair of its group, or if a copy selection
            ends up with a different size than the master selection.
    """
    ncs_restraints_group_list = class_ncs_restraints_group_list()
    preliminary_ncs_groups = get_preliminary_ncs_groups(match_dict)

    # Every dict in preliminary_ncs_groups is transformed into one
    # NCS_restraint_group. The values of such a dict are used as keys for
    # get_info_from_match_dict and are tested with ``master in key``
    # (presumably pairs of chain ids like ('chain A', 'chain C') - confirm
    # against get_preliminary_ncs_groups).
    for prel_gr_dict in preliminary_ncs_groups:
        # Without any chain there is no candidate for the master.
        assert prel_gr_dict
        sorted_gr_chains = sorted(prel_gr_dict.keys())

        # Master selection: prefer a chain that takes part in every pair of
        # the group. Usually there is exactly 1 such chain (many chains) or
        # 2 (only two chains in the group); ties are resolved alphabetically
        # so the result does not depend on dict/set ordering.
        all_pairs = list(prel_gr_dict.values())
        common_chains = set(all_pairs[0])
        for pair in all_pairs[1:]:
            common_chains &= set(pair)
        if common_chains:
            master = min(common_chains)
        else:
            # No common chain, e.g.
            # all_pairs = [('chain C', 'chain E'), ('chain A', 'chain E'),
            #              ('chain A', 'chain C')]
            # Fall back to the first chain in alphabetical order.
            master = sorted_gr_chains[0]

        # Intersect the master selections of all pairs containing the master
        # to get the minimum selection suitable for all copies.
        min_master_selection = None
        for key in all_pairs:
            if master not in key:
                continue
            master_sel, _, _ = get_info_from_match_dict(
                match_dict, key, master)
            if min_master_selection is None:
                min_master_selection = master_sel
            else:
                min_master_selection = min_master_selection.intersection(
                    master_sel)
        assert min_master_selection is not None, \
            "Master %s is not present in any pair of the group" % (master,)

        # create a new group
        g = NCS_restraint_group(master_iselection=min_master_selection,
                                str_selection=None)
        master_size = min_master_selection.size()
        for ch_copy in sorted_gr_chains:
            copy_sel, _, m_sel = get_copy_master_selections_from_match_dict(
                match_dict, prel_gr_dict, master, ch_copy)
            if copy_sel is None:
                # nothing to add for this chain
                continue
            new_copy_sel = copy_sel
            if copy_sel.size() > master_size:
                # The copy was matched against a bigger master selection
                # (m_sel); keep only the atoms that correspond to the
                # reduced master selection.
                filter_sel = get_bool_selection_to_keep(
                    big_selection=m_sel, small_selection=min_master_selection)
                new_copy_sel = copy_sel.select(filter_sel)
            elif copy_sel.size() < master_size:
                # The master selection is an intersection, so it cannot be
                # bigger than any of the selections it was built from.
                assert 0, "This should never be the case"
            if master_size > 0 and new_copy_sel.size() > 0:
                r, t, copy_rmsd = my_get_rot_trans(
                    ph=hierarchy,
                    master_selection=min_master_selection,
                    copy_selection=new_copy_sel,
                    master_chain_id=master,
                    copy_chain_id=ch_copy)
                c = NCS_copy(copy_iselection=new_copy_sel,
                             rot=r,
                             tran=t,
                             str_selection=None,
                             rmsd=copy_rmsd)
                g.append_copy(c)
                assert master_size == new_copy_sel.size(), "%d %d" % (
                    master_size, new_copy_sel.size())
        ncs_restraints_group_list.append(g)
    return ncs_restraints_group_list
Example #2
0
def shortcut_1(hierarchy, chains_info, chain_similarity_threshold,
               chain_max_rmsd, log, residue_match_radius):
    """
    Fast NCS detection for models built from identical chains.

    Checking the case when whole hierarchy was produced by multiplication of
    molecule with BIOMT or MTRIX matrices (or both). In this case we are
    expecting to find identical chains with 0 rmsd between them.

    Chains are grouped by their number of atoms; the first chain of every
    group becomes the master and the remaining ones become copies. The
    shortcut is abandoned (an empty group list is returned) as soon as
      - some chain has a unique number of atoms,
      - atom names of a copy differ from those of its master, or
      - the rmsd after superposition is undefined or above 0.2.

    Args:
        hierarchy: model hierarchy, passed to get_chain_xyz.
        chains_info: mapping chain id -> object with ``chains_atom_number``,
            ``atom_selection`` (list of lists, flattened here) and
            ``atom_names`` attributes. Must not be None and must contain more
            than one chain.
        chain_similarity_threshold: not used by this function.
        chain_max_rmsd: not used by this function (see the XXX note below).
        log: file-like object that receives progress messages via print().
        residue_match_radius: not used by this function.

    Returns:
        class_ncs_restraints_group_list: one group per distinct atom count on
        success, empty otherwise.

    Raises:
        AssertionError: if chains_info is None or has fewer than two chains.
    """
    def flatten_list_of_list(lofl):
        return [x for y in lofl for x in y]

    assert chains_info is not None
    assert len(chains_info) > 1
    empty_result = class_ncs_restraints_group_list()
    # Superposed chains with rmsd above this value are not treated as
    # identical.
    # XXX should we compare rmsd to chain_max_rmsd to be more relaxed and
    #     process more structures quickly?
    max_shortcut_rmsd = 0.2

    # new convenience structure: {<n_atoms>:[ch_id, ch_id, ch_id]}
    n_atom_chain_id_dict = {}
    for ch_id, ch_info in six.iteritems(chains_info):
        n_atom_chain_id_dict.setdefault(
            ch_info.chains_atom_number, []).append(ch_id)
    print("n_atom_chain_id_dict", n_atom_chain_id_dict, file=log)
    for chains_list in six.itervalues(n_atom_chain_id_dict):
        if len(chains_list) == 1:
            print("No shortcut, there is a chain with unique number of atoms:",
                  chains_list,
                  file=log)
            return empty_result
    # now we starting to check atom names, align chains, check rmsd and
    # populate result. If at some point we are not satisfied with any measure,
    # we will return empty result.
    result = class_ncs_restraints_group_list()
    for chains_list in six.itervalues(n_atom_chain_id_dict):
        # this should make one ncs group
        master_chain_id = chains_list[0]
        master_info = chains_info[master_chain_id]
        master_iselection = flatten_list_of_list(master_info.atom_selection)
        ncs_gr = NCS_restraint_group(
            master_iselection=flex.size_t(master_iselection),
            str_selection="chain '%s'" % master_chain_id)
        master_xyz = get_chain_xyz(hierarchy, master_chain_id)
        for copy_chain_id in chains_list[1:]:
            # these are copies
            copy_info = chains_info[copy_chain_id]
            if master_info.atom_names != copy_info.atom_names:
                print("No shortcut, atom names are not identical", file=log)
                return empty_result
            copy_iselection = flatten_list_of_list(copy_info.atom_selection)
            copy_xyz = get_chain_xyz(hierarchy, copy_chain_id)
            # Master sites are fitted onto the copy sites.
            lsq_fit_obj = superpose.least_squares_fit(reference_sites=copy_xyz,
                                                      other_sites=master_xyz)
            r = lsq_fit_obj.r
            t = lsq_fit_obj.t
            rmsd = copy_xyz.rms_difference(lsq_fit_obj.other_sites_best_fit())
            print("rmsd", master_chain_id, copy_chain_id, rmsd, file=log)
            if rmsd is None or rmsd > max_shortcut_rmsd:
                # Chains are rejected because the rmsd is too high (or
                # undefined), so the message says "high".
                print("No shortcut, high rmsd:",
                      rmsd,
                      "for chains",
                      master_chain_id,
                      copy_chain_id,
                      file=log)
                return empty_result
            # seems like a good enough copy
            c = NCS_copy(copy_iselection=flex.size_t(copy_iselection),
                         rot=r,
                         tran=t,
                         str_selection="chain '%s'" % copy_chain_id,
                         rmsd=rmsd)
            ncs_gr.append_copy(c)
        result.append(ncs_gr)
    print("Shortcut complete.", file=log)
    return result
Example #3
0
  def validate_ncs_phil_groups(
      self,
      pdb_h,
      ncs_phil_groups,
      asc,
      validate_user_supplied_groups=True):
    """
    Check user-supplied NCS groups and register them in
    self.ncs_restraints_group_list.

    Note that the result of this procedure is corrected ncs_phil_groups.
    These groups will be later submitted to build_ncs_obj_from_phil
    procedure. This is sub-optimal and should be changed because
    everything is already processed here and ready to build proper
    NCS_restraint_group object.

    Parameters:
      pdb_h: model hierarchy the selections refer to.
      ncs_phil_groups: list of phil group objects with ``reference`` (str)
        and ``selection`` (list of str) attributes, or None.
      asc: atom selection cache; ``asc.iselection(str)`` is used to turn
        selection strings into iselections.
      validate_user_supplied_groups: when False the groups are copied as
        they are (rot/tran unset, rmsd=999) and only the NCS transforms are
        recalculated. When True every group is re-derived by running the
        NCS search on a hierarchy made of the selected atoms only.

    Returns:
      None when there is nothing to validate (None, empty list or a single
      all-None placeholder group) or when any group has a missing/blank
      reference or selection (then _all_ groups are ignored).
      Otherwise a list of accepted groups.
      NOTE(review): the list holds NCS_restraint_group objects when
      validation is switched off and phil group objects when it is on -
      confirm that callers cope with both.

    Raises:
      Sorry: in validating mode, when a selection string selects no atoms.

    Side effects: writes messages to self.log, appends to
    self.ncs_restraints_group_list, may set self.phil_groups_modified and
    calls self.finalize_nrgl() before returning a list.
    """
    def show_particular_ncs_group(ncs_gr):
      # Print one group in phil format to the log.
      p_obj = ncs_group_master_phil.extract()
      p_obj.ncs_group[0].reference = ncs_gr.reference
      p_obj.ncs_group[0].selection = ncs_gr.selection
      to_show = ncs_group_master_phil.format(python_object=p_obj)
      to_show.show(out=self.log)

    def show_empty_selection_error_message(ng, where="reference"):
      print("  Missing or corrupted %s field:" % where, file=self.log)
      print("  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", file=self.log)
      print("      _ALL_ user-supplied groups will be ignored", file=self.log)
      print("  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", file=self.log)
      show_particular_ncs_group(ng)

    # Nothing supplied at all.
    if ncs_phil_groups is None or len(ncs_phil_groups) == 0:
      return None
    if (len(ncs_phil_groups) == 1 and
        ncs_phil_groups[0].reference is None and
        len(ncs_phil_groups[0].selection) == 1 and
        ncs_phil_groups[0].selection[0] is None):
      # This is empty ncs_group definition somehow creeped into here.
      # Not a big deal.
      return None

    # Any missing or blank field invalidates all user-supplied groups.
    print("Validating user-supplied NCS groups...", file=self.log)
    empty_cntr = 0
    for ng in ncs_phil_groups:
      if ng.reference is None or len(ng.reference.strip()) == 0:
        show_empty_selection_error_message(ng, where="reference")
        empty_cntr += 1
      for s in ng.selection:
        if s is None or len(s.strip()) == 0:
          show_empty_selection_error_message(ng, where="selection")
          empty_cntr += 1
    if empty_cntr > 0:
      print("  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", file=self.log)
      print("      _ALL_ user-supplied groups are ignored.", file=self.log)
      print("  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", file=self.log)
      return None

    validated_ncs_groups = []
    if not validate_user_supplied_groups:
      # Trust the user: build the groups directly from the given strings.
      for ncs_group in ncs_phil_groups:
        print("  Copying user-supplied groups without validation:", file=self.log)
        show_particular_ncs_group(ncs_group)
        ng = NCS_restraint_group(
            master_iselection=asc.iselection(ncs_group.reference),
            str_selection=ncs_group.reference)
        for s_string in ncs_group.selection:
          # rot/tran are filled in by recalculate_ncs_transforms below.
          ng.append_copy(NCS_copy(
              copy_iselection=asc.iselection(s_string),
              rot=None,
              tran=None,
              str_selection=s_string,
              rmsd=999))
        self.ncs_restraints_group_list.append(ng)
        validated_ncs_groups.append(ng)
      self.ncs_restraints_group_list.recalculate_ncs_transforms(
          asu_site_cart=pdb_h.atoms().extract_xyz())
    else:
      # Messages are the same for every group, build them once.
      msg = "Empty selection in NCS group definition: %s"
      rejected_msg = (
          "  REJECTED because copies don't match good enough.\n"
          "Try to revise selections or adjust chain_similarity_threshold or \n"
          "chain_max_rmsd parameters.")
      ok_msg = (
          "  OK. All atoms were included in"
          " validated selection.\n")
      modified_msg = (
          "  MODIFIED. Some of the atoms were excluded from"
          " your selection.\n  The most common reasons are:\n"
          "    1. Missing residues in one or several copies in NCS group.\n"
          "    2. Presence of alternative conformations (they are excluded).\n"
          "    3. Residue mismatch in requested copies.\n"
          "  Please check the validated selection further down.\n")
      all_c_ids = all_chain_ids()
      for ncs_group in ncs_phil_groups:
        print("  Validating:", file=self.log)
        show_particular_ncs_group(ncs_group)
        # first, check for selections producing 0 atoms
        user_original_reference_iselection = None
        user_original_copies_iselections = []
        n_atoms_in_user_ncs = 0
        s_string = ncs_group.reference
        if s_string is not None:
          sel = asc.iselection(s_string)
          n_atoms_in_user_ncs = sel.size()
          if n_atoms_in_user_ncs == 0:
            raise Sorry(msg % s_string)
          user_original_reference_iselection = sel
        for s_string in ncs_group.selection:
          if s_string is not None:
            sel = asc.iselection(s_string)
            if sel.size() == 0:
              raise Sorry(msg % s_string)
            user_original_copies_iselections.append(sel)
        #
        # The idea for user's groups is to pick them one by one,
        # select only reference and selections from the model,
        # If there are multiple chains in ref or selection -
        # combine them in one chain,
        # save atom original i_seq in atom.tmp
        # run searching procedure for the resulting hierarchy
        # if the user's selections were more or less OK - there should be
        # one group, get atom.tmp values for the selected atoms and using
        # original hierarchy convert them into string selections when needed.
        # If multiple groups produced - use them, most likely the user
        # provided something really wrong.
        # Need to pay some attention to what came out as master and what order
        # of references.
        #
        combined_h = iotbx.pdb.hierarchy.root()
        combined_h.append_model(iotbx.pdb.hierarchy.model())
        # The reference gets the first chain id, the copies the following
        # ones, in the order given by the user.
        master_chain = self.pdb_h_into_chain(
            pdb_h.select(user_original_reference_iselection),
            ch_id=all_c_ids[0])
        combined_h.only_model().append_chain(master_chain)
        for ch_id_n, uocis in enumerate(user_original_copies_iselections, 1):
          sel_chain = self.pdb_h_into_chain(
              pdb_h.select(uocis), ch_id=all_c_ids[ch_id_n])
          combined_h.only_model().append_chain(sel_chain)
        combined_h.reset_atom_i_seqs()

        # XXX Here we will regenerate phil selections using the mechanism
        # for finding NCS in this module. Afterwards we should have perfectly
        # good phil selections, and later the object will be created from
        # them.
        # Most likely this is not the best way to validate user selections.
        # Search parameters are relaxed relative to self.params.
        nrgl_fake_iseqs = ncs_search.find_ncs_in_hierarchy(
            ph=combined_h,
            chains_info=None,
            chain_max_rmsd=max(self.params.chain_max_rmsd, 10.0),
            log=None,
            chain_similarity_threshold=min(
                self.params.chain_similarity_threshold, 0.5),
            residue_match_radius=max(self.params.residue_match_radius, 1000.0))
        # hopefully, we will get only 1 ncs group
        if nrgl_fake_iseqs.get_n_groups() == 0:
          # this means that user's selection doesn't match
          print(rejected_msg, file=self.log)
          continue
        selections_were_modified = False
        for ncs_gr in nrgl_fake_iseqs:
          new_gr = ncs_gr.deep_copy()
          new_ncs_group = ncs_group_master_phil.extract().ncs_group[0]
          for i, isel in enumerate(ncs_gr.get_iselections_list()):
            m_all_isel = isel.deep_copy()
            # Map i_seqs of the combined hierarchy back to the i_seqs of
            # the original one (stored in atom.tmp).
            original_m_all_isel = combined_h.atoms().\
                select(m_all_isel).extract_tmp_as_size_t()
            if n_atoms_in_user_ncs > original_m_all_isel.size():
              selections_were_modified = True
            all_m_select_str = selection_string_from_selection(
                pdb_h=pdb_h,
                selection=original_m_all_isel,
                chains_info=self.chains_info,
                atom_selection_cache=asc)
            if i == 0:
              # the first iselection belongs to the master
              new_gr.master_iselection = original_m_all_isel
              new_gr.master_str_selection = all_m_select_str
              new_ncs_group.reference = all_m_select_str
            else:
              new_gr.copies[i-1].iselection = original_m_all_isel
              new_gr.copies[i-1].str_selection = all_m_select_str
              new_ncs_group.selection.append(all_m_select_str)
          self.ncs_restraints_group_list.append(new_gr)
          # Drop the first entry, which predates the appends above
          # (presumably the default placeholder of a freshly extracted
          # group - confirm against ncs_group_master_phil).
          new_ncs_group.selection = new_ncs_group.selection[1:]
          validated_ncs_groups.append(new_ncs_group)
        # Finally, we may check the number of atoms in selections that will
        # go further.
        # XXX Deleted, because this is taken care of previously
        if selections_were_modified:
          print(modified_msg, file=self.log)
          self.phil_groups_modified = True
        else:
          print(ok_msg, file=self.log)
    self.finalize_nrgl()
    return validated_ncs_groups