def ncs_grouping_and_group_dict(match_dict, hierarchy):
    """
    Turn pairwise chain matches into a list of NCS restraint groups.

    This is the simplest way to do NCS grouping: maximum one chain in a
    selection. Does the job of
    minimal_master_ncs_grouping/minimal_ncs_operators_grouping.

    Every preliminary group obtained from
    get_preliminary_ncs_groups(match_dict) becomes one NCS_restraint_group:
    a master chain is picked, its selection is reduced to the atoms common
    to all pairings the master takes part in, and every other chain of the
    group is added as an NCS_copy with the rotation, translation and rmsd
    returned by my_get_rot_trans.

    match_dict -- pairwise match data; only accessed through
        get_preliminary_ncs_groups, get_info_from_match_dict and
        get_copy_master_selections_from_match_dict.
    hierarchy -- model hierarchy, handed to my_get_rot_trans as ``ph``.

    Returns the filled class_ncs_restraints_group_list.
    """
    result_groups = class_ncs_restraints_group_list()

    for prel_gr_dict in get_preliminary_ncs_groups(match_dict):
        chain_ids = sorted(prel_gr_dict)

        # First guess at the master: the chain with the fewest selected
        # atoms (the strict "<" lets the alphabetically first chain win a
        # tie). NOTE: this choice is always replaced by the "common chain"
        # rule below; the pass is kept so the same lookups are performed.
        fewest_atoms = 1e100
        master = None
        for chain_id in chain_ids:
            chain_sel, _, _ = get_info_from_match_dict(
                match_dict, prel_gr_dict[chain_id], chain_id)
            n_selected = chain_sel.size()
            if n_selected < fewest_atoms:
                fewest_atoms = n_selected
                master = chain_id
        assert master is not None

        # Second (effective) rule: prefer a chain that is present in every
        # pairing of the group. Set intersection does not depend on the
        # order in which the dict yields its values.
        pairings = list(prel_gr_dict.values())
        shared_chains = set(pairings[0])
        for pairing in pairings[1:]:
            shared_chains &= set(pairing)
        if not shared_chains:
            # No chain is common to all pairings, e.g.
            # [('chain C', 'chain E'), ('chain A', 'chain E'),
            #  ('chain A', 'chain C')]; fall back to the first chain.
            master = chain_ids[0]
        elif len(shared_chains) == 1:
            master = shared_chains.pop()
        else:
            # Typically two candidates when the group has only two chains.
            master = min(shared_chains)

        # Find the pairing in which the master has the smallest selection.
        # It only feeds the sanity check right after the loop.
        smallest_key = None
        smallest_size = 1e100
        for key in prel_gr_dict.values():
            if master not in key:
                continue
            master_sel, _, _ = get_info_from_match_dict(
                match_dict, key, master)
            if master_sel.size() < smallest_size:
                smallest_size = master_sel.size()
                smallest_key = key

        assert master is not None
        assert master in smallest_key, "%s, %s" % (master, smallest_key)

        # Intersect the master selections of all pairings that involve the
        # master: the result is the minimum selection suitable for all
        # copies.
        min_master_selection = None
        for key in prel_gr_dict.values():
            if master not in key:
                continue
            master_sel, _, _ = get_info_from_match_dict(
                match_dict, key, master)
            if min_master_selection is None:
                min_master_selection = master_sel
            else:
                min_master_selection = min_master_selection.intersection(
                    master_sel)

        # Create the new group and attach one copy per remaining chain.
        group = NCS_restraint_group(
            master_iselection=min_master_selection,
            str_selection=None)
        for copy_chain_id in chain_ids:
            master_size = min_master_selection.size()
            copy_sel, _, paired_master_sel = (
                get_copy_master_selections_from_match_dict(
                    match_dict, prel_gr_dict, master, copy_chain_id))
            if copy_sel is None:
                continue

            trimmed_copy_sel = copy_sel
            copy_size = copy_sel.size()
            if copy_size > master_size:
                # The copy was matched against a bigger master selection:
                # keep only the entries that pair with the reduced master.
                keep_flags = get_bool_selection_to_keep(
                    big_selection=paired_master_sel,
                    small_selection=min_master_selection)
                trimmed_copy_sel = copy_sel.select(keep_flags)
            elif copy_size < master_size:
                assert 0, "This should never be the case"

            if master_size > 0 and trimmed_copy_sel.size() > 0:
                rot, tran, copy_rmsd = my_get_rot_trans(
                    ph=hierarchy,
                    master_selection=min_master_selection,
                    copy_selection=trimmed_copy_sel,
                    master_chain_id=master,
                    copy_chain_id=copy_chain_id)
                group.append_copy(NCS_copy(
                    copy_iselection=trimmed_copy_sel,
                    rot=rot,
                    tran=tran,
                    str_selection=None,
                    rmsd=copy_rmsd))
                assert master_size == trimmed_copy_sel.size(), "%d %d" % (
                    master_size, trimmed_copy_sel.size())
        result_groups.append(group)

    return result_groups
def shortcut_1(hierarchy, chains_info, chain_similarity_threshold,
               chain_max_rmsd, log, residue_match_radius,
               shortcut_rmsd_threshold=0.2):
    """
    Fast NCS detection for models built by applying BIOMT/MTRIX matrices.

    Checks the case when the whole hierarchy was produced by multiplication
    of a molecule with BIOMT or MTRIX matrices (or both). In this case we
    expect to find identical chains with ~0 rmsd between them.

    Chains are bucketed by their number of atoms; the first chain of every
    bucket becomes the master and the others its copies. The shortcut is
    abandoned (an empty group list is returned) as soon as
      * some chain has a unique number of atoms,
      * a copy has atom names different from its master, or
      * the rmsd after superposition is None or exceeds
        ``shortcut_rmsd_threshold``.

    hierarchy -- model hierarchy, passed to get_chain_xyz.
    chains_info -- mapping chain id -> object providing
        ``chains_atom_number``, ``atom_names`` and ``atom_selection``
        (a list of lists of atom indices). Must hold more than one chain.
    chain_similarity_threshold, chain_max_rmsd, residue_match_radius --
        not used by this function; they are part of the signature only.
    log -- output stream given to print(); None means standard output.
    shortcut_rmsd_threshold -- largest rmsd accepted between a master and a
        copy. Defaults to 0.2, the value that used to be hard-coded.

    Returns a class_ncs_restraints_group_list: empty when the shortcut does
    not apply, otherwise one NCS_restraint_group per distinct atom count.
    """
    def flatten_list_of_list(lofl):
        return [x for y in lofl for x in y]

    assert chains_info is not None
    assert len(chains_info) > 1
    empty_result = class_ncs_restraints_group_list()

    # Convenience structure: {<n_atoms>: [ch_id, ch_id, ch_id]}
    n_atom_chain_id_dict = {}
    for chain_id, info in six.iteritems(chains_info):
        n_atom_chain_id_dict.setdefault(
            info.chains_atom_number, []).append(chain_id)
    print("n_atom_chain_id_dict", n_atom_chain_id_dict, file=log)

    for chain_ids in n_atom_chain_id_dict.values():
        if len(chain_ids) == 1:
            print("No shortcut, there is a chain with unique number of atoms:",
                  chain_ids, file=log)
            return empty_result

    # Now check atom names, superpose chains, check rmsd and populate the
    # result. If at some point any measure is not satisfied, return the
    # empty result.
    result = class_ncs_restraints_group_list()
    for chain_ids in n_atom_chain_id_dict.values():
        # Every bucket makes one NCS group; its first chain is the master.
        master_chain_id = chain_ids[0]
        master_info = chains_info[master_chain_id]
        master_iselection = flatten_list_of_list(master_info.atom_selection)
        ncs_gr = NCS_restraint_group(
            master_iselection=flex.size_t(master_iselection),
            str_selection="chain '%s'" % master_chain_id)
        master_xyz = get_chain_xyz(hierarchy, master_chain_id)

        for copy_chain_id in chain_ids[1:]:
            copy_info = chains_info[copy_chain_id]
            if master_info.atom_names != copy_info.atom_names:
                print("No shortcut, atom names are not identical", file=log)
                return empty_result
            copy_iselection = flatten_list_of_list(copy_info.atom_selection)
            copy_xyz = get_chain_xyz(hierarchy, copy_chain_id)
            # The copy is the reference of the fit, the master is the set
            # being moved; r and t are taken from the fit object as is.
            lsq_fit_obj = superpose.least_squares_fit(
                reference_sites=copy_xyz,
                other_sites=master_xyz)
            rmsd = copy_xyz.rms_difference(
                lsq_fit_obj.other_sites_best_fit())
            print("rmsd", master_chain_id, copy_chain_id, rmsd, file=log)
            # XXX should we compare rmsd to chain_max_rmsd to be more
            # relaxed and process more structures quickly? Callers can now
            # do so through shortcut_rmsd_threshold.
            if rmsd is None or rmsd > shortcut_rmsd_threshold:
                print("No shortcut, rmsd is too high:", rmsd, "for chains",
                      master_chain_id, copy_chain_id, file=log)
                return empty_result
            # Seems like a good enough copy.
            ncs_gr.append_copy(NCS_copy(
                copy_iselection=flex.size_t(copy_iselection),
                rot=lsq_fit_obj.r,
                tran=lsq_fit_obj.t,
                str_selection="chain '%s'" % copy_chain_id,
                rmsd=rmsd))
        result.append(ncs_gr)

    print("Shortcut complete.", file=log)
    return result
def validate_ncs_phil_groups(
        self, pdb_h, ncs_phil_groups, asc, validate_user_supplied_groups=True):
    """
    Validate user-supplied NCS groups and return the corrected groups.

    Note that the result of this procedure is corrected ncs_phil_groups.
    These groups will be later submitted to build_ncs_obj_from_phil
    procedure. This is sub-optimal and should be changed because everything
    is already processed here and ready to build proper NCS_restraint_group
    objects. Accepted groups are also appended to
    self.ncs_restraints_group_list.

    pdb_h -- model hierarchy the selections refer to.
    ncs_phil_groups -- user groups, each with a ``reference`` string and a
        ``selection`` list of strings; may be None or empty.
    asc -- atom selection cache; ``asc.iselection(str)`` resolves a
        selection string.
    validate_user_supplied_groups -- when False the groups are copied as
        they are and only the NCS transforms are recalculated; when True
        every group is re-derived with ncs_search.find_ncs_in_hierarchy.

    Returns None when there is nothing to validate or when any reference or
    selection field is missing/blank (all groups are ignored then);
    otherwise the list of validated groups.

    Raises Sorry when a selection string selects no atoms (validation mode
    only).
    """
    def _show_group(ncs_gr):
        # Print a single group in phil format to the log.
        p_obj = ncs_group_master_phil.extract()
        p_obj.ncs_group[0].reference = ncs_gr.reference
        p_obj.ncs_group[0].selection = ncs_gr.selection
        ncs_group_master_phil.format(python_object=p_obj).show(out=self.log)

    def _report_empty_field(ng, where="reference"):
        print(" Missing or corrupted %s field:" % where, file=self.log)
        print(" !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", file=self.log)
        print(" _ALL_ user-supplied groups will be ignored", file=self.log)
        print(" !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", file=self.log)
        _show_group(ng)

    validated_ncs_groups = []

    # Nothing supplied at all.
    if ncs_phil_groups is None or len(ncs_phil_groups) == 0:
        return None
    # A single, completely empty ncs_group definition that somehow crept in
    # here. Not a big deal.
    if len(ncs_phil_groups) == 1:
        only_group = ncs_phil_groups[0]
        if (only_group.reference is None
                and len(only_group.selection) == 1
                and only_group.selection[0] is None):
            return None

    print("Validating user-supplied NCS groups...", file=self.log)
    n_empty_fields = 0
    for ng in ncs_phil_groups:
        if ng.reference is None or not ng.reference.strip():
            _report_empty_field(ng, where="reference")
            n_empty_fields += 1
        for sel_str in ng.selection:
            if sel_str is None or not sel_str.strip():
                _report_empty_field(ng, where="selection")
                n_empty_fields += 1
    if n_empty_fields > 0:
        print(" !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", file=self.log)
        print(" _ALL_ user-supplied groups are ignored.", file=self.log)
        print(" !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", file=self.log)
        return None

    # Verify NCS selections
    msg = "Empty selection in NCS group definition: %s"

    if not validate_user_supplied_groups:
        for ncs_group in ncs_phil_groups:
            print(" Copying user-supplied groups without validation:",
                  file=self.log)
            _show_group(ncs_group)
            ng = NCS_restraint_group(
                master_iselection=asc.iselection(ncs_group.reference),
                str_selection=ncs_group.reference)
            for s_string in ncs_group.selection:
                # Placeholder operators; they are recalculated below.
                ng.append_copy(NCS_copy(
                    copy_iselection=asc.iselection(s_string),
                    rot=None,
                    tran=None,
                    str_selection=s_string,
                    rmsd=999))
            self.ncs_restraints_group_list.append(ng)
            validated_ncs_groups.append(ng)
        # Flags of all master atoms; not used further in this method.
        master_sel = flex.bool(pdb_h.atoms_size(), False)
        for gr in self.ncs_restraints_group_list:
            master_sel.set_selected(gr.master_iselection, True)
        self.ncs_restraints_group_list.recalculate_ncs_transforms(
            asu_site_cart=pdb_h.atoms().extract_xyz())
    else:
        ok_msg = (
            " OK. All atoms were included in"
            " validated selection.\n")
        modified_msg = (
            " MODIFIED. Some of the atoms were excluded from"
            " your selection.\n The most common reasons are:\n"
            " 1. Missing residues in one or several copies in NCS group.\n"
            " 2. Presence of alternative conformations (they are excluded).\n"
            " 3. Residue mismatch in requested copies.\n"
            " Please check the validated selection further down.\n")
        rejected_msg = (
            " REJECTED because copies don't match good enough.\n"
            "Try to revise selections or adjust chain_similarity_threshold or \n"
            "chain_max_rmsd parameters.")

        for ncs_group in ncs_phil_groups:
            print(" Validating:", file=self.log)
            _show_group(ncs_group)

            # First, check for selections producing 0 atoms.
            reference_isel = None
            copies_isels = []
            n_atoms_in_user_ncs = 0
            reference_str = ncs_group.reference
            if reference_str is not None:
                reference_isel = asc.iselection(reference_str)
                n_atoms_in_user_ncs = reference_isel.size()
                if n_atoms_in_user_ncs == 0:
                    raise Sorry(msg % reference_str)
            for s_string in ncs_group.selection:
                if s_string is None:
                    continue
                copy_isel = asc.iselection(s_string)
                if copy_isel.size() == 0:
                    raise Sorry(msg % s_string)
                copies_isels.append(copy_isel)

            # The idea for user's groups is to pick them one by one,
            # select only reference and selections from the model; if there
            # are multiple chains in reference or selection - combine them
            # in one chain, keep the original atom i_seq in atom.tmp and run
            # the search procedure on the resulting hierarchy. If the
            # user's selections were more or less OK there should be one
            # group; atom.tmp values then map the found atoms back to the
            # original hierarchy. If multiple groups are produced - use
            # them, most likely the user provided something really wrong.
            combined_h = iotbx.pdb.hierarchy.root()
            combined_h.append_model(iotbx.pdb.hierarchy.model())
            all_c_ids = all_chain_ids()
            master_chain = self.pdb_h_into_chain(
                pdb_h.select(reference_isel), ch_id=all_c_ids[0])
            combined_h.only_model().append_chain(master_chain)
            for chain_no, copy_isel in enumerate(copies_isels, 1):
                sel_chain = self.pdb_h_into_chain(
                    pdb_h.select(copy_isel), ch_id=all_c_ids[chain_no])
                combined_h.only_model().append_chain(sel_chain)
            combined_h.reset_atom_i_seqs()

            # XXX Here we regenerate phil selections using the mechanism
            # for finding NCS in this module. Afterwards we should have
            # perfectly good phil selections, and later the object will be
            # created from them. Most likely this is not the best way to
            # validate user selections.
            nrgl_fake_iseqs = ncs_search.find_ncs_in_hierarchy(
                ph=combined_h,
                chains_info=None,
                chain_max_rmsd=max(self.params.chain_max_rmsd, 10.0),
                log=None,
                chain_similarity_threshold=min(
                    self.params.chain_similarity_threshold, 0.5),
                residue_match_radius=max(
                    self.params.residue_match_radius, 1000.0))

            # Hopefully, we will get only 1 ncs group.
            if nrgl_fake_iseqs.get_n_groups() == 0:
                # This means that user's selection doesn't match.
                print(rejected_msg, file=self.log)
                continue

            selections_were_modified = False
            for ncs_gr in nrgl_fake_iseqs:
                new_gr = ncs_gr.deep_copy()
                new_ncs_group = ncs_group_master_phil.extract().ncs_group[0]
                for i, isel in enumerate(ncs_gr.get_iselections_list()):
                    m_all_isel = isel.deep_copy()
                    # atom.tmp values of the found atoms, read back as the
                    # selection in the original hierarchy.
                    original_isel = combined_h.atoms().select(
                        m_all_isel).extract_tmp_as_size_t()
                    if n_atoms_in_user_ncs > original_isel.size():
                        selections_were_modified = True
                    select_str = selection_string_from_selection(
                        pdb_h=pdb_h,
                        selection=original_isel,
                        chains_info=self.chains_info,
                        atom_selection_cache=asc)
                    if i == 0:
                        new_gr.master_iselection = original_isel
                        new_gr.master_str_selection = select_str
                        new_ncs_group.reference = select_str
                    else:
                        new_gr.copies[i-1].iselection = original_isel
                        new_gr.copies[i-1].str_selection = select_str
                        new_ncs_group.selection.append(select_str)
                self.ncs_restraints_group_list.append(new_gr)
                # Drop the first entry, which was in the freshly extracted
                # phil object before any string was appended.
                new_ncs_group.selection = new_ncs_group.selection[1:]
                validated_ncs_groups.append(new_ncs_group)

            if selections_were_modified:
                print(modified_msg, file=self.log)
                self.phil_groups_modified = True
            else:
                print(ok_msg, file=self.log)

    self.finalize_nrgl()
    return validated_ncs_groups