def __init__(self, model, ncs_params, ext_groups=None):
    """Create the list of NCS restraint group objects.

    Args:
        model: object providing ``get_ncs_obj()`` and
            ``get_number_of_atoms()``.
        ncs_params: NCS parameters. When None, the defaults from
            ``global_ncs_params.extract().ncs`` are used.
        ext_groups: optional ready-made list of group objects. When given,
            it becomes ``self.groups_list`` unchanged and nothing is
            derived from ``model``.

    Raises:
        Sorry: if a reference/other selection pair of an NCS group matches
            fewer than two atoms.
    """
    self.ncs_params = ncs_params
    self.n_excessive_site_distances = None
    self.ncs_restraints_group_list = class_ncs_restraints_group_list()
    if self.ncs_params is None:
        self.ncs_params = global_ncs_params.extract().ncs

    if ext_groups is not None:
        # Caller supplied the groups: use them as they are.
        self.groups_list = ext_groups
        return

    self.groups_list = []
    ncs_obj = model.get_ncs_obj()
    if ncs_obj is None:
        return
    self.ncs_restraints_group_list = ncs_obj.get_ncs_restraints_group_list()
    ncs_groups_selection_string_list = (
        self.ncs_restraints_group_list.get_array_of_str_selections())

    for i_gr, gr in enumerate(self.ncs_restraints_group_list):
        # Selection strings of this group; the error message below treats
        # index 0 as the reference and index i_pair + 1 as the other one.
        selection_strings = ncs_groups_selection_string_list[i_gr]
        n_copies = gr.get_number_of_copies()
        registry = pair_registry(n_seq=model.get_number_of_atoms(),
                                 n_ncs=n_copies + 1)
        # Register every (master atom, copy atom) pair; copies are numbered
        # from 1 in the registry.
        for i_copy, c in enumerate(gr.copies):
            for i_seq, j_seq in zip(gr.master_iselection, c.iselection):
                registry.enter(i_seq=i_seq, j_seq=j_seq, j_ncs=i_copy + 1)

        for i_pair, pair in enumerate(registry.selection_pairs()):
            n_matching = pair[0].size()
            if n_matching < 2:
                detail = [
                    "do not produce any pairs",
                    "produce only one pair",
                ][n_matching]
                # The selection strings live in the per-group list, not on
                # self: this constructor never sets self.selection_strings.
                raise Sorry("\n".join([
                    "NCS restraints selections %s of matching atoms:" % detail,
                    " Reference selection: %s" % show_string(
                        selection_strings[0]),
                    " Other selection: %s" % show_string(
                        selection_strings[i_pair + 1]),
                ]))

        g = _group(
            selection_strings=selection_strings,
            registry=registry,
            u_average_min=1.e-6,
        )
        self.groups_list.append(g)
def __init__(
        self,
        hierarchy=None,
        # XXX warning, ncs_phil_groups can be changed inside...
        ncs_phil_groups=None,
        params=None,
        log=None,
):
    """
    Build the NCS group list from user-supplied groups or by NCS search.

    TODO:
    1. Transfer get_ncs_info_as_spec() to ncs/ncs.py:ncs

    Order in which the way of building the NCS groups is chosen:
    1) ncs_phil_groups - user-supplied definitions are filtered
       (validate_ncs_phil_groups)
    2) hierarchy only - an NCS search is performed
       (build_ncs_obj_from_pdb_asu), used when validation returns None

    Args:
      hierarchy: input hierarchy. Its atom i_seqs are reset in place.
      ncs_phil_groups:
        iotbx.phil.parse(ncs_group_phil_str).extract().ncs_group
      params: search parameters; when None,
        input.get_default_params().ncs_search is used. Only
        exclude_selection is read directly in this method.
      log: output stream for warnings and messages; sys.stdout when None.

    Search settings described by the original documentation (presumably
    attributes of ``params`` - confirm against the phil definition):
      chain_max_rmsd (float): limit of rms difference between chains to be
        considered as copies
      min_percent (float): Threshold for similarity between chains
        similarity define as:
        (number of matching res) / (number of res in longer chain)
      chain_similarity_threshold (float): min similarity between matching
        chains
      residue_match_radius (float): max allow distance difference between
        pairs of matching atoms of two residues
    """
    self.number_of_ncs_groups = 0  # consider removing/replacing with function
    self.ncs_restraints_group_list = class_ncs_restraints_group_list()
    # keep hierarchy for writing (To have a source of atoms labels)
    self.hierarchy = hierarchy
    # residues common to NCS copies. Used for .spec representation
    self.common_res_dict = {}
    # Collect messages, recommendation and errors
    self.messages = ''  # Not used outside...
    self.old_i_seqs = None
    self.original_hierarchy = None
    self.truncated_hierarchy = None
    self.truncated_h_asc = None
    self.chains_info = None

    # set search parameters
    self.params = params
    if self.params is None:
        # NOTE(review): `input` is resolved at module level - presumably
        # the enclosing class rather than the builtin; confirm.
        self.params = input.get_default_params().ncs_search
    self.log = sys.stdout if log is None else log

    if hierarchy:
        hierarchy.atoms().reset_i_seq()
        self.original_hierarchy = hierarchy.deep_copy()
        self.original_hierarchy.reset_atom_i_seqs()
        if self.params.exclude_selection is not None:
            cache = hierarchy.atom_selection_cache()
            sel = cache.selection("not (%s)" % self.params.exclude_selection)
            self.truncated_hierarchy = hierarchy.select(sel)
        else:
            # this could be to save iseqs but I'm not sure
            self.truncated_hierarchy = hierarchy.select(
                flex.size_t_range(hierarchy.atoms_size()))
        # Remember the i_seqs the selected atoms had before renumbering.
        self.old_i_seqs = self.truncated_hierarchy.atoms().extract_i_seq()
        self.truncated_hierarchy.reset_atom_i_seqs()
        self.truncated_h_asc = self.truncated_hierarchy.atom_selection_cache()
        self.chains_info = ncs_search.get_chains_info(
            self.truncated_hierarchy)
        if self.truncated_hierarchy.atoms_size() == 0:
            # Nothing left after exclusion: no validation, search or
            # warning output is attempted.
            return

    validated_ncs_phil_groups = self.validate_ncs_phil_groups(
        pdb_h=self.truncated_hierarchy,
        ncs_phil_groups=ncs_phil_groups,
        asc=self.truncated_h_asc)
    if validated_ncs_phil_groups is None:
        # Last chance, building from hierarchy
        self.build_ncs_obj_from_pdb_asu(pdb_h=self.truncated_hierarchy,
                                        asc=self.truncated_h_asc)

    # error handling
    # print() with file= replaces the Python 2 "print >> stream" statement,
    # which fails at runtime once print is a function. The output text is
    # unchanged (each message is still followed by an extra newline).
    if self.ncs_restraints_group_list.get_n_groups() == 0:
        print('========== WARNING! ============\n', file=self.log)
        print(' No NCS relation were found !!!\n', file=self.log)
        print('================================\n', file=self.log)
    if self.messages != '':
        print(self.messages, file=self.log)
def shortcut_1(hierarchy, chains_info, chain_similarity_threshold,
               chain_max_rmsd, log, residue_match_radius):
    """
    Fast path for a hierarchy that was produced entirely by multiplying a
    molecule with BIOMT or MTRIX matrices (or both). In that case identical
    chains with 0 rmsd between them are expected.

    Chains are bucketed by their number of atoms; every bucket becomes one
    NCS group whose first chain is the master. The shortcut is abandoned
    (an empty group list is returned) as soon as a bucket holds a single
    chain, atom names of master and copy differ, or the superposition rmsd
    is None or above 0.2.

    chain_similarity_threshold, chain_max_rmsd and residue_match_radius are
    accepted but not read by this function.

    Returns a class_ncs_restraints_group_list (empty when no shortcut).
    """

    def _concatenate(list_of_lists):
        flat = []
        for part in list_of_lists:
            flat.extend(part)
        return flat

    assert chains_info is not None
    assert len(chains_info) > 1
    no_shortcut = class_ncs_restraints_group_list()

    # convenience structure: {<n_atoms>: [ch_id, ch_id, ch_id]}
    chains_by_n_atoms = {}
    for chain_id, info in six.iteritems(chains_info):
        chains_by_n_atoms.setdefault(info.chains_atom_number,
                                     []).append(chain_id)
    print("n_atom_chain_id_dict", chains_by_n_atoms, file=log)

    for same_size_chains in chains_by_n_atoms.values():
        if len(same_size_chains) == 1:
            print("No shortcut, there is a chain with unique number of atoms:",
                  same_size_chains, file=log)
            return no_shortcut

    # From here on: check atom names, superpose chains, check rmsd and
    # populate the result. Any failed check returns the empty result.
    groups = class_ncs_restraints_group_list()
    for same_size_chains in chains_by_n_atoms.values():
        # each bucket makes one NCS group
        master_id = same_size_chains[0]
        master_indices = _concatenate(chains_info[master_id].atom_selection)
        group = NCS_restraint_group(
            master_iselection=flex.size_t(master_indices),
            str_selection="chain '%s'" % master_id)
        master_sites = get_chain_xyz(hierarchy, master_id)

        for copy_id in same_size_chains[1:]:
            master_names = chains_info[master_id].atom_names
            copy_names = chains_info[copy_id].atom_names
            if master_names != copy_names:
                print("No shortcut, atom names are not identical", file=log)
                return no_shortcut

            copy_indices = _concatenate(chains_info[copy_id].atom_selection)
            copy_sites = get_chain_xyz(hierarchy, copy_id)
            fit = superpose.least_squares_fit(reference_sites=copy_sites,
                                              other_sites=master_sites)
            rotation = fit.r
            translation = fit.t
            rmsd = copy_sites.rms_difference(fit.other_sites_best_fit())
            print("rmsd", master_id, copy_id, rmsd, file=log)
            #
            # XXX should we compare rmsd to chain_max_rmsd to be more
            # relaxed and process more structures quickly?
            #
            # NOTE(review): the message says "low rmsd" although this branch
            # is taken when rmsd is missing or too high.
            if rmsd is None or rmsd > 0.2:
                print("No shortcut, low rmsd:", rmsd, "for chains",
                      master_id, copy_id, file=log)
                return no_shortcut

            # good enough copy
            group.append_copy(
                NCS_copy(copy_iselection=flex.size_t(copy_indices),
                         rot=rotation,
                         tran=translation,
                         str_selection="chain '%s'" % copy_id,
                         rmsd=rmsd))
        groups.append(group)

    print("Shortcut complete.", file=log)
    return groups
def ncs_grouping_and_group_dict(match_dict, hierarchy):
    """
    The implementation of simplest way to do NCS grouping. Maximum one chain
    in selection.
    Do the job of minimal_master_ncs_grouping/minimal_ncs_operators_grouping.

    For every preliminary group obtained from
    get_preliminary_ncs_groups(match_dict) a master chain is chosen, the
    master selection is reduced to the intersection of its selections over
    all pairs it takes part in, and one NCS_copy (with rotation,
    translation and rmsd from my_get_rot_trans) is appended per remaining
    chain.

    Args:
        match_dict: chain matching information; only accessed through
            get_preliminary_ncs_groups, get_info_from_match_dict and
            get_copy_master_selections_from_match_dict.
            (presumably keyed by pairs of chain ids - confirm)
        hierarchy: passed to my_get_rot_trans as ``ph``.

    Returns:
        class_ncs_restraints_group_list with one NCS_restraint_group per
        preliminary group.
    """
    ncs_restraints_group_list = class_ncs_restraints_group_list()
    preliminary_ncs_groups = get_preliminary_ncs_groups(match_dict)
    # Now we just need to transform preliminary_ncs_groups, using
    # match_dict, into ncs_restraints_group_list. For every dict in
    # preliminary_ncs_groups we determine the master and find the rotation
    # and translation for all the other chains (selections).
    for prel_gr_dict in preliminary_ncs_groups:
        sorted_gr_chains = sorted(prel_gr_dict.keys())
        # First option for master selection: the chain with the minimal
        # number of selected atoms.
        # NOTE(review): the `master` chosen here is overwritten by every
        # branch of the if/elif/else below, so only the assert has effect.
        min_n_atoms = 1e100
        master = None
        for ch in sorted_gr_chains:
            sel, _, _ = get_info_from_match_dict(match_dict, prel_gr_dict[ch],
                                                 ch)
            if sel.size() < min_n_atoms:
                min_n_atoms = sel.size()
                master = ch
        assert master is not None
        # Second option for master selection: try to select the chain that
        # is common to all pairs. It is not certain that this is always
        # possible.
        all_pairs = list(prel_gr_dict.values())
        left = set(all_pairs[0])
        # FIXME indexing dict.values order changes with py2/3
        # (the intersection computed below does not depend on that order)
        for i in all_pairs[1:]:
            left = left & set(i)
        # should be 1 (a lot of chains) or 2 (if there only 2 chains)
        if len(left) == 0:
            # No chain is common to all pairs, e.g.
            # all_pairs = [('chain C', 'chain E'), ('chain A', 'chain E'),
            #              ('chain A', 'chain C')]
            # any should work then?... Take the first chain in sorted order.
            master = sorted_gr_chains[0]
        elif len(left) > 1:
            # Several common chains: take the first in sorted order.
            master = sorted(left)[0]
        else:
            master = left.pop()
        # selecting smallest master key - for no reason actually
        # NOTE(review): key_with_smallest_selection is only used by the
        # assert below.
        key_with_smallest_selection = None
        len_of_smallest_selection = 1e100
        for ch, key in six.iteritems(prel_gr_dict):
            if master in key:
                master_sel, master_res, master_rmsd = get_info_from_match_dict(
                    match_dict, key, master)
                if master_sel.size() < len_of_smallest_selection:
                    len_of_smallest_selection = master_sel.size()
                    key_with_smallest_selection = key
        assert master is not None
        assert master in key_with_smallest_selection, "%s, %s" % (
            master, key_with_smallest_selection)
        #
        # Let's do intersection of all master selection to determine
        # the minimum selection suitable to all copies.
        min_master_selection = None
        for ch, key in six.iteritems(prel_gr_dict):
            if master in key:
                master_sel, master_res, master_rmsd = get_info_from_match_dict(
                    match_dict, key, master)
                if min_master_selection is None:
                    min_master_selection = master_sel
                else:
                    min_master_selection = min_master_selection.intersection(
                        master_sel)
        # create a new group
        g = NCS_restraint_group(master_iselection=min_master_selection,
                                str_selection=None)
        for ch_copy in sorted_gr_chains:
            master_size = min_master_selection.size()
            copy_sel, copy_res, m_sel = get_copy_master_selections_from_match_dict(
                match_dict, prel_gr_dict, master, ch_copy)
            if copy_sel is None:
                # No copy selection was returned for this chain: skip it.
                continue
            new_copy_sel = copy_sel
            new_master_sel = min_master_selection
            if copy_sel.size() > min_master_selection.size():
                # The copy selection is bigger than the reduced master
                # selection: filter the copy with the mask computed from
                # m_sel and min_master_selection.
                filter_sel = get_bool_selection_to_keep(
                    big_selection=m_sel, small_selection=min_master_selection)
                new_copy_sel = copy_sel.select(filter_sel)
            elif copy_sel.size() < min_master_selection.size():
                assert 0, "This should never be the case"
            if new_master_sel.size() > 0 and new_copy_sel.size() > 0:
                r, t, copy_rmsd = my_get_rot_trans(
                    ph=hierarchy,
                    master_selection=new_master_sel,
                    copy_selection=new_copy_sel,
                    master_chain_id=master,
                    copy_chain_id=ch_copy)
                c = NCS_copy(copy_iselection=new_copy_sel,
                             rot=r,
                             tran=t,
                             str_selection=None,
                             rmsd=copy_rmsd)
                g.append_copy(c)
                # Master and (filtered) copy must select the same number of
                # atoms.
                assert master_size == new_copy_sel.size(), "%d %d" % (
                    master_size, new_copy_sel.size())
        ncs_restraints_group_list.append(g)
    return ncs_restraints_group_list