def __init__(self, fixed_ref_atoms, moving_h, moving_ref_atoms_iseqs,
    max_number_of_iterations=500, needed_rmsd=0.1):
  """
  Store the inputs of a closure run and reset its bookkeeping.

  fixed_ref_atoms - list of 3 atom objects; only their xyz's are needed.
  moving_h - hierarchy to make closure. Atom positions in it will be changed!
  moving_ref_atoms_iseqs - list of 3 indices matching atoms in
      moving_h.atoms()[<here!>].
  max_number_of_iterations, needed_rmsd - stored unchanged on the instance.
  """
  # Input sanity checks: exactly three reference atoms on each side and a
  # hierarchy that is not trivially small.
  for triple in (fixed_ref_atoms, moving_ref_atoms_iseqs):
    assert len(triple) == 3
  assert moving_h is not None
  assert moving_h.atoms_size() > 10 # arbitrary

  # Inputs
  self.moving_h = moving_h
  self.fixed_ref_atoms = fixed_ref_atoms
  self.moving_ref_atoms_iseqs = moving_ref_atoms_iseqs
  self.max_number_of_iterations = max_number_of_iterations
  self.needed_rmsd = needed_rmsd

  # Default angle-modification procedure and Ramachandran evaluator
  self.set_modify_angle_procedure(self._modify_angle)
  self.r = rama_eval()

  self.convergence_diff = 1e-5
  # early_exit will be bool: True if converged before
  # max_number_of_iterations was reached
  self.early_exit = None
  self.resulting_rmsd = None
def get_all_starting_conformations(moving_h, change_radius, cutoff=50,
    log=null_out()):
  """
  Build up to `cutoff` starting conformations of moving_h.

  Ramachandran outliers get angles from get_sampled_rama_favored_angles();
  non-outlier residues within `change_radius` entries of the middle of the
  phi/psi list get ramalyze.get_favored_regions(); all other residues are
  left alone, marked by (None, None).

  moving_h - hierarchy passed to utils.get_phi_psi_atoms() and
      set_rama_angles().
  change_radius - half-width (in phi/psi entries) of the window around the
      middle entry that is always varied.
  cutoff - maximum number of conformations to return.
  log - stream for diagnostic output.

  Returns a list of whatever set_rama_angles() returns, one item per
  accepted combination. The list is shorter than `cutoff` when fewer
  combinations pass is_not_none_combination().
  """
  variants = []
  r = rama_eval()
  phi_psi_atoms = utils.get_phi_psi_atoms(moving_h)
  n_rama = len(phi_psi_atoms)
  change_angles = range(n_rama//2-change_radius, n_rama//2+change_radius+1)
  for i, (phi_psi_pair, rama_key) in enumerate(phi_psi_atoms):
    # Evaluate each residue once and reuse the verdict.
    is_outlier = (utils.rama_evaluate(phi_psi_pair, r, rama_key) ==
        ramalyze.RAMALYZE_OUTLIER)
    if is_outlier:
      variants.append(get_sampled_rama_favored_angles(rama_key, r))
    elif i in change_angles:
      variants.append(ramalyze.get_favored_regions(rama_key))
    else:
      variants.append([(None, None)])
  print >> log, "variants", variants
  result = []
  # Walk the Cartesian product lazily: the loop ends either when `cutoff`
  # models are collected or when the combinations are exhausted, so it can
  # never index past the end and never holds the whole product in memory.
  for i, comb in enumerate(itertools.product(*variants)):
    if len(result) >= cutoff:
      break
    if is_not_none_combination(comb):
      result.append(set_rama_angles(moving_h, list(comb)))
      print >> log, "Model %d, angles:" % i, comb
  return result
def get_starting_conformations(moving_h, cutoff=50, log=null_out()):
  """
  Build up to `cutoff` starting conformations, modifying only
  Ramachandran outliers.

  moving_h - hierarchy passed to utils.get_phi_psi_atoms() and
      set_rama_angles().
  cutoff - maximum number of conformations to return.
  log - stream for diagnostic output.

  Returns a list of whatever set_rama_angles() returns. The list is empty
  when no residue is an outlier.
  """
  variants = []
  r = rama_eval()
  for phi_psi_pair, rama_key in utils.get_phi_psi_atoms(moving_h):
    if (utils.rama_evaluate(phi_psi_pair, r, rama_key) ==
        ramalyze.RAMALYZE_OUTLIER):
      variants.append(get_sampled_rama_favored_angles(rama_key, r))
    else:
      # (None, None) marks a residue whose angles stay untouched.
      variants.append([(None, None)])
  result = []
  print >> log, "variants", variants
  if variants.count([(None, None)]) == len(variants):
    # Report through the caller-supplied log, like every other message here.
    print >> log, "Nothing to CCD"
    return result
  # Iterate the product lazily and stop at `cutoff` accepted models or when
  # the combinations run out, whichever comes first.
  for i, comb in enumerate(itertools.product(*variants)):
    if len(result) >= cutoff:
      break
    if is_not_none_combination(comb):
      result.append(set_rama_angles(moving_h, list(comb)))
      print >> log, "Model %d, angles:" % i, comb
  return result
def get_all_starting_conformations(moving_h, change_radius, n_outliers,
    direction_forward=True, cutoff=50, change_all=True, log=null_out(),
    check_omega=False):
  """
  Enumerate phi/psi angle combinations to try for moving_h.

  For every phi/psi entry one list of candidate angles is chosen:
    - outlier while n_outliers < 3: get_sampled_rama_favored_angles();
    - inside the change window, any other outlier, or (with check_omega)
      when any omega deviates from 180 by more than 30:
      ramalyze.get_favored_regions();
    - otherwise [(None, None)].
  The change window is centred on the middle entry and widened by
  change_radius and n_outliers // 2; it is only used when change_all is
  true.

  Returns the list of all combinations (tuples) accepted by
  is_not_none_combination(). An empty list is returned when no phi/psi
  entries are found. direction_forward and cutoff are not used in this
  function body.
  """
  if log is None:
    log = StringIO()
  r = rama_eval()
  phi_psi_atoms = utils.get_phi_psi_atoms(moving_h, omega=True)
  if len(phi_psi_atoms) == 0:
    print >> log, "Strange input to starting conformations!!!"
    return []
  n_rama = len(phi_psi_atoms)

  if change_all:
    change_angles = range(
        (n_rama) // 2 - change_radius - n_outliers // 2,
        (n_rama) // 2 + change_radius + 1 + n_outliers // 2)
  else:
    change_angles = [None]

  def _off_trans(omega_value):
    # True when omega is known and more than 30 away from +/-180
    return omega_value is not None and abs(abs(omega_value) - 180) > 30

  has_twisted = False
  if check_omega:
    has_twisted = any(_off_trans(entry[2]) for entry in phi_psi_atoms)

  print >> log, "n_outliers", n_outliers
  variants = []
  for i, (phi_psi_pair, rama_key, omega) in enumerate(phi_psi_atoms):
    angle_is_outlier = utils.rama_evaluate(
        phi_psi_pair, r, rama_key) == ramalyze.RAMALYZE_OUTLIER
    in_window = i in change_angles
    twisted = _off_trans(omega) and check_omega
    print >> log, "in cycle, N, outlier?, change?, twisted?", i, angle_is_outlier, in_window, twisted
    if angle_is_outlier and n_outliers < 3:
      candidates = get_sampled_rama_favored_angles(rama_key, r)
    elif in_window or angle_is_outlier or has_twisted:
      candidates = ramalyze.get_favored_regions(rama_key)
    else:
      candidates = [(None, None)]
    variants.append(candidates)
  print >> log, "variants", variants

  # keep only combinations that change at least something
  all_angles_combination_f = [
      comb for comb in itertools.product(*variants)
      if is_not_none_combination(comb)]
  print >> log, "len(all_angles_combination_f)", len(
      all_angles_combination_f)
  return all_angles_combination_f
def list_rama_outliers_h(hierarchy, r=None, include_allowed=False):
  """
  Run list_rama_outliers() on the phi/psi atoms of a hierarchy.

  hierarchy - passed to get_phi_psi_atoms().
  r - Ramachandran evaluator; a new rama_eval() is created when None.
  include_allowed - forwarded unchanged to list_rama_outliers().

  Returns whatever list_rama_outliers() returns.
  """
  evaluator = rama_eval() if r is None else r
  return list_rama_outliers(
      get_phi_psi_atoms(hierarchy),
      evaluator,
      include_allowed=include_allowed)
def extract_proxies(self, hierarchy):
  """
  Rebuild the Oldfield and Emsley phi/psi proxy lists from `hierarchy`.

  Only atoms selected by self.bool_atom_selection are considered. Previous
  proxies are discarded. When self.need_filtering is set, every residue is
  scored with a fresh rama_eval() and routed according to
  self.params.restrain_rama_allowed, restrain_rama_outliers and
  restrain_allowed_outliers_with_emsley. A summary is printed to self.log.
  """
  self.hierarchy = hierarchy
  selected_h = hierarchy.select(self.bool_atom_selection)
  n_seq = flex.max(selected_h.atoms().extract_i_seq())
  # Start from empty proxy containers
  self._oldfield_proxies = ext.shared_phi_psi_proxy()
  self._emsley_proxies = ext.shared_phi_psi_proxy()
  # it would be great to save rama_eval, but the fact that this is called in
  # pdb_interpretation, not in mmtbx.model makes it impossible
  if self.need_filtering:
    self.rama_eval = rama_eval()
  for three in generate_protein_threes(hierarchy=selected_h, geometry=None):
    rc = three.get_phi_psi_atoms()
    if rc is None:
      continue
    rama_key = three.get_ramalyze_key()
    if self.need_filtering:
      phi_psi = three.get_phi_psi_angles()
      r_evaluation = self.rama_eval.evaluate_score(
          rama_key,
          self.rama_eval.get_score(rama_key, phi_psi[0], phi_psi[1]))
    phi_atoms, psi_atoms = rc
    # four phi atoms plus the last psi atom define the proxy
    i_seqs = [atom.i_seq for atom in phi_atoms]
    i_seqs.append(psi_atoms[-1].i_seq)
    r_name = three.get_resnames()[1]
    assert rama_key in range(6)
    text_rama_key = ramalyze.res_types[rama_key]
    assert text_rama_key in [
        "general", "glycine", "cis-proline", "trans-proline",
        "pre-proline", "isoleucine or valine"]
    proxy = ext.phi_psi_proxy(
        residue_name=r_name,
        residue_type=text_rama_key,
        i_seqs=i_seqs)
    # pick where to put...
    if self.params.rama_potential != "oldfield":
      # self.params.rama_potential == "emsley"
      self.append_emsley_proxies(proxy, n_seq)
    elif not self.need_filtering:
      self.append_oldfield_proxies(proxy, n_seq)
    elif (r_evaluation == ramalyze.RAMALYZE_FAVORED
        or (r_evaluation == ramalyze.RAMALYZE_ALLOWED
            and self.params.restrain_rama_allowed)
        or (r_evaluation == ramalyze.RAMALYZE_OUTLIER
            and self.params.restrain_rama_outliers)):
      self.append_oldfield_proxies(proxy, n_seq)
    elif self.params.restrain_allowed_outliers_with_emsley:
      self.append_emsley_proxies(proxy, n_seq)
  print("", file=self.log)
  print(" %d Ramachandran restraints generated." % (self.get_n_proxies()),
        file=self.log)
  print(" %d Oldfield and %d Emsley." %
        (self.get_n_oldfield_proxies(), self.get_n_emsley_proxies()),
        file=self.log)
def get_sampled_rama_favored_angles(rama_key, r=None, step=20):
  """
  Sample the phi/psi plane on a regular grid and keep the favored points.

  rama_key - index into ramalyze.res_types.
  r - Ramachandran evaluator; a new rama_eval() is created when None.
  step - grid spacing; both angles run over range(-180, 180, step).

  Returns a list of (phi, psi) tuples, phi-major order, for which
  ramalyze.ramalyze.evalScore() reports ramalyze.RAMALYZE_FAVORED.
  """
  if r is None:
    r = rama_eval()

  def _is_favored(phi, psi):
    res_type = ramalyze.res_types[rama_key]
    verdict = ramalyze.ramalyze.evalScore(
        res_type, r.evaluate_angles(res_type, phi, psi))
    return verdict == ramalyze.RAMALYZE_FAVORED

  return [
      (phi, psi)
      for phi in range(-180, 180, step)
      for psi in range(-180, 180, step)
      if _is_favored(phi, psi)]
def rama_outliers_selection(hierarchy, r=None, margin=1):
  """
  Build a selection string covering all Ramachandran outliers.

  hierarchy - passed to get_phi_psi_atoms().
  r - Ramachandran evaluator; a new rama_eval() is created when None.
  margin - forwarded to pair_selection() for every outlier.

  Returns the pair_selection() strings of all residues that
  rama_evaluate() reports as ramalyze.RAMALYZE_OUTLIER, joined with
  " or ". The string is empty when there are no outliers.
  """
  if r is None:
    r = rama_eval()
  # The score itself is not needed here, only the outlier verdict, so the
  # residue is evaluated once instead of scored and then evaluated.
  out_sel = [
      pair_selection(phi_psi_pair, margin)
      for phi_psi_pair, rama_key in get_phi_psi_atoms(hierarchy)
      if rama_evaluate(phi_psi_pair, r, rama_key) == ramalyze.RAMALYZE_OUTLIER]
  return " or ".join(out_sel)
def __init__(self, pdb_h, mediocre_hbond_cutoff=3.0, bad_hbond_cutoff=3.5,
    rama_eval_manager=None):
  """
  Store the hierarchy, the two h-bond cutoffs and a Ramachandran evaluator.

  pdb_h - hierarchy; its atom selection cache and atoms are cached on the
      instance.
  mediocre_hbond_cutoff, bad_hbond_cutoff - stored unchanged.
  rama_eval_manager - evaluator to reuse; a new rama_eval() is created
      when None.
  """
  self.pdb_h = pdb_h
  self.bad_hbond_cutoff = bad_hbond_cutoff
  self.mediocre_hbond_cutoff = mediocre_hbond_cutoff
  self.asc = self.pdb_h.atom_selection_cache()
  self.atoms = pdb_h.atoms()
  if rama_eval_manager is None:
    self.r = rama_eval()
  else:
    self.r = rama_eval_manager
def rama_score_selection(hierarchy, r=None, score="outlier", margin=1):
  """
  Build a selection string covering residues with a given Ramachandran
  evaluation.

  hierarchy - passed to get_phi_psi_atoms().
  r - Ramachandran evaluator; a new rama_eval() is created when None.
  score - "outlier" (ramalyze.RAMALYZE_OUTLIER) or "allowed"
      (ramalyze.RAMALYZE_ALLOWED); anything else fails the assertion.
  margin - forwarded to pair_selection() for every matching residue.

  Returns the pair_selection() strings of all matching residues joined
  with " or ". The string is empty when nothing matches.
  """
  assert score in ["outlier", "allowed"]
  test = ramalyze.RAMALYZE_OUTLIER
  if score == "allowed":
    test = ramalyze.RAMALYZE_ALLOWED
  if r is None:
    r = rama_eval()
  # Only the evaluation category is compared, so the separate score
  # computation per residue is not needed.
  out_sel = [
      pair_selection(phi_psi_pair, margin)
      for phi_psi_pair, rama_key in get_phi_psi_atoms(hierarchy)
      if rama_evaluate(phi_psi_pair, r, rama_key) == test]
  return " or ".join(out_sel)
def __init__(self, pdb_input, cif_objects=None, params=None, log=sys.stdout,
    verbose=True):
  """
  Prepare a model for idealization.

  Steps performed here:
    - store the inputs and reset result/statistics placeholders;
    - read crystal symmetry, discarding it when the unit cell or space
      group is missing or the cell volume is below 10;
    - construct the hierarchy and refuse duplicate labels, improper
      alternative conformations, multi-model input and CA-only chains;
    - when no usable symmetry exists, put the molecule into a P1 box and
      write "<output_prefix>_boxed.pdb";
    - optionally trim alternative conformations, then remove H/D atoms;
    - compute the initial model statistics and record the elapsed time in
      self.time_for_init.

  Raises Sorry for multi-model input or chains with only CA atoms.
  """
  t_0 = time()
  self.pdb_input = pdb_input
  self.cif_objects = cif_objects
  self.params = params
  self.log = log
  self.verbose = verbose

  # Statistics collected at the different stages
  self.rmsd_from_start = None
  self.init_model_statistics = None
  self.after_ss_idealization = None
  self.after_loop_idealization = None
  self.after_rotamer_fixing = None
  self.final_model_statistics = None

  # Map, restraints and library managers, filled in later
  self.reference_map = None
  self.whole_grm = None
  self.master_grm = None
  self.working_grm = None
  self.mon_lib_srv = None
  self.ener_lib = None
  self.rotamer_manager = None
  self.rama_manager = rama_eval()

  self.original_hierarchy = None # original pdb_h, without any processing
  self.original_boxed_hierarchy = None # original and boxed (if needed)
  self.whole_pdb_h = None # boxed with processing (AC trimming, H trimming,...)
  self.master_pdb_h = None # master copy in case of NCS
  self.working_pdb_h = None # one to use for fixing (master_pdb_h or working_pdb_h)

  # various checks, shifts, trims
  self.cs = self.pdb_input.crystal_symmetry()
  # check self.cs (copy-paste from secondary_sturcure_restraints)
  corrupted_cs = (
      self.cs is not None
      and ([self.cs.unit_cell(), self.cs.space_group()].count(None) > 0
           or self.cs.unit_cell().volume() < 10))
  if corrupted_cs:
    self.cs = None

  self.original_hierarchy = self.pdb_input.construct_hierarchy()
  # couple checks if pdb_h is ok
  o_c = self.original_hierarchy.overall_counts()
  o_c.raise_duplicate_atom_labels_if_necessary()
  o_c.raise_residue_groups_with_multiple_resnames_using_same_altloc_if_necessary()
  o_c.raise_chains_with_mix_of_proper_and_improper_alt_conf_if_necessary()
  o_c.raise_improper_alt_conf_if_necessary()
  if len(self.original_hierarchy.models()) > 1:
    raise Sorry("Multi model files are not supported")
  if any(c.is_ca_only()
         for c in self.original_hierarchy.only_model().chains()):
    raise Sorry("Don't support models with chains containing only CA atoms.")

  self.original_boxed_hierarchy = self.original_hierarchy.deep_copy()
  self.original_boxed_hierarchy.reset_atom_i_seqs()
  self.shift_vector = None
  if self.cs is None:
    if corrupted_cs:
      reason = "Symmetry information is corrupted, "
    else:
      reason = "Symmetry information was not found, "
    print >> self.log, reason
    print >> self.log, "putting molecule in P1 box."
    self.log.flush()
    from cctbx import uctbx
    atoms = self.original_boxed_hierarchy.atoms()
    box = uctbx.non_crystallographic_unit_cell_with_the_sites_in_its_center(
        sites_cart=atoms.extract_xyz(),
        buffer_layer=3)
    atoms.set_xyz(new_xyz=box.sites_cart)
    self.cs = box.crystal_symmetry()
    self.shift_vector = box.shift_vector

  # A shift vector only exists when the model was boxed above
  if self.shift_vector is not None:
    write_whole_pdb_file(
        file_name="%s_boxed.pdb" % self.params.output_prefix,
        pdb_hierarchy=self.original_boxed_hierarchy,
        crystal_symmetry=self.cs,
        ss_annotation=self.pdb_input.extract_secondary_structure())

  asc = self.original_boxed_hierarchy.atom_selection_cache()
  if self.params.trim_alternative_conformations:
    no_altloc = asc.selection("altloc ' '")
    self.whole_pdb_h = self.original_boxed_hierarchy.select(no_altloc).deep_copy()
    print >> self.log, "Atoms in original/working model: %d/%d" % (
        self.original_boxed_hierarchy.atoms_size(), self.whole_pdb_h.atoms_size())
  else:
    self.whole_pdb_h = self.original_boxed_hierarchy.deep_copy()

  # Trimming hydrogens
  # Many intermediate variables are needed due to strange behavior of
  # selections described in
  # iotbx/pdb/tst_hierarchy.py:exercise_selection_and_deep_copy()
  asc2 = self.whole_pdb_h.atom_selection_cache()
  h_sel = asc2.selection("not (element H or element D)")
  temp_h = self.whole_pdb_h.select(h_sel)
  self.whole_pdb_h = temp_h.deep_copy()
  self.whole_pdb_h.reset_atom_i_seqs()

  # Statistics are computed on a hierarchy re-read from the PDB string
  reread_h = iotbx.pdb.input(
      source_info=None,
      lines=self.whole_pdb_h.as_pdb_string()).construct_hierarchy()
  self.init_model_statistics = geometry_no_grm(
      pdb_hierarchy=reread_h,
      molprobity_scores=True)
  self.time_for_init = time()-t_0
def list_rama_outliers_h(hierarchy, r=None):
  """
  Run list_rama_outliers() on the phi/psi atoms of a hierarchy.

  hierarchy - passed to get_phi_psi_atoms().
  r - Ramachandran evaluator; a new rama_eval() is created when None.

  Returns whatever list_rama_outliers() returns.
  """
  evaluator = r if r is not None else rama_eval()
  return list_rama_outliers(get_phi_psi_atoms(hierarchy), evaluator)
def __init__(self, pdb_hierarchy, params=None, log=sys.stdout,
    proxies=None, tables=None, initialize=True):
  """
  Set up Ramachandran restraints for a hierarchy.

  pdb_hierarchy - must not be None and must carry non-zero atom i_seqs.
  params - one of:
      None: defaults from master_phil are used;
      an object with an `enabled` attribute: used as is (new-style scope);
      an object with `ramachandran_plot_restraints`: that sub-scope is used;
      anything else: treated as old-style parameters and converted.
  log - stream for messages.
  proxies, tables - optional 4-tuples (oldfield, emsley, emsley8k,
      phi_psi_2) used instead of empty proxy lists / unset tables.
  initialize - when true, the tables required by params.favored / allowed /
      outlier are loaded and proxies are extracted from pdb_hierarchy.
  """
  assert pdb_hierarchy is not None
  assert not pdb_hierarchy.atoms().extract_i_seq().all_eq(0), ""+\
      "Probably all atoms have i_seq = 0 which is wrong"

  if params is None:
    w_params = master_phil.fetch().extract().ramachandran_plot_restraints
  elif hasattr(params, 'enabled'):
    # New params
    w_params = params
  elif hasattr(params, 'ramachandran_plot_restraints'):
    w_params = params.ramachandran_plot_restraints
  else:
    # old params, make transfer
    w_params = master_phil.fetch().extract().ramachandran_plot_restraints
    w_params.selection = params.rama_selection
    # oldfield
    w_params.enabled = True
    old_weight = params.oldfield.weight
    if old_weight is not None and not old_weight > 0:
      old_weight = 0
    w_params.oldfield.weight = old_weight
    w_params.oldfield.weight_scale = \
        1/(params.oldfield.esd**2) * params.oldfield.weight_scale
    w_params.oldfield.distance_weight_min = 2.0
    w_params.oldfield.distance_weight_max = params.oldfield.dist_weight_max
    # emsley
    w_params.emsley.weight = params.rama_weight
    w_params.emsley.scale_allowed = params.scale_allowed
    # strategy ('oldfield' needs no change at this point)
    if params.rama_potential == 'emsley':
      w_params.favored = 'emsley'
      w_params.allowed = 'emsley'
      w_params.outlier = 'emsley'
    w_params.outlier = (
        params.rama_potential if params.restrain_rama_outliers else None)
    w_params.allowed = (
        params.rama_potential if params.restrain_rama_allowed else None)
    if params.restrain_allowed_outliers_with_emsley:
      if not params.restrain_rama_allowed:
        w_params.allowed = 'emsley'
      if not params.restrain_rama_outliers:
        w_params.outlier = 'emsley'

  self.params = w_params
  self.rama_eval = rama_eval()
  self.hierarchy = pdb_hierarchy # only for def select()
  self.log = log

  # Proxy lists: empty unless supplied by the caller
  self._oldfield_proxies = ext.shared_phi_psi_proxy()
  self._emsley_proxies = ext.shared_phi_psi_proxy()
  self._emsley8k_proxies = ext.shared_phi_psi_proxy()
  self._phi_psi_2_proxies = ext.shared_phi_psi_proxy()
  if proxies is not None:
    (self._oldfield_proxies,
     self._emsley_proxies,
     self._emsley8k_proxies,
     self._phi_psi_2_proxies) = proxies

  # Tables: unset unless supplied by the caller
  self._oldfield_tables = None
  self._emsley_tables = None
  self._emsley8k_tables = None
  self._phi_psi_2_tables = None
  if tables is not None:
    (self._oldfield_tables,
     self._emsley_tables,
     self._emsley8k_tables,
     self._phi_psi_2_tables) = tables

  self.initialize = initialize
  # bad hack to keep emsley potential in working(?) condition after
  # changing from rama500 to rama8000
  self.new_to_old_conversion = {
      "general": "ala",
      "glycine": "gly",
      "cis-proline": "pro",
      "trans-proline": "pro",
      "pre-proline": "prepro",
      "isoleucine or valine": "ala",
  }
  bool_atom_selection = self._determine_bool_atom_selection(pdb_hierarchy)
  fao = [self.params.favored, self.params.allowed, self.params.outlier]
  if initialize:
    if 'oldfield' in fao:
      self._oldfield_tables = ramachandran_plot_data(
          plot_cutoff=self.params.oldfield.plot_cutoff)
    if 'emsley' in fao:
      self._emsley_tables = load_tables()
    #
    ### THIS IS CRUEL. REMOVE ONCE favored/allowed/outlier are made multiple!
    #
    if 'emsley8k' in fao or self.params.inject_emsley8k_into_oldfield_favored:
      self._emsley8k_tables = load_emsley8k_tables()
    if 'phi_psi_2' in fao:
      self._phi_psi_2_tables = load_phi_psi_2_tables()
    # get proxies
    self.extract_proxies(pdb_hierarchy)
  if 'oldfield' in fao:
    self.target_phi_psi = self.update_phi_psi_targets_on_init(
        hierarchy=pdb_hierarchy)
  self.initialize = False
def extract_proxies(self, hierarchy):
  """
  Rebuild all phi/psi proxy lists (oldfield, emsley, emsley8k, phi_psi_2)
  from `hierarchy`.

  Every residue with defined phi/psi atoms is scored with a fresh
  rama_eval(). Its evaluation (favored / allowed / outlier) selects the
  restraint type configured in self.params.favored / allowed / outlier,
  and a proxy is appended to the matching list. A restraint type of None
  means the residue is not restrained.

  A summary is printed to self.log. When Phi/Psi/2 proxies were created,
  one line per such proxy is printed as well.

  Raises RuntimeError for an unknown restraint type or an unexpected
  evaluation value.
  """
  def _get_motifs():
    # Phi/Psi/2 motifs are only needed (and phenix only imported) when a
    # 'phi_psi_2' restraint is actually requested.
    from phenix.programs.phi_psi_2 import results_manager as pp2
    pp2_manager = pp2(model=None, log=self.log)
    phi_psi_2_motifs = pp2_manager.get_overall_motif_count_and_output(
        None,
        self.hierarchy,
        return_rama_restraints=True,
    )
    return phi_psi_2_motifs

  phi_psi_2_motifs = None
  favored = ramalyze.RAMALYZE_FAVORED
  allowed = ramalyze.RAMALYZE_ALLOWED
  outlier = ramalyze.RAMALYZE_OUTLIER
  self.hierarchy = hierarchy
  bool_atom_selection = self._determine_bool_atom_selection(hierarchy)
  selected_h = hierarchy.select(bool_atom_selection)
  n_seq = flex.max(selected_h.atoms().extract_i_seq())
  # Drop all previous proxies
  self._oldfield_proxies = ext.shared_phi_psi_proxy()
  self._emsley_proxies = ext.shared_phi_psi_proxy()
  self._emsley8k_proxies = ext.shared_phi_psi_proxy()
  self._phi_psi_2_proxies = ext.shared_phi_psi_proxy()
  # it would be great to save rama_eval, but the fact that this is called in
  # pdb_interpretation, not in mmtbx.model makes it impossible
  self.rama_eval = rama_eval()
  # evaluation -> configured restraint type; does not change per residue
  ev_match_dict = {
      favored: self.params.favored,
      allowed: self.params.allowed,
      outlier: self.params.outlier,
  }
  outl = []
  for three in generate_protein_threes(hierarchy=selected_h, geometry=None):
    rc = three.get_phi_psi_atoms()
    if rc is None:
      continue
    rama_key = three.get_ramalyze_key()
    angles = three.get_phi_psi_angles()
    rama_score = self.rama_eval.get_score(rama_key, angles[0], angles[1])
    r_eval = self.rama_eval.evaluate_score(rama_key, rama_score)
    phi_atoms, psi_atoms = rc
    i_seqs = [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq]
    resnames = three.get_resnames()
    r_name = resnames[1]
    assert rama_key in range(6)
    text_rama_key = ramalyze.res_types[rama_key]
    assert text_rama_key in [
        "general", "glycine", "cis-proline", "trans-proline",
        "pre-proline", "isoleucine or valine"]
    # pick where to put...
    r_type = ev_match_dict[r_eval]
    if r_type == 'oldfield':
      proxy = ext.phi_psi_proxy(
          residue_type=text_rama_key,
          i_seqs=i_seqs,
          weight=1) # XXX Not used in oldfield
      self.append_oldfield_proxies(proxy, n_seq)
      ### THIS IS CRUEL. REMOVE ONCE favored/allowed/outlier are made multiple!
      if (self.params.inject_emsley8k_into_oldfield_favored):
        proxy = ext.phi_psi_proxy(
            residue_type=text_rama_key,
            i_seqs=i_seqs,
            weight=5)
        self.append_emsley8k_proxies(proxy, n_seq)
      ###
    elif r_type == 'emsley':
      weight = self.params.emsley.weight
      proxy = ext.phi_psi_proxy(
          residue_type=text_rama_key,
          i_seqs=i_seqs,
          weight=weight)
      self.append_emsley_proxies(proxy, n_seq)
    elif r_type == 'emsley8k':
      if (r_eval is favored):
        weight = self.params.emsley8k.weight_favored
      elif (r_eval is allowed):
        weight = self.params.emsley8k.weight_allowed
      elif (r_eval is outlier):
        weight = self.params.emsley8k.weight_outlier
      else:
        raise RuntimeError("Rama eveluation failed.")
      proxy = ext.phi_psi_proxy(
          residue_type=text_rama_key,
          i_seqs=i_seqs,
          weight=weight)
      self.append_emsley8k_proxies(proxy, n_seq)
    elif r_type == 'phi_psi_2':
      from phenix.pdb_tools.phi_psi_2_data import get_phi_psi_key_for_rama_proxy
      if phi_psi_2_motifs is None:
        phi_psi_2_motifs = _get_motifs()
      if (r_eval is favored):
        strategy = self.params.phi_psi_2.favored_strategy
      elif (r_eval is allowed):
        strategy = self.params.phi_psi_2.allowed_strategy
      elif (r_eval is outlier):
        strategy = self.params.phi_psi_2.outlier_strategy
      else:
        raise RuntimeError("Rama eveluation failed.")
      if strategy == 'closest':
        # the current angles become part of the strategy key
        strategy += '_%0.1f_%0.1f' % tuple(three.get_phi_psi_angles())
      pp2_key = get_phi_psi_key_for_rama_proxy(
          phi_psi_2_motifs,
          three,
          strategy=strategy,
      )
      if pp2_key is None:
        continue
      weight = 1
      proxy = ext.phi_psi_proxy(
          residue_type=pp2_key,
          i_seqs=i_seqs,
          weight=weight)
      outl.append([proxy.residue_type, three])
      self.append_phi_psi_2_proxies(proxy, n_seq)
    elif (r_type is None):
      pass
    else:
      raise RuntimeError("Not an option: %s" % str(r_type))
  print("", file=self.log)
  print(" %d Ramachandran restraints generated." % (self.get_n_proxies()),
        file=self.log)
  print(" %d Oldfield, %d Emsley, %d emsley8k and %d Phi/Psi/2." %
        (self.get_n_oldfield_proxies(), self.get_n_emsley_proxies(),
         self.get_n_emsley8k_proxies(), self.get_n_phi_psi_2_proxies()),
        file=self.log)
  if outl:
    # The header goes to the same stream as the lines it introduces.
    print(' Rama restraints by Phi/Psi/2', file=self.log)
    for pp2, three in outl:
      print(' %s : %s' % (three[1].id_str(), pp2.split('|')[0]),
            file=self.log)
def __init__(self):
  """Create the Ramachandran evaluator used by this object."""
  evaluator = rama_eval()
  self.rama_eval = evaluator
def __init__(self, pdb_hierarchy, params=None,
    secondary_structure_annotation=None, reference_map=None,
    crystal_symmetry=None, grm=None, rama_manager=None,
    rotamer_manager=None, log=null_out(), verbose=False,
    tried_rama_angles=None, tried_final_rama_angles=None, n_run=0):
  """
  Idealize loops of a single-model hierarchy: repeat CCD on every chain,
  then optionally fix rotamers and minimize, until no Ramachandran outliers
  (and, with params.make_all_trans, no bad omegas) remain or
  params.number_of_ccd_trials is reached.

  pdb_hierarchy - input model; a deep copy is modified and kept in
      self.resulting_pdb_h.
  params - passed through self.process_params().
  reference_map, crystal_symmetry - must be given together; otherwise the
      map is ignored.
  grm - geometry restraints manager used for rotamer fixing/minimization.
  rama_manager, rotamer_manager - evaluators to reuse; created when None.
  log - stream for messages.
  tried_rama_angles, tried_final_rama_angles - nested dicts
      {chain id: {resid: [tried variants]}} recording CCD solutions already
      used. Dicts passed by the caller are updated in place. When None
      (the default) a fresh dict is created for this instance, so separate
      instances never share state.
  n_run - stored on the instance.

  Raises Sorry for multi-model input.
  """
  if len(pdb_hierarchy.models()) > 1:
    raise Sorry("Multi-model files are not supported")
  self.original_pdb_h = pdb_hierarchy
  self.secondary_structure_annotation=secondary_structure_annotation
  asc = pdb_hierarchy.atom_selection_cache()
  self.xrs = pdb_hierarchy.extract_xray_structure(crystal_symmetry=crystal_symmetry)
  self.reference_map = reference_map
  self.resulting_pdb_h = pdb_hierarchy.deep_copy()
  self.resulting_pdb_h.reset_atom_i_seqs()
  self.params = self.process_params(params)
  self.log = log
  self.verbose = verbose
  self.grm = grm
  self.r = rama_manager
  self.ideal_res_dict = idealized_aa.residue_dict()
  self.n_run = n_run
  if self.r is None:
    self.r = rama_eval()
  self.rotamer_manager = rotamer_manager
  if self.rotamer_manager is None:
    self.rotamer_manager = RotamerEval()
  ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
  self.p_initial_rama_outliers = ram.out_percent
  self.p_before_minimization_rama_outliers = None
  self.p_after_minimiaztion_rama_outliers = None
  n_inputs = [reference_map, crystal_symmetry].count(None)
  if not (n_inputs == 0 or n_inputs == 2):
    print >> log, "Need to have both map and symmetry info. Not using map."
    self.reference_map = None

  # here we are recording what CCD solutions were used to fix particular
  # outliers to not use the same in the next CCD try.
  # Nested dict. First level:
  # key: chain id, value: dict
  #   key: resid (string), value: list of tried variants.
  # `is None` (not truthiness) so that an empty dict supplied by the caller
  # is still the object that gets filled.
  self.tried_rama_angles = (
      {} if tried_rama_angles is None else tried_rama_angles)
  self.tried_final_rama_angles = (
      {} if tried_final_rama_angles is None else tried_final_rama_angles)

  berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
  self.berkeley_p_before_minimization_rama_outliers = \
      berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
  n_bad_omegas = utils.n_bad_omegas(self.resulting_pdb_h)
  self.berkeley_p_after_minimiaztion_rama_outliers = self.berkeley_p_before_minimization_rama_outliers
  self.ref_exclusion_selection = ""
  self.number_of_ccd_trials = 0

  # Mirror of the loop condition below: nothing to do when there are no
  # outliers and omega fixing is either not requested or not needed.
  omegas_need_fixing = (n_bad_omegas>=1 and self.params.make_all_trans)
  if (self.berkeley_p_before_minimization_rama_outliers <= 0.001
      and not omegas_need_fixing):
    print >> self.log, "No ramachandran outliers, skipping CCD step."
  print >> self.log, "n_bad_omegas", n_bad_omegas
  print >> self.log, "self.params.make_all_trans",self.params.make_all_trans
  if not self.params.enabled:
    print >> self.log, "Loop idealization is not enabled, use 'enabled=True'."

  while (self.number_of_ccd_trials < self.params.number_of_ccd_trials
      and (self.berkeley_p_after_minimiaztion_rama_outliers > 0.001 or
          (n_bad_omegas>=1 and self.params.make_all_trans))
      and self.params.enabled):
    print >> self.log, "CCD try number, outliers:", self.number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers
    processed_chain_ids = []
    for chain in self.resulting_pdb_h.only_model().chains():
      if chain.id not in self.tried_rama_angles.keys():
        self.tried_rama_angles[chain.id] = {}
      if chain.id not in self.tried_final_rama_angles.keys():
        self.tried_final_rama_angles[chain.id] = {}
      print >> self.log, "Idealizing chain %s" % chain.id
      if chain.id not in processed_chain_ids:
        processed_chain_ids.append(chain.id)
      else:
        continue
      selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
      sel = asc.selection("chain %s" % chain.id)
      chain_h = self.resulting_pdb_h.select(sel)
      m = chain_h.only_model()
      i = 0
      cutted_chain_h = None
      for c in m.chains():
        if i == 0:
          cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(c)
        else:
          print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
          print >> self.log, " Removing chain %s with %d residues" % (c.id, len(c.residues()))
          m.remove_chain(c)
        i += 1
      exclusions, ch_h = self.idealize_chain(
          hierarchy=(cutted_chain_h if cutted_chain_h else chain_h),
          tried_rama_angles_for_chain=self.tried_rama_angles[chain.id],
          tried_final_rama_angles_for_chain=self.tried_final_rama_angles[chain.id])
      if ch_h is not None:
        set_xyz_smart(
            dest_h=chain,
            source_h=ch_h)
        for resnum in exclusions:
          selection += " and not resseq %s" % resnum
      self.ref_exclusion_selection += "(%s) or " % selection
      print >> self.log, "self.tried_rama_angles", self.tried_rama_angles
      print >> self.log, "self.tried_final_rama_angles", self.tried_final_rama_angles
    #
    # dumping and reloading hierarchy to do proper rounding of coordinates
    self.resulting_pdb_h = iotbx.pdb.input(
        source_info=None,
        lines=self.resulting_pdb_h.as_pdb_string()).construct_hierarchy()
    berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
    self.berkeley_p_before_minimization_rama_outliers = \
        berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
    # NOTE(review): this strips the trailing "or " once per trial, while
    # the chain loop keeps appending to the same string on later trials;
    # confirm the selection is still well formed after the first trial.
    if len(self.ref_exclusion_selection) > 0:
      self.ref_exclusion_selection = self.ref_exclusion_selection[:-3]
    ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
    self.p_before_minimization_rama_outliers = ram.out_percent
    duke_count = ram.get_outliers_count_and_fraction()[0]
    if berkeley_count != duke_count:
      print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count
      self.resulting_pdb_h.write_pdb_file(file_name="%d%s_discrepancy.pdb" % (self.number_of_ccd_trials, self.params.output_prefix))
    if self.params.debug:
      self.resulting_pdb_h.write_pdb_file(
          file_name="%d%s_all_not_minized.pdb" % (self.number_of_ccd_trials,
              self.params.output_prefix))
    if self.params.minimize_whole:
      print >> self.log, "minimizing whole chain..."
      print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
      # XXX but first let's check and fix rotamers...
      print >> self.log, "Fixing/checking rotamers in loop idealization..."
      non_outliers_for_check = asc.selection("(%s)" % self.ref_exclusion_selection)
      pre_result_h = mmtbx.utils.fix_rotamer_outliers(
          pdb_hierarchy=self.resulting_pdb_h,
          grm=self.grm.geometry,
          xrs=self.xrs,
          map_data=self.reference_map,
          radius=5,
          mon_lib_srv=None,
          rotamer_manager=self.rotamer_manager,
          backrub_range=None, # don't sample backrub at this point
          non_outliers_to_check=non_outliers_for_check, # bool selection
          asc=asc,
          verbose=True,
          log=self.log)
      if self.reference_map is None:
        minimize_wrapper_for_ramachandran(
            hierarchy=self.resulting_pdb_h,
            xrs=self.xrs,
            original_pdb_h=self.original_pdb_h,
            excl_string_selection=self.ref_exclusion_selection,
            grm=self.grm,
            log=None,
            ss_annotation=self.secondary_structure_annotation)
      else:
        mwwm = minimize_wrapper_with_map(
            pdb_h=self.resulting_pdb_h,
            xrs=self.xrs,
            target_map=self.reference_map,
            grm=self.grm,
            ss_annotation=self.secondary_structure_annotation,
            number_of_cycles=Auto,
            log=self.log)
    if self.params.debug:
      self.resulting_pdb_h.write_pdb_file(
          file_name="%d%s_all_minized.pdb" % (self.number_of_ccd_trials,
              self.params.output_prefix))
    ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
    self.p_after_minimiaztion_rama_outliers = ram.out_percent
    berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
    duke_count = ram.get_outliers_count_and_fraction()[0]
    n_bad_omegas = utils.n_bad_omegas(self.resulting_pdb_h)
    self.berkeley_p_after_minimiaztion_rama_outliers = \
        berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
    if berkeley_count != duke_count:
      print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count
    else:
      print >> self.log, "Number of Rama outliers after min:", berkeley_count
    print >> self.log, "Number of bad omegas:", n_bad_omegas
    self.number_of_ccd_trials += 1
def backrub_move(
    prev_res,
    cur_res,
    next_res,
    angle,
    move_oxygens=False,
    accept_worse_rama=False,
    rotamer_manager=None,
    rama_manager=None):
  """
  Rotate cur_res (plus the flanking C/O of prev_res and N of next_res)
  by `angle` degrees around the axis through the CA atoms of prev_res and
  next_res. Atom coordinates are changed in place; nothing is returned.

  prev_res, cur_res, next_res - three consecutive residues. Nothing is done
      when prev_res or next_res is None or when any of the three CA atoms
      is missing.
  angle - rotation in degrees; |angle| < 1e-4 is treated as no move.
  move_oxygens - when true, the two peptide planes are additionally rotated
      around their CA-CA axes by the angles returned by _find_theta(),
      aiming at the carbonyl oxygen positions recorded before the move.
  accept_worse_rama - when false, the oxygen correction is undone if it
      lowers the Ramachandran evaluation of cur_res.
  rotamer_manager - accepted for interface compatibility; not used here.
  rama_manager - Ramachandran evaluator; created when None and needed.
  """
  # Cheap exits first, so a no-op call does not pay for the imports below.
  if abs(angle) < 1e-4:
    return
  if prev_res is None or next_res is None:
    return

  import boost.python
  ext = boost.python.import_ext("mmtbx_validation_ramachandran_ext")
  from mmtbx_validation_ramachandran_ext import rama_eval
  from scitbx.matrix import rotate_point_around_axis
  from mmtbx.conformation_dependent_library.multi_residue_class import ThreeProteinResidues, \
      RestraintsRegistry

  # Coordinates before the move, per residue, keyed by stripped atom name
  saved_res = [{},{},{}]
  for i, r in enumerate([prev_res, cur_res, next_res]):
    for a in r.atoms():
      saved_res[i][a.name.strip()] = a.xyz

  prev_ca = prev_res.find_atom_by(name=" CA ")
  cur_ca = cur_res.find_atom_by(name=" CA ")
  next_ca = next_res.find_atom_by(name=" CA ")
  if prev_ca is None or next_ca is None or cur_ca is None:
    return

  atoms_to_move = []
  atoms_to_move.append(prev_res.find_atom_by(name=" C  "))
  atoms_to_move.append(prev_res.find_atom_by(name=" O  "))
  for atom in cur_res.atoms():
    atoms_to_move.append(atom)
  atoms_to_move.append(next_res.find_atom_by(name=" N  "))
  for atom in atoms_to_move:
    assert atom is not None
    new_xyz = rotate_point_around_axis(
        axis_point_1 = prev_ca.xyz,
        axis_point_2 = next_ca.xyz,
        point = atom.xyz,
        angle = angle,
        deg = True)
    atom.xyz = new_xyz

  if not move_oxygens:
    return

  registry = RestraintsRegistry()
  if rama_manager is None:
    rama_manager = rama_eval()
  tpr = ThreeProteinResidues(geometry=None, registry=registry)
  tpr.append(prev_res)
  tpr.append(cur_res)
  tpr.append(next_res)
  phi_psi_angles = tpr.get_phi_psi_angles()
  rama_key = tpr.get_ramalyze_key()
  ev_before = rama_manager.evaluate_angles(
      rama_key, phi_psi_angles[0], phi_psi_angles[1])

  theta1 = _find_theta(
      ap1 = prev_ca.xyz,
      ap2 = cur_ca.xyz,
      cur_xyz = prev_res.find_atom_by(name=" O  ").xyz,
      needed_xyz = saved_res[0]["O"])
  theta2 = _find_theta(
      ap1 = cur_ca.xyz,
      ap2 = next_ca.xyz,
      cur_xyz = cur_res.find_atom_by(name=" O  ").xyz,
      needed_xyz = saved_res[1]["O"])

  first_plane = [
      prev_res.find_atom_by(name=" C  "),
      prev_res.find_atom_by(name=" O  "),
      cur_res.find_atom_by(name=" C  ")]
  second_plane = [
      cur_res.find_atom_by(name=" C  "),
      cur_res.find_atom_by(name=" O  "),
      next_res.find_atom_by(name=" N  ")]
  # Snapshot before the correction. The C of cur_res is rotated around two
  # different axes, and such rotations do not commute, so the only exact
  # way to undo the correction is to restore the stored coordinates.
  before_correction = [(a, a.xyz) for a in first_plane + second_plane]

  for a in first_plane:
    a.xyz = rotate_point_around_axis(
        axis_point_1 = prev_ca.xyz,
        axis_point_2 = cur_ca.xyz,
        point = a.xyz,
        angle = theta1,
        deg = True)
  for a in second_plane:
    a.xyz = rotate_point_around_axis(
        axis_point_1 = cur_ca.xyz,
        axis_point_2 = next_ca.xyz,
        point = a.xyz,
        angle = theta2,
        deg = True)

  phi_psi_angles = tpr.get_phi_psi_angles()
  rama_key = tpr.get_ramalyze_key()
  ev_after = rama_manager.evaluate_angles(
      rama_key, phi_psi_angles[0], phi_psi_angles[1])
  if ev_before > ev_after and not accept_worse_rama:
    # Correction made the Ramachandran evaluation worse: put atoms back.
    for a, xyz in before_correction:
      a.xyz = xyz
def __init__(self, pdb_hierarchy, params=None,
             secondary_structure_annotation=None, log=null_out(),
             verbose=True):
    """Idealize Ramachandran outliers chain by chain, then optionally minimize.

    Work is done on a deep copy of ``pdb_hierarchy`` stored in
    ``self.resulting_pdb_h``; the input is kept as ``self.original_pdb_h``.

    Parameters:
      pdb_hierarchy - single-model hierarchy; ``Sorry`` is raised for
          multi-model input.
      params - passed through ``self.process_params``; the result must provide
          ``enabled``, ``number_of_ccd_trials``, ``output_prefix`` and
          ``minimize_whole``.
      secondary_structure_annotation - forwarded to the minimizer.
      log - stream for all messages.
      verbose - stored as ``self.verbose``.

    Outlier statistics are stored in ``p_initial_rama_outliers``,
    ``p_before_minimization_rama_outliers``,
    ``p_after_minimiaztion_rama_outliers`` (values of ramalyze
    ``out_percent``) and in the ``berkeley_p_*`` attributes (number of
    newlines in ``utils.list_rama_outliers_h`` output divided by the residue
    count, times 100).

    Side effect: writes "<output_prefix>_before_minimization.pdb".

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; in particular, everything from the trailing-"or" strip onwards is
    placed after the CCD loop - confirm against the formatted file.
    """
    if len(pdb_hierarchy.models()) > 1:
        raise Sorry("Multi-model files are not supported")
    self.original_pdb_h = pdb_hierarchy
    self.secondary_structure_annotation = secondary_structure_annotation
    xrs = pdb_hierarchy.extract_xray_structure()
    # NOTE(review): the selection cache is built from the input hierarchy but
    # applied to self.resulting_pdb_h below, which is rebuilt after every
    # trial - this assumes the atom order never changes.
    asc = pdb_hierarchy.atom_selection_cache()
    self.resulting_pdb_h = pdb_hierarchy.deep_copy()
    self.resulting_pdb_h.reset_atom_i_seqs()
    self.params = self.process_params(params)
    self.log = log
    self.verbose = verbose
    self.r = rama_eval()
    self.rotamer_manager = RotamerEval()
    ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
    self.p_initial_rama_outliers = ram.out_percent
    self.p_before_minimization_rama_outliers = None
    self.p_after_minimiaztion_rama_outliers = None
    berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
    self.berkeley_p_before_minimization_rama_outliers = \
        berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
    self.berkeley_p_after_minimiaztion_rama_outliers = None
    self.ref_exclusion_selection = ""
    number_of_ccd_trials = 0
    if self.berkeley_p_before_minimization_rama_outliers <= 0.001:
        print >> self.log, "No ramachandran outliers, skipping CCD step."
    if not self.params.enabled:
        print >> self.log, "Loop idealization is not enabled, use 'enabled=True'."
    while (number_of_ccd_trials < self.params.number_of_ccd_trials
           and self.berkeley_p_before_minimization_rama_outliers > 0.001
           and self.params.enabled):
        # Progress goes to the log like every other message, not to stdout.
        print >> self.log, "CCD try number, outliers:", number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers
        number_of_ccd_trials += 1
        processed_chain_ids = []
        for chain in self.resulting_pdb_h.only_model().chains():
            print >> self.log, "Idealizing chain %s" % chain.id
            # Only the first chain object carrying a given id is processed.
            if chain.id not in processed_chain_ids:
                processed_chain_ids.append(chain.id)
            else:
                continue
            selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
            sel = asc.selection("chain %s" % chain.id)
            chain_h = self.resulting_pdb_h.select(sel)
            m = chain_h.only_model()
            cutted_chain_h = None
            for chain_number, c in enumerate(m.chains()):
                if chain_number == 0:
                    cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(c)
                else:
                    print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
                    print >> self.log, " Removing chain %s with %d residues" % (c.id, len(c.residues()))
                    m.remove_chain(c)
            exclusions, ch_h = self.idealize_chain(
                hierarchy=(cutted_chain_h if cutted_chain_h else chain_h))
            if ch_h is not None:
                # Copy idealized coordinates back into the working hierarchy.
                set_xyz_smart(
                    dest_h=chain,
                    source_h=ch_h)
                for resnum in exclusions:
                    selection += " and not resseq %s" % resnum
            self.ref_exclusion_selection += "(%s) or " % selection
        # dumping and reloading hierarchy to do proper rounding of coordinates
        self.resulting_pdb_h = iotbx.pdb.input(
            source_info=None,
            lines=self.resulting_pdb_h.as_pdb_string()).construct_hierarchy()
        berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
        self.berkeley_p_before_minimization_rama_outliers = \
            berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
    if len(self.ref_exclusion_selection) > 0:
        # Drop the "or " left behind by the last appended clause.
        self.ref_exclusion_selection = self.ref_exclusion_selection[:-3]
    self.resulting_pdb_h.write_pdb_file(file_name="%s_before_minimization.pdb" % self.params.output_prefix)
    ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
    self.p_before_minimization_rama_outliers = ram.out_percent
    duke_count = ram.get_outliers_count_and_fraction()[0]
    if berkeley_count != duke_count:
        print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count
    if self.params.minimize_whole:
        print >> self.log, "minimizing whole thing..."
        print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
        minimize_wrapper_for_ramachandran(
            hierarchy=self.resulting_pdb_h,
            xrs=xrs,
            original_pdb_h=self.original_pdb_h,
            excl_string_selection=self.ref_exclusion_selection,
            log=None,
            ss_annotation=self.secondary_structure_annotation)
    # Final statistics are gathered whether or not minimization was run.
    ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
    self.p_after_minimiaztion_rama_outliers = ram.out_percent
    berkeley_count = utils.list_rama_outliers_h(self.resulting_pdb_h).count("\n")
    duke_count = ram.get_outliers_count_and_fraction()[0]
    self.berkeley_p_after_minimiaztion_rama_outliers = \
        berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
    if berkeley_count != duke_count:
        print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count
    else:
        print >> self.log, "Number of Rama outliers after min:", berkeley_count
def extract_proxies(self, hierarchy):
    """Rebuild all Ramachandran phi/psi proxies for ``hierarchy``.

    Existing oldfield, emsley and emsley8k proxy arrays are replaced by empty
    ones.  Every residue triple yielded by ``generate_protein_threes`` over
    the selected atoms is then rated with a fresh ``rama_eval()``
    (favored / allowed / outlier).  The restraint flavour configured for that
    rating (``self.params.favored`` / ``allowed`` / ``outlier``) decides
    which proxy array receives the new proxy; a flavour of None means no
    restraint for that residue.

    Parameters:
      hierarchy - PDB hierarchy; stored as ``self.hierarchy``.

    Raises:
      RuntimeError - for an unknown restraint flavour.
      AssertionError - if the Ramachandran key is outside 0..5 or maps to an
          unexpected residue-type name.

    A three-line summary with proxy counts is printed to ``self.log``.
    """
    favored = ramalyze.RAMALYZE_FAVORED
    allowed = ramalyze.RAMALYZE_ALLOWED
    outlier = ramalyze.RAMALYZE_OUTLIER
    self.hierarchy = hierarchy
    bool_atom_selection = self._determine_bool_atom_selection(hierarchy)
    selected_h = hierarchy.select(bool_atom_selection)
    n_seq = flex.max(selected_h.atoms().extract_i_seq())
    # Drop all previous proxies
    self._oldfield_proxies = ext.shared_phi_psi_proxy()
    self._emsley_proxies = ext.shared_phi_psi_proxy()
    self._emsley8k_proxies = ext.shared_phi_psi_proxy()
    # it would be great to save rama_eval, but the fact that this is called in
    # pdb_interpretation, not in mmtbx.model makes it impossible
    self.rama_eval = rama_eval()
    # Rating -> configured restraint flavour.  Does not depend on the
    # residue, so it is built once instead of once per triple.
    restraint_type_by_rating = {
        favored: self.params.favored,
        allowed: self.params.allowed,
        outlier: self.params.outlier,
    }
    for three in generate_protein_threes(hierarchy=selected_h, geometry=None):
        rc = three.get_phi_psi_atoms()
        if rc is None:
            continue
        rama_key = three.get_ramalyze_key()
        angles = three.get_phi_psi_angles()
        rama_score = self.rama_eval.get_score(rama_key, angles[0], angles[1])
        r_eval = self.rama_eval.evaluate_score(rama_key, rama_score)
        phi_atoms, psi_atoms = rc
        # All phi atoms plus the last psi atom.
        i_seqs = [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq]
        assert rama_key in range(6)
        text_rama_key = ramalyze.res_types[rama_key]
        assert text_rama_key in [
            "general", "glycine", "cis-proline", "trans-proline",
            "pre-proline", "isoleucine or valine"]
        # pick where to put...
        r_type = restraint_type_by_rating[r_eval]
        if r_type == 'oldfield':
            proxy = ext.phi_psi_proxy(
                residue_type=text_rama_key,
                i_seqs=i_seqs,
                weight=1)  # XXX Not used in oldfield
            self.append_oldfield_proxies(proxy, n_seq)
            ### THIS IS CRUEL. REMOVE ONCE favored/allowed/outlier are made multiple!
            if self.params.inject_emsley8k_into_oldfield_favored:
                proxy = ext.phi_psi_proxy(
                    residue_type=text_rama_key,
                    i_seqs=i_seqs,
                    weight=5)
                self.append_emsley8k_proxies(proxy, n_seq)
            ###
        elif r_type == 'emsley':
            weight = self.params.emsley.weight
            proxy = ext.phi_psi_proxy(
                residue_type=text_rama_key,
                i_seqs=i_seqs,
                weight=weight)
            self.append_emsley_proxies(proxy, n_seq)
        elif r_type == 'emsley8k':
            # Ratings are plain constants: compare by value, not identity.
            if r_eval == favored:
                weight = self.params.emsley8k.weight_favored
            elif r_eval == allowed:
                weight = self.params.emsley8k.weight_allowed
            elif r_eval == outlier:
                weight = self.params.emsley8k.weight_outlier
            else:
                raise RuntimeError("Rama eveluation failed.")
            proxy = ext.phi_psi_proxy(
                residue_type=text_rama_key,
                i_seqs=i_seqs,
                weight=weight)
            self.append_emsley8k_proxies(proxy, n_seq)
        elif r_type is None:
            pass
        else:
            raise RuntimeError("Not an option: %s" % str(r_type))
    print("", file=self.log)
    print(" %d Ramachandran restraints generated." % (
        self.get_n_proxies()), file=self.log)
    print(" %d Oldfield and %d Emsley and %d emsley8k." % (
        self.get_n_oldfield_proxies(),
        self.get_n_emsley_proxies(),
        self.get_n_emsley8k_proxies()), file=self.log)
def __init__(self, pdb_hierarchy, params=None,
             secondary_structure_annotation=None, reference_map=None,
             crystal_symmetry=None, grm=None, rama_manager=None,
             rotamer_manager=None, log=null_out(), verbose=False):
    """Idealize Ramachandran outliers chain by chain, minimizing after each trial.

    Work is done on a deep copy of ``pdb_hierarchy`` stored in
    ``self.resulting_pdb_h``; the input is kept as ``self.original_pdb_h``.

    Parameters:
      pdb_hierarchy - single-model hierarchy; ``Sorry`` is raised for
          multi-model input.
      params - passed through ``self.process_params``; the result must provide
          ``enabled``, ``number_of_ccd_trials`` and ``minimize_whole``.
      secondary_structure_annotation - forwarded to the minimizers.
      reference_map, crystal_symmetry - must be given together; if only one
          of them is supplied the map is ignored.
      grm - forwarded to the minimizers.
      rama_manager - stored as ``self.r``; ``rama_eval()`` is created if None.
      rotamer_manager - ``RotamerEval()`` is created if None.
      log - stream for all messages.
      verbose - stored as ``self.verbose``.

    Outlier statistics are stored in the ``p_*`` attributes (ramalyze
    ``out_percent``) and the ``berkeley_p_*`` attributes (number of newlines
    in ``utils.list_rama_outliers_h`` output divided by the residue count,
    times 100).

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source.  Minimization and the final statistics are placed INSIDE the CCD
    loop because the loop condition tests
    ``berkeley_p_after_minimiaztion_rama_outliers``, which is only updated by
    that code - confirm against the formatted file.
    """
    if len(pdb_hierarchy.models()) > 1:
        raise Sorry("Multi-model files are not supported")
    self.original_pdb_h = pdb_hierarchy
    self.secondary_structure_annotation = secondary_structure_annotation
    # NOTE(review): the selection cache is built from the input hierarchy but
    # applied to self.resulting_pdb_h below, which is rebuilt in every trial -
    # this assumes the atom order never changes.
    asc = pdb_hierarchy.atom_selection_cache()
    self.xrs = pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=crystal_symmetry)
    self.reference_map = reference_map
    self.resulting_pdb_h = pdb_hierarchy.deep_copy()
    self.resulting_pdb_h.reset_atom_i_seqs()
    self.params = self.process_params(params)
    self.log = log
    self.verbose = verbose
    self.grm = grm
    self.r = rama_manager
    if self.r is None:
        self.r = rama_eval()
    self.rotamer_manager = rotamer_manager
    if self.rotamer_manager is None:
        self.rotamer_manager = RotamerEval()
    ram = ramalyze.ramalyze(pdb_hierarchy=pdb_hierarchy)
    self.p_initial_rama_outliers = ram.out_percent
    self.p_before_minimization_rama_outliers = None
    self.p_after_minimiaztion_rama_outliers = None
    # Identity test instead of list.count(None): count() would compare the
    # map object to None with "==".
    n_missing = sum(1 for x in (reference_map, crystal_symmetry) if x is None)
    if n_missing == 1:
        print >> log, "Need to have both map and symmetry info. Not using map."
        self.reference_map = None
    berkeley_count = utils.list_rama_outliers_h(
        self.resulting_pdb_h).count("\n")
    self.berkeley_p_before_minimization_rama_outliers = \
        berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
    # Seeded with the "before" value so the loop condition is meaningful on
    # the first pass.
    self.berkeley_p_after_minimiaztion_rama_outliers = \
        self.berkeley_p_before_minimization_rama_outliers
    self.ref_exclusion_selection = ""
    number_of_ccd_trials = 0
    if self.berkeley_p_before_minimization_rama_outliers <= 0.001:
        print >> self.log, "No ramachandran outliers, skipping CCD step."
    if not self.params.enabled:
        print >> self.log, "Loop idealization is not enabled, use 'enabled=True'."
    while (number_of_ccd_trials < self.params.number_of_ccd_trials
           and self.berkeley_p_after_minimiaztion_rama_outliers > 0.001
           and self.params.enabled):
        # Progress goes to the log like every other message, not to stdout.
        print >> self.log, "CCD try number, outliers:", number_of_ccd_trials, self.berkeley_p_before_minimization_rama_outliers
        number_of_ccd_trials += 1
        # The previous trial stripped the trailing "or "; put it back before
        # new clauses are appended, otherwise the selection string would be
        # malformed from the second trial on.
        if len(self.ref_exclusion_selection) > 0:
            self.ref_exclusion_selection += "or "
        processed_chain_ids = []
        for chain in self.resulting_pdb_h.only_model().chains():
            print >> self.log, "Idealizing chain %s" % chain.id
            # Only the first chain object carrying a given id is processed.
            if chain.id not in processed_chain_ids:
                processed_chain_ids.append(chain.id)
            else:
                continue
            selection = "protein and chain %s and (name N or name CA or name C or name O)" % chain.id
            sel = asc.selection("chain %s" % chain.id)
            chain_h = self.resulting_pdb_h.select(sel)
            m = chain_h.only_model()
            cutted_chain_h = None
            for chain_number, c in enumerate(m.chains()):
                if chain_number == 0:
                    cutted_chain_h = iotbx.pdb.hierarchy.new_hierarchy_from_chain(
                        c)
                else:
                    print >> self.log, "WARNING!!! Duplicating chain ids! Only the first chain will be processed."
                    print >> self.log, " Removing chain %s with %d residues" % (
                        c.id, len(c.residues()))
                    m.remove_chain(c)
            exclusions, ch_h = self.idealize_chain(
                hierarchy=(cutted_chain_h if cutted_chain_h else chain_h))
            if ch_h is not None:
                # Copy idealized coordinates back into the working hierarchy.
                set_xyz_smart(
                    dest_h=chain,
                    source_h=ch_h)
                for resnum in exclusions:
                    selection += " and not resseq %s" % resnum
            self.ref_exclusion_selection += "(%s) or " % selection
        # dumping and reloading hierarchy to do proper rounding of coordinates
        self.resulting_pdb_h = iotbx.pdb.input(
            source_info=None,
            lines=self.resulting_pdb_h.as_pdb_string()).construct_hierarchy()
        berkeley_count = utils.list_rama_outliers_h(
            self.resulting_pdb_h).count("\n")
        self.berkeley_p_before_minimization_rama_outliers = \
            berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
        if len(self.ref_exclusion_selection) > 0:
            # Drop the "or " left behind by the last appended clause.
            self.ref_exclusion_selection = self.ref_exclusion_selection[:-3]
        ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
        self.p_before_minimization_rama_outliers = ram.out_percent
        duke_count = ram.get_outliers_count_and_fraction()[0]
        if berkeley_count != duke_count:
            print >> self.log, "Discrepancy between berkeley and duke after ccd:", berkeley_count, duke_count
        if self.params.minimize_whole:
            print >> self.log, "minimizing whole thing..."
            print >> self.log, "self.ref_exclusion_selection", self.ref_exclusion_selection
            if self.reference_map is None:
                minimize_wrapper_for_ramachandran(
                    hierarchy=self.resulting_pdb_h,
                    xrs=self.xrs,
                    original_pdb_h=self.original_pdb_h,
                    excl_string_selection=self.ref_exclusion_selection,
                    grm=self.grm,
                    log=None,
                    ss_annotation=self.secondary_structure_annotation)
            else:
                # Called for its effect; the returned object is not used.
                minimize_wrapper_with_map(
                    pdb_h=self.resulting_pdb_h,
                    xrs=self.xrs,
                    target_map=self.reference_map,
                    grm=self.grm,
                    ss_annotation=self.secondary_structure_annotation,
                    log=self.log)
        # Statistics after this trial; they also feed the loop condition.
        ram = ramalyze.ramalyze(pdb_hierarchy=self.resulting_pdb_h)
        self.p_after_minimiaztion_rama_outliers = ram.out_percent
        berkeley_count = utils.list_rama_outliers_h(
            self.resulting_pdb_h).count("\n")
        duke_count = ram.get_outliers_count_and_fraction()[0]
        self.berkeley_p_after_minimiaztion_rama_outliers = \
            berkeley_count/float(self.resulting_pdb_h.overall_counts().n_residues)*100
        # Reported per trial: duke_count only exists once a trial has run.
        if berkeley_count != duke_count:
            print >> self.log, "Discrepancy between berkeley and duke after min:", berkeley_count, duke_count
        else:
            print >> self.log, "Number of Rama outliers after min:", berkeley_count
def get_all_starting_conformations(moving_h, change_radius, include_allowed,
                                   n_outliers, direction_forward=True,
                                   cutoff=50, change_all=True,
                                   log=null_out(), check_omega=False):
    """Enumerate candidate (phi, psi) assignments for the residues of ``moving_h``.

    For every phi/psi pair returned by ``utils.get_phi_psi_atoms`` a list of
    candidate angle pairs is chosen:

    * outlier (or allowed, when ``include_allowed``) and ``n_outliers < 3``:
      ``get_sampled_rama_favored_angles``;
    * otherwise, if the residue index is inside the central window, the
      residue is an outlier, or any omega was found twisted:
      ``ramalyze.get_favored_regions``;
    * otherwise ``[(None, None)]``.

    The result is the Cartesian product of the per-residue candidates,
    keeping only combinations accepted by ``is_not_none_combination``.

    Parameters:
      moving_h - hierarchy of the fragment.
      change_radius - half-width of the window around the middle residue.
      include_allowed - treat "allowed" residues like outliers.
      n_outliers - number of outliers; widens the window by ``n_outliers//2``
          on each side and selects the sampling mode (see above).
      direction_forward - not used by this function.
      cutoff - if the number of combinations exceeds this value, every
          per-residue list is randomly sub-sampled so that the product is
          roughly 1000.
      change_all - when False no index-based window is used.
      log - output stream; None is replaced by a ``StringIO``.
      check_omega - when True, an omega deviating from 180 degrees by more
          than 30 degrees marks the fragment as twisted.

    Returns:
      list of tuples of (phi, psi) pairs; empty when there are no phi/psi
      atoms in ``moving_h``.
    """
    if log is None:
        log = StringIO()
    variants = []
    result = []
    r = rama_eval()
    phi_psi_atoms = utils.get_phi_psi_atoms(moving_h, omega=True)
    if len(phi_psi_atoms) == 0:
        print("Strange input to starting conformations!!!", file=log)
        return result
    n_rama = len(phi_psi_atoms)
    change_angles = [None]
    if change_all:
        # Window of residue indices centred on the middle of the fragment.
        change_angles = range(
            n_rama // 2 - change_radius - n_outliers // 2,
            n_rama // 2 + change_radius + 1 + n_outliers // 2)
    has_twisted = False
    if check_omega:
        has_twisted = any(
            o is not None and abs(abs(o) - 180) > 30
            for o in (x[2] for x in phi_psi_atoms))
    print("n_outliers", n_outliers, file=log)
    for i, (phi_psi_pair, rama_key, omega) in enumerate(phi_psi_atoms):
        # Evaluate once; the rating is needed for both comparisons.
        rating = utils.rama_evaluate(phi_psi_pair, r, rama_key)
        angle_is_outlier = rating == ramalyze.RAMALYZE_OUTLIER
        angle_is_outlier = angle_is_outlier or (
            include_allowed and rating == ramalyze.RAMALYZE_ALLOWED)
        twisted = omega is not None and (
            (abs(abs(omega) - 180) > 30) and check_omega)
        print("in cycle, N, outlier?, change?, twisted?", i, angle_is_outlier,
              i in change_angles, twisted, file=log)
        if angle_is_outlier and n_outliers < 3:
            vs = get_sampled_rama_favored_angles(rama_key, r)
        elif (i in change_angles) or angle_is_outlier or has_twisted:
            vs = ramalyze.get_favored_regions(rama_key)
        else:
            vs = [(None, None)]
        variants.append(vs)
    print("variants", variants, file=log)
    # Filtering them, since could be
    # [len(x) for x in variants] = [129, 129, 4, 129, 129]
    # resulting in 1107691524 all_angles_combination
    # Plain Python integers: the product cannot overflow, so the guard below
    # stays reliable however many residues are involved.
    n_comb = 1
    for vs in variants:
        n_comb *= len(vs)
    if n_comb > cutoff:
        # still aiming for ~1000
        # Float division regardless of interpreter version; the epsilon
        # keeps exact roots (e.g. 1000**(1/3.)) from being truncated to 9.
        n_in_each = int(1000 ** (1.0 / len(variants)) + 1e-9)
        variants = [
            random.sample(vs, n_in_each) if len(vs) > n_in_each else vs
            for vs in variants]
    # filter none combinations while iterating, without materializing the
    # full product first
    all_angles_combination_f = [
        comb for comb in itertools.product(*variants)
        if is_not_none_combination(comb)]
    print("len(all_angles_combination_f)", len(all_angles_combination_f),
          file=log)
    return all_angles_combination_f